From 76eb365b53286f69a92cbbcc3007833b7ea05cfa Mon Sep 17 00:00:00 2001 From: "Aaron M. Ucko" Date: Wed, 13 Jul 2011 20:30:10 -0400 Subject: Imported Upstream version 6.1.20110713 --- api/aceread.c | 98 +- api/aceread.h | 9 +- api/asn2gnb1.c | 378 +- api/asn2gnb2.c | 205 +- api/asn2gnb3.c | 256 +- api/asn2gnb4.c | 714 +- api/asn2gnb5.c | 118 +- api/asn2gnb6.c | 247 +- api/asn2gnbi.h | 52 +- api/ecnum_ambiguous.inc | 1234 +- api/ecnum_deleted.inc | 40 +- api/ecnum_replaced.inc | 194 +- api/ecnum_specific.inc | 8785 +++++------ api/explore.h | 18 +- api/gather.c | 7 +- api/gbftdef.h | 5 +- api/gbftglob.c | 194 +- api/macroapi.c | 38042 ++++++++++++++++++++++++++++------------------ api/macroapi.h | 108 +- api/objmgr.c | 5 +- api/seqmgr.c | 76 +- api/seqport.c | 46 +- api/seqport.h | 12 +- api/sequtil.c | 64 +- api/sqnutil1.c | 2099 ++- api/sqnutil2.c | 2152 ++- api/sqnutil3.c | 7506 ++++++--- api/sqnutil4.c | 7882 +++++++++- api/sqnutils.h | 196 +- api/subutil.c | 234 +- api/subutil.h | 44 +- api/tofasta.c | 390 +- api/tofasta.h | 24 +- api/utilpub.c | 9 +- api/utilpub.h | 2 + api/valapi.c | 15 +- api/valid.c | 6291 +++++--- api/valid.h | 99 +- api/valid.msg | 40 + api/validerr.h | 12 + 40 files changed, 51756 insertions(+), 26146 deletions(-) (limited to 'api') diff --git a/api/aceread.c b/api/aceread.c index 045acca8..efab0ea1 100644 --- a/api/aceread.c +++ b/api/aceread.c @@ -1,5 +1,5 @@ /* - * $Id: aceread.c,v 1.19 2010/05/26 15:13:01 bollin Exp $ + * $Id: aceread.c,v 1.23 2010/12/13 16:28:14 bollin Exp $ * * =========================================================================== * @@ -154,7 +154,7 @@ static int s_IsGapChar (char ch, char *gap_chars) /* The Trace Archive Gap String is a list of the number of nucleotides to skip before adding the next gap */ -extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char *gap_chars) +extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char gap_char) { char * cp; int num_gaps = 0, pos, gap_num = 0; 
@@ -165,7 +165,7 @@ extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char *gap_chars) /* first determine number of gaps */ cp = seq_str; while (*cp != 0) { - if (s_IsGapChar(*cp, gap_chars)) { + if (*cp == gap_char) { num_gaps++; } cp++; @@ -178,7 +178,7 @@ extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char *gap_chars) cp = seq_str; pos = 0; while (*cp != 0) { - if (s_IsGapChar(*cp, gap_chars)) { + if (*cp == gap_char) { g->gap_offsets[gap_num] = pos; gap_num++; pos = 0; @@ -191,18 +191,18 @@ extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char *gap_chars) return g; } -extern void RemoveGapCharsFromSequenceString (char *seq_str, char *gap_chars) +extern void RemoveGapCharsFromSequenceString (char *seq_str, char gap_char) { char *cp_src, *cp_dst; - if (seq_str == NULL || gap_chars == NULL) { + if (seq_str == NULL) { return; } cp_src = seq_str; cp_dst = seq_str; while (*cp_src != 0) { - if (!s_IsGapChar(*cp_src, gap_chars)) { + if (*cp_src != gap_char) { *cp_dst = *cp_src; cp_dst++; } @@ -755,10 +755,66 @@ extern void ACEFileFree (TACEFilePtr afp) static char s_IsSeqChar (char ch) { - if (ch == '*' || ch == '-' || isalpha (ch)) { - return 1; - } else { - return 0; + switch (ch) { + case '*': + case '-': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + return 1; + break; + default: + return 0; + break; } } @@ -1228,13 +1284,13 @@ static void 
s_CalculateContigOffsets (TContigPtr contig) } -static int s_GetUngappedSeqLen (char *str, char *gap_chars) +static int s_GetUngappedSeqLen (char *str, char gap_char) { int len = 0; if (str == NULL) return 0; while (*str != 0) { - if (!s_IsGapChar (*str, gap_chars)) { + if (*str != gap_char) { len++; } str++; @@ -1250,6 +1306,8 @@ static char * s_AddToTagComment (char *orig, char *extra) if (orig == NULL) { tag = extra; + } else if (extra == NULL) { + tag = orig; } else { tag_len = strlen (orig) + strlen (extra) + 1; tag = malloc (sizeof (char) * (tag_len + 1)); @@ -1400,10 +1458,10 @@ static TContigPtr s_ReadContig } /* record actual length of consensus seq */ - contig->consensus_seq_len = s_GetUngappedSeqLen (contig->consensus_seq, "*"); + contig->consensus_seq_len = s_GetUngappedSeqLen (contig->consensus_seq, '*'); /* calculate gap info */ - contig->gaps = GapInfoFromSequenceString (contig->consensus_seq, "*"); + contig->gaps = GapInfoFromSequenceString (contig->consensus_seq, '*'); /* read quality scores */ if (make_qual_scores) { @@ -1453,8 +1511,8 @@ static TContigPtr s_ReadContig return NULL; } s_AdjustContigReadForTerminalNs (contig->reads[read_num]); - contig->reads[read_num]->read_len = s_GetUngappedSeqLen (contig->reads[read_num]->read_seq, "*"); - contig->reads[read_num]->gaps = GapInfoFromSequenceString (contig->reads[read_num]->read_seq, "*"); + contig->reads[read_num]->read_len = s_GetUngappedSeqLen (contig->reads[read_num]->read_seq, '*'); + contig->reads[read_num]->gaps = GapInfoFromSequenceString (contig->reads[read_num]->read_seq, '*'); read_num++; report_read_num = read_num - 1; } else if (linestring [0] == 'Q' && linestring[1] == 'A' && isspace (linestring[2])) { @@ -1668,7 +1726,7 @@ static void s_WriteContig (FILE *fp, TContigPtr contig) } fprintf (fp, "\n"); for (i = 0; i < contig->num_reads; i++) { - fprintf (fp, "RD %s %d\n", contig->reads[i]->read_id, strlen (contig->reads[i]->read_seq)); + fprintf (fp, "RD %s %d\n", 
contig->reads[i]->read_id, (int) strlen (contig->reads[i]->read_seq)); s_WriteSeq (fp, contig->reads[i]->read_seq); fprintf (fp, "\n"); } @@ -2013,9 +2071,9 @@ ReadContigFromString read->read_stop = read->read_len; if (n_is_gap) { /* adjust for gaps */ - read->gaps = GapInfoFromSequenceString (read->read_seq, "N"); + read->gaps = GapInfoFromSequenceString (read->read_seq, 'N'); if (read->gaps->num_gaps > 0) { - RemoveGapCharsFromSequenceString (read->read_seq, "N"); + RemoveGapCharsFromSequenceString (read->read_seq, 'N'); read->read_stop -= read->gaps->num_gaps; read->read_len -= read->gaps->num_gaps; } diff --git a/api/aceread.h b/api/aceread.h index 982a304c..595cf31f 100644 --- a/api/aceread.h +++ b/api/aceread.h @@ -2,7 +2,7 @@ #define API_ACEREAD__H /* - * $Id: aceread.h,v 1.13 2010/03/03 18:46:08 bollin Exp $ + * $Id: aceread.h,v 1.14 2010/08/30 12:39:23 bollin Exp $ * * =========================================================================== * @@ -69,8 +69,8 @@ typedef struct gapinfo { extern TGapInfoPtr GapInfoNew (void); extern void GapInfoFree (TGapInfoPtr g); -extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char *gap_chars); -extern void RemoveGapCharsFromSequenceString (char *seq_str, char *gap_chars); +extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char gap_char); +extern void RemoveGapCharsFromSequenceString (char *seq_str, char gap_char); extern int SeqPosFromTilingPos (int tiling_pos, TGapInfoPtr gap_info); extern int TilingPosFromSeqPos (int seq_pos, TGapInfoPtr gap_info); @@ -291,6 +291,9 @@ ProcessLargeACEFileForContigFastaAndQualScores * ========================================================================== * * $Log: aceread.h,v $ + * Revision 1.14 2010/08/30 12:39:23 bollin + * Performance improvements for aceread_tst + * * Revision 1.13 2010/03/03 18:46:08 bollin * use unsigned int to keep track of the number of contigs. 
* diff --git a/api/asn2gnb1.c b/api/asn2gnb1.c index 2b03644b..45d3157a 100644 --- a/api/asn2gnb1.c +++ b/api/asn2gnb1.c @@ -31,7 +31,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.198 $ +* $Revision: 1.217 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -743,6 +743,7 @@ NLM_EXTERN void FFCatenateSubString ( Int4 max_i, min_i, i, len = 0; StringItemPtr current; Boolean in_url = FALSE, found_start = FALSE; + Boolean in_html_ampersand_escape = FALSE; IntAsn2gbJobPtr ajp = (IntAsn2gbJobPtr)dest->iajp; Uint4 char_count = 0; @@ -767,7 +768,7 @@ NLM_EXTERN void FFCatenateSubString ( * HTML specific processing: * ---------------------------------------------------------------------*/ if ( GetWWW(ajp) ) { - if ( ! in_url ) { + if ( ! in_url && ! in_html_ampersand_escape ) { if ( current->buf[i] == '<' ) { /* Watch out! */ if (FFIsStartOfLinkEx (current, i, &len)) { @@ -781,6 +782,15 @@ NLM_EXTERN void FFCatenateSubString ( continue; } } + if( current->buf[i] == '&' ) + { + FFAddOneChar(dest, '&', FALSE); + if( FFIsStartOfHTMLAmpersandEscape(current, i) ) { + in_html_ampersand_escape = TRUE; + } + ++char_count; + continue; + } if (char_count == line_max) { break; } @@ -805,8 +815,18 @@ NLM_EXTERN void FFCatenateSubString ( } } + else if( in_html_ampersand_escape ) { + FFAddOneChar(dest, current->buf[i], FALSE); + if( current->buf[i] == ';' ) { + in_html_ampersand_escape = FALSE; + } + continue; + } + else /* in_url */ { - if ( current->buf[i] == '&' ) { + if ( current->buf[i] == '&' && + ! 
FFStartsWith(current, i, "&", TRUE) ) + { /* encode ampersand for XHMLT */ FFAddOneString(dest, "&", FALSE, FALSE, TILDE_IGNORE); continue; @@ -839,29 +859,47 @@ NLM_EXTERN void FFCatenateSubString ( } } -NLM_EXTERN CharPtr FFToCharPtr (StringItemPtr sip) { +NLM_EXTERN CharPtr FFToCharPtrEx (StringItemPtr sip, CharPtr pfx, CharPtr sfx) + +{ Int4 size = 0; StringItemPtr iter; CharPtr result, temp; + size_t pfx_len, sfx_len; + + pfx_len = StringLen (pfx); + sfx_len = StringLen (sfx); for ( iter = sip; iter != NULL; iter = iter->next ) { size += iter->pos; } - result = (CharPtr)MemNew(size + 2); + result = (CharPtr)MemNew(size + pfx_len + sfx_len + 2); temp = result; + if (pfx_len > 0) { + MemCpy( temp, pfx, pfx_len ); + temp += pfx_len; + } for ( iter = sip; iter != NULL; iter = iter->next ) { MemCpy( temp, iter->buf, iter->pos ); temp += iter->pos; } + if (sfx_len > 0) { + MemCpy( temp, sfx, sfx_len ); + temp += sfx_len; + } *temp = '\0'; return result; } +NLM_EXTERN CharPtr FFToCharPtr (StringItemPtr sip) +{ + return FFToCharPtrEx (sip, NULL, NULL); +} /* word wrap functions */ @@ -877,6 +915,8 @@ static CharPtr url_anchor_strings [] = { "", NULL }; @@ -941,6 +981,24 @@ NLM_EXTERN void FFSkipLink (StringItemPtr PNTR iterp, Int4Ptr ip) { *ip = i; } +NLM_EXTERN void FFSkipHTMLAmpersandEscape (StringItemPtr PNTR iterp, Int4Ptr ip) +{ + StringItemPtr iter = *iterp; + Int4 i = *ip; + + while ( (iter != NULL) && (iter->buf[i] != ';') ) { + ++i; + + if ( i == iter->pos ) { + iter = iter->next; + i = 0; + } + } + + *iterp = iter; + *ip = i; +} + static Boolean FFIsStartOfLinkEx (StringItemPtr iter, Int4 pos, Int4Ptr lenP) { @@ -987,6 +1045,48 @@ NLM_EXTERN Boolean FFIsStartOfLink (StringItemPtr iter, Int4 pos) return FFIsStartOfLinkEx (iter, pos, NULL); } +NLM_EXTERN Boolean FFIsStartOfHTMLAmpersandEscape ( + StringItemPtr iter, + Int4 pos ) +{ + Char ch; + Int4 i; + Int4 max_len = 20; + + if ( iter == NULL || pos >= iter->pos ) return FALSE; + if ( iter->buf [pos] != '&' 
) return FALSE; + + /* skip the initial '&' */ + pos++; + if (pos >= iter->pos) { + iter = iter->next; + pos = 0; + if (iter == NULL) return FALSE; + } + + for (i = 0; i < max_len; i++) { + ch = iter->buf [pos]; + if( isalnum(ch) || ch == '#' ) { + /* fine; these are chars expected in HTML ampersand char */ + } else if( ch == ';' ) { + /* found end of HTML ampersand char */ + return TRUE; + } else { + /* illegal char in HTML ampersand char */ + return FALSE; + } + + pos++; + if (pos >= iter->pos) { + iter = iter->next; + pos = 0; + if (iter == NULL) return FALSE; + } + } + + return FALSE; +} + /* NLM_EXTERN Boolean FFIsStartOfLink (StringItemPtr iter, Int4 pos) { static CharPtr start_link = "buf[text_pos]) : + pattern[pattern_pos] == text->buf[text_pos] ) ) + { + /* advance pattern; if we reach the end, + * text starts with pattern */ + ++pattern_pos; + if( pattern[pattern_pos] == '\0' ) { + return TRUE; + } + + /* advance text, if we reach the end, text does NOT start + * with pattern */ + FFAdvanceChar( &text, &text_pos ); + if( NULL == text ) { + return FALSE; + } + } + + return FALSE; +} /* */ /* IsWholeWordSubstr () -- Determines if a substring that is */ @@ -2881,7 +3065,8 @@ static void DoBioseqSetList ( bssp->_class == BioseqseqSet_class_phy_set || bssp->_class == BioseqseqSet_class_eco_set || bssp->_class == BioseqseqSet_class_wgs_set || - bssp->_class == BioseqseqSet_class_gen_prod_set) { + bssp->_class == BioseqseqSet_class_gen_prod_set || + bssp->_class == BioseqseqSet_class_small_genome_set) { /* if popset within genbank set, for example, recurse */ @@ -2917,7 +3102,8 @@ static void DoOneBioseqSet ( bssp->_class == BioseqseqSet_class_phy_set || bssp->_class == BioseqseqSet_class_eco_set || bssp->_class == BioseqseqSet_class_wgs_set || - bssp->_class == BioseqseqSet_class_gen_prod_set) { + bssp->_class == BioseqseqSet_class_gen_prod_set || + bssp->_class == BioseqseqSet_class_small_genome_set) { /* this is a pop/phy/mut/eco set, catenate separate 
reports */ @@ -3179,7 +3365,8 @@ static void CountBioseqSetList ( bssp->_class == BioseqseqSet_class_phy_set || bssp->_class == BioseqseqSet_class_eco_set || bssp->_class == BioseqseqSet_class_wgs_set || - bssp->_class == BioseqseqSet_class_gen_prod_set) { + bssp->_class == BioseqseqSet_class_gen_prod_set || + bssp->_class == BioseqseqSet_class_small_genome_set) { CountBioseqSetList (bssp->seq_set, awp); @@ -3211,7 +3398,8 @@ static void CountOneBioseqSet ( bssp->_class == BioseqseqSet_class_phy_set || bssp->_class == BioseqseqSet_class_eco_set || bssp->_class == BioseqseqSet_class_wgs_set || - bssp->_class == BioseqseqSet_class_gen_prod_set) { + bssp->_class == BioseqseqSet_class_gen_prod_set || + bssp->_class == BioseqseqSet_class_small_genome_set) { CountBioseqSetList (bssp->seq_set, awp); @@ -3640,7 +3828,8 @@ static void LookForGEDetc ( static void MakeGapFeatsBase ( BioseqPtr bsp, Pointer userdata, - Boolean isSP + Boolean isSP, + Boolean rev_comp ) { @@ -3702,7 +3891,11 @@ static void MakeGapFeatsBase ( sprintf (buf, "%ld", (long) litp->length); AddQualifierToFeature (sfp, "estimated_length", buf); } - sfp->location = AddIntervalToLocation (NULL, sip, currpos, currpos + litp->length - 1, FALSE, FALSE); + if (rev_comp) { + sfp->location = AddIntervalToLocation (NULL, sip, currpos + litp->length - 1, currpos, FALSE, FALSE); + } else { + sfp->location = AddIntervalToLocation (NULL, sip, currpos, currpos + litp->length - 1, FALSE, FALSE); + } } else if (isSP && litp->length == 0) { if (fakebsp == NULL) { /* to be freed with MemFree, not BioseqFree */ @@ -3730,7 +3923,11 @@ static void MakeGapFeatsBase ( sprintf (buf, "%ld", (long) litp->length); AddQualifierToFeature (sfp, "estimated_length", buf); } - sfp->location = AddIntervalToLocation (NULL, sip, currpos - 1, currpos, FALSE, FALSE); + if (rev_comp) { + sfp->location = AddIntervalToLocation (NULL, sip, currpos, currpos - 1, FALSE, FALSE); + } else { + sfp->location = AddIntervalToLocation (NULL, sip, currpos 
- 1, currpos, FALSE, FALSE); + } sfp->comment = StringSave ("Non-consecutive residues"); } } @@ -3745,7 +3942,16 @@ static void MakeSPGapFeats ( ) { - MakeGapFeatsBase (bsp, userdata, TRUE); + MakeGapFeatsBase (bsp, userdata, TRUE, FALSE); +} + +static void MakeRCGapFeats ( + BioseqPtr bsp, + Pointer userdata +) + +{ + MakeGapFeatsBase (bsp, userdata, FALSE, TRUE); } static void MakeGapFeats ( @@ -3754,7 +3960,7 @@ static void MakeGapFeats ( ) { - MakeGapFeatsBase (bsp, userdata, FALSE); + MakeGapFeatsBase (bsp, userdata, FALSE, FALSE); } static CharPtr gapstr1 = " gap "; @@ -3893,6 +4099,20 @@ static void FindMultiIntervalGenes ( } } +static void FindSegmentedBioseqs ( + BioseqPtr bsp, + Pointer userdata +) + +{ + BoolPtr segmentedBioseqsP; + + if (bsp == NULL || bsp->repr != Seq_repr_seg) return; + segmentedBioseqsP = (BoolPtr) userdata; + if (segmentedBioseqsP == NULL) return; + *segmentedBioseqsP = TRUE; +} + static CharPtr bad_html_strings [] = { "from == 0 && sintp->to == bsp->length - 1 && - sintp->strand == Seq_strand_plus) { + sintp->strand != Seq_strand_minus) { slp = NULL; SeqLocFree (loc); loc = NULL; + } else if (sintp != NULL && + sintp->from == 0 && + sintp->to == bsp->length - 1 && + sintp->strand == Seq_strand_minus) { + rev_comp = TRUE; } } } + if (slp != NULL && (! rev_comp)) { + /* suppress gaps if using sub-location, but show gaps if location was whole or interval 0..length-1 on either strand */ + was_slp = TRUE; + } + if (bsp != NULL) { bssp = NULL; entityID = ObjMgrGetEntityIDForPointer (bsp); @@ -4139,24 +4372,26 @@ static Asn2gbJobPtr asn2gnbk_setup_ex ( VisitDescriptorsInSep (sep, (Pointer) &featpolicy, LookFarFeatFetchPolicy); fargaps = NULL; - if (format != FTABLE_FMT) { + if (format != FTABLE_FMT && (! 
was_slp)) { if (isRefSeq && isNC && VisitFeaturesInSep (sep, NULL, NULL) == 0) { if ((Boolean) ((custom & HIDE_GAP_FEATS) == 0)) { VisitBioseqsInSep (sep, (Pointer) &fargaps, MakeFarGapFeats); } + if (fargaps != NULL && fargaps->choice == 1) { + fargaps = ValNodeFreeData (fargaps); + } } } - if (fargaps != NULL && fargaps->choice == 1) { - fargaps = ValNodeFreeData (fargaps); - } ajp->fargaps = fargaps; gapvnp = NULL; - if (fargaps == NULL && format != FTABLE_FMT) { + if (fargaps == NULL && format != FTABLE_FMT && (! was_slp)) { if (isGED /* was isG */ || isTPG || isOnlyLocal || isRefSeq || isSP || (isGeneral && (! isGED))) { if ((Boolean) ((custom & HIDE_GAP_FEATS) == 0)) { if (isSP) { VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeSPGapFeats); + } else if (rev_comp) { + VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeRCGapFeats); } else { VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeGapFeats); } @@ -4234,9 +4469,9 @@ static Asn2gbJobPtr asn2gnbk_setup_ex ( } } - /* if location specified, normal defaults to master style */ + /* if location specified, other than full reverse complement, normal defaults to master style */ - if (ajp->ajp.slp != NULL && style == NORMAL_STYLE) { + if (ajp->ajp.slp != NULL && style == NORMAL_STYLE && (! 
rev_comp)) { style = MASTER_STYLE; } @@ -4327,6 +4562,8 @@ static Asn2gbJobPtr asn2gnbk_setup_ex ( VisitFeaturesInSep (sep, (Pointer) &multiIntervalGenes, FindMultiIntervalGenes); ajp->multiIntervalGenes = multiIntervalGenes; + VisitBioseqsInSep (sep, (Pointer) &segmentedBioseqs, FindSegmentedBioseqs); + ajp->segmentedBioseqs = segmentedBioseqs; ajp->relModeError = FALSE; ajp->skipProts = skipProts; @@ -4470,6 +4707,7 @@ static Asn2gbJobPtr asn2gnbk_setup_ex ( aw.copyGpsGeneDown = (Boolean) ((flags & COPY_GPS_GENE_DOWN) != 0); } } + aw.isRefSeq = isRefSeq; aw.showContigAndSeq = (Boolean) ((flags & SHOW_CONTIG_AND_SEQ) != 0); /* @@ -5537,6 +5775,12 @@ static void AddOneFtableQual ( if (StringHasNoText (qual)) return; if (StringHasNoText (val)) return; + if (StringCmp (qual, "orig_protein_id") == 0) { + qual = "protein_id"; + } else if (StringCmp (qual, "orig_transcript_id") == 0) { + qual = "transcript_id"; + } + len = StringLen (qual) + StringLen (val) + 10; tmp = (CharPtr) MemNew (sizeof (Char) * len); if (tmp == NULL) return; diff --git a/api/asn2gnb2.c b/api/asn2gnb2.c index e435d2bc..3fbb0b60 100644 --- a/api/asn2gnb2.c +++ b/api/asn2gnb2.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.147 $ +* $Revision: 1.161 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -63,7 +63,7 @@ #endif #endif -static CharPtr link_projid = "http://www.ncbi.nlm.nih.gov/sites/entrez?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids="; +static CharPtr link_projid = "http://www.ncbi.nlm.nih.gov/bioproject/"; static CharPtr link_srr = "http://www.ncbi.nlm.nih.gov/sites/entrez?db=sra&term="; @@ -449,6 +449,7 @@ NLM_EXTERN void AddLocusBlock ( Char buf [1024]; Boolean cagemaster = FALSE; SeqFeatPtr cds; + Char ch1, ch2, ch3; Int4 currGi; Char dataclass [10]; Char date [40]; @@ -494,6 +495,7 @@ NLM_EXTERN void AddLocusBlock ( OrgRefPtr orp; BioseqPtr parent; Int4 prevGi; + CharPtr ptr; SeqDescrPtr sdp; Char sect [128]; Char seg [32]; @@ 
-619,6 +621,27 @@ NLM_EXTERN void AddLocusBlock ( } } + if (sip != NULL && sip->choice == SEQID_PDB) { + ptr = StringChr (locus, '_'); + if (ptr != NULL) { + ch1 = ptr [1]; + if (ch1 != '\0') { + ch2 = ptr [2]; + if (ch2 != '\0') { + ch3 = ptr [3]; + if (ch3 == '\0') { + if (ch1 == ch2) { + if (IS_UPPER (ch1)) { + ptr [1] = TO_LOWER (ch1); + ptr [2] = '\0'; + } + } + } + } + } + } + } + if (is_np) { sfp = SeqMgrGetCDSgivenProduct (bsp, &fcontext); if (sfp != NULL && fcontext.bsp != NULL) { @@ -1763,6 +1786,7 @@ NLM_EXTERN void AddAccessionBlock ( BaseBlockPtr bbp; BioseqPtr bsp; Char buf [41]; + Char ch1, ch2, ch3; SeqMgrDescContext dcontext; EMBLBlockPtr ebp; ValNodePtr extra_access; @@ -1777,6 +1801,7 @@ NLM_EXTERN void AddAccessionBlock ( SeqIdPtr lcl = NULL; size_t len = 0; MolInfoPtr mip; + CharPtr ptr; SeqDescrPtr sdp; ValNodePtr secondary_acc; CharPtr separator = " "; @@ -1875,6 +1900,27 @@ NLM_EXTERN void AddAccessionBlock ( SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_ONLY, sizeof (buf)); + if (sip->choice == SEQID_PDB) { + ptr = StringChr (buf, '_'); + if (ptr != NULL) { + ch1 = ptr [1]; + if (ch1 != '\0') { + ch2 = ptr [2]; + if (ch2 != '\0') { + ch3 = ptr [3]; + if (ch3 == '\0') { + if (ch1 == ch2) { + if (IS_UPPER (ch1)) { + ptr [1] = TO_LOWER (ch1); + ptr [2] = '\0'; + } + } + } + } + } + } + } + bbp = Asn2gbAddBlock (awp, ACCESSION_BLOCK, sizeof (BaseBlock)); if (bbp == NULL) return; @@ -2051,22 +2097,24 @@ NLM_EXTERN void AddVersionBlock ( ) { - SeqIdPtr accn = NULL; - IntAsn2gbJobPtr ajp; - Asn2gbSectPtr asp; - BaseBlockPtr bbp; - BioseqPtr bsp; - Char buf [41]; - Uint1 format = PRINTID_TEXTID_ACC_VER; - GBSeqPtr gbseq; - Int4 gi = -1; - SeqIdPtr gpp = NULL; - IndxPtr index; - CharPtr ptr; - SeqIdPtr sip; - Char tmp [41]; - Char version [64]; - StringItemPtr ffstring; + SeqIdPtr accn = NULL; + IntAsn2gbJobPtr ajp; + Asn2gbSectPtr asp; + BaseBlockPtr bbp; + BioseqPtr bsp; + Char buf [41]; + Char ch1, ch2, ch3; + Uint1 format = PRINTID_TEXTID_ACC_VER; + 
GBSeqPtr gbseq; + Int4 gi = -1; + SeqIdPtr gpp = NULL; + IntAsn2gbSectPtr iasp; + IndxPtr index; + CharPtr ptr; + SeqIdPtr sip; + Char tmp [41]; + Char version [64]; + StringItemPtr ffstring; if (awp == NULL) return; ajp = awp->ajp; @@ -2079,6 +2127,8 @@ NLM_EXTERN void AddVersionBlock ( ffstring = FFGetString(ajp); if ( ffstring == NULL ) return; + iasp = (IntAsn2gbSectPtr) asp; + for (sip = bsp->id; sip != NULL; sip = sip->next) { switch (sip->choice) { case SEQID_GI : @@ -2166,11 +2216,45 @@ NLM_EXTERN void AddVersionBlock ( return; } + if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE) { + sprintf (version, "%ld", (long) gi); + iasp->gi = StringSave (version); + } + if (accn != NULL) { buf [0] = '\0'; SeqIdWrite (accn, buf, format, sizeof (buf) - 1); + if (accn->choice == SEQID_PDB) { + ptr = StringChr (buf, '_'); + if (ptr != NULL) { + ch1 = ptr [1]; + if (ch1 != '\0') { + ch2 = ptr [2]; + if (ch2 != '\0') { + ch3 = ptr [3]; + if (ch3 == '\0') { + if (ch1 == ch2) { + if (IS_UPPER (ch1)) { + ptr [1] = TO_LOWER (ch1); + ptr [2] = '\0'; + } + } + } + } + } + } + } + + if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE) { + iasp->acc = StringSave (buf); + ptr = StringChr (iasp->acc, '.'); + if (ptr != NULL) { + *ptr = '\0'; + } + } + if (gi > 0) { sprintf (version, "%s GI:%ld", buf, (long) gi); } else { @@ -3065,12 +3149,14 @@ static void AddSPBlock ( str = NULL; if ( oip->str != NULL ) { str = oip->str; - if (StringNCmp (str, "GO:", 3) == 0) { + if (StringCmp (db->db, "GO") == 0 && StringNCmp (str, "GO:", 3) == 0) { str += 3; } else if (StringNCmp (str, "MGI:", 4) == 0) { str += 4; - } else if (StringNCmp (str, "HGNC:", 5) == 0) { + } else if (StringCmp (db->db, "HGNC") == 0 && StringNCmp (str, "HGNC:", 5) == 0) { str += 5; + } else if (StringCmp (db->db, "DIP") == 0 && StringNCmp (str, "DIP:", 4) == 0) { + str += 4; } } else if ( oip->id > 0 ) { sprintf (numbuf, "%d", oip->id); @@ -3869,13 +3955,13 @@ typedef struct finstatdata { } 
FinStatData, PNTR FinStatPtr; static FinStatData finStatKywds [] = { - {"Standard-Draft", "STANDARD_DRAFT"}, - {"High-quality-draft", "HIGH_QUALITY_DRAFT"}, - {"Improved-high-quality-draft", "IMPROVED_HIGH_QUALITY_DRAFT"}, - {"Annotation-grade", "ANNOTATION_GRADE"}, - {"Non-contiguous-finished", "NON_CONTIGUOUS_FINISHED"}, + {"Standard-draft", "STANDARD_DRAFT"}, + {"High-quality-draft", "HIGH_QUALITY_DRAFT"}, + {"Improved-high-quality-draft", "IMPROVED_HIGH_QUALITY_DRAFT"}, + {"Annotation-directed-improvement", "ANNOTATION_DIRECTED_IMPROVEMENT"}, + {"Noncontiguous-finished", "NONCONTIGUOUS_FINISHED"}, /* - {"Finished", "FINISHED"}, + {"Finished", "FINISHED"}, */ {NULL, NULL} }; @@ -3937,6 +4023,7 @@ NLM_EXTERN void AddKeywordsBlock ( Boolean is_sts = FALSE; Boolean is_env_sample = FALSE; Boolean is_genome_assembly = FALSE; + Boolean is_unverified = FALSE; ValNodePtr keywords; CharPtr kwd; MolInfoPtr mip; @@ -4124,10 +4211,18 @@ NLM_EXTERN void AddKeywordsBlock ( finishing_status = GetFinishingStatus ((CharPtr) curr->data.ptrvalue); } } + } else if (oip != NULL && StringICmp (oip->str, "Unverified") == 0) { + is_unverified = TRUE; } } sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext); } + if (is_unverified) { + if (head != NULL) { + ValNodeCopyStr (&head, 0, "; "); + } + ValNodeCopyStr (&head, 0, "UNVERIFIED"); + } if (add_encode) { if (head != NULL) { ValNodeCopyStr (&head, 0, "; "); @@ -4822,6 +4917,13 @@ static int LIBCALLBACK SortReferences ( status = DateMatch (irp1->date, irp2->date, TRUE); if (status == 1 || status == -1) return status; + /* if dates incomparable, do other comparisons */ + if ( status != 0 ) { + if( (NULL != irp1->date) && (NULL != irp2->date ) ) { + /* std date comes before str date */ + return ( irp2->date->data[0] - irp1->date->data[0] ); + } + } /* if dates (e.g., years) match, try to distinguish by uids */ @@ -4870,14 +4972,6 @@ static int LIBCALLBACK SortReferences ( return -1; } - /* for publication features, sort in 
explore index order */ - - if (irp1->index > irp2->index) { - return 1; - } else if (irp1->index < irp2->index) { - return -1; - } - /* next use author string */ if (irp1->authstr != NULL && irp2->authstr != NULL) { @@ -4900,6 +4994,14 @@ static int LIBCALLBACK SortReferences ( } } + /* for publication features, sort in explore index order - probably superset of itemID below */ + + if (irp1->index > irp2->index) { + return 1; + } else if (irp1->index < irp2->index) { + return -1; + } + /* last resort for equivalent publication descriptors, sort in itemID order */ if (rbp1->itemtype == OBJ_SEQDESC && rbp2->itemtype == OBJ_SEQDESC) { @@ -5069,7 +5171,8 @@ static void GetRefsOnBioseq ( BioseqPtr bsp, Int4 from, Int4 to, - SeqLocPtr cdsloc + SeqLocPtr cdsloc, + BioseqPtr cdsbsp ) { @@ -5094,6 +5197,7 @@ static void GetRefsOnBioseq ( SeqDescrPtr sdp; SeqFeatPtr sfp; SeqInt sint; + SeqIntPtr sintp; SeqIdPtr sip; Boolean split; Int4 start; @@ -5161,9 +5265,22 @@ static void GetRefsOnBioseq ( rbp->itemtype = OBJ_SEQDESC; irp = (IntRefBlockPtr) rbp; - irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE); - irp->left = 0; - irp->right = target->length - 1; + if (cdsloc != NULL && cdsbsp != NULL) { + sintp = SeqIntNew (); + sintp->from = 0; + sintp->to = cdsbsp->length - 1; + sintp->id = SeqIdDup (cdsbsp->id); + irp->loc = ValNodeAddPointer (NULL, SEQLOC_INT, (Pointer) sintp); + /* + irp->loc = SeqLocWholeNew (cdsbsp); + */ + irp->left = 0; + irp->right = cdsbsp->length - 1; + } else { + irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE); + irp->left = from; + irp->right = to; + } alp = GetAuthListPtr (pdp, NULL); if (alp != NULL) { irp->authstr = GetAuthorsPlusConsortium (awp->format, alp); @@ -5178,7 +5295,7 @@ static void GetRefsOnBioseq ( if (cdsloc != NULL) { cp.awp = awp; - cp.target = target; + cp.target = cdsbsp; cp.vnp = &vn; SeqMgrGetAllOverlappingFeatures (cdsloc, FEATDEF_PUB, NULL, 0, LOCATION_SUBSET, (Pointer) &cp, GetRefsOnCDS); } @@ 
-5201,8 +5318,8 @@ static void GetRefsOnBioseq ( irp = (IntRefBlockPtr) rbp; irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE); - irp->left = 0; - irp->right = target->length - 1; + irp->left = from; + irp->right = to; alp = GetAuthListPtr (pdp, NULL); if (alp != NULL) { irp->authstr = GetAuthorsPlusConsortium (awp->format, alp); @@ -5334,7 +5451,7 @@ static Boolean LIBCALLBACK GetRefsOnSeg ( SeqEntrySetScope (oldscope); if (bsp != NULL) { - GetRefsOnBioseq (awp, awp->refs, bsp, from, to, NULL); + GetRefsOnBioseq (awp, awp->refs, bsp, from, to, NULL, NULL); return TRUE; } @@ -5394,7 +5511,7 @@ NLM_EXTERN Boolean AddReferenceBlock ( /* collect publications on bioseq */ awp->pubhead = NULL; - GetRefsOnBioseq (awp, bsp, refs, awp->from, awp->to, NULL); + GetRefsOnBioseq (awp, bsp, refs, awp->from, awp->to, NULL, NULL); target = bsp; for (sip = bsp->id; sip != NULL; sip = sip->next) { @@ -5425,7 +5542,7 @@ NLM_EXTERN Boolean AddReferenceBlock ( if (cds != NULL) { dna = BioseqFindFromSeqLoc (cds->location); if (dna != NULL) { - GetRefsOnBioseq (awp, dna, dna, context.left, context.right, cds->location); + GetRefsOnBioseq (awp, dna, dna, context.left, context.right, cds->location, bsp); target = dna; } } diff --git a/api/asn2gnb3.c b/api/asn2gnb3.c index 5a12103b..1ff6eea4 100644 --- a/api/asn2gnb3.c +++ b/api/asn2gnb3.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.129 $ +* $Revision: 1.142 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -84,15 +84,16 @@ static void AddHistCommentString ( CharPtr suffix, DatePtr dp, SeqIdPtr ids, - Boolean is_na + Boolean is_na, + Boolean use_accn ) { Int2 count = 0; - Char buf [256]; - Boolean first; + Char buf [256], id [42]; + Boolean first, skip; Int4 gi = 0; - SeqIdPtr sip; + SeqIdPtr sip, sip2; CharPtr strd; if (dp == NULL || ids == NULL || prefix == NULL || suffix == NULL || ffstring == NULL) return; @@ -131,21 +132,50 @@ static void AddHistCommentString ( 
FFAddOneString (ffstring, ",", FALSE, FALSE, TILDE_IGNORE); } first = FALSE; - if ( GetWWW(ajp) ) { - FFAddOneString (ffstring, " gi:", FALSE, FALSE, TILDE_IGNORE); - FFAddOneString (ffstring, "", FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, id, FALSE, FALSE, TILDE_EXPAND); + FFAddOneString (ffstring, "", FALSE, FALSE, TILDE_IGNORE); + } else { + sprintf (buf, " %s", id); + FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND); + } + skip = TRUE; + } + SeqIdFree (sip2); + } + } + if (! skip) { + if ( GetWWW(ajp) ) { + FFAddOneString (ffstring, " gi:", FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, "", FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND); + FFAddOneString (ffstring, "", FALSE, FALSE, TILDE_IGNORE); } else { - FF_Add_NCBI_Base_URL (ffstring, link_seqp); + sprintf (buf, " gi:%ld", (long) gi); + FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND); } - sprintf (buf, "%ld", (long) gi); - FFAddTextToString (ffstring, /* "val=" */ NULL, buf, "\">", FALSE, FALSE, TILDE_IGNORE); - FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND); - FFAddOneString (ffstring, "", FALSE, FALSE, TILDE_IGNORE); - } else { - sprintf (buf, " gi:%ld", (long) gi); - FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND); } } } @@ -2043,6 +2073,7 @@ NLM_EXTERN void AddCommentBlock ( Boolean is_tpa = FALSE; Boolean is_wgs = FALSE; Boolean isRefSeqStandard = FALSE; + Boolean is_unverified = FALSE; SeqLitPtr litp; ObjectIdPtr localID = NULL; Char locusID [32]; @@ -2066,6 +2097,7 @@ NLM_EXTERN void AddCommentBlock ( Char tmp [32]; TextSeqIdPtr tsip; UserFieldPtr ufp; + Int4 unverified_itemID = 0; UserObjectPtr uop; CharPtr wgsaccn = NULL; CharPtr wgsname = NULL; @@ -2102,6 +2134,10 @@ NLM_EXTERN void AddCommentBlock ( } oip = uop->type; if (oip != NULL) { + if (StringICmp (oip->str, "Unverified") == 0) { + is_unverified = TRUE; + unverified_itemID = dcontext.itemID; + } if (StringICmp (oip->str, "ENCODE") 
== 0) { is_encode = TRUE; encodeUop = uop; @@ -2116,6 +2152,36 @@ NLM_EXTERN void AddCommentBlock ( sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext); } + if (is_unverified) { + cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock)); + if (cbp != NULL) { + + cbp->entityID = awp->entityID; + cbp->itemID = unverified_itemID; + cbp->itemtype = OBJ_SEQDESC; + cbp->first = first; + first = FALSE; + + if (cbp->first) { + FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE); + } else { + FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE); + } + + FFAddOneString (ffstring, + "GenBank staff is unable to verify sequence and/or annotation provided by the submitter.", + FALSE, FALSE, TILDE_IGNORE); + + cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC"); + FFRecycleString(ajp, ffstring); + ffstring = FFGetString(ajp); + + if (awp->afp != NULL) { + DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp); + } + } + } + /* if (dblinkUop != NULL) { str = GetDBLinkString (dblinkUop); @@ -2153,7 +2219,7 @@ NLM_EXTERN void AddCommentBlock ( if (tsip != NULL) { is_other = TRUE; - if (StringNCmp (tsip->accession, "NC_", 3) == 0) { + if (StringNCmp (tsip->accession, "NC_", 3) == 0 || StringNCmp (tsip->accession, "AC_", 3) == 0) { if (hasRefTrackStatus) { /* will print elsewhere */ } else if (! StringHasNoText (genomeBuildNumber)) { @@ -2950,8 +3016,13 @@ NLM_EXTERN void AddCommentBlock ( FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE); } - AddHistCommentString (ajp, ffstring, "[WARNING] On", "this sequence was replaced by", - hist->replaced_by_date, hist->replaced_by_ids, ISA_na (bsp->mol)); + if (wgsaccn != NULL) { + AddHistCommentString (ajp, ffstring, "[WARNING] On", "this project was updated. 
The new version is", + hist->replaced_by_date, hist->replaced_by_ids, ISA_na (bsp->mol), TRUE); + } else { + AddHistCommentString (ajp, ffstring, "[WARNING] On", "this sequence was replaced by", + hist->replaced_by_date, hist->replaced_by_ids, ISA_na (bsp->mol), FALSE); + } cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC"); FFRecycleString(ajp, ffstring); @@ -2990,7 +3061,7 @@ NLM_EXTERN void AddCommentBlock ( } AddHistCommentString (ajp, ffstring, "On", "this sequence version replaced", - hist->replace_date, hist->replace_ids, ISA_na (bsp->mol)); + hist->replace_date, hist->replace_ids, ISA_na (bsp->mol), FALSE); cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC"); FFRecycleString(ajp, ffstring); @@ -4189,6 +4260,7 @@ NLM_EXTERN void AddSourceFeatBlock ( BioseqPtr bsp; SeqFeatPtr cds; SeqMgrFeatContext context; + Int4 currGi = 0; BioseqPtr dna; SeqLocPtr duploc; Boolean excise; @@ -4199,9 +4271,12 @@ NLM_EXTERN void AddSourceFeatBlock ( IntSrcBlockPtr lastisp; IntSrcBlockPtr descrIsp; ValNodePtr next; + Char pfx [128], sfx [128]; ValNodePtr PNTR prev; SeqInt sint; + SeqIdPtr sip; SeqLocPtr slp; + Int4 source_count = 0; CharPtr str; BioseqPtr target; ValNode vn; @@ -4220,6 +4295,8 @@ NLM_EXTERN void AddSourceFeatBlock ( ffstring = FFGetString(ajp); if ( ffstring == NULL ) return; + pfx [0] = '\0'; + sfx [0] = '\0'; /* collect biosources on bioseq */ @@ -4272,6 +4349,18 @@ NLM_EXTERN void AddSourceFeatBlock ( vn.next = NULL; FFStartPrint (ffstring, awp->format, 5, 21, NULL, 0, 5, 21, "FT", FALSE); + + for (sip = bsp->id; sip != NULL; sip = sip->next) { + if (sip->choice == SEQID_GI) { + currGi = (Int4) sip->data.intvalue; + } + } + + if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE && + (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) { + sprintf (pfx, "", (long) currGi); + } + FFAddOneString(ffstring, "source", FALSE, FALSE, TILDE_IGNORE); FFAddNChar(ffstring, ' ', 21 - 5 - StringLen("source"), FALSE); @@ -4335,7 
+4424,12 @@ NLM_EXTERN void AddSourceFeatBlock ( FFAddTextToString (ffstring, "/mol_type=\"", str, "\"", FALSE, TRUE, TILDE_TO_SPACES); } - str = FFEndPrint(ajp, ffstring, awp->format, 5, 21, 5, 21, "FT"); + if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE && + (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) { + sprintf (sfx, ""); + } + + str = FFEndPrintEx (ajp, ffstring, awp->format, 5, 21, 5, 21, "FT", pfx, sfx); bbp = (BaseBlockPtr) Asn2gbAddBlock (awp, SOURCEFEAT_BLOCK, sizeof (IntSrcBlock)); if (bbp != NULL) { @@ -4357,6 +4451,8 @@ NLM_EXTERN void AddSourceFeatBlock ( AddFeatureToGbseq (gbseq, gbfeat, str, NULL); } } + + return; } if (head == NULL) return; @@ -4488,6 +4584,13 @@ NLM_EXTERN void AddSourceFeatBlock ( } FFRecycleString(ajp, ffstring); + for (vnp = head; vnp != NULL; vnp = vnp->next) { + isp = (IntSrcBlockPtr) vnp->data.ptrvalue; + if (isp == NULL) continue; + isp->source_count = source_count; + source_count++; + } + if (awp->afp != NULL) { for (vnp = head; vnp != NULL; vnp = vnp->next) { isp = (IntSrcBlockPtr) vnp->data.ptrvalue; @@ -4514,6 +4617,92 @@ static Boolean IsCDD ( return FALSE; } +static void SetIfpFeatCount ( + IntFeatBlockPtr ifp, + IntAsn2gbJobPtr ajp, + Asn2gbWorkPtr awp, + Boolean isProt +) + +{ + FeatBlockPtr fbp; + Uint1 featdeftype; + IntAsn2gbSectPtr iasp; + Boolean is_other = FALSE; + + if (ifp == NULL || ajp == NULL || awp == NULL) return; + iasp = (IntAsn2gbSectPtr) awp->asp; + if (iasp == NULL) return; + + fbp = (FeatBlockPtr) ifp; + + featdeftype = fbp->featdeftype; + + if (featdeftype == FEATDEF_COMMENT) { + featdeftype = FEATDEF_misc_feature; + } + + if (! 
isProt) { + if (featdeftype == FEATDEF_REGION || featdeftype == FEATDEF_BOND || featdeftype == FEATDEF_SITE) { + featdeftype = FEATDEF_misc_feature; + } + } + + if (ajp->format == GENPEPT_FMT && isProt) { + if (ifp->mapToPep) { + if (featdeftype >= FEATDEF_preprotein && featdeftype <= FEATDEF_transit_peptide_aa) { + featdeftype = FEATDEF_preprotein; + } + } + } + + if (featdeftype == FEATDEF_Imp_CDS) { + featdeftype = FEATDEF_CDS; + } + if (featdeftype == FEATDEF_preRNA) { + featdeftype = FEATDEF_precursor_RNA; + } + if (featdeftype == FEATDEF_otherRNA) { + featdeftype = FEATDEF_misc_RNA; + } + if (featdeftype == FEATDEF_mat_peptide_aa) { + featdeftype = FEATDEF_mat_peptide; + } + if (featdeftype == FEATDEF_sig_peptide_aa) { + featdeftype = FEATDEF_sig_peptide; + } + if (featdeftype == FEATDEF_transit_peptide_aa) { + featdeftype = FEATDEF_transit_peptide; + } + + if (ajp->refseqConventions || awp->isRefSeq) { + is_other = TRUE; + } + + if (! isProt) { + if (featdeftype == FEATDEF_preprotein) { + if (! 
is_other) { + featdeftype = FEATDEF_misc_feature; + } + } + } + + if (featdeftype == FEATDEF_CLONEREF) { + if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) { + featdeftype = FEATDEF_misc_feature; + } + } + + if (featdeftype == FEATDEF_repeat_unit && (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE)) { + featdeftype = FEATDEF_repeat_region; + } + + if (featdeftype < FEATDEF_MAX) { + ifp->feat_count = iasp->feat_counts [featdeftype]; + (iasp->feat_counts [featdeftype])++; + } +} + static void GetFeatsOnCdsProduct ( SeqFeatPtr cds, BioseqPtr nbsp, @@ -4635,6 +4824,7 @@ static void GetFeatsOnCdsProduct ( ifp->mapToPep = FALSE; ifp->left = 0; ifp->right = 0; + SetIfpFeatCount (ifp, ajp, awp, FALSE); ifp->firstfeat = awp->firstfeat; awp->firstfeat = FALSE; @@ -4802,6 +4992,7 @@ static void GetRemoteFeatsOnCdsProduct ( ifp->mapToPep = FALSE; ifp->left = 0; ifp->right = 0; + SetIfpFeatCount (ifp, ajp, awp, FALSE); ifp->firstfeat = awp->firstfeat; awp->firstfeat = FALSE; @@ -4932,6 +5123,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq ( if (awp->hideImpFeats && sfp->data.choice == SEQFEAT_IMP && fcontext->featdeftype != FEATDEF_operon) return TRUE; if (awp->hideVariations && fcontext->featdeftype == FEATDEF_variation) return TRUE; if (awp->hideRepeatRegions && fcontext->featdeftype == FEATDEF_repeat_region) return TRUE; + if (awp->hideRepeatRegions && fcontext->featdeftype == FEATDEF_mobile_element) return TRUE; if (awp->hideGaps && fcontext->featdeftype == FEATDEF_gap) return TRUE; if (ISA_aa (bsp->mol) && fcontext->featdeftype == FEATDEF_REGION && awp->hideCddFeats && IsCDD (sfp)) return TRUE; @@ -5286,6 +5478,18 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq ( } break; + case FEATDEF_mobile_element: + /* mobile_element requires FTQUAL_mobile_element_type */ + gbq = sfp->qual; + while (gbq != NULL) { + if (StringICmp (gbq->qual, "mobile_element_type") == 0 && (StringDoesHaveText (gbq->val))) { + okay = TRUE; + break; + } + gbq = gbq->next; + } + break; + 
default: if (fcontext->featdeftype >= FEATDEF_GENE && fcontext->featdeftype < FEATDEF_MAX) { okay = TRUE; @@ -5338,6 +5542,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq ( ifp->mapToPep = FALSE; ifp->left = 0; ifp->right = 0; + SetIfpFeatCount (ifp, ajp, awp, ISA_aa (bsp->mol)); ifp->firstfeat = awp->firstfeat; awp->firstfeat = FALSE; @@ -5415,6 +5620,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq ( ifp->mapToPep = FALSE; ifp->left = 0; ifp->right = 0; + SetIfpFeatCount (ifp, ajp, awp, FALSE); ifp->firstfeat = awp->firstfeat; awp->firstfeat = FALSE; @@ -5757,6 +5963,7 @@ NLM_EXTERN void AddFeatureBlock ( ifp->isCDS = TRUE; ifp->left = 0; ifp->right = 0; + SetIfpFeatCount (ifp, ajp, awp, FALSE); ifp->firstfeat = awp->firstfeat; awp->firstfeat = FALSE; @@ -5846,6 +6053,7 @@ NLM_EXTERN void AddFeatureBlock ( ifp->isCDS = TRUE; ifp->left = 0; ifp->right = 0; + SetIfpFeatCount (ifp, ajp, awp, TRUE); ifp->firstfeat = awp->firstfeat; awp->firstfeat = FALSE; @@ -5873,6 +6081,7 @@ NLM_EXTERN void AddFeatureBlock ( ifp->isCDS = TRUE; ifp->left = 0; ifp->right = 0; + SetIfpFeatCount (ifp, ajp, awp, TRUE); ifp->firstfeat = awp->firstfeat; awp->firstfeat = FALSE; @@ -5910,6 +6119,7 @@ NLM_EXTERN void AddFeatureBlock ( ifp->mapToPep = TRUE; ifp->left = 0; ifp->right = 0; + SetIfpFeatCount (ifp, ajp, awp, TRUE); ifp->firstfeat = awp->firstfeat; awp->firstfeat = FALSE; diff --git a/api/asn2gnb4.c b/api/asn2gnb4.c index 6491f6dc..e788b0bf 100644 --- a/api/asn2gnb4.c +++ b/api/asn2gnb4.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.215 $ +* $Revision: 1.249 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -133,6 +133,7 @@ static FtQualType feat_qual_order [] = { FTQUAL_ribosomal_slippage, FTQUAL_trans_splicing, FTQUAL_artificial_location, + FTQUAL_artificial_location_str, FTQUAL_note, FTQUAL_citation, @@ -171,11 +172,14 @@ static FtQualType feat_qual_order [] = { FTQUAL_rpt_unit_seq, FTQUAL_satellite, 
FTQUAL_mobile_element, + FTQUAL_mobile_element_type, FTQUAL_usedin, FTQUAL_illegal_qual, FTQUAL_replace, + FTQUAL_delta_item, + FTQUAL_variation_set, FTQUAL_transl_except, FTQUAL_transl_table, @@ -189,6 +193,7 @@ static FtQualType feat_qual_order [] = { FTQUAL_transcript_id, FTQUAL_db_xref, FTQUAL_gene_xref, + FTQUAL_variation_id, FTQUAL_mol_wt, FTQUAL_translation, FTQUAL_transcription, @@ -261,6 +266,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = { { "allele", Qual_class_quote }, { "anticodon", Qual_class_anti_codon }, { "artificial_location", Qual_class_boolean }, + { "artificial_location", Qual_class_string }, { "bond", Qual_class_bond }, { "bond_type", Qual_class_bond }, { "bound_moiety", Qual_class_quote }, @@ -274,6 +280,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = { { "codon_start", Qual_class_int }, { "cons_splice", Qual_class_consplice }, { "db_xref", Qual_class_db_xref }, + { "delta_item", Qual_class_delta_item }, { "derived_from", Qual_class_seq_loc }, { "direction", Qual_class_L_R_B }, { "EC_number", Qual_class_EC_quote }, @@ -282,7 +289,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = { { "evidence", Qual_class_evidence }, { "exception", Qual_class_exception }, { "exception_note", Qual_class_exception }, - { "experiment", Qual_class_quote }, + { "experiment", Qual_class_experiment }, { "experiment", Qual_class_string }, { "product", Qual_class_valnode }, { "figure", Qual_class_string }, @@ -314,6 +321,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = { { "map", Qual_class_quote }, { "maploc", Qual_class_string }, { "mobile_element", Qual_class_mobile_element }, + { "mobile_element_type", Qual_class_mobile_element }, { "mod_base", Qual_class_noquote }, { "model_evidence", Qual_class_model_ev }, { "calculated_mol_wt", Qual_class_mol_wt }, @@ -381,6 +389,8 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = { { "trna_codons", 
Qual_class_trna_codons }, { "UniProtKB_evidence", Qual_class_quote }, { "usedin", Qual_class_usedin }, + { "db_xref", Qual_class_variation_id }, + { "variation_set", Qual_class_variation_set }, { "xtra_products", Qual_class_xtraprds } }; @@ -390,50 +400,51 @@ typedef struct qualfeatur { FtQualType featurclass; } QualFeatur, PNTR QualFeaturPtr; -#define NUM_GB_QUALS 41 +#define NUM_GB_QUALS 42 static QualFeatur qualToFeature [NUM_GB_QUALS] = { - { "allele", FTQUAL_allele }, - { "bound_moiety", FTQUAL_bound_moiety }, - { "clone", FTQUAL_clone }, - { "codon", FTQUAL_codon }, - { "compare", FTQUAL_compare }, - { "cons_splice", FTQUAL_cons_splice }, - { "cyt_map", FTQUAL_gene_cyt_map }, - { "direction", FTQUAL_direction }, - { "EC_number", FTQUAL_EC_number }, - { "estimated_length", FTQUAL_estimated_length }, - { "experiment", FTQUAL_experiment }, - { "frequency", FTQUAL_frequency }, - { "function", FTQUAL_function }, - { "gen_map", FTQUAL_gene_gen_map }, - { "inference", FTQUAL_inference }, - { "insertion_seq", FTQUAL_insertion_seq }, - { "label", FTQUAL_label }, - { "map", FTQUAL_map }, - { "mobile_element", FTQUAL_mobile_element }, - { "mod_base", FTQUAL_mod_base }, - { "ncRNA_class", FTQUAL_ncRNA_class }, - { "number", FTQUAL_number }, - { "old_locus_tag", FTQUAL_old_locus_tag }, - { "operon", FTQUAL_operon }, - { "organism", FTQUAL_organism }, - { "PCR_conditions", FTQUAL_PCR_conditions }, - { "phenotype", FTQUAL_phenotype }, - { "product", FTQUAL_product_quals }, - { "rad_map", FTQUAL_gene_rad_map }, - { "replace", FTQUAL_replace }, - { "rpt_family", FTQUAL_rpt_family }, - { "rpt_type", FTQUAL_rpt_type }, - { "rpt_unit", FTQUAL_rpt_unit }, - { "rpt_unit_range", FTQUAL_rpt_unit_range }, - { "rpt_unit_seq", FTQUAL_rpt_unit_seq }, - { "satellite", FTQUAL_satellite }, - { "standard_name", FTQUAL_standard_name }, - { "tag_peptide", FTQUAL_tag_peptide }, - { "transposon", FTQUAL_transposon }, - { "UniProtKB_evidence", FTQUAL_UniProtKB_evidence }, - { "usedin", 
FTQUAL_usedin } + { "allele", FTQUAL_allele }, + { "bound_moiety", FTQUAL_bound_moiety }, + { "clone", FTQUAL_clone }, + { "codon", FTQUAL_codon }, + { "compare", FTQUAL_compare }, + { "cons_splice", FTQUAL_cons_splice }, + { "cyt_map", FTQUAL_gene_cyt_map }, + { "direction", FTQUAL_direction }, + { "EC_number", FTQUAL_EC_number }, + { "estimated_length", FTQUAL_estimated_length }, + { "experiment", FTQUAL_experiment }, + { "frequency", FTQUAL_frequency }, + { "function", FTQUAL_function }, + { "gen_map", FTQUAL_gene_gen_map }, + { "inference", FTQUAL_inference }, + { "insertion_seq", FTQUAL_insertion_seq }, + { "label", FTQUAL_label }, + { "map", FTQUAL_map }, + { "mobile_element", FTQUAL_mobile_element }, + { "mobile_element_type", FTQUAL_mobile_element_type }, + { "mod_base", FTQUAL_mod_base }, + { "ncRNA_class", FTQUAL_ncRNA_class }, + { "number", FTQUAL_number }, + { "old_locus_tag", FTQUAL_old_locus_tag }, + { "operon", FTQUAL_operon }, + { "organism", FTQUAL_organism }, + { "PCR_conditions", FTQUAL_PCR_conditions }, + { "phenotype", FTQUAL_phenotype }, + { "product", FTQUAL_product_quals }, + { "rad_map", FTQUAL_gene_rad_map }, + { "replace", FTQUAL_replace }, + { "rpt_family", FTQUAL_rpt_family }, + { "rpt_type", FTQUAL_rpt_type }, + { "rpt_unit", FTQUAL_rpt_unit }, + { "rpt_unit_range", FTQUAL_rpt_unit_range }, + { "rpt_unit_seq", FTQUAL_rpt_unit_seq }, + { "satellite", FTQUAL_satellite }, + { "standard_name", FTQUAL_standard_name }, + { "tag_peptide", FTQUAL_tag_peptide }, + { "transposon", FTQUAL_transposon }, + { "UniProtKB_evidence", FTQUAL_UniProtKB_evidence }, + { "usedin", FTQUAL_usedin } }; static Int2 GbqualToFeaturIndex ( @@ -689,7 +700,7 @@ NLM_EXTERN Int2 MatchRef ( for (j = 0; j < numReferences; j++) { rbp = rbpp [j]; if (rbp == NULL) continue; - if (MatchCit (ppr, rbp)) return j + 1; + if (MatchCit (ppr, rbp)) return rbp->serial; } return 0; } @@ -1478,7 +1489,28 @@ static ValQual legalGbqualList [] = { { FEATDEF_tmRNA , FTQUAL_operon }, { 
FEATDEF_tmRNA , FTQUAL_product }, { FEATDEF_tmRNA , FTQUAL_standard_name }, - { FEATDEF_tmRNA , FTQUAL_tag_peptide } + { FEATDEF_tmRNA , FTQUAL_tag_peptide }, + + { FEATDEF_VARIATIONREF , FTQUAL_allele }, + { FEATDEF_VARIATIONREF , FTQUAL_compare }, + { FEATDEF_VARIATIONREF , FTQUAL_frequency }, + { FEATDEF_VARIATIONREF , FTQUAL_label }, + { FEATDEF_VARIATIONREF , FTQUAL_map }, + { FEATDEF_VARIATIONREF , FTQUAL_old_locus_tag }, + { FEATDEF_VARIATIONREF , FTQUAL_phenotype }, + { FEATDEF_VARIATIONREF , FTQUAL_product }, + { FEATDEF_VARIATIONREF , FTQUAL_replace }, + { FEATDEF_VARIATIONREF , FTQUAL_standard_name }, + + { FEATDEF_mobile_element , FTQUAL_allele }, + { FEATDEF_mobile_element , FTQUAL_function }, + { FEATDEF_mobile_element , FTQUAL_label }, + { FEATDEF_mobile_element , FTQUAL_map }, + { FEATDEF_mobile_element , FTQUAL_mobile_element_type }, + { FEATDEF_mobile_element , FTQUAL_old_locus_tag }, + { FEATDEF_mobile_element , FTQUAL_rpt_family }, + { FEATDEF_mobile_element , FTQUAL_rpt_type }, + { FEATDEF_mobile_element , FTQUAL_standard_name } }; /* comparison of ValQual's -- first compare featdef then ftqual */ @@ -2806,6 +2838,84 @@ static Boolean OnlyOneRealGeneral (SeqIdPtr sip) return FALSE; } +static void AddExperimentWithPMIDlinks( + IntAsn2gbJobPtr ajp, + StringItemPtr ffstring, + CharPtr str +) + +{ + Char ch; + Boolean had_pmid; + CharPtr pmid; + CharPtr prefix = "PMID:"; + CharPtr ptr; + + if (! 
GetWWW (ajp)) { + FFAddOneString (ffstring, str, FALSE, TRUE, TILDE_IGNORE); + return; + } + + if (CommentHasSuspiciousHtml (ajp, str)) { + FFAddOneString (ffstring, str, FALSE, TRUE, TILDE_IGNORE); + return; + } + + while (StringDoesHaveText (str)) { + ptr = StringStr (str, prefix); + if (ptr == NULL) { + FFAddOneString (ffstring, str, FALSE, TRUE, TILDE_IGNORE); + return; + } + *ptr = '\0'; + FFAddOneString (ffstring, str, FALSE, TRUE, TILDE_IGNORE); + ptr += StringLen (prefix); + pmid = ptr; + ch = *ptr; + while (ch == ' ') { + ptr++; + pmid = ptr; + ch = *ptr; + } + while (IS_DIGIT (ch)) { + ptr++; + ch = *ptr; + } + *ptr = '\0'; + + had_pmid = FALSE; + if (StringDoesHaveText (pmid)) { + FFAddOneString (ffstring, prefix, FALSE, TRUE, TILDE_IGNORE); + FFAddOneString (ffstring, "", FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, pmid, FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, "", FALSE, FALSE, TILDE_IGNORE); + had_pmid = TRUE; + } + + *ptr = ch; + str = ptr; + + prefix = "PMID:"; + ptr = str; + ch = *ptr; + if (had_pmid) { + if (ch == ',') { + ptr++; + ch = *ptr; + while (ch == ' ') { + ptr++; + ch = *ptr; + } + if (IS_DIGIT (ch)) { + prefix = ","; + } + } + } + } +} + static void FormatFeatureBlockQuals ( StringItemPtr ffstring, IntAsn2gbJobPtr ajp, @@ -2847,6 +2957,7 @@ static void FormatFeatureBlockQuals ( Int4 gi; Boolean hadProtDesc = FALSE; DbtagPtr dbt; + DeltaItemPtr dip; UserFieldPtr entry; Int4 exp_ev; GBQualPtr gbq; @@ -2860,6 +2971,9 @@ static void FormatFeatureBlockQuals ( Boolean is_sc; Int2 j; FtQualType jdx; + Int2 k; + Int2 k_lower; + Int2 k_upper; Int4 len; Boolean link_is_na; FloatHi molwt; @@ -2885,10 +2999,11 @@ static void FormatFeatureBlockQuals ( Uint1 residue; SeqCodeTablePtr sctp; Int4 sec_str; + ValNodePtr seq_seq; Uint1 seqcode; Char seqid [50]; - SeqIntPtr sintp; SeqIdPtr sip; + SeqLitPtr slitp; SeqLocPtr slp; Boolean split; CharPtr start; @@ -2898,7 +3013,9 @@ static void FormatFeatureBlockQuals ( tRNAPtr 
trna; UserFieldPtr ufp; UserObjectPtr uop; - ValNodePtr vnp; + ValNodePtr vnp, vnp2, vnp3; + VariationInstPtr vip; + VariationRefPtr vrp; StringItemPtr unique; Boolean indexerVersion; @@ -3114,13 +3231,8 @@ static void FormatFeatureBlockQuals ( if (str == NULL) continue; if (ajp->flags.dropIllegalQuals) { - tmp = str; - while (*tmp != '\0' && *tmp == '\"') - tmp++; - for (; *tmp != '\0' && *tmp != '\"'; tmp++) { - if (!IS_DIGIT(*tmp) && *tmp != '.' && *tmp != '-') { - okay = FALSE; - } + if (! ECNumberFormatOkay (str, ajp->flags.forGbRelease)) { + okay = FALSE; } } if (!okay) continue; @@ -3192,6 +3304,27 @@ static void FormatFeatureBlockQuals ( } break; + case Qual_class_experiment : + gbq = qvp [idx].gbq; + if (gbq == NULL || (ajp->flags.dropIllegalQuals && + (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break; + if (lasttype == NULL) { + lasttype = gbq->qual; + } + while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) { + if (! StringHasNoText (gbq->val)) { + FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"", + FALSE, TRUE, TILDE_IGNORE); + if (!StringIsJustQuotes (gbq->val)) { + AddExperimentWithPMIDlinks(ajp, ffstring, gbq->val); + } + FFAddOneChar(ffstring, '\"', FALSE); + FFAddOneChar(ffstring, '\n', FALSE); + } + gbq = gbq->next; + } + break; + case Qual_class_noquote : gbq = qvp [idx].gbq; if (gbq == NULL || (ajp->flags.dropIllegalQuals && @@ -3788,32 +3921,22 @@ static void FormatFeatureBlockQuals ( } str = qvp [FTQUAL_trna_aa].str; if (slp != NULL && StringDoesHaveText (str)) { - if (ajp->mode == RELEASE_MODE) { /* !!! quarantined pending collab approval !!! 
*/ - if (slp->choice == SEQLOC_INT) { - sintp = (SeqIntPtr) slp->data.ptrvalue; - if (sintp != NULL) { - sprintf(numbuf, "%ld", (long) sintp->from + 1); - FFAddTextToString (ffstring, "/anticodon=(pos:", numbuf, "..", - FALSE, FALSE, TILDE_IGNORE); - sprintf (numbuf, "%ld", (long) sintp->to + 1); - FFAddTextToString (ffstring, NULL, numbuf, ",", - FALSE, FALSE, TILDE_IGNORE); - FFAddTextToString (ffstring, "aa:", str, ")", - FALSE, FALSE, TILDE_IGNORE); - FFAddOneChar (ffstring, '\n', FALSE); - } - } - } else { - tmp = FFFlatLoc (ajp, target, slp, ajp->masterStyle, FALSE); - if (tmp != NULL) { + tmp = FFFlatLoc (ajp, target, slp, ajp->masterStyle, FALSE); + if (tmp != NULL) { + if (ajp->mode == RELEASE_MODE && + (StringStr (tmp, "join") != NULL || + StringStr (tmp, "order") != NULL || + StringStr (tmp, "complement") != NULL)) { + /* !!! join in anticodon quarantined pending collab approval !!! */ + } else { FFAddTextToString (ffstring, "/anticodon=(pos:", tmp, ",", FALSE, FALSE, TILDE_IGNORE); FFAddTextToString(ffstring, "aa:", str, ")", FALSE, FALSE, TILDE_IGNORE); FFAddOneChar(ffstring, '\n', FALSE); } - MemFree (tmp); } + MemFree (tmp); } if (newloc != NULL) { SeqLocFree (newloc); @@ -3875,10 +3998,21 @@ static void FormatFeatureBlockQuals ( sprintf (numbuf, "%d", (int) j); FFAddOneString(ffstring, "/citation=[", FALSE, TRUE, TILDE_TO_SPACES); pmid = 0; - if (j <= asp->numReferences) { - rbp = asp->referenceArray [j - 1]; - if (rbp != NULL) { - pmid = rbp->pmid; + if( GetWWW (ajp) && asp->numReferences > 0 ) { + /* binary search for reference that matches serial number j */ + k_lower = 0; + k_upper = (asp->numReferences - 1); + while( k_lower <= k_upper ) { + k = (k_upper + k_lower) / 2; + rbp = asp->referenceArray [k]; + if( rbp->serial == j ) { + pmid = rbp->pmid; + break; + } else if( rbp->serial < j ) { + k_lower = (k+1); + } else { /* rbp->serial > j */ + k_upper = (k-1); + } } } if (pmid > 0 && GetWWW (ajp)) { @@ -4011,6 +4145,103 @@ static void 
FormatFeatureBlockQuals ( } break; + case Qual_class_variation_id : + dbt = qvp [idx].dbt; + if (dbt != NULL) { + buf [0] = '\0'; + if (StringICmp (dbt->db, "dbSNP") == 0) { + oip = dbt->tag; + if (oip != NULL && StringDoesHaveText (oip->str)) { + str = oip->str; + if (StringNICmp (str, "rs", 2) == 0) { + FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE); + FF_www_db_xref(ajp, ffstring, dbt->db, str + 2, bsp); + FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE); + } + } + } + } + break; + + case Qual_class_delta_item : + for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) { + dip = (DeltaItemPtr) vnp->data.ptrvalue; + if (dip == NULL) continue; + seq_seq = dip->Seq_seq; + if (seq_seq != NULL && seq_seq->choice == Seq_seq_literal) { + slitp = (SeqLitPtr) seq_seq->data.ptrvalue; + if (slitp != NULL) { + if (slitp->length > 0 && slitp->seq_data_type != Seq_code_gap && slitp->seq_data != NULL) { + str = (CharPtr) MemNew ((size_t) (slitp->length + 6)); + if (str != NULL) { + SeqPortStreamLit (slitp, 0, (Pointer) str, NULL); + FFAddOneString(ffstring, "/replace=\"", FALSE, FALSE, TILDE_IGNORE); + if (StringDoesHaveText (str)) { + ptr = str; + ch = *ptr; + while (ch != '\0') { + if (IS_UPPER (ch)) { + ch = TO_LOWER (ch); + *ptr = ch; + } + ptr++; + ch = *ptr; + } + FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE); + } + FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE); + } + MemFree (str); + } + } + } + } + break; + + case Qual_class_variation_set: + for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) { + vrp = (VariationRefPtr) vnp->data.ptrvalue; + if (vrp == NULL) continue; + vnp2 = vrp->data; + if (vnp2 == NULL) continue; + if (vnp2->choice != VarRefData_instance) continue; + vip = (VariationInstPtr) vnp2->data.ptrvalue; + if (vip == NULL) continue; + for (vnp3 = vip->delta; vnp3 != NULL; vnp3 = vnp3->next) { + dip = (DeltaItemPtr) vnp3->data.ptrvalue; + if (dip == NULL) continue; + seq_seq = dip->Seq_seq; + if 
(seq_seq != NULL && seq_seq->choice == Seq_seq_literal) { + slitp = (SeqLitPtr) seq_seq->data.ptrvalue; + if (slitp != NULL) { + if (slitp->length > 0 && slitp->seq_data_type != Seq_code_gap && slitp->seq_data != NULL) { + str = (CharPtr) MemNew ((size_t) (slitp->length + 6)); + if (str != NULL) { + SeqPortStreamLit (slitp, 0, (Pointer) str, NULL); + FFAddOneString(ffstring, "/replace=\"", FALSE, FALSE, TILDE_IGNORE); + if (StringDoesHaveText (str)) { + ptr = str; + ch = *ptr; + while (ch != '\0') { + if (IS_UPPER (ch)) { + ch = TO_LOWER (ch); + *ptr = ch; + } + ptr++; + ch = *ptr; + } + FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE); + } + FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE); + } + MemFree (str); + } + } + } + } + } + break; + case Qual_class_nuc_id : link_is_na = TRUE; /* fall through */ @@ -4332,6 +4563,10 @@ static void FormatFeatureBlockQuals ( for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) { str = (CharPtr) vnp->data.ptrvalue; if (str != NULL) { + if (ajp->mode == SEQUIN_MODE) { + if (StringNICmp (str, "/orig_protein_id=", 17) == 0) continue; + if (StringNICmp (str, "/orig_transcript_id=", 20) == 0) continue; + } FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_TO_SPACES); FFAddNewLine(ffstring); } @@ -5025,6 +5260,7 @@ static void FF_asn2gb_www_featkey ( FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE); prefix = "&"; } + /* if ( is_aa ) { FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE); FFAddOneString(ffstring, "report=gpwithparts", FALSE, FALSE, TILDE_IGNORE); @@ -5032,6 +5268,7 @@ static void FF_asn2gb_www_featkey ( FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE); FFAddOneString(ffstring, "report=gbwithparts", FALSE, FALSE, TILDE_IGNORE); } + */ FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE); } @@ -5306,7 +5543,9 @@ static void ParseException ( Uint1 subtype, BoolPtr riboSlipP, BoolPtr transSpliceP, - BoolPtr artLocP + BoolPtr artLocP, + BoolPtr hetPopP, + BoolPtr 
lowQualP ) { @@ -5321,6 +5560,8 @@ static void ParseException ( *riboSlipP = FALSE; *transSpliceP = FALSE; *artLocP = FALSE; + *hetPopP = FALSE; + *lowQualP = FALSE; if (StringHasNoText (original)) return; @@ -5393,12 +5634,18 @@ static void ParseException ( ValNodeCopyStr (¬e, 0, tmp); } found = TRUE; - } else if (StringICmp (tmp, "heterogeneous population sequenced") == 0 || - StringICmp (tmp, "low-quality sequence region") == 0) { + } else if (StringICmp (tmp, "heterogeneous population sequenced") == 0) { if (subtype == FEATDEF_CDS || subtype == FEATDEF_mRNA) { - *artLocP = TRUE; - ValNodeCopyStr (¬e, 0, tmp); /* also copy to note */ + *hetPopP = TRUE; + } else { + ValNodeCopyStr (¬e, 0, tmp); + } + found = TRUE; + } else if (StringICmp (tmp, "low-quality sequence region") == 0) { + if (subtype == FEATDEF_CDS || + subtype == FEATDEF_mRNA) { + *lowQualP = TRUE; } else { ValNodeCopyStr (¬e, 0, tmp); } @@ -5459,6 +5706,14 @@ static void ParseException ( MemFree (str); } +static CharPtr legalCategoryPrefixes [] = { + "", + "COORDINATES: ", + "DESCRIPTION: ", + "EXISTENCE: ", + NULL +}; + static CharPtr legalInferencePrefixes [] = { "", "similar to sequence", @@ -5487,6 +5742,7 @@ static void ParseInference ( ValNodePtr good = NULL, bad = NULL; GBQualPtr gbq; size_t len; + CharPtr skip, val; *good_inferenceP = NULL; *bad_inferenceP = NULL; @@ -5496,10 +5752,20 @@ static void ParseInference ( for (gbq = quals; gbq != NULL; gbq = gbq->next) { if (StringICmp (gbq->qual, "inference") != 0) continue; if (StringHasNoText (gbq->val)) continue; + val = gbq->val; + skip = NULL; + for (j = 0; legalCategoryPrefixes [j] != NULL; j++) { + len = StringLen (legalCategoryPrefixes [j]); + if (StringNICmp (val, legalCategoryPrefixes [j], len) != 0) continue; + skip = val + len; + } + if (skip != NULL) { + val = skip; + } best = -1; for (j = 0; legalInferencePrefixes [j] != NULL; j++) { len = StringLen (legalInferencePrefixes [j]); - if (StringNICmp (gbq->val, legalInferencePrefixes 
[j], len) != 0) continue; + if (StringNICmp (val, legalInferencePrefixes [j], len) != 0) continue; best = j; } if (best >= 0 && legalInferencePrefixes [best] != NULL) { @@ -5563,6 +5829,8 @@ static SeqFeatPtr GetOverlappingGeneInEntity ( SeqMgrFeatContextPtr fcontext, SeqMgrFeatContextPtr gcontext, SeqLocPtr locforgene, + Boolean is_ed, + Boolean is_oldgb, IntAsn2gbJobPtr ajp ) @@ -5616,10 +5884,10 @@ static SeqFeatPtr GetOverlappingGeneInEntity ( } } else { if (fcontext->bad_order || fcontext->mixed_strand) { - gene = SeqMgrGetOverlappingFeature (locforgene, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, gcontext); + gene = SeqMgrGetOverlappingFeatureEx (locforgene, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, gcontext, TRUE); } else if (ajp->multiIntervalGenes) { - gene = SeqMgrGetOverlappingFeature (locforgene, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, gcontext); - if (gene == NULL) { + gene = SeqMgrGetOverlappingFeatureEx (locforgene, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, gcontext, TRUE); + if (gene == NULL && (ajp->segmentedBioseqs || is_ed || is_oldgb)) { gene = SeqMgrGetOverlappingGene (locforgene, gcontext); } } else { @@ -5693,7 +5961,118 @@ static CharPtr SeqLoc2Str ( } */ +static CharPtr AddJsPush ( + BioseqPtr target, + SeqLocPtr location +) + +{ + ValNodePtr head = NULL, tail = NULL; + IntFuzzPtr ifp; + SeqLocPtr slp; + SeqPntPtr spp; + Int4 start, stop; + Char str [64]; + CharPtr tmp; + + if (target == NULL || location == NULL) return NULL; + + if (location->choice == SEQLOC_PNT) { + spp = (SeqPntPtr) location->data.ptrvalue; + if (spp != NULL) { + ifp = spp->fuzz; + if (ifp != NULL && ifp->choice == 4 && ifp->a == 3) { + sprintf (str, "[[%ld, %ld]]", (long) (spp->point + 1), (long) (spp->point + 2)); + return StringSave (str); + } + } + } + + slp = SeqLocFindNext (location, NULL); + if (slp == NULL) return NULL; + + start = GetOffsetInBioseq (slp, target, SEQLOC_START) + 1; + stop = GetOffsetInBioseq (slp, target, SEQLOC_STOP) + 1; + 
sprintf (str, "[%ld, %ld]", (long) start, (long) stop); + ValNodeCopyStrEx (&head, &tail, 0, str); + + while ((slp = SeqLocFindNext (location, slp)) != NULL) { + start = GetOffsetInBioseq (slp, target, SEQLOC_START) + 1; + stop = GetOffsetInBioseq (slp, target, SEQLOC_STOP) + 1; + if (start != 0 && stop != 0) { + sprintf (str, "[%ld, %ld]", (long) start, (long) stop); + ValNodeCopyStrEx (&head, &tail, 0, str); + } + } + + tmp = ValNodeMergeStrsExEx (head, ",", "[", "]"); + ValNodeFreeData (head); + + return tmp; +} + +static CharPtr AddJsInterval ( + IntAsn2gbSectPtr iasp, + CharPtr pfx, + BioseqPtr target, + Uint1 featdeftype, + SeqLocPtr location +) + +{ + Char buf [512]; + ValNodePtr head = NULL, tail = NULL; + CharPtr ivls; + CharPtr key = NULL; + CharPtr tmp; + + if (iasp == NULL || target == NULL || location == NULL) return NULL; + if (featdeftype >= FEATDEF_MAX) return NULL; + + if (StringICmp (iasp->feat_key [featdeftype], "misc_feature") == 0) { + featdeftype = FEATDEF_misc_feature; + if (iasp->feat_key [featdeftype] == NULL) { + iasp->feat_key [featdeftype] = StringSave ("misc_feature"); + } + } + + key = iasp->feat_key [featdeftype]; + if (StringHasNoText (key)) return NULL; + + if (StringDoesHaveText (pfx)) { + ValNodeCopyStrEx (&head, &tail, 0, pfx); + } + + ValNodeCopyStrEx (&head, &tail, 0, ""); + + tmp = ValNodeMergeStrs (head); + ValNodeFreeData (head); + return tmp; +} + static CharPtr FormatFeatureBlockEx ( + Asn2gbFormatPtr afp, IntAsn2gbJobPtr ajp, Asn2gbSectPtr asp, BioseqPtr bsp, @@ -5723,11 +6102,15 @@ static CharPtr FormatFeatureBlockEx ( Char ch; Uint1 code = Seq_code_ncbieaa; CdRegionPtr crp; + Int4 currGi = 0; SeqMgrDescContext dcontext; Boolean encode_prefix = FALSE; CharPtr exception_note = NULL; CharPtr exception_string = NULL; + Char fbuf [32]; Uint1 featdeftype; + CharPtr featid = NULL; + ObjectIdPtr fid = NULL; Uint1 from; GBQualPtr gbq; GBFeaturePtr gbfeat = NULL; @@ -5738,6 +6121,8 @@ static CharPtr FormatFeatureBlockEx ( 
ValNodePtr gene_syn = NULL; ValNodePtr good_inference = NULL; GeneRefPtr grp = NULL; + Boolean hetPop = FALSE; + IntAsn2gbSectPtr iasp; IntCdsBlockPtr icp; Uint2 idx; ValNodePtr illegal = NULL; @@ -5747,11 +6132,13 @@ static CharPtr FormatFeatureBlockEx ( Boolean is_ged = FALSE; Boolean is_gps = FALSE; Boolean is_journalscan = FALSE; + Boolean is_oldgb = FALSE; Boolean is_other = FALSE; Boolean is_misc_rna = FALSE; Boolean isGap = FALSE; Uint4 itemID; CharPtr its_prod = NULL; + CharPtr js = NULL; CharPtr key = NULL; CharPtr lasttype = NULL; Int4 left = -1; @@ -5759,6 +6146,7 @@ static CharPtr FormatFeatureBlockEx ( SeqLocPtr location = NULL; SeqLocPtr locforgene = NULL; SeqLocPtr locformatpep = NULL; + Boolean lowQual = FALSE; SeqMgrFeatContext mcontext; MolInfoPtr mip; SeqFeatPtr mrna; @@ -5771,6 +6159,7 @@ static CharPtr FormatFeatureBlockEx ( SeqFeatPtr operon = NULL; Uint2 partial; SeqMgrFeatContext pcontext; + Char pfx [128], sfx [128]; BioseqPtr prd; CharPtr precursor_comment = NULL; BioseqPtr prod = NULL; @@ -5805,9 +6194,13 @@ static CharPtr FormatFeatureBlockEx ( CharPtr tmp; Boolean transSplice = FALSE; tRNAPtr trna; + TextSeqIdPtr tsip; UserFieldPtr ufp; BioseqPtr unlockme = NULL; UserObjectPtr uop; + VariationInstPtr vip; + VariationRefPtr vrp; + VarRefDataSetPtr vsp; ValNodePtr vnp; StringItemPtr ffstring; /* @@ -5832,7 +6225,10 @@ static CharPtr FormatFeatureBlockEx ( } else { gbseq = NULL; } - + + pfx [0] = '\0'; + sfx [0] = '\0'; + protein_pid_g [0] = '\0'; itemID = fcontext->itemID; @@ -5927,7 +6323,11 @@ static CharPtr FormatFeatureBlockEx ( locforgene = sfp->location; } if (location == NULL) return NULL; - + + if (loc != NULL) { + NormalizeNullsBetween (loc); + } + sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID); if (sep != NULL && IS_Bioseq_set (sep)) { bssp = (BioseqSetPtr) sep->data.ptrvalue; @@ -5948,20 +6348,21 @@ static CharPtr FormatFeatureBlockEx ( is_journalscan = TRUE; break; case SEQID_GENBANK : + case SEQID_TPG : is_ged = TRUE; + 
tsip = (TextSeqIdPtr) sip->data.ptrvalue; + if (tsip != NULL) { + if (StringLen (tsip->accession) == 6) { + is_oldgb = TRUE; + } + } break; case SEQID_EMBL : + case SEQID_TPE : is_ged = TRUE; is_ed = TRUE; break; case SEQID_DDBJ : - is_ged = TRUE; - is_ed = TRUE; - break; - case SEQID_TPG : - is_ged = TRUE; - break; - case SEQID_TPE : case SEQID_TPD : is_ged = TRUE; is_ed = TRUE; @@ -6007,11 +6408,13 @@ static CharPtr FormatFeatureBlockEx ( key = "misc_feature"; } } + /* if (featdeftype == FEATDEF_VARIATIONREF) { if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) { key = "misc_feature"; } } + */ /* deal with unmappable impfeats */ @@ -6027,7 +6430,27 @@ static CharPtr FormatFeatureBlockEx ( key = "repeat_region"; } + for (sip = bsp->id; sip != NULL; sip = sip->next) { + if (sip->choice == SEQID_GI) { + currGi = (Int4) sip->data.intvalue; + } + } + + iasp = (IntAsn2gbSectPtr) asp; + + if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && featdeftype < FEATDEF_MAX) { + if (iasp->feat_key [featdeftype] == NULL) { + iasp->feat_key [featdeftype] = StringSave (key); + } + } + + if (afp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && + (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) { + sprintf (pfx, "", (long) currGi, key, (long) ifp->feat_count); + } + FFStartPrint(ffstring, format, 5, 21, NULL, 0, 5, 21, "FT", /* ifp->firstfeat */ FALSE); + if (ajp->ajp.slp != NULL) { FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE); } else if ( GetWWW(ajp) && StringICmp (key, "gap") != 0 && bsp != NULL /* && SeqMgrGetParentOfPart (bsp, NULL) == NULL */ ) { @@ -6047,7 +6470,6 @@ static CharPtr FormatFeatureBlockEx ( if (imp == NULL || StringHasNoText (imp->loc)) { - if (ajp->ajp.slp != NULL) { sip = SeqIdParse ("lcl|dummy"); left = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_LEFT_END); @@ -6059,7 +6481,9 @@ static CharPtr FormatFeatureBlockEx ( newloc = SeqLocCopyRegion (sip, location, bsp, left, right, strand, &split); */ SeqIdFree (sip); 
- if (newloc == NULL) return NULL; + if (newloc == NULL) { + return NULL; + } /* firstloc = SeqLoc2Str (newloc); */ @@ -6068,6 +6492,9 @@ static CharPtr FormatFeatureBlockEx ( secondloc = SeqLoc2Str (newloc); */ str = FFFlatLoc (ajp, target, newloc, ajp->masterStyle, isGap); + if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && featdeftype < FEATDEF_MAX) { + js = AddJsInterval (iasp, pfx, target, featdeftype, newloc); + } SeqLocFree (newloc); /* thirdloc = SeqLoc2Str (ajp->ajp.slp); @@ -6079,6 +6506,9 @@ static CharPtr FormatFeatureBlockEx ( */ } else { str = FFFlatLoc (ajp, target, location, ajp->masterStyle, isGap); + if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && featdeftype < FEATDEF_MAX) { + js = AddJsInterval (iasp, pfx, target, featdeftype, location); + } /* if (StringCmp (str, "?") == 0) { firstloc = SeqLoc2Str (location); @@ -6229,7 +6659,15 @@ static CharPtr FormatFeatureBlockEx ( /* if mat_peptide, grp is already be set based on parent CDS, otherwise check current feature */ if (grp == NULL) { - grp = SeqMgrGetGeneXref (sfp); + grp = SeqMgrGetGeneXrefEx (sfp, &fid); + if (fid != NULL) { + if (StringDoesHaveText (fid->str)) { + featid = fid->str; + } else { + sprintf (fbuf, "%ld", (long) fid->id); + featid = fbuf; + } + } } /* if gene xref, then find referenced gene, take everything as if it overlapped */ @@ -6243,7 +6681,9 @@ static CharPtr FormatFeatureBlockEx ( } bspx = BioseqFindFromSeqLoc (sfp->location); if (bspx != NULL) { - if (StringDoesHaveText (grp->locus_tag)) { + if (featid != NULL) { + gene = SeqMgrGetFeatureByFeatID (0, bspx, featid, NULL, &gcontext); + } else if (StringDoesHaveText (grp->locus_tag)) { gene = SeqMgrGetGeneByLocusTag (bspx, grp->locus_tag, &gcontext); } else if (StringDoesHaveText (grp->locus)) { gene = SeqMgrGetFeatureByLabel (bspx, grp->locus, SEQFEAT_GENE, 0, &gcontext); @@ -6268,9 +6708,9 @@ static CharPtr FormatFeatureBlockEx ( /* first look for gene that exactly matches mat_peptide DNA 
projection */ if (gene == NULL && grp == NULL && locformatpep != NULL) { - gene = GetOverlappingGeneInEntity (ajp->ajp.entityID, fcontext, &gcontext, locformatpep, ajp); + gene = GetOverlappingGeneInEntity (ajp->ajp.entityID, fcontext, &gcontext, locformatpep, is_ed, is_oldgb, ajp); if (gene == NULL && ajp->ajp.entityID != sfp->idx.entityID) { - gene = GetOverlappingGeneInEntity (sfp->idx.entityID, fcontext, &gcontext, locformatpep, ajp); + gene = GetOverlappingGeneInEntity (sfp->idx.entityID, fcontext, &gcontext, locformatpep, is_ed, is_oldgb, ajp); } if (gene != NULL) { @@ -6297,9 +6737,9 @@ static CharPtr FormatFeatureBlockEx ( if (gene == NULL && grp == NULL) { if (featdeftype != FEATDEF_primer_bind) { - gene = GetOverlappingGeneInEntity (ajp->ajp.entityID, fcontext, &gcontext, locforgene, ajp); + gene = GetOverlappingGeneInEntity (ajp->ajp.entityID, fcontext, &gcontext, locforgene, is_ed, is_oldgb, ajp); if (gene == NULL && ajp->ajp.entityID != sfp->idx.entityID) { - gene = GetOverlappingGeneInEntity (sfp->idx.entityID, fcontext, &gcontext, locforgene, ajp); + gene = GetOverlappingGeneInEntity (sfp->idx.entityID, fcontext, &gcontext, locforgene, is_ed, is_oldgb, ajp); } } @@ -6333,7 +6773,9 @@ static CharPtr FormatFeatureBlockEx ( pseudo = TRUE; } - if (grp != NULL && (featdeftype != FEATDEF_repeat_region || is_ed || gene == NULL)) { + if (grp != NULL && + ((featdeftype != FEATDEF_repeat_region && featdeftype != FEATDEF_mobile_element) || + is_ed || gene == NULL)) { if (! 
StringHasNoText (grp->locus)) { qvp [FTQUAL_gene].str = grp->locus; qvp [FTQUAL_locus_tag].str = grp->locus_tag; @@ -6361,11 +6803,11 @@ static CharPtr FormatFeatureBlockEx ( } if (grp != NULL && featdeftype != FEATDEF_variation && - (featdeftype != FEATDEF_repeat_region || is_ed)) { + ((featdeftype != FEATDEF_repeat_region && featdeftype != FEATDEF_mobile_element) || is_ed)) { qvp [FTQUAL_gene_allele].str = grp->allele; /* now propagating /allele */ } - if (gene != NULL && (featdeftype != FEATDEF_repeat_region || is_ed)) { + if (gene != NULL && ((featdeftype != FEATDEF_repeat_region && featdeftype != FEATDEF_mobile_element) || is_ed)) { /* now propagate old_locus_tag to almost any underlying feature */ for (gbq = gene->qual; gbq != NULL; gbq = gbq->next) { if (StringHasNoText (gbq->val)) continue; @@ -6977,7 +7419,9 @@ static CharPtr FormatFeatureBlockEx ( if (StringICmp (oip->str, "definition") == 0) { str = (CharPtr) ufp->data.ptrvalue; if (StringDoesHaveText (str)) { - qvp [FTQUAL_cdd_definition].str = str; + if (StringICmp (str, (CharPtr) sfp->data.value.ptrvalue) != 0) { + qvp [FTQUAL_cdd_definition].str = str; + } } } } @@ -7018,6 +7462,26 @@ static CharPtr FormatFeatureBlockEx ( case SEQFEAT_HET : qvp [FTQUAL_heterogen].str = (CharPtr) sfp->data.value.ptrvalue; break; + case SEQFEAT_VARIATIONREF : + vrp = (VariationRefPtr) sfp->data.value.ptrvalue; + if (vrp != NULL) { + qvp [FTQUAL_variation_id].dbt = vrp->id; + vnp = vrp->data; + if (vnp != NULL) { + if (vnp->choice == VarRefData_instance) { + vip = (VariationInstPtr) vnp->data.ptrvalue; + if (vip != NULL) { + qvp [FTQUAL_delta_item].vnp = vip->delta; + } + } else if (vnp->choice == VarRefData_set) { + vsp = (VarRefDataSetPtr) vnp->data.ptrvalue; + if (vsp != NULL) { + qvp [FTQUAL_variation_set].vnp = vsp->variations; + } + } + } + } + break; default : break; } @@ -7038,7 +7502,7 @@ static CharPtr FormatFeatureBlockEx ( qvp [FTQUAL_go_function].ufp = NULL; } - if (featdeftype == FEATDEF_repeat_region) { + 
if (featdeftype == FEATDEF_repeat_region || featdeftype == FEATDEF_mobile_element) { pseudo = FALSE; } @@ -7098,6 +7562,7 @@ static CharPtr FormatFeatureBlockEx ( case FEATDEF_misc_signal: case FEATDEF_misc_structure: case FEATDEF_modified_base: + case FEATDEF_mobile_element: case FEATDEF_mutation: case FEATDEF_old_sequence: case FEATDEF_polyA_signal: @@ -7146,13 +7611,21 @@ static CharPtr FormatFeatureBlockEx ( sfp->idx.subtype, &riboSlippage, &transSplice, - &artLoc); + &artLoc, + &hetPop, + &lowQual); qvp [FTQUAL_exception].str = exception_string; qvp [FTQUAL_exception_note].str = exception_note; qvp [FTQUAL_ribosomal_slippage].ble = riboSlippage; qvp [FTQUAL_trans_splicing].ble = transSplice; qvp [FTQUAL_artificial_location].ble = artLoc; + if (hetPop) { + qvp [FTQUAL_artificial_location_str].str = "heterogeneous population sequenced"; + } + if (lowQual) { + qvp [FTQUAL_artificial_location_str].str = "low-quality sequence region"; + } /* if (StringHasNoText (qvp [FTQUAL_exception].str)) { @@ -7512,7 +7985,20 @@ static CharPtr FormatFeatureBlockEx ( BioseqUnlock (unlockme); - str = FFEndPrint (ajp, ffstring, format, 21, 21, 21, 21, "FT"); + if (afp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && + (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) { + sprintf (sfx, ""); + } + + str = NULL; + + if (js != NULL) { + str = FFEndPrintEx (ajp, ffstring, format, 21, 21, 21, 21, "FT", js, sfx); + } else { + str = FFEndPrintEx (ajp, ffstring, format, 21, 21, 21, 21, "FT", pfx, sfx); + } + + MemFree (js); /* optionally populate gbseq for XML-ized GenBank format */ @@ -7572,7 +8058,7 @@ NLM_EXTERN CharPtr FormatFeatureBlock ( /* otherwise do regular flatfile formatting */ - return FormatFeatureBlockEx (ajp, asp, bsp, target, sfp, &fcontext, qvp, + return FormatFeatureBlockEx (afp, ajp, asp, bsp, target, sfp, &fcontext, qvp, format, (IntFeatBlockPtr) bbp, ISA_aa (bsp->mol), TRUE); } @@ -7742,7 +8228,7 @@ NLM_EXTERN void DoImmediateRemoteFeatureFormat ( 
oldscope = SeqEntrySetScope (sep); if (ajp->format != FTABLE_FMT) { - str = FormatFeatureBlockEx (ajp, asp, bsp, target, sfp, &fcontext, qvp, + str = FormatFeatureBlockEx (afp, ajp, asp, bsp, target, sfp, &fcontext, qvp, ajp->format, (IntFeatBlockPtr) bbp, ISA_aa (bsp->mol), TRUE); } @@ -7801,7 +8287,7 @@ NLM_EXTERN CharPtr FormatFeatureQuals ( qvp = MemNew (sizeof (QualVal) * (max + 5)); if (qvp == NULL) return NULL; - str = FormatFeatureBlockEx (ajp, NULL, NULL, NULL, sfp, &fcontext, qvp, + str = FormatFeatureBlockEx (NULL, ajp, NULL, NULL, NULL, sfp, &fcontext, qvp, GENBANK_FMT, ifp, FALSE, FALSE); MemFree (qvp); diff --git a/api/asn2gnb5.c b/api/asn2gnb5.c index 724eab36..d311080e 100644 --- a/api/asn2gnb5.c +++ b/api/asn2gnb5.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.171 $ +* $Revision: 1.185 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -149,6 +149,7 @@ static UrlData Nlm_url_base [] = { {"dictyBase", "http://dictybase.org/db/cgi-bin/gene_page.pl?dictybaseid="}, {"ECOCYC", "http://biocyc.org/ECOLI/new-image?type=GENE&object="}, {"EcoGene", "http://ecogene.org/geneInfo.php?eg_id="}, + {"ENSEMBL", "http://www.ensembl.org/id/"}, {"ERIC", "http://www.ericbrc.org/genbank/dbxref/"}, {"FANTOM_DB", "http://fantom.gsc.riken.jp/db/annotate/main.cgi?masterid="}, {"FLYBASE", "http://flybase.bio.indiana.edu/.bin/fbidq.html?"}, @@ -165,6 +166,7 @@ static UrlData Nlm_url_base [] = { {"HOMD", "http://www.homd.org/"}, {"HPRD", "http://www.hprd.org/protein/"}, {"HSSP", "http://srs.ebi.ac.uk/srsbin/cgi-bin/wgetz?-newId+-e+hssp-ID:"}, + {"IKMC", "http://www.knockoutmouse.org/martsearch/project/"}, {"IMGT/GENE-DB", "http://imgt.cines.fr/cgi-bin/GENElect.jv?species=Homo+sapiens&query=2+"}, {"IMGT/LIGM", "http://imgt.cines.fr:8104/cgi-bin/IMGTlect.jv?query=202+"}, {"InterimID", "http://www.ncbi.nlm.nih.gov/LocusLink/LocRpt.cgi?l="}, @@ -175,7 +177,7 @@ static UrlData Nlm_url_base [] = { {"JCM", 
"http://www.jcm.riken.go.jp/cgi-bin/jcm/jcm_number?JCM="}, {"JGIDB", "http://genome.jgi-psf.org/cgi-bin/jgrs?id="}, {"LocusID", "http://www.ncbi.nlm.nih.gov/LocusLink/LocRpt.cgi?l="}, - {"MaizeGDB", "http://www.maizegdb.org/supersearch.php?show=loc&pattern="}, + {"MaizeGDB", "http://www.maizegdb.org/cgi-bin/displaylocusrecord.cgi?"}, {"MGI", "http://www.informatics.jax.org/searches/accession_report.cgi?id=MGI:"}, {"MIM", "http://www.ncbi.nlm.nih.gov/entrez/dispomim.cgi?id="}, {"miRBase", "http://www.mirbase.org/cgi-bin/mirna_entry.pl?acc="}, @@ -198,7 +200,7 @@ static UrlData Nlm_url_base [] = { {"RATMAP", "http://ratmap.gen.gu.se/ShowSingleLocus.htm?accno="}, {"REBASE", "http://rebase.neb.com/rebase/enz/"}, {"RFAM", "http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?"}, - {"RGD", "http://rgd.mcw.edu/query/query.cgi?id="}, + {"RGD", "http://rgd.mcw.edu/generalSearch/RgdSearch.jsp?quickSearch=1&searchKeyword="}, {"RiceGenes", "http://ars-genome.cornell.edu/cgi-bin/WebAce/webace?db=ricegenes&class=Marker&object="}, {"SEED", "http://www.theseed.org/linkin.cgi?id="}, {"SGD", "http://db.yeastgenome.org/cgi-bin/SGD/locus.pl?locus="}, @@ -262,9 +264,7 @@ static void FF_www_get_url ( { CharPtr base = NULL, prefix = NULL, profix = NULL, ident = NULL, suffix = NULL, url = NULL, ptr, str; Char ch, buf [128], id [20], taxname [128]; - /* Boolean is_numeric; - */ Int2 R; if (ffstring == NULL || StringHasNoText (db) || StringHasNoText (identifier)) return; @@ -369,7 +369,6 @@ static void FF_www_get_url ( url = "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=31&KEN_STR="; } - } else if (StringCmp (db, "HOMD") == 0) { if (StringStr (identifier, "tax_") != NULL ) { @@ -391,6 +390,31 @@ static void FF_www_get_url ( } } + } else if (StringCmp (db, "MaizeGDB") == 0) { + + is_numeric = TRUE; + str = identifier; + ch = *str; + while (ch != '\0') { + if (! 
IS_DIGIT (ch)) { + is_numeric = FALSE; + } + str++; + ch = *str; + } + + if (is_numeric) { + prefix = "id="; + } else { + prefix = "term="; + } + + } else if (StringCmp (db, "miRBase") == 0) { + + if (StringStr (identifier, "MIMAT") != NULL) { + url = "http://www.mirbase.org/cgi-bin/mature.pl?mature_acc="; + } + } else if (StringCmp (db, "niaEST") == 0) { suffix = "&val=1"; @@ -1538,7 +1562,7 @@ static CharPtr FormatCitJour ( if (dp->data [1] != 0) { sprintf (year, " (%ld)", (long) (1900 + dp->data [1])); } - } else { + } else if (StringDoesHaveText (dp->str) && StringCmp (dp->str, "?") != 0) { StringCpy (year, " ("); StringNCat (year, dp->str, 4); StringCat (year, ")"); @@ -2427,6 +2451,7 @@ static CharPtr FormatCitPat ( static CharPtr FormatCitGen ( FmtType format, Boolean dropBadCitGens, + Boolean is_ed, Boolean noAffilOnUnpub, CitGenPtr cgp ) @@ -2452,7 +2477,7 @@ static CharPtr FormatCitGen ( /* !!! temporarily put date in unpublished citation for QA !!! */ - if (dropBadCitGens) { + if (dropBadCitGens && is_ed) { year [0] = '\0'; dp = cgp->date; if (dp != NULL) { @@ -2560,7 +2585,7 @@ static CharPtr FormatCitGen ( } if (! 
StringHasNoText (pages)) { - if (format == GENBANK_FMT) { + if (format == GENBANK_FMT || format == GENPEPT_FMT) { AddValNodeString (&head, ", ", pages, NULL); } else if (format == EMBL_FMT) { AddValNodeString (&head, ":", pages, NULL); @@ -2635,6 +2660,7 @@ static CharPtr GetPubJournal ( FmtType format, ModType mode, Boolean dropBadCitGens, + Boolean is_ed, Boolean noAffilOnUnpub, Boolean citArtIsoJta, PubdescPtr pdp, @@ -2668,7 +2694,7 @@ static CharPtr GetPubJournal ( break; /* skip just serial number */ } } - journal = FormatCitGen (format, dropBadCitGens, noAffilOnUnpub, cgp); + journal = FormatCitGen (format, dropBadCitGens, is_ed, noAffilOnUnpub, cgp); } break; case PUB_Sub : @@ -3154,6 +3180,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( ImprintPtr imp; IndxPtr index; IntRefBlockPtr irp; + Boolean is_ed = FALSE; size_t len; SeqLocPtr loc = NULL; MedlineEntryPtr mep; @@ -3334,7 +3361,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) { if (rbp->sites == 0) { - FFLineWrap(ffstring, temp, 0, 5, ASN2FF_EMBL_MAX, "RN"); + FFLineWrap(ajp, ffstring, temp, 0, 5, ASN2FF_EMBL_MAX, "RN"); FFRecycleString(ajp, temp); temp = FFGetString(ajp); FFStartPrint(temp, afp->format, 0, 0, NULL, 0, 5, 5, "RP", FALSE); @@ -3345,15 +3372,15 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { FFAddOneString (temp, "(sites)", FALSE, FALSE, TILDE_TO_SPACES); - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); } else { - FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP"); + FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP"); } } else if (rbp->sites == 3) { if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); } else { - FFLineWrap(ffstring, temp, 5, 5, 
ASN2FF_EMBL_MAX, "RP"); + FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP"); } } else { if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { @@ -3410,9 +3437,9 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( FFAddOneString (temp, ")", FALSE, FALSE, TILDE_TO_SPACES); } if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); } else { - FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP"); + FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP"); } } @@ -3461,9 +3488,9 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( } if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); } else { - FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RA"); + FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RA"); } } MemFree (str); @@ -3476,9 +3503,9 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( FFStartPrint (temp, afp->format, 2, 12, "CONSRTM", 12, 5, 5, "RG", FALSE); FFAddTextToString (temp, NULL, consortium, suffix, FALSE, FALSE, TILDE_TO_SPACES); if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); } else { - FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RG"); + FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RG"); } } MemFree (consortium); @@ -3509,7 +3536,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( FFStartPrint (temp, afp->format, 2, 12, "TITLE", 12, 5, 5, "RT", FALSE); FFAddTextToString (temp, prefix, str, suffix, FALSE, FALSE, TILDE_TO_SPACES); - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); } } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) { 
FFStartPrint (temp, afp->format, 2, 12, "TITLE", 12, 5, 5, "RT", FALSE); @@ -3520,7 +3547,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( } else { FFAddOneChar (temp, ';', FALSE); } - FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RT"); + FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RT"); } if (gbseq != NULL) { @@ -3551,14 +3578,17 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( sip->choice == SEQID_TPD) { strict_isojta = TRUE; } + if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) { + is_ed = TRUE; + } } if (! strict_isojta) { citArtIsoJta = FALSE; } str = GetPubJournal (afp->format, ajp->mode, ajp->flags.dropBadCitGens, - ajp->flags.noAffilOnUnpub, citArtIsoJta, pdp, csp, - bsp->id, index, ajp); + is_ed, ajp->flags.noAffilOnUnpub, citArtIsoJta, + pdp, csp, bsp->id, index, ajp); if (str == NULL) { str = StringSave ("Unpublished"); } @@ -3598,9 +3628,9 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( MemFree (str); if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); } else { - FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RL"); + FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RL"); } if (gbseq != NULL) { @@ -3643,11 +3673,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { FF_www_muid (ajp, temp, muid); - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) { sprintf (buf, "MEDLINE; %ld.", (long) muid); FFAddOneString (temp, buf, FALSE, FALSE, TILDE_TO_SPACES); - FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RX"); + FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RX"); } } @@ -3658,11 +3688,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( FFStartPrint (temp, afp->format, 3, 12, "PUBMED", 12, 5, 5, 
"RX", FALSE); if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { FF_www_muid (ajp, temp, pmid); - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) { sprintf (buf, "PUBMED; %ld.", (long) pmid); FFAddOneString (temp, buf, FALSE, FALSE, TILDE_TO_SPACES); - FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RX"); + FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RX"); } } FFRecycleString(ajp, temp); @@ -3684,7 +3714,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( FFStartPrint (temp, afp->format, 2, 12, "REMARK", 12, 5, 5, NULL, FALSE); /* FFAddOneString (temp, csp->descr, FALSE, TRUE, TILDE_EXPAND); */ AddCommentWithURLlinks(ajp, temp, NULL, csp->descr, NULL); - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); } } @@ -3747,7 +3777,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( } else { FFAddOneString (temp, buf, FALSE, FALSE, TILDE_EXPAND); } - FFLineWrap (ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap (ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); prefix = NULL; } } @@ -3771,7 +3801,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); FFAddOneString (temp, pdp->comment, FALSE, TRUE, TILDE_EXPAND); /* AddCommentWithURLlinks(ajp, temp, NULL, pdp->comment, NULL); */ - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); prefix = NULL; if (gbseq != NULL) { @@ -3809,7 +3839,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( remprefix = "; "; FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); FFAddOneString (temp, buf, FALSE, FALSE, TILDE_EXPAND); - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); prefix = NULL; 
/* gibbsq comment section (fields may be copied from degenerate pubdesc) */ @@ -3830,7 +3860,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( remprefix = "; "; FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); FFAddOneString (temp, buf, TRUE, TRUE, TILDE_EXPAND); - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); prefix = NULL; } @@ -3845,7 +3875,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( remprefix = "; "; FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); FFAddOneString (temp, "Polyadenylate residues occurring in the figure were omitted from the sequence.", TRUE, TRUE, TILDE_EXPAND); - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); prefix = NULL; } @@ -3865,7 +3895,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( remprefix = "; "; FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); FFAddOneString (temp, buf, TRUE, TRUE, TILDE_EXPAND); - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); prefix = NULL; } @@ -3906,7 +3936,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( if (StringDoesHaveText (crp->exp)) { FFAddTextToString (temp, ":[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND); } - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); prefix = NULL; } else if (crp->type == 3) { FFRecycleString(ajp, temp); @@ -3933,7 +3963,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( if (StringDoesHaveText (crp->exp)) { FFAddTextToString (temp, ":[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND); } - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); prefix = NULL; } else if (crp->type == 4) { FFRecycleString(ajp, temp); @@ -3960,7 +3990,7 @@ NLM_EXTERN CharPtr 
FormatReferenceBlock ( if (StringDoesHaveText (crp->exp)) { FFAddTextToString (temp, " to:[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND); } - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); prefix = NULL; } } @@ -3982,7 +4012,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); /* FFAddOneString (temp, csp->descr, FALSE, TRUE, TILDE_EXPAND); */ AddCommentWithURLlinks(ajp, temp, NULL, csp->descr, NULL); - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); prefix = NULL; } } @@ -4007,7 +4037,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( remprefix = "; "; FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); FFAddOneString (temp, pubstatnote, FALSE, FALSE, TILDE_EXPAND); - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); prefix = NULL; } @@ -4036,7 +4066,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( FFAddOneString (temp, "DOI: ", FALSE, FALSE, TILDE_EXPAND); FFAddOneString (temp, doi, FALSE, FALSE, TILDE_EXPAND); } - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); prefix = NULL; } } diff --git a/api/asn2gnb6.c b/api/asn2gnb6.c index a70c1f5c..62bd744a 100644 --- a/api/asn2gnb6.c +++ b/api/asn2gnb6.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.227 $ +* $Revision: 1.257 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -477,6 +477,7 @@ NLM_EXTERN CharPtr legalDbXrefs [] = { "HMP", "HOMD", "HSSP", + "IKMC", "IMGT/GENE-DB", "IMGT/HLA", "IMGT/LIGM", @@ -517,6 +518,7 @@ NLM_EXTERN CharPtr legalDbXrefs [] = { "SGN", "SoyBase", "SubtiList", + "TAIR", "taxon", "TIGRFAM", "UniGene", @@ -547,6 +549,7 @@ NLM_EXTERN CharPtr legalSrcDbXrefs [] = { "GRIN", 
"HMP", "HOMD", + "IKMC", "IMGT/HLA", "IMGT/LIGM", "JCM", @@ -562,6 +565,7 @@ NLM_EXTERN CharPtr legalSrcDbXrefs [] = { NLM_EXTERN CharPtr legalRefSeqDbXrefs [] = { "BEEBASE", + "BioProject", "CCDS", "CGNC", "CloneID", @@ -573,7 +577,6 @@ NLM_EXTERN CharPtr legalRefSeqDbXrefs [] = { "PBR", "REBASE", "SK-FST", - "TAIR", "VBRC", NULL }; @@ -996,7 +999,7 @@ NLM_EXTERN CharPtr FormatSourceBlock ( /* If the organelle prefix is already on the */ /* name, don't add it. */ - if (StringNCmp (organelle, taxname, StringLen (organelle)) == 0) + if (StringNICmp (organelle, taxname, StringLen (organelle)) == 0) organelle = ""; if (StringHasNoText (common)) { @@ -1228,13 +1231,13 @@ NLM_EXTERN CharPtr FormatOrganismBlock ( } else { FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE); } - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); FFRecycleString(ajp, temp); temp = FFGetString(ajp); FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OC", FALSE); FFAddTextToString(temp, NULL, lineage, NULL, TRUE, FALSE, TILDE_TO_SPACES); - FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); + FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); FFRecycleString(ajp, temp); /* optionally populate gbseq for XML-ized GenBank format */ @@ -1258,14 +1261,14 @@ NLM_EXTERN CharPtr FormatOrganismBlock ( } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) { FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OC", FALSE); FFAddTextToString(temp, NULL, lineage, NULL, TRUE, FALSE, TILDE_TO_SPACES); - FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OC"); + FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OC"); FFRecycleString(ajp, temp); if ( !StringHasNoText(organelle) ) { temp = FFGetString(ajp); if ( temp != NULL ) { FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OG", FALSE); FFAddTextToString(temp, NULL, organelle, NULL, TRUE, FALSE, TILDE_TO_SPACES); - 
FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OG"); + FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OG"); FFRecycleString(ajp, temp); } } @@ -1428,9 +1431,9 @@ static CharPtr StrucCommentFFEndPrint ( if ( (ffstring == NULL) || (ajp == NULL) ) return NULL; if (format == GENBANK_FMT || format == GENPEPT_FMT) { - FFLineWrap (temp, ffstring, gb_init_indent, gb_cont_indent, ASN2FF_GB_MAX - 12, NULL); + FFLineWrap (ajp, temp, ffstring, gb_init_indent, gb_cont_indent, ASN2FF_GB_MAX - 12, NULL); } else { - FFLineWrap (temp, ffstring, eb_init_indent, eb_cont_indent, ASN2FF_EMBL_MAX - 5, eb_line_prefix); + FFLineWrap (ajp, temp, ffstring, eb_init_indent, eb_cont_indent, ASN2FF_EMBL_MAX - 5, eb_line_prefix); } result = FFToCharPtr (temp); FFRecycleString (ajp, temp); @@ -1573,6 +1576,10 @@ static CharPtr GetStrForStructuredComment ( FFAddOneString (ffstring, "", FALSE, FALSE, TILDE_IGNORE); } else if (GetWWW (ajp) && StringCmp (field, "url") == 0) { AddCommentWithURLlinks (ajp, ffstring, NULL, str, NULL); + } else if (GetWWW (ajp) && StringNICmp (str, "http://", 7) == 0) { + AddCommentWithURLlinks (ajp, ffstring, NULL, str, NULL); + } else if (GetWWW (ajp) && StringNICmp (str, "https://", 8) == 0) { + AddCommentWithURLlinks (ajp, ffstring, NULL, str, NULL); } else { FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND); } @@ -1830,7 +1837,7 @@ static void CatenateCommentInGbseq ( if (gbseq->comment == NULL) { gbseq->comment = cpy; } else { - tmp = (CharPtr) MemNew (StringLen (gbseq->comment) + StringLen (str) + 10); + tmp = (CharPtr) MemNew (StringLen (gbseq->comment) + StringLen (cpy) + 10); if (tmp == NULL) return; StringCpy (tmp, gbseq->comment); if (ajp->oldXmlPolicy) { @@ -3696,15 +3703,20 @@ static CharPtr FullNameFromInstCode (CharPtr code) #define s_atcc_base "http://www.atcc.org/SearchCatalogs/linkin?id=" #define s_bcrc_base "http://strain.bcrc.firdi.org.tw/BSAS/controller?event=SEARCH&bcrc_no=" -#define s_ccmp_base 
"http://ccmp.bigelow.org/SD/display.php?strain=CCMP" +#define s_cbs_base "http://www.cbs.knaw.nl/collections/BioloMICS.aspx?Fields=All&ExactMatch=T&Table=CBS+strain+database&Name=CBS+" +#define s_ccap_base "http://www.ccap.ac.uk/strain_info.php?Strain_No=" +#define s_ccmp_base "https://ccmp.bigelow.org/node/1/strain/CCMP" #define s_ccug_base "http://www.ccug.se/default.cfm?page=search_record.cfm&db=mc&s_tests=1&ccugno=" +#define s_cori_base "http://ccr.coriell.org/Sections/Search/Sample_Detail.aspx?Ref=" #define s_dsmz_base "http://www.dsmz.de/microorganisms/search_no.php?q=" #define s_fsu_base "http://www.prz.uni-jena.de/data.php?fsu=" #define s_icmp_base "http://nzfungi.landcareresearch.co.nz/icmp/results_cultures.asp?ID=&icmpVAR=" +#define s_kctc_base "http://www.brc.re.kr/English/_SearchView.aspx?sn=" #define s_ku_base "http://collections.nhm.ku.edu/" -#define s_pcc_base "http://www.pasteur.fr/recherche/banques/PCC/docs/pcc" +#define s_pcc_base "http://www.crbip.pasteur.fr/fiches/fichecata.jsp?crbip=PCC+" #define s_pcmb_base "http://www2.bishopmuseum.org/HBS/PCMB/results3.asp?searchterm3=" #define s_pdd_base "http://nzfungi.landcareresearch.co.nz/html/data_collections_details.asp?CID=" +#define s_sag_base "http://sagdb.uni-goettingen.de/detailedList.php?str_number=" #define s_tgrc_base "http://tgrc.ucdavis.edu/Data/Acc/AccDetail.aspx?AccessionNum=" #define s_uam_base "http://arctos.database.museum/guid/" #define s_ypm_base "http://peabody.research.yale.edu/cgi-bin/Query.Ledger?" 
@@ -3723,7 +3735,6 @@ static CharPtr FullNameFromInstCode (CharPtr code) #define s_ypmorn_pfx "LE=orn&ID=" #define s_bcrc_sfx "&type_id=6&keyword=;;" -#define s_pcc_sfx ".htm" typedef struct vouch { CharPtr sites; @@ -3736,17 +3747,23 @@ typedef struct vouch { static VouchData Nlm_spec_vouchers [] = { { "ATCC", s_atcc_base, FALSE, NULL, NULL }, { "BCRC", s_bcrc_base, FALSE, NULL, s_bcrc_sfx }, + { "CBS", s_cbs_base, FALSE, NULL, NULL }, + { "CCAP", s_ccap_base, FALSE, NULL, NULL }, { "CCMP", s_ccmp_base, FALSE, NULL, NULL }, { "CCUG", s_ccug_base, FALSE, NULL, NULL }, + { "Coriell", s_cori_base, FALSE, NULL, NULL }, { "CRCM:Bird", s_uam_base, TRUE, s_colon_pfx, NULL }, { "DGR:Bird", s_uam_base, TRUE, s_colon_pfx, NULL }, { "DGR:Ento", s_uam_base, TRUE, s_colon_pfx, NULL }, { "DGR:Fish", s_uam_base, TRUE, s_colon_pfx, NULL }, { "DGR:Herp", s_uam_base, TRUE, s_colon_pfx, NULL }, { "DGR:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL }, + { "DMNS:Bird", s_uam_base, TRUE, s_colon_pfx, NULL }, + { "DMNS:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL }, { "DSM", s_dsmz_base, FALSE, NULL, NULL }, { "FSU", s_fsu_base, FALSE, NULL, NULL }, { "ICMP", s_icmp_base, FALSE, NULL, NULL }, + { "KCTC", s_kctc_base, FALSE, NULL, NULL }, { "KU:I", s_ku_base, FALSE, s_kui_pfx, NULL }, { "KU:IT", s_ku_base, FALSE, s_kuit_pfx, NULL }, { "KWP:Ento", s_uam_base, TRUE, s_colon_pfx, NULL }, @@ -3762,10 +3779,11 @@ static VouchData Nlm_spec_vouchers [] = { { "MVZ:Page", s_uam_base, TRUE, s_colon_pfx, NULL }, { "MVZObs:Herp", s_uam_base, TRUE, s_colon_pfx, NULL }, { "NBSB:Bird", s_uam_base, TRUE, s_colon_pfx, NULL }, - { "PCC", s_pcc_base, FALSE, NULL, s_pcc_sfx }, + { "PCC", s_pcc_base, FALSE, NULL, NULL }, { "PCMB", s_pcmb_base, FALSE, NULL, NULL }, { "PDD", s_pdd_base, FALSE, NULL, NULL }, { "PSU:Mamm", s_uam_base, FALSE, s_psu_pfx, NULL }, + { "SAG", s_sag_base, FALSE, NULL, NULL }, { "TGRC", s_tgrc_base, FALSE, NULL, NULL }, { "UAM:Bird", s_uam_base, TRUE, s_colon_pfx, NULL }, { "UAM:Bryo", 
s_uam_base, TRUE, s_colon_pfx, NULL }, @@ -4061,14 +4079,16 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock ( BioSourcePtr biop = NULL; BioseqPtr bsp; BioseqSetPtr bssp; - Char buf [80]; + Char buf [128], pfx [512], sfx [128]; CharPtr common = NULL; + Int4 currGi = 0; DbtagPtr dbt; SeqMgrDescContext dcontext; SeqMgrFeatContext fcontext; GBFeaturePtr gbfeat = NULL; GBSeqPtr gbseq; Int2 i; + IntAsn2gbSectPtr iasp; Uint1 idx; IntSrcBlockPtr isp; Boolean is_desc = TRUE; @@ -4121,6 +4141,9 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock ( qvp = afp->qvp; if (qvp == NULL) return NULL; + pfx [0] = '\0'; + sfx [0] = '\0'; + if (ajp->gbseq) { gbseq = &asp->gbseq; } else { @@ -4164,6 +4187,26 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock ( if ( ffstring == NULL ) return NULL; FFStartPrint (ffstring, afp->format, 5, 21, NULL, 0, 5, 21, "FT", FALSE); + + for (sip = bsp->id; sip != NULL; sip = sip->next) { + if (sip->choice == SEQID_GI) { + currGi = (Int4) sip->data.intvalue; + } + } + + iasp = (IntAsn2gbSectPtr) asp; + + if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && + (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) { + if (! 
iasp->feat_js_prefix_added) { + sprintf (pfx, "", + (long) currGi, (long) isp->source_count, iasp->gi, iasp->acc); + iasp->feat_js_prefix_added = TRUE; + } else { + sprintf (pfx, "", (long) currGi, (long) isp->source_count); + } + } + FFAddOneString (ffstring, "source", FALSE, FALSE, TILDE_IGNORE); FFAddNChar(ffstring, ' ', 21 - 5 - StringLen("source"), FALSE); @@ -4878,7 +4921,12 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock ( /* and then deal with the various note types separately (not in order table) */ - str = FFEndPrint(ajp, ffstring, afp->format, 21, 21, 5, 21, "FT"); + if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE && + (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) { + sprintf (sfx, ""); + } + + str = FFEndPrintEx (ajp, ffstring, afp->format, 21, 21, 5, 21, "FT", pfx, sfx); /* optionally populate gbseq for XML-ized GenBank format */ @@ -5034,12 +5082,12 @@ static void PrintSeqLine ( sprintf (pos, "%9ld", (long) (start + 1)); FFAddOneString(ffstring, pos, FALSE, FALSE, TILDE_TO_SPACES); FFAddOneChar(ffstring, ' ', FALSE); - if (ajp != NULL && ajp->seqspans) { + if (ajp != NULL && GetWWW (ajp)) { sprintf (tmp, "", (long) gi, (long) (start + 1)); FFAddOneString(ffstring, tmp, FALSE, FALSE, TILDE_TO_SPACES); } FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES); - if (ajp != NULL && ajp->seqspans) { + if (ajp != NULL && GetWWW (ajp)) { FFAddOneString(ffstring, "", FALSE, FALSE, TILDE_TO_SPACES); } FFAddOneChar(ffstring, '\n', FALSE); @@ -5117,7 +5165,7 @@ static void PrintGenome ( Boolean first = TRUE; SeqIdPtr freeid = NULL, sid = NULL, newid = NULL; SeqLocPtr slp = NULL; - Int4 from = 0, to = 0, start = 0, stop = 0, gi = 0; + Int4 start = 0, stop = 0, gi = 0; BioseqPtr bsp = NULL; Int2 p1 = 0, p2 = 0; @@ -5125,12 +5173,11 @@ static void PrintGenome ( gibuf [0] = '\0'; vbuf [0] = '\0'; for (slp = slp_head; slp; slp = slp->next) { - from = to = 0; sid = SeqLocId (slp); - if (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_WHOLE) { - start = 
from = SeqLocStart (slp); - stop = to = SeqLocStop (slp); - } else if (slp->choice == SEQLOC_NULL){ + if (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_PNT || slp->choice == SEQLOC_WHOLE) { + start = SeqLocStart (slp); + stop = SeqLocStop (slp); + } else if (slp->choice == SEQLOC_NULL) { sprintf (vbuf, ",%s", "gap()"); FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE); continue; @@ -5230,6 +5277,82 @@ static void PrintGenome ( } } +static DeltaSeqPtr RevCompDelta ( + DeltaSeqPtr seq_ext +) + +{ + DeltaSeqPtr dsp; + ValNodePtr head = NULL; + Int4 from, to, tmp; + SeqLocPtr nslp, slp; + Boolean partial5, partial3; + SeqIntPtr sintp; + SeqLitPtr slitp, slip; + ValNodePtr vnp; + + for (dsp = seq_ext; dsp != NULL; dsp = dsp->next) { + vnp = NULL; + + if (dsp->choice == 1) { + + slp = (SeqLocPtr) dsp->data.ptrvalue; + if (slp != NULL) { + + if (slp->choice == SEQLOC_NULL) { + + nslp = ValNodeAddPointer (NULL, SEQLOC_NULL, NULL); + + vnp = ValNodeAddPointer (NULL, 1, nslp); + + } else if (slp->choice == SEQLOC_INT) { + + sintp = (SeqIntPtr) slp->data.ptrvalue; + if (sintp != NULL) { + CheckSeqLocForPartial (slp, &partial5, &partial3); + from = sintp->from; + to = sintp->to; + if (sintp->strand != Seq_strand_minus) { + tmp = from; + from = to; + to = tmp; + } + nslp = AddIntervalToLocation (NULL, sintp->id, from, to, partial3, partial5); + + vnp = ValNodeAddPointer (NULL, 1, nslp); + + } + } + } + + } else if (dsp->choice == 2) { + + slitp = (SeqLitPtr) dsp->data.ptrvalue; + if (slitp != NULL && slitp->seq_data == NULL) { + slip = SeqLitNew (); + if (slip != NULL) { + slip->length = slitp->length; + /* not copying fuzz */ + slip->seq_data_type = slitp->seq_data_type; + vnp = ValNodeAddPointer (NULL, 2, (Pointer) slip); + } + } else { + while (head != NULL) { vnp = head->next; head->next = NULL; DeltaSeqFree (head); head = vnp; } /* give up on this delta: release nodes AND their SeqLoc/SeqLit payloads (ValNodeFree would free only the nodes) */ + return NULL; + } + } + + /* save in new list in reverse order */ + + if (vnp != NULL) { + vnp->next = head; + head = vnp; + } + } + + return head; +} + NLM_EXTERN CharPtr FormatContigBlock (
Asn2gbFormatPtr afp, BaseBlockPtr bbp @@ -5239,13 +5362,19 @@ NLM_EXTERN CharPtr FormatContigBlock ( IntAsn2gbJobPtr ajp; Asn2gbSectPtr asp; BioseqPtr bsp; + DeltaSeqPtr delta_head = NULL; DeltaSeqPtr dsp; + DeltaSeqPtr dspnext; IntFuzzPtr fuzz; GBSeqPtr gbseq; Boolean is_na; SeqLitPtr litp; + DeltaSeqPtr new_delta = NULL; CharPtr prefix = NULL; + Boolean rev_comp = FALSE; Boolean segWithParts = FALSE; + SeqIntPtr sintp; + SeqLocPtr slp; SeqLocPtr slp_head = NULL; CharPtr str; Char tmp [16]; @@ -5267,6 +5396,18 @@ NLM_EXTERN CharPtr FormatContigBlock ( is_na = ISA_na (bsp->mol); + if (ajp->ajp.slp != NULL) { + slp = ajp->ajp.slp; + if (slp->choice == SEQLOC_INT) { + sintp = (SeqIntPtr) slp->data.ptrvalue; + if (sintp != NULL) { + if (sintp->from == 0 && sintp->to == bsp->length - 1 && sintp->strand == Seq_strand_minus) { + rev_comp = TRUE; + } + } + } + } + FFStartPrint (ffstring, afp->format, 0, 0, "CONTIG", 12, 5, 5, "CO", FALSE); /* if ( GetWWW(ajp) ) { @@ -5292,7 +5433,14 @@ NLM_EXTERN CharPtr FormatContigBlock ( } else if (bsp->seq_ext_type == 4) { - for (dsp = (DeltaSeqPtr) bsp->seq_ext; dsp != NULL; dsp=dsp->next) { + if (rev_comp) { + new_delta = RevCompDelta ((DeltaSeqPtr) bsp->seq_ext); + delta_head = new_delta; + } else { + delta_head = (DeltaSeqPtr) bsp->seq_ext; + } + + for (dsp = delta_head; dsp != NULL; dsp = dsp->next) { if (dsp->choice == 1) { slp_head = (SeqLocPtr) dsp->data.ptrvalue; @@ -5359,6 +5507,16 @@ NLM_EXTERN CharPtr FormatContigBlock ( StripAllSpaces (gbseq->contig); } + if (new_delta != NULL) { + dsp = new_delta; + while (dsp != NULL) { + dspnext = dsp->next; + dsp->next = NULL; + DeltaSeqFree (dsp); + dsp = dspnext; /* dsp was just freed and its next cleared; continue from the link saved above */ + } + } + return str; } @@ -5536,8 +5694,11 @@ static Int2 ProcessGapSpecialFormat ( Char gi_buf [16]; Boolean is_na; Char pad; + Char rgn_buf [64]; SeqIdPtr sip; + SeqLocPtr slp; Int2 startgapgap = 0, endgap = 0; + Int4 from, to; is_na = ISA_na (bsp->mol); if (is_na) { @@ -5574,6 +5735,13 @@ static Int2
ProcessGapSpecialFormat ( sprintf(fmt_buf, "?fmt_mask=%ld", (long) EXPANDED_GAP_DISPLAY); if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) { StringCat (fmt_buf, "&report=gbwithparts"); + if (ajp->ajp.slp != NULL) { + slp = ajp->ajp.slp; + from = SeqLocStart (slp) + 1; + to = SeqLocStop (slp) + 1; + sprintf (rgn_buf, "&from=%ld&to=%ld", (long) from, (long) to); + StringCat (fmt_buf, rgn_buf); + } } FFAddOneString (ffstring, " 0) { FixGapAtStart (buf, pad); + startgapgap = 0; } endgap = GapAtEnd (buf); @@ -5870,13 +6039,15 @@ NLM_EXTERN CharPtr FormatSlashBlock ( ) { - IntAsn2gbJobPtr ajp; - Asn2gbSectPtr asp; - GBFeaturePtr currf, headf, nextf; - GBReferencePtr currr, headr, nextr; - GBSeqPtr gbseq, gbtmp; - IndxPtr index; - INSDSeq is; + IntAsn2gbJobPtr ajp; + Asn2gbSectPtr asp; + GBFeaturePtr currf, headf, nextf; + GBReferencePtr currr, headr, nextr; + Uint1 featdeftype; + GBSeqPtr gbseq, gbtmp; + IntAsn2gbSectPtr iasp; + IndxPtr index; + INSDSeq is; /* Int2 moltype, strandedness, topology; */ @@ -5887,6 +6058,8 @@ NLM_EXTERN CharPtr FormatSlashBlock ( asp = afp->asp; if (asp == NULL) return NULL; + iasp = (IntAsn2gbSectPtr) asp; + /* sort and unique indexes */ index = ajp->index; @@ -6017,7 +6190,15 @@ NLM_EXTERN CharPtr FormatSlashBlock ( GBSeqFree (gbtmp); } - /* slash always has string pre-allocated by add slash block function */ + /* then clean up javascript components */ + + iasp->gi = MemFree (iasp->gi); + iasp->acc = MemFree (iasp->acc); + for (featdeftype = 0; featdeftype < FEATDEF_MAX; featdeftype++) { + iasp->feat_key [featdeftype] = MemFree (iasp->feat_key [featdeftype]); + } + + /* slash has string pre-allocated by add slash block function */ return StringSaveNoNull (bbp->string); } diff --git a/api/asn2gnbi.h b/api/asn2gnbi.h index a9e09d89..8c1512a2 100644 --- a/api/asn2gnbi.h +++ b/api/asn2gnbi.h @@ -29,7 +29,7 @@ * * Version Creation Date: 12/30/03 * -* $Revision: 1.123 $ +* $Revision: 1.138 $ * * File Description: New GenBank flatfile 
generator, internal header * @@ -107,6 +107,11 @@ typedef struct asn2gbflags { typedef struct int_Asn2gbSect { Asn2gbSect asp; + Int4 feat_counts [FEATDEF_MAX]; + CharPtr gi; + CharPtr acc; + CharPtr feat_key [FEATDEF_MAX]; + Boolean feat_js_prefix_added; } IntAsn2gbSect, PNTR IntAsn2gbSectPtr; /* string structure */ @@ -160,6 +165,7 @@ typedef struct int_asn2gb_job { Boolean specialGapFormat; Boolean hideGoTerms; Boolean multiIntervalGenes; + Boolean segmentedBioseqs; Boolean reindex; Int4 seqGapCurrLen; ValNodePtr gihead; @@ -188,6 +194,7 @@ typedef union qualval { RNAGenPtr rgp; GeneNomenclaturePtr gnp; PCRReactionSetPtr prp; + DbtagPtr dbt; } QualVal, PNTR QualValPtr; /* structure passed to individual paragraph format functions */ @@ -306,6 +313,8 @@ typedef struct asn2gbwork { Boolean copyGpsCdsUp; Boolean copyGpsGeneDown; + Boolean isRefSeq; + Boolean showContigAndSeq; Char basename [SEQID_MAX_LEN]; @@ -384,6 +393,7 @@ typedef struct int_src_block { ValNodePtr vnp; Int4 left; Int4 right; + Int4 source_count; } IntSrcBlock, PNTR IntSrcBlockPtr; /* internal feature block has fields on top of FeatBlock fields */ @@ -400,6 +410,7 @@ typedef struct int_feat_block { Boolean firstfeat; Int4 left; Int4 right; + Int4 feat_count; /* unique in combination with feature type */ } IntFeatBlock, PNTR IntFeatBlockPtr; /* internal cds block has fields on top of IntFeatBlock fields */ @@ -488,7 +499,11 @@ typedef enum { Qual_class_voucher, Qual_class_lat_lon, Qual_class_mobile_element, - Qual_class_tag_peptide + Qual_class_tag_peptide, + Qual_class_variation_id, + Qual_class_delta_item, + Qual_class_variation_set, + Qual_class_experiment } QualType; /* source 'feature' */ @@ -609,6 +624,7 @@ typedef enum { FTQUAL_allele = 1, FTQUAL_anticodon, FTQUAL_artificial_location, + FTQUAL_artificial_location_str, FTQUAL_bond, FTQUAL_bond_type, FTQUAL_bound_moiety, @@ -622,6 +638,7 @@ typedef enum { FTQUAL_codon_start, FTQUAL_cons_splice, FTQUAL_db_xref, + FTQUAL_delta_item, 
FTQUAL_derived_from, FTQUAL_direction, FTQUAL_EC_number, @@ -662,6 +679,7 @@ typedef enum { FTQUAL_map, FTQUAL_maploc, FTQUAL_mobile_element, + FTQUAL_mobile_element_type, FTQUAL_mod_base, FTQUAL_modelev, FTQUAL_mol_wt, @@ -729,6 +747,8 @@ typedef enum { FTQUAL_trna_codons_note, FTQUAL_UniProtKB_evidence, FTQUAL_usedin, + FTQUAL_variation_id, + FTQUAL_variation_set, FTQUAL_xtra_prod_quals, ASN2GNBK_TOTAL_FEATUR } FtQualType; @@ -840,12 +860,18 @@ NLM_EXTERN void FFCatenateSubString ( Uint4 line_max ); NLM_EXTERN CharPtr FFToCharPtr (StringItemPtr sip); +NLM_EXTERN CharPtr FFToCharPtrEx (StringItemPtr sip, CharPtr pfx, CharPtr sfx); NLM_EXTERN void FFSkipLink (StringItemPtr PNTR iterp, Int4Ptr ip); +NLM_EXTERN void FFSkipHTMLAmpersandEscape (StringItemPtr PNTR iterp, Int4Ptr ip); NLM_EXTERN Boolean FFIsStartOfLink ( StringItemPtr iter, Int4 pos ); +NLM_EXTERN Boolean FFIsStartOfHTMLAmpersandEscape ( + StringItemPtr iter, + Int4 pos ); + NLM_EXTERN void FFSavePosition(StringItemPtr ffstring, StringItemPtr PNTR bufptr, Int4 PNTR posptr); NLM_EXTERN void FFTrim ( StringItemPtr ffstring, @@ -863,9 +889,11 @@ NLM_EXTERN void FFAdvanceChar( ); NLM_EXTERN void FFCalculateLineBreak ( StringItemPtr PNTR break_sip, Int4 PNTR break_pos, - Int4 init_indent, Int4 visible + Int4 init_indent, Int4 visible, + Boolean is_html ); NLM_EXTERN void FFLineWrap ( + IntAsn2gbJobPtr ajp, StringItemPtr dest, StringItemPtr src, Int4 init_indent, @@ -904,6 +932,18 @@ NLM_EXTERN CharPtr FFEndPrint ( Int2 eb_cont_indent, CharPtr eb_line_prefix ); +NLM_EXTERN CharPtr FFEndPrintEx ( + IntAsn2gbJobPtr ajp, + StringItemPtr ffstring, + FmtType format, + Int2 gb_init_indent, + Int2 gb_cont_indent, + Int2 eb_init_indent, + Int2 eb_cont_indent, + CharPtr eb_line_prefix, + CharPtr pfx, + CharPtr sfx +); NLM_EXTERN Uint4 FFLength(StringItemPtr ffstring); NLM_EXTERN Char FFCharAt(StringItemPtr ffstring, Uint4 pos); NLM_EXTERN Char FFFindChar ( @@ -919,6 +959,12 @@ NLM_EXTERN Int4 FFStringSearch ( const 
CharPtr pattern, Uint4 position ); +NLM_EXTERN Boolean FFStartsWith( + StringItemPtr text, + Int4 text_pos, + const CharPtr pattern, + Boolean case_insens +); /* * Scans the given buffer from a given scan position, for the next occurrence of diff --git a/api/ecnum_ambiguous.inc b/api/ecnum_ambiguous.inc index a9408e2f..3535c7f7 100644 --- a/api/ecnum_ambiguous.inc +++ b/api/ecnum_ambiguous.inc @@ -1,4 +1,4 @@ -/* $Id: ecnum_ambiguous.inc,v 1.1 2010/07/09 16:36:07 ucko Exp $ +/* $Id: ecnum_ambiguous.inc,v 1.3 2011/06/30 16:04:31 kazimird Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -31,672 +31,568 @@ */ static const char* const kECNum_ambiguous[] = { - "1.-.-.-", - "1.1.-.-", - "1.1.1.-", - "1.1.1.n", - "1.1.2.-", - "1.1.2.n", - "1.1.3.-", - "1.1.3.n", - "1.1.4.-", - "1.1.4.n", - "1.1.5.-", - "1.1.5.n", - "1.1.98.-", - "1.1.98.n", - "1.1.99.-", - "1.1.99.n", - "1.1.n.n", - "1.2.-.-", - "1.2.1.-", - "1.2.1.n", - "1.2.2.-", - "1.2.2.n", - "1.2.3.-", - "1.2.3.n", - "1.2.4.-", - "1.2.4.n", - "1.2.7.-", - "1.2.7.n", - "1.2.99.-", - "1.2.99.n", - "1.2.n.n", - "1.3.-.-", - "1.3.1.-", - "1.3.1.n", - "1.3.2.-", - "1.3.2.n", - "1.3.3.-", - "1.3.3.n", - "1.3.5.-", - "1.3.5.n", - "1.3.7.-", - "1.3.7.n", - "1.3.99.-", - "1.3.99.n", - "1.3.n.n", - "1.4.-.-", - "1.4.1.-", - "1.4.1.n", - "1.4.2.-", - "1.4.2.n", - "1.4.3.-", - "1.4.3.n", - "1.4.4.-", - "1.4.4.n", - "1.4.5.-", - "1.4.5.n", - "1.4.7.-", - "1.4.7.n", - "1.4.99.-", - "1.4.99.n", - "1.4.n.n", - "1.5.-.-", - "1.5.1.-", - "1.5.1.n", - "1.5.3.-", - "1.5.3.n", - "1.5.4.-", - "1.5.4.n", - "1.5.5.-", - "1.5.5.n", - "1.5.7.-", - "1.5.7.n", - "1.5.8.-", - "1.5.8.n", - "1.5.99.-", - "1.5.99.n", - "1.5.n.n", - "1.6.-.-", - "1.6.1.-", - "1.6.1.n", - "1.6.2.-", - "1.6.2.n", - "1.6.3.-", - "1.6.3.n", - "1.6.4.-", - "1.6.4.n", - "1.6.5.-", - "1.6.5.n", - "1.6.6.-", - "1.6.6.n", - "1.6.7.-", - "1.6.7.n", - "1.6.8.-", - "1.6.8.n", - "1.6.99.-", - "1.6.99.n", - "1.6.n.n", 
- "1.7.-.-", - "1.7.1.-", - "1.7.1.n", - "1.7.2.-", - "1.7.2.n", - "1.7.3.-", - "1.7.3.n", - "1.7.5.-", - "1.7.5.n", - "1.7.7.-", - "1.7.7.n", - "1.7.99.-", - "1.7.99.n", - "1.7.n.n", - "1.8.-.-", - "1.8.1.-", - "1.8.1.n", - "1.8.2.-", - "1.8.2.n", - "1.8.3.-", - "1.8.3.n", - "1.8.4.-", - "1.8.4.n", - "1.8.5.-", - "1.8.5.n", - "1.8.6.-", - "1.8.6.n", - "1.8.7.-", - "1.8.7.n", - "1.8.98.-", - "1.8.98.n", - "1.8.99.-", - "1.8.99.n", - "1.8.n.n", - "1.9.-.-", - "1.9.3.-", - "1.9.3.n", - "1.9.6.-", - "1.9.6.n", - "1.9.99.-", - "1.9.99.n", - "1.9.n.n", - "1.10.-.-", - "1.10.1.-", - "1.10.1.n", - "1.10.2.-", - "1.10.2.n", - "1.10.3.-", - "1.10.3.n", - "1.10.99.-", - "1.10.99.n", - "1.10.n.n", - "1.11.-.-", - "1.11.1.-", - "1.11.1.n", - "1.11.n.n", - "1.12.-.-", - "1.12.1.-", - "1.12.1.n", - "1.12.2.-", - "1.12.2.n", - "1.12.5.-", - "1.12.5.n", - "1.12.7.-", - "1.12.7.n", - "1.12.98.-", - "1.12.98.n", - "1.12.99.-", - "1.12.99.n", - "1.12.n.n", - "1.13.-.-", - "1.13.1.-", - "1.13.1.n", - "1.13.11.-", - "1.13.11.n", - "1.13.12.-", - "1.13.12.n", - "1.13.99.-", - "1.13.99.n", - "1.13.n.n", - "1.14.-.-", - "1.14.1.-", - "1.14.1.n", - "1.14.2.-", - "1.14.2.n", - "1.14.3.-", - "1.14.3.n", - "1.14.11.-", - "1.14.11.n", - "1.14.12.-", - "1.14.12.n", - "1.14.13.-", - "1.14.13.n", - "1.14.14.-", - "1.14.14.n", - "1.14.15.-", - "1.14.15.n", - "1.14.16.-", - "1.14.16.n", - "1.14.17.-", - "1.14.17.n", - "1.14.18.-", - "1.14.18.n", - "1.14.19.-", - "1.14.19.n", - "1.14.20.-", - "1.14.20.n", - "1.14.21.-", - "1.14.21.n", - "1.14.99.-", - "1.14.99.n", - "1.14.n.n", - "1.15.-.-", - "1.15.1.-", - "1.15.1.n", - "1.15.n.n", - "1.16.-.-", - "1.16.1.-", - "1.16.1.n", - "1.16.3.-", - "1.16.3.n", - "1.16.8.-", - "1.16.8.n", - "1.16.n.n", - "1.17.-.-", - "1.17.1.-", - "1.17.1.n", - "1.17.3.-", - "1.17.3.n", - "1.17.4.-", - "1.17.4.n", - "1.17.5.-", - "1.17.5.n", - "1.17.7.-", - "1.17.7.n", - "1.17.99.-", - "1.17.99.n", - "1.17.n.n", - "1.18.-.-", - "1.18.1.-", - "1.18.1.n", - "1.18.2.-", - 
"1.18.2.n", - "1.18.3.-", - "1.18.3.n", - "1.18.6.-", - "1.18.6.n", - "1.18.96.-", - "1.18.96.n", - "1.18.99.-", - "1.18.99.n", - "1.18.n.n", - "1.19.-.-", - "1.19.6.-", - "1.19.6.n", - "1.19.n.n", - "1.20.-.-", - "1.20.1.-", - "1.20.1.n", - "1.20.4.-", - "1.20.4.n", - "1.20.98.-", - "1.20.98.n", - "1.20.99.-", - "1.20.99.n", - "1.20.n.n", - "1.21.-.-", - "1.21.3.-", - "1.21.3.n", - "1.21.4.-", - "1.21.4.n", - "1.21.99.-", - "1.21.99.n", - "1.21.n.n", - "1.22.-.-", - "1.22.1.-", - "1.22.1.n", - "1.22.n.n", - "1.97.-.-", - "1.97.1.-", - "1.97.1.n", - "1.97.n.n", - "1.98.-.-", - "1.98.1.-", - "1.98.1.n", - "1.98.n.n", - "1.99.-.-", - "1.99.1.-", - "1.99.1.n", - "1.99.2.-", - "1.99.2.n", - "1.99.n.n", - "1.n.n.n", - "2.-.-.-", - "2.1.-.-", - "2.1.1.-", - "2.1.1.n", - "2.1.2.-", - "2.1.2.n", - "2.1.3.-", - "2.1.3.n", - "2.1.4.-", - "2.1.4.n", - "2.1.n.n", - "2.2.-.-", - "2.2.1.-", - "2.2.1.n", - "2.2.n.n", - "2.3.-.-", - "2.3.1.-", - "2.3.1.n", - "2.3.2.-", - "2.3.2.n", - "2.3.3.-", - "2.3.3.n", - "2.3.n.n", - "2.4.-.-", - "2.4.1.-", - "2.4.1.n", - "2.4.2.-", - "2.4.2.n", - "2.4.99.-", - "2.4.99.n", - "2.4.n.n", - "2.5.-.-", - "2.5.1.-", - "2.5.1.n", - "2.5.n.n", - "2.6.-.-", - "2.6.1.-", - "2.6.1.n", - "2.6.2.-", - "2.6.2.n", - "2.6.3.-", - "2.6.3.n", - "2.6.99.-", - "2.6.99.n", - "2.6.n.n", - "2.7.-.-", - "2.7.1.-", - "2.7.1.n", - "2.7.2.-", - "2.7.2.n", - "2.7.3.-", - "2.7.3.n", - "2.7.4.-", - "2.7.4.n", - "2.7.5.-", - "2.7.5.n", - "2.7.6.-", - "2.7.6.n", - "2.7.7.-", - "2.7.7.n", - "2.7.8.-", - "2.7.8.n", - "2.7.9.-", - "2.7.9.n", - "2.7.10.-", - "2.7.10.n", - "2.7.11.-", - "2.7.11.n", - "2.7.12.-", - "2.7.12.n", - "2.7.13.-", - "2.7.13.n", - "2.7.99.-", - "2.7.99.n", - "2.7.n.n", - "2.8.-.-", - "2.8.1.-", - "2.8.1.n", - "2.8.2.-", - "2.8.2.n", - "2.8.3.-", - "2.8.3.n", - "2.8.4.-", - "2.8.4.n", - "2.8.n.n", - "2.9.-.-", - "2.9.1.-", - "2.9.1.n", - "2.9.n.n", - "2.n.n.n", - "3.-.-.-", - "3.1.-.-", - "3.1.1.-", - "3.1.1.n", - "3.1.2.-", - "3.1.2.n", - "3.1.3.-", - 
"3.1.3.n", - "3.1.4.-", - "3.1.4.n", - "3.1.5.-", - "3.1.5.n", - "3.1.6.-", - "3.1.6.n", - "3.1.7.-", - "3.1.7.n", - "3.1.8.-", - "3.1.8.n", - "3.1.11.-", - "3.1.11.n", - "3.1.13.-", - "3.1.13.n", - "3.1.14.-", - "3.1.14.n", - "3.1.15.-", - "3.1.15.n", - "3.1.16.-", - "3.1.16.n", - "3.1.21.-", - "3.1.21.n", - "3.1.22.-", - "3.1.22.n", - "3.1.23.-", - "3.1.23.n", - "3.1.24.-", - "3.1.24.n", - "3.1.25.-", - "3.1.25.n", - "3.1.26.-", - "3.1.26.n", - "3.1.27.-", - "3.1.27.n", - "3.1.30.-", - "3.1.30.n", - "3.1.31.-", - "3.1.31.n", - "3.1.n.n", - "3.2.-.-", - "3.2.1.-", - "3.2.1.n", - "3.2.2.-", - "3.2.2.n", - "3.2.3.-", - "3.2.3.n", - "3.2.n.n", - "3.3.-.-", - "3.3.1.-", - "3.3.1.n", - "3.3.2.-", - "3.3.2.n", - "3.3.n.n", - "3.4.-.-", - "3.4.1.-", - "3.4.1.n", - "3.4.2.-", - "3.4.2.n", - "3.4.3.-", - "3.4.3.n", - "3.4.4.-", - "3.4.4.n", - "3.4.11.-", - "3.4.11.n", - "3.4.12.-", - "3.4.12.n", - "3.4.13.-", - "3.4.13.n", - "3.4.14.-", - "3.4.14.n", - "3.4.15.-", - "3.4.15.n", - "3.4.16.-", - "3.4.16.n", - "3.4.17.-", - "3.4.17.n", - "3.4.18.-", - "3.4.18.n", - "3.4.19.-", - "3.4.19.n", - "3.4.21.-", - "3.4.21.n", - "3.4.22.-", - "3.4.22.n", - "3.4.23.-", - "3.4.23.n", - "3.4.24.-", - "3.4.24.n", - "3.4.25.-", - "3.4.25.n", - "3.4.99.-", - "3.4.99.n", - "3.4.n.n", - "3.5.-.-", - "3.5.1.-", - "3.5.1.n", - "3.5.2.-", - "3.5.2.n", - "3.5.3.-", - "3.5.3.n", - "3.5.4.-", - "3.5.4.n", - "3.5.5.-", - "3.5.5.n", - "3.5.99.-", - "3.5.99.n", - "3.5.n.n", - "3.6.-.-", - "3.6.1.-", - "3.6.1.n", - "3.6.2.-", - "3.6.2.n", - "3.6.3.-", - "3.6.3.n", - "3.6.4.-", - "3.6.4.n", - "3.6.5.-", - "3.6.5.n", - "3.6.n.n", - "3.7.-.-", - "3.7.1.-", - "3.7.1.n", - "3.7.n.n", - "3.8.-.-", - "3.8.1.-", - "3.8.1.n", - "3.8.2.-", - "3.8.2.n", - "3.8.n.n", - "3.9.-.-", - "3.9.1.-", - "3.9.1.n", - "3.9.n.n", - "3.10.-.-", - "3.10.1.-", - "3.10.1.n", - "3.10.n.n", - "3.11.-.-", - "3.11.1.-", - "3.11.1.n", - "3.11.n.n", - "3.12.-.-", - "3.12.1.-", - "3.12.1.n", - "3.12.n.n", - "3.13.-.-", - "3.13.1.-", - 
"3.13.1.n", - "3.13.n.n", - "3.n.n.n", - "4.-.-.-", - "4.1.-.-", - "4.1.1.-", - "4.1.1.n", - "4.1.2.-", - "4.1.2.n", - "4.1.3.-", - "4.1.3.n", - "4.1.99.-", - "4.1.99.n", - "4.1.n.n", - "4.2.-.-", - "4.2.1.-", - "4.2.1.n", - "4.2.2.-", - "4.2.2.n", - "4.2.3.-", - "4.2.3.n", - "4.2.99.-", - "4.2.99.n", - "4.2.n.n", - "4.3.-.-", - "4.3.1.-", - "4.3.1.n", - "4.3.2.-", - "4.3.2.n", - "4.3.3.-", - "4.3.3.n", - "4.3.99.-", - "4.3.99.n", - "4.3.n.n", - "4.4.-.-", - "4.4.1.-", - "4.4.1.n", - "4.4.n.n", - "4.5.-.-", - "4.5.1.-", - "4.5.1.n", - "4.5.n.n", - "4.6.-.-", - "4.6.1.-", - "4.6.1.n", - "4.6.n.n", - "4.99.-.-", - "4.99.1.-", - "4.99.1.n", - "4.99.n.n", - "4.n.n.n", - "5.-.-.-", - "5.1.-.-", - "5.1.1.-", - "5.1.1.n", - "5.1.2.-", - "5.1.2.n", - "5.1.3.-", - "5.1.3.n", - "5.1.99.-", - "5.1.99.n", - "5.1.n.n", - "5.2.-.-", - "5.2.1.-", - "5.2.1.n", - "5.2.n.n", - "5.3.-.-", - "5.3.1.-", - "5.3.1.n", - "5.3.2.-", - "5.3.2.n", - "5.3.3.-", - "5.3.3.n", - "5.3.4.-", - "5.3.4.n", - "5.3.99.-", - "5.3.99.n", - "5.3.n.n", - "5.4.-.-", - "5.4.1.-", - "5.4.1.n", - "5.4.2.-", - "5.4.2.n", - "5.4.3.-", - "5.4.3.n", - "5.4.4.-", - "5.4.4.n", - "5.4.99.-", - "5.4.99.n", - "5.4.n.n", - "5.5.-.-", - "5.5.1.-", - "5.5.1.n", - "5.5.n.n", - "5.99.-.-", - "5.99.1.-", - "5.99.1.n", - "5.99.n.n", - "5.n.n.n", - "6.-.-.-", - "6.1.-.-", - "6.1.1.-", - "6.1.1.n", - "6.1.n.n", - "6.2.-.-", - "6.2.1.-", - "6.2.1.n", - "6.2.n.n", - "6.3.-.-", - "6.3.1.-", - "6.3.1.n", - "6.3.2.-", - "6.3.2.n", - "6.3.3.-", - "6.3.3.n", - "6.3.4.-", - "6.3.4.n", - "6.3.5.-", - "6.3.5.n", - "6.3.n.n", - "6.4.-.-", - "6.4.1.-", - "6.4.1.n", - "6.4.n.n", - "6.5.-.-", - "6.5.1.-", - "6.5.1.n", - "6.5.n.n", - "6.6.-.-", - "6.6.1.-", - "6.6.1.n", - "6.6.n.n", - "6.n.n.n" + "1.-.-.- Oxidoreductases", + "1.n.n.n Oxidoreductases", + "1.1.-.- Acting on the CH-OH group of donors", + "1.1.n.n Acting on the CH-OH group of donors", + "1.1.1.- With NAD(+) or NADP(+) as acceptor", + "1.1.1.n With NAD(+) or NADP(+) as acceptor", 
+ "1.1.2.- With a cytochrome as acceptor", + "1.1.2.n With a cytochrome as acceptor", + "1.1.3.- With oxygen as acceptor", + "1.1.3.n With oxygen as acceptor", + "1.1.4.- With a disulfide as acceptor", + "1.1.4.n With a disulfide as acceptor", + "1.1.5.- With a quinone or similar compound as acceptor", + "1.1.5.n With a quinone or similar compound as acceptor", + "1.1.98.- With other, known, acceptors", + "1.1.98.n With other, known, acceptors", + "1.1.99.- With other acceptors", + "1.1.99.n With other acceptors", + "1.2.-.- Acting on the aldehyde or oxo group of donors", + "1.2.n.n Acting on the aldehyde or oxo group of donors", + "1.2.1.- With NAD(+) or NADP(+) as acceptor", + "1.2.1.n With NAD(+) or NADP(+) as acceptor", + "1.2.2.- With a cytochrome as acceptor", + "1.2.2.n With a cytochrome as acceptor", + "1.2.3.- With oxygen as acceptor", + "1.2.3.n With oxygen as acceptor", + "1.2.4.- With a disulfide as acceptor", + "1.2.4.n With a disulfide as acceptor", + "1.2.5.- With a quinone or similar compound as acceptor", + "1.2.5.n With a quinone or similar compound as acceptor", + "1.2.7.- With an iron-sulfur protein as acceptor", + "1.2.7.n With an iron-sulfur protein as acceptor", + "1.2.99.- With other acceptors", + "1.2.99.n With other acceptors", + "1.3.-.- Acting on the CH-CH group of donors", + "1.3.n.n Acting on the CH-CH group of donors", + "1.3.1.- With NAD(+) or NADP(+) as acceptor", + "1.3.1.n With NAD(+) or NADP(+) as acceptor", + "1.3.2.- With a cytochrome as acceptor", + "1.3.2.n With a cytochrome as acceptor", + "1.3.3.- With oxygen as acceptor", + "1.3.3.n With oxygen as acceptor", + "1.3.5.- With a quinone or related compound as acceptor", + "1.3.5.n With a quinone or related compound as acceptor", + "1.3.7.- With an iron-sulfur protein as acceptor", + "1.3.7.n With an iron-sulfur protein as acceptor", + "1.3.99.- With other acceptors", + "1.3.99.n With other acceptors", + "1.4.-.- Acting on the CH-NH(2) group of donors", + "1.4.n.n Acting on 
the CH-NH(2) group of donors", + "1.4.1.- With NAD(+) or NADP(+) as acceptor", + "1.4.1.n With NAD(+) or NADP(+) as acceptor", + "1.4.2.- With a cytochrome as acceptor", + "1.4.2.n With a cytochrome as acceptor", + "1.4.3.- With oxygen as acceptor", + "1.4.3.n With oxygen as acceptor", + "1.4.4.- With a disulfide as acceptor", + "1.4.4.n With a disulfide as acceptor", + "1.4.7.- With an iron-sulfur protein as acceptor", + "1.4.7.n With an iron-sulfur protein as acceptor", + "1.4.99.- With other acceptors", + "1.4.99.n With other acceptors", + "1.5.-.- Acting on the CH-NH group of donors", + "1.5.n.n Acting on the CH-NH group of donors", + "1.5.1.- With NAD(+) or NADP(+) as acceptor", + "1.5.1.n With NAD(+) or NADP(+) as acceptor", + "1.5.3.- With oxygen as acceptor", + "1.5.3.n With oxygen as acceptor", + "1.5.4.- With a disulfide as acceptor", + "1.5.4.n With a disulfide as acceptor", + "1.5.5.- With a quinone or similar compound as acceptor", + "1.5.5.n With a quinone or similar compound as acceptor", + "1.5.7.- With an iron-sulfur protein as acceptor", + "1.5.7.n With an iron-sulfur protein as acceptor", + "1.5.8.- With a flavin as acceptor", + "1.5.8.n With a flavin as acceptor", + "1.5.99.- With other acceptors", + "1.5.99.n With other acceptors", + "1.6.-.- Acting on NADH or NADPH", + "1.6.n.n Acting on NADH or NADPH", + "1.6.1.- With NAD(+) or NADP(+) as acceptor", + "1.6.1.n With NAD(+) or NADP(+) as acceptor", + "1.6.2.- With a heme protein as acceptor", + "1.6.2.n With a heme protein as acceptor", + "1.6.3.- With a oxygen as acceptor", + "1.6.3.n With a oxygen as acceptor", + "1.6.5.- With a quinone or similar compound as acceptor", + "1.6.5.n With a quinone or similar compound as acceptor", + "1.6.6.- With a nitrogenous group as acceptor", + "1.6.6.n With a nitrogenous group as acceptor", + "1.6.99.- With other acceptors", + "1.6.99.n With other acceptors", + "1.7.-.- Acting on other nitrogenous compounds as donors", + "1.7.n.n Acting on other 
nitrogenous compounds as donors", + "1.7.1.- With NAD(+) or NADP(+) as acceptor", + "1.7.1.n With NAD(+) or NADP(+) as acceptor", + "1.7.2.- With a cytochrome as acceptor", + "1.7.2.n With a cytochrome as acceptor", + "1.7.3.- With oxygen as acceptor", + "1.7.3.n With oxygen as acceptor", + "1.7.7.- With an iron-sulfur protein as acceptor", + "1.7.7.n With an iron-sulfur protein as acceptor", + "1.7.99.- With other acceptors", + "1.7.99.n With other acceptors", + "1.8.-.- Acting on a sulfur group of donors", + "1.8.n.n Acting on a sulfur group of donors", + "1.8.1.- With NAD(+) or NADP(+) as acceptor", + "1.8.1.n With NAD(+) or NADP(+) as acceptor", + "1.8.2.- With a cytochrome as acceptor", + "1.8.2.n With a cytochrome as acceptor", + "1.8.3.- With oxygen as acceptor", + "1.8.3.n With oxygen as acceptor", + "1.8.4.- With a disulfide as acceptor", + "1.8.4.n With a disulfide as acceptor", + "1.8.5.- With a quinone or similar compound as acceptor", + "1.8.5.n With a quinone or similar compound as acceptor", + "1.8.7.- With an iron-sulfur protein as acceptor", + "1.8.7.n With an iron-sulfur protein as acceptor", + "1.8.98.- With other, known, acceptors", + "1.8.98.n With other, known, acceptors", + "1.8.99.- With other acceptors", + "1.8.99.n With other acceptors", + "1.9.-.- Acting on a heme group of donors", + "1.9.n.n Acting on a heme group of donors", + "1.9.3.- With oxygen as acceptor", + "1.9.3.n With oxygen as acceptor", + "1.9.6.- With a nitrogenous group as acceptor", + "1.9.6.n With a nitrogenous group as acceptor", + "1.9.99.- With other acceptors", + "1.9.99.n With other acceptors", + "1.10.-.- Acting on diphenols and related substances as donors", + "1.10.n.n Acting on diphenols and related substances as donors", + "1.10.1.- With NAD(+) or NADP(+) as acceptor", + "1.10.1.n With NAD(+) or NADP(+) as acceptor", + "1.10.2.- With a cytochrome as acceptor", + "1.10.2.n With a cytochrome as acceptor", + "1.10.3.- With oxygen as acceptor", + "1.10.3.n With 
oxygen as acceptor", + "1.11.-.- Acting on a peroxide as acceptor", + "1.11.n.n Acting on a peroxide as acceptor", + "1.11.1.- Peroxidases", + "1.11.1.n Peroxidases", + "1.11.2.- With H(2)O(2) as acceptor, one oxygen atom of which is incorporated into the product", + "1.11.2.n With H(2)O(2) as acceptor, one oxygen atom of which is incorporated into the product", + "1.12.-.- Acting on hydrogen as donor", + "1.12.n.n Acting on hydrogen as donor", + "1.12.1.- With NAD(+) or NADP(+) as acceptor", + "1.12.1.n With NAD(+) or NADP(+) as acceptor", + "1.12.2.- With a cytochrome as acceptor", + "1.12.2.n With a cytochrome as acceptor", + "1.12.5.- With a quinone or similar compound as acceptor", + "1.12.5.n With a quinone or similar compound as acceptor", + "1.12.7.- With an iron-sulfur protein as acceptor", + "1.12.7.n With an iron-sulfur protein as acceptor", + "1.13.-.- Acting on single donors with incorporation of molecular oxygen", + "1.13.n.n Acting on single donors with incorporation of molecular oxygen", + "1.14.-.- Acting on paired donors, with incorporation or reduction of molecular oxygen", + "1.14.n.n Acting on paired donors, with incorporation or reduction of molecular oxygen", + "1.15.-.- Acting on superoxide as acceptor", + "1.15.n.n Acting on superoxide as acceptor", + "1.15.1.- Acting on superoxide as acceptor", + "1.15.1.n Acting on superoxide as acceptor", + "1.16.-.- Oxidizing metal ions", + "1.16.n.n Oxidizing metal ions", + "1.16.1.- With NAD(+) or NADP(+) as acceptor", + "1.16.1.n With NAD(+) or NADP(+) as acceptor", + "1.16.3.- With oxygen as acceptor", + "1.16.3.n With oxygen as acceptor", + "1.16.8.- With flavin as acceptor", + "1.16.8.n With flavin as acceptor", + "1.17.-.- Acting on CH or CH(2) groups", + "1.17.n.n Acting on CH or CH(2) groups", + "1.17.1.- With NAD(+) or NADP(+) as acceptor", + "1.17.1.n With NAD(+) or NADP(+) as acceptor", + "1.17.2.- With a cytochrome as acceptor", + "1.17.2.n With a cytochrome as acceptor", + "1.17.3.- With 
oxygen as acceptor", + "1.17.3.n With oxygen as acceptor", + "1.17.4.- With a disulfide as acceptor", + "1.17.4.n With a disulfide as acceptor", + "1.17.5.- With a quinone or similar compound as acceptor", + "1.17.5.n With a quinone or similar compound as acceptor", + "1.17.7.- With an iron-sulfur protein as acceptor", + "1.17.7.n With an iron-sulfur protein as acceptor", + "1.18.-.- Acting on iron-sulfur proteins as donors", + "1.18.n.n Acting on iron-sulfur proteins as donors", + "1.18.1.- With NAD(+) or NADP(+) as acceptor", + "1.18.1.n With NAD(+) or NADP(+) as acceptor", + "1.18.6.- With dinitrogen as acceptor", + "1.18.6.n With dinitrogen as acceptor", + "1.19.-.- Acting on reduced flavodoxin as donor", + "1.19.n.n Acting on reduced flavodoxin as donor", + "1.19.6.- With dinitrogen as acceptor", + "1.19.6.n With dinitrogen as acceptor", + "1.20.-.- Acting on phosphorus or arsenic in donors", + "1.20.n.n Acting on phosphorus or arsenic in donors", + "1.20.1.- Acting on phosphorus or arsenic in donors, with NAD(P)(+) as acceptor", + "1.20.1.n Acting on phosphorus or arsenic in donors, with NAD(P)(+) as acceptor", + "1.20.4.- Acting on phosphorus or arsenic in donors, with disulfide as acceptor", + "1.20.4.n Acting on phosphorus or arsenic in donors, with disulfide as acceptor", + "1.21.-.- Acting on x-H and y-H to form an x-y bond", + "1.21.n.n Acting on x-H and y-H to form an x-y bond", + "1.21.3.- With oxygen as acceptor", + "1.21.3.n With oxygen as acceptor", + "1.21.4.- With a disulfide as acceptor", + "1.21.4.n With a disulfide as acceptor", + "1.97.-.- Other oxidoreductases", + "1.97.n.n Other oxidoreductases", + "1.97.1.- Sole sub-subclass for oxidoreductases that do not belong in the other subclasses", + "1.97.1.n Sole sub-subclass for oxidoreductases that do not belong in the other subclasses", + "2.-.-.- Transferases", + "2.n.n.n Transferases", + "2.1.-.- Transferring one-carbon groups", + "2.1.n.n Transferring one-carbon groups", + "2.1.1.- 
Methyltransferases", + "2.1.1.n Methyltransferases", + "2.1.2.- Hydroxymethyl-, formyl- and related transferases", + "2.1.2.n Hydroxymethyl-, formyl- and related transferases", + "2.1.3.- Carboxyl- and carbamoyltransferases", + "2.1.3.n Carboxyl- and carbamoyltransferases", + "2.1.4.- Amidinotransferases", + "2.1.4.n Amidinotransferases", + "2.2.-.- Transferring aldehyde or ketone residues", + "2.2.n.n Transferring aldehyde or ketone residues", + "2.2.1.- Transketolases and transaldolases", + "2.2.1.n Transketolases and transaldolases", + "2.3.-.- Acyltransferases", + "2.3.n.n Acyltransferases", + "2.3.1.- Transferring groups other than amino-acyl groups", + "2.3.1.n Transferring groups other than amino-acyl groups", + "2.3.2.- Aminoacyltransferases", + "2.3.2.n Aminoacyltransferases", + "2.3.3.- Acyl groups converted into alkyl on transfer", + "2.3.3.n Acyl groups converted into alkyl on transfer", + "2.4.-.- Glycosyltransferases", + "2.4.n.n Glycosyltransferases", + "2.4.1.- Hexosyltransferases", + "2.4.1.n Hexosyltransferases", + "2.4.2.- Pentosyltransferases", + "2.4.2.n Pentosyltransferases", + "2.4.99.- Transferring other glycosyl groups", + "2.4.99.n Transferring other glycosyl groups", + "2.5.-.- Transferring alkyl or aryl groups, other than methyl groups", + "2.5.n.n Transferring alkyl or aryl groups, other than methyl groups", + "2.5.1.- Transferring alkyl or aryl groups, other than methyl groups", + "2.5.1.n Transferring alkyl or aryl groups, other than methyl groups", + "2.6.-.- Transferring nitrogenous groups", + "2.6.n.n Transferring nitrogenous groups", + "2.6.1.- Transaminases (aminotransferases)", + "2.6.1.n Transaminases (aminotransferases)", + "2.6.3.- Oximinotransferases", + "2.6.3.n Oximinotransferases", + "2.6.99.- Transferring other nitrogenous groups", + "2.6.99.n Transferring other nitrogenous groups", + "2.7.-.- Transferring phosphorous-containing groups", + "2.7.n.n Transferring phosphorous-containing groups", + "2.7.1.- 
Phosphotransferases with an alcohol group as acceptor", + "2.7.1.n Phosphotransferases with an alcohol group as acceptor", + "2.7.2.- Phosphotransferases with a carboxyl group as acceptor", + "2.7.2.n Phosphotransferases with a carboxyl group as acceptor", + "2.7.3.- Phosphotransferases with a nitrogenous group as acceptor", + "2.7.3.n Phosphotransferases with a nitrogenous group as acceptor", + "2.7.4.- Phosphotransferases with a phosphate group as acceptor", + "2.7.4.n Phosphotransferases with a phosphate group as acceptor", + "2.7.6.- Diphosphotransferases", + "2.7.6.n Diphosphotransferases", + "2.7.7.- Nucleotidyltransferases", + "2.7.7.n Nucleotidyltransferases", + "2.7.8.- Transferases for other substituted phosphate groups", + "2.7.8.n Transferases for other substituted phosphate groups", + "2.7.9.- Phosphotransferases with paired acceptors", + "2.7.9.n Phosphotransferases with paired acceptors", + "2.7.10.- Protein-tyrosine kinases", + "2.7.10.n Protein-tyrosine kinases", + "2.7.11.- Protein-serine/threonine kinases", + "2.7.11.n Protein-serine/threonine kinases", + "2.7.12.- Dual-specificity kinases (those acting on Ser/Thr and Tyr residues)", + "2.7.12.n Dual-specificity kinases (those acting on Ser/Thr and Tyr residues)", + "2.7.13.- Protein-histidine kinases", + "2.7.13.n Protein-histidine kinases", + "2.7.99.- Other protein kinases", + "2.7.99.n Other protein kinases", + "2.8.-.- Transferring sulfur-containing groups", + "2.8.n.n Transferring sulfur-containing groups", + "2.8.1.- Sulfurtransferases", + "2.8.1.n Sulfurtransferases", + "2.8.2.- Sulfotransferases", + "2.8.2.n Sulfotransferases", + "2.8.3.- CoA-transferases", + "2.8.3.n CoA-transferases", + "2.8.4.- Transferring alkylthio groups", + "2.8.4.n Transferring alkylthio groups", + "2.9.-.- Transferring selenium-containing groups", + "2.9.n.n Transferring selenium-containing groups", + "2.9.1.- Selenotransferases", + "2.9.1.n Selenotransferases", + "3.-.-.- Hydrolases", + "3.n.n.n Hydrolases", + 
"3.1.-.- Acting on ester bonds", + "3.1.n.n Acting on ester bonds", + "3.1.1.- Carboxylic ester hydrolases", + "3.1.1.n Carboxylic ester hydrolases", + "3.1.2.- Thiolester hydrolases", + "3.1.2.n Thiolester hydrolases", + "3.1.3.- Phosphoric monoester hydrolases", + "3.1.3.n Phosphoric monoester hydrolases", + "3.1.4.- Phosphoric diester hydrolases", + "3.1.4.n Phosphoric diester hydrolases", + "3.1.5.- Triphosphoric monoester hydrolases", + "3.1.5.n Triphosphoric monoester hydrolases", + "3.1.6.- Sulfuric ester hydrolases", + "3.1.6.n Sulfuric ester hydrolases", + "3.1.7.- Diphosphoric monoester hydrolases", + "3.1.7.n Diphosphoric monoester hydrolases", + "3.1.8.- Phosphoric triester hydrolases", + "3.1.8.n Phosphoric triester hydrolases", + "3.1.11.- Exodeoxyribonucleases producing 5'-phosphomonoesters", + "3.1.11.n Exodeoxyribonucleases producing 5'-phosphomonoesters", + "3.1.13.- Exoribonucleases producing 5'-phosphomonoesters", + "3.1.13.n Exoribonucleases producing 5'-phosphomonoesters", + "3.1.14.- Exoribonucleases producing 3'-phosphomonoesters", + "3.1.14.n Exoribonucleases producing 3'-phosphomonoesters", + "3.1.15.- Exonucleases active with either ribo- or deoxyribonucleic acid and producing 5'-phosphomonoesters", + "3.1.15.n Exonucleases active with either ribo- or deoxyribonucleic acid and producing 5'-phosphomonoesters", + "3.1.16.- Exonucleases active with either ribo- or deoxyribonucleic acid and producing 3'-phosphomonoesters", + "3.1.16.n Exonucleases active with either ribo- or deoxyribonucleic acid and producing 3'-phosphomonoesters", + "3.1.21.- Endodeoxyribonucleases producing 5'-phosphomonoesters", + "3.1.21.n Endodeoxyribonucleases producing 5'-phosphomonoesters", + "3.1.22.- Endodeoxyribonucleases producing other than 5'-phosphomonoesters", + "3.1.22.n Endodeoxyribonucleases producing other than 5'-phosphomonoesters", + "3.1.25.- Site-specific endodeoxyribonucleases specific for altered bases", + "3.1.25.n Site-specific 
endodeoxyribonucleases specific for altered bases", + "3.1.26.- Endoribonucleases producing 5'-phosphomonoesters", + "3.1.26.n Endoribonucleases producing 5'-phosphomonoesters", + "3.1.27.- Endoribonucleases producing other than 5'-phosphomonoesters", + "3.1.27.n Endoribonucleases producing other than 5'-phosphomonoesters", + "3.1.30.- Endoribonucleases active with either ribo- or deoxyribonucleic acid and producing 5'-phosphomonoesters", + "3.1.30.n Endoribonucleases active with either ribo- or deoxyribonucleic acid and producing 5'-phosphomonoesters", + "3.1.31.- Endoribonucleases active with either ribo- or deoxyribonucleic acid and producing 3'-phosphomonoesters", + "3.1.31.n Endoribonucleases active with either ribo- or deoxyribonucleic acid and producing 3'-phosphomonoesters", + "3.2.-.- Glycosylases", + "3.2.n.n Glycosylases", + "3.2.1.- Glycosidases, i.e. enzymes hydrolyzing O- and S-glycosyl compounds", + "3.2.1.n Glycosidases, i.e. enzymes hydrolyzing O- and S-glycosyl compounds", + "3.2.2.- Hydrolyzing N-glycosyl compounds", + "3.2.2.n Hydrolyzing N-glycosyl compounds", + "3.3.-.- Acting on ether bonds", + "3.3.n.n Acting on ether bonds", + "3.3.1.- Thioether and trialkylsulfonium hydrolases", + "3.3.1.n Thioether and trialkylsulfonium hydrolases", + "3.3.2.- Ether hydrolases", + "3.3.2.n Ether hydrolases", + "3.4.-.- Acting on peptide bonds (peptide hydrolases)", + "3.4.n.n Acting on peptide bonds (peptide hydrolases)", + "3.4.11.- Aminopeptidases", + "3.4.11.n Aminopeptidases", + "3.4.13.- Dipeptidases", + "3.4.13.n Dipeptidases", + "3.4.14.- Dipeptidyl-peptidases and tripeptidyl-peptidases", + "3.4.14.n Dipeptidyl-peptidases and tripeptidyl-peptidases", + "3.4.15.- Peptidyl-dipeptidases", + "3.4.15.n Peptidyl-dipeptidases", + "3.4.16.- Serine-type carboxypeptidases", + "3.4.16.n Serine-type carboxypeptidases", + "3.4.17.- Metallocarboxypeptidases", + "3.4.17.n Metallocarboxypeptidases", + "3.4.18.- Cysteine-type carboxypeptidases", + "3.4.18.n 
Cysteine-type carboxypeptidases", + "3.4.19.- Omega peptidases", + "3.4.19.n Omega peptidases", + "3.4.21.- Serine endopeptidases", + "3.4.21.n Serine endopeptidases", + "3.4.22.- Cysteine endopeptidases", + "3.4.22.n Cysteine endopeptidases", + "3.4.23.- Aspartic endopeptidases", + "3.4.23.n Aspartic endopeptidases", + "3.4.24.- Metalloendopeptidases", + "3.4.24.n Metalloendopeptidases", + "3.4.25.- Threonine endopeptidases", + "3.4.25.n Threonine endopeptidases", + "3.4.99.- Endopeptidases of unknown catalytic mechanism", + "3.4.99.n Endopeptidases of unknown catalytic mechanism", + "3.5.-.- Acting on carbon-nitrogen bonds, other than peptide bonds", + "3.5.n.n Acting on carbon-nitrogen bonds, other than peptide bonds", + "3.5.1.- In linear amides", + "3.5.1.n In linear amides", + "3.5.2.- In cyclic amides", + "3.5.2.n In cyclic amides", + "3.5.3.- In linear amidines", + "3.5.3.n In linear amidines", + "3.5.4.- In cyclic amidines", + "3.5.4.n In cyclic amidines", + "3.5.5.- In nitriles", + "3.5.5.n In nitriles", + "3.5.99.- In other compounds", + "3.5.99.n In other compounds", + "3.6.-.- Acting on acid anhydrides", + "3.6.n.n Acting on acid anhydrides", + "3.6.1.- In phosphorous-containing anhydrides", + "3.6.1.n In phosphorous-containing anhydrides", + "3.6.2.- In sulfonyl-containing anhydrides", + "3.6.2.n In sulfonyl-containing anhydrides", + "3.6.3.- Acting on acid anhydrides; catalyzing transmembrane movement of substances", + "3.6.3.n Acting on acid anhydrides; catalyzing transmembrane movement of substances", + "3.6.4.- Acting on acid anhydrides; involved in cellular and subcellular movement", + "3.6.4.n Acting on acid anhydrides; involved in cellular and subcellular movement", + "3.6.5.- Acting on GTP; involved in cellular and subcellular movement", + "3.6.5.n Acting on GTP; involved in cellular and subcellular movement", + "3.7.-.- Acting on carbon-carbon bonds", + "3.7.n.n Acting on carbon-carbon bonds", + "3.7.1.- In ketonic substances", + "3.7.1.n In 
ketonic substances", + "3.8.-.- Acting on halide bonds", + "3.8.n.n Acting on halide bonds", + "3.8.1.- In C-halide compounds", + "3.8.1.n In C-halide compounds", + "3.9.-.- Acting on phosphorus-nitrogen bonds", + "3.9.n.n Acting on phosphorus-nitrogen bonds", + "3.9.1.- Acting on phosphorus-nitrogen bonds", + "3.9.1.n Acting on phosphorus-nitrogen bonds", + "3.10.-.- Acting on sulfur-nitrogen bonds", + "3.10.n.n Acting on sulfur-nitrogen bonds", + "3.10.1.- Acting on sulfur-nitrogen bonds", + "3.10.1.n Acting on sulfur-nitrogen bonds", + "3.11.-.- Acting on carbon-phosphorus bonds", + "3.11.n.n Acting on carbon-phosphorus bonds", + "3.11.1.- Acting on carbon-phosphorus bonds", + "3.11.1.n Acting on carbon-phosphorus bonds", + "3.12.-.- Acting on sulfur-sulfur bonds", + "3.12.n.n Acting on sulfur-sulfur bonds", + "3.12.1.- Acting on sulfur-sulfur bonds", + "3.12.1.n Acting on sulfur-sulfur bonds", + "3.13.-.- Acting on carbon-sulfur bonds", + "3.13.n.n Acting on carbon-sulfur bonds", + "3.13.1.- Acting on carbon-sulfur bonds", + "3.13.1.n Acting on carbon-sulfur bonds", + "4.-.-.- Lyases", + "4.n.n.n Lyases", + "4.1.-.- Carbon-carbon lyases", + "4.1.n.n Carbon-carbon lyases", + "4.1.1.- Carboxy-lyases", + "4.1.1.n Carboxy-lyases", + "4.1.2.- Aldehyde-lyases", + "4.1.2.n Aldehyde-lyases", + "4.1.3.- Oxo-acid-lyases", + "4.1.3.n Oxo-acid-lyases", + "4.1.99.- Other carbon-carbon lyases", + "4.1.99.n Other carbon-carbon lyases", + "4.2.-.- Carbon-oxygen lyases", + "4.2.n.n Carbon-oxygen lyases", + "4.2.1.- Hydro-lyases", + "4.2.1.n Hydro-lyases", + "4.2.2.- Acting on polysaccharides", + "4.2.2.n Acting on polysaccharides", + "4.2.3.- Acting on phosphates", + "4.2.3.n Acting on phosphates", + "4.2.99.- Other carbon-oxygen lyases", + "4.2.99.n Other carbon-oxygen lyases", + "4.3.-.- Carbon-nitrogen lyases", + "4.3.n.n Carbon-nitrogen lyases", + "4.3.1.- Ammonia-lyases", + "4.3.1.n Ammonia-lyases", + "4.3.2.- Lyases acting on amides, amidines, etc", + "4.3.2.n Lyases 
acting on amides, amidines, etc", + "4.3.3.- Amine-lyases", + "4.3.3.n Amine-lyases", + "4.4.-.- Carbon-sulfur lyases", + "4.4.n.n Carbon-sulfur lyases", + "4.4.1.- Carbon-sulfur lyases", + "4.4.1.n Carbon-sulfur lyases", + "4.5.-.- Carbon-halide lyases", + "4.5.n.n Carbon-halide lyases", + "4.5.1.- Carbon-halide lyases", + "4.5.1.n Carbon-halide lyases", + "4.6.-.- Phosphorus-oxygen lyases", + "4.6.n.n Phosphorus-oxygen lyases", + "4.6.1.- Phosphorus-oxygen lyases", + "4.6.1.n Phosphorus-oxygen lyases", + "4.99.-.- Other lyases", + "4.99.n.n Other lyases", + "4.99.1.- Sole sub-subclass for lyases that do not belong in the other subclasses", + "4.99.1.n Sole sub-subclass for lyases that do not belong in the other subclasses", + "5.-.-.- Isomerases", + "5.n.n.n Isomerases", + "5.1.-.- Racemases and epimerases", + "5.1.n.n Racemases and epimerases", + "5.1.1.- Acting on amino acids and derivatives", + "5.1.1.n Acting on amino acids and derivatives", + "5.1.2.- Acting on hydroxy acids and derivatives", + "5.1.2.n Acting on hydroxy acids and derivatives", + "5.1.3.- Acting on carbohydrates and derivatives", + "5.1.3.n Acting on carbohydrates and derivatives", + "5.1.99.- Acting on other compounds", + "5.1.99.n Acting on other compounds", + "5.2.-.- Cis-trans-isomerases", + "5.2.n.n Cis-trans-isomerases", + "5.2.1.- Cis-trans Isomerases", + "5.2.1.n Cis-trans Isomerases", + "5.3.-.- Intramolecular oxidoreductases", + "5.3.n.n Intramolecular oxidoreductases", + "5.3.1.- Interconverting aldoses and ketoses, and related compounds", + "5.3.1.n Interconverting aldoses and ketoses, and related compounds", + "5.3.2.- Interconverting keto- and enol- groups", + "5.3.2.n Interconverting keto- and enol- groups", + "5.3.3.- Transposing C==C bonds", + "5.3.3.n Transposing C==C bonds", + "5.3.4.- Transposing S-S bonds", + "5.3.4.n Transposing S-S bonds", + "5.3.99.- Other intramolecular oxidoreductases", + "5.3.99.n Other intramolecular oxidoreductases", + "5.4.-.- Intramolecular 
transferases (mutases)", + "5.4.n.n Intramolecular transferases (mutases)", + "5.4.1.- Transferring acyl groups", + "5.4.1.n Transferring acyl groups", + "5.4.2.- Phosphotransferases (phosphomutases)", + "5.4.2.n Phosphotransferases (phosphomutases)", + "5.4.3.- Transferring amino groups", + "5.4.3.n Transferring amino groups", + "5.4.4.- Transferring hydroxy groups", + "5.4.4.n Transferring hydroxy groups", + "5.4.99.- Transferring other groups", + "5.4.99.n Transferring other groups", + "5.5.-.- Intramolecular lyases", + "5.5.n.n Intramolecular lyases", + "5.5.1.- Intramolecular lyases", + "5.5.1.n Intramolecular lyases", + "5.99.-.- Other isomerases", + "5.99.n.n Other isomerases", + "5.99.1.- Sole sub-subclass for isomerases that do not belong in the other subclasses", + "5.99.1.n Sole sub-subclass for isomerases that do not belong in the other subclasses", + "6.-.-.- Ligases", + "6.n.n.n Ligases", + "6.1.-.- Forming carbon-oxygen bonds", + "6.1.n.n Forming carbon-oxygen bonds", + "6.1.1.- Ligases forming aminoacyl-tRNA and related compounds", + "6.1.1.n Ligases forming aminoacyl-tRNA and related compounds", + "6.2.-.- Forming carbon-sulfur bonds", + "6.2.n.n Forming carbon-sulfur bonds", + "6.2.1.- Acid--thiol ligases", + "6.2.1.n Acid--thiol ligases", + "6.3.-.- Forming carbon-nitrogen bonds", + "6.3.n.n Forming carbon-nitrogen bonds", + "6.3.1.- Acid--ammonia (or amide) ligases (amide synthases)", + "6.3.1.n Acid--ammonia (or amide) ligases (amide synthases)", + "6.3.2.- Acid--D-amino-acid ligases (peptide synthases)", + "6.3.2.n Acid--D-amino-acid ligases (peptide synthases)", + "6.3.3.- Cyclo-ligases", + "6.3.3.n Cyclo-ligases", + "6.3.4.- Other carbon--nitrogen ligases", + "6.3.4.n Other carbon--nitrogen ligases", + "6.3.5.- Carbon--nitrogen ligases with glutamine as amido-N-donor", + "6.3.5.n Carbon--nitrogen ligases with glutamine as amido-N-donor", + "6.4.-.- Forming carbon-carbon bonds", + "6.4.n.n Forming carbon-carbon bonds", + "6.4.1.- Ligases that 
form carbon-carbon bonds", + "6.4.1.n Ligases that form carbon-carbon bonds", + "6.5.-.- Forming phosphoric ester bonds", + "6.5.n.n Forming phosphoric ester bonds", + "6.5.1.- Ligases that form phosphoric-ester bonds", + "6.5.1.n Ligases that form phosphoric-ester bonds", + "6.6.-.- Forming nitrogen-metal bonds", + "6.6.n.n Forming nitrogen-metal bonds", + "6.6.1.- Forming coordination complexes", + "6.6.1.n Forming coordination complexes" }; diff --git a/api/ecnum_deleted.inc b/api/ecnum_deleted.inc index 4a6b6a9e..56fca97e 100644 --- a/api/ecnum_deleted.inc +++ b/api/ecnum_deleted.inc @@ -1,4 +1,4 @@ -/* $Id: ecnum_deleted.inc,v 1.1 2010/07/09 16:36:07 ucko Exp $ +/* $Id: ecnum_deleted.inc,v 1.3 2011/06/30 16:04:31 kazimird Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -32,39 +32,31 @@ static const char* const kECNum_deleted[] = { "1.1.1.74", - "1.1.1.155", - "1.1.1.249", - "1.1.3.25", + "1.1.1.293", "1.1.3.31", "1.1.5.1", "1.2.1.6", + "1.2.2.2", "1.2.3.10", - "1.2.7.9", "1.3.1.23", "1.3.1.55", "1.3.1.59", "1.3.1.61", - "1.4.3.6", "1.4.3.18", "1.5.3.3", "1.6.2.3", "1.6.5.1", "1.7.1.8", "1.7.99.2", - "1.7.99.5", "1.8.1.1", "1.12.99.2", - "1.12.99.5", "1.13.1.7", "1.13.11.7", "1.13.11.42", "1.13.12.10", - "1.13.12.11", "1.14.1.9", "1.14.1.11", - "1.14.11.5", "1.14.13.65", - "1.14.14.4", "1.14.99.18", "1.99.1.3", "1.99.1.4", @@ -73,9 +65,6 @@ static const char* const kECNum_deleted[] = { "1.99.1.10", "1.99.1.12", "2.1.1.30", - "2.1.1.73", - "2.1.1.92", - "2.1.1.93", "2.1.1.138", "2.1.3.4", "2.3.1.70", @@ -83,27 +72,16 @@ static const char* const kECNum_deleted[] = { "2.3.1.124", "2.4.1.6", "2.4.1.75", - "2.4.1.112", - "2.4.1.112", "2.4.1.154", - "2.4.1.233", "2.4.1.235", - "2.5.1.64", "2.6.1.20", - "2.6.1.61", "2.6.1.69", "2.7.1.9", "2.7.1.57", - "2.7.1.70", - "2.7.1.97", "2.7.1.98", - "2.7.1.120", "2.7.7.20", - "2.7.7.29", - "2.8.2.12", "2.8.3.4", "3.1.1.9", - "3.1.1.16", "3.1.2.9", "3.1.3.61", 
"3.1.4.24", @@ -172,20 +150,14 @@ static const char* const kECNum_deleted[] = { "3.4.99.39", "3.4.99.40", "3.4.99.42", - "3.5.1.80", - "3.6.3.13", - "3.6.3.45", "3.13.1.2", "4.1.1.13", "4.1.2.3", "4.1.2.6", + "4.1.99.15", "4.2.1.23", - "4.2.1.71", - "4.2.1.86", "4.2.99.5", - "4.3.1.5", "4.3.1.11", - "4.3.1.21", "4.4.1.12", "5.2.1.11", "5.3.1.2", @@ -193,7 +165,5 @@ static const char* const kECNum_deleted[] = { "5.3.1.18", "5.3.99.1", "5.4.3.1", - "6.1.1.8", - "6.2.1.29", - "6.3.2.15" + "6.1.1.8" }; diff --git a/api/ecnum_replaced.inc b/api/ecnum_replaced.inc index 3aea7c17..f511ad44 100644 --- a/api/ecnum_replaced.inc +++ b/api/ecnum_replaced.inc @@ -1,4 +1,4 @@ -/* $Id: ecnum_replaced.inc,v 1.1 2010/07/09 16:36:07 ucko Exp $ +/* $Id: ecnum_replaced.inc,v 1.4 2011/06/30 16:04:31 kazimird Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -31,23 +31,29 @@ */ static const char* const kECNum_replaced[] = { - "1.1.1.5 1.1.1.303", + "1.1.1.5 1.1.1.303 1.1.1.304", "1.1.1.68 1.5.1.20", "1.1.1.70 1.2.1.3", "1.1.1.89 1.1.1.86", "1.1.1.109 1.3.1.28", "1.1.1.139 1.1.1.21", + "1.1.1.155 1.1.1.87", "1.1.1.171 1.5.1.20", "1.1.1.180 1.1.1.131", - "1.1.1.182 1.1.1.228", + "1.1.1.182 1.1.1.198 1.1.1.227 1.1.1.228", "1.1.1.204 1.17.1.4", "1.1.1.242 1.3.1.69", + "1.1.1.249 2.5.1.46", "1.1.1.253 1.5.1.33", + "1.1.1.n1 1.1.1.305", + "1.1.1.n2 1.1.1.300", + "1.1.1.n10 1.1.1.301", "1.1.2.1 1.1.5.3", "1.1.3.1 1.1.3.15", "1.1.3.2 1.13.12.4", "1.1.3.22 1.17.3.2", "1.1.3.24 1.3.3.12", + "1.1.3.25 1.1.99.18", "1.1.3.26 1.21.3.2", "1.1.3.32 1.14.21.1", "1.1.3.33 1.14.21.2", @@ -55,20 +61,27 @@ static const char* const kECNum_replaced[] = { "1.1.3.35 1.14.21.4", "1.1.3.36 1.14.21.5", "1.1.99.5 1.1.5.3", + "1.1.99.8 1.1.2.7 1.1.2.8", "1.1.99.15 1.5.1.20", "1.1.99.16 1.1.5.4", "1.1.99.17 1.1.5.2", "1.1.99.19 1.17.99.4", - "1.2.1.1 1.1.1.284", + "1.1.99.23 1.1.2.6", + "1.1.99.25 1.1.5.8", + "1.1.99.34 1.1.98.2", + "1.2.1.1 1.1.1.284 
4.4.1.22", "1.2.1.14 1.1.1.205", "1.2.1.34 1.1.1.131", "1.2.1.35 1.1.1.203", "1.2.1.37 1.17.1.4", "1.2.1.55 1.1.1.279", "1.2.1.56 1.1.1.280", + "1.2.1.66 1.1.1.306", + "1.2.1.n1 1.2.1.77", "1.2.3.2 1.17.3.2", "1.2.3.12 1.14.13.82", "1.2.4.3 1.2.4.4", + "1.2.7.9 1.2.7.3", "1.2.99.1 1.17.99.4", "1.3.1.50 1.1.1.252", "1.3.2.1 1.3.99.2", @@ -77,6 +90,7 @@ static const char* const kECNum_replaced[] = { "1.3.99.9 1.21.99.1", "1.3.99.11 1.3.5.2", "1.4.1.6 1.21.4.1", + "1.4.3.6 1.4.3.21 1.4.3.22", "1.4.3.9 1.4.3.4", "1.4.3.17 1.3.3.10", "1.4.4.1 1.21.4.1", @@ -86,6 +100,17 @@ static const char* const kECNum_replaced[] = { "1.5.1.35 1.2.1.19", "1.5.3.8 1.3.3.8", "1.5.3.9 1.21.3.3", + "1.5.3.11 1.5.3.13 1.5.3.14 1.5.3.15 1.5.3.16 1.5.3.17", + "1.5.3.n1 1.5.3.16", + "1.5.3.n2 1.5.3.16", + "1.5.3.n3 1.5.3.13 1.5.3.16", + "1.5.3.n4 1.5.3.13", + "1.5.3.n5 1.5.3.16", + "1.5.3.n6 1.5.3.14 1.5.3.15", + "1.5.3.n7 1.5.3.14 1.5.3.15", + "1.5.3.n8 1.5.3.14 1.5.3.15", + "1.5.3.n9 1.5.3.14 1.5.3.15", + "1.5.3.n10 1.5.3.13", "1.5.99.7 1.5.8.2", "1.5.99.10 1.5.8.1", "1.6.2.1 1.6.99.3", @@ -125,7 +150,8 @@ static const char* const kECNum_replaced[] = { "1.6.99.12 1.16.1.6", "1.6.99.13 1.16.1.7", "1.7.99.3 1.7.2.1", - "1.8.4.5 1.8.4.13", + "1.7.99.5 1.5.1.20", + "1.8.4.5 1.8.4.13 1.8.4.14", "1.8.4.6 1.8.4.11", "1.8.6.1 2.5.1.18", "1.8.99.4 1.8.4.8", @@ -138,6 +164,7 @@ static const char* const kECNum_replaced[] = { "1.12.99.1 1.12.98.1", "1.12.99.3 1.12.5.1", "1.12.99.4 1.12.98.2", + "1.12.99.5 1.13.11.47", "1.13.1.1 1.13.11.1", "1.13.1.2 1.13.11.2", "1.13.1.3 1.13.11.3", @@ -152,12 +179,13 @@ static const char* const kECNum_replaced[] = { "1.13.1.13 1.13.11.12", "1.13.11.21 1.14.99.36", "1.13.11.32 1.13.12.16", + "1.13.12.11 1.14.13.8", "1.13.99.2 1.14.12.10", "1.13.99.4 1.14.12.9", "1.13.99.5 1.13.11.47", "1.14.1.1 1.14.14.1", "1.14.1.2 1.14.13.9", - "1.14.1.3 1.14.99.7", + "1.14.1.3 1.14.99.7 5.4.99.7", "1.14.1.4 1.14.99.2", "1.14.1.5 1.14.13.5", "1.14.1.6 1.14.15.4", @@ -167,12 +195,18 
@@ static const char* const kECNum_replaced[] = { "1.14.2.1 1.14.17.1", "1.14.2.2 1.13.11.27", "1.14.3.1 1.14.16.1", + "1.14.11.5 1.14.11.6", "1.14.12.2 1.14.13.35", "1.14.12.6 1.14.13.66", + "1.14.12.n1 1.14.12.21", "1.14.13.45 1.14.18.2", "1.14.14.2 1.14.14.1", + "1.14.14.4 1.14.15.7", "1.14.14.6 1.14.13.111", "1.14.17.2 1.14.18.1", + "1.14.19.n1 1.14.19.4", + "1.14.19.n2 1.14.19.5", + "1.14.19.n3 1.14.19.6", "1.14.99.5 1.14.19.1", "1.14.99.6 1.14.19.2", "1.14.99.8 1.14.14.1", @@ -180,6 +214,7 @@ static const char* const kECNum_replaced[] = { "1.14.99.16 1.14.13.72", "1.14.99.17 1.14.16.5", "1.14.99.25 1.14.19.3", + "1.14.99.n1 1.14.99.41", "1.17.1.6 1.17.99.5", "1.17.4.3 1.17.7.1", "1.18.2.1 1.18.6.1", @@ -196,7 +231,7 @@ static const char* const kECNum_replaced[] = { "1.99.1.7 1.14.15.4", "1.99.1.9 1.14.99.9", "1.99.1.11 1.14.99.10", - "1.99.1.13 1.14.99.7", + "1.99.1.13 1.14.99.7 5.4.99.7", "1.99.1.14 1.13.11.27", "1.99.2.1 1.13.11.12", "1.99.2.2 1.13.11.1", @@ -204,18 +239,27 @@ static const char* const kECNum_replaced[] = { "1.99.2.4 1.13.11.4", "1.99.2.5 1.13.11.5", "1.99.2.6 1.13.99.1", - "2.1.1.23 2.1.1.126", - "2.1.1.24 2.1.1.100", - "2.1.1.58 2.6.1.5", + "2.1.1.23 2.1.1.124 2.1.1.125 2.1.1.126", + "2.1.1.24 2.1.1.77 2.1.1.80 2.1.1.100", + "2.1.1.29 2.1.1.202 2.1.1.203 2.1.1.204", + "2.1.1.48 2.1.1.181 2.1.1.182 2.1.1.183 2.1.1.184", + "2.1.1.51 2.1.1.187 2.1.1.188", + "2.1.1.52 2.1.1.171 2.1.1.172 2.1.1.173 2.1.1.174", + "2.1.1.58 2.1.1.57", + "2.1.1.73 2.1.1.37", "2.1.1.81 2.1.1.49", + "2.1.1.92 2.1.1.69", + "2.1.1.93 2.1.1.70", "2.1.1.134 2.1.1.129", "2.1.1.135 1.16.1.8", "2.1.2.6 2.1.2.5", "2.1.2.12 2.1.1.74", + "2.1.2.n1 2.1.2.13", "2.3.1.55 2.3.1.82", + "2.3.1.n1 2.3.1.191", "2.4.1.3 2.4.1.25", "2.4.1.42 2.4.1.17", - "2.4.1.51 2.4.1.145", + "2.4.1.51 2.4.1.101 2.4.1.143 2.4.1.144 2.4.1.145", "2.4.1.55 2.7.8.14", "2.4.1.59 2.4.1.17", "2.4.1.61 2.4.1.17", @@ -228,35 +272,51 @@ static const char* const kECNum_replaced[] = { "2.4.1.98 2.4.1.90", 
"2.4.1.107 2.4.1.17", "2.4.1.108 2.4.1.17", + "2.4.1.112 2.4.1.186", "2.4.1.124 2.4.1.87", "2.4.1.151 2.4.1.87", "2.4.1.169 2.4.2.39", "2.4.1.200 4.2.2.17", "2.4.1.204 2.4.2.40", + "2.4.1.233 2.4.1.115", + "2.4.1.n1 2.4.1.245", + "2.4.1.n3 2.4.1.250", "2.4.2.13 2.5.1.6", + "2.4.2.n1 2.4.2.43", "2.5.1.8 2.5.1.75", + "2.5.1.11 2.5.1.84 2.5.1.85", "2.5.1.12 2.5.1.18", "2.5.1.13 2.5.1.18", "2.5.1.14 2.5.1.18", + "2.5.1.33 2.5.1.82 2.5.1.83", "2.5.1.37 4.4.1.20", "2.5.1.40 4.2.3.9", + "2.5.1.64 2.2.1.9 4.2.99.20", + "2.5.1.n1 2.2.1.9", + "2.5.1.n2 2.5.1.81", + "2.5.1.n3 2.5.1.73", "2.6.1.10 2.6.1.21", "2.6.1.25 2.6.1.24", "2.6.1.53 1.4.1.13", + "2.6.1.61 2.6.1.40", + "2.6.1.n1 2.6.1.87", "2.6.2.1 2.1.4.1", "2.7.1.37 2.7.11.1", "2.7.1.38 2.7.11.19", + "2.7.1.70 2.7.11.1", "2.7.1.75 2.7.1.21", "2.7.1.96 2.7.1.86", + "2.7.1.97 2.7.11.14", "2.7.1.99 2.7.11.2", "2.7.1.104 2.7.99.1", "2.7.1.109 2.7.11.31", "2.7.1.110 2.7.11.3", "2.7.1.111 2.7.11.27", - "2.7.1.112 2.7.10.1", + "2.7.1.112 2.7.10.1 2.7.10.2", "2.7.1.115 2.7.11.4", "2.7.1.116 2.7.11.5", "2.7.1.117 2.7.11.18", + "2.7.1.120 2.7.11.17", "2.7.1.123 2.7.11.17", "2.7.1.124 2.7.11.6", "2.7.1.125 2.7.11.14", @@ -271,6 +331,9 @@ static const char* const kECNum_replaced[] = { "2.7.1.141 2.7.11.23", "2.7.1.152 2.7.4.21", "2.7.1.155 2.7.4.24", + "2.7.1.n2 2.7.1.161", + "2.7.1.n3 2.7.1.164", + "2.7.1.n6 2.7.1.163", "2.7.2.5 6.3.4.16", "2.7.2.9 6.3.5.5", "2.7.3.11 2.7.13.1", @@ -285,16 +348,28 @@ static const char* const kECNum_replaced[] = { "2.7.5.7 5.4.2.8", "2.7.7.16 3.1.27.5", "2.7.7.17 3.1.27.1", + "2.7.7.21 2.7.7.72", + "2.7.7.25 2.7.7.72", "2.7.7.26 3.1.27.3", + "2.7.7.29 2.7.7.28", + "2.7.7.n2 2.7.7.67", + "2.7.7.n3 2.7.7.73", "2.7.8.16 2.7.8.2", + "2.7.8.n1 2.7.8.30", + "2.8.2.12 2.8.2.8", + "2.9.1.n1 2.9.1.2", "3.1.1.12 3.1.1.1", + "3.1.1.16 3.1.1.24 5.3.3.4", "3.1.1.18 3.1.1.17", "3.1.1.62 3.5.1.47", "3.1.1.69 3.5.1.89", + "3.1.1.n1 3.5.1.103", "3.1.2.8 3.1.2.6", "3.1.2.24 3.13.1.3", - "3.1.3.30 3.1.3.7", + 
"3.1.2.n1 3.1.2.28", + "3.1.3.30 3.1.3.31", "3.1.3.65 3.1.3.64", + "3.1.3.n3 3.1.3.78", "3.1.4.5 3.1.21.1", "3.1.4.6 3.1.22.1", "3.1.4.7 3.1.31.1", @@ -314,6 +389,7 @@ static const char* const kECNum_replaced[] = { "3.1.4.31 3.1.11.4", "3.1.4.36 3.1.4.43", "3.1.4.47 4.6.1.14", + "3.1.4.n1 3.1.4.53", "3.1.22.3 3.1.21.7", "3.1.23.1 3.1.21.4", "3.1.23.2 3.1.21.4", @@ -379,6 +455,7 @@ static const char* const kECNum_replaced[] = { "3.1.24.3 3.1.21.5", "3.1.24.4 3.1.21.5", "3.1.25.2 4.2.99.18", + "3.1.26.n1 3.1.26.12", "3.2.1.12 3.2.1.54", "3.2.1.13 3.2.1.54", "3.2.1.29 3.2.1.52", @@ -393,7 +470,7 @@ static const char* const kECNum_replaced[] = { "3.2.2.18 3.5.1.52", "3.2.3.1 3.2.1.147", "3.3.1.3 4.4.1.21", - "3.3.2.3 3.3.2.9", + "3.3.2.3 3.3.2.9 3.3.2.10", "3.4.1.1 3.4.11.1", "3.4.1.2 3.4.11.2", "3.4.1.3 3.4.11.4", @@ -401,12 +478,12 @@ static const char* const kECNum_replaced[] = { "3.4.2.1 3.4.17.1", "3.4.2.2 3.4.17.2", "3.4.2.3 3.4.17.4", - "3.4.3.1 3.4.13.18", - "3.4.3.2 3.4.13.18", + "3.4.3.1 3.4.13.18 3.4.13.19", + "3.4.3.2 3.4.13.18 3.4.13.19", "3.4.3.3 3.4.13.3", "3.4.3.4 3.4.13.5", "3.4.3.5 3.4.11.2", - "3.4.3.6 3.4.13.18", + "3.4.3.6 3.4.13.18 3.4.13.19", "3.4.3.7 3.4.13.9", "3.4.4.1 3.4.23.1", "3.4.4.2 3.4.23.2", @@ -414,7 +491,7 @@ static const char* const kECNum_replaced[] = { "3.4.4.4 3.4.21.4", "3.4.4.5 3.4.21.1", "3.4.4.6 3.4.21.1", - "3.4.4.7 3.4.21.36", + "3.4.4.7 3.4.21.36 3.4.21.37", "3.4.4.8 3.4.21.9", "3.4.4.9 3.4.14.1", "3.4.4.10 3.4.22.2", @@ -423,17 +500,18 @@ static const char* const kECNum_replaced[] = { "3.4.4.13 3.4.21.5", "3.4.4.14 3.4.21.7", "3.4.4.15 3.4.23.15", - "3.4.4.16 3.4.21.62", - "3.4.4.17 3.4.23.20", + "3.4.4.16 3.4.21.62 3.4.21.63 3.4.21.64 3.4.21.65 3.4.21.66 3.4.21.67", + "3.4.4.17 3.4.21.103 3.4.23.20 3.4.23.21 3.4.23.22 3.4.23.23 3.4.23.24 3.4.23.25 3.4.23.26 3.4.23.28 3.4.23.29 3.4.23.30", "3.4.4.18 3.4.22.10", "3.4.4.19 3.4.24.3", "3.4.4.20 3.4.22.8", "3.4.4.21 3.4.21.34", "3.4.4.22 3.4.23.3", "3.4.4.23 3.4.23.5", - 
"3.4.4.24 3.4.22.32", + "3.4.4.24 3.4.22.32 3.4.22.33", "3.4.11.8 3.4.19.3", - "3.4.12.1 3.4.16.5", + "3.4.11.n1 3.4.11.24", + "3.4.12.1 3.4.16.5 3.4.16.6", "3.4.12.2 3.4.17.1", "3.4.12.3 3.4.17.2", "3.4.12.4 3.4.16.2", @@ -443,48 +521,48 @@ static const char* const kECNum_replaced[] = { "3.4.12.8 3.4.17.4", "3.4.12.10 3.4.19.9", "3.4.12.11 3.4.17.6", - "3.4.12.12 3.4.16.5", - "3.4.13.1 3.4.13.18", - "3.4.13.2 3.4.13.18", + "3.4.12.12 3.4.16.5 3.4.16.6", + "3.4.13.1 3.4.13.18 3.4.13.19", + "3.4.13.2 3.4.13.18 3.4.13.19", "3.4.13.6 3.4.11.2", - "3.4.13.8 3.4.13.18", + "3.4.13.8 3.4.13.18 3.4.13.19", "3.4.13.10 3.4.19.5", - "3.4.13.11 3.4.13.18", + "3.4.13.11 3.4.13.18 3.4.13.19", "3.4.13.13 3.4.13.3", - "3.4.13.15 3.4.13.18", + "3.4.13.15 3.4.13.18 3.4.13.19", "3.4.14.3 3.4.19.1", - "3.4.14.8 3.4.14.9", + "3.4.14.8 3.4.14.9 3.4.14.10", "3.4.15.2 3.4.19.2", "3.4.15.3 3.4.15.5", - "3.4.16.1 3.4.16.5", - "3.4.16.3 3.4.16.5", + "3.4.16.1 3.4.16.5 3.4.16.6", + "3.4.16.3 3.4.16.5 3.4.16.6", "3.4.17.7 3.5.1.28", "3.4.17.9 3.4.17.4", "3.4.19.8 3.4.17.21", "3.4.19.10 3.5.1.28", - "3.4.21.8 3.4.21.35", - "3.4.21.11 3.4.21.36", - "3.4.21.13 3.4.16.5", - "3.4.21.14 3.4.21.62", + "3.4.21.8 3.4.21.34 3.4.21.35", + "3.4.21.11 3.4.21.36 3.4.21.37", + "3.4.21.13 3.4.16.5 3.4.16.6", + "3.4.21.14 3.4.21.62 3.4.21.63 3.4.21.64 3.4.21.65 3.4.21.67", "3.4.21.15 3.4.21.63", "3.4.21.28 3.4.21.74", "3.4.21.29 3.4.21.74", "3.4.21.30 3.4.21.74", - "3.4.21.31 3.4.21.68", + "3.4.21.31 3.4.21.68 3.4.21.73", "3.4.21.44 3.4.21.43", "3.4.21.87 3.4.23.49", - "3.4.22.4 3.4.22.32", + "3.4.22.4 3.4.22.32 3.4.22.33", "3.4.22.5 3.4.22.33", "3.4.22.9 3.4.21.48", "3.4.22.11 3.4.24.56", "3.4.22.12 3.4.19.9", - "3.4.22.17 3.4.22.52", + "3.4.22.17 3.4.22.52 3.4.22.53", "3.4.22.18 3.4.21.26", "3.4.22.19 3.4.24.15", "3.4.22.21 3.4.25.1", "3.4.22.22 3.4.24.37", "3.4.22.23 3.4.21.61", - "3.4.23.6 3.4.23.18", + "3.4.23.6 3.4.21.103 3.4.23.18 3.4.23.19 3.4.23.20 3.4.23.21 3.4.23.22 3.4.23.23 3.4.23.24 3.4.23.25 
3.4.23.26 3.4.23.28 3.4.23.30", "3.4.23.7 3.4.23.20", "3.4.23.8 3.4.23.25", "3.4.23.9 3.4.23.21", @@ -492,8 +570,8 @@ static const char* const kECNum_replaced[] = { "3.4.23.27 3.4.21.103", "3.4.23.33 3.4.21.101", "3.4.23.37 3.4.21.100", - "3.4.24.4 3.4.24.25", - "3.4.24.5 3.4.25.1", + "3.4.24.4 3.4.24.25 3.4.24.26 3.4.24.27 3.4.24.28 3.4.24.29 3.4.24.30 3.4.24.31 3.4.24.32 3.4.24.39 3.4.24.40", + "3.4.24.5 3.4.22.52 3.4.22.53 3.4.25.1", "3.4.24.8 3.4.24.3", "3.4.99.1 3.4.23.28", "3.4.99.4 3.4.23.12", @@ -504,7 +582,7 @@ static const char* const kECNum_replaced[] = { "3.4.99.19 3.4.23.15", "3.4.99.22 3.4.24.29", "3.4.99.25 3.4.23.21", - "3.4.99.26 3.4.21.73", + "3.4.99.26 3.4.21.68 3.4.21.73", "3.4.99.28 3.4.21.60", "3.4.99.30 3.4.24.20", "3.4.99.31 3.4.24.15", @@ -520,6 +598,9 @@ static const char* const kECNum_replaced[] = { "3.5.1.34 3.4.13.5", "3.5.1.37 3.5.1.26", "3.5.1.45 6.3.4.6", + "3.5.1.80 3.5.1.25", + "3.5.1.n1 3.5.1.108", + "3.5.1.n2 3.5.1.99", "3.5.2.8 3.5.2.6", "3.5.5.3 4.2.1.104", "3.6.1.4 3.6.1.3", @@ -536,6 +617,9 @@ static const char* const kECNum_replaced[] = { "3.6.1.49 3.6.5.4", "3.6.1.50 3.6.5.5", "3.6.1.51 3.6.5.6", + "3.6.1.n5 3.6.1.54", + "3.6.3.13 3.6.3.1", + "3.6.3.45 3.6.3.44", "3.8.1.4 1.97.1.10", "3.8.2.1 3.1.8.2", "4.1.1.10 4.1.1.12", @@ -546,7 +630,9 @@ static const char* const kECNum_replaced[] = { "4.1.2.15 2.5.1.54", "4.1.2.16 2.5.1.55", "4.1.2.31 4.1.3.16", - "4.1.2.39 4.1.2.37", + "4.1.2.37 4.1.2.46 4.1.2.47", + "4.1.2.39 4.1.2.46 4.1.2.47", + "4.1.2.n1 4.1.2.44", "4.1.3.2 2.3.3.9", "4.1.3.5 2.3.3.10", "4.1.3.7 2.3.3.1", @@ -581,16 +667,22 @@ static const char* const kECNum_replaced[] = { "4.2.1.29 4.99.1.6", "4.2.1.37 3.3.2.4", "4.2.1.38 4.3.1.20", - "4.2.1.63 3.3.2.9", - "4.2.1.64 3.3.2.9", + "4.2.1.63 3.3.2.9 3.3.2.10", + "4.2.1.64 3.3.2.9 3.3.2.10", + "4.2.1.71 4.2.1.27", "4.2.1.72 4.1.1.78", + "4.2.1.86 4.2.1.98", "4.2.1.102 4.2.1.100", - "4.2.2.4 4.2.2.20", + "4.2.2.4 4.2.2.20 4.2.2.21", + "4.2.3.n1 4.2.3.38", + "4.2.3.n3 
4.2.3.56", + "4.2.3.n5 4.2.3.52", + "4.2.3.n9 4.2.3.44", "4.2.99.1 4.2.2.1", "4.2.99.2 4.2.3.1", "4.2.99.3 4.2.2.2", "4.2.99.4 4.2.2.3", - "4.2.99.6 4.2.2.5", + "4.2.99.6 4.2.2.5 4.2.2.20 4.2.2.21", "4.2.99.7 4.2.3.2", "4.2.99.8 2.5.1.47", "4.2.99.9 2.5.1.48", @@ -602,7 +694,10 @@ static const char* const kECNum_replaced[] = { "4.2.99.16 2.5.1.53", "4.2.99.17 2.5.1.51", "4.2.99.19 4.4.1.23", + "4.2.99.n1 4.2.99.20", + "4.3.1.5 4.3.1.23 4.3.1.24 4.3.1.25", "4.3.1.8 2.5.1.61", + "4.3.1.21 4.3.1.9", "4.3.99.1 4.2.1.104", "4.4.1.7 2.5.1.18", "4.4.1.18 1.8.3.5", @@ -618,7 +713,12 @@ static const char* const kECNum_replaced[] = { "5.3.1.19 2.6.1.16", "5.4.99.6 5.4.4.2", "5.4.99.10 5.4.99.11", + "6.1.1.n1 6.3.1.13", + "6.1.1.n2 6.1.1.27", "6.2.1.21 6.2.1.30", + "6.2.1.29 6.2.1.7", "6.3.1.3 6.3.4.13", + "6.3.2.15 6.3.2.10", + "6.3.2.22 6.3.1.14", "6.3.5.8 2.6.1.85" }; diff --git a/api/ecnum_specific.inc b/api/ecnum_specific.inc index b7f0d4f1..03f8d197 100644 --- a/api/ecnum_specific.inc +++ b/api/ecnum_specific.inc @@ -1,4 +1,4 @@ -/* $Id: ecnum_specific.inc,v 1.1 2010/07/09 16:36:07 ucko Exp $ +/* $Id: ecnum_specific.inc,v 1.3 2011/06/30 16:04:31 kazimird Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -31,4262 +31,4529 @@ */ static const char* const kECNum_specific[] = { - "1.1.1.1", - "1.1.1.2", - "1.1.1.3", - "1.1.1.4", - "1.1.1.6", - "1.1.1.7", - "1.1.1.8", - "1.1.1.9", - "1.1.1.10", - "1.1.1.11", - "1.1.1.12", - "1.1.1.13", - "1.1.1.14", - "1.1.1.15", - "1.1.1.16", - "1.1.1.17", - "1.1.1.18", - "1.1.1.19", - "1.1.1.20", - "1.1.1.21", - "1.1.1.22", - "1.1.1.23", - "1.1.1.24", - "1.1.1.25", - "1.1.1.26", - "1.1.1.27", - "1.1.1.28", - "1.1.1.29", - "1.1.1.30", - "1.1.1.31", - "1.1.1.32", - "1.1.1.33", - "1.1.1.34", - "1.1.1.35", - "1.1.1.36", - "1.1.1.37", - "1.1.1.38", - "1.1.1.39", - "1.1.1.40", - "1.1.1.41", - "1.1.1.42", - "1.1.1.43", - "1.1.1.44", - "1.1.1.45", - "1.1.1.46", - "1.1.1.47", - 
"1.1.1.48", - "1.1.1.49", - "1.1.1.50", - "1.1.1.51", - "1.1.1.52", - "1.1.1.53", - "1.1.1.54", - "1.1.1.55", - "1.1.1.56", - "1.1.1.57", - "1.1.1.58", - "1.1.1.59", - "1.1.1.60", - "1.1.1.61", - "1.1.1.62", - "1.1.1.63", - "1.1.1.64", - "1.1.1.65", - "1.1.1.66", - "1.1.1.67", - "1.1.1.69", - "1.1.1.71", - "1.1.1.72", - "1.1.1.73", - "1.1.1.75", - "1.1.1.76", - "1.1.1.77", - "1.1.1.78", - "1.1.1.79", - "1.1.1.80", - "1.1.1.81", - "1.1.1.82", - "1.1.1.83", - "1.1.1.84", - "1.1.1.85", - "1.1.1.86", - "1.1.1.87", - "1.1.1.88", - "1.1.1.90", - "1.1.1.91", - "1.1.1.92", - "1.1.1.93", - "1.1.1.94", - "1.1.1.95", - "1.1.1.96", - "1.1.1.97", - "1.1.1.98", - "1.1.1.99", - "1.1.1.100", - "1.1.1.101", - "1.1.1.102", - "1.1.1.103", - "1.1.1.104", - "1.1.1.105", - "1.1.1.106", - "1.1.1.107", - "1.1.1.108", - "1.1.1.110", - "1.1.1.111", - "1.1.1.112", - "1.1.1.113", - "1.1.1.114", - "1.1.1.115", - "1.1.1.116", - "1.1.1.117", - "1.1.1.118", - "1.1.1.119", - "1.1.1.120", - "1.1.1.121", - "1.1.1.122", - "1.1.1.123", - "1.1.1.124", - "1.1.1.125", - "1.1.1.126", - "1.1.1.127", - "1.1.1.128", - "1.1.1.129", - "1.1.1.130", - "1.1.1.131", - "1.1.1.132", - "1.1.1.133", - "1.1.1.134", - "1.1.1.135", - "1.1.1.136", - "1.1.1.137", - "1.1.1.138", - "1.1.1.140", - "1.1.1.141", - "1.1.1.142", - "1.1.1.143", - "1.1.1.144", - "1.1.1.145", - "1.1.1.146", - "1.1.1.147", - "1.1.1.148", - "1.1.1.149", - "1.1.1.150", - "1.1.1.151", - "1.1.1.152", - "1.1.1.153", - "1.1.1.154", - "1.1.1.156", - "1.1.1.157", - "1.1.1.158", - "1.1.1.159", - "1.1.1.160", - "1.1.1.161", - "1.1.1.162", - "1.1.1.163", - "1.1.1.164", - "1.1.1.165", - "1.1.1.166", - "1.1.1.167", - "1.1.1.168", - "1.1.1.169", - "1.1.1.170", - "1.1.1.172", - "1.1.1.173", - "1.1.1.174", - "1.1.1.175", - "1.1.1.176", - "1.1.1.177", - "1.1.1.178", - "1.1.1.179", - "1.1.1.181", - "1.1.1.183", - "1.1.1.184", - "1.1.1.185", - "1.1.1.186", - "1.1.1.187", - "1.1.1.188", - "1.1.1.189", - "1.1.1.190", - "1.1.1.191", - "1.1.1.192", - "1.1.1.193", - 
"1.1.1.194", - "1.1.1.195", - "1.1.1.196", - "1.1.1.197", - "1.1.1.198", - "1.1.1.199", - "1.1.1.200", - "1.1.1.201", - "1.1.1.202", - "1.1.1.203", - "1.1.1.205", - "1.1.1.206", - "1.1.1.207", - "1.1.1.208", - "1.1.1.209", - "1.1.1.210", - "1.1.1.211", - "1.1.1.212", - "1.1.1.213", - "1.1.1.214", - "1.1.1.215", - "1.1.1.216", - "1.1.1.217", - "1.1.1.218", - "1.1.1.219", - "1.1.1.220", - "1.1.1.221", - "1.1.1.222", - "1.1.1.223", - "1.1.1.224", - "1.1.1.225", - "1.1.1.226", - "1.1.1.227", - "1.1.1.228", - "1.1.1.229", - "1.1.1.230", - "1.1.1.231", - "1.1.1.232", - "1.1.1.233", - "1.1.1.234", - "1.1.1.235", - "1.1.1.236", - "1.1.1.237", - "1.1.1.238", - "1.1.1.239", - "1.1.1.240", - "1.1.1.241", - "1.1.1.243", - "1.1.1.244", - "1.1.1.245", - "1.1.1.246", - "1.1.1.247", - "1.1.1.248", - "1.1.1.250", - "1.1.1.251", - "1.1.1.252", - "1.1.1.254", - "1.1.1.255", - "1.1.1.256", - "1.1.1.257", - "1.1.1.258", - "1.1.1.259", - "1.1.1.260", - "1.1.1.261", - "1.1.1.262", - "1.1.1.263", - "1.1.1.264", - "1.1.1.265", - "1.1.1.266", - "1.1.1.267", - "1.1.1.268", - "1.1.1.269", - "1.1.1.270", - "1.1.1.271", - "1.1.1.272", - "1.1.1.273", - "1.1.1.274", - "1.1.1.275", - "1.1.1.276", - "1.1.1.277", - "1.1.1.278", - "1.1.1.279", - "1.1.1.280", - "1.1.1.281", - "1.1.1.282", - "1.1.1.283", - "1.1.1.284", - "1.1.1.285", - "1.1.1.286", - "1.1.1.287", - "1.1.1.288", - "1.1.1.289", - "1.1.1.290", - "1.1.1.291", - "1.1.1.292", - "1.1.1.294", - "1.1.1.295", - "1.1.1.296", - "1.1.1.297", - "1.1.1.298", - "1.1.1.299", - "1.1.1.300", - "1.1.1.301", - "1.1.1.302", - "1.1.1.303", - "1.1.1.304", - "1.1.2.2", - "1.1.2.3", - "1.1.2.4", - "1.1.2.5", - "1.1.3.3", - "1.1.3.4", - "1.1.3.5", - "1.1.3.6", - "1.1.3.7", - "1.1.3.8", - "1.1.3.9", - "1.1.3.10", - "1.1.3.11", - "1.1.3.12", - "1.1.3.13", - "1.1.3.14", - "1.1.3.15", - "1.1.3.16", - "1.1.3.17", - "1.1.3.18", - "1.1.3.19", - "1.1.3.20", - "1.1.3.21", - "1.1.3.23", - "1.1.3.27", - "1.1.3.28", - "1.1.3.29", - "1.1.3.30", - "1.1.3.37", - "1.1.3.38", - 
"1.1.3.39", - "1.1.3.40", - "1.1.3.41", - "1.1.4.1", - "1.1.4.2", - "1.1.5.2", - "1.1.5.3", - "1.1.5.4", - "1.1.5.5", - "1.1.5.6", - "1.1.5.7", - "1.1.99.1", - "1.1.99.2", - "1.1.99.3", - "1.1.99.4", - "1.1.99.6", - "1.1.99.7", - "1.1.99.8", - "1.1.99.9", - "1.1.99.10", - "1.1.99.11", - "1.1.99.12", - "1.1.99.13", - "1.1.99.14", - "1.1.99.18", - "1.1.99.20", - "1.1.99.21", - "1.1.99.22", - "1.1.99.23", - "1.1.99.24", - "1.1.99.25", - "1.1.99.26", - "1.1.99.27", - "1.1.99.28", - "1.1.99.29", - "1.1.99.30", - "1.1.99.31", - "1.1.99.32", - "1.1.99.33", - "1.2.1.2", - "1.2.1.3", - "1.2.1.4", - "1.2.1.5", - "1.2.1.7", - "1.2.1.8", - "1.2.1.9", - "1.2.1.10", - "1.2.1.11", - "1.2.1.12", - "1.2.1.13", - "1.2.1.15", - "1.2.1.16", - "1.2.1.17", - "1.2.1.18", - "1.2.1.19", - "1.2.1.20", - "1.2.1.21", - "1.2.1.22", - "1.2.1.23", - "1.2.1.24", - "1.2.1.25", - "1.2.1.26", - "1.2.1.27", - "1.2.1.28", - "1.2.1.29", - "1.2.1.30", - "1.2.1.31", - "1.2.1.32", - "1.2.1.33", - "1.2.1.36", - "1.2.1.38", - "1.2.1.39", - "1.2.1.40", - "1.2.1.41", - "1.2.1.42", - "1.2.1.43", - "1.2.1.44", - "1.2.1.45", - "1.2.1.46", - "1.2.1.47", - "1.2.1.48", - "1.2.1.49", - "1.2.1.50", - "1.2.1.51", - "1.2.1.52", - "1.2.1.53", - "1.2.1.54", - "1.2.1.57", - "1.2.1.58", - "1.2.1.59", - "1.2.1.60", - "1.2.1.61", - "1.2.1.62", - "1.2.1.63", - "1.2.1.64", - "1.2.1.65", - "1.2.1.66", - "1.2.1.67", - "1.2.1.68", - "1.2.1.69", - "1.2.1.70", - "1.2.1.71", - "1.2.1.72", - "1.2.1.73", - "1.2.1.74", - "1.2.1.75", - "1.2.1.76", - "1.2.1.77", - "1.2.1.78", - "1.2.2.1", - "1.2.2.2", - "1.2.2.3", - "1.2.2.4", - "1.2.3.1", - "1.2.3.3", - "1.2.3.4", - "1.2.3.5", - "1.2.3.6", - "1.2.3.7", - "1.2.3.8", - "1.2.3.9", - "1.2.3.11", - "1.2.3.13", - "1.2.3.14", - "1.2.4.1", - "1.2.4.2", - "1.2.4.4", - "1.2.7.1", - "1.2.7.2", - "1.2.7.3", - "1.2.7.4", - "1.2.7.5", - "1.2.7.6", - "1.2.7.7", - "1.2.7.8", - "1.2.99.2", - "1.2.99.3", - "1.2.99.4", - "1.2.99.5", - "1.2.99.6", - "1.2.99.7", - "1.3.1.1", - "1.3.1.2", - "1.3.1.3", - 
"1.3.1.4", - "1.3.1.5", - "1.3.1.6", - "1.3.1.7", - "1.3.1.8", - "1.3.1.9", - "1.3.1.10", - "1.3.1.11", - "1.3.1.12", - "1.3.1.13", - "1.3.1.14", - "1.3.1.15", - "1.3.1.16", - "1.3.1.17", - "1.3.1.18", - "1.3.1.19", - "1.3.1.20", - "1.3.1.21", - "1.3.1.22", - "1.3.1.24", - "1.3.1.25", - "1.3.1.26", - "1.3.1.27", - "1.3.1.28", - "1.3.1.29", - "1.3.1.30", - "1.3.1.31", - "1.3.1.32", - "1.3.1.33", - "1.3.1.34", - "1.3.1.35", - "1.3.1.36", - "1.3.1.37", - "1.3.1.38", - "1.3.1.39", - "1.3.1.40", - "1.3.1.41", - "1.3.1.42", - "1.3.1.43", - "1.3.1.44", - "1.3.1.45", - "1.3.1.46", - "1.3.1.47", - "1.3.1.48", - "1.3.1.49", - "1.3.1.51", - "1.3.1.52", - "1.3.1.53", - "1.3.1.54", - "1.3.1.56", - "1.3.1.57", - "1.3.1.58", - "1.3.1.60", - "1.3.1.62", - "1.3.1.63", - "1.3.1.64", - "1.3.1.65", - "1.3.1.66", - "1.3.1.67", - "1.3.1.68", - "1.3.1.69", - "1.3.1.70", - "1.3.1.71", - "1.3.1.72", - "1.3.1.73", - "1.3.1.74", - "1.3.1.75", - "1.3.1.76", - "1.3.1.77", - "1.3.1.78", - "1.3.1.79", - "1.3.1.80", - "1.3.1.81", - "1.3.1.82", - "1.3.1.83", - "1.3.1.84", - "1.3.2.3", - "1.3.3.1", - "1.3.3.3", - "1.3.3.4", - "1.3.3.5", - "1.3.3.6", - "1.3.3.7", - "1.3.3.8", - "1.3.3.9", - "1.3.3.10", - "1.3.3.11", - "1.3.3.12", - "1.3.5.1", - "1.3.5.2", - "1.3.7.1", - "1.3.7.2", - "1.3.7.3", - "1.3.7.4", - "1.3.7.5", - "1.3.7.6", - "1.3.99.1", - "1.3.99.2", - "1.3.99.3", - "1.3.99.4", - "1.3.99.5", - "1.3.99.6", - "1.3.99.7", - "1.3.99.8", - "1.3.99.10", - "1.3.99.12", - "1.3.99.13", - "1.3.99.14", - "1.3.99.15", - "1.3.99.16", - "1.3.99.17", - "1.3.99.18", - "1.3.99.19", - "1.3.99.20", - "1.3.99.21", - "1.3.99.22", - "1.3.99.23", - "1.3.99.24", - "1.3.99.25", - "1.4.1.1", - "1.4.1.2", - "1.4.1.3", - "1.4.1.4", - "1.4.1.5", - "1.4.1.7", - "1.4.1.8", - "1.4.1.9", - "1.4.1.10", - "1.4.1.11", - "1.4.1.12", - "1.4.1.13", - "1.4.1.14", - "1.4.1.15", - "1.4.1.16", - "1.4.1.17", - "1.4.1.18", - "1.4.1.19", - "1.4.1.20", - "1.4.1.21", - "1.4.2.1", - "1.4.3.1", - "1.4.3.2", - "1.4.3.3", - "1.4.3.4", - 
"1.4.3.5", - "1.4.3.7", - "1.4.3.8", - "1.4.3.10", - "1.4.3.11", - "1.4.3.12", - "1.4.3.13", - "1.4.3.14", - "1.4.3.15", - "1.4.3.16", - "1.4.3.19", - "1.4.3.20", - "1.4.3.21", - "1.4.3.22", - "1.4.3.23", - "1.4.4.2", - "1.4.5.1", - "1.4.7.1", - "1.4.99.1", - "1.4.99.2", - "1.4.99.3", - "1.4.99.4", - "1.4.99.5", - "1.5.1.1", - "1.5.1.2", - "1.5.1.3", - "1.5.1.5", - "1.5.1.6", - "1.5.1.7", - "1.5.1.8", - "1.5.1.9", - "1.5.1.10", - "1.5.1.11", - "1.5.1.12", - "1.5.1.15", - "1.5.1.16", - "1.5.1.17", - "1.5.1.18", - "1.5.1.19", - "1.5.1.20", - "1.5.1.21", - "1.5.1.22", - "1.5.1.23", - "1.5.1.24", - "1.5.1.25", - "1.5.1.26", - "1.5.1.27", - "1.5.1.28", - "1.5.1.29", - "1.5.1.30", - "1.5.1.31", - "1.5.1.32", - "1.5.1.33", - "1.5.1.34", - "1.5.3.1", - "1.5.3.2", - "1.5.3.4", - "1.5.3.5", - "1.5.3.6", - "1.5.3.7", - "1.5.3.10", - "1.5.3.11", - "1.5.3.12", - "1.5.3.13", - "1.5.3.14", - "1.5.3.15", - "1.5.3.16", - "1.5.3.17", - "1.5.4.1", - "1.5.5.1", - "1.5.7.1", - "1.5.8.1", - "1.5.8.2", - "1.5.99.1", - "1.5.99.2", - "1.5.99.3", - "1.5.99.4", - "1.5.99.5", - "1.5.99.6", - "1.5.99.8", - "1.5.99.9", - "1.5.99.11", - "1.5.99.12", - "1.5.99.13", - "1.6.1.1", - "1.6.1.2", - "1.6.2.2", - "1.6.2.4", - "1.6.2.5", - "1.6.2.6", - "1.6.3.1", - "1.6.5.2", - "1.6.5.3", - "1.6.5.4", - "1.6.5.5", - "1.6.5.6", - "1.6.5.7", - "1.6.6.9", - "1.6.99.1", - "1.6.99.3", - "1.6.99.5", - "1.6.99.6", - "1.7.1.1", - "1.7.1.2", - "1.7.1.3", - "1.7.1.4", - "1.7.1.5", - "1.7.1.6", - "1.7.1.7", - "1.7.1.9", - "1.7.1.10", - "1.7.1.11", - "1.7.1.12", - "1.7.1.13", - "1.7.2.1", - "1.7.2.2", - "1.7.2.3", - "1.7.3.1", - "1.7.3.2", - "1.7.3.3", - "1.7.3.4", - "1.7.3.5", - "1.7.5.1", - "1.7.7.1", - "1.7.7.2", - "1.7.99.1", - "1.7.99.4", - "1.7.99.6", - "1.7.99.7", - "1.7.99.8", - "1.8.1.2", - "1.8.1.3", - "1.8.1.4", - "1.8.1.5", - "1.8.1.6", - "1.8.1.7", - "1.8.1.8", - "1.8.1.9", - "1.8.1.10", - "1.8.1.11", - "1.8.1.12", - "1.8.1.13", - "1.8.1.14", - "1.8.1.15", - "1.8.1.16", - "1.8.2.1", - "1.8.2.2", - 
"1.8.3.1", - "1.8.3.2", - "1.8.3.3", - "1.8.3.4", - "1.8.3.5", - "1.8.4.1", - "1.8.4.2", - "1.8.4.3", - "1.8.4.4", - "1.8.4.7", - "1.8.4.8", - "1.8.4.9", - "1.8.4.10", - "1.8.4.11", - "1.8.4.12", - "1.8.4.13", - "1.8.4.14", - "1.8.5.1", - "1.8.5.2", - "1.8.7.1", - "1.8.98.1", - "1.8.98.2", - "1.8.99.1", - "1.8.99.2", - "1.8.99.3", - "1.9.3.1", - "1.9.6.1", - "1.9.99.1", - "1.10.1.1", - "1.10.2.1", - "1.10.2.2", - "1.10.3.1", - "1.10.3.2", - "1.10.3.3", - "1.10.3.4", - "1.10.3.5", - "1.10.3.6", - "1.10.99.1", - "1.10.99.2", - "1.10.99.3", - "1.11.1.1", - "1.11.1.2", - "1.11.1.3", - "1.11.1.5", - "1.11.1.6", - "1.11.1.7", - "1.11.1.8", - "1.11.1.9", - "1.11.1.10", - "1.11.1.11", - "1.11.1.12", - "1.11.1.13", - "1.11.1.14", - "1.11.1.15", - "1.11.1.16", - "1.11.1.17", - "1.12.1.2", - "1.12.1.3", - "1.12.2.1", - "1.12.5.1", - "1.12.7.2", - "1.12.98.1", - "1.12.98.2", - "1.12.98.3", - "1.12.99.6", - "1.13.11.1", - "1.13.11.2", - "1.13.11.3", - "1.13.11.4", - "1.13.11.5", - "1.13.11.6", - "1.13.11.8", - "1.13.11.9", - "1.13.11.10", - "1.13.11.11", - "1.13.11.12", - "1.13.11.13", - "1.13.11.14", - "1.13.11.15", - "1.13.11.16", - "1.13.11.17", - "1.13.11.18", - "1.13.11.19", - "1.13.11.20", - "1.13.11.22", - "1.13.11.23", - "1.13.11.24", - "1.13.11.25", - "1.13.11.26", - "1.13.11.27", - "1.13.11.28", - "1.13.11.29", - "1.13.11.30", - "1.13.11.31", - "1.13.11.33", - "1.13.11.34", - "1.13.11.35", - "1.13.11.36", - "1.13.11.37", - "1.13.11.38", - "1.13.11.39", - "1.13.11.40", - "1.13.11.41", - "1.13.11.43", - "1.13.11.44", - "1.13.11.45", - "1.13.11.46", - "1.13.11.47", - "1.13.11.48", - "1.13.11.49", - "1.13.11.50", - "1.13.11.51", - "1.13.11.52", - "1.13.11.53", - "1.13.11.54", - "1.13.11.55", - "1.13.11.56", - "1.13.12.1", - "1.13.12.2", - "1.13.12.3", - "1.13.12.4", - "1.13.12.5", - "1.13.12.6", - "1.13.12.7", - "1.13.12.8", - "1.13.12.9", - "1.13.12.12", - "1.13.12.13", - "1.13.12.14", - "1.13.12.15", - "1.13.12.16", - "1.13.12.17", - "1.13.99.1", - "1.13.99.3", - 
"1.14.11.1", - "1.14.11.2", - "1.14.11.3", - "1.14.11.4", - "1.14.11.6", - "1.14.11.7", - "1.14.11.8", - "1.14.11.9", - "1.14.11.10", - "1.14.11.11", - "1.14.11.12", - "1.14.11.13", - "1.14.11.14", - "1.14.11.15", - "1.14.11.16", - "1.14.11.17", - "1.14.11.18", - "1.14.11.19", - "1.14.11.20", - "1.14.11.21", - "1.14.11.22", - "1.14.11.23", - "1.14.11.24", - "1.14.11.25", - "1.14.11.26", - "1.14.11.27", - "1.14.11.28", - "1.14.12.1", - "1.14.12.3", - "1.14.12.4", - "1.14.12.5", - "1.14.12.7", - "1.14.12.8", - "1.14.12.9", - "1.14.12.10", - "1.14.12.11", - "1.14.12.12", - "1.14.12.13", - "1.14.12.14", - "1.14.12.15", - "1.14.12.16", - "1.14.12.17", - "1.14.12.18", - "1.14.12.19", - "1.14.12.20", - "1.14.12.21", - "1.14.13.1", - "1.14.13.2", - "1.14.13.3", - "1.14.13.4", - "1.14.13.5", - "1.14.13.6", - "1.14.13.7", - "1.14.13.8", - "1.14.13.9", - "1.14.13.10", - "1.14.13.11", - "1.14.13.12", - "1.14.13.13", - "1.14.13.14", - "1.14.13.15", - "1.14.13.16", - "1.14.13.17", - "1.14.13.18", - "1.14.13.19", - "1.14.13.20", - "1.14.13.21", - "1.14.13.22", - "1.14.13.23", - "1.14.13.24", - "1.14.13.25", - "1.14.13.26", - "1.14.13.27", - "1.14.13.28", - "1.14.13.29", - "1.14.13.30", - "1.14.13.31", - "1.14.13.32", - "1.14.13.33", - "1.14.13.34", - "1.14.13.35", - "1.14.13.36", - "1.14.13.37", - "1.14.13.38", - "1.14.13.39", - "1.14.13.40", - "1.14.13.41", - "1.14.13.42", - "1.14.13.43", - "1.14.13.44", - "1.14.13.46", - "1.14.13.47", - "1.14.13.48", - "1.14.13.49", - "1.14.13.50", - "1.14.13.51", - "1.14.13.52", - "1.14.13.53", - "1.14.13.54", - "1.14.13.55", - "1.14.13.56", - "1.14.13.57", - "1.14.13.58", - "1.14.13.59", - "1.14.13.60", - "1.14.13.61", - "1.14.13.62", - "1.14.13.63", - "1.14.13.64", - "1.14.13.66", - "1.14.13.67", - "1.14.13.68", - "1.14.13.69", - "1.14.13.70", - "1.14.13.71", - "1.14.13.72", - "1.14.13.73", - "1.14.13.74", - "1.14.13.75", - "1.14.13.76", - "1.14.13.77", - "1.14.13.78", - "1.14.13.79", - "1.14.13.80", - "1.14.13.81", - "1.14.13.82", - 
"1.14.13.83", - "1.14.13.84", - "1.14.13.85", - "1.14.13.86", - "1.14.13.87", - "1.14.13.88", - "1.14.13.89", - "1.14.13.90", - "1.14.13.91", - "1.14.13.92", - "1.14.13.93", - "1.14.13.94", - "1.14.13.95", - "1.14.13.96", - "1.14.13.97", - "1.14.13.98", - "1.14.13.99", - "1.14.13.100", - "1.14.13.101", - "1.14.13.102", - "1.14.13.103", - "1.14.13.104", - "1.14.13.105", - "1.14.13.106", - "1.14.13.107", - "1.14.13.108", - "1.14.13.109", - "1.14.13.110", - "1.14.13.111", - "1.14.13.112", - "1.14.13.113", - "1.14.14.1", - "1.14.14.3", - "1.14.14.5", - "1.14.14.7", - "1.14.15.1", - "1.14.15.2", - "1.14.15.3", - "1.14.15.4", - "1.14.15.5", - "1.14.15.6", - "1.14.15.7", - "1.14.15.8", - "1.14.16.1", - "1.14.16.2", - "1.14.16.3", - "1.14.16.4", - "1.14.16.5", - "1.14.16.6", - "1.14.17.1", - "1.14.17.3", - "1.14.17.4", - "1.14.18.1", - "1.14.18.2", - "1.14.19.1", - "1.14.19.2", - "1.14.19.3", - "1.14.19.4", - "1.14.19.5", - "1.14.19.6", - "1.14.20.1", - "1.14.21.1", - "1.14.21.2", - "1.14.21.3", - "1.14.21.4", - "1.14.21.5", - "1.14.21.6", - "1.14.21.7", - "1.14.99.1", - "1.14.99.2", - "1.14.99.3", - "1.14.99.4", - "1.14.99.7", - "1.14.99.9", - "1.14.99.10", - "1.14.99.11", - "1.14.99.12", - "1.14.99.14", - "1.14.99.15", - "1.14.99.19", - "1.14.99.20", - "1.14.99.21", - "1.14.99.22", - "1.14.99.23", - "1.14.99.24", - "1.14.99.26", - "1.14.99.27", - "1.14.99.28", - "1.14.99.29", - "1.14.99.30", - "1.14.99.31", - "1.14.99.32", - "1.14.99.33", - "1.14.99.34", - "1.14.99.35", - "1.14.99.36", - "1.14.99.37", - "1.14.99.38", - "1.14.99.39", - "1.14.99.40", - "1.15.1.1", - "1.15.1.2", - "1.16.1.1", - "1.16.1.2", - "1.16.1.3", - "1.16.1.4", - "1.16.1.5", - "1.16.1.6", - "1.16.1.7", - "1.16.1.8", - "1.16.3.1", - "1.16.8.1", - "1.17.1.1", - "1.17.1.2", - "1.17.1.3", - "1.17.1.4", - "1.17.1.5", - "1.17.3.1", - "1.17.3.2", - "1.17.3.3", - "1.17.4.1", - "1.17.4.2", - "1.17.5.1", - "1.17.7.1", - "1.17.99.1", - "1.17.99.2", - "1.17.99.3", - "1.17.99.4", - "1.17.99.5", - "1.18.1.1", - 
"1.18.1.2", - "1.18.1.3", - "1.18.1.4", - "1.18.6.1", - "1.19.6.1", - "1.20.1.1", - "1.20.4.1", - "1.20.4.2", - "1.20.4.3", - "1.20.98.1", - "1.20.99.1", - "1.21.3.1", - "1.21.3.2", - "1.21.3.3", - "1.21.3.4", - "1.21.3.5", - "1.21.3.6", - "1.21.4.1", - "1.21.4.2", - "1.21.4.3", - "1.21.4.4", - "1.21.99.1", - "1.22.1.1", - "1.97.1.1", - "1.97.1.2", - "1.97.1.3", - "1.97.1.4", - "1.97.1.8", - "1.97.1.9", - "1.97.1.10", - "1.97.1.11", - "2.1.1.1", - "2.1.1.2", - "2.1.1.3", - "2.1.1.4", - "2.1.1.5", - "2.1.1.6", - "2.1.1.7", - "2.1.1.8", - "2.1.1.9", - "2.1.1.10", - "2.1.1.11", - "2.1.1.12", - "2.1.1.13", - "2.1.1.14", - "2.1.1.15", - "2.1.1.16", - "2.1.1.17", - "2.1.1.18", - "2.1.1.19", - "2.1.1.20", - "2.1.1.21", - "2.1.1.22", - "2.1.1.25", - "2.1.1.26", - "2.1.1.27", - "2.1.1.28", - "2.1.1.29", - "2.1.1.31", - "2.1.1.32", - "2.1.1.33", - "2.1.1.34", - "2.1.1.35", - "2.1.1.36", - "2.1.1.37", - "2.1.1.38", - "2.1.1.39", - "2.1.1.40", - "2.1.1.41", - "2.1.1.42", - "2.1.1.43", - "2.1.1.44", - "2.1.1.45", - "2.1.1.46", - "2.1.1.47", - "2.1.1.48", - "2.1.1.49", - "2.1.1.50", - "2.1.1.51", - "2.1.1.52", - "2.1.1.53", - "2.1.1.54", - "2.1.1.55", - "2.1.1.56", - "2.1.1.57", - "2.1.1.59", - "2.1.1.60", - "2.1.1.61", - "2.1.1.62", - "2.1.1.63", - "2.1.1.64", - "2.1.1.65", - "2.1.1.66", - "2.1.1.67", - "2.1.1.68", - "2.1.1.69", - "2.1.1.70", - "2.1.1.71", - "2.1.1.72", - "2.1.1.74", - "2.1.1.75", - "2.1.1.76", - "2.1.1.77", - "2.1.1.78", - "2.1.1.79", - "2.1.1.80", - "2.1.1.82", - "2.1.1.83", - "2.1.1.84", - "2.1.1.85", - "2.1.1.86", - "2.1.1.87", - "2.1.1.88", - "2.1.1.89", - "2.1.1.90", - "2.1.1.91", - "2.1.1.94", - "2.1.1.95", - "2.1.1.96", - "2.1.1.97", - "2.1.1.98", - "2.1.1.99", - "2.1.1.100", - "2.1.1.101", - "2.1.1.102", - "2.1.1.103", - "2.1.1.104", - "2.1.1.105", - "2.1.1.106", - "2.1.1.107", - "2.1.1.108", - "2.1.1.109", - "2.1.1.110", - "2.1.1.111", - "2.1.1.112", - "2.1.1.113", - "2.1.1.114", - "2.1.1.115", - "2.1.1.116", - "2.1.1.117", - "2.1.1.118", - 
"2.1.1.119", - "2.1.1.120", - "2.1.1.121", - "2.1.1.122", - "2.1.1.123", - "2.1.1.124", - "2.1.1.125", - "2.1.1.126", - "2.1.1.127", - "2.1.1.128", - "2.1.1.129", - "2.1.1.130", - "2.1.1.131", - "2.1.1.132", - "2.1.1.133", - "2.1.1.136", - "2.1.1.137", - "2.1.1.139", - "2.1.1.140", - "2.1.1.141", - "2.1.1.142", - "2.1.1.143", - "2.1.1.144", - "2.1.1.145", - "2.1.1.146", - "2.1.1.147", - "2.1.1.148", - "2.1.1.149", - "2.1.1.150", - "2.1.1.151", - "2.1.1.152", - "2.1.1.153", - "2.1.1.154", - "2.1.1.155", - "2.1.1.156", - "2.1.1.157", - "2.1.1.158", - "2.1.1.159", - "2.1.1.160", - "2.1.1.161", - "2.1.1.162", - "2.1.1.163", - "2.1.1.164", - "2.1.1.165", - "2.1.2.1", - "2.1.2.2", - "2.1.2.3", - "2.1.2.4", - "2.1.2.5", - "2.1.2.7", - "2.1.2.8", - "2.1.2.9", - "2.1.2.10", - "2.1.2.11", - "2.1.3.1", - "2.1.3.2", - "2.1.3.3", - "2.1.3.5", - "2.1.3.6", - "2.1.3.7", - "2.1.3.8", - "2.1.3.9", - "2.1.3.10", - "2.1.3.11", - "2.1.4.1", - "2.1.4.2", - "2.2.1.1", - "2.2.1.2", - "2.2.1.3", - "2.2.1.4", - "2.2.1.5", - "2.2.1.6", - "2.2.1.7", - "2.2.1.8", - "2.2.1.9", - "2.3.1.1", - "2.3.1.2", - "2.3.1.3", - "2.3.1.4", - "2.3.1.5", - "2.3.1.6", - "2.3.1.7", - "2.3.1.8", - "2.3.1.9", - "2.3.1.10", - "2.3.1.11", - "2.3.1.12", - "2.3.1.13", - "2.3.1.14", - "2.3.1.15", - "2.3.1.16", - "2.3.1.17", - "2.3.1.18", - "2.3.1.19", - "2.3.1.20", - "2.3.1.21", - "2.3.1.22", - "2.3.1.23", - "2.3.1.24", - "2.3.1.25", - "2.3.1.26", - "2.3.1.27", - "2.3.1.28", - "2.3.1.29", - "2.3.1.30", - "2.3.1.31", - "2.3.1.32", - "2.3.1.33", - "2.3.1.34", - "2.3.1.35", - "2.3.1.36", - "2.3.1.37", - "2.3.1.38", - "2.3.1.39", - "2.3.1.40", - "2.3.1.41", - "2.3.1.42", - "2.3.1.43", - "2.3.1.44", - "2.3.1.45", - "2.3.1.46", - "2.3.1.47", - "2.3.1.48", - "2.3.1.49", - "2.3.1.50", - "2.3.1.51", - "2.3.1.52", - "2.3.1.53", - "2.3.1.54", - "2.3.1.56", - "2.3.1.57", - "2.3.1.58", - "2.3.1.59", - "2.3.1.60", - "2.3.1.61", - "2.3.1.62", - "2.3.1.63", - "2.3.1.64", - "2.3.1.65", - "2.3.1.66", - "2.3.1.67", - "2.3.1.68", - 
"2.3.1.69", - "2.3.1.71", - "2.3.1.72", - "2.3.1.73", - "2.3.1.74", - "2.3.1.75", - "2.3.1.76", - "2.3.1.77", - "2.3.1.78", - "2.3.1.79", - "2.3.1.80", - "2.3.1.81", - "2.3.1.82", - "2.3.1.83", - "2.3.1.84", - "2.3.1.85", - "2.3.1.86", - "2.3.1.87", - "2.3.1.88", - "2.3.1.89", - "2.3.1.90", - "2.3.1.91", - "2.3.1.92", - "2.3.1.93", - "2.3.1.94", - "2.3.1.95", - "2.3.1.96", - "2.3.1.97", - "2.3.1.98", - "2.3.1.99", - "2.3.1.100", - "2.3.1.101", - "2.3.1.102", - "2.3.1.103", - "2.3.1.104", - "2.3.1.105", - "2.3.1.106", - "2.3.1.107", - "2.3.1.108", - "2.3.1.109", - "2.3.1.110", - "2.3.1.111", - "2.3.1.112", - "2.3.1.113", - "2.3.1.114", - "2.3.1.115", - "2.3.1.116", - "2.3.1.117", - "2.3.1.118", - "2.3.1.119", - "2.3.1.121", - "2.3.1.122", - "2.3.1.123", - "2.3.1.125", - "2.3.1.126", - "2.3.1.127", - "2.3.1.128", - "2.3.1.129", - "2.3.1.130", - "2.3.1.131", - "2.3.1.132", - "2.3.1.133", - "2.3.1.134", - "2.3.1.135", - "2.3.1.136", - "2.3.1.137", - "2.3.1.138", - "2.3.1.139", - "2.3.1.140", - "2.3.1.141", - "2.3.1.142", - "2.3.1.143", - "2.3.1.144", - "2.3.1.145", - "2.3.1.146", - "2.3.1.147", - "2.3.1.148", - "2.3.1.149", - "2.3.1.150", - "2.3.1.151", - "2.3.1.152", - "2.3.1.153", - "2.3.1.154", - "2.3.1.155", - "2.3.1.156", - "2.3.1.157", - "2.3.1.158", - "2.3.1.159", - "2.3.1.160", - "2.3.1.161", - "2.3.1.162", - "2.3.1.163", - "2.3.1.164", - "2.3.1.165", - "2.3.1.166", - "2.3.1.167", - "2.3.1.168", - "2.3.1.169", - "2.3.1.170", - "2.3.1.171", - "2.3.1.172", - "2.3.1.173", - "2.3.1.174", - "2.3.1.175", - "2.3.1.176", - "2.3.1.177", - "2.3.1.178", - "2.3.1.179", - "2.3.1.180", - "2.3.1.181", - "2.3.1.182", - "2.3.1.183", - "2.3.1.184", - "2.3.1.185", - "2.3.1.186", - "2.3.1.187", - "2.3.1.188", - "2.3.1.189", - "2.3.1.190", - "2.3.2.1", - "2.3.2.2", - "2.3.2.3", - "2.3.2.4", - "2.3.2.5", - "2.3.2.6", - "2.3.2.7", - "2.3.2.8", - "2.3.2.9", - "2.3.2.10", - "2.3.2.11", - "2.3.2.12", - "2.3.2.13", - "2.3.2.14", - "2.3.2.15", - "2.3.3.1", - "2.3.3.2", - "2.3.3.3", - 
"2.3.3.4", - "2.3.3.5", - "2.3.3.6", - "2.3.3.7", - "2.3.3.8", - "2.3.3.9", - "2.3.3.10", - "2.3.3.11", - "2.3.3.12", - "2.3.3.13", - "2.3.3.14", - "2.3.3.15", - "2.4.1.1", - "2.4.1.2", - "2.4.1.4", - "2.4.1.5", - "2.4.1.7", - "2.4.1.8", - "2.4.1.9", - "2.4.1.10", - "2.4.1.11", - "2.4.1.12", - "2.4.1.13", - "2.4.1.14", - "2.4.1.15", - "2.4.1.16", - "2.4.1.17", - "2.4.1.18", - "2.4.1.19", - "2.4.1.20", - "2.4.1.21", - "2.4.1.22", - "2.4.1.23", - "2.4.1.24", - "2.4.1.25", - "2.4.1.26", - "2.4.1.27", - "2.4.1.28", - "2.4.1.29", - "2.4.1.30", - "2.4.1.31", - "2.4.1.32", - "2.4.1.33", - "2.4.1.34", - "2.4.1.35", - "2.4.1.36", - "2.4.1.37", - "2.4.1.38", - "2.4.1.39", - "2.4.1.40", - "2.4.1.41", - "2.4.1.43", - "2.4.1.44", - "2.4.1.45", - "2.4.1.46", - "2.4.1.47", - "2.4.1.48", - "2.4.1.49", - "2.4.1.50", - "2.4.1.52", - "2.4.1.53", - "2.4.1.54", - "2.4.1.56", - "2.4.1.57", - "2.4.1.58", - "2.4.1.60", - "2.4.1.62", - "2.4.1.63", - "2.4.1.64", - "2.4.1.65", - "2.4.1.66", - "2.4.1.67", - "2.4.1.68", - "2.4.1.69", - "2.4.1.70", - "2.4.1.71", - "2.4.1.73", - "2.4.1.74", - "2.4.1.78", - "2.4.1.79", - "2.4.1.80", - "2.4.1.81", - "2.4.1.82", - "2.4.1.83", - "2.4.1.85", - "2.4.1.86", - "2.4.1.87", - "2.4.1.88", - "2.4.1.90", - "2.4.1.91", - "2.4.1.92", - "2.4.1.94", - "2.4.1.95", - "2.4.1.96", - "2.4.1.97", - "2.4.1.99", - "2.4.1.100", - "2.4.1.101", - "2.4.1.102", - "2.4.1.103", - "2.4.1.104", - "2.4.1.105", - "2.4.1.106", - "2.4.1.109", - "2.4.1.110", - "2.4.1.111", - "2.4.1.113", - "2.4.1.114", - "2.4.1.115", - "2.4.1.116", - "2.4.1.117", - "2.4.1.118", - "2.4.1.119", - "2.4.1.120", - "2.4.1.121", - "2.4.1.122", - "2.4.1.123", - "2.4.1.125", - "2.4.1.126", - "2.4.1.127", - "2.4.1.128", - "2.4.1.129", - "2.4.1.130", - "2.4.1.131", - "2.4.1.132", - "2.4.1.133", - "2.4.1.134", - "2.4.1.135", - "2.4.1.136", - "2.4.1.137", - "2.4.1.138", - "2.4.1.139", - "2.4.1.140", - "2.4.1.141", - "2.4.1.142", - "2.4.1.143", - "2.4.1.144", - "2.4.1.145", - "2.4.1.146", - "2.4.1.147", - 
"2.4.1.148", - "2.4.1.149", - "2.4.1.150", - "2.4.1.152", - "2.4.1.153", - "2.4.1.155", - "2.4.1.156", - "2.4.1.157", - "2.4.1.158", - "2.4.1.159", - "2.4.1.160", - "2.4.1.161", - "2.4.1.162", - "2.4.1.163", - "2.4.1.164", - "2.4.1.165", - "2.4.1.166", - "2.4.1.167", - "2.4.1.168", - "2.4.1.170", - "2.4.1.171", - "2.4.1.172", - "2.4.1.173", - "2.4.1.174", - "2.4.1.175", - "2.4.1.176", - "2.4.1.177", - "2.4.1.178", - "2.4.1.179", - "2.4.1.180", - "2.4.1.181", - "2.4.1.182", - "2.4.1.183", - "2.4.1.184", - "2.4.1.185", - "2.4.1.186", - "2.4.1.187", - "2.4.1.188", - "2.4.1.189", - "2.4.1.190", - "2.4.1.191", - "2.4.1.192", - "2.4.1.193", - "2.4.1.194", - "2.4.1.195", - "2.4.1.196", - "2.4.1.197", - "2.4.1.198", - "2.4.1.199", - "2.4.1.201", - "2.4.1.202", - "2.4.1.203", - "2.4.1.205", - "2.4.1.206", - "2.4.1.207", - "2.4.1.208", - "2.4.1.209", - "2.4.1.210", - "2.4.1.211", - "2.4.1.212", - "2.4.1.213", - "2.4.1.214", - "2.4.1.215", - "2.4.1.216", - "2.4.1.217", - "2.4.1.218", - "2.4.1.219", - "2.4.1.220", - "2.4.1.221", - "2.4.1.222", - "2.4.1.223", - "2.4.1.224", - "2.4.1.225", - "2.4.1.226", - "2.4.1.227", - "2.4.1.228", - "2.4.1.229", - "2.4.1.230", - "2.4.1.231", - "2.4.1.232", - "2.4.1.234", - "2.4.1.236", - "2.4.1.237", - "2.4.1.238", - "2.4.1.239", - "2.4.1.240", - "2.4.1.241", - "2.4.1.242", - "2.4.1.243", - "2.4.1.244", - "2.4.1.245", - "2.4.1.246", - "2.4.1.247", - "2.4.1.248", - "2.4.1.249", - "2.4.1.250", - "2.4.2.1", - "2.4.2.2", - "2.4.2.3", - "2.4.2.4", - "2.4.2.5", - "2.4.2.6", - "2.4.2.7", - "2.4.2.8", - "2.4.2.9", - "2.4.2.10", - "2.4.2.11", - "2.4.2.12", - "2.4.2.14", - "2.4.2.15", - "2.4.2.16", - "2.4.2.17", - "2.4.2.18", - "2.4.2.19", - "2.4.2.20", - "2.4.2.21", - "2.4.2.22", - "2.4.2.23", - "2.4.2.24", - "2.4.2.25", - "2.4.2.26", - "2.4.2.27", - "2.4.2.28", - "2.4.2.29", - "2.4.2.30", - "2.4.2.31", - "2.4.2.32", - "2.4.2.33", - "2.4.2.34", - "2.4.2.35", - "2.4.2.36", - "2.4.2.37", - "2.4.2.38", - "2.4.2.39", - "2.4.2.40", - "2.4.2.41", - 
"2.4.2.42", - "2.4.99.1", - "2.4.99.2", - "2.4.99.3", - "2.4.99.4", - "2.4.99.5", - "2.4.99.6", - "2.4.99.7", - "2.4.99.8", - "2.4.99.9", - "2.4.99.10", - "2.4.99.11", - "2.5.1.1", - "2.5.1.2", - "2.5.1.3", - "2.5.1.4", - "2.5.1.5", - "2.5.1.6", - "2.5.1.7", - "2.5.1.9", - "2.5.1.10", - "2.5.1.11", - "2.5.1.15", - "2.5.1.16", - "2.5.1.17", - "2.5.1.18", - "2.5.1.19", - "2.5.1.20", - "2.5.1.21", - "2.5.1.22", - "2.5.1.23", - "2.5.1.24", - "2.5.1.25", - "2.5.1.26", - "2.5.1.27", - "2.5.1.28", - "2.5.1.29", - "2.5.1.30", - "2.5.1.31", - "2.5.1.32", - "2.5.1.33", - "2.5.1.34", - "2.5.1.35", - "2.5.1.36", - "2.5.1.38", - "2.5.1.39", - "2.5.1.41", - "2.5.1.42", - "2.5.1.43", - "2.5.1.44", - "2.5.1.45", - "2.5.1.46", - "2.5.1.47", - "2.5.1.48", - "2.5.1.49", - "2.5.1.50", - "2.5.1.51", - "2.5.1.52", - "2.5.1.53", - "2.5.1.54", - "2.5.1.55", - "2.5.1.56", - "2.5.1.57", - "2.5.1.58", - "2.5.1.59", - "2.5.1.60", - "2.5.1.61", - "2.5.1.62", - "2.5.1.63", - "2.5.1.65", - "2.5.1.66", - "2.5.1.67", - "2.5.1.68", - "2.5.1.69", - "2.5.1.70", - "2.5.1.71", - "2.5.1.72", - "2.5.1.73", - "2.5.1.74", - "2.5.1.75", - "2.5.1.76", - "2.5.1.77", - "2.5.1.78", - "2.5.1.79", - "2.5.1.80", - "2.6.1.1", - "2.6.1.2", - "2.6.1.3", - "2.6.1.4", - "2.6.1.5", - "2.6.1.6", - "2.6.1.7", - "2.6.1.8", - "2.6.1.9", - "2.6.1.11", - "2.6.1.12", - "2.6.1.13", - "2.6.1.14", - "2.6.1.15", - "2.6.1.16", - "2.6.1.17", - "2.6.1.18", - "2.6.1.19", - "2.6.1.21", - "2.6.1.22", - "2.6.1.23", - "2.6.1.24", - "2.6.1.26", - "2.6.1.27", - "2.6.1.28", - "2.6.1.29", - "2.6.1.30", - "2.6.1.31", - "2.6.1.32", - "2.6.1.33", - "2.6.1.34", - "2.6.1.35", - "2.6.1.36", - "2.6.1.37", - "2.6.1.38", - "2.6.1.39", - "2.6.1.40", - "2.6.1.41", - "2.6.1.42", - "2.6.1.43", - "2.6.1.44", - "2.6.1.45", - "2.6.1.46", - "2.6.1.47", - "2.6.1.48", - "2.6.1.49", - "2.6.1.50", - "2.6.1.51", - "2.6.1.52", - "2.6.1.54", - "2.6.1.55", - "2.6.1.56", - "2.6.1.57", - "2.6.1.58", - "2.6.1.59", - "2.6.1.60", - "2.6.1.62", - "2.6.1.63", - "2.6.1.64", 
- "2.6.1.65", - "2.6.1.66", - "2.6.1.67", - "2.6.1.68", - "2.6.1.70", - "2.6.1.71", - "2.6.1.72", - "2.6.1.73", - "2.6.1.74", - "2.6.1.75", - "2.6.1.76", - "2.6.1.77", - "2.6.1.78", - "2.6.1.79", - "2.6.1.80", - "2.6.1.81", - "2.6.1.82", - "2.6.1.83", - "2.6.1.84", - "2.6.1.85", - "2.6.1.86", - "2.6.3.1", - "2.6.99.1", - "2.6.99.2", - "2.7.1.1", - "2.7.1.2", - "2.7.1.3", - "2.7.1.4", - "2.7.1.5", - "2.7.1.6", - "2.7.1.7", - "2.7.1.8", - "2.7.1.10", - "2.7.1.11", - "2.7.1.12", - "2.7.1.13", - "2.7.1.14", - "2.7.1.15", - "2.7.1.16", - "2.7.1.17", - "2.7.1.18", - "2.7.1.19", - "2.7.1.20", - "2.7.1.21", - "2.7.1.22", - "2.7.1.23", - "2.7.1.24", - "2.7.1.25", - "2.7.1.26", - "2.7.1.27", - "2.7.1.28", - "2.7.1.29", - "2.7.1.30", - "2.7.1.31", - "2.7.1.32", - "2.7.1.33", - "2.7.1.34", - "2.7.1.35", - "2.7.1.36", - "2.7.1.39", - "2.7.1.40", - "2.7.1.41", - "2.7.1.42", - "2.7.1.43", - "2.7.1.44", - "2.7.1.45", - "2.7.1.46", - "2.7.1.47", - "2.7.1.48", - "2.7.1.49", - "2.7.1.50", - "2.7.1.51", - "2.7.1.52", - "2.7.1.53", - "2.7.1.54", - "2.7.1.55", - "2.7.1.56", - "2.7.1.58", - "2.7.1.59", - "2.7.1.60", - "2.7.1.61", - "2.7.1.62", - "2.7.1.63", - "2.7.1.64", - "2.7.1.65", - "2.7.1.66", - "2.7.1.67", - "2.7.1.68", - "2.7.1.69", - "2.7.1.71", - "2.7.1.72", - "2.7.1.73", - "2.7.1.74", - "2.7.1.76", - "2.7.1.77", - "2.7.1.78", - "2.7.1.79", - "2.7.1.80", - "2.7.1.81", - "2.7.1.82", - "2.7.1.83", - "2.7.1.84", - "2.7.1.85", - "2.7.1.86", - "2.7.1.87", - "2.7.1.88", - "2.7.1.89", - "2.7.1.90", - "2.7.1.91", - "2.7.1.92", - "2.7.1.93", - "2.7.1.94", - "2.7.1.95", - "2.7.1.100", - "2.7.1.101", - "2.7.1.102", - "2.7.1.103", - "2.7.1.105", - "2.7.1.106", - "2.7.1.107", - "2.7.1.108", - "2.7.1.113", - "2.7.1.114", - "2.7.1.118", - "2.7.1.119", - "2.7.1.121", - "2.7.1.122", - "2.7.1.127", - "2.7.1.130", - "2.7.1.134", - "2.7.1.136", - "2.7.1.137", - "2.7.1.138", - "2.7.1.140", - "2.7.1.142", - "2.7.1.143", - "2.7.1.144", - "2.7.1.145", - "2.7.1.146", - "2.7.1.147", - "2.7.1.148", - 
"2.7.1.149", - "2.7.1.150", - "2.7.1.151", - "2.7.1.153", - "2.7.1.154", - "2.7.1.156", - "2.7.1.157", - "2.7.1.158", - "2.7.1.159", - "2.7.1.160", - "2.7.1.161", - "2.7.1.162", - "2.7.1.163", - "2.7.1.164", - "2.7.1.165", - "2.7.2.1", - "2.7.2.2", - "2.7.2.3", - "2.7.2.4", - "2.7.2.6", - "2.7.2.7", - "2.7.2.8", - "2.7.2.10", - "2.7.2.11", - "2.7.2.12", - "2.7.2.13", - "2.7.2.14", - "2.7.2.15", - "2.7.3.1", - "2.7.3.2", - "2.7.3.3", - "2.7.3.4", - "2.7.3.5", - "2.7.3.6", - "2.7.3.7", - "2.7.3.8", - "2.7.3.9", - "2.7.3.10", - "2.7.4.1", - "2.7.4.2", - "2.7.4.3", - "2.7.4.4", - "2.7.4.6", - "2.7.4.7", - "2.7.4.8", - "2.7.4.9", - "2.7.4.10", - "2.7.4.11", - "2.7.4.12", - "2.7.4.13", - "2.7.4.14", - "2.7.4.15", - "2.7.4.16", - "2.7.4.17", - "2.7.4.18", - "2.7.4.19", - "2.7.4.20", - "2.7.4.21", - "2.7.4.22", - "2.7.4.23", - "2.7.4.24", - "2.7.6.1", - "2.7.6.2", - "2.7.6.3", - "2.7.6.4", - "2.7.6.5", - "2.7.7.1", - "2.7.7.2", - "2.7.7.3", - "2.7.7.4", - "2.7.7.5", - "2.7.7.6", - "2.7.7.7", - "2.7.7.8", - "2.7.7.9", - "2.7.7.10", - "2.7.7.11", - "2.7.7.12", - "2.7.7.13", - "2.7.7.14", - "2.7.7.15", - "2.7.7.18", - "2.7.7.19", - "2.7.7.21", - "2.7.7.22", - "2.7.7.23", - "2.7.7.24", - "2.7.7.25", - "2.7.7.27", - "2.7.7.28", - "2.7.7.30", - "2.7.7.31", - "2.7.7.32", - "2.7.7.33", - "2.7.7.34", - "2.7.7.35", - "2.7.7.36", - "2.7.7.37", - "2.7.7.38", - "2.7.7.39", - "2.7.7.40", - "2.7.7.41", - "2.7.7.42", - "2.7.7.43", - "2.7.7.44", - "2.7.7.45", - "2.7.7.46", - "2.7.7.47", - "2.7.7.48", - "2.7.7.49", - "2.7.7.50", - "2.7.7.51", - "2.7.7.52", - "2.7.7.53", - "2.7.7.54", - "2.7.7.55", - "2.7.7.56", - "2.7.7.57", - "2.7.7.58", - "2.7.7.59", - "2.7.7.60", - "2.7.7.61", - "2.7.7.62", - "2.7.7.63", - "2.7.7.64", - "2.7.7.65", - "2.7.7.66", - "2.7.7.67", - "2.7.7.68", - "2.7.8.1", - "2.7.8.2", - "2.7.8.3", - "2.7.8.4", - "2.7.8.5", - "2.7.8.6", - "2.7.8.7", - "2.7.8.8", - "2.7.8.9", - "2.7.8.10", - "2.7.8.11", - "2.7.8.12", - "2.7.8.13", - "2.7.8.14", - "2.7.8.15", - "2.7.8.17", - 
"2.7.8.18", - "2.7.8.19", - "2.7.8.20", - "2.7.8.21", - "2.7.8.22", - "2.7.8.23", - "2.7.8.24", - "2.7.8.25", - "2.7.8.26", - "2.7.8.27", - "2.7.8.28", - "2.7.9.1", - "2.7.9.2", - "2.7.9.3", - "2.7.9.4", - "2.7.9.5", - "2.7.10.1", - "2.7.10.2", - "2.7.11.1", - "2.7.11.2", - "2.7.11.3", - "2.7.11.4", - "2.7.11.5", - "2.7.11.6", - "2.7.11.7", - "2.7.11.8", - "2.7.11.9", - "2.7.11.10", - "2.7.11.11", - "2.7.11.12", - "2.7.11.13", - "2.7.11.14", - "2.7.11.15", - "2.7.11.16", - "2.7.11.17", - "2.7.11.18", - "2.7.11.19", - "2.7.11.20", - "2.7.11.21", - "2.7.11.22", - "2.7.11.23", - "2.7.11.24", - "2.7.11.25", - "2.7.11.26", - "2.7.11.27", - "2.7.11.28", - "2.7.11.29", - "2.7.11.30", - "2.7.11.31", - "2.7.12.1", - "2.7.12.2", - "2.7.13.1", - "2.7.13.2", - "2.7.13.3", - "2.7.99.1", - "2.8.1.1", - "2.8.1.2", - "2.8.1.3", - "2.8.1.4", - "2.8.1.5", - "2.8.1.6", - "2.8.1.7", - "2.8.1.8", - "2.8.2.1", - "2.8.2.2", - "2.8.2.3", - "2.8.2.4", - "2.8.2.5", - "2.8.2.6", - "2.8.2.7", - "2.8.2.8", - "2.8.2.9", - "2.8.2.10", - "2.8.2.11", - "2.8.2.13", - "2.8.2.14", - "2.8.2.15", - "2.8.2.16", - "2.8.2.17", - "2.8.2.18", - "2.8.2.19", - "2.8.2.20", - "2.8.2.21", - "2.8.2.22", - "2.8.2.23", - "2.8.2.24", - "2.8.2.25", - "2.8.2.26", - "2.8.2.27", - "2.8.2.28", - "2.8.2.29", - "2.8.2.30", - "2.8.2.31", - "2.8.2.32", - "2.8.2.33", - "2.8.2.34", - "2.8.3.1", - "2.8.3.2", - "2.8.3.3", - "2.8.3.5", - "2.8.3.6", - "2.8.3.7", - "2.8.3.8", - "2.8.3.9", - "2.8.3.10", - "2.8.3.11", - "2.8.3.12", - "2.8.3.13", - "2.8.3.14", - "2.8.3.15", - "2.8.3.16", - "2.8.3.17", - "2.8.4.1", - "2.8.4.2", - "2.9.1.1", - "2.9.1.2", - "3.1.1.1", - "3.1.1.2", - "3.1.1.3", - "3.1.1.4", - "3.1.1.5", - "3.1.1.6", - "3.1.1.7", - "3.1.1.8", - "3.1.1.10", - "3.1.1.11", - "3.1.1.13", - "3.1.1.14", - "3.1.1.15", - "3.1.1.17", - "3.1.1.19", - "3.1.1.20", - "3.1.1.21", - "3.1.1.22", - "3.1.1.23", - "3.1.1.24", - "3.1.1.25", - "3.1.1.26", - "3.1.1.27", - "3.1.1.28", - "3.1.1.29", - "3.1.1.30", - "3.1.1.31", - "3.1.1.32", - 
"3.1.1.33", - "3.1.1.34", - "3.1.1.35", - "3.1.1.36", - "3.1.1.37", - "3.1.1.38", - "3.1.1.39", - "3.1.1.40", - "3.1.1.41", - "3.1.1.42", - "3.1.1.43", - "3.1.1.44", - "3.1.1.45", - "3.1.1.46", - "3.1.1.47", - "3.1.1.48", - "3.1.1.49", - "3.1.1.50", - "3.1.1.51", - "3.1.1.52", - "3.1.1.53", - "3.1.1.54", - "3.1.1.55", - "3.1.1.56", - "3.1.1.57", - "3.1.1.58", - "3.1.1.59", - "3.1.1.60", - "3.1.1.61", - "3.1.1.63", - "3.1.1.64", - "3.1.1.65", - "3.1.1.66", - "3.1.1.67", - "3.1.1.68", - "3.1.1.70", - "3.1.1.71", - "3.1.1.72", - "3.1.1.73", - "3.1.1.74", - "3.1.1.75", - "3.1.1.76", - "3.1.1.77", - "3.1.1.78", - "3.1.1.79", - "3.1.1.80", - "3.1.1.81", - "3.1.1.82", - "3.1.1.83", - "3.1.1.84", - "3.1.2.1", - "3.1.2.2", - "3.1.2.3", - "3.1.2.4", - "3.1.2.5", - "3.1.2.6", - "3.1.2.7", - "3.1.2.10", - "3.1.2.11", - "3.1.2.12", - "3.1.2.13", - "3.1.2.14", - "3.1.2.15", - "3.1.2.16", - "3.1.2.17", - "3.1.2.18", - "3.1.2.19", - "3.1.2.20", - "3.1.2.21", - "3.1.2.22", - "3.1.2.23", - "3.1.2.25", - "3.1.2.26", - "3.1.2.27", - "3.1.3.1", - "3.1.3.2", - "3.1.3.3", - "3.1.3.4", - "3.1.3.5", - "3.1.3.6", - "3.1.3.7", - "3.1.3.8", - "3.1.3.9", - "3.1.3.10", - "3.1.3.11", - "3.1.3.12", - "3.1.3.13", - "3.1.3.14", - "3.1.3.15", - "3.1.3.16", - "3.1.3.17", - "3.1.3.18", - "3.1.3.19", - "3.1.3.20", - "3.1.3.21", - "3.1.3.22", - "3.1.3.23", - "3.1.3.24", - "3.1.3.25", - "3.1.3.26", - "3.1.3.27", - "3.1.3.28", - "3.1.3.29", - "3.1.3.31", - "3.1.3.32", - "3.1.3.33", - "3.1.3.34", - "3.1.3.35", - "3.1.3.36", - "3.1.3.37", - "3.1.3.38", - "3.1.3.39", - "3.1.3.40", - "3.1.3.41", - "3.1.3.42", - "3.1.3.43", - "3.1.3.44", - "3.1.3.45", - "3.1.3.46", - "3.1.3.47", - "3.1.3.48", - "3.1.3.49", - "3.1.3.50", - "3.1.3.51", - "3.1.3.52", - "3.1.3.53", - "3.1.3.54", - "3.1.3.55", - "3.1.3.56", - "3.1.3.57", - "3.1.3.58", - "3.1.3.59", - "3.1.3.60", - "3.1.3.62", - "3.1.3.63", - "3.1.3.64", - "3.1.3.66", - "3.1.3.67", - "3.1.3.68", - "3.1.3.69", - "3.1.3.70", - "3.1.3.71", - "3.1.3.72", - "3.1.3.73", - 
"3.1.3.74", - "3.1.3.75", - "3.1.3.76", - "3.1.3.77", - "3.1.3.78", - "3.1.3.79", - "3.1.3.80", - "3.1.4.1", - "3.1.4.2", - "3.1.4.3", - "3.1.4.4", - "3.1.4.11", - "3.1.4.12", - "3.1.4.13", - "3.1.4.14", - "3.1.4.15", - "3.1.4.16", - "3.1.4.17", - "3.1.4.35", - "3.1.4.37", - "3.1.4.38", - "3.1.4.39", - "3.1.4.40", - "3.1.4.41", - "3.1.4.42", - "3.1.4.43", - "3.1.4.44", - "3.1.4.45", - "3.1.4.46", - "3.1.4.48", - "3.1.4.49", - "3.1.4.50", - "3.1.4.51", - "3.1.4.52", - "3.1.4.53", - "3.1.5.1", - "3.1.6.1", - "3.1.6.2", - "3.1.6.3", - "3.1.6.4", - "3.1.6.6", - "3.1.6.7", - "3.1.6.8", - "3.1.6.9", - "3.1.6.10", - "3.1.6.11", - "3.1.6.12", - "3.1.6.13", - "3.1.6.14", - "3.1.6.15", - "3.1.6.16", - "3.1.6.17", - "3.1.6.18", - "3.1.7.1", - "3.1.7.2", - "3.1.7.3", - "3.1.7.4", - "3.1.7.5", - "3.1.8.1", - "3.1.8.2", - "3.1.11.1", - "3.1.11.2", - "3.1.11.3", - "3.1.11.4", - "3.1.11.5", - "3.1.11.6", - "3.1.13.1", - "3.1.13.2", - "3.1.13.3", - "3.1.13.4", - "3.1.13.5", - "3.1.14.1", - "3.1.15.1", - "3.1.16.1", - "3.1.21.1", - "3.1.21.2", - "3.1.21.3", - "3.1.21.4", - "3.1.21.5", - "3.1.21.6", - "3.1.21.7", - "3.1.22.1", - "3.1.22.2", - "3.1.22.4", - "3.1.22.5", - "3.1.25.1", - "3.1.26.1", - "3.1.26.2", - "3.1.26.3", - "3.1.26.4", - "3.1.26.5", - "3.1.26.6", - "3.1.26.7", - "3.1.26.8", - "3.1.26.9", - "3.1.26.10", - "3.1.26.11", - "3.1.26.12", - "3.1.26.13", - "3.1.27.1", - "3.1.27.2", - "3.1.27.3", - "3.1.27.4", - "3.1.27.5", - "3.1.27.6", - "3.1.27.7", - "3.1.27.8", - "3.1.27.9", - "3.1.27.10", - "3.1.30.1", - "3.1.30.2", - "3.1.31.1", - "3.2.1.1", - "3.2.1.2", - "3.2.1.3", - "3.2.1.4", - "3.2.1.6", - "3.2.1.7", - "3.2.1.8", - "3.2.1.10", - "3.2.1.11", - "3.2.1.14", - "3.2.1.15", - "3.2.1.17", - "3.2.1.18", - "3.2.1.20", - "3.2.1.21", - "3.2.1.22", - "3.2.1.23", - "3.2.1.24", - "3.2.1.25", - "3.2.1.26", - "3.2.1.28", - "3.2.1.31", - "3.2.1.32", - "3.2.1.33", - "3.2.1.35", - "3.2.1.36", - "3.2.1.37", - "3.2.1.38", - "3.2.1.39", - "3.2.1.40", - "3.2.1.41", - "3.2.1.42", - 
"3.2.1.43", - "3.2.1.44", - "3.2.1.45", - "3.2.1.46", - "3.2.1.47", - "3.2.1.48", - "3.2.1.49", - "3.2.1.50", - "3.2.1.51", - "3.2.1.52", - "3.2.1.53", - "3.2.1.54", - "3.2.1.55", - "3.2.1.56", - "3.2.1.57", - "3.2.1.58", - "3.2.1.59", - "3.2.1.60", - "3.2.1.61", - "3.2.1.62", - "3.2.1.63", - "3.2.1.64", - "3.2.1.65", - "3.2.1.66", - "3.2.1.67", - "3.2.1.68", - "3.2.1.70", - "3.2.1.71", - "3.2.1.72", - "3.2.1.73", - "3.2.1.74", - "3.2.1.75", - "3.2.1.76", - "3.2.1.77", - "3.2.1.78", - "3.2.1.80", - "3.2.1.81", - "3.2.1.82", - "3.2.1.83", - "3.2.1.84", - "3.2.1.85", - "3.2.1.86", - "3.2.1.87", - "3.2.1.88", - "3.2.1.89", - "3.2.1.91", - "3.2.1.92", - "3.2.1.93", - "3.2.1.94", - "3.2.1.95", - "3.2.1.96", - "3.2.1.97", - "3.2.1.98", - "3.2.1.99", - "3.2.1.100", - "3.2.1.101", - "3.2.1.102", - "3.2.1.103", - "3.2.1.104", - "3.2.1.105", - "3.2.1.106", - "3.2.1.107", - "3.2.1.108", - "3.2.1.109", - "3.2.1.111", - "3.2.1.112", - "3.2.1.113", - "3.2.1.114", - "3.2.1.115", - "3.2.1.116", - "3.2.1.117", - "3.2.1.118", - "3.2.1.119", - "3.2.1.120", - "3.2.1.121", - "3.2.1.122", - "3.2.1.123", - "3.2.1.124", - "3.2.1.125", - "3.2.1.126", - "3.2.1.127", - "3.2.1.128", - "3.2.1.129", - "3.2.1.130", - "3.2.1.131", - "3.2.1.132", - "3.2.1.133", - "3.2.1.134", - "3.2.1.135", - "3.2.1.136", - "3.2.1.137", - "3.2.1.139", - "3.2.1.140", - "3.2.1.141", - "3.2.1.142", - "3.2.1.143", - "3.2.1.144", - "3.2.1.145", - "3.2.1.146", - "3.2.1.147", - "3.2.1.149", - "3.2.1.150", - "3.2.1.151", - "3.2.1.152", - "3.2.1.153", - "3.2.1.154", - "3.2.1.155", - "3.2.1.156", - "3.2.1.157", - "3.2.1.158", - "3.2.1.159", - "3.2.1.161", - "3.2.1.162", - "3.2.1.163", - "3.2.1.164", - "3.2.1.165", - "3.2.2.1", - "3.2.2.2", - "3.2.2.3", - "3.2.2.4", - "3.2.2.5", - "3.2.2.6", - "3.2.2.7", - "3.2.2.8", - "3.2.2.9", - "3.2.2.10", - "3.2.2.11", - "3.2.2.12", - "3.2.2.13", - "3.2.2.14", - "3.2.2.15", - "3.2.2.16", - "3.2.2.17", - "3.2.2.19", - "3.2.2.20", - "3.2.2.21", - "3.2.2.22", - "3.2.2.23", - "3.2.2.24", - 
"3.2.2.25", - "3.2.2.26", - "3.2.2.27", - "3.2.2.28", - "3.2.2.29", - "3.3.1.1", - "3.3.1.2", - "3.3.2.1", - "3.3.2.2", - "3.3.2.4", - "3.3.2.5", - "3.3.2.6", - "3.3.2.7", - "3.3.2.8", - "3.3.2.9", - "3.3.2.10", - "3.3.2.11", - "3.4.11.1", - "3.4.11.2", - "3.4.11.3", - "3.4.11.4", - "3.4.11.5", - "3.4.11.6", - "3.4.11.7", - "3.4.11.9", - "3.4.11.10", - "3.4.11.13", - "3.4.11.14", - "3.4.11.15", - "3.4.11.16", - "3.4.11.17", - "3.4.11.18", - "3.4.11.19", - "3.4.11.20", - "3.4.11.21", - "3.4.11.22", - "3.4.11.23", - "3.4.11.24", - "3.4.13.3", - "3.4.13.4", - "3.4.13.5", - "3.4.13.7", - "3.4.13.9", - "3.4.13.12", - "3.4.13.17", - "3.4.13.18", - "3.4.13.19", - "3.4.13.20", - "3.4.13.21", - "3.4.13.22", - "3.4.14.1", - "3.4.14.2", - "3.4.14.4", - "3.4.14.5", - "3.4.14.6", - "3.4.14.9", - "3.4.14.10", - "3.4.14.11", - "3.4.14.12", - "3.4.15.1", - "3.4.15.4", - "3.4.15.5", - "3.4.15.6", - "3.4.16.2", - "3.4.16.4", - "3.4.16.5", - "3.4.16.6", - "3.4.17.1", - "3.4.17.2", - "3.4.17.3", - "3.4.17.4", - "3.4.17.6", - "3.4.17.8", - "3.4.17.10", - "3.4.17.11", - "3.4.17.12", - "3.4.17.13", - "3.4.17.14", - "3.4.17.15", - "3.4.17.16", - "3.4.17.17", - "3.4.17.18", - "3.4.17.19", - "3.4.17.20", - "3.4.17.21", - "3.4.17.22", - "3.4.17.23", - "3.4.18.1", - "3.4.19.1", - "3.4.19.2", - "3.4.19.3", - "3.4.19.5", - "3.4.19.6", - "3.4.19.7", - "3.4.19.9", - "3.4.19.11", - "3.4.19.12", - "3.4.21.1", - "3.4.21.2", - "3.4.21.3", - "3.4.21.4", - "3.4.21.5", - "3.4.21.6", - "3.4.21.7", - "3.4.21.9", - "3.4.21.10", - "3.4.21.12", - "3.4.21.19", - "3.4.21.20", - "3.4.21.21", - "3.4.21.22", - "3.4.21.25", - "3.4.21.26", - "3.4.21.27", - "3.4.21.32", - "3.4.21.34", - "3.4.21.35", - "3.4.21.36", - "3.4.21.37", - "3.4.21.38", - "3.4.21.39", - "3.4.21.41", - "3.4.21.42", - "3.4.21.43", - "3.4.21.45", - "3.4.21.46", - "3.4.21.47", - "3.4.21.48", - "3.4.21.49", - "3.4.21.50", - "3.4.21.53", - "3.4.21.54", - "3.4.21.55", - "3.4.21.57", - "3.4.21.59", - "3.4.21.60", - "3.4.21.61", - "3.4.21.62", - 
"3.4.21.63", - "3.4.21.64", - "3.4.21.65", - "3.4.21.66", - "3.4.21.67", - "3.4.21.68", - "3.4.21.69", - "3.4.21.70", - "3.4.21.71", - "3.4.21.72", - "3.4.21.73", - "3.4.21.74", - "3.4.21.75", - "3.4.21.76", - "3.4.21.77", - "3.4.21.78", - "3.4.21.79", - "3.4.21.80", - "3.4.21.81", - "3.4.21.82", - "3.4.21.83", - "3.4.21.84", - "3.4.21.85", - "3.4.21.86", - "3.4.21.88", - "3.4.21.89", - "3.4.21.90", - "3.4.21.91", - "3.4.21.92", - "3.4.21.93", - "3.4.21.94", - "3.4.21.95", - "3.4.21.96", - "3.4.21.97", - "3.4.21.98", - "3.4.21.99", - "3.4.21.100", - "3.4.21.101", - "3.4.21.102", - "3.4.21.103", - "3.4.21.104", - "3.4.21.105", - "3.4.21.106", - "3.4.21.107", - "3.4.21.108", - "3.4.21.109", - "3.4.21.110", - "3.4.21.111", - "3.4.21.112", - "3.4.21.113", - "3.4.21.114", - "3.4.21.115", - "3.4.21.116", - "3.4.21.117", - "3.4.21.118", - "3.4.21.119", - "3.4.21.120", - "3.4.22.1", - "3.4.22.2", - "3.4.22.3", - "3.4.22.6", - "3.4.22.7", - "3.4.22.8", - "3.4.22.10", - "3.4.22.14", - "3.4.22.15", - "3.4.22.16", - "3.4.22.24", - "3.4.22.25", - "3.4.22.26", - "3.4.22.27", - "3.4.22.28", - "3.4.22.29", - "3.4.22.30", - "3.4.22.31", - "3.4.22.32", - "3.4.22.33", - "3.4.22.34", - "3.4.22.35", - "3.4.22.36", - "3.4.22.37", - "3.4.22.38", - "3.4.22.39", - "3.4.22.40", - "3.4.22.41", - "3.4.22.42", - "3.4.22.43", - "3.4.22.44", - "3.4.22.45", - "3.4.22.46", - "3.4.22.47", - "3.4.22.48", - "3.4.22.49", - "3.4.22.50", - "3.4.22.51", - "3.4.22.52", - "3.4.22.53", - "3.4.22.54", - "3.4.22.55", - "3.4.22.56", - "3.4.22.57", - "3.4.22.58", - "3.4.22.59", - "3.4.22.60", - "3.4.22.61", - "3.4.22.62", - "3.4.22.63", - "3.4.22.64", - "3.4.22.65", - "3.4.22.66", - "3.4.22.67", - "3.4.22.68", - "3.4.22.69", - "3.4.22.70", - "3.4.22.71", - "3.4.23.1", - "3.4.23.2", - "3.4.23.3", - "3.4.23.4", - "3.4.23.5", - "3.4.23.12", - "3.4.23.15", - "3.4.23.16", - "3.4.23.17", - "3.4.23.18", - "3.4.23.19", - "3.4.23.20", - "3.4.23.21", - "3.4.23.22", - "3.4.23.23", - "3.4.23.24", - "3.4.23.25", - 
"3.4.23.26", - "3.4.23.28", - "3.4.23.29", - "3.4.23.30", - "3.4.23.31", - "3.4.23.32", - "3.4.23.34", - "3.4.23.35", - "3.4.23.36", - "3.4.23.38", - "3.4.23.39", - "3.4.23.40", - "3.4.23.41", - "3.4.23.42", - "3.4.23.43", - "3.4.23.44", - "3.4.23.45", - "3.4.23.46", - "3.4.23.47", - "3.4.23.48", - "3.4.23.49", - "3.4.23.50", - "3.4.23.51", - "3.4.24.1", - "3.4.24.3", - "3.4.24.6", - "3.4.24.7", - "3.4.24.11", - "3.4.24.12", - "3.4.24.13", - "3.4.24.14", - "3.4.24.15", - "3.4.24.16", - "3.4.24.17", - "3.4.24.18", - "3.4.24.19", - "3.4.24.20", - "3.4.24.21", - "3.4.24.22", - "3.4.24.23", - "3.4.24.24", - "3.4.24.25", - "3.4.24.26", - "3.4.24.27", - "3.4.24.28", - "3.4.24.29", - "3.4.24.30", - "3.4.24.31", - "3.4.24.32", - "3.4.24.33", - "3.4.24.34", - "3.4.24.35", - "3.4.24.36", - "3.4.24.37", - "3.4.24.38", - "3.4.24.39", - "3.4.24.40", - "3.4.24.41", - "3.4.24.42", - "3.4.24.43", - "3.4.24.44", - "3.4.24.45", - "3.4.24.46", - "3.4.24.47", - "3.4.24.48", - "3.4.24.49", - "3.4.24.50", - "3.4.24.51", - "3.4.24.52", - "3.4.24.53", - "3.4.24.54", - "3.4.24.55", - "3.4.24.56", - "3.4.24.57", - "3.4.24.58", - "3.4.24.59", - "3.4.24.60", - "3.4.24.61", - "3.4.24.62", - "3.4.24.63", - "3.4.24.64", - "3.4.24.65", - "3.4.24.66", - "3.4.24.67", - "3.4.24.68", - "3.4.24.69", - "3.4.24.70", - "3.4.24.71", - "3.4.24.72", - "3.4.24.73", - "3.4.24.74", - "3.4.24.75", - "3.4.24.76", - "3.4.24.77", - "3.4.24.78", - "3.4.24.79", - "3.4.24.80", - "3.4.24.81", - "3.4.24.82", - "3.4.24.83", - "3.4.24.84", - "3.4.24.85", - "3.4.24.86", - "3.4.24.87", - "3.4.25.1", - "3.4.25.2", - "3.5.1.1", - "3.5.1.2", - "3.5.1.3", - "3.5.1.4", - "3.5.1.5", - "3.5.1.6", - "3.5.1.7", - "3.5.1.8", - "3.5.1.9", - "3.5.1.10", - "3.5.1.11", - "3.5.1.12", - "3.5.1.13", - "3.5.1.14", - "3.5.1.15", - "3.5.1.16", - "3.5.1.17", - "3.5.1.18", - "3.5.1.19", - "3.5.1.20", - "3.5.1.21", - "3.5.1.22", - "3.5.1.23", - "3.5.1.24", - "3.5.1.25", - "3.5.1.26", - "3.5.1.27", - "3.5.1.28", - "3.5.1.29", - "3.5.1.30", - 
"3.5.1.31", - "3.5.1.32", - "3.5.1.33", - "3.5.1.35", - "3.5.1.36", - "3.5.1.38", - "3.5.1.39", - "3.5.1.40", - "3.5.1.41", - "3.5.1.42", - "3.5.1.43", - "3.5.1.44", - "3.5.1.46", - "3.5.1.47", - "3.5.1.48", - "3.5.1.49", - "3.5.1.50", - "3.5.1.51", - "3.5.1.52", - "3.5.1.53", - "3.5.1.54", - "3.5.1.55", - "3.5.1.56", - "3.5.1.57", - "3.5.1.58", - "3.5.1.59", - "3.5.1.60", - "3.5.1.61", - "3.5.1.62", - "3.5.1.63", - "3.5.1.64", - "3.5.1.65", - "3.5.1.66", - "3.5.1.67", - "3.5.1.68", - "3.5.1.69", - "3.5.1.70", - "3.5.1.71", - "3.5.1.72", - "3.5.1.73", - "3.5.1.74", - "3.5.1.75", - "3.5.1.76", - "3.5.1.77", - "3.5.1.78", - "3.5.1.79", - "3.5.1.81", - "3.5.1.82", - "3.5.1.83", - "3.5.1.84", - "3.5.1.85", - "3.5.1.86", - "3.5.1.87", - "3.5.1.88", - "3.5.1.89", - "3.5.1.90", - "3.5.1.91", - "3.5.1.92", - "3.5.1.93", - "3.5.1.94", - "3.5.1.95", - "3.5.1.96", - "3.5.1.97", - "3.5.1.98", - "3.5.1.99", - "3.5.1.100", - "3.5.1.101", - "3.5.1.102", - "3.5.1.103", - "3.5.2.1", - "3.5.2.2", - "3.5.2.3", - "3.5.2.4", - "3.5.2.5", - "3.5.2.6", - "3.5.2.7", - "3.5.2.9", - "3.5.2.10", - "3.5.2.11", - "3.5.2.12", - "3.5.2.13", - "3.5.2.14", - "3.5.2.15", - "3.5.2.16", - "3.5.2.17", - "3.5.2.18", - "3.5.3.1", - "3.5.3.2", - "3.5.3.3", - "3.5.3.4", - "3.5.3.5", - "3.5.3.6", - "3.5.3.7", - "3.5.3.8", - "3.5.3.9", - "3.5.3.10", - "3.5.3.11", - "3.5.3.12", - "3.5.3.13", - "3.5.3.14", - "3.5.3.15", - "3.5.3.16", - "3.5.3.17", - "3.5.3.18", - "3.5.3.19", - "3.5.3.20", - "3.5.3.21", - "3.5.3.22", - "3.5.3.23", - "3.5.4.1", - "3.5.4.2", - "3.5.4.3", - "3.5.4.4", - "3.5.4.5", - "3.5.4.6", - "3.5.4.7", - "3.5.4.8", - "3.5.4.9", - "3.5.4.10", - "3.5.4.11", - "3.5.4.12", - "3.5.4.13", - "3.5.4.14", - "3.5.4.15", - "3.5.4.16", - "3.5.4.17", - "3.5.4.18", - "3.5.4.19", - "3.5.4.20", - "3.5.4.21", - "3.5.4.22", - "3.5.4.23", - "3.5.4.24", - "3.5.4.25", - "3.5.4.26", - "3.5.4.27", - "3.5.4.28", - "3.5.4.29", - "3.5.4.30", - "3.5.5.1", - "3.5.5.2", - "3.5.5.4", - "3.5.5.5", - "3.5.5.6", - "3.5.5.7", 
- "3.5.5.8", - "3.5.99.1", - "3.5.99.2", - "3.5.99.3", - "3.5.99.4", - "3.5.99.5", - "3.5.99.6", - "3.5.99.7", - "3.6.1.1", - "3.6.1.2", - "3.6.1.3", - "3.6.1.5", - "3.6.1.6", - "3.6.1.7", - "3.6.1.8", - "3.6.1.9", - "3.6.1.10", - "3.6.1.11", - "3.6.1.12", - "3.6.1.13", - "3.6.1.14", - "3.6.1.15", - "3.6.1.16", - "3.6.1.17", - "3.6.1.18", - "3.6.1.19", - "3.6.1.20", - "3.6.1.21", - "3.6.1.22", - "3.6.1.23", - "3.6.1.24", - "3.6.1.25", - "3.6.1.26", - "3.6.1.27", - "3.6.1.28", - "3.6.1.29", - "3.6.1.30", - "3.6.1.31", - "3.6.1.39", - "3.6.1.40", - "3.6.1.41", - "3.6.1.42", - "3.6.1.43", - "3.6.1.44", - "3.6.1.45", - "3.6.1.52", - "3.6.1.53", - "3.6.2.1", - "3.6.2.2", - "3.6.3.1", - "3.6.3.2", - "3.6.3.3", - "3.6.3.4", - "3.6.3.5", - "3.6.3.6", - "3.6.3.7", - "3.6.3.8", - "3.6.3.9", - "3.6.3.10", - "3.6.3.11", - "3.6.3.12", - "3.6.3.14", - "3.6.3.15", - "3.6.3.16", - "3.6.3.17", - "3.6.3.18", - "3.6.3.19", - "3.6.3.20", - "3.6.3.21", - "3.6.3.22", - "3.6.3.23", - "3.6.3.24", - "3.6.3.25", - "3.6.3.26", - "3.6.3.27", - "3.6.3.28", - "3.6.3.29", - "3.6.3.30", - "3.6.3.31", - "3.6.3.32", - "3.6.3.33", - "3.6.3.34", - "3.6.3.35", - "3.6.3.36", - "3.6.3.37", - "3.6.3.38", - "3.6.3.39", - "3.6.3.40", - "3.6.3.41", - "3.6.3.42", - "3.6.3.43", - "3.6.3.44", - "3.6.3.46", - "3.6.3.47", - "3.6.3.48", - "3.6.3.49", - "3.6.3.50", - "3.6.3.51", - "3.6.3.52", - "3.6.3.53", - "3.6.4.1", - "3.6.4.2", - "3.6.4.3", - "3.6.4.4", - "3.6.4.5", - "3.6.4.6", - "3.6.4.7", - "3.6.4.8", - "3.6.4.9", - "3.6.4.10", - "3.6.4.11", - "3.6.4.12", - "3.6.4.13", - "3.6.5.1", - "3.6.5.2", - "3.6.5.3", - "3.6.5.4", - "3.6.5.5", - "3.6.5.6", - "3.7.1.1", - "3.7.1.2", - "3.7.1.3", - "3.7.1.4", - "3.7.1.5", - "3.7.1.6", - "3.7.1.7", - "3.7.1.8", - "3.7.1.9", - "3.7.1.10", - "3.7.1.11", - "3.8.1.1", - "3.8.1.2", - "3.8.1.3", - "3.8.1.5", - "3.8.1.6", - "3.8.1.7", - "3.8.1.8", - "3.8.1.9", - "3.8.1.10", - "3.8.1.11", - "3.9.1.1", - "3.10.1.1", - "3.10.1.2", - "3.11.1.1", - "3.11.1.2", - "3.11.1.3", - 
"3.12.1.1", - "3.13.1.1", - "3.13.1.3", - "4.1.1.1", - "4.1.1.2", - "4.1.1.3", - "4.1.1.4", - "4.1.1.5", - "4.1.1.6", - "4.1.1.7", - "4.1.1.8", - "4.1.1.9", - "4.1.1.11", - "4.1.1.12", - "4.1.1.14", - "4.1.1.15", - "4.1.1.16", - "4.1.1.17", - "4.1.1.18", - "4.1.1.19", - "4.1.1.20", - "4.1.1.21", - "4.1.1.22", - "4.1.1.23", - "4.1.1.24", - "4.1.1.25", - "4.1.1.28", - "4.1.1.29", - "4.1.1.30", - "4.1.1.31", - "4.1.1.32", - "4.1.1.33", - "4.1.1.34", - "4.1.1.35", - "4.1.1.36", - "4.1.1.37", - "4.1.1.38", - "4.1.1.39", - "4.1.1.40", - "4.1.1.41", - "4.1.1.42", - "4.1.1.43", - "4.1.1.44", - "4.1.1.45", - "4.1.1.46", - "4.1.1.47", - "4.1.1.48", - "4.1.1.49", - "4.1.1.50", - "4.1.1.51", - "4.1.1.52", - "4.1.1.53", - "4.1.1.54", - "4.1.1.55", - "4.1.1.56", - "4.1.1.57", - "4.1.1.58", - "4.1.1.59", - "4.1.1.60", - "4.1.1.61", - "4.1.1.62", - "4.1.1.63", - "4.1.1.64", - "4.1.1.65", - "4.1.1.66", - "4.1.1.67", - "4.1.1.68", - "4.1.1.69", - "4.1.1.70", - "4.1.1.71", - "4.1.1.72", - "4.1.1.73", - "4.1.1.74", - "4.1.1.75", - "4.1.1.76", - "4.1.1.77", - "4.1.1.78", - "4.1.1.79", - "4.1.1.80", - "4.1.1.81", - "4.1.1.82", - "4.1.1.83", - "4.1.1.84", - "4.1.1.85", - "4.1.1.86", - "4.1.1.87", - "4.1.1.88", - "4.1.1.89", - "4.1.1.90", - "4.1.2.2", - "4.1.2.4", - "4.1.2.5", - "4.1.2.8", - "4.1.2.9", - "4.1.2.10", - "4.1.2.11", - "4.1.2.12", - "4.1.2.13", - "4.1.2.14", - "4.1.2.17", - "4.1.2.18", - "4.1.2.19", - "4.1.2.20", - "4.1.2.21", - "4.1.2.22", - "4.1.2.23", - "4.1.2.24", - "4.1.2.25", - "4.1.2.26", - "4.1.2.27", - "4.1.2.28", - "4.1.2.29", - "4.1.2.30", - "4.1.2.32", - "4.1.2.33", - "4.1.2.34", - "4.1.2.35", - "4.1.2.36", - "4.1.2.37", - "4.1.2.38", - "4.1.2.40", - "4.1.2.41", - "4.1.2.42", - "4.1.2.43", - "4.1.2.44", - "4.1.2.45", - "4.1.3.1", - "4.1.3.3", - "4.1.3.4", - "4.1.3.6", - "4.1.3.13", - "4.1.3.14", - "4.1.3.16", - "4.1.3.17", - "4.1.3.22", - "4.1.3.24", - "4.1.3.25", - "4.1.3.26", - "4.1.3.27", - "4.1.3.30", - "4.1.3.32", - "4.1.3.34", - "4.1.3.35", - "4.1.3.36", - 
"4.1.3.38", - "4.1.3.39", - "4.1.3.40", - "4.1.99.1", - "4.1.99.2", - "4.1.99.3", - "4.1.99.5", - "4.1.99.11", - "4.1.99.12", - "4.1.99.13", - "4.1.99.14", - "4.1.99.15", - "4.2.1.1", - "4.2.1.2", - "4.2.1.3", - "4.2.1.4", - "4.2.1.5", - "4.2.1.6", - "4.2.1.7", - "4.2.1.8", - "4.2.1.9", - "4.2.1.10", - "4.2.1.11", - "4.2.1.12", - "4.2.1.17", - "4.2.1.18", - "4.2.1.19", - "4.2.1.20", - "4.2.1.22", - "4.2.1.24", - "4.2.1.25", - "4.2.1.27", - "4.2.1.28", - "4.2.1.30", - "4.2.1.31", - "4.2.1.32", - "4.2.1.33", - "4.2.1.34", - "4.2.1.35", - "4.2.1.36", - "4.2.1.39", - "4.2.1.40", - "4.2.1.41", - "4.2.1.42", - "4.2.1.43", - "4.2.1.44", - "4.2.1.45", - "4.2.1.46", - "4.2.1.47", - "4.2.1.48", - "4.2.1.49", - "4.2.1.50", - "4.2.1.51", - "4.2.1.52", - "4.2.1.53", - "4.2.1.54", - "4.2.1.55", - "4.2.1.56", - "4.2.1.57", - "4.2.1.58", - "4.2.1.59", - "4.2.1.60", - "4.2.1.61", - "4.2.1.62", - "4.2.1.65", - "4.2.1.66", - "4.2.1.67", - "4.2.1.68", - "4.2.1.69", - "4.2.1.70", - "4.2.1.73", - "4.2.1.74", - "4.2.1.75", - "4.2.1.76", - "4.2.1.77", - "4.2.1.78", - "4.2.1.79", - "4.2.1.80", - "4.2.1.81", - "4.2.1.82", - "4.2.1.83", - "4.2.1.84", - "4.2.1.85", - "4.2.1.87", - "4.2.1.88", - "4.2.1.89", - "4.2.1.90", - "4.2.1.91", - "4.2.1.92", - "4.2.1.93", - "4.2.1.94", - "4.2.1.95", - "4.2.1.96", - "4.2.1.97", - "4.2.1.98", - "4.2.1.99", - "4.2.1.100", - "4.2.1.101", - "4.2.1.103", - "4.2.1.104", - "4.2.1.105", - "4.2.1.106", - "4.2.1.107", - "4.2.1.108", - "4.2.1.109", - "4.2.1.110", - "4.2.1.111", - "4.2.1.112", - "4.2.1.113", - "4.2.1.114", - "4.2.1.115", - "4.2.1.116", - "4.2.1.117", - "4.2.1.118", - "4.2.1.119", - "4.2.1.120", - "4.2.2.1", - "4.2.2.2", - "4.2.2.3", - "4.2.2.5", - "4.2.2.6", - "4.2.2.7", - "4.2.2.8", - "4.2.2.9", - "4.2.2.10", - "4.2.2.11", - "4.2.2.12", - "4.2.2.13", - "4.2.2.14", - "4.2.2.15", - "4.2.2.16", - "4.2.2.17", - "4.2.2.18", - "4.2.2.19", - "4.2.2.20", - "4.2.2.21", - "4.2.2.22", - "4.2.3.1", - "4.2.3.2", - "4.2.3.3", - "4.2.3.4", - "4.2.3.5", - 
"4.2.3.6", - "4.2.3.7", - "4.2.3.8", - "4.2.3.9", - "4.2.3.10", - "4.2.3.11", - "4.2.3.12", - "4.2.3.13", - "4.2.3.14", - "4.2.3.15", - "4.2.3.16", - "4.2.3.17", - "4.2.3.18", - "4.2.3.19", - "4.2.3.20", - "4.2.3.21", - "4.2.3.22", - "4.2.3.23", - "4.2.3.24", - "4.2.3.25", - "4.2.3.26", - "4.2.3.27", - "4.2.3.28", - "4.2.3.29", - "4.2.3.30", - "4.2.3.31", - "4.2.3.32", - "4.2.3.33", - "4.2.3.34", - "4.2.3.35", - "4.2.3.36", - "4.2.3.37", - "4.2.3.38", - "4.2.3.39", - "4.2.3.40", - "4.2.3.41", - "4.2.3.42", - "4.2.3.43", - "4.2.3.44", - "4.2.3.45", - "4.2.99.12", - "4.2.99.18", - "4.2.99.20", - "4.3.1.1", - "4.3.1.2", - "4.3.1.3", - "4.3.1.4", - "4.3.1.6", - "4.3.1.7", - "4.3.1.9", - "4.3.1.10", - "4.3.1.12", - "4.3.1.13", - "4.3.1.14", - "4.3.1.15", - "4.3.1.16", - "4.3.1.17", - "4.3.1.18", - "4.3.1.19", - "4.3.1.20", - "4.3.1.22", - "4.3.1.23", - "4.3.1.24", - "4.3.1.25", - "4.3.1.26", - "4.3.2.1", - "4.3.2.2", - "4.3.2.3", - "4.3.2.4", - "4.3.2.5", - "4.3.3.1", - "4.3.3.2", - "4.3.3.3", - "4.3.3.4", - "4.3.3.5", - "4.3.99.2", - "4.4.1.1", - "4.4.1.2", - "4.4.1.3", - "4.4.1.4", - "4.4.1.5", - "4.4.1.6", - "4.4.1.8", - "4.4.1.9", - "4.4.1.10", - "4.4.1.11", - "4.4.1.13", - "4.4.1.14", - "4.4.1.15", - "4.4.1.16", - "4.4.1.17", - "4.4.1.19", - "4.4.1.20", - "4.4.1.21", - "4.4.1.22", - "4.4.1.23", - "4.4.1.24", - "4.4.1.25", - "4.5.1.1", - "4.5.1.2", - "4.5.1.3", - "4.5.1.4", - "4.5.1.5", - "4.6.1.1", - "4.6.1.2", - "4.6.1.6", - "4.6.1.12", - "4.6.1.13", - "4.6.1.14", - "4.6.1.15", - "4.99.1.1", - "4.99.1.2", - "4.99.1.3", - "4.99.1.4", - "4.99.1.5", - "4.99.1.6", - "4.99.1.7", - "4.99.1.8", - "5.1.1.1", - "5.1.1.2", - "5.1.1.3", - "5.1.1.4", - "5.1.1.5", - "5.1.1.6", - "5.1.1.7", - "5.1.1.8", - "5.1.1.9", - "5.1.1.10", - "5.1.1.11", - "5.1.1.12", - "5.1.1.13", - "5.1.1.14", - "5.1.1.15", - "5.1.1.16", - "5.1.1.17", - "5.1.1.18", - "5.1.2.1", - "5.1.2.2", - "5.1.2.3", - "5.1.2.4", - "5.1.2.5", - "5.1.2.6", - "5.1.3.1", - "5.1.3.2", - "5.1.3.3", - "5.1.3.4", - 
"5.1.3.5", - "5.1.3.6", - "5.1.3.7", - "5.1.3.8", - "5.1.3.9", - "5.1.3.10", - "5.1.3.11", - "5.1.3.12", - "5.1.3.13", - "5.1.3.14", - "5.1.3.15", - "5.1.3.16", - "5.1.3.17", - "5.1.3.18", - "5.1.3.19", - "5.1.3.20", - "5.1.3.21", - "5.1.3.22", - "5.1.3.23", - "5.1.99.1", - "5.1.99.2", - "5.1.99.3", - "5.1.99.4", - "5.1.99.5", - "5.2.1.1", - "5.2.1.2", - "5.2.1.3", - "5.2.1.4", - "5.2.1.5", - "5.2.1.6", - "5.2.1.7", - "5.2.1.8", - "5.2.1.9", - "5.2.1.10", - "5.3.1.1", - "5.3.1.3", - "5.3.1.4", - "5.3.1.5", - "5.3.1.6", - "5.3.1.7", - "5.3.1.8", - "5.3.1.9", - "5.3.1.12", - "5.3.1.13", - "5.3.1.14", - "5.3.1.15", - "5.3.1.16", - "5.3.1.17", - "5.3.1.20", - "5.3.1.21", - "5.3.1.22", - "5.3.1.23", - "5.3.1.24", - "5.3.1.25", - "5.3.1.26", - "5.3.1.27", - "5.3.2.1", - "5.3.2.2", - "5.3.3.1", - "5.3.3.2", - "5.3.3.3", - "5.3.3.4", - "5.3.3.5", - "5.3.3.6", - "5.3.3.7", - "5.3.3.8", - "5.3.3.9", - "5.3.3.10", - "5.3.3.11", - "5.3.3.12", - "5.3.3.13", - "5.3.3.14", - "5.3.3.15", - "5.3.4.1", - "5.3.99.2", - "5.3.99.3", - "5.3.99.4", - "5.3.99.5", - "5.3.99.6", - "5.3.99.7", - "5.3.99.8", - "5.3.99.9", - "5.4.1.1", - "5.4.1.2", - "5.4.2.1", - "5.4.2.2", - "5.4.2.3", - "5.4.2.4", - "5.4.2.5", - "5.4.2.6", - "5.4.2.7", - "5.4.2.8", - "5.4.2.9", - "5.4.2.10", - "5.4.3.2", - "5.4.3.3", - "5.4.3.4", - "5.4.3.5", - "5.4.3.6", - "5.4.3.7", - "5.4.3.8", - "5.4.4.1", - "5.4.4.2", - "5.4.4.3", - "5.4.99.1", - "5.4.99.2", - "5.4.99.3", - "5.4.99.4", - "5.4.99.5", - "5.4.99.7", - "5.4.99.8", - "5.4.99.9", - "5.4.99.11", - "5.4.99.12", - "5.4.99.13", - "5.4.99.14", - "5.4.99.15", - "5.4.99.16", - "5.4.99.17", - "5.4.99.18", - "5.5.1.1", - "5.5.1.2", - "5.5.1.3", - "5.5.1.4", - "5.5.1.5", - "5.5.1.6", - "5.5.1.7", - "5.5.1.8", - "5.5.1.9", - "5.5.1.10", - "5.5.1.11", - "5.5.1.12", - "5.5.1.13", - "5.5.1.14", - "5.5.1.15", - "5.5.1.16", - "5.99.1.1", - "5.99.1.2", - "5.99.1.3", - "5.99.1.4", - "6.1.1.1", - "6.1.1.2", - "6.1.1.3", - "6.1.1.4", - "6.1.1.5", - "6.1.1.6", - "6.1.1.7", - 
"6.1.1.9", - "6.1.1.10", - "6.1.1.11", - "6.1.1.12", - "6.1.1.13", - "6.1.1.14", - "6.1.1.15", - "6.1.1.16", - "6.1.1.17", - "6.1.1.18", - "6.1.1.19", - "6.1.1.20", - "6.1.1.21", - "6.1.1.22", - "6.1.1.23", - "6.1.1.24", - "6.1.1.25", - "6.1.1.26", - "6.1.1.27", - "6.2.1.1", - "6.2.1.2", - "6.2.1.3", - "6.2.1.4", - "6.2.1.5", - "6.2.1.6", - "6.2.1.7", - "6.2.1.8", - "6.2.1.9", - "6.2.1.10", - "6.2.1.11", - "6.2.1.12", - "6.2.1.13", - "6.2.1.14", - "6.2.1.15", - "6.2.1.16", - "6.2.1.17", - "6.2.1.18", - "6.2.1.19", - "6.2.1.20", - "6.2.1.22", - "6.2.1.23", - "6.2.1.24", - "6.2.1.25", - "6.2.1.26", - "6.2.1.27", - "6.2.1.28", - "6.2.1.30", - "6.2.1.31", - "6.2.1.32", - "6.2.1.33", - "6.2.1.34", - "6.2.1.35", - "6.2.1.36", - "6.3.1.1", - "6.3.1.2", - "6.3.1.4", - "6.3.1.5", - "6.3.1.6", - "6.3.1.7", - "6.3.1.8", - "6.3.1.9", - "6.3.1.10", - "6.3.1.11", - "6.3.1.12", - "6.3.1.13", - "6.3.2.1", - "6.3.2.2", - "6.3.2.3", - "6.3.2.4", - "6.3.2.5", - "6.3.2.6", - "6.3.2.7", - "6.3.2.8", - "6.3.2.9", - "6.3.2.10", - "6.3.2.11", - "6.3.2.12", - "6.3.2.13", - "6.3.2.14", - "6.3.2.16", - "6.3.2.17", - "6.3.2.18", - "6.3.2.19", - "6.3.2.20", - "6.3.2.21", - "6.3.2.22", - "6.3.2.23", - "6.3.2.24", - "6.3.2.25", - "6.3.2.26", - "6.3.2.27", - "6.3.2.28", - "6.3.2.29", - "6.3.2.30", - "6.3.2.31", - "6.3.2.32", - "6.3.2.33", - "6.3.2.34", - "6.3.3.1", - "6.3.3.2", - "6.3.3.3", - "6.3.3.4", - "6.3.4.1", - "6.3.4.2", - "6.3.4.3", - "6.3.4.4", - "6.3.4.5", - "6.3.4.6", - "6.3.4.7", - "6.3.4.8", - "6.3.4.9", - "6.3.4.10", - "6.3.4.11", - "6.3.4.12", - "6.3.4.13", - "6.3.4.14", - "6.3.4.15", - "6.3.4.16", - "6.3.4.17", - "6.3.4.18", - "6.3.5.1", - "6.3.5.2", - "6.3.5.3", - "6.3.5.4", - "6.3.5.5", - "6.3.5.6", - "6.3.5.7", - "6.3.5.9", - "6.3.5.10", - "6.4.1.1", - "6.4.1.2", - "6.4.1.3", - "6.4.1.4", - "6.4.1.5", - "6.4.1.6", - "6.4.1.7", - "6.5.1.1", - "6.5.1.2", - "6.5.1.3", - "6.5.1.4", - "6.6.1.1", - "6.6.1.2" + "1.1.1.1 Alcohol dehydrogenase", + "1.1.1.2 Alcohol dehydrogenase 
(NADP(+))", + "1.1.1.3 Homoserine dehydrogenase", + "1.1.1.4 (R,R)-butanediol dehydrogenase", + "1.1.1.6 Glycerol dehydrogenase", + "1.1.1.7 Propanediol-phosphate dehydrogenase", + "1.1.1.8 Glycerol-3-phosphate dehydrogenase (NAD(+))", + "1.1.1.9 D-xylulose reductase", + "1.1.1.10 L-xylulose reductase", + "1.1.1.11 D-arabinitol 4-dehydrogenase", + "1.1.1.12 L-arabinitol 4-dehydrogenase", + "1.1.1.13 L-arabinitol 2-dehydrogenase", + "1.1.1.14 L-iditol 2-dehydrogenase", + "1.1.1.15 D-iditol 2-dehydrogenase", + "1.1.1.16 Galactitol 2-dehydrogenase", + "1.1.1.17 Mannitol-1-phosphate 5-dehydrogenase", + "1.1.1.18 Inositol 2-dehydrogenase", + "1.1.1.19 Glucuronate reductase", + "1.1.1.20 Glucuronolactone reductase", + "1.1.1.21 Aldehyde reductase", + "1.1.1.22 UDP-glucose 6-dehydrogenase", + "1.1.1.23 Histidinol dehydrogenase", + "1.1.1.24 Quinate dehydrogenase", + "1.1.1.25 Shikimate dehydrogenase", + "1.1.1.26 Glyoxylate reductase", + "1.1.1.27 L-lactate dehydrogenase", + "1.1.1.28 D-lactate dehydrogenase", + "1.1.1.29 Glycerate dehydrogenase", + "1.1.1.30 3-hydroxybutyrate dehydrogenase", + "1.1.1.31 3-hydroxyisobutyrate dehydrogenase", + "1.1.1.32 Mevaldate reductase", + "1.1.1.33 Mevaldate reductase (NADPH)", + "1.1.1.34 Hydroxymethylglutaryl-CoA reductase (NADPH)", + "1.1.1.35 3-hydroxyacyl-CoA dehydrogenase", + "1.1.1.36 Acetoacetyl-CoA reductase", + "1.1.1.37 Malate dehydrogenase", + "1.1.1.38 Malate dehydrogenase (oxaloacetate-decarboxylating)", + "1.1.1.39 Malate dehydrogenase (decarboxylating)", + "1.1.1.40 Malate dehydrogenase (oxaloacetate-decarboxylating) (NADP(+))", + "1.1.1.41 Isocitrate dehydrogenase (NAD(+))", + "1.1.1.42 Isocitrate dehydrogenase (NADP(+))", + "1.1.1.43 Phosphogluconate 2-dehydrogenase", + "1.1.1.44 Phosphogluconate dehydrogenase (decarboxylating)", + "1.1.1.45 L-gulonate 3-dehydrogenase", + "1.1.1.46 L-arabinose 1-dehydrogenase", + "1.1.1.47 Glucose 1-dehydrogenase", + "1.1.1.48 Galactose 1-dehydrogenase", + "1.1.1.49 
Glucose-6-phosphate dehydrogenase", + "1.1.1.50 3-alpha-hydroxysteroid dehydrogenase (B-specific)", + "1.1.1.51 3(or 17)-beta-hydroxysteroid dehydrogenase", + "1.1.1.52 3-alpha-hydroxycholanate dehydrogenase", + "1.1.1.53 3-alpha-(or 20-beta)-hydroxysteroid dehydrogenase", + "1.1.1.54 Allyl-alcohol dehydrogenase", + "1.1.1.55 Lactaldehyde reductase (NADPH)", + "1.1.1.56 Ribitol 2-dehydrogenase", + "1.1.1.57 Fructuronate reductase", + "1.1.1.58 Tagaturonate reductase", + "1.1.1.59 3-hydroxypropionate dehydrogenase", + "1.1.1.60 2-hydroxy-3-oxopropionate reductase", + "1.1.1.61 4-hydroxybutyrate dehydrogenase", + "1.1.1.62 Estradiol 17-beta-dehydrogenase", + "1.1.1.63 Testosterone 17-beta-dehydrogenase", + "1.1.1.64 Testosterone 17-beta-dehydrogenase (NADP(+))", + "1.1.1.65 Pyridoxine 4-dehydrogenase", + "1.1.1.66 Omega-hydroxydecanoate dehydrogenase", + "1.1.1.67 Mannitol 2-dehydrogenase", + "1.1.1.69 Gluconate 5-dehydrogenase", + "1.1.1.71 Alcohol dehydrogenase (NAD(P)(+))", + "1.1.1.72 Glycerol dehydrogenase (NADP(+))", + "1.1.1.73 Octanol dehydrogenase", + "1.1.1.75 (R)-aminopropanol dehydrogenase", + "1.1.1.76 (S,S)-butanediol dehydrogenase", + "1.1.1.77 Lactaldehyde reductase", + "1.1.1.78 Methylglyoxal reductase (NADH-dependent)", + "1.1.1.79 Glyoxylate reductase (NADP(+))", + "1.1.1.80 Isopropanol dehydrogenase (NADP(+))", + "1.1.1.81 Hydroxypyruvate reductase", + "1.1.1.82 Malate dehydrogenase (NADP(+))", + "1.1.1.83 D-malate dehydrogenase (decarboxylating)", + "1.1.1.84 Dimethylmalate dehydrogenase", + "1.1.1.85 3-isopropylmalate dehydrogenase", + "1.1.1.86 Ketol-acid reductoisomerase", + "1.1.1.87 Homoisocitrate dehydrogenase", + "1.1.1.88 Hydroxymethylglutaryl-CoA reductase", + "1.1.1.90 Aryl-alcohol dehydrogenase", + "1.1.1.91 Aryl-alcohol dehydrogenase (NADP(+))", + "1.1.1.92 Oxaloglycolate reductase (decarboxylating)", + "1.1.1.93 Tartrate dehydrogenase", + "1.1.1.94 Glycerol-3-phosphate dehydrogenase (NAD(P)(+))", + "1.1.1.95 Phosphoglycerate 
dehydrogenase", + "1.1.1.96 Diiodophenylpyruvate reductase", + "1.1.1.97 3-hydroxybenzyl-alcohol dehydrogenase", + "1.1.1.98 (R)-2-hydroxy-fatty-acid dehydrogenase", + "1.1.1.99 (S)-2-hydroxy-fatty-acid dehydrogenase", + "1.1.1.100 3-oxoacyl-[acyl-carrier-protein] reductase", + "1.1.1.101 Acylglycerone-phosphate reductase", + "1.1.1.102 3-dehydrosphinganine reductase", + "1.1.1.103 L-threonine 3-dehydrogenase", + "1.1.1.104 4-oxoproline reductase", + "1.1.1.105 Retinol dehydrogenase", + "1.1.1.106 Pantoate 4-dehydrogenase", + "1.1.1.107 Pyridoxal 4-dehydrogenase", + "1.1.1.108 Carnitine 3-dehydrogenase", + "1.1.1.110 Indolelactate dehydrogenase", + "1.1.1.111 3-(imidazol-5-yl)lactate dehydrogenase", + "1.1.1.112 Indanol dehydrogenase", + "1.1.1.113 L-xylose 1-dehydrogenase", + "1.1.1.114 Apiose 1-reductase", + "1.1.1.115 Ribose 1-dehydrogenase (NADP(+))", + "1.1.1.116 D-arabinose 1-dehydrogenase", + "1.1.1.117 D-arabinose 1-dehydrogenase (NAD(P)(+))", + "1.1.1.118 Glucose 1-dehydrogenase (NAD(+))", + "1.1.1.119 Glucose 1-dehydrogenase (NADP(+))", + "1.1.1.120 Galactose 1-dehydrogenase (NADP(+))", + "1.1.1.121 Aldose 1-dehydrogenase", + "1.1.1.122 D-threo-aldose 1-dehydrogenase", + "1.1.1.123 Sorbose 5-dehydrogenase (NADP(+))", + "1.1.1.124 Fructose 5-dehydrogenase (NADP(+))", + "1.1.1.125 2-deoxy-D-gluconate 3-dehydrogenase", + "1.1.1.126 2-dehydro-3-deoxy-D-gluconate 6-dehydrogenase", + "1.1.1.127 2-dehydro-3-deoxy-D-gluconate 5-dehydrogenase", + "1.1.1.128 L-idonate 2-dehydrogenase", + "1.1.1.129 L-threonate 3-dehydrogenase", + "1.1.1.130 3-dehydro-L-gulonate 2-dehydrogenase", + "1.1.1.131 Mannuronate reductase", + "1.1.1.132 GDP-mannose 6-dehydrogenase", + "1.1.1.133 dTDP-4-dehydrorhamnose reductase", + "1.1.1.134 dTDP-6-deoxy-L-talose 4-dehydrogenase", + "1.1.1.135 GDP-6-deoxy-D-talose 4-dehydrogenase", + "1.1.1.136 UDP-N-acetylglucosamine 6-dehydrogenase", + "1.1.1.137 Ribitol-5-phosphate 2-dehydrogenase", + "1.1.1.138 Mannitol 2-dehydrogenase (NADP(+))", + 
"1.1.1.140 Sorbitol-6-phosphate 2-dehydrogenase", + "1.1.1.141 15-hydroxyprostaglandin dehydrogenase (NAD(+))", + "1.1.1.142 D-pinitol dehydrogenase", + "1.1.1.143 Sequoyitol dehydrogenase", + "1.1.1.144 Perillyl-alcohol dehydrogenase", + "1.1.1.145 3-beta-hydroxy-Delta(5)-steroid dehydrogenase", + "1.1.1.146 11-beta-hydroxysteroid dehydrogenase", + "1.1.1.147 16-alpha-hydroxysteroid dehydrogenase", + "1.1.1.148 Estradiol 17-alpha-dehydrogenase", + "1.1.1.149 20-alpha-hydroxysteroid dehydrogenase", + "1.1.1.150 21-hydroxysteroid dehydrogenase (NAD(+))", + "1.1.1.151 21-hydroxysteroid dehydrogenase (NADP(+))", + "1.1.1.152 3-alpha-hydroxy-5-beta-androstane-17-one 3-alpha-dehydrogenase", + "1.1.1.153 Sepiapterin reductase", + "1.1.1.154 Ureidoglycolate dehydrogenase", + "1.1.1.156 Glycerol 2-dehydrogenase (NADP(+))", + "1.1.1.157 3-hydroxybutyryl-CoA dehydrogenase", + "1.1.1.158 UDP-N-acetylmuramate dehydrogenase", + "1.1.1.159 7-alpha-hydroxysteroid dehydrogenase", + "1.1.1.160 Dihydrobunolol dehydrogenase", + "1.1.1.161 Cholestanetetraol 26-dehydrogenase", + "1.1.1.162 Erythrulose reductase", + "1.1.1.163 Cyclopentanol dehydrogenase", + "1.1.1.164 Hexadecanol dehydrogenase", + "1.1.1.165 2-alkyn-1-ol dehydrogenase", + "1.1.1.166 Hydroxycyclohexanecarboxylate dehydrogenase", + "1.1.1.167 Hydroxymalonate dehydrogenase", + "1.1.1.168 2-dehydropantolactone reductase (A-specific)", + "1.1.1.169 2-dehydropantoate 2-reductase", + "1.1.1.170 Sterol-4-alpha-carboxylate 3-dehydrogenase (decarboxylating)", + "1.1.1.172 2-oxoadipate reductase", + "1.1.1.173 L-rhamnose 1-dehydrogenase", + "1.1.1.174 Cyclohexane-1,2-diol dehydrogenase", + "1.1.1.175 D-xylose 1-dehydrogenase", + "1.1.1.176 12-alpha-hydroxysteroid dehydrogenase", + "1.1.1.177 Glycerol-3-phosphate 1-dehydrogenase (NADP(+))", + "1.1.1.178 3-hydroxy-2-methylbutyryl-CoA dehydrogenase", + "1.1.1.179 D-xylose 1-dehydrogenase (NADP(+))", + "1.1.1.181 Cholest-5-ene-3-beta,7-alpha-diol 3-beta-dehydrogenase", + "1.1.1.183 
Geraniol dehydrogenase", + "1.1.1.184 Carbonyl reductase (NADPH)", + "1.1.1.185 L-glycol dehydrogenase", + "1.1.1.186 dTDP-galactose 6-dehydrogenase", + "1.1.1.187 GDP-4-dehydro-D-rhamnose reductase", + "1.1.1.188 Prostaglandin-F synthase", + "1.1.1.189 Prostaglandin-E(2) 9-reductase", + "1.1.1.190 Indole-3-acetaldehyde reductase (NADH)", + "1.1.1.191 Indole-3-acetaldehyde reductase (NADPH)", + "1.1.1.192 Long-chain-alcohol dehydrogenase", + "1.1.1.193 5-amino-6-(5-phosphoribosylamino)uracil reductase", + "1.1.1.194 Coniferyl-alcohol dehydrogenase", + "1.1.1.195 Cinnamyl-alcohol dehydrogenase", + "1.1.1.196 15-hydroxyprostaglandin-D dehydrogenase (NADP(+))", + "1.1.1.197 15-hydroxyprostaglandin dehydrogenase (NADP(+))", + "1.1.1.198 (+)-borneol dehydrogenase", + "1.1.1.199 (S)-usnate reductase", + "1.1.1.200 Aldose-6-phosphate reductase (NADPH)", + "1.1.1.201 7-beta-hydroxysteroid dehydrogenase (NADP(+))", + "1.1.1.202 1,3-propanediol dehydrogenase", + "1.1.1.203 Uronate dehydrogenase", + "1.1.1.205 IMP dehydrogenase", + "1.1.1.206 Tropinone reductase I", + "1.1.1.207 (-)-menthol dehydrogenase", + "1.1.1.208 (+)-neomenthol dehydrogenase", + "1.1.1.209 3(or 17)-alpha-hydroxysteroid dehydrogenase", + "1.1.1.210 3-beta-(or 20-alpha)-hydroxysteroid dehydrogenase", + "1.1.1.211 Long-chain-3-hydroxyacyl-CoA dehydrogenase", + "1.1.1.212 3-oxoacyl-[acyl-carrier-protein] reductase (NADH)", + "1.1.1.213 3-alpha-hydroxysteroid dehydrogenase (A-specific)", + "1.1.1.214 2-dehydropantolactone reductase (B-specific)", + "1.1.1.215 Gluconate 2-dehydrogenase", + "1.1.1.216 Farnesol dehydrogenase", + "1.1.1.217 Benzyl-2-methyl-hydroxybutyrate dehydrogenase", + "1.1.1.218 Morphine 6-dehydrogenase", + "1.1.1.219 Dihydrokaempferol 4-reductase", + "1.1.1.220 6-pyruvoyltetrahydropterin 2'-reductase", + "1.1.1.221 Vomifoliol dehydrogenase", + "1.1.1.222 (R)-4-hydroxyphenyllactate dehydrogenase", + "1.1.1.223 Isopiperitenol dehydrogenase", + "1.1.1.224 Mannose-6-phosphate 6-reductase", + 
"1.1.1.225 Chlordecone reductase", + "1.1.1.226 4-hydroxycyclohexanecarboxylate dehydrogenase", + "1.1.1.227 (-)-borneol dehydrogenase", + "1.1.1.228 (+)-sabinol dehydrogenase", + "1.1.1.229 Diethyl 2-methyl-3-oxosuccinate reductase", + "1.1.1.230 3-alpha-hydroxyglycyrrhetinate dehydrogenase", + "1.1.1.231 15-hydroxyprostaglandin-I dehydrogenase (NADP(+))", + "1.1.1.232 15-hydroxyicosatetraenoate dehydrogenase", + "1.1.1.233 N-acylmannosamine 1-dehydrogenase", + "1.1.1.234 Flavanone 4-reductase", + "1.1.1.235 8-oxocoformycin reductase", + "1.1.1.236 Tropinone reductase II", + "1.1.1.237 Hydroxyphenylpyruvate reductase", + "1.1.1.238 12-beta-hydroxysteroid dehydrogenase", + "1.1.1.239 3-alpha-(17-beta)-hydroxysteroid dehydrogenase (NAD(+))", + "1.1.1.240 N-acetylhexosamine 1-dehydrogenase", + "1.1.1.241 6-endo-hydroxycineole dehydrogenase", + "1.1.1.243 Carveol dehydrogenase", + "1.1.1.244 Methanol dehydrogenase", + "1.1.1.245 Cyclohexanol dehydrogenase", + "1.1.1.246 Pterocarpin synthase", + "1.1.1.247 Codeinone reductase (NADPH)", + "1.1.1.248 Salutaridine reductase (NADPH)", + "1.1.1.250 D-arabinitol 2-dehydrogenase", + "1.1.1.251 Galactitol-1-phosphate 5-dehydrogenase", + "1.1.1.252 Tetrahydroxynaphthalene reductase", + "1.1.1.254 (S)-carnitine 3-dehydrogenase", + "1.1.1.255 Mannitol dehydrogenase", + "1.1.1.256 Fluoren-9-ol dehydrogenase", + "1.1.1.257 4-(hydroxymethyl)benzenesulfonate dehydrogenase", + "1.1.1.258 6-hydroxyhexanoate dehydrogenase", + "1.1.1.259 3-hydroxypimeloyl-CoA dehydrogenase", + "1.1.1.260 Sulcatone reductase", + "1.1.1.261 sn-glycerol-1-phosphate dehydrogenase", + "1.1.1.262 4-hydroxythreonine-4-phosphate dehydrogenase", + "1.1.1.263 1,5-anhydro-D-fructose reductase", + "1.1.1.264 L-idonate 5-dehydrogenase", + "1.1.1.265 3-methylbutanal reductase", + "1.1.1.266 dTDP-4-dehydro-6-deoxyglucose reductase", + "1.1.1.267 1-deoxy-D-xylulose-5-phosphate reductoisomerase", + "1.1.1.268 2-(R)-hydroxypropyl-CoM dehydrogenase", + "1.1.1.269 
2-(S)-hydroxypropyl-CoM dehydrogenase", + "1.1.1.270 3-keto-steroid reductase", + "1.1.1.271 GDP-L-fucose synthase", + "1.1.1.272 (R)-2-hydroxyacid dehydrogenase", + "1.1.1.273 Vellosimine dehydrogenase", + "1.1.1.274 2,5-didehydrogluconate reductase", + "1.1.1.275 (+)-trans-carveol dehydrogenase", + "1.1.1.276 Serine 3-dehydrogenase", + "1.1.1.277 3-beta-hydroxy-5-beta-steroid dehydrogenase", + "1.1.1.278 3-beta-hydroxy-5-alpha-steroid dehydrogenase", + "1.1.1.279 (R)-3-hydroxyacid-ester dehydrogenase", + "1.1.1.280 (S)-3-hydroxyacid-ester dehydrogenase", + "1.1.1.281 GDP-4-dehydro-6-deoxy-D-mannose reductase", + "1.1.1.282 Quinate/shikimate dehydrogenase", + "1.1.1.283 Methylglyoxal reductase (NADPH-dependent)", + "1.1.1.284 S-(hydroxymethyl)glutathione dehydrogenase", + "1.1.1.285 3''-deamino-3''-oxonicotianamine reductase", + "1.1.1.286 Isocitrate--homoisocitrate dehydrogenase", + "1.1.1.287 D-arabinitol dehydrogenase (NADP(+))", + "1.1.1.288 Xanthoxin dehydrogenase", + "1.1.1.289 Sorbose reductase", + "1.1.1.290 4-phosphoerythronate dehydrogenase", + "1.1.1.291 2-hydroxymethylglutarate dehydrogenase", + "1.1.1.292 1,5-anhydro-D-fructose reductase (1,5-anhydro-D-mannitol-forming)", + "1.1.1.294 Chlorophyll(ide) b reductase", + "1.1.1.295 Momilactone-A synthase", + "1.1.1.296 Dihydrocarveol dehydrogenase", + "1.1.1.297 Limonene-1,2-diol dehydrogenase", + "1.1.1.298 3-hydroxypropionate dehydrogenase (NADP(+))", + "1.1.1.299 Malate dehydrogenase (NAD(P)(+))", + "1.1.1.300 NADP-retinol dehydrogenase", + "1.1.1.301 D-arabitol-phosphate dehydrogenase", + "1.1.1.302 2,5-diamino-6-(ribosylamino)-4(3H)-pyrimidinone 5'-phosphate reductase", + "1.1.1.303 Diacetyl reductase ((R)-acetoin forming)", + "1.1.1.304 Diacetyl reductase ((S)-acetoin forming)", + "1.1.1.305 UDP-glucuronic acid oxidase (UDP-4-keto-hexauronic acid decarboxylating)", + "1.1.1.306 S-(hydroxymethyl)mycothiol dehydrogenase", + "1.1.1.307 D-xylose reductase", + "1.1.1.308 Sulfopropanediol 
3-dehydrogenase", + "1.1.1.309 Phosphonoacetaldehyde reductase (NADH)", + "1.1.1.310 (S)-sulfolactate dehydrogenase", + "1.1.1.311 (S)-1-phenylethanol dehydrogenase", + "1.1.1.n3 UDP-N-acetyl-D-mannosamine dehydrogenase", + "1.1.1.n4 (-)-trans-carveol dehydrogenase", + "1.1.1.n5 3-methylmalate dehydrogenase", + "1.1.1.n6 D-chiro-inositol 3-dehydrogenase", + "1.1.1.n7 Benzil reductase", + "1.1.1.n8 L-idonate dehydrogenase", + "1.1.1.n9 D-galacturonate reductase", + "1.1.1.n11 Succinic semialdehyde reductase", + "1.1.2.2 Mannitol dehydrogenase (cytochrome)", + "1.1.2.3 L-lactate dehydrogenase (cytochrome)", + "1.1.2.4 D-lactate dehydrogenase (cytochrome)", + "1.1.2.5 D-lactate dehydrogenase (cytochrome c-553)", + "1.1.2.6 Polyvinyl alcohol dehydrogenase (cytochrome)", + "1.1.2.7 Methanol dehydrogenase (cytochrome c)", + "1.1.2.8 Alcohol dehydrogenase (cytochrome c)", + "1.1.3.3 Malate oxidase", + "1.1.3.4 Glucose oxidase", + "1.1.3.5 Hexose oxidase", + "1.1.3.6 Cholesterol oxidase", + "1.1.3.7 Aryl-alcohol oxidase", + "1.1.3.8 L-gulonolactone oxidase", + "1.1.3.9 Galactose oxidase", + "1.1.3.10 Pyranose oxidase", + "1.1.3.11 L-sorbose oxidase", + "1.1.3.12 Pyridoxine 4-oxidase", + "1.1.3.13 Alcohol oxidase", + "1.1.3.14 Catechol oxidase (dimerizing)", + "1.1.3.15 (S)-2-hydroxy-acid oxidase", + "1.1.3.16 Ecdysone oxidase", + "1.1.3.17 Choline oxidase", + "1.1.3.18 Secondary-alcohol oxidase", + "1.1.3.19 4-hydroxymandelate oxidase", + "1.1.3.20 Long-chain-alcohol oxidase", + "1.1.3.21 Glycerol-3-phosphate oxidase", + "1.1.3.23 Thiamine oxidase", + "1.1.3.27 Hydroxyphytanate oxidase", + "1.1.3.28 Nucleoside oxidase", + "1.1.3.29 N-acylhexosamine oxidase", + "1.1.3.30 Polyvinyl-alcohol oxidase", + "1.1.3.37 D-arabinono-1,4-lactone oxidase", + "1.1.3.38 Vanillyl-alcohol oxidase", + "1.1.3.39 Nucleoside oxidase (H(2)O(2)-forming)", + "1.1.3.40 D-mannitol oxidase", + "1.1.3.41 Alditol oxidase", + "1.1.4.1 Vitamin-K-epoxide reductase (warfarin-sensitive)", + "1.1.4.2 
Vitamin-K-epoxide reductase (warfarin-insensitive)", + "1.1.5.2 Quinoprotein glucose dehydrogenase", + "1.1.5.3 Glycerol-3-phosphate dehydrogenase", + "1.1.5.4 Malate dehydrogenase (quinone)", + "1.1.5.5 Alcohol dehydrogenase (quinone)", + "1.1.5.6 Formate dehydrogenase-N", + "1.1.5.7 Cyclic alcohol dehydrogenase (quinone)", + "1.1.5.8 Quinate dehydrogenase (quinone)", + "1.1.5.n1 Quinoprotein inositol dehydrogenase", + "1.1.98.1 Alcohol dehydrogenase (azurin)", + "1.1.98.2 Glucose-6-phosphate dehydrogenase (coenzyme-F420)", + "1.1.99.1 Choline dehydrogenase", + "1.1.99.2 2-hydroxyglutarate dehydrogenase", + "1.1.99.3 Gluconate 2-dehydrogenase (acceptor)", + "1.1.99.4 Dehydrogluconate dehydrogenase", + "1.1.99.6 D-2-hydroxy-acid dehydrogenase", + "1.1.99.7 Lactate--malate transhydrogenase", + "1.1.99.9 Pyridoxine 5-dehydrogenase", + "1.1.99.10 Glucose dehydrogenase (acceptor)", + "1.1.99.11 Fructose 5-dehydrogenase", + "1.1.99.12 Sorbose dehydrogenase", + "1.1.99.13 Glucoside 3-dehydrogenase", + "1.1.99.14 Glycolate dehydrogenase", + "1.1.99.18 Cellobiose dehydrogenase (acceptor)", + "1.1.99.20 Alkan-1-ol dehydrogenase (acceptor)", + "1.1.99.21 D-sorbitol dehydrogenase (acceptor)", + "1.1.99.22 Glycerol dehydrogenase (acceptor)", + "1.1.99.24 Hydroxyacid-oxoacid transhydrogenase", + "1.1.99.26 3-hydroxycyclohexanone dehydrogenase", + "1.1.99.27 (R)-pantolactone dehydrogenase (flavin)", + "1.1.99.28 Glucose-fructose oxidoreductase", + "1.1.99.29 Pyranose dehydrogenase (acceptor)", + "1.1.99.30 2-oxo-acid reductase", + "1.1.99.31 (S)-mandelate dehydrogenase", + "1.1.99.32 L-sorbose 1-dehydrogenase", + "1.1.99.33 Formate dehydrogenase (acceptor)", + "1.1.99.35 Soluble quinoprotein glucose dehydrogenase", + "1.1.99.36 NDMA-dependent alcohol dehydrogenase", + "1.1.99.37 NDMA-dependent methanol dehydrogenase", + "1.2.1.2 Formate dehydrogenase", + "1.2.1.3 Aldehyde dehydrogenase (NAD(+))", + "1.2.1.4 Aldehyde dehydrogenase (NADP(+))", + "1.2.1.5 Aldehyde dehydrogenase 
(NAD(P)(+))", + "1.2.1.7 Benzaldehyde dehydrogenase (NADP(+))", + "1.2.1.8 Betaine-aldehyde dehydrogenase", + "1.2.1.9 Glyceraldehyde-3-phosphate dehydrogenase (NADP(+))", + "1.2.1.10 Acetaldehyde dehydrogenase (acetylating)", + "1.2.1.11 Aspartate-semialdehyde dehydrogenase", + "1.2.1.12 Glyceraldehyde-3-phosphate dehydrogenase (phosphorylating)", + "1.2.1.13 Glyceraldehyde-3-phosphate dehydrogenase (NADP(+)) (phosphorylating)", + "1.2.1.15 Malonate-semialdehyde dehydrogenase", + "1.2.1.16 Succinate-semialdehyde dehydrogenase (NAD(P)(+))", + "1.2.1.17 Glyoxylate dehydrogenase (acylating)", + "1.2.1.18 Malonate-semialdehyde dehydrogenase (acetylating)", + "1.2.1.19 Aminobutyraldehyde dehydrogenase", + "1.2.1.20 Glutarate-semialdehyde dehydrogenase", + "1.2.1.21 Glycolaldehyde dehydrogenase", + "1.2.1.22 Lactaldehyde dehydrogenase", + "1.2.1.23 2-oxoaldehyde dehydrogenase (NAD(+))", + "1.2.1.24 Succinate-semialdehyde dehydrogenase (NAD(+))", + "1.2.1.25 2-oxoisovalerate dehydrogenase (acylating)", + "1.2.1.26 2,5-dioxovalerate dehydrogenase", + "1.2.1.27 Methylmalonate-semialdehyde dehydrogenase (acylating)", + "1.2.1.28 Benzaldehyde dehydrogenase (NAD(+))", + "1.2.1.29 Aryl-aldehyde dehydrogenase", + "1.2.1.30 Aryl-aldehyde dehydrogenase (NADP(+))", + "1.2.1.31 L-aminoadipate-semialdehyde dehydrogenase", + "1.2.1.32 Aminomuconate-semialdehyde dehydrogenase", + "1.2.1.33 (R)-dehydropantoate dehydrogenase", + "1.2.1.36 Retinal dehydrogenase", + "1.2.1.38 N-acetyl-gamma-glutamyl-phosphate reductase", + "1.2.1.39 Phenylacetaldehyde dehydrogenase", + "1.2.1.40 3-alpha,7-alpha,12-alpha-trihydroxycholestan-26-al 26-oxidoreductase", + "1.2.1.41 Glutamate-5-semialdehyde dehydrogenase", + "1.2.1.42 Hexadecanal dehydrogenase (acylating)", + "1.2.1.43 Formate dehydrogenase (NADP(+))", + "1.2.1.44 Cinnamoyl-CoA reductase", + "1.2.1.45 4-carboxy-2-hydroxymuconate-6-semialdehyde dehydrogenase", + "1.2.1.46 Formaldehyde dehydrogenase", + "1.2.1.47 4-trimethylammoniobutyraldehyde 
dehydrogenase", + "1.2.1.48 Long-chain-aldehyde dehydrogenase", + "1.2.1.49 2-oxoaldehyde dehydrogenase (NADP(+))", + "1.2.1.50 Long-chain-fatty-acyl-CoA reductase", + "1.2.1.51 Pyruvate dehydrogenase (NADP(+))", + "1.2.1.52 Oxoglutarate dehydrogenase (NADP(+))", + "1.2.1.53 4-hydroxyphenylacetaldehyde dehydrogenase", + "1.2.1.54 Gamma-guanidinobutyraldehyde dehydrogenase", + "1.2.1.57 Butanal dehydrogenase", + "1.2.1.58 Phenylglyoxylate dehydrogenase (acylating)", + "1.2.1.59 Glyceraldehyde-3-phosphate dehydrogenase (NAD(P)(+)) (phosphorylating)", + "1.2.1.60 5-carboxymethyl-2-hydroxymuconic-semialdehyde dehydrogenase", + "1.2.1.61 4-hydroxymuconic-semialdehyde dehydrogenase", + "1.2.1.62 4-formylbenzenesulfonate dehydrogenase", + "1.2.1.63 6-oxohexanoate dehydrogenase", + "1.2.1.64 4-hydroxybenzaldehyde dehydrogenase", + "1.2.1.65 Salicylaldehyde dehydrogenase", + "1.2.1.67 Vanillin dehydrogenase", + "1.2.1.68 Coniferyl-aldehyde dehydrogenase", + "1.2.1.69 Fluoroacetaldehyde dehydrogenase", + "1.2.1.70 Glutamyl-tRNA reductase", + "1.2.1.71 Succinylglutamate-semialdehyde dehydrogenase", + "1.2.1.72 Erythrose-4-phosphate dehydrogenase", + "1.2.1.73 Sulfoacetaldehyde dehydrogenase", + "1.2.1.74 Abietadienal dehydrogenase", + "1.2.1.75 Malonyl CoA reductase (malonate semialdehyde-forming)", + "1.2.1.76 Succinate-semialdehyde dehydrogenase (acetylating)", + "1.2.1.77 3,4-dehydroadipyl-CoA semialdehyde dehydrogenase (NADP(+))", + "1.2.1.78 2-formylbenzoate dehydrogenase", + "1.2.1.79 Succinate-semialdehyde dehydrogenase (NADP(+))", + "1.2.1.80 Long-chain acyl-[acyl-carrier-protein] reductase", + "1.2.1.n2 Fatty acyl-CoA reductase", + "1.2.2.1 Formate dehydrogenase (cytochrome)", + "1.2.2.3 Formate dehydrogenase (cytochrome c-553)", + "1.2.2.4 Carbon-monoxide dehydrogenase (cytochrome b-561)", + "1.2.3.1 Aldehyde oxidase", + "1.2.3.3 Pyruvate oxidase", + "1.2.3.4 Oxalate oxidase", + "1.2.3.5 Glyoxylate oxidase", + "1.2.3.6 Pyruvate oxidase (CoA-acetylating)", + "1.2.3.7 
Indole-3-acetaldehyde oxidase", + "1.2.3.8 Pyridoxal oxidase", + "1.2.3.9 Aryl-aldehyde oxidase", + "1.2.3.11 Retinal oxidase", + "1.2.3.13 4-hydroxyphenylpyruvate oxidase", + "1.2.3.14 Abscisic-aldehyde oxidase", + "1.2.4.1 Pyruvate dehydrogenase (acetyl-transferring)", + "1.2.4.2 Oxoglutarate dehydrogenase (succinyl-transferring)", + "1.2.4.4 3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring)", + "1.2.5.1 Pyruvate dehydrogenase (quinone)", + "1.2.7.1 Pyruvate synthase", + "1.2.7.2 2-oxobutyrate synthase", + "1.2.7.3 2-oxoglutarate synthase", + "1.2.7.4 Carbon-monoxide dehydrogenase (ferredoxin)", + "1.2.7.5 Aldehyde ferredoxin oxidoreductase", + "1.2.7.6 Glyceraldehyde-3-phosphate dehydrogenase (ferredoxin)", + "1.2.7.7 3-methyl-2-oxobutanoate dehydrogenase (ferredoxin)", + "1.2.7.8 Indolepyruvate ferredoxin oxidoreductase", + "1.2.99.2 Carbon-monoxide dehydrogenase (acceptor)", + "1.2.99.3 Aldehyde dehydrogenase (pyrroloquinoline-quinone)", + "1.2.99.4 Formaldehyde dismutase", + "1.2.99.5 Formylmethanofuran dehydrogenase", + "1.2.99.6 Carboxylate reductase", + "1.2.99.7 Aldehyde dehydrogenase (FAD-independent)", + "1.3.1.1 Dihydrouracil dehydrogenase (NAD(+))", + "1.3.1.2 Dihydropyrimidine dehydrogenase (NADP(+))", + "1.3.1.3 Delta(4)-3-oxosteroid 5-beta-reductase", + "1.3.1.4 Cortisone alpha-reductase", + "1.3.1.5 Cucurbitacin Delta(23)-reductase", + "1.3.1.6 Fumarate reductase (NADH)", + "1.3.1.7 Meso-tartrate dehydrogenase", + "1.3.1.8 Acyl-CoA dehydrogenase (NADP(+))", + "1.3.1.9 Enoyl-[acyl-carrier-protein] reductase (NADH)", + "1.3.1.10 Enoyl-[acyl-carrier-protein] reductase (NADPH, B-specific)", + "1.3.1.11 2-coumarate reductase", + "1.3.1.12 Prephenate dehydrogenase", + "1.3.1.13 Prephenate dehydrogenase (NADP(+))", + "1.3.1.14 Orotate reductase (NADH)", + "1.3.1.15 Orotate reductase (NADPH)", + "1.3.1.16 Beta-nitroacrylate reductase", + "1.3.1.17 3-methyleneoxindole reductase", + "1.3.1.18 Kynurenate-7,8-dihydrodiol dehydrogenase", + 
"1.3.1.19 Cis-1,2-dihydrobenzene-1,2-diol dehydrogenase", + "1.3.1.20 Trans-1,2-dihydrobenzene-1,2-diol dehydrogenase", + "1.3.1.21 7-dehydrocholesterol reductase", + "1.3.1.22 Cholestenone 5-alpha-reductase", + "1.3.1.24 Biliverdin reductase", + "1.3.1.25 1,6-dihydroxycyclohexa-2,4-diene-1-carboxylate dehydrogenase", + "1.3.1.26 Dihydrodipicolinate reductase", + "1.3.1.27 2-hexadecenal reductase", + "1.3.1.28 2,3-dihydro-2,3-dihydroxybenzoate dehydrogenase", + "1.3.1.29 Cis-1,2-dihydro-1,2-dihydroxynaphthalene dehydrogenase", + "1.3.1.30 Progesterone 5-alpha-reductase", + "1.3.1.31 2-enoate reductase", + "1.3.1.32 Maleylacetate reductase", + "1.3.1.33 Protochlorophyllide reductase", + "1.3.1.34 2,4-dienoyl-CoA reductase (NADPH)", + "1.3.1.35 Phosphatidylcholine desaturase", + "1.3.1.36 Geissoschizine dehydrogenase", + "1.3.1.37 Cis-2-enoyl-CoA reductase (NADPH)", + "1.3.1.38 Trans-2-enoyl-CoA reductase (NADPH)", + "1.3.1.39 Enoyl-[acyl-carrier-protein] reductase (NADPH, A-specific)", + "1.3.1.40 2-hydroxy-6-oxo-6-phenylhexa-2,4-dienoate reductase", + "1.3.1.41 Xanthommatin reductase", + "1.3.1.42 12-oxophytodienoate reductase", + "1.3.1.43 Arogenate dehydrogenase", + "1.3.1.44 Trans-2-enoyl-CoA reductase (NAD(+))", + "1.3.1.45 2'-hydroxyisoflavone reductase", + "1.3.1.46 Biochanin-A reductase", + "1.3.1.47 Alpha-santonin 1,2-reductase", + "1.3.1.48 15-oxoprostaglandin 13-oxidase", + "1.3.1.49 Cis-3,4-dihydrophenanthrene-3,4-diol dehydrogenase", + "1.3.1.51 2'-hydroxydaidzein reductase", + "1.3.1.52 2-methyl-branched-chain-enoyl-CoA reductase", + "1.3.1.53 (3S,4R)-3,4-dihydroxycyclohexa-1,5-diene-1,4-dicarboxylate dehydrogenase", + "1.3.1.54 Precorrin-6A reductase", + "1.3.1.56 Cis-2,3-dihydrobiphenyl-2,3-diol dehydrogenase", + "1.3.1.57 Phloroglucinol reductase", + "1.3.1.58 2,3-dihydroxy-2,3-dihydro-p-cumate dehydrogenase", + "1.3.1.60 Dibenzothiophene dihydrodiol dehydrogenase", + "1.3.1.62 Pimeloyl-CoA dehydrogenase", + "1.3.1.63 2,4-dichlorobenzoyl-CoA 
reductase", + "1.3.1.64 Phthalate 4,5-cis-dihydrodiol dehydrogenase", + "1.3.1.65 5,6-dihydroxy-3-methyl-2-oxo-1,2,5,6-tetrahydroquinoline dehydrogenase", + "1.3.1.66 Cis-dihydroethylcatechol dehydrogenase", + "1.3.1.67 Cis-1,2-dihydroxy-4-methylcyclohexa-3,5-diene-1-carboxylate dehydrogenase", + "1.3.1.68 1,2-dihydroxy-6-methylcyclohexa-3,5-dienecarboxylate dehydrogenase", + "1.3.1.69 Zeatin reductase", + "1.3.1.70 Delta(14)-sterol reductase", + "1.3.1.71 Delta(24(24(1)))-sterol reductase", + "1.3.1.72 Delta(24)-sterol reductase", + "1.3.1.73 1,2-dihydrovomilenine reductase", + "1.3.1.74 2-alkenal reductase", + "1.3.1.75 Divinyl chlorophyllide a 8-vinyl-reductase", + "1.3.1.76 Precorrin-2 dehydrogenase", + "1.3.1.77 Anthocyanidin reductase", + "1.3.1.78 Arogenate dehydrogenase (NADP(+))", + "1.3.1.79 Arogenate dehydrogenase (NAD(P)(+))", + "1.3.1.80 Red chlorophyll catabolite reductase", + "1.3.1.81 (+)-pulegone reductase", + "1.3.1.82 (-)-isopiperitenone reductase", + "1.3.1.83 Geranylgeranyl diphosphate reductase", + "1.3.1.84 Acrylyl-CoA reductase (NADPH)", + "1.3.1.85 Crotonyl-CoA carboxylase/reductase", + "1.3.1.86 Crotonyl-CoA reductase", + "1.3.1.n1 3-(cis-5,6-dihydroxycyclohexa-1,3-dien-1-yl)propanoate dehydrogenase", + "1.3.2.3 L-galactonolactone dehydrogenase", + "1.3.3.1 Dihydroorotate oxidase", + "1.3.3.3 Coproporphyrinogen oxidase", + "1.3.3.4 Protoporphyrinogen oxidase", + "1.3.3.5 Bilirubin oxidase", + "1.3.3.6 Acyl-CoA oxidase", + "1.3.3.7 Dihydrouracil oxidase", + "1.3.3.8 Tetrahydroberberine oxidase", + "1.3.3.9 Secologanin synthase", + "1.3.3.10 Tryptophan alpha,beta-oxidase", + "1.3.3.11 Pyrroloquinoline-quinone synthase", + "1.3.3.12 L-galactonolactone oxidase", + "1.3.5.1 Succinate dehydrogenase (ubiquinone)", + "1.3.5.2 Dihydroorotate dehydrogenase", + "1.3.5.3 Protoporphyrinogen IX dehydrogenase (menaquinone)", + "1.3.5.4 Fumarate reductase (menaquinone)", + "1.3.7.1 6-hydroxynicotinate reductase", + "1.3.7.2 
15,16-dihydrobiliverdin:ferredoxin oxidoreductase", + "1.3.7.3 Phycoerythrobilin:ferredoxin oxidoreductase", + "1.3.7.4 Phytochromobilin:ferredoxin oxidoreductase", + "1.3.7.5 Phycocyanobilin:ferredoxin oxidoreductase", + "1.3.7.6 Phycoerythrobilin synthase", + "1.3.7.7 Ferredoxin:protochlorophyllide reductase (ATP-dependent)", + "1.3.99.1 Succinate dehydrogenase", + "1.3.99.2 Butyryl-CoA dehydrogenase", + "1.3.99.3 Acyl-CoA dehydrogenase", + "1.3.99.4 3-oxosteroid 1-dehydrogenase", + "1.3.99.5 3-oxo-5-alpha-steroid 4-dehydrogenase", + "1.3.99.6 3-oxo-5-beta-steroid 4-dehydrogenase", + "1.3.99.7 Glutaryl-CoA dehydrogenase", + "1.3.99.8 2-furoyl-CoA dehydrogenase", + "1.3.99.10 Isovaleryl-CoA dehydrogenase", + "1.3.99.12 2-methylacyl-CoA dehydrogenase", + "1.3.99.13 Long-chain-acyl-CoA dehydrogenase", + "1.3.99.14 Cyclohexanone dehydrogenase", + "1.3.99.15 Benzoyl-CoA reductase", + "1.3.99.16 Isoquinoline 1-oxidoreductase", + "1.3.99.17 Quinoline 2-oxidoreductase", + "1.3.99.18 Quinaldate 4-oxidoreductase", + "1.3.99.19 Quinoline-4-carboxylate 2-oxidoreductase", + "1.3.99.20 4-hydroxybenzoyl-CoA reductase", + "1.3.99.21 (R)-benzylsuccinyl-CoA dehydrogenase", + "1.3.99.22 Coproporphyrinogen dehydrogenase", + "1.3.99.23 All-trans-retinol 13,14-reductase", + "1.3.99.24 2-amino-4-deoxychorismate dehydrogenase", + "1.3.99.25 Carvone reductase", + "1.3.99.n1 3-hydroxybenzoyl-CoA reductase", + "1.3.99.n2 2-iminoacetate synthase", + "1.4.1.1 Alanine dehydrogenase", + "1.4.1.2 Glutamate dehydrogenase", + "1.4.1.3 Glutamate dehydrogenase (NAD(P)(+))", + "1.4.1.4 Glutamate dehydrogenase (NADP(+))", + "1.4.1.5 L-amino-acid dehydrogenase", + "1.4.1.7 Serine 2-dehydrogenase", + "1.4.1.8 Valine dehydrogenase (NADP(+))", + "1.4.1.9 Leucine dehydrogenase", + "1.4.1.10 Glycine dehydrogenase", + "1.4.1.11 L-erythro-3,5-diaminohexanoate dehydrogenase", + "1.4.1.12 2,4-diaminopentanoate dehydrogenase", + "1.4.1.13 Glutamate synthase (NADPH)", + "1.4.1.14 Glutamate synthase (NADH)", + 
"1.4.1.15 Lysine dehydrogenase", + "1.4.1.16 Diaminopimelate dehydrogenase", + "1.4.1.17 N-methylalanine dehydrogenase", + "1.4.1.18 Lysine 6-dehydrogenase", + "1.4.1.19 Tryptophan dehydrogenase", + "1.4.1.20 Phenylalanine dehydrogenase", + "1.4.1.21 Aspartate dehydrogenase", + "1.4.2.1 Glycine dehydrogenase (cytochrome)", + "1.4.3.1 D-aspartate oxidase", + "1.4.3.2 L-amino-acid oxidase", + "1.4.3.3 D-amino-acid oxidase", + "1.4.3.4 Monoamine oxidase", + "1.4.3.5 Pyridoxal 5'-phosphate synthase", + "1.4.3.7 D-glutamate oxidase", + "1.4.3.8 Ethanolamine oxidase", + "1.4.3.10 Putrescine oxidase", + "1.4.3.11 L-glutamate oxidase", + "1.4.3.12 Cyclohexylamine oxidase", + "1.4.3.13 Protein-lysine 6-oxidase", + "1.4.3.14 L-lysine oxidase", + "1.4.3.15 D-glutamate(D-aspartate) oxidase", + "1.4.3.16 L-aspartate oxidase", + "1.4.3.19 Glycine oxidase", + "1.4.3.20 L-lysine 6-oxidase", + "1.4.3.21 Primary-amine oxidase", + "1.4.3.22 Diamine oxidase", + "1.4.3.23 7-chloro-L-tryptophan oxidase", + "1.4.4.2 Glycine dehydrogenase (decarboxylating)", + "1.4.5.1 D-amino acid dehydrogenase (quinone)", + "1.4.7.1 Glutamate synthase (ferredoxin)", + "1.4.99.1 D-amino-acid dehydrogenase", + "1.4.99.2 Taurine dehydrogenase", + "1.4.99.3 Amine dehydrogenase", + "1.4.99.4 Aralkylamine dehydrogenase", + "1.4.99.5 Glycine dehydrogenase (cyanide-forming)", + "1.5.1.1 Pyrroline-2-carboxylate reductase", + "1.5.1.2 Pyrroline-5-carboxylate reductase", + "1.5.1.3 Dihydrofolate reductase", + "1.5.1.5 Methylenetetrahydrofolate dehydrogenase (NADP(+))", + "1.5.1.6 Formyltetrahydrofolate dehydrogenase", + "1.5.1.7 Saccharopine dehydrogenase (NAD(+), L-lysine-forming)", + "1.5.1.8 Saccharopine dehydrogenase (NADP(+), L-lysine-forming)", + "1.5.1.9 Saccharopine dehydrogenase (NAD(+), L-glutamate-forming)", + "1.5.1.10 Saccharopine dehydrogenase (NADP(+), L-glutamate-forming)", + "1.5.1.11 D-octopine dehydrogenase", + "1.5.1.12 1-pyrroline-5-carboxylate dehydrogenase", + "1.5.1.15 
Methylenetetrahydrofolate dehydrogenase (NAD(+))", + "1.5.1.16 D-lysopine dehydrogenase", + "1.5.1.17 Alanopine dehydrogenase", + "1.5.1.18 Ephedrine dehydrogenase", + "1.5.1.19 D-nopaline dehydrogenase", + "1.5.1.20 Methylenetetrahydrofolate reductase (NAD(P)H)", + "1.5.1.21 Delta(1)-piperideine-2-carboxylate reductase", + "1.5.1.22 Strombine dehydrogenase", + "1.5.1.23 Tauropine dehydrogenase", + "1.5.1.24 N(5)-(carboxyethyl)ornithine synthase", + "1.5.1.25 Thiomorpholine-carboxylate dehydrogenase", + "1.5.1.26 Beta-alanopine dehydrogenase", + "1.5.1.27 1,2-dehydroreticulinium reductase (NADPH)", + "1.5.1.28 Opine dehydrogenase", + "1.5.1.29 FMN reductase", + "1.5.1.30 Flavin reductase", + "1.5.1.31 Berberine reductase", + "1.5.1.32 Vomilenine reductase", + "1.5.1.33 Pteridine reductase", + "1.5.1.34 6,7-dihydropteridine reductase", + "1.5.3.1 Sarcosine oxidase", + "1.5.3.2 N-methyl-L-amino-acid oxidase", + "1.5.3.4 N(6)-methyl-lysine oxidase", + "1.5.3.5 (S)-6-hydroxynicotine oxidase", + "1.5.3.6 (R)-6-hydroxynicotine oxidase", + "1.5.3.7 L-pipecolate oxidase", + "1.5.3.10 Dimethylglycine oxidase", + "1.5.3.12 Dihydrobenzophenanthridine oxidase", + "1.5.3.13 N(1)-acetylpolyamine oxidase", + "1.5.3.14 Polyamine oxidase (propane-1,3-diamine-forming)", + "1.5.3.15 N(8)-acetylspermidine oxidase (propane-1,3-diamine-forming)", + "1.5.3.16 Spermine oxidase", + "1.5.3.17 Non-specific polyamine oxidase", + "1.5.3.18 L-saccharopine oxidase", + "1.5.4.1 Pyrimidodiazepine synthase", + "1.5.5.1 Electron-transferring-flavoprotein dehydrogenase", + "1.5.7.1 Methylenetetrahydrofolate reductase (ferredoxin)", + "1.5.8.1 Dimethylamine dehydrogenase", + "1.5.8.2 Trimethylamine dehydrogenase", + "1.5.99.1 Sarcosine dehydrogenase", + "1.5.99.2 Dimethylglycine dehydrogenase", + "1.5.99.3 L-pipecolate dehydrogenase", + "1.5.99.4 Nicotine dehydrogenase", + "1.5.99.5 Methylglutamate dehydrogenase", + "1.5.99.6 Spermidine dehydrogenase", + "1.5.99.8 Proline dehydrogenase", + "1.5.99.9 
Methylenetetrahydromethanopterin dehydrogenase", + "1.5.99.11 5,10-methylenetetrahydromethanopterin reductase", + "1.5.99.12 Cytokinin dehydrogenase", + "1.5.99.13 D-proline dehydrogenase", + "1.6.1.1 NAD(P)(+) transhydrogenase (B-specific)", + "1.6.1.2 NAD(P)(+) transhydrogenase (AB-specific)", + "1.6.2.2 Cytochrome-b5 reductase", + "1.6.2.4 NADPH--hemoprotein reductase", + "1.6.2.5 NADPH--cytochrome-c2 reductase", + "1.6.2.6 Leghemoglobin reductase", + "1.6.3.1 NAD(P)H oxidase", + "1.6.5.2 NAD(P)H dehydrogenase (quinone)", + "1.6.5.3 NADH dehydrogenase (ubiquinone)", + "1.6.5.4 Monodehydroascorbate reductase (NADH)", + "1.6.5.5 NADPH:quinone reductase", + "1.6.5.6 p-benzoquinone reductase (NADPH)", + "1.6.5.7 2-hydroxy-1,4-benzoquinone reductase", + "1.6.5.8 NADH:ubiquinone reductase (Na(+)-transporting)", + "1.6.6.9 Trimethylamine-N-oxide reductase", + "1.6.99.1 NADPH dehydrogenase", + "1.6.99.3 NADH dehydrogenase", + "1.6.99.5 NADH dehydrogenase (quinone)", + "1.6.99.6 NADPH dehydrogenase (quinone)", + "1.7.1.1 Nitrate reductase (NADH)", + "1.7.1.2 Nitrate reductase (NAD(P)H)", + "1.7.1.3 Nitrate reductase (NADPH)", + "1.7.1.4 Nitrite reductase (NAD(P)H)", + "1.7.1.5 Hyponitrite reductase", + "1.7.1.6 Azobenzene reductase", + "1.7.1.7 GMP reductase", + "1.7.1.9 Nitroquinoline-N-oxide reductase", + "1.7.1.10 Hydroxylamine reductase (NADH)", + "1.7.1.11 4-(dimethylamino)phenylazoxybenzene reductase", + "1.7.1.12 N-hydroxy-2-acetamidofluorene reductase", + "1.7.1.13 PreQ(1) synthase", + "1.7.2.1 Nitrite reductase (NO-forming)", + "1.7.2.2 Nitrite reductase (cytochrome; ammonia-forming)", + "1.7.2.3 Trimethylamine-N-oxide reductase (cytochrome c)", + "1.7.3.1 Nitroalkane oxidase", + "1.7.3.2 Acetylindoxyl oxidase", + "1.7.3.3 Factor independent urate hydroxylase", + "1.7.3.4 Hydroxylamine oxidase", + "1.7.3.5 3-aci-nitropropanoate oxidase", + "1.7.5.1 Nitrate reductase (quinone)", + "1.7.7.1 Ferredoxin--nitrite reductase", + "1.7.7.2 Ferredoxin--nitrate reductase", 
+ "1.7.99.1 Hydroxylamine reductase", + "1.7.99.4 Nitrate reductase", + "1.7.99.6 Nitrous-oxide reductase", + "1.7.99.7 Nitric-oxide reductase", + "1.7.99.8 Hydrazine oxidoreductase", + "1.8.1.2 Sulfite reductase (NADPH)", + "1.8.1.3 Hypotaurine dehydrogenase", + "1.8.1.4 Dihydrolipoyl dehydrogenase", + "1.8.1.5 2-oxopropyl-CoM reductase (carboxylating)", + "1.8.1.6 Cystine reductase", + "1.8.1.7 Glutathione-disulfide reductase", + "1.8.1.8 Protein-disulfide reductase", + "1.8.1.9 Thioredoxin-disulfide reductase", + "1.8.1.10 CoA-glutathione reductase", + "1.8.1.11 Asparagusate reductase", + "1.8.1.12 Trypanothione-disulfide reductase", + "1.8.1.13 Bis-gamma-glutamylcystine reductase", + "1.8.1.14 CoA-disulfide reductase", + "1.8.1.15 Mycothione reductase", + "1.8.1.16 Glutathione amide reductase", + "1.8.2.1 Sulfite dehydrogenase", + "1.8.2.2 Thiosulfate dehydrogenase", + "1.8.3.1 Sulfite oxidase", + "1.8.3.2 Thiol oxidase", + "1.8.3.3 Glutathione oxidase", + "1.8.3.4 Methanethiol oxidase", + "1.8.3.5 Prenylcysteine oxidase", + "1.8.4.1 Glutathione--homocystine transhydrogenase", + "1.8.4.2 Protein-disulfide reductase (glutathione)", + "1.8.4.3 Glutathione--CoA-glutathione transhydrogenase", + "1.8.4.4 Glutathione--cystine transhydrogenase", + "1.8.4.7 Enzyme-thiol transhydrogenase (glutathione-disulfide)", + "1.8.4.8 Phosphoadenylyl-sulfate reductase (thioredoxin)", + "1.8.4.9 Adenylyl-sulfate reductase (glutathione)", + "1.8.4.10 Adenylyl-sulfate reductase (thioredoxin)", + "1.8.4.11 Peptide-methionine (S)-S-oxide reductase", + "1.8.4.12 Peptide-methionine (R)-S-oxide reductase", + "1.8.4.13 L-methionine (S)-S-oxide reductase", + "1.8.4.14 L-methionine (R)-S-oxide reductase", + "1.8.5.1 Glutathione dehydrogenase (ascorbate)", + "1.8.5.2 Thiosulfate dehydrogenase (quinone)", + "1.8.7.1 Sulfite reductase (ferredoxin)", + "1.8.7.2 Ferredoxin:thioredoxin reductase", + "1.8.98.1 CoB--CoM heterodisulfide reductase", + "1.8.98.2 Sulfiredoxin", + "1.8.99.1 Sulfite 
reductase", + "1.8.99.2 Adenylyl-sulfate reductase", + "1.8.99.3 Hydrogensulfite reductase", + "1.9.3.1 Cytochrome-c oxidase", + "1.9.6.1 Nitrate reductase (cytochrome)", + "1.9.99.1 Iron--cytochrome-c reductase", + "1.10.1.1 Trans-acenaphthene-1,2-diol dehydrogenase", + "1.10.2.1 L-ascorbate--cytochrome-b5 reductase", + "1.10.2.2 Ubiquinol--cytochrome-c reductase", + "1.10.3.1 Catechol oxidase", + "1.10.3.2 Laccase", + "1.10.3.3 L-ascorbate oxidase", + "1.10.3.4 o-aminophenol oxidase", + "1.10.3.5 3-hydroxyanthranilate oxidase", + "1.10.3.6 Rifamycin-B oxidase", + "1.10.99.1 Plastoquinol--plastocyanin reductase", + "1.10.99.2 Ribosyldihydronicotinamide dehydrogenase (quinone)", + "1.10.99.3 Violaxanthin de-epoxidase", + "1.11.1.1 NADH peroxidase", + "1.11.1.2 NADPH peroxidase", + "1.11.1.3 Fatty-acid peroxidase", + "1.11.1.5 Cytochrome-c peroxidase", + "1.11.1.6 Catalase", + "1.11.1.7 Peroxidase", + "1.11.1.8 Iodide peroxidase", + "1.11.1.9 Glutathione peroxidase", + "1.11.1.10 Chloride peroxidase", + "1.11.1.11 L-ascorbate peroxidase", + "1.11.1.12 Phospholipid-hydroperoxide glutathione peroxidase", + "1.11.1.13 Manganese peroxidase", + "1.11.1.14 Lignin peroxidase", + "1.11.1.15 Peroxiredoxin", + "1.11.1.16 Versatile peroxidase", + "1.11.1.17 Glutathione amide-dependent peroxidase", + "1.11.1.18 Bromide peroxidase", + "1.11.1.19 Dye decolorizing peroxidase", + "1.11.1.20 Prostamide/prostaglandin F(2-alpha) synthase", + "1.11.2.1 Unspecific peroxygenase", + "1.11.2.2 Myeloperoxidase", + "1.11.2.3 Plant seed peroxygenase", + "1.11.2.4 Fatty-acid peroxygenase", + "1.12.1.2 Hydrogen dehydrogenase", + "1.12.1.3 Hydrogen dehydrogenase (NADP(+))", + "1.12.2.1 Cytochrome-c3 hydrogenase", + "1.12.5.1 Hydrogen:quinone oxidoreductase", + "1.12.7.2 Ferredoxin hydrogenase", + "1.12.98.1 Coenzyme F420 hydrogenase", + "1.12.98.2 5,10-methenyltetrahydromethanopterin hydrogenase", + "1.12.98.3 Methanosarcina-phenazine hydrogenase", + "1.12.99.6 Hydrogenase (acceptor)", + 
"1.13.11.1 Catechol 1,2-dioxygenase", + "1.13.11.2 Catechol 2,3-dioxygenase", + "1.13.11.3 Protocatechuate 3,4-dioxygenase", + "1.13.11.4 Gentisate 1,2-dioxygenase", + "1.13.11.5 Homogentisate 1,2-dioxygenase", + "1.13.11.6 3-hydroxyanthranilate 3,4-dioxygenase", + "1.13.11.8 Protocatechuate 4,5-dioxygenase", + "1.13.11.9 2,5-dihydroxypyridine 5,6-dioxygenase", + "1.13.11.10 7,8-dihydroxykynurenate 8,8a-dioxygenase", + "1.13.11.11 Tryptophan 2,3-dioxygenase", + "1.13.11.12 Lipoxygenase", + "1.13.11.13 Ascorbate 2,3-dioxygenase", + "1.13.11.14 2,3-dihydroxybenzoate 3,4-dioxygenase", + "1.13.11.15 3,4-dihydroxyphenylacetate 2,3-dioxygenase", + "1.13.11.16 3-carboxyethylcatechol 2,3-dioxygenase", + "1.13.11.17 Indole 2,3-dioxygenase", + "1.13.11.18 Sulfur dioxygenase", + "1.13.11.19 Cysteamine dioxygenase", + "1.13.11.20 Cysteine dioxygenase", + "1.13.11.22 Caffeate 3,4-dioxygenase", + "1.13.11.23 2,3-dihydroxyindole 2,3-dioxygenase", + "1.13.11.24 Quercetin 2,3-dioxygenase", + "1.13.11.25 3,4-dihydroxy-9,10-secoandrosta-1,3,5(10)-triene-9,17-dione 4,5-dioxygenase", + "1.13.11.26 Peptide-tryptophan 2,3-dioxygenase", + "1.13.11.27 4-hydroxyphenylpyruvate dioxygenase", + "1.13.11.28 2,3-dihydroxybenzoate 2,3-dioxygenase", + "1.13.11.29 Stizolobate synthase", + "1.13.11.30 Stizolobinate synthase", + "1.13.11.31 Arachidonate 12-lipoxygenase", + "1.13.11.33 Arachidonate 15-lipoxygenase", + "1.13.11.34 Arachidonate 5-lipoxygenase", + "1.13.11.35 Pyrogallol 1,2-oxygenase", + "1.13.11.36 Chloridazon-catechol dioxygenase", + "1.13.11.37 Hydroxyquinol 1,2-dioxygenase", + "1.13.11.38 1-hydroxy-2-naphthoate 1,2-dioxygenase", + "1.13.11.39 Biphenyl-2,3-diol 1,2-dioxygenase", + "1.13.11.40 Arachidonate 8-lipoxygenase", + "1.13.11.41 2,4'-dihydroxyacetophenone dioxygenase", + "1.13.11.43 Lignostilbene alpha-beta-dioxygenase", + "1.13.11.44 Linoleate diol synthase", + "1.13.11.45 Linoleate 11-lipoxygenase", + "1.13.11.46 4-hydroxymandelate synthase", + "1.13.11.47 
3-hydroxy-4-oxoquinoline 2,4-dioxygenase", + "1.13.11.48 3-hydroxy-2-methylquinolin-4-one 2,4-dioxygenase", + "1.13.11.49 Chlorite O(2)-lyase", + "1.13.11.50 Acetylacetone-cleaving enzyme", + "1.13.11.51 9-cis-epoxycarotenoid dioxygenase", + "1.13.11.52 Indoleamine 2,3-dioxygenase", + "1.13.11.53 Acireductone dioxygenase (Ni(2+)-requiring)", + "1.13.11.54 Acireductone dioxygenase (Fe(2+)-requiring)", + "1.13.11.55 Sulfur oxygenase/reductase", + "1.13.11.56 1,2-dihydroxynaphthalene dioxygenase", + "1.13.11.n1 2-aminophenol 1,6-dioxygenase", + "1.13.12.1 Arginine 2-monooxygenase", + "1.13.12.2 Lysine 2-monooxygenase", + "1.13.12.3 Tryptophan 2-monooxygenase", + "1.13.12.4 Lactate 2-monooxygenase", + "1.13.12.5 Renilla-luciferin 2-monooxygenase", + "1.13.12.6 Cypridina-luciferin 2-monooxygenase", + "1.13.12.7 Photinus-luciferin 4-monooxygenase (ATP-hydrolyzing)", + "1.13.12.8 Watasenia-luciferin 2-monooxygenase", + "1.13.12.9 Phenylalanine 2-monooxygenase", + "1.13.12.12 Apo-beta-carotenoid-14',13'-dioxygenase", + "1.13.12.13 Oplophorus-luciferin 2-monooxygenase", + "1.13.12.14 Chlorophyllide-a oxygenase", + "1.13.12.15 3,4-dihydroxyphenylalanine oxidative deaminase", + "1.13.12.16 Nitronate monooxygenase", + "1.13.12.17 Dichloroarcyriaflavin A synthase", + "1.13.12.18 Dinoflagellate luciferase", + "1.13.99.1 Inositol oxygenase", + "1.13.99.3 Tryptophan 2'-dioxygenase", + "1.14.11.1 Gamma-butyrobetaine dioxygenase", + "1.14.11.2 Procollagen-proline dioxygenase", + "1.14.11.3 Pyrimidine-deoxynucleoside 2'-dioxygenase", + "1.14.11.4 Procollagen-lysine 5-dioxygenase", + "1.14.11.6 Thymine dioxygenase", + "1.14.11.7 Procollagen-proline 3-dioxygenase", + "1.14.11.8 Trimethyllysine dioxygenase", + "1.14.11.9 Flavanone 3-dioxygenase", + "1.14.11.10 Pyrimidine-deoxynucleoside 1'-dioxygenase", + "1.14.11.11 Hyoscyamine (6S)-dioxygenase", + "1.14.11.12 Gibberellin-44 dioxygenase", + "1.14.11.13 Gibberellin 2-beta-dioxygenase", + "1.14.11.14 6-beta-hydroxyhyoscyamine epoxidase", 
+ "1.14.11.15 Gibberellin 3-beta-dioxygenase", + "1.14.11.16 Peptide-aspartate beta-dioxygenase", + "1.14.11.17 Taurine dioxygenase", + "1.14.11.18 Phytanoyl-CoA dioxygenase", + "1.14.11.19 Leucocyanidin oxygenase", + "1.14.11.20 Deacetoxyvindoline 4-hydroxylase", + "1.14.11.21 Clavaminate synthase", + "1.14.11.22 Flavone synthase", + "1.14.11.23 Flavonol synthase", + "1.14.11.24 2'-deoxymugineic-acid 2'-dioxygenase", + "1.14.11.25 Mugineic-acid 3-dioxygenase", + "1.14.11.26 Deacetoxycephalosporin-C hydroxylase", + "1.14.11.27 [Histone H3]-lysine-36 demethylase", + "1.14.11.28 Proline 3-hydroxylase", + "1.14.11.29 Hypoxia-inducible factor-proline dioxygenase", + "1.14.11.30 Hypoxia-inducible factor-asparagine dioxygenase", + "1.14.11.31 Thebaine 6-O-demethylase", + "1.14.11.32 Codeine 3-O-demethylase", + "1.14.11.n1 L-asparagine oxygenase", + "1.14.12.1 Anthranilate 1,2-dioxygenase (deaminating, decarboxylating)", + "1.14.12.3 Benzene 1,2-dioxygenase", + "1.14.12.4 3-hydroxy-2-methylpyridinecarboxylate dioxygenase", + "1.14.12.5 5-pyridoxate dioxygenase", + "1.14.12.7 Phthalate 4,5-dioxygenase", + "1.14.12.8 4-sulfobenzoate 3,4-dioxygenase", + "1.14.12.9 4-chlorophenylacetate 3,4-dioxygenase", + "1.14.12.10 Benzoate 1,2-dioxygenase", + "1.14.12.11 Toluene dioxygenase", + "1.14.12.12 Naphthalene 1,2-dioxygenase", + "1.14.12.13 2-chlorobenzoate 1,2-dioxygenase", + "1.14.12.14 2-aminobenzenesulfonate 2,3-dioxygenase", + "1.14.12.15 Terephthalate 1,2-dioxygenase", + "1.14.12.16 2-hydroxyquinoline 5,6-dioxygenase", + "1.14.12.17 Nitric oxide dioxygenase", + "1.14.12.18 Biphenyl 2,3-dioxygenase", + "1.14.12.19 3-phenylpropanoate dioxygenase", + "1.14.12.20 Pheophorbide a oxygenase", + "1.14.12.21 Benzoyl-CoA 2,3-dioxygenase", + "1.14.12.22 Carbazole 1,9a-dioxygenase", + "1.14.13.1 Salicylate 1-monooxygenase", + "1.14.13.2 4-hydroxybenzoate 3-monooxygenase", + "1.14.13.3 4-hydroxyphenylacetate 3-monooxygenase", + "1.14.13.4 Melilotate 3-monooxygenase", + "1.14.13.5 
Imidazoleacetate 4-monooxygenase", + "1.14.13.6 Orcinol 2-monooxygenase", + "1.14.13.7 Phenol 2-monooxygenase", + "1.14.13.8 Flavin-containing monooxygenase", + "1.14.13.9 Kynurenine 3-monooxygenase", + "1.14.13.10 2,6-dihydroxypyridine 3-monooxygenase", + "1.14.13.11 Trans-cinnamate 4-monooxygenase", + "1.14.13.12 Benzoate 4-monooxygenase", + "1.14.13.13 Calcidiol 1-monooxygenase", + "1.14.13.14 Trans-cinnamate 2-monooxygenase", + "1.14.13.15 Cholestanetriol 26-monooxygenase", + "1.14.13.16 Cyclopentanone monooxygenase", + "1.14.13.17 Cholesterol 7-alpha-monooxygenase", + "1.14.13.18 4-hydroxyphenylacetate 1-monooxygenase", + "1.14.13.19 Taxifolin 8-monooxygenase", + "1.14.13.20 2,4-dichlorophenol 6-monooxygenase", + "1.14.13.21 Flavonoid 3'-monooxygenase", + "1.14.13.22 Cyclohexanone monooxygenase", + "1.14.13.23 3-hydroxybenzoate 4-monooxygenase", + "1.14.13.24 3-hydroxybenzoate 6-monooxygenase", + "1.14.13.25 Methane monooxygenase", + "1.14.13.26 Phosphatidylcholine 12-monooxygenase", + "1.14.13.27 4-aminobenzoate 1-monooxygenase", + "1.14.13.28 3,9-dihydroxypterocarpan 6A-monooxygenase", + "1.14.13.29 4-nitrophenol 2-monooxygenase", + "1.14.13.30 Leukotriene-B(4) 20-monooxygenase", + "1.14.13.31 2-nitrophenol 2-monooxygenase", + "1.14.13.32 Albendazole monooxygenase", + "1.14.13.33 4-hydroxybenzoate 3-monooxygenase (NAD(P)H)", + "1.14.13.34 Leukotriene-E(4) 20-monooxygenase", + "1.14.13.35 Anthranilate 3-monooxygenase (deaminating)", + "1.14.13.36 5-O-(4-coumaroyl)-D-quinate 3'-monooxygenase", + "1.14.13.37 Methyltetrahydroprotoberberine 14-monooxygenase", + "1.14.13.38 Anhydrotetracycline monooxygenase", + "1.14.13.39 Nitric-oxide synthase", + "1.14.13.40 Anthraniloyl-CoA monooxygenase", + "1.14.13.41 Tyrosine N-monooxygenase", + "1.14.13.42 Hydroxyphenylacetonitrile 2-monooxygenase", + "1.14.13.43 Questin monooxygenase", + "1.14.13.44 2-hydroxybiphenyl 3-monooxygenase", + "1.14.13.46 (-)-menthol monooxygenase", + "1.14.13.47 (S)-limonene 3-monooxygenase", + 
"1.14.13.48 (S)-limonene 6-monooxygenase", + "1.14.13.49 (S)-limonene 7-monooxygenase", + "1.14.13.50 Pentachlorophenol monooxygenase", + "1.14.13.51 6-oxocineole dehydrogenase", + "1.14.13.52 Isoflavone 3'-hydroxylase", + "1.14.13.53 4'-methoxyisoflavone 2'-hydroxylase", + "1.14.13.54 Ketosteroid monooxygenase", + "1.14.13.55 Protopine 6-monooxygenase", + "1.14.13.56 Dihydrosanguinarine 10-monooxygenase", + "1.14.13.57 Dihydrochelirubine 12-monooxygenase", + "1.14.13.58 Benzoyl-CoA 3-monooxygenase", + "1.14.13.59 L-lysine 6-monooxygenase (NADPH)", + "1.14.13.60 27-hydroxycholesterol 7-alpha-monooxygenase", + "1.14.13.61 2-hydroxyquinoline 8-monooxygenase", + "1.14.13.62 4-hydroxyquinoline 3-monooxygenase", + "1.14.13.63 3-hydroxyphenylacetate 6-hydroxylase", + "1.14.13.64 4-hydroxybenzoate 1-hydroxylase", + "1.14.13.66 2-hydroxycyclohexanone 2-monooxygenase", + "1.14.13.67 Quinine 3-monooxygenase", + "1.14.13.68 4-hydroxyphenylacetaldehyde oxime monooxygenase", + "1.14.13.69 Alkene monooxygenase", + "1.14.13.70 Sterol 14-demethylase", + "1.14.13.71 N-methylcoclaurine 3'-monooxygenase", + "1.14.13.72 Methylsterol monooxygenase", + "1.14.13.73 Tabersonine 16-hydroxylase", + "1.14.13.74 7-deoxyloganin 7-hydroxylase", + "1.14.13.75 Vinorine hydroxylase", + "1.14.13.76 Taxane 10-beta-hydroxylase", + "1.14.13.77 Taxane 13-alpha-hydroxylase", + "1.14.13.78 Ent-kaurene oxidase", + "1.14.13.79 Ent-kaurenoic acid oxidase", + "1.14.13.80 (R)-limonene 6-monooxygenase", + "1.14.13.81 Magnesium-protoporphyrin IX monomethyl ester (oxidative) cyclase", + "1.14.13.82 Vanillate monooxygenase", + "1.14.13.83 Precorrin-3B synthase", + "1.14.13.84 4-hydroxyacetophenone monooxygenase", + "1.14.13.85 Glyceollin synthase", + "1.14.13.86 2-hydroxyisoflavanone synthase", + "1.14.13.87 Licodione synthase", + "1.14.13.88 Flavonoid 3',5'-hydroxylase", + "1.14.13.89 Isoflavone 2'-hydroxylase", + "1.14.13.90 Zeaxanthin epoxidase", + "1.14.13.91 Deoxysarpagine hydroxylase", + "1.14.13.92 
Phenylacetone monooxygenase", + "1.14.13.93 (+)-abscisic acid 8'-hydroxylase", + "1.14.13.94 Lithocholate 6-beta-hydroxylase", + "1.14.13.95 7-alpha-hydroxycholest-4-en-3-one 12-alpha-hydroxylase", + "1.14.13.96 5-beta-cholestane-3-alpha,7-alpha-diol 12-alpha-hydroxylase", + "1.14.13.97 Taurochenodeoxycholate 6-alpha-hydroxylase", + "1.14.13.98 Cholesterol 24-hydroxylase", + "1.14.13.99 24-hydroxycholesterol 7-alpha-hydroxylase", + "1.14.13.100 25-hydroxycholesterol 7-alpha-hydroxylase", + "1.14.13.101 Senecionine N-oxygenase", + "1.14.13.102 Psoralen synthase", + "1.14.13.103 8-dimethylallylnaringenin 2'-hydroxylase", + "1.14.13.104 (+)-menthofuran synthase", + "1.14.13.105 Monocyclic monoterpene ketone monooxygenase", + "1.14.13.106 Epi-isozizaene 5-monooxygenase", + "1.14.13.107 Limonene 1,2-monooxygenase", + "1.14.13.108 Abietadiene hydroxylase", + "1.14.13.109 Abietadienol hydroxylase", + "1.14.13.110 Geranylgeraniol 18-hydroxylase", + "1.14.13.111 Methanesulfonate monooxygenase", + "1.14.13.112 3-epi-6-deoxocathasterone 23-monooxygenase", + "1.14.13.113 FAD-dependent urate hydroxylase", + "1.14.13.114 6-hydroxynicotinate 3-monooxygenase", + "1.14.13.115 Angelicin synthase", + "1.14.13.116 Geranylhydroquinone 3''-hydroxylase", + "1.14.13.117 Isoleucine N-monooxygenase", + "1.14.13.118 Valine N-monooxygenase", + "1.14.13.119 5-epiaristolochene 1,3-dihydroxylase", + "1.14.13.120 Costunolide synthase", + "1.14.13.121 Premnaspirodiene oxygenase", + "1.14.13.n1 Phenylalanine N-monooxygenase", + "1.14.13.n2 Tryptophan N-monooxygenase", + "1.14.13.n3 3-(3-hydroxy-phenyl)propanoic acid hydroxylase", + "1.14.13.n4 Vitamin D(3) 24-hydroxylase", + "1.14.13.n5 Dihomomethionine N-hydroxylase", + "1.14.13.n6 Hexahomomethionine N-hydroxylase", + "1.14.13.n7 4-nitrophenol 2-hydroxylase", + "1.14.14.1 Unspecific monooxygenase", + "1.14.14.3 Alkanal monooxygenase (FMN-linked)", + "1.14.14.5 Alkanesulfonate monooxygenase", + "1.14.14.7 Tryptophan 7-halogenase", + "1.14.14.8 
Anthranilate 3-monooxygenase (FAD)", + "1.14.15.1 Camphor 5-monooxygenase", + "1.14.15.2 Camphor 1,2-monooxygenase", + "1.14.15.3 Alkane 1-monooxygenase", + "1.14.15.4 Steroid 11-beta-monooxygenase", + "1.14.15.5 Corticosterone 18-monooxygenase", + "1.14.15.6 Cholesterol monooxygenase (side-chain-cleaving)", + "1.14.15.7 Choline monooxygenase", + "1.14.15.8 Steroid 15-beta-monooxygenase", + "1.14.16.1 Phenylalanine 4-monooxygenase", + "1.14.16.2 Tyrosine 3-monooxygenase", + "1.14.16.3 Anthranilate 3-monooxygenase", + "1.14.16.4 Tryptophan 5-monooxygenase", + "1.14.16.5 Alkylglycerol monooxygenase", + "1.14.16.6 Mandelate 4-monooxygenase", + "1.14.17.1 Dopamine beta-monooxygenase", + "1.14.17.3 Peptidylglycine monooxygenase", + "1.14.17.4 Aminocyclopropanecarboxylate oxidase", + "1.14.18.1 Monophenol monooxygenase", + "1.14.18.2 CMP-N-acetylneuraminate monooxygenase", + "1.14.19.1 Stearoyl-CoA 9-desaturase", + "1.14.19.2 Acyl-[acyl-carrier-protein] desaturase", + "1.14.19.3 Linoleoyl-CoA desaturase", + "1.14.19.4 Delta(8)-fatty-acid desaturase", + "1.14.19.5 Delta(11)-fatty-acid desaturase", + "1.14.19.6 Delta(12)-fatty-acid desaturase", + "1.14.20.1 Deacetoxycephalosporin-C synthase", + "1.14.21.1 (S)-stylopine synthase", + "1.14.21.2 (S)-cheilanthifoline synthase", + "1.14.21.3 Berbamunine synthase", + "1.14.21.4 Salutaridine synthase", + "1.14.21.5 (S)-canadine synthase", + "1.14.21.6 Lathosterol oxidase", + "1.14.21.7 Biflaviolin synthase", + "1.14.21.8 Pseudobaptigenin synthase", + "1.14.99.1 Prostaglandin-endoperoxide synthase", + "1.14.99.2 Kynurenine 7,8-hydroxylase", + "1.14.99.3 Heme oxygenase", + "1.14.99.4 Progesterone monooxygenase", + "1.14.99.7 Squalene monooxygenase", + "1.14.99.9 Steroid 17-alpha-monooxygenase", + "1.14.99.10 Steroid 21-monooxygenase", + "1.14.99.11 Estradiol 6-beta-monooxygenase", + "1.14.99.12 Androst-4-ene-3,17-dione monooxygenase", + "1.14.99.14 Progesterone 11-alpha-monooxygenase", + "1.14.99.15 4-methoxybenzoate monooxygenase 
(O-demethylating)", + "1.14.99.19 Plasmanylethanolamine desaturase", + "1.14.99.20 Phylloquinone monooxygenase (2,3-epoxidizing)", + "1.14.99.21 Latia-luciferin monooxygenase (demethylating)", + "1.14.99.22 Ecdysone 20-monooxygenase", + "1.14.99.23 3-hydroxybenzoate 2-monooxygenase", + "1.14.99.24 Steroid 9-alpha-monooxygenase", + "1.14.99.26 2-hydroxypyridine 5-monooxygenase", + "1.14.99.27 Juglone 3-monooxygenase", + "1.14.99.28 Linalool 8-monooxygenase", + "1.14.99.29 Deoxyhypusine monooxygenase", + "1.14.99.30 Carotene 7,8-desaturase", + "1.14.99.31 Myristoyl-CoA 11-(E) desaturase", + "1.14.99.32 Myristoyl-CoA 11-(Z) desaturase", + "1.14.99.33 Delta(12)-fatty acid dehydrogenase", + "1.14.99.34 Monoprenyl isoflavone epoxidase", + "1.14.99.35 Thiophene-2-carbonyl-CoA monooxygenase", + "1.14.99.36 Beta-carotene 15,15'-monooxygenase", + "1.14.99.37 Taxadiene 5-alpha-hydroxylase", + "1.14.99.38 Cholesterol 25-hydroxylase", + "1.14.99.39 Ammonia monooxygenase", + "1.14.99.40 5,6-dimethylbenzimidazole synthase", + "1.14.99.41 All-trans-8'-apo-beta-carotenal 15,15'-oxygenase", + "1.14.99.n2 Beta,beta-carotene 9',10'-oxygenase", + "1.14.99.n3 Zeaxanthin 7,8-dioxygenase", + "1.15.1.1 Superoxide dismutase", + "1.15.1.2 Superoxide reductase", + "1.16.1.1 Mercury(II) reductase", + "1.16.1.2 Diferric-transferrin reductase", + "1.16.1.3 Aquacobalamin reductase", + "1.16.1.4 Cob(II)alamin reductase", + "1.16.1.5 Aquacobalamin reductase (NADPH)", + "1.16.1.6 Cyanocobalamin reductase (cyanide-eliminating)", + "1.16.1.7 Ferric-chelate reductase", + "1.16.1.8 [Methionine synthase] reductase", + "1.16.3.1 Ferroxidase", + "1.16.8.1 Cob(II)yrinic acid a,c-diamide reductase", + "1.17.1.1 CDP-4-dehydro-6-deoxyglucose reductase", + "1.17.1.2 4-hydroxy-3-methylbut-2-enyl diphosphate reductase", + "1.17.1.3 Leucoanthocyanidin reductase", + "1.17.1.4 Xanthine dehydrogenase", + "1.17.1.5 Nicotinate dehydrogenase", + "1.17.2.1 Nicotinate dehydrogenase (cytochrome)", + "1.17.3.1 Pteridine 
oxidase", + "1.17.3.2 Xanthine oxidase", + "1.17.3.3 6-hydroxynicotinate dehydrogenase", + "1.17.4.1 Ribonucleoside-diphosphate reductase", + "1.17.4.2 Ribonucleoside-triphosphate reductase", + "1.17.5.1 Phenylacetyl-CoA dehydrogenase", + "1.17.5.2 Caffeine dehydrogenase", + "1.17.7.1 (E)-4-hydroxy-3-methylbut-2-enyl-diphosphate synthase", + "1.17.99.1 4-methylphenol dehydrogenase (hydroxylating)", + "1.17.99.2 Ethylbenzene hydroxylase", + "1.17.99.3 3-alpha,7-alpha,12-alpha-trihydroxy-5-beta-cholestanoyl-CoA 24-hydroxylase", + "1.17.99.4 Uracil/thymine dehydrogenase", + "1.17.99.5 Bile-acid 7-alpha-dehydroxylase", + "1.18.1.1 Rubredoxin--NAD(+) reductase", + "1.18.1.2 Ferredoxin--NADP(+) reductase", + "1.18.1.3 Ferredoxin--NAD(+) reductase", + "1.18.1.4 Rubredoxin--NAD(P)(+) reductase", + "1.18.6.1 Nitrogenase", + "1.19.6.1 Nitrogenase (flavodoxin)", + "1.20.1.1 Phosphonate dehydrogenase", + "1.20.4.1 Arsenate reductase (glutaredoxin)", + "1.20.4.2 Methylarsonate reductase", + "1.20.4.3 Mycoredoxin", + "1.20.98.1 Arsenate reductase (azurin)", + "1.20.99.1 Arsenate reductase (donor)", + "1.21.3.1 Isopenicillin-N synthase", + "1.21.3.2 Columbamine oxidase", + "1.21.3.3 Reticuline oxidase", + "1.21.3.4 Sulochrin oxidase ((+)-bisdechlorogeodin-forming)", + "1.21.3.5 Sulochrin oxidase ((-)-bisdechlorogeodin-forming)", + "1.21.3.6 Aureusidin synthase", + "1.21.4.1 D-proline reductase (dithiol)", + "1.21.4.2 Glycine reductase", + "1.21.4.3 Sarcosine reductase", + "1.21.4.4 Betaine reductase", + "1.21.99.1 Beta-cyclopiazonate dehydrogenase", + "1.22.1.1 Iodotyrosine deiodinase", + "1.97.1.1 Chlorate reductase", + "1.97.1.2 Pyrogallol hydroxytransferase", + "1.97.1.3 Sulfur reductase", + "1.97.1.4 [Formate-C-acetyltransferase]-activating enzyme", + "1.97.1.8 Tetrachloroethene reductive dehalogenase", + "1.97.1.9 Selenate reductase", + "1.97.1.10 Thyroxine 5'-deiodinase", + "1.97.1.11 Thyroxine 5-deiodinase", + "2.1.1.1 Nicotinamide N-methyltransferase", + "2.1.1.2 
Guanidinoacetate N-methyltransferase", + "2.1.1.3 Thetin--homocysteine S-methyltransferase", + "2.1.1.4 Acetylserotonin O-methyltransferase", + "2.1.1.5 Betaine--homocysteine S-methyltransferase", + "2.1.1.6 Catechol O-methyltransferase", + "2.1.1.7 Nicotinate N-methyltransferase", + "2.1.1.8 Histamine N-methyltransferase", + "2.1.1.9 Thiol S-methyltransferase", + "2.1.1.10 Homocysteine S-methyltransferase", + "2.1.1.11 Magnesium protoporphyrin IX methyltransferase", + "2.1.1.12 Methionine S-methyltransferase", + "2.1.1.13 Methionine synthase", + "2.1.1.14 5-methyltetrahydropteroyltriglutamate--homocysteine S-methyltransferase", + "2.1.1.15 Fatty-acid O-methyltransferase", + "2.1.1.16 Methylene-fatty-acyl-phospholipid synthase", + "2.1.1.17 Phosphatidylethanolamine N-methyltransferase", + "2.1.1.18 Polysaccharide O-methyltransferase", + "2.1.1.19 Trimethylsulfonium--tetrahydrofolate N-methyltransferase", + "2.1.1.20 Glycine N-methyltransferase", + "2.1.1.21 Methylamine--glutamate N-methyltransferase", + "2.1.1.22 Carnosine N-methyltransferase", + "2.1.1.25 Phenol O-methyltransferase", + "2.1.1.26 Iodophenol O-methyltransferase", + "2.1.1.27 Tyramine N-methyltransferase", + "2.1.1.28 Phenylethanolamine N-methyltransferase", + "2.1.1.31 tRNA (guanine-N(1)-)-methyltransferase", + "2.1.1.32 tRNA (guanine-N(2)-)-methyltransferase", + "2.1.1.33 tRNA (guanine-N(7)-)-methyltransferase", + "2.1.1.34 tRNA guanosine-2'-O-methyltransferase", + "2.1.1.35 tRNA (uracil-5-)-methyltransferase", + "2.1.1.36 tRNA (adenine-N(1)-)-methyltransferase", + "2.1.1.37 DNA (cytosine-5-)-methyltransferase", + "2.1.1.38 O-demethylpuromycin O-methyltransferase", + "2.1.1.39 Inositol 3-methyltransferase", + "2.1.1.40 Inositol 1-methyltransferase", + "2.1.1.41 Sterol 24-C-methyltransferase", + "2.1.1.42 Luteolin O-methyltransferase", + "2.1.1.43 Histone-lysine N-methyltransferase", + "2.1.1.44 Dimethylhistidine N-methyltransferase", + "2.1.1.45 Thymidylate synthase", + "2.1.1.46 Isoflavone 
4'-O-methyltransferase", + "2.1.1.47 Indolepyruvate C-methyltransferase", + "2.1.1.49 Amine N-methyltransferase", + "2.1.1.50 Loganate O-methyltransferase", + "2.1.1.53 Putrescine N-methyltransferase", + "2.1.1.54 Deoxycytidylate C-methyltransferase", + "2.1.1.55 tRNA (adenine-N(6)-)-methyltransferase", + "2.1.1.56 mRNA (guanine-N(7)-)-methyltransferase", + "2.1.1.57 mRNA (nucleoside-2'-O-)-methyltransferase", + "2.1.1.59 [Cytochrome c]-lysine N-methyltransferase", + "2.1.1.60 Calmodulin-lysine N-methyltransferase", + "2.1.1.61 tRNA (5-methylaminomethyl-2-thiouridylate)-methyltransferase", + "2.1.1.62 mRNA (2'-O-methyladenosine-N(6)-)-methyltransferase", + "2.1.1.63 Methylated-DNA--[protein]-cysteine S-methyltransferase", + "2.1.1.64 3-demethylubiquinol 3-O-methyltransferase", + "2.1.1.65 Licodione 2'-O-methyltransferase", + "2.1.1.66 rRNA (adenosine-2'-O-)-methyltransferase", + "2.1.1.67 Thiopurine S-methyltransferase", + "2.1.1.68 Caffeate O-methyltransferase", + "2.1.1.69 5-hydroxyfuranocoumarin 5-O-methyltransferase", + "2.1.1.70 8-hydroxyfuranocoumarin 8-O-methyltransferase", + "2.1.1.71 Phosphatidyl-N-methylethanolamine N-methyltransferase", + "2.1.1.72 Site-specific DNA-methyltransferase (adenine-specific)", + "2.1.1.74 Methylenetetrahydrofolate--tRNA-(uracil-5-)-methyltransferase (FADH(2)-oxidizing)", + "2.1.1.75 Apigenin 4'-O-methyltransferase", + "2.1.1.76 Quercetin 3-O-methyltransferase", + "2.1.1.77 Protein-L-isoaspartate(D-aspartate) O-methyltransferase", + "2.1.1.78 Isoorientin 3'-O-methyltransferase", + "2.1.1.79 Cyclopropane-fatty-acyl-phospholipid synthase", + "2.1.1.80 Protein-glutamate O-methyltransferase", + "2.1.1.82 3-methylquercetin 7-O-methyltransferase", + "2.1.1.83 3,7-dimethylquercetin 4'-O-methyltransferase", + "2.1.1.84 Methylquercetagetin 6-O-methyltransferase", + "2.1.1.85 Protein-histidine N-methyltransferase", + "2.1.1.86 Tetrahydromethanopterin S-methyltransferase", + "2.1.1.87 Pyridine N-methyltransferase", + "2.1.1.88 
8-hydroxyquercetin 8-O-methyltransferase", + "2.1.1.89 Tetrahydrocolumbamine 2-O-methyltransferase", + "2.1.1.90 Methanol--5-hydroxybenzimidazolylcobamide Co-methyltransferase", + "2.1.1.91 Isobutyraldoxime O-methyltransferase", + "2.1.1.94 Tabersonine 16-O-methyltransferase", + "2.1.1.95 Tocopherol O-methyltransferase", + "2.1.1.96 Thioether S-methyltransferase", + "2.1.1.97 3-hydroxyanthranilate 4-C-methyltransferase", + "2.1.1.98 Diphthine synthase", + "2.1.1.99 3-hydroxy-16-methoxy-2,3-dihydrotabersonine N-methyltransferase", + "2.1.1.100 Protein-S-isoprenylcysteine O-methyltransferase", + "2.1.1.101 Macrocin O-methyltransferase", + "2.1.1.102 Demethylmacrocin O-methyltransferase", + "2.1.1.103 Phosphoethanolamine N-methyltransferase", + "2.1.1.104 Caffeoyl-CoA O-methyltransferase", + "2.1.1.105 N-benzoyl-4-hydroxyanthranilate 4-O-methyltransferase", + "2.1.1.106 Tryptophan 2-C-methyltransferase", + "2.1.1.107 Uroporphyrinogen-III C-methyltransferase", + "2.1.1.108 6-hydroxymellein O-methyltransferase", + "2.1.1.109 Demethylsterigmatocystin 6-O-methyltransferase", + "2.1.1.110 Sterigmatocystin 8-O-methyltransferase", + "2.1.1.111 Anthranilate N-methyltransferase", + "2.1.1.112 Glucuronoxylan 4-O-methyltransferase", + "2.1.1.113 Site-specific DNA-methyltransferase (cytosine-N(4)-specific)", + "2.1.1.114 Polyprenyldihydroxybenzoate methyltransferase", + "2.1.1.115 (RS)-1-benzyl-1,2,3,4-tetrahydroisoquinoline N-methyltransferase", + "2.1.1.116 3'-hydroxy-N-methyl-(S)-coclaurine 4'-O-methyltransferase", + "2.1.1.117 (S)-scoulerine 9-O-methyltransferase", + "2.1.1.118 Columbamine O-methyltransferase", + "2.1.1.119 10-hydroxydihydrosanguinarine 10-O-methyltransferase", + "2.1.1.120 12-hydroxydihydrochelirubine 12-O-methyltransferase", + "2.1.1.121 6-O-methylnorlaudanosoline 5'-O-methyltransferase", + "2.1.1.122 (S)-tetrahydroprotoberberine N-methyltransferase", + "2.1.1.123 [Cytochrome c]-methionine S-methyltransferase", + "2.1.1.124 [Cytochrome c]-arginine 
N-methyltransferase", + "2.1.1.125 Histone-arginine N-methyltransferase", + "2.1.1.126 [Myelin basic protein]-arginine N-methyltransferase", + "2.1.1.127 [Ribulose-bisphosphate carboxylase]-lysine N-methyltransferase", + "2.1.1.128 (RS)-norcoclaurine 6-O-methyltransferase", + "2.1.1.129 Inositol 4-methyltransferase", + "2.1.1.130 Precorrin-2 C(20)-methyltransferase", + "2.1.1.131 Precorrin-3B C(17)-methyltransferase", + "2.1.1.132 Precorrin-6Y C(5,15)-methyltransferase (decarboxylating)", + "2.1.1.133 Precorrin-4 C(11)-methyltransferase", + "2.1.1.136 Chlorophenol O-methyltransferase", + "2.1.1.137 Arsenite methyltransferase", + "2.1.1.139 3'-demethylstaurosporine O-methyltransferase", + "2.1.1.140 (S)-coclaurine-N-methyltransferase", + "2.1.1.141 Jasmonate O-methyltransferase", + "2.1.1.142 Cycloartenol 24-C-methyltransferase", + "2.1.1.143 24-methylenesterol C-methyltransferase", + "2.1.1.144 Trans-aconitate 2-methyltransferase", + "2.1.1.145 Trans-aconitate 3-methyltransferase", + "2.1.1.146 (Iso)eugenol O-methyltransferase", + "2.1.1.147 Corydaline synthase", + "2.1.1.148 Thymidylate synthase (FAD)", + "2.1.1.149 Myricetin O-methyltransferase", + "2.1.1.150 Isoflavone 7-O-methyltransferase", + "2.1.1.151 Cobalt-factor II C(20)-methyltransferase", + "2.1.1.152 Precorrin-6A synthase (deacetylating)", + "2.1.1.153 Vitexin 2''-O-rhamnoside 7-O-methyltransferase", + "2.1.1.154 Isoliquiritigenin 2'-O-methyltransferase", + "2.1.1.155 Kaempferol 4'-O-methyltransferase", + "2.1.1.156 Glycine/sarcosine N-methyltransferase", + "2.1.1.157 Sarcosine/dimethylglycine N-methyltransferase", + "2.1.1.158 7-methylxanthosine synthase", + "2.1.1.159 Theobromine synthase", + "2.1.1.160 Caffeine synthase", + "2.1.1.161 Dimethylglycine N-methyltransferase", + "2.1.1.162 Glycine/sarcosine/dimethylglycine N-methyltransferase", + "2.1.1.163 Demethylmenaquinone methyltransferase", + "2.1.1.164 Demethylrebeccamycin-D-glucose O-methyltransferase", + "2.1.1.165 Methyl halide transferase", + 
"2.1.1.166 23S rRNA (uridine(2552)-2'-O-)-methyltransferase", + "2.1.1.167 27S pre-rRNA (guanosine(2922)-2'-O-)-methyltransferase", + "2.1.1.168 21S rRNA (uridine(2791)-2'-O-)-methyltransferase", + "2.1.1.169 Tricetin 3',4',5'-O-trimethyltransferase", + "2.1.1.170 16S rRNA (guanine(527)-N(7))-methyltransferase", + "2.1.1.171 16S rRNA (guanine(966)-N(2))-methyltransferase", + "2.1.1.172 16S rRNA (guanine(1207)-N(2))-methyltransferase", + "2.1.1.173 23S rRNA (guanine(2445)-N(2))-methyltransferase", + "2.1.1.174 23S rRNA (guanine(1835)-N(2))-methyltransferase", + "2.1.1.175 Tricin synthase", + "2.1.1.176 16S rRNA (cytosine(967)-C(5))-methyltransferase", + "2.1.1.177 23S rRNA (pseudouridine(1915)-N(3))-methyltransferase", + "2.1.1.178 16S rRNA (cytosine(1407)-C(5))-methyltransferase", + "2.1.1.179 16S rRNA (guanine(1405)-C(7))-methyltransferase", + "2.1.1.180 16S rRNA (adenine(1408)-N(1))-methyltransferase", + "2.1.1.181 23S rRNA (adenine(1618)-N(6))-methyltransferase", + "2.1.1.182 16S rRNA (adenine(1518)-N(6)/adenine(1519)-N(6))-dimethyltransferase", + "2.1.1.183 18S rRNA (adenine(1779)-N(6)/adenine(1780)-N(6))-dimethyltransferase", + "2.1.1.184 23S rRNA (adenine(2085)-N(6))-dimethyltransferase", + "2.1.1.185 23S rRNA (guanine(2251)-2'-O)-methyltransferase", + "2.1.1.186 23S rRNA (cytidine(2498)-2'-O)-methyltransferase", + "2.1.1.187 23S rRNA (guanine(745)-N(1))-methyltransferase", + "2.1.1.188 23S rRNA (guanine(748)-N(1))-methyltransferase", + "2.1.1.189 23S rRNA (uracil(747)-C(5))-methyltransferase", + "2.1.1.190 23S rRNA (uracil(1939)-C(5))-methyltransferase", + "2.1.1.191 23S rRNA (cytosine(1962)-C(5))-methyltransferase", + "2.1.1.192 23S rRNA (adenine(2503)-C(2))-methyltransferase", + "2.1.1.193 16S rRNA (uracil(1498)-N(3))-methyltransferase", + "2.1.1.194 23S rRNA (adenine(2503)-C(2),C(8))-methyltransferase", + "2.1.1.195 Cobalt-precorrin-5B (C(1))-methyltransferase", + "2.1.1.196 Cobalt-precorrin-7 (C(15))-methyltransferase (decarboxylating)", + "2.1.1.197 
Malonyl-CoA O-methyltransferase", + "2.1.1.198 16S rRNA (cytidine(1402)-2'-O)-methyltransferase", + "2.1.1.199 16S rRNA (cytosine(1402)-N(4))-methyltransferase", + "2.1.1.200 tRNA (cytidine(32)/uridine(32)-2'-O)-methyltransferase", + "2.1.1.201 2-methoxy-6-polyprenyl-1,4-benzoquinol methylase", + "2.1.1.202 Multisite-specific tRNA:(cytosine-C(5))-methyltransferase", + "2.1.1.203 tRNA (cytosine(34)-C(5))-methyltransferase", + "2.1.1.204 tRNA (cytosine(38)-C(5))-methyltransferase", + "2.1.1.205 tRNA (cytidine(32)/guanosine(34)-2'-O)-methyltransferase", + "2.1.1.206 tRNA (cytidine(56)-2'-O)-methyltransferase", + "2.1.1.n1 Resorcinol O-methyltransferase", + "2.1.1.n2 tRNA (uridine(44)-2'-O-)-methyltransferase", + "2.1.1.n3 Selenocysteine Se-methyltransferase", + "2.1.1.n4 Thiocyanate methyltransferase", + "2.1.1.n5 N-terminal protein methyltransferase", + "2.1.1.n6 Geranyl diphosphate 2-C-methyltransferase", + "2.1.1.n7 5-pentadecatrienyl resorcinol O-methyltransferase", + "2.1.2.1 Glycine hydroxymethyltransferase", + "2.1.2.2 Phosphoribosylglycinamide formyltransferase", + "2.1.2.3 Phosphoribosylaminoimidazolecarboxamide formyltransferase", + "2.1.2.4 Glycine formimidoyltransferase", + "2.1.2.5 Glutamate formimidoyltransferase", + "2.1.2.7 D-alanine 2-hydroxymethyltransferase", + "2.1.2.8 Deoxycytidylate 5-hydroxymethyltransferase", + "2.1.2.9 Methionyl-tRNA formyltransferase", + "2.1.2.10 Aminomethyltransferase", + "2.1.2.11 3-methyl-2-oxobutanoate hydroxymethyltransferase", + "2.1.2.13 UDP-4-amino-4-deoxy-L-arabinose formyltransferase", + "2.1.3.1 Methylmalonyl-CoA carboxytransferase", + "2.1.3.2 Aspartate carbamoyltransferase", + "2.1.3.3 Ornithine carbamoyltransferase", + "2.1.3.5 Oxamate carbamoyltransferase", + "2.1.3.6 Putrescine carbamoyltransferase", + "2.1.3.7 3-hydroxymethylcephem carbamoyltransferase", + "2.1.3.8 Lysine carbamoyltransferase", + "2.1.3.9 N-acetylornithine carbamoyltransferase", + "2.1.3.10 Malonyl-S-ACP:biotin-protein carboxyltransferase", 
+ "2.1.3.11 N-succinylornithine carbamoyltransferase", + "2.1.4.1 Glycine amidinotransferase", + "2.1.4.2 Scyllo-inosamine-4-phosphate amidinotransferase", + "2.2.1.1 Transketolase", + "2.2.1.2 Transaldolase", + "2.2.1.3 Formaldehyde transketolase", + "2.2.1.4 Acetoin--ribose-5-phosphate transaldolase", + "2.2.1.5 2-hydroxy-3-oxoadipate synthase", + "2.2.1.6 Acetolactate synthase", + "2.2.1.7 1-deoxy-D-xylulose-5-phosphate synthase", + "2.2.1.8 Fluorothreonine transaldolase", + "2.2.1.9 2-succinyl-5-enolpyruvyl-6-hydroxy-3-cyclohexene-1-carboxylic-acid synthase", + "2.3.1.1 Amino-acid N-acetyltransferase", + "2.3.1.2 Imidazole N-acetyltransferase", + "2.3.1.3 Glucosamine N-acetyltransferase", + "2.3.1.4 Glucosamine-phosphate N-acetyltransferase", + "2.3.1.5 Arylamine N-acetyltransferase", + "2.3.1.6 Choline O-acetyltransferase", + "2.3.1.7 Carnitine O-acetyltransferase", + "2.3.1.8 Phosphate acetyltransferase", + "2.3.1.9 Acetyl-CoA C-acetyltransferase", + "2.3.1.10 Hydrogen-sulfide S-acetyltransferase", + "2.3.1.11 Thioethanolamine S-acetyltransferase", + "2.3.1.12 Dihydrolipoyllysine-residue acetyltransferase", + "2.3.1.13 Glycine N-acyltransferase", + "2.3.1.14 Glutamine N-phenylacetyltransferase", + "2.3.1.15 Glycerol-3-phosphate O-acyltransferase", + "2.3.1.16 Acetyl-CoA C-acyltransferase", + "2.3.1.17 Aspartate N-acetyltransferase", + "2.3.1.18 Galactoside O-acetyltransferase", + "2.3.1.19 Phosphate butyryltransferase", + "2.3.1.20 Diacylglycerol O-acyltransferase", + "2.3.1.21 Carnitine O-palmitoyltransferase", + "2.3.1.22 2-acylglycerol O-acyltransferase", + "2.3.1.23 1-acylglycerophosphocholine O-acyltransferase", + "2.3.1.24 Sphingosine N-acyltransferase", + "2.3.1.25 Plasmalogen synthase", + "2.3.1.26 Sterol O-acyltransferase", + "2.3.1.27 Cortisol O-acetyltransferase", + "2.3.1.28 Chloramphenicol O-acetyltransferase", + "2.3.1.29 Glycine C-acetyltransferase", + "2.3.1.30 Serine O-acetyltransferase", + "2.3.1.31 Homoserine O-acetyltransferase", + 
"2.3.1.32 Lysine N-acetyltransferase", + "2.3.1.33 Histidine N-acetyltransferase", + "2.3.1.34 D-tryptophan N-acetyltransferase", + "2.3.1.35 Glutamate N-acetyltransferase", + "2.3.1.36 D-amino-acid N-acetyltransferase", + "2.3.1.37 5-aminolevulinate synthase", + "2.3.1.38 [Acyl-carrier-protein] S-acetyltransferase", + "2.3.1.39 [Acyl-carrier-protein] S-malonyltransferase", + "2.3.1.40 Acyl-[acyl-carrier-protein]--phospholipid O-acyltransferase", + "2.3.1.41 Beta-ketoacyl-acyl-carrier-protein synthase I", + "2.3.1.42 Glycerone-phosphate O-acyltransferase", + "2.3.1.43 Phosphatidylcholine--sterol O-acyltransferase", + "2.3.1.44 N-acetylneuraminate 4-O-acetyltransferase", + "2.3.1.45 N-acetylneuraminate 7-O(or 9-O)-acetyltransferase", + "2.3.1.46 Homoserine O-succinyltransferase", + "2.3.1.47 8-amino-7-oxononanoate synthase", + "2.3.1.48 Histone acetyltransferase", + "2.3.1.49 Deacetyl-[citrate-(pro-3S)-lyase] S-acetyltransferase", + "2.3.1.50 Serine C-palmitoyltransferase", + "2.3.1.51 1-acylglycerol-3-phosphate O-acyltransferase", + "2.3.1.52 2-acylglycerol-3-phosphate O-acyltransferase", + "2.3.1.53 Phenylalanine N-acetyltransferase", + "2.3.1.54 Formate C-acetyltransferase", + "2.3.1.56 Aromatic-hydroxylamine O-acetyltransferase", + "2.3.1.57 Diamine N-acetyltransferase", + "2.3.1.58 2,3-diaminopropionate N-oxalyltransferase", + "2.3.1.59 Gentamicin 2'-N-acetyltransferase", + "2.3.1.60 Gentamicin 3'-N-acetyltransferase", + "2.3.1.61 Dihydrolipoyllysine-residue succinyltransferase", + "2.3.1.62 2-acylglycerophosphocholine O-acyltransferase", + "2.3.1.63 1-alkylglycerophosphocholine O-acyltransferase", + "2.3.1.64 Agmatine N(4)-coumaroyltransferase", + "2.3.1.65 Bile acid-CoA:amino acid N-acyltransferase", + "2.3.1.66 Leucine N-acetyltransferase", + "2.3.1.67 1-alkylglycerophosphocholine O-acetyltransferase", + "2.3.1.68 Glutamine N-acyltransferase", + "2.3.1.69 Monoterpenol O-acetyltransferase", + "2.3.1.71 Glycine N-benzoyltransferase", + "2.3.1.72 
Indoleacetylglucose--inositol O-acyltransferase", + "2.3.1.73 Diacylglycerol--sterol O-acyltransferase", + "2.3.1.74 Naringenin-chalcone synthase", + "2.3.1.75 Long-chain-alcohol O-fatty-acyltransferase", + "2.3.1.76 Retinol O-fatty-acyltransferase", + "2.3.1.77 Triacylglycerol--sterol O-acyltransferase", + "2.3.1.78 Heparan-alpha-glucosaminide N-acetyltransferase", + "2.3.1.79 Maltose O-acetyltransferase", + "2.3.1.80 Cysteine-S-conjugate N-acetyltransferase", + "2.3.1.81 Aminoglycoside N(3')-acetyltransferase", + "2.3.1.82 Aminoglycoside N(6')-acetyltransferase", + "2.3.1.83 Phosphatidylcholine--dolichol O-acyltransferase", + "2.3.1.84 Alcohol O-acetyltransferase", + "2.3.1.85 Fatty-acid synthase", + "2.3.1.86 Fatty-acyl-CoA synthase", + "2.3.1.87 Aralkylamine N-acetyltransferase", + "2.3.1.88 Peptide alpha-N-acetyltransferase", + "2.3.1.89 Tetrahydrodipicolinate N-acetyltransferase", + "2.3.1.90 Beta-glucogallin O-galloyltransferase", + "2.3.1.91 Sinapoylglucose--choline O-sinapoyltransferase", + "2.3.1.92 Sinapoylglucose--malate O-sinapoyltransferase", + "2.3.1.93 13-hydroxylupinine O-tigloyltransferase", + "2.3.1.94 6-deoxyerythronolide-B synthase", + "2.3.1.95 Trihydroxystilbene synthase", + "2.3.1.96 Glycoprotein N-palmitoyltransferase", + "2.3.1.97 Glycylpeptide N-tetradecanoyltransferase", + "2.3.1.98 Chlorogenate--glucarate O-hydroxycinnamoyltransferase", + "2.3.1.99 Quinate O-hydroxycinnamoyltransferase", + "2.3.1.100 [Myelin-proteolipid] O-palmitoyltransferase", + "2.3.1.101 Formylmethanofuran--tetrahydromethanopterin N-formyltransferase", + "2.3.1.102 N(6)-hydroxylysine O-acetyltransferase", + "2.3.1.103 Sinapoylglucose--sinapoylglucose O-sinapoyltransferase", + "2.3.1.104 1-alkenylglycerophosphocholine O-acyltransferase", + "2.3.1.105 Alkylglycerophosphate 2-O-acetyltransferase", + "2.3.1.106 Tartronate O-hydroxycinnamoyltransferase", + "2.3.1.107 Deacetylvindoline O-acetyltransferase", + "2.3.1.108 Alpha-tubulin N-acetyltransferase", + "2.3.1.109 
Arginine N-succinyltransferase", + "2.3.1.110 Tyramine N-feruloyltransferase", + "2.3.1.111 Mycocerosate synthase", + "2.3.1.112 D-tryptophan N-malonyltransferase", + "2.3.1.113 Anthranilate N-malonyltransferase", + "2.3.1.114 3,4-dichloroaniline N-malonyltransferase", + "2.3.1.115 Isoflavone-7-O-beta-glucoside 6''-O-malonyltransferase", + "2.3.1.116 Flavonol-3-O-beta-glucoside O-malonyltransferase", + "2.3.1.117 2,3,4,5-tetrahydropyridine-2,6-dicarboxylate N-succinyltransferase", + "2.3.1.118 N-hydroxyarylamine O-acetyltransferase", + "2.3.1.119 Icosanoyl-CoA synthase", + "2.3.1.121 1-alkenylglycerophosphoethanolamine O-acyltransferase", + "2.3.1.122 Trehalose O-mycolyltransferase", + "2.3.1.123 Dolichol O-acyltransferase", + "2.3.1.125 1-alkyl-2-acetylglycerol O-acyltransferase", + "2.3.1.126 Isocitrate O-dihydroxycinnamoyltransferase", + "2.3.1.127 Ornithine N-benzoyltransferase", + "2.3.1.128 Ribosomal-protein-alanine N-acetyltransferase", + "2.3.1.129 Acyl-[acyl-carrier-protein]--UDP-N-acetylglucosamine O-acyltransferase", + "2.3.1.130 Galactarate O-hydroxycinnamoyltransferase", + "2.3.1.131 Glucarate O-hydroxycinnamoyltransferase", + "2.3.1.132 Glucarolactone O-hydroxycinnamoyltransferase", + "2.3.1.133 Shikimate O-hydroxycinnamoyltransferase", + "2.3.1.134 Galactolipid O-acyltransferase", + "2.3.1.135 Phosphatidylcholine--retinol O-acyltransferase", + "2.3.1.136 Polysialic-acid O-acetyltransferase", + "2.3.1.137 Carnitine O-octanoyltransferase", + "2.3.1.138 Putrescine N-hydroxycinnamoyltransferase", + "2.3.1.139 Ecdysone O-acyltransferase", + "2.3.1.140 Rosmarinate synthase", + "2.3.1.141 Galactosylacylglycerol O-acyltransferase", + "2.3.1.142 Glycoprotein O-fatty-acyltransferase", + "2.3.1.143 Beta-glucogallin--tetrakisgalloylglucose O-galloyltransferase", + "2.3.1.144 Anthranilate N-benzoyltransferase", + "2.3.1.145 Piperidine N-piperoyltransferase", + "2.3.1.146 Pinosylvin synthase", + "2.3.1.147 Glycerophospholipid arachidonoyl-transferase 
(CoA-independent)", + "2.3.1.148 Glycerophospholipid acyltransferase (CoA-dependent)", + "2.3.1.149 Platelet-activating factor acetyltransferase", + "2.3.1.150 Salutaridinol 7-O-acetyltransferase", + "2.3.1.151 Benzophenone synthase", + "2.3.1.152 Alcohol O-cinnamoyltransferase", + "2.3.1.153 Anthocyanin 5-aromatic acyltransferase", + "2.3.1.154 Propionyl-CoA C(2)-trimethyltridecanoyltransferase", + "2.3.1.155 Acetyl-CoA C-myristoyltransferase", + "2.3.1.156 Phloroisovalerophenone synthase", + "2.3.1.157 Glucosamine-1-phosphate N-acetyltransferase", + "2.3.1.158 Phospholipid:diacylglycerol acyltransferase", + "2.3.1.159 Acridone synthase", + "2.3.1.160 Vinorine synthase", + "2.3.1.161 Lovastatin nonaketide synthase", + "2.3.1.162 Taxadien-5-alpha-ol O-acetyltransferase", + "2.3.1.163 10-hydroxytaxane O-acetyltransferase", + "2.3.1.164 Isopenicillin-N N-acyltransferase", + "2.3.1.165 6-methylsalicylic acid synthase", + "2.3.1.166 2-alpha-hydroxytaxane 2-O-benzoyltransferase", + "2.3.1.167 10-deacetylbaccatin III 10-O-acetyltransferase", + "2.3.1.168 Dihydrolipoyllysine-residue (2-methylpropanoyl)transferase", + "2.3.1.169 CO-methylating acetyl-CoA synthase", + "2.3.1.170 6'-deoxychalcone synthase", + "2.3.1.171 Anthocyanin 6''-O-malonyltransferase", + "2.3.1.172 Anthocyanin 5-O-glucoside 6'''-O-malonyltransferase", + "2.3.1.173 Flavonol-3-O-triglucoside O-coumaroyltransferase", + "2.3.1.174 3-oxoadipyl-CoA thiolase", + "2.3.1.175 Deacetylcephalosporin-C acetyltransferase", + "2.3.1.176 Propanoyl-CoA C-acyltransferase", + "2.3.1.177 Biphenyl synthase", + "2.3.1.178 Diaminobutyrate acetyltransferase", + "2.3.1.179 Beta-ketoacyl-acyl-carrier-protein synthase II", + "2.3.1.180 Beta-ketoacyl-acyl-carrier-protein synthase III", + "2.3.1.181 Lipoyl(octanoyl) transferase", + "2.3.1.182 (R)-citramalate synthase", + "2.3.1.183 Phosphinothricin acetyltransferase", + "2.3.1.184 Acyl-homoserine-lactone synthase", + "2.3.1.185 Tropine acyltransferase", + "2.3.1.186 Pseudotropine 
acyltransferase", + "2.3.1.187 Acetyl-S-ACP:malonate ACP transferase", + "2.3.1.188 Omega-hydroxypalmitate O-feruloyl transferase", + "2.3.1.189 Mycothiol synthase", + "2.3.1.190 Acetoin dehydrogenase", + "2.3.1.191 UDP-3-O-(3-hydroxymyristoyl)glucosamine N-acyltransferase", + "2.3.1.192 Glycine N-phenylacetyltransferase", + "2.3.1.193 tRNA(Met) cytidine acetyltransferase", + "2.3.1.194 Acetoacetyl-CoA synthase", + "2.3.1.195 (Z)-3-hexen-1-ol acetyltransferase", + "2.3.2.1 D-glutamyltransferase", + "2.3.2.2 Gamma-glutamyltransferase", + "2.3.2.3 Lysyltransferase", + "2.3.2.4 Gamma-glutamylcyclotransferase", + "2.3.2.5 Glutaminyl-peptide cyclotransferase", + "2.3.2.6 Leucyltransferase", + "2.3.2.7 Aspartyltransferase", + "2.3.2.8 Arginyltransferase", + "2.3.2.9 Agaritine gamma-glutamyltransferase", + "2.3.2.10 UDP-N-acetylmuramoylpentapeptide-lysine N(6)-alanyltransferase", + "2.3.2.11 Alanylphosphatidylglycerol synthase", + "2.3.2.12 Peptidyltransferase", + "2.3.2.13 Protein-glutamine gamma-glutamyltransferase", + "2.3.2.14 D-alanine gamma-glutamyltransferase", + "2.3.2.15 Glutathione gamma-glutamylcysteinyltransferase", + "2.3.2.16 Lipid II:glycine glycyltransferase", + "2.3.2.17 N-acetylmuramoyl-L-alanyl-D-glutamyl-L-lysyl-(N(6)-glycyl)-D-alanyl-D-alanine-diphosphoundecaprenyl-N-acetylglucosamine:glycine glycyltransferase", + "2.3.2.18 N-acetylmuramoyl-L-alanyl-D-glutamyl-L-lysyl-(N(6)-triglycine)-D-alanyl-D-alanine-diphosphoundecaprenyl-N-acetylglucosamine:glycine glycyltransferase", + "2.3.3.1 Citrate (Si)-synthase", + "2.3.3.2 Decylcitrate synthase", + "2.3.3.3 Citrate (Re)-synthase", + "2.3.3.4 Decylhomocitrate synthase", + "2.3.3.5 2-methylcitrate synthase", + "2.3.3.6 2-ethylmalate synthase", + "2.3.3.7 3-ethylmalate synthase", + "2.3.3.8 ATP citrate synthase", + "2.3.3.9 Malate synthase", + "2.3.3.10 Hydroxymethylglutaryl-CoA synthase", + "2.3.3.11 2-hydroxyglutarate synthase", + "2.3.3.12 3-propylmalate synthase", + "2.3.3.13 2-isopropylmalate synthase", 
+ "2.3.3.14 Homocitrate synthase", + "2.3.3.15 Sulfoacetaldehyde acetyltransferase", + "2.4.1.1 Phosphorylase", + "2.4.1.2 Dextrin dextranase", + "2.4.1.4 Amylosucrase", + "2.4.1.5 Dextransucrase", + "2.4.1.7 Sucrose phosphorylase", + "2.4.1.8 Maltose phosphorylase", + "2.4.1.9 Inulosucrase", + "2.4.1.10 Levansucrase", + "2.4.1.11 Glycogen(starch) synthase", + "2.4.1.12 Cellulose synthase (UDP-forming)", + "2.4.1.13 Sucrose synthase", + "2.4.1.14 Sucrose-phosphate synthase", + "2.4.1.15 Alpha,alpha-trehalose-phosphate synthase (UDP-forming)", + "2.4.1.16 Chitin synthase", + "2.4.1.17 Glucuronosyltransferase", + "2.4.1.18 1,4-alpha-glucan branching enzyme", + "2.4.1.19 Cyclomaltodextrin glucanotransferase", + "2.4.1.20 Cellobiose phosphorylase", + "2.4.1.21 Starch synthase", + "2.4.1.22 Lactose synthase", + "2.4.1.23 Sphingosine beta-galactosyltransferase", + "2.4.1.24 1,4-alpha-glucan 6-alpha-glucosyltransferase", + "2.4.1.25 4-alpha-glucanotransferase", + "2.4.1.26 DNA alpha-glucosyltransferase", + "2.4.1.27 DNA beta-glucosyltransferase", + "2.4.1.28 Glucosyl-DNA beta-glucosyltransferase", + "2.4.1.29 Cellulose synthase (GDP-forming)", + "2.4.1.30 1,3-beta-oligoglucan phosphorylase", + "2.4.1.31 Laminaribiose phosphorylase", + "2.4.1.32 Glucomannan 4-beta-mannosyltransferase", + "2.4.1.33 Alginate synthase", + "2.4.1.34 1,3-beta-glucan synthase", + "2.4.1.35 Phenol beta-glucosyltransferase", + "2.4.1.36 Alpha,alpha-trehalose-phosphate synthase (GDP-forming)", + "2.4.1.37 Fucosylgalactoside 3-alpha-galactosyltransferase", + "2.4.1.38 Beta-N-acetylglucosaminylglycopeptide beta-1,4-galactosyltransferase", + "2.4.1.39 Steroid N-acetylglucosaminyltransferase", + "2.4.1.40 Glycoprotein-fucosylgalactoside alpha-N-acetylgalactosaminyltransferase", + "2.4.1.41 Polypeptide N-acetylgalactosaminyltransferase", + "2.4.1.43 Polygalacturonate 4-alpha-galacturonosyltransferase", + "2.4.1.44 Lipopolysaccharide 3-alpha-galactosyltransferase", + "2.4.1.45 2-hydroxyacylsphingosine 
1-beta-galactosyltransferase", + "2.4.1.46 Monogalactosyldiacylglycerol synthase", + "2.4.1.47 N-acylsphingosine galactosyltransferase", + "2.4.1.48 Heteroglycan alpha-mannosyltransferase", + "2.4.1.49 Cellodextrin phosphorylase", + "2.4.1.50 Procollagen galactosyltransferase", + "2.4.1.52 Poly(glycerol-phosphate) alpha-glucosyltransferase", + "2.4.1.53 Poly(ribitol-phosphate) beta-glucosyltransferase", + "2.4.1.54 Undecaprenyl-phosphate mannosyltransferase", + "2.4.1.56 Lipopolysaccharide N-acetylglucosaminyltransferase", + "2.4.1.57 Phosphatidylinositol alpha-mannosyltransferase", + "2.4.1.58 Lipopolysaccharide glucosyltransferase I", + "2.4.1.60 Abequosyltransferase", + "2.4.1.62 Ganglioside galactosyltransferase", + "2.4.1.63 Linamarin synthase", + "2.4.1.64 Alpha,alpha-trehalose phosphorylase", + "2.4.1.65 3-galactosyl-N-acetylglucosaminide 4-alpha-L-fucosyltransferase", + "2.4.1.66 Procollagen glucosyltransferase", + "2.4.1.67 Galactinol--raffinose galactosyltransferase", + "2.4.1.68 Glycoprotein 6-alpha-L-fucosyltransferase", + "2.4.1.69 Galactoside 2-alpha-L-fucosyltransferase", + "2.4.1.70 Poly(ribitol-phosphate) N-acetylglucosaminyltransferase", + "2.4.1.71 Arylamine glucosyltransferase", + "2.4.1.73 Lipopolysaccharide glucosyltransferase II", + "2.4.1.74 Glycosaminoglycan galactosyltransferase", + "2.4.1.78 Phosphopolyprenol glucosyltransferase", + "2.4.1.79 Globotriaosylceramide 3-beta-N-acetylgalactosaminyltransferase", + "2.4.1.80 Ceramide glucosyltransferase", + "2.4.1.81 Flavone 7-O-beta-glucosyltransferase", + "2.4.1.82 Galactinol--sucrose galactosyltransferase", + "2.4.1.83 Dolichyl-phosphate beta-D-mannosyltransferase", + "2.4.1.85 Cyanohydrin beta-glucosyltransferase", + "2.4.1.86 Glucosaminylgalactosylglucosylceramide beta-galactosyltransferase", + "2.4.1.87 N-acetyllactosaminide 3-alpha-galactosyltransferase", + "2.4.1.88 Globoside alpha-N-acetylgalactosaminyltransferase", + "2.4.1.90 N-acetyllactosamine synthase", + "2.4.1.91 Flavonol 
3-O-glucosyltransferase", + "2.4.1.92 (N-acetylneuraminyl)-galactosylglucosylceramide N-acetylgalactosaminyltransferase", + "2.4.1.94 Protein N-acetylglucosaminyltransferase", + "2.4.1.95 Bilirubin-glucuronoside glucuronosyltransferase", + "2.4.1.96 sn-glycerol-3-phosphate 1-galactosyltransferase", + "2.4.1.97 1,3-beta-D-glucan phosphorylase", + "2.4.1.99 Sucrose:sucrose fructosyltransferase", + "2.4.1.100 2,1-fructan:2,1-fructan 1-fructosyltransferase", + "2.4.1.101 Alpha-1,3-mannosyl-glycoprotein 2-beta-N-acetylglucosaminyltransferase", + "2.4.1.102 Beta-1,3-galactosyl-O-glycosyl-glycoprotein beta-1,6-N-acetylglucosaminyltransferase", + "2.4.1.103 Alizarin 2-beta-glucosyltransferase", + "2.4.1.104 o-dihydroxycoumarin 7-O-glucosyltransferase", + "2.4.1.105 Vitexin beta-glucosyltransferase", + "2.4.1.106 Isovitexin beta-glucosyltransferase", + "2.4.1.109 Dolichyl-phosphate-mannose-protein mannosyltransferase", + "2.4.1.110 tRNA-queuosine beta-mannosyltransferase", + "2.4.1.111 Coniferyl-alcohol glucosyltransferase", + "2.4.1.113 Alpha-1,4-glucan-protein synthase (ADP-forming)", + "2.4.1.114 2-coumarate O-beta-glucosyltransferase", + "2.4.1.115 Anthocyanidin 3-O-glucosyltransferase", + "2.4.1.116 Cyanidin 3-O-rutinoside 5-O-glucosyltransferase", + "2.4.1.117 Dolichyl-phosphate beta-glucosyltransferase", + "2.4.1.118 Cytokinin 7-beta-glucosyltransferase", + "2.4.1.119 Dolichyl-diphosphooligosaccharide--protein glycotransferase", + "2.4.1.120 Sinapate 1-glucosyltransferase", + "2.4.1.121 Indole-3-acetate beta-glucosyltransferase", + "2.4.1.122 Glycoprotein-N-acetylgalactosamine 3-beta-galactosyltransferase", + "2.4.1.123 Inositol 3-alpha-galactosyltransferase", + "2.4.1.125 Sucrose--1,6-alpha-glucan 3(6)-alpha-glucosyltransferase", + "2.4.1.126 Hydroxycinnamate 4-beta-glucosyltransferase", + "2.4.1.127 Monoterpenol beta-glucosyltransferase", + "2.4.1.128 Scopoletin glucosyltransferase", + "2.4.1.129 Peptidoglycan glycosyltransferase", + "2.4.1.130 
Dolichyl-phosphate-mannose--glycolipid alpha-mannosyltransferase", + "2.4.1.131 Glycolipid 2-alpha-mannosyltransferase", + "2.4.1.132 Glycolipid 3-alpha-mannosyltransferase", + "2.4.1.133 Xylosylprotein 4-beta-galactosyltransferase", + "2.4.1.134 Galactosylxylosylprotein 3-beta-galactosyltransferase", + "2.4.1.135 Galactosylgalactosylxylosylprotein 3-beta-glucuronosyltransferase", + "2.4.1.136 Gallate 1-beta-glucosyltransferase", + "2.4.1.137 sn-glycerol-3-phosphate 2-alpha-galactosyltransferase", + "2.4.1.138 Mannotetraose 2-alpha-N-acetylglucosaminyltransferase", + "2.4.1.139 Maltose synthase", + "2.4.1.140 Alternansucrase", + "2.4.1.141 N-acetylglucosaminyldiphosphodolichol N-acetylglucosaminyltransferase", + "2.4.1.142 Chitobiosyldiphosphodolichol beta-mannosyltransferase", + "2.4.1.143 Alpha-1,6-mannosyl-glycoprotein 2-beta-N-acetylglucosaminyltransferase", + "2.4.1.144 Beta-1,4-mannosyl-glycoprotein 4-beta-N-acetylglucosaminyltransferase", + "2.4.1.145 Alpha-1,3-mannosyl-glycoprotein 4-beta-N-acetylglucosaminyltransferase", + "2.4.1.146 Beta-1,3-galactosyl-O-glycosyl-glycoprotein beta-1,3-N-acetylglucosaminyltransferase", + "2.4.1.147 Acetylgalactosaminyl-O-glycosyl-glycoprotein beta-1,3-N-acetylglucosaminyltransferase", + "2.4.1.148 Acetylgalactosaminyl-O-glycosyl-glycoprotein beta-1,6-N-acetylglucosaminyltransferase", + "2.4.1.149 N-acetyllactosaminide beta-1,3-N-acetylglucosaminyltransferase", + "2.4.1.150 N-acetyllactosaminide beta-1,6-N-acetylglucosaminyl-transferase", + "2.4.1.152 4-galactosyl-N-acetylglucosaminide 3-alpha-L-fucosyltransferase", + "2.4.1.153 Dolichyl-phosphate alpha-N-acetylglucosaminyltransferase", + "2.4.1.155 Alpha-1,6-mannosyl-glycoprotein 6-beta-N-acetylglucosaminyltransferase", + "2.4.1.156 Indolylacetyl-myo-inositol galactosyltransferase", + "2.4.1.157 1,2-diacylglycerol 3-glucosyltransferase", + "2.4.1.158 13-hydroxydocosanoate 13-beta-glucosyltransferase", + "2.4.1.159 Flavonol-3-O-glucoside L-rhamnosyltransferase", + 
"2.4.1.160 Pyridoxine 5'-O-beta-D-glucosyltransferase", + "2.4.1.161 Oligosaccharide 4-alpha-D-glucosyltransferase", + "2.4.1.162 Aldose beta-D-fructosyltransferase", + "2.4.1.163 Beta-galactosyl-N-acetylglucosaminylgalactosylglucosyl-ceramide beta-1,3-acetylglucosaminyltransferase", + "2.4.1.164 Galactosyl-N-acetylglucosaminylgalactosylglucosyl-ceramide beta-1,6-N-acetylglucosaminyltransferase", + "2.4.1.165 N-acetylneuraminylgalactosylglucosylceramide beta-1,4-N-acetylgalactosaminyltransferase", + "2.4.1.166 Raffinose--raffinose alpha-galactosyltransferase", + "2.4.1.167 Sucrose 6(F)-alpha-galactosyltransferase", + "2.4.1.168 Xyloglucan 4-glucosyltransferase", + "2.4.1.170 Isoflavone 7-O-glucosyltransferase", + "2.4.1.171 Methyl-ONN-azoxymethanol beta-D-glucosyltransferase", + "2.4.1.172 Salicyl-alcohol beta-D-glucosyltransferase", + "2.4.1.173 Sterol 3-beta-glucosyltransferase", + "2.4.1.174 Glucuronylgalactosylproteoglycan 4-beta-N-acetylgalactosaminyltransferase", + "2.4.1.175 Glucuronosyl-N-acetylgalactosaminyl-proteoglycan 4-beta-N-acetylgalactosaminyltransferase", + "2.4.1.176 Gibberellin beta-D-glucosyltransferase", + "2.4.1.177 Cinnamate beta-D-glucosyltransferase", + "2.4.1.178 Hydroxymandelonitrile glucosyltransferase", + "2.4.1.179 Lactosylceramide beta-1,3-galactosyltransferase", + "2.4.1.180 Lipopolysaccharide N-acetylmannosaminouronosyltransferase", + "2.4.1.181 Hydroxyanthraquinone glucosyltransferase", + "2.4.1.182 Lipid-A-disaccharide synthase", + "2.4.1.183 Alpha-1,3-glucan synthase", + "2.4.1.184 Galactolipid galactosyltransferase", + "2.4.1.185 Flavanone 7-O-beta-glucosyltransferase", + "2.4.1.186 Glycogenin glucosyltransferase", + "2.4.1.187 N-acetylglucosaminyldiphosphoundecaprenol N-acetyl-beta-D-mannosaminyltransferase", + "2.4.1.188 N-acetylglucosaminyldiphosphoundecaprenol glucosyltransferase", + "2.4.1.189 Luteolin 7-O-glucuronosyltransferase", + "2.4.1.190 Luteolin-7-O-glucuronide 2''-O-glucuronosyltransferase", + "2.4.1.191 
Luteolin-7-O-diglucuronide 4'-O-glucuronosyltransferase", + "2.4.1.192 Nuatigenin 3-beta-glucosyltransferase", + "2.4.1.193 Sarsapogenin 3-beta-glucosyltransferase", + "2.4.1.194 4-hydroxybenzoate 4-O-beta-D-glucosyltransferase", + "2.4.1.195 N-hydroxythioamide S-beta-glucosyltransferase", + "2.4.1.196 Nicotinate glucosyltransferase", + "2.4.1.197 High-mannose-oligosaccharide beta-1,4-N-acetylglucosaminyltransferase", + "2.4.1.198 Phosphatidylinositol N-acetylglucosaminyltransferase", + "2.4.1.199 Beta-mannosylphosphodecaprenol--mannooligosaccharide 6-mannosyltransferase", + "2.4.1.201 Alpha-1,6-mannosyl-glycoprotein 4-beta-N-acetylglucosaminyltransferase", + "2.4.1.202 2,4-dihydroxy-7-methoxy-2H-1,4-benzoxazin-3(4H)-one 2-D-glucosyltransferase", + "2.4.1.203 Trans-zeatin O-beta-D-glucosyltransferase", + "2.4.1.205 Galactogen 6-beta-galactosyltransferase", + "2.4.1.206 Lactosylceramide 1,3-N-acetyl-beta-D-glucosaminyltransferase", + "2.4.1.207 Xyloglucan:xyloglucosyl transferase", + "2.4.1.208 Diglucosyl diacylglycerol synthase", + "2.4.1.209 Cis-p-coumarate glucosyltransferase", + "2.4.1.210 Limonoid glucosyltransferase", + "2.4.1.211 1,3-beta-galactosyl-N-acetylhexosamine phosphorylase", + "2.4.1.212 Hyaluronan synthase", + "2.4.1.213 Glucosylglycerol-phosphate synthase", + "2.4.1.214 Glycoprotein 3-alpha-L-fucosyltransferase", + "2.4.1.215 Cis-zeatin O-beta-D-glucosyltransferase", + "2.4.1.216 Trehalose 6-phosphate phosphorylase", + "2.4.1.217 Mannosyl-3-phosphoglycerate synthase", + "2.4.1.218 Hydroquinone glucosyltransferase", + "2.4.1.219 Vomilenine glucosyltransferase", + "2.4.1.220 Indoxyl-UDPG glucosyltransferase", + "2.4.1.221 Peptide-O-fucosyltransferase", + "2.4.1.222 O-fucosylpeptide 3-beta-N-acetylglucosaminyltransferase", + "2.4.1.223 Glucuronyl-galactosyl-proteoglycan 4-alpha-N-acetylglucosaminyltransferase", + "2.4.1.224 Glucuronosyl-N-acetylglucosaminyl-proteoglycan 4-alpha-N-acetylglucosaminyltransferase", + "2.4.1.225 
N-acetylglucosaminyl-proteoglycan 4-beta-glucuronosyltransferase", + "2.4.1.226 N-acetylgalactosaminyl-proteoglycan 3-beta-glucuronosyltransferase", + "2.4.1.227 Undecaprenyldiphospho-muramoylpentapeptide beta-N-acetylglucosaminyltransferase", + "2.4.1.228 Lactosylceramide 4-alpha-galactosyltransferase", + "2.4.1.229 [Skp1-protein]-hydroxyproline N-acetylglucosaminyltransferase", + "2.4.1.230 Kojibiose phosphorylase", + "2.4.1.231 Alpha,alpha-trehalose phosphorylase (configuration-retaining)", + "2.4.1.232 Initiation-specific alpha-1,6-mannosyltransferase", + "2.4.1.234 Kaempferol 3-O-galactosyltransferase", + "2.4.1.236 Flavanone 7-O-glucoside 2''-O-beta-L-rhamnosyltransferase", + "2.4.1.237 Flavonol 7-O-beta-glucosyltransferase", + "2.4.1.238 Anthocyanin 3'-O-beta-glucosyltransferase", + "2.4.1.239 Flavonol-3-O-glucoside glucosyltransferase", + "2.4.1.240 Flavonol-3-O-glycoside glucosyltransferase", + "2.4.1.241 Digalactosyldiacylglycerol synthase", + "2.4.1.242 NDP-glucose--starch glucosyltransferase", + "2.4.1.243 6(G)-fructosyltransferase", + "2.4.1.244 N-acetyl-beta-glucosaminyl-glycoprotein 4-beta-N-acetylgalactosaminyltransferase", + "2.4.1.245 Alpha,alpha-trehalose synthase", + "2.4.1.246 Mannosylfructose-phosphate synthase", + "2.4.1.247 Beta-D-galactosyl-(1->4)-L-rhamnose phosphorylase", + "2.4.1.248 Cycloisomaltooligosaccharide glucanotransferase", + "2.4.1.249 Delphinidin 3',5'-O-glucosyltransferase", + "2.4.1.250 D-inositol-3-phosphate glycosyltransferase", + "2.4.1.251 GlcA-beta-(1->2)-D-Man-alpha-(1->3)-D-Glc-beta-(1->4)-D-Glc-alpha-1-diphospho-di-trans,octa-cis-undecaprenol 4-beta-mannosyltransferase", + "2.4.1.252 GDP-mannose:cellobiosyl-diphosphopolyprenol alpha-mannosyltransferase", + "2.4.1.253 Baicalein 7-O-glucuronosyltransferase", + "2.4.1.254 Cyanidin-3-O-glucoside 2-O-glucuronosyltransferase", + "2.4.1.255 Protein O-GlcNAc transferase", + "2.4.1.n2 Loliose synthase", + "2.4.2.1 Purine-nucleoside phosphorylase", + "2.4.2.2 
Pyrimidine-nucleoside phosphorylase", + "2.4.2.3 Uridine phosphorylase", + "2.4.2.4 Thymidine phosphorylase", + "2.4.2.5 Nucleoside ribosyltransferase", + "2.4.2.6 Nucleoside deoxyribosyltransferase", + "2.4.2.7 Adenine phosphoribosyltransferase", + "2.4.2.8 Hypoxanthine phosphoribosyltransferase", + "2.4.2.9 Uracil phosphoribosyltransferase", + "2.4.2.10 Orotate phosphoribosyltransferase", + "2.4.2.11 Nicotinate phosphoribosyltransferase", + "2.4.2.12 Nicotinamide phosphoribosyltransferase", + "2.4.2.14 Amidophosphoribosyltransferase", + "2.4.2.15 Guanosine phosphorylase", + "2.4.2.16 Urate-ribonucleotide phosphorylase", + "2.4.2.17 ATP phosphoribosyltransferase", + "2.4.2.18 Anthranilate phosphoribosyltransferase", + "2.4.2.19 Nicotinate-nucleotide diphosphorylase (carboxylating)", + "2.4.2.20 Dioxotetrahydropyrimidine phosphoribosyltransferase", + "2.4.2.21 Nicotinate-nucleotide--dimethylbenzimidazole phosphoribosyltransferase", + "2.4.2.22 Xanthine phosphoribosyltransferase", + "2.4.2.23 Deoxyuridine phosphorylase", + "2.4.2.24 1,4-beta-D-xylan synthase", + "2.4.2.25 Flavone apiosyltransferase", + "2.4.2.26 Protein xylosyltransferase", + "2.4.2.27 dTDP-dihydrostreptose--streptidine-6-phosphate dihydrostreptosyltransferase", + "2.4.2.28 S-methyl-5'-thioadenosine phosphorylase", + "2.4.2.29 tRNA-guanine transglycosylase", + "2.4.2.30 NAD(+) ADP-ribosyltransferase", + "2.4.2.31 NAD(+)--protein-arginine ADP-ribosyltransferase", + "2.4.2.32 Dolichyl-phosphate D-xylosyltransferase", + "2.4.2.33 Dolichyl-xylosyl-phosphate--protein xylosyltransferase", + "2.4.2.34 Indolylacetylinositol arabinosyltransferase", + "2.4.2.35 Flavonol-3-O-glycoside xylosyltransferase", + "2.4.2.36 NAD(+)--diphthamide ADP-ribosyltransferase", + "2.4.2.37 NAD(+)--dinitrogen-reductase ADP-D-ribosyltransferase", + "2.4.2.38 Glycoprotein 2-beta-D-xylosyltransferase", + "2.4.2.39 Xyloglucan 6-xylosyltransferase", + "2.4.2.40 Zeatin O-beta-D-xylosyltransferase", + "2.4.2.41 Xylogalacturonan 
beta-1,3-xylosyltransferase", + "2.4.2.42 UDP-D-xylose:beta-D-glucoside alpha-1,3-D-xylosyltransferase", + "2.4.2.43 Lipid IV(A) 4-amino-4-deoxy-L-arabinosyltransferase", + "2.4.99.1 Beta-galactoside alpha-2,6-sialyltransferase", + "2.4.99.2 Monosialoganglioside sialyltransferase", + "2.4.99.3 Alpha-N-acetylgalactosaminide alpha-2,6-sialyltransferase", + "2.4.99.4 Beta-galactoside alpha-2,3-sialyltransferase", + "2.4.99.5 Galactosyldiacylglycerol alpha-2,3-sialyltransferase", + "2.4.99.6 N-acetyllactosaminide alpha-2,3-sialyltransferase", + "2.4.99.7 Alpha-N-acetylneuraminyl-2,3-beta-galactosyl-1,3-N-acetylgalactosaminide 6-alpha-sialyltransferase", + "2.4.99.8 Alpha-N-acetylneuraminate alpha-2,8-sialyltransferase", + "2.4.99.9 Lactosylceramide alpha-2,3-sialyltransferase", + "2.4.99.10 Neolactotetraosylceramide alpha-2,3-sialyltransferase", + "2.4.99.11 Lactosylceramide alpha-2,6-N-sialyltransferase", + "2.4.99.12 Lipid IV(A) 3-deoxy-D-manno-octulosonic acid transferase", + "2.4.99.13 (KDO)-lipid IV(A) 3-deoxy-D-manno-octulosonic acid transferase", + "2.4.99.14 (KDO)(2)-lipid IV(A) (2-8) 3-deoxy-D-manno-octulosonic acid transferase", + "2.4.99.15 (KDO)(3)-lipid IV(A) (2-4) 3-deoxy-D-manno-octulosonic acid transferase", + "2.5.1.1 Dimethylallyltranstransferase", + "2.5.1.2 Thiamine pyridinylase", + "2.5.1.3 Thiamine-phosphate diphosphorylase", + "2.5.1.4 Adenosylmethionine cyclotransferase", + "2.5.1.5 Galactose-6-sulfurylase", + "2.5.1.6 Methionine adenosyltransferase", + "2.5.1.7 UDP-N-acetylglucosamine 1-carboxyvinyltransferase", + "2.5.1.9 Riboflavin synthase", + "2.5.1.10 (2E,6E)-farnesyl diphosphate synthase", + "2.5.1.15 Dihydropteroate synthase", + "2.5.1.16 Spermidine synthase", + "2.5.1.17 Cob(I)yrinic acid a,c-diamide adenosyltransferase", + "2.5.1.18 Glutathione transferase", + "2.5.1.19 3-phosphoshikimate 1-carboxyvinyltransferase", + "2.5.1.20 Rubber cis-polyprenylcistransferase", + "2.5.1.21 Squalene synthase", + "2.5.1.22 Spermine synthase", + 
"2.5.1.23 Sym-norspermidine synthase", + "2.5.1.24 Discadenine synthase", + "2.5.1.25 tRNA-uridine aminocarboxypropyltransferase", + "2.5.1.26 Alkylglycerone-phosphate synthase", + "2.5.1.27 Adenylate dimethylallyltransferase", + "2.5.1.28 Dimethylallylcistransferase", + "2.5.1.29 Farnesyltranstransferase", + "2.5.1.30 Heptaprenyl diphosphate synthase", + "2.5.1.31 Di-trans,poly-cis-undecaprenyl-diphosphate synthase ((2E,6E)-farnesyl-diphosphate specific)", + "2.5.1.32 Phytoene synthase", + "2.5.1.34 4-dimethylallyltryptophan synthase", + "2.5.1.35 Aspulvinone dimethylallyltransferase", + "2.5.1.36 Trihydroxypterocarpan dimethylallyltransferase", + "2.5.1.38 Isonocardicin synthase", + "2.5.1.39 4-hydroxybenzoate polyprenyltransferase", + "2.5.1.41 Phosphoglycerol geranylgeranyltransferase", + "2.5.1.42 Geranylgeranylglycerol-phosphate geranylgeranyltransferase", + "2.5.1.43 Nicotianamine synthase", + "2.5.1.44 Homospermidine synthase", + "2.5.1.45 Homospermidine synthase (spermidine-specific)", + "2.5.1.46 Deoxyhypusine synthase", + "2.5.1.47 Cysteine synthase", + "2.5.1.48 Cystathionine gamma-synthase", + "2.5.1.49 O-acetylhomoserine aminocarboxypropyltransferase", + "2.5.1.50 Zeatin 9-aminocarboxyethyltransferase", + "2.5.1.51 Beta-pyrazolylalanine synthase", + "2.5.1.52 L-mimosine synthase", + "2.5.1.53 Uracilylalanine synthase", + "2.5.1.54 3-deoxy-7-phosphoheptulonate synthase", + "2.5.1.55 3-deoxy-8-phosphooctulonate synthase", + "2.5.1.56 N-acetylneuraminate synthase", + "2.5.1.57 N-acylneuraminate-9-phosphate synthase", + "2.5.1.58 Protein farnesyltransferase", + "2.5.1.59 Protein geranylgeranyltransferase type I", + "2.5.1.60 Protein geranylgeranyltransferase type II", + "2.5.1.61 Hydroxymethylbilane synthase", + "2.5.1.62 Chlorophyll synthase", + "2.5.1.63 Adenosyl-fluoride synthase", + "2.5.1.65 O-phosphoserine sulfhydrylase", + "2.5.1.66 N(2)-(2-carboxyethyl)arginine synthase", + "2.5.1.67 Chrysanthemyl diphosphate synthase", + "2.5.1.68 
(2Z,6E)-farnesyl diphosphate synthase", + "2.5.1.69 Lavandulyl diphosphate synthase", + "2.5.1.70 Naringenin 8-dimethylallyltransferase", + "2.5.1.71 Leachianone-G 2''-dimethylallyltransferase", + "2.5.1.72 Quinolinate synthase", + "2.5.1.73 O-phospho-L-seryl-tRNA:Cys-tRNA synthase", + "2.5.1.74 1,4-dihydroxy-2-naphthoate polyprenyltransferase", + "2.5.1.75 tRNA dimethylallyltransferase", + "2.5.1.76 Cysteate synthase", + "2.5.1.77 7,8-didemethyl-8-hydroxy-5-deazariboflavin synthase", + "2.5.1.78 6,7-dimethyl-8-ribityllumazine synthase", + "2.5.1.79 Thermospermine synthase", + "2.5.1.80 7-dimethylallyltryptophan synthase", + "2.5.1.81 Geranylfarnesyl diphosphate synthase", + "2.5.1.82 Hexaprenyl diphosphate synthase (geranylgeranyl-diphosphate specific)", + "2.5.1.83 Hexaprenyl-diphosphate synthase ((2E,6E)-farnesyl-diphosphate specific)", + "2.5.1.84 All-trans-nonaprenyl-diphosphate synthase (geranyl-diphosphate specific)", + "2.5.1.85 All-trans-nonaprenyl-diphosphate synthase (geranylgeranyl-diphosphate specific)", + "2.5.1.86 Trans,poly-cis-decaprenyl diphosphate synthase", + "2.5.1.87 Di-trans,poly-cis-polyprenyl diphosphate synthase ((2E,6E)-farnesyl diphosphate specific)", + "2.5.1.88 Trans,poly-cis-polyprenyl diphosphate synthase ((2Z,6E)-farnesyl diphosphate specific)", + "2.5.1.89 Tri-trans,poly-cis-undecaprenyl-diphosphate synthase (geranylgeranyl-diphosphate specific)", + "2.5.1.90 All-trans-octaprenyl-diphosphate synthase", + "2.5.1.91 All-trans-decaprenyl-diphosphate synthase", + "2.5.1.92 (2Z,6Z)-farnesyl diphosphate synthase", + "2.5.1.93 4-hydroxybenzoate geranyltransferase", + "2.5.1.94 Adenosyl-chloride synthase", + "2.6.1.1 Aspartate transaminase", + "2.6.1.2 Alanine transaminase", + "2.6.1.3 Cysteine transaminase", + "2.6.1.4 Glycine transaminase", + "2.6.1.5 Tyrosine transaminase", + "2.6.1.6 Leucine transaminase", + "2.6.1.7 Kynurenine--oxoglutarate transaminase", + "2.6.1.8 2,5-diaminovalerate transaminase", + "2.6.1.9 Histidinol-phosphate 
transaminase", + "2.6.1.11 Acetylornithine transaminase", + "2.6.1.12 Alanine--oxo-acid transaminase", + "2.6.1.13 Ornithine aminotransferase", + "2.6.1.14 Asparagine--oxo-acid transaminase", + "2.6.1.15 Glutamine--pyruvate transaminase", + "2.6.1.16 Glutamine--fructose-6-phosphate transaminase (isomerizing)", + "2.6.1.17 Succinyldiaminopimelate transaminase", + "2.6.1.18 Beta-alanine--pyruvate transaminase", + "2.6.1.19 4-aminobutyrate transaminase", + "2.6.1.21 D-amino-acid transaminase", + "2.6.1.22 (S)-3-amino-2-methylpropionate transaminase", + "2.6.1.23 4-hydroxyglutamate transaminase", + "2.6.1.24 Diiodotyrosine transaminase", + "2.6.1.26 Thyroid-hormone transaminase", + "2.6.1.27 Tryptophan transaminase", + "2.6.1.28 Tryptophan--phenylpyruvate transaminase", + "2.6.1.29 Diamine transaminase", + "2.6.1.30 Pyridoxamine--pyruvate transaminase", + "2.6.1.31 Pyridoxamine--oxaloacetate transaminase", + "2.6.1.32 Valine--3-methyl-2-oxovalerate transaminase", + "2.6.1.33 dTDP-4-amino-4,6-dideoxy-D-glucose transaminase", + "2.6.1.34 UDP-2-acetamido-4-amino-2,4,6-trideoxyglucose transaminase", + "2.6.1.35 Glycine--oxaloacetate transaminase", + "2.6.1.36 L-lysine 6-transaminase", + "2.6.1.37 2-aminoethylphosphonate--pyruvate transaminase", + "2.6.1.38 Histidine transaminase", + "2.6.1.39 2-aminoadipate transaminase", + "2.6.1.40 (R)-3-amino-2-methylpropionate--pyruvate transaminase", + "2.6.1.41 D-methionine--pyruvate transaminase", + "2.6.1.42 Branched-chain-amino-acid transaminase", + "2.6.1.43 Aminolevulinate transaminase", + "2.6.1.44 Alanine--glyoxylate transaminase", + "2.6.1.45 Serine--glyoxylate transaminase", + "2.6.1.46 Diaminobutyrate--pyruvate transaminase", + "2.6.1.47 Alanine--oxomalonate transaminase", + "2.6.1.48 5-aminovalerate transaminase", + "2.6.1.49 Dihydroxyphenylalanine transaminase", + "2.6.1.50 Glutamine--scyllo-inositol transaminase", + "2.6.1.51 Serine--pyruvate transaminase", + "2.6.1.52 Phosphoserine transaminase", + "2.6.1.54 
Pyridoxamine-phosphate transaminase", + "2.6.1.55 Taurine--2-oxoglutarate transaminase", + "2.6.1.56 1D-1-guanidino-3-amino-1,3-dideoxy-scyllo-inositol transaminase", + "2.6.1.57 Aromatic-amino-acid transaminase", + "2.6.1.58 Phenylalanine(histidine) transaminase", + "2.6.1.59 dTDP-4-amino-4,6-dideoxygalactose transaminase", + "2.6.1.60 Aromatic-amino-acid--glyoxylate transaminase", + "2.6.1.62 Adenosylmethionine--8-amino-7-oxononanoate transaminase", + "2.6.1.63 Kynurenine--glyoxylate transaminase", + "2.6.1.64 Glutamine--phenylpyruvate transaminase", + "2.6.1.65 N(6)-acetyl-beta-lysine transaminase", + "2.6.1.66 Valine--pyruvate transaminase", + "2.6.1.67 2-aminohexanoate transaminase", + "2.6.1.68 Ornithine(lysine) transaminase", + "2.6.1.70 Aspartate--phenylpyruvate transaminase", + "2.6.1.71 Lysine--pyruvate 6-transaminase", + "2.6.1.72 D-4-hydroxyphenylglycine transaminase", + "2.6.1.73 Methionine--glyoxylate transaminase", + "2.6.1.74 Cephalosporin-C transaminase", + "2.6.1.75 Cysteine-conjugate transaminase", + "2.6.1.76 Diaminobutyrate--2-oxoglutarate transaminase", + "2.6.1.77 Taurine--pyruvate aminotransferase", + "2.6.1.78 Aspartate--prephenate aminotransferase", + "2.6.1.79 Glutamate--prephenate aminotransferase", + "2.6.1.80 Nicotianamine aminotransferase", + "2.6.1.81 Succinylornithine transaminase", + "2.6.1.82 Putrescine aminotransferase", + "2.6.1.83 LL-diaminopimelate aminotransferase", + "2.6.1.84 Arginine--pyruvate transaminase", + "2.6.1.85 Aminodeoxychorismate synthase", + "2.6.1.86 2-amino-4-deoxychorismate synthase", + "2.6.1.87 UDP-4-amino-4-deoxy-L-arabinose aminotransferase", + "2.6.3.1 Oximinotransferase", + "2.6.99.1 dATP(dGTP)--DNA purinetransferase", + "2.6.99.2 Pyridoxine 5'-phosphate synthase", + "2.7.1.1 Hexokinase", + "2.7.1.2 Glucokinase", + "2.7.1.3 Ketohexokinase", + "2.7.1.4 Fructokinase", + "2.7.1.5 Rhamnulokinase", + "2.7.1.6 Galactokinase", + "2.7.1.7 Mannokinase", + "2.7.1.8 Glucosamine kinase", + "2.7.1.10 
Phosphoglucokinase", + "2.7.1.11 6-phosphofructokinase", + "2.7.1.12 Gluconokinase", + "2.7.1.13 Dehydrogluconokinase", + "2.7.1.14 Sedoheptulokinase", + "2.7.1.15 Ribokinase", + "2.7.1.16 Ribulokinase", + "2.7.1.17 Xylulokinase", + "2.7.1.18 Phosphoribokinase", + "2.7.1.19 Phosphoribulokinase", + "2.7.1.20 Adenosine kinase", + "2.7.1.21 Thymidine kinase", + "2.7.1.22 Ribosylnicotinamide kinase", + "2.7.1.23 NAD(+) kinase", + "2.7.1.24 Dephospho-CoA kinase", + "2.7.1.25 Adenylyl-sulfate kinase", + "2.7.1.26 Riboflavin kinase", + "2.7.1.27 Erythritol kinase", + "2.7.1.28 Triokinase", + "2.7.1.29 Glycerone kinase", + "2.7.1.30 Glycerol kinase", + "2.7.1.31 Glycerate kinase", + "2.7.1.32 Choline kinase", + "2.7.1.33 Pantothenate kinase", + "2.7.1.34 Pantetheine kinase", + "2.7.1.35 Pyridoxal kinase", + "2.7.1.36 Mevalonate kinase", + "2.7.1.39 Homoserine kinase", + "2.7.1.40 Pyruvate kinase", + "2.7.1.41 Glucose-1-phosphate phosphodismutase", + "2.7.1.42 Riboflavin phosphotransferase", + "2.7.1.43 Glucuronokinase", + "2.7.1.44 Galacturonokinase", + "2.7.1.45 2-dehydro-3-deoxygluconokinase", + "2.7.1.46 L-arabinokinase", + "2.7.1.47 D-ribulokinase", + "2.7.1.48 Uridine kinase", + "2.7.1.49 Hydroxymethylpyrimidine kinase", + "2.7.1.50 Hydroxyethylthiazole kinase", + "2.7.1.51 L-fuculokinase", + "2.7.1.52 Fucokinase", + "2.7.1.53 L-xylulokinase", + "2.7.1.54 D-arabinokinase", + "2.7.1.55 Allose kinase", + "2.7.1.56 1-phosphofructokinase", + "2.7.1.58 2-dehydro-3-deoxygalactonokinase", + "2.7.1.59 N-acetylglucosamine kinase", + "2.7.1.60 N-acylmannosamine kinase", + "2.7.1.61 Acyl-phosphate--hexose phosphotransferase", + "2.7.1.62 Phosphoramidate--hexose phosphotransferase", + "2.7.1.63 Polyphosphate--glucose phosphotransferase", + "2.7.1.64 Inositol 3-kinase", + "2.7.1.65 Scyllo-inosamine 4-kinase", + "2.7.1.66 Undecaprenol kinase", + "2.7.1.67 1-phosphatidylinositol 4-kinase", + "2.7.1.68 1-phosphatidylinositol-4-phosphate 5-kinase", + "2.7.1.69 
Protein-N(pi)-phosphohistidine--sugar phosphotransferase", + "2.7.1.71 Shikimate kinase", + "2.7.1.72 Streptomycin 6-kinase", + "2.7.1.73 Inosine kinase", + "2.7.1.74 Deoxycytidine kinase", + "2.7.1.76 Deoxyadenosine kinase", + "2.7.1.77 Nucleoside phosphotransferase", + "2.7.1.78 Polynucleotide 5'-hydroxyl-kinase", + "2.7.1.79 Diphosphate--glycerol phosphotransferase", + "2.7.1.80 Diphosphate--serine phosphotransferase", + "2.7.1.81 Hydroxylysine kinase", + "2.7.1.82 Ethanolamine kinase", + "2.7.1.83 Pseudouridine kinase", + "2.7.1.84 Alkylglycerone kinase", + "2.7.1.85 Beta-glucoside kinase", + "2.7.1.86 NADH kinase", + "2.7.1.87 Streptomycin 3''-kinase", + "2.7.1.88 Dihydrostreptomycin-6-phosphate 3'-alpha-kinase", + "2.7.1.89 Thiamine kinase", + "2.7.1.90 Diphosphate--fructose-6-phosphate 1-phosphotransferase", + "2.7.1.91 Sphinganine kinase", + "2.7.1.92 5-dehydro-2-deoxygluconokinase", + "2.7.1.93 Alkylglycerol kinase", + "2.7.1.94 Acylglycerol kinase", + "2.7.1.95 Kanamycin kinase", + "2.7.1.100 S-methyl-5-thioribose kinase", + "2.7.1.101 Tagatose kinase", + "2.7.1.102 Hamamelose kinase", + "2.7.1.103 Viomycin kinase", + "2.7.1.105 6-phosphofructo-2-kinase", + "2.7.1.106 Glucose-1,6-bisphosphate synthase", + "2.7.1.107 Diacylglycerol kinase", + "2.7.1.108 Dolichol kinase", + "2.7.1.113 Deoxyguanosine kinase", + "2.7.1.114 AMP--thymidine kinase", + "2.7.1.118 ADP--thymidine kinase", + "2.7.1.119 Hygromycin-B 7''-O-kinase", + "2.7.1.121 Phosphoenolpyruvate--glycerone phosphotransferase", + "2.7.1.122 Xylitol kinase", + "2.7.1.127 Inositol-trisphosphate 3-kinase", + "2.7.1.130 Tetraacyldisaccharide 4'-kinase", + "2.7.1.134 Inositol-tetrakisphosphate 1-kinase", + "2.7.1.136 Macrolide 2'-kinase", + "2.7.1.137 Phosphatidylinositol 3-kinase", + "2.7.1.138 Ceramide kinase", + "2.7.1.140 Inositol-tetrakisphosphate 5-kinase", + "2.7.1.142 Glycerol-3-phosphate--glucose phosphotransferase", + "2.7.1.143 Diphosphate-purine nucleoside kinase", + "2.7.1.144 
Tagatose-6-phosphate kinase", + "2.7.1.145 Deoxynucleoside kinase", + "2.7.1.146 ADP-specific phosphofructokinase", + "2.7.1.147 ADP-specific glucokinase", + "2.7.1.148 4-(cytidine 5'-diphospho)-2-C-methyl-D-erythritol kinase", + "2.7.1.149 1-phosphatidylinositol-5-phosphate 4-kinase", + "2.7.1.150 1-phosphatidylinositol-3-phosphate 5-kinase", + "2.7.1.151 Inositol-polyphosphate multikinase", + "2.7.1.153 Phosphatidylinositol-4,5-bisphosphate 3-kinase", + "2.7.1.154 Phosphatidylinositol-4-phosphate 3-kinase", + "2.7.1.156 Adenosylcobinamide kinase", + "2.7.1.157 N-acetylgalactosamine kinase", + "2.7.1.158 Inositol-pentakisphosphate 2-kinase", + "2.7.1.159 Inositol-1,3,4-trisphosphate 5/6-kinase", + "2.7.1.160 2'-phosphotransferase", + "2.7.1.161 CTP-dependent riboflavin kinase", + "2.7.1.162 N-acetylhexosamine 1-kinase", + "2.7.1.163 Hygromycin B 4-O-kinase", + "2.7.1.164 O-phosphoseryl-tRNA(Sec) kinase", + "2.7.1.165 Glycerate 2-kinase", + "2.7.1.166 3-deoxy-D-manno-octulosonic acid kinase", + "2.7.1.167 D-glycero-beta-D-manno-heptose-7-phosphate kinase", + "2.7.1.168 D-glycero-alpha-D-manno-heptose-7-phosphate kinase", + "2.7.1.169 Pantoate kinase", + "2.7.1.n1 Anhydro-N-acetylmuramic acid kinase", + "2.7.1.n4 Nicotinamide riboside kinase", + "2.7.1.n5 Diacylglycerol kinase (CTP dependent)", + "2.7.2.1 Acetate kinase", + "2.7.2.2 Carbamate kinase", + "2.7.2.3 Phosphoglycerate kinase", + "2.7.2.4 Aspartate kinase", + "2.7.2.6 Formate kinase", + "2.7.2.7 Butyrate kinase", + "2.7.2.8 Acetylglutamate kinase", + "2.7.2.10 Phosphoglycerate kinase (GTP)", + "2.7.2.11 Glutamate 5-kinase", + "2.7.2.12 Acetate kinase (diphosphate)", + "2.7.2.13 Glutamate 1-kinase", + "2.7.2.14 Branched-chain-fatty-acid kinase", + "2.7.2.15 Propionate kinase", + "2.7.3.1 Guanidinoacetate kinase", + "2.7.3.2 Creatine kinase", + "2.7.3.3 Arginine kinase", + "2.7.3.4 Taurocyamine kinase", + "2.7.3.5 Lombricine kinase", + "2.7.3.6 Hypotaurocyamine kinase", + "2.7.3.7 Opheline kinase", + 
"2.7.3.8 Ammonia kinase", + "2.7.3.9 Phosphoenolpyruvate--protein phosphotransferase", + "2.7.3.10 Agmatine kinase", + "2.7.4.1 Polyphosphate kinase", + "2.7.4.2 Phosphomevalonate kinase", + "2.7.4.3 Adenylate kinase", + "2.7.4.4 Nucleoside-phosphate kinase", + "2.7.4.6 Nucleoside-diphosphate kinase", + "2.7.4.7 Phosphomethylpyrimidine kinase", + "2.7.4.8 Guanylate kinase", + "2.7.4.9 dTMP kinase", + "2.7.4.10 Nucleoside-triphosphate--adenylate kinase", + "2.7.4.11 (Deoxy)adenylate kinase", + "2.7.4.12 T(2)-induced deoxynucleotide kinase", + "2.7.4.13 (Deoxy)nucleoside-phosphate kinase", + "2.7.4.14 UMP/CMP kinase", + "2.7.4.15 Thiamine-diphosphate kinase", + "2.7.4.16 Thiamine-phosphate kinase", + "2.7.4.17 3-phosphoglyceroyl-phosphate--polyphosphate phosphotransferase", + "2.7.4.18 Farnesyl-diphosphate kinase", + "2.7.4.19 5-methyldeoxycytidine-5'-phosphate kinase", + "2.7.4.20 Dolichyl-diphosphate--polyphosphate phosphotransferase", + "2.7.4.21 Inositol-hexakisphosphate kinase", + "2.7.4.22 UMP kinase", + "2.7.4.23 Ribose 1,5-bisphosphate phosphokinase", + "2.7.4.24 Diphosphoinositol-pentakisphosphate kinase", + "2.7.4.25 (d)CMP kinase", + "2.7.6.1 Ribose-phosphate diphosphokinase", + "2.7.6.2 Thiamine diphosphokinase", + "2.7.6.3 2-amino-4-hydroxy-6-hydroxymethyldihydropteridine diphosphokinase", + "2.7.6.4 Nucleotide diphosphokinase", + "2.7.6.5 GTP diphosphokinase", + "2.7.7.1 Nicotinamide-nucleotide adenylyltransferase", + "2.7.7.2 FAD synthetase", + "2.7.7.3 Pantetheine-phosphate adenylyltransferase", + "2.7.7.4 Sulfate adenylyltransferase", + "2.7.7.5 Sulfate adenylyltransferase (ADP)", + "2.7.7.6 DNA-directed RNA polymerase", + "2.7.7.7 DNA-directed DNA polymerase", + "2.7.7.8 Polyribonucleotide nucleotidyltransferase", + "2.7.7.9 UTP--glucose-1-phosphate uridylyltransferase", + "2.7.7.10 UTP--hexose-1-phosphate uridylyltransferase", + "2.7.7.11 UTP--xylose-1-phosphate uridylyltransferase", + "2.7.7.12 UDP-glucose--hexose-1-phosphate uridylyltransferase", 
+ "2.7.7.13 Mannose-1-phosphate guanylyltransferase", + "2.7.7.14 Ethanolamine-phosphate cytidylyltransferase", + "2.7.7.15 Choline-phosphate cytidylyltransferase", + "2.7.7.18 Nicotinate-nucleotide adenylyltransferase", + "2.7.7.19 Polynucleotide adenylyltransferase", + "2.7.7.22 Mannose-1-phosphate guanylyltransferase (GDP)", + "2.7.7.23 UDP-N-acetylglucosamine diphosphorylase", + "2.7.7.24 Glucose-1-phosphate thymidylyltransferase", + "2.7.7.27 Glucose-1-phosphate adenylyltransferase", + "2.7.7.28 Nucleoside-triphosphate-aldose-1-phosphate nucleotidyltransferase", + "2.7.7.30 Fucose-1-phosphate guanylyltransferase", + "2.7.7.31 DNA nucleotidylexotransferase", + "2.7.7.32 Galactose-1-phosphate thymidylyltransferase", + "2.7.7.33 Glucose-1-phosphate cytidylyltransferase", + "2.7.7.34 Glucose-1-phosphate guanylyltransferase", + "2.7.7.35 Ribose-5-phosphate adenylyltransferase", + "2.7.7.36 Aldose-1-phosphate adenylyltransferase", + "2.7.7.37 Aldose-1-phosphate nucleotidyltransferase", + "2.7.7.38 3-deoxy-manno-octulosonate cytidylyltransferase", + "2.7.7.39 Glycerol-3-phosphate cytidylyltransferase", + "2.7.7.40 D-ribitol-5-phosphate cytidylyltransferase", + "2.7.7.41 Phosphatidate cytidylyltransferase", + "2.7.7.42 [Glutamate--ammonia-ligase] adenylyltransferase", + "2.7.7.43 N-acylneuraminate cytidylyltransferase", + "2.7.7.44 Glucuronate-1-phosphate uridylyltransferase", + "2.7.7.45 Guanosine-triphosphate guanylyltransferase", + "2.7.7.46 Gentamicin 2''-nucleotidyltransferase", + "2.7.7.47 Streptomycin 3''-adenylyltransferase", + "2.7.7.48 RNA-directed RNA polymerase", + "2.7.7.49 RNA-directed DNA polymerase", + "2.7.7.50 mRNA guanylyltransferase", + "2.7.7.51 Adenylylsulfate--ammonia adenylyltransferase", + "2.7.7.52 RNA uridylyltransferase", + "2.7.7.53 ATP adenylyltransferase", + "2.7.7.54 Phenylalanine adenylyltransferase", + "2.7.7.55 Anthranilate adenylyltransferase", + "2.7.7.56 tRNA nucleotidyltransferase", + "2.7.7.57 N-methylphosphoethanolamine 
cytidylyltransferase", + "2.7.7.58 (2,3-dihydroxybenzoyl)adenylate synthase", + "2.7.7.59 [Protein-PII] uridylyltransferase", + "2.7.7.60 2-C-methyl-D-erythritol 4-phosphate cytidylyltransferase", + "2.7.7.61 Citrate lyase holo-[acyl-carrier-protein] synthase", + "2.7.7.62 Adenosylcobinamide-phosphate guanylyltransferase", + "2.7.7.63 Lipoate--protein ligase", + "2.7.7.64 UTP-monosaccharide-1-phosphate uridylyltransferase", + "2.7.7.65 Diguanylate cyclase", + "2.7.7.66 Malonate decarboxylase holo-[acyl-carrier-protein] synthase", + "2.7.7.67 CDP-archaeol synthase", + "2.7.7.68 2-phospho-L-lactate guanylyltransferase", + "2.7.7.69 GDP-L-galactose phosphorylase", + "2.7.7.70 D-glycero-beta-D-manno-heptose 1-phosphate adenylyltransferase", + "2.7.7.71 D-glycero-alpha-D-manno-heptose 1-phosphate guanylyltransferase", + "2.7.7.72 CCA tRNA nucleotidyltransferase", + "2.7.7.73 Sulfur carrier protein ThiS adenylyltransferase", + "2.7.7.n1 Adenosine monophosphate-protein transferase", + "2.7.8.1 Ethanolaminephosphotransferase", + "2.7.8.2 Diacylglycerol cholinephosphotransferase", + "2.7.8.3 Ceramide cholinephosphotransferase", + "2.7.8.4 Serine-phosphoethanolamine synthase", + "2.7.8.5 CDP-diacylglycerol--glycerol-3-phosphate 3-phosphatidyltransferase", + "2.7.8.6 Undecaprenyl-phosphate galactose phosphotransferase", + "2.7.8.7 Holo-[acyl-carrier-protein] synthase", + "2.7.8.8 CDP-diacylglycerol--serine O-phosphatidyltransferase", + "2.7.8.9 Phosphomannan mannosephosphotransferase", + "2.7.8.10 Sphingosine cholinephosphotransferase", + "2.7.8.11 CDP-diacylglycerol--inositol 3-phosphatidyltransferase", + "2.7.8.12 CDP-glycerol glycerophosphotransferase", + "2.7.8.13 Phospho-N-acetylmuramoyl-pentapeptide-transferase", + "2.7.8.14 CDP-ribitol ribitolphosphotransferase", + "2.7.8.15 UDP-N-acetylglucosamine--dolichyl-phosphate N-acetylglucosaminephosphotransferase", + "2.7.8.17 UDP-N-acetylglucosamine--lysosomal-enzyme N-acetylglucosaminephosphotransferase", + "2.7.8.18 
UDP-galactose--UDP-N-acetylglucosamine galactose phosphotransferase", + "2.7.8.19 UDP-glucose--glycoprotein glucose phosphotransferase", + "2.7.8.20 Phosphatidylglycerol--membrane-oligosaccharide glycerophosphotransferase", + "2.7.8.21 Membrane-oligosaccharide glycerophosphotransferase", + "2.7.8.22 1-alkenyl-2-acylglycerol choline phosphotransferase", + "2.7.8.23 Carboxyvinyl-carboxyphosphonate phosphorylmutase", + "2.7.8.24 Phosphatidylcholine synthase", + "2.7.8.25 Triphosphoribosyl-dephospho-CoA synthase", + "2.7.8.26 Adenosylcobinamide-GDP ribazoletransferase", + "2.7.8.27 Sphingomyelin synthase", + "2.7.8.28 2-phospho-L-lactate transferase", + "2.7.8.29 L-serine-phosphatidylethanolamine phosphatidyltransferase", + "2.7.8.30 Undecaprenyl-phosphate 4-deoxy-4-formamido-L-arabinose transferase", + "2.7.8.31 Undecaprenyl-phosphate glucose phosphotransferase", + "2.7.8.32 3-O-alpha-D-mannopyranosyl-alpha-D-mannopyranose xylosylphosphotransferase", + "2.7.8.n2 UDP-GlcNAc:undecaprenyl-phosphate GlcNAc-1-phosphate transferase", + "2.7.9.1 Pyruvate, phosphate dikinase", + "2.7.9.2 Pyruvate, water dikinase", + "2.7.9.3 Selenide, water dikinase", + "2.7.9.4 Alpha-glucan, water dikinase", + "2.7.9.5 Phosphoglucan, water dikinase", + "2.7.10.1 Receptor protein-tyrosine kinase", + "2.7.10.2 Non-specific protein-tyrosine kinase", + "2.7.11.1 Non-specific serine/threonine protein kinase", + "2.7.11.2 [Pyruvate dehydrogenase (acetyl-transferring)] kinase", + "2.7.11.3 Dephospho-[reductase kinase] kinase", + "2.7.11.4 [3-methyl-2-oxobutanoate dehydrogenase (acetyl-transferring)] kinase", + "2.7.11.5 [Isocitrate dehydrogenase (NADP(+))] kinase", + "2.7.11.6 [Tyrosine 3-monooxygenase] kinase", + "2.7.11.7 [Myosin heavy-chain] kinase", + "2.7.11.8 Fas-activated serine/threonine kinase", + "2.7.11.9 [Goodpasture-antigen-binding protein] kinase", + "2.7.11.10 I-kappa-B kinase", + "2.7.11.11 cAMP-dependent protein kinase", + "2.7.11.12 cGMP-dependent protein kinase", + "2.7.11.13 
Protein kinase C", + "2.7.11.14 Rhodopsin kinase", + "2.7.11.15 [Beta-adrenergic-receptor] kinase", + "2.7.11.16 [G-protein-coupled receptor] kinase", + "2.7.11.17 Calcium/calmodulin-dependent protein kinase", + "2.7.11.18 [Myosin light-chain] kinase", + "2.7.11.19 Phosphorylase kinase", + "2.7.11.20 [Elongation factor 2] kinase", + "2.7.11.21 Polo kinase", + "2.7.11.22 Cyclin-dependent kinase", + "2.7.11.23 [RNA-polymerase]-subunit kinase", + "2.7.11.24 Mitogen-activated protein kinase", + "2.7.11.25 Mitogen-activated protein kinase kinase kinase", + "2.7.11.26 [Tau protein] kinase", + "2.7.11.27 [Acetyl-CoA carboxylase] kinase", + "2.7.11.28 Tropomyosin kinase", + "2.7.11.29 [Low-density-lipoprotein receptor] kinase", + "2.7.11.30 Receptor protein serine/threonine kinase", + "2.7.11.31 [Hydroxymethylglutaryl-CoA reductase (NADPH)] kinase", + "2.7.12.1 Dual-specificity kinase", + "2.7.12.2 Mitogen-activated protein kinase kinase", + "2.7.13.1 Protein-histidine pros-kinase", + "2.7.13.2 Protein-histidine tele-kinase", + "2.7.13.3 Histidine kinase", + "2.7.99.1 Triphosphate--protein phosphotransferase", + "2.8.1.1 Thiosulfate sulfurtransferase", + "2.8.1.2 3-mercaptopyruvate sulfurtransferase", + "2.8.1.3 Thiosulfate--thiol sulfurtransferase", + "2.8.1.4 tRNA sulfurtransferase", + "2.8.1.5 Thiosulfate--dithiol sulfurtransferase", + "2.8.1.6 Biotin synthase", + "2.8.1.7 Cysteine desulfurase", + "2.8.1.8 Lipoyl synthase", + "2.8.2.1 Aryl sulfotransferase", + "2.8.2.2 Alcohol sulfotransferase", + "2.8.2.3 Amine sulfotransferase", + "2.8.2.4 Estrone sulfotransferase", + "2.8.2.5 Chondroitin 4-sulfotransferase", + "2.8.2.6 Choline sulfotransferase", + "2.8.2.7 UDP-N-acetylgalactosamine-4-sulfate sulfotransferase", + "2.8.2.8 [Heparan sulfate]-glucosamine N-sulfotransferase", + "2.8.2.9 Tyrosine-ester sulfotransferase", + "2.8.2.10 Renilla-luciferin sulfotransferase", + "2.8.2.11 Galactosylceramide sulfotransferase", + "2.8.2.13 Psychosine sulfotransferase", + "2.8.2.14 
Bile-salt sulfotransferase", + "2.8.2.15 Steroid sulfotransferase", + "2.8.2.16 Thiol sulfotransferase", + "2.8.2.17 Chondroitin 6-sulfotransferase", + "2.8.2.18 Cortisol sulfotransferase", + "2.8.2.19 Triglucosylalkylacylglycerol sulfotransferase", + "2.8.2.20 Protein-tyrosine sulfotransferase", + "2.8.2.21 Keratan sulfotransferase", + "2.8.2.22 Aryl-sulfate sulfotransferase", + "2.8.2.23 [Heparan sulfate]-glucosamine 3-sulfotransferase 1", + "2.8.2.24 Desulfoglucosinolate sulfotransferase", + "2.8.2.25 Flavonol 3-sulfotransferase", + "2.8.2.26 Quercetin-3-sulfate 3'-sulfotransferase", + "2.8.2.27 Quercetin-3-sulfate 4'-sulfotransferase", + "2.8.2.28 Quercetin-3,3'-bissulfate 7-sulfotransferase", + "2.8.2.29 [Heparan sulfate]-glucosamine 3-sulfotransferase 2", + "2.8.2.30 [Heparan sulfate]-glucosamine 3-sulfotransferase 3", + "2.8.2.31 Petromyzonol sulfotransferase", + "2.8.2.32 Scymnol sulfotransferase", + "2.8.2.33 N-acetylgalactosamine 4-sulfate 6-O-sulfotransferase", + "2.8.2.34 Glycochenodeoxycholate sulfotransferase", + "2.8.2.35 Dermatan 4-sulfotransferase", + "2.8.3.1 Propionate CoA-transferase", + "2.8.3.2 Oxalate CoA-transferase", + "2.8.3.3 Malonate CoA-transferase", + "2.8.3.5 3-oxoacid CoA-transferase", + "2.8.3.6 3-oxoadipate CoA-transferase", + "2.8.3.7 Succinate--citramalate CoA-transferase", + "2.8.3.8 Acetate CoA-transferase", + "2.8.3.9 Butyrate--acetoacetate CoA-transferase", + "2.8.3.10 Citrate CoA-transferase", + "2.8.3.11 Citramalate CoA-transferase", + "2.8.3.12 Glutaconate CoA-transferase", + "2.8.3.13 Succinate--hydroxymethylglutarate CoA-transferase", + "2.8.3.14 5-hydroxypentanoate CoA-transferase", + "2.8.3.15 Succinyl-CoA:(R)-benzylsuccinate CoA-transferase", + "2.8.3.16 Formyl-CoA transferase", + "2.8.3.17 Cinnamoyl-CoA:phenyllactate CoA-transferase", + "2.8.4.1 Coenzyme-B sulfoethylthiotransferase", + "2.8.4.2 Arsenate-mycothiol transferase", + "2.9.1.1 L-seryl-tRNA(Sec) selenium transferase", + "2.9.1.2 
O-phospho-L-seryl-tRNA(Sec):L-selenocysteinyl-tRNA synthase", + "3.1.1.1 Carboxylesterase", + "3.1.1.2 Arylesterase", + "3.1.1.3 Triacylglycerol lipase", + "3.1.1.4 Phospholipase A(2)", + "3.1.1.5 Lysophospholipase", + "3.1.1.6 Acetylesterase", + "3.1.1.7 Acetylcholinesterase", + "3.1.1.8 Cholinesterase", + "3.1.1.10 Tropinesterase", + "3.1.1.11 Pectinesterase", + "3.1.1.13 Sterol esterase", + "3.1.1.14 Chlorophyllase", + "3.1.1.15 L-arabinonolactonase", + "3.1.1.17 Gluconolactonase", + "3.1.1.19 Uronolactonase", + "3.1.1.20 Tannase", + "3.1.1.21 Retinyl-palmitate esterase", + "3.1.1.22 Hydroxybutyrate-dimer hydrolase", + "3.1.1.23 Acylglycerol lipase", + "3.1.1.24 3-oxoadipate enol-lactonase", + "3.1.1.25 1,4-lactonase", + "3.1.1.26 Galactolipase", + "3.1.1.27 4-pyridoxolactonase", + "3.1.1.28 Acylcarnitine hydrolase", + "3.1.1.29 Aminoacyl-tRNA hydrolase", + "3.1.1.30 D-arabinonolactonase", + "3.1.1.31 6-phosphogluconolactonase", + "3.1.1.32 Phospholipase A(1)", + "3.1.1.33 6-acetylglucose deacetylase", + "3.1.1.34 Lipoprotein lipase", + "3.1.1.35 Dihydrocoumarin hydrolase", + "3.1.1.36 Limonin-D-ring-lactonase", + "3.1.1.37 Steroid-lactonase", + "3.1.1.38 Triacetate-lactonase", + "3.1.1.39 Actinomycin lactonase", + "3.1.1.40 Orsellinate-depside hydrolase", + "3.1.1.41 Cephalosporin-C deacetylase", + "3.1.1.42 Chlorogenate hydrolase", + "3.1.1.43 Alpha-amino-acid esterase", + "3.1.1.44 4-methyloxaloacetate esterase", + "3.1.1.45 Carboxymethylenebutenolidase", + "3.1.1.46 Deoxylimonate A-ring-lactonase", + "3.1.1.47 1-alkyl-2-acetylglycerophosphocholine esterase", + "3.1.1.48 Fusarinine-C ornithinesterase", + "3.1.1.49 Sinapine esterase", + "3.1.1.50 Wax-ester hydrolase", + "3.1.1.51 Phorbol-diester hydrolase", + "3.1.1.52 Phosphatidylinositol deacylase", + "3.1.1.53 Sialate O-acetylesterase", + "3.1.1.54 Acetoxybutynylbithiophene deacetylase", + "3.1.1.55 Acetylsalicylate deacetylase", + "3.1.1.56 Methylumbelliferyl-acetate deacetylase", + "3.1.1.57 
2-pyrone-4,6-dicarboxylate lactonase", + "3.1.1.58 N-acetylgalactosaminoglycan deacetylase", + "3.1.1.59 Juvenile-hormone esterase", + "3.1.1.60 Bis(2-ethylhexyl)phthalate esterase", + "3.1.1.61 Protein-glutamate methylesterase", + "3.1.1.63 11-cis-retinyl-palmitate hydrolase", + "3.1.1.64 All-trans-retinyl-palmitate hydrolase", + "3.1.1.65 L-rhamnono-1,4-lactonase", + "3.1.1.66 5-(3,4-diacetoxybut-1-ynyl)-2,2'-bithiophene deacetylase", + "3.1.1.67 Fatty-acyl-ethyl-ester synthase", + "3.1.1.68 Xylono-1,4-lactonase", + "3.1.1.70 Cetraxate benzylesterase", + "3.1.1.71 Acetylalkylglycerol acetylhydrolase", + "3.1.1.72 Acetylxylan esterase", + "3.1.1.73 Feruloyl esterase", + "3.1.1.74 Cutinase", + "3.1.1.75 Poly(3-hydroxybutyrate) depolymerase", + "3.1.1.76 Poly(3-hydroxyoctanoate) depolymerase", + "3.1.1.77 Acyloxyacyl hydrolase", + "3.1.1.78 Polyneuridine-aldehyde esterase", + "3.1.1.79 Hormone-sensitive lipase", + "3.1.1.80 Acetylajmaline esterase", + "3.1.1.81 Quorum-quenching N-acyl-homoserine lactonase", + "3.1.1.82 Pheophorbidase", + "3.1.1.83 Monoterpene epsilon-lactone hydrolase", + "3.1.1.84 Cocaine esterase", + "3.1.2.1 Acetyl-CoA hydrolase", + "3.1.2.2 Palmitoyl-CoA hydrolase", + "3.1.2.3 Succinyl-CoA hydrolase", + "3.1.2.4 3-hydroxyisobutyryl-CoA hydrolase", + "3.1.2.5 Hydroxymethylglutaryl-CoA hydrolase", + "3.1.2.6 Hydroxyacylglutathione hydrolase", + "3.1.2.7 Glutathione thiolesterase", + "3.1.2.10 Formyl-CoA hydrolase", + "3.1.2.11 Acetoacetyl-CoA hydrolase", + "3.1.2.12 S-formylglutathione hydrolase", + "3.1.2.13 S-succinylglutathione hydrolase", + "3.1.2.14 Oleoyl-[acyl-carrier-protein] hydrolase", + "3.1.2.15 Ubiquitin thiolesterase", + "3.1.2.16 Citrate-lyase deacetylase", + "3.1.2.17 (S)-methylmalonyl-CoA hydrolase", + "3.1.2.18 ADP-dependent short-chain-acyl-CoA hydrolase", + "3.1.2.19 ADP-dependent medium-chain-acyl-CoA hydrolase", + "3.1.2.20 Acyl-CoA hydrolase", + "3.1.2.21 Dodecanoyl-[acyl-carrier-protein] hydrolase", + "3.1.2.22 
Palmitoyl-protein hydrolase", + "3.1.2.23 4-hydroxybenzoyl-CoA thioesterase", + "3.1.2.25 Phenylacetyl-CoA hydrolase", + "3.1.2.26 Bile-acid-CoA hydrolase", + "3.1.2.27 Choloyl-CoA hydrolase", + "3.1.2.28 1,4-dihydroxy-2-naphthoyl-CoA hydrolase", + "3.1.3.1 Alkaline phosphatase", + "3.1.3.2 Acid phosphatase", + "3.1.3.3 Phosphoserine phosphatase", + "3.1.3.4 Phosphatidate phosphatase", + "3.1.3.5 5'-nucleotidase", + "3.1.3.6 3'-nucleotidase", + "3.1.3.7 3'(2'),5'-bisphosphate nucleotidase", + "3.1.3.8 3-phytase", + "3.1.3.9 Glucose-6-phosphatase", + "3.1.3.10 Glucose-1-phosphatase", + "3.1.3.11 Fructose-bisphosphatase", + "3.1.3.12 Trehalose-phosphatase", + "3.1.3.13 Bisphosphoglycerate phosphatase", + "3.1.3.14 Methylphosphothioglycerate phosphatase", + "3.1.3.15 Histidinol-phosphatase", + "3.1.3.16 Phosphoprotein phosphatase", + "3.1.3.17 [Phosphorylase] phosphatase", + "3.1.3.18 Phosphoglycolate phosphatase", + "3.1.3.19 Glycerol-2-phosphatase", + "3.1.3.20 Phosphoglycerate phosphatase", + "3.1.3.21 Glycerol-1-phosphatase", + "3.1.3.22 Mannitol-1-phosphatase", + "3.1.3.23 Sugar-phosphatase", + "3.1.3.24 Sucrose-phosphate phosphatase", + "3.1.3.25 Inositol-phosphate phosphatase", + "3.1.3.26 4-phytase", + "3.1.3.27 Phosphatidylglycerophosphatase", + "3.1.3.28 ADP-phosphoglycerate phosphatase", + "3.1.3.29 N-acylneuraminate-9-phosphatase", + "3.1.3.31 Nucleotidase", + "3.1.3.32 Polynucleotide 3'-phosphatase", + "3.1.3.33 Polynucleotide 5'-phosphatase", + "3.1.3.34 Deoxynucleotide 3'-phosphatase", + "3.1.3.35 Thymidylate 5'-phosphatase", + "3.1.3.36 Phosphoinositide 5-phosphatase", + "3.1.3.37 Sedoheptulose-bisphosphatase", + "3.1.3.38 3-phosphoglycerate phosphatase", + "3.1.3.39 Streptomycin-6-phosphatase", + "3.1.3.40 Guanidinodeoxy-scyllo-inositol-4-phosphatase", + "3.1.3.41 4-nitrophenylphosphatase", + "3.1.3.42 [Glycogen-synthase-D] phosphatase", + "3.1.3.43 [Pyruvate dehydrogenase (acetyl-transferring)]-phosphatase", + "3.1.3.44 [Acetyl-CoA 
carboxylase]-phosphatase", + "3.1.3.45 3-deoxy-manno-octulosonate-8-phosphatase", + "3.1.3.46 Fructose-2,6-bisphosphate 2-phosphatase", + "3.1.3.47 [Hydroxymethylglutaryl-CoA reductase (NADPH)]-phosphatase", + "3.1.3.48 Protein-tyrosine-phosphatase", + "3.1.3.49 [Pyruvate kinase]-phosphatase", + "3.1.3.50 Sorbitol-6-phosphatase", + "3.1.3.51 Dolichyl-phosphatase", + "3.1.3.52 [3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring)]-phosphatase", + "3.1.3.53 [Myosin-light-chain] phosphatase", + "3.1.3.54 Fructose-2,6-bisphosphate 6-phosphatase", + "3.1.3.55 Caldesmon-phosphatase", + "3.1.3.56 Inositol-polyphosphate 5-phosphatase", + "3.1.3.57 Inositol-1,4-bisphosphate 1-phosphatase", + "3.1.3.58 Sugar-terminal-phosphatase", + "3.1.3.59 Alkylacetylglycerophosphatase", + "3.1.3.60 Phosphoenolpyruvate phosphatase", + "3.1.3.62 Multiple inositol-polyphosphate phosphatase", + "3.1.3.63 2-carboxy-D-arabinitol-1-phosphatase", + "3.1.3.64 Phosphatidylinositol-3-phosphatase", + "3.1.3.66 Phosphatidylinositol-3,4-bisphosphate 4-phosphatase", + "3.1.3.67 Phosphatidylinositol-3,4,5-trisphosphate 3-phosphatase", + "3.1.3.68 2-deoxyglucose-6-phosphatase", + "3.1.3.69 Glucosylglycerol 3-phosphatase", + "3.1.3.70 Mannosyl-3-phosphoglycerate phosphatase", + "3.1.3.71 2-phosphosulfolactate phosphatase", + "3.1.3.72 5-phytase", + "3.1.3.73 Alpha-ribazole phosphatase", + "3.1.3.74 Pyridoxal phosphatase", + "3.1.3.75 Phosphoethanolamine/phosphocholine phosphatase", + "3.1.3.76 Lipid-phosphate phosphatase", + "3.1.3.77 Acireductone synthase", + "3.1.3.78 Phosphatidylinositol-4,5-bisphosphate 4-phosphatase", + "3.1.3.79 Mannosylfructose-phosphate phosphatase", + "3.1.3.80 2,3-bisphosphoglycerate 3-phosphatase", + "3.1.3.81 Diacylglycerol diphosphate phosphatase", + "3.1.3.82 D-glycero-beta-D-manno-heptose 1,7-bisphosphate 7-phosphatase", + "3.1.3.83 D-glycero-alpha-D-manno-heptose-1,7-bisphosphate 7-phosphatase", + "3.1.3.n1 Phosphatidylinositol-3,4,5-trisphosphate 
5-phosphatase", + "3.1.3.n2 ADP-ribose 1''-phosphate phosphatase", + "3.1.3.n4 2-hydroxy-3-keto-5-methylthiopentenyl-1-phosphate phosphatase", + "3.1.4.1 Phosphodiesterase I", + "3.1.4.2 Glycerophosphocholine phosphodiesterase", + "3.1.4.3 Phospholipase C", + "3.1.4.4 Phospholipase D", + "3.1.4.11 Phosphoinositide phospholipase C", + "3.1.4.12 Sphingomyelin phosphodiesterase", + "3.1.4.13 Serine-ethanolaminephosphate phosphodiesterase", + "3.1.4.14 [Acyl-carrier-protein] phosphodiesterase", + "3.1.4.15 Adenylyl-[glutamate--ammonia ligase] hydrolase", + "3.1.4.16 2',3'-cyclic-nucleotide 2'-phosphodiesterase", + "3.1.4.17 3',5'-cyclic-nucleotide phosphodiesterase", + "3.1.4.35 3',5'-cyclic-GMP phosphodiesterase", + "3.1.4.37 2',3'-cyclic-nucleotide 3'-phosphodiesterase", + "3.1.4.38 Glycerophosphocholine cholinephosphodiesterase", + "3.1.4.39 Alkylglycerophosphoethanolamine phosphodiesterase", + "3.1.4.40 CMP-N-acylneuraminate phosphodiesterase", + "3.1.4.41 Sphingomyelin phosphodiesterase D", + "3.1.4.42 Glycerol-1,2-cyclic-phosphate 2-phosphodiesterase", + "3.1.4.43 Glycerophosphoinositol inositolphosphodiesterase", + "3.1.4.44 Glycerophosphoinositol glycerophosphodiesterase", + "3.1.4.45 N-acetylglucosamine-1-phosphodiester alpha-N-acetylglucosaminidase", + "3.1.4.46 Glycerophosphodiester phosphodiesterase", + "3.1.4.48 Dolichylphosphate-glucose phosphodiesterase", + "3.1.4.49 Dolichylphosphate-mannose phosphodiesterase", + "3.1.4.50 Glycosylphosphatidylinositol phospholipase D", + "3.1.4.51 Glucose-1-phospho-D-mannosylglycoprotein phosphodiesterase", + "3.1.4.52 Cyclic-guanylate-specific phosphodiesterase", + "3.1.4.53 3',5'-cyclic-AMP phosphodiesterase", + "3.1.5.1 dGTPase", + "3.1.6.1 Arylsulfatase", + "3.1.6.2 Steryl-sulfatase", + "3.1.6.3 Glycosulfatase", + "3.1.6.4 N-acetylgalactosamine-6-sulfatase", + "3.1.6.6 Choline-sulfatase", + "3.1.6.7 Cellulose-polysulfatase", + "3.1.6.8 Cerebroside-sulfatase", + "3.1.6.9 Chondro-4-sulfatase", + "3.1.6.10 
Chondro-6-sulfatase", + "3.1.6.11 Disulfoglucosamine-6-sulfatase", + "3.1.6.12 N-acetylgalactosamine-4-sulfatase", + "3.1.6.13 Iduronate-2-sulfatase", + "3.1.6.14 N-acetylglucosamine-6-sulfatase", + "3.1.6.15 N-sulfoglucosamine-3-sulfatase", + "3.1.6.16 Monomethyl-sulfatase", + "3.1.6.17 D-lactate-2-sulfatase", + "3.1.6.18 Glucuronate-2-sulfatase", + "3.1.7.1 Prenyl-diphosphatase", + "3.1.7.2 Guanosine-3',5'-bis(diphosphate) 3'-diphosphatase", + "3.1.7.3 Monoterpenyl-diphosphatase", + "3.1.7.4 Sclareol cyclase", + "3.1.7.5 Geranylgeranyl diphosphate diphosphatase", + "3.1.7.6 Farnesyl diphosphatase", + "3.1.7.7 Drimenol cyclase", + "3.1.8.1 Aryldialkylphosphatase", + "3.1.8.2 Diisopropyl-fluorophosphatase", + "3.1.11.1 Exodeoxyribonuclease I", + "3.1.11.2 Exodeoxyribonuclease III", + "3.1.11.3 Exodeoxyribonuclease (lambda-induced)", + "3.1.11.4 Exodeoxyribonuclease (phage SP3-induced)", + "3.1.11.5 Exodeoxyribonuclease V", + "3.1.11.6 Exodeoxyribonuclease VII", + "3.1.13.1 Exoribonuclease II", + "3.1.13.2 Exoribonuclease H", + "3.1.13.3 Oligonucleotidase", + "3.1.13.4 Poly(A)-specific ribonuclease", + "3.1.13.5 Ribonuclease D", + "3.1.14.1 Yeast ribonuclease", + "3.1.15.1 Venom exonuclease", + "3.1.16.1 Spleen exonuclease", + "3.1.21.1 Deoxyribonuclease I", + "3.1.21.2 Deoxyribonuclease IV (phage-T(4)-induced)", + "3.1.21.3 Type I site-specific deoxyribonuclease", + "3.1.21.4 Type II site-specific deoxyribonuclease", + "3.1.21.5 Type III site-specific deoxyribonuclease", + "3.1.21.6 CC-preferring endodeoxyribonuclease", + "3.1.21.7 Deoxyribonuclease V", + "3.1.22.1 Deoxyribonuclease II", + "3.1.22.2 Aspergillus deoxyribonuclease K(1)", + "3.1.22.4 Crossover junction endodeoxyribonuclease", + "3.1.22.5 Deoxyribonuclease X", + "3.1.25.1 Deoxyribonuclease (pyrimidine dimer)", + "3.1.26.1 Physarum polycephalum ribonuclease", + "3.1.26.2 Ribonuclease alpha", + "3.1.26.3 Ribonuclease III", + "3.1.26.4 Ribonuclease H", + "3.1.26.5 Ribonuclease P", + "3.1.26.6 Ribonuclease 
IV", + "3.1.26.7 Ribonuclease P4", + "3.1.26.8 Ribonuclease M5", + "3.1.26.9 Ribonuclease (poly-(U)-specific)", + "3.1.26.10 Ribonuclease IX", + "3.1.26.11 Ribonuclease Z", + "3.1.26.12 Ribonuclease E", + "3.1.26.13 Retroviral ribonuclease H", + "3.1.27.1 Ribonuclease T(2)", + "3.1.27.2 Bacillus subtilis ribonuclease", + "3.1.27.3 Ribonuclease T(1)", + "3.1.27.4 Ribonuclease U(2)", + "3.1.27.5 Pancreatic ribonuclease", + "3.1.27.6 Enterobacter ribonuclease", + "3.1.27.7 Ribonuclease F", + "3.1.27.8 Ribonuclease V", + "3.1.27.9 tRNA-intron endonuclease", + "3.1.27.10 rRNA endonuclease", + "3.1.30.1 Aspergillus nuclease S(1)", + "3.1.30.2 Serratia marcescens nuclease", + "3.1.31.1 Micrococcal nuclease", + "3.2.1.1 Alpha-amylase", + "3.2.1.2 Beta-amylase", + "3.2.1.3 Glucan 1,4-alpha-glucosidase", + "3.2.1.4 Cellulase", + "3.2.1.6 Endo-1,3(4)-beta-glucanase", + "3.2.1.7 Inulinase", + "3.2.1.8 Endo-1,4-beta-xylanase", + "3.2.1.10 Oligo-1,6-glucosidase", + "3.2.1.11 Dextranase", + "3.2.1.14 Chitinase", + "3.2.1.15 Polygalacturonase", + "3.2.1.17 Lysozyme", + "3.2.1.18 Exo-alpha-sialidase", + "3.2.1.20 Alpha-glucosidase", + "3.2.1.21 Beta-glucosidase", + "3.2.1.22 Alpha-galactosidase", + "3.2.1.23 Beta-galactosidase", + "3.2.1.24 Alpha-mannosidase", + "3.2.1.25 Beta-mannosidase", + "3.2.1.26 Beta-fructofuranosidase", + "3.2.1.28 Alpha,alpha-trehalase", + "3.2.1.31 Beta-glucuronidase", + "3.2.1.32 Xylan endo-1,3-beta-xylosidase", + "3.2.1.33 Amylo-alpha-1,6-glucosidase", + "3.2.1.35 Hyaluronoglucosaminidase", + "3.2.1.36 Hyaluronoglucuronidase", + "3.2.1.37 Xylan 1,4-beta-xylosidase", + "3.2.1.38 Beta-D-fucosidase", + "3.2.1.39 Glucan endo-1,3-beta-D-glucosidase", + "3.2.1.40 Alpha-L-rhamnosidase", + "3.2.1.41 Pullulanase", + "3.2.1.42 GDP-glucosidase", + "3.2.1.43 Beta-L-rhamnosidase", + "3.2.1.44 Fucoidanase", + "3.2.1.45 Glucosylceramidase", + "3.2.1.46 Galactosylceramidase", + "3.2.1.47 Galactosylgalactosylglucosylceramidase", + "3.2.1.48 Sucrose alpha-glucosidase", + 
"3.2.1.49 Alpha-N-acetylgalactosaminidase", + "3.2.1.50 Alpha-N-acetylglucosaminidase", + "3.2.1.51 Alpha-L-fucosidase", + "3.2.1.52 Beta-N-acetylhexosaminidase", + "3.2.1.53 Beta-N-acetylgalactosaminidase", + "3.2.1.54 Cyclomaltodextrinase", + "3.2.1.55 Alpha-N-arabinofuranosidase", + "3.2.1.56 Glucuronosyl-disulfoglucosamine glucuronidase", + "3.2.1.57 Isopullulanase", + "3.2.1.58 Glucan 1,3-beta-glucosidase", + "3.2.1.59 Glucan endo-1,3-alpha-glucosidase", + "3.2.1.60 Glucan 1,4-alpha-maltotetraohydrolase", + "3.2.1.61 Mycodextranase", + "3.2.1.62 Glycosylceramidase", + "3.2.1.63 1,2-alpha-L-fucosidase", + "3.2.1.64 2,6-beta-fructan 6-levanbiohydrolase", + "3.2.1.65 Levanase", + "3.2.1.66 Quercitrinase", + "3.2.1.67 Galacturan 1,4-alpha-galacturonidase", + "3.2.1.68 Isoamylase", + "3.2.1.70 Glucan 1,6-alpha-glucosidase", + "3.2.1.71 Glucan endo-1,2-beta-glucosidase", + "3.2.1.72 Xylan 1,3-beta-xylosidase", + "3.2.1.73 Licheninase", + "3.2.1.74 Glucan 1,4-beta-glucosidase", + "3.2.1.75 Glucan endo-1,6-beta-glucosidase", + "3.2.1.76 L-iduronidase", + "3.2.1.77 Mannan 1,2-(1,3)-alpha-mannosidase", + "3.2.1.78 Mannan endo-1,4-beta-mannosidase", + "3.2.1.80 Fructan beta-fructosidase", + "3.2.1.81 Beta-agarase", + "3.2.1.82 Exo-poly-alpha-galacturonosidase", + "3.2.1.83 Kappa-carrageenase", + "3.2.1.84 Glucan 1,3-alpha-glucosidase", + "3.2.1.85 6-phospho-beta-galactosidase", + "3.2.1.86 6-phospho-beta-glucosidase", + "3.2.1.87 Capsular-polysaccharide endo-1,3-alpha-galactosidase", + "3.2.1.88 Beta-L-arabinosidase", + "3.2.1.89 Arabinogalactan endo-1,4-beta-galactosidase", + "3.2.1.91 Cellulose 1,4-beta-cellobiosidase", + "3.2.1.92 Peptidoglycan beta-N-acetylmuramidase", + "3.2.1.93 Alpha,alpha-phosphotrehalase", + "3.2.1.94 Glucan 1,6-alpha-isomaltosidase", + "3.2.1.95 Dextran 1,6-alpha-isomaltotriosidase", + "3.2.1.96 Mannosyl-glycoprotein endo-beta-N-acetylglucosaminidase", + "3.2.1.97 Glycopeptide alpha-N-acetylgalactosaminidase", + "3.2.1.98 Glucan 
1,4-alpha-maltohexaosidase", + "3.2.1.99 Arabinan endo-1,5-alpha-L-arabinosidase", + "3.2.1.100 Mannan 1,4-mannobiosidase", + "3.2.1.101 Mannan endo-1,6-alpha-mannosidase", + "3.2.1.102 Blood-group-substance endo-1,4-beta-galactosidase", + "3.2.1.103 Keratan-sulfate endo-1,4-beta-galactosidase", + "3.2.1.104 Steryl-beta-glucosidase", + "3.2.1.105 3-alpha-(S)-strictosidine beta-glucosidase", + "3.2.1.106 Mannosyl-oligosaccharide glucosidase", + "3.2.1.107 Protein-glucosylgalactosylhydroxylysine glucosidase", + "3.2.1.108 Lactase", + "3.2.1.109 Endogalactosaminidase", + "3.2.1.111 1,3-alpha-L-fucosidase", + "3.2.1.112 2-deoxyglucosidase", + "3.2.1.113 Mannosyl-oligosaccharide 1,2-alpha-mannosidase", + "3.2.1.114 Mannosyl-oligosaccharide 1,3-1,6-alpha-mannosidase", + "3.2.1.115 Branched-dextran exo-1,2-alpha-glucosidase", + "3.2.1.116 Glucan 1,4-alpha-maltotriohydrolase", + "3.2.1.117 Amygdalin beta-glucosidase", + "3.2.1.118 Prunasin beta-glucosidase", + "3.2.1.119 Vicianin beta-glucosidase", + "3.2.1.120 Oligoxyloglucan beta-glycosidase", + "3.2.1.121 Polymannuronate hydrolase", + "3.2.1.122 Maltose-6'-phosphate glucosidase", + "3.2.1.123 Endoglycosylceramidase", + "3.2.1.124 3-deoxy-2-octulosonidase", + "3.2.1.125 Raucaffricine beta-glucosidase", + "3.2.1.126 Coniferin beta-glucosidase", + "3.2.1.127 1,6-alpha-L-fucosidase", + "3.2.1.128 Glycyrrhizinate beta-glucuronidase", + "3.2.1.129 Endo-alpha-sialidase", + "3.2.1.130 Glycoprotein endo-alpha-1,2-mannosidase", + "3.2.1.131 Xylan alpha-1,2-glucuronosidase", + "3.2.1.132 Chitosanase", + "3.2.1.133 Glucan 1,4-alpha-maltohydrolase", + "3.2.1.134 Difructose-anhydride synthase", + "3.2.1.135 Neopullulanase", + "3.2.1.136 Glucuronoarabinoxylan endo-1,4-beta-xylanase", + "3.2.1.137 Mannan exo-1,2-1,6-alpha-mannosidase", + "3.2.1.139 Alpha-glucuronidase", + "3.2.1.140 Lacto-N-biosidase", + "3.2.1.141 4-alpha-D-((1->4)-alpha-D-glucano)trehalose trehalohydrolase", + "3.2.1.142 Limit dextrinase", + "3.2.1.143 
Poly(ADP-ribose) glycohydrolase", + "3.2.1.144 3-deoxyoctulosonase", + "3.2.1.145 Galactan 1,3-beta-galactosidase", + "3.2.1.146 Beta-galactofuranosidase", + "3.2.1.147 Thioglucosidase", + "3.2.1.149 Beta-primeverosidase", + "3.2.1.150 Oligoxyloglucan reducing-end-specific cellobiohydrolase", + "3.2.1.151 Xyloglucan-specific endo-beta-1,4-glucanase", + "3.2.1.152 Mannosylglycoprotein endo-beta-mannosidase", + "3.2.1.153 Fructan beta-(2,1)-fructosidase", + "3.2.1.154 Fructan beta-(2,6)-fructosidase", + "3.2.1.155 Xyloglucan-specific exo-beta-1,4-glucanase", + "3.2.1.156 Oligosaccharide reducing-end xylanase", + "3.2.1.157 Iota-carrageenase", + "3.2.1.158 Alpha-agarase", + "3.2.1.159 Alpha-neoagaro-oligosaccharide hydrolase", + "3.2.1.161 Beta-apiosyl-beta-glucosidase", + "3.2.1.162 Lambda-carrageenase", + "3.2.1.163 1,6-alpha-D-mannosidase", + "3.2.1.164 Galactan endo-1,6-beta-galactosidase", + "3.2.1.165 Exo-1,4-beta-D-glucosaminidase", + "3.2.1.166 Heparanase", + "3.2.1.167 Baicalin-beta-D-glucuronidase", + "3.2.1.168 Hesperidin 6-O-alpha-L-rhamnosyl-beta-D-glucosidase", + "3.2.1.169 Protein O-GlcNAcase", + "3.2.1.n1 Blood group B branched chain alpha-1,3-galactosidase", + "3.2.1.n2 Blood group B linear chain alpha-1,3-galactosidase", + "3.2.1.n3 Dictyostelium lysozyme A", + "3.2.2.1 Purine nucleosidase", + "3.2.2.2 Inosine nucleosidase", + "3.2.2.3 Uridine nucleosidase", + "3.2.2.4 AMP nucleosidase", + "3.2.2.5 NAD(+) nucleosidase", + "3.2.2.6 NAD(P)(+) nucleosidase", + "3.2.2.7 Adenosine nucleosidase", + "3.2.2.8 Ribosylpyrimidine nucleosidase", + "3.2.2.9 Adenosylhomocysteine nucleosidase", + "3.2.2.10 Pyrimidine-5'-nucleotide nucleosidase", + "3.2.2.11 Beta-aspartyl-N-acetylglucosaminidase", + "3.2.2.12 Inosinate nucleosidase", + "3.2.2.13 1-methyladenosine nucleosidase", + "3.2.2.14 NMN nucleosidase", + "3.2.2.15 DNA-deoxyinosine glycosylase", + "3.2.2.16 Methylthioadenosine nucleosidase", + "3.2.2.17 Deoxyribodipyrimidine endonucleosidase", + "3.2.2.19 
[Protein ADP-ribosylarginine] hydrolase", + "3.2.2.20 DNA-3-methyladenine glycosylase I", + "3.2.2.21 DNA-3-methyladenine glycosylase II", + "3.2.2.22 rRNA N-glycosylase", + "3.2.2.23 DNA-formamidopyrimidine glycosylase", + "3.2.2.24 ADP-ribosyl-[dinitrogen reductase] hydrolase", + "3.2.2.25 N-methyl nucleosidase", + "3.2.2.26 Futalosine hydrolase", + "3.2.2.27 Uracil-DNA glycosylase", + "3.2.2.28 Double-stranded uracil-DNA glycosylase", + "3.2.2.29 Thymine-DNA glycosylase", + "3.3.1.1 Adenosylhomocysteinase", + "3.3.1.2 Adenosylmethionine hydrolase", + "3.3.2.1 Isochorismatase", + "3.3.2.2 Alkenylglycerophosphocholine hydrolase", + "3.3.2.4 Trans-epoxysuccinate hydrolase", + "3.3.2.5 Alkenylglycerophosphoethanolamine hydrolase", + "3.3.2.6 Leukotriene-A(4) hydrolase", + "3.3.2.7 Hepoxilin-epoxide hydrolase", + "3.3.2.8 Limonene-1,2-epoxide hydrolase", + "3.3.2.9 Microsomal epoxide hydrolase", + "3.3.2.10 Soluble epoxide hydrolase", + "3.3.2.11 Cholesterol-5,6-oxide hydrolase", + "3.4.11.1 Leucyl aminopeptidase", + "3.4.11.2 Membrane alanyl aminopeptidase", + "3.4.11.3 Cystinyl aminopeptidase", + "3.4.11.4 Tripeptide aminopeptidase", + "3.4.11.5 Prolyl aminopeptidase", + "3.4.11.6 Aminopeptidase B", + "3.4.11.7 Glutamyl aminopeptidase", + "3.4.11.9 Xaa-Pro aminopeptidase", + "3.4.11.10 Bacterial leucyl aminopeptidase", + "3.4.11.13 Clostridial aminopeptidase", + "3.4.11.14 Cytosol alanyl aminopeptidase", + "3.4.11.15 Aminopeptidase Y", + "3.4.11.16 Xaa-Trp aminopeptidase", + "3.4.11.17 Tryptophanyl aminopeptidase", + "3.4.11.18 Methionyl aminopeptidase", + "3.4.11.19 D-stereospecific aminopeptidase", + "3.4.11.20 Aminopeptidase Ey", + "3.4.11.21 Aspartyl aminopeptidase", + "3.4.11.22 Aminopeptidase I", + "3.4.11.23 PepB aminopeptidase", + "3.4.11.24 Aminopeptidase S", + "3.4.13.3 Xaa-His dipeptidase", + "3.4.13.4 Xaa-Arg dipeptidase", + "3.4.13.5 Xaa-methyl-His dipeptidase", + "3.4.13.7 Glu-Glu dipeptidase", + "3.4.13.9 Xaa-Pro dipeptidase", + "3.4.13.12 Met-Xaa 
dipeptidase", + "3.4.13.17 Non-stereospecific dipeptidase", + "3.4.13.18 Cytosol nonspecific dipeptidase", + "3.4.13.19 Membrane dipeptidase", + "3.4.13.20 Beta-Ala-His dipeptidase", + "3.4.13.21 Dipeptidase E", + "3.4.13.22 D-Ala-D-Ala dipeptidase", + "3.4.14.1 Dipeptidyl-peptidase I", + "3.4.14.2 Dipeptidyl-peptidase II", + "3.4.14.4 Dipeptidyl-peptidase III", + "3.4.14.5 Dipeptidyl-peptidase IV", + "3.4.14.6 Dipeptidyl-dipeptidase", + "3.4.14.9 Tripeptidyl-peptidase I", + "3.4.14.10 Tripeptidyl-peptidase II", + "3.4.14.11 Xaa-Pro dipeptidyl-peptidase", + "3.4.14.12 Xaa-Xaa-Pro tripeptidyl-peptidase", + "3.4.15.1 Peptidyl-dipeptidase A", + "3.4.15.4 Peptidyl-dipeptidase B", + "3.4.15.5 Peptidyl-dipeptidase Dcp", + "3.4.15.6 Cyanophycinase", + "3.4.16.2 Lysosomal Pro-Xaa carboxypeptidase", + "3.4.16.4 Serine-type D-Ala-D-Ala carboxypeptidase", + "3.4.16.5 Carboxypeptidase C", + "3.4.16.6 Carboxypeptidase D", + "3.4.17.1 Carboxypeptidase A", + "3.4.17.2 Carboxypeptidase B", + "3.4.17.3 Lysine carboxypeptidase", + "3.4.17.4 Gly-Xaa carboxypeptidase", + "3.4.17.6 Alanine carboxypeptidase", + "3.4.17.8 Muramoylpentapeptide carboxypeptidase", + "3.4.17.10 Carboxypeptidase E", + "3.4.17.11 Glutamate carboxypeptidase", + "3.4.17.12 Carboxypeptidase M", + "3.4.17.13 Muramoyltetrapeptide carboxypeptidase", + "3.4.17.14 Zinc D-Ala-D-Ala carboxypeptidase", + "3.4.17.15 Carboxypeptidase A2", + "3.4.17.16 Membrane Pro-Xaa carboxypeptidase", + "3.4.17.17 Tubulinyl-Tyr carboxypeptidase", + "3.4.17.18 Carboxypeptidase T", + "3.4.17.19 Carboxypeptidase Taq", + "3.4.17.20 Carboxypeptidase U", + "3.4.17.21 Glutamate carboxypeptidase II", + "3.4.17.22 Metallocarboxypeptidase D", + "3.4.17.23 Angiotensin-converting enzyme 2", + "3.4.18.1 Cathepsin X", + "3.4.19.1 Acylaminoacyl-peptidase", + "3.4.19.2 Peptidyl-glycinamidase", + "3.4.19.3 Pyroglutamyl-peptidase I", + "3.4.19.5 Beta-aspartyl-peptidase", + "3.4.19.6 Pyroglutamyl-peptidase II", + "3.4.19.7 N-formylmethionyl-peptidase", + 
"3.4.19.9 Gamma-glutamyl hydrolase", + "3.4.19.11 Gamma-D-glutamyl-meso-diaminopimelate peptidase", + "3.4.19.12 Ubiquitinyl hydrolase 1", + "3.4.21.1 Chymotrypsin", + "3.4.21.2 Chymotrypsin C", + "3.4.21.3 Metridin", + "3.4.21.4 Trypsin", + "3.4.21.5 Thrombin", + "3.4.21.6 Coagulation factor Xa", + "3.4.21.7 Plasmin", + "3.4.21.9 Enteropeptidase", + "3.4.21.10 Acrosin", + "3.4.21.12 Alpha-lytic endopeptidase", + "3.4.21.19 Glutamyl endopeptidase", + "3.4.21.20 Cathepsin G", + "3.4.21.21 Coagulation factor VIIa", + "3.4.21.22 Coagulation factor IXa", + "3.4.21.25 Cucumisin", + "3.4.21.26 Prolyl oligopeptidase", + "3.4.21.27 Coagulation factor XIa", + "3.4.21.32 Brachyurin", + "3.4.21.34 Plasma kallikrein", + "3.4.21.35 Tissue kallikrein", + "3.4.21.36 Pancreatic elastase", + "3.4.21.37 Leukocyte elastase", + "3.4.21.38 Coagulation factor XIIa", + "3.4.21.39 Chymase", + "3.4.21.41 Complement subcomponent C1r", + "3.4.21.42 Complement subcomponent C1s", + "3.4.21.43 Classical-complement-pathway C3/C5 convertase", + "3.4.21.45 Complement factor I", + "3.4.21.46 Complement factor D", + "3.4.21.47 Alternative-complement-pathway C3/C5 convertase", + "3.4.21.48 Cerevisin", + "3.4.21.49 Hypodermin C", + "3.4.21.50 Lysyl endopeptidase", + "3.4.21.53 Endopeptidase La", + "3.4.21.54 Gamma-renin", + "3.4.21.55 Venombin AB", + "3.4.21.57 Leucyl endopeptidase", + "3.4.21.59 Tryptase", + "3.4.21.60 Scutelarin", + "3.4.21.61 Kexin", + "3.4.21.62 Subtilisin", + "3.4.21.63 Oryzin", + "3.4.21.64 Peptidase K", + "3.4.21.65 Thermomycolin", + "3.4.21.66 Thermitase", + "3.4.21.67 Endopeptidase So", + "3.4.21.68 T-plasminogen activator", + "3.4.21.69 Protein C (activated)", + "3.4.21.70 Pancreatic endopeptidase E", + "3.4.21.71 Pancreatic elastase II", + "3.4.21.72 IgA-specific serine endopeptidase", + "3.4.21.73 U-plasminogen activator", + "3.4.21.74 Venombin A", + "3.4.21.75 Furin", + "3.4.21.76 Myeloblastin", + "3.4.21.77 Semenogelase", + "3.4.21.78 Granzyme A", + "3.4.21.79 Granzyme 
B", + "3.4.21.80 Streptogrisin A", + "3.4.21.81 Streptogrisin B", + "3.4.21.82 Glutamyl endopeptidase II", + "3.4.21.83 Oligopeptidase B", + "3.4.21.84 Limulus clotting factor C", + "3.4.21.85 Limulus clotting factor B", + "3.4.21.86 Limulus clotting enzyme", + "3.4.21.88 Repressor lexA", + "3.4.21.89 Signal peptidase I", + "3.4.21.90 Togavirin", + "3.4.21.91 Flavivirin", + "3.4.21.92 Endopeptidase Clp", + "3.4.21.93 Proprotein convertase 1", + "3.4.21.94 Proprotein convertase 2", + "3.4.21.95 Snake venom factor V activator", + "3.4.21.96 Lactocepin", + "3.4.21.97 Assemblin", + "3.4.21.98 Hepacivirin", + "3.4.21.99 Spermosin", + "3.4.21.100 Sedolisin", + "3.4.21.101 Xanthomonalisin", + "3.4.21.102 C-terminal processing peptidase", + "3.4.21.103 Physarolisin", + "3.4.21.104 Mannan-binding lectin-associated serine protease-2", + "3.4.21.105 Rhomboid protease", + "3.4.21.106 Hepsin", + "3.4.21.107 Peptidase Do", + "3.4.21.108 HtrA2 peptidase", + "3.4.21.109 Matriptase", + "3.4.21.110 C5a peptidase", + "3.4.21.111 Aqualysin 1", + "3.4.21.112 Site-1 protease", + "3.4.21.113 Pestivirus NS3 polyprotein peptidase", + "3.4.21.114 Equine arterivirus serine peptidase", + "3.4.21.115 Infectious pancreatic necrosis birnavirus Vp4 peptidase", + "3.4.21.116 SpoIVB peptidase", + "3.4.21.117 Stratum corneum chymotryptic enzyme", + "3.4.21.118 Kallikrein 8", + "3.4.21.119 Kallikrein 13", + "3.4.21.120 Oviductin", + "3.4.22.1 Cathepsin B", + "3.4.22.2 Papain", + "3.4.22.3 Ficain", + "3.4.22.6 Chymopapain", + "3.4.22.7 Asclepain", + "3.4.22.8 Clostripain", + "3.4.22.10 Streptopain", + "3.4.22.14 Actinidain", + "3.4.22.15 Cathepsin L", + "3.4.22.16 Cathepsin H", + "3.4.22.24 Cathepsin T", + "3.4.22.25 Glycyl endopeptidase", + "3.4.22.26 Cancer procoagulant", + "3.4.22.27 Cathepsin S", + "3.4.22.28 Picornain 3C", + "3.4.22.29 Picornain 2A", + "3.4.22.30 Caricain", + "3.4.22.31 Ananain", + "3.4.22.32 Stem bromelain", + "3.4.22.33 Fruit bromelain", + "3.4.22.34 Legumain", + "3.4.22.35 
Histolysain", + "3.4.22.36 Caspase-1", + "3.4.22.37 Gingipain R", + "3.4.22.38 Cathepsin K", + "3.4.22.39 Adenain", + "3.4.22.40 Bleomycin hydrolase", + "3.4.22.41 Cathepsin F", + "3.4.22.42 Cathepsin O", + "3.4.22.43 Cathepsin V", + "3.4.22.44 Nuclear-inclusion-a endopeptidase", + "3.4.22.45 Helper-component proteinase", + "3.4.22.46 L-peptidase", + "3.4.22.47 Gingipain K", + "3.4.22.48 Staphopain", + "3.4.22.49 Separase", + "3.4.22.50 V-cath endopeptidase", + "3.4.22.51 Cruzipain", + "3.4.22.52 Calpain-1", + "3.4.22.53 Calpain-2", + "3.4.22.54 Calpain-3", + "3.4.22.55 Caspase-2", + "3.4.22.56 Caspase-3", + "3.4.22.57 Caspase-4", + "3.4.22.58 Caspase-5", + "3.4.22.59 Caspase-6", + "3.4.22.60 Caspase-7", + "3.4.22.61 Caspase-8", + "3.4.22.62 Caspase-9", + "3.4.22.63 Caspase-10", + "3.4.22.64 Caspase-11", + "3.4.22.65 Peptidase 1 (mite)", + "3.4.22.66 Calicivirin", + "3.4.22.67 Zingipain", + "3.4.22.68 Ulp1 peptidase", + "3.4.22.69 SARS coronavirus main proteinase", + "3.4.22.70 Sortase A", + "3.4.22.71 Sortase B", + "3.4.23.1 Pepsin A", + "3.4.23.2 Pepsin B", + "3.4.23.3 Gastricsin", + "3.4.23.4 Chymosin", + "3.4.23.5 Cathepsin D", + "3.4.23.12 Nepenthesin", + "3.4.23.15 Renin", + "3.4.23.16 HIV-1 retropepsin", + "3.4.23.17 Pro-opiomelanocortin converting enzyme", + "3.4.23.18 Aspergillopepsin I", + "3.4.23.19 Aspergillopepsin II", + "3.4.23.20 Penicillopepsin", + "3.4.23.21 Rhizopuspepsin", + "3.4.23.22 Endothiapepsin", + "3.4.23.23 Mucorpepsin", + "3.4.23.24 Candidapepsin", + "3.4.23.25 Saccharopepsin", + "3.4.23.26 Rhodotorulapepsin", + "3.4.23.28 Acrocylindropepsin", + "3.4.23.29 Polyporopepsin", + "3.4.23.30 Pycnoporopepsin", + "3.4.23.31 Scytalidopepsin A", + "3.4.23.32 Scytalidopepsin B", + "3.4.23.34 Cathepsin E", + "3.4.23.35 Barrierpepsin", + "3.4.23.36 Signal peptidase II", + "3.4.23.38 Plasmepsin I", + "3.4.23.39 Plasmepsin II", + "3.4.23.40 Phytepsin", + "3.4.23.41 Yapsin 1", + "3.4.23.42 Thermopsin", + "3.4.23.43 Prepilin peptidase", + "3.4.23.44 
Nodavirus endopeptidase", + "3.4.23.45 Memapsin 1", + "3.4.23.46 Memapsin 2", + "3.4.23.47 HIV-2 retropepsin", + "3.4.23.48 Plasminogen activator Pla", + "3.4.23.49 Omptin", + "3.4.23.50 Human endogenous retrovirus K endopeptidase", + "3.4.23.51 HycI peptidase", + "3.4.24.1 Atrolysin A", + "3.4.24.3 Microbial collagenase", + "3.4.24.6 Leucolysin", + "3.4.24.7 Interstitial collagenase", + "3.4.24.11 Neprilysin", + "3.4.24.12 Envelysin", + "3.4.24.13 IgA-specific metalloendopeptidase", + "3.4.24.14 Procollagen N-endopeptidase", + "3.4.24.15 Thimet oligopeptidase", + "3.4.24.16 Neurolysin", + "3.4.24.17 Stromelysin 1", + "3.4.24.18 Meprin A", + "3.4.24.19 Procollagen C-endopeptidase", + "3.4.24.20 Peptidyl-Lys metalloendopeptidase", + "3.4.24.21 Astacin", + "3.4.24.22 Stromelysin 2", + "3.4.24.23 Matrilysin", + "3.4.24.24 Gelatinase A", + "3.4.24.25 Vibriolysin", + "3.4.24.26 Pseudolysin", + "3.4.24.27 Thermolysin", + "3.4.24.28 Bacillolysin", + "3.4.24.29 Aureolysin", + "3.4.24.30 Coccolysin", + "3.4.24.31 Mycolysin", + "3.4.24.32 Beta-lytic metalloendopeptidase", + "3.4.24.33 Peptidyl-Asp metalloendopeptidase", + "3.4.24.34 Neutrophil collagenase", + "3.4.24.35 Gelatinase B", + "3.4.24.36 Leishmanolysin", + "3.4.24.37 Saccharolysin", + "3.4.24.38 Gametolysin", + "3.4.24.39 Deuterolysin", + "3.4.24.40 Serralysin", + "3.4.24.41 Atrolysin B", + "3.4.24.42 Atrolysin C", + "3.4.24.43 Atroxase", + "3.4.24.44 Atrolysin E", + "3.4.24.45 Atrolysin F", + "3.4.24.46 Adamalysin", + "3.4.24.47 Horrilysin", + "3.4.24.48 Ruberlysin", + "3.4.24.49 Bothropasin", + "3.4.24.50 Bothrolysin", + "3.4.24.51 Ophiolysin", + "3.4.24.52 Trimerelysin I", + "3.4.24.53 Trimerelysin II", + "3.4.24.54 Mucrolysin", + "3.4.24.55 Pitrilysin", + "3.4.24.56 Insulysin", + "3.4.24.57 O-sialoglycoprotein endopeptidase", + "3.4.24.58 Russellysin", + "3.4.24.59 Mitochondrial intermediate peptidase", + "3.4.24.60 Dactylysin", + "3.4.24.61 Nardilysin", + "3.4.24.62 Magnolysin", + "3.4.24.63 Meprin B", + 
"3.4.24.64 Mitochondrial processing peptidase", + "3.4.24.65 Macrophage elastase", + "3.4.24.66 Choriolysin L", + "3.4.24.67 Choriolysin H", + "3.4.24.68 Tentoxilysin", + "3.4.24.69 Bontoxilysin", + "3.4.24.70 Oligopeptidase A", + "3.4.24.71 Endothelin-converting enzyme 1", + "3.4.24.72 Fibrolase", + "3.4.24.73 Jararhagin", + "3.4.24.74 Fragilysin", + "3.4.24.75 Lysostaphin", + "3.4.24.76 Flavastacin", + "3.4.24.77 Snapalysin", + "3.4.24.78 GPR endopeptidase", + "3.4.24.79 Pappalysin-1", + "3.4.24.80 Membrane-type matrix metalloproteinase-1", + "3.4.24.81 ADAM10 endopeptidase", + "3.4.24.82 ADAMTS-4 endopeptidase", + "3.4.24.83 Anthrax lethal factor endopeptidase", + "3.4.24.84 Ste24 endopeptidase", + "3.4.24.85 S2P endopeptidase", + "3.4.24.86 ADAM 17 endopeptidase", + "3.4.24.87 ADAMTS13 endopeptidase", + "3.4.25.1 Proteasome endopeptidase complex", + "3.4.25.2 HslU--HslV peptidase", + "3.5.1.1 Asparaginase", + "3.5.1.2 Glutaminase", + "3.5.1.3 Omega-amidase", + "3.5.1.4 Amidase", + "3.5.1.5 Urease", + "3.5.1.6 Beta-ureidopropionase", + "3.5.1.7 Ureidosuccinase", + "3.5.1.8 Formylaspartate deformylase", + "3.5.1.9 Arylformamidase", + "3.5.1.10 Formyltetrahydrofolate deformylase", + "3.5.1.11 Penicillin amidase", + "3.5.1.12 Biotinidase", + "3.5.1.13 Aryl-acylamidase", + "3.5.1.14 Aminoacylase", + "3.5.1.15 Aspartoacylase", + "3.5.1.16 Acetylornithine deacetylase", + "3.5.1.17 Acyl-lysine deacylase", + "3.5.1.18 Succinyl-diaminopimelate desuccinylase", + "3.5.1.19 Nicotinamidase", + "3.5.1.20 Citrullinase", + "3.5.1.21 N-acetyl-beta-alanine deacetylase", + "3.5.1.22 Pantothenase", + "3.5.1.23 Ceramidase", + "3.5.1.24 Choloylglycine hydrolase", + "3.5.1.25 N-acetylglucosamine-6-phosphate deacetylase", + "3.5.1.26 N(4)-(beta-N-acetylglucosaminyl)-L-asparaginase", + "3.5.1.27 N-formylmethionylaminoacyl-tRNA deformylase", + "3.5.1.28 N-acetylmuramoyl-L-alanine amidase", + "3.5.1.29 2-(acetamidomethylene)succinate hydrolase", + "3.5.1.30 5-aminopentanamidase", + 
"3.5.1.31 Formylmethionine deformylase", + "3.5.1.32 Hippurate hydrolase", + "3.5.1.33 N-acetylglucosamine deacetylase", + "3.5.1.35 D-glutaminase", + "3.5.1.36 N-methyl-2-oxoglutaramate hydrolase", + "3.5.1.38 Glutamin-(asparagin-)ase", + "3.5.1.39 Alkylamidase", + "3.5.1.40 Acylagmatine amidase", + "3.5.1.41 Chitin deacetylase", + "3.5.1.42 Nicotinamide-nucleotide amidase", + "3.5.1.43 Peptidyl-glutaminase", + "3.5.1.44 Protein-glutamine glutaminase", + "3.5.1.46 6-aminohexanoate-dimer hydrolase", + "3.5.1.47 N-acetyldiaminopimelate deacetylase", + "3.5.1.48 Acetylspermidine deacetylase", + "3.5.1.49 Formamidase", + "3.5.1.50 Pentanamidase", + "3.5.1.51 4-acetamidobutyryl-CoA deacetylase", + "3.5.1.52 Peptide-N(4)-(N-acetyl-beta-glucosaminyl)asparagine amidase", + "3.5.1.53 N-carbamoylputrescine amidase", + "3.5.1.54 Allophanate hydrolase", + "3.5.1.55 Long-chain-fatty-acyl-glutamate deacylase", + "3.5.1.56 N,N-dimethylformamidase", + "3.5.1.57 Tryptophanamidase", + "3.5.1.58 N-benzyloxycarbonylglycine hydrolase", + "3.5.1.59 N-carbamoylsarcosine amidase", + "3.5.1.60 N-(long-chain-acyl)ethanolamine deacylase", + "3.5.1.61 Mimosinase", + "3.5.1.62 Acetylputrescine deacetylase", + "3.5.1.63 4-acetamidobutyrate deacetylase", + "3.5.1.64 N(alpha)-benzyloxycarbonylleucine hydrolase", + "3.5.1.65 Theanine hydrolase", + "3.5.1.66 2-(hydroxymethyl)-3-(acetamidomethylene)succinate hydrolase", + "3.5.1.67 4-methyleneglutaminase", + "3.5.1.68 N-formylglutamate deformylase", + "3.5.1.69 Glycosphingolipid deacylase", + "3.5.1.70 Aculeacin-A deacylase", + "3.5.1.71 N-feruloylglycine deacylase", + "3.5.1.72 D-benzoylarginine-4-nitroanilide amidase", + "3.5.1.73 Carnitinamidase", + "3.5.1.74 Chenodeoxycholoyltaurine hydrolase", + "3.5.1.75 Urethanase", + "3.5.1.76 Arylalkyl acylamidase", + "3.5.1.77 N-carbamoyl-D-amino-acid hydrolase", + "3.5.1.78 Glutathionylspermidine amidase", + "3.5.1.79 Phthalyl amidase", + "3.5.1.81 N-acyl-D-amino-acid deacylase", + "3.5.1.82 
N-acyl-D-glutamate deacylase", + "3.5.1.83 N-acyl-D-aspartate deacylase", + "3.5.1.84 Biuret amidohydrolase", + "3.5.1.85 (S)-N-acetyl-1-phenylethylamine hydrolase", + "3.5.1.86 Mandelamide amidase", + "3.5.1.87 N-carbamoyl-L-amino-acid hydrolase", + "3.5.1.88 Peptide deformylase", + "3.5.1.89 N-acetylglucosaminylphosphatidylinositol deacetylase", + "3.5.1.90 Adenosylcobinamide hydrolase", + "3.5.1.91 N-substituted formamide deformylase", + "3.5.1.92 Pantetheine hydrolase", + "3.5.1.93 Glutaryl-7-aminocephalosporanic-acid acylase", + "3.5.1.94 Gamma-glutamyl-gamma-aminobutyrate hydrolase", + "3.5.1.95 N-malonylurea hydrolase", + "3.5.1.96 Succinylglutamate desuccinylase", + "3.5.1.97 Acyl-homoserine-lactone acylase", + "3.5.1.98 Histone deacetylase", + "3.5.1.99 Fatty acid amide hydrolase", + "3.5.1.100 (R)-amidase", + "3.5.1.101 L-proline amide hydrolase", + "3.5.1.102 2-amino-5-formylamino-6-ribosylaminopyrimidin-4(3H)-one 5'-monophosphate deformylase", + "3.5.1.103 N-acetyl-1-D-myo-inositol-2-amino-2-deoxy-alpha-D-glucopyranoside deacetylase", + "3.5.1.104 Peptidoglycan-N-acetylglucosamine deacetylase", + "3.5.1.105 Chitin disaccharide deacetylase", + "3.5.1.106 N-formylmaleamate deformylase", + "3.5.1.107 Maleamate amidohydrolase", + "3.5.1.108 UDP-3-O-acyl-N-acetylglucosamine deacetylase", + "3.5.1.n3 4-deoxy-4-formamido-L-arabinose-phosphoundecaprenol deformylase", + "3.5.2.1 Barbiturase", + "3.5.2.2 Dihydropyrimidinase", + "3.5.2.3 Dihydroorotase", + "3.5.2.4 Carboxymethylhydantoinase", + "3.5.2.5 Allantoinase", + "3.5.2.6 Beta-lactamase", + "3.5.2.7 Imidazolonepropionase", + "3.5.2.9 5-oxoprolinase (ATP-hydrolyzing)", + "3.5.2.10 Creatininase", + "3.5.2.11 L-lysine-lactamase", + "3.5.2.12 6-aminohexanoate-cyclic-dimer hydrolase", + "3.5.2.13 2,5-dioxopiperazine hydrolase", + "3.5.2.14 N-methylhydantoinase (ATP-hydrolyzing)", + "3.5.2.15 Cyanuric acid amidohydrolase", + "3.5.2.16 Maleimide hydrolase", + "3.5.2.17 Hydroxyisourate hydrolase", + "3.5.2.18 
Enamidase", + "3.5.2.19 Streptothricin hydrolase", + "3.5.3.1 Arginase", + "3.5.3.2 Guanidinoacetase", + "3.5.3.3 Creatinase", + "3.5.3.4 Allantoicase", + "3.5.3.5 Formimidoylaspartate deiminase", + "3.5.3.6 Arginine deiminase", + "3.5.3.7 Guanidinobutyrase", + "3.5.3.8 Formimidoylglutamase", + "3.5.3.9 Allantoate deiminase", + "3.5.3.10 D-arginase", + "3.5.3.11 Agmatinase", + "3.5.3.12 Agmatine deiminase", + "3.5.3.13 Formimidoylglutamate deiminase", + "3.5.3.14 Amidinoaspartase", + "3.5.3.15 Protein-arginine deiminase", + "3.5.3.16 Methylguanidinase", + "3.5.3.17 Guanidinopropionase", + "3.5.3.18 Dimethylargininase", + "3.5.3.19 Ureidoglycolate hydrolase", + "3.5.3.20 Diguanidinobutanase", + "3.5.3.21 Methylenediurea deaminase", + "3.5.3.22 Proclavaminate amidinohydrolase", + "3.5.3.23 N-succinylarginine dihydrolase", + "3.5.4.1 Cytosine deaminase", + "3.5.4.2 Adenine deaminase", + "3.5.4.3 Guanine deaminase", + "3.5.4.4 Adenosine deaminase", + "3.5.4.5 Cytidine deaminase", + "3.5.4.6 AMP deaminase", + "3.5.4.7 ADP deaminase", + "3.5.4.8 Aminoimidazolase", + "3.5.4.9 Methenyltetrahydrofolate cyclohydrolase", + "3.5.4.10 IMP cyclohydrolase", + "3.5.4.11 Pterin deaminase", + "3.5.4.12 dCMP deaminase", + "3.5.4.13 dCTP deaminase", + "3.5.4.14 Deoxycytidine deaminase", + "3.5.4.15 Guanosine deaminase", + "3.5.4.16 GTP cyclohydrolase I", + "3.5.4.17 Adenosine-phosphate deaminase", + "3.5.4.18 ATP deaminase", + "3.5.4.19 Phosphoribosyl-AMP cyclohydrolase", + "3.5.4.20 Pyrithiamine deaminase", + "3.5.4.21 Creatinine deaminase", + "3.5.4.22 1-pyrroline-4-hydroxy-2-carboxylate deaminase", + "3.5.4.23 Blasticidin-S deaminase", + "3.5.4.24 Sepiapterin deaminase", + "3.5.4.25 GTP cyclohydrolase II", + "3.5.4.26 Diaminohydroxyphosphoribosylaminopyrimidine deaminase", + "3.5.4.27 Methenyltetrahydromethanopterin cyclohydrolase", + "3.5.4.28 S-adenosylhomocysteine deaminase", + "3.5.4.29 GTP cyclohydrolase IIa", + "3.5.4.30 dCTP deaminase (dUMP-forming)", + "3.5.4.n1 
5-methylthioadenosine deaminase", + "3.5.4.n2 GTP cyclohydrolase (cyclic phosphate forming)", + "3.5.5.1 Nitrilase", + "3.5.5.2 Ricinine nitrilase", + "3.5.5.4 Cyanoalanine nitrilase", + "3.5.5.5 Arylacetonitrilase", + "3.5.5.6 Bromoxynil nitrilase", + "3.5.5.7 Aliphatic nitrilase", + "3.5.5.8 Thiocyanate hydrolase", + "3.5.99.1 Riboflavinase", + "3.5.99.2 Thiaminase", + "3.5.99.3 Hydroxydechloroatrazine ethylaminohydrolase", + "3.5.99.4 N-isopropylammelide isopropylaminohydrolase", + "3.5.99.5 2-aminomuconate deaminase", + "3.5.99.6 Glucosamine-6-phosphate deaminase", + "3.5.99.7 1-aminocyclopropane-1-carboxylate deaminase", + "3.5.99.8 5-nitroanthranilic acid aminohydrolase", + "3.6.1.1 Inorganic diphosphatase", + "3.6.1.2 Trimetaphosphatase", + "3.6.1.3 Adenosinetriphosphatase", + "3.6.1.5 Apyrase", + "3.6.1.6 Nucleoside-diphosphatase", + "3.6.1.7 Acylphosphatase", + "3.6.1.8 ATP diphosphatase", + "3.6.1.9 Nucleotide diphosphatase", + "3.6.1.10 Endopolyphosphatase", + "3.6.1.11 Exopolyphosphatase", + "3.6.1.12 dCTP diphosphatase", + "3.6.1.13 ADP-ribose diphosphatase", + "3.6.1.14 Adenosine-tetraphosphatase", + "3.6.1.15 Nucleoside-triphosphatase", + "3.6.1.16 CDP-glycerol diphosphatase", + "3.6.1.17 Bis(5'-nucleosyl)-tetraphosphatase (asymmetrical)", + "3.6.1.18 FAD diphosphatase", + "3.6.1.19 Nucleoside-triphosphate diphosphatase", + "3.6.1.20 5'-acylphosphoadenosine hydrolase", + "3.6.1.21 ADP-sugar diphosphatase", + "3.6.1.22 NAD(+) diphosphatase", + "3.6.1.23 dUTP diphosphatase", + "3.6.1.24 Nucleoside phosphoacylhydrolase", + "3.6.1.25 Triphosphatase", + "3.6.1.26 CDP-diacylglycerol diphosphatase", + "3.6.1.27 Undecaprenyl-diphosphatase", + "3.6.1.28 Thiamine-triphosphatase", + "3.6.1.29 Bis(5'-adenosyl)-triphosphatase", + "3.6.1.30 M(7)G(5')pppN diphosphatase", + "3.6.1.31 Phosphoribosyl-ATP diphosphatase", + "3.6.1.39 Thymidine-triphosphatase", + "3.6.1.40 Guanosine-5'-triphosphate,3'-diphosphate diphosphatase", + "3.6.1.41 
Bis(5'-nucleosyl)-tetraphosphatase (symmetrical)", + "3.6.1.42 Guanosine-diphosphatase", + "3.6.1.43 Dolichyldiphosphatase", + "3.6.1.44 Oligosaccharide-diphosphodolichol diphosphatase", + "3.6.1.45 UDP-sugar diphosphatase", + "3.6.1.52 Diphosphoinositol-polyphosphate diphosphatase", + "3.6.1.53 Mn(2+)-dependent ADP-ribose/CDP-alcohol diphosphatase", + "3.6.1.54 UDP-2,3-diacylglucosamine diphosphatase", + "3.6.1.n1 D-tyrosyl-tRNA(Tyr) hydrolase", + "3.6.1.n2 L-cysteinyl-tRNA(Pro)", + "3.6.1.n3 L-cysteinyl-tRNA(Cys) hydrolase", + "3.6.1.n4 Dihydroneopterin triphosphate diphosphatase", + "3.6.2.1 Adenylylsulfatase", + "3.6.2.2 Phosphoadenylylsulfatase", + "3.6.3.1 Phospholipid-translocating ATPase", + "3.6.3.2 Magnesium-importing ATPase", + "3.6.3.3 Cadmium-exporting ATPase", + "3.6.3.4 Copper-exporting ATPase", + "3.6.3.5 Zinc-exporting ATPase", + "3.6.3.6 Proton-exporting ATPase", + "3.6.3.7 Sodium-exporting ATPase", + "3.6.3.8 Calcium-transporting ATPase", + "3.6.3.9 Sodium/potassium-exchanging ATPase", + "3.6.3.10 Hydrogen/potassium-exchanging ATPase", + "3.6.3.11 Chloride-transporting ATPase", + "3.6.3.12 Potassium-transporting ATPase", + "3.6.3.14 H(+)-transporting two-sector ATPase", + "3.6.3.15 Sodium-transporting two-sector ATPase", + "3.6.3.16 Arsenite-transporting ATPase", + "3.6.3.17 Monosaccharide-transporting ATPase", + "3.6.3.18 Oligosaccharide-transporting ATPase", + "3.6.3.19 Maltose-transporting ATPase", + "3.6.3.20 Glycerol-3-phosphate-transporting ATPase", + "3.6.3.21 Polar-amino-acid-transporting ATPase", + "3.6.3.22 Nonpolar-amino-acid-transporting ATPase", + "3.6.3.23 Oligopeptide-transporting ATPase", + "3.6.3.24 Nickel-transporting ATPase", + "3.6.3.25 Sulfate-transporting ATPase", + "3.6.3.26 Nitrate-transporting ATPase", + "3.6.3.27 Phosphate-transporting ATPase", + "3.6.3.28 Phosphonate-transporting ATPase", + "3.6.3.29 Molybdate-transporting ATPase", + "3.6.3.30 Fe(3+)-transporting ATPase", + "3.6.3.31 Polyamine-transporting ATPase", + 
"3.6.3.32 Quaternary-amine-transporting ATPase", + "3.6.3.33 Vitamin B12-transporting ATPase", + "3.6.3.34 Iron-chelate-transporting ATPase", + "3.6.3.35 Manganese-transporting ATPase", + "3.6.3.36 Taurine-transporting ATPase", + "3.6.3.37 Guanine-transporting ATPase", + "3.6.3.38 Capsular-polysaccharide-transporting ATPase", + "3.6.3.39 Lipopolysaccharide-transporting ATPase", + "3.6.3.40 Teichoic-acid-transporting ATPase", + "3.6.3.41 Heme-transporting ATPase", + "3.6.3.42 Beta-glucan-transporting ATPase", + "3.6.3.43 Peptide-transporting ATPase", + "3.6.3.44 Xenobiotic-transporting ATPase", + "3.6.3.46 Cadmium-transporting ATPase", + "3.6.3.47 Fatty-acyl-CoA-transporting ATPase", + "3.6.3.48 Alpha-factor-transporting ATPase", + "3.6.3.49 Channel-conductance-controlling ATPase", + "3.6.3.50 Protein-secreting ATPase", + "3.6.3.51 Mitochondrial protein-transporting ATPase", + "3.6.3.52 Chloroplast protein-transporting ATPase", + "3.6.3.53 Ag(+)-exporting ATPase", + "3.6.3.n1 Cu(+) exporting ATPase", + "3.6.4.1 Myosin ATPase", + "3.6.4.2 Dynein ATPase", + "3.6.4.3 Microtubule-severing ATPase", + "3.6.4.4 Plus-end-directed kinesin ATPase", + "3.6.4.5 Minus-end-directed kinesin ATPase", + "3.6.4.6 Vesicle-fusing ATPase", + "3.6.4.7 Peroxisome-assembly ATPase", + "3.6.4.8 Proteasome ATPase", + "3.6.4.9 Chaperonin ATPase", + "3.6.4.10 Non-chaperonin molecular chaperone ATPase", + "3.6.4.11 Nucleoplasmin ATPase", + "3.6.4.12 DNA helicase", + "3.6.4.13 RNA helicase", + "3.6.5.1 Heterotrimeric G-protein GTPase", + "3.6.5.2 Small monomeric GTPase", + "3.6.5.3 Protein-synthesizing GTPase", + "3.6.5.4 Signal-recognition-particle GTPase", + "3.6.5.5 Dynamin GTPase", + "3.6.5.6 Tubulin GTPase", + "3.6.5.n1 Elongation factor 4", + "3.7.1.1 Oxaloacetase", + "3.7.1.2 Fumarylacetoacetase", + "3.7.1.3 Kynureninase", + "3.7.1.4 Phloretin hydrolase", + "3.7.1.5 Acylpyruvate hydrolase", + "3.7.1.6 Acetylpyruvate hydrolase", + "3.7.1.7 Beta-diketone hydrolase", + "3.7.1.8 
2,6-dioxo-6-phenylhexa-3-enoate hydrolase", + "3.7.1.9 2-hydroxymuconate-semialdehyde hydrolase", + "3.7.1.10 Cyclohexane-1,3-dione hydrolase", + "3.7.1.11 Cyclohexane-1,2-dione hydrolase", + "3.7.1.12 Cobalt-precorrin 5A hydrolase", + "3.7.1.13 2-hydroxy-6-oxo-6-(2-aminophenyl)hexa-2,4-dienoate hydrolase", + "3.7.1.n1 2-hydroxy-6-oxonona-2,4-dienedioate hydrolase", + "3.7.1.n2 3,5/4-trihydroxycyclohexa-1,2-dione hydrolase", + "3.8.1.1 Alkylhalidase", + "3.8.1.2 (S)-2-haloacid dehalogenase", + "3.8.1.3 Haloacetate dehalogenase", + "3.8.1.5 Haloalkane dehalogenase", + "3.8.1.6 4-chlorobenzoate dehalogenase", + "3.8.1.7 4-chlorobenzoyl-CoA dehalogenase", + "3.8.1.8 Atrazine chlorohydrolase", + "3.8.1.9 (R)-2-haloacid dehalogenase", + "3.8.1.10 2-haloacid dehalogenase (configuration-inverting)", + "3.8.1.11 2-haloacid dehalogenase (configuration-retaining)", + "3.9.1.1 Phosphoamidase", + "3.10.1.1 N-sulfoglucosamine sulfohydrolase", + "3.10.1.2 Cyclamate sulfohydrolase", + "3.11.1.1 Phosphonoacetaldehyde hydrolase", + "3.11.1.2 Phosphonoacetate hydrolase", + "3.11.1.3 Phosphonopyruvate hydrolase", + "3.12.1.1 Trithionate hydrolase", + "3.13.1.1 UDP-sulfoquinovose synthase", + "3.13.1.3 2'-hydroxybiphenyl-2-sulfinate desulfinase", + "4.1.1.1 Pyruvate decarboxylase", + "4.1.1.2 Oxalate decarboxylase", + "4.1.1.3 Oxaloacetate decarboxylase", + "4.1.1.4 Acetoacetate decarboxylase", + "4.1.1.5 Acetolactate decarboxylase", + "4.1.1.6 Aconitate decarboxylase", + "4.1.1.7 Benzoylformate decarboxylase", + "4.1.1.8 Oxalyl-CoA decarboxylase", + "4.1.1.9 Malonyl-CoA decarboxylase", + "4.1.1.11 Aspartate 1-decarboxylase", + "4.1.1.12 Aspartate 4-decarboxylase", + "4.1.1.14 Valine decarboxylase", + "4.1.1.15 Glutamate decarboxylase", + "4.1.1.16 Hydroxyglutamate decarboxylase", + "4.1.1.17 Ornithine decarboxylase", + "4.1.1.18 Lysine decarboxylase", + "4.1.1.19 Arginine decarboxylase", + "4.1.1.20 Diaminopimelate decarboxylase", + "4.1.1.21 Phosphoribosylaminoimidazole 
carboxylase", + "4.1.1.22 Histidine decarboxylase", + "4.1.1.23 Orotidine-5'-phosphate decarboxylase", + "4.1.1.24 Aminobenzoate decarboxylase", + "4.1.1.25 Tyrosine decarboxylase", + "4.1.1.28 Aromatic-L-amino-acid decarboxylase", + "4.1.1.29 Sulfinoalanine decarboxylase", + "4.1.1.30 Pantothenoylcysteine decarboxylase", + "4.1.1.31 Phosphoenolpyruvate carboxylase", + "4.1.1.32 Phosphoenolpyruvate carboxykinase (GTP)", + "4.1.1.33 Diphosphomevalonate decarboxylase", + "4.1.1.34 Dehydro-L-gulonate decarboxylase", + "4.1.1.35 UDP-glucuronate decarboxylase", + "4.1.1.36 Phosphopantothenoylcysteine decarboxylase", + "4.1.1.37 Uroporphyrinogen decarboxylase", + "4.1.1.38 Phosphoenolpyruvate carboxykinase (diphosphate)", + "4.1.1.39 Ribulose-bisphosphate carboxylase", + "4.1.1.40 Hydroxypyruvate decarboxylase", + "4.1.1.41 Methylmalonyl-CoA decarboxylase", + "4.1.1.42 Carnitine decarboxylase", + "4.1.1.43 Phenylpyruvate decarboxylase", + "4.1.1.44 4-carboxymuconolactone decarboxylase", + "4.1.1.45 Aminocarboxymuconate-semialdehyde decarboxylase", + "4.1.1.46 o-pyrocatechuate decarboxylase", + "4.1.1.47 Tartronate-semialdehyde synthase", + "4.1.1.48 Indole-3-glycerol-phosphate synthase", + "4.1.1.49 Phosphoenolpyruvate carboxykinase (ATP)", + "4.1.1.50 Adenosylmethionine decarboxylase", + "4.1.1.51 3-hydroxy-2-methylpyridine-4,5-dicarboxylate 4-decarboxylase", + "4.1.1.52 6-methylsalicylate decarboxylase", + "4.1.1.53 Phenylalanine decarboxylase", + "4.1.1.54 Dihydroxyfumarate decarboxylase", + "4.1.1.55 4,5-dihydroxyphthalate decarboxylase", + "4.1.1.56 3-oxolaurate decarboxylase", + "4.1.1.57 Methionine decarboxylase", + "4.1.1.58 Orsellinate decarboxylase", + "4.1.1.59 Gallate decarboxylase", + "4.1.1.60 Stipitatonate decarboxylase", + "4.1.1.61 4-hydroxybenzoate decarboxylase", + "4.1.1.62 Gentisate decarboxylase", + "4.1.1.63 Protocatechuate decarboxylase", + "4.1.1.64 2,2-dialkylglycine decarboxylase (pyruvate)", + "4.1.1.65 Phosphatidylserine decarboxylase", + 
"4.1.1.66 Uracil-5-carboxylate decarboxylase", + "4.1.1.67 UDP-galacturonate decarboxylase", + "4.1.1.68 5-oxopent-3-ene-1,2,5-tricarboxylate decarboxylase", + "4.1.1.69 3,4-dihydroxyphthalate decarboxylase", + "4.1.1.70 Glutaconyl-CoA decarboxylase", + "4.1.1.71 2-oxoglutarate decarboxylase", + "4.1.1.72 Branched-chain-2-oxoacid decarboxylase", + "4.1.1.73 Tartrate decarboxylase", + "4.1.1.74 Indolepyruvate decarboxylase", + "4.1.1.75 5-guanidino-2-oxopentanoate decarboxylase", + "4.1.1.76 Arylmalonate decarboxylase", + "4.1.1.77 4-oxalocrotonate decarboxylase", + "4.1.1.78 Acetylenedicarboxylate decarboxylase", + "4.1.1.79 Sulfopyruvate decarboxylase", + "4.1.1.80 4-hydroxyphenylpyruvate decarboxylase", + "4.1.1.81 Threonine-phosphate decarboxylase", + "4.1.1.82 Phosphonopyruvate decarboxylase", + "4.1.1.83 4-hydroxyphenylacetate decarboxylase", + "4.1.1.84 D-dopachrome decarboxylase", + "4.1.1.85 3-dehydro-L-gulonate-6-phosphate decarboxylase", + "4.1.1.86 Diaminobutyrate decarboxylase", + "4.1.1.87 Malonyl-S-ACP decarboxylase", + "4.1.1.88 Biotin-independent malonate decarboxylase", + "4.1.1.89 Biotin-dependent malonate decarboxylase", + "4.1.1.90 Peptidyl-glutamate 4-carboxylase", + "4.1.2.2 Ketotetrose-phosphate aldolase", + "4.1.2.4 Deoxyribose-phosphate aldolase", + "4.1.2.5 Threonine aldolase", + "4.1.2.8 Indole-3-glycerol-phosphate lyase", + "4.1.2.9 Phosphoketolase", + "4.1.2.10 (R)-mandelonitrile lyase", + "4.1.2.11 Hydroxymandelonitrile lyase", + "4.1.2.12 2-dehydropantoate aldolase", + "4.1.2.13 Fructose-bisphosphate aldolase", + "4.1.2.14 2-dehydro-3-deoxy-phosphogluconate aldolase", + "4.1.2.17 L-fuculose-phosphate aldolase", + "4.1.2.18 2-dehydro-3-deoxy-L-pentonate aldolase", + "4.1.2.19 Rhamnulose-1-phosphate aldolase", + "4.1.2.20 2-dehydro-3-deoxyglucarate aldolase", + "4.1.2.21 2-dehydro-3-deoxy-6-phosphogalactonate aldolase", + "4.1.2.22 Fructose-6-phosphate phosphoketolase", + "4.1.2.23 3-deoxy-D-manno-octulosonate aldolase", + "4.1.2.24 
Dimethylaniline-N-oxide aldolase", + "4.1.2.25 Dihydroneopterin aldolase", + "4.1.2.26 Phenylserine aldolase", + "4.1.2.27 Sphinganine-1-phosphate aldolase", + "4.1.2.28 2-dehydro-3-deoxy-D-pentonate aldolase", + "4.1.2.29 5-dehydro-2-deoxyphosphogluconate aldolase", + "4.1.2.30 17-alpha-hydroxyprogesterone aldolase", + "4.1.2.32 Trimethylamine-oxide aldolase", + "4.1.2.33 Fucosterol-epoxide lyase", + "4.1.2.34 4-(2-carboxyphenyl)-2-oxobut-3-enoate aldolase", + "4.1.2.35 Propioin synthase", + "4.1.2.36 Lactate aldolase", + "4.1.2.38 Benzoin aldolase", + "4.1.2.40 Tagatose-bisphosphate aldolase", + "4.1.2.41 Vanillin synthase", + "4.1.2.42 D-threonine aldolase", + "4.1.2.43 3-hexulose-6-phosphate synthase", + "4.1.2.44 Benzoyl-CoA-dihydrodiol lyase", + "4.1.2.45 Trans-o-hydroxybenzylidenepyruvate hydratase-aldolase", + "4.1.2.46 Aliphatic (R)-hydroxynitrile lyase", + "4.1.2.47 (S)-hydroxynitrile lyase", + "4.1.2.n2 2-hydroxyphytanoyl-CoA lyase", + "4.1.2.n3 2-keto-3-deoxy-L-rhamnonate aldolase", + "4.1.2.n4 4-hydroxy-2-oxo-heptane-1,7-dioate aldolase", + "4.1.2.n5 2-amino-3,7-dideoxy-D-threo-hept-6-ulosonate synthase", + "4.1.3.1 Isocitrate lyase", + "4.1.3.3 N-acetylneuraminate lyase", + "4.1.3.4 Hydroxymethylglutaryl-CoA lyase", + "4.1.3.6 Citrate (pro-3S)-lyase", + "4.1.3.13 Oxalomalate lyase", + "4.1.3.14 L-erythro-3-hydroxyaspartate aldolase", + "4.1.3.16 4-hydroxy-2-oxoglutarate aldolase", + "4.1.3.17 4-hydroxy-4-methyl-2-oxoglutarate aldolase", + "4.1.3.22 Citramalate lyase", + "4.1.3.24 Malyl-CoA lyase", + "4.1.3.25 Citramalyl-CoA lyase", + "4.1.3.26 3-hydroxy-3-isohexenylglutaryl-CoA lyase", + "4.1.3.27 Anthranilate synthase", + "4.1.3.30 Methylisocitrate lyase", + "4.1.3.32 2,3-dimethylmalate lyase", + "4.1.3.34 Citryl-CoA lyase", + "4.1.3.35 (1-hydroxycyclohexan-1-yl)acetyl-CoA lyase", + "4.1.3.36 1,4-dihydroxy-2-naphthoyl-CoA synthase", + "4.1.3.38 Aminodeoxychorismate lyase", + "4.1.3.39 4-hydroxy-2-oxovalerate aldolase", + "4.1.3.40 Chorismate lyase", 
+ "4.1.3.41 3-hydroxy-D-aspartate aldolase", + "4.1.99.1 Tryptophanase", + "4.1.99.2 Tyrosine phenol-lyase", + "4.1.99.3 Deoxyribodipyrimidine photo-lyase", + "4.1.99.5 Octadecanal decarbonylase", + "4.1.99.11 Benzylsuccinate synthase", + "4.1.99.12 3,4-dihydroxy-2-butanone-4-phosphate synthase", + "4.1.99.13 (6-4)DNA photolyase", + "4.1.99.14 Spore photoproduct lyase", + "4.2.1.1 Carbonate dehydratase", + "4.2.1.2 Fumarate hydratase", + "4.2.1.3 Aconitate hydratase", + "4.2.1.4 Citrate dehydratase", + "4.2.1.5 Arabinonate dehydratase", + "4.2.1.6 Galactonate dehydratase", + "4.2.1.7 Altronate dehydratase", + "4.2.1.8 Mannonate dehydratase", + "4.2.1.9 Dihydroxy-acid dehydratase", + "4.2.1.10 3-dehydroquinate dehydratase", + "4.2.1.11 Phosphopyruvate hydratase", + "4.2.1.12 Phosphogluconate dehydratase", + "4.2.1.17 Enoyl-CoA hydratase", + "4.2.1.18 Methylglutaconyl-CoA hydratase", + "4.2.1.19 Imidazoleglycerol-phosphate dehydratase", + "4.2.1.20 Tryptophan synthase", + "4.2.1.22 Cystathionine beta-synthase", + "4.2.1.24 Porphobilinogen synthase", + "4.2.1.25 L-arabinonate dehydratase", + "4.2.1.27 Acetylenecarboxylate hydratase", + "4.2.1.28 Propanediol dehydratase", + "4.2.1.30 Glycerol dehydratase", + "4.2.1.31 Maleate hydratase", + "4.2.1.32 L(+)-tartrate dehydratase", + "4.2.1.33 3-isopropylmalate dehydratase", + "4.2.1.34 (S)-2-methylmalate dehydratase", + "4.2.1.35 (R)-2-methylmalate dehydratase", + "4.2.1.36 Homoaconitate hydratase", + "4.2.1.39 Gluconate dehydratase", + "4.2.1.40 Glucarate dehydratase", + "4.2.1.41 5-dehydro-4-deoxyglucarate dehydratase", + "4.2.1.42 Galactarate dehydratase", + "4.2.1.43 2-dehydro-3-deoxy-L-arabinonate dehydratase", + "4.2.1.44 Myo-inosose-2 dehydratase", + "4.2.1.45 CDP-glucose 4,6-dehydratase", + "4.2.1.46 dTDP-glucose 4,6-dehydratase", + "4.2.1.47 GDP-mannose 4,6-dehydratase", + "4.2.1.48 D-glutamate cyclase", + "4.2.1.49 Urocanate hydratase", + "4.2.1.50 Pyrazolylalanine synthase", + "4.2.1.51 Prephenate dehydratase", 
+ "4.2.1.52 Dihydrodipicolinate synthase", + "4.2.1.53 Oleate hydratase", + "4.2.1.54 Lactoyl-CoA dehydratase", + "4.2.1.55 3-hydroxybutyryl-CoA dehydratase", + "4.2.1.56 Itaconyl-CoA hydratase", + "4.2.1.57 Isohexenylglutaconyl-CoA hydratase", + "4.2.1.58 Crotonoyl-[acyl-carrier-protein] hydratase", + "4.2.1.59 3-hydroxyoctanoyl-[acyl-carrier-protein] dehydratase", + "4.2.1.60 3-hydroxydecanoyl-[acyl-carrier-protein] dehydratase", + "4.2.1.61 3-hydroxypalmitoyl-[acyl-carrier-protein] dehydratase", + "4.2.1.62 5-alpha-hydroxysteroid dehydratase", + "4.2.1.65 3-cyanoalanine hydratase", + "4.2.1.66 Cyanide hydratase", + "4.2.1.67 D-fuconate dehydratase", + "4.2.1.68 L-fuconate dehydratase", + "4.2.1.69 Cyanamide hydratase", + "4.2.1.70 Pseudouridylate synthase", + "4.2.1.73 Protoaphin-aglucone dehydratase (cyclizing)", + "4.2.1.74 Long-chain-enoyl-CoA hydratase", + "4.2.1.75 Uroporphyrinogen-III synthase", + "4.2.1.76 UDP-glucose 4,6-dehydratase", + "4.2.1.77 Trans-L-3-hydroxyproline dehydratase", + "4.2.1.78 (S)-norcoclaurine synthase", + "4.2.1.79 2-methylcitrate dehydratase", + "4.2.1.80 2-oxopent-4-enoate hydratase", + "4.2.1.81 D(-)-tartrate dehydratase", + "4.2.1.82 Xylonate dehydratase", + "4.2.1.83 4-oxalmesaconate hydratase", + "4.2.1.84 Nitrile hydratase", + "4.2.1.85 Dimethylmaleate hydratase", + "4.2.1.87 Octopamine dehydratase", + "4.2.1.88 Synephrine dehydratase", + "4.2.1.89 Carnitine dehydratase", + "4.2.1.90 L-rhamnonate dehydratase", + "4.2.1.91 Arogenate dehydratase", + "4.2.1.92 Hydroperoxide dehydratase", + "4.2.1.93 ATP-dependent NAD(P)H-hydrate dehydratase", + "4.2.1.94 Scytalone dehydratase", + "4.2.1.95 Kievitone hydratase", + "4.2.1.96 4a-hydroxytetrahydrobiopterin dehydratase", + "4.2.1.97 Phaseollidin hydratase", + "4.2.1.98 16-alpha-hydroxyprogesterone dehydratase", + "4.2.1.99 2-methylisocitrate dehydratase", + "4.2.1.100 Cyclohexa-1,5-dienecarbonyl-CoA hydratase", + "4.2.1.101 Trans-feruloyl-CoA hydratase", + "4.2.1.103 
Cyclohexyl-isocyanide hydratase", + "4.2.1.104 Cyanase", + "4.2.1.105 2-hydroxyisoflavanone dehydratase", + "4.2.1.106 Bile-acid 7-alpha-dehydratase", + "4.2.1.107 3-alpha,7-alpha,12-alpha-trihydroxy-5-beta-cholest-24-enoyl-CoA hydratase", + "4.2.1.108 Ectoine synthase", + "4.2.1.109 Methylthioribulose 1-phosphate dehydratase", + "4.2.1.110 Aldos-2-ulose dehydratase", + "4.2.1.111 1,5-anhydro-D-fructose dehydratase", + "4.2.1.112 Acetylene hydratase", + "4.2.1.113 o-succinylbenzoate synthase", + "4.2.1.114 Methanogen homoaconitase", + "4.2.1.115 UDP-N-acetylglucosamine 4,6-dehydratase (inverting)", + "4.2.1.116 3-hydroxypropionyl-CoA dehydratase", + "4.2.1.117 2-methylcitrate dehydratase (2-methyl-trans-aconitate forming)", + "4.2.1.118 3-dehydroshikimate dehydratase", + "4.2.1.119 Enoyl-CoA hydratase 2", + "4.2.1.120 4-hydroxybutanoyl-CoA dehydratase", + "4.2.1.121 Colneleate synthase", + "4.2.1.n1 N-acetylmuramic acid 6-phosphate etherase", + "4.2.2.1 Hyaluronate lyase", + "4.2.2.2 Pectate lyase", + "4.2.2.3 Poly(beta-D-mannuronate) lyase", + "4.2.2.5 Chondroitin AC lyase", + "4.2.2.6 Oligogalacturonide lyase", + "4.2.2.7 Heparin lyase", + "4.2.2.8 Heparin-sulfate lyase", + "4.2.2.9 Pectate disaccharide-lyase", + "4.2.2.10 Pectin lyase", + "4.2.2.11 Poly(alpha-L-guluronate) lyase", + "4.2.2.12 Xanthan lyase", + "4.2.2.13 Exo-(1->4)-alpha-D-glucan lyase", + "4.2.2.14 Glucuronan lyase", + "4.2.2.15 Anhydrosialidase", + "4.2.2.16 Levan fructotransferase (DFA-IV-forming)", + "4.2.2.17 Inulin fructotransferase (DFA-I-forming)", + "4.2.2.18 Inulin fructotransferase (DFA-III-forming)", + "4.2.2.19 Chondroitin B lyase", + "4.2.2.20 Chondroitin-sulfate-ABC endolyase", + "4.2.2.21 Chondroitin-sulfate-ABC exolyase", + "4.2.2.22 Pectate trisaccharide-lyase", + "4.2.2.n1 Peptidoglycan lytic exotransglycosylase", + "4.2.2.n2 Peptidoglycan lytic endotransglycosylase", + "4.2.3.1 Threonine synthase", + "4.2.3.2 Ethanolamine-phosphate phospho-lyase", + "4.2.3.3 Methylglyoxal 
synthase", + "4.2.3.4 3-dehydroquinate synthase", + "4.2.3.5 Chorismate synthase", + "4.2.3.6 Trichodiene synthase", + "4.2.3.7 Pentalenene synthase", + "4.2.3.8 Casbene synthase", + "4.2.3.9 Aristolochene synthase", + "4.2.3.10 (-)-endo-fenchol synthase", + "4.2.3.11 Sabinene-hydrate synthase", + "4.2.3.12 6-pyruvoyltetrahydropterin synthase", + "4.2.3.13 (+)-delta-cadinene synthase", + "4.2.3.14 Pinene synthase", + "4.2.3.15 Myrcene synthase", + "4.2.3.16 (4S)-limonene synthase", + "4.2.3.17 Taxadiene synthase", + "4.2.3.18 Abietadiene synthase", + "4.2.3.19 Ent-kaurene synthase", + "4.2.3.20 (R)-limonene synthase", + "4.2.3.21 Vetispiradiene synthase", + "4.2.3.22 Germacradienol synthase", + "4.2.3.23 Germacrene-A synthase", + "4.2.3.24 Amorpha-4,11-diene synthase", + "4.2.3.25 S-linalool synthase", + "4.2.3.26 R-linalool synthase", + "4.2.3.27 Isoprene synthase", + "4.2.3.28 Ent-cassa-12,15-diene synthase", + "4.2.3.29 Ent-sandaracopimaradiene synthase", + "4.2.3.30 Ent-pimara-8(14),15-diene synthase", + "4.2.3.31 Ent-pimara-9(11),15-diene synthase", + "4.2.3.32 Levopimaradiene synthase", + "4.2.3.33 Stemar-13-ene synthase", + "4.2.3.34 Stemod-13(17)-ene synthase", + "4.2.3.35 Syn-pimara-7,15-diene synthase", + "4.2.3.36 Terpentetriene synthase", + "4.2.3.37 Epi-isozizaene synthase", + "4.2.3.38 Alpha-bisabolene synthase", + "4.2.3.39 Epi-cedrol synthase", + "4.2.3.40 (Z)-gamma-bisabolene synthase", + "4.2.3.41 Elisabethatriene synthase", + "4.2.3.42 Aphidicolan-16-beta-ol synthase", + "4.2.3.43 Fusicocca-2,10(14)-diene synthase", + "4.2.3.44 Isopimara-7,15-diene synthase", + "4.2.3.45 Phyllocladan-16-alpha-ol synthase", + "4.2.3.46 Alpha-farnesene synthase", + "4.2.3.47 Beta-farnesene synthase", + "4.2.3.48 (3S,6E)-nerolidol synthase", + "4.2.3.49 (3R,6E)-nerolidol synthase", + "4.2.3.50 (+)-alpha-santalene synthase ((2Z,6Z)-farnesyl diphosphate cyclizing)", + "4.2.3.51 Beta-phellandrene synthase (neryl-diphosphate-cyclizing)", + "4.2.3.52 
(4S)-beta-phellandrene synthase (geranyl-diphosphate-cyclizing)", + "4.2.3.53 (+)-endo-beta-bergamotene synthase ((2Z,6Z)-farnesyl diphosphate cyclizing)", + "4.2.3.54 (-)-endo-alpha-bergamotene synthase ((2Z,6Z)-farnesyl diphosphate cyclizing)", + "4.2.3.55 (S)-beta-bisabolene synthase", + "4.2.3.56 Gamma-humulene synthase", + "4.2.3.57 Beta-caryophyllene synthase", + "4.2.3.58 Longifolene synthase", + "4.2.3.59 (E)-gamma-bisabolene synthase", + "4.2.3.60 Germacrene C synthase", + "4.2.3.n2 Delta-selinene synthase", + "4.2.3.n4 (-)-camphene synthase", + "4.2.3.n6 Terpinolene synthase", + "4.2.3.n7 (-)-(S)-limonene/(-)-alpha-pinene synthase", + "4.2.3.n8 Ent-isokaurene synthase", + "4.2.3.n12 Zingiberene synthase", + "4.2.3.n14 2-methylisoborneol synthase", + "4.2.99.12 Carboxymethyloxysuccinate lyase", + "4.2.99.18 DNA-(apurinic or apyrimidinic site) lyase", + "4.2.99.20 2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate synthase", + "4.2.99.21 Isochorismate lyase", + "4.3.1.1 Aspartate ammonia-lyase", + "4.3.1.2 Methylaspartate ammonia-lyase", + "4.3.1.3 Histidine ammonia-lyase", + "4.3.1.4 Formimidoyltetrahydrofolate cyclodeaminase", + "4.3.1.6 Beta-alanyl-CoA ammonia-lyase", + "4.3.1.7 Ethanolamine ammonia-lyase", + "4.3.1.9 Glucosaminate ammonia-lyase", + "4.3.1.10 Serine-sulfate ammonia-lyase", + "4.3.1.12 Ornithine cyclodeaminase", + "4.3.1.13 Carbamoyl-serine ammonia-lyase", + "4.3.1.14 3-aminobutyryl-CoA ammonia-lyase", + "4.3.1.15 Diaminopropionate ammonia-lyase", + "4.3.1.16 Threo-3-hydroxy-L-aspartate ammonia-lyase", + "4.3.1.17 L-serine ammonia-lyase", + "4.3.1.18 D-serine ammonia-lyase", + "4.3.1.19 Threonine ammonia-lyase", + "4.3.1.20 Erythro-3-hydroxy-L-aspartate ammonia-lyase", + "4.3.1.22 3,4-dihydroxyphenylalanine reductive deaminase", + "4.3.1.23 Tyrosine ammonia-lyase", + "4.3.1.24 Phenylalanine ammonia-lyase", + "4.3.1.25 Phenylalanine/tyrosine ammonia-lyase", + "4.3.1.26 Chromopyrrolate synthase", + "4.3.1.27 Threo-3-hydroxy-D-aspartate 
ammonia-lyase", + "4.3.2.1 Argininosuccinate lyase", + "4.3.2.2 Adenylosuccinate lyase", + "4.3.2.3 Ureidoglycolate lyase", + "4.3.2.4 Purine imidazole-ring cyclase", + "4.3.2.5 Peptidylamidoglycolate lyase", + "4.3.3.1 3-ketovalidoxylamine C-N-lyase", + "4.3.3.2 Strictosidine synthase", + "4.3.3.3 Deacetylisoipecoside synthase", + "4.3.3.4 Deacetylipecoside synthase", + "4.3.3.5 4'-demethylrebeccamycin synthase", + "4.3.99.2 Carboxybiotin decarboxylase", + "4.4.1.1 Cystathionine gamma-lyase", + "4.4.1.2 Homocysteine desulfhydrase", + "4.4.1.3 Dimethylpropiothetin dethiomethylase", + "4.4.1.4 Alliin lyase", + "4.4.1.5 Lactoylglutathione lyase", + "4.4.1.6 S-alkylcysteine lyase", + "4.4.1.8 Cystathionine beta-lyase", + "4.4.1.9 L-3-cyanoalanine synthase", + "4.4.1.10 Cysteine lyase", + "4.4.1.11 Methionine gamma-lyase", + "4.4.1.13 Cysteine-S-conjugate beta-lyase", + "4.4.1.14 1-aminocyclopropane-1-carboxylate synthase", + "4.4.1.15 D-cysteine desulfhydrase", + "4.4.1.16 Selenocysteine lyase", + "4.4.1.17 Holocytochrome-c synthase", + "4.4.1.19 Phosphosulfolactate synthase", + "4.4.1.20 Leukotriene-C(4) synthase", + "4.4.1.21 S-ribosylhomocysteine lyase", + "4.4.1.22 S-(hydroxymethyl)glutathione synthase", + "4.4.1.23 2-hydroxypropyl-CoM lyase", + "4.4.1.24 Sulfolactate sulfo-lyase", + "4.4.1.25 L-cysteate sulfo-lyase", + "4.5.1.1 DDT-dehydrochlorinase", + "4.5.1.2 3-chloro-D-alanine dehydrochlorinase", + "4.5.1.3 Dichloromethane dehalogenase", + "4.5.1.4 L-2-amino-4-chloropent-4-enoate dehydrochlorinase", + "4.5.1.5 S-carboxymethylcysteine synthase", + "4.6.1.1 Adenylate cyclase", + "4.6.1.2 Guanylate cyclase", + "4.6.1.6 Cytidylate cyclase", + "4.6.1.12 2-C-methyl-D-erythritol 2,4-cyclodiphosphate synthase", + "4.6.1.13 Phosphatidylinositol diacylglycerol-lyase", + "4.6.1.14 Glycosylphosphatidylinositol diacylglycerol-lyase", + "4.6.1.15 FAD-AMP lyase (cyclizing)", + "4.99.1.1 Ferrochelatase", + "4.99.1.2 Alkylmercury lyase", + "4.99.1.3 Sirohydrochlorin 
cobaltochelatase", + "4.99.1.4 Sirohydrochlorin ferrochelatase", + "4.99.1.5 Aliphatic aldoxime dehydratase", + "4.99.1.6 Indoleacetaldoxime dehydratase", + "4.99.1.7 Phenylacetaldoxime dehydratase", + "4.99.1.8 Heme ligase", + "5.1.1.1 Alanine racemase", + "5.1.1.2 Methionine racemase", + "5.1.1.3 Glutamate racemase", + "5.1.1.4 Proline racemase", + "5.1.1.5 Lysine racemase", + "5.1.1.6 Threonine racemase", + "5.1.1.7 Diaminopimelate epimerase", + "5.1.1.8 4-hydroxyproline epimerase", + "5.1.1.9 Arginine racemase", + "5.1.1.10 Amino-acid racemase", + "5.1.1.11 Phenylalanine racemase (ATP-hydrolyzing)", + "5.1.1.12 Ornithine racemase", + "5.1.1.13 Aspartate racemase", + "5.1.1.14 Nocardicin-A epimerase", + "5.1.1.15 2-aminohexano-6-lactam racemase", + "5.1.1.16 Protein-serine epimerase", + "5.1.1.17 Isopenicillin-N epimerase", + "5.1.1.18 Serine racemase", + "5.1.2.1 Lactate racemase", + "5.1.2.2 Mandelate racemase", + "5.1.2.3 3-hydroxybutyryl-CoA epimerase", + "5.1.2.4 Acetoin racemase", + "5.1.2.5 Tartrate epimerase", + "5.1.2.6 Isocitrate epimerase", + "5.1.3.1 Ribulose-phosphate 3-epimerase", + "5.1.3.2 UDP-glucose 4-epimerase", + "5.1.3.3 Aldose 1-epimerase", + "5.1.3.4 L-ribulose-5-phosphate 4-epimerase", + "5.1.3.5 UDP-arabinose 4-epimerase", + "5.1.3.6 UDP-glucuronate 4-epimerase", + "5.1.3.7 UDP-N-acetylglucosamine 4-epimerase", + "5.1.3.8 N-acylglucosamine 2-epimerase", + "5.1.3.9 N-acylglucosamine-6-phosphate 2-epimerase", + "5.1.3.10 CDP-paratose 2-epimerase", + "5.1.3.11 Cellobiose epimerase", + "5.1.3.12 UDP-glucuronate 5'-epimerase", + "5.1.3.13 dTDP-4-dehydrorhamnose 3,5-epimerase", + "5.1.3.14 UDP-N-acetylglucosamine 2-epimerase", + "5.1.3.15 Glucose-6-phosphate 1-epimerase", + "5.1.3.16 UDP-glucosamine 4-epimerase", + "5.1.3.17 Heparosan-N-sulfate-glucuronate 5-epimerase", + "5.1.3.18 GDP-mannose 3,5-epimerase", + "5.1.3.19 Chondroitin-glucuronate 5-epimerase", + "5.1.3.20 ADP-glyceromanno-heptose 6-epimerase", + "5.1.3.21 Maltose epimerase", + 
"5.1.3.22 L-ribulose-5-phosphate 3-epimerase", + "5.1.3.23 UDP-2,3-diacetamido-2,3-dideoxyglucuronic acid 2-epimerase", + "5.1.3.n1 Sialic acid epimerase", + "5.1.3.n2 L-fucose mutarotase", + "5.1.3.n3 L-rhamnose mutarotase", + "5.1.99.1 Methylmalonyl-CoA epimerase", + "5.1.99.2 16-hydroxysteroid epimerase", + "5.1.99.3 Allantoin racemase", + "5.1.99.4 Alpha-methylacyl-CoA racemase", + "5.1.99.5 Hydantoin racemase", + "5.2.1.1 Maleate isomerase", + "5.2.1.2 Maleylacetoacetate isomerase", + "5.2.1.3 Retinal isomerase", + "5.2.1.4 Maleylpyruvate isomerase", + "5.2.1.5 Linoleate isomerase", + "5.2.1.6 Furylfuramide isomerase", + "5.2.1.7 Retinol isomerase", + "5.2.1.8 Peptidylprolyl isomerase", + "5.2.1.9 Farnesol 2-isomerase", + "5.2.1.10 2-chloro-4-carboxymethylenebut-2-en-1,4-olide isomerase", + "5.3.1.1 Triose-phosphate isomerase", + "5.3.1.3 Arabinose isomerase", + "5.3.1.4 L-arabinose isomerase", + "5.3.1.5 Xylose isomerase", + "5.3.1.6 Ribose-5-phosphate isomerase", + "5.3.1.7 Mannose isomerase", + "5.3.1.8 Mannose-6-phosphate isomerase", + "5.3.1.9 Glucose-6-phosphate isomerase", + "5.3.1.12 Glucuronate isomerase", + "5.3.1.13 Arabinose-5-phosphate isomerase", + "5.3.1.14 L-rhamnose isomerase", + "5.3.1.15 D-lyxose ketol-isomerase", + "5.3.1.16 1-(5-phosphoribosyl)-5-((5-phosphoribosylamino)methylideneamino)imidazole-4-carboxamide isomerase", + "5.3.1.17 4-deoxy-L-threo-5-hexosulose-uronate ketol-isomerase", + "5.3.1.20 Ribose isomerase", + "5.3.1.21 Corticosteroid side-chain-isomerase", + "5.3.1.22 Hydroxypyruvate isomerase", + "5.3.1.23 S-methyl-5-thioribose-1-phosphate isomerase", + "5.3.1.24 Phosphoribosylanthranilate isomerase", + "5.3.1.25 L-fucose isomerase", + "5.3.1.26 Galactose-6-phosphate isomerase", + "5.3.1.27 6-phospho-3-hexuloisomerase", + "5.3.1.28 D-sedoheptulose 7-phosphate isomerase", + "5.3.1.n1 5-deoxy-glucuronate isomerase", + "5.3.2.1 Phenylpyruvate tautomerase", + "5.3.2.2 Oxaloacetate tautomerase", + "5.3.2.n1 
2,3-diketo-5-methylthiopentyl-1-phosphate enolase", + "5.3.3.1 Steroid Delta-isomerase", + "5.3.3.2 Isopentenyl-diphosphate Delta-isomerase", + "5.3.3.3 Vinylacetyl-CoA Delta-isomerase", + "5.3.3.4 Muconolactone Delta-isomerase", + "5.3.3.5 Cholestenol Delta-isomerase", + "5.3.3.6 Methylitaconate Delta-isomerase", + "5.3.3.7 Aconitate Delta-isomerase", + "5.3.3.8 Dodecenoyl-CoA isomerase", + "5.3.3.9 Prostaglandin-A(1) Delta-isomerase", + "5.3.3.10 5-carboxymethyl-2-hydroxymuconate Delta-isomerase", + "5.3.3.11 Isopiperitenone Delta-isomerase", + "5.3.3.12 L-dopachrome isomerase", + "5.3.3.13 Polyenoic fatty acid isomerase", + "5.3.3.14 Trans-2-decenoyl-[acyl-carrier-protein] isomerase", + "5.3.3.15 Ascopyrone tautomerase", + "5.3.4.1 Protein disulfide-isomerase", + "5.3.99.2 Prostaglandin-D synthase", + "5.3.99.3 Prostaglandin-E synthase", + "5.3.99.4 Prostaglandin-I synthase", + "5.3.99.5 Thromboxane-A synthase", + "5.3.99.6 Allene-oxide cyclase", + "5.3.99.7 Styrene-oxide isomerase", + "5.3.99.8 Capsanthin/capsorubin synthase", + "5.3.99.9 Neoxanthin synthase", + "5.3.99.n1 2-keto-myo-inositol isomerase", + "5.4.1.1 Lysolecithin acylmutase", + "5.4.1.2 Precorrin-8X methylmutase", + "5.4.2.1 Phosphoglycerate mutase", + "5.4.2.2 Phosphoglucomutase", + "5.4.2.3 Phosphoacetylglucosamine mutase", + "5.4.2.4 Bisphosphoglycerate mutase", + "5.4.2.5 Phosphoglucomutase (glucose-cofactor)", + "5.4.2.6 Beta-phosphoglucomutase", + "5.4.2.7 Phosphopentomutase", + "5.4.2.8 Phosphomannomutase", + "5.4.2.9 Phosphoenolpyruvate mutase", + "5.4.2.10 Phosphoglucosamine mutase", + "5.4.3.2 Lysine 2,3-aminomutase", + "5.4.3.3 Beta-lysine 5,6-aminomutase", + "5.4.3.4 D-lysine 5,6-aminomutase", + "5.4.3.5 D-ornithine 4,5-aminomutase", + "5.4.3.6 Tyrosine 2,3-aminomutase", + "5.4.3.7 Leucine 2,3-aminomutase", + "5.4.3.8 Glutamate-1-semialdehyde 2,1-aminomutase", + "5.4.4.1 (Hydroxyamino)benzene mutase", + "5.4.4.2 Isochorismate synthase", + "5.4.4.3 3-(hydroxyamino)phenol mutase", + 
"5.4.99.1 Methylaspartate mutase", + "5.4.99.2 Methylmalonyl-CoA mutase", + "5.4.99.3 2-acetolactate mutase", + "5.4.99.4 2-methyleneglutarate mutase", + "5.4.99.5 Chorismate mutase", + "5.4.99.7 Lanosterol synthase", + "5.4.99.8 Cycloartenol synthase", + "5.4.99.9 UDP-galactopyranose mutase", + "5.4.99.11 Isomaltulose synthase", + "5.4.99.12 tRNA pseudouridine(38-40) synthase", + "5.4.99.13 Isobutyryl-CoA mutase", + "5.4.99.14 4-carboxymethyl-4-methylbutenolide mutase", + "5.4.99.15 (1->4)-alpha-D-glucan 1-alpha-D-glucosylmutase", + "5.4.99.16 Maltose alpha-D-glucosyltransferase", + "5.4.99.17 Squalene--hopene cyclase", + "5.4.99.18 5-(carboxyamino)imidazole ribonucleotide mutase", + "5.4.99.19 16S rRNA pseudouridine(516) synthase", + "5.4.99.20 23S rRNA pseudouridine(2457) synthase", + "5.4.99.21 23S rRNA pseudouridine(2604) synthase", + "5.4.99.22 23S rRNA pseudouridine(2605) synthase", + "5.4.99.23 23S rRNA pseudouridine(1911/1915/1917) synthase", + "5.4.99.24 23S rRNA pseudouridine(955/2504/2580) synthase", + "5.4.99.25 tRNA pseudouridine(55) synthase", + "5.4.99.26 tRNA pseudouridine(65) synthase", + "5.4.99.27 tRNA pseudouridine(13) synthase", + "5.4.99.28 tRNA pseudouridine(32) synthase", + "5.4.99.29 23S rRNA pseudouridine(746) synthase", + "5.4.99.30 UDP-arabinopyranose mutase", + "5.5.1.1 Muconate cycloisomerase", + "5.5.1.2 3-carboxy-cis,cis-muconate cycloisomerase", + "5.5.1.3 Tetrahydroxypteridine cycloisomerase", + "5.5.1.4 Inositol-3-phosphate synthase", + "5.5.1.5 Carboxy-cis,cis-muconate cyclase", + "5.5.1.6 Chalcone isomerase", + "5.5.1.7 Chloromuconate cycloisomerase", + "5.5.1.8 Bornyl diphosphate synthase", + "5.5.1.9 Cycloeucalenol cycloisomerase", + "5.5.1.10 Alpha-pinene-oxide decyclase", + "5.5.1.11 Dichloromuconate cycloisomerase", + "5.5.1.12 Copalyl diphosphate synthase", + "5.5.1.13 Ent-copalyl diphosphate synthase", + "5.5.1.14 Syn-copalyl-diphosphate synthase", + "5.5.1.15 Terpentedienyl-diphosphate synthase", + "5.5.1.16 
Halimadienyl-diphosphate synthase", + "5.5.1.17 (S)-beta-macrocarpene synthase", + "5.5.1.n1 D-ribose pyranase", + "5.99.1.1 Thiocyanate isomerase", + "5.99.1.2 DNA topoisomerase", + "5.99.1.3 DNA topoisomerase (ATP-hydrolyzing)", + "5.99.1.4 2-hydroxychromene-2-carboxylate isomerase", + "6.1.1.1 Tyrosine--tRNA ligase", + "6.1.1.2 Tryptophan--tRNA ligase", + "6.1.1.3 Threonine--tRNA ligase", + "6.1.1.4 Leucine--tRNA ligase", + "6.1.1.5 Isoleucine--tRNA ligase", + "6.1.1.6 Lysine--tRNA ligase", + "6.1.1.7 Alanine--tRNA ligase", + "6.1.1.9 Valine--tRNA ligase", + "6.1.1.10 Methionine--tRNA ligase", + "6.1.1.11 Serine--tRNA ligase", + "6.1.1.12 Aspartate--tRNA ligase", + "6.1.1.13 D-alanine--poly(phosphoribitol) ligase", + "6.1.1.14 Glycine--tRNA ligase", + "6.1.1.15 Proline--tRNA ligase", + "6.1.1.16 Cysteine--tRNA ligase", + "6.1.1.17 Glutamate--tRNA ligase", + "6.1.1.18 Glutamine--tRNA ligase", + "6.1.1.19 Arginine--tRNA ligase", + "6.1.1.20 Phenylalanine--tRNA ligase", + "6.1.1.21 Histidine--tRNA ligase", + "6.1.1.22 Asparagine--tRNA ligase", + "6.1.1.23 Aspartate--tRNA(Asn) ligase", + "6.1.1.24 Glutamate--tRNA(Gln) ligase", + "6.1.1.25 Lysine--tRNA(Pyl) ligase", + "6.1.1.26 Pyrrolysine--tRNA(Pyl) ligase", + "6.1.1.27 O-phosphoserine--tRNA ligase", + "6.1.2.1 D-alanine--(R)-lactate ligase", + "6.2.1.1 Acetate--CoA ligase", + "6.2.1.2 Butyrate--CoA ligase", + "6.2.1.3 Long-chain-fatty-acid--CoA ligase", + "6.2.1.4 Succinate--CoA ligase (GDP-forming)", + "6.2.1.5 Succinate--CoA ligase (ADP-forming)", + "6.2.1.6 Glutarate--CoA ligase", + "6.2.1.7 Cholate--CoA ligase", + "6.2.1.8 Oxalate--CoA ligase", + "6.2.1.9 Malate--CoA ligase", + "6.2.1.10 Acid--CoA ligase (GDP-forming)", + "6.2.1.11 Biotin--CoA ligase", + "6.2.1.12 4-coumarate--CoA ligase", + "6.2.1.13 Acetate--CoA ligase (ADP-forming)", + "6.2.1.14 6-carboxyhexanoate--CoA ligase", + "6.2.1.15 Arachidonate--CoA ligase", + "6.2.1.16 Acetoacetate--CoA ligase", + "6.2.1.17 Propionate--CoA ligase", + "6.2.1.18 
Citrate--CoA ligase", + "6.2.1.19 Long-chain-fatty-acid--luciferin-component ligase", + "6.2.1.20 Long-chain-fatty-acid--[acyl-carrier-protein] ligase", + "6.2.1.22 [Citrate (pro-3S)-lyase] ligase", + "6.2.1.23 Dicarboxylate--CoA ligase", + "6.2.1.24 Phytanate--CoA ligase", + "6.2.1.25 Benzoate--CoA ligase", + "6.2.1.26 o-succinylbenzoate--CoA ligase", + "6.2.1.27 4-hydroxybenzoate--CoA ligase", + "6.2.1.28 3-alpha,7-alpha-dihydroxy-5-beta-cholestanate--CoA ligase", + "6.2.1.30 Phenylacetate--CoA ligase", + "6.2.1.31 2-furoate--CoA ligase", + "6.2.1.32 Anthranilate--CoA ligase", + "6.2.1.33 4-chlorobenzoate--CoA ligase", + "6.2.1.34 Trans-feruloyl-CoA synthase", + "6.2.1.35 ACP-SH:acetate ligase", + "6.2.1.36 3-hydroxypropionyl-CoA synthase", + "6.2.1.n1 3-hydroxybenzoate--CoA ligase", + "6.3.1.1 Aspartate--ammonia ligase", + "6.3.1.2 Glutamate--ammonia ligase", + "6.3.1.4 Aspartate--ammonia ligase (ADP-forming)", + "6.3.1.5 NAD(+) synthase", + "6.3.1.6 Glutamate--ethylamine ligase", + "6.3.1.7 4-methyleneglutamate--ammonia ligase", + "6.3.1.8 Glutathionylspermidine synthase", + "6.3.1.9 Trypanothione synthase", + "6.3.1.10 Adenosylcobinamide-phosphate synthase", + "6.3.1.11 Glutamate--putrescine ligase", + "6.3.1.12 D-aspartate ligase", + "6.3.1.13 L-cysteine:1D-myo-inositol 2-amino-2-deoxy-alpha-D-glucopyranoside ligase", + "6.3.1.14 Diphthine--ammonia ligase", + "6.3.2.1 Pantoate--beta-alanine ligase", + "6.3.2.2 Glutamate--cysteine ligase", + "6.3.2.3 Glutathione synthase", + "6.3.2.4 D-alanine--D-alanine ligase", + "6.3.2.5 Phosphopantothenate--cysteine ligase", + "6.3.2.6 Phosphoribosylaminoimidazolesuccinocarboxamide synthase", + "6.3.2.7 UDP-N-acetylmuramoyl-L-alanyl-D-glutamate--L-lysine ligase", + "6.3.2.8 UDP-N-acetylmuramate--L-alanine ligase", + "6.3.2.9 UDP-N-acetylmuramoyl-L-alanine--D-glutamate ligase", + "6.3.2.10 UDP-N-acetylmuramoyl-tripeptide--D-alanyl-D-alanine ligase", + "6.3.2.11 Carnosine synthase", + "6.3.2.12 Dihydrofolate synthase", + 
"6.3.2.13 UDP-N-acetylmuramoyl-L-alanyl-D-glutamate--2,6-diaminopimelate ligase", + "6.3.2.14 2,3-dihydroxybenzoate--serine ligase", + "6.3.2.16 D-alanine--alanyl-poly(glycerolphosphate) ligase", + "6.3.2.17 Tetrahydrofolate synthase", + "6.3.2.18 Gamma-glutamylhistamine synthase", + "6.3.2.19 Ubiquitin--protein ligase", + "6.3.2.20 Indoleacetate--lysine synthetase", + "6.3.2.21 Ubiquitin--calmodulin ligase", + "6.3.2.23 Homoglutathione synthase", + "6.3.2.24 Tyrosine--arginine ligase", + "6.3.2.25 Tubulin--tyrosine ligase", + "6.3.2.26 N-(5-amino-5-carboxypentanoyl)-L-cysteinyl-D-valine synthase", + "6.3.2.27 Aerobactin synthase", + "6.3.2.28 L-amino-acid alpha-ligase", + "6.3.2.29 Cyanophycin synthase (L-aspartate-adding)", + "6.3.2.30 Cyanophycin synthase (L-arginine-adding)", + "6.3.2.31 Coenzyme F420-0:L-glutamate ligase", + "6.3.2.32 Coenzyme gamma-F420-2:alpha-L-glutamate ligase", + "6.3.2.33 Tetrahydrosarcinapterin synthase", + "6.3.2.34 Coenzyme F420-1:gamma-L-glutamate ligase", + "6.3.2.35 D-alanine--D-serine ligase", + "6.3.2.36 4-phosphopantoate--beta-alanine ligase", + "6.3.2.n1 UDP-N-acetylmuramoyl-L-alanyl-D-glutamate--D-lysine ligase", + "6.3.2.n2 Pup--protein ligase", + "6.3.2.n3 ISG15--protein ligase", + "6.3.2.n4 Alpha-aminoadipate--LysW ligase", + "6.3.2.n5 Pantoate--beta-alanine ligase (ADP-forming)", + "6.3.3.1 Phosphoribosylformylglycinamidine cyclo-ligase", + "6.3.3.2 5-formyltetrahydrofolate cyclo-ligase", + "6.3.3.3 Dethiobiotin synthase", + "6.3.3.4 (Carboxyethyl)arginine beta-lactam-synthase", + "6.3.4.1 GMP synthase", + "6.3.4.2 CTP synthase", + "6.3.4.3 Formate--tetrahydrofolate ligase", + "6.3.4.4 Adenylosuccinate synthase", + "6.3.4.5 Argininosuccinate synthase", + "6.3.4.6 Urea carboxylase", + "6.3.4.7 Ribose-5-phosphate--ammonia ligase", + "6.3.4.8 Imidazoleacetate--phosphoribosyldiphosphate ligase", + "6.3.4.9 Biotin--[methylmalonyl-CoA-carboxytransferase] ligase", + "6.3.4.10 Biotin--[propionyl-CoA-carboxylase (ATP-hydrolyzing)] 
ligase", + "6.3.4.11 Biotin--[methylcrotonoyl-CoA-carboxylase] ligase", + "6.3.4.12 Glutamate--methylamine ligase", + "6.3.4.13 Phosphoribosylamine--glycine ligase", + "6.3.4.14 Biotin carboxylase", + "6.3.4.15 Biotin--[acetyl-CoA-carboxylase] ligase", + "6.3.4.16 Carbamoyl-phosphate synthase (ammonia)", + "6.3.4.17 Formate--dihydrofolate ligase", + "6.3.4.18 5-(carboxyamino)imidazole ribonucleotide synthase", + "6.3.5.1 NAD(+) synthase (glutamine-hydrolyzing)", + "6.3.5.2 GMP synthase (glutamine-hydrolyzing)", + "6.3.5.3 Phosphoribosylformylglycinamidine synthase", + "6.3.5.4 Asparagine synthase (glutamine-hydrolyzing)", + "6.3.5.5 Carbamoyl-phosphate synthase (glutamine-hydrolyzing)", + "6.3.5.6 Asparaginyl-tRNA synthase (glutamine-hydrolyzing)", + "6.3.5.7 Glutaminyl-tRNA synthase (glutamine-hydrolyzing)", + "6.3.5.9 Hydrogenobyrinic acid a,c-diamide synthase (glutamine-hydrolyzing)", + "6.3.5.10 Adenosylcobyric acid synthase (glutamine-hydrolyzing)", + "6.3.5.11 Cobyrinate a,c-diamide synthase (glutamine-hydrolyzing)", + "6.4.1.1 Pyruvate carboxylase", + "6.4.1.2 Acetyl-CoA carboxylase", + "6.4.1.3 Propionyl-CoA carboxylase", + "6.4.1.4 Methylcrotonoyl-CoA carboxylase", + "6.4.1.5 Geranoyl-CoA carboxylase", + "6.4.1.6 Acetone carboxylase", + "6.4.1.7 2-oxoglutarate carboxylase", + "6.5.1.1 DNA ligase (ATP)", + "6.5.1.2 DNA ligase (NAD(+))", + "6.5.1.3 RNA ligase (ATP)", + "6.5.1.4 RNA-3'-phosphate cyclase", + "6.6.1.1 Magnesium chelatase", + "6.6.1.2 Cobaltochelatase" }; diff --git a/api/explore.h b/api/explore.h index b7f2ea72..ee000c11 100644 --- a/api/explore.h +++ b/api/explore.h @@ -29,7 +29,7 @@ * * Version Creation Date: 6/30/98 * -* $Revision: 6.55 $ +* $Revision: 6.57 $ * * File Description: Reengineered and optimized exploration functions * to be used for future code @@ -212,6 +212,11 @@ NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXref ( SeqFeatPtr sfp ); +NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXrefEx ( + SeqFeatPtr sfp, + ObjectIdPtr PNTR oipP +); 
+ NLM_EXTERN Boolean LIBCALL SeqMgrGeneIsSuppressed ( GeneRefPtr grp ); @@ -571,6 +576,17 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeature ( SeqMgrFeatContext PNTR context ); +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeatureEx ( + SeqLocPtr slp, + Uint2 subtype, + VoidPtr featarray, + Int4 numfeats, + Int4Ptr position, + Int2 overlapType, + SeqMgrFeatContext PNTR context, + Boolean special +); + NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetFeatureInIndex ( BioseqPtr bsp, VoidPtr featarray, diff --git a/api/gather.c b/api/gather.c index 1100f519..baef0f11 100644 --- a/api/gather.c +++ b/api/gather.c @@ -29,7 +29,7 @@ * * Version Creation Date: 10/7/94 * -* $Revision: 6.56 $ +* $Revision: 6.57 $ * * File Description: * @@ -181,6 +181,11 @@ NLM_EXTERN Boolean SeqLocOffset (SeqLocPtr seq_loc, SeqLocPtr sfp_loc, GatherRan return FALSE; } + if( sfp_loc->choice == SEQLOC_NULL || + sfp_loc->choice == SEQLOC_EMPTY ) + { + return FALSE; + } if(ck_extreme(sfp_loc, &across_zero)) { diff --git a/api/gbftdef.h b/api/gbftdef.h index c91d0d3e..8c20fe02 100644 --- a/api/gbftdef.h +++ b/api/gbftdef.h @@ -121,13 +121,14 @@ #define GBQUAL_artificial_location 111 #define GBQUAL_non_functional 112 #define GBQUAL_pseudogene 113 +#define GBQUAL_mobile_element_type 114 -#define ParFlat_TOTAL_GBQUAL 114 +#define ParFlat_TOTAL_GBQUAL 115 #define ParFlat_TOTAL_IntOr 3 #define ParFlat_TOTAL_LRB 3 #define ParFlat_TOTAL_Exp 2 #define ParFlat_TOTAL_Rpt 7 -#define ParFlat_TOTAL_GBFEAT 69 +#define ParFlat_TOTAL_GBFEAT 70 #define Class_pos_aa 1 #define Class_text 2 diff --git a/api/gbftglob.c b/api/gbftglob.c index e7863a5a..80a140c4 100644 --- a/api/gbftglob.c +++ b/api/gbftglob.c @@ -65,8 +65,8 @@ static GbFeatName STATIC__ParFlat_GBQual_names[ParFlat_TOTAL_GBQUAL] = { {"tag_peptide", Class_text}, { "mating_type", Class_text}, {"satellite", Class_text}, { "gene_synonym", Class_text}, {"UniProtKB_evidence", Class_text}, {"haplogroup", Class_text}, - {"artificial_location", 
Class_none}, {"non_functional", Class_none}, - {"pseudogene", Class_none} + {"artificial_location", Class_text}, {"non_functional", Class_none}, + {"pseudogene", Class_none}, {"mobile_element_type", Class_text} }; NLM_EXTERN GbFeatNamePtr x_ParFlat_GBQual_names(void) { @@ -95,7 +95,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -121,7 +120,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -145,7 +143,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -172,7 +169,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -201,7 +197,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -224,7 +219,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"conflict", 1, { + {"conflict", 1, { GBQUAL_citation, -1, -1, -1, -1}, 14, { GBQUAL_allele, @@ -247,7 +242,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"D-loop", 0, {-1, -1, -1, -1, -1}, 15, + {"D-loop", 0, {-1, -1, -1, -1, -1}, 15, { GBQUAL_allele, GBQUAL_citation, @@ -257,7 +252,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, 
GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -279,7 +273,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -296,7 +289,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"enhancer", 0, {-1, -1, -1, -1, -1}, 17, + {"enhancer", 0, {-1, -1, -1, -1, -1}, 17, { GBQUAL_allele, GBQUAL_bound_moiety, @@ -307,7 +300,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -332,7 +324,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -350,7 +341,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"gap", 1, { + {"gap", 1, { GBQUAL_estimated_length, -1, -1, -1, -1}, 5, { GBQUAL_evidence, @@ -374,7 +365,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -397,7 +387,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -427,7 +416,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -453,7 +441,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { 
GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -480,7 +467,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -508,7 +494,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_note, GBQUAL_old_locus_tag, @@ -520,7 +505,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"mat_peptide", 0, {-1, -1, -1, -1, -1}, 25, + {"mat_peptide", 0, {-1, -1, -1, -1, -1}, 25, { GBQUAL_allele, GBQUAL_citation, @@ -535,7 +520,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene_synonym, GBQUAL_inference, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -551,7 +535,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"misc_binding", 1, { + {"misc_binding", 1, { GBQUAL_bound_moiety, -1, -1, -1, -1}, 16, { GBQUAL_allele, @@ -563,7 +547,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -587,7 +570,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -602,7 +584,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - 
{"misc_feature", 0, {-1, -1, -1, -1, -1}, 23, + {"misc_feature", 0, {-1, -1, -1, -1, -1}, 23, { GBQUAL_allele, GBQUAL_citation, @@ -613,7 +595,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -642,7 +623,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -655,7 +635,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"misc_RNA", 0, {-1, -1, -1, -1, -1}, 23, + {"misc_RNA", 0, {-1, -1, -1, -1, -1}, 23, { GBQUAL_allele, GBQUAL_citation, @@ -666,7 +646,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -685,7 +664,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"misc_signal", 0, {-1, -1, -1, -1, -1}, 19, + {"misc_signal", 0, {-1, -1, -1, -1, -1}, 19, { GBQUAL_allele, GBQUAL_citation, @@ -696,7 +675,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -711,7 +689,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"misc_structure", 0, {-1, -1, -1, -1, -1}, 17, + {"misc_structure", 0, {-1, -1, -1, -1, -1}, 17, { GBQUAL_allele, GBQUAL_citation, @@ -722,7 +700,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { 
GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -735,7 +712,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"modified_base", 1, { + {"modified_base", 1, { GBQUAL_mod_base, -1, -1, -1, -1}, 15, { GBQUAL_allele, @@ -747,7 +724,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -758,7 +734,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"mRNA", 0, {-1, -1, -1, -1, -1}, 25, + {"mRNA", 0, {-1, -1, -1, -1, -1}, 25, { GBQUAL_allele, GBQUAL_artificial_location, @@ -770,7 +746,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -789,7 +764,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"mutation", 0, {-1, -1, -1, -1, -1}, 18, + {"mutation", 0, {-1, -1, -1, -1, -1}, 18, { GBQUAL_citation, GBQUAL_db_xref, @@ -799,7 +774,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -814,7 +788,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"N_region", 0, {-1, -1, -1, -1, -1}, 18, + {"N_region", 0, {-1, -1, -1, -1, -1}, 18, { GBQUAL_citation, 
GBQUAL_db_xref, @@ -823,7 +797,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -839,7 +812,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"ncRNA", 1, { + {"ncRNA", 1, { GBQUAL_ncRNA_class, -1, -1, -1, -1}, 23, { GBQUAL_allele, @@ -851,7 +824,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -903,7 +875,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_experiment, GBQUAL_function, GBQUAL_inference, - GBQUAL_label, GBQUAL_map, GBQUAL_non_functional, GBQUAL_note, @@ -929,7 +900,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -957,7 +927,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -979,7 +948,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -991,7 +959,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"precursor_RNA", 0, {-1, -1, -1, -1, -1}, 20, + {"precursor_RNA", 0, {-1, -1, -1, -1, -1}, 20, { GBQUAL_allele, GBQUAL_citation, @@ -1002,7 +970,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, 
GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1018,7 +985,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"prim_transcript", 0, {-1, -1, -1, -1, -1}, 18, + {"prim_transcript", 0, {-1, -1, -1, -1, -1}, 18, { GBQUAL_allele, GBQUAL_citation, @@ -1029,7 +996,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1043,7 +1009,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"primer_bind", 0, {-1, -1, -1, -1, -1}, 17, + {"primer_bind", 0, {-1, -1, -1, -1, -1}, 17, { GBQUAL_allele, GBQUAL_citation, @@ -1053,7 +1019,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1067,7 +1032,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"promoter", 0, {-1, -1, -1, -1, -1}, 23, + {"promoter", 0, {-1, -1, -1, -1, -1}, 23, { GBQUAL_allele, GBQUAL_bound_moiety, @@ -1079,7 +1044,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -1097,7 +1061,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"protein_bind", 1, { + {"protein_bind", 1, { GBQUAL_bound_moiety, -1, -1, -1, -1}, 18, { GBQUAL_allele, @@ -1109,7 +1073,6 @@ static SematicFeat 
STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1123,7 +1086,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"RBS", 0, {-1, -1, -1, -1, -1}, 16, + {"RBS", 0, {-1, -1, -1, -1, -1}, 16, { GBQUAL_allele, GBQUAL_citation, @@ -1133,7 +1096,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1146,7 +1108,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"repeat_region", 0, {-1, -1, -1, -1, -1}, 26, + {"repeat_region", 0, {-1, -1, -1, -1, -1}, 26, { GBQUAL_allele, GBQUAL_citation, @@ -1158,7 +1120,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene_synonym, GBQUAL_inference, GBQUAL_insertion_seq, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_mobile_element, @@ -1178,7 +1139,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"repeat_unit", 0, {-1, -1, -1, -1, -1}, 21, + {"repeat_unit", 0, {-1, -1, -1, -1, -1}, 21, { GBQUAL_allele, GBQUAL_citation, @@ -1189,7 +1150,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1206,7 +1166,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"rep_origin", 0, {-1, -1, -1, -1, -1}, 
17, + {"rep_origin", 0, {-1, -1, -1, -1, -1}, 17, { GBQUAL_allele, GBQUAL_citation, @@ -1217,7 +1177,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1230,7 +1189,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"rRNA", 0, {-1, -1, -1, -1, -1}, 22, + {"rRNA", 0, {-1, -1, -1, -1, -1}, 22, { GBQUAL_allele, GBQUAL_citation, @@ -1241,7 +1200,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -1259,7 +1217,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"S_region", 0, {-1, -1, -1, -1, -1}, 20, + {"S_region", 0, {-1, -1, -1, -1, -1}, 20, { GBQUAL_allele, GBQUAL_citation, @@ -1269,7 +1227,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -1286,7 +1243,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"satellite", 0, {-1, -1, -1, -1, -1}, 21, + {"satellite", 0, {-1, -1, -1, -1, -1}, 21, { GBQUAL_allele, GBQUAL_citation, @@ -1296,7 +1253,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1314,7 +1270,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"scRNA", 0, {-1, -1, -1, -1, -1}, 21, + {"scRNA", 0, {-1, -1, -1, -1, -1}, 21, { GBQUAL_allele, GBQUAL_citation, @@ -1325,7 +1281,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -1353,7 +1308,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -1370,7 +1324,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"snoRNA", 0, {-1, -1, -1, -1, -1}, 21, + {"snoRNA", 0, {-1, -1, -1, -1, -1}, 21, { GBQUAL_allele, GBQUAL_citation, @@ -1381,7 +1335,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -1398,7 +1351,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"snRNA", 0, {-1, -1, -1, -1, -1}, 21, + {"snRNA", 0, {-1, -1, -1, -1, -1}, 21, { GBQUAL_allele, GBQUAL_citation, @@ -1409,7 +1362,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -1458,7 +1410,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_isolation_source, GBQUAL_kinetoplast, GBQUAL_lab_host, - GBQUAL_label, GBQUAL_lat_lon, GBQUAL_macronuclear, GBQUAL_map, @@ -1493,7 +1444,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_virion, GBQUAL_haplogroup, -1, -1}}, - {"stem_loop", 0, {-1, -1, -1, -1, -1}, 
18, + {"stem_loop", 0, {-1, -1, -1, -1, -1}, 18, { GBQUAL_allele, GBQUAL_citation, @@ -1504,7 +1455,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1518,7 +1468,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"STS", 0, {-1, -1, -1, -1, -1}, 16, + {"STS", 0, {-1, -1, -1, -1, -1}, 16, { GBQUAL_allele, GBQUAL_citation, @@ -1528,7 +1478,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1541,7 +1490,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"TATA_signal", 0, {-1, -1, -1, -1, -1}, 15, + {"TATA_signal", 0, {-1, -1, -1, -1, -1}, 15, { GBQUAL_allele, GBQUAL_citation, @@ -1551,7 +1500,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1563,7 +1511,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"terminator", 0, {-1, -1, -1, -1, -1}, 17, + {"terminator", 0, {-1, -1, -1, -1, -1}, 17, { GBQUAL_allele, GBQUAL_citation, @@ -1573,7 +1521,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1587,7 +1534,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"tmRNA", 0, {-1, -1, -1, -1, -1}, 23, + {"tmRNA", 0, {-1, -1, -1, -1, -1}, 23, { GBQUAL_allele, GBQUAL_citation, @@ -1598,7 +1545,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -1617,7 +1563,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"transit_peptide", 0, {-1, -1, -1, -1, -1}, 21, + {"transit_peptide", 0, {-1, -1, -1, -1, -1}, 21, { GBQUAL_allele, GBQUAL_citation, @@ -1628,7 +1574,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -1645,7 +1590,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"tRNA", 0, {-1, -1, -1, -1, -1}, 24, + {"tRNA", 0, {-1, -1, -1, -1, -1}, 24, { GBQUAL_allele, GBQUAL_anticodon, @@ -1657,7 +1602,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -1676,7 +1620,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"unsure", 0, {-1, -1, -1, -1, -1}, 15, + {"unsure", 0, {-1, -1, -1, -1, -1}, 15, { GBQUAL_allele, GBQUAL_citation, @@ -1686,7 +1630,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1698,7 +1641,7 @@ static SematicFeat 
STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"V_region", 0, {-1, -1, -1, -1, -1}, 20, + {"V_region", 0, {-1, -1, -1, -1, -1}, 20, { GBQUAL_allele, GBQUAL_citation, @@ -1708,7 +1651,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -1725,7 +1667,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"V_segment", 0, {-1, -1, -1, -1, -1}, 20, + {"V_segment", 0, {-1, -1, -1, -1, -1}, 20, { GBQUAL_allele, GBQUAL_citation, @@ -1735,7 +1677,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_non_functional, @@ -1752,7 +1693,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"variation", 0, {-1, -1, -1, -1, -1}, 21, + {"variation", 0, {-1, -1, -1, -1, -1}, 21, { GBQUAL_allele, GBQUAL_citation, @@ -1764,7 +1705,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1780,7 +1720,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"3'clip", 0, {-1, -1, -1, -1, -1}, 18, + {"3'clip", 0, {-1, -1, -1, -1, -1}, 18, { GBQUAL_allele, GBQUAL_citation, @@ -1791,7 +1731,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - 
GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1805,7 +1744,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"3'UTR", 0, {-1, -1, -1, -1, -1}, 18, + {"3'UTR", 0, {-1, -1, -1, -1, -1}, 18, { GBQUAL_allele, GBQUAL_citation, @@ -1816,7 +1755,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1830,7 +1768,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"5'clip", 0, {-1, -1, -1, -1, -1}, 18, + {"5'clip", 0, {-1, -1, -1, -1, -1}, 18, { GBQUAL_allele, GBQUAL_citation, @@ -1841,7 +1779,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1855,7 +1792,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"5'UTR", 0, {-1, -1, -1, -1, -1}, 18, + {"5'UTR", 0, {-1, -1, -1, -1, -1}, 18, { GBQUAL_allele, GBQUAL_citation, @@ -1866,7 +1803,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1880,7 +1816,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"-10_signal", 0, {-1, -1, -1, -1, -1}, 17, + {"-10_signal", 0, {-1, -1, -1, -1, -1}, 17, { GBQUAL_allele, GBQUAL_citation, @@ -1890,7 +1826,6 @@ static SematicFeat 
STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1904,7 +1839,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"-35_signal", 0, {-1, -1, -1, -1, -1}, 17, + {"-35_signal", 0, {-1, -1, -1, -1, -1}, 17, { GBQUAL_allele, GBQUAL_citation, @@ -1914,7 +1849,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_gene, GBQUAL_gene_synonym, GBQUAL_inference, - GBQUAL_label, GBQUAL_locus_tag, GBQUAL_map, GBQUAL_note, @@ -1927,7 +1861,35 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1}} + -1, -1, -1, -1, -1, -1, -1, -1}}, + {"mobile_element", 1, { + GBQUAL_mobile_element_type, -1, -1, -1, -1}, 21, + { + GBQUAL_allele, + GBQUAL_citation, + GBQUAL_db_xref, + GBQUAL_evidence, + GBQUAL_experiment, + GBQUAL_function, + GBQUAL_gene, + GBQUAL_gene_synonym, + GBQUAL_inference, + GBQUAL_insertion_seq, + GBQUAL_locus_tag, + GBQUAL_map, + GBQUAL_note, + GBQUAL_old_locus_tag, + GBQUAL_partial, + GBQUAL_rpt_family, + GBQUAL_rpt_type, + GBQUAL_standard_name, + GBQUAL_transposon, + GBQUAL_usedin, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1}} }; NLM_EXTERN SematicFeatPtr x_ParFlat_GBFeat(void) { diff --git a/api/macroapi.c b/api/macroapi.c index ecae0231..9b6e3dc5 100755 --- a/api/macroapi.c +++ b/api/macroapi.c @@ -29,7 +29,7 @@ * * Version Creation Date: 11/8/2007 * -* $Revision: 1.262 $ +* $Revision: 1.405 $ * * File Description: * @@ -61,6 +61,40 @@ #include #include #include +#include + +/* static 
void CollectNucBioseqCallback (BioseqPtr bsp, Pointer data); */ +static void AddCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list); +static void AddDeflineDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list); +static void AddGenbankBlockDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list); + +static void GetNucBioseqCallback (BioseqPtr bsp, Pointer userdata) + +{ + ValNodeBlockPtr vbp; + + if (bsp == NULL) return; + if (! ISA_na (bsp->mol)) return; + vbp = (ValNodeBlockPtr) userdata; + if (vbp == NULL) return; + + ValNodeAddPointerEx (&(vbp->head), &(vbp->tail), OBJ_BIOSEQ, bsp); +} + +static ValNodePtr CollectNucBioseqs (SeqEntryPtr sep) + +{ + ValNodeBlock vnb; + + if (sep == NULL) return NULL; + + vnb.head = NULL; + vnb.tail = NULL; + + VisitBioseqsInSep (sep, &vnb, GetNucBioseqCallback); + + return vnb.head; +} static Boolean IsAllDigits (CharPtr str) { @@ -83,7 +117,6 @@ static Boolean IsAllDigits (CharPtr str) static Boolean IsAllCaps (CharPtr str) { CharPtr cp; - Boolean at_least_one = FALSE; if (StringHasNoText (str)) return FALSE; @@ -92,13 +125,47 @@ static Boolean IsAllCaps (CharPtr str) if (isalpha (*cp)) { if (islower (*cp)) { return FALSE; - } else { - at_least_one = TRUE; } } cp++; } - return at_least_one; + return TRUE; +} + + +static Boolean IsAllLowerCase (CharPtr str) +{ + CharPtr cp; + + if (StringHasNoText (str)) return FALSE; + + cp = str; + while (*cp != 0) { + if (isalpha (*cp)) { + if (isupper (*cp)) { + return FALSE; + } + } + cp++; + } + return TRUE; +} + + +static Boolean IsAllPunctuation (CharPtr str) +{ + CharPtr cp; + + if (StringHasNoText (str)) return FALSE; + + cp = str; + while (*cp != 0) { + if (!ispunct (*cp)) { + return FALSE; + } + cp++; + } + return TRUE; } @@ -118,6 +185,7 @@ static int LIBCALLBACK SortVnpByChoiceAndPtrvalue (VoidPtr ptr1, VoidPtr ptr2); * GetFieldValueForObject * RemoveFieldValueForObject * SetFieldValueForObject + * 
SortFieldsForObject * GetObjectListForFieldType * GetFieldListForFieldType * IsFieldTypeEmpty @@ -186,6 +254,7 @@ NLM_EXTERN FieldTypePtr GetFromFieldFromFieldPair (FieldPairTypePtr fieldpair) CDSGeneProtFieldPairPtr cp; MolinfoFieldPairPtr mp; StructuredCommentFieldPairPtr scfp; + DBLinkFieldPairPtr dbfp; ValNodePtr vnp; if (fieldpair == NULL) return NULL; @@ -284,6 +353,14 @@ NLM_EXTERN FieldTypePtr GetFromFieldFromFieldPair (FieldPairTypePtr fieldpair) f->data.ptrvalue = AsnIoMemCopy (scfp->from, (AsnReadFunc) StructuredCommentFieldAsnRead, (AsnWriteFunc) StructuredCommentFieldAsnWrite); } break; + case FieldPairType_dblink: + dbfp = (DBLinkFieldPairPtr) fieldpair->data.ptrvalue; + if (dbfp != NULL) { + f = ValNodeNew (NULL); + f->choice = FieldType_dblink; + f->data.intvalue = dbfp->from; + } + break; } return f; } @@ -301,7 +378,7 @@ NLM_EXTERN FieldTypePtr GetToFieldFromFieldPair (FieldPairTypePtr fieldpair) CDSGeneProtFieldPairPtr cp; MolinfoFieldPairPtr mp; StructuredCommentFieldPairPtr scfp; - + DBLinkFieldPairPtr dbfp; ValNodePtr vnp; if (fieldpair == NULL) return NULL; @@ -400,6 +477,14 @@ NLM_EXTERN FieldTypePtr GetToFieldFromFieldPair (FieldPairTypePtr fieldpair) f->data.ptrvalue = AsnIoMemCopy (scfp->to, (AsnReadFunc) StructuredCommentFieldAsnRead, (AsnWriteFunc) StructuredCommentFieldAsnWrite); } break; + case FieldPairType_dblink: + dbfp = (DBLinkFieldPairPtr) fieldpair->data.ptrvalue; + if (dbfp != NULL) { + f = ValNodeNew (NULL); + f->choice = FieldType_dblink; + f->data.intvalue = dbfp->to; + } + break; } return f; } @@ -415,6 +500,7 @@ NLM_EXTERN FieldPairTypePtr BuildFieldPairFromFromField (FieldTypePtr field_from RnaQualPtr rq; CDSGeneProtFieldPairPtr cp; StructuredCommentFieldPairPtr scfp; + DBLinkFieldPairPtr dbfp; ValNodePtr mp; MolinfoMoleculePairPtr mol_p; MolinfoTechniquePairPtr tech_p; @@ -532,6 +618,13 @@ NLM_EXTERN FieldPairTypePtr BuildFieldPairFromFromField (FieldTypePtr field_from scfp->from = AsnIoMemCopy (field_from, 
(AsnReadFunc) StructuredCommentFieldAsnRead, (AsnWriteFunc) StructuredCommentFieldAsnWrite); pair->data.ptrvalue = scfp; break; + case FieldType_dblink: + pair = ValNodeNew (NULL); + pair->choice = FieldPairType_dblink; + dbfp = DBLinkFieldPairNew (); + dbfp->from = field_from->data.intvalue; + pair->data.ptrvalue = dbfp; + break; } return pair; } @@ -560,6 +653,9 @@ NLM_EXTERN Uint1 FieldTypeChoiceFromFieldPairTypeChoice (Uint1 field_pair_choice case FieldPairType_struc_comment_field: field_type_choice = FieldType_struc_comment_field; break; + case FieldPairType_dblink: + field_type_choice = FieldType_dblink; + break; } return field_type_choice; @@ -567,12 +663,62 @@ NLM_EXTERN Uint1 FieldTypeChoiceFromFieldPairTypeChoice (Uint1 field_pair_choice /* functions for handling single fields */ -NLM_EXTERN int CompareFieldTypes (FieldTypePtr vnp1, FieldTypePtr vnp2) + +static int CompareSourceQuals (VoidPtr ptr1, VoidPtr ptr2) +{ + ValNodePtr vnp1; + ValNodePtr vnp2; + CharPtr tmp1, tmp2; + int rval = 0; + + if (ptr1 != NULL && ptr2 != NULL) { + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + if (vnp1 == NULL && vnp2 == NULL) { + rval = 0; + } else if (vnp1 == NULL) { + rval = -1; + } else if (vnp2 == NULL) { + rval = 1; + } else if (vnp1->choice > vnp2->choice) { + rval = 1; + } else if (vnp1->choice < vnp2->choice) { + rval = -1; + } else if (vnp1->choice == SourceQualChoice_textqual) { + if (vnp1->data.intvalue == vnp2->data.intvalue) { + return 0; + } else if (vnp1->data.intvalue == Source_qual_taxname) { + return -1; + } else if (vnp2->data.intvalue == Source_qual_taxname) { + return 1; + } else if (vnp1->data.intvalue == Source_qual_taxid) { + return -1; + } else if (vnp2->data.intvalue == Source_qual_taxid) { + return 1; + } else { + tmp1 = GetSourceQualName(vnp1->data.intvalue); + tmp2 = GetSourceQualName (vnp2->data.intvalue); + rval = StringCmp (tmp1, tmp2); + } + } else if (vnp1->data.intvalue > vnp2->data.intvalue) { + rval = 1; + } 
else if (vnp1->data.intvalue < vnp2->data.intvalue) { + rval = -1; + } else { + rval = 0; + } + } + return rval; +} + + +static int CompareFieldTypesEx (FieldTypePtr vnp1, FieldTypePtr vnp2, Boolean use_source_qual_sort) { int rval = 0; FeatureFieldPtr field1, field2; RnaQualPtr rq1, rq2; StructuredCommentFieldPtr scf1, scf2; + Int4 v1, v2; if (vnp1 == NULL && vnp2 == NULL) { rval = 0; @@ -587,6 +733,14 @@ NLM_EXTERN int CompareFieldTypes (FieldTypePtr vnp1, FieldTypePtr vnp2) } else { switch (vnp1->choice) { case FieldType_source_qual: + vnp1 = vnp1->data.ptrvalue; + vnp2 = vnp2->data.ptrvalue; + if (use_source_qual_sort) { + rval = CompareSourceQuals(&vnp1, &vnp2); + } else { + rval = SortVnpByChoiceAndIntvalue (&vnp1, &vnp2); + } + break; case FieldType_molinfo_field: vnp1 = vnp1->data.ptrvalue; vnp2 = vnp2->data.ptrvalue; @@ -675,11 +829,29 @@ NLM_EXTERN int CompareFieldTypes (FieldTypePtr vnp1, FieldTypePtr vnp2) rval = StringCmp (scf1->data.ptrvalue, scf2->data.ptrvalue); } break; + case FieldType_dblink: + v1 = vnp1->data.intvalue; + v2 = vnp2->data.intvalue; + if (v1 == v2) { + rval = 0; + } else if (v1 < v2) { + rval = -1; + } else { + rval = 1; + } + break; } } return rval; } + +NLM_EXTERN int CompareFieldTypes (FieldTypePtr vnp1, FieldTypePtr vnp2) +{ + return CompareFieldTypesEx (vnp1, vnp2, FALSE); +} + + static Boolean DoFieldTypesMatch (FieldTypePtr field1, FieldTypePtr field2) { if (CompareFieldTypes (field1, field2) == 0) { @@ -694,16 +866,16 @@ static Int2 FeatureTypeFromCDSGeneProtField (Uint2 cds_gene_prot_field); NLM_EXTERN Int2 FeatureTypeFromFieldType (FieldTypePtr field) { - Int2 feat_type = Feature_type_any; + Int2 feat_type = Macro_feature_type_any; FeatureFieldPtr ffp; RnaQualPtr rq; if (field == NULL) { - feat_type = Feature_type_any; + feat_type = Macro_feature_type_any; } else { switch (field->choice) { case FieldType_source_qual: - feat_type = Feature_type_biosrc; + feat_type = Macro_feature_type_biosrc; break; case 
FieldType_feature_field: ffp = (FeatureFieldPtr) field->data.ptrvalue; @@ -734,6 +906,24 @@ NLM_EXTERN Boolean IsFeatureFieldEmpty (FeatureFieldPtr field) } +NLM_EXTERN ValNodePtr MakeFeatureFieldField (Uint2 ftype, Int4 legalqual) +{ + FeatureFieldPtr ff; + ValNodePtr field; + + ff = FeatureFieldNew(); + ff->type = ftype; + ff->field = ValNodeNew (NULL); + ff->field->choice = FeatQualChoice_legal_qual; + ff->field->data.intvalue = legalqual; + + field = ValNodeNew (NULL); + field->choice = FieldType_feature_field; + field->data.ptrvalue = ff; + return field; +} + + NLM_EXTERN Boolean IsRnaQualEmpty (RnaQualPtr rq) { if (rq == NULL) return TRUE; @@ -777,6 +967,12 @@ NLM_EXTERN Boolean IsFieldTypeEmpty (FieldTypePtr field) rval = FALSE; } break; + case FieldType_dblink: + if (field->data.intvalue < 1) { + rval = TRUE; + } else { + rval = FALSE; + } case FieldType_misc: rval = FALSE; break; @@ -801,18 +997,30 @@ NLM_EXTERN Boolean AllowFieldMulti (FieldTypePtr field) feature_field = (FeatureFieldPtr) field->data.ptrvalue; if (feature_field != NULL && feature_field->field != NULL && feature_field->field->choice == FeatQualChoice_legal_qual - && feature_field->field->data.intvalue == Feat_qual_legal_db_xref) { + && (feature_field->field->data.intvalue == Feat_qual_legal_db_xref + || feature_field->field->data.intvalue == Feat_qual_legal_ec_number)) { rval = TRUE; } break; case FieldType_cds_gene_prot: + if (field->data.intvalue == CDSGeneProt_field_prot_ec_number + || field->data.intvalue == CDSGeneProt_field_mat_peptide_ec_number + || field->data.intvalue == CDSGeneProt_field_gene_synonym) { + rval = TRUE; + } break; case FieldType_pub: break; case FieldType_rna_field: + if (field->data.intvalue == Rna_field_gene_synonym) { + rval = TRUE; + } break; case FieldType_struc_comment_field: break; + case FieldType_dblink: + rval = TRUE; + break; case FieldType_misc: if (field->data.intvalue == Misc_field_keyword) { rval = TRUE; @@ -823,7 +1031,7 @@ NLM_EXTERN Boolean 
AllowFieldMulti (FieldTypePtr field) } -static Boolean IsUserObjectStructuredComment (UserObjectPtr uop) +NLM_EXTERN Boolean IsUserObjectStructuredComment (UserObjectPtr uop) { if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "StructuredComment") == 0) { return TRUE; @@ -833,6 +1041,29 @@ static Boolean IsUserObjectStructuredComment (UserObjectPtr uop) } +static Boolean IsEmptyStructuredComment (UserObjectPtr uop) +{ + if (!IsUserObjectStructuredComment(uop)) { + return FALSE; + } + if (uop->data == NULL) { + return TRUE; + } else { + return FALSE; + } +} + + +static Boolean IsUserObjectDBLink (UserObjectPtr uop) +{ + if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "DBLink") == 0) { + return TRUE; + } else { + return FALSE; + } +} + + static Boolean IsObjectAppropriateForFieldValue (Uint1 choice, Pointer data, FieldTypePtr field) { SeqFeatPtr sfp; @@ -861,7 +1092,7 @@ static Boolean IsObjectAppropriateForFieldValue (Uint1 choice, Pointer data, Fie if (choice == OBJ_SEQFEAT) { sfp = (SeqFeatPtr) data; fp = (FeatureFieldPtr) field->data.ptrvalue; - if (fp != NULL && (fp->type == Feature_type_any || GetFeatdefFromFeatureType (fp->type) == sfp->idx.subtype)) { + if (fp != NULL && (fp->type == Macro_feature_type_any || GetFeatdefFromFeatureType (fp->type) == sfp->idx.subtype)) { rval = TRUE; } } @@ -906,6 +1137,14 @@ static Boolean IsObjectAppropriateForFieldValue (Uint1 choice, Pointer data, Fie } } break; + case FieldType_dblink: + if (choice == OBJ_SEQDESC) { + sdp = (SeqDescrPtr) data; + if (sdp->choice == Seq_descr_user && IsUserObjectDBLink (sdp->data.ptrvalue)) { + rval = TRUE; + } + } + break; case FieldType_misc: if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) { rval = TRUE; @@ -1008,100 +1247,101 @@ typedef struct feattypefeatdef { } FeatTypeFeatDefData, PNTR FeatTypeFeatDefPtr; static FeatTypeFeatDefData feattype_featdef[] = { - { Feature_type_any , FEATDEF_ANY , "any" } , - { 
Feature_type_gene , FEATDEF_GENE , "gene" } , - { Feature_type_org , FEATDEF_ORG , "org" } , - { Feature_type_cds , FEATDEF_CDS , "CDS" } , - { Feature_type_prot , FEATDEF_PROT , "Protein" } , - { Feature_type_preRNA , FEATDEF_preRNA , "preRNA" } , - { Feature_type_mRNA , FEATDEF_mRNA , "mRNA" } , - { Feature_type_tRNA , FEATDEF_tRNA , "tRNA" } , - { Feature_type_rRNA , FEATDEF_rRNA , "rRNA" } , - { Feature_type_snRNA , FEATDEF_snRNA , "snRNA" } , - { Feature_type_scRNA , FEATDEF_scRNA , "scRNA" } , - { Feature_type_otherRNA , FEATDEF_otherRNA , "misc_RNA" } , - { Feature_type_pub , FEATDEF_PUB , "pub" } , - { Feature_type_seq , FEATDEF_SEQ , "seq" } , - { Feature_type_imp , FEATDEF_IMP , "imp" } , - { Feature_type_allele , FEATDEF_allele , "allele" } , - { Feature_type_attenuator , FEATDEF_attenuator , "attenuator" } , - { Feature_type_c_region , FEATDEF_C_region , "c_region" } , - { Feature_type_caat_signal , FEATDEF_CAAT_signal , "caat_signal" } , - { Feature_type_imp_CDS , FEATDEF_Imp_CDS , "imp_CDS" } , - { Feature_type_conflict , FEATDEF_conflict , "conflict" } , - { Feature_type_d_loop , FEATDEF_D_loop , "d_loop" } , - { Feature_type_d_segment , FEATDEF_D_segment , "d_segment" } , - { Feature_type_enhancer , FEATDEF_enhancer , "enhancer" } , - { Feature_type_exon , FEATDEF_exon , "exon" } , - { Feature_type_gC_signal , FEATDEF_GC_signal , "gC_signal" } , - { Feature_type_iDNA , FEATDEF_iDNA , "iDNA" } , - { Feature_type_intron , FEATDEF_intron , "intron" } , - { Feature_type_j_segment , FEATDEF_J_segment , "j_segment" } , - { Feature_type_ltr , FEATDEF_LTR , "ltr" } , - { Feature_type_mat_peptide , FEATDEF_mat_peptide , "mat_peptide" } , - { Feature_type_misc_binding , FEATDEF_misc_binding , "misc_binding" } , - { Feature_type_misc_difference , FEATDEF_misc_difference , "misc_difference" } , - { Feature_type_misc_feature , FEATDEF_misc_feature , "misc_feature" } , - { Feature_type_misc_recomb , FEATDEF_misc_recomb , "misc_recomb" } , - { 
Feature_type_misc_RNA , FEATDEF_otherRNA , "misc_RNA" } , - { Feature_type_misc_signal , FEATDEF_misc_signal , "misc_signal" } , - { Feature_type_misc_structure , FEATDEF_misc_structure , "misc_structure" } , - { Feature_type_modified_base , FEATDEF_modified_base , "modified_base" } , - { Feature_type_mutation , FEATDEF_mutation , "mutation" } , - { Feature_type_n_region , FEATDEF_N_region , "n_region" } , - { Feature_type_old_sequence , FEATDEF_old_sequence , "old_sequence" } , - { Feature_type_polyA_signal , FEATDEF_polyA_signal , "polyA_signal" } , - { Feature_type_polyA_site , FEATDEF_polyA_site , "polyA_site" } , - { Feature_type_precursor_RNA , FEATDEF_preRNA , "precursor_RNA" } , - { Feature_type_prim_transcript , FEATDEF_prim_transcript , "prim_transcript" } , - { Feature_type_primer_bind , FEATDEF_primer_bind , "primer_bind" } , - { Feature_type_promoter , FEATDEF_promoter , "promoter" } , - { Feature_type_protein_bind , FEATDEF_protein_bind , "protein_bind" } , - { Feature_type_rbs , FEATDEF_RBS , "rbs" } , - { Feature_type_repeat_region , FEATDEF_repeat_region , "repeat_region" } , - { Feature_type_rep_origin , FEATDEF_rep_origin , "rep_origin" } , - { Feature_type_s_region , FEATDEF_S_region , "s_region" } , - { Feature_type_sig_peptide , FEATDEF_sig_peptide , "sig_peptide" } , - { Feature_type_source , FEATDEF_source , "source" } , - { Feature_type_stem_loop , FEATDEF_stem_loop , "stem_loop" } , - { Feature_type_sts , FEATDEF_STS , "sts" } , - { Feature_type_tata_signal , FEATDEF_TATA_signal , "tata_signal" } , - { Feature_type_terminator , FEATDEF_terminator , "terminator" } , - { Feature_type_transit_peptide , FEATDEF_transit_peptide , "transit_peptide" } , - { Feature_type_unsure , FEATDEF_unsure , "unsure" } , - { Feature_type_v_region , FEATDEF_V_region , "v_region" } , - { Feature_type_v_segment , FEATDEF_V_segment , "v_segment" } , - { Feature_type_variation , FEATDEF_variation , "variation" } , - { Feature_type_virion , FEATDEF_virion , 
"virion" } , - { Feature_type_n3clip , FEATDEF_3clip , "3'clip" } , - { Feature_type_n3UTR , FEATDEF_3UTR , "3'UTR" } , - { Feature_type_n5clip , FEATDEF_5clip , "5'clip" } , - { Feature_type_n5UTR , FEATDEF_5UTR , "5'UTR" } , - { Feature_type_n10_signal , FEATDEF_10_signal , "10_signal" } , - { Feature_type_n35_signal , FEATDEF_35_signal , "35_signal" } , - { Feature_type_site_ref , FEATDEF_site_ref , "site_ref" } , - { Feature_type_region , FEATDEF_REGION , "region" } , - { Feature_type_comment , FEATDEF_COMMENT , "comment" } , - { Feature_type_bond , FEATDEF_BOND , "bond" } , - { Feature_type_site , FEATDEF_SITE , "site" } , - { Feature_type_rsite , FEATDEF_RSITE , "rsite" } , - { Feature_type_user , FEATDEF_USER , "user" } , - { Feature_type_txinit , FEATDEF_TXINIT , "txinit" } , - { Feature_type_num , FEATDEF_NUM , "num" } , - { Feature_type_psec_str , FEATDEF_PSEC_STR , "psec_str" } , - { Feature_type_non_std_residue , FEATDEF_NON_STD_RESIDUE , "non_std_residue" } , - { Feature_type_het , FEATDEF_HET , "het" } , - { Feature_type_biosrc , FEATDEF_BIOSRC , "biosrc" } , - { Feature_type_preprotein , FEATDEF_preprotein , "preprotein" } , - { Feature_type_mat_peptide_aa , FEATDEF_mat_peptide_aa , "mat_peptide_aa" } , - { Feature_type_sig_peptide_aa , FEATDEF_sig_peptide_aa , "sig_peptide_aa" } , - { Feature_type_transit_peptide_aa , FEATDEF_transit_peptide_aa , "transit_peptide_aa" } , - { Feature_type_snoRNA , FEATDEF_snoRNA , "snoRNA" } , - { Feature_type_gap , FEATDEF_gap , "gap" } , - { Feature_type_operon , FEATDEF_operon , "operon" } , - { Feature_type_oriT , FEATDEF_oriT , "oriT" } , - { Feature_type_ncRNA , FEATDEF_ncRNA , "ncRNA" } , - { Feature_type_tmRNA , FEATDEF_tmRNA , "tmRNA" }}; + { Macro_feature_type_any , FEATDEF_ANY , "any" } , + { Macro_feature_type_gene , FEATDEF_GENE , "gene" } , + { Macro_feature_type_org , FEATDEF_ORG , "org" } , + { Macro_feature_type_cds , FEATDEF_CDS , "CDS" } , + { Macro_feature_type_prot , FEATDEF_PROT , "Protein" } , 
+ { Macro_feature_type_preRNA , FEATDEF_preRNA , "preRNA" } , + { Macro_feature_type_mRNA , FEATDEF_mRNA , "mRNA" } , + { Macro_feature_type_tRNA , FEATDEF_tRNA , "tRNA" } , + { Macro_feature_type_rRNA , FEATDEF_rRNA , "rRNA" } , + { Macro_feature_type_snRNA , FEATDEF_snRNA , "snRNA" } , + { Macro_feature_type_scRNA , FEATDEF_scRNA , "scRNA" } , + { Macro_feature_type_otherRNA , FEATDEF_otherRNA , "misc_RNA" } , + { Macro_feature_type_pub , FEATDEF_PUB , "pub" } , + { Macro_feature_type_seq , FEATDEF_SEQ , "seq" } , + { Macro_feature_type_imp , FEATDEF_IMP , "imp" } , + { Macro_feature_type_allele , FEATDEF_allele , "allele" } , + { Macro_feature_type_attenuator , FEATDEF_attenuator , "attenuator" } , + { Macro_feature_type_c_region , FEATDEF_C_region , "c_region" } , + { Macro_feature_type_caat_signal , FEATDEF_CAAT_signal , "caat_signal" } , + { Macro_feature_type_imp_CDS , FEATDEF_Imp_CDS , "imp_CDS" } , + { Macro_feature_type_d_loop , FEATDEF_D_loop , "d_loop" } , + { Macro_feature_type_d_segment , FEATDEF_D_segment , "d_segment" } , + { Macro_feature_type_enhancer , FEATDEF_enhancer , "enhancer" } , + { Macro_feature_type_exon , FEATDEF_exon , "exon" } , + { Macro_feature_type_gC_signal , FEATDEF_GC_signal , "gC_signal" } , + { Macro_feature_type_iDNA , FEATDEF_iDNA , "iDNA" } , + { Macro_feature_type_intron , FEATDEF_intron , "intron" } , + { Macro_feature_type_j_segment , FEATDEF_J_segment , "j_segment" } , + { Macro_feature_type_ltr , FEATDEF_LTR , "LTR" } , + { Macro_feature_type_mat_peptide , FEATDEF_mat_peptide , "mat_peptide" } , + { Macro_feature_type_misc_binding , FEATDEF_misc_binding , "misc_binding" } , + { Macro_feature_type_misc_difference , FEATDEF_misc_difference , "misc_difference" } , + { Macro_feature_type_misc_feature , FEATDEF_misc_feature , "misc_feature" } , + { Macro_feature_type_misc_recomb , FEATDEF_misc_recomb , "misc_recomb" } , + { Macro_feature_type_misc_RNA , FEATDEF_otherRNA , "misc_RNA" } , + { Macro_feature_type_misc_signal , 
FEATDEF_misc_signal , "misc_signal" } , + { Macro_feature_type_misc_structure , FEATDEF_misc_structure , "misc_structure" } , + { Macro_feature_type_modified_base , FEATDEF_modified_base , "modified_base" } , + { Macro_feature_type_mutation , FEATDEF_mutation , "mutation" } , + { Macro_feature_type_n_region , FEATDEF_N_region , "n_region" } , + { Macro_feature_type_old_sequence , FEATDEF_old_sequence , "old_sequence" } , + { Macro_feature_type_polyA_signal , FEATDEF_polyA_signal , "polyA_signal" } , + { Macro_feature_type_polyA_site , FEATDEF_polyA_site , "polyA_site" } , + { Macro_feature_type_precursor_RNA , FEATDEF_preRNA , "precursor_RNA" } , + { Macro_feature_type_prim_transcript , FEATDEF_prim_transcript , "prim_transcript" } , + { Macro_feature_type_primer_bind , FEATDEF_primer_bind , "primer_bind" } , + { Macro_feature_type_promoter , FEATDEF_promoter , "promoter" } , + { Macro_feature_type_protein_bind , FEATDEF_protein_bind , "protein_bind" } , + { Macro_feature_type_rbs , FEATDEF_RBS , "rbs" } , + { Macro_feature_type_repeat_region , FEATDEF_repeat_region , "repeat_region" } , + { Macro_feature_type_rep_origin , FEATDEF_rep_origin , "rep_origin" } , + { Macro_feature_type_s_region , FEATDEF_S_region , "s_region" } , + { Macro_feature_type_sig_peptide , FEATDEF_sig_peptide , "sig_peptide" } , + { Macro_feature_type_source , FEATDEF_source , "source" } , + { Macro_feature_type_stem_loop , FEATDEF_stem_loop , "stem_loop" } , + { Macro_feature_type_sts , FEATDEF_STS , "sts" } , + { Macro_feature_type_tata_signal , FEATDEF_TATA_signal , "tata_signal" } , + { Macro_feature_type_terminator , FEATDEF_terminator , "terminator" } , + { Macro_feature_type_transit_peptide , FEATDEF_transit_peptide , "transit_peptide" } , + { Macro_feature_type_unsure , FEATDEF_unsure , "unsure" } , + { Macro_feature_type_v_region , FEATDEF_V_region , "v_region" } , + { Macro_feature_type_v_segment , FEATDEF_V_segment , "v_segment" } , + { Macro_feature_type_variation , 
FEATDEF_variation , "variation" } , + { Macro_feature_type_virion , FEATDEF_virion , "virion" } , + { Macro_feature_type_n3clip , FEATDEF_3clip , "3'clip" } , + { Macro_feature_type_n3UTR , FEATDEF_3UTR , "3'UTR" } , + { Macro_feature_type_n5clip , FEATDEF_5clip , "5'clip" } , + { Macro_feature_type_n5UTR , FEATDEF_5UTR , "5'UTR" } , + { Macro_feature_type_n10_signal , FEATDEF_10_signal , "10_signal" } , + { Macro_feature_type_n35_signal , FEATDEF_35_signal , "35_signal" } , + { Macro_feature_type_site_ref , FEATDEF_site_ref , "site_ref" } , + { Macro_feature_type_region , FEATDEF_REGION , "region" } , + { Macro_feature_type_comment , FEATDEF_COMMENT , "comment" } , + { Macro_feature_type_bond , FEATDEF_BOND , "bond" } , + { Macro_feature_type_site , FEATDEF_SITE , "site" } , + { Macro_feature_type_rsite , FEATDEF_RSITE , "rsite" } , + { Macro_feature_type_user , FEATDEF_USER , "user" } , + { Macro_feature_type_txinit , FEATDEF_TXINIT , "txinit" } , + { Macro_feature_type_num , FEATDEF_NUM , "num" } , + { Macro_feature_type_psec_str , FEATDEF_PSEC_STR , "psec_str" } , + { Macro_feature_type_non_std_residue , FEATDEF_NON_STD_RESIDUE , "non_std_residue" } , + { Macro_feature_type_het , FEATDEF_HET , "het" } , + { Macro_feature_type_biosrc , FEATDEF_BIOSRC , "biosrc" } , + { Macro_feature_type_preprotein , FEATDEF_preprotein , "preprotein" } , + { Macro_feature_type_mat_peptide_aa , FEATDEF_mat_peptide_aa , "mat_peptide_aa" } , + { Macro_feature_type_sig_peptide_aa , FEATDEF_sig_peptide_aa , "sig_peptide_aa" } , + { Macro_feature_type_transit_peptide_aa , FEATDEF_transit_peptide_aa , "transit_peptide_aa" } , + { Macro_feature_type_snoRNA , FEATDEF_snoRNA , "snoRNA" } , + { Macro_feature_type_gap , FEATDEF_gap , "gap" } , + { Macro_feature_type_operon , FEATDEF_operon , "operon" } , + { Macro_feature_type_oriT , FEATDEF_oriT , "oriT" } , + { Macro_feature_type_ncRNA , FEATDEF_ncRNA , "ncRNA" } , + { Macro_feature_type_tmRNA , FEATDEF_tmRNA , "tmRNA" } , + { 
Macro_feature_type_mobile_element, FEATDEF_mobile_element, "mobile_element" } +}; #define NUM_feattype_featdef sizeof (feattype_featdef) / sizeof (FeatTypeFeatDefData) @@ -1138,7 +1378,7 @@ NLM_EXTERN CharPtr GetFeatureNameFromFeatureType (Int4 feature_type) for (i = 0; i < NUM_feattype_featdef && str == NULL; i++) { if (feature_type == feattype_featdef[i].feattype) { - str = feattype_featdef[feature_type].featname; + str = feattype_featdef[i].featname; } } if (str == NULL) { @@ -1219,7 +1459,8 @@ NLM_EXTERN void AddImportFeaturesToChoiceList (ValNodePtr PNTR feature_type_list ValNodePtr tmp_list = NULL; for (i = 1; i < NUM_feattype_featdef; i++) { - if (feattype_featdef[i].feattype == Feature_type_gap) continue; + if (feattype_featdef[i].feattype == Macro_feature_type_gap) continue; + if (feattype_featdef[i].feattype == Macro_feature_type_conflict) continue; seqfeattype = FindFeatFromFeatDefType (feattype_featdef[i].featdef); if (seqfeattype == SEQFEAT_IMP) { featname = GetFeatureNameFromFeatureType (feattype_featdef[i].feattype); @@ -1236,14 +1477,15 @@ NLM_EXTERN void AddImportFeaturesToChoiceList (ValNodePtr PNTR feature_type_list static Boolean IsMostUsedFeature (Uint1 val) { - if (val == Feature_type_gene - || val == Feature_type_cds - || val == Feature_type_prot - || val == Feature_type_exon - || val == Feature_type_intron - || val == Feature_type_mRNA - || val == Feature_type_rRNA - || val == Feature_type_otherRNA) { + if (val == Macro_feature_type_gene + || val == Macro_feature_type_cds + || val == Macro_feature_type_prot + || val == Macro_feature_type_exon + || val == Macro_feature_type_intron + || val == Macro_feature_type_mRNA + || val == Macro_feature_type_rRNA + || val == Macro_feature_type_otherRNA + || val == Macro_feature_type_misc_feature) { return TRUE; } else { return FALSE; @@ -1290,7 +1532,7 @@ NLM_EXTERN void AddAllFeaturesToChoiceList (ValNodePtr PNTR feature_type_list) ValNodePtr tmp_list = NULL; for (i = 1; i < NUM_feattype_featdef; i++) { 
- if (feattype_featdef[i].feattype == Feature_type_gap) continue; + if (feattype_featdef[i].feattype == Macro_feature_type_gap) continue; featname = GetFeatureNameFromFeatureType (feattype_featdef[i].feattype); if (featname != NULL) { ValNodeAddPointer (&tmp_list, feattype_featdef[i].feattype, StringSave (featname)); @@ -1330,15 +1572,15 @@ static FeatQualGBQualData featqual_gbqual[] = { { Feat_qual_legal_function , GBQUAL_function , 0, "function" } , { Feat_qual_legal_gene , GBQUAL_gene , 0, "locus" } , { Feat_qual_legal_inference , GBQUAL_inference , 0, "inference" } , - { Feat_qual_legal_label , GBQUAL_label , 0, "label" } , - { Feat_qual_legal_location , 255 , 0, "location" } , + { Feat_qual_legal_location , -1 , 0, "location" } , { Feat_qual_legal_locus_tag , GBQUAL_locus_tag , 0, "locus-tag" } , { Feat_qual_legal_map , GBQUAL_map , 0, "map" } , - { Feat_qual_legal_mobile_element , GBQUAL_mobile_element , 0, "mobile-element" } , - { Feat_qual_legal_mobile_element_type , GBQUAL_mobile_element , 1, "mobile-element-type"} , - { Feat_qual_legal_mobile_element_name , GBQUAL_mobile_element , 2, "mobile-element-name"} , + { Feat_qual_legal_mobile_element_type , GBQUAL_mobile_element_type , 0, "mobile-element-type" } , + { Feat_qual_legal_mobile_element_type_type , GBQUAL_mobile_element_type , 1, "mobile-element-type-type"} , + { Feat_qual_legal_mobile_element_name , GBQUAL_mobile_element_type , 2, "mobile-element-name"} , { Feat_qual_legal_mod_base , GBQUAL_mod_base , 0, "mod-base" } , { Feat_qual_legal_mol_type , GBQUAL_mol_type , 0, "mol-type" } , + { Feat_qual_legal_name, -1 , 0 , "name" } , { Feat_qual_legal_ncRNA_class , GBQUAL_ncRNA_class , 0, "ncRNA-class" } , { Feat_qual_legal_note , GBQUAL_note , 0, "note" } , { Feat_qual_legal_number , GBQUAL_number , 0, "number" } , @@ -1371,7 +1613,8 @@ static FeatQualGBQualData featqual_gbqual[] = { { Feat_qual_legal_translation , GBQUAL_translation , 0, "translation" } , { Feat_qual_legal_transl_except , 
GBQUAL_transl_except , 0, "transl-except" } , { Feat_qual_legal_transl_table , GBQUAL_transl_table , 0, "transl-table" } , - { Feat_qual_legal_usedin , GBQUAL_usedin , 0, "usedin" } }; + { Feat_qual_legal_usedin , GBQUAL_usedin , 0, "usedin" } +}; #define NUM_featqual_gbqual sizeof (featqual_gbqual) / sizeof (FeatQualGBQualData) @@ -1486,13 +1729,13 @@ typedef struct rnatypemap { } RnaTypeMapData, PNTR RnaTypeMapPtr; static RnaTypeMapData rnatypemap[] = { - { RnaFeatType_preRNA , RNA_TYPE_premsg, Feature_type_preRNA, "preRNA" } , - { RnaFeatType_mRNA , RNA_TYPE_mRNA, Feature_type_mRNA, "mRNA" } , - { RnaFeatType_tRNA , RNA_TYPE_tRNA, Feature_type_tRNA, "tRNA" } , - { RnaFeatType_rRNA , RNA_TYPE_rRNA, Feature_type_rRNA, "rRNA" } , - { RnaFeatType_ncRNA , RNA_TYPE_ncRNA , Feature_type_ncRNA, "ncRNA" } , - { RnaFeatType_tmRNA , RNA_TYPE_tmRNA , Feature_type_tmRNA, "tmRNA" } , - { RnaFeatType_miscRNA , RNA_TYPE_misc_RNA , Feature_type_misc_RNA, "misc_RNA" } + { RnaFeatType_preRNA , RNA_TYPE_premsg, Macro_feature_type_preRNA, "preRNA" } , + { RnaFeatType_mRNA , RNA_TYPE_mRNA, Macro_feature_type_mRNA, "mRNA" } , + { RnaFeatType_tRNA , RNA_TYPE_tRNA, Macro_feature_type_tRNA, "tRNA" } , + { RnaFeatType_rRNA , RNA_TYPE_rRNA, Macro_feature_type_rRNA, "rRNA" } , + { RnaFeatType_ncRNA , RNA_TYPE_ncRNA , Macro_feature_type_ncRNA, "ncRNA" } , + { RnaFeatType_tmRNA , RNA_TYPE_tmRNA , Macro_feature_type_tmRNA, "tmRNA" } , + { RnaFeatType_miscRNA , RNA_TYPE_misc_RNA , Macro_feature_type_misc_RNA, "misc_RNA" } }; #define NUM_rnatypemap sizeof (rnatypemap) / sizeof (RnaTypeMapData) @@ -1572,7 +1815,7 @@ static Boolean DoesFeatureMatchRnaType (SeqFeatPtr sfp, RnaFeatTypePtr rt) if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) { return FALSE; } - if (rt == NULL) return TRUE; + if (rt == NULL || rt->choice == RnaFeatType_any) return TRUE; rrp = (RnaRefPtr) sfp->data.value.ptrvalue; if (rrp == NULL) return FALSE; @@ -1582,7 +1825,7 @@ static Boolean DoesFeatureMatchRnaType (SeqFeatPtr 
sfp, RnaFeatTypePtr rt) case RnaFeatType_ncRNA: if (rt->data.ptrvalue == NULL) { rval = TRUE; - } else if ((rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL && StringCmp (rgp->_class, rt->data.ptrvalue)) { + } else if ((rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL && StringCmp (rgp->_class, rt->data.ptrvalue) == 0) { rval = TRUE; } break; @@ -1788,39 +2031,39 @@ NLM_EXTERN RnaQualPtr RnaQualFromFeatureField (FeatureFieldPtr ffp) rq->field = rnafieldnames[i].featqual; rq->type = ValNodeNew (NULL); switch (ffp->type) { - case Feature_type_preRNA: - case Feature_type_precursor_RNA: + case Macro_feature_type_preRNA: + case Macro_feature_type_precursor_RNA: rq->type->choice = RnaFeatType_preRNA; break; - case Feature_type_mRNA: + case Macro_feature_type_mRNA: rq->type->choice = RnaFeatType_mRNA; break; - case Feature_type_tRNA: + case Macro_feature_type_tRNA: rq->type->choice = RnaFeatType_tRNA; break; - case Feature_type_rRNA: + case Macro_feature_type_rRNA: rq->type->choice = RnaFeatType_rRNA; break; - case Feature_type_snRNA: + case Macro_feature_type_snRNA: rq->type->choice = RnaFeatType_ncRNA; rq->type->data.ptrvalue = StringSave ("snRNA"); break; - case Feature_type_scRNA: + case Macro_feature_type_scRNA: rq->type->choice = RnaFeatType_ncRNA; rq->type->data.ptrvalue = StringSave ("scRNA"); break; - case Feature_type_snoRNA: + case Macro_feature_type_snoRNA: rq->type->choice = RnaFeatType_ncRNA; rq->type->data.ptrvalue = StringSave ("snoRNA"); break; - case Feature_type_otherRNA: - case Feature_type_misc_RNA: + case Macro_feature_type_otherRNA: + case Macro_feature_type_misc_RNA: rq->type->choice = RnaFeatType_miscRNA; break; - case Feature_type_ncRNA: + case Macro_feature_type_ncRNA: rq->type->choice = RnaFeatType_ncRNA; break; - case Feature_type_tmRNA: + case Macro_feature_type_tmRNA: rq->type->choice = RnaFeatType_tmRNA; break; default: @@ -1838,7 +2081,7 @@ NLM_EXTERN CharPtr SummarizeRnaType (RnaFeatTypePtr rt) CharPtr rnatypename = NULL; CharPtr fmt 
= "%s ncRNA"; - if (rt == NULL) { + if (rt == NULL || rt->choice == RnaFeatType_any) { rnatypename = StringSave ("Any RNA"); } else if (rt->choice == RnaFeatType_ncRNA) { if (StringHasNoText (rt->data.ptrvalue)) { @@ -1885,13 +2128,15 @@ static CharPtr SummarizeRnaQual (RnaQualPtr rq) static CharPtr SummarizeStructuredCommentField (StructuredCommentFieldPtr field) { CharPtr summ = NULL; + CharPtr fmt = "structured comment field %s"; if (field == NULL) return NULL; if (field->choice == StructuredCommentField_database) { - summ = StringSave ("database"); + summ = StringSave ("structured comment database"); } else if (field->choice == StructuredCommentField_named) { - summ = StringSave (field->data.ptrvalue); + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (field->data.ptrvalue))); + sprintf (summ, fmt, field->data.ptrvalue == NULL ? "" : field->data.ptrvalue); } return summ; } @@ -2008,7 +2253,8 @@ static SrcQualSCQualData srcqual_scqual[] = { { Source_qual_all_quals , 0 , IS_OTHER , 0, kAllQualsStr } , { Source_qual_mating_type , SUBSRC_mating_type , IS_SUBSRC , 0 , "mating-type" } , { Source_qual_linkage_group , SUBSRC_linkage_group , IS_SUBSRC , 0 , "linkage-group" } , - { Source_qual_haplogroup , SUBSRC_haplogroup, IS_SUBSRC, 0, "haplogroup"} + { Source_qual_haplogroup , SUBSRC_haplogroup, IS_SUBSRC, 0, "haplogroup"} , + { Source_qual_taxid , 0 , IS_OTHER , 0 , "taxid" } , }; #define NUM_srcqual_scqual sizeof (srcqual_scqual) / sizeof (SrcQualSCQualData) @@ -2101,6 +2347,8 @@ NLM_EXTERN Boolean IsNonTextFieldType (FieldTypePtr field) if (field == NULL) { return FALSE; + } else if (field->choice == FieldType_molinfo_field) { + return TRUE; } else if (field->choice != FieldType_source_qual) { return FALSE; } else if ((vnp = field->data.ptrvalue) == NULL) { @@ -2182,6 +2430,8 @@ NLM_EXTERN ValNodePtr GetSourceQualFieldListFromBioSource (BioSourcePtr biop) OrgModPtr mod; ValNodePtr list = NULL, vnp; Int4 i; + PCRReactionSetPtr ps; + 
PCRPrimerPtr pp; if (biop == NULL) { return NULL; @@ -2214,6 +2464,14 @@ NLM_EXTERN ValNodePtr GetSourceQualFieldListFromBioSource (BioSourcePtr biop) } } + /* add taxid */ + if (HasTaxonomyID(biop)) { + vnp = ValNodeNew (NULL); + vnp->choice = SourceQualChoice_textqual; + vnp->data.intvalue = Source_qual_taxid; + ValNodeAddPointer (&list, FieldType_source_qual, vnp); + } + /* add subtypes */ for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { for (i = 0; @@ -2240,6 +2498,39 @@ NLM_EXTERN ValNodePtr GetSourceQualFieldListFromBioSource (BioSourcePtr biop) } } } + + /* add PCR primers */ + for (ps = biop->pcr_primers; ps != NULL; ps = ps->next) { + for (pp = ps->forward; pp != NULL; pp = pp->next) { + if (!StringHasNoText (pp->name)) { + vnp = ValNodeNew (NULL); + vnp->choice = SourceQualChoice_textqual; + vnp->data.intvalue = Source_qual_fwd_primer_name; + ValNodeAddPointer (&list, FieldType_source_qual, vnp); + } + if (!StringHasNoText (pp->seq)) { + vnp = ValNodeNew (NULL); + vnp->choice = SourceQualChoice_textqual; + vnp->data.intvalue = Source_qual_fwd_primer_seq; + ValNodeAddPointer (&list, FieldType_source_qual, vnp); + } + } + for (pp = ps->reverse; pp != NULL; pp = pp->next) { + if (!StringHasNoText (pp->name)) { + vnp = ValNodeNew (NULL); + vnp->choice = SourceQualChoice_textqual; + vnp->data.intvalue = Source_qual_rev_primer_name; + ValNodeAddPointer (&list, FieldType_source_qual, vnp); + } + if (!StringHasNoText (pp->seq)) { + vnp = ValNodeNew (NULL); + vnp->choice = SourceQualChoice_textqual; + vnp->data.intvalue = Source_qual_rev_primer_seq; + ValNodeAddPointer (&list, FieldType_source_qual, vnp); + } + } + } + return list; } @@ -2253,65 +2544,30 @@ NLM_EXTERN Boolean AllowSourceQualMulti (SourceQualChoicePtr s) } else if (s->data.intvalue == Source_qual_culture_collection || s->data.intvalue == Source_qual_bio_material || s->data.intvalue == Source_qual_specimen_voucher - || s->data.intvalue == Source_qual_dbxref) { + || s->data.intvalue == 
Source_qual_dbxref + || s->data.intvalue == Source_qual_fwd_primer_name + || s->data.intvalue == Source_qual_fwd_primer_seq + || s->data.intvalue == Source_qual_rev_primer_name + || s->data.intvalue == Source_qual_rev_primer_seq) { rval = TRUE; } return rval; } -static Boolean IsNotForParsing (Int4 srcqual) -{ - if (srcqual == Source_qual_all_notes - || srcqual == Source_qual_all_quals - || srcqual == Source_qual_common - || srcqual == Source_qual_acronym - || srcqual == Source_qual_dosage - || srcqual == Source_qual_nat_host - || srcqual == Source_qual_specimen_voucher - || srcqual == Source_qual_authority - || srcqual == Source_qual_synonym - || srcqual == Source_qual_anamorph - || srcqual == Source_qual_teleomorph - || srcqual == Source_qual_gb_acronym - || srcqual == Source_qual_gb_anamorph - || srcqual == Source_qual_gb_synonym - || srcqual == Source_qual_culture_collection - || srcqual == Source_qual_bio_material - || srcqual == Source_qual_metagenome_source - || srcqual == Source_qual_old_lineage - || srcqual == Source_qual_old_name) { - return TRUE; - } else { - return FALSE; - } -} - - NLM_EXTERN TextFsaPtr GetOrgModSearch (void) { -#if 0 - Int4 i; -#endif TextFsaPtr tags; tags = TextFsaNew(); -#if 0 - for (i = 0; i < NUM_srcqual_scqual; i++) { - if (!IsNotForParsing(srcqual_scqual[i].srcqual) - && (srcqual_scqual[i].typeflag & IS_ORGMOD)) { - TextFsaAdd (tags, srcqual_scqual[i].qualname); - } - } -#else TextFsaAdd (tags, "pathovar"); TextFsaAdd (tags, "serovar"); TextFsaAdd (tags, "strain"); TextFsaAdd (tags, "sub-species"); TextFsaAdd (tags, "variety"); -#endif + /* abbreviations */ TextFsaAdd (tags, "subsp."); TextFsaAdd (tags, "var."); TextFsaAdd (tags, "str."); @@ -2408,7 +2664,7 @@ NLM_EXTERN Int4 GenomeFromLocName (CharPtr loc_name) NLM_EXTERN ValNodePtr GetLocationList (Boolean for_remove) { - ValNodePtr list = NULL; + ValNodePtr list = NULL, start = NULL; Int4 i; for (i = 0; i < NUM_srcloc_genome; i++) { @@ -2419,6 +2675,11 @@ NLM_EXTERN 
ValNodePtr GetLocationList (Boolean for_remove) } } list = ValNodeSort (list, SortVnpByString); + /* put mitochondrion and chloroplast at top of list */ + ValNodeAddPointer (&start, Source_location_mitochondrion, StringSave ("mitochondrion")); + ValNodeAddPointer (&start, Source_location_chloroplast, StringSave ("chloroplast")); + ValNodeLink (&start, list); + list = start; return list; } @@ -2779,6 +3040,9 @@ NLM_EXTERN void AddAllCDSGeneProtFieldsToChoiceList (ValNodePtr PNTR field_list) { Int4 i; + ValNodeAddPointer (field_list, CDSGeneProt_field_prot_name, StringSave ("protein name")); + ValNodeAddPointer (field_list, CDSGeneProt_field_prot_description, StringSave ("protein description")); + for (i = 0; i < NUM_cdsgeneprotfield_name; i++) { ValNodeAddPointer (field_list, cdsgeneprotfield_name[i].field, StringSave (cdsgeneprotfield_name[i].name)); } @@ -2857,7 +3121,7 @@ static Boolean IsFieldTypeMatPeptideRelated (FieldTypePtr field) rval = FALSE; } else if ((field->choice == FieldType_feature_field && (ff = field->data.ptrvalue) != NULL - && ff->type == Feature_type_mat_peptide_aa) + && ff->type == Macro_feature_type_mat_peptide_aa) || (field->choice == FieldType_cds_gene_prot && IsCDSGeneProtFieldMatPeptideRelated(field->data.intvalue))) { rval = TRUE; @@ -2900,13 +3164,13 @@ static Boolean IsConstraintChoiceMatPeptideRelated (ConstraintChoicePtr constrai static Int2 FeatureTypeFromCDSGeneProtField (Uint2 cds_gene_prot_field) { - Int2 feat_type = Feature_type_any; + Int2 feat_type = Macro_feature_type_any; switch (cds_gene_prot_field) { case CDSGeneProt_field_cds_comment: case CDSGeneProt_field_cds_inference: case CDSGeneProt_field_codon_start: - feat_type = Feature_type_cds; + feat_type = Macro_feature_type_cds; break; case CDSGeneProt_field_gene_locus: case CDSGeneProt_field_gene_description: @@ -2917,25 +3181,25 @@ static Int2 FeatureTypeFromCDSGeneProtField (Uint2 cds_gene_prot_field) case CDSGeneProt_field_gene_synonym: case 
CDSGeneProt_field_gene_old_locus_tag: case CDSGeneProt_field_gene_inference: - feat_type = Feature_type_gene; + feat_type = Macro_feature_type_gene; break; case CDSGeneProt_field_mrna_product: case CDSGeneProt_field_mrna_comment: - feat_type = Feature_type_mRNA; + feat_type = Macro_feature_type_mRNA; break; case CDSGeneProt_field_prot_name: case CDSGeneProt_field_prot_description: case CDSGeneProt_field_prot_ec_number: case CDSGeneProt_field_prot_activity: case CDSGeneProt_field_prot_comment: - feat_type = Feature_type_prot; + feat_type = Macro_feature_type_prot; break; case CDSGeneProt_field_mat_peptide_name: case CDSGeneProt_field_mat_peptide_description: case CDSGeneProt_field_mat_peptide_ec_number: case CDSGeneProt_field_mat_peptide_activity: case CDSGeneProt_field_mat_peptide_comment: - feat_type = Feature_type_mat_peptide_aa; + feat_type = Macro_feature_type_mat_peptide_aa; break; } return feat_type; @@ -2949,168 +3213,168 @@ NLM_EXTERN FeatureFieldPtr FeatureFieldFromCDSGeneProtField (Uint2 cds_gene_prot switch (cds_gene_prot_field) { case CDSGeneProt_field_cds_comment: f = FeatureFieldNew (); - f->type = Feature_type_cds; + f->type = Macro_feature_type_cds; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_note; break; case CDSGeneProt_field_cds_inference: f = FeatureFieldNew (); - f->type = Feature_type_cds; + f->type = Macro_feature_type_cds; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_inference; break; case CDSGeneProt_field_codon_start: f = FeatureFieldNew (); - f->type = Feature_type_cds; + f->type = Macro_feature_type_cds; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_codon_start; break; case CDSGeneProt_field_gene_locus: f = FeatureFieldNew (); - f->type = Feature_type_gene; + f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); 
f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_gene; break; case CDSGeneProt_field_gene_description: f = FeatureFieldNew (); - f->type = Feature_type_gene; + f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_gene_description; break; case CDSGeneProt_field_gene_comment: f = FeatureFieldNew (); - f->type = Feature_type_gene; + f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_note; break; case CDSGeneProt_field_gene_allele: f = FeatureFieldNew (); - f->type = Feature_type_gene; + f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_allele; break; case CDSGeneProt_field_gene_maploc: f = FeatureFieldNew (); - f->type = Feature_type_gene; + f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_map; break; case CDSGeneProt_field_gene_locus_tag: f = FeatureFieldNew (); - f->type = Feature_type_gene; + f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_locus_tag; break; case CDSGeneProt_field_gene_synonym: f = FeatureFieldNew (); - f->type = Feature_type_gene; + f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_synonym; break; case CDSGeneProt_field_gene_old_locus_tag: f = FeatureFieldNew (); - f->type = Feature_type_gene; + f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_old_locus_tag; break; case CDSGeneProt_field_gene_inference: 
f = FeatureFieldNew (); - f->type = Feature_type_gene; + f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_inference; break; case CDSGeneProt_field_mrna_product: f = FeatureFieldNew (); - f->type = Feature_type_mRNA; + f->type = Macro_feature_type_mRNA; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_product; break; case CDSGeneProt_field_mrna_comment: f = FeatureFieldNew (); - f->type = Feature_type_mRNA; + f->type = Macro_feature_type_mRNA; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_note; break; case CDSGeneProt_field_prot_name: f = FeatureFieldNew (); - f->type = Feature_type_prot; + f->type = Macro_feature_type_prot; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_product; break; case CDSGeneProt_field_prot_description: f = FeatureFieldNew (); - f->type = Feature_type_prot; + f->type = Macro_feature_type_prot; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_description; break; case CDSGeneProt_field_prot_ec_number: f = FeatureFieldNew (); - f->type = Feature_type_prot; + f->type = Macro_feature_type_prot; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_ec_number; break; case CDSGeneProt_field_prot_activity: f = FeatureFieldNew (); - f->type = Feature_type_prot; + f->type = Macro_feature_type_prot; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_activity; break; case CDSGeneProt_field_prot_comment: f = FeatureFieldNew (); - f->type = Feature_type_prot; + f->type = Macro_feature_type_prot; f->field = ValNodeNew (NULL); f->field->choice = 
FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_note; break; case CDSGeneProt_field_mat_peptide_name: f = FeatureFieldNew (); - f->type = Feature_type_mat_peptide_aa; + f->type = Macro_feature_type_mat_peptide_aa; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_product; break; case CDSGeneProt_field_mat_peptide_description: f = FeatureFieldNew (); - f->type = Feature_type_mat_peptide_aa; + f->type = Macro_feature_type_mat_peptide_aa; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_description; break; case CDSGeneProt_field_mat_peptide_ec_number: f = FeatureFieldNew (); - f->type = Feature_type_mat_peptide_aa; + f->type = Macro_feature_type_mat_peptide_aa; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_ec_number; break; case CDSGeneProt_field_mat_peptide_activity: f = FeatureFieldNew (); - f->type = Feature_type_mat_peptide_aa; + f->type = Macro_feature_type_mat_peptide_aa; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_activity; break; case CDSGeneProt_field_mat_peptide_comment: f = FeatureFieldNew (); - f->type = Feature_type_mat_peptide_aa; + f->type = Macro_feature_type_mat_peptide_aa; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_note; @@ -3128,29 +3392,29 @@ static Uint2 CDSGeneProtFieldFromFeatureField (FeatureFieldPtr ffp) switch (ffp->field->data.intvalue) { case Feat_qual_legal_note: switch (ffp->type) { - case Feature_type_cds: + case Macro_feature_type_cds: cds_gene_prot_field = CDSGeneProt_field_cds_comment; break; - case Feature_type_gene: + case Macro_feature_type_gene: cds_gene_prot_field = CDSGeneProt_field_gene_comment; break; - case Feature_type_mRNA: + case 
Macro_feature_type_mRNA: cds_gene_prot_field = CDSGeneProt_field_mrna_comment; break; - case Feature_type_prot: + case Macro_feature_type_prot: cds_gene_prot_field = CDSGeneProt_field_prot_comment; break; - case Feature_type_mat_peptide_aa: + case Macro_feature_type_mat_peptide_aa: cds_gene_prot_field = CDSGeneProt_field_mat_peptide_comment; break; } break; case Feat_qual_legal_inference: switch (ffp->type) { - case Feature_type_cds: + case Macro_feature_type_cds: cds_gene_prot_field = CDSGeneProt_field_cds_inference; break; - case Feature_type_gene: + case Macro_feature_type_gene: cds_gene_prot_field = CDSGeneProt_field_gene_inference; break; } @@ -3181,46 +3445,46 @@ static Uint2 CDSGeneProtFieldFromFeatureField (FeatureFieldPtr ffp) break; case Feat_qual_legal_product: switch (ffp->type) { - case Feature_type_mRNA: + case Macro_feature_type_mRNA: cds_gene_prot_field = CDSGeneProt_field_mrna_product; break; - case Feature_type_prot: + case Macro_feature_type_prot: cds_gene_prot_field = CDSGeneProt_field_prot_name; break; - case Feature_type_mat_peptide_aa: + case Macro_feature_type_mat_peptide_aa: cds_gene_prot_field = CDSGeneProt_field_mat_peptide_name; break; } break; case Feat_qual_legal_description: switch (ffp->type) { - case Feature_type_gene: + case Macro_feature_type_gene: cds_gene_prot_field = CDSGeneProt_field_gene_description; break; - case Feature_type_prot: + case Macro_feature_type_prot: cds_gene_prot_field = CDSGeneProt_field_prot_description; break; - case Feature_type_mat_peptide_aa: + case Macro_feature_type_mat_peptide_aa: cds_gene_prot_field = CDSGeneProt_field_mat_peptide_description; break; } break; case Feat_qual_legal_ec_number: switch (ffp->type) { - case Feature_type_prot: + case Macro_feature_type_prot: cds_gene_prot_field = CDSGeneProt_field_prot_ec_number; break; - case Feature_type_mat_peptide_aa: + case Macro_feature_type_mat_peptide_aa: cds_gene_prot_field = CDSGeneProt_field_mat_peptide_ec_number; break; } break; case 
Feat_qual_legal_activity: switch (ffp->type) { - case Feature_type_prot: + case Macro_feature_type_prot: cds_gene_prot_field = CDSGeneProt_field_prot_activity; break; - case Feature_type_mat_peptide_aa: + case Macro_feature_type_mat_peptide_aa: cds_gene_prot_field = CDSGeneProt_field_mat_peptide_activity; break; } @@ -3250,7 +3514,7 @@ static MoleculeTypeBiomolData moleculetype_biomol[] = { { Molecule_type_transcribed_RNA, MOLECULE_TYPE_TRANSCRIBED_RNA, "transcribed RNA" } , { Molecule_type_ncRNA, MOLECULE_TYPE_NCRNA, "ncRNA" } , { Molecule_type_transfer_messenger_RNA, MOLECULE_TYPE_TMRNA, "tmRNA" } , - { Molecule_type_other, MOLECULE_TYPE_OTHER_GENETIC_MATERIAL, "other-genetic" } + { Molecule_type_macro_other, MOLECULE_TYPE_OTHER_GENETIC_MATERIAL, "other-genetic" } }; @@ -4021,12 +4285,12 @@ static Boolean IsWholeWordMatch (CharPtr start, CharPtr found, Int4 match_len) if (found != start) { char_before = *(found - 1); - if (isalpha ((Int4) char_before) || isdigit ((Int4) char_before)) + if (isalpha ((Int4) char_before) || isdigit ((Int4) char_before) || char_before == '_') { rval = FALSE; } } - if (char_after != 0 && (isalpha ((Int4) char_after) || isdigit ((Int4)char_after))) + if (char_after != 0 && (isalpha ((Int4) char_after) || isdigit ((Int4)char_after) || char_after == '_')) { rval = FALSE; } @@ -4037,55 +4301,362 @@ static Boolean IsWholeWordMatch (CharPtr start, CharPtr found, Int4 match_len) NLM_EXTERN Boolean IsStringConstraintEmpty (StringConstraintPtr scp) { - if (scp == NULL || StringHasNoText (scp->match_text)) return TRUE; - else return FALSE; + if (scp == NULL) { + return TRUE; + } + if (scp->is_all_caps || scp->is_all_lower || scp->is_all_punct) { + return FALSE; + } else if (scp->match_text == NULL || scp->match_text[0] == 0) { + return TRUE; + } else { + return FALSE; + } } -NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstraintPtr scp) +static void StripUnimportantCharacters (CharPtr str, Boolean strip_space, Boolean 
strip_punct) { - CharPtr pFound; - Boolean rval = FALSE; - Char char_after = 0; - - if (IsStringConstraintEmpty (scp)) return TRUE; - if (StringHasNoText (str)) return FALSE; + CharPtr src, dst; - switch (scp->match_location) - { - case String_location_contains: - if (scp->case_sensitive) - { - pFound = StringSearch (str, scp->match_text); - } - else - { - pFound = StringISearch (str, scp->match_text); - } - if (pFound == NULL) - { - rval = FALSE; - } - else if (scp->whole_word) - { - rval = IsWholeWordMatch (str, pFound, StringLen (scp->match_text)); - while (!rval && pFound != NULL) - { - if (scp->case_sensitive) - { - pFound = StringSearch (pFound + 1, scp->match_text); - } - else - { - pFound = StringISearch (pFound + 1, scp->match_text); - } - if (pFound != NULL) - { - rval = IsWholeWordMatch (str, pFound, StringLen (scp->match_text)); - } - } + if (str == NULL) { + return; + } + + src = str; + dst = str; + while (*src != 0) { + if ((strip_space && isspace (*src)) || (strip_punct && ispunct (*src))) { + /* don't copy this character */ + } else { + if (src > dst) { + *dst = *src; } - else + dst++; + } + src++; + } + *dst = 0; +} + + +static Boolean IsWholeWordAtStart (CharPtr str, CharPtr cp, Boolean is_start) +{ + if (cp == str) { + return is_start; + } else { + return !isalpha (*(cp - 1)); + } +} + + +static int CaseNCompare (CharPtr str1, CharPtr str2, Int4 n, Boolean case_sensitive) +{ + if (n == 0) { + return 0; + } else if (case_sensitive) { + return StringNCmp (str1, str2, n); + } else { + return StringNICmp (str1, str2, n); + } +} + + +static Boolean +AdvancedStringCompare +(CharPtr str, + CharPtr str_match, + StringConstraintPtr scp, + Boolean is_start, + Int4Ptr p_target_match_len) +{ + CharPtr cp_s, cp_m; + Boolean match = TRUE, recursive_match = FALSE; + Boolean word_start_s, word_start_m; + WordSubstitutionPtr word; + Int4 len1, len2, init_target_match_len = 0, target_match_len = 0; + ValNodePtr syn; + + if (str == NULL) { + return FALSE; + } else 
if (scp == NULL || str_match == NULL) { + return TRUE; + } + + cp_s = str; + cp_m = str_match; + if (p_target_match_len != NULL) { + init_target_match_len = *p_target_match_len; + } + + while (match && *cp_m != 0 && !recursive_match) { + /* first, check to see if we're skipping synonyms */ + for (word = scp->ignore_words; word != NULL && !recursive_match; word = word->next) { + len1 = StringLen (word->word); + if (CaseNCompare(word->word, cp_m, len1, word->case_sensitive) == 0) { /* text match */ + word_start_m = IsWholeWordAtStart (str_match, cp_m, is_start); + if (!word->whole_word || (!isalpha (*(cp_m + len1)) && word_start_m)) { /* whole word match */ + if (word->synonyms == NULL) { + if (AdvancedStringCompare (cp_s, cp_m + len1, scp, word_start_m, &target_match_len)) { + recursive_match = TRUE; + } + } else { + for (syn = word->synonyms; syn != NULL && !recursive_match; syn = syn->next) { + len2 = StringLen (syn->data.ptrvalue); + if (CaseNCompare(syn->data.ptrvalue, cp_s, len2, word->case_sensitive) == 0) { /* text match */ + word_start_s = IsWholeWordAtStart (str, cp_s, is_start); + if (!word->whole_word || (!isalpha (*(cp_s + len2)) && word_start_s)) { /* whole word match */ + if (AdvancedStringCompare (cp_s + len2, cp_m + len1, scp, word_start_m && word_start_s, &target_match_len)) { + recursive_match = TRUE; + } + } + } + } + } + } + } + } + if (!recursive_match) { + if (CaseNCompare(cp_m, cp_s, 1, scp->case_sensitive) == 0) { + cp_m++; + cp_s++; + target_match_len++; + } else if (scp->ignore_space && (isspace (*cp_m) || isspace (*cp_s))) { + if (isspace (*cp_m)) { + cp_m++; + } + if (isspace (*cp_s)) { + cp_s++; + target_match_len++; + } + } else if (scp->ignore_punct && (ispunct (*cp_m) || ispunct (*cp_s))) { + if (ispunct (*cp_m)) { + cp_m++; + } + if (ispunct (*cp_s)) { + cp_s++; + target_match_len++; + } + } else { + match = FALSE; + } + } + } + + if (match && !recursive_match) { + while ((scp->ignore_space && isspace (*cp_s)) || (scp->ignore_punct 
&& ispunct (*cp_s))) { + cp_s++; + target_match_len++; + } + while ((scp->ignore_space && isspace (*cp_m)) || (scp->ignore_punct && ispunct (*cp_m))) { + cp_m++; + } + + if (*cp_m != 0) { + match = FALSE; + } else if ((scp->match_location == String_location_ends || scp->match_location == String_location_equals) && *cp_s != 0) { + match = FALSE; + } else if (scp->whole_word && (!is_start || isalpha (*cp_s))) { + match = FALSE; + } + } + if (match && p_target_match_len != NULL) { + (*p_target_match_len) += target_match_len; + } + + return match; +} + + +static Boolean AdvancedStringMatch (CharPtr str, StringConstraintPtr scp) +{ + CharPtr cp; + Boolean rval = FALSE; + + if (str == NULL) { + rval = FALSE; + } else if (scp == NULL) { + rval = TRUE; + } else if (AdvancedStringCompare (str, scp->match_text, scp, TRUE, NULL)) { + rval = TRUE; + } else if (scp->match_location == String_location_starts || scp->match_location == String_location_equals) { + rval = FALSE; + } else { + cp = str + 1; + while (!rval && *cp != 0) { + if (scp->whole_word) { + while (*cp != 0 && isalpha (*(cp-1))) { + cp++; + } + } + if (*cp != 0) { + if (AdvancedStringCompare (cp, scp->match_text, scp, TRUE, NULL)) { + rval = TRUE; + } else { + cp++; + } + } + } + } + return rval; +} + +static void TestAdvancedStringMatch (void) +{ + StringConstraintPtr scp; + CharPtr text = "The quick brown fox jumped over the lazy dog."; + CharPtr summ; + + scp = StringConstraintNew (); + scp->match_location = String_location_contains; + scp->match_text = StringSave ("dog leaped"); + scp->ignore_words = WordSubstitutionNew(); + scp->ignore_words->word = StringSave ("leap"); + ValNodeAddPointer (&scp->ignore_words->synonyms, 0, StringSave ("jump")); + scp->ignore_words->next = WordSubstitutionNew(); + scp->ignore_words->next->word = StringSave ("dog"); + ValNodeAddPointer (&scp->ignore_words->next->synonyms, 0, StringSave ("fox")); + + + AdvancedStringMatch(text, scp); + summ = SummarizeStringConstraint (scp); + 
summ = MemFree (summ); + scp = StringConstraintFree (scp); + + scp = StringConstraintNew (); + scp->match_location = String_location_equals; + scp->match_text = StringSave ("A fast beige wolf leaped across a sleepy beagle."); + scp->ignore_words = WordSubstitutionNew(); + scp->ignore_words->word = StringSave ("a"); + scp->ignore_words->whole_word = TRUE; + ValNodeAddPointer (&scp->ignore_words->synonyms, 0, StringSave ("the")); + scp->ignore_words->next = WordSubstitutionNew(); + scp->ignore_words->next->word = StringSave ("fast"); + ValNodeAddPointer (&scp->ignore_words->next->synonyms, 0, StringSave ("quick")); + scp->ignore_words->next->next = WordSubstitutionNew(); + scp->ignore_words->next->next->word = StringSave ("beige"); + ValNodeAddPointer (&scp->ignore_words->next->next->synonyms, 0, StringSave ("brown")); + scp->ignore_words->next->next->next = WordSubstitutionNew(); + scp->ignore_words->next->next->next->word = StringSave ("wolf"); + ValNodeAddPointer (&scp->ignore_words->next->next->next->synonyms, 0, StringSave ("fox")); + scp->ignore_words->next->next->next->next = WordSubstitutionNew(); + scp->ignore_words->next->next->next->next->word = StringSave ("across"); + ValNodeAddPointer (&scp->ignore_words->next->next->next->next->synonyms, 0, StringSave ("over")); + scp->ignore_words->next->next->next->next->next = WordSubstitutionNew(); + scp->ignore_words->next->next->next->next->next->word = StringSave ("beagle"); + ValNodeAddPointer (&scp->ignore_words->next->next->next->next->next->synonyms, 0, StringSave ("dog")); + + AdvancedStringMatch(text, scp); + summ = SummarizeStringConstraint (scp); + summ = MemFree (summ); + scp = StringConstraintFree (scp); +} + + +static const CharPtr kPutative = "putative"; + +static CharPtr s_weasels[] = { + "hypothetical", + "probable", + "putative", + NULL +}; + +static CharPtr SkipWeasel (CharPtr str) +{ + Int4 i, len; + CharPtr cp = str; + + for (i = 0; s_weasels[i] != NULL; i++) { + len = StringLen (s_weasels[i]); 
+ if (StringNICmp (str, s_weasels[i], len) == 0 + && isspace (*(str + len))) { + cp = str + len + 1; + while (isspace (*cp)) { + cp++; + } + return cp; + } + } + return cp; +} + + +NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstraintPtr scp) +{ + CharPtr pFound; + Boolean rval = FALSE; + Char char_after = 0; + CharPtr search, pattern; + + if (IsStringConstraintEmpty (scp)) return TRUE; + if (StringHasNoText (str)) return FALSE; + + if (scp->ignore_weasel) { + str = SkipWeasel(str); + } + + if (scp->is_all_caps && !IsAllCaps(str)) { + return FALSE; + } + if (scp->is_all_lower && !IsAllLowerCase(str)) { + return FALSE; + } + if (scp->is_all_punct && !IsAllPunctuation(str)) { + return FALSE; + } + if (scp->match_text == NULL) { + return TRUE; + } + + if (scp->match_location != String_location_inlist && scp->ignore_words != NULL) { + return AdvancedStringMatch(str, scp); + } + + if (scp->match_location != String_location_inlist && (scp->ignore_space || scp->ignore_punct)) { + search = StringSave (str); + StripUnimportantCharacters (search, scp->ignore_space, scp->ignore_punct); + pattern = StringSave (scp->match_text); + StripUnimportantCharacters (pattern, scp->ignore_space, scp->ignore_punct); + } else { + search = str; + pattern = scp->match_text; + } + + switch (scp->match_location) + { + case String_location_contains: + if (scp->case_sensitive) + { + pFound = StringSearch (search, pattern); + } + else + { + pFound = StringISearch (search, pattern); + } + if (pFound == NULL) + { + rval = FALSE; + } + else if (scp->whole_word) + { + rval = IsWholeWordMatch (search, pFound, StringLen (pattern)); + while (!rval && pFound != NULL) + { + if (scp->case_sensitive) + { + pFound = StringSearch (pFound + 1, pattern); + } + else + { + pFound = StringISearch (pFound + 1, pattern); + } + if (pFound != NULL) + { + rval = IsWholeWordMatch (search, pFound, StringLen (pattern)); + } + } + } + else { rval = TRUE; } @@ -4093,17 +4664,17 @@ NLM_EXTERN 
Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain case String_location_starts: if (scp->case_sensitive) { - pFound = StringSearch (str, scp->match_text); + pFound = StringSearch (search, pattern); } else { - pFound = StringISearch (str, scp->match_text); + pFound = StringISearch (search, pattern); } - if (pFound == str) + if (pFound == search) { if (scp->whole_word) { - rval = IsWholeWordMatch (str, pFound, StringLen (scp->match_text)); + rval = IsWholeWordMatch (search, pFound, StringLen (pattern)); } else { @@ -4114,19 +4685,19 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain case String_location_ends: if (scp->case_sensitive) { - pFound = StringSearch (str, scp->match_text); + pFound = StringSearch (search, pattern); } else { - pFound = StringISearch (str, scp->match_text); + pFound = StringISearch (search, pattern); } while (pFound != NULL && !rval) { - char_after = *(pFound + StringLen (scp->match_text)); + char_after = *(pFound + StringLen (pattern)); if (char_after == 0) { if (scp->whole_word) { - rval = IsWholeWordMatch (str, pFound, StringLen (scp->match_text)); + rval = IsWholeWordMatch (search, pFound, StringLen (pattern)); } else { @@ -4139,11 +4710,11 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain { if (scp->case_sensitive) { - pFound = StringSearch (pFound + 1, scp->match_text); + pFound = StringSearch (pFound + 1, pattern); } else { - pFound = StringISearch (pFound + 1, scp->match_text); + pFound = StringISearch (pFound + 1, pattern); } } } @@ -4151,14 +4722,14 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain case String_location_equals: if (scp->case_sensitive) { - if (StringCmp (str, scp->match_text) == 0) + if (StringCmp (search, pattern) == 0) { rval = TRUE; } } else { - if (StringICmp (str, scp->match_text) == 0) + if (StringICmp (search, pattern) == 0) { rval = TRUE; } @@ -4167,11 +4738,11 @@ NLM_EXTERN Boolean 
DoesSingleStringMatchConstraint (CharPtr str, StringConstrain case String_location_inlist: if (scp->case_sensitive) { - pFound = StringSearch (scp->match_text, str); + pFound = StringSearch (pattern, search); } else { - pFound = StringISearch (scp->match_text, str); + pFound = StringISearch (pattern, search); } if (pFound == NULL) { @@ -4179,29 +4750,36 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain } else { - rval = IsWholeWordMatch (scp->match_text, pFound, StringLen (str)); + rval = IsWholeWordMatch (pattern, pFound, StringLen (search)); while (!rval && pFound != NULL) { if (scp->case_sensitive) { - pFound = StringSearch (pFound + 1, str); + pFound = StringSearch (pFound + 1, search); } else { - pFound = StringISearch (pFound + 1, str); + pFound = StringISearch (pFound + 1, search); } if (pFound != NULL) { - rval = IsWholeWordMatch (scp->match_text, pFound, StringLen (str)); + rval = IsWholeWordMatch (pattern, pFound, StringLen (str)); } } } if (!rval) { /* look for spans */ - rval = IsStringInSpanInList (str, scp->match_text); + rval = IsStringInSpanInList (search, pattern); } break; } + + if (search != str) { + search = MemFree (search); + } + if (pattern != scp->match_text) { + pattern = MemFree (pattern); + } return rval; } @@ -4248,6 +4826,86 @@ static Boolean DoesStringListMatchConstraint (ValNodePtr list, StringConstraintP } +NLM_EXTERN Boolean ReplaceStringConstraintPortionInString (CharPtr PNTR str, CharPtr replace, StringConstraintPtr scp) +{ + Boolean rval = FALSE; + CharPtr match_start, new_str; + Int4 match_len, front_len; + + if (str == NULL) { + return FALSE; + } else if (*str == NULL) { + if (IsStringConstraintEmpty (scp) || scp->not_present) { + *str = StringSave (replace); + rval = TRUE; + } + } else if (IsStringConstraintEmpty (scp)) { + *str = MemFree (*str); + *str = StringSave (replace); + rval = TRUE; + } else { + switch (scp->match_location) + { + case String_location_equals: + case 
String_location_inlist: + if (DoesStringMatchConstraint (*str, scp)) { + *str = MemFree (*str); + *str = StringSave (replace); + rval = TRUE; + } + break; + case String_location_starts: + match_len = 0; + if (AdvancedStringCompare (*str, scp->match_text, scp, TRUE, &match_len)) { + new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (*str) - match_len + StringLen (replace) + 1)); + StringCpy (new_str, replace); + StringCat (new_str, (*str) + match_len); + *str = MemFree (*str); + *str = new_str; + rval = TRUE; + } + break; + case String_location_contains: + match_start = *str; + while (*match_start != 0) { + match_len = 0; + if (AdvancedStringCompare (match_start, scp->match_text, scp, (match_start == *str || !isalpha (*(match_start - 1))), &match_len)) { + new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (*str) - match_len + StringLen (replace) + 1)); + front_len = match_start - *str; + StringNCpy (new_str, *str, front_len); + StringCat (new_str, replace); + StringCat (new_str, match_start + match_len); + *str = MemFree (*str); + *str = new_str; + match_start = (*str) + front_len + StringLen (replace); + rval = TRUE; + } else { + match_start++; + } + } + break; + case String_location_ends: + match_start = *str; + while (!rval && *match_start != 0) { + match_len = 0; + if (AdvancedStringCompare (match_start, scp->match_text, scp, (match_start == *str), &match_len) + && *(match_start + match_len) == 0) { + new_str = (CharPtr) MemNew (sizeof (Char) * ((match_start - *str) + StringLen (replace) + 1)); + StringNCpy (new_str, *str, match_start - *str); + StringCat (new_str, replace); + *str = MemFree (*str); + *str = new_str; + rval = TRUE; + } else { + match_start++; + } + } + break; + } + } + return rval; +} + NLM_EXTERN Boolean RemoveStringConstraintPortionFromString (CharPtr PNTR str, StringConstraintPtr scp) { @@ -4781,6 +5439,66 @@ static Boolean DoesFeatureMatchLocationConstraint (SeqFeatPtr sfp, LocationConst } + +static Boolean 
DoesSeqFeatMatchLocationConstraint (SeqFeatPtr sfp, LocationConstraintPtr constraint) +{ + if (sfp == NULL) { + return FALSE; + } else if (IsLocationConstraintEmpty(constraint)) { + return TRUE; + } else if (!DoesLocationMatchPartialnessConstraint (sfp->location, constraint)) { + return FALSE; + } else if (!DoesStrandMatchConstraint (sfp->location, constraint)) { + return FALSE; + } else if (!DoesLocationMatchTypeConstraint (sfp->location, constraint)) { + return FALSE; + } else if (!DoesLocationMatchDistanceConstraint(sfp->location, constraint)) { + return FALSE; + } else { + return TRUE; + } +} + + +static Boolean DoesBioseqMatchLocationConstraint (BioseqPtr bsp, LocationConstraintPtr constraint) +{ + Boolean at_least_one = FALSE; + Boolean rval = TRUE; + SeqFeatPtr sfp; + SeqMgrFeatContext context; + + if (bsp == NULL) return FALSE; + + if (IsLocationConstraintEmpty(constraint)) { + return TRUE; + } + + if (!DoesBioseqMatchSequenceType(bsp, constraint->seq_type)) { + return FALSE; + } + if (constraint->strand != Strand_constraint_any + || constraint->partial5 != Partial_constraint_either + || constraint->partial3 != Partial_constraint_either) { + if (ISA_aa (bsp->mol)) { + sfp = SeqMgrGetCDSgivenProduct (bsp, &context); + return DoesSeqFeatMatchLocationConstraint(sfp, constraint); + } else { + at_least_one = FALSE; + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context); + sfp != NULL && rval; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context)) { + rval = DoesSeqFeatMatchLocationConstraint (sfp, constraint); + at_least_one = TRUE; + } + return rval && at_least_one; + } + } else { + return TRUE; + } + +} + + static Boolean DoesObjectMatchLocationConstraint (Uint1 choice, Pointer data, LocationConstraintPtr constraint) { SeqFeatPtr sfp; @@ -4790,7 +5508,6 @@ static Boolean DoesObjectMatchLocationConstraint (Uint1 choice, Pointer data, Lo BioseqSetPtr bssp; ValNodePtr vnp; ObjValNodePtr ovp; - SeqMgrFeatContext context; if (data == NULL) return FALSE; @@ 
-4814,31 +5531,7 @@ static Boolean DoesObjectMatchLocationConstraint (Uint1 choice, Pointer data, Lo bsp = (BioseqPtr) ovp->idx.parentptr; } } - if (!DoesBioseqMatchSequenceType(bsp, constraint->seq_type)) { - return FALSE; - } - if (constraint->strand != Strand_constraint_any - || constraint->partial5 != Partial_constraint_either - || constraint->partial3 != Partial_constraint_either) { - if (ISA_aa (bsp->mol)) { - sfp = SeqMgrGetCDSgivenProduct (bsp, &context); - if (sfp == NULL) { - return FALSE; - } else if (!DoesLocationMatchPartialnessConstraint (sfp->location, constraint)) { - return FALSE; - } else if (!DoesStrandMatchConstraint (sfp->location, constraint)) { - return FALSE; - } else if (DoesLocationMatchTypeConstraint (sfp->location, constraint)) { - return FALSE; - } else { - return TRUE; - } - } else { - return FALSE; - } - } else { - return TRUE; - } + return DoesBioseqMatchLocationConstraint(bsp, constraint); } else if (choice == 0) { if (constraint->seq_type != Seqtype_constraint_any) { return FALSE; @@ -4865,6 +5558,8 @@ static Boolean DoesObjectMatchLocationConstraint (Uint1 choice, Pointer data, Lo } } return FALSE; + } else if (choice == OBJ_BIOSEQ) { + return DoesBioseqMatchLocationConstraint((BioseqPtr)data, constraint); } else { return FALSE; } @@ -4984,14 +5679,85 @@ static CharPtr FindTextMarker(CharPtr str, Int4Ptr len, TextMarkerPtr marker, Bo } +static CharPtr ApplyEditToString (CharPtr str, FieldEditPtr edit); + /* for parsing and editing */ -NLM_EXTERN CharPtr GetTextPortionFromString (CharPtr str, TextPortionPtr text_portion) +NLM_EXTERN void ApplyTextTransformsToString (CharPtr PNTR str, ValNodePtr transform_list) { - CharPtr portion = NULL; - CharPtr found_start, found_end; - Int4 left_len = 0, right_len = 0, found_len; + CharPtr tmp; - if (StringHasNoText (str)) { + if (str == NULL || *str == NULL) { + return; + } + + while (transform_list != NULL) { + switch (transform_list->choice) { + case TextTransform_edit: + tmp = 
ApplyEditToString (*str, transform_list->data.ptrvalue); + *str = MemFree (*str); + *str = tmp; + break; + case TextTransform_caps: + FixCapitalizationInString (str, transform_list->data.intvalue, NULL); + break; + case TextTransform_remove: + RemoveTextPortionFromString (*str, (TextPortionPtr)transform_list->data.ptrvalue); + break; + } + transform_list = transform_list->next; + } +} + + +static Boolean IsTextPortionEmpty (TextPortionPtr text_portion) +{ + if (text_portion == NULL + || (IsTextMarkerEmpty (text_portion->left_marker) + && IsTextMarkerEmpty (text_portion->right_marker))) { + return TRUE; + } else { + return FALSE; + } +} + + +NLM_EXTERN Boolean IsTextTransformEmpty (ValNodePtr vnp) +{ + Boolean rval = TRUE; + FieldEditPtr edit; + + if (vnp == NULL) { + return TRUE; + } + switch (vnp->choice) { + case TextTransform_edit: + if ((edit = (FieldEditPtr) vnp->data.ptrvalue) != NULL + && edit->find_txt != NULL) { + rval = FALSE; + } + break; + case TextTransform_caps: + if (vnp->data.intvalue > Cap_change_none) { + rval = FALSE; + } + break; + case TextTransform_remove: + if (!IsTextPortionEmpty (vnp->data.ptrvalue)) { + rval = FALSE; + } + break; + } + return rval; +} + + +NLM_EXTERN CharPtr GetTextPortionFromString (CharPtr str, TextPortionPtr text_portion) +{ + CharPtr portion = NULL; + CharPtr found_start, found_end; + Int4 left_len = 0, right_len = 0, found_len; + + if (StringHasNoText (str)) { return NULL; } if (text_portion == NULL) { @@ -5445,7 +6211,7 @@ static CharPtr MakeValFromThreeFields (CharPtr PNTR fields) val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[1]) + 2)); sprintf (val, ":%s:", fields[1]); } else if (empty[1] && empty[2]) { - val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[1]) + 2)); + val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[0]) + 2)); sprintf (val, "%s:", fields[0]); } else if (empty[0]) { val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[1]) + StringLen (fields[2]) + 3)); @@ -5585,7 
+6351,7 @@ SetStringsInValNodeStringList static Boolean SetStringInGBQualList (GBQualPtr PNTR list, ValNodePtr field, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text) { - Boolean rval = FALSE, does_match; + Boolean rval = FALSE, does_match, any_found = FALSE; Int4 gbqual, subfield; CharPtr qual_name = NULL, tmp; GBQualPtr gbq, last_gbq = NULL; @@ -5623,10 +6389,11 @@ static Boolean SetStringInGBQualList (GBQualPtr PNTR list, ValNodePtr field, Str } else if (DoesStringMatchConstraint (gbq->val, scp)) { rval |= SetStringValue (&(gbq->val), new_val, existing_text); } + any_found = TRUE; } last_gbq = gbq; } - if (!rval && (scp == NULL || scp->match_text == NULL)) { + if (!rval && (scp == NULL || scp->match_text == NULL || (any_found == FALSE && scp->not_present))) { gbq = GBQualNew (); gbq->qual = StringSave (qual_name); gbq->val = StringSave (new_val); @@ -6218,6 +6985,7 @@ static Boolean RemoveDbxrefString (ValNodePtr PNTR list, StringConstraintPtr scp } else { vnp_prev = vnp; } + vnp = vnp_next; } return rval; } @@ -6706,7 +7474,7 @@ static Boolean ParseCodonsRecognizedFromCommaDelimitedList (CharPtr str, Uint1Pt ch = str [k]; while (ch != '\0' && q < 3 && rval) { ch = TO_UPPER (ch); - if (StringChr ("ACGTU", ch) != NULL) { + if (StringChr ("ACGTUYNKMRYSWBVHD", ch) != NULL) { if (ch == 'U') { ch = 'T'; } @@ -7289,7 +8057,7 @@ static CharPtr GetAnticodonLocString (SeqFeatPtr sfp) -static ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp) +NLM_EXTERN ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp) { BioseqPtr protbsp; SeqFeatPtr protsfp; @@ -7320,7 +8088,7 @@ static ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp) } -static void GetGeneInfoForFeature (SeqFeatPtr sfp, GeneRefPtr PNTR p_grp, SeqFeatPtr PNTR p_gene) +NLM_EXTERN void GetGeneInfoForFeature (SeqFeatPtr sfp, GeneRefPtr PNTR p_grp, SeqFeatPtr PNTR p_gene) { GeneRefPtr grp = NULL; SeqFeatPtr gene = NULL; @@ -7442,7 +8210,7 @@ static CharPtr GetCodeBreakString (SeqFeatPtr sfp) } -NLM_EXTERN CharPtr 
GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra) +static CharPtr GetQualFromFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra) { CharPtr str = NULL; GeneRefPtr grp = NULL; @@ -7452,12 +8220,9 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, CdRegionPtr crp; ValNodePtr vnp; Char buf[20]; + BioseqPtr protbsp; - if (sfp == NULL || field == NULL || field->field == NULL) - { - return NULL; - } - if (field->type != Feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type)) + if (sfp == NULL || field == NULL) { return NULL; } @@ -7470,8 +8235,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, /* fields common to all features */ /* note, also known as comment */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_note) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_note) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->data.ptrvalue))) { if (!StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { @@ -7480,15 +8245,15 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, } /* db-xref */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_db_xref) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->field->data.ptrvalue)))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_db_xref) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", 
field->data.ptrvalue)))) { str = GetDbxrefString (sfp->dbxref, scp); } /* exception */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_exception) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->field->data.ptrvalue)))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_exception) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->data.ptrvalue)))) { if (!StringHasNoText (sfp->except_text) && DoesStringMatchConstraint(sfp->except_text, scp)) { @@ -7497,8 +8262,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, } /* evidence */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_evidence) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->field->data.ptrvalue)))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_evidence) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->data.ptrvalue)))) { if (sfp->exp_ev == 1) { @@ -7515,16 +8280,16 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, /* citation */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_citation) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->field->data.ptrvalue)))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_citation) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->data.ptrvalue)))) { str = GetCitationTextFromFeature (sfp, scp, batch_extra == NULL ? 
NULL : batch_extra->cit_list); } /* location */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_location) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->field->data.ptrvalue)))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_location) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->data.ptrvalue)))) { str = SeqLocPrintUseBestID (sfp->location); } @@ -7532,8 +8297,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, /* fields common to some features */ /* product */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_product) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->field->data.ptrvalue)))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_product) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->data.ptrvalue)))) { if (prp != NULL) { str = GetFirstValNodeStringMatch (prp->name, scp); @@ -7545,8 +8310,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, /* Gene fields */ /* locus */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->field->data.ptrvalue))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->locus) && DoesStringMatchConstraint(grp->locus, scp)) @@ 
-7556,8 +8321,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, } /* description */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene_description) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_description) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->desc) && DoesStringMatchConstraint(grp->desc, scp)) @@ -7567,8 +8332,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, } /* maploc */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_map) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->field->data.ptrvalue))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_map) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp)) @@ -7578,8 +8343,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, } /* allele */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_allele) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->field->data.ptrvalue))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_allele) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->data.ptrvalue))) && 
grp != NULL && sfp->idx.subtype != FEATDEF_variation) { @@ -7590,8 +8355,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, } /* locus_tag */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_locus_tag) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->field->data.ptrvalue))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_locus_tag) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp)) @@ -7601,16 +8366,16 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, } /* synonym */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_synonym) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->field->data.ptrvalue))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_synonym) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->data.ptrvalue))) && grp != NULL) { str = GetFirstValNodeStringMatch (grp->syn, scp); } /* gene comment */ if (str == NULL - && field->field->choice == FeatQualChoice_legal_qual - && field->field->data.intvalue == Feat_qual_legal_gene_comment + && field->choice == FeatQualChoice_legal_qual + && field->data.intvalue == Feat_qual_legal_gene_comment && gene != NULL && !StringHasNoText (gene->comment) && DoesStringMatchConstraint (gene->comment, scp)) { @@ -7622,8 +8387,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, /* note - product handled above */ /* description */ if (str == NULL - && 
((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_description) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_description) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue))) && prp != NULL) { if (!StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { @@ -7632,16 +8397,16 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, } /* ec_number */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ec_number) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->field->data.ptrvalue))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ec_number) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->data.ptrvalue))) && prp != NULL) { str = GetFirstValNodeStringMatch (prp->ec, scp); } /* activity */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_activity) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->field->data.ptrvalue))) + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_activity) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->data.ptrvalue))) && prp != NULL) { str = GetFirstValNodeStringMatch (prp->activity, scp); @@ -7649,13 +8414,13 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, /* coding region fields */ /* transl_except */ - if 
(field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_transl_except + if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_except && sfp->data.choice == SEQFEAT_CDREGION) { str = GetCodeBreakString (sfp); } /* transl_table */ - if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_transl_table + if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_table && sfp->data.choice == SEQFEAT_CDREGION && (crp = (CdRegionPtr) sfp->data.value.ptrvalue) != NULL) { @@ -7665,36 +8430,46 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, str = StringSave (buf); } } + /* translation */ + if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_translation + && sfp->data.choice == SEQFEAT_CDREGION) + { + if (sfp->product != NULL) + { + protbsp = BioseqFindFromSeqLoc (sfp->product); + str = GetSequenceByBsp (protbsp); + } + } /* special RNA qualifiers */ /* tRNA qualifiers */ /* codon-recognized */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_codons_recognized) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("codon-recognized", field->field->data.ptrvalue)))) { + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codons_recognized) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("codon-recognized", field->data.ptrvalue)))) { str = GettRNACodonsRecognized (sfp, scp); } /* anticodon */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_anticodon) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->field->data.ptrvalue)))) { + 
&& ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_anticodon) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->data.ptrvalue)))) { str = GetAnticodonLocString (sfp); } /* tag-peptide */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_tag_peptide) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("tag-peptide", field->field->data.ptrvalue)))) { + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_tag_peptide) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("tag-peptide", field->data.ptrvalue)))) { str = GettmRNATagPeptide (sfp->data.value.ptrvalue, scp); } /* ncRNA_class */ if (str == NULL - && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ncRNA_class) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ncRNA_class", field->field->data.ptrvalue)))) { + && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ncRNA_class) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ncRNA_class", field->data.ptrvalue)))) { str = GetncRNAClass (sfp->data.value.ptrvalue, scp); } /* codon-start */ - if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_codon_start + if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codon_start && sfp->data.choice == SEQFEAT_CDREGION) { crp = (CdRegionPtr) sfp->data.value.ptrvalue; @@ -7709,25 +8484,48 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, } } + /* special region qualifiers */ + if (sfp->idx.subtype == FEATDEF_REGION + && field->choice == FeatQualChoice_legal_qual + && 
field->data.intvalue == Feat_qual_legal_name + && DoesStringMatchConstraint (sfp->data.value.ptrvalue, scp)) { + str = StringSave (sfp->data.value.ptrvalue); + } + /* actual GenBank qualifiers */ if (str == NULL) { - if (field->field->choice == FeatQualChoice_legal_qual) + if (field->choice == FeatQualChoice_legal_qual) { - gbqual = GetGBQualFromFeatQual (field->field->data.intvalue, &subfield); + gbqual = GetGBQualFromFeatQual (field->data.intvalue, &subfield); if (gbqual > -1) { str = GetFirstGBQualMatch (sfp->qual, ParFlat_GBQual_names [gbqual].name, subfield, scp); } else { /* need to do something with non-qualifier qualifiers */ } } else { - str = GetFirstGBQualMatchConstraintName (sfp->qual, field->field->data.ptrvalue, scp); + str = GetFirstGBQualMatchConstraintName (sfp->qual, field->data.ptrvalue, scp); } } return str; } +NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra) +{ + if (sfp == NULL || field == NULL || field->field == NULL) + { + return NULL; + } + if (field->type != Macro_feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type)) + { + return NULL; + } + return GetQualFromFeatureAnyType (sfp, field->field, scp, batch_extra); + +} + + NLM_EXTERN CharPtr GetQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp) { return GetQualFromFeatureEx (sfp, field, scp, NULL); @@ -7755,7 +8553,7 @@ static Boolean RemoveCodeBreak (CdRegionPtr crp) } -NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp) +static Boolean RemoveQualFromFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, StringConstraintPtr scp) { Boolean rval = FALSE; GeneRefPtr grp = NULL; @@ -7767,11 +8565,7 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, SeqFeatPtr gene = NULL; SeqMgrFeatContext fcontext; - if (sfp == NULL || field == NULL || field->field == NULL) - { - return 
FALSE; - } - if (field->type != Feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type)) + if (sfp == NULL || field == NULL) { return FALSE; } @@ -7802,8 +8596,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, /* fields common to all features */ /* note, also known as comment */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_note) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_note) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->data.ptrvalue))) { if (!StringHasNoText (sfp->comment) && DoesStringMatchConstraint (sfp->comment, scp)) { @@ -7812,24 +8606,25 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, } } /* db-xref */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_db_xref) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_db_xref) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->data.ptrvalue))) { rval = RemoveDbxrefString (&(sfp->dbxref), scp); } /* exception */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_exception) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_exception) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", 
field->data.ptrvalue))) { if (!StringHasNoText (sfp->except_text) && DoesStringMatchConstraint (sfp->except_text, scp)) { sfp->except_text = MemFree (sfp->except_text); + sfp->excpt = FALSE; rval = TRUE; } } /* evidence */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_evidence) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_evidence) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->data.ptrvalue))) { if ((sfp->exp_ev == 1 && DoesStringMatchConstraint("experimental", scp)) || (sfp->exp_ev == 2 && DoesStringMatchConstraint("non-experimental", scp))) { @@ -7839,8 +8634,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, } /* citation */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_citation) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_citation) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->data.ptrvalue))) { if (sfp->cit != NULL) { sfp->cit = PubSetFree (sfp->cit); @@ -7849,8 +8644,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, } /* location */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_location) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_location) + || (field->choice == 
FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->data.ptrvalue))) { if (sfp->location != NULL) { sfp->location = SeqLocFree (sfp->location); @@ -7859,8 +8654,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, } /* pseudo */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_pseudo) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudo", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_pseudo) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudo", field->data.ptrvalue))) { if (gene != NULL) { if (gene->pseudo) { @@ -7877,8 +8672,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, /* fields common to some features */ /* product */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_product) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_product) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->data.ptrvalue))) { if (prp != NULL) { rval = RemoveValNodeStringMatch (&(prp->name), scp); @@ -7889,8 +8684,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, /* Gene fields */ /* locus */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene) + || (field->choice == 
FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->locus) && DoesStringMatchConstraint (grp->locus, scp)) { @@ -7899,8 +8694,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, } } /* description */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene_description) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_description) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->desc) && DoesStringMatchConstraint(grp->desc, scp)) @@ -7910,8 +8705,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, } } /* maploc */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_map) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_map) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp)) @@ -7921,8 +8716,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, } } /* allele */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_allele) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && 
field->data.intvalue == Feat_qual_legal_allele) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->data.ptrvalue))) && grp != NULL && sfp->idx.subtype != FEATDEF_variation) { @@ -7933,8 +8728,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, } } /* locus_tag */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_locus_tag) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_locus_tag) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp)) @@ -7944,15 +8739,15 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, } } /* synonym */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_synonym) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_synonym) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->data.ptrvalue))) && grp != NULL) { rval = RemoveValNodeStringMatch (&(grp->syn), scp); } /* gene comment */ - if (field->field->choice == FeatQualChoice_legal_qual - && field->field->data.intvalue == Feat_qual_legal_gene_comment + if (field->choice == FeatQualChoice_legal_qual + && field->data.intvalue == Feat_qual_legal_gene_comment && gene != NULL && !StringHasNoText (gene->comment) && DoesStringMatchConstraint (gene->comment, scp)) { @@ -7963,8 +8758,8 @@ NLM_EXTERN Boolean 
RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, /* protein fields */ /* note - product handled above */ /* description */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_description) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_description) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue))) && prp != NULL) { if (!StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { @@ -7973,19 +8768,19 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, } } /* ec_number */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ec_number) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ec_number) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->data.ptrvalue))) && prp != NULL) { rval = RemoveValNodeStringMatch (&(prp->ec), scp); } /* activity */ - if (((field->field->choice == FeatQualChoice_legal_qual - && (field->field->data.intvalue == Feat_qual_legal_activity - || field->field->data.intvalue == Feat_qual_legal_function)) - || (field->field->choice == FeatQualChoice_illegal_qual - && (DoesStringMatchConstraint ("activity", field->field->data.ptrvalue) - || DoesStringMatchConstraint ("function", field->field->data.ptrvalue)))) + if (((field->choice == FeatQualChoice_legal_qual + && (field->data.intvalue == Feat_qual_legal_activity + || field->data.intvalue == Feat_qual_legal_function)) + || (field->choice == 
FeatQualChoice_illegal_qual + && (DoesStringMatchConstraint ("activity", field->data.ptrvalue) + || DoesStringMatchConstraint ("function", field->data.ptrvalue)))) && prp != NULL) { rval = RemoveValNodeStringMatch (&(prp->activity), scp); @@ -7993,14 +8788,14 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, /* special coding region fields */ /* transl_except */ - if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_transl_except + if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_except && sfp->data.choice == SEQFEAT_CDREGION) { crp = (CdRegionPtr) sfp->data.value.ptrvalue; rval = RemoveCodeBreak (crp); } /* transl_table */ - if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_transl_table + if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_table && sfp->data.choice == SEQFEAT_CDREGION && (crp = (CdRegionPtr) sfp->data.value.ptrvalue) != NULL) { @@ -8013,8 +8808,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, /* special RNA fields */ /* anticodon */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_anticodon) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_anticodon) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->data.ptrvalue))) && rrp != NULL && rrp->ext.choice == 2) { trp = (tRNAPtr) rrp->ext.value.ptrvalue; @@ -8024,38 +8819,50 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, } } /* codons recognized */ - if (((field->field->choice == FeatQualChoice_legal_qual && 
field->field->data.intvalue == Feat_qual_legal_codons_recognized) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("codon-recognized", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codons_recognized) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("codon-recognized", field->data.ptrvalue))) && rrp != NULL && rrp->ext.choice == 2) { rval = RemovetRNACodons_Recognized (sfp); } /* tag_peptide */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_tag_peptide) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("tag-peptide", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_tag_peptide) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("tag-peptide", field->data.ptrvalue))) && rrp != NULL && rrp->ext.choice == 3) { rval = RemovetmRNATagPeptide (rrp, scp); } - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ncRNA_class) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ncRNA_class", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ncRNA_class) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ncRNA_class", field->data.ptrvalue))) && rrp != NULL && rrp->ext.choice == 3) { rval = RemovencRNAClass (rrp, scp); } + /* special region qualifiers */ + if (sfp->idx.subtype == FEATDEF_REGION + && field->choice == FeatQualChoice_legal_qual + && field->data.intvalue == Feat_qual_legal_name + && !StringHasNoText (sfp->data.value.ptrvalue) + && DoesStringMatchConstraint (sfp->data.value.ptrvalue, scp)) { + sfp->data.value.ptrvalue = 
MemFree (sfp->data.value.ptrvalue); + rval = TRUE; + } + + + if (!rval) { /* actual GenBank qualifiers */ - if (field->field->choice == FeatQualChoice_legal_qual) + if (field->choice == FeatQualChoice_legal_qual) { - gbqual = GetGBQualFromFeatQual (field->field->data.intvalue, &subfield); + gbqual = GetGBQualFromFeatQual (field->data.intvalue, &subfield); if (gbqual > -1) { rval = RemoveGBQualMatch (&(sfp->qual), ParFlat_GBQual_names [gbqual].name, subfield, scp); } else { /* need to do something with non-qualifier qualifiers */ } } else { - rval = RemoveGBQualMatchConstraintName (&(sfp->qual), field->field->data.ptrvalue, scp); + rval = RemoveGBQualMatchConstraintName (&(sfp->qual), field->data.ptrvalue, scp); } } @@ -8063,6 +8870,21 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, } +NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp) +{ + if (sfp == NULL || field == NULL || field->field == NULL) + { + return FALSE; + } + if (field->type != Macro_feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type)) + { + return FALSE; + } + + return RemoveQualFromFeatureAnyType (sfp, field->field, scp); +} + + static Boolean ChooseBestFrame (SeqFeatPtr sfp) { CdRegionPtr crp; @@ -8103,6 +8925,62 @@ static Boolean ChooseBestFrame (SeqFeatPtr sfp) } +static Boolean ChooseMatchingFrame (SeqFeatPtr sfp) +{ + CdRegionPtr crp; + BioseqPtr protbsp; + CharPtr expected_translation, frame_translation; + Uint1 new_frame = 0, i, orig_frame; + ByteStorePtr bs; + Boolean retval = FALSE; + + if (sfp == NULL + || sfp->data.choice != SEQFEAT_CDREGION + || sfp->product == NULL + || (protbsp = BioseqFindFromSeqLoc (sfp->product)) == NULL + || (crp = sfp->data.value.ptrvalue) == NULL) { + return FALSE; + } + + expected_translation = GetSequenceByBsp (protbsp); + if (StringHasNoText (expected_translation)) { + expected_translation = MemFree (expected_translation); + return FALSE; + } + 
+ orig_frame = crp->frame; + + for (i = 1; i <= 3 && !retval; i++) { + crp->frame = i; + bs = ProteinFromCdRegionEx (sfp, FALSE, FALSE); + frame_translation = BSMerge (bs, NULL); + if (StringCmp (frame_translation, expected_translation) == 0) { + new_frame = i; + retval = TRUE; + } + BSFree (bs); + frame_translation = MemFree (frame_translation); + } + expected_translation = MemFree (expected_translation); + + if (new_frame == 1 && orig_frame == 0) { + new_frame = 0; + } + + if (retval) { + crp->frame = new_frame; + if (new_frame == orig_frame) { + /* didn't actually change the frame */ + retval = FALSE; + } + } else { + crp->frame = orig_frame; + } + + return retval; +} + + static SeqFeatPtr CreateGeneForFeature (SeqFeatPtr sfp) { BioseqPtr bsp; @@ -8266,7 +9144,7 @@ static Boolean SetGeneticCode (CdRegionPtr crp, Int4 value) } -static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra) +static Boolean SetQualOnFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra) { Boolean rval = FALSE; Boolean matched_term = FALSE; @@ -8277,11 +9155,7 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String SeqFeatPtr gene = NULL; SeqMgrFeatContext fcontext; - if (sfp == NULL || field == NULL || field->field == NULL) - { - return FALSE; - } - if (field->type != Feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type)) + if (sfp == NULL || field == NULL) { return FALSE; } @@ -8305,8 +9179,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String /* fields common to all features */ /* note, also known as comment */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_note) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", 
field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_note) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->data.ptrvalue))) { if (DoesStringMatchConstraint(sfp->comment, scp)) { @@ -8315,24 +9189,29 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String matched_term = TRUE; } /* db-xref */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_db_xref) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_db_xref) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->data.ptrvalue))) { rval = SetDbxrefString (&(sfp->dbxref), scp, value, existing_text); } /* exception */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_exception) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_exception) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->data.ptrvalue))) { if (DoesStringMatchConstraint(sfp->except_text, scp)) { rval = SetStringValue ( &(sfp->except_text), value, existing_text); + if (StringHasNoText(sfp->except_text)) { + sfp->excpt = FALSE; + } else { + sfp->excpt = TRUE; + } } matched_term = TRUE; } /* evidence */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_evidence) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->field->data.ptrvalue))) + if ((field->choice 
== FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_evidence) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->data.ptrvalue))) { tmp = NULL; if (sfp->exp_ev == 1) @@ -8363,23 +9242,23 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String } /* citation */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_citation) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_citation) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->data.ptrvalue))) { rval = SetCitationTextOnFeature (sfp, scp, value, existing_text, batch_extra == NULL ? NULL : batch_extra->cit_list); } /* location */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_location) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_location) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->data.ptrvalue))) { rval = SetFeatureLocation (sfp, value, existing_text); return rval; } /* pseudo */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_pseudo) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudo", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_pseudo) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudo", field->data.ptrvalue))) { if (gene != NULL) { if 
(!gene->pseudo) { @@ -8398,8 +9277,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String /* fields common to some features */ /* product */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_product) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_product) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->data.ptrvalue))) { if (prp != NULL) { rval = SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text); @@ -8411,8 +9290,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String /* Gene fields */ /* locus */ - if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->field->data.ptrvalue))) + if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->data.ptrvalue))) { if (grp == NULL && IsStringConstraintEmpty (scp)) { @@ -8431,8 +9310,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String } /* description */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene_description) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_description) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue))) && grp 
!= NULL) { if (DoesStringMatchConstraint(grp->desc, scp)) @@ -8442,8 +9321,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String matched_term = TRUE; } /* maploc */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_map) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_map) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->data.ptrvalue))) && grp != NULL) { if (DoesStringMatchConstraint(grp->maploc, scp)) @@ -8453,8 +9332,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String matched_term = TRUE; } /* allele */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_allele) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_allele) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->data.ptrvalue))) && grp != NULL && sfp->idx.subtype != FEATDEF_variation) { @@ -8465,8 +9344,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String matched_term = TRUE; } /* locus_tag */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_locus_tag) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_locus_tag) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->data.ptrvalue))) && grp != 
NULL) { if (DoesStringMatchConstraint(grp->locus_tag, scp)) @@ -8476,16 +9355,16 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String matched_term = TRUE; } /* synonym */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_synonym) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_synonym) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->data.ptrvalue))) && grp != NULL) { rval = SetStringsInValNodeStringList (&(grp->syn), scp, value, existing_text); matched_term = TRUE; } /* gene comment */ - if (field->field->choice == FeatQualChoice_legal_qual - && field->field->data.intvalue == Feat_qual_legal_gene_comment + if (field->choice == FeatQualChoice_legal_qual + && field->data.intvalue == Feat_qual_legal_gene_comment && gene != NULL) { rval = SetStringValue (&(gene->comment), value, existing_text); matched_term = TRUE; @@ -8494,8 +9373,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String /* protein fields */ /* note - product handled above */ /* description */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_description) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_description) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue))) && prp != NULL) { if (DoesStringMatchConstraint(prp->desc, scp)) { @@ -8503,15 +9382,15 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String } } /* ec_number */ - if 
(((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ec_number) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ec_number) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->data.ptrvalue))) && prp != NULL) { rval = SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text); } /* activity */ - if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_activity) - || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->field->data.ptrvalue))) + if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_activity) + || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->data.ptrvalue))) && prp != NULL) { rval = SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text); @@ -8520,7 +9399,7 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String /* special coding region fields */ /* codon start */ /* note - if product existed before, it will be retranslated */ - if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_codon_start + if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codon_start && sfp->data.choice == SEQFEAT_CDREGION) { crp = (CdRegionPtr) sfp->data.value.ptrvalue; @@ -8528,6 +9407,10 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String { rval = ChooseBestFrame (sfp); } + else if (StringICmp (value, "match") == 0) + { + rval = ChooseMatchingFrame (sfp); + } else if (StringCmp (value, "1") == 0) { crp->frame = 1; @@ -8546,9 +9429,10 @@ static 
Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String if (rval && sfp->product != NULL) { AdjustProteinSequenceForReadingFrame (sfp); } + matched_term = TRUE; } /* transl_except */ - if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_transl_except + if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_except && sfp->data.choice == SEQFEAT_CDREGION) { crp = (CdRegionPtr) sfp->data.value.ptrvalue; @@ -8562,7 +9446,7 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String } } /* transl_table */ - if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_transl_table + if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_table && sfp->data.choice == SEQFEAT_CDREGION && (crp = (CdRegionPtr) sfp->data.value.ptrvalue) != NULL && IsAllDigits (value)) @@ -8575,136 +9459,326 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String } - /* special RNA fields + /* special RNA fields */ /* tRNA fields */ if (sfp->idx.subtype == FEATDEF_tRNA - && ((field->field->choice == FeatQualChoice_legal_qual - && field->field->data.intvalue == Feat_qual_legal_codons_recognized) - || (field->field->choice == FeatQualChoice_illegal_qual - && DoesStringMatchConstraint ("codon-recognized", field->field->data.ptrvalue)))) + && ((field->choice == FeatQualChoice_legal_qual + && field->data.intvalue == Feat_qual_legal_codons_recognized) + || (field->choice == FeatQualChoice_illegal_qual + && DoesStringMatchConstraint ("codon-recognized", field->data.ptrvalue)))) { rval = SettRNACodons_Recognized (sfp, scp, value, existing_text); } if (sfp->idx.subtype == FEATDEF_tRNA - && ((field->field->choice == FeatQualChoice_legal_qual - && field->field->data.intvalue == Feat_qual_legal_anticodon) - || (field->field->choice == FeatQualChoice_illegal_qual 
- && DoesStringMatchConstraint ("anticodon", field->field->data.ptrvalue)))) + && ((field->choice == FeatQualChoice_legal_qual + && field->data.intvalue == Feat_qual_legal_anticodon) + || (field->choice == FeatQualChoice_illegal_qual + && DoesStringMatchConstraint ("anticodon", field->data.ptrvalue)))) { rval = SetAnticodon (sfp, scp, value, existing_text); } if (sfp->idx.subtype == FEATDEF_tmRNA - && ((field->field->choice == FeatQualChoice_legal_qual - && field->field->data.intvalue == Feat_qual_legal_tag_peptide) - || (field->field->choice == FeatQualChoice_illegal_qual - && DoesStringMatchConstraint ("tag-peptide", field->field->data.ptrvalue)))) + && ((field->choice == FeatQualChoice_legal_qual + && field->data.intvalue == Feat_qual_legal_tag_peptide) + || (field->choice == FeatQualChoice_illegal_qual + && DoesStringMatchConstraint ("tag-peptide", field->data.ptrvalue)))) { rval = SettmRNATagPeptide (sfp->data.value.ptrvalue, scp, value, existing_text); } if (sfp->idx.subtype == FEATDEF_ncRNA - && ((field->field->choice == FeatQualChoice_legal_qual - && field->field->data.intvalue == Feat_qual_legal_ncRNA_class) - || (field->field->choice == FeatQualChoice_illegal_qual - && DoesStringMatchConstraint ("ncRNA_class", field->field->data.ptrvalue)))) + && ((field->choice == FeatQualChoice_legal_qual + && field->data.intvalue == Feat_qual_legal_ncRNA_class) + || (field->choice == FeatQualChoice_illegal_qual + && DoesStringMatchConstraint ("ncRNA_class", field->data.ptrvalue)))) { rval = SetncRNAClass (sfp->data.value.ptrvalue, scp, value, existing_text); matched_term = TRUE; } + /* special region qualifiers */ + if (sfp->idx.subtype == FEATDEF_REGION + && field->choice == FeatQualChoice_legal_qual + && field->data.intvalue == Feat_qual_legal_name + && DoesStringMatchConstraint(sfp->data.value.ptrvalue, scp)) + { + rval = SetStringValue ((CharPtr PNTR)(&(sfp->data.value.ptrvalue)), value, existing_text); + matched_term = TRUE; + } + /* actual GenBank qualifiers */ 
if (!rval && !matched_term) { - rval = SetStringInGBQualList (&(sfp->qual), field->field, scp, value, existing_text); + rval = SetStringInGBQualList (&(sfp->qual), field, scp, value, existing_text); } return rval; } +static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra) +{ + if (sfp == NULL || field == NULL || field->field == NULL) + { + return FALSE; + } + if (field->type != Macro_feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type)) + { + return FALSE; + } + + return SetQualOnFeatureAnyType (sfp, field->field, scp, value, existing_text, batch_extra); +} + + NLM_EXTERN Boolean SetQualOnFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { return SetQualOnFeatureEx (sfp, field, scp, value, existing_text, NULL); } -static void AddLegalFeatureField (ValNodePtr PNTR list, Uint2 featdef, Uint2 qual) + +NLM_EXTERN CharPtr GetRNAQualFromFeature (SeqFeatPtr sfp, RnaQualPtr rq, StringConstraintPtr scp, BatchExtraPtr batch_extra) { - FeatureFieldPtr ffield; - Int4 gbqual, num_subfields, i, legal_qual; + ValNode vn; - if (list == NULL) return; + if (sfp == NULL || rq == NULL || !DoesFeatureMatchRnaType(sfp, rq->type)) + { + return NULL; + } - ffield = FeatureFieldNew (); - ffield->type = GetFeatureTypeFromFeatdef (featdef); - ValNodeAddInt (&(ffield->field), FeatQualChoice_legal_qual, qual); - ValNodeAddPointer (list, FieldType_feature_field, ffield); + MemSet (&vn, 0, sizeof (ValNode)); + vn.choice = FeatQualChoice_legal_qual; + vn.data.intvalue = GetFeatQualForRnaField (rq->field); - /* also add subfields */ - gbqual = GetGBQualFromFeatQual (qual, NULL); - num_subfields = NumGbQualSubfields (gbqual); - for (i = 1; i <= num_subfields; i++) { - legal_qual = GetFeatQualByGBQualAndSubfield (gbqual, i); - if (legal_qual > -1) { - ffield = FeatureFieldNew (); - ffield->type = 
GetFeatureTypeFromFeatdef (featdef); - ValNodeAddInt (&(ffield->field), FeatQualChoice_legal_qual, legal_qual); - ValNodeAddPointer (list, FieldType_feature_field, ffield); - } + return GetQualFromFeatureAnyType (sfp, &vn, scp, batch_extra); +} + + +NLM_EXTERN Boolean RemoveRNAQualFromFeature (SeqFeatPtr sfp, RnaQualPtr rq, StringConstraintPtr scp) +{ + ValNode vn; + + if (sfp == NULL || rq == NULL || !DoesFeatureMatchRnaType(sfp, rq->type)) + { + return FALSE; } + MemSet (&vn, 0, sizeof (ValNode)); + vn.choice = FeatQualChoice_legal_qual; + vn.data.intvalue = GetFeatQualForRnaField (rq->field); + + return RemoveQualFromFeatureAnyType (sfp, &vn, scp); } -static ValNodePtr GetFieldListFromFeature (SeqFeatPtr sfp) +NLM_EXTERN Boolean SetRNAQualOnFeature (SeqFeatPtr sfp, RnaQualPtr rq, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { - GeneRefPtr grp = NULL; - SeqFeatPtr gene = NULL; - ProtRefPtr prp = NULL; - ValNodePtr list = NULL; - GBQualPtr qual; - Int4 qual_num; + ValNode vn; - if (sfp == NULL) + if (sfp == NULL || rq == NULL || !DoesFeatureMatchRnaType(sfp, rq->type)) { - return NULL; + return FALSE; } - // for gene fields - GetGeneInfoForFeature (sfp, &grp, &gene); + MemSet (&vn, 0, sizeof (ValNode)); + vn.choice = FeatQualChoice_legal_qual; + vn.data.intvalue = GetFeatQualForRnaField (rq->field); - /* add gene-specific fields */ - if (grp != NULL) { - if (!StringHasNoText (grp->locus)) { - AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_gene); - } - if (!StringHasNoText (grp->allele)) { - AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_allele); - } - if (!StringHasNoText (grp->desc)) { - AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_gene_description); - } - if (!StringHasNoText (grp->maploc)) { - AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_map); - } - if (!StringHasNoText (grp->locus_tag)) { - AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_locus_tag); - } - if 
(grp->syn != NULL) { - AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_synonym); + return SetQualOnFeatureAnyType (sfp, &vn, scp, value, existing_text, NULL); +} + + +static int LIBCALLBACK SortVnpByStringLenShortToLong (VoidPtr ptr1, VoidPtr ptr2) +{ + ValNodePtr vnp1; + ValNodePtr vnp2; + Int4 len1, len2; + + if (ptr1 != NULL && ptr2 != NULL) { + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + if (vnp1 != NULL && vnp2 != NULL) { + len1 = StringLen (vnp1->data.ptrvalue); + len2 = StringLen (vnp2->data.ptrvalue); + if (len1 < len2) { + return -1; + } else if (len1 > len2) { + return 1; + } else { + return StringCmp (vnp1->data.ptrvalue, vnp2->data.ptrvalue); + } } } + return 0; +} - /* add protein-specific fields */ - prp = GetProtRefForFeature (sfp); - if (prp != NULL) { - /* product name */ - if (prp->name != NULL) { - AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_product); - } - /* protein description */ - if (!StringHasNoText (prp->desc)) { - AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_description); + +static int LIBCALLBACK SortVnpByStringLenLongToShort (VoidPtr ptr1, VoidPtr ptr2) +{ + ValNodePtr vnp1; + ValNodePtr vnp2; + Int4 len1, len2; + + if (ptr1 != NULL && ptr2 != NULL) { + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + if (vnp1 != NULL && vnp2 != NULL) { + len1 = StringLen (vnp1->data.ptrvalue); + len2 = StringLen (vnp2->data.ptrvalue); + if (len1 < len2) { + return 1; + } else if (len1 > len2) { + return -1; + } else { + return StringCmp (vnp1->data.ptrvalue, vnp2->data.ptrvalue); + } + } + } + return 0; +} + + +static Boolean SortProtNames (SeqFeatPtr sfp, Uint2 order) +{ + ProtRefPtr prp; + Boolean rval = FALSE; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT + || (prp = (ProtRefPtr) sfp->data.value.ptrvalue) == NULL + || prp->name == NULL + || prp->name->next == NULL) { + return FALSE; + } + switch (order) { + case Sort_order_short_to_long: 
+ if (!ValNodeIsSorted(prp->name, SortVnpByStringLenShortToLong)) { + prp->name = ValNodeSort (prp->name, SortVnpByStringLenShortToLong); + rval = TRUE; + } + break; + case Sort_order_long_to_short: + if (!ValNodeIsSorted(prp->name, SortVnpByStringLenLongToShort)) { + prp->name = ValNodeSort (prp->name, SortVnpByStringLenLongToShort); + rval = TRUE; + } + break; + case Sort_order_alphabetical: + if (!ValNodeIsSorted(prp->name, SortVnpByStringCS)) { + prp->name = ValNodeSort (prp->name, SortVnpByStringCS); + rval = TRUE; + } + break; + } + return rval; +} + + +NLM_EXTERN Boolean SortQualOnFeature (SeqFeatPtr sfp, FeatureFieldPtr field, Uint2 order) +{ + SeqFeatPtr prot = NULL; + BioseqPtr protbsp; + SeqMgrFeatContext context; + Boolean rval = FALSE; + + if (sfp == NULL || field == NULL) { + return FALSE; + } + + if (field->type == Macro_feature_type_cds || field->type == Macro_feature_type_prot) { + if (field->field->choice == FeatQualChoice_legal_qual + && field->field->data.intvalue == Feat_qual_legal_product) { + if (sfp->data.choice == SEQFEAT_CDREGION) { + protbsp = BioseqFindFromSeqLoc (sfp->product); + prot = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &context); + } else if (sfp->idx.subtype == FEATDEF_PROT) { + prot = sfp; + } + rval = SortProtNames (prot, order); + } + } + + return rval; +} + + +static void AddLegalFeatureField (ValNodePtr PNTR list, Uint2 featdef, Uint2 qual) +{ + FeatureFieldPtr ffield; + Int4 gbqual, num_subfields, i, legal_qual; + + if (list == NULL) return; + + ffield = FeatureFieldNew (); + ffield->type = GetFeatureTypeFromFeatdef (featdef); + ValNodeAddInt (&(ffield->field), FeatQualChoice_legal_qual, qual); + ValNodeAddPointer (list, FieldType_feature_field, ffield); + + /* also add subfields */ + gbqual = GetGBQualFromFeatQual (qual, NULL); + num_subfields = NumGbQualSubfields (gbqual); + for (i = 1; i <= num_subfields; i++) { + legal_qual = GetFeatQualByGBQualAndSubfield (gbqual, i); + if (legal_qual > -1) { + ffield = 
FeatureFieldNew (); + ffield->type = GetFeatureTypeFromFeatdef (featdef); + ValNodeAddInt (&(ffield->field), FeatQualChoice_legal_qual, legal_qual); + ValNodeAddPointer (list, FieldType_feature_field, ffield); + } + } + +} + + +static ValNodePtr GetFieldListFromFeature (SeqFeatPtr sfp) +{ + GeneRefPtr grp = NULL; + SeqFeatPtr gene = NULL; + ProtRefPtr prp = NULL; + ValNodePtr list = NULL; + GBQualPtr qual; + Int4 qual_num; + + if (sfp == NULL) + { + return NULL; + } + + // for gene fields + GetGeneInfoForFeature (sfp, &grp, &gene); + + /* add gene-specific fields */ + if (grp != NULL) { + if (!StringHasNoText (grp->locus)) { + AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_gene); + } + if (!StringHasNoText (grp->allele)) { + AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_allele); + } + if (!StringHasNoText (grp->desc)) { + AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_gene_description); + } + if (!StringHasNoText (grp->maploc)) { + AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_map); + } + if (!StringHasNoText (grp->locus_tag)) { + AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_locus_tag); + } + if (grp->syn != NULL) { + AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_synonym); + } + } + + /* add protein-specific fields */ + prp = GetProtRefForFeature (sfp); + if (prp != NULL) { + /* product name */ + if (prp->name != NULL) { + AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_product); + } + /* protein description */ + if (!StringHasNoText (prp->desc)) { + AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_description); } /* ec_number */ if (prp->ec != NULL) { @@ -8749,6 +9823,11 @@ static ValNodePtr GetFieldListFromFeature (SeqFeatPtr sfp) AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_codon_start); } + /* regions */ + if (sfp->idx.subtype == FEATDEF_REGION) { + AddLegalFeatureField (&list, sfp->idx.subtype, 
Feat_qual_legal_name); + } + /* actual GenBank qualifiers */ for (qual = sfp->qual; qual != NULL; qual = qual->next) { @@ -8761,711 +9840,951 @@ static ValNodePtr GetFieldListFromFeature (SeqFeatPtr sfp) } -NLM_EXTERN CharPtr GetSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint) +/* Functions for handling new PCR primer sets: + * GetPrimerValueFromBioSource + * GetMultiplePrimerValuesFromBioSource + * RemovePrimerValueFromBioSource + * SetPrimerValueInBioSource +*/ + +static CharPtr GetPrimerValueFromBioSource (BioSourcePtr biop, Int4 field, StringConstraintPtr constraint) { + PCRReactionSetPtr ps; + PCRPrimerPtr pp; CharPtr str = NULL; - SubSourcePtr ssp; - OrgModPtr mod; - Int4 orgmod_subtype = -1, subsrc_subtype = -1; - Int4 subfield; - ValNode vn; - Char buf[15]; - if (biop == NULL || scp == NULL) return NULL; + if (biop == NULL) { + return NULL; + } - switch (scp->choice) - { - case SourceQualChoice_textqual: - if (scp->data.intvalue == Source_qual_taxname) { - if (biop->org != NULL && !StringHasNoText (biop->org->taxname) - && DoesStringMatchConstraint (biop->org->taxname, constraint)) { - str = StringSave (biop->org->taxname); + ps = biop->pcr_primers; + while (ps != NULL && str == NULL) { + switch (field) { + case Source_qual_fwd_primer_name: + pp = ps->forward; + while (pp != NULL && str == NULL) { + if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) { + str = StringSave (pp->name); + } + pp = pp->next; } - } else if (scp->data.intvalue == Source_qual_common_name) { - if (biop->org != NULL && !StringHasNoText (biop->org->common) - && DoesStringMatchConstraint (biop->org->common, constraint)) { - str = StringSave (biop->org->common); + break; + case Source_qual_fwd_primer_seq: + pp = ps->forward; + while (pp != NULL && str == NULL) { + if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) { + str = StringSave (pp->seq); + } + pp = pp->next; } - } 
else if (scp->data.intvalue == Source_qual_lineage) { - if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->lineage) - && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint)) { - str = StringSave (biop->org->orgname->lineage); + break; + case Source_qual_rev_primer_name: + pp = ps->reverse; + while (pp != NULL && str == NULL) { + if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) { + str = StringSave (pp->name); + } + pp = pp->next; } - } else if (scp->data.intvalue == Source_qual_division) { - if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->div) - && DoesStringMatchConstraint (biop->org->orgname->div, constraint)) { - str = StringSave (biop->org->orgname->div); + break; + case Source_qual_rev_primer_seq: + pp = ps->reverse; + while (pp != NULL && str == NULL) { + if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) { + str = StringSave (pp->seq); + } + pp = pp->next; } - } else if (scp->data.intvalue == Source_qual_dbxref) { - if (biop->org != NULL) { - str = GetDbxrefString (biop->org->db, constraint); + break; + } + ps = ps->next; + } + return str; +} + + +static ValNodePtr GetMultiplePrimerValuesFromBioSource (BioSourcePtr biop, Int4 field, StringConstraintPtr constraint) +{ + PCRReactionSetPtr ps; + PCRPrimerPtr pp; + ValNodePtr list = NULL; + + if (biop == NULL) { + return NULL; + } + + ps = biop->pcr_primers; + while (ps != NULL) { + switch (field) { + case Source_qual_fwd_primer_name: + pp = ps->forward; + while (pp != NULL) { + if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) { + ValNodeAddPointer (&list, 0, StringSave (pp->name)); + } + pp = pp->next; } - } else if (scp->data.intvalue == Source_qual_all_notes) { - vn.choice = SourceQualChoice_textqual; - vn.data.intvalue = Source_qual_subsource_note; - vn.next = NULL; - str = GetSourceQualFromBioSource 
(biop, &vn, constraint); - if (str == NULL) { - vn.data.intvalue = Source_qual_orgmod_note; - str = GetSourceQualFromBioSource (biop, &vn, constraint); + break; + case Source_qual_fwd_primer_seq: + pp = ps->forward; + while (pp != NULL) { + if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) { + ValNodeAddPointer (&list, 0, StringSave (pp->seq)); + } + pp = pp->next; } - } else if (scp->data.intvalue == Source_qual_all_quals) { - /* will not do */ - } else { - orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield); - if (orgmod_subtype == -1) { - subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield); - for (ssp = biop->subtype; ssp != NULL && str == NULL; ssp = ssp->next) { - if (ssp->subtype == subsrc_subtype) { - if (StringHasNoText (ssp->name)) { - if (IsNonTextSourceQual (scp->data.intvalue) - && DoesStringMatchConstraint ("TRUE", constraint)) { - str = StringSave ("TRUE"); - } - } else { - if (subfield == 0) { - if (DoesStringMatchConstraint (ssp->name, constraint)) { - str = StringSave (ssp->name); - } - } else { - str = GetThreeFieldSubfield (ssp->name, subfield); - if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) { - str = MemFree (str); - } - } - } - } + break; + case Source_qual_rev_primer_name: + pp = ps->reverse; + while (pp != NULL) { + if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) { + ValNodeAddPointer (&list, 0, StringSave (pp->name)); } - } else { - if (biop->org != NULL && biop->org->orgname != NULL) { - for (mod = biop->org->orgname->mod; mod != NULL && str == NULL; mod = mod->next) { - if (mod->subtype == orgmod_subtype) { - if (StringHasNoText (mod->subname)) { - if (IsNonTextSourceQual (scp->data.intvalue) - && DoesStringMatchConstraint ("TRUE", constraint)) { - str = StringSave ("TRUE"); - } - } else { - if (subfield == 0) { - if (DoesStringMatchConstraint (mod->subname, constraint)) { - str = StringSave 
(mod->subname); - } - } else { - str = GetThreeFieldSubfield (mod->subname, subfield); - if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) { - str = MemFree (str); - } - } - } - } - } + pp = pp->next; + } + break; + case Source_qual_rev_primer_seq: + pp = ps->reverse; + while (pp != NULL) { + if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) { + ValNodeAddPointer (&list, 0, StringSave (pp->seq)); } + pp = pp->next; } - } - break; - case SourceQualChoice_location: - str = LocNameFromGenome (biop->genome); - if (DoesStringMatchConstraint (str, constraint)) { - str = StringSave (str); - } else { - str = NULL; - } - break; - case SourceQualChoice_origin: - str = OriginNameFromOrigin (biop->origin); - if (DoesStringMatchConstraint (str, constraint)) { - str = StringSave (str); - } else { - str = NULL; - } - break; - case SourceQualChoice_gcode: - if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->gcode != 0) { - sprintf (buf, "%d", biop->org->orgname->gcode); - str = StringSave (buf); - } - break; - case SourceQualChoice_mgcode: - if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->mgcode != 0) { - sprintf (buf, "%d", biop->org->orgname->mgcode); - str = StringSave (buf); - } - break; + break; + } + ps = ps->next; } - return str; + return list; } -NLM_EXTERN ValNodePtr GetMultipleSourceQualsFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint) +static Boolean PCRPrimerIsEmpty (PCRPrimerPtr primer) { - ValNodePtr val_list = NULL; - CharPtr str = NULL; - SubSourcePtr ssp; - OrgModPtr mod; - Int4 orgmod_subtype = -1, subsrc_subtype = -1; - Int4 subfield; - ValNode vn; + if (primer == NULL) { + return TRUE; + } else if (StringHasNoText (primer->name) && StringHasNoText (primer->seq)) { + return TRUE; + } else { + return FALSE; + } +} - if (biop == NULL || scp == NULL) return NULL; - if (scp->choice == SourceQualChoice_textqual) { - if 
(scp->data.intvalue == Source_qual_taxname) { - if (biop->org != NULL && !StringHasNoText (biop->org->taxname) - && DoesStringMatchConstraint (biop->org->taxname, constraint)) { - ValNodeAddPointer (&val_list, 0, StringSave (biop->org->taxname)); - } - } else if (scp->data.intvalue == Source_qual_common_name) { - if (biop->org != NULL && !StringHasNoText (biop->org->common) - && DoesStringMatchConstraint (biop->org->common, constraint)) { - ValNodeAddPointer (&val_list, 0, StringSave (biop->org->common)); - } - } else if (scp->data.intvalue == Source_qual_lineage) { - if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->lineage) - && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint)) { - ValNodeAddPointer (&val_list, 0, StringSave (biop->org->orgname->lineage)); - } - } else if (scp->data.intvalue == Source_qual_division) { - if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->div) - && DoesStringMatchConstraint (biop->org->orgname->div, constraint)) { - ValNodeAddPointer (&val_list, 0, StringSave (biop->org->orgname->div)); - } - } else if (scp->data.intvalue == Source_qual_dbxref) { - if (biop->org != NULL) { - ValNodeLink (&val_list, GetMultipleDbxrefStrings (biop->org->db, constraint)); - } - } else if (scp->data.intvalue == Source_qual_all_notes) { - vn.choice = SourceQualChoice_textqual; - vn.data.intvalue = Source_qual_subsource_note; - vn.next = NULL; - str = GetSourceQualFromBioSource (biop, &vn, constraint); - if (str != NULL) { - ValNodeAddPointer (&val_list, 0, str); - } - vn.data.intvalue = Source_qual_orgmod_note; - str = GetSourceQualFromBioSource (biop, &vn, constraint); - if (str != NULL) { - ValNodeAddPointer (&val_list, 0, str); - } - } else if (scp->data.intvalue == Source_qual_all_quals) { - /* will not do */ - } else { - orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield); - if (orgmod_subtype == -1) { - subsrc_subtype = 
GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield); - for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { - if (ssp->subtype == subsrc_subtype) { - if (StringHasNoText (ssp->name)) { - if (IsNonTextSourceQual (scp->data.intvalue) - && DoesStringMatchConstraint ("TRUE", constraint)) { - ValNodeAddPointer (&val_list, 0, StringSave ("TRUE")); - } - } else { - if (subfield == 0) { - if (DoesStringMatchConstraint (ssp->name, constraint)) { - ValNodeAddPointer (&val_list, 0, StringSave (ssp->name)); - } - } else { - str = GetThreeFieldSubfield (ssp->name, subfield); - if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) { - str = MemFree (str); - } else { - ValNodeAddPointer (&val_list, 0, str); - } - } - } - } - } +static Boolean PCRPrimerListIsEmpty (PCRPrimerPtr primer) +{ + Boolean rval = TRUE; + + while (primer != NULL && rval) { + rval = PCRPrimerIsEmpty(primer); + primer = primer->next; + } + return rval; +} + + +NLM_EXTERN Boolean PCRReactionIsEmpty (PCRReactionPtr pr) +{ + if (pr == NULL) { + return TRUE; + } else if (PCRPrimerListIsEmpty(pr->forward) && PCRPrimerListIsEmpty(pr->reverse)) { + return TRUE; + } else { + return FALSE; + } +} + + +static Boolean RemoveNameFromPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint) +{ + PCRPrimerPtr pp, prev_pp = NULL, next_pp; + Boolean rval = FALSE; + + if (pp_list == NULL || (pp = (PCRPrimerPtr) *pp_list) == NULL) { + return FALSE; + } + while (pp != NULL) { + if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) { + pp->name = MemFree (pp->name); + rval = TRUE; + } + next_pp = pp->next; + if (PCRPrimerIsEmpty(pp)) { + pp->next = NULL; + pp = PCRPrimerFree (pp); + if (prev_pp == NULL) { + *pp_list = next_pp; } else { - if (biop->org != NULL && biop->org->orgname != NULL) { - for (mod = biop->org->orgname->mod; mod != NULL && str == NULL; mod = mod->next) { - if (mod->subtype == orgmod_subtype) { - if (StringHasNoText (mod->subname)) { 
- if (IsNonTextSourceQual (scp->data.intvalue) - && DoesStringMatchConstraint ("TRUE", constraint)) { - ValNodeAddPointer (&val_list, 0, StringSave ("TRUE")); - } - } else { - if (subfield == 0) { - if (DoesStringMatchConstraint (mod->subname, constraint)) { - ValNodeAddPointer (&val_list, 0, StringSave (mod->subname)); - } - } else { - str = GetThreeFieldSubfield (mod->subname, subfield); - if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) { - str = MemFree (str); - } else { - ValNodeAddPointer (&val_list, 0, str); - } - } - } - } - } - } + prev_pp->next = next_pp; } + } else { + prev_pp = pp; } - } else { - str = GetSourceQualFromBioSource (biop, scp, constraint); - if (str != NULL) { - ValNodeAddPointer (&val_list, 0, str); - } + pp = next_pp; } - return val_list; + return rval; } -static Boolean RemoveAllSourceQualsFromBioSource (BioSourcePtr biop, StringConstraintPtr constraint) +static Boolean RemoveSeqFromPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint) { - Int4 i; - Boolean rval = FALSE; - ValNode vn; - - vn.next = NULL; - vn.choice = SourceQualChoice_textqual; + PCRPrimerPtr pp, prev_pp = NULL, next_pp; + Boolean rval = FALSE; - for (i = 0; i < NUM_srcqual_scqual; i++) { - if (srcqual_scqual[i].srcqual != Source_qual_all_quals - && srcqual_scqual[i].srcqual != Source_qual_all_notes) { - vn.data.intvalue = srcqual_scqual[i].srcqual; - rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint); + if (pp_list == NULL || (pp = (PCRPrimerPtr) *pp_list) == NULL) { + return FALSE; + } + while (pp != NULL) { + if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) { + pp->seq = MemFree (pp->seq); + rval = TRUE; + } + next_pp = pp->next; + if (PCRPrimerIsEmpty(pp)) { + pp->next = NULL; + pp = PCRPrimerFree (pp); + if (prev_pp == NULL) { + *pp_list = next_pp; + } else { + prev_pp->next = next_pp; + } + } else { + prev_pp = pp; } + pp = next_pp; } return rval; } -static void 
Lcl_RemoveOldName (OrgRefPtr orp) + +static Boolean RemovePrimerValueFromBioSource (BioSourcePtr biop, Int4 field, StringConstraintPtr constraint) { - OrgModPtr prev = NULL, curr, next_mod; - - if (orp == NULL || orp->orgname == NULL) return; - - curr = orp->orgname->mod; - while (curr != NULL) - { - next_mod = curr->next; - if (curr->subtype == ORGMOD_old_name) - { - if (prev == NULL) - { - orp->orgname->mod = curr->next; - } - else - { - prev->next = curr->next; - } - curr->next = NULL; - OrgModFree (curr); + PCRReactionSetPtr ps, prev_ps = NULL, next_ps; + Boolean rval = FALSE; + + if (biop == NULL) { + return FALSE; + } + + ps = biop->pcr_primers; + while (ps != NULL) { + switch (field) { + case Source_qual_fwd_primer_name: + rval |= RemoveNameFromPrimerList (&(ps->forward), constraint); + break; + case Source_qual_fwd_primer_seq: + rval |= RemoveSeqFromPrimerList (&(ps->forward), constraint); + break; + case Source_qual_rev_primer_name: + rval |= RemoveNameFromPrimerList (&(ps->reverse), constraint); + break; + case Source_qual_rev_primer_seq: + rval |= RemoveSeqFromPrimerList (&(ps->reverse), constraint); + break; } - else - { - prev = curr; + next_ps = ps->next; + if (PCRReactionIsEmpty(ps)) { + ps->next = NULL; + ps = PCRReactionFree (ps); + if (prev_ps == NULL) { + biop->pcr_primers = next_ps; + } else { + prev_ps->next = next_ps; + } + } else { + prev_ps = ps; } - - curr = next_mod; + ps = next_ps; } + return rval; } -NLM_EXTERN Boolean RemoveSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint) + +static Boolean IsCompoundPrimerValue (CharPtr value) { - SubSourcePtr ssp, ssp_prev = NULL, ssp_next; - OrgModPtr mod, mod_prev = NULL, mod_next; - Int4 orgmod_subtype = -1, subsrc_subtype = -1, subfield; - CharPtr str, tmp; - Boolean rval = FALSE, do_remove, does_match; - ValNode vn; + Int4 len; - if (biop == NULL || scp == NULL) return FALSE; + if (StringHasNoText (value)) { + return FALSE; + } else if (StringChr 
(value, ':') != NULL + || StringChr (value, ',') != NULL) { + return TRUE; + } + len = StringLen (value); + if (*value == '(' && value[len - 1] == ')') { + return TRUE; + } else { + return FALSE; + } +} - switch (scp->choice) - { - case SourceQualChoice_textqual: - if (scp->data.intvalue == Source_qual_taxname) { - if (biop->org != NULL && !StringHasNoText (biop->org->taxname) - && DoesStringMatchConstraint (biop->org->taxname, constraint)) { - biop->org->taxname = MemFree (biop->org->taxname); - RemoveTaxRef (biop->org); - Lcl_RemoveOldName (biop->org); - rval = TRUE; - } - } else if (scp->data.intvalue == Source_qual_common_name) { - if (biop->org != NULL && !StringHasNoText (biop->org->common) - && DoesStringMatchConstraint (biop->org->common, constraint)) { - biop->org->common = MemFree (biop->org->common); - rval = TRUE; - } - } else if (scp->data.intvalue == Source_qual_lineage) { - if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->lineage) - && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint)) { - biop->org->orgname->lineage = MemFree (biop->org->orgname->lineage); - rval = TRUE; - } - } else if (scp->data.intvalue == Source_qual_division) { - if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->div) - && DoesStringMatchConstraint (biop->org->orgname->div, constraint)) { - biop->org->orgname->div = MemFree (biop->org->orgname->div); - rval = TRUE; - } - } else if (scp->data.intvalue == Source_qual_dbxref) { - if (biop->org != NULL) { - rval = RemoveDbxrefString (&(biop->org->db), constraint); - } - } else if (scp->data.intvalue == Source_qual_all_notes) { - vn.choice = SourceQualChoice_textqual; - vn.data.intvalue = Source_qual_subsource_note; - vn.next = NULL; - rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint); - vn.data.intvalue = Source_qual_orgmod_note; - rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint); - } else if 
(scp->data.intvalue == Source_qual_all_quals) { - rval |= RemoveAllSourceQualsFromBioSource (biop, constraint); - } else { - orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield); - if (orgmod_subtype == -1) { - subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield); - ssp = biop->subtype; - while (ssp != NULL) { - ssp_next = ssp->next; - do_remove = FALSE; - if (ssp->subtype == subsrc_subtype) { - if (subfield == 0) { - if (DoesStringMatchConstraint (ssp->name, constraint)) { - do_remove = TRUE; - } - } else { - does_match = TRUE; - if (!IsStringConstraintEmpty (constraint)) { - tmp = GetThreeFieldSubfield (ssp->name, subfield); - does_match = DoesStringMatchConstraint (tmp, constraint); - tmp = MemFree (tmp); - } - if (does_match) { - rval |= RemoveThreeFieldSubfield (&(ssp->name), subfield); - if (StringHasNoText (ssp->name)) { - do_remove = TRUE; - } - } - } - } - if (do_remove) { - if (ssp_prev == NULL) { - biop->subtype = ssp->next; - } else { - ssp_prev->next = ssp->next; - } - ssp->next = NULL; - ssp = SubSourceFree (ssp); - rval = TRUE; - } else { - ssp_prev = ssp; - } - ssp = ssp_next; - } - } else { - if (biop->org != NULL && biop->org->orgname != NULL) { - mod = biop->org->orgname->mod; - while (mod != NULL) { - mod_next = mod->next; - do_remove = FALSE; - if (mod->subtype == orgmod_subtype) { - if (subfield == 0) { - if (DoesStringMatchConstraint (mod->subname, constraint)) { - do_remove = TRUE; - } - } else { - does_match = TRUE; - if (!IsStringConstraintEmpty (constraint)) { - tmp = GetThreeFieldSubfield (mod->subname, subfield); - does_match = DoesStringMatchConstraint (tmp, constraint); - tmp = MemFree (tmp); - } - if (does_match) { - rval |= RemoveThreeFieldSubfield (&(mod->subname), subfield); - } - if (StringHasNoText (mod->subname)) { - do_remove = TRUE; - } - } - } - if (do_remove) { - if (mod_prev == NULL) { - biop->org->orgname->mod = mod->next; - } else { - mod_prev->next = mod->next; - } - mod->next = 
NULL; - mod = OrgModFree (mod); - rval = TRUE; - } else { - mod_prev = mod; - } - mod = mod_next; - } - } - } + +static Boolean HasMultiplePrimerSets (CharPtr value) +{ + if (StringChr (value, ',')) { + return TRUE; + } else { + return FALSE; + } +} + + +static ValNodePtr GetPrimerSetComponents (CharPtr value) +{ + CharPtr cp, last_cp, tmp, src, dst; + ValNodePtr list = NULL; + + last_cp = value; + for (cp = StringChr (value, ','); cp != NULL; cp = StringChr (last_cp, ',')) { + tmp = (CharPtr) MemNew (sizeof (Char) * (cp - last_cp + 1)); + src = last_cp; + dst = tmp; + while (src < cp) { + if (*src != '(' && *src != ')') { + *dst = *src; + dst++; } - break; - case SourceQualChoice_location: - str = LocNameFromGenome (biop->genome); - if (DoesStringMatchConstraint (str, constraint)) { - if (scp->data.intvalue == 0 || biop->genome == GenomeFromSrcLoc (scp->data.intvalue)) { - biop->genome = 0; - rval = TRUE; - } + src++; + } + *dst = 0; + ValNodeAddPointer (&list, 0, tmp); + last_cp = cp + 1; + } + if (*last_cp != 0) { + tmp = (CharPtr) MemNew (sizeof (Char) * (StringLen (last_cp) + 1)); + src = last_cp; + dst = tmp; + while (*src != 0) { + if (*src != '(' && *src != ')') { + *dst = *src; + dst++; } - break; - case SourceQualChoice_origin: - str = OriginNameFromOrigin (biop->origin); - if (DoesStringMatchConstraint (str, constraint)) { - if (scp->data.intvalue == 0 || biop->origin == OriginFromSrcOrig (scp->data.intvalue)) { - biop->origin = 0; - rval = TRUE; - } + src++; + } + *dst = 0; + ValNodeAddPointer (&list, 0, tmp); + } + return list; +} + + +static ValNodePtr GetPrimerElements (CharPtr value) +{ + CharPtr cp, last_cp, tmp; + ValNodePtr list = NULL; + Int4 len; + + last_cp = value; + for (cp = StringChr (value, ':'); cp != NULL; cp = StringChr (last_cp, ':')) { + len = cp - last_cp + 1; + tmp = (CharPtr) MemNew (sizeof (Char) * len); + StringNCpy (tmp, last_cp, len - 1); + tmp[len - 1] = 0; + ValNodeAddPointer (&list, 0, tmp); + last_cp = cp + 1; + } + if 
(*last_cp != 0) { + ValNodeAddPointer (&list, 0, StringSave (last_cp)); + } + return list; +} + + +static Boolean OverwriteNameStringIntoPrimerList (CharPtr value, PCRPrimerPtr PNTR p_list) +{ + ValNodePtr elements, vnp; + PCRPrimerPtr pp, prev_pp = NULL; + Boolean any_change = FALSE; + + if (p_list == NULL) { + return FALSE; + } + + elements = GetPrimerElements (value); + for (vnp = elements, pp = *p_list; vnp != NULL; vnp = vnp->next) { + if (pp == NULL) { + pp = PCRPrimerNew (); + if (prev_pp == NULL) { + *p_list = pp; + } else { + prev_pp->next = pp; } - break; - case SourceQualChoice_gcode: - if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->gcode != 0) { - biop->org->orgname->gcode = 0; - rval = TRUE; + any_change = TRUE; + } + if (StringCmp (pp->name, vnp->data.ptrvalue) != 0) { + pp->name = MemFree (pp->name); + pp->name = vnp->data.ptrvalue; + vnp->data.ptrvalue = NULL; + any_change = TRUE; + } + prev_pp = pp; + pp = pp->next; + } + while (pp != NULL) { + if (!StringHasNoText (pp->name)) { + any_change = TRUE; + } + pp->name = MemFree (pp->name); + pp = pp->next; + } + elements = ValNodeFreeData (elements); + return any_change; +} + + +static Boolean OverwriteSeqStringIntoPrimerList (CharPtr value, PCRPrimerPtr PNTR p_list) +{ + ValNodePtr elements, vnp; + PCRPrimerPtr pp, prev_pp = NULL; + Boolean any_change = FALSE; + + if (p_list == NULL) { + return FALSE; + } + + elements = GetPrimerElements (value); + for (vnp = elements, pp = *p_list; vnp != NULL; vnp = vnp->next) { + if (pp == NULL) { + pp = PCRPrimerNew (); + if (prev_pp == NULL) { + *p_list = pp; + } else { + prev_pp->next = pp; } - break; - case SourceQualChoice_mgcode: - if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->mgcode != 0) { - biop->org->orgname->mgcode = 0; - rval = TRUE; + any_change = TRUE; + } + if (StringCmp (pp->seq, vnp->data.ptrvalue) != 0) { + pp->seq = MemFree (pp->seq); + pp->seq = vnp->data.ptrvalue; + vnp->data.ptrvalue = 
NULL; + any_change = TRUE; + } + prev_pp = pp; + pp = pp->next; + } + while (pp != NULL) { + if (!StringHasNoText (pp->seq)) { + any_change = TRUE; + } + pp->seq = MemFree (pp->seq); + pp = pp->next; + } + elements = ValNodeFreeData (elements); + return any_change; +} + + +static Boolean OverwriteFwdNameStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list) +{ + ValNodePtr sets, vnp; + PCRReactionPtr ps, prev_ps = NULL; + Boolean any_change = FALSE; + + if (p_list == NULL) { + return FALSE; + } + + sets = GetPrimerSetComponents (value); + for (vnp = sets, ps = *p_list; vnp != NULL; vnp = vnp->next) { + if (ps == NULL) { + ps = PCRReactionNew (); + if (prev_ps == NULL) { + *p_list = ps; + } else { + prev_ps->next = ps; } - break; + any_change = TRUE; + } + any_change |= OverwriteNameStringIntoPrimerList (vnp->data.ptrvalue, &(ps->forward)); + prev_ps = ps; + ps = ps->next; } - return rval; + while (ps != NULL) { + any_change |= RemoveNameFromPrimerList (&(ps->forward), NULL); + ps = ps->next; + } + sets = ValNodeFreeData (sets); + return any_change; } -NLM_EXTERN Boolean SetSourceQualInBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text) +static Boolean OverwriteRevNameStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list) { - SubSourcePtr ssp, ssp_prev = NULL, ssp_next; - OrgModPtr mod, mod_prev = NULL, mod_next; - Int4 orgmod_subtype = -1, subsrc_subtype = -1, subfield; - CharPtr str, tmp; - Boolean rval = FALSE, found = FALSE, does_match; - ValNode vn; + ValNodePtr sets, vnp; + PCRReactionPtr ps, prev_ps = NULL; + Boolean any_change = FALSE; - if (biop == NULL || scp == NULL) return FALSE; + if (p_list == NULL) { + return FALSE; + } - switch (scp->choice) - { - case SourceQualChoice_textqual: - if (scp->data.intvalue == Source_qual_taxname) { - if ((biop->org == NULL && IsStringConstraintEmpty (constraint)) - || (biop->org != NULL - && DoesStringMatchConstraint 
(biop->org->taxname, constraint))) { - if (biop->org == NULL) { - biop->org = OrgRefNew(); - } - rval = SetStringValue (&(biop->org->taxname), value, existing_text); - if (rval) { - RemoveTaxRef (biop->org); - Lcl_RemoveOldName (biop->org); - } + sets = GetPrimerSetComponents (value); + for (vnp = sets, ps = *p_list; vnp != NULL; vnp = vnp->next) { + if (ps == NULL) { + ps = PCRReactionNew (); + if (prev_ps == NULL) { + *p_list = ps; + } else { + prev_ps->next = ps; + } + any_change = TRUE; + } + any_change |= OverwriteNameStringIntoPrimerList (vnp->data.ptrvalue, &(ps->reverse)); + prev_ps = ps; + ps = ps->next; + } + while (ps != NULL) { + any_change |= RemoveNameFromPrimerList (&(ps->reverse), NULL); + ps = ps->next; + } + sets = ValNodeFreeData (sets); + return any_change; +} + + +static Boolean OverwriteFwdSeqStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list) +{ + ValNodePtr sets, vnp; + PCRReactionPtr ps, prev_ps = NULL; + Boolean any_change = FALSE; + + if (p_list == NULL) { + return FALSE; + } + + sets = GetPrimerSetComponents (value); + for (vnp = sets, ps = *p_list; vnp != NULL; vnp = vnp->next) { + if (ps == NULL) { + ps = PCRReactionNew (); + if (prev_ps == NULL) { + *p_list = ps; + } else { + prev_ps->next = ps; + } + any_change = TRUE; + } + any_change |= OverwriteSeqStringIntoPrimerList (vnp->data.ptrvalue, &(ps->forward)); + prev_ps = ps; + ps = ps->next; + } + while (ps != NULL) { + any_change |= RemoveSeqFromPrimerList (&(ps->forward), NULL); + ps = ps->next; + } + sets = ValNodeFreeData (sets); + return any_change; +} + + +static Boolean OverwriteRevSeqStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list) +{ + ValNodePtr sets, vnp; + PCRReactionPtr ps, prev_ps = NULL; + Boolean any_change = FALSE; + + if (p_list == NULL) { + return FALSE; + } + + sets = GetPrimerSetComponents (value); + for (vnp = sets, ps = *p_list; vnp != NULL; vnp = vnp->next) { + if (ps == NULL) { + ps = PCRReactionNew (); + if (prev_ps == 
NULL) { + *p_list = ps; + } else { + prev_ps->next = ps; + } + any_change = TRUE; + } + any_change |= OverwriteSeqStringIntoPrimerList (vnp->data.ptrvalue, &(ps->reverse)); + prev_ps = ps; + ps = ps->next; + } + while (ps != NULL) { + any_change |= RemoveSeqFromPrimerList (&(ps->reverse), NULL); + ps = ps->next; + } + sets = ValNodeFreeData (sets); + return any_change; +} + + +static Boolean SetNameInPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text) +{ + PCRPrimerPtr pp, prev_pp = NULL; + Boolean rval = FALSE; + + if (pp_list == NULL) { + return FALSE; + } + pp = *pp_list; + + while (pp != NULL) { + if (DoesStringMatchConstraint (pp->name, constraint)) { + rval = SetStringValue (&(pp->name), value, existing_text); + } + prev_pp = pp; + pp = pp->next; + } + return rval; +} + + +static Boolean SetSeqInPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text) +{ + PCRPrimerPtr pp, prev_pp = NULL; + Boolean rval = FALSE; + + if (pp_list == NULL) { + return FALSE; + } + pp = *pp_list; + + while (pp != NULL) { + if (DoesStringMatchConstraint (pp->seq, constraint)) { + rval = SetStringValue (&(pp->seq), value, existing_text); + } + prev_pp = pp; + pp = pp->next; + } + return rval; +} + + +static Boolean SetPrimerValueInBioSource(BioSourcePtr biop, Int4 field, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text) +{ + PCRReactionSetPtr ps, prev_ps = NULL; + Boolean rval = FALSE; + + ps = biop->pcr_primers; + + if (IsCompoundPrimerValue(value)) { + if (existing_text != ExistingTextOption_leave_old || biop->pcr_primers == NULL) { + switch (field) { + case Source_qual_fwd_primer_name: + rval = OverwriteFwdNameStringIntoPCRReactionSet (value, &(biop->pcr_primers)); + break; + case Source_qual_fwd_primer_seq: + rval = OverwriteFwdSeqStringIntoPCRReactionSet (value, &(biop->pcr_primers)); + break; + case Source_qual_rev_primer_name: + rval = 
OverwriteRevNameStringIntoPCRReactionSet (value, &(biop->pcr_primers)); + break; + case Source_qual_rev_primer_seq: + rval = OverwriteRevSeqStringIntoPCRReactionSet (value, &(biop->pcr_primers)); + break; + } + } + } else { + while (ps != NULL) { + switch (field) { + case Source_qual_fwd_primer_name: + rval |= SetNameInPrimerList (&(ps->forward), constraint, value, existing_text); + break; + case Source_qual_fwd_primer_seq: + rval |= SetSeqInPrimerList (&(ps->forward), constraint, value, existing_text); + break; + case Source_qual_rev_primer_name: + rval |= SetNameInPrimerList (&(ps->reverse), constraint, value, existing_text); + break; + case Source_qual_rev_primer_seq: + rval |= SetSeqInPrimerList (&(ps->reverse), constraint, value, existing_text); + break; + } + prev_ps = ps; + ps = ps->next; + } + + if (IsStringConstraintEmpty (constraint) && !rval && (existing_text != ExistingTextOption_leave_old || biop->pcr_primers == NULL)) { + if (prev_ps == NULL) { + ps = PCRReactionSetNew (); + biop->pcr_primers = ps; + } else if ((PCRPrimerListIsEmpty(prev_ps->forward) + && (field == Source_qual_fwd_primer_name || field == Source_qual_fwd_primer_seq)) + || (PCRPrimerListIsEmpty(prev_ps->reverse) + && (field == Source_qual_rev_primer_name || field == Source_qual_rev_primer_seq))) { + /* add to previous set */ + ps = prev_ps; + } else { + /* field is filled on previous, build a new one */ + ps = PCRReactionSetNew (); + prev_ps->next = ps; + } + switch (field) { + case Source_qual_fwd_primer_name: + ps->forward = PCRPrimerNew (); + ps->forward->name = StringSave (value); + rval = TRUE; + break; + case Source_qual_fwd_primer_seq: + ps->forward = PCRPrimerNew (); + ps->forward->seq = StringSave (value); + rval = TRUE; + break; + case Source_qual_rev_primer_name: + ps->reverse = PCRPrimerNew (); + ps->reverse->name = StringSave (value); + rval = TRUE; + break; + case Source_qual_rev_primer_seq: + ps->reverse = PCRPrimerNew (); + ps->reverse->seq = StringSave (value); + rval = 
TRUE; + break; + } + } + } + return rval; +} + + + +/* functions for source qualifiers */ + +NLM_EXTERN Boolean HasTaxonomyID (BioSourcePtr biop) +{ + ValNodePtr db; + DbtagPtr dbt; + Boolean rval = FALSE; + + if (biop == NULL || biop->org == NULL) { + return FALSE; + } + for (db = biop->org->db; db != NULL && !rval; db = db->next) { + dbt = (DbtagPtr) db->data.ptrvalue; + if (dbt != NULL && dbt->db != NULL && + StringICmp (dbt->db, "taxon") == 0) { + rval = TRUE; + } + } + return rval; +} + + +static CharPtr GetTaxonomyId (BioSourcePtr biop, StringConstraintPtr scp) +{ + ValNodePtr db; + DbtagPtr dbt; + CharPtr str = NULL; + Char buf[15]; + + if (biop == NULL || biop->org == NULL) { + return NULL; + } + for (db = biop->org->db; db != NULL && str == NULL; db = db->next) { + dbt = (DbtagPtr) db->data.ptrvalue; + if (dbt != NULL && dbt->db != NULL && + StringICmp (dbt->db, "taxon") == 0) { + if (dbt->tag->id > 0) { + sprintf (buf, "%d", dbt->tag->id); + if (DoesStringMatchConstraint (buf, scp)) { + str = StringSave (buf); + } + } else if (!StringHasNoText (dbt->tag->str) && DoesStringMatchConstraint (dbt->tag->str, scp)) { + str = StringSave (dbt->tag->str); + } + } + } + return str; +} + + +static ValNodePtr GetMultipleTaxidStrings (ValNodePtr list, StringConstraintPtr scp) +{ + ValNodePtr vnp, val_list = NULL; + DbtagPtr dbt; + CharPtr str = NULL; + Char buf[15]; + + for (vnp = list; vnp != NULL; vnp = vnp->next) { + dbt = (DbtagPtr) vnp->data.ptrvalue; + if (dbt != NULL && StringCmp (dbt->db, "taxon") == 0) { + if (dbt->tag->id > 0) { + sprintf (buf, "%d", dbt->tag->id); + if (DoesStringMatchConstraint (buf, scp)) { + str = StringSave (buf); + } + } else if (!StringHasNoText (dbt->tag->str) && DoesStringMatchConstraint (dbt->tag->str, scp)) { + str = StringSave (dbt->tag->str); + } + if (str != NULL) { + ValNodeAddPointer (&val_list, 0, str); + } + } + } + + return val_list; +} + + +static Boolean SetTaxonomyId (BioSourcePtr biop, StringConstraintPtr scp, CharPtr 
value, Uint2 existing_text) +{ + CharPtr tmp; + CharPtr fmt = "taxon:%s"; + Boolean rval; + + if (biop == NULL) { + return FALSE; + } + if (biop->org == NULL) { + biop->org = OrgRefNew(); + } + tmp = (CharPtr) MemNew (sizeof (Char) * (StringLen (value) + StringLen (fmt))); + sprintf (tmp, fmt, value == NULL ? "" : value); + rval = SetDbxrefString (&(biop->org->db), scp, tmp, existing_text); + tmp = MemFree (tmp); + return rval; +} + + +static Boolean RemoveTaxonomyId (BioSourcePtr biop, StringConstraintPtr scp) +{ + ValNodePtr db, db_prev = NULL, db_next; + DbtagPtr dbt; + Boolean rval = FALSE, do_remove; + Char buf[15]; + + if (biop == NULL || biop->org == NULL) { + return FALSE; + } + for (db = biop->org->db; db != NULL; db = db_next) { + db_next = db->next; + dbt = (DbtagPtr) db->data.ptrvalue; + do_remove = FALSE; + if (dbt != NULL && dbt->db != NULL && + StringICmp (dbt->db, "taxon") == 0) { + if (dbt->tag->id > 0) { + sprintf (buf, "%d", dbt->tag->id); + if (DoesStringMatchConstraint (buf, scp)) { + do_remove = TRUE; + } + } else if (!StringHasNoText (dbt->tag->str) && DoesStringMatchConstraint (dbt->tag->str, scp)) { + do_remove = TRUE; + } + } + if (do_remove) { + if (db_prev == NULL) { + biop->org->db = db_next; + } else { + db_prev->next = db_next; + } + db->next = NULL; + db->data.ptrvalue = DbtagFree (db->data.ptrvalue); + db = ValNodeFree (db); + rval = TRUE; + } else { + db_prev = db; + } + } + return rval; +} + + +NLM_EXTERN CharPtr GetSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint) +{ + CharPtr str = NULL; + SubSourcePtr ssp; + OrgModPtr mod; + Int4 orgmod_subtype = -1, subsrc_subtype = -1; + Int4 subfield; + ValNode vn; + Char buf[15]; + + if (biop == NULL || scp == NULL) return NULL; + + switch (scp->choice) + { + case SourceQualChoice_textqual: + if (scp->data.intvalue == Source_qual_taxname) { + if (biop->org != NULL && !StringHasNoText (biop->org->taxname) + && DoesStringMatchConstraint 
(biop->org->taxname, constraint)) { + str = StringSave (biop->org->taxname); } } else if (scp->data.intvalue == Source_qual_common_name) { - if ((biop->org == NULL && IsStringConstraintEmpty (constraint)) - || (biop->org != NULL - && DoesStringMatchConstraint (biop->org->common, constraint))) { - if (biop->org == NULL) { - biop->org = OrgRefNew(); - } - rval = SetStringValue (&(biop->org->common), value, existing_text); + if (biop->org != NULL && !StringHasNoText (biop->org->common) + && DoesStringMatchConstraint (biop->org->common, constraint)) { + str = StringSave (biop->org->common); } } else if (scp->data.intvalue == Source_qual_lineage) { - if ((biop->org == NULL && IsStringConstraintEmpty (constraint)) - ||(biop->org != NULL && biop->org->orgname == NULL && IsStringConstraintEmpty (constraint)) - ||(biop->org != NULL && biop->org->orgname != NULL - && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint))) { - if (biop->org == NULL) { - biop->org = OrgRefNew(); - } - if (biop->org->orgname == NULL) { - biop->org->orgname = OrgNameNew (); - } - rval = SetStringValue (&(biop->org->orgname->lineage), value, existing_text); + if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->lineage) + && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint)) { + str = StringSave (biop->org->orgname->lineage); } } else if (scp->data.intvalue == Source_qual_division) { - if ((biop->org == NULL && IsStringConstraintEmpty (constraint)) - || (biop->org != NULL && biop->org->orgname == NULL && IsStringConstraintEmpty (constraint)) - || (biop->org != NULL && biop->org->orgname != NULL - && DoesStringMatchConstraint (biop->org->orgname->div, constraint))) { - if (biop->org == NULL) { - biop->org = OrgRefNew(); - } - if (biop->org->orgname == NULL) { - biop->org->orgname = OrgNameNew (); - } - rval = SetStringValue (&(biop->org->orgname->div), value, existing_text); + if (biop->org != NULL && biop->org->orgname != 
NULL && !StringHasNoText (biop->org->orgname->div) + && DoesStringMatchConstraint (biop->org->orgname->div, constraint)) { + str = StringSave (biop->org->orgname->div); } } else if (scp->data.intvalue == Source_qual_dbxref) { - if (biop->org == NULL) { - biop->org = OrgRefNew (); + if (biop->org != NULL) { + str = GetDbxrefString (biop->org->db, constraint); } - rval = SetDbxrefString (&(biop->org->db), constraint, value, existing_text); + } else if (scp->data.intvalue == Source_qual_taxid) { + str = GetTaxonomyId (biop, constraint); } else if (scp->data.intvalue == Source_qual_all_notes) { vn.choice = SourceQualChoice_textqual; vn.data.intvalue = Source_qual_subsource_note; vn.next = NULL; - rval |= SetSourceQualInBioSource (biop, &vn, constraint, value, existing_text); - vn.data.intvalue = Source_qual_orgmod_note; - rval |= SetSourceQualInBioSource (biop, &vn, constraint, value, existing_text); + str = GetSourceQualFromBioSource (biop, &vn, constraint); + if (str == NULL) { + vn.data.intvalue = Source_qual_orgmod_note; + str = GetSourceQualFromBioSource (biop, &vn, constraint); + } } else if (scp->data.intvalue == Source_qual_all_quals) { - /* will not do this */ + /* will not do */ + } else if (scp->data.intvalue == Source_qual_fwd_primer_name + || scp->data.intvalue == Source_qual_fwd_primer_seq + || scp->data.intvalue == Source_qual_rev_primer_name + || scp->data.intvalue == Source_qual_rev_primer_seq) { + /* fetch from new primer object */ + str = GetPrimerValueFromBioSource (biop, scp->data.intvalue, constraint); } else { orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield); if (orgmod_subtype == -1) { subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield); - if (subsrc_subtype > -1) { - if (existing_text == ExistingTextOption_add_qual) { - /* create new subsource */ - ssp = SubSourceNew (); - ssp->subtype = subsrc_subtype; - rval = SetThreeFieldSubfield (&(ssp->name), subfield, value, existing_text); - /* find last in 
current list */ - ssp_prev = biop->subtype; - while (ssp_prev != NULL && ssp_prev->next != NULL) { - ssp_prev = ssp_prev->next; - } - - /* add to end of list */ - if (ssp_prev == NULL) { - biop->subtype = ssp; + for (ssp = biop->subtype; ssp != NULL && str == NULL; ssp = ssp->next) { + if (ssp->subtype == subsrc_subtype) { + if (StringHasNoText (ssp->name)) { + if (IsNonTextSourceQual (scp->data.intvalue) + && DoesStringMatchConstraint ("TRUE", constraint)) { + str = StringSave ("TRUE"); + } } else { - ssp_prev->next = ssp; - } - } else { - ssp = biop->subtype; - while (ssp != NULL) { - ssp_next = ssp->next; - if (ssp->subtype == subsrc_subtype) { - if (subfield == 0) { - if (DoesStringMatchConstraint (ssp->name, constraint)) { - rval = SetStringValue (&(ssp->name), value, existing_text); - found = TRUE; - } - } else { - does_match = TRUE; - if (!IsStringConstraintEmpty (constraint)) { - tmp = GetThreeFieldSubfield (ssp->name, subfield); - does_match = DoesStringMatchConstraint (tmp, constraint); - } - if (does_match) { - rval = SetThreeFieldSubfield (&(ssp->name), subfield, value, existing_text); - found = TRUE; - } - } - if (rval && StringHasNoText (ssp->name) && !IsNonTextSourceQual(scp->data.intvalue)) { - if (ssp_prev == NULL) { - biop->subtype = ssp->next; - } else { - ssp_prev->next = ssp->next; - } - ssp->next = NULL; - ssp = SubSourceFree (ssp); - } else { - ssp_prev = ssp; + if (subfield == 0) { + if (DoesStringMatchConstraint (ssp->name, constraint)) { + str = StringSave (ssp->name); } } else { - ssp_prev = ssp; - } - ssp = ssp_next; - } - if (!found && IsStringConstraintEmpty (constraint)) { - ssp = SubSourceNew (); - ssp->subtype = subsrc_subtype; - if (StringHasNoText (value) && IsNonTextSourceQual(scp->data.intvalue)) { - ssp->name = StringSave (""); - } else { - rval = SetThreeFieldSubfield (&(ssp->name), subfield, value, existing_text); - } - if (ssp_prev == NULL) { - biop->subtype = ssp; - } else { - ssp_prev->next = ssp; + str = 
GetThreeFieldSubfield (ssp->name, subfield); + if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) { + str = MemFree (str); + } } } } } } else { - if (existing_text == ExistingTextOption_add_qual) { - if (biop->org == NULL) { - biop->org = OrgRefNew(); - } - if (biop->org->orgname == NULL) { - biop->org->orgname = OrgNameNew(); - } - /* create new orgmod */ - mod = OrgModNew (); - mod->subtype = orgmod_subtype; - rval = SetThreeFieldSubfield (&(mod->subname), subfield, value, existing_text); - /* find last in current list */ - mod_prev = biop->org->orgname->mod; - while (mod_prev != NULL && mod_prev->next != NULL) { - mod_prev = mod_prev->next; - } - /* add to end of list */ - if (mod_prev == NULL) { - biop->org->orgname->mod = mod; - } else { - mod_prev->next = mod; - } - } else { - if (biop->org != NULL && biop->org->orgname != NULL) { - mod = biop->org->orgname->mod; - while (mod != NULL) { - mod_next = mod->next; - if (mod->subtype == orgmod_subtype) { + if (biop->org != NULL && biop->org->orgname != NULL) { + for (mod = biop->org->orgname->mod; mod != NULL && str == NULL; mod = mod->next) { + if (mod->subtype == orgmod_subtype) { + if (StringHasNoText (mod->subname)) { + if (IsNonTextSourceQual (scp->data.intvalue) + && DoesStringMatchConstraint ("TRUE", constraint)) { + str = StringSave ("TRUE"); + } + } else { if (subfield == 0) { if (DoesStringMatchConstraint (mod->subname, constraint)) { - rval = SetStringValue (&(mod->subname), value, existing_text); - found = TRUE; + str = StringSave (mod->subname); } } else { - does_match = TRUE; - if (!IsStringConstraintEmpty (constraint)) { - tmp = GetThreeFieldSubfield (mod->subname, subfield); - does_match = DoesStringMatchConstraint (tmp, constraint); - tmp = MemFree (tmp); - } - if (does_match) { - rval = SetThreeFieldSubfield (&(mod->subname), subfield, value, existing_text); - found = TRUE; - } - } - if (rval && StringHasNoText (mod->subname) && !IsNonTextSourceQual(scp->data.intvalue)) { 
- if (mod_prev == NULL) { - biop->org->orgname->mod = mod->next; - } else { - mod_prev->next = mod->next; + str = GetThreeFieldSubfield (mod->subname, subfield); + if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) { + str = MemFree (str); } - mod->next = NULL; - mod = OrgModFree (mod); - } else { - mod_prev = mod; } - } else { - mod_prev = mod; } - mod = mod_next; - } - } - if (!found && IsStringConstraintEmpty (constraint)) { - if (biop->org == NULL) { - biop->org = OrgRefNew(); - } - if (biop->org->orgname == NULL) { - biop->org->orgname = OrgNameNew(); - } - mod = OrgModNew (); - mod->subtype = orgmod_subtype; - rval = SetThreeFieldSubfield (&(mod->subname), subfield, value, existing_text); - if (mod_prev == NULL) { - biop->org->orgname->mod = mod; - } else { - mod_prev->next = mod; } } } @@ -9475,5834 +10794,5784 @@ NLM_EXTERN Boolean SetSourceQualInBioSource (BioSourcePtr biop, SourceQualChoice case SourceQualChoice_location: str = LocNameFromGenome (biop->genome); if (DoesStringMatchConstraint (str, constraint)) { - biop->genome = GenomeFromSrcLoc (scp->data.intvalue); - rval = TRUE; + str = StringSave (str); + } else { + str = NULL; } break; case SourceQualChoice_origin: str = OriginNameFromOrigin (biop->origin); if (DoesStringMatchConstraint (str, constraint)) { - biop->origin = OriginFromSrcOrig(scp->data.intvalue); - rval = TRUE; + str = StringSave (str); + } else { + str = NULL; } - break; + break; case SourceQualChoice_gcode: - if (biop->org == NULL) { - biop->org = OrgRefNew(); - } - if (biop->org->orgname == NULL) { - biop->org->orgname = OrgNameNew(); + if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->gcode != 0) { + sprintf (buf, "%d", biop->org->orgname->gcode); + str = StringSave (buf); } - biop->org->orgname->gcode = scp->data.intvalue; - rval = TRUE; break; case SourceQualChoice_mgcode: - if (biop->org == NULL) { - biop->org = OrgRefNew(); - } - if (biop->org->orgname == NULL) { - 
biop->org->orgname = OrgNameNew(); + if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->mgcode != 0) { + sprintf (buf, "%d", biop->org->orgname->mgcode); + str = StringSave (buf); } - biop->org->orgname->mgcode = scp->data.intvalue; - rval = TRUE; break; } - return rval; -} - - -NLM_EXTERN BioseqPtr GetRepresentativeBioseqFromBioseqSet (BioseqSetPtr bssp) -{ - SeqEntryPtr sep; - BioseqPtr bsp = NULL; - - if (bssp == NULL || (bssp->_class != BioseqseqSet_class_segset && bssp->_class != BioseqseqSet_class_nuc_prot)) { - return NULL; - } - sep = bssp->seq_set; - if (sep->data.ptrvalue == NULL) { - bsp = NULL; - } else if (IS_Bioseq(sep)) { - bsp = sep->data.ptrvalue; - } else if (IS_Bioseq_set (sep)) { - bsp = GetRepresentativeBioseqFromBioseqSet (sep->data.ptrvalue); - } - return bsp; + return str; } -NLM_EXTERN BioseqPtr GetSequenceForObject (Uint1 choice, Pointer data) +NLM_EXTERN ValNodePtr GetMultipleSourceQualsFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint) { - BioseqPtr bsp = NULL; - SeqFeatPtr sfp; - SeqDescrPtr sdp; - ObjValNodePtr ovp; - CGPSetPtr cgp; - ValNodePtr vnp; + ValNodePtr val_list = NULL; + CharPtr str = NULL; + SubSourcePtr ssp; + OrgModPtr mod; + Int4 orgmod_subtype = -1, subsrc_subtype = -1; + Int4 subfield; + ValNode vn; - if (data == NULL) return NULL; + if (biop == NULL || scp == NULL) return NULL; - switch (choice) { - case OBJ_BIOSEQ: - bsp = (BioseqPtr) data; - break; - case OBJ_SEQFEAT: - sfp = (SeqFeatPtr) data; - bsp = BioseqFindFromSeqLoc (sfp->location); - break; - case OBJ_SEQDESC: - sdp = (SeqDescrPtr) data; - if (sdp->extended) { - ovp = (ObjValNodePtr) sdp; - if (ovp->idx.parenttype == OBJ_BIOSEQ && ovp->idx.parentptr != NULL) { - bsp = ovp->idx.parentptr; - } else if (ovp->idx.parenttype == OBJ_BIOSEQSET) { - bsp = GetRepresentativeBioseqFromBioseqSet (ovp->idx.parentptr); - } + if (scp->choice == SourceQualChoice_textqual) { + if (scp->data.intvalue == 
Source_qual_taxname) { + if (biop->org != NULL && !StringHasNoText (biop->org->taxname) + && DoesStringMatchConstraint (biop->org->taxname, constraint)) { + ValNodeAddPointer (&val_list, 0, StringSave (biop->org->taxname)); } - break; - case 0: - cgp = (CGPSetPtr) data; - for (vnp = cgp->cds_list; vnp != NULL && bsp == NULL; vnp = vnp->next) { - sfp = vnp->data.ptrvalue; - if (sfp != NULL) { - bsp = BioseqFindFromSeqLoc (sfp->location); - } + } else if (scp->data.intvalue == Source_qual_common_name) { + if (biop->org != NULL && !StringHasNoText (biop->org->common) + && DoesStringMatchConstraint (biop->org->common, constraint)) { + ValNodeAddPointer (&val_list, 0, StringSave (biop->org->common)); } - for (vnp = cgp->mrna_list; vnp != NULL && bsp == NULL; vnp = vnp->next) { - sfp = vnp->data.ptrvalue; - if (sfp != NULL) { - bsp = BioseqFindFromSeqLoc (sfp->location); - } + } else if (scp->data.intvalue == Source_qual_lineage) { + if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->lineage) + && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint)) { + ValNodeAddPointer (&val_list, 0, StringSave (biop->org->orgname->lineage)); } - for (vnp = cgp->gene_list; vnp != NULL && bsp == NULL; vnp = vnp->next) { - sfp = vnp->data.ptrvalue; - if (sfp != NULL) { - bsp = BioseqFindFromSeqLoc (sfp->location); + } else if (scp->data.intvalue == Source_qual_division) { + if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->div) + && DoesStringMatchConstraint (biop->org->orgname->div, constraint)) { + ValNodeAddPointer (&val_list, 0, StringSave (biop->org->orgname->div)); + } + } else if (scp->data.intvalue == Source_qual_dbxref) { + if (biop->org != NULL) { + ValNodeLink (&val_list, GetMultipleDbxrefStrings (biop->org->db, constraint)); + } + } else if (scp->data.intvalue == Source_qual_taxid) { + if (biop->org != NULL) { + ValNodeLink (&val_list, GetMultipleTaxidStrings (biop->org->db, 
constraint)); + } + } else if (scp->data.intvalue == Source_qual_fwd_primer_name + || scp->data.intvalue == Source_qual_fwd_primer_seq + || scp->data.intvalue == Source_qual_rev_primer_name + || scp->data.intvalue == Source_qual_rev_primer_seq) { + /* fetch from new primer object */ + ValNodeLink (&val_list, GetMultiplePrimerValuesFromBioSource (biop, scp->data.intvalue, constraint)); + } else if (scp->data.intvalue == Source_qual_all_notes) { + vn.choice = SourceQualChoice_textqual; + vn.data.intvalue = Source_qual_subsource_note; + vn.next = NULL; + str = GetSourceQualFromBioSource (biop, &vn, constraint); + if (str != NULL) { + ValNodeAddPointer (&val_list, 0, str); + } + vn.data.intvalue = Source_qual_orgmod_note; + str = GetSourceQualFromBioSource (biop, &vn, constraint); + if (str != NULL) { + ValNodeAddPointer (&val_list, 0, str); + } + } else if (scp->data.intvalue == Source_qual_all_quals) { + /* will not do */ + } else { + orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield); + if (orgmod_subtype == -1) { + subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield); + for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { + if (ssp->subtype == subsrc_subtype) { + if (StringHasNoText (ssp->name)) { + if (IsNonTextSourceQual (scp->data.intvalue) + && DoesStringMatchConstraint ("TRUE", constraint)) { + ValNodeAddPointer (&val_list, 0, StringSave ("TRUE")); + } + } else { + if (subfield == 0) { + if (DoesStringMatchConstraint (ssp->name, constraint)) { + ValNodeAddPointer (&val_list, 0, StringSave (ssp->name)); + } + } else { + str = GetThreeFieldSubfield (ssp->name, subfield); + if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) { + str = MemFree (str); + } else { + ValNodeAddPointer (&val_list, 0, str); + } + } + } + } + } + } else { + if (biop->org != NULL && biop->org->orgname != NULL) { + for (mod = biop->org->orgname->mod; mod != NULL && str == NULL; mod = mod->next) { + if (mod->subtype == 
orgmod_subtype) { + if (StringHasNoText (mod->subname)) { + if (IsNonTextSourceQual (scp->data.intvalue) + && DoesStringMatchConstraint ("TRUE", constraint)) { + ValNodeAddPointer (&val_list, 0, StringSave ("TRUE")); + } + } else { + if (subfield == 0) { + if (DoesStringMatchConstraint (mod->subname, constraint)) { + ValNodeAddPointer (&val_list, 0, StringSave (mod->subname)); + } + } else { + str = GetThreeFieldSubfield (mod->subname, subfield); + if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) { + str = MemFree (str); + } else { + ValNodeAddPointer (&val_list, 0, str); + } + } + } + } + } } } - break; + } + } else { + str = GetSourceQualFromBioSource (biop, scp, constraint); + if (str != NULL) { + ValNodeAddPointer (&val_list, 0, str); + } } - return bsp; + return val_list; } -NLM_EXTERN BioSourcePtr GetBioSourceFromObject (Uint1 choice, Pointer data) +static Boolean RemoveAllSourceQualsFromBioSource (BioSourcePtr biop, StringConstraintPtr constraint) { - BioSourcePtr biop = NULL; - SeqDescrPtr sdp; - SeqFeatPtr sfp; - BioseqPtr bsp = NULL; - SeqMgrDescContext context; + Int4 i; + Boolean rval = FALSE; + ValNode vn; - if (data == NULL) return NULL; + vn.next = NULL; + vn.choice = SourceQualChoice_textqual; - switch (choice) - { - case OBJ_SEQDESC: - sdp = (SeqDescrPtr) data; - if (sdp->choice == Seq_descr_source) { - biop = sdp->data.ptrvalue; - } - break; - case OBJ_SEQFEAT: - sfp = (SeqFeatPtr) data; - if (sfp->data.choice == SEQFEAT_BIOSRC) { - biop = sfp->data.value.ptrvalue; - } - break; - } - if (biop == NULL) { - bsp = GetSequenceForObject (choice, data); - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); - if (sdp != NULL && sdp->choice == Seq_descr_source) { - biop = sdp->data.ptrvalue; + for (i = 0; i < NUM_srcqual_scqual; i++) { + if (srcqual_scqual[i].srcqual != Source_qual_all_quals + && srcqual_scqual[i].srcqual != Source_qual_all_notes) { + vn.data.intvalue = srcqual_scqual[i].srcqual; + rval |= 
RemoveSourceQualFromBioSource (biop, &vn, constraint); } } - return biop; + return rval; } - -NLM_EXTERN Uint2 GetEntityIdFromObject (Uint1 choice, Pointer data) +static void Lcl_RemoveOldName (OrgRefPtr orp) { - Uint2 entityID = 0; - SeqDescrPtr sdp; - ObjValNodePtr ovp; - SeqFeatPtr sfp; - BioseqPtr bsp; - - if (data == NULL) return 0; - - switch (choice) + OrgModPtr prev = NULL, curr, next_mod; + + if (orp == NULL || orp->orgname == NULL) return; + + curr = orp->orgname->mod; + while (curr != NULL) { - case OBJ_SEQDESC: - sdp = (SeqDescrPtr) data; - if (sdp->extended) { - ovp = (ObjValNodePtr) sdp; - entityID = ovp->idx.entityID; + next_mod = curr->next; + if (curr->subtype == ORGMOD_old_name) + { + if (prev == NULL) + { + orp->orgname->mod = curr->next; } - break; - case OBJ_SEQFEAT: - sfp = (SeqFeatPtr) data; - entityID = sfp->idx.entityID; - break; - default: - bsp = GetSequenceForObject (choice, data); - if (bsp != NULL) { - entityID = bsp->idx.entityID; + else + { + prev->next = curr->next; } - break; + curr->next = NULL; + OrgModFree (curr); + } + else + { + prev = curr; + } + curr = next_mod; } - - return entityID; } - -/* functions for dealing with CDS-Gene-Prot sets */ -static CharPtr GetFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp) +NLM_EXTERN Boolean RemoveSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint) { - CharPtr str = NULL; - ValNodePtr vnp; - SeqFeatPtr sfp; - GeneRefPtr grp; - RnaRefPtr rrp; - ProtRefPtr prp; - FeatureFieldPtr ffield; - - if (c == NULL) return NULL; - switch (field) { - case CDSGeneProt_field_cds_comment: - case CDSGeneProt_field_cds_inference: - case CDSGeneProt_field_codon_start: - ffield = FeatureFieldFromCDSGeneProtField (field); - for (vnp = c->cds_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - str = GetQualFromFeature (sfp, ffield, scp); - } - ffield = FeatureFieldFree (ffield); - break; - case 
CDSGeneProt_field_gene_locus: - case CDSGeneProt_field_gene_inference: - ffield = FeatureFieldFromCDSGeneProtField (field); - for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - str = GetQualFromFeature (sfp, ffield, scp); - } - ffield = FeatureFieldFree (ffield); - break; - case CDSGeneProt_field_gene_description: - for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE - && (grp = sfp->data.value.ptrvalue) != NULL - && !StringHasNoText (grp->desc) - && DoesStringMatchConstraint(grp->desc, scp)) - { - str = StringSave (grp->desc); - } - } - break; - case CDSGeneProt_field_gene_comment: - for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) - { - str = StringSave (sfp->comment); - } - } - break; - case CDSGeneProt_field_gene_allele: - for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE - && (grp = sfp->data.value.ptrvalue) != NULL - && !StringHasNoText (grp->allele) - && DoesStringMatchConstraint(grp->allele, scp)) - { - str = StringSave (grp->allele); - } - } - break; - case CDSGeneProt_field_gene_maploc: - for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE - && (grp = sfp->data.value.ptrvalue) != NULL - && !StringHasNoText (grp->maploc) - && DoesStringMatchConstraint(grp->maploc, scp)) - { - str = StringSave (grp->maploc); - } - } - break; - case CDSGeneProt_field_gene_locus_tag: - for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != 
NULL && sfp->data.choice == SEQFEAT_GENE - && (grp = sfp->data.value.ptrvalue) != NULL - && !StringHasNoText (grp->locus_tag) - && DoesStringMatchConstraint(grp->locus_tag, scp)) - { - str = StringSave (grp->locus_tag); + SubSourcePtr ssp, ssp_prev = NULL, ssp_next; + OrgModPtr mod, mod_prev = NULL, mod_next; + Int4 orgmod_subtype = -1, subsrc_subtype = -1, subfield; + CharPtr str, tmp; + Boolean rval = FALSE, do_remove, does_match; + ValNode vn; + + if (biop == NULL || scp == NULL) return FALSE; + + switch (scp->choice) + { + case SourceQualChoice_textqual: + if (scp->data.intvalue == Source_qual_taxname) { + if (biop->org != NULL && !StringHasNoText (biop->org->taxname) + && DoesStringMatchConstraint (biop->org->taxname, constraint)) { + biop->org->taxname = MemFree (biop->org->taxname); + RemoveTaxRef (biop->org); + Lcl_RemoveOldName (biop->org); + rval = TRUE; } - } - break; - case CDSGeneProt_field_gene_synonym: - for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE - && (grp = sfp->data.value.ptrvalue) != NULL) - { - str = GetFirstValNodeStringMatch (grp->syn, scp); + } else if (scp->data.intvalue == Source_qual_common_name) { + if (biop->org != NULL && !StringHasNoText (biop->org->common) + && DoesStringMatchConstraint (biop->org->common, constraint)) { + biop->org->common = MemFree (biop->org->common); + rval = TRUE; } - } - break; - case CDSGeneProt_field_gene_old_locus_tag: - for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL) { - str = GetFirstGBQualMatch (sfp->qual, "old-locus-tag", 0, scp); + } else if (scp->data.intvalue == Source_qual_lineage) { + if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->lineage) + && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint)) { + biop->org->orgname->lineage = 
MemFree (biop->org->orgname->lineage); + rval = TRUE; } - } - break; - case CDSGeneProt_field_mrna_product: - for (vnp = c->mrna_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA - && (rrp = sfp->data.value.ptrvalue) != NULL - && rrp->ext.choice == 1 - && !StringHasNoText (rrp->ext.value.ptrvalue) - && DoesStringMatchConstraint(rrp->ext.value.ptrvalue, scp)) - { - str = StringSave (rrp->ext.value.ptrvalue); + } else if (scp->data.intvalue == Source_qual_division) { + if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->div) + && DoesStringMatchConstraint (biop->org->orgname->div, constraint)) { + biop->org->orgname->div = MemFree (biop->org->orgname->div); + rval = TRUE; } - } - break; - case CDSGeneProt_field_mrna_comment: - for (vnp = c->mrna_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) - { - str = StringSave (sfp->comment); + } else if (scp->data.intvalue == Source_qual_dbxref) { + if (biop->org != NULL) { + rval = RemoveDbxrefString (&(biop->org->db), constraint); + } + } else if (scp->data.intvalue == Source_qual_taxid) { + rval = RemoveTaxonomyId (biop, constraint); + } else if (scp->data.intvalue == Source_qual_fwd_primer_name + || scp->data.intvalue == Source_qual_fwd_primer_seq + || scp->data.intvalue == Source_qual_rev_primer_name + || scp->data.intvalue == Source_qual_rev_primer_seq) { + /* remove from new primer object */ + rval = RemovePrimerValueFromBioSource (biop, scp->data.intvalue, constraint); + } else if (scp->data.intvalue == Source_qual_all_notes) { + vn.choice = SourceQualChoice_textqual; + vn.data.intvalue = Source_qual_subsource_note; + vn.next = NULL; + rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint); + vn.data.intvalue = 
Source_qual_orgmod_note; + rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint); + } else if (scp->data.intvalue == Source_qual_all_quals) { + rval |= RemoveAllSourceQualsFromBioSource (biop, constraint); + } else { + orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield); + if (orgmod_subtype == -1) { + subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield); + ssp = biop->subtype; + while (ssp != NULL) { + ssp_next = ssp->next; + do_remove = FALSE; + if (ssp->subtype == subsrc_subtype) { + if (subfield == 0) { + if (DoesStringMatchConstraint (ssp->name, constraint)) { + do_remove = TRUE; + } + } else { + does_match = TRUE; + if (!IsStringConstraintEmpty (constraint)) { + tmp = GetThreeFieldSubfield (ssp->name, subfield); + does_match = DoesStringMatchConstraint (tmp, constraint); + tmp = MemFree (tmp); + } + if (does_match) { + rval |= RemoveThreeFieldSubfield (&(ssp->name), subfield); + if (StringHasNoText (ssp->name)) { + do_remove = TRUE; + } + } + } + } + if (do_remove) { + if (ssp_prev == NULL) { + biop->subtype = ssp->next; + } else { + ssp_prev->next = ssp->next; + } + ssp->next = NULL; + ssp = SubSourceFree (ssp); + rval = TRUE; + } else { + ssp_prev = ssp; + } + ssp = ssp_next; + } + } else { + if (biop->org != NULL && biop->org->orgname != NULL) { + mod = biop->org->orgname->mod; + while (mod != NULL) { + mod_next = mod->next; + do_remove = FALSE; + if (mod->subtype == orgmod_subtype) { + if (subfield == 0) { + if (DoesStringMatchConstraint (mod->subname, constraint)) { + do_remove = TRUE; + } + } else { + does_match = TRUE; + if (!IsStringConstraintEmpty (constraint)) { + tmp = GetThreeFieldSubfield (mod->subname, subfield); + does_match = DoesStringMatchConstraint (tmp, constraint); + tmp = MemFree (tmp); + } + if (does_match) { + rval |= RemoveThreeFieldSubfield (&(mod->subname), subfield); + } + if (StringHasNoText (mod->subname)) { + do_remove = TRUE; + } + } + } + if (do_remove) { + if (mod_prev == 
NULL) { + biop->org->orgname->mod = mod->next; + } else { + mod_prev->next = mod->next; + } + mod->next = NULL; + mod = OrgModFree (mod); + rval = TRUE; + } else { + mod_prev = mod; + } + mod = mod_next; + } + } } } break; - case CDSGeneProt_field_prot_name: - for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_PROT - && (prp = sfp->data.value.ptrvalue) != NULL) - { - str = GetFirstValNodeStringMatch (prp->name, scp); + case SourceQualChoice_location: + str = LocNameFromGenome (biop->genome); + if (DoesStringMatchConstraint (str, constraint)) { + if (scp->data.intvalue == 0 || biop->genome == GenomeFromSrcLoc (scp->data.intvalue)) { + biop->genome = 0; + rval = TRUE; } } break; - case CDSGeneProt_field_prot_description: - for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_PROT - && (prp = sfp->data.value.ptrvalue) != NULL - && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { - str = StringSave (prp->desc); + case SourceQualChoice_origin: + str = OriginNameFromOrigin (biop->origin); + if (DoesStringMatchConstraint (str, constraint)) { + if (scp->data.intvalue == 0 || biop->origin == OriginFromSrcOrig (scp->data.intvalue)) { + biop->origin = 0; + rval = TRUE; } } - break; - case CDSGeneProt_field_prot_ec_number: - for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_PROT - && (prp = sfp->data.value.ptrvalue) != NULL) - { - str = GetFirstValNodeStringMatch (prp->ec, scp); - } + break; + case SourceQualChoice_gcode: + if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->gcode != 0) { 
+ biop->org->orgname->gcode = 0; + rval = TRUE; } break; - case CDSGeneProt_field_prot_activity: - for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_PROT - && (prp = sfp->data.value.ptrvalue) != NULL) - { - str = GetFirstValNodeStringMatch (prp->activity, scp); - } - } - break; - case CDSGeneProt_field_prot_comment: - for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT - && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) - { - str = StringSave (sfp->comment); - } - } - break; - case CDSGeneProt_field_mat_peptide_name: - for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_mat_peptide_aa - && (prp = sfp->data.value.ptrvalue) != NULL) - { - str = GetFirstValNodeStringMatch (prp->name, scp); - } - } - break; - case CDSGeneProt_field_mat_peptide_description: - for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_mat_peptide_aa - && (prp = sfp->data.value.ptrvalue) != NULL - && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { - str = StringSave (prp->desc); - } - } - break; - case CDSGeneProt_field_mat_peptide_ec_number: - for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_mat_peptide_aa - && (prp = sfp->data.value.ptrvalue) != NULL) - { - str = GetFirstValNodeStringMatch (prp->ec, scp); - } - } - break; - case 
CDSGeneProt_field_mat_peptide_activity: - for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_mat_peptide_aa - && (prp = sfp->data.value.ptrvalue) != NULL) - { - str = GetFirstValNodeStringMatch (prp->activity, scp); - } - } - break; - case CDSGeneProt_field_mat_peptide_comment: - for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa - && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) - { - str = StringSave (sfp->comment); - } + case SourceQualChoice_mgcode: + if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->mgcode != 0) { + biop->org->orgname->mgcode = 0; + rval = TRUE; } break; } - return str; + return rval; } -static Boolean RemoveFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp) +NLM_EXTERN Boolean SetSourceQualInBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text) { - Boolean rval = FALSE; - ValNodePtr vnp; - SeqFeatPtr sfp; - GeneRefPtr grp; - RnaRefPtr rrp; - ProtRefPtr prp; - FeatureFieldPtr ffield; - - if (c == NULL) return FALSE; - switch (field) { - case CDSGeneProt_field_cds_comment: - case CDSGeneProt_field_cds_inference: - case CDSGeneProt_field_codon_start: - ffield = FeatureFieldFromCDSGeneProtField (field); - for (vnp = c->cds_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - rval |= RemoveQualFromFeature (sfp, ffield, scp); - } - ffield = FeatureFieldFree (ffield); - break; - case CDSGeneProt_field_gene_locus: - case CDSGeneProt_field_gene_inference: - ffield = FeatureFieldFromCDSGeneProtField (field); - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; 
- rval |= RemoveQualFromFeature (sfp, ffield, scp); - } - ffield = FeatureFieldFree (ffield); - break; - case CDSGeneProt_field_gene_description: - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE - && (grp = sfp->data.value.ptrvalue) != NULL - && !StringHasNoText (grp->desc) - && DoesStringMatchConstraint(grp->desc, scp)) - { - grp->desc = MemFree(grp->desc); - rval = TRUE; - } - } - break; - case CDSGeneProt_field_gene_comment: - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) - { - sfp->comment = MemFree (sfp->comment); - rval = TRUE; - } - } - break; - case CDSGeneProt_field_gene_allele: - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE - && (grp = sfp->data.value.ptrvalue) != NULL - && !StringHasNoText (grp->allele) - && DoesStringMatchConstraint(grp->allele, scp)) - { - grp->allele = MemFree (grp->allele); - rval = TRUE; - } - } - break; - case CDSGeneProt_field_gene_maploc: - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE - && (grp = sfp->data.value.ptrvalue) != NULL - && !StringHasNoText (grp->maploc) - && DoesStringMatchConstraint(grp->maploc, scp)) - { - grp->maploc = MemFree (grp->maploc); - rval = TRUE; - } - } - break; - case CDSGeneProt_field_gene_locus_tag: - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE - && (grp = sfp->data.value.ptrvalue) != NULL - && !StringHasNoText (grp->locus_tag) - && DoesStringMatchConstraint(grp->locus_tag, scp)) - { - grp->locus_tag = MemFree (grp->locus_tag); - 
rval = TRUE; + SubSourcePtr ssp, ssp_prev = NULL, ssp_next; + OrgModPtr mod, mod_prev = NULL, mod_next; + Int4 orgmod_subtype = -1, subsrc_subtype = -1, subfield; + CharPtr str, tmp; + Boolean rval = FALSE, found = FALSE, does_match; + ValNode vn; + + if (biop == NULL || scp == NULL) return FALSE; + + switch (scp->choice) + { + case SourceQualChoice_textqual: + if (scp->data.intvalue == Source_qual_taxname) { + if ((biop->org == NULL && IsStringConstraintEmpty (constraint)) + || (biop->org != NULL + && DoesStringMatchConstraint (biop->org->taxname, constraint))) { + if (biop->org == NULL) { + biop->org = OrgRefNew(); + } + rval = SetStringValue (&(biop->org->taxname), value, existing_text); + if (rval) { + RemoveTaxRef (biop->org); + Lcl_RemoveOldName (biop->org); + } } - } - break; - case CDSGeneProt_field_gene_synonym: - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE - && (grp = sfp->data.value.ptrvalue) != NULL) - { - rval |= RemoveValNodeStringMatch (&(grp->syn), scp); + } else if (scp->data.intvalue == Source_qual_common_name) { + if ((biop->org == NULL && IsStringConstraintEmpty (constraint)) + || (biop->org != NULL + && DoesStringMatchConstraint (biop->org->common, constraint))) { + if (biop->org == NULL) { + biop->org = OrgRefNew(); + } + rval = SetStringValue (&(biop->org->common), value, existing_text); } - } - break; - case CDSGeneProt_field_gene_old_locus_tag: - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL) { - rval |= RemoveGBQualMatch (&(sfp->qual), "old-locus-tag", 0, scp); + } else if (scp->data.intvalue == Source_qual_lineage) { + if ((biop->org == NULL && IsStringConstraintEmpty (constraint)) + ||(biop->org != NULL && biop->org->orgname == NULL && IsStringConstraintEmpty (constraint)) + ||(biop->org != NULL && biop->org->orgname != NULL + && DoesStringMatchConstraint 
(biop->org->orgname->lineage, constraint))) { + if (biop->org == NULL) { + biop->org = OrgRefNew(); + } + if (biop->org->orgname == NULL) { + biop->org->orgname = OrgNameNew (); + } + rval = SetStringValue (&(biop->org->orgname->lineage), value, existing_text); } - } - break; - case CDSGeneProt_field_mrna_product: - for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA - && (rrp = sfp->data.value.ptrvalue) != NULL - && rrp->ext.choice == 1 - && !StringHasNoText (rrp->ext.value.ptrvalue) - && DoesStringMatchConstraint(rrp->ext.value.ptrvalue, scp)) - { - rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue); - rrp->ext.choice = 0; - rval = TRUE; + } else if (scp->data.intvalue == Source_qual_division) { + if ((biop->org == NULL && IsStringConstraintEmpty (constraint)) + || (biop->org != NULL && biop->org->orgname == NULL && IsStringConstraintEmpty (constraint)) + || (biop->org != NULL && biop->org->orgname != NULL + && DoesStringMatchConstraint (biop->org->orgname->div, constraint))) { + if (biop->org == NULL) { + biop->org = OrgRefNew(); + } + if (biop->org->orgname == NULL) { + biop->org->orgname = OrgNameNew (); + } + rval = SetStringValue (&(biop->org->orgname->div), value, existing_text); } - } - break; - case CDSGeneProt_field_mrna_comment: - for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) - { - sfp->comment = MemFree (sfp->comment); - rval = TRUE; + } else if (scp->data.intvalue == Source_qual_dbxref) { + if (biop->org == NULL) { + biop->org = OrgRefNew (); } - } - break; - case CDSGeneProt_field_prot_name: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_PROT - && (prp = 
sfp->data.value.ptrvalue) != NULL) - { - rval |= RemoveValNodeStringMatch (&(prp->name), scp); + rval = SetDbxrefString (&(biop->org->db), constraint, value, existing_text); + } else if (scp->data.intvalue == Source_qual_taxid) { + rval = SetTaxonomyId(biop, constraint, value, existing_text); + } else if (scp->data.intvalue == Source_qual_all_notes) { + vn.choice = SourceQualChoice_textqual; + vn.data.intvalue = Source_qual_subsource_note; + vn.next = NULL; + rval |= SetSourceQualInBioSource (biop, &vn, constraint, value, existing_text); + vn.data.intvalue = Source_qual_orgmod_note; + rval |= SetSourceQualInBioSource (biop, &vn, constraint, value, existing_text); + } else if (scp->data.intvalue == Source_qual_fwd_primer_name + || scp->data.intvalue == Source_qual_fwd_primer_seq + || scp->data.intvalue == Source_qual_rev_primer_name + || scp->data.intvalue == Source_qual_rev_primer_seq) { + /* remove from new primer object */ + rval = SetPrimerValueInBioSource (biop, scp->data.intvalue, constraint, value, existing_text); + } else if (scp->data.intvalue == Source_qual_all_quals) { + /* will not do this */ + } else { + orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield); + if (orgmod_subtype == -1) { + subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield); + if (subsrc_subtype > -1) { + if (existing_text == ExistingTextOption_add_qual) { + /* create new subsource */ + ssp = SubSourceNew (); + ssp->subtype = subsrc_subtype; + rval = SetThreeFieldSubfield (&(ssp->name), subfield, value, existing_text); + /* find last in current list */ + ssp_prev = biop->subtype; + while (ssp_prev != NULL && ssp_prev->next != NULL) { + ssp_prev = ssp_prev->next; + } + + /* add to end of list */ + if (ssp_prev == NULL) { + biop->subtype = ssp; + } else { + ssp_prev->next = ssp; + } + } else { + ssp = biop->subtype; + while (ssp != NULL) { + ssp_next = ssp->next; + if (ssp->subtype == subsrc_subtype) { + if (subfield == 0) { + if 
(DoesStringMatchConstraint (ssp->name, constraint)) { + rval = SetStringValue (&(ssp->name), value, existing_text); + found = TRUE; + } + } else { + does_match = TRUE; + if (!IsStringConstraintEmpty (constraint)) { + tmp = GetThreeFieldSubfield (ssp->name, subfield); + does_match = DoesStringMatchConstraint (tmp, constraint); + } + if (does_match) { + rval = SetThreeFieldSubfield (&(ssp->name), subfield, value, existing_text); + found = TRUE; + } + } + if (rval && StringHasNoText (ssp->name) && !IsNonTextSourceQual(scp->data.intvalue)) { + if (ssp_prev == NULL) { + biop->subtype = ssp->next; + } else { + ssp_prev->next = ssp->next; + } + ssp->next = NULL; + ssp = SubSourceFree (ssp); + } else { + ssp_prev = ssp; + } + } else { + ssp_prev = ssp; + } + ssp = ssp_next; + } + if (!found && IsStringConstraintEmpty (constraint)) { + ssp = SubSourceNew (); + ssp->subtype = subsrc_subtype; + if (StringHasNoText (value) && IsNonTextSourceQual(scp->data.intvalue)) { + ssp->name = StringSave (""); + } else { + rval = SetThreeFieldSubfield (&(ssp->name), subfield, value, existing_text); + } + if (ssp_prev == NULL) { + biop->subtype = ssp; + } else { + ssp_prev->next = ssp; + } + } + } + } + } else { + if (existing_text == ExistingTextOption_add_qual) { + if (biop->org == NULL) { + biop->org = OrgRefNew(); + } + if (biop->org->orgname == NULL) { + biop->org->orgname = OrgNameNew(); + } + /* create new orgmod */ + mod = OrgModNew (); + mod->subtype = orgmod_subtype; + rval = SetThreeFieldSubfield (&(mod->subname), subfield, value, existing_text); + /* find last in current list */ + mod_prev = biop->org->orgname->mod; + while (mod_prev != NULL && mod_prev->next != NULL) { + mod_prev = mod_prev->next; + } + /* add to end of list */ + if (mod_prev == NULL) { + biop->org->orgname->mod = mod; + } else { + mod_prev->next = mod; + } + } else { + if (biop->org != NULL && biop->org->orgname != NULL) { + mod = biop->org->orgname->mod; + while (mod != NULL) { + mod_next = mod->next; + if 
(mod->subtype == orgmod_subtype) { + if (subfield == 0) { + if (DoesStringMatchConstraint (mod->subname, constraint)) { + rval = SetStringValue (&(mod->subname), value, existing_text); + found = TRUE; + } + } else { + does_match = TRUE; + if (!IsStringConstraintEmpty (constraint)) { + tmp = GetThreeFieldSubfield (mod->subname, subfield); + does_match = DoesStringMatchConstraint (tmp, constraint); + tmp = MemFree (tmp); + } + if (does_match) { + rval = SetThreeFieldSubfield (&(mod->subname), subfield, value, existing_text); + found = TRUE; + } + } + if (rval && StringHasNoText (mod->subname) && !IsNonTextSourceQual(scp->data.intvalue)) { + if (mod_prev == NULL) { + biop->org->orgname->mod = mod->next; + } else { + mod_prev->next = mod->next; + } + mod->next = NULL; + mod = OrgModFree (mod); + } else { + mod_prev = mod; + } + } else { + mod_prev = mod; + } + mod = mod_next; + } + } + if (!found && IsStringConstraintEmpty (constraint)) { + if (biop->org == NULL) { + biop->org = OrgRefNew(); + } + if (biop->org->orgname == NULL) { + biop->org->orgname = OrgNameNew(); + } + mod = OrgModNew (); + mod->subtype = orgmod_subtype; + rval = SetThreeFieldSubfield (&(mod->subname), subfield, value, existing_text); + if (mod_prev == NULL) { + biop->org->orgname->mod = mod; + } else { + mod_prev->next = mod; + } + } + } } } break; - case CDSGeneProt_field_prot_description: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_PROT - && (prp = sfp->data.value.ptrvalue) != NULL - && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { - prp->desc = MemFree (prp->desc); - rval = TRUE; - } + case SourceQualChoice_location: + str = LocNameFromGenome (biop->genome); + if (DoesStringMatchConstraint (str, constraint)) { + biop->genome = GenomeFromSrcLoc (scp->data.intvalue); + rval = TRUE; } break; - case 
CDSGeneProt_field_prot_ec_number: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_PROT - && (prp = sfp->data.value.ptrvalue) != NULL) - { - rval |= RemoveValNodeStringMatch (&(prp->ec), scp); - } + case SourceQualChoice_origin: + str = OriginNameFromOrigin (biop->origin); + if (DoesStringMatchConstraint (str, constraint)) { + biop->origin = OriginFromSrcOrig(scp->data.intvalue); + rval = TRUE; } - break; - case CDSGeneProt_field_prot_activity: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_PROT - && (prp = sfp->data.value.ptrvalue) != NULL) - { - rval |= RemoveValNodeStringMatch (&(prp->activity), scp); - } + break; + case SourceQualChoice_gcode: + if (biop->org == NULL) { + biop->org = OrgRefNew(); } - break; - case CDSGeneProt_field_prot_comment: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT - && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) - { - sfp->comment = MemFree (sfp->comment); - rval = TRUE; - } + if (biop->org->orgname == NULL) { + biop->org->orgname = OrgNameNew(); } + biop->org->orgname->gcode = scp->data.intvalue; + rval = TRUE; break; - case CDSGeneProt_field_mat_peptide_name: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_mat_peptide_aa - && (prp = sfp->data.value.ptrvalue) != NULL) - { - rval |= RemoveValNodeStringMatch (&(prp->name), scp); - } + case SourceQualChoice_mgcode: + if (biop->org == NULL) { + biop->org = OrgRefNew(); } - break; - case CDSGeneProt_field_mat_peptide_description: - for 
(vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_mat_peptide_aa - && (prp = sfp->data.value.ptrvalue) != NULL - && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { - prp->desc = MemFree (prp->desc); - rval = TRUE; - } + if (biop->org->orgname == NULL) { + biop->org->orgname = OrgNameNew(); } + biop->org->orgname->mgcode = scp->data.intvalue; + rval = TRUE; break; - case CDSGeneProt_field_mat_peptide_ec_number: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_mat_peptide_aa - && (prp = sfp->data.value.ptrvalue) != NULL) - { - rval |= RemoveValNodeStringMatch (&(prp->ec), scp); + } + return rval; +} + + +NLM_EXTERN BioseqPtr GetRepresentativeBioseqFromBioseqSet (BioseqSetPtr bssp) +{ + SeqEntryPtr sep; + BioseqPtr bsp = NULL; + + if (bssp == NULL || (bssp->_class != BioseqseqSet_class_segset && bssp->_class != BioseqseqSet_class_nuc_prot)) { + return NULL; + } + sep = bssp->seq_set; + if (sep->data.ptrvalue == NULL) { + bsp = NULL; + } else if (IS_Bioseq(sep)) { + bsp = sep->data.ptrvalue; + } else if (IS_Bioseq_set (sep)) { + bsp = GetRepresentativeBioseqFromBioseqSet (sep->data.ptrvalue); + } + return bsp; +} + + +NLM_EXTERN BioseqPtr GetSequenceForObject (Uint1 choice, Pointer data) +{ + BioseqPtr bsp = NULL; + SeqFeatPtr sfp; + SeqDescrPtr sdp; + ObjValNodePtr ovp; + CGPSetPtr cgp; + ValNodePtr vnp; + + if (data == NULL) return NULL; + + switch (choice) { + case OBJ_BIOSEQ: + bsp = (BioseqPtr) data; + break; + case OBJ_SEQFEAT: + sfp = (SeqFeatPtr) data; + bsp = BioseqFindFromSeqLoc (sfp->location); + break; + case OBJ_SEQDESC: + sdp = (SeqDescrPtr) data; + if (sdp->extended) { + ovp = (ObjValNodePtr) sdp; + if (ovp->idx.parenttype == OBJ_BIOSEQ && ovp->idx.parentptr != 
NULL) { + bsp = ovp->idx.parentptr; + } else if (ovp->idx.parenttype == OBJ_BIOSEQSET) { + bsp = GetRepresentativeBioseqFromBioseqSet (ovp->idx.parentptr); } } break; - case CDSGeneProt_field_mat_peptide_activity: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT - && sfp->idx.subtype == FEATDEF_mat_peptide_aa - && (prp = sfp->data.value.ptrvalue) != NULL) - { - rval |= RemoveValNodeStringMatch (&(prp->activity), scp); + case 0: + cgp = (CGPSetPtr) data; + for (vnp = cgp->cds_list; vnp != NULL && bsp == NULL; vnp = vnp->next) { + sfp = vnp->data.ptrvalue; + if (sfp != NULL) { + bsp = BioseqFindFromSeqLoc (sfp->location); } } - break; - case CDSGeneProt_field_mat_peptide_comment: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa - && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) - { - sfp->comment = MemFree (sfp->comment); - rval = TRUE; + for (vnp = cgp->mrna_list; vnp != NULL && bsp == NULL; vnp = vnp->next) { + sfp = vnp->data.ptrvalue; + if (sfp != NULL) { + bsp = BioseqFindFromSeqLoc (sfp->location); + } + } + for (vnp = cgp->gene_list; vnp != NULL && bsp == NULL; vnp = vnp->next) { + sfp = vnp->data.ptrvalue; + if (sfp != NULL) { + bsp = BioseqFindFromSeqLoc (sfp->location); } } break; } - return rval; + return bsp; } -static SeqFeatPtr CreateGeneForCGPSet (CGPSetPtr c) +NLM_EXTERN BioSourcePtr GetBioSourceFromObject (Uint1 choice, Pointer data) { - SeqFeatPtr gene = NULL, sfp = NULL; - BioseqPtr bsp; - ValNodePtr vnp; + BioSourcePtr biop = NULL; + SeqDescrPtr sdp; + SeqFeatPtr sfp; + BioseqPtr bsp = NULL; + SeqMgrDescContext context; - if (c == NULL) return NULL; + if (data == NULL) return NULL; - for (vnp = c->cds_list; vnp != NULL && sfp == NULL; vnp = vnp->next) { - sfp = vnp->data.ptrvalue; + switch 
(choice) + { + case OBJ_SEQDESC: + sdp = (SeqDescrPtr) data; + if (sdp->choice == Seq_descr_source) { + biop = sdp->data.ptrvalue; + } + break; + case OBJ_SEQFEAT: + sfp = (SeqFeatPtr) data; + if (sfp->data.choice == SEQFEAT_BIOSRC) { + biop = sfp->data.value.ptrvalue; + } + break; } - for (vnp = c->mrna_list; vnp != NULL && sfp == NULL; vnp = vnp->next) { - sfp = vnp->data.ptrvalue; + if (biop == NULL) { + bsp = GetSequenceForObject (choice, data); + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); + if (sdp != NULL && sdp->choice == Seq_descr_source) { + biop = sdp->data.ptrvalue; + } } - if (sfp != NULL) { - bsp = BioseqFindFromSeqLoc (sfp->location); - if (bsp != NULL) { - gene = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, sfp->location); - if (gene != NULL) { - gene->data.value.ptrvalue = GeneRefNew(); + return biop; +} + + +NLM_EXTERN Uint2 GetEntityIdFromObject (Uint1 choice, Pointer data) +{ + Uint2 entityID = 0; + SeqDescrPtr sdp; + ObjValNodePtr ovp; + SeqFeatPtr sfp; + BioseqPtr bsp; + + if (data == NULL) return 0; + + switch (choice) + { + case OBJ_SEQDESC: + sdp = (SeqDescrPtr) data; + if (sdp->extended) { + ovp = (ObjValNodePtr) sdp; + entityID = ovp->idx.entityID; } - } + break; + case OBJ_SEQFEAT: + sfp = (SeqFeatPtr) data; + entityID = sfp->idx.entityID; + break; + default: + bsp = GetSequenceForObject (choice, data); + if (bsp != NULL) { + entityID = bsp->idx.entityID; + } + break; + } - return gene; + + return entityID; } -static Boolean SetFieldValueInCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +/* functions for dealing with CDS-Gene-Prot sets */ +static CharPtr GetFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp) { - Boolean rval = FALSE; + CharPtr str = NULL; ValNodePtr vnp; SeqFeatPtr sfp; GeneRefPtr grp; + RnaRefPtr rrp; ProtRefPtr prp; FeatureFieldPtr ffield; - if (c == NULL) return FALSE; + if (c == NULL) return NULL; switch (field) { case 
CDSGeneProt_field_cds_comment: case CDSGeneProt_field_cds_inference: case CDSGeneProt_field_codon_start: ffield = FeatureFieldFromCDSGeneProtField (field); - for (vnp = c->cds_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->cds_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; - rval |= SetQualOnFeature (sfp, ffield, scp, value, existing_text); + str = GetQualFromFeature (sfp, ffield, scp); } ffield = FeatureFieldFree (ffield); break; case CDSGeneProt_field_gene_locus: - if (c->gene_list == NULL && scp == NULL) { - sfp = CreateGeneForCGPSet (c); - if (sfp != NULL) { - ValNodeAddPointer (&(c->gene_list), 0, sfp); - } + case CDSGeneProt_field_gene_inference: + ffield = FeatureFieldFromCDSGeneProtField (field); + for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + str = GetQualFromFeature (sfp, ffield, scp); } - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + ffield = FeatureFieldFree (ffield); + break; + case CDSGeneProt_field_gene_description: + for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL - && DoesStringMatchConstraint(grp->locus, scp)) + && !StringHasNoText (grp->desc) + && DoesStringMatchConstraint(grp->desc, scp)) { - rval |= SetStringValue ( &(grp->locus), value, existing_text); + str = StringSave (grp->desc); } } break; - case CDSGeneProt_field_gene_description: - case CDSGeneProt_field_gene_inference: - ffield = FeatureFieldFromCDSGeneProtField (field); - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - rval |= SetQualOnFeature (sfp, ffield, scp, value, existing_text); - } - ffield = FeatureFieldFree (ffield); - break; case CDSGeneProt_field_gene_comment: - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + for (vnp = 
c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && DoesStringMatchConstraint(sfp->comment, scp)) + if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { - rval |= SetStringValue ( &(sfp->comment), value, existing_text); + str = StringSave (sfp->comment); } } break; case CDSGeneProt_field_gene_allele: - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL + && !StringHasNoText (grp->allele) && DoesStringMatchConstraint(grp->allele, scp)) { - rval |= SetStringValue (&(grp->allele), value, existing_text); + str = StringSave (grp->allele); } } break; case CDSGeneProt_field_gene_maploc: - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL + && !StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp)) { - rval |= SetStringValue ( &(grp->maploc), value, existing_text); + str = StringSave (grp->maploc); } } break; case CDSGeneProt_field_gene_locus_tag: - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL + && !StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp)) { - rval |= SetStringValue ( &(grp->locus_tag), value, existing_text); + str = StringSave (grp->locus_tag); } } break; case CDSGeneProt_field_gene_synonym: - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + 
for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL) { - rval |= SetStringsInValNodeStringList (&(grp->syn), scp, value, existing_text); + str = GetFirstValNodeStringMatch (grp->syn, scp); } } break; case CDSGeneProt_field_gene_old_locus_tag: - for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL) { - rval |= RemoveGBQualMatch (&(sfp->qual), "old-locus-tag", 0, scp); + str = GetFirstGBQualMatch (sfp->qual, "old-locus-tag", 0, scp); } } break; case CDSGeneProt_field_mrna_product: - for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->mrna_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; - rval |= SetRNAProductString (sfp, scp, value, existing_text); + if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA + && (rrp = sfp->data.value.ptrvalue) != NULL + && rrp->ext.choice == 1 + && !StringHasNoText (rrp->ext.value.ptrvalue) + && DoesStringMatchConstraint(rrp->ext.value.ptrvalue, scp)) + { + str = StringSave (rrp->ext.value.ptrvalue); + } } break; case CDSGeneProt_field_mrna_comment: - for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->mrna_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL&& DoesStringMatchConstraint(sfp->comment, scp)) + if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { - rval |= SetStringValue ( &(sfp->comment), value, existing_text); + str = StringSave (sfp->comment); } } break; case CDSGeneProt_field_prot_name: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) 
vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL) { - rval |= SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text); + str = GetFirstValNodeStringMatch (prp->name, scp); } } break; case CDSGeneProt_field_prot_description: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL - && DoesStringMatchConstraint(prp->desc, scp)) { - rval |= SetStringValue ( &(prp->desc), value, existing_text); + && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { + str = StringSave (prp->desc); } } break; case CDSGeneProt_field_prot_ec_number: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL) { - rval |= SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text); + str = GetFirstValNodeStringMatch (prp->ec, scp); } } break; case CDSGeneProt_field_prot_activity: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL) { - rval |= SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text); + str = GetFirstValNodeStringMatch (prp->activity, scp); } } break; case CDSGeneProt_field_prot_comment: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->prot_list; 
vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT - && DoesStringMatchConstraint(sfp->comment, scp)) + && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { - rval |= SetStringValue ( &(sfp->comment), value, existing_text); + str = StringSave (sfp->comment); } } break; case CDSGeneProt_field_mat_peptide_name: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL) { - rval |= SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text); + str = GetFirstValNodeStringMatch (prp->name, scp); } } break; case CDSGeneProt_field_mat_peptide_description: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL - && DoesStringMatchConstraint(prp->desc, scp)) { - rval |= SetStringValue ( &(prp->desc), value, existing_text); + && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { + str = StringSave (prp->desc); } } break; case CDSGeneProt_field_mat_peptide_ec_number: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL) { - rval |= SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text); + str = GetFirstValNodeStringMatch (prp->ec, scp); } } 
break; case CDSGeneProt_field_mat_peptide_activity: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL) { - rval |= SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text); + str = GetFirstValNodeStringMatch (prp->activity, scp); } } break; case CDSGeneProt_field_mat_peptide_comment: - for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa - && DoesStringMatchConstraint(sfp->comment, scp)) + && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { - rval |= SetStringValue ( &(sfp->comment), value, existing_text); + str = StringSave (sfp->comment); } } break; } - return rval; + return str; } -static MolInfoPtr GetMolInfoForBioseq (BioseqPtr bsp) +static Boolean RemoveFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp) { - MolInfoPtr m = NULL; - SeqDescrPtr sdp; - - if (bsp == NULL) return NULL; - sdp = bsp->descr; - while (sdp != NULL && sdp->choice != Seq_descr_molinfo) { - sdp = sdp->next; - } - if (sdp != NULL) { - m = (MolInfoPtr) sdp->data.ptrvalue; - } - return m; -} + Boolean rval = FALSE; + ValNodePtr vnp; + SeqFeatPtr sfp; + GeneRefPtr grp; + RnaRefPtr rrp; + ProtRefPtr prp; + FeatureFieldPtr ffield; - -static CharPtr GetSequenceQualFromBioseq (BioseqPtr bsp, ValNodePtr field) -{ - CharPtr rval = NULL; - MolInfoPtr m; - - if (bsp == NULL || field == NULL) return NULL; - - switch (field->choice) { - case MolinfoField_molecule: - m = GetMolInfoForBioseq (bsp); - if (m != NULL) { - rval = BiomolNameFromBiomol (m->biomol); + if (c == NULL) return FALSE; + 
switch (field) { + case CDSGeneProt_field_cds_comment: + case CDSGeneProt_field_cds_inference: + case CDSGeneProt_field_codon_start: + ffield = FeatureFieldFromCDSGeneProtField (field); + for (vnp = c->cds_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + rval |= RemoveQualFromFeature (sfp, ffield, scp); } + ffield = FeatureFieldFree (ffield); break; - case MolinfoField_technique: - m = GetMolInfoForBioseq (bsp); - if (m != NULL) { - rval = TechNameFromTech (m->tech); + case CDSGeneProt_field_gene_locus: + case CDSGeneProt_field_gene_inference: + ffield = FeatureFieldFromCDSGeneProtField (field); + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + rval |= RemoveQualFromFeature (sfp, ffield, scp); } + ffield = FeatureFieldFree (ffield); break; - case MolinfoField_completedness: - m = GetMolInfoForBioseq (bsp); - if (m != NULL) { - rval = CompletenessNameFromCompleteness (m->completeness); + case CDSGeneProt_field_gene_description: + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE + && (grp = sfp->data.value.ptrvalue) != NULL + && !StringHasNoText (grp->desc) + && DoesStringMatchConstraint(grp->desc, scp)) + { + grp->desc = MemFree(grp->desc); + rval = TRUE; + } } break; - case MolinfoField_mol_class: - rval = MolNameFromMol (bsp->mol); - break; - case MolinfoField_topology: - rval = TopologyNameFromTopology (bsp->topology); + case CDSGeneProt_field_gene_comment: + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) + { + sfp->comment = MemFree (sfp->comment); + rval = TRUE; + } + } break; - case MolinfoField_strand: - rval = StrandNameFromStrand (bsp->strand); + case CDSGeneProt_field_gene_allele: + for (vnp = c->gene_list; vnp != NULL; 
vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE + && (grp = sfp->data.value.ptrvalue) != NULL + && !StringHasNoText (grp->allele) + && DoesStringMatchConstraint(grp->allele, scp)) + { + grp->allele = MemFree (grp->allele); + rval = TRUE; + } + } break; - } - if (rval != NULL) rval = StringSave (rval); - return rval; -} - - -static Boolean RemoveSequenceQualFromBioseq (BioseqPtr bsp, ValNodePtr field) -{ - MolInfoPtr m; - Boolean rval = FALSE; - - if (bsp == NULL || field == NULL) return FALSE; - - switch (field->choice) { - case MolinfoField_molecule: - m = GetMolInfoForBioseq (bsp); - if (m != NULL) { - m->biomol = 0; - rval = TRUE; + case CDSGeneProt_field_gene_maploc: + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE + && (grp = sfp->data.value.ptrvalue) != NULL + && !StringHasNoText (grp->maploc) + && DoesStringMatchConstraint(grp->maploc, scp)) + { + grp->maploc = MemFree (grp->maploc); + rval = TRUE; + } } break; - case MolinfoField_technique: - m = GetMolInfoForBioseq (bsp); - if (m != NULL) { - m->tech = 0; - rval = TRUE; + case CDSGeneProt_field_gene_locus_tag: + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE + && (grp = sfp->data.value.ptrvalue) != NULL + && !StringHasNoText (grp->locus_tag) + && DoesStringMatchConstraint(grp->locus_tag, scp)) + { + grp->locus_tag = MemFree (grp->locus_tag); + rval = TRUE; + } } break; - case MolinfoField_completedness: - m = GetMolInfoForBioseq (bsp); - if (m != NULL) { - m->completeness = 0; - rval = TRUE; + case CDSGeneProt_field_gene_synonym: + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE + && (grp = sfp->data.value.ptrvalue) != NULL) + { + 
rval |= RemoveValNodeStringMatch (&(grp->syn), scp); + } } break; - case MolinfoField_mol_class: - bsp->mol = 0; - rval = TRUE; + case CDSGeneProt_field_gene_old_locus_tag: + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL) { + rval |= RemoveGBQualMatch (&(sfp->qual), "old-locus-tag", 0, scp); + } + } break; - case MolinfoField_topology: - bsp->topology = 0; - rval = TRUE; + case CDSGeneProt_field_mrna_product: + for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA + && (rrp = sfp->data.value.ptrvalue) != NULL + && rrp->ext.choice == 1 + && !StringHasNoText (rrp->ext.value.ptrvalue) + && DoesStringMatchConstraint(rrp->ext.value.ptrvalue, scp)) + { + rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue); + rrp->ext.choice = 0; + rval = TRUE; + } + } break; - case MolinfoField_strand: - bsp->strand = 0; - rval = TRUE; + case CDSGeneProt_field_mrna_comment: + for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) + { + sfp->comment = MemFree (sfp->comment); + rval = TRUE; + } + } break; - } - return rval; -} - - -static MolInfoPtr AddMolInfoToBioseq (BioseqPtr bsp) -{ - SeqDescrPtr sdp; - MolInfoPtr m; - - sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_molinfo); - m = MolInfoNew (); - sdp->data.ptrvalue = m; - return m; -} - - -static Boolean SetSequenceQualOnBioseq (BioseqPtr bsp, ValNodePtr field) -{ - MolInfoPtr m; - Boolean rval = FALSE; - - if (bsp == NULL || field == NULL) return FALSE; - - switch (field->choice) { - case MolinfoField_molecule: - m = GetMolInfoForBioseq (bsp); - if (m == NULL) { - m = AddMolInfoToBioseq (bsp); + case CDSGeneProt_field_prot_name: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) 
vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_PROT + && (prp = sfp->data.value.ptrvalue) != NULL) + { + rval |= RemoveValNodeStringMatch (&(prp->name), scp); + } } - m->biomol = BiomolFromMoleculeType (field->data.intvalue); - rval = TRUE; break; - case MolinfoField_technique: - m = GetMolInfoForBioseq (bsp); - if (m == NULL) { - m = AddMolInfoToBioseq (bsp); + case CDSGeneProt_field_prot_description: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_PROT + && (prp = sfp->data.value.ptrvalue) != NULL + && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { + prp->desc = MemFree (prp->desc); + rval = TRUE; + } } - m->tech = TechFromTechniqueType (field->data.intvalue); - rval = TRUE; break; - case MolinfoField_completedness: - m = GetMolInfoForBioseq (bsp); - if (m == NULL) { - m = AddMolInfoToBioseq (bsp); + case CDSGeneProt_field_prot_ec_number: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_PROT + && (prp = sfp->data.value.ptrvalue) != NULL) + { + rval |= RemoveValNodeStringMatch (&(prp->ec), scp); + } } - m->completeness = CompletenessFromCompletednessType (field->data.intvalue); - rval = TRUE; break; - case MolinfoField_mol_class: - bsp->mol = MolFromMoleculeClassType (field->data.intvalue); - rval = TRUE; + case CDSGeneProt_field_prot_activity: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_PROT + && (prp = sfp->data.value.ptrvalue) != NULL) + { + rval |= RemoveValNodeStringMatch (&(prp->activity), scp); + } + } break; - case MolinfoField_topology: - bsp->topology = 
TopologyFromTopologyType (field->data.intvalue); - rval = TRUE; + case CDSGeneProt_field_prot_comment: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT + && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) + { + sfp->comment = MemFree (sfp->comment); + rval = TRUE; + } + } break; - case MolinfoField_strand: - bsp->strand = StrandFromStrandType (field->data.intvalue); - rval = TRUE; + case CDSGeneProt_field_mat_peptide_name: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_mat_peptide_aa + && (prp = sfp->data.value.ptrvalue) != NULL) + { + rval |= RemoveValNodeStringMatch (&(prp->name), scp); + } + } + break; + case CDSGeneProt_field_mat_peptide_description: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_mat_peptide_aa + && (prp = sfp->data.value.ptrvalue) != NULL + && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { + prp->desc = MemFree (prp->desc); + rval = TRUE; + } + } + break; + case CDSGeneProt_field_mat_peptide_ec_number: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_mat_peptide_aa + && (prp = sfp->data.value.ptrvalue) != NULL) + { + rval |= RemoveValNodeStringMatch (&(prp->ec), scp); + } + } + break; + case CDSGeneProt_field_mat_peptide_activity: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_mat_peptide_aa + && (prp = sfp->data.value.ptrvalue) != 
NULL) + { + rval |= RemoveValNodeStringMatch (&(prp->activity), scp); + } + } + break; + case CDSGeneProt_field_mat_peptide_comment: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa + && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) + { + sfp->comment = MemFree (sfp->comment); + rval = TRUE; + } + } break; } return rval; } -static CharPtr GetGenomeProjectIdFromBioseq (BioseqPtr bsp, StringConstraintPtr scp) +static SeqFeatPtr CreateGeneForCGPSet (CGPSetPtr c) { - SeqDescrPtr sdp; - SeqMgrDescContext context; - Char buf[50]; - UserObjectPtr uop; - UserFieldPtr ufp; + SeqFeatPtr gene = NULL, sfp = NULL; + BioseqPtr bsp; + ValNodePtr vnp; - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); - while (sdp != NULL) { - uop = (UserObjectPtr) sdp->data.ptrvalue; - if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "GenomeProjectsDB") == 0) - { - ufp = uop->data; - while (ufp != NULL) { - if (ufp->label != NULL - && StringCmp (ufp->label->str, "ProjectID") == 0 - && ufp->choice == 2) { - sprintf (buf, "%d", ufp->data.intvalue); - if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { - return StringSave (buf); - } - } - ufp = ufp->next; + if (c == NULL) return NULL; + + for (vnp = c->cds_list; vnp != NULL && sfp == NULL; vnp = vnp->next) { + sfp = vnp->data.ptrvalue; + } + for (vnp = c->mrna_list; vnp != NULL && sfp == NULL; vnp = vnp->next) { + sfp = vnp->data.ptrvalue; + } + if (sfp != NULL) { + bsp = BioseqFindFromSeqLoc (sfp->location); + if (bsp != NULL) { + gene = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, sfp->location); + if (gene != NULL) { + gene->data.value.ptrvalue = GeneRefNew(); } } - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context); } - - return NULL; + return gene; } -static Boolean RemoveGenomeProjectIdFromBioseq (BioseqPtr bsp, 
StringConstraintPtr scp) +static Boolean SetFieldValueInCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { - SeqDescrPtr sdp; - SeqMgrDescContext context; - Char buf[50]; - UserObjectPtr uop; - UserFieldPtr ufp; - ObjValNodePtr ovn; - Boolean rval = FALSE; - - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); - while (sdp != NULL) { - uop = (UserObjectPtr) sdp->data.ptrvalue; - if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "GenomeProjectsDB") == 0) - { - ufp = uop->data; - while (ufp != NULL) { - if (ufp->label != NULL - && StringCmp (ufp->label->str, "ProjectID") == 0 - && ufp->choice == 2) { - sprintf (buf, "%d", ufp->data.intvalue); - if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { - if (sdp->extended != 0) { - ovn = (ObjValNodePtr) sdp; - ovn->idx.deleteme = TRUE; - rval = TRUE; - } - } + Boolean rval = FALSE; + ValNodePtr vnp; + SeqFeatPtr sfp; + GeneRefPtr grp; + ProtRefPtr prp; + FeatureFieldPtr ffield; + + if (c == NULL) return FALSE; + switch (field) { + case CDSGeneProt_field_cds_comment: + case CDSGeneProt_field_cds_inference: + case CDSGeneProt_field_codon_start: + ffield = FeatureFieldFromCDSGeneProtField (field); + for (vnp = c->cds_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + rval |= SetQualOnFeature (sfp, ffield, scp, value, existing_text); + } + ffield = FeatureFieldFree (ffield); + break; + case CDSGeneProt_field_gene_locus: + if (c->gene_list == NULL && scp == NULL) { + sfp = CreateGeneForCGPSet (c); + if (sfp != NULL) { + ValNodeAddPointer (&(c->gene_list), 0, sfp); } - ufp = ufp->next; } - } - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context); - } - return rval; -} - - -static Boolean SetGenomeProjectIdOnBioseq (BioseqPtr bsp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) -{ - SeqDescrPtr sdp; - SeqMgrDescContext context; - Char buf[50]; - CharPtr tmp; - 
UserObjectPtr uop; - UserFieldPtr ufp; - Boolean rval = FALSE; - - if (bsp == NULL || !IsAllDigits (value)) { - return FALSE; - } - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); - while (sdp != NULL) { - uop = (UserObjectPtr) sdp->data.ptrvalue; - if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "GenomeProjectsDB") == 0) - { - ufp = uop->data; - while (ufp != NULL) { - if (ufp->label != NULL - && StringCmp (ufp->label->str, "ProjectID") == 0 - && ufp->choice == 2) { - sprintf (buf, "%d", ufp->data.intvalue); - if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { - tmp = StringSave (buf); - if (SetStringValue (&tmp, value, existing_text) && IsAllDigits (tmp)) { - ufp->data.intvalue = atoi (tmp); - rval = TRUE; - } - tmp = MemFree (tmp); - } + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE + && (grp = sfp->data.value.ptrvalue) != NULL + && DoesStringMatchConstraint(grp->locus, scp)) + { + rval |= SetStringValue ( &(grp->locus), value, existing_text); } - ufp = ufp->next; } - } - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context); - } - if (!rval && IsStringConstraintEmpty (scp)) { - sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user); - uop = CreateGenomeProjectsDBUserObject (); - AddIDsToGenomeProjectsDBUserObject (uop, atoi (value), 0); - sdp->data.ptrvalue = uop; - rval = TRUE; + break; + case CDSGeneProt_field_gene_description: + case CDSGeneProt_field_gene_inference: + ffield = FeatureFieldFromCDSGeneProtField (field); + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + rval |= SetQualOnFeature (sfp, ffield, scp, value, existing_text); + } + ffield = FeatureFieldFree (ffield); + break; + case CDSGeneProt_field_gene_comment: + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if 
(sfp != NULL && DoesStringMatchConstraint(sfp->comment, scp)) + { + rval |= SetStringValue ( &(sfp->comment), value, existing_text); + } + } + break; + case CDSGeneProt_field_gene_allele: + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE + && (grp = sfp->data.value.ptrvalue) != NULL + && DoesStringMatchConstraint(grp->allele, scp)) + { + rval |= SetStringValue (&(grp->allele), value, existing_text); + } + } + break; + case CDSGeneProt_field_gene_maploc: + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE + && (grp = sfp->data.value.ptrvalue) != NULL + && DoesStringMatchConstraint(grp->maploc, scp)) + { + rval |= SetStringValue ( &(grp->maploc), value, existing_text); + } + } + break; + case CDSGeneProt_field_gene_locus_tag: + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE + && (grp = sfp->data.value.ptrvalue) != NULL + && DoesStringMatchConstraint(grp->locus_tag, scp)) + { + rval |= SetStringValue ( &(grp->locus_tag), value, existing_text); + } + } + break; + case CDSGeneProt_field_gene_synonym: + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE + && (grp = sfp->data.value.ptrvalue) != NULL) + { + rval |= SetStringsInValNodeStringList (&(grp->syn), scp, value, existing_text); + } + } + break; + case CDSGeneProt_field_gene_old_locus_tag: + for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL) { + rval |= RemoveGBQualMatch (&(sfp->qual), "old-locus-tag", 0, scp); + } + } + break; + case CDSGeneProt_field_mrna_product: + for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) 
vnp->data.ptrvalue; + rval |= SetRNAProductString (sfp, scp, value, existing_text); + } + break; + case CDSGeneProt_field_mrna_comment: + for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL&& DoesStringMatchConstraint(sfp->comment, scp)) + { + rval |= SetStringValue ( &(sfp->comment), value, existing_text); + } + } + break; + case CDSGeneProt_field_prot_name: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_PROT + && (prp = sfp->data.value.ptrvalue) != NULL) + { + rval |= SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text); + } + } + break; + case CDSGeneProt_field_prot_description: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_PROT + && (prp = sfp->data.value.ptrvalue) != NULL + && DoesStringMatchConstraint(prp->desc, scp)) { + rval |= SetStringValue ( &(prp->desc), value, existing_text); + } + } + break; + case CDSGeneProt_field_prot_ec_number: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_PROT + && (prp = sfp->data.value.ptrvalue) != NULL) + { + rval |= SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text); + } + } + break; + case CDSGeneProt_field_prot_activity: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_PROT + && (prp = sfp->data.value.ptrvalue) != NULL) + { + rval |= SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text); + } + } + break; + case CDSGeneProt_field_prot_comment: + for (vnp 
= c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT + && DoesStringMatchConstraint(sfp->comment, scp)) + { + rval |= SetStringValue ( &(sfp->comment), value, existing_text); + } + } + break; + case CDSGeneProt_field_mat_peptide_name: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_mat_peptide_aa + && (prp = sfp->data.value.ptrvalue) != NULL) + { + rval |= SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text); + } + } + break; + case CDSGeneProt_field_mat_peptide_description: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_mat_peptide_aa + && (prp = sfp->data.value.ptrvalue) != NULL + && DoesStringMatchConstraint(prp->desc, scp)) { + rval |= SetStringValue ( &(prp->desc), value, existing_text); + } + } + break; + case CDSGeneProt_field_mat_peptide_ec_number: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_mat_peptide_aa + && (prp = sfp->data.value.ptrvalue) != NULL) + { + rval |= SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text); + } + } + break; + case CDSGeneProt_field_mat_peptide_activity: + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT + && sfp->idx.subtype == FEATDEF_mat_peptide_aa + && (prp = sfp->data.value.ptrvalue) != NULL) + { + rval |= SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text); + } + } + break; + case CDSGeneProt_field_mat_peptide_comment: + for (vnp = c->prot_list; vnp != NULL; vnp 
= vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa + && DoesStringMatchConstraint(sfp->comment, scp)) + { + rval |= SetStringValue ( &(sfp->comment), value, existing_text); + } + } + break; } return rval; } -static Boolean SetTextDescriptor (SeqDescrPtr sdp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +static Boolean SortFieldInCGPSet (CGPSetPtr c, Uint2 field, Uint2 order) { - Boolean rval = FALSE; - CharPtr cp; - ObjValNodePtr ovp; - Boolean was_empty; + ValNodePtr vnp; + SeqFeatPtr sfp; + Boolean rval = FALSE; - if (sdp == NULL) { + if (c == NULL) { return FALSE; } - - if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (sdp->data.ptrvalue, scp)) { - if (StringHasNoText (sdp->data.ptrvalue)) { - was_empty = TRUE; - } else { - was_empty = FALSE; - } - cp = sdp->data.ptrvalue; - if (SetStringValue (&cp, value, existing_text)) { - rval = TRUE; - } - sdp->data.ptrvalue = cp; - if (was_empty) { - ovp = (ObjValNodePtr) sdp; - ovp->idx.deleteme = FALSE; + if (field == CDSGeneProt_field_prot_name) { + for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT) { + rval |= SortProtNames (sfp, order); + } } } - return rval; } -static CharPtr s_StringEndsWith (CharPtr str, CharPtr end) +static MolInfoPtr GetMolInfoForBioseq (BioseqPtr bsp) { - Int4 str_len, end_len; - if (end == NULL || str == NULL) { - return NULL; - } - str_len = StringLen (str); - end_len = StringLen (end); - if (end_len > str_len) { - return NULL; + MolInfoPtr m = NULL; + SeqDescrPtr sdp; + + if (bsp == NULL) return NULL; + sdp = bsp->descr; + while (sdp != NULL && sdp->choice != Seq_descr_molinfo) { + sdp = sdp->next; } - if (StringCmp (str + str_len - end_len, end) == 0) { - return str + str_len - end_len; - } else { - return NULL; + if (sdp != NULL) { + m = (MolInfoPtr) sdp->data.ptrvalue; } + return m; } + - 
-static CharPtr DbnameValFromPrefixOrSuffix (CharPtr val) +static CharPtr GetSequenceQualFromBioseq (BioseqPtr bsp, ValNodePtr field) { - CharPtr rval = NULL, stop; + CharPtr rval = NULL; + MolInfoPtr m; - if (val == NULL) { - return NULL; - } + if (bsp == NULL || field == NULL) return NULL; - if (StringNCmp (val, "##", 2) == 0) { - val += 2; - } - rval = StringSave (val); - if ((stop = s_StringEndsWith (rval, "Data-START##")) != NULL - || (stop = s_StringEndsWith (rval, "-START##")) != NULL - || (stop = s_StringEndsWith (rval, "-START##")) != NULL - || (stop = s_StringEndsWith (rval, "START##")) != NULL - || (stop = s_StringEndsWith (rval, "Data-END##")) != NULL - || (stop = s_StringEndsWith (rval, "-END##")) != NULL - || (stop = s_StringEndsWith (rval, "END##")) != NULL) { - *stop = 0; + switch (field->choice) { + case MolinfoField_molecule: + m = GetMolInfoForBioseq (bsp); + if (m != NULL) { + rval = BiomolNameFromBiomol (m->biomol); + } + break; + case MolinfoField_technique: + m = GetMolInfoForBioseq (bsp); + if (m != NULL) { + rval = TechNameFromTech (m->tech); + } + break; + case MolinfoField_completedness: + m = GetMolInfoForBioseq (bsp); + if (m != NULL) { + rval = CompletenessNameFromCompleteness (m->completeness); + } + break; + case MolinfoField_mol_class: + rval = MolNameFromMol (bsp->mol); + break; + case MolinfoField_topology: + rval = TopologyNameFromTopology (bsp->topology); + break; + case MolinfoField_strand: + rval = StrandNameFromStrand (bsp->strand); + break; } + if (rval != NULL) rval = StringSave (rval); return rval; } -static Boolean IsUserFieldStructuredCommentPrefixOrSuffix (UserFieldPtr ufp) -{ - if (ufp == NULL || ufp->label == NULL) { - return FALSE; - } else if (StringCmp (ufp->label->str, "StructuredCommentPrefix") == 0 - || StringCmp (ufp->label->str, "StructuredCommentSuffix") == 0) { - return TRUE; - } else { - return FALSE; - } -} - - -static CharPtr GetStructuredCommentFieldFromUserObject (UserObjectPtr uop, 
StructuredCommentFieldPtr field, StringConstraintPtr scp) +static Boolean RemoveSequenceQualFromBioseq (BioseqPtr bsp, ValNodePtr field) { - UserFieldPtr curr; - CharPtr rval = NULL; + MolInfoPtr m; + Boolean rval = FALSE; - if (!IsUserObjectStructuredComment(uop) || field == NULL) { - return NULL; - } + if (bsp == NULL || field == NULL) return FALSE; - if (field->choice == StructuredCommentField_database) { - for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) { - if (IsUserFieldStructuredCommentPrefixOrSuffix(curr) && curr->choice == 1) { - rval = DbnameValFromPrefixOrSuffix (curr->data.ptrvalue); - if (!IsStringConstraintEmpty (scp) && !DoesStringMatchConstraint (rval, scp)) { - rval = MemFree (rval); - } + switch (field->choice) { + case MolinfoField_molecule: + m = GetMolInfoForBioseq (bsp); + if (m != NULL) { + m->biomol = 0; + rval = TRUE; } - } - } else if (field->choice == StructuredCommentField_named) { - for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) { - if (curr->label != NULL && StringICmp (curr->label->str, field->data.ptrvalue) == 0) { - if (curr->choice == 1) { - if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (curr->data.ptrvalue, scp)) { - rval = StringSave (curr->data.ptrvalue); - } - } + break; + case MolinfoField_technique: + m = GetMolInfoForBioseq (bsp); + if (m != NULL) { + m->tech = 0; + rval = TRUE; } - } - } else if (field->choice == StructuredCommentField_field_name) { - for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) { - if (!IsUserFieldStructuredCommentPrefixOrSuffix (curr) - && DoesObjectIdMatchStringConstraint(curr->label, scp)) { - rval = GetObjectIdString (curr->label); + break; + case MolinfoField_completedness: + m = GetMolInfoForBioseq (bsp); + if (m != NULL) { + m->completeness = 0; + rval = TRUE; } - } + break; + case MolinfoField_mol_class: + bsp->mol = 0; + rval = TRUE; + break; + case MolinfoField_topology: + bsp->topology = 0; + rval = 
TRUE; + break; + case MolinfoField_strand: + bsp->strand = 0; + rval = TRUE; + break; } return rval; } -static Boolean RemoveStructuredCommentFieldFromUserObject (UserObjectPtr uop, ValNodePtr field, StringConstraintPtr scp) +static MolInfoPtr AddMolInfoToBioseq (BioseqPtr bsp) { - UserFieldPtr curr, prev = NULL, ufp_next; - Boolean rval = FALSE, do_remove; - CharPtr val; + SeqDescrPtr sdp; + MolInfoPtr m; - if (!IsUserObjectStructuredComment(uop) || field == NULL) { - return FALSE; - } + sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_molinfo); + m = MolInfoNew (); + sdp->data.ptrvalue = m; + return m; +} - if (field->choice == StructuredCommentField_database) { - for (curr = uop->data; curr != NULL; curr = ufp_next) { - do_remove = FALSE; - ufp_next = curr->next; - if (IsUserFieldStructuredCommentPrefixOrSuffix (curr) - && curr->choice == 1) { - val = DbnameValFromPrefixOrSuffix (curr->data.ptrvalue); - if (IsStringConstraintEmpty (scp) || !DoesStringMatchConstraint (val, scp)) { - do_remove = TRUE; + +static Boolean SetSequenceQualOnBioseq (BioseqPtr bsp, ValNodePtr field) +{ + MolInfoPtr m = NULL; + Boolean rval = FALSE; + Int4 new_val; + + if (bsp == NULL || field == NULL) return FALSE; + + switch (field->choice) { + case MolinfoField_molecule: + if (m == NULL) { + m = GetMolInfoForBioseq (bsp); + if (m == NULL) { + m = AddMolInfoToBioseq (bsp); + rval = TRUE; } - val = MemFree (val); } - if (do_remove) { - if (prev == NULL) { - uop->data = curr->next; - } else { - prev->next = curr->next; - } - curr->next = NULL; - curr = UserFieldFree (curr); + new_val = BiomolFromMoleculeType (field->data.intvalue); + if (m->biomol != new_val) { + m->biomol = new_val; rval = TRUE; - } else { - prev = curr; } - } - } else if (field->choice == StructuredCommentField_named) { - for (curr = uop->data; curr != NULL; curr = ufp_next) { - do_remove = FALSE; - ufp_next = curr->next; - if (curr->label != NULL && StringICmp (curr->label->str, field->data.ptrvalue) == 0) { - if 
(curr->choice == 1) { - if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (curr->data.ptrvalue, scp)) { - do_remove = TRUE; - } + break; + case MolinfoField_technique: + if (m == NULL) { + m = GetMolInfoForBioseq (bsp); + if (m == NULL) { + m = AddMolInfoToBioseq (bsp); } } - if (do_remove) { - if (prev == NULL) { - uop->data = curr->next; - } else { - prev->next = curr->next; - } - curr->next = NULL; - curr = UserFieldFree (curr); + new_val = TechFromTechniqueType (field->data.intvalue); + if (m->tech != new_val) { + m->tech = new_val; rval = TRUE; - } else { - prev = curr; } - } - } else if (field->choice == StructuredCommentField_field_name) { - for (curr = uop->data; curr != NULL; curr = ufp_next) { - do_remove = FALSE; - ufp_next = curr->next; - if (!IsUserFieldStructuredCommentPrefixOrSuffix (curr) && DoesObjectIdMatchStringConstraint (curr->label, scp)) { - if (prev == NULL) { - uop->data = curr->next; - } else { - prev->next = curr->next; + break; + case MolinfoField_completedness: + if (m == NULL) { + m = GetMolInfoForBioseq (bsp); + if (m == NULL) { + m = AddMolInfoToBioseq (bsp); } - curr->next = NULL; - curr = UserFieldFree (curr); + } + new_val = CompletenessFromCompletednessType (field->data.intvalue); + if (m->completeness != new_val) { + m->completeness = new_val; rval = TRUE; - } else { - prev = curr; } - } + break; + case MolinfoField_mol_class: + new_val = MolFromMoleculeClassType (field->data.intvalue); + if (bsp->mol != new_val) { + bsp->mol = new_val; + rval = TRUE; + } + break; + case MolinfoField_topology: + new_val = TopologyFromTopologyType (field->data.intvalue); + if (bsp->topology != new_val) { + bsp->topology = new_val; + rval = TRUE; + } + break; + case MolinfoField_strand: + new_val = StrandFromStrandType (field->data.intvalue); + if (bsp->strand != new_val) { + bsp->strand = new_val; + rval = TRUE; + } + break; } return rval; } -static Boolean SetStructuredCommentFieldOnUserObject (UserObjectPtr uop, 
StructuredCommentFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +static CharPtr GetGenomeProjectIdFromBioseq (BioseqPtr bsp, StringConstraintPtr scp) { - UserFieldPtr curr, first = NULL, last = NULL, ufp; - Boolean rval = FALSE; - CharPtr oldval, newval, fmt; - CharPtr prefix_fmt = "##%sData-START##"; - CharPtr suffix_fmt = "##%sData-END##"; - - if (!IsUserObjectStructuredComment(uop) || field == NULL) { - return FALSE; - } + SeqDescrPtr sdp; + SeqMgrDescContext context; + Char buf[50]; + UserObjectPtr uop; + UserFieldPtr ufp; - if (field->choice == StructuredCommentField_database) { - first = uop->data; - curr = first; - while (curr != NULL) { - if (IsUserFieldStructuredCommentPrefixOrSuffix (curr) - && curr->choice == 1) { - oldval = DbnameValFromPrefixOrSuffix (curr->data.ptrvalue); - if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (oldval, scp)) { - if (StringCmp (curr->label->str, "StructuredCommentPrefix") == 0) { - fmt = prefix_fmt; - } else { - fmt = suffix_fmt; + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); + while (sdp != NULL) { + uop = (UserObjectPtr) sdp->data.ptrvalue; + if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "GenomeProjectsDB") == 0) + { + ufp = uop->data; + while (ufp != NULL) { + if (ufp->label != NULL + && StringCmp (ufp->label->str, "ProjectID") == 0 + && ufp->choice == 2) { + sprintf (buf, "%d", ufp->data.intvalue); + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { + return StringSave (buf); } - SetStringValue (&oldval, value, existing_text); - newval = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (oldval))); - sprintf (newval, fmt, oldval); - curr->data.ptrvalue = MemFree (curr->data.ptrvalue); - curr->data.ptrvalue = newval; - rval = TRUE; } - oldval = MemFree (oldval); + ufp = ufp->next; } - last = curr; - curr = curr->next; } - if (!rval && IsStringConstraintEmpty (scp)) { - /* make prefix */ - curr 
= UserFieldNew (); - curr->label = ObjectIdNew (); - curr->label->str = StringSave ("StructuredCommentPrefix"); - curr->choice = 1; - newval = (CharPtr) MemNew (sizeof (Char) * (StringLen (prefix_fmt) + StringLen (value))); - sprintf (newval, prefix_fmt, value); - curr->data.ptrvalue = newval; - curr->next = first; - uop->data = curr; - first = curr; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context); + } - /* make suffix */ - curr = UserFieldNew (); - curr->label = ObjectIdNew (); - curr->label->str = StringSave ("StructuredCommentSuffix"); - curr->choice = 1; - newval = (CharPtr) MemNew (sizeof (Char) * (StringLen (suffix_fmt) + StringLen (value))); - sprintf (newval, suffix_fmt, value); - curr->data.ptrvalue = newval; - if (last == NULL) { - first->next = curr; - } else { - last->next = curr; - } - rval = TRUE; - } - } else if (field->choice == StructuredCommentField_named) { - last = uop->data; - for (curr = uop->data; curr != NULL; curr = curr->next) { - if (curr->label != NULL && StringICmp (curr->label->str, field->data.ptrvalue) == 0) { - if (curr->choice == 1) { - if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (curr->data.ptrvalue, scp)) { - newval = (CharPtr) curr->data.ptrvalue; - SetStringValue (&newval, value, existing_text); - curr->data.ptrvalue = newval; - rval = TRUE; + return NULL; +} + + +static Boolean RemoveGenomeProjectIdFromBioseq (BioseqPtr bsp, StringConstraintPtr scp) +{ + SeqDescrPtr sdp; + SeqMgrDescContext context; + Char buf[50]; + UserObjectPtr uop; + UserFieldPtr ufp; + ObjValNodePtr ovn; + Boolean rval = FALSE; + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); + while (sdp != NULL) { + uop = (UserObjectPtr) sdp->data.ptrvalue; + if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "GenomeProjectsDB") == 0) + { + ufp = uop->data; + while (ufp != NULL) { + if (ufp->label != NULL + && StringCmp (ufp->label->str, "ProjectID") == 0 + && ufp->choice == 2) { + sprintf 
(buf, "%d", ufp->data.intvalue); + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { + if (sdp->extended != 0) { + ovn = (ObjValNodePtr) sdp; + ovn->idx.deleteme = TRUE; + rval = TRUE; + } } } + ufp = ufp->next; } - last = curr; - } - if (!rval && IsStringConstraintEmpty (scp)) { - curr = UserFieldNew (); - curr->label = ObjectIdNew (); - curr->label->str = StringSave (field->data.ptrvalue); - curr->choice = 1; - curr->data.ptrvalue = StringSave (value); - if (last == NULL) { - uop->data = curr; - } else { - last->next = curr; - } - rval = TRUE; } - } else if (field->choice == StructuredCommentField_field_name) { - last = uop->data; - for (curr = uop->data; curr != NULL; curr = curr->next) { - if (!IsUserFieldStructuredCommentPrefixOrSuffix (curr)) { - if (DoesObjectIdMatchStringConstraint (curr->label, scp)) { - rval = SetObjectIdString (curr->label, value, existing_text); + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context); + } + return rval; +} + + +static Boolean SetGenomeProjectIdOnBioseq (BioseqPtr bsp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +{ + SeqDescrPtr sdp; + SeqMgrDescContext context; + Char buf[50]; + CharPtr tmp; + UserObjectPtr uop; + UserFieldPtr ufp; + Boolean rval = FALSE; + + if (bsp == NULL || !IsAllDigits (value)) { + return FALSE; + } + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); + while (sdp != NULL) { + uop = (UserObjectPtr) sdp->data.ptrvalue; + if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "GenomeProjectsDB") == 0) + { + ufp = uop->data; + while (ufp != NULL) { + if (ufp->label != NULL + && StringCmp (ufp->label->str, "ProjectID") == 0 + && ufp->choice == 2) { + sprintf (buf, "%d", ufp->data.intvalue); + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { + tmp = StringSave (buf); + if (SetStringValue (&tmp, value, existing_text) && IsAllDigits (tmp)) { + ufp->data.intvalue = atoi (tmp); + rval = TRUE; + 
} + tmp = MemFree (tmp); + } } - last = curr; - } - } - if (!rval && IsStringConstraintEmpty (scp)) { - curr = UserFieldNew (); - curr->label = ObjectIdNew (); - curr->label->str = StringSave (value); - curr->choice = 1; - curr->data.ptrvalue = StringSave (""); - if (last == NULL) { - ufp = uop->data; - curr->next = ufp->next; - ufp->next = curr; - } else { - curr->next = last->next; - last->next = curr; + ufp = ufp->next; } - rval = TRUE; } + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context); + } + if (!rval && IsStringConstraintEmpty (scp)) { + sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user); + uop = CreateGenomeProjectsDBUserObject (); + AddIDsToGenomeProjectsDBUserObject (uop, atoi (value), 0); + sdp->data.ptrvalue = uop; + rval = TRUE; } return rval; } +static Boolean SetTextDescriptor (SeqDescrPtr sdp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +{ + Boolean rval = FALSE; + CharPtr cp; + ObjValNodePtr ovp; + Boolean was_empty; + if (sdp == NULL) { + return FALSE; + } + + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (sdp->data.ptrvalue, scp)) { + if (StringHasNoText (sdp->data.ptrvalue)) { + was_empty = TRUE; + } else { + was_empty = FALSE; + } + cp = sdp->data.ptrvalue; + if (SetStringValue (&cp, value, existing_text)) { + rval = TRUE; + } + sdp->data.ptrvalue = cp; + if (was_empty) { + ovp = (ObjValNodePtr) sdp; + ovp->idx.deleteme = FALSE; + } + } + return rval; +} -/* The following functions are used for getting and setting various types of data - * in publications. 
- */ +static CharPtr s_StringEndsWith (CharPtr str, CharPtr end) +{ + Int4 str_len, end_len; + if (end == NULL || str == NULL) { + return NULL; + } + str_len = StringLen (str); + end_len = StringLen (end); + if (end_len > str_len) { + return NULL; + } + if (StringCmp (str + str_len - end_len, end) == 0) { + return str + str_len - end_len; + } else { + return NULL; + } +} -static CharPtr legalMonths [] = { - "Jan", - "Feb", - "Mar", - "Apr", - "May", - "Jun", - "Jul", - "Aug", - "Sep", - "Oct", - "Nov", - "Dec", - NULL -}; - - -static DatePtr ReadDateFromString (CharPtr date_str) +static CharPtr DbnameValFromPrefixOrSuffix (CharPtr val) { - Char ch; - Int2 i; - CharPtr ptr1, ptr2, month = NULL, day = NULL, year = NULL; - CharPtr str; - Int4 day_val = 0; - Uint1 month_num = 0; - long val; - Int4 year_val = 0; - DatePtr dp = NULL; - Boolean critical_error = FALSE; - - if (StringHasNoText (date_str)) return NULL; - - str = StringSave (date_str); - ptr1 = StringChr (str, '-'); - if (ptr1 != NULL) { - *ptr1 = '\0'; - ptr1++; - ptr2 = StringChr (ptr1, '-'); - if (ptr2 != NULL) { - *ptr2 = '\0'; - ptr2++; - day = str; - month = ptr1; - year = ptr2; - } else { - month = str; - year = ptr1; - } - } else { - year = str; - } + CharPtr rval = NULL, stop; - if (day != NULL) { - if (sscanf (day, "%ld", &val) != 1 || val < 1 || val > 31) { - critical_error = TRUE; - } - day_val = val; + if (val == NULL) { + return NULL; } - if (month != NULL) { - for (i = 0; legalMonths [i] != NULL; i++) { - if (StringCmp (month, legalMonths [i]) == 0) { - month_num = i + 1; - break; - } - } - if (legalMonths [i] == NULL) critical_error = TRUE; + if (StringNCmp (val, "##", 2) == 0) { + val += 2; } - - if (year != NULL) { - ptr1 = year; - ch = *ptr1; - while (ch != '\0') { - if (! 
(IS_DIGIT (ch))) critical_error = TRUE; - ptr1++; - ch = *ptr1; - } - if (sscanf (year, "%ld", &val) == 1) { - if (val < 1700 || val > 2100) critical_error = TRUE; - year_val = val - 1900; - } - else - { - critical_error = TRUE; - } + rval = StringSave (val); + if ((stop = s_StringEndsWith (rval, "-START##")) != NULL + || (stop = s_StringEndsWith (rval, "-START##")) != NULL + || (stop = s_StringEndsWith (rval, "START##")) != NULL + || (stop = s_StringEndsWith (rval, "-END##")) != NULL + || (stop = s_StringEndsWith (rval, "END##")) != NULL) { + *stop = 0; } + return rval; +} - str = MemFree (str); - if (!critical_error) { - dp = DateNew(); - dp->data[0] = 1; - dp->data[1] = (Uint1) year_val; - dp->data[2] = month_num; - dp->data[3] = (Uint1) day_val; +static Boolean IsUserFieldStructuredCommentPrefixOrSuffix (UserFieldPtr ufp) +{ + if (ufp == NULL || ufp->label == NULL) { + return FALSE; + } else if (StringCmp (ufp->label->str, "StructuredCommentPrefix") == 0 + || StringCmp (ufp->label->str, "StructuredCommentSuffix") == 0) { + return TRUE; + } else { + return FALSE; } - return dp; } -static CharPtr GetAuthorStringEx (AuthorPtr author, Boolean use_initials) +NLM_EXTERN CharPtr GetStructuredCommentFieldFromUserObject (UserObjectPtr uop, StructuredCommentFieldPtr field, StringConstraintPtr scp) { - CharPtr str = NULL; - NameStdPtr n; - Int4 len; - Boolean has_middle = FALSE; + UserFieldPtr curr; + CharPtr rval = NULL; - if (author == NULL || author->name == NULL) return NULL; + if (!IsUserObjectStructuredComment(uop) || field == NULL) { + return NULL; + } - switch (author->name->choice) { - case 1: /* dbtag */ - str = GetDbtagString (author->name->data); - break; - case 2: /* name */ - n = (NameStdPtr) author->name->data; - if (n != NULL) { - if (use_initials) { - len = StringLen (n->names[0]) + StringLen (n->names[4]) + 2; - str = (CharPtr) MemNew (sizeof (Char) * (len)); - sprintf (str, "%s%s", StringHasNoText (n->names[4]) ? 
"" : n->names[4], - StringHasNoText (n->names[0]) ? "" : n->names[0]); - } else { - len = StringLen (n->names[1]) + StringLen (n->names[0]) + 2; - if (StringLen (n->names[4]) > 2) { - len += StringLen (n->names[4]) - 1; - has_middle = TRUE; + if (field->choice == StructuredCommentField_database) { + for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) { + if (IsUserFieldStructuredCommentPrefixOrSuffix(curr) && curr->choice == 1) { + rval = DbnameValFromPrefixOrSuffix (curr->data.ptrvalue); + if (!IsStringConstraintEmpty (scp) && !DoesStringMatchConstraint (rval, scp)) { + rval = MemFree (rval); + } + } + } + } else if (field->choice == StructuredCommentField_named) { + for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) { + if (curr->label != NULL && StringICmp (curr->label->str, field->data.ptrvalue) == 0) { + if (curr->choice == 1) { + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (curr->data.ptrvalue, scp)) { + rval = StringSave (curr->data.ptrvalue); } - str = (CharPtr) MemNew (sizeof (Char) * (len)); - sprintf (str, "%s%s%s%s%s", - StringHasNoText (n->names[1]) ? "" : n->names[1], - StringHasNoText (n->names[1]) ? "" : " ", - has_middle ? n->names[4] + 2 : "", - has_middle ? " " : "", - StringHasNoText (n->names[0]) ? 
"" : n->names[0]); } } - break; - case 3: /* ml */ - case 4: /* str */ - case 5: /* consortium */ - str = StringSave (author->name->data); - break; + } + } else if (field->choice == StructuredCommentField_field_name) { + for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) { + if (!IsUserFieldStructuredCommentPrefixOrSuffix (curr) + && DoesObjectIdMatchStringConstraint(curr->label, scp)) { + rval = GetObjectIdString (curr->label); + } + } } - return str; -} - - -static CharPtr GetAuthorString (AuthorPtr author) -{ - return GetAuthorStringEx (author, FALSE); + return rval; } -static CharPtr GetAuthorListStringEx (AuthListPtr alp, StringConstraintPtr scp, Boolean use_initials) +static Boolean RemoveStructuredCommentFieldFromUserObject (UserObjectPtr uop, ValNodePtr field, StringConstraintPtr scp) { - CharPtr str = NULL, tmp; - Int4 len = 0; - ValNodePtr list = NULL, vnp; + UserFieldPtr curr, prev = NULL, ufp_next; + Boolean rval = FALSE, do_remove; + CharPtr val; - if (alp == NULL) return NULL; + if (!IsUserObjectStructuredComment(uop) || field == NULL) { + return FALSE; + } - switch (alp->choice) { - case 1: - for (vnp = alp->names; vnp != NULL; vnp = vnp->next) { - tmp = GetAuthorStringEx (vnp->data.ptrvalue, use_initials); - if (tmp != NULL) { - if (DoesStringMatchConstraint (tmp, scp)) { - ValNodeAddPointer (&list, 0, tmp); - len += StringLen (tmp) + 2; - } else { - tmp = MemFree (tmp); - } + if (field->choice == StructuredCommentField_database) { + for (curr = uop->data; curr != NULL; curr = ufp_next) { + do_remove = FALSE; + ufp_next = curr->next; + if (IsUserFieldStructuredCommentPrefixOrSuffix (curr) + && curr->choice == 1) { + val = DbnameValFromPrefixOrSuffix (curr->data.ptrvalue); + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (val, scp)) { + do_remove = TRUE; } + val = MemFree (val); } - break; - case 2: - case 3: - for (vnp = alp->names; vnp != NULL; vnp = vnp->next) { - if (vnp->data.ptrvalue != NULL && 
DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) { - ValNodeAddPointer (&list, 0, StringSave (vnp->data.ptrvalue)); - len += StringLen (vnp->data.ptrvalue) + 2; + if (do_remove) { + if (prev == NULL) { + uop->data = curr->next; + } else { + prev->next = curr->next; } + curr->next = NULL; + curr = UserFieldFree (curr); + rval = TRUE; + } else { + prev = curr; } - break; - } - - if (len > 0) { - str = (CharPtr) MemNew (sizeof (Char) * (len + 1)); - for (vnp = list; vnp != NULL; vnp = vnp->next) { - StringCat (str, vnp->data.ptrvalue); - if (vnp->next != NULL) { - StringCat (str, ", "); + } + } else if (field->choice == StructuredCommentField_named) { + for (curr = uop->data; curr != NULL; curr = ufp_next) { + do_remove = FALSE; + ufp_next = curr->next; + if (curr->label != NULL && StringICmp (curr->label->str, field->data.ptrvalue) == 0) { + if (curr->choice == 1) { + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (curr->data.ptrvalue, scp)) { + do_remove = TRUE; + } + } + } + if (do_remove) { + if (prev == NULL) { + uop->data = curr->next; + } else { + prev->next = curr->next; + } + curr->next = NULL; + curr = UserFieldFree (curr); + rval = TRUE; + } else { + prev = curr; } } - } - return str; -} - - -static CharPtr GetAuthorListString (AuthListPtr alp, StringConstraintPtr scp) -{ - return GetAuthorListStringEx (alp, scp, FALSE); + } else if (field->choice == StructuredCommentField_field_name) { + for (curr = uop->data; curr != NULL; curr = ufp_next) { + do_remove = FALSE; + ufp_next = curr->next; + if (!IsUserFieldStructuredCommentPrefixOrSuffix (curr) && DoesObjectIdMatchStringConstraint (curr->label, scp)) { + if (prev == NULL) { + uop->data = curr->next; + } else { + prev->next = curr->next; + } + curr->next = NULL; + curr = UserFieldFree (curr); + rval = TRUE; + } else { + prev = curr; + } + } + } + return rval; } -static Boolean RemoveAuthorListString (AuthListPtr alp, StringConstraintPtr scp) +static Boolean 
SetStructuredCommentFieldOnUserObject (UserObjectPtr uop, StructuredCommentFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { - CharPtr tmp; - Boolean rval = FALSE; - ValNodePtr vnp, vnp_next, vnp_prev = NULL; + UserFieldPtr curr, first = NULL, last = NULL, ufp; + Boolean rval = FALSE; + CharPtr oldval, newval, fmt; + CharPtr prefix_fmt = "##%s-START##"; + CharPtr suffix_fmt = "##%s-END##"; - if (alp == NULL) return FALSE; + if (!IsUserObjectStructuredComment(uop) || field == NULL) { + return FALSE; + } - switch (alp->choice) { - case 1: - for (vnp = alp->names; vnp != NULL; vnp = vnp_next) { - vnp_next = vnp->next; - tmp = GetAuthorString (vnp->data.ptrvalue); - if (tmp != NULL) { - if (DoesStringMatchConstraint (tmp, scp)) { - if (vnp_prev == NULL) { - alp->names = vnp->next; - } else { - vnp_prev->next = vnp->next; - } - vnp->next = NULL; - vnp->data.ptrvalue = AuthorFree (vnp->data.ptrvalue); - vnp = ValNodeFree (vnp); - rval = TRUE; + if (field->choice == StructuredCommentField_database) { + first = uop->data; + curr = first; + while (curr != NULL) { + if (IsUserFieldStructuredCommentPrefixOrSuffix (curr) + && curr->choice == 1) { + oldval = DbnameValFromPrefixOrSuffix (curr->data.ptrvalue); + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (oldval, scp)) { + if (StringCmp (curr->label->str, "StructuredCommentPrefix") == 0) { + fmt = prefix_fmt; } else { - vnp_prev = vnp; + fmt = suffix_fmt; } - tmp = MemFree (tmp); - } else { - vnp_prev = vnp; + SetStringValue (&oldval, value, existing_text); + newval = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (oldval))); + sprintf (newval, fmt, oldval); + curr->data.ptrvalue = MemFree (curr->data.ptrvalue); + curr->data.ptrvalue = newval; + rval = TRUE; } + oldval = MemFree (oldval); } - break; - case 2: - case 3: - for (vnp = alp->names; vnp != NULL; vnp = vnp_next) { - vnp_next = vnp->next; - if (vnp->data.ptrvalue != NULL && DoesStringMatchConstraint 
(vnp->data.ptrvalue, scp)) { - if (vnp_prev == NULL) { - alp->names = vnp->next; - } else { - vnp_prev->next = vnp->next; + last = curr; + curr = curr->next; + } + if (!rval && IsStringConstraintEmpty (scp)) { + /* make prefix */ + curr = UserFieldNew (); + curr->label = ObjectIdNew (); + curr->label->str = StringSave ("StructuredCommentPrefix"); + curr->choice = 1; + newval = (CharPtr) MemNew (sizeof (Char) * (StringLen (prefix_fmt) + StringLen (value))); + sprintf (newval, prefix_fmt, value); + curr->data.ptrvalue = newval; + curr->next = first; + uop->data = curr; + first = curr; + + /* make suffix */ + curr = UserFieldNew (); + curr->label = ObjectIdNew (); + curr->label->str = StringSave ("StructuredCommentSuffix"); + curr->choice = 1; + newval = (CharPtr) MemNew (sizeof (Char) * (StringLen (suffix_fmt) + StringLen (value))); + sprintf (newval, suffix_fmt, value); + curr->data.ptrvalue = newval; + if (last == NULL) { + first->next = curr; + } else { + last->next = curr; + } + rval = TRUE; + } + } else if (field->choice == StructuredCommentField_named) { + last = uop->data; + for (curr = uop->data; curr != NULL; curr = curr->next) { + if (curr->label != NULL && StringICmp (curr->label->str, field->data.ptrvalue) == 0) { + if (curr->choice == 1) { + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (curr->data.ptrvalue, scp)) { + newval = (CharPtr) curr->data.ptrvalue; + SetStringValue (&newval, value, existing_text); + curr->data.ptrvalue = newval; + rval = TRUE; } - vnp->next = NULL; - vnp = ValNodeFreeData (vnp); - rval = TRUE; - } else { - vnp_prev = vnp; } } - break; + last = curr; + } + if (!rval && IsStringConstraintEmpty (scp)) { + curr = UserFieldNew (); + curr->label = ObjectIdNew (); + curr->label->str = StringSave (field->data.ptrvalue); + curr->choice = 1; + curr->data.ptrvalue = StringSave (value); + if (last == NULL) { + uop->data = curr; + } else { + last->next = curr; + } + rval = TRUE; + } + } else if (field->choice == 
StructuredCommentField_field_name) { + last = uop->data; + for (curr = uop->data; curr != NULL; curr = curr->next) { + if (!IsUserFieldStructuredCommentPrefixOrSuffix (curr)) { + if (DoesObjectIdMatchStringConstraint (curr->label, scp)) { + rval = SetObjectIdString (curr->label, value, existing_text); + } + last = curr; + } + } + if (!rval && IsStringConstraintEmpty (scp)) { + curr = UserFieldNew (); + curr->label = ObjectIdNew (); + curr->label->str = StringSave (value); + curr->choice = 1; + curr->data.ptrvalue = StringSave (""); + if (last == NULL) { + ufp = uop->data; + curr->next = ufp->next; + ufp->next = curr; + } else { + curr->next = last->next; + last->next = curr; + } + rval = TRUE; + } } - return rval; } -static NameStdPtr ReadNameFromString (CharPtr str, CharPtr PNTR next_name) +typedef struct dblinkname { + Int4 field_type; + CharPtr field_name; +} DBLinkNameData, PNTR DBLinkNamePtr; + +static DBLinkNameData dblink_names[] = { + { DBLink_field_type_trace_assembly , "Trace Assembly Archive" } , + { DBLink_field_type_bio_sample , "Bio Sample" } , + { DBLink_field_type_probe_db , "ProbeDB" } , + { DBLink_field_type_sequence_read_archve , "Sequence Read Archive" } +}; + +#define NUM_dblinkname sizeof (dblink_names) / sizeof (DBLinkNameData) + +NLM_EXTERN CharPtr GetDBLinkNameFromDBLinkFieldType (Int4 field_type) { - CharPtr cp_end, cp_space; - CharPtr p_repl1 = NULL, p_repl2 = NULL, p_repl3 = NULL; - Char ch_r1, ch_r2, ch_r3; - NameStdPtr n; + CharPtr str = NULL; + Int4 i; - if (StringHasNoText (str)) - { - if (next_name != NULL) - { - *next_name = NULL; + for (i = 0; i < NUM_dblinkname && str == NULL; i++) { + if (field_type == dblink_names[i].field_type) { + str = dblink_names[i].field_name; } - return NULL; + } + if (str == NULL) { + str = "Unknown field type"; } + return str; +} - /* skip over any leading spaces */ - str += StringSpn (str, " \t"); - /* skip over "and" if found */ - if (StringNCmp (str, "and ", 4) == 0) - { - str += 4; - } - if 
(StringHasNoText (str)) { - str = MemFree (str); +NLM_EXTERN Int4 GetDBLinkFieldTypeFromDBLinkName (CharPtr field_name) +{ + Int4 rval = -1; + Int4 i; + + for (i = 0; i < NUM_dblinkname && rval < 0; i++) { + if (StringCmp (field_name, dblink_names[i].field_name) == 0) { + rval = dblink_names[i].field_type; + } + } + return rval; +} + + +NLM_EXTERN Int4 GetNumDBLinkFields (void) +{ + return NUM_dblinkname; +} + + +static CharPtr GetDBLinkFieldFromUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp) +{ + UserFieldPtr curr; + CharPtr rval = NULL; + CharPtr field_name; + Char buf[15]; + CharPtr PNTR cpp; + Int4Ptr ipp; + Int4 i; + + if (!IsUserObjectDBLink(uop) || field < 1) { return NULL; } - cp_end = StringChr (str, ','); - if (cp_end != NULL) - { - p_repl1 = cp_end; - ch_r1 = *p_repl1; - *cp_end = 0; - if (next_name != NULL) - { - if (StringHasNoText (cp_end + 1)) - { - *next_name = NULL; - } - else - { - *next_name = cp_end + 1; + field_name = GetDBLinkNameFromDBLinkFieldType (field); + for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) { + if (curr->label != NULL && StringCmp (curr->label->str, field_name) == 0) { + if (curr->choice == 7) { + if (curr->num > 0 && (cpp = (CharPtr PNTR) curr->data.ptrvalue) != NULL) { + for (i = 0; i < curr->num && rval == NULL; i++) { + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp)) { + rval = StringSave (cpp[i]); + } + } + } + } else if (curr->choice == 8) { + if (curr->num > 0 && (ipp = (Int4Ptr) curr->data.ptrvalue) != NULL) { + for (i = 0; i < curr->num && rval == NULL; i++) { + sprintf (buf, "%d", ipp[i]); + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { + rval = StringSave (buf); + } + } + } } } } - else if (next_name != NULL) - { - *next_name = NULL; + return rval; +} + + +static Boolean RemoveDBLinkFieldFromUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp) +{ + UserFieldPtr curr, prev_type = NULL, next_type; + 
Boolean rval = FALSE; + Char buf[15]; + CharPtr field_name; + CharPtr PNTR cpp; + Int4Ptr ipp; + Int4 i, j; + + if (!IsUserObjectDBLink(uop) || field < 1) { + return FALSE; } - n = NameStdNew (); - /* look for elements in name */ - cp_space = StringRChr (str, ' '); - if (cp_space == NULL) - { - n->names[0] = StringSave (str); - } - else - { - n->names[0] = StringSave (cp_space + 1); - while (isspace (*cp_space)) - { - cp_space--; - } - p_repl2 = cp_space + 1; - ch_r2 = *p_repl2; - *(cp_space + 1) = 0; - cp_space = StringChr (str, ' '); - if (cp_space == NULL) - { - n->names[1] = StringSave (str); - n->names[4] = (CharPtr) MemNew (sizeof (Char) * 3); - sprintf (n->names[4], "%c.", *(n->names[1])); + field_name = GetDBLinkNameFromDBLinkFieldType (field); + for (curr = uop->data; curr != NULL; curr = next_type) { + next_type = curr->next; + if (curr->label != NULL && StringCmp (curr->label->str, field_name) == 0) { + if (curr->choice == 7) { + if (curr->num > 0 && (cpp = (CharPtr PNTR) curr->data.ptrvalue) != NULL) { + for (i = 0; i < curr->num; i++) { + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp)) { + cpp[i] = MemFree (cpp[i]); + for (j = i + 1; j < curr->num; j++) { + cpp[j - 1] = cpp[j]; + } + curr->num--; + rval = TRUE; + i--; + } + } + } + } else if (curr->choice == 8) { + if (curr->num > 0 && (ipp = (Int4Ptr) curr->data.ptrvalue) != NULL) { + for (i = 0; i < curr->num; i++) { + sprintf (buf, "%d", ipp[i]); + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { + for (j = i + 1; j < curr->num; j++) { + ipp[j - 1] = ipp[j]; + } + curr->num--; + rval = TRUE; + i--; + } + } + } + } } - else - { - p_repl3 = cp_space; - ch_r3 = *p_repl3; - *(cp_space) = 0; - n->names[1] = StringSave (str); - - cp_space++; - while (isspace (*cp_space)) - { - cp_space++; + if (curr->num == 0) { + if (prev_type == NULL) { + uop->data = next_type; + } else { + prev_type->next = next_type; } - - n->names[4] = (CharPtr) MemNew (sizeof 
(Char) * (4 + StringLen (cp_space))); - sprintf (n->names[4], "%c.%s.", *(n->names[1]), cp_space); + curr->next = NULL; + curr = UserFieldFree (curr); + } else { + prev_type = curr; } } - if (p_repl1 != NULL) { - *p_repl1 = ch_r1; - } - if (p_repl2 != NULL) { - *p_repl2 = ch_r2; - } - if (p_repl3 != NULL) { - *p_repl3 = ch_r3; - } - - return n; + return rval; } -static ValNodePtr ReadNameListFromString (CharPtr value) +static Boolean SetDBLinkFieldOnUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { - ValNodePtr names = NULL; - AuthorPtr ap; - NameStdPtr n; - CharPtr next_cp, cp; + UserFieldPtr curr, last = NULL; + Boolean rval = FALSE; + CharPtr newval; + CharPtr field_name; + CharPtr PNTR cpp; + CharPtr PNTR new_cpp; + Int4Ptr ipp, new_ipp; + Int4 i; + Char buf[15]; + + if (!IsUserObjectDBLink(uop) || field < 1) { + return FALSE; + } - cp = value; - next_cp = NULL; - while (cp != NULL) { - n = ReadNameFromString (cp, &next_cp); - if (n != NULL) { - ap = AuthorNew (); - ap->name = PersonIdNew (); - ap->name->choice = 2; - ap->name->data = n; - ValNodeAddPointer (&names, 1, ap); + field_name = GetDBLinkNameFromDBLinkFieldType (field); + + for (curr = uop->data; curr != NULL; curr = curr->next) { + if (curr->label != NULL && StringCmp (curr->label->str, field_name) == 0) { + if (curr->choice == 7) { + if (curr->num > 0 && (cpp = (CharPtr PNTR) curr->data.ptrvalue) != NULL) { + for (i = 0; i < curr->num; i++) { + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp)) { + newval = cpp[i]; + SetStringValue (&newval, value, existing_text); + cpp[i] = newval; + rval = TRUE; + } + } + } + if (!rval && IsStringConstraintEmpty (scp)) { + new_cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (curr->num + 1)); + for (i = 0; i < curr->num; i++) { + new_cpp[i] = cpp[i]; + cpp[i] = NULL; + } + new_cpp[i] = StringSave (value); + cpp = MemFree (cpp); + curr->data.ptrvalue = new_cpp; + curr->num++; + rval = 
TRUE; + } + } else if (curr->choice == 8 && IsAllDigits (value)) { + if (curr->num > 0 && (ipp = (Int4Ptr) curr->data.ptrvalue) != NULL) { + for (i = 0; i < curr->num; i++) { + sprintf (buf, "%d", ipp[i]); + if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { + newval = StringSave (buf); + SetStringValue (&newval, value, existing_text); + if (IsAllDigits (newval)) { + ipp[i] = atoi (newval); + rval = TRUE; + } + newval = MemFree (newval); + } + } + } + if (!rval && IsStringConstraintEmpty (scp)) { + new_ipp = (Int4Ptr) MemNew (sizeof (Int4) * (curr->num + 1)); + for (i = 0; i < curr->num; i++) { + new_ipp[i] = ipp[i]; + } + new_ipp[i] = atoi (value); + ipp = MemFree (ipp); + curr->data.ptrvalue = new_ipp; + curr->num++; + rval = TRUE; + } + } } - cp = next_cp; + last = curr; } - return names; + if (!rval && IsStringConstraintEmpty (scp) && (field != DBLink_field_type_trace_assembly || IsAllDigits (value))) { + curr = UserFieldNew (); + curr->label = ObjectIdNew (); + curr->label->str = StringSave (field_name); + + if (field == DBLink_field_type_trace_assembly) { + curr->choice = 8; + curr->num = 1; + ipp = (Int4Ptr) MemNew (sizeof (Int4) * curr->num); + ipp[0] = atoi (value); + curr->data.ptrvalue = ipp; + } else { + curr->choice = 7; + curr->num = 1; + cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * curr->num); + cpp[0] = StringSave (value); + curr->data.ptrvalue = cpp; + } + if (last == NULL) { + uop->data = curr; + } else { + last->next = curr; + } + rval = TRUE; + } + return rval; } -static ValNodePtr FreeNameList (Uint1 choice, ValNodePtr name_list) -{ - ValNodePtr curr, next; - curr = name_list; - while (curr != NULL) { - if (choice == 1) /* std type */ - AuthorFree((AuthorPtr) curr->data.ptrvalue); - else /* ml or str */ - MemFree(curr->data.ptrvalue); - next = curr->next; - MemFree(curr); - curr = next; - } - return curr; -} +/* The following functions are used for getting and setting various types of data + * in publications. 
+ */ + -static Boolean SetAuthorListFromString (AuthListPtr alp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +static CharPtr legalMonths [] = { + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", + NULL +}; + + +static DatePtr ReadDateFromString (CharPtr date_str) { - ValNodePtr name_list = NULL, vnp, vnp_prev, vnp_next, vnp_tmp; - CharPtr tmp; - Boolean rval = FALSE, found, ok_to_set = FALSE; + Char ch; + Int2 i; + CharPtr ptr1, ptr2, month = NULL, day = NULL, year = NULL; + CharPtr str; + Int4 day_val = 0; + Uint1 month_num = 0; + long val; + Int4 year_val = 0; + DatePtr dp = NULL; + Boolean critical_error = FALSE; - if (alp == NULL || StringHasNoText (value)) return FALSE; + if (StringHasNoText (date_str)) return NULL; - /* can only combine lists if existing list is same type */ - if (alp->names == NULL || alp->choice == 1) { - ok_to_set = TRUE; + str = StringSave (date_str); + ptr1 = StringChr (str, '-'); + if (ptr1 != NULL) { + *ptr1 = '\0'; + ptr1++; + ptr2 = StringChr (ptr1, '-'); + if (ptr2 != NULL) { + *ptr2 = '\0'; + ptr2++; + day = str; + month = ptr1; + year = ptr2; + } else { + month = str; + year = ptr1; + } } else { - switch (existing_text) { - case ExistingTextOption_replace_old: - if (IsStringConstraintEmpty (scp)) { - ok_to_set = TRUE; - } - break; - case ExistingTextOption_append_space: - case ExistingTextOption_append_colon: - case ExistingTextOption_append_none: - case ExistingTextOption_prefix_space: - case ExistingTextOption_prefix_colon: - case ExistingTextOption_prefix_none: - ok_to_set = TRUE; - break; + year = str; + } + + if (day != NULL) { + if (sscanf (day, "%ld", &val) != 1 || val < 1 || val > 31) { + critical_error = TRUE; } + day_val = val; } - if (!ok_to_set) { - return FALSE; + + if (month != NULL) { + for (i = 0; legalMonths [i] != NULL; i++) { + if (StringCmp (month, legalMonths [i]) == 0) { + month_num = i + 1; + break; + } + } + if (legalMonths [i] == 
NULL) critical_error = TRUE; } - if (alp->names == NULL && IsStringConstraintEmpty (scp)) { - /* no prior values - just add new list */ - name_list = ReadNameListFromString (value); - if (name_list != NULL) { - ValNodeLink (&alp->names, name_list); - alp->choice = 1; - rval = TRUE; + if (year != NULL) { + ptr1 = year; + ch = *ptr1; + while (ch != '\0') { + if (! (IS_DIGIT (ch))) critical_error = TRUE; + ptr1++; + ch = *ptr1; } - } else { - switch (existing_text) { - case ExistingTextOption_append_semi: - case ExistingTextOption_append_comma: - name_list = ReadNameListFromString (value); - if (IsStringConstraintEmpty (scp)) { - /* append to list */ - ValNodeLink (&(alp->names), name_list); - rval = TRUE; - } else { - /* insert in list after first match */ - vnp = alp->names; - found = FALSE; - while (vnp != NULL && !found) { - tmp = GetAuthorString (vnp->data.ptrvalue); - if (tmp != NULL && DoesStringMatchConstraint (tmp, scp)) { - found = TRUE; - } - tmp = MemFree (tmp); - if (!found) { - vnp = vnp->next; - } - } - if (found) { - ValNodeLink (&name_list, vnp->next); - vnp->next = name_list; - rval = TRUE; - } - } - break; - case ExistingTextOption_prefix_semi: - case ExistingTextOption_prefix_comma: - name_list = ReadNameListFromString (value); - if (IsStringConstraintEmpty (scp)) { - /* prepend to list */ - ValNodeLink (&name_list, alp->names); - alp->names = name_list; - rval = TRUE; - } else { - /* insert in list before first match */ - vnp = alp->names; - vnp_prev = NULL; - found = FALSE; - while (vnp != NULL && !found) { - tmp = GetAuthorString (vnp->data.ptrvalue); - if (tmp != NULL && DoesStringMatchConstraint (tmp, scp)) { - found = TRUE; - } - tmp = MemFree (tmp); - if (!found) { - vnp_prev = vnp; - vnp = vnp->next; - } - } - if (found) { - if (vnp_prev == NULL) { - ValNodeLink (&name_list, alp->names); - alp->names = name_list; - } else { - ValNodeLink (&name_list, vnp_prev->next); - vnp_prev->next = name_list; - } - rval = TRUE; - } - } - break; - case 
ExistingTextOption_replace_old: - name_list = ReadNameListFromString (value); - if (IsStringConstraintEmpty (scp)) { - /* replace entire list */ - alp->names = FreeNameList (alp->choice, alp->names); - alp->names = name_list; - alp->choice = 1; - rval = TRUE; - } else { - /* replace first author that matches with new match, remove others that match */ - vnp = alp->names; - vnp_prev = NULL; - found = FALSE; - while (vnp != NULL) { - vnp_next = vnp->next; - tmp = GetAuthorString (vnp->data.ptrvalue); - if (tmp != NULL && DoesStringMatchConstraint (tmp, scp)) { - if (found) { - if (vnp_prev == NULL) { - alp->names = vnp->next; - } else { - vnp_prev->next = vnp->next; - } - } else { - vnp_tmp = name_list; - while (vnp_tmp->next != NULL) { - vnp_tmp = vnp_tmp->next; - } - ValNodeLink (&name_list, vnp->next); - if (vnp_prev == NULL) { - alp->names = name_list; - } else { - vnp_prev->next = name_list; - } - vnp_prev = vnp_tmp; - found = TRUE; - rval = TRUE; - } - vnp->next = NULL; - vnp = FreeNameList (alp->choice, vnp); - } else { - vnp_prev = vnp; - } - tmp = MemFree (tmp); - vnp = vnp_next; - } - } - break; - case ExistingTextOption_append_space: - case ExistingTextOption_append_colon: - case ExistingTextOption_append_none: - case ExistingTextOption_prefix_space: - case ExistingTextOption_prefix_colon: - case ExistingTextOption_prefix_none: - vnp_prev = NULL; - for (vnp = alp->names; vnp != NULL; vnp = vnp_next) { - vnp_next = vnp->next; - if (alp->choice == 1) { - tmp = GetAuthorString (vnp->data.ptrvalue); - if (tmp != NULL && DoesStringMatchConstraint (tmp, scp) - && SetStringValue (&tmp, value, existing_text)) { - name_list = ReadNameListFromString (tmp); - if (name_list != NULL) { - vnp_tmp = name_list; - while (vnp_tmp->next != NULL) { - vnp_tmp = vnp_tmp->next; - } - ValNodeLink (&name_list, vnp_next); - if (vnp_prev == NULL) { - alp->names = name_list; - } else { - vnp_prev->next = name_list; - } - vnp_prev = vnp_tmp; - vnp->next = NULL; - vnp = FreeNameList 
(alp->choice, vnp); - rval = TRUE; - name_list = NULL; - } else { - vnp_prev = vnp; - } - } else { - vnp_prev = vnp; - } - tmp = MemFree (tmp); - } else { - if (vnp->data.ptrvalue != NULL && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) { - tmp = (CharPtr) vnp->data.ptrvalue; - rval |= SetStringValue (&tmp, value, existing_text); - vnp->data.ptrvalue = tmp; - } - } - } - break; + if (sscanf (year, "%ld", &val) == 1) { + if (val < 1700 || val > 2100) critical_error = TRUE; + year_val = val - 1900; + } + else + { + critical_error = TRUE; } } - if (!rval && name_list != NULL) { - name_list = FreeNameList (1, vnp); + + str = MemFree (str); + + if (!critical_error) { + dp = DateNew(); + dp->data[0] = 1; + dp->data[1] = (Uint1) year_val; + dp->data[2] = month_num; + dp->data[3] = (Uint1) day_val; } - return rval; + return dp; } -static CharPtr GetPubFieldFromAffil (AffilPtr ap, Int4 field, StringConstraintPtr scp) +static CharPtr GetAuthorStringEx (AuthorPtr author, Boolean use_initials) { CharPtr str = NULL; + NameStdPtr n; + Int4 len; + Boolean has_middle = FALSE; - if (ap == NULL) return NULL; + if (author == NULL || author->name == NULL) return NULL; - switch (field) { - case Publication_field_affiliation: - if (!StringHasNoText (ap->affil) && DoesStringMatchConstraint (ap->affil, scp)) { - str = StringSave (ap->affil); - } - break; - case Publication_field_affil_div: - if (!StringHasNoText (ap->div) && DoesStringMatchConstraint (ap->div, scp)) { - str = StringSave (ap->div); - } - break; - case Publication_field_affil_city: - if (!StringHasNoText (ap->city) && DoesStringMatchConstraint (ap->city, scp)) { - str = StringSave (ap->city); - } - break; - case Publication_field_affil_sub: - if (!StringHasNoText (ap->sub) && DoesStringMatchConstraint (ap->sub, scp)) { - str = StringSave (ap->sub); - } - break; - case Publication_field_affil_country: - if (!StringHasNoText (ap->country) && DoesStringMatchConstraint (ap->country, scp)) { - str = StringSave 
(ap->country); - } - break; - case Publication_field_affil_street: - if (!StringHasNoText (ap->street) && DoesStringMatchConstraint (ap->street, scp)) { - str = StringSave (ap->street); - } - break; - case Publication_field_affil_email: - if (!StringHasNoText (ap->email) && DoesStringMatchConstraint (ap->email, scp)) { - str = StringSave (ap->email); - } - break; - case Publication_field_affil_fax: - if (!StringHasNoText (ap->fax) && DoesStringMatchConstraint (ap->fax, scp)) { - str = StringSave (ap->fax); - } + switch (author->name->choice) { + case 1: /* dbtag */ + str = GetDbtagString (author->name->data); break; - case Publication_field_affil_phone: - if (!StringHasNoText (ap->phone) && DoesStringMatchConstraint (ap->phone, scp)) { - str = StringSave (ap->phone); + case 2: /* name */ + n = (NameStdPtr) author->name->data; + if (n != NULL) { + if (use_initials) { + len = StringLen (n->names[0]) + StringLen (n->names[4]) + 2; + str = (CharPtr) MemNew (sizeof (Char) * (len)); + sprintf (str, "%s%s", StringHasNoText (n->names[4]) ? "" : n->names[4], + StringHasNoText (n->names[0]) ? "" : n->names[0]); + } else { + len = StringLen (n->names[1]) + StringLen (n->names[0]) + 2; + if (StringLen (n->names[4]) > 2) { + len += StringLen (n->names[4]) - 1; + has_middle = TRUE; + } + str = (CharPtr) MemNew (sizeof (Char) * (len)); + sprintf (str, "%s%s%s%s%s", + StringHasNoText (n->names[1]) ? "" : n->names[1], + StringHasNoText (n->names[1]) ? "" : " ", + has_middle ? n->names[4] + 2 : "", + has_middle ? " " : "", + StringHasNoText (n->names[0]) ? 
"" : n->names[0]); + } } break; - case Publication_field_affil_zipcode: - if (!StringHasNoText (ap->postal_code) && DoesStringMatchConstraint (ap->postal_code, scp)) { - str = StringSave (ap->postal_code); - } + case 3: /* ml */ + case 4: /* str */ + case 5: /* consortium */ + str = StringSave (author->name->data); break; } return str; } -static Boolean RemovePubFieldFromAffil (AffilPtr ap, Int4 field, StringConstraintPtr scp) +static CharPtr GetAuthorString (AuthorPtr author) { - Boolean rval = FALSE; - if (ap == NULL) return FALSE; - - switch (field) { - case Publication_field_affiliation: - if (!StringHasNoText (ap->affil) && DoesStringMatchConstraint (ap->affil, scp)) { - ap->affil = MemFree (ap->affil); - rval = TRUE; - } - break; - case Publication_field_affil_div: - if (!StringHasNoText (ap->div) && DoesStringMatchConstraint (ap->div, scp)) { - ap->div = MemFree (ap->div); - rval = TRUE; - } - break; - case Publication_field_affil_city: - if (!StringHasNoText (ap->city) && DoesStringMatchConstraint (ap->city, scp)) { - ap->city = MemFree (ap->city); - rval = TRUE; - } - break; - case Publication_field_affil_sub: - if (!StringHasNoText (ap->sub) && DoesStringMatchConstraint (ap->sub, scp)) { - ap->sub = MemFree (ap->sub); - rval = TRUE; - } - break; - case Publication_field_affil_country: - if (!StringHasNoText (ap->country) && DoesStringMatchConstraint (ap->country, scp)) { - ap->country = MemFree (ap->country); - rval = TRUE; - } - break; - case Publication_field_affil_street: - if (!StringHasNoText (ap->street) && DoesStringMatchConstraint (ap->street, scp)) { - ap->street = MemFree (ap->street); - rval = TRUE; - } - break; - case Publication_field_affil_email: - if (!StringHasNoText (ap->email) && DoesStringMatchConstraint (ap->email, scp)) { - ap->email = MemFree (ap->email); - rval = TRUE; - } - break; - case Publication_field_affil_fax: - if (!StringHasNoText (ap->fax) && DoesStringMatchConstraint (ap->fax, scp)) { - ap->fax = MemFree (ap->fax); - rval 
= TRUE; - } - break; - case Publication_field_affil_phone: - if (!StringHasNoText (ap->phone) && DoesStringMatchConstraint (ap->phone, scp)) { - ap->phone = MemFree (ap->phone); - rval = TRUE; - } - break; - case Publication_field_affil_zipcode: - if (!StringHasNoText (ap->postal_code) && DoesStringMatchConstraint (ap->postal_code, scp)) { - ap->postal_code = MemFree (ap->postal_code); - rval = TRUE; - } - break; - } - return rval; -} + return GetAuthorStringEx (author, FALSE); +} -static Boolean SetAffilPubField (AffilPtr ap, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +static CharPtr GetAuthorListStringEx (AuthListPtr alp, StringConstraintPtr scp, Boolean use_initials) { - Boolean rval = FALSE; - if (ap == NULL) return FALSE; + CharPtr str = NULL, tmp; + Int4 len = 0; + ValNodePtr list = NULL, vnp; - switch (field) { - case Publication_field_affiliation: - if (!StringHasNoText (ap->affil) && DoesStringMatchConstraint (ap->affil, scp)) { - rval = SetStringValue (&(ap->affil), value, existing_text); - } - break; - case Publication_field_affil_div: - if (!StringHasNoText (ap->div) && DoesStringMatchConstraint (ap->div, scp)) { - rval = SetStringValue (&(ap->div), value, existing_text); - } - break; - case Publication_field_affil_city: - if (!StringHasNoText (ap->city) && DoesStringMatchConstraint (ap->city, scp)) { - rval = SetStringValue (&(ap->city), value, existing_text); - } - break; - case Publication_field_affil_sub: - if (!StringHasNoText (ap->sub) && DoesStringMatchConstraint (ap->sub, scp)) { - rval = SetStringValue (&(ap->sub), value, existing_text); - } - break; - case Publication_field_affil_country: - if (!StringHasNoText (ap->country) && DoesStringMatchConstraint (ap->country, scp)) { - rval = SetStringValue (&(ap->country), value, existing_text); - } - break; - case Publication_field_affil_street: - if (!StringHasNoText (ap->street) && DoesStringMatchConstraint (ap->street, scp)) { - rval = SetStringValue (&(ap->street), 
value, existing_text); - } - break; - case Publication_field_affil_email: - if (!StringHasNoText (ap->email) && DoesStringMatchConstraint (ap->email, scp)) { - rval = SetStringValue (&(ap->email), value, existing_text); - } - break; - case Publication_field_affil_fax: - if (!StringHasNoText (ap->fax) && DoesStringMatchConstraint (ap->fax, scp)) { - rval = SetStringValue (&(ap->fax), value, existing_text); + if (alp == NULL) return NULL; + + switch (alp->choice) { + case 1: + for (vnp = alp->names; vnp != NULL; vnp = vnp->next) { + tmp = GetAuthorStringEx (vnp->data.ptrvalue, use_initials); + if (tmp != NULL) { + if (DoesStringMatchConstraint (tmp, scp)) { + ValNodeAddPointer (&list, 0, tmp); + len += StringLen (tmp) + 2; + } else { + tmp = MemFree (tmp); + } + } } break; - case Publication_field_affil_phone: - if (!StringHasNoText (ap->phone) && DoesStringMatchConstraint (ap->phone, scp)) { - rval = SetStringValue (&(ap->phone), value, existing_text); + case 2: + case 3: + for (vnp = alp->names; vnp != NULL; vnp = vnp->next) { + if (vnp->data.ptrvalue != NULL && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) { + ValNodeAddPointer (&list, 0, StringSave (vnp->data.ptrvalue)); + len += StringLen (vnp->data.ptrvalue) + 2; + } } break; - case Publication_field_affil_zipcode: - if (!StringHasNoText (ap->postal_code) && DoesStringMatchConstraint (ap->postal_code, scp)) { - rval = SetStringValue (&(ap->postal_code), value, existing_text); + } + + if (len > 0) { + str = (CharPtr) MemNew (sizeof (Char) * (len + 1)); + for (vnp = list; vnp != NULL; vnp = vnp->next) { + StringCat (str, vnp->data.ptrvalue); + if (vnp->next != NULL) { + StringCat (str, ", "); } - break; + } } - return rval; + return str; } -static CharPtr GetPubFieldFromImprint (ImprintPtr imprint, Int4 field, StringConstraintPtr scp) +static CharPtr GetAuthorListString (AuthListPtr alp, StringConstraintPtr scp) { - CharPtr str = NULL; - if (imprint == NULL) return NULL; - - switch (field) { - case 
Publication_field_volume: - if (!StringHasNoText (imprint->volume) && DoesStringMatchConstraint (imprint->volume, scp)) { - str = StringSave (imprint->volume); - } - break; - case Publication_field_issue: - if (!StringHasNoText (imprint->issue) && DoesStringMatchConstraint (imprint->issue, scp)) { - str = StringSave (imprint->issue); - } - break; - case Publication_field_pages: - if (!StringHasNoText (imprint->pages) && DoesStringMatchConstraint (imprint->pages, scp)) { - str = StringSave (imprint->pages); - } - break; - case Publication_field_date: - str = PrintDate (imprint->date); - if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) { - str = MemFree (str); - } - break; - } - return str; + return GetAuthorListStringEx (alp, scp, FALSE); } -static Boolean RemovePubDate (DatePtr PNTR pDate, StringConstraintPtr scp) +static Boolean RemoveAuthorListString (AuthListPtr alp, StringConstraintPtr scp) { - CharPtr str; + CharPtr tmp; Boolean rval = FALSE; + ValNodePtr vnp, vnp_next, vnp_prev = NULL; - if (pDate == NULL || *pDate == NULL) { - return FALSE; - } + if (alp == NULL) return FALSE; - str = PrintDate (*pDate); - if (!StringHasNoText (str) && DoesStringMatchConstraint (str, scp)) { - *pDate = DateFree (*pDate); - rval = TRUE; + switch (alp->choice) { + case 1: + for (vnp = alp->names; vnp != NULL; vnp = vnp_next) { + vnp_next = vnp->next; + tmp = GetAuthorString (vnp->data.ptrvalue); + if (tmp != NULL) { + if (DoesStringMatchConstraint (tmp, scp)) { + if (vnp_prev == NULL) { + alp->names = vnp->next; + } else { + vnp_prev->next = vnp->next; + } + vnp->next = NULL; + vnp->data.ptrvalue = AuthorFree (vnp->data.ptrvalue); + vnp = ValNodeFree (vnp); + rval = TRUE; + } else { + vnp_prev = vnp; + } + tmp = MemFree (tmp); + } else { + vnp_prev = vnp; + } + } + break; + case 2: + case 3: + for (vnp = alp->names; vnp != NULL; vnp = vnp_next) { + vnp_next = vnp->next; + if (vnp->data.ptrvalue != NULL && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) 
{ + if (vnp_prev == NULL) { + alp->names = vnp->next; + } else { + vnp_prev->next = vnp->next; + } + vnp->next = NULL; + vnp = ValNodeFreeData (vnp); + rval = TRUE; + } else { + vnp_prev = vnp; + } + } + break; } - str = MemFree (str); + return rval; } -static Boolean SetPubDate (DatePtr PNTR pDate, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +static NameStdPtr ReadNameFromString (CharPtr str, CharPtr PNTR next_name) { - CharPtr tmp; - DatePtr dp = NULL; - Boolean rval = FALSE; + CharPtr cp_end, cp_space; + CharPtr p_repl1 = NULL, p_repl2 = NULL, p_repl3 = NULL; + Char ch_r1, ch_r2, ch_r3; + NameStdPtr n; - if (pDate == NULL) { - return FALSE; - } - tmp = PrintDate (*pDate); - if (DoesStringMatchConstraint (tmp, scp) - && SetStringValue (&tmp, value, existing_text)) { - dp = ReadDateFromString (tmp); - if (dp != NULL) { - *pDate = DateFree (*pDate); - *pDate = dp; - rval = TRUE; + if (StringHasNoText (str)) + { + if (next_name != NULL) + { + *next_name = NULL; } + return NULL; } - tmp = MemFree (tmp); - return rval; -} + /* skip over any leading spaces */ + str += StringSpn (str, " \t"); -static Boolean RemovePubFieldFromImprint (ImprintPtr imprint, Int4 field, StringConstraintPtr scp) -{ - Boolean rval = FALSE; - if (imprint == NULL) return FALSE; + /* skip over "and" if found */ + if (StringNCmp (str, "and ", 4) == 0) + { + str += 4; + } + if (StringHasNoText (str)) { + str = MemFree (str); + return NULL; + } - switch (field) { - case Publication_field_volume: - if (!StringHasNoText (imprint->volume) && DoesStringMatchConstraint (imprint->volume, scp)) { - imprint->volume = MemFree (imprint->volume); - rval = TRUE; - } - break; - case Publication_field_issue: - if (!StringHasNoText (imprint->issue) && DoesStringMatchConstraint (imprint->issue, scp)) { - imprint->issue = MemFree (imprint->issue); - rval = TRUE; + cp_end = StringChr (str, ','); + if (cp_end != NULL) + { + p_repl1 = cp_end; + ch_r1 = *p_repl1; + *cp_end = 0; + if (next_name != NULL) 
+ { + if (StringHasNoText (cp_end + 1)) + { + *next_name = NULL; } - break; - case Publication_field_pages: - if (!StringHasNoText (imprint->pages) && DoesStringMatchConstraint (imprint->pages, scp)) { - imprint->pages = MemFree (imprint->pages); - rval = TRUE; + else + { + *next_name = cp_end + 1; } - break; - case Publication_field_date: - rval = RemovePubDate (&(imprint->date), scp); - break; + } + } + else if (next_name != NULL) + { + *next_name = NULL; } - return rval; -} - - -static Boolean SetPubFieldOnImprint (ImprintPtr imprint, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) -{ - Boolean rval = FALSE; - if (imprint == NULL) return FALSE; + n = NameStdNew (); + /* look for elements in name */ + cp_space = StringRChr (str, ' '); + if (cp_space == NULL) + { + n->names[0] = StringSave (str); + } + else + { + n->names[0] = StringSave (cp_space + 1); + while (isspace (*cp_space)) + { + cp_space--; + } + p_repl2 = cp_space + 1; + ch_r2 = *p_repl2; + *(cp_space + 1) = 0; + cp_space = StringChr (str, ' '); + if (cp_space == NULL) + { + n->names[1] = StringSave (str); + n->names[4] = (CharPtr) MemNew (sizeof (Char) * 3); + sprintf (n->names[4], "%c.", *(n->names[1])); + } + else + { + p_repl3 = cp_space; + ch_r3 = *p_repl3; + *(cp_space) = 0; + n->names[1] = StringSave (str); - switch (field) { - case Publication_field_volume: - if (DoesStringMatchConstraint (imprint->volume, scp)) { - rval = SetStringValue (&(imprint->volume), value, existing_text); - } - break; - case Publication_field_issue: - if (!StringHasNoText (imprint->issue) && DoesStringMatchConstraint (imprint->issue, scp)) { - rval = SetStringValue (&(imprint->issue), value, existing_text); - } - break; - case Publication_field_pages: - if (!StringHasNoText (imprint->pages) && DoesStringMatchConstraint (imprint->pages, scp)) { - rval = SetStringValue (&(imprint->pages), value, existing_text); + cp_space++; + while (isspace (*cp_space)) + { + cp_space++; } - break; - case 
Publication_field_date: - rval = SetPubDate (&(imprint->date), scp, value, existing_text); - break; + + n->names[4] = (CharPtr) MemNew (sizeof (Char) * (4 + StringLen (cp_space))); + sprintf (n->names[4], "%c.%s.", *(n->names[1]), cp_space); + } } - return rval; -} - -static void SetValNodeChoices (ValNodePtr list, Uint1 new_choice) -{ - while (list != NULL) { - list->choice = new_choice; - list = list->next; + if (p_repl1 != NULL) { + *p_repl1 = ch_r1; } -} - - -static CharPtr GetPubFieldFromCitJour (CitJourPtr cjp, Int4 field, StringConstraintPtr scp) -{ - CharPtr str = NULL; - if (cjp == NULL) return NULL; - - switch (field) { - case Publication_field_journal: - str = GetFirstValNodeStringMatch (cjp->title, scp); - break; - case Publication_field_volume: - case Publication_field_issue: - case Publication_field_pages: - case Publication_field_date: - str = GetPubFieldFromImprint (cjp->imp, field, scp); - break; + if (p_repl2 != NULL) { + *p_repl2 = ch_r2; } - - return str; -} - - -static Boolean RemovePubFieldFromCitJour (CitJourPtr cjp, Int4 field, StringConstraintPtr scp) -{ - Boolean rval = FALSE; - if (cjp == NULL) return FALSE; - - switch (field) { - case Publication_field_journal: - rval = RemoveValNodeStringMatch (&(cjp->title), scp); - break; - case Publication_field_volume: - case Publication_field_issue: - case Publication_field_pages: - case Publication_field_date: - rval = RemovePubFieldFromImprint (cjp->imp, field, scp); - break; + if (p_repl3 != NULL) { + *p_repl3 = ch_r3; } - - return rval; + + return n; } -static Boolean SetPubFieldOnCitJour (CitJourPtr cjp, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +static ValNodePtr ReadNameListFromString (CharPtr value) { - Boolean rval = FALSE; - if (cjp == NULL) return FALSE; + ValNodePtr names = NULL; + AuthorPtr ap; + NameStdPtr n; + CharPtr next_cp, cp; - switch (field) { - case Publication_field_journal: - rval = SetStringsInValNodeStringList (&(cjp->title), scp, value, 
existing_text); - SetValNodeChoices (cjp->title, 1); - break; - case Publication_field_volume: - case Publication_field_issue: - case Publication_field_pages: - case Publication_field_date: - rval = SetPubFieldOnImprint (cjp->imp, field, scp, value, existing_text); - break; + cp = value; + next_cp = NULL; + while (cp != NULL) { + n = ReadNameFromString (cp, &next_cp); + if (n != NULL) { + ap = AuthorNew (); + ap->name = PersonIdNew (); + ap->name->choice = 2; + ap->name->data = n; + ValNodeAddPointer (&names, 1, ap); + } + cp = next_cp; } - - return rval; + return names; } -static CharPtr GetPubFieldFromCitBook (CitBookPtr cbp, Int4 field, StringConstraintPtr scp) +static ValNodePtr FreeNameList (Uint1 choice, ValNodePtr name_list) { - CharPtr str = NULL; + ValNodePtr curr, next; - if (cbp == NULL) return NULL; + curr = name_list; + while (curr != NULL) { + if (choice == 1) /* std type */ + AuthorFree((AuthorPtr) curr->data.ptrvalue); + else /* ml or str */ + MemFree(curr->data.ptrvalue); - switch (field) { - case Publication_field_title: - str = GetFirstValNodeStringMatch (cbp->title, scp); - break; - case Publication_field_authors: - str = GetAuthorListString (cbp->authors, scp); - break; - case Publication_field_authors_initials: - str = GetAuthorListStringEx (cbp->authors, scp, TRUE); - break; - case Publication_field_affiliation: - case Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: - case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (cbp->authors != NULL) { - str = GetPubFieldFromAffil (cbp->authors->affil, field, scp); - } - break; - case Publication_field_volume: - case Publication_field_issue: - case Publication_field_pages: - case Publication_field_date: - str = GetPubFieldFromImprint (cbp->imp, field, scp); - break; + 
next = curr->next; + MemFree(curr); + curr = next; } - - return str; + return curr; } -static Boolean RemovePubFieldFromCitBook (CitBookPtr cbp, Int4 field, StringConstraintPtr scp) -{ - Boolean rval = FALSE; - if (cbp == NULL) return FALSE; - switch (field) { - case Publication_field_title: - rval = RemoveValNodeStringMatch (&(cbp->title), scp); - break; - case Publication_field_authors: - rval = RemoveAuthorListString (cbp->authors, scp); - break; - case Publication_field_affiliation: - case Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: - case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (cbp->authors != NULL) { - rval = RemovePubFieldFromAffil(cbp->authors->affil, field, scp); - } - break; - case Publication_field_volume: - case Publication_field_issue: - case Publication_field_pages: - case Publication_field_date: - rval = RemovePubFieldFromImprint (cbp->imp, field, scp); - break; +static Boolean SetAuthorListFromString (AuthListPtr alp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +{ + ValNodePtr name_list = NULL, vnp, vnp_prev, vnp_next, vnp_tmp; + CharPtr tmp; + Boolean rval = FALSE, found, ok_to_set = FALSE; + + if (alp == NULL || StringHasNoText (value)) return FALSE; + + /* can only combine lists if existing list is same type */ + if (alp->names == NULL || alp->choice == 1) { + ok_to_set = TRUE; + } else { + switch (existing_text) { + case ExistingTextOption_replace_old: + if (IsStringConstraintEmpty (scp)) { + ok_to_set = TRUE; + } + break; + case ExistingTextOption_append_space: + case ExistingTextOption_append_colon: + case ExistingTextOption_append_none: + case ExistingTextOption_prefix_space: + case ExistingTextOption_prefix_colon: + case ExistingTextOption_prefix_none: + ok_to_set = TRUE; + break; 
+ } + } + if (!ok_to_set) { + return FALSE; } + if (alp->names == NULL && IsStringConstraintEmpty (scp)) { + /* no prior values - just add new list */ + name_list = ReadNameListFromString (value); + if (name_list != NULL) { + ValNodeLink (&alp->names, name_list); + alp->choice = 1; + rval = TRUE; + } + } else { + switch (existing_text) { + case ExistingTextOption_append_semi: + case ExistingTextOption_append_comma: + name_list = ReadNameListFromString (value); + if (IsStringConstraintEmpty (scp)) { + /* append to list */ + ValNodeLink (&(alp->names), name_list); + rval = TRUE; + } else { + /* insert in list after first match */ + vnp = alp->names; + found = FALSE; + while (vnp != NULL && !found) { + tmp = GetAuthorString (vnp->data.ptrvalue); + if (tmp != NULL && DoesStringMatchConstraint (tmp, scp)) { + found = TRUE; + } + tmp = MemFree (tmp); + if (!found) { + vnp = vnp->next; + } + } + if (found) { + ValNodeLink (&name_list, vnp->next); + vnp->next = name_list; + rval = TRUE; + } + } + break; + case ExistingTextOption_prefix_semi: + case ExistingTextOption_prefix_comma: + name_list = ReadNameListFromString (value); + if (IsStringConstraintEmpty (scp)) { + /* prepend to list */ + ValNodeLink (&name_list, alp->names); + alp->names = name_list; + rval = TRUE; + } else { + /* insert in list before first match */ + vnp = alp->names; + vnp_prev = NULL; + found = FALSE; + while (vnp != NULL && !found) { + tmp = GetAuthorString (vnp->data.ptrvalue); + if (tmp != NULL && DoesStringMatchConstraint (tmp, scp)) { + found = TRUE; + } + tmp = MemFree (tmp); + if (!found) { + vnp_prev = vnp; + vnp = vnp->next; + } + } + if (found) { + if (vnp_prev == NULL) { + ValNodeLink (&name_list, alp->names); + alp->names = name_list; + } else { + ValNodeLink (&name_list, vnp_prev->next); + vnp_prev->next = name_list; + } + rval = TRUE; + } + } + break; + case ExistingTextOption_replace_old: + name_list = ReadNameListFromString (value); + if (IsStringConstraintEmpty (scp)) { + /* replace 
entire list */ + alp->names = FreeNameList (alp->choice, alp->names); + alp->names = name_list; + alp->choice = 1; + rval = TRUE; + } else { + /* replace first author that matches with new match, remove others that match */ + vnp = alp->names; + vnp_prev = NULL; + found = FALSE; + while (vnp != NULL) { + vnp_next = vnp->next; + tmp = GetAuthorString (vnp->data.ptrvalue); + if (tmp != NULL && DoesStringMatchConstraint (tmp, scp)) { + if (found) { + if (vnp_prev == NULL) { + alp->names = vnp->next; + } else { + vnp_prev->next = vnp->next; + } + } else { + vnp_tmp = name_list; + while (vnp_tmp->next != NULL) { + vnp_tmp = vnp_tmp->next; + } + ValNodeLink (&name_list, vnp->next); + if (vnp_prev == NULL) { + alp->names = name_list; + } else { + vnp_prev->next = name_list; + } + vnp_prev = vnp_tmp; + found = TRUE; + rval = TRUE; + } + vnp->next = NULL; + vnp = FreeNameList (alp->choice, vnp); + } else { + vnp_prev = vnp; + } + tmp = MemFree (tmp); + vnp = vnp_next; + } + } + break; + case ExistingTextOption_append_space: + case ExistingTextOption_append_colon: + case ExistingTextOption_append_none: + case ExistingTextOption_prefix_space: + case ExistingTextOption_prefix_colon: + case ExistingTextOption_prefix_none: + vnp_prev = NULL; + for (vnp = alp->names; vnp != NULL; vnp = vnp_next) { + vnp_next = vnp->next; + if (alp->choice == 1) { + tmp = GetAuthorString (vnp->data.ptrvalue); + if (tmp != NULL && DoesStringMatchConstraint (tmp, scp) + && SetStringValue (&tmp, value, existing_text)) { + name_list = ReadNameListFromString (tmp); + if (name_list != NULL) { + vnp_tmp = name_list; + while (vnp_tmp->next != NULL) { + vnp_tmp = vnp_tmp->next; + } + ValNodeLink (&name_list, vnp_next); + if (vnp_prev == NULL) { + alp->names = name_list; + } else { + vnp_prev->next = name_list; + } + vnp_prev = vnp_tmp; + vnp->next = NULL; + vnp = FreeNameList (alp->choice, vnp); + rval = TRUE; + name_list = NULL; + } else { + vnp_prev = vnp; + } + } else { + vnp_prev = vnp; + } + tmp = 
MemFree (tmp); + } else { + if (vnp->data.ptrvalue != NULL && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) { + tmp = (CharPtr) vnp->data.ptrvalue; + rval |= SetStringValue (&tmp, value, existing_text); + vnp->data.ptrvalue = tmp; + } + } + } + break; + } + } + if (!rval && name_list != NULL) { + name_list = FreeNameList (1, vnp); + } return rval; } -static Boolean SetPubFieldOnCitBook (CitBookPtr cbp, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +static CharPtr GetPubFieldFromAffil (AffilPtr ap, Int4 field, StringConstraintPtr scp) { - Boolean rval = FALSE; + CharPtr str = NULL; - if (cbp == NULL) return FALSE; + if (ap == NULL) return NULL; switch (field) { - case Publication_field_title: - rval = SetStringsInValNodeStringList (&(cbp->title), scp, value, existing_text); - SetValNodeChoices (cbp->title, 1); - break; - case Publication_field_authors: - rval = SetAuthorListFromString (cbp->authors, scp, value, existing_text); - break; case Publication_field_affiliation: + if (!StringHasNoText (ap->affil) && DoesStringMatchConstraint (ap->affil, scp)) { + str = StringSave (ap->affil); + } + break; case Publication_field_affil_div: + if (!StringHasNoText (ap->div) && DoesStringMatchConstraint (ap->div, scp)) { + str = StringSave (ap->div); + } + break; case Publication_field_affil_city: + if (!StringHasNoText (ap->city) && DoesStringMatchConstraint (ap->city, scp)) { + str = StringSave (ap->city); + } + break; case Publication_field_affil_sub: + if (!StringHasNoText (ap->sub) && DoesStringMatchConstraint (ap->sub, scp)) { + str = StringSave (ap->sub); + } + break; case Publication_field_affil_country: + if (!StringHasNoText (ap->country) && DoesStringMatchConstraint (ap->country, scp)) { + str = StringSave (ap->country); + } + break; case Publication_field_affil_street: + if (!StringHasNoText (ap->street) && DoesStringMatchConstraint (ap->street, scp)) { + str = StringSave (ap->street); + } + break; case 
Publication_field_affil_email: + if (!StringHasNoText (ap->email) && DoesStringMatchConstraint (ap->email, scp)) { + str = StringSave (ap->email); + } + break; case Publication_field_affil_fax: + if (!StringHasNoText (ap->fax) && DoesStringMatchConstraint (ap->fax, scp)) { + str = StringSave (ap->fax); + } + break; case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (cbp->authors != NULL) { - rval = SetAffilPubField (cbp->authors->affil, field, scp, value, existing_text); + if (!StringHasNoText (ap->phone) && DoesStringMatchConstraint (ap->phone, scp)) { + str = StringSave (ap->phone); } break; - case Publication_field_volume: - case Publication_field_issue: - case Publication_field_pages: - case Publication_field_date: - rval = SetPubFieldOnImprint (cbp->imp, field, scp, value, existing_text); + case Publication_field_affil_zipcode: + if (!StringHasNoText (ap->postal_code) && DoesStringMatchConstraint (ap->postal_code, scp)) { + str = StringSave (ap->postal_code); + } break; } - - return rval; + return str; } -NLM_EXTERN CharPtr GetPubFieldFromPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp) +static Boolean RemovePubFieldFromAffil (AffilPtr ap, Int4 field, StringConstraintPtr scp) { - CitGenPtr cgp; - CitArtPtr cap; - CitBookPtr cbp; - CitPatPtr cpp; - CitSubPtr csp; - CitJourPtr cjp; - CharPtr str = NULL; + Boolean rval = FALSE; + if (ap == NULL) return FALSE; - if (the_pub == NULL || the_pub->data.ptrvalue == NULL) return NULL; - - switch (the_pub->choice) { - case PUB_Gen : - cgp = (CitGenPtr) the_pub->data.ptrvalue; - switch (field) { - case Publication_field_cit: - if (!StringHasNoText (cgp->cit) && DoesStringMatchConstraint (cgp->title, scp)) { - str = StringSave (cgp->cit); - } - break; - case Publication_field_authors: - str = GetAuthorListString (cgp->authors, scp); - break; - case Publication_field_authors_initials: - str = GetAuthorListStringEx (cgp->authors, scp, TRUE); - break; - case 
Publication_field_affiliation: - case Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: - case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (cgp->authors != NULL) { - str = GetPubFieldFromAffil (cgp->authors->affil, field, scp); - } - break; - case Publication_field_journal: - str = GetFirstValNodeStringMatch (cgp->journal, scp); - break; - case Publication_field_volume: - if (!StringHasNoText (cgp->volume) && DoesStringMatchConstraint (cgp->volume, scp)) { - str = StringSave (cgp->volume); - } - break; - case Publication_field_issue: - if (!StringHasNoText (cgp->issue) && DoesStringMatchConstraint (cgp->issue, scp)) { - str = StringSave (cgp->issue); - } - break; - case Publication_field_pages: - if (!StringHasNoText (cgp->pages) && DoesStringMatchConstraint (cgp->pages, scp)) { - str = StringSave (cgp->pages); - } - break; - case Publication_field_date: - if (cgp->date != NULL) { - str = PrintDate (cgp->date); - if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) { - str = MemFree (str); - } - } - break; - case Publication_field_serial_number: - str = GetInt2ValueFromString (cgp->serial_number, scp); - break; - case Publication_field_title: - if (!StringHasNoText (cgp->title) && DoesStringMatchConstraint (cgp->title, scp)) { - str = StringSave (cgp->title); - } - break; + switch (field) { + case Publication_field_affiliation: + if (!StringHasNoText (ap->affil) && DoesStringMatchConstraint (ap->affil, scp)) { + ap->affil = MemFree (ap->affil); + rval = TRUE; } break; - case PUB_Sub : - csp = (CitSubPtr) the_pub->data.ptrvalue; - switch (field) { - case Publication_field_title: - if (!StringHasNoText (csp->descr) && DoesStringMatchConstraint (csp->descr, scp)) { - str = StringSave (csp->descr); - } - break; - case 
Publication_field_authors: - str = GetAuthorListString (csp->authors, scp); - break; - case Publication_field_authors_initials: - str = GetAuthorListStringEx (csp->authors, scp, TRUE); - break; - case Publication_field_affiliation: - case Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: - case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (csp->authors != NULL) { - str = GetPubFieldFromAffil (csp->authors->affil, field, scp); - } - break; - case Publication_field_date: - str = PrintDate (csp->date); - if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) { - str = MemFree (str); - } - break; + case Publication_field_affil_div: + if (!StringHasNoText (ap->div) && DoesStringMatchConstraint (ap->div, scp)) { + ap->div = MemFree (ap->div); + rval = TRUE; } break; - case PUB_Article : - cap = (CitArtPtr) the_pub->data.ptrvalue; - switch (field) { - case Publication_field_title: - str = GetFirstValNodeStringMatch (cap->title, scp); - break; - case Publication_field_authors: - str = GetAuthorListString (cap->authors, scp); - break; - case Publication_field_authors_initials: - str = GetAuthorListStringEx (cap->authors, scp, TRUE); - break; - case Publication_field_affiliation: - case Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: - case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (cap->authors != NULL) { - str = GetPubFieldFromAffil (cap->authors->affil, field, scp); - } - break; - default: - if (cap->from == 1) { - str = GetPubFieldFromCitJour (cap->fromptr, field, scp); - } else if 
(cap->from == 2) { - str = GetPubFieldFromCitBook (cap->fromptr, field, scp); - } - break; + case Publication_field_affil_city: + if (!StringHasNoText (ap->city) && DoesStringMatchConstraint (ap->city, scp)) { + ap->city = MemFree (ap->city); + rval = TRUE; } break; - case PUB_Journal: - cjp = (CitJourPtr) the_pub->data.ptrvalue; - str = GetPubFieldFromCitJour (cjp, field, scp); + case Publication_field_affil_sub: + if (!StringHasNoText (ap->sub) && DoesStringMatchConstraint (ap->sub, scp)) { + ap->sub = MemFree (ap->sub); + rval = TRUE; + } break; - case PUB_Book : - case PUB_Man : - cbp = (CitBookPtr) the_pub->data.ptrvalue; - str = GetPubFieldFromCitBook (cbp, field, scp); + case Publication_field_affil_country: + if (!StringHasNoText (ap->country) && DoesStringMatchConstraint (ap->country, scp)) { + ap->country = MemFree (ap->country); + rval = TRUE; + } break; - case PUB_Patent : - cpp = (CitPatPtr) the_pub->data.ptrvalue; - switch (field) { - case Publication_field_title: - if (!StringHasNoText (cpp->title) && DoesStringMatchConstraint (cpp->title, scp)) { - str = StringSave (cpp->title); - } - break; - case Publication_field_authors: - str = GetAuthorListString (cpp->authors, scp); - break; - case Publication_field_authors_initials: - str = GetAuthorListStringEx (cpp->authors, scp, TRUE); - break; - case Publication_field_affiliation: - case Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: - case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (cpp->authors != NULL) { - str = GetPubFieldFromAffil (cpp->authors->affil, field, scp); - } - break; + case Publication_field_affil_street: + if (!StringHasNoText (ap->street) && DoesStringMatchConstraint (ap->street, scp)) { + ap->street = MemFree (ap->street); + rval = TRUE; } break; - 
default : + case Publication_field_affil_email: + if (!StringHasNoText (ap->email) && DoesStringMatchConstraint (ap->email, scp)) { + ap->email = MemFree (ap->email); + rval = TRUE; + } break; - } - return str; -} - - -static Boolean RemovePubFieldFromPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp) -{ - CitGenPtr cgp; - CitArtPtr cap; - CitBookPtr cbp; - CitPatPtr cpp; - CitSubPtr csp; - Boolean rval = FALSE; - Char num[15]; + case Publication_field_affil_fax: + if (!StringHasNoText (ap->fax) && DoesStringMatchConstraint (ap->fax, scp)) { + ap->fax = MemFree (ap->fax); + rval = TRUE; + } + break; + case Publication_field_affil_phone: + if (!StringHasNoText (ap->phone) && DoesStringMatchConstraint (ap->phone, scp)) { + ap->phone = MemFree (ap->phone); + rval = TRUE; + } + break; + case Publication_field_affil_zipcode: + if (!StringHasNoText (ap->postal_code) && DoesStringMatchConstraint (ap->postal_code, scp)) { + ap->postal_code = MemFree (ap->postal_code); + rval = TRUE; + } + break; + } + return rval; +} - if (the_pub == NULL) return FALSE; - - switch (the_pub->choice) { - case PUB_Gen : - cgp = (CitGenPtr) the_pub->data.ptrvalue; - switch (field) { - case Publication_field_cit: - if (!StringHasNoText (cgp->cit) && DoesStringMatchConstraint (cgp->title, scp)) { - cgp->cit = MemFree (cgp->cit); - rval = TRUE; - } - break; - case Publication_field_authors: - rval = RemoveAuthorListString (cgp->authors, scp); - break; - case Publication_field_affiliation: - case Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: - case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (cgp->authors != NULL) { - rval = RemovePubFieldFromAffil(cgp->authors->affil, field, scp); - } - break; - case Publication_field_journal: - rval = 
RemoveValNodeStringMatch (&(cgp->journal), scp); - break; - case Publication_field_volume: - if (!StringHasNoText (cgp->volume) && DoesStringMatchConstraint (cgp->volume, scp)) { - cgp->volume = MemFree (cgp->volume); - rval = TRUE; - } - break; - case Publication_field_issue: - if (!StringHasNoText (cgp->issue) && DoesStringMatchConstraint (cgp->issue, scp)) { - cgp->issue = MemFree (cgp->issue); - rval = TRUE; - } - break; - case Publication_field_pages: - if (!StringHasNoText (cgp->pages) && DoesStringMatchConstraint (cgp->pages, scp)) { - cgp->pages = MemFree (cgp->pages); - rval = TRUE; - } - break; - case Publication_field_date: - rval = RemovePubDate (&(cgp->date), scp); - break; - case Publication_field_serial_number: - if (cgp->serial_number > 0) { - sprintf (num, "%d", cgp->serial_number); - if (DoesStringMatchConstraint (num, scp)) { - cgp->serial_number = 0; - rval = TRUE; - } - } - break; - case Publication_field_title: - if (!StringHasNoText (cgp->title) && DoesStringMatchConstraint (cgp->title, scp)) { - cgp->title = MemFree (cgp->title); - rval = TRUE; - } - break; + +static Boolean SetAffilPubField (AffilPtr ap, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +{ + Boolean rval = FALSE; + if (ap == NULL) return FALSE; + + switch (field) { + case Publication_field_affiliation: + if (!StringHasNoText (ap->affil) && DoesStringMatchConstraint (ap->affil, scp)) { + rval = SetStringValue (&(ap->affil), value, existing_text); } break; - case PUB_Sub : - csp = (CitSubPtr) the_pub->data.ptrvalue; - switch (field) { - case Publication_field_title: - if (!StringHasNoText (csp->descr) && DoesStringMatchConstraint (csp->descr, scp)) { - csp->descr = MemFree (csp->descr); - rval = TRUE; - } - break; - case Publication_field_authors: - rval = RemoveAuthorListString (csp->authors, scp); - break; - case Publication_field_affiliation: - case Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: 
- case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (csp->authors != NULL) { - rval = RemovePubFieldFromAffil(csp->authors->affil, field, scp); - } - break; - case Publication_field_date: - rval = RemovePubDate (&(csp->date), scp); - break; + case Publication_field_affil_div: + if (!StringHasNoText (ap->div) && DoesStringMatchConstraint (ap->div, scp)) { + rval = SetStringValue (&(ap->div), value, existing_text); } break; - case PUB_Article : - cap = (CitArtPtr) the_pub->data.ptrvalue; - switch (field) { - case Publication_field_title: - rval = RemoveValNodeStringMatch (&(cap->title), scp); - break; - case Publication_field_authors: - rval = RemoveAuthorListString (cap->authors, scp); - break; - case Publication_field_affiliation: - case Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: - case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (cap->authors != NULL) { - rval = RemovePubFieldFromAffil(cap->authors->affil, field, scp); - } - break; - default: - if (cap->from == 1) { - rval = RemovePubFieldFromCitJour (cap->fromptr, field, scp); - } else if (cap->from == 2) { - rval = RemovePubFieldFromCitBook (cap->fromptr, field, scp); - } - break; + case Publication_field_affil_city: + if (!StringHasNoText (ap->city) && DoesStringMatchConstraint (ap->city, scp)) { + rval = SetStringValue (&(ap->city), value, existing_text); } break; - case PUB_Journal: - rval = RemovePubFieldFromCitJour (the_pub->data.ptrvalue, field, scp); + case Publication_field_affil_sub: + if (!StringHasNoText (ap->sub) && DoesStringMatchConstraint (ap->sub, scp)) { + rval = 
SetStringValue (&(ap->sub), value, existing_text); + } break; - case PUB_Book : - case PUB_Man : - cbp = (CitBookPtr) the_pub->data.ptrvalue; - rval = RemovePubFieldFromCitBook (cbp, field, scp); + case Publication_field_affil_country: + if (!StringHasNoText (ap->country) && DoesStringMatchConstraint (ap->country, scp)) { + rval = SetStringValue (&(ap->country), value, existing_text); + } break; - case PUB_Patent : - cpp = (CitPatPtr) the_pub->data.ptrvalue; - switch (field) { - case Publication_field_title: - if (!StringHasNoText (cpp->title) && DoesStringMatchConstraint (cpp->title, scp)) { - cpp->title = MemFree (cpp->title); - rval = TRUE; - } - break; - case Publication_field_authors: - rval = RemoveAuthorListString (cpp->authors, scp); - break; - case Publication_field_affiliation: - case Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: - case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (cpp->authors != NULL) { - rval = RemovePubFieldFromAffil(cpp->authors->affil, field, scp); - } - break; + case Publication_field_affil_street: + if (!StringHasNoText (ap->street) && DoesStringMatchConstraint (ap->street, scp)) { + rval = SetStringValue (&(ap->street), value, existing_text); } break; - default : + case Publication_field_affil_email: + if (!StringHasNoText (ap->email) && DoesStringMatchConstraint (ap->email, scp)) { + rval = SetStringValue (&(ap->email), value, existing_text); + } break; - } + case Publication_field_affil_fax: + if (!StringHasNoText (ap->fax) && DoesStringMatchConstraint (ap->fax, scp)) { + rval = SetStringValue (&(ap->fax), value, existing_text); + } + break; + case Publication_field_affil_phone: + if (!StringHasNoText (ap->phone) && DoesStringMatchConstraint (ap->phone, scp)) { + rval = SetStringValue 
(&(ap->phone), value, existing_text); + } + break; + case Publication_field_affil_zipcode: + if (!StringHasNoText (ap->postal_code) && DoesStringMatchConstraint (ap->postal_code, scp)) { + rval = SetStringValue (&(ap->postal_code), value, existing_text); + } + break; + } return rval; } -static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +static CharPtr GetPubFieldFromImprint (ImprintPtr imprint, Int4 field, StringConstraintPtr scp) { - CitGenPtr cgp; - CitArtPtr cap; - CitBookPtr cbp; - CitPatPtr cpp; - CitSubPtr csp; - Boolean rval = FALSE; + CharPtr str = NULL; + if (imprint == NULL) return NULL; - if (the_pub == NULL || value == NULL) return FALSE; - - switch (the_pub->choice) { - case PUB_Gen : - cgp = (CitGenPtr) the_pub->data.ptrvalue; - switch (field) { - case Publication_field_cit: - if (DoesStringMatchConstraint (cgp->cit, scp)) { - rval = SetStringValue ( &(cgp->cit), value, existing_text); - } - break; - case Publication_field_authors: - rval = SetAuthorListFromString (cgp->authors, scp, value, existing_text); - break; - case Publication_field_affiliation: - case Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: - case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (cgp->authors != NULL) { - rval = SetAffilPubField (cgp->authors->affil, field, scp, value, existing_text); - } - break; - case Publication_field_journal: - rval = SetStringsInValNodeStringList (&(cgp->journal), scp, value, existing_text); - SetValNodeChoices (cgp->journal, 1); - break; - case Publication_field_volume: - if (DoesStringMatchConstraint (cgp->volume, scp)) { - rval = SetStringValue ( &(cgp->volume), value, existing_text); - } - break; - case Publication_field_issue: - if 
(DoesStringMatchConstraint (cgp->issue, scp)) { - rval = SetStringValue ( &(cgp->issue), value, existing_text); - } - break; - case Publication_field_pages: - if (DoesStringMatchConstraint (cgp->pages, scp)) { - rval = SetStringValue ( &(cgp->pages), value, existing_text); - } - break; - case Publication_field_date: - rval = SetPubDate (&(cgp->date), scp, value, existing_text); - break; - case Publication_field_serial_number: - rval = SetInt2ValueWithString (&(cgp->serial_number), value, existing_text); - break; - case Publication_field_title: - if (DoesStringMatchConstraint(cgp->title, scp)) { - rval = SetStringValue ( &(cgp->title), value, existing_text); - } - break; + switch (field) { + case Publication_field_volume: + if (!StringHasNoText (imprint->volume) && DoesStringMatchConstraint (imprint->volume, scp)) { + str = StringSave (imprint->volume); } break; - case PUB_Sub : - csp = (CitSubPtr) the_pub->data.ptrvalue; - switch (field) { - case Publication_field_title: - if (DoesStringMatchConstraint (csp->descr, scp)) { - rval = SetStringValue (&(csp->descr), value, existing_text); - } - break; - case Publication_field_authors: - rval = SetAuthorListFromString (csp->authors, scp, value, existing_text); - break; - case Publication_field_affiliation: - case Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: - case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (csp->authors != NULL) { - rval = SetAffilPubField (csp->authors->affil, field, scp, value, existing_text); - } - break; - case Publication_field_date: - rval = SetPubDate (&(csp->date), scp, value, existing_text); - break; + case Publication_field_issue: + if (!StringHasNoText (imprint->issue) && DoesStringMatchConstraint (imprint->issue, scp)) { + str = StringSave 
(imprint->issue); } break; - case PUB_Article : - cap = (CitArtPtr) the_pub->data.ptrvalue; - switch (field) { - case Publication_field_title: - rval = SetStringsInValNodeStringList (&(cap->title), scp, value, existing_text); - SetValNodeChoices (cap->title, 1); - break; - case Publication_field_authors: - rval = SetAuthorListFromString (cap->authors, scp, value, existing_text); - break; - case Publication_field_affiliation: - case Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: - case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (cap->authors != NULL) { - rval = SetAffilPubField (cap->authors->affil, field, scp, value, existing_text); - } - break; - default: - if (cap->from == 1) { - rval = SetPubFieldOnCitJour (cap->fromptr, field, scp, value, existing_text); - } else if (cap->from == 2) { - rval = SetPubFieldOnCitBook (cap->fromptr, field, scp, value, existing_text); - } - break; + case Publication_field_pages: + if (!StringHasNoText (imprint->pages) && DoesStringMatchConstraint (imprint->pages, scp)) { + str = StringSave (imprint->pages); } break; - case PUB_Journal: - rval = SetPubFieldOnCitJour (the_pub->data.ptrvalue, field, scp, value, existing_text); - break; - case PUB_Book : - case PUB_Man : - cbp = (CitBookPtr) the_pub->data.ptrvalue; - rval = SetPubFieldOnCitBook (cbp, field, scp, value, existing_text); - break; - case PUB_Patent : - cpp = (CitPatPtr) the_pub->data.ptrvalue; - switch (field) { - case Publication_field_title: - if (DoesStringMatchConstraint(cpp->title, scp)) { - rval = SetStringValue ( &(cpp->title), value, existing_text); - } - break; - case Publication_field_authors: - rval = SetAuthorListFromString (cpp->authors, scp, value, existing_text); - break; - case Publication_field_affiliation: - case 
Publication_field_affil_div: - case Publication_field_affil_city: - case Publication_field_affil_sub: - case Publication_field_affil_country: - case Publication_field_affil_street: - case Publication_field_affil_email: - case Publication_field_affil_fax: - case Publication_field_affil_phone: - case Publication_field_affil_zipcode: - if (cpp->authors != NULL) { - rval = SetAffilPubField (cpp->authors->affil, field, scp, value, existing_text); - } - break; + case Publication_field_date: + str = PrintDate (imprint->date); + if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) { + str = MemFree (str); } break; - default : - break; } - return rval; + return str; } - -static CharPtr GetPubFieldFromObject (Uint1 choice, Pointer data, Int4 field, StringConstraintPtr scp) +static Boolean RemovePubDate (DatePtr PNTR pDate, StringConstraintPtr scp) { - CharPtr rval = NULL; - PubdescPtr pdp = NULL; - PubPtr pub; - SeqFeatPtr sfp; - SeqDescrPtr sdp; + CharPtr str; + Boolean rval = FALSE; - if (data == NULL) return NULL; - if (choice == OBJ_SEQFEAT) { - sfp = (SeqFeatPtr) data; - if (sfp->data.choice == SEQFEAT_PUB) { - pdp = sfp->data.value.ptrvalue; - } - } else if (choice == OBJ_SEQDESC) { - sdp = (SeqDescrPtr) data; - if (sdp->choice == Seq_descr_pub) { - pdp = sdp->data.ptrvalue; - } + if (pDate == NULL || *pDate == NULL) { + return FALSE; } - if (pdp == NULL) return NULL; - for (pub = pdp->pub; pub != NULL && rval == NULL; pub = pub->next) { - rval = GetPubFieldFromPub (pub, field, scp); + str = PrintDate (*pDate); + if (!StringHasNoText (str) && DoesStringMatchConstraint (str, scp)) { + *pDate = DateFree (*pDate); + rval = TRUE; } - + str = MemFree (str); return rval; } -static Boolean RemovePubFieldFromObject (Uint1 choice, Pointer data, Int4 field, StringConstraintPtr scp) +static Boolean SetPubDate (DatePtr PNTR pDate, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { - Boolean rval = FALSE; - PubdescPtr pdp = NULL; - PubPtr pub; - 
SeqFeatPtr sfp; - SeqDescrPtr sdp; + CharPtr tmp; + DatePtr dp = NULL; + Boolean rval = FALSE; - if (data == NULL) return FALSE; - if (choice == OBJ_SEQFEAT) { - sfp = (SeqFeatPtr) data; - if (sfp->data.choice == SEQFEAT_PUB) { - pdp = sfp->data.value.ptrvalue; - } - } else if (choice == OBJ_SEQDESC) { - sdp = (SeqDescrPtr) data; - if (sdp->choice == Seq_descr_pub) { - pdp = sdp->data.ptrvalue; + if (pDate == NULL) { + return FALSE; + } + tmp = PrintDate (*pDate); + if (DoesStringMatchConstraint (tmp, scp) + && SetStringValue (&tmp, value, existing_text)) { + dp = ReadDateFromString (tmp); + if (dp != NULL) { + *pDate = DateFree (*pDate); + *pDate = dp; + rval = TRUE; } } + tmp = MemFree (tmp); + return rval; +} - if (pdp == NULL) return FALSE; - for (pub = pdp->pub; pub != NULL; pub = pub->next) { - rval |= RemovePubFieldFromPub (pub, field, scp); +static Boolean RemovePubFieldFromImprint (ImprintPtr imprint, Int4 field, StringConstraintPtr scp) +{ + Boolean rval = FALSE; + if (imprint == NULL) return FALSE; + + switch (field) { + case Publication_field_volume: + if (!StringHasNoText (imprint->volume) && DoesStringMatchConstraint (imprint->volume, scp)) { + imprint->volume = MemFree (imprint->volume); + rval = TRUE; + } + break; + case Publication_field_issue: + if (!StringHasNoText (imprint->issue) && DoesStringMatchConstraint (imprint->issue, scp)) { + imprint->issue = MemFree (imprint->issue); + rval = TRUE; + } + break; + case Publication_field_pages: + if (!StringHasNoText (imprint->pages) && DoesStringMatchConstraint (imprint->pages, scp)) { + imprint->pages = MemFree (imprint->pages); + rval = TRUE; + } + break; + case Publication_field_date: + rval = RemovePubDate (&(imprint->date), scp); + break; } return rval; } -static Boolean SetPubFieldOnObject (Uint1 choice, Pointer data, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +static Boolean SetPubFieldOnImprint (ImprintPtr imprint, Int4 field, StringConstraintPtr scp, CharPtr 
value, Uint2 existing_text) { - Boolean rval = FALSE; - PubdescPtr pdp = NULL; - PubPtr pub; - SeqFeatPtr sfp; - SeqDescrPtr sdp; - - if (data == NULL) return FALSE; - if (choice == OBJ_SEQFEAT) { - sfp = (SeqFeatPtr) data; - if (sfp->data.choice == SEQFEAT_PUB) { - pdp = sfp->data.value.ptrvalue; - } - } else if (choice == OBJ_SEQDESC) { - sdp = (SeqDescrPtr) data; - if (sdp->choice == Seq_descr_pub) { - pdp = sdp->data.ptrvalue; - } - } - - if (pdp == NULL) return FALSE; - - for (pub = pdp->pub; pub != NULL; pub = pub->next) { - rval |= SetPubFieldOnPub (pub, field, scp, value, existing_text); - } - return rval; -} - - + Boolean rval = FALSE; -NLM_EXTERN Uint1 FieldTypeFromAECRAction (AECRActionPtr action) -{ - Uint1 field_type = 0; - ApplyActionPtr a; - EditActionPtr e; - ConvertActionPtr v; - CopyActionPtr c; - SwapActionPtr s; - RemoveActionPtr r; - AECRParseActionPtr p; + if (imprint == NULL) return FALSE; - if (action == NULL || action->action == NULL || action->action->data.ptrvalue == NULL) { - return 0; - } - switch (action->action->choice) { - case ActionChoice_apply: - a = (ApplyActionPtr) action->action->data.ptrvalue; - if (a->field != NULL) { - field_type = a->field->choice; - } - break; - case ActionChoice_edit: - e = (EditActionPtr) action->action->data.ptrvalue; - if (e->field != NULL) { - field_type = e->field->choice; - } - break; - case ActionChoice_convert: - v = (ConvertActionPtr) action->action->data.ptrvalue; - if (v->fields != NULL) { - field_type = FieldTypeChoiceFromFieldPairTypeChoice (v->fields->choice); - } - break; - case ActionChoice_copy: - c = (CopyActionPtr) action->action->data.ptrvalue; - if (c->fields != NULL) { - field_type = FieldTypeChoiceFromFieldPairTypeChoice (c->fields->choice); + switch (field) { + case Publication_field_volume: + if (DoesStringMatchConstraint (imprint->volume, scp)) { + rval = SetStringValue (&(imprint->volume), value, existing_text); } break; - case ActionChoice_swap: - s = (SwapActionPtr) 
action->action->data.ptrvalue; - if (s->fields != NULL) { - field_type = FieldTypeChoiceFromFieldPairTypeChoice (s->fields->choice); + case Publication_field_issue: + if (!StringHasNoText (imprint->issue) && DoesStringMatchConstraint (imprint->issue, scp)) { + rval = SetStringValue (&(imprint->issue), value, existing_text); } break; - case ActionChoice_remove: - r = (RemoveActionPtr) action->action->data.ptrvalue; - if (r->field != NULL) { - field_type = r->field->choice; + case Publication_field_pages: + if (!StringHasNoText (imprint->pages) && DoesStringMatchConstraint (imprint->pages, scp)) { + rval = SetStringValue (&(imprint->pages), value, existing_text); } break; - case ActionChoice_parse: - p = (AECRParseActionPtr) action->action->data.ptrvalue; - if (p->fields != NULL) { - field_type = FieldTypeChoiceFromFieldPairTypeChoice (p->fields->choice); - } + case Publication_field_date: + rval = SetPubDate (&(imprint->date), scp, value, existing_text); break; } - return field_type; + return rval; } -typedef struct pubserialnumber { - BioseqPtr bsp; - Int4 serial_number; - ValNodePtr min_pub; -} PubSerialNumberData, PNTR PubSerialNumberPtr; - -static PubSerialNumberPtr PubSerialNumberNew () +static void SetValNodeChoices (ValNodePtr list, Uint1 new_choice) { - PubSerialNumberPtr psn; - - psn = (PubSerialNumberPtr) MemNew (sizeof (PubSerialNumberData)); - psn->bsp = NULL; - psn->serial_number = 0; - psn->min_pub = NULL; - - return psn; + while (list != NULL) { + list->choice = new_choice; + list = list->next; + } } -static PubSerialNumberPtr PubSerialNumberFree (PubSerialNumberPtr psn) +static CharPtr GetPubFieldFromCitJour (CitJourPtr cjp, Int4 field, StringConstraintPtr scp) { - if (psn != NULL) { - psn->min_pub = PubSetFree (psn->min_pub); - psn = MemFree (psn); + CharPtr str = NULL; + if (cjp == NULL) return NULL; + + switch (field) { + case Publication_field_journal: + str = GetFirstValNodeStringMatch (cjp->title, scp); + break; + case Publication_field_volume: 
+ case Publication_field_issue: + case Publication_field_pages: + case Publication_field_date: + str = GetPubFieldFromImprint (cjp->imp, field, scp); + break; } - return psn; + + return str; } -NLM_EXTERN ValNodePtr PubSerialNumberListFree (ValNodePtr vnp) +static Boolean RemovePubFieldFromCitJour (CitJourPtr cjp, Int4 field, StringConstraintPtr scp) { - ValNodePtr vnp_next; + Boolean rval = FALSE; + if (cjp == NULL) return FALSE; - while (vnp != NULL) { - vnp_next = vnp->next; - vnp->next = NULL; - vnp->data.ptrvalue = PubSerialNumberFree (vnp->data.ptrvalue); - vnp = ValNodeFree (vnp); - vnp = vnp_next; + switch (field) { + case Publication_field_journal: + rval = RemoveValNodeStringMatch (&(cjp->title), scp); + break; + case Publication_field_volume: + case Publication_field_issue: + case Publication_field_pages: + case Publication_field_date: + rval = RemovePubFieldFromImprint (cjp->imp, field, scp); + break; } - return vnp; + + return rval; } -static void CaptureRefBlockSerialNumbers -(CharPtr str, - Pointer userdata, - BlockType blocktype, - Uint2 entityID, - Uint2 itemtype, - Uint4 itemID, - Int4 left, - Int4 right -) +static Boolean SetPubFieldOnCitJour (CitJourPtr cjp, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { - CharPtr cp; - Int4 serial_number; - ValNodePtr vnp; - BioseqPtr bsp = NULL; - SeqFeatPtr sfp; - SeqDescrPtr sdp; - SeqMgrFeatContext fcontext; - SeqMgrDescContext dcontext; - PubSerialNumberPtr psn; - ValNodePtr ppr; - PubdescPtr pdp = NULL; - - if (blocktype != REFERENCE_BLOCK || userdata == NULL) return; - if (StringNICmp (str, "REFERENCE", 9) != 0) { - return; - } - cp = str + 9; - while (isspace (*cp)) { - cp++; - } - if (!isdigit (*cp)) { - return; - } - serial_number = atoi (cp); + Boolean rval = FALSE; + if (cjp == NULL) return FALSE; - if (itemtype == OBJ_SEQFEAT) { - sfp = SeqMgrGetDesiredFeature (entityID, NULL, itemID, 0, NULL, &fcontext); - if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) { - pdp = 
(PubdescPtr) sfp->data.value.ptrvalue; - bsp = GetSequenceForObject (OBJ_SEQFEAT, sfp); - } - } else if (itemtype == OBJ_SEQDESC) { - sdp = SeqMgrGetDesiredDescriptor (entityID, NULL, itemID, 0, NULL, &dcontext); - if (sdp != NULL && sdp->choice == Seq_descr_pub) { - pdp = (PubdescPtr) sdp->data.ptrvalue; - bsp = GetSequenceForObject (OBJ_SEQDESC, sdp); - } + switch (field) { + case Publication_field_journal: + rval = SetStringsInValNodeStringList (&(cjp->title), scp, value, existing_text); + SetValNodeChoices (cjp->title, 1); + break; + case Publication_field_volume: + case Publication_field_issue: + case Publication_field_pages: + case Publication_field_date: + rval = SetPubFieldOnImprint (cjp->imp, field, scp, value, existing_text); + break; } - if (pdp != NULL && bsp != NULL) { - vnp = ValNodeNew (NULL); - if (vnp != NULL) { - vnp->choice = PUB_Equiv; - vnp->data.ptrvalue = pdp->pub; - ppr = MinimizePub (vnp); - ValNodeFree (vnp); - } - vnp = ValNodeNew (NULL); - if (vnp != NULL) { - vnp->choice = PUB_Equiv; - vnp->data.ptrvalue = ppr; - psn = PubSerialNumberNew (); - psn->bsp = bsp; - psn->serial_number = serial_number; - psn->min_pub = vnp; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, psn); - } - } + return rval; } -NLM_EXTERN ValNodePtr GetCitListsForSeqEntry (SeqEntryPtr sep) +static CharPtr GetPubFieldFromCitBook (CitBookPtr cbp, Int4 field, StringConstraintPtr scp) { - XtraBlock xtra; - ValNodePtr head = NULL; - ErrSev level; - Boolean okay; - SeqEntryPtr oldscope; - Uint2 entityID; + CharPtr str = NULL; - if (sep == NULL) return NULL; - - MemSet ((Pointer) &xtra, 0, sizeof (XtraBlock)); - xtra.ffwrite = CaptureRefBlockSerialNumbers; - xtra.userdata = (Pointer) &head; - level = ErrSetMessageLevel (SEV_MAX); - oldscope = SeqEntrySetScope (sep); - okay = SeqEntryToGnbk (sep, NULL, GENBANK_FMT, SEQUIN_MODE, NORMAL_STYLE, - SHOW_CONTIG_FEATURES, 0, 0, &xtra, NULL); - entityID = SeqMgrGetEntityIDForSeqEntry (sep); - SeqEntrySetScope (oldscope); - 
ErrSetMessageLevel (level); - return head; -} - - -NLM_EXTERN Int4 GetCitationNumberForMinPub (BioseqPtr bsp, ValNodePtr min_pub, ValNodePtr pub_list) -{ - Int4 rval = -1; - PubSerialNumberPtr psn; - ValNodePtr vnp, tmp; - - if (bsp == NULL || min_pub == NULL || pub_list == NULL) { - return -1; - } - - tmp = ValNodeNew (NULL); - tmp->choice = PUB_Equiv; - tmp->data.ptrvalue = min_pub; + if (cbp == NULL) return NULL; - for (vnp = pub_list; vnp != NULL && rval == -1; vnp = vnp->next) { - psn = (PubSerialNumberPtr) vnp->data.ptrvalue; - if (psn->bsp == bsp) { - if (PubLabelMatch (tmp, psn->min_pub) == 0) { - rval = psn->serial_number; + switch (field) { + case Publication_field_title: + str = GetFirstValNodeStringMatch (cbp->title, scp); + break; + case Publication_field_authors: + str = GetAuthorListString (cbp->authors, scp); + break; + case Publication_field_authors_initials: + str = GetAuthorListStringEx (cbp->authors, scp, TRUE); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case Publication_field_affil_email: + case Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (cbp->authors != NULL) { + str = GetPubFieldFromAffil (cbp->authors->affil, field, scp); } - } + break; + case Publication_field_volume: + case Publication_field_issue: + case Publication_field_pages: + case Publication_field_date: + str = GetPubFieldFromImprint (cbp->imp, field, scp); + break; } - tmp = ValNodeFree (tmp); - - return rval; + return str; } -NLM_EXTERN ValNodePtr GetMinPubForCitationNumber (BioseqPtr bsp, Int4 number, ValNodePtr pub_list) +static Boolean RemovePubFieldFromCitBook (CitBookPtr cbp, Int4 field, StringConstraintPtr scp) { - ValNodePtr rval = NULL; - PubSerialNumberPtr psn; - ValNodePtr vnp; + Boolean rval = FALSE; - if (bsp 
== NULL || number < 0 || pub_list == NULL) { - return NULL; - } + if (cbp == NULL) return FALSE; - for (vnp = pub_list; vnp != NULL && rval == NULL; vnp = vnp->next) { - psn = (PubSerialNumberPtr) vnp->data.ptrvalue; - if (psn->bsp == bsp && psn->serial_number == number) { - rval = psn->min_pub; - } + switch (field) { + case Publication_field_title: + rval = RemoveValNodeStringMatch (&(cbp->title), scp); + break; + case Publication_field_authors: + rval = RemoveAuthorListString (cbp->authors, scp); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case Publication_field_affil_email: + case Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (cbp->authors != NULL) { + rval = RemovePubFieldFromAffil(cbp->authors->affil, field, scp); + } + break; + case Publication_field_volume: + case Publication_field_issue: + case Publication_field_pages: + case Publication_field_date: + rval = RemovePubFieldFromImprint (cbp->imp, field, scp); + break; } return rval; } -/* - * Some batch operations will be faster if information about the entire record is collected once - * and reused. The BatchExtra structure is where such data belongs. 
- */ -NLM_EXTERN BatchExtraPtr BatchExtraNew () -{ - BatchExtraPtr b; - - b = (BatchExtraPtr) MemNew (sizeof (BatchExtraData)); - b->cit_list = NULL; - - return b; -} - - -NLM_EXTERN BatchExtraPtr BatchExtraFree (BatchExtraPtr b) -{ - if (b != NULL) { - b->cit_list = PubSerialNumberListFree (b->cit_list); - - b = MemFree (b); - } - return b; -} - - -static Boolean IsCitationField (FieldTypePtr field) -{ - FeatureFieldPtr feature_field; - - if (field != NULL - && field->choice == FieldType_feature_field - && (feature_field = field->data.ptrvalue) != NULL - && feature_field->field != NULL - && ((feature_field->field->choice == FeatQualChoice_legal_qual - && feature_field->field->data.intvalue == Feat_qual_legal_citation) - || (feature_field->field->choice == FeatQualChoice_illegal_qual - && DoesStringMatchConstraint ("citation", feature_field->field->data.ptrvalue)))) { - return TRUE; - } else { - return FALSE; - } - -} - - -static void InitBatchExtraForField (BatchExtraPtr batch_extra, FieldTypePtr field, SeqEntryPtr sep) -{ - if (batch_extra == NULL) { - return; - } - /* only need to collect citations if citation is in the list of applicable fields */ - if (IsCitationField (field)) { - ValNodeLink (&(batch_extra->cit_list), GetCitListsForSeqEntry (sep)); - } -} - - -static void InitBatchExtraForAECRAction (BatchExtraPtr batch_extra, AECRActionPtr action, SeqEntryPtr sep) +static Boolean SetPubFieldOnCitBook (CitBookPtr cbp, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { - ValNodePtr field_list, field; - - if (batch_extra == NULL || action == NULL) { - return; - } - - field_list = GetFieldTypeListFromAECRAction (action); - for (field = field_list; field != NULL; field = field->next) { - InitBatchExtraForField (batch_extra, field, sep); - } - field_list = FieldTypeListFree (field_list); -} - - -NLM_EXTERN int LIBCALLBACK SortVnpByObject (VoidPtr ptr1, VoidPtr ptr2) + Boolean rval = FALSE; -{ - ValNodePtr vnp1; - ValNodePtr vnp2; - CharPtr 
str1, str2; - int rval = 0; + if (cbp == NULL) return FALSE; - if (ptr1 != NULL && ptr2 != NULL) { - vnp1 = *((ValNodePtr PNTR) ptr1); - vnp2 = *((ValNodePtr PNTR) ptr2); - if (vnp1 != NULL && vnp2 != NULL) { - if (vnp1->choice < vnp2->choice) { - rval = -1; - } else if (vnp1->choice > vnp2->choice) { - rval = 1; - } else { - str1 = GetDiscrepancyItemText (vnp1); - str2 = GetDiscrepancyItemText (vnp2); - rval = StringCmp (str1, str1); - str1 = MemFree (str1); - str2 = MemFree (str2); + switch (field) { + case Publication_field_title: + rval = SetStringsInValNodeStringList (&(cbp->title), scp, value, existing_text); + SetValNodeChoices (cbp->title, 1); + break; + case Publication_field_authors: + rval = SetAuthorListFromString (cbp->authors, scp, value, existing_text); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case Publication_field_affil_email: + case Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (cbp->authors != NULL) { + rval = SetAffilPubField (cbp->authors->affil, field, scp, value, existing_text); } - } + break; + case Publication_field_volume: + case Publication_field_issue: + case Publication_field_pages: + case Publication_field_date: + rval = SetPubFieldOnImprint (cbp->imp, field, scp, value, existing_text); + break; } return rval; } -static ValNodePtr BioseqListForObjectList (ValNodePtr object_list) -{ - ValNodePtr vnp, bsp_list = NULL; - BioseqPtr bsp; - - for (vnp = object_list; vnp != NULL; vnp = vnp->next) { - bsp = GetSequenceForObject (vnp->choice, vnp->data.ptrvalue); - if (bsp != NULL) { - ValNodeAddPointer (&bsp_list, OBJ_BIOSEQ, bsp); - } - } - bsp_list = ValNodeSort (bsp_list, SortVnpByObject); - ValNodeUnique (&bsp_list, SortVnpByObject, ValNodeFree); - return bsp_list; -} - - -static 
void InitBatchExtraForAECRActionAndObjectList (BatchExtraPtr batch_extra, AECRActionPtr action, ValNodePtr object_list) +NLM_EXTERN CharPtr GetPubFieldFromPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp) { - ValNodePtr field_list, field; - ValNodePtr bsp_list = NULL, vnp; - SeqEntryPtr sep; - - if (batch_extra == NULL || action == NULL) { - return; - } + CitGenPtr cgp; + CitArtPtr cap; + CitBookPtr cbp; + CitPatPtr cpp; + CitSubPtr csp; + CitJourPtr cjp; + CharPtr str = NULL; - field_list = GetFieldTypeListFromAECRAction (action); - bsp_list = BioseqListForObjectList (object_list); - for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { - sep = SeqMgrGetSeqEntryForData (vnp->data.ptrvalue); - for (field = field_list; field != NULL; field = field->next) { - InitBatchExtraForField (batch_extra, field, sep); - } - } - bsp_list = ValNodeFree (bsp_list); - - field_list = FieldTypeListFree (field_list); - -} - - - - - -NLM_EXTERN CharPtr GetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra) -{ - CharPtr str = NULL; - FeatureFieldPtr feature_field; - SeqDescrPtr sdp; - GBBlockPtr gb; - - if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return NULL; - - switch (field->choice) { - case FieldType_source_qual : - str = GetSourceQualFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp); - break; - case FieldType_feature_field : - if (choice == OBJ_SEQFEAT) { - str = GetQualFromFeatureEx ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp, batch_extra); + if (the_pub == NULL || the_pub->data.ptrvalue == NULL) return NULL; + + switch (the_pub->choice) { + case PUB_Gen : + cgp = (CitGenPtr) the_pub->data.ptrvalue; + switch (field) { + case Publication_field_cit: + if (!StringHasNoText (cgp->cit) && DoesStringMatchConstraint (cgp->title, scp)) { + str = StringSave (cgp->cit); + } + break; + case Publication_field_authors: + 
str = GetAuthorListString (cgp->authors, scp); + break; + case Publication_field_authors_initials: + str = GetAuthorListStringEx (cgp->authors, scp, TRUE); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case Publication_field_affil_email: + case Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (cgp->authors != NULL) { + str = GetPubFieldFromAffil (cgp->authors->affil, field, scp); + } + break; + case Publication_field_journal: + str = GetFirstValNodeStringMatch (cgp->journal, scp); + break; + case Publication_field_volume: + if (!StringHasNoText (cgp->volume) && DoesStringMatchConstraint (cgp->volume, scp)) { + str = StringSave (cgp->volume); + } + break; + case Publication_field_issue: + if (!StringHasNoText (cgp->issue) && DoesStringMatchConstraint (cgp->issue, scp)) { + str = StringSave (cgp->issue); + } + break; + case Publication_field_pages: + if (!StringHasNoText (cgp->pages) && DoesStringMatchConstraint (cgp->pages, scp)) { + str = StringSave (cgp->pages); + } + break; + case Publication_field_date: + if (cgp->date != NULL) { + str = PrintDate (cgp->date); + if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) { + str = MemFree (str); + } + } + break; + case Publication_field_serial_number: + str = GetInt2ValueFromString (cgp->serial_number, scp); + break; + case Publication_field_title: + if (!StringHasNoText (cgp->title) && DoesStringMatchConstraint (cgp->title, scp)) { + str = StringSave (cgp->title); + } + break; } break; - case FieldType_cds_gene_prot : - if (choice == 0) { - str = GetFieldValueFromCGPSet ((CGPSetPtr) data, field->data.intvalue, scp); - } else if (choice == OBJ_SEQFEAT) { - feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue); - str = GetQualFromFeature 
((SeqFeatPtr) data, feature_field, scp); - feature_field = FeatureFieldFree (feature_field); + case PUB_Sub : + csp = (CitSubPtr) the_pub->data.ptrvalue; + switch (field) { + case Publication_field_title: + if (!StringHasNoText (csp->descr) && DoesStringMatchConstraint (csp->descr, scp)) { + str = StringSave (csp->descr); + } + break; + case Publication_field_authors: + str = GetAuthorListString (csp->authors, scp); + break; + case Publication_field_authors_initials: + str = GetAuthorListStringEx (csp->authors, scp, TRUE); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case Publication_field_affil_email: + case Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (csp->authors != NULL) { + str = GetPubFieldFromAffil (csp->authors->affil, field, scp); + } + break; + case Publication_field_date: + str = PrintDate (csp->date); + if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) { + str = MemFree (str); + } + break; } break; - case FieldType_molinfo_field : - if (choice == OBJ_BIOSEQ) { - str = GetSequenceQualFromBioseq ((BioseqPtr) data, field->data.ptrvalue); + case PUB_Article : + cap = (CitArtPtr) the_pub->data.ptrvalue; + switch (field) { + case Publication_field_title: + str = GetFirstValNodeStringMatch (cap->title, scp); + break; + case Publication_field_authors: + str = GetAuthorListString (cap->authors, scp); + break; + case Publication_field_authors_initials: + str = GetAuthorListStringEx (cap->authors, scp, TRUE); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case Publication_field_affil_email: + case 
Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (cap->authors != NULL) { + str = GetPubFieldFromAffil (cap->authors->affil, field, scp); + } + break; + default: + if (cap->from == 1) { + str = GetPubFieldFromCitJour (cap->fromptr, field, scp); + } else if (cap->from == 2) { + str = GetPubFieldFromCitBook (cap->fromptr, field, scp); + } + break; } break; - case FieldType_pub : - str = GetPubFieldFromObject (choice, data, field->data.intvalue, scp); + case PUB_Journal: + cjp = (CitJourPtr) the_pub->data.ptrvalue; + str = GetPubFieldFromCitJour (cjp, field, scp); break; - case FieldType_rna_field : - if (choice == OBJ_SEQFEAT) { - feature_field = FeatureFieldFromRnaQual (field->data.ptrvalue); - str = GetQualFromFeature ((SeqFeatPtr) data, feature_field, scp); - feature_field = FeatureFieldFree (feature_field); - } + case PUB_Book : + case PUB_Man : + cbp = (CitBookPtr) the_pub->data.ptrvalue; + str = GetPubFieldFromCitBook (cbp, field, scp); break; - case FieldType_struc_comment_field: - if (choice == OBJ_SEQDESC && data != NULL) { - sdp = (SeqDescrPtr) data; - if (sdp != NULL && sdp->choice == Seq_descr_user) { - str = GetStructuredCommentFieldFromUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp); - } + case PUB_Patent : + cpp = (CitPatPtr) the_pub->data.ptrvalue; + switch (field) { + case Publication_field_title: + if (!StringHasNoText (cpp->title) && DoesStringMatchConstraint (cpp->title, scp)) { + str = StringSave (cpp->title); + } + break; + case Publication_field_authors: + str = GetAuthorListString (cpp->authors, scp); + break; + case Publication_field_authors_initials: + str = GetAuthorListStringEx (cpp->authors, scp, TRUE); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case 
Publication_field_affil_email: + case Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (cpp->authors != NULL) { + str = GetPubFieldFromAffil (cpp->authors->affil, field, scp); + } + break; } break; - case FieldType_misc: - if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) { - str = GetGenomeProjectIdFromBioseq ((BioseqPtr) data, scp); - } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor) { - sdp = (SeqDescrPtr) data; - if (sdp != NULL && sdp->choice == Seq_descr_comment && !StringHasNoText (sdp->data.ptrvalue)) { - str = StringSave (sdp->data.ptrvalue); - } - } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_defline) { - sdp = (SeqDescrPtr) data; - if (sdp != NULL && sdp->choice == Seq_descr_title && !StringHasNoText (sdp->data.ptrvalue)) { - str = StringSave (sdp->data.ptrvalue); - } - } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword) { - sdp = (SeqDescrPtr) data; - if (sdp != NULL && sdp->choice == Seq_descr_genbank && (gb = (GBBlockPtr) sdp->data.ptrvalue) != NULL) { - str = GetFirstValNodeStringMatch (gb->keywords, scp); - } - } + default : break; } return str; } -NLM_EXTERN CharPtr GetFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp) +static Boolean RemovePubFieldFromPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp) { - return GetFieldValueForObjectEx (choice, data, field, scp, NULL); -} - + CitGenPtr cgp; + CitArtPtr cap; + CitBookPtr cbp; + CitPatPtr cpp; + CitSubPtr csp; + Boolean rval = FALSE; + Char num[15]; -NLM_EXTERN ValNodePtr GetMultipleFieldValuesForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra) -{ - CharPtr str = NULL; - ValNodePtr val_list = NULL; - - if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE; - - if (field->choice == 
FieldType_source_qual) { - val_list = GetMultipleSourceQualsFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp); - } else { - str = GetFieldValueForObjectEx (choice, data, field, scp, batch_extra); - if (str != NULL) { - ValNodeAddPointer (&val_list, 0, str); - } - } - return val_list; -} - - -NLM_EXTERN Boolean GBBlockIsCompletelyEmpty (GBBlockPtr gb) -{ - if (gb != NULL - && gb->extra_accessions == NULL - && gb->keywords == NULL - && gb->source == NULL - && gb->origin == NULL - && gb->date == NULL - && gb->div == NULL - && gb->taxonomy == NULL - && gb->entry_date == NULL) { - return TRUE; - } else { - return FALSE; - } -} - - -static Boolean RemoveFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp) -{ - Boolean rval = FALSE; - FeatureFieldPtr feature_field; - SeqDescrPtr sdp; - ObjValNodePtr ovp; - GBBlockPtr gb; - - if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE; - - switch (field->choice) { - case FieldType_source_qual : - rval = RemoveSourceQualFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp); - break; - case FieldType_feature_field : - if (choice == OBJ_SEQFEAT) { - rval = RemoveQualFromFeature ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp); + if (the_pub == NULL) return FALSE; + + switch (the_pub->choice) { + case PUB_Gen : + cgp = (CitGenPtr) the_pub->data.ptrvalue; + switch (field) { + case Publication_field_cit: + if (!StringHasNoText (cgp->cit) && DoesStringMatchConstraint (cgp->title, scp)) { + cgp->cit = MemFree (cgp->cit); + rval = TRUE; + } + break; + case Publication_field_authors: + rval = RemoveAuthorListString (cgp->authors, scp); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case 
Publication_field_affil_street: + case Publication_field_affil_email: + case Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (cgp->authors != NULL) { + rval = RemovePubFieldFromAffil(cgp->authors->affil, field, scp); + } + break; + case Publication_field_journal: + rval = RemoveValNodeStringMatch (&(cgp->journal), scp); + break; + case Publication_field_volume: + if (!StringHasNoText (cgp->volume) && DoesStringMatchConstraint (cgp->volume, scp)) { + cgp->volume = MemFree (cgp->volume); + rval = TRUE; + } + break; + case Publication_field_issue: + if (!StringHasNoText (cgp->issue) && DoesStringMatchConstraint (cgp->issue, scp)) { + cgp->issue = MemFree (cgp->issue); + rval = TRUE; + } + break; + case Publication_field_pages: + if (!StringHasNoText (cgp->pages) && DoesStringMatchConstraint (cgp->pages, scp)) { + cgp->pages = MemFree (cgp->pages); + rval = TRUE; + } + break; + case Publication_field_date: + rval = RemovePubDate (&(cgp->date), scp); + break; + case Publication_field_serial_number: + if (cgp->serial_number > 0) { + sprintf (num, "%d", cgp->serial_number); + if (DoesStringMatchConstraint (num, scp)) { + cgp->serial_number = 0; + rval = TRUE; + } + } + break; + case Publication_field_title: + if (!StringHasNoText (cgp->title) && DoesStringMatchConstraint (cgp->title, scp)) { + cgp->title = MemFree (cgp->title); + rval = TRUE; + } + break; } break; - case FieldType_cds_gene_prot: - if (choice == 0) { - rval = RemoveFieldValueFromCGPSet ((CGPSetPtr) data, field->data.intvalue, scp); - } else if (choice == OBJ_SEQFEAT) { - feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue); - rval = RemoveQualFromFeature ((SeqFeatPtr) data, feature_field, scp); - feature_field = FeatureFieldFree (feature_field); + case PUB_Sub : + csp = (CitSubPtr) the_pub->data.ptrvalue; + switch (field) { + case Publication_field_title: + if (!StringHasNoText (csp->descr) && DoesStringMatchConstraint 
(csp->descr, scp)) { + csp->descr = MemFree (csp->descr); + rval = TRUE; + } + break; + case Publication_field_authors: + rval = RemoveAuthorListString (csp->authors, scp); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case Publication_field_affil_email: + case Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (csp->authors != NULL) { + rval = RemovePubFieldFromAffil(csp->authors->affil, field, scp); + } + break; + case Publication_field_date: + rval = RemovePubDate (&(csp->date), scp); + break; } break; - case FieldType_molinfo_field : - if (choice == OBJ_BIOSEQ) { - rval = RemoveSequenceQualFromBioseq ((BioseqPtr) data, field->data.ptrvalue); + case PUB_Article : + cap = (CitArtPtr) the_pub->data.ptrvalue; + switch (field) { + case Publication_field_title: + rval = RemoveValNodeStringMatch (&(cap->title), scp); + break; + case Publication_field_authors: + rval = RemoveAuthorListString (cap->authors, scp); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case Publication_field_affil_email: + case Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (cap->authors != NULL) { + rval = RemovePubFieldFromAffil(cap->authors->affil, field, scp); + } + break; + default: + if (cap->from == 1) { + rval = RemovePubFieldFromCitJour (cap->fromptr, field, scp); + } else if (cap->from == 2) { + rval = RemovePubFieldFromCitBook (cap->fromptr, field, scp); + } + break; } break; - case FieldType_pub : - rval = RemovePubFieldFromObject (choice, data, field->data.intvalue, scp); - break; - 
case FieldType_rna_field : - if (choice == OBJ_SEQFEAT) { - feature_field = FeatureFieldFromRnaQual (field->data.ptrvalue); - rval = RemoveQualFromFeature ((SeqFeatPtr) data, feature_field, scp); - feature_field = FeatureFieldFree (feature_field); - } + case PUB_Journal: + rval = RemovePubFieldFromCitJour (the_pub->data.ptrvalue, field, scp); break; - case FieldType_struc_comment_field: - if (choice == OBJ_SEQDESC && data != NULL) { - sdp = (SeqDescrPtr) data; - if (sdp != NULL && sdp->choice == Seq_descr_user) { - rval = RemoveStructuredCommentFieldFromUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp); - } - } + case PUB_Book : + case PUB_Man : + cbp = (CitBookPtr) the_pub->data.ptrvalue; + rval = RemovePubFieldFromCitBook (cbp, field, scp); break; - case FieldType_misc: - if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) { - rval = RemoveGenomeProjectIdFromBioseq ((BioseqPtr) data, scp); - } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor) { - sdp = (SeqDescrPtr) data; - ovp = (ObjValNodePtr) sdp; - if (sdp->choice == Seq_descr_comment) { - ovp->idx.deleteme = TRUE; - rval = TRUE; - } - } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_defline) { - sdp = (SeqDescrPtr) data; - ovp = (ObjValNodePtr) sdp; - if (sdp->choice == Seq_descr_title) { - ovp->idx.deleteme = TRUE; - rval = TRUE; - } - } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword) { - sdp = (SeqDescrPtr) data; - ovp = (ObjValNodePtr) sdp; - if (sdp->choice == Seq_descr_genbank && (gb = (GBBlockPtr) sdp->data.ptrvalue) != NULL) { - if (RemoveValNodeStringMatch (&(gb->keywords), scp)) { + case PUB_Patent : + cpp = (CitPatPtr) the_pub->data.ptrvalue; + switch (field) { + case Publication_field_title: + if (!StringHasNoText (cpp->title) && DoesStringMatchConstraint (cpp->title, scp)) { + cpp->title = MemFree (cpp->title); rval = TRUE; - if (GBBlockIsCompletelyEmpty(gb)) { - 
ovp->idx.deleteme = TRUE; - } } - } - } - break; - } - return rval; -} - - -NLM_EXTERN Boolean SetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra) -{ - Boolean rval = FALSE; - FeatureFieldPtr feature_field; - SeqDescrPtr sdp; - ObjValNodePtr ovp; - GBBlockPtr gb; - Boolean was_empty; + break; + case Publication_field_authors: + rval = RemoveAuthorListString (cpp->authors, scp); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case Publication_field_affil_email: + case Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (cpp->authors != NULL) { + rval = RemovePubFieldFromAffil(cpp->authors->affil, field, scp); + } + break; + } + break; + default : + break; + } + return rval; +} - if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE; - switch (field->choice) { - case FieldType_source_qual : - rval = SetSourceQualInBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp, value, existing_text); - break; - case FieldType_feature_field : - if (choice == OBJ_SEQFEAT) { - rval = SetQualOnFeatureEx ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp, value, existing_text, batch_extra); +static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) +{ + CitGenPtr cgp; + CitArtPtr cap; + CitBookPtr cbp; + CitPatPtr cpp; + CitSubPtr csp; + Boolean rval = FALSE; + + if (the_pub == NULL || value == NULL) return FALSE; + + switch (the_pub->choice) { + case PUB_Gen : + cgp = (CitGenPtr) the_pub->data.ptrvalue; + switch (field) { + case Publication_field_cit: + if (DoesStringMatchConstraint 
(cgp->cit, scp)) { + rval = SetStringValue ( &(cgp->cit), value, existing_text); + } + break; + case Publication_field_authors: + rval = SetAuthorListFromString (cgp->authors, scp, value, existing_text); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case Publication_field_affil_email: + case Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (cgp->authors != NULL) { + rval = SetAffilPubField (cgp->authors->affil, field, scp, value, existing_text); + } + break; + case Publication_field_journal: + rval = SetStringsInValNodeStringList (&(cgp->journal), scp, value, existing_text); + SetValNodeChoices (cgp->journal, 1); + break; + case Publication_field_volume: + if (DoesStringMatchConstraint (cgp->volume, scp)) { + rval = SetStringValue ( &(cgp->volume), value, existing_text); + } + break; + case Publication_field_issue: + if (DoesStringMatchConstraint (cgp->issue, scp)) { + rval = SetStringValue ( &(cgp->issue), value, existing_text); + } + break; + case Publication_field_pages: + if (DoesStringMatchConstraint (cgp->pages, scp)) { + rval = SetStringValue ( &(cgp->pages), value, existing_text); + } + break; + case Publication_field_date: + rval = SetPubDate (&(cgp->date), scp, value, existing_text); + break; + case Publication_field_serial_number: + rval = SetInt2ValueWithString (&(cgp->serial_number), value, existing_text); + break; + case Publication_field_title: + if (DoesStringMatchConstraint(cgp->title, scp)) { + rval = SetStringValue ( &(cgp->title), value, existing_text); + } + break; } break; - case FieldType_cds_gene_prot: - if (choice == 0) { - rval = SetFieldValueInCGPSet ((CGPSetPtr) data, field->data.intvalue, scp, value, existing_text); - } else if (choice == OBJ_SEQFEAT) { - feature_field = 
FeatureFieldFromCDSGeneProtField (field->data.intvalue); - rval = SetQualOnFeatureEx ((SeqFeatPtr) data, feature_field, scp, value, existing_text, batch_extra); - feature_field = FeatureFieldFree (feature_field); + case PUB_Sub : + csp = (CitSubPtr) the_pub->data.ptrvalue; + switch (field) { + case Publication_field_title: + if (DoesStringMatchConstraint (csp->descr, scp)) { + rval = SetStringValue (&(csp->descr), value, existing_text); + } + break; + case Publication_field_authors: + rval = SetAuthorListFromString (csp->authors, scp, value, existing_text); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case Publication_field_affil_email: + case Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (csp->authors != NULL) { + rval = SetAffilPubField (csp->authors->affil, field, scp, value, existing_text); + } + break; + case Publication_field_date: + rval = SetPubDate (&(csp->date), scp, value, existing_text); + break; } break; - case FieldType_molinfo_field: - if (choice == OBJ_BIOSEQ) { - rval = SetSequenceQualOnBioseq ((BioseqPtr) data, field->data.ptrvalue); + case PUB_Article : + cap = (CitArtPtr) the_pub->data.ptrvalue; + switch (field) { + case Publication_field_title: + rval = SetStringsInValNodeStringList (&(cap->title), scp, value, existing_text); + SetValNodeChoices (cap->title, 1); + break; + case Publication_field_authors: + rval = SetAuthorListFromString (cap->authors, scp, value, existing_text); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case Publication_field_affil_email: + case Publication_field_affil_fax: + case 
Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (cap->authors != NULL) { + rval = SetAffilPubField (cap->authors->affil, field, scp, value, existing_text); + } + break; + default: + if (cap->from == 1) { + rval = SetPubFieldOnCitJour (cap->fromptr, field, scp, value, existing_text); + } else if (cap->from == 2) { + rval = SetPubFieldOnCitBook (cap->fromptr, field, scp, value, existing_text); + } + break; } break; - case FieldType_pub : - rval = SetPubFieldOnObject (choice, data, field->data.intvalue, scp, value, existing_text); - break; - case FieldType_rna_field : - if (choice == OBJ_SEQFEAT) { - feature_field = FeatureFieldFromRnaQual (field->data.ptrvalue); - rval = SetQualOnFeatureEx ((SeqFeatPtr) data, feature_field, scp, value, existing_text, batch_extra); - feature_field = FeatureFieldFree (feature_field); - } + case PUB_Journal: + rval = SetPubFieldOnCitJour (the_pub->data.ptrvalue, field, scp, value, existing_text); break; - case FieldType_struc_comment_field: - if (choice == OBJ_SEQDESC && data != NULL) { - sdp = (SeqDescrPtr) data; - if (sdp != NULL && sdp->choice == Seq_descr_user) { - rval = SetStructuredCommentFieldOnUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp, value, existing_text); - } - } + case PUB_Book : + case PUB_Man : + cbp = (CitBookPtr) the_pub->data.ptrvalue; + rval = SetPubFieldOnCitBook (cbp, field, scp, value, existing_text); break; - case FieldType_misc: - if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) { - rval = SetGenomeProjectIdOnBioseq ((BioseqPtr) data, scp, value, existing_text); - } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor) { - sdp = (SeqDescrPtr) data; - if (sdp->choice == Seq_descr_comment) { - rval = SetTextDescriptor (sdp, scp, value, existing_text); - } - } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_defline) { - sdp = (SeqDescrPtr) data; - if (sdp->choice == Seq_descr_title) { 
- rval = SetTextDescriptor (sdp, scp, value, existing_text); - } - } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword) { - sdp = (SeqDescrPtr) data; - if (sdp->choice == Seq_descr_genbank && (gb = (GBBlockPtr) sdp->data.ptrvalue) != NULL) { - was_empty = GBBlockIsCompletelyEmpty (gb); - if (SetStringsInValNodeStringList (&(gb->keywords), scp, value, existing_text)) { - rval = TRUE; - if (sdp->extended) { - ovp = (ObjValNodePtr) sdp; - if (GBBlockIsCompletelyEmpty(gb)) { - ovp->idx.deleteme = TRUE; - } else if (was_empty) { - ovp->idx.deleteme = FALSE; - } - } + case PUB_Patent : + cpp = (CitPatPtr) the_pub->data.ptrvalue; + switch (field) { + case Publication_field_title: + if (DoesStringMatchConstraint(cpp->title, scp)) { + rval = SetStringValue ( &(cpp->title), value, existing_text); } - } + break; + case Publication_field_authors: + rval = SetAuthorListFromString (cpp->authors, scp, value, existing_text); + break; + case Publication_field_affiliation: + case Publication_field_affil_div: + case Publication_field_affil_city: + case Publication_field_affil_sub: + case Publication_field_affil_country: + case Publication_field_affil_street: + case Publication_field_affil_email: + case Publication_field_affil_fax: + case Publication_field_affil_phone: + case Publication_field_affil_zipcode: + if (cpp->authors != NULL) { + rval = SetAffilPubField (cpp->authors->affil, field, scp, value, existing_text); + } + break; } break; + default : + break; } return rval; } -NLM_EXTERN Boolean SetFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) -{ - return SetFieldValueForObjectEx (choice, data, field, scp, value, existing_text, NULL); -} - -NLM_EXTERN ValNodePtr GetFieldTypeListFromAECRAction (AECRActionPtr action) +static CharPtr GetPubFieldFromObject (Uint1 choice, Pointer data, Int4 field, StringConstraintPtr scp) { - ValNodePtr field_list = NULL; - ApplyActionPtr apply; 
- EditActionPtr edit; - ConvertActionPtr convert; - CopyActionPtr copy; - SwapActionPtr swap; - RemoveActionPtr remove; - AECRParseActionPtr parse; + CharPtr rval = NULL; + PubdescPtr pdp = NULL; + PubPtr pub; + SeqFeatPtr sfp; + SeqDescrPtr sdp; - if (action == NULL) { - return NULL; + if (data == NULL) return NULL; + if (choice == OBJ_SEQFEAT) { + sfp = (SeqFeatPtr) data; + if (sfp->data.choice == SEQFEAT_PUB) { + pdp = sfp->data.value.ptrvalue; + } + } else if (choice == OBJ_SEQDESC) { + sdp = (SeqDescrPtr) data; + if (sdp->choice == Seq_descr_pub) { + pdp = sdp->data.ptrvalue; + } } - /* todo - add fields from constraints ? */ - - /* get fields from action */ - if (action->action != NULL) { - switch (action->action->choice) { - case ActionChoice_apply: - apply = (ApplyActionPtr) action->action->data.ptrvalue; - if (apply != NULL) { - ValNodeLink (&field_list, FieldTypeCopy (apply->field)); - } - break; - case ActionChoice_edit: - edit = (EditActionPtr) action->action->data.ptrvalue; - if (edit != NULL) { - ValNodeLink (&field_list, FieldTypeCopy (edit->field)); - } - break; - case ActionChoice_convert: - convert = (ConvertActionPtr) action->action->data.ptrvalue; - if (convert != NULL) { - ValNodeLink (&field_list, GetFromFieldFromFieldPair (convert->fields)); - ValNodeLink (&field_list, GetToFieldFromFieldPair (convert->fields)); - } - break; - case ActionChoice_copy: - copy = (CopyActionPtr) action->action->data.ptrvalue; - if (copy != NULL) { - ValNodeLink (&field_list, GetFromFieldFromFieldPair (copy->fields)); - ValNodeLink (&field_list, GetToFieldFromFieldPair (copy->fields)); - } - break; - case ActionChoice_swap: - swap = (SwapActionPtr) action->action->data.ptrvalue; - if (swap != NULL) { - ValNodeLink (&field_list, GetFromFieldFromFieldPair (swap->fields)); - ValNodeLink (&field_list, GetToFieldFromFieldPair (swap->fields)); - } - break; - case ActionChoice_remove: - remove = (RemoveActionPtr) action->action->data.ptrvalue; - if (remove != NULL) { - 
ValNodeLink (&field_list, FieldTypeCopy (remove->field)); - } - break; - case ActionChoice_parse: - parse = (AECRParseActionPtr) action->action->data.ptrvalue; - if (parse != NULL) { - ValNodeLink (&field_list, GetFromFieldFromFieldPair (parse->fields)); - ValNodeLink (&field_list, GetToFieldFromFieldPair (parse->fields)); - } - break; - } + if (pdp == NULL) return NULL; + for (pub = pdp->pub; pub != NULL && rval == NULL; pub = pub->next) { + rval = GetPubFieldFromPub (pub, field, scp); } - return field_list; + + return rval; } -NLM_EXTERN Boolean AreAECRActionFieldsEqual (AECRActionPtr action1, AECRActionPtr action2) +static Boolean RemovePubFieldFromObject (Uint1 choice, Pointer data, Int4 field, StringConstraintPtr scp) { - ApplyActionPtr a1, a2; - EditActionPtr e1, e2; - ConvertActionPtr v1, v2; - CopyActionPtr c1, c2; - SwapActionPtr s1, s2; - RemoveActionPtr r1, r2; - AECRParseActionPtr p1, p2; - FieldTypePtr field1, field2; - Boolean rval = FALSE; + Boolean rval = FALSE; + PubdescPtr pdp = NULL; + PubPtr pub; + SeqFeatPtr sfp; + SeqDescrPtr sdp; - if (action1 == NULL && action2 == NULL) { - return TRUE; - } else if (action1 == NULL || action2 == NULL) { - return FALSE; - } else if (action1->action == NULL && action2->action == NULL) { - return TRUE; - } else if (action1->action == NULL || action2->action == NULL) { - return FALSE; - } else if (action1->action->choice != action2->action->choice) { - return FALSE; - } else if (action1->action->data.ptrvalue == NULL && action2->action->data.ptrvalue == NULL) { - return TRUE; - } else if (action1->action->data.ptrvalue == NULL || action2->action->data.ptrvalue == NULL) { - return FALSE; - } else { - switch (action1->action->choice) { - case ActionChoice_apply: - a1 = (ApplyActionPtr) action1->action->data.ptrvalue; - a2 = (ApplyActionPtr) action2->action->data.ptrvalue; - rval = DoFieldTypesMatch (a1->field, a2->field); - break; - case ActionChoice_edit: - e1 = (EditActionPtr) action1->action->data.ptrvalue; - 
e2 = (EditActionPtr) action2->action->data.ptrvalue; - rval = DoFieldTypesMatch (e1->field, e2->field); - break; - case ActionChoice_convert: - v1 = (ConvertActionPtr) action1->action->data.ptrvalue; - v2 = (ConvertActionPtr) action2->action->data.ptrvalue; - field1 = GetFromFieldFromFieldPair (v1->fields); - field2 = GetFromFieldFromFieldPair (v2->fields); - rval = DoFieldTypesMatch (field1, field2); - if (rval) { - field1 = FieldTypeFree (field1); - field2 = FieldTypeFree (field2); - field1 = GetToFieldFromFieldPair (v1->fields); - field2 = GetToFieldFromFieldPair (v2->fields); - rval = DoFieldTypesMatch (field1, field2); - } - field1 = FieldTypeFree (field1); - field2 = FieldTypeFree (field2); - if (rval) { - if ((v1->keep_original && !v2->keep_original) - || (!v1->keep_original && v2->keep_original)) { - rval = FALSE; - } - } - break; - case ActionChoice_copy: - c1 = (CopyActionPtr) action1->action->data.ptrvalue; - c2 = (CopyActionPtr) action2->action->data.ptrvalue; - field1 = GetFromFieldFromFieldPair (c1->fields); - field2 = GetFromFieldFromFieldPair (c2->fields); - rval = DoFieldTypesMatch (field1, field2); - if (rval) { - field1 = FieldTypeFree (field1); - field2 = FieldTypeFree (field2); - field1 = GetToFieldFromFieldPair (c1->fields); - field2 = GetToFieldFromFieldPair (c2->fields); - rval = DoFieldTypesMatch (field1, field2); - } - field1 = FieldTypeFree (field1); - field2 = FieldTypeFree (field2); - break; - case ActionChoice_swap: - s1 = (SwapActionPtr) action1->action->data.ptrvalue; - s2 = (SwapActionPtr) action2->action->data.ptrvalue; - field1 = GetFromFieldFromFieldPair (s1->fields); - field2 = GetFromFieldFromFieldPair (s2->fields); - rval = DoFieldTypesMatch (field1, field2); - if (rval) { - field1 = FieldTypeFree (field1); - field2 = FieldTypeFree (field2); - field1 = GetToFieldFromFieldPair (s1->fields); - field2 = GetToFieldFromFieldPair (s2->fields); - rval = DoFieldTypesMatch (field1, field2); - } - field1 = FieldTypeFree (field1); - 
field2 = FieldTypeFree (field2); - break; - case ActionChoice_remove: - r1 = (RemoveActionPtr) action1->action->data.ptrvalue; - r2 = (RemoveActionPtr) action2->action->data.ptrvalue; - rval = DoFieldTypesMatch (r1->field, r2->field); - break; - case ActionChoice_parse: - p1 = (AECRParseActionPtr) action1->action->data.ptrvalue; - p2 = (AECRParseActionPtr) action2->action->data.ptrvalue; - field1 = GetFromFieldFromFieldPair (p1->fields); - field2 = GetFromFieldFromFieldPair (p2->fields); - rval = DoFieldTypesMatch (field1, field2); - if (rval) { - field1 = FieldTypeFree (field1); - field2 = FieldTypeFree (field2); - field1 = GetToFieldFromFieldPair (p1->fields); - field2 = GetToFieldFromFieldPair (p2->fields); - rval = DoFieldTypesMatch (field1, field2); - } - field1 = FieldTypeFree (field1); - field2 = FieldTypeFree (field2); - break; + if (data == NULL) return FALSE; + if (choice == OBJ_SEQFEAT) { + sfp = (SeqFeatPtr) data; + if (sfp->data.choice == SEQFEAT_PUB) { + pdp = sfp->data.value.ptrvalue; + } + } else if (choice == OBJ_SEQDESC) { + sdp = (SeqDescrPtr) data; + if (sdp->choice == Seq_descr_pub) { + pdp = sdp->data.ptrvalue; } } + + if (pdp == NULL) return FALSE; + + for (pub = pdp->pub; pub != NULL; pub = pub->next) { + rval |= RemovePubFieldFromPub (pub, field, scp); + } return rval; } -static Boolean IsNonTextSourceQualPresent (BioSourcePtr biop, Int4 srcqual) +static Boolean SetPubFieldOnObject (Uint1 choice, Pointer data, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { - Int4 orgmod_subtype, subsrc_subtype, subfield; - OrgModPtr mod; - SubSourcePtr ssp; - Boolean rval = FALSE; - - if (biop == NULL) return FALSE; + Boolean rval = FALSE; + PubdescPtr pdp = NULL; + PubPtr pub; + SeqFeatPtr sfp; + SeqDescrPtr sdp; - orgmod_subtype = GetOrgModQualFromSrcQual (srcqual, &subfield); - if (orgmod_subtype == -1) { - subsrc_subtype = GetSubSrcQualFromSrcQual (srcqual, &subfield); - for (ssp = biop->subtype; ssp != NULL && !rval; ssp = 
ssp->next) { - if (ssp->subtype == subsrc_subtype) { - rval = TRUE; - } + if (data == NULL) return FALSE; + if (choice == OBJ_SEQFEAT) { + sfp = (SeqFeatPtr) data; + if (sfp->data.choice == SEQFEAT_PUB) { + pdp = sfp->data.value.ptrvalue; } - } else { - if (biop->org != NULL && biop->org->orgname != NULL) { - for (mod = biop->org->orgname->mod; mod != NULL && !rval; mod = mod->next) { - if (mod->subtype == orgmod_subtype) { - rval = TRUE; - } - } + } else if (choice == OBJ_SEQDESC) { + sdp = (SeqDescrPtr) data; + if (sdp->choice == Seq_descr_pub) { + pdp = sdp->data.ptrvalue; } } + + if (pdp == NULL) return FALSE; + + for (pub = pdp->pub; pub != NULL; pub = pub->next) { + rval |= SetPubFieldOnPub (pub, field, scp, value, existing_text); + } return rval; } -static Boolean IsSourceQualPresent (BioSourcePtr biop, SourceQualChoicePtr scp) -{ - Boolean rval = FALSE; - CharPtr str; - if (biop == NULL) return FALSE; - if (scp == NULL) return TRUE; +NLM_EXTERN Uint1 FieldTypeFromAECRAction (AECRActionPtr action) +{ + Uint1 field_type = 0; + ApplyActionPtr a; + EditActionPtr e; + ConvertActionPtr v; + CopyActionPtr c; + SwapActionPtr s; + RemoveActionPtr r; + AECRParseActionPtr p; - switch (scp->choice) { - case SourceQualChoice_textqual: - if (IsNonTextSourceQual (scp->data.intvalue)) { - rval = IsNonTextSourceQualPresent (biop, scp->data.intvalue); - } else { - str = GetSourceQualFromBioSource (biop, scp, NULL); - if (!StringHasNoText (str)) { - rval = TRUE; - } - str = MemFree (str); + if (action == NULL || action->action == NULL || action->action->data.ptrvalue == NULL) { + return 0; + } + switch (action->action->choice) { + case ActionChoice_apply: + a = (ApplyActionPtr) action->action->data.ptrvalue; + if (a->field != NULL) { + field_type = a->field->choice; } break; - case SourceQualChoice_location: - if (biop->genome != 0) { - rval = TRUE; + case ActionChoice_edit: + e = (EditActionPtr) action->action->data.ptrvalue; + if (e->field != NULL) { + field_type = 
e->field->choice; } break; - case SourceQualChoice_origin: - if (biop->origin != 0) { - rval = TRUE; + case ActionChoice_convert: + v = (ConvertActionPtr) action->action->data.ptrvalue; + if (v->fields != NULL) { + field_type = FieldTypeChoiceFromFieldPairTypeChoice (v->fields->choice); + } + break; + case ActionChoice_copy: + c = (CopyActionPtr) action->action->data.ptrvalue; + if (c->fields != NULL) { + field_type = FieldTypeChoiceFromFieldPairTypeChoice (c->fields->choice); + } + break; + case ActionChoice_swap: + s = (SwapActionPtr) action->action->data.ptrvalue; + if (s->fields != NULL) { + field_type = FieldTypeChoiceFromFieldPairTypeChoice (s->fields->choice); + } + break; + case ActionChoice_remove: + r = (RemoveActionPtr) action->action->data.ptrvalue; + if (r->field != NULL) { + field_type = r->field->choice; + } + break; + case ActionChoice_parse: + p = (AECRParseActionPtr) action->action->data.ptrvalue; + if (p->fields != NULL) { + field_type = FieldTypeChoiceFromFieldPairTypeChoice (p->fields->choice); } break; } - return rval; + return field_type; } +typedef struct pubserialnumber { + BioseqPtr bsp; + Int4 serial_number; + ValNodePtr min_pub; +} PubSerialNumberData, PNTR PubSerialNumberPtr; -typedef struct objecthasstring + +static PubSerialNumberPtr PubSerialNumberNew () { - StringConstraintPtr scp; - Boolean found; -} ObjectHasStringData, PNTR ObjectHasStringPtr; + PubSerialNumberPtr psn; + psn = (PubSerialNumberPtr) MemNew (sizeof (PubSerialNumberData)); + psn->bsp = NULL; + psn->serial_number = 0; + psn->min_pub = NULL; -static void LIBCALLBACK AsnWriteConstraintCallBack (AsnExpOptStructPtr pAEOS) + return psn; +} -{ - CharPtr pchSource; - ObjectHasStringPtr ohsp; - ohsp = (ObjectHasStringPtr) pAEOS->data; - if (ISA_STRINGTYPE (AsnFindBaseIsa (pAEOS->atp))) - { - pchSource = (CharPtr) pAEOS->dvp->ptrvalue; - ohsp->found |= DoesSingleStringMatchConstraint (pchSource, ohsp->scp); +static PubSerialNumberPtr PubSerialNumberFree (PubSerialNumberPtr 
psn) +{ + if (psn != NULL) { + psn->min_pub = PubSetFree (psn->min_pub); + psn = MemFree (psn); } + return psn; } -static Boolean DoesObjectMatchStringConstraint (Uint1 choice, Pointer data, StringConstraintPtr scp) - +NLM_EXTERN ValNodePtr PubSerialNumberListFree (ValNodePtr vnp) { - ObjMgrPtr omp; - ObjMgrTypePtr omtp; - AsnIoPtr aip; - AsnExpOptPtr aeop; - ObjectHasStringData ohsd; - SeqFeatPtr sfp, prot; - SeqMgrFeatContext fcontext; - CharPtr search_txt; - CGPSetPtr c; - ValNodePtr vnp; - Boolean all_match = TRUE, any_match = FALSE, rval; - BioseqPtr protbsp; - ImpFeatPtr imp; - - if (data == NULL) return FALSE; - if (scp == NULL) return TRUE; + ValNodePtr vnp_next; - if (choice == 0) { - /* CDS-Gene-Prot set */ - c = (CGPSetPtr) data; - for (vnp = c->gene_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) { - if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) { - any_match = TRUE; - } else { - all_match = FALSE; - } - } - for (vnp = c->cds_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) { - if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) { - any_match = TRUE; - } else { - all_match = FALSE; - } - } - for (vnp = c->mrna_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) { - if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) { - any_match = TRUE; - } else { - all_match = FALSE; - } - } - for (vnp = c->prot_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) { - if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) { - any_match = TRUE; - } else { - all_match = FALSE; - } - } - if (scp->not_present) { - rval = all_match; - } else { - rval = any_match; - } - } else { - omp = ObjMgrGet (); - omtp = ObjMgrTypeFind (omp, choice, NULL, NULL); - if (omtp == NULL) return FALSE; - aip = AsnIoNullOpen (); - aeop = AsnExpOptNew (aip, NULL, NULL, AsnWriteConstraintCallBack); - ohsd.found = FALSE; - ohsd.scp = scp; - 
if (aeop != NULL) { - aeop->user_data = (Pointer) &ohsd; - } - - (omtp->asnwrite) (data, aip, NULL); - - if (!ohsd.found && omtp->datatype == OBJ_SEQFEAT) - { - sfp = (SeqFeatPtr) data; - if (sfp->data.choice == SEQFEAT_CDREGION) { - protbsp = BioseqFindFromSeqLoc (sfp->product); - prot = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext); - if (prot != NULL) { - (omtp->asnwrite) (prot, aip, NULL); - } - } else { - if (SeqMgrFeaturesAreIndexed(sfp->idx.entityID) == 0) { - SeqMgrIndexFeatures (sfp->idx.entityID, NULL); - } - if (sfp->idx.subtype == FEATDEF_tRNA) { - sfp = SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, sfp->idx.itemID, 0, sfp, &fcontext); - ohsd.found = DoesSingleStringMatchConstraint (fcontext.label, ohsd.scp); - if (!ohsd.found && sfp != NULL && sfp->idx.subtype == FEATDEF_tRNA) - { - search_txt = (CharPtr) MemNew ((StringLen (fcontext.label) + 6) * sizeof (Char)); - if (search_txt != NULL) - { - sprintf (search_txt, "tRNA-%s", fcontext.label); - ohsd.found = DoesSingleStringMatchConstraint (search_txt, ohsd.scp); - search_txt = MemFree (search_txt); - } - } - } else if (!ohsd.found && sfp != NULL - && sfp->data.choice == SEQFEAT_IMP - && (imp = (ImpFeatPtr) sfp->data.value.ptrvalue) != NULL) { - ohsd.found = DoesSingleStringMatchConstraint (imp->key, ohsd.scp); - } - } - } - AsnIoClose (aip); - if (scp->not_present) { - rval = !ohsd.found; - } else { - rval = ohsd.found; - } + while (vnp != NULL) { + vnp_next = vnp->next; + vnp->next = NULL; + vnp->data.ptrvalue = PubSerialNumberFree (vnp->data.ptrvalue); + vnp = ValNodeFree (vnp); + vnp = vnp_next; } - return rval; + return vnp; } -NLM_EXTERN Boolean IsSourceConstraintEmpty (SourceConstraintPtr scp) +static void CaptureRefBlockSerialNumbers +(CharPtr str, + Pointer userdata, + BlockType blocktype, + Uint2 entityID, + Uint2 itemtype, + Uint4 itemID, + Int4 left, + Int4 right +) { - if (scp == NULL) return TRUE; + CharPtr cp; + Int4 serial_number; + ValNodePtr vnp; + BioseqPtr bsp 
= NULL; + SeqFeatPtr sfp; + SeqDescrPtr sdp; + SeqMgrFeatContext fcontext; + SeqMgrDescContext dcontext; + PubSerialNumberPtr psn; + ValNodePtr ppr; + PubdescPtr pdp = NULL; - if (scp->field1 == NULL - && scp->field2 == NULL - && IsStringConstraintEmpty(scp->constraint)) { - return TRUE; - } else { - return FALSE; + if (blocktype != REFERENCE_BLOCK || userdata == NULL) return; + if (StringNICmp (str, "REFERENCE", 9) != 0) { + return; } -} - -NLM_EXTERN Boolean DoesBiosourceMatchConstraint (BioSourcePtr biop, SourceConstraintPtr scp) -{ - Boolean rval = FALSE; - CharPtr str1, str2; - ValNode vn; + cp = str + 9; + while (isspace (*cp)) { + cp++; + } + if (!isdigit (*cp)) { + return; + } + serial_number = atoi (cp); - if (biop == NULL) return FALSE; - if (scp == NULL) return TRUE; + if (itemtype == OBJ_SEQFEAT) { + sfp = SeqMgrGetDesiredFeature (entityID, NULL, itemID, 0, NULL, &fcontext); + if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) { + pdp = (PubdescPtr) sfp->data.value.ptrvalue; + bsp = GetSequenceForObject (OBJ_SEQFEAT, sfp); + } + } else if (itemtype == OBJ_SEQDESC) { + sdp = SeqMgrGetDesiredDescriptor (entityID, NULL, itemID, 0, NULL, &dcontext); + if (sdp != NULL && sdp->choice == Seq_descr_pub) { + pdp = (PubdescPtr) sdp->data.ptrvalue; + bsp = GetSequenceForObject (OBJ_SEQDESC, sdp); + } + } + if (pdp != NULL && bsp != NULL) { + vnp = ValNodeNew (NULL); + if (vnp != NULL) { + vnp->choice = PUB_Equiv; + vnp->data.ptrvalue = pdp->pub; + ppr = MinimizePub (vnp); + ValNodeFree (vnp); + } + vnp = ValNodeNew (NULL); + if (vnp != NULL) { + vnp->choice = PUB_Equiv; + vnp->data.ptrvalue = ppr; - if (IsStringConstraintEmpty(scp->constraint)) { - /* looking for qual present */ - if (scp->field1 != NULL && scp->field2 == NULL) { - rval = IsSourceQualPresent (biop, scp->field1); - } else if (scp->field2 != NULL && scp->field1 == NULL) { - rval = IsSourceQualPresent (biop, scp->field2); - /* looking for quals to match */ - } else if (scp->field1 != NULL && 
scp->field2 != NULL) { - str1 = GetSourceQualFromBioSource (biop, scp->field1, NULL); - str2 = GetSourceQualFromBioSource (biop, scp->field2, NULL); - if (StringCmp (str1, str2) == 0) { - rval = TRUE; - } - str1 = MemFree (str1); - str2 = MemFree (str2); - } else { - /* nothing specified, automatic match */ - rval = TRUE; + psn = PubSerialNumberNew (); + psn->bsp = bsp; + psn->serial_number = serial_number; + psn->min_pub = vnp; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, psn); } - } else { - if (scp->field1 != NULL && scp->field2 == NULL) { - str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint); - if (str1 == NULL) { - if (scp->constraint->not_present) { - str1 = GetSourceQualFromBioSource (biop, scp->field1, NULL); - if (str1 == NULL) { - rval = TRUE; - } - } - } else if (!StringHasNoText (str1)) { - rval = TRUE; - } - str1 = MemFree (str1); - } else if (scp->field2 != NULL && scp->field1 == NULL) { - str2 = GetSourceQualFromBioSource (biop, scp->field2, scp->constraint); - if (str2 == NULL) { - if (scp->constraint->not_present) { - str2 = GetSourceQualFromBioSource (biop, scp->field2, NULL); - if (str2 == NULL) { - rval = TRUE; - } - } - } else if (!StringHasNoText (str2)) { - rval = TRUE; - } - str2 = MemFree (str2); - } else if (scp->field1 != NULL && scp->field2 != NULL) { - str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint); - str2 = GetSourceQualFromBioSource (biop, scp->field2, scp->constraint); - if (StringCmp (str1, str2) == 0) { - rval = TRUE; + } +} + + +NLM_EXTERN ValNodePtr GetCitListsForSeqEntry (SeqEntryPtr sep) +{ + XtraBlock xtra; + ValNodePtr head = NULL; + ErrSev level; + Boolean okay; + SeqEntryPtr oldscope; + Uint2 entityID; + + if (sep == NULL) return NULL; + + MemSet ((Pointer) &xtra, 0, sizeof (XtraBlock)); + xtra.ffwrite = CaptureRefBlockSerialNumbers; + xtra.userdata = (Pointer) &head; + level = ErrSetMessageLevel (SEV_MAX); + oldscope = SeqEntrySetScope (sep); + okay = SeqEntryToGnbk (sep, 
NULL, GENBANK_FMT, SEQUIN_MODE, NORMAL_STYLE, + SHOW_CONTIG_FEATURES, 0, 0, &xtra, NULL); + entityID = SeqMgrGetEntityIDForSeqEntry (sep); + SeqEntrySetScope (oldscope); + ErrSetMessageLevel (level); + return head; +} + + +NLM_EXTERN Int4 GetCitationNumberForMinPub (BioseqPtr bsp, ValNodePtr min_pub, ValNodePtr pub_list) +{ + Int4 rval = -1; + PubSerialNumberPtr psn; + ValNodePtr vnp, tmp; + + if (bsp == NULL || min_pub == NULL || pub_list == NULL) { + return -1; + } + + tmp = ValNodeNew (NULL); + tmp->choice = PUB_Equiv; + tmp->data.ptrvalue = min_pub; + + for (vnp = pub_list; vnp != NULL && rval == -1; vnp = vnp->next) { + psn = (PubSerialNumberPtr) vnp->data.ptrvalue; + if (psn->bsp == bsp) { + if (PubLabelMatch (tmp, psn->min_pub) == 0) { + rval = psn->serial_number; } - str1 = MemFree (str1); - str2 = MemFree (str2); - } else { - /* generic string constraint */ - vn.choice = Seq_descr_source; - vn.next = NULL; - vn.extended = 0; - vn.data.ptrvalue = biop; - rval = DoesObjectMatchStringConstraint (OBJ_SEQDESC, &vn, scp->constraint); } } + + tmp = ValNodeFree (tmp); + return rval; } -static Boolean DoesCGPSetMatchPseudoConstraint (CGPSetPtr c, CDSGeneProtPseudoConstraintPtr constraint) +NLM_EXTERN ValNodePtr GetMinPubForCitationNumber (BioseqPtr bsp, Int4 number, ValNodePtr pub_list) { - Boolean any_pseudo = FALSE; + ValNodePtr rval = NULL; + PubSerialNumberPtr psn; ValNodePtr vnp; - SeqFeatPtr sfp; - Boolean rval = FALSE; - - if (c == NULL) return FALSE; - if (constraint == NULL) return TRUE; - switch (constraint->feature) { - case CDSGeneProt_feature_type_constraint_gene : - for (vnp = c->gene_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->pseudo) { - any_pseudo = TRUE; - } - } - break; - case CDSGeneProt_feature_type_constraint_mRNA : - for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->pseudo) { - 
any_pseudo = TRUE; - } - } - break; - case CDSGeneProt_feature_type_constraint_cds : - for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->pseudo) { - any_pseudo = TRUE; - } - } - break; - case CDSGeneProt_feature_type_constraint_prot : - for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->pseudo && sfp->idx.subtype == FEATDEF_PROT) { - any_pseudo = TRUE; - } - } - break; - case CDSGeneProt_feature_type_constraint_mat_peptide : - for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp != NULL && sfp->pseudo && sfp->idx.subtype == FEATDEF_mat_peptide_aa) { - any_pseudo = TRUE; - } - } - break; + if (bsp == NULL || number < 0 || pub_list == NULL) { + return NULL; } - if ((any_pseudo && constraint->is_pseudo) - || (!any_pseudo && !constraint->is_pseudo)) { - rval = TRUE; + for (vnp = pub_list; vnp != NULL && rval == NULL; vnp = vnp->next) { + psn = (PubSerialNumberPtr) vnp->data.ptrvalue; + if (psn->bsp == bsp && psn->serial_number == number) { + rval = psn->min_pub; + } } + return rval; } -static Boolean DoesFeatureMatchCGPPseudoConstraint (SeqFeatPtr sfp, CDSGeneProtPseudoConstraintPtr constraint) +/* + * Some batch operations will be faster if information about the entire record is collected once + * and reused. The BatchExtra structure is where such data belongs. 
+ */ +NLM_EXTERN BatchExtraPtr BatchExtraNew () { - Boolean any_pseudo = FALSE; - ValNodePtr feat_list, vnp; - SeqFeatPtr gene, mrna, cds, prot; - Boolean rval = FALSE; - SeqMgrFeatContext fcontext; + BatchExtraPtr b; - if (sfp == NULL) return FALSE; - if (constraint == NULL) return TRUE; + b = (BatchExtraPtr) MemNew (sizeof (BatchExtraData)); + b->cit_list = NULL; - switch (constraint->feature) { - case CDSGeneProt_feature_type_constraint_gene : - if (sfp->data.choice == SEQFEAT_GENE) { - if (sfp->pseudo) { - any_pseudo = TRUE; - } - } else if (sfp->data.choice == SEQFEAT_PROT) { - cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &fcontext); - if (cds != NULL) { - gene = GetGeneForFeature (cds); - if (gene != NULL && gene->pseudo) { - any_pseudo = TRUE; - } - } - } else { - gene = GetGeneForFeature (sfp); - if (gene != NULL && gene->pseudo) { - any_pseudo = TRUE; - } - } - break; - case CDSGeneProt_feature_type_constraint_mRNA : - if (sfp->idx.subtype == FEATDEF_mRNA) { - if (sfp->pseudo) { - any_pseudo = TRUE; - } - } else if (sfp->data.choice == SEQFEAT_PROT) { - cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &fcontext); - if (cds != NULL) { - mrna = SeqMgrGetOverlappingmRNA (cds->location, &fcontext); - if (mrna != NULL && mrna->pseudo) { - any_pseudo = TRUE; - } - } - } else { - mrna = SeqMgrGetOverlappingmRNA (sfp->location, &fcontext); - if (mrna != NULL && mrna->pseudo) { - any_pseudo = TRUE; - } - } - break; - case CDSGeneProt_feature_type_constraint_cds : - if (sfp->idx.subtype == FEATDEF_CDS) { - if (sfp->pseudo) { - any_pseudo = TRUE; - } - } else if (sfp->data.choice == SEQFEAT_PROT) { - cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &fcontext); - if (cds != NULL && cds->pseudo) { - any_pseudo = TRUE; - } - } else { - feat_list = ListFeaturesInLocation (BioseqFindFromSeqLoc (sfp->location), sfp->location, SEQFEAT_CDREGION, FEATDEF_CDS); - for (vnp = feat_list; vnp != NULL && 
!any_pseudo; vnp = vnp->next) { - cds = vnp->data.ptrvalue; - if (cds != NULL && cds->pseudo) { - any_pseudo = TRUE; - } - } - feat_list = ValNodeFree (feat_list); - } - break; - case CDSGeneProt_feature_type_constraint_prot : - if (sfp->idx.subtype == FEATDEF_PROT) { - if (sfp->pseudo) { - any_pseudo = TRUE; - } - } else if (sfp->data.choice = SEQFEAT_PROT) { - prot = SeqMgrGetNextFeature (BioseqFindFromSeqLoc (sfp->location), NULL, 0, FEATDEF_PROT, &fcontext); - if (prot != NULL && prot->pseudo) { - any_pseudo = TRUE; - } - } else if (sfp->idx.subtype == FEATDEF_CDS) { - prot = SeqMgrGetNextFeature (BioseqFindFromSeqLoc (sfp->product), NULL, 0, FEATDEF_PROT, &fcontext); - if (prot != NULL && prot->pseudo) { - any_pseudo = TRUE; - } - } else { - feat_list = ListFeaturesInLocation (BioseqFindFromSeqLoc (sfp->location), sfp->location, SEQFEAT_CDREGION, FEATDEF_CDS); - for (vnp = feat_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { - cds = vnp->data.ptrvalue; - if (cds != NULL) { - prot = SeqMgrGetNextFeature (BioseqFindFromSeqLoc (cds->product), NULL, 0, FEATDEF_PROT, &fcontext); - if (prot != NULL && prot->pseudo) { - any_pseudo = TRUE; - } - } - } - feat_list = ValNodeFree (feat_list); - } - break; - case CDSGeneProt_feature_type_constraint_mat_peptide : - if (sfp->idx.subtype == FEATDEF_mat_peptide_aa) { - if (sfp->pseudo) { - any_pseudo = TRUE; - } - } - break; - } + return b; +} - if ((any_pseudo && constraint->is_pseudo) - || (!any_pseudo && !constraint->is_pseudo)) { - rval = TRUE; + +NLM_EXTERN BatchExtraPtr BatchExtraFree (BatchExtraPtr b) +{ + if (b != NULL) { + b->cit_list = PubSerialNumberListFree (b->cit_list); + + b = MemFree (b); } - return rval; + return b; } -NLM_EXTERN Boolean IsCDSGeneProtQualConstraintEmpty (CDSGeneProtQualConstraintPtr constraint) +static Boolean IsCitationField (FieldTypePtr field) { - if (constraint == NULL) return TRUE; - if (constraint->field1 == NULL && constraint->field2 == NULL && IsStringConstraintEmpty 
(constraint->constraint)) { + FeatureFieldPtr feature_field; + + if (field != NULL + && field->choice == FieldType_feature_field + && (feature_field = field->data.ptrvalue) != NULL + && feature_field->field != NULL + && ((feature_field->field->choice == FeatQualChoice_legal_qual + && feature_field->field->data.intvalue == Feat_qual_legal_citation) + || (feature_field->field->choice == FeatQualChoice_illegal_qual + && DoesStringMatchConstraint ("citation", feature_field->field->data.ptrvalue)))) { return TRUE; } else { return FALSE; } + } -static Boolean DoesCGPSetMatchQualConstraint (CGPSetPtr c, CDSGeneProtQualConstraintPtr constraint) +static void InitBatchExtraForField (BatchExtraPtr batch_extra, FieldTypePtr field, SeqEntryPtr sep) { - Boolean rval = FALSE; - CharPtr str, str1, str2; + if (batch_extra == NULL) { + return; + } + /* only need to collect citations if citation is in the list of applicable fields */ + if (IsCitationField (field)) { + ValNodeLink (&(batch_extra->cit_list), GetCitListsForSeqEntry (sep)); + } +} - if (c == NULL) return FALSE; - if (constraint == NULL) return TRUE; - if (IsStringConstraintEmpty (constraint->constraint)) { - /* looking for qual present */ - if (constraint->field1 != NULL && constraint->field2 == NULL) { - str = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL); - if (str != NULL) { - rval = TRUE; - str = MemFree (str); - } - } else if (constraint->field2 != NULL && constraint->field1 == NULL) { - str = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL); - if (str == NULL) { - rval = FALSE; +static void InitBatchExtraForAECRAction (BatchExtraPtr batch_extra, AECRActionPtr action, SeqEntryPtr sep) +{ + ValNodePtr field_list, field; + + if (batch_extra == NULL || action == NULL) { + return; + } + + field_list = GetFieldTypeListFromAECRAction (action); + for (field = field_list; field != NULL; field = field->next) { + InitBatchExtraForField (batch_extra, field, sep); + } + field_list = 
FieldTypeListFree (field_list); +} + + +NLM_EXTERN int LIBCALLBACK SortVnpByObject (VoidPtr ptr1, VoidPtr ptr2) + +{ + ValNodePtr vnp1; + ValNodePtr vnp2; + CharPtr str1, str2; + int rval = 0; + + if (ptr1 != NULL && ptr2 != NULL) { + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + if (vnp1 != NULL && vnp2 != NULL) { + if (vnp1->choice < vnp2->choice) { + rval = -1; + } else if (vnp1->choice > vnp2->choice) { + rval = 1; } else { - str = MemFree (str); - } - /* looking for quals to match */ - } else if (constraint->field1 != NULL && constraint->field2 != NULL) { - str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL); - str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL); - if (StringCmp (str1, str2) == 0) { - rval = TRUE; - } - str1 = MemFree (str1); - str2 = MemFree (str2); - } else { - /* nothing specified, automatic match */ - rval = TRUE; - } - } else { - if (constraint->field1 != NULL && constraint->field2 == NULL) { - str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint); - if (str1 == NULL) { - if (constraint->constraint->not_present) { - str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL); - if (str1 == NULL) { - rval = TRUE; - } - } - } else if (!StringHasNoText (str1)) { - rval = TRUE; - } - str1 = MemFree (str1); - } else if (constraint->field2 != NULL && constraint->field1 == NULL) { - str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint); - if (str2 == NULL) { - if (constraint->constraint->not_present) { - str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL); - if (str2 == NULL) { - rval = TRUE; - } - } - } else if (!StringHasNoText (str2)) { - rval = TRUE; - } - str2 = MemFree (str2); - } else if (constraint->field1 != NULL && constraint->field2 != NULL) { - str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint); - 
str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint); - if (StringCmp (str1, str2) == 0) { - rval = TRUE; + /* same choice: order the two nodes by the text GetDiscrepancyItemText returns for each */ + str1 = GetDiscrepancyItemText (vnp1); + str2 = GetDiscrepancyItemText (vnp2); + rval = StringCmp (str1, str2); + str1 = MemFree (str1); + str2 = MemFree (str2); } - str1 = MemFree (str1); - str2 = MemFree (str2); - } else { - /* generic string constraint */ - rval = DoesObjectMatchStringConstraint (0, c, constraint->constraint); } } + return rval; } -static Boolean DoesSequenceHaveFeatureWithQualPresent (BioseqPtr bsp, FeatureFieldPtr feature_field, StringConstraintPtr scp) +static ValNodePtr BioseqListForObjectList (ValNodePtr object_list) { - Boolean rval = FALSE; - SeqFeatPtr sfp, sfp_p; - SeqMgrFeatContext context1, context2; - Int4 featdef; - Uint1 seqfeattype; - CharPtr str; - BioseqPtr prot_bsp; + ValNodePtr vnp, bsp_list = NULL; + BioseqPtr bsp; - if (bsp == NULL) { - return FALSE; - } else if (feature_field == NULL) { - return TRUE; + for (vnp = object_list; vnp != NULL; vnp = vnp->next) { + bsp = GetSequenceForObject (vnp->choice, vnp->data.ptrvalue); + if (bsp != NULL) { + ValNodeAddPointer (&bsp_list, OBJ_BIOSEQ, bsp); + } } - featdef = GetFeatdefFromFeatureType(feature_field->type); - seqfeattype = FindFeatFromFeatDefType (featdef); - if (seqfeattype == SEQFEAT_PROT) { - for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &context1); - sfp != NULL && !rval; - sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &context1)) { - prot_bsp = BioseqFindFromSeqLoc (sfp->product); - for (sfp_p = SeqMgrGetNextFeature (prot_bsp, NULL, 0, featdef, &context2); - sfp_p != NULL && !rval; - sfp_p = SeqMgrGetNextFeature (prot_bsp, sfp_p, 0, featdef, &context2)) { - str = GetQualFromFeature (sfp_p, feature_field, scp); - if (str == NULL && scp != NULL) { - if (scp->not_present) { - str = GetQualFromFeature (sfp_p, feature_field, NULL); - if (str == NULL) { - rval = TRUE; - } - } - } else if (!StringHasNoText 
(str)) { - rval = TRUE; - } - str = MemFree (str); - } - } - } else { - for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context1); - sfp != NULL && !rval; - sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &context1)) { - str = GetQualFromFeature (sfp, feature_field, scp); - if (str == NULL && scp != NULL) { - if (scp->not_present) { - str = GetQualFromFeature (sfp, feature_field, NULL); - if (str == NULL) { - rval = TRUE; - } - } - } else if (!StringHasNoText (str)) { - rval = TRUE; - } - str = MemFree (str); - } - } - return rval; + bsp_list = ValNodeSort (bsp_list, SortVnpByObject); + ValNodeUnique (&bsp_list, SortVnpByObject, ValNodeFree); + return bsp_list; } -static Boolean -DoesSequenceHaveFeatureWithMatchingQuals -(BioseqPtr bsp, - CDSGeneProtConstraintFieldPtr f1, - CDSGeneProtConstraintFieldPtr f2, - StringConstraintPtr scp) +static void InitBatchExtraForAECRActionAndObjectList (BatchExtraPtr batch_extra, AECRActionPtr action, ValNodePtr object_list) { - Int4 featdef; - Uint1 seqfeattype; - SeqFeatPtr sfp, sfp_p; - CharPtr str, str2; - SeqMgrFeatContext context1, context2; - FeatureFieldPtr feature_field1 = NULL, feature_field2 = NULL; - CGPSetPtr c; - Boolean b = FALSE; - Boolean rval = FALSE; - BioseqPtr prot_bsp; - - if (bsp == NULL || f1 == NULL || f2 == NULL) { - return FALSE; - } - feature_field1 = FeatureFieldFromCDSGeneProtField(f1->data.intvalue); - feature_field2 = FeatureFieldFromCDSGeneProtField(f2->data.intvalue); + ValNodePtr field_list, field; + ValNodePtr bsp_list = NULL, vnp; + SeqEntryPtr sep; - if (feature_field1 == NULL || feature_field2 == NULL) { - feature_field1 = FeatureFieldFree (feature_field1); - feature_field2 = FeatureFieldFree (feature_field2); - return FALSE; + if (batch_extra == NULL || action == NULL) { + return; } - if (feature_field1->type == feature_field2->type) { - featdef = GetFeatdefFromFeatureType(feature_field1->type); - seqfeattype = FindFeatFromFeatDefType (featdef); - if (seqfeattype == SEQFEAT_PROT) 
{ - for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &context1); - sfp != NULL && !rval; - sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &context1)) { - prot_bsp = BioseqFindFromSeqLoc (sfp->product); - for (sfp_p = SeqMgrGetNextFeature (prot_bsp, NULL, 0, featdef, &context2); - sfp_p != NULL && !rval; - sfp_p = SeqMgrGetNextFeature (prot_bsp, sfp_p, 0, featdef, &context2)) { - str = GetQualFromFeature (sfp_p, feature_field1, scp); - str2 = GetQualFromFeature (sfp_p, feature_field2, scp); - if (str != NULL && str2 != NULL && StringCmp (str, str2) == 0) { - rval = TRUE; - } - str = MemFree (str); - str2 = MemFree (str2); - } - } - } else { - for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context1); - sfp != NULL && !rval; - sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &context1)) { - str = GetQualFromFeature (sfp, feature_field1, scp); - str2 = GetQualFromFeature (sfp, feature_field2, scp); - if (str != NULL && str2 != NULL && StringCmp (str, str2) == 0) { - rval = TRUE; - } - str = MemFree (str); - str2 = MemFree (str2); - } - } - } else { - for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &context1); - sfp != NULL && !rval; - sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &context1)) { - c = BuildCGPSetFromCodingRegion (sfp, &b); - str = GetFieldValueFromCGPSet (c, f1->data.intvalue, scp); - str2 = GetFieldValueFromCGPSet (c, f2->data.intvalue, scp); - if (str != NULL && str2 != NULL && StringCmp (str, str2) == 0) { - rval = TRUE; - } - str = MemFree (str); - str2 = MemFree (str2); - c = CGPSetFree (c); + field_list = GetFieldTypeListFromAECRAction (action); + bsp_list = BioseqListForObjectList (object_list); + for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { + sep = SeqMgrGetSeqEntryForData (vnp->data.ptrvalue); + for (field = field_list; field != NULL; field = field->next) { + InitBatchExtraForField (batch_extra, field, sep); } } - return rval; + bsp_list = ValNodeFree (bsp_list); + + field_list = 
FieldTypeListFree (field_list); + } -static Boolean DoesSequenceMatchCGPQualConstraint (BioseqPtr bsp, CDSGeneProtQualConstraintPtr constraint) + + + +NLM_EXTERN CharPtr GetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra) { + CharPtr str = NULL; FeatureFieldPtr feature_field; - Boolean rval = FALSE; + SeqDescrPtr sdp; + GBBlockPtr gb; + SeqMgrDescContext context; - if (bsp == NULL) { - return FALSE; - } else if (constraint == NULL) { - return TRUE; - } + if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return NULL; - if (IsStringConstraintEmpty (constraint->constraint)) { - /* looking for qual present */ - if ((constraint->field1 != NULL && constraint->field2 == NULL) - || (constraint->field2 != NULL && constraint->field1 == NULL)) { - if (constraint->field1 != NULL) { - feature_field = FeatureFieldFromCDSGeneProtField (constraint->field1->data.intvalue); - } else { - feature_field = FeatureFieldFromCDSGeneProtField (constraint->field2->data.intvalue); + switch (field->choice) { + case FieldType_source_qual : + str = GetSourceQualFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp); + break; + case FieldType_feature_field : + if (choice == OBJ_SEQFEAT) { + str = GetQualFromFeatureEx ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp, batch_extra); } - if (feature_field != NULL) { - rval = DoesSequenceHaveFeatureWithQualPresent (bsp, feature_field, NULL); + break; + case FieldType_cds_gene_prot : + if (choice == 0) { + str = GetFieldValueFromCGPSet ((CGPSetPtr) data, field->data.intvalue, scp); + } else if (choice == OBJ_SEQFEAT) { + feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue); + str = GetQualFromFeature ((SeqFeatPtr) data, feature_field, scp); feature_field = FeatureFieldFree (feature_field); } - /* looking for quals to match */ - } else if (constraint->field1 != NULL && 
constraint->field2 != NULL) { - rval = DoesSequenceHaveFeatureWithMatchingQuals (bsp, constraint->field1, constraint->field2, NULL); - } else { - /* nothing specified, automatic match */ - rval = TRUE; - } - } else if ((constraint->field1 != NULL && constraint->field2 == NULL) - || (constraint->field1 == NULL && constraint->field2 != NULL)) { - /* one field must match constraint */ - if (constraint->field1 != NULL) { - feature_field = FeatureFieldFromCDSGeneProtField (constraint->field1->data.intvalue); - } else { - feature_field = FeatureFieldFromCDSGeneProtField (constraint->field2->data.intvalue); - } - if (feature_field != NULL) { - rval = DoesSequenceHaveFeatureWithQualPresent (bsp, feature_field, constraint->constraint); - feature_field = FeatureFieldFree (feature_field); - } - } else if (constraint->field1 != NULL && constraint->field2 != NULL) { - /* two fields must match and match constraint */ - rval = DoesSequenceHaveFeatureWithMatchingQuals (bsp, constraint->field1, constraint->field2, constraint->constraint); - } else { - /* generic string constraint */ - rval = DoesObjectMatchStringConstraint (OBJ_BIOSEQ, bsp, constraint->constraint); + break; + case FieldType_molinfo_field : + if (choice == OBJ_BIOSEQ) { + str = GetSequenceQualFromBioseq ((BioseqPtr) data, field->data.ptrvalue); + } + break; + case FieldType_pub : + str = GetPubFieldFromObject (choice, data, field->data.intvalue, scp); + break; + case FieldType_rna_field : + if (choice == OBJ_SEQFEAT) { + str = GetRNAQualFromFeature ((SeqFeatPtr) data, field->data.ptrvalue, scp, NULL); + } + break; + case FieldType_struc_comment_field: + if (choice == OBJ_SEQDESC && data != NULL) { + sdp = (SeqDescrPtr) data; + if (sdp != NULL && sdp->choice == Seq_descr_user) { + str = GetStructuredCommentFieldFromUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp); + } + } + break; + case FieldType_dblink: + if (choice == OBJ_SEQDESC && data != NULL) { + sdp = (SeqDescrPtr) data; + if (sdp != NULL && 
sdp->choice == Seq_descr_user) { + str = GetDBLinkFieldFromUserObject (sdp->data.ptrvalue, field->data.intvalue, scp); + } + } + break; + case FieldType_misc: + if (choice == OBJ_BIOSEQ) { + if (field->data.intvalue == Misc_field_genome_project_id) { + str = GetGenomeProjectIdFromBioseq ((BioseqPtr) data, scp); + } else if (field->data.intvalue == Misc_field_comment_descriptor) { + str = NULL; + for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_comment, &context); + sdp != NULL && str == NULL; + sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_comment, &context)) { + if (DoesStringMatchConstraint (sdp->data.ptrvalue, scp)) { + str = StringSave (sdp->data.ptrvalue); + } + } + } else if (field->data.intvalue == Misc_field_defline) { + str = NULL; + for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_title, &context); + sdp != NULL && str == NULL; + sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_title, &context)) { + if (DoesStringMatchConstraint (sdp->data.ptrvalue, scp)) { + str = StringSave (sdp->data.ptrvalue); + } + } + } else if (field->data.intvalue == Misc_field_keyword) { + str = NULL; + for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_genbank, &context); + sdp != NULL && str == NULL; + sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_genbank, &context)) { + gb = (GBBlockPtr) sdp->data.ptrvalue; + /* skip a genbank descriptor that carries no GBBlock, as the OBJ_SEQDESC keyword branch does */ + if (gb != NULL) { + str = GetFirstValNodeStringMatch (gb->keywords, scp); + } + } + } + } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor) { + sdp = (SeqDescrPtr) data; + if (sdp != NULL && sdp->choice == Seq_descr_comment && !StringHasNoText (sdp->data.ptrvalue)) { + str = StringSave (sdp->data.ptrvalue); + } + } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_defline) { + sdp = (SeqDescrPtr) data; + if (sdp != NULL && sdp->choice == Seq_descr_title && !StringHasNoText (sdp->data.ptrvalue)) { + str = StringSave 
(sdp->data.ptrvalue); + } + } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword) { + sdp = (SeqDescrPtr) data; + if (sdp != NULL && sdp->choice == Seq_descr_genbank && (gb = (GBBlockPtr) sdp->data.ptrvalue) != NULL) { + str = GetFirstValNodeStringMatch (gb->keywords, scp); + } + } + break; } - return rval; + return str; } -static Boolean DoesSequenceInSetMatchCGPQualConstraint (BioseqSetPtr bssp, CDSGeneProtQualConstraintPtr constraint) +NLM_EXTERN CharPtr GetFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp) { - Boolean rval = FALSE; - SeqEntryPtr sep; - - if (bssp == NULL) return FALSE; - if (constraint == NULL) return TRUE; - - for (sep = bssp->seq_set; sep != NULL && !rval; sep = sep->next) { - if (IS_Bioseq (sep)) { - rval = DoesSequenceMatchCGPQualConstraint ((BioseqPtr) sep->data.ptrvalue, constraint); - } else if (IS_Bioseq_set (sep)) { - rval = DoesSequenceInSetMatchCGPQualConstraint ((BioseqSetPtr) sep->data.ptrvalue, constraint); - } - } - return rval; + return GetFieldValueForObjectEx (choice, data, field, scp, NULL); } -static Boolean DoesSeqDescMatchCGPQualConstraint (SeqDescrPtr sdp, CDSGeneProtQualConstraintPtr constraint) +NLM_EXTERN ValNodePtr GetMultipleFieldValuesForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra) { - Boolean rval = FALSE; - BioseqPtr bsp; - ObjValNodePtr ovp; + CharPtr str = NULL; + ValNodePtr val_list = NULL; - if (sdp == NULL) return FALSE; - if (constraint == NULL) return TRUE; + if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE; - bsp = GetSequenceForObject (OBJ_SEQDESC, sdp); - if (bsp == NULL) { - if (sdp->extended) { - ovp = (ObjValNodePtr) sdp; - if (ovp->idx.parenttype == OBJ_BIOSEQSET && ovp->idx.parentptr != NULL) { - rval = DoesSequenceInSetMatchCGPQualConstraint ((BioseqSetPtr) ovp->idx.parentptr, constraint); - } - } + if (field->choice == 
FieldType_source_qual) { + val_list = GetMultipleSourceQualsFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp); } else { - rval = DoesSequenceMatchCGPQualConstraint (bsp, constraint); + str = GetFieldValueForObjectEx (choice, data, field, scp, batch_extra); + if (str != NULL) { + ValNodeAddPointer (&val_list, 0, str); + } } - - return rval; + return val_list; } -static Boolean DoesFeatureMatchCGPQualConstraint (SeqFeatPtr sfp, CDSGeneProtQualConstraintPtr constraint) +NLM_EXTERN Boolean GBBlockIsCompletelyEmpty (GBBlockPtr gb) { - CGPSetPtr c = NULL; - Boolean b = FALSE; - SeqMgrFeatContext context; - Boolean rval = FALSE; - FeatureFieldPtr ff; - SeqFeatPtr cds; - CharPtr str1 = NULL, str2 = NULL; - - if (sfp == NULL) { - return FALSE; - } else if (constraint == NULL) { + if (gb != NULL + && gb->extra_accessions == NULL + && gb->keywords == NULL + && gb->source == NULL + && gb->origin == NULL + && gb->date == NULL + && gb->div == NULL + && gb->taxonomy == NULL + && gb->entry_date == NULL) { return TRUE; + } else { + return FALSE; } - - if (sfp->data.choice == SEQFEAT_CDREGION) { - c = BuildCGPSetFromCodingRegion (sfp, &b); - } else if (sfp->data.choice == SEQFEAT_PROT) { - cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &context); - c = BuildCGPSetFromCodingRegion (cds, &b); - } else if (sfp->data.choice == SEQFEAT_GENE) { - c = BuildCGPSetFromGene (sfp); - } else if (sfp->data.choice == SEQFEAT_RNA) { - c = BuildCGPSetFrommRNA (sfp); - } - - rval = DoesCGPSetMatchQualConstraint (c, constraint); - if (rval && sfp->idx.subtype == FEATDEF_mat_peptide_aa) { - if (constraint->field1 != NULL) { - if (IsCDSGeneProtFieldMatPeptideRelated (constraint->field1->data.intvalue)) { - ff = FeatureFieldFromCDSGeneProtField (constraint->field1->data.intvalue); - str1 = GetQualFromFeature (sfp, ff, constraint->constraint); - ff = FeatureFieldFree (ff); - } else { - str1 = GetFieldValueFromCGPSet (c, 
constraint->field1->data.intvalue, constraint->constraint); - } - if (str1 == NULL) { - rval = FALSE; - } - } - if (constraint->field2 != NULL) { - if (IsCDSGeneProtFieldMatPeptideRelated (constraint->field2->data.intvalue)) { - ff = FeatureFieldFromCDSGeneProtField (constraint->field2->data.intvalue); - str2 = GetQualFromFeature (sfp, ff, constraint->constraint); - ff = FeatureFieldFree (ff); - } else { - str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint); - } - if (str2 == NULL) { - rval = FALSE; - } - } - if (rval && constraint->field1 != NULL && constraint->field2 != NULL && StringCmp (str1, str2) != 0) { - rval = FALSE; - } - str1 = MemFree (str1); - str2 = MemFree (str2); - } - c = CGPSetFree (c); - return rval; -} - - -NLM_EXTERN Boolean IsSequenceConstraintEmpty (SequenceConstraintPtr constraint) -{ - if (constraint == NULL) return TRUE; - if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) return FALSE; - if (constraint->feature != Feature_type_any) return FALSE; - if (!IsStringConstraintEmpty (constraint->id)) return FALSE; - if (constraint->num_features != NULL) return FALSE; - return TRUE; } -static Boolean DoesTextMatchBankItId (SeqIdPtr sip, StringConstraintPtr scp) +static Boolean RemoveFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp) { Boolean rval = FALSE; - Int4 text_len, offset; - CharPtr text, tmp; - DbtagPtr dbtag; + FeatureFieldPtr feature_field; + SeqDescrPtr sdp; + ObjValNodePtr ovp; + GBBlockPtr gb; - if (scp == NULL || (text = scp->match_text) == NULL || sip == NULL || sip->choice != SEQID_GENERAL - || (dbtag = (DbtagPtr) sip->data.ptrvalue) == NULL - || StringCmp (dbtag->db, "BankIt") != 0 - || dbtag->tag == NULL) { - return FALSE; - } - text_len = StringLen (scp->match_text); - if (text_len > 6 && StringNICmp (text, "BankIt", 6) == 0) { - text += 6; - text += StrSpn (text, ":/ "); - } + if (data 
== NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE; - text = StringSave (text); - tmp = scp->match_text; - scp->match_text = text; - rval = DoesObjectIdMatchStringConstraint (dbtag->tag, scp); - if (!rval) { - offset = StringCSpn (text, "/ "); - if (text[offset] != 0) { - text[offset] = '_'; - rval = DoesObjectIdMatchStringConstraint (dbtag->tag, scp); - } + switch (field->choice) { + case FieldType_source_qual : + rval = RemoveSourceQualFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp); + break; + case FieldType_feature_field : + if (choice == OBJ_SEQFEAT) { + rval = RemoveQualFromFeature ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp); + } + break; + case FieldType_cds_gene_prot: + if (choice == 0) { + rval = RemoveFieldValueFromCGPSet ((CGPSetPtr) data, field->data.intvalue, scp); + } else if (choice == OBJ_SEQFEAT) { + feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue); + rval = RemoveQualFromFeature ((SeqFeatPtr) data, feature_field, scp); + feature_field = FeatureFieldFree (feature_field); + } + break; + case FieldType_molinfo_field : + if (choice == OBJ_BIOSEQ) { + rval = RemoveSequenceQualFromBioseq ((BioseqPtr) data, field->data.ptrvalue); + } + break; + case FieldType_pub : + rval = RemovePubFieldFromObject (choice, data, field->data.intvalue, scp); + break; + case FieldType_rna_field : + if (choice == OBJ_SEQFEAT) { + rval = RemoveRNAQualFromFeature ((SeqFeatPtr) data, field->data.ptrvalue, scp); + } + break; + case FieldType_struc_comment_field: + if (choice == OBJ_SEQDESC && data != NULL) { + sdp = (SeqDescrPtr) data; + if (sdp != NULL && sdp->choice == Seq_descr_user) { + rval = RemoveStructuredCommentFieldFromUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp); + if (rval && IsEmptyStructuredComment (sdp->data.ptrvalue)) { + ovp = (ObjValNodePtr) sdp; + ovp->idx.deleteme = TRUE; + } + } + } + break; + case FieldType_dblink: + if 
(choice == OBJ_SEQDESC && data != NULL) { + sdp = (SeqDescrPtr) data; + if (sdp != NULL && sdp->choice == Seq_descr_user) { + rval = RemoveDBLinkFieldFromUserObject (sdp->data.ptrvalue, field->data.intvalue, scp); + } + } + break; + case FieldType_misc: + if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) { + rval = RemoveGenomeProjectIdFromBioseq ((BioseqPtr) data, scp); + } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor) { + sdp = (SeqDescrPtr) data; + ovp = (ObjValNodePtr) sdp; + if (sdp->choice == Seq_descr_comment) { + ovp->idx.deleteme = TRUE; + rval = TRUE; + } + } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_defline) { + sdp = (SeqDescrPtr) data; + ovp = (ObjValNodePtr) sdp; + if (sdp->choice == Seq_descr_title) { + ovp->idx.deleteme = TRUE; + rval = TRUE; + } + } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword) { + sdp = (SeqDescrPtr) data; + ovp = (ObjValNodePtr) sdp; + if (sdp->choice == Seq_descr_genbank && (gb = (GBBlockPtr) sdp->data.ptrvalue) != NULL) { + if (RemoveValNodeStringMatch (&(gb->keywords), scp)) { + rval = TRUE; + if (GBBlockIsCompletelyEmpty(gb)) { + ovp->idx.deleteme = TRUE; + } + } + } + } + break; } - text = MemFree (text); - scp->match_text = tmp; - return rval; } -NLM_EXTERN Boolean DoesSeqIDListMeetStringConstraint (SeqIdPtr sip, StringConstraintPtr string_constraint) +NLM_EXTERN Boolean SetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra) { - CharPtr id; - CharPtr cp, cp_dst; - SeqIdPtr tmp; - Boolean match, changed; + Boolean rval = FALSE; + FeatureFieldPtr feature_field; + SeqDescrPtr sdp; + ObjValNodePtr ovp; + GBBlockPtr gb; + Boolean was_empty; - if (sip == NULL) - { - return FALSE; - } - if (string_constraint == NULL) - { - return TRUE; - } + if (data == NULL || field == NULL || 
field->data.ptrvalue == NULL) return FALSE; - while (sip != NULL) - { - /* temporary disconnect ID from list */ - tmp = sip->next; - sip->next = NULL; - id = SeqIdWholeLabel (sip, PRINTID_FASTA_LONG); - match = DoesSingleStringMatchConstraint (id, string_constraint); - if (!match) - { - changed = FALSE; - /* remove terminating pipe character */ - if (id[StringLen(id) - 1] == '|') - { - id[StringLen(id) - 1] = 0; - changed = TRUE; + switch (field->choice) { + case FieldType_source_qual : + rval = SetSourceQualInBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp, value, existing_text); + break; + case FieldType_feature_field : + if (choice == OBJ_SEQFEAT) { + rval = SetQualOnFeatureEx ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp, value, existing_text, batch_extra); } - /* remove leading pipe identifier */ - cp = StringChr (id, '|'); - if (cp != NULL) - { - changed = TRUE; - cp++; - cp_dst = id; - while (*cp != 0) - { - *cp_dst = *cp; - cp_dst++; - cp++; - } - *cp_dst = 0; - } - if (changed) - { - match = DoesSingleStringMatchConstraint (id, string_constraint); + break; + case FieldType_cds_gene_prot: + if (choice == 0) { + rval = SetFieldValueInCGPSet ((CGPSetPtr) data, field->data.intvalue, scp, value, existing_text); + } else if (choice == OBJ_SEQFEAT) { + feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue); + rval = SetQualOnFeatureEx ((SeqFeatPtr) data, feature_field, scp, value, existing_text, batch_extra); + feature_field = FeatureFieldFree (feature_field); } - - /* if search text doesn't have ., try ID without version */ - if (!match && StringChr (string_constraint->match_text, '.') == NULL) - { - cp = StringChr (id, '.'); - if (cp != NULL) - { - *cp = 0; - match = DoesSingleStringMatchConstraint (id, string_constraint); - *cp = '.'; - } + break; + case FieldType_molinfo_field: + if (choice == OBJ_BIOSEQ) { + rval = SetSequenceQualOnBioseq ((BioseqPtr) data, 
field->data.ptrvalue); } - - /* Bankit? */ - if (!match && DoesTextMatchBankItId (sip, string_constraint)) - { - match = TRUE; + break; + case FieldType_pub : + rval = SetPubFieldOnObject (choice, data, field->data.intvalue, scp, value, existing_text); + break; + case FieldType_rna_field : + if (choice == OBJ_SEQFEAT) { + rval = SetRNAQualOnFeature ((SeqFeatPtr) data, field->data.ptrvalue, scp, value, existing_text); } - } - id = MemFree (id); - sip->next = tmp; - - if (match) - { - if (string_constraint->not_present) - { - return FALSE; + break; + case FieldType_struc_comment_field: + if (choice == OBJ_SEQDESC && data != NULL) { + sdp = (SeqDescrPtr) data; + if (sdp != NULL && sdp->choice == Seq_descr_user) { + was_empty = IsEmptyStructuredComment (sdp->data.ptrvalue); + rval = SetStructuredCommentFieldOnUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp, value, existing_text); + if (was_empty && !IsEmptyStructuredComment (sdp->data.ptrvalue)) { + ovp = (ObjValNodePtr) sdp; + ovp->idx.deleteme = FALSE; + } + } } - else - { - return TRUE; + break; + case FieldType_dblink: + if (choice == OBJ_SEQDESC && data != NULL) { + sdp = (SeqDescrPtr) data; + if (sdp != NULL && sdp->choice == Seq_descr_user) { + rval = SetDBLinkFieldOnUserObject (sdp->data.ptrvalue, field->data.intvalue, scp, value, existing_text); + } } - } - sip = sip->next; - } - if (string_constraint->not_present) - { - return TRUE; - } - else - { - return FALSE; + break; + case FieldType_misc: + if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) { + rval = SetGenomeProjectIdOnBioseq ((BioseqPtr) data, scp, value, existing_text); + } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor) { + sdp = (SeqDescrPtr) data; + if (sdp->choice == Seq_descr_comment) { + rval = SetTextDescriptor (sdp, scp, value, existing_text); + } + } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_defline) { + sdp = (SeqDescrPtr) data; + 
if (sdp->choice == Seq_descr_title) { + rval = SetTextDescriptor (sdp, scp, value, existing_text); + } + } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword) { + sdp = (SeqDescrPtr) data; + if (sdp->choice == Seq_descr_genbank && (gb = (GBBlockPtr) sdp->data.ptrvalue) != NULL) { + was_empty = GBBlockIsCompletelyEmpty (gb); + if (SetStringsInValNodeStringList (&(gb->keywords), scp, value, existing_text)) { + rval = TRUE; + if (sdp->extended) { + ovp = (ObjValNodePtr) sdp; + if (GBBlockIsCompletelyEmpty(gb)) { + ovp->idx.deleteme = TRUE; + } else if (was_empty) { + ovp->idx.deleteme = FALSE; + } + } + } + } + } + break; } + return rval; } -typedef struct rnatypebiomol { - Int4 rnatype; - Uint1 biomol; - CharPtr rnamolname; -} RnaTypeBiomolData, PNTR RnaTypeBiomolPtr; - -static RnaTypeBiomolData rna_type_biomol[] = { -{ Sequence_constraint_rnamol_genomic , MOLECULE_TYPE_GENOMIC, "Genomic RNA" } , -{ Sequence_constraint_rnamol_precursor_RNA , MOLECULE_TYPE_PRE_MRNA , "Precursor RNA" } , -{ Sequence_constraint_rnamol_mRNA , MOLECULE_TYPE_MRNA , "mRNA [cDNA]" } , -{ Sequence_constraint_rnamol_rRNA , MOLECULE_TYPE_RRNA , "Ribosomal RNA" } , -{ Sequence_constraint_rnamol_tRNA , MOLECULE_TYPE_TRNA , "Transfer RNA" } , -{ Sequence_constraint_rnamol_genomic_mRNA , MOLECULE_TYPE_GENOMIC_MRNA_MIX , "Genomic-mRNA" } , -{ Sequence_constraint_rnamol_cRNA , MOLECULE_TYPE_CRNA , "cRNA" } , -{ Sequence_constraint_rnamol_transcribed_RNA , MOLECULE_TYPE_TRANSCRIBED_RNA , "Transcribed RNA" } , -{ Sequence_constraint_rnamol_ncRNA , MOLECULE_TYPE_NCRNA , "Non-coding RNA" } , -{ Sequence_constraint_rnamol_transfer_messenger_RNA , MOLECULE_TYPE_TMRNA , "Transfer-messenger RNA" } } ; - -#define NUM_rna_type_biomol sizeof (rna_type_biomol) / sizeof (RnaTypeBiomolData) - - -NLM_EXTERN Uint1 GetBiomolForRnaType (Int4 rnatype) +NLM_EXTERN Boolean SetFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, CharPtr value, Uint2 
existing_text) { - Int4 i; - - for (i = 0; i < NUM_rna_type_biomol; i++) { - if (rna_type_biomol[i].rnatype == rnatype) { - return rna_type_biomol[i].biomol; - } - } - return 0; + return SetFieldValueForObjectEx (choice, data, field, scp, value, existing_text, NULL); } -NLM_EXTERN CharPtr GetBiomolNameForRnaType (Int4 rnatype) +NLM_EXTERN Boolean SortFieldsForObject (Uint1 choice, Pointer data, FieldTypePtr field, Uint2 order) { - Int4 i; + Boolean rval = FALSE; + FeatureFieldPtr feature_field; - for (i = 0; i < NUM_rna_type_biomol; i++) { - if (rna_type_biomol[i].rnatype == rnatype) { - return rna_type_biomol[i].rnamolname; - } + if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE; + + switch (field->choice) { + case FieldType_source_qual : + break; + case FieldType_feature_field : + if (choice == OBJ_SEQFEAT) { + rval = SortQualOnFeature ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, order); + } + break; + case FieldType_cds_gene_prot: + if (choice == 0) { + rval = SortFieldInCGPSet ((CGPSetPtr) data, field->data.intvalue, order); + } else if (choice == OBJ_SEQFEAT) { + feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue); + rval = SortQualOnFeature ((SeqFeatPtr) data, feature_field, order); + feature_field = FeatureFieldFree (feature_field); + } + break; + case FieldType_molinfo_field: + break; + case FieldType_pub : + break; + case FieldType_rna_field : + break; + case FieldType_struc_comment_field: + break; + case FieldType_dblink: + break; + case FieldType_misc: + break; } - return "invalid RNA type"; + return rval; } -NLM_EXTERN void AddAllRNASubtypesToChoiceList (ValNodePtr PNTR field_list) -{ - Int4 i; - if (field_list == NULL) return; +NLM_EXTERN ValNodePtr GetFieldTypeListFromAECRAction (AECRActionPtr action) +{ + ValNodePtr field_list = NULL; + ApplyActionPtr apply; + EditActionPtr edit; + ConvertActionPtr convert; + CopyActionPtr copy; + SwapActionPtr swap; + RemoveActionPtr remove; + 
AECRParseActionPtr parse; - ValNodeAddPointer (field_list, Sequence_constraint_rnamol_any, StringSave ("Any RNA")); - for (i = 0; i < NUM_rna_type_biomol; i++) { - ValNodeAddPointer (field_list, rna_type_biomol[i].rnatype, StringSave (rna_type_biomol[i].rnamolname)); + if (action == NULL) { + return NULL; } -} - -static Boolean DoesSequenceMatchSequenceConstraint (BioseqPtr bsp, SequenceConstraintPtr constraint) -{ - SeqFeatPtr sfp; - SeqMgrFeatContext fcontext; - SeqDescrPtr sdp; - SeqMgrDescContext dcontext; - MolInfoPtr mip; - Int4 num_features = 0; - - if (bsp == NULL) return FALSE; - if (IsSequenceConstraintEmpty (constraint)) return TRUE; + /* todo - add fields from constraints ? */ - if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) { - switch (constraint->seqtype->choice) { - case SequenceConstraintMolTypeConstraint_nucleotide : - if (ISA_aa (bsp->mol)) { - return FALSE; + /* get fields from action */ + if (action->action != NULL) { + switch (action->action->choice) { + case ActionChoice_apply: + apply = (ApplyActionPtr) action->action->data.ptrvalue; + if (apply != NULL) { + ValNodeLink (&field_list, FieldTypeCopy (apply->field)); } break; - case SequenceConstraintMolTypeConstraint_dna : - if (bsp->mol != Seq_mol_dna) { - return FALSE; + case ActionChoice_edit: + edit = (EditActionPtr) action->action->data.ptrvalue; + if (edit != NULL) { + ValNodeLink (&field_list, FieldTypeCopy (edit->field)); } break; - case SequenceConstraintMolTypeConstraint_rna : - if (bsp->mol != Seq_mol_rna) { - return FALSE; + case ActionChoice_convert: + convert = (ConvertActionPtr) action->action->data.ptrvalue; + if (convert != NULL) { + ValNodeLink (&field_list, GetFromFieldFromFieldPair (convert->fields)); + ValNodeLink (&field_list, GetToFieldFromFieldPair (convert->fields)); } - if (constraint->seqtype->data.intvalue != Sequence_constraint_rnamol_any) { - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, 
&dcontext); - if (sdp == NULL || sdp->data.ptrvalue == NULL || sdp->choice != Seq_descr_molinfo) { - return FALSE; - } - mip = (MolInfoPtr) sdp->data.ptrvalue; - if (GetBiomolForRnaType (constraint->seqtype->data.intvalue) != mip->biomol) { - return FALSE; - } + break; + case ActionChoice_copy: + copy = (CopyActionPtr) action->action->data.ptrvalue; + if (copy != NULL) { + ValNodeLink (&field_list, GetFromFieldFromFieldPair (copy->fields)); + ValNodeLink (&field_list, GetToFieldFromFieldPair (copy->fields)); } break; - case SequenceConstraintMolTypeConstraint_protein : - if (!ISA_aa (bsp->mol)) { - return FALSE; + case ActionChoice_swap: + swap = (SwapActionPtr) action->action->data.ptrvalue; + if (swap != NULL) { + ValNodeLink (&field_list, GetFromFieldFromFieldPair (swap->fields)); + ValNodeLink (&field_list, GetToFieldFromFieldPair (swap->fields)); + } + break; + case ActionChoice_remove: + remove = (RemoveActionPtr) action->action->data.ptrvalue; + if (remove != NULL) { + ValNodeLink (&field_list, FieldTypeCopy (remove->field)); + } + break; + case ActionChoice_parse: + parse = (AECRParseActionPtr) action->action->data.ptrvalue; + if (parse != NULL) { + ValNodeLink (&field_list, GetFromFieldFromFieldPair (parse->fields)); + ValNodeLink (&field_list, GetToFieldFromFieldPair (parse->fields)); } break; } } + return field_list; +} - if (constraint->feature != Feature_type_any) { - sfp = SeqMgrGetNextFeature (bsp, NULL, 0, GetFeatdefFromFeatureType (constraint->feature), &fcontext); - if (sfp == NULL) { - return FALSE; - } - } - if (!IsStringConstraintEmpty (constraint->id) && !DoesSeqIDListMeetStringConstraint (bsp->id, constraint->id)) { - return FALSE; - } +NLM_EXTERN Boolean AreAECRActionFieldsEqual (AECRActionPtr action1, AECRActionPtr action2) +{ + ApplyActionPtr a1, a2; + EditActionPtr e1, e2; + ConvertActionPtr v1, v2; + CopyActionPtr c1, c2; + SwapActionPtr s1, s2; + RemoveActionPtr r1, r2; + AECRParseActionPtr p1, p2; + FieldTypePtr field1, field2; + 
Boolean rval = FALSE; - if (constraint->num_features != NULL) { - for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext); - sfp != NULL; - sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext)) { - num_features++; - /* note - break out of loop or return as soon as we know constraint - * succeeds or passes - no need to iterate through all features - */ - if (constraint->num_features->choice == QuantityConstraint_equals - && num_features > constraint->num_features->data.intvalue) { - return FALSE; - } else if (constraint->num_features->choice == QuantityConstraint_greater_than - && num_features > constraint->num_features->data.intvalue) { + if (action1 == NULL && action2 == NULL) { + return TRUE; + } else if (action1 == NULL || action2 == NULL) { + return FALSE; + } else if (action1->action == NULL && action2->action == NULL) { + return TRUE; + } else if (action1->action == NULL || action2->action == NULL) { + return FALSE; + } else if (action1->action->choice != action2->action->choice) { + return FALSE; + } else if (action1->action->data.ptrvalue == NULL && action2->action->data.ptrvalue == NULL) { + return TRUE; + } else if (action1->action->data.ptrvalue == NULL || action2->action->data.ptrvalue == NULL) { + return FALSE; + } else { + switch (action1->action->choice) { + case ActionChoice_apply: + a1 = (ApplyActionPtr) action1->action->data.ptrvalue; + a2 = (ApplyActionPtr) action2->action->data.ptrvalue; + rval = DoFieldTypesMatch (a1->field, a2->field); + break; + case ActionChoice_edit: + e1 = (EditActionPtr) action1->action->data.ptrvalue; + e2 = (EditActionPtr) action2->action->data.ptrvalue; + rval = DoFieldTypesMatch (e1->field, e2->field); + break; + case ActionChoice_convert: + v1 = (ConvertActionPtr) action1->action->data.ptrvalue; + v2 = (ConvertActionPtr) action2->action->data.ptrvalue; + field1 = GetFromFieldFromFieldPair (v1->fields); + field2 = GetFromFieldFromFieldPair (v2->fields); + rval = DoFieldTypesMatch (field1, field2); + if (rval) { 
+ field1 = FieldTypeFree (field1); + field2 = FieldTypeFree (field2); + field1 = GetToFieldFromFieldPair (v1->fields); + field2 = GetToFieldFromFieldPair (v2->fields); + rval = DoFieldTypesMatch (field1, field2); + } + field1 = FieldTypeFree (field1); + field2 = FieldTypeFree (field2); + if (rval) { + if ((v1->keep_original && !v2->keep_original) + || (!v1->keep_original && v2->keep_original)) { + rval = FALSE; + } + } + break; + case ActionChoice_copy: + c1 = (CopyActionPtr) action1->action->data.ptrvalue; + c2 = (CopyActionPtr) action2->action->data.ptrvalue; + field1 = GetFromFieldFromFieldPair (c1->fields); + field2 = GetFromFieldFromFieldPair (c2->fields); + rval = DoFieldTypesMatch (field1, field2); + if (rval) { + field1 = FieldTypeFree (field1); + field2 = FieldTypeFree (field2); + field1 = GetToFieldFromFieldPair (c1->fields); + field2 = GetToFieldFromFieldPair (c2->fields); + rval = DoFieldTypesMatch (field1, field2); + } + field1 = FieldTypeFree (field1); + field2 = FieldTypeFree (field2); + break; + case ActionChoice_swap: + s1 = (SwapActionPtr) action1->action->data.ptrvalue; + s2 = (SwapActionPtr) action2->action->data.ptrvalue; + field1 = GetFromFieldFromFieldPair (s1->fields); + field2 = GetFromFieldFromFieldPair (s2->fields); + rval = DoFieldTypesMatch (field1, field2); + if (rval) { + field1 = FieldTypeFree (field1); + field2 = FieldTypeFree (field2); + field1 = GetToFieldFromFieldPair (s1->fields); + field2 = GetToFieldFromFieldPair (s2->fields); + rval = DoFieldTypesMatch (field1, field2); + } + field1 = FieldTypeFree (field1); + field2 = FieldTypeFree (field2); + break; + case ActionChoice_remove: + r1 = (RemoveActionPtr) action1->action->data.ptrvalue; + r2 = (RemoveActionPtr) action2->action->data.ptrvalue; + rval = DoFieldTypesMatch (r1->field, r2->field); + break; + case ActionChoice_parse: + p1 = (AECRParseActionPtr) action1->action->data.ptrvalue; + p2 = (AECRParseActionPtr) action2->action->data.ptrvalue; + field1 = 
GetFromFieldFromFieldPair (p1->fields); + field2 = GetFromFieldFromFieldPair (p2->fields); + rval = DoFieldTypesMatch (field1, field2); + if (rval) { + field1 = FieldTypeFree (field1); + field2 = FieldTypeFree (field2); + field1 = GetToFieldFromFieldPair (p1->fields); + field2 = GetToFieldFromFieldPair (p2->fields); + rval = DoFieldTypesMatch (field1, field2); + } + field1 = FieldTypeFree (field1); + field2 = FieldTypeFree (field2); break; - } else if (constraint->num_features->choice == QuantityConstraint_less_than - && num_features >= constraint->num_features->data.intvalue) { - return FALSE; - } - } - if (constraint->num_features->choice == QuantityConstraint_equals - && num_features != constraint->num_features->data.intvalue) { - return FALSE; - } else if (constraint->num_features->choice == QuantityConstraint_greater_than - && num_features <= constraint->num_features->data.intvalue) { - return FALSE; - } else if (constraint->num_features->choice == QuantityConstraint_less_than - && num_features >= constraint->num_features->data.intvalue) { - return FALSE; } } - - return TRUE; + return rval; } -static Boolean DoesSequenceInSetMatchSequenceConstraint (BioseqSetPtr bssp, SequenceConstraintPtr constraint) + +static Boolean IsNonTextSourceQualPresent (BioSourcePtr biop, Int4 srcqual) { - Boolean rval = FALSE; - SeqEntryPtr sep; + Int4 orgmod_subtype, subsrc_subtype, subfield; + OrgModPtr mod; + SubSourcePtr ssp; + Boolean rval = FALSE; - if (bssp == NULL) return FALSE; - if (IsSequenceConstraintEmpty (constraint)) return TRUE; - - for (sep = bssp->seq_set; sep != NULL && !rval; sep = sep->next) { - if (IS_Bioseq (sep)) { - rval = DoesSequenceMatchSequenceConstraint ((BioseqPtr) sep->data.ptrvalue, constraint); - } else if (IS_Bioseq_set (sep)) { - rval = DoesSequenceInSetMatchSequenceConstraint ((BioseqSetPtr) sep->data.ptrvalue, constraint); + if (biop == NULL) return FALSE; + + orgmod_subtype = GetOrgModQualFromSrcQual (srcqual, &subfield); + if (orgmod_subtype 
== -1) { + subsrc_subtype = GetSubSrcQualFromSrcQual (srcqual, &subfield); + for (ssp = biop->subtype; ssp != NULL && !rval; ssp = ssp->next) { + if (ssp->subtype == subsrc_subtype) { + rval = TRUE; + } + } + } else { + if (biop->org != NULL && biop->org->orgname != NULL) { + for (mod = biop->org->orgname->mod; mod != NULL && !rval; mod = mod->next) { + if (mod->subtype == orgmod_subtype) { + rval = TRUE; + } + } } } return rval; } -static Boolean DoesObjectMatchSequenceConstraint (Uint1 choice, Pointer data, SequenceConstraintPtr constraint) +static Boolean IsSourceQualPresent (BioSourcePtr biop, SourceQualChoicePtr scp) { - BioseqPtr bsp; - SeqDescrPtr sdp; - ObjValNodePtr ovp; - Boolean rval = FALSE; + Boolean rval = FALSE; + CharPtr str; - if (data == NULL) return FALSE; - if (IsSequenceConstraintEmpty (constraint)) return TRUE; + if (biop == NULL) return FALSE; + if (scp == NULL) return TRUE; - bsp = GetSequenceForObject (choice, data); - if (bsp == NULL) { - if (choice == OBJ_SEQDESC) { - sdp = (SeqDescrPtr) data; - if (sdp->extended) { - ovp = (ObjValNodePtr) sdp; - if (ovp->idx.parenttype == OBJ_BIOSEQSET && ovp->idx.parentptr != NULL) { - rval = DoesSequenceInSetMatchSequenceConstraint ((BioseqSetPtr) ovp->idx.parentptr, constraint); + switch (scp->choice) { + case SourceQualChoice_textqual: + if (IsNonTextSourceQual (scp->data.intvalue)) { + rval = IsNonTextSourceQualPresent (biop, scp->data.intvalue); + } else { + str = GetSourceQualFromBioSource (biop, scp, NULL); + if (!StringHasNoText (str)) { + rval = TRUE; } + str = MemFree (str); } - } - } else { - rval = DoesSequenceMatchSequenceConstraint (bsp, constraint); + break; + case SourceQualChoice_location: + if (biop->genome != 0) { + rval = TRUE; + } + break; + case SourceQualChoice_origin: + if (biop->origin != 0) { + rval = TRUE; + } + break; } - return rval; + return rval; } -NLM_EXTERN CharPtr GetPubFieldLabel (Int4 pub_field) +typedef struct objecthasstring { - CharPtr rval = NULL; - switch 
(pub_field) { - case Publication_field_cit: - rval = "citation"; - break; - case Publication_field_authors: - rval = "authors"; - break; - case Publication_field_journal: - rval = "journal"; - break; - case Publication_field_volume: - rval = "volume"; - break; - case Publication_field_issue: - rval = "issue"; - break; - case Publication_field_pages: - rval = "pages"; - break; - case Publication_field_date: - rval = "date"; - break; - case Publication_field_serial_number: - rval = "serial number"; - break; - case Publication_field_title: - rval = "title"; - break; - case Publication_field_affiliation: - rval = "affiliation"; - break; - case Publication_field_affil_div: - rval = "department"; - break; - case Publication_field_affil_city: - rval = "city"; - break; - case Publication_field_affil_sub: - rval = "state"; - break; - case Publication_field_affil_country: - rval = "country"; - break; - case Publication_field_affil_street: - rval = "street"; - break; - case Publication_field_affil_email: - rval = "email"; - break; - case Publication_field_affil_fax: - rval = "fax"; - break; - case Publication_field_affil_phone: - rval = "phone"; - break; - case Publication_field_affil_zipcode: - rval = "postal code"; - break; - } - return rval; -} + StringConstraintPtr scp; + Boolean found; +} ObjectHasStringData, PNTR ObjectHasStringPtr; -NLM_EXTERN ValNodePtr GetPubFieldList (void) -{ - ValNodePtr val_list = NULL; +static void LIBCALLBACK AsnWriteConstraintCallBack (AsnExpOptStructPtr pAEOS) - ValNodeAddPointer (&val_list, Publication_field_title, StringSave ("title")); - ValNodeAddPointer (&val_list, Publication_field_authors, StringSave ("authors")); - ValNodeAddPointer (&val_list, Publication_field_journal, StringSave ("journal")); - ValNodeAddPointer (&val_list, Publication_field_issue, StringSave ("issue")); - ValNodeAddPointer (&val_list, Publication_field_pages, StringSave ("pages")); - ValNodeAddPointer (&val_list, Publication_field_serial_number, StringSave 
("serial number")); - ValNodeAddPointer (&val_list, Publication_field_date, StringSave ("date")); - ValNodeAddPointer (&val_list, Publication_field_cit, StringSave ("citation")); - ValNodeAddPointer (&val_list, Publication_field_affiliation, StringSave ("affiliation")); - ValNodeAddPointer (&val_list, Publication_field_affil_div, StringSave ("department")); - ValNodeAddPointer (&val_list, Publication_field_affil_city, StringSave ("city")); - ValNodeAddPointer (&val_list, Publication_field_affil_sub, StringSave ("state")); - ValNodeAddPointer (&val_list, Publication_field_affil_country, StringSave ("country")); - ValNodeAddPointer (&val_list, Publication_field_affil_street, StringSave ("street")); - ValNodeAddPointer (&val_list, Publication_field_affil_email, StringSave ("email")); - ValNodeAddPointer (&val_list, Publication_field_affil_fax, StringSave ("fax")); - ValNodeAddPointer (&val_list, Publication_field_affil_phone, StringSave ("phone")); - ValNodeAddPointer (&val_list, Publication_field_affil_zipcode, StringSave ("postal code")); +{ + CharPtr pchSource; + ObjectHasStringPtr ohsp; - return val_list; + ohsp = (ObjectHasStringPtr) pAEOS->data; + if (ISA_STRINGTYPE (AsnFindBaseIsa (pAEOS->atp))) + { + pchSource = (CharPtr) pAEOS->dvp->ptrvalue; + ohsp->found |= DoesSingleStringMatchConstraint (pchSource, ohsp->scp); + } } -static ValNodePtr MakePubFieldTypeList (void) +static Boolean DoesObjectMatchStringConstraint (Uint1 choice, Pointer data, StringConstraintPtr scp) + { - ValNodePtr field_list = NULL; + ObjMgrPtr omp; + ObjMgrTypePtr omtp; + AsnIoPtr aip; + AsnExpOptPtr aeop; + ObjectHasStringData ohsd; + SeqFeatPtr sfp, prot; + SeqMgrFeatContext fcontext; + CharPtr search_txt; + CGPSetPtr c; + ValNodePtr vnp; + Boolean all_match = TRUE, any_match = FALSE, rval; + BioseqPtr protbsp; + ImpFeatPtr imp; - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_title); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_authors); - ValNodeAddInt 
(&field_list, FieldType_pub, Publication_field_journal); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_issue); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_pages); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_serial_number); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_date); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_cit); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affiliation); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_div); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_city); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_sub); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_country); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_street); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_email); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_fax); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_phone); - ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_zipcode); + if (data == NULL) return FALSE; + if (scp == NULL) return TRUE; - return field_list; + if (choice == 0) { + /* CDS-Gene-Prot set */ + c = (CGPSetPtr) data; + for (vnp = c->gene_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) { + if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) { + any_match = TRUE; + } else { + all_match = FALSE; + } + } + for (vnp = c->cds_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) { + if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) { + any_match = TRUE; + } else { + all_match = FALSE; + } + } + for (vnp = c->mrna_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) { + if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) { + any_match = TRUE; + } else { + 
all_match = FALSE; + } + } + for (vnp = c->prot_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) { + if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) { + any_match = TRUE; + } else { + all_match = FALSE; + } + } + if (scp->not_present) { + rval = all_match; + } else { + rval = any_match; + } + } else { + omp = ObjMgrGet (); + omtp = ObjMgrTypeFind (omp, choice, NULL, NULL); + if (omtp == NULL) return FALSE; + aip = AsnIoNullOpen (); + aeop = AsnExpOptNew (aip, NULL, NULL, AsnWriteConstraintCallBack); + ohsd.found = FALSE; + ohsd.scp = scp; + if (aeop != NULL) { + aeop->user_data = (Pointer) &ohsd; + } + + (omtp->asnwrite) (data, aip, NULL); + + if (!ohsd.found && omtp->datatype == OBJ_SEQFEAT) + { + sfp = (SeqFeatPtr) data; + if (sfp->data.choice == SEQFEAT_CDREGION) { + protbsp = BioseqFindFromSeqLoc (sfp->product); + prot = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext); + if (prot != NULL) { + (omtp->asnwrite) (prot, aip, NULL); + } + } else { + if (SeqMgrFeaturesAreIndexed(sfp->idx.entityID) == 0) { + SeqMgrIndexFeatures (sfp->idx.entityID, NULL); + } + if (sfp->idx.subtype == FEATDEF_tRNA) { + sfp = SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, sfp->idx.itemID, 0, sfp, &fcontext); + ohsd.found = DoesSingleStringMatchConstraint (fcontext.label, ohsd.scp); + if (!ohsd.found && sfp != NULL && sfp->idx.subtype == FEATDEF_tRNA) + { + search_txt = (CharPtr) MemNew ((StringLen (fcontext.label) + 6) * sizeof (Char)); + if (search_txt != NULL) + { + sprintf (search_txt, "tRNA-%s", fcontext.label); + ohsd.found = DoesSingleStringMatchConstraint (search_txt, ohsd.scp); + search_txt = MemFree (search_txt); + } + } + } else if (!ohsd.found && sfp != NULL + && sfp->data.choice == SEQFEAT_IMP + && (imp = (ImpFeatPtr) sfp->data.value.ptrvalue) != NULL) { + ohsd.found = DoesSingleStringMatchConstraint (imp->key, ohsd.scp); + } + } + } + AsnIoClose (aip); + if (scp->not_present) { + rval = !ohsd.found; + } else { 
+ rval = ohsd.found; + } + } + return rval; } -NLM_EXTERN Boolean IsPublicationConstraintEmpty (PublicationConstraintPtr constraint) +NLM_EXTERN Boolean IsSourceConstraintEmpty (SourceConstraintPtr scp) { - Boolean rval = FALSE; + if (scp == NULL) return TRUE; - if (constraint == NULL - || (constraint->type == Pub_type_any - && (constraint->field == NULL - || IsStringConstraintEmpty (constraint->field->constraint)) - && (constraint->special_field == NULL - || constraint->special_field->constraint == NULL))) { - rval = TRUE; + if (scp->field1 == NULL + && scp->field2 == NULL + && IsStringConstraintEmpty(scp->constraint)) { + return TRUE; + } else { + return FALSE; } - return rval; } - -NLM_EXTERN Int4 GetPubMLStatus (PubPtr the_pub) +NLM_EXTERN Boolean DoesBiosourceMatchConstraint (BioSourcePtr biop, SourceConstraintPtr scp) { - CitGenPtr cgp; - CitSubPtr csp; - CitArtPtr cap; - CitBookPtr cbp; - CitJourPtr cjp; - ImprintPtr imp = NULL; - Int4 status = Pub_type_any; - - if (the_pub == NULL || the_pub->data.ptrvalue == NULL) - { - return Pub_type_any; - } - - switch (the_pub->choice) - { - case PUB_Gen : - cgp = (CitGenPtr) the_pub->data.ptrvalue; - if (cgp->cit != NULL && StringICmp (cgp->cit, "unpublished") == 0) - { - status = Pub_type_unpublished; - } - else - { - status = Pub_type_published; - } - break; - case PUB_Sub : - csp = (CitSubPtr) the_pub->data.ptrvalue; - status = Pub_type_submitter_block; - break; - case PUB_Article : - cap = (CitArtPtr) the_pub->data.ptrvalue; - if (cap->from == 1) - { - cjp = (CitJourPtr) cap->fromptr; - if (cjp != NULL) - { - imp = cjp->imp; - } - } - else if (cap->from == 2 || cap->from == 3) - { - cbp = (CitBookPtr) cap->fromptr; - if (cbp != NULL) { - imp = cbp->imp; - } - } - break; - case PUB_Journal : - cjp = (CitJourPtr) the_pub->data.ptrvalue; - imp = cjp->imp; - case PUB_Book : - case PUB_Man : - cbp = (CitBookPtr) the_pub->data.ptrvalue; - imp = cbp->imp; - break; - case PUB_Patent : - status = Pub_type_published; - 
break; - default : - break; - - } - if (imp != NULL) - { - if (imp->prepub == 0) - { - status = Pub_type_published; - } - else if (imp->prepub == 2) - { - status = Pub_type_in_press; - } - else if (imp->prepub == 1 && the_pub->choice == PUB_Sub) - { - status = Pub_type_submitter_block; - } - else - { - status = Pub_type_unpublished; - } - - } - return status; -} - - -static Boolean DoesPubFieldMatch (PubdescPtr pdp, PubFieldConstraintPtr field) -{ - Boolean rval = FALSE, match_all = TRUE; - PubPtr pub; - CharPtr tmp; + Boolean rval = FALSE; + CharPtr str1, str2; + ValNode vn; - if (pdp == NULL) return FALSE; - if (field == NULL) return TRUE; + if (biop == NULL) return FALSE; + if (scp == NULL) return TRUE; - if (field->constraint->not_present) { - match_all = TRUE; - for (pub = pdp->pub; pub != NULL && match_all; pub = pub->next) { - tmp = GetPubFieldFromPub (pub, field->field, NULL); - if (!DoesStringMatchConstraint (tmp, field->constraint)) { - match_all = FALSE; + if (IsStringConstraintEmpty(scp->constraint)) { + /* looking for qual present */ + if (scp->field1 != NULL && scp->field2 == NULL) { + rval = IsSourceQualPresent (biop, scp->field1); + } else if (scp->field2 != NULL && scp->field1 == NULL) { + rval = IsSourceQualPresent (biop, scp->field2); + /* looking for quals to match */ + } else if (scp->field1 != NULL && scp->field2 != NULL) { + str1 = GetSourceQualFromBioSource (biop, scp->field1, NULL); + str2 = GetSourceQualFromBioSource (biop, scp->field2, NULL); + if (StringCmp (str1, str2) == 0) { + rval = TRUE; } - tmp = MemFree (tmp); + str1 = MemFree (str1); + str2 = MemFree (str2); + } else { + /* nothing specified, automatic match */ + rval = TRUE; } - rval = match_all; } else { - for (pub = pdp->pub; pub != NULL && !rval; pub = pub->next) { - tmp = GetPubFieldFromPub (pub, field->field, field->constraint); - if (tmp != NULL) { + if (scp->field1 != NULL && scp->field2 == NULL) { + if (AllowSourceQualMulti(scp->field1) && scp->constraint->not_present) { 
+ scp->constraint->not_present = FALSE; + str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint); + scp->constraint->not_present = TRUE; + if (str1 != NULL) { + rval = FALSE; + } else { + rval = TRUE; + } + str1 = MemFree (str1); + } else { + str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint); + if (str1 == NULL) { + if (scp->constraint->not_present) { + str1 = GetSourceQualFromBioSource (biop, scp->field1, NULL); + if (str1 == NULL) { + rval = TRUE; + } + } + } else if (!StringHasNoText (str1)) { + rval = TRUE; + } + str1 = MemFree (str1); + } + } else if (scp->field2 != NULL && scp->field1 == NULL) { + str2 = GetSourceQualFromBioSource (biop, scp->field2, scp->constraint); + if (str2 == NULL) { + if (scp->constraint->not_present) { + str2 = GetSourceQualFromBioSource (biop, scp->field2, NULL); + if (str2 == NULL) { + rval = TRUE; + } + } + } else if (!StringHasNoText (str2)) { rval = TRUE; } - tmp = MemFree (tmp); + str2 = MemFree (str2); + } else if (scp->field1 != NULL && scp->field2 != NULL) { + str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint); + str2 = GetSourceQualFromBioSource (biop, scp->field2, scp->constraint); + if (StringCmp (str1, str2) == 0) { + rval = TRUE; + } + str1 = MemFree (str1); + str2 = MemFree (str2); + } else { + /* generic string constraint */ + vn.choice = Seq_descr_source; + vn.next = NULL; + vn.extended = 0; + vn.data.ptrvalue = biop; + rval = DoesObjectMatchStringConstraint (OBJ_SEQDESC, &vn, scp->constraint); } } return rval; } -static Boolean DoesPubFieldSpecialMatch (PubdescPtr pdp, PubFieldSpecialConstraintPtr field) +static Boolean DoesCGPSetMatchPseudoConstraint (CGPSetPtr c, CDSGeneProtPseudoConstraintPtr constraint) { - Boolean rval = FALSE, match_all = TRUE; - PubPtr pub; - CharPtr tmp; + Boolean any_pseudo = FALSE; + ValNodePtr vnp; + SeqFeatPtr sfp; + Boolean rval = FALSE; - if (pdp == NULL) return FALSE; - if (field == NULL) return TRUE; + if (c == NULL) return 
FALSE; + if (constraint == NULL) return TRUE; - if (field->constraint->choice == PubFieldSpecialConstraintType_is_present) { - for (pub = pdp->pub; pub != NULL && !rval; pub = pub->next) { - tmp = GetPubFieldFromPub (pub, field->field, NULL); - if (!StringHasNoText (tmp)) { - /* at least one is present and non-empty */ - rval = TRUE; + switch (constraint->feature) { + case CDSGeneProt_feature_type_constraint_gene : + for (vnp = c->gene_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->pseudo) { + any_pseudo = TRUE; + } } - tmp = MemFree (tmp); - } - } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_not_present) { - rval = TRUE; - for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) { - tmp = GetPubFieldFromPub (pub, field->field, NULL); - if (!StringHasNoText (tmp)) { - /* at least one is present and non-empty */ - rval = FALSE; + break; + case CDSGeneProt_feature_type_constraint_mRNA : + for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->pseudo) { + any_pseudo = TRUE; + } } - tmp = MemFree (tmp); - } - } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_all_caps) { - rval = TRUE; - for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) { - tmp = GetPubFieldFromPub (pub, field->field, NULL); - if (!IsAllCaps (tmp)) { - /* at least one is not all caps */ - rval = FALSE; + break; + case CDSGeneProt_feature_type_constraint_cds : + for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->pseudo) { + any_pseudo = TRUE; + } } - tmp = MemFree (tmp); - } + break; + case CDSGeneProt_feature_type_constraint_prot : + for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->pseudo && sfp->idx.subtype == 
FEATDEF_PROT) { + any_pseudo = TRUE; + } + } + break; + case CDSGeneProt_feature_type_constraint_mat_peptide : + for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->pseudo && sfp->idx.subtype == FEATDEF_mat_peptide_aa) { + any_pseudo = TRUE; + } + } + break; } + if ((any_pseudo && constraint->is_pseudo) + || (!any_pseudo && !constraint->is_pseudo)) { + rval = TRUE; + } return rval; } -static Boolean DoesPubMatchPublicationConstraint (PubdescPtr pdp, PublicationConstraintPtr constraint) +static Boolean DoesFeatureMatchCGPPseudoConstraint (SeqFeatPtr sfp, CDSGeneProtPseudoConstraintPtr constraint) { - Boolean type_ok = TRUE, rval = FALSE; - PubPtr pub; + Boolean any_pseudo = FALSE; + ValNodePtr feat_list, vnp; + SeqFeatPtr gene, mrna, cds, prot; + Boolean rval = FALSE; + SeqMgrFeatContext fcontext; - if (pdp == NULL) return FALSE; - if (IsPublicationConstraintEmpty (constraint)) return TRUE; + if (sfp == NULL) return FALSE; + if (constraint == NULL) return TRUE; - if (constraint->type != Pub_type_any) { - type_ok = FALSE; - for (pub = pdp->pub; pub != NULL && !type_ok; pub = pub->next) { - if (GetPubMLStatus (pub) == constraint->type) { - type_ok = TRUE; - } - } - } - if (type_ok) { - rval = DoesPubFieldMatch (pdp, constraint->field) && DoesPubFieldSpecialMatch (pdp, constraint->special_field); - } - return rval; -} - - -static Boolean DoesObjectMatchPublicationConstraint (Uint1 choice, Pointer data, PublicationConstraintPtr constraint) -{ - Boolean rval = TRUE; - SeqFeatPtr sfp; - SeqDescrPtr sdp; - - if (data == NULL) return FALSE; - if (IsPublicationConstraintEmpty (constraint)) return TRUE; - - switch (choice) { - case OBJ_SEQFEAT: - sfp = (SeqFeatPtr) data; - if (sfp->data.choice == SEQFEAT_PUB) { - rval = DoesPubMatchPublicationConstraint (sfp->data.value.ptrvalue, constraint); + switch (constraint->feature) { + case CDSGeneProt_feature_type_constraint_gene : + if 
(sfp->data.choice == SEQFEAT_GENE) { + if (sfp->pseudo) { + any_pseudo = TRUE; + } + } else if (sfp->data.choice == SEQFEAT_PROT) { + cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &fcontext); + if (cds != NULL) { + gene = GetGeneForFeature (cds); + if (gene != NULL && gene->pseudo) { + any_pseudo = TRUE; + } + } + } else { + gene = GetGeneForFeature (sfp); + if (gene != NULL && gene->pseudo) { + any_pseudo = TRUE; + } } break; - case OBJ_SEQDESC: - sdp = (SeqDescrPtr) data; - if (sdp->choice == Seq_descr_pub) { - rval = DoesPubMatchPublicationConstraint (sdp->data.ptrvalue, constraint); + case CDSGeneProt_feature_type_constraint_mRNA : + if (sfp->idx.subtype == FEATDEF_mRNA) { + if (sfp->pseudo) { + any_pseudo = TRUE; + } + } else if (sfp->data.choice == SEQFEAT_PROT) { + cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &fcontext); + if (cds != NULL) { + mrna = GetmRNAforCDS (cds); + if (mrna != NULL && mrna->pseudo) { + any_pseudo = TRUE; + } + } + } else { + mrna = GetmRNAforCDS (sfp); + if (mrna != NULL && mrna->pseudo) { + any_pseudo = TRUE; + } + } + break; + case CDSGeneProt_feature_type_constraint_cds : + if (sfp->idx.subtype == FEATDEF_CDS) { + if (sfp->pseudo) { + any_pseudo = TRUE; + } + } else if (sfp->data.choice == SEQFEAT_PROT) { + cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &fcontext); + if (cds != NULL && cds->pseudo) { + any_pseudo = TRUE; + } + } else { + feat_list = ListFeaturesInLocation (BioseqFindFromSeqLoc (sfp->location), sfp->location, SEQFEAT_CDREGION, FEATDEF_CDS); + for (vnp = feat_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { + cds = vnp->data.ptrvalue; + if (cds != NULL && cds->pseudo) { + any_pseudo = TRUE; + } + } + feat_list = ValNodeFree (feat_list); + } + break; + case CDSGeneProt_feature_type_constraint_prot : + if (sfp->idx.subtype == FEATDEF_PROT) { + if (sfp->pseudo) { + any_pseudo = TRUE; + } + } else if (sfp->data.choice == SEQFEAT_PROT) { + prot = 
SeqMgrGetNextFeature (BioseqFindFromSeqLoc (sfp->location), NULL, 0, FEATDEF_PROT, &fcontext); + if (prot != NULL && prot->pseudo) { + any_pseudo = TRUE; + } + } else if (sfp->idx.subtype == FEATDEF_CDS) { + prot = SeqMgrGetNextFeature (BioseqFindFromSeqLoc (sfp->product), NULL, 0, FEATDEF_PROT, &fcontext); + if (prot != NULL && prot->pseudo) { + any_pseudo = TRUE; + } + } else { + feat_list = ListFeaturesInLocation (BioseqFindFromSeqLoc (sfp->location), sfp->location, SEQFEAT_CDREGION, FEATDEF_CDS); + for (vnp = feat_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { + cds = vnp->data.ptrvalue; + if (cds != NULL) { + prot = SeqMgrGetNextFeature (BioseqFindFromSeqLoc (cds->product), NULL, 0, FEATDEF_PROT, &fcontext); + if (prot != NULL && prot->pseudo) { + any_pseudo = TRUE; + } + } + } + feat_list = ValNodeFree (feat_list); } break; + case CDSGeneProt_feature_type_constraint_mat_peptide : + if (sfp->idx.subtype == FEATDEF_mat_peptide_aa) { + if (sfp->pseudo) { + any_pseudo = TRUE; + } + } + break; + } + + if ((any_pseudo && constraint->is_pseudo) + || (!any_pseudo && !constraint->is_pseudo)) { + rval = TRUE; } return rval; } -NLM_EXTERN Boolean IsFieldConstraintEmpty (FieldConstraintPtr constraint) +NLM_EXTERN Boolean IsCDSGeneProtQualConstraintEmpty (CDSGeneProtQualConstraintPtr constraint) { - RnaQualPtr rq; - FeatureFieldPtr ffp; - - if (constraint == NULL || constraint->field == NULL || IsStringConstraintEmpty (constraint->string_constraint)) { - return TRUE; - } else if (constraint->field->choice == FieldType_rna_field - && ((rq = (RnaQualPtr)constraint->field->data.ptrvalue) == NULL - || rq->type == NULL)) { - return TRUE; - } else if (constraint->field->choice == FieldType_feature_field - && (ffp = (FeatureFieldPtr)constraint->field->data.ptrvalue) == NULL) { + if (constraint == NULL) return TRUE; + if (constraint->field1 == NULL && constraint->field2 == NULL && IsStringConstraintEmpty (constraint->constraint)) { return TRUE; } else { return FALSE; @@ 
-15310,5251 +16579,7255 @@ NLM_EXTERN Boolean IsFieldConstraintEmpty (FieldConstraintPtr constraint) } -static Boolean DoesObjectMatchFeatureFieldConstraint (Uint1 choice, Pointer data, FeatureFieldPtr ffp, StringConstraintPtr string_constraint) +static Boolean DoesCGPSetMatchQualConstraint (CGPSetPtr c, CDSGeneProtQualConstraintPtr constraint) { - Boolean rval = FALSE; - CharPtr str; - BioseqPtr bsp; - Int4 subtype; - SeqFeatPtr sfp; - SeqMgrFeatContext fcontext; - Boolean not_present; - CGPSetPtr cgp; - Uint2 cds_gene_prot_field; + Boolean rval = FALSE; + CharPtr str, str1, str2; - if (data == NULL) { - return FALSE; - } - if (IsStringConstraintEmpty (string_constraint)) { - return TRUE; - } - - switch (choice) { - case OBJ_SEQFEAT: - str = GetQualFromFeature ((SeqFeatPtr) data, ffp, string_constraint); + if (c == NULL) return FALSE; + if (constraint == NULL) return TRUE; + + if (IsStringConstraintEmpty (constraint->constraint)) { + /* looking for qual present */ + if (constraint->field1 != NULL && constraint->field2 == NULL) { + str = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL); if (str != NULL) { rval = TRUE; str = MemFree (str); } - break; - case OBJ_SEQDESC: - bsp = GetSequenceForObject (choice, data); - if (bsp != NULL) { - subtype = GetFeatdefFromFeatureType (ffp->type); - not_present = string_constraint->not_present; - string_constraint->not_present = FALSE; - for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, subtype, &fcontext); - !rval && sfp != NULL; - sfp = SeqMgrGetNextFeature (bsp, sfp, 0, subtype, &fcontext)) { - str = GetQualFromFeature (sfp, ffp, string_constraint); - if (str != NULL) { + } else if (constraint->field2 != NULL && constraint->field1 == NULL) { + str = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL); + if (str == NULL) { + rval = FALSE; + } else { + str = MemFree (str); + } + /* looking for quals to match */ + } else if (constraint->field1 != NULL && constraint->field2 != NULL) { + str1 = 
GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL); + str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL); + if (StringCmp (str1, str2) == 0) { + rval = TRUE; + } + str1 = MemFree (str1); + str2 = MemFree (str2); + } else { + /* nothing specified, automatic match */ + rval = TRUE; + } + } else { + if (constraint->field1 != NULL && constraint->field2 == NULL) { + str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint); + if (str1 == NULL) { + if (constraint->constraint->not_present) { + str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL); + if (str1 == NULL) { rval = TRUE; - str = MemFree (str); } } - if (not_present) { - rval = !rval; - string_constraint->not_present = TRUE; - } + } else if (!StringHasNoText (str1)) { + rval = TRUE; } - break; - case 0: - cgp = (CGPSetPtr) data; - cds_gene_prot_field = CDSGeneProtFieldFromFeatureField (ffp); - if (cds_gene_prot_field > 0) { - not_present = string_constraint->not_present; - string_constraint->not_present = FALSE; - str = GetFieldValueFromCGPSet (cgp, cds_gene_prot_field, string_constraint); - if (str != NULL) { - rval = TRUE; - str = MemFree (str); - } - if (not_present) { - rval = !rval; - string_constraint->not_present = TRUE; + str1 = MemFree (str1); + } else if (constraint->field2 != NULL && constraint->field1 == NULL) { + str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint); + if (str2 == NULL) { + if (constraint->constraint->not_present) { + str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL); + if (str2 == NULL) { + rval = TRUE; + } } + } else if (!StringHasNoText (str2)) { + rval = TRUE; } - break; + str2 = MemFree (str2); + } else if (constraint->field1 != NULL && constraint->field2 != NULL) { + str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint); + str2 = GetFieldValueFromCGPSet (c, 
constraint->field2->data.intvalue, constraint->constraint); + if (StringCmp (str1, str2) == 0) { + rval = TRUE; + } + str1 = MemFree (str1); + str2 = MemFree (str2); + } else { + /* generic string constraint */ + rval = DoesObjectMatchStringConstraint (0, c, constraint->constraint); + } } return rval; } -static Boolean DoesObjectMatchFieldConstraint (Uint1 choice, Pointer data, FieldConstraintPtr constraint) +static Boolean DoesSequenceHaveFeatureWithQualPresent (BioseqPtr bsp, FeatureFieldPtr feature_field, StringConstraintPtr scp) { - Boolean rval = FALSE; - BioSourcePtr biop; - BioseqPtr bsp; - CharPtr str; - FeatureFieldPtr ffp; + Boolean rval = FALSE; + SeqFeatPtr sfp, sfp_p; + SeqMgrFeatContext context1, context2; + Int4 featdef; + Uint1 seqfeattype; + CharPtr str; + BioseqPtr prot_bsp; - if (data == NULL) return FALSE; - if (IsFieldConstraintEmpty (constraint)) { + if (bsp == NULL) { + return FALSE; + } else if (feature_field == NULL) { return TRUE; } - - switch (constraint->field->choice) { - case FieldType_source_qual: - biop = GetBioSourceFromObject (choice, data); - if (biop != NULL) { - str = GetSourceQualFromBioSource (biop, constraint->field->data.ptrvalue, constraint->string_constraint); - if (str != NULL) { + featdef = GetFeatdefFromFeatureType(feature_field->type); + seqfeattype = FindFeatFromFeatDefType (featdef); + if (seqfeattype == SEQFEAT_PROT) { + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &context1); + sfp != NULL && !rval; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &context1)) { + prot_bsp = BioseqFindFromSeqLoc (sfp->product); + for (sfp_p = SeqMgrGetNextFeature (prot_bsp, NULL, 0, featdef, &context2); + sfp_p != NULL && !rval; + sfp_p = SeqMgrGetNextFeature (prot_bsp, sfp_p, 0, featdef, &context2)) { + str = GetQualFromFeature (sfp_p, feature_field, scp); + if (str == NULL && scp != NULL) { + if (scp->not_present) { + str = GetQualFromFeature (sfp_p, feature_field, NULL); + if (str == NULL) { + rval = TRUE; + 
} + } + } else if (!StringHasNoText (str)) { rval = TRUE; - str = MemFree (str); } + str = MemFree (str); } - break; - case FieldType_feature_field: - ffp = (FeatureFieldPtr) constraint->field->data.ptrvalue; - rval = DoesObjectMatchFeatureFieldConstraint (choice, data, ffp, constraint->string_constraint); - break; - case FieldType_rna_field: - ffp = FeatureFieldFromRnaQual (constraint->field->data.ptrvalue); - rval = DoesObjectMatchFeatureFieldConstraint (choice, data, ffp, constraint->string_constraint); - ffp = FeatureFieldFree (ffp); - break; - case FieldType_cds_gene_prot: - ffp = FeatureFieldFromCDSGeneProtField (constraint->field->data.intvalue); - rval = DoesObjectMatchFeatureFieldConstraint (choice, data, ffp, constraint->string_constraint); - ffp = FeatureFieldFree (ffp); - break; - case FieldType_molinfo_field: - bsp = GetSequenceForObject (choice, data); - if (bsp != NULL) { - str = GetSequenceQualFromBioseq (bsp, constraint->field->data.ptrvalue); - if (str == NULL && constraint->string_constraint->not_present) { - rval = TRUE; - } else if (str != NULL && DoesStringMatchConstraint (str, constraint->string_constraint)) { - rval = TRUE; + } + } else { + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context1); + sfp != NULL && !rval; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &context1)) { + str = GetQualFromFeature (sfp, feature_field, scp); + if (str == NULL && scp != NULL) { + if (scp->not_present) { + str = GetQualFromFeature (sfp, feature_field, NULL); + if (str == NULL) { + rval = TRUE; + } } - str = MemFree (str); + } else if (!StringHasNoText (str)) { + rval = TRUE; } - break; -/* TODO LATER */ - case FieldType_pub: - case FieldType_misc: - break; + str = MemFree (str); + } } - return rval; + return rval; } -static Boolean DoesObjectMatchConstraint (Uint1 choice, Pointer data, ConstraintChoicePtr constraint) +static Boolean +DoesSequenceHaveFeatureWithMatchingQuals +(BioseqPtr bsp, + CDSGeneProtConstraintFieldPtr f1, + 
CDSGeneProtConstraintFieldPtr f2, + StringConstraintPtr scp) { - Boolean rval = TRUE; + Int4 featdef; + Uint1 seqfeattype; + SeqFeatPtr sfp, sfp_p; + CharPtr str, str2; + SeqMgrFeatContext context1, context2; + FeatureFieldPtr feature_field1 = NULL, feature_field2 = NULL; + CGPSetPtr c; + Boolean b = FALSE; + Boolean rval = FALSE; + BioseqPtr prot_bsp; - if (data == NULL) return FALSE; - if (constraint == NULL) return TRUE; + if (bsp == NULL || f1 == NULL || f2 == NULL) { + return FALSE; + } + feature_field1 = FeatureFieldFromCDSGeneProtField(f1->data.intvalue); + feature_field2 = FeatureFieldFromCDSGeneProtField(f2->data.intvalue); - switch (constraint->choice) { - case ConstraintChoice_string : - rval = DoesObjectMatchStringConstraint (choice, data, constraint->data.ptrvalue); - break; - case ConstraintChoice_location : - rval = DoesObjectMatchLocationConstraint (choice, data, constraint->data.ptrvalue); - break; - case ConstraintChoice_field : - rval = DoesObjectMatchFieldConstraint (choice, data, constraint->data.ptrvalue); - break; - case ConstraintChoice_source : - rval = DoesBiosourceMatchConstraint (GetBioSourceFromObject (choice, data), constraint->data.ptrvalue); - break; - case ConstraintChoice_cdsgeneprot_qual : - if (choice == 0) { - rval = DoesCGPSetMatchQualConstraint (data, constraint->data.ptrvalue); - } else if (choice == OBJ_SEQDESC) { - rval = DoesSeqDescMatchCGPQualConstraint (data, constraint->data.ptrvalue); - } else if (choice == OBJ_SEQFEAT) { - rval = DoesFeatureMatchCGPQualConstraint (data, constraint->data.ptrvalue); - } else if (choice == OBJ_BIOSEQ) { - rval = DoesSequenceMatchCGPQualConstraint (data, constraint->data.ptrvalue); - } else { - rval = FALSE; + if (feature_field1 == NULL || feature_field2 == NULL) { + feature_field1 = FeatureFieldFree (feature_field1); + feature_field2 = FeatureFieldFree (feature_field2); + return FALSE; + } + + if (feature_field1->type == feature_field2->type) { + featdef = 
GetFeatdefFromFeatureType(feature_field1->type); + seqfeattype = FindFeatFromFeatDefType (featdef); + if (seqfeattype == SEQFEAT_PROT) { + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &context1); + sfp != NULL && !rval; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &context1)) { + prot_bsp = BioseqFindFromSeqLoc (sfp->product); + for (sfp_p = SeqMgrGetNextFeature (prot_bsp, NULL, 0, featdef, &context2); + sfp_p != NULL && !rval; + sfp_p = SeqMgrGetNextFeature (prot_bsp, sfp_p, 0, featdef, &context2)) { + str = GetQualFromFeature (sfp_p, feature_field1, scp); + str2 = GetQualFromFeature (sfp_p, feature_field2, scp); + if (str != NULL && str2 != NULL && StringCmp (str, str2) == 0) { + rval = TRUE; + } + str = MemFree (str); + str2 = MemFree (str2); + } } - break; - case ConstraintChoice_cdsgeneprot_pseudo : - if (choice == 0) { - rval = DoesCGPSetMatchPseudoConstraint (data, constraint->data.ptrvalue); - } else if (choice == OBJ_SEQFEAT) { - rval = DoesFeatureMatchCGPPseudoConstraint (data, constraint->data.ptrvalue); + } else { + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context1); + sfp != NULL && !rval; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &context1)) { + str = GetQualFromFeature (sfp, feature_field1, scp); + str2 = GetQualFromFeature (sfp, feature_field2, scp); + if (str != NULL && str2 != NULL && StringCmp (str, str2) == 0) { + rval = TRUE; + } + str = MemFree (str); + str2 = MemFree (str2); } - break; - case ConstraintChoice_sequence : - rval = DoesObjectMatchSequenceConstraint (choice, data, constraint->data.ptrvalue); - break; - case ConstraintChoice_pub: - rval = DoesObjectMatchPublicationConstraint (choice, data, constraint->data.ptrvalue); - break; + } + } else { + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &context1); + sfp != NULL && !rval; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &context1)) { + c = BuildCGPSetFromCodingRegion (sfp, &b); + str = 
GetFieldValueFromCGPSet (c, f1->data.intvalue, scp); + str2 = GetFieldValueFromCGPSet (c, f2->data.intvalue, scp); + if (str != NULL && str2 != NULL && StringCmp (str, str2) == 0) { + rval = TRUE; + } + str = MemFree (str); + str2 = MemFree (str2); + c = CGPSetFree (c); + } } return rval; } -NLM_EXTERN Boolean DoesObjectMatchConstraintChoiceSet (Uint1 choice, Pointer data, ConstraintChoiceSetPtr csp) +static Boolean DoesSequenceMatchCGPQualConstraint (BioseqPtr bsp, CDSGeneProtQualConstraintPtr constraint) { - Boolean rval = TRUE; + FeatureFieldPtr feature_field; + Boolean rval = FALSE; - if (data == NULL) return FALSE; + if (bsp == NULL) { + return FALSE; + } else if (constraint == NULL) { + return TRUE; + } - while (csp != NULL && rval) { - rval = DoesObjectMatchConstraint (choice, data, csp); - csp = csp->next; + if (IsStringConstraintEmpty (constraint->constraint)) { + /* looking for qual present */ + if ((constraint->field1 != NULL && constraint->field2 == NULL) + || (constraint->field2 != NULL && constraint->field1 == NULL)) { + if (constraint->field1 != NULL) { + feature_field = FeatureFieldFromCDSGeneProtField (constraint->field1->data.intvalue); + } else { + feature_field = FeatureFieldFromCDSGeneProtField (constraint->field2->data.intvalue); + } + if (feature_field != NULL) { + rval = DoesSequenceHaveFeatureWithQualPresent (bsp, feature_field, NULL); + feature_field = FeatureFieldFree (feature_field); + } + /* looking for quals to match */ + } else if (constraint->field1 != NULL && constraint->field2 != NULL) { + rval = DoesSequenceHaveFeatureWithMatchingQuals (bsp, constraint->field1, constraint->field2, NULL); + } else { + /* nothing specified, automatic match */ + rval = TRUE; + } + } else if ((constraint->field1 != NULL && constraint->field2 == NULL) + || (constraint->field1 == NULL && constraint->field2 != NULL)) { + /* one field must match constraint */ + if (constraint->field1 != NULL) { + feature_field = FeatureFieldFromCDSGeneProtField 
(constraint->field1->data.intvalue); + } else { + feature_field = FeatureFieldFromCDSGeneProtField (constraint->field2->data.intvalue); + } + if (feature_field != NULL) { + rval = DoesSequenceHaveFeatureWithQualPresent (bsp, feature_field, constraint->constraint); + feature_field = FeatureFieldFree (feature_field); + } + } else if (constraint->field1 != NULL && constraint->field2 != NULL) { + /* two fields must match and match constraint */ + rval = DoesSequenceHaveFeatureWithMatchingQuals (bsp, constraint->field1, constraint->field2, constraint->constraint); + } else { + /* generic string constraint */ + rval = DoesObjectMatchStringConstraint (OBJ_BIOSEQ, bsp, constraint->constraint); } return rval; } -NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForField (FieldTypePtr field, ConstraintChoiceSetPtr csp) +static Boolean DoesSequenceInSetMatchCGPQualConstraint (BioseqSetPtr bssp, CDSGeneProtQualConstraintPtr constraint) { - StringConstraintPtr scp = NULL; - SourceConstraintPtr source_constraint; - CDSGeneProtQualConstraintPtr cgp_constraint; - PublicationConstraintPtr pub_constraint; - FieldConstraintPtr field_constraint; - FieldType ft; + Boolean rval = FALSE; + SeqEntryPtr sep; - while (csp != NULL) { - switch (csp->choice) { - case ConstraintChoice_string : - scp = csp->data.ptrvalue; - break; - case ConstraintChoice_source : - source_constraint = (SourceConstraintPtr) csp->data.ptrvalue; - if (source_constraint != NULL && source_constraint->constraint != NULL) { - if (source_constraint->field1 != NULL) { - ft.choice = FieldType_source_qual; - ft.data.ptrvalue = source_constraint->field1; - ft.next = NULL; - if (DoFieldTypesMatch (field, &ft)) { - scp = source_constraint->constraint; - } - } - if (scp == NULL && source_constraint->field2 == NULL) { - ft.choice = FieldType_source_qual; - ft.data.ptrvalue = source_constraint->field2; - ft.next = NULL; - if (DoFieldTypesMatch (field, &ft)) { - scp = source_constraint->constraint; - } - } - } - 
break; - case ConstraintChoice_cdsgeneprot_qual : - cgp_constraint = (CDSGeneProtQualConstraintPtr) csp->data.ptrvalue; - if (field->choice == FieldType_cds_gene_prot - && cgp_constraint != NULL && cgp_constraint->constraint != NULL - && ((cgp_constraint->field1 != NULL && cgp_constraint->field1->data.intvalue == field->data.intvalue) - || (cgp_constraint->field2 != NULL && cgp_constraint->field2->data.intvalue == field->data.intvalue))) { - scp = cgp_constraint->constraint; - } - break; - case ConstraintChoice_pub : - pub_constraint = csp->data.ptrvalue; - if (pub_constraint != NULL && pub_constraint->field != NULL) { - if (field->data.intvalue == pub_constraint->field->field - && !IsStringConstraintEmpty (pub_constraint->field->constraint)) { - scp = pub_constraint->field->constraint; - } - } - break; - case ConstraintChoice_field : - field_constraint = csp->data.ptrvalue; - if (field_constraint != NULL - && field_constraint->field != NULL - && DoFieldTypesMatch (field, field_constraint->field)) { - scp = field_constraint->string_constraint; - } - break; + if (bssp == NULL) return FALSE; + if (constraint == NULL) return TRUE; + + for (sep = bssp->seq_set; sep != NULL && !rval; sep = sep->next) { + if (IS_Bioseq (sep)) { + rval = DoesSequenceMatchCGPQualConstraint ((BioseqPtr) sep->data.ptrvalue, constraint); + } else if (IS_Bioseq_set (sep)) { + rval = DoesSequenceInSetMatchCGPQualConstraint ((BioseqSetPtr) sep->data.ptrvalue, constraint); } - csp = csp->next; } - return scp; + return rval; } -NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForFieldPair (FieldPairTypePtr fieldpair, ConstraintChoiceSetPtr csp) -{ - StringConstraintPtr scp; - FieldTypePtr f; - - f = GetFromFieldFromFieldPair (fieldpair); - scp = FindStringConstraintInConstraintSetForField (f, csp); - f = FieldTypeFree (f); - return scp; -} - - -NLM_EXTERN StringConstraintPtr StringConstraintFromFieldEdit (FieldEditPtr edit) +static Boolean DoesSeqDescMatchCGPQualConstraint 
(SeqDescrPtr sdp, CDSGeneProtQualConstraintPtr constraint) { - StringConstraintPtr scp; + Boolean rval = FALSE; + BioseqPtr bsp; + ObjValNodePtr ovp; - if (edit == NULL || edit->find_txt == NULL) return NULL; - scp = StringConstraintNew (); - scp->match_text = StringSave (edit->find_txt); + if (sdp == NULL) return FALSE; + if (constraint == NULL) return TRUE; - switch (edit->location) { - case Field_edit_location_anywhere : - scp->match_location = String_location_contains; - break; - case Field_edit_location_beginning : - scp->match_location = String_location_starts; - break; - case Field_edit_location_end : - scp->match_location = String_location_ends; - break; + bsp = GetSequenceForObject (OBJ_SEQDESC, sdp); + if (bsp == NULL) { + if (sdp->extended) { + ovp = (ObjValNodePtr) sdp; + if (ovp->idx.parenttype == OBJ_BIOSEQSET && ovp->idx.parentptr != NULL) { + rval = DoesSequenceInSetMatchCGPQualConstraint ((BioseqSetPtr) ovp->idx.parentptr, constraint); + } + } + } else { + rval = DoesSequenceMatchCGPQualConstraint (bsp, constraint); } - scp->case_sensitive = TRUE; - scp->whole_word = FALSE; - scp->not_present = FALSE; - - return scp; + return rval; } -static CharPtr ApplyEditToString (CharPtr str, FieldEditPtr edit) +static void UnmarkFeatureList (ValNodePtr list) { - CharPtr cp_found, new_str; - Int4 found_len, replace_len, new_len; - - if (edit == NULL) return StringSave (str); - - str = StringSave (str); - cp_found = StringISearch (str, edit->find_txt); + SeqFeatPtr sfp; - found_len = StringLen (edit->find_txt); - replace_len = StringLen (edit->repl_txt); - if (edit->location == Field_edit_location_beginning - && cp_found != str) { - cp_found = NULL; - } - while (cp_found != NULL) + while (list != NULL) { - if (edit->location == Field_edit_location_end - && cp_found != str + StringLen (str) - found_len) { - cp_found = StringISearch (cp_found + found_len, edit->find_txt); - } else { - new_len = StringLen (str) + 1 - found_len + replace_len; - new_str = (CharPtr) 
MemNew (new_len * sizeof (Char)); - if (new_str != NULL) - { - if (cp_found != str) - { - StringNCpy (new_str, str, cp_found - str); - } - StringCat (new_str, edit->repl_txt); - StringCat (new_str, cp_found + found_len); - cp_found = new_str + (cp_found - str) + replace_len; - str = MemFree (str); - str = new_str; - } - cp_found = StringISearch (cp_found, edit->find_txt); + sfp = list->data.ptrvalue; + if (sfp != NULL) + { + sfp->idx.deleteme = FALSE; } + list = list->next; } - return str; } -static void RemoveFieldNameFromString (CharPtr field_name, CharPtr str) +static Boolean DoesFeatureMatchCGPQualConstraint (SeqFeatPtr sfp, CDSGeneProtQualConstraintPtr constraint) { - Uint4 field_name_len; - CharPtr src, dst; + CGPSetPtr c = NULL; + Boolean b = FALSE; + SeqMgrFeatContext context; + Boolean rval = FALSE; + FeatureFieldPtr ff; + SeqFeatPtr cds; + CharPtr str1 = NULL, str2 = NULL; - if (StringHasNoText (field_name) || StringHasNoText (str)) { - return; + if (sfp == NULL) { + return FALSE; + } else if (constraint == NULL) { + return TRUE; } - field_name_len = StringLen (field_name); - - if (!StringHasNoText (field_name) && StringNICmp(str, field_name, field_name_len) == 0 - && StringLen (str) > field_name_len - && str[field_name_len] == ' ') - { - src = str + field_name_len + 1; - while (*src == ' ') - { - src++; + + if (sfp->data.choice == SEQFEAT_CDREGION) { + c = BuildCGPSetFromCodingRegion (sfp, &b); + } else if (sfp->data.choice == SEQFEAT_PROT) { + cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &context); + c = BuildCGPSetFromCodingRegion (cds, &b); + } else if (sfp->data.choice == SEQFEAT_GENE) { + c = BuildCGPSetFromGene (sfp); + } else if (sfp->data.choice == SEQFEAT_RNA) { + c = BuildCGPSetFrommRNA (sfp); + } + UnmarkFeatureList (c->cds_list); + UnmarkFeatureList (c->mrna_list); + UnmarkFeatureList (c->gene_list); + + rval = DoesCGPSetMatchQualConstraint (c, constraint); + if (rval && sfp->idx.subtype == FEATDEF_mat_peptide_aa) { + 
if (constraint->field1 != NULL) { + if (IsCDSGeneProtFieldMatPeptideRelated (constraint->field1->data.intvalue)) { + ff = FeatureFieldFromCDSGeneProtField (constraint->field1->data.intvalue); + str1 = GetQualFromFeature (sfp, ff, constraint->constraint); + ff = FeatureFieldFree (ff); + } else { + str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint); + } + if (str1 == NULL) { + rval = FALSE; + } } - dst = str; - while (*src != 0) - { - *dst = *src; - dst++; - src++; + if (constraint->field2 != NULL) { + if (IsCDSGeneProtFieldMatPeptideRelated (constraint->field2->data.intvalue)) { + ff = FeatureFieldFromCDSGeneProtField (constraint->field2->data.intvalue); + str2 = GetQualFromFeature (sfp, ff, constraint->constraint); + ff = FeatureFieldFree (ff); + } else { + str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint); + } + if (str2 == NULL) { + rval = FALSE; + } } - *dst = 0; + if (rval && constraint->field1 != NULL && constraint->field2 != NULL && StringCmp (str1, str2) != 0) { + rval = FALSE; + } + str1 = MemFree (str1); + str2 = MemFree (str2); } + c = CGPSetFree (c); + return rval; } -typedef struct objectcollection { - AECRActionPtr action; - ValNodePtr object_list; - BatchExtraPtr batch_extra; -} ObjectCollectionData, PNTR ObjectCollectionPtr; +NLM_EXTERN Boolean IsSequenceConstraintEmpty (SequenceConstraintPtr constraint) +{ + if (constraint == NULL) return TRUE; + if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) return FALSE; + if (constraint->feature != Macro_feature_type_any) return FALSE; + if (!IsStringConstraintEmpty (constraint->id)) return FALSE; + if (constraint->num_features != NULL) return FALSE; + if (constraint->length != NULL) return FALSE; + if (constraint->strandedness != Feature_strandedness_constraint_any) return FALSE; + return TRUE; +} -static void AECRActionObjectCollectionItemCallback (Uint1 objecttype, 
Pointer objectdata, ObjectCollectionPtr o) +static Boolean DoesTextMatchBankItId (SeqIdPtr sip, StringConstraintPtr scp) { - ApplyActionPtr a; - EditActionPtr e; - ConvertActionPtr v; - CopyActionPtr c; - SwapActionPtr s; - RemoveActionPtr r; - AECRParseActionPtr p; - CharPtr str, portion, field_name; - StringConstraintPtr scp; - FieldTypePtr field_from = NULL, field_to = NULL; + Boolean rval = FALSE; + Int4 text_len, offset; + CharPtr text, tmp; + DbtagPtr dbtag; - if (objectdata == NULL || o == NULL) return; + if (scp == NULL || (text = scp->match_text) == NULL || sip == NULL || sip->choice != SEQID_GENERAL + || (dbtag = (DbtagPtr) sip->data.ptrvalue) == NULL + || StringCmp (dbtag->db, "BankIt") != 0 + || dbtag->tag == NULL) { + return FALSE; + } + text_len = StringLen (scp->match_text); + if (text_len > 6 && StringNICmp (text, "BankIt", 6) == 0) { + text += 6; + text += StrSpn (text, ":/ "); + } - /* check to make sure object is appropriate for field and meets filter */ - switch (o->action->action->choice) { - case ActionChoice_apply : - a = (ApplyActionPtr) o->action->action->data.ptrvalue; - if (a != NULL - && IsObjectAppropriateForFieldValue (objecttype, objectdata, a->field) - && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { - ValNodeAddPointer (&(o->object_list), objecttype, objectdata); + text = StringSave (text); + tmp = scp->match_text; + scp->match_text = text; + rval = DoesObjectIdMatchStringConstraint (dbtag->tag, scp); + if (!rval) { + offset = StringCSpn (text, "/ "); + if (text[offset] != 0) { + text[offset] = '_'; + rval = DoesObjectIdMatchStringConstraint (dbtag->tag, scp); + } + } + text = MemFree (text); + scp->match_text = tmp; + + return rval; +} + + +NLM_EXTERN Boolean DoesSeqIDListMeetStringConstraint (SeqIdPtr sip, StringConstraintPtr string_constraint) +{ + CharPtr id; + CharPtr cp, cp_dst; + SeqIdPtr tmp; + Boolean match, changed; + DbtagPtr dbtag; + CharPtr tmp_id; + + if (sip == NULL) + { + 
return FALSE; + } + if (string_constraint == NULL) + { + return TRUE; + } + + while (sip != NULL) + { + /* temporary disconnect ID from list */ + tmp = sip->next; + sip->next = NULL; + id = SeqIdWholeLabel (sip, PRINTID_FASTA_LONG); + match = DoesSingleStringMatchConstraint (id, string_constraint); + if (!match) + { + changed = FALSE; + /* remove terminating pipe character */ + if (id[StringLen(id) - 1] == '|') + { + id[StringLen(id) - 1] = 0; + changed = TRUE; } - break; - case ActionChoice_edit : - e = (EditActionPtr) o->action->action->data.ptrvalue; - if (e != NULL - && IsObjectAppropriateForFieldValue (objecttype, objectdata, e->field) - && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { - scp = StringConstraintFromFieldEdit (e->edit); - str = GetFieldValueForObjectEx (objecttype, objectdata, e->field, scp, o->batch_extra); - if (!StringHasNoText (str)) { - ValNodeAddPointer (&(o->object_list), objecttype, objectdata); + /* remove leading pipe identifier */ + cp = StringChr (id, '|'); + if (cp != NULL) + { + changed = TRUE; + cp++; + cp_dst = id; + while (*cp != 0) + { + *cp_dst = *cp; + cp_dst++; + cp++; } - str = MemFree (str); + *cp_dst = 0; + } + if (changed) + { + match = DoesSingleStringMatchConstraint (id, string_constraint); } - break; - case ActionChoice_convert : - v = (ConvertActionPtr) o->action->action->data.ptrvalue; - if (v != NULL - && (field_from = GetFromFieldFromFieldPair(v->fields)) != NULL - && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from) - && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { - scp = FindStringConstraintInConstraintSetForField (field_from, o->action->constraint); - str = GetFieldValueForObjectEx (objecttype, objectdata, field_from, scp, o->batch_extra); - if (v->strip_name) { - field_to = GetToFieldFromFieldPair (v->fields); - field_name = SummarizeFieldType (field_to); - RemoveFieldNameFromString (field_name, str); - 
field_name = MemFree (field_name); - field_to = FieldTypeFree (field_to); - } - if (!StringHasNoText (str)) { - ValNodeAddPointer (&(o->object_list), objecttype, objectdata); + + /* if search text doesn't have ., try ID without version */ + if (!match && StringChr (string_constraint->match_text, '.') == NULL) + { + cp = StringChr (id, '.'); + if (cp != NULL) + { + *cp = 0; + match = DoesSingleStringMatchConstraint (id, string_constraint); + *cp = '.'; } - str = MemFree (str); } - field_from = FieldTypeFree (field_from); - break; - case ActionChoice_copy : - c = (CopyActionPtr) o->action->action->data.ptrvalue; - if (c != NULL - && (field_from = GetFromFieldFromFieldPair(c->fields)) != NULL - && (field_to = GetFromFieldFromFieldPair(c->fields)) != NULL - && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from) - && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to) - && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { - ValNodeAddPointer (&(o->object_list), objecttype, objectdata); + + /* Bankit? 
*/ + if (!match && DoesTextMatchBankItId (sip, string_constraint)) + { + match = TRUE; } - field_from = FieldTypeFree (field_from); - field_to = FieldTypeFree (field_to); - break; - case ActionChoice_swap : - s = (SwapActionPtr) o->action->action->data.ptrvalue; - if (s != NULL - && (field_from = GetFromFieldFromFieldPair(s->fields)) != NULL - && (field_to = GetFromFieldFromFieldPair(s->fields)) != NULL - && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from) - && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to) - && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { - ValNodeAddPointer (&(o->object_list), objecttype, objectdata); + + if (!match && sip->choice == SEQID_GENERAL && sip->data.ptrvalue != NULL) { + dbtag = (DbtagPtr) sip->data.ptrvalue; + if (StringCmp (dbtag->db, "NCBIFILE") == 0 && dbtag->tag != NULL) { + if (DoesSingleStringMatchConstraint (dbtag->tag->str, string_constraint)) { + match = TRUE; + } else if ((cp = StringRChr (dbtag->tag->str, '/')) != NULL) { + tmp_id = (CharPtr) MemNew (sizeof (Char) * (cp - dbtag->tag->str + 1)); + StringNCpy (tmp_id, dbtag->tag->str, cp - dbtag->tag->str); + tmp_id[cp - dbtag->tag->str] = 0; + if (DoesSingleStringMatchConstraint (tmp_id, string_constraint)) { + match = TRUE; + } + tmp_id = MemFree (tmp_id); + } + } } - field_from = FieldTypeFree (field_from); - field_to = FieldTypeFree (field_to); - break; - case ActionChoice_remove : - r = (RemoveActionPtr) o->action->action->data.ptrvalue; - if (r != NULL - && IsObjectAppropriateForFieldValue (objecttype, objectdata, r->field) - && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { - ValNodeAddPointer (&(o->object_list), objecttype, objectdata); + } + id = MemFree (id); + sip->next = tmp; + + if (match) + { + if (string_constraint->not_present) + { + return FALSE; } - break; - case ActionChoice_parse : - p = (AECRParseActionPtr) 
o->action->action->data.ptrvalue; - if (p != NULL - && (field_from = GetFromFieldFromFieldPair(p->fields)) != NULL - && (field_to = GetFromFieldFromFieldPair(p->fields)) != NULL - && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from) - && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to) - && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { - scp = FindStringConstraintInConstraintSetForField (field_from, o->action->constraint); - str = GetFieldValueForObjectEx (objecttype, objectdata, field_from, scp, o->batch_extra); - portion = GetTextPortionFromString (str, p->portion); - if (!StringHasNoText (portion)) { - ValNodeAddPointer (&(o->object_list), objecttype, objectdata); - } - portion = MemFree (portion); - str = MemFree (str); + else + { + return TRUE; } - field_from = FieldTypeFree (field_from); - field_to = FieldTypeFree (field_to); - break; + } + sip = sip->next; + } + if (string_constraint->not_present) + { + return TRUE; + } + else + { + return FALSE; } - } -static void AECRActionObjectCollectionFeatureCallback (SeqFeatPtr sfp, Pointer data) -{ - ObjectCollectionPtr o; - if (sfp == NULL || data == NULL) return; +typedef struct rnatypebiomol { + Int4 rnatype; + Uint1 biomol; + CharPtr rnamolname; +} RnaTypeBiomolData, PNTR RnaTypeBiomolPtr; - o = (ObjectCollectionPtr) data; - AECRActionObjectCollectionItemCallback (OBJ_SEQFEAT, sfp, o); +static RnaTypeBiomolData rna_type_biomol[] = { +{ Sequence_constraint_rnamol_genomic , MOLECULE_TYPE_GENOMIC, "Genomic RNA" } , +{ Sequence_constraint_rnamol_precursor_RNA , MOLECULE_TYPE_PRE_MRNA , "Precursor RNA" } , +{ Sequence_constraint_rnamol_mRNA , MOLECULE_TYPE_MRNA , "mRNA [cDNA]" } , +{ Sequence_constraint_rnamol_rRNA , MOLECULE_TYPE_RRNA , "Ribosomal RNA" } , +{ Sequence_constraint_rnamol_tRNA , MOLECULE_TYPE_TRNA , "Transfer RNA" } , +{ Sequence_constraint_rnamol_genomic_mRNA , MOLECULE_TYPE_GENOMIC_MRNA_MIX , "Genomic-mRNA" } , +{ 
Sequence_constraint_rnamol_cRNA , MOLECULE_TYPE_CRNA , "cRNA" } , +{ Sequence_constraint_rnamol_transcribed_RNA , MOLECULE_TYPE_TRANSCRIBED_RNA , "Transcribed RNA" } , +{ Sequence_constraint_rnamol_ncRNA , MOLECULE_TYPE_NCRNA , "Non-coding RNA" } , +{ Sequence_constraint_rnamol_transfer_messenger_RNA , MOLECULE_TYPE_TMRNA , "Transfer-messenger RNA" } } ; -} +#define NUM_rna_type_biomol sizeof (rna_type_biomol) / sizeof (RnaTypeBiomolData) -static void AECRActionObjectCollectionDescriptorCallback (SeqDescrPtr sdp, Pointer data) +NLM_EXTERN Uint1 GetBiomolForRnaType (Int4 rnatype) { - ObjectCollectionPtr o; - - if (sdp == NULL || data == NULL) return; + Int4 i; - o = (ObjectCollectionPtr) data; - AECRActionObjectCollectionItemCallback (OBJ_SEQDESC, sdp, o); + for (i = 0; i < NUM_rna_type_biomol; i++) { + if (rna_type_biomol[i].rnatype == rnatype) { + return rna_type_biomol[i].biomol; + } + } + return 0; } -static void AECRObjectCollectionBioseqCallback (BioseqPtr bsp, Pointer data) +NLM_EXTERN CharPtr GetBiomolNameForRnaType (Int4 rnatype) { - ObjectCollectionPtr o; - - if (bsp == NULL || data == NULL) return; + Int4 i; - o = (ObjectCollectionPtr) data; - AECRActionObjectCollectionItemCallback (OBJ_BIOSEQ, bsp, o); + for (i = 0; i < NUM_rna_type_biomol; i++) { + if (rna_type_biomol[i].rnatype == rnatype) { + return rna_type_biomol[i].rnamolname; + } + } + return "invalid RNA type"; } +NLM_EXTERN void AddAllRNASubtypesToChoiceList (ValNodePtr PNTR field_list) +{ + Int4 i; -static void CollectNucBioseqCallback (BioseqPtr bsp, Pointer data); -static void AddCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list); -static void AddDeflineDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list); -static void AddGenbankBlockDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list); + if (field_list == NULL) return; -static ValNodePtr CollectMiscObjectsForApply (SeqEntryPtr sep, Int4 misc_type, ValNodePtr 
constraint) + ValNodeAddPointer (field_list, Sequence_constraint_rnamol_any, StringSave ("Any RNA")); + for (i = 0; i < NUM_rna_type_biomol; i++) { + ValNodeAddPointer (field_list, rna_type_biomol[i].rnatype, StringSave (rna_type_biomol[i].rnamolname)); + } +} + + +static Boolean DoesValueMatchQuantityConstraint (Int4 val, ValNodePtr quantity) { - ValNodePtr target_list = NULL, bsp_list = NULL, tmp_list = NULL, vnp; + Boolean rval = TRUE; - if (sep == NULL) { - return NULL; + if (quantity == NULL) { + rval = TRUE; + } else if (quantity->choice == QuantityConstraint_equals + && val != quantity->data.intvalue) { + return FALSE; + } else if (quantity->choice == QuantityConstraint_greater_than + && val <= quantity->data.intvalue) { + return FALSE; + } else if (quantity->choice == QuantityConstraint_less_than + && val >= quantity->data.intvalue) { + return FALSE; } - - VisitBioseqsInSep (sep, &bsp_list, CollectNucBioseqCallback); - for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { - if (DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, constraint)) { - ValNodeAddPointer (&tmp_list, vnp->choice, vnp->data.ptrvalue); - } + return rval; +} + + +static Boolean DoesSequenceMatchStrandednessConstraint (BioseqPtr bsp, Uint2 strandedness) +{ + SeqMgrFeatContext context; + SeqFeatPtr sfp; + Int4 num_minus = 0; + Int4 num_plus = 0; + Boolean rval = FALSE; + + if (bsp == NULL) { + return FALSE; + } else if (strandedness == Feature_strandedness_constraint_any) { + return TRUE; } - bsp_list = ValNodeFree (bsp_list); - if (misc_type == Misc_field_genome_project_id) { - target_list = tmp_list; - tmp_list = NULL; - } else if (misc_type == Misc_field_comment_descriptor) { - for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { - AddCommentDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); - } - } else if (misc_type == Misc_field_defline) { - for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { - AddDeflineDescriptorDestinationsForBioseq 
(vnp->data.ptrvalue, &target_list); - } - tmp_list = ValNodeFree (tmp_list); - } else if (misc_type == Misc_field_keyword) { - for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { - AddGenbankBlockDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); + sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context); + while (sfp != NULL) { + if (context.strand == Seq_strand_minus) { + num_minus++; + if (strandedness == Feature_strandedness_constraint_plus_only + || strandedness == Feature_strandedness_constraint_no_minus) { + return FALSE; + } else if (strandedness == Feature_strandedness_constraint_at_least_one_minus) { + return TRUE; + } + } else { + num_plus++; + if (strandedness == Feature_strandedness_constraint_minus_only + || strandedness == Feature_strandedness_constraint_no_plus) { + return FALSE; + } else if (strandedness == Feature_strandedness_constraint_at_least_one_plus) { + return TRUE; + } } - tmp_list = ValNodeFree (tmp_list); + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context); } - tmp_list = ValNodeFree (tmp_list); - return target_list; + + switch (strandedness) { + case Feature_strandedness_constraint_minus_only: + if (num_minus > 0 && num_plus == 0) { + rval = TRUE; + } + break; + case Feature_strandedness_constraint_plus_only: + if (num_plus > 0 && num_minus == 0) { + rval = TRUE; + } + break; + case Feature_strandedness_constraint_at_least_one_minus: + if (num_minus > 0) { + rval = TRUE; + } + break; + case Feature_strandedness_constraint_at_least_one_plus: + if (num_plus > 0) { + rval = TRUE; + } + break; + case Feature_strandedness_constraint_no_minus: + if (num_minus == 0) { + rval = TRUE; + } + break; + case Feature_strandedness_constraint_no_plus: + if (num_plus == 0) { + rval = TRUE; + } + break; + } + return rval; } -NLM_EXTERN ValNodePtr GetObjectListForAECRActionEx (SeqEntryPtr sep, AECRActionPtr action, BatchExtraPtr batch_extra) +NLM_EXTERN Boolean DoesSequenceMatchSequenceConstraint (BioseqPtr bsp, SequenceConstraintPtr 
constraint) { - ObjectCollectionData ocd; - ApplyActionPtr apply; - Uint1 field_type; + SeqFeatPtr sfp; + SeqMgrFeatContext fcontext; + SeqDescrPtr sdp; + SeqMgrDescContext dcontext; + MolInfoPtr mip; + Int4 num_features = 0; + + if (bsp == NULL) return FALSE; + if (IsSequenceConstraintEmpty (constraint)) return TRUE; - if (action == NULL) return NULL; + if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) { + switch (constraint->seqtype->choice) { + case SequenceConstraintMolTypeConstraint_nucleotide : + if (ISA_aa (bsp->mol)) { + return FALSE; + } + break; + case SequenceConstraintMolTypeConstraint_dna : + if (bsp->mol != Seq_mol_dna) { + return FALSE; + } + break; + case SequenceConstraintMolTypeConstraint_rna : + if (bsp->mol != Seq_mol_rna) { + return FALSE; + } + if (constraint->seqtype->data.intvalue != Sequence_constraint_rnamol_any) { + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext); + if (sdp == NULL || sdp->data.ptrvalue == NULL || sdp->choice != Seq_descr_molinfo) { + return FALSE; + } + mip = (MolInfoPtr) sdp->data.ptrvalue; + if (GetBiomolForRnaType (constraint->seqtype->data.intvalue) != mip->biomol) { + return FALSE; + } + } + break; + case SequenceConstraintMolTypeConstraint_protein : + if (!ISA_aa (bsp->mol)) { + return FALSE; + } + break; + } + } - ocd.action = action; - ocd.object_list = NULL; - if (batch_extra == NULL) { - ocd.batch_extra = BatchExtraNew (); - InitBatchExtraForAECRAction (ocd.batch_extra, action, sep); - } else { - ocd.batch_extra = batch_extra; + if (constraint->feature != Macro_feature_type_any) { + sfp = SeqMgrGetNextFeature (bsp, NULL, 0, GetFeatdefFromFeatureType (constraint->feature), &fcontext); + if (sfp == NULL) { + return FALSE; + } } - field_type = FieldTypeFromAECRAction (action); - if (field_type == FieldType_molinfo_field) { - VisitBioseqsInSep (sep, &ocd, AECRObjectCollectionBioseqCallback); - } else if (field_type == FieldType_misc - 
&& action->action != NULL - && action->action->choice == ActionChoice_apply - && (apply = action->action->data.ptrvalue) != NULL) { - ocd.object_list = CollectMiscObjectsForApply (sep, apply->field->data.intvalue, action->constraint); - } else { - VisitFeaturesInSep (sep, &ocd, AECRActionObjectCollectionFeatureCallback); - VisitDescriptorsInSep (sep, &ocd, AECRActionObjectCollectionDescriptorCallback); - if (field_type == FieldType_misc) { - VisitBioseqsInSep (sep, &ocd, AECRObjectCollectionBioseqCallback); + if (!IsStringConstraintEmpty (constraint->id) && !DoesSeqIDListMeetStringConstraint (bsp->id, constraint->id)) { + return FALSE; + } + + if (constraint->num_features != NULL) { + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext)) { + num_features++; + /* note - break out of loop or return as soon as we know constraint + * succeeds or passes - no need to iterate through all features + */ + if (constraint->num_features->choice == QuantityConstraint_equals + && num_features > constraint->num_features->data.intvalue) { + return FALSE; + } else if (constraint->num_features->choice == QuantityConstraint_greater_than + && num_features > constraint->num_features->data.intvalue) { + break; + } else if (constraint->num_features->choice == QuantityConstraint_less_than + && num_features >= constraint->num_features->data.intvalue) { + return FALSE; + } + } + if (!DoesValueMatchQuantityConstraint(num_features, constraint->num_features)) { + return FALSE; } } - if (batch_extra != ocd.batch_extra) { - ocd.batch_extra = BatchExtraFree (ocd.batch_extra); + if (!DoesValueMatchQuantityConstraint(bsp->length, constraint->length)) { + return FALSE; } - return ocd.object_list; -} + if (!DoesSequenceMatchStrandednessConstraint(bsp, constraint->strandedness)) { + return FALSE; + } -NLM_EXTERN ValNodePtr GetObjectListForAECRAction (SeqEntryPtr sep, AECRActionPtr action) -{ - return 
GetObjectListForAECRActionEx (sep, action, NULL); + return TRUE; } - -NLM_EXTERN ValNodePtr FreeObjectList (ValNodePtr vnp) +static Boolean DoesSequenceInSetMatchSequenceConstraint (BioseqSetPtr bssp, SequenceConstraintPtr constraint) { - ValNodePtr vnp_next; + Boolean rval = FALSE; + SeqEntryPtr sep; - while (vnp != NULL) { - vnp_next = vnp->next; - vnp->next = NULL; - if (vnp->choice == 0) { - vnp->data.ptrvalue = CGPSetFree (vnp->data.ptrvalue); + if (bssp == NULL) return FALSE; + if (IsSequenceConstraintEmpty (constraint)) return TRUE; + + for (sep = bssp->seq_set; sep != NULL && !rval; sep = sep->next) { + if (IS_Bioseq (sep)) { + rval = DoesSequenceMatchSequenceConstraint ((BioseqPtr) sep->data.ptrvalue, constraint); + } else if (IS_Bioseq_set (sep)) { + rval = DoesSequenceInSetMatchSequenceConstraint ((BioseqSetPtr) sep->data.ptrvalue, constraint); } - vnp = ValNodeFree (vnp); - vnp = vnp_next; } - return vnp; + return rval; } -typedef struct buildcgpset +static Boolean DoesObjectMatchSequenceConstraint (Uint1 choice, Pointer data, SequenceConstraintPtr constraint) { - ValNodePtr cds_list; - ValNodePtr mrna_list; - ValNodePtr gene_list; -} BuildCGPSetData, PNTR BuildCGPSetPtr; + BioseqPtr bsp; + SeqDescrPtr sdp; + ObjValNodePtr ovp; + Boolean rval = FALSE; -static void BuildCGPSetCallback (SeqFeatPtr sfp, Pointer userdata) -{ - BuildCGPSetPtr b; + if (data == NULL) return FALSE; + if (IsSequenceConstraintEmpty (constraint)) return TRUE; - if (sfp == NULL || sfp->idx.deleteme || userdata == NULL) return; - b = (BuildCGPSetPtr) userdata; - if (sfp->data.choice == SEQFEAT_CDREGION) - { - ValNodeAddPointer (&(b->cds_list), OBJ_SEQFEAT, sfp); - } - else if (sfp->data.choice == SEQFEAT_GENE) - { - ValNodeAddPointer (&(b->gene_list), OBJ_SEQFEAT, sfp); - } - else if (sfp->idx.subtype == FEATDEF_mRNA) - { - ValNodeAddPointer (&(b->mrna_list), OBJ_SEQFEAT, sfp); - } - else if (SeqMgrGetGeneXref (sfp) != NULL) - { - ValNodeAddPointer (&(b->gene_list), OBJ_SEQFEAT, 
sfp); + bsp = GetSequenceForObject (choice, data); + if (bsp == NULL) { + if (choice == OBJ_SEQDESC) { + sdp = (SeqDescrPtr) data; + if (sdp->extended) { + ovp = (ObjValNodePtr) sdp; + if (ovp->idx.parenttype == OBJ_BIOSEQSET && ovp->idx.parentptr != NULL) { + rval = DoesSequenceInSetMatchSequenceConstraint ((BioseqSetPtr) ovp->idx.parentptr, constraint); + } + } + } + } else { + rval = DoesSequenceMatchSequenceConstraint (bsp, constraint); } + return rval; } -static CGPSetPtr BuildCGPSetFromCodingRegion (SeqFeatPtr cds, BoolPtr indexing_needed) +NLM_EXTERN CharPtr GetPubFieldLabel (Int4 pub_field) { - SeqMgrFeatContext fcontext; - SeqFeatPtr gene = NULL, mrna, prot; - BioseqPtr protbsp; - CGPSetPtr cdsp; - ProtRefPtr prp; - - if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return NULL; - - cdsp = (CGPSetPtr) MemNew (sizeof (CGPSetData)); - ValNodeAddPointer (&(cdsp->cds_list), 0, cds); - - gene = GetGeneForFeature (cds); - if (gene != NULL) - { - ValNodeAddPointer (&(cdsp->gene_list), 0, gene); - /* mark gene, so that we'll know it isn't lonely */ - gene->idx.deleteme = TRUE; - } - - mrna = SeqMgrGetOverlappingmRNA (cds->location, &fcontext); - if (mrna != NULL) - { - ValNodeAddPointer (&(cdsp->mrna_list), 0, mrna); - /* mark mrna, so that we'll know it's already in a set */ - mrna->idx.deleteme = TRUE; + CharPtr rval = NULL; + switch (pub_field) { + case Publication_field_cit: + rval = "citation"; + break; + case Publication_field_authors: + rval = "authors"; + break; + case Publication_field_journal: + rval = "journal"; + break; + case Publication_field_volume: + rval = "volume"; + break; + case Publication_field_issue: + rval = "issue"; + break; + case Publication_field_pages: + rval = "pages"; + break; + case Publication_field_date: + rval = "date"; + break; + case Publication_field_serial_number: + rval = "serial number"; + break; + case Publication_field_title: + rval = "title"; + break; + case Publication_field_affiliation: + rval = "affiliation"; 
+ break; + case Publication_field_affil_div: + rval = "department"; + break; + case Publication_field_affil_city: + rval = "city"; + break; + case Publication_field_affil_sub: + rval = "state"; + break; + case Publication_field_affil_country: + rval = "country"; + break; + case Publication_field_affil_street: + rval = "street"; + break; + case Publication_field_affil_email: + rval = "email"; + break; + case Publication_field_affil_fax: + rval = "fax"; + break; + case Publication_field_affil_phone: + rval = "phone"; + break; + case Publication_field_affil_zipcode: + rval = "postal code"; + break; } - - if (cds->product != NULL) - { - protbsp = BioseqFindFromSeqLoc (cds->product); - if (protbsp != NULL) - { - prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_PROT, &fcontext); - /* if there is no full-length protein feature, make one */ - if (prot == NULL) - { - prp = ProtRefNew (); - prot = CreateNewFeatureOnBioseq (protbsp, SEQFEAT_PROT, NULL); - if (prot != NULL) - { - prot->data.value.ptrvalue = prp; - if (indexing_needed != NULL) - { - *indexing_needed = TRUE; - } - } - } - if (prot != NULL) - { - ValNodeAddPointer (&(cdsp->prot_list), 0, prot); - } - - /* also add in mat_peptides from protein feature */ - prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &fcontext); - while (prot != NULL) - { - ValNodeAddPointer (&(cdsp->prot_list), 0, prot); - prot = SeqMgrGetNextFeature (protbsp, prot, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &fcontext); - } - } - } - return cdsp; + return rval; } -static CGPSetPtr BuildCGPSetFrommRNA (SeqFeatPtr mrna) +NLM_EXTERN ValNodePtr GetPubFieldList (void) { - SeqFeatPtr gene; - CGPSetPtr cdsp; - - if (mrna == NULL || mrna->idx.deleteme || mrna->idx.subtype != FEATDEF_mRNA) return NULL; - - cdsp = (CGPSetPtr) MemNew (sizeof (CGPSetData)); - ValNodeAddPointer (&(cdsp->mrna_list), 0, mrna); + ValNodePtr val_list = NULL; - gene = GetGeneForFeature (mrna); - if (gene != NULL) - { - 
ValNodeAddPointer (&(cdsp->gene_list), 0, gene); - /* mark gene, so that we'll know it isn't lonely */ - gene->idx.deleteme = TRUE; - } + ValNodeAddPointer (&val_list, Publication_field_title, StringSave ("title")); + ValNodeAddPointer (&val_list, Publication_field_authors, StringSave ("authors")); + ValNodeAddPointer (&val_list, Publication_field_journal, StringSave ("journal")); + ValNodeAddPointer (&val_list, Publication_field_issue, StringSave ("issue")); + ValNodeAddPointer (&val_list, Publication_field_pages, StringSave ("pages")); + ValNodeAddPointer (&val_list, Publication_field_serial_number, StringSave ("serial number")); + ValNodeAddPointer (&val_list, Publication_field_date, StringSave ("date")); + ValNodeAddPointer (&val_list, Publication_field_cit, StringSave ("citation")); + ValNodeAddPointer (&val_list, Publication_field_affiliation, StringSave ("affiliation")); + ValNodeAddPointer (&val_list, Publication_field_affil_div, StringSave ("department")); + ValNodeAddPointer (&val_list, Publication_field_affil_city, StringSave ("city")); + ValNodeAddPointer (&val_list, Publication_field_affil_sub, StringSave ("state")); + ValNodeAddPointer (&val_list, Publication_field_affil_country, StringSave ("country")); + ValNodeAddPointer (&val_list, Publication_field_affil_street, StringSave ("street")); + ValNodeAddPointer (&val_list, Publication_field_affil_email, StringSave ("email")); + ValNodeAddPointer (&val_list, Publication_field_affil_fax, StringSave ("fax")); + ValNodeAddPointer (&val_list, Publication_field_affil_phone, StringSave ("phone")); + ValNodeAddPointer (&val_list, Publication_field_affil_zipcode, StringSave ("postal code")); - return cdsp; + return val_list; } -static CGPSetPtr BuildCGPSetFromGene (SeqFeatPtr gene) +static ValNodePtr MakePubFieldTypeList (void) { - CGPSetPtr cdsp; + ValNodePtr field_list = NULL; - if (gene == NULL || gene->idx.deleteme || gene->idx.subtype != FEATDEF_GENE) { - return NULL; - } + ValNodeAddInt (&field_list, 
FieldType_pub, Publication_field_title); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_authors); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_journal); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_issue); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_pages); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_serial_number); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_date); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_cit); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affiliation); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_div); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_city); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_sub); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_country); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_street); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_email); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_fax); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_phone); + ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_zipcode); - cdsp = CGPSetNew (); - ValNodeAddPointer (&(cdsp->gene_list), 0, gene); - return cdsp; + return field_list; } -static void UnmarkFeatureList (ValNodePtr list) +NLM_EXTERN Boolean IsPublicationConstraintEmpty (PublicationConstraintPtr constraint) { - SeqFeatPtr sfp; + Boolean rval = FALSE; - while (list != NULL) - { - sfp = list->data.ptrvalue; - if (sfp != NULL) - { - sfp->idx.deleteme = FALSE; - } - list = list->next; + if (constraint == NULL + || (constraint->type == Pub_type_any + && (constraint->field == NULL + || IsStringConstraintEmpty (constraint->field->constraint)) + && (constraint->special_field == NULL + || constraint->special_field->constraint == NULL))) { + 
rval = TRUE; } + return rval; } -static void -AdjustCGPObjectListForMatPeptides -(ValNodePtr PNTR cgp_list, - FieldTypePtr field1, - FieldTypePtr field2, - ConstraintChoiceSetPtr constraints) +NLM_EXTERN Int4 GetPubMLStatus (PubPtr the_pub) { - ConstraintChoiceSetPtr mat_peptide_constraints = NULL; - ValNodePtr vnp, vnp_prev, vnp_next; - ValNodePtr m_vnp, m_vnp_prev, m_vnp_next, mat_peptide_list; - CGPSetPtr cdsp; - SeqFeatPtr sfp; - - if (cgp_list == NULL - || *cgp_list == NULL - || constraints == NULL - || (field1 == NULL && field2 == NULL) /* no fields specified */ - || (!IsFieldTypeMatPeptideRelated (field1) && !IsFieldTypeMatPeptideRelated(field2))) { - return; + CitGenPtr cgp; + CitSubPtr csp; + CitArtPtr cap; + CitBookPtr cbp; + CitJourPtr cjp; + ImprintPtr imp = NULL; + Int4 status = Pub_type_any; + + if (the_pub == NULL || the_pub->data.ptrvalue == NULL) + { + return Pub_type_any; } - - /* get list of constraints that apply to mat-peptide features */ - while (constraints != NULL) { - if (IsConstraintChoiceMatPeptideRelated (constraints)) { - ValNodeLink (&mat_peptide_constraints, AsnIoMemCopy (constraints, (AsnReadFunc) ConstraintChoiceAsnRead, (AsnWriteFunc) ConstraintChoiceAsnWrite)); - } - constraints = constraints->next; - } - if (mat_peptide_constraints == NULL) { - return; - } - - /* if both fields are mat-peptide related, or one is mat-peptide related and the other is NULL, - * convert sets to lists of mat-peptide features - * otherwise just remove mat-peptide features from the prot-list that do not match the constraints. 
- */ - if ((field1 != NULL && !IsFieldTypeMatPeptideRelated (field1)) - || (field2 != NULL && !IsFieldTypeMatPeptideRelated (field2))) { - for (vnp = *cgp_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == 0) { - cdsp = (CGPSetPtr) vnp->data.ptrvalue; - m_vnp_prev = NULL; - for (m_vnp = cdsp->prot_list; m_vnp != NULL; m_vnp = m_vnp_next) { - m_vnp_next = m_vnp->next; - sfp = m_vnp->data.ptrvalue; - if (sfp == NULL - || (sfp->idx.subtype == FEATDEF_mat_peptide_aa - && !DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, mat_peptide_constraints))) { - if (m_vnp_prev == NULL) { - cdsp->prot_list = m_vnp->next; - } else { - m_vnp_prev->next = m_vnp->next; - } - m_vnp->next = NULL; - m_vnp = ValNodeFree (m_vnp); - } else { - m_vnp_prev = m_vnp; - } - } + + switch (the_pub->choice) + { + case PUB_Gen : + cgp = (CitGenPtr) the_pub->data.ptrvalue; + if (cgp->cit != NULL && StringICmp (cgp->cit, "unpublished") == 0) + { + status = Pub_type_unpublished; } - } - } else { - vnp_prev = NULL; - for (vnp = *cgp_list; vnp != NULL; vnp = vnp_next) { - vnp_next = vnp->next; - if (vnp->choice == 0) { - mat_peptide_list = NULL; - cdsp = (CGPSetPtr) vnp->data.ptrvalue; - for (m_vnp = cdsp->prot_list; m_vnp != NULL; m_vnp = m_vnp->next) { - sfp = m_vnp->data.ptrvalue; - if (sfp->idx.subtype == FEATDEF_mat_peptide_aa - && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, mat_peptide_constraints)) { - ValNodeAddPointer (&mat_peptide_list, OBJ_SEQFEAT, sfp); - } - } - if (mat_peptide_list == NULL) { - if (vnp_prev == NULL) { - *cgp_list = vnp->next; - } else { - vnp_prev->next = vnp->next; - } - vnp->next = NULL; - vnp = FreeObjectList (vnp); - } else { - m_vnp = mat_peptide_list; - while (m_vnp->next != NULL) { - m_vnp = m_vnp->next; - } - if (vnp_prev == NULL) { - *cgp_list = mat_peptide_list; - } else { - vnp_prev->next = mat_peptide_list; - } - m_vnp->next = vnp_next; - vnp_prev = m_vnp; - vnp->next = NULL; - vnp = FreeObjectList (vnp); + else + { + status = 
Pub_type_published; + } + break; + case PUB_Sub : + csp = (CitSubPtr) the_pub->data.ptrvalue; + status = Pub_type_submitter_block; + break; + case PUB_Article : + cap = (CitArtPtr) the_pub->data.ptrvalue; + if (cap->from == 1) + { + cjp = (CitJourPtr) cap->fromptr; + if (cjp != NULL) + { + imp = cjp->imp; } - } else { - vnp_prev = vnp; } - } + else if (cap->from == 2 || cap->from == 3) + { + cbp = (CitBookPtr) cap->fromptr; + if (cbp != NULL) { + imp = cbp->imp; + } + } + break; + case PUB_Journal : + cjp = (CitJourPtr) the_pub->data.ptrvalue; + imp = cjp->imp; + case PUB_Book : + case PUB_Man : + cbp = (CitBookPtr) the_pub->data.ptrvalue; + imp = cbp->imp; + break; + case PUB_Patent : + status = Pub_type_published; + break; + default : + break; + } - mat_peptide_constraints = ConstraintChoiceSetFree (mat_peptide_constraints); -} - - -static ValNodePtr BuildCGPSetList (Uint2 entityID, AECRActionPtr act) -{ - SeqEntryPtr sep; - BuildCGPSetData b; - CGPSetPtr cdsp; - ValNodePtr vnp, vnp_next, vnp_prev; - ValNodePtr cdset_list = NULL; - SeqFeatPtr cds, gene, mrna; - Boolean need_indexing = FALSE; - ApplyActionPtr a; - EditActionPtr e; - ConvertActionPtr c; - CopyActionPtr cp; - SwapActionPtr s; - AECRParseActionPtr pa; - RemoveActionPtr r; - FieldTypePtr field_from, field_to; - - sep = GetTopSeqEntryForEntityID (entityID); - - b.cds_list = NULL; - b.gene_list = NULL; - b.mrna_list = NULL; - - VisitFeaturesInSep (sep, &b, BuildCGPSetCallback); - - /* build cdsets that have coding regions */ - for (vnp = b.cds_list; vnp != NULL; vnp = vnp->next) + if (imp != NULL) { - cds = (SeqFeatPtr) vnp->data.ptrvalue; - if (cds == NULL) continue; - cdsp = BuildCGPSetFromCodingRegion (cds, &need_indexing); - if (cdsp != NULL) + if (imp->prepub == 0) { - ValNodeAddPointer (&cdset_list, 0, cdsp); + status = Pub_type_published; } - } - if (need_indexing) - { - /* indexing because we have created full-length protein features */ - SeqMgrIndexFeatures (entityID, NULL); - } - - /* build 
cdsets for mrna features that don't have coding regions */ - for (vnp = b.mrna_list; vnp != NULL; vnp = vnp->next) - { - mrna = (SeqFeatPtr) vnp->data.ptrvalue; - if (mrna == NULL || mrna->idx.deleteme) continue; - cdsp = BuildCGPSetFrommRNA (mrna); - if (cdsp != NULL) + else if (imp->prepub == 2) { - ValNodeAddPointer (&cdset_list, 0, cdsp); + status = Pub_type_in_press; } - } - - /* build cdsets for lonely genes / features with gene xrefs that are not coding regions or mrnas */ - for (vnp = b.gene_list; vnp != NULL; vnp = vnp->next) - { - gene = (SeqFeatPtr) vnp->data.ptrvalue; - if (gene == NULL || gene->idx.deleteme) continue; - cdsp = BuildCGPSetFromGene (gene); - if (cdsp != NULL) { - ValNodeAddPointer (&cdset_list, 0, cdsp); + else if (imp->prepub == 1 && the_pub->choice == PUB_Sub) + { + status = Pub_type_submitter_block; + } + else + { + status = Pub_type_unpublished; } + } + return status; +} - /* now unmark features */ - UnmarkFeatureList (b.cds_list); - UnmarkFeatureList (b.mrna_list); - UnmarkFeatureList (b.gene_list); - b.cds_list = ValNodeFree (b.cds_list); - b.mrna_list = ValNodeFree (b.mrna_list); - b.gene_list = ValNodeFree (b.gene_list); +static Boolean DoesPubFieldMatch (PubdescPtr pdp, PubFieldConstraintPtr field) +{ + Boolean rval = FALSE, match_all = TRUE; + PubPtr pub; + CharPtr tmp; - /* now remove sets that don't match our choice constraint */ - if (act != NULL && act->constraint != NULL) { - vnp_prev = NULL; - for (vnp = cdset_list; vnp != NULL; vnp = vnp_next) - { - vnp_next = vnp->next; - if (!DoesObjectMatchConstraintChoiceSet (0, vnp->data.ptrvalue, act->constraint)) - { - if (vnp_prev == NULL) - { - cdset_list = vnp->next; - } - else - { - vnp_prev->next = vnp->next; - } - vnp->next = NULL; - FreeCGPSetList (vnp); + if (pdp == NULL) return FALSE; + if (field == NULL) return TRUE; + + if (field->constraint->not_present) { + match_all = TRUE; + for (pub = pdp->pub; pub != NULL && match_all; pub = pub->next) { + tmp = GetPubFieldFromPub 
(pub, field->field, NULL); + if (!DoesStringMatchConstraint (tmp, field->constraint)) { + match_all = FALSE; } - else - { - vnp_prev = vnp; + tmp = MemFree (tmp); + } + rval = match_all; + } else { + for (pub = pdp->pub; pub != NULL && !rval; pub = pub->next) { + tmp = GetPubFieldFromPub (pub, field->field, field->constraint); + if (tmp != NULL) { + rval = TRUE; } + tmp = MemFree (tmp); } } - - /* adjust if action fields are mat-peptide specific */ - if (act != NULL && act->action != NULL && act->action->data.ptrvalue != NULL) { - switch (act->action->choice) { - case ActionChoice_apply: - a = (ApplyActionPtr) act->action->data.ptrvalue; - AdjustCGPObjectListForMatPeptides (&cdset_list, a->field, NULL, act->constraint); - break; - case ActionChoice_edit: - e = (EditActionPtr) act->action->data.ptrvalue; - AdjustCGPObjectListForMatPeptides (&cdset_list, e->field, NULL, act->constraint); - break; - case ActionChoice_convert: - c = (ConvertActionPtr) act->action->data.ptrvalue; - field_from = GetFromFieldFromFieldPair (c->fields); - field_to = GetToFieldFromFieldPair (c->fields); - AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint); - field_from = FieldTypeFree (field_from); - field_to = FieldTypeFree (field_to); - break; - case ActionChoice_copy: - cp = (CopyActionPtr) act->action->data.ptrvalue; - field_from = GetFromFieldFromFieldPair (cp->fields); - field_to = GetToFieldFromFieldPair (cp->fields); - AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint); - field_from = FieldTypeFree (field_from); - field_to = FieldTypeFree (field_to); - break; - case ActionChoice_swap: - s = (SwapActionPtr) act->action->data.ptrvalue; - field_from = GetFromFieldFromFieldPair (s->fields); - field_to = GetToFieldFromFieldPair (s->fields); - AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint); - field_from = FieldTypeFree (field_from); - field_to = FieldTypeFree (field_to); - break; 
- case ActionChoice_remove: - r = (RemoveActionPtr) act->action->data.ptrvalue; - AdjustCGPObjectListForMatPeptides (&cdset_list, r->field, NULL, act->constraint); - break; - case ActionChoice_parse: - pa = (AECRParseActionPtr) act->action->data.ptrvalue; - field_from = GetFromFieldFromFieldPair (pa->fields); - field_to = GetToFieldFromFieldPair (pa->fields); - AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint); - field_from = FieldTypeFree (field_from); - field_to = FieldTypeFree (field_to); - break; - } - } - return cdset_list; -} + return rval; +} -static void AlsoChangeMrnaForObject (Uint1 choice, Pointer data) +static Boolean DoesPubFieldSpecialMatch (PubdescPtr pdp, PubFieldSpecialConstraintPtr field) { - CharPtr str; - SeqFeatPtr sfp, mrna; - SeqMgrFeatContext context; - FeatureField f; + Boolean rval = FALSE; + PubPtr pub; + CharPtr tmp; - if (choice == 0) { - str = GetFieldValueFromCGPSet (data, CDSGeneProt_field_prot_name, NULL); - SetFieldValueInCGPSet (data, CDSGeneProt_field_mrna_product, NULL, str, ExistingTextOption_replace_old); - str = MemFree (str); - } else if (choice == OBJ_SEQFEAT) { - sfp = (SeqFeatPtr) data; - if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) { - mrna = SeqMgrGetOverlappingmRNA (sfp->location, &context); - if (mrna != NULL) { - f.type = Feature_type_cds; - f.field = ValNodeNew(NULL); - f.field->next = NULL; - f.field->choice = FeatQualChoice_legal_qual; - f.field->data.intvalue = Feat_qual_legal_product; - str = GetQualFromFeature (sfp, &f, NULL); - f.type = Feature_type_mRNA; - SetQualOnFeature (mrna, &f, NULL, str, ExistingTextOption_replace_old); - str = MemFree (str); - f.field = ValNodeFree (f.field); + if (pdp == NULL) return FALSE; + if (field == NULL) return TRUE; + + if (field->constraint->choice == PubFieldSpecialConstraintType_is_present) { + for (pub = pdp->pub; pub != NULL && !rval; pub = pub->next) { + tmp = GetPubFieldFromPub (pub, field->field, NULL); + if 
(!StringHasNoText (tmp)) { + /* at least one is present and non-empty */ + rval = TRUE; } - } + tmp = MemFree (tmp); + } + } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_not_present) { + rval = TRUE; + for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) { + tmp = GetPubFieldFromPub (pub, field->field, NULL); + if (!StringHasNoText (tmp)) { + /* at least one is present and non-empty */ + rval = FALSE; + } + tmp = MemFree (tmp); + } + } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_all_caps) { + rval = TRUE; + for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) { + tmp = GetPubFieldFromPub (pub, field->field, NULL); + if (tmp != NULL && !IsAllCaps (tmp)) { + /* at least one is not all caps */ + rval = FALSE; + } + tmp = MemFree (tmp); + } + } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_all_lower) { + rval = TRUE; + for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) { + tmp = GetPubFieldFromPub (pub, field->field, NULL); + if (tmp != NULL && !IsAllLowerCase (tmp)) { + /* at least one is not all caps */ + rval = FALSE; + } + tmp = MemFree (tmp); + } + } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_all_punct) { + rval = TRUE; + for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) { + tmp = GetPubFieldFromPub (pub, field->field, NULL); + if (tmp != NULL && !IsAllPunctuation (tmp)) { + /* at least one is not all punctuation */ + rval = FALSE; + } + tmp = MemFree (tmp); + } } + + return rval; } -NLM_EXTERN Int4 DoApplyActionToObjectListEx (ApplyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) +static Boolean DoesPubMatchPublicationConstraint (PubdescPtr pdp, PublicationConstraintPtr constraint) { - ValNodePtr vnp; - Int4 num_succeed = 0, num_fail = 0; + Boolean type_ok = TRUE, rval = FALSE; + PubPtr pub; - if (action == NULL || object_list == NULL) return 0; + if (pdp == 
NULL) return FALSE; + if (IsPublicationConstraintEmpty (constraint)) return TRUE; - for (vnp = object_list; vnp != NULL; vnp = vnp->next) { - if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, action->value, action->existing_text, batch_extra)) { - if (also_change_mrna) { - AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); + if (constraint->type != Pub_type_any) { + type_ok = FALSE; + for (pub = pdp->pub; pub != NULL && !type_ok; pub = pub->next) { + if (GetPubMLStatus (pub) == constraint->type) { + type_ok = TRUE; } - num_succeed ++; - } else { - num_fail++; } } - - return num_succeed; -} - - -NLM_EXTERN Int4 DoApplyActionToObjectList (ApplyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) -{ - return DoApplyActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); + if (type_ok) { + rval = (constraint->field == NULL || DoesPubFieldMatch (pdp, constraint->field)) + && (constraint->special_field == NULL || DoesPubFieldSpecialMatch (pdp, constraint->special_field)); + } + return rval; } -NLM_EXTERN Int4 DoEditActionToObjectListEx (EditActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, BatchExtraPtr batch_extra) +static Boolean DoesObjectMatchPublicationConstraint (Uint1 choice, Pointer data, PublicationConstraintPtr constraint) { - ValNodePtr vnp; - Int4 num_succeed = 0, num_fail = 0; - StringConstraintPtr scp; - CharPtr str, new_str; - - if (action == NULL || object_list == NULL) return 0; + Boolean rval = TRUE; + SeqFeatPtr sfp; + SeqDescrPtr sdp; - scp = StringConstraintFromFieldEdit (action->edit); + if (data == NULL) return FALSE; + if (IsPublicationConstraintEmpty (constraint)) return TRUE; - for (vnp = object_list; vnp != NULL; vnp = vnp->next) { - str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, batch_extra); - new_str = ApplyEditToString (str, action->edit); - if (StringCmp (str, new_str) != 0 - && 
SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, new_str, ExistingTextOption_replace_old, batch_extra)) { - if (also_change_mrna) { - AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); + switch (choice) { + case OBJ_SEQFEAT: + sfp = (SeqFeatPtr) data; + if (sfp->data.choice == SEQFEAT_PUB) { + rval = DoesPubMatchPublicationConstraint (sfp->data.value.ptrvalue, constraint); } - num_succeed ++; - } else { - num_fail++; - } - new_str = MemFree (new_str); - str = MemFree (str); + break; + case OBJ_SEQDESC: + sdp = (SeqDescrPtr) data; + if (sdp->choice == Seq_descr_pub) { + rval = DoesPubMatchPublicationConstraint (sdp->data.ptrvalue, constraint); + } + break; } - return num_succeed; + return rval; } -NLM_EXTERN Int4 DoEditActionToObjectList (EditActionPtr action, ValNodePtr object_list, Boolean also_change_mrna) +NLM_EXTERN Boolean IsFieldConstraintEmpty (FieldConstraintPtr constraint) { - return DoEditActionToObjectListEx (action, object_list, also_change_mrna, NULL); -} + RnaQualPtr rq; + FeatureFieldPtr ffp; -static Boolean IsFieldTypeProteinDesc (FieldTypePtr ft); + if (constraint == NULL || constraint->field == NULL || IsStringConstraintEmpty (constraint->string_constraint)) { + return TRUE; + } else if (constraint->field->choice == FieldType_rna_field + && ((rq = (RnaQualPtr)constraint->field->data.ptrvalue) == NULL + || rq->type == NULL)) { + return TRUE; + } else if (constraint->field->choice == FieldType_feature_field + && (ffp = (FeatureFieldPtr)constraint->field->data.ptrvalue) == NULL) { + return TRUE; + } else { + return FALSE; + } +} -NLM_EXTERN Int4 DoConvertActionToObjectListEx (ConvertActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) +static Boolean DoesObjectMatchFeatureFieldConstraint (Uint1 choice, Pointer data, FeatureFieldPtr ffp, StringConstraintPtr string_constraint) { - ValNodePtr vnp; - Int4 num_succeed = 0, num_fail = 0; - CharPtr str, 
from_val, field_name = NULL; - FieldTypePtr field_from, field_to; - StringConstraint remove_constraint; - Boolean fail; - - if (action == NULL || object_list == NULL || action->fields == NULL) return 0; - - field_from = GetFromFieldFromFieldPair (action->fields); - field_to = GetToFieldFromFieldPair (action->fields); + Boolean rval = FALSE; + CharPtr str; + BioseqPtr bsp; + Int4 subtype; + SeqFeatPtr sfp; + SeqMgrFeatContext fcontext; + Boolean not_present; + CGPSetPtr cgp; + Uint2 cds_gene_prot_field; - if (action->strip_name) { - field_name = SummarizeFieldType (field_to); + if (data == NULL) { + return FALSE; } - - if (action->fields->choice == FieldPairType_molinfo_field) { - for (vnp = object_list; vnp != NULL; vnp = vnp->next) { - str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, NULL, batch_extra); - from_val = GetSequenceQualValName (field_from->data.ptrvalue); - if (StringCmp (str, from_val) == 0 - && SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, ExistingTextOption_replace_old, batch_extra)) { - num_succeed ++; + if (IsStringConstraintEmpty (string_constraint)) { + return TRUE; + } + + switch (choice) { + case OBJ_SEQFEAT: + not_present = string_constraint->not_present; + string_constraint->not_present = FALSE; + str = GetQualFromFeature ((SeqFeatPtr) data, ffp, string_constraint); + if (str != NULL) { + rval = TRUE; + str = MemFree (str); } - str = MemFree (str); - } - } else { - for (vnp = object_list; vnp != NULL; vnp = vnp->next) { - /* there may be multiple qualifiers */ - remove_constraint.case_sensitive = TRUE; - remove_constraint.match_location = String_location_equals; - remove_constraint.not_present = FALSE; - remove_constraint.whole_word = FALSE; - fail = FALSE; - - while ((str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra)) != NULL && !fail) { - remove_constraint.match_text = StringSave (str); - if (action->strip_name) { - 
RemoveFieldNameFromString (field_name, str); - } - FixCapitalizationInString(&str, action->capitalization, NULL); - - if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, action->existing_text, batch_extra) - && (action->keep_original || RemoveFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, &remove_constraint))) { - if (also_change_mrna) { - AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); + if (not_present) { + rval = !rval; + string_constraint->not_present = TRUE; + } + break; + case OBJ_SEQDESC: + case OBJ_BIOSEQ: + bsp = GetSequenceForObject (choice, data); + if (bsp != NULL) { + subtype = GetFeatdefFromFeatureType (ffp->type); + not_present = string_constraint->not_present; + string_constraint->not_present = FALSE; + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, subtype, &fcontext); + !rval && sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, subtype, &fcontext)) { + str = GetQualFromFeature (sfp, ffp, string_constraint); + if (str != NULL) { + rval = TRUE; + str = MemFree (str); } - num_succeed ++; - } else { - num_fail++; - fail = TRUE; } - str = MemFree (str); - remove_constraint.match_text = MemFree (remove_constraint.match_text); - if (action->keep_original) { - break; + if (not_present) { + rval = !rval; + string_constraint->not_present = TRUE; } } - } + break; + case 0: + cgp = (CGPSetPtr) data; + cds_gene_prot_field = CDSGeneProtFieldFromFeatureField (ffp); + if (cds_gene_prot_field > 0) { + not_present = string_constraint->not_present; + string_constraint->not_present = FALSE; + str = GetFieldValueFromCGPSet (cgp, cds_gene_prot_field, string_constraint); + if (str != NULL) { + rval = TRUE; + str = MemFree (str); + } + if (not_present) { + rval = !rval; + string_constraint->not_present = TRUE; + } + } + break; } - - field_from = FieldTypeFree (field_from); - field_to = FieldTypeFree (field_to); - field_name = MemFree (field_name); - - return num_succeed; + return rval; } -NLM_EXTERN Int4 
DoConvertActionToObjectList (ConvertActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) +static Boolean DoesObjectMatchRnaQualConstraint (Uint1 choice, Pointer data, RnaQualPtr rq, StringConstraintPtr string_constraint) { - return DoConvertActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); + Boolean rval = FALSE; + CharPtr str; + BioseqPtr bsp; + Int4 subtype; + SeqFeatPtr sfp; + SeqMgrFeatContext fcontext; + Boolean not_present; + Uint1 feat_choice = 0; + + if (data == NULL) { + return FALSE; + } + if (IsStringConstraintEmpty (string_constraint)) { + return TRUE; + } + + switch (choice) { + case OBJ_SEQFEAT: + not_present = string_constraint->not_present; + string_constraint->not_present = FALSE; + str = GetRNAQualFromFeature ((SeqFeatPtr) data, rq, string_constraint, NULL); + if (str != NULL) { + rval = TRUE; + str = MemFree (str); + } + if (not_present) { + rval = !rval; + string_constraint->not_present = TRUE; + } + break; + case OBJ_SEQDESC: + case OBJ_BIOSEQ: + bsp = GetSequenceForObject (choice, data); + if (bsp != NULL) { + if (rq->type == NULL || rq->type->choice == RnaFeatType_any) { + feat_choice = SEQFEAT_RNA; + subtype = 0; + } else { + feat_choice = 0; + subtype = GetFeatdefFromFeatureType(GetFeatureTypeForRnaType(rq->type->choice)); + } + + not_present = string_constraint->not_present; + string_constraint->not_present = FALSE; + for (sfp = SeqMgrGetNextFeature (bsp, NULL, feat_choice, subtype, &fcontext); + !rval && sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, feat_choice, subtype, &fcontext)) { + str = GetRNAQualFromFeature (sfp, rq, string_constraint, NULL); + if (str != NULL) { + rval = TRUE; + str = MemFree (str); + } + } + if (not_present) { + rval = !rval; + string_constraint->not_present = TRUE; + } + } + break; + } + return rval; } -NLM_EXTERN Int4 DoCopyActionToObjectListEx (CopyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, 
BatchExtraPtr batch_extra) +static Boolean DoesObjectMatchFieldConstraint (Uint1 choice, Pointer data, FieldConstraintPtr constraint) { - ValNodePtr vnp; - Int4 num_succeed = 0, num_fail = 0; - CharPtr str; - FieldTypePtr field_from, field_to; + Boolean rval = FALSE; + BioSourcePtr biop; + BioseqPtr bsp; + CharPtr str; + FeatureFieldPtr ffp; - if (action == NULL || object_list == NULL) return 0; - field_from = GetFromFieldFromFieldPair (action->fields); - field_to = GetToFieldFromFieldPair (action->fields); + if (data == NULL) return FALSE; + if (IsFieldConstraintEmpty (constraint)) { + return TRUE; + } - for (vnp = object_list; vnp != NULL; vnp = vnp->next) { - str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra); - if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, action->existing_text, batch_extra)) { - if (also_change_mrna) { - AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); + switch (constraint->field->choice) { + case FieldType_source_qual: + biop = GetBioSourceFromObject (choice, data); + if (biop != NULL) { + str = GetSourceQualFromBioSource (biop, constraint->field->data.ptrvalue, constraint->string_constraint); + if (str != NULL) { + rval = TRUE; + str = MemFree (str); + } } - num_succeed ++; - } else { - num_fail++; - } - str = MemFree (str); + break; + case FieldType_feature_field: + ffp = (FeatureFieldPtr) constraint->field->data.ptrvalue; + rval = DoesObjectMatchFeatureFieldConstraint (choice, data, ffp, constraint->string_constraint); + break; + case FieldType_rna_field: + rval = DoesObjectMatchRnaQualConstraint (choice, data, constraint->field->data.ptrvalue, constraint->string_constraint); + break; + case FieldType_cds_gene_prot: + ffp = FeatureFieldFromCDSGeneProtField (constraint->field->data.intvalue); + rval = DoesObjectMatchFeatureFieldConstraint (choice, data, ffp, constraint->string_constraint); + ffp = FeatureFieldFree (ffp); + break; + case 
FieldType_molinfo_field: + bsp = GetSequenceForObject (choice, data); + if (bsp != NULL) { + str = GetSequenceQualFromBioseq (bsp, constraint->field->data.ptrvalue); + if (str == NULL && constraint->string_constraint->not_present) { + rval = TRUE; + } else if (str != NULL && DoesStringMatchConstraint (str, constraint->string_constraint)) { + rval = TRUE; + } + str = MemFree (str); + } + break; + case FieldType_misc: + bsp = GetSequenceForObject (choice, data); + if (bsp != NULL) { + str = GetFieldValueForObjectEx (OBJ_BIOSEQ, bsp, constraint->field, constraint->string_constraint, NULL); + if (str != NULL) { + rval = TRUE; + } + str = MemFree (str); + } + break; +/* TODO LATER */ + case FieldType_pub: + break; } - - field_from = FieldTypeFree (field_from); - field_to = FieldTypeFree (field_to); - return num_succeed; + return rval; } -NLM_EXTERN Int4 DoCopyActionToObjectList (CopyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) +static CharPtr GetFeatureFieldFromObject (Uint1 choice, Pointer data, FeatureFieldPtr ffp, StringConstraintPtr scp) { - return DoCopyActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); + CharPtr rval = NULL; + BioseqPtr bsp; + CGPSetPtr cgp; + SeqFeatPtr sfp; + SeqMgrFeatContext fcontext; + Int4 subtype; + Uint2 cds_gene_prot_field; + + if (ffp == NULL || data == NULL) { + return NULL; + } + switch (choice) { + case OBJ_SEQFEAT: + rval = GetQualFromFeature ((SeqFeatPtr) data, ffp, scp); + break; + case OBJ_SEQDESC: + case OBJ_BIOSEQ: + bsp = GetSequenceForObject (choice, data); + if (bsp != NULL) { + subtype = GetFeatdefFromFeatureType (ffp->type); + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, subtype, &fcontext); + rval == NULL && sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, subtype, &fcontext)) { + rval = GetQualFromFeature (sfp, ffp, scp); + } + } + break; + case 0: + cgp = (CGPSetPtr) data; + cds_gene_prot_field = CDSGeneProtFieldFromFeatureField (ffp); + if 
(cds_gene_prot_field > 0) { + rval = GetFieldValueFromCGPSet (cgp, cds_gene_prot_field, scp); + } + break; + } + return rval; } -NLM_EXTERN Int4 DoSwapActionToObjectListEx (SwapActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) + +static CharPtr GetConstraintFieldFromObject (Uint1 choice, Pointer data, ValNodePtr field, StringConstraintPtr scp) { - ValNodePtr vnp; - Int4 num_succeed = 0, num_fail = 0; - CharPtr str1, str2; - FieldTypePtr field_from, field_to; + BioSourcePtr biop; + BioseqPtr bsp; + SeqFeatPtr sfp; + SeqMgrFeatContext fcontext; + Int4 subtype; + FeatureFieldPtr ffp; + RnaQualPtr rq; + Uint1 feat_choice = 0; + CharPtr rval = NULL; - if (action == NULL || object_list == NULL) return 0; - field_from = GetFromFieldFromFieldPair (action->fields); - field_to = GetToFieldFromFieldPair (action->fields); + if (data == NULL || field == NULL) { + return NULL; + } - for (vnp = object_list; vnp != NULL; vnp = vnp->next) { - str1 = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra); - str2 = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, batch_extra); - if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str1, ExistingTextOption_replace_old, batch_extra) - && SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, str2, ExistingTextOption_replace_old, batch_extra)) { - if (also_change_mrna) { - AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); + switch (field->choice) { + case FieldType_source_qual: + biop = GetBioSourceFromObject (choice, data); + if (biop != NULL) { + rval = GetSourceQualFromBioSource (biop, field->data.ptrvalue, scp); } - num_succeed ++; - } else { - num_fail++; - } - str1 = MemFree (str1); - str2 = MemFree (str2); + break; + case FieldType_feature_field: + rval = GetFeatureFieldFromObject(choice, data, (FeatureFieldPtr) field->data.ptrvalue, scp); + 
break; + case FieldType_rna_field: + rq = (RnaQualPtr) field->data.ptrvalue; + switch (choice) { + case OBJ_SEQFEAT: + rval = GetRNAQualFromFeature ((SeqFeatPtr) data, rq, scp, NULL); + break; + case OBJ_SEQDESC: + case OBJ_BIOSEQ: + bsp = GetSequenceForObject (choice, data); + if (bsp != NULL) { + if (rq->type == NULL || rq->type->choice == RnaFeatType_any) { + feat_choice = SEQFEAT_RNA; + subtype = 0; + } else { + feat_choice = 0; + subtype = GetFeatdefFromFeatureType(GetFeatureTypeForRnaType(rq->type->choice)); + } + + for (sfp = SeqMgrGetNextFeature (bsp, NULL, feat_choice, subtype, &fcontext); + rval == NULL && sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, feat_choice, subtype, &fcontext)) { + rval = GetRNAQualFromFeature (sfp, rq, scp, NULL); + } + } + break; + } + break; + case FieldType_cds_gene_prot: + ffp = FeatureFieldFromCDSGeneProtField (field->data.intvalue); + rval = GetFeatureFieldFromObject (choice, data, ffp, scp); + ffp = FeatureFieldFree (ffp); + break; + case FieldType_molinfo_field: + bsp = GetSequenceForObject (choice, data); + if (bsp != NULL) { + rval = GetSequenceQualFromBioseq (bsp, field->data.ptrvalue); + if (rval != NULL && scp != NULL && !DoesStringMatchConstraint (rval, scp)) { + rval = MemFree (rval); + } + } + break; + case FieldType_misc: + bsp = GetSequenceForObject (choice, data); + if (bsp != NULL) { + rval = GetFieldValueForObjectEx (OBJ_BIOSEQ, bsp, field, scp, NULL); + } + break; } - field_from = FieldTypeFree (field_from); - field_to = FieldTypeFree (field_to); - return num_succeed; + + return rval; } -NLM_EXTERN Int4 DoSwapActionToObjectList (SwapActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) +static Boolean DoesObjectMatchFieldMissingConstraint(Uint1 choice, Pointer data, ValNodePtr field) { - return DoSwapActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); + Boolean rval = FALSE; + CharPtr str; + + if (data == NULL || field == NULL) return FALSE; + 
+ str = GetConstraintFieldFromObject(choice, data, field, NULL); + if (str == NULL) { + rval = TRUE; + } + str = MemFree (str); + return rval; } -NLM_EXTERN Int4 DoRemoveActionToObjectList (RemoveActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) +NLM_EXTERN Boolean IsMolinfoFieldConstraintEmpty (MolinfoFieldConstraintPtr constraint) { - ValNodePtr vnp; - Int4 num_succeed = 0, num_fail = 0; + if (constraint == NULL || constraint->field == NULL) { + return TRUE; + } else { + return FALSE; + } +} - if (action == NULL || object_list == NULL) return 0; - for (vnp = object_list; vnp != NULL; vnp = vnp->next) { - if (RemoveFieldValueForObject (vnp->choice, vnp->data.ptrvalue, action->field, scp)) { - if (also_change_mrna) { - AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); - } - num_succeed ++; - } else { - num_fail++; +static Boolean DoesObjectMatchMolinfoFieldConstraint (Uint1 choice, Pointer data, MolinfoFieldConstraintPtr constraint) +{ + BioseqPtr bsp; + MolInfoPtr mip; + Boolean rval = FALSE; + + bsp = GetSequenceForObject (choice, data); + if (bsp == NULL) { + rval = FALSE; + } else if (IsMolinfoFieldConstraintEmpty(constraint)) { + rval = TRUE; + } else { + mip = GetMolInfoForBioseq (bsp); + rval = FALSE; + switch (constraint->field->choice) { + case MolinfoField_molecule: + if (mip == NULL && constraint->field->data.intvalue == 0) { + rval = TRUE; + } else if (mip != NULL && mip->biomol == BiomolFromMoleculeType (constraint->field->data.intvalue)) { + rval = TRUE; + } + break; + case MolinfoField_technique: + if (mip == NULL && constraint->field->data.intvalue == 0) { + rval = TRUE; + } else if (mip != NULL && mip->tech == TechFromTechniqueType (constraint->field->data.intvalue)) { + rval = TRUE; + } + break; + case MolinfoField_completedness: + if (mip == NULL && constraint->field->data.intvalue == 0) { + rval = TRUE; + } else if (mip != NULL && mip->completeness == CompletenessFromCompletednessType 
(constraint->field->data.intvalue)) { + rval = TRUE; + } + break; + case MolinfoField_mol_class: + if (bsp->mol == MolFromMoleculeClassType (constraint->field->data.intvalue)) { + rval = TRUE; + } + break; + case MolinfoField_topology: + if (bsp->topology == TopologyFromTopologyType (constraint->field->data.intvalue)) { + rval = TRUE; + } + break; + case MolinfoField_strand: + if (bsp->strand == StrandFromStrandType (constraint->field->data.intvalue)) { + rval = TRUE; + } + break; + } + if (constraint->is_not) { + rval = !rval; } } - return num_succeed; + + return rval; } -NLM_EXTERN Int4 DoParseActionToObjectListEx (AECRParseActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) +static Boolean DoesCodingRegionMatchTranslationConstraint (SeqFeatPtr sfp, TranslationConstraintPtr constraint) { - ValNodePtr vnp; - CharPtr str1, str2, cp, tmp; - Int4 len, num_succeed = 0, diff, left_len, right_len; - FieldTypePtr field_from, field_to; + ByteStorePtr trans_prot = NULL; + BioseqPtr actual_prot = NULL; + CharPtr translation = NULL; + Int4 translation_len = 0; + CharPtr actual = NULL; + Int4 actual_len = 0; + CharPtr stop, cp1, cp2; + Boolean rval = TRUE, alt_start = FALSE; + StringConstraintPtr scp; + Int4 pos, comp_len; + Int4 num = 0; - if (action == NULL || object_list == NULL) return 0; - field_from = GetFromFieldFromFieldPair (action->fields); - field_to = GetToFieldFromFieldPair (action->fields); + if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) { + return FALSE; + } else if (constraint == NULL) { + return TRUE; + } - for (vnp = object_list; vnp != NULL; vnp = vnp->next) { - str1 = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra); - str2 = GetTextPortionFromString (str1, action->portion); - if (str2 != NULL) { - if (action->remove_from_parsed) { - cp = FindTextPortionLocationInString (str1, action->portion); - if (cp != NULL) { - len = StringLen (str2); 
- tmp = cp; - if (action->portion != NULL && !IsTextMarkerEmpty (action->portion->left_marker) - && (tmp = FindTextMarker (str1, &left_len, action->portion->left_marker, - action->portion->case_sensitive, action->portion->whole_word)) != NULL) { - if (action->portion->include_left) { - /* adjust */ - } else if (!action->portion->include_left) { - /* adjust */ - if (action->remove_left) { - len += left_len; - } else { - cp += left_len; - } - } - } - if (!IsTextMarkerEmpty (action->portion->right_marker) - && action->remove_right - && !action->portion->include_right - && action->portion != NULL - && (tmp = FindTextMarker (tmp, &right_len, action->portion->right_marker, - action->portion->case_sensitive, action->portion->whole_word)) != NULL) { - diff = right_len; - len += diff; - } - StringCpy (cp, cp + len); - SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, str1, ExistingTextOption_replace_old, batch_extra); - } + if (constraint->actual_strings != NULL + || constraint->num_mismatches != NULL) { + actual_prot = BioseqLockById(SeqLocId(sfp->product)); + if (actual_prot != NULL) { + actual = (CharPtr) MemNew (sizeof (Char) * (actual_prot->length + 1)); + SeqPortStreamInt (actual_prot, 0, actual_prot->length - 1, Seq_strand_plus, EXPAND_GAPS_TO_DASHES, (Pointer) (actual), NULL); + actual_len = StringLen (actual); + } + } + + for (scp = constraint->actual_strings; scp != NULL && rval; scp = scp->next) { + rval = DoesStringMatchConstraint (actual, scp); + } + + if (rval) { + if (constraint->transl_strings != NULL + || constraint->internal_stops != Match_type_constraint_dont_care + || constraint->num_mismatches != NULL) { + trans_prot = ProteinFromCdRegionExEx (sfp, TRUE, FALSE, &alt_start, TRUE); /* include stop codons, do not remove trailing X/B/Z */ + if (trans_prot != NULL) { + translation = BSMerge (trans_prot, NULL); + translation_len = StringLen (translation); } - if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, 
NULL, str2, action->existing_text, batch_extra)) { - if (also_change_mrna) { - AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); + BSFree (trans_prot); + } + for (scp = constraint->transl_strings; scp != NULL && rval; scp = scp->next) { + rval = DoesStringMatchConstraint (translation, scp); + } + + if (rval && constraint->internal_stops != Match_type_constraint_dont_care) { + stop = StringChr (translation, '*'); + if (stop != NULL && stop != translation + translation_len - 1) { + if (constraint->internal_stops == Match_type_constraint_no) { + rval = FALSE; + } + } else { + if (constraint->internal_stops == Match_type_constraint_yes) { + rval = FALSE; } - num_succeed++; } } - str1 = MemFree (str1); - str2 = MemFree (str2); } - field_from = FieldTypeFree (field_from); - field_to = FieldTypeFree (field_to); - return num_succeed; -} + if (rval && constraint->num_mismatches != NULL) { + stop = StringRChr (translation, '*'); + if (stop != NULL && stop == translation + translation_len - 1) { + translation_len--; + } + stop = StringRChr (actual, '*'); + if (stop != NULL && stop == actual + actual_len - 1) { + actual_len--; + } + if (translation_len > actual_len) { + num = translation_len - actual_len; + comp_len = actual_len; + } else { + num = actual_len - translation_len; + comp_len = translation_len; + } + + cp1 = actual; + cp2 = translation; + for (pos = 0; pos < comp_len && rval; pos++) { + if (*cp1 != *cp2) { + num++; + if (constraint->num_mismatches->choice == QuantityConstraint_equals + && num > constraint->num_mismatches->data.intvalue) { + rval = FALSE; + } else if (constraint->num_mismatches->choice == QuantityConstraint_less_than + && num >= constraint->num_mismatches->data.intvalue) { + rval = FALSE; + } + } + cp1++; + cp2++; + } + if (rval) { + if (constraint->num_mismatches->choice == QuantityConstraint_greater_than + && num <= constraint->num_mismatches->data.intvalue) { + rval = FALSE; + } else if (constraint->num_mismatches->choice == 
QuantityConstraint_equals + && num != constraint->num_mismatches->data.intvalue) { + rval = FALSE; + } else if (constraint->num_mismatches->choice == QuantityConstraint_less_than + && num >= constraint->num_mismatches->data.intvalue) { + rval = FALSE; + } + } + } -NLM_EXTERN Int4 DoParseActionToObjectList (AECRParseActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) -{ - return DoParseActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); + if (actual_prot != NULL) { + BioseqUnlock(actual_prot); + } + actual = MemFree (actual); + translation = MemFree (translation); + return rval; } -static Int4 ApplyAECRActionToSeqEntry (AECRActionPtr act, SeqEntryPtr sep) +static Boolean DoesObjectMatchTranslationConstraint (Uint1 choice, Pointer data, TranslationConstraintPtr constraint) { - StringConstraintPtr scp; - ApplyActionPtr a; - ConvertActionPtr c; - RemoveActionPtr r; - EditActionPtr e; - ValNodePtr object_list = NULL; - Uint1 field_type; - Uint2 entityID; - Int4 num_succeed = 0; - FieldTypePtr field_from; - BatchExtraPtr batch_extra; - - if (act == NULL || act->action == NULL) return 0; - - batch_extra = BatchExtraNew (); - InitBatchExtraForAECRAction (batch_extra, act, sep); + Boolean rval = FALSE; + SeqFeatPtr sfp = NULL; + BioseqPtr bsp; + SeqMgrFeatContext context; - field_type = FieldTypeFromAECRAction (act); - if (field_type == FieldType_cds_gene_prot) { - entityID = ObjMgrGetEntityIDForChoice(sep); - object_list = BuildCGPSetList (entityID, act); - - } else { - object_list = GetObjectListForAECRActionEx (sep, act, batch_extra); + if (data == NULL) { + return FALSE; + } else if (constraint == NULL) { + return TRUE; } - - switch (act->action->choice) { - case ActionChoice_apply: - a = (ApplyActionPtr) act->action->data.ptrvalue; - scp = FindStringConstraintInConstraintSetForField (a->field, act->constraint); - num_succeed = DoApplyActionToObjectListEx (act->action->data.ptrvalue, object_list, 
act->also_change_mrna, scp, batch_extra); - if (a->field->choice == FieldType_misc) { - DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); + switch (choice) { + case OBJ_SEQFEAT: + /* must be coding region or protein feature */ + sfp = (SeqFeatPtr) data; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT) { + bsp = BioseqFindFromSeqLoc (sfp->location); + sfp = SeqMgrGetCDSgivenProduct (bsp, &context); } + rval = DoesCodingRegionMatchTranslationConstraint (sfp, constraint); break; - case ActionChoice_edit: - e = (EditActionPtr) act->action->data.ptrvalue; - num_succeed = DoEditActionToObjectListEx (e, object_list, act->also_change_mrna, batch_extra); - if (e->field->choice == FieldType_misc) { - DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); + case OBJ_BIOSEQ: + /* must be protein sequence, or nucleotide bioseq with only one coding region */ + bsp = data; + if (bsp != NULL) { + if (ISA_aa (bsp->mol)) { + sfp = SeqMgrGetCDSgivenProduct (bsp, &context); + } else { + sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &context); + if (SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &context) != NULL) { + sfp = NULL; + } + } + rval = DoesCodingRegionMatchTranslationConstraint (sfp, constraint); } break; - case ActionChoice_convert: - scp = NULL; - if (act->constraint != NULL) { - c = (ConvertActionPtr) act->action->data.ptrvalue; - field_from = GetFromFieldFromFieldPair (c->fields); - scp = FindStringConstraintInConstraintSetForField (field_from, act->constraint); - field_from = FieldTypeFree (field_from); - } - num_succeed = DoConvertActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, scp, batch_extra); + } + return rval; +} + + +static Boolean DoesObjectMatchConstraint (Uint1 choice, Pointer data, ConstraintChoicePtr constraint) +{ + Boolean rval = TRUE; + + if (data == NULL) return FALSE; + if (constraint == NULL) return TRUE; + + switch (constraint->choice) { + case 
ConstraintChoice_string : + rval = DoesObjectMatchStringConstraint (choice, data, constraint->data.ptrvalue); break; - case ActionChoice_swap: - num_succeed = DoSwapActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, NULL, batch_extra); + case ConstraintChoice_location : + rval = DoesObjectMatchLocationConstraint (choice, data, constraint->data.ptrvalue); break; - case ActionChoice_copy: - num_succeed = DoCopyActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, NULL, batch_extra); + case ConstraintChoice_field : + rval = DoesObjectMatchFieldConstraint (choice, data, constraint->data.ptrvalue); break; - case ActionChoice_remove: - r = (RemoveActionPtr) act->action->data.ptrvalue; - scp = FindStringConstraintInConstraintSetForField (r->field, act->constraint); - num_succeed = DoRemoveActionToObjectList (act->action->data.ptrvalue, object_list, act->also_change_mrna, scp); - if (r->field->choice == FieldType_misc) { - DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); + case ConstraintChoice_source : + rval = DoesBiosourceMatchConstraint (GetBioSourceFromObject (choice, data), constraint->data.ptrvalue); + break; + case ConstraintChoice_cdsgeneprot_qual : + if (choice == 0) { + rval = DoesCGPSetMatchQualConstraint (data, constraint->data.ptrvalue); + } else if (choice == OBJ_SEQDESC) { + rval = DoesSeqDescMatchCGPQualConstraint (data, constraint->data.ptrvalue); + } else if (choice == OBJ_SEQFEAT) { + rval = DoesFeatureMatchCGPQualConstraint (data, constraint->data.ptrvalue); + } else if (choice == OBJ_BIOSEQ) { + rval = DoesSequenceMatchCGPQualConstraint (data, constraint->data.ptrvalue); + } else { + rval = FALSE; } break; - case ActionChoice_parse: - num_succeed = DoParseActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, NULL, batch_extra); + case ConstraintChoice_cdsgeneprot_pseudo : + if (choice == 0) { + rval = DoesCGPSetMatchPseudoConstraint (data, 
constraint->data.ptrvalue); + } else if (choice == OBJ_SEQFEAT) { + rval = DoesFeatureMatchCGPPseudoConstraint (data, constraint->data.ptrvalue); + } + break; + case ConstraintChoice_sequence : + rval = DoesObjectMatchSequenceConstraint (choice, data, constraint->data.ptrvalue); + break; + case ConstraintChoice_pub: + rval = DoesObjectMatchPublicationConstraint (choice, data, constraint->data.ptrvalue); + break; + case ConstraintChoice_molinfo: + rval = DoesObjectMatchMolinfoFieldConstraint (choice, data, constraint->data.ptrvalue); + break; + case ConstraintChoice_field_missing: + rval = DoesObjectMatchFieldMissingConstraint (choice, data, constraint->data.ptrvalue); + break; + case ConstraintChoice_translation: + rval = DoesObjectMatchTranslationConstraint (choice, data, constraint->data.ptrvalue); break; } - object_list = FreeObjectList (object_list); - batch_extra = BatchExtraFree (batch_extra); - return num_succeed; + return rval; } -static AECRSamplePtr AECRSampleNew (void) +NLM_EXTERN Boolean DoesObjectMatchConstraintChoiceSet (Uint1 choice, Pointer data, ConstraintChoiceSetPtr csp) { - AECRSamplePtr sample; - - sample = (AECRSamplePtr) MemNew (sizeof (AECRSampleData)); - MemSet (sample, 0, sizeof (AECRSampleData)); - sample->all_same = TRUE; - return sample; -} + Boolean rval = TRUE; + if (data == NULL) return FALSE; -NLM_EXTERN AECRSamplePtr AECRSampleFree (AECRSamplePtr sample) -{ - if (sample != NULL) { - sample->field = FieldTypeFree (sample->field); - sample->first_value = MemFree (sample->first_value); - sample = MemFree (sample); + while (csp != NULL && rval) { + rval = DoesObjectMatchConstraint (choice, data, csp); + csp = csp->next; } - return sample; + return rval; } -NLM_EXTERN ValNodePtr AECRSampleListFree (ValNodePtr list) +NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForField (FieldTypePtr field, ConstraintChoiceSetPtr csp) { - ValNodePtr list_next; + StringConstraintPtr scp = NULL; + SourceConstraintPtr source_constraint; 
+ CDSGeneProtQualConstraintPtr cgp_constraint; + PublicationConstraintPtr pub_constraint; + FieldConstraintPtr field_constraint; + FieldType ft; - while (list != NULL) { - list_next = list->next; - list->next = NULL; - list->data.ptrvalue = AECRSampleFree (list->data.ptrvalue); - list = ValNodeFree (list); - list = list_next; + while (csp != NULL) { + switch (csp->choice) { + case ConstraintChoice_string : + scp = csp->data.ptrvalue; + break; + case ConstraintChoice_source : + source_constraint = (SourceConstraintPtr) csp->data.ptrvalue; + if (source_constraint != NULL && source_constraint->constraint != NULL) { + if (source_constraint->field1 != NULL) { + ft.choice = FieldType_source_qual; + ft.data.ptrvalue = source_constraint->field1; + ft.next = NULL; + if (DoFieldTypesMatch (field, &ft)) { + scp = source_constraint->constraint; + } + } + if (scp == NULL && source_constraint->field2 == NULL) { + ft.choice = FieldType_source_qual; + ft.data.ptrvalue = source_constraint->field2; + ft.next = NULL; + if (DoFieldTypesMatch (field, &ft)) { + scp = source_constraint->constraint; + } + } + } + break; + case ConstraintChoice_cdsgeneprot_qual : + cgp_constraint = (CDSGeneProtQualConstraintPtr) csp->data.ptrvalue; + if (field->choice == FieldType_cds_gene_prot + && cgp_constraint != NULL && cgp_constraint->constraint != NULL + && ((cgp_constraint->field1 != NULL && cgp_constraint->field1->data.intvalue == field->data.intvalue) + || (cgp_constraint->field2 != NULL && cgp_constraint->field2->data.intvalue == field->data.intvalue))) { + scp = cgp_constraint->constraint; + } + break; + case ConstraintChoice_pub : + pub_constraint = csp->data.ptrvalue; + if (pub_constraint != NULL && pub_constraint->field != NULL) { + if (field->data.intvalue == pub_constraint->field->field + && !IsStringConstraintEmpty (pub_constraint->field->constraint)) { + scp = pub_constraint->field->constraint; + } + } + break; + case ConstraintChoice_field : + field_constraint = csp->data.ptrvalue; + if 
(field_constraint != NULL + && field_constraint->field != NULL + && DoFieldTypesMatch (field, field_constraint->field)) { + scp = field_constraint->string_constraint; + } + break; + } + csp = csp->next; } - return list; + return scp; } -static void AddTextToAECRSample (AECRSamplePtr sample, CharPtr txt) +NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForFieldPair (FieldPairTypePtr fieldpair, ConstraintChoiceSetPtr csp) { - if (StringHasNoText (txt)) { - txt = MemFree (txt); - } else if (sample != NULL) { - sample->num_found ++; - if (sample->first_value == NULL) { - sample->first_value = txt; - } else { - if (sample->all_same && StringCmp (sample->first_value, txt) != 0) { - sample->all_same = FALSE; - } - txt = MemFree (txt); - } - } -} + StringConstraintPtr scp; + FieldTypePtr f; + f = GetFromFieldFromFieldPair (fieldpair); + scp = FindStringConstraintInConstraintSetForField (f, csp); + f = FieldTypeFree (f); + return scp; +} + -NLM_EXTERN AECRSamplePtr GetAECRSampleFromObjectListEx (ValNodePtr object_list, FieldTypePtr field, BatchExtraPtr batch_extra) +NLM_EXTERN StringConstraintPtr StringConstraintFromFieldEdit (FieldEditPtr edit) { - AECRSamplePtr sample; - ValNodePtr vnp, prot_vnp, bsp_list; - CharPtr txt; - CGPSetPtr cgp; - SeqFeatPtr sfp; - BatchExtraPtr b = NULL; - SeqEntryPtr sep; + StringConstraintPtr scp; - if (object_list == NULL || field == NULL) return NULL; + if (edit == NULL || edit->find_txt == NULL) return NULL; + scp = StringConstraintNew (); + scp->match_text = StringSave (edit->find_txt); - if (batch_extra == NULL) { - b = BatchExtraNew (); - batch_extra = b; - bsp_list = BioseqListForObjectList (object_list); - for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { - sep = SeqMgrGetSeqEntryForData (vnp->data.ptrvalue); - InitBatchExtraForField (batch_extra, field, sep); - } - bsp_list = ValNodeFree (bsp_list); + switch (edit->location) { + case Field_edit_location_anywhere : + scp->match_location = String_location_contains; 
+ break; + case Field_edit_location_beginning : + scp->match_location = String_location_starts; + break; + case Field_edit_location_end : + scp->match_location = String_location_ends; + break; } - sample = AECRSampleNew (); - sample->field = FieldTypeCopy (field); - for (vnp = object_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == 0 && IsFieldTypeMatPeptideRelated (field)) { - cgp = (CGPSetPtr) vnp->data.ptrvalue; - if (cgp != NULL) { - for (prot_vnp = cgp->prot_list; prot_vnp != NULL; prot_vnp = prot_vnp->next) { - sfp = (SeqFeatPtr) prot_vnp->data.ptrvalue; - if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa) { - txt = GetFieldValueForObjectEx (OBJ_SEQFEAT, sfp, field, NULL, batch_extra); - AddTextToAECRSample (sample, txt); - } - } - } - } else { - txt = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field, NULL, batch_extra); - AddTextToAECRSample (sample, txt); - } - } + scp->case_sensitive = !(edit->case_insensitive); + scp->whole_word = FALSE; + scp->not_present = FALSE; - b = BatchExtraFree (b); - return sample; + return scp; } -NLM_EXTERN AECRSamplePtr GetAECRSampleFromObjectList (ValNodePtr object_list, FieldTypePtr field) +static CharPtr ApplyEditToString (CharPtr str, FieldEditPtr edit) { - return GetAECRSampleFromObjectListEx (object_list, field, NULL); -} + CharPtr cp_found, new_str; + Int4 found_len, replace_len, new_len; + if (edit == NULL) return StringSave (str); -static void GetFieldsFromAECR (AECRActionPtr act, FieldTypePtr PNTR pField, ValNodePtr PNTR pFieldPair) -{ - ApplyActionPtr a; - EditActionPtr e; - ConvertActionPtr c; - SwapActionPtr s; - CopyActionPtr cp; - RemoveActionPtr r; - AECRParseActionPtr p; - - if (pField != NULL) { - *pField = NULL; + str = StringSave (str); + if (edit->case_insensitive) { + cp_found = StringISearch (str, edit->find_txt); + } else { + cp_found = StringSearch (str, edit->find_txt); } - if (pFieldPair != NULL) { - *pFieldPair = NULL; + + found_len = StringLen (edit->find_txt); 
+ replace_len = StringLen (edit->repl_txt); + while (cp_found != NULL) + { + if (edit->location == Field_edit_location_beginning + && cp_found != str) { + cp_found = NULL; + } else if (edit->location == Field_edit_location_end + && cp_found != str + StringLen (str) - found_len) { + if (edit->case_insensitive) { + cp_found = StringISearch (cp_found + found_len, edit->find_txt); + } else { + cp_found = StringSearch (cp_found + found_len, edit->find_txt); + } + } else { + new_len = StringLen (str) + 1 - found_len + replace_len; + new_str = (CharPtr) MemNew (new_len * sizeof (Char)); + if (new_str != NULL) + { + if (cp_found != str) + { + StringNCpy (new_str, str, cp_found - str); + } + StringCat (new_str, edit->repl_txt); + StringCat (new_str, cp_found + found_len); + cp_found = new_str + (cp_found - str) + replace_len; + str = MemFree (str); + str = new_str; + } + if (edit->case_insensitive) { + cp_found = StringISearch (cp_found, edit->find_txt); + } else { + cp_found = StringSearch (cp_found, edit->find_txt); + } + } } - if (act == NULL || act->action == NULL || act->action->data.ptrvalue == NULL) { + return str; +} + + +static void RemoveFieldNameFromString (CharPtr field_name, CharPtr str) +{ + Uint4 field_name_len; + CharPtr src, dst; + + if (StringHasNoText (field_name) || StringHasNoText (str)) { return; } + field_name_len = StringLen (field_name); + + if (!StringHasNoText (field_name) && StringNICmp(str, field_name, field_name_len) == 0 + && StringLen (str) > field_name_len + && str[field_name_len] == ' ') + { + src = str + field_name_len + 1; + while (*src == ' ') + { + src++; + } + dst = str; + while (*src != 0) + { + *dst = *src; + dst++; + src++; + } + *dst = 0; + } +} - switch (act->action->choice) { - case ActionChoice_apply: - if (pField != NULL) { - a = (ApplyActionPtr) act->action->data.ptrvalue; - *pField = a->field; + +typedef struct objectcollection { + AECRActionPtr action; + ValNodePtr object_list; + ValNodePtr object_tail; + BatchExtraPtr 
batch_extra; +} ObjectCollectionData, PNTR ObjectCollectionPtr; + + +static void AECRActionObjectCollectionItemCallback (Uint1 objecttype, Pointer objectdata, ObjectCollectionPtr o) +{ + ApplyActionPtr a; + EditActionPtr e; + ConvertActionPtr v; + CopyActionPtr c; + SwapActionPtr s; + RemoveActionPtr r; + AECRParseActionPtr p; + CharPtr str, portion, field_name; + StringConstraintPtr scp; + FieldTypePtr field_from = NULL, field_to = NULL; + + if (objectdata == NULL || o == NULL) return; + + /* check to make sure object is appropriate for field and meets filter */ + switch (o->action->action->choice) { + case ActionChoice_apply : + a = (ApplyActionPtr) o->action->action->data.ptrvalue; + if (a != NULL + && IsObjectAppropriateForFieldValue (objecttype, objectdata, a->field) + && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { + ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); } break; - case ActionChoice_edit: - if (pField != NULL) { - e = (EditActionPtr) act->action->data.ptrvalue; - *pField = e->field; + case ActionChoice_edit : + e = (EditActionPtr) o->action->action->data.ptrvalue; + if (e != NULL + && IsObjectAppropriateForFieldValue (objecttype, objectdata, e->field) + && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { + scp = StringConstraintFromFieldEdit (e->edit); + str = GetFieldValueForObjectEx (objecttype, objectdata, e->field, scp, o->batch_extra); + if (!StringHasNoText (str)) { + ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); + } + str = MemFree (str); } break; - case ActionChoice_convert: - if (pFieldPair != NULL) { - c = (ConvertActionPtr) act->action->data.ptrvalue; - *pFieldPair = c->fields; + case ActionChoice_convert : + v = (ConvertActionPtr) o->action->action->data.ptrvalue; + if (v != NULL + && (field_from = GetFromFieldFromFieldPair(v->fields)) != NULL + && IsObjectAppropriateForFieldValue 
(objecttype, objectdata, field_from) + && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { + scp = FindStringConstraintInConstraintSetForField (field_from, o->action->constraint); + str = GetFieldValueForObjectEx (objecttype, objectdata, field_from, scp, o->batch_extra); + if (v->strip_name) { + field_to = GetToFieldFromFieldPair (v->fields); + field_name = SummarizeFieldType (field_to); + RemoveFieldNameFromString (field_name, str); + field_name = MemFree (field_name); + field_to = FieldTypeFree (field_to); + } + if (!StringHasNoText (str)) { + ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); + } + str = MemFree (str); } + field_from = FieldTypeFree (field_from); break; - case ActionChoice_swap: - if (pFieldPair != NULL) { - s = (SwapActionPtr) act->action->data.ptrvalue; - *pFieldPair = s->fields; + case ActionChoice_copy : + c = (CopyActionPtr) o->action->action->data.ptrvalue; + if (c != NULL + && (field_from = GetFromFieldFromFieldPair(c->fields)) != NULL + && (field_to = GetFromFieldFromFieldPair(c->fields)) != NULL + && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from) + && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to) + && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { + ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); } + field_from = FieldTypeFree (field_from); + field_to = FieldTypeFree (field_to); break; - case ActionChoice_copy: - if (pFieldPair != NULL) { - cp = (CopyActionPtr) act->action->data.ptrvalue; - *pFieldPair = cp->fields; + case ActionChoice_swap : + s = (SwapActionPtr) o->action->action->data.ptrvalue; + if (s != NULL + && (field_from = GetFromFieldFromFieldPair(s->fields)) != NULL + && (field_to = GetFromFieldFromFieldPair(s->fields)) != NULL + && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from) + && IsObjectAppropriateForFieldValue 
(objecttype, objectdata, field_to) + && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { + ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); } + field_from = FieldTypeFree (field_from); + field_to = FieldTypeFree (field_to); break; - case ActionChoice_remove: - if (pField != NULL) { - r = (RemoveActionPtr) act->action->data.ptrvalue; - *pField = r->field; + case ActionChoice_remove : + r = (RemoveActionPtr) o->action->action->data.ptrvalue; + if (r != NULL + && IsObjectAppropriateForFieldValue (objecttype, objectdata, r->field) + && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { + ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); } break; - case ActionChoice_parse: - if (pFieldPair != NULL) { - p = (AECRParseActionPtr) act->action->data.ptrvalue; - *pFieldPair = p->fields; + case ActionChoice_parse : + p = (AECRParseActionPtr) o->action->action->data.ptrvalue; + if (p != NULL + && (field_from = GetFromFieldFromFieldPair(p->fields)) != NULL + && (field_to = GetFromFieldFromFieldPair(p->fields)) != NULL + && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from) + && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to) + && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { + scp = FindStringConstraintInConstraintSetForField (field_from, o->action->constraint); + str = GetFieldValueForObjectEx (objecttype, objectdata, field_from, scp, o->batch_extra); + portion = GetTextPortionFromString (str, p->portion); + ApplyTextTransformsToString (&portion, p->transform); + if (!StringHasNoText (portion)) { + ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); + } + portion = MemFree (portion); + str = MemFree (str); } + field_from = FieldTypeFree (field_from); + field_to = FieldTypeFree (field_to); break; } + } -NLM_EXTERN ValNodePtr 
LIBCALLBACK FieldTypeListFree (ValNodePtr list) +static void AECRActionObjectCollectionFeatureCallback (SeqFeatPtr sfp, Pointer data) { - ValNodePtr list_next; + ObjectCollectionPtr o; + if (sfp == NULL || data == NULL) return; + + o = (ObjectCollectionPtr) data; + AECRActionObjectCollectionItemCallback (OBJ_SEQFEAT, sfp, o); - while (list != NULL) { - list_next = list->next; - list->next = NULL; - list = FieldTypeFree (list); - list = list_next; - } - return list; } -NLM_EXTERN ValNodePtr LIBCALLBACK FieldTypeListCopy (ValNodePtr orig) +static void AECRActionObjectCollectionDescriptorCallback (SeqDescrPtr sdp, Pointer data) { - ValNodePtr prev = NULL, new_list = NULL, vnp; + ObjectCollectionPtr o; - while (orig != NULL) { - vnp = FieldTypeCopy (orig); - if (prev == NULL) { - new_list = vnp; - } else { - prev->next = vnp; - } - prev = vnp; - orig = orig->next; - } - return new_list; -} + if (sdp == NULL || data == NULL) return; + o = (ObjectCollectionPtr) data; + AECRActionObjectCollectionItemCallback (OBJ_SEQDESC, sdp, o); +} -static int LIBCALLBACK SortVnpByChoiceAndIntvalue (VoidPtr ptr1, VoidPtr ptr2) +static void AECRObjectCollectionBioseqCallback (BioseqPtr bsp, Pointer data) { - ValNodePtr vnp1; - ValNodePtr vnp2; - int rval = 0; - - if (ptr1 != NULL && ptr2 != NULL) { - vnp1 = *((ValNodePtr PNTR) ptr1); - vnp2 = *((ValNodePtr PNTR) ptr2); - if (vnp1 == NULL && vnp2 == NULL) { - rval = 0; - } else if (vnp1 == NULL) { - rval = -1; - } else if (vnp2 == NULL) { - rval = 1; - } else if (vnp1->choice > vnp2->choice) { - rval = 1; - } else if (vnp1->choice < vnp2->choice) { - rval = -1; - } else if (vnp1->data.intvalue > vnp2->data.intvalue) { - rval = 1; - } else if (vnp1->data.intvalue < vnp2->data.intvalue) { - rval = -1; - } else { - rval = 0; - } - } - return rval; -} + ObjectCollectionPtr o; + if (bsp == NULL || data == NULL) return; -/* Callback function used for sorting and uniqueing */ + o = (ObjectCollectionPtr) data; + 
AECRActionObjectCollectionItemCallback (OBJ_BIOSEQ, bsp, o); +} -static int LIBCALLBACK SortVnpByFieldType (VoidPtr ptr1, VoidPtr ptr2) +static ValNodePtr CollectMiscObjectsForApply (SeqEntryPtr sep, Int4 misc_type, ValNodePtr constraint) { - ValNodePtr vnp1; - ValNodePtr vnp2; - int rval = 0; + ValNodePtr target_list = NULL, bsp_list = NULL, tmp_list = NULL, tmp_tail = NULL, vnp; - if (ptr1 != NULL && ptr2 != NULL) { - vnp1 = *((ValNodePtr PNTR) ptr1); - vnp2 = *((ValNodePtr PNTR) ptr2); - rval = CompareFieldTypes (vnp1, vnp2); + if (sep == NULL) { + return NULL; + } + + /* VisitBioseqsInSep (sep, &bsp_list, CollectNucBioseqCallback); */ + bsp_list = CollectNucBioseqs (sep); + for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { + if (DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, constraint)) { + ValNodeAddPointerEx (&tmp_list, &tmp_tail, vnp->choice, vnp->data.ptrvalue); + } } + bsp_list = ValNodeFree (bsp_list); - return rval; + if (misc_type == Misc_field_genome_project_id) { + target_list = tmp_list; + tmp_list = NULL; + } else if (misc_type == Misc_field_comment_descriptor) { + for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { + AddCommentDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); + } + } else if (misc_type == Misc_field_defline) { + for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { + AddDeflineDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); + } + tmp_list = ValNodeFree (tmp_list); + } else if (misc_type == Misc_field_keyword) { + for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { + AddGenbankBlockDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); + } + tmp_list = ValNodeFree (tmp_list); + } + tmp_list = ValNodeFree (tmp_list); + return target_list; } -static void GetBioSourceFields (BioSourcePtr biop, Pointer userdata) +static void AddStructuredCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list, ValNodePtr PNTR dest_tail) { + SeqDescrPtr 
sdp; + SeqMgrDescContext context; + Boolean found = FALSE; + ObjValNodePtr ovp; + UserObjectPtr uop; - if (biop == NULL || userdata == NULL) { + if (bsp == NULL || dest_list == NULL || dest_tail == NULL) { return; } - ValNodeLink ((ValNodePtr PNTR) userdata, GetSourceQualFieldListFromBioSource (biop)); -} - - -NLM_EXTERN void SortUniqueFieldTypeList (ValNodePtr PNTR field_list) -{ - if (field_list == NULL) return; - *field_list = ValNodeSort (*field_list, SortVnpByFieldType); - ValNodeUnique (field_list, SortVnpByFieldType, FieldTypeListFree); + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); + sdp != NULL; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) { + if (IsUserObjectStructuredComment (sdp->data.ptrvalue)) { + ValNodeAddPointerEx (dest_list, dest_tail, OBJ_SEQDESC, sdp); + found = TRUE; + } + } + if (!found) { + /* if no existing structured comment descriptor, create one, marked for delete. + * unmark it for deletion when it gets populated. 
+ */ + sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user); + uop = UserObjectNew (); + uop->type = ObjectIdNew (); + uop->type->str = StringSave ("StructuredComment"); + sdp->data.ptrvalue = uop; + ovp = (ObjValNodePtr) sdp; + ovp->idx.deleteme = TRUE; + ValNodeAddPointerEx (dest_list, dest_tail, OBJ_SEQDESC, sdp); + } } -NLM_EXTERN ValNodePtr GetSourceQualSampleFieldList (SeqEntryPtr sep) +static ValNodePtr CollectStructuredCommentsForApply (SeqEntryPtr sep, ValNodePtr constraint) { - ValNodePtr field_list = NULL; - ValNodePtr vnp_prev = NULL, vnp, sq; - Boolean done = FALSE; - - VisitBioSourcesInSep (sep, &field_list, GetBioSourceFields); - field_list = ValNodeSort (field_list, SortVnpByFieldType); - ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree); + ValNodePtr target_list = NULL, target_tail = NULL, bsp_list = NULL, tmp_list = NULL, tmp_tail = NULL, vnp; - /* rearrange so that taxname is always first */ - for (vnp = field_list; vnp != NULL && !done; vnp = vnp->next) { - if (vnp->choice == FieldType_source_qual - && (sq = vnp->data.ptrvalue) != NULL - && sq->choice == SourceQualChoice_textqual - && sq->data.intvalue == Source_qual_taxname) { - if (vnp_prev != NULL) { - vnp_prev->next = vnp->next; - vnp->next = field_list; - field_list = vnp; - } - done = TRUE; - } else { - vnp_prev = vnp; + if (sep == NULL) { + return NULL; + } + + /* VisitBioseqsInSep (sep, &bsp_list, CollectNucBioseqCallback); */ + bsp_list = CollectNucBioseqs (sep); + for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { + if (DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, constraint)) { + ValNodeAddPointerEx (&tmp_list, &tmp_tail, vnp->choice, vnp->data.ptrvalue); } } + bsp_list = ValNodeFree (bsp_list); - return field_list; + + for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { + AddStructuredCommentDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list, &target_tail); + } + tmp_list = ValNodeFree (tmp_list); + return target_list; } 
-NLM_EXTERN ValNodePtr GetSourceQualSampleFieldListForSeqEntryList (ValNodePtr list) +NLM_EXTERN ValNodePtr GetObjectListForAECRActionEx (SeqEntryPtr sep, AECRActionPtr action, BatchExtraPtr batch_extra) { - ValNodePtr field_list = NULL; - ValNodePtr vnp_prev = NULL, vnp, sq; - Boolean done = FALSE; + ObjectCollectionData ocd; + ApplyActionPtr apply; + Uint1 field_type; - if (list == NULL) { - return NULL; - } + if (action == NULL) return NULL; - for (vnp = list; vnp != NULL; vnp = vnp->next) { - VisitBioSourcesInSep (vnp->data.ptrvalue, &field_list, GetBioSourceFields); + ocd.action = action; + ocd.object_list = NULL; + ocd.object_tail = NULL; + if (batch_extra == NULL) { + ocd.batch_extra = BatchExtraNew (); + InitBatchExtraForAECRAction (ocd.batch_extra, action, sep); + } else { + ocd.batch_extra = batch_extra; } - field_list = ValNodeSort (field_list, SortVnpByFieldType); - ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree); - /* rearrange so that taxname is always first */ - for (vnp = field_list; vnp != NULL && !done; vnp = vnp->next) { - if (vnp->choice == FieldType_source_qual - && (sq = vnp->data.ptrvalue) != NULL - && sq->choice == SourceQualChoice_textqual - && sq->data.intvalue == Source_qual_taxname) { - if (vnp_prev != NULL) { - vnp_prev->next = vnp->next; - vnp->next = field_list; - field_list = vnp; - } - done = TRUE; - } else { - vnp_prev = vnp; + field_type = FieldTypeFromAECRAction (action); + if (field_type == FieldType_molinfo_field) { + VisitBioseqsInSep (sep, &ocd, AECRObjectCollectionBioseqCallback); + } else if (field_type == FieldType_misc + && action->action != NULL + && action->action->choice == ActionChoice_apply + && (apply = action->action->data.ptrvalue) != NULL) { + ocd.object_list = CollectMiscObjectsForApply (sep, apply->field->data.intvalue, action->constraint); + } else if (field_type == FieldType_struc_comment_field) { + ocd.object_list = CollectStructuredCommentsForApply (sep, action->constraint); + } else { + 
VisitFeaturesInSep (sep, &ocd, AECRActionObjectCollectionFeatureCallback); + VisitDescriptorsInSep (sep, &ocd, AECRActionObjectCollectionDescriptorCallback); + if (field_type == FieldType_misc) { + VisitBioseqsInSep (sep, &ocd, AECRObjectCollectionBioseqCallback); } } - return field_list; + if (batch_extra != ocd.batch_extra) { + ocd.batch_extra = BatchExtraFree (ocd.batch_extra); + } + return ocd.object_list; } -static void GetFeatureQualFieldListForAECRSampleCallback (SeqFeatPtr sfp, Pointer data) +NLM_EXTERN ValNodePtr GetObjectListForAECRAction (SeqEntryPtr sep, AECRActionPtr action) { - ValNodePtr PNTR list; - - list = (ValNodePtr PNTR) data; - if (list == NULL || sfp == NULL) return; - - ValNodeLink (list, GetFieldListFromFeature (sfp)); + return GetObjectListForAECRActionEx (sep, action, NULL); } -static ValNodePtr GetFeatureQualFieldList (SeqEntryPtr sep) + +NLM_EXTERN ValNodePtr FreeObjectList (ValNodePtr vnp) { - ValNodePtr field_list = NULL; + ValNodePtr vnp_next; - VisitFeaturesInSep (sep, &field_list, GetFeatureQualFieldListForAECRSampleCallback); - field_list = ValNodeSort (field_list, SortVnpByFieldType); - ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree); - return field_list; + while (vnp != NULL) { + vnp_next = vnp->next; + vnp->next = NULL; + if (vnp->choice == 0) { + vnp->data.ptrvalue = CGPSetFree (vnp->data.ptrvalue); + } + vnp = ValNodeFree (vnp); + vnp = vnp_next; + } + return vnp; } -static void GetRnaQualFieldListForAECRSampleCallback (SeqFeatPtr sfp, Pointer userdata) +typedef struct buildcgpset { - RnaFeatTypePtr type; - RnaRefPtr rrp; - RnaQualPtr rq; - RNAGenPtr rgp; - GeneRefPtr grp = NULL; - SeqFeatPtr gene = NULL; - SeqMgrFeatContext fcontext; + ValNodePtr cds_list; + ValNodePtr mrna_list; + ValNodePtr gene_list; +} BuildCGPSetData, PNTR BuildCGPSetPtr; - if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL || userdata == NULL) { - return; +static void BuildCGPSetCallback 
(SeqFeatPtr sfp, Pointer userdata) +{ + BuildCGPSetPtr b; + + if (sfp == NULL || sfp->idx.deleteme || userdata == NULL) return; + b = (BuildCGPSetPtr) userdata; + if (sfp->data.choice == SEQFEAT_CDREGION) + { + ValNodeAddPointer (&(b->cds_list), OBJ_SEQFEAT, sfp); + } + else if (sfp->data.choice == SEQFEAT_GENE) + { + ValNodeAddPointer (&(b->gene_list), OBJ_SEQFEAT, sfp); + } + else if (sfp->idx.subtype == FEATDEF_mRNA) + { + ValNodeAddPointer (&(b->mrna_list), OBJ_SEQFEAT, sfp); + } + else if (SeqMgrGetGeneXref (sfp) != NULL) + { + ValNodeAddPointer (&(b->gene_list), OBJ_SEQFEAT, sfp); } +} - rrp = (RnaRefPtr) sfp->data.value.ptrvalue; - type = RnaFeatTypeFromSeqFeat (sfp); +static CGPSetPtr BuildCGPSetFromCodingRegion (SeqFeatPtr cds, BoolPtr indexing_needed) +{ + SeqMgrFeatContext fcontext; + SeqFeatPtr gene = NULL, mrna, prot; + BioseqPtr protbsp; + CGPSetPtr cdsp; + ProtRefPtr prp; - if (type == NULL) return; + if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return NULL; - /* add product if appropriate */ - if ((type->choice == RnaFeatType_preRNA || type->choice == RnaFeatType_mRNA - || type->choice == RnaFeatType_rRNA || type->choice == RnaFeatType_miscRNA) - && rrp->ext.choice == 1 - && !StringHasNoText (rrp->ext.value.ptrvalue)) { - rq = RnaQualNew (); - rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); - rq->field = Rna_field_product; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); - } else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL - && !StringHasNoText (rgp->product)) { - rq = RnaQualNew (); - rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); - rq->field = Rna_field_product; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); + cdsp = (CGPSetPtr) MemNew (sizeof (CGPSetData)); + ValNodeAddPointer (&(cdsp->cds_list), 0, cds); + + gene = GetGeneForFeature 
(cds); + if (gene != NULL) + { + ValNodeAddPointer (&(cdsp->gene_list), 0, gene); + /* mark gene, so that we'll know it isn't lonely */ + gene->idx.deleteme = TRUE; } - /* add comment if present */ - if (!StringHasNoText (sfp->comment)) { - rq = RnaQualNew (); - rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); - rq->field = Rna_field_comment; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); - } - - /* add tRNA specific if appropriate */ - if (type->choice == RnaFeatType_tRNA) { - /* codons recognized */ - rq = RnaQualNew (); - rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); - rq->field = Rna_field_codons_recognized; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); - - /* anticodon */ - rq = RnaQualNew (); - rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); - rq->field = Rna_field_anticodon; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); - } - - /* add ncRNA class if appropriate and present */ - if (type->choice == RnaFeatType_ncRNA - && rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL - && !StringHasNoText (rgp->_class)) { - rq = RnaQualNew (); - rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); - rq->field = Rna_field_ncrna_class; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); - } - - /* add transcript ID if present */ - if (sfp->product != NULL) { - rq = RnaQualNew (); - rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); - rq->field = Rna_field_transcript_id; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); + mrna = GetmRNAforCDS (cds); + if (mrna != NULL) + { + ValNodeAddPointer (&(cdsp->mrna_list), 0, mrna); + /* mark mrna, so that we'll know 
it's already in a set */ + mrna->idx.deleteme = TRUE; } - /* add gene fields */ - grp = SeqMgrGetGeneXref (sfp); - if (grp == NULL) { - gene = SeqMgrGetOverlappingGene (sfp->location, &fcontext); - if (gene != NULL) { - grp = gene->data.value.ptrvalue; - } - } - if (grp != NULL && !SeqMgrGeneIsSuppressed (grp)) { - /* gene locus */ - if (!StringHasNoText (grp->locus)) { - rq = RnaQualNew (); - rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); - rq->field = Rna_field_gene_locus; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); - } - /* gene description */ - if (!StringHasNoText (grp->desc)) { - rq = RnaQualNew (); - rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); - rq->field = Rna_field_gene_locus; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); - } - /* maploc */ - if (!StringHasNoText (grp->maploc)) { - rq = RnaQualNew (); - rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); - rq->field = Rna_field_gene_maploc; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); - } - /* locus tag */ - if (!StringHasNoText (grp->locus_tag)) { - rq = RnaQualNew (); - rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); - rq->field = Rna_field_gene_locus_tag; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); - } - /* synonym */ - if (grp->syn != NULL) { - rq = RnaQualNew (); - rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); - rq->field = Rna_field_gene_synonym; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); + if (cds->product != NULL) + { + protbsp = BioseqFindFromSeqLoc (cds->product); + if (protbsp != NULL) + { + prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_PROT, &fcontext); + 
/* if there is no full-length protein feature, make one */ + if (prot == NULL) + { + prp = ProtRefNew (); + prot = CreateNewFeatureOnBioseq (protbsp, SEQFEAT_PROT, NULL); + if (prot != NULL) + { + prot->data.value.ptrvalue = prp; + if (indexing_needed != NULL) + { + *indexing_needed = TRUE; + } + } + } + if (prot != NULL) + { + ValNodeAddPointer (&(cdsp->prot_list), 0, prot); + } + + /* also add in mat_peptides from protein feature */ + prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &fcontext); + while (prot != NULL) + { + ValNodeAddPointer (&(cdsp->prot_list), 0, prot); + prot = SeqMgrGetNextFeature (protbsp, prot, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &fcontext); + } } - } - - /* gene comment */ - if (gene != NULL && !StringHasNoText (gene->comment)) { - rq = RnaQualNew (); - rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); - rq->field = Rna_field_gene_comment; - ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); - } + } + return cdsp; } -static ValNodePtr GetRnaQualFieldList (SeqEntryPtr sep) +static CGPSetPtr BuildCGPSetFrommRNA (SeqFeatPtr mrna) { - ValNodePtr field_list = NULL; - - VisitFeaturesInSep (sep, &field_list, GetRnaQualFieldListForAECRSampleCallback); - field_list = ValNodeSort (field_list, SortVnpByFieldType); - ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree); - return field_list; -} - + SeqFeatPtr gene; + CGPSetPtr cdsp; -static void GetStructuredCommentFieldsCallback (SeqDescrPtr sdp, Pointer data) -{ - UserObjectPtr uop; - UserFieldPtr ufp; - ValNodePtr vnp; + if (mrna == NULL || mrna->idx.deleteme || mrna->idx.subtype != FEATDEF_mRNA) return NULL; - if (sdp != NULL && data != NULL && sdp->choice == Seq_descr_user - && (uop = sdp->data.ptrvalue) != NULL - && IsUserObjectStructuredComment (uop)) { + cdsp = (CGPSetPtr) MemNew (sizeof (CGPSetData)); + ValNodeAddPointer (&(cdsp->mrna_list), 0, mrna); - ufp = uop->data; - 
while (ufp != NULL) { - if (ufp->label != NULL && ufp->label->str != NULL - && StringCmp (ufp->label->str, "StructuredCommentPrefix") != 0 - && StringCmp (ufp->label->str, "StructuredCommentSuffix") != 0) { - vnp = ValNodeNew (NULL); - vnp->choice = StructuredCommentField_named; - vnp->data.ptrvalue = StringSave (ufp->label->str); - ValNodeAddPointer ((ValNodePtr PNTR) data, FieldType_struc_comment_field, vnp); - } - ufp = ufp->next; - } + gene = GetGeneForFeature (mrna); + if (gene != NULL) + { + ValNodeAddPointer (&(cdsp->gene_list), 0, gene); + /* mark gene, so that we'll know it isn't lonely */ + gene->idx.deleteme = TRUE; } -} - - -static ValNodePtr GetStructuredCommentFieldList (SeqEntryPtr sep) -{ - ValNodePtr field_list = NULL; - ValNodePtr dbname, field_name; - - dbname = ValNodeNew (NULL); - dbname->choice = StructuredCommentField_database; - ValNodeAddPointer (&field_list, FieldType_struc_comment_field, dbname); - - field_name = ValNodeNew (NULL); - field_name->choice = StructuredCommentField_field_name; - ValNodeAddPointer (&field_list, FieldType_struc_comment_field, field_name); - - VisitDescriptorsInSep (sep, &field_list, GetStructuredCommentFieldsCallback); - field_list = ValNodeSort (field_list, SortVnpByFieldType); - ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree); - return field_list; + return cdsp; } -static void CollectBioSourceDescCallback (SeqDescrPtr sdp, Pointer data) +static CGPSetPtr BuildCGPSetFromGene (SeqFeatPtr gene) { - if (sdp != NULL && sdp->choice == Seq_descr_source && data != NULL) { - ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); - } -} + CGPSetPtr cdsp; -static void CollectBioSourceFeatCallback (SeqFeatPtr sfp, Pointer data) -{ - if (sfp != NULL && sfp->data.choice == SEQFEAT_BIOSRC) { - ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); + if (gene == NULL || gene->idx.deleteme || gene->idx.subtype != FEATDEF_GENE) { + return NULL; } -} - -static void CollectFeaturesCallback 
(SeqFeatPtr sfp, Pointer data) -{ - if (sfp != NULL && data != NULL && sfp->data.choice != SEQFEAT_BIOSRC && sfp->data.choice != SEQFEAT_PUB) { - ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); - } + cdsp = CGPSetNew (); + ValNodeAddPointer (&(cdsp->gene_list), 0, gene); + return cdsp; } -static void CollectPubDescCallback (SeqDescrPtr sdp, Pointer data) +static void +AdjustCGPObjectListForMatPeptides +(ValNodePtr PNTR cgp_list, + FieldTypePtr field1, + FieldTypePtr field2, + ConstraintChoiceSetPtr constraints) { - if (sdp != NULL && sdp->choice == Seq_descr_pub && data != NULL) { - ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); - } -} + ConstraintChoiceSetPtr mat_peptide_constraints = NULL; + ValNodePtr vnp, vnp_prev, vnp_next; + ValNodePtr m_vnp, m_vnp_prev, m_vnp_next, mat_peptide_list; + CGPSetPtr cdsp; + SeqFeatPtr sfp; -static void CollectPubFeatCallback (SeqFeatPtr sfp, Pointer data) -{ - if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) { - ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); + if (cgp_list == NULL + || *cgp_list == NULL + || constraints == NULL + || (field1 == NULL && field2 == NULL) /* no fields specified */ + || (!IsFieldTypeMatPeptideRelated (field1) && !IsFieldTypeMatPeptideRelated(field2))) { + return; } -} - -static void CollectBioseqCallback (BioseqPtr bsp, Pointer data) -{ - if (bsp != NULL && data != NULL) { - ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp); + /* get list of constraints that apply to mat-peptide features */ + while (constraints != NULL) { + if (IsConstraintChoiceMatPeptideRelated (constraints)) { + ValNodeLink (&mat_peptide_constraints, AsnIoMemCopy (constraints, (AsnReadFunc) ConstraintChoiceAsnRead, (AsnWriteFunc) ConstraintChoiceAsnWrite)); + } + constraints = constraints->next; + } + if (mat_peptide_constraints == NULL) { + return; } -} - -static void CollectNucBioseqCallback (BioseqPtr bsp, Pointer data) -{ - if (bsp != NULL && data != NULL && !ISA_aa 
(bsp->mol)) { - ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp); + /* if both fields are mat-peptide related, or one is mat-peptide related and the other is NULL, + * convert sets to lists of mat-peptide features + * otherwise just remove mat-peptide features from the prot-list that do not match the constraints. + */ + if ((field1 != NULL && !IsFieldTypeMatPeptideRelated (field1)) + || (field2 != NULL && !IsFieldTypeMatPeptideRelated (field2))) { + for (vnp = *cgp_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == 0) { + cdsp = (CGPSetPtr) vnp->data.ptrvalue; + m_vnp_prev = NULL; + for (m_vnp = cdsp->prot_list; m_vnp != NULL; m_vnp = m_vnp_next) { + m_vnp_next = m_vnp->next; + sfp = m_vnp->data.ptrvalue; + if (sfp == NULL + || (sfp->idx.subtype == FEATDEF_mat_peptide_aa + && !DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, mat_peptide_constraints))) { + if (m_vnp_prev == NULL) { + cdsp->prot_list = m_vnp->next; + } else { + m_vnp_prev->next = m_vnp->next; + } + m_vnp->next = NULL; + m_vnp = ValNodeFree (m_vnp); + } else { + m_vnp_prev = m_vnp; + } + } + } + } + } else { + vnp_prev = NULL; + for (vnp = *cgp_list; vnp != NULL; vnp = vnp_next) { + vnp_next = vnp->next; + if (vnp->choice == 0) { + mat_peptide_list = NULL; + cdsp = (CGPSetPtr) vnp->data.ptrvalue; + for (m_vnp = cdsp->prot_list; m_vnp != NULL; m_vnp = m_vnp->next) { + sfp = m_vnp->data.ptrvalue; + if (sfp->idx.subtype == FEATDEF_mat_peptide_aa + && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, mat_peptide_constraints)) { + ValNodeAddPointer (&mat_peptide_list, OBJ_SEQFEAT, sfp); + } + } + if (mat_peptide_list == NULL) { + if (vnp_prev == NULL) { + *cgp_list = vnp->next; + } else { + vnp_prev->next = vnp->next; + } + vnp->next = NULL; + vnp = FreeObjectList (vnp); + } else { + m_vnp = mat_peptide_list; + while (m_vnp->next != NULL) { + m_vnp = m_vnp->next; + } + if (vnp_prev == NULL) { + *cgp_list = mat_peptide_list; + } else { + vnp_prev->next = mat_peptide_list; + } + 
m_vnp->next = vnp_next; + vnp_prev = m_vnp; + vnp->next = NULL; + vnp = FreeObjectList (vnp); + } + } else { + vnp_prev = vnp; + } + } } + mat_peptide_constraints = ConstraintChoiceSetFree (mat_peptide_constraints); } -static void AddCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list) +static ValNodePtr BuildCGPSetList (Uint2 entityID, AECRActionPtr act, BoolPtr created_protein_features) { - SeqDescrPtr sdp; - SeqMgrDescContext context; - Boolean found = FALSE; - ObjValNodePtr ovp; + SeqEntryPtr sep; + BuildCGPSetData b; + CGPSetPtr cdsp; + ValNodePtr vnp, vnp_next, vnp_prev; + ValNodePtr cdset_list = NULL; + SeqFeatPtr cds, gene, mrna; + Boolean need_indexing = FALSE; + ApplyActionPtr a; + EditActionPtr e; + ConvertActionPtr c; + CopyActionPtr cp; + SwapActionPtr s; + AECRParseActionPtr pa; + RemoveActionPtr r; + FieldTypePtr field_from, field_to; + + sep = GetTopSeqEntryForEntityID (entityID); - if (bsp == NULL || dest_list == NULL) { - return; + b.cds_list = NULL; + b.gene_list = NULL; + b.mrna_list = NULL; + + if (created_protein_features != NULL) { + *created_protein_features = FALSE; } + + VisitFeaturesInSep (sep, &b, BuildCGPSetCallback); - for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &context); - sdp != NULL; - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_comment, &context)) { - ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); - found = TRUE; + /* build cdsets that have coding regions */ + for (vnp = b.cds_list; vnp != NULL; vnp = vnp->next) + { + cds = (SeqFeatPtr) vnp->data.ptrvalue; + if (cds == NULL) continue; + cdsp = BuildCGPSetFromCodingRegion (cds, &need_indexing); + if (cdsp != NULL) + { + ValNodeAddPointer (&cdset_list, 0, cdsp); + } } - if (!found) { - /* if no existing comment descriptor, create one, marked for delete. - * unmark it for deletion when it gets populated. 
- */ - sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_comment); - sdp->data.ptrvalue = StringSave (""); - ovp = (ObjValNodePtr) sdp; - ovp->idx.deleteme = TRUE; - ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); + if (need_indexing) + { + /* indexing because we have created full-length protein features */ + SeqMgrIndexFeatures (entityID, NULL); + if (created_protein_features != NULL) { + *created_protein_features = TRUE; + } } -} - - -static ValNodePtr CollectCommentDescriptors (SeqEntryPtr sep) -{ - ValNodePtr seq_list = NULL, vnp, desc_list = NULL; - if (sep == NULL) { - return NULL; + /* build cdsets for mrna features that don't have coding regions */ + for (vnp = b.mrna_list; vnp != NULL; vnp = vnp->next) + { + mrna = (SeqFeatPtr) vnp->data.ptrvalue; + if (mrna == NULL || mrna->idx.deleteme) continue; + cdsp = BuildCGPSetFrommRNA (mrna); + if (cdsp != NULL) + { + ValNodeAddPointer (&cdset_list, 0, cdsp); + } } - VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); - - for (vnp = seq_list; vnp != NULL; vnp = vnp->next) { - AddCommentDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list); + /* build cdsets for lonely genes / features with gene xrefs that are not coding regions or mrnas */ + for (vnp = b.gene_list; vnp != NULL; vnp = vnp->next) + { + gene = (SeqFeatPtr) vnp->data.ptrvalue; + if (gene == NULL || gene->idx.deleteme) continue; + cdsp = BuildCGPSetFromGene (gene); + if (cdsp != NULL) { + ValNodeAddPointer (&cdset_list, 0, cdsp); + } } - seq_list = ValNodeFree (seq_list); - return desc_list; -} + /* now unmark features */ + UnmarkFeatureList (b.cds_list); + UnmarkFeatureList (b.mrna_list); + UnmarkFeatureList (b.gene_list); -static void CollectStructuredCommentsCallback (SeqDescrPtr sdp, Pointer data) -{ - UserObjectPtr uop; + b.cds_list = ValNodeFree (b.cds_list); + b.mrna_list = ValNodeFree (b.mrna_list); + b.gene_list = ValNodeFree (b.gene_list); - if (sdp != NULL && data != NULL && sdp->choice == Seq_descr_user - && (uop = 
sdp->data.ptrvalue) != NULL - && IsUserObjectStructuredComment (uop)) { - ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); + /* now remove sets that don't match our choice constraint */ + if (act != NULL && act->constraint != NULL) { + vnp_prev = NULL; + for (vnp = cdset_list; vnp != NULL; vnp = vnp_next) + { + vnp_next = vnp->next; + if (!DoesObjectMatchConstraintChoiceSet (0, vnp->data.ptrvalue, act->constraint)) + { + if (vnp_prev == NULL) + { + cdset_list = vnp->next; + } + else + { + vnp_prev->next = vnp->next; + } + vnp->next = NULL; + FreeCGPSetList (vnp); + } + else + { + vnp_prev = vnp; + } + } } + + /* adjust if action fields are mat-peptide specific */ + if (act != NULL && act->action != NULL && act->action->data.ptrvalue != NULL) { + switch (act->action->choice) { + case ActionChoice_apply: + a = (ApplyActionPtr) act->action->data.ptrvalue; + AdjustCGPObjectListForMatPeptides (&cdset_list, a->field, NULL, act->constraint); + break; + case ActionChoice_edit: + e = (EditActionPtr) act->action->data.ptrvalue; + AdjustCGPObjectListForMatPeptides (&cdset_list, e->field, NULL, act->constraint); + break; + case ActionChoice_convert: + c = (ConvertActionPtr) act->action->data.ptrvalue; + field_from = GetFromFieldFromFieldPair (c->fields); + field_to = GetToFieldFromFieldPair (c->fields); + AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint); + field_from = FieldTypeFree (field_from); + field_to = FieldTypeFree (field_to); + break; + case ActionChoice_copy: + cp = (CopyActionPtr) act->action->data.ptrvalue; + field_from = GetFromFieldFromFieldPair (cp->fields); + field_to = GetToFieldFromFieldPair (cp->fields); + AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint); + field_from = FieldTypeFree (field_from); + field_to = FieldTypeFree (field_to); + break; + case ActionChoice_swap: + s = (SwapActionPtr) act->action->data.ptrvalue; + field_from = GetFromFieldFromFieldPair 
(s->fields); + field_to = GetToFieldFromFieldPair (s->fields); + AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint); + field_from = FieldTypeFree (field_from); + field_to = FieldTypeFree (field_to); + break; + case ActionChoice_remove: + r = (RemoveActionPtr) act->action->data.ptrvalue; + AdjustCGPObjectListForMatPeptides (&cdset_list, r->field, NULL, act->constraint); + break; + case ActionChoice_parse: + pa = (AECRParseActionPtr) act->action->data.ptrvalue; + field_from = GetFromFieldFromFieldPair (pa->fields); + field_to = GetToFieldFromFieldPair (pa->fields); + AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint); + field_from = FieldTypeFree (field_from); + field_to = FieldTypeFree (field_to); + break; + } + } + return cdset_list; } -static void AddDeflineDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list) +static void AlsoChangeMrnaForObject (Uint1 choice, Pointer data) { - SeqDescrPtr sdp; - SeqMgrDescContext context; - Boolean found = FALSE; - ObjValNodePtr ovp; - - if (bsp == NULL || dest_list == NULL) { - return; - } + CharPtr str; + SeqFeatPtr sfp, mrna; + FeatureField f; - for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &context); - sdp != NULL; - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_title, &context)) { - ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); - found = TRUE; - } - if (!found) { - /* if no existing comment descriptor, create one, marked for delete. - * unmark it for deletion when it gets populated. 
- */ - sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_title); - sdp->data.ptrvalue = StringSave (""); - ovp = (ObjValNodePtr) sdp; - ovp->idx.deleteme = TRUE; - ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); + if (choice == 0) { + str = GetFieldValueFromCGPSet (data, CDSGeneProt_field_prot_name, NULL); + SetFieldValueInCGPSet (data, CDSGeneProt_field_mrna_product, NULL, str, ExistingTextOption_replace_old); + str = MemFree (str); + } else if (choice == OBJ_SEQFEAT) { + sfp = (SeqFeatPtr) data; + if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) { + mrna = GetmRNAforCDS (sfp); + if (mrna != NULL) { + f.type = Macro_feature_type_cds; + f.field = ValNodeNew(NULL); + f.field->next = NULL; + f.field->choice = FeatQualChoice_legal_qual; + f.field->data.intvalue = Feat_qual_legal_product; + str = GetQualFromFeature (sfp, &f, NULL); + f.type = Macro_feature_type_mRNA; + SetQualOnFeature (mrna, &f, NULL, str, ExistingTextOption_replace_old); + str = MemFree (str); + f.field = ValNodeFree (f.field); + } + } } } -static ValNodePtr CollectDeflineDescriptors (SeqEntryPtr sep) +NLM_EXTERN Int4 DoApplyActionToObjectListEx (ApplyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) { - ValNodePtr seq_list = NULL, vnp, desc_list = NULL; - - if (sep == NULL) { - return NULL; - } + ValNodePtr vnp; + Int4 num_succeed = 0, num_fail = 0; + CharPtr old_str, new_str; - VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); + if (action == NULL || object_list == NULL) return 0; - for (vnp = seq_list; vnp != NULL; vnp = vnp->next) { - AddDeflineDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list); + for (vnp = object_list; vnp != NULL; vnp = vnp->next) { + old_str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, batch_extra); + if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, action->value, action->existing_text, batch_extra)) { + 
new_str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, batch_extra); + if (StringCmp (old_str, new_str) != 0) { + if (also_change_mrna) { + AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); + } + num_succeed ++; + } + new_str = MemFree (new_str); + } else { + num_fail++; + } + old_str = MemFree (old_str); } - seq_list = ValNodeFree (seq_list); - return desc_list; + + return num_succeed; } -static void AddGenbankBlockDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list) +NLM_EXTERN Int4 DoApplyActionToObjectList (ApplyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) { - SeqDescrPtr sdp; - SeqMgrDescContext context; - Boolean found = FALSE; - ObjValNodePtr ovp; - - if (bsp == NULL || dest_list == NULL) { - return; - } - - for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &context); - sdp != NULL; - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &context)) { - ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); - found = TRUE; - } - if (!found) { - /* if no existing comment descriptor, create one, marked for delete. - * unmark it for deletion when it gets populated. 
- */ - sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_genbank); - sdp->data.ptrvalue = GBBlockNew (); - ovp = (ObjValNodePtr) sdp; - ovp->idx.deleteme = TRUE; - ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); - } + return DoApplyActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); } -static ValNodePtr CollectGenbankBlockDescriptors (SeqEntryPtr sep) +NLM_EXTERN Int4 DoEditActionToObjectListEx (EditActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, BatchExtraPtr batch_extra) { - ValNodePtr seq_list = NULL, vnp, desc_list = NULL; + ValNodePtr vnp; + Int4 num_succeed = 0, num_fail = 0; + StringConstraintPtr scp; + CharPtr str, new_str; - if (sep == NULL) { - return NULL; - } + if (action == NULL || object_list == NULL) return 0; - VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); + scp = StringConstraintFromFieldEdit (action->edit); - for (vnp = seq_list; vnp != NULL; vnp = vnp->next) { - AddGenbankBlockDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list); + for (vnp = object_list; vnp != NULL; vnp = vnp->next) { + str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, batch_extra); + new_str = ApplyEditToString (str, action->edit); + if (StringCmp (str, new_str) != 0 + && SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, new_str, ExistingTextOption_replace_old, batch_extra)) { + if (also_change_mrna) { + AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); + } + num_succeed ++; + } else { + num_fail++; + } + new_str = MemFree (new_str); + str = MemFree (str); } - seq_list = ValNodeFree (seq_list); - return desc_list; + return num_succeed; } - -NLM_EXTERN ValNodePtr GetObjectListForFieldType (Uint1 field_type, SeqEntryPtr sep) +NLM_EXTERN Int4 DoEditActionToObjectList (EditActionPtr action, ValNodePtr object_list, Boolean also_change_mrna) { - ValNodePtr object_list = NULL; - Uint2 entityID; - - switch (field_type) { - case 
FieldType_source_qual: - VisitDescriptorsInSep (sep, &object_list, CollectBioSourceDescCallback); - VisitFeaturesInSep (sep, &object_list, CollectBioSourceFeatCallback); - break; - case FieldType_cds_gene_prot: - entityID = ObjMgrGetEntityIDForChoice(sep); - object_list = BuildCGPSetList (entityID, NULL); - break; - case FieldType_feature_field: - VisitFeaturesInSep (sep, &object_list, CollectFeaturesCallback); - break; - case FieldType_molinfo_field: - VisitBioseqsInSep (sep, &object_list, CollectBioseqCallback); - break; - case FieldType_pub: - VisitDescriptorsInSep (sep, &object_list, CollectPubDescCallback); - VisitFeaturesInSep (sep, &object_list, CollectPubFeatCallback); - break; - case FieldType_rna_field: - VisitFeaturesInSep (sep, &object_list, CollectFeaturesCallback); - break; - case FieldType_struc_comment_field: - VisitDescriptorsInSep (sep, &object_list, CollectStructuredCommentsCallback); - break; - case FieldType_misc: - VisitBioseqsInSep (sep, &object_list, CollectNucBioseqCallback); - ValNodeLink (&object_list, CollectCommentDescriptors (sep)); - break; - } - return object_list; + return DoEditActionToObjectListEx (action, object_list, also_change_mrna, NULL); } - -NLM_EXTERN ValNodePtr GetFieldListForFieldType (Uint1 field_type, SeqEntryPtr sep) -{ - ValNodePtr fields = NULL; - - /* get a list of the fields that are appropriate for the objects collected */ - switch (field_type) { - case FieldType_cds_gene_prot: - fields = MakeCDSGeneProtFieldTypeList (); - break; - case FieldType_source_qual: - fields = GetSourceQualSampleFieldList (sep); - break; - case FieldType_feature_field: - fields = GetFeatureQualFieldList (sep); - break; - case FieldType_molinfo_field: - fields = MakeSequenceQualFieldTypeList (); - break; - case FieldType_pub: - fields = MakePubFieldTypeList (); - break; - case FieldType_rna_field: - fields = GetRnaQualFieldList (sep); - break; - case FieldType_struc_comment_field: - fields = GetStructuredCommentFieldList (sep); - break; 
- case FieldType_misc: - ValNodeAddInt (&fields, FieldType_misc, Misc_field_genome_project_id); - ValNodeAddInt (&fields, FieldType_misc, Misc_field_comment_descriptor); - ValNodeAddInt (&fields, FieldType_misc, Misc_field_defline); - ValNodeAddInt (&fields, FieldType_misc, Misc_field_keyword); - break; - } - return fields; -} +static Boolean IsFieldTypeProteinDesc (FieldTypePtr ft); -NLM_EXTERN ValNodePtr GetAECRSampleListForSeqEntry (Uint1 field_type, SeqEntryPtr sep) +NLM_EXTERN Int4 DoConvertActionToObjectListEx (ConvertActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) { - ValNodePtr object_list; - ValNodePtr fields = NULL, vnp; - ValNodePtr list = NULL; - AECRSamplePtr sample; - BatchExtraPtr batch_extra; + ValNodePtr vnp; + Int4 num_succeed = 0, num_fail = 0; + CharPtr str, from_val, field_name = NULL; + FieldTypePtr field_from, field_to; + StringConstraint remove_constraint; + Boolean fail; - object_list = GetObjectListForFieldType (field_type, sep); + if (action == NULL || object_list == NULL || action->fields == NULL) return 0; - /* get a list of the fields that are appropriate for the objects collected */ - fields = GetFieldListForFieldType (field_type, sep); + field_from = GetFromFieldFromFieldPair (action->fields); + field_to = GetToFieldFromFieldPair (action->fields); - batch_extra = BatchExtraNew (); - for (vnp = fields; vnp != NULL; vnp = vnp->next) { - InitBatchExtraForField (batch_extra, vnp, sep); - } - for (vnp = fields; vnp != NULL; vnp = vnp->next) { - sample = GetAECRSampleFromObjectListEx (object_list, vnp, batch_extra); - if (sample != NULL && sample->num_found > 0) { - ValNodeAddPointer (&list, 0, sample); - } else { - sample = AECRSampleFree (sample); - } + if (action->strip_name) { + field_name = SummarizeFieldType (field_to); } - batch_extra = BatchExtraFree (batch_extra); - fields = FieldTypeListFree (fields); - - object_list = FreeObjectList (object_list); - return 
list; -} - - -NLM_EXTERN ValNodePtr GetAECRSampleList (AECRActionPtr act, SeqEntryPtr sep) -{ - Uint1 field_type; - Uint2 entityID; - ValNodePtr object_list; - ValNodePtr fields = NULL, vnp; - ValNodePtr list = NULL; - AECRSamplePtr sample; - BatchExtraPtr batch_extra; - - batch_extra = BatchExtraNew (); - InitBatchExtraForAECRAction (batch_extra, act, sep); - - field_type = FieldTypeFromAECRAction (act); - if (field_type == FieldType_cds_gene_prot) { - entityID = ObjMgrGetEntityIDForChoice(sep); - object_list = BuildCGPSetList (entityID, act); + if (action->fields->choice == FieldPairType_molinfo_field) { + for (vnp = object_list; vnp != NULL; vnp = vnp->next) { + str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, NULL, batch_extra); + from_val = GetSequenceQualValName (field_from->data.ptrvalue); + if (StringCmp (str, from_val) == 0 + && SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, ExistingTextOption_replace_old, batch_extra)) { + num_succeed ++; + } + str = MemFree (str); + } } else { - object_list = GetObjectListForAECRActionEx (sep, act, batch_extra); - } - - /* get fields used in action */ - fields = GetFieldTypeListFromAECRAction (act); - - for (vnp = fields; vnp != NULL; vnp = vnp->next) { - sample = GetAECRSampleFromObjectListEx (object_list, vnp, batch_extra); - if (sample != NULL && sample->num_found > 0) { - ValNodeAddPointer (&list, 0, sample); - } else { - sample = AECRSampleFree (sample); + for (vnp = object_list; vnp != NULL; vnp = vnp->next) { + /* there may be multiple qualifiers */ + MemSet (&remove_constraint, 0, sizeof (StringConstraint)); + remove_constraint.case_sensitive = TRUE; + remove_constraint.match_location = String_location_equals; + remove_constraint.not_present = FALSE; + remove_constraint.whole_word = FALSE; + fail = FALSE; + + while ((str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra)) != NULL && !fail) { + 
remove_constraint.match_text = StringSave (str); + if (action->strip_name) { + RemoveFieldNameFromString (field_name, str); + } + FixCapitalizationInString(&str, action->capitalization, NULL); + + if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, action->existing_text, batch_extra) + && (action->keep_original || RemoveFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, &remove_constraint))) { + if (also_change_mrna) { + AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); + } + num_succeed ++; + } else { + num_fail++; + fail = TRUE; + } + str = MemFree (str); + remove_constraint.match_text = MemFree (remove_constraint.match_text); + if (action->keep_original) { + break; + } + } } } - fields = FieldTypeListFree (fields); - - batch_extra = BatchExtraFree (batch_extra); + field_from = FieldTypeFree (field_from); + field_to = FieldTypeFree (field_to); + field_name = MemFree (field_name); - FreeObjectList (object_list); - return list; + return num_succeed; } -NLM_EXTERN AECRSamplePtr GetFieldSampleFromList (ValNodePtr list, FieldTypePtr field) +NLM_EXTERN Int4 DoConvertActionToObjectList (ConvertActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) { - AECRSamplePtr sample = NULL; - - while (list != NULL && sample == NULL) { - sample = list->data.ptrvalue; - if (sample != NULL && !DoFieldTypesMatch (sample->field, field)) { - sample = NULL; - } - list = list->next; - } - return sample; + return DoConvertActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); } -static void RemoveFieldsForWhichThereAreNoData (ValNodePtr PNTR field_list, ValNodePtr object_list) +NLM_EXTERN Int4 DoCopyActionToObjectListEx (CopyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) { - ValNodePtr vnp_prev = NULL, vnp_f, vnp_next; - AECRSamplePtr sample; + ValNodePtr vnp; + Int4 num_succeed = 0, num_fail = 0; + CharPtr 
str; + FieldTypePtr field_from, field_to; - if (field_list == NULL || *field_list == NULL) { - return; - } + if (action == NULL || object_list == NULL) return 0; + field_from = GetFromFieldFromFieldPair (action->fields); + field_to = GetToFieldFromFieldPair (action->fields); - vnp_prev = NULL; - vnp_f = *field_list; - while (vnp_f != NULL) { - vnp_next = vnp_f->next; - if (vnp_f->choice == FieldType_source_qual - || vnp_f->choice == FieldType_feature_field - || vnp_f->choice == FieldType_rna_field) { - vnp_prev = vnp_f; - } else { - sample = GetAECRSampleFromObjectList (object_list, vnp_f); - if (sample == NULL || sample->num_found == 0) { - if (vnp_prev == NULL) { - *field_list = vnp_next; - } else { - vnp_prev->next = vnp_next; - } - vnp_f->next = NULL; - vnp_f = FieldTypeFree (vnp_f); - } else { - vnp_prev = vnp_f; + for (vnp = object_list; vnp != NULL; vnp = vnp->next) { + str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra); + if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, action->existing_text, batch_extra)) { + if (also_change_mrna) { + AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); } - sample = AECRSampleFree (sample); + num_succeed ++; + } else { + num_fail++; } - vnp_f = vnp_next; + str = MemFree (str); } + + field_from = FieldTypeFree (field_from); + field_to = FieldTypeFree (field_to); + return num_succeed; } -NLM_EXTERN void GetAECRExistingTextList (Uint1 field_type, SeqEntryPtr sep, FILE *fp) +NLM_EXTERN Int4 DoCopyActionToObjectList (CopyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) { - ValNodePtr object_list, vnp_f, vnp_o; - ValNodePtr fields = NULL; - BioseqPtr bsp; - Char id_buf[255]; - CharPtr txt1 = NULL; - - object_list = GetObjectListForFieldType (field_type, sep); + return DoCopyActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); +} - /* get a list of the fields that are appropriate for 
the objects collected */ - fields = GetFieldListForFieldType (field_type, sep); +NLM_EXTERN Int4 DoSwapActionToObjectListEx (SwapActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) +{ + ValNodePtr vnp; + Int4 num_succeed = 0, num_fail = 0; + CharPtr str1, str2; + FieldTypePtr field_from, field_to; - /* remove fields for which there is no data */ - RemoveFieldsForWhichThereAreNoData (&fields, object_list); + if (action == NULL || object_list == NULL) return 0; + field_from = GetFromFieldFromFieldPair (action->fields); + field_to = GetToFieldFromFieldPair (action->fields); - /* add header */ - fprintf (fp, "Accession"); - for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) { - txt1 = SummarizeFieldType (vnp_f); - fprintf (fp, "\t%s", txt1); - txt1 = MemFree (txt1); - } - fprintf (fp, "\n"); - - for (vnp_o = object_list; vnp_o != NULL; vnp_o = vnp_o->next) { - bsp = GetSequenceForObject (vnp_o->choice, vnp_o->data.ptrvalue); - if (bsp == NULL) { - id_buf[0] = 0; + for (vnp = object_list; vnp != NULL; vnp = vnp->next) { + str1 = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra); + str2 = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, batch_extra); + if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str1, ExistingTextOption_replace_old, batch_extra) + && SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, str2, ExistingTextOption_replace_old, batch_extra)) { + if (also_change_mrna) { + AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); + } + num_succeed ++; } else { - SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1); - } - fprintf (fp, "%s", id_buf); - for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) { - txt1 = GetFieldValueForObject (vnp_o->choice, vnp_o->data.ptrvalue, vnp_f, NULL); - fprintf (fp, "\t%s", txt1 == 
NULL ? "" : txt1); - txt1 = MemFree (txt1); + num_fail++; } - fprintf (fp, "\n"); + str1 = MemFree (str1); + str2 = MemFree (str2); } - - fields = FieldTypeListFree (fields); - - object_list = FreeObjectList (object_list); + field_from = FieldTypeFree (field_from); + field_to = FieldTypeFree (field_to); + return num_succeed; } -NLM_EXTERN void ExportFieldTable (Uint1 field_type, ValNodePtr src_field_list, SeqEntryPtr sep, FILE *fp) +NLM_EXTERN Int4 DoSwapActionToObjectList (SwapActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) { - ValNodePtr object_list, vnp_f, vnp_o; - ValNodePtr fields = NULL; - BioseqPtr bsp; - Char id_buf[255]; - CharPtr txt1 = NULL, title; - SeqDescrPtr sdp, pub_sdp; - SeqMgrDescContext context, pub_context; + return DoSwapActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); +} - if (field_type == 0) { - object_list = GetObjectListForFieldType (FieldType_source_qual, sep); - } else if (field_type == FieldType_misc) { - object_list = CollectDeflineDescriptors (sep); - ValNodeAddInt (&fields, FieldType_misc, Misc_field_defline); - } else if (field_type == FieldType_pub) { - object_list = GetObjectListForFieldType (FieldType_source_qual, sep); - /* only get publication titles */ - ValNodeAddInt (&fields, FieldType_pub, Publication_field_title); - } else { - object_list = GetObjectListForFieldType (field_type, sep); - /* get a list of the fields that are appropriate for the objects collected */ - fields = GetFieldListForFieldType (field_type, sep); - /* remove fields for which there is no data */ - RemoveFieldsForWhichThereAreNoData (&fields, object_list); - } +NLM_EXTERN Int4 DoRemoveActionToObjectList (RemoveActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) +{ + ValNodePtr vnp; + Int4 num_succeed = 0, num_fail = 0; - /* add header */ - /* accession is first column */ - fprintf (fp, "Accession"); - /* list source fields first */ - for (vnp_f 
= src_field_list; vnp_f != NULL; vnp_f = vnp_f->next) { - txt1 = SummarizeFieldType (vnp_f); - fprintf (fp, "\t%s", txt1); - txt1 = MemFree (txt1); - } - /* list fields */ - for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) { - txt1 = SummarizeFieldType (vnp_f); - fprintf (fp, "\t%s", txt1); - txt1 = MemFree (txt1); - } - fprintf (fp, "\n"); - - for (vnp_o = object_list; vnp_o != NULL; vnp_o = vnp_o->next) { - bsp = GetSequenceForObject (vnp_o->choice, vnp_o->data.ptrvalue); - if (bsp == NULL) { - id_buf[0] = 0; - } else { - SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1); - } + if (action == NULL || object_list == NULL) return 0; - if (field_type == FieldType_pub) { - for (pub_sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &pub_context); - pub_sdp != NULL; - pub_sdp = SeqMgrGetNextDescriptor (bsp, pub_sdp, Seq_descr_pub, &pub_context)) { - - /* Get Publication Title */ - title = GetFieldValueForObject (OBJ_SEQDESC, pub_sdp, fields, NULL); - if (!StringHasNoText (title)) { - /* print accession */ - fprintf (fp, "%s", id_buf); - /* print source fields */ - if (src_field_list != NULL) { - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); - for (vnp_f = src_field_list; vnp_f != NULL; vnp_f = vnp_f->next) { - txt1 = GetFieldValueForObject (OBJ_SEQDESC, sdp, vnp_f, NULL); - fprintf (fp, "\t%s", txt1 == NULL ? "" : txt1); - txt1 = MemFree (txt1); - } - } - /* print publication title */ - fprintf (fp, "\t%s", title == NULL ? 
"" : title); - fprintf (fp, "\n"); - } - title = MemFree (title); + for (vnp = object_list; vnp != NULL; vnp = vnp->next) { + if (RemoveFieldValueForObject (vnp->choice, vnp->data.ptrvalue, action->field, scp)) { + if (also_change_mrna) { + AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); } + num_succeed ++; } else { - /* print accession */ - fprintf (fp, "%s", id_buf); - /* print source fields */ - if (src_field_list != NULL) { - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); - for (vnp_f = src_field_list; vnp_f != NULL; vnp_f = vnp_f->next) { - txt1 = GetFieldValueForObject (OBJ_SEQDESC, sdp, vnp_f, NULL); - fprintf (fp, "\t%s", txt1 == NULL ? "" : txt1); - txt1 = MemFree (txt1); - } - } - /* get requested fields */ - for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) { - txt1 = GetFieldValueForObject (vnp_o->choice, vnp_o->data.ptrvalue, vnp_f, NULL); - fprintf (fp, "\t%s", txt1 == NULL ? "" : txt1); - txt1 = MemFree (txt1); - } - fprintf (fp, "\n"); + num_fail++; } } + return num_succeed; +} - fields = FieldTypeListFree (fields); - object_list = FreeObjectList (object_list); +NLM_EXTERN Int4 DoParseActionToObjectListEx (AECRParseActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) +{ + ValNodePtr vnp; + CharPtr str1, str2, str3, cp, tmp; + Int4 len, num_succeed = 0, diff, left_len, right_len; + FieldTypePtr field_from, field_to; + + if (action == NULL || object_list == NULL) return 0; + field_from = GetFromFieldFromFieldPair (action->fields); + field_to = GetToFieldFromFieldPair (action->fields); + + for (vnp = object_list; vnp != NULL; vnp = vnp->next) { + str1 = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra); + str2 = GetTextPortionFromString (str1, action->portion); + str3 = StringSave (str2); + ApplyTextTransformsToString (&str3, action->transform); + if (str3 != NULL) { + if (action->remove_from_parsed) { + cp 
= FindTextPortionLocationInString (str1, action->portion); + if (cp != NULL) { + len = StringLen (str2); + tmp = cp; + if (action->portion != NULL && !IsTextMarkerEmpty (action->portion->left_marker) + && (tmp = FindTextMarker (str1, &left_len, action->portion->left_marker, + action->portion->case_sensitive, action->portion->whole_word)) != NULL) { + if (action->portion->include_left) { + /* adjust */ + } else if (!action->portion->include_left) { + /* adjust */ + if (action->remove_left) { + len += left_len; + } else { + cp += left_len; + } + } + } + if (!IsTextMarkerEmpty (action->portion->right_marker) + && action->remove_right + && !action->portion->include_right + && action->portion != NULL + && (tmp = FindTextMarker (tmp, &right_len, action->portion->right_marker, + action->portion->case_sensitive, action->portion->whole_word)) != NULL) { + diff = right_len; + len += diff; + } + StringCpy (cp, cp + len); + SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, str1, ExistingTextOption_replace_old, batch_extra); + } + } + if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str3, action->existing_text, batch_extra)) { + if (also_change_mrna) { + AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); + } + num_succeed++; + } + } + str1 = MemFree (str1); + str2 = MemFree (str2); + str3 = MemFree (str3); + } + field_from = FieldTypeFree (field_from); + field_to = FieldTypeFree (field_to); + return num_succeed; } -/* This section handles parsing where the source field and destination field may not be on the same - * group of objects. 
*/ -typedef struct parsesourceinfo +NLM_EXTERN Int4 DoParseActionToObjectList (AECRParseActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) { - BioseqPtr bsp; - SeqFeatPtr sfp; - SeqDescrPtr sdp; - SeqIdPtr sip; - ValNodePtr dest_list; - CharPtr parse_src_txt; -} ParseSourceInfoData, PNTR ParseSourceInfoPtr; + return DoParseActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); +} -static ParseSourceInfoPtr ParseSourceInfoNew (BioseqPtr bsp, SeqFeatPtr sfp, SeqDescrPtr sdp, SeqIdPtr sip, CharPtr parse_src_txt) + +static Int4 ApplyAECRActionToSeqEntry (AECRActionPtr act, SeqEntryPtr sep, BoolPtr created_protein_features) { - ParseSourceInfoPtr psip; + StringConstraintPtr scp; + ApplyActionPtr a; + ConvertActionPtr c; + RemoveActionPtr r; + EditActionPtr e; + ValNodePtr object_list = NULL; + Uint1 field_type; + Uint2 entityID; + Int4 num_succeed = 0; + FieldTypePtr field_from; + BatchExtraPtr batch_extra; - psip = (ParseSourceInfoPtr) MemNew (sizeof (ParseSourceInfoData)); - if (psip != NULL) { - psip->bsp = bsp; - psip->sdp = sdp; - psip->sfp = sfp; - psip->sip = sip; - psip->dest_list = NULL; - psip->parse_src_txt = parse_src_txt; - } - return psip; + if (act == NULL || act->action == NULL) return 0; + + batch_extra = BatchExtraNew (); + InitBatchExtraForAECRAction (batch_extra, act, sep); + + field_type = FieldTypeFromAECRAction (act); + if (field_type == FieldType_cds_gene_prot) { + entityID = ObjMgrGetEntityIDForChoice(sep); + object_list = BuildCGPSetList (entityID, act, created_protein_features); + + } else { + object_list = GetObjectListForAECRActionEx (sep, act, batch_extra); + } + + + switch (act->action->choice) { + case ActionChoice_apply: + a = (ApplyActionPtr) act->action->data.ptrvalue; + scp = FindStringConstraintInConstraintSetForField (a->field, act->constraint); + num_succeed = DoApplyActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, scp, batch_extra); + 
if (a->field->choice == FieldType_misc) { + DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); + } + break; + case ActionChoice_edit: + e = (EditActionPtr) act->action->data.ptrvalue; + num_succeed = DoEditActionToObjectListEx (e, object_list, act->also_change_mrna, batch_extra); + if (e->field->choice == FieldType_misc) { + DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); + } + break; + case ActionChoice_convert: + scp = NULL; + if (act->constraint != NULL) { + c = (ConvertActionPtr) act->action->data.ptrvalue; + field_from = GetFromFieldFromFieldPair (c->fields); + scp = FindStringConstraintInConstraintSetForField (field_from, act->constraint); + field_from = FieldTypeFree (field_from); + } + num_succeed = DoConvertActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, scp, batch_extra); + break; + case ActionChoice_swap: + num_succeed = DoSwapActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, NULL, batch_extra); + break; + case ActionChoice_copy: + num_succeed = DoCopyActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, NULL, batch_extra); + break; + case ActionChoice_remove: + r = (RemoveActionPtr) act->action->data.ptrvalue; + scp = FindStringConstraintInConstraintSetForField (r->field, act->constraint); + num_succeed = DoRemoveActionToObjectList (act->action->data.ptrvalue, object_list, act->also_change_mrna, scp); + if (r->field->choice == FieldType_misc) { + DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); + } + break; + case ActionChoice_parse: + num_succeed = DoParseActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, NULL, batch_extra); + break; + } + object_list = FreeObjectList (object_list); + batch_extra = BatchExtraFree (batch_extra); + return num_succeed; } -static ParseSourceInfoPtr ParseSourceInfoFree (ParseSourceInfoPtr psip) +static AECRSamplePtr AECRSampleNew (void) { - 
if (psip != NULL) - { - psip->dest_list = ValNodeFree (psip->dest_list); - psip->parse_src_txt = MemFree (psip->parse_src_txt); - psip = MemFree (psip); - } - return psip; + AECRSamplePtr sample; + + sample = (AECRSamplePtr) MemNew (sizeof (AECRSampleData)); + MemSet (sample, 0, sizeof (AECRSampleData)); + sample->all_same = TRUE; + return sample; } -static ParseSourceInfoPtr ParseSourceInfoCopy (ParseSourceInfoPtr psip) + +NLM_EXTERN AECRSamplePtr AECRSampleFree (AECRSamplePtr sample) { - ParseSourceInfoPtr pcopy = NULL; - - if (psip != NULL) - { - pcopy = (ParseSourceInfoPtr) MemNew (sizeof (ParseSourceInfoData)); - if (pcopy != NULL) { - pcopy->bsp = psip->bsp; - pcopy->sfp = psip->sfp; - pcopy->sdp = psip->sdp; - pcopy->sip = psip->sip; - pcopy->dest_list = NULL; - pcopy->parse_src_txt = NULL; - } + if (sample != NULL) { + sample->field = FieldTypeFree (sample->field); + sample->first_value = MemFree (sample->first_value); + sample = MemFree (sample); } - return pcopy; + return sample; } -static ValNodePtr ParseSourceListFree (ValNodePtr vnp) + +NLM_EXTERN ValNodePtr AECRSampleListFree (ValNodePtr list) { - ValNodePtr vnp_next; - while (vnp != NULL) { - vnp_next = vnp->next; - vnp->next = NULL; - vnp->data.ptrvalue = ParseSourceInfoFree (vnp->data.ptrvalue); - vnp = ValNodeFree (vnp); - vnp = vnp_next; + ValNodePtr list_next; + + while (list != NULL) { + list_next = list->next; + list->next = NULL; + list->data.ptrvalue = AECRSampleFree (list->data.ptrvalue); + list = ValNodeFree (list); + list = list_next; } - return vnp; + return list; } -static void -GetDeflineSourcesForBioseq -(BioseqPtr bsp, - TextPortionPtr portion, - ValNodePtr PNTR source_list) +static void AddTextToAECRSample (AECRSamplePtr sample, CharPtr txt) { - SeqDescrPtr sdp; - SeqMgrDescContext dcontext; - CharPtr str; - ParseSourceInfoPtr psip; - - if (bsp == NULL || source_list == NULL) - { - return; - } - - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &dcontext); - while (sdp != 
NULL) - { - str = GetTextPortionFromString (sdp->data.ptrvalue, portion); - if (str != NULL) { - psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str); - if (psip != NULL) { - ValNodeAddPointer (source_list, 0, psip); - } else { - str = MemFree (str); + if (StringHasNoText (txt)) { + txt = MemFree (txt); + } else if (sample != NULL) { + sample->num_found ++; + if (sample->first_value == NULL) { + sample->first_value = txt; + } else { + if (sample->all_same && StringCmp (sample->first_value, txt) != 0) { + sample->all_same = FALSE; } + txt = MemFree (txt); } - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_title, &dcontext); } } -static CharPtr GetIDSrc (SeqIdPtr sip, Uint1 id_type, CharPtr tag) +NLM_EXTERN AECRSamplePtr GetAECRSampleFromObjectListEx (ValNodePtr object_list, FieldTypePtr field, BatchExtraPtr batch_extra) { - DbtagPtr dbt = NULL; - ObjectIdPtr oip = NULL; - Char id_str[128]; - CharPtr str_src = NULL; + AECRSamplePtr sample; + ValNodePtr vnp, prot_vnp, bsp_list; + CharPtr txt; + CGPSetPtr cgp; + SeqFeatPtr sfp; + BatchExtraPtr b = NULL; + SeqEntryPtr sep; - if (sip == NULL || sip->choice != id_type) return NULL; + if (object_list == NULL || field == NULL) return NULL; - if (id_type == SEQID_GENERAL) - { - dbt = (DbtagPtr) sip->data.ptrvalue; - if (dbt == NULL || (tag != NULL && StringCmp (dbt->db, tag) != 0)) return NULL; - oip = dbt->tag; - } - else if (id_type == SEQID_LOCAL) - { - oip = sip->data.ptrvalue; + if (batch_extra == NULL) { + b = BatchExtraNew (); + batch_extra = b; + bsp_list = BioseqListForObjectList (object_list); + for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { + sep = SeqMgrGetSeqEntryForData (vnp->data.ptrvalue); + InitBatchExtraForField (batch_extra, field, sep); + } + bsp_list = ValNodeFree (bsp_list); } - if (oip == NULL) - { - SeqIdWrite (sip, id_str, PRINTID_REPORT, sizeof (id_str)); - str_src = StringSave (id_str); - } - else - { - if (oip->str == NULL) - { - sprintf (id_str, "%d", oip->id); - str_src = StringSave 
(id_str); - } - else - { - str_src = StringSave (oip->str); + sample = AECRSampleNew (); + sample->field = FieldTypeCopy (field); + for (vnp = object_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == 0 && IsFieldTypeMatPeptideRelated (field)) { + cgp = (CGPSetPtr) vnp->data.ptrvalue; + if (cgp != NULL) { + for (prot_vnp = cgp->prot_list; prot_vnp != NULL; prot_vnp = prot_vnp->next) { + sfp = (SeqFeatPtr) prot_vnp->data.ptrvalue; + if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa) { + txt = GetFieldValueForObjectEx (OBJ_SEQFEAT, sfp, field, NULL, batch_extra); + AddTextToAECRSample (sample, txt); + } + } + } + } else { + txt = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field, NULL, batch_extra); + AddTextToAECRSample (sample, txt); } } - return str_src; + + b = BatchExtraFree (b); + return sample; } -static void -GetIDSourcesForBioseq -(BioseqPtr bsp, - TextPortionPtr portion, - Uint1 id_type, - CharPtr tag, - ValNodePtr PNTR source_list) +NLM_EXTERN AECRSamplePtr GetAECRSampleFromObjectList (ValNodePtr object_list, FieldTypePtr field) { - SeqIdPtr sip; - ParseSourceInfoPtr psip; - CharPtr src_str = NULL, str; + return GetAECRSampleFromObjectListEx (object_list, field, NULL); +} + + +static void GetFieldsFromAECR (AECRActionPtr act, FieldTypePtr PNTR pField, ValNodePtr PNTR pFieldPair) +{ + ApplyActionPtr a; + EditActionPtr e; + ConvertActionPtr c; + SwapActionPtr s; + CopyActionPtr cp; + RemoveActionPtr r; + AECRParseActionPtr p; - if (bsp == NULL || source_list == NULL) - { + if (pField != NULL) { + *pField = NULL; + } + if (pFieldPair != NULL) { + *pFieldPair = NULL; + } + if (act == NULL || act->action == NULL || act->action->data.ptrvalue == NULL) { return; } - - sip = bsp->id; - while (sip != NULL) - { - if ((src_str = GetIDSrc (sip, id_type, tag)) != NULL) { - str = GetTextPortionFromString (src_str, portion); - if (str != NULL) { - psip = ParseSourceInfoNew (bsp, NULL, NULL, sip, str); - if (psip != NULL) { - 
ValNodeAddPointer (source_list, 0, psip); - } else { - str = MemFree (str); - } + + switch (act->action->choice) { + case ActionChoice_apply: + if (pField != NULL) { + a = (ApplyActionPtr) act->action->data.ptrvalue; + *pField = a->field; } - src_str = MemFree (src_str); - } - sip = sip->next; + break; + case ActionChoice_edit: + if (pField != NULL) { + e = (EditActionPtr) act->action->data.ptrvalue; + *pField = e->field; + } + break; + case ActionChoice_convert: + if (pFieldPair != NULL) { + c = (ConvertActionPtr) act->action->data.ptrvalue; + *pFieldPair = c->fields; + } + break; + case ActionChoice_swap: + if (pFieldPair != NULL) { + s = (SwapActionPtr) act->action->data.ptrvalue; + *pFieldPair = s->fields; + } + break; + case ActionChoice_copy: + if (pFieldPair != NULL) { + cp = (CopyActionPtr) act->action->data.ptrvalue; + *pFieldPair = cp->fields; + } + break; + case ActionChoice_remove: + if (pField != NULL) { + r = (RemoveActionPtr) act->action->data.ptrvalue; + *pField = r->field; + } + break; + case ActionChoice_parse: + if (pFieldPair != NULL) { + p = (AECRParseActionPtr) act->action->data.ptrvalue; + *pFieldPair = p->fields; + } + break; } } -static void -GetLocalIDSourcesForBioseq -(BioseqPtr bsp, - TextPortionPtr tp, - ValNodePtr PNTR source_list) +NLM_EXTERN ValNodePtr LIBCALLBACK FieldTypeListFree (ValNodePtr list) { - GetIDSourcesForBioseq (bsp, tp, SEQID_LOCAL, NULL, source_list); -} - + ValNodePtr list_next; -static void GetNcbiFileSourceForBioseq -(BioseqPtr bsp, - TextPortionPtr tp, - ValNodePtr PNTR source_list) -{ - GetIDSourcesForBioseq (bsp, tp, SEQID_GENERAL, "NCBIFILE", source_list); + while (list != NULL) { + list_next = list->next; + list->next = NULL; + list = FieldTypeFree (list); + list = list_next; + } + return list; } -static void StripBankitCommentForParse (SeqDescrPtr sdp, TextPortionPtr tp) +NLM_EXTERN ValNodePtr LIBCALLBACK FieldTypeListCopy (ValNodePtr orig) { - UserObjectPtr uop; - ObjectIdPtr oip; - UserFieldPtr ufp; - - if 
(sdp == NULL || sdp->choice != Seq_descr_user || tp == NULL) { - return; - } - - /* Bankit Comments */ - uop = (UserObjectPtr) sdp->data.ptrvalue; - if (uop != NULL && StringCmp (uop->_class, "SMART_V1.0") != 0) { - oip = uop->type; - if (oip != NULL && StringCmp (oip->str, "Submission") == 0) { - for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { - oip = ufp->label; - if (oip != NULL && StringCmp (oip->str, "AdditionalComment") == 0) { - ReplaceStringForParse (ufp->data.ptrvalue, tp); - } - } + ValNodePtr prev = NULL, new_list = NULL, vnp; + + while (orig != NULL) { + vnp = FieldTypeCopy (orig); + if (prev == NULL) { + new_list = vnp; + } else { + prev->next = vnp; } + prev = vnp; + orig = orig->next; } + return new_list; } -static void StripStructuredCommentForParse (SeqDescrPtr sdp, CharPtr comment_field, TextPortionPtr tp) +static int LIBCALLBACK SortVnpByChoiceAndIntvalue (VoidPtr ptr1, VoidPtr ptr2) + { - UserObjectPtr uop; - ObjectIdPtr oip; - UserFieldPtr ufp; + ValNodePtr vnp1; + ValNodePtr vnp2; + int rval = 0; - if (sdp == NULL || sdp->choice != Seq_descr_user || tp == NULL || StringHasNoText (comment_field)) { - return; - } - - uop = (UserObjectPtr) sdp->data.ptrvalue; - if (IsUserObjectStructuredComment (uop)) { - for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { - oip = ufp->label; - if (oip != NULL && StringCmp (oip->str, comment_field) == 0) { - ReplaceStringForParse (ufp->data.ptrvalue, tp); - } + if (ptr1 != NULL && ptr2 != NULL) { + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + if (vnp1 == NULL && vnp2 == NULL) { + rval = 0; + } else if (vnp1 == NULL) { + rval = -1; + } else if (vnp2 == NULL) { + rval = 1; + } else if (vnp1->choice > vnp2->choice) { + rval = 1; + } else if (vnp1->choice < vnp2->choice) { + rval = -1; + } else if (vnp1->data.intvalue > vnp2->data.intvalue) { + rval = 1; + } else if (vnp1->data.intvalue < vnp2->data.intvalue) { + rval = -1; + } else { + rval = 0; } } + return rval; } -static void 
-GetBankitCommentSourcesForBioseq -(BioseqPtr bsp, - TextPortionPtr tp, - ValNodePtr PNTR source_list) -{ - SeqDescrPtr sdp; - SeqMgrDescContext dcontext; - ParseSourceInfoPtr psip; - UserObjectPtr uop; - ObjectIdPtr oip; - UserFieldPtr ufp; - CharPtr str = NULL; - - if (bsp == NULL || source_list == NULL) { - return; - } - - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext); - while (sdp != NULL) { - if (sdp->extended != 0) { - /* Bankit Comments */ - uop = (UserObjectPtr) sdp->data.ptrvalue; - if (uop != NULL && StringCmp (uop->_class, "SMART_V1.0") != 0) { - oip = uop->type; - if (oip != NULL && StringCmp (oip->str, "Submission") == 0) { - for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { - oip = ufp->label; - if (oip != NULL && StringCmp (oip->str, "AdditionalComment") == 0) { - str = GetTextPortionFromString (ufp->data.ptrvalue, tp); - if (str != NULL) { - psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str); - if (psip == NULL) { - str = MemFree (str); - } else { - ValNodeAddPointer (source_list, 0, psip); - } - } - } - } - } - } - } - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext); +/* Callback function used for sorting and uniqueing */ + +static int LIBCALLBACK SortVnpByFieldType (VoidPtr ptr1, VoidPtr ptr2) + +{ + ValNodePtr vnp1; + ValNodePtr vnp2; + int rval = 0; + + if (ptr1 != NULL && ptr2 != NULL) { + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + rval = CompareFieldTypes (vnp1, vnp2); } + + return rval; } -static void -GetCommentSourcesForBioseq -(BioseqPtr bsp, - TextPortionPtr tp, - ValNodePtr PNTR source_list) +static int LIBCALLBACK SortVnpByFieldTypeAndSourceQualifier (VoidPtr ptr1, VoidPtr ptr2) + { - SeqDescrPtr sdp; - SeqFeatPtr sfp; - SeqMgrFeatContext fcontext; - SeqMgrDescContext dcontext; - ParseSourceInfoPtr psip; - CharPtr str; - - if (bsp == NULL || source_list == NULL) { - return; - } - - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext); - 
while (sdp != NULL) { - str = GetTextPortionFromString (sdp->data.ptrvalue, tp); - if (str != NULL) { - psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str); - if (psip == NULL) { - str = MemFree (str); - } else { - ValNodeAddPointer (source_list, 0, psip); - } - } - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_comment, &dcontext); - } - - sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_COMMENT, 0, &fcontext); - while (sfp != NULL) { - str = GetTextPortionFromString (sfp->data.value.ptrvalue, tp); - if (str != NULL) { - psip = ParseSourceInfoNew (bsp, sfp, NULL, NULL, str); - if (psip == NULL) { - str = MemFree (str); - } else { - ValNodeAddPointer (source_list, 0, psip); - } - } - sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_COMMENT, 0, &fcontext); + ValNodePtr vnp1; + ValNodePtr vnp2; + int rval = 0; + + if (ptr1 != NULL && ptr2 != NULL) { + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + rval = CompareFieldTypesEx (vnp1, vnp2, TRUE); } - GetBankitCommentSourcesForBioseq (bsp, tp, source_list); + + return rval; } -static void -GetStructuredCommentSourcesForBioseq -(BioseqPtr bsp, - TextPortionPtr tp, - CharPtr comment_field, - ValNodePtr PNTR source_list) +static void GetBioSourceFields (BioSourcePtr biop, Pointer userdata) { - SeqDescrPtr sdp; - UserObjectPtr uop; - ObjectIdPtr oip; - UserFieldPtr ufp; - SeqMgrDescContext dcontext; - CharPtr str; - ParseSourceInfoPtr psip; - - if (bsp == NULL || source_list == NULL) - { + + if (biop == NULL || userdata == NULL) { return; } - - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext); - while (sdp != NULL) { - if (sdp->extended != 0 - && sdp->data.ptrvalue != NULL) { - uop = (UserObjectPtr) sdp->data.ptrvalue; - if (IsUserObjectStructuredComment (uop)) { - for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { - oip = ufp->label; - if (oip != NULL && StringCmp (oip->str, comment_field) == 0) { - str = GetTextPortionFromString (ufp->data.ptrvalue, tp); - if (str != NULL) { 
- psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str); - if (psip == NULL) { - str = MemFree (str); - } else { - ValNodeAddPointer (source_list, 0, psip); - } - } - } - } - } - } - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext); - } + + ValNodeLink ((ValNodePtr PNTR) userdata, GetSourceQualFieldListFromBioSource (biop)); } -static void GetFlatFileSourcesForBioseq -(BioseqPtr bsp, - TextPortionPtr tp, - ValNodePtr PNTR source_list) +NLM_EXTERN void SortUniqueFieldTypeList (ValNodePtr PNTR field_list) +{ + if (field_list == NULL) return; + *field_list = ValNodeSort (*field_list, SortVnpByFieldType); + ValNodeUnique (field_list, SortVnpByFieldType, FieldTypeListFree); +} + +NLM_EXTERN ValNodePtr GetSourceQualSampleFieldList (SeqEntryPtr sep) { - SeqEntryPtr sep; - Asn2gbJobPtr ajp; - Int4 index; - ErrSev level; - CharPtr string, str; - ParseSourceInfoPtr psip; + ValNodePtr field_list = NULL; - if (bsp == NULL || source_list == NULL) - { - return; - } + VisitBioSourcesInSep (sep, &field_list, GetBioSourceFields); + field_list = ValNodeSort (field_list, SortVnpByFieldTypeAndSourceQualifier); + ValNodeUnique (&field_list, SortVnpByFieldTypeAndSourceQualifier, FieldTypeListFree); - sep = SeqMgrGetSeqEntryForData (bsp); - if (sep == NULL) { - return; + return field_list; +} + + +NLM_EXTERN ValNodePtr GetSourceQualSampleFieldListForSeqEntryList (ValNodePtr list) +{ + ValNodePtr field_list = NULL; + ValNodePtr vnp; + + if (list == NULL) { + return NULL; } - - level = ErrSetMessageLevel (SEV_MAX); - ajp = asn2gnbk_setup (bsp, NULL, NULL, (FmtType)GENBANK_FMT, SEQUIN_MODE, NORMAL_STYLE, 0, 0, 0, NULL); - if (ajp != NULL) { - for (index = 0; index < ajp->numParagraphs; index++) { - string = asn2gnbk_format (ajp, (Int4) index); - if (string != NULL && *string != '\0') { - CompressSpaces (string); - str = GetTextPortionFromString (string, tp); - if (str != NULL) { - psip = ParseSourceInfoNew (bsp, NULL, NULL, NULL, str); - if (psip == NULL) { - str = 
MemFree (str); - } else { - ValNodeAddPointer (source_list, 0, psip); - } - } - } - MemFree (string); - } - asn2gnbk_cleanup (ajp); + for (vnp = list; vnp != NULL; vnp = vnp->next) { + VisitBioSourcesInSep (vnp->data.ptrvalue, &field_list, GetBioSourceFields); } + field_list = ValNodeSort (field_list, SortVnpByFieldTypeAndSourceQualifier); + ValNodeUnique (&field_list, SortVnpByFieldTypeAndSourceQualifier, FieldTypeListFree); - ErrSetMessageLevel (level); + return field_list; } -const CharPtr nomial_keywords[] = { -"f. sp. ", -"var.", -"pv.", -"bv.", -"serovar", -"subsp." }; +static void GetFeatureQualFieldListForAECRSampleCallback (SeqFeatPtr sfp, Pointer data) +{ + ValNodePtr PNTR list; -const Int4 num_nomial_keywords = sizeof(nomial_keywords) / sizeof (CharPtr); + list = (ValNodePtr PNTR) data; + if (list == NULL || sfp == NULL) return; -static CharPtr GetTextAfterNomial (CharPtr taxname) + ValNodeLink (list, GetFieldListFromFeature (sfp)); +} +static ValNodePtr GetFeatureQualFieldList (SeqEntryPtr sep) { - CharPtr ptr, nomial_end; - Int4 i; - Boolean found_keyword = TRUE; - - ptr = StringChr (taxname, ' '); - if (ptr == NULL) return NULL; - /* skip over the first word and the spaces after it. */ - while (*ptr == ' ') { - ptr++; - } - ptr = StringChr (ptr, ' '); - /* if there are only two words, give up. */ - if (ptr == NULL) { - return NULL; - } - nomial_end = ptr; - while (*ptr == ' ') { - ptr++; - } - - while (found_keyword) { - found_keyword = FALSE; - /* if the next word is a nomial keyword, skip that plus the first word that follows it. 
*/ - for (i = 0; i < num_nomial_keywords && *nomial_end != 0; i++) { - if (StringNCmp (ptr, nomial_keywords[i], StringLen(nomial_keywords[i])) == 0) { - ptr += StringLen(nomial_keywords[i]); - while (*ptr == ' ' ) { - ptr++; - } - nomial_end = StringChr (ptr, ' '); - if (nomial_end == NULL) { - nomial_end = ptr + StringLen (ptr); - } else { - ptr = nomial_end; - while (*ptr == ' ') { - ptr++; - } - found_keyword = TRUE; - } - } - } - } - return nomial_end; + ValNodePtr field_list = NULL; + + VisitFeaturesInSep (sep, &field_list, GetFeatureQualFieldListForAECRSampleCallback); + field_list = ValNodeSort (field_list, SortVnpByFieldType); + ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree); + return field_list; } -static void -GetOrgParseSourcesForBioSource -(BioSourcePtr biop, - BioseqPtr bsp, - SeqDescrPtr sdp, - SeqFeatPtr sfp, - ParseSrcOrgPtr o, - TextPortionPtr tp, - ValNodePtr PNTR source_list) +static void GetRnaQualFieldListForAECRSampleCallback (SeqFeatPtr sfp, Pointer userdata) { - CharPtr str = NULL, portion, tmp; - ValNode vn; - ParseSourceInfoPtr psip; + RnaFeatTypePtr type; + RnaRefPtr rrp; + RnaQualPtr rq; + RNAGenPtr rgp; + GeneRefPtr grp = NULL; + SeqFeatPtr gene = NULL; + SeqMgrFeatContext fcontext; - if (biop == NULL || o == NULL || o->field == NULL || source_list == NULL) return; + if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL || userdata == NULL) { + return; + } - switch (o->field->choice) { - case ParseSrcOrgChoice_source_qual : - vn.choice = SourceQualChoice_textqual; - vn.data.intvalue = o->field->data.intvalue; - vn.next = NULL; - str = GetSourceQualFromBioSource (biop, &vn, NULL); - break; - case ParseSrcOrgChoice_taxname_after_binomial : - vn.choice = SourceQualChoice_textqual; - vn.data.intvalue = Source_qual_taxname; - vn.next = NULL; - str = GetSourceQualFromBioSource (biop, &vn, NULL); - tmp = GetTextAfterNomial (str); - tmp = StringSave (tmp); - str = MemFree (str); - str = tmp; 
- break; + rrp = (RnaRefPtr) sfp->data.value.ptrvalue; + + type = RnaFeatTypeFromSeqFeat (sfp); + + if (type == NULL) return; + + /* add product if appropriate */ + if ((type->choice == RnaFeatType_preRNA || type->choice == RnaFeatType_mRNA + || type->choice == RnaFeatType_rRNA || type->choice == RnaFeatType_miscRNA + || type->choice == RnaFeatType_any) + && rrp->ext.choice == 1 + && !StringHasNoText (rrp->ext.value.ptrvalue)) { + rq = RnaQualNew (); + rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); + rq->field = Rna_field_product; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); + } else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL + && !StringHasNoText (rgp->product)) { + rq = RnaQualNew (); + rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); + rq->field = Rna_field_product; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } - portion = GetTextPortionFromString (str, tp); - if (portion != NULL) { - psip = ParseSourceInfoNew (bsp, sfp, sdp, NULL, portion); - if (psip == NULL) { - portion = MemFree (portion); - } else { - ValNodeAddPointer (source_list, 0, psip); - } + + /* add comment if present */ + if (!StringHasNoText (sfp->comment)) { + rq = RnaQualNew (); + rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); + rq->field = Rna_field_comment; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } - str = MemFree (str); -} + /* add tRNA specific if appropriate */ + if (type->choice == RnaFeatType_tRNA || (type->choice == RnaFeatType_any && rrp->type == 2)) { + /* codons recognized */ + rq = RnaQualNew (); + rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); + rq->field = Rna_field_codons_recognized; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, 
FieldType_rna_field, rq); -static void GetOrgParseSourcesForBioseq (BioseqPtr bsp, ParseSrcOrgPtr o, TextPortionPtr tp, ValNodePtr PNTR source_list) -{ - SeqDescrPtr sdp; - SeqFeatPtr sfp; - SeqMgrFeatContext fcontext; - SeqMgrDescContext dcontext; + /* anticodon */ + rq = RnaQualNew (); + rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); + rq->field = Rna_field_anticodon; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); + } - if (bsp == NULL || o == NULL || source_list == NULL) return; + /* add ncRNA class if appropriate and present */ + if ((type->choice == RnaFeatType_ncRNA || type->choice == RnaFeatType_any) + && rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL + && !StringHasNoText (rgp->_class)) { + rq = RnaQualNew (); + rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); + rq->field = Rna_field_ncrna_class; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); + } - if (o->type == Object_type_constraint_any || o->type == Object_type_constraint_descriptor) { - for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext); - sdp != NULL; - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext)) { - GetOrgParseSourcesForBioSource (sdp->data.ptrvalue, bsp, sdp, NULL, o, tp, source_list); - } + /* add transcript ID if present */ + if (sfp->product != NULL) { + rq = RnaQualNew (); + rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); + rq->field = Rna_field_transcript_id; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } - if (o->type == Object_type_constraint_any || o->type == Object_type_constraint_feature) { - for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext); - sfp != NULL; - sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_BIOSRC, 0, &fcontext)) { - 
GetOrgParseSourcesForBioSource (sfp->data.value.ptrvalue, bsp, NULL, sfp, o, tp, source_list); + /* add gene fields */ + grp = SeqMgrGetGeneXref (sfp); + if (grp == NULL) { + gene = SeqMgrGetOverlappingGene (sfp->location, &fcontext); + if (gene != NULL) { + grp = gene->data.value.ptrvalue; + } + } + if (grp != NULL && !SeqMgrGeneIsSuppressed (grp)) { + /* gene locus */ + if (!StringHasNoText (grp->locus)) { + rq = RnaQualNew (); + rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); + rq->field = Rna_field_gene_locus; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); + } + /* gene description */ + if (!StringHasNoText (grp->desc)) { + rq = RnaQualNew (); + rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); + rq->field = Rna_field_gene_locus; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); + } + /* maploc */ + if (!StringHasNoText (grp->maploc)) { + rq = RnaQualNew (); + rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); + rq->field = Rna_field_gene_maploc; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); + } + /* locus tag */ + if (!StringHasNoText (grp->locus_tag)) { + rq = RnaQualNew (); + rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); + rq->field = Rna_field_gene_locus_tag; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); + } + /* synonym */ + if (grp->syn != NULL) { + rq = RnaQualNew (); + rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); + rq->field = Rna_field_gene_synonym; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } } -} - -typedef struct parsesrccollection { - ParseSrcPtr src; - TextPortionPtr portion; - ValNodePtr src_list; -} ParseSrcCollectionData, PNTR 
ParseSrcCollectionPtr; + /* gene comment */ + if (gene != NULL && !StringHasNoText (gene->comment)) { + rq = RnaQualNew (); + rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); + rq->field = Rna_field_gene_comment; + ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); + } +} -static void FindParseSourceBioseqCallback (BioseqPtr bsp, Pointer userdata) +static ValNodePtr GetRnaQualFieldList (SeqEntryPtr sep) { - ParseSrcCollectionPtr psp; - - if (bsp == NULL || ISA_aa (bsp->mol) || userdata == NULL) - { - return; - } - - psp = (ParseSrcCollectionPtr) userdata; - if (psp->src == NULL) return; + ValNodePtr field_list = NULL; - switch (psp->src->choice) - { - case ParseSrc_defline: - if (!ISA_aa (bsp->mol)) { - GetDeflineSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); - } - break; - case ParseSrc_flatfile: - GetFlatFileSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); - break; - case ParseSrc_local_id: - if (! 
ISA_aa (bsp->mol) && bsp->repr != Seq_repr_seg) { - GetLocalIDSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); - } - break; - case ParseSrc_file_id: - GetNcbiFileSourceForBioseq (bsp, psp->portion, &(psp->src_list)); - break; - case ParseSrc_org: - GetOrgParseSourcesForBioseq (bsp, psp->src->data.ptrvalue, psp->portion, &(psp->src_list)); - break; - case ParseSrc_comment: - GetCommentSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); - break; - case ParseSrc_structured_comment: - GetStructuredCommentSourcesForBioseq(bsp, psp->portion, psp->src->data.ptrvalue, &(psp->src_list)); - break; - case ParseSrc_bankit_comment: - if (!ISA_aa (bsp->mol)) { - GetBankitCommentSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); - } - break; - } + VisitFeaturesInSep (sep, &field_list, GetRnaQualFieldListForAECRSampleCallback); + field_list = ValNodeSort (field_list, SortVnpByFieldType); + ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree); + return field_list; } -static void GetOrgNamesInRecordCallback (BioSourcePtr biop, Pointer userdata) +static void GetStructuredCommentFieldsCallback (SeqDescrPtr sdp, Pointer data) { - ValNodePtr PNTR org_names; - - if (biop == NULL || biop->org == NULL || StringHasNoText (biop->org->taxname) - || userdata == NULL) - { - return; - } - - org_names = (ValNodePtr PNTR) userdata; - - ValNodeAddPointer (org_names, 0, biop->org->taxname); -} + UserObjectPtr uop; + UserFieldPtr ufp; + ValNodePtr vnp; + if (sdp != NULL && data != NULL && sdp->choice == Seq_descr_user + && (uop = sdp->data.ptrvalue) != NULL + && IsUserObjectStructuredComment (uop)) { -static void SetToUpper (CharPtr cp) -{ - if (cp == NULL) return; - while (*cp != 0) { - if (isalpha (*cp)) { - *cp = toupper (*cp); + ufp = uop->data; + while (ufp != NULL) { + if (ufp->label != NULL && ufp->label->str != NULL + && StringCmp (ufp->label->str, "StructuredCommentPrefix") != 0 + && StringCmp (ufp->label->str, "StructuredCommentSuffix") != 0) { + vnp = 
ValNodeNew (NULL); + vnp->choice = StructuredCommentField_named; + vnp->data.ptrvalue = StringSave (ufp->label->str); + ValNodeAddPointer ((ValNodePtr PNTR) data, FieldType_struc_comment_field, vnp); + } + ufp = ufp->next; } - cp++; } } -NLM_EXTERN void -FixCapitalizationInString -(CharPtr PNTR pTitle, - Uint2 capitalization, - ValNodePtr org_names) +static ValNodePtr GetStructuredCommentFieldList (SeqEntryPtr sep) { - if (pTitle == NULL || capitalization == Cap_change_none) return; + ValNodePtr field_list = NULL; + ValNodePtr dbname, field_name; - switch (capitalization) { - case Cap_change_tolower: - ResetCapitalization (FALSE, *pTitle); - FixAbbreviationsInElement (pTitle); - FixOrgNamesInString (*pTitle, org_names); - break; - case Cap_change_toupper: - SetToUpper (*pTitle); - FixAbbreviationsInElement (pTitle); - FixOrgNamesInString (*pTitle, org_names); - break; - case Cap_change_firstcap: - ResetCapitalization (TRUE, *pTitle); - FixAbbreviationsInElement (pTitle); - FixOrgNamesInString (*pTitle, org_names); - break; - case Cap_change_firstcaprestnochange: - if (*pTitle != NULL && isalpha (**pTitle)) { - **pTitle = toupper (**pTitle); - } - break; - } + dbname = ValNodeNew (NULL); + dbname->choice = StructuredCommentField_database; + ValNodeAddPointer (&field_list, FieldType_struc_comment_field, dbname); + + field_name = ValNodeNew (NULL); + field_name->choice = StructuredCommentField_field_name; + ValNodeAddPointer (&field_list, FieldType_struc_comment_field, field_name); + + VisitDescriptorsInSep (sep, &field_list, GetStructuredCommentFieldsCallback); + + field_list = ValNodeSort (field_list, SortVnpByFieldType); + ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree); + return field_list; } -static void AddDeflineDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list) +static void CollectBioSourceDescCallback (SeqDescrPtr sdp, Pointer data) { - SeqDescrPtr sdp; - SeqMgrDescContext dcontext; + if (sdp != NULL && sdp->choice == 
Seq_descr_source && data != NULL) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); + } +} - if (bsp == NULL || dest_list == NULL) { - return; +static void CollectBioSourceFeatCallback (SeqFeatPtr sfp, Pointer data) +{ + if (sfp != NULL && sfp->data.choice == SEQFEAT_BIOSRC) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); } - - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &dcontext); - while (sdp != NULL) { - ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_title, &dcontext); +} + + +static void CollectFeaturesCallback (SeqFeatPtr sfp, Pointer data) +{ + if (sfp != NULL && data != NULL && sfp->data.choice != SEQFEAT_BIOSRC && sfp->data.choice != SEQFEAT_PUB) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); } } -static ValNodePtr GetFeatureListForNucleotideBioseq (Uint1 featdef, BioseqPtr bsp); -static ValNodePtr GetFeatureListForProteinBioseq (Uint1 featdef, BioseqPtr bsp); +static void CollectPubDescCallback (SeqDescrPtr sdp, Pointer data) +{ + if (sdp != NULL && sdp->choice == Seq_descr_pub && data != NULL) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); + } +} -static void AddFeatureDestinationsForBioseq (BioseqPtr bsp, FeatureFieldLegalPtr featfield, ValNodePtr PNTR dest_list) +static void CollectPubFeatCallback (SeqFeatPtr sfp, Pointer data) { - Int4 featdef; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); + } +} - if (bsp == NULL || featfield == NULL || dest_list == NULL) return; - featdef = GetFeatdefFromFeatureType (featfield->type); - if (ISA_aa (bsp->mol)) { - ValNodeLink (dest_list, GetFeatureListForProteinBioseq (featdef, bsp)); - } else { - ValNodeLink (dest_list, GetFeatureListForNucleotideBioseq (featdef, bsp)); +static void CollectBioseqCallback (BioseqPtr bsp, Pointer data) +{ + if (bsp != NULL && data != NULL) { + ValNodeAddPointer 
((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp); } +} + +/* +static void CollectNucBioseqCallback (BioseqPtr bsp, Pointer data) +{ + if (bsp != NULL && data != NULL && !ISA_aa (bsp->mol)) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp); + } } +*/ -static void GetBioSourceDestinationsForBioseq (BioseqPtr bsp, Uint2 object_type, ValNodePtr PNTR dest_list) +static void AddCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list) { - SeqDescrPtr sdp; - SeqFeatPtr sfp; - SeqMgrFeatContext fcontext; - SeqMgrDescContext dcontext; + SeqDescrPtr sdp; + SeqMgrDescContext context; + Boolean found = FALSE; + ObjValNodePtr ovp; - if (bsp == NULL || dest_list == NULL) - { + if (bsp == NULL || dest_list == NULL) { return; } - - if (object_type == Object_type_constraint_any || object_type == Object_type_constraint_descriptor) - { - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext); - while (sdp != NULL) - { - ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext); - } + + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &context); + sdp != NULL; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_comment, &context)) { + ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); + found = TRUE; } - - if (object_type == Object_type_constraint_any || object_type == Object_type_constraint_feature) - { - sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext); - while (sfp != NULL) - { - ValNodeAddPointer (dest_list, OBJ_SEQFEAT, sfp); - sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_BIOSRC, 0, &fcontext); - } + if (!found) { + /* if no existing comment descriptor, create one, marked for delete. + * unmark it for deletion when it gets populated. 
+ */ + sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_comment); + sdp->data.ptrvalue = StringSave (""); + ovp = (ObjValNodePtr) sdp; + ovp->idx.deleteme = TRUE; + ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); } } -static void AddParseDestinations (ParseSourceInfoPtr psip, ParseDestPtr dst) +static ValNodePtr CollectCommentDescriptors (SeqEntryPtr sep) { - ParseDstOrgPtr o; + ValNodePtr seq_list = NULL, vnp, desc_list = NULL; - if (psip == NULL || dst == NULL) return; + if (sep == NULL) { + return NULL; + } - switch (dst->choice) { - case ParseDest_defline : - AddDeflineDestinationsForBioseq (psip->bsp, &(psip->dest_list)); - break; - case ParseDest_org : - o = (ParseDstOrgPtr) dst->data.ptrvalue; - if ((o->type == Object_type_constraint_any || o->type == Object_type_constraint_descriptor) - && psip->sdp != NULL && psip->sdp->choice == Seq_descr_source) { - ValNodeAddPointer (&(psip->dest_list), OBJ_SEQDESC, psip->sdp); - } else if ((o->type == Object_type_constraint_any || o->type == Object_type_constraint_feature) - && psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_BIOSRC) { - ValNodeAddPointer (&(psip->dest_list), OBJ_SEQFEAT, psip->sfp); - } else { - GetBioSourceDestinationsForBioseq (psip->bsp, o->type, &(psip->dest_list)); - } - break; - case ParseDest_featqual : - AddFeatureDestinationsForBioseq (psip->bsp, dst->data.ptrvalue, &(psip->dest_list)); - break; - case ParseDest_comment_descriptor : - AddCommentDescriptorDestinationsForBioseq (psip->bsp, &(psip->dest_list)); - break; - case ParseDest_dbxref : - GetBioSourceDestinationsForBioseq (psip->bsp, Object_type_constraint_any, &(psip->dest_list)); - break; + /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */ + seq_list = CollectNucBioseqs (sep); + + for (vnp = seq_list; vnp != NULL; vnp = vnp->next) { + AddCommentDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list); } + seq_list = ValNodeFree (seq_list); + return desc_list; } -static Boolean 
SourceHasOneUndeletedDestination (ParseSourceInfoPtr source) +static void CollectStructuredCommentsCallback (SeqDescrPtr sdp, Pointer data) { - Int4 num_seen = 0; - ValNodePtr vnp; - - if (source == NULL - || source->dest_list == NULL) - { - return FALSE; - } - - vnp = source->dest_list; - while (vnp != NULL && num_seen < 2) - { - if (vnp->choice > 1) - { - num_seen ++; - } - vnp = vnp->next; - } - if (num_seen == 1) - { - return TRUE; - } - else - { - return FALSE; + UserObjectPtr uop; + + if (sdp != NULL && data != NULL && sdp->choice == Seq_descr_user + && (uop = sdp->data.ptrvalue) != NULL + && IsUserObjectStructuredComment (uop)) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); } } -static void CombineSourcesForDestinations (ValNodePtr PNTR source_list) +static void CollectDBLinksCallback (SeqDescrPtr sdp, Pointer data) { - ValNodePtr source1_vnp, source2_vnp, dest1_vnp, dest2_vnp; - ValNodePtr source_new, del_vnp; - ParseSourceInfoPtr psip1, psip2, new_psip; - CharPtr comb_txt; - - for (source1_vnp = *source_list; - source1_vnp != NULL; - source1_vnp = source1_vnp->next) - { - psip1 = (ParseSourceInfoPtr) source1_vnp->data.ptrvalue; - if (psip1 == NULL || psip1->dest_list == NULL) - { - continue; - } - for (source2_vnp = source1_vnp->next; - source2_vnp != NULL; - source2_vnp = source2_vnp->next) - { - if (source2_vnp->choice > 0) - { - /* already marked for deletion */ - continue; - } - psip2 = (ParseSourceInfoPtr) source2_vnp->data.ptrvalue; - if (psip2 == NULL || psip2->dest_list == NULL) - { - continue; - } - for (dest1_vnp = psip1->dest_list; - dest1_vnp != NULL; - dest1_vnp = dest1_vnp->next) - { - if (dest1_vnp->choice == 0) - { - /* already marked for deletion */ - continue; - } - for (dest2_vnp = psip2->dest_list; - dest2_vnp != NULL; - dest2_vnp = dest2_vnp->next) - { - if (dest2_vnp->choice == 0) - { - /* already marked for deletion */ - continue; - } - if (dest1_vnp->choice == dest2_vnp->choice - && dest1_vnp->data.ptrvalue == 
dest2_vnp->data.ptrvalue) - { - comb_txt = (CharPtr) (MemNew (sizeof (Char) - * (StringLen (psip1->parse_src_txt) - + StringLen (psip2->parse_src_txt) - + 2))); - StringCpy (comb_txt, psip1->parse_src_txt); - StringCat (comb_txt, ";"); - StringCat (comb_txt, psip2->parse_src_txt); - - /* If the first source has a single destination, then we can - * add the text from the second source to the first and remove - * the destination from the second source. - */ - if (SourceHasOneUndeletedDestination (psip1)) - { - - psip1->parse_src_txt = MemFree (psip1->parse_src_txt); - psip1->parse_src_txt = comb_txt; - dest2_vnp->choice = 0; - } - /* If the first source has more than one destination and - * the second source has a single destination, then we can - * remove the repeated desination from the first source - * and add the text from the first source to the second source. - */ - else if (SourceHasOneUndeletedDestination (psip2)) - { - psip2->parse_src_txt = MemFree (psip2->parse_src_txt); - psip2->parse_src_txt = comb_txt; - dest1_vnp->choice = 0; - } - /* If the first and second sources have multiple destinations, - * we need to remove the repeated destination from both the first - * and second source and create a new source with the combined - * text for just the repeated destination. 
- */ - else - { - new_psip = ParseSourceInfoNew (NULL, NULL, NULL, NULL, comb_txt); - ValNodeAddPointer (&(new_psip->dest_list), - dest1_vnp->choice, - dest1_vnp->data.ptrvalue); - dest1_vnp->choice = 0; - dest2_vnp->choice = 0; - source_new = ValNodeNew (NULL); - source_new->choice = 0; - source_new->data.ptrvalue = new_psip; - source_new->next = source1_vnp->next; - source1_vnp->next = source_new; - } - } - } - } - - del_vnp = ValNodeExtractList (&(psip1->dest_list), 0); - del_vnp = ValNodeFree (del_vnp); - if (psip1->dest_list == NULL) - { - source1_vnp->choice = 1; - } - del_vnp = ValNodeExtractList (&(psip2->dest_list), 0); - del_vnp = ValNodeFree (del_vnp); - if (psip2->dest_list == NULL) - { - source2_vnp->choice = 1; - } - } - } + UserObjectPtr uop; - /* now remove sources deleted */ - del_vnp = ValNodeExtractList (source_list, 1); - del_vnp = ParseSourceListFree (del_vnp); + if (sdp != NULL && data != NULL && sdp->choice == Seq_descr_user + && (uop = sdp->data.ptrvalue) != NULL + && IsUserObjectDBLink (uop)) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); + } } -static BioseqSetPtr GetPartsForSourceDescriptorOnSegSet (SeqDescrPtr sdp) +static void AddDeflineDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list) { + SeqDescrPtr sdp; + SeqMgrDescContext context; + Boolean found = FALSE; ObjValNodePtr ovp; - BioseqSetPtr bssp; - SeqEntryPtr sep; - - if (sdp == NULL || sdp->extended != 1) { - return NULL; - } - ovp = (ObjValNodePtr) sdp; - if (ovp->idx.parenttype != OBJ_BIOSEQSET || ovp->idx.parentptr == NULL) { - return NULL; + + if (bsp == NULL || dest_list == NULL) { + return; } - bssp = (BioseqSetPtr) ovp->idx.parentptr; - - if (bssp->_class == BioseqseqSet_class_nuc_prot - && IS_Bioseq_set (bssp->seq_set) - && bssp->seq_set->data.ptrvalue != NULL) { - bssp = (BioseqSetPtr) bssp->seq_set->data.ptrvalue; + + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &context); + sdp != NULL; + sdp = 
SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_title, &context)) { + ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); + found = TRUE; } - - if (bssp->_class == BioseqseqSet_class_segset) { - sep = bssp->seq_set; - while (sep != NULL) { - if (IS_Bioseq_set (sep) && sep->data.ptrvalue != NULL) { - bssp = (BioseqSetPtr) sep->data.ptrvalue; - if (bssp->_class == BioseqseqSet_class_parts) { - return bssp; - } - } - sep = sep->next; - } + if (!found) { + /* if no existing comment descriptor, create one, marked for delete. + * unmark it for deletion when it gets populated. + */ + sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_title); + sdp->data.ptrvalue = StringSave (""); + ovp = (ObjValNodePtr) sdp; + ovp->idx.deleteme = TRUE; + ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); } - - return NULL; } -static SeqDescrPtr FindSourceDescriptorInSeqEntry (SeqEntryPtr sep) +static ValNodePtr CollectDeflineDescriptors (SeqEntryPtr sep) { - BioseqPtr bsp; - BioseqSetPtr bssp; - SeqDescrPtr sdp = NULL; - - if (sep != NULL && sep->data.ptrvalue != NULL) { - if (IS_Bioseq (sep)) { - bsp = (BioseqPtr) sep->data.ptrvalue; - sdp = bsp->descr; - } else if (IS_Bioseq_set (sep)) { - bssp = (BioseqSetPtr) sep->data.ptrvalue; - sdp = bssp->descr; - } - while (sdp != NULL && sdp->choice != Seq_descr_source) - { - sdp = sdp->next; - } + ValNodePtr seq_list = NULL, vnp, desc_list = NULL; + + if (sep == NULL) { + return NULL; } - return sdp; + + /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */ + seq_list = CollectNucBioseqs (sep); + + for (vnp = seq_list; vnp != NULL; vnp = vnp->next) { + AddDeflineDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list); + } + seq_list = ValNodeFree (seq_list); + return desc_list; } -static SeqDescrPtr PropagateToSeqEntry (SeqEntryPtr sep, SeqDescrPtr sdp) +static void AddGenbankBlockDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list) { - BioseqPtr bsp; - BioseqSetPtr bssp; - SeqDescrPtr new_sdp = NULL; - - if 
(sep != NULL && sep->data.ptrvalue != NULL) { - if (IS_Bioseq (sep)) { - bsp = (BioseqPtr) sep->data.ptrvalue; - new_sdp = AsnIoMemCopy ((Pointer) sdp, - (AsnReadFunc) SeqDescrAsnRead, - (AsnWriteFunc) SeqDescrAsnWrite); - ValNodeLink (&(bsp->descr), new_sdp); - } else if (IS_Bioseq_set (sep)) { - bssp = (BioseqSetPtr) sep->data.ptrvalue; - new_sdp = AsnIoMemCopy ((Pointer) sdp, - (AsnReadFunc) SeqDescrAsnRead, - (AsnWriteFunc) SeqDescrAsnWrite); - ValNodeLink (&(bssp->descr), new_sdp); - } + SeqDescrPtr sdp; + SeqMgrDescContext context; + Boolean found = FALSE; + ObjValNodePtr ovp; + + if (bsp == NULL || dest_list == NULL) { + return; + } + + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &context); + sdp != NULL; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &context)) { + ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); + found = TRUE; + } + if (!found) { + /* if no existing comment descriptor, create one, marked for delete. + * unmark it for deletion when it gets populated. 
+ */ + sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_genbank); + sdp->data.ptrvalue = GBBlockNew (); + ovp = (ObjValNodePtr) sdp; + ovp->idx.deleteme = TRUE; + ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); } - return new_sdp; } -static void PropagateSourceOnSegSetForParse (ValNodePtr parse_source_list) +static ValNodePtr CollectGenbankBlockDescriptors (SeqEntryPtr sep) { - ParseSourceInfoPtr psip; - ValNodePtr vnp_src, vnp_dst; - SeqDescrPtr sdp, other_sdp; - SeqEntryPtr sep; - ValNodePtr extra_dests = NULL; - BioseqSetPtr parts_bssp; - - for (vnp_src = parse_source_list; vnp_src != NULL; vnp_src = vnp_src->next) { - psip = (ParseSourceInfoPtr) vnp_src->data.ptrvalue; - if (psip != NULL) { - for (vnp_dst = psip->dest_list; vnp_dst != NULL; vnp_dst = vnp_dst->next) { - if (vnp_dst->choice == OBJ_SEQDESC) { - sdp = (SeqDescrPtr) vnp_dst->data.ptrvalue; - if (sdp != NULL && sdp->choice == Seq_descr_source) { - parts_bssp = GetPartsForSourceDescriptorOnSegSet (sdp); - if (parts_bssp != NULL) { - for (sep = parts_bssp->seq_set; sep != NULL; sep = sep->next) { - if (IS_Bioseq(sep) && sep->data.ptrvalue == psip->bsp) { - other_sdp = FindSourceDescriptorInSeqEntry (sep); - if (other_sdp == NULL) { - other_sdp = PropagateToSeqEntry (sep, sdp); - ValNodeAddPointer (&extra_dests, OBJ_SEQDESC, other_sdp); - } - } - } - - /* set choice to 0 so master won't be a destination */ - vnp_dst->choice = 0; - - } - } - } - } - /* add extra destinations to list */ - ValNodeLink (&psip->dest_list, extra_dests); - extra_dests = NULL; - } + ValNodePtr seq_list = NULL, vnp, desc_list = NULL; + + if (sep == NULL) { + return NULL; } - + + /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */ + seq_list = CollectNucBioseqs (sep); + + for (vnp = seq_list; vnp != NULL; vnp = vnp->next) { + AddGenbankBlockDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list); + } + seq_list = ValNodeFree (seq_list); + return desc_list; } -NLM_EXTERN CharPtr GetDBxrefFromBioSource 
(BioSourcePtr biop, CharPtr db_name) +NLM_EXTERN ValNodePtr GetObjectListForFieldType (Uint1 field_type, SeqEntryPtr sep) { - CharPtr rval = NULL; - ValNodePtr vnp; - DbtagPtr dbtag; + ValNodePtr object_list = NULL; + Uint2 entityID; - if (biop == NULL || biop->org == NULL || StringHasNoText (db_name)) { - return NULL; - } - for (vnp = biop->org->db; vnp != NULL && rval == NULL; vnp = vnp->next) { - dbtag = (DbtagPtr) vnp->data.ptrvalue; - if (dbtag != NULL && StringCmp (db_name, dbtag->db) == 0) { - rval = GetObjectIdString (dbtag->tag); - } + switch (field_type) { + case FieldType_source_qual: + VisitDescriptorsInSep (sep, &object_list, CollectBioSourceDescCallback); + VisitFeaturesInSep (sep, &object_list, CollectBioSourceFeatCallback); + break; + case FieldType_cds_gene_prot: + entityID = ObjMgrGetEntityIDForChoice(sep); + object_list = BuildCGPSetList (entityID, NULL, NULL); + break; + case FieldType_feature_field: + VisitFeaturesInSep (sep, &object_list, CollectFeaturesCallback); + break; + case FieldType_molinfo_field: + VisitBioseqsInSep (sep, &object_list, CollectBioseqCallback); + break; + case FieldType_pub: + VisitDescriptorsInSep (sep, &object_list, CollectPubDescCallback); + VisitFeaturesInSep (sep, &object_list, CollectPubFeatCallback); + break; + case FieldType_rna_field: + VisitFeaturesInSep (sep, &object_list, CollectFeaturesCallback); + break; + case FieldType_struc_comment_field: + VisitDescriptorsInSep (sep, &object_list, CollectStructuredCommentsCallback); + break; + case FieldType_misc: + /* VisitBioseqsInSep (sep, &object_list, CollectNucBioseqCallback); */ + object_list = CollectNucBioseqs (sep); + ValNodeLink (&object_list, CollectCommentDescriptors (sep)); + break; } - return rval; + return object_list; } -NLM_EXTERN Boolean SetDBxrefForBioSource (BioSourcePtr biop, CharPtr db_name, CharPtr str, Uint2 existing_text) +typedef struct seqcollector { + ValNodePtr object_list; + ConstraintChoiceSetPtr csp; +} SeqCollectorData, PNTR 
SeqCollectorPtr; + + +static void SeqCollectorCallback (BioseqPtr bsp, Pointer data) { - ValNodePtr dbx; - DbtagPtr dbtag; - Boolean found = FALSE; - Char buf[20]; - Boolean rval = FALSE; + SeqCollectorPtr s; - if (biop == NULL || StringHasNoText (db_name) || StringHasNoText (str)) { - return FALSE; + if ((s = (SeqCollectorPtr) data) == NULL) { + return; } - if (biop->org == NULL) - { - biop->org = OrgRefNew(); - } - dbx = biop->org->db; - while (dbx != NULL && !found) - { - dbtag = (DbtagPtr) dbx->data.ptrvalue; - if (dbtag != NULL && dbtag->tag != NULL - && StringCmp (dbtag->db, db_name) == 0) - { - found = TRUE; - } - if (!found) - { - dbx = dbx->next; - } - } - if (!found) - { - dbtag = DbtagNew(); - dbtag->db = StringSave (db_name); - ValNodeAddPointer (&(biop->org->db), 0, dbtag); - } - if (dbtag->tag == NULL) - { - dbtag->tag = ObjectIdNew(); - } - /* if it was a number before, make it a string now */ - if (dbtag->tag->id > 0 && dbtag->tag->str == NULL) - { - sprintf (buf, "%d", dbtag->tag->id); - dbtag->tag->id = 0; - dbtag->tag->str = StringSave (buf); + if (DoesObjectMatchConstraintChoiceSet (OBJ_BIOSEQ, bsp, s->csp)) { + ValNodeAddPointer (&(s->object_list), OBJ_BIOSEQ, bsp); } - rval = SetStringValue (&(dbtag->tag->str), str, existing_text); - return rval; } -NLM_EXTERN Boolean RemoveDBxrefForBioSource (BioSourcePtr biop, CharPtr db_name, StringConstraintPtr scp) +NLM_EXTERN ValNodePtr GetSequenceListForConstraint (SeqEntryPtr sep, ConstraintChoiceSetPtr csp) { - ValNodePtr dbx, prev = NULL, dbx_next; - DbtagPtr dbtag; - CharPtr str; - Boolean found = FALSE; + SeqCollectorData s; - if (biop == NULL || StringHasNoText (db_name)) { - return FALSE; + MemSet (&s, 0, sizeof (SeqCollectorData)); + s.csp = csp; + VisitBioseqsInSep (sep, &s, SeqCollectorCallback); + return s.object_list; +} + + +NLM_EXTERN ValNodePtr GetFieldListForFieldType (Uint1 field_type, SeqEntryPtr sep) +{ + ValNodePtr fields = NULL; + + /* get a list of the fields that are appropriate 
for the objects collected */ + switch (field_type) { + case FieldType_cds_gene_prot: + fields = MakeCDSGeneProtFieldTypeList (); + break; + case FieldType_source_qual: + fields = GetSourceQualSampleFieldList (sep); + break; + case FieldType_feature_field: + fields = GetFeatureQualFieldList (sep); + break; + case FieldType_molinfo_field: + fields = MakeSequenceQualFieldTypeList (); + break; + case FieldType_pub: + fields = MakePubFieldTypeList (); + break; + case FieldType_rna_field: + fields = GetRnaQualFieldList (sep); + break; + case FieldType_struc_comment_field: + fields = GetStructuredCommentFieldList (sep); + break; + case FieldType_misc: + ValNodeAddInt (&fields, FieldType_misc, Misc_field_genome_project_id); + ValNodeAddInt (&fields, FieldType_misc, Misc_field_comment_descriptor); + ValNodeAddInt (&fields, FieldType_misc, Misc_field_defline); + ValNodeAddInt (&fields, FieldType_misc, Misc_field_keyword); + break; } + return fields; +} - if (biop->org == NULL) - { - biop->org = OrgRefNew(); + +NLM_EXTERN ValNodePtr GetAECRSampleListForSeqEntry (Uint1 field_type, SeqEntryPtr sep) +{ + ValNodePtr object_list; + ValNodePtr fields = NULL, vnp; + ValNodePtr list = NULL; + AECRSamplePtr sample; + BatchExtraPtr batch_extra; + + object_list = GetObjectListForFieldType (field_type, sep); + + /* get a list of the fields that are appropriate for the objects collected */ + fields = GetFieldListForFieldType (field_type, sep); + + batch_extra = BatchExtraNew (); + for (vnp = fields; vnp != NULL; vnp = vnp->next) { + InitBatchExtraForField (batch_extra, vnp, sep); } - dbx = biop->org->db; - for (dbx = biop->org->db; dbx != NULL; dbx = dbx_next) - { - dbx_next = dbx->next; - dbtag = (DbtagPtr) dbx->data.ptrvalue; - str = NULL; - if (dbtag != NULL && dbtag->tag != NULL - && StringCmp (dbtag->db, db_name) == 0 - && (scp == NULL || ((str = GetDbtagString(dbtag)) != NULL && DoesStringMatchConstraint (str, scp)))) - { - if (prev == NULL) { - biop->org->db = dbx->next; - } else { 
- prev->next = dbx->next; - } - dbx->data.ptrvalue = DbtagFree (dbx->data.ptrvalue); - dbx = ValNodeFree (dbx); - found = TRUE; - } - else - { - prev = dbx; + for (vnp = fields; vnp != NULL; vnp = vnp->next) { + sample = GetAECRSampleFromObjectListEx (object_list, vnp, batch_extra); + if (sample != NULL && sample->num_found > 0) { + ValNodeAddPointer (&list, 0, sample); + } else { + sample = AECRSampleFree (sample); } - str = MemFree (str); } - return found; -} + batch_extra = BatchExtraFree (batch_extra); + fields = FieldTypeListFree (fields); -static Int4 SetFieldForDestList (ValNodePtr dest_list, ParseDestPtr field, CharPtr str, Uint2 existing_text) -{ - ValNodePtr vnp; - SeqDescrPtr sdp; - ObjValNodePtr ovp; - CharPtr cp; - BioSourcePtr biop; - ParseDstOrgPtr o; - FeatureFieldLegalPtr fl; - FeatureField f; - Boolean was_empty; - Int4 num_succeeded = 0; + object_list = FreeObjectList (object_list); + return list; +} - if (dest_list == NULL || field == NULL) return 0; - switch (field->choice) { - case ParseDest_defline : - for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQDESC && vnp->data.ptrvalue != NULL) { - sdp = (SeqDescrPtr) vnp->data.ptrvalue; - if (sdp->choice == Seq_descr_title) { - cp = sdp->data.ptrvalue; - if (SetStringValue (&cp, str, existing_text)) { - num_succeeded++; - } - sdp->data.ptrvalue = cp; - } - } - } - break; - case ParseDest_org : - o = (ParseDstOrgPtr) field->data.ptrvalue; - if (o != NULL) { - for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { - biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue); - if (SetSourceQualInBioSource (biop, o->field, NULL, str, existing_text)) { - num_succeeded++; - } - } - } - break; - case ParseDest_featqual: - fl = (FeatureFieldLegalPtr) field->data.ptrvalue; - if (fl != NULL) { - f.type = fl->type; - f.field = ValNodeNew(NULL); - f.field->next = NULL; - f.field->choice = FeatQualChoice_legal_qual; - f.field->data.intvalue = fl->field; - for (vnp = 
dest_list; vnp != NULL; vnp = vnp->next) { - if (SetQualOnFeature (vnp->data.ptrvalue, &f, NULL, str, existing_text)) { - num_succeeded++; - } - } - f.field = ValNodeFree (f.field); - } - break; - case ParseDest_comment_descriptor: - for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { - sdp = vnp->data.ptrvalue; - if (StringHasNoText (sdp->data.ptrvalue)) { - was_empty = TRUE; - } else { - was_empty = FALSE; - } - cp = sdp->data.ptrvalue; - if (SetStringValue (&cp, str, existing_text)) { - num_succeeded++; - } - sdp->data.ptrvalue = cp; - if (was_empty) { - ovp = (ObjValNodePtr) sdp; - ovp->idx.deleteme = FALSE; - } - } - break; - case ParseDest_dbxref: - if (!StringHasNoText (field->data.ptrvalue)) { - for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { - biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue); - if (SetDBxrefForBioSource (biop, field->data.ptrvalue, str, existing_text)) { - num_succeeded++; - } - } - } - break; +NLM_EXTERN ValNodePtr GetAECRSampleList (AECRActionPtr act, SeqEntryPtr sep) +{ + Uint1 field_type; + Uint2 entityID; + ValNodePtr object_list; + ValNodePtr fields = NULL, vnp; + ValNodePtr list = NULL; + AECRSamplePtr sample; + BatchExtraPtr batch_extra; + + batch_extra = BatchExtraNew (); + InitBatchExtraForAECRAction (batch_extra, act, sep); + + field_type = FieldTypeFromAECRAction (act); + if (field_type == FieldType_cds_gene_prot) { + entityID = ObjMgrGetEntityIDForChoice(sep); + object_list = BuildCGPSetList (entityID, act, NULL); + } else { + object_list = GetObjectListForAECRActionEx (sep, act, batch_extra); } - return num_succeeded; -} + /* get fields used in action */ + fields = GetFieldTypeListFromAECRAction (act); + for (vnp = fields; vnp != NULL; vnp = vnp->next) { + sample = GetAECRSampleFromObjectListEx (object_list, vnp, batch_extra); + if (sample != NULL && sample->num_found > 0) { + ValNodeAddPointer (&list, 0, sample); + } else { + sample = AECRSampleFree (sample); + } + } -static void AddToSampleForDestList 
(AECRSamplePtr sample, ValNodePtr dest_list, ParseDestPtr field) -{ - ValNodePtr vnp; - SeqDescrPtr sdp; - BioSourcePtr biop; - ParseDstOrgPtr o; - FeatureFieldLegalPtr fl; - FeatureField f; + fields = FieldTypeListFree (fields); - if (dest_list == NULL || field == NULL || sample == NULL) return; + batch_extra = BatchExtraFree (batch_extra); + DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); - switch (field->choice) { - case ParseDest_defline : - for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQDESC && vnp->data.ptrvalue != NULL) { - sdp = (SeqDescrPtr) vnp->data.ptrvalue; - if (sdp->choice == Seq_descr_title) { - AddTextToAECRSample (sample, StringSave (sdp->data.ptrvalue)); - } - } - } - break; - case ParseDest_org : - o = (ParseDstOrgPtr) field->data.ptrvalue; - if (o != NULL) { - for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { - biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue); - AddTextToAECRSample (sample, GetSourceQualFromBioSource (biop, o->field, NULL)); - } - } - break; - case ParseDest_featqual: - fl = (FeatureFieldLegalPtr) field->data.ptrvalue; - if (fl != NULL) { - f.type = fl->type; - f.field = ValNodeNew(NULL); - f.field->next = NULL; - f.field->choice = FeatQualChoice_legal_qual; - f.field->data.intvalue = fl->field; - for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { - AddTextToAECRSample (sample, GetQualFromFeature (vnp->data.ptrvalue, &f, NULL)); - } - f.field = ValNodeFree (f.field); - } - break; - case ParseDest_comment_descriptor: - for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { - sdp = (SeqDescrPtr) vnp->data.ptrvalue; - AddTextToAECRSample (sample, StringSave (sdp->data.ptrvalue)); - } - break; - case ParseDest_dbxref: - if (!StringHasNoText (field->data.ptrvalue)) { - for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { - biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue); - AddTextToAECRSample (sample, GetDBxrefFromBioSource (biop, 
field->data.ptrvalue)); - } - } - break; + FreeObjectList (object_list); + return list; +} + + +NLM_EXTERN AECRSamplePtr GetFieldSampleFromList (ValNodePtr list, FieldTypePtr field) +{ + AECRSamplePtr sample = NULL; + + while (list != NULL && sample == NULL) { + sample = list->data.ptrvalue; + if (sample != NULL && !DoFieldTypesMatch (sample->field, field)) { + sample = NULL; + } + list = list->next; } + return sample; } -static void StripFieldForSrcList (ParseSourceInfoPtr psip, ParseSrcPtr field, TextPortionPtr text_portion) +static void RemoveFieldsForWhichThereAreNoData (ValNodePtr PNTR field_list, ValNodePtr object_list) { - CharPtr str; - ParseSrcOrgPtr o; - BioSourcePtr biop; + ValNodePtr vnp_prev = NULL, vnp_f, vnp_next; + AECRSamplePtr sample; - if (psip == NULL || field == NULL || text_portion == NULL) return; + if (field_list == NULL || *field_list == NULL) { + return; + } - switch (field->choice) { - case ParseSrc_defline : - if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_title) { - ReplaceStringForParse (psip->sdp->data.ptrvalue, text_portion); - } - break; - case ParseSrc_org : - o = (ParseSrcOrgPtr) field->data.ptrvalue; - if (o != NULL) { - if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_source) { - biop = (BioSourcePtr) psip->sdp->data.ptrvalue; - str = GetSourceQualFromBioSource (biop, o->field, NULL); - ReplaceStringForParse (str, text_portion); - SetSourceQualInBioSource (biop, o->field, NULL, str, ExistingTextOption_replace_old); - str = MemFree (str); - } else if (psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_BIOSRC) { - biop = (BioSourcePtr) psip->sfp->data.value.ptrvalue; - str = GetSourceQualFromBioSource (biop, o->field, NULL); - ReplaceStringForParse (str, text_portion); - SetSourceQualInBioSource (biop, o->field, NULL, str, ExistingTextOption_replace_old); - str = MemFree (str); - } - } - break; - case ParseSrc_comment: - if (psip->sdp != NULL) { - if (psip->sdp->choice == Seq_descr_user) { - 
StripBankitCommentForParse (psip->sdp, text_portion); - } else if (psip->sdp->choice == Seq_descr_comment) { - ReplaceStringForParse (psip->sdp->data.ptrvalue, text_portion); + vnp_prev = NULL; + vnp_f = *field_list; + while (vnp_f != NULL) { + vnp_next = vnp_f->next; + if (vnp_f->choice == FieldType_source_qual + || vnp_f->choice == FieldType_feature_field + || vnp_f->choice == FieldType_rna_field) { + vnp_prev = vnp_f; + } else { + sample = GetAECRSampleFromObjectList (object_list, vnp_f); + if (sample == NULL || sample->num_found == 0) { + if (vnp_prev == NULL) { + *field_list = vnp_next; + } else { + vnp_prev->next = vnp_next; } + vnp_f->next = NULL; + vnp_f = FieldTypeFree (vnp_f); + } else { + vnp_prev = vnp_f; } - if (psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_COMMENT) { - ReplaceStringForParse (psip->sfp->data.value.ptrvalue, text_portion); - } - break; - case ParseSrc_bankit_comment: - if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_user) { - StripBankitCommentForParse (psip->sdp, text_portion); - } - break; - case ParseSrc_structured_comment: - if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_user) { - StripStructuredCommentForParse (psip->sdp, field->data.ptrvalue, text_portion); - } - break; + sample = AECRSampleFree (sample); + } + vnp_f = vnp_next; } } - -NLM_EXTERN AECRSamplePtr GetExistingTextForParseAction (ParseActionPtr action, SeqEntryPtr sep) +NLM_EXTERN void GetAECRExistingTextList (Uint1 field_type, SeqEntryPtr sep, FILE *fp) { - ParseSrcCollectionData psd; - ParseSourceInfoPtr psip; - ValNodePtr vnp; - ValNodePtr dest_list = NULL; - AECRSamplePtr sample; - - if (action == NULL || sep == NULL) return 0; - - psd.src = action->src; - psd.portion = action->portion; - psd.src_list = NULL; - - /* first, we need to get a list of the parse sources */ - VisitBioseqsInSep (sep, &psd, FindParseSourceBioseqCallback); + ValNodePtr object_list, vnp_f, vnp_o; + ValNodePtr fields = NULL; + BioseqPtr bsp; + Char id_buf[255]; + 
CharPtr txt1 = NULL; + object_list = GetObjectListForFieldType (field_type, sep); - /* for each parse source, get a list of the destinations */ - for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next) - { - if (vnp->data.ptrvalue == NULL) continue; - psip = (ParseSourceInfoPtr) vnp->data.ptrvalue; + /* get a list of the fields that are appropriate for the objects collected */ + fields = GetFieldListForFieldType (field_type, sep); - /* find destinations */ - AddParseDestinations (psip, action->dest); + /* remove fields for which there is no data */ + RemoveFieldsForWhichThereAreNoData (&fields, object_list); - /* add destinations to list */ - ValNodeLink (&dest_list, psip->dest_list); - psip->dest_list = NULL; + /* add header */ + fprintf (fp, "Accession"); + for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) { + txt1 = SummarizeFieldType (vnp_f); + fprintf (fp, "\t%s", txt1); + txt1 = MemFree (txt1); + } + fprintf (fp, "\n"); + + for (vnp_o = object_list; vnp_o != NULL; vnp_o = vnp_o->next) { + bsp = GetSequenceForObject (vnp_o->choice, vnp_o->data.ptrvalue); + if (bsp == NULL) { + id_buf[0] = 0; + } else { + SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1); + } + fprintf (fp, "%s", id_buf); + for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) { + txt1 = GetFieldValueForObject (vnp_o->choice, vnp_o->data.ptrvalue, vnp_f, NULL); + fprintf (fp, "\t%s", txt1 == NULL ? 
"" : txt1); + txt1 = MemFree (txt1); + } + fprintf (fp, "\n"); } - psd.src_list = ParseSourceListFree (psd.src_list); + fields = FieldTypeListFree (fields); - /* get sample for dest_list */ - sample = AECRSampleNew (); - AddToSampleForDestList (sample, dest_list, action->dest); - dest_list = ValNodeFree (dest_list); - return sample; + object_list = FreeObjectList (object_list); } -static Int4 ApplyParseActionToSeqEntry (ParseActionPtr action, SeqEntryPtr sep) +static void InsertBlanksInRow (ValNodePtr row, Int4 insert_pos, Int4Ptr num_field_per_pos, Int4 num_blanks) { - ParseSrcCollectionData psd; - ParseSourceInfoPtr psip; - ValNodePtr orgnames = NULL, source_list_for_removal = NULL, vnp; - Int4 num_succeeded = 0; + ValNodePtr vnp, prev, vnp_blank; + Int4 pos = 0, skip; - if (action == NULL || sep == NULL) return 0; + /* first, skip accession */ + prev = row; + vnp = row->next; + while (vnp != NULL && pos <= insert_pos) { + for (skip = 0; skip < num_field_per_pos[pos] && vnp != NULL; skip++, vnp = vnp->next) { + prev = vnp; + } + pos++; + } + for (skip = 0; skip < num_blanks; skip++) { + vnp_blank = ValNodeNew (NULL); + vnp_blank->next = prev->next; + prev->next = vnp_blank; + } - psd.src = action->src; - psd.portion = action->portion; - psd.src_list = NULL; +} - /* first, we need to get a list of the parse sources */ - VisitBioseqsInSep (sep, &psd, FindParseSourceBioseqCallback); - if (action->capitalization != Cap_change_none) { - /* if we will be fixing capitalization, get org names to use in fixes */ - VisitBioSourcesInSep (sep, &orgnames, GetOrgNamesInRecordCallback); - } +static void AddListToTabTable (ValNodePtr vals, ValNodePtr text_table, ValNodePtr this_row, Int4 pos, Int4Ptr num_field_per_pos) +{ + Int4 num_new_fields; + ValNodePtr vnp; - /* for each parse source, we need to get a list of the destinations */ - for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next) - { - if (vnp->data.ptrvalue == NULL) continue; - psip = (ParseSourceInfoPtr) 
vnp->data.ptrvalue; - if (action->remove_from_parsed) { - ValNodeAddPointer (&source_list_for_removal, 0, ParseSourceInfoCopy (psip)); + num_new_fields = ValNodeLen (vals); + if (num_new_fields > num_field_per_pos[pos]) { + /* go back and insert blanks in all the previous rows */ + for (vnp = text_table; vnp != NULL; vnp = vnp->next) { + InsertBlanksInRow (vnp->data.ptrvalue, pos, num_field_per_pos, num_new_fields - num_field_per_pos[pos]); } - /* fix source text */ - FixCapitalizationInString (&(psip->parse_src_txt), action->capitalization, orgnames); + num_field_per_pos[pos] = num_new_fields; + } + ValNodeLink (&this_row, vals); + while (num_new_fields < num_field_per_pos[pos]) { + ValNodeAddPointer (&this_row, 0, NULL); + num_new_fields++; + } +} - /* find destinations */ - AddParseDestinations (psip, action->dest); +static ValNodePtr StartRowWithSourceFields (CharPtr id, BioseqPtr bsp, ValNodePtr src_field_list, Int4Ptr num_field_per_pos, ValNodePtr text_table) +{ + ValNodePtr text_row = NULL; + SeqDescPtr sdp; + ValNodePtr vals, vnp_f; + Int4 pos; + SeqMgrDescContext context; + + /* add accession */ + ValNodeAddPointer (&text_row, 0, StringSave (id)); + + /* add source fields */ + if (src_field_list != NULL) { + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); + for (vnp_f = src_field_list, pos = 0; vnp_f != NULL; vnp_f = vnp_f->next, pos++) { + vals = GetMultipleFieldValuesForObject (OBJ_SEQDESC, sdp, vnp_f, NULL, NULL); + AddListToTabTable (vals, text_table, text_row, pos, num_field_per_pos); + } } + return text_row; +} - /* free orgname list if we created it */ - orgnames = ValNodeFree (orgnames); - CombineSourcesForDestinations (&(psd.src_list)); +NLM_EXTERN void ExportFieldTable (Uint1 field_type, ValNodePtr src_field_list, SeqEntryPtr sep, FILE *fp) +{ + ValNodePtr object_list, vnp_f, vnp_o; + ValNodePtr fields = NULL; + ValNodePtr text_table = NULL, text_row; + BioseqPtr bsp; + Char id_buf[255]; + CharPtr txt1 = NULL, title; + 
SeqDescrPtr pub_sdp; + SeqMgrDescContext pub_context; + Int4 num_orig_fields; + Int4Ptr num_field_per_pos; + Int4 pos, i; - if (action->dest->choice == ParseDest_org) { - PropagateSourceOnSegSetForParse (psd.src_list); + if (field_type == 0) { + object_list = GetObjectListForFieldType (FieldType_source_qual, sep); + } else if (field_type == FieldType_misc) { + object_list = CollectDeflineDescriptors (sep); + ValNodeAddInt (&fields, FieldType_misc, Misc_field_defline); + } else if (field_type == FieldType_pub) { + object_list = GetObjectListForFieldType (FieldType_source_qual, sep); + /* only get publication titles */ + ValNodeAddInt (&fields, FieldType_pub, Publication_field_title); + } else { + object_list = GetObjectListForFieldType (field_type, sep); + /* get a list of the fields that are appropriate for the objects collected */ + fields = GetFieldListForFieldType (field_type, sep); + /* remove fields for which there is no data */ + RemoveFieldsForWhichThereAreNoData (&fields, object_list); } - - /* now do the parsing */ - for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next) { - psip = (ParseSourceInfoPtr) vnp->data.ptrvalue; - num_succeeded += SetFieldForDestList (psip->dest_list, action->dest, psip->parse_src_txt, action->existing_text); + + num_orig_fields = ValNodeLen (src_field_list); + num_field_per_pos = (Int4Ptr) MemNew (sizeof (Int4) * num_orig_fields); + for (pos = 0; pos < num_orig_fields; pos++) { + num_field_per_pos[pos] = 1; } - /* now remove strings from sources */ - for (vnp = source_list_for_removal; vnp != NULL; vnp = vnp->next) - { - if (vnp->data.ptrvalue == NULL) continue; - psip = (ParseSourceInfoPtr) vnp->data.ptrvalue; - StripFieldForSrcList (psip, action->src, action->portion); + /* get text table */ + for (vnp_o = object_list; vnp_o != NULL; vnp_o = vnp_o->next) { + bsp = GetSequenceForObject (vnp_o->choice, vnp_o->data.ptrvalue); + if (bsp != NULL) { + /* first column is accession */ + SeqIdWrite (SeqIdFindBest (bsp->id, 
SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1); + if (field_type == FieldType_pub) { + for (pub_sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &pub_context); + pub_sdp != NULL; + pub_sdp = SeqMgrGetNextDescriptor (bsp, pub_sdp, Seq_descr_pub, &pub_context)) { + + /* Get Publication Title */ + title = GetFieldValueForObject (OBJ_SEQDESC, pub_sdp, fields, NULL); + + if (!StringHasNoText (title)) { + text_row = StartRowWithSourceFields (id_buf, bsp, src_field_list, num_field_per_pos, text_table); + + /* add publication title */ + ValNodeAddPointer (&text_row, 0, title); + + /* add row to table */ + ValNodeAddPointer (&text_table, 0, text_row); + } + title = MemFree (title); + } + } else { + text_row = StartRowWithSourceFields (id_buf, bsp, src_field_list, num_field_per_pos, text_table); + /* get requested fields */ + for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) { + txt1 = GetFieldValueForObject (vnp_o->choice, vnp_o->data.ptrvalue, vnp_f, NULL); + ValNodeAddPointer (&text_row, 0, txt1); + } + /* add row to table */ + ValNodeAddPointer (&text_table, 0, text_row); + } + } } - psd.src_list = ParseSourceListFree (psd.src_list); - return num_succeeded; + /* add header */ + /* accession is first column */ + fprintf (fp, "Accession"); + /* list source fields first */ + for (vnp_f = src_field_list, pos = 0; vnp_f != NULL; vnp_f = vnp_f->next, pos++) { + txt1 = SummarizeFieldType (vnp_f); + for (i = 0; i < num_field_per_pos[pos]; i++) { + fprintf (fp, "\t%s", txt1); + } + txt1 = MemFree (txt1); + } + /* list fields */ + for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) { + txt1 = SummarizeFieldType (vnp_f); + fprintf (fp, "\t%s", txt1); + txt1 = MemFree (txt1); + } + fprintf (fp, "\n"); + WriteTabTableToFile (text_table, fp); + FreeTabTable(text_table); + + fields = FieldTypeListFree (fields); + object_list = FreeObjectList (object_list); + num_field_per_pos = MemFree (num_field_per_pos); } -static void SetCdRegionGeneticCode 
(SeqFeatPtr cds) +/* This section handles parsing where the source field and destination field may not be on the same + * group of objects. */ +typedef struct parsesourceinfo { - CdRegionPtr crp; - SeqEntryPtr parent_sep; BioseqPtr bsp; - Int4 genCode; - ValNodePtr code, vnp; + SeqFeatPtr sfp; + SeqDescrPtr sdp; + SeqIdPtr sip; + ValNodePtr dest_list; + CharPtr parse_src_txt; +} ParseSourceInfoData, PNTR ParseSourceInfoPtr; - if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return; - if (cds->data.value.ptrvalue == NULL) { - cds->data.value.ptrvalue = CdRegionNew(); +static ParseSourceInfoPtr ParseSourceInfoNew (BioseqPtr bsp, SeqFeatPtr sfp, SeqDescrPtr sdp, SeqIdPtr sip, CharPtr parse_src_txt) +{ + ParseSourceInfoPtr psip; + + psip = (ParseSourceInfoPtr) MemNew (sizeof (ParseSourceInfoData)); + if (psip != NULL) { + psip->bsp = bsp; + psip->sdp = sdp; + psip->sfp = sfp; + psip->sip = sip; + psip->dest_list = NULL; + psip->parse_src_txt = parse_src_txt; + } + return psip; +} + + +static ParseSourceInfoPtr ParseSourceInfoFree (ParseSourceInfoPtr psip) +{ + if (psip != NULL) + { + psip->dest_list = ValNodeFree (psip->dest_list); + psip->parse_src_txt = MemFree (psip->parse_src_txt); + psip = MemFree (psip); } - crp = (CdRegionPtr) cds->data.value.ptrvalue; - bsp = BioseqFindFromSeqLoc (cds->location); - if (bsp == NULL) return; - parent_sep = GetBestTopParentForData (bsp->idx.entityID, bsp); - genCode = SeqEntryToGeneticCode (parent_sep, NULL, NULL, 0); + return psip; +} - code = ValNodeNew (NULL); - if (code != NULL) { - code->choice = 254; - vnp = ValNodeNew (NULL); - code->data.ptrvalue = vnp; - if (vnp != NULL) { - vnp->choice = 2; - vnp->data.intvalue = genCode; +static ParseSourceInfoPtr ParseSourceInfoCopy (ParseSourceInfoPtr psip) +{ + ParseSourceInfoPtr pcopy = NULL; + + if (psip != NULL) + { + pcopy = (ParseSourceInfoPtr) MemNew (sizeof (ParseSourceInfoData)); + if (pcopy != NULL) { + pcopy->bsp = psip->bsp; + pcopy->sfp = psip->sfp; + pcopy->sdp = 
psip->sdp; + pcopy->sip = psip->sip; + pcopy->dest_list = NULL; + pcopy->parse_src_txt = NULL; } } - crp->genetic_code = code; + return pcopy; } - -static void CreateDataForFeature (SeqFeatPtr sfp, Int4 feature_type) +static ValNodePtr ParseSourceListFree (ValNodePtr vnp) { - Int4 featdef, seqfeattype; - CharPtr label = NULL; - RnaRefPtr rrp; - RNAGenPtr rgp; - ImpFeatPtr ifp; - - featdef = GetFeatdefFromFeatureType (feature_type); - sfp->idx.subtype = featdef; - seqfeattype = FindFeatFromFeatDefType (featdef); - switch (seqfeattype) { - case SEQFEAT_GENE: - sfp->data.value.ptrvalue = GeneRefNew(); - break; - case SEQFEAT_CDREGION: - sfp->data.value.ptrvalue = CdRegionNew(); - SetCdRegionGeneticCode (sfp); - break; - case SEQFEAT_RNA: - rrp = RnaRefNew(); - rrp->ext.choice = 0; - sfp->data.value.ptrvalue = rrp; - switch (featdef) { - case FEATDEF_preRNA: - rrp->type = RNA_TYPE_premsg; - break; - case FEATDEF_mRNA: - rrp->type = RNA_TYPE_mRNA; - break; - case FEATDEF_tRNA: - rrp->type = RNA_TYPE_tRNA; - break; - case FEATDEF_rRNA: - rrp->type = RNA_TYPE_rRNA; - break; - case FEATDEF_snRNA: - rrp->type = RNA_TYPE_ncRNA; - SetncRNAClass (rrp, NULL, "snRNA", ExistingTextOption_replace_old); - break; - case FEATDEF_scRNA: - rrp->type = RNA_TYPE_ncRNA; - SetncRNAClass (rrp, NULL, "scRNA", ExistingTextOption_replace_old); - break; - case FEATDEF_tmRNA: - rrp->type = RNA_TYPE_tmRNA; - rgp = RNAGenNew (); - rrp->ext.choice = 3; - rrp->ext.value.ptrvalue = rgp; - break; - case FEATDEF_ncRNA: - rrp->type = RNA_TYPE_ncRNA; - rgp = RNAGenNew (); - rrp->ext.choice = 3; - rrp->ext.value.ptrvalue = rgp; - break; - case FEATDEF_otherRNA: - rrp->type = RNA_TYPE_misc_RNA; - rgp = RNAGenNew(); - rrp->ext.choice = 3; - rrp->ext.value.ptrvalue = rgp; - break; - } - break; - case SEQFEAT_IMP: - ifp = ImpFeatNew(); - sfp->data.value.ptrvalue = ifp; - label = GetFeatureNameFromFeatureType (feature_type); - ifp->key = StringSave (label); - break; + ValNodePtr vnp_next; + while (vnp != NULL) 
{ + vnp_next = vnp->next; + vnp->next = NULL; + vnp->data.ptrvalue = ParseSourceInfoFree (vnp->data.ptrvalue); + vnp = ValNodeFree (vnp); + vnp = vnp_next; } + return vnp; } -static SeqLocPtr LocationFromApplyFeatureAction (BioseqPtr bsp, ApplyFeatureActionPtr action) +static void +GetDeflineSourcesForBioseq +(BioseqPtr bsp, + TextPortionPtr portion, + ValNodePtr PNTR source_list) { - LocationIntervalPtr l; - SeqLocPtr slp = NULL; - Uint1 strand = Seq_strand_plus; - Int4 from, to; - - if (bsp == NULL || action == NULL || action->location == NULL) return NULL; - - if (!action->plus_strand) { - strand = Seq_strand_minus; + SeqDescrPtr sdp; + SeqMgrDescContext dcontext; + CharPtr str; + ParseSourceInfoPtr psip; + + if (bsp == NULL || source_list == NULL) + { + return; } - if (action->location->choice == LocationChoice_interval) { - l = (LocationIntervalPtr) action->location->data.ptrvalue; - if (l != NULL) { - from = MIN (l->from, l->to) - 1; - to = MAX (l->from, l->to) - 1; - slp = SeqLocIntNew (from, to, strand, SeqIdFindWorst (bsp->id)); + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &dcontext); + while (sdp != NULL) + { + str = GetTextPortionFromString (sdp->data.ptrvalue, portion); + if (str != NULL) { + psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str); + if (psip != NULL) { + ValNodeAddPointer (source_list, 0, psip); + } else { + str = MemFree (str); + } } - SetSeqLocPartial (slp, action->partial5, action->partial3); - } else if (action->location->choice == LocationChoice_whole_sequence) { - slp = SeqLocIntNew (0, bsp->length - 1, strand, SeqIdFindWorst (bsp->id)); - SetSeqLocPartial (slp, action->partial5, action->partial3); - } else if (action->location->choice == LocationChoice_point) { - AddSeqLocPoint (&slp, SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0))), - action->location->data.intvalue, FALSE, TRUE, strand); + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_title, &dcontext); } - return slp; } -static Boolean 
OkToApplyToBioseq (ApplyFeatureActionPtr action, BioseqPtr bsp) +static CharPtr GetIDSrc (SeqIdPtr sip, Uint1 id_type, CharPtr tag) { - SeqFeatPtr sfp; - SeqMgrFeatContext context; - Int4 featdef; - Boolean rval = TRUE; + DbtagPtr dbt = NULL; + ObjectIdPtr oip = NULL; + Char id_str[128]; + CharPtr str_src = NULL; - if (action == NULL || bsp == NULL) return FALSE; + if (sip == NULL || sip->choice != id_type) return NULL; - if (!action->add_redundant) { - featdef = GetFeatdefFromFeatureType (action->type); - sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context); - if (sfp != NULL) { - rval = FALSE; + if (id_type == SEQID_GENERAL) + { + dbt = (DbtagPtr) sip->data.ptrvalue; + if (dbt == NULL || (tag != NULL && StringCmp (dbt->db, tag) != 0)) return NULL; + oip = dbt->tag; + } + else if (id_type == SEQID_LOCAL) + { + oip = sip->data.ptrvalue; + } + + if (oip == NULL) + { + SeqIdWrite (sip, id_str, PRINTID_REPORT, sizeof (id_str)); + str_src = StringSave (id_str); + } + else + { + if (oip->str == NULL) + { + sprintf (id_str, "%d", oip->id); + str_src = StringSave (id_str); + } + else + { + str_src = StringSave (oip->str); } } - return rval; -} + return str_src; +} -static void AddParts (ApplyFeatureActionPtr action, BioseqSetPtr parts, ValNodePtr PNTR bsp_list) -{ - SeqEntryPtr sep; - Int4 seg_num; - if (action == NULL || !action->apply_to_parts - || parts == NULL || parts->_class != BioseqseqSet_class_parts - || bsp_list == NULL) { +static void +GetIDSourcesForBioseq +(BioseqPtr bsp, + TextPortionPtr portion, + Uint1 id_type, + CharPtr tag, + ValNodePtr PNTR source_list) +{ + SeqIdPtr sip; + ParseSourceInfoPtr psip; + CharPtr src_str = NULL, str; + + if (bsp == NULL || source_list == NULL) + { return; } - - if (action->only_seg_num > -1) { - seg_num = 0; - sep = parts->seq_set; - while (seg_num < action->only_seg_num && sep != NULL) { - sep = sep->next; - seg_num++; - } - if (sep != NULL && IS_Bioseq (sep) && OkToApplyToBioseq (action, sep->data.ptrvalue)) { - 
ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, sep->data.ptrvalue); - } - } else { - for (sep = parts->seq_set; sep != NULL; sep = sep->next) { - if (IS_Bioseq (sep) && OkToApplyToBioseq (action, sep->data.ptrvalue)) { - ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, sep->data.ptrvalue); + + sip = bsp->id; + while (sip != NULL) + { + if ((src_str = GetIDSrc (sip, id_type, tag)) != NULL) { + str = GetTextPortionFromString (src_str, portion); + if (str != NULL) { + psip = ParseSourceInfoNew (bsp, NULL, NULL, sip, str); + if (psip != NULL) { + ValNodeAddPointer (source_list, 0, psip); + } else { + str = MemFree (str); + } } + src_str = MemFree (src_str); } - } + sip = sip->next; + } } -static void AddSequenceOrParts (ApplyFeatureActionPtr action, BioseqPtr bsp, ValNodePtr PNTR bsp_list) +static void +GetLocalIDSourcesForBioseq +(BioseqPtr bsp, + TextPortionPtr tp, + ValNodePtr PNTR source_list) { - BioseqSetPtr bssp, parts; - SeqEntryPtr sep; + GetIDSourcesForBioseq (bsp, tp, SEQID_LOCAL, NULL, source_list); +} - if (action == NULL || bsp == NULL || bsp_list == NULL) return; - if (bsp->idx.parenttype == OBJ_BIOSEQSET && bsp->idx.parentptr != NULL) { - bssp = (BioseqSetPtr) bsp->idx.parentptr; - if (bssp->_class == BioseqseqSet_class_segset) { - if (action->apply_to_parts) { - sep = bssp->seq_set; - while (sep != NULL && !IS_Bioseq_set (sep)) { - sep = sep->next; - } - if (sep != NULL) { - AddParts (action, sep->data.ptrvalue, bsp_list); - } - } else { - if (OkToApplyToBioseq (action, bsp)) { - ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp); - } - } - } else if (bssp->_class == BioseqseqSet_class_parts) { - if (action->apply_to_parts) { - AddParts (action, bssp, bsp_list); - } else { - parts = bssp; - if (parts->idx.parenttype == OBJ_BIOSEQSET && parts->idx.parentptr != NULL) { - bssp = (BioseqSetPtr) parts->idx.parentptr; - if (IS_Bioseq (bssp->seq_set) && OkToApplyToBioseq (action, bssp->seq_set->data.ptrvalue)) { - ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp_list); - } - } - } 
- } else { - if (OkToApplyToBioseq (action, bsp)) { - ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp); - } - } - } else { - if (OkToApplyToBioseq (action, bsp)) { - ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp); - } - } -} - -static void AddSequenceOrPartsFromSeqEntry (ApplyFeatureActionPtr action, SeqEntryPtr sep, ValNodePtr PNTR bsp_list) +static void GetNcbiFileSourceForBioseq +(BioseqPtr bsp, + TextPortionPtr tp, + ValNodePtr PNTR source_list) { - BioseqSetPtr bssp; - SeqEntryPtr seq_set; + GetIDSourcesForBioseq (bsp, tp, SEQID_GENERAL, "NCBIFILE", source_list); +} - if (action == NULL || sep == NULL) return; - while (sep != NULL) { - if (IS_Bioseq (sep)) { - AddSequenceOrParts (action, sep->data.ptrvalue, bsp_list); - } else if (IS_Bioseq_set (sep)) { - bssp = (BioseqSetPtr) sep->data.ptrvalue; - if (bssp->_class == BioseqseqSet_class_segset) { - /* find master segment */ - seq_set = bssp->seq_set; - while (seq_set != NULL && !IS_Bioseq (seq_set)) { - seq_set = seq_set->next; - } - if (seq_set != NULL) { - AddSequenceOrParts (action, seq_set->data.ptrvalue, bsp_list); - } - } else if (bssp->_class == BioseqseqSet_class_nuc_prot) { - /* find nucleotide sequence */ - seq_set = bssp->seq_set; - if (seq_set != NULL) { - if (IS_Bioseq_set (seq_set)) { - /* nucleotide is segmented set */ - bssp = (BioseqSetPtr) seq_set->data.ptrvalue; - if (bssp != NULL && bssp->_class == BioseqseqSet_class_segset - && bssp->seq_set != NULL && IS_Bioseq (bssp->seq_set)) { - AddSequenceOrParts (action, bssp->seq_set->data.ptrvalue, bsp_list); - } - } else if (IS_Bioseq (seq_set)) { - AddSequenceOrParts (action, seq_set->data.ptrvalue, bsp_list); - } - } +static void +GetGeneralIdTextSourcesForBioseq +(BioseqPtr bsp, + Boolean db_only, + TextPortionPtr portion, + ValNodePtr PNTR source_list) +{ + SeqIdPtr sip; + ParseSourceInfoPtr psip; + DbtagPtr dbtag; + CharPtr src_str = NULL, str; + + if (bsp == NULL || source_list == NULL) + { + return; + } + + for (sip = bsp->id; sip != NULL; sip 
= sip->next) { + if (sip->choice == SEQID_GENERAL && (dbtag = (DbtagPtr) sip->data.ptrvalue) != NULL) { + if (db_only) { + str = GetTextPortionFromString (dbtag->db, portion); } else { - /* add from set members */ - AddSequenceOrPartsFromSeqEntry (action, bssp->seq_set, bsp_list); + src_str = GetDbtagString (dbtag); + str = GetTextPortionFromString (src_str, portion); + src_str = MemFree (src_str); } - } - sep = sep->next; - } -} - - -static void AdjustProteinSequenceForReadingFrame (SeqFeatPtr cds) -{ - BioseqPtr protbsp, bsp; - ByteStorePtr bs; - SeqFeatPtr prot_sfp; - Boolean partial5, partial3; - - if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return; - - protbsp = BioseqFindFromSeqLoc (cds->product); - - if (protbsp == NULL) { - bsp = BioseqFindFromSeqLoc (cds->location); - if (bsp != NULL) { - ExtraCDSCreationActions (cds, GetBestTopParentForData (bsp->idx.entityID, bsp)); - } - } else { - bs = ProteinFromCdRegionExWithTrailingCodonHandling (cds, - TRUE, - FALSE, - TRUE); - protbsp->seq_data = (SeqDataPtr) BSFree ((ByteStorePtr)(protbsp->seq_data)); - protbsp->seq_data = (SeqDataPtr) bs; - protbsp->length = BSLen (bs); - prot_sfp = GetProtFeature (protbsp); - if (prot_sfp == NULL) { - prot_sfp = CreateNewFeatureOnBioseq (protbsp, SEQFEAT_PROT, NULL); - prot_sfp->data.value.ptrvalue = ProtRefNew (); - CheckSeqLocForPartial (cds->location, &partial5, &partial3); - SetSeqLocPartial (prot_sfp->location, partial5, partial3); - prot_sfp->partial = (partial5 || partial3); - } else { - if (SeqLocLen (prot_sfp->location) != protbsp->length) { - prot_sfp->location = SeqLocFree (prot_sfp->location); - prot_sfp->location = SeqLocIntNew (0, protbsp->length - 1, Seq_strand_plus, SeqIdFindWorst (protbsp->id)); - CheckSeqLocForPartial (cds->location, &partial5, &partial3); - SetSeqLocPartial (prot_sfp->location, partial5, partial3); - prot_sfp->partial = (partial5 || partial3); + if (str != NULL) { + psip = ParseSourceInfoNew (bsp, NULL, NULL, sip, str); + if 
(psip != NULL) { + ValNodeAddPointer (source_list, 0, psip); + } else { + str = MemFree (str); + } } } } } -NLM_EXTERN SeqFeatPtr -ApplyOneFeatureToBioseq -(BioseqPtr bsp, - Uint1 featdef, - SeqLocPtr slp, - ValNodePtr fields, - ValNodePtr src_fields, - Boolean add_mrna) +static void GetGeneralIDSourcesForBioseq +(BioseqPtr bsp, + ValNodePtr general_id, + TextPortionPtr tp, + ValNodePtr PNTR source_list) { - Int4 seqfeattype; - SeqFeatPtr sfp, gene = NULL, mrna = NULL; - FeatQualLegalValPtr q; - FeatureField f; - ValNodePtr field_vnp; - Int4 feature_type; - - seqfeattype = FindFeatFromFeatDefType (featdef); - sfp = CreateNewFeatureOnBioseq (bsp, seqfeattype, slp); - if (sfp == NULL) return NULL; - feature_type = GetFeatureTypeFromFeatdef(featdef); - CreateDataForFeature (sfp, feature_type); - /* any extra actions */ - switch (featdef) { - case FEATDEF_CDS : - ExtraCDSCreationActions (sfp, GetBestTopParentForData (bsp->idx.entityID, bsp)); + if (general_id == NULL) { + return; + } + switch (general_id->choice) { + case ParseSrcGeneralId_whole_text: + GetGeneralIdTextSourcesForBioseq (bsp, FALSE, tp, source_list); break; - case FEATDEF_source : - if (src_fields != NULL) { - sfp->data.value.ptrvalue = ImpFeatFree (sfp->data.value.ptrvalue); - sfp->data.choice = SEQFEAT_BIOSRC; - sfp->data.value.ptrvalue = BioSourceFromSourceQualVals (src_fields); - } + case ParseSrcGeneralId_db: + GetGeneralIdTextSourcesForBioseq (bsp, TRUE, tp, source_list); break; - } - for (field_vnp = fields; field_vnp != NULL; field_vnp = field_vnp->next) { - q = (FeatQualLegalValPtr) field_vnp->data.ptrvalue; - if (q != NULL) { - f.field = ValNodeNew(NULL); - f.field->next = NULL; - f.field->choice = FeatQualChoice_legal_qual; - f.field->data.intvalue = q->qual; - if (sfp->data.choice != SEQFEAT_GENE - && (q->qual == Feat_qual_legal_gene || q->qual == Feat_qual_legal_gene_description)) { - if (gene == NULL) { - gene = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, slp); - CreateDataForFeature 
(gene, Feature_type_gene); - } - f.type = Feature_type_gene; - SetQualOnFeature (gene, &f, NULL, q->val, ExistingTextOption_replace_old); + case ParseSrcGeneralId_tag: + if (StringHasNoText (general_id->data.ptrvalue)) { + GetIDSourcesForBioseq (bsp, tp, SEQID_GENERAL, NULL, source_list); } else { - f.type = feature_type; - SetQualOnFeature (sfp, &f, NULL, q->val, ExistingTextOption_replace_old); + GetIDSourcesForBioseq (bsp, tp, SEQID_GENERAL, general_id->data.ptrvalue, source_list); } - } + break; + default: + break; } - if (featdef == FEATDEF_CDS) { - /* retranslate, to account for change in reading frame */ - AdjustProteinSequenceForReadingFrame (sfp); - /* after the feature has been created, then adjust it for gaps */ - /* Note - this step may result in multiple coding regions being created. */ - AdjustCDSLocationsForUnknownGapsCallback (sfp, NULL); - if (add_mrna) { - slp = SeqLocCopy (slp); - mrna = CreateNewFeatureOnBioseq (bsp, SEQFEAT_RNA, slp); - CreateDataForFeature (mrna, Feature_type_mRNA); - for (field_vnp = fields; field_vnp != NULL; field_vnp = field_vnp->next) { - q = (FeatQualLegalValPtr) field_vnp->data.ptrvalue; - if (q != NULL && q->qual == Feat_qual_legal_product) { - f.field = ValNodeNew(NULL); - f.field->next = NULL; - f.field->choice = FeatQualChoice_legal_qual; - f.field->data.intvalue = q->qual; - f.type = Feature_type_mRNA; - SetQualOnFeature (mrna, &f, NULL, q->val, ExistingTextOption_replace_old); +} + + +static void StripBankitCommentForParse (SeqDescrPtr sdp, TextPortionPtr tp) +{ + UserObjectPtr uop; + ObjectIdPtr oip; + UserFieldPtr ufp; + + if (sdp == NULL || sdp->choice != Seq_descr_user || tp == NULL) { + return; + } + + /* Bankit Comments */ + uop = (UserObjectPtr) sdp->data.ptrvalue; + if (uop != NULL && StringCmp (uop->_class, "SMART_V1.0") != 0) { + oip = uop->type; + if (oip != NULL && StringCmp (oip->str, "Submission") == 0) { + for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { + oip = ufp->label; + if (oip != NULL && 
StringCmp (oip->str, "AdditionalComment") == 0) { + ReplaceStringForParse (ufp->data.ptrvalue, tp); } } } } - return sfp; } -static Int4 ApplyApplyFeatureActionToSeqEntry (ApplyFeatureActionPtr action, SeqEntryPtr sep) +static void StripStructuredCommentForParse (SeqDescrPtr sdp, CharPtr comment_field, TextPortionPtr tp) { - ValNodePtr bsp_list = NULL, vnp; - Int4 featdef; - BioseqPtr bsp; - SeqFeatPtr sfp; - SeqLocPtr slp; - SeqIdPtr sip; - Int4 num_created = 0; - - if (sep == NULL || action == NULL) return 0; + UserObjectPtr uop; + ObjectIdPtr oip; + UserFieldPtr ufp; - /* first, get list of Bioseqs to apply features to */ - /* relevant values : seq_list, add_redundant, apply_to_parts, only_seg_num */ - if (action->seq_list != NULL && action->seq_list->choice == SequenceListChoice_list) { - for (vnp = action->seq_list->data.ptrvalue; vnp != NULL; vnp = vnp->next) { - sip = CreateSeqIdFromText (vnp->data.ptrvalue, sep); - bsp = BioseqFind (sip); - if (bsp != NULL) { - AddSequenceOrParts (action, bsp, &bsp_list); + if (sdp == NULL || sdp->choice != Seq_descr_user || tp == NULL || StringHasNoText (comment_field)) { + return; + } + + uop = (UserObjectPtr) sdp->data.ptrvalue; + if (IsUserObjectStructuredComment (uop)) { + for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { + oip = ufp->label; + if (oip != NULL && StringCmp (oip->str, comment_field) == 0) { + ReplaceStringForParse (ufp->data.ptrvalue, tp); } - } - } else { - AddSequenceOrPartsFromSeqEntry (action, sep, &bsp_list); + } } +} - /* now add feature to each bioseq in list */ - for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { - bsp = vnp->data.ptrvalue; - if (bsp == NULL) continue; - featdef = GetFeatdefFromFeatureType (action->type); - slp = LocationFromApplyFeatureAction (bsp, action); - sfp = ApplyOneFeatureToBioseq (bsp, featdef, slp, action->fields, action->src_fields, action->add_mrna); - if (sfp != NULL) { - num_created++; - } - } - return num_created; -} - - -typedef struct 
convertandremovefeaturecollection { - Uint1 featdef; - ValNodePtr constraint_set; - ValNodePtr feature_list; -} ConvertAndRemoveFeatureCollectionData, PNTR ConvertAndRemoveFeatureCollectionPtr; -static void ConvertAndRemoveFeatureCollectionCallback (SeqFeatPtr sfp, Pointer data) +static void +GetBankitCommentSourcesForBioseq +(BioseqPtr bsp, + TextPortionPtr tp, + ValNodePtr PNTR source_list) { - ConvertAndRemoveFeatureCollectionPtr p; - - if (sfp == NULL || data == NULL) return; - - p = (ConvertAndRemoveFeatureCollectionPtr) data; - if (sfp->idx.subtype == p->featdef && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, p->constraint_set)) { - ValNodeAddPointer (&(p->feature_list), OBJ_SEQFEAT, sfp); + SeqDescrPtr sdp; + SeqMgrDescContext dcontext; + ParseSourceInfoPtr psip; + UserObjectPtr uop; + ObjectIdPtr oip; + UserFieldPtr ufp; + CharPtr str = NULL; + + if (bsp == NULL || source_list == NULL) { + return; } -} - - -static Int4 ApplyRemoveFeatureActionToSeqEntry (RemoveFeatureActionPtr action, SeqEntryPtr sep) -{ - ConvertAndRemoveFeatureCollectionData d; - ValNodePtr vnp; - SeqFeatPtr sfp; - Int4 num_deleted = 0; - - if (action == NULL) return 0; - - d.featdef = GetFeatdefFromFeatureType (action->type); - d.constraint_set = action->constraint; - d.feature_list = NULL; - - VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback); - for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) { - sfp = vnp->data.ptrvalue; - if (sfp != NULL) { - sfp->idx.deleteme = TRUE; - num_deleted ++; + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext); + while (sdp != NULL) { + if (sdp->extended != 0) { + /* Bankit Comments */ + uop = (UserObjectPtr) sdp->data.ptrvalue; + if (uop != NULL && StringCmp (uop->_class, "SMART_V1.0") != 0) { + oip = uop->type; + if (oip != NULL && StringCmp (oip->str, "Submission") == 0) { + for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { + oip = ufp->label; + if (oip != NULL && StringCmp (oip->str, 
"AdditionalComment") == 0) { + str = GetTextPortionFromString (ufp->data.ptrvalue, tp); + if (str != NULL) { + psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str); + if (psip == NULL) { + str = MemFree (str); + } else { + ValNodeAddPointer (source_list, 0, psip); + } + } + } + } + } + } } + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext); } - d.feature_list = ValNodeFree (d.feature_list); - DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); - return num_deleted; } -/* functions for converting features */ - -static Boolean ApplyConvertFeatureSrcOptions (SeqFeatPtr sfp, ValNodePtr src_options, Boolean keep_original) +static void +GetCommentSourcesForBioseq +(BioseqPtr bsp, + TextPortionPtr tp, + ValNodePtr PNTR source_list) { - ConvertFromCDSOptionsPtr options = NULL; - Boolean rval = FALSE; - - if (sfp == NULL) return FALSE; - if (src_options == NULL) return TRUE; - - if (src_options->choice == ConvertFeatureSrcOptions_cds) { - options = (ConvertFromCDSOptionsPtr) src_options->data.ptrvalue; - if (options != NULL) { - ApplyCDSOptionsToFeature (sfp, options->remove_mRNA, options->remove_gene, options->remove_transcript_id, keep_original); - rval = TRUE; - } + SeqDescrPtr sdp; + SeqFeatPtr sfp; + SeqMgrFeatContext fcontext; + SeqMgrDescContext dcontext; + ParseSourceInfoPtr psip; + CharPtr str; + + if (bsp == NULL || source_list == NULL) { + return; } - return rval; -} - -typedef Boolean (*ConvertFeatureFunc) PROTO ((SeqFeatPtr, Int4, ConvertFeatureDstOptionsPtr)); - -static void ApplyRNADestinationOptions (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) -{ - CharPtr existing_class; - FeatureField ff; - - /* apply destination options */ - if (featdef_to == FEATDEF_ncRNA - && dst_options != NULL - && dst_options->choice == ConvertFeatureDstOptions_ncrna_class - && !StringHasNoText (dst_options->data.ptrvalue)) { - ff.type = Feature_type_ncRNA; - ff.field = ValNodeNew (NULL); - ff.field->choice = 
FeatQualChoice_legal_qual; - ff.field->data.intvalue = Feat_qual_legal_ncRNA_class; - existing_class = GetQualFromFeature (sfp, &ff, NULL); - if (StringCmp (dst_options->data.ptrvalue, existing_class) != 0) { - sfp->idx.subtype = FEATDEF_ncRNA; - SetQualOnFeature (sfp, &ff, NULL, dst_options->data.ptrvalue, ExistingTextOption_append_semi); + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext); + while (sdp != NULL) { + str = GetTextPortionFromString (sdp->data.ptrvalue, tp); + if (str != NULL) { + psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str); + if (psip == NULL) { + str = MemFree (str); + } else { + ValNodeAddPointer (source_list, 0, psip); + } } - existing_class = MemFree (existing_class); - ff.field = ValNodeFree (ff.field); - } -} - -static Boolean ConvertCDSToRNAFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) -{ - Boolean rval; - - if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) { - return FALSE; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_comment, &dcontext); } - - rval = ConvertCDSToRNA (sfp, featdef_to); - if (rval) { - ApplyRNADestinationOptions (sfp, featdef_to, dst_options); + + sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_COMMENT, 0, &fcontext); + while (sfp != NULL) { + str = GetTextPortionFromString (sfp->data.value.ptrvalue, tp); + if (str != NULL) { + psip = ParseSourceInfoNew (bsp, sfp, NULL, NULL, str); + if (psip == NULL) { + str = MemFree (str); + } else { + ValNodeAddPointer (source_list, 0, psip); + } + } + sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_COMMENT, 0, &fcontext); } - return rval; + GetBankitCommentSourcesForBioseq (bsp, tp, source_list); } -static Boolean ConvertGeneToRNAFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +static void +GetStructuredCommentSourcesForBioseq +(BioseqPtr bsp, + TextPortionPtr tp, + CharPtr comment_field, + ValNodePtr PNTR source_list) { - Boolean rval; - - rval = ConvertGeneToRNA (sfp, 
featdef_to); - if (rval) { - ApplyRNADestinationOptions (sfp, featdef_to, dst_options); + SeqDescrPtr sdp; + UserObjectPtr uop; + ObjectIdPtr oip; + UserFieldPtr ufp; + SeqMgrDescContext dcontext; + CharPtr str; + ParseSourceInfoPtr psip; + + if (bsp == NULL || source_list == NULL) + { + return; + } + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext); + while (sdp != NULL) { + if (sdp->extended != 0 + && sdp->data.ptrvalue != NULL) { + uop = (UserObjectPtr) sdp->data.ptrvalue; + if (IsUserObjectStructuredComment (uop)) { + for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { + oip = ufp->label; + if (oip != NULL && StringCmp (oip->str, comment_field) == 0) { + str = GetTextPortionFromString (ufp->data.ptrvalue, tp); + if (str != NULL) { + psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str); + if (psip == NULL) { + str = MemFree (str); + } else { + ValNodeAddPointer (source_list, 0, psip); + } + } + } + } + } + } + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext); } - return rval; } -static Boolean ConvertBioSrcToRegionFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) -{ - return ConvertBioSrcToRepeatRegion (sfp, featdef_to); -} - +static void GetFlatFileSourcesForBioseq +(BioseqPtr bsp, + TextPortionPtr tp, + ValNodePtr PNTR source_list) -static Boolean ConvertCDSToMiscFeatFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { - Boolean rval = FALSE; - if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) { - return FALSE; - } - else if (sfp->pseudo) + SeqEntryPtr sep; + Asn2gbJobPtr ajp; + Int4 index; + ErrSev level; + CharPtr string, str; + ParseSourceInfoPtr psip; + + if (bsp == NULL || source_list == NULL) { - rval = ConvertOnePseudoCDSToMiscFeatEx (sfp, FALSE); + return; } - else - { - /* do other here */ - rval = ConvertNonPseudoCDSToMiscFeat (sfp, FALSE); - } - return rval; -} -static Boolean ConvertImpToProtFuncEx (SeqFeatPtr sfp, Int4 featdef_to, 
ConvertFeatureDstOptionsPtr dst_options) -{ - return ConvertImpToProtFunc (sfp, featdef_to); -} + sep = SeqMgrGetSeqEntryForData (bsp); + if (sep == NULL) { + return; + } + + level = ErrSetMessageLevel (SEV_MAX); + ajp = asn2gnbk_setup (bsp, NULL, NULL, (FmtType)GENBANK_FMT, SEQUIN_MODE, NORMAL_STYLE, 0, 0, 0, NULL); + if (ajp != NULL) { + for (index = 0; index < ajp->numParagraphs; index++) { + string = asn2gnbk_format (ajp, (Int4) index); + if (string != NULL && *string != '\0') { + CompressSpaces (string); + str = GetTextPortionFromString (string, tp); + if (str != NULL) { + psip = ParseSourceInfoNew (bsp, NULL, NULL, NULL, str); + if (psip == NULL) { + str = MemFree (str); + } else { + ValNodeAddPointer (source_list, 0, psip); + } + } + } + MemFree (string); + } + asn2gnbk_cleanup (ajp); + } -static Boolean ConvertProtToImpFuncEx (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) -{ - return ConvertProtToImpFunc (sfp, featdef_to); + ErrSetMessageLevel (level); } -static Boolean ConvertProtToProt (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) -{ - return ConvertProtToProtFunc (sfp, featdef_to); -} - +const CharPtr nomial_keywords[] = { +"f. sp. ", +"var.", +"pv.", +"bv.", +"serovar", +"subsp." }; -static Boolean ConvertCDSToMatPeptide (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) -{ - return AutoConvertCDSToMiscFeat (sfp, (dst_options == NULL || dst_options->choice != ConvertFeatureDstOptions_remove_original) ? 
FALSE : dst_options->data.boolvalue); -} +const Int4 num_nomial_keywords = sizeof(nomial_keywords) / sizeof (CharPtr); +static CharPtr GetTextAfterNomial (CharPtr taxname) -static Boolean ConvertImpToRNAFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { - RnaRefPtr rrp; - GBQualPtr qual, qual_prev = NULL; - Boolean add_to_comment = FALSE; - CharPtr old_comment = NULL; - - if (sfp == NULL || sfp->data.choice != SEQFEAT_IMP) - { - return FALSE; + CharPtr ptr, nomial_end; + Int4 i; + Boolean found_keyword = TRUE; + + ptr = StringChr (taxname, ' '); + if (ptr == NULL) return NULL; + /* skip over the first word and the spaces after it. */ + while (*ptr == ' ') { + ptr++; } - - for (qual = sfp->qual; qual != NULL && StringCmp (qual->qual, "product") != 0; qual = qual->next) { - qual_prev = qual; + ptr = StringChr (ptr, ' '); + /* if there are only two words, give up. */ + if (ptr == NULL) { + return NULL; } - if (qual != NULL) { - old_comment = StringSave (qual->val); - if (qual_prev == NULL) { - sfp->qual = qual->next; - } else { - qual_prev->next = qual->next; - } - qual->next = NULL; - qual = GBQualFree (qual); - } else { - old_comment = sfp->comment; - sfp->comment = NULL; + nomial_end = ptr; + while (*ptr == ' ') { + ptr++; } - - rrp = RnaRefFromLabel (featdef_to, old_comment, &add_to_comment); - - sfp->data.value.ptrvalue = ImpFeatFree ((ImpFeatPtr) sfp->data.value.ptrvalue); - sfp->data.choice = SEQFEAT_RNA; - sfp->data.value.ptrvalue = (Pointer) rrp; - SetRNAProductString (sfp, NULL, old_comment, ExistingTextOption_replace_old); - if (add_to_comment) { - SetStringValue (&(sfp->comment), old_comment, ExistingTextOption_append_semi); + while (found_keyword) { + found_keyword = FALSE; + /* if the next word is a nomial keyword, skip that plus the first word that follows it. 
*/ + for (i = 0; i < num_nomial_keywords && *nomial_end != 0; i++) { + if (StringNCmp (ptr, nomial_keywords[i], StringLen(nomial_keywords[i])) == 0) { + ptr += StringLen(nomial_keywords[i]); + while (*ptr == ' ' ) { + ptr++; + } + nomial_end = StringChr (ptr, ' '); + if (nomial_end == NULL) { + nomial_end = ptr + StringLen (ptr); + } else { + ptr = nomial_end; + while (*ptr == ' ') { + ptr++; + } + found_keyword = TRUE; + } + } + } } - old_comment = MemFree (old_comment); - - ApplyRNADestinationOptions (sfp, featdef_to, dst_options); - - return TRUE; -} - - -static Boolean ConvertRegionToImp (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) -{ - return ConvertRegionToImpFunc (sfp, featdef_to); + return nomial_end; } -static Boolean ConvertImpToImp (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +static void +GetOrgParseSourcesForBioSource +(BioSourcePtr biop, + BioseqPtr bsp, + SeqDescrPtr sdp, + SeqFeatPtr sfp, + ParseSrcOrgPtr o, + TextPortionPtr tp, + ValNodePtr PNTR source_list) { - return ConvertImpToImpFunc (sfp, featdef_to); -} + CharPtr str = NULL, portion, tmp; + ValNode vn; + ParseSourceInfoPtr psip; + if (biop == NULL || o == NULL || o->field == NULL || source_list == NULL) return; -static Boolean ConvertRegionToRNA (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) -{ - Boolean rval; - rval = ConvertRegionToRNAFunc (sfp, featdef_to); - if (rval) { - ApplyRNADestinationOptions (sfp, featdef_to, dst_options); + switch (o->field->choice) { + case ParseSrcOrgChoice_source_qual : + vn.choice = SourceQualChoice_textqual; + vn.data.intvalue = o->field->data.intvalue; + vn.next = NULL; + str = GetSourceQualFromBioSource (biop, &vn, NULL); + break; + case ParseSrcOrgChoice_taxname_after_binomial : + vn.choice = SourceQualChoice_textqual; + vn.data.intvalue = Source_qual_taxname; + vn.next = NULL; + str = GetSourceQualFromBioSource (biop, &vn, NULL); + tmp = GetTextAfterNomial (str); + 
tmp = StringSave (tmp); + str = MemFree (str); + str = tmp; + break; } - return rval; + portion = GetTextPortionFromString (str, tp); + if (portion != NULL) { + psip = ParseSourceInfoNew (bsp, sfp, sdp, NULL, portion); + if (psip == NULL) { + portion = MemFree (portion); + } else { + ValNodeAddPointer (source_list, 0, psip); + } + } + str = MemFree (str); } -static Boolean ConvertCommentToMiscFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +static void GetOrgParseSourcesForBioseq (BioseqPtr bsp, ParseSrcOrgPtr o, TextPortionPtr tp, ValNodePtr PNTR source_list) { - ImpFeatPtr ifp; + SeqDescrPtr sdp; + SeqFeatPtr sfp; + SeqMgrFeatContext fcontext; + SeqMgrDescContext dcontext; - if (sfp == NULL || sfp->data.choice != SEQFEAT_COMMENT || sfp->data.value.ptrvalue != NULL) - { - return FALSE; + if (bsp == NULL || o == NULL || source_list == NULL) return; + + if (o->type == Object_type_constraint_any || o->type == Object_type_constraint_descriptor) { + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext); + sdp != NULL; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext)) { + GetOrgParseSourcesForBioSource (sdp->data.ptrvalue, bsp, sdp, NULL, o, tp, source_list); + } } - - ifp = ImpFeatNew (); - if (ifp != NULL) { - ifp->key = StringSave ("misc_feature"); - sfp->data.choice = SEQFEAT_IMP; - sfp->data.value.ptrvalue = (Pointer) ifp; - return TRUE; + + if (o->type == Object_type_constraint_any || o->type == Object_type_constraint_feature) { + for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_BIOSRC, 0, &fcontext)) { + GetOrgParseSourcesForBioSource (sfp->data.value.ptrvalue, bsp, NULL, sfp, o, tp, source_list); + } } - return FALSE; } -static Boolean ConvertGeneToMiscFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) -{ - return ConvertGeneToMiscFeatFunc (sfp, featdef_to); -} +typedef 
struct parsesrccollection { + ParseSrcPtr src; + TextPortionPtr portion; + ValNodePtr src_list; +} ParseSrcCollectionData, PNTR ParseSrcCollectionPtr; -static Boolean ConvertRNAToImpFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +static void FindParseSourceBioseqCallback (BioseqPtr bsp, Pointer userdata) { - CharPtr product = NULL; - ImpFeatPtr ifp; - Uint1 seqfeattype; - - if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) { - return FALSE; - } - - seqfeattype = FindFeatFromFeatDefType (featdef_to); - if (seqfeattype != SEQFEAT_IMP) { - return FALSE; + ParseSrcCollectionPtr psp; + + if (bsp == NULL || ISA_aa (bsp->mol) || userdata == NULL) + { + return; } + + psp = (ParseSrcCollectionPtr) userdata; + if (psp->src == NULL) return; - product = GetRNAProductString (sfp, NULL); - - RemoveRNAProductString (sfp, NULL); - - sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue); - - ifp = ImpFeatNew (); - ifp->key = StringSave (GetImportFeatureName (featdef_to)); - sfp->data.choice = SEQFEAT_IMP; - sfp->data.value.ptrvalue = (Pointer) ifp; - - SetStringValue (&(sfp->comment), product, ExistingTextOption_append_semi); - product = MemFree (product); - return TRUE; + switch (psp->src->choice) + { + case ParseSrc_defline: + if (!ISA_aa (bsp->mol)) { + GetDeflineSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); + } + break; + case ParseSrc_flatfile: + GetFlatFileSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); + break; + case ParseSrc_local_id: + if (! 
ISA_aa (bsp->mol) && bsp->repr != Seq_repr_seg) { + GetLocalIDSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); + } + break; + case ParseSrc_file_id: + GetNcbiFileSourceForBioseq (bsp, psp->portion, &(psp->src_list)); + break; + case ParseSrc_general_id: + GetGeneralIDSourcesForBioseq (bsp, psp->src->data.ptrvalue, psp->portion, &(psp->src_list)); + break; + case ParseSrc_org: + GetOrgParseSourcesForBioseq (bsp, psp->src->data.ptrvalue, psp->portion, &(psp->src_list)); + break; + case ParseSrc_comment: + GetCommentSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); + break; + case ParseSrc_structured_comment: + GetStructuredCommentSourcesForBioseq(bsp, psp->portion, psp->src->data.ptrvalue, &(psp->src_list)); + break; + case ParseSrc_bankit_comment: + if (!ISA_aa (bsp->mol)) { + GetBankitCommentSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); + } + break; + } } -static Boolean ConvertSiteToImpFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +static void GetOrgNamesInRecordCallback (BioSourcePtr biop, Pointer userdata) { - GBQualPtr gbqual; - ImpFeatPtr ifp; - Int2 sitetype; - CharPtr str; - - if (sfp == NULL || sfp->data.choice != SEQFEAT_SITE) - { - return FALSE; - } - - ifp = ImpFeatNew (); - if (NULL == ifp) + ValNodePtr PNTR org_names; + + if (biop == NULL || biop->org == NULL || StringHasNoText (biop->org->taxname) + || userdata == NULL) { - return FALSE; - } - - sitetype = (Int2) sfp->data.value.intvalue; - sfp->data.choice = SEQFEAT_IMP; - sfp->data.value.ptrvalue = (Pointer) ifp; - ifp->key = StringSave (GetImportFeatureName (featdef_to)); - str = GetMacroSiteTypeName (MacroSiteTypeFromAsn1SiteType (sitetype)); - if (str != NULL) { - gbqual = GBQualNew (); - if (gbqual != NULL) { - gbqual->qual = StringSave ("note"); - gbqual->val = StringSave (str); - gbqual->next = sfp->qual; - sfp->qual = gbqual; - } + return; } - return TRUE; + + org_names = (ValNodePtr PNTR) userdata; + + ValNodeAddPointer (org_names, 0, 
biop->org->taxname); } -static Boolean ConvertProtToRegion (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +static void SetToUpper (CharPtr cp) { - ProtRefPtr prp; - ValNodePtr vnp; - CharPtr str; - - if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) - { - return FALSE; - } - prp = (ProtRefPtr) sfp->data.value.ptrvalue; - if (NULL == prp) - { - return FALSE; - } - - vnp = prp->name; - if (vnp != NULL && vnp->next == NULL) { - str = (CharPtr) vnp->data.ptrvalue; - if (! StringHasNoText (str)) { - vnp->data.ptrvalue = NULL; - sfp->data.value.ptrvalue = ProtRefFree (prp); - sfp->data.choice = SEQFEAT_REGION; - sfp->data.value.ptrvalue = (Pointer) str; + if (cp == NULL) return; + while (*cp != 0) { + if (isalpha (*cp)) { + *cp = toupper (*cp); } + cp++; } - return TRUE; } -static Boolean ConvertRegionToProt (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +NLM_EXTERN void +FixCapitalizationInString +(CharPtr PNTR pTitle, + Uint2 capitalization, + ValNodePtr org_names) { - return ConvertRegionToProtFunc (sfp, featdef_to); + if (pTitle == NULL || capitalization == Cap_change_none) return; + + switch (capitalization) { + case Cap_change_tolower: + ResetCapitalization (FALSE, *pTitle); + FixAbbreviationsInElement (pTitle); + FixOrgNamesInString (*pTitle, org_names); + break; + case Cap_change_toupper: + SetToUpper (*pTitle); + FixAbbreviationsInElement (pTitle); + FixOrgNamesInString (*pTitle, org_names); + break; + case Cap_change_firstcap: + ResetCapitalization (TRUE, *pTitle); + FixAbbreviationsInElement (pTitle); + FixOrgNamesInString (*pTitle, org_names); + break; + case Cap_change_firstcaprestnochange: + if (*pTitle != NULL && isalpha (**pTitle)) { + **pTitle = toupper (**pTitle); + } + break; + } } -static Boolean ConvertToBond (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +static void AddDeflineDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list) { - SeqLocPtr slp; - 
BioseqPtr bsp; - SeqEntryPtr sep; - Boolean no_cds = FALSE; - SeqFeatPtr new_sfp; - SeqIdPtr sip; - SeqBondPtr sbp; - SeqPntPtr spp; + SeqDescrPtr sdp; + SeqMgrDescContext dcontext; - if (sfp == NULL || featdef_to != FEATDEF_BOND || dst_options == NULL || dst_options->choice != ConvertFeatureDstOptions_bond) { - return FALSE; + if (bsp == NULL || dest_list == NULL) { + return; } - - SeqFeatDataFree (&(sfp->data)); - sfp->data.choice = SEQFEAT_BOND; - sfp->data.value.intvalue = Asn1BondTypeFromMacroBondType (dst_options->data.intvalue); - bsp = BioseqFindFromSeqLoc (sfp->location); - - if (!ISA_aa (bsp->mol)) - { - slp = GetProteinLocationForNucleotideFeatureConversion (sfp->location, &no_cds); - if (no_cds || slp == NULL) { - return FALSE; - } - sfp->location = SeqLocFree (sfp->location); - sfp->location = slp; + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &dcontext); + while (sdp != NULL) { + ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_title, &dcontext); } +} - if (sfp->location->choice != SEQLOC_BOND) { - sip = SeqLocId (sfp->location); - if (sip != NULL) { - sbp = SeqBondNew (); - if (sbp != NULL) { - slp = ValNodeNew (NULL); - if (slp != NULL) { - slp->choice = SEQLOC_BOND; - slp->data.ptrvalue = (Pointer) sbp; - spp = SeqPntNew (); - if (spp != NULL) { - spp->strand = SeqLocStrand (sfp->location); - spp->id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (sip, 0))); - spp->point = SeqLocStart (sfp->location); - sbp->a = spp; - } - spp = SeqPntNew (); - if (spp != NULL) { - spp->strand = SeqLocStrand (sfp->location); - spp->id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (sip, 0))); - spp->point = SeqLocStop (sfp->location); - sbp->b = spp; - } - sfp->location = SeqLocFree (sfp->location); - sfp->location = slp; - } - } - } - } - sfp->idx.subtype = 0; +static ValNodePtr GetFeatureListForNucleotideBioseq (Uint1 featdef, BioseqPtr bsp); +static ValNodePtr GetFeatureListForProteinBioseq (Uint1 
featdef, BioseqPtr bsp); - bsp = GetBioseqGivenSeqLoc (slp, sfp->idx.entityID); - if (bsp == NULL) { - return FALSE; - } - sep = SeqMgrGetSeqEntryForData (bsp); - if (sep == NULL) { - return FALSE; - } +static void AddFeatureDestinationsForBioseq (BioseqPtr bsp, FeatureFieldLegalPtr featfield, ValNodePtr PNTR dest_list) +{ + Int4 featdef; - new_sfp = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite); - sfp->idx.deleteme = TRUE; - CreateNewFeature (sep, NULL, SEQFEAT_BOND, new_sfp); + if (bsp == NULL || featfield == NULL || dest_list == NULL) return; + + featdef = GetFeatdefFromFeatureType (featfield->type); + if (ISA_aa (bsp->mol)) { + ValNodeLink (dest_list, GetFeatureListForProteinBioseq (featdef, bsp)); + } else { + ValNodeLink (dest_list, GetFeatureListForNucleotideBioseq (featdef, bsp)); + } - return TRUE; } -static Boolean ConvertToSite (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +static void GetBioSourceDestinationsForBioseq (BioseqPtr bsp, Uint2 object_type, ValNodePtr PNTR dest_list) { - SeqLocPtr slp; - BioseqPtr bsp; - SeqEntryPtr sep; - Boolean no_cds = FALSE; - SeqFeatPtr new_sfp; + SeqDescrPtr sdp; + SeqFeatPtr sfp; + SeqMgrFeatContext fcontext; + SeqMgrDescContext dcontext; - if (sfp == NULL || featdef_to != FEATDEF_SITE || dst_options == NULL || dst_options->choice != ConvertFeatureDstOptions_site) { - return FALSE; + if (bsp == NULL || dest_list == NULL) + { + return; } - - SeqFeatDataFree (&(sfp->data)); - sfp->data.choice = SEQFEAT_SITE; - sfp->data.value.intvalue = Asn1SiteTypeFromMacroSiteType (dst_options->data.intvalue); - bsp = BioseqFindFromSeqLoc (sfp->location); - - if (!ISA_aa (bsp->mol)) + if (object_type == Object_type_constraint_any || object_type == Object_type_constraint_descriptor) { - slp = GetProteinLocationForNucleotideFeatureConversion (sfp->location, &no_cds); - if (no_cds || slp == NULL) { - return FALSE; + sdp = SeqMgrGetNextDescriptor (bsp, NULL, 
Seq_descr_source, &dcontext); + while (sdp != NULL) + { + ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext); } - sfp->location = SeqLocFree (sfp->location); - sfp->location = slp; } + + if (object_type == Object_type_constraint_any || object_type == Object_type_constraint_feature) + { + sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext); + while (sfp != NULL) + { + ValNodeAddPointer (dest_list, OBJ_SEQFEAT, sfp); + sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_BIOSRC, 0, &fcontext); + } + } +} - sfp->idx.subtype = 0; - bsp = GetBioseqGivenSeqLoc (slp, sfp->idx.entityID); - if (bsp == NULL) { - return FALSE; - } - sep = SeqMgrGetSeqEntryForData (bsp); - if (sep == NULL) { - return FALSE; - } +static void AddParseDestinations (ParseSourceInfoPtr psip, ParseDestPtr dst) +{ + ParseDstOrgPtr o; - new_sfp = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite); - sfp->idx.deleteme = TRUE; - CreateNewFeature (sep, NULL, SEQFEAT_SITE, new_sfp); + if (psip == NULL || dst == NULL) return; - return TRUE; + switch (dst->choice) { + case ParseDest_defline : + AddDeflineDestinationsForBioseq (psip->bsp, &(psip->dest_list)); + break; + case ParseDest_org : + o = (ParseDstOrgPtr) dst->data.ptrvalue; + if ((o->type == Object_type_constraint_any || o->type == Object_type_constraint_descriptor) + && psip->sdp != NULL && psip->sdp->choice == Seq_descr_source) { + ValNodeAddPointer (&(psip->dest_list), OBJ_SEQDESC, psip->sdp); + } else if ((o->type == Object_type_constraint_any || o->type == Object_type_constraint_feature) + && psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_BIOSRC) { + ValNodeAddPointer (&(psip->dest_list), OBJ_SEQFEAT, psip->sfp); + } else { + GetBioSourceDestinationsForBioseq (psip->bsp, o->type, &(psip->dest_list)); + } + break; + case ParseDest_featqual : + AddFeatureDestinationsForBioseq (psip->bsp, dst->data.ptrvalue, 
&(psip->dest_list)); + break; + case ParseDest_comment_descriptor : + AddCommentDescriptorDestinationsForBioseq (psip->bsp, &(psip->dest_list)); + break; + case ParseDest_dbxref : + GetBioSourceDestinationsForBioseq (psip->bsp, Object_type_constraint_any, &(psip->dest_list)); + break; + } } -static Boolean ConvertToRegion (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +static Boolean SourceHasOneUndeletedDestination (ParseSourceInfoPtr source) { - BioseqPtr bsp; - RegionTypePtr r; - Boolean create_prot_feats, no_cds = FALSE; - SeqLocPtr slp; - SeqEntryPtr sep; - SeqFeatPtr new_sfp; - - if (sfp == NULL || featdef_to != FEATDEF_REGION || dst_options == NULL || dst_options->choice != ConvertFeatureDstOptions_region || dst_options->data.ptrvalue == NULL) { + Int4 num_seen = 0; + ValNodePtr vnp; + + if (source == NULL + || source->dest_list == NULL) + { return FALSE; } - - r = (RegionTypePtr) dst_options->data.ptrvalue; - create_prot_feats = !r->create_nucleotide; - bsp = BioseqFindFromSeqLoc (sfp->location); - if (bsp == NULL) return FALSE; - - if (ISA_aa (bsp->mol)) + vnp = source->dest_list; + while (vnp != NULL && num_seen < 2) { - if (create_prot_feats) - { - slp = (SeqLocPtr) AsnIoMemCopy (sfp->location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite); - } - else + if (vnp->choice > 1) { - slp = FindNucleotideLocationForProteinFeatureConversion (sfp->location); + num_seen ++; } - sfp->location = SeqLocFree (sfp->location); - sfp->location = slp; + vnp = vnp->next; } - else if (create_prot_feats) + if (num_seen == 1) { - slp = GetProteinLocationForNucleotideFeatureConversion (sfp->location, &no_cds); - if (no_cds) { - return FALSE; - } - sfp->location = SeqLocFree (sfp->location); - sfp->location = slp; + return TRUE; } - - bsp = GetBioseqGivenSeqLoc (sfp->location, sfp->idx.entityID); - if (bsp == NULL) { - return FALSE; - } - - sep = SeqMgrGetSeqEntryForData (bsp); - if (sep == NULL) { + else + { return FALSE; } - - 
SeqFeatDataFree (&(sfp->data)); - sfp->data.choice = SEQFEAT_REGION; - sfp->data.value.ptrvalue = sfp->comment; - sfp->comment = NULL; - - new_sfp = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite); - sfp->idx.deleteme = TRUE; - CreateNewFeature (sep, NULL, SEQFEAT_REGION, new_sfp); - return TRUE; } -static Boolean ConvertRNAToRNA (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +static void CombineSourcesForDestinations (ValNodePtr PNTR source_list) { - RnaRefPtr rrp; - Boolean add_to_comment = FALSE; - CharPtr product; - - rrp = (RnaRefPtr) sfp->data.value.ptrvalue; - if (NULL == rrp) { - return FALSE; - } - - product = GetRNAProductString (sfp, NULL); + ValNodePtr source1_vnp, source2_vnp, dest1_vnp, dest2_vnp; + ValNodePtr source_new, del_vnp; + ParseSourceInfoPtr psip1, psip2, new_psip; + CharPtr comb_txt; - RemoveRNAProductString (sfp, NULL); - - sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue); - - sfp->data.value.ptrvalue = RnaRefFromLabel (featdef_to, product, &add_to_comment); - - SetRNAProductString (sfp, NULL, product, ExistingTextOption_replace_old); - if (add_to_comment) { - SetStringValue (&(sfp->comment), product, ExistingTextOption_append_semi); - } - product = MemFree (product); - - /* apply destination options */ - ApplyRNADestinationOptions (sfp, featdef_to, dst_options); - - sfp->idx.subtype = 0; - return TRUE; -} - - -static Boolean MiscFeatToCodingRegionConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) -{ - return ConvertMiscFeatToCodingRegion (sfp); -} - - -typedef struct convertfeattable { - Uint2 seqfeat_from; - Uint2 featdef_from; - Uint2 seqfeat_to; - Uint2 featdef_to; - ConvertFeatureFunc func; - CharPtr help_text; -} ConvertFeatTableData, PNTR ConvertFeatTablePtr; - -static ConvertFeatTableData conversion_functions[] = { - { SEQFEAT_CDREGION, FEATDEF_CDS, SEQFEAT_RNA, FEATDEF_ANY, - ConvertCDSToRNAFunc, - "Delete 
protein product sequence.\nClear product field if transcript ID removal was requested.\nIf converting to tRNA and anticodon value can be parsed from label, set aa value, and add any text that could not be parsed into an anticodon value to the feature note.\nIf converting to other RNA, put label in RNA product." }, - { SEQFEAT_GENE, FEATDEF_GENE, SEQFEAT_RNA, FEATDEF_ANY, - ConvertGeneToRNAFunc, - "If converting to tRNA and anticodon value can be parsed from label, set aa value, and add any text that could not be parsed into an anticodon value to the feature note. If converting to other RNA, put label in RNA product. Also append gene locus, allele, description, map location, and locus tag to comment (as long as these values are not already in the label and therefore in the RNA product)." }, - { SEQFEAT_BIOSRC, FEATDEF_BIOSRC, SEQFEAT_IMP, FEATDEF_repeat_region, - ConvertBioSrcToRegionFunc, - "Creates a repeat_region with mobile_element qualifiers for the transposon and/or insertion sequence qualifiers on the BioSource. All other BioSource information is discarded." }, - { SEQFEAT_CDREGION, FEATDEF_CDS, SEQFEAT_IMP, FEATDEF_misc_feature, - ConvertCDSToMiscFeatFunc, - "Copy comment from coding region to new misc_feature and remove product field. If not pseudo coding region, add product name from protein feature to new misc_feature comment and delete product sequence." }, - { SEQFEAT_IMP, FEATDEF_ANY, SEQFEAT_PROT, FEATDEF_ANY, - ConvertImpToProtFuncEx, - "Original feature must be on nucleotide sequence and be contained in coding region location. Coding region must have product protein sequence. New feature is created on product protein sequence so that the translated location will be as close as possible to the original nucleotide location (may not be exact because of codon boundaries)." 
}, - { SEQFEAT_PROT, FEATDEF_mat_peptide_aa, SEQFEAT_IMP, FEATDEF_ANY, - ConvertProtToImpFuncEx, - "Original feature must be on a protein sequence that is a product of a coding region.\nNew feature will be created on same sequence as coding region.\n" - "If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n" - "EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on the new feature.\n" - "Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." }, - { SEQFEAT_PROT, FEATDEF_sig_peptide_aa, SEQFEAT_IMP, FEATDEF_ANY, - ConvertProtToImpFuncEx, - "Original feature must be on a protein sequence that is a product of a coding region.\nNew feature will be created on same sequence as coding region.\n" - "If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n" - "EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on the new feature.\n" - "Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." 
}, - { SEQFEAT_PROT, FEATDEF_transit_peptide_aa, SEQFEAT_IMP, FEATDEF_ANY, - ConvertProtToImpFuncEx, - "Original feature must be on a protein sequence that is a product of a coding region.\nNew feature will be created on same sequence as coding region.\n" - "If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n" - "EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on the new feature.\n" - "Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." }, - { SEQFEAT_IMP, FEATDEF_ANY, SEQFEAT_RNA, FEATDEF_ANY, - ConvertImpToRNAFunc, - "Creates an RNA feature of the specified subtype. Import feature key is discarded." }, - { SEQFEAT_IMP, FEATDEF_misc_feature, SEQFEAT_CDREGION, FEATDEF_CDS, - MiscFeatToCodingRegionConvertFunc, - "Use misc_feature comment for coding region product name." }, - { SEQFEAT_REGION, FEATDEF_REGION, SEQFEAT_IMP, FEATDEF_ANY, - ConvertRegionToImp, - "Creates a misc_feature with the region name saved as a /note qualifier." }, - { SEQFEAT_REGION, FEATDEF_REGION, SEQFEAT_RNA, FEATDEF_ANY, - ConvertRegionToRNA, - "Creates an RNA feature with the region name as the product name." }, - { SEQFEAT_COMMENT, FEATDEF_ANY, SEQFEAT_IMP, FEATDEF_misc_feature, - ConvertCommentToMiscFeat, - "Creates a misc_feature with the same note as the original. Note - the flatfile display for the feature is the same." }, - { SEQFEAT_GENE, FEATDEF_GENE, SEQFEAT_IMP, FEATDEF_misc_feature, - ConvertGeneToMiscFeat, - "Creates a misc_feature with the gene description and locus prepended to the original comment, separated by semicolons." 
}, - { SEQFEAT_RNA, FEATDEF_ANY, SEQFEAT_IMP, FEATDEF_ANY, - ConvertRNAToImpFeat, - "Creates an import feature of the specified subtype and adds the RNA product name to the comment." } , - { SEQFEAT_SITE, FEATDEF_ANY, SEQFEAT_IMP, FEATDEF_ANY, - ConvertSiteToImpFeat, - "Creates an import feature of the specified subtype with the site type name as a /note qualifier." } , - { SEQFEAT_PROT, FEATDEF_mat_peptide_aa, SEQFEAT_REGION, FEATDEF_REGION, - NULL, - "Creates a Region feature with the protein name as the region name." }, - { SEQFEAT_PROT, FEATDEF_ANY, SEQFEAT_REGION, FEATDEF_REGION, - ConvertProtToRegion, - "Creates a Region feature with the protein name as the region name." }, - { SEQFEAT_REGION, FEATDEF_REGION, SEQFEAT_PROT, FEATDEF_ANY, - ConvertRegionToProt, - "If feature is on nucleotide sequence, will create feature on protein product sequence for overlapping coding region. Protein name will be region name." }, - { 0, FEATDEF_ANY, SEQFEAT_BOND, FEATDEF_BOND, - ConvertToBond, - "Create Bond feature with specified bond type. Location is a SeqLocBond with a point at the start of the original location and a point at the end of the original location. All feature ID, partialness, except, comment, product, location, genbank qualifiers, title, citation, experimental evidence, gene xrefs, db xrefs, and pseudo-ness information is discarded." 
}, - { 0, FEATDEF_ANY, SEQFEAT_SITE, FEATDEF_SITE, + for (source1_vnp = *source_list; + source1_vnp != NULL; + source1_vnp = source1_vnp->next) + { + psip1 = (ParseSourceInfoPtr) source1_vnp->data.ptrvalue; + if (psip1 == NULL || psip1->dest_list == NULL) + { + continue; + } + for (source2_vnp = source1_vnp->next; + source2_vnp != NULL; + source2_vnp = source2_vnp->next) + { + if (source2_vnp->choice > 0) + { + /* already marked for deletion */ + continue; + } + psip2 = (ParseSourceInfoPtr) source2_vnp->data.ptrvalue; + if (psip2 == NULL || psip2->dest_list == NULL) + { + continue; + } + for (dest1_vnp = psip1->dest_list; + dest1_vnp != NULL; + dest1_vnp = dest1_vnp->next) + { + if (dest1_vnp->choice == 0) + { + /* already marked for deletion */ + continue; + } + for (dest2_vnp = psip2->dest_list; + dest2_vnp != NULL; + dest2_vnp = dest2_vnp->next) + { + if (dest2_vnp->choice == 0) + { + /* already marked for deletion */ + continue; + } + if (dest1_vnp->choice == dest2_vnp->choice + && dest1_vnp->data.ptrvalue == dest2_vnp->data.ptrvalue) + { + comb_txt = (CharPtr) (MemNew (sizeof (Char) + * (StringLen (psip1->parse_src_txt) + + StringLen (psip2->parse_src_txt) + + 2))); + StringCpy (comb_txt, psip1->parse_src_txt); + StringCat (comb_txt, ";"); + StringCat (comb_txt, psip2->parse_src_txt); + + /* If the first source has a single destination, then we can + * add the text from the second source to the first and remove + * the destination from the second source. + */ + if (SourceHasOneUndeletedDestination (psip1)) + { + + psip1->parse_src_txt = MemFree (psip1->parse_src_txt); + psip1->parse_src_txt = comb_txt; + dest2_vnp->choice = 0; + } + /* If the first source has more than one destination and + * the second source has a single destination, then we can + * remove the repeated desination from the first source + * and add the text from the first source to the second source. 
+ */ + else if (SourceHasOneUndeletedDestination (psip2)) + { + psip2->parse_src_txt = MemFree (psip2->parse_src_txt); + psip2->parse_src_txt = comb_txt; + dest1_vnp->choice = 0; + } + /* If the first and second sources have multiple destinations, + * we need to remove the repeated destination from both the first + * and second source and create a new source with the combined + * text for just the repeated destination. + */ + else + { + new_psip = ParseSourceInfoNew (NULL, NULL, NULL, NULL, comb_txt); + ValNodeAddPointer (&(new_psip->dest_list), + dest1_vnp->choice, + dest1_vnp->data.ptrvalue); + dest1_vnp->choice = 0; + dest2_vnp->choice = 0; + source_new = ValNodeNew (NULL); + source_new->choice = 0; + source_new->data.ptrvalue = new_psip; + source_new->next = source1_vnp->next; + source1_vnp->next = source_new; + } + } + } + } + + del_vnp = ValNodeExtractList (&(psip1->dest_list), 0); + del_vnp = ValNodeFree (del_vnp); + if (psip1->dest_list == NULL) + { + source1_vnp->choice = 1; + } + del_vnp = ValNodeExtractList (&(psip2->dest_list), 0); + del_vnp = ValNodeFree (del_vnp); + if (psip2->dest_list == NULL) + { + source2_vnp->choice = 1; + } + } + } + + /* now remove sources deleted */ + del_vnp = ValNodeExtractList (source_list, 1); + del_vnp = ParseSourceListFree (del_vnp); +} + + +static BioseqSetPtr GetPartsForSourceDescriptorOnSegSet (SeqDescrPtr sdp) +{ + ObjValNodePtr ovp; + BioseqSetPtr bssp; + SeqEntryPtr sep; + + if (sdp == NULL || sdp->extended != 1) { + return NULL; + } + ovp = (ObjValNodePtr) sdp; + if (ovp->idx.parenttype != OBJ_BIOSEQSET || ovp->idx.parentptr == NULL) { + return NULL; + } + bssp = (BioseqSetPtr) ovp->idx.parentptr; + + if (bssp->_class == BioseqseqSet_class_nuc_prot + && IS_Bioseq_set (bssp->seq_set) + && bssp->seq_set->data.ptrvalue != NULL) { + bssp = (BioseqSetPtr) bssp->seq_set->data.ptrvalue; + } + + if (bssp->_class == BioseqseqSet_class_segset) { + sep = bssp->seq_set; + while (sep != NULL) { + if (IS_Bioseq_set (sep) && 
sep->data.ptrvalue != NULL) { + bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (bssp->_class == BioseqseqSet_class_parts) { + return bssp; + } + } + sep = sep->next; + } + } + + return NULL; +} + + +static SeqDescrPtr FindSourceDescriptorInSeqEntry (SeqEntryPtr sep) +{ + BioseqPtr bsp; + BioseqSetPtr bssp; + SeqDescrPtr sdp = NULL; + + if (sep != NULL && sep->data.ptrvalue != NULL) { + if (IS_Bioseq (sep)) { + bsp = (BioseqPtr) sep->data.ptrvalue; + sdp = bsp->descr; + } else if (IS_Bioseq_set (sep)) { + bssp = (BioseqSetPtr) sep->data.ptrvalue; + sdp = bssp->descr; + } + while (sdp != NULL && sdp->choice != Seq_descr_source) + { + sdp = sdp->next; + } + } + return sdp; +} + + +static SeqDescrPtr PropagateToSeqEntry (SeqEntryPtr sep, SeqDescrPtr sdp) +{ + BioseqPtr bsp; + BioseqSetPtr bssp; + SeqDescrPtr new_sdp = NULL; + + if (sep != NULL && sep->data.ptrvalue != NULL) { + if (IS_Bioseq (sep)) { + bsp = (BioseqPtr) sep->data.ptrvalue; + new_sdp = AsnIoMemCopy ((Pointer) sdp, + (AsnReadFunc) SeqDescrAsnRead, + (AsnWriteFunc) SeqDescrAsnWrite); + ValNodeLink (&(bsp->descr), new_sdp); + } else if (IS_Bioseq_set (sep)) { + bssp = (BioseqSetPtr) sep->data.ptrvalue; + new_sdp = AsnIoMemCopy ((Pointer) sdp, + (AsnReadFunc) SeqDescrAsnRead, + (AsnWriteFunc) SeqDescrAsnWrite); + ValNodeLink (&(bssp->descr), new_sdp); + } + } + return new_sdp; +} + + +static void PropagateSourceOnSegSetForParse (ValNodePtr parse_source_list) +{ + ParseSourceInfoPtr psip; + ValNodePtr vnp_src, vnp_dst; + SeqDescrPtr sdp, other_sdp; + SeqEntryPtr sep; + ValNodePtr extra_dests = NULL; + BioseqSetPtr parts_bssp; + + for (vnp_src = parse_source_list; vnp_src != NULL; vnp_src = vnp_src->next) { + psip = (ParseSourceInfoPtr) vnp_src->data.ptrvalue; + if (psip != NULL) { + for (vnp_dst = psip->dest_list; vnp_dst != NULL; vnp_dst = vnp_dst->next) { + if (vnp_dst->choice == OBJ_SEQDESC) { + sdp = (SeqDescrPtr) vnp_dst->data.ptrvalue; + if (sdp != NULL && sdp->choice == Seq_descr_source) { + 
parts_bssp = GetPartsForSourceDescriptorOnSegSet (sdp); + if (parts_bssp != NULL) { + for (sep = parts_bssp->seq_set; sep != NULL; sep = sep->next) { + if (IS_Bioseq(sep) && sep->data.ptrvalue == psip->bsp) { + other_sdp = FindSourceDescriptorInSeqEntry (sep); + if (other_sdp == NULL) { + other_sdp = PropagateToSeqEntry (sep, sdp); + ValNodeAddPointer (&extra_dests, OBJ_SEQDESC, other_sdp); + } + } + } + + /* set choice to 0 so master won't be a destination */ + vnp_dst->choice = 0; + + } + } + } + } + /* add extra destinations to list */ + ValNodeLink (&psip->dest_list, extra_dests); + extra_dests = NULL; + } + } + +} + + + +NLM_EXTERN CharPtr GetDBxrefFromBioSource (BioSourcePtr biop, CharPtr db_name) +{ + CharPtr rval = NULL; + ValNodePtr vnp; + DbtagPtr dbtag; + + if (biop == NULL || biop->org == NULL || StringHasNoText (db_name)) { + return NULL; + } + for (vnp = biop->org->db; vnp != NULL && rval == NULL; vnp = vnp->next) { + dbtag = (DbtagPtr) vnp->data.ptrvalue; + if (dbtag != NULL && StringCmp (db_name, dbtag->db) == 0) { + rval = GetObjectIdString (dbtag->tag); + } + } + return rval; +} + + +NLM_EXTERN Boolean SetDBxrefForBioSource (BioSourcePtr biop, CharPtr db_name, CharPtr str, Uint2 existing_text) +{ + ValNodePtr dbx; + DbtagPtr dbtag; + Boolean found = FALSE; + Char buf[20]; + Boolean rval = FALSE; + + if (biop == NULL || StringHasNoText (db_name) || StringHasNoText (str)) { + return FALSE; + } + + if (biop->org == NULL) + { + biop->org = OrgRefNew(); + } + dbx = biop->org->db; + while (dbx != NULL && !found) + { + dbtag = (DbtagPtr) dbx->data.ptrvalue; + if (dbtag != NULL && dbtag->tag != NULL + && StringCmp (dbtag->db, db_name) == 0) + { + found = TRUE; + } + if (!found) + { + dbx = dbx->next; + } + } + if (!found) + { + dbtag = DbtagNew(); + dbtag->db = StringSave (db_name); + ValNodeAddPointer (&(biop->org->db), 0, dbtag); + } + if (dbtag->tag == NULL) + { + dbtag->tag = ObjectIdNew(); + } + /* if it was a number before, make it a string now */ + 
if (dbtag->tag->id > 0 && dbtag->tag->str == NULL) + { + sprintf (buf, "%d", dbtag->tag->id); + dbtag->tag->id = 0; + dbtag->tag->str = StringSave (buf); + } + rval = SetStringValue (&(dbtag->tag->str), str, existing_text); + return rval; +} + + +NLM_EXTERN Boolean RemoveDBxrefForBioSource (BioSourcePtr biop, CharPtr db_name, StringConstraintPtr scp) +{ + ValNodePtr dbx, prev = NULL, dbx_next; + DbtagPtr dbtag; + CharPtr str; + Boolean found = FALSE; + + if (biop == NULL || StringHasNoText (db_name)) { + return FALSE; + } + + if (biop->org == NULL) + { + biop->org = OrgRefNew(); + } + dbx = biop->org->db; + for (dbx = biop->org->db; dbx != NULL; dbx = dbx_next) + { + dbx_next = dbx->next; + dbtag = (DbtagPtr) dbx->data.ptrvalue; + str = NULL; + if (dbtag != NULL && dbtag->tag != NULL + && StringCmp (dbtag->db, db_name) == 0 + && (scp == NULL || ((str = GetDbtagString(dbtag)) != NULL && DoesStringMatchConstraint (str, scp)))) + { + if (prev == NULL) { + biop->org->db = dbx->next; + } else { + prev->next = dbx->next; + } + dbx->data.ptrvalue = DbtagFree (dbx->data.ptrvalue); + dbx = ValNodeFree (dbx); + found = TRUE; + } + else + { + prev = dbx; + } + str = MemFree (str); + } + return found; +} + + +static Int4 SetFieldForDestList (ValNodePtr dest_list, ParseDestPtr field, CharPtr str, Uint2 existing_text) +{ + ValNodePtr vnp; + SeqDescrPtr sdp; + ObjValNodePtr ovp; + CharPtr cp; + BioSourcePtr biop; + ParseDstOrgPtr o; + FeatureFieldLegalPtr fl; + FeatureField f; + Boolean was_empty; + Int4 num_succeeded = 0; + + if (dest_list == NULL || field == NULL) return 0; + + switch (field->choice) { + case ParseDest_defline : + for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQDESC && vnp->data.ptrvalue != NULL) { + sdp = (SeqDescrPtr) vnp->data.ptrvalue; + if (sdp->choice == Seq_descr_title) { + cp = sdp->data.ptrvalue; + if (SetStringValue (&cp, str, existing_text)) { + num_succeeded++; + } + sdp->data.ptrvalue = cp; + } + } + } + break; + 
case ParseDest_org : + o = (ParseDstOrgPtr) field->data.ptrvalue; + if (o != NULL) { + for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { + biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue); + if (SetSourceQualInBioSource (biop, o->field, NULL, str, existing_text)) { + num_succeeded++; + } + } + } + break; + case ParseDest_featqual: + fl = (FeatureFieldLegalPtr) field->data.ptrvalue; + if (fl != NULL) { + f.type = fl->type; + f.field = ValNodeNew(NULL); + f.field->next = NULL; + f.field->choice = FeatQualChoice_legal_qual; + f.field->data.intvalue = fl->field; + for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { + if (SetQualOnFeature (vnp->data.ptrvalue, &f, NULL, str, existing_text)) { + num_succeeded++; + } + } + f.field = ValNodeFree (f.field); + } + break; + case ParseDest_comment_descriptor: + for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { + sdp = vnp->data.ptrvalue; + if (StringHasNoText (sdp->data.ptrvalue)) { + was_empty = TRUE; + } else { + was_empty = FALSE; + } + cp = sdp->data.ptrvalue; + if (SetStringValue (&cp, str, existing_text)) { + num_succeeded++; + } + sdp->data.ptrvalue = cp; + if (was_empty) { + ovp = (ObjValNodePtr) sdp; + ovp->idx.deleteme = FALSE; + } + } + break; + case ParseDest_dbxref: + if (!StringHasNoText (field->data.ptrvalue)) { + for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { + biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue); + if (SetDBxrefForBioSource (biop, field->data.ptrvalue, str, existing_text)) { + num_succeeded++; + } + } + } + break; + } + return num_succeeded; +} + + + +static void AddToSampleForDestList (AECRSamplePtr sample, ValNodePtr dest_list, ParseDestPtr field) +{ + ValNodePtr vnp; + SeqDescrPtr sdp; + BioSourcePtr biop; + ParseDstOrgPtr o; + FeatureFieldLegalPtr fl; + FeatureField f; + + if (dest_list == NULL || field == NULL || sample == NULL) return; + + switch (field->choice) { + case ParseDest_defline : + for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { 
+ if (vnp->choice == OBJ_SEQDESC && vnp->data.ptrvalue != NULL) { + sdp = (SeqDescrPtr) vnp->data.ptrvalue; + if (sdp->choice == Seq_descr_title) { + AddTextToAECRSample (sample, StringSave (sdp->data.ptrvalue)); + } + } + } + break; + case ParseDest_org : + o = (ParseDstOrgPtr) field->data.ptrvalue; + if (o != NULL) { + for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { + biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue); + AddTextToAECRSample (sample, GetSourceQualFromBioSource (biop, o->field, NULL)); + } + } + break; + case ParseDest_featqual: + fl = (FeatureFieldLegalPtr) field->data.ptrvalue; + if (fl != NULL) { + f.type = fl->type; + f.field = ValNodeNew(NULL); + f.field->next = NULL; + f.field->choice = FeatQualChoice_legal_qual; + f.field->data.intvalue = fl->field; + for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { + AddTextToAECRSample (sample, GetQualFromFeature (vnp->data.ptrvalue, &f, NULL)); + } + f.field = ValNodeFree (f.field); + } + break; + case ParseDest_comment_descriptor: + for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { + sdp = (SeqDescrPtr) vnp->data.ptrvalue; + AddTextToAECRSample (sample, StringSave (sdp->data.ptrvalue)); + } + break; + case ParseDest_dbxref: + if (!StringHasNoText (field->data.ptrvalue)) { + for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { + biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue); + AddTextToAECRSample (sample, GetDBxrefFromBioSource (biop, field->data.ptrvalue)); + } + } + break; + } +} + + +static void StripFieldForSrcList (ParseSourceInfoPtr psip, ParseSrcPtr field, TextPortionPtr text_portion) +{ + CharPtr str; + ParseSrcOrgPtr o; + BioSourcePtr biop; + + if (psip == NULL || field == NULL || text_portion == NULL) return; + + switch (field->choice) { + case ParseSrc_defline : + if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_title) { + ReplaceStringForParse (psip->sdp->data.ptrvalue, text_portion); + } + break; + case ParseSrc_org : + o = 
(ParseSrcOrgPtr) field->data.ptrvalue; + if (o != NULL) { + if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_source) { + biop = (BioSourcePtr) psip->sdp->data.ptrvalue; + str = GetSourceQualFromBioSource (biop, o->field, NULL); + ReplaceStringForParse (str, text_portion); + SetSourceQualInBioSource (biop, o->field, NULL, str, ExistingTextOption_replace_old); + str = MemFree (str); + } else if (psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_BIOSRC) { + biop = (BioSourcePtr) psip->sfp->data.value.ptrvalue; + str = GetSourceQualFromBioSource (biop, o->field, NULL); + ReplaceStringForParse (str, text_portion); + SetSourceQualInBioSource (biop, o->field, NULL, str, ExistingTextOption_replace_old); + str = MemFree (str); + } + } + break; + case ParseSrc_comment: + if (psip->sdp != NULL) { + if (psip->sdp->choice == Seq_descr_user) { + StripBankitCommentForParse (psip->sdp, text_portion); + } else if (psip->sdp->choice == Seq_descr_comment) { + ReplaceStringForParse (psip->sdp->data.ptrvalue, text_portion); + } + } + if (psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_COMMENT) { + ReplaceStringForParse (psip->sfp->data.value.ptrvalue, text_portion); + } + break; + case ParseSrc_bankit_comment: + if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_user) { + StripBankitCommentForParse (psip->sdp, text_portion); + } + break; + case ParseSrc_structured_comment: + if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_user) { + StripStructuredCommentForParse (psip->sdp, field->data.ptrvalue, text_portion); + } + break; + } +} + + + +NLM_EXTERN AECRSamplePtr GetExistingTextForParseAction (ParseActionPtr action, SeqEntryPtr sep) +{ + ParseSrcCollectionData psd; + ParseSourceInfoPtr psip; + ValNodePtr vnp; + ValNodePtr dest_list = NULL; + AECRSamplePtr sample; + + if (action == NULL || sep == NULL) return 0; + + psd.src = action->src; + psd.portion = action->portion; + psd.src_list = NULL; + + /* first, we need to get a list of the parse sources */ + 
VisitBioseqsInSep (sep, &psd, FindParseSourceBioseqCallback); + + + /* for each parse source, get a list of the destinations */ + for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next) + { + if (vnp->data.ptrvalue == NULL) continue; + psip = (ParseSourceInfoPtr) vnp->data.ptrvalue; + + /* find destinations */ + AddParseDestinations (psip, action->dest); + + /* add destinations to list */ + ValNodeLink (&dest_list, psip->dest_list); + psip->dest_list = NULL; + } + + psd.src_list = ParseSourceListFree (psd.src_list); + + /* get sample for dest_list */ + sample = AECRSampleNew (); + AddToSampleForDestList (sample, dest_list, action->dest); + dest_list = ValNodeFree (dest_list); + return sample; +} + + +static Int4 ApplyParseActionToSeqEntry (ParseActionPtr action, SeqEntryPtr sep) +{ + ParseSrcCollectionData psd; + ParseSourceInfoPtr psip; + ValNodePtr orgnames = NULL, source_list_for_removal = NULL, vnp; + Int4 num_succeeded = 0; + + if (action == NULL || sep == NULL) return 0; + + psd.src = action->src; + psd.portion = action->portion; + psd.src_list = NULL; + + /* first, we need to get a list of the parse sources */ + VisitBioseqsInSep (sep, &psd, FindParseSourceBioseqCallback); + + if (action->capitalization != Cap_change_none) { + /* if we will be fixing capitalization, get org names to use in fixes */ + VisitBioSourcesInSep (sep, &orgnames, GetOrgNamesInRecordCallback); + } + + /* for each parse source, we need to get a list of the destinations */ + for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next) + { + if (vnp->data.ptrvalue == NULL) continue; + psip = (ParseSourceInfoPtr) vnp->data.ptrvalue; + if (action->remove_from_parsed) { + ValNodeAddPointer (&source_list_for_removal, 0, ParseSourceInfoCopy (psip)); + } + /* fix source text */ + FixCapitalizationInString (&(psip->parse_src_txt), action->capitalization, orgnames); + ApplyTextTransformsToString (&(psip->parse_src_txt), action->transform); + + /* find destinations */ + AddParseDestinations (psip, 
action->dest); + + } + + /* free orgname list if we created it */ + orgnames = ValNodeFree (orgnames); + + CombineSourcesForDestinations (&(psd.src_list)); + + if (action->dest->choice == ParseDest_org) { + PropagateSourceOnSegSetForParse (psd.src_list); + } + + /* now do the parsing */ + for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next) { + psip = (ParseSourceInfoPtr) vnp->data.ptrvalue; + num_succeeded += SetFieldForDestList (psip->dest_list, action->dest, psip->parse_src_txt, action->existing_text); + } + + /* now remove strings from sources */ + for (vnp = source_list_for_removal; vnp != NULL; vnp = vnp->next) + { + if (vnp->data.ptrvalue == NULL) continue; + psip = (ParseSourceInfoPtr) vnp->data.ptrvalue; + StripFieldForSrcList (psip, action->src, action->portion); + } + + psd.src_list = ParseSourceListFree (psd.src_list); + return num_succeeded; +} + + +static void SetCdRegionGeneticCode (SeqFeatPtr cds) +{ + CdRegionPtr crp; + SeqEntryPtr parent_sep; + BioseqPtr bsp; + Int4 genCode; + ValNodePtr code, vnp; + + if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return; + if (cds->data.value.ptrvalue == NULL) { + cds->data.value.ptrvalue = CdRegionNew(); + } + crp = (CdRegionPtr) cds->data.value.ptrvalue; + bsp = BioseqFindFromSeqLoc (cds->location); + if (bsp == NULL) return; + parent_sep = GetBestTopParentForData (bsp->idx.entityID, bsp); + genCode = SeqEntryToGeneticCode (parent_sep, NULL, NULL, 0); + + code = ValNodeNew (NULL); + if (code != NULL) { + code->choice = 254; + vnp = ValNodeNew (NULL); + code->data.ptrvalue = vnp; + if (vnp != NULL) { + vnp->choice = 2; + vnp->data.intvalue = genCode; + } + } + crp->genetic_code = code; +} + + +static void CreateDataForFeature (SeqFeatPtr sfp, Int4 feature_type) +{ + Int4 featdef, seqfeattype; + CharPtr label = NULL; + RnaRefPtr rrp; + RNAGenPtr rgp; + ImpFeatPtr ifp; + + featdef = GetFeatdefFromFeatureType (feature_type); + sfp->idx.subtype = featdef; + seqfeattype = FindFeatFromFeatDefType 
(featdef); + switch (seqfeattype) { + case SEQFEAT_GENE: + sfp->data.value.ptrvalue = GeneRefNew(); + break; + case SEQFEAT_CDREGION: + sfp->data.value.ptrvalue = CdRegionNew(); + SetCdRegionGeneticCode (sfp); + break; + case SEQFEAT_RNA: + rrp = RnaRefNew(); + rrp->ext.choice = 0; + sfp->data.value.ptrvalue = rrp; + switch (featdef) { + case FEATDEF_preRNA: + rrp->type = RNA_TYPE_premsg; + break; + case FEATDEF_mRNA: + rrp->type = RNA_TYPE_mRNA; + break; + case FEATDEF_tRNA: + rrp->type = RNA_TYPE_tRNA; + break; + case FEATDEF_rRNA: + rrp->type = RNA_TYPE_rRNA; + break; + case FEATDEF_snRNA: + rrp->type = RNA_TYPE_ncRNA; + SetncRNAClass (rrp, NULL, "snRNA", ExistingTextOption_replace_old); + break; + case FEATDEF_scRNA: + rrp->type = RNA_TYPE_ncRNA; + SetncRNAClass (rrp, NULL, "scRNA", ExistingTextOption_replace_old); + break; + case FEATDEF_tmRNA: + rrp->type = RNA_TYPE_tmRNA; + rgp = RNAGenNew (); + rrp->ext.choice = 3; + rrp->ext.value.ptrvalue = rgp; + break; + case FEATDEF_ncRNA: + rrp->type = RNA_TYPE_ncRNA; + rgp = RNAGenNew (); + rrp->ext.choice = 3; + rrp->ext.value.ptrvalue = rgp; + break; + case FEATDEF_otherRNA: + rrp->type = RNA_TYPE_misc_RNA; + rgp = RNAGenNew(); + rrp->ext.choice = 3; + rrp->ext.value.ptrvalue = rgp; + break; + } + break; + case SEQFEAT_IMP: + ifp = ImpFeatNew(); + sfp->data.value.ptrvalue = ifp; + label = GetFeatureNameFromFeatureType (feature_type); + ifp->key = StringSave (label); + break; + } +} + + +static SeqLocPtr LocationFromApplyFeatureAction (BioseqPtr bsp, ApplyFeatureActionPtr action) +{ + LocationIntervalPtr l; + SeqLocPtr slp = NULL; + Uint1 strand = Seq_strand_plus; + Int4 from, to; + + if (bsp == NULL || action == NULL || action->location == NULL) return NULL; + + if (!action->plus_strand) { + strand = Seq_strand_minus; + } + if (action->location->choice == LocationChoice_interval) { + l = (LocationIntervalPtr) action->location->data.ptrvalue; + if (l != NULL) { + from = MIN (l->from, l->to) - 1; + to = MAX (l->from, 
l->to) - 1; + slp = SeqLocIntNew (from, to, strand, SeqIdFindWorst (bsp->id)); + } + SetSeqLocPartial (slp, action->partial5, action->partial3); + } else if (action->location->choice == LocationChoice_whole_sequence) { + slp = SeqLocIntNew (0, bsp->length - 1, strand, SeqIdFindWorst (bsp->id)); + SetSeqLocPartial (slp, action->partial5, action->partial3); + } else if (action->location->choice == LocationChoice_point) { + AddSeqLocPoint (&slp, SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0))), + action->location->data.intvalue, FALSE, TRUE, strand); + } + return slp; +} + + +static Boolean OkToApplyToBioseq (ApplyFeatureActionPtr action, BioseqPtr bsp) +{ + SeqFeatPtr sfp; + SeqMgrFeatContext context; + Int4 featdef; + Boolean rval = TRUE; + + if (action == NULL || bsp == NULL) return FALSE; + + if (!action->add_redundant) { + featdef = GetFeatdefFromFeatureType (action->type); + sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context); + if (sfp != NULL) { + rval = FALSE; + } + } + return rval; +} + +static void AddParts (ApplyFeatureActionPtr action, BioseqSetPtr parts, ValNodePtr PNTR bsp_list) +{ + SeqEntryPtr sep; + Int4 seg_num; + + if (action == NULL || !action->apply_to_parts + || parts == NULL || parts->_class != BioseqseqSet_class_parts + || bsp_list == NULL) { + return; + } + + if (action->only_seg_num > -1) { + seg_num = 0; + sep = parts->seq_set; + while (seg_num < action->only_seg_num && sep != NULL) { + sep = sep->next; + seg_num++; + } + if (sep != NULL && IS_Bioseq (sep) && OkToApplyToBioseq (action, sep->data.ptrvalue)) { + ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, sep->data.ptrvalue); + } + } else { + for (sep = parts->seq_set; sep != NULL; sep = sep->next) { + if (IS_Bioseq (sep) && OkToApplyToBioseq (action, sep->data.ptrvalue)) { + ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, sep->data.ptrvalue); + } + } + } +} + + +static void AddSequenceOrParts (ApplyFeatureActionPtr action, BioseqPtr bsp, ValNodePtr PNTR bsp_list) +{ + BioseqSetPtr 
bssp, parts; + SeqEntryPtr sep; + + if (action == NULL || bsp == NULL || bsp_list == NULL) return; + + if (bsp->idx.parenttype == OBJ_BIOSEQSET && bsp->idx.parentptr != NULL) { + bssp = (BioseqSetPtr) bsp->idx.parentptr; + if (bssp->_class == BioseqseqSet_class_segset) { + if (action->apply_to_parts) { + sep = bssp->seq_set; + while (sep != NULL && !IS_Bioseq_set (sep)) { + sep = sep->next; + } + if (sep != NULL) { + AddParts (action, sep->data.ptrvalue, bsp_list); + } + } else { + if (OkToApplyToBioseq (action, bsp)) { + ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp); + } + } + } else if (bssp->_class == BioseqseqSet_class_parts) { + if (action->apply_to_parts) { + AddParts (action, bssp, bsp_list); + } else { + parts = bssp; + if (parts->idx.parenttype == OBJ_BIOSEQSET && parts->idx.parentptr != NULL) { + bssp = (BioseqSetPtr) parts->idx.parentptr; + if (IS_Bioseq (bssp->seq_set) && OkToApplyToBioseq (action, bssp->seq_set->data.ptrvalue)) { + ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp_list); + } + } + } + } else { + if (OkToApplyToBioseq (action, bsp)) { + ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp); + } + } + } else { + if (OkToApplyToBioseq (action, bsp)) { + ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp); + } + } +} + +static void AddSequenceOrPartsFromSeqEntry (ApplyFeatureActionPtr action, SeqEntryPtr sep, ValNodePtr PNTR bsp_list) +{ + BioseqSetPtr bssp; + SeqEntryPtr seq_set; + + if (action == NULL || sep == NULL) return; + + while (sep != NULL) { + if (IS_Bioseq (sep)) { + AddSequenceOrParts (action, sep->data.ptrvalue, bsp_list); + } else if (IS_Bioseq_set (sep)) { + bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (bssp->_class == BioseqseqSet_class_segset) { + /* find master segment */ + seq_set = bssp->seq_set; + while (seq_set != NULL && !IS_Bioseq (seq_set)) { + seq_set = seq_set->next; + } + if (seq_set != NULL) { + AddSequenceOrParts (action, seq_set->data.ptrvalue, bsp_list); + } + } else if (bssp->_class == BioseqseqSet_class_nuc_prot) { + /* 
find nucleotide sequence */ + seq_set = bssp->seq_set; + if (seq_set != NULL) { + if (IS_Bioseq_set (seq_set)) { + /* nucleotide is segmented set */ + bssp = (BioseqSetPtr) seq_set->data.ptrvalue; + if (bssp != NULL && bssp->_class == BioseqseqSet_class_segset + && bssp->seq_set != NULL && IS_Bioseq (bssp->seq_set)) { + AddSequenceOrParts (action, bssp->seq_set->data.ptrvalue, bsp_list); + } + } else if (IS_Bioseq (seq_set)) { + AddSequenceOrParts (action, seq_set->data.ptrvalue, bsp_list); + } + } + } else { + /* add from set members */ + AddSequenceOrPartsFromSeqEntry (action, bssp->seq_set, bsp_list); + } + } + sep = sep->next; + } +} + + +static void AdjustProteinSequenceForReadingFrame (SeqFeatPtr cds) +{ + BioseqPtr protbsp, bsp; + ByteStorePtr bs; + SeqFeatPtr prot_sfp; + Boolean partial5, partial3; + + if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return; + + protbsp = BioseqFindFromSeqLoc (cds->product); + + if (protbsp == NULL) { + bsp = BioseqFindFromSeqLoc (cds->location); + if (bsp != NULL) { + ExtraCDSCreationActions (cds, GetBestTopParentForData (bsp->idx.entityID, bsp)); + } + } else { + bs = ProteinFromCdRegionExWithTrailingCodonHandling (cds, + TRUE, + FALSE, + TRUE); + protbsp->seq_data = (SeqDataPtr) BSFree ((ByteStorePtr)(protbsp->seq_data)); + protbsp->seq_data = (SeqDataPtr) bs; + protbsp->length = BSLen (bs); + prot_sfp = GetProtFeature (protbsp); + if (prot_sfp == NULL) { + prot_sfp = CreateNewFeatureOnBioseq (protbsp, SEQFEAT_PROT, NULL); + prot_sfp->data.value.ptrvalue = ProtRefNew (); + CheckSeqLocForPartial (cds->location, &partial5, &partial3); + SetSeqLocPartial (prot_sfp->location, partial5, partial3); + prot_sfp->partial = (partial5 || partial3); + } else { + if (SeqLocLen (prot_sfp->location) != protbsp->length) { + prot_sfp->location = SeqLocFree (prot_sfp->location); + prot_sfp->location = SeqLocIntNew (0, protbsp->length - 1, Seq_strand_plus, SeqIdFindWorst (protbsp->id)); + CheckSeqLocForPartial (cds->location, 
&partial5, &partial3); + SetSeqLocPartial (prot_sfp->location, partial5, partial3); + prot_sfp->partial = (partial5 || partial3); + } + } + } +} + + +/* Create a feature of type featdef on bsp at location slp and apply the qualifiers in fields to it. Gene and gene-description qualifiers are set on a separate gene feature (created on first use at the same location) unless the new feature is itself a gene. For FEATDEF_source with src_fields, the feature data is replaced by a BioSource built from src_fields. Coding regions are retranslated and adjusted for gaps, and when add_mrna is set an mRNA carrying the product qualifier is added on a copy of slp. Returns the new feature, or NULL if it could not be created. */ +NLM_EXTERN SeqFeatPtr +ApplyOneFeatureToBioseq +(BioseqPtr bsp, + Uint1 featdef, + SeqLocPtr slp, + ValNodePtr fields, + ValNodePtr src_fields, + Boolean add_mrna) +{ + Int4 seqfeattype; + SeqFeatPtr sfp, gene = NULL, mrna = NULL; + FeatQualLegalValPtr q; + FeatureField f; + ValNodePtr field_vnp; + Int4 feature_type; + + seqfeattype = FindFeatFromFeatDefType (featdef); + sfp = CreateNewFeatureOnBioseq (bsp, seqfeattype, slp); + if (sfp == NULL) return NULL; + feature_type = GetFeatureTypeFromFeatdef(featdef); + CreateDataForFeature (sfp, feature_type); + /* any extra actions */ + switch (featdef) { + case FEATDEF_CDS : + ExtraCDSCreationActions (sfp, GetBestTopParentForData (bsp->idx.entityID, bsp)); + break; + case FEATDEF_source : + if (src_fields != NULL) { + sfp->data.value.ptrvalue = ImpFeatFree (sfp->data.value.ptrvalue); + sfp->data.choice = SEQFEAT_BIOSRC; + sfp->data.value.ptrvalue = BioSourceFromSourceQualVals (src_fields); + } + break; + } + for (field_vnp = fields; field_vnp != NULL; field_vnp = field_vnp->next) { + q = (FeatQualLegalValPtr) field_vnp->data.ptrvalue; + if (q != NULL) { + f.field = ValNodeNew(NULL); + f.field->next = NULL; + f.field->choice = FeatQualChoice_legal_qual; + f.field->data.intvalue = q->qual; + if (sfp->data.choice != SEQFEAT_GENE + && (q->qual == Feat_qual_legal_gene || q->qual == Feat_qual_legal_gene_description)) { + if (gene == NULL) { + gene = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, slp); + CreateDataForFeature (gene, Macro_feature_type_gene); + } + f.type = Macro_feature_type_gene; + SetQualOnFeature (gene, &f, NULL, q->val, ExistingTextOption_replace_old); + } else { + f.type = feature_type; + SetQualOnFeature (sfp, &f, NULL, q->val, ExistingTextOption_replace_old); + } + /* release the qualifier selector allocated for this field; ApplyRNADestinationOptions frees its selector after SetQualOnFeature in the same way */ + f.field = ValNodeFree (f.field); + } + } + if (featdef == FEATDEF_CDS) { + /* retranslate, to account for change in reading frame 
*/ + AdjustProteinSequenceForReadingFrame (sfp); + /* after the feature has been created, then adjust it for gaps */ + /* Note - this step may result in multiple coding regions being created. */ + AdjustCDSLocationsForUnknownGapsCallback (sfp, NULL); + if (add_mrna) { + slp = SeqLocCopy (slp); + mrna = CreateNewFeatureOnBioseq (bsp, SEQFEAT_RNA, slp); + CreateDataForFeature (mrna, Macro_feature_type_mRNA); + for (field_vnp = fields; field_vnp != NULL; field_vnp = field_vnp->next) { + q = (FeatQualLegalValPtr) field_vnp->data.ptrvalue; + if (q != NULL && q->qual == Feat_qual_legal_product) { + f.field = ValNodeNew(NULL); + f.field->next = NULL; + f.field->choice = FeatQualChoice_legal_qual; + f.field->data.intvalue = q->qual; + f.type = Macro_feature_type_mRNA; + SetQualOnFeature (mrna, &f, NULL, q->val, ExistingTextOption_replace_old); + /* release the selector allocated for this field */ + f.field = ValNodeFree (f.field); + } + } + } + } + return sfp; +} + + +/* Collect the target Bioseqs (from the ids named in action->seq_list when it is an explicit list, otherwise from the sequences under sep) and apply the feature described by action to each one. Returns the number of features created. */ +static Int4 ApplyApplyFeatureActionToSeqEntry (ApplyFeatureActionPtr action, SeqEntryPtr sep) +{ + ValNodePtr bsp_list = NULL, vnp; + Int4 featdef; + BioseqPtr bsp; + SeqFeatPtr sfp; + SeqLocPtr slp; + SeqIdPtr sip; + Int4 num_created = 0; + Int4 len; + CharPtr list_delimiters = " ,\t;"; + CharPtr cp, tmp; + + if (sep == NULL || action == NULL) return 0; + + /* first, get list of Bioseqs to apply features to */ + /* relevant values : seq_list, add_redundant, apply_to_parts, only_seg_num */ + if (action->seq_list != NULL && action->seq_list->choice == SequenceListChoice_list) { + for (vnp = action->seq_list->data.ptrvalue; vnp != NULL; vnp = vnp->next) { + cp = (CharPtr) vnp->data.ptrvalue; + while (cp != NULL && *cp != 0) { + len = StringCSpn (cp, list_delimiters); + if (len > 0) { + /* NOTE(review): tmp and sip are never released in this loop - confirm CreateSeqIdFromText does not retain them, then free both */ + tmp = (CharPtr) MemNew (sizeof (Char) * (len + 1)); + StringNCpy (tmp, cp, len); + tmp[len] = 0; + sip = CreateSeqIdFromText (tmp, sep); + bsp = BioseqFind (sip); + if (bsp != NULL) { + AddSequenceOrParts (action, bsp, &bsp_list); + } + cp += len; + } + cp += StringSpn (cp, list_delimiters); + } + } + } else { + 
AddSequenceOrPartsFromSeqEntry (action, sep, &bsp_list); + } + + /* now add feature to each bioseq in list */ + for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { + bsp = vnp->data.ptrvalue; + if (bsp == NULL) continue; + featdef = GetFeatdefFromFeatureType (action->type); + slp = LocationFromApplyFeatureAction (bsp, action); + sfp = ApplyOneFeatureToBioseq (bsp, featdef, slp, action->fields, action->src_fields, action->add_mrna); + if (sfp != NULL) { + num_created++; + } + } + /* release the temporary list, as ApplyRemoveFeatureActionToSeqEntry does for its feature list */ + bsp_list = ValNodeFree (bsp_list); + return num_created; +} + + +/* Search criteria (featdef, constraint_set) and the result list (feature_list) filled by ConvertAndRemoveFeatureCollectionCallback */ +typedef struct convertandremovefeaturecollection { + Uint1 featdef; + ValNodePtr constraint_set; + ValNodePtr feature_list; +} ConvertAndRemoveFeatureCollectionData, PNTR ConvertAndRemoveFeatureCollectionPtr; + +/* Feature visitor: appends sfp to the collection's feature_list when its subtype matches featdef (or featdef is FEATDEF_ANY) and it satisfies constraint_set */ +static void ConvertAndRemoveFeatureCollectionCallback (SeqFeatPtr sfp, Pointer data) +{ + ConvertAndRemoveFeatureCollectionPtr p; + + if (sfp == NULL || data == NULL) return; + + p = (ConvertAndRemoveFeatureCollectionPtr) data; + if ((p->featdef == FEATDEF_ANY || sfp->idx.subtype == p->featdef ) + && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, p->constraint_set)) { + ValNodeAddPointer (&(p->feature_list), OBJ_SEQFEAT, sfp); + } +} + + +/* Marks every feature under sep that matches action's type and constraint for deletion, deletes the marked objects, and returns the number of features marked */ +static Int4 ApplyRemoveFeatureActionToSeqEntry (RemoveFeatureActionPtr action, SeqEntryPtr sep) +{ + ConvertAndRemoveFeatureCollectionData d; + ValNodePtr vnp; + SeqFeatPtr sfp; + Int4 num_deleted = 0; + + if (action == NULL) return 0; + + d.featdef = GetFeatdefFromFeatureType (action->type); + d.constraint_set = action->constraint; + d.feature_list = NULL; + + VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback); + for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) { + sfp = vnp->data.ptrvalue; + if (sfp != NULL) { + sfp->idx.deleteme = TRUE; + num_deleted ++; + } + } + d.feature_list = ValNodeFree (d.feature_list); + DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); + return num_deleted; +} + + +/* functions for converting features */ + +static Boolean 
ApplyConvertFeatureSrcOptions (SeqFeatPtr sfp, ValNodePtr src_options, Boolean keep_original) +{ + ConvertFromCDSOptionsPtr options = NULL; + Boolean rval = FALSE; + + if (sfp == NULL) return FALSE; + if (src_options == NULL) return TRUE; + + if (src_options->choice == ConvertFeatureSrcOptions_cds) { + options = (ConvertFromCDSOptionsPtr) src_options->data.ptrvalue; + if (options != NULL) { + ApplyCDSOptionsToFeature (sfp, options->remove_mRNA, options->remove_gene, options->remove_transcript_id, keep_original); + rval = TRUE; + } + } + return rval; +} + +typedef Boolean (*ConvertFeatureFunc) PROTO ((SeqFeatPtr, Int4, ConvertFeatureDstOptionsPtr)); + +static void ApplyRNADestinationOptions (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + CharPtr existing_class; + FeatureField ff; + + /* apply destination options */ + if (featdef_to == FEATDEF_ncRNA + && dst_options != NULL + && dst_options->choice == ConvertFeatureDstOptions_ncrna_class + && !StringHasNoText (dst_options->data.ptrvalue)) { + ff.type = Macro_feature_type_ncRNA; + ff.field = ValNodeNew (NULL); + ff.field->choice = FeatQualChoice_legal_qual; + ff.field->data.intvalue = Feat_qual_legal_ncRNA_class; + existing_class = GetQualFromFeature (sfp, &ff, NULL); + if (StringCmp (dst_options->data.ptrvalue, existing_class) != 0) { + sfp->idx.subtype = FEATDEF_ncRNA; + SetQualOnFeature (sfp, &ff, NULL, dst_options->data.ptrvalue, ExistingTextOption_append_semi); + } + existing_class = MemFree (existing_class); + ff.field = ValNodeFree (ff.field); + } +} + +static Boolean ConvertCDSToRNAFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + Boolean rval; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) { + return FALSE; + } + + rval = ConvertCDSToRNA (sfp, featdef_to); + if (rval) { + ApplyRNADestinationOptions (sfp, featdef_to, dst_options); + } + return rval; +} + + +static Boolean ConvertGeneToRNAFunc (SeqFeatPtr sfp, Int4 featdef_to, 
ConvertFeatureDstOptionsPtr dst_options) +{ + Boolean rval; + + rval = ConvertGeneToRNA (sfp, featdef_to); + if (rval) { + ApplyRNADestinationOptions (sfp, featdef_to, dst_options); + } + return rval; +} + + +static Boolean ConvertBioSrcToRegionFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + return ConvertBioSrcToRepeatRegion (sfp, featdef_to); +} + + +static Boolean ConvertCDSToMiscFeatFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + Boolean rval = FALSE; + if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) { + return FALSE; + } + else if (sfp->pseudo) + { + rval = ConvertOnePseudoCDSToMiscFeatEx (sfp, FALSE); + } + else + { + /* do other here */ + rval = ConvertNonPseudoCDSToMiscFeat (sfp, FALSE); + } + return rval; +} + +static Boolean ConvertImpToProtFuncEx (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + return ConvertImpToProtFunc (sfp, featdef_to); +} + + +static Boolean ConvertProtToImpFuncEx (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + return ConvertProtToImpFunc (sfp, featdef_to); +} + + +static Boolean ConvertProtToProt (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + return ConvertProtToProtFunc (sfp, featdef_to); +} + + +static Boolean ConvertCDSToMatPeptide (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + return AutoConvertCDSToMiscFeat (sfp, (dst_options == NULL || dst_options->choice != ConvertFeatureDstOptions_remove_original) ? 
FALSE : dst_options->data.boolvalue); +} + + +static Boolean ConvertImpToRNAFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + RnaRefPtr rrp; + GBQualPtr qual, qual_prev = NULL; + Boolean add_to_comment = FALSE; + CharPtr old_comment = NULL; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_IMP) + { + return FALSE; + } + + for (qual = sfp->qual; qual != NULL && StringCmp (qual->qual, "product") != 0; qual = qual->next) { + qual_prev = qual; + } + if (qual != NULL) { + old_comment = StringSave (qual->val); + if (qual_prev == NULL) { + sfp->qual = qual->next; + } else { + qual_prev->next = qual->next; + } + qual->next = NULL; + qual = GBQualFree (qual); + } else { + old_comment = sfp->comment; + sfp->comment = NULL; + } + + rrp = RnaRefFromLabel (featdef_to, old_comment, &add_to_comment); + + sfp->data.value.ptrvalue = ImpFeatFree ((ImpFeatPtr) sfp->data.value.ptrvalue); + sfp->data.choice = SEQFEAT_RNA; + sfp->data.value.ptrvalue = (Pointer) rrp; + SetRNAProductString (sfp, NULL, old_comment, ExistingTextOption_replace_old); + + if (add_to_comment) { + SetStringValue (&(sfp->comment), old_comment, ExistingTextOption_append_semi); + } + old_comment = MemFree (old_comment); + + ApplyRNADestinationOptions (sfp, featdef_to, dst_options); + + return TRUE; +} + + +static Boolean ConvertRegionToImp (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + return ConvertRegionToImpFunc (sfp, featdef_to); +} + + +static Boolean ConvertImpToImp (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + return ConvertImpToImpFunc (sfp, featdef_to); +} + + +static Boolean ConvertRegionToRNA (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + Boolean rval; + rval = ConvertRegionToRNAFunc (sfp, featdef_to); + if (rval) { + ApplyRNADestinationOptions (sfp, featdef_to, dst_options); + } + return rval; +} + + +static Boolean ConvertncRNAToMiscBinding (SeqFeatPtr sfp, Int4 
featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + RnaRefPtr rrp; + RNAGenPtr rgp; + ImpFeatPtr ifp; + + rrp = (RnaRefPtr) sfp->data.value.ptrvalue; + if (NULL == rrp) + return FALSE; + + if (rrp->ext.choice == 1) { + /* move product to note */ + SetStringValue (&(sfp->comment), rrp->ext.value.ptrvalue, ExistingTextOption_append_semi); + } else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL + && !StringHasNoText (rgp->product)) { + SetStringValue (&(sfp->comment), rgp->product, ExistingTextOption_append_semi); + } + rrp = RnaRefFree (rrp); + sfp->data.choice = SEQFEAT_IMP; + ifp = ImpFeatNew (); + ifp->key = StringSave ("misc_binding"); + sfp->data.value.ptrvalue = ifp; + sfp->idx.subtype = 0; + + return TRUE; +} + + +static Boolean ConvertCommentToMiscFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + ImpFeatPtr ifp; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_COMMENT || sfp->data.value.ptrvalue != NULL) + { + return FALSE; + } + + ifp = ImpFeatNew (); + if (ifp != NULL) { + ifp->key = StringSave ("misc_feature"); + sfp->data.choice = SEQFEAT_IMP; + sfp->data.value.ptrvalue = (Pointer) ifp; + return TRUE; + } + return FALSE; +} + + +static Boolean ConvertGeneToMiscFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + return ConvertGeneToMiscFeatFunc (sfp, featdef_to); +} + + +static Boolean ConvertRNAToImpFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + CharPtr product = NULL; + ImpFeatPtr ifp; + Uint1 seqfeattype; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) { + return FALSE; + } + + seqfeattype = FindFeatFromFeatDefType (featdef_to); + if (seqfeattype != SEQFEAT_IMP) { + return FALSE; + } + + product = GetRNAProductString (sfp, NULL); + + RemoveRNAProductString (sfp, NULL); + + sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue); + + ifp = ImpFeatNew (); + ifp->key = StringSave 
(GetImportFeatureName (featdef_to)); + sfp->data.choice = SEQFEAT_IMP; + sfp->data.value.ptrvalue = (Pointer) ifp; + + SetStringValue (&(sfp->comment), product, ExistingTextOption_append_semi); + product = MemFree (product); + return TRUE; +} + + +static Boolean ConvertSiteToImpFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + GBQualPtr gbqual; + ImpFeatPtr ifp; + Int2 sitetype; + CharPtr str; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_SITE) + { + return FALSE; + } + + ifp = ImpFeatNew (); + if (NULL == ifp) + { + return FALSE; + } + + sitetype = (Int2) sfp->data.value.intvalue; + sfp->data.choice = SEQFEAT_IMP; + sfp->data.value.ptrvalue = (Pointer) ifp; + ifp->key = StringSave (GetImportFeatureName (featdef_to)); + str = GetMacroSiteTypeName (MacroSiteTypeFromAsn1SiteType (sitetype)); + if (str != NULL) { + gbqual = GBQualNew (); + if (gbqual != NULL) { + gbqual->qual = StringSave ("note"); + gbqual->val = StringSave (str); + gbqual->next = sfp->qual; + sfp->qual = gbqual; + } + } + return TRUE; +} + + +static Boolean ConvertProtToRegion (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + ProtRefPtr prp; + ValNodePtr vnp; + CharPtr str; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) + { + return FALSE; + } + prp = (ProtRefPtr) sfp->data.value.ptrvalue; + if (NULL == prp) + { + return FALSE; + } + + vnp = prp->name; + if (vnp != NULL && vnp->next == NULL) { + str = (CharPtr) vnp->data.ptrvalue; + if (! 
StringHasNoText (str)) { + vnp->data.ptrvalue = NULL; + sfp->data.value.ptrvalue = ProtRefFree (prp); + sfp->data.choice = SEQFEAT_REGION; + sfp->data.value.ptrvalue = (Pointer) str; + } + } + return TRUE; +} + + +static Boolean ConvertRegionToProt (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + return ConvertRegionToProtFunc (sfp, featdef_to); +} + + +/* Converts sfp into a Bond feature of the bond type given in dst_options. A feature on a non-protein sequence is first moved to its protein location; a location that is not already a bond is replaced by a SeqLocBond built from its start and stop points. The result is copied into a new feature on the Bioseq found for the final location and sfp is marked for deletion. Returns FALSE if the arguments do not describe a bond conversion or if any lookup fails. */ +static Boolean ConvertToBond (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + SeqLocPtr slp; + BioseqPtr bsp; + SeqEntryPtr sep; + Boolean no_cds = FALSE; + SeqFeatPtr new_sfp; + SeqIdPtr sip; + SeqBondPtr sbp; + SeqPntPtr spp; + + if (sfp == NULL || featdef_to != FEATDEF_BOND || dst_options == NULL || dst_options->choice != ConvertFeatureDstOptions_bond) { + return FALSE; + } + + SeqFeatDataFree (&(sfp->data)); + sfp->data.choice = SEQFEAT_BOND; + sfp->data.value.intvalue = Asn1BondTypeFromMacroBondType (dst_options->data.intvalue); + + bsp = BioseqFindFromSeqLoc (sfp->location); + /* the location must resolve to a sequence before its molecule type can be tested */ + if (bsp == NULL) { + return FALSE; + } + + if (!ISA_aa (bsp->mol)) + { + slp = GetProteinLocationForNucleotideFeatureConversion (sfp->location, &no_cds); + if (no_cds || slp == NULL) { + return FALSE; + } + sfp->location = SeqLocFree (sfp->location); + sfp->location = slp; + } + + if (sfp->location->choice != SEQLOC_BOND) { + sip = SeqLocId (sfp->location); + if (sip != NULL) { + sbp = SeqBondNew (); + if (sbp != NULL) { + slp = ValNodeNew (NULL); + if (slp != NULL) { + slp->choice = SEQLOC_BOND; + slp->data.ptrvalue = (Pointer) sbp; + spp = SeqPntNew (); + if (spp != NULL) { + spp->strand = SeqLocStrand (sfp->location); + spp->id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (sip, 0))); + spp->point = SeqLocStart (sfp->location); + sbp->a = spp; + } + spp = SeqPntNew (); + if (spp != NULL) { + spp->strand = SeqLocStrand (sfp->location); + spp->id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (sip, 0))); + spp->point = SeqLocStop (sfp->location); + sbp->b = spp; + } + sfp->location = SeqLocFree (sfp->location); + 
sfp->location = slp; + } + } + } + } + + sfp->idx.subtype = 0; + + /* look up by the feature's current location: slp is not assigned on every path above */ + bsp = GetBioseqGivenSeqLoc (sfp->location, sfp->idx.entityID); + if (bsp == NULL) { + return FALSE; + } + sep = SeqMgrGetSeqEntryForData (bsp); + if (sep == NULL) { + return FALSE; + } + + new_sfp = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite); + sfp->idx.deleteme = TRUE; + CreateNewFeature (sep, NULL, SEQFEAT_BOND, new_sfp); + + return TRUE; +} + + +/* Converts sfp into a Site feature of the site type given in dst_options. A feature on a non-protein sequence is first moved to its protein location. The result is copied into a new feature on the Bioseq found for the final location and sfp is marked for deletion. Returns FALSE if the arguments do not describe a site conversion or if any lookup fails. */ +static Boolean ConvertToSite (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + SeqLocPtr slp; + BioseqPtr bsp; + SeqEntryPtr sep; + Boolean no_cds = FALSE; + SeqFeatPtr new_sfp; + + if (sfp == NULL || featdef_to != FEATDEF_SITE || dst_options == NULL || dst_options->choice != ConvertFeatureDstOptions_site) { + return FALSE; + } + + SeqFeatDataFree (&(sfp->data)); + sfp->data.choice = SEQFEAT_SITE; + sfp->data.value.intvalue = Asn1SiteTypeFromMacroSiteType (dst_options->data.intvalue); + + bsp = BioseqFindFromSeqLoc (sfp->location); + /* the location must resolve to a sequence before its molecule type can be tested */ + if (bsp == NULL) { + return FALSE; + } + + if (!ISA_aa (bsp->mol)) + { + slp = GetProteinLocationForNucleotideFeatureConversion (sfp->location, &no_cds); + if (no_cds || slp == NULL) { + return FALSE; + } + sfp->location = SeqLocFree (sfp->location); + sfp->location = slp; + } + + sfp->idx.subtype = 0; + + /* look up by the feature's current location: slp is only assigned when the feature was on a non-protein sequence */ + bsp = GetBioseqGivenSeqLoc (sfp->location, sfp->idx.entityID); + if (bsp == NULL) { + return FALSE; + } + sep = SeqMgrGetSeqEntryForData (bsp); + if (sep == NULL) { + return FALSE; + } + + new_sfp = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite); + sfp->idx.deleteme = TRUE; + CreateNewFeature (sep, NULL, SEQFEAT_SITE, new_sfp); + + return TRUE; +} + + +static Boolean ConvertToRegion (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + BioseqPtr bsp; + RegionTypePtr r; + Boolean create_prot_feats, no_cds = FALSE; + SeqLocPtr slp; + SeqEntryPtr sep; + SeqFeatPtr new_sfp; + + if (sfp == NULL || featdef_to != FEATDEF_REGION || 
dst_options == NULL || dst_options->choice != ConvertFeatureDstOptions_region || dst_options->data.ptrvalue == NULL) { + return FALSE; + } + + r = (RegionTypePtr) dst_options->data.ptrvalue; + create_prot_feats = !r->create_nucleotide; + + bsp = BioseqFindFromSeqLoc (sfp->location); + if (bsp == NULL) return FALSE; + + if (ISA_aa (bsp->mol)) + { + if (create_prot_feats) + { + slp = (SeqLocPtr) AsnIoMemCopy (sfp->location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite); + } + else + { + slp = FindNucleotideLocationForProteinFeatureConversion (sfp->location); + } + sfp->location = SeqLocFree (sfp->location); + sfp->location = slp; + } + else if (create_prot_feats) + { + slp = GetProteinLocationForNucleotideFeatureConversion (sfp->location, &no_cds); + if (no_cds) { + return FALSE; + } + sfp->location = SeqLocFree (sfp->location); + sfp->location = slp; + } + + bsp = GetBioseqGivenSeqLoc (sfp->location, sfp->idx.entityID); + if (bsp == NULL) { + return FALSE; + } + + sep = SeqMgrGetSeqEntryForData (bsp); + if (sep == NULL) { + return FALSE; + } + + SeqFeatDataFree (&(sfp->data)); + sfp->data.choice = SEQFEAT_REGION; + sfp->data.value.ptrvalue = sfp->comment; + sfp->comment = NULL; + + new_sfp = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite); + sfp->idx.deleteme = TRUE; + CreateNewFeature (sep, NULL, SEQFEAT_REGION, new_sfp); + return TRUE; +} + + +static Boolean ConvertRNAToRNA (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + RnaRefPtr rrp; + Boolean add_to_comment = FALSE; + CharPtr product; + + rrp = (RnaRefPtr) sfp->data.value.ptrvalue; + if (NULL == rrp) { + return FALSE; + } + + product = GetRNAProductString (sfp, NULL); + + RemoveRNAProductString (sfp, NULL); + + sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue); + + sfp->data.value.ptrvalue = RnaRefFromLabel (featdef_to, product, &add_to_comment); + + SetRNAProductString (sfp, NULL, product, 
ExistingTextOption_replace_old); + if (add_to_comment) { + SetStringValue (&(sfp->comment), product, ExistingTextOption_append_semi); + } + product = MemFree (product); + + /* apply destination options */ + ApplyRNADestinationOptions (sfp, featdef_to, dst_options); + + sfp->idx.subtype = 0; + return TRUE; +} + + +static Boolean MiscFeatToCodingRegionConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + return ConvertMiscFeatToCodingRegion (sfp); +} + + +static Boolean MiscFeatToGeneConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) +{ + return ConvertMiscFeatToGene (sfp); +} + + +typedef struct convertfeattable { + Uint2 seqfeat_from; + Uint2 featdef_from; + Uint2 seqfeat_to; + Uint2 featdef_to; + ConvertFeatureFunc func; + CharPtr help_text; +} ConvertFeatTableData, PNTR ConvertFeatTablePtr; + +static ConvertFeatTableData conversion_functions[] = { + { SEQFEAT_CDREGION, FEATDEF_CDS, SEQFEAT_RNA, FEATDEF_ANY, + ConvertCDSToRNAFunc, + "Delete protein product sequence.\nClear product field if transcript ID removal was requested.\nIf converting to tRNA and anticodon value can be parsed from label, set aa value, and add any text that could not be parsed into an anticodon value to the feature note.\nIf converting to other RNA, put label in RNA product." }, + { SEQFEAT_GENE, FEATDEF_GENE, SEQFEAT_RNA, FEATDEF_ANY, + ConvertGeneToRNAFunc, + "If converting to tRNA and anticodon value can be parsed from label, set aa value, and add any text that could not be parsed into an anticodon value to the feature note. If converting to other RNA, put label in RNA product. Also append gene locus, allele, description, map location, and locus tag to comment (as long as these values are not already in the label and therefore in the RNA product)." 
}, + { SEQFEAT_BIOSRC, FEATDEF_BIOSRC, SEQFEAT_IMP, FEATDEF_repeat_region, + ConvertBioSrcToRegionFunc, + "Creates a repeat_region with mobile_element qualifiers for the transposon and/or insertion sequence qualifiers on the BioSource. All other BioSource information is discarded." }, + { SEQFEAT_CDREGION, FEATDEF_CDS, SEQFEAT_IMP, FEATDEF_misc_feature, + ConvertCDSToMiscFeatFunc, + "Copy comment from coding region to new misc_feature and remove product field. If not pseudo coding region, add product name from protein feature to new misc_feature comment and delete product sequence." }, + { SEQFEAT_IMP, FEATDEF_ANY, SEQFEAT_PROT, FEATDEF_ANY, + ConvertImpToProtFuncEx, + "Original feature must be on nucleotide sequence and be contained in coding region location. Coding region must have product protein sequence. New feature is created on product protein sequence so that the translated location will be as close as possible to the original nucleotide location (may not be exact because of codon boundaries)." }, + { SEQFEAT_PROT, FEATDEF_mat_peptide_aa, SEQFEAT_IMP, FEATDEF_ANY, + ConvertProtToImpFuncEx, + "Original feature must be on a protein sequence that is a product of a coding region.\nNew feature will be created on same sequence as coding region.\n" + "If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n" + "EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on the new feature.\n" + "Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." 
}, + { SEQFEAT_PROT, FEATDEF_sig_peptide_aa, SEQFEAT_IMP, FEATDEF_ANY, + ConvertProtToImpFuncEx, + "Original feature must be on a protein sequence that is a product of a coding region.\nNew feature will be created on same sequence as coding region.\n" + "If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n" + "EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on the new feature.\n" + "Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." }, + { SEQFEAT_PROT, FEATDEF_transit_peptide_aa, SEQFEAT_IMP, FEATDEF_ANY, + ConvertProtToImpFuncEx, + "Original feature must be on a protein sequence that is a product of a coding region.\nNew feature will be created on same sequence as coding region.\n" + "If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n" + "EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on the new feature.\n" + "Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." }, + { SEQFEAT_IMP, FEATDEF_ANY, SEQFEAT_RNA, FEATDEF_ANY, + ConvertImpToRNAFunc, + "Creates an RNA feature of the specified subtype. Import feature key is discarded." }, + { SEQFEAT_IMP, FEATDEF_misc_feature, SEQFEAT_CDREGION, FEATDEF_CDS, + MiscFeatToCodingRegionConvertFunc, + "Use misc_feature comment for coding region product name." 
}, + { SEQFEAT_IMP, FEATDEF_misc_feature, SEQFEAT_GENE, FEATDEF_GENE, + MiscFeatToGeneConvertFunc, + "Creates gene with locus value from misc_feature comment." }, + { SEQFEAT_REGION, FEATDEF_REGION, SEQFEAT_IMP, FEATDEF_ANY, + ConvertRegionToImp, + "Creates a misc_feature with the region name saved as a /note qualifier." }, + { SEQFEAT_REGION, FEATDEF_REGION, SEQFEAT_RNA, FEATDEF_ANY, + ConvertRegionToRNA, + "Creates an RNA feature with the region name as the product name." }, + { SEQFEAT_COMMENT, FEATDEF_ANY, SEQFEAT_IMP, FEATDEF_misc_feature, + ConvertCommentToMiscFeat, + "Creates a misc_feature with the same note as the original. Note - the flatfile display for the feature is the same." }, + { SEQFEAT_GENE, FEATDEF_GENE, SEQFEAT_IMP, FEATDEF_misc_feature, + ConvertGeneToMiscFeat, + "Creates a misc_feature with the gene description and locus prepended to the original comment, separated by semicolons." }, + { SEQFEAT_RNA, FEATDEF_ANY, SEQFEAT_IMP, FEATDEF_ANY, + ConvertRNAToImpFeat, + "Creates an import feature of the specified subtype and adds the RNA product name to the comment." } , + { SEQFEAT_SITE, FEATDEF_ANY, SEQFEAT_IMP, FEATDEF_ANY, + ConvertSiteToImpFeat, + "Creates an import feature of the specified subtype with the site type name as a /note qualifier." } , + { SEQFEAT_PROT, FEATDEF_mat_peptide_aa, SEQFEAT_REGION, FEATDEF_REGION, + NULL, + "Creates a Region feature with the protein name as the region name." }, + { SEQFEAT_PROT, FEATDEF_ANY, SEQFEAT_REGION, FEATDEF_REGION, + ConvertProtToRegion, + "Creates a Region feature with the protein name as the region name." }, + { SEQFEAT_REGION, FEATDEF_REGION, SEQFEAT_PROT, FEATDEF_ANY, + ConvertRegionToProt, + "If feature is on nucleotide sequence, will create feature on protein product sequence for overlapping coding region. Protein name will be region name." }, + { 0, FEATDEF_ANY, SEQFEAT_BOND, FEATDEF_BOND, + ConvertToBond, + "Create Bond feature with specified bond type. 
Location is a SeqLocBond with a point at the start of the original location and a point at the end of the original location. All feature ID, partialness, except, comment, product, location, genbank qualifiers, title, citation, experimental evidence, gene xrefs, db xrefs, and pseudo-ness information is discarded." }, + { 0, FEATDEF_ANY, SEQFEAT_SITE, FEATDEF_SITE, ConvertToSite, "Create Site feature with specified site type. All feature ID, partialness, except, comment, product, location, genbank qualifiers, title, citation, experimental evidence, gene xrefs, db xrefs, and pseudo-ness information is discarded." }, { 0, FEATDEF_ANY, SEQFEAT_REGION, FEATDEF_REGION, @@ -20567,6 +23840,9 @@ static ConvertFeatTableData conversion_functions[] = { { SEQFEAT_RNA, FEATDEF_ANY, SEQFEAT_RNA, FEATDEF_ANY, ConvertRNAToRNA, "Changes type of RNA feature." }, + { SEQFEAT_RNA, FEATDEF_ncRNA, SEQFEAT_IMP, FEATDEF_misc_binding, + ConvertncRNAToMiscBinding, + "Changes ncRNA to misc_binding." }, { SEQFEAT_PROT, FEATDEF_ANY, SEQFEAT_PROT, FEATDEF_ANY, ConvertProtToProt, "Changes type of protein feature." }, @@ -20574,6117 +23850,10761 @@ static ConvertFeatTableData conversion_functions[] = { ConvertCDSToMatPeptide, "If coding region is overlapped by another coding region, will convert the coding region to a mat-peptide on the overlapping coding region's protein sequence, otherwise if you have checked \"Leave Original Feature\" it will create a mat-peptide with the same protein names and description on the protein sequence for the coding region." 
} -}; +}; + + +static Int4 num_convert_feature_table_lines = sizeof (conversion_functions) / sizeof (ConvertFeatTableData); + +static Int4 GetConversionFunctionTableLine (Uint2 seqfeat_from, Uint2 featdef_from, Uint2 seqfeat_to, Uint2 featdef_to) +{ + Int4 i, table_line_num = -1; + + for (i = 0; i < num_convert_feature_table_lines && table_line_num == -1; i++) + { + if ((conversion_functions[i].seqfeat_from == 0 || conversion_functions[i].seqfeat_from == seqfeat_from) + && (conversion_functions[i].featdef_from == FEATDEF_ANY || conversion_functions[i].featdef_from == featdef_from) + && (conversion_functions[i].seqfeat_to == 0 || conversion_functions[i].seqfeat_to == seqfeat_to) + && (conversion_functions[i].featdef_to == FEATDEF_ANY || conversion_functions[i].featdef_to == featdef_to)) + { + table_line_num = i; + } + } + return table_line_num; +} + + +NLM_EXTERN Boolean IsConversionSupported (Uint2 type_from, Uint2 type_to) +{ + Int4 line; + Uint2 featdef_from, featdef_to, seqfeat_from, seqfeat_to; + + featdef_from = GetFeatdefFromFeatureType (type_from); + seqfeat_from = FindFeatFromFeatDefType (featdef_from); + featdef_to = GetFeatdefFromFeatureType (type_to); + seqfeat_to = FindFeatFromFeatDefType (featdef_to); + line = GetConversionFunctionTableLine (seqfeat_from, featdef_from, seqfeat_to, featdef_to); + if (line > -1 && conversion_functions[line].func != NULL) { + return TRUE; + } else { + return FALSE; + } +} + + +static CharPtr GetFeatureTextForLogging (SeqFeatPtr sfp) +{ + ValNode vn; + Int4 len; + CharPtr txt = NULL; + + MemSet (&vn, 0, sizeof (ValNode)); + vn.choice = OBJ_SEQFEAT; + vn.data.ptrvalue = sfp; + txt = GetDiscrepancyItemText (&vn); + if (txt == NULL) { + txt = StringSave ("(null)"); + } else { + len = StringLen (txt); + if (len > 0 && txt[len - 1] == '\n') { + txt[len - 1] = 0; + } + } + return txt; +} + + +static Int4 ApplyConvertFeatureActionToSeqEntry (ConvertFeatureActionPtr action, SeqEntryPtr sep, FILE *log_fp) +{ + 
ConvertAndRemoveFeatureCollectionData d; + ValNodePtr vnp; + SeqFeatPtr sfp, sfp_copy; + Int4 num_affected = 0, table_line; + Uint2 seqfeat_from, featdef_from, seqfeat_to, featdef_to; + /* variables for logging */ + CharPtr txt_old, txt_new; + + if (action == NULL) return 0; + + featdef_from = GetFeatdefFromFeatureType (action->type_from); + seqfeat_from = FindFeatFromFeatDefType(featdef_from); + featdef_to = GetFeatdefFromFeatureType (action->type_to); + seqfeat_to = FindFeatFromFeatDefType (featdef_to); + table_line = GetConversionFunctionTableLine (seqfeat_from, featdef_from, seqfeat_to, featdef_to); + if (table_line < 0 || conversion_functions[table_line].func == NULL) { + return 0; + } + + d.featdef = GetFeatdefFromFeatureType (action->type_from); + d.constraint_set = action->src_feat_constraint; + d.feature_list = NULL; + + VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback); + for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) { + sfp = vnp->data.ptrvalue; + if (sfp != NULL) { + sfp_copy = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite); + /* add subtype value to copy */ + sfp_copy->idx.subtype = sfp->idx.subtype; + sfp_copy->next = sfp->next; + sfp->next = sfp_copy; + + if (conversion_functions[table_line].func (sfp_copy, featdef_to, action->dst_options)) { + ApplyConvertFeatureSrcOptions (sfp_copy, action->src_options, action->leave_original); + num_affected ++; + if (!action->leave_original) { + sfp->idx.deleteme = TRUE; + } + if (log_fp != NULL) { + txt_old = GetFeatureTextForLogging (sfp); + txt_new = GetFeatureTextForLogging (sfp_copy); + if (action->leave_original) { + fprintf (log_fp, "Added new feature %s based on %s\n", txt_new, txt_old); + } else { + fprintf (log_fp, "Replaced feature %s with %s\n", txt_old, txt_new); + } + txt_old = MemFree (txt_old); + txt_new = MemFree (txt_new); + } + sfp_copy->idx.subtype = 0; + } else { + sfp_copy->idx.deleteme = TRUE; + } + } + } + 
DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); + RenormalizeNucProtSets (sep, TRUE); + return num_affected; +} + + +/* Functions for editing feature locations */ +static Boolean DoesStrandMatch (Int4 strand_choice, Uint1 strand_val) +{ + Boolean rval = FALSE; + + switch (strand_choice) + { + case Feature_location_strand_from_any: + rval = TRUE; + break; + case Feature_location_strand_from_unknown: + if (strand_val == Seq_strand_unknown) + { + rval = TRUE; + } + break; + case Feature_location_strand_from_plus: + if (strand_val != Seq_strand_minus) + { + rval = TRUE; + } + break; + case Feature_location_strand_from_minus: + if (strand_val == Seq_strand_minus) + { + rval = TRUE; + } + break; + case Feature_location_strand_from_both: + if (strand_val == Seq_strand_both) + { + rval = TRUE; + } + break; + } + return rval; +} + + +static Uint1 GetNewStrandValue (Int4 strand_choice, Uint1 strand_val) +{ + Uint1 rval = Seq_strand_unknown; + + switch (strand_choice) + { + case Feature_location_strand_to_reverse: + switch (strand_val) + { + case Seq_strand_plus: + case Seq_strand_unknown: + rval = Seq_strand_minus; + break; + case Seq_strand_minus: + rval = Seq_strand_plus; + break; + default: + rval = strand_val; + break; + } + break; + case Feature_location_strand_to_unknown: + rval = Seq_strand_unknown; + break; + case Feature_location_strand_to_plus: + rval = Seq_strand_plus; + break; + case Feature_location_strand_to_minus: + rval = Seq_strand_minus; + break; + case Feature_location_strand_to_both: + rval = Seq_strand_both; + break; + } + return rval; +} + + +static Boolean ConvertLocationStrand (SeqLocPtr slp, Int4 fromStrand, Int4 toStrand) +{ + SeqLocPtr loc; + PackSeqPntPtr psp; + SeqBondPtr sbp; + SeqIntPtr sinp; + SeqPntPtr spp; + Boolean rval = FALSE; + Uint1 strand_orig; + + while (slp != NULL) { + switch (slp->choice) { + case SEQLOC_NULL : + break; + case SEQLOC_EMPTY : + case SEQLOC_WHOLE : + break; + case SEQLOC_INT : + sinp = (SeqIntPtr) 
slp->data.ptrvalue; + if (sinp != NULL && DoesStrandMatch (fromStrand, sinp->strand)) + { + strand_orig = sinp->strand; + sinp->strand = GetNewStrandValue (toStrand, sinp->strand); + if (strand_orig != sinp->strand) { + rval = TRUE; + } + } + break; + case SEQLOC_PNT : + spp = (SeqPntPtr) slp->data.ptrvalue; + if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand)) + { + strand_orig = spp->strand; + spp->strand = GetNewStrandValue (toStrand, spp->strand); + if (strand_orig != spp->strand) { + rval = TRUE; + } + } + break; + case SEQLOC_PACKED_PNT : + psp = (PackSeqPntPtr) slp->data.ptrvalue; + if (psp != NULL && DoesStrandMatch (fromStrand, psp->strand)) + { + strand_orig = psp->strand; + psp->strand = GetNewStrandValue (toStrand, psp->strand); + if (strand_orig != psp->strand) { + rval = TRUE; + } + } + break; + case SEQLOC_PACKED_INT : + case SEQLOC_MIX : + case SEQLOC_EQUIV : + loc = (SeqLocPtr) slp->data.ptrvalue; + while (loc != NULL) { + rval |= ConvertLocationStrand (loc, fromStrand, toStrand); + loc = loc->next; + } + break; + case SEQLOC_BOND : + sbp = (SeqBondPtr) slp->data.ptrvalue; + if (sbp != NULL) { + spp = (SeqPntPtr) sbp->a; + if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand)) + { + strand_orig = spp->strand; + spp->strand = GetNewStrandValue (toStrand, spp->strand); + if (strand_orig != spp->strand) { + rval = TRUE; + } + } + spp = (SeqPntPtr) sbp->b; + if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand)) + { + strand_orig = spp->strand; + spp->strand = GetNewStrandValue (toStrand, spp->strand); + if (strand_orig != spp->strand) { + rval = TRUE; + } + } + } + break; + case SEQLOC_FEAT : + break; + default : + break; + } + slp = slp->next; + } + return rval; +} + + +static Boolean ApplyEditLocationStrandToSeqFeat (EditLocationStrandPtr edit, SeqFeatPtr sfp) +{ + Boolean rval = FALSE; + + if (edit == NULL || sfp == NULL) { + return FALSE; + } + + rval = ConvertLocationStrand (sfp->location, edit->strand_from, 
edit->strand_to);
+  return rval;
+}
+
+
+/* Returns TRUE when the 5' end of slp sits on the terminal base of bsp:
+ * SeqLocStart == 0 for a non-minus strand, or SeqLocStop == bsp->length - 1
+ * for the minus strand.  Returns FALSE when slp or bsp is NULL.
+ */
+static Boolean At5EndOfSequence (SeqLocPtr slp, BioseqPtr bsp)
+{
+  Uint1 strand;
+  Int4 start;
+  Boolean at_end = FALSE;
+
+  if (slp == NULL || bsp == NULL) return FALSE;
+
+  strand = SeqLocStrand (slp);
+
+  if (strand == Seq_strand_minus) {
+    start = SeqLocStop (slp);
+    if (start == bsp->length - 1) {
+      at_end = TRUE;
+    }
+  } else {
+    start = SeqLocStart (slp);
+    if (start == 0) {
+      at_end = TRUE;
+    }
+  }
+  return at_end;
+}
+
+
+/* Returns TRUE when sfp is a coding region whose translation (as produced by
+ * ProteinFromCdRegionEx) begins with 'M'.  Returns FALSE for NULL, for
+ * non-CDS features, and when no translation could be obtained.
+ */
+static Boolean HasGoodStartCodon (SeqFeatPtr sfp)
+{
+  ByteStorePtr bs;
+  CharPtr prot;
+  Boolean has_start = FALSE;
+
+  if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) {
+    bs = ProteinFromCdRegionEx (sfp, TRUE, FALSE);
+    if (bs != NULL) {
+      prot = BSMerge (bs, NULL);
+      bs = BSFree (bs);
+      if (prot != NULL && *prot == 'M') {
+        has_start = TRUE;
+      }
+      prot = MemFree (prot);
+    }
+  }
+  return has_start;
+}
+
+
+/* Sets the 5' partial flag on sfp->location when the feature satisfies
+ * action->constraint and the location is not already 5' partial.  When
+ * action->extend is set and the Bioseq was found, the location is also
+ * extended to the sequence end and, if that changed it, the frame is
+ * re-chosen.  Returns TRUE when the location was changed.
+ */
+static Boolean ApplyPartial5SetActionToSeqFeat (Partial5SetActionPtr action, SeqFeatPtr sfp)
+{
+  Boolean rval = FALSE;
+  Boolean make_partial = FALSE;
+  Uint1 strand;
+  BioseqPtr bsp;
+  CdRegionPtr crp;
+  Boolean partial5, partial3;
+
+  if (action == NULL || sfp == NULL) return FALSE;
+  bsp = BioseqFindFromSeqLoc (sfp->location);
+  strand = SeqLocStrand (sfp->location);
+
+  switch (action->constraint) {
+    case Partial_5_set_constraint_all:
+      make_partial = TRUE;
+      break;
+    case Partial_5_set_constraint_at_end:
+      make_partial = At5EndOfSequence (sfp->location, bsp);
+      break;
+    case Partial_5_set_constraint_bad_start:
+      /* "bad start" means the translation does NOT begin with a good start codon */
+      make_partial = !HasGoodStartCodon (sfp);
+      break;
+    case Partial_5_set_constraint_frame_not_one:
+      if (sfp->data.choice == SEQFEAT_CDREGION
+          && (crp = sfp->data.value.ptrvalue) != NULL
+          && crp->frame != 0 && crp->frame != 1) {
+        make_partial = TRUE;
+      }
+      break;
+  }
+
+  if (make_partial) {
+    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+    if (!partial5) {
+      SetSeqLocPartial (sfp->location, TRUE, partial3);
+      if (action->extend && bsp != NULL) {
+        if (ExtendSeqLocToEnd (sfp->location, bsp, TRUE)) {
+          ChooseBestFrame (sfp);
+        }
+      }
+      rval = TRUE;
+    }
+  }
+  return rval;
+}
+
+
+/* Clears the 5' partial flag on sfp->location when the feature satisfies the
+ * given Partial_5_clear_constraint_* value and the location is currently
+ * 5' partial.  Returns TRUE when the location was changed.
+ */
+static Boolean ApplyClear5PartialToSeqFeat (Int4 action, SeqFeatPtr sfp)
+{
+  Boolean rval = FALSE, clear_partial = FALSE;
+  Boolean partial5, partial3;
+
+  if (sfp == NULL) return FALSE;
+
+  switch (action) {
+    case Partial_5_clear_constraint_all:
+      clear_partial = TRUE;
+      break;
+    case Partial_5_clear_constraint_not_at_end:
+      clear_partial = !At5EndOfSequence(sfp->location, BioseqFindFromSeqLoc (sfp->location));
+      break;
+    case Partial_5_clear_constraint_good_start:
+      /* only clear the partial when the start codon really is good */
+      clear_partial = HasGoodStartCodon(sfp);
+      break;
+  }
+  if (clear_partial) {
+    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+    if (partial5) {
+      SetSeqLocPartial (sfp->location, FALSE, partial3);
+      rval = TRUE;
+    }
+  }
+  return rval;
+}
+
+
+/* Returns TRUE when the 3' end of slp sits on the terminal base of bsp:
+ * SeqLocStop == bsp->length - 1 for a non-minus strand, or SeqLocStart == 0
+ * for the minus strand.  Returns FALSE when slp or bsp is NULL.
+ */
+static Boolean At3EndOfSequence (SeqLocPtr slp, BioseqPtr bsp)
+{
+  Uint1 strand;
+  Int4 stop;
+  Boolean at_end = FALSE;
+
+  if (slp == NULL || bsp == NULL) return FALSE;
+
+  strand = SeqLocStrand (slp);
+
+  if (strand == Seq_strand_minus) {
+    stop = SeqLocStart (slp);
+    if (stop == 0) {
+      at_end = TRUE;
+    }
+  } else {
+    stop = SeqLocStop (slp);
+    if (stop == bsp->length - 1) {
+      at_end = TRUE;
+    }
+  }
+  return at_end;
+}
+
+
+/* Returns TRUE when sfp is a coding region whose translation (as produced by
+ * ProteinFromCdRegionEx) ends with '*'.  Returns FALSE for NULL, for non-CDS
+ * features, and when the translation is missing or empty.
+ */
+static Boolean HasGoodStopCodon (SeqFeatPtr sfp)
+{
+  ByteStorePtr bs;
+  CharPtr prot;
+  Boolean has_stop = FALSE;
+
+  if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) {
+    bs = ProteinFromCdRegionEx (sfp, TRUE, FALSE);
+    if (bs != NULL) {
+      prot = BSMerge (bs, NULL);
+      bs = BSFree (bs);
+      /* an empty translation has no last residue; do not index prot[-1] */
+      if (prot != NULL && *prot != 0 && prot[StringLen (prot) - 1] == '*') {
+        has_stop = TRUE;
+      }
+      prot = MemFree (prot);
+    }
+  }
+  return has_stop;
+}
+
+
+/* Sets the 3' partial flag on sfp->location when the feature satisfies
+ * action->constraint and the location is not already 3' partial.  When
+ * action->extend is set and the Bioseq was found, the location is also
+ * extended to the sequence end.  Returns TRUE when the location was changed.
+ */
+static Boolean ApplyPartial3SetActionToSeqFeat (Partial3SetActionPtr action, SeqFeatPtr sfp)
+{
+  Boolean rval = FALSE;
+  Boolean make_partial = FALSE;
+  Uint1 strand;
+  BioseqPtr bsp;
+  Boolean partial5, partial3;
+
+  if (action == NULL || sfp == NULL) return FALSE;
+  bsp = BioseqFindFromSeqLoc (sfp->location);
+  strand = SeqLocStrand (sfp->location);
+
+  switch (action->constraint) {
+    case Partial_3_set_constraint_all:
+      make_partial = TRUE;
+      break;
+    case Partial_3_set_constraint_at_end:
+      make_partial = At3EndOfSequence (sfp->location, bsp);
+      break;
+    case Partial_3_set_constraint_bad_end:
+      /* "bad end" means the translation does NOT finish with a stop codon */
+      make_partial = !HasGoodStopCodon (sfp);
+      break;
+  }
+
+  if (make_partial) {
+    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+    if (!partial3) {
+      SetSeqLocPartial (sfp->location, partial5, TRUE);
+      if (action->extend && bsp != NULL) {
+        ExtendSeqLocToEnd (sfp->location, bsp, FALSE);
+      }
+      rval = TRUE;
+    }
+  }
+  return rval;
+}
+
+
+/* Clears the 3' partial flag on sfp->location when the feature satisfies the
+ * given Partial_3_clear_constraint_* value and the location is currently
+ * 3' partial.  Returns TRUE when the location was changed.
+ */
+static Boolean ApplyClear3PartialToSeqFeat (Int4 action, SeqFeatPtr sfp)
+{
+  Boolean rval = FALSE, clear_partial = FALSE;
+  Boolean partial5, partial3;
+
+  if (sfp == NULL) return FALSE;
+
+  switch (action) {
+    case Partial_3_clear_constraint_all:
+      clear_partial = TRUE;
+      break;
+    case Partial_3_clear_constraint_not_at_end:
+      clear_partial = !At3EndOfSequence(sfp->location, BioseqFindFromSeqLoc (sfp->location));
+      break;
+    case Partial_3_clear_constraint_good_end:
+      /* only clear the partial when the stop codon really is present */
+      clear_partial = HasGoodStopCodon(sfp);
+      break;
+  }
+  if (clear_partial) {
+    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+    if (partial3) {
+      SetSeqLocPartial (sfp->location, partial5, FALSE);
+      rval = TRUE;
+    }
+  }
+  return rval;
+}
+
+
+static Boolean ApplyPartialBothSetActionToSeqFeat (PartialBothSetActionPtr action, SeqFeatPtr sfp)
+{
+  Boolean rval = FALSE;
+  Boolean make_partial = FALSE;
+  Uint1 strand;
+  BioseqPtr bsp;
+  Boolean partial5, partial3;
+
+  if (action == NULL || sfp == NULL) return FALSE;
+  bsp = BioseqFindFromSeqLoc (sfp->location);
+  strand = SeqLocStrand (sfp->location);
+
+  switch (action->constraint) {
+    case Partial_both_set_constraint_all:
+      make_partial = TRUE;
+      break;
+    case Partial_both_set_constraint_at_end:
+      make_partial = At5EndOfSequence (sfp->location, bsp) && At3EndOfSequence
(sfp->location, bsp); + break; + } + + if (make_partial) { + CheckSeqLocForPartial (sfp->location, &partial5, &partial3); + if (!partial5 || !partial3) { + SetSeqLocPartial (sfp->location, TRUE, TRUE); + if (action->extend && bsp != NULL) { + ExtendSeqLocToEnd (sfp->location, bsp, FALSE); + if (ExtendSeqLocToEnd (sfp->location, bsp, TRUE)) { + ChooseBestFrame (sfp); + } + } + rval = TRUE; + } + } + return rval; +} + + +static Boolean ApplyClearBothPartialToSeqFeat (Int4 action, SeqFeatPtr sfp) +{ + Boolean rval = FALSE, clear_partial = FALSE; + Boolean partial5, partial3; + BioseqPtr bsp; + + if (sfp == NULL) return FALSE; + + switch (action) { + case Partial_both_clear_constraint_all: + clear_partial = TRUE; + break; + case Partial_both_clear_constraint_not_at_end: + bsp = BioseqFindFromSeqLoc (sfp->location); + clear_partial = !At5EndOfSequence (sfp->location, bsp) && !At3EndOfSequence(sfp->location, bsp); + break; + case Partial_3_clear_constraint_good_end: + clear_partial = !HasGoodStopCodon(sfp); + break; + } + if (clear_partial) { + CheckSeqLocForPartial (sfp->location, &partial5, &partial3); + if (partial5 || partial3) { + SetSeqLocPartial (sfp->location, FALSE, FALSE); + rval = TRUE; + } + } + return rval; +} + + +static Boolean ApplyConvertLocationToSeqFeat (Int4 convert_location, SeqFeatPtr sfp) +{ + Boolean hasNulls, rval = FALSE; + SeqLocPtr slp; + BioseqPtr bsp; + Boolean partial5, partial3; + + if (sfp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location))== NULL) { + return FALSE; + } + + CheckSeqLocForPartial (sfp->location, &partial5, &partial3); + hasNulls = LocationHasNullsBetween (sfp->location); + switch (convert_location) + { + case Convert_location_type_join : + if (hasNulls) + { + slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, FALSE); + sfp->location = SeqLocFree (sfp->location); + sfp->location = slp; + if (bsp->repr == Seq_repr_seg) + { + slp = SegLocToPartsEx (bsp, sfp->location, FALSE); + sfp->location = SeqLocFree 
(sfp->location); + sfp->location = slp; + hasNulls = LocationHasNullsBetween (sfp->location); + sfp->partial = (sfp->partial || hasNulls); + } + FreeAllFuzz (sfp->location); + SetSeqLocPartial (sfp->location, partial5, partial3); + rval = TRUE; + } + break; + case Convert_location_type_order : + if (!hasNulls) + { + slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, TRUE); + sfp->location = SeqLocFree (sfp->location); + sfp->location = slp; + if (bsp->repr == Seq_repr_seg) + { + slp = SegLocToPartsEx (bsp, sfp->location, TRUE); + sfp->location = SeqLocFree (sfp->location); + sfp->location = slp; + hasNulls = LocationHasNullsBetween (sfp->location); + sfp->partial = (sfp->partial || hasNulls); + } + FreeAllFuzz (sfp->location); + SetSeqLocPartial (sfp->location, partial5, partial3); + rval = TRUE; + } + break; + case Convert_location_type_merge : + if (sfp->location->choice != SEQLOC_INT) { + slp = SeqLocMerge (bsp, sfp->location, NULL, TRUE, FALSE, FALSE); + sfp->location = SeqLocFree (sfp->location); + sfp->location = slp; + SetSeqLocPartial (sfp->location, partial5, partial3); + rval = TRUE; + } + default: + break; + } + return rval; +} + + +static Boolean ExtendSeqFeat5 (SeqFeatPtr sfp) +{ + BioseqPtr bsp; + CdRegionPtr crp; + Int4 start_diff; + Boolean partial5, partial3; + + if (sfp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location)) == NULL) + { + return FALSE; + } + + if ((start_diff = ExtendSeqLocToEnd (sfp->location, bsp, TRUE)) > 0) + { + if (sfp->data.choice == SEQFEAT_CDREGION) { + CheckSeqLocForPartial (sfp->location, &partial5, &partial3); + if (partial5) { + crp = (CdRegionPtr) sfp->data.value.ptrvalue; + if (crp != NULL) { + if (crp->frame == 0) { + crp->frame = 1; + } + crp->frame = (crp->frame + start_diff - 1) % 3 + 1; + } + } + } + return TRUE; + } + else + { + return FALSE; + } +} + + +static Boolean ExtendSeqFeat3 (SeqFeatPtr sfp) +{ + BioseqPtr bsp; + Uint1 strand; + Int4 stop_before, stop_after; + + if (sfp == NULL || (bsp = 
BioseqFindFromSeqLoc (sfp->location)) == NULL) + { + return FALSE; + } + strand = SeqLocStrand (sfp->location); + if (strand == Seq_strand_minus) { + stop_before = SeqLocStart (sfp->location); + } else { + stop_before = SeqLocStop (sfp->location); + } + ExtendSeqLocToEnd (sfp->location, bsp, FALSE); + if (strand == Seq_strand_minus) { + stop_after = SeqLocStart (sfp->location); + } else { + stop_after = SeqLocStop (sfp->location); + } + if (stop_before == stop_after) + { + return FALSE; + } else { + return TRUE; + } +} + + +static Boolean ApplyLocationEditTypeToSeqFeat (ValNodePtr action, SeqFeatPtr sfp) +{ + Boolean rval = FALSE; + + if (action == NULL || sfp == NULL) { + return FALSE; + } + + switch (action->choice) { + case LocationEditType_strand: + rval = ApplyEditLocationStrandToSeqFeat (action->data.ptrvalue, sfp); + break; + case LocationEditType_set_5_partial: + rval = ApplyPartial5SetActionToSeqFeat (action->data.ptrvalue, sfp); + break; + case LocationEditType_clear_5_partial: + rval = ApplyClear5PartialToSeqFeat (action->data.intvalue, sfp); + break; + case LocationEditType_set_3_partial: + rval = ApplyPartial3SetActionToSeqFeat (action->data.ptrvalue, sfp); + break; + case LocationEditType_clear_3_partial: + rval = ApplyClear3PartialToSeqFeat (action->data.intvalue, sfp); + break; + case LocationEditType_set_both_partial: + rval = ApplyPartialBothSetActionToSeqFeat (action->data.ptrvalue, sfp); + break; + case LocationEditType_clear_both_partial: + rval = ApplyClearBothPartialToSeqFeat (action->data.intvalue, sfp); + break; + case LocationEditType_convert: + rval = ApplyConvertLocationToSeqFeat (action->data.intvalue, sfp); + break; + case LocationEditType_extend_5: + rval = ExtendSeqFeat5 (sfp); + break; + case LocationEditType_extend_3: + rval = ExtendSeqFeat3 (sfp); + break; + } + return rval; +} + + +static Int4 ApplyEditFeatureLocationActionToSeqEntry (EditFeatureLocationActionPtr action, SeqEntryPtr sep, FILE *log_fp) +{ + 
ConvertAndRemoveFeatureCollectionData d; + ValNodePtr vnp; + SeqFeatPtr sfp; + Int4 num_affected = 0; + /* variables for logging */ + CharPtr old_loc = NULL, new_loc; + Boolean retranslated; + + if (action == NULL) return 0; + + d.featdef = GetFeatdefFromFeatureType (action->type); + d.constraint_set = action->constraint; + d.feature_list = NULL; + + VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback); + for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) { + sfp = vnp->data.ptrvalue; + if (sfp != NULL) { + if (log_fp != NULL) { + old_loc = SeqLocPrintUseBestID (sfp->location); + } + if (ApplyLocationEditTypeToSeqFeat (action->action, sfp)) { + retranslated = FALSE; + if (sfp->data.choice == SEQFEAT_CDREGION && action->retranslate_cds) { + retranslated = RetranslateOneCDS (sfp, sfp->idx.entityID, TRUE, TRUE); + } + num_affected++; + if (log_fp != NULL) { + new_loc = SeqLocPrintUseBestID (sfp->location); + fprintf (log_fp, "Changed location %s to %s%s\n", old_loc, new_loc, retranslated ? 
" and retranslated protein" : ""); + new_loc = MemFree (new_loc); + } + } + old_loc = MemFree (old_loc); + } + } + return num_affected; +} + + +typedef struct molinfoblocklog { + MolinfoBlockPtr mib; + FILE *log_fp; + Boolean any_change; +} MolInfoBlockLogData, PNTR MolInfoBlockLogPtr; + +static void ApplyMolinfoBlockCallback (BioseqPtr bsp, Pointer data) +{ + MolInfoBlockLogPtr ml; + MolinfoBlockPtr mib; + ValNodePtr field; + MolInfoPtr mip; + Char id_buf[100]; + CharPtr field_name; + + if (bsp == NULL) { + return; + } + + ml = (MolInfoBlockLogPtr) data; + if (ml == NULL || ml->mib == NULL) { + return; + } + mib = ml->mib; + + if (!DoesObjectMatchConstraintChoiceSet (OBJ_BIOSEQ, bsp, mib->constraint)) { + return; + } + + mip = GetMolInfoForBioseq (bsp); + + for (field = mib->from_list; field != NULL; field = field->next) { + switch (field->choice) { + case MolinfoField_molecule: + if (mip == NULL || mip->biomol != BiomolFromMoleculeType (field->data.intvalue)) { + return; + } + break; + case MolinfoField_technique: + if (mip == NULL || mip->tech != TechFromTechniqueType (field->data.intvalue)) { + return; + } + break; + case MolinfoField_completedness: + if (mip == NULL || mip->completeness != CompletenessFromCompletednessType (field->data.intvalue)) { + return; + } + break; + case MolinfoField_mol_class: + if (bsp->mol != MolFromMoleculeClassType (field->data.intvalue)) { + return; + } + break; + case MolinfoField_topology: + if (bsp->topology != TopologyFromTopologyType (field->data.intvalue)) { + return; + } + break; + case MolinfoField_strand: + if (bsp->strand != StrandFromStrandType (field->data.intvalue)) { + return; + } + break; + } + } + + + for (field = mib->to_list; field != NULL; field = field->next) { + if (SetSequenceQualOnBioseq (bsp, field)) { + if (ml->log_fp != NULL) { + SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1); + field_name = GetSequenceQualName (field); + fprintf (ml->log_fp, "Changed to 
%s for %s\n", field_name, id_buf); + field_name = MemFree (field_name); + } + ml->any_change = TRUE; + } + } +} + + +static Boolean ApplyMolinfoBlockToSeqEntryEx (SeqEntryPtr sep, MolinfoBlockPtr mib, FILE *log_fp) +{ + MolInfoBlockLogData md; + + md.any_change = FALSE; + md.log_fp = log_fp; + md.mib = mib; + + VisitBioseqsInSep (sep, &md, ApplyMolinfoBlockCallback); + return md.any_change; +} + + +NLM_EXTERN void ApplyMolinfoBlockToSeqEntry (SeqEntryPtr sep, MolinfoBlockPtr mib) +{ + ApplyMolinfoBlockToSeqEntryEx (sep, mib, NULL); +} + + +static Boolean ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPtr sep, FILE *log_fp); + +static Boolean ApplyFixCapsToSeqEntry (SeqEntryPtr sep, FixCapsActionPtr action, FILE *log_fp) +{ + Boolean any_change = FALSE; + + if (sep == NULL || action == NULL) { + return FALSE; + } + + switch (action->choice) { + case FixCapsAction_pub: + any_change = ApplyFixPubCapsToSeqEntry (action->data.ptrvalue, sep, log_fp); + break; + case FixCapsAction_src_country: + any_change = FixupCountryQualsWithLog (sep, FALSE, log_fp); + break; + case FixCapsAction_mouse_strain: + any_change = FixupMouseStrains (sep, log_fp); + break; + case FixCapsAction_src_qual: + any_change = FixSrcQualCaps (sep, action->data.intvalue, log_fp); + break; + } + + return any_change; +} + + +static void FixCollectionDatesCallback (BioSourcePtr biop, Pointer data) +{ + LogInfoPtr lip; + SubSourcePtr ssp; + CharPtr new_date; + + if (biop == NULL) { + return; + } + + lip = (LogInfoPtr) data; + + for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { + if (ssp->subtype == SUBSRC_collection_date) { + new_date = ReformatDateWithMonthNames (ssp->name); + if (new_date != NULL && StringCmp (new_date, ssp->name) != 0) { + if (lip != NULL) { + if (lip->fp != NULL) { + fprintf (lip->fp, "Changed '%s' to '%s'\n", ssp->name, new_date); + } + lip->data_in_log = TRUE; + } + ssp->name = MemFree (ssp->name); + ssp->name = new_date; + new_date = NULL; + } + new_date = 
MemFree (new_date); + } + } +} + + +NLM_EXTERN SubSourcePtr FindBadLatLon (BioSourcePtr biop) +{ + SubSourcePtr ssp, ssp_bad = NULL; + Boolean format_ok, lat_in_range, lon_in_range; + + if (biop == NULL) + { + return NULL; + } + + for (ssp = biop->subtype; ssp != NULL && ssp_bad == NULL; ssp = ssp->next) + { + if (ssp->subtype == SUBSRC_lat_lon) + { + IsCorrectLatLonFormat (ssp->name, &format_ok, &lat_in_range, &lon_in_range); + if (!format_ok || !lat_in_range || !lon_in_range) + { + ssp_bad = ssp; + } + } + } + return ssp_bad; +} + + +static void FindBadLatLonDesc (SeqDescrPtr sdp, Pointer userdata) +{ + if (sdp == NULL || sdp->choice != Seq_descr_source || userdata == NULL) + { + return; + } + if (FindBadLatLon (sdp->data.ptrvalue) != NULL) + { + ValNodeAddPointer ((ValNodePtr PNTR) userdata, OBJ_SEQDESC, sdp); + } +} + + +static void FindBadLatLonFeat (SeqFeatPtr sfp, Pointer userdata) +{ + if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || userdata == NULL) + { + return; + } + if (FindBadLatLon (sfp->data.value.ptrvalue) != NULL) + { + ValNodeAddPointer ((ValNodePtr PNTR) userdata, OBJ_SEQFEAT, sfp); + } +} + + +NLM_EXTERN ValNodePtr FindBadLatLonObjects (SeqEntryPtr sep) +{ + ValNodePtr list = NULL; + + VisitDescriptorsInSep (sep, &list, FindBadLatLonDesc); + VisitFeaturesInSep (sep, &list, FindBadLatLonFeat); + return list; +} + + +static void AddAltitudeToSubSourceNote (BioSourcePtr biop, CharPtr extra_text) +{ + SubSourcePtr ssp; + CharPtr new_note, new_note_fmt = "%s%saltitude:%s"; + + if (biop == NULL || StringHasNoText (extra_text)) + { + return; + } + + ssp = biop->subtype; + while (ssp != NULL && ssp->subtype != SUBSRC_other) + { + ssp = ssp->next; + } + if (ssp == NULL) + { + ssp = SubSourceNew (); + ssp->subtype = SUBSRC_other; + ssp->next = biop->subtype; + biop->subtype = ssp; + } + new_note = (CharPtr) MemNew (sizeof (Char) * (StringLen (ssp->name) + + StringLen (extra_text) + + StringLen (new_note_fmt))); + sprintf (new_note, new_note_fmt, 
ssp->name == NULL ? "" : ssp->name, + ssp->name == NULL ? "" : "; ", + extra_text); + ssp->name = MemFree (ssp->name); + ssp->name = new_note; +} + + +NLM_EXTERN Boolean LatLonAutocorrectList (FILE *fp, ValNodePtr object_list) +{ + ValNodePtr vnp; + SeqDescrPtr sdp; + BioSourcePtr biop; + SubSourcePtr bad_ssp; + CharPtr fix, extra_text; + Boolean any_change = FALSE; + + if (fp == NULL || object_list == NULL) return FALSE; + + for (vnp = object_list; vnp != NULL; vnp = vnp->next) + { + if (vnp->choice != OBJ_SEQDESC) continue; + sdp = vnp->data.ptrvalue; + if (sdp != NULL && sdp->choice == Seq_descr_source) + { + biop = (BioSourcePtr) sdp->data.ptrvalue; + bad_ssp = FindBadLatLon (biop); + if (bad_ssp != NULL) + { + fix = FixLatLonFormat (bad_ssp->name); + if (fix != NULL) + { + extra_text = StringChr (fix, ','); + if (extra_text != NULL) + { + *extra_text = 0; + extra_text++; + while (isspace (*extra_text)) + { + extra_text++; + } + } + fprintf (fp, "Corrected %s to %s\n", bad_ssp->name, fix); + bad_ssp->name = MemFree (bad_ssp->name); + bad_ssp->name = fix; + if (extra_text != NULL) + { + AddAltitudeToSubSourceNote (biop, extra_text); + fprintf (fp, "Moved %s to subsource note\n", extra_text); + } + any_change = TRUE; + } + else + { + fprintf (fp, "Unable to correct %s\n", bad_ssp->name); + } + } + } + } + return any_change; +} + + +static void ReplaceiInSeq (CharPtr PNTR seq, LogInfoPtr lip) +{ + CharPtr cp, new_seq, src, dst; + Int4 num_i = 0, num_extra = 0; + + if (seq == NULL) { + return; + } + + cp = StringISearch (*seq, "i"); + while (cp != NULL) { + if (cp == *seq || *(cp - 1) != '<') { + num_extra++; + } + if (*(cp + 1) != '>') { + num_extra++; + } + num_i++; + cp = StringISearch (cp + 1, "i"); + } + + if (num_extra != 0) { + new_seq = (CharPtr) MemNew (sizeof (Char) * (StringLen (*seq) + 1 + num_extra)); + src = *seq; + dst = new_seq; + while (*src != 0) { + if (*src == 'i' || *src == 'I') { + if (src == *seq || *(src - 1) != '<') { + *dst = '<'; + dst++; 
+ } + *dst = 'i'; + dst++; + if (*(src + 1) != '>') { + *dst = '>'; + dst++; + } + } else { + *dst = *src; + dst++; + } + src++; + } + *dst = 0; + if (lip != NULL) { + if (lip->fp != NULL) { + fprintf (lip->fp, "Changed primer sequence from '%s' to '%s'\n", *seq, new_seq); + } + lip->data_in_log = TRUE; + } + + *seq = MemFree (*seq); + *seq = new_seq; + } +} + + +NLM_EXTERN void FixiPCRPrimerSeqsCallback (BioSourcePtr biop, Pointer data) +{ + PCRReactionSetPtr ps; + PCRPrimerPtr p; + LogInfoPtr lip; + + if (biop == NULL) { + return; + } + lip = (LogInfoPtr) data; + + for (ps = biop->pcr_primers; ps != NULL; ps = ps->next) { + for (p = ps->forward; p != NULL; p = p->next) { + ReplaceiInSeq (&(p->seq), lip); + } + for (p = ps->reverse; p != NULL; p = p->next) { + ReplaceiInSeq (&(p->seq), lip); + } + } +} + + +typedef struct fixproteinnameformat { + Boolean any_change; + FILE *fp; + ValNodePtr orgnames; +} FixProteinNameFormatData, PNTR FixProteinNameFormatPtr; + + +static void FixProteinNameFormatCallback (SeqFeatPtr sfp, Pointer data) +{ + FixProteinNameFormatPtr f; + ProtRefPtr prp; + ValNodePtr vnp_n, vnp_p; + CharPtr cp; + Int4 len; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT || (prp = (ProtRefPtr) sfp->data.value.ptrvalue) == NULL + || (f = (FixProteinNameFormatPtr) data) == NULL) { + return; + } + + for (vnp_n = f->orgnames; vnp_n != NULL; vnp_n = vnp_n->next) { + for (vnp_p = prp->name; vnp_p != NULL; vnp_p = vnp_p->next) { + if ((cp = StringISearch (vnp_p->data.ptrvalue, vnp_n->data.ptrvalue)) != NULL) { + len = StringLen (vnp_n->data.ptrvalue); + if (cp != vnp_p->data.ptrvalue + && ((*(cp - 1) == '(' && *(cp + len) == ')') || (*(cp - 1) == '[' && *(cp + len) == ']'))) { + cp--; + len+= 2; + } + if (*(cp + len) == 0 && isspace (*(cp - 1))) { + *(cp - 1) = 0; + f->any_change = TRUE; + if (f->fp != NULL) { + fprintf (f->fp, "Removed '%s' from protein name (now '%s')\n", (CharPtr) vnp_n->data.ptrvalue, (CharPtr) vnp_p->data.ptrvalue); + } + } else { 
+ if (isspace (*(cp + len))) { + len ++; + } + StringCpy (cp, cp + len); + f->any_change = TRUE; + if (f->fp != NULL) { + fprintf (f->fp, "Removed '%s' from protein name (now '%s')\n", (CharPtr) vnp_n->data.ptrvalue, (CharPtr) vnp_p->data.ptrvalue); + } + } + } + } + } +} + + +static Boolean ApplyFixFormatToSeqEntry (SeqEntryPtr sep, FixFormatActionPtr action, FILE *log_fp) +{ + LogInfoData lid; + FixProteinNameFormatData protformat; + ValNodePtr list; + + + if (sep == NULL || action == NULL) { + return FALSE; + } + + MemSet (&lid, 0, sizeof (LogInfoData)); + lid.fp = log_fp; + + switch (action->choice) { + case FixFormatAction_collection_date: + VisitBioSourcesInSep (sep, &lid, FixCollectionDatesCallback); + break; + case FixFormatAction_lat_lon: + list = FindBadLatLonObjects (sep); + lid.data_in_log = LatLonAutocorrectList (lid.fp, list); + list = FreeObjectList (list); + break; + case FixFormatAction_primers: + VisitBioSourcesInSep (sep, &lid, FixiPCRPrimerSeqsCallback); + break; + case FixFormatAction_protein_name: + MemSet (&protformat, 0, sizeof (FixProteinNameFormatData)); + protformat.fp = log_fp; + VisitBioSourcesInSep (sep, &(protformat.orgnames), GetOrgNamesInRecordCallback); + VisitFeaturesInSep (sep, &protformat, FixProteinNameFormatCallback); + protformat.orgnames = ValNodeFree (protformat.orgnames); + lid.data_in_log = protformat.any_change; + break; + } + return lid.data_in_log; +} + + +typedef struct replacepair { + CharPtr find; + CharPtr replace; +} ReplacePairData, PNTR ReplacePairPtr; + +static ReplacePairData macro_spell_fixes[] = { + {"univeristy", "University" }, + {"univerisity", "University" }, + {"univercity", "University" }, + {"uiniversity", "University" }, + {"uinversity", "University" }, + {"univesity", "University" }, + {"uviversity", "University" }, + {"universtiy", "University" }, + {"protien", "protein" }, + {"Insitiute", "Institute" }, + {"Instutite", "Institute" }, + {"instute", "Institute" }, + {"institue", "Institute" }, + 
{"insitute", "Institute" }, + {"insititute","Institute" }, + {NULL, NULL}}; + + +static void SetFlagWhenChanged (Uint2 entityID, Uint4 itemID, Uint2 itemtype, Pointer userdata) +{ + BoolPtr flag; + + if ((flag = (BoolPtr) userdata) != NULL) { + *flag = TRUE; + } +} + + +static Boolean SpellFixSeqEntry (SeqEntryPtr sep, Pointer data, FILE *log_fp) +{ + Boolean any_changes = FALSE, this_change; + Uint2 entityID; + Int4 i; + + entityID = ObjMgrGetEntityIDForChoice (sep); + for (i = 0; macro_spell_fixes[i].find != NULL; i++) { + this_change = FALSE; + FindReplaceInEntity (entityID, macro_spell_fixes[i].find, macro_spell_fixes[i].replace, FALSE, TRUE, TRUE, + FALSE, 0, NULL, NULL, NULL, FALSE, SetFlagWhenChanged, &this_change); + if (this_change) { + if (log_fp != NULL) { + fprintf (log_fp, "Replaced '%s' with '%s'\n", macro_spell_fixes[i].find, macro_spell_fixes[i].replace); + } + any_changes = TRUE; + } + } + return any_changes; +} + + +typedef struct descriptortypename { + Int4 descriptortype; + Uint1 descriptor_choice; + CharPtr descriptorname; +} DescriptorTypeNameData, PNTR DescriptorTypeNamePtr; + +static DescriptorTypeNameData descriptortypename[] = { + { Descriptor_type_all , 0 , "Any" } , + { Descriptor_type_title , Seq_descr_title , "Title" } , + { Descriptor_type_source , Seq_descr_source , "Source" } , + { Descriptor_type_publication , Seq_descr_pub , "Publication" } , + { Descriptor_type_comment , Seq_descr_comment , "Comment" } , + { Descriptor_type_genbank , Seq_descr_genbank , "GenBank" } , + { Descriptor_type_user , Seq_descr_user , "User" } , + { Descriptor_type_create_date , Seq_descr_create_date , "CreateDate" } , + { Descriptor_type_update_date , Seq_descr_update_date , "UpdateDate" } , + { Descriptor_type_mol_info , Seq_descr_molinfo , "MolInfo" } , + { Descriptor_type_structured_comment , Seq_descr_user , "StructuredComment" } , + { Descriptor_type_genome_project_id , Seq_descr_user , "GenomeProjectID" } +}; + +#define NUM_descriptortypename 
sizeof (descriptortypename) / sizeof (DescriptorTypeNameData)
+
+/* Maps a Seq_descr_* choice to the descriptortype of the first table row
+ * with that choice; returns -1 when no row matches.
+ */
+static Int4 GetDescriptorTypeFromDescriptorChoice (Uint1 descriptor_choice)
+{
+  Int4 i;
+
+  for (i = 0; i < NUM_descriptortypename; i++) {
+    if (descriptor_choice == descriptortypename[i].descriptor_choice) {
+      return descriptortypename[i].descriptortype;
+    }
+  }
+  return -1;
+}
+
+
+/* Maps a Descriptor_type_* value to the descriptor_choice of its table row;
+ * returns SEQDESCR_MAX when the type is not in the table.
+ */
+static Uint1 GetDescriptorChoiceFromDescriptorType (Int4 descriptortype)
+{
+  Int4 i;
+
+  for (i = 0; i < NUM_descriptortypename; i++) {
+    if (descriptortype == descriptortypename[i].descriptortype) {
+      return descriptortypename[i].descriptor_choice;
+    }
+  }
+  return SEQDESCR_MAX;
+}
+
+
+/* Returns the display name of the table row whose descriptortype matches,
+ * or "Unknown descriptor type" when there is none.  The returned string is
+ * not allocated here (table entry or literal).
+ */
+NLM_EXTERN CharPtr GetDescriptorNameFromDescriptorType (Int4 descriptortype)
+{
+  CharPtr str = NULL;
+  Int4 i;
+
+  for (i = 0; i < NUM_descriptortypename && str == NULL; i++) {
+    if (descriptortype == descriptortypename[i].descriptortype) {
+      /* take the name from the row that matched, not from the row whose
+       * index happens to equal the type value */
+      str = descriptortypename[i].descriptorname;
+    }
+  }
+  if (str == NULL) {
+    str = "Unknown descriptor type";
+  }
+  return str;
+}
+
+
+/* Appends one node per table row to *descriptor_type_list: choice is the
+ * row's descriptortype, data is a StringSave'd copy of its name.  The new
+ * nodes are sorted with SortVnpByString before being linked on.
+ */
+NLM_EXTERN void AddAllDescriptorsToChoiceList (ValNodePtr PNTR descriptor_type_list)
+{
+  Int4 i;
+  ValNodePtr tmp_list = NULL;
+
+  for (i = 0; i < NUM_descriptortypename; i++) {
+    ValNodeAddPointer (&tmp_list, descriptortypename[i].descriptortype, StringSave (descriptortypename[i].descriptorname));
+  }
+  tmp_list = ValNodeSort (tmp_list, SortVnpByString);
+  ValNodeLink (descriptor_type_list, tmp_list);
+}
+
+
+
+static Boolean DoesDescriptorMatchType (SeqDescrPtr sdp, Int4 descriptortype)
+{
+  Uint1 descriptorchoice;
+  UserObjectPtr uop;
+
+  if (sdp == NULL) {
+    return FALSE;
+  } else if (descriptortype == Descriptor_type_all) {
+    return TRUE;
+  } else if ((descriptorchoice = GetDescriptorChoiceFromDescriptorType (descriptortype)) == SEQDESCR_MAX) {
+    return FALSE;
+  } else if (descriptorchoice != sdp->choice) {
+    return FALSE;
+  } else if (descriptortype == Descriptor_type_structured_comment) {
+    if (sdp->choice == Seq_descr_user
+        && ((uop =
(UserObjectPtr) sdp->data.ptrvalue) == NULL + || uop->type == NULL + || StringCmp (uop->type->str, "StructuredComment") != 0)) { + return FALSE; + } else { + return TRUE; + } + } else if (descriptortype == Descriptor_type_genome_project_id) { + if (sdp->choice == Seq_descr_user + && ((uop = (UserObjectPtr) sdp->data.ptrvalue) == NULL + || uop->type == NULL + || StringCmp (uop->type->str, "GenomeProjectsDB") != 0)) { + return FALSE; + } else { + return TRUE; + } + + } else { + return TRUE; + } +} + + +typedef struct removedescriptoractioncollection { + RemoveDescriptorActionPtr action; + ValNodePtr obj_list; +} RemoveDescriptorActionCollectionData, PNTR RemoveDescriptorActionCollectionPtr; + + +static void RemoveDescriptorCollectionCallback (SeqDescrPtr sdp, Pointer data) +{ + RemoveDescriptorActionCollectionPtr d; + + if (sdp == NULL || (d = (RemoveDescriptorActionCollectionPtr) data) == NULL + || d->action == NULL) { + return; + } + + if (DoesDescriptorMatchType (sdp, d->action->type) + && DoesObjectMatchConstraintChoiceSet (OBJ_SEQDESC, sdp, d->action->constraint)) { + ValNodeAddPointer (&(d->obj_list), OBJ_SEQDESC, sdp); + } +} + + +static Int4 ApplyRemoveDescriptorActionToSeqEntry (RemoveDescriptorActionPtr action, SeqEntryPtr sep) +{ + RemoveDescriptorActionCollectionData d; + SeqDescrPtr sdp; + ObjValNodePtr ovp; + ValNodePtr vnp; + Int4 num_deleted = 0; + + if (action == NULL) return 0; + + d.action = action; + d.obj_list = NULL; + + VisitDescriptorsInSep (sep, &d, RemoveDescriptorCollectionCallback); + for (vnp = d.obj_list; vnp != NULL; vnp = vnp->next) { + sdp = vnp->data.ptrvalue; + if (sdp != NULL && sdp->extended != 0) { + ovp = (ObjValNodePtr) sdp; + ovp->idx.deleteme = TRUE; + num_deleted ++; + } + } + DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); + return num_deleted; +} + + +static DefLineType DefLineTypeFromAutodefListType(Uint2 list_type) +{ + DefLineType deflinetype = DEFLINE_USE_FEATURES; + + switch (list_type) { + case 
Autodef_list_type_feature_list: + deflinetype = DEFLINE_USE_FEATURES; + break; + case Autodef_list_type_complete_sequence: + deflinetype = DEFLINE_COMPLETE_SEQUENCE; + break; + case Autodef_list_type_complete_genome: + deflinetype = DEFLINE_COMPLETE_GENOME; + break; + } + return deflinetype; +} + + +static void ApplyAutodefActionToSeqEntry (AutodefActionPtr action, SeqEntryPtr sep) +{ + OrganismDescriptionModifiers od; + ModifierItemLocalPtr modList; + DeflineFeatureRequestList dfrl; + ValNodePtr vnp, modifier_indices = NULL; + ValNode field_type, source_qual_choice; + Uint4 i; + Int4 defline_pos; + + InitOrganismDescriptionModifiers (&od, NULL); + od.use_modifiers = TRUE; + + modList = MemNew (NumDefLineModifiers () * sizeof (ModifierItemLocalData)); + for (i = 0; i < NumDefLineModifiers(); i++) { + modList[i].any_present = FALSE; + modList[i].all_present = FALSE; + modList[i].is_unique = FALSE; + modList[i].first_value_seen = NULL; + modList[i].values_seen = NULL; + modList[i].all_unique = FALSE; + modList[i].status = NULL; + modList[i].required = FALSE; + } + SetRequiredModifiers (modList); + + /* add modifiers specified in action */ + source_qual_choice.next = NULL; + source_qual_choice.choice = SourceQualChoice_textqual; + field_type.next = NULL; + field_type.choice = FieldType_source_qual; + field_type.data.ptrvalue = &source_qual_choice; + + for (vnp = action->modifiers; vnp != NULL; vnp = vnp->next) { + source_qual_choice.data.intvalue = vnp->data.intvalue; + defline_pos = GetDeflinePosForFieldType (&field_type); + if (defline_pos > -1) { + modList[defline_pos].required = TRUE; + modList[defline_pos].any_present = TRUE; + ValNodeAddInt (&modifier_indices, 0, defline_pos); + + } + } + + InitFeatureRequests (&dfrl); + dfrl.feature_list_type = DefLineTypeFromAutodefListType (action->clause_list_type); + + AutoDefForSeqEntry (sep, SeqMgrGetEntityIDForSeqEntry (sep), &od, modList, modifier_indices, &dfrl, + DEFAULT_ORGANELLE_CLAUSE, FALSE, FALSE); + + modList = 
MemFree (modList); + modifier_indices = ValNodeFree (modifier_indices); + +} + + +NLM_EXTERN Boolean IsFixPubCapsActionEmpty (FixPubCapsActionPtr action) +{ + if (action == NULL) { + return TRUE; + } + if (action->affiliation || action->authors || action->title || action->affil_country) { + return FALSE; + } else { + return TRUE; + } +} + + +typedef struct fixpubcaps { + FixPubCapsActionPtr action; + ValNodePtr orgnames; + Int4 num_pub_fields; + Int4 num_sub_fields; + ValNodePtr object_list; +} FixPubCapsData, PNTR FixPubCapsPtr; + + +static Boolean IsPubASub (ValNodePtr pub) +{ + if (pub == NULL) { + return FALSE; + } else if (pub->choice == PUB_Sub) { + return TRUE; + } else if (pub->choice == PUB_Equiv) { + return IsPubASub(pub->data.ptrvalue); + } else { + return FALSE; + } +} + + +static void ApplyFixPubCapsCallback (PubdescPtr pdp, Pointer data) +{ + FixPubCapsPtr f; + CharPtr orig, tmp; + ValNodePtr pub; + AuthListPtr alp = NULL; + ValNodePtr names; + AuthorPtr ap, ap_orig; + AffilPtr affil_orig; + + f = (FixPubCapsPtr)data; + if (f == NULL || f->action == NULL) { + return; + } + + if (f->action->title) { + for (pub = pdp->pub; pub != NULL; pub = pub->next) { + orig = GetPubFieldFromPub (pub, Publication_field_title, NULL); + if (orig != NULL) { + tmp = StringSave (orig); + if (!f->action->punct_only) { + FixCapitalizationInTitle (&tmp, TRUE, f->orgnames); + } + if (StringCmp (orig, tmp) != 0) { + SetPubFieldOnPub (pub, Publication_field_title, NULL, tmp, ExistingTextOption_replace_old); + if (IsPubASub(pub)) { + f->num_sub_fields++; + } else { + f->num_pub_fields++; + } + } + tmp = MemFree (tmp); + orig = MemFree (orig); + } + } + } + + if (f->action->authors && !f->action->punct_only) { + alp = GetAuthListPtr (pdp, NULL); + if (alp != NULL) { + for (names = alp->names; names != NULL; names = names->next) { + ap = names->data.ptrvalue; + ap_orig = AsnIoMemCopy (ap, (AsnReadFunc) AuthorAsnRead, (AsnWriteFunc) AuthorAsnWrite); + FixCapitalizationInAuthor 
(ap); + if (!AsnIoMemComp (ap, ap_orig, (AsnWriteFunc) AuthorAsnWrite)) { + if (IsPubASub(pdp->pub)) { + f->num_sub_fields++; + } else { + f->num_pub_fields++; + } + } + ap_orig = AuthorFree (ap_orig); + } + } + } + + if (f->action->affiliation) { + if (alp == NULL) { + alp = GetAuthListPtr (pdp, NULL); + } + if (alp != NULL && alp->affil != NULL) { + affil_orig = AsnIoMemCopy (alp->affil, (AsnReadFunc) AffilAsnRead, (AsnWriteFunc) AffilAsnWrite); + FixCapsInPubAffilEx (alp->affil, f->action->punct_only); + if (!AsnIoMemComp (alp->affil, affil_orig, (AsnWriteFunc) AffilAsnWrite)) { + if (IsPubASub(pdp->pub)) { + f->num_sub_fields++; + } else { + f->num_pub_fields++; + } + } + affil_orig = AffilFree (affil_orig); + } + } else if (f->action->affil_country) { + if (alp == NULL) { + alp = GetAuthListPtr (pdp, NULL); + } + if (alp != NULL && alp->affil != NULL && !StringHasNoText (alp->affil->country)) { + orig = StringSave (alp->affil->country); + FixCapitalizationInCountryStringEx (&(alp->affil->country), f->action->punct_only); + if (StringCmp (orig, alp->affil->country) != 0) { + if (IsPubASub(pdp->pub)) { + f->num_sub_fields++; + } else { + f->num_pub_fields++; + } + } + if (StringCmp (alp->affil->country, "USA") == 0 && !StringHasNoText (alp->affil->sub) && !f->action->punct_only) { + orig = StringSave (alp->affil->sub); + FixStateAbbreviationsInAffil (alp->affil, NULL); + if (StringCmp (orig, alp->affil->sub) != 0) { + if (IsPubASub(pdp->pub)) { + f->num_sub_fields++; + } else { + f->num_pub_fields++; + } + } + orig = MemFree (orig); + } + orig = MemFree (orig); + } + } +} + + +static void CollectPubObjectsFeatCallback (SeqFeatPtr sfp, Pointer data) +{ + FixPubCapsPtr f; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB || (f = (FixPubCapsPtr) data) == NULL) { + return; + } + + if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, f->action->constraint)) { + ValNodeAddPointer (&(f->object_list), OBJ_SEQFEAT, sfp); + } +} + + +static void 
CollectPubObjectsDescCallback (SeqDescPtr sdp, Pointer data) +{ + FixPubCapsPtr f; + + if (sdp == NULL || sdp->choice != Seq_descr_pub || (f = (FixPubCapsPtr) data) == NULL) { + return; + } + + if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQDESC, sdp, f->action->constraint)) { + ValNodeAddPointer (&(f->object_list), OBJ_SEQDESC, sdp); + } +} + + +static Boolean ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPtr sep, FILE *log_fp) +{ + FixPubCapsData f; + ValNodePtr vnp; + PubdescPtr pdp; + SeqFeatPtr sfp; + SeqDescPtr sdp; + CharPtr summ; + Boolean rval = FALSE; + + if (action == NULL || sep == NULL) return FALSE; + + MemSet (&f, 0, sizeof (FixPubCapsData)); + f.action = action; + + /* collect pub objects that match constraint */ + VisitDescriptorsInSep (sep, &f, CollectPubObjectsDescCallback); + VisitFeaturesInSep (sep, &f, CollectPubObjectsFeatCallback); + + if (f.object_list == NULL) { + /* nothing to change */ + return FALSE; + } + + if (action->title) { + /* get org names to use in fixes */ + VisitBioSourcesInSep (sep, &f.orgnames, GetOrgNamesInRecordCallback); + } + + for (vnp = f.object_list; vnp != NULL; vnp = vnp->next) { + pdp = NULL; + if (vnp->choice == OBJ_SEQFEAT) { + sfp = vnp->data.ptrvalue; + pdp = sfp->data.value.ptrvalue; + } else if (vnp->choice == OBJ_SEQDESC) { + sdp = vnp->data.ptrvalue; + pdp = sdp->data.ptrvalue; + } + ApplyFixPubCapsCallback (pdp, &f); + } + + f.orgnames = ValNodeFree (f.orgnames); + + if (f.num_sub_fields > 0 || f.num_pub_fields > 0) { + rval = TRUE; + if (log_fp != NULL) { + summ = SummarizeFixPubCapsAction (action); + if (f.num_sub_fields > 0) { + fprintf (log_fp, "Fixed capitalization in %d publication fields in submitter blocks during %s\n", f.num_sub_fields, summ); + } + if (f.num_pub_fields > 0) { + fprintf (log_fp, "Fixed capitalization in %d publication fields in publication blocks during %s\n", f.num_pub_fields, summ); + } + summ = MemFree (summ); + } + } + + return rval; +} + + +NLM_EXTERN 
Boolean IsFieldSortable (FieldTypePtr field) +{ + Boolean rval = FALSE; + FeatureFieldPtr ffield; + + if (field == NULL) { + return FALSE; + } + if (field->choice == FieldType_feature_field) { + ffield = field->data.ptrvalue; + if (ffield != NULL) { + if ((ffield->type == Macro_feature_type_cds || ffield->type == Macro_feature_type_prot) + && ffield->field->choice == FeatQualChoice_legal_qual + && ffield->field->data.intvalue == Feat_qual_legal_product) { + rval = TRUE; + } + } + } else if (field->choice == FieldType_cds_gene_prot) { + if (field->data.intvalue == CDSGeneProt_field_prot_name) { + rval = TRUE; + } + } + return rval; +} + + +static Int4 SortFieldsInSeqEntry (SortFieldsActionPtr action, SeqEntryPtr sep) +{ + ValNodePtr object_list = NULL, vnp; + Int4 num = 0; + + if (action == NULL || action->field == NULL || !IsFieldSortable(action->field) || sep == NULL) { + return 0; + } + + object_list = GetObjectListForFieldType (action->field->choice, sep); + for (vnp = object_list; vnp != NULL; vnp = vnp->next) { + if (DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, action->constraint) + && IsObjectAppropriateForFieldValue(vnp->choice, vnp->data.ptrvalue, action->field)) { + if (SortFieldsForObject (vnp->choice, vnp->data.ptrvalue, action->field, action->order)) { + num++; + } + } + } + + + return num; +} + + +typedef struct dupfeats { + ValNodePtr delete_list; + RemoveDuplicateFeatureActionPtr action; +} DupFeatsData, PNTR DupFeatsPtr; + + +static void FindDuplicateFeatsCallback (BioseqPtr bsp, Pointer data) +{ + DupFeatsPtr dfp; + SeqFeatPtr sfp1, sfp2; + SeqMgrFeatContext fcontext; + Uint1 featdef; + ValNodePtr vnp_prev = NULL; + + if (bsp == NULL || (dfp = (DupFeatsPtr) data) == NULL) { + return; + } + + if (dfp->action->type == Macro_feature_type_any) { + featdef = 0; + } else { + featdef = GetFeatdefFromFeatureType (dfp->action->type); + } + sfp1 = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext); + while (sfp1 != NULL) { + sfp2 
= SeqMgrGetNextFeature (bsp, sfp1, 0, featdef, &fcontext); + if (sfp1 == sfp2) { + break; + } + if (DoFeaturesMatch (sfp1, sfp2, FALSE, dfp->action->case_sensitive, dfp->action->ignore_partials)) { + if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp2, dfp->action->rd_constraint)) { + vnp_prev = ValNodeAddPointer (&(dfp->delete_list), OBJ_SEQFEAT, sfp2); + } else if ((vnp_prev == NULL || vnp_prev->data.ptrvalue != sfp1) + && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp1, dfp->action->rd_constraint)) { + ValNodeAddPointer (&(dfp->delete_list), OBJ_SEQFEAT, sfp1); + } + } + sfp1 = sfp2; + } + +} + + +NLM_EXTERN ValNodePtr GetDuplicateFeaturesForRemoval (SeqEntryPtr sep, RemoveDuplicateFeatureActionPtr action) +{ + DupFeatsData df; + + MemSet (&df, 0, sizeof (DupFeatsData)); + df.action = action; + + VisitBioseqsInSep (sep, &df, FindDuplicateFeatsCallback); + return df.delete_list; +} + + +NLM_EXTERN void RemoveDuplicateFeaturesInList (ValNodePtr delete_list, Uint2 entityID, Boolean remove_proteins) +{ + ValNodePtr vnp; + SeqFeatPtr sfp; + BioseqPtr protbsp; + SeqEntryPtr sep; + + for (vnp = delete_list; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL) { + if (remove_proteins && sfp->data.choice == SEQFEAT_CDREGION && sfp->product != NULL) { + protbsp = BioseqFindFromSeqLoc (sfp->product); + if (protbsp != NULL) { + protbsp->idx.deleteme = TRUE; + } + } + sfp->idx.deleteme = TRUE; + } + } + + DeleteMarkedObjects (entityID, 0, NULL); + if (remove_proteins) { + sep = GetTopSeqEntryForEntityID (entityID); + RenormalizeNucProtSets (sep, TRUE); + } + +} + + +NLM_EXTERN Boolean RemoveDuplicateFeaturesInSeqEntry (SeqEntryPtr sep, RemoveDuplicateFeatureActionPtr action, FILE *log_fp) +{ + ValNodePtr delete_list; + Int4 num; + + delete_list = GetDuplicateFeaturesForRemoval (sep, action); + if (delete_list == NULL) { + return FALSE; + } + + if (log_fp != NULL) { + num = ValNodeLen (delete_list); + fprintf (log_fp, 
"Removed %d duplicate features\n", num); + } + + RemoveDuplicateFeaturesInList (delete_list, ObjMgrGetEntityIDForChoice(sep), action->remove_proteins); + return TRUE; +} + + +NLM_EXTERN Boolean DoesTextContainOnlyTheseWords (CharPtr txt, ValNodePtr word_list) +{ + CharPtr cp; + ValNodePtr vnp; + Boolean match; + Boolean at_least_one = FALSE; + Int4 len; + + if (StringHasNoText(txt)) { + return FALSE; + } + + cp = txt; + while (isspace (*cp) || ispunct(*cp)) { + cp++; + } + match = TRUE; + while (*cp != 0 && match) { + match = FALSE; + for (vnp = word_list; vnp != NULL && !match; vnp = vnp->next) { + len = StringLen (vnp->data.ptrvalue); + if (StringNICmp (cp, vnp->data.ptrvalue, len) == 0 + && (*(cp + len) == 0 || isspace(*(cp + len)) || ispunct(*(cp + len)))) { + match = TRUE; + cp += len; + at_least_one = TRUE; + } + } + while (isspace (*cp) || ispunct(*cp)) { + cp++; + } + } + return (match && at_least_one); +} + + +static ValNodePtr WordListFromText (CharPtr txt) +{ + ValNodePtr list = NULL; + CharPtr start, end, word; + Int4 len; + + if (StringHasNoText(txt)) { + return NULL; + } + + start = txt; + + while (isspace (*start) || ispunct(*start)) { + start++; + } + while (*start != 0) { + end = start + 1; + len = 1; + while (*end != 0 && !isspace (*end) && !ispunct(*end)) { + end++; + len++; + } + word = (CharPtr) MemNew (sizeof (Char) * (len + 1)); + StringNCpy (word, start, len); + word[len] = 0; + ValNodeAddPointer (&list, 0, word); + start = end; + while (isspace (*start) || ispunct(*start)) { + start++; + } + } + + return list; +} + + +static CharPtr s_SpecialLineageWords[] = { + "Domain", + "Phylum", + "Kingdom", + "Family", + "Class", + "Superfamily", + "Order", + "Genus", + "Species", + "Organism", + "Note", + "Taxonomic classification", + "Lineage", + "Tax class/lineage", + NULL +}; + +static Boolean RemoveLineageNoteFromBioSource (BioSourcePtr biop, FILE *fp) +{ + SubSourcePtr ssp, ssp_prev = NULL, ssp_next; + OrgModPtr mod, mod_prev = NULL, mod_next; + 
Boolean any_removed = FALSE; + ValNodePtr word_list = NULL; + Int4 i; + + if (!HasTaxonomyID (biop) || biop->org == NULL + || biop->org->orgname == NULL + || StringHasNoText (biop->org->orgname->lineage)) { + return FALSE; + } + + word_list = WordListFromText(biop->org->orgname->lineage); + ValNodeLink (&word_list, WordListFromText(biop->org->taxname)); + for (i = 0; s_SpecialLineageWords[i] != NULL; i++) { + ValNodeAddPointer (&word_list, 0, StringSave (s_SpecialLineageWords[i])); + } + + for (ssp = biop->subtype; ssp != NULL; ssp = ssp_next) { + ssp_next = ssp->next; + if (ssp->subtype == SUBSRC_other && DoesTextContainOnlyTheseWords(ssp->name, word_list)) { + if (ssp_prev == NULL) { + biop->subtype = ssp_next; + } else { + ssp_prev->next = ssp_next; + } + ssp->next = NULL; + if (fp != NULL) { + fprintf (fp, "Removed note %s where lineage is %s\n", ssp->name, biop->org->orgname->lineage); + } + ssp = SubSourceFree (ssp); + any_removed = TRUE; + } else { + ssp_prev = ssp; + } + } + + for (mod = biop->org->orgname->mod; mod != NULL; mod = mod_next) { + mod_next = mod->next; + if (mod->subtype == ORGMOD_other && DoesTextContainOnlyTheseWords(mod->subname, word_list)) { + if (mod_prev == NULL) { + biop->org->orgname->mod = mod_next; + } else { + mod_prev->next = mod_next; + } + mod->next = NULL; + if (fp != NULL) { + fprintf (fp, "Removed note %s where lineage is %s\n", mod->subname, biop->org->orgname->lineage); + } + mod = OrgModFree (mod); + any_removed = TRUE; + } else { + mod_prev = mod; + } + } + word_list = ValNodeFreeData (word_list); + return any_removed; +} + + +static void RemoveLineageNotesCallback (BioSourcePtr biop, Pointer data) +{ + LogInfoPtr lip; + + if (biop == NULL) { + return; + } + lip = (LogInfoPtr) data; + + if (RemoveLineageNoteFromBioSource(biop, lip == NULL ? 
NULL : lip->fp)) { + if (lip) { + lip->data_in_log = TRUE; + } + } +} + + +static Boolean RemoveLineageNotesInSeqEntry (SeqEntryPtr sep, FILE *log_fp) +{ + LogInfoData lid; + + MemSet (&lid, 0, sizeof (LogInfoData)); + lid.fp = log_fp; + + VisitBioSourcesInSep (sep, &lid, RemoveLineageNotesCallback); + return lid.data_in_log; +} + + +typedef struct logandpointer { + LogInfoData lid; + Pointer action; +} LogAndPointerData, PNTR LogAndPointerPtr; + + +static Boolean GeneXrefMatchesSuppression (GeneRefPtr grp, Uint2 suppression) +{ + Boolean rval = FALSE; + + if (grp == NULL) { + return FALSE; + } + + switch (suppression) { + case Gene_xref_suppression_type_any: + rval = TRUE; + break; + case Gene_xref_suppression_type_suppressing: + if (SeqMgrGeneIsSuppressed(grp)) { + rval = TRUE; + } + break; + case Gene_xref_suppression_type_non_suppressing: + if (!SeqMgrGeneIsSuppressed(grp)) { + rval = TRUE; + } + break; + } + return rval; +} + + +static Boolean GeneXrefMatchesNecessary (SeqFeatPtr sfp, GeneRefPtr grp, Uint2 necessary) +{ + Boolean rval = FALSE; + + if (sfp == NULL || grp == NULL) { + return FALSE; + } + + switch (necessary) { + case Gene_xref_necessary_type_any: + rval = TRUE; + break; + case Gene_xref_necessary_type_necessary: + if (!SeqMgrGeneIsSuppressed (grp) && !IsGeneXrefRedundant (sfp)) { + rval = TRUE; + } + break; + case Gene_xref_necessary_type_unnecessary: + if (!SeqMgrGeneIsSuppressed (grp) && IsGeneXrefRedundant (sfp)) { + rval = TRUE; + } + break; + } + return rval; +} + + +static Boolean RemoveXref (SeqFeatPtr sfp, Uint2 choice, Pointer data) +{ + SeqFeatXrefPtr xref, xref_next, xref_prev = NULL; + Boolean removed = FALSE; + + if (sfp == NULL) return FALSE; + for (xref = sfp->xref; xref != NULL; xref = xref_next) { + xref_next = xref->next; + if ((xref->data.choice == choice || choice == 0) + && (xref->data.value.ptrvalue == data || data == NULL)) { + if (xref_prev == NULL) { + sfp->xref = xref_next; + } else { + xref_prev->next = xref_next; + } 
+ xref->next = NULL; + xref = SeqFeatXrefFree (xref); + removed = TRUE; + } else { + xref_prev = xref; + } + } + return removed; +} + + +static void MacroRemoveXrefsCallback(SeqFeatPtr sfp, Pointer data) +{ + LogAndPointerPtr lp; + RemoveXrefsActionPtr action; + GeneXrefTypePtr gene; + GeneRefPtr grp; + CharPtr text; + ValNode vn; + + if (sfp == NULL || (lp = (LogAndPointerPtr)data) == NULL + || (action = (RemoveXrefsActionPtr)lp->action) == NULL + || action->xref_type == NULL) { + return; + } + if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, action->constraint)) { + return; + } + + switch (action->xref_type->choice) { + case XrefType_gene: + grp = SeqMgrGetGeneXref (sfp); + if (grp != NULL) { + gene = (GeneXrefTypePtr) action->xref_type->data.ptrvalue; + if (gene != NULL) { + if ((gene->feature == Macro_feature_type_any || gene->feature == GetFeatureTypeFromFeatdef(sfp->idx.subtype)) + && GeneXrefMatchesSuppression(grp, gene->suppression) + && GeneXrefMatchesNecessary(sfp, grp, gene->necessary)) { + if (RemoveXref(sfp, SEQFEAT_GENE, grp)) { + lp->lid.data_in_log = TRUE; + if (lp->lid.fp != NULL) { + MemSet (&vn, 0, sizeof (ValNode)); + vn.choice = OBJ_SEQFEAT; + vn.data.ptrvalue = sfp; + text = GetDiscrepancyItemText (&vn); + fprintf (lp->lid.fp, "Removed Gene xref from %s\n", text); + text = MemFree (text); + } + } + } + } + } + break; + } +} + + +static Boolean MacroRemoveXrefs (SeqEntryPtr sep, RemoveXrefsActionPtr action, FILE *log_fp) +{ + LogAndPointerData ld; + + MemSet (&ld.lid, 0, sizeof (LogAndPointerData)); + ld.lid.fp = log_fp; + ld.action = action; + + VisitFeaturesInSep (sep, &ld, MacroRemoveXrefsCallback); + return ld.lid.data_in_log; +} + +static void MacroMakeGeneXrefsCallback(SeqFeatPtr sfp, Pointer data) +{ + LogAndPointerPtr lp; + MakeGeneXrefActionPtr action; + SeqFeatPtr gene; + GeneRefPtr grp; + CharPtr text; + ValNode vn; + SeqMgrFeatContext context; + SeqFeatXrefPtr xref; + + if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE 
|| (lp = (LogAndPointerPtr)data) == NULL + || (action = (MakeGeneXrefActionPtr) lp->action) == NULL) { + return; + } + + if (action->feature != Macro_feature_type_any && action->feature != GetFeatureTypeFromFeatdef(sfp->idx.subtype)) { + return; + } + + if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, action->constraint)) { + return; + } + + grp = SeqMgrGetGeneXref (sfp); + + if (grp != NULL) { + return; + } + + gene = SeqMgrGetOverlappingGene (sfp->location, &context); + if (gene != NULL && (grp = (GeneRefPtr) gene->data.value.ptrvalue) != NULL) { + grp = (GeneRefPtr) AsnIoMemCopy (grp, (AsnReadFunc)GeneRefAsnRead, (AsnWriteFunc)GeneRefAsnWrite); + xref = SeqFeatXrefNew (); + xref->data.choice = SEQFEAT_GENE; + xref->data.value.ptrvalue = grp; + xref->next = sfp->xref; + sfp->xref = xref; + lp->lid.data_in_log = TRUE; + if (lp->lid.fp != NULL) { + MemSet (&vn, 0, sizeof (ValNode)); + vn.choice = OBJ_SEQFEAT; + vn.data.ptrvalue = sfp; + text = GetDiscrepancyItemText (&vn); + fprintf (lp->lid.fp, "Added Gene xref to %s\n", text); + text = MemFree (text); + } + } +} + + +static Boolean MacroMakeGeneXrefs (SeqEntryPtr sep, MakeGeneXrefActionPtr action, FILE *log_fp) +{ + LogAndPointerData ld; + + MemSet (&ld.lid, 0, sizeof (LogAndPointerData)); + ld.lid.fp = log_fp; + ld.action = action; + + VisitFeaturesInSep (sep, &ld, MacroMakeGeneXrefsCallback); + return ld.lid.data_in_log; +} + + +static Boolean MacroMakeBoldXrefs (SeqEntryPtr sep, FILE *log_fp) +{ + Int4 num_created = 0; + + VisitBioseqsInSep (sep, &num_created, ApplyBarcodeDbxrefsToBioseq); + + if (num_created > 0) { + if (log_fp != NULL) { + fprintf (log_fp, "Created %d BARCODE dbxrefs\n", num_created); + } + return TRUE; + } else { + return FALSE; + } +} + + +NLM_EXTERN Boolean StripSuffixFromAuthor (AuthorPtr pAuthor) +{ + NameStdPtr pNameStandard; + Boolean rval = FALSE; + + if (pAuthor == NULL) + return FALSE; + else if(pAuthor->name->choice != 2) + return FALSE; + pNameStandard = 
pAuthor->name->data; + if (pNameStandard != NULL && pNameStandard->names[5] != NULL) + { + pNameStandard->names[5][0] = 0; + rval = TRUE; + } + return rval; +} + +NLM_EXTERN Boolean TruncateAuthorMiddleInitials (AuthorPtr pAuthor) +{ + NameStdPtr pNameStandard; + CharPtr cp; + Boolean rval = FALSE; + + if (pAuthor == NULL) + return FALSE; + else if(pAuthor->name->choice != 2) + return FALSE; + pNameStandard = pAuthor->name->data; + if (pNameStandard != NULL) + { + cp = StringChr (pNameStandard->names[4], '.'); + if (cp == NULL || StringChr (cp + 1, '.') == NULL) { + if (StringLen (pNameStandard->names[4]) > 3) + { + pNameStandard->names[4][3] = 0; + pNameStandard->names[4][2] = '.'; + rval = TRUE; + } + } else if (StringLen (pNameStandard->names[4]) > 4) { + pNameStandard->names[4][4] = 0; + pNameStandard->names[4][3] = '.'; + rval = TRUE; + } + } + return rval; +} + + +static Boolean MoveAuthorMiddleToFirst (AuthorPtr pAuthor) +{ + NameStdPtr pNameStandard; + CharPtr cp; + Int4 num_letters = 0; + Boolean rval = FALSE; + + if (pAuthor == NULL) + return FALSE; + else if(pAuthor->name->choice != 2) + return FALSE; + pNameStandard = pAuthor->name->data; + if (pNameStandard != NULL) + { + cp = StringChr (pNameStandard->names[4], '.'); + if (cp != NULL) { + cp++; + while (isalpha(*(cp + num_letters))) { + num_letters++; + } + if (num_letters > 1) { + SetStringValue (&(pNameStandard->names[1]), cp, ExistingTextOption_append_space); + *cp = 0; + rval = TRUE; + } + } + } + return rval; +} + + +const CharPtr s_AuthorFixActionNames[] = { + "Truncate middle initials", + "Strip author suffix", + "Move middle name to first name" +}; + + +NLM_EXTERN CharPtr SummarizeAuthorFixAction (AuthorFixActionPtr a) +{ + CharPtr rval = NULL; + CharPtr constraint; + + if (a == NULL) { + return StringSave("Unknown action"); + } + + if (a->fix_type < 1 || a->fix_type > sizeof (s_AuthorFixActionNames) / sizeof (CharPtr)) { + return StringSave("Unknown action"); + } + + constraint = 
SummarizeConstraintSet (a->constraint); + if (constraint == NULL) { + rval = StringSave (s_AuthorFixActionNames[a->fix_type - 1]); + } else { + rval = (CharPtr) MemNew (sizeof (Char) * (StringLen (s_AuthorFixActionNames[a->fix_type - 1]) + StringLen (constraint) + 2)); + StringCpy (rval, s_AuthorFixActionNames[a->fix_type - 1]); + StringCat (rval, " "); + StringCat (rval, constraint); + constraint = MemFree (constraint); + } + return rval; +} + + +typedef struct pubcollect { + ValNodePtr list; + ValNodePtr constraint; +} PubCollectData, PNTR PubCollectPtr; + +static void GetPubsForAuthorFixDesc (SeqDescPtr sdp, Pointer data) +{ + PubCollectPtr p; + + if (sdp == NULL || sdp->choice != Seq_descr_pub || (p = (PubCollectPtr) data) == NULL) { + return; + } + + if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQDESC, sdp, p->constraint)) { + ValNodeAddPointer (&(p->list), OBJ_SEQDESC, sdp); + } +} + +static void GetPubsForAuthorFixFeat (SeqFeatPtr sfp, Pointer data) +{ + PubCollectPtr p; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB || (p = (PubCollectPtr) data) == NULL) { + return; + } + + if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, p->constraint)) { + ValNodeAddPointer (&(p->list), OBJ_SEQFEAT, sfp); + } +} + +static Boolean ApplyAuthorFixToSeqEntry (SeqEntryPtr sep, AuthorFixActionPtr action, FILE *log_fp) +{ + PubCollectData p; + ValNodePtr vnp, pub; + PubdescPtr pdp; + SeqFeatPtr sfp; + SeqDescPtr sdp; + AuthListPtr alp; + ValNodePtr names; + AuthorPtr ap; + Int4 num_changed = 0; + + if (sep == NULL || action == NULL) { + return FALSE; + } + + MemSet (&p, 0, sizeof (PubCollectData)); + p.constraint = action->constraint; + VisitDescriptorsInSep (sep, &p, GetPubsForAuthorFixDesc); + VisitFeaturesInSep (sep, &p, GetPubsForAuthorFixFeat); + for (vnp = p.list; vnp != NULL; vnp = vnp->next) { + pdp = NULL; + if (vnp->choice == OBJ_SEQFEAT) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) { + pdp = 
sfp->data.value.ptrvalue; + } + } else if (vnp->choice == OBJ_SEQDESC) { + sdp = (SeqDescPtr) vnp->data.ptrvalue; + if (sdp != NULL && sdp->choice == Seq_descr_pub) { + pdp = sdp->data.ptrvalue; + } + } + if (pdp != NULL) { + for (pub = pdp->pub; pub != NULL; pub = pub->next) { + alp = GetAuthorListForPub (pub); + if (alp != NULL) { + for (names = alp->names; names != NULL; names = names->next) { + ap = names->data.ptrvalue; + switch (action->fix_type) { + case Author_fix_type_truncate_middle_initials: + if (TruncateAuthorMiddleInitials(ap)) { + num_changed++; + } + break; + case Author_fix_type_strip_suffix: + if (StripSuffixFromAuthor(ap)) { + num_changed++; + } + break; + case Author_fix_type_move_middle_to_first: + if (MoveAuthorMiddleToFirst (ap)) { + num_changed++; + } + break; + } + } + } + } + } + } + + p.list = ValNodeFree (p.list); + if (num_changed > 0) { + if (log_fp != NULL) { + fprintf (log_fp, "%s for %d names\n", s_AuthorFixActionNames[action->fix_type - 1], num_changed); + } + return TRUE; + } else { + return FALSE; + } +} + + +NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, FILE *log_fp) +{ + Int4 num_AECR = 0, num_parse = 0, num; + Uint2 entityID; + Boolean needs_update = FALSE; + CharPtr summ; + Boolean any_change = FALSE; + Boolean created_protein_features = FALSE; + + entityID = SeqMgrGetEntityIDForSeqEntry(sep); + + while (macro != NULL) { + needs_update = TRUE; + switch (macro->choice) { + case MacroActionChoice_aecr: + num = ApplyAECRActionToSeqEntry ((AECRActionPtr) macro->data.ptrvalue, sep, &created_protein_features); + num_AECR += num; + if (num > 0) { + if (log_fp != NULL) { + summ = SummarizeAECRAction ((AECRActionPtr) macro->data.ptrvalue); + fprintf (log_fp, "Changed %d fields during %s\n", num, summ); + summ = MemFree (summ); + } + any_change = TRUE; + } + if (created_protein_features) { + if (log_fp != NULL) { + fprintf (log_fp, "Created protein features\n"); + } + any_change = TRUE; + } + break; + 
case MacroActionChoice_parse: + num = ApplyParseActionToSeqEntry ((ParseActionPtr) macro->data.ptrvalue, sep); + num_parse += num; + if (num > 0) { + if (log_fp != NULL) { + summ = SummarizeParseAction ((ParseActionPtr) macro->data.ptrvalue); + fprintf (log_fp, "Changed %d fields during %s\n", num, summ); + summ = MemFree (summ); + } + any_change = TRUE; + } + break; + case MacroActionChoice_add_feature: + num = ApplyApplyFeatureActionToSeqEntry ((ApplyFeatureActionPtr) macro->data.ptrvalue, sep); + if (num > 0) { + if (log_fp != NULL) { + fprintf (log_fp, "Added %d features\n", num); + } + any_change = TRUE; + } + SeqMgrIndexFeatures (entityID, NULL); + break; + case MacroActionChoice_remove_feature: + num = ApplyRemoveFeatureActionToSeqEntry ((RemoveFeatureActionPtr) macro->data.ptrvalue, sep); + if (num > 0) { + if (log_fp != NULL) { + fprintf (log_fp, "Removed %d features\n", num); + } + any_change = TRUE; + ObjMgrSetDirtyFlag (entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); + needs_update = FALSE; + } + break; + case MacroActionChoice_edit_location: + num = ApplyEditFeatureLocationActionToSeqEntry ((EditFeatureLocationActionPtr) macro->data.ptrvalue, sep, log_fp); + if (num > 0) { + any_change = TRUE; + } + break; + case MacroActionChoice_convert_feature: + num = ApplyConvertFeatureActionToSeqEntry ((ConvertFeatureActionPtr) macro->data.ptrvalue, sep, log_fp); + if (num > 0) { + any_change = TRUE; + } + ObjMgrSetDirtyFlag (entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); + needs_update = FALSE; + break; + case MacroActionChoice_remove_descriptor: + num = ApplyRemoveDescriptorActionToSeqEntry ((RemoveDescriptorActionPtr) macro->data.ptrvalue, sep); + if (num > 0) { + if (log_fp != NULL) { + summ = SummarizeRemoveDescriptorAction ((RemoveDescriptorActionPtr) macro->data.ptrvalue); + fprintf (log_fp, "Removed %d descriptors during %s\n", num, summ); + summ = MemFree (summ); + } + any_change = TRUE; + } + break; + case 
MacroActionChoice_autodef: + ApplyAutodefActionToSeqEntry ((AutodefActionPtr) macro->data.ptrvalue, sep); + if (log_fp != NULL) { + summ = SummarizeAutodefAction ((AutodefActionPtr) macro->data.ptrvalue); + if (summ != NULL) { + fprintf (log_fp, "Performed %s\n", summ); + } + summ = MemFree (summ); + } + any_change = TRUE; + break; + case MacroActionChoice_removesets: + if (RemoveDuplicateNestedSetsForEntityID (entityID)) { + if (log_fp != NULL) { + fprintf (log_fp, "Removed duplicate nested sets\n"); + } + any_change = TRUE; + } + break; + case MacroActionChoice_trim_junk_from_primer_seq: + any_change |= TrimPrimerSeqJunkInSeqEntry (sep, log_fp); + break; + case MacroActionChoice_fix_usa_and_states: + any_change |= FixUsaAndStateAbbreviations (entityID, log_fp); + break; + case MacroActionChoice_trim_stop_from_complete_cds: + if (TrimStopsFromCompleteCodingRegions(sep, log_fp)) { + ObjMgrSetDirtyFlag (entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); + needs_update = FALSE; + any_change = TRUE; + } + break; + case MacroActionChoice_synchronize_cds_partials: + if (ResynchCodingRegionPartialsEx(sep, log_fp)) { + ObjMgrSetDirtyFlag (entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); + needs_update = FALSE; + any_change = TRUE; + } + break; + case MacroActionChoice_adjust_for_consensus_splice: + if (AdjustSeqEntryForConsensusSpliceEx(sep, log_fp)) { + ObjMgrSetDirtyFlag (entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); + needs_update = FALSE; + any_change = TRUE; + } + break; + case MacroActionChoice_fix_pub_caps: + any_change |= ApplyFixPubCapsToSeqEntry (macro->data.ptrvalue, sep, log_fp); + break; + case MacroActionChoice_remove_seg_gaps: + num = RemoveSegGapsInSeqEntry (sep); + if (num > 0) { + if (log_fp != NULL) { + fprintf (log_fp, "Removed gaps in %d alignments\n", num); + } + any_change = TRUE; + } + break; + case MacroActionChoice_sort_fields: + num = SortFieldsInSeqEntry (macro->data.ptrvalue, sep); + if (num 
> 0) { + if (log_fp != NULL) { + summ = SummarizeSortFieldsAction (macro->data.ptrvalue); + fprintf (log_fp, "Changed order of fields for %d objects during %s\n", num, summ); + summ = MemFree (summ); + } + any_change = TRUE; + } + break; + case MacroActionChoice_apply_molinfo_block: + any_change |= ApplyMolinfoBlockToSeqEntryEx (sep, macro->data.ptrvalue, log_fp); + break; + case MacroActionChoice_fix_caps: + any_change |= ApplyFixCapsToSeqEntry (sep, macro->data.ptrvalue, log_fp); + break; + case MacroActionChoice_fix_format: + any_change |= ApplyFixFormatToSeqEntry (sep, macro->data.ptrvalue, log_fp); + break; + case MacroActionChoice_fix_spell: + any_change |= SpellFixSeqEntry (sep, macro->data.ptrvalue, log_fp); + break; + case MacroActionChoice_remove_duplicate_features: + any_change |= RemoveDuplicateFeaturesInSeqEntry (sep, macro->data.ptrvalue, log_fp); + ObjMgrSetDirtyFlag (entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); + needs_update = FALSE; + break; + case MacroActionChoice_remove_lineage_notes: + any_change |= RemoveLineageNotesInSeqEntry (sep, log_fp); + break; + case MacroActionChoice_remove_xrefs: + any_change |= MacroRemoveXrefs (sep, macro->data.ptrvalue, log_fp); + break; + case MacroActionChoice_make_gene_xrefs: + any_change |= MacroMakeGeneXrefs (sep, macro->data.ptrvalue, log_fp); + break; + case MacroActionChoice_make_bold_xrefs: + any_change |= MacroMakeBoldXrefs (sep, log_fp); + break; + case MacroActionChoice_fix_author: + any_change |= ApplyAuthorFixToSeqEntry (sep, macro->data.ptrvalue, log_fp); + break; + } + macro = macro->next; + } + + if (needs_update) { + ObjMgrSetDirtyFlag (entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); + } + return any_change; +} + + +NLM_EXTERN void ApplyMacroToSeqEntry (SeqEntryPtr sep, ValNodePtr macro) +{ + ApplyMacroToSeqEntryEx (sep, macro, NULL); +} + + +/* for generating text descriptions of macro objects */ +NLM_EXTERN CharPtr SummarizeSourceQual (ValNodePtr field) +{ 
+ CharPtr summ = NULL, locname, origname; + Int4 genome, origin; + CharPtr loc_fmt = "location %s"; + CharPtr orig_fmt = "origin %s"; + + if (field == NULL) return NULL; + switch (field->choice) { + case SourceQualChoice_textqual: + summ = StringSave (GetSourceQualName (field->data.intvalue)); + break; + case SourceQualChoice_location: + genome = GenomeFromSrcLoc (field->data.intvalue); + locname = LocNameFromGenome (genome); + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (loc_fmt) + StringLen (locname))); + sprintf (summ, loc_fmt, locname); + break; + case SourceQualChoice_origin: + origin = OriginFromSrcOrig (field->data.intvalue); + origname = OriginNameFromOrigin (origin); + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (orig_fmt) + StringLen (origname))); + sprintf (summ, orig_fmt, origname); + break; + } + return summ; +} + + +NLM_EXTERN CharPtr FeatureFieldLabel (CharPtr feature_name, ValNodePtr field) +{ + CharPtr cp; + CharPtr label = NULL; + CharPtr legal_fmt = "%s %s"; + CharPtr illegal_fmt = "constrained field on %s"; + + if (feature_name == NULL) { + feature_name = "Unknown feature"; + } + + if (field == NULL) { + return StringSave ("missing field"); + } else if (field->choice == FeatQualChoice_legal_qual) { + cp = GetFeatQualName (field->data.intvalue); + if (cp == NULL) cp = "Unknown field type"; + label = (CharPtr) MemNew (sizeof (Char) * (StringLen (legal_fmt) + StringLen (feature_name) + StringLen (cp))); + sprintf (label, legal_fmt, feature_name, cp); + } else if (field->choice == FeatQualChoice_illegal_qual) { + label = (CharPtr) MemNew (sizeof (Char) * (StringLen (illegal_fmt) + StringLen (feature_name))); + sprintf (label, illegal_fmt, feature_name); + } else { + label = StringSave ("illegal field value"); + } + return label; +} + + +NLM_EXTERN CharPtr SummarizeFieldType (ValNodePtr vnp) +{ + FeatureFieldPtr ffp; + CharPtr str = NULL; + CharPtr label = NULL; + CharPtr pub_fmt = "publication %s"; + + if (vnp == NULL) { + str = 
StringSave ("missing field"); + } else { + switch (vnp->choice) { + case FieldType_source_qual: + str = SummarizeSourceQual (vnp->data.ptrvalue); + break; + case FieldType_feature_field: + ffp = (FeatureFieldPtr) vnp->data.ptrvalue; + if (ffp == NULL || ffp->field == NULL) { + str = StringSave ("missing field"); + } else { + label = GetFeatureNameFromFeatureType (ffp->type); + str = FeatureFieldLabel (label, ffp->field); + } + break; + case FieldType_cds_gene_prot: + str = StringSaveNoNull (CDSGeneProtNameFromField (vnp->data.intvalue)); + if (str == NULL) { + str = StringSave ("Invalid CDS-Gene-Prot Field"); + } + break; + case FieldType_molinfo_field: + str = GetSequenceQualName (vnp->data.ptrvalue); + if (str == NULL) { + str = StringSave ("Invalid Sequence Qual Field"); + } + break; + case FieldType_pub: + switch (vnp->data.intvalue) { + case Publication_field_cit: + str = StringSave ("publication citation"); + break; + case Publication_field_authors: + str = StringSave ("publication authors"); + break; + case Publication_field_journal: + str = StringSave ("publication journal"); + break; + case Publication_field_volume: + str = StringSave ("publication volume"); + break; + case Publication_field_issue: + str = StringSave ("publication issue"); + break; + case Publication_field_pages: + str = StringSave ("publication pages"); + break; + case Publication_field_date: + str = StringSave ("publication date"); + break; + case Publication_field_serial_number: + str = StringSave ("publication serial number"); + break; + case Publication_field_title: + str = StringSave ("publication title"); + break; + default: + label = GetPubFieldLabel (vnp->data.intvalue); + if (label == NULL) { + str = StringSave ("Invalid field type"); + } else { + str = MemNew (sizeof (Char) * (StringLen (pub_fmt) + StringLen (label))); + sprintf (str, pub_fmt, label); + } + break; + } + break; + case FieldType_rna_field: + str = SummarizeRnaQual (vnp->data.ptrvalue); + break; + case 
FieldType_struc_comment_field: + str = SummarizeStructuredCommentField (vnp->data.ptrvalue); + break; + case FieldType_dblink: + str = StringSave (GetDBLinkNameFromDBLinkFieldType (vnp->data.intvalue)); + break; + case FieldType_misc: + if (vnp->data.intvalue == Misc_field_genome_project_id) { + str = StringSave ("Genome Project ID"); + } else if (vnp->data.intvalue == Misc_field_comment_descriptor) { + str = StringSave ("Comment Descriptor"); + } else if (vnp->data.intvalue == Misc_field_defline) { + str = StringSave ("Definition Line"); + } else if (vnp->data.intvalue == Misc_field_keyword) { + str = StringSave ("Keyword"); + } else { + str = StringSave ("Invalid field type"); + } + break; + default: + str = StringSave ("Invalid field type"); + break; + } + } + return str; +} + + +NLM_EXTERN FieldTypePtr FieldTypeFromString (CharPtr str) +{ + Int4 qual_type, feat_type = -1; + FieldTypePtr ft = NULL; + FeatureFieldPtr ffp; + ValNodePtr vnp; + CharPtr cpy, cp; + RnaQualPtr rq; + + if (StringHasNoText (str)) { + return NULL; + } + + /* check source quals first */ + qual_type = GetSourceQualTypeByName (str); + if (qual_type > -1) { + vnp = ValNodeNew (NULL); + vnp->choice = SourceQualChoice_textqual; + vnp->data.intvalue = qual_type; + ft = ValNodeNew (NULL); + ft->choice = FieldType_source_qual; + ft->data.ptrvalue = vnp; + } else { + /* try feature fields */ + cpy = StringSave (str); + cp = StringChr (cpy, ' '); + while (cp != NULL && feat_type == -1) { + *cp = 0; + feat_type = GetFeatureTypeByName (cpy); + if (feat_type < 0) { + *cp = ' '; + cp = StringChr (cp + 1, ' '); + } + } + if (feat_type > -1) { + qual_type = GetFeatQualByName (cp + 1); + if (qual_type > -1) { + ffp = FeatureFieldNew (); + ffp->type = feat_type; + ValNodeAddInt (&ffp->field, FeatQualChoice_legal_qual, qual_type); + ft = ValNodeNew (NULL); + ft->choice = FieldType_feature_field; + ft->data.ptrvalue = ffp; + } + } + cpy = MemFree (cpy); + if (ft == NULL) { + /* try CDS-gene-prot */ + 
qual_type = CDSGeneProtFieldFromName (str); + if (qual_type > -1) { + ft = ValNodeNew (NULL); + ft->choice = FieldType_cds_gene_prot; + ft->data.intvalue = qual_type; + } + } + if (ft == NULL) { + /* try RNA Quals */ + cpy = StringSave (str); + cp = StringChr (cpy, ' '); + if (cp != NULL) { + *cp = 0; + feat_type = GetRnaTypeForName (cpy); + qual_type = GetRnaFieldForName (cp + 1); + if (feat_type > -1 && qual_type > -1) { + rq = RnaQualNew (); + rq->type = ValNodeNew (NULL); + rq->type->choice = feat_type; + rq->type->data.ptrvalue = NULL; + rq->field = qual_type; + ft = ValNodeNew (NULL); + ft->choice = FieldType_rna_field; + ft->data.ptrvalue = rq; + } + } + cpy = MemFree (cpy); + } + } + return ft; +} + + +NLM_EXTERN Boolean IsFieldTypeNonText (ValNodePtr field_type) +{ + ValNodePtr vnp; + FeatureFieldPtr ffp; + Boolean rval = FALSE; + + if (field_type == NULL) { + return FALSE; + } + switch (field_type->choice) { + case FieldType_source_qual : + vnp = (ValNodePtr) field_type->data.ptrvalue; + if (vnp != NULL) { + if (vnp->choice == SourceQualChoice_location || vnp->choice == SourceQualChoice_origin) { + rval = TRUE; + } else if (vnp->choice == SourceQualChoice_textqual) { + if (IsNonTextSourceQual (vnp->data.intvalue)) { + rval = TRUE; + } + } + } + break; + case FieldType_feature_field : + ffp = (FeatureFieldPtr) field_type->data.ptrvalue; + if (ffp != NULL && ffp->field != NULL && ffp->field->choice == FeatQualChoice_legal_qual + && ffp->field->data.intvalue == Feat_qual_legal_pseudo) { + rval = TRUE; + } + break; + case FieldType_molinfo_field : + rval = TRUE; + break; + } + return rval; +} + + +NLM_EXTERN CharPtr SummarizeExistingText (Uint2 existing_text) +{ + CharPtr str = NULL; + + switch (existing_text) { + case ExistingTextOption_append_semi : + str = "append separated by semicolon"; + break; + case ExistingTextOption_append_space : + str = "append separated by space"; + break; + case ExistingTextOption_append_colon : + str = "append separated by 
colon"; + break; + case ExistingTextOption_append_comma: + str = "append separated by comma"; + break; + case ExistingTextOption_append_none : + str = "append (no separator)"; + break; + case ExistingTextOption_prefix_semi : + str = "prefix separated by semicolon"; + break; + case ExistingTextOption_prefix_space : + str = "prefix separated by space"; + break; + case ExistingTextOption_prefix_colon : + str = "prefix separated by colon"; + break; + case ExistingTextOption_prefix_comma: + str = "prefix separated by comma"; + break; + case ExistingTextOption_prefix_none : + str = "prefix (no separator)"; + break; + case ExistingTextOption_leave_old : + str = "ignore new text when existing text is present"; + break; + case ExistingTextOption_replace_old : + str = "overwrite existing text"; + break; + case ExistingTextOption_add_qual : + str = "add new qual"; + break; + default: + str = "invalid existing_text option"; + break; + } + return str; +} + + +static CharPtr SummarizeTextMarker (TextMarkerPtr text_marker) +{ + CharPtr summ = NULL; + + if (IsTextMarkerEmpty (text_marker)) { + return NULL; + } else if (text_marker->choice == TextMarker_free_text) { + summ = StringSave (text_marker->data.ptrvalue); + } else if (text_marker->choice == TextMarker_digits) { + summ = StringSave ("numbers"); + } else if (text_marker->choice == TextMarker_letters) { + summ = StringSave ("letters"); + } + return summ; +} + + +NLM_EXTERN CharPtr SummarizeTextPortion (TextPortionPtr text_portion) +{ + CharPtr summ = NULL; + CharPtr left_fmt = NULL, right_fmt = NULL; + CharPtr left_text = NULL, right_text = NULL; + Int4 len = 6; + + if (text_portion == NULL + || (IsTextMarkerEmpty (text_portion->left_marker) + && IsTextMarkerEmpty (text_portion->right_marker))) { + summ = StringSave ("entire text"); + } else { + left_text = SummarizeTextMarker(text_portion->left_marker); + right_text = SummarizeTextMarker(text_portion->right_marker); + + if (text_portion->inside) { + if (left_text != NULL) { 
+ if (text_portion->include_left) { + left_fmt = "starting with "; + } else { + left_fmt = "just after "; + } + len += StringLen (left_fmt) + StringLen (left_text) + 3; + } + if (right_text != NULL) { + if (text_portion->include_right) { + right_fmt = "up to and including "; + } else { + right_fmt = "up to "; + } + len += StringLen (right_fmt) + StringLen (right_text) + 3; + if (left_fmt != NULL) { + len += 2; + } + } + if (left_fmt == NULL && right_fmt == NULL) { + summ = StringSave ("entire text"); + } else { + summ = (CharPtr) MemNew (sizeof (Char) * len); + StringCat (summ, "text "); + if (left_fmt != NULL) { + StringCat (summ, left_fmt); + StringCat (summ, "'"); + StringCat (summ, left_text); + StringCat (summ, "'"); + if (right_fmt != NULL) { + StringCat (summ, ", "); + } + } + if (right_fmt != NULL) { + StringCat (summ, right_fmt); + StringCat (summ, "'"); + StringCat (summ, right_text); + StringCat (summ, "'"); + } + } + } else { + if (right_text != NULL) { + if (text_portion->include_right) { + right_fmt = "starting with "; + } else { + right_fmt = "after "; + } + len += StringLen (right_fmt) + StringLen (right_text) + 3; + } + if (left_text != NULL) { + if (text_portion->include_left) { + left_fmt = "up to and including "; + } else { + left_fmt = "before "; + } + len += StringLen (left_fmt) + StringLen (left_text) + 3; + if (right_fmt != NULL) { + len += 5; + } + } + + if (left_fmt == NULL && right_fmt == NULL) { + summ = StringSave ("entire text"); + } else { + summ = (CharPtr) MemNew (sizeof (Char) * len); + StringCat (summ, "text "); + if (right_fmt != NULL) { + StringCat (summ, right_fmt); + StringCat (summ, "'"); + StringCat (summ, right_text); + StringCat (summ, "'"); + if (left_fmt != NULL) { + StringCat (summ, " and "); + } + } + if (left_fmt != NULL) { + StringCat (summ, left_fmt); + StringCat (summ, "'"); + StringCat (summ, left_text); + StringCat (summ, "'"); + } + } + } + left_text = MemFree (left_text); + right_text = MemFree (right_text); + 
} + return summ; +} + + +const CharPtr kTaxnameAfterBinomialString = "Taxname after binomial"; + + +static CharPtr SummarizeParseSrcGeneralId (ValNodePtr vnp) +{ + CharPtr summ = NULL; + CharPtr fmt = "general ID %s tag"; + + if (vnp == NULL) { + return StringSave ("invalid id"); + } + switch (vnp->choice) { + case ParseSrcGeneralId_whole_text: + summ = StringSave ("entire general ID"); + break; + case ParseSrcGeneralId_db: + summ = StringSave ("general ID database"); + break; + case ParseSrcGeneralId_tag: + if (vnp->data.ptrvalue == NULL || StringHasNoText (vnp->data.ptrvalue)) { + summ = StringSave ("general ID tag"); + } else { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (vnp->data.ptrvalue))); + sprintf (summ, fmt, vnp->data.ptrvalue); + } + break; + default: + summ = StringSave ("invalid id"); + break; + } + return summ; +} + + +NLM_EXTERN CharPtr SummarizeParseSrc (ValNodePtr src) +{ + CharPtr summ = NULL; + CharPtr fmt = "structured comment field %s"; + ParseSrcOrgPtr src_org; + Boolean need_to_save = TRUE; + + if (src != NULL) { + switch (src->choice) { + case ParseSrc_defline: + summ = "defline"; + break; + case ParseSrc_flatfile: + summ = "flat file"; + break; + case ParseSrc_local_id: + summ = "local ID"; + break; + case ParseSrc_org: + src_org = (ParseSrcOrgPtr) src->data.ptrvalue; + if (src_org != NULL) { + if (src_org->field != NULL) { + if (src_org->field->choice == ParseSrcOrgChoice_taxname_after_binomial) { + summ = kTaxnameAfterBinomialString; + } else if (src_org->field->choice == ParseSrcOrgChoice_source_qual) { + summ = GetSourceQualName (src_org->field->data.intvalue); + } + } + } + break; + case ParseSrc_comment: + summ = "comment"; + break; + case ParseSrc_bankit_comment: + summ = "BankIT comment"; + break; + case ParseSrc_structured_comment: + if (!StringHasNoText (src->data.ptrvalue)) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (src->data.ptrvalue) + StringLen (fmt))); + sprintf (summ, fmt, 
src->data.ptrvalue); + need_to_save = FALSE; + } + break; + case ParseSrc_file_id: + summ = "file ID"; + break; + case ParseSrc_general_id: + summ = SummarizeParseSrcGeneralId(src->data.ptrvalue); + need_to_save = FALSE; + break; + } + } + if (summ == NULL) { + summ = StringSave ("missing field"); + } else if (need_to_save) { + summ = StringSave (summ); + } + return summ; +} + + +NLM_EXTERN CharPtr SummarizeParseDst (ValNodePtr dst) +{ + CharPtr summ = NULL; + CharPtr fmt = "%s %s"; + CharPtr feature, field; + ParseDstOrgPtr dst_org; + Boolean need_to_save = TRUE; + FeatureFieldLegalPtr ffp; + + if (dst != NULL) { + switch (dst->choice) { + case ParseDest_defline: + summ = "defline"; + break; + case ParseDest_org: + dst_org = (ParseDstOrgPtr) dst->data.ptrvalue; + if (dst_org != NULL) { + if (dst_org->field != NULL) { + switch (dst_org->field->choice) { + case SourceQualChoice_textqual: + summ = GetSourceQualName (dst_org->field->data.intvalue); + break; + case SourceQualChoice_location: + summ = "location"; + break; + case SourceQualChoice_origin: + summ = "origin"; + break; + } + } + } + break; + case ParseDest_featqual: + ffp = (FeatureFieldLegalPtr) dst->data.ptrvalue; + if (ffp != NULL) { + feature = GetFeatureNameFromFeatureType (ffp->type); + field = GetFeatQualName (ffp->field); + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (feature) + StringLen (field))); + sprintf (summ, fmt, feature, field); + need_to_save = FALSE; + } + break; + case ParseDest_dbxref: + summ = "dbxref"; + break; + } + } + if (summ == NULL) { + summ = StringSave ("missing field"); + } else if (need_to_save) { + summ = StringSave (summ); + } + return summ; +} + + +/* summarizing AECR actions */ +static CharPtr SummarizeFieldPairType (ValNodePtr vnp, CharPtr connect_word) +{ + FeatureFieldPairPtr ffp; + CDSGeneProtFieldPairPtr cgp; + SourceQualPairPtr quals; + MolinfoFieldPairPtr m_fields; + RnaQualPairPtr rna_quals; + CharPtr str = NULL; + CharPtr from_label = 
NULL, to_label = NULL; + CharPtr label_fmt = "%s %s %s"; + CharPtr type_label_fmt = "%s %s %s %s"; + CharPtr label = NULL; + + if (connect_word == NULL) { + connect_word = "to"; + } + if (vnp == NULL) { + str = StringSave ("missing field"); + } else { + switch (vnp->choice) { + case FieldPairType_source_qual: + if (vnp->data.ptrvalue != NULL) { + quals = (SourceQualPairPtr) vnp->data.ptrvalue; + from_label = GetSourceQualName (quals->field_from); + to_label = GetSourceQualName (quals->field_to); + } + if (from_label != NULL && to_label != NULL) { + str = (CharPtr) MemNew (sizeof (Char) * + (StringLen (from_label) + StringLen (connect_word) + StringLen (to_label) + + 3)); + sprintf (str, "%s %s %s", from_label, connect_word, to_label); + } else { + str = StringSave ("missing field"); + } + break; + case FieldPairType_feature_field: + ffp = (FeatureFieldPairPtr) vnp->data.ptrvalue; + if (ffp == NULL || ffp->field_from == NULL || ffp->field_to == NULL) { + str = StringSave ("missing field"); + } else { + label = GetFeatureNameFromFeatureType (ffp->type); + from_label = FeatureFieldLabel (label, ffp->field_from); + to_label = FeatureFieldLabel (label, ffp->field_to); + str = (CharPtr) MemNew (sizeof (Char) * + (StringLen (label_fmt) + + StringLen (from_label) + StringLen (to_label) + + StringLen (connect_word))); + sprintf (str, label_fmt, from_label, connect_word, to_label); + from_label = MemFree (from_label); + to_label = MemFree (to_label); + } + break; + case FieldPairType_cds_gene_prot: + cgp = (CDSGeneProtFieldPairPtr) vnp->data.ptrvalue; + from_label = CDSGeneProtNameFromField (cgp->field_from); + to_label = CDSGeneProtNameFromField (cgp->field_to); + str = (CharPtr) MemNew (sizeof (Char) * + StringLen (from_label) + StringLen (connect_word) + StringLen (to_label) + + 3); + sprintf (str, "%s %s %s", from_label, connect_word, to_label); + break; + case FieldPairType_molinfo_field: + m_fields = (MolinfoFieldPairPtr) vnp->data.ptrvalue; + from_label = NULL; + 
to_label = NULL; + label = NULL; + switch (m_fields->choice) { + case MolinfoFieldPair_molecule: + from_label = BiomolNameFromBiomol (BiomolFromMoleculeType (((MolinfoMoleculePairPtr) m_fields->data.ptrvalue)->from)); + to_label = BiomolNameFromBiomol (BiomolFromMoleculeType (((MolinfoMoleculePairPtr) m_fields->data.ptrvalue)->to)); + label = "molecule"; + break; + case MolinfoFieldPair_technique: + from_label = TechNameFromTech (TechFromTechniqueType (((MolinfoTechniquePairPtr) m_fields->data.ptrvalue)->from)); + to_label = TechNameFromTech (TechFromTechniqueType (((MolinfoTechniquePairPtr) m_fields->data.ptrvalue)->to)); + label = "technique"; + break; + case MolinfoFieldPair_completedness: + from_label = CompletenessNameFromCompleteness (CompletenessFromCompletednessType (((MolinfoCompletednessPairPtr) m_fields->data.ptrvalue)->from)); + to_label = CompletenessNameFromCompleteness (CompletenessFromCompletednessType (((MolinfoCompletednessPairPtr) m_fields->data.ptrvalue)->to)); + label = "completeness"; + break; + case MolinfoFieldPair_mol_class: + from_label = MolNameFromMol (MolFromMoleculeClassType (((MolinfoMolClassPairPtr) m_fields->data.ptrvalue)->from)); + to_label = MolNameFromMol (MolFromMoleculeClassType (((MolinfoMolClassPairPtr) m_fields->data.ptrvalue)->to)); + label = "class"; + break; + case MolinfoFieldPair_topology: + from_label = TopologyNameFromTopology (TopologyFromTopologyType (((MolinfoTopologyPairPtr) m_fields->data.ptrvalue)->from)); + to_label = TopologyNameFromTopology (TopologyFromTopologyType (((MolinfoTopologyPairPtr) m_fields->data.ptrvalue)->to)); + label = "topology"; + break; + case MolinfoFieldPair_strand: + from_label = StrandNameFromStrand (StrandFromStrandType (((MolinfoStrandPairPtr) m_fields->data.ptrvalue)->from)); + to_label = StrandNameFromStrand (StrandFromStrandType (((MolinfoStrandPairPtr) m_fields->data.ptrvalue)->to)); + label = "strand"; + break; + } + if (from_label == NULL) { + from_label = "Unknown value"; + } + 
if (to_label == NULL) { + to_label = "Unknown value"; + } + if (label == NULL) { + label = "Unknown molinfo field"; + } + str = (CharPtr) MemNew (sizeof (Char) * (StringLen (type_label_fmt) + + StringLen (label) + + StringLen (from_label) + + StringLen (to_label) + + StringLen (connect_word))); + sprintf (str, type_label_fmt, label, from_label, connect_word, to_label); + break; + case FieldPairType_rna_field: + if (vnp->data.ptrvalue != NULL) { + rna_quals = (RnaQualPairPtr) vnp->data.ptrvalue; + label = SummarizeRnaType (rna_quals->type); + from_label = GetNameForRnaField (rna_quals->field_from); + to_label = GetNameForRnaField (rna_quals->field_to); + } + if (from_label != NULL && to_label != NULL && label != NULL) { + str = (CharPtr) MemNew (sizeof (Char) * (StringLen (type_label_fmt) + + StringLen (label) + + StringLen (from_label) + StringLen (connect_word) + StringLen (to_label))); + sprintf (str, type_label_fmt, label, from_label, connect_word, to_label); + } else { + str = StringSave ("missing field"); + } + label = MemFree (label); + break; + + default: + str = StringSave ("Invalid field type"); + break; + } + } + return str; +} + +static CharPtr SummarizeApplyAction (ApplyActionPtr a) +{ + CharPtr str = NULL; + CharPtr fmt = "Apply %s to %s (%s)"; + CharPtr nontextqual_fmt = "Apply %s (%s)"; + CharPtr field, existing_text; + if (a == NULL) { + str = StringSave ("No action"); + } else if (a->value == NULL || a->field == NULL) { + str = StringSave ("Invalid action"); + } else { + field = SummarizeFieldType (a->field); + existing_text = SummarizeExistingText (a->existing_text); + if (IsFieldTypeNonText (a->field)) { + str = (CharPtr) MemNew (sizeof (Char) * StringLen (nontextqual_fmt) + StringLen (field) + StringLen (existing_text)); + sprintf (str, nontextqual_fmt, field, existing_text); + } else { + str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (a->value) + StringLen (field) + StringLen (existing_text))); + sprintf (str, fmt, 
a->value, field, existing_text); + } + field = MemFree (field); + } + return str; +} -static Int4 num_convert_feature_table_lines = sizeof (conversion_functions) / sizeof (ConvertFeatTableData); -static Int4 GetConversionFunctionTableLine (Uint2 seqfeat_from, Uint2 featdef_from, Uint2 seqfeat_to, Uint2 featdef_to) +static CharPtr SummarizeEditAction (EditActionPtr a) { - Int4 i, table_line_num = -1; + CharPtr str = NULL; + CharPtr fmt = "Edit %s replace '%s'%s with '%s'"; + CharPtr case_insensitive = " (case insensitive)"; + CharPtr field; + Int4 len; - for (i = 0; i < num_convert_feature_table_lines && table_line_num == -1; i++) - { - if ((conversion_functions[i].seqfeat_from == 0 || conversion_functions[i].seqfeat_from == seqfeat_from) - && (conversion_functions[i].featdef_from == FEATDEF_ANY || conversion_functions[i].featdef_from == featdef_from) - && (conversion_functions[i].seqfeat_to == 0 || conversion_functions[i].seqfeat_to == seqfeat_to) - && (conversion_functions[i].featdef_to == FEATDEF_ANY || conversion_functions[i].featdef_to == featdef_to)) - { - table_line_num = i; + if (a == NULL) { + str = StringSave ("No action"); + } else if (a->field == NULL || a->field == NULL || a->edit == NULL || a->edit->find_txt == NULL) { + str = StringSave ("Invalid action"); + } else { + field = SummarizeFieldType (a->field); + len = StringLen (fmt) + StringLen (field) + StringLen (a->edit->find_txt) + StringLen (a->edit->repl_txt); + if (a->edit->case_insensitive) { + len += StringLen (case_insensitive); } + + str = (CharPtr) MemNew (sizeof (Char) * len); + sprintf (str, fmt, field, a->edit->find_txt, + a->edit->case_insensitive ? case_insensitive : "", + a->edit->repl_txt == NULL ? 
"" : a->edit->repl_txt); + field = MemFree (field); } - return table_line_num; + return str; } -NLM_EXTERN Boolean IsConversionSupported (Uint2 type_from, Uint2 type_to) +static CharPtr SummarizeConvertAction (ConvertActionPtr a) { - Int4 line; - Uint2 featdef_from, featdef_to, seqfeat_from, seqfeat_to; + CharPtr str = NULL; + CharPtr fmt = "Convert %s (%s)"; + CharPtr fields, existing_text; - featdef_from = GetFeatdefFromFeatureType (type_from); - seqfeat_from = FindFeatFromFeatDefType (featdef_from); - featdef_to = GetFeatdefFromFeatureType (type_to); - seqfeat_to = FindFeatFromFeatDefType (featdef_to); - line = GetConversionFunctionTableLine (seqfeat_from, featdef_from, seqfeat_to, featdef_to); - if (line > -1 && conversion_functions[line].func != NULL) { - return TRUE; + if (a == NULL) { + str = StringSave ("No action"); + } else if (a->fields == NULL || a->fields == NULL) { + str = StringSave ("Invalid action"); } else { - return FALSE; + fields = SummarizeFieldPairType (a->fields, "to"); + existing_text = SummarizeExistingText (a->existing_text); + str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fields) + StringLen (existing_text))); + sprintf (str, fmt, fields, existing_text); + fields = MemFree (fields); } + return str; } -static CharPtr GetFeatureTextForLogging (SeqFeatPtr sfp) +static CharPtr SummarizeCopyAction (CopyActionPtr a) { - ValNode vn; - Int4 len; - CharPtr txt = NULL; + CharPtr str = NULL; + CharPtr fmt = "Copy %s (%s)"; + CharPtr fields, existing_text; - MemSet (&vn, 0, sizeof (ValNode)); - vn.choice = OBJ_SEQFEAT; - vn.data.ptrvalue = sfp; - txt = GetDiscrepancyItemText (&vn); - if (txt == NULL) { - txt = StringSave ("(null)"); + if (a == NULL) { + str = StringSave ("No action"); + } else if (a->fields == NULL) { + str = StringSave ("Invalid action"); } else { - len = StringLen (txt); - if (len > 0 && txt[len - 1] == '\n') { - txt[len - 1] = 0; - } + fields = SummarizeFieldPairType (a->fields, "to"); + existing_text = 
SummarizeExistingText (a->existing_text); + str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fields) + StringLen (existing_text))); + sprintf (str, fmt, fields, existing_text); + fields = MemFree (fields); } - return txt; + return str; } -static Int4 ApplyConvertFeatureActionToSeqEntry (ConvertFeatureActionPtr action, SeqEntryPtr sep, FILE *log_fp) +static CharPtr SummarizeSwapAction (SwapActionPtr a) { - ConvertAndRemoveFeatureCollectionData d; - ValNodePtr vnp; - SeqFeatPtr sfp, sfp_copy; - Int4 num_affected = 0, table_line; - Uint2 seqfeat_from, featdef_from, seqfeat_to, featdef_to; - /* variables for logging */ - CharPtr txt_old, txt_new; + CharPtr str = NULL; + CharPtr fmt = "Swap %s"; + CharPtr fields; - if (action == NULL) return 0; + if (a == NULL) { + str = StringSave ("No action"); + } else if (a->fields == NULL) { + str = StringSave ("Invalid action"); + } else { + fields = SummarizeFieldPairType (a->fields, "with"); + str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fields))); + sprintf (str, fmt, fields); + fields = MemFree (fields); + } + return str; +} - featdef_from = GetFeatdefFromFeatureType (action->type_from); - seqfeat_from = FindFeatFromFeatDefType(featdef_from); - featdef_to = GetFeatdefFromFeatureType (action->type_to); - seqfeat_to = FindFeatFromFeatDefType (featdef_to); - table_line = GetConversionFunctionTableLine (seqfeat_from, featdef_from, seqfeat_to, featdef_to); - if (table_line < 0 || conversion_functions[table_line].func == NULL) { - return 0; + +static CharPtr SummarizeCapChange (Uint1 cap_change) +{ + CharPtr rval = NULL; + + switch (cap_change) { + case Cap_change_tolower: + rval = StringSave ("change capitalization to lower"); + break; + case Cap_change_toupper: + rval = StringSave ("change capitalization to upper"); + break; + case Cap_change_firstcap: + rval = StringSave ("capitalize first letter, remaining lower case"); + break; + case Cap_change_firstcaprestnochange: + rval = 
StringSave ("capitalize first letter, do not change other characters"); + break; } + return rval; +} - d.featdef = GetFeatdefFromFeatureType (action->type_from); - d.constraint_set = action->src_feat_constraint; - d.feature_list = NULL; - VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback); - for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) { - sfp = vnp->data.ptrvalue; - if (sfp != NULL) { - sfp_copy = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite); - /* add subtype value to copy */ - sfp_copy->idx.subtype = sfp->idx.subtype; - sfp_copy->next = sfp->next; - sfp->next = sfp_copy; +NLM_EXTERN CharPtr SummarizeTextTransform (ValNodePtr transform) +{ + FieldEditPtr edit; + CharPtr replace_fmt = "replace '%s' with '%s'"; + CharPtr remove_fmt = "remove %s"; + CharPtr case_insensitive = " (case insensitive)"; + CharPtr rval = NULL, tmp; + Int4 len = 0; - if (conversion_functions[table_line].func (sfp_copy, featdef_to, action->dst_options)) { - ApplyConvertFeatureSrcOptions (sfp_copy, action->src_options, action->leave_original); - num_affected ++; - if (!action->leave_original) { - sfp->idx.deleteme = TRUE; + if (transform == NULL) { + return NULL; + } + + switch (transform->choice) { + case TextTransform_edit: + if ((edit = (FieldEditPtr) transform->data.ptrvalue) != NULL) { + len = StringLen (replace_fmt) + StringLen (edit->find_txt) + StringLen (edit->repl_txt); + if (edit->case_insensitive) { + len += StringLen (case_insensitive); } - if (log_fp != NULL) { - txt_old = GetFeatureTextForLogging (sfp); - txt_new = GetFeatureTextForLogging (sfp_copy); - if (action->leave_original) { - fprintf (log_fp, "Added new feature %s based on %s\n", txt_new, txt_old); - } else { - fprintf (log_fp, "Replaced feature %s with %s\n", txt_old, txt_new); - } - txt_old = MemFree (txt_old); - txt_new = MemFree (txt_new); + rval = (CharPtr) MemNew (sizeof (Char) * len); + sprintf (rval, replace_fmt, edit->find_txt == 
NULL ? "" : edit->find_txt, edit->repl_txt == NULL ? "" : edit->repl_txt); + if (edit->case_insensitive) { + StringCat (rval, case_insensitive); } - sfp_copy->idx.subtype = 0; + } + break; + case TextTransform_caps: + rval = SummarizeCapChange(transform->data.intvalue); + break; + case TextTransform_remove: + tmp = SummarizeTextPortion (transform->data.ptrvalue); + rval = (CharPtr) MemNew (sizeof (Char) * (StringLen (remove_fmt) + StringLen (tmp))); + sprintf (rval, remove_fmt, tmp); + tmp = MemFree (tmp); + break; + } + return rval; +} + + +static CharPtr SummarizeTextTransformList (ValNodePtr text_transform) +{ + ValNodePtr str_list = NULL, vnp; + Int4 len = 0; + CharPtr rval = NULL, tmp; + + for (vnp = text_transform; vnp != NULL; vnp = vnp->next) { + tmp = SummarizeTextTransform (vnp); + if (tmp != NULL) { + ValNodeAddPointer (&str_list, 0, tmp); + len += StringLen (tmp) + 3; + } + } + + rval = (CharPtr) MemNew (sizeof (Char) * len); + for (vnp = str_list; vnp != NULL; vnp = vnp->next) { + StringCat (rval, vnp->data.ptrvalue); + if (vnp->next != NULL) { + StringCat (rval, ", "); + } + } + str_list = ValNodeFreeData (str_list); + return rval; +} + + +static CharPtr SummarizeAECRParseAction (AECRParseActionPtr a) +{ + CharPtr str = NULL; + CharPtr fmt = "Parse %s%s%s from %s(%s)"; + CharPtr fields, existing_text, text_portion, transform; + + if (a == NULL) { + str = StringSave ("No action"); + } else if (a->fields == NULL) { + str = StringSave ("Invalid action"); + } else { + fields = SummarizeFieldPairType (a->fields, "to"); + existing_text = SummarizeExistingText (a->existing_text); + text_portion = SummarizeTextPortion (a->portion); + transform = SummarizeTextTransformList(a->transform); + str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (text_portion) + StringLen (transform) + StringLen (fields) + StringLen (existing_text))); + sprintf (str, fmt, text_portion, transform == NULL ? "" : " ", transform == NULL ? 
"" : transform, fields, existing_text); + fields = MemFree (fields); + text_portion = MemFree (text_portion); + transform = MemFree (transform); + } + return str; +} + + +static CharPtr SummarizeRemoveAction (RemoveActionPtr a) +{ + CharPtr str = NULL; + CharPtr fmt = "Remove %s"; + CharPtr field; + + if (a == NULL) { + str = StringSave ("No action"); + } else if (a->field == NULL || a->field == NULL) { + str = StringSave ("Invalid action"); + } else { + field = SummarizeFieldType (a->field); + str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (field))); + sprintf (str, fmt, field); + field = MemFree (field); + } + return str; +} + + +NLM_EXTERN CharPtr SummarizeAECRAction (AECRActionPtr a) +{ + CharPtr str = NULL, act = NULL, constraint = NULL; + if (a == NULL) { + str = StringSave ("No action"); + } else if (a->action == NULL) { + str = StringSave ("Invalid command"); + } else { + switch (a->action->choice) { + case ActionChoice_apply: + act = SummarizeApplyAction (a->action->data.ptrvalue); + break; + case ActionChoice_edit: + act = SummarizeEditAction (a->action->data.ptrvalue); + break; + case ActionChoice_convert: + act = SummarizeConvertAction (a->action->data.ptrvalue); + break; + case ActionChoice_copy: + act = SummarizeCopyAction (a->action->data.ptrvalue); + break; + case ActionChoice_swap: + act = SummarizeSwapAction (a->action->data.ptrvalue); + break; + case ActionChoice_remove: + act = SummarizeRemoveAction (a->action->data.ptrvalue); + break; + case ActionChoice_parse: + act = SummarizeAECRParseAction (a->action->data.ptrvalue); + break; + } + if (act == NULL) { + str = StringSave ("Invalid action"); + } else { + constraint = SummarizeConstraintSet (a->constraint); + if (constraint == NULL) { + str = act; } else { - sfp_copy->idx.deleteme = TRUE; + str = (CharPtr) MemNew (sizeof (Char) * (StringLen(act) + 2 + StringLen (constraint))); + sprintf (str, "%s %s", act, constraint); + act = MemFree (act); + constraint = MemFree 
(constraint); } } } - DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); - RenormalizeNucProtSets (sep, TRUE); - return num_affected; + return str; } -/* Functions for editing feature locations */ -static Boolean DoesStrandMatch (Int4 strand_choice, Uint1 strand_val) +NLM_EXTERN CharPtr SummarizeParseAction (ParseActionPtr p) { - Boolean rval = FALSE; - - switch (strand_choice) - { - case Feature_location_strand_from_any: - rval = TRUE; + CharPtr field_from = NULL, field_to = NULL; + CharPtr existing_text = NULL, text_portion = NULL, transform; + CharPtr summ = NULL; + CharPtr fmt = "Parse %s from %s to %s%s%s (%s)"; + + if (p == NULL) { + summ = StringSave ("No action"); + } else { + field_from = SummarizeParseSrc (p->src); + field_to = SummarizeParseDst (p->dest); + existing_text = SummarizeExistingText (p->existing_text); + text_portion = SummarizeTextPortion (p->portion); + transform = SummarizeTextTransformList(p->transform); + + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + + StringLen (text_portion) + + StringLen (field_from) + + StringLen (field_to) + + StringLen (transform) + + StringLen (existing_text))); + sprintf (summ, fmt, text_portion, field_from, field_to, transform == NULL ? "" : " ", transform == NULL ? 
"" : transform, existing_text); + text_portion = MemFree (text_portion); + field_from = MemFree (field_from); + field_to = MemFree (field_to); + } + return summ; +} + + +static CharPtr SummarizeAutodefClauseListType (Uint2 clause_list_type) +{ + CharPtr str = "complete sequence"; + + switch (clause_list_type) { + case Autodef_list_type_feature_list: + str = "list features"; break; - case Feature_location_strand_from_unknown: - if (strand_val == Seq_strand_unknown) - { - rval = TRUE; - } + case Autodef_list_type_complete_sequence: + str = "complete sequence"; break; - case Feature_location_strand_from_plus: - if (strand_val != Seq_strand_minus) - { - rval = TRUE; - } + case Autodef_list_type_complete_genome: + str = "complete genome"; break; - case Feature_location_strand_from_minus: - if (strand_val == Seq_strand_minus) - { - rval = TRUE; + } + return str; +} + + +NLM_EXTERN CharPtr SummarizeAutodefAction (AutodefActionPtr autodef) +{ + CharPtr label = NULL, mod_name; + CharPtr str = NULL; + CharPtr fmt = "Autodef %s"; + CharPtr modifiers_fmt = " with modifier"; + Int4 len; + ValNodePtr mod_names = NULL, vnp; + + if (autodef == NULL) { + str = StringSave ("No action"); + } else { + label = SummarizeAutodefClauseListType (autodef->clause_list_type); + len = StringLen (fmt) + StringLen (label); + if (autodef->modifiers != NULL) { + len += StringLen (modifiers_fmt) + 2; + for (vnp = autodef->modifiers; vnp != NULL; vnp = vnp->next) { + mod_name = GetSourceQualName (vnp->data.intvalue); + len += StringLen (mod_name) + 3; + ValNodeAddPointer (&mod_names, 0, mod_name); } - break; - case Feature_location_strand_from_both: - if (strand_val == Seq_strand_both) - { - rval = TRUE; + } + + str = (CharPtr) MemNew (sizeof (Char) * (len + 1)); + sprintf (str, fmt, label); + + if (autodef->modifiers != NULL) { + StringCat (str, modifiers_fmt); + if (autodef->modifiers->next != NULL) { + StringCat (str, "s"); } - break; + for (vnp = mod_names; vnp != NULL; vnp = vnp->next) { + 
StringCat (str, " "); + StringCat (str, vnp->data.ptrvalue); + if (vnp->next != NULL) { + StringCat (str, ","); + } + } + } + + mod_names = ValNodeFree (mod_names); } - return rval; + + return str; } -static Uint1 GetNewStrandValue (Int4 strand_choice, Uint1 strand_val) +NLM_EXTERN CharPtr SummarizeRemoveDescriptorAction (RemoveDescriptorActionPtr a) { - Uint1 rval = Seq_strand_unknown; - - switch (strand_choice) - { - case Feature_location_strand_to_reverse: - switch (strand_val) - { - case Seq_strand_plus: - case Seq_strand_unknown: - rval = Seq_strand_minus; - break; - case Seq_strand_minus: - rval = Seq_strand_plus; - break; - default: - rval = strand_val; - break; - } - break; - case Feature_location_strand_to_unknown: - rval = Seq_strand_unknown; - break; - case Feature_location_strand_to_plus: - rval = Seq_strand_plus; - break; - case Feature_location_strand_to_minus: - rval = Seq_strand_minus; - break; - case Feature_location_strand_to_both: - rval = Seq_strand_both; - break; - } - return rval; + CharPtr label = NULL; + CharPtr constraint, str; + CharPtr fmt = "Remove %s"; + CharPtr constraint_fmt = "Remove %s descriptors %s"; + + if (a == NULL) { + str = StringSave ("No action"); + } else { + label = GetDescriptorNameFromDescriptorType (a->type); + constraint = SummarizeConstraintSet (a->constraint); + if (constraint == NULL) { + str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label))); + sprintf (str, fmt, label); + } else { + str = (CharPtr) MemNew (sizeof (Char) * (StringLen (constraint_fmt) + StringLen (label) + StringLen (constraint))); + sprintf (str, constraint_fmt, label, constraint); + constraint = MemFree (constraint); + } + } + + return str; } -static Boolean ConvertLocationStrand (SeqLocPtr slp, Int4 fromStrand, Int4 toStrand) +NLM_EXTERN CharPtr SummarizeFixPubCapsAction (FixPubCapsActionPtr a) { - SeqLocPtr loc; - PackSeqPntPtr psp; - SeqBondPtr sbp; - SeqIntPtr sinp; - SeqPntPtr spp; - Boolean rval = FALSE; - Uint1 
strand_orig; + CharPtr constraint = NULL; + Int4 len = 0; + CharPtr descriptions[] = {"affiliation", "title", "authors", "affiliation country"}; + CharPtr punct_only = " (punctuation only)"; + Boolean present[4]; + Int4 i, first = 4, last = 0, num_items = 0; + CharPtr summ = NULL; - while (slp != NULL) { - switch (slp->choice) { - case SEQLOC_NULL : - break; - case SEQLOC_EMPTY : - case SEQLOC_WHOLE : - break; - case SEQLOC_INT : - sinp = (SeqIntPtr) slp->data.ptrvalue; - if (sinp != NULL && DoesStrandMatch (fromStrand, sinp->strand)) - { - strand_orig = sinp->strand; - sinp->strand = GetNewStrandValue (toStrand, sinp->strand); - if (strand_orig != sinp->strand) { - rval = TRUE; - } - } - break; - case SEQLOC_PNT : - spp = (SeqPntPtr) slp->data.ptrvalue; - if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand)) - { - strand_orig = spp->strand; - spp->strand = GetNewStrandValue (toStrand, spp->strand); - if (strand_orig != spp->strand) { - rval = TRUE; - } - } - break; - case SEQLOC_PACKED_PNT : - psp = (PackSeqPntPtr) slp->data.ptrvalue; - if (psp != NULL && DoesStrandMatch (fromStrand, psp->strand)) - { - strand_orig = psp->strand; - psp->strand = GetNewStrandValue (toStrand, psp->strand); - if (strand_orig != psp->strand) { - rval = TRUE; - } - } - break; - case SEQLOC_PACKED_INT : - case SEQLOC_MIX : - case SEQLOC_EQUIV : - loc = (SeqLocPtr) slp->data.ptrvalue; - while (loc != NULL) { - rval |= ConvertLocationStrand (loc, fromStrand, toStrand); - loc = loc->next; - } - break; - case SEQLOC_BOND : - sbp = (SeqBondPtr) slp->data.ptrvalue; - if (sbp != NULL) { - spp = (SeqPntPtr) sbp->a; - if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand)) - { - strand_orig = spp->strand; - spp->strand = GetNewStrandValue (toStrand, spp->strand); - if (strand_orig != spp->strand) { - rval = TRUE; - } + if (a == NULL) { + return NULL; + } + + present[0] = a->affiliation; + present[1] = a->title; + present[2] = a->authors; + present[3] = a->affil_country; + + for (i 
= 0; i < 4; i++) { + if (present[i]) { + len += 6 + StringLen (descriptions[i]); + if (first == 4) { + first = i; + } + last = i; + num_items++; + } + } + + if (len > 0) { + if (a->punct_only) { + len += StringLen (punct_only); + } + constraint = SummarizeConstraintSet (a->constraint); + len += StringLen (constraint) + 14; + summ = (CharPtr) MemNew (sizeof (Char) * len); + sprintf (summ, "Fix pub "); + for (i = 0; i < 4; i++) { + if (present[i]) { + if (i != first) { + if (num_items > 2) { + StringCat (summ, ", "); } - spp = (SeqPntPtr) sbp->b; - if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand)) - { - strand_orig = spp->strand; - spp->strand = GetNewStrandValue (toStrand, spp->strand); - if (strand_orig != spp->strand) { - rval = TRUE; - } + if (i == last) { + StringCat (summ, " and "); } } + StringCat (summ, descriptions[i]); + } + } + if (a->punct_only) { + StringCat (summ, punct_only); + } + if (constraint != NULL) { + StringCat (summ, " where "); + StringCat (summ, constraint); + } + constraint = MemFree (constraint); + } + + return summ; +} + + +NLM_EXTERN CharPtr SummarizeFixCapsAction (FixCapsActionPtr action) +{ + CharPtr summ = NULL, tmp; + CharPtr fmt = "Fix capitalization in %s source qualifier"; + + if (action == NULL) { + summ = StringSave ("Invalid action"); + } else { + switch (action->choice) { + case FixCapsAction_pub: + summ = SummarizeFixPubCapsAction (action->data.ptrvalue); break; - case SEQLOC_FEAT : + case FixCapsAction_src_country: + summ = StringSave ("Fix source country qualifier capitalization"); break; - default : + case FixCapsAction_mouse_strain: + summ = StringSave ("Fix capitalization in common Mus musculus strains"); + break; + case FixCapsAction_src_qual: + tmp = GetSourceQualName (action->data.intvalue); + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (tmp) + StringLen (fmt))); + sprintf (summ, fmt, tmp); + break; + default: + summ = StringSave ("Invalid action"); break; } - slp = slp->next; } - return rval; + 
return summ; } -static Boolean ApplyEditLocationStrandToSeqFeat (EditLocationStrandPtr edit, SeqFeatPtr sfp) +NLM_EXTERN CharPtr SummarizeFixFormatAction (FixFormatActionPtr action) { - Boolean rval = FALSE; - - if (edit == NULL || sfp == NULL) { - return FALSE; + CharPtr summ = NULL; + if (action == NULL) { + summ = StringSave ("Invalid action"); + } else { + switch (action->choice) { + case FixFormatAction_collection_date: + summ = StringSave ("Fix collection-date format"); + break; + case FixFormatAction_lat_lon: + summ = StringSave ("Fix lat-lon format"); + break; + case FixFormatAction_primers: + summ = StringSave ("Fix i in primer sequence"); + break; + case FixFormatAction_protein_name: + summ = StringSave ("Remove organism names from protein names"); + break; + default: + summ = StringSave ("Invalid action"); + break; + } } - - rval = ConvertLocationStrand (sfp->location, edit->strand_from, edit->strand_to); - return rval; + return summ; } -static Boolean At5EndOfSequence (SeqLocPtr slp, BioseqPtr bsp) +NLM_EXTERN CharPtr SummarizeRemoveDuplicateFeaturesAction (RemoveDuplicateFeatureActionPtr action) { - Uint1 strand; - Int4 start; - Boolean at_end = FALSE; + CharPtr summ = NULL; + CharPtr start_fmt = "Remove duplicate%s%s features"; + CharPtr feat_type; + CharPtr case_sensitive = "(case-sensitive)"; + CharPtr ignore_partials = "(ignore partials)"; + CharPtr remove_proteins = " and remove protein products"; - if (slp == NULL || bsp == NULL) return FALSE; + Int4 len = 0; - strand = SeqLocStrand (slp); + if (action == NULL) { + summ = StringSave ("Invalid action"); + } else { + len = StringLen (start_fmt); + if (action->type == Macro_feature_type_any) { + feat_type = ""; + } else { + feat_type = GetFeatureNameFromFeatureType (action->type); + } + len += StringLen (feat_type) + 1; + if (action->case_sensitive) { + len += StringLen (case_sensitive); + } + if (action->ignore_partials) { + len += StringLen (ignore_partials); + } + if (action->remove_proteins) { + 
len += StringLen (remove_proteins); + } - if (strand == Seq_strand_minus) { - start = SeqLocStop (slp); - if (start == bsp->length - 1) { - at_end = TRUE; + summ = (CharPtr) MemNew (sizeof (Char) * len); + sprintf (summ, start_fmt, action->type == Macro_feature_type_any ? "" : " ", feat_type); + if (action->case_sensitive) { + StringCat (summ, case_sensitive); } - } else { - start = SeqLocStart (slp); - if (start == 0) { - at_end = TRUE; + if (action->ignore_partials) { + StringCat (summ, ignore_partials); + } + if (action->remove_proteins) { + StringCat (summ, remove_proteins); } } - return at_end; + return summ; } -static Boolean HasGoodStartCodon (SeqFeatPtr sfp) + +NLM_EXTERN CharPtr GetSortOrderName (Uint2 order) { - ByteStorePtr bs; - CharPtr prot; - Boolean has_start = FALSE; + CharPtr rval = NULL; - if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) { - bs = ProteinFromCdRegionEx (sfp, TRUE, FALSE); - if (bs != NULL) { - prot = BSMerge (bs, NULL); - bs = BSFree (bs); - if (prot != NULL && *prot == 'M') { - has_start = TRUE; - } - prot = MemFree (prot); - } + switch (order) { + case Sort_order_short_to_long: + rval = "by length, short to long"; + break; + case Sort_order_long_to_short: + rval = "by length, long to short"; + break; + case Sort_order_alphabetical: + rval = "alphabetically"; + break; + default: + rval = "unknown order"; + break; } - return has_start; + return rval; } -static Boolean ApplyPartial5SetActionToSeqFeat (Partial5SetActionPtr action, SeqFeatPtr sfp) +NLM_EXTERN CharPtr SummarizeSortFieldsAction (SortFieldsActionPtr action) { - Boolean rval = FALSE; - Boolean make_partial = FALSE; - Uint1 strand; - BioseqPtr bsp; - CdRegionPtr crp; - Boolean partial5, partial3; + CharPtr label, order, constraint, summ; + CharPtr fmt = "Sort %s fields %s%s%s"; - if (action == NULL || sfp == NULL) return FALSE; - bsp = BioseqFindFromSeqLoc (sfp->location); - strand = SeqLocStrand (sfp->location); + label = SummarizeFieldType (action->field); + 
order = GetSortOrderName(action->order); + constraint = SummarizeConstraintSet (action->constraint); - switch (action->constraint) { - case Partial_5_set_constraint_all: - make_partial = TRUE; - break; - case Partial_5_set_constraint_at_end: - make_partial = At5EndOfSequence (sfp->location, bsp); - break; - case Partial_5_set_constraint_bad_start: - make_partial = HasGoodStartCodon (sfp); - break; - case Partial_5_set_constraint_frame_not_one: - if (sfp->data.choice == SEQFEAT_CDREGION - && (crp = sfp->data.value.ptrvalue) != NULL - && crp->frame != 0 && crp->frame != 1) { - make_partial = TRUE; + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen(order) + StringLen (constraint))); + sprintf (summ, fmt, label, order, constraint == NULL ? "" : " where ", constraint == NULL ? "" : constraint); + label = MemFree (label); + constraint = MemFree (constraint); + + return summ; +} + + +NLM_EXTERN CharPtr SummarizeMolinfoBlockAction (MolinfoBlockPtr mib) +{ + CharPtr field_label, constraint, summ; + ValNodePtr field, field_strs = NULL, from_strs = NULL, vnp; + Int4 len = 11; + Int4 num_from = 0; + Int4 num_to = 0; + + if (mib == NULL) { + return NULL; + } + + + for (field = mib->to_list; field != NULL; field = field->next) { + field_label = GetSequenceQualName (field); + ValNodeAddPointer (&field_strs, 0, field_label); + len += StringLen (field_label) + 2; + num_to++; + } + + for (field = mib->from_list; field != NULL; field = field->next) { + field_label = GetSequenceQualName (field); + ValNodeAddPointer (&from_strs, 0, field_label); + len += StringLen (field_label) + 2; + num_from++; + } + + constraint = SummarizeConstraintSet (mib->constraint); + len += StringLen (constraint); + if (constraint != NULL || num_from > 0) { + len += 12; + } + + if (num_to > 1) { + len += 5; + } + if (num_from > 1) { + len += 5; + } + + summ = (CharPtr) MemNew (sizeof (Char) * len); + sprintf (summ, "Change to "); + for (vnp = field_strs; vnp != NULL; 
vnp = vnp->next) { + StringCat (summ, vnp->data.ptrvalue); + if (vnp->next != NULL) { + if (num_to > 2) { + if (vnp->next->next == NULL) { + StringCat (summ, ", and"); + } else { + StringCat (summ, ", "); + } + } else { + StringCat (summ, " and "); } - break; + } } - if (make_partial) { - CheckSeqLocForPartial (sfp->location, &partial5, &partial3); - if (!partial5) { - SetSeqLocPartial (sfp->location, TRUE, partial3); - if (action->extend && bsp != NULL) { - ExtendSeqLocToEnd (sfp->location, bsp, TRUE); + if (num_from > 0 || constraint != NULL) { + StringCat (summ, " where "); + } + + for (vnp = from_strs; vnp != NULL; vnp = vnp->next) { + StringCat (summ, vnp->data.ptrvalue); + if (vnp->next != NULL) { + if (num_from > 2) { + if (vnp->next->next == NULL && constraint == NULL) { + StringCat (summ, ", and"); + } else { + StringCat (summ, ", "); + } + } else if (constraint == NULL) { + StringCat (summ, " and "); + } else { + StringCat (summ, ", "); } - rval = TRUE; } } - return rval; + + if (constraint != NULL && num_from > 0) { + StringCat (summ, " and "); + } + + StringCat (summ, constraint); + + field_strs = ValNodeFreeData (field_strs); + from_strs = ValNodeFreeData (from_strs); + constraint = MemFree (constraint); + + return summ; + } -static Boolean ApplyClear5PartialToSeqFeat (Int4 action, SeqFeatPtr sfp) +/* summarizing constraints */ +static CharPtr GetStringLocationPhrase (Uint2 match_location, Boolean not_present) { - Boolean rval = FALSE, clear_partial = FALSE; - Boolean partial5, partial3; - - if (sfp == NULL) return FALSE; + CharPtr location_word = NULL; - switch (action) { - case Partial_5_clear_constraint_all: - clear_partial = TRUE; + switch (match_location) { + case String_location_contains : + if (not_present) { + location_word = "does not contain"; + } else { + location_word = "contains"; + } break; - case Partial_5_clear_constraint_not_at_end: - clear_partial = !At5EndOfSequence(sfp->location, BioseqFindFromSeqLoc (sfp->location)); + case 
String_location_equals : + if (not_present) { + location_word = "does not equal"; + } else { + location_word = "equals"; + } break; - case Partial_5_clear_constraint_good_start: - clear_partial = !HasGoodStartCodon(sfp); + case String_location_starts : + if (not_present) { + location_word = "does not start with"; + } else { + location_word = "starts with"; + } + break; + case String_location_ends : + if (not_present) { + location_word = "does not end with"; + } else { + location_word = "ends with"; + } + break; + case String_location_inlist : + if (not_present) { + location_word = "is not one of"; + } else { + location_word = "is one of"; + } break; } - if (clear_partial) { - CheckSeqLocForPartial (sfp->location, &partial5, &partial3); - if (partial5) { - SetSeqLocPartial (sfp->location, FALSE, partial3); - rval = TRUE; - } - } - return rval; + return location_word; } -static Boolean At3EndOfSequence (SeqLocPtr slp, BioseqPtr bsp) +static const CharPtr kCaseSensitive = "case-sensitive"; +static const CharPtr kWholeWord = "whole word"; + +NLM_EXTERN CharPtr SummarizeWordSubstitution (WordSubstitutionPtr word) { - Uint1 strand; - Int4 stop; - Boolean at_end = FALSE; - - if (slp == NULL || bsp == NULL) return FALSE; + CharPtr fmt = "allow '%s' to be replaced by '%s'"; + Int4 len = 0; + ValNodePtr vnp; + CharPtr summ = NULL; - strand = SeqLocStrand (slp); + if (word == NULL && word->synonyms == NULL) { + return NULL; + } - if (strand == Seq_strand_minus) { - stop = SeqLocStart (slp); - if (stop == 0) { - at_end = TRUE; - } - } else { - stop = SeqLocStop (slp); - if (stop == bsp->length - 1) { - at_end = TRUE; - } + len = StringLen (fmt) + StringLen (word->word); + for (vnp = word->synonyms; vnp != NULL; vnp = vnp->next) { + len += StringLen (vnp->data.ptrvalue) + 4; } - return at_end; -} + if (word->case_sensitive) { + len += StringLen (kCaseSensitive) + 3; + } + if (word->whole_word) { + len += StringLen (kWholeWord) + 3; + } -static Boolean HasGoodStopCodon 
(SeqFeatPtr sfp) -{ - ByteStorePtr bs; - CharPtr prot; - Boolean has_stop = FALSE; - if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) { - bs = ProteinFromCdRegionEx (sfp, TRUE, FALSE); - if (bs != NULL) { - prot = BSMerge (bs, NULL); - bs = BSFree (bs); - if (prot != NULL && prot[StringLen (prot) - 1] == '*') { - has_stop = TRUE; + summ = (CharPtr) MemNew (sizeof (Char) * len); + sprintf (summ, fmt, + word->word == NULL ? "" : word->word, + (word->synonyms == NULL || word->synonyms->data.ptrvalue == NULL) ? "" : word->synonyms->data.ptrvalue); + if (word->synonyms != NULL) { + for (vnp = word->synonyms->next; vnp != NULL; vnp = vnp->next) { + if (word->synonyms->next->next != NULL) { + StringCat (summ, ","); } - prot = MemFree (prot); + StringCat (summ, " "); + if (vnp->next == NULL) { + StringCat (summ, "and "); + } + StringCat (summ, "'"); + if (vnp->data.ptrvalue != NULL) { + StringCat (summ, vnp->data.ptrvalue); + } + StringCat (summ, "'"); } } - return has_stop; + if (word->case_sensitive) { + StringCat (summ, ", "); + StringCat (summ, kCaseSensitive); + } + if (word->whole_word) { + StringCat (summ, ", "); + StringCat (summ, kWholeWord); + } + + return summ; } -static Boolean ApplyPartial3SetActionToSeqFeat (Partial3SetActionPtr action, SeqFeatPtr sfp) +NLM_EXTERN CharPtr SummarizeStringConstraint (StringConstraintPtr constraint) { - Boolean rval = FALSE; - Boolean make_partial = FALSE; - Uint1 strand; - BioseqPtr bsp; - Boolean partial5, partial3; + CharPtr location_word = NULL; + CharPtr ignore_space = "ignore spaces"; + CharPtr ignore_punct = "ignore punctuation"; + CharPtr ignore_weasel = "ignore 'putative' synonyms"; + CharPtr str = NULL; + Int4 len; + CharPtr fmt = "%s '%s'"; + Boolean has_extra = FALSE; + WordSubstitutionPtr word; + ValNodePtr subst_words = NULL, vnp; + CharPtr tmp; - if (action == NULL || sfp == NULL) return FALSE; - bsp = BioseqFindFromSeqLoc (sfp->location); - strand = SeqLocStrand (sfp->location); + if 
(IsStringConstraintEmpty (constraint)) return NULL; - switch (action->constraint) { - case Partial_3_set_constraint_all: - make_partial = TRUE; - break; - case Partial_3_set_constraint_at_end: - make_partial = At3EndOfSequence (sfp->location, bsp); - break; - case Partial_3_set_constraint_bad_end: - make_partial = HasGoodStopCodon (sfp); - break; - } + if (constraint->match_text != NULL) { + location_word = GetStringLocationPhrase (constraint->match_location, constraint->not_present); + if (location_word == NULL) return NULL; + len = StringLen (location_word) + StringLen (constraint->match_text) + StringLen (fmt); + if (constraint->case_sensitive) { + len += StringLen (kCaseSensitive) + 3; + } + if (constraint->whole_word) { + len += StringLen (kWholeWord) + 3; + } + if (constraint->ignore_space) { + len += StringLen (ignore_space) + 3; + } + if (constraint->ignore_punct) { + len += StringLen (ignore_punct) + 3; + } + if (constraint->ignore_weasel) { + len += StringLen (ignore_weasel) + 3; + } - if (make_partial) { - CheckSeqLocForPartial (sfp->location, &partial5, &partial3); - if (!partial3) { - SetSeqLocPartial (sfp->location, partial5, TRUE); - if (action->extend && bsp != NULL) { - ExtendSeqLocToEnd (sfp->location, bsp, FALSE); + /* allocate space for substitution phrases */ + for (word = constraint->ignore_words; word != NULL; word = word->next) { + tmp = SummarizeWordSubstitution (word); + if (tmp != NULL) { + ValNodeAddPointer (&subst_words, 0, tmp); + len += StringLen (tmp) + 2; } - rval = TRUE; } + + str = (CharPtr) MemNew (sizeof (Char) * len); + sprintf (str, fmt, location_word, constraint->match_text); + if (constraint->case_sensitive || constraint->whole_word || constraint->ignore_space || constraint->ignore_punct) { + StringCat (str, " ("); + } + if (constraint->case_sensitive) { + StringCat (str, kCaseSensitive); + has_extra = TRUE; + } + if (constraint->whole_word) { + if (has_extra) { + StringCat (str, ", "); + } + StringCat (str, kWholeWord); + 
has_extra = TRUE; + } + if (constraint->ignore_space) { + if (has_extra) { + StringCat (str, ", "); + } + StringCat (str, ignore_space); + has_extra = TRUE; + } + if (constraint->ignore_punct) { + if (has_extra) { + StringCat (str, ", "); + } + StringCat (str, ignore_punct); + has_extra = TRUE; + } + if (constraint->ignore_weasel) { + if (has_extra) { + StringCat (str, ", "); + } + StringCat (str, ignore_weasel); + has_extra = TRUE; + } + + if (constraint->case_sensitive || constraint->whole_word || constraint->ignore_space || constraint->ignore_punct) { + StringCat (str, ")"); + } + + for (vnp = subst_words; vnp != NULL; vnp = vnp->next) { + StringCat (str, ", "); + StringCat (str, vnp->data.ptrvalue); + } + + subst_words = ValNodeFreeData (subst_words); } - return rval; + if (constraint->is_all_caps) { + SetStringValue(&str, "all letters are uppercase", ExistingTextOption_append_comma); + } + if (constraint->is_all_lower) { + SetStringValue(&str, "all letters are lowercase", ExistingTextOption_append_comma); + } + if (constraint->is_all_punct) { + SetStringValue(&str, "all characters are punctuation", ExistingTextOption_append_comma); + } + + return str; } -static Boolean ApplyClear3PartialToSeqFeat (Int4 action, SeqFeatPtr sfp) +static CharPtr SummarizePartialnessForLocationConstraint (LocationConstraintPtr constraint) { - Boolean rval = FALSE, clear_partial = FALSE; - Boolean partial5, partial3; - - if (sfp == NULL) return FALSE; - - switch (action) { - case Partial_3_clear_constraint_all: - clear_partial = TRUE; - break; - case Partial_3_clear_constraint_not_at_end: - clear_partial = !At3EndOfSequence(sfp->location, BioseqFindFromSeqLoc (sfp->location)); - break; - case Partial_3_clear_constraint_good_end: - clear_partial = !HasGoodStopCodon(sfp); - break; + if (constraint == NULL + || (constraint->partial5 == Partial_constraint_either + && constraint->partial3 == Partial_constraint_either)) { + return NULL; } - if (clear_partial) { - CheckSeqLocForPartial 
(sfp->location, &partial5, &partial3); - if (partial3) { - SetSeqLocPartial (sfp->location, partial5, FALSE); - rval = TRUE; + if (constraint->partial5 == Partial_constraint_either) { + if (constraint->partial3 == Partial_constraint_partial) { + return "that are 3' partial"; + } else { + return "that are 3' complete"; } + } else if (constraint->partial3 == Partial_constraint_either) { + if (constraint->partial5 == Partial_constraint_partial) { + return "that are 5' partial"; + } else { + return "that are 5' complete"; + } + } else if (constraint->partial5 == Partial_constraint_partial + && constraint->partial3 == Partial_constraint_partial) { + return "that are partial on both ends"; + } else if (constraint->partial5 == Partial_constraint_complete + && constraint->partial3 == Partial_constraint_complete) { + return "that are complete on both ends"; + } else if (constraint->partial5 == Partial_constraint_complete + && constraint->partial3 == Partial_constraint_partial) { + return "that are 5' complete and 3' partial"; + } else if (constraint->partial5 == Partial_constraint_partial + && constraint->partial3 == Partial_constraint_complete) { + return "that are 5' partial and 3' complete"; + } else { + return NULL; } - return rval; } -static Boolean ApplyConvertLocationToSeqFeat (Int4 convert_location, SeqFeatPtr sfp) +static CharPtr SummarizeLocationType (LocationConstraintPtr constraint) { - Boolean hasNulls, rval = FALSE; - SeqLocPtr slp; - BioseqPtr bsp; - Boolean partial5, partial3; - - if (sfp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location))== NULL) { - return FALSE; + if (constraint == NULL + || constraint->location_type == Location_type_constraint_any) { + return NULL; + } else if (constraint->location_type == Location_type_constraint_single_interval) { + return "with single interval"; + } else if (constraint->location_type == Location_type_constraint_joined) { + return "with joined intervals"; + } else if (constraint->location_type == 
Location_type_constraint_ordered) { + return "with ordered intervals"; + } else { + return NULL; } - - CheckSeqLocForPartial (sfp->location, &partial5, &partial3); - hasNulls = LocationHasNullsBetween (sfp->location); - switch (convert_location) - { - case Convert_location_type_join : - if (hasNulls) - { - slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, FALSE); - sfp->location = SeqLocFree (sfp->location); - sfp->location = slp; - if (bsp->repr == Seq_repr_seg) - { - slp = SegLocToPartsEx (bsp, sfp->location, FALSE); - sfp->location = SeqLocFree (sfp->location); - sfp->location = slp; - hasNulls = LocationHasNullsBetween (sfp->location); - sfp->partial = (sfp->partial || hasNulls); - } - FreeAllFuzz (sfp->location); - SetSeqLocPartial (sfp->location, partial5, partial3); - rval = TRUE; - } - break; - case Convert_location_type_order : - if (!hasNulls) - { - slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, TRUE); - sfp->location = SeqLocFree (sfp->location); - sfp->location = slp; - if (bsp->repr == Seq_repr_seg) - { - slp = SegLocToPartsEx (bsp, sfp->location, TRUE); - sfp->location = SeqLocFree (sfp->location); - sfp->location = slp; - hasNulls = LocationHasNullsBetween (sfp->location); - sfp->partial = (sfp->partial || hasNulls); - } - FreeAllFuzz (sfp->location); - SetSeqLocPartial (sfp->location, partial5, partial3); - rval = TRUE; - } - break; - case Convert_location_type_merge : - if (sfp->location->choice != SEQLOC_INT) { - slp = SeqLocMerge (bsp, sfp->location, NULL, TRUE, FALSE, FALSE); - sfp->location = SeqLocFree (sfp->location); - sfp->location = slp; - SetSeqLocPartial (sfp->location, partial5, partial3); - rval = TRUE; - } - default: - break; - } - return rval; } -static Boolean ExtendSeqFeat5 (SeqFeatPtr sfp) -{ - BioseqPtr bsp; +static CharPtr distance_words[] = { NULL, "exactly", "no more than", "no less than" }; - if (sfp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location)) == NULL) - { - return FALSE; - } - if 
(ExtendSeqLocToEnd (sfp->location, bsp, TRUE) > 0) - { - return TRUE; - } - else - { - return FALSE; +static CharPtr SummarizeEndDistance (ValNodePtr vnp, CharPtr end_name) +{ + CharPtr str = NULL; + CharPtr fmt = "with %s %s %d from end of sequence"; + + if (vnp == NULL || vnp->choice < 1 || vnp->choice > 3) { + return NULL; } + + str = (CharPtr) MemNew (sizeof (Char) * (StringLen (distance_words[vnp->choice]) + StringLen (end_name) + StringLen (fmt) + 15)); + sprintf (str, fmt, end_name, distance_words[vnp->choice], vnp->data.intvalue); + + return str; } -static Boolean ExtendSeqFeat3 (SeqFeatPtr sfp) +static CharPtr SummarizeLocationConstraint (LocationConstraintPtr constraint) { - BioseqPtr bsp; + CharPtr str = NULL; + CharPtr strand_word = NULL, seq_word = NULL; + CharPtr fmt = "only objects"; + CharPtr partial; + CharPtr location_type; + CharPtr dist5 = NULL, dist3 = NULL; + Int4 len = 0; - if (sfp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location)) == NULL) - { - return FALSE; + if (IsLocationConstraintEmpty (constraint)) { + return NULL; } - ExtendSeqLocToEnd (sfp->location, bsp, FALSE); - return TRUE; -} + partial = SummarizePartialnessForLocationConstraint (constraint); + location_type = SummarizeLocationType(constraint); + dist5 = SummarizeEndDistance (constraint->end5, "5' end"); + dist3 = SummarizeEndDistance (constraint->end3, "3' end"); -static Boolean ApplyLocationEditTypeToSeqFeat (ValNodePtr action, SeqFeatPtr sfp) -{ - Boolean rval = FALSE; + if (constraint->seq_type == Seqtype_constraint_nuc) { + seq_word = "nucleotide sequences"; + } else if (constraint->seq_type == Seqtype_constraint_prot) { + seq_word = "protein sequences"; + } - if (action == NULL || sfp == NULL) { - return FALSE; + if (constraint->strand == Strand_constraint_plus) { + strand_word = " on plus strands"; + } else if (constraint->strand == Strand_constraint_minus) { + strand_word = " on minus strands"; } - switch (action->choice) { - case LocationEditType_strand: - rval = 
ApplyEditLocationStrandToSeqFeat (action->data.ptrvalue, sfp); - break; - case LocationEditType_set_5_partial: - rval = ApplyPartial5SetActionToSeqFeat (action->data.ptrvalue, sfp); - break; - case LocationEditType_clear_5_partial: - rval = ApplyClear5PartialToSeqFeat (action->data.intvalue, sfp); - break; - case LocationEditType_set_3_partial: - rval = ApplyPartial3SetActionToSeqFeat (action->data.ptrvalue, sfp); - break; - case LocationEditType_clear_3_partial: - rval = ApplyClear3PartialToSeqFeat (action->data.intvalue, sfp); - break; - case LocationEditType_convert: - rval = ApplyConvertLocationToSeqFeat (action->data.intvalue, sfp); - break; - case LocationEditType_extend_5: - rval = ExtendSeqFeat5 (sfp); - break; - case LocationEditType_extend_3: - rval = ExtendSeqFeat3 (sfp); - break; + len = StringLen (fmt) + 1; + if (strand_word != NULL) { + len += StringLen (strand_word); } - return rval; + if (seq_word != NULL) { + len += StringLen (seq_word) + 4; + } + if (partial != NULL) { + len += StringLen (partial) + 2; + } + if (location_type != NULL) { + len += StringLen (location_type) + 2; + } + if (dist5 != NULL) { + len += StringLen (dist5) + 1; + } + if (dist3 != NULL) { + len += StringLen (dist3) + 1; + } + str = (CharPtr) MemNew (sizeof (Char) * len); + sprintf (str, "%s", fmt); + if (strand_word == NULL && seq_word != NULL) { + StringCat (str, " on "); + StringCat (str, seq_word); + } else if (strand_word != NULL) { + StringCat (str, strand_word); + if (seq_word != NULL) { + StringCat (str, " of "); + StringCat (str, seq_word); + } + } + if (partial != NULL) { + StringCat (str, " "); + StringCat (str, partial); + } + if (location_type != NULL) { + StringCat (str, " "); + StringCat (str, location_type); + } + + if (dist5 != NULL) { + StringCat (str, " "); + StringCat (str, dist5); + dist5 = MemFree (dist5); + } + if (dist3 != NULL) { + StringCat (str, " "); + StringCat (str, dist3); + dist3 = MemFree (dist3); + } + + return str; } -static Int4 
ApplyEditFeatureLocationActionToSeqEntry (EditFeatureLocationActionPtr action, SeqEntryPtr sep, FILE *log_fp) +static CharPtr SummarizeSourceConstraint (SourceConstraintPtr constraint) { - ConvertAndRemoveFeatureCollectionData d; - ValNodePtr vnp; - SeqFeatPtr sfp; - Int4 num_affected = 0; - /* variables for logging */ - CharPtr old_loc = NULL, new_loc; + CharPtr string, intro = NULL, field1, field2; + CharPtr match_fmt = "%s %s matches %s"; + CharPtr present_fmt = "%s %s is present"; + CharPtr text_fmt = "%s text %s"; + CharPtr two_match_fmt = "%s %s matches %s and %s %s"; + CharPtr one_match_fmt = "%s %s %s"; + CharPtr summ = NULL; - if (action == NULL) return 0; + if (constraint == NULL) return NULL; - d.featdef = GetFeatdefFromFeatureType (action->type); - d.constraint_set = action->constraint; - d.feature_list = NULL; + string = SummarizeStringConstraint (constraint->constraint); + field1 = SummarizeSourceQual (constraint->field1); + field2 = SummarizeSourceQual (constraint->field2); - VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback); - for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) { - sfp = vnp->data.ptrvalue; - if (sfp != NULL) { - if (log_fp != NULL) { - old_loc = SeqLocPrintUseBestID (sfp->location); - } - if (ApplyLocationEditTypeToSeqFeat (action->action, sfp)) { - num_affected++; - if (log_fp != NULL) { - new_loc = SeqLocPrintUseBestID (sfp->location); - fprintf (log_fp, "Changed location %s to %s\n", old_loc, new_loc); - new_loc = MemFree (new_loc); + if (constraint->field1 == NULL && constraint->field2 == NULL && string == NULL) { + if (constraint->type_constraint == Object_type_constraint_feature) { + summ = StringSave ("where source is a feature"); + } else if (constraint->type_constraint == Object_type_constraint_descriptor) { + summ = StringSave ("where source is a descriptor"); + } + } else { + if (constraint->type_constraint == Object_type_constraint_any) { + intro = "where source"; + } else if 
(constraint->type_constraint == Object_type_constraint_feature) { + intro = "where source feature"; + } else if (constraint->type_constraint == Object_type_constraint_descriptor) { + intro = "where source descriptor"; + } else { + string = MemFree (string); + field1 = MemFree (field1); + field2 = MemFree (field2); + return NULL; + } + + if (string == NULL) { + if (field1 == NULL && field2 == NULL) { + if (constraint->type_constraint == Object_type_constraint_feature) { + summ = StringSave ("where source is a feature"); + } else if (constraint->type_constraint == Object_type_constraint_descriptor) { + summ = StringSave ("where source is a descriptor"); } + } else if (field1 != NULL && field2 != NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (match_fmt) + StringLen (intro) + StringLen (field1) + StringLen (field2))); + sprintf (summ, match_fmt, intro, field1, field2); + } else if (field1 != NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (intro) + StringLen (field1))); + sprintf (summ, present_fmt, intro, field1); + } else if (field2 != NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (intro) + StringLen (field2))); + sprintf (summ, present_fmt, intro, field2); + } + } else { + if (field1 == NULL && field2 == NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (text_fmt) + StringLen (intro) + StringLen (string))); + sprintf (summ, text_fmt, intro, string); + } else if (field1 != NULL && field2 != NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (two_match_fmt) + StringLen (intro) + + 2 * StringLen (field1) + StringLen (field2) + StringLen (string))); + sprintf (summ, two_match_fmt, intro, field1, field2, field1, string); + } else if (field1 != NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (intro) + StringLen (field1) + StringLen (string))); + sprintf (summ, one_match_fmt, intro, field1, string); + } else if 
(field2 != NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (intro) + StringLen (field2) + StringLen (string))); + sprintf (summ, one_match_fmt, intro, field2, string); } - old_loc = MemFree (old_loc); } } - return num_affected; + string = MemFree (string); + field1 = MemFree (field1); + field2 = MemFree (field2); + return summ; } -static void ApplyMolinfoBlockCallback (BioseqPtr bsp, Pointer data) + +static CharPtr SummarizeCDSGeneProtPseudoConstraint (CDSGeneProtPseudoConstraintPtr constraint) { - MolinfoBlockPtr mib; - ValNodePtr field; - MolInfoPtr mip; + CharPtr summ = NULL, pseudo_feat; + CharPtr is_pseudo_fmt = "where %s is pseudo"; + CharPtr not_pseudo_fmt = "where %s is not pseudo"; - if (bsp == NULL) { - return; + if (constraint != NULL) { + pseudo_feat = CDSGeneProtFeatureNameFromFeatureType (constraint->feature); + if (pseudo_feat != NULL) { + if (constraint->is_pseudo) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (is_pseudo_fmt) + StringLen (pseudo_feat))); + sprintf (summ, is_pseudo_fmt, pseudo_feat); + } else { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (not_pseudo_fmt) + StringLen (pseudo_feat))); + sprintf (summ, not_pseudo_fmt, pseudo_feat); + } + } + } + + return summ; +} + + +static CharPtr SummarizeCDSGeneProtQualConstraint (CDSGeneProtQualConstraintPtr constraint) +{ + CharPtr string, field1 = NULL, field2 = NULL; + CharPtr match_fmt = "where %s matches %s"; + CharPtr present_fmt = "where %s is present"; + CharPtr text_fmt = "where CDS-gene-prot text %s"; + CharPtr two_match_fmt = "where %s matches %s and %s %s"; + CharPtr one_match_fmt = "where %s %s"; + CharPtr summ = NULL; + + if (constraint == NULL) return NULL; + + string = SummarizeStringConstraint (constraint->constraint); + if (constraint->field1 != NULL && constraint->field1->choice == CDSGeneProtConstraintField_field) { + field1 = CDSGeneProtNameFromField (constraint->field1->data.intvalue); } - - mib = (MolinfoBlockPtr) 
data; - if (mib == NULL) { - return; + if (constraint->field2 != NULL && constraint->field2->choice == CDSGeneProtConstraintField_field) { + field2 = CDSGeneProtNameFromField (constraint->field2->data.intvalue); } - if (!DoesObjectMatchConstraintChoiceSet (OBJ_BIOSEQ, bsp, mib->constraint)) { - return; + if (string == NULL) { + if (field1 != NULL && field2 != NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (match_fmt) + StringLen (field1) + StringLen (field2))); + sprintf (summ, match_fmt, field1, field2); + } else if (field1 != NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (field1))); + sprintf (summ, present_fmt, field1); + } else if (field2 != NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (field2))); + sprintf (summ, present_fmt, field2); + } + } else { + if (field1 == NULL && field2 == NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (text_fmt) + StringLen (string))); + sprintf (summ, text_fmt, string); + } else if (field1 != NULL && field2 != NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (two_match_fmt) + + 2 * StringLen (field1) + StringLen (field2) + StringLen (string))); + sprintf (summ, two_match_fmt, field1, field2, field1, string); + } else if (field1 != NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (field1) + StringLen (string))); + sprintf (summ, one_match_fmt, field1, string); + } else if (field2 != NULL) { + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (field2) + StringLen (string))); + sprintf (summ, one_match_fmt, field2, string); + } } + + string = MemFree (string); + /* note - field1 and field2 aren't allocated, so we don't need to free them */ - mip = GetMolInfoForBioseq (bsp); + return summ; +} - for (field = mib->from_list; field != NULL; field = field->next) { - switch (field->choice) { - case MolinfoField_molecule: - if (mip == NULL || 
mip->biomol != BiomolFromMoleculeType (field->data.intvalue)) { - return; - } - break; - case MolinfoField_technique: - if (mip == NULL || mip->tech != TechFromTechniqueType (field->data.intvalue)) { - return; - } - break; - case MolinfoField_completedness: - if (mip == NULL || mip->completeness != CompletenessFromCompletednessType (field->data.intvalue)) { - return; - } - break; - case MolinfoField_mol_class: - if (bsp->mol != MolFromMoleculeClassType (field->data.intvalue)) { - return; - } - break; - case MolinfoField_topology: - if (bsp->topology != TopologyFromTopologyType (field->data.intvalue)) { - return; - } - break; - case MolinfoField_strand: - if (bsp->strand != StrandFromStrandType (field->data.intvalue)) { - return; - } - break; - } - } +const CharPtr s_QuantityWords [] = { "exactly", "more than", "less than" }; +const Int4 k_NumQuantityWords = sizeof (s_QuantityWords) / sizeof (CharPtr); - for (field = mib->to_list; field != NULL; field = field->next) { - SetSequenceQualOnBioseq (bsp, field); +static CharPtr SummarizeFeatureQuantity (ValNodePtr v) +{ + CharPtr fmt = "sequence has %s %d feature%s"; + CharPtr summ = NULL; + + if (v == NULL || v->choice < 1 || v->choice > k_NumQuantityWords) { + return NULL; } + + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (s_QuantityWords[v->choice - 1]) + 15)); + sprintf (summ, fmt, s_QuantityWords[v->choice - 1], v->data.intvalue, v->data.intvalue == 1 ? 
"" : "s"); + return summ; } -NLM_EXTERN void ApplyMolinfoBlockToSeqEntry (SeqEntryPtr sep, MolinfoBlockPtr mib) +static CharPtr SummarizeSequenceLength (ValNodePtr v) { - VisitBioseqsInSep (sep, mib, ApplyMolinfoBlockCallback); + CharPtr fmt = "sequence is %s %d in length"; + CharPtr summ = NULL; + if (v == NULL || v->choice < 1 || v->choice > k_NumQuantityWords) { + return NULL; + } + + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (s_QuantityWords[v->choice - 1]) + 15)); + sprintf (summ, fmt, s_QuantityWords[v->choice - 1], v->data.intvalue); + return summ; } -typedef struct descriptortypename { - Int4 descriptortype; - Uint1 descriptor_choice; - CharPtr descriptorname; -} DescriptorTypeNameData, PNTR DescriptorTypeNamePtr; - -static DescriptorTypeNameData descriptortypename[] = { - { Descriptor_type_all , 0 , "Any" } , - { Descriptor_type_title , Seq_descr_title , "Title" } , - { Descriptor_type_source , Seq_descr_source , "Source" } , - { Descriptor_type_publication , Seq_descr_pub , "Publication" } , - { Descriptor_type_comment , Seq_descr_comment , "Comment" } , - { Descriptor_type_genbank , Seq_descr_genbank , "GenBank" } , - { Descriptor_type_user , Seq_descr_user , "User" } , - { Descriptor_type_create_date , Seq_descr_create_date , "CreateDate" } , - { Descriptor_type_update_date , Seq_descr_update_date , "UpdateDate" } , - { Descriptor_type_mol_info , Seq_descr_molinfo , "MolInfo" } , - { Descriptor_type_structured_comment , Seq_descr_user , "StructuredComment" } , - { Descriptor_type_genome_project_id , Seq_descr_user , "GenomeProjectID" } +static CharPtr s_SequenceConstraintStrandedness[] = { + "Any", + "sequence contains only minus strand features", + "sequence contains only plus strand features", + "sequence contains at least one minus strand feature", + "sequence contains at least one plus strand feature", + "sequence contains no minus strand features", + "sequence contains no plus strand features" }; -#define 
NUM_descriptortypename sizeof (descriptortypename) / sizeof (DescriptorTypeNameData) -static Int4 GetDescriptorTypeFromDescriptorChoice (Uint1 descriptor_choice) +NLM_EXTERN CharPtr SummarizeFeatureStrandedness (Uint2 strandedness) { - Int4 i; - - for (i = 0; i < NUM_descriptortypename; i++) { - if (descriptor_choice == descriptortypename[i].descriptor_choice) { - return descriptortypename[i].descriptortype; - } + if (strandedness < sizeof (s_SequenceConstraintStrandedness) / sizeof (CharPtr)) { + return s_SequenceConstraintStrandedness[strandedness]; + } else { + return NULL; } - return -1; } -static Uint1 GetDescriptorChoiceFromDescriptorType (Int4 descriptortype) +static CharPtr SummarizeSequenceConstraint (SequenceConstraintPtr constraint) { - Int4 i; + CharPtr summ = NULL; + CharPtr seq_word = NULL, featpresent = NULL, id = NULL; + Int4 len = 0; + CharPtr seq_word_intro = "where sequence type is "; + CharPtr feat_after = " is present"; + CharPtr id_intro = "sequence ID "; + CharPtr feat_quantity = NULL; + CharPtr length_quantity = NULL; + CharPtr strandedness = NULL; + + if (IsSequenceConstraintEmpty (constraint)) { + summ = StringSave ("Missing sequence constraint"); + } else { + if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) { + switch (constraint->seqtype->choice) { + case SequenceConstraintMolTypeConstraint_nucleotide: + seq_word = "nucleotide"; + break; + case SequenceConstraintMolTypeConstraint_dna: + seq_word = "DNA"; + break; + case SequenceConstraintMolTypeConstraint_rna: + if (constraint->seqtype->data.intvalue == Sequence_constraint_rnamol_any) { + seq_word = "RNA"; + } else { + seq_word = GetBiomolNameForRnaType (constraint->seqtype->data.intvalue); + } + break; + case SequenceConstraintMolTypeConstraint_protein: + seq_word = "protein"; + break; + } + } - for (i = 0; i < NUM_descriptortypename; i++) { - if (descriptortype == descriptortypename[i].descriptortype) { - return 
descriptortypename[i].descriptor_choice; + if (constraint->feature != Macro_feature_type_any) { + featpresent = GetFeatureNameFromFeatureType (constraint->feature); } - } - return SEQDESCR_MAX; -} + if (!IsStringConstraintEmpty (constraint->id)) { + id = SummarizeStringConstraint (constraint->id); + } + + if (seq_word != NULL) { + len += StringLen (seq_word) + StringLen (seq_word_intro); + } -NLM_EXTERN CharPtr GetDescriptorNameFromDescriptorType (Int4 descriptortype) -{ - CharPtr str = NULL; - Int4 i; + if (featpresent != NULL) { + if (len == 0) { + len += 6; + } else { + len += 5; + } + len += StringLen (featpresent); + len += StringLen (feat_after); + } - for (i = 0; i < NUM_descriptortypename && str == NULL; i++) { - if (descriptortype == descriptortypename[i].descriptortype) { - str = descriptortypename[descriptortype].descriptorname; + if (id != NULL) { + if (len == 0) { + len += 6; + } else { + len += 5; + } + len += StringLen (id_intro); + len += StringLen (id); + } + + feat_quantity = SummarizeFeatureQuantity (constraint->num_features); + if (feat_quantity != NULL) { + len += StringLen (feat_quantity) + 6; } - } - if (str == NULL) { - str = "Unknown descriptor type"; - } - return str; -} + length_quantity = SummarizeSequenceLength (constraint->length); + if (length_quantity != NULL) { + len += StringLen (length_quantity) + 6; + } -NLM_EXTERN void AddAllDescriptorsToChoiceList (ValNodePtr PNTR descriptor_type_list) -{ - Int4 i; - ValNodePtr tmp_list = NULL; + if (constraint->strandedness > Feature_strandedness_constraint_any) { + strandedness = SummarizeFeatureStrandedness(constraint->strandedness); + len += StringLen (strandedness) + 6; + } - for (i = 0; i < NUM_descriptortypename; i++) { - ValNodeAddPointer (&tmp_list, descriptortypename[i].descriptortype, StringSave (descriptortypename[i].descriptorname)); + if (len == 0) { + summ = StringSave ("missing sequence constraint"); + } else { + len++; + summ = (CharPtr) MemNew (sizeof (Char) * len); + summ[0] 
= 0; + if (seq_word != NULL) { + StringCat (summ, seq_word_intro); + StringCat (summ, seq_word); + } + if (featpresent != NULL) { + if (seq_word == NULL) { + StringCat (summ, "where "); + } else { + StringCat (summ, " and "); + } + StringCat (summ, featpresent); + StringCat (summ, feat_after); + } + if (id != NULL) { + if (seq_word == NULL && featpresent == NULL) { + StringCat (summ, "where "); + } else { + StringCat (summ, " and "); + } + StringCat (summ, id_intro); + StringCat (summ, id); + } + if (feat_quantity != NULL) { + if (StringHasNoText (summ)) { + StringCat (summ, "where "); + } else { + StringCat (summ, " and "); + } + StringCat (summ, feat_quantity); + } + if (length_quantity != NULL) { + if (StringHasNoText (summ)) { + StringCat (summ, "where "); + } else { + StringCat (summ, " and "); + } + StringCat (summ, length_quantity); + } + if (strandedness != NULL) { + if (StringHasNoText (summ)) { + StringCat (summ, "where "); + } else { + StringCat (summ, " and "); + } + StringCat (summ, strandedness); + } + } + id = MemFree (id); + feat_quantity = MemFree (feat_quantity); + length_quantity = MemFree (length_quantity); } - tmp_list = ValNodeSort (tmp_list, SortVnpByString); - ValNodeLink (descriptor_type_list, tmp_list); + return summ; } +const CharPtr s_SpecialPubFieldWords [] = { "is present", "is not present", "is all caps", "is all lowercase", "is all punctuation" }; +const Int4 k_NumSpecialPubFieldWords = sizeof (s_SpecialPubFieldWords) / sizeof (CharPtr); -static Boolean DoesDescriptorMatchType (SeqDescrPtr sdp, Int4 descriptortype) +static CharPtr SummarizePubFieldSpecialConstraint (PubFieldSpecialConstraintPtr field) { - Uint1 descriptorchoice; - UserObjectPtr uop; - - if (sdp == NULL) { - return FALSE; - } else if (descriptortype == Descriptor_type_all) { - return TRUE; - } else if ((descriptorchoice = GetDescriptorChoiceFromDescriptorType (descriptortype)) == SEQDESCR_MAX) { - return FALSE; - } else if (descriptorchoice != sdp->choice) { - return 
FALSE; - } else if (descriptortype == Descriptor_type_structured_comment) { - if (sdp->choice == Seq_descr_user - && (uop = (UserObjectPtr) sdp->data.ptrvalue) == NULL - || uop->type == NULL - || StringCmp (uop->type->str, "StructuredComment") != 0) { - return FALSE; - } else { - return TRUE; - } - } else if (descriptortype == Descriptor_type_genome_project_id) { - if (sdp->choice == Seq_descr_user - && (uop = (UserObjectPtr) sdp->data.ptrvalue) == NULL - || uop->type == NULL - || StringCmp (uop->type->str, "GenomeProjectsDB") != 0) { - return FALSE; - } else { - return TRUE; - } + CharPtr fmt = "%s %s"; + CharPtr label, summ = NULL; - } else { - return TRUE; + if (field == NULL || field->constraint == NULL + || field->constraint->choice < 1 + || field->constraint->choice > k_NumSpecialPubFieldWords) { + return NULL; } -} - - -typedef struct removedescriptoractioncollection { - RemoveDescriptorActionPtr action; - ValNodePtr obj_list; -} RemoveDescriptorActionCollectionData, PNTR RemoveDescriptorActionCollectionPtr; - -static void RemoveDescriptorCollectionCallback (SeqDescrPtr sdp, Pointer data) -{ - RemoveDescriptorActionCollectionPtr d; + label = GetPubFieldLabel (field->field); - if (sdp == NULL || (d = (RemoveDescriptorActionCollectionPtr) data) == NULL - || d->action == NULL) { - return; - } - - if (DoesDescriptorMatchType (sdp, d->action->type) - && DoesObjectMatchConstraintChoiceSet (OBJ_SEQDESC, sdp, d->action->constraint)) { - ValNodeAddPointer (&(d->obj_list), OBJ_SEQDESC, sdp); - } + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + + StringLen (s_SpecialPubFieldWords[field->constraint->choice - 1]))); + sprintf (summ, fmt, label, s_SpecialPubFieldWords[field->constraint->choice - 1]); + return summ; } -static Int4 ApplyRemoveDescriptorActionToSeqEntry (RemoveDescriptorActionPtr action, SeqEntryPtr sep) +static CharPtr SummarizePubFieldConstraint (PubFieldConstraintPtr field) { - RemoveDescriptorActionCollectionData d; - 
SeqDescrPtr sdp; - ObjValNodePtr ovp; - ValNodePtr vnp; - Int4 num_deleted = 0; + CharPtr fmt = "%s %s", summ = NULL; + CharPtr string, label; - if (action == NULL) return 0; + if (field == NULL || field->constraint == NULL) { + return NULL; + } - d.action = action; - d.obj_list = NULL; + string = SummarizeStringConstraint (field->constraint); + label = GetPubFieldLabel (field->field); - VisitDescriptorsInSep (sep, &d, RemoveDescriptorCollectionCallback); - for (vnp = d.obj_list; vnp != NULL; vnp = vnp->next) { - sdp = vnp->data.ptrvalue; - if (sdp != NULL && sdp->extended != 0) { - ovp = (ObjValNodePtr) sdp; - ovp->idx.deleteme = TRUE; - num_deleted ++; - } - } - DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); - return num_deleted; + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen (string))); + sprintf (summ, fmt, label, string); + string = MemFree (string); + return summ; } -static DefLineType DefLineTypeFromAutodefListType(Uint2 list_type) +static CharPtr SummarizePublicationConstraint (PublicationConstraintPtr constraint) { - DefLineType deflinetype = DEFLINE_USE_FEATURES; + CharPtr type = NULL, field = NULL, special = NULL, summ = NULL; + Boolean first = TRUE; + Int4 len; - switch (list_type) { - case Autodef_list_type_feature_list: - deflinetype = DEFLINE_USE_FEATURES; + if (IsPublicationConstraintEmpty (constraint)) return NULL; + + switch (constraint->type) { + case Pub_type_published: + type = "pub is published"; break; - case Autodef_list_type_complete_sequence: - deflinetype = DEFLINE_COMPLETE_SEQUENCE; + case Pub_type_unpublished: + type = "pub is unpublished"; break; - case Autodef_list_type_complete_genome: - deflinetype = DEFLINE_COMPLETE_GENOME; + case Pub_type_in_press: + type = "pub is in press"; + break; + case Pub_type_submitter_block: + type = "pub is submitter block"; break; } - return deflinetype; -} - - -static void ApplyAutodefActionToSeqEntry (AutodefActionPtr action, SeqEntryPtr 
sep) -{ - OrganismDescriptionModifiers od; - ModifierItemLocalPtr modList; - DeflineFeatureRequestList dfrl; - ValNodePtr vnp, modifier_indices = NULL; - ValNode field_type, source_qual_choice; - Uint4 i; - Int4 defline_pos; - - InitOrganismDescriptionModifiers (&od, NULL); - od.use_modifiers = TRUE; - - modList = MemNew (NumDefLineModifiers () * sizeof (ModifierItemLocalData)); - for (i = 0; i < NumDefLineModifiers(); i++) { - modList[i].any_present = FALSE; - modList[i].all_present = FALSE; - modList[i].is_unique = FALSE; - modList[i].first_value_seen = NULL; - modList[i].values_seen = NULL; - modList[i].all_unique = FALSE; - modList[i].status = NULL; - modList[i].required = FALSE; - } - SetRequiredModifiers (modList); - - /* add modifiers specified in action */ - source_qual_choice.next = NULL; - source_qual_choice.choice = SourceQualChoice_textqual; - field_type.next = NULL; - field_type.choice = FieldType_source_qual; - field_type.data.ptrvalue = &source_qual_choice; - for (vnp = action->modifiers; vnp != NULL; vnp = vnp->next) { - source_qual_choice.data.intvalue = vnp->data.intvalue; - defline_pos = GetDeflinePosForFieldType (&field_type); - if (defline_pos > -1) { - modList[defline_pos].required = TRUE; - modList[defline_pos].any_present = TRUE; - ValNodeAddInt (&modifier_indices, 0, defline_pos); + field = SummarizePubFieldConstraint (constraint->field); + special = SummarizePubFieldSpecialConstraint (constraint->special_field); - } + if (type == NULL && field == NULL && special == NULL) { + return NULL; } - - InitFeatureRequests (&dfrl); - dfrl.feature_list_type = DefLineTypeFromAutodefListType (action->clause_list_type); - AutoDefForSeqEntry (sep, SeqMgrGetEntityIDForSeqEntry (sep), &od, modList, modifier_indices, &dfrl, - DEFAULT_ORGANELLE_CLAUSE, FALSE, FALSE); - - modList = MemFree (modList); - modifier_indices = ValNodeFree (modifier_indices); - -} - - -NLM_EXTERN Boolean IsFixPubCapsActionEmpty (FixPubCapsActionPtr action) -{ - if (action == NULL) { 
- return TRUE; - } - if (action->affiliation || action->authors || action->title) { - return FALSE; - } else { - return TRUE; - } -} - - -typedef struct fixpubcaps { - FixPubCapsActionPtr action; - ValNodePtr orgnames; - Int4 num_fields; - ValNodePtr object_list; -} FixPubCapsData, PNTR FixPubCapsPtr; - -static void ApplyFixPubCapsCallback (PubdescPtr pdp, Pointer data) -{ - FixPubCapsPtr f; - CharPtr orig, tmp; - ValNodePtr pub; - AuthListPtr alp = NULL; - ValNodePtr names; - AuthorPtr ap, ap_orig; - AffilPtr affil_orig; - - f = (FixPubCapsPtr)data; - if (f == NULL || f->action == NULL) { - return; + len = 17 + StringLen (type) + StringLen (field) + StringLen (special); + summ = (CharPtr) MemNew (sizeof (Char) * len); + sprintf (summ, "where "); + if (type != NULL) { + StringCat (summ, type); + first = FALSE; } - - if (f->action->title) { - for (pub = pdp->pub; pub != NULL; pub = pub->next) { - orig = GetPubFieldFromPub (pub, Publication_field_title, NULL); - if (orig != NULL) { - tmp = StringSave (orig); - FixCapitalizationInTitle (&tmp, TRUE, f->orgnames); - if (StringCmp (orig, tmp) != 0) { - SetPubFieldOnPub (pub, Publication_field_title, NULL, tmp, ExistingTextOption_replace_old); - f->num_fields++; - } - tmp = MemFree (tmp); - orig = MemFree (orig); - } + if (field != NULL) { + if (!first) { + StringCat (summ, " and "); } + StringCat (summ, field); + first = FALSE; } - if (f->action->authors) { - alp = GetAuthListPtr (pdp, NULL); - if (alp != NULL) { - for (names = alp->names; names != NULL; names = names->next) { - ap = names->data.ptrvalue; - ap_orig = AsnIoMemCopy (ap, (AsnReadFunc) AuthorAsnRead, (AsnWriteFunc) AuthorAsnWrite); - FixCapitalizationInAuthor (ap); - if (!AsnIoMemComp (ap, ap_orig, (AsnWriteFunc) AuthorAsnWrite)) { - f->num_fields++; - } - ap_orig = AuthorFree (ap_orig); - } + if (special != NULL) { + if (!first) { + StringCat (summ, " and "); } + StringCat (summ, special); + first = FALSE; } - if (f->action->affiliation) { - if (alp == 
NULL) { - alp = GetAuthListPtr (pdp, NULL); - } - if (alp != NULL && alp->affil != NULL) { - affil_orig = AsnIoMemCopy (alp->affil, (AsnReadFunc) AffilAsnRead, (AsnWriteFunc) AffilAsnWrite); - FixCapsInPubAffil (alp->affil); - if (!AsnIoMemComp (alp->affil, affil_orig, (AsnWriteFunc) AffilAsnWrite)) { - f->num_fields++; - } - affil_orig = AffilFree (affil_orig); - } - } + field = MemFree (field); + special = MemFree (special); + + return summ; } -static void CollectPubObjectsFeatCallback (SeqFeatPtr sfp, Pointer data) +static CharPtr SummarizeFieldConstraint (FieldConstraintPtr constraint) { - FixPubCapsPtr f; + CharPtr rval = NULL; + CharPtr string = NULL, label = NULL; + CharPtr fmt = "where %s %s"; - if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB || (f = (FixPubCapsPtr) data) == NULL) { - return; - } + if (IsFieldConstraintEmpty (constraint)) return NULL; - if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, f->action->constraint)) { - ValNodeAddPointer (&(f->object_list), OBJ_SEQFEAT, sfp); - } + string = SummarizeStringConstraint (constraint->string_constraint); + label = SummarizeFieldType (constraint->field); + + if (string != NULL && label != NULL) { + rval = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen (string))); + sprintf (rval, fmt, label, string); + } + string = MemFree (string); + label = MemFree (label); + + return rval; } -static void CollectPubObjectsDescCallback (SeqDescPtr sdp, Pointer data) +static CharPtr SummarizeMissingFieldConstraint (FieldTypePtr field) { - FixPubCapsPtr f; + CharPtr rval = NULL; + CharPtr label = NULL; + CharPtr fmt = "where %s is missing"; - if (sdp == NULL || sdp->choice != Seq_descr_pub || (f = (FixPubCapsPtr) data) == NULL) { - return; - } + if (field == NULL) return NULL; - if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQDESC, sdp, f->action->constraint)) { - ValNodeAddPointer (&(f->object_list), OBJ_SEQDESC, sdp); - } + label = SummarizeFieldType (field); + + if (label 
!= NULL) { + rval = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label))); + sprintf (rval, fmt, label); + } + label = MemFree (label); + + return rval; } -static Int4 ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPtr sep) +static CharPtr SummarizeMolinfoFieldConstraint (MolinfoFieldConstraintPtr constraint) { - FixPubCapsData f; - ValNodePtr vnp; - Int4 num_succeeded = 0; - PubdescPtr pdp; - SeqFeatPtr sfp; - SeqDescPtr sdp; - - if (action == NULL || sep == NULL) return 0; + CharPtr label, cp; + CharPtr fmt = "where %s is%s %s"; + CharPtr rval = NULL; + Int4 len, offset; - MemSet (&f, 0, sizeof (FixPubCapsData)); - f.action = action; + if (IsMolinfoFieldConstraintEmpty(constraint)) { + return NULL; + } + label = GetSequenceQualName (constraint->field); + if (label == NULL) { + return NULL; + } + cp = StringChr (label, ' '); + if (cp == NULL) { + return NULL; + } + offset = cp - label; + len = StringLen (fmt) + StringLen (label); + if (constraint->is_not) { + len += 4; + } + rval = (CharPtr) MemNew (sizeof (Char) * len); + sprintf (rval, "where %s", label); + StringCpy (rval + 7 + offset, constraint->is_not ? 
"is not " : "is "); + StringCat (rval, cp + 1); - /* collect pub objects that match constraint */ - VisitDescriptorsInSep (sep, &f, CollectPubObjectsDescCallback); - VisitFeaturesInSep (sep, &f, CollectPubObjectsFeatCallback); + return rval; +} - if (f.object_list == NULL) { - /* nothing to change */ - return 0; - } - if (action->title) { - /* get org names to use in fixes */ - VisitBioSourcesInSep (sep, &f.orgnames, GetOrgNamesInRecordCallback); +NLM_EXTERN Boolean IsTranslationConstraintEmpty (TranslationConstraintPtr constraint) +{ + if (constraint == NULL) { + return TRUE; + } else if (constraint->num_mismatches != NULL) { + return FALSE; + } else if (constraint->internal_stops != Match_type_constraint_dont_care) { + return FALSE; + } else if (!IsStringConstraintEmpty (constraint->actual_strings)) { + return FALSE; + } else if (!IsStringConstraintEmpty (constraint->transl_strings)) { + return FALSE; + } else { + return TRUE; } +} - for (vnp = f.object_list; vnp != NULL; vnp = vnp->next) { - pdp = NULL; - if (vnp->choice == OBJ_SEQFEAT) { - sfp = vnp->data.ptrvalue; - pdp = sfp->data.value.ptrvalue; - } else if (vnp->choice == OBJ_SEQDESC) { - sdp = vnp->data.ptrvalue; - pdp = sdp->data.ptrvalue; - } - ApplyFixPubCapsCallback (pdp, &f); - } - f.orgnames = ValNodeFree (f.orgnames); +static CharPtr SummarizeTranslationMismatches (ValNodePtr v) +{ + CharPtr fmt = "there are %s %d mismatches between the actual and translated protein sequences"; + CharPtr summ = NULL; - return f.num_fields; + if (v == NULL || v->choice < 1 || v->choice > k_NumQuantityWords) { + return NULL; + } + + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (s_QuantityWords[v->choice - 1]) + 15)); + sprintf (summ, fmt, s_QuantityWords[v->choice - 1], v->data.intvalue); + return summ; } -NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, Int4Ptr pNumFields, Int4Ptr pNumFeat, FILE *log_fp) +static CharPtr SummarizeTranslationConstraint 
(TranslationConstraintPtr constraint) { - Int4 num_AECR = 0, num_parse = 0, num_feature = 0, num_fields = 0, num; - Uint2 entityID; - Boolean needs_update = FALSE; - CharPtr summ; - Boolean any_change = FALSE; - - while (macro != NULL) { - needs_update = TRUE; - switch (macro->choice) { - case MacroActionChoice_aecr: - num = ApplyAECRActionToSeqEntry ((AECRActionPtr) macro->data.ptrvalue, sep); - num_AECR += num; - if (num > 0) { - if (log_fp != NULL) { - summ = SummarizeAECRAction ((AECRActionPtr) macro->data.ptrvalue); - fprintf (log_fp, "Changed %d fields during %s\n", num, summ); - summ = MemFree (summ); - } - any_change = TRUE; - } - break; - case MacroActionChoice_parse: - num = ApplyParseActionToSeqEntry ((ParseActionPtr) macro->data.ptrvalue, sep); - num_parse += num; - if (num > 0) { - if (log_fp != NULL) { - summ = SummarizeParseAction ((ParseActionPtr) macro->data.ptrvalue); - fprintf (log_fp, "Changed %d fields during %s\n", num, summ); - summ = MemFree (summ); - } - any_change = TRUE; - } - break; - case MacroActionChoice_add_feature: - num = ApplyApplyFeatureActionToSeqEntry ((ApplyFeatureActionPtr) macro->data.ptrvalue, sep); - num_feature += num; - if (num > 0) { - if (log_fp != NULL) { - fprintf (log_fp, "Added %d features\n", num); - } - any_change = TRUE; - } - SeqMgrIndexFeatures (ObjMgrGetEntityIDForChoice(sep), NULL); - break; - case MacroActionChoice_remove_feature: - num = ApplyRemoveFeatureActionToSeqEntry ((RemoveFeatureActionPtr) macro->data.ptrvalue, sep); - if (num > 0) { - if (log_fp != NULL) { - fprintf (log_fp, "Removed %d features\n", num); - } - any_change = TRUE; - } - break; - case MacroActionChoice_edit_location: - num = ApplyEditFeatureLocationActionToSeqEntry ((EditFeatureLocationActionPtr) macro->data.ptrvalue, sep, log_fp); - if (num > 0) { - num_fields += num; - any_change = TRUE; - } - break; - case MacroActionChoice_convert_feature: - num += ApplyConvertFeatureActionToSeqEntry ((ConvertFeatureActionPtr) 
macro->data.ptrvalue, sep, log_fp); - num_feature += num; - entityID = SeqMgrGetEntityIDForSeqEntry (sep); - ObjMgrSetDirtyFlag (entityID, TRUE); - ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); - needs_update = FALSE; - any_change = TRUE; - break; - case MacroActionChoice_remove_descriptor: - num = ApplyRemoveDescriptorActionToSeqEntry ((RemoveDescriptorActionPtr) macro->data.ptrvalue, sep); - if (num > 0) { - if (log_fp != NULL) { - summ = SummarizeRemoveDescriptorAction ((RemoveDescriptorActionPtr) macro->data.ptrvalue); - fprintf (log_fp, "Removed %d descriptors during %s\n", num, summ); - summ = MemFree (summ); - } - any_change = TRUE; - } - break; - case MacroActionChoice_autodef: - ApplyAutodefActionToSeqEntry ((AutodefActionPtr) macro->data.ptrvalue, sep); - if (log_fp != NULL) { - summ = SummarizeAutodefAction ((AutodefActionPtr) macro->data.ptrvalue); - if (summ != NULL) { - fprintf (log_fp, "Performed %s\n", summ); - } - summ = MemFree (summ); - } - any_change = TRUE; - break; - case MacroActionChoice_removesets: - if (RemoveDuplicateNestedSetsForEntityID (SeqMgrGetEntityIDForSeqEntry (sep))) { - if (log_fp != NULL) { - fprintf (log_fp, "Removed duplicate nested sets\n"); - } - any_change = TRUE; - } - break; - case MacroActionChoice_trim_junk_from_primer_seq: - any_change |= TrimPrimerSeqJunkInSeqEntry (sep, log_fp); - break; - case MacroActionChoice_fix_usa_and_states: - any_change |= FixUsaAndStateAbbreviations (entityID, log_fp); - break; - case MacroActionChoice_trim_stop_from_complete_cds: - if (TrimStopsFromCompleteCodingRegions(sep, log_fp)) { - entityID = SeqMgrGetEntityIDForSeqEntry (sep); - ObjMgrSetDirtyFlag (entityID, TRUE); - ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); - needs_update = FALSE; - any_change = TRUE; - } - break; - case MacroActionChoice_synchronize_cds_partials: - if (ResynchCodingRegionPartialsEx(sep, log_fp)) { - entityID = SeqMgrGetEntityIDForSeqEntry (sep); - ObjMgrSetDirtyFlag (entityID, TRUE); - ObjMgrSendMsg 
(OM_MSG_UPDATE, entityID, 0, 0); - needs_update = FALSE; - any_change = TRUE; - } - break; - case MacroActionChoice_adjust_for_consensus_splice: - if (AdjustSeqEntryForConsensusSpliceEx(sep, log_fp)) { - entityID = SeqMgrGetEntityIDForSeqEntry (sep); - ObjMgrSetDirtyFlag (entityID, TRUE); - ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); - needs_update = FALSE; - any_change = TRUE; - } - break; - case MacroActionChoice_fix_pub_caps: - num = ApplyFixPubCapsToSeqEntry (macro->data.ptrvalue, sep); - if (num > 0) { - if (log_fp != NULL) { - summ = SummarizeFixPubCapsAction (macro->data.ptrvalue); - fprintf (log_fp, "Fixed capitalization in %d publication fields during %s\n", num, summ); - summ = MemFree (summ); - } - any_change = TRUE; - } - num_fields += num; - break; - case MacroActionChoice_remove_seg_gaps: - num = RemoveSegGapsInSeqEntry (sep); - if (num > 0) { - if (log_fp != NULL) { - fprintf (log_fp, "Removed gaps in %d alignments\n", num); - } - any_change = TRUE; - } - num_fields += num; - break; + CharPtr rval = NULL; + CharPtr mismatch = NULL; + CharPtr tmp; + CharPtr where_actual_sequence = "where actual sequence "; + CharPtr where_transl_sequence = "where translated sequence "; + CharPtr has_internal_stops = "sequence has internal stops"; + CharPtr no_internal_stops = "sequence has no internal stops"; + Int4 len = 0; + StringConstraintPtr scp; + ValNodePtr actual_phrases = NULL, transl_phrases = NULL, vnp; + Int4 num_phrases = 0, phrase_num = 1; + + if (IsTranslationConstraintEmpty(constraint)) { + return NULL; + } + + if (constraint->actual_strings != NULL) { + len += StringLen (where_actual_sequence); + for (scp = constraint->actual_strings; scp != NULL; scp = scp->next) { + tmp = SummarizeStringConstraint (scp); + if (tmp != NULL) { + len += StringLen (tmp) + 2; + ValNodeAddPointer (&actual_phrases, 0, tmp); + } } - macro = macro->next; + len += 5; + num_phrases ++; + } + if (constraint->transl_strings != NULL) { + len += StringLen 
(where_transl_sequence); + for (scp = constraint->transl_strings; scp != NULL; scp = scp->next) { + tmp = SummarizeStringConstraint (scp); + if (tmp != NULL) { + len += StringLen (tmp) + 2; + ValNodeAddPointer (&transl_phrases, 0, tmp); + } + } + len += 5; + num_phrases ++; } - if (needs_update) { - entityID = SeqMgrGetEntityIDForSeqEntry (sep); - ObjMgrSetDirtyFlag (entityID, TRUE); - ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); + if (constraint->num_mismatches != NULL) { + mismatch = SummarizeTranslationMismatches(constraint->num_mismatches); + len += StringLen (mismatch) + 5; + num_phrases ++; } - if (pNumFields != NULL) { - *pNumFields = num_AECR + num_parse + num_fields; + + if (constraint->internal_stops == Match_type_constraint_yes) { + len += StringLen (has_internal_stops) + 5; + num_phrases ++; + } else if (constraint->internal_stops == Match_type_constraint_no) { + len += StringLen (no_internal_stops) + 5; + num_phrases ++; } - if (pNumFeat != NULL) { - *pNumFeat = num_feature; + + rval = (CharPtr) MemNew (sizeof (Char) * len); + rval[0] = 0; + if (actual_phrases != NULL) { + StringCat (rval, where_actual_sequence); + for (vnp = actual_phrases; vnp != NULL; vnp = vnp->next) { + StringCat (rval, vnp->data.ptrvalue); + if (vnp->next != NULL) { + StringCat (rval, ", "); + } + } + actual_phrases = ValNodeFreeData (actual_phrases); + phrase_num++; + } + + if (transl_phrases != NULL) { + if (phrase_num > 1) { + if (num_phrases > 2) { + StringCat (rval, ", "); + } + if (phrase_num == num_phrases) { + StringCat (rval, " and "); + } + } + StringCat (rval, where_transl_sequence); + for (vnp = transl_phrases; vnp != NULL; vnp = vnp->next) { + StringCat (rval, vnp->data.ptrvalue); + if (vnp->next != NULL) { + StringCat (rval, ", "); + } + } + transl_phrases = ValNodeFreeData (transl_phrases); + phrase_num++; } - return any_change; -} + if (mismatch != NULL) { + if (phrase_num > 1) { + if (num_phrases > 2) { + StringCat (rval, ", "); + } + if (phrase_num == 
num_phrases) { + StringCat (rval, " and "); + } + } + StringCat (rval, mismatch); + mismatch = MemFree (mismatch); + phrase_num++; + } -NLM_EXTERN void ApplyMacroToSeqEntry (SeqEntryPtr sep, ValNodePtr macro, Int4Ptr pNumFields, Int4Ptr pNumFeat) -{ - ApplyMacroToSeqEntryEx (sep, macro, pNumFields, pNumFeat, NULL); + if (constraint->internal_stops == Match_type_constraint_yes) { + if (phrase_num > 1) { + if (num_phrases > 2) { + StringCat (rval, ", "); + } + if (phrase_num == num_phrases) { + StringCat (rval, " and "); + } + } + StringCat (rval, has_internal_stops); + phrase_num++; + } else if (constraint->internal_stops == Match_type_constraint_yes) { + len += StringLen (no_internal_stops) + 5; + if (phrase_num > 1) { + if (num_phrases > 2) { + StringCat (rval, ", "); + } + if (phrase_num == num_phrases) { + StringCat (rval, " and "); + } + } + StringCat (rval, no_internal_stops); + phrase_num++; + } + + return rval; } -/* for generating text descriptions of macro objects */ -NLM_EXTERN CharPtr SummarizeSourceQual (ValNodePtr field) +NLM_EXTERN CharPtr SummarizeConstraint (ValNodePtr constraint) { - CharPtr summ = NULL, locname, origname; - Int4 genome, origin; - CharPtr loc_fmt = "location %s"; - CharPtr orig_fmt = "origin %s"; + CharPtr phrase = NULL, tmp; + CharPtr fmt = "where object text %s"; - if (field == NULL) return NULL; - switch (field->choice) { - case SourceQualChoice_textqual: - summ = StringSave (GetSourceQualName (field->data.intvalue)); + if (constraint == NULL) return NULL; + switch (constraint->choice) { + case ConstraintChoice_string: + tmp = SummarizeStringConstraint (constraint->data.ptrvalue); + if (tmp != NULL) { + phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (tmp) + StringLen (fmt))); + sprintf (phrase, fmt, tmp); + tmp = MemFree (tmp); + } break; - case SourceQualChoice_location: - genome = GenomeFromSrcLoc (field->data.intvalue); - locname = LocNameFromGenome (genome); - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen 
(loc_fmt) + StringLen (locname))); - sprintf (summ, loc_fmt, locname); + case ConstraintChoice_location: + phrase = SummarizeLocationConstraint (constraint->data.ptrvalue); break; - case SourceQualChoice_origin: - origin = OriginFromSrcOrig (field->data.intvalue); - origname = OriginNameFromOrigin (origin); - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (orig_fmt) + StringLen (origname))); - sprintf (summ, orig_fmt, origname); + case ConstraintChoice_source: + phrase = SummarizeSourceConstraint (constraint->data.ptrvalue); + break; + case ConstraintChoice_cdsgeneprot_qual: + phrase = SummarizeCDSGeneProtQualConstraint (constraint->data.ptrvalue); + break; + case ConstraintChoice_cdsgeneprot_pseudo: + phrase = SummarizeCDSGeneProtPseudoConstraint (constraint->data.ptrvalue); + break; + case ConstraintChoice_sequence: + phrase = SummarizeSequenceConstraint (constraint->data.ptrvalue); + break; + case ConstraintChoice_pub: + phrase = SummarizePublicationConstraint (constraint->data.ptrvalue); + break; + case ConstraintChoice_field: + phrase = SummarizeFieldConstraint (constraint->data.ptrvalue); + break; + case ConstraintChoice_molinfo: + phrase = SummarizeMolinfoFieldConstraint (constraint->data.ptrvalue); + break; + case ConstraintChoice_field_missing: + phrase = SummarizeMissingFieldConstraint (constraint->data.ptrvalue); + break; + case ConstraintChoice_translation: + phrase = SummarizeTranslationConstraint (constraint->data.ptrvalue); break; } - return summ; + return phrase; } -NLM_EXTERN CharPtr FeatureFieldLabel (CharPtr feature_name, ValNodePtr field) +NLM_EXTERN CharPtr SummarizeConstraintSet (ValNodePtr constraint_set) { - CharPtr cp; - CharPtr label = NULL; - CharPtr legal_fmt = "%s %s"; - CharPtr illegal_fmt = "constrained field on %s"; - - if (feature_name == NULL) { - feature_name = "Unknown feature"; - } + ValNodePtr phrases = NULL, vnp; + Int4 len = 0; + CharPtr phrase, str = NULL; - if (field == NULL) { - return StringSave ("missing field"); - 
} else if (field->choice == FeatQualChoice_legal_qual) { - cp = GetFeatQualName (field->data.intvalue); - if (cp == NULL) cp = "Unknown field type"; - label = (CharPtr) MemNew (sizeof (Char) * (StringLen (legal_fmt) + StringLen (feature_name) + StringLen (cp))); - sprintf (label, legal_fmt, feature_name, cp); - } else if (field->choice == FeatQualChoice_illegal_qual) { - label = (CharPtr) MemNew (sizeof (Char) * (StringLen (illegal_fmt) + StringLen (feature_name))); - sprintf (label, illegal_fmt, feature_name); - } else { - label = StringSave ("illegal field value"); + while (constraint_set != NULL) { + phrase = SummarizeConstraint (constraint_set); + if (phrase != NULL) { + ValNodeAddPointer (&phrases, 0, phrase); + if (len > 0) { + len += 5; /* for " and " */ + } else { + len += 1; /* for terminal NULL */ + } + len += StringLen (phrase); + } + constraint_set = constraint_set->next; } - return label; + if (len > 0) { + str = (CharPtr) MemNew (sizeof (Char) * len); + for (vnp = phrases; vnp != NULL; vnp = vnp->next) { + StringCat (str, vnp->data.ptrvalue); + if (vnp->next != NULL) { + StringCat (str, " and "); + } + } + } + return str; } -NLM_EXTERN CharPtr SummarizeFieldType (ValNodePtr vnp) -{ - FeatureFieldPtr ffp; - CharPtr str = NULL; - CharPtr label = NULL; - CharPtr pub_fmt = "publication %s"; +/* for table readers that use the macro language functions */ - if (vnp == NULL) { - str = StringSave ("missing field"); - } else { - switch (vnp->choice) { - case FieldType_source_qual: - str = SummarizeSourceQual (vnp->data.ptrvalue); - break; - case FieldType_feature_field: - ffp = (FeatureFieldPtr) vnp->data.ptrvalue; - if (ffp == NULL || ffp->field == NULL) { - str = StringSave ("missing field"); - } else { - label = GetFeatureNameFromFeatureType (ffp->type); - str = FeatureFieldLabel (label, ffp->field); - } - break; - case FieldType_cds_gene_prot: - str = StringSaveNoNull (CDSGeneProtNameFromField (vnp->data.intvalue)); - if (str == NULL) { - str = StringSave 
("Invalid CDS-Gene-Prot Field"); - } - break; - case FieldType_molinfo_field: - str = GetSequenceQualName (vnp->data.ptrvalue); - if (str == NULL) { - str = StringSave ("Invalid Sequence Qual Field"); - } - break; - case FieldType_pub: - switch (vnp->data.intvalue) { - case Publication_field_cit: - str = StringSave ("publication citation"); - break; - case Publication_field_authors: - str = StringSave ("publication authors"); - break; - case Publication_field_journal: - str = StringSave ("publication journal"); - break; - case Publication_field_volume: - str = StringSave ("publication volume"); - break; - case Publication_field_issue: - str = StringSave ("publication issue"); - break; - case Publication_field_pages: - str = StringSave ("publication pages"); - break; - case Publication_field_date: - str = StringSave ("publication date"); - break; - case Publication_field_serial_number: - str = StringSave ("publication serial number"); - break; - case Publication_field_title: - str = StringSave ("publication title"); - break; - default: - label = GetPubFieldLabel (vnp->data.intvalue); - if (label == NULL) { - str = StringSave ("Invalid field type"); - } else { - str = MemNew (sizeof (Char) * (StringLen (pub_fmt) + StringLen (label))); - sprintf (str, pub_fmt, label); - } - break; - } - break; - case FieldType_rna_field: - str = SummarizeRnaQual (vnp->data.ptrvalue); - break; - case FieldType_struc_comment_field: - str = SummarizeStructuredCommentField (vnp->data.ptrvalue); - break; - case FieldType_misc: - if (vnp->data.intvalue == Misc_field_genome_project_id) { - str = StringSave ("Genome Project ID"); - } else if (vnp->data.intvalue == Misc_field_comment_descriptor) { - str = StringSave ("Comment Descriptor"); - } else if (vnp->data.intvalue == Misc_field_defline) { - str = StringSave ("Definition Line"); - } else if (vnp->data.intvalue == Misc_field_keyword) { - str = StringSave ("Keyword"); - } else { - str = StringSave ("Invalid field type"); - } - break; - 
default: - str = StringSave ("Invalid field type"); - break; +/* MatchType is used to represent how the column should be matched. + */ + +NLM_EXTERN MatchTypePtr MatchTypeNew () +{ + MatchTypePtr match_type = MemNew (sizeof (MatchTypeData)); + match_type->data = NULL; + match_type->match_location = String_location_equals; + match_type->choice = eTableMatchNucID; + return match_type; +} + + +NLM_EXTERN MatchTypePtr MatchTypeFree (MatchTypePtr match_type) +{ + if (match_type != NULL) { + if (match_type->choice == eTableMatchSourceQual) { + match_type->data = SourceQualChoiceFree (match_type->data); } + match_type = MemFree (match_type); } - return str; + return match_type; } -NLM_EXTERN FieldTypePtr FieldTypeFromString (CharPtr str) +static MatchTypePtr MatchTypeCopy (MatchTypePtr orig) { - Int4 qual_type, feat_type = -1; - FieldTypePtr ft = NULL; - FeatureFieldPtr ffp; - ValNodePtr vnp; - CharPtr cpy, cp; - RnaQualPtr rq; - - if (StringHasNoText (str)) { - return NULL; + MatchTypePtr match_type = NULL; + + if (orig != NULL) { + match_type = MatchTypeNew(); + match_type->choice = orig->choice; + match_type->match_location = orig->match_location; + if (match_type->choice == eTableMatchSourceQual) { + match_type->data = AsnIoMemCopy (orig->data, (AsnReadFunc) SourceQualChoiceAsnRead, (AsnWriteFunc) SourceQualChoiceAsnWrite); + } } + return match_type; +} - /* check source quals first */ - qual_type = GetSourceQualTypeByName (str); - if (qual_type > -1) { - vnp = ValNodeNew (NULL); - vnp->choice = SourceQualChoice_textqual; - vnp->data.intvalue = qual_type; - ft = ValNodeNew (NULL); - ft->choice = FieldType_source_qual; - ft->data.ptrvalue = vnp; - } else { - /* try feature fields */ - cpy = StringSave (str); - cp = StringChr (cpy, ' '); - while (cp != NULL && feat_type == -1) { - *cp = 0; - feat_type = GetFeatureTypeByName (cpy); - if (feat_type < 0) { - *cp = ' '; - cp = StringChr (cp + 1, ' '); - } - } - if (feat_type > -1) { - qual_type = GetFeatQualByName (cp + 1); 
- if (qual_type > -1) { - ffp = FeatureFieldNew (); - ffp->type = feat_type; - ValNodeAddInt (&ffp->field, FeatQualChoice_legal_qual, qual_type); - ft = ValNodeNew (NULL); - ft->choice = FieldType_feature_field; - ft->data.ptrvalue = ffp; - } - } - cpy = MemFree (cpy); - if (ft == NULL) { - /* try CDS-gene-prot */ - qual_type = CDSGeneProtFieldFromName (str); - if (qual_type > -1) { - ft = ValNodeNew (NULL); - ft->choice = FieldType_cds_gene_prot; - ft->data.intvalue = qual_type; - } - } - if (ft == NULL) { - /* try RNA Quals */ - cpy = StringSave (str); - cp = StringChr (cpy, ' '); - if (cp != NULL) { - *cp = 0; - feat_type = GetRnaTypeForName (cpy); - qual_type = GetRnaFieldForName (cp + 1); - if (feat_type > -1 && qual_type > -1) { - rq = RnaQualNew (); - rq->type = ValNodeNew (NULL); - rq->type->choice = feat_type; - rq->type->data.ptrvalue = NULL; - rq->field = qual_type; - ft = ValNodeNew (NULL); - ft->choice = FieldType_rna_field; - ft->data.ptrvalue = rq; - } - } - cpy = MemFree (cpy); + +static MatchTypePtr FindMatchTypeInHeader (ValNodePtr columns) +{ + ValNodePtr col_vnp; + MatchTypePtr match_type = NULL; + TabColumnConfigPtr t; + + for (col_vnp = columns; + col_vnp != NULL && match_type == NULL; + col_vnp = col_vnp->next) { + t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; + if (t != NULL && t->match_type != NULL) { + match_type = MatchTypeCopy (t->match_type); } } - return ft; + return match_type; } -NLM_EXTERN Boolean IsFieldTypeNonText (ValNodePtr field_type) +NLM_EXTERN TabColumnConfigPtr TabColumnConfigNew (void) { - ValNodePtr vnp; - FeatureFieldPtr ffp; - Boolean rval = FALSE; + TabColumnConfigPtr t; - if (field_type == NULL) { - return FALSE; - } - switch (field_type->choice) { - case FieldType_source_qual : - vnp = (ValNodePtr) field_type->data.ptrvalue; - if (vnp != NULL) { - if (vnp->choice == SourceQualChoice_location || vnp->choice == SourceQualChoice_origin) { - rval = TRUE; - } else if (vnp->choice == SourceQualChoice_textqual) { - if 
(IsNonTextSourceQual (vnp->data.intvalue)) { - rval = TRUE; - } - } - } - break; - case FieldType_feature_field : - ffp = (FeatureFieldPtr) field_type->data.ptrvalue; - if (ffp != NULL && ffp->field != NULL && ffp->field->choice == FeatQualChoice_legal_qual - && ffp->field->data.intvalue == Feat_qual_legal_pseudo) { - rval = TRUE; - } - break; - case FieldType_molinfo_field : - rval = TRUE; - break; - } - return rval; + t = (TabColumnConfigPtr) MemNew (sizeof (TabColumnConfigData)); + t->match_type = NULL; + t->field = NULL; + t->existing_text = ExistingTextOption_replace_old; + t->constraint = NULL; + t->skip_blank = TRUE; + return t; } -NLM_EXTERN CharPtr SummarizeExistingText (Uint2 existing_text) +NLM_EXTERN TabColumnConfigPtr TabColumnConfigFree (TabColumnConfigPtr t) { - CharPtr str = NULL; + if (t != NULL) { + t->field = FieldTypeFree (t->field); + t->match_type = MatchTypeFree (t->match_type); + t->constraint = ConstraintChoiceSetFree (t->constraint); + t = MemFree (t); + } + return t; +} - switch (existing_text) { - case ExistingTextOption_append_semi : - str = "append separated by semicolon"; - break; - case ExistingTextOption_append_space : - str = "append separated by space"; - break; - case ExistingTextOption_append_colon : - str = "append separated by colon"; - break; - case ExistingTextOption_append_none : - str = "append (no separator)"; - break; - case ExistingTextOption_prefix_semi : - str = "prefix separated by semicolon"; - break; - case ExistingTextOption_prefix_space : - str = "prefix separated by space"; - break; - case ExistingTextOption_prefix_colon : - str = "prefix separated by colon"; - break; - case ExistingTextOption_prefix_none : - str = "prefix (no separator)"; - break; - case ExistingTextOption_leave_old : - str = "ignore new text when existing text is present"; - break; - case ExistingTextOption_replace_old : - str = "overwrite existing text"; - break; - case ExistingTextOption_add_qual : - str = "add new qual"; - break; - default: 
- str = "invalid existing_text option"; - break; + +NLM_EXTERN TabColumnConfigPtr TabColumnConfigCopy (TabColumnConfigPtr orig) +{ + TabColumnConfigPtr t = NULL; + + if (orig != NULL) { + t = TabColumnConfigNew (); + + t->match_type = MatchTypeCopy (orig->match_type); + t->existing_text = orig->existing_text; + t->skip_blank = orig->skip_blank; + t->match_mrna = orig->match_mrna; + t->field = FieldTypeCopy (orig->field); + t->constraint = AsnIoMemCopy (orig->constraint, (AsnReadFunc) ConstraintChoiceSetAsnRead, (AsnWriteFunc) ConstraintChoiceSetAsnWrite); } - return str; + return t; } -static CharPtr SummarizeTextMarker (TextMarkerPtr text_marker) +NLM_EXTERN void TabColumnConfigReset (TabColumnConfigPtr t) { - CharPtr summ = NULL; + if (t != NULL) { + t->match_type = MatchTypeFree (t->match_type); + t->field = FieldTypeFree (t->field); + t->constraint = ConstraintChoiceSetFree (t->constraint); + t->existing_text = ExistingTextOption_replace_old; + t->skip_blank = TRUE; + t->match_mrna = FALSE; + } +} - if (IsTextMarkerEmpty (text_marker)) { - return NULL; - } else if (text_marker->choice == TextMarker_free_text) { - summ = StringSave (text_marker->data.ptrvalue); - } else if (text_marker->choice == TextMarker_digits) { - summ = StringSave ("numbers"); - } else if (text_marker->choice == TextMarker_letters) { - summ = StringSave ("letters"); + +NLM_EXTERN ValNodePtr TabColumnConfigListFree (ValNodePtr columns) +{ + ValNodePtr vnp_next; + + while (columns != NULL) { + vnp_next = columns->next; + columns->data.ptrvalue = TabColumnConfigFree (columns->data.ptrvalue); + columns->next = NULL; + columns = ValNodeFree (columns); + columns = vnp_next; + } + return columns; +} + + +NLM_EXTERN ValNodePtr TabColumnConfigListCopy (ValNodePtr orig) +{ + ValNodePtr new_list = NULL; + TabColumnConfigPtr t; + + while (orig != NULL) { + t = TabColumnConfigCopy (orig->data.ptrvalue); + ValNodeAddPointer (&new_list, 0, t); + orig = orig->next; } - return summ; + return new_list; } 
-NLM_EXTERN CharPtr SummarizeTextPortion (TextPortionPtr text_portion) -{ - CharPtr summ = NULL; - CharPtr left_fmt = NULL, right_fmt = NULL; - CharPtr left_text = NULL, right_text = NULL; - Int4 len = 6; - - if (text_portion == NULL - || (IsTextMarkerEmpty (text_portion->left_marker) - && IsTextMarkerEmpty (text_portion->right_marker))) { - summ = StringSave ("entire text"); - } else { - left_text = SummarizeTextMarker(text_portion->left_marker); - right_text = SummarizeTextMarker(text_portion->right_marker); - if (text_portion->inside) { - if (left_text != NULL) { - if (text_portion->include_left) { - left_fmt = "starting with "; - } else { - left_fmt = "just after "; - } - len += StringLen (left_fmt) + StringLen (left_text) + 3; - } - if (right_text != NULL) { - if (text_portion->include_right) { - right_fmt = "up to and including "; - } else { - right_fmt = "up to "; - } - len += StringLen (right_fmt) + StringLen (right_text) + 3; - if (left_fmt != NULL) { - len += 2; - } - } - if (left_fmt == NULL && right_fmt == NULL) { - summ = StringSave ("entire text"); - } else { - summ = (CharPtr) MemNew (sizeof (Char) * len); - StringCat (summ, "text "); - if (left_fmt != NULL) { - StringCat (summ, left_fmt); - StringCat (summ, "'"); - StringCat (summ, left_text); - StringCat (summ, "'"); - if (right_fmt != NULL) { - StringCat (summ, ", "); - } - } - if (right_fmt != NULL) { - StringCat (summ, right_fmt); - StringCat (summ, "'"); - StringCat (summ, right_text); - StringCat (summ, "'"); +/* This checks the column names and returns a list of the feature fields */ +NLM_EXTERN ValNodePtr ValidateFeatureFieldColumnNames (ValNodePtr header_line, ValNodePtr PNTR perr_list) +{ + ValNodePtr header_vnp; + ValNodePtr err_list = NULL, col_list = NULL; + Boolean rval = TRUE; + TabColumnConfigPtr t; + FeatureFieldPtr field; + Int4 featqual, feat_type; + CharPtr first_space; + + if (header_line == NULL) + { + return FALSE; + } + + header_vnp = header_line->data.ptrvalue; + if 
(header_vnp == NULL || header_vnp->next == NULL) + { + return FALSE; + } + + /* skip ID column */ + header_vnp = header_vnp->next; + while (header_vnp != NULL && rval) + { + first_space = StringChr (header_vnp->data.ptrvalue, ' '); + if (first_space != NULL) { + *first_space = 0; + feat_type = GetFeatureTypeByName (header_vnp->data.ptrvalue); + featqual = GetFeatQualByName (first_space + 1); + *first_space = ' '; + if (feat_type < 0 || featqual < 0) { + /* unable to recognize column name */ + ValNodeAddPointer (&err_list, 0, StringSave (header_vnp->data.ptrvalue)); + /* if we're not able to send back a list of errors, just quit now */ + if (perr_list == NULL) { + rval = FALSE; } + } else if (err_list == NULL) { + /* if we've already found errors, don't bother collecting more fields */ + field = FeatureFieldNew (); + field->type = feat_type; + field->field = ValNodeNew (NULL); + field->field->choice = FeatQualChoice_legal_qual; + field->field->data.intvalue = featqual; + t = TabColumnConfigNew (); + t->field = ValNodeNew (NULL); + t->field->choice = FieldType_feature_field; + t->field->data.ptrvalue = field; + ValNodeAddPointer (&col_list, 0, t); } } else { - if (right_text != NULL) { - if (text_portion->include_right) { - right_fmt = "starting with "; - } else { - right_fmt = "after "; - } - len += StringLen (right_fmt) + StringLen (right_text) + 3; - } - if (left_text != NULL) { - if (text_portion->include_left) { - left_fmt = "up to and including "; - } else { - left_fmt = "before "; - } - len += StringLen (left_fmt) + StringLen (left_text) + 3; - if (right_fmt != NULL) { - len += 5; + featqual = GetFeatQualByName (header_vnp->data.ptrvalue); + if (featqual < 0) { + /* unable to recognize column name */ + ValNodeAddPointer (&err_list, 0, StringSave (header_vnp->data.ptrvalue)); + /* if we're not able to send back a list of errors, just quit now */ + if (perr_list == NULL) { + rval = FALSE; } + } else if (err_list == NULL) { + /* if we've already found errors, 
don't bother collecting more fields */ + field = FeatureFieldNew (); + field->type = Macro_feature_type_any; + field->field = ValNodeNew (NULL); + field->field->choice = FeatQualChoice_legal_qual; + field->field->data.intvalue = featqual; + t = TabColumnConfigNew (); + t->field = ValNodeNew (NULL); + t->field->choice = FieldType_feature_field; + t->field->data.ptrvalue = field; + ValNodeAddPointer (&col_list, 0, t); } - - if (left_fmt == NULL && right_fmt == NULL) { - summ = StringSave ("entire text"); - } else { - summ = (CharPtr) MemNew (sizeof (Char) * len); - StringCat (summ, "text "); - if (right_fmt != NULL) { - StringCat (summ, right_fmt); - StringCat (summ, "'"); - StringCat (summ, right_text); - StringCat (summ, "'"); - if (left_fmt != NULL) { - StringCat (summ, " and "); - } - } - if (left_fmt != NULL) { - StringCat (summ, left_fmt); - StringCat (summ, "'"); - StringCat (summ, left_text); - StringCat (summ, "'"); - } - } } - left_text = MemFree (left_text); - right_text = MemFree (right_text); + header_vnp = header_vnp->next; } - return summ; + if (err_list != NULL) { + col_list = TabColumnConfigListFree (col_list); + if (perr_list != NULL) { + *perr_list = err_list; + } else { + err_list = ValNodeFreeData (err_list); + } + } + return col_list; } +typedef struct findgenelocustag { + CharPtr locus_tag; + ValNodePtr gene_list; +} FindGeneLocusTagData, PNTR FindGeneLocusTagPtr; -const CharPtr kTaxnameAfterBinomialString = "Taxname after binomial"; +static void FindGeneByLocusTagBioseqCallback (BioseqPtr bsp, Pointer userdata) +{ + FindGeneLocusTagPtr p; + SeqFeatPtr gene; + SeqMgrFeatContext fcontext; + if (bsp == NULL || userdata == NULL || !ISA_na (bsp->mol)) { + return; + } -NLM_EXTERN CharPtr SummarizeParseSrc (ValNodePtr src) + p = (FindGeneLocusTagPtr) userdata; + + gene = SeqMgrGetGeneByLocusTag (bsp, p->locus_tag, &fcontext); + if (gene != NULL) { + ValNodeAddPointer (&p->gene_list, OBJ_SEQFEAT, gene); + } +} + + +typedef struct objbymatch { + 
ValNodePtr obj_list; + StringConstraintPtr scp; +} ObjByMatchData, PNTR ObjByMatchPtr; + +static void GetFeaturesByDbxrefCallback (SeqFeatPtr sfp, Pointer userdata) { - CharPtr summ = NULL; - CharPtr fmt = "structured comment field %s"; - ParseSrcOrgPtr src_org; - Boolean need_to_save = TRUE; - - if (src != NULL) { - switch (src->choice) { - case ParseSrc_defline: - summ = "defline"; - break; - case ParseSrc_flatfile: - summ = "flat file"; - break; - case ParseSrc_local_id: - summ = "local ID"; - break; - case ParseSrc_org: - src_org = (ParseSrcOrgPtr) src->data.ptrvalue; - if (src_org != NULL) { - if (src_org->field != NULL) { - if (src_org->field->choice == ParseSrcOrgChoice_taxname_after_binomial) { - summ = kTaxnameAfterBinomialString; - } else if (src_org->field->choice == ParseSrcOrgChoice_source_qual) { - summ = GetSourceQualName (src_org->field->data.intvalue); - } - } - } - break; - case ParseSrc_comment: - summ = "comment"; - break; - case ParseSrc_bankit_comment: - summ = "BankIT comment"; - break; - case ParseSrc_structured_comment: - if (!StringHasNoText (src->data.ptrvalue)) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (src->data.ptrvalue) + StringLen (fmt))); - sprintf (summ, fmt, src->data.ptrvalue); - need_to_save = FALSE; + ObjByMatchPtr p; + ValNodePtr vnp; + DbtagPtr dbt; + Char buf[20]; + Boolean found = FALSE; + + if (sfp == NULL || sfp->dbxref == NULL || userdata == NULL) return; + p = (ObjByMatchPtr) userdata; + + if (IsStringConstraintEmpty (p->scp)) return; + + for (vnp = sfp->dbxref; vnp != NULL && !found; vnp = vnp->next) { + dbt = (DbtagPtr) vnp->data.ptrvalue; + if (dbt != NULL && dbt->tag != NULL) { + if (dbt->tag->id > 0) { + sprintf (buf, "%d", dbt->tag->id); + if (DoesStringMatchConstraint (buf, p->scp)) { + found = TRUE; } - break; - case ParseSrc_file_id: - summ = "file ID"; - break; + } else if (DoesStringMatchConstraint (dbt->tag->str, p->scp)) { + found = TRUE; + } } } - if (summ == NULL) { - summ = StringSave 
("missing field"); - } else if (need_to_save) { - summ = StringSave (summ); - } - return summ; + if (found) { + ValNodeAddPointer (&(p->obj_list), OBJ_SEQFEAT, sfp); + } } -NLM_EXTERN CharPtr SummarizeParseDst (ValNodePtr dst) +static ValNodePtr GetFeaturesByDbxref (SeqEntryPtr sep, CharPtr dbxref, Uint1 match_location) { - CharPtr summ = NULL; - CharPtr fmt = "%s %s"; - CharPtr feature, field; - ParseDstOrgPtr dst_org; - Boolean need_to_save = TRUE; - FeatureFieldLegalPtr ffp; - - if (dst != NULL) { - switch (dst->choice) { - case ParseDest_defline: - summ = "defline"; - break; - case ParseDest_org: - dst_org = (ParseDstOrgPtr) dst->data.ptrvalue; - if (dst_org != NULL) { - if (dst_org->field != NULL) { - switch (dst_org->field->choice) { - case SourceQualChoice_textqual: - summ = GetSourceQualName (dst_org->field->data.intvalue); - break; - case SourceQualChoice_location: - summ = "location"; - break; - case SourceQualChoice_origin: - summ = "origin"; - break; - } - } - } - break; - case ParseDest_featqual: - ffp = (FeatureFieldLegalPtr) dst->data.ptrvalue; - if (ffp != NULL) { - feature = GetFeatureNameFromFeatureType (ffp->type); - field = GetFeatQualName (ffp->field); - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (feature) + StringLen (field))); - sprintf (summ, fmt, feature, field); - need_to_save = FALSE; - } - break; - case ParseDest_dbxref: - summ = "dbxref"; - break; - } - } - if (summ == NULL) { - summ = StringSave ("missing field"); - } else if (need_to_save) { - summ = StringSave (summ); - } - return summ; + ObjByMatchData d; + + d.scp = StringConstraintNew (); + d.scp->match_text = StringSave (dbxref); + d.scp->match_location = match_location; + d.obj_list = NULL; + VisitFeaturesInSep (sep, &d, GetFeaturesByDbxrefCallback); + d.scp = StringConstraintFree (d.scp); + return d.obj_list; } -/* summarizing AECR actions */ -static CharPtr SummarizeFieldPairType (ValNodePtr vnp, CharPtr connect_word) +static void 
GetBioSourcesByTaxNameDescriptorCallback (SeqDescrPtr sdp, Pointer userdata) { - FeatureFieldPairPtr ffp; - CDSGeneProtFieldPairPtr cgp; - SourceQualPairPtr quals; - MolinfoFieldPairPtr m_fields; - RnaQualPairPtr rna_quals; - CharPtr str = NULL; - CharPtr from_label = NULL, to_label = NULL; - CharPtr label_fmt = "%s %s %s"; - CharPtr type_label_fmt = "%s %s %s %s"; - CharPtr label = NULL; + ObjByMatchPtr p; + BioSourcePtr biop; - if (connect_word == NULL) { - connect_word = "to"; - } - if (vnp == NULL) { - str = StringSave ("missing field"); - } else { - switch (vnp->choice) { - case FieldPairType_source_qual: - if (vnp->data.ptrvalue != NULL) { - quals = (SourceQualPairPtr) vnp->data.ptrvalue; - from_label = GetSourceQualName (quals->field_from); - to_label = GetSourceQualName (quals->field_to); - } - if (from_label != NULL && to_label != NULL) { - str = (CharPtr) MemNew (sizeof (Char) * - (StringLen (from_label) + StringLen (connect_word) + StringLen (to_label) - + 3)); - sprintf (str, "%s %s %s", from_label, connect_word, to_label); - } else { - str = StringSave ("missing field"); - } - break; - case FieldPairType_feature_field: - ffp = (FeatureFieldPairPtr) vnp->data.ptrvalue; - if (ffp == NULL || ffp->field_from == NULL || ffp->field_to == NULL) { - str = StringSave ("missing field"); - } else { - label = GetFeatureNameFromFeatureType (ffp->type); - from_label = FeatureFieldLabel (label, ffp->field_from); - to_label = FeatureFieldLabel (label, ffp->field_to); - str = (CharPtr) MemNew (sizeof (Char) * - (StringLen (label_fmt) - + StringLen (from_label) + StringLen (to_label) - + StringLen (connect_word))); - sprintf (str, label_fmt, from_label, connect_word, to_label); - from_label = MemFree (from_label); - to_label = MemFree (to_label); - } - break; - case FieldPairType_cds_gene_prot: - cgp = (CDSGeneProtFieldPairPtr) vnp->data.ptrvalue; - from_label = CDSGeneProtNameFromField (cgp->field_from); - to_label = CDSGeneProtNameFromField (cgp->field_to); - str = 
(CharPtr) MemNew (sizeof (Char) * - StringLen (from_label) + StringLen (connect_word) + StringLen (to_label) - + 3); - sprintf (str, "%s %s %s", from_label, connect_word, to_label); - break; - case FieldPairType_molinfo_field: - m_fields = (MolinfoFieldPairPtr) vnp->data.ptrvalue; - from_label = NULL; - to_label = NULL; - label = NULL; - switch (m_fields->choice) { - case MolinfoFieldPair_molecule: - from_label = BiomolNameFromBiomol (BiomolFromMoleculeType (((MolinfoMoleculePairPtr) m_fields->data.ptrvalue)->from)); - to_label = BiomolNameFromBiomol (BiomolFromMoleculeType (((MolinfoMoleculePairPtr) m_fields->data.ptrvalue)->to)); - label = "molecule"; - break; - case MolinfoFieldPair_technique: - from_label = TechNameFromTech (TechFromTechniqueType (((MolinfoTechniquePairPtr) m_fields->data.ptrvalue)->from)); - to_label = TechNameFromTech (TechFromTechniqueType (((MolinfoTechniquePairPtr) m_fields->data.ptrvalue)->to)); - label = "technique"; - break; - case MolinfoFieldPair_completedness: - from_label = CompletenessNameFromCompleteness (CompletenessFromCompletednessType (((MolinfoCompletednessPairPtr) m_fields->data.ptrvalue)->from)); - to_label = CompletenessNameFromCompleteness (CompletenessFromCompletednessType (((MolinfoCompletednessPairPtr) m_fields->data.ptrvalue)->to)); - label = "completeness"; - break; - case MolinfoFieldPair_mol_class: - from_label = MolNameFromMol (MolFromMoleculeClassType (((MolinfoMolClassPairPtr) m_fields->data.ptrvalue)->from)); - to_label = MolNameFromMol (MolFromMoleculeClassType (((MolinfoMolClassPairPtr) m_fields->data.ptrvalue)->to)); - label = "class"; - break; - case MolinfoFieldPair_topology: - from_label = TopologyNameFromTopology (TopologyFromTopologyType (((MolinfoTopologyPairPtr) m_fields->data.ptrvalue)->from)); - to_label = TopologyNameFromTopology (TopologyFromTopologyType (((MolinfoTopologyPairPtr) m_fields->data.ptrvalue)->to)); - label = "topology"; - break; - case MolinfoFieldPair_strand: - from_label = 
StrandNameFromStrand (StrandFromStrandType (((MolinfoStrandPairPtr) m_fields->data.ptrvalue)->from)); - to_label = StrandNameFromStrand (StrandFromStrandType (((MolinfoStrandPairPtr) m_fields->data.ptrvalue)->to)); - label = "strand"; - break; - } - if (from_label == NULL) { - from_label = "Unknown value"; - } - if (to_label == NULL) { - to_label = "Unknown value"; - } - if (label == NULL) { - label = "Unknown molinfo field"; - } - str = (CharPtr) MemNew (sizeof (Char) * (StringLen (type_label_fmt) - + StringLen (label) - + StringLen (from_label) - + StringLen (to_label) - + StringLen (connect_word))); - sprintf (str, type_label_fmt, label, from_label, connect_word, to_label); - break; - case FieldPairType_rna_field: - if (vnp->data.ptrvalue != NULL) { - rna_quals = (RnaQualPairPtr) vnp->data.ptrvalue; - label = SummarizeRnaType (rna_quals->type); - from_label = GetNameForRnaField (rna_quals->field_from); - to_label = GetNameForRnaField (rna_quals->field_to); - } - if (from_label != NULL && to_label != NULL && label != NULL) { - str = (CharPtr) MemNew (sizeof (Char) * (StringLen (type_label_fmt) - + StringLen (label) - + StringLen (from_label) + StringLen (connect_word) + StringLen (to_label))); - sprintf (str, type_label_fmt, label, from_label, connect_word, to_label); - } else { - str = StringSave ("missing field"); - } - label = MemFree (label); - break; + if (sdp == NULL || sdp->choice != Seq_descr_source || userdata == NULL) return; + p = (ObjByMatchPtr) userdata; - default: - str = StringSave ("Invalid field type"); - break; - } + if (IsStringConstraintEmpty (p->scp)) return; + + biop = (BioSourcePtr) sdp->data.ptrvalue; + if (biop != NULL && biop->org != NULL && DoesStringMatchConstraint (biop->org->taxname, p->scp)) { + ValNodeAddPointer (&(p->obj_list), OBJ_SEQDESC, sdp); } - return str; + } -static CharPtr SummarizeApplyAction (ApplyActionPtr a) + +static void GetBioSourcesByTaxNameFeatureCallback (SeqFeatPtr sfp, Pointer userdata) { - CharPtr str = NULL; 
- CharPtr fmt = "Apply %s to %s (%s)"; - CharPtr nontextqual_fmt = "Apply %s (%s)"; - CharPtr field, existing_text; + ObjByMatchPtr p; + BioSourcePtr biop; - if (a == NULL) { - str = StringSave ("No action"); - } else if (a->value == NULL || a->field == NULL) { - str = StringSave ("Invalid action"); - } else { - field = SummarizeFieldType (a->field); - existing_text = SummarizeExistingText (a->existing_text); - if (IsFieldTypeNonText (a->field)) { - str = (CharPtr) MemNew (sizeof (Char) * StringLen (nontextqual_fmt) + StringLen (field) + StringLen (existing_text)); - sprintf (str, nontextqual_fmt, field, existing_text); - } else { - str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (a->value) + StringLen (field) + StringLen (existing_text))); - sprintf (str, fmt, a->value, field, existing_text); - } - field = MemFree (field); + if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || userdata == NULL) return; + p = (ObjByMatchPtr) userdata; + + if (IsStringConstraintEmpty (p->scp)) return; + + biop = (BioSourcePtr) sfp->data.value.ptrvalue; + if (biop != NULL && biop->org != NULL && DoesStringMatchConstraint (biop->org->taxname, p->scp)) { + ValNodeAddPointer (&(p->obj_list), OBJ_SEQFEAT, sfp); } - return str; + } -static CharPtr SummarizeEditAction (EditActionPtr a) +static ValNodePtr GetBioSourcesByTaxName (SeqEntryPtr sep, CharPtr taxname, Uint1 match_location) { - CharPtr str = NULL; - CharPtr fmt = "Edit %s replace '%s' with '%s'"; - CharPtr field; + ObjByMatchData d; - if (a == NULL) { - str = StringSave ("No action"); - } else if (a->field == NULL || a->field == NULL || a->edit == NULL || a->edit->find_txt == NULL) { - str = StringSave ("Invalid action"); - } else { - field = SummarizeFieldType (a->field); - str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (field) + StringLen (a->edit->find_txt) + StringLen (a->edit->repl_txt))); - sprintf (str, fmt, field, a->edit->find_txt, a->edit->repl_txt == NULL ? 
"" : a->edit->repl_txt); - field = MemFree (field); - } - return str; + d.scp = StringConstraintNew (); + d.scp->match_text = StringSave (taxname); + d.scp->match_location = match_location; + d.obj_list = NULL; + VisitDescriptorsInSep (sep, &d, GetBioSourcesByTaxNameDescriptorCallback); + + VisitFeaturesInSep (sep, &d, GetBioSourcesByTaxNameFeatureCallback); + d.scp = StringConstraintFree (d.scp); + return d.obj_list; } -static CharPtr SummarizeConvertAction (ConvertActionPtr a) +typedef struct objbystrinfld { + ValNodePtr obj_list; + FieldTypePtr field; + StringConstraintPtr scp; +} ObjByStrInFldData, PNTR ObjByStrInFldPtr; + + +static void GetBioSourcesBySourceQualDescriptorCallback (SeqDescrPtr sdp, Pointer userdata) { - CharPtr str = NULL; - CharPtr fmt = "Convert %s (%s)"; - CharPtr fields, existing_text; + ObjByStrInFldPtr p; + CharPtr tmp; - if (a == NULL) { - str = StringSave ("No action"); - } else if (a->fields == NULL || a->fields == NULL) { - str = StringSave ("Invalid action"); - } else { - fields = SummarizeFieldPairType (a->fields, "to"); - existing_text = SummarizeExistingText (a->existing_text); - str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fields) + StringLen (existing_text))); - sprintf (str, fmt, fields, existing_text); - fields = MemFree (fields); + if (sdp == NULL || sdp->choice != Seq_descr_source || userdata == NULL) return; + p = (ObjByStrInFldPtr) userdata; + + if (IsStringConstraintEmpty (p->scp)) return; + + tmp = GetFieldValueForObject (OBJ_SEQDESC, sdp, p->field, p->scp); + if (tmp != NULL) { + ValNodeAddPointer (&(p->obj_list), OBJ_SEQDESC, sdp); } - return str; + tmp = MemFree (tmp); } -static CharPtr SummarizeCopyAction (CopyActionPtr a) +static void GetBioSourcesBySourceQualFeatureCallback (SeqFeatPtr sfp, Pointer userdata) { - CharPtr str = NULL; - CharPtr fmt = "Copy %s (%s)"; - CharPtr fields, existing_text; + ObjByStrInFldPtr p; + CharPtr tmp; - if (a == NULL) { - str = StringSave ("No action"); - } 
else if (a->fields == NULL) { - str = StringSave ("Invalid action"); - } else { - fields = SummarizeFieldPairType (a->fields, "to"); - existing_text = SummarizeExistingText (a->existing_text); - str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fields) + StringLen (existing_text))); - sprintf (str, fmt, fields, existing_text); - fields = MemFree (fields); + if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || userdata == NULL) return; + p = (ObjByStrInFldPtr) userdata; + + if (IsStringConstraintEmpty (p->scp)) return; + + tmp = GetFieldValueForObject (OBJ_SEQFEAT, sfp, p->field, p->scp); + if (tmp != NULL) { + ValNodeAddPointer (&(p->obj_list), OBJ_SEQFEAT, sfp); } - return str; + tmp = MemFree (tmp); } -static CharPtr SummarizeSwapAction (SwapActionPtr a) +static ValNodePtr GetBioSourcesBySourceQual (SeqEntryPtr sep, SourceQualChoicePtr q, CharPtr val, Uint1 match_location) { - CharPtr str = NULL; - CharPtr fmt = "Swap %s"; - CharPtr fields; + ObjByStrInFldData od; - if (a == NULL) { - str = StringSave ("No action"); - } else if (a->fields == NULL) { - str = StringSave ("Invalid action"); - } else { - fields = SummarizeFieldPairType (a->fields, "with"); - str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fields))); - sprintf (str, fmt, fields); - fields = MemFree (fields); - } - return str; + od.scp = StringConstraintNew(); + od.scp->match_text = StringSave (val); + od.scp->match_location = match_location; + od.obj_list = NULL; + od.field = ValNodeNew (NULL); + od.field->choice = FieldType_source_qual; + od.field->data.ptrvalue = q; + + VisitDescriptorsInSep (sep, &od, GetBioSourcesBySourceQualDescriptorCallback); + + VisitFeaturesInSep (sep, &od, GetBioSourcesBySourceQualFeatureCallback); + + od.field = ValNodeFree (od.field); + od.scp = StringConstraintFree (od.scp); + return od.obj_list; } -static CharPtr SummarizeAECRParseAction (AECRParseActionPtr a) +static void GetBioseqsByIdCallback (BioseqPtr bsp, Pointer data) 
{ - CharPtr str = NULL; - CharPtr fmt = "Parse %s from %s (%s)"; - CharPtr fields, existing_text, text_portion; + ObjByMatchPtr d; + ObjectIdPtr oip; + SeqIdPtr sip; + Boolean found_match = FALSE; + DbtagPtr dbtag; + CharPtr cp, tmp_id; - if (a == NULL) { - str = StringSave ("No action"); - } else if (a->fields == NULL) { - str = StringSave ("Invalid action"); - } else { - fields = SummarizeFieldPairType (a->fields, "to"); - existing_text = SummarizeExistingText (a->existing_text); - text_portion = SummarizeTextPortion (a->portion); - str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (text_portion) + StringLen (fields) + StringLen (existing_text))); - sprintf (str, fmt, text_portion, fields, existing_text); - fields = MemFree (fields); - text_portion = MemFree (text_portion); + if (bsp == NULL || data == NULL || (d = (ObjByMatchPtr) data) == NULL) { + return; + } + + found_match = DoesSeqIDListMeetStringConstraint (bsp->id, d->scp); + + for (sip = bsp->id; sip != NULL && !found_match; sip = sip->next) { + if (sip->choice == SEQID_GENERAL && sip->data.ptrvalue != NULL) { + dbtag = (DbtagPtr) sip->data.ptrvalue; + if (StringCmp (dbtag->db, "NCBIFILE") == 0 && dbtag->tag != NULL) { + if (DoesStringMatchConstraint (dbtag->tag->str, d->scp)) { + found_match = TRUE; + } else if ((cp = StringRChr (dbtag->tag->str, '/')) != NULL) { + tmp_id = (CharPtr) MemNew (sizeof (Char) * (cp - dbtag->tag->str + 1)); + StringNCpy (tmp_id, dbtag->tag->str, cp - dbtag->tag->str); + tmp_id[cp - dbtag->tag->str] = 0; + if (DoesStringMatchConstraint (tmp_id, d->scp)) { + found_match = TRUE; + } + tmp_id = MemFree (tmp_id); + } + } + } else if (sip->choice == SEQID_LOCAL && (oip = sip->data.ptrvalue) != NULL + && StringNICmp (oip->str, "bankit", 6) == 0 + && DoesStringMatchConstraint (oip->str + 6, d->scp)) { + found_match = TRUE; + } + } + if (found_match) { + ValNodeAddPointer (&(d->obj_list), OBJ_BIOSEQ, bsp); } - return str; } -static CharPtr SummarizeRemoveAction 
(RemoveActionPtr a) +static ValNodePtr FindBioseqsByMatchType (SeqEntryPtr sep, Uint1 match_location, CharPtr match_str) { - CharPtr str = NULL; - CharPtr fmt = "Remove %s"; - CharPtr field; + ObjByMatchData d; - if (a == NULL) { - str = StringSave ("No action"); - } else if (a->field == NULL || a->field == NULL) { - str = StringSave ("Invalid action"); - } else { - field = SummarizeFieldType (a->field); - str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (field))); - sprintf (str, fmt, field); - field = MemFree (field); + if (sep == NULL || StringHasNoText (match_str)) { + return NULL; } - return str; + d.scp = StringConstraintNew (); + d.scp->match_text = StringSave (match_str); + d.scp->match_location = match_location; + d.obj_list = NULL; + VisitBioseqsInSep (sep, &d, GetBioseqsByIdCallback); + d.scp = StringConstraintFree (d.scp); + return d.obj_list; } +typedef struct bioseqsearchitem { + BioseqPtr bsp; + CharPtr str; + Int4 num; + Boolean free_str; +} BioseqSearchItemData, PNTR BioseqSearchItemPtr; -NLM_EXTERN CharPtr SummarizeAECRAction (AECRActionPtr a) +static BioseqSearchItemPtr BioseqSearchItemNewStr (BioseqPtr bsp, CharPtr str, Boolean need_free) { - CharPtr str = NULL, act = NULL, constraint = NULL; - if (a == NULL) { - str = StringSave ("No action"); - } else if (a->action == NULL) { - str = StringSave ("Invalid command"); - } else { - switch (a->action->choice) { - case ActionChoice_apply: - act = SummarizeApplyAction (a->action->data.ptrvalue); - break; - case ActionChoice_edit: - act = SummarizeEditAction (a->action->data.ptrvalue); - break; - case ActionChoice_convert: - act = SummarizeConvertAction (a->action->data.ptrvalue); - break; - case ActionChoice_copy: - act = SummarizeCopyAction (a->action->data.ptrvalue); - break; - case ActionChoice_swap: - act = SummarizeSwapAction (a->action->data.ptrvalue); - break; - case ActionChoice_remove: - act = SummarizeRemoveAction (a->action->data.ptrvalue); - break; - case 
ActionChoice_parse: - act = SummarizeAECRParseAction (a->action->data.ptrvalue); - break; - } - if (act == NULL) { - str = StringSave ("Invalid action"); - } else { - constraint = SummarizeConstraintSet (a->constraint); - if (constraint == NULL) { - str = act; - } else { - str = (CharPtr) MemNew (sizeof (Char) * (StringLen(act) + 2 + StringLen (constraint))); - sprintf (str, "%s %s", act, constraint); - act = MemFree (act); - constraint = MemFree (constraint); - } - } + BioseqSearchItemPtr bsi; + + bsi = (BioseqSearchItemPtr) MemNew (sizeof (BioseqSearchItemData)); + bsi->bsp = bsp; + bsi->str = str; + bsi->free_str = need_free; + if (IsAllDigits (bsi->str)) { + bsi->num = atoi (bsi->str); } - return str; + return bsi; } -NLM_EXTERN CharPtr SummarizeParseAction (ParseActionPtr p) +static BioseqSearchItemPtr BioseqSearchItemNewInt (BioseqPtr bsp, Int4 num) { - CharPtr field_from = NULL, field_to = NULL; - CharPtr existing_text = NULL, text_portion = NULL; - CharPtr summ = NULL; - CharPtr fmt = "Parse %s from %s to %s (%s)"; + BioseqSearchItemPtr bsi; - if (p == NULL) { - summ = StringSave ("No action"); - } else { - field_from = SummarizeParseSrc (p->src); - field_to = SummarizeParseDst (p->dest); - existing_text = SummarizeExistingText (p->existing_text); - text_portion = SummarizeTextPortion (p->portion); - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (text_portion) + StringLen (field_from) + StringLen (field_to) + StringLen (existing_text))); - sprintf (summ, fmt, text_portion, field_from, field_to, existing_text); - text_portion = MemFree (text_portion); - field_from = MemFree (field_from); - field_to = MemFree (field_to); + bsi = (BioseqSearchItemPtr) MemNew (sizeof (BioseqSearchItemData)); + bsi->bsp = bsp; + bsi->num = num; + bsi->free_str = FALSE; + return bsi; +} + + +static BioseqSearchItemPtr BioseqSearchItemFree (BioseqSearchItemPtr bsi) +{ + if (bsi != NULL) { + if (bsi->free_str) { + bsi->str = MemFree (bsi->str); + } + bsi = 
MemFree (bsi); } - return summ; + return bsi; } -static CharPtr SummarizeAutodefClauseListType (Uint2 clause_list_type) +static ValNodePtr BioseqSearchItemListFree (ValNodePtr vnp) { - CharPtr str = "complete sequence"; + ValNodePtr vnp_next; - switch (clause_list_type) { - case Autodef_list_type_feature_list: - str = "list features"; - break; - case Autodef_list_type_complete_sequence: - str = "complete sequence"; - break; - case Autodef_list_type_complete_genome: - str = "complete genome"; - break; + while (vnp != NULL) { + vnp_next = vnp->next; + vnp->next = NULL; + vnp->data.ptrvalue = BioseqSearchItemFree (vnp->data.ptrvalue); + vnp = ValNodeFree (vnp); + vnp = vnp_next; } - return str; + return vnp; } -NLM_EXTERN CharPtr SummarizeAutodefAction (AutodefActionPtr autodef) +static int CompareBioseqSearchItem (BioseqSearchItemPtr b1, BioseqSearchItemPtr b2) { - CharPtr label = NULL, mod_name; - CharPtr str = NULL; - CharPtr fmt = "Autodef %s"; - CharPtr modifiers_fmt = " with modifier"; - Int4 len; - ValNodePtr mod_names = NULL, vnp; - - if (autodef == NULL) { - str = StringSave ("No action"); - } else { - label = SummarizeAutodefClauseListType (autodef->clause_list_type); - len = StringLen (fmt) + StringLen (label); - if (autodef->modifiers != NULL) { - len += StringLen (modifiers_fmt) + 2; - for (vnp = autodef->modifiers; vnp != NULL; vnp = vnp->next) { - mod_name = GetSourceQualName (vnp->data.intvalue); - len += StringLen (mod_name) + 3; - ValNodeAddPointer (&mod_names, 0, mod_name); - } + if (b1 == NULL && b2 == NULL) { + return 0; + } else if (b1 == NULL) { + return 1; + } else if (b2 == NULL) { + return -1; + } else if (b1->num > 0 && b2->num > 0) { + if (b1->num < b2->num) { + return -1; + } else if (b1->num == b2->num) { + return 0; + } else { + return 1; } - - str = (CharPtr) MemNew (sizeof (Char) * (len + 1)); - sprintf (str, fmt, label); + } else if (b1->num > 0) { + return 1; + } else if (b2->num > 0) { + return -1; + } else { + return StringICmp 
(b1->str, b2->str); + } +} - if (autodef->modifiers != NULL) { - StringCat (str, modifiers_fmt); - if (autodef->modifiers->next != NULL) { - StringCat (str, "s"); - } - for (vnp = mod_names; vnp != NULL; vnp = vnp->next) { - StringCat (str, " "); - StringCat (str, vnp->data.ptrvalue); - if (vnp->next != NULL) { - StringCat (str, ","); - } - } - } - mod_names = ValNodeFree (mod_names); - } +static int LIBCALLBACK SortVnpByBioseqSearchItem (VoidPtr ptr1, VoidPtr ptr2) - return str; +{ + ValNodePtr vnp1; + ValNodePtr vnp2; + + if (ptr1 == NULL || ptr2 == NULL) return 0; + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + if (vnp1 == NULL || vnp2 == NULL) return 0; + + return CompareBioseqSearchItem(vnp1->data.ptrvalue, vnp2->data.ptrvalue); } -NLM_EXTERN CharPtr SummarizeRemoveDescriptorAction (RemoveDescriptorActionPtr a) +NLM_EXTERN void InitValNodeBlock (ValNodeBlockPtr vnbp, ValNodePtr list) { - CharPtr label = NULL; - CharPtr constraint, str; - CharPtr fmt = "Remove %s"; - CharPtr constraint_fmt = "Remove %s descriptors %s"; - - if (a == NULL) { - str = StringSave ("No action"); - } else { - label = GetDescriptorNameFromDescriptorType (a->type); - constraint = SummarizeConstraintSet (a->constraint); - if (constraint == NULL) { - str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label))); - sprintf (str, fmt, label); - } else { - str = (CharPtr) MemNew (sizeof (Char) * (StringLen (constraint_fmt) + StringLen (label) + StringLen (constraint))); - sprintf (str, constraint_fmt, label, constraint); - constraint = MemFree (constraint); + vnbp->head = list; + vnbp->tail = list; + if (vnbp->tail != NULL) { + while (vnbp->tail->next != NULL) { + vnbp->tail = vnbp->tail->next; } } - - return str; } -NLM_EXTERN CharPtr SummarizeFixPubCapsAction (FixPubCapsActionPtr a) +NLM_EXTERN void ValNodeAddPointerToEnd (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data) { - CharPtr constraint = NULL; - Int4 len = 0; - CharPtr affil = 
"affiliation"; - CharPtr title = "title"; - CharPtr authors = "authors"; - Boolean first = TRUE; - CharPtr summ = NULL; + ValNodePtr vnp_new; - if (a == NULL) { - return NULL; + vnp_new = ValNodeAddPointer (&(vnbp->tail), choice, data); + if (vnbp->head == NULL) { + vnbp->head = vnp_new; } + vnbp->tail = vnp_new; +} - if (a->title) { - len += 6 + StringLen (title); - } - if (a->authors) { - len += 6 + StringLen (authors); - } - if (a->affiliation) { - len += 6 + StringLen (affil); - } - if (len > 0) { - constraint = SummarizeConstraintSet (a->constraint); - len += StringLen (constraint) + 14; - summ = (CharPtr) MemNew (sizeof (Char) * len); - sprintf (summ, "Fix pub "); - if (a->title) { - StringCat (summ, title); - first = FALSE; - } - if (a->authors) { - if (!first) { - if (a->affiliation) { - StringCat (summ, ", "); - } else { - StringCat (summ, " and "); - } - } - first = FALSE; - StringCat (summ, authors); - } - if (a->affiliation) { - if (!first) { - if (a->title && a->authors) { - StringCat (summ, ", and "); - } else { - StringCat (summ, " and "); - } - } - first = FALSE; - StringCat (summ, affil); - } - if (constraint != NULL) { - StringCat (summ, " where "); - StringCat (summ, constraint); +NLM_EXTERN void ValNodeAddPointerToFront (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data) +{ + ValNodePtr vnp; + + vnp = ValNodeNew (NULL); + vnp->choice = choice; + vnp->data.ptrvalue = data; + vnp->next = vnbp->head; + vnbp->head = vnp; +} + + +NLM_EXTERN void ValNodeLinkToEnd (ValNodeBlockPtr vnbp, ValNodePtr list) +{ + if (vnbp->head == NULL) { + vnbp->head = list; + vnbp->tail = list; + } else { + vnbp->tail->next = list; + while (vnbp->tail->next != NULL) { + vnbp->tail = vnbp->tail->next; } - constraint = MemFree (constraint); } +} - return summ; + +static SeqIdPtr FindLocalId (SeqIdPtr list) +{ + while (list != NULL && list->choice != SEQID_LOCAL) { + list = list->next; + } + return list; } -/* summarizing constraints */ -static CharPtr GetStringLocationPhrase 
(Uint2 match_location, Boolean not_present) +static void BuildIdStringsListForIdList (SeqIdPtr sip_list, BioseqPtr bsp, ValNodeBlockPtr block) { - CharPtr location_word = NULL; + SeqIdPtr sip, sip_next, local; + CharPtr id, cp, tmp; + DbtagPtr dbtag; + ObjectIdPtr oid; + Int4 len; - switch (match_location) { - case String_location_contains : - if (not_present) { - location_word = "does not contain"; - } else { - location_word = "contains"; - } - break; - case String_location_equals : - if (not_present) { - location_word = "does not equal"; - } else { - location_word = "equals"; - } - break; - case String_location_starts : - if (not_present) { - location_word = "does not start with"; - } else { - location_word = "starts with"; - } - break; - case String_location_ends : - if (not_present) { - location_word = "does not end with"; - } else { - location_word = "ends with"; + for (sip = sip_list; sip != NULL; sip = sip->next) { + sip_next = sip->next; + sip->next = NULL; + id = SeqIdWholeLabel (sip, PRINTID_FASTA_LONG); + sip->next = sip_next; + if (id != NULL) { + /* remove terminating pipe character */ + if (id[StringLen(id) - 1] == '|') + { + id[StringLen(id) - 1] = 0; } - break; - case String_location_inlist : - if (not_present) { - location_word = "is not one of"; + ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE)); + + /* remove leading pipe identifier */ + cp = StringChr (id, '|'); + if (cp != NULL) + { + cp = cp + 1; + ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, cp, FALSE)); } else { - location_word = "is one of"; + cp = id; } - break; + + if (sip->choice == SEQID_GENBANK + || sip->choice == SEQID_EMBL + || sip->choice == SEQID_DDBJ + || sip->choice == SEQID_TPG + || sip->choice == SEQID_TPE + || sip->choice == SEQID_TPD + || sip->choice == SEQID_PIR + || sip->choice == SEQID_SWISSPROT) { + /* if this is an ID that has a version, try text without version */ + id = StringSave (cp); + cp = StringChr (id, '.'); + if (cp != 
NULL && IsAllDigits (cp + 1)) + { + *cp = 0; + ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE)); + } else { + id = MemFree (id); + } + } + + /* just bankit number */ + if (sip->choice == SEQID_GENERAL + && (dbtag = (DbtagPtr) sip->data.ptrvalue) != NULL) { + if (StringCmp (dbtag->db, "BankIt") == 0) { + if (dbtag->tag->id > 0) { + id = (CharPtr) MemNew (sizeof (Char) * 22); + sprintf (id, "BankIt%d", dbtag->tag->id); + ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE)); + ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewInt (bsp, dbtag->tag->id)); + } else { + id = (CharPtr) MemNew (sizeof (Char) * (8 + StringLen (dbtag->tag->str))); + sprintf (id, "BankIt%s", dbtag->tag->str); + ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE)); + ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, dbtag->tag->str, FALSE)); + } + /* also look for BankIt id with forward slash instead of _ */ + if ((cp = StringRChr (id, '_')) != NULL) { + len = cp - id; + tmp = StringSave (id); + tmp[len] = '/'; + ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, tmp, TRUE)); + } + } else if (StringCmp (dbtag->db, "NCBIFILE") == 0 && dbtag->tag != NULL) { + ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, dbtag->tag->str, FALSE)); + if ((local = FindLocalId(bsp->id)) != NULL + && (oid = (ObjectIdPtr) local->data.ptrvalue) != NULL + && oid->str != NULL + && (cp = StringSearch (dbtag->tag->str, oid->str)) == dbtag->tag->str + StringLen (dbtag->tag->str) - StringLen (oid->str)) { + /* file ID already ends with local ID, don't need to add twice, but do add file name */ + id = (CharPtr) MemNew (sizeof (Char) * (cp - dbtag->tag->str)); + StringNCpy (id, dbtag->tag->str, cp - dbtag->tag->str - 1); + ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE)); + } else if ((cp = StringRChr (dbtag->tag->str, '/')) != NULL) { + ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr 
(bsp, cp + 1, FALSE)); + /* also add string for just file name */ + id = (CharPtr) MemNew (sizeof (Char) * (cp - dbtag->tag->str)); + StringNCpy (id, dbtag->tag->str, cp - dbtag->tag->str - 1); + ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE)); + } + } + } + } } - return location_word; } -static CharPtr SummarizeStringConstraint (StringConstraintPtr constraint) +static void BuildIDStringsListCallback (BioseqPtr bsp, Pointer data) { - CharPtr location_word = NULL; - CharPtr case_sensitive = "case-sensitive"; - CharPtr whole_word = "whole word"; - CharPtr str = NULL; - Int4 len; - CharPtr fmt = "%s '%s'"; + if (bsp != NULL) { + BuildIdStringsListForIdList (bsp->id, bsp, (ValNodeBlockPtr) data); + } +} - if (constraint == NULL || constraint->match_text == NULL) return NULL; - location_word = GetStringLocationPhrase (constraint->match_location, constraint->not_present); - if (location_word == NULL) return NULL; - len = StringLen (location_word) + StringLen (constraint->match_text) + StringLen (fmt); - if (constraint->case_sensitive) { - len += StringLen (case_sensitive) + 3; - } - if (constraint->whole_word) { - len += StringLen (whole_word) + 3; - } - str = (CharPtr) MemNew (sizeof (Char) * len); - sprintf (str, fmt, location_word, constraint->match_text); - if (constraint->case_sensitive || constraint->whole_word) { - StringCat (str, " ("); - } - if (constraint->case_sensitive) { - StringCat (str, case_sensitive); - if (constraint->whole_word) { - StringCat (str, ", "); +static void AddBankItSingletons (ValNodeBlockPtr list) +{ + BioseqSearchItemPtr item, item2; + ValNodePtr vnp, forw; + CharPtr bankit_str = NULL, cp; + ValNodePtr other_list = NULL; + Int4 len1, len2; + Boolean add_truncated; + + for (vnp = list->head; vnp != NULL; vnp = vnp->next) { + item = (BioseqSearchItemPtr) vnp->data.ptrvalue; + if (item != NULL && StringNICmp (item->str, "BankIt", 6) == 0 + && item->str[6] != '|' + && StringChr (item->str, '_') != NULL) { + 
ValNodeAddPointer (&other_list, 0, item); } } - if (constraint->whole_word) { - StringCat (str, whole_word); - } - if (constraint->case_sensitive || constraint->whole_word) { - StringCat (str, ")"); + other_list = ValNodeSort (other_list, SortVnpByBioseqSearchItem); + vnp = other_list; + while (vnp != NULL) { + item = (BioseqSearchItemPtr) vnp->data.ptrvalue; + add_truncated = TRUE; + if (vnp->next != NULL) { + item2 = vnp->next->data.ptrvalue; + cp = StringRChr (item->str, '_'); + len1 = cp - item->str; + cp = StringRChr (item2->str, '_'); + len2 = cp - item2->str; + if (len1 == len2 && StringNICmp (item->str, item2->str, len1) == 0) { + add_truncated = FALSE; + forw = vnp->next->next; + while (forw != NULL && (item2 = (BioseqSearchItemPtr) forw->data.ptrvalue) != NULL + && (cp = StringRChr (item2->str, '_')) != NULL + && (len2 = cp - item2->str) == len1 + && StringNICmp (item->str, item2->str, len1) == 0) { + forw = forw->next; + } + vnp = forw; + } + } + if (add_truncated) { + bankit_str = StringSave (item->str); + cp = StringRChr (bankit_str, '_'); + if (cp != NULL) { + *cp = 0; + } + ValNodeAddPointerToEnd (list, 0, BioseqSearchItemNewStr (item->bsp, bankit_str, TRUE)); + vnp = vnp->next; + } } - - return str; + other_list = ValNodeFree (other_list); } + + +/* first are str, second are int */ +typedef struct bioseqsearchindex { + Int4 num_str; + Int4 num_int; + Int4 num_total; + BioseqSearchItemPtr PNTR items; +} BioseqSearchIndexData, PNTR BioseqSearchIndexPtr; -static CharPtr SummarizePartialnessForLocationConstraint (LocationConstraintPtr constraint) +static BioseqSearchIndexPtr BioseqSearchIndexFree (BioseqSearchIndexPtr index) { - if (constraint == NULL - || (constraint->partial5 == Partial_constraint_either - && constraint->partial3 == Partial_constraint_either)) { - return NULL; - } - if (constraint->partial5 == Partial_constraint_either) { - if (constraint->partial3 == Partial_constraint_partial) { - return "that are 3' partial"; - } else { - return 
"that are 3' complete"; - } - } else if (constraint->partial3 == Partial_constraint_either) { - if (constraint->partial5 == Partial_constraint_partial) { - return "that are 5' partial"; - } else { - return "that are 5' complete"; + Int4 i; + + if (index != NULL) { + for (i = 0; i < index->num_total; i++) { + index->items[i] = BioseqSearchItemFree(index->items[i]); } - } else if (constraint->partial5 == Partial_constraint_partial - && constraint->partial3 == Partial_constraint_partial) { - return "that are partial on both ends"; - } else if (constraint->partial5 == Partial_constraint_complete - && constraint->partial3 == Partial_constraint_complete) { - return "that are complete on both ends"; - } else if (constraint->partial5 == Partial_constraint_complete - && constraint->partial3 == Partial_constraint_partial) { - return "that are 5' complete and 3' partial"; - } else if (constraint->partial5 == Partial_constraint_partial - && constraint->partial3 == Partial_constraint_complete) { - return "that are 5' partial and 3' complete"; - } else { - return NULL; + index->items = MemFree (index->items); + index = MemFree (index); } + return index; } -static CharPtr SummarizeLocationType (LocationConstraintPtr constraint) +static BioseqSearchIndexPtr BuildIDStringsList (SeqEntryPtr sep) { - if (constraint == NULL - || constraint->location_type == Location_type_constraint_any) { - return NULL; - } else if (constraint->location_type == Location_type_constraint_single_interval) { - return "with single interval"; - } else if (constraint->location_type == Location_type_constraint_joined) { - return "with joined intervals"; - } else if (constraint->location_type == Location_type_constraint_ordered) { - return "with ordered intervals"; - } else { - return NULL; - } -} + ValNodeBlock vnb; + ValNodePtr list = NULL, vnp; + Int4 num_total, i; + BioseqSearchIndexPtr index; + vnb.head = NULL; + vnb.tail = NULL; -static CharPtr distance_words[] = { NULL, "exactly", "no more than", "no 
less than" }; + VisitBioseqsInSep (sep, &vnb, BuildIDStringsListCallback); + AddBankItSingletons(&vnb); + list = vnb.head; + list = ValNodeSort (list, SortVnpByBioseqSearchItem); -static CharPtr SummarizeEndDistance (ValNodePtr vnp, CharPtr end_name) -{ - CharPtr str = NULL; - CharPtr fmt = "with %s %s %d from end of sequence"; + num_total = ValNodeLen (list); - if (vnp == NULL || vnp->choice < 1 || vnp->choice > 3) { - return NULL; + index = (BioseqSearchIndexPtr) MemNew (sizeof (BioseqSearchIndexData)); + index->items = (BioseqSearchItemPtr PNTR) MemNew (sizeof (BioseqSearchItemPtr) * num_total); + for (vnp = list, i = 0; vnp != NULL && i < num_total; vnp = vnp->next, i++) { + index->items[i] = vnp->data.ptrvalue; + vnp->data.ptrvalue = NULL; + if (index->items[i]->num > 0) { + index->num_int++; + } else { + index->num_str++; + } } + index->num_total = index->num_int + index->num_str; + list = ValNodeFree (list); - str = (CharPtr) MemNew (sizeof (Char) * (StringLen (distance_words[vnp->choice]) + StringLen (end_name) + StringLen (fmt) + 15)); - sprintf (str, fmt, end_name, distance_words[vnp->choice], vnp->data.intvalue); - - return str; + return index; } -static CharPtr SummarizeLocationConstraint (LocationConstraintPtr constraint) + +static BioseqPtr FindStringInIdListIndex (CharPtr str, BioseqSearchIndexPtr index) { - CharPtr str = NULL; - CharPtr strand_word = NULL, seq_word = NULL; - CharPtr fmt = "only objects"; - CharPtr partial; - CharPtr location_type; - CharPtr dist5 = NULL, dist3 = NULL; - Int4 len = 0; + CharPtr tmp; + Int4 match, imax, imin, i, j; + Int4 num = -1; - if (IsLocationConstraintEmpty (constraint)) { + if (index == NULL) { return NULL; } - - partial = SummarizePartialnessForLocationConstraint (constraint); - location_type = SummarizeLocationType(constraint); - dist5 = SummarizeEndDistance (constraint->end5, "5' end"); - dist3 = SummarizeEndDistance (constraint->end3, "3' end"); - - if (constraint->seq_type == Seqtype_constraint_nuc) { - 
seq_word = "nucleotide sequences"; - } else if (constraint->seq_type == Seqtype_constraint_prot) { - seq_word = "protein sequences"; - } - - if (constraint->strand == Strand_constraint_plus) { - strand_word = " on plus strands"; - } else if (constraint->strand == Strand_constraint_minus) { - strand_word = " on minus strands"; - } - - len = StringLen (fmt) + 1; - if (strand_word != NULL) { - len += StringLen (strand_word); - } - if (seq_word != NULL) { - len += StringLen (seq_word) + 4; - } - if (partial != NULL) { - len += StringLen (partial) + 2; - } - if (location_type != NULL) { - len += StringLen (location_type) + 2; - } - if (dist5 != NULL) { - len += StringLen (dist5) + 1; - } - if (dist3 != NULL) { - len += StringLen (dist3) + 1; - } - str = (CharPtr) MemNew (sizeof (Char) * len); - sprintf (str, "%s", fmt); - if (strand_word == NULL && seq_word != NULL) { - StringCat (str, " on "); - StringCat (str, seq_word); - } else if (strand_word != NULL) { - StringCat (str, strand_word); - if (seq_word != NULL) { - StringCat (str, " of "); - StringCat (str, seq_word); + if (IsAllDigits (str)) { + match = atoi (str); + imax = index->num_total - 1; + imin = index->num_str; + while (imax >= imin) + { + i = (imax + imin)/2; + if (index->items[i]->num > match) + imax = i - 1; + else if (index->items[i]->num < match) + imin = i + 1; + else + { + num = i; + break; + } + } + + } else { + imax = index->num_str - 1; + imin = 0; + while (imax >= imin) + { + i = (imax + imin)/2; + tmp = index->items[i]->str; + if ((j = StringICmp(tmp, str)) > 0) + imax = i - 1; + else if (j < 0) + imin = i + 1; + else + { + num = i; + break; + } } - } - if (partial != NULL) { - StringCat (str, " "); - StringCat (str, partial); - } - if (location_type != NULL) { - StringCat (str, " "); - StringCat (str, location_type); } - if (dist5 != NULL) { - StringCat (str, " "); - StringCat (str, dist5); - dist5 = MemFree (dist5); - } - if (dist3 != NULL) { - StringCat (str, " "); - StringCat (str, dist3); - 
dist3 = MemFree (dist3); + if (num > -1) { + return index->items[num]->bsp; + } else { + return NULL; } - - return str; } -static CharPtr SummarizeSourceConstraint (SourceConstraintPtr constraint) +static ValNodePtr FindListInIdListIndex (Uint1 match_location, CharPtr match_str, BioseqSearchIndexPtr index) { - CharPtr string, intro = NULL, field1, field2; - CharPtr match_fmt = "%s %s matches %s"; - CharPtr present_fmt = "%s %s is present"; - CharPtr text_fmt = "%s text %s"; - CharPtr two_match_fmt = "%s %s matches %s and %s %s"; - CharPtr one_match_fmt = "%s %s %s"; - CharPtr summ = NULL; - - if (constraint == NULL) return NULL; - - string = SummarizeStringConstraint (constraint->constraint); - field1 = SummarizeSourceQual (constraint->field1); - field2 = SummarizeSourceQual (constraint->field2); - - if (constraint->field1 == NULL && constraint->field2 == NULL && string == NULL) { - if (constraint->type_constraint == Object_type_constraint_feature) { - summ = StringSave ("where source is a feature"); - } else if (constraint->type_constraint == Object_type_constraint_descriptor) { - summ = StringSave ("where source is a descriptor"); - } - } else { - if (constraint->type_constraint == Object_type_constraint_any) { - intro = "where source"; - } else if (constraint->type_constraint == Object_type_constraint_feature) { - intro = "where source feature"; - } else if (constraint->type_constraint == Object_type_constraint_descriptor) { - intro = "where source descriptor"; - } else { - string = MemFree (string); - field1 = MemFree (field1); - field2 = MemFree (field2); - return NULL; - } + Int4 i; + ValNodePtr list = NULL; + StringConstraintPtr scp; - if (string == NULL) { - if (field1 == NULL && field2 == NULL) { - if (constraint->type_constraint == Object_type_constraint_feature) { - summ = StringSave ("where source is a feature"); - } else if (constraint->type_constraint == Object_type_constraint_descriptor) { - summ = StringSave ("where source is a descriptor"); - } - } 
else if (field1 != NULL && field2 != NULL) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (match_fmt) + StringLen (intro) + StringLen (field1) + StringLen (field2))); - sprintf (summ, match_fmt, intro, field1, field2); - } else if (field1 != NULL) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (intro) + StringLen (field1))); - sprintf (summ, present_fmt, intro, field1); - } else if (field2 != NULL) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (intro) + StringLen (field2))); - sprintf (summ, present_fmt, intro, field2); - } - } else { - if (field1 == NULL && field2 == NULL) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (text_fmt) + StringLen (intro) + StringLen (string))); - sprintf (summ, text_fmt, intro, string); - } else if (field1 != NULL && field2 != NULL) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (two_match_fmt) + StringLen (intro) - + 2 * StringLen (field1) + StringLen (field2) + StringLen (string))); - sprintf (summ, two_match_fmt, intro, field1, field2, field1, string); - } else if (field1 != NULL) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (intro) + StringLen (field1) + StringLen (string))); - sprintf (summ, one_match_fmt, intro, field1, string); - } else if (field2 != NULL) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (intro) + StringLen (field2) + StringLen (string))); - sprintf (summ, one_match_fmt, intro, field2, string); - } + if (StringHasNoText (match_str) || index == NULL) { + return NULL; + } + scp = StringConstraintNew (); + scp->match_text = StringSave (match_str); + scp->match_location = match_location; + for (i = 0; i < index->num_str; i++) { + if (DoesStringMatchConstraint (index->items[i]->str, scp)) { + ValNodeAddPointer (&list, OBJ_BIOSEQ, index->items[i]->bsp); } } - string = MemFree (string); - field1 = MemFree (field1); - field2 = MemFree (field2); - 
return summ; -} + scp = StringConstraintFree (scp); + list = ValNodeSort (list, SortVnpByChoiceAndPtrvalue); + ValNodeUnique (&list, SortVnpByChoiceAndPtrvalue, ValNodeFree); + return list; +} -static CharPtr SummarizeCDSGeneProtPseudoConstraint (CDSGeneProtPseudoConstraintPtr constraint) +static ValNodePtr +FindMatchForRowEx +(MatchTypePtr match_type, + CharPtr match_str, + Uint2 entityID, + SeqEntryPtr sep, + BioseqSearchIndexPtr index + ) { - CharPtr summ = NULL, pseudo_feat; - CharPtr is_pseudo_fmt = "where %s is pseudo"; - CharPtr not_pseudo_fmt = "where %s is not pseudo"; + ValNodePtr match_list = NULL; + FindGeneLocusTagData fd; + SeqFeatPtr sfp; + SeqMgrFeatContext fcontext; + BioseqPtr bsp; - if (constraint != NULL) { - pseudo_feat = CDSGeneProtFeatureNameFromFeatureType (constraint->feature); - if (pseudo_feat != NULL) { - if (constraint->is_pseudo) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (is_pseudo_fmt) + StringLen (pseudo_feat))); - sprintf (summ, is_pseudo_fmt, pseudo_feat); + if (match_type == NULL || sep == NULL) return NULL; + + switch (match_type->choice) { + case eTableMatchFeatureID: + sfp = SeqMgrGetFeatureByFeatID (entityID, NULL, match_str, NULL, &fcontext); + if (sfp != NULL) { + ValNodeAddPointer (&match_list, OBJ_SEQFEAT, sfp); + } + break; + case eTableMatchGeneLocusTag: + fd.locus_tag = match_str; + fd.gene_list = NULL; + VisitBioseqsInSep (sep, &fd, FindGeneByLocusTagBioseqCallback); + ValNodeLink (&match_list, fd.gene_list); + break; + case eTableMatchProteinID: + case eTableMatchNucID: + if (match_type->match_location == String_location_equals && index != NULL) { + bsp = FindStringInIdListIndex (match_str, index); + if (bsp != NULL) { + ValNodeAddPointer (&match_list, OBJ_BIOSEQ, bsp); + } + } else if (index != NULL) { + ValNodeLink (&match_list, FindListInIdListIndex (match_type->match_location, match_str, index)); } else { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (not_pseudo_fmt) + StringLen 
(pseudo_feat))); - sprintf (summ, not_pseudo_fmt, pseudo_feat); + ValNodeLink (&match_list, FindBioseqsByMatchType (sep, match_type->match_location, match_str)); } - } + break; + case eTableMatchDbxref: + match_list = GetFeaturesByDbxref (sep, match_str, match_type->match_location); + break; + case eTableMatchBioSource: + match_list = GetBioSourcesByTaxName (sep, match_str, match_type->match_location); + break; + case eTableMatchSourceQual: + match_list = GetBioSourcesBySourceQual (sep, match_type->data, match_str, match_type->match_location); + break; } - - return summ; + return match_list; } -static CharPtr SummarizeCDSGeneProtQualConstraint (CDSGeneProtQualConstraintPtr constraint) +static ValNodePtr +FindMatchForRow +(MatchTypePtr match_type, + CharPtr match_str, + Uint2 entityID, + SeqEntryPtr sep + ) { - CharPtr string, field1 = NULL, field2 = NULL; - CharPtr match_fmt = "where %s matches %s"; - CharPtr present_fmt = "where %s is present"; - CharPtr text_fmt = "where CDS-gene-prot text %s"; - CharPtr two_match_fmt = "where %s matches %s and %s %s"; - CharPtr one_match_fmt = "where %s %s"; - CharPtr summ = NULL; + return FindMatchForRowEx (match_type, match_str, entityID, sep, NULL); +} - if (constraint == NULL) return NULL; - string = SummarizeStringConstraint (constraint->constraint); - if (constraint->field1 != NULL && constraint->field1->choice == CDSGeneProtConstraintField_field) { - field1 = CDSGeneProtNameFromField (constraint->field1->data.intvalue); - } - if (constraint->field2 != NULL && constraint->field2->choice == CDSGeneProtConstraintField_field) { - field2 = CDSGeneProtNameFromField (constraint->field2->data.intvalue); +static ValNodePtr GetFeatureListForProteinBioseq (Uint1 featdef, BioseqPtr bsp) +{ + ValNodePtr feat_list = NULL; + SeqFeatPtr sfp, cds; + SeqMgrFeatContext fcontext; + Int4 seqfeattype; + + if (bsp == NULL || !ISA_aa (bsp->mol)) + { + return NULL; } - if (string == NULL) { - if (field1 != NULL && field2 != NULL) { - summ = 
(CharPtr) MemNew (sizeof (Char) * (StringLen (match_fmt) + StringLen (field1) + StringLen (field2))); - sprintf (summ, match_fmt, field1, field2); - } else if (field1 != NULL) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (field1))); - sprintf (summ, present_fmt, field1); - } else if (field2 != NULL) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (field2))); - sprintf (summ, present_fmt, field2); + seqfeattype = FindFeatFromFeatDefType (featdef); + if (seqfeattype == SEQFEAT_PROT) + { + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext)) + { + ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp); } - } else { - if (field1 == NULL && field2 == NULL) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (text_fmt) + StringLen (string))); - sprintf (summ, text_fmt, string); - } else if (field1 != NULL && field2 != NULL) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (two_match_fmt) - + 2 * StringLen (field1) + StringLen (field2) + StringLen (string))); - sprintf (summ, two_match_fmt, field1, field2, field1, string); - } else if (field1 != NULL) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (field1) + StringLen (string))); - sprintf (summ, one_match_fmt, field1, string); - } else if (field2 != NULL) { - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (field2) + StringLen (string))); - sprintf (summ, one_match_fmt, field2, string); + } + else + { + cds = SeqMgrGetCDSgivenProduct (bsp, NULL); + if (cds != NULL) + { + if (featdef == FEATDEF_CDS) + { + sfp = cds; + } + else if (featdef == FEATDEF_GENE) + { + sfp = GetGeneForFeature (cds); + } + else if (featdef == FEATDEF_mRNA) + { + sfp = GetmRNAforCDS (cds); + } + if (sfp != NULL) + { + ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp); + } } } - - string = MemFree (string); - /* 
note - field1 and field2 aren't allocated, so we don't need to free them */ - - return summ; + return feat_list; } -const CharPtr s_QuantityWords [] = { "exactly", "more than", "less than" }; -const Int4 k_NumQuantityWords = sizeof (s_QuantityWords) / sizeof (CharPtr); - -static CharPtr SummarizeFeatureQuantity (ValNodePtr v) +static ValNodePtr GetFeatureListForNucleotideBioseq (Uint1 featdef, BioseqPtr bsp) { - CharPtr fmt = "sequence has %s %d feature%s"; - CharPtr summ = NULL; + ValNodePtr feat_list = NULL; + SeqFeatPtr sfp; + SeqMgrFeatContext fcontext; + Int4 seqfeattype; + BioseqPtr prot_bsp; - if (v == NULL || v->choice < 1 || v->choice > k_NumQuantityWords) { + if (bsp == NULL || ISA_aa (bsp->mol)) + { return NULL; } - - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (s_QuantityWords[v->choice - 1]) + 15)); - sprintf (summ, fmt, s_QuantityWords[v->choice - 1], v->data.intvalue, v->data.intvalue == 1 ? "" : "s"); - return summ; + + seqfeattype = FindFeatFromFeatDefType (featdef); + if (seqfeattype == SEQFEAT_PROT) + { + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &fcontext); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &fcontext)) + { + prot_bsp = BioseqFindFromSeqLoc (sfp->product); + ValNodeLink (&feat_list, GetFeatureListForProteinBioseq (featdef, prot_bsp)); + } + } + else + { + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext)) + { + ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp); + } + } + return feat_list; } -static CharPtr SummarizeSequenceConstraint (SequenceConstraintPtr constraint) +static ValNodePtr GetFeaturesForGene (SeqFeatPtr gene, Uint1 featdef) { - CharPtr summ = NULL; - CharPtr seq_word = NULL, featpresent = NULL, id = NULL; - Int4 len = 0; - CharPtr seq_word_intro = "where sequence type is "; - CharPtr feat_after = " is present"; - CharPtr id_intro = "sequence ID "; - CharPtr 
feat_quantity = NULL; - - if (IsSequenceConstraintEmpty (constraint)) { - summ = StringSave ("Missing sequence constraint"); - } else { - if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) { - switch (constraint->seqtype->choice) { - case SequenceConstraintMolTypeConstraint_nucleotide: - seq_word = "nucleotide"; - break; - case SequenceConstraintMolTypeConstraint_dna: - seq_word = "DNA"; - break; - case SequenceConstraintMolTypeConstraint_rna: - if (constraint->seqtype->data.intvalue == Sequence_constraint_rnamol_any) { - seq_word = "RNA"; - } else { - seq_word = GetBiomolNameForRnaType (constraint->seqtype->data.intvalue); - } - break; - case SequenceConstraintMolTypeConstraint_protein: - seq_word = "protein"; - break; - } - } + BioseqPtr bsp; + SeqFeatPtr sfp; + ValNodePtr feat_list = NULL; + SeqMgrFeatContext fcontext; + Int4 start, stop, swap; - if (constraint->feature != Feature_type_any) { - featpresent = GetFeatureNameFromFeatureType (constraint->feature); - } + if (gene == NULL) return NULL; - if (!IsStringConstraintEmpty (constraint->id)) { - id = SummarizeStringConstraint (constraint->id); - } - - if (seq_word != NULL) { - len += StringLen (seq_word) + StringLen (seq_word_intro); + bsp = BioseqFindFromSeqLoc (gene->location); + start = SeqLocStart (gene->location); + stop = SeqLocStop (gene->location); + if (stop < start) + { + swap = start; + start = stop; + stop = swap; + } + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext); + sfp != NULL && fcontext.left < stop; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext)) + { + if (fcontext.right >= start && gene == GetGeneForFeature (sfp)) + { + ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp); } + } + return feat_list; +} - if (featpresent != NULL) { - if (len == 0) { - len += 6; - } else { - len += 5; - } - len += StringLen (featpresent); - len += StringLen (feat_after); - } - if (id != NULL) { - if (len == 0) { - len += 6; 
- } else { - len += 5; - } - len += StringLen (id_intro); - len += StringLen (id); - } +static ValNodePtr GetFeatureListForGene (Uint1 featdef, SeqFeatPtr gene) +{ + ValNodePtr feat_list = NULL, cds_list, vnp; + SeqFeatPtr sfp, cds; + SeqMgrFeatContext fcontext; + BioseqPtr protbsp; - feat_quantity = SummarizeFeatureQuantity (constraint->num_features); - if (feat_quantity != NULL) { - len += StringLen (feat_quantity) + 6; - } + if (gene == NULL) + { + return NULL; + } - if (len == 0) { - summ = StringSave ("missing sequence constraint"); - } else { - len++; - summ = (CharPtr) MemNew (sizeof (Char) * len); - summ[0] = 0; - if (seq_word != NULL) { - StringCat (summ, seq_word_intro); - StringCat (summ, seq_word); - } - if (featpresent != NULL) { - if (seq_word == NULL) { - StringCat (summ, "where "); - } else { - StringCat (summ, " and "); - } - StringCat (summ, featpresent); - StringCat (summ, feat_after); - } - if (id != NULL) { - if (seq_word == NULL && featpresent == NULL) { - StringCat (summ, "where "); - } else { - StringCat (summ, " and "); + if (featdef == FEATDEF_GENE) + { + ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, gene); + } + else if (FindFeatFromFeatDefType (featdef == SEQFEAT_PROT)) + { + cds_list = GetFeaturesForGene (gene, FEATDEF_CDS); + for (vnp = cds_list; vnp != NULL; vnp = vnp->next) + { + cds = vnp->data.ptrvalue; + if (cds != NULL) + { + protbsp = BioseqFindFromSeqLoc (cds->product); + for (sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, featdef, &fcontext); + sfp != NULL; + sfp = SeqMgrGetNextFeature (protbsp, sfp, 0, featdef, &fcontext)) + { + ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp); } - StringCat (summ, id_intro); - StringCat (summ, id); } - if (feat_quantity != NULL) { - if (StringHasNoText (summ)) { - StringCat (summ, "where "); - } else { - StringCat (summ, " and "); + } + cds_list = ValNodeFree (cds_list); + } + else + { + feat_list = GetFeaturesForGene (gene, featdef); + } + + return feat_list; +} + + +static ValNodePtr 
AddFeaturesFromBioseqSet (BioseqSetPtr bssp, Uint1 featdef) +{ + SeqEntryPtr sep; + BioseqPtr bsp; + Int4 seqfeattype; + ValNodePtr item_list = NULL; + + if (bssp == NULL) return NULL; + + seqfeattype = FindFeatFromFeatDefType (featdef); + for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { + if (sep->data.ptrvalue == NULL) continue; + if (IS_Bioseq (sep)) { + bsp = sep->data.ptrvalue; + if (seqfeattype == SEQFEAT_PROT) { + if (ISA_aa (bsp->mol)) { + ValNodeLink (&item_list, GetFeatureListForProteinBioseq (featdef, bsp)); } - StringCat (summ, feat_quantity); + } else if (!ISA_aa (bsp->mol)) { + ValNodeLink (&item_list, GetFeatureListForNucleotideBioseq (featdef, bsp)); } + } else if (IS_Bioseq_set (sep)) { + ValNodeLink (&item_list, AddFeaturesFromBioseqSet (sep->data.ptrvalue, featdef)); } - id = MemFree (id); - feat_quantity = MemFree (feat_quantity); } - return summ; + return item_list; } -const CharPtr s_SpecialPubFieldWords [] = { "is present", "is not present", "is all caps" }; -const Int4 k_NumSpecialPubFieldWords = sizeof (s_SpecialPubFieldWords) / sizeof (CharPtr); - -static CharPtr SummarizePubFieldSpecialConstraint (PubFieldSpecialConstraintPtr field) +static ValNodePtr GetFeatureListForBioSourceObjects (ValNodePtr item_list, FeatureFieldPtr field) { - CharPtr fmt = "%s %s"; - CharPtr label, summ = NULL; + ValNodePtr vnp; + SeqFeatPtr sfp; + SeqDescrPtr sdp; + BioseqPtr bsp; + ObjValNodePtr ovp; + ValNodePtr feature_list = NULL; - if (field == NULL || field->constraint == NULL - || field->constraint->choice < 1 - || field->constraint->choice > k_NumSpecialPubFieldWords) { - return NULL; + if (item_list == NULL || field == NULL) return NULL; + + for (vnp = item_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT) { + sfp = vnp->data.ptrvalue; + if (sfp != NULL) { + bsp = BioseqFindFromSeqLoc (sfp->location); + ValNodeLink (&feature_list, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), bsp)); + } + } else 
if (vnp->choice == OBJ_SEQDESC) { + sdp = vnp->data.ptrvalue; + if (sdp != NULL && sdp->extended != 0) { + ovp = (ObjValNodePtr) sdp; + if (ovp->idx.parenttype == OBJ_BIOSEQSET) { + ValNodeLink (&feature_list, AddFeaturesFromBioseqSet (ovp->idx.parentptr, GetFeatdefFromFeatureType(field->type))); + } else if (ovp->idx.parenttype == OBJ_BIOSEQ) { + bsp = (BioseqPtr) ovp->idx.parentptr; + ValNodeLink (&feature_list, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), bsp)); + } + } + } } - - label = GetPubFieldLabel (field->field); - - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) - + StringLen (s_SpecialPubFieldWords[field->constraint->choice - 1]))); - sprintf (summ, fmt, label, s_SpecialPubFieldWords[field->constraint->choice - 1]); - return summ; + return feature_list; } -static CharPtr SummarizePubFieldConstraint (PubFieldConstraintPtr field) +NLM_EXTERN ValNodePtr ValNodeCopyPtr (ValNodePtr orig) { - CharPtr fmt = "%s %s", summ = NULL; - CharPtr string, label; + ValNodePtr new_list = NULL, last_vnp = NULL, vnp; - if (field == NULL || field->constraint == NULL) { - return NULL; + while (orig != NULL) { + vnp = ValNodeNew (NULL); + vnp->choice = orig->choice; + vnp->data.ptrvalue = orig->data.ptrvalue; + if (last_vnp == NULL) { + new_list = vnp; + } else { + last_vnp->next = vnp; + } + last_vnp = vnp; + orig = orig->next; } - - string = SummarizeStringConstraint (field->constraint); - label = GetPubFieldLabel (field->field); - - summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen (string))); - sprintf (summ, fmt, label, string); - string = MemFree (string); - return summ; + return new_list; } -static CharPtr SummarizePublicationConstraint (PublicationConstraintPtr constraint) +static ValNodePtr GetFeatureListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list, FeatureFieldPtr field) { - CharPtr type = NULL, field = NULL, special = NULL, summ = NULL; - Boolean 
first = TRUE; - Int4 len; + ValNodePtr feature_list = NULL, vnp; - if (IsPublicationConstraintEmpty (constraint)) return NULL; + if (match_list == NULL || field == NULL || match_type == NULL) return NULL; - switch (constraint->type) { - case Pub_type_published: - type = "pub is published"; + switch (match_type->choice) { + case eTableMatchFeatureID: + feature_list = ValNodeCopyPtr (match_list); break; - case Pub_type_unpublished: - type = "pub is unpublished"; + case eTableMatchGeneLocusTag: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + ValNodeLink (&feature_list, GetFeatureListForGene (GetFeatdefFromFeatureType(field->type), vnp->data.ptrvalue)); + } break; - case Pub_type_in_press: - type = "pub is in press"; + case eTableMatchProteinID: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + ValNodeLink (&feature_list, GetFeatureListForProteinBioseq (GetFeatdefFromFeatureType(field->type), vnp->data.ptrvalue)); + } break; - case Pub_type_submitter_block: - type = "pub is submitter block"; + case eTableMatchDbxref: + feature_list = ValNodeCopyPtr (match_list); + break; + case eTableMatchNucID: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + ValNodeLink (&feature_list, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), vnp->data.ptrvalue)); + } + break; + case eTableMatchBioSource: + case eTableMatchSourceQual: + ValNodeLink (&feature_list, GetFeatureListForBioSourceObjects (match_list, field)); break; } + return feature_list; +} - field = SummarizePubFieldConstraint (constraint->field); - special = SummarizePubFieldSpecialConstraint (constraint->special_field); - if (type == NULL && field == NULL && special == NULL) { - return NULL; - } - - len = 17 + StringLen (type) + StringLen (field) + StringLen (special); - summ = (CharPtr) MemNew (sizeof (Char) * len); - sprintf (summ, "where "); - if (type != NULL) { - StringCat (summ, type); - first = FALSE; - } - if (field != NULL) { - if (!first) { - StringCat (summ, " 
and "); - } - StringCat (summ, field); - first = FALSE; - } +static void AddBioSourcesForBioseq (BioseqPtr bsp, ValNodePtr PNTR feature_list) +{ + SeqDescrPtr sdp; + SeqMgrDescContext context; + Boolean any = FALSE; + SeqEntryPtr sep; - if (special != NULL) { - if (!first) { - StringCat (summ, " and "); - } - StringCat (summ, special); - first = FALSE; + if (bsp == NULL || feature_list == NULL) return; + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); + sdp != NULL; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &context)) { + ValNodeAddPointer (feature_list, OBJ_SEQDESC, sdp); + any = TRUE; + } + if (!any && !ISA_aa (bsp->mol)) { + sep = GetBestTopParentForData (bsp->idx.entityID, bsp); + sdp = CreateNewDescriptor (sep, Seq_descr_source); + sdp->data.ptrvalue = BioSourceNew (); + ValNodeAddPointer (feature_list, OBJ_SEQDESC, sdp); } - - field = MemFree (field); - special = MemFree (special); - - return summ; } - -static CharPtr SummarizeFieldConstraint (FieldConstraintPtr constraint) +static void AddBioSourcesForFeature (SeqFeatPtr sfp, ValNodePtr PNTR feature_list) { - CharPtr rval = NULL; - CharPtr string = NULL, label = NULL; - CharPtr fmt = "where %s %s"; - - if (IsFieldConstraintEmpty (constraint)) return NULL; - - string = SummarizeStringConstraint (constraint->string_constraint); - label = SummarizeFieldType (constraint->field); + BioseqPtr bsp; - if (string != NULL && label != NULL) { - rval = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen (string))); - sprintf (rval, fmt, label, string); - } - string = MemFree (string); - label = MemFree (label); + if (sfp == NULL || feature_list == NULL) return; - return rval; + if (sfp->data.choice == SEQFEAT_BIOSRC) { + ValNodeAddPointer (feature_list, OBJ_SEQFEAT, sfp); + } else { + bsp = BioseqFindFromSeqLoc (sfp->location); + AddBioSourcesForBioseq (bsp, feature_list); + } } -NLM_EXTERN CharPtr SummarizeConstraint (ValNodePtr constraint) 
+static ValNodePtr GetBioSourceListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list, FeatureFieldPtr field) { - CharPtr phrase = NULL, tmp; - CharPtr fmt = "where object text %s"; + ValNodePtr feature_list = NULL, vnp; - if (constraint == NULL) return NULL; - switch (constraint->choice) { - case ConstraintChoice_string: - tmp = SummarizeStringConstraint (constraint->data.ptrvalue); - if (tmp != NULL) { - phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (tmp) + StringLen (fmt))); - sprintf (phrase, fmt, tmp); - tmp = MemFree (tmp); + if (match_list == NULL || field == NULL || match_type == NULL) return NULL; + + switch (match_type->choice) { + case eTableMatchFeatureID: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { + AddBioSourcesForFeature (vnp->data.ptrvalue, &feature_list); + } } break; - case ConstraintChoice_location: - phrase = SummarizeLocationConstraint (constraint->data.ptrvalue); - break; - case ConstraintChoice_source: - phrase = SummarizeSourceConstraint (constraint->data.ptrvalue); - break; - case ConstraintChoice_cdsgeneprot_qual: - phrase = SummarizeCDSGeneProtQualConstraint (constraint->data.ptrvalue); - break; - case ConstraintChoice_cdsgeneprot_pseudo: - phrase = SummarizeCDSGeneProtPseudoConstraint (constraint->data.ptrvalue); + case eTableMatchGeneLocusTag: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { + AddBioSourcesForFeature (vnp->data.ptrvalue, &feature_list); + } + } break; - case ConstraintChoice_sequence: - phrase = SummarizeSequenceConstraint (constraint->data.ptrvalue); + case eTableMatchProteinID: + case eTableMatchNucID: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_BIOSEQ) { + AddBioSourcesForBioseq (vnp->data.ptrvalue, &feature_list); + } + } break; - case ConstraintChoice_pub: - phrase = SummarizePublicationConstraint 
(constraint->data.ptrvalue); + case eTableMatchDbxref: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { + AddBioSourcesForFeature (vnp->data.ptrvalue, &feature_list); + } + } break; - case ConstraintChoice_field: - phrase = SummarizeFieldConstraint (constraint->data.ptrvalue); + case eTableMatchBioSource: + case eTableMatchSourceQual: + feature_list = ValNodeCopyPtr (match_list); break; } - return phrase; + return feature_list; } -NLM_EXTERN CharPtr SummarizeConstraintSet (ValNodePtr constraint_set) +static void AddPubsForBioseq (BioseqPtr bsp, ValNodePtr PNTR feature_list) { - ValNodePtr phrases = NULL, vnp; - Int4 len = 0; - CharPtr phrase, str = NULL; + SeqDescrPtr sdp; + SeqMgrDescContext dcontext; + SeqFeatPtr sfp; + SeqMgrFeatContext fcontext; - while (constraint_set != NULL) { - phrase = SummarizeConstraint (constraint_set); - if (phrase != NULL) { - ValNodeAddPointer (&phrases, 0, phrase); - if (len > 0) { - len += 5; /* for " and " */ - } else { - len += 1; /* for terminal NULL */ - } - len += StringLen (phrase); - } - constraint_set = constraint_set->next; + if (bsp == NULL || feature_list == NULL) return; + + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext); + sdp != NULL; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_pub, &dcontext)) { + ValNodeAddPointer (feature_list, OBJ_SEQDESC, sdp); } - if (len > 0) { - str = (CharPtr) MemNew (sizeof (Char) * len); - for (vnp = phrases; vnp != NULL; vnp = vnp->next) { - StringCat (str, vnp->data.ptrvalue); - if (vnp->next != NULL) { - StringCat (str, " and "); - } - } + for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PUB, 0, &fcontext); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_PUB, 0, &fcontext)) { + ValNodeAddPointer (feature_list, OBJ_SEQFEAT, sfp); } - return str; } -/* for table readers that use the macro language functions */ +static ValNodePtr AddPubListFromBioseqSet 
(BioseqSetPtr bssp) +{ + SeqEntryPtr sep; + BioseqPtr bsp; + ValNodePtr item_list = NULL; -/* MatchType is used to represent how the column should be matched. - */ + if (bssp == NULL) return NULL; -NLM_EXTERN MatchTypePtr MatchTypeNew () -{ - MatchTypePtr match_type = MemNew (sizeof (MatchTypeData)); - match_type->data = NULL; - match_type->match_location = String_location_equals; - match_type->choice = eTableMatchNucID; - return match_type; + for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { + if (sep->data.ptrvalue == NULL) continue; + if (IS_Bioseq (sep)) { + bsp = sep->data.ptrvalue; + if (!ISA_aa (bsp->mol)) { + AddPubsForBioseq (bsp, &item_list); + } + } else if (IS_Bioseq_set (sep)) { + ValNodeLink (&item_list, AddPubListFromBioseqSet (sep->data.ptrvalue)); + } + } + return item_list; } -NLM_EXTERN MatchTypePtr MatchTypeFree (MatchTypePtr match_type) +static ValNodePtr GetPubListForBioSourceObjects (ValNodePtr item_list) { - if (match_type != NULL) { - if (match_type->choice == eTableMatchSourceQual) { - match_type->data = SourceQualChoiceFree (match_type->data); + ValNodePtr vnp; + SeqFeatPtr sfp; + SeqDescrPtr sdp; + BioseqPtr bsp; + ObjValNodePtr ovp; + ValNodePtr feature_list = NULL; + + if (item_list == NULL) return NULL; + + for (vnp = item_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT) { + sfp = vnp->data.ptrvalue; + if (sfp != NULL) { + bsp = BioseqFindFromSeqLoc (sfp->location); + AddPubsForBioseq (bsp, &feature_list); + } + } else if (vnp->choice == OBJ_SEQDESC) { + sdp = vnp->data.ptrvalue; + if (sdp != NULL && sdp->extended != 0) { + ovp = (ObjValNodePtr) sdp; + if (ovp->idx.parenttype == OBJ_BIOSEQSET) { + ValNodeLink (&feature_list, AddPubListFromBioseqSet (ovp->idx.parentptr)); + } else if (ovp->idx.parenttype == OBJ_BIOSEQ) { + bsp = (BioseqPtr) ovp->idx.parentptr; + AddPubsForBioseq (bsp, &feature_list); + } + } } - match_type = MemFree (match_type); } - return match_type; + return feature_list; } -static 
MatchTypePtr MatchTypeCopy (MatchTypePtr orig) +static ValNodePtr GetPubListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list) { - MatchTypePtr match_type = NULL; - - if (orig != NULL) { - match_type = MatchTypeNew(); - match_type->choice = orig->choice; - match_type->match_location = orig->match_location; - if (match_type->choice == eTableMatchSourceQual) { - match_type->data = AsnIoMemCopy (orig->data, (AsnReadFunc) SourceQualChoiceAsnRead, (AsnWriteFunc) SourceQualChoiceAsnWrite); - } + SeqFeatPtr sfp; + ValNodePtr vnp; + ValNodePtr feature_list = NULL; + + if (match_type == NULL) return NULL; + + switch (match_type->choice) { + case eTableMatchFeatureID: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + AddPubsForBioseq (BioseqFindFromSeqLoc (sfp->location), &feature_list); + } + } + break; + case eTableMatchGeneLocusTag: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + AddPubsForBioseq (BioseqFindFromSeqLoc (sfp->location), &feature_list); + } + } + break; + case eTableMatchProteinID: + case eTableMatchNucID: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_BIOSEQ) { + AddPubsForBioseq (vnp->data.ptrvalue, &feature_list); + } + } + break; + case eTableMatchDbxref: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + AddPubsForBioseq (BioseqFindFromSeqLoc (sfp->location), &feature_list); + } + } + break; + case eTableMatchBioSource: + case eTableMatchSourceQual: + feature_list = GetPubListForBioSourceObjects (match_list); + break; } - return match_type; + return feature_list; } -static MatchTypePtr FindMatchTypeInHeader (ValNodePtr columns) +static ValNodePtr 
GetSequenceListForBioSourceObjects (ValNodePtr item_list) { - ValNodePtr col_vnp; - MatchTypePtr match_type = NULL; - TabColumnConfigPtr t; + ValNodePtr vnp; + SeqFeatPtr sfp; + SeqDescrPtr sdp; + BioseqPtr bsp; + ObjValNodePtr ovp; + ValNodePtr seq_list = NULL; + SeqEntryPtr sep; - for (col_vnp = columns; - col_vnp != NULL && match_type == NULL; - col_vnp = col_vnp->next) { - t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; - if (t != NULL && t->match_type != NULL) { - match_type = MatchTypeCopy (t->match_type); + if (item_list == NULL) return NULL; + + for (vnp = item_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT) { + sfp = vnp->data.ptrvalue; + if (sfp != NULL) { + bsp = BioseqFindFromSeqLoc (sfp->location); + if (bsp != NULL) { + ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp); + } + } + } else if (vnp->choice == OBJ_SEQDESC) { + sdp = vnp->data.ptrvalue; + if (sdp != NULL && sdp->extended != 0) { + ovp = (ObjValNodePtr) sdp; + if (ovp->idx.parenttype == OBJ_BIOSEQSET) { + sep = SeqMgrGetSeqEntryForData (ovp->idx.parentptr); + /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */ + seq_list = CollectNucBioseqs (sep); + } else if (ovp->idx.parenttype == OBJ_BIOSEQ) { + bsp = (BioseqPtr) ovp->idx.parentptr; + if (bsp != NULL) { + ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp); + } + } + } } } - return match_type; + return seq_list; } -NLM_EXTERN TabColumnConfigPtr TabColumnConfigNew (void) +static ValNodePtr GetSequenceListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list) { - TabColumnConfigPtr t; + SeqFeatPtr sfp; + ValNodePtr vnp; + ValNodePtr seq_list = NULL; + BioseqPtr bsp; - t = (TabColumnConfigPtr) MemNew (sizeof (TabColumnConfigData)); - t->match_type = NULL; - t->field = NULL; - t->existing_text = ExistingTextOption_replace_old; - t->constraint = NULL; - t->skip_blank = TRUE; - return t; + if (match_type == NULL) return NULL; + + switch (match_type->choice) { + case eTableMatchFeatureID: + for (vnp = 
match_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + bsp = BioseqFindFromSeqLoc (sfp->location); + if (bsp != NULL) { + ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp); + } + } + } + break; + case eTableMatchGeneLocusTag: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + bsp = BioseqFindFromSeqLoc (sfp->location); + if (bsp != NULL) { + ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp); + } + } + } + break; + case eTableMatchProteinID: + case eTableMatchNucID: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_BIOSEQ) { + ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, vnp->data.ptrvalue); + } + } + break; + case eTableMatchDbxref: + for (vnp = match_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + bsp = BioseqFindFromSeqLoc (sfp->location); + if (bsp != NULL) { + ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp); + } + } + } + break; + case eTableMatchBioSource: + case eTableMatchSourceQual: + seq_list = GetSequenceListForBioSourceObjects (match_list); + break; + } + return seq_list; } -NLM_EXTERN TabColumnConfigPtr TabColumnConfigFree (TabColumnConfigPtr t) +static ValNodePtr GetStructuredCommentListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list) { - if (t != NULL) { - t->field = FieldTypeFree (t->field); - t->match_type = MatchTypeFree (t->match_type); - t->constraint = ConstraintChoiceSetFree (t->constraint); - t = MemFree (t); - } - return t; + ValNodePtr seq_list, target_list = NULL, vnp; + SeqDescrPtr sdp; + SeqMgrDescContext context; + + seq_list = GetSequenceListForRowAndColumn (match_type, match_list); + + for (vnp = seq_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_BIOSEQ) { + for (sdp = 
SeqMgrGetNextDescriptor (vnp->data.ptrvalue, NULL, Seq_descr_user, &context); + sdp != NULL; + sdp = SeqMgrGetNextDescriptor (vnp->data.ptrvalue, sdp, Seq_descr_user, &context)) { + if (IsUserObjectStructuredComment (sdp->data.ptrvalue)) { + ValNodeAddPointer (&target_list, OBJ_SEQDESC, sdp); + } + } + } + } + seq_list = ValNodeFree (seq_list); + return target_list; } -NLM_EXTERN TabColumnConfigPtr TabColumnConfigCopy (TabColumnConfigPtr orig) +static ValNodePtr GetDBLinkListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list) { - TabColumnConfigPtr t = NULL; + ValNodePtr seq_list, target_list = NULL, vnp; + SeqDescrPtr sdp; + SeqMgrDescContext context; - if (orig != NULL) { - t = TabColumnConfigNew (); + seq_list = GetSequenceListForRowAndColumn (match_type, match_list); - t->match_type = MatchTypeCopy (orig->match_type); - t->existing_text = orig->existing_text; - t->skip_blank = orig->skip_blank; - t->match_mrna = orig->match_mrna; - t->field = FieldTypeCopy (orig->field); - t->constraint = AsnIoMemCopy (orig->constraint, (AsnReadFunc) ConstraintChoiceSetAsnRead, (AsnWriteFunc) ConstraintChoiceSetAsnWrite); - } - return t; + for (vnp = seq_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_BIOSEQ) { + for (sdp = SeqMgrGetNextDescriptor (vnp->data.ptrvalue, NULL, Seq_descr_user, &context); + sdp != NULL; + sdp = SeqMgrGetNextDescriptor (vnp->data.ptrvalue, sdp, Seq_descr_user, &context)) { + if (IsUserObjectDBLink (sdp->data.ptrvalue)) { + ValNodeAddPointer (&target_list, OBJ_SEQDESC, sdp); + } + } + } + } + seq_list = ValNodeFree (seq_list); + return target_list; } -NLM_EXTERN ValNodePtr TabColumnConfigListFree (ValNodePtr columns) +static ValNodePtr GetTargetListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list, FieldTypePtr field, ValNodePtr constraint) { - ValNodePtr vnp_next; + ValNodePtr target_list = NULL, vnp_prev = NULL, vnp, vnp_next, tmp_list; + FeatureFieldPtr feature_field; - while (columns != NULL) { - vnp_next 
= columns->next; - columns->data.ptrvalue = TabColumnConfigFree (columns->data.ptrvalue); - columns->next = NULL; - columns = ValNodeFree (columns); - columns = vnp_next; + if (field == NULL || match_type == NULL) return NULL; + switch (field->choice) { + case FieldType_source_qual: + target_list = GetBioSourceListForRowAndColumn (match_type, match_list, field->data.ptrvalue); + break; + case FieldType_feature_field: + target_list = GetFeatureListForRowAndColumn (match_type, match_list, field->data.ptrvalue); + break; + case FieldType_cds_gene_prot: + feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue); + target_list = GetFeatureListForRowAndColumn (match_type, match_list, feature_field); + feature_field = FeatureFieldFree (feature_field); + break; + case FieldType_pub: + target_list = GetPubListForRowAndColumn (match_type, match_list); + break; + case FieldType_rna_field: + feature_field = FeatureFieldFromRnaQual (field->data.ptrvalue); + target_list = GetFeatureListForRowAndColumn (match_type, match_list, feature_field); + feature_field = FeatureFieldFree (feature_field); + break; + case FieldType_struc_comment_field: + target_list = GetStructuredCommentListForRowAndColumn (match_type, match_list); + break; + case FieldType_dblink: + target_list = GetDBLinkListForRowAndColumn (match_type, match_list); + break; + case FieldType_misc: + if (field->data.intvalue == Misc_field_genome_project_id) { + target_list = GetSequenceListForRowAndColumn (match_type, match_list); + } else if (field->data.intvalue == Misc_field_comment_descriptor) { + tmp_list = GetSequenceListForRowAndColumn (match_type, match_list); + for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { + AddCommentDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); + } + tmp_list = ValNodeFree (tmp_list); + } else if (field->data.intvalue == Misc_field_defline) { + tmp_list = GetSequenceListForRowAndColumn (match_type, match_list); + for (vnp = tmp_list; vnp != NULL; vnp = 
vnp->next) { + AddDeflineDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); + } + tmp_list = ValNodeFree (tmp_list); + } else if (field->data.intvalue == Misc_field_keyword) { + tmp_list = GetSequenceListForRowAndColumn (match_type, match_list); + for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { + AddGenbankBlockDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); + } + tmp_list = ValNodeFree (tmp_list); + } + break; + case FieldType_molinfo_field: + target_list = GetSequenceListForRowAndColumn(match_type, match_list); + break; } - return columns; -} + /* remove targets that do not match constraint */ + vnp = target_list; + while (vnp != NULL) { + vnp_next = vnp->next; + if (!DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, constraint)) { + if (vnp_prev == NULL) { + target_list = vnp->next; + } else { + vnp_prev->next = vnp->next; + } + vnp->next = NULL; + vnp = ValNodeFree (vnp); + } else { + vnp_prev = vnp; + } + vnp = vnp_next; + } -NLM_EXTERN ValNodePtr TabColumnConfigListCopy (ValNodePtr orig) -{ - ValNodePtr new_list = NULL; - TabColumnConfigPtr t; + /* remove targets found twice */ + target_list = ValNodeSort (target_list, SortVnpByChoiceAndPtrvalue); + ValNodeUnique (&target_list, SortVnpByChoiceAndPtrvalue, ValNodeFree); - while (orig != NULL) { - t = TabColumnConfigCopy (orig->data.ptrvalue); - ValNodeAddPointer (&new_list, 0, t); - orig = orig->next; - } - return new_list; + return target_list; } - -/* This checks the column names and returns a list of the feature fields */ -NLM_EXTERN ValNodePtr ValidateFeatureFieldColumnNames (ValNodePtr header_line, ValNodePtr PNTR perr_list) +static void ReportMissingTargets (ValNodeBlockPtr err_list, FieldTypePtr ft, CharPtr match_val, Int4 col_num, Int4 line_num) { - ValNodePtr header_vnp; - ValNodePtr err_list = NULL, col_list = NULL; - Boolean rval = TRUE; - TabColumnConfigPtr t; + CharPtr feat_name; FeatureFieldPtr field; - Int4 featqual, feat_type; - 
CharPtr first_space; - - if (header_line == NULL) - { - return FALSE; - } - - header_vnp = header_line->data.ptrvalue; - if (header_vnp == NULL || header_vnp->next == NULL) - { - return FALSE; - } - - /* skip ID column */ - header_vnp = header_vnp->next; - while (header_vnp != NULL && rval) - { - first_space = StringChr (header_vnp->data.ptrvalue, ' '); - if (first_space != NULL) { - *first_space = 0; - feat_type = GetFeatureTypeByName (header_vnp->data.ptrvalue); - featqual = GetFeatQualByName (first_space + 1); - *first_space = ' '; - if (feat_type < 0 || featqual < 0) { - /* unable to recognize column name */ - ValNodeAddPointer (&err_list, 0, StringSave (header_vnp->data.ptrvalue)); - /* if we're not able to send back a list of errors, just quit now */ - if (perr_list == NULL) { - rval = FALSE; - } - } else if (err_list == NULL) { - /* if we've already found errors, don't bother collecting more fields */ - field = FeatureFieldNew (); - field->type = feat_type; - field->field = ValNodeNew (NULL); - field->field->choice = FeatQualChoice_legal_qual; - field->field->data.intvalue = featqual; - t = TabColumnConfigNew (); - t->field = ValNodeNew (NULL); - t->field->choice = FieldType_feature_field; - t->field->data.ptrvalue = field; - ValNodeAddPointer (&col_list, 0, t); + CharPtr no_feat_fmt = "No %s feature for %s (column %d, line %d)"; + CharPtr no_src_fmt = "No biosource for %s (column %d, line %d)"; + CharPtr no_seq_fmt = "No sequence for %s (column %d, line %d)"; + CharPtr no_cmt_fmt = "No structured comment for %s (column %d, line %d)"; + CharPtr no_dblink_fmt = "No DBLink object for %s (column %d, line %d)"; + CharPtr err_msg; + RnaQualPtr rq; + + if (err_list == NULL || ft == NULL || match_val == NULL) return; + + switch (ft->choice) { + case FieldType_source_qual: + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_src_fmt) + + StringLen (match_val) + + 30)); + sprintf (err_msg, no_src_fmt, match_val, col_num, line_num); + ValNodeAddPointerToEnd 
(err_list, 0, err_msg); + break; + case FieldType_feature_field: + field = (FeatureFieldPtr) ft->data.ptrvalue; + if (field != NULL) { + feat_name = GetFeatureNameFromFeatureType (field->type); + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt) + + StringLen (feat_name) + + StringLen (match_val) + + 30)); + sprintf (err_msg, no_feat_fmt, feat_name, match_val, col_num, line_num); + ValNodeAddPointerToEnd (err_list, 0, err_msg); } - } else { - featqual = GetFeatQualByName (header_vnp->data.ptrvalue); - if (featqual < 0) { - /* unable to recognize column name */ - ValNodeAddPointer (&err_list, 0, StringSave (header_vnp->data.ptrvalue)); - /* if we're not able to send back a list of errors, just quit now */ - if (perr_list == NULL) { - rval = FALSE; - } - } else if (err_list == NULL) { - /* if we've already found errors, don't bother collecting more fields */ - field = FeatureFieldNew (); - field->type = Feature_type_any; - field->field = ValNodeNew (NULL); - field->field->choice = FeatQualChoice_legal_qual; - field->field->data.intvalue = featqual; - t = TabColumnConfigNew (); - t->field = ValNodeNew (NULL); - t->field->choice = FieldType_feature_field; - t->field->data.ptrvalue = field; - ValNodeAddPointer (&col_list, 0, t); + break; + case FieldType_cds_gene_prot: + field = FeatureFieldFromCDSGeneProtField (ft->data.intvalue); + if (field != NULL) { + feat_name = GetFeatureNameFromFeatureType (field->type); + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt) + + StringLen (feat_name) + + StringLen (match_val) + + 30)); + sprintf (err_msg, no_feat_fmt, feat_name, match_val, col_num, line_num); + ValNodeAddPointerToEnd (err_list, 0, err_msg); } - } - header_vnp = header_vnp->next; - } - if (err_list != NULL) { - col_list = TabColumnConfigListFree (col_list); - if (perr_list != NULL) { - *perr_list = err_list; - } else { - err_list = ValNodeFreeData (err_list); - } + field = FeatureFieldFree (field); + break; + case 
FieldType_rna_field: + rq = (RnaQualPtr) ft->data.ptrvalue; + if (rq != NULL) { + feat_name = SummarizeRnaType (rq->type); + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt) + + StringLen (feat_name) + + StringLen (match_val) + + 30)); + sprintf (err_msg, no_feat_fmt, feat_name, match_val, col_num, line_num); + ValNodeAddPointerToEnd (err_list, 0, err_msg); + } + break; + case FieldType_struc_comment_field: + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_cmt_fmt) + StringLen (match_val) + 30)); + sprintf (err_msg, no_cmt_fmt, match_val, col_num, line_num); + ValNodeAddPointerToEnd (err_list, 0, err_msg); + break; + case FieldType_dblink: + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_dblink_fmt) + StringLen (match_val) + 30)); + sprintf (err_msg, no_dblink_fmt, match_val, col_num, line_num); + ValNodeAddPointerToEnd (err_list, 0, err_msg); + break; + case FieldType_misc: + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_seq_fmt) + + StringLen (match_val) + + 30)); + sprintf (err_msg, no_seq_fmt, match_val, col_num, line_num); + ValNodeAddPointerToEnd (err_list, 0, err_msg); + break; } - return col_list; } -typedef struct findgenelocustag { - CharPtr locus_tag; - ValNodePtr gene_list; -} FindGeneLocusTagData, PNTR FindGeneLocusTagPtr; -static void FindGeneByLocusTagBioseqCallback (BioseqPtr bsp, Pointer userdata) +static void ReportEmptyIDColumn (ValNodeBlockPtr vnb, Int4 line_num) { - FindGeneLocusTagPtr p; - SeqFeatPtr gene; - SeqMgrFeatContext fcontext; + CharPtr err_msg; + CharPtr missing_id_fmt = "No ID for line %d"; - if (bsp == NULL || userdata == NULL || !ISA_na (bsp->mol)) { - return; - } + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (missing_id_fmt) + 15)); + sprintf (err_msg, missing_id_fmt, line_num); + ValNodeAddPointerToEnd (vnb, 0, err_msg); +} - p = (FindGeneLocusTagPtr) userdata; +static ValNodePtr FindMatchChoiceInLine (ValNodePtr val_vnp, ValNodePtr col_vnp) +{ + 
TabColumnConfigPtr t; - gene = SeqMgrGetGeneByLocusTag (bsp, p->locus_tag, &fcontext); - if (gene != NULL) { - ValNodeAddPointer (&p->gene_list, OBJ_SEQFEAT, gene); + while (val_vnp != NULL && col_vnp != NULL) { + t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; + if (t != NULL && t->match_type != NULL) { + return val_vnp; + } + val_vnp = val_vnp->next; + col_vnp = col_vnp->next; } + return NULL; } -typedef struct objbymatch { - ValNodePtr obj_list; - StringConstraintPtr scp; -} ObjByMatchData, PNTR ObjByMatchPtr; +NLM_EXTERN SeqFeatPtr GetmRNAForFeature (SeqFeatPtr sfp) +{ + BioseqPtr pbsp; -static void GetFeaturesByDbxrefCallback (SeqFeatPtr sfp, Pointer userdata) + if (sfp == NULL) return NULL; + if (sfp->data.choice == SEQFEAT_PROT) + { + pbsp = BioseqFindFromSeqLoc (sfp->location); + sfp = SeqMgrGetCDSgivenProduct (pbsp, NULL); + if (sfp == NULL) return NULL; + } + return GetmRNAforCDS (sfp); +} + + +NLM_EXTERN Boolean AdjustmRNAProductToMatchProteinProduct (SeqFeatPtr sfp) { - ObjByMatchPtr p; - ValNodePtr vnp; - DbtagPtr dbt; - Char buf[20]; - Boolean found = FALSE; + SeqFeatPtr mrna; + ProtRefPtr prp; + RnaRefPtr rrp; - if (sfp == NULL || sfp->dbxref == NULL || userdata == NULL) return; - p = (ObjByMatchPtr) userdata; + if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return FALSE; - if (IsStringConstraintEmpty (p->scp)) return; + prp = (ProtRefPtr) sfp->data.value.ptrvalue; + mrna = GetmRNAForFeature (sfp); - for (vnp = sfp->dbxref; vnp != NULL && !found; vnp = vnp->next) { - dbt = (DbtagPtr) vnp->data.ptrvalue; - if (dbt != NULL && dbt->tag != NULL) { - if (dbt->tag->id > 0) { - sprintf (buf, "%d", dbt->tag->id); - if (DoesStringMatchConstraint (buf, p->scp)) { - found = TRUE; - } - } else if (DoesStringMatchConstraint (dbt->tag->str, p->scp)) { - found = TRUE; - } - } + if (mrna == NULL) return FALSE; + + rrp = (RnaRefPtr) mrna->data.value.ptrvalue; + if (rrp == NULL) + { + rrp = RnaRefNew(); + mrna->data.value.ptrvalue = rrp; } - if (found) { - 
ValNodeAddPointer (&(p->obj_list), OBJ_SEQFEAT, sfp); + + rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue); + if (prp == NULL || prp->name == NULL || StringHasNoText (prp->name->data.ptrvalue)) + { + rrp->ext.choice = 0; + } + else + { + rrp->ext.choice = 1; + rrp->ext.value.ptrvalue = StringSave (prp->name->data.ptrvalue); } + return TRUE; } -static ValNodePtr GetFeaturesByDbxref (SeqEntryPtr sep, CharPtr dbxref, Uint1 match_location) +NLM_EXTERN Boolean IsFieldTypeCDSProduct (FieldTypePtr ft) { - ObjByMatchData d; + FeatureFieldPtr field; + Boolean rval = FALSE; - d.scp = StringConstraintNew (); - d.scp->match_text = StringSave (dbxref); - d.scp->match_location = match_location; - d.obj_list = NULL; - VisitFeaturesInSep (sep, &d, GetFeaturesByDbxrefCallback); - d.scp = StringConstraintFree (d.scp); - return d.obj_list; + if (ft == NULL) return FALSE; + if (ft->choice == FieldType_feature_field) { + field = (FeatureFieldPtr) ft->data.ptrvalue; + if (field != NULL && field->type == Macro_feature_type_cds + && field->field != NULL + && field->field->choice == FeatQualChoice_legal_qual + && field->field->data.intvalue == Feat_qual_legal_product) { + rval = TRUE; + } + } else if (ft->choice == FieldType_cds_gene_prot) { + if (ft->data.intvalue == CDSGeneProt_field_prot_name) { + rval = TRUE; + } + } + return rval; } -static void GetBioSourcesByTaxNameDescriptorCallback (SeqDescrPtr sdp, Pointer userdata) +static Boolean IsFieldTypeProteinDesc (FieldTypePtr ft) { - ObjByMatchPtr p; - BioSourcePtr biop; + FeatureFieldPtr field; + Boolean rval = FALSE; - if (sdp == NULL || sdp->choice != Seq_descr_source || userdata == NULL) return; - p = (ObjByMatchPtr) userdata; + if (ft == NULL) return FALSE; + if (ft->choice == FieldType_feature_field) { + field = (FeatureFieldPtr) ft->data.ptrvalue; + if (field != NULL && (field->type == Macro_feature_type_cds || field->type == Macro_feature_type_prot) + && field->field != NULL + && field->field->choice == 
FeatQualChoice_legal_qual + && field->field->data.intvalue == Feat_qual_legal_description) { + rval = TRUE; + } + } else if (ft->choice == FieldType_cds_gene_prot) { + if (ft->data.intvalue == CDSGeneProt_field_prot_description) { + rval = TRUE; + } + } + return rval; +} - if (IsStringConstraintEmpty (p->scp)) return; - biop = (BioSourcePtr) sdp->data.ptrvalue; - if (biop != NULL && biop->org != NULL && DoesStringMatchConstraint (biop->org->taxname, p->scp)) { - ValNodeAddPointer (&(p->obj_list), OBJ_SEQDESC, sdp); +static Boolean IsFieldTypeGeneLocusTag (FieldTypePtr ft) +{ + FeatureFieldPtr field; + RnaQualPtr rq; + Boolean rval = FALSE; + + if (ft == NULL) return FALSE; + if (ft->choice == FieldType_feature_field) { + field = (FeatureFieldPtr) ft->data.ptrvalue; + if (field != NULL && field->type == Macro_feature_type_gene + && field->field != NULL + && field->field->choice == FeatQualChoice_legal_qual + && field->field->data.intvalue == Feat_qual_legal_locus_tag) { + rval = TRUE; + } + } else if (ft->choice == FieldType_cds_gene_prot) { + if (ft->data.intvalue == CDSGeneProt_field_gene_locus_tag) { + rval = TRUE; + } + } else if (ft->choice == FieldType_rna_field) { + rq = (RnaQualPtr) ft->data.ptrvalue; + if (rq != NULL && rq->field == Rna_field_gene_locus_tag) { + rval = TRUE; + } } + return rval; } -static void GetBioSourcesByTaxNameFeatureCallback (SeqFeatPtr sfp, Pointer userdata) + +NLM_EXTERN ValNodePtr ValidateTabTableValues (ValNodePtr table, ValNodePtr columns) { - ObjByMatchPtr p; - BioSourcePtr biop; + ValNodePtr err_list = NULL; + ValNodePtr line_vnp, col_vnp, val_vnp; + Int4 line_num, col_num; + TabColumnConfigPtr t; + ValNodePtr locus_tag_values = NULL, bad_locus_tags = NULL, vnp, tmp_field; + CharPtr bad_format_fmt = "Locus tag %s has incorrect format"; + CharPtr dup_fmt = "Locus tag %s appears in the table more than once"; + CharPtr inconsistent_fmt = "Locus tag prefix for %s is inconsistent"; + CharPtr bad_molinfo_fmt = "'%s' is not a valid 
value for this field"; + CharPtr err_msg; - if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || userdata == NULL) return; - p = (ObjByMatchPtr) userdata; + if (table == NULL || columns == NULL) { + return NULL; + } - if (IsStringConstraintEmpty (p->scp)) return; + for (line_vnp = table, line_num = 1; + line_vnp != NULL; + line_vnp = line_vnp->next, line_num++) { + for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1; + val_vnp != NULL && col_vnp != NULL; + val_vnp = val_vnp->next, col_vnp = col_vnp->next, col_num++) { + t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; + if (t == NULL || t->match_type != NULL || val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)) { + continue; + } + if (IsFieldTypeGeneLocusTag (t->field)) { + ValNodeAddPointer (&locus_tag_values, 0, val_vnp->data.ptrvalue); + } else if (t->field != NULL && t->field->choice == FieldType_molinfo_field && val_vnp->data.ptrvalue != NULL) { + tmp_field = MolinfoFieldFromFieldAndStringValue (t->field->data.ptrvalue, val_vnp->data.ptrvalue); + if (tmp_field == NULL) { + err_msg =(CharPtr) MemNew (sizeof (Char) * (StringLen(bad_molinfo_fmt) + StringLen (val_vnp->data.ptrvalue))); + sprintf (err_msg, bad_molinfo_fmt, val_vnp->data.ptrvalue); + ValNodeAddPointer (&err_list, 0, err_msg); + } + } + } + } - biop = (BioSourcePtr) sfp->data.value.ptrvalue; - if (biop != NULL && biop->org != NULL && DoesStringMatchConstraint (biop->org->taxname, p->scp)) { - ValNodeAddPointer (&(p->obj_list), OBJ_SEQFEAT, sfp); + bad_locus_tags = FindBadLocusTagsInList (locus_tag_values); + for (vnp = bad_locus_tags; vnp != NULL; vnp = vnp->next) { + switch (vnp->choice) { + case eLocusTagErrorBadFormat: + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_format_fmt) + StringLen (vnp->data.ptrvalue))); + sprintf (err_msg, bad_format_fmt, vnp->data.ptrvalue); + ValNodeAddPointer (&err_list, 0, err_msg); + break; + case eLocusTagErrorDuplicate: + err_msg = (CharPtr) MemNew (sizeof (Char) * 
(StringLen (dup_fmt) + StringLen (vnp->data.ptrvalue))); + sprintf (err_msg, dup_fmt, vnp->data.ptrvalue); + ValNodeAddPointer (&err_list, 0, err_msg); + break; + case eLocusTagErrorInconsistentPrefix: + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (inconsistent_fmt) + StringLen (vnp->data.ptrvalue))); + sprintf (err_msg, inconsistent_fmt, vnp->data.ptrvalue); + ValNodeAddPointer (&err_list, 0, err_msg); + break; + } } - + locus_tag_values = ValNodeFree (locus_tag_values); + return err_list; } -static ValNodePtr GetBioSourcesByTaxName (SeqEntryPtr sep, CharPtr taxname, Uint1 match_location) +NLM_EXTERN ValNodePtr GetSequenceListsForMatchTypeInTabTable (SeqEntryPtr sep, ValNodePtr table, Int4 col, MatchTypePtr match_type, ValNodePtr PNTR p_err_list) { - ObjByMatchData d; + ValNodePtr vnp_row, vnp; + ValNodePtr sequence_lists = NULL, match_list, target_list; + Uint2 entityID; + Int4 num, line; + CharPtr no_match_fmt = "No match for %s, line %d"; + CharPtr no_match_txt_fmt = "No match text for line %d"; + CharPtr msg; + BioseqSearchIndexPtr index = NULL; - d.scp = StringConstraintNew (); - d.scp->match_text = StringSave (taxname); - d.scp->match_location = match_location; - d.obj_list = NULL; - VisitDescriptorsInSep (sep, &d, GetBioSourcesByTaxNameDescriptorCallback); - VisitFeaturesInSep (sep, &d, GetBioSourcesByTaxNameFeatureCallback); - d.scp = StringConstraintFree (d.scp); - return d.obj_list; -} + if (sep == NULL || table == NULL || match_type == NULL || col < 0) { + return NULL; + } + entityID = SeqMgrGetEntityIDForSeqEntry (sep); -typedef struct objbystrinfld { - ValNodePtr obj_list; - FieldTypePtr field; - StringConstraintPtr scp; -} ObjByStrInFldData, PNTR ObjByStrInFldPtr; + index = BuildIDStringsList(sep); + for (vnp_row = table, line = 1; vnp_row != NULL; vnp_row = vnp_row->next, line++) { + vnp = vnp_row->data.ptrvalue; + num = 0; + while (vnp != NULL && num < col) { + vnp = vnp->next; + num++; + } + if (vnp == NULL || StringHasNoText 
(vnp->data.ptrvalue)) { + ValNodeAddPointer (&sequence_lists, 0, NULL); + if (p_err_list != NULL) { + msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_txt_fmt) + 15)); + sprintf (msg, no_match_txt_fmt, line); + ValNodeAddPointer (p_err_list, 0, msg); + } + } else { + match_list = FindMatchForRowEx (match_type, vnp->data.ptrvalue, entityID, sep, index); + target_list = GetSequenceListForRowAndColumn (match_type, match_list); + match_list = ValNodeFree (match_list); + ValNodeAddPointer (&sequence_lists, 0, target_list); + if (target_list == NULL && p_err_list != NULL) { + msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (vnp->data.ptrvalue) + 15)); + sprintf (msg, no_match_fmt, vnp->data.ptrvalue, line); + ValNodeAddPointer (p_err_list, 0, msg); + } + } + } + index = BioseqSearchIndexFree (index); -static void GetBioSourcesBySourceQualDescriptorCallback (SeqDescrPtr sdp, Pointer userdata) -{ - ObjByStrInFldPtr p; - CharPtr tmp; + return sequence_lists; +} - if (sdp == NULL || sdp->choice != Seq_descr_source || userdata == NULL) return; - p = (ObjByStrInFldPtr) userdata; - if (IsStringConstraintEmpty (p->scp)) return; +NLM_EXTERN ValNodePtr FreeSequenceLists (ValNodePtr lists) +{ + ValNodePtr vnp; - tmp = GetFieldValueForObject (OBJ_SEQDESC, sdp, p->field, p->scp); - if (tmp != NULL) { - ValNodeAddPointer (&(p->obj_list), OBJ_SEQDESC, sdp); + for (vnp = lists; vnp != NULL; vnp = vnp->next) { + vnp->data.ptrvalue = ValNodeFree (vnp->data.ptrvalue); } - tmp = MemFree (tmp); + lists = ValNodeFree (lists); + return lists; } -static void GetBioSourcesBySourceQualFeatureCallback (SeqFeatPtr sfp, Pointer userdata) +NLM_EXTERN ValNodePtr GetBioseqMatchesForSequenceIDs (ValNodePtr query_list, Uint1 match_location, SeqEntryPtr sep) { - ObjByStrInFldPtr p; - CharPtr tmp; + ValNodePtr response_list = NULL, vnp, single_list, vnp_t; + BioseqSearchIndexPtr index = NULL; + BioseqPtr bsp; + ValNodeBlock thisid_index; + BioseqSearchItemPtr si; + 
Char num_buf[15]; + CharPtr match_str; - if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || userdata == NULL) return; - p = (ObjByStrInFldPtr) userdata; + index = BuildIDStringsList(sep); - if (IsStringConstraintEmpty (p->scp)) return; + for (vnp = query_list; vnp != NULL; vnp = vnp->next) { + InitValNodeBlock (&thisid_index, NULL); + BuildIdStringsListForIdList (vnp->data.ptrvalue, NULL, &thisid_index); - tmp = GetFieldValueForObject (OBJ_SEQFEAT, sfp, p->field, p->scp); - if (tmp != NULL) { - ValNodeAddPointer (&(p->obj_list), OBJ_SEQFEAT, sfp); + bsp = NULL; + for (vnp_t = thisid_index.head; vnp_t != NULL && bsp == NULL; vnp_t = vnp_t->next) { + si = (BioseqSearchItemPtr) vnp_t->data.ptrvalue; + if (si->num > 0) { + sprintf (num_buf, "%d", si->num); + match_str = num_buf; + } else { + match_str = si->str; + } + if (match_location == String_location_equals) { + bsp = FindStringInIdListIndex (match_str, index); + } else { + single_list = FindListInIdListIndex (match_location, match_str, index); + if (single_list != NULL && single_list->next == NULL) { + bsp = single_list->data.ptrvalue; + } + single_list = ValNodeFree (single_list); + } + } + + thisid_index.head = BioseqSearchItemListFree(thisid_index.head); + ValNodeAddPointer (&response_list, OBJ_BIOSEQ, bsp); } - tmp = MemFree (tmp); + + index = BioseqSearchIndexFree (index); + return response_list; } -static ValNodePtr GetBioSourcesBySourceQual (SeqEntryPtr sep, SourceQualChoicePtr q, CharPtr val, Uint1 match_location) +static ValNodePtr ReportTableSummaryLine (Int4 err_lines, Int4 total_lines, CharPtr fmt) { - ObjByStrInFldData od; - - od.scp = StringConstraintNew(); - od.scp->match_text = StringSave (val); - od.scp->match_location = match_location; - od.obj_list = NULL; - od.field = ValNodeNew (NULL); - od.field->choice = FieldType_source_qual; - od.field->data.ptrvalue = q; - - VisitDescriptorsInSep (sep, &od, GetBioSourcesBySourceQualDescriptorCallback); - - VisitFeaturesInSep (sep, &od, 
GetBioSourcesBySourceQualFeatureCallback); + CharPtr str; + ValNodePtr vnp; - od.field = ValNodeFree (od.field); - od.scp = StringConstraintFree (od.scp); - return od.obj_list; + str = (CharPtr) MemNew (sizeof (Char) + (StringLen (fmt) + 30)); + sprintf (str, fmt, err_lines, total_lines); + vnp = ValNodeNew (NULL); + vnp->data.ptrvalue = str; + return vnp; } -static void GetBioseqsByIdCallback (BioseqPtr bsp, Pointer data) +NLM_EXTERN ValNodePtr GetObjectTableForTabTable (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr PNTR p_err_list) { - ObjByMatchPtr d; - ObjectIdPtr oip; - SeqIdPtr sip; - Boolean found_match = FALSE; - DbtagPtr dbtag; - CharPtr cp, tmp_id; - - if (bsp == NULL || data == NULL || (d = (ObjByMatchPtr) data) == NULL) { - return; - } + ValNodeBlock vnb; + ValNodePtr line_vnp, val_vnp, col_vnp, err_vnp; + ValNodePtr obj_table = NULL, obj_row, last_obj = NULL, tmp, last = NULL; + Int4 line_num = 1, col_num; + Uint2 entityID; + ValNodePtr match_list, match_choice, target_list; + TabColumnConfigPtr t; + CharPtr err_msg; + CharPtr no_match_fmt = "No match for %s, line %d"; + MatchTypePtr match_type; + Int4 num_empty = 0, num_missing = 0, num_no_targets = 0; + BioseqSearchIndexPtr index = NULL; - found_match = DoesSeqIDListMeetStringConstraint (bsp->id, d->scp); + vnb.head = NULL; + vnb.tail = NULL; - for (sip = bsp->id; sip != NULL && !found_match; sip = sip->next) { - if (sip->choice == SEQID_GENERAL && sip->data.ptrvalue != NULL) { - dbtag = (DbtagPtr) sip->data.ptrvalue; - if (StringCmp (dbtag->db, "NCBIFILE") == 0 && dbtag->tag != NULL) { - if (DoesStringMatchConstraint (dbtag->tag->str, d->scp)) { - found_match = TRUE; - } else if ((cp = StringRChr (dbtag->tag->str, '/')) != NULL) { - tmp_id = (CharPtr) MemNew (sizeof (Char) * (cp - dbtag->tag->str + 1)); - StringNCpy (tmp_id, dbtag->tag->str, cp - dbtag->tag->str); - tmp_id[cp - dbtag->tag->str] = 0; - if (DoesStringMatchConstraint (tmp_id, d->scp)) { - found_match = TRUE; - } - 
tmp_id = MemFree (tmp_id); - } - } - } else if (sip->choice == SEQID_LOCAL && (oip = sip->data.ptrvalue) != NULL - && StringNICmp (oip->str, "bankit", 6) == 0 - && DoesStringMatchConstraint (oip->str + 6, d->scp)) { - found_match = TRUE; - } + if (sep == NULL) { + ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No SeqEntry")); } - if (found_match) { - ValNodeAddPointer (&(d->obj_list), OBJ_BIOSEQ, bsp); + if (table == NULL) { + ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No table")); } -} - - -static ValNodePtr FindBioseqsByMatchType (SeqEntryPtr sep, Uint1 match_location, CharPtr match_str) -{ - ObjByMatchData d; - - if (sep == NULL || StringHasNoText (match_str)) { + if (columns == NULL) { + ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No column information")); + } + if (vnb.head != NULL) { + if (p_err_list == NULL) { + vnb.head = ValNodeFreeData (vnb.head); + } else { + *p_err_list = vnb.head; + } return NULL; } - d.scp = StringConstraintNew (); - d.scp->match_text = StringSave (match_str); - d.scp->match_location = match_location; - d.obj_list = NULL; - VisitBioseqsInSep (sep, &d, GetBioseqsByIdCallback); - d.scp = StringConstraintFree (d.scp); - return d.obj_list; -} -typedef struct bioseqsearchitem { - BioseqPtr bsp; - CharPtr str; - Int4 num; - Boolean free_str; -} BioseqSearchItemData, PNTR BioseqSearchItemPtr; + entityID = SeqMgrGetEntityIDForSeqEntry (sep); -static BioseqSearchItemPtr BioseqSearchItemNewStr (BioseqPtr bsp, CharPtr str, Boolean need_free) -{ - BioseqSearchItemPtr bsi; + match_type = FindMatchTypeInHeader (columns); + if (match_type == NULL) return NULL; - bsi = (BioseqSearchItemPtr) MemNew (sizeof (BioseqSearchItemData)); - bsi->bsp = bsp; - bsi->str = str; - bsi->free_str = need_free; - if (IsAllDigits (bsi->str)) { - bsi->num = atoi (bsi->str); + index = BuildIDStringsList(sep); + + last = NULL; + for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) { + obj_row = NULL; + match_choice = 
FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns); + if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) { + ReportEmptyIDColumn (&vnb, line_num); + num_empty++; + } else { + match_list = FindMatchForRowEx (match_type, match_choice->data.ptrvalue, entityID, sep, index); + if (match_list == NULL) { + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (match_choice->data.ptrvalue) + 15)); + sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num); + ValNodeAddPointerToEnd (&vnb, 0, err_msg); + num_missing ++; + } else { + for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1; + col_vnp != NULL; + col_vnp = col_vnp->next, col_num++) { + target_list = NULL; + t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; + if (t == NULL || t->match_type != NULL + || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { + /* no targets */ + } else { + target_list = GetTargetListForRowAndColumn (match_type, match_list, t->field, t->constraint); + if (target_list == NULL) { + ReportMissingTargets (&vnb, t->field, match_choice->data.ptrvalue, col_num, line_num); + num_no_targets++; + } + } + ValNodeAddPointer (&obj_row, 0, target_list); + if (val_vnp != NULL) { + val_vnp = val_vnp->next; + } + } + } + } + tmp = ValNodeAddPointer (&last_obj, 0, obj_row); + if (obj_table == NULL) { + obj_table = last_obj; + } + last_obj = tmp; } - return bsi; -} - - -static BioseqSearchItemPtr BioseqSearchItemNewInt (BioseqPtr bsp, Int4 num) -{ - BioseqSearchItemPtr bsi; - - bsi = (BioseqSearchItemPtr) MemNew (sizeof (BioseqSearchItemData)); - bsi->bsp = bsp; - bsi->num = num; - bsi->free_str = FALSE; - return bsi; -} + match_type = MatchTypeFree (match_type); + index = BioseqSearchIndexFree (index); -static BioseqSearchItemPtr BioseqSearchItemFree (BioseqSearchItemPtr bsi) -{ - if (bsi != NULL) { - if (bsi->free_str) { - bsi->str = MemFree (bsi->str); + if (vnb.head != NULL) { + if 
(num_empty > 0) { + err_vnp = ReportTableSummaryLine (num_empty, line_num - 1, "%d lines out of %d have no ID value"); + err_vnp->next = vnb.head; + vnb.head = err_vnp; } - bsi = MemFree (bsi); - } - return bsi; + if (num_no_targets > 0) { + err_vnp = ReportTableSummaryLine (num_no_targets, line_num - 1, "%d lines out of %d have no targets"); + err_vnp->next = vnb.head; + vnb.head = err_vnp; + } + if (num_missing > 0) { + err_vnp = ReportTableSummaryLine (num_missing, line_num - 1, "%d lines out of %d have no match"); + err_vnp->next = vnb.head; + vnb.head = err_vnp; + } + if (p_err_list == NULL) { + vnb.head = ValNodeFreeData (vnb.head); + } else { + *p_err_list = vnb.head; + } + } + return obj_table; } -static int CompareBioseqSearchItem (BioseqSearchItemPtr b1, BioseqSearchItemPtr b2) +NLM_EXTERN ValNodePtr FreeObjectTableForTabTable (ValNodePtr table) { - if (b1 == NULL && b2 == NULL) { - return 0; - } else if (b1 == NULL) { - return 1; - } else if (b2 == NULL) { - return -1; - } else if (b1->num > 0 && b2->num > 0) { - if (b1->num < b2->num) { - return -1; - } else if (b1->num == b2->num) { - return 0; - } else { - return 1; + ValNodePtr vnp_next, vnp_row, vnp_row_next; + + while (table != NULL) { + vnp_next = table->next; + table->next = NULL; + vnp_row = table->data.ptrvalue; + while (vnp_row != NULL) { + vnp_row_next = vnp_row->next; + vnp_row->next = NULL; + vnp_row->data.ptrvalue = ValNodeFree (vnp_row->data.ptrvalue); + vnp_row = ValNodeFree (vnp_row); + vnp_row = vnp_row_next; } - } else if (b1->num > 0) { - return 1; - } else if (b2->num > 0) { - return -1; - } else { - return StringICmp (b1->str, b2->str); + table = ValNodeFree (table); + table = vnp_next; } + return table; } -static int LIBCALLBACK SortVnpByBioseqSearchItem (VoidPtr ptr1, VoidPtr ptr2) +typedef struct countfeat { + Uint1 featdef; + Int4 num; +} CountFeatData, PNTR CountFeatPtr; + +static void CountFeaturesCallback (SeqFeatPtr sfp, Pointer userdata) { - ValNodePtr vnp1; - ValNodePtr 
vnp2; + CountFeatPtr p; - if (ptr1 == NULL || ptr2 == NULL) return 0; - vnp1 = *((ValNodePtr PNTR) ptr1); - vnp2 = *((ValNodePtr PNTR) ptr2); - if (vnp1 == NULL || vnp2 == NULL) return 0; + if (sfp == NULL || userdata == NULL) return; - return CompareBioseqSearchItem(vnp1->data.ptrvalue, vnp2->data.ptrvalue); + p = (CountFeatPtr) userdata; + if (sfp->idx.subtype == p->featdef) { + p->num++; + } } - -NLM_EXTERN void ValNodeAddPointerToEnd (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data) +static void CountBioSourceDescriptorsCallback (SeqDescrPtr sdp, Pointer userdata) { - ValNodePtr vnp_new; + Int4Ptr p; - vnp_new = ValNodeAddPointer (&(vnbp->tail), choice, data); - if (vnbp->head == NULL) { - vnbp->head = vnp_new; + p = (Int4Ptr) userdata; + if (sdp != NULL && p != NULL && sdp->choice == Seq_descr_source) { + (*p)++; } - vnbp->tail = vnp_new; } -NLM_EXTERN void ValNodeAddPointerToFront (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data) +static void CountPubDescriptorsCallback (SeqDescrPtr sdp, Pointer userdata) { - ValNodePtr vnp; + Int4Ptr p; - vnp = ValNodeNew (NULL); - vnp->choice = choice; - vnp->data.ptrvalue = data; - vnp->next = vnbp->head; - vnbp->head = vnp; + p = (Int4Ptr) userdata; + if (sdp != NULL && p != NULL && sdp->choice == Seq_descr_pub) { + (*p)++; + } } -static void BuildIDStringsListCallback (BioseqPtr bsp, Pointer data) +static ValNodePtr CountObjectsForColumnFields (SeqEntryPtr sep, ValNodePtr columns) { - SeqIdPtr sip, sip_next; - CharPtr id, cp; - DbtagPtr dbtag; - - for (sip = bsp->id; sip != NULL; sip = sip->next) { - sip_next = sip->next; - sip->next = NULL; - id = SeqIdWholeLabel (sip, PRINTID_FASTA_LONG); - sip->next = sip_next; - if (id != NULL) { - /* remove terminating pipe character */ - if (id[StringLen(id) - 1] == '|') - { - id[StringLen(id) - 1] = 0; - } - ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, id, TRUE)); + ValNodePtr count_list = NULL, vnp; + TabColumnConfigPtr t; + CountFeatData d; + 
FeatureFieldPtr f; + Int4 num; + Uint1 featdef = 0; + ValNodePtr tmp_list = NULL; - /* remove leading pipe identifier */ - cp = StringChr (id, '|'); - if (cp != NULL) - { - cp = cp + 1; - ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, cp, FALSE)); - } else { - cp = id; + d.featdef = 0; + d.num = 0; + for (vnp = columns; vnp != NULL; vnp = vnp->next) { + num = 0; + t = (TabColumnConfigPtr) vnp->data.ptrvalue; + if (t != NULL && t->match_type == NULL && t->field != NULL) { + switch (t->field->choice) { + case FieldType_source_qual: + if (featdef != FEATDEF_BIOSRC) { + d.featdef = FEATDEF_BIOSRC; + d.num = 0; + VisitFeaturesInSep (sep, &d, CountFeaturesCallback); + VisitDescriptorsInSep (sep, &(d.num), CountBioSourceDescriptorsCallback); + } + num = d.num; + break; + case FieldType_feature_field: + f = (FeatureFieldPtr) t->field->data.ptrvalue; + if (f != NULL) { + featdef = GetFeatdefFromFeatureType(f->type); + if (featdef != d.featdef) { + d.featdef = featdef; + d.num = 0; + VisitFeaturesInSep (sep, &d, CountFeaturesCallback); + } + num = d.num; + } + break; + case FieldType_cds_gene_prot: + f = FeatureFieldFromCDSGeneProtField (t->field->data.intvalue); + if (f != NULL) { + featdef = GetFeatdefFromFeatureType(f->type); + if (featdef != d.featdef) { + d.featdef = featdef; + d.num = 0; + VisitFeaturesInSep (sep, &d, CountFeaturesCallback); + } + num = d.num; + } + f = FeatureFieldFree (f); + break; + case FieldType_rna_field: + f = FeatureFieldFromRnaQual (t->field->data.ptrvalue); + if (f != NULL) { + featdef = GetFeatdefFromFeatureType(f->type); + if (featdef != d.featdef) { + d.featdef = featdef; + d.num = 0; + VisitFeaturesInSep (sep, &d, CountFeaturesCallback); + } + num = d.num; + } + f = FeatureFieldFree (f); + break; + case FieldType_pub: + d.featdef = FEATDEF_PUB; + d.num = 0; + VisitFeaturesInSep (sep, &d, CountFeaturesCallback); + VisitDescriptorsInSep (sep, &(d.num), CountPubDescriptorsCallback); + num = d.num; + break; + 
case FieldType_struc_comment_field: + VisitDescriptorsInSep (sep, &tmp_list, CollectStructuredCommentsCallback); + num = ValNodeLen (tmp_list); + tmp_list = ValNodeFree (tmp_list); + break; + case FieldType_dblink: + VisitDescriptorsInSep (sep, &tmp_list, CollectDBLinksCallback); + num = ValNodeLen (tmp_list); + tmp_list = ValNodeFree (tmp_list); + break; + case FieldType_misc: + if (t->field->data.intvalue == Misc_field_genome_project_id) { + /* VisitBioseqsInSep (sep, &tmp_list, CollectNucBioseqCallback); */ + tmp_list = CollectNucBioseqs (sep); + num = ValNodeLen (tmp_list); + tmp_list = ValNodeFree (tmp_list); + } else if (t->field->data.intvalue == Misc_field_comment_descriptor) { + tmp_list = CollectCommentDescriptors (sep); + num = ValNodeLen (tmp_list); + tmp_list = ValNodeFree (tmp_list); + } else if (t->field->data.intvalue == Misc_field_defline) { + tmp_list = CollectDeflineDescriptors (sep); + num = ValNodeLen (tmp_list); + tmp_list = ValNodeFree (tmp_list); + } else if (t->field->data.intvalue == Misc_field_keyword) { + tmp_list = CollectGenbankBlockDescriptors (sep); + num = ValNodeLen (tmp_list); + tmp_list = ValNodeFree (tmp_list); + } + break; + case FieldType_molinfo_field: + VisitBioseqsInSep (sep, &tmp_list, CollectBioseqCallback); + num = ValNodeLen (tmp_list); + tmp_list = ValNodeFree (tmp_list); + break; } + } + ValNodeAddInt (&count_list, 0, num); + } + return count_list; +} - /* try ID without version */ - id = StringSave (cp); - cp = StringChr (id, '.'); - if (cp != NULL) - { - *cp = 0; - ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, id, TRUE)); - } else { - id = MemFree (id); - } +NLM_EXTERN ValNodePtr ApplyTableValuesToObjectTable (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr obj_table) +{ + ValNodePtr val_line_vnp, obj_line_vnp; + ValNodePtr val_vnp, obj_vnp, col_vnp; + ValNodePtr target_vnp, tmp_field; + TabColumnConfigPtr t; + CharPtr val, qual_name; + ValNodePtr err_list = NULL, 
count_list, count_affected_list = NULL, count_vnp, count_tot_vnp; + CharPtr err_msg; + CharPtr bad_col_val_fmt = "Did not set value for column %d, line %d"; + CharPtr num_affected_fmt = "%d fields affected"; + CharPtr col_num_affected_fmt = "For %s (column %d), %d items were affected out of %d total"; + Int4 num_fields_affected = 0, col_num, line_num, num_this_column; + Boolean success; + ValNodePtr count_msg = NULL; - /* just bankit number */ - if (sip->choice == SEQID_GENERAL - && (dbtag = (DbtagPtr) sip->data.ptrvalue) != NULL) { - if (StringCmp (dbtag->db, "BankIt") == 0) { - if (dbtag->tag->id > 0) { - id = (CharPtr) MemNew (sizeof (Char) * 22); - sprintf (id, "BankIt%d", dbtag->tag->id); - ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, id, TRUE)); - ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewInt (bsp, dbtag->tag->id)); + count_list = CountObjectsForColumnFields (sep, columns); + + for (val_line_vnp = table, obj_line_vnp = obj_table, line_num = 1; + val_line_vnp != NULL && obj_line_vnp != NULL; + val_line_vnp = val_line_vnp->next, obj_line_vnp = obj_line_vnp->next, line_num++) { + val_vnp = val_line_vnp->data.ptrvalue; + obj_vnp = obj_line_vnp->data.ptrvalue; + col_vnp = columns; + col_num = 1; + count_vnp = count_affected_list; + while (obj_vnp != NULL && col_vnp != NULL) { + num_this_column = 0; + if (obj_vnp->data.ptrvalue != NULL) { + t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; + if (t == NULL || t->match_type != NULL + || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { + /* ignore column or skip blank value */ + } else { + if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) { + val = ""; } else { - id = (CharPtr) MemNew (sizeof (Char) * (8 + StringLen (dbtag->tag->str))); - sprintf (id, "BankIt%s", dbtag->tag->str); - ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, id, TRUE)); - ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 
0, BioseqSearchItemNewStr (bsp, dbtag->tag->str, FALSE)); + val = val_vnp->data.ptrvalue; } - } else if (StringCmp (dbtag->db, "NCBIFILE") == 0 && dbtag->tag != NULL) { - ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, dbtag->tag->str, FALSE)); - if ((cp = StringRChr (dbtag->tag->str, '/')) != NULL) { - ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, cp + 1, FALSE)); + for (target_vnp = obj_vnp->data.ptrvalue; target_vnp != NULL; target_vnp = target_vnp->next) { + if (val[0] == 0) { + success = RemoveFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, t->field, NULL); + } else { + if (t->field != NULL && t->field->choice == FieldType_molinfo_field) { + success = FALSE; + if (target_vnp->choice == OBJ_BIOSEQ) { + tmp_field = MolinfoFieldFromFieldAndStringValue (t->field->data.ptrvalue, val_vnp->data.ptrvalue); + if (tmp_field != NULL) { + success = SetSequenceQualOnBioseq ((BioseqPtr) target_vnp->data.ptrvalue, tmp_field); + tmp_field = MolinfoFieldFree(tmp_field); + } + } + } else { + success = SetFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, t->field, NULL, + val_vnp->data.ptrvalue, t->existing_text); + } + } + if (success) { + num_fields_affected++; + num_this_column++; + if (t->match_mrna && IsFieldTypeCDSProduct (t->field) + && target_vnp->choice == OBJ_SEQFEAT) { + if (AdjustmRNAProductToMatchProteinProduct (target_vnp->data.ptrvalue)) { + num_fields_affected++; + } + } + } else { + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_col_val_fmt) + 30)); + sprintf (err_msg, bad_col_val_fmt, col_num, line_num); + ValNodeAddPointer (&err_list, 0, err_msg); + } } } } + if (val_vnp != NULL) { + val_vnp = val_vnp->next; + } + if (count_vnp == NULL) { + ValNodeAddInt (&count_affected_list, 0, num_this_column); + } else { + count_vnp->data.intvalue += num_this_column; + count_vnp = count_vnp->next; + } + obj_vnp = obj_vnp->next; + col_vnp = col_vnp->next; + 
col_num++; } } -} - - -/* first are str, second are int */ -typedef struct bioseqsearchindex { - Int4 num_str; - Int4 num_int; - Int4 num_total; - BioseqSearchItemPtr PNTR items; -} BioseqSearchIndexData, PNTR BioseqSearchIndexPtr; - -static BioseqSearchIndexPtr BioseqSearchIndexFree (BioseqSearchIndexPtr index) -{ - Int4 i; + /* put message at top of list for number of fields affected */ + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_affected_fmt) + 15)); + sprintf (err_msg, num_affected_fmt, num_fields_affected); + ValNodeAddPointer (&count_msg, 0, err_msg); - if (index != NULL) { - for (i = 0; i < index->num_total; i++) { - index->items[i] = BioseqSearchItemFree(index->items[i]); + /* if any affected, list number of fields per column, and the total in the record */ + if (num_fields_affected > 0) { + for (count_vnp = count_affected_list, count_tot_vnp = count_list, col_vnp = columns, col_num = 1; + count_vnp != NULL && count_tot_vnp != NULL && col_vnp != NULL; + count_vnp = count_vnp->next, count_tot_vnp = count_tot_vnp->next, col_vnp = col_vnp->next, col_num++) { + t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; + if (t != NULL && t->match_type == NULL) { + qual_name = SummarizeFieldType (t->field); + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (col_num_affected_fmt) + StringLen (qual_name) + 45)); + sprintf (err_msg, col_num_affected_fmt, qual_name, col_num, count_vnp->data.intvalue, count_tot_vnp->data.intvalue); + ValNodeAddPointer (&count_msg, 0, err_msg); + qual_name = MemFree (qual_name); + } } - index->items = MemFree (index->items); - index = MemFree (index); } - return index; -} + ValNodeLink (&count_msg, err_list); -static BioseqSearchIndexPtr BuildIDStringsList (SeqEntryPtr sep) -{ - ValNodeBlock vnb; - ValNodePtr list = NULL, vnp; - Int4 num_total, i; - BioseqSearchIndexPtr index; + count_list = ValNodeFree (count_list); + count_affected_list = ValNodeFree (count_affected_list); - vnb.head = NULL; - vnb.tail = NULL; + 
return count_msg; +} - VisitBioseqsInSep (sep, &vnb, BuildIDStringsListCallback); - list = vnb.head; - list = ValNodeSort (list, SortVnpByBioseqSearchItem); - num_total = ValNodeLen (list); +static int LIBCALLBACK SortVnpByChoiceAndPtrvalue (VoidPtr ptr1, VoidPtr ptr2) - index = (BioseqSearchIndexPtr) MemNew (sizeof (BioseqSearchIndexData)); - index->items = (BioseqSearchItemPtr PNTR) MemNew (sizeof (BioseqSearchItemPtr) * num_total); - for (vnp = list, i = 0; vnp != NULL && i < num_total; vnp = vnp->next, i++) { - index->items[i] = vnp->data.ptrvalue; - vnp->data.ptrvalue = NULL; - if (index->items[i]->num > 0) { - index->num_int++; - } else { - index->num_str++; +{ + ValNodePtr vnp1; + ValNodePtr vnp2; + + if (ptr1 != NULL && ptr2 != NULL) { + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + if (vnp1 != NULL && vnp2 != NULL) { + if (vnp1->choice > vnp2->choice) { + return 1; + } else if (vnp1->choice < vnp2->choice) { + return -1; + } else if (vnp1->data.ptrvalue > vnp2->data.ptrvalue) { + return 1; + } else if (vnp1->data.ptrvalue < vnp2->data.ptrvalue) { + return -1; + } else { + return 0; + } } } - index->num_total = index->num_int + index->num_str; - list = ValNodeFree (list); - - return index; + return 0; } - -static BioseqPtr FindStringInIdListIndex (CharPtr str, BioseqSearchIndexPtr index) +static ValNodePtr FindRowsForObjectInObjectTable (ValNodePtr obj_table, Int4 column, Uint1 choice, Pointer data) { - CharPtr tmp; - Int4 match, imax, imin, i, j; - Int4 num = -1; + Int4 col_num, row_num; + ValNodePtr line_vnp, col_vnp, obj_vnp; + ValNodePtr match_rows = NULL; - if (index == NULL) { + if (obj_table == NULL || column < 0) { return NULL; } - if (IsAllDigits (str)) { - match = atoi (str); - imax = index->num_total - 1; - imin = index->num_str; - while (imax >= imin) - { - i = (imax + imin)/2; - if (index->items[i]->num < match) - imax = i - 1; - else if (index->items[i]->num > match) - imin = i + 1; - else - { - num = i; - break; - } 
- } - } else { - imax = index->num_str - 1; - imin = 0; - while (imax >= imin) - { - i = (imax + imin)/2; - tmp = index->items[i]->str; - if ((j = StringICmp(tmp, str)) > 0) - imax = i - 1; - else if (j < 0) - imin = i + 1; - else - { - num = i; - break; - } + for (line_vnp = obj_table, row_num = 0; line_vnp != NULL; line_vnp = line_vnp->next, row_num++) { + col_vnp = line_vnp->data.ptrvalue; + col_num = 0; + while (col_num < column && col_vnp != NULL) { + col_vnp = col_vnp->next; + col_num++; + } + if (col_vnp != NULL) { + obj_vnp = col_vnp->data.ptrvalue; + while (obj_vnp != NULL && (obj_vnp->choice != choice || obj_vnp->data.ptrvalue != data)) { + obj_vnp = obj_vnp->next; + } + if (obj_vnp != NULL) { + ValNodeAddInt (&match_rows, 0, row_num); + } } } - - if (num > -1) { - return index->items[num]->bsp; - } else { - return NULL; - } + return match_rows; } -static ValNodePtr FindListInIdListIndex (Uint1 match_location, CharPtr match_str, BioseqSearchIndexPtr index) +static CharPtr FormatMultipleDestinationErrorMessage (Int4 col_num, ValNodePtr match_rows) { - Int4 i; - ValNodePtr list = NULL; - StringConstraintPtr scp; + CharPtr multi_fmt = "Multiple rows apply to the same object for column %d. 
Matching rows:"; + CharPtr err_msg; + Char buf[16]; + ValNodePtr vnp; - if (StringHasNoText (match_str) || index == NULL) { - return NULL; - } - scp = StringConstraintNew (); - scp->match_text = StringSave (match_str); - scp->match_location = match_location; - for (i = 0; i < index->num_str; i++) { - if (DoesStringMatchConstraint (index->items[i]->str, scp)) { - ValNodeAddPointer (&list, OBJ_BIOSEQ, index->items[i]->bsp); + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (multi_fmt) + + 30 + 15 * ValNodeLen (match_rows))); + sprintf (err_msg, multi_fmt, col_num); + for (vnp = match_rows; vnp != NULL; vnp = vnp->next) { + sprintf (buf, "%d", vnp->data.intvalue + 1); + StringCat (err_msg, buf); + if (vnp->next != NULL) { + StringCat (err_msg, ","); } } - scp = StringConstraintFree (scp); - - list = ValNodeSort (list, SortVnpByChoiceAndPtrvalue); - ValNodeUnique (&list, SortVnpByChoiceAndPtrvalue, ValNodeFree); - return list; + return err_msg; } -static ValNodePtr -FindMatchForRowEx -(MatchTypePtr match_type, - CharPtr match_str, - Uint2 entityID, - SeqEntryPtr sep, - BioseqSearchIndexPtr index - ) +NLM_EXTERN ValNodePtr CheckObjTableForRowsThatApplyToTheSameDestination (ValNodePtr obj_table) { - ValNodePtr match_list = NULL; - FindGeneLocusTagData fd; - SeqFeatPtr sfp; - SeqMgrFeatContext fcontext; - BioseqPtr bsp; + Int4 col_num; + ValNodeBlock vnb, err_list; + ValNodePtr line_vnp, col_vnp, obj_vnp, vnp; + ValNodePtr col_obj_list; + Boolean any_column_values_left; + ValNodePtr match_rows; - if (match_type == NULL || sep == NULL) return NULL; + vnb.head = NULL; + vnb.tail = NULL; + err_list.head = NULL; + err_list.tail = NULL; - switch (match_type->choice) { - case eTableMatchFeatureID: - sfp = SeqMgrGetFeatureByFeatID (entityID, NULL, match_str, NULL, &fcontext); - if (sfp != NULL) { - ValNodeAddPointer (&match_list, OBJ_SEQFEAT, sfp); + /* now, for each row, get pointer to first column */ + for (line_vnp = obj_table; line_vnp != NULL; line_vnp = 
line_vnp->next) { + ValNodeAddPointerToEnd (&vnb, 0, line_vnp->data.ptrvalue); + } + + /* now for each column, make a list of all features in the column, then sort to see if there are duplicates */ + any_column_values_left = TRUE; + col_num = 1; + while (any_column_values_left) { + any_column_values_left = FALSE; + col_obj_list = NULL; + for (vnp = vnb.head; vnp != NULL; vnp = vnp->next) { + col_vnp = vnp->data.ptrvalue; + if (col_vnp != NULL) { + obj_vnp = col_vnp->data.ptrvalue; + ValNodeLink (&col_obj_list, ValNodeCopyPtr (obj_vnp)); + vnp->data.ptrvalue = col_vnp->next; + any_column_values_left = TRUE; } - break; - case eTableMatchGeneLocusTag: - fd.locus_tag = match_str; - fd.gene_list = NULL; - VisitBioseqsInSep (sep, &fd, FindGeneByLocusTagBioseqCallback); - ValNodeLink (&match_list, fd.gene_list); - break; - case eTableMatchProteinID: - case eTableMatchNucID: - if (match_type->match_location == String_location_equals && index != NULL) { - bsp = FindStringInIdListIndex (match_str, index); - if (bsp != NULL) { - ValNodeAddPointer (&match_list, OBJ_BIOSEQ, bsp); + } + if (col_obj_list != NULL) { + col_obj_list = ValNodeSort (col_obj_list, SortVnpByChoiceAndPtrvalue); + for (vnp = col_obj_list; vnp != NULL && vnp->next != NULL; vnp = vnp->next) { + if (vnp->choice == vnp->next->choice + && vnp->data.ptrvalue == vnp->next->data.ptrvalue) { + match_rows = FindRowsForObjectInObjectTable (obj_table, col_num - 1, vnp->choice, vnp->data.ptrvalue); + /* report rows with matches */ + ValNodeAddPointerToEnd (&err_list, col_num, FormatMultipleDestinationErrorMessage (col_num, match_rows)); + match_rows = ValNodeFree (match_rows); + /* skip over the cluster of matches */ + while (vnp->next != NULL && vnp->choice == vnp->next->choice) { + vnp = vnp->next; + } } - } else if (index != NULL) { - ValNodeLink (&match_list, FindListInIdListIndex (match_type->match_location, match_str, index)); - } else { - ValNodeLink (&match_list, FindBioseqsByMatchType (sep, 
match_type->match_location, match_str)); } - break; - case eTableMatchDbxref: - match_list = GetFeaturesByDbxref (sep, match_str, match_type->match_location); - break; - case eTableMatchBioSource: - match_list = GetBioSourcesByTaxName (sep, match_str, match_type->match_location); - break; - case eTableMatchSourceQual: - match_list = GetBioSourcesBySourceQual (sep, match_type->data, match_str, match_type->match_location); - break; + col_obj_list = ValNodeFree (col_obj_list); + } + col_num++; } - return match_list; + vnb.head = ValNodeFree (vnb.head); + return err_list.head; } -static ValNodePtr -FindMatchForRow -(MatchTypePtr match_type, - CharPtr match_str, - Uint2 entityID, - SeqEntryPtr sep - ) +static CharPtr GetMatchTextForLine (ValNodePtr values, ValNodePtr columns) { - return FindMatchForRowEx (match_type, match_str, entityID, sep, NULL); + ValNodePtr val_vnp, col_vnp; + CharPtr match_txt = NULL; + TabColumnConfigPtr t; + + for (val_vnp = values, col_vnp = columns; + val_vnp != NULL && col_vnp != NULL; + val_vnp = val_vnp->next, col_vnp = col_vnp->next) { + t = col_vnp->data.ptrvalue; + if (t != NULL && t->match_type != NULL) { + match_txt = val_vnp->data.ptrvalue; + break; + } + } + return match_txt; } -static ValNodePtr GetFeatureListForProteinBioseq (Uint1 featdef, BioseqPtr bsp) +/* Note - when creating error messages, mark summary messages with choice = 1 */ +NLM_EXTERN ValNodePtr CheckObjTableForExistingText (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr obj_table) { - ValNodePtr feat_list = NULL; - SeqFeatPtr sfp, cds; - SeqMgrFeatContext fcontext; - Int4 seqfeattype; + ValNodeBlock vnb; + ValNodePtr val_line_vnp, obj_line_vnp; + ValNodePtr val_vnp, obj_vnp, col_vnp; + ValNodePtr col_tot = NULL, col_tot_vnp; + Int4 line_num = 1, col_num, num_existing_text = 0; + Uint2 entityID; + TabColumnConfigPtr t; + CharPtr err_msg, str, qual_name, val; + CharPtr already_has_val_fmt = "%s\t%s\t%s\t%d\t%s\t%d"; + CharPtr num_existing_text_fmt = 
"%d fields already have text.\nID\tOld Value\tReplacement\tColumn\tQualifier\tLine"; + CharPtr mrna_warn_fmt = "%d coding region features have mRNAs, but %d do not."; + CharPtr col_tot_fmt = "For column %d, %d out of %d fields already have text."; + ValNodePtr target_list, feat_vnp; + Int4 num_with_mrna = 0, num_without_mrna = 0; + CharPtr match_txt; + CharPtr new_val; - if (bsp == NULL || !ISA_aa (bsp->mol)) - { - return NULL; + vnb.head = NULL; + vnb.tail = NULL; + + if (sep == NULL) { + ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No SeqEntry")); + } + if (table == NULL) { + ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No table")); + } + if (columns == NULL) { + ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No column information")); + } + if (vnb.head != NULL) { + return vnb.head; } - seqfeattype = FindFeatFromFeatDefType (featdef); - if (seqfeattype == SEQFEAT_PROT) - { - for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext); - sfp != NULL; - sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext)) - { - ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp); + entityID = SeqMgrGetEntityIDForSeqEntry (sep); + + for (val_line_vnp = table, obj_line_vnp = obj_table, line_num = 1; + val_line_vnp != NULL && obj_line_vnp != NULL; + val_line_vnp = val_line_vnp->next, obj_line_vnp = obj_line_vnp->next, line_num++) { + val_vnp = val_line_vnp->data.ptrvalue; + obj_vnp = obj_line_vnp->data.ptrvalue; + col_vnp = columns; + if (val_vnp == NULL || obj_vnp == NULL) continue; + col_num = 1; + col_tot_vnp = col_tot; + if (col_tot_vnp == NULL) { + col_tot_vnp = ValNodeAddInt (&col_tot, 0, 0); } - } - else - { - cds = SeqMgrGetCDSgivenProduct (bsp, NULL); - if (cds != NULL) - { - if (featdef == FEATDEF_CDS) - { - sfp = cds; + while (obj_vnp != NULL && col_vnp != NULL) { + if (obj_vnp->data.ptrvalue != NULL) { + t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; + if (t == NULL || t->match_type != NULL + || (t->skip_blank && (val_vnp == NULL || StringHasNoText 
(val_vnp->data.ptrvalue)))) { + /* ignore column or skip blank value */ + } else { + target_list = obj_vnp->data.ptrvalue; + if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) { + val = ""; + } else { + val = val_vnp->data.ptrvalue; + } + for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) { + /* check for existing text */ + str = GetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL); + if (!StringHasNoText (str)) { + qual_name = SummarizeFieldType (t->field); + match_txt = GetMatchTextForLine (val_line_vnp->data.ptrvalue, columns); + if (match_txt == NULL) { + match_txt = ""; + } + new_val = StringSave (str); + SetStringValue (&new_val, val, t->existing_text); + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (already_has_val_fmt) + + StringLen (match_txt) + + StringLen (str) + + StringLen (new_val) + + StringLen (qual_name) + + 30)); + sprintf (err_msg, already_has_val_fmt, match_txt, str, new_val, col_num, qual_name, line_num); + ValNodeAddPointerToEnd (&vnb, 0, err_msg); + num_existing_text ++; + new_val = MemFree (new_val); + col_tot_vnp->data.intvalue ++; + } + str = MemFree (str); + /* check for mrna if changing CDS product */ + if (IsFieldTypeCDSProduct (t->field) && feat_vnp->choice == OBJ_SEQFEAT) { + if (GetmRNAForFeature (feat_vnp->data.ptrvalue) != NULL) { + num_with_mrna++; + } else { + num_without_mrna++; + } + } + } + } } - else if (featdef == FEATDEF_GENE) - { - sfp = GetGeneForFeature (cds); + if (val_vnp != NULL) { + val_vnp = val_vnp->next; } - else if (featdef == FEATDEF_mRNA) - { - sfp = SeqMgrGetOverlappingmRNA (cds->location, &fcontext); + obj_vnp = obj_vnp->next; + col_vnp = col_vnp->next; + col_num++; + col_tot_vnp = col_tot_vnp->next; + if (col_tot_vnp == NULL) { + col_tot_vnp = ValNodeAddInt (&col_tot, 0, 0); } - if (sfp != NULL) - { - ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp); + } + } + if (num_existing_text > 0) { + for (col_tot_vnp = col_tot, col_num = 1; col_tot_vnp 
!= NULL; col_tot_vnp = col_tot_vnp->next, col_num++) { + if (col_tot_vnp->data.intvalue > 0) { + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (col_tot_fmt) + 45)); + sprintf (err_msg, col_tot_fmt, col_num, col_tot_vnp->data.intvalue, line_num - 1); + ValNodeAddPointerToEnd (&vnb, 1, err_msg); } } - } - return feat_list; -} - - -static ValNodePtr GetFeatureListForNucleotideBioseq (Uint1 featdef, BioseqPtr bsp) -{ - ValNodePtr feat_list = NULL; - SeqFeatPtr sfp; - SeqMgrFeatContext fcontext; - Int4 seqfeattype; - BioseqPtr prot_bsp; - if (bsp == NULL || ISA_aa (bsp->mol)) - { - return NULL; + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_existing_text_fmt) + + 15)); + sprintf (err_msg, num_existing_text_fmt, num_existing_text); + ValNodeAddPointerToFront (&vnb, 0, err_msg); } + col_tot = ValNodeFree (col_tot); + if (num_with_mrna > 0 && num_without_mrna > 0) { + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (mrna_warn_fmt) + + 30)); + sprintf (err_msg, mrna_warn_fmt, num_with_mrna, num_without_mrna); + ValNodeAddPointerToFront (&vnb, 1, err_msg); + } - seqfeattype = FindFeatFromFeatDefType (featdef); - if (seqfeattype == SEQFEAT_PROT) - { - for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &fcontext); - sfp != NULL; - sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &fcontext)) - { - prot_bsp = BioseqFindFromSeqLoc (sfp->product); - ValNodeLink (&feat_list, GetFeatureListForProteinBioseq (featdef, prot_bsp)); - } - } - else - { - for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext); - sfp != NULL; - sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext)) - { - ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp); - } - } - return feat_list; + return vnb.head; } -static ValNodePtr GetFeaturesForGene (SeqFeatPtr gene, Uint1 featdef) +NLM_EXTERN ValNodePtr ApplyTableToFeatures (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns) { - BioseqPtr bsp; - SeqFeatPtr sfp; - ValNodePtr feat_list = NULL; - 
SeqMgrFeatContext fcontext; - Int4 start, stop, swap; - - if (gene == NULL) return NULL; - - bsp = BioseqFindFromSeqLoc (gene->location); - start = SeqLocStart (gene->location); - stop = SeqLocStop (gene->location); - if (stop < start) - { - swap = start; - start = stop; - stop = swap; - } - for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext); - sfp != NULL && fcontext.left < stop; - sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext)) - { - if (fcontext.right >= start && gene == GetGeneForFeature (sfp)) - { - ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp); - } - } - return feat_list; -} - + ValNodeBlock vnb; + ValNodePtr line_vnp, val_vnp, col_vnp; + Int4 line_num = 1, col_num; + Uint2 entityID; + ValNodePtr match_list, match_choice, target_list, feat_vnp; + TabColumnConfigPtr t; + CharPtr err_msg; + CharPtr no_match_fmt = "No match for %s, line %d"; + CharPtr bad_col_val_fmt = "Did not set value for column %d, line %d"; + CharPtr num_affected_fmt = "%d fields affected"; + Int4 num_fields_affected = 0; + CharPtr val; + Boolean success; + MatchTypePtr match_type; -static ValNodePtr GetFeatureListForGene (Uint1 featdef, SeqFeatPtr gene) -{ - ValNodePtr feat_list = NULL, cds_list, vnp; - SeqFeatPtr sfp, cds; - SeqMgrFeatContext fcontext; - BioseqPtr protbsp; + vnb.head = NULL; + vnb.tail = NULL; - if (gene == NULL) - { - return NULL; + if (sep == NULL) { + ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No SeqEntry")); } - - if (featdef == FEATDEF_GENE) - { - ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, gene); + if (table == NULL) { + ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No table")); } - else if (FindFeatFromFeatDefType (featdef == SEQFEAT_PROT)) - { - cds_list = GetFeaturesForGene (gene, FEATDEF_CDS); - for (vnp = cds_list; vnp != NULL; vnp = vnp->next) - { - cds = vnp->data.ptrvalue; - if (cds != NULL) - { - protbsp = BioseqFindFromSeqLoc (cds->product); - for (sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, featdef, &fcontext); - sfp 
!= NULL; - sfp = SeqMgrGetNextFeature (protbsp, sfp, 0, featdef, &fcontext)) - { - ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp); - } - } - } - cds_list = ValNodeFree (cds_list); + if (columns == NULL) { + ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No column information")); } - else - { - feat_list = GetFeaturesForGene (gene, featdef); + if (vnb.head != NULL) { + return vnb.head; } - return feat_list; -} - - -static ValNodePtr AddFeaturesFromBioseqSet (BioseqSetPtr bssp, Uint1 featdef) -{ - SeqEntryPtr sep; - BioseqPtr bsp; - Int4 seqfeattype; - ValNodePtr item_list = NULL; + match_type = FindMatchTypeInHeader (columns); - if (bssp == NULL) return NULL; + entityID = SeqMgrGetEntityIDForSeqEntry (sep); - seqfeattype = FindFeatFromFeatDefType (featdef); - for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { - if (sep->data.ptrvalue == NULL) continue; - if (IS_Bioseq (sep)) { - bsp = sep->data.ptrvalue; - if (seqfeattype == SEQFEAT_PROT) { - if (ISA_aa (bsp->mol)) { - ValNodeLink (&item_list, GetFeatureListForProteinBioseq (featdef, bsp)); + for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) { + match_choice = FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns); + if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) { + ReportEmptyIDColumn (&vnb, line_num); + } else { + match_list = FindMatchForRow (match_type, match_choice->data.ptrvalue, entityID, sep); + if (match_list == NULL) { + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (match_choice->data.ptrvalue) + 15)); + sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num); + ValNodeAddPointerToEnd (&vnb, 0, err_msg); + } else { + for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1; + col_vnp != NULL; + col_vnp = col_vnp->next, col_num++) { + t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; + if (t == NULL || t->match_type != NULL + || (t->skip_blank && (val_vnp == 
NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { + if (val_vnp != NULL) { + val_vnp = val_vnp->next; + } + continue; + } + + target_list = GetTargetListForRowAndColumn (match_type, match_list, t->field, t->constraint); + if (target_list == NULL) { + ReportMissingTargets (&vnb, t->field, match_choice->data.ptrvalue, col_num, line_num); + } else { + if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) { + val = ""; + } else { + val = val_vnp->data.ptrvalue; + } + for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) { + if (val[0] == 0) { + success = RemoveFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL); + } else { + success = SetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL, + val_vnp->data.ptrvalue, t->existing_text); + } + if (success) { + num_fields_affected++; + if (t->match_mrna && IsFieldTypeCDSProduct (t->field) + && feat_vnp->choice == OBJ_SEQFEAT) { + if (AdjustmRNAProductToMatchProteinProduct (feat_vnp->data.ptrvalue)) { + num_fields_affected++; + } + } + } else { + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_col_val_fmt) + 30)); + sprintf (err_msg, bad_col_val_fmt, col_num, line_num); + ValNodeAddPointerToEnd (&vnb, 0, err_msg); + } + } + } + target_list = ValNodeFree (target_list); + if (val_vnp != NULL) { + val_vnp = val_vnp->next; + } } - } else if (!ISA_aa (bsp->mol)) { - ValNodeLink (&item_list, GetFeatureListForNucleotideBioseq (featdef, bsp)); } - } else if (IS_Bioseq_set (sep)) { - ValNodeLink (&item_list, AddFeaturesFromBioseqSet (sep->data.ptrvalue, featdef)); + match_list = ValNodeFree (match_list); } } - return item_list; -} + + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_affected_fmt) + 15)); + sprintf (err_msg, num_affected_fmt, num_fields_affected); + ValNodeAddPointerToFront (&vnb, 0, err_msg); + match_type = MatchTypeFree (match_type); + return vnb.head; +} -static ValNodePtr GetFeatureListForBioSourceObjects 
(ValNodePtr item_list, FeatureFieldPtr field) +NLM_EXTERN ValNodePtr CheckTableForExistingText (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns) { - ValNodePtr vnp; - SeqFeatPtr sfp; - SeqDescrPtr sdp; - BioseqPtr bsp; - ObjValNodePtr ovp; - ValNodePtr feature_list = NULL; + ValNodeBlock vnb; + ValNodePtr line_vnp, val_vnp, col_vnp; + Int4 line_num = 1, col_num, num_existing_text = 0; + Uint2 entityID; + TabColumnConfigPtr t; + CharPtr err_msg, str, qual_name, val; + CharPtr no_match_fmt = "No match for %s, line %d"; + CharPtr already_has_val_fmt = "%s already has value '%s' (column %d), line %d. Replacement is '%s'"; + CharPtr num_existing_text_fmt = "%d fields already have text."; + ValNodePtr match_choice, match_list; + ValNodePtr target_list, feat_vnp; + MatchTypePtr match_type; - if (item_list == NULL || field == NULL) return NULL; + vnb.head = NULL; + vnb.tail = NULL; + if (sep == NULL) { + ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No SeqEntry")); + } + if (table == NULL) { + ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No table")); + } + if (columns == NULL) { + ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No column information")); + } + if (vnb.head != NULL) { + return vnb.head; + } - for (vnp = item_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQFEAT) { - sfp = vnp->data.ptrvalue; - if (sfp != NULL) { - bsp = BioseqFindFromSeqLoc (sfp->location); - ValNodeLink (&feature_list, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), bsp)); + match_type = FindMatchTypeInHeader (columns); + if (match_type == NULL) return NULL; + + entityID = SeqMgrGetEntityIDForSeqEntry (sep); + + for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) { + match_choice = FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns); + if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) { + ReportEmptyIDColumn (&vnb, line_num); + if (vnb.head == NULL) { + vnb.head = 
vnb.tail; } - } else if (vnp->choice == OBJ_SEQDESC) { - sdp = vnp->data.ptrvalue; - if (sdp != NULL && sdp->extended != 0) { - ovp = (ObjValNodePtr) sdp; - if (ovp->idx.parenttype == OBJ_BIOSEQSET) { - ValNodeLink (&feature_list, AddFeaturesFromBioseqSet (ovp->idx.parentptr, GetFeatdefFromFeatureType(field->type))); - } else if (ovp->idx.parenttype == OBJ_BIOSEQ) { - bsp = (BioseqPtr) ovp->idx.parentptr; - ValNodeLink (&feature_list, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), bsp)); + } else { + match_list = FindMatchForRow (match_type, match_choice->data.ptrvalue, entityID, sep); + if (match_list == NULL) { + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (match_choice->data.ptrvalue) + 15)); + sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num); + ValNodeAddPointerToEnd (&vnb, 0, err_msg); + } else { + for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1; + col_vnp != NULL; + col_vnp = col_vnp->next, col_num++) { + t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; + if (t == NULL || t->match_type != NULL + || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { + if (val_vnp != NULL) { + val_vnp = val_vnp->next; + } + continue; + } + target_list = GetTargetListForRowAndColumn (match_type, match_list, t->field, t->constraint); + if (target_list == NULL) { + ReportMissingTargets (&vnb, t->field, match_choice->data.ptrvalue, col_num, line_num); + } else { + if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) { + val = ""; + } else { + val = val_vnp->data.ptrvalue; + } + for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) { + str = GetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL); + if (!StringHasNoText (str)) { + qual_name = SummarizeFieldType (t->field); + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (already_has_val_fmt) + + StringLen (qual_name) + StringLen (str) 
+ + StringLen (val) + + 30)); + sprintf (err_msg, already_has_val_fmt, qual_name, str, col_num, line_num, val); + ValNodeAddPointerToEnd (&vnb, col_num, err_msg); + num_existing_text ++; + } + str = MemFree (str); + } + } + target_list = ValNodeFree (target_list); + if (val_vnp != NULL) { + val_vnp = val_vnp->next; + } } } + match_list = ValNodeFree (match_list); } + } + if (num_existing_text > 0) { + err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_existing_text_fmt) + + 15)); + sprintf (err_msg, num_existing_text_fmt, num_existing_text); + ValNodeAddPointerToFront (&vnb, 0, err_msg); } - return feature_list; + + return vnb.head; } -NLM_EXTERN ValNodePtr ValNodeCopyPtr (ValNodePtr orig) +/* Reporting functions for SMART */ +static void GetDescriptorPubTitles (SeqDescrPtr sdp, Pointer userdata) { - ValNodePtr new_list = NULL, last_vnp = NULL, vnp; + CharPtr title; - while (orig != NULL) { - vnp = ValNodeNew (NULL); - vnp->choice = orig->choice; - vnp->data.ptrvalue = orig->data.ptrvalue; - if (last_vnp == NULL) { - new_list = vnp; - } else { - last_vnp->next = vnp; - } - last_vnp = vnp; - orig = orig->next; + if (sdp == NULL || sdp->choice != Seq_descr_pub || userdata == NULL) { + return; + } + + title = GetPubFieldFromObject (OBJ_SEQDESC, sdp, Publication_field_title, NULL); + if (title != NULL) { + ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, title); } - return new_list; } -static ValNodePtr GetFeatureListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list, FeatureFieldPtr field) +static void GetFeaturePubTitles (SeqFeatPtr sfp, Pointer userdata) { - ValNodePtr feature_list = NULL, vnp; + CharPtr title; - if (match_list == NULL || field == NULL || match_type == NULL) return NULL; + if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB || userdata == NULL) { + return; + } - switch (match_type->choice) { - case eTableMatchFeatureID: - feature_list = ValNodeCopyPtr (match_list); - break; - case eTableMatchGeneLocusTag: - for (vnp = 
match_list; vnp != NULL; vnp = vnp->next) { - ValNodeLink (&feature_list, GetFeatureListForGene (GetFeatdefFromFeatureType(field->type), vnp->data.ptrvalue)); - } - break; - case eTableMatchProteinID: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - ValNodeLink (&feature_list, GetFeatureListForProteinBioseq (GetFeatdefFromFeatureType(field->type), vnp->data.ptrvalue)); - } - break; - case eTableMatchDbxref: - feature_list = ValNodeCopyPtr (match_list); - break; - case eTableMatchNucID: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - ValNodeLink (&feature_list, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), vnp->data.ptrvalue)); - } - break; - case eTableMatchBioSource: - case eTableMatchSourceQual: - ValNodeLink (&feature_list, GetFeatureListForBioSourceObjects (match_list, field)); - break; + title = GetPubFieldFromObject (OBJ_SEQFEAT, sfp, Publication_field_title, NULL); + if (title != NULL) { + ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, title); } - return feature_list; } -static void AddBioSourcesForBioseq (BioseqPtr bsp, ValNodePtr PNTR feature_list) +NLM_EXTERN ValNodePtr GetPublicationTitlesInSep (SeqEntryPtr sep) { - SeqDescrPtr sdp; - SeqMgrDescContext context; + ValNodePtr title_list = NULL; - if (feature_list == NULL) return; - for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); - sdp != NULL; - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &context)) { - ValNodeAddPointer (feature_list, OBJ_SEQDESC, sdp); - } + VisitDescriptorsInSep (sep, &title_list, GetDescriptorPubTitles); + VisitFeaturesInSep (sep, &title_list, GetFeaturePubTitles); + return title_list; } -static void AddBioSourcesForFeature (SeqFeatPtr sfp, ValNodePtr PNTR feature_list) -{ - BioseqPtr bsp; - if (sfp == NULL || feature_list == NULL) return; +NLM_EXTERN ValNodePtr GetPublicationTitlesOnSep (SeqEntryPtr sep) +{ + ValNodePtr title_list = NULL; - if (sfp->data.choice == SEQFEAT_BIOSRC) { - 
ValNodeAddPointer (feature_list, OBJ_SEQFEAT, sfp); - } else { - bsp = BioseqFindFromSeqLoc (sfp->location); - AddBioSourcesForBioseq (bsp, feature_list); - } + VisitDescriptorsOnSep (sep, &title_list, GetDescriptorPubTitles); + VisitFeaturesOnSep (sep, &title_list, GetFeaturePubTitles); + return title_list; } -static ValNodePtr GetBioSourceListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list, FeatureFieldPtr field) +static void GetBankitCommentsCallback (SeqDescrPtr sdp, Pointer userdata) { - ValNodePtr feature_list = NULL, vnp; + UserObjectPtr uop; + ObjectIdPtr oip; + UserFieldPtr ufp; - if (match_list == NULL || field == NULL || match_type == NULL) return NULL; + if (sdp == NULL || sdp->choice != Seq_descr_user || userdata == NULL) { + return; + } - switch (match_type->choice) { - case eTableMatchFeatureID: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { - AddBioSourcesForFeature (vnp->data.ptrvalue, &feature_list); - } - } - break; - case eTableMatchGeneLocusTag: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { - AddBioSourcesForFeature (vnp->data.ptrvalue, &feature_list); - } - } - break; - case eTableMatchProteinID: - case eTableMatchNucID: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_BIOSEQ) { - AddBioSourcesForBioseq (vnp->data.ptrvalue, &feature_list); - } - } - break; - case eTableMatchDbxref: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { - AddBioSourcesForFeature (vnp->data.ptrvalue, &feature_list); + uop = (UserObjectPtr) sdp->data.ptrvalue; + if (uop != NULL && StringCmp (uop->_class, "SMART_V1.0") != 0) { + oip = uop->type; + if (oip != NULL && StringCmp (oip->str, "Submission") == 0) { + for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { + oip = ufp->label; + if (oip != 
NULL + && StringCmp (oip->str, "AdditionalComment") == 0 + && !StringHasNoText (ufp->data.ptrvalue)) { + ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, StringSave (ufp->data.ptrvalue)); } } - break; - case eTableMatchBioSource: - case eTableMatchSourceQual: - feature_list = ValNodeCopyPtr (match_list); - break; + } } - return feature_list; } -static void AddPubsForBioseq (BioseqPtr bsp, ValNodePtr PNTR feature_list) +NLM_EXTERN ValNodePtr GetBankitCommentsInSep (SeqEntryPtr sep) { - SeqDescrPtr sdp; - SeqMgrDescContext dcontext; - SeqFeatPtr sfp; - SeqMgrFeatContext fcontext; + ValNodePtr comment_list = NULL; - if (bsp == NULL || feature_list == NULL) return; + VisitDescriptorsInSep (sep, &comment_list, GetBankitCommentsCallback); + return comment_list; +} - for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext); - sdp != NULL; - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_pub, &dcontext)) { - ValNodeAddPointer (feature_list, OBJ_SEQDESC, sdp); + +NLM_EXTERN ValNodePtr GetBankitCommentsOnSep (SeqEntryPtr sep) +{ + ValNodePtr comment_list = NULL; + + VisitDescriptorsOnSep (sep, &comment_list, GetBankitCommentsCallback); + return comment_list; +} + + +static void SplitPCRPrimersByPositionCallback (BioSourcePtr biop, Pointer data) +{ + PCRReactionPtr ps, ps_next, ps_new; + PCRPrimerPtr pp_f, pp_r; + + if (biop == NULL || biop->pcr_primers == NULL) { + return; } - for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PUB, 0, &fcontext); - sfp != NULL; - sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_PUB, 0, &fcontext)) { - ValNodeAddPointer (feature_list, OBJ_SEQFEAT, sfp); + + for (ps = biop->pcr_primers; ps != NULL; ps = ps_next) { + ps_next = ps->next; + + pp_f = ps->forward; + pp_r = ps->reverse; + while (pp_f != NULL && pp_r != NULL && pp_f->next != NULL && pp_r->next != NULL) { + ps_new = PCRReactionNew (); + ps_new->forward = pp_f->next; + ps_new->reverse = pp_r->next; + pp_f->next = NULL; + pp_r->next = NULL; + ps->next = ps_new; + 
ps_new->next = ps_next; + ps = ps_new; + pp_f = ps->forward; + pp_r = ps->reverse; + } } } -static ValNodePtr AddPubListFromBioseqSet (BioseqSetPtr bssp) +NLM_EXTERN void SplitPCRPrimersByPosition (SeqEntryPtr sep) { - SeqEntryPtr sep; - BioseqPtr bsp; - ValNodePtr item_list = NULL; + VisitBioSourcesInSep (sep, NULL, SplitPCRPrimersByPositionCallback); +} - if (bssp == NULL) return NULL; - for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { - if (sep->data.ptrvalue == NULL) continue; - if (IS_Bioseq (sep)) { - bsp = sep->data.ptrvalue; - if (!ISA_aa (bsp->mol)) { - AddPubsForBioseq (bsp, &item_list); +static void MergePCRPrimersCallback (BioSourcePtr biop, Pointer data) +{ + PCRReactionPtr ps, ps_next; + PCRPrimerPtr pp_f_last, pp_r_last; + + if (biop == NULL || biop->pcr_primers == NULL || biop->pcr_primers->next == NULL) { + return; + } + + pp_f_last = biop->pcr_primers->forward; + if (pp_f_last != NULL) { + while (pp_f_last->next != NULL) { + pp_f_last = pp_f_last->next; + } + } + pp_r_last = biop->pcr_primers->reverse; + if (pp_r_last != NULL) { + while (pp_r_last->next != NULL) { + pp_r_last = pp_r_last->next; + } + } + ps = biop->pcr_primers->next; + biop->pcr_primers->next = NULL; + + while (ps != NULL) { + ps_next = ps->next; + ps->next = NULL; + if (ps->forward != NULL) { + if (pp_f_last == NULL) { + biop->pcr_primers->forward = ps->forward; + } else { + pp_f_last->next = ps->forward; } - } else if (IS_Bioseq_set (sep)) { - ValNodeLink (&item_list, AddPubListFromBioseqSet (sep->data.ptrvalue)); + while (pp_f_last->next != NULL) { + pp_f_last = pp_f_last->next; + } + ps->forward = NULL; + } + if (ps->reverse != NULL) { + if (pp_r_last == NULL) { + biop->pcr_primers->reverse = ps->reverse; + } else { + pp_r_last->next = ps->reverse; + } + while (pp_r_last->next != NULL) { + pp_r_last = pp_r_last->next; + } + ps->reverse = NULL; } + ps = PCRReactionFree (ps); + ps = ps_next; } - return item_list; } -static ValNodePtr GetPubListForBioSourceObjects 
(ValNodePtr item_list) +NLM_EXTERN void MergePCRPrimers (SeqEntryPtr sep) { - ValNodePtr vnp; - SeqFeatPtr sfp; - SeqDescrPtr sdp; - BioseqPtr bsp; - ObjValNodePtr ovp; - ValNodePtr feature_list = NULL; + VisitBioSourcesInSep (sep, NULL, MergePCRPrimersCallback); +} - if (item_list == NULL) return NULL; - for (vnp = item_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQFEAT) { - sfp = vnp->data.ptrvalue; - if (sfp != NULL) { - bsp = BioseqFindFromSeqLoc (sfp->location); - AddPubsForBioseq (bsp, &feature_list); +static PCRPrimerPtr ExtractPrimersByConstraint (PCRPrimerPtr PNTR pp_list, StringConstraintPtr scp) +{ + PCRPrimerPtr new_list = NULL, last_new = NULL, prev = NULL, pp, pp_next; + + if (pp_list == NULL || *pp_list == NULL) { + return NULL; + } + + pp = *pp_list; + while (pp != NULL) { + pp_next = pp->next; + if (DoesStringMatchConstraint(pp->name, scp)) { + if (prev == NULL) { + *pp_list = pp->next; + } else { + prev->next = pp->next; } - } else if (vnp->choice == OBJ_SEQDESC) { - sdp = vnp->data.ptrvalue; - if (sdp != NULL && sdp->extended != 0) { - ovp = (ObjValNodePtr) sdp; - if (ovp->idx.parenttype == OBJ_BIOSEQSET) { - ValNodeLink (&feature_list, AddPubListFromBioseqSet (ovp->idx.parentptr)); - } else if (ovp->idx.parenttype == OBJ_BIOSEQ) { - bsp = (BioseqPtr) ovp->idx.parentptr; - AddPubsForBioseq (bsp, &feature_list); - } + pp->next = NULL; + if (last_new == NULL) { + new_list = pp; + } else { + last_new->next = pp; } + last_new = pp; + } else { + prev = pp; } + pp = pp_next; } - return feature_list; + return new_list; } -static ValNodePtr GetPubListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list) +typedef struct stringconstraintpair { + StringConstraintPtr scp1; + StringConstraintPtr scp2; +} StringConstraintPairData, PNTR StringConstraintPairPtr; + +static void SplitPCRPrimersByConstraintsCallback (BioSourcePtr biop, Pointer data) { - SeqFeatPtr sfp; - ValNodePtr vnp; - ValNodePtr feature_list = NULL; + 
PCRReactionPtr ps, ps_new, last_ps = NULL; + PCRPrimerPtr pp_match, last_fwd = NULL, last_rev = NULL; + StringConstraintPairPtr pair; - if (match_type == NULL) return NULL; + if (biop == NULL || biop->pcr_primers == NULL || (pair = (StringConstraintPairPtr) data) == NULL) { + return; + } - switch (match_type->choice) { - case eTableMatchFeatureID: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - AddPubsForBioseq (BioseqFindFromSeqLoc (sfp->location), &feature_list); - } + ps_new = PCRReactionNew (); + + for (ps = biop->pcr_primers; ps != NULL; ps = ps->next) { + /* take forward matches */ + pp_match = ExtractPrimersByConstraint (&(ps->forward), pair->scp1); + if (pp_match != NULL) { + if (last_fwd == NULL) { + ps_new->forward = pp_match; + } else { + last_fwd->next = pp_match; } - break; - case eTableMatchGeneLocusTag: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - AddPubsForBioseq (BioseqFindFromSeqLoc (sfp->location), &feature_list); - } + last_fwd = pp_match; + while (last_fwd->next != NULL) { + last_fwd = last_fwd->next; } - break; - case eTableMatchProteinID: - case eTableMatchNucID: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_BIOSEQ) { - AddPubsForBioseq (vnp->data.ptrvalue, &feature_list); - } + } + /* take reverse matches */ + pp_match = ExtractPrimersByConstraint (&(ps->reverse), pair->scp2); + if (pp_match != NULL) { + if (last_rev == NULL) { + ps_new->reverse = pp_match; + } else { + last_rev->next = pp_match; } - break; - case eTableMatchDbxref: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - AddPubsForBioseq (BioseqFindFromSeqLoc (sfp->location), &feature_list); - } + 
last_rev = pp_match; + while (last_rev->next != NULL) { + last_rev = last_rev->next; } - break; - case eTableMatchBioSource: - case eTableMatchSourceQual: - feature_list = GetPubListForBioSourceObjects (match_list); - break; + } + last_ps = ps; + } + if (ps_new->forward != NULL || ps_new->reverse != NULL) { + last_ps->next = ps_new; + } else { + ps_new = PCRReactionFree (ps_new); } - return feature_list; } -static ValNodePtr GetSequenceListForBioSourceObjects (ValNodePtr item_list) +NLM_EXTERN void SplitPCRPrimersByConstraints (SeqEntryPtr sep, StringConstraintPtr scp_fwd, StringConstraintPtr scp_rev) { - ValNodePtr vnp; - SeqFeatPtr sfp; - SeqDescrPtr sdp; - BioseqPtr bsp; - ObjValNodePtr ovp; - ValNodePtr seq_list = NULL; - SeqEntryPtr sep; + StringConstraintPairData pair; - if (item_list == NULL) return NULL; + pair.scp1 = scp_fwd; + pair.scp2 = scp_rev; - for (vnp = item_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQFEAT) { - sfp = vnp->data.ptrvalue; - if (sfp != NULL) { - bsp = BioseqFindFromSeqLoc (sfp->location); - if (bsp != NULL) { - ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp); - } - } - } else if (vnp->choice == OBJ_SEQDESC) { - sdp = vnp->data.ptrvalue; - if (sdp != NULL && sdp->extended != 0) { - ovp = (ObjValNodePtr) sdp; - if (ovp->idx.parenttype == OBJ_BIOSEQSET) { - sep = SeqMgrGetSeqEntryForData (ovp->idx.parentptr); - VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); - } else if (ovp->idx.parenttype == OBJ_BIOSEQ) { - bsp = (BioseqPtr) ovp->idx.parentptr; - if (bsp != NULL) { - ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp); - } - } - } - } - } - return seq_list; + VisitBioSourcesInSep (sep, &pair, SplitPCRPrimersByConstraintsCallback); } -static ValNodePtr GetSequenceListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list) +/* product name fixing rules */ + +NLM_EXTERN Int4 CountSuspectRuleSet (SuspectRuleSetPtr set) { - SeqFeatPtr sfp; - ValNodePtr vnp; - ValNodePtr seq_list = NULL; - BioseqPtr 
bsp; + Int4 num = 0; + while (set != NULL) { + num++; + set = set->next; + } + return num; +} - if (match_type == NULL) return NULL; - switch (match_type->choice) { - case eTableMatchFeatureID: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - bsp = BioseqFindFromSeqLoc (sfp->location); - if (bsp != NULL) { - ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp); - } - } - } - break; - case eTableMatchGeneLocusTag: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - bsp = BioseqFindFromSeqLoc (sfp->location); - if (bsp != NULL) { - ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp); - } - } - } - break; - case eTableMatchProteinID: - case eTableMatchNucID: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_BIOSEQ) { - ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, vnp->data.ptrvalue); - } - } - break; - case eTableMatchDbxref: - for (vnp = match_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - bsp = BioseqFindFromSeqLoc (sfp->location); - if (bsp != NULL) { - ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp); - } - } - } - break; - case eTableMatchBioSource: - case eTableMatchSourceQual: - seq_list = GetSequenceListForBioSourceObjects (match_list); - break; +/* emptiness */ +NLM_EXTERN Boolean IsSearchFuncEmpty (SearchFuncPtr func) +{ + Boolean rval = TRUE; + + if (func == NULL) { + rval = TRUE; + } else { + switch (func->choice) { + case SearchFunc_string_constraint: + rval = IsStringConstraintEmpty (func->data.ptrvalue); + break; + case SearchFunc_prefix_and_numbers: + rval = StringHasNoText (func->data.ptrvalue); + break; + default: + rval = FALSE; + } } - return seq_list; + return rval; } -static ValNodePtr 
GetStructuredCommentListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list) +NLM_EXTERN Boolean IsSuspectRuleEmpty (SuspectRulePtr rule) { - ValNodePtr seq_list, target_list = NULL, vnp; - SeqDescrPtr sdp; - SeqMgrDescContext context; + if (rule == NULL) { + return TRUE; + } else if (IsSearchFuncEmpty(rule->find)) { + return TRUE; + } else { + return FALSE; + } +} - seq_list = GetSequenceListForRowAndColumn (match_type, match_list); - for (vnp = seq_list; vnp != NULL; vnp = vnp->next) { - if (vnp->choice == OBJ_BIOSEQ) { - for (sdp = SeqMgrGetNextDescriptor (vnp->data.ptrvalue, NULL, Seq_descr_user, &context); - sdp != NULL; - sdp = SeqMgrGetNextDescriptor (vnp->data.ptrvalue, sdp, Seq_descr_user, &context)) { - if (IsUserObjectStructuredComment (sdp->data.ptrvalue)) { - ValNodeAddPointer (&target_list, OBJ_SEQDESC, sdp); - } - } +/* summarization */ +NLM_EXTERN CharPtr SummarizeSearchFunc (SearchFuncPtr func) +{ + CharPtr summ = NULL; + CharPtr bracket_fmt = "Contains %d or more brackets or parentheses"; + CharPtr prefix_fmt = "Contains '%s' followed by numbers"; + CharPtr length_fmt = "Is longer than %d characters"; + CharPtr term_fmt = "Contains '%s' at start or separated from other letters by numbers, spaces, or punctuation, but does not also contain 'domain'"; + + if (func == NULL) { + summ = StringSave ("No search function"); + } else { + switch (func->choice) { + case SearchFunc_string_constraint: + summ = SummarizeStringConstraint (func->data.ptrvalue); + break; + case SearchFunc_contains_plural: + summ = StringSave ("May contain plural"); + break; + case SearchFunc_n_or_more_brackets_or_parentheses: + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (bracket_fmt) + 15)); + sprintf (summ, bracket_fmt, func->data.intvalue); + break; + case SearchFunc_three_numbers: + summ = StringSave ("Three or more numbers together"); + break; + case SearchFunc_underscore: + summ = StringSave ("Contains underscore"); + break; + case 
SearchFunc_prefix_and_numbers: + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (prefix_fmt) + StringLen (func->data.ptrvalue))); + sprintf (summ, prefix_fmt, func->data.ptrvalue == NULL ? "" : func->data.ptrvalue); + break; + case SearchFunc_all_caps: + summ = StringSave ("Is all capital letters"); + break; + case SearchFunc_unbalanced_paren: + summ = StringSave ("Contains unbalanced brackets or parentheses"); + break; + case SearchFunc_too_long: + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (length_fmt) + 15)); + sprintf (summ, length_fmt, func->data.intvalue); + break; + case SearchFunc_has_term: + summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (term_fmt) + StringLen (func->data.ptrvalue))); + sprintf (summ, term_fmt, func->data.ptrvalue == NULL ? "" : func->data.ptrvalue); + break; + default: + summ = StringSave ("Unknown search function"); + break; } - } - seq_list = ValNodeFree (seq_list); - return target_list; + } + return summ; } -static ValNodePtr GetTargetListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list, FieldTypePtr field, ValNodePtr constraint) +NLM_EXTERN CharPtr SummarizeReplaceFunc (ReplaceFuncPtr replace) { - ValNodePtr target_list = NULL, vnp_prev = NULL, vnp, vnp_next, tmp_list; - FeatureFieldPtr feature_field; + CharPtr summ = NULL; + SimpleReplacePtr simple; + CharPtr replace_fmt = "Replace %swith '%s'"; + CharPtr whole = "entire name "; + CharPtr weasel_to_putative = ", retain and normalize 'putative' synonym"; + Int4 len; - if (field == NULL || match_type == NULL) return NULL; - switch (field->choice) { - case FieldType_source_qual: - target_list = GetBioSourceListForRowAndColumn (match_type, match_list, field->data.ptrvalue); - break; - case FieldType_feature_field: - target_list = GetFeatureListForRowAndColumn (match_type, match_list, field->data.ptrvalue); - break; - case FieldType_cds_gene_prot: - feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue); - target_list = 
GetFeatureListForRowAndColumn (match_type, match_list, feature_field); - feature_field = FeatureFieldFree (feature_field); - break; - case FieldType_pub: - target_list = GetPubListForRowAndColumn (match_type, match_list); - break; - case FieldType_rna_field: - feature_field = FeatureFieldFromRnaQual (field->data.ptrvalue); - target_list = GetFeatureListForRowAndColumn (match_type, match_list, feature_field); - feature_field = FeatureFieldFree (feature_field); - break; - case FieldType_struc_comment_field: - target_list = GetStructuredCommentListForRowAndColumn (match_type, match_list); - break; - case FieldType_misc: - if (field->data.intvalue == Misc_field_genome_project_id) { - target_list = GetSequenceListForRowAndColumn (match_type, match_list); - } else if (field->data.intvalue == Misc_field_comment_descriptor) { - tmp_list = GetSequenceListForRowAndColumn (match_type, match_list); - for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { - AddCommentDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); - } - tmp_list = ValNodeFree (tmp_list); - } else if (field->data.intvalue == Misc_field_defline) { - tmp_list = GetSequenceListForRowAndColumn (match_type, match_list); - for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { - AddDeflineDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); - } - tmp_list = ValNodeFree (tmp_list); - } else if (field->data.intvalue == Misc_field_keyword) { - tmp_list = GetSequenceListForRowAndColumn (match_type, match_list); - for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { - AddGenbankBlockDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); - } - tmp_list = ValNodeFree (tmp_list); + if (replace == NULL) { + return NULL; + } + switch (replace->choice) { + case ReplaceFunc_simple_replace: + simple = (SimpleReplacePtr) replace->data.ptrvalue; + len = StringLen (replace_fmt) + StringLen (simple->replace) + 1; + if (simple->whole_string) { + len += StringLen (whole); + } + if 
(simple->weasel_to_putative) { + len += StringLen (weasel_to_putative); + } + summ = (CharPtr) MemNew (sizeof (Char) * len); + sprintf (summ, replace_fmt, + simple->whole_string ? whole : "" , + simple->replace == NULL ? "" : simple->replace); + if (simple->weasel_to_putative) { + StringCat (summ, weasel_to_putative); } break; - case FieldType_molinfo_field: - target_list = GetSequenceListForRowAndColumn(match_type, match_list); + case ReplaceFunc_haem_replace: + summ = StringSave ("Replace with 'heme' if whole word, 'hem' otherwise"); + break; + default: + summ = StringSave ("Unknown replacement function"); break; } + return summ; +} - /* remove targets that do not match constraint */ - vnp = target_list; - while (vnp != NULL) { - vnp_next = vnp->next; - if (!DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, constraint)) { - if (vnp_prev == NULL) { - target_list = vnp->next; - } else { - vnp_prev->next = vnp->next; - } - vnp->next = NULL; - vnp = ValNodeFree (vnp); - } else { - vnp_prev = vnp; - } - vnp = vnp_next; +static CharPtr fix_type_names[] = { + "None", + "Typo", + "Quick fix", + "Organelles not appropriate in prokaryote", + "Suspicous phrase; should this be nonfunctional?", + "May contain database identifer more appropriate in note; remove from product name", + "Remove organism from product name", + "Possible parsing error or incorrect formatting; remove inappropriate symbols", + "Implies evolutionary relationship; change to -like protein", + "Use xxx protein or xxx-containing protein", + "Use hypothetical protein", + "Use American spelling", + "Use short product name instead of descriptive phrase", + "use protein instead of gene as appropriate" +}; + +NLM_EXTERN CharPtr SummarizeFixType (Uint2 fix_type) +{ + if (fix_type < sizeof (fix_type_names) / sizeof (CharPtr)) { + return fix_type_names[fix_type]; + } else { + return "Unknown fix type"; } +} - /* remove targets found twice */ - target_list = ValNodeSort (target_list, 
SortVnpByChoiceAndPtrvalue); - ValNodeUnique (&target_list, SortVnpByChoiceAndPtrvalue, ValNodeFree); - return target_list; +NLM_EXTERN CharPtr SummarizeReplaceRule (ReplaceRulePtr replace) +{ + CharPtr add_note = ", move original to note"; + CharPtr func; + CharPtr summ = NULL; + Int4 len; + + if (replace == NULL) { + return NULL; + } + func = SummarizeReplaceFunc (replace->replace_func); + len = StringLen (func) + 1; + if (replace->move_to_note) { + len += StringLen (add_note); + } + summ = (CharPtr) MemNew (sizeof (Char) * len); + StringCpy (summ, func); + if (replace->move_to_note) { + StringCat (summ, add_note); + } + + func = MemFree (func); + return summ; } -static void ReportMissingTargets (ValNodeBlockPtr err_list, FieldTypePtr ft, CharPtr match_val, Int4 col_num, Int4 line_num) +NLM_EXTERN CharPtr SummarizeSuspectRule (SuspectRulePtr rule) { - CharPtr feat_name; - FeatureFieldPtr field; - CharPtr no_feat_fmt = "No %s feature for %s (column %d, line %d)"; - CharPtr no_src_fmt = "No biosource for %s (column %d, line %d)"; - CharPtr no_seq_fmt = "No sequence for %s (column %d, line %d)"; - CharPtr no_cmt_fmt = "No structured comment for %s (column %d, line %d)"; - CharPtr err_msg; + CharPtr find = NULL, replace = NULL, fix_type = NULL, feat_constraint = NULL, except = NULL; + CharPtr summ = NULL; + CharPtr butnot = " but not "; + Int4 len; - if (err_list == NULL || ft == NULL || match_val == NULL) return; + if (rule == NULL) { + return NULL; + } - switch (ft->choice) { - case FieldType_source_qual: - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_src_fmt) - + StringLen (match_val) - + 30)); - sprintf (err_msg, no_src_fmt, match_val, col_num, line_num); - ValNodeAddPointerToEnd (err_list, 0, err_msg); - break; - case FieldType_feature_field: - field = (FeatureFieldPtr) ft->data.ptrvalue; - if (field != NULL) { - feat_name = GetFeatureNameFromFeatureType (field->type); - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt) - + 
StringLen (feat_name) - + StringLen (match_val) - + 30)); - sprintf (err_msg, no_feat_fmt, feat_name, match_val, col_num, line_num); - ValNodeAddPointerToEnd (err_list, 0, err_msg); - } - break; - case FieldType_cds_gene_prot: - field = FeatureFieldFromCDSGeneProtField (ft->data.intvalue); - if (field != NULL) { - feat_name = GetFeatureNameFromFeatureType (field->type); - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt) - + StringLen (feat_name) - + StringLen (match_val) - + 30)); - sprintf (err_msg, no_feat_fmt, feat_name, match_val, col_num, line_num); - ValNodeAddPointerToEnd (err_list, 0, err_msg); - } - field = FeatureFieldFree (field); - break; - case FieldType_rna_field: - field = FeatureFieldFromRnaQual (ft->data.ptrvalue); - if (field != NULL) { - feat_name = GetFeatureNameFromFeatureType (field->type); - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt) - + StringLen (feat_name) - + StringLen (match_val) - + 30)); - sprintf (err_msg, no_feat_fmt, feat_name, match_val, col_num, line_num); - ValNodeAddPointerToEnd (err_list, 0, err_msg); - } - field = FeatureFieldFree (field); - break; - case FieldType_struc_comment_field: - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_cmt_fmt) + StringLen (match_val) + 30)); - sprintf (err_msg, no_cmt_fmt, match_val, col_num, line_num); - ValNodeAddPointerToEnd (err_list, 0, err_msg); - break; - case FieldType_misc: - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_seq_fmt) - + StringLen (match_val) - + 30)); - sprintf (err_msg, no_seq_fmt, match_val, col_num, line_num); - ValNodeAddPointerToEnd (err_list, 0, err_msg); - break; + find = SummarizeSearchFunc (rule->find); + if (!IsSearchFuncEmpty(rule->except)) { + except = SummarizeSearchFunc (rule->except); + } + feat_constraint = SummarizeConstraintSet (rule->feat_constraint); + replace = SummarizeReplaceRule (rule->replace); + if (rule->rule_type != Fix_type_none) { + fix_type = SummarizeFixType 
(rule->rule_type); + } + + len = StringLen (find) + StringLen (except) + StringLen (feat_constraint) + StringLen (replace) + StringLen (fix_type) + 6; + if (feat_constraint != NULL) { + len += 2; + } + if (except != NULL) { + len += StringLen (butnot); + } + summ = (CharPtr) MemNew (sizeof (Char) * len); + StringCpy (summ, find); + if (except != NULL) { + StringCat (summ, butnot); + StringCat (summ, except); } + + if (feat_constraint != NULL) { + StringCat (summ, ", "); + StringCat (summ, feat_constraint); + } + + if (replace != NULL) { + StringCat (summ, ", "); + StringCat (summ, replace); + } + + if (rule->rule_type != Fix_type_none) { + StringCat (summ, " ("); + StringCat (summ, fix_type); + StringCat (summ, ")"); + } + + find = MemFree (find); + except = MemFree (except); + feat_constraint = MemFree (feat_constraint); + replace = MemFree (replace); + return summ; } -static void ReportEmptyIDColumn (ValNodeBlockPtr vnb, Int4 line_num) +NLM_EXTERN Boolean StringMayContainPlural (CharPtr search) { - CharPtr err_msg; - CharPtr missing_id_fmt = "No ID for line %d"; - - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (missing_id_fmt) + 15)); - sprintf (err_msg, missing_id_fmt, line_num); - ValNodeAddPointerToEnd (vnb, 0, err_msg); + CharPtr cp; + Char last_letter, second_to_last_letter, next_letter; + Int4 word_len = 0; + Boolean may_contain_plural = FALSE; + CharPtr word_skip = " ,"; + + if (search == NULL) return FALSE; + cp = search; + while (*cp != 0 && !may_contain_plural) { + word_len = StringCSpn (cp, word_skip); + last_letter = *(cp + word_len - 1); + if (last_letter == 's') { + if (word_len >=5 && StringNCmp (cp + word_len - 5, "trans", 5) == 0) { + /* not plural */ + cp = cp + word_len; + cp += StringSpn (cp, word_skip); + } else if (word_len > 2 + && (second_to_last_letter = *(cp + word_len - 2)) != 's' + && second_to_last_letter != 'i' + && second_to_last_letter != 'u' + && ((next_letter = *(cp + word_len)) == ',' || next_letter == 0)) { + 
may_contain_plural = TRUE; + } else { + cp = cp + word_len; + cp += StringSpn (cp, word_skip); + } + } else { + cp = cp + word_len; + cp += StringSpn (cp, word_skip); + } + } + return may_contain_plural; } -static ValNodePtr FindMatchChoiceInLine (ValNodePtr val_vnp, ValNodePtr col_vnp) + +static CharPtr FindFirstOpen (CharPtr cp) { - TabColumnConfigPtr t; + CharPtr pa, ba; - while (val_vnp != NULL && col_vnp != NULL) { - t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; - if (t != NULL && t->match_type != NULL) { - return val_vnp; - } - val_vnp = val_vnp->next; - col_vnp = col_vnp->next; + if (cp == NULL) { + return NULL; + } + pa = StringChr (cp, '('); + ba = StringChr (cp, '['); + if (pa == NULL) { + return ba; + } else if (ba == NULL || ba > pa) { + return pa; + } else { + return ba; } - return NULL; } -NLM_EXTERN SeqFeatPtr GetmRNAForFeature (SeqFeatPtr sfp) +static Char GetClose (Char ch) { - SeqMgrFeatContext fcontext; - BioseqPtr pbsp; - - if (sfp == NULL) return NULL; - if (sfp->data.choice == SEQFEAT_PROT) - { - pbsp = BioseqFindFromSeqLoc (sfp->location); - sfp = SeqMgrGetCDSgivenProduct (pbsp, NULL); - if (sfp == NULL) return NULL; + if (ch == '(') { + return ')'; + } else if (ch == '[') { + return ']'; + } else if (ch == '{') { + return '}'; + } else { + return ch; } - return SeqMgrGetOverlappingmRNA (sfp->location, &fcontext); } -NLM_EXTERN Boolean AdjustmRNAProductToMatchProteinProduct (SeqFeatPtr sfp) +static Boolean SkipBracketOrParen (CharPtr bp, CharPtr start, CharPtr PNTR skip_to) { - SeqFeatPtr mrna; - ProtRefPtr prp; - RnaRefPtr rrp; + Boolean rval = FALSE; + CharPtr ep, ns; - if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return FALSE; + if (bp - start > 2 && StringNCmp (bp - 3, "NAD(P)", 6) == 0) { + rval = TRUE; + *skip_to = bp + 6; + } else if (StringNCmp (bp, "(NAD(P)H)", 9) == 0) { + rval = TRUE; + *skip_to = bp + 9; + } else if (StringNCmp (bp, "(NAD(P))", 8) == 0) { + rval = TRUE; + *skip_to = bp + 8; + } else if (StringNCmp (bp, 
"(I)", 3) == 0) { + rval = TRUE; + *skip_to = bp + 4; + } else if (StringNCmp (bp, "(II)", 4) == 0) { + rval = TRUE; + *skip_to = bp + 5; + } else if (StringNCmp (bp, "(III)", 5) == 0) { + rval = TRUE; + *skip_to = bp + 6; + } else if (StringNCmp (bp, "(NADPH)", 7) == 0) { + rval = TRUE; + *skip_to = bp + 7; + } else if (StringNCmp (bp, "(NAD+)", 6) == 0) { + rval = TRUE; + *skip_to = bp + 6; + } else if (StringNCmp (bp, "(NAPPH/NADH)", 12) == 0) { + rval = TRUE; + *skip_to = bp + 12; + } else if (StringNCmp (bp, "(NADP+)", 7) == 0) { + rval = TRUE; + *skip_to = bp + 7; + } else if (StringNCmp (bp, "[acyl-carrier protein]", 22) == 0) { + rval = TRUE; + *skip_to = bp + 22; + } else if (StringNCmp (bp, "[acyl-carrier-protein]", 22) == 0) { + rval = TRUE; + *skip_to = bp + 22; + } else if (StringNCmp (bp, "(acyl carrier protein)", 22) == 0) { + rval = TRUE; + *skip_to = bp + 22; + } else { + ns = StringChr (bp + 1, *bp); + ep = StringChr (bp + 1, GetClose(*bp)); + if (ep != NULL && (ns == NULL || ns > ep)) { + if (ep - bp < 5) { + rval = TRUE; + *skip_to = ep + 1; + } else if (ep - bp > 3 && StringNCmp (ep - 3, "ing", 3) == 0) { + rval = TRUE; + *skip_to = ep + 1; + } + } + } + return rval; +} - prp = (ProtRefPtr) sfp->data.value.ptrvalue; - mrna = GetmRNAForFeature (sfp); - if (mrna == NULL) return FALSE; +NLM_EXTERN Boolean ContainsNorMoreSetsOfBracketsOrParentheses (CharPtr search, Int4 n) +{ + CharPtr cp, end; + Int4 num_found = 0; - rrp = (RnaRefPtr) mrna->data.value.ptrvalue; - if (rrp == NULL) - { - rrp = RnaRefNew(); - mrna->data.value.ptrvalue = rrp; + if (search == NULL) { + return FALSE; } - rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue); - if (prp == NULL || prp->name == NULL || StringHasNoText (prp->name->data.ptrvalue)) - { - rrp->ext.choice = 0; + cp = FindFirstOpen(search); + while (num_found < n && cp != NULL && *cp != 0) { + if (SkipBracketOrParen(cp, search, &cp)) { + /* ignore it */ + cp = FindFirstOpen (cp); + } else if ((end = 
StringChr (cp, GetClose (*cp))) == NULL) { + /* skip, doesn't close the bracket */ + cp = FindFirstOpen (cp + 1); + } else { + cp = FindFirstOpen (end); + num_found ++; + } } - else - { - rrp->ext.choice = 1; - rrp->ext.value.ptrvalue = StringSave (prp->name->data.ptrvalue); + + if (num_found >= n) { + return TRUE; + } else { + return FALSE; } - return TRUE; } -NLM_EXTERN Boolean IsFieldTypeCDSProduct (FieldTypePtr ft) +static Boolean FollowedByFamily (CharPtr PNTR str) { - FeatureFieldPtr field; - Boolean rval = FALSE; + Int4 word_len; - if (ft == NULL) return FALSE; - if (ft->choice == FieldType_feature_field) { - field = (FeatureFieldPtr) ft->data.ptrvalue; - if (field != NULL && field->type == Feature_type_cds - && field->field != NULL - && field->field->choice == FeatQualChoice_legal_qual - && field->field->data.intvalue == Feat_qual_legal_product) { - rval = TRUE; - } - } else if (ft->choice == FieldType_cds_gene_prot) { - if (ft->data.intvalue == CDSGeneProt_field_prot_name) { - rval = TRUE; - } + if (str == NULL || *str == NULL || **str == 0) { + return FALSE; + } + + word_len = StringCSpn (*str + 1, " "); + if (*(*str + word_len + 1) != 0 && StringNCmp (*str + word_len + 2, "family", 6) == 0) { + *str = *str + word_len + 7; + return TRUE; + } else { + return FALSE; } - return rval; } -static Boolean IsFieldTypeProteinDesc (FieldTypePtr ft) +static Boolean InWordBeforeCytochromeOrCoenzyme (CharPtr cp, CharPtr start) { - FeatureFieldPtr field; - Boolean rval = FALSE; + if (cp == NULL) { + return FALSE; + } - if (ft == NULL) return FALSE; - if (ft->choice == FieldType_feature_field) { - field = (FeatureFieldPtr) ft->data.ptrvalue; - if (field != NULL && (field->type == Feature_type_cds || field->type == Feature_type_prot) - && field->field != NULL - && field->field->choice == FeatQualChoice_legal_qual - && field->field->data.intvalue == Feat_qual_legal_description) { - rval = TRUE; - } - } else if (ft->choice == FieldType_cds_gene_prot) { - if 
(ft->data.intvalue == CDSGeneProt_field_prot_description) { - rval = TRUE; - } + while (cp > start && !isspace (*cp)) { + cp--; + } + if (cp == start) { + return FALSE; + } + while (cp > start && isspace (*cp)) { + cp--; + } + if (cp - start >= 9 && StringNICmp (cp - 9, "cytochrome", 10) == 0) { + return TRUE; + } else if (cp - start >= 7 && StringNCmp (cp - 7, "coenzyme", 8) == 0) { + return TRUE; + } else { + return FALSE; } - return rval; } -static Boolean IsFieldTypeGeneLocusTag (FieldTypePtr ft) +static Boolean PrecededByPrefix (CharPtr search, CharPtr cp, CharPtr prefix) { - FeatureFieldPtr field; - RnaQualPtr rq; - Boolean rval = FALSE; + Int4 len; - if (ft == NULL) return FALSE; - if (ft->choice == FieldType_feature_field) { - field = (FeatureFieldPtr) ft->data.ptrvalue; - if (field != NULL && field->type == Feature_type_gene - && field->field != NULL - && field->field->choice == FeatQualChoice_legal_qual - && field->field->data.intvalue == Feat_qual_legal_locus_tag) { - rval = TRUE; - } - } else if (ft->choice == FieldType_cds_gene_prot) { - if (ft->data.intvalue == CDSGeneProt_field_gene_locus_tag) { - rval = TRUE; - } - } else if (ft->choice == FieldType_rna_field) { - rq = (RnaQualPtr) ft->data.ptrvalue; - if (rq != NULL && rq->field == Rna_field_gene_locus_tag) { - rval = TRUE; - } + if (search == NULL || cp == NULL || StringHasNoText (prefix)) { + return FALSE; + } + len = StringLen (prefix); + if (cp - search >= len && StringNCmp (cp - len, prefix, len) == 0) { + return TRUE; + } else { + return FALSE; } - - return rval; } - -NLM_EXTERN ValNodePtr ValidateTabTableValues (ValNodePtr table, ValNodePtr columns) +NLM_EXTERN Boolean ContainsThreeOrMoreNumbersTogether (CharPtr search) { - ValNodePtr err_list = NULL; - ValNodePtr line_vnp, col_vnp, val_vnp; - Int4 line_num, col_num; - TabColumnConfigPtr t; - ValNodePtr locus_tag_values = NULL, bad_locus_tags = NULL, vnp, tmp_field; - CharPtr bad_format_fmt = "Locus tag %s has incorrect format"; - CharPtr 
dup_fmt = "Locus tag %s appears in the table more than once"; - CharPtr inconsistent_fmt = "Locus tag prefix for %s is inconsistent"; - CharPtr bad_molinfo_fmt = "'%s' is not a valid value for this field"; - CharPtr err_msg; + CharPtr p; + Int4 num_digits = 0; - if (table == NULL || columns == NULL) { - return NULL; + if (search == NULL) { + return FALSE; } - for (line_vnp = table, line_num = 1; - line_vnp != NULL; - line_vnp = line_vnp->next, line_num++) { - for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1; - val_vnp != NULL && col_vnp != NULL; - val_vnp = val_vnp->next, col_vnp = col_vnp->next, col_num++) { - t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; - if (t == NULL || t->match_type != NULL || val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)) { - continue; - } - if (IsFieldTypeGeneLocusTag (t->field)) { - ValNodeAddPointer (&locus_tag_values, 0, val_vnp->data.ptrvalue); - } else if (t->field != NULL && t->field->choice == FieldType_molinfo_field && val_vnp->data.ptrvalue != NULL) { - tmp_field = MolinfoFieldFromFieldAndStringValue (t->field->data.ptrvalue, val_vnp->data.ptrvalue); - if (tmp_field == NULL) { - err_msg =(CharPtr) MemNew (sizeof (Char) * (StringLen(bad_molinfo_fmt) + StringLen (val_vnp->data.ptrvalue))); - sprintf (err_msg, bad_molinfo_fmt, val_vnp->data.ptrvalue); - ValNodeAddPointer (&err_list, 0, err_msg); + p = search; + while (*p != 0) { + if (isdigit (*p)) { + if (PrecededByPrefix(search, p, "DUF") + || PrecededByPrefix(search, p, "UPF") + || PrecededByPrefix(search, p, "IS") + || PrecededByPrefix(search, p, "TIGR")) { + p += StrSpn (p, "0123456789") - 1; + num_digits = 0; + } else if (InWordBeforeCytochromeOrCoenzyme (p, search)) { + p += StrSpn (p, "0123456789") - 1; + num_digits = 0; + } else { + num_digits ++; + if (num_digits == 3) { + if (FollowedByFamily (&p)) { + num_digits = 0; + } else { + return TRUE; + } } } + } else { + num_digits = 0; } + p++; } - - bad_locus_tags = FindBadLocusTagsInList 
(locus_tag_values); - for (vnp = bad_locus_tags; vnp != NULL; vnp = vnp->next) { - switch (vnp->choice) { - case eLocusTagErrorBadFormat: - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_format_fmt) + StringLen (vnp->data.ptrvalue))); - sprintf (err_msg, bad_format_fmt, vnp->data.ptrvalue); - ValNodeAddPointer (&err_list, 0, err_msg); - break; - case eLocusTagErrorDuplicate: - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (dup_fmt) + StringLen (vnp->data.ptrvalue))); - sprintf (err_msg, dup_fmt, vnp->data.ptrvalue); - ValNodeAddPointer (&err_list, 0, err_msg); - break; - case eLocusTagErrorInconsistentPrefix: - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (inconsistent_fmt) + StringLen (vnp->data.ptrvalue))); - sprintf (err_msg, inconsistent_fmt, vnp->data.ptrvalue); - ValNodeAddPointer (&err_list, 0, err_msg); - break; - } - } - locus_tag_values = ValNodeFree (locus_tag_values); - return err_list; + return FALSE; } -NLM_EXTERN ValNodePtr GetSequenceListsForMatchTypeInTabTable (SeqEntryPtr sep, ValNodePtr table, Int4 col, MatchTypePtr match_type, ValNodePtr PNTR p_err_list) +NLM_EXTERN Boolean StringContainsUnderscore (CharPtr search) { - ValNodePtr vnp_row, vnp; - ValNodePtr sequence_lists = NULL, match_list, target_list; - Uint2 entityID; - Int4 num, line; - CharPtr no_match_fmt = "No match for %s, line %d"; - CharPtr no_match_txt_fmt = "No match text for line %d"; - CharPtr msg; - + CharPtr cp; - if (sep == NULL || table == NULL || match_type == NULL || col < 0) { - return NULL; + if (search == NULL) { + return FALSE; } - entityID = SeqMgrGetEntityIDForSeqEntry (sep); - - for (vnp_row = table, line = 1; vnp_row != NULL; vnp_row = vnp_row->next, line++) { - vnp = vnp_row->data.ptrvalue; - num = 0; - while (vnp != NULL && num < col) { - vnp = vnp->next; - num++; - } - if (vnp == NULL || StringHasNoText (vnp->data.ptrvalue)) { - ValNodeAddPointer (&sequence_lists, 0, NULL); - if (p_err_list != NULL) { - msg = (CharPtr) MemNew (sizeof 
(Char) * (StringLen (no_match_txt_fmt) + 15)); - sprintf (msg, no_match_txt_fmt, line); - ValNodeAddPointer (p_err_list, 0, msg); - } + cp = StringChr (search, '_'); + while (cp != NULL) { + if (FollowedByFamily (&cp)) { + /* search again */ + cp = StringChr (cp, '_'); + } else if (cp - search < 3 || *(cp + 1) == 0) { + return TRUE; + } else if ((StringNCmp (cp - 3, "MFS", 3) == 0 + || StringNCmp (cp - 3, "TPR", 3) == 0 + || StringNCmp (cp - 3, "AAA", 3) == 0) + && isdigit (*(cp + 1)) && !isdigit (*(cp + 2))) { + cp = StringChr (cp + 1, '_'); } else { - match_list = FindMatchForRow (match_type, vnp->data.ptrvalue, entityID, sep); - target_list = GetSequenceListForRowAndColumn (match_type, match_list); - match_list = ValNodeFree (match_list); - ValNodeAddPointer (&sequence_lists, 0, target_list); - if (target_list == NULL && p_err_list != NULL) { - msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (vnp->data.ptrvalue) + 15)); - sprintf (msg, no_match_fmt, vnp->data.ptrvalue, line); - ValNodeAddPointer (p_err_list, 0, msg); - } + return TRUE; } } - return sequence_lists; + return FALSE; +} + + +NLM_EXTERN Boolean ProductContainsTerm (CharPtr pattern, CharPtr search) +{ + CharPtr str; + + /* don't bother searching for c-term or n-term if product name contains "domain" */ + if (StringISearch (search, "domain") != NULL) { + return FALSE; + } + + str = StringISearch(search, pattern); + /* c-term and n-term must be either first word or separated from other word by space, num, or punct */ + if (str != NULL && (str == search || !isalpha (*(str - 1)))) { + return TRUE; + } else { + return FALSE; + } } -NLM_EXTERN ValNodePtr FreeSequenceLists (ValNodePtr lists) +NLM_EXTERN Boolean IsPrefixPlusNumbers (CharPtr prefix, CharPtr search) { - ValNodePtr vnp; + Int4 pattern_len, digit_len; - for (vnp = lists; vnp != NULL; vnp = vnp->next) { - vnp->data.ptrvalue = ValNodeFree (vnp->data.ptrvalue); + if (search == NULL) { + return FALSE; + } + pattern_len = 
StringLen (prefix); + if (pattern_len > 0 && StringNCmp (search, prefix, pattern_len) != 0) { + return FALSE; + } + + digit_len = StringSpn (search + pattern_len, "1234567890"); + if (digit_len > 0 && *(search + pattern_len + digit_len) == 0) { + return TRUE; + } else { + return FALSE; } - lists = ValNodeFree (lists); - return lists; } -static ValNodePtr ReportTableSummaryLine (Int4 err_lines, Int4 total_lines, CharPtr fmt) +NLM_EXTERN Boolean StringContainsUnbalancedParentheses (CharPtr search) { - CharPtr str; - ValNodePtr vnp; + CharPtr buffer, cp_src; + Int4 pos = 0; + Boolean is_bad = FALSE; - str = (CharPtr) MemNew (sizeof (Char) + (StringLen (fmt) + 30)); - sprintf (str, fmt, err_lines, total_lines); - vnp = ValNodeNew (NULL); - vnp->data.ptrvalue = str; - return vnp; + if (search == NULL) { + return FALSE; + } + + /* note - don't need space for terminating character */ + buffer = MemNew (sizeof (Char) * StringLen (search)); + cp_src = search; + while (*cp_src != 0 && !is_bad) { + if (*cp_src == '(' || *cp_src == '[') { + buffer[pos++] = *cp_src; + } else if (*cp_src == ')') { + if (pos < 1) { + is_bad = TRUE; + } else if (buffer[pos - 1] != '(') { + is_bad = TRUE; + } else { + pos --; + } + } else if (*cp_src == ']') { + if (pos < 1) { + is_bad = TRUE; + } else if (buffer[pos - 1] != '[') { + is_bad = TRUE; + } else { + pos--; + } + } + ++cp_src; + } + + if (pos > 0) { + is_bad = TRUE; + } + buffer = MemFree (buffer); + return is_bad; } -NLM_EXTERN ValNodePtr GetObjectTableForTabTable (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr PNTR p_err_list) +static Boolean MatchesSearchFunc (CharPtr str, SearchFuncPtr search) { - ValNodeBlock vnb; - ValNodePtr line_vnp, val_vnp, col_vnp, err_vnp; - ValNodePtr obj_table = NULL, obj_row, last_obj = NULL, tmp, last = NULL; - Int4 line_num = 1, col_num; - Uint2 entityID; - ValNodePtr match_list, match_choice, target_list; - TabColumnConfigPtr t; - CharPtr err_msg; - CharPtr no_match_fmt = "No match 
for %s, line %d"; - MatchTypePtr match_type; - Int4 num_empty = 0, num_missing = 0, num_no_targets = 0; - BioseqSearchIndexPtr index = NULL; - - vnb.head = NULL; - vnb.tail = NULL; + Boolean rval = FALSE; - if (sep == NULL) { - ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No SeqEntry")); + if (str == NULL) { + return FALSE; + } else if (search == NULL) { + return TRUE; } - if (table == NULL) { - ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No table")); + + switch (search->choice) { + case SearchFunc_string_constraint: + rval = DoesStringMatchConstraint(str, (StringConstraintPtr) search->data.ptrvalue); + break; + case SearchFunc_contains_plural: + rval = StringMayContainPlural (str); + break; + case SearchFunc_n_or_more_brackets_or_parentheses: + rval = ContainsNorMoreSetsOfBracketsOrParentheses (str, search->data.intvalue); + break; + case SearchFunc_three_numbers: + rval = ContainsThreeOrMoreNumbersTogether (str); + break; + case SearchFunc_underscore: + rval = StringContainsUnderscore (str); + break; + case SearchFunc_prefix_and_numbers: + rval = IsPrefixPlusNumbers (search->data.ptrvalue, str); + break; + case SearchFunc_all_caps: + rval = IsAllCaps (str); + break; + case SearchFunc_unbalanced_paren: + rval = StringContainsUnbalancedParentheses (str); + break; + case SearchFunc_too_long: + if (StringISearch (str, "bifunctional") == NULL && StringISearch (str, "multifunctional") == NULL + && StringLen (str) > search->data.intvalue) { + rval = TRUE; + } + break; + case SearchFunc_has_term: + rval = ProductContainsTerm (search->data.ptrvalue, str); + break; } - if (columns == NULL) { - ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No column information")); + return rval; +} + + +static Boolean MatchesSuspectProductRule (CharPtr str, SuspectRulePtr rule) +{ + if (str == NULL) { + return FALSE; + } else if (rule == NULL) { + return TRUE; } - if (vnb.head != NULL) { - if (p_err_list == NULL) { - vnb.head = ValNodeFreeData (vnb.head); - } else { - *p_err_list = 
vnb.head; - } - return NULL; + + if (!IsSearchFuncEmpty(rule->find) && !MatchesSearchFunc(str, rule->find)) { + return FALSE; + } else if (!IsSearchFuncEmpty(rule->except) && MatchesSearchFunc (str, rule->except)) { + return FALSE; + } else { + return TRUE; } +} - entityID = SeqMgrGetEntityIDForSeqEntry (sep); - match_type = FindMatchTypeInHeader (columns); - if (match_type == NULL) return NULL; +typedef struct suspectrulecallback { + SuspectRuleSetPtr rules; + ValNodePtr obj_lists; + Uint2 featdef; +} SuspectRuleCallbackData, PNTR SuspectRuleCallbackPtr; - index = BuildIDStringsList(sep); - last = NULL; - for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) { - obj_row = NULL; - match_choice = FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns); - if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) { - ReportEmptyIDColumn (&vnb, line_num); - num_empty++; - } else { - match_list = FindMatchForRowEx (match_type, match_choice->data.ptrvalue, entityID, sep, index); - if (match_list == NULL) { - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (match_choice->data.ptrvalue) + 15)); - sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num); - ValNodeAddPointerToEnd (&vnb, 0, err_msg); - num_missing ++; - } else { - for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1; - col_vnp != NULL; - col_vnp = col_vnp->next, col_num++) { - target_list = NULL; - t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; - if (t == NULL || t->match_type != NULL - || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { - /* no targets */ - } else { - target_list = GetTargetListForRowAndColumn (match_type, match_list, t->field, t->constraint); - if (target_list == NULL) { - ReportMissingTargets (&vnb, t->field, match_choice->data.ptrvalue, col_num, line_num); - num_no_targets++; - } - } - ValNodeAddPointer (&obj_row, 0, target_list); 
- if (val_vnp != NULL) { - val_vnp = val_vnp->next; - } +NLM_EXTERN Boolean DoesStringMatchSuspectRule (CharPtr str, SeqFeatPtr sfp, SuspectRulePtr rule) +{ + BioseqPtr bsp; + SeqFeatPtr cds; + Boolean rval = FALSE; + + if (rule == NULL) { + return TRUE; + } + if (MatchesSuspectProductRule(str, rule)) { + /* we want to list the coding region, rather than the protein feature, if we can */ + if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT) { + bsp = BioseqFindFromSeqLoc (sfp->location); + if (bsp != NULL) { + cds = SeqMgrGetCDSgivenProduct (bsp, NULL); + if (cds != NULL) { + sfp = cds; } } } - tmp = ValNodeAddPointer (&last_obj, 0, obj_row); - if (obj_table == NULL) { - obj_table = last_obj; + if (sfp == NULL) { + if (rule->feat_constraint == NULL) { + rval = TRUE; + } + } else if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, rule->feat_constraint)) { + rval = TRUE; } - last_obj = tmp; } + return rval; +} - match_type = MatchTypeFree (match_type); - index = BioseqSearchIndexFree (index); - if (vnb.head != NULL) { - if (num_empty > 0) { - err_vnp = ReportTableSummaryLine (num_empty, line_num - 1, "%d lines out of %d have no ID value"); - err_vnp->next = vnb.head; - vnb.head = err_vnp; +static void SuspectRuleFeatCallback (SeqFeatPtr sfp, Pointer data) +{ + SuspectRuleCallbackPtr s; + ProtRefPtr prp; + SuspectRulePtr rule; + ValNodePtr vnp; + SeqFeatPtr cds; + BioseqPtr bsp; + ValNodePtr list; + SeqFeatPtr report_sfp = sfp; + CharPtr check_val = NULL; + + if (sfp == NULL + || (s = (SuspectRuleCallbackPtr) data) == NULL + || sfp->idx.subtype != s->featdef) { + return; + } + + if (s->featdef == FEATDEF_PROT) { + prp = (ProtRefPtr) sfp->data.value.ptrvalue; + if (prp == NULL || prp->name == NULL) { + return; } - if (num_no_targets > 0) { - err_vnp = ReportTableSummaryLine (num_no_targets, line_num - 1, "%d lines out of %d have no targets"); - err_vnp->next = vnb.head; - vnb.head = err_vnp; + check_val = prp->name->data.ptrvalue; + /* we want to list the 
coding region, rather than the protein feature, if we can */ + bsp = BioseqFindFromSeqLoc (sfp->location); + if (bsp != NULL) { + cds = SeqMgrGetCDSgivenProduct (bsp, NULL); + if (cds != NULL) { + report_sfp = cds; + } } - if (num_missing > 0) { - err_vnp = ReportTableSummaryLine (num_missing, line_num - 1, "%d lines out of %d have no match"); - err_vnp->next = vnb.head; - vnb.head = err_vnp; + } else if (s->featdef == FEATDEF_rRNA) { + check_val = GetRNAProductString (sfp, NULL); + } + + for (rule = s->rules, vnp = s->obj_lists; rule != NULL; rule = rule->next, vnp = vnp->next) { + /* make sure we have space in the object lists */ + if (vnp == NULL) { + vnp = ValNodeNew (s->obj_lists); + if (s->obj_lists == NULL) { + s->obj_lists = vnp; + } } - if (p_err_list == NULL) { - vnb.head = ValNodeFreeData (vnb.head); - } else { - *p_err_list = vnb.head; + + if (MatchesSuspectProductRule (check_val, rule)) { + if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, report_sfp, rule->feat_constraint)) { + list = vnp->data.ptrvalue; + ValNodeAddPointer (&list, OBJ_SEQFEAT, report_sfp); + vnp->data.ptrvalue = list; + } } - } - return obj_table; + } } -NLM_EXTERN ValNodePtr FreeObjectTableForTabTable (ValNodePtr table) +NLM_EXTERN ValNodePtr GetFeaturesForSuspectRules (SeqEntryPtr sep, SuspectRuleSetPtr rules, Uint2 featdef) { - ValNodePtr vnp_next, vnp_row, vnp_row_next; + SuspectRuleCallbackData d; - while (table != NULL) { - vnp_next = table->next; - table->next = NULL; - vnp_row = table->data.ptrvalue; - while (vnp_row != NULL) { - vnp_row_next = vnp_row->next; - vnp_row->next = NULL; - vnp_row->data.ptrvalue = ValNodeFree (vnp_row->data.ptrvalue); - vnp_row = ValNodeFree (vnp_row); - vnp_row = vnp_row_next; - } - table = ValNodeFree (table); - table = vnp_next; - } - return table; + MemSet (&d, 0, sizeof (SuspectRuleCallbackData)); + d.obj_lists = NULL; + d.rules = rules; + d.featdef = featdef; + + VisitFeaturesInSep (sep, &d, SuspectRuleFeatCallback); + return d.obj_lists; 
} -typedef struct countfeat { - Uint1 featdef; - Int4 num; -} CountFeatData, PNTR CountFeatPtr; +NLM_EXTERN ValNodePtr FreeListOfObjectLists (ValNodePtr list) +{ + ValNodePtr vnp; + for (vnp = list; vnp != NULL; vnp = vnp->next) { + vnp->data.ptrvalue = FreeObjectList (vnp->data.ptrvalue); + } + list = ValNodeFree (list); + return list; +} -static void CountFeaturesCallback (SeqFeatPtr sfp, Pointer userdata) + +NLM_EXTERN Boolean ApplySuspectProductNameFixToString (SuspectRulePtr rule, CharPtr PNTR str) { - CountFeatPtr p; + SimpleReplacePtr simple_replace; + Boolean rval = FALSE; + Boolean use_putative = FALSE; - if (sfp == NULL || userdata == NULL) return; + if (str == NULL || rule == NULL || rule->replace == NULL || rule->replace->replace_func == NULL) { + return FALSE; + } - p = (CountFeatPtr) userdata; - if (sfp->idx.subtype == p->featdef) { - p->num++; + switch (rule->replace->replace_func->choice) { + case ReplaceFunc_simple_replace: + simple_replace = (SimpleReplacePtr) rule->replace->replace_func->data.ptrvalue; + if (simple_replace != NULL) { + if (simple_replace->weasel_to_putative) { + if (SkipWeasel(*str) != *str) { + use_putative = TRUE; + } + } + + if (rule->find == NULL || rule->find->choice != SearchFunc_string_constraint) { + *str = MemFree (*str); + *str = StringSave (simple_replace->replace); + rval = TRUE; + } else if (simple_replace->whole_string && DoesStringMatchConstraint (*str, rule->find->data.ptrvalue)) { + *str = MemFree (*str); + *str = StringSave (simple_replace->replace); + rval = TRUE; + } else { + rval = ReplaceStringConstraintPortionInString (str, simple_replace->replace, rule->find->data.ptrvalue); + } + if (use_putative && StringNCmp (*str, kPutative, StringLen (kPutative)) != 0) { + SetStringValue (str, kPutative, ExistingTextOption_prefix_space); + } + } + break; + case ReplaceFunc_haem_replace: + FindReplaceString (str, rule->replace->replace_func->data.ptrvalue, "heme", FALSE, TRUE); + FindReplaceString (str, 
rule->replace->replace_func->data.ptrvalue, "hem", FALSE, FALSE); + break; } + return rval; } -static void CountBioSourceDescriptorsCallback (SeqDescrPtr sdp, Pointer userdata) + +NLM_EXTERN Boolean ApplySuspectProductNameFixToFeature (SuspectRulePtr rule, SeqFeatPtr cds, FILE *fp) { - Int4Ptr p; + BioseqPtr protbsp; + SeqFeatPtr protfeat; + SeqMgrFeatContext context; + ProtRefPtr prp; + CharPtr new_name, desc; + Boolean rval = FALSE; + ValNode vn; - p = (Int4Ptr) userdata; - if (sdp != NULL && p != NULL && sdp->choice == Seq_descr_source) { - (*p)++; + if (rule == NULL || rule->replace == NULL || cds == NULL || cds->data.choice != SEQFEAT_CDREGION) { + return FALSE; + } + + protbsp = BioseqFindFromSeqLoc (cds->product); + protfeat = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &context); + if (protfeat == NULL || protfeat->idx.subtype != FEATDEF_PROT + || (prp = (ProtRefPtr) protfeat->data.value.ptrvalue) == NULL + || prp->name == NULL) { + return FALSE; + } + new_name = StringSave (prp->name->data.ptrvalue); + if (ApplySuspectProductNameFixToString (rule, &new_name)) { + if (fp != NULL) { + fprintf (fp, "Changed '%s' to '%s'", prp->name->data.ptrvalue == NULL ? 
"" : (CharPtr) prp->name->data.ptrvalue, new_name); + } + if (rule->replace->move_to_note) { + if (SetStringValue (&(cds->comment), prp->name->data.ptrvalue, ExistingTextOption_append_semi)) { + if (fp != NULL) { + fprintf (fp, " and moved original to note"); + } + } + } + prp->name->data.ptrvalue = MemFree (prp->name->data.ptrvalue); + prp->name->data.ptrvalue = new_name; + if (AdjustmRNAProductToMatchProteinProduct(protfeat)) { + if (fp != NULL) { + fprintf (fp, " and adjusted mRNA"); + } + } + if (fp != NULL) { + MemSet (&vn, 0, sizeof (ValNode)); + vn.choice = OBJ_SEQFEAT; + vn.data.ptrvalue = cds; + desc = GetDiscrepancyItemText (&vn); + if (desc != NULL) { + fprintf (fp, " for %s", desc); + desc = MemFree (desc); + } + fprintf (fp, "\n"); + } + rval = TRUE; + } else { + new_name = MemFree (new_name); } + return rval; } -static void CountPubDescriptorsCallback (SeqDescrPtr sdp, Pointer userdata) +static CharPtr TextFromSearchFunc (ValNodePtr s) { - Int4Ptr p; + StringConstraintPtr scp; - p = (Int4Ptr) userdata; - if (sdp != NULL && p != NULL && sdp->choice == Seq_descr_pub) { - (*p)++; + CharPtr rval = NULL; + + if (s == NULL) { + return NULL; } + switch (s->choice) { + case SearchFunc_string_constraint: + scp = (StringConstraintPtr) s->data.ptrvalue; + if (scp != NULL) { + rval = scp->match_text; + } + break; + case SearchFunc_contains_plural: + case SearchFunc_n_or_more_brackets_or_parentheses: + case SearchFunc_three_numbers: + case SearchFunc_all_caps: + case SearchFunc_unbalanced_paren: + case SearchFunc_too_long: + /* no text */ + break; + case SearchFunc_underscore: + rval = "_"; + break; + case SearchFunc_prefix_and_numbers: + case SearchFunc_has_term: + rval = s->data.ptrvalue; + break; + } + return rval; } -static ValNodePtr CountObjectsForColumnFields (SeqEntryPtr sep, ValNodePtr columns) +static int CompareSearchFunc (ValNodePtr s1, ValNodePtr s2) { - ValNodePtr count_list = NULL, vnp; - TabColumnConfigPtr t; - CountFeatData d; - FeatureFieldPtr f; 
- Int4 num; - Uint1 featdef = 0; - ValNodePtr tmp_list = NULL; + CharPtr txt1, txt2; + int rval; - d.featdef = 0; - d.num = 0; - for (vnp = columns; vnp != NULL; vnp = vnp->next) { - num = 0; - t = (TabColumnConfigPtr) vnp->data.ptrvalue; - if (t != NULL && t->match_type == NULL && t->field != NULL) { - switch (t->field->choice) { - case FieldType_source_qual: - if (featdef != FEATDEF_BIOSRC) { - d.featdef = FEATDEF_BIOSRC; - d.num = 0; - VisitFeaturesInSep (sep, &d, CountFeaturesCallback); - VisitDescriptorsInSep (sep, &(d.num), CountBioSourceDescriptorsCallback); - } - num = d.num; - break; - case FieldType_feature_field: - f = (FeatureFieldPtr) t->field->data.ptrvalue; - if (f != NULL) { - featdef = GetFeatdefFromFeatureType(f->type); - if (featdef != d.featdef) { - d.featdef = featdef; - d.num = 0; - VisitFeaturesInSep (sep, &d, CountFeaturesCallback); - } - num = d.num; - } - break; - case FieldType_cds_gene_prot: - f = FeatureFieldFromCDSGeneProtField (t->field->data.intvalue); - if (f != NULL) { - featdef = GetFeatdefFromFeatureType(f->type); - if (featdef != d.featdef) { - d.featdef = featdef; - d.num = 0; - VisitFeaturesInSep (sep, &d, CountFeaturesCallback); - } - num = d.num; - } - f = FeatureFieldFree (f); - break; - case FieldType_rna_field: - f = FeatureFieldFromRnaQual (t->field->data.ptrvalue); - if (f != NULL) { - featdef = GetFeatdefFromFeatureType(f->type); - if (featdef != d.featdef) { - d.featdef = featdef; - d.num = 0; - VisitFeaturesInSep (sep, &d, CountFeaturesCallback); - } - num = d.num; - } - f = FeatureFieldFree (f); - break; - case FieldType_pub: - d.featdef = FEATDEF_PUB; - d.num = 0; - VisitFeaturesInSep (sep, &d, CountFeaturesCallback); - VisitDescriptorsInSep (sep, &(d.num), CountPubDescriptorsCallback); - num = d.num; - break; - case FieldType_struc_comment_field: - VisitDescriptorsInSep (sep, &tmp_list, CollectStructuredCommentsCallback); - num = ValNodeLen (tmp_list); - tmp_list = ValNodeFree (tmp_list); - break; - case 
FieldType_misc: - if (t->field->data.intvalue == Misc_field_genome_project_id) { - VisitBioseqsInSep (sep, &tmp_list, CollectNucBioseqCallback); - num = ValNodeLen (tmp_list); - tmp_list = ValNodeFree (tmp_list); - } else if (t->field->data.intvalue == Misc_field_comment_descriptor) { - tmp_list = CollectCommentDescriptors (sep); - num = ValNodeLen (tmp_list); - tmp_list = ValNodeFree (tmp_list); - } else if (t->field->data.intvalue == Misc_field_defline) { - tmp_list = CollectDeflineDescriptors (sep); - num = ValNodeLen (tmp_list); - tmp_list = ValNodeFree (tmp_list); - } else if (t->field->data.intvalue == Misc_field_keyword) { - tmp_list = CollectGenbankBlockDescriptors (sep); - num = ValNodeLen (tmp_list); - tmp_list = ValNodeFree (tmp_list); - } - break; - case FieldType_molinfo_field: - VisitBioseqsInSep (sep, &tmp_list, CollectBioseqCallback); - num = ValNodeLen (tmp_list); - tmp_list = ValNodeFree (tmp_list); - break; + if (s1 == NULL && s2 == NULL) { + rval = 0; + } else if (s1 == NULL) { + rval = -1; + } else if (s2 == NULL) { + rval = 1; + } else { + txt1 = TextFromSearchFunc (s1); + txt2 = TextFromSearchFunc (s2); + rval = StringICmp (txt1, txt2); + if (rval == 0) { + if (s1->choice < s2->choice) { + rval = -1; + } else if (s1->choice > s2->choice) { + rval = 1; } } - ValNodeAddInt (&count_list, 0, num); } - return count_list; + return rval; } + - -NLM_EXTERN ValNodePtr ApplyTableValuesToObjectTable (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr obj_table) +static int CompareSuspectRuleByFind (SuspectRulePtr rule1, SuspectRulePtr rule2) { - ValNodePtr val_line_vnp, obj_line_vnp; - ValNodePtr val_vnp, obj_vnp, col_vnp; - ValNodePtr target_vnp, tmp_field; - TabColumnConfigPtr t; - CharPtr val, qual_name; - ValNodePtr err_list = NULL, count_list, count_affected_list = NULL, count_vnp, count_tot_vnp; - CharPtr err_msg; - CharPtr bad_col_val_fmt = "Did not set value for column %d, line %d"; - CharPtr num_affected_fmt = "%d fields 
affected"; - CharPtr col_num_affected_fmt = "For %s (column %d), %d items were affected out of %d total"; - Int4 num_fields_affected = 0, col_num, line_num, num_this_column; - Boolean success; - ValNodePtr count_msg = NULL; - - count_list = CountObjectsForColumnFields (sep, columns); + int rval = 0; - for (val_line_vnp = table, obj_line_vnp = obj_table, line_num = 1; - val_line_vnp != NULL && obj_line_vnp != NULL; - val_line_vnp = val_line_vnp->next, obj_line_vnp = obj_line_vnp->next, line_num++) { - val_vnp = val_line_vnp->data.ptrvalue; - obj_vnp = obj_line_vnp->data.ptrvalue; - col_vnp = columns; - col_num = 1; - count_vnp = count_affected_list; - while (obj_vnp != NULL && col_vnp != NULL) { - num_this_column = 0; - if (obj_vnp->data.ptrvalue != NULL) { - t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; - if (t == NULL || t->match_type != NULL - || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { - /* ignore column or skip blank value */ - } else { - if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) { - val = ""; - } else { - val = val_vnp->data.ptrvalue; - } - for (target_vnp = obj_vnp->data.ptrvalue; target_vnp != NULL; target_vnp = target_vnp->next) { - if (val[0] == 0) { - success = RemoveFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, t->field, NULL); - } else { - if (t->field != NULL && t->field->choice == FieldType_molinfo_field) { - success = FALSE; - if (target_vnp->choice == OBJ_BIOSEQ) { - tmp_field = MolinfoFieldFromFieldAndStringValue (t->field->data.ptrvalue, val_vnp->data.ptrvalue); - if (tmp_field != NULL) { - success = SetSequenceQualOnBioseq ((BioseqPtr) target_vnp->data.ptrvalue, tmp_field); - tmp_field = MolinfoFieldFree(tmp_field); - } - } - } else { - success = SetFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, t->field, NULL, - val_vnp->data.ptrvalue, t->existing_text); - } - } - if (success) { - num_fields_affected++; - num_this_column++; - if (t->match_mrna 
&& IsFieldTypeCDSProduct (t->field) - && target_vnp->choice == OBJ_SEQFEAT) { - if (AdjustmRNAProductToMatchProteinProduct (target_vnp->data.ptrvalue)) { - num_fields_affected++; - } - } - } else { - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_col_val_fmt) + 30)); - sprintf (err_msg, bad_col_val_fmt, col_num, line_num); - ValNodeAddPointer (&err_list, 0, err_msg); - } - } - } - } - if (val_vnp != NULL) { - val_vnp = val_vnp->next; - } - if (count_vnp == NULL) { - ValNodeAddInt (&count_affected_list, 0, num_this_column); - } else { - count_vnp->data.intvalue += num_this_column; - count_vnp = count_vnp->next; - } - obj_vnp = obj_vnp->next; - col_vnp = col_vnp->next; - col_num++; - } + if (rule1 == NULL && rule2 == NULL) { + rval = 0; + } else if (rule1 == NULL) { + rval = -1; + } else if (rule2 == NULL) { + rval = 1; + } else if ((rval = CompareSearchFunc (rule1->find, rule2->find)) != 0) { + /* no further comparisons */ } - /* put message at top of list for number of fields affected */ - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_affected_fmt) + 15)); - sprintf (err_msg, num_affected_fmt, num_fields_affected); - ValNodeAddPointer (&count_msg, 0, err_msg); + return rval; +} - /* if any affected, list number of fields per column, and the total in the record */ - if (num_fields_affected > 0) { - for (count_vnp = count_affected_list, count_tot_vnp = count_list, col_vnp = columns, col_num = 1; - count_vnp != NULL && count_tot_vnp != NULL && col_vnp != NULL; - count_vnp = count_vnp->next, count_tot_vnp = count_tot_vnp->next, col_vnp = col_vnp->next, col_num++) { - t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; - if (t != NULL && t->match_type == NULL) { - qual_name = SummarizeFieldType (t->field); - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (col_num_affected_fmt) + StringLen (qual_name) + 45)); - sprintf (err_msg, col_num_affected_fmt, qual_name, col_num, count_vnp->data.intvalue, count_tot_vnp->data.intvalue); - 
ValNodeAddPointer (&count_msg, 0, err_msg); - qual_name = MemFree (qual_name); - } + +static int LIBCALLBACK SortVnpBySuspectRuleFind (VoidPtr ptr1, VoidPtr ptr2) +{ + ValNodePtr vnp1; + ValNodePtr vnp2; + int rval = 0; + + if (ptr1 != NULL && ptr2 != NULL) { + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + if (vnp1 == NULL && vnp2 == NULL) { + rval = 0; + } else if (vnp1 == NULL) { + rval = -1; + } else if (vnp2 == NULL) { + rval = 1; + } else { + rval = CompareSuspectRuleByFind (vnp1->data.ptrvalue, vnp2->data.ptrvalue); } } + return rval; +} - ValNodeLink (&count_msg, err_list); - count_list = ValNodeFree (count_list); - count_affected_list = ValNodeFree (count_affected_list); +static int CompareSuspectRuleByFixTypeThenFind (SuspectRulePtr rule1, SuspectRulePtr rule2) +{ + int rval = 0; - return count_msg; -} + if (rule1 == NULL && rule2 == NULL) { + rval = 0; + } else if (rule1 == NULL) { + rval = -1; + } else if (rule2 == NULL) { + rval = 1; + } else if (rule1->rule_type < rule2->rule_type) { + rval = -1; + } else if (rule1->rule_type > rule2->rule_type) { + rval = 1; + } else if ((rval = CompareSearchFunc (rule1->find, rule2->find)) != 0) { + /* no further comparisons */ + } + return rval; +} -static int LIBCALLBACK SortVnpByChoiceAndPtrvalue (VoidPtr ptr1, VoidPtr ptr2) +static int LIBCALLBACK SortVnpBySuspectRuleFixTypeThenFind (VoidPtr ptr1, VoidPtr ptr2) { ValNodePtr vnp1; ValNodePtr vnp2; + int rval = 0; if (ptr1 != NULL && ptr2 != NULL) { vnp1 = *((ValNodePtr PNTR) ptr1); vnp2 = *((ValNodePtr PNTR) ptr2); - if (vnp1 != NULL && vnp2 != NULL) { - if (vnp1->choice > vnp2->choice) { - return 1; - } else if (vnp1->choice < vnp2->choice) { - return -1; - } else if (vnp1->data.ptrvalue > vnp2->data.ptrvalue) { - return 1; - } else if (vnp1->data.ptrvalue < vnp2->data.ptrvalue) { - return -1; - } else { - return 0; - } + if (vnp1 == NULL && vnp2 == NULL) { + rval = 0; + } else if (vnp1 == NULL) { + rval = -1; + } else if (vnp2 == NULL) 
{ + rval = 1; + } else { + rval = CompareSuspectRuleByFixTypeThenFind (vnp1->data.ptrvalue, vnp2->data.ptrvalue); } } - return 0; + return rval; } -static ValNodePtr FindRowsForObjectInObjectTable (ValNodePtr obj_table, Int4 column, Uint1 choice, Pointer data) +static ValNodePtr MakeValNodeListFromSuspectRuleSet (SuspectRuleSetPtr rules) { - Int4 col_num, row_num; - ValNodePtr line_vnp, col_vnp, obj_vnp; - ValNodePtr match_rows = NULL; + ValNodeBlock block; + SuspectRulePtr one; - if (obj_table == NULL || column < 0) { - return NULL; + InitValNodeBlock (&block, NULL); + for (one = rules; one != NULL; one = one->next) { + ValNodeAddPointerToEnd (&block, 0, one); } + return block.head; +} - for (line_vnp = obj_table, row_num = 0; line_vnp != NULL; line_vnp = line_vnp->next, row_num++) { - col_vnp = line_vnp->data.ptrvalue; - col_num = 0; - while (col_num < column && col_vnp != NULL) { - col_vnp = col_vnp->next; - col_num++; + +static SuspectRuleSetPtr MakeSuspectRuleSetFromValNodeList (ValNodePtr tmp_list) +{ + ValNodePtr vnp; + SuspectRuleSetPtr first = NULL, last = NULL; + + for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { + if (last == NULL) { + first = vnp->data.ptrvalue; + } else { + last->next = vnp->data.ptrvalue; } - if (col_vnp != NULL) { - obj_vnp = col_vnp->data.ptrvalue; - while (obj_vnp != NULL && (obj_vnp->choice != choice || obj_vnp->data.ptrvalue != data)) { - obj_vnp = obj_vnp->next; - } - if (obj_vnp != NULL) { - ValNodeAddInt (&match_rows, 0, row_num); + last = vnp->data.ptrvalue; + last->next = NULL; + } + return first; +} + + +NLM_EXTERN void SortSuspectRuleSetByFind (SuspectRuleSetPtr PNTR rules) +{ + ValNodePtr tmp_list; + + if (rules == NULL || *rules == NULL) { + return; + } + + tmp_list = MakeValNodeListFromSuspectRuleSet (*rules); + tmp_list = ValNodeSort (tmp_list, SortVnpBySuspectRuleFind); + + *rules = MakeSuspectRuleSetFromValNodeList (tmp_list); + tmp_list = ValNodeFree (tmp_list); +} + + +NLM_EXTERN void 
SortSuspectRuleSetByFixTypeThenFind (SuspectRuleSetPtr PNTR rules) +{ + ValNodePtr tmp_list; + + if (rules == NULL || *rules == NULL) { + return; + } + + tmp_list = MakeValNodeListFromSuspectRuleSet (*rules); + tmp_list = ValNodeSort (tmp_list, SortVnpBySuspectRuleFixTypeThenFind); + + *rules = MakeSuspectRuleSetFromValNodeList (tmp_list); + tmp_list = ValNodeFree (tmp_list); +} + + +NLM_EXTERN void PrintSuspectRuleMatches (SeqEntryPtr sep, SuspectRuleSetPtr rules, FILE *fp) +{ + ValNodePtr vnp_l, vnp_o, obj_lists; + SuspectRulePtr rule; + CharPtr summ; + + if (sep == NULL || rules == NULL || fp == NULL) { + return; + } + + obj_lists = GetFeaturesForSuspectRules (sep, rules, FEATDEF_PROT); + + for (vnp_l = obj_lists, rule = rules; vnp_l != NULL && rule != NULL; vnp_l = vnp_l->next, rule = rule->next) { + if (ValNodeLen (vnp_l->data.ptrvalue) > 0) { + summ = SummarizeSuspectRule (rule); + fprintf (fp, "%s:%d\n", summ, ValNodeLen (vnp_l->data.ptrvalue)); + summ = MemFree (summ); + for (vnp_o = vnp_l->data.ptrvalue; vnp_o != NULL; vnp_o = vnp_o->next) { + summ = GetDiscrepancyItemText (vnp_o); + fprintf (fp, "\t%s", summ); + summ = MemFree (summ); } } } - return match_rows; + + obj_lists = FreeListOfObjectLists (obj_lists); } -static CharPtr FormatMultipleDestinationErrorMessage (Int4 col_num, ValNodePtr match_rows) +NLM_EXTERN ValNodePtr +GetSuspectRuleDiscrepancies +(SeqEntryPtr sep, + SuspectRuleSetPtr rules, + Uint2 featdef, + Uint4 clickable_item_type) { - CharPtr multi_fmt = "Multiple rows apply to the same object for column %d. 
Matching rows:"; - CharPtr err_msg; - Char buf[16]; - ValNodePtr vnp; + ValNodePtr vnp_l, obj_lists, rval = NULL; + SuspectRulePtr rule; + CharPtr summ; + CharPtr rna_fmt = "%%d rRNA product names %s"; + CharPtr cds_fmt = "%%d product names %s"; + CharPtr template_fmt; + CharPtr fmt; - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (multi_fmt) - + 30 + 15 * ValNodeLen (match_rows))); - sprintf (err_msg, multi_fmt, col_num); - for (vnp = match_rows; vnp != NULL; vnp = vnp->next) { - sprintf (buf, "%d", vnp->data.intvalue + 1); - StringCat (err_msg, buf); - if (vnp->next != NULL) { - StringCat (err_msg, ","); + if (sep == NULL || rules == NULL) { + return NULL; + } + + obj_lists = GetFeaturesForSuspectRules (sep, rules, featdef); + if (featdef == FEATDEF_rRNA) { + template_fmt = rna_fmt; + } else { + template_fmt = cds_fmt; + } + + for (vnp_l = obj_lists, rule = rules; vnp_l != NULL && rule != NULL; vnp_l = vnp_l->next, rule = rule->next) { + if (ValNodeLen (vnp_l->data.ptrvalue) > 0) { + summ = SummarizeSuspectRule (rule); + fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (summ) + StringLen (template_fmt))); + + sprintf (fmt, template_fmt, summ); + summ = MemFree (summ); + ValNodeAddPointer (&rval, 0, NewClickableItem (clickable_item_type, fmt, vnp_l->data.ptrvalue)); + vnp_l->data.ptrvalue = NULL; + fmt = MemFree (fmt); } } - return err_msg; + + obj_lists = FreeListOfObjectLists (obj_lists); + return rval; } -NLM_EXTERN ValNodePtr CheckObjTableForRowsThatApplyToTheSameDestination (ValNodePtr obj_table) +NLM_EXTERN Int4 ApplySuspectRuleFixesToSeqEntry (SeqEntryPtr sep, SuspectRuleSetPtr rules, FILE *fp) { - Int4 col_num; - ValNodeBlock vnb, err_list; - ValNodePtr line_vnp, col_vnp, obj_vnp, vnp; - ValNodePtr col_obj_list; - Boolean any_column_values_left; - ValNodePtr match_rows; - - vnb.head = NULL; - vnb.tail = NULL; - err_list.head = NULL; - err_list.tail = NULL; + ValNodePtr vnp_l, vnp_o, obj_lists; + SuspectRulePtr rule; + CharPtr summ; + Int4 
num_changed = 0, total_num_changed = 0; + Uint2 entityID; - /* now, for each row, get pointer to first column */ - for (line_vnp = obj_table; line_vnp != NULL; line_vnp = line_vnp->next) { - ValNodeAddPointerToEnd (&vnb, 0, line_vnp->data.ptrvalue); + if (sep == NULL || rules == NULL) { + return 0; } - /* now for each column, make a list of all features in the column, then sort to see if there are duplicates */ - any_column_values_left = TRUE; - col_num = 1; - while (any_column_values_left) { - any_column_values_left = FALSE; - col_obj_list = NULL; - for (vnp = vnb.head; vnp != NULL; vnp = vnp->next) { - col_vnp = vnp->data.ptrvalue; - if (col_vnp != NULL) { - obj_vnp = col_vnp->data.ptrvalue; - ValNodeLink (&col_obj_list, ValNodeCopyPtr (obj_vnp)); - vnp->data.ptrvalue = col_vnp->next; - any_column_values_left = TRUE; - } + obj_lists = GetFeaturesForSuspectRules (sep, rules, FEATDEF_PROT); + + for (vnp_l = obj_lists, rule = rules; vnp_l != NULL && rule != NULL; vnp_l = vnp_l->next, rule = rule->next) { + if (rule->replace == NULL || vnp_l->data.ptrvalue == NULL) { + continue; } - if (col_obj_list != NULL) { - col_obj_list = ValNodeSort (col_obj_list, SortVnpByChoiceAndPtrvalue); - for (vnp = col_obj_list; vnp != NULL && vnp->next != NULL; vnp = vnp->next) { - if (vnp->choice == vnp->next->choice - && vnp->data.ptrvalue == vnp->next->data.ptrvalue) { - match_rows = FindRowsForObjectInObjectTable (obj_table, col_num - 1, vnp->choice, vnp->data.ptrvalue); - /* report rows with matches */ - ValNodeAddPointerToEnd (&err_list, col_num, FormatMultipleDestinationErrorMessage (col_num, match_rows)); - match_rows = ValNodeFree (match_rows); - /* skip over the cluster of matches */ - while (vnp->next != NULL && vnp->choice == vnp->next->choice) { - vnp = vnp->next; - } - } + if (fp != NULL) { + summ = SummarizeSuspectRule (rule); + fprintf (fp, "%s:%d identified\n", summ, ValNodeLen (vnp_l->data.ptrvalue)); + summ = MemFree (summ); + } + num_changed = 0; + for (vnp_o = 
vnp_l->data.ptrvalue; vnp_o != NULL; vnp_o = vnp_o->next) { + if (ApplySuspectProductNameFixToFeature (rule, vnp_o->data.ptrvalue, fp)) { + num_changed++; } - col_obj_list = ValNodeFree (col_obj_list); } - col_num++; + if (fp != NULL) { + fprintf (fp, "Num fixed: %d\n", num_changed); + } + total_num_changed += num_changed; } - vnb.head = ValNodeFree (vnb.head); - return err_list.head; + entityID = ObjMgrGetEntityIDForChoice(sep); + ObjMgrSetDirtyFlag (entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); + + obj_lists = FreeListOfObjectLists (obj_lists); + return total_num_changed; } -static CharPtr GetMatchTextForLine (ValNodePtr values, ValNodePtr columns) +typedef struct rulesort { + SuspectRulePtr rule; + Int4 pos; +} RuleSortData, PNTR RuleSortPtr; + +static RuleSortPtr RuleSortNew (SuspectRulePtr rule, Int4 pos) { - ValNodePtr val_vnp, col_vnp; - CharPtr match_txt = NULL; - TabColumnConfigPtr t; + RuleSortPtr r; - for (val_vnp = values, col_vnp = columns; - val_vnp != NULL && col_vnp != NULL; - val_vnp = val_vnp->next, col_vnp = col_vnp->next) { - t = col_vnp->data.ptrvalue; - if (t != NULL && t->match_type != NULL) { - match_txt = val_vnp->data.ptrvalue; - break; + r = (RuleSortPtr) MemNew (sizeof (RuleSortData)); + r->rule = AsnIoMemCopy (rule, (AsnReadFunc)SuspectRuleAsnRead, (AsnWriteFunc) SuspectRuleAsnWrite); + r->pos = pos; + return r; +} + + +static RuleSortPtr RuleSortFree (RuleSortPtr r) +{ + if (r != NULL) { + r->rule = SuspectRuleFree (r->rule); + r = MemFree (r); + } + return r; +} + + +static int LIBCALLBACK SortVnpByRuleSortRule (VoidPtr ptr1, VoidPtr ptr2) +{ + ValNodePtr vnp1; + ValNodePtr vnp2; + RuleSortPtr r1, r2; + int rval = 0; + + if (ptr1 != NULL && ptr2 != NULL) { + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + if (vnp1 == NULL && vnp2 == NULL) { + rval = 0; + } else if (vnp1 == NULL) { + rval = -1; + } else if (vnp2 == NULL) { + rval = 1; + } else { + r1 = (RuleSortPtr) vnp1->data.ptrvalue; + r2 
= (RuleSortPtr) vnp2->data.ptrvalue; + rval = CompareSuspectRuleByFixTypeThenFind (r1->rule, r2->rule); } } - return match_txt; + return rval; } -/* Note - when creating error messages, mark summary messages with choice = 1 */ -NLM_EXTERN ValNodePtr CheckObjTableForExistingText (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr obj_table) +static int LIBCALLBACK SortVnpByRuleSortPos (VoidPtr ptr1, VoidPtr ptr2) { - ValNodeBlock vnb; - ValNodePtr val_line_vnp, obj_line_vnp; - ValNodePtr val_vnp, obj_vnp, col_vnp; - ValNodePtr col_tot = NULL, col_tot_vnp; - Int4 line_num = 1, col_num, num_existing_text = 0; - Uint2 entityID; - TabColumnConfigPtr t; - CharPtr err_msg, str, qual_name, val; - CharPtr already_has_val_fmt = "%s\t%s\t%s\t%d\t%s\t%d"; - CharPtr num_existing_text_fmt = "%d fields already have text.\nID\tOld Value\tReplacement\tColumn\tQualifier\tLine"; - CharPtr mrna_warn_fmt = "%d coding region features have mRNAs, but %d do not."; - CharPtr col_tot_fmt = "For column %d, %d out of %d fields already have text."; - ValNodePtr target_list, feat_vnp; - Int4 num_with_mrna = 0, num_without_mrna = 0; - CharPtr match_txt; - CharPtr new_val; + ValNodePtr vnp1; + ValNodePtr vnp2; + RuleSortPtr r1, r2; + int rval = 0; + + if (ptr1 != NULL && ptr2 != NULL) { + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + if (vnp1 == NULL && vnp2 == NULL) { + rval = 0; + } else if (vnp1 == NULL) { + rval = -1; + } else if (vnp2 == NULL) { + rval = 1; + } else { + r1 = (RuleSortPtr) vnp1->data.ptrvalue; + r2 = (RuleSortPtr) vnp2->data.ptrvalue; + if (r1->pos < r2->pos) { + rval = -1; + } else if (r1->pos > r2->pos) { + rval = 1; + } else { + rval = 0; + } + } + } + return rval; +} - vnb.head = NULL; - vnb.tail = NULL; - if (sep == NULL) { - ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No SeqEntry")); - } - if (table == NULL) { - ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No table")); - } - if (columns == NULL) { - ValNodeAddPointerToEnd 
(&vnb, 1, StringSave ("No column information")); +static ValNodePtr SuspectRuleSetToRuleSortList (SuspectRuleSetPtr set) +{ + ValNodeBlock block; + SuspectRulePtr rule; + Int4 pos; + + InitValNodeBlock (&block, NULL); + for (rule = set, pos = 0; rule != NULL; rule = rule->next, pos++) { + ValNodeAddPointerToEnd (&block, 0, RuleSortNew (rule, pos)); } - if (vnb.head != NULL) { - return vnb.head; + return block.head; +} + + +static SuspectRuleSetPtr RuleSortListToSuspectRuleSet (ValNodePtr list) +{ + ValNodePtr vnp; + SuspectRuleSetPtr set = NULL; + SuspectRulePtr last = NULL; + RuleSortPtr r; + + for (vnp = list; vnp != NULL; vnp = vnp->next) { + r = (RuleSortPtr) vnp->data.ptrvalue; + if (r->rule != NULL) { + if (last == NULL) { + set = r->rule; + } else { + last->next = r->rule; + } + last = r->rule; + r->rule = NULL; + } } + return set; +} - entityID = SeqMgrGetEntityIDForSeqEntry (sep); - for (val_line_vnp = table, obj_line_vnp = obj_table, line_num = 1; - val_line_vnp != NULL && obj_line_vnp != NULL; - val_line_vnp = val_line_vnp->next, obj_line_vnp = obj_line_vnp->next, line_num++) { - val_vnp = val_line_vnp->data.ptrvalue; - obj_vnp = obj_line_vnp->data.ptrvalue; - col_vnp = columns; - if (val_vnp == NULL || obj_vnp == NULL) continue; - col_num = 1; - col_tot_vnp = col_tot; - if (col_tot_vnp == NULL) { - col_tot_vnp = ValNodeAddInt (&col_tot, 0, 0); +NLM_EXTERN void FindDiffsBetweenRuleSets (SuspectRuleSetPtr set1, SuspectRuleSetPtr set2, SuspectRuleSetPtr PNTR in1not2, SuspectRuleSetPtr PNTR in2not1) +{ + ValNodePtr list1, list2; + ValNodePtr vnp1, vnp2, cmp_start; + RuleSortPtr r1, r2; + Boolean found_match; + + /* eliminate duplicates, while maintaining original order */ + list1 = SuspectRuleSetToRuleSortList(set1); + list1 = ValNodeSort(list1, SortVnpByRuleSortRule); + list2 = SuspectRuleSetToRuleSortList(set2); + list2 = ValNodeSort(list2, SortVnpByRuleSortRule); + + cmp_start = list2; + for (vnp1 = list1; vnp1 != NULL; vnp1 = vnp1->next) { + r1 = 
(RuleSortPtr) vnp1->data.ptrvalue; + for (vnp2 = cmp_start; vnp2 != NULL && (vnp2->choice == 1 || SortVnpByRuleSortRule(&vnp1, &vnp2) > 0); vnp2 = vnp2->next) { + cmp_start = vnp2; } - while (obj_vnp != NULL && col_vnp != NULL) { - if (obj_vnp->data.ptrvalue != NULL) { - t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; - if (t == NULL || t->match_type != NULL - || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { - /* ignore column or skip blank value */ - } else { - target_list = obj_vnp->data.ptrvalue; - if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) { - val = ""; - } else { - val = val_vnp->data.ptrvalue; - } - for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) { - /* check for existing text */ - str = GetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL); - if (!StringHasNoText (str)) { - qual_name = SummarizeFieldType (t->field); - match_txt = GetMatchTextForLine (val_line_vnp->data.ptrvalue, columns); - if (match_txt == NULL) { - match_txt = ""; - } - new_val = StringSave (str); - SetStringValue (&new_val, val, t->existing_text); - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (already_has_val_fmt) - + StringLen (match_txt) - + StringLen (str) - + StringLen (new_val) - + StringLen (qual_name) - + 30)); - sprintf (err_msg, already_has_val_fmt, match_txt, str, new_val, col_num, qual_name, line_num); - ValNodeAddPointerToEnd (&vnb, 0, err_msg); - num_existing_text ++; - new_val = MemFree (new_val); - col_tot_vnp->data.intvalue ++; - } - str = MemFree (str); - /* check for mrna if changing CDS product */ - if (IsFieldTypeCDSProduct (t->field) && feat_vnp->choice == OBJ_SEQFEAT) { - if (GetmRNAForFeature (feat_vnp->data.ptrvalue) != NULL) { - num_with_mrna++; - } else { - num_without_mrna++; - } - } - } + found_match = FALSE; + while (vnp2 != NULL && (vnp2->choice == 1 || SortVnpByRuleSortRule(&vnp1, &vnp2) == 0) && !found_match) { + if (vnp2->data.ptrvalue != 
NULL) { + r2 = (RuleSortPtr) vnp2->data.ptrvalue; + if (AsnIoMemComp (r1->rule, r2->rule, (AsnWriteFunc) SuspectRuleAsnWrite)) { + found_match = TRUE; } } - if (val_vnp != NULL) { - val_vnp = val_vnp->next; - } - obj_vnp = obj_vnp->next; - col_vnp = col_vnp->next; - col_num++; - col_tot_vnp = col_tot_vnp->next; - if (col_tot_vnp == NULL) { - col_tot_vnp = ValNodeAddInt (&col_tot, 0, 0); - } - } - } - if (num_existing_text > 0) { - for (col_tot_vnp = col_tot, col_num = 1; col_tot_vnp != NULL; col_tot_vnp = col_tot_vnp->next, col_num++) { - if (col_tot_vnp->data.intvalue > 0) { - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (col_tot_fmt) + 45)); - sprintf (err_msg, col_tot_fmt, col_num, col_tot_vnp->data.intvalue, line_num - 1); - ValNodeAddPointerToEnd (&vnb, 1, err_msg); + if (!found_match) { + vnp2 = vnp2->next; } } - - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_existing_text_fmt) - + 15)); - sprintf (err_msg, num_existing_text_fmt, num_existing_text); - ValNodeAddPointerToFront (&vnb, 0, err_msg); + if (found_match) { + vnp1->data.ptrvalue = RuleSortFree(vnp1->data.ptrvalue); + vnp1->choice = 1; + vnp2->data.ptrvalue = RuleSortFree(vnp2->data.ptrvalue); + vnp2->choice = 1; + } } - col_tot = ValNodeFree (col_tot); - if (num_with_mrna > 0 && num_without_mrna > 0) { - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (mrna_warn_fmt) - + 30)); - sprintf (err_msg, mrna_warn_fmt, num_with_mrna, num_without_mrna); - ValNodeAddPointerToFront (&vnb, 1, err_msg); - } - return vnb.head; + vnp1 = ValNodeExtractList (&list1, 1); + vnp1 = ValNodeFree (vnp1); + vnp2 = ValNodeExtractList (&list2, 1); + vnp2 = ValNodeFree (vnp2); + + list1 = ValNodeSort (list1, SortVnpByRuleSortPos); + list2 = ValNodeSort (list2, SortVnpByRuleSortPos); + + *in1not2 = RuleSortListToSuspectRuleSet (list1); + *in2not1 = RuleSortListToSuspectRuleSet (list2); + list1 = ValNodeFreeData (list1); + list2 = ValNodeFreeData (list2); } -NLM_EXTERN ValNodePtr 
ApplyTableToFeatures (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns) +static Boolean ReportRuleSetProblems (CharPtr product_name, SuspectRuleSetPtr rule_list, FILE *output_file, CharPtr prefix) { - ValNodeBlock vnb; - ValNodePtr line_vnp, val_vnp, col_vnp; - Int4 line_num = 1, col_num; - Uint2 entityID; - ValNodePtr match_list, match_choice, target_list, feat_vnp; - TabColumnConfigPtr t; - CharPtr err_msg; - CharPtr no_match_fmt = "No match for %s, line %d"; - CharPtr bad_col_val_fmt = "Did not set value for column %d, line %d"; - CharPtr num_affected_fmt = "%d fields affected"; - Int4 num_fields_affected = 0; - CharPtr val; - Boolean success; - MatchTypePtr match_type; + CharPtr summ; + SuspectRulePtr rule; + Boolean any_found = FALSE; + + /* report with rule set */ + for (rule = rule_list; rule != NULL; rule = rule->next) { + if (MatchesSuspectProductRule (product_name, rule)) { + summ = SummarizeSuspectRule(rule); + if (output_file == NULL) { + if (prefix != NULL) { + printf ("%s\t", prefix); + } + printf ("%s\t%s\n", product_name, summ); + } else { + if (prefix != NULL) { + fprintf (output_file, "%s\t", prefix); + } + fprintf (output_file, "%s\t%s\n", product_name, summ); + } + summ = MemFree (summ); + any_found = TRUE; + } + } + return any_found; +} - vnb.head = NULL; - vnb.tail = NULL; - if (sep == NULL) { - ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No SeqEntry")); - } - if (table == NULL) { - ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No table")); - } - if (columns == NULL) { - ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No column information")); +NLM_EXTERN Boolean FindSuspectProductNamesInEntrezGene (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file) +{ + EntrezgenePtr egp; + AsnIoPtr aip; + ValNodePtr vnp; + ProtRefPtr prp; + GeneRefPtr grp; + CharPtr prefix = NULL; + Char geneid_buf[20]; + + aip = AsnIoNew (ASNIO_TEXT_IN, input_file, NULL, NULL, NULL); + egp = EntrezgeneAsnRead (aip, NULL); + if (egp == NULL) { + return 
FALSE; } - if (vnb.head != NULL) { - return vnb.head; + + /* scan */ + if (egp->prot != NULL) { + if (egp->track_info != NULL && egp->track_info->geneid > 0) { + sprintf (geneid_buf, "%d", egp->track_info->geneid); + prefix = geneid_buf; + } else if (egp->gene != NULL) { + grp = (GeneRefPtr) egp->gene; + if (grp->locus_tag == NULL) { + prefix = grp->locus; + } else { + prefix = grp->locus_tag; + } + } + prp = (ProtRefPtr) egp->prot; + for (vnp = prp->name; vnp != NULL; vnp = vnp->next) { + if (rule_list == NULL) { + ReportProductNameProblems (vnp->data.ptrvalue, output_file, prefix); + } else { + ReportRuleSetProblems (vnp->data.ptrvalue, rule_list, output_file, prefix); + } + } } - match_type = FindMatchTypeInHeader (columns); + egp = EntrezgeneFree (egp); + return TRUE; +} - entityID = SeqMgrGetEntityIDForSeqEntry (sep); - for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) { - match_choice = FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns); - if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) { - ReportEmptyIDColumn (&vnb, line_num); +NLM_EXTERN void FindSuspectProductNamesInNameList (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file) +{ + ReadBufferData rbd; + CharPtr line; + + rbd.fp = input_file; + rbd.current_data = NULL; + + line = AbstractReadFunction (&rbd); + while (line != NULL) + { + if (rule_list == NULL) { + ReportProductNameProblems (line, output_file, NULL); } else { - match_list = FindMatchForRow (match_type, match_choice->data.ptrvalue, entityID, sep); - if (match_list == NULL) { - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (match_choice->data.ptrvalue) + 15)); - sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num); - ValNodeAddPointerToEnd (&vnb, 0, err_msg); - } else { - for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1; - col_vnp != NULL; - col_vnp = col_vnp->next, col_num++) { - 
t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; - if (t == NULL || t->match_type != NULL - || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { - if (val_vnp != NULL) { - val_vnp = val_vnp->next; - } - continue; - } - - target_list = GetTargetListForRowAndColumn (match_type, match_list, t->field, t->constraint); - if (target_list == NULL) { - ReportMissingTargets (&vnb, t->field, match_choice->data.ptrvalue, col_num, line_num); - } else { - if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) { - val = ""; - } else { - val = val_vnp->data.ptrvalue; - } - for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) { - if (val[0] == 0) { - success = RemoveFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL); - } else { - success = SetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL, - val_vnp->data.ptrvalue, t->existing_text); - } - if (success) { - num_fields_affected++; - if (t->match_mrna && IsFieldTypeCDSProduct (t->field) - && feat_vnp->choice == OBJ_SEQFEAT) { - if (AdjustmRNAProductToMatchProteinProduct (feat_vnp->data.ptrvalue)) { - num_fields_affected++; - } - } - } else { - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_col_val_fmt) + 30)); - sprintf (err_msg, bad_col_val_fmt, col_num, line_num); - ValNodeAddPointerToEnd (&vnb, 0, err_msg); - } - } - } - target_list = ValNodeFree (target_list); - if (val_vnp != NULL) { - val_vnp = val_vnp->next; - } - } - } - match_list = ValNodeFree (match_list); + ReportRuleSetProblems (line, rule_list, output_file, NULL); } + + line = MemFree (line); + line = AbstractReadFunction (&rbd); + } +} + + +/* code for special product table update */ +typedef struct productupdatetableitem { + CharPtr product_match; + CharPtr new_name; + CharPtr note_text; +} ProductUpdateTableItemData, PNTR ProductUpdateTableItemPtr; + + +static ProductUpdateTableItemPtr ProductUpdateTableItemNew (CharPtr product_match) +{ + 
ProductUpdateTableItemPtr item; + + item = (ProductUpdateTableItemPtr) MemNew (sizeof (ProductUpdateTableItemData)); + MemSet (item, 0, sizeof (ProductUpdateTableItemData)); + item->product_match = product_match; + return item; +} + + +static ProductUpdateTableItemPtr ProductUpdateTableItemFree (ProductUpdateTableItemPtr item) +{ + if (item != NULL) { + item->product_match = MemFree (item->product_match); + item->new_name = MemFree (item->new_name); + item->note_text = MemFree (item->note_text); + item = MemFree (item); + } + return item; +} + + +static void ProductUpdateTableItemWrite (FILE *fp, ProductUpdateTableItemPtr item) +{ + if (fp == NULL || item == NULL || StringHasNoText (item->product_match)) { + return; } - - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_affected_fmt) + 15)); - sprintf (err_msg, num_affected_fmt, num_fields_affected); - ValNodeAddPointerToFront (&vnb, 0, err_msg); - match_type = MatchTypeFree (match_type); - return vnb.head; + fprintf (fp, "%s", item->product_match); + if (!StringHasNoText (item->new_name)) { + fprintf (fp, "\tX\t%s", StringICmp (item->new_name, "hypothetical protein") == 0 ? "" : item->new_name); + if (!StringHasNoText (item->note_text)) { + fprintf (fp, "\tX\t%s", StringCmp (item->note_text, item->product_match) == 0 ? "" : item->note_text); + } + } + fprintf (fp, "\n"); } -NLM_EXTERN ValNodePtr CheckTableForExistingText (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns) + +NLM_EXTERN ValNodePtr ProductUpdateTableFree (ValNodePtr list) { - ValNodeBlock vnb; - ValNodePtr line_vnp, val_vnp, col_vnp; - Int4 line_num = 1, col_num, num_existing_text = 0; - Uint2 entityID; - TabColumnConfigPtr t; - CharPtr err_msg, str, qual_name, val; - CharPtr no_match_fmt = "No match for %s, line %d"; - CharPtr already_has_val_fmt = "%s already has value '%s' (column %d), line %d. 
Replacement is '%s'"; - CharPtr num_existing_text_fmt = "%d fields already have text."; - ValNodePtr match_choice, match_list; - ValNodePtr target_list, feat_vnp; - MatchTypePtr match_type; + ValNodePtr list_next; - vnb.head = NULL; - vnb.tail = NULL; - if (sep == NULL) { - ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No SeqEntry")); + while (list != NULL) { + list_next = list->next; + list->next = NULL; + list->data.ptrvalue = ProductUpdateTableItemFree (list->data.ptrvalue); + list = ValNodeFree (list); + list = list_next; } - if (table == NULL) { - ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No table")); + return list; +} + + +static void TrimBeginningAndEndingQuotes (CharPtr str) +{ + CharPtr src, dst; + + if (str == NULL) { + return; } - if (columns == NULL) { - ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No column information")); + if (*str == '"') { + src = str + 1; + dst = src; + while (*src != 0) { + *dst = *src; + dst++; + src++; + } + *dst = 0; } - if (vnb.head != NULL) { - return vnb.head; + dst = str + StringLen(str) - 1; + if (*dst == '"') { + *dst = 0; } +} - match_type = FindMatchTypeInHeader (columns); - if (match_type == NULL) return NULL; - entityID = SeqMgrGetEntityIDForSeqEntry (sep); +static ProductUpdateTableItemPtr ProductUpdateTableItemFromValNodeList (ValNodePtr column_list) +{ + ProductUpdateTableItemPtr item; + ValNodePtr vnp; - for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) { - match_choice = FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns); - if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) { - ReportEmptyIDColumn (&vnb, line_num); - if (vnb.head == NULL) { - vnb.head = vnb.tail; - } + if (column_list == NULL || StringHasNoText (column_list->data.ptrvalue) + || column_list->next == NULL + || StringICmp (column_list->next->data.ptrvalue, "X") != 0) { + return NULL; + } + + item = ProductUpdateTableItemNew(column_list->data.ptrvalue); + 
column_list->data.ptrvalue = NULL; + vnp = column_list->next->next; + + /* get new product name. Default to hypothetical protein if not specified */ + if (vnp == NULL || StringHasNoText (vnp->data.ptrvalue)) { + item->new_name = StringSave ("hypothetical protein"); + } else { + item->new_name = vnp->data.ptrvalue; + vnp->data.ptrvalue = NULL; + } + if (vnp != NULL) { + vnp = vnp->next; + } + + /* find out if note is required */ + if (vnp != NULL && StringCmp (vnp->data.ptrvalue, "X") == 0) { + if (vnp->next == NULL || StringHasNoText (vnp->next->data.ptrvalue)) { + item->note_text = StringSave (item->product_match); } else { - match_list = FindMatchForRow (match_type, match_choice->data.ptrvalue, entityID, sep); - if (match_list == NULL) { - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (match_choice->data.ptrvalue) + 15)); - sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num); - ValNodeAddPointerToEnd (&vnb, 0, err_msg); - } else { - for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1; - col_vnp != NULL; - col_vnp = col_vnp->next, col_num++) { - t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; - if (t == NULL || t->match_type != NULL - || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { - if (val_vnp != NULL) { - val_vnp = val_vnp->next; - } - continue; - } - target_list = GetTargetListForRowAndColumn (match_type, match_list, t->field, t->constraint); - if (target_list == NULL) { - ReportMissingTargets (&vnb, t->field, match_choice->data.ptrvalue, col_num, line_num); - } else { - if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) { - val = ""; - } else { - val = val_vnp->data.ptrvalue; - } - for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) { - str = GetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL); - if (!StringHasNoText (str)) { - qual_name = SummarizeFieldType (t->field); - err_msg = (CharPtr) 
MemNew (sizeof (Char) * (StringLen (already_has_val_fmt) - + StringLen (qual_name) + StringLen (str) - + StringLen (val) - + 30)); - sprintf (err_msg, already_has_val_fmt, qual_name, str, col_num, line_num, val); - ValNodeAddPointerToEnd (&vnb, col_num, err_msg); - num_existing_text ++; - } - str = MemFree (str); - } - } - target_list = ValNodeFree (target_list); - if (val_vnp != NULL) { - val_vnp = val_vnp->next; - } - } + item->note_text = vnp->next->data.ptrvalue; + vnp->next->data.ptrvalue = NULL; + } + } + return item; +} + + +NLM_EXTERN ValNodePtr ReadProductUpdateTable (FILE *fp) +{ + ReadBufferData rbd; + CharPtr line; + ValNodeBlock line_list; + ValNodePtr column_list; + ProductUpdateTableItemPtr item; + + if (fp == NULL) return NULL; + rbd.fp = fp; + rbd.current_data = NULL; + + InitValNodeBlock (&line_list, NULL); + + line = AbstractReadFunction (&rbd); + while (line != NULL) + { + column_list = ReadOneColumnList (line); + if (column_list != NULL) { + TrimBeginningAndEndingQuotes(column_list->data.ptrvalue); + item = ProductUpdateTableItemFromValNodeList(column_list); + if (item != NULL) { + ValNodeAddPointerToEnd (&line_list, 0, item); } - match_list = ValNodeFree (match_list); + column_list = ValNodeFreeData (column_list); } - } - if (num_existing_text > 0) { - err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_existing_text_fmt) - + 15)); - sprintf (err_msg, num_existing_text_fmt, num_existing_text); - ValNodeAddPointerToFront (&vnb, 0, err_msg); + line = AbstractReadFunction (&rbd); } + return line_list.head; +} - return vnb.head; + +static void WriteProductUpdateTable (FILE *fp, ValNodePtr table) +{ + ValNodePtr vnp; + ProductUpdateTableItemPtr item; + + for (vnp = table; vnp != NULL; vnp = vnp->next) { + item = (ProductUpdateTableItemPtr) vnp->data.ptrvalue; + if (item != NULL && !StringHasNoText (item->product_match)) { + ProductUpdateTableItemWrite(fp, item); + } + } } -/* Reporting functions for SMART */ -static void 
GetDescriptorPubTitles (SeqDescrPtr sdp, Pointer userdata) +static ProductUpdateTableItemPtr GetProductUpdateTableItemForProduct (CharPtr product, ValNodePtr list) { - CharPtr title; + ProductUpdateTableItemPtr item; + ValNodePtr vnp; - if (sdp == NULL || sdp->choice != Seq_descr_pub || userdata == NULL) { - return; + if (StringHasNoText (product) || list == NULL) { + return NULL; } - title = GetPubFieldFromObject (OBJ_SEQDESC, sdp, Publication_field_title, NULL); - if (title != NULL) { - ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, title); + for (vnp = list; vnp != NULL; vnp = vnp->next) { + if ((item = (ProductUpdateTableItemPtr) vnp->data.ptrvalue) != NULL + && StringCmp (product, item->product_match) == 0) { + return item; + } } + return NULL; } -static void GetFeaturePubTitles (SeqFeatPtr sfp, Pointer userdata) +typedef struct productupdate { + ValNodePtr table; + FILE *log_fp; + Boolean any_change; +} ProductUpdateData, PNTR ProductUpdatePtr; + +static void ApplyProductUpdateCallback (SeqFeatPtr sfp, Pointer data) { - CharPtr title; + ProductUpdatePtr pd; + BioseqPtr pbsp; + SeqFeatPtr prot; + ProtRefPtr prp = NULL; + SeqMgrFeatContext context; + ProductUpdateTableItemPtr item = NULL; + Char buf[255]; + ValNodePtr vnp; + Boolean adjusted_mrna; - if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB || userdata == NULL) { + if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION + || (pd = (ProductUpdatePtr) data) == NULL) { return; } - title = GetPubFieldFromObject (OBJ_SEQFEAT, sfp, Publication_field_title, NULL); - if (title != NULL) { - ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, title); + pbsp = BioseqFindFromSeqLoc (sfp->product); + prot = SeqMgrGetNextFeature (pbsp, NULL, 0, FEATDEF_PROT, &context); + if (prot == NULL || (prp = (ProtRefPtr) prot->data.value.ptrvalue) == NULL) { + prp = GetProtRefForFeature(sfp); + } + if (prp != NULL && prp->name != NULL) { + item = GetProductUpdateTableItemForProduct (prp->name->data.ptrvalue, pd->table); + + 
if (item != NULL) { + prp->name->data.ptrvalue = MemFree (prp->name->data.ptrvalue); + prp->name->data.ptrvalue = StringSave (item->new_name); + if (item->note_text != NULL) { + SetStringValue (&(sfp->comment), item->note_text, ExistingTextOption_append_semi); + } + /* also need to move ec numbers to note, if any, for hypothetical protein */ + if (StringICmp (item->new_name, "hypothetical protein") == 0 + && prp->ec != NULL) { + SetStringValue (&(sfp->comment), " EC_number=", ExistingTextOption_append_semi); + SetStringValue (&(sfp->comment), prp->ec->data.ptrvalue, ExistingTextOption_append_none); + for (vnp = prp->ec->next; vnp != NULL; vnp = vnp->next) { + SetStringValue (&(sfp->comment), vnp->data.ptrvalue, ExistingTextOption_append_comma); + } + } + + adjusted_mrna = AdjustmRNAProductToMatchProteinProduct(prot); + + pd->any_change = TRUE; + if (pd->log_fp != NULL) { + SeqIdWrite (SeqIdFindBest (pbsp->id, SEQID_GENBANK), buf, PRINTID_REPORT, sizeof (buf) - 1); + fprintf (pd->log_fp, "%s\t%s\t%s\t%s\t%s\n", buf, item->product_match, item->new_name, + item->note_text == NULL ? "" : item->note_text, + adjusted_mrna ? 
"Adjusted mRNA" : ""); + } + } } } -NLM_EXTERN ValNodePtr GetPublicationTitlesInSep (SeqEntryPtr sep) +NLM_EXTERN Boolean ApplyProductUpdateTable (ValNodePtr table, SeqEntryPtr sep, FILE *log_fp) { - ValNodePtr title_list = NULL; + ProductUpdateData pd; - VisitDescriptorsInSep (sep, &title_list, GetDescriptorPubTitles); - VisitFeaturesInSep (sep, &title_list, GetFeaturePubTitles); - return title_list; + if (table == NULL || sep == NULL) { + return FALSE; + } + + MemSet (&pd, 0, sizeof (ProductUpdateData)); + pd.table = table; + pd.log_fp = log_fp; + + VisitFeaturesInSep (sep, &pd, ApplyProductUpdateCallback); + return pd.any_change; } -NLM_EXTERN ValNodePtr GetPublicationTitlesOnSep (SeqEntryPtr sep) +static void ExportProductUpdateTableCallback (SeqFeatPtr sfp, Pointer data) { - ValNodePtr title_list = NULL; + ProtRefPtr prp; - VisitDescriptorsOnSep (sep, &title_list, GetDescriptorPubTitles); - VisitFeaturesOnSep (sep, &title_list, GetFeaturePubTitles); - return title_list; + if (sfp == NULL || data == NULL) { + return; + } + if (sfp->data.choice == SEQFEAT_PROT + && (prp = (ProtRefPtr)sfp->data.value.ptrvalue) != NULL + && prp->name != NULL + && !StringHasNoText (prp->name->data.ptrvalue)) { + ValNodeAddPointerToEnd ((ValNodeBlockPtr) data, 0, StringSave (prp->name->data.ptrvalue)); + } else if (sfp->data.choice == SEQFEAT_CDREGION + && (prp = GetProtRefForFeature(sfp)) != NULL + && prp->name != NULL + && !StringHasNoText (prp->name->data.ptrvalue)) { + ValNodeAddPointerToEnd ((ValNodeBlockPtr) data, 0, StringSave (prp->name->data.ptrvalue)); + } } -static void GetBankitCommentsCallback (SeqDescrPtr sdp, Pointer userdata) +NLM_EXTERN void ExportProductUpdateTable (SeqEntryPtr sep, FILE *fp) { - UserObjectPtr uop; - ObjectIdPtr oip; - UserFieldPtr ufp; + ValNodeBlock block; + ValNodePtr vnp; - if (sdp == NULL || sdp->choice != Seq_descr_user || userdata == NULL) { + if (sep == NULL || fp == NULL) { return; } + InitValNodeBlock (&block, NULL); - uop = 
(UserObjectPtr) sdp->data.ptrvalue; - if (uop != NULL && StringCmp (uop->_class, "SMART_V1.0") != 0) { - oip = uop->type; - if (oip != NULL && StringCmp (oip->str, "Submission") == 0) { - for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { - oip = ufp->label; - if (oip != NULL - && StringCmp (oip->str, "AdditionalComment") == 0 - && !StringHasNoText (ufp->data.ptrvalue)) { - ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, StringSave (ufp->data.ptrvalue)); - } - } - } + VisitFeaturesInSep (sep, &block, ExportProductUpdateTableCallback); + + block.head = ValNodeSort (block.head, SortVnpByString); + ValNodeUnique (&(block.head), SortVnpByString, ValNodeFreeData); + + for (vnp = block.head; vnp != NULL; vnp = vnp->next) { + fprintf (fp, "%s\n", (CharPtr) vnp->data.ptrvalue); } + block.head = ValNodeFreeData (block.head); } -NLM_EXTERN ValNodePtr GetBankitCommentsInSep (SeqEntryPtr sep) +static Boolean ApplySuspectProductNameFixToProductUpdateTableItem (SuspectRulePtr rule, ProductUpdateTableItemPtr item) { - ValNodePtr comment_list = NULL; + CharPtr new_name; + Boolean rval = FALSE; - VisitDescriptorsInSep (sep, &comment_list, GetBankitCommentsCallback); - return comment_list; + if (rule == NULL || rule->replace == NULL || item == NULL || StringHasNoText (item->product_match)) { + return FALSE; + } + + if (item->new_name == NULL) { + new_name = StringSave (item->product_match); + } else { + new_name = StringSave (item->new_name); + } + if (ApplySuspectProductNameFixToString (rule, &new_name)) { + item->new_name = MemFree (item->new_name); + item->note_text = MemFree (item->note_text); + item->new_name = new_name; + if (rule->replace->move_to_note) { + item->note_text = StringSave (item->product_match); + } + rval = TRUE; + } else { + new_name = MemFree (new_name); + } + return rval; } -NLM_EXTERN ValNodePtr GetBankitCommentsOnSep (SeqEntryPtr sep) +static Boolean ApplySuspectProductNameFixesToProductUpdateTable (SuspectRuleSetPtr rule_set, ValNodePtr table) { - 
ValNodePtr comment_list = NULL; + SuspectRulePtr rule; + ValNodePtr vnp; + Boolean rval = FALSE, this_rule_apply, this_rule_match; + ProductUpdateTableItemPtr item; - VisitDescriptorsOnSep (sep, &comment_list, GetBankitCommentsCallback); - return comment_list; + if (rule_set == NULL || table == NULL) { + return FALSE; + } + + for (vnp = table; vnp != NULL; vnp = vnp->next) { + this_rule_apply = FALSE; + this_rule_match = FALSE; + item = (ProductUpdateTableItemPtr) vnp->data.ptrvalue; + for (rule = rule_set; rule != NULL; rule = rule->next) { + if (ApplySuspectProductNameFixToProductUpdateTableItem (rule, item)) { + this_rule_apply = TRUE; + } else if (!this_rule_apply && !this_rule_match) { + this_rule_match = MatchesSuspectProductRule (item->product_match, rule); + } + } + if (!this_rule_apply && this_rule_match) { + item->new_name = StringSave ("hypothetical protein"); + item->note_text = StringSave (item->product_match); + } + } + return rval; } +NLM_EXTERN void ExportProductUpdateTableWithPrecomputedSuggestions (FILE *fp, SeqEntryPtr sep, SuspectRuleSetPtr rules) +{ + ValNodeBlock block; + ValNodePtr vnp; + ProductUpdateTableItemPtr item; + + if (sep == NULL || fp == NULL) { + return; + } + InitValNodeBlock (&block, NULL); + + VisitFeaturesInSep (sep, &block, ExportProductUpdateTableCallback); + + block.head = ValNodeSort (block.head, SortVnpByString); + ValNodeUnique (&(block.head), SortVnpByString, ValNodeFreeData); + + for (vnp = block.head; vnp != NULL; vnp = vnp->next) { + item = ProductUpdateTableItemNew(vnp->data.ptrvalue); + vnp->data.ptrvalue = item; + } + + ApplySuspectProductNameFixesToProductUpdateTable (rules, block.head); + + WriteProductUpdateTable (fp, block.head); + + block.head = ProductUpdateTableFree (block.head); +} diff --git a/api/macroapi.h b/api/macroapi.h index 54509a60..f1868baf 100644 --- a/api/macroapi.h +++ b/api/macroapi.h @@ -29,7 +29,7 @@ * * Version Creation Date: 11/15/2007 * -* $Revision: 1.93 $ +* $Revision: 1.129 $ * * File 
Description: * @@ -155,6 +155,7 @@ NLM_EXTERN ValNodePtr GetFieldTypeListFromAECRAction (AECRActionPtr action); NLM_EXTERN Uint1 GetBiomolForRnaType (Int4 rnatype); NLM_EXTERN CharPtr GetBiomolNameForRnaType (Int4 rnatype); NLM_EXTERN void AddAllRNASubtypesToChoiceList (ValNodePtr PNTR field_list); +NLM_EXTERN ValNodePtr MakeFeatureFieldField (Uint2 ftype, Int4 legalqual); /* source qual functions */ NLM_EXTERN CharPtr GetSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint); @@ -190,13 +191,25 @@ NLM_EXTERN CharPtr GettmRNATagPeptide (RnaRefPtr rrp, StringConstraintPtr scp); NLM_EXTERN Boolean SetncRNAClass (RnaRefPtr rrp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text); NLM_EXTERN CharPtr GetncRNAClass (RnaRefPtr rrp, StringConstraintPtr scp); +/* Structured Comment functions */ +NLM_EXTERN CharPtr GetStructuredCommentFieldFromUserObject (UserObjectPtr uop, StructuredCommentFieldPtr field, StringConstraintPtr scp); +NLM_EXTERN Boolean IsUserObjectStructuredComment (UserObjectPtr uop); - +/* Publication functions */ NLM_EXTERN CharPtr GetPubFieldLabel (Int4 pub_field); NLM_EXTERN ValNodePtr GetPubFieldList (void); NLM_EXTERN CharPtr GetPubFieldFromPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp); NLM_EXTERN Int4 GetPubMLStatus (PubPtr the_pub); +/* DBLink functions */ +NLM_EXTERN Int4 GetNumDBLinkFields (void); +NLM_EXTERN CharPtr GetDBLinkNameFromDBLinkFieldType (Int4 field_type); +NLM_EXTERN Int4 GetDBLinkFieldTypeFromDBLinkName (CharPtr field_name); + + +/* other useful functions */ +NLM_EXTERN void GetGeneInfoForFeature (SeqFeatPtr sfp, GeneRefPtr PNTR p_grp, SeqFeatPtr PNTR p_gene); + /* generic string functions */ NLM_EXTERN Boolean SetStringValue (CharPtr PNTR existing_val, CharPtr new_val, Uint2 existing_text); NLM_EXTERN Boolean RemoveValNodeStringMatch (ValNodePtr PNTR list, StringConstraintPtr scp); @@ -207,19 +220,24 @@ NLM_EXTERN Boolean IsStringConstraintEmpty (StringConstraintPtr 
scp); NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstraintPtr scp); NLM_EXTERN Boolean DoesStringMatchConstraint (CharPtr str, StringConstraintPtr scp); NLM_EXTERN Boolean RemoveStringConstraintPortionFromString (CharPtr PNTR str, StringConstraintPtr scp); +NLM_EXTERN Boolean ReplaceStringConstraintPortionInString (CharPtr PNTR str, CharPtr replace, StringConstraintPtr scp); NLM_EXTERN Boolean IsSourceConstraintEmpty (SourceConstraintPtr scp); NLM_EXTERN Boolean DoesBiosourceMatchConstraint (BioSourcePtr biop, SourceConstraintPtr scp); NLM_EXTERN Boolean IsSequenceConstraintEmpty (SequenceConstraintPtr constraint); +NLM_EXTERN Boolean DoesSequenceMatchSequenceConstraint (BioseqPtr bsp, SequenceConstraintPtr constraint); NLM_EXTERN Boolean IsPublicationConstraintEmpty (PublicationConstraintPtr constraint); NLM_EXTERN Boolean IsFieldConstraintEmpty (FieldConstraintPtr constraint); NLM_EXTERN Boolean IsCDSGeneProtQualConstraintEmpty (CDSGeneProtQualConstraintPtr constraint); NLM_EXTERN Boolean IsLocationConstraintEmpty (LocationConstraintPtr lcp); +NLM_EXTERN Boolean IsMolinfoFieldConstraintEmpty (MolinfoFieldConstraintPtr constraint); +NLM_EXTERN Boolean IsTranslationConstraintEmpty (TranslationConstraintPtr constraint); NLM_EXTERN Boolean DoesObjectMatchConstraintChoiceSet (Uint1 choice, Pointer data, ConstraintChoiceSetPtr csp); NLM_EXTERN Boolean DoesSeqIDListMeetStringConstraint (SeqIdPtr sip, StringConstraintPtr string_constraint); NLM_EXTERN ValNodePtr FreeObjectList (ValNodePtr vnp); NLM_EXTERN ValNodePtr GetObjectListForAECRAction (SeqEntryPtr sep, AECRActionPtr action); NLM_EXTERN ValNodePtr GetObjectListForAECRActionEx (SeqEntryPtr sep, AECRActionPtr action, BatchExtraPtr batch_extra); NLM_EXTERN ValNodePtr GetObjectListForFieldType (Uint1 field_type, SeqEntryPtr sep); +NLM_EXTERN ValNodePtr GetSequenceListForConstraint (SeqEntryPtr sep, ConstraintChoiceSetPtr csp); NLM_EXTERN Int4 DoApplyActionToObjectList (ApplyActionPtr 
action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp); NLM_EXTERN Int4 DoApplyActionToObjectListEx (ApplyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra); NLM_EXTERN Int4 DoEditActionToObjectList (EditActionPtr action, ValNodePtr object_list, Boolean also_change_mrna); @@ -235,11 +253,15 @@ NLM_EXTERN Int4 DoParseActionToObjectListEx (AECRParseActionPtr action, ValNodeP NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForField (FieldTypePtr field, ConstraintChoiceSetPtr csp); NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForFieldPair (FieldPairTypePtr fieldpair, ConstraintChoiceSetPtr csp); NLM_EXTERN StringConstraintPtr StringConstraintFromFieldEdit (FieldEditPtr edit); +NLM_EXTERN ValNodePtr GetDuplicateFeaturesForRemoval (SeqEntryPtr sep, RemoveDuplicateFeatureActionPtr action); +NLM_EXTERN void RemoveDuplicateFeaturesInList (ValNodePtr delete_list, Uint2 entityID, Boolean remove_proteins); +NLM_EXTERN Boolean RemoveDuplicateFeaturesInSeqEntry (SeqEntryPtr sep, RemoveDuplicateFeatureActionPtr action, FILE *log_fp); NLM_EXTERN int LIBCALLBACK SortVnpByObject (VoidPtr ptr1, VoidPtr ptr2); NLM_EXTERN Boolean IsConversionSupported (Uint2 featdef_from, Uint2 featdef_to); +NLM_EXTERN void ApplyTextTransformsToString (CharPtr PNTR str, ValNodePtr transform_list); NLM_EXTERN CharPtr GetTextPortionFromString (CharPtr str, TextPortionPtr text_portion); NLM_EXTERN Boolean RemoveTextPortionFromString (CharPtr str, TextPortionPtr text_portion); NLM_EXTERN Boolean IsTextMarkerEmpty (TextMarkerPtr marker); @@ -282,8 +304,8 @@ NLM_EXTERN void AddAllDescriptorsToChoiceList (ValNodePtr PNTR descriptor_type_l NLM_EXTERN Boolean IsFixPubCapsActionEmpty (FixPubCapsActionPtr action); -NLM_EXTERN void ApplyMacroToSeqEntry (SeqEntryPtr sep, ValNodePtr macro, Int4Ptr pNumFields, Int4Ptr pNumFeat); -NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr 
sep, ValNodePtr macro, Int4Ptr pNumFields, Int4Ptr pNumFeat, FILE *log_fp); +NLM_EXTERN void ApplyMacroToSeqEntry (SeqEntryPtr sep, ValNodePtr macro); +NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, FILE *log_fp); NLM_EXTERN SeqFeatPtr ApplyOneFeatureToBioseq (BioseqPtr bsp, Uint1 featdef, SeqLocPtr slp, ValNodePtr fields, ValNodePtr src_fields, Boolean add_mrna); @@ -295,8 +317,10 @@ NLM_EXTERN Boolean IsFieldTypeEmpty (FieldTypePtr field); NLM_EXTERN CharPtr SummarizeFieldType (ValNodePtr vnp); NLM_EXTERN Boolean IsFieldTypeNonText (ValNodePtr field_type); NLM_EXTERN CharPtr SummarizeExistingText (Uint2 existing_text); +NLM_EXTERN Boolean IsTextTransformEmpty (ValNodePtr vnp); extern const CharPtr kTaxnameAfterBinomialString; NLM_EXTERN CharPtr SummarizeTextPortion (TextPortionPtr text_portion); +NLM_EXTERN CharPtr SummarizeTextTransform (ValNodePtr transform); NLM_EXTERN CharPtr SummarizeParseSrc (ValNodePtr src); NLM_EXTERN CharPtr SummarizeParseDst (ValNodePtr dst); NLM_EXTERN CharPtr SummarizeAECRAction (AECRActionPtr a); @@ -304,6 +328,15 @@ NLM_EXTERN CharPtr SummarizeParseAction (ParseActionPtr p); NLM_EXTERN CharPtr SummarizeAutodefAction (AutodefActionPtr autodef); NLM_EXTERN CharPtr SummarizeRemoveDescriptorAction (RemoveDescriptorActionPtr a); NLM_EXTERN CharPtr SummarizeFixPubCapsAction (FixPubCapsActionPtr a); +NLM_EXTERN CharPtr SummarizeFixCapsAction (FixCapsActionPtr action); +NLM_EXTERN CharPtr SummarizeFixFormatAction (FixFormatActionPtr action); +NLM_EXTERN CharPtr SummarizeSortFieldsAction (SortFieldsActionPtr action); +NLM_EXTERN CharPtr SummarizeMolinfoBlockAction (MolinfoBlockPtr mib); +NLM_EXTERN CharPtr SummarizeRemoveDuplicateFeaturesAction (RemoveDuplicateFeatureActionPtr action); +NLM_EXTERN CharPtr SummarizeAuthorFixAction (AuthorFixActionPtr a); +NLM_EXTERN CharPtr SummarizeWordSubstitution (WordSubstitutionPtr word); +NLM_EXTERN CharPtr SummarizeFeatureStrandedness (Uint2 strandedness); +NLM_EXTERN 
CharPtr SummarizeStringConstraint (StringConstraintPtr constraint); NLM_EXTERN CharPtr SummarizeConstraintSet (ValNodePtr constraint_set); NLM_EXTERN CharPtr SummarizeConstraint (ValNodePtr constraint); @@ -321,8 +354,10 @@ NLM_EXTERN Boolean SetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTy NLM_EXTERN BioseqPtr GetSequenceForObject (Uint1 choice, Pointer data); NLM_EXTERN ValNodePtr GetMultipleFieldValuesForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra); +NLM_EXTERN void InitValNodeBlock (ValNodeBlockPtr vnbp, ValNodePtr list); NLM_EXTERN void ValNodeAddPointerToEnd (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data); NLM_EXTERN void ValNodeAddPointerToFront (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data); +NLM_EXTERN void ValNodeLinkToEnd (ValNodeBlockPtr vnbp, ValNodePtr list); typedef enum { @@ -351,12 +386,14 @@ typedef struct tabcolumnconfig { ValNodePtr constraint; } TabColumnConfigData, PNTR TabColumnConfigPtr; + NLM_EXTERN MatchTypePtr MatchTypeNew (); NLM_EXTERN MatchTypePtr MatchTypeFree (MatchTypePtr match_type); NLM_EXTERN TabColumnConfigPtr TabColumnConfigNew (void); NLM_EXTERN TabColumnConfigPtr TabColumnConfigFree (TabColumnConfigPtr t); NLM_EXTERN TabColumnConfigPtr TabColumnConfigCopy (TabColumnConfigPtr orig); +NLM_EXTERN void TabColumnConfigReset (TabColumnConfigPtr t); NLM_EXTERN ValNodePtr TabColumnConfigListFree (ValNodePtr columns); NLM_EXTERN ValNodePtr TabColumnConfigListCopy (ValNodePtr orig); NLM_EXTERN ValNodePtr ValidateTabTableValues (ValNodePtr table, ValNodePtr columns); @@ -393,6 +430,69 @@ NLM_EXTERN Boolean GBBlockIsCompletelyEmpty (GBBlockPtr gb); NLM_EXTERN CharPtr GetObjectIdString (ObjectIdPtr oip); NLM_EXTERN Boolean SetObjectIdString (ObjectIdPtr oip, CharPtr value, Uint2 existing_text); +NLM_EXTERN void SplitPCRPrimersByPosition (SeqEntryPtr sep); +NLM_EXTERN void SplitPCRPrimersByConstraints (SeqEntryPtr sep, StringConstraintPtr scp_fwd, 
StringConstraintPtr scp_rev); +NLM_EXTERN void MergePCRPrimers (SeqEntryPtr sep); + +NLM_EXTERN SubSourcePtr FindBadLatLon (BioSourcePtr biop); +NLM_EXTERN Boolean LatLonAutocorrectList (FILE *fp, ValNodePtr object_list); +NLM_EXTERN void FixiPCRPrimerSeqsCallback (BioSourcePtr biop, Pointer data); + +NLM_EXTERN Boolean HasTaxonomyID (BioSourcePtr biop); + +NLM_EXTERN ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp); + +NLM_EXTERN Boolean StripSuffixFromAuthor (AuthorPtr pAuthor); +NLM_EXTERN Boolean TruncateAuthorMiddleInitials (AuthorPtr pAuthor); + + +/* for product name rules */ +NLM_EXTERN Int4 CountSuspectRuleSet (SuspectRuleSetPtr set); + +NLM_EXTERN Boolean IsSearchFuncEmpty (SearchFuncPtr func); +NLM_EXTERN Boolean IsSuspectRuleEmpty (SuspectRulePtr rule); + +NLM_EXTERN CharPtr SummarizeSearchFunc (SearchFuncPtr func); +NLM_EXTERN CharPtr SummarizeReplaceFunc (ReplaceFuncPtr replace); +NLM_EXTERN CharPtr SummarizeFixType (Uint2 fix_type); +NLM_EXTERN CharPtr SummarizeReplaceRule (ReplaceRulePtr replace); +NLM_EXTERN CharPtr SummarizeSuspectRule (SuspectRulePtr rule); + + +NLM_EXTERN Boolean StringMayContainPlural (CharPtr search); +NLM_EXTERN Boolean ContainsNorMoreSetsOfBracketsOrParentheses (CharPtr search, Int4 n); +NLM_EXTERN Boolean ContainsThreeOrMoreNumbersTogether (CharPtr search); +NLM_EXTERN Boolean IsPrefixPlusNumbers (CharPtr prefix, CharPtr search); +NLM_EXTERN Boolean StringContainsUnbalancedParentheses (CharPtr search); +NLM_EXTERN Boolean StringContainsUnderscore (CharPtr search); +NLM_EXTERN Boolean ProductContainsTerm (CharPtr pattern, CharPtr search); + +NLM_EXTERN Boolean DoesStringMatchSuspectRule (CharPtr str, SeqFeatPtr sfp, SuspectRulePtr rule); +NLM_EXTERN ValNodePtr GetFeaturesForSuspectRules (SeqEntryPtr sep, SuspectRuleSetPtr rules, Uint2 featdef); +NLM_EXTERN ValNodePtr FreeListOfObjectLists (ValNodePtr list); +NLM_EXTERN void PrintSuspectRuleMatches (SeqEntryPtr sep, SuspectRuleSetPtr rules, FILE *fp); +NLM_EXTERN ValNodePtr 
GetSuspectRuleDiscrepancies (SeqEntryPtr sep, SuspectRuleSetPtr rules, Uint2 featdef, Uint4 clickable_item_type); +NLM_EXTERN Int4 ApplySuspectRuleFixesToSeqEntry (SeqEntryPtr sep, SuspectRuleSetPtr rules, FILE *fp); + +NLM_EXTERN Boolean ApplySuspectProductNameFixToString (SuspectRulePtr rule, CharPtr PNTR str); +NLM_EXTERN Boolean ApplySuspectProductNameFixToFeature (SuspectRulePtr rule, SeqFeatPtr cds, FILE *fp); +NLM_EXTERN void SortSuspectRuleSetByFind (SuspectRuleSetPtr PNTR rules); +NLM_EXTERN void SortSuspectRuleSetByFixTypeThenFind (SuspectRuleSetPtr PNTR rules); + +NLM_EXTERN void FindDiffsBetweenRuleSets (SuspectRuleSetPtr set1, SuspectRuleSetPtr set2, SuspectRuleSetPtr PNTR in1not2, SuspectRuleSetPtr PNTR in2not1); +NLM_EXTERN Boolean FindSuspectProductNamesInEntrezGene (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file); +NLM_EXTERN void FindSuspectProductNamesInNameList (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file); + + +/* declarations for product update table handling */ +NLM_EXTERN ValNodePtr ReadProductUpdateTable (FILE *fp); +NLM_EXTERN ValNodePtr ProductUpdateTableFree (ValNodePtr list); +NLM_EXTERN Boolean ApplyProductUpdateTable (ValNodePtr table, SeqEntryPtr sep, FILE *log_fp); +NLM_EXTERN void ExportProductUpdateTable (SeqEntryPtr sep, FILE *fp); +NLM_EXTERN void ExportProductUpdateTableWithPrecomputedSuggestions (FILE *fp, SeqEntryPtr sep, SuspectRuleSetPtr rules); + +NLM_EXTERN ValNodePtr GetBioseqMatchesForSequenceIDs (ValNodePtr query_list, Uint1 match_location, SeqEntryPtr sep); + #ifdef __cplusplus } #endif diff --git a/api/objmgr.c b/api/objmgr.c index cfa33f98..1be2a72b 100644 --- a/api/objmgr.c +++ b/api/objmgr.c @@ -29,7 +29,7 @@ * * Version Creation Date: 9/94 * -* $Revision: 6.86 $ +* $Revision: 6.87 $ * * File Description: Manager for Bioseqs and BioseqSets * @@ -2405,6 +2405,9 @@ static Int4 NEAR ObjMgrLockFunc (ObjMgrPtr omp, Uint2 type, Pointer data, Boolea } omdp = ObjMgrFindTop(omp, 
omp->datalist[i]); + if (omdp == NULL) { + return lockcnt; + } if (lockit) { omdp->lockcnt++; diff --git a/api/seqmgr.c b/api/seqmgr.c index 55bb0d8b..3692fa8f 100644 --- a/api/seqmgr.c +++ b/api/seqmgr.c @@ -29,7 +29,7 @@ * * Version Creation Date: 9/94 * -* $Revision: 6.308 $ +* $Revision: 6.314 $ * * File Description: Manager for Bioseqs and BioseqSets * @@ -274,6 +274,7 @@ NLM_EXTERN Boolean MakeReversedSeqIdString (SeqIdPtr sid, CharPtr buf, size_t le case SEQID_TPE: case SEQID_TPD: case SEQID_GPIPE: + case SEQID_NAMED_ANNOT_TRACK: tsip = (TextSeqIdPtr) (sid->data.ptrvalue); if (tsip->accession != NULL) { tmp = tsip->name; @@ -2891,8 +2892,7 @@ NLM_EXTERN Boolean LIBCALL SeqMgrAdd (Uint2 type, Pointer data) SeqMgrUnlock(); return retval; } - - SeqMgrAddToBioseqIndex((BioseqPtr)data); + retval &= SeqMgrAddToBioseqIndex((BioseqPtr)data); SeqMgrUnlock(); @@ -3320,6 +3320,11 @@ NLM_EXTERN Boolean LIBCALL SeqMgrAddToBioseqIndex (BioseqPtr bsp) { bspp = smp->NonIndexedBioseq; smp->NonIndexedBioseq = MemNew((smp->NonIndexedBioseqNum + 10) * sizeof (BioseqPtr)); + if (smp->NonIndexedBioseq == NULL) { + Message (MSG_POSTERR, "Unable to allocate memory for bioseq index"); + smp->NonIndexedBioseq = bspp; + return FALSE; + } MemCopy(smp->NonIndexedBioseq, bspp, (smp->NonIndexedBioseqNum * sizeof(BioseqPtr))); MemFree(bspp); smp->NonIndexedBioseqNum += 10; @@ -6373,6 +6378,16 @@ static int LIBCALLBACK SortFeatItemListByLabel (VoidPtr vp1, VoidPtr vp2) return -1; } + /* If they're case-insensitive the same, but case-sensitive different, + then fall back to sort by case-sensitive + (e.g. 
AJ344068.1 has genes korA and KorA ) */ + compare = StringCmp (sp1->label, sp2->label); + if( compare > 0 ) { + return 1; + } else if( compare < 0 ) { + return -1; + } + /* for duplicated genes, etc., that cross origin, put ignored item last for binary search */ if (sp1->ignore) { @@ -8341,6 +8356,33 @@ NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXref (SeqFeatPtr sfp) return grp; } +NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXrefEx (SeqFeatPtr sfp, ObjectIdPtr PNTR oipP) + +{ + GeneRefPtr grp = NULL; + ObjectIdPtr oip; + SeqFeatXrefPtr xref; + + if (oipP != NULL) { + *oipP = NULL; + } + if (sfp == NULL) return NULL; + xref = sfp->xref; + while (xref != NULL && xref->data.choice != SEQFEAT_GENE) { + xref = xref->next; + } + if (xref != NULL) { + grp = (GeneRefPtr) xref->data.value.ptrvalue; + if (xref->id.choice == 3) { + oip = (ObjectIdPtr) xref->id.value.ptrvalue; + if (oip != NULL && oipP != NULL) { + *oipP = oip; + } + } + } + return grp; +} + NLM_EXTERN Boolean LIBCALL SeqMgrGeneIsSuppressed (GeneRefPtr grp) { @@ -8426,16 +8468,13 @@ static Int4 TestForOverlap (SMFeatItemPtr feat, SeqLocPtr slp, } else if (overlapType == LOCATION_SUBSET || overlapType == CHECK_INTERVALS) { /* requires individual intervals to be completely contained within gene, etc. 
*/ - - if (feat->left <= left && feat->right >= right) { - sfp = feat->sfp; - if (sfp != NULL) { - diff = SeqLocAinB (slp, sfp->location); - if (diff >= 0) { - if (overlapType == LOCATION_SUBSET || numivals == 1 || - CheckInternalExonBoundaries (numivals, ivals, feat->numivals, feat->ivals)) { - return diff; - } + sfp = feat->sfp; + if (sfp != NULL) { + diff = SeqLocAinB (slp, sfp->location); + if (diff >= 0) { + if (overlapType == LOCATION_SUBSET || numivals == 1 || + CheckInternalExonBoundaries (numivals, ivals, feat->numivals, feat->ivals)) { + return diff; } } } @@ -9258,6 +9297,17 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeature (SeqLocPtr slp, Uint2 numfeats, position, overlapType, context, NULL, NULL, NULL, FALSE); } +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeatureEx (SeqLocPtr slp, Uint2 subtype, + VoidPtr featarray, Int4 numfeats, + Int4Ptr position, Int2 overlapType, + SeqMgrFeatContext PNTR context, + Boolean special) + +{ + return SeqMgrGetBestOverlappingFeat (slp, subtype, (SMFeatItemPtr PNTR) featarray, + numfeats, position, overlapType, context, NULL, NULL, NULL, special); +} + NLM_EXTERN Int2 LIBCALL SeqMgrGetAllOverlappingFeatures (SeqLocPtr slp, Uint2 subtype, VoidPtr featarray, Int4 numfeats, diff --git a/api/seqport.c b/api/seqport.c index 6330111e..e9d30b1f 100644 --- a/api/seqport.c +++ b/api/seqport.c @@ -29,7 +29,7 @@ * * Version Creation Date: 7/13/91 * -* $Revision: 6.184 $ +* $Revision: 6.186 $ * * File Description: Ports onto Bioseqs * @@ -2386,7 +2386,7 @@ static Int4 SeqPortStreamRaw ( return count; } -static Int4 SeqPortStreamLit ( +static Int4 SeqPortStreamSeqLit ( SeqLitPtr slitp, Boolean is_na, Int4 start, @@ -2775,7 +2775,7 @@ static Int4 SeqPortStreamDelta ( } else if (sop->slitp != NULL) { - count += SeqPortStreamLit (sop->slitp, is_na, sop->from, sop->to, sop->strand, sdp); + count += SeqPortStreamSeqLit (sop->slitp, is_na, sop->from, sop->to, sop->strand, sdp); } } @@ -3006,6 +3006,7 @@ static Int4 
SeqPortStreamSetup ( Int4 stop, Uint1 strand, SeqLocPtr loc, + SeqLitPtr lit, StreamFlgType flags, Pointer userdata, SeqPortStreamProc proc @@ -3017,10 +3018,11 @@ static Int4 SeqPortStreamSetup ( Int4 count = 0, from, to; Uint2 entityID; Int2 i; + Boolean is_na; StreamData sd; SeqLocPtr slp; - if (bsp == NULL && loc == NULL) return 0; + if (bsp == NULL && loc == NULL && lit == NULL) return 0; if (proc == NULL && userdata == NULL) return 0; MemSet ((Pointer) &sd, 0, sizeof (StreamData)); @@ -3087,6 +3089,24 @@ static Int4 SeqPortStreamSetup ( slp = SeqLocFindNext (loc, slp); } + + } else if (lit != NULL) { + + is_na = TRUE; + switch (lit->seq_data_type) { + case Seq_code_iupacaa : + case Seq_code_ncbi8aa : + case Seq_code_ncbieaa : + case Seq_code_ncbipaa : + case Seq_code_iupacaa3 : + case Seq_code_ncbistdaa : + is_na = FALSE; + break; + default : + break; + } + + count += SeqPortStreamSeqLit (lit, is_na, 0, lit->length - 1, Seq_strand_plus, &sd); } /* return number of bases or residues streamed to callback */ @@ -3109,7 +3129,7 @@ NLM_EXTERN Int4 SeqPortStream ( ) { - return SeqPortStreamSetup (bsp, 0, -1, Seq_strand_unknown, NULL, flags, userdata, proc); + return SeqPortStreamSetup (bsp, 0, -1, Seq_strand_unknown, NULL, NULL, flags, userdata, proc); } NLM_EXTERN Int4 SeqPortStreamInt ( @@ -3123,7 +3143,7 @@ NLM_EXTERN Int4 SeqPortStreamInt ( ) { - return SeqPortStreamSetup (bsp, start, stop, strand, NULL, flags, userdata, proc); + return SeqPortStreamSetup (bsp, start, stop, strand, NULL, NULL, flags, userdata, proc); } NLM_EXTERN Int4 SeqPortStreamLoc ( @@ -3134,7 +3154,18 @@ NLM_EXTERN Int4 SeqPortStreamLoc ( ) { - return SeqPortStreamSetup (NULL, 0, 0, 0, slp, flags, userdata, proc); + return SeqPortStreamSetup (NULL, 0, 0, 0, slp, NULL, flags, userdata, proc); +} + +NLM_EXTERN Int4 SeqPortStreamLit ( + SeqLitPtr lit, + StreamFlgType flags, + Pointer userdata, + SeqPortStreamProc proc +) + +{ + return SeqPortStreamSetup (NULL, 0, 0, 0, NULL, lit, flags, 
userdata, proc); } /******************************************************************************* @@ -8686,6 +8717,7 @@ NLM_EXTERN void ConvertNsToGaps ( if (bases == NULL) return; if (!NeedToConvert(bases, unknown_greater_than_or_equal, known_greater_than_or_equal, unknown_gap_size, known_gap_size)) { + MemFree (bases); return; } diff --git a/api/seqport.h b/api/seqport.h index 820f844d..22265681 100644 --- a/api/seqport.h +++ b/api/seqport.h @@ -29,7 +29,7 @@ * * Version Creation Date: 7/13/91 * -* $Revision: 6.62 $ +* $Revision: 6.64 $ * * File Description: Ports onto Bioseqs * @@ -172,6 +172,7 @@ NLM_EXTERN Boolean LIBCALL SeqPortSetUpAlphabet PROTO((SeqPortPtr spp, Uint1 cur * SeqPortStream (bsp, flags, userdata, proc) * SeqPortStreamInt (bsp, start, stop, strand, flags, userdata, proc) * SeqPortStreamLoc (slp, flags, userdata, proc) +* SeqPortStreamLit (lit, flags, userdata, proc) * Efficient functions to stream through sequence * ********************************************************************************/ @@ -198,6 +199,8 @@ typedef unsigned long StreamFlgType; #define STREAM_HTML_SPANS 256 /* show span tags at begining of each line */ +#define STREAM_ALL_FASTA_IDS 512 /* in FASTA streamer, show all Seq-ids */ + NLM_EXTERN Int4 SeqPortStream ( BioseqPtr bsp, StreamFlgType flags, @@ -222,6 +225,13 @@ NLM_EXTERN Int4 SeqPortStreamLoc ( SeqPortStreamProc proc ); +NLM_EXTERN Int4 SeqPortStreamLit ( + SeqLitPtr lit, + StreamFlgType flags, + Pointer userdata, + SeqPortStreamProc proc +); + /******************************************************************************* * * StreamCacheSetup (bsp, slp, flags, scp) diff --git a/api/sequtil.c b/api/sequtil.c index 1b398944..8cd935d7 100644 --- a/api/sequtil.c +++ b/api/sequtil.c @@ -29,7 +29,7 @@ * * Version Creation Date: 4/1/91 * -* $Revision: 6.304 $ +* $Revision: 6.328 $ * * File Description: Sequence Utilities for objseq and objsset * @@ -7677,6 +7677,11 @@ Boolean GetThePointForOffsetEx(SeqLocPtr of, 
SeqPntPtr target, Uint1 which_end, while ((pnt = SeqLocFindNext(of, pnt)) != NULL) { + if( pnt->choice == SEQLOC_NULL ) + { + /* Skip NULL parts when determining offsets */ + continue; + } last_strand = SeqLocStrand (pnt); last_sip = SeqLocId (pnt); if (last_strand != Seq_strand_minus) @@ -7839,6 +7844,11 @@ Boolean GetPointsForLeftAndRightOffsets(SeqLocPtr of, SeqPntPtr left, SeqPntPtr while ((pnt = SeqLocFindNext(of, pnt)) != NULL) { + if( pnt->choice == SEQLOC_NULL ) + { + /* Skip NULL parts when determining offsets */ + continue; + } last_strand = SeqLocStrand (pnt); last_sip = SeqLocId (pnt); if (last_strand != Seq_strand_minus) @@ -9795,7 +9805,7 @@ NLM_EXTERN SeqIdPtr LIBCALL SeqIdFromAccessionEx(CharPtr accession, Uint4 versi BioseqPtr bsp=NULL; TextSeqIdPtr tsp; Uint4 status; - if(accession==NULL || accession[0]=='\0') + if(accession==NULL || accession[0]=='\0' || accession[0]=='\n' || accession[0]=='\r') return NULL; sip=NULL; status = WHICH_db_accession(accession); @@ -10449,6 +10459,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) retcode = ACCN_DDBJ_WGS_PROT; } else if ((StringICmp(temp,"HAA") >= 0) && (StringICmp(temp,"HZZ") <= 0)) { retcode = ACCN_NCBI_TPA_PROT; + } else if ((StringICmp(temp,"IAA") >= 0) && (StringICmp(temp,"IZZ") <= 0)) { + retcode = ACCN_DDBJ_TPA_PROT; } else { retcode = ACCN_IS_PROTEIN; retval = TRUE; @@ -10510,7 +10522,10 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) (StringICmp(temp,"GR") == 0) || (StringICmp(temp,"GT") == 0) || (StringICmp(temp,"GW") == 0) || - (StringICmp(temp,"HO") == 0) ) { /* NCBI EST */ + (StringICmp(temp,"HO") == 0) || + (StringICmp(temp,"HS") == 0) || + (StringICmp(temp,"JG") == 0) || + (StringICmp(temp,"JK") == 0) ) { /* NCBI EST */ retcode = ACCN_NCBI_EST; } else if ((StringICmp(temp,"BV") == 0) || (StringICmp(temp,"GF") == 0)) { /* NCBI STS */ @@ -10527,7 +10542,7 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) (StringICmp(temp,"GQ") == 0) || (StringICmp(temp,"GU") 
== 0) || (StringICmp(temp,"HM") == 0) || - (StringICmp(temp,"HQ") == 0)) { /* NCBI direct submission */ + (StringICmp(temp,"JF") == 0)) { /* NCBI direct submission */ retcode = ACCN_NCBI_DIRSUB; } else if ((StringICmp(temp,"AE") == 0) || (StringICmp(temp,"CP") == 0) || @@ -10544,7 +10559,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) (StringICmp(temp,"EQ") == 0) || (StringICmp(temp,"FA") == 0) || (StringICmp(temp,"GG") == 0) || - (StringICmp(temp,"GL") == 0)) { /* NCBI segmented set header Bioseq */ + (StringICmp(temp,"GL") == 0) || + (StringICmp(temp,"JH") == 0)) { /* NCBI segmented set header Bioseq */ retcode = ACCN_NCBI_SEGSET; } else if ((StringICmp(temp,"AS") == 0) || (StringICmp(temp,"HR") == 0) || @@ -10573,7 +10589,10 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) (StringICmp(temp,"FH") == 0) || (StringICmp(temp,"FI") == 0) || (StringICmp(temp,"GS") == 0) || - (StringICmp(temp,"HN") == 0) ) { /* NCBI GSS */ + (StringICmp(temp,"HN") == 0) || + (StringICmp(temp,"HR") == 0) || + (StringICmp(temp,"JJ") == 0) || + (StringICmp(temp,"JM") == 0) ) { /* NCBI GSS */ retcode = ACCN_NCBI_GSS; } else if ((StringICmp(temp,"AR") == 0) || (StringICmp(temp,"DZ") == 0) || @@ -10599,10 +10618,17 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) retcode = ACCN_NCBI_TPA; } else if ((StringICmp(temp,"BN") == 0)) { /* EMBL third-party annotation */ retcode = ACCN_EMBL_TPA; - } else if ((StringICmp(temp,"BR") == 0)) { /* DDBJ third-party annotation */ + } else if ((StringICmp(temp,"BR") == 0) || + (StringICmp(temp,"HT") == 0) || + (StringICmp(temp,"HU") == 0)) { /* DDBJ third-party annotation */ retcode = ACCN_DDBJ_TPA; } else if((StringICmp(temp,"EZ") == 0) || - (StringICmp(temp,"HP") == 0)) { + (StringICmp(temp,"HP") == 0) || + (StringICmp(temp,"HQ") == 0) || + (StringICmp(temp,"JI") == 0) || + (StringICmp(temp,"JL") == 0) || + (StringICmp(temp,"JO") == 0) || + (StringICmp(temp,"JN") == 0)) { retcode = ACCN_NCBI_TSA; } else 
if((StringICmp(temp,"FX") == 0)) { retcode = ACCN_DDBJ_TSA; @@ -10612,7 +10638,6 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) (StringICmp(temp,"HE") == 0) || (StringICmp(temp,"HF") == 0) || (StringICmp(temp,"HG") == 0) || - (StringICmp(temp,"HH") == 0) || (StringICmp(temp,"HI") == 0)) { /* EMBL direct submission */ retcode = ACCN_EMBL_DIRSUB; } else if ((StringICmp(temp,"AL") == 0) || @@ -10632,7 +10657,13 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) (StringICmp(temp,"HA") == 0) || (StringICmp(temp,"HB") == 0) || (StringICmp(temp,"HC") == 0) || - (StringICmp(temp,"HD") == 0)) { /* EMBL patent division */ + (StringICmp(temp,"HD") == 0) || + (StringICmp(temp,"HH") == 0) || + (StringICmp(temp,"JA") == 0) || + (StringICmp(temp,"JB") == 0) || + (StringICmp(temp,"JC") == 0) || + (StringICmp(temp,"JD") == 0) || + (StringICmp(temp,"JE") == 0)) { /* EMBL patent division */ retcode = ACCN_EMBL_PATENT; } else if ((StringICmp(temp,"AT") == 0) || (StringICmp(temp,"AU") == 0) || @@ -10672,15 +10703,16 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) (StringICmp(temp,"FU") == 0) || (StringICmp(temp,"FV") == 0) || (StringICmp(temp,"FW") == 0) || - (StringICmp(temp,"FZ") == 0)) { /* DDBJ patent division */ + (StringICmp(temp,"FZ") == 0) || + (StringICmp(temp,"GB") == 0) || + (StringICmp(temp,"HV") == 0) || + (StringICmp(temp,"HW") == 0)) { /* DDBJ patent division */ retcode = ACCN_DDBJ_PATENT; } else if ((StringICmp(temp,"DE") == 0) || (StringICmp(temp,"DH") == 0) || - (StringICmp(temp,"FT") == 0)) { /* DDBJ GSS */ + (StringICmp(temp,"FT") == 0) || + (StringICmp(temp,"GA") == 0)) { /* DDBJ GSS */ retcode = ACCN_DDBJ_GSS; - } else if ((StringICmp(temp,"GA") == 0) || - (StringICmp(temp,"GB") == 0)) { /* DDBJ unassigned */ - retcode = ACCN_DDBJ_OTHER; } else { retcode = ACCN_IS_NT; break; @@ -10778,6 +10810,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) retcode = ACCN_EMBL_WGS; } else if ((StringNICmp(temp,"D", 1) == 0)) { retcode = 
ACCN_NCBI_WGS; + } else if ((StringNICmp(temp,"E", 1) == 0)) { + retcode = ACCN_DDBJ_WGS; } else retval = FALSE; while (*s) { diff --git a/api/sqnutil1.c b/api/sqnutil1.c index b67b5d46..73c9cfe7 100644 --- a/api/sqnutil1.c +++ b/api/sqnutil1.c @@ -29,7 +29,7 @@ * * Version Creation Date: 9/2/97 * -* $Revision: 6.587 $ +* $Revision: 6.648 $ * * File Description: * @@ -87,6 +87,7 @@ static int descr_insert_order [] = { Seq_descr_pdb, Seq_descr_embl, Seq_descr_genbank, + Seq_descr_modelev, Seq_descr_create_date, Seq_descr_update_date, 0 @@ -942,7 +943,8 @@ NLM_EXTERN void AddSeqEntryToSeqEntry (SeqEntryPtr target, SeqEntryPtr insert, B } } else if ((targetbssp->_class >= BioseqseqSet_class_mut_set && targetbssp->_class <= BioseqseqSet_class_eco_set) || - targetbssp->_class >= BioseqseqSet_class_wgs_set) { + targetbssp->_class == BioseqseqSet_class_wgs_set || + targetbssp->_class == BioseqseqSet_class_small_genome_set) { if (targetbssp->seq_set != NULL) { tmp = targetbssp->seq_set; @@ -1303,7 +1305,8 @@ NLM_EXTERN void RenormalizeNucProtSets (SeqEntryPtr sep, Boolean relink) if (bssp != NULL && (bssp->_class == 7 || (bssp->_class >= 13 && bssp->_class <= 16) || bssp->_class == BioseqseqSet_class_wgs_set || - bssp->_class == BioseqseqSet_class_gen_prod_set)) { + bssp->_class == BioseqseqSet_class_gen_prod_set || + bssp->_class == BioseqseqSet_class_small_genome_set)) { for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { RenormalizeNucProtSets (sep, relink); } @@ -1472,6 +1475,7 @@ static Boolean ReturnStackToItem (GatherContextPtr gcp) bssp->_class != BioseqseqSet_class_phy_set && bssp->_class != BioseqseqSet_class_eco_set && bssp->_class != BioseqseqSet_class_wgs_set && + bssp->_class != BioseqseqSet_class_small_genome_set && (bssp->_class != BioseqseqSet_class_gen_prod_set || (! 
tdp->skipGenProdSet))) { return FALSE; @@ -1600,12 +1604,13 @@ NLM_EXTERN SeqEntryPtr LIBCALL GetTopSeqEntryForEntityID (Uint2 entityID) return NULL; } -NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr) +NLM_EXTERN Boolean CheckSeqLocForPartialEx (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr, Int4Ptr limptr) { SeqLocPtr firstSlp; IntFuzzPtr ifp; SeqLocPtr lastSlp; + Int4 lim; Boolean partial5; Boolean partial3; SeqIntPtr sip; @@ -1614,6 +1619,7 @@ NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, Boo partial5 = FALSE; partial3 = FALSE; + lim = -1; if (location != NULL) { firstSlp = NULL; lastSlp = NULL; @@ -1652,6 +1658,10 @@ NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, Boo partial5 = TRUE; } } + ifp = spp->fuzz; + if (ifp != NULL && ifp->choice == 4) { + lim = ifp->a; + } } } if (lastSlp != NULL) { @@ -1681,6 +1691,10 @@ NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, Boo partial3 = TRUE; } } + ifp = spp->fuzz; + if (ifp != NULL && ifp->choice == 4) { + lim = ifp->a; + } } } } @@ -1690,7 +1704,16 @@ NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, Boo if (p3ptr != NULL) { *p3ptr = partial3; } - return (Boolean) (partial5 || partial3); + if (limptr != NULL) { + *limptr = lim; + } + return (Boolean) (partial5 || partial3 || lim == 3 || lim == 4); +} + +NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr) + +{ + return CheckSeqLocForPartialEx (location, p5ptr, p3ptr, NULL); } static void ConvertWholeToIntLoc (SeqLocPtr slp) @@ -1721,7 +1744,7 @@ static void ConvertWholeToIntLoc (SeqLocPtr slp) } } -NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean partial3) +NLM_EXTERN void SetSeqLocPartialEx (SeqLocPtr location, Boolean partial5, Boolean partial3, Int4 lim) { SeqLocPtr firstSlp; @@ -1788,6 +1811,14 @@ NLM_EXTERN void SetSeqLocPartial 
(SeqLocPtr location, Boolean partial5, Boolean ifp->a = 2; } } + } else if (lim == 3 || lim == 4) { + ifp = IntFuzzNew (); + if (ifp != NULL) { + ifp->choice = 4; + spp->fuzz = IntFuzzFree (spp->fuzz); + spp->fuzz = ifp; + ifp->a = lim; + } } else { if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) { spp->fuzz = IntFuzzFree (spp->fuzz); @@ -1837,6 +1868,14 @@ NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean ifp->a = 1; } } + } else if (lim == 3 || lim == 4) { + ifp = IntFuzzNew (); + if (ifp != NULL) { + ifp->choice = 4; + spp->fuzz = IntFuzzFree (spp->fuzz); + spp->fuzz = ifp; + ifp->a = lim; + } } else { if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) { spp->fuzz = IntFuzzFree (spp->fuzz); @@ -1849,10 +1888,17 @@ NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean } } +NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean partial3) + +{ + SetSeqLocPartialEx (location, partial5, partial3, -1); +} + NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location) { ValNodePtr head = NULL, last = NULL, vnp; + Int4 lim; Boolean noLeft; Boolean noRight; SeqLocPtr slp; @@ -1862,7 +1908,7 @@ NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location) slp = SeqLocFindNext (location, NULL); while (slp != NULL) { - CheckSeqLocForPartial (slp, &noLeft, &noRight); + CheckSeqLocForPartialEx (slp, &noLeft, &noRight, &lim); val = 0; if (noLeft) { val |= 2; @@ -1870,6 +1916,11 @@ NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location) if (noRight) { val |= 1; } + if (lim == 3) { + val |= 4; + } else if (lim == 4) { + val |= 8; + } vnp = ValNodeAddInt (&last, 0, val); if (head == NULL) { head = vnp; @@ -1884,6 +1935,7 @@ NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location) NLM_EXTERN void SetSeqLocPartialSet (SeqLocPtr location, ValNodePtr vnp) { + Int4 lim; Boolean noLeft; Boolean noRight; SeqLocPtr slp; @@ -1896,7 +1948,13 @@ 
NLM_EXTERN void SetSeqLocPartialSet (SeqLocPtr location, ValNodePtr vnp) val = (Int4) vnp->data.intvalue; noLeft = (Boolean) ((val & 2) != 0); noRight = (Boolean) ((val & 1) != 0); - SetSeqLocPartial (slp, noLeft, noRight); + lim = -1; + if ((val & 4) != 0) { + lim = 3; + } else if ((val & 8) != 0) { + lim = 4; + } + SetSeqLocPartialEx (slp, noLeft, noRight, lim); slp = SeqLocFindNext (location, slp); vnp = vnp->next; } @@ -2373,7 +2431,8 @@ NLM_EXTERN void PromoteXrefsExEx ( Boolean include_stop, Boolean remove_trailingX, Boolean gen_prod_set, - Boolean force_local_id + Boolean force_local_id, + BoolPtr seq_fetch_failP ) { @@ -2427,6 +2486,10 @@ NLM_EXTERN void PromoteXrefsExEx ( GeneRefPtr grp; */ + if (seq_fetch_failP != NULL) { + *seq_fetch_failP = FALSE; + } + if (sfp == NULL || bsp == NULL) return; /* set subtypes, used to find mRNA features for genomic product sets */ @@ -2569,6 +2632,9 @@ NLM_EXTERN void PromoteXrefsExEx ( } if (sip != NULL || sfp->idx.subtype == FEATDEF_mRNA) { rnaseq = GetSequenceByFeature (sfp); + if (rnaseq == NULL && seq_fetch_failP != NULL) { + *seq_fetch_failP = TRUE; + } if (rnaseq != NULL) { i = (Int4) StringLen (rnaseq); bs = BSNew (i + 2); @@ -2694,6 +2760,9 @@ NLM_EXTERN void PromoteXrefsExEx ( crp->frame = 0; **/ bs = ProteinFromCdRegionEx (sfp, include_stop, remove_trailingX); + if (bs == NULL && seq_fetch_failP != NULL) { + *seq_fetch_failP = TRUE; + } if (bs != NULL) { protseq = BSMerge (bs, NULL); bs = BSFree (bs); @@ -2982,6 +3051,9 @@ NLM_EXTERN void PromoteXrefsExEx ( } if (sip != NULL) { protseq = GetSequenceByFeature (sfp); + if (protseq == NULL && seq_fetch_failP != NULL) { + *seq_fetch_failP = TRUE; + } if (protseq != NULL) { i = (Int4) StringLen (protseq); bs = BSNew (i + 2); @@ -3072,13 +3144,13 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B Boolean remove_trailingX, Boolean gen_prod_set) { - PromoteXrefsExEx (sfp, bsp, entityID, include_stop, remove_trailingX, gen_prod_set, 
FALSE); + PromoteXrefsExEx (sfp, bsp, entityID, include_stop, remove_trailingX, gen_prod_set, FALSE, NULL); } NLM_EXTERN void PromoteXrefs (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID) { - PromoteXrefsExEx (sfp, bsp, entityID, TRUE, FALSE, FALSE, FALSE); + PromoteXrefsExEx (sfp, bsp, entityID, TRUE, FALSE, FALSE, FALSE, NULL); } /* begin BasicSeqEntryCleanup section */ @@ -3297,6 +3369,18 @@ static void CleanVisString (CharPtr PNTR strp) } } +static void CleanVisStringAndCompress (CharPtr PNTR strp) + +{ + if (strp == NULL) return; + if (*strp == NULL) return; + TrimSpacesSemicolonsAndCommas (*strp); + Asn2gnbkCompressSpaces (*strp); + if (HasNoText (*strp)) { + *strp = MemFree (*strp); + } +} + static void CleanVisStringJunk (CharPtr PNTR strp) { @@ -3308,6 +3392,18 @@ static void CleanVisStringJunk (CharPtr PNTR strp) } } +static void CleanVisStringJunkAndCompress (CharPtr PNTR strp) + +{ + if (strp == NULL) return; + if (*strp == NULL) return; + TrimSpacesAndJunkFromEnds (*strp, TRUE); + Asn2gnbkCompressSpaces (*strp); + if (HasNoText (*strp)) { + *strp = MemFree (*strp); + } +} + static void CleanDoubleQuote (CharPtr str) { @@ -3395,6 +3491,31 @@ static void CleanVisStringList (ValNodePtr PNTR vnpp) } } +static void CleanVisStringListAndCompress (ValNodePtr PNTR vnpp) + +{ + ValNodePtr next; + ValNodePtr PNTR prev; + ValNodePtr vnp; + + if (vnpp == NULL) return; + prev = vnpp; + vnp = *vnpp; + while (vnp != NULL) { + next = vnp->next; + TrimSpacesSemicolonsAndCommas (vnp->data.ptrvalue); + Asn2gnbkCompressSpaces (vnp->data.ptrvalue); + if (HasNoText (vnp->data.ptrvalue) || AlreadyInVnpList (*vnpp, vnp)) { + *prev = vnp->next; + vnp->next = NULL; + ValNodeFreeData (vnp); + } else { + prev = &(vnp->next); + } + vnp = next; + } +} + static Boolean AlreadyInVnpListCaseSensitive (ValNodePtr head, ValNodePtr curr) { @@ -4049,6 +4170,23 @@ NLM_EXTERN SeqFeatPtr LIBCALL GetBestProteinFeatureUnindexed (SeqLocPtr product) return prot; } +static void CleanupECNumber 
(CharPtr str) + +{ + size_t len; + + len = StringLen (str); + if (len < 1) return; + if (str [len - 1] == '.') { + str [len - 1] = ' '; + } + if (StringNICmp (str, "EC ", 3) == 0) { + str [0] = ' '; + str [1] = ' '; + } + TrimSpacesAroundString (str); +} + static Boolean HandledGBQualOnCDS (SeqFeatPtr sfp, GBQualPtr gbq, ValNodePtr PNTR afterMe) { @@ -4744,13 +4882,24 @@ static void CleanupReplace (GBQualPtr gbq) } } +static CharPtr evCategoryPfx [] = { + "", + "COORDINATES: ", + "DESCRIPTION: ", + "EXISTENCE: ", + NULL +}; + static void CleanupInference (GBQualPtr gbq) { Char ch; CharPtr colon; CharPtr dst; + Int2 j; + size_t len; CharPtr ptr; + CharPtr skip; CharPtr space; CharPtr str; @@ -4761,6 +4910,16 @@ static void CleanupInference (GBQualPtr gbq) space = NULL; colon = NULL; + skip = NULL; + for (j = 0; evCategoryPfx [j] != NULL; j++) { + len = StringLen (evCategoryPfx [j]); + if (StringNICmp (str, evCategoryPfx [j], len) != 0) continue; + skip = str + len; + } + if (skip != NULL) { + str = skip; + } + dst = str; ptr = str; ch = *ptr; @@ -4793,6 +4952,43 @@ static void CleanupInference (GBQualPtr gbq) *dst = '\0'; } +static CharPtr evCategoryNoSpace [] = { + "", + "COORDINATES:", + "DESCRIPTION:", + "EXISTENCE:", + NULL +}; + +static void RepairInference (GBQualPtr gbq) + +{ + Int2 j; + size_t len; + CharPtr ptr; + CharPtr skip; + CharPtr str; + + if (gbq == NULL) return; + if (StringHasNoText (gbq->val)) return; + + str = gbq->val; + for (j = 0; evCategoryNoSpace [j] != NULL; j++) { + len = StringLen (evCategoryNoSpace [j]); + if (StringNICmp (str, evCategoryNoSpace [j], len) != 0) continue; + if (StringNICmp (str, evCategoryPfx [j], len + 1) == 0) continue; + /* need to repair */ + skip = str + len; + ptr = MemNew (StringLen (skip) + 20); + if (ptr == NULL) return; + StringCpy (ptr, evCategoryPfx [j]); + StringCat (ptr, skip); + gbq->val = MemFree (gbq->val); + gbq->val = ptr; + return; + } +} + static void CleanupConsSplice (GBQualPtr gbq) { @@ -5127,6 
+5323,26 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj) sfp->comment = MemFree (sfp->comment); sfp->comment = str; } + } else if (StringICmp (gbq->qual, "label") == 0) { + if (StringICmp (gbq->val, FindKeyFromFeatDefType (sfp->idx.subtype, FALSE)) == 0) { + /* skip label that is simply the feature key */ + } else if (sfp->comment == NULL || StringISearch (sfp->comment, gbq->qual) == NULL) { + /* if label is not already in comment, append */ + len = StringLen (sfp->comment) + StringLen (gbq->val) + StringLen ("label: ") + 5; + str = MemNew (sizeof (Char) * len); + if (sfp->comment == NULL) { + StringCpy (str, "label: "); + StringCat (str, gbq->val); + sfp->comment = str; + } else { + StringCpy (str, sfp->comment); + StringCat (str, "; "); + StringCat (str, "label: "); + StringCat (str, gbq->val); + sfp->comment = MemFree (sfp->comment); + sfp->comment = str; + } + } } else if (StringICmp (gbq->qual, "db_xref") == 0) { tag = gbq->val; ptr = StringChr (tag, ':'); @@ -5192,6 +5408,9 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj) gbq->qual = StringSave ("rpt_unit_seq"); unlink = FALSE; } + } else if (StringICmp (gbq->qual, "EC_number") == 0) { + CleanupECNumber (gbq->val); + unlink = FALSE; } else if (StringICmp (gbq->qual, "pseudo") == 0) { sfp->pseudo = TRUE; } else if (StringICmp (gbq->qual, "gene") == 0 && (! 
StringHasNoText (gbq->val))) { @@ -5213,6 +5432,7 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj) /* remove default inference string if instantiated */ } else { CleanupInference (gbq); + RepairInference (gbq); unlink = FALSE; } } else if (StringICmp (gbq->qual, "transposon") == 0) { @@ -5283,6 +5503,43 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj) } } } + if (StringICmp (gbq->qual, "mobile_element") == 0) { + if (sfp->data.choice == SEQFEAT_IMP) { + ifp = (ImpFeatPtr) sfp->data.value.ptrvalue; + if (ifp != NULL) { + if (StringICmp (ifp->key, "repeat_region") == 0 && gbq->val != NULL) { + gbq->qual = MemFree (gbq->qual); + gbq->qual = StringSave ("mobile_element_type"); + ifp->key = MemFree (ifp->key); + ifp->key = StringSave ("mobile_element"); + sfp->idx.subtype = FEATDEF_mobile_element; + } + } + } + } + + if (sfp->data.choice == SEQFEAT_IMP) { + ifp = (ImpFeatPtr) sfp->data.value.ptrvalue; + if (ifp != NULL) { + if (StringICmp (ifp->key, "conflict") == 0 ) { + ifp->key = MemFree (ifp->key); + ifp->key = StringSave ("misc_difference"); + sfp->idx.subtype = FEATDEF_misc_difference; + len = StringLen (sfp->comment) + StringLen ("conflict") + 5; + str = MemNew (sizeof (Char) * len); + if (sfp->comment == NULL) { + StringCpy (str, "conflict"); + sfp->comment = str; + } else { + StringCpy (str, "conflict; "); + StringCat (str, sfp->comment); + sfp->comment = MemFree (sfp->comment); + sfp->comment = str; + } + } + } + } + if (rpt_unit_seq != NULL) { CleanupRptUnit (rpt_unit_seq); } @@ -5652,9 +5909,9 @@ static void CleanOrgModListEx (OrgModPtr PNTR ompp, CharPtr orpcommon) while (omp != NULL) { next = omp->next; unlink= FALSE; - CleanVisString (&(omp->subname)); + CleanVisStringAndCompress (&(omp->subname)); TrimSpacesAndJunkFromEnds (omp->subname, FALSE); - CleanVisString (&(omp->attrib)); + CleanVisStringAndCompress (&(omp->attrib)); if (omp->subtype == ORGMOD_common && StringICmp (omp->subname, orpcommon) == 
0) { unlink = TRUE; } else if (last != NULL) { @@ -5955,6 +6212,159 @@ static Uint1 LocationForPlastidText (CharPtr plastid_name) } } +NLM_EXTERN void StringToLower (CharPtr str) + +{ + Char ch; + + if (str == NULL) return; + ch = *str; + while (ch != '\0') { + *str = TO_LOWER (ch); + str++; + ch = *str; + } +} + + +static void CleanPCRPrimerSeq (CharPtr seq) +{ + CharPtr ptr, src, dst, tmp; + Char ch; + Boolean in_brackets = FALSE; + Int4 i; + + if (StringHasNoText (seq)) { + return; + } + + /* upper case sequence */ + ptr = seq; + ch = *ptr; + while (ch != '\0') { + if (IS_UPPER (ch)) { + *ptr = TO_LOWER (ch); + } + ptr++; + ch = *ptr; + } + /* remove any spaces in sequence outisde of */ + src = seq; + dst = seq; + ch = *src; + while (ch != '\0') { + if (ch == '<') { + in_brackets = TRUE; + *dst = ch; + dst++; + } else if (ch == '>') { + in_brackets = FALSE; + *dst = ch; + dst++; + } else if (ch != ' ') { + *dst = ch; + dst++; + } else if (in_brackets) { + *dst = ch; + dst++; + } + src++; + ch = *src; + } + *dst = '\0'; + /* upper case modified base */ + ptr = seq; + tmp = StringStr (ptr, ""); + while (tmp != NULL) { + ptr = tmp + 7; + for (i = 1; i < 6; i++) { + ch = tmp [i]; + tmp [i] = TO_UPPER (ch); + } + tmp = StringStr (ptr, ""); + } +} + + +static void CleanupPCRPrimers (PCRPrimerPtr PNTR pppp) + +{ + PCRPrimerPtr next; + PCRPrimerPtr PNTR prev; + PCRPrimerPtr ppp; + + if (pppp == NULL) return; + + prev = pppp; + ppp = *pppp; + while (ppp != NULL) { + next = ppp->next; + + CleanVisString (&(ppp->seq)); + CleanPCRPrimerSeq (ppp->seq); + CleanVisString (&(ppp->name)); + + if (ppp->seq == NULL && ppp->name == NULL) { + *prev = next; + ppp->next = NULL; + PCRPrimerFree (ppp); + } else { + StringToLower (ppp->seq); + prev = &(ppp->next); + } + + ppp = next; + } + + /* fix artifact caused by fwd/rev-primer-seq starting with colon, separating name and seq */ + + ppp = *pppp; + if (ppp == NULL) return; + next = ppp->next; + if (next == NULL) return; + if 
(next->next != NULL) return; + + if (ppp->name != NULL && ppp->seq == NULL && next->name == NULL && next->seq != NULL) { + ppp->seq = next->seq; + next->seq = NULL; + ppp->next = NULL; + PCRPrimerFree (next); + } else if (ppp->seq != NULL && ppp->name == NULL && next->seq == NULL && next->name != NULL) { + ppp->name = next->name; + next->name = NULL; + ppp->next = NULL; + PCRPrimerFree (next); + } +} + +static void CleanupPCRReactionSet (PCRReactionSetPtr PNTR prpp) + +{ + PCRReactionSetPtr next; + PCRReactionSetPtr PNTR prev; + PCRReactionSetPtr prp; + + if (prpp == NULL) return; + + prev = prpp; + prp = *prpp; + while (prp != NULL) { + next = prp->next; + + CleanupPCRPrimers (&(prp->forward)); + CleanupPCRPrimers (&(prp->reverse)); + + if (prp->forward == NULL && prp->reverse == NULL) { + *prev = next; + prp->next = NULL; + PCRReactionFree (prp); + } else { + prev = &(prp->next); + } + + prp = next; + } +} extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location) @@ -5984,9 +6394,9 @@ extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location) next = ssp->next; unlink= FALSE; if (! IsNoNameSubSource (ssp)) { - CleanVisString (&(ssp->name)); + CleanVisStringAndCompress (&(ssp->name)); TrimSpacesAndJunkFromEnds (ssp->name, FALSE); - } else if (StringICmp (ssp->name, "TRUE") == 0) { + } else /* if (StringICmp (ssp->name, "TRUE") == 0) */ { ssp->name = MemFree (ssp->name); ssp->name = StringSave (""); } @@ -6119,6 +6529,10 @@ extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location) last->name = ssp->name; ssp->name = NULL; unlink = TRUE; + } else if (ssp->subtype == SUBSRC_plastid_name && + location != 0 + && location == LocationForPlastidText (ssp->name)) { + unlink = TRUE; } } else if (HasNoText (ssp->name) && (! 
IsNoNameSubSource (ssp))) { unlink = TRUE; @@ -6601,8 +7015,6 @@ Nlm_QualNameAssoc current_subsource_subtype_alist[] = { {"Endogenous-virus-name", SUBSRC_endogenous_virus_name}, {"Environmental-sample", SUBSRC_environmental_sample}, {"Frequency", SUBSRC_frequency}, - {"Fwd-PCR-primer-name", SUBSRC_fwd_primer_name}, - {"Fwd-PCR-primer-seq", SUBSRC_fwd_primer_seq}, {"Genotype", SUBSRC_genotype}, {"Germline", SUBSRC_germline}, {"Haplogroup", SUBSRC_haplogroup}, @@ -6618,8 +7030,6 @@ Nlm_QualNameAssoc current_subsource_subtype_alist[] = { {"Plasmid-name", SUBSRC_plasmid_name}, {"Pop-variant", SUBSRC_pop_variant}, {"Rearranged", SUBSRC_rearranged}, - {"Rev-PCR-primer-name", SUBSRC_rev_primer_name}, - {"Rev-PCR-primer-seq", SUBSRC_rev_primer_seq}, {"Segment", SUBSRC_segment}, {"Sex", SUBSRC_sex}, {"Subclone", SUBSRC_subclone}, @@ -6635,6 +7045,10 @@ Nlm_QualNameAssoc discouraged_subsource_subtype_alist[] = { Nlm_QualNameAssoc discontinued_subsource_subtype_alist[] = { {"Ins-seq-name", SUBSRC_insertion_seq_name}, {"Transposon-name", SUBSRC_transposon_name}, + {"Fwd-PCR-primer-name", SUBSRC_fwd_primer_name}, + {"Fwd-PCR-primer-seq", SUBSRC_fwd_primer_seq}, + {"Rev-PCR-primer-name", SUBSRC_rev_primer_name}, + {"Rev-PCR-primer-seq", SUBSRC_rev_primer_seq}, { NULL, 0 } }; Nlm_NameNameAssoc subsource_aliases[] = { @@ -6907,47 +7321,154 @@ static CharPtr FindASubSource (BioSourcePtr biop, Uint1 subtype) return NULL; } -static void CleanupOrgModOther (BioSourcePtr biop, OrgNamePtr onp) +static CharPtr FindNextSingleTilde (CharPtr str) { - OrgModPtr next; - OrgModPtr omp; - OrgModPtr PNTR prev; - CharPtr str; - Uint1 subtype_val; - CharPtr tmp; - Boolean unlink; - CharPtr val; + Char ch; - if (biop == NULL || onp == NULL) return; + if (StringHasNoText (str)) return NULL; - prev = &(onp->mod); - omp = onp->mod; - while (omp != NULL) { - next = omp->next; - unlink= FALSE; - if (omp->subtype == ORGMOD_other) { - str = omp->subname; - val = NULL; - subtype_val = 0; - 
StringHasOrgModPrefix (str, &val, &subtype_val, TRUE); - if (val != NULL) { - tmp = FindAnOrgMod (onp, subtype_val); - if (tmp != NULL && StringICmp (tmp, val) == 0) { - unlink = TRUE; + ch = *str; + while (ch != '\0') { + if (ch == ' ') { + if (str [1] == '~') { + str++; + ch = *str; + while (ch == '~') { + str++; + ch = *str; } } else { - subtype_val = 0; - StringHasSubSourcePrefix (str, &val, &subtype_val, TRUE); - if (val != NULL) { - tmp = FindASubSource (biop, subtype_val); - if (tmp != NULL && StringICmp (tmp, val) == 0) { - unlink = TRUE; - } - } + str++; + ch = *str; + } + } else if (ch == '~') { + if (str [1] != '~') return str; + str++; + ch = *str; + while (ch == '~') { + str++; + ch = *str; } + } else { + str++; + ch = *str; } - if (unlink) { + } + + return NULL; +} + +static ValNodePtr SplitAtSingleTilde (CharPtr strs) + +{ + ValNodePtr head = NULL; + CharPtr ptr, str, tmp; + + if (StringHasNoText (strs)) return NULL; + + tmp = StringSave (strs); + str = tmp; + + while (StringDoesHaveText (str)) { + ptr = FindNextSingleTilde (str); + if (ptr != NULL) { + *ptr = '\0'; + ptr++; + } + TrimSpacesAroundString (str); + ValNodeCopyStr (&head, 0, str); + str = ptr; + } + + MemFree (tmp); + return head; +} + +static CharPtr MergeTildeStrings (ValNodePtr head) + +{ + size_t len = 0; + CharPtr prefix = "", ptr, str; + ValNodePtr vnp; + + if (head == NULL) return NULL; + + for (vnp = head; vnp != NULL; vnp = vnp->next) { + str = (CharPtr) vnp->data.ptrvalue; + if (StringHasNoText (str)) continue; + len += StringLen (str) + 1; + } + if (len < 1) return NULL; + + ptr = MemNew (sizeof (Char) * (len + 2)); + if (ptr == NULL) return NULL; + + for (vnp = head; vnp != NULL; vnp = vnp->next) { + str = (CharPtr) vnp->data.ptrvalue; + if (StringHasNoText (str)) continue; + StringCat (ptr, prefix); + StringCat (ptr, str); + prefix = "~"; + } + + return ptr; +} + +static void CleanupOrgModOther (BioSourcePtr biop, OrgNamePtr onp) + +{ + ValNodePtr head, vnp; + OrgModPtr 
next; + OrgModPtr omp; + OrgModPtr PNTR prev; + CharPtr str; + Uint1 subtype_val; + CharPtr tmp; + Boolean unlink; + CharPtr val; + + if (biop == NULL || onp == NULL) return; + + prev = &(onp->mod); + omp = onp->mod; + while (omp != NULL) { + next = omp->next; + unlink= FALSE; + if (omp->subtype == ORGMOD_other) { + str = omp->subname; + head = SplitAtSingleTilde (str); + for (vnp = head; vnp != NULL; vnp = vnp->next) { + str = (CharPtr) vnp->data.ptrvalue; + if (StringHasNoText (str)) continue; + val = NULL; + subtype_val = 0; + StringHasOrgModPrefix (str, &val, &subtype_val, TRUE); + if (val != NULL) { + tmp = FindAnOrgMod (onp, subtype_val); + if (tmp != NULL && StringICmp (tmp, val) == 0) { + vnp->data.ptrvalue = NULL; + } + } else { + subtype_val = 0; + StringHasSubSourcePrefix (str, &val, &subtype_val, TRUE); + if (val != NULL) { + tmp = FindASubSource (biop, subtype_val); + if (tmp != NULL && StringICmp (tmp, val) == 0) { + vnp->data.ptrvalue = NULL; + } + } + } + } + str = MergeTildeStrings (head); + ValNodeFreeData (head); + omp->subname = MemFree (omp->subname); + omp->subname = str; + if (StringHasNoText (str)) { + unlink = TRUE; + } + } + if (unlink) { *prev = omp->next; omp->next = NULL; OrgModFree (omp); @@ -6961,6 +7482,7 @@ static void CleanupOrgModOther (BioSourcePtr biop, OrgNamePtr onp) static void CleanupSubSourceOther (BioSourcePtr biop, OrgNamePtr onp) { + ValNodePtr head, vnp; SubSourcePtr next; SubSourcePtr PNTR prev; SubSourcePtr ssp; @@ -6970,7 +7492,7 @@ static void CleanupSubSourceOther (BioSourcePtr biop, OrgNamePtr onp) Boolean unlink; CharPtr val; - if (biop == NULL || onp == NULL) return; + if (biop == NULL /* || onp == NULL */ ) return; prev = &(biop->subtype); ssp = biop->subtype; @@ -6979,24 +7501,36 @@ static void CleanupSubSourceOther (BioSourcePtr biop, OrgNamePtr onp) unlink = FALSE; if (ssp->subtype == SUBSRC_other) { str = ssp->name; - val = NULL; - subtype_val = 0; - StringHasOrgModPrefix (str, &val, &subtype_val, TRUE); - 
if (val != NULL) { - tmp = FindAnOrgMod (onp, subtype_val); - if (tmp != NULL && StringICmp (tmp, val) == 0) { - unlink = TRUE; - } - } else { + head = SplitAtSingleTilde (str); + for (vnp = head; vnp != NULL; vnp = vnp->next) { + str = (CharPtr) vnp->data.ptrvalue; + if (StringHasNoText (str)) continue; + val = NULL; subtype_val = 0; - StringHasSubSourcePrefix (str, &val, &subtype_val, TRUE); + StringHasOrgModPrefix (str, &val, &subtype_val, TRUE); if (val != NULL) { - tmp = FindASubSource (biop, subtype_val); + tmp = FindAnOrgMod (onp, subtype_val); if (tmp != NULL && StringICmp (tmp, val) == 0) { - unlink = TRUE; + vnp->data.ptrvalue = NULL; + } + } else { + subtype_val = 0; + StringHasSubSourcePrefix (str, &val, &subtype_val, TRUE); + if (val != NULL) { + tmp = FindASubSource (biop, subtype_val); + if (tmp != NULL && StringICmp (tmp, val) == 0) { + vnp->data.ptrvalue = NULL; + } } } } + str = MergeTildeStrings (head); + ValNodeFreeData (head); + ssp->name = MemFree (ssp->name); + ssp->name = str; + if (StringHasNoText (str)) { + unlink = TRUE; + } } if (unlink) { *prev = ssp->next; @@ -7878,16 +8412,16 @@ static AffilPtr CleanAffil (AffilPtr afp) { if (afp == NULL) return NULL; - CleanVisStringJunk (&(afp->affil)); - CleanVisStringJunk (&(afp->div)); - CleanVisStringJunk (&(afp->city)); - CleanVisStringJunk (&(afp->sub)); - CleanVisStringJunk (&(afp->country)); - CleanVisStringJunk (&(afp->street)); - CleanVisStringJunk (&(afp->email)); - CleanVisStringJunk (&(afp->fax)); - CleanVisStringJunk (&(afp->phone)); - CleanVisStringJunk (&(afp->postal_code)); + CleanVisStringJunkAndCompress (&(afp->affil)); + CleanVisStringJunkAndCompress (&(afp->div)); + CleanVisStringJunkAndCompress (&(afp->city)); + CleanVisStringJunkAndCompress (&(afp->sub)); + CleanVisStringJunkAndCompress (&(afp->country)); + CleanVisStringJunkAndCompress (&(afp->street)); + CleanVisStringJunkAndCompress (&(afp->email)); + CleanVisStringJunkAndCompress (&(afp->fax)); + 
CleanVisStringJunkAndCompress (&(afp->phone)); + CleanVisStringJunkAndCompress (&(afp->postal_code)); if (afp->choice == 2) { if (StringCmp (afp->country, "U.S.A.") == 0) { afp->country = MemFree (afp->country); @@ -7922,12 +8456,21 @@ static void NormalizeAuthors (AuthListPtr alp, Boolean fixInitials) ValNodePtr PNTR prev; CharPtr str; Boolean upcaseinits; + ValNodePtr vnp; Boolean zap; if (alp == NULL) return; alp->affil = CleanAffil (alp->affil); - if (alp == NULL || alp->choice != 1) return; + if (alp->choice == 2 || alp->choice == 3) { + for (vnp = alp->names; vnp != NULL; vnp = vnp->next) { + str = (CharPtr) vnp->data.ptrvalue; + TrimSpacesAroundString (str); + Asn2gnbkCompressSpaces (str); + } + } + if (alp->choice != 1) return; + prev = &(alp->names); names = alp->names; while (names != NULL) { @@ -8008,6 +8551,10 @@ static void NormalizeAuthors (AuthListPtr alp, Boolean fixInitials) StringHasNoText (nsp->names [6])) { zap = TRUE; } + /* last name is required, so zap if not present */ + if (StringHasNoText (nsp->names [0])) { + zap = TRUE; + } } } else if (pid->choice == 3 || pid->choice == 4 || pid->choice == 5) { TrimSpacesAroundString ((CharPtr) pid->data); @@ -8168,6 +8715,7 @@ static void NormalizeAPub (ValNodePtr vnp, Boolean stripSerial, Boolean fixIniti CitBookPtr cbp; CitGenPtr cgp; CitJourPtr cjp; + CitPatPtr cpp; CitSubPtr csp; ImprintPtr imp; CharPtr str; @@ -8210,7 +8758,7 @@ static void NormalizeAPub (ValNodePtr vnp, Boolean stripSerial, Boolean fixIniti csp->date = imp->date; imp->date = NULL; } - if (imp != NULL && imp->pub == NULL) { + if (imp != NULL && imp->date == NULL) { csp->imp = ImprintFree (csp->imp); } if (alp != NULL && alp->affil != NULL) { @@ -8256,17 +8804,26 @@ static void NormalizeAPub (ValNodePtr vnp, Boolean stripSerial, Boolean fixIniti } } break; + case PUB_Patent : + cpp = (CitPatPtr) vnp->data.ptrvalue; + if (cpp != NULL) { + if (StringCmp (cpp->country, "USA") == 0) { + cpp->country = MemFree (cpp->country); + 
cpp->country = StringSave ("US"); + } + } + break; default : break; } if (imp != NULL) { - CleanVisString (&(imp->volume)); - CleanVisString (&(imp->issue)); - CleanVisString (&(imp->pages)); - CleanVisString (&(imp->section)); - CleanVisString (&(imp->part_sup)); - CleanVisString (&(imp->language)); - CleanVisString (&(imp->part_supi)); + CleanVisStringAndCompress (&(imp->volume)); + CleanVisStringAndCompress (&(imp->issue)); + CleanVisStringAndCompress (&(imp->pages)); + CleanVisStringAndCompress (&(imp->section)); + CleanVisStringAndCompress (&(imp->part_sup)); + CleanVisStringAndCompress (&(imp->language)); + CleanVisStringAndCompress (&(imp->part_supi)); } } @@ -8330,7 +8887,7 @@ static void NormalizePubdesc (PubdescPtr pdp, Boolean stripSerial, Boolean doAut Int4 artpmid = 0; Char buf1 [121]; Char buf2 [121]; - CitArtPtr cap; + CitArtPtr cap = NULL; CitGenPtr cgp; CitJourPtr cjp; Boolean fixInitials = TRUE; @@ -8451,7 +9008,9 @@ static void NormalizePubdesc (PubdescPtr pdp, Boolean stripSerial, Boolean doAut vnp = next; } if (pmid == 0 && artpmid > 0) { - vnp = ValNodeAddInt (&(pdp->pub), PUB_PMid, artpmid); + ValNodeAddInt (&(pdp->pub), PUB_PMid, artpmid); + } else if (pmid > 0 && artpmid == 0 && cap != NULL) { + ValNodeAddInt (&(cap->ids), ARTICLEID_PUBMED, pmid); } } @@ -8703,14 +9262,14 @@ static void CleanUserFields ( } -static void CleanStructuredComment ( -UserObjectPtr uop +NLM_EXTERN void CleanStructuredComment ( + UserObjectPtr uop ) { + Boolean genome_assembly_data = FALSE; UserFieldPtr ufp; - Int4 len; - CharPtr str, new_str, cp; + CharPtr str, core, new_str; if (uop == NULL || uop->type == NULL || StringCmp (uop->type->str, "StructuredComment") != 0) { @@ -8722,80 +9281,49 @@ UserObjectPtr uop && ufp->choice == 1 && (str = (CharPtr) ufp->data.ptrvalue) != NULL) { if (StringCmp (ufp->label->str, "StructuredCommentPrefix") == 0) { - len = StringLen (str); - if (StringNCmp (str, "##", 2) == 0 && len > 12 && StringCmp (str + len - 12, "Data-START##") 
== 0) { - /* it's ok, no changes necessary */ - } else { - cp = str + len - 1; - /* strip trailing pound signs (if present) */ - while (cp > str && *cp == '#') { - *cp = 0; - cp--; - } - /* remove START (if present) */ - if (cp - str > 4 && StringICmp (cp - 4, "START") == 0) { - cp -= 4; - *cp = 0; - cp--; - } - /* remove dash (if present) */ - if (cp > str && *cp == '-') { - *cp = 0; - cp--; - } - /* remove Data (if present) */ - if (cp - str > 3 && StringICmp (cp - 3, "Data") == 0) { - cp -= 3; - *cp = 0; - } - - /* skip leading pound signs */ - cp = str; - while (*cp == '#') { - ++cp; - } - new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (cp) + 15)); - sprintf (new_str, "##%sData-START##", cp); - str = MemFree (str); - ufp->data.ptrvalue = new_str; + core = StructuredCommentDbnameFromString(str); + new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (core) + 15)); + sprintf (new_str, "##%s-START##", core); + str = MemFree (str); + ufp->data.ptrvalue = new_str; + if (StringCmp (core, "Genome-Assembly-Data") == 0) { + genome_assembly_data = TRUE; } + core = MemFree (core); } else if (StringCmp (ufp->label->str, "StructuredCommentSuffix") == 0) { - len = StringLen (str); - if (StringNCmp (str, "##", 2) == 0 && len > 10 && StringCmp (str + len - 10, "Data-END##") == 0) { - /* it's ok, no changes necessary */ - } else { - cp = str + len - 1; - /* strip trailing pound signs (if present) */ - while (cp > str && *cp == '#') { - *cp = 0; - cp--; - } - /* remove END (if present) */ - if (cp - str > 2 && StringICmp (cp - 2, "END") == 0) { - cp -= 2; - *cp = 0; - cp--; - } - /* remove dash (if present) */ - if (cp > str && *cp == '-') { - *cp = 0; - cp--; - } - /* remove Data (if present) */ - if (cp - str > 3 && StringICmp (cp - 3, "Data") == 0) { - cp -= 3; - *cp = 0; - } - - /* skip leading pound signs */ - cp = str; - while (*cp == '#') { - ++cp; - } - new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (cp) + 15)); - sprintf (new_str, "##%sData-END##", 
cp); - str = MemFree (str); - ufp->data.ptrvalue = new_str; + core = StructuredCommentDbnameFromString(str); + new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (core) + 15)); + sprintf (new_str, "##%s-END##", core); + str = MemFree (str); + ufp->data.ptrvalue = new_str; + if (StringCmp (core, "Genome-Assembly-Data") == 0) { + genome_assembly_data = TRUE; + } + core = MemFree (core); + } + } + } + + if (genome_assembly_data) { + for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { + if (ufp->label != NULL + && ufp->choice == 1 + && (str = (CharPtr) ufp->data.ptrvalue) != NULL) { + if (StringCmp (ufp->label->str, "Finishing Goal") == 0 || + StringCmp (ufp->label->str, "Current Finishing Status") == 0) { + if (StringCmp (str, "High Quality Draft") == 0) { + ufp->data.ptrvalue = StringSave ("High-Quality Draft"); + str = MemFree (str); + } else if (StringCmp (str, "Improved High Quality Draft") == 0) { + ufp->data.ptrvalue = StringSave ("Improved High-Quality Draft"); + str = MemFree (str); + } else if (StringCmp (str, "Annotation Directed") == 0) { + ufp->data.ptrvalue = StringSave ("Annotation-Directed Improvement"); + str = MemFree (str); + } else if (StringCmp (str, "Non-contiguous Finished") == 0) { + ufp->data.ptrvalue = StringSave ("Noncontiguous Finished"); + str = MemFree (str); + } } } } @@ -9434,10 +9962,59 @@ static Boolean NotExceptedRibosomalName ( return FALSE; } +NLM_EXTERN void CleanupSubSourceOrgModOtherFeat ( + SeqFeatPtr sfp, + Pointer userdata +) + +{ + BioSourcePtr biop; + OrgNamePtr onp = NULL; + OrgRefPtr orp; + + if (sfp == NULL) return; + if (sfp->data.choice != SEQFEAT_BIOSRC) return; + biop = (BioSourcePtr) sfp->data.value.ptrvalue; + if (biop == NULL) return; + orp = biop->org; + if (orp != NULL) { + onp = orp->orgname; + if (orp != NULL) { + CleanupOrgModOther (biop, onp); + } + } + CleanupSubSourceOther (biop, onp); +} + +NLM_EXTERN void CleanupSubSourceOrgModOtherDesc ( + SeqDescrPtr sdp, + Pointer userdata +) + +{ + BioSourcePtr 
biop; + OrgNamePtr onp = NULL; + OrgRefPtr orp; + + if (sdp == NULL) return; + if (sdp->choice != Seq_descr_source) return; + biop = (BioSourcePtr) sdp->data.ptrvalue; + if (biop == NULL) return; + orp = biop->org; + if (orp != NULL) { + onp = orp->orgname; + if (orp != NULL) { + CleanupOrgModOther (biop, onp); + } + } + CleanupSubSourceOther (biop, onp); +} + static void CleanupFeatureStrings ( SeqFeatPtr sfp, Boolean isJscan, Boolean stripSerial, + Boolean modernizeFeats, ValNodePtr PNTR publist ) @@ -9453,12 +10030,13 @@ static void CleanupFeatureStrings ( Boolean justTrnaText; size_t len; CharPtr name; - OrgNamePtr onp; + OrgNamePtr onp = NULL; OrgRefPtr orp; PubdescPtr pdp; ProtRefPtr prp; CharPtr ptr; RnaRefPtr rrp; + SubSourcePtr ssp; CharPtr str; CharPtr suff; CharPtr temp; @@ -9472,6 +10050,17 @@ static void CleanupFeatureStrings ( if (sfp == NULL) return; CleanVisString (&(sfp->comment)); + len = StringLen (sfp->comment); + if (len > 4) { + if (StringCmp (sfp->comment + len - 3, ",..") == 0 || + StringCmp (sfp->comment + len - 3, ".,.") == 0 || + StringCmp (sfp->comment + len - 3, "..,") == 0 || + StringCmp (sfp->comment + len - 3, ",.,") == 0) { + sfp->comment [len - 3] = '.'; + sfp->comment [len - 2] = '.'; + sfp->comment [len - 1] = '.'; + } + } CleanVisString (&(sfp->title)); CleanVisString (&(sfp->except_text)); if (StringDoesHaveText (sfp->except_text)) { @@ -9640,8 +10229,13 @@ static void CleanupFeatureStrings ( break; case SEQFEAT_PROT : prp = (ProtRefPtr) sfp->data.value.ptrvalue; - CleanVisString (&(prp->desc)); - CleanVisStringList (&(prp->name)); + for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) { + str = (CharPtr) vnp->data.ptrvalue; + if (StringHasNoText (str)) continue; + CleanupECNumber (str); + } + CleanVisStringAndCompress (&(prp->desc)); + CleanVisStringListAndCompress (&(prp->name)); CleanVisStringList (&(prp->ec)); CleanVisStringList (&(prp->activity)); CleanDoubleQuote (prp->desc); @@ -9703,7 +10297,7 @@ static void 
CleanupFeatureStrings ( case SEQFEAT_RNA : rrp = (RnaRefPtr) sfp->data.value.ptrvalue; if (rrp->ext.choice == 1) { - CleanVisString ((CharPtr PNTR) &(rrp->ext.value.ptrvalue)); + CleanVisStringAndCompress ((CharPtr PNTR) &(rrp->ext.value.ptrvalue)); CleanDoubleQuote ((CharPtr) rrp->ext.value.ptrvalue); if (rrp->ext.value.ptrvalue == NULL) { rrp->ext.choice = 0; @@ -9803,14 +10397,14 @@ static void CleanupFeatureStrings ( if (rrp->ext.choice == 3) { rgp = (RNAGenPtr) rrp->ext.value.ptrvalue; if (rgp != NULL) { - CleanVisString (&(rgp->product)); + CleanVisStringAndCompress (&(rgp->product)); CleanDoubleQuote (rgp->product); - CleanVisString (&(rgp->_class)); + CleanVisStringAndCompress (&(rgp->_class)); CleanDoubleQuote (rgp->_class); for (rqp = rgp->quals; rqp != NULL; rqp = rqp->next) { - CleanVisString (&(rqp->qual)); + CleanVisStringAndCompress (&(rqp->qual)); CleanDoubleQuote (rqp->qual); - CleanVisString (&(rqp->val)); + CleanVisStringAndCompress (&(rqp->val)); CleanDoubleQuote (rqp->val); } } @@ -9841,7 +10435,7 @@ static void CleanupFeatureStrings ( } } */ - if (rrp->type == 4) { + if (rrp->type == 4 && rrp->ext.choice == 1 ) { name = (CharPtr) rrp->ext.value.ptrvalue; len = StringLen (name); if (len > 5 && NotExceptedRibosomalName (name)) { @@ -10039,6 +10633,15 @@ static void CleanupFeatureStrings ( } name = MemFree (name); } + if ((rrp->type == 255 || rrp->type == 10) && rrp->ext.choice == 0 && sfp->comment != NULL) { + if (StringICmp (sfp->comment, "internal transcribed spacer 1") == 0 || + StringICmp (sfp->comment, "internal transcribed spacer 2") == 0 || + StringICmp (sfp->comment, "internal transcribed spacer 3") == 0) { + rrp->ext.choice = 1; + rrp->ext.value.ptrvalue = sfp->comment; + sfp->comment = NULL; + } + } break; case SEQFEAT_PUB : pdp = (PubdescPtr) sfp->data.value.ptrvalue; @@ -10054,7 +10657,7 @@ static void CleanupFeatureStrings ( CleanVisString (&(ifp->descr)); break; case SEQFEAT_REGION : - CleanVisString ((CharPtr PNTR) 
&(sfp->data.value.ptrvalue)); + CleanVisStringAndCompress ((CharPtr PNTR) &(sfp->data.value.ptrvalue)); CleanDoubleQuote ((CharPtr) sfp->data.value.ptrvalue); if (sfp->data.value.ptrvalue == NULL) { sfp->data.choice = SEQFEAT_COMMENT; @@ -10069,7 +10672,7 @@ static void CleanupFeatureStrings ( case SEQFEAT_RSITE : break; case SEQFEAT_USER : - VisitUserObjectsInUop ((UserObjectPtr) sfp->data.value.ptrvalue, NULL, CleanUserObject); + VisitAllUserObjectsInUop ((UserObjectPtr) sfp->data.value.ptrvalue, NULL, CleanUserObject); break; case SEQFEAT_TXINIT : break; @@ -10089,24 +10692,36 @@ static void CleanupFeatureStrings ( } orp = biop->org; if (orp != NULL) { - CleanVisStringList (&(orp->mod)); + CleanVisStringListAndCompress (&(orp->mod)); OrpModToSubSource (&(orp->mod), &(biop->subtype)); onp = orp->orgname; if (onp != NULL) { CleanupOrgModOther (biop, onp); - CleanupSubSourceOther (biop, onp); } } biop->subtype = SortSubSourceList (biop->subtype); CleanSubSourceList (&(biop->subtype), biop->genome); + CleanupSubSourceOther (biop, onp); + biop->subtype = SortSubSourceList (biop->subtype); + if (modernizeFeats) { + ModernizePCRPrimers (biop); + } + CleanupPCRReactionSet (&(biop->pcr_primers)); + if (biop->genome == GENOME_unknown || biop->genome == GENOME_genomic) { + for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { + if (ssp->subtype == SUBSRC_plasmid_name) { + biop->genome = GENOME_plasmid; + } + } + } } break; default : break; } if (orp != NULL) { - CleanVisString (&(orp->taxname)); - CleanVisString (&(orp->common)); + CleanVisStringAndCompress (&(orp->taxname)); + CleanVisStringAndCompress (&(orp->common)); CleanVisStringList (&(orp->mod)); CleanVisStringList (&(orp->syn)); FixOldDbxrefs (orp->db); @@ -10122,20 +10737,28 @@ static void CleanupFeatureStrings ( OrpModToOrgMod (&(orp->mod), &(onp->mod)); onp->mod = SortOrgModList (onp->mod); CleanOrgModListEx (&(onp->mod), orp->common); + onp->mod = SortOrgModList (onp->mod); onp = onp->next; } } } -static 
void CleanupDescriptorStrings (ValNodePtr sdp, Boolean stripSerial, ValNodePtr PNTR publist, Boolean isEmblOrDdbj) +static void CleanupDescriptorStrings ( + ValNodePtr sdp, + Boolean stripSerial, + Boolean modernizeFeats, + ValNodePtr PNTR publist, + Boolean isEmblOrDdbj +) { BioSourcePtr biop; EMBLBlockPtr ebp; GBBlockPtr gbp; - OrgNamePtr onp; + OrgNamePtr onp = NULL; OrgRefPtr orp; PubdescPtr pdp; + SubSourcePtr ssp; if (sdp == NULL) return; switch (sdp->choice) { @@ -10163,7 +10786,7 @@ static void CleanupDescriptorStrings (ValNodePtr sdp, Boolean stripSerial, ValNo } break; case Seq_descr_title : - CleanVisString ((CharPtr PNTR) &sdp->data.ptrvalue); + CleanVisStringAndCompress ((CharPtr PNTR) &sdp->data.ptrvalue); if (sdp->data.ptrvalue == NULL) { sdp->data.ptrvalue = StringSave (""); } @@ -10218,7 +10841,7 @@ static void CleanupDescriptorStrings (ValNodePtr sdp, Boolean stripSerial, ValNo } break; case Seq_descr_user : - VisitUserObjectsInUop ((UserObjectPtr) sdp->data.ptrvalue, NULL, CleanUserObject); + VisitAllUserObjectsInUop ((UserObjectPtr) sdp->data.ptrvalue, NULL, CleanUserObject); break; case Seq_descr_sp : break; @@ -10253,11 +10876,23 @@ static void CleanupDescriptorStrings (ValNodePtr sdp, Boolean stripSerial, ValNo onp = orp->orgname; if (onp != NULL) { CleanupOrgModOther (biop, onp); - CleanupSubSourceOther (biop, onp); } } biop->subtype = SortSubSourceList (biop->subtype); CleanSubSourceList (&(biop->subtype), biop->genome); + CleanupSubSourceOther (biop, onp); + biop->subtype = SortSubSourceList (biop->subtype); + if (modernizeFeats) { + ModernizePCRPrimers (biop); + } + CleanupPCRReactionSet (&(biop->pcr_primers)); + if (biop->genome == GENOME_unknown || biop->genome == GENOME_genomic) { + for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { + if (ssp->subtype == SUBSRC_plasmid_name) { + biop->genome = GENOME_plasmid; + } + } + } } break; case Seq_descr_molinfo : @@ -10266,8 +10901,8 @@ static void CleanupDescriptorStrings (ValNodePtr 
sdp, Boolean stripSerial, ValNo break; } if (orp != NULL) { - CleanVisString (&(orp->taxname)); - CleanVisString (&(orp->common)); + CleanVisStringAndCompress (&(orp->taxname)); + CleanVisStringAndCompress (&(orp->common)); CleanVisStringList (&(orp->mod)); CleanVisStringList (&(orp->syn)); FixOldDbxrefs (orp->db); @@ -10283,6 +10918,7 @@ static void CleanupDescriptorStrings (ValNodePtr sdp, Boolean stripSerial, ValNo OrpModToOrgMod (&(orp->mod), &(onp->mod)); onp->mod = SortOrgModList (onp->mod); CleanOrgModListEx (&(onp->mod), orp->common); + onp->mod = SortOrgModList (onp->mod); onp = onp->next; } } @@ -10488,12 +11124,16 @@ NLM_EXTERN void CleanUpSeqLoc (SeqLocPtr slp) { BioseqPtr bsp; + SeqLocPtr curr; + SeqLocPtr head; SeqLocPtr last; SeqLocPtr loc; + SeqLocPtr next; SeqIdPtr sip; SeqIntPtr sintp; SeqPntPtr spp; Int4 swp; + SeqLocPtr tail; if (slp == NULL) return; @@ -10579,11 +11219,40 @@ NLM_EXTERN void CleanUpSeqLoc (SeqLocPtr slp) } loc = (SeqLocPtr) slp->data.ptrvalue; - if (loc == NULL || loc->next != NULL) return; - /* here seqloc_mix points to a single location element, so no need for seqloc_mix parent */ - slp->choice = loc->choice; - slp->data.ptrvalue = (Pointer) loc->data.ptrvalue; - MemFree (loc); + if (loc == NULL) return; + + if (loc->next == NULL) { + /* here seqloc_mix points to a single location element, so no need for seqloc_mix parent */ + slp->choice = loc->choice; + slp->data.ptrvalue = (Pointer) loc->data.ptrvalue; + MemFree (loc); + return; + } + + /* check for nested seqloc_mix, remove nesting */ + curr = loc; + last = NULL; + while (curr != NULL) { + next = curr->next; + if (curr->choice == SEQLOC_MIX) { + head = (SeqLocPtr) curr->data.ptrvalue; + if (head != NULL) { + tail = head; + while (tail->next != NULL) { + tail = tail->next; + } + if (last != NULL) { + last->next = head; + } + tail->next = curr->next; + curr->next = NULL; + curr = MemFree (curr); + } + } else { + last = curr; + } + curr = next; + } } typedef struct cbloc { @@ 
-10782,14 +11451,14 @@ static CharPtr GetMiRNAProduct (CharPtr str) { len = StringLen (str); if (len > 6 && StringCmp (str + len - 6, " miRNA") == 0 - && (len < 15 || StringCmp (str - 15, "precursor miRNA") != 0)) + && (len < 15 || StringCmp (str + len - 15, "precursor miRNA") != 0)) { product = (CharPtr) MemNew (sizeof (Char) * (len - 5)); StringNCpy (product, str, len - 6); product[len - 6] = 0; } else if (len > 9 && StringCmp (str + len - 9, " microRNA") == 0 - && (len < 21 || StringCmp (str - 21, "precursor microRNA") != 0)) + && (len < 18 || StringCmp (str + len - 18, "precursor microRNA") != 0)) { product = (CharPtr) MemNew (sizeof (Char) * (len - 8)); StringNCpy (product, str, len - 9); @@ -11027,17 +11696,21 @@ static Boolean IsFeatureCommentRedundant (SeqFeatPtr sfp) CodeBreakPtr cbp; CharPtr comment; CdRegionPtr crp; + SeqFeatPtr feat; Uint1 from; GBQualPtr gbq; GeneRefPtr grp; CharPtr name; + BioseqPtr prod; ProtRefPtr prp; Uint1 residue; RNAGenPtr rgp; RNAQualPtr rqp; RnaRefPtr rrp; + SeqAnnotPtr sap; SeqCodeTablePtr sctp; Uint1 seqcode; + SeqIdPtr sip; SeqMapTablePtr smtp; CharPtr str; tRNAPtr trp; @@ -11113,6 +11786,27 @@ static Boolean IsFeatureCommentRedundant (SeqFeatPtr sfp) } } } + if (sfp->product != NULL) { + sip = SeqLocId (sfp->product); + if (sip != NULL) { + prod = BioseqFind (sip); + if (prod != NULL) { + for (sap = prod->annot; sap != NULL; sap = sap->next) { + if (sap->type != 1) continue; + for (feat = (SeqFeatPtr) sap->data; feat != NULL; feat = feat->next) { + if (feat->data.choice != SEQFEAT_PROT) continue; + prp = (ProtRefPtr) feat->data.value.ptrvalue; + if (prp == NULL) continue; + for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) { + str = (CharPtr) vnp->data.ptrvalue; + if (StringHasNoText (str)) continue; + if (StringCmp (comment, str) == 0) return TRUE; + } + } + } + } + } + } break; case SEQFEAT_PROT: prp = (ProtRefPtr) sfp->data.value.ptrvalue; @@ -11258,6 +11952,10 @@ static CharPtr ExtractSatelliteFromComment (CharPtr 
comment) } TrimSpacesAroundString (comment); } + if (comment != NULL && comment [0] == '~' && comment [1] != '~') { + comment [0] = ' '; + TrimSpacesAroundString (comment); + } return satellite_qual; } @@ -11270,13 +11968,32 @@ static void DoModernizeRNAFields (SeqFeatPtr sfp) RNAGenPtr rgp; RNAQualSetPtr rqp; RnaRefPtr rrp; + CharPtr str; Boolean unlink; + Int2 i; + size_t len; + CharPtr ncclass; + CharPtr product; + CharPtr tmp; if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return; ModernizeRNAFields (sfp); rrp = (RnaRefPtr) sfp->data.value.ptrvalue; - if (rrp == NULL || rrp->ext.choice != 3) return; + if (rrp == NULL) return; + + if (rrp->ext.choice == 1 && rrp->type == 10) { + str = rrp->ext.value.ptrvalue; + if (StringHasNoText (str)) return; + + rgp = (RNAGenPtr) MemNew (sizeof (RNAGen)); + if (rgp == NULL) return; + rrp->ext.choice = 3; + rrp->ext.value.ptrvalue = (Pointer) rgp; + rgp->product = str; + } + + if (rrp->ext.choice != 3) return; rgp = (RNAGenPtr) rrp->ext.value.ptrvalue; if (rgp == NULL) return; @@ -11299,6 +12016,29 @@ static void DoModernizeRNAFields (SeqFeatPtr sfp) rqp = nextrqp; } + if (rrp->type == 10 && StringDoesHaveText (rgp->product) && rgp->_class == NULL) { + ncclass = rgp->product; + for (i = 0; ncrnaClassList [i] != NULL; i++) { + str = ncrnaClassList [i]; + if (StringHasNoText (str)) continue; + len = StringLen (str); + if (len < 1) continue; + if (StringNICmp (ncclass, str, len) != 0) continue; + if (ncclass [len] != ' ') continue; + tmp = ncclass + len + 1; + if (StringHasNoText (tmp)) continue; + ncclass [len] = '\0'; + rgp->_class = StringSave (ncclass); + product = StringSave (tmp); + rgp->product = MemFree (rgp->product); + rgp->product = product; + TrimSpacesAroundString (rgp->_class); + TrimSpacesAroundString (rgp->product); + rrp->type = 8; + sfp->idx.subtype = FEATDEF_ncRNA; + } + } + if (rgp->quals != NULL) return; if (StringDoesHaveText (rgp->_class) || StringDoesHaveText (rgp->product)) return; @@ -11385,7 
+12125,7 @@ NLM_EXTERN void CleanUpSeqFeat ( ifp->key = MemFree (ifp->key); ifp->key = StringSave ("misc_binding"); sfp->idx.subtype = FEATDEF_misc_binding; - } else if (StringCmp (ifp->key, "satellite") == 0 ) { + } else if (StringCmp (ifp->key, "satellite") == 0 && (! isEmblOrDdbj)) { ifp->key = MemFree (ifp->key); ifp->key = StringSave ("repeat_region"); sfp->idx.subtype = FEATDEF_repeat_region; @@ -11449,7 +12189,7 @@ NLM_EXTERN void CleanUpSeqFeat ( } } } - if (sfp->data.choice == SEQFEAT_IMP && StringCmp (ifp->key, "repeat_region") == 0) { + if (sfp->data.choice == SEQFEAT_IMP && StringCmp (ifp->key, "repeat_region") == 0 && (! isEmblOrDdbj)) { satellite_type = ExtractSatelliteFromComment (sfp->comment); if (satellite_type != NULL) { gbq = GBQualNew (); @@ -11515,7 +12255,7 @@ NLM_EXTERN void CleanUpSeqFeat ( CleanupDuplicateGBQuals (&(sfp->qual)); CleanupFeatureGBQuals (sfp, isEmblOrDdbj); sfp->qual = SortIllegalGBQuals (sfp->qual); - CleanupFeatureStrings (sfp, isJscan, stripSerial, publist); + CleanupFeatureStrings (sfp, isJscan, stripSerial, modernizeFeats, publist); FixOldDbxrefs (sfp->dbxref); FixNumericDbxrefs (sfp->dbxref); sfp->dbxref = ValNodeSort (sfp->dbxref, SortDbxref); @@ -11638,7 +12378,7 @@ NLM_EXTERN void CleanUpSeqFeat ( CheckSeqLocForPartial (sfp->location, &partial5, &partial3); hasNulls = LocationHasNullsBetween (sfp->location); - sfp->partial = (sfp->partial || partial5 || partial3 || hasNulls); + sfp->partial = (sfp->partial || partial5 || partial3 || (hasNulls && ! 
isEmblOrDdbj)); prevlink = (SeqFeatXrefPtr PNTR) &(sfp->xref); xref = sfp->xref; @@ -11765,8 +12505,46 @@ static void CleanSeqIdInSeqAnnot (SeqAnnotPtr annot, Pointer userdata) VisitSeqIdsInSeqAnnot (annot, NULL, CleanUpSeqId); } +typedef struct npcounts { + Int4 nucs; + Int4 prots; +} NPCounts, PNTR NPCountsPtr; + +static void CountNucsAndProts (BioseqPtr bsp, Pointer userdata) + +{ + NPCountsPtr ncp; + + if (bsp == NULL) return; + ncp = (NPCountsPtr) userdata; + if (ncp == NULL) return; + + if (ISA_na (bsp->mol)) { + (ncp->nucs)++; + } else if (ISA_aa (bsp->mol)) { + (ncp->prots)++; + } +} + +static void FixBadSetClass (BioseqSetPtr bssp, Pointer userdata) + +{ + NPCounts nc; + + if (bssp == NULL) return; + if (bssp->_class != BioseqseqSet_class_not_set && bssp->_class != BioseqseqSet_class_other) return; + + MemSet ((Pointer) &nc, 0, sizeof (NPCounts)); + VisitSequencesInSet (bssp, (Pointer) &nc, VISIT_MAINS, CountNucsAndProts); + if (nc.nucs == 1 && nc.prots > 0) { + bssp->_class = BioseqseqSet_class_nuc_prot; + } else { + bssp->_class = BioseqseqSet_class_genbank; + } +} static void RemoveDuplicateSeqIds (BioseqPtr bsp) + { SeqIdPtr sip, sip_cmp, sip_prev, sip_next; @@ -11913,7 +12691,7 @@ static void BasicSeqEntryCleanupInternal ( default : break; } - CleanupDescriptorStrings (sdp, stripSerial, publist, isEmblOrDdbj); + CleanupDescriptorStrings (sdp, stripSerial, TRUE, publist, isEmblOrDdbj); sdp = sdp->next; } @@ -12326,6 +13104,10 @@ NLM_EXTERN void BasicSeqEntryCleanup (SeqEntryPtr sep) VisitGraphsInSep (sep, NULL, CleanSeqIdInSeqGraph); VisitAnnotsInSep (sep, NULL, CleanSeqIdInSeqAnnot); + /* Fix Bioseq-sets with class 0 */ + + VisitSetsInSep (sep, NULL, FixBadSetClass); + /* removed unnecessarily nested Pub-equivs */ VisitPubdescsInSep (sep, NULL, FlattenPubdesc); @@ -12387,22 +13169,110 @@ NLM_EXTERN void BasicSeqEntryCleanup (SeqEntryPtr sep) } } -static void SortSeqFeatFields ( +typedef struct bsecsmfedata { + Int4 max; + Int4 num_at_max; +} 
BsecSmfeData, PNTR BsecSmfePtr; + +static Boolean LIBCALLBACK BsecSMFEProc ( SeqFeatPtr sfp, - Pointer userdata + SeqMgrFeatContextPtr context ) + { - CdRegionPtr crp; - ValNodePtr psp; + BsecSmfePtr bsp; + Int4 len; - if (sfp == NULL) return; + if (sfp == NULL || context == NULL) return TRUE; + bsp = context->userdata; + if (bsp == NULL) return TRUE; - sfp->qual = SortFeatureGBQuals (sfp->qual); + len = SeqLocLen (sfp->location); + if (len < bsp->max) { + bsp->max = len; + bsp->num_at_max = 1; + } else if (len == bsp->max) { + (bsp->num_at_max)++; + } - sfp->qual = SortIllegalGBQuals (sfp->qual); + return TRUE; +} - sfp->dbxref = ValNodeSort (sfp->dbxref, SortDbxref); +NLM_EXTERN void RemoveUnnecessaryGeneXrefs ( + SeqFeatPtr sfp, + Pointer userdata +) + +{ + BsecSmfeData bsd; + Int2 count; + SeqFeatXrefPtr curr, next; + SeqMgrFeatContext fcontext; + SeqFeatXrefPtr PNTR last; + GeneRefPtr grp, grpx; + SeqFeatPtr sfpx; + CharPtr syn1, syn2; + + if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE) return; + grp = SeqMgrGetGeneXref (sfp); + if (grp == NULL || SeqMgrGeneIsSuppressed (grp)) return; + sfpx = SeqMgrGetOverlappingGene (sfp->location, &fcontext); + if (sfpx == NULL || sfpx->data.choice != SEQFEAT_GENE) return; + grpx = (GeneRefPtr) sfpx->data.value.ptrvalue; + if (grpx == NULL) return; + + if ((!StringHasNoText (grp->locus)) && (!StringHasNoText (grpx->locus))) { + if ((StringICmp (grp->locus, grpx->locus) != 0)) return; + } else if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grp->locus_tag)) { + if ((StringICmp (grp->locus_tag, grpx->locus_tag) != 0)) return; + } else if (grp->syn != NULL && grpx->syn != NULL) { + syn1 = (CharPtr) grp->syn->data.ptrvalue; + syn2 = (CharPtr) grpx->syn->data.ptrvalue; + if ((!StringHasNoText (syn1)) && (!StringHasNoText (syn2))) { + if ((StringICmp (syn1, syn2) != 0)) return; + } + } + + MemSet ((Pointer) &bsd, 0, sizeof (BsecSmfeData)); + bsd.max = INT4_MAX; + bsd.num_at_max = 0; + count = 
SeqMgrGetAllOverlappingFeatures (sfp->location, FEATDEF_GENE, NULL, 0, + LOCATION_SUBSET, (Pointer) &bsd, BsecSMFEProc); + + if (bsd.num_at_max < 2) { + last = (SeqFeatXrefPtr PNTR) &(sfp->xref); + curr = sfp->xref; + while (curr != NULL) { + next = curr->next; + if (curr->data.choice == SEQFEAT_GENE) { + *last = next; + curr->next = NULL; + SeqFeatXrefFree (curr); + } else { + last = &(curr->next); + } + curr = next; + } + } +} + +static void SortSeqFeatFields ( + SeqFeatPtr sfp, + Pointer userdata +) + +{ + CdRegionPtr crp; + ValNodePtr psp; + + if (sfp == NULL) return; + + sfp->qual = SortFeatureGBQuals (sfp->qual); + + sfp->qual = SortIllegalGBQuals (sfp->qual); + + sfp->dbxref = ValNodeSort (sfp->dbxref, SortDbxref); psp = sfp->cit; if (psp != NULL && psp->data.ptrvalue) { @@ -12501,7 +13371,7 @@ NLM_EXTERN void ResynchCDSPartials (SeqFeatPtr sfp, Pointer userdata) if (StringCmp (orig_loc, new_loc) != 0) { lip->data_in_log = TRUE; if (lip->fp != NULL) { - fprintf (lip->fp, "Adjusted protein feature location from %s to %s\n", orig_loc, new_loc); + fprintf (lip->fp, "Synchronized coding region partials for protein feature location at %s\n", orig_loc, new_loc); } } new_loc = MemFree (new_loc); @@ -12997,11 +13867,20 @@ NLM_EXTERN Boolean UseLocalAsnloadDataAndErrMsg (void) StrCpy (path, appPath); ptr = StringStr (path, "/ncbi/build/"); if (ptr != NULL) { - /* see if running under Xcode build environment */ + /* see if running under Xcode 3 build environment */ ptr [5] = '\0'; dataFound = CheckDataPath (path, "data"); } } + if (! 
dataFound) { + StrCpy (path, appPath); + ptr = StringStr (path, "/Library/Developer/"); + if (ptr != NULL) { + /* see if running under Xcode 4 build environment */ + ptr [19] = '\0'; + dataFound = CheckDataPath (path, "data"); + } + } } #endif if (dataFound) { @@ -13350,6 +14229,32 @@ NLM_EXTERN Boolean LocationHasNullsBetween (SeqLocPtr location) return FALSE; } +NLM_EXTERN void NormalizeNullsBetween (SeqLocPtr location) + +{ + SeqLocPtr next, tmp, vnp; + + if (location == NULL) return; + if (! LocationHasNullsBetween (location)) return; + + if (location->choice != SEQLOC_MIX) return; + vnp = (ValNodePtr) location->data.ptrvalue; + if (vnp == NULL) return; + + while (vnp != NULL && vnp->next != NULL) { + next = vnp->next; + if (vnp->choice != SEQLOC_NULL && next->choice != SEQLOC_NULL) { + tmp = ValNodeNew (NULL); + if (tmp != NULL) { + tmp->choice = SEQLOC_NULL; + tmp->next = vnp->next; + vnp->next = tmp; + } + } + vnp = next; + } +} + NLM_EXTERN Uint1 FindFeatFromFeatDefType (Uint2 subtype) { @@ -13409,6 +14314,9 @@ NLM_EXTERN Uint1 FindFeatFromFeatDefType (Uint2 subtype) if (subtype >= FEATDEF_gap && subtype <= FEATDEF_oriT) { return SEQFEAT_IMP; } + if (subtype == FEATDEF_mobile_element) { + return SEQFEAT_IMP; + } } return 0; } @@ -13942,7 +14850,6 @@ NLM_EXTERN Int4 VisitSeqIdsInSeqAnnot (SeqAnnotPtr annot, Pointer userdata, Visi return index; } - NLM_EXTERN Int4 VisitUserFieldsInUfp (UserFieldPtr ufp, Pointer userdata, VisitUserFieldsFunc callback) { @@ -13982,6 +14889,7 @@ NLM_EXTERN Int4 VisitUserFieldsInUop (UserObjectPtr uop, Pointer userdata, Visit return index; } +/* Visits only unnested nodes */ NLM_EXTERN Int4 VisitUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback) { @@ -14012,6 +14920,31 @@ NLM_EXTERN Int4 VisitUserObjectsInUop (UserObjectPtr uop, Pointer userdata, Visi return index; } +NLM_EXTERN Int4 VisitAllUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback) + +{ + Int4 index = 
0; + UserObjectPtr obj; + UserFieldPtr ufp; + + if (uop == NULL) return index; + for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { + if (ufp->choice == 6) { + obj = (UserObjectPtr) ufp->data.ptrvalue; + index += VisitAllUserObjectsInUop (obj, userdata, callback); + } else if (ufp->choice == 12) { + for (obj = (UserObjectPtr) ufp->data.ptrvalue; obj != NULL; obj = obj->next) { + index += VisitAllUserObjectsInUop (obj, userdata, callback); + } + } + } + if (callback != NULL) { + callback (uop, userdata); + } + index++; + return index; +} + typedef struct uopdata { UserObjectPtr rsult; CharPtr tag; @@ -15051,8 +15984,9 @@ NLM_EXTERN Int4 VisitElementsInSep (SeqEntryPtr sep, Pointer userdata, VisitElem if (bssp == NULL) return index; if (bssp->_class == 7 || (bssp->_class >= 13 && bssp->_class <= 16) || - bssp->_class != BioseqseqSet_class_wgs_set || - bssp->_class == BioseqseqSet_class_gen_prod_set) { + bssp->_class == BioseqseqSet_class_wgs_set || + bssp->_class == BioseqseqSet_class_gen_prod_set || + bssp->_class == BioseqseqSet_class_small_genome_set) { for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) { index += VisitElementsInSep (tmp, userdata, callback); } @@ -15073,7 +16007,8 @@ NLM_EXTERN Boolean IsPopPhyEtcSet (Uint1 _class) _class == BioseqseqSet_class_pop_set || _class == BioseqseqSet_class_phy_set || _class == BioseqseqSet_class_eco_set || - _class == BioseqseqSet_class_wgs_set) return TRUE; + _class == BioseqseqSet_class_wgs_set || + _class == BioseqseqSet_class_small_genome_set) return TRUE; return FALSE; } @@ -15162,7 +16097,7 @@ static Int4 ScanBioseqSetReleaseInt ( fp = FileOpen (inputFile, binary? "rb" : "r"); #endif if (fp == NULL) { - Message (MSG_ERROR, "FileOpen failed for input file '%s'", inputFile); + Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile); return index; } @@ -15335,7 +16270,7 @@ NLM_EXTERN Int4 ScanEntrezgeneSetRelease ( fp = FileOpen (inputFile, binary? 
"rb" : "r"); #endif if (fp == NULL) { - Message (MSG_ERROR, "FileOpen failed for input file '%s'", inputFile); + Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile); return index; } @@ -15674,8 +16609,8 @@ static Boolean ProductsMatch (SeqLocPtr slp1, SeqLocPtr slp2, Boolean case_sensi { BioseqPtr bsp1, bsp2; Int2 ctr, pos1, pos2; - Char buf1[50]; - Char buf2[50]; + Char buf1[51]; + Char buf2[51]; Int4 len = 50; SeqFeatPtr sfp1, sfp2; SeqMgrFeatContext fcontext1, fcontext2; @@ -15796,6 +16731,59 @@ static Boolean DoLocationsMatch (SeqLocPtr slp1, SeqLocPtr slp2, Boolean allow_d } +static Boolean DoCdRegionsMatch (CdRegionPtr crp1, CdRegionPtr crp2) +{ + if (crp1 == NULL && crp2 == NULL) { + return TRUE; + } else if (crp1 == NULL || crp2 == NULL) { + return FALSE; + } else if ((crp1->orf && !crp2->orf) || (!crp1->orf && crp2->orf)){ + return FALSE; + } else if ((crp1->conflict && !crp2->conflict) || (!crp1->conflict && crp2->conflict)){ + return FALSE; + } else if (crp1->gaps != crp2->gaps) { + return FALSE; + } else if (crp1->mismatch != crp2->mismatch) { + return FALSE; + } else if (crp1->stops != crp2->stops) { + return FALSE; + } else if ((crp1->genetic_code == NULL && crp2->genetic_code != NULL) + || (crp1->genetic_code != NULL && crp2->genetic_code == NULL) + || (crp1->genetic_code != NULL && crp2->genetic_code != NULL + && !AsnIoMemComp (crp1->genetic_code, crp2->genetic_code, (AsnWriteFunc) GeneticCodeAsnWrite))) { + return FALSE; + } else if ((crp1->code_break == NULL && crp2->code_break != NULL) + || (crp1->code_break != NULL && crp2->code_break == NULL) + || (crp1->code_break != NULL && crp2->code_break != NULL + && !AsnIoMemComp (crp1->code_break, crp2->code_break, (AsnWriteFunc) CodeBreakAsnWrite))) { + return FALSE; + } else if (crp1->frame != crp2->frame) { + if ((crp1->frame == 0 || crp1->frame == 1) && (crp2->frame == 0 || crp2->frame == 1)) { + /* both effectively frame 1, ignore this difference */ + } else { + return FALSE; + } 
+ } + return TRUE; +} + + +static Boolean DoesSeqFeatDataMatch (ChoicePtr d1, ChoicePtr d2) +{ + if (d1 == NULL && d2 == NULL) { + return TRUE; + } else if (d1 == NULL || d2 == NULL) { + return FALSE; + } else if (d1->choice != d2->choice) { + return FALSE; + } else if (d1->choice == SEQFEAT_CDREGION) { + return DoCdRegionsMatch(d1->value.ptrvalue, d2->value.ptrvalue); + } else { + return AsnIoMemComp(d1, d2, (AsnWriteFunc) SeqFeatDataAsnWrite); + } +} + + NLM_EXTERN Boolean DoFeaturesMatch (SeqFeatPtr sfp1, SeqFeatPtr sfp2, Boolean allow_different_sequences, Boolean case_sensitive, Boolean ignore_partial) { if (sfp1 == NULL && sfp2 == NULL) { @@ -15832,7 +16820,7 @@ NLM_EXTERN Boolean DoFeaturesMatch (SeqFeatPtr sfp1, SeqFeatPtr sfp2, Boolean al return FALSE; } else if (!DbxrefsMatch (sfp1->dbxref, sfp2->dbxref, case_sensitive)) { return FALSE; - } else if (!AsnIoMemComp(&(sfp1->data), &(sfp2->data), (AsnWriteFunc) SeqFeatDataAsnWrite)) { + } else if (!DoesSeqFeatDataMatch(&(sfp1->data), &(sfp2->data))) { return FALSE; } else if (!XrefsMatch (sfp1->xref, sfp2->xref)) { return FALSE; @@ -15859,7 +16847,7 @@ NLM_EXTERN void CleanupStringsForOneDescriptor (SeqDescPtr sdp, SeqEntryPtr sep) FlattenPubdesc (sdp->data.ptrvalue, NULL); } - CleanupDescriptorStrings (sdp, stripSerial, NULL, isEmblOrDdbj); + CleanupDescriptorStrings (sdp, stripSerial, TRUE, NULL, isEmblOrDdbj); } @@ -16116,3 +17104,680 @@ NLM_EXTERN Int4 ConvertCommentsWithSpacesToStructuredCommentsForSeqEntry (SeqEnt return sd.num_unable_to_convert; } + +NLM_EXTERN void RemoveFeatureLink (SeqFeatPtr sfp1, SeqFeatPtr sfp2) +{ + SeqFeatXrefPtr xref, next, PNTR prevlink; + ObjectIdPtr oip; + SeqFeatPtr link_sfp; + Char buf [32]; + CharPtr str = NULL; + + if (sfp1 == NULL) return; + + prevlink = (SeqFeatXrefPtr PNTR) &(sfp1->xref); + xref = sfp1->xref; + while (xref != NULL) { + next = xref->next; + link_sfp = NULL; + + if (xref->id.choice == 3) { + oip = (ObjectIdPtr) xref->id.value.ptrvalue; + if (oip != 
NULL) { + if (StringDoesHaveText (oip->str)) { + str = oip->str; + } else { + sprintf (buf, "%ld", (long) oip->id); + str = buf; + } + link_sfp = SeqMgrGetFeatureByFeatID (sfp1->idx.entityID, NULL, str, NULL, NULL); + } + } + if (link_sfp == sfp2) { + *prevlink = xref->next; + xref->next = NULL; + MemFree (xref); + } else { + prevlink = (SeqFeatXrefPtr PNTR) &(xref->next); + } + + xref = next; + } +} + + +NLM_EXTERN void LinkTwoFeatures (SeqFeatPtr dst, SeqFeatPtr sfp) + +{ + ChoicePtr cp; + ObjectIdPtr oip; + SeqFeatXrefPtr xref, prev_xref, next_xref; + SeqFeatPtr old_match; + + if (dst == NULL || sfp == NULL) return; + + cp = &(dst->id); + if (cp == NULL) return; + if (cp->choice == 3) { + /* don't create a duplicate xref, remove links to other features */ + xref = sfp->xref; + prev_xref = NULL; + while (xref != NULL) { + next_xref = xref->next; + if (xref->id.choice == 3 && xref->id.value.ptrvalue != NULL) { + if (ObjectIdMatch (cp->value.ptrvalue, xref->id.value.ptrvalue)) { + /* already have this xref */ + return; + } else { + old_match = SeqMgrGetFeatureByFeatID (sfp->idx.entityID, NULL, NULL, xref, NULL); + RemoveFeatureLink (sfp, old_match); + RemoveFeatureLink (old_match, sfp); + } + } else { + prev_xref = xref; + } + xref = next_xref; + } + + oip = (ObjectIdPtr) cp->value.ptrvalue; + if (oip != NULL) { + oip = AsnIoMemCopy (oip, (AsnReadFunc) ObjectIdAsnRead, + (AsnWriteFunc) ObjectIdAsnWrite); + if (oip != NULL) { + xref = SeqFeatXrefNew (); + if (xref != NULL) { + xref->id.choice = 3; + xref->id.value.ptrvalue = (Pointer) oip; + xref->next = sfp->xref; + sfp->xref = xref; + } + } + } + } +} + + +static void MakeFeatureXrefsFromProteinIdQualsCallback (SeqFeatPtr sfp, Pointer data) +{ + GBQualPtr gbq; + SeqIdPtr sip; + BioseqPtr pbsp; + SeqFeatPtr cds; + CharPtr product; + ProtRefPtr prp; + SeqEntryPtr sep; + + if (sfp == NULL || sfp->idx.subtype != FEATDEF_mRNA || (sep = (SeqEntryPtr) data) == NULL) { + return; + } + + for (gbq = sfp->qual; gbq != NULL; 
gbq = gbq->next) { + if (StringICmp (gbq->qual, "protein_id") == 0 || StringICmp (gbq->qual, "orig_protein_id") == 0) { + sip = CreateSeqIdFromText (gbq->val, sep); + pbsp = BioseqFind (sip); + cds = SeqMgrGetCDSgivenProduct (pbsp, NULL); + if (cds != NULL) { + LinkTwoFeatures (cds, sfp); + LinkTwoFeatures (sfp, cds); + product = GetRNAProductString(sfp, NULL); + if (StringHasNoText (product)) { + prp = GetProtRefForFeature (cds); + if (prp != NULL && prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) { + SetRNAProductString (sfp, NULL, prp->name->data.ptrvalue, ExistingTextOption_replace_old); + } + } + product = MemFree (product); + } + } + } +} + + +NLM_EXTERN void MakeFeatureXrefsFromProteinIdQuals (SeqEntryPtr sep) +{ + /* assign feature IDs, so that we can create xrefs that use them */ + AssignFeatureIDs (sep); + + VisitFeaturesInSep (sep, (Pointer) sep, MakeFeatureXrefsFromProteinIdQualsCallback); +} + + +static void MakeFeatureXrefsFromTranscriptIdQualsCallback (SeqFeatPtr sfp, Pointer data) +{ + GBQualPtr gbq; + SeqIdPtr sip; + BioseqPtr pbsp; + SeqFeatPtr cds; + CharPtr product; + ProtRefPtr prp; + SeqEntryPtr sep; + + if (sfp == NULL || sfp->idx.subtype != FEATDEF_mRNA || (sep = (SeqEntryPtr) data) == NULL) { + return; + } + + for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) { + if (StringICmp (gbq->qual, "transcript_id") == 0 || StringICmp (gbq->qual, "orig_transcript_id") == 0) { + sip = CreateSeqIdFromText (gbq->val, sep); + pbsp = BioseqFind (sip); + cds = SeqMgrGetCDSgivenProduct (pbsp, NULL); + if (cds != NULL) { + LinkTwoFeatures (cds, sfp); + LinkTwoFeatures (sfp, cds); + product = GetRNAProductString(sfp, NULL); + if (StringHasNoText (product)) { + prp = GetProtRefForFeature (cds); + if (prp != NULL && prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) { + SetRNAProductString (sfp, NULL, prp->name->data.ptrvalue, ExistingTextOption_replace_old); + } + } + product = MemFree (product); + } + } + } +} + + 
+NLM_EXTERN void MakeFeatureXrefsFromTranscriptIdQuals (SeqEntryPtr sep) +{ + /* assign feature IDs, so that we can create xrefs that use them */ + AssignFeatureIDs (sep); + + VisitFeaturesInSep (sep, (Pointer) sep, MakeFeatureXrefsFromTranscriptIdQualsCallback); +} + + +static void FinishHalfXrefsCallback (SeqFeatPtr sfp, Pointer data) +{ + SeqFeatPtr other; + SeqFeatXrefPtr xref, xref_other; + Boolean has_other_xref; + + if (sfp == NULL) { + return; + } + + xref = sfp->xref; + while (xref != NULL) { + if (xref->id.choice == 3) { + other = SeqMgrGetFeatureByFeatID (sfp->idx.entityID, NULL, NULL, xref, NULL); + if (other != NULL) { + xref_other = other->xref; + has_other_xref = FALSE; + while (xref_other != NULL && !has_other_xref) { + if (xref_other->id.choice == 3) { + has_other_xref = TRUE; + } + xref_other = xref_other->next; + } + if (!has_other_xref) { + LinkTwoFeatures (sfp, other); + } + } + } + xref = xref->next; + } +} + + +NLM_EXTERN void FinishHalfXrefs (SeqEntryPtr sep) +{ + VisitFeaturesInSep (sep, (Pointer) sep, FinishHalfXrefsCallback); +} + + +NLM_EXTERN Uint1 GetAaFromtRNA (tRNAPtr trp) +{ + Uint1 aa; + Uint1 from; + SeqMapTablePtr smtp; + + if (trp == NULL) { + return 0; + } + + aa = 0; + if (trp->aatype == 2) { + aa = trp->aa; + } else { + from = 0; + switch (trp->aatype) { + case 0: + from = 0; + break; + case 1: + from = Seq_code_iupacaa; + break; + case 2: + from = Seq_code_ncbieaa; + break; + case 3: + from = Seq_code_ncbi8aa; + break; + case 4: + from = Seq_code_ncbistdaa; + break; + default: + break; + } + smtp = SeqMapTableFind (Seq_code_ncbieaa, from); + if (smtp != NULL) { + aa = SeqMapTableConvert (smtp, trp->aa); + } + } + return aa; +} + + +NLM_EXTERN CharPtr GetCodesFortRNA (SeqFeatPtr sfp, Int2 *pCode) +{ + BioseqPtr bsp; + Int2 code = 0; + GeneticCodePtr gncp; + ValNodePtr vnp; + CharPtr codes = NULL; + + if (sfp == NULL) { + return NULL; + } + + /* find genetic code table */ + + bsp = GetBioseqGivenSeqLoc (sfp->location, 
sfp->idx.entityID); + BioseqToGeneticCode (bsp, &code, NULL, NULL, NULL, 0, NULL); + + gncp = GeneticCodeFind (code, NULL); + if (gncp == NULL) { + gncp = GeneticCodeFind (1, NULL); + code = 1; + } + if (gncp != NULL) { + for (vnp = (ValNodePtr) gncp->data.ptrvalue; vnp != NULL; vnp = vnp->next) { + if (vnp->choice != 3) continue; + codes = (CharPtr) vnp->data.ptrvalue; + break; + } + } + if (pCode != NULL) { + *pCode = code; + } + return codes; +} + + +static Boolean DoesCodonMatchAminoAcid (Uint1 aa, Uint1 index, CharPtr codes) +{ + Uint1 taa; + Boolean rval = FALSE; + + if (aa == 0 || aa == 255 || codes == NULL) + { + return TRUE; + } + taa = codes [index]; + + if (taa == aa) + { + rval = TRUE; + } + /* selenocysteine normally uses TGA (14), so ignore without requiring exception in record */ + else if (aa == 'U' && taa == '*' && index == 14) + { + rval = TRUE; + } + /* pyrrolysine normally uses TAG (11) in archaebacteria, ignore without requiring exception */ + else if (aa == 'O' && taa == '*' && index == 11) { + rval = TRUE; + } + /* TAA (10) is not yet known to be used for an exceptional amino acid, but the night is young */ + + return rval; +} + + +static Boolean IsATGC (Char ch) +{ + if (ch == 'A' || ch == 'T' || ch == 'G' || ch == 'C') { + return TRUE; + } else { + return FALSE; + } +} + + +static Char s_comp (Char ch) +{ + if (ch == 'A') { + return 'T'; + } else if (ch == 'G') { + return 'C'; + } else if (ch == 'C') { + return 'G'; + } else if (ch == 'T') { + return 'A'; + } else { + return 'N'; + } +} + + +static CharPtr GetFlipCodonLoggingInfo (SeqFeatPtr sfp) +{ + SeqFeatPtr gene = NULL; + GeneRefPtr grp = NULL; + ValNode vn; + CharPtr txt = NULL; + + GetGeneInfoForFeature (sfp, &grp, &gene); + if (grp != NULL && !StringHasNoText (grp->locus_tag)) { + txt = StringSave (grp->locus_tag); + } else { + MemSet (&vn, 0, sizeof (ValNode)); + vn.choice = OBJ_SEQFEAT; + vn.data.ptrvalue = sfp; + txt = GetDiscrepancyItemText (&vn); + } + return txt; +} + + 
+static Int4 CountCodonsRecognized (tRNAPtr trp) +{ + Int4 num = 0, i; + + if (trp == NULL) { + return 0; + } + for (i = 0; i < 6; i++) { + if (trp->codon [i] < 64) { + num++; + } + } + return num; +} + + +static Int4 CountMatchingCodons (tRNAPtr trp, Uint1 aa, CharPtr codes) +{ + Int4 num = 0, i; + + if (trp == NULL) { + return 0; + } + for (i = 0; i < 6; i++) { + if (trp->codon [i] < 64) { + if (DoesCodonMatchAminoAcid (aa, trp->codon[i], codes)) { + num++; + } + } + } + + return num; +} + + +static Int4 CountFlippableCodons (tRNAPtr trp, Uint1 aa, CharPtr codes, Int2 code) +{ + Int4 num = 0, i; + Int2 index; + Uint1 codon [4]; + Uint1 rcodon [4]; + + if (trp == NULL) { + return 0; + } + /* Note - it is important to set the fourth character in the codon array to NULL + * because CodonForIndex only fills in the three characters of actual codon, + * so if you StringCpy the codon array and the NULL character is not found after + * the three codon characters, you will write in memory you did not intend to. 
+ */ + codon [3] = 0; + rcodon [3] = 0; + for (i = 0; i < 6; i++) + { + if (trp->codon [i] < 64 + && !DoesCodonMatchAminoAcid (aa, trp->codon[i], codes) + && CodonForIndex (trp->codon [i], Seq_code_iupacna, codon) + && IsATGC(codon[0]) + && IsATGC(codon[1]) + && IsATGC(codon[2])) + { + rcodon[0] = s_comp(codon[2]); + rcodon[1] = s_comp(codon[1]); + rcodon[2] = s_comp(codon[0]); + index = IndexForCodon (rcodon, code); + if (index < 64 && DoesCodonMatchAminoAcid(aa, index, codes)) + { + num++; + } + } + } + + return num; +} + + +static Int4 FlipFlippableCodons (tRNAPtr trp, Uint1 aa, CharPtr codes, Int2 code) +{ + Int4 num = 0, i; + Int2 index; + Uint1 codon [4]; + Uint1 rcodon [4]; + + if (trp == NULL) { + return 0; + } + /* Note - it is important to set the fourth character in the codon array to NULL + * because CodonForIndex only fills in the three characters of actual codon, + * so if you StringCpy the codon array and the NULL character is not found after + * the three codon characters, you will write in memory you did not intend to. 
+ */ + codon [3] = 0; + rcodon [3] = 0; + for (i = 0; i < 6; i++) + { + if (trp->codon [i] < 64 + && !DoesCodonMatchAminoAcid (aa, trp->codon[i], codes) + && CodonForIndex (trp->codon [i], Seq_code_iupacna, codon) + && IsATGC(codon[0]) + && IsATGC(codon[1]) + && IsATGC(codon[2])) + { + rcodon[0] = s_comp(codon[2]); + rcodon[1] = s_comp(codon[1]); + rcodon[2] = s_comp(codon[0]); + index = IndexForCodon (rcodon, code); + if (index < 64 && DoesCodonMatchAminoAcid(aa, index, codes)) + { + trp->codon[i] = index; + num++; + } + } + } + + return num; +} + + +static Boolean IgnoretRNACodonRecognized (SeqFeatPtr sfp) +{ + if (sfp == NULL + || StringISearch (sfp->except_text, "RNA editing") != NULL + || StringISearch (sfp->except_text, "modified codon recognition") != NULL) + { + return TRUE; + } + else + { + return FALSE; + } +} + + +static void FlipCodonRecognizedCallback (SeqFeatPtr sfp, Pointer data) +{ + RnaRefPtr rrp; + tRNAPtr trp; + Uint1 aa; + CharPtr txt; + LogInfoPtr lip; + Int2 code = 0; + CharPtr codes = NULL; + Int4 num_codons, num_match, num_flippable; + + if (IgnoretRNACodonRecognized(sfp) + || sfp->idx.subtype != FEATDEF_tRNA + || (rrp = (RnaRefPtr) sfp->data.value.ptrvalue) == NULL + || rrp->ext.choice != 2 + || (trp = (tRNAPtr)(rrp->ext.value.ptrvalue)) == NULL) + { + return; + } + + num_codons = CountCodonsRecognized (trp); + if (num_codons == 0) { + return; + } + + lip = (LogInfoPtr) data; + + aa = GetAaFromtRNA (trp); + + /* find genetic code table */ + codes = GetCodesFortRNA (sfp, &code); + + if (codes == NULL) return; + + num_match = CountMatchingCodons (trp, aa, codes); + if (num_codons == num_match) { + return; + } else if (num_codons > 1) { + if (lip != NULL) + { + if (lip->fp != NULL) + { + /* text for log */ + txt = GetFlipCodonLoggingInfo (sfp); + fprintf (lip->fp, "Unable to flip bad codon_recognized for %s\n", txt); + txt = MemFree (txt); + } + lip->data_in_log = TRUE; + } + } else { + num_flippable = CountFlippableCodons(trp, aa, codes, 
code); + if (num_flippable == num_codons) { + FlipFlippableCodons (trp, aa, codes, code); + } else { + if (lip != NULL) + { + if (lip->fp != NULL) + { + /* text for log */ + txt = GetFlipCodonLoggingInfo (sfp); + fprintf (lip->fp, "Unable to flip bad codon_recognized for %s\n", txt); + txt = MemFree (txt); + } + lip->data_in_log = TRUE; + } + } + } +} + + +NLM_EXTERN void FlipCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip) +{ + VisitFeaturesInSep (sep, lip, FlipCodonRecognizedCallback); +} + + +static void RemoveBadCodonRecognizedCallback (SeqFeatPtr sfp, Pointer data) +{ + RnaRefPtr rrp; + tRNAPtr trp; + Int2 j, k; + Uint1 aa; + Uint1 codon [4]; + Uint1 rcodon [4]; + CharPtr txt; + LogInfoPtr lip; + Int2 code = 0; + CharPtr codes = NULL; + Int4 num_codons, num_match; + + if (IgnoretRNACodonRecognized(sfp) + || sfp->idx.subtype != FEATDEF_tRNA + || (rrp = (RnaRefPtr) sfp->data.value.ptrvalue) == NULL + || rrp->ext.choice != 2 + || (trp = (tRNAPtr)(rrp->ext.value.ptrvalue)) == NULL) + { + return; + } + + num_codons = CountCodonsRecognized (trp); + if (num_codons == 0) { + return; + } + + lip = (LogInfoPtr) data; + + aa = GetAaFromtRNA (trp); + + /* find genetic code table */ + codes = GetCodesFortRNA (sfp, &code); + + if (codes == NULL) return; + + num_match = CountMatchingCodons (trp, aa, codes); + if (num_match == num_codons) { + return; + } + + /* Note - it is important to set the fourth character in the codon array to NULL + * because CodonForIndex only fills in the three characters of actual codon, + * so if you StringCpy the codon array and the NULL character is not found after + * the three codon characters, you will write in memory you did not intend to. 
+ */ + codon [3] = 0; + rcodon [3] = 0; + + for (j = 0; j < 6; j++) + { + if (trp->codon [j] < 64) + { + if (DoesCodonMatchAminoAcid (aa, trp->codon[j], codes)) + { + /* already ok - skip it */ + } + else if (CodonForIndex (trp->codon [j], Seq_code_iupacna, codon) + && IsATGC(codon[0]) + && IsATGC(codon[1]) + && IsATGC(codon[2])) + { + for (k = j + 1; k < 6; k++) + { + trp->codon[k - 1] = trp->codon[k]; + } + trp->codon[5] = 255; + if (lip != NULL) + { + if (lip->fp != NULL) + { + /* text for log */ + txt = GetFlipCodonLoggingInfo (sfp); + fprintf (lip->fp, "Removed codon_recognized '%s' for %s\n", codon, txt); + txt = MemFree (txt); + } + lip->data_in_log = TRUE; + } + /* push index down, so we don't skip over a codon */ + j--; + } + } + } +} + + +NLM_EXTERN void RemoveBadCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip) +{ + VisitFeaturesInSep (sep, lip, RemoveBadCodonRecognizedCallback); +} diff --git a/api/sqnutil2.c b/api/sqnutil2.c index 1f966f30..19b6aff6 100644 --- a/api/sqnutil2.c +++ b/api/sqnutil2.c @@ -29,7 +29,7 @@ * * Version Creation Date: 9/2/97 * -* $Revision: 6.439 $ +* $Revision: 6.502 $ * * File Description: * @@ -1876,6 +1876,39 @@ static CharPtr sqntag_biosrc_origin_list [] = { "synthetic", "other", NULL }; + +static void SqnTagParsePrimers (SqnTagPtr stp, BioSourcePtr biop) +{ + ValNode quals[4]; + Int4 qual_types[] = { SUBSRC_fwd_primer_name, SUBSRC_fwd_primer_seq, SUBSRC_rev_primer_name, SUBSRC_rev_primer_seq}; + Int4 qual_defs[] = { Source_qual_fwd_primer_name, Source_qual_fwd_primer_seq, Source_qual_rev_primer_name, Source_qual_rev_primer_seq}; + Int4 num_quals = 4, qual; + Int4 i, j; + + if (stp == NULL || stp->num_tags == 0 || biop == NULL) return; + + for (i = 0; i < num_quals; i++) { + MemSet (quals + i, 0, sizeof (ValNode)); + quals[i].choice = SourceQualChoice_textqual; + quals[i].data.intvalue = qual_defs[i]; + } + + for (i = 0; i < stp->num_tags; i++) { + if (stp->tag [i] != NULL) { + qual = EquivalentSubSourceEx 
(stp->tag[i], TRUE); + for (j = 0; j < num_quals; j++) { + if (qual == qual_types[j]) { + stp->used [i] = TRUE; + SetSourceQualInBioSource (biop, quals + j, NULL, stp->val[i], ExistingTextOption_add_qual); + break; + } + } + } + } + +} + + NLM_EXTERN BioSourcePtr ParseTitleIntoBioSource ( SqnTagPtr stp, CharPtr organism, @@ -1968,6 +2001,8 @@ NLM_EXTERN BioSourcePtr ParseTitleIntoBioSource ( SqnTagFindSubSourceQuals (stp, biop); + SqnTagParsePrimers (stp, biop); + list = SqnTagFindMultiple (stp, "db_xref"); for (list_vnp = list; list_vnp != NULL; list_vnp = list_vnp->next) { str = list_vnp->data.ptrvalue; @@ -3312,7 +3347,7 @@ static ValNodePtr ParseContigOrFeatureTableString (CharPtr contigs, Boolean tabD { Char ch; - Int2 i, j, k; + Int4 i, j, k; CharPtr str; Char tmp [2048]; ValNodePtr vnp; @@ -3345,12 +3380,16 @@ static ValNodePtr ParseContigOrFeatureTableString (CharPtr contigs, Boolean tabD str [j + k] = '\0'; i += j + k + 1; } - StringNCpy_0 (tmp, str + k, sizeof (tmp)); - SqnTrimSpacesAroundString (tmp); - if (HasNoText (tmp)) { - ValNodeAdd (&vnp); + if (StringLen (str + k) < sizeof (tmp)) { + StringNCpy_0 (tmp, str + k, sizeof (tmp)); + SqnTrimSpacesAroundString (tmp); + if (HasNoText (tmp)) { + ValNodeAdd (&vnp); + } else { + ValNodeCopyStr (&vnp, 0, tmp); + } } else { - ValNodeCopyStr (&vnp, 0, tmp); + ValNodeAddPointer (&vnp, 0, StringSave (str)); } } if (vnp != NULL) { @@ -3559,7 +3598,7 @@ NLM_EXTERN Int4 ReadSequenceAsnFile ( fp = FileOpen (inputFile, binary? 
"rb" : "r"); #endif if (fp == NULL) { - Message (MSG_ERROR, "FileOpen failed for input file '%s'", inputFile); + Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile); return index; } @@ -4433,6 +4472,11 @@ static ByteStorePtr ReadFlatFileDNA (FileCachePtr fcp, BoolPtr protPtr, Boolean Int4 bad_char [256]; Boolean non_prot_char [256]; Int4 num_bad = 0; + Boolean is_nuc_char [256]; + Boolean is_prot_char [256]; + CharPtr nuc_list = "atgcbdhkmnrsuvwy"; + CharPtr prot_list = "abcdefghijklmnopqrstuvwxyz"; + CharPtr ptr; if (fcp == NULL) return NULL; bs = BSNew (1000); @@ -4443,6 +4487,30 @@ static ByteStorePtr ReadFlatFileDNA (FileCachePtr fcp, BoolPtr protPtr, Boolean *perr = FALSE; } + MemSet (is_nuc_char, 0, sizeof (is_nuc_char)); + + ptr = nuc_list; + ch = *ptr; + while (ch != '\0') { + is_nuc_char [(int) ch] = TRUE; + ch = TO_UPPER (ch); + is_nuc_char [(int) ch] = TRUE; + ptr++; + ch = *ptr; + } + + MemSet (is_prot_char, 0, sizeof (is_prot_char)); + + ptr = prot_list; + ch = *ptr; + while (ch != '\0') { + is_prot_char [(int) ch] = TRUE; + ch = TO_UPPER (ch); + is_prot_char [(int) ch] = TRUE; + ptr++; + ch = *ptr; + } + if (forceNuc) { isProt = FALSE; } else if (forceProt) { @@ -4540,8 +4608,8 @@ static ByteStorePtr ReadFlatFileDNA (FileCachePtr fcp, BoolPtr protPtr, Boolean noErrors = FALSE; } } else { - if (IsNonSeqChar (ch, isProt)) - { + /* if (IsNonSeqChar (ch, isProt)) */ + if ((isProt && (! is_prot_char [(int) ch])) || ((! isProt) && (! 
is_nuc_char [(int) ch]))) { bad_char [(int) ch] ++; noErrors = FALSE; } @@ -4632,10 +4700,27 @@ static SimpleSeqPtr ByteStoreToSimpleSeq (ByteStorePtr bs, CharPtr seqid, CharPt #define qualVal field [QUAL_VAL_TAG] #define strandStr field [STRAND_TAG] + +static Char UnexpectedCharInPositionString (CharPtr str) +{ + CharPtr cp; + + if (str == NULL) { + return 0; + } + + cp = str; + while (*cp == '<' || *cp == '>' || *cp == '^' || isdigit (*cp) || *cp == '-') { + cp++; + } + return *cp; +} + + static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP, BoolPtr partial5P, BoolPtr partial3P, BoolPtr ispointP, BoolPtr isminusP, CharPtr PNTR featP, CharPtr PNTR qualP, - CharPtr PNTR valP, Int4 offset) + CharPtr PNTR valP, Int4 offset, Int4 lin_num) { Boolean badNumber; @@ -4653,6 +4738,7 @@ static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP, Int4 tmp; long int val; ValNodePtr vnp; + Char badch; if (line == NULL || HasNoText (line)) return FALSE; if (*line == '[') return FALSE; /* offset and other instructions encoded in brackets */ @@ -4676,6 +4762,10 @@ static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP, badNumber = FALSE; str = startStr; + badch = UnexpectedCharInPositionString (str); + if (badch != 0) { + Message (MSG_POSTERR, "Unexpected characters in from column of line %d - first bad character is '%c'", lin_num, badch); + } if (str != NULL && *str == '<') { partial5 = TRUE; str++; @@ -4692,6 +4782,10 @@ static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP, badNumber = TRUE; } str = stopStr; + badch = UnexpectedCharInPositionString (str); + if (badch != 0) { + Message (MSG_POSTERR, "Unexpected characters in to column of line %d - first bad character is '%c'", lin_num, badch); + } if (str != NULL && *str == '>') { partial3 = TRUE; str++; @@ -5266,7 +5360,7 @@ static Boolean ParseQualIntoBioSource (SeqFeatPtr sfp, CharPtr qual, CharPtr val return TRUE; } - found = 
EquivalentOrgMod (str); + found = EquivalentOrgMod (qual); if (found > 0) { if (found == 32) { found = 253; @@ -5286,7 +5380,7 @@ static Boolean ParseQualIntoBioSource (SeqFeatPtr sfp, CharPtr qual, CharPtr val return TRUE; } - found = EquivalentSubSource (str); + found = EquivalentSubSource (qual); if (found > 0) { ssp = SubSourceNew (); @@ -5638,6 +5732,14 @@ static Boolean ParseQualIntoGeneOntologyUserObject (SeqFeatPtr sfp, CharPtr qual return FALSE; } +static CharPtr okayCategoryPrefixes [] = { + "", + "COORDINATES:", + "DESCRIPTION:", + "EXISTENCE:", + NULL +}; + static CharPtr okayInferencePrefixes [] = { "", "similar to sequence", @@ -5659,10 +5761,25 @@ static Boolean InvalidInference (CharPtr str) { Int2 best, j; + Char ch; size_t len; if (StringHasNoText (str)) return TRUE; + for (j = 0; okayCategoryPrefixes [j] != NULL; j++) { + len = StringLen (okayCategoryPrefixes [j]); + if (StringNICmp (str, okayCategoryPrefixes [j], len) != 0) continue; + str += len; + ch = *str; + while (ch == ' ') { + str++; + ch = *str; + } + break; + } + + if (StringHasNoText (str)) return TRUE; + best = -1; for (j = 0; okayInferencePrefixes [j] != NULL; j++) { len = StringLen (okayInferencePrefixes [j]); @@ -6654,6 +6771,11 @@ static void ParseWhitespaceIntoTabs (CharPtr line) StringCat (str, "\t\t\t"); TrimSpacesAroundString (ptr); tmp = TokenizeAtWhiteSpace (ptr); + if (tmp != NULL) { + while (isspace (*tmp)) { + tmp++; + } + } StringCat (str, ptr); StringCat (str, "\t"); StringCat (str, tmp); @@ -6680,7 +6802,40 @@ static void ParseWhitespaceIntoTabs (CharPtr line) MemFree (str); } -static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr annotname) + +static CharPtr ReadTheRestOfTheLine (FileCachePtr fcp, CharPtr original_buffer) +{ + Char line [2047]; + CharPtr str; + Boolean nonewline = TRUE; + ValNodeBlock extra; + Int4 len = 1; + ValNodePtr vnp; + + InitValNodeBlock(&extra, NULL); + ValNodeAddPointerToEnd (&extra, 0, StringSave(original_buffer)); + 
len += StringLen (original_buffer); + while (nonewline) { + nonewline = FALSE; + str = FileCacheReadLine (fcp, line, sizeof (line), &nonewline); + if (str == NULL) { + nonewline = FALSE; + } else { + ValNodeAddPointerToEnd (&extra, 0, StringSave (line)); + len += StringLen (line); + } + } + str = (CharPtr) MemNew (sizeof (Char) * len); + *str = 0; + for (vnp = extra.head; vnp != NULL; vnp = vnp->next) { + StringCat(str, vnp->data.ptrvalue); + } + str[len - 1] = 0; + return str; +} + + +static SeqAnnotPtr ReadFeatureTableEx (FileCachePtr fcp, CharPtr seqid, CharPtr annotname, Int4Ptr p_line) { Boolean allowWhitesp = TRUE; @@ -6728,6 +6883,7 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an CharPtr str; CharPtr tmp; CharPtr val; + Boolean free_str = FALSE; if (fcp == NULL || fcp->fp == NULL || seqid == NULL) return NULL; sip = SeqIdFindBest (MakeSeqID (seqid), 0); @@ -6735,6 +6891,18 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an pos = FileCacheTell (fcp); str = FileCacheReadLine (fcp, line, sizeof (line), &nonewline); + if (nonewline) { + str = ReadTheRestOfTheLine (fcp, line); + if (StringDoesHaveText (str)) { + free_str = TRUE; + } else { + str = MemFree (str); + } + } + + if (p_line != NULL) { + lin_num = *p_line; + } lin_num++; while (str != NULL) { @@ -6746,30 +6914,42 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an endsinspace = TRUE; } - if (! HasNoText (line)) { + if (! 
HasNoText (str)) { - if (StringNCmp (line, ">", 1) == 0 || - StringNCmp (line, "LOCUS ", 6) == 0 || - StringNCmp (line, "ID ", 3) == 0 || - StringStr (line, "::=") != NULL) { + if (StringNCmp (str, ">", 1) == 0 || + StringNCmp (str, "LOCUS ", 6) == 0 || + StringNCmp (str, "ID ", 3) == 0 || + StringStr (str, "::=") != NULL) { FileCacheSeek (fcp, pos); SeqIdFree (sip); + if (p_line != NULL) { + *p_line = lin_num; + } + if (free_str) { + str = MemFree (str); + } return sap; - } else if (StringNCmp (line, "//", 2) == 0) { + } else if (StringNCmp (str, "//", 2) == 0) { SeqIdFree (sip); + if (p_line != NULL) { + *p_line = lin_num; + } + if (free_str) { + str = MemFree (str); + } return sap; } if (allowWhitesp) { - ParseWhitespaceIntoTabs (line); + ParseWhitespaceIntoTabs (str); } feat = NULL; qual = NULL; val = NULL; - if (*line == '[') { - stp = SqnTagParse (line); + if (*str == '[') { + stp = SqnTagParse (str); if (stp != NULL) { tmp = SqnTagFind (stp, "offset"); if (tmp != NULL) { @@ -6780,14 +6960,14 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an } SqnTagFree (stp); - } else if (StringNICmp (line, "ORDER", 5) == 0) { + } else if (StringNICmp (str, "ORDER", 5) == 0) { if (sfp != NULL) { PutNullsBetween (sfp->location); } - } else if (ParseFeatTableLine (line, &start, &stop, &partial5, &partial3, &ispoint, - &isminus, &feat, &qual, &val, offset)) { + } else if (ParseFeatTableLine (str, &start, &stop, &partial5, &partial3, &ispoint, + &isminus, &feat, &qual, &val, offset, lin_num)) { if (feat != NULL && start >= 0 && stop >= 0) { if (sap == NULL) { @@ -7069,6 +7249,8 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an ErrPostEx (SEV_ERROR, ERR_SEQ_FEAT_ImpFeatBadLoc, "Bad location on feature %s (start %ld, stop %ld)", feat, (long) start, (long) stop); } + } else { + Message (MSG_POSTERR, "Unrecognized line in feature table: %s", str); } /* ParseFeatTableLine copies these three strings, so free here */ 
@@ -7079,6 +7261,8 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an } +#if 0 + /* commented out - always read in entire line now */ /* if humongously long line /note, now extends by concatenation */ while (nonewline && str != NULL) { @@ -7101,16 +7285,45 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an } } } +#endif pos = FileCacheTell (fcp); + if (free_str) { + str = MemFree (str); + free_str = FALSE; + } + str = FileCacheReadLine (fcp, line, sizeof (line), &nonewline); + if (nonewline) { + str = ReadTheRestOfTheLine (fcp, line); + if (StringDoesHaveText (str)) { + free_str = TRUE; + } else { + str = MemFree (str); + } + } else { + free_str = FALSE; + } + lin_num++; } + if (free_str) { + str = MemFree (str); + } + SeqIdFree (sip); + if (p_line != NULL) { + *p_line = lin_num; + } return sap; } +static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr annotname) +{ + return ReadFeatureTableEx (fcp, seqid, annotname, NULL); +} + /* ReadVecScreenTable reads lines of vector screen output into a Seq-annot. 
*/ static SeqAnnotPtr ReadVecScreenTable (FileCachePtr fcp, CharPtr seqid, CharPtr annotname) @@ -7754,15 +7967,22 @@ typedef struct setatp { AsnTypePtr atp_class; AsnTypePtr atp_seqset; AsnTypePtr atp_se; + AsnTypePtr atp_descr; + AsnTypePtr atp_descr_e; + AsnTypePtr atp_set_desc; + AsnTypePtr atp_bioseq_desc; AsnTypePtr atp_desc; AsnTypePtr atp_annot; + AsnTypePtr atp_bioseq_annot; AsnTypePtr atp_annot_e; + AsnTypePtr atp_bioseq_annot_e; AsnTypePtr atp_id; AsnTypePtr atp_coll; AsnTypePtr atp_date; AsnTypePtr atp_level; AsnTypePtr atp_release; AsnTypePtr atp_bss; + AsnTypePtr atp_bioseq; AsnTypePtr atp_seqentry; AsnTypePtr atp_seq; AsnTypePtr atp_set; @@ -7772,6 +7992,8 @@ typedef struct setatp { AsnTypePtr atp_seqsubmit_data_entries_E; AsnTypePtr atp_seqsubmit_data_entries; AsnTypePtr atp_seqsubmit_data_entries_set; + AsnTypePtr atp_bioseq_id_E; + AsnTypePtr atp_seqdesc_pub; } SetAtpData, PNTR SetAtpPtr; @@ -7781,9 +8003,15 @@ static SetAtpPtr GetSetAtp (void) AsnTypePtr atp_class; AsnTypePtr atp_seqset; AsnTypePtr atp_se; + AsnTypePtr atp_descr; + AsnTypePtr atp_descr_e; + AsnTypePtr atp_set_desc; + AsnTypePtr atp_bioseq_desc; AsnTypePtr atp_desc; AsnTypePtr atp_annot; + AsnTypePtr atp_bioseq_annot; AsnTypePtr atp_annot_e; + AsnTypePtr atp_bioseq_annot_e; AsnTypePtr atp_id; AsnTypePtr atp_coll; AsnTypePtr atp_date; @@ -7799,6 +8027,9 @@ static SetAtpPtr GetSetAtp (void) AsnTypePtr atp_seqsubmit_data_entries_E; AsnTypePtr atp_seqsubmit_data_entries; AsnTypePtr atp_seqsubmit_data_entries_set; + AsnTypePtr atp_bioseq; + AsnTypePtr atp_bioseq_id_E; + AsnTypePtr atp_seqdesc_pub; SetAtpPtr sp; amp = AsnAllModPtr (); @@ -7825,30 +8056,72 @@ static SetAtpPtr GetSetAtp (void) return NULL; } + atp_bioseq = AsnFind ("Bioseq"); + if (atp_bioseq == NULL) { + Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq"); + return NULL; + } + atp_class = AsnFind ("Bioseq-set.class"); if (atp_class == NULL) { Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.class"); 
return NULL; } - atp_desc = AsnFind ("Bioseq-set.descr"); - if (atp_desc == NULL) { + atp_descr = AsnFind ("Seq-descr"); + if (atp_descr == NULL) { + Message (MSG_POSTERR, "Unable to find ASN.1 type Seq-descr"); + return NULL; + } + + atp_descr_e = AsnFind ("Seq-descr.E"); + if (atp_descr_e == NULL) { + Message (MSG_POSTERR, "Unable to find ASN.1 type Seq-descr.E"); + return NULL; + } + + atp_set_desc = AsnFind ("Bioseq-set.descr"); + if (atp_set_desc == NULL) { Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.descr"); return NULL; } + atp_bioseq_desc = AsnFind ("Bioseq.descr"); + if (atp_bioseq_desc == NULL) { + Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq.descr"); + return NULL; + } + + atp_desc = AsnFind ("Seqdesc"); + if (atp_desc == NULL) { + Message (MSG_POSTERR, "Unable to find ASN.1 type Seqdesc"); + return NULL; + } + atp_annot = AsnFind ("Bioseq-set.annot"); if (atp_annot == NULL) { Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.annot"); return NULL; } + atp_bioseq_annot = AsnFind ("Bioseq.annot"); + if (atp_bioseq_annot == NULL) { + Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq.annot"); + return NULL; + } + atp_annot_e = AsnFind ("Bioseq-set.annot.E"); if (atp_annot_e == NULL) { Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.annot.E"); return NULL; } + atp_bioseq_annot_e = AsnFind ("Bioseq.annot.E"); + if (atp_bioseq_annot_e == NULL) { + Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq.annot.E"); + return NULL; + } + atp_id = AsnFind ("Bioseq-set.id"); if (atp_id == NULL) { Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.id"); @@ -7930,21 +8203,39 @@ static SetAtpPtr GetSetAtp (void) return NULL; } + atp_bioseq_id_E = AsnFind ("Bioseq.id.E"); + if (atp_bioseq_id_E == NULL) { + Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq.id.E"); + return NULL; + } + + atp_seqdesc_pub = AsnFind ("Seqdesc.pub"); + if (atp_seqdesc_pub == NULL) { + Message (MSG_POSTERR, "Unable to find 
ASN.1 type Seqdesc.pub"); + return NULL; + } sp = (SetAtpPtr) MemNew (sizeof(SetAtpData)); sp->amp = amp; sp->atp_class = atp_class; sp->atp_seqset = atp_seqset; sp->atp_se = atp_se; + sp->atp_descr = atp_descr; + sp->atp_descr_e = atp_descr_e; + sp->atp_set_desc = atp_set_desc; + sp->atp_bioseq_desc = atp_bioseq_desc; sp->atp_desc = atp_desc; sp->atp_annot = atp_annot; + sp->atp_bioseq_annot = atp_bioseq_annot; sp->atp_annot_e = atp_annot_e; + sp->atp_bioseq_annot_e = atp_bioseq_annot_e; sp->atp_id = atp_id; sp->atp_coll = atp_coll; sp->atp_date = atp_date; sp->atp_level = atp_level; sp->atp_release = atp_release; sp->atp_bss = atp_bss; + sp->atp_bioseq = atp_bioseq; sp->atp_seqentry = atp_seqentry; sp->atp_seq = atp_seq; sp->atp_set = atp_set; @@ -7954,6 +8245,8 @@ static SetAtpPtr GetSetAtp (void) sp->atp_seqsubmit_data_entries_E = atp_seqsubmit_data_entries_E; sp->atp_seqsubmit_data_entries = atp_seqsubmit_data_entries; sp->atp_seqsubmit_data_entries_set = atp_seqsubmit_data_entries_set; + sp->atp_bioseq_id_E = atp_bioseq_id_E; + sp->atp_seqdesc_pub = atp_seqdesc_pub; return sp; } @@ -7964,7 +8257,7 @@ static BioseqSetPtr BioseqSetPartialRead (AsnIoPtr aip, AsnTypePtr PNTR orig, Se DataVal av; AsnTypePtr atp, oldatp; BioseqSetPtr bsp=NULL; - SeqEntryPtr curr, next, hold = NULL; + SeqEntryPtr curr, next; if (aip == NULL) @@ -8007,7 +8300,7 @@ static BioseqSetPtr BioseqSetPartialRead (AsnIoPtr aip, AsnTypePtr PNTR orig, Se bsp->date = DateAsnRead(aip, atp); if (bsp->date == NULL) goto erret; } - else if (atp == sp->atp_desc) + else if (atp == sp->atp_set_desc) { bsp->descr = SeqDescrAsnRead(aip, atp); if (bsp->descr == NULL) goto erret; @@ -8180,7 +8473,7 @@ NLM_EXTERN SeqEntryPtr ReadFilteredAsn (FILE *fp, Boolean is_binary, CharPtr acc } else { bsp = BioseqFree (bsp); } - } else if (atp == sp->atp_desc) { + } else if (atp == sp->atp_set_desc) { sdp = SeqDescrAsnRead (aip, atp); ValNodeLink (&(bssp->descr), ValNodeExtractList (&sdp, Seq_descr_pub)); sdp = 
SeqDescrFree (sdp); @@ -8356,7 +8649,7 @@ static Boolean BioseqSetWriteBefore (BioseqSetPtr bsp, AsnIoPtr aip, AsnTypePtr } if (bsp->descr != NULL) /* Seq-descr optional */ { - if (! SeqDescrAsnWrite(bsp->descr, aip, sp->atp_desc)) goto erret; + if (! SeqDescrAsnWrite(bsp->descr, aip, sp->atp_set_desc)) goto erret; } if (! AsnOpenStruct(aip, sp->atp_seqset, (Pointer)bsp->seq_set)) goto erret; @@ -8397,7 +8690,7 @@ static SeqEntryPtr BioseqSetCopyReplace (AsnIoPtr aip_in, AsnIoPtr aip_out, SeqE DataVal av; AsnTypePtr atp, oldatp; BioseqSetPtr bsp=NULL, edited_set; - SeqEntryPtr curr, next, hold = NULL; + SeqEntryPtr curr, next; Boolean wrote_front = FALSE; SeqDescrPtr tmp; SeqEntryPtr tmp_sep, replace; @@ -8445,7 +8738,7 @@ static SeqEntryPtr BioseqSetCopyReplace (AsnIoPtr aip_in, AsnIoPtr aip_out, SeqE bsp->date = DateAsnRead(aip_in, atp); if (bsp->date == NULL) goto erret; } - else if (atp == sp->atp_desc) + else if (atp == sp->atp_set_desc) { bsp->descr = SeqDescrAsnRead(aip_in, atp); if (bsp->descr == NULL) goto erret; @@ -8615,11 +8908,11 @@ static void SeqEntryCopyReplace (AsnIoPtr aip_in, AsnIoPtr aip_out, SeqEntryPtr tmp_sep = SeqEntryFree (tmp_sep); } } - else if (atp == sp->atp_desc) + else if (atp == sp->atp_set_desc) { /* write out descriptors from holding set instead */ bssp = edited->data.ptrvalue; - SeqDescrAsnWrite (bssp->descr, aip_out, sp->atp_desc); + SeqDescrAsnWrite (bssp->descr, aip_out, sp->atp_set_desc); } sep = SeqEntryFree (sep); @@ -8767,9 +9060,7 @@ NLM_EXTERN DescStreamPtr DescStreamNew (SeqDescPtr sdp, BioseqPtr parent) } if (parent != NULL) { ds->owners = SeqIdDup (SeqIdFindBest (parent->id, SEQID_GENBANK)); - if (ds->owners != NULL && ds->owners->next != NULL) { - ds->owners->next = SeqIdSetFree (ds->owners->next); - } + ds->last_owner = ds->owners; } @@ -8804,6 +9095,80 @@ NLM_EXTERN ValNodePtr DescStreamListFree (ValNodePtr vnp) } +static Boolean DoDescriptorsMatch (SeqDescPtr sdp1, SeqDescPtr sdp2) +{ + if (sdp1 == NULL && sdp2 == 
NULL) { + return TRUE; + } else if (sdp1 == NULL || sdp2 == NULL) { + return FALSE; + } else if (sdp1->choice != sdp2->choice) { + return FALSE; + } else if (sdp1->choice == Seq_descr_pub) { + return PubdescContentMatch (sdp1->data.ptrvalue, sdp2->data.ptrvalue); + } else { + return AsnIoMemComp (sdp1, sdp2, (AsnWriteFunc) SeqDescAsnWrite); + } +} + + +static void AddToDescStream (ValNodeBlockPtr vb, SeqDescPtr sdp, BioseqPtr parent) +{ + DescStreamPtr dsp_new, dsp; + CharPtr txt; + ValNodePtr vnp, prev = NULL, vnp_new; + Boolean add_to_prev = FALSE; + + if (vb == NULL) { + return; + } + if (vb->head == NULL) { + ValNodeAddPointerToEnd (vb, 0, DescStreamNew (sdp, parent)); + } else { + txt = GetDescriptorLabel(sdp); + vnp = vb->head; + dsp = vnp->data.ptrvalue; + while (vnp != NULL && StringCmp (txt, dsp->text) < 0) { + prev = vnp; + vnp = vnp->next; + if (vnp != NULL) { + dsp = vnp->data.ptrvalue; + } + } + if (vnp == NULL) { + ValNodeAddPointerToEnd (vb, 0, DescStreamNew (sdp, parent)); + } else { + while (vnp != NULL && StringCmp (txt, dsp->text) == 0 + && !(add_to_prev = DoDescriptorsMatch (sdp, dsp->orig)) ) { + prev = vnp; + vnp = vnp->next; + if (vnp != NULL) { + dsp = vnp->data.ptrvalue; + } + } + if (add_to_prev) { + dsp->last_owner->next = SeqIdDup (SeqIdFindBest (parent->id, SEQID_GENBANK)); + dsp->last_owner = dsp->last_owner->next; + } else { + dsp_new = DescStreamNew (sdp, parent); + vnp_new = ValNodeNew (NULL); + vnp_new->data.ptrvalue = dsp_new; + if (prev == NULL) { + vb->head = vnp_new; + vb->tail = vnp_new; + } else { + vnp_new->next = prev->next; + prev->next = vnp_new; + if (vnp_new->next == NULL) { + vb->tail = vnp_new; + } + } + } + txt = MemFree (txt); + } + } +} + + static int DescStreamCompare (DescStreamPtr ds1, DescStreamPtr ds2) { if (ds1 == NULL && ds2 == NULL) { @@ -8854,9 +9219,21 @@ static void RecombineDescStreamList (ValNodePtr PNTR p_list) for (cmp = vnp->next; cmp != NULL && (d2 = (DescStreamPtr) cmp->data.ptrvalue) != NULL && 
StringCmp (d1->text, d2->text) == 0; cmp = cmp->next) { - if (cmp->choice == 0 && AsnIoMemComp (d1->orig, d2->orig, (AsnWriteFunc) SeqDescAsnWrite)) { - ValNodeLink (&d1->owners, d2->owners); + if (cmp->choice == 0 && DoDescriptorsMatch (d1->orig, d2->orig)) { + /* combine owner lists */ + if (d1->last_owner == NULL) { + d1->owners = d2->owners; + d1->last_owner = d1->owners; + } else { + d1->last_owner->next = d2->owners; + } d2->owners = NULL; + if (d1->last_owner != NULL) { + while (d1->last_owner->next != NULL) { + d1->last_owner = d1->last_owner->next; + } + } + /* add dependencies */ d1->num_dependent += d2->num_dependent; /* mark choice for later extraction and deletion */ @@ -8871,52 +9248,46 @@ static void RecombineDescStreamList (ValNodePtr PNTR p_list) } -static void AddPubCitationsFromAnnot (SeqAnnotPtr annot, ValNodePtr desc_stream_list) +static void AddPubCitationsFromFeat (SeqFeatPtr sfp, ValNodePtr desc_stream_list) { - SeqFeatPtr sfp; ValNodePtr repl_v; DescStreamPtr d; PubdescPtr pdp; ValNodePtr vnp; ValNode vn_p, vn_c; - Boolean found; + Boolean found = FALSE; - if (annot == NULL || annot->type != 1) + + if (sfp == NULL || sfp->cit == NULL || sfp->cit->choice != 1 || sfp->cit->data.ptrvalue == NULL) { return; } + MemSet (&vn_p, 0, sizeof (ValNode)); MemSet (&vn_c, 0, sizeof (ValNode)); - for (sfp = annot->data; sfp != NULL; sfp = sfp->next) + + /* note - there could be multiple identical copies of a pub in the list, + * we only need to count the match once - we will combine the totals + * in RecombineDescStreamList. + */ + for (repl_v = desc_stream_list; repl_v != NULL && !found; repl_v = repl_v->next) { - if (sfp->cit == NULL || sfp->cit->choice != 1 || sfp->cit->data.ptrvalue == NULL) - { - continue; - } - /* note - there could be multiple identical copies of a pub in the list, - * we only need to count the match once - we will combine the totals - * in RecombineDescStreamList. 
- */ - found = FALSE; - for (repl_v = desc_stream_list; repl_v != NULL && !found; repl_v = repl_v->next) + d = (DescStreamPtr) repl_v->data.ptrvalue; + if (d->orig != NULL + && d->orig->choice == Seq_descr_pub + && (pdp = (PubdescPtr) d->orig->data.ptrvalue) != NULL) { - d = (DescStreamPtr) repl_v->data.ptrvalue; - if (d->orig != NULL - && d->orig->choice == Seq_descr_pub - && (pdp = (PubdescPtr) d->orig->data.ptrvalue) != NULL) - { - for (vnp = sfp->cit->data.ptrvalue; vnp != NULL; vnp = vnp->next) { - /* each vnp is a pub */ - vn_p.choice = PUB_Equiv; - vn_p.data.ptrvalue = pdp->pub; - vn_c.choice = PUB_Equiv; - vn_c.data.ptrvalue = vnp; - - if (PubLabelMatch (&vn_p, &vn_c) == 0) - { - d->num_dependent ++; - found = TRUE; - } + for (vnp = sfp->cit->data.ptrvalue; vnp != NULL; vnp = vnp->next) { + /* each vnp is a pub */ + vn_p.choice = PUB_Equiv; + vn_p.data.ptrvalue = pdp->pub; + vn_c.choice = PUB_Equiv; + vn_c.data.ptrvalue = vnp; + + if (PubLabelMatch (&vn_p, &vn_c) == 0) + { + d->num_dependent ++; + found = TRUE; } } } @@ -8924,6 +9295,21 @@ static void AddPubCitationsFromAnnot (SeqAnnotPtr annot, ValNodePtr desc_stream_ } +static void AddPubCitationsFromAnnot (SeqAnnotPtr annot, ValNodePtr desc_stream_list) +{ + SeqFeatPtr sfp; + + if (annot == NULL || annot->type != 1) + { + return; + } + for (sfp = annot->data; sfp != NULL; sfp = sfp->next) + { + AddPubCitationsFromFeat (sfp, desc_stream_list); + } +} + + static void AddPubCitationsFromAnnotSet (SeqAnnotPtr annot, ValNodePtr desc_stream_list) { while (annot != NULL) @@ -9080,12 +9466,35 @@ static void FixCitationsInSet (BioseqSetPtr bssp, ValNodePtr desc_stream_list) typedef struct streamreader { - ValNodePtr desc_stream_list; + ValNodeBlock desc_list; SeqDescrPtr parent_list; - SeqIdPtr PNTR sip_list; + ValNodeBlock seqid_list; } StreamReaderData, PNTR StreamReaderPtr; +static AsnTypePtr StreamingSkipElement (AsnIoPtr aip, AsnTypePtr orig, SetAtpPtr sp) +{ + AsnTypePtr atp; + DataVal av; + + if 
(AsnReadVal(aip, orig, &av) <= 0) return NULL; + + atp = AsnReadId(aip, sp->amp, orig); if (atp == NULL) return NULL; + while (atp != orig && atp != NULL) { + AsnReadVal(aip, atp, &av); + AsnKillValue (atp, &av); + atp = AsnReadId(aip, sp->amp, atp); + } + + /* close structure */ + if (atp == orig) { + AsnReadVal (aip, atp, &av); + AsnKillValue (atp, &av); + } + return atp; +} + + static void StreamingReadAny (AsnIoPtr aip, AsnTypePtr atp, SetAtpPtr sp, StreamReaderPtr sr); static AsnTypePtr StreamingReadBioseqSet (AsnIoPtr aip, AsnTypePtr orig, SetAtpPtr sp, StreamReaderPtr sr) @@ -9093,7 +9502,7 @@ static AsnTypePtr StreamingReadBioseqSet (AsnIoPtr aip, AsnTypePtr orig, SetAtpP DataVal av; AsnTypePtr atp, oldatp; BioseqSetPtr bsp=NULL; - SeqEntryPtr curr, next, hold = NULL; + SeqEntryPtr curr, next; BioseqPtr nuc_bsp; SeqDescPtr sdp = NULL; SeqAnnotPtr annot; @@ -9137,9 +9546,9 @@ static AsnTypePtr StreamingReadBioseqSet (AsnIoPtr aip, AsnTypePtr orig, SetAtpP bsp->date = DateAsnRead(aip, atp); if (bsp->date == NULL) goto erret; } - else if (atp == sp->atp_desc) + else if (atp == sp->atp_set_desc) { - bsp->descr = SeqDescrAsnRead(aip, atp); + bsp->descr = SeqDescrAsnRead (aip, atp); if (bsp->descr == NULL) goto erret; } else if (atp == sp->atp_seqset && bsp->_class != BioseqseqSet_class_nuc_prot) @@ -9218,22 +9627,22 @@ ret: if (bsp->_class == BioseqseqSet_class_nuc_prot) { if (bsp->seq_set != NULL && IS_Bioseq (bsp->seq_set)) { nuc_bsp = bsp->seq_set->data.ptrvalue; - if (sr->sip_list != NULL && nuc_bsp != NULL) { - ValNodeLink (sr->sip_list, SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK))); + if (nuc_bsp != NULL) { + ValNodeLinkToEnd (&(sr->seqid_list), SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK))); } for (sdp = bsp->descr; sdp != NULL; sdp = sdp->next) { if (sdp->choice == Seq_descr_pub) { - ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp)); + ValNodeAddPointerToEnd (&(sr->desc_list), 0, DescStreamNew (sdp, nuc_bsp)); 
} } for (sdp = sr->parent_list; sdp != NULL; sdp = sdp->next) { if (sdp->choice == Seq_descr_pub) { - ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp)); + ValNodeAddPointerToEnd (&(sr->desc_list), 0, DescStreamNew (sdp, nuc_bsp)); } } } /* count feature citations */ - AddPubCitationsFromSet (bsp, sr->desc_stream_list); + AddPubCitationsFromSet (bsp, sr->desc_list.head); } bsp = BioseqSetFree (bsp); @@ -9246,44 +9655,107 @@ erret: } -static void StreamingReadAny (AsnIoPtr aip, AsnTypePtr atp, SetAtpPtr sp, StreamReaderPtr sr) +static BioseqPtr LIBCALL StreamingReadBioseq (AsnIoPtr aip, AsnTypePtr orig, SetAtpPtr sp) { - BioseqPtr nuc_bsp; - SeqDescrPtr sdp = NULL; - AsnTypePtr atp_orig; - Boolean first = TRUE; + DataVal av; + AsnTypePtr atp; + BioseqPtr bsp=NULL; + Int2 level; - if (aip == NULL || sp == NULL || sr == NULL) { - return; - } - atp_orig = atp; + if (aip == NULL) + return bsp; - while (! aip->io_failure && atp != NULL && (first || atp != atp_orig)) { - first = FALSE; - if (atp == sp->atp_set) { - atp = StreamingReadBioseqSet (aip, atp, sp, sr); - } else { - if (atp == sp->atp_seq) { - nuc_bsp = BioseqAsnRead (aip, atp); - if (sr->sip_list != NULL && nuc_bsp != NULL) { - ValNodeLink (sr->sip_list, SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK))); + if (! 
ProgMon("Read Bioseq")) + return bsp; + + if (orig == NULL) /* Bioseq ::= (self contained) */ + atp = AsnReadId(aip, sp->amp, sp->atp_bioseq); + else + atp = AsnLinkType(orig, sp->atp_bioseq); /* link in local tree */ + if (atp == NULL) return bsp; + + bsp = BioseqNew(); + if (bsp == NULL) goto erret; + + level = AsnGetLevel(aip); /* for skipping */ + + if (AsnReadVal(aip, atp, &av) <= 0) goto erret; /* read the start struct */ + + atp = AsnReadId(aip, sp->amp, atp); if (atp == NULL) goto erret; /* id required, start struct */ + bsp->id = SeqIdSetAsnRead(aip, atp, sp->atp_bioseq_id_E); + if (bsp->id == NULL) goto erret; + + atp = AsnReadId(aip, sp->amp, atp); if (atp == NULL) goto erret; + if (atp == sp->atp_bioseq_desc) /* descr optional */ + { + bsp->descr = SeqDescrAsnRead (aip, atp); + if (bsp->descr == NULL) goto erret; + atp = AsnReadId(aip, sp->amp, atp); if (atp == NULL) goto erret; + } + + atp = StreamingSkipElement(aip, atp, sp); + if (atp == NULL) goto erret; + + atp = AsnReadId(aip, sp->amp, atp); if (atp == NULL) goto erret; + + if (atp == sp->atp_bioseq_annot) + { + bsp->annot = SeqAnnotSetAsnRead(aip, atp, sp->atp_bioseq_annot_e); + if (bsp->annot == NULL) goto erret; + atp = AsnReadId(aip, sp->amp, atp); if (atp == NULL) goto erret; + } + + if (AsnReadVal(aip, atp, &av) <= 0) goto erret; /* end Bioseq */ +ret: + AsnUnlinkType(orig); /* unlink local tree */ + return bsp; +erret: + aip->io_failure = TRUE; + bsp = BioseqFree(bsp); + goto ret; +} + + +static void StreamingReadAny (AsnIoPtr aip, AsnTypePtr atp, SetAtpPtr sp, StreamReaderPtr sr) +{ + BioseqPtr nuc_bsp; + SeqDescrPtr sdp = NULL; + AsnTypePtr atp_orig; + Boolean first = TRUE; + DataVal av; + + if (aip == NULL || sp == NULL || sr == NULL) { + return; + } + atp_orig = atp; + + while (! 
aip->io_failure && atp != NULL && (first || atp != atp_orig)) { + first = FALSE; + if (atp == sp->atp_set) { + atp = StreamingReadBioseqSet (aip, atp, sp, sr); + } else { + if (atp == sp->atp_seq) { + nuc_bsp = StreamingReadBioseq (aip, atp, sp); + if (nuc_bsp != NULL) { + ValNodeLinkToEnd (&(sr->seqid_list), SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK))); } for (sdp = nuc_bsp->descr; sdp != NULL; sdp = sdp->next) { if (sdp->choice == Seq_descr_pub) { - ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp)); + AddToDescStream (&(sr->desc_list), sdp, nuc_bsp); } } for (sdp = sr->parent_list; sdp != NULL; sdp = sdp->next) { if (sdp->choice == Seq_descr_pub) { - ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp)); + AddToDescStream (&(sr->desc_list), sdp, nuc_bsp); } } - AddPubCitationsFromAnnotSet(nuc_bsp->annot, sr->desc_stream_list); + AddPubCitationsFromAnnotSet(nuc_bsp->annot, sr->desc_list.head); nuc_bsp = BioseqFree (nuc_bsp); - } else if (atp == sp->atp_desc) { + } else if (atp == sp->atp_set_desc) { ValNodeLink (&(sr->parent_list), SeqDescrAsnRead (aip, atp)); } else { - AsnReadVal (aip, atp, NULL); + AsnReadVal (aip, atp, &av); + AsnKillValue (atp, &av); } } atp = AsnReadId (aip, sp->amp, atp); @@ -9298,6 +9770,7 @@ static Boolean StreamingReadSeqEntry (AsnIoPtr aip, SetAtpPtr sp, StreamReaderPt AsnTypePtr atp; BioseqPtr nuc_bsp; SeqDescPtr sdp; + DataVal av; atp = AsnReadId (aip, sp->amp, sp->atp_seqentry); if (atp == NULL) { @@ -9312,22 +9785,23 @@ static Boolean StreamingReadSeqEntry (AsnIoPtr aip, SetAtpPtr sp, StreamReaderPt } else { if (atp == sp->atp_seq) { nuc_bsp = BioseqAsnRead (aip, atp); - if (sr->sip_list != NULL && nuc_bsp != NULL) { - ValNodeLink (sr->sip_list, SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK))); + if (nuc_bsp != NULL) { + ValNodeLinkToEnd (&(sr->seqid_list), SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK))); } for (sdp = nuc_bsp->descr; sdp != NULL; sdp = 
sdp->next) { if (sdp->choice == Seq_descr_pub) { - ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp)); + AddToDescStream (&(sr->desc_list), sdp, nuc_bsp); } } for (sdp = sr->parent_list; sdp != NULL; sdp = sdp->next) { if (sdp->choice == Seq_descr_pub) { - ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp)); + AddToDescStream (&(sr->desc_list), sdp, nuc_bsp); } } nuc_bsp = BioseqFree (nuc_bsp); } else { - AsnReadVal (aip, atp, NULL); + AsnReadVal (aip, atp, &av); + AsnKillValue (atp, &av); } } return TRUE; @@ -9361,17 +9835,17 @@ static Boolean StreamingReadSeqSubmit (AsnIoPtr aip, SetAtpPtr sp, StreamReaderP } else { if (atp == sp->atp_seq) { nuc_bsp = BioseqAsnRead (aip, atp); - if (sr->sip_list != NULL && nuc_bsp != NULL) { - ValNodeLink (sr->sip_list, SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK))); + if (nuc_bsp != NULL) { + ValNodeLinkToEnd (&(sr->seqid_list), SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK))); } for (sdp = nuc_bsp->descr; sdp != NULL; sdp = sdp->next) { if (sdp->choice == Seq_descr_pub) { - ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp)); + AddToDescStream (&(sr->desc_list), sdp, nuc_bsp); } } for (sdp = sr->parent_list; sdp != NULL; sdp = sdp->next) { if (sdp->choice == Seq_descr_pub) { - ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp)); + AddToDescStream (&(sr->desc_list), sdp, nuc_bsp); } } nuc_bsp = BioseqFree (nuc_bsp); @@ -9393,11 +9867,9 @@ NLM_EXTERN ValNodePtr StreamAsnForDescriptors (FILE *fp, Boolean is_binary, Bool { AsnIoPtr aip; SetAtpPtr sp; - AsnTypePtr atp = NULL; - SeqEntryPtr sep = NULL, last_sep = NULL; - BioseqSetPtr bssp = NULL; StreamReaderData sr; Boolean rval; + ValNodePtr tmp; if (fp == NULL) return NULL; @@ -9414,7 +9886,9 @@ NLM_EXTERN ValNodePtr StreamAsnForDescriptors (FILE *fp, Boolean is_binary, Bool } MemSet (&sr, 0, sizeof (StreamReaderData)); - sr.sip_list = sip_list; + if (sip_list != 
NULL) { + InitValNodeBlock (&(sr.seqid_list), *sip_list); + } if (is_submit) { StreamingReadSeqSubmit (aip, sp, &sr); @@ -9430,9 +9904,20 @@ NLM_EXTERN ValNodePtr StreamAsnForDescriptors (FILE *fp, Boolean is_binary, Bool sr.parent_list = SeqDescrFree (sr.parent_list); /* combine list items */ - RecombineDescStreamList(&(sr.desc_stream_list)); + RecombineDescStreamList(&(sr.desc_list.head)); + + if (sip_list == NULL) { + sr.seqid_list.head = SeqIdSetFree(sr.seqid_list.head); + } else { + *sip_list = sr.seqid_list.head; + } + + /* set up on-all */ + tmp = SeqIdListToValNodeSeqIdList (*sip_list); + SetOnAllValsForDescStreamList(sr.desc_list.head, tmp); + tmp = ValNodeSeqIdListFree (tmp); - return sr.desc_stream_list; + return sr.desc_list.head; } @@ -9440,7 +9925,7 @@ static SeqDescrPtr GetDescriptorsForBioseq (BioseqPtr bsp, ValNodePtr desc_strea { ValNodePtr vnp; DescStreamPtr d; - SeqIdPtr sip; + SeqIdPtr sip, sip_tmp; Boolean found; SeqDescrPtr sdp = NULL; @@ -9452,8 +9937,17 @@ static SeqDescrPtr GetDescriptorsForBioseq (BioseqPtr bsp, ValNodePtr desc_strea d = (DescStreamPtr) vnp->data.ptrvalue; if (d->replace != NULL) { found = FALSE; - for (sip = d->owners; sip != NULL && !found; sip = sip->next) { - found = SeqIdIn (sip, bsp->id); + if (d->on_all) { + found = TRUE; + } else { + /* note - we can use just the best one, because that's the one that was copied */ + sip = SeqIdFindBest (bsp->id, SEQID_GENBANK); + found = FALSE; + for (sip_tmp = d->owners; sip_tmp != NULL && !found; sip_tmp = sip_tmp->next) { + if (SeqIdComp(sip, sip_tmp) == SIC_YES) { + found = TRUE; + } + } } if (found) { ValNodeLink (&sdp, AsnIoMemCopy (d->replace, (AsnReadFunc) SeqDescAsnRead, (AsnWriteFunc) SeqDescAsnWrite)); @@ -9506,8 +10000,7 @@ StreamingReadWriteBioseqSet DataVal av; AsnTypePtr atp, oldatp; BioseqSetPtr bsp=NULL; - SeqEntryPtr curr, next, hold = NULL; - SeqDescPtr sdp = NULL; + SeqEntryPtr curr, next; SeqDescrPtr tmp; SeqAnnotPtr annot; @@ -9549,7 +10042,7 @@ 
StreamingReadWriteBioseqSet bsp->date = DateAsnRead(aip_in, atp); if (bsp->date == NULL) goto erret; } - else if (atp == sp->atp_desc) + else if (atp == sp->atp_set_desc) { bsp->descr = SeqDescrAsnRead(aip_in, atp); if (bsp->descr == NULL) goto erret; @@ -9826,13 +10319,58 @@ NLM_EXTERN void WriteAsnWithReplacedDescriptors (ValNodePtr desc_stream_list, FI rval = StreamingReadWriteSeqEntry(desc_stream_list, aip_in, aip_out, sp); AsnIoFlush (aip_out); } - AsnIoClose (aip_in); - AsnIoClose (aip_out); + AsnIoFree (aip_in, FALSE); + AsnIoFree (aip_out, FALSE); sp = MemFree (sp); } +NLM_EXTERN Boolean IdListsMatch (SeqIdPtr sip_list, ValNodePtr all_sip) +{ + Boolean found = FALSE, any_missing = FALSE; + ValNodePtr vnp; + + if (sip_list == NULL || all_sip == NULL) { + return FALSE; + } + + if (ValNodeLen (sip_list) != ValNodeLen (all_sip)) { + return FALSE; + } + + while (sip_list != NULL) { + found = FALSE; + for (vnp = all_sip; vnp != NULL && !found; vnp = vnp->next) { + if (vnp->choice == 0 && SeqIdComp (vnp->data.ptrvalue, sip_list) == SIC_YES) { + vnp->choice = 1; + found = TRUE; + } + } + sip_list = sip_list->next; + } + for (vnp = all_sip; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == 0) { + any_missing = TRUE; + } + vnp->choice = 0; + } + return !any_missing; +} + + +NLM_EXTERN void SetOnAllValsForDescStreamList (ValNodePtr desc_list, ValNodePtr all_sip) +{ + ValNodePtr vnp; + DescStreamPtr d; + + for (vnp = desc_list; vnp != NULL; vnp = vnp->next) { + d = (DescStreamPtr) vnp->data.ptrvalue; + d->on_all = IdListsMatch(d->owners, all_sip); + } +} + + /* ReadAsnFastaOrFlatFileEx reads lines, looking for starts of ASN.1, FASTA, GenBank, EMBL, or GenPept files. It then calls the appropriate read function, which is responsible for reading the sequence (or object) and restoring the file pointer to the beginning of the @@ -10765,7 +11303,7 @@ NLM_EXTERN Pointer ReadFeatureTableFile ( annotname = GetSeqId (seqid, line, sizeof (seqid), TRUE, FALSE); if (! 
HasNoText (seqid)) { - sap = ReadFeatureTable (&fc, seqid, annotname); + sap = ReadFeatureTableEx (&fc, seqid, annotname, lineP); if (sap != NULL && sap->type == 1) { sfp = (SeqFeatPtr) sap->data; prevsfp = (Pointer PNTR) &(sap->data); @@ -13041,6 +13579,7 @@ ReplaceItemPair AbbreviationList[] = { { "trna-", "tRNA-" }, { "var.", "var." }, { "var..", "var.." }, + { "uk", "UK" }, { "usa", "USA" }, { "U.S.A.", "USA" }, { "U.S.A", "USA" }, @@ -13173,6 +13712,122 @@ FixCapitalizationInElement } +static ReplaceItemPair s_CountryFixes[] = { + { "chnia", "China" }, + { "pr china", "P.R. China" }, + { "prchina", "P.R. China" }, + { "p.r.china", "P.R. China" }, + { "p.r china", "P.R. China" }, + { "p, r, china", "P.R. China" }, +}; + +#define NUM_CountryFixes sizeof (s_CountryFixes) / sizeof (ReplaceItemPair) + + +static void InsertMissingSpacesAfterCommas (CharPtr PNTR pString) +{ + Int4 num_new_spaces = 0; + CharPtr str, cp, new_str, src, dst; + + if (pString == NULL || *pString == NULL) { + return; + } + + str = *pString; + cp = StringChr (str, ','); + while (cp != NULL) { + if (*(cp + 1) != 0 && !isspace (*(cp + 1))) { + num_new_spaces++; + } + cp = StringChr (cp + 1, ','); + } + + if (num_new_spaces == 0) { + return; + } + + new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + num_new_spaces + 1)); + src = str; + dst = new_str; + while (*src != 0) { + *dst = *src; + ++dst; + if (*src == ',' && *(src + 1) != 0 && !isspace (*(src + 1))) { + *dst = ' '; + ++dst; + } + ++src; + } + *dst = 0; + str = MemFree (str); + *pString = new_str; +} + + +static void InsertMissingSpacesAfterNo (CharPtr PNTR pString) +{ + Int4 num_new_spaces = 0; + CharPtr str, cp, new_str, src; + + if (pString == NULL || *pString == NULL) { + return; + } + + str = *pString; + cp = StringISearch (str, "No."); + while (cp != NULL) { + if (isalpha(*(cp + 3)) || isdigit(*(cp + 3))) { + num_new_spaces++; + } + cp = StringISearch (cp + 3, "No."); + } + + new_str = (CharPtr) MemNew (sizeof (Char) * 
(StringLen (str) + num_new_spaces + 1)); + new_str[0] = 0; + + src = str; + cp = StringISearch (src, "No."); + while (cp != NULL) { + StringNCat (new_str, src, cp - src); + StringCat (new_str, "No."); + if (isalpha(*(cp + 3)) || isdigit(*(cp + 3))) { + StringCat (new_str, " "); + } + src = cp + 3; + cp = StringISearch (src, "No."); + } + StringCat (new_str, src); + + str = MemFree (str); + *pString = new_str; +} + + +NLM_EXTERN void FixCapitalizationInCountryStringEx (CharPtr PNTR pCountry, Boolean punct_only) +{ + Int4 i; + + if (pCountry == NULL || StringICmp (*pCountry, "US") == 0) { + return; + } + InsertMissingSpacesAfterCommas (pCountry); + InsertMissingSpacesAfterNo (pCountry); + if (!punct_only) { + FixCapitalizationInElement (pCountry, TRUE, TRUE, FALSE); + + } + for (i = 0; i < NUM_CountryFixes; i++) { + FindReplaceString (pCountry, s_CountryFixes[i].FindString, + s_CountryFixes[i].ReplaceString, FALSE, TRUE); + } +} + +NLM_EXTERN void FixCapitalizationInCountryString (CharPtr PNTR pCountry) +{ + FixCapitalizationInCountryStringEx (pCountry, FALSE); +} + + NLM_EXTERN void FixCapitalizationInAuthor (AuthorPtr pAuthor) { NameStdPtr pNameStandard; @@ -13196,32 +13851,87 @@ NLM_EXTERN void FixCapitalizationInAuthor (AuthorPtr pAuthor) } -NLM_EXTERN void FixCapsInPubAffil (AffilPtr affil) +NLM_EXTERN void FixStateAbbreviationsInAffil (AffilPtr affil, LogInfoPtr lip) +{ + CharPtr abbrev; + + if (affil == NULL) { + return; + } + if (StringCmp (affil->country, "USA") == 0) { + abbrev = GetStateAbbreviation (affil->sub); + if (abbrev != NULL) { + if (lip != NULL) { + if (lip->fp != NULL) { + fprintf (lip->fp, "Changed %s to %s\n", affil->sub, abbrev); + } + lip->data_in_log = TRUE; + } + affil->sub = MemFree (affil->sub); + affil->sub = StringSave (abbrev); + } + } +} + + +NLM_EXTERN void FixCapsInPubAffilEx (AffilPtr affil, Boolean punct_only) { if (affil == NULL) return; - FixCapitalizationInElement (&(affil->affil), TRUE, TRUE, FALSE); - 
FixAffiliationShortWordsInElement (&(affil->affil)); - FixCapitalizationInElement (&(affil->div), TRUE, TRUE, FALSE); - FixAffiliationShortWordsInElement (&(affil->div)); - FixCapitalizationInElement (&(affil->city), FALSE, TRUE, FALSE); - FixAffiliationShortWordsInElement (&(affil->city)); + if (!punct_only) { + FixCapitalizationInElement (&(affil->affil), TRUE, TRUE, FALSE); + FixAffiliationShortWordsInElement (&(affil->affil)); + FixCapitalizationInElement (&(affil->div), TRUE, TRUE, FALSE); + FixAffiliationShortWordsInElement (&(affil->div)); + FixCapitalizationInElement (&(affil->city), FALSE, TRUE, FALSE); + FixAffiliationShortWordsInElement (&(affil->city)); + } + FixKnownAbbreviationsInElement (&(affil->affil)); + FixKnownAbbreviationsInElement (&(affil->street)); + FixKnownAbbreviationsInElement (&(affil->div)); + FixKnownAbbreviationsInElement (&(affil->city)); + + InsertMissingSpacesAfterCommas (&(affil->affil)); + InsertMissingSpacesAfterNo (&(affil->affil)); + InsertMissingSpacesAfterCommas (&(affil->div)); + InsertMissingSpacesAfterNo (&(affil->div)); + InsertMissingSpacesAfterCommas (&(affil->city)); + InsertMissingSpacesAfterNo (&(affil->city)); /* special handling for states */ - if (affil->sub != NULL && StringLen (affil->sub) == 2 - && isalpha((Int4)(affil->sub[0])) && isalpha((Int4)(affil->sub[1]))) - { - affil->sub[0] = toupper(affil->sub[0]); - affil->sub[1] = toupper(affil->sub[1]); + if (punct_only) { + InsertMissingSpacesAfterCommas (&(affil->sub)); } else { - FixCapitalizationInElement (&(affil->sub), FALSE, TRUE, FALSE); - FixAffiliationShortWordsInElement (&(affil->sub)); + if (affil->sub != NULL && StringLen (affil->sub) == 2 + && isalpha((Int4)(affil->sub[0])) && isalpha((Int4)(affil->sub[1]))) + { + affil->sub[0] = toupper(affil->sub[0]); + affil->sub[1] = toupper(affil->sub[1]); + } else { + FixCapitalizationInElement (&(affil->sub), FALSE, TRUE, FALSE); + FixAffiliationShortWordsInElement (&(affil->sub)); + 
InsertMissingSpacesAfterCommas (&(affil->sub)); + } + } + + if (!punct_only) { + FixCapitalizationInCountryString (&(affil->country)); + FixCapitalizationInElement (&(affil->street), FALSE, TRUE, FALSE); + FixAffiliationShortWordsInElement (&(affil->street)); + FixStateAbbreviationsInAffil (affil, NULL); } - FixCapitalizationInElement (&(affil->country), TRUE, TRUE, FALSE); - FixCapitalizationInElement (&(affil->street), FALSE, TRUE, FALSE); - FixAffiliationShortWordsInElement (&(affil->street)); + if (StringCmp (affil->country, "USA") == 0) { + FixStateAbbreviationsInAffil (affil, NULL); + } + InsertMissingSpacesAfterCommas (&(affil->street)); + InsertMissingSpacesAfterNo (&(affil->street)); } +NLM_EXTERN void FixCapsInPubAffil (AffilPtr affil) +{ + FixCapsInPubAffilEx (affil, FALSE); +} + ReplaceItemPair AffiliationShortWordList[] = { { "Au", "au" } , { "Aux", "aux" } , @@ -13236,6 +13946,10 @@ ReplaceItemPair AffiliationShortWordList[] = { { "Le", "le" }, { "Les", "les" }, { "Rue", "rue" }, + { "Po Box", "PO Box" }, + { "Pobox", "PO Box" }, + { "P.O box", "P.O. Box" }, + { "P.Obox", "P.O. Box" }, { "Y", "y" } }; @@ -13271,6 +13985,37 @@ NLM_EXTERN void FixAffiliationShortWordsInElement (CharPtr PNTR pEl) } +ReplaceItemPair KnownAbbreviationList[] = { + { "po box", "PO Box" }, + { "Pobox", "PO Box" }, + { "P.O box", "P.O. Box" }, + { "P.Obox", "P.O. Box" }, + { "PO.Box", "P.O. Box" }, + { "PO. Box", "P.O. Box" }, + { "pr china", "P.R. China" }, + { "prchina", "P.R. China" }, + { "p.r.china", "P.R. China" }, + { "p.r china", "P.R. China" }, + { "p, r, china", "P.R. China" }, + { "p,r, china", "P.R. China" }, + { "p,r,china", "P.R. 
China" } +}; + +NLM_EXTERN void FixKnownAbbreviationsInElement (CharPtr PNTR pEl) +{ + Int2 i; + + if (pEl == NULL) return; + if (*pEl == NULL) return; + + for (i = 0; i < sizeof (KnownAbbreviationList) / sizeof (ReplaceItemPair); i++) + { + FindReplaceString (pEl, KnownAbbreviationList[i].FindString, + KnownAbbreviationList[i].ReplaceString, FALSE, TRUE); + } +} + + NLM_EXTERN void FixOrgNamesInString (CharPtr str, ValNodePtr org_names) { ValNodePtr vnp; @@ -14697,7 +15442,7 @@ NLM_EXTERN void AdjustFeatureForGapsCallback (SeqFeatPtr sfp, Pointer data) SeqMgrFeatContext fcontext; Boolean set_partial_ends; - if (sfp == NULL || data == NULL) return; + if (sfp == NULL || data == NULL || sfp->location == NULL || sfp->idx.deleteme) return; afgp = (AdjustFeatForGapPtr) data; @@ -14861,7 +15606,7 @@ NLM_EXTERN void AdjustCDSLocationsForUnknownGapsCallback (SeqFeatPtr sfp, Pointe agd.options = eAdjustFeatForGap_unknown_gaps | eAdjustFeatForGap_make_partial | eAdjustFeatForGap_split_internal | eAdjustFeatForGap_trim_ends; - agd.align_func = data; + agd.align_func = (GlobalAlignFunc) data; agd.features_in_gap = NULL; @@ -15185,6 +15930,28 @@ NLM_EXTERN ValNodePtr ReportCoverageForBioseqSeqHist (BioseqPtr bsp) } +static Boolean IsSingleEstCoverage (BioseqPtr bsp) +{ + SeqAlignPtr salp; + SeqIdPtr sip; + + if (bsp == NULL || bsp->hist == NULL || bsp->hist->assembly == NULL) { + return FALSE; + } + if (bsp->hist->assembly->next == NULL) { + return TRUE; + } else { + sip = AlnMgr2GetNthSeqIdPtr (bsp->hist->assembly, 2); + for (salp = bsp->hist->assembly->next; salp != NULL; salp = salp->next) { + if (SeqIdComp (AlnMgr2GetNthSeqIdPtr (salp, 2), sip) == SIC_NO) { + return FALSE; + } + } + return TRUE; + } +} + + NLM_EXTERN ValNodePtr ReportCoverageForTranscriptomeIdsListSeqHist (ValNodePtr list) { ValNodePtr range_list = NULL, new_list; @@ -15192,6 +15959,9 @@ NLM_EXTERN ValNodePtr ReportCoverageForTranscriptomeIdsListSeqHist (ValNodePtr l Char id_str[255]; CharPtr good_fmt = 
"Coverage is complete for %s"; CharPtr msg; + Int4 num_single = 0, num_mult = 0; + CharPtr single_fmt = "%d records are covered by a single EST"; + CharPtr mult_fmt = "%d records are covered by multiple ESTs"; while (list != NULL) { t = (TranscriptomeIdsPtr) list->data.ptrvalue; @@ -15205,9 +15975,20 @@ NLM_EXTERN ValNodePtr ReportCoverageForTranscriptomeIdsListSeqHist (ValNodePtr l } else { ValNodeLink (&range_list, new_list); } + if (IsSingleEstCoverage(t->consensus_bsp)) { + num_single++; + } else { + num_mult++; + } } list = list->next; } + msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (single_fmt) + 15)); + sprintf (msg, single_fmt, num_single); + ValNodeAddPointer (&range_list, 0, msg); + msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (mult_fmt) + 15)); + sprintf (msg, mult_fmt, num_mult); + ValNodeAddPointer (&range_list, 0, msg); return range_list; } @@ -15573,9 +16354,9 @@ MakeTranscriptomeAssemblySeqHist for (vnp = t->token_list; vnp != NULL; vnp = vnp->next) { if (StringChr (vnp->data.ptrvalue, '|') == NULL) { - sprintf (id_buf, "gb|%s", vnp->data.ptrvalue); + sprintf (id_buf, "gb|%s", (char *) vnp->data.ptrvalue); } else { - sprintf (id_buf, "%s", vnp->data.ptrvalue); + sprintf (id_buf, "%s", (char *) vnp->data.ptrvalue); } sip = MakeSeqID (id_buf); read_bsp = BioseqLockById (sip); @@ -16256,6 +17037,7 @@ ReadOneColumnListEx if (plen == StringLen (p_start)) { found_end = TRUE; + p_end = p_start + plen; } else { @@ -16500,13 +17282,31 @@ NLM_EXTERN ValNodePtr FreeTabTable (ValNodePtr row_list) } +NLM_EXTERN ValNodePtr CopyTabTable (ValNodePtr row_list) +{ + ValNodeBlock row_block; + ValNodeBlock col_block; + ValNodePtr row, col; + + InitValNodeBlock(&row_block, NULL); + for (row = row_list; row != NULL; row = row->next) { + InitValNodeBlock(&col_block, NULL); + for (col = row->data.ptrvalue; col != NULL; col = col->next) { + ValNodeAddPointerToEnd (&col_block, col->choice, StringSave(col->data.ptrvalue)); + } + ValNodeAddPointerToEnd 
(&row_block, 0, col_block.head); + } + return row_block.head; +} + + NLM_EXTERN void WriteTabTableToFile (ValNodePtr table, FILE *fp) { ValNodePtr line, vnp; for (line = table; line != NULL; line = line->next) { for (vnp = line->data.ptrvalue; vnp != NULL; vnp = vnp->next) { - fprintf (fp, "%s%s", (CharPtr) vnp->data.ptrvalue, vnp->next == NULL ? "\n" : "\t"); + fprintf (fp, "%s%s", vnp->data.ptrvalue == NULL ? "" : (CharPtr) vnp->data.ptrvalue, vnp->next == NULL ? "\n" : "\t"); } } } @@ -16597,6 +17397,49 @@ NLM_EXTERN void ReparseTabTableConvertFirstSpaceToTab (ValNodePtr row_list) } +NLM_EXTERN void ReparseTabTableSeparateColumnAtDelimiter (ValNodePtr row_list, Char delimiter, Int4 col, Boolean stop_after_first) +{ + ValNodePtr line_vnp, col_vnp, new_vnp, next_col; + CharPtr first_text, second_text, first_space; + Int4 col_num; + + for (line_vnp = row_list; line_vnp != NULL; line_vnp = line_vnp->next) + { + col_vnp = line_vnp->data.ptrvalue; + col_num = 0; + while (col_num < col && col_vnp != NULL) { + col_num++; + col_vnp = col_vnp->next; + } + if (col_vnp != NULL) { + next_col = col_vnp->next; + while (col_vnp != next_col) { + first_text = col_vnp->data.ptrvalue; + if ((first_space = StringChr (first_text, delimiter)) != NULL) { + second_text = first_space + 1; + if (*second_text != 0) { + /* terminate first text at first delimiter */ + *first_space = 0; + /* create new column with text after first delimiter */ + second_text = StringSave (second_text); + new_vnp = ValNodeNew (NULL); + new_vnp->data.ptrvalue = second_text; + /* insert new column */ + new_vnp->next = col_vnp->next; + col_vnp->next = new_vnp; + } + } + if (stop_after_first) { + col_vnp = next_col; + } else { + col_vnp = col_vnp->next; + } + } + } + } +} + + static Int4 LenToNextTabOrMultispace (CharPtr cp) { Int4 len = 0; @@ -16621,7 +17464,6 @@ static Int4 LenToNextTabOrMultispace (CharPtr cp) static Int4 LenTabOrMultispace (CharPtr cp) { Int4 len = 0; - Boolean found = FALSE; if 
(StringHasNoText (cp)) { len = 0; @@ -16710,11 +17552,21 @@ NLM_EXTERN void CombineTabTableColumns (ValNodePtr row_list, ValNodePtr column_p add_vnp = col_vnp; col_prev = col_vnp; } else { - len = StringLen (add_vnp->data.ptrvalue) + StringLen (delimiter) + StringLen (col_vnp->data.ptrvalue) + 1; - tmp = (CharPtr) MemNew (sizeof (Char) * len); - sprintf (tmp, "%s%s%s", add_vnp->data.ptrvalue, delimiter == NULL ? "" : delimiter, col_vnp->data.ptrvalue); - add_vnp->data.ptrvalue = MemFree (add_vnp->data.ptrvalue); - add_vnp->data.ptrvalue = tmp; + if (StringHasNoText (col_vnp->data.ptrvalue)) { + /* do nothing - no need to add blank to blank */ + } else if (StringHasNoText (add_vnp->data.ptrvalue)) { + /* move from col_vnp */ + add_vnp->data.ptrvalue = MemFree (add_vnp->data.ptrvalue); + add_vnp->data.ptrvalue = col_vnp->data.ptrvalue; + col_vnp->data.ptrvalue = NULL; + } else { + /* combine with delimiter */ + len = StringLen (add_vnp->data.ptrvalue) + StringLen (delimiter) + StringLen (col_vnp->data.ptrvalue) + 1; + tmp = (CharPtr) MemNew (sizeof (Char) * len); + sprintf (tmp, "%s%s%s", (char *) add_vnp->data.ptrvalue, delimiter == NULL ? 
"" : delimiter, (char *) col_vnp->data.ptrvalue); + add_vnp->data.ptrvalue = MemFree (add_vnp->data.ptrvalue); + add_vnp->data.ptrvalue = tmp; + } col_prev->next = col_vnp->next; col_vnp->next = NULL; col_vnp = ValNodeFreeData (col_vnp); @@ -16753,29 +17605,174 @@ NLM_EXTERN void AddTextToTabTableColumn (ValNodePtr row_list, Int4 col, CharPtr } -static void AddToContextList (Char ch, CharPtr PNTR strp, ValNodePtr PNTR search_list) -{ - ValNodePtr vnp, vnp_last = NULL, vnp2, clist; +static int LIBCALLBACK SortTableRowByColumn (VoidPtr ptr1, VoidPtr ptr2, Int4 column) - if (strp == NULL || search_list == NULL) return; +{ + ValNodePtr vnp1; + ValNodePtr vnp2; + ValNodePtr col1, col2; + Int4 colpos = 1; + int rval = 0; - /* group contexts for the same character together */ - vnp = *search_list; - while (vnp != NULL && vnp->choice != (Uint1)ch) - { - vnp_last = vnp; - vnp = vnp->next; + if (ptr1 == NULL || ptr2 == NULL) return 0; + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + if (vnp1 == NULL || vnp2 == NULL) return 0; + col1 = vnp1->data.ptrvalue; + col2 = vnp2->data.ptrvalue; + while (col1 != NULL && col2 != NULL && colpos < column) { + col1 = col1->next; + col2 = col2->next; + colpos++; + } + if (col1 == NULL && col2 == NULL) { + rval = 0; + } else if (col1 == NULL) { + rval = -1; + } else if (col2 == NULL) { + rval = 1; + } else { + rval = StringCmp (col1->data.ptrvalue, col2->data.ptrvalue); } - if (vnp == NULL) - { - vnp = ValNodeNew(NULL); - if (vnp_last == NULL) - { - *search_list = vnp; - } - else - { - vnp_last->next = vnp; + return rval; +} + + +static Int4 s_TableRowSortColumn = 0; + +static int LIBCALLBACK SortTableRowByColumnStatic (VoidPtr ptr1, VoidPtr ptr2) +{ + return SortTableRowByColumn (ptr1, ptr2, s_TableRowSortColumn); +} + + +NLM_EXTERN ValNodePtr SortTableRowByAnyColumn (ValNodePtr table, Int4 column) +{ + s_TableRowSortColumn = column; + table = ValNodeSort (table, SortTableRowByColumnStatic); + return table; +} + + 
+NLM_EXTERN TwoStringHashPtr TwoStringHashFree (TwoStringHashPtr tsh) +{ + Int4 i; + + if (tsh != NULL) { + for (i = 0; i < tsh->num_lines; i++) { + tsh->table[2 * i] = MemFree (tsh->table[2 * i]); + tsh->table[2 * i + 1] = MemFree (tsh->table[2 * i + 1]); + } + tsh->table = MemFree (tsh->table); + tsh = MemFree (tsh); + } + return tsh; +} + + +static ValNodePtr GetNthValNode (ValNodePtr list, Int4 n) +{ + Int4 pos = 1; + ValNodePtr vnp; + + if (n < 1) { + return NULL; + } + for (vnp = list; vnp != NULL && pos < n; vnp = vnp->next) + { + pos++; + } + return vnp; +} + + +NLM_EXTERN TwoStringHashPtr MakeTwoStringHashFromTabTable (ValNodePtr line_list, Int4 column1, Int4 column2) +{ + ValNodePtr tmp, vnp, col1, col2; + Int4 len, i; + TwoStringHashPtr tsh; + + tmp = CopyTabTable(line_list); + tmp = SortTableRowByAnyColumn (tmp, column1); + len = ValNodeLen (tmp); + + tsh = (TwoStringHashPtr) MemNew (sizeof (TwoStringHashData)); + tsh->table = (CharPtr PNTR) MemNew (sizeof (CharPtr) * len * 2); + for (i = 0, vnp = tmp; vnp != NULL; vnp = vnp->next) { + col1 = GetNthValNode (vnp->data.ptrvalue, column1); + col2 = GetNthValNode (vnp->data.ptrvalue, column2); + if (col1 != NULL && col2 != NULL && !StringHasNoText (col1->data.ptrvalue) && !StringHasNoText (col2->data.ptrvalue)) { + tsh->table[2 * i] = StringSave (col1->data.ptrvalue); + tsh->table[2 * i + 1] = StringSave (col2->data.ptrvalue); + i++; + } + } + tsh->num_lines = i; + tmp = FreeTabTable(tmp); + return tsh; +} + + +NLM_EXTERN CharPtr GetValueFromTwoStringHash (CharPtr key, TwoStringHashPtr tsh) +{ + Int4 min = 0, num = -1, i, j; + Int4 max; + CharPtr tmp; + + if (StringHasNoText (key) || tsh == NULL) { + return NULL; + } + max = tsh->num_lines - 1; + + while (max >= min) + { + i = (max + min)/2; + tmp = tsh->table[2 * i]; + if ((j = StringCmp(tmp, key)) > 0) + { + max = i - 1; + } + else if (j < 0) + { + min = i + 1; + } + else + { + num = i; + break; + } + } + if (num == -1) { + return NULL; + } else { + 
return tsh->table[2 * num + 1]; + } +} + + +static void AddToContextList (Char ch, CharPtr PNTR strp, ValNodePtr PNTR search_list) +{ + ValNodePtr vnp, vnp_last = NULL, vnp2, clist; + + if (strp == NULL || search_list == NULL) return; + + /* group contexts for the same character together */ + vnp = *search_list; + while (vnp != NULL && vnp->choice != (Uint1)ch) + { + vnp_last = vnp; + vnp = vnp->next; + } + if (vnp == NULL) + { + vnp = ValNodeNew(NULL); + if (vnp_last == NULL) + { + *search_list = vnp; + } + else + { + vnp_last->next = vnp; } } vnp->choice = (Uint1) ch; @@ -16860,6 +17857,125 @@ NLM_EXTERN ValNodePtr ScanTabTableForSpecialCharacters (ValNodePtr row_list) } +NLM_EXTERN ValNodePtr AutoReplaceSpecialCharactersInText (CharPtr PNTR text) +{ + CharPtr cp, str, new_str, cp_dst; + Int4 len; + Int4 extra_len = 0; + Boolean any = FALSE; + CharPtr replace_fmt = "Replaced '%c' with '%s'"; + ValNodePtr repl_list = NULL; + CharPtr repl_str; + + if (text == NULL || (cp = *text) == NULL) { + return NULL; + } + + while (*cp != 0) { + if (*cp < ' ' || *cp > '~') { +#ifdef OS_WINNT + str = GetSpecialWinCharacterReplacement ((unsigned char) *cp); +#else + str = GetSpecialMacCharacterReplacement ((unsigned char) *cp); +#endif + len = StringLen (str); + if (len > 1) { + extra_len += len - 1; + } + any = TRUE; + } + ++cp; + } + if (any) { + new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (*text) + extra_len + 1)); + cp = *text; + cp_dst = new_str; + while (*cp != 0) { + if (*cp < ' ' || *cp > '~') { +#ifdef OS_WINNT + str = GetSpecialWinCharacterReplacement ((unsigned char) *cp); +#else + str = GetSpecialMacCharacterReplacement ((unsigned char) *cp); +#endif + repl_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (replace_fmt) + StringLen (str))); + sprintf (repl_str, replace_fmt, *cp, str == NULL ? 
"" : str); + ValNodeAddPointer (&repl_list, 0, repl_str); + if (str != NULL) { + while (*str != 0) { + *cp_dst = *str; + cp_dst++; + str++; + } + } + } else { + *cp_dst = *cp; + cp_dst++; + } + cp++; + } + *text = MemFree (*text); + *text = new_str; + } + return repl_list; +} + + +NLM_EXTERN void AutoReplaceSpecialCharactersWithMessage (CharPtr PNTR text) +{ + ValNodePtr list, vnp; + + list = AutoReplaceSpecialCharactersInText(text); + for (vnp = list; vnp != NULL; vnp = vnp->next) { + Message (MSG_POSTERR, "%s", vnp->data.ptrvalue); + } + list = ValNodeFreeData (list); +} + + +NLM_EXTERN ValNodePtr AutoReplaceSpecialCharactersInTabTable (ValNodePtr row_list) +{ + ValNodePtr repl_list = NULL, col; + CharPtr cp; + + while (row_list != NULL) { + for (col = row_list->data.ptrvalue; col != NULL; col = col->next) { + cp = col->data.ptrvalue; + ValNodeLink (&repl_list, AutoReplaceSpecialCharactersInText(&cp)); + col->data.ptrvalue = cp; + } + row_list = row_list->next; + } + return repl_list; +} + + +NLM_EXTERN void AutoFixSpecialCharactersInEntity (Uint2 entityID) +{ + ValNodePtr bad_list = NULL, vnp, vnp_c; + Char label[2]; + CharPtr repl; + + label[1] = 0; + StringActionInEntity (entityID, FALSE, UPDATE_NEVER, NULL, NULL, NULL, TRUE, + SpecialCharFindWithContext, NULL, &bad_list); + for (vnp = bad_list; vnp != NULL; vnp = vnp->next) + { +#ifdef OS_WINNT + repl = GetSpecialWinCharacterReplacement ((unsigned char) vnp->choice); +#else + repl = GetSpecialMacCharacterReplacement ((unsigned char) vnp->choice); +#endif + label[0] = vnp->choice; + Message (MSG_POSTERR, "Replaced '%s' with '%s'", label, repl == NULL ? 
"" : repl); + for (vnp_c = vnp->data.ptrvalue; vnp_c != NULL; vnp_c = vnp_c->next) + { + FindReplaceString (vnp_c->data.ptrvalue, label, repl, TRUE, FALSE); + } + } + bad_list = FreeContextList (bad_list); +} + + /* Functions for reassigning affiliations of authors for Flu sequences */ typedef struct authaffil { CharPtr affil; @@ -17056,7 +18172,7 @@ NLM_EXTERN ValNodePtr MakeSplitPubListFromTabList (ValNodePtr PNTR tab_table, Se } -static AuthListPtr GetAuthorListForPub (PubPtr the_pub) +NLM_EXTERN AuthListPtr GetAuthorListForPub (PubPtr the_pub) { CitGenPtr cgp; CitSubPtr csp; @@ -17331,6 +18447,101 @@ static void AddStructuredCommentCallback (BioseqPtr bsp, Pointer data) } +static CharPtr official_prefix_list[] = { + "HIVDataBaseData", + "MIGS-Data", + "MIMS-Data", + "MIENS-Data", + "MIGS:3.0-Data", + "GISAID_EpiFlu(TM)Data", + "FluData", + "EpifluData", + "International Barcode of Life (iBOL)Data", + "Assembly-Data", + "Genome-Assembly-Data", + NULL +}; + + +NLM_EXTERN ValNodePtr GetStructuredCommentPrefixList (void) +{ + ValNodePtr list = NULL; + Int4 i; + + for (i = 0; official_prefix_list[i] != NULL; i++) { + ValNodeAddPointer (&list, 0, StringSave (official_prefix_list[i])); + } + return list; +} + + +static Int4 GetDbnameCoreLen (CharPtr dbname) +{ + Int4 len = StringLen (dbname); + if (len > 4 && StringICmp (dbname + len - 4, "Data") == 0) { + len -= 4; + } + if (len > 1 && StringNICmp (dbname + len - 1, "-", 1) == 0) { + len -= 1; + } + return len; +} + + +static CharPtr MatchesOfficialStructuredCommentDbname (CharPtr dbname) +{ + Int4 i; + Int4 len_orig; + Int4 len_can; + + len_orig = GetDbnameCoreLen (dbname); + for (i = 0; official_prefix_list[i] != NULL; i++) { + len_can = GetDbnameCoreLen (official_prefix_list[i]); + if (len_orig == len_can && StringNICmp (dbname, official_prefix_list[i], len_orig) == 0) { + return official_prefix_list[i]; + } + } + if (StringNICmp (dbname, "HIV-Database", len_orig) == 0) { + return "HIVDatabase"; + } + return NULL; 
+} + + +NLM_EXTERN CharPtr StructuredCommentDbnameFromString (CharPtr string) +{ + CharPtr dbname, tmp; + Int4 len; + + if (StringHasNoText (string)) { + return NULL; + } + + dbname = StringSave (string + StringSpn (string, "##")); + len = StringLen (dbname); + if (len > 2 && StringCmp (dbname + len - 2, "##") == 0) { + dbname[len - 2] = 0; + len -= 2; + } + if (len > 6 && StringCmp (dbname + len - 6, "-START") == 0) { + dbname[len - 6] = 0; + len -= 6; + } + if (len > 6 && StringCmp (dbname + len - 4, "-END") == 0) { + dbname[len - 4] = 0; + len -= 4; + } + + /* correct for weirdnesses with -data for recognizable prefixes */ + tmp = MatchesOfficialStructuredCommentDbname (dbname); + if (tmp != NULL) { + dbname = MemFree (dbname); + dbname = StringSave (tmp); + } + return dbname; +} + + static CharPtr MakeStructuredCommentPrefixFromString (CharPtr orig) { CharPtr core, new_prefix; @@ -17340,21 +18551,14 @@ static CharPtr MakeStructuredCommentPrefixFromString (CharPtr orig) return StringSave ("##Metadata-START##"); } - core = orig; - while (*core == '#') { - core++; - } + core = StructuredCommentDbnameFromString(orig); core_len = StringLen (core); - if (core_len > 8 && StringICmp (core + core_len - 8, "-START##") == 0) { - core_len -= 8; - } else if (core_len > 6 && StringICmp (core + core_len - 6, "-START") == 0) { - core_len -= 6; - } new_prefix = (CharPtr) MemNew (sizeof (Char) * (11 + core_len)); StringCpy (new_prefix, "##"); StringNCat (new_prefix, core, core_len); StringCat (new_prefix, "-START##"); + core = MemFree (core); return new_prefix; } @@ -17368,21 +18572,14 @@ static CharPtr MakeStructuredCommentSuffixFromString (CharPtr orig) return StringSave ("##Metadata-END##"); } - core = orig; - while (*core == '#') { - core++; - } + core = StructuredCommentDbnameFromString(orig); core_len = StringLen (core); - if (core_len > 6 && StringICmp (core + core_len - 6, "-END##") == 0) { - core_len -= 6; - } else if (core_len > 4 && StringICmp (core + core_len - 4, 
"-END") == 0) { - core_len -= 4; - } new_suffix = (CharPtr) MemNew (sizeof (Char) * (9 + core_len)); StringCpy (new_suffix, "##"); StringNCat (new_suffix, core, core_len); StringCat (new_suffix, "-END##"); + core = MemFree (core); return new_suffix; } @@ -17460,6 +18657,24 @@ NLM_EXTERN ValNodePtr CreateStructuredCommentsFromRow (ValNodePtr header, ValNod } +NLM_EXTERN void CreateStructuredCommentsForAllFromTable (SeqEntryPtr sep, ValNodePtr header, ValNodePtr line, ValNodePtr PNTR err_list) +{ + ValNodePtr tmp, vnp_l; + UserObjectPtr uop; + + while (line != NULL) { + tmp = CreateStructuredCommentsFromRow (header, line->data.ptrvalue, NULL, err_list); + for (vnp_l = tmp; vnp_l != NULL; vnp_l = vnp_l->next) { + uop = (UserObjectPtr) vnp_l->data.ptrvalue; + VisitBioseqsInSep (sep, uop, AddStructuredCommentCallback); + uop = UserObjectFree (uop); + } + tmp = ValNodeFree (tmp); + line = line->next; + } +} + + NLM_EXTERN ValNodePtr CreateStructuredCommentsFromFile (FILE *fp, SeqEntryPtr sep, Boolean apply_to_all) { ValNodePtr err_list = NULL; @@ -17469,7 +18684,6 @@ NLM_EXTERN ValNodePtr CreateStructuredCommentsFromFile (FILE *fp, SeqEntryPtr se CharPtr bad_id_fmt = "Unable to find sequence for %s"; CharPtr msg; BioseqPtr bsp; - UserObjectPtr uop; SeqDescrPtr sdp; if (fp == NULL || sep == NULL) { @@ -17496,16 +18710,7 @@ NLM_EXTERN ValNodePtr CreateStructuredCommentsFromFile (FILE *fp, SeqEntryPtr se line = table->next; if (apply_to_all) { - while (line != NULL) { - tmp = CreateStructuredCommentsFromRow (header, line->data.ptrvalue, NULL, &err_list); - for (vnp_l = tmp; vnp_l != NULL; vnp_l = vnp_l->next) { - uop = (UserObjectPtr) vnp_l->data.ptrvalue; - VisitBioseqsInSep (sep, uop, AddStructuredCommentCallback); - uop = UserObjectFree (uop); - } - tmp = ValNodeFree (tmp); - line = line->next; - } + CreateStructuredCommentsForAllFromTable (sep, header, line, &err_list); } else { while (line != NULL) { vnp_h = header; @@ -17658,7 +18863,7 @@ static void 
GetStructuredCommentsForBioseq(BioseqPtr bsp, Pointer data) for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); sdp != NULL; - sdp = sdp->next) { + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) { ValNodeLink (&list, RowFromStructuredComment (sdp->data.ptrvalue, &header)); } @@ -18122,7 +19327,9 @@ static Boolean RemoveDuplicateNestedSetsInSeqEntry (SeqEntryPtr top_sep) sep_prev->next = lower_bssp->seq_set; } sep_tmp = lower_bssp->seq_set; + sep_prev = sep_tmp; while (sep_tmp->next != NULL) { + sep_prev = sep_tmp; sep_tmp = sep_tmp->next; } sep_tmp->next = sep_next; @@ -18131,6 +19338,8 @@ static Boolean RemoveDuplicateNestedSetsInSeqEntry (SeqEntryPtr top_sep) sep->next = NULL; sep = SeqEntryFree (sep); rval = TRUE; + } else { + sep_prev = sep; } sep = sep_next; } @@ -18198,6 +19407,12 @@ NLM_EXTERN CharPtr KeywordForStructuredCommentName (UserObjectPtr uop) keyword = StringSave ("GSC:MIMS:2.1"); } else if (StringCmp (prefix, "##MIENS-Data-START##") == 0) { keyword = StringSave ("GSC:MIENS:2.1"); + } else if (StringCmp (prefix, "##MIGS:3.0-Data-START##") == 0) { + keyword = StringSave ("GSC:MIxS;MIGS:3.0"); + } else if (StringCmp (prefix, "##MIMS:3.0-Data-START##") == 0) { + keyword = StringSave ("GSC:MIxS;MIMS:3.0"); + } else if (StringCmp (prefix, "##MIMARKS:3.0-Data-START##") == 0) { + keyword = StringSave ("GSC:MIxS;MIMARKS:3.0"); } return keyword; @@ -18402,9 +19617,39 @@ NLM_EXTERN void RemoveStructuredCommentKeywords (Uint2 entityID) } +static Boolean StartsWith(CharPtr str, CharPtr start) +{ + Int4 str_len, start_len; + + str_len = StringLen (str); + start_len = StringLen (start); + + if (str_len < start_len || StringNICmp(str, start, start_len) != 0) { + return FALSE; + } else { + return TRUE; + } +} + + +static Boolean EndsWith(CharPtr str, CharPtr end) +{ + Int4 str_len, end_len; + + str_len = StringLen (str); + end_len = StringLen (end); + + if (str_len < end_len || StringICmp(str + str_len - end_len, end) != 
0) { + return FALSE; + } else { + return TRUE; + } +} + + static void TrimPrimerSeqJunkFromString (CharPtr str) { - Int4 len; + Int4 len, start_len = 0, end_len = 0; CharPtr src, dst; if (StringHasNoText (str)) { @@ -18412,23 +19657,27 @@ static void TrimPrimerSeqJunkFromString (CharPtr str) } len = StringLen (str); - if (len >= 7 && StringNCmp (str, "5'-", 3) == 0 && StringCmp (str + len - 3, "-3'") == 0) { - src = str + 3; - dst = str; - len -= 6; + if (StartsWith (str, "5'-") || StartsWith (str, "5`-")) { + start_len = 3; + } else if (StartsWith (str, "5-") || StartsWith (str, "5'") || StartsWith (str, "5`")) { + start_len = 2; + } else if (StartsWith (str, "-")) { + start_len = 1; + } - while (len > 0) { - *dst = *src; - src++; - dst++; - len--; - } - *dst = 0; - } else if ((len >= 5 && StringNCmp (str, "5-", 2) == 0 && StringCmp (str + len - 2, "-3") == 0) - || (len >= 5 && StringNCmp (str, "5'", 2) == 0 && StringCmp (str + len - 2, "3'") == 0)) { - src = str + 2; + if (EndsWith (str, "-3'") || EndsWith (str, "-3`")) { + end_len = 3; + } else if (EndsWith (str, "-3") || EndsWith(str, "3'") || EndsWith(str, "3`")) { + end_len = 2; + } else if (EndsWith (str, "-")) { + end_len = 1; + } + + if (end_len > 0 || start_len > 0) { + src = str + start_len; dst = str; - len -= 4; + len -= (end_len + start_len); + while (len > 0) { *dst = *src; src++; @@ -18441,30 +19690,46 @@ static void TrimPrimerSeqJunkFromString (CharPtr str) } +static Boolean TrimJunkFromPrimer (PCRPrimerPtr pp, FILE *log_fp) +{ + CharPtr orig = NULL; + Boolean rval = FALSE; + + if (pp == NULL || StringHasNoText (pp->seq)) { + return FALSE; + } + if (log_fp != NULL) { + orig = StringSave (pp->seq); + } + TrimPrimerSeqJunkFromString (pp->seq); + if (log_fp != NULL && StringCmp (orig, pp->seq) != 0) { + fprintf (log_fp, "Changed primer seq from %s to %s\n", orig, pp->seq); + rval = TRUE; + } + orig = MemFree (orig); + return rval; +} + + static Boolean TrimPrimerSeqJunkOnBioSource (BioSourcePtr biop, 
FILE *log_fp) { - SubSourcePtr ssp; - CharPtr orig = NULL; - Boolean rval = FALSE; + PCRReactionSetPtr ps; + PCRPrimerPtr pp; + Boolean rval = FALSE; if (biop == NULL) { return FALSE; } - for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { - if (ssp->subtype == SUBSRC_fwd_primer_seq - || ssp->subtype == SUBSRC_rev_primer_seq) { - if (log_fp != NULL) { - orig = StringSave (ssp->name); - } - TrimPrimerSeqJunkFromString (ssp->name); - if (log_fp != NULL && StringCmp (orig, ssp->name) != 0) { - fprintf (log_fp, "Changed primer seq from %s to %s\n", orig, ssp->name); - rval = TRUE; - } - orig = MemFree (orig); + for (ps = biop->pcr_primers; ps != NULL; ps = ps->next) { + for (pp = ps->forward; pp != NULL; pp = pp->next) { + rval |= TrimJunkFromPrimer(pp, log_fp); + } + for (pp = ps->reverse; pp != NULL; pp = pp->next) { + rval |= TrimJunkFromPrimer(pp, log_fp); } } + return rval; } @@ -18512,6 +19777,7 @@ static Boolean IsUSA (CharPtr country) { if (StringICmp (country, "USA") == 0 || StringICmp (country, "United States of America") == 0 + || StringICmp (country, "United States") == 0 || StringICmp (country, "U.S.A.") == 0 || StringICmp (country, "U S A") == 0) { return TRUE; @@ -18521,11 +19787,30 @@ static Boolean IsUSA (CharPtr country) } +static void FixStateAbbreviationsInCitSub (CitSubPtr csp, LogInfoPtr lip) +{ + if (csp != NULL && csp->authors != NULL + && csp->authors->affil != NULL + && IsUSA(csp->authors->affil->country)) { + if (StringCmp (csp->authors->affil->country, "USA") != 0) { + if (lip != NULL) { + if (lip->fp != NULL) { + fprintf (lip->fp, "Changed %s to USA\n", csp->authors->affil->country); + } + lip->data_in_log = TRUE; + } + csp->authors->affil->country = MemFree (csp->authors->affil->country); + csp->authors->affil->country = StringSave ("USA"); + } + FixStateAbbreviationsInAffil (csp->authors->affil, NULL); + } +} + + static void AbbreviateCitSubAffilStatesCallback (PubdescPtr pdp, Pointer data) { ValNodePtr vnp; CitSubPtr csp; - CharPtr 
abbrev; LogInfoPtr lip; if (pdp == NULL) return; @@ -18533,32 +19818,8 @@ static void AbbreviateCitSubAffilStatesCallback (PubdescPtr pdp, Pointer data) for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) { if (vnp->choice == PUB_Sub) { - csp = (CitSubPtr) vnp->data.ptrvalue; - if (csp != NULL && csp->authors != NULL - && csp->authors->affil != NULL - && IsUSA(csp->authors->affil->country)) { - if (StringCmp (csp->authors->affil->country, "USA") != 0) { - if (lip != NULL) { - if (lip->fp != NULL) { - fprintf (lip->fp, "Changed %s to USA\n", csp->authors->affil->country); - } - lip->data_in_log = TRUE; - } - csp->authors->affil->country = MemFree (csp->authors->affil->country); - csp->authors->affil->country = StringSave ("USA"); - } - abbrev = GetStateAbbreviation (csp->authors->affil->sub); - if (abbrev != NULL) { - if (lip != NULL) { - if (lip->fp != NULL) { - fprintf (lip->fp, "Changed %s to %s\n", csp->authors->affil->sub, abbrev); - } - lip->data_in_log = TRUE; - } - csp->authors->affil->sub = MemFree (csp->authors->affil->sub); - csp->authors->affil->sub = StringSave (abbrev); - } - } + csp = (CitSubPtr) vnp->data.ptrvalue; + FixStateAbbreviationsInCitSub (csp, lip); } } } @@ -18568,6 +19829,7 @@ NLM_EXTERN Boolean FixUsaAndStateAbbreviations (Uint2 entityID, FILE *log_fp) { SeqEntryPtr sep; LogInfoData lid; + SeqSubmitPtr ssp; sep = GetTopSeqEntryForEntityID (entityID); if (sep == NULL) @@ -18576,10 +19838,185 @@ NLM_EXTERN Boolean FixUsaAndStateAbbreviations (Uint2 entityID, FILE *log_fp) MemSet (&lid, 0, sizeof (LogInfoData)); lid.fp = log_fp; VisitPubdescsInSep (sep, &lid, AbbreviateCitSubAffilStatesCallback); + + ssp = FindSeqSubmitForSeqEntry (sep); + if (ssp != NULL && ssp->sub != NULL && ssp->sub->cit != NULL) { + FixStateAbbreviationsInCitSub (ssp->sub->cit, &lid); + } return lid.data_in_log; } +static ValNodePtr FindExonForInterval (BioseqPtr bsp, SeqLocPtr slp, Boolean match_from_exactly, Boolean match_to_exactly) +{ + SeqMgrFeatContext context; + 
SeqFeatPtr sfp; + ValNodePtr list = NULL; + Int4 from, to, feat_from, feat_to; + Uint1 strand; + SeqPntPtr spp; + SeqIntPtr sint; + + if (slp == NULL) { + return NULL; + } else if (slp->choice == SEQLOC_PNT) { + spp = (SeqPntPtr) slp->data.ptrvalue; + from = spp->point; + to = spp->point; + strand = spp->strand; + } else if (slp->choice == SEQLOC_INT) { + sint = (SeqIntPtr) slp->data.ptrvalue; + from = sint->from; + to = sint->to; + strand = sint->strand; + } else { + return NULL; + } + + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_exon, &context); + sfp != NULL && context.left <= to; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_exon, &context)) + { + /* note - have to use location values, rather than context.left and context.right, + * because exon may already have been altered for another mRNA/CDS + */ + if (sfp->location == NULL) { + /* no location */ + continue; + } else if (sfp->location->choice == SEQLOC_PNT) { + spp = (SeqPntPtr) sfp->location->data.ptrvalue; + feat_from = spp->point; + feat_to = spp->point; + } else if (sfp->location->choice == SEQLOC_INT) { + sint = (SeqIntPtr) sfp->location->data.ptrvalue; + feat_from = sint->from; + feat_to = sint->to; + } else { + /* not handling other types of locations */ + continue; + } + if (context.numivals != 1) { + /* not going to match multi-interval exons */ + } else if (match_from_exactly && feat_from != from) { + /* no match on from */ + } else if (!match_from_exactly && (feat_from < from || feat_from > to)) { + /* less restrictive match fails for from */ + } else if (match_to_exactly && feat_to != to) { + /* no match on to */ + } else if (!match_to_exactly && (feat_to > to || feat_to < from)) { + /* less restrictive match fails for to */ + } else if ((strand == Seq_strand_minus && context.strand != Seq_strand_minus) + || (strand != Seq_strand_minus && context.strand == Seq_strand_minus)) { + /* strand match fails */ + } else { + ValNodeAddPointer (&list, OBJ_SEQFEAT, sfp); + } + } + return 
list; +} + + +static ValNodePtr SaveOrigExonPositions (ValNodePtr exon_list) +{ + ValNodePtr vnp; + SeqFeatPtr exon; + CharPtr orig_loc; + ValNodePtr loc_list = NULL; + + for (vnp = exon_list; vnp != NULL; vnp = vnp->next) + { + exon = (SeqFeatPtr) vnp->data.ptrvalue; + orig_loc = SeqLocPrintUseBestID (exon->location); + ValNodeAddPointer (&loc_list, 0, orig_loc); + } + return loc_list; +} + + +static void FixExonsForInterval (ValNodePtr list, Int4 from_diff, Int4 to_diff) +{ + ValNodePtr vnp; + SeqFeatPtr exon; + SeqPntPtr spp; + SeqIntPtr sint; + + if (list == NULL) { + return; + } + for (vnp = list; vnp != NULL; vnp = vnp->next) { + exon = vnp->data.ptrvalue; + if (exon != NULL && exon->location != NULL) { + if (exon->location->choice == SEQLOC_PNT) { + spp = (SeqPntPtr) exon->location->data.ptrvalue; + sint = SeqIntNew (); + sint->id = spp->id; + spp->id = NULL; + sint->strand = spp->strand; + sint->to = spp->point; + sint->from = spp->point; + spp = SeqPntFree (spp); + exon->location->data.ptrvalue = sint; + } + sint = (SeqIntPtr) exon->location->data.ptrvalue; + sint->from += from_diff; + sint->to += to_diff; + } + } +} + +typedef struct exonloclist { + ValNodePtr feature_list; + ValNodePtr orig_loc_list; +} ExonLocListData, PNTR ExonLocListPtr; + + +static ExonLocListPtr ExonLocListNew (BioseqPtr bsp, SeqLocPtr slp, Boolean match_from_exactly, Boolean match_to_exactly) +{ + ExonLocListPtr el = (ExonLocListPtr) MemNew (sizeof (ExonLocListData)); + el->feature_list = FindExonForInterval(bsp, slp, match_from_exactly, match_to_exactly); + if (el->feature_list == NULL) { + el = MemFree (el); + } else { + el->orig_loc_list = SaveOrigExonPositions(el->feature_list); + } + return el; +} + + +static ExonLocListPtr ExonLocListFree (ExonLocListPtr el) +{ + if (el != NULL) { + el->feature_list = ValNodeFree (el->feature_list); + el->orig_loc_list = ValNodeFreeData (el->orig_loc_list); + el = MemFree (el); + } + return el; +} + + +static void ReportExonLocationChanges 
(ExonLocListPtr el, LogInfoPtr lip) +{ + ValNodePtr exon_v, orig; + SeqFeatPtr exon; + CharPtr new_loc; + + if (lip == NULL || el == NULL) { + return; + } + for (exon_v = el->feature_list, orig = el->orig_loc_list; exon_v != NULL && orig != NULL; exon_v = exon_v->next, orig = orig->next) { + exon = (SeqFeatPtr) exon_v->data.ptrvalue; + new_loc = SeqLocPrintUseBestID (exon->location); + if (StringCmp (orig->data.ptrvalue, new_loc) != 0) { + if (lip->fp != NULL) { + fprintf (lip->fp, "Adjusted location for splice consensus: %s became %s\n", orig->data.ptrvalue, new_loc); + } + lip->data_in_log = TRUE; + } + new_loc = MemFree (new_loc); + } +} + + static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data) { SeqLocPtr slp, slp_last = NULL; @@ -18593,6 +20030,7 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data) Boolean match; SeqIntPtr sint; SeqPntPtr spp; + ExonLocListPtr last_exon_list = NULL, this_exon_list = NULL; /* variables used for logging change */ CharPtr orig_loc = NULL, new_loc; Boolean changed = FALSE; @@ -18637,9 +20075,13 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data) for (slp = sfp->location->data.ptrvalue; slp != NULL; slp = slp->next) { CheckSeqLocForPartial (slp, &partial5, &partial3); exon_len = SeqLocLen (slp); + /* record underlying exon features */ + this_exon_list = ExonLocListNew (bsp, slp, TRUE, TRUE); + if (!first && !partial5 && !partial3_last && (slp_last->choice == SEQLOC_INT || slp_last->choice == SEQLOC_PNT) - && (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_PNT)) { + && (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_PNT)) { + /* check for donor and acceptor pair */ /* maximum search space is beginning of previous exon to end of current exon */ exon_len_last = SeqLocLen (slp_last); @@ -18710,7 +20152,7 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data) && buf[len - exon_len - 1 - diff] == 'G' && buf[len - exon_len - 2 - 
diff] == 'A') { match = TRUE; } else { - diff--; + diff++; } } if (match) { @@ -18728,14 +20170,26 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data) sint = (SeqIntPtr) slp->data.ptrvalue; if (sint->strand == Seq_strand_minus) { sint->to += diff; + if (this_exon_list != NULL) { + FixExonsForInterval (this_exon_list->feature_list, 0, diff); + } } else { sint->from -= diff; + if (this_exon_list != NULL) { + FixExonsForInterval (this_exon_list->feature_list, -diff, 0); + } } sint = (SeqIntPtr) slp_last->data.ptrvalue; if (sint->strand == Seq_strand_minus) { sint->from += diff; + if (last_exon_list != NULL) { + FixExonsForInterval (last_exon_list->feature_list, diff, 0); + } } else { sint->to -= diff; + if (last_exon_list != NULL) { + FixExonsForInterval (last_exon_list->feature_list, 0, -diff); + } } changed = TRUE; } @@ -18754,8 +20208,15 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data) partial5_last = partial5; partial3_last = partial3; slp_last = slp; + ReportExonLocationChanges (last_exon_list, lip); + last_exon_list = ExonLocListFree (last_exon_list); + last_exon_list = this_exon_list; first = FALSE; } + + ReportExonLocationChanges (last_exon_list, lip); + last_exon_list = ExonLocListFree (last_exon_list); + BioseqUnlock (bsp); if (changed) { @@ -18770,6 +20231,36 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data) } +static void AdjustSeqEntryForConsensusSpliceBioseqCallback (BioseqPtr bsp, Pointer data) +{ + SeqDescPtr sdp; + SeqMgrDescContext dcontext; + BioSourcePtr biop; + SeqFeatPtr sfp; + SeqMgrFeatContext fcontext; + + if (bsp == NULL || ISA_aa (bsp->mol)) { + return; + } + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext); + if (sdp == NULL || (biop = (BioSourcePtr)sdp->data.ptrvalue) == NULL + || (biop->genome != GENOME_genomic && biop->genome != GENOME_unknown) + || (biop->org != NULL && biop->org->orgname != NULL && StringISearch 
(biop->org->orgname->lineage, "viruses") != NULL) + || !HasTaxonomyID(biop)) + { + return; + } + + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext)) + { + AdjustForConsensusSpliceCallback (sfp, data); + } +} + + NLM_EXTERN Boolean AdjustSeqEntryForConsensusSpliceEx (SeqEntryPtr sep, FILE *log_fp) { LogInfoData lid; @@ -18780,7 +20271,7 @@ NLM_EXTERN Boolean AdjustSeqEntryForConsensusSpliceEx (SeqEntryPtr sep, FILE *lo MemSet (&lid, 0, sizeof (LogInfoData)); lid.fp = log_fp; - VisitFeaturesInSep (sep, &lid, AdjustForConsensusSpliceCallback); + VisitBioseqsInSep (sep, &lid, AdjustSeqEntryForConsensusSpliceBioseqCallback); return lid.data_in_log; } @@ -18788,3 +20279,124 @@ NLM_EXTERN void AdjustSeqEntryForConsensusSplice (SeqEntryPtr sep) { AdjustSeqEntryForConsensusSpliceEx (sep, NULL); } + + +NLM_EXTERN CharPtr ValNodeSeqIdName (ValNodePtr vnp) +{ + Char buf[100]; + + if (vnp == NULL || vnp->data.ptrvalue == NULL) + { + return NULL; + } + else + { + SeqIdWrite (vnp->data.ptrvalue, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1); + return StringSave (buf); + } +} + + +NLM_EXTERN void ValNodeSeqIdFree (ValNodePtr vnp) +{ + if (vnp != NULL && vnp->data.ptrvalue != NULL) + { + vnp->data.ptrvalue = SeqIdFree (vnp->data.ptrvalue); + } +} + + +NLM_EXTERN ValNodePtr ValNodeSeqIdCopy (ValNodePtr vnp) +{ + ValNodePtr vnp_copy = NULL; + if (vnp != NULL) + { + ValNodeAddPointer (&vnp_copy, vnp->choice, SeqIdDup (vnp->data.ptrvalue)); + } + return vnp_copy; +} + +NLM_EXTERN Boolean ValNodeSeqIdMatch (ValNodePtr vnp1, ValNodePtr vnp2) +{ + if (vnp1 == NULL || vnp2 == NULL) + { + return FALSE; + } + if (SeqIdComp (vnp1->data.ptrvalue, vnp2->data.ptrvalue) == SIC_YES) + { + return TRUE; + } + else + { + return FALSE; + } +} + + +NLM_EXTERN ValNodePtr ValNodeSeqIdListFree (ValNodePtr list) +{ + ValNodePtr list_next; + + while (list != NULL) { + list_next = list->next; + list->next = NULL; + 
list->data.ptrvalue = SeqIdFree (list->data.ptrvalue); + list = ValNodeFree (list); + list = list_next; + } + return list; +} + + +NLM_EXTERN ValNodePtr ValNodeSeqIdListCopy (ValNodePtr list) +{ + ValNodePtr vnp, list_copy = NULL, list_prev = NULL; + + while (list != NULL) { + vnp = ValNodeNew (list_prev); + vnp->data.ptrvalue = SeqIdDup (list->data.ptrvalue); + if (list_copy == NULL) { + list_copy = vnp; + } + list_prev = vnp; + list = list->next; + } + return list_copy; +} + + +NLM_EXTERN ValNodePtr SeqIdListToValNodeSeqIdList (SeqIdPtr sip_list) +{ + SeqIdPtr sip; + ValNodePtr list = NULL, vnp_p = NULL, vnp; + + for (sip = sip_list; sip != NULL; sip = sip->next) { + vnp = ValNodeNew (vnp_p); + if (vnp_p == NULL) { + list = vnp; + } + vnp->data.ptrvalue = SeqIdDup (sip); + vnp_p = vnp; + } + return list; +} + + +NLM_EXTERN SeqIdPtr ValNodeSeqIdListToSeqIdList (ValNodePtr vnp_list) +{ + ValNodePtr vnp; + SeqIdPtr sip_list = NULL, sip_prev = NULL, sip; + + for (vnp = vnp_list; vnp != NULL; vnp = vnp->next) { + sip = SeqIdDup (vnp->data.ptrvalue); + if (sip_prev == NULL) { + sip_list = sip; + } else { + sip_prev->next = sip; + } + sip_prev = sip; + } + return sip_list; +} + + diff --git a/api/sqnutil3.c b/api/sqnutil3.c index 37086009..f38e1df2 100644 --- a/api/sqnutil3.c +++ b/api/sqnutil3.c @@ -29,7 +29,7 @@ * * Version Creation Date: 2/7/00 * -* $Revision: 6.633 $ +* $Revision: 6.762 $ * * File Description: * @@ -712,6 +712,166 @@ NLM_EXTERN void LinkCDSmRNAbyLabel ( VisitBioseqsInSep (sep, NULL, BspLinkCDSmRNAbyLabel); } + +static void MakeOneLink ( + SeqFeatPtr f1, + SeqFeatPtr f2 +) + +{ + ObjectIdPtr oip; + SeqFeatXrefPtr xref; + Int4 id; + + if (f1 == NULL || f2 == NULL || f1->id.choice != 3 || f2->id.choice != 3) { + return; + } + + oip = (ObjectIdPtr) f1->id.value.ptrvalue; + if (oip != NULL && oip->str == NULL) { + id = oip->id; + if (id > 0) { + for (xref = f2->xref; xref != NULL && xref->id.choice != 3; xref = xref->next) continue; + if (xref != NULL) { 
+ oip = (ObjectIdPtr) xref->id.value.ptrvalue; + if (oip != NULL) { + if (oip->str != NULL) { + oip->str = MemFree (oip->str); + } + oip->id = id; + } + } else { + xref = SeqFeatXrefNew (); + if (xref != NULL) { + oip = ObjectIdNew (); + if (oip != NULL) { + oip->id = id; + xref->id.choice = 3; + xref->id.value.ptrvalue = (Pointer) oip; + xref->next = f2->xref; + f2->xref = xref; + } + } + } + } + } +} + + +static void CreateReciprocalLink ( + SeqFeatPtr f1, + SeqFeatPtr f2 +) + +{ + if (f1 == NULL || f2 == NULL || f1->id.choice != 3 || f2->id.choice != 3) { + return; + } + + MakeOneLink (f1, f2); + MakeOneLink (f2, f1); +} + + +static void LinkCDSmRNAbyLabelAndLocationCallback ( + BioseqPtr bsp, + Pointer userdata +) + +{ + SMFeatItemPtr PNTR array; + BioseqExtraPtr bspextra; + Uint2 entityID; + SMFeatItemPtr feat; + Int4 i, j, best_index, best_diff, diff; + Int4 num; + ObjMgrDataPtr omdp; + + if (bsp == NULL) return; + + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return; + + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return; + array = bspextra->featsByLabel; + num = bspextra->numfeats; + if (array == NULL || num < 1) return; + + entityID = bsp->idx.entityID; + if (entityID < 1) { + entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr); + } + + /* labels are all grouped together - for each cds/mRNA in group of identical labels, + * find match with best location. 
+ */ + for (i = 0; i < num - 1; i++) { + feat = array [i]; + if (feat->sfp == NULL) { + continue; + } else if (feat->sfp->xref != NULL) { + /* already assigned feat xref */ + continue; + } else if (feat->sfp->idx.subtype != FEATDEF_CDS && feat->sfp->idx.subtype != FEATDEF_mRNA) { + /* not interested in these feature types */ + } else { + best_index = -1; + for (j = i + 1; j < num && StringCmp (feat->label, array[j]->label) == 0; j++) { + if (array[j]->sfp == NULL) { + /* bad */ + } else if (array[j]->sfp->xref != NULL) { + /* already assigned feat xref */ + } else if (feat->sfp->idx.subtype == FEATDEF_CDS) { + if (array[j]->sfp->idx.subtype != FEATDEF_mRNA) { + /* wrong feature type */ + } else if ((diff = SeqLocAinB (feat->sfp->location, array[j]->sfp->location)) < 0) { + /* locations don't match */ + } else { + if (best_index == -1) { + /* don't have a best yet */ + best_index = j; + best_diff = diff; + } else if (diff < best_diff) { + best_index = j; + best_diff = diff; + } + } + } else if (feat->sfp->idx.subtype == FEATDEF_mRNA) { + if (array[j]->sfp->idx.subtype != FEATDEF_CDS) { + /* wrong feature type */ + } else if ((diff = SeqLocAinB (array[j]->sfp->location, feat->sfp->location)) < 0) { + /* locations don't match */ + } else { + if (best_index == -1) { + /* don't have a best yet */ + best_index = j; + best_diff = diff; + } else if (diff < best_diff) { + best_index = j; + best_diff = diff; + } + } + } + } + if (best_index > -1) { + CreateReciprocalLink (feat->sfp, array[best_index]->sfp); + } + } + } +} + + +NLM_EXTERN void LinkCDSmRNAbyLabelAndLocation ( + SeqEntryPtr sep +) + +{ + AssignFeatureIDs (sep); + VisitBioseqsInSep (sep, NULL, LinkCDSmRNAbyLabelAndLocationCallback); +} + + typedef struct ovpdata { SeqFeatPtr sfp; Char revstr [42]; @@ -1309,6 +1469,44 @@ NLM_EXTERN void StripGeneRnaPcrAsnFilter ( } } +NLM_EXTERN void StripSeqFeatSupportAsnFilter ( + AsnIoPtr aip, + AsnIoPtr aop +) + +{ + AsnModulePtr amp; + AsnTypePtr atp, atp_se, atp_sf; + 
DataVal dv; + SeqFeatPtr sfp; + SeqFeatSupportPtr support; + + if (aip == NULL || aop == NULL) return; + + amp = AsnAllModPtr (); + if (amp == NULL) return; + atp_se = AsnFind ("Seq-entry"); + atp_sf = AsnFind ("Seq-annot.data.ftable.E"); + if (atp_se == NULL || atp_sf == NULL) return; + + atp = atp_se; + + while ((atp = AsnReadId (aip, amp, atp)) != NULL) { + if (atp == atp_sf) { + sfp = SeqFeatAsnRead (aip, atp); + support = sfp->support; + sfp->support = NULL; + SeqFeatAsnWrite (sfp, aop, atp); + sfp->support = support; + SeqFeatFree (sfp); + } else { + AsnReadVal (aip, atp, &dv); + AsnWrite (aop, atp, &dv); + AsnKillValue (atp, &dv); + } + } +} + /* CautiousSeqEntryCleanup section */ static Boolean EmptyOrNullString (CharPtr str) @@ -2163,6 +2361,7 @@ static FeatdefNameData featdefWithName [] = { { FEATDEF_otherRNA , "misc_RNA" }, { FEATDEF_misc_signal , "misc_signal" }, { FEATDEF_misc_structure , "misc_structure" }, + { FEATDEF_mobile_element , "mobile_element" }, { FEATDEF_modified_base , "modified_base" }, { FEATDEF_mRNA , "mRNA" }, { FEATDEF_NON_STD_RESIDUE , "NonStdRes" }, @@ -2343,7 +2542,8 @@ static CharPtr featurekeys [] = { "ncRNA", "tmRNA", "CloneRef", - "VariationRef" + "VariationRef", + "mobile_element" }; NLM_EXTERN CharPtr FindKeyFromFeatDefType (Uint1 type, Boolean forGBFF) @@ -2364,6 +2564,8 @@ NLM_EXTERN CharPtr FindKeyFromFeatDefType (Uint1 type, Boolean forGBFF) type == FEATDEF_BOND || type == FEATDEF_SITE) { key = "misc_feature"; + } else if (type == FEATDEF_VARIATIONREF) { + key = "variation"; } } @@ -3849,14 +4051,37 @@ static ValNodePtr ParsePCRColonString ( return head; } +static CharPtr FusePrimerNames ( + CharPtr first, + CharPtr second +) + +{ + size_t len; + CharPtr str; + + if (first == NULL) return second; + if (second == NULL) return first; + + len = StringLen (first) + StringLen (second) + 5; + str = MemNew (len); + if (str == NULL) return NULL; + + StringCpy (str, first); + StringCat (str, ":"); + StringCat (str, second); + + 
return str; +} + static PCRPrimerPtr ModernizePCRPrimerHalf ( CharPtr seq, CharPtr name ) { - CharPtr curr_name = NULL, curr_seq = NULL; - PCRPrimerPtr curr_primer, last_primer = NULL, primer_set = NULL; + CharPtr curr_name = NULL, curr_seq = NULL, fused_name; + PCRPrimerPtr curr_primer = NULL, last_primer = NULL, primer_set = NULL; ValNodePtr name_list, seq_list, name_vnp, seq_vnp; seq_list = ParsePCRColonString (seq); @@ -3865,7 +4090,7 @@ static PCRPrimerPtr ModernizePCRPrimerHalf ( seq_vnp = seq_list; name_vnp = name_list; - while (seq_vnp != NULL || name_vnp != NULL) { + while (seq_vnp != NULL /* || name_vnp != NULL */) { if (seq_vnp != NULL) { curr_seq = (CharPtr) seq_vnp->data.ptrvalue; seq_vnp = seq_vnp->next; @@ -3873,6 +4098,8 @@ static PCRPrimerPtr ModernizePCRPrimerHalf ( if (name_vnp != NULL) { curr_name = (CharPtr) name_vnp->data.ptrvalue; name_vnp = name_vnp->next; + } else { + curr_name = NULL; } curr_primer = (PCRPrimerPtr) MemNew (sizeof (PCRPrimer)); @@ -3890,6 +4117,31 @@ static PCRPrimerPtr ModernizePCRPrimerHalf ( } } + while (name_vnp != NULL && last_primer != NULL) { + curr_name = (CharPtr) name_vnp->data.ptrvalue; + fused_name = FusePrimerNames (last_primer->name, curr_name); + MemFree (last_primer->name); + last_primer->name = StringSaveNoNull (fused_name); + name_vnp = name_vnp->next; + } + + while (name_vnp != NULL && last_primer == NULL) { + curr_name = (CharPtr) name_vnp->data.ptrvalue; + curr_primer = (PCRPrimerPtr) MemNew (sizeof (PCRPrimer)); + if (curr_primer != NULL) { + curr_primer->name = StringSaveNoNull (curr_name); + + if (primer_set == NULL) { + primer_set = curr_primer; + } + if (last_primer != NULL) { + last_primer->next = curr_primer; + } + last_primer = curr_primer; + } + name_vnp = name_vnp->next; + } + ValNodeFreeData (seq_list); ValNodeFreeData (name_list); @@ -3911,7 +4163,7 @@ NLM_EXTERN void ModernizePCRPrimers ( Boolean unlink; if (biop == NULL) return; - if (biop->pcr_primers != NULL) return; + /* if 
(biop->pcr_primers != NULL) return; */ pset = ParsePCRSet (biop); if (pset == NULL) return; @@ -3944,6 +4196,10 @@ NLM_EXTERN void ModernizePCRPrimers ( FreePCRSet (pset); if (reaction_set != NULL) { + if (last_reaction != NULL) { + /* merge with existing structured pcr_primers */ + last_reaction->next = biop->pcr_primers; + } biop->pcr_primers = reaction_set; ssp = biop->subtype; @@ -5576,6 +5832,13 @@ extern Boolean RemoveSequenceFromAlignments (SeqEntryPtr sep, SeqIdPtr sip) return TRUE; } +static CharPtr evCategoryPrefix [] = { + "", + "COORDINATES: ", + "DESCRIPTION: ", + "EXISTENCE: ", + NULL +}; static CharPtr inferencePrefix [] = { "", @@ -5657,6 +5920,23 @@ static Int2 ValidateInferenceAccession (CharPtr str, Char chr, Boolean fetchAccn return rsult; } +static Char NextColonOrVerticalBar (CharPtr ptr) + +{ + Char ch = '\0'; + + if (ptr == NULL) return ch; + + ch = *ptr; + while (ch != '\0') { + if (ch == ':' || ch == '|') return ch; + ptr++; + ch = *ptr; + } + + return ch; +} + NLM_EXTERN Int2 ValidateInferenceQualifier (CharPtr val, Boolean fetchAccn) { @@ -5664,13 +5944,21 @@ NLM_EXTERN Int2 ValidateInferenceQualifier (CharPtr val, Boolean fetchAccn) Char ch; Boolean has_fetch_function, same_species; size_t len; - CharPtr nxt, ptr, rest, str; + CharPtr nxt, ptr, rest, skip, str; ObjMgrProcPtr ompp = NULL; if (StringHasNoText (val)) return EMPTY_INFERENCE_STRING; - rest = NULL; - best = -1; + skip = NULL; + for (j = 0; evCategoryPrefix [j] != NULL; j++) { + len = StringLen (evCategoryPrefix [j]); + if (StringNICmp (val, evCategoryPrefix [j], len) != 0) continue; + skip = val + len; + } + if (skip != NULL) { + val = skip; + } + for (j = 0; inferencePrefix [j] != NULL; j++) { len = StringLen (inferencePrefix [j]); if (StringNICmp (val, inferencePrefix [j], len) != 0) continue; @@ -5715,27 +6003,38 @@ NLM_EXTERN Int2 ValidateInferenceQualifier (CharPtr val, Boolean fetchAccn) str = StringSave (rest); if (best >= 1 && best <= 7) { - tmprsult = 
ValidateInferenceAccession (str, ':', fetchAccn, has_fetch_function); - if (tmprsult != VALID_INFERENCE) { - rsult = tmprsult; + ptr = str; + while (ptr != NULL) { + nxt = StringChr (ptr, ','); + if (nxt != NULL) { + *nxt = '\0'; + nxt++; + } + tmprsult = ValidateInferenceAccession (ptr, ':', fetchAccn, has_fetch_function); + if (tmprsult != VALID_INFERENCE) { + rsult = tmprsult; + } + ptr = nxt; } } else if (best == 12) { tmprsult = VALID_INFERENCE; - if (StringChr (str, '|') != NULL) { - ptr = StringRChr (str, ':'); - while (ptr != NULL) { - *ptr = '\0'; - ptr++; - nxt = StringChr (ptr, ','); - if (nxt != NULL) { - *nxt = '\0'; - } - tmprsult = ValidateInferenceAccession (ptr, '|', fetchAccn, has_fetch_function); - if (tmprsult != VALID_INFERENCE) { - rsult = tmprsult; - } - ptr = nxt; + ptr = StringRChr (str, ':'); + if (ptr != NULL) { + *ptr = '\0'; + ptr++; + } + while (ptr != NULL) { + nxt = StringChr (ptr, ','); + if (nxt != NULL) { + *nxt = '\0'; + nxt++; } + ch = NextColonOrVerticalBar (ptr); + tmprsult = ValidateInferenceAccession (ptr, ch, fetchAccn, has_fetch_function); + if (tmprsult != VALID_INFERENCE) { + rsult = tmprsult; + } + ptr = nxt; } } @@ -5804,6 +6103,7 @@ extern void MergeFeatureIntervalsToParts (SeqFeatPtr sfp, Boolean ordered) } extern void ExtendSingleGeneOnMRNA (BioseqPtr bsp, Pointer userdata) + { MolInfoPtr mip; SeqDescrPtr sdp; @@ -5812,6 +6112,8 @@ extern void ExtendSingleGeneOnMRNA (BioseqPtr bsp, Pointer userdata) SeqFeatPtr sfp; SeqMgrFeatContext context; Int4 num_cds = 0; + Int4 num_mrna = 0; + SeqIdPtr sip; SeqLocPtr slp; Boolean partial5, partial3; BioSourcePtr biop; @@ -5866,12 +6168,27 @@ extern void ExtendSingleGeneOnMRNA (BioseqPtr bsp, Pointer userdata) } } else if (sfp->data.choice == SEQFEAT_CDREGION) { num_cds++; - /* skip this seuqence if it has more than one coding region */ + /* skip this sequence if it has more than one coding region */ if (num_cds > 1 && !is_master_seq) { return; } + } else if (sfp->idx.subtype == 
FEATDEF_mRNA) { + num_mrna++; + /* skip this sequence if it has more than one mRNA */ + if (num_mrna > 1) return; + } + } + + if (gene != NULL && gene->location != NULL) { + slp = gene->location; + if (slp->choice != SEQLOC_INT) { + for (sip = bsp->id; sip != NULL; sip = sip->next) { + /* skip this sequence if it is multi-interval and EMBL or DDBJ */ + if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) return; + } } } + if (gene != NULL && BioseqFindFromSeqLoc (gene->location) == bsp) { CheckSeqLocForPartial (gene->location, &partial5, &partial3); has_nulls = LocationHasNullsBetween (gene->location); @@ -5994,6 +6311,30 @@ NewClickableItem } +extern ClickableItemPtr +NewClickableItemNoList +(Uint4 clickable_item_type, + CharPtr description) +{ + ClickableItemPtr dip; + + dip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData)); + if (dip != NULL) + { + dip->clickable_item_type = clickable_item_type; + dip->description = StringSave (description); + dip->callback_func = NULL; + dip->datafree_func = NULL; + dip->callback_data = NULL; + dip->item_list = NULL; + dip->subcategories = NULL; + dip->expanded = FALSE; + dip->level = 0; + } + return dip; +} + + extern ValNodePtr ClickableItemObjectListFree (ValNodePtr vnp) { ValNodePtr vnp_next; @@ -7511,6 +7852,8 @@ static void GeneLocusTagDiscrepancyCallback (ValNodePtr item_list, Pointer userd Message (MSG_OK, "I could launch the editor for the individual gene..."); } +static Boolean IsBacterialBioSource (BioSourcePtr biop); + /* Not WGS, genome, or RefSeq */ static Boolean IsLocationDirSub (SeqLocPtr slp) { @@ -7571,7 +7914,7 @@ static Boolean IsLocationDirSub (SeqLocPtr slp) sdp != NULL && rval; sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &context)) { biop = (BioSourcePtr) sdp->data.ptrvalue; - if (biop != NULL && biop->org != NULL && biop->org->orgname != NULL && StringICmp (biop->org->orgname->div, "BCT") == 0) { + if (IsBacterialBioSource(biop)) { rval = FALSE; } } @@ -8348,7 +8691,7 @@ 
static void FindShortIntronsCallback (SeqFeatPtr sfp, Pointer data) Boolean found_short = FALSE, partial5, partial3; Uint1 strand; - if (sfp == NULL || data == NULL) { + if (sfp == NULL || data == NULL || IsPseudo (sfp)) { return; } if (sfp->idx.subtype == FEATDEF_intron) { @@ -8698,6 +9041,77 @@ CheckFeatureTypeForLocationDiscrepancies } +static Boolean HasLineage (BioSourcePtr biop, CharPtr lineage) +{ + CharPtr forced_lineage; + + forced_lineage = GetAppProperty ("ReportLineage"); + if (StringISearch (forced_lineage, lineage) != NULL) + { + return TRUE; + } + else if (StringHasNoText (forced_lineage) + && biop != NULL && biop->org != NULL && biop->org->orgname != NULL + && StringISearch (biop->org->orgname->lineage, lineage) != NULL) + { + return TRUE; + } + else + { + return FALSE; + } +} + + +static Boolean BioseqHasLineage (BioseqPtr bsp, CharPtr lineage) +{ + SeqMgrDescContext context; + SeqDescrPtr sdp; + BioSourcePtr biop; + CharPtr forced_lineage; + + forced_lineage = GetAppProperty ("ReportLineage"); + if (!StringHasNoText (forced_lineage)) { + if (StringISearch (forced_lineage, lineage) != NULL) + { + return TRUE; + } else { + return FALSE; + } + } else if (bsp == NULL) { + return FALSE; + } + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); + if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL + || biop->org == NULL + || biop->org->orgname == NULL + || StringISearch (biop->org->orgname->lineage, lineage) == NULL) { + return FALSE; + } else { + return TRUE; + } + +} + + +static Boolean IsEukaryoticBioSource (BioSourcePtr biop) +{ + return HasLineage(biop, "Eukaryota"); +} + + +static Boolean IsViralBioSource (BioSourcePtr biop) +{ + return HasLineage(biop, "Viruses"); +} + + +static Boolean IsBacterialBioSource (BioSourcePtr biop) +{ + return HasLineage(biop, "Bacteria"); +} + + static Boolean IsEukaryotic (BioseqPtr bsp) { SeqMgrDescContext context; @@ -8713,8 +9127,7 @@ static Boolean IsEukaryotic (BioseqPtr bsp) || 
biop->genome == GENOME_chloroplast || biop->genome == GENOME_plastid || biop->genome == GENOME_apicoplast - || biop->org == NULL || biop->org->orgname == NULL - || StringSearch (biop->org->orgname->lineage, "Eukaryota") == NULL) { + || !IsEukaryoticBioSource(biop)) { return FALSE; } else { return TRUE; @@ -9174,6 +9587,20 @@ extern void FindPseudoDiscrepancies (ValNodePtr PNTR discrepancy_list, ValNodePt } + +static Boolean IsProtRefEmpty (ProtRefPtr prp) +{ + if (prp == NULL) { + return TRUE; + } else if (prp->name != NULL || prp->desc != NULL || prp->ec != NULL + || prp->activity != NULL || prp->db != NULL || prp->processed != 0) { + return FALSE; + } else { + return TRUE; + } +} + + NLM_EXTERN void OncallerToolPseudoDiscrepanciesFix (ValNodePtr item_list, Pointer data, LogInfoPtr lip) { ValNodePtr vnp, entityIDList = NULL; @@ -9181,6 +9608,10 @@ NLM_EXTERN void OncallerToolPseudoDiscrepanciesFix (ValNodePtr item_list, Pointe SeqFeatPtr sfp, mrna; CharPtr feat_txt; SeqMgrFeatContext fcontext; + SeqFeatXrefPtr xref, prev_xref, next_xref; + ValNodePtr next_name; + ProtRefPtr prp; + RnaRefPtr rrp; MemSet (&vn, 0, sizeof (ValNode)); vn.choice = OBJ_SEQFEAT; @@ -9209,7 +9640,44 @@ NLM_EXTERN void OncallerToolPseudoDiscrepanciesFix (ValNodePtr item_list, Pointe lip->data_in_log = TRUE; } mrna->pseudo = TRUE; + /* move mRNA product to comment */ + if ((rrp = (RnaRefPtr) mrna->data.value.ptrvalue) != NULL + && rrp->ext.choice == 1) { + SetStringValue (&(mrna->comment), rrp->ext.value.ptrvalue, ExistingTextOption_append_semi); + rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue); + rrp->ext.choice = 0; + } } + + /* move CDS protein name to comment */ + prev_xref = NULL; + for (xref = sfp->xref; xref != NULL; xref = next_xref) { + next_xref = xref->next; + if (xref->data.choice == SEQFEAT_PROT + && (prp = (ProtRefPtr) xref->data.value.ptrvalue) != NULL + && prp->name != NULL + && !StringHasNoText (prp->name->data.ptrvalue)) { + SetStringValue (&(sfp->comment), 
prp->name->data.ptrvalue, ExistingTextOption_append_semi); + prp->name->data.ptrvalue = MemFree (prp->name->data.ptrvalue); + next_name = prp->name->next; + prp->name->next = NULL; + prp->name = ValNodeFreeData (prp->name); + prp->name = next_name; + if (IsProtRefEmpty(prp)) { + if (prev_xref == NULL) { + sfp->xref = next_xref; + } else { + prev_xref->next = next_xref; + } + xref->next = NULL; + xref = SeqFeatXrefFree (xref); + } else { + prev_xref = xref; + } + } else { + prev_xref = xref; + } + } } } } @@ -9600,7 +10068,7 @@ static void RemoveCodingRegionsWithSuppressionWords (ValNodePtr PNTR cds_list) } field = FeatureFieldNew (); - field->type = Feature_type_cds; + field->type = Macro_feature_type_cds; field->field = ValNodeNew (NULL); field->field->choice = FeatQualChoice_legal_qual; field->field->data.intvalue = Feat_qual_legal_product; @@ -10155,6 +10623,52 @@ extern void FindShortContigs (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_l } } + +static void RemoveShortContigsWithoutAnnotation (ValNodePtr item_list, Pointer data, LogInfoPtr lip) +{ + ValNodePtr vnp, entityIDList = NULL; + BioseqPtr bsp; + SeqFeatPtr sfp; + SeqMgrFeatContext context; + CharPtr txt; + + if (Message (MSG_OKC, "Are you sure you want to remove short contigs without annotation?") == ANS_CANCEL) { + return; + } + + for (vnp = item_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_BIOSEQ) { + bsp = (BioseqPtr) vnp->data.ptrvalue; + if (bsp->annot == NULL) { + sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context); + if (sfp == NULL) { + if (lip != NULL) { + lip->data_in_log = TRUE; + if (lip->fp != NULL) { + txt = GetDiscrepancyItemText (vnp); + fprintf (lip->fp, "Removed short contig without annotation: %s\n", txt); + txt = MemFree (txt); + } + } + bsp->idx.deleteme = TRUE; + ValNodeAddInt (&entityIDList, 0, bsp->idx.entityID); + } + } + } + } + + entityIDList = ValNodeSort (entityIDList, SortByIntvalue); + ValNodeUnique (&entityIDList, SortByIntvalue, ValNodeFree); + 
+ for (vnp = entityIDList; vnp != NULL; vnp = vnp->next) { + DeleteMarkedObjects (vnp->data.intvalue, 0, NULL); + ObjMgrSetDirtyFlag (vnp->data.intvalue, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, vnp->data.intvalue, 0, 0); + } + entityIDList = ValNodeFree (entityIDList); +} + + static void FindShortSequencesCallback (BioseqPtr bsp, Pointer userdata) { ValNodePtr PNTR bioseq_list; @@ -10473,10 +10987,57 @@ static Boolean DoesStringContainPhrase (CharPtr str, CharPtr phrase, Boolean cas } typedef Boolean (*SuspectProductNameSearchFunc) PROTO ((CharPtr, CharPtr)); +typedef void (*SuspectProductNameReplaceFunc) PROTO ((CharPtr PNTR, CharPtr, CharPtr, SeqFeatPtr)); + +typedef enum { + eSuspectNameType_None = 0, + eSuspectNameType_Typo = 1, + eSuspectNameType_QuickFix, + eSuspectNameType_NoOrganelleForProkaryote, + eSuspectNameType_MightBeNonfunctional, + eSuspectNameType_Database, + eSuspectNameType_RemoveOrganismName, + eSuspectNameType_InappropriateSymbol, + eSuspectNameType_EvolutionaryRelationship, + eSuspectNameType_UseProtein, + eSuspectNameType_Max +} ESuspectNameType; + +static CharPtr suspect_name_category_names[] = { + "Unknown category", + "Typo", + "Quick fix", + "Organelles not appropriate in prokaryote", + "Suspicous phrase; should this be nonfunctional?", + "May contain database identifer more appropriate in note; remove from product name", + "Remove organism from product name", + "Possible parsing error or incorrect formatting; remove inappropriate symbols", + "Implies evolutionary relationship; change to -like protein", + "Use xxx protein or xxx-containing protein", + "Unknown category" +}; + + +static Boolean CategoryOkForBioSource (BioSourcePtr biop, ESuspectNameType name_type) +{ + if (name_type != eSuspectNameType_NoOrganelleForProkaryote) { + return TRUE; + } else if (!HasTaxonomyID (biop)) { + return TRUE; + } else if (IsEukaryoticBioSource(biop)) { + return FALSE; + } else { + return TRUE; + } +} + typedef struct suspectproductname { CharPtr 
pattern; SuspectProductNameSearchFunc search_func; + ESuspectNameType fix_type; + CharPtr replace_phrase; + SuspectProductNameReplaceFunc replace_func; } SuspectProductNameData, PNTR SuspectProductNamePtr; @@ -10510,185 +11071,34 @@ static Boolean StartsWithPattern (CharPtr pattern, CharPtr search) } -static Boolean ProductContainsTerm (CharPtr pattern, CharPtr search) -{ - CharPtr str; - - /* don't bother searching for c-term or n-term if product name contains "domain" */ - if (StringISearch (search, "domain") != NULL) { - return FALSE; - } +static CharPtr s_putative_replacements[] = { + "possible", + "potential", + "predicted", + "probable", + NULL +}; - str = StringISearch(search, pattern); - /* c-term and n-term must be either first word or separated from other word by space, num, or punct */ - if (str != NULL && (str == search || !isalpha (*(str - 1)))) { - return TRUE; - } else { - return FALSE; - } -} -static Boolean MayContainPlural (CharPtr pattern, CharPtr search) +static Boolean StartsWithPutativeReplacement (CharPtr pattern, CharPtr search) { - CharPtr cp; - Char last_letter, second_to_last_letter, next_letter; - Int4 word_len = 0; - Boolean may_contain_plural = FALSE; - CharPtr word_skip = " ,"; + Int4 i; - if (search == NULL) return FALSE; - cp = search; - while (*cp != 0 && !may_contain_plural) { - word_len = StringCSpn (cp, word_skip); - last_letter = *(cp + word_len - 1); - if (last_letter == 's') { - if (word_len >=5 && StringNCmp (cp + word_len - 5, "trans", 5) == 0) { - /* not plural */ - cp = cp + word_len; - cp += StringSpn (cp, word_skip); - } else if (word_len > 2 - && (second_to_last_letter = *(cp + word_len - 2)) != 's' - && second_to_last_letter != 'i' - && second_to_last_letter != 'u' - && ((next_letter = *(cp + word_len)) == ',' || next_letter == 0)) { - may_contain_plural = TRUE; - } else { - cp = cp + word_len; - cp += StringSpn (cp, word_skip); - } - } else { - cp = cp + word_len; - cp += StringSpn (cp, word_skip); + for (i = 0; 
s_putative_replacements[i] != NULL; i++) { + if (StartsWithPattern(s_putative_replacements[i], search)) { + return TRUE; } } - return may_contain_plural; -} - - -static CharPtr FindFirstOpen (CharPtr cp) -{ - CharPtr pa, ba; - - if (cp == NULL) { - return NULL; - } - pa = StringChr (cp, '('); - ba = StringChr (cp, '['); - if (pa == NULL) { - return ba; - } else if (ba == NULL || ba > pa) { - return pa; - } else { - return ba; - } -} - - -static Char GetClose (Char ch) -{ - if (ch == '(') { - return ')'; - } else if (ch == '[') { - return ']'; - } else if (ch == '{') { - return '}'; - } else { - return ch; - } + return FALSE; } -static Boolean SkipBracketOrParen (CharPtr bp, CharPtr start, CharPtr PNTR skip_to) +static Boolean MayContainPlural (CharPtr pattern, CharPtr search) { - Boolean rval = FALSE; - CharPtr ep, ns; - - if (bp - start > 2 && StringNCmp (bp - 3, "NAD(P)", 6) == 0) { - rval = TRUE; - *skip_to = bp + 6; - } else if (StringNCmp (bp, "(NAD(P)H)", 9) == 0) { - rval = TRUE; - *skip_to = bp + 9; - } else if (StringNCmp (bp, "(NAD(P))", 8) == 0) { - rval = TRUE; - *skip_to = bp + 8; - } else if (StringNCmp (bp, "(I)", 3) == 0) { - rval = TRUE; - *skip_to = bp + 4; - } else if (StringNCmp (bp, "(II)", 4) == 0) { - rval = TRUE; - *skip_to = bp + 5; - } else if (StringNCmp (bp, "(III)", 5) == 0) { - rval = TRUE; - *skip_to = bp + 6; - } else if (StringNCmp (bp, "(NADPH)", 7) == 0) { - rval = TRUE; - *skip_to = bp + 7; - } else if (StringNCmp (bp, "(NAD+)", 6) == 0) { - rval = TRUE; - *skip_to = bp + 6; - } else if (StringNCmp (bp, "(NAPPH/NADH)", 12) == 0) { - rval = TRUE; - *skip_to = bp + 12; - } else if (StringNCmp (bp, "(NADP+)", 7) == 0) { - rval = TRUE; - *skip_to = bp + 7; - } else if (StringNCmp (bp, "[acyl-carrier protein]", 22) == 0) { - rval = TRUE; - *skip_to = bp + 22; - } else if (StringNCmp (bp, "[acyl-carrier-protein]", 22) == 0) { - rval = TRUE; - *skip_to = bp + 22; - } else if (StringNCmp (bp, "(acyl carrier protein)", 22) == 0) { - rval 
= TRUE; - *skip_to = bp + 22; - } else { - ns = StringChr (bp + 1, *bp); - ep = StringChr (bp + 1, GetClose(*bp)); - if (ep != NULL && (ns == NULL || ns > ep)) { - if (ep - bp < 5) { - rval = TRUE; - *skip_to = ep + 1; - } else if (ep - bp > 3 && StringNCmp (ep - 3, "ing", 3) == 0) { - rval = TRUE; - *skip_to = ep + 1; - } - } - } - return rval; + return StringMayContainPlural (search); } -static Boolean ContainsNorMoreSetsOfBracketsOrParentheses (CharPtr search, Int4 n) -{ - CharPtr cp, end; - Int4 num_found = 0; - - if (search == NULL) { - return FALSE; - } - - cp = FindFirstOpen(search); - while (num_found < n && cp != NULL && *cp != 0) { - if (SkipBracketOrParen(cp, search, &cp)) { - /* ignore it */ - cp = FindFirstOpen (cp); - } else if ((end = StringChr (cp, GetClose (*cp))) == NULL) { - /* skip, doesn't close the bracket */ - cp = FindFirstOpen (cp + 1); - } else { - cp = FindFirstOpen (end); - num_found ++; - } - } - - if (num_found >= n) { - return TRUE; - } else { - return FALSE; - } -} - static Boolean ContainsBracketsOrParentheses (CharPtr pattern, CharPtr search) { @@ -10731,6 +11141,24 @@ static Boolean BeginsWithPunct (CharPtr pattern, CharPtr search) } +static Boolean BeginsOrEndsWithQuotes (CharPtr pattern, CharPtr search) +{ + Int4 len; + + if (search == NULL) return FALSE; + if (search[0] == '\'' || search[0] == '"') { + return TRUE; + } else { + len = StringLen (search); + if (search[len - 1] == '\'' || search[len - 1] == '"') { + return TRUE; + } else { + return FALSE; + } + } +} + + static Boolean ContainsUnknownName (CharPtr pattern, CharPtr search) { if (StringISearch(search, pattern) != NULL @@ -10810,150 +11238,21 @@ static Boolean NormalSearch (CharPtr pattern, CharPtr search) } -static Boolean FollowedByFamily (CharPtr PNTR str) -{ - Int4 word_len; - - if (str == NULL || *str == NULL || **str == 0) { - return FALSE; - } - - word_len = StringCSpn (*str + 1, " "); - if (*(*str + word_len + 1) != 0 && StringNCmp (*str + word_len + 2, 
"family", 6) == 0) { - *str = *str + word_len + 7; - return TRUE; - } else { - return FALSE; - } -} - - -static Boolean PrecededByPrefix (CharPtr search, CharPtr cp, CharPtr prefix) -{ - Int4 len; - - if (search == NULL || cp == NULL || StringHasNoText (prefix)) { - return FALSE; - } - len = StringLen (prefix); - if (cp - search >= len && StringNCmp (cp - len, prefix, len) == 0) { - return TRUE; - } else { - return FALSE; - } -} - - -static Boolean InWordBeforeCytochromeOrCoenzyme (CharPtr cp, CharPtr start) -{ - if (cp == NULL) { - return FALSE; - } - - while (cp > start && !isspace (*cp)) { - cp--; - } - if (cp == start) { - return FALSE; - } - while (cp > start && isspace (*cp)) { - cp--; - } - if (cp - start >= 9 && StringNICmp (cp - 9, "cytochrome", 10) == 0) { - return TRUE; - } else if (cp - start >= 7 && StringNCmp (cp - 7, "coenzyme", 8) == 0) { - return TRUE; - } else { - return FALSE; - } -} - - static Boolean ThreeOrMoreNumbersTogether (CharPtr pattern, CharPtr search) { - CharPtr p; - Int4 num_digits = 0; - - if (search == NULL) { - return FALSE; - } - - p = search; - while (*p != 0) { - if (isdigit (*p)) { - if (PrecededByPrefix(search, p, "DUF") - || PrecededByPrefix(search, p, "UPF") - || PrecededByPrefix(search, p, "IS") - || PrecededByPrefix(search, p, "TIGR")) { - p += StrSpn (p, "0123456789") - 1; - num_digits = 0; - } else if (InWordBeforeCytochromeOrCoenzyme (p, search)) { - p += StrSpn (p, "0123456789") - 1; - num_digits = 0; - } else { - num_digits ++; - if (num_digits == 3) { - if (FollowedByFamily (&p)) { - num_digits = 0; - } else { - return TRUE; - } - } - } - } else { - num_digits = 0; - } - p++; - } - return FALSE; + return ContainsThreeOrMoreNumbersTogether (search); } + static Boolean ContainsUnderscore (CharPtr pattern, CharPtr search) { - CharPtr cp; - - if (search == NULL) { - return FALSE; - } - - cp = StringChr (search, '_'); - while (cp != NULL) { - if (FollowedByFamily (&cp)) { - /* search again */ - cp = StringChr (cp, '_'); 
- } else if (cp - search < 3 || *(cp + 1) == 0) { - return TRUE; - } else if ((StringNCmp (cp - 3, "MFS", 3) == 0 - || StringNCmp (cp - 3, "TPR", 3) == 0 - || StringNCmp (cp - 3, "AAA", 3) == 0) - && isdigit (*(cp + 1)) && !isdigit (*(cp + 2))) { - cp = StringChr (cp + 1, '_'); - } else { - return TRUE; - } - } - return FALSE; + return StringContainsUnderscore (search); } static Boolean PrefixPlusNumbersOnly (CharPtr pattern, CharPtr search) { - Int4 pattern_len, digit_len; - - if (search == NULL) { - return FALSE; - } - pattern_len = StringLen (pattern); - if (pattern_len > 0 && StringNCmp (search, pattern, pattern_len) != 0) { - return FALSE; - } - - digit_len = StringSpn (search + pattern_len, "1234567890"); - if (digit_len > 0 && *(search + pattern_len + digit_len) == 0) { - return TRUE; - } else { - return FALSE; - } + return IsPrefixPlusNumbers (pattern, search); } @@ -11004,45 +11303,20 @@ static Boolean AllCapitalLetters (CharPtr pattern, CharPtr search) static Boolean ContainsUnbalancedParentheses (CharPtr pattern, CharPtr search) { - CharPtr buffer, cp_src; - Int4 pos = 0; - Boolean is_bad = FALSE; - - if (search == NULL) { - return FALSE; - } + return StringContainsUnbalancedParentheses (search); +} - /* note - don't need space for terminating character */ - buffer = MemNew (sizeof (Char) * StringLen (search)); - cp_src = search; - while (*cp_src != 0 && !is_bad) { - if (*cp_src == '(' || *cp_src == '[') { - buffer[pos++] = *cp_src; - } else if (*cp_src == ')') { - if (pos < 1) { - is_bad = TRUE; - } else if (buffer[pos - 1] != '(') { - is_bad = TRUE; - } else { - pos --; - } - } else if (*cp_src == ']') { - if (pos < 1) { - is_bad = TRUE; - } else if (buffer[pos - 1] != '[') { - is_bad = TRUE; - } else { - pos--; - } - } - ++cp_src; - } - if (pos > 0) { - is_bad = TRUE; +static Boolean IsTooLong (CharPtr pattern, CharPtr search) +{ + if (StringISearch (search, "bifunctional") != NULL + || StringISearch (search, "multifunctional") != NULL) { + return 
FALSE; + } else if (StringLen (search) > 100) { + return TRUE; + } else { + return FALSE; } - buffer = MemFree (buffer); - return is_bad; } @@ -11092,435 +11366,810 @@ static Boolean ContainsDoubleSpace (CharPtr pattern, CharPtr search) } -static CharPtr SummarizeSuspectPhraseFunc (SuspectProductNameSearchFunc s) +static void SimpleReplaceFunc (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp) { - if (s == NULL) { - return "NULL function"; - } else if (s == EndsWithPattern) { - return "occurs at end of text"; - } else if (s == ContainsWholeWord) { - return "contains phrase as whole word"; - } else if (s == StartsWithPattern) { - return "occurs at beginning of text"; - } else if (s == ContainsWholeWordCaseSensitive) { - return "contains phrase as whole word, case sensitive"; - } else if (s == IsSingleWord) { - return "entire text matches (not case sensitive)"; - } else if (s == IsSingleWordOrWeaselPlusSingleWord) { - return "entire text matches (not case sensitive) or text matches after weasel word"; - } else if (s == NormalSearch) { - return "contains phrase anywhere, not case sensitive"; - } else if (s == ContainsDoubleSpace) { - return "contains double space"; - } else if (s == PrefixPlusNumbersOnly) { - return "entire product is prefix followed by numbers"; + FindReplaceString (orig, find, replace, FALSE, TRUE); +} + + +static void SimpleReplaceAnywhereFunc (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp) +{ + FindReplaceString (orig, find, replace, FALSE, FALSE); +} + + +static void ReplaceWholeNameFunc (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp) +{ + if (orig == NULL) { + return; + } + if (IsSingleWordOrWeaselPlusSingleWord(find, *orig)) { + *orig = MemFree (*orig); + *orig = StringSave (replace); + } +} + + +static void ReplaceWholeNameAddNoteFunc (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp) +{ + if (orig == NULL) { + return; + } + if (IsSingleWordOrWeaselPlusSingleWord(find, 
*orig)) { + SetStringValue (&(sfp->comment), *orig, ExistingTextOption_append_semi); + *orig = MemFree (*orig); + *orig = StringSave (replace); + } +} + + +static void ReplaceAtFront (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp) +{ + Int4 orig_len, find_len, replace_len, new_len; + CharPtr new_str; + + if (orig == NULL || find == NULL) { + return; + } + + orig_len = StringLen (*orig); + find_len = StringLen (find); + if (find_len > orig_len || StringNICmp (*orig, find, find_len) != 0) { + return; + } + replace_len = StringLen (replace); + + new_len = orig_len + replace_len - find_len; + new_str = (CharPtr) MemNew (sizeof (Char) * (new_len + 1)); + if (replace_len > 0) { + StringCpy (new_str, replace); + } + StringCat (new_str, (*orig) + find_len); + *orig = MemFree (*orig); + *orig = new_str; +} + + +static void ReplaceAtEnd (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp) +{ + Int4 orig_len, find_len, replace_len, new_len; + CharPtr new_str; + + if (orig == NULL || find == NULL) { + return; + } + + orig_len = StringLen (*orig); + find_len = StringLen (find); + if (find_len > orig_len || StringICmp ((*orig) + orig_len - find_len, find) != 0) { + return; + } + replace_len = StringLen (replace); + + new_len = orig_len + replace_len - find_len; + new_str = (CharPtr) MemNew (sizeof (Char) * (new_len + 1)); + StringNCpy (new_str, *orig, orig_len - find_len); + if (replace_len > 0) { + StringCat (new_str, replace); + } + *(new_str + new_len) = 0; + *orig = MemFree (*orig); + *orig = new_str; +} + + +static void UsePutative (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp) +{ + Int4 i; + for (i = 0; s_putative_replacements[i] != NULL; i++) { + ReplaceAtFront (orig, s_putative_replacements[i], "putative", sfp); + } +} + + +static void RemoveBeginningAndEndingQuotes (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp) +{ + CharPtr src, dst; + Int4 len; + + if (orig == NULL || *orig == NULL || 
!BeginsOrEndsWithQuotes (NULL, *orig)) { + return; + } + src = *orig; + dst = *orig; + if (*src == '\'' || *src == '"') { + src++; + while (*src != 0) { + *dst = *src; + dst++; + src++; + } + *dst = 0; + } + len = StringLen (*orig); + if ((*orig)[len - 1] == '\'' || (*orig)[len - 1] == '"') { + (*orig)[len - 1] = 0; + } +} + + +static void FixLongProduct (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp) +{ + Int4 len, keep_len; + if (orig == NULL || *orig == NULL || sfp == NULL || *orig == sfp->comment) { + return; + } + len = StringLen (*orig); + keep_len = StringCSpn (*orig, ",;("); + if (keep_len < len) { + SetStringValue (&(sfp->comment), *orig, ExistingTextOption_append_semi); + *((*orig) + keep_len) = 0; + } +} + + +static void HaemReplaceFunc (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp) +{ + if (orig == NULL || *orig == NULL) { + return; + } + + FindReplaceString (orig, find, "heme", FALSE, TRUE); + FindReplaceString (orig, find, "hem", FALSE, FALSE); +} + + +static CharPtr SummarizeSuspectPhraseFunc (SuspectProductNameSearchFunc s) +{ + if (s == NULL) { + return "NULL function"; + } else if (s == EndsWithPattern) { + return "occurs at end of text"; + } else if (s == ContainsWholeWord) { + return "contains phrase as whole word"; + } else if (s == StartsWithPattern) { + return "occurs at beginning of text"; + } else if (s == ContainsWholeWordCaseSensitive) { + return "contains phrase as whole word, case sensitive"; + } else if (s == IsSingleWord) { + return "entire text matches (not case sensitive)"; + } else if (s == IsSingleWordOrWeaselPlusSingleWord) { + return "entire text matches (not case sensitive) or text matches after weasel word"; + } else if (s == NormalSearch) { + return "contains phrase anywhere, not case sensitive"; + } else if (s == ContainsDoubleSpace) { + return "contains double space"; + } else if (s == PrefixPlusNumbersOnly) { + return "entire product is prefix followed by numbers"; + } else if (s == 
IsTooLong) { + return "longer than 50 characters"; } else { return "special rules"; } } +static CharPtr SummarizeSuspectReplacementPhrase (SuspectProductNameReplaceFunc s, CharPtr replace_phrase) +{ + CharPtr phrase = NULL; + CharPtr simple_fmt = "Replace with '%s' (whole word)"; + CharPtr simple_anywhere_fmt = "Replace with '%s'"; + CharPtr whole_fmt = "Replace entire product name with '%s'"; + CharPtr whole_note_fmt = "Move product name to note, use '%s' for product name"; + + + if (s == NULL) { + return StringSave ("No replacement"); + } else if (s == SimpleReplaceFunc) { + phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (simple_fmt) + StringLen (replace_phrase))); + sprintf (phrase, simple_fmt, replace_phrase); + } else if (s == SimpleReplaceAnywhereFunc) { + phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (simple_anywhere_fmt) + StringLen (replace_phrase))); + sprintf (phrase, simple_anywhere_fmt, replace_phrase); + } else if (s == FixLongProduct) { + phrase = StringSave ("Truncate at first comma or semicolon"); + } else if (s == UsePutative) { + phrase = StringSave ("Replace with 'putative'"); + } else if (s == ReplaceWholeNameFunc) { + phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (whole_fmt) + StringLen (replace_phrase))); + sprintf (phrase, whole_fmt, replace_phrase); + } else if (s == ReplaceWholeNameAddNoteFunc) { + phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (whole_note_fmt) + StringLen (replace_phrase))); + sprintf (phrase, whole_note_fmt, replace_phrase); + } else if (s == ReplaceAtEnd || s == ReplaceAtFront) { + phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (simple_anywhere_fmt) + StringLen (replace_phrase))); + sprintf (phrase, simple_anywhere_fmt, replace_phrase); + } else { + phrase = StringSave ("Unknown replacement action"); + } + return phrase; +} + + static SuspectProductNameData suspect_product_terms[] = { - { "like", EndsWithPattern }, - { "repeat", EndsWithPattern }, - { "domain", EndsWithPattern }, - { 
"fold", EndsWithFold }, - { "motif", EndsWithPattern }, - { "related", EndsWithPattern }, - { "binding", EndsWithPattern }, - { "containing", EndsWithPattern }, - { "containing", StartsWithPattern }, - { "from", StartsWithPattern }, - { "N-term", ProductContainsTerm }, - { "N term", ProductContainsTerm }, - { "C-term", ProductContainsTerm }, - { "C term", ProductContainsTerm }, - { "may contain a plural", MayContainPlural }, - { "Brackets or parenthesis [] ()", ContainsBracketsOrParentheses }, - { "Two or more sets of brackets or parentheseis", ContainsTwoSetsOfBracketsOrParentheses }, - { "ending with period, comma, hyphen, underscore, colon, or forward slash", EndsWithPunct }, - { "beginning with period, comma, or hyphen", BeginsWithPunct }, - { "unknown", ContainsUnknownName }, - { "COG", ContainsWholeWordCaseSensitive }, - { "EST", ContainsWholeWordCaseSensitive }, - { "DUF", ContainsWholeWordCaseSensitive }, - { "UPF", ContainsWholeWordCaseSensitive }, - { "DUF", PrefixPlusNumbersOnly }, - { "UPF", PrefixPlusNumbersOnly }, - { "IS", PrefixPlusNumbersOnly }, - { "FOG", ContainsWholeWordCaseSensitive }, - { "Subtilis", ContainsWholeWord }, - { "coli", ContainsWholeWord }, - { "pseudo", ContainsWholeWord }, - { "gene", ContainsWholeWord }, - { "genes", ContainsWholeWord }, - { "homo", ContainsWholeWord }, - { "argininte", ContainsWholeWord }, - { "diacyglycerol", ContainsWholeWord }, - { "glycosy", ContainsWholeWord }, - { "hypothetica", ContainsWholeWord }, - { "ncharacterized", ContainsWholeWord }, - { "obalt", ContainsWholeWord }, - { "odule", ContainsWholeWord }, - { "protei", ContainsWholeWord }, - { "sigm", ContainsWholeWord }, - { "thiamin/thiamin", ContainsWholeWord }, - { "threonin", ContainsWholeWord }, - { "ypothetical", ContainsWholeWord }, - { "ytochrome", ContainsWholeWord }, - { "aminotransferasee", ContainsWholeWord }, - { "bioin", ContainsWholeWord }, - { "biosythesis", ContainsWholeWord }, - { "chelatin", ContainsWholeWord }, - { "componenet", 
ContainsWholeWord }, - { "familie", ContainsWholeWord }, - { "hexpeptide", ContainsWholeWord }, - { "homocystein", ContainsWholeWord }, - { "initation", ContainsWholeWord }, - { "mobilisation", ContainsWholeWord }, - { "mutatrotase", ContainsWholeWord }, - { "oxidoreductasee", ContainsWholeWord }, - { "periplasmc", ContainsWholeWord }, - { "puter", ContainsWholeWord }, - { "reductasee", ContainsWholeWord }, - { "thioderoxin", ContainsWholeWord }, - { "transferasee", ContainsWholeWord }, - { "protein", IsSingleWord }, - { "putative protein", IsSingleWord }, - { "probable protein", IsSingleWord }, - { "sodium", IsSingleWord }, - { "CHC2 zinc finger", IsSingleWord }, - { "SWIM zinc finger", IsSingleWord }, - { "putative", IsSingleWordOrWeaselPlusSingleWord }, - { "probable", IsSingleWordOrWeaselPlusSingleWord }, - { "protein-containing", IsSingleWordOrWeaselPlusSingleWord }, - { "protein containing", IsSingleWordOrWeaselPlusSingleWord }, - { "transposase of", IsSingleWordOrWeaselPlusSingleWord }, - { "hypothetical", IsSingleWordOrWeaselPlusSingleWord }, - { "conserved hypothetical", IsSingleWordOrWeaselPlusSingleWord }, - { "conserved", IsSingleWordOrWeaselPlusSingleWord }, - { "purine", IsSingleWordOrWeaselPlusSingleWord }, - { "iron", IsSingleWordOrWeaselPlusSingleWord }, - { "phage", IsSingleWordOrWeaselPlusSingleWord }, - { "insertion sequence", IsSingleWordOrWeaselPlusSingleWord }, - { "transposon", IsSingleWordOrWeaselPlusSingleWord }, - { "signal peptide", IsSingleWordOrWeaselPlusSingleWord }, - { "NAD", IsSingleWordOrWeaselPlusSingleWord }, - { "p-loop", IsSingleWordOrWeaselPlusSingleWord }, - { "helix-turn-helix", IsSingleWordOrWeaselPlusSingleWord }, - { "domain family", IsSingleWordOrWeaselPlusSingleWord }, - { "PASTA", IsSingleWordOrWeaselPlusSingleWord }, - { "zinc finger", IsSingleWordOrWeaselPlusSingleWord }, - { "amino acid", IsSingleWordOrWeaselPlusSingleWord }, - { "peptide", IsSingleWordOrWeaselPlusSingleWord }, - { "citrate", 
IsSingleWordOrWeaselPlusSingleWord }, - { "PTS system", IsSingleWordOrWeaselPlusSingleWord }, - { "putative protein", IsSingleWordOrWeaselPlusSingleWord }, - { "Alanine", IsSingleWordOrWeaselPlusSingleWord }, - { "Arginine", IsSingleWordOrWeaselPlusSingleWord }, - { "Asparagine", IsSingleWordOrWeaselPlusSingleWord }, - { "Aspartic acid", IsSingleWordOrWeaselPlusSingleWord }, - { "Cysteine", IsSingleWordOrWeaselPlusSingleWord }, - { "DNA", IsSingleWordOrWeaselPlusSingleWord }, - { "Glutamic acid", IsSingleWordOrWeaselPlusSingleWord }, - { "Glutamine", IsSingleWordOrWeaselPlusSingleWord }, - { "Glycine", IsSingleWordOrWeaselPlusSingleWord }, - { "Histidine", IsSingleWordOrWeaselPlusSingleWord }, - { "Isoleucine", IsSingleWordOrWeaselPlusSingleWord }, - { "Leucine", IsSingleWordOrWeaselPlusSingleWord }, - { "Lysine", IsSingleWordOrWeaselPlusSingleWord }, - { "Methionine", IsSingleWordOrWeaselPlusSingleWord }, - { "ORF", IsSingleWordOrWeaselPlusSingleWord }, - { "Phenylalanine", IsSingleWordOrWeaselPlusSingleWord }, - { "Proline", IsSingleWordOrWeaselPlusSingleWord }, - { "RNA", IsSingleWordOrWeaselPlusSingleWord }, - { "Serine", IsSingleWordOrWeaselPlusSingleWord }, - { "Threonine", IsSingleWordOrWeaselPlusSingleWord }, - { "Tryptophan", IsSingleWordOrWeaselPlusSingleWord }, - { "Tyrosine", IsSingleWordOrWeaselPlusSingleWord }, - { "Valine", IsSingleWordOrWeaselPlusSingleWord }, - { "adenine", IsSingleWordOrWeaselPlusSingleWord }, - { "barrel", IsSingleWordOrWeaselPlusSingleWord }, - { "carbon", IsSingleWordOrWeaselPlusSingleWord }, - { "cytosine", IsSingleWordOrWeaselPlusSingleWord }, - { "domain", IsSingleWordOrWeaselPlusSingleWord }, - { "domain protein", IsSingleWordOrWeaselPlusSingleWord }, - { "factor", IsSingleWordOrWeaselPlusSingleWord }, - { "family protein", IsSingleWordOrWeaselPlusSingleWord }, - { "finger", IsSingleWordOrWeaselPlusSingleWord }, - { "ggdef", IsSingleWordOrWeaselPlusSingleWord }, - { "guanine", IsSingleWordOrWeaselPlusSingleWord }, - { 
"helium", IsSingleWordOrWeaselPlusSingleWord }, - { "helix", IsSingleWordOrWeaselPlusSingleWord }, - { "hydrogen", IsSingleWordOrWeaselPlusSingleWord }, - { "hypothetical ORF", IsSingleWordOrWeaselPlusSingleWord }, - { "mRNA", IsSingleWordOrWeaselPlusSingleWord }, - { "membrane", IsSingleWordOrWeaselPlusSingleWord }, - { "ncRNA", IsSingleWordOrWeaselPlusSingleWord }, - { "nitrogen", IsSingleWordOrWeaselPlusSingleWord }, - { "oxygen", IsSingleWordOrWeaselPlusSingleWord }, - { "plasmid", IsSingleWordOrWeaselPlusSingleWord }, - { "precursor", IsSingleWordOrWeaselPlusSingleWord }, - { "protein of unknown function", IsSingleWordOrWeaselPlusSingleWord }, - { "putative conserved hypothetical", IsSingleWordOrWeaselPlusSingleWord }, - { "putative hypothetical", IsSingleWordOrWeaselPlusSingleWord }, - { "putative signal peptide", IsSingleWordOrWeaselPlusSingleWord }, - { "rRNA", IsSingleWordOrWeaselPlusSingleWord }, - { "repeat", IsSingleWordOrWeaselPlusSingleWord }, - { "secreted", IsSingleWordOrWeaselPlusSingleWord }, - { "signal", IsSingleWordOrWeaselPlusSingleWord }, - { "subunit", IsSingleWordOrWeaselPlusSingleWord }, - { "tRNA", IsSingleWordOrWeaselPlusSingleWord }, - { "thymine", IsSingleWordOrWeaselPlusSingleWord }, - { "uracil", IsSingleWordOrWeaselPlusSingleWord }, - { "zinc", IsSingleWordOrWeaselPlusSingleWord }, - { "transport-associated", IsSingleWordOrWeaselPlusSingleWord }, - { "Similar to", NormalSearch }, - { "Related to", NormalSearch }, - { "interrupt", NormalSearch }, - { "Homolog", NormalSearch }, - { "Homologue", NormalSearch }, - { "Fragment", NormalSearch }, - { "Frameshift", NormalSearch }, - { "Intein", NormalSearch }, - { "Chloroplast", NormalSearch }, - { "Mitochondrial", NormalSearch }, - { "puatative", NormalSearch }, - { "putaive", NormalSearch }, - { "putaitve", NormalSearch }, - { "putatitve", NormalSearch }, - { "putataive", NormalSearch }, - { "putatuve", NormalSearch }, - { "ortholog", NormalSearch }, - { "orthologue", NormalSearch }, - { 
"paralog", NormalSearch }, - { "paralogue", NormalSearch }, - { "bifunctional protein", NormalSearch }, - { "pseudogene", NormalSearch }, - { "frame shift", NormalSearch }, - { "protien", NormalSearch }, - { "partial", NormalSearch }, - { "sphaeroides", NormalSearch }, - { "or related", NormalSearch }, - { "authentic point mutation", NormalSearch }, - { "novel protein", NormalSearch }, - { "ttg start", NormalSearch }, - { "domain protein domain protein", NormalSearch }, - { "deletion", NormalSearch }, - { "truncat", NormalSearch }, - { "hypothteical", NormalSearch }, - { "hypotethical", NormalSearch }, - { "hypothetcial", NormalSearch }, - { "consevered", NormalSearch }, - { "cotaining", NormalSearch }, - { "gIycerol", NormalSearch }, - { "haemagglutination", NormalSearch }, - { "family family", NormalSearch }, - { "domain domain", NormalSearch }, - { "putative, putative", NormalSearch }, - { "putative putative", NormalSearch }, - { "putative probable", NormalSearch }, - { "probable putative", NormalSearch }, - { "similar", NormalSearch }, - { "characterisation", NormalSearch }, - { "uncharacterised", NormalSearch }, - { "putatvie", NormalSearch }, - { "putaitve", NormalSearch }, - { "simmilar", NormalSearch }, - { "ribosoml", NormalSearch }, - { "transcirbed", NormalSearch }, - { "recognised", NormalSearch }, - { "heam", NormalSearch }, - { "haem", NormalSearch }, - { "golgi", NormalSearch }, - { "active site", NormalSearch }, - { "human", NormalSearch }, - { "domian", NormalSearch }, - { "facotr", NormalSearch }, - { "proein", NormalSearch }, - { "trnasporter", NormalSearch }, - { "tranporter", NormalSearch }, - { "proteinn", NormalSearch }, - { "homo sapiens", NormalSearch }, - { "sapiens", NormalSearch }, - { "Transmebrane", NormalSearch }, - { "Transemembrane", NormalSearch }, - { "Intiation", NormalSearch }, - { "Portein", NormalSearch }, - { "protrein", NormalSearch }, - { "hypotehtical", NormalSearch }, - { "K potassium", NormalSearch }, - { "K+ potassium", 
NormalSearch }, - { "outers", NormalSearch }, - { "weakly conserved", NormalSearch }, - { "highly conserved", NormalSearch }, - { "narrowly conserved", NormalSearch }, - { "No definition line found", NormalSearch }, - { "ECOLI", NormalSearch }, - { "alternate protein name", NormalSearch }, - { "widely conserved", NormalSearch }, - { "putative orphan protein", NormalSearch }, - { "orphan protein", NormalSearch }, - { "Plasmodium", NormalSearch }, - { "bos taurus", NormalSearch }, - { "open reading frame", NormalSearch }, - { "?", NormalSearch }, - { "#", NormalSearch }, - { ". ", NormalSearch }, - { "|", NormalSearch }, - { "=", NormalSearch }, - { "\\-PA", NormalSearch }, - { "_", ContainsUnderscore }, - { "three or more numbers together, not after 'UPF' or 'DUF' or 'IS' and not followed by the word 'family' and not preceded by either 'cytochrome' or 'coenzyme'", ThreeOrMoreNumbersTogether }, - { "putaitive", NormalSearch }, - { "putatve", NormalSearch }, - { "hypothtical", NormalSearch }, - { "hypotheical", NormalSearch }, - { "meausure", NormalSearch }, - { "flageller", NormalSearch }, - { "tumour", NormalSearch }, - { "dimerising", NormalSearch }, - { "dimerisation", NormalSearch }, - { "nucelar", NormalSearch }, - { "nulcear", NormalSearch }, - { "proteine", NormalSearch }, - { "unkown", NormalSearch }, - { "periplsmic", NormalSearch }, - { "molybopterin", NormalSearch }, - { "molydopterin", NormalSearch }, - { "aluminium", NormalSearch }, - { "aminopetidase", NormalSearch }, - { "asparate", NormalSearch }, - { "aparaginase", NormalSearch }, - { "bifunctionnal", NormalSearch }, - { "biosyntesis", NormalSearch }, - { "bnding", NormalSearch }, - { "carboxilic", NormalSearch }, - { "cell divisionFtsK/SpoIIIE", NormalSearch }, - { "coantaining", NormalSearch }, - { "coenzye", NormalSearch }, - { "componnent", NormalSearch }, - { "degration", NormalSearch }, - { "dependant", NormalSearch }, - { "disulphide", NormalSearch }, - { "divison", NormalSearch }, - { 
"dyhydrogenase", NormalSearch }, - { "glcosyl", NormalSearch }, - { "glucosainyl", NormalSearch }, - { "glutaminne", NormalSearch }, - { "hemelysin", NormalSearch }, - { "hemoglobine", NormalSearch }, - { "histadine", NormalSearch }, - { "homeserine", NormalSearch }, - { "hyphotetical", NormalSearch }, - { "hypotetical", NormalSearch }, - { "hypotheitcal", NormalSearch }, - { "hpothetical", NormalSearch }, - { "inductible", NormalSearch }, - { "majour", NormalSearch }, - { "mambrane", NormalSearch }, - { "meausure", NormalSearch }, - { "membranne", NormalSearch }, - { "methlytransferase", NormalSearch }, - { "metylase", NormalSearch }, - { "monoxyde", NormalSearch }, - { "monoxygenase", NormalSearch }, - { "mulitdrug", NormalSearch }, - { "ndoribonuclease", ContainsWholeWord }, - { "nickle", NormalSearch }, - { "oxidoreductasse", NormalSearch }, - { "oxydase", NormalSearch }, - { "phophate", NormalSearch }, - { "phopho", NormalSearch }, - { "phophoserine", NormalSearch }, - { "phoshate", NormalSearch }, - { "phosphotase", NormalSearch }, - { "posible", NormalSearch }, - { "presursor", NormalSearch }, - { "prortein", NormalSearch }, - { "regulatot", NormalSearch }, - { "resistence", NormalSearch }, - { "serinr", NormalSearch }, - { "signalling", NormalSearch }, - { "spscific", NormalSearch }, - { "stabilisation", NormalSearch }, - { "subnit", NormalSearch }, - { "sulpho", NormalSearch }, - { "sulphur", NormalSearch }, - { "sythase", NormalSearch }, - { "threonin", ContainsWholeWord }, - { "tranferase", NormalSearch }, - { "transebrane", NormalSearch }, - { "transglycolase", NormalSearch }, - { "transorter", NormalSearch }, - { "transpoase", NormalSearch }, - { "transportor", NormalSearch }, - { "transproter", NormalSearch }, - { "transulfuration", NormalSearch }, - { "typr", NormalSearch }, - { "uncharaterized", NormalSearch }, - { "undecapaprenyl", NormalSearch }, - { "utilisation", ContainsWholeWord }, - { "contain", ContainsWholeWord }, - { "start codon", 
ContainsWholeWord }, - { "Includes:", ContainsWholeWord }, - { "inactivated derivative", ContainsWholeWord }, - { "double space", ContainsDoubleSpace }, - { "all capital letters", AllCapitalLetters }, - { "unbalanced brackets or parentheses", ContainsUnbalancedParentheses }, - /* organism names */ - { "aureus", ContainsWholeWord }, - { "Arabidopsis", ContainsWholeWord }, - { "Aspergillus", ContainsWholeWord }, - { "niger", ContainsWholeWord }, - { "Bacillus", ContainsWholeWord }, - { "Bacteroides", ContainsWholeWord }, - { "B.subtilis", ContainsWholeWord }, - { "Campylobacter", ContainsWholeWord }, - { "cerevisiae", ContainsWholeWord }, - { "Chlamydial", ContainsWholeWord }, - { "Chlamydomonas", ContainsWholeWord }, - { "Drosophila", ContainsWholeWord }, - { "enterica", ContainsWholeWord }, - { "Escherichia", ContainsWholeWord }, - { "E.coli", ContainsWholeWord }, - { "halophilus", ContainsWholeWord }, - { "Helicobacter", ContainsWholeWord }, - { "Jejuni", ContainsWholeWord }, - { "Leishmania", ContainsWholeWord }, - { "Marinococcus", ContainsWholeWord }, - { "mouse", ContainsWholeWord }, - { "Mus musculus", ContainsWholeWord }, - { "Mycobacterium", ContainsWholeWord }, - { "Pestis", ContainsWholeWord }, - { "pseudomonas", ContainsWholeWord }, - { "pombe", ContainsWholeWord }, - { "pylori", ContainsWholeWord }, - { "Tuberculosis", ContainsWholeWord }, - { "rat", ContainsWholeWord }, - { "Rhodobacter", ContainsWholeWord }, - { "Staphylococcus", ContainsWholeWord }, - { "subsp", ContainsWholeWord }, - { "serovar", ContainsWholeWord }, - { "thaliana", ContainsWholeWord }, - { "Typhimurium", ContainsWholeWord }, - { "Salmonella", ContainsWholeWord }, - { "Staphlococcal", ContainsWholeWord }, - { "Staphlococcus", ContainsWholeWord }, - { "staphylococcal", ContainsWholeWord }, - { "sreptomyces", ContainsWholeWord }, - { "Streptococcus", ContainsWholeWord }, - { "streptococcal", ContainsWholeWord }, - { "streptomyces", ContainsWholeWord }, - { "xenopus", ContainsWholeWord 
}, - { "yeast", ContainsWholeWord }, - { "Yersinia", ContainsWholeWord } + { "beginning with period, comma, or hyphen" , BeginsWithPunct, eSuspectNameType_InappropriateSymbol, NULL, NULL } , + { "begins or ends with quotes", BeginsOrEndsWithQuotes, eSuspectNameType_QuickFix, NULL, RemoveBeginningAndEndingQuotes } , + { "binding" , EndsWithPattern, eSuspectNameType_UseProtein, NULL, NULL } , + { "domain", EndsWithPattern, eSuspectNameType_UseProtein, NULL, NULL } , + { "like" , EndsWithPattern, eSuspectNameType_UseProtein, NULL, NULL } , + { "motif" , EndsWithPattern, eSuspectNameType_UseProtein, NULL, NULL } , + { "related" , EndsWithPattern, eSuspectNameType_UseProtein, NULL, NULL } , + { "repeat", EndsWithPattern, eSuspectNameType_UseProtein, NULL, NULL } , + { "fold" , EndsWithFold, eSuspectNameType_UseProtein, NULL, NULL } , + { "Arabidopsis" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Aspergillus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "B.subtilis" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Bacillus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Bacteroides" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Campylobacter" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Chlamydial" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Chlamydomonas" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Drosophila" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "E.coli" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Escherichia" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Helicobacter" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Includes:" , ContainsWholeWord, 
eSuspectNameType_InappropriateSymbol, NULL, NULL } , + { "Jejuni" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Leishmania" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Marinococcus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Mus musculus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Mycobacterium" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Pestis" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Rhodobacter" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Salmonella" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Staphlococcal" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Staphlococcus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Staphylococcus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Streptococcus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Subtilis" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Tuberculosis" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Typhimurium" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Yersinia" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "aminotransferasee" , ContainsWholeWord, eSuspectNameType_Typo , "aminotransferase", SimpleReplaceFunc } , + { "arginin " , ContainsWholeWord, eSuspectNameType_Typo , "arginine ", SimpleReplaceFunc } , + { "argininte" , ContainsWholeWord, eSuspectNameType_Typo , "arginine", SimpleReplaceFunc } , + { "aureus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "bioin" , ContainsWholeWord, eSuspectNameType_Typo , "biotin", SimpleReplaceFunc } , + { "biosythesis" 
, ContainsWholeWord, eSuspectNameType_Typo , "biosynthesis", SimpleReplaceFunc } , + { "cerevisiae" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "chelatin" , ContainsWholeWord, eSuspectNameType_Typo , "chelating", SimpleReplaceFunc } , + { "coli" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "contain" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } , + { "deydrogenase" , ContainsWholeWord, eSuspectNameType_Typo, "dehydrogenase", SimpleReplaceFunc } , + { "diacyglycerol" , ContainsWholeWord, eSuspectNameType_Typo, "diacylglycerol", SimpleReplaceFunc } , + { "domainl", ContainsWholeWord, eSuspectNameType_Typo, "domain", SimpleReplaceFunc } , + { "enterica" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "exporte" , ContainsWholeWord, eSuspectNameType_Typo, "exported", SimpleReplaceFunc } , + { "familie" , ContainsWholeWord, eSuspectNameType_Typo, "family", SimpleReplaceFunc } , + { "gene" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } , + { "genes" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } , + { "glycin" , ContainsWholeWord, eSuspectNameType_Typo, "glycine", SimpleReplaceFunc } , + { "glycosy" , ContainsWholeWord, eSuspectNameType_Typo, "glucosyl", SimpleReplaceFunc } , + { "halophilus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "hemaggltinin" , ContainsWholeWord, eSuspectNameType_Typo, "hemagglutinin", SimpleReplaceFunc } , + { "hexpeptide" , ContainsWholeWord, eSuspectNameType_Typo, "hexapeptide", SimpleReplaceFunc } , + { "histide" , ContainsWholeWord, eSuspectNameType_Typo, "histidine", SimpleReplaceFunc } , + { "homo" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "homocystein" , ContainsWholeWord, eSuspectNameType_Typo, "homocysteine", SimpleReplaceFunc } , + { "hyp domain protein" , IsSingleWord, eSuspectNameType_Typo, "hypothetical protein", SimpleReplaceFunc }, + { 
"hypot" , ContainsWholeWord, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypothe" , ContainsWholeWord, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypothet" , ContainsWholeWord, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypothetic" , ContainsWholeWord, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypothetica" , ContainsWholeWord, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypothetical domain protein" , IsSingleWord, eSuspectNameType_Typo, "hypothetical protein", SimpleReplaceFunc }, + { "inactivated derivative" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } , + { "initation" , ContainsWholeWord, eSuspectNameType_Typo, "initiation", SimpleReplaceFunc } , + { "invertion" , ContainsWholeWord, eSuspectNameType_Typo, "inversion", SimpleReplaceFunc } , + { "isomaerase" , ContainsWholeWord, eSuspectNameType_Typo, "isomerase", SimpleReplaceFunc } , + { "mobilisation" , ContainsWholeWord, eSuspectNameType_Typo, "mobilization", SimpleReplaceFunc } , + { "mouse" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "mutatrotase" , ContainsWholeWord, eSuspectNameType_Typo, "mutarotase", SimpleReplaceFunc } , + { "ncharacterized" , ContainsWholeWord, eSuspectNameType_Typo, "uncharacterized", SimpleReplaceFunc } , + { "ndoribonuclease" , ContainsWholeWord, eSuspectNameType_Typo, "endoribonuclease", SimpleReplaceFunc } , + { "niger" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "ntegral " , ContainsWholeWord, eSuspectNameType_Typo, "integral ", SimpleReplaceFunc } , + { "obalt" , ContainsWholeWord, eSuspectNameType_Typo, "cobalt", SimpleReplaceFunc } , + { "odule" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } , + { "orf, hyp" , IsSingleWord, eSuspectNameType_Typo, "hypothetical protein", SimpleReplaceFunc }, + { "orf, hypothetical" , IsSingleWord, eSuspectNameType_Typo, "hypothetical protein", 
SimpleReplaceFunc }, + { "oxidoreductasee" , ContainsWholeWord, eSuspectNameType_Typo, "oxidoreductase", SimpleReplaceFunc } , + { "oxidoredutase" , ContainsWholeWord, eSuspectNameType_Typo, "oxidoreductase", SimpleReplaceFunc } , + { "periplamic" , ContainsWholeWord, eSuspectNameType_Typo, "periplasmic", SimpleReplaceFunc } , + { "periplasmc" , ContainsWholeWord, eSuspectNameType_Typo, "periplasmic", SimpleReplaceFunc } , + { "phosphatidyltransferse" , ContainsWholeWord, eSuspectNameType_Typo, "phosphatidyltransferase", SimpleReplaceFunc } , + { "phosphopantethiene" , ContainsWholeWord, eSuspectNameType_Typo, "phosphopantetheine", SimpleReplaceFunc } , + { "pombe" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "portein" , ContainsWholeWord, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } , + { "protei" , ContainsWholeWord, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } , + { "protwin" , ContainsWholeWord, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } , + { "pseudo" , ContainsWholeWord, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "pseudomonas" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "puter" , ContainsWholeWord, eSuspectNameType_Typo, "outer", SimpleReplaceFunc } , + { "pylori" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "rat" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "reductasee" , ContainsWholeWord, eSuspectNameType_Typo, "reductase", SimpleReplaceFunc } , + { "rsponse" , ContainsWholeWord, eSuspectNameType_Typo, "response", SimpleReplaceFunc } , + { "serovar" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "sigm" , ContainsWholeWord, eSuspectNameType_Typo, "sigma", NULL } , + { "sreptomyces" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } , + { "staphylococcal" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "start codon" , 
ContainsWholeWord, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "streptococcal" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "streptomyces" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "subsp" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "tetracenpmycin" , ContainsWholeWord, eSuspectNameType_Typo, "tetracenomycin", SimpleReplaceFunc } , + { "thaliana" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "thiamin/thiamin" , ContainsWholeWord, eSuspectNameType_Typo, "thiamin/thiamine", SimpleReplaceFunc } , + { "thioderoxin" , ContainsWholeWord, eSuspectNameType_Typo, "thioredoxin", SimpleReplaceFunc } , + { "threonin" , ContainsWholeWord, eSuspectNameType_Typo, "threonine", SimpleReplaceFunc } , + { "transcrIptional" , ContainsWholeWordCaseSensitive, eSuspectNameType_Typo, "transcriptional", SimpleReplaceFunc } , + { "transemembrane" , ContainsWholeWord, eSuspectNameType_Typo, "transmembrane", SimpleReplaceFunc } , + { "transferasee" , ContainsWholeWord, eSuspectNameType_Typo, "transferase", SimpleReplaceFunc } , + { "transmebrane" , ContainsWholeWord, eSuspectNameType_Typo, "transmembrane", SimpleReplaceFunc } , + { "unkn", IsSingleWord, eSuspectNameType_None, "hypothetical protein", SimpleReplaceFunc }, + { "unnamed" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } , + { "utilisation" , ContainsWholeWord, eSuspectNameType_Typo, "utilization", SimpleReplaceFunc } , + { "xenopus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "yeast" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "ypothetical" , ContainsWholeWord, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "ytochrome" , ContainsWholeWord, eSuspectNameType_Typo, "cytochrome", SimpleReplaceFunc } , + { "containing" , StartsWithPattern, eSuspectNameType_None, NULL, NULL } , + { "from" , 
StartsWithPattern, eSuspectNameType_None, NULL, NULL } , + { "CHC2 zinc finger" , IsSingleWord, eSuspectNameType_UseProtein, NULL, NULL } , + { "SWIM zinc finger" , IsSingleWord, eSuspectNameType_UseProtein, NULL, NULL } , + { "probable protein" , IsSingleWord, eSuspectNameType_None, NULL, NULL } , + { "protein" , IsSingleWord, eSuspectNameType_None, NULL, NULL } , + { "sodium" , IsSingleWord, eSuspectNameType_None, NULL, NULL } , + { "IS" , PrefixPlusNumbersOnly, eSuspectNameType_None, NULL, NULL } , + { "three or more numbers together, not after 'UPF' or 'DUF' or 'IS' and not followed by the word 'family' and not preceded by either 'cytochrome' or 'coenzyme'" , ThreeOrMoreNumbersTogether, + eSuspectNameType_Database, NULL, NULL } , + { "all capital letters" , AllCapitalLetters, eSuspectNameType_InappropriateSymbol, NULL, NULL } , + { "#" , NormalSearch, eSuspectNameType_InappropriateSymbol, NULL, NULL } , + { ". " , NormalSearch, eSuspectNameType_InappropriateSymbol, NULL, NULL } , + { "=" , NormalSearch, eSuspectNameType_InappropriateSymbol, NULL, NULL } , + { "?" 
, NormalSearch, eSuspectNameType_InappropriateSymbol, NULL, NULL } , + { "%" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "Chloroplast" , NormalSearch, eSuspectNameType_NoOrganelleForProkaryote, NULL, NULL } , + { "ECOLI" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Fragment" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "Frameshift" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "Homolog" , NormalSearch, eSuspectNameType_EvolutionaryRelationship, NULL, NULL } , + { "Intein" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "Intiation" , NormalSearch, eSuspectNameType_Typo, "initiation", SimpleReplaceFunc } , + { "K potassium" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "K+ potassium" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "Mitochondrial" , NormalSearch, eSuspectNameType_NoOrganelleForProkaryote, NULL, NULL } , + { "No definition line found" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "Plasmodium" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "Portein" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "Related to" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "Similar to" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "Transemembrane" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "Transmebrane" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "\\-PA" , NormalSearch, eSuspectNameType_InappropriateSymbol, NULL, NULL } , + { "accessroy" , NormalSearch, eSuspectNameType_Typo, "accessory", SimpleReplaceFunc } , + { "aceytltranferase" , NormalSearch, eSuspectNameType_Typo, "acetyltransferase", SimpleReplaceFunc } , + { "active site" , NormalSearch, eSuspectNameType_UseProtein, NULL, NULL } , + { "adenylattransferase" , NormalSearch, eSuspectNameType_Typo, "adenylate transferase", SimpleReplaceFunc } , + { "adenylytransferase" , NormalSearch, 
eSuspectNameType_Typo, "adenylyltransferase", SimpleReplaceFunc } , + { "alternate protein name" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "aluminium" , NormalSearch, eSuspectNameType_Typo, "aluminum", SimpleReplaceFunc } , + { "aminopetidase" , NormalSearch, eSuspectNameType_Typo, "aminopeptidase", SimpleReplaceFunc } , + { "aparaginase" , NormalSearch, eSuspectNameType_Typo, "asparaginase", SimpleReplaceFunc } , + { "asparate" , NormalSearch, eSuspectNameType_Typo, "aspartate", SimpleReplaceFunc } , + { "authentic point mutation" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "bifunctional" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "bifunctionnal" , NormalSearch, eSuspectNameType_Typo, "bifunctional", SimpleReplaceFunc } , + { "bigenesis" , NormalSearch, eSuspectNameType_Typo, "biogenesis", SimpleReplaceFunc } , + { "biosyntesis" , NormalSearch, eSuspectNameType_Typo, "biosynthesis", SimpleReplaceFunc } , + { "bnding" , NormalSearch, eSuspectNameType_Typo, "binding", SimpleReplaceFunc } , + { "bos taurus" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "carboxilic" , NormalSearch, eSuspectNameType_Typo, "carboxylic", SimpleReplaceFunc } , + { "cell divisionFtsK/SpoIIIE" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "characteris" , NormalSearch, eSuspectNameType_Typo, "characteriz", SimpleReplaceAnywhereFunc } , + { "coantaining" , NormalSearch, eSuspectNameType_Typo, "containing", SimpleReplaceFunc } , + { "coenzye" , NormalSearch, eSuspectNameType_Typo, "coenzyme", SimpleReplaceFunc } , + { "componenet" , NormalSearch, eSuspectNameType_Typo, "component", SimpleReplaceFunc } , + { "componnent" , NormalSearch, eSuspectNameType_Typo, "component", SimpleReplaceFunc } , + { "consevered" , NormalSearch, eSuspectNameType_Typo, "conserved", SimpleReplaceFunc } , + { "containg" , NormalSearch, eSuspectNameType_Typo, "containing", SimpleReplaceFunc } , + { "cotaining" , 
NormalSearch, eSuspectNameType_Typo, "containing", SimpleReplaceFunc } , + { "degration" , NormalSearch, eSuspectNameType_Typo, "degradation", SimpleReplaceFunc } , + { "deletion" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "dependant" , NormalSearch, eSuspectNameType_Typo, "dependent", SimpleReplaceFunc } , + { "dimerisation" , NormalSearch, eSuspectNameType_Typo, "dimerization", SimpleReplaceFunc } , + { "dimerising" , NormalSearch, eSuspectNameType_Typo, "dimerizing", SimpleReplaceFunc } , + { "dioxyenase" , NormalSearch, eSuspectNameType_Typo, "dioxygenase", SimpleReplaceFunc } , + { "disulphide" , NormalSearch, eSuspectNameType_Typo, "disulfide", SimpleReplaceFunc } , + { "divison" , NormalSearch, eSuspectNameType_Typo, "division", SimpleReplaceFunc } , + { "domain domain" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "domain protein domain protein" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "domian" , NormalSearch, eSuspectNameType_Typo, "domain", SimpleReplaceFunc } , + { "dyhydrogenase" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "dyhydrogenase" , NormalSearch, eSuspectNameType_Typo, "dehydrogenase", SimpleReplaceFunc } , + { "enentioselective" , NormalSearch, eSuspectNameType_Typo, "enantioselective", SimpleReplaceFunc } , + { "facotr" , NormalSearch, eSuspectNameType_Typo, "factor", SimpleReplaceFunc } , + { "fagella", NormalSearch, eSuspectNameType_Typo, "flagella", SimpleReplaceFunc } , + { "family family" , NormalSearch, eSuspectNameType_Typo, "family", SimpleReplaceFunc } , + { "flageller" , NormalSearch, eSuspectNameType_Typo, "flagellar", SimpleReplaceFunc } , + { "frame shift" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "gIycerol" , NormalSearch, eSuspectNameType_Typo, "glycerol", SimpleReplaceFunc } , + { "glcosyl" , NormalSearch, eSuspectNameType_Typo, "glycosyl", SimpleReplaceFunc } , + { "glucosainyl" , NormalSearch, eSuspectNameType_Typo, 
"glucosaminyl", SimpleReplaceFunc } , + { "glutaminne" , NormalSearch, eSuspectNameType_Typo, "glutamine", SimpleReplaceFunc } , + { "golgi" , NormalSearch, eSuspectNameType_NoOrganelleForProkaryote, NULL, NULL } , + { "haem" , NormalSearch, eSuspectNameType_Typo, "heme", HaemReplaceFunc } , + { "haemagglutination" , NormalSearch, eSuspectNameType_Typo, "hemagglutination", SimpleReplaceFunc } , + { "heam" , NormalSearch, eSuspectNameType_Typo, "heme", HaemReplaceFunc } , + { "hemelysin" , NormalSearch, eSuspectNameType_Typo, "hemolysin", SimpleReplaceFunc } , + { "hemoglobine" , NormalSearch, eSuspectNameType_Typo, "hemoglobin", SimpleReplaceFunc } , + { "hexapaptide" , NormalSearch, eSuspectNameType_Typo, "hexapeptide", SimpleReplaceFunc } , + { "highly conserved" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "histadine" , NormalSearch, eSuspectNameType_Typo, "histidine", SimpleReplaceFunc } , + { "homeserine" , NormalSearch, eSuspectNameType_Typo, "homoserine", SimpleReplaceFunc } , + { "homo sapiens" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "hpothetical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "human" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "hyphotetical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hyphotheical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypotehtical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypotethical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypotetical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypotheical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypotheitcal" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypothetcial" , NormalSearch, 
eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypothteical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypothtical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hypthetical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "hyptothetical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } , + { "inductible" , NormalSearch, eSuspectNameType_Typo, "inducible", SimpleReplaceFunc } , + { "interrupt" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "isomerse" , NormalSearch, eSuspectNameType_Typo, "isomerase", SimpleReplaceFunc } , + { "majour" , NormalSearch, eSuspectNameType_Typo, "major", SimpleReplaceFunc } , + { "mambrane" , NormalSearch, eSuspectNameType_Typo, "membrane", SimpleReplaceFunc } , + { "meausure" , NormalSearch, eSuspectNameType_Typo, "measure", SimpleReplaceFunc } , + { "membranne" , NormalSearch, eSuspectNameType_Typo, "membrane", SimpleReplaceFunc } , + { "methlytransferase" , NormalSearch, eSuspectNameType_Typo, "methyltransferase", SimpleReplaceFunc } , + { "metylase" , NormalSearch, eSuspectNameType_Typo, "methylase", SimpleReplaceFunc } , + { "molibdenum" , NormalSearch, eSuspectNameType_Typo, "molybdenum", SimpleReplaceFunc } , + { "molybopterin" , NormalSearch, eSuspectNameType_Typo, "molybdopterin", SimpleReplaceFunc } , + { "molydopterin" , NormalSearch, eSuspectNameType_Typo, "molybdopterin", SimpleReplaceFunc } , + { "monooxigenase" , NormalSearch, eSuspectNameType_Typo, "monooxygenase", SimpleReplaceFunc } , + { "monoxyde" , NormalSearch, eSuspectNameType_Typo, "monoxide", SimpleReplaceFunc } , + { "monoxygenase" , NormalSearch, eSuspectNameType_Typo, "monooxygenase", SimpleReplaceFunc } , + { "mulitdrug" , NormalSearch, eSuspectNameType_Typo, "multidrug", SimpleReplaceFunc } , + { "multifunctional", NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "narrowly 
conserved" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "nickle" , NormalSearch, eSuspectNameType_Typo, "nickel", SimpleReplaceFunc } , + { "novel protein" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "nucelar" , NormalSearch, eSuspectNameType_Typo, "nuclear", SimpleReplaceFunc } , + { "nucleotydyl" , NormalSearch, eSuspectNameType_Typo, "nucleotidyl", SimpleReplaceFunc } , + { "nulcear" , NormalSearch, eSuspectNameType_Typo, "nuclear", SimpleReplaceFunc } , + { "open reading frame" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "or related" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "orphan protein" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "ortholog" , NormalSearch, eSuspectNameType_EvolutionaryRelationship, NULL, NULL } , + { "outers" , NormalSearch, eSuspectNameType_Typo, "outer", SimpleReplaceFunc } , + { "oxidoreducatse" , NormalSearch, eSuspectNameType_Typo, "oxidoreductase", SimpleReplaceFunc } , + { "oxidoreductasse" , NormalSearch, eSuspectNameType_Typo, "oxidoreductase", SimpleReplaceFunc } , + { "oxidoreduxtase" , NormalSearch, eSuspectNameType_Typo, "oxidoreductase", SimpleReplaceFunc } , + { "oxydase" , NormalSearch, eSuspectNameType_Typo, "oxidase", SimpleReplaceFunc } , + { "paralog" , NormalSearch, eSuspectNameType_EvolutionaryRelationship, NULL, NULL } , + { "partial" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "peptidodoglycan" , NormalSearch, eSuspectNameType_Typo, "peptidoglycan", SimpleReplaceFunc } , + { "periplsmic" , NormalSearch, eSuspectNameType_Typo, "periplasmic", SimpleReplaceFunc } , + { "phophate" , NormalSearch, eSuspectNameType_Typo, "phosphate", SimpleReplaceFunc } , + { "phopho" , NormalSearch, eSuspectNameType_Typo, "phospho", SimpleReplaceFunc } , + { "phophoserine" , NormalSearch, eSuspectNameType_Typo, "phosphoserine", SimpleReplaceFunc } , + { "phoshate" , NormalSearch, eSuspectNameType_Typo, "phosphate", 
SimpleReplaceFunc } , + { "phosphatransferase" , NormalSearch, eSuspectNameType_Typo, "phosphotransferase", SimpleReplaceFunc } , + { "phosphotase" , NormalSearch, eSuspectNameType_Typo, "phosphatase", SimpleReplaceFunc } , + { "posible" , NormalSearch, eSuspectNameType_Typo, "possible", SimpleReplaceFunc } , + { "presursor" , NormalSearch, eSuspectNameType_Typo, "precursor", SimpleReplaceFunc } , + { "probable putative" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "proein" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } , + { "prortein" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } , + { "proteine" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } , + { "proteinn" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } , + { "protien" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } , + { "protrein" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } , + { "prptein" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } , + { "pseudogene" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "puatative" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } , + { "puative" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } , + { "putaitive" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } , + { "putaitve" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } , + { "putaive" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } , + { "putataive" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } , + { "putatitve" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } , + { "putative orphan protein" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "putative probable" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "putative putative" , NormalSearch, eSuspectNameType_None, 
NULL, NULL } , + { "putative, putative" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "putatuve" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } , + { "putatve" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } , + { "putatvie" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } , + { "putayive" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } , + { "putitive" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } , + { "qlcohol" , NormalSearch, eSuspectNameType_Typo, "alcohol", SimpleReplaceFunc } , + { "recognised" , NormalSearch, eSuspectNameType_Typo, "recognized", SimpleReplaceFunc } , + { "regulatot" , NormalSearch, eSuspectNameType_Typo, "regulator", SimpleReplaceFunc } , + { "reponse" , NormalSearch, eSuspectNameType_Typo, "response", SimpleReplaceFunc } , + { "resistence" , NormalSearch, eSuspectNameType_Typo, "resistance", SimpleReplaceFunc } , + { "ribosimal" , NormalSearch, eSuspectNameType_Typo, "ribosomal", SimpleReplaceFunc } , + { "ribosoml" , NormalSearch, eSuspectNameType_Typo, "ribosomal", SimpleReplaceFunc } , + { "sapiens" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "serinr" , NormalSearch, eSuspectNameType_Typo, "serine", SimpleReplaceFunc } , + { "signalling" , NormalSearch, eSuspectNameType_Typo, "signaling", SimpleReplaceFunc } , + { "similar" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "simmilar" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "specfic" , NormalSearch, eSuspectNameType_Typo, "specific", SimpleReplaceFunc } , + { "sphaeroides" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } , + { "spscific" , NormalSearch, eSuspectNameType_Typo, "specific", SimpleReplaceFunc } , + { "stabilisation" , NormalSearch, eSuspectNameType_Typo, "stabilization", SimpleReplaceFunc } , + { "subnit" , NormalSearch, eSuspectNameType_Typo, "subunit", SimpleReplaceFunc } , + 
{ "suger" , NormalSearch, eSuspectNameType_Typo, "sugar", SimpleReplaceFunc } , + { "sulpho" , NormalSearch, eSuspectNameType_None, "sulfo", SimpleReplaceFunc } , + { "sulphur" , NormalSearch, eSuspectNameType_Typo, "sulfur", SimpleReplaceFunc } , + { "systhesis" , NormalSearch, eSuspectNameType_Typo, "synthesis", SimpleReplaceFunc } , + { "sythase" , NormalSearch, eSuspectNameType_Typo, "synthase", SimpleReplaceFunc } , + { "thiredoxin" , NormalSearch, eSuspectNameType_Typo, "thioredoxin", SimpleReplaceFunc } , + { "trancsriptional" , NormalSearch, eSuspectNameType_Typo, "transcription", SimpleReplaceFunc } , + { "tranferase" , NormalSearch, eSuspectNameType_Typo, "transferase", SimpleReplaceFunc } , + { "tranporter" , NormalSearch, eSuspectNameType_Typo, "transporter", SimpleReplaceFunc } , + { "transcirbed" , NormalSearch, eSuspectNameType_Typo, "transcribed", SimpleReplaceFunc } , + { "transcriptonal" , NormalSearch, eSuspectNameType_Typo, "transcriptional", SimpleReplaceFunc } , + { "transcritional" , NormalSearch, eSuspectNameType_Typo, "transcriptional", SimpleReplaceFunc } , + { "transebrane" , NormalSearch, eSuspectNameType_Typo, "transmembrane", SimpleReplaceFunc } , + { "transglycolase" , NormalSearch, eSuspectNameType_Typo, "transglycosylase", SimpleReplaceFunc } , + { "transorter" , NormalSearch, eSuspectNameType_Typo, "transporter", SimpleReplaceFunc } , + { "transpoase" , NormalSearch, eSuspectNameType_Typo, "transposase", SimpleReplaceFunc } , + { "transportor" , NormalSearch, eSuspectNameType_Typo, "transporter", SimpleReplaceFunc } , + { "transproter" , NormalSearch, eSuspectNameType_Typo, "transporter", SimpleReplaceFunc } , + { "transulfuration" , NormalSearch, eSuspectNameType_Typo, "transsulfuration", SimpleReplaceFunc } , + { "trnasporter" , NormalSearch, eSuspectNameType_Typo, "transporter", SimpleReplaceFunc } , + { "truncat" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "ttg start" , NormalSearch, 
eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "tumour" , NormalSearch, eSuspectNameType_Typo, "tumor", SimpleReplaceFunc } , + { "typr" , NormalSearch, eSuspectNameType_Typo, "type", SimpleReplaceFunc } , + { "uncharacterized protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "uncharaterized" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "undecapaprenyl" , NormalSearch, eSuspectNameType_Typo, "undecaprenyl", SimpleReplaceFunc } , + { "unkown" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "utilising" , NormalSearch, eSuspectNameType_Typo, "utilizing", SimpleReplaceFunc } , + { "weakly conserved" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "widely conserved" , NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "|" , NormalSearch, eSuspectNameType_InappropriateSymbol, NULL, NULL } , + { "C term" , ProductContainsTerm, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "C-term" , ProductContainsTerm, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "N term" , ProductContainsTerm, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "N-term" , ProductContainsTerm, eSuspectNameType_MightBeNonfunctional, NULL, NULL } , + { "Two or more sets of brackets or parentheseis" , ContainsTwoSetsOfBracketsOrParentheses, eSuspectNameType_None, NULL, NULL } , + { "unknown" , ContainsUnknownName, eSuspectNameType_None, NULL, NULL } , + { "double space" , ContainsDoubleSpace, eSuspectNameType_None, NULL, NULL } , + { "COG" , ContainsWholeWordCaseSensitive, eSuspectNameType_Database, NULL, NULL } , + { "DUF" , ContainsWholeWordCaseSensitive, eSuspectNameType_Database, NULL, NULL } , + { "EST" , ContainsWholeWordCaseSensitive, eSuspectNameType_Database, NULL, NULL } , + { "FOG" , ContainsWholeWordCaseSensitive, eSuspectNameType_Database, NULL, NULL } , + { "UPF" , ContainsWholeWordCaseSensitive, eSuspectNameType_Database, NULL, NULL } , + 
{ "_" , ContainsUnderscore, eSuspectNameType_Database, NULL, NULL } , + { "ending with period, comma, hyphen, underscore, colon, or forward slash" , EndsWithPunct, eSuspectNameType_InappropriateSymbol, NULL, NULL } , + { "PTS system" , IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_UseProtein, NULL, NULL } , + { "helix-turn-helix" , IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_UseProtein, NULL, NULL } , + { "transposase of" , IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_None, NULL, NULL } , + { "zinc finger" , IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_UseProtein, NULL, NULL } , + { "may contain a plural" , MayContainPlural, eSuspectNameType_None, NULL, NULL } , + { "unbalanced brackets or parentheses" , ContainsUnbalancedParentheses, eSuspectNameType_InappropriateSymbol, NULL, NULL } , + { "long product name that may contain descriptive information more appropriate in a note", IsTooLong, eSuspectNameType_QuickFix, NULL, NULL } , + { "Product name begins with possible, potential, predicted or probable. 
Please use putative.", StartsWithPutativeReplacement, eSuspectNameType_QuickFix, "putative", UsePutative } , + + { "CDS", NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "doubtful", NormalSearch, eSuspectNameType_None, NULL, NULL } , + { "alternate protein name", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "conser", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "conserve", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "conserved hypothetical", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "conserved hypothetical protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "conserved", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "domain family", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "domain of unknown function", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "domain protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "domain", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "doubtful CDS found within S. 
typhi pathogenicity island", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "factor", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "family protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "hypo", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "hypothetical ORF", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "hypothetical", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "hypothetical domain protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "No definition line found", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "orphan protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "ORF", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "orf, hyp", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "orf, hypothetical", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "peptide", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "precursor", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "probable", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { 
"predicted", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "predicted protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "probable protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "protein containing", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "protein of unknown function", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "protein-containing", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "pseudo", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "putative conserved hypothetical", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "putative hypothetical", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "putative protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "putative", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "uncharacterized conserved protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "unnamed", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } , + { "o252", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "o252 protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, 
"hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Alanine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Arginine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Asparagine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Aspartic acid", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Cysteine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "DNA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Glutamic acid", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Glutamine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Glycine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Histidine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Isoleucine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Leucine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Lysine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Methionine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "NAD", 
IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "PASTA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_UseProtein, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Phenylalanine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Proline", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "RNA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Serine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Threonine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Tryptophan", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Tyrosine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "Valine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "adenine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "amino acid", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "barrel", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "carbon", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "citrate", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } 
, + { "cytosine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "finger", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "ggdef", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "guanine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "helium", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "helix", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "hydrogen", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "insertion sequence", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "iron", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "mRNA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "membrane", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "ncRNA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "nitrogen", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "oxygen", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "p-loop", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_UseProtein, "hypothetical protein", 
ReplaceWholeNameAddNoteFunc } , + { "peptide", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "phage", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "plasmid", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "purine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "rRNA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "repeat", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "secreted", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "signal peptide", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_UseProtein, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "signal", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "subunit", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "tRNA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "thymine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "transport-associated", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "transposon", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "uracil", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, 
"hypothetical protein", ReplaceWholeNameAddNoteFunc } , + { "zinc", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } }; const int num_suspect_product_terms = sizeof (suspect_product_terms) / sizeof (SuspectProductNameData);
+
+/* FixSuspectProductNameTyposInOneFeature
+ *
+ * Applies the automatic fixes of one category (fix_type) from
+ * suspect_product_terms to the product name of a single coding region.
+ *
+ *   cds      - coding region feature; ignored unless it is a SEQFEAT_CDREGION
+ *              with data, a product, and a ProtRef (GetProtRefForFeature).
+ *   lip      - optional log; when lip and lip->fp are both non-NULL every
+ *              change is written to lip->fp and lip->data_in_log is set.
+ *   fix_type - only table entries with this fix_type (and with both a
+ *              search_func and a replace_func) are applied.
+ *
+ * For each table entry, names are examined up to and including the first
+ * name that has text; later names are not touched.  Entries are applied in
+ * table order, so a later entry sees the result of an earlier one.
+ *
+ * When the name actually changes, an overlapping mRNA whose product name was
+ * identical to the old CDS product name is updated to the new name as well.
+ * This is done whether or not a log is supplied, so that the sequence data
+ * ends up the same with and without logging.
+ */
+static void FixSuspectProductNameTyposInOneFeature (SeqFeatPtr cds, LogInfoPtr lip, ESuspectNameType fix_type)
+{
+  Int4         k;
+  ProtRefPtr   prp;
+  ValNodePtr   vnp;
+  CharPtr      orig, tmp, desc;
+  ValNode      vn;
+  SeqFeatPtr   mrna;
+  SeqMgrFeatContext context;
+  RnaRefPtr    rrp;
+  CharPtr      extra;
+  CharPtr      and_associated_mrna = " and associated mRNA";
+
+  if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION || cds->data.value.ptrvalue == NULL
+      || cds->product == NULL || (prp = GetProtRefForFeature(cds)) == NULL)
+  {
+    return;
+  }
+
+  for (k = 0; k < num_suspect_product_terms; k++)
+  {
+    /* none of these checks depend on the name, so make them once per term */
+    if (suspect_product_terms[k].fix_type != fix_type
+        || suspect_product_terms[k].replace_func == NULL
+        || suspect_product_terms[k].search_func == NULL)
+    {
+      continue;
+    }
+
+    for (vnp = prp->name; vnp != NULL; vnp = vnp->next)
+    {
+      if ((suspect_product_terms[k].search_func) (suspect_product_terms[k].pattern, vnp->data.ptrvalue))
+      {
+        /* run the fix on a copy so that the old and new names can be compared */
+        orig = (CharPtr) vnp->data.ptrvalue;
+        tmp = StringSave (orig);
+        (suspect_product_terms[k].replace_func)(&tmp,
+                                                suspect_product_terms[k].pattern,
+                                                suspect_product_terms[k].replace_phrase,
+                                                cds);
+        if (StringCmp (tmp, orig) != 0) {
+          /* keep an mRNA that carried the same product name in step with the CDS */
+          extra = "";
+          mrna = SeqMgrGetOverlappingmRNA (cds->location, &context);
+          if (mrna != NULL && mrna->data.choice == SEQFEAT_RNA
+              && (rrp = (RnaRefPtr) mrna->data.value.ptrvalue) != NULL
+              && rrp->ext.choice == 1
+              && StringCmp (rrp->ext.value.ptrvalue, orig) == 0) {
+            rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
+            rrp->ext.value.ptrvalue = StringSave (tmp);
+            extra = and_associated_mrna;
+          }
+          /* logging is optional; the data change above is not */
+          if (lip != NULL && lip->fp != NULL) {
+            MemSet (&vn, 0, sizeof (ValNode));
+            vn.choice = OBJ_SEQFEAT;
+            vn.data.ptrvalue = cds;
+            desc = GetDiscrepancyItemText (&vn);
+            fprintf (lip->fp, "Changed '%s' to '%s' for %s%s\n", orig, tmp, desc, extra);
+            desc = MemFree (desc);
+            lip->data_in_log = TRUE;
+          }
+          /* the name list takes ownership of the corrected string */
+          vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
+          vnp->data.ptrvalue = tmp;
+          tmp = NULL;
+        }
+        /* frees the copy when the fix made no difference; no-op otherwise */
+        tmp = MemFree (tmp);
+        break;
+      }
+      /* only check the first name */
+      if (!StringHasNoText (vnp->data.ptrvalue)) {
+        break;
+      }
+    }
+  }
+}
+
+
+/* Applies the eSuspectNameType_Typo fixes to every OBJ_SEQFEAT entry of
+ * item_list.  The data argument is not used by this function.
+ */
+static void FixSuspectProductNameTypos (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+  ValNodePtr vnp;
+
+  for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
+    if (vnp->choice == OBJ_SEQFEAT) {
+      FixSuspectProductNameTyposInOneFeature ((SeqFeatPtr) vnp->data.ptrvalue, lip, eSuspectNameType_Typo);
+    }
+  }
+}
+
+
+/* Applies the eSuspectNameType_QuickFix fixes to every OBJ_SEQFEAT entry of
+ * item_list.  The data argument is not used by this function.
+ */
+static void FixSuspectProductNameQuickFixes (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+  ValNodePtr vnp;
+
+  for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
+    if (vnp->choice == OBJ_SEQFEAT) {
+      FixSuspectProductNameTyposInOneFeature ((SeqFeatPtr) vnp->data.ptrvalue, lip, eSuspectNameType_QuickFix);
+    }
+  }
+}
+
 static void FindSuspectProductNamesCallback (SeqFeatPtr sfp, Pointer userdata) { ValNodePtr PNTR feature_list; @@ -11529,6 +12178,7 @@ static void FindSuspectProductNamesCallback (SeqFeatPtr sfp, Pointer userdata) ValNodePtr vnp; BioseqPtr bsp; SeqFeatPtr cds; + BioSourcePtr biop = NULL; if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT || sfp->data.value.ptrvalue == NULL || userdata == NULL) @@ -11547,11 +12197,16 @@ static void FindSuspectProductNamesCallback (SeqFeatPtr sfp, Pointer userdata) if (cds != NULL) { sfp = cds; } + /* find BioSource, to check whether we want to run all categories */ + biop = GetBiopForBsp (bsp); } } for (k = 0; k < num_suspect_product_terms; k++) - { + { +
if (!CategoryOkForBioSource(biop, suspect_product_terms[k].fix_type)) { + continue; + } for (vnp = prp->name; vnp != NULL; vnp = vnp->next) { if (suspect_product_terms[k].search_func != NULL @@ -11570,15 +12225,22 @@ static void FindSuspectProductNamesCallback (SeqFeatPtr sfp, Pointer userdata) } -static ClickableItemPtr SuspectPhrase (Uint4 clickable_item_type, CharPtr phrase, CharPtr feat_type, ValNodePtr feature_list) +static ClickableItemPtr SuspectPhraseEx (Uint4 clickable_item_type, CharPtr phrase, Boolean quote_phrase, CharPtr feat_type, ValNodePtr feature_list) { ClickableItemPtr dip = NULL; - CharPtr bad_fmt = "%d %ss contain '%s'"; + CharPtr bad_fmt_quote = "%d %ss contain '%s'"; + CharPtr bad_fmt_noquote = "%d %ss contain %s"; + CharPtr bad_fmt; if (feature_list == NULL || phrase == NULL || StringHasNoText (feat_type)) { return NULL; } + if (quote_phrase) { + bad_fmt = bad_fmt_quote; + } else { + bad_fmt = bad_fmt_noquote; + } dip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData)); if (dip != NULL) @@ -11595,6 +12257,12 @@ static ClickableItemPtr SuspectPhrase (Uint4 clickable_item_type, CharPtr phrase } +static ClickableItemPtr SuspectPhrase (Uint4 clickable_item_type, CharPtr phrase, CharPtr feat_type, ValNodePtr feature_list) +{ + return SuspectPhraseEx (clickable_item_type, phrase, TRUE, feat_type, feature_list); +} + + static ClickableItemPtr SuspectPhraseEnd (Uint4 clickable_item_type, CharPtr phrase, CharPtr feat_type, ValNodePtr feature_list) { ClickableItemPtr dip = NULL; @@ -11645,120 +12313,349 @@ static ClickableItemPtr SuspectPhraseStart (Uint4 clickable_item_type, CharPtr p } -extern void FindSuspectProductNames (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +static Uint4 ClickableItemTypeForNameCat (Int4 k) { - ValNodePtr PNTR feature_list = NULL; - ValNodePtr master_list = NULL, vnp; - Int4 k; - ClickableItemPtr dip; - ValNodePtr subcategories = NULL; - - if (discrepancy_list == NULL) return; + if (k == 
eSuspectNameType_Typo) { + return DISC_PRODUCT_NAME_TYPO; + } else if (k == eSuspectNameType_QuickFix) { + return DISC_PRODUCT_NAME_QUICKFIX; + } else { + return DISC_SUSPECT_PRODUCT_NAME; + } +} - feature_list = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * num_suspect_product_terms); - if (feature_list == NULL) return; +typedef struct suspectrulefeats { + SuspectRuleSetPtr rule_list; + ValNodePtr PNTR feature_list; + Int4 num_rules; +} SuspectRuleFeatsData, PNTR SuspectRuleFeatsPtr; + + +static void FindSuspectProductNamesWithRulesCallback (SeqFeatPtr sfp, Pointer userdata) +{ + SuspectRuleFeatsPtr srlist; + SuspectRulePtr rule; + Int4 k; + ProtRefPtr prp; + BioseqPtr bsp; + SeqFeatPtr cds; - /* initialize array for suspicious product names */ - for (k = 0; k < num_suspect_product_terms; k++) + if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT || sfp->data.value.ptrvalue == NULL + || (srlist = (SuspectRuleFeatsPtr)userdata) == NULL) { - feature_list[k] = NULL; + return; } - for (vnp = sep_list; vnp != NULL; vnp = vnp->next) - { - VisitGenProdSetFeatures (vnp->data.ptrvalue, feature_list, FindSuspectProductNamesCallback); + prp = (ProtRefPtr) sfp->data.value.ptrvalue; + + if (prp == NULL || prp->name == NULL) { + return; } - - for (k = 0; k < num_suspect_product_terms; k++) - { - if (feature_list[k] != NULL) - { - if (suspect_product_terms[k].search_func == EndsWithPattern) - { - dip = SuspectPhraseEnd (DISC_SUSPECT_PRODUCT_NAME, suspect_product_terms[k].pattern, "product name", feature_list[k]); - } - else if (suspect_product_terms[k].search_func == StartsWithPattern) - { - dip = SuspectPhraseStart (DISC_SUSPECT_PRODUCT_NAME, suspect_product_terms[k].pattern, "product name", feature_list[k]); - } - else - { - dip = SuspectPhrase (DISC_SUSPECT_PRODUCT_NAME, suspect_product_terms[k].pattern, "product name", feature_list[k]); - } - if (dip != NULL) - { - ValNodeAddPointer (&subcategories, 0, dip); + + /* add coding region rather than protein */ + if 
(sfp->idx.subtype == FEATDEF_PROT) { + bsp = BioseqFindFromSeqLoc (sfp->location); + if (bsp != NULL) { + cds = SeqMgrGetCDSgivenProduct (bsp, NULL); + if (cds != NULL) { + sfp = cds; } - ValNodeLinkCopy (&master_list, feature_list[k]); } } - if (master_list != NULL) + for (k = 0, rule = srlist->rule_list; k < srlist->num_rules && rule != NULL; k++, rule = rule->next) { - dip = SuspectPhrase (DISC_SUSPECT_PRODUCT_NAME, "suspect phrase or characters", "product_name", master_list); - if (dip != NULL) + if (DoesStringMatchSuspectRule (prp->name->data.ptrvalue, sfp, rule)) { - dip->subcategories = subcategories; - ValNodeAddPointer (discrepancy_list, 0, dip); + ValNodeAddPointer (&(srlist->feature_list[k]), OBJ_SEQFEAT, sfp); + break; } } - - MemFree (feature_list); + } -NLM_EXTERN Boolean IsProductNameOk (CharPtr product_name) +static void AutoFixSuspectProductRules (ValNodePtr item_list, Pointer userdata, LogInfoPtr lip) { - Int4 k; - Boolean rval = TRUE; + SuspectRulePtr rule; + ValNodePtr vnp; - for (k = 0; k < num_suspect_product_terms && rval; k++) - { - if (suspect_product_terms[k].search_func != NULL - && (suspect_product_terms[k].search_func) (suspect_product_terms[k].pattern, product_name)) - { - rval = FALSE; - } + if ((rule = (SuspectRulePtr) userdata) == NULL || item_list == NULL) { + return; } - return rval; -} + for (vnp = item_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT) { + if (ApplySuspectProductNameFixToFeature (rule, (SeqFeatPtr) vnp->data.ptrvalue, lip == NULL ? 
NULL : lip->fp)) { + if (lip != NULL) { + lip->data_in_log = TRUE; + } + } + } + } +} -extern void FindSuspectProductNamesInNameList (FILE *input_file, FILE *output_file) + +static void +FindSuspectProductNamesWithRules +(ValNodePtr PNTR discrepancy_list, + ValNodePtr sep_list, + SuspectRuleSetPtr rule_list) { - ReadBufferData rbd; - CharPtr line, func_name; - Int4 k; + SuspectRuleFeatsData srdata; + SuspectRulePtr rule; + CharPtr summ; + CharPtr fmt = "%d features %s"; + ValNodePtr PNTR name_cat; + ValNodePtr master_list = NULL, vnp; + Int4 k; + ClickableItemPtr dip, tdip = NULL; + ValNodePtr subcategories = NULL; + Int4 num_cat = Fix_type_gene + 1; + + if (discrepancy_list == NULL) return; + + srdata.num_rules = CountSuspectRuleSet (rule_list); + if (srdata.num_rules == 0) { + return; + } + + srdata.rule_list = rule_list; + srdata.feature_list = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * srdata.num_rules); + if (srdata.feature_list == NULL) return; + + name_cat = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * num_cat); + + /* initialize array for suspicious product names */ + for (k = 0; k < srdata.num_rules; k++) + { + srdata.feature_list[k] = NULL; + } + + /* initialize named categories */ + for (k = 0; k < num_cat; k++) { + name_cat[k] = NULL; + } - rbd.fp = input_file; - rbd.current_data = NULL; + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) + { + VisitGenProdSetFeatures (vnp->data.ptrvalue, &srdata, FindSuspectProductNamesWithRulesCallback); + } - line = AbstractReadFunction (&rbd); - while (line != NULL) + for (k = 0, rule = srdata.rule_list; k < srdata.num_rules && rule != NULL; k++, rule = rule->next) { - for (k = 0; k < num_suspect_product_terms; k++) - { - if (suspect_product_terms[k].search_func != NULL - && (suspect_product_terms[k].search_func) (suspect_product_terms[k].pattern, line)) + if (srdata.feature_list[k] != NULL) + { + summ = SummarizeSuspectRule(rule); + dip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData)); + 
dip->clickable_item_type = DISC_SUSPECT_PRODUCT_NAME; + dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (summ) + 15)); + sprintf (dip->description, fmt, ValNodeLen (srdata.feature_list[k]), summ); + summ = MemFree (summ); + dip->callback_func = NULL; + dip->datafree_func = NULL; + dip->callback_data = NULL; + dip->item_list = srdata.feature_list[k]; + if (rule->replace != NULL) { + dip->autofix_func = AutoFixSuspectProductRules; + dip->autofix_data = rule; + } + ValNodeAddPointer (&name_cat[rule->rule_type], 0, dip); + ValNodeLinkCopy (&master_list, srdata.feature_list[k]); + } + } + if (master_list != NULL) + { + for (k = 0; k < num_cat; k++) { + if (name_cat[k] != NULL) { + tdip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData)); + MemSet (tdip, 0, sizeof (ClickableItemData)); + tdip->description = StringSave (SummarizeFixType(k)); + tdip->item_list = ItemListFromSubcategories (name_cat[k]); + tdip->clickable_item_type = DISC_SUSPECT_PRODUCT_NAME; + tdip->subcategories = name_cat[k]; + tdip->expanded = TRUE; + ValNodeAddPointer (&subcategories, 0, tdip); + } + } + dip = SuspectPhraseEx (DISC_SUSPECT_PRODUCT_NAME, "suspect phrase or characters", FALSE, "product_name", master_list); + if (dip != NULL) + { + dip->subcategories = subcategories; + dip->expanded = TRUE; + ValNodeAddPointer (discrepancy_list, 0, dip); + } + } + + MemFree (srdata.feature_list); + MemFree (name_cat); +} + + + +static void FindSuspectProductNamesWithStaticList (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr PNTR feature_list = NULL; + ValNodePtr master_list = NULL, vnp; + Int4 k; + ClickableItemPtr dip, tdip = NULL; + ValNodePtr name_cat[eSuspectNameType_Max]; + ValNodePtr subcategories = NULL; + + if (discrepancy_list == NULL) return; + + feature_list = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * num_suspect_product_terms); + if (feature_list == NULL) return; + + MemSet (&name_cat, 0, sizeof (name_cat)); + + /* initialize 
array for suspicious product names */ + for (k = 0; k < num_suspect_product_terms; k++) + { + feature_list[k] = NULL; + } + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) + { + VisitGenProdSetFeatures (vnp->data.ptrvalue, feature_list, FindSuspectProductNamesCallback); + } + + for (k = 0; k < num_suspect_product_terms; k++) + { + if (feature_list[k] != NULL) + { + if (suspect_product_terms[k].search_func == EndsWithPattern) + { + dip = SuspectPhraseEnd (ClickableItemTypeForNameCat(suspect_product_terms[k].fix_type), suspect_product_terms[k].pattern, "product name", feature_list[k]); + } + else if (suspect_product_terms[k].search_func == StartsWithPattern) + { + dip = SuspectPhraseStart (ClickableItemTypeForNameCat(suspect_product_terms[k].fix_type), suspect_product_terms[k].pattern, "product name", feature_list[k]); + } + else + { + dip = SuspectPhrase (ClickableItemTypeForNameCat(suspect_product_terms[k].fix_type), suspect_product_terms[k].pattern, "product name", feature_list[k]); + } + if (dip != NULL) { - if (suspect_product_terms[k].search_func == EndsWithPattern) { - func_name = "Ends with"; - } else if (suspect_product_terms[k].search_func == StartsWithPattern) { - func_name = "Starts with"; + ValNodeAddPointer (&name_cat[suspect_product_terms[k].fix_type], 0, dip); + } + ValNodeLinkCopy (&master_list, feature_list[k]); + } + } + if (master_list != NULL) + { + for (k = 0; k < eSuspectNameType_Max; k++) { + if (name_cat[k] != NULL) { + tdip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData)); + MemSet (tdip, 0, sizeof (ClickableItemData)); + tdip->description = StringSave (suspect_name_category_names[k]); + tdip->item_list = ItemListFromSubcategories (name_cat[k]); + tdip->clickable_item_type = ClickableItemTypeForNameCat(suspect_product_terms[k].fix_type); + tdip->subcategories = name_cat[k]; + tdip->expanded = TRUE; + ValNodeAddPointer (&subcategories, 0, tdip); + } + } + dip = SuspectPhraseEx (DISC_SUSPECT_PRODUCT_NAME, "suspect phrase or 
characters", FALSE, "product_name", master_list); + if (dip != NULL) + { + dip->subcategories = subcategories; + dip->expanded = TRUE; + ValNodeAddPointer (discrepancy_list, 0, dip); + } + } + + MemFree (feature_list); +} + + +static SuspectRuleSetPtr s_SuspectProductRuleList = NULL; + +extern void FindSuspectProductNames (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + Char rule_file[PATH_MAX]; + AsnIoPtr aip; + + + if (s_SuspectProductRuleList == NULL) + { + if (GetAppParam ("SEQUINCUSTOM", "SETTINGS", "PRODUCT_RULES_LIST", NULL, rule_file, sizeof (rule_file) - 1) + || GetAppParam ("SEQUIN", "SETTINGS", "PRODUCT_RULES_LIST", NULL, rule_file, sizeof (rule_file) - 1)) + { + if ((aip = AsnIoOpen (rule_file, "r")) == NULL) { + Message (MSG_ERROR, "Unable to read %s", rule_file); + } else { + if ((s_SuspectProductRuleList = SuspectRuleSetAsnRead (aip, NULL)) == NULL) { + Message (MSG_ERROR, "Unable to read suspect product rules from %s", rule_file); + } + AsnIoClose (aip); + } + } + } + if (s_SuspectProductRuleList == NULL) + { + FindSuspectProductNamesWithStaticList(discrepancy_list, sep_list); + } + else + { + FindSuspectProductNamesWithRules(discrepancy_list, sep_list, s_SuspectProductRuleList); + } +} + + +NLM_EXTERN Boolean IsProductNameOk (CharPtr product_name) +{ + Int4 k; + Boolean rval = TRUE; + + for (k = 0; k < num_suspect_product_terms && rval; k++) + { + if (suspect_product_terms[k].search_func != NULL + && (suspect_product_terms[k].search_func) (suspect_product_terms[k].pattern, product_name)) + { + rval = FALSE; + } + } + return rval; +} + + +NLM_EXTERN Boolean ReportProductNameProblems (CharPtr product_name, FILE *output_file, CharPtr prefix) +{ + Int4 k; + Boolean any_problems = FALSE; + CharPtr func_name; + + for (k = 0; k < num_suspect_product_terms; k++) + { + if (suspect_product_terms[k].search_func != NULL + && (suspect_product_terms[k].search_func) (suspect_product_terms[k].pattern, product_name)) + { + if 
(suspect_product_terms[k].search_func == EndsWithPattern) { + func_name = "Ends with"; + } else if (suspect_product_terms[k].search_func == StartsWithPattern) { + func_name = "Starts with"; + } else { + func_name = "Contains"; + } + if (output_file) { + if (prefix == NULL) { + fprintf (output_file, "%s\t%s '%s'\n", product_name, func_name, suspect_product_terms[k].pattern); } else { - func_name = "Contains"; + fprintf (output_file, "%s\t%s\t%s '%s'\n", prefix, product_name, func_name, suspect_product_terms[k].pattern); } - if (output_file) { - fprintf (output_file, "%s\t%s '%s'\n", line, func_name, suspect_product_terms[k].pattern); + } else { + if (prefix == NULL) { + printf ("%s\t%s '%s'\n", product_name, func_name, suspect_product_terms[k].pattern); } else { - printf ("%s\t%s '%s'\n", line, func_name, suspect_product_terms[k].pattern); + printf ("%s\t%s\t%s '%s'\n", prefix, product_name, func_name, suspect_product_terms[k].pattern); } } + any_problems = TRUE; } - - line = MemFree (line); - line = AbstractReadFunction (&rbd); } + return any_problems; } @@ -11840,7 +12737,7 @@ extern void FindSuspectPhrases (ValNodePtr PNTR discrepancy_list, ValNodePtr sep if (subcat != NULL) { - dip = SuspectPhrase (DISC_SUSPECT_PRODUCT_NAME, "suspect phrases", "cds comments or protein description", ItemListFromSubcategories (subcat)); + dip = SuspectPhraseEx (DISC_SUSPECT_PRODUCT_NAME, "suspect phrases", FALSE, "cds comments or protein description", ItemListFromSubcategories (subcat)); if (dip != NULL) { dip->subcategories = subcat; @@ -11961,7 +12858,7 @@ static void FindSuspiciousPhraseInNoteText (ValNodePtr PNTR discrepancy_list, Va if (subcat != NULL) { - dip = SuspectPhrase (DISC_SUSPICIOUS_NOTE_TEXT, "suspicious phrases", "note text", ItemListFromSubcategories (subcat)); + dip = SuspectPhraseEx (DISC_SUSPICIOUS_NOTE_TEXT, "suspicious phrases", FALSE, "note text", ItemListFromSubcategories (subcat)); if (dip != NULL) { dip->subcategories = subcat; @@ -12696,7 +13593,7 @@ 
static void FindRNAsWithoutProductsCallback (SeqFeatPtr sfp, Pointer data) } ff = FeatureFieldNew (); - ff->type = Feature_type_any; + ff->type = Macro_feature_type_any; ValNodeAddInt (&ff->field, FeatQualChoice_legal_qual, Feat_qual_legal_product); field.choice = FieldType_feature_field; field.data.ptrvalue = ff; @@ -13237,7 +14134,7 @@ static void PercentNDiscrepancy (BioseqPtr bsp, Pointer userdata) } pct = PercentNInBioseq (bsp, FALSE); - if (pct > 10.0) + if (pct > 5.0) { ValNodeAddPointer ((ValNodePtr PNTR)userdata, OBJ_BIOSEQ, bsp); } @@ -13655,9 +14552,7 @@ static void AddMissingViralQualsDiscrepancies (BioSourcePtr biop, Uint1 choice, Boolean has_country = FALSE; Boolean has_specific_host = FALSE; - if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL - || StringSearch (biop->org->orgname->lineage, "Viruses") == NULL - || q == NULL) { + if (!IsViralBioSource(biop) || q == NULL) { return; } @@ -14038,10 +14933,16 @@ static ClickableItemPtr FindMultipleSourceQuals (ValNodePtr qual, ValNodePtr ite CharPtr str1, str2, qualname, fmt; CharPtr has_multi_fmt = "%%d sources have multiple %s qualifiers"; ValNodePtr has_multi = NULL; + ValNodePtr src_choice; if (qual == NULL || item_list == NULL) { return NULL; } + if (qual->choice == FieldType_source_qual + && (src_choice = qual->data.ptrvalue) != NULL + && src_choice->choice != SourceQualChoice_textqual) { + return NULL; + } scp = StringConstraintNew (); scp->not_present = TRUE; @@ -14427,6 +15328,7 @@ static ValNodePtr RunBioSourceTest (SeqEntryPtr sep, BioSourceTestFunc func) static Boolean HasAmplifiedWithSpeciesSpecificPrimerNote (BioSourcePtr biop) { SubSourcePtr ssp; + OrgModPtr mod; Boolean rval = FALSE; if (biop == NULL) { @@ -14438,6 +15340,14 @@ static Boolean HasAmplifiedWithSpeciesSpecificPrimerNote (BioSourcePtr biop) rval = TRUE; } } + if (!rval && biop->org != NULL && biop->org->orgname != NULL) { + for (mod = biop->org->orgname->mod; mod != NULL && !rval; mod = mod->next) { + if 
(mod->subtype == ORGMOD_other + && StringCmp (mod->subname, "amplified with species-specific primers") == 0) { + rval = TRUE; + } + } + } return rval; } @@ -14500,6 +15410,98 @@ static void FindRequiredClones (ValNodePtr PNTR discrepancy_list, ValNodePtr sep } +static Boolean IsMissingRequiredStrain (BioSourcePtr biop) +{ + OrgModPtr mod; + + if (biop == NULL || !IsBacterialBioSource(biop) + || biop->org == NULL || biop->org->orgname == NULL) { + return FALSE; + } + for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) { + if (mod->subtype == ORGMOD_strain) { + return FALSE; + } + } + return TRUE; +} + + +static void FindRequiredStrains (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + ValNodeLink (&item_list, RunBioSourceTest (vnp->data.ptrvalue, IsMissingRequiredStrain)); + } + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (DISC_REQUIRED_STRAIN, "%d biosources are missing required strain value", item_list)); + } +} + + +static Boolean BacterialTaxShouldEndWithStrain (BioSourcePtr biop) +{ + OrgModPtr mod; + Int4 tax_len, len; + + if (biop == NULL || !IsBacterialBioSource(biop) + || biop->org == NULL || biop->org->orgname == NULL) { + return FALSE; + } + tax_len = StringLen (biop->org->taxname); + for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) { + if (mod->subtype == ORGMOD_strain) { + len = StringLen (mod->subname); + if (len > tax_len || StringCmp (biop->org->taxname + tax_len - len, mod->subname) != 0) { + return TRUE; + } + } + } + return FALSE; +} + + +static void FindBacterialTaxStrainMismatch (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + ValNodeLink (&item_list, RunBioSourceTest (vnp->data.ptrvalue, BacterialTaxShouldEndWithStrain)); + } + if (item_list != NULL) { + 
ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (DISC_BACTERIAL_TAX_STRAIN_MISMATCH, "%d biosources have tax name/strain mismatch", item_list)); + } +} + + +static Boolean SpNotUncultured (BioSourcePtr biop) +{ + Int4 len; + + if (biop == NULL || biop->org == NULL || (len = StringLen(biop->org->taxname)) < 4 + || StringCmp (biop->org->taxname + len - 4, " sp.") != 0 + || StringNICmp (biop->org->taxname, "uncultured ", 11) == 0) { + return FALSE; + } else { + return TRUE; + } +} + + +static void FindSpNotUncultured (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + ValNodeLink (&item_list, RunBioSourceTest (vnp->data.ptrvalue, SpNotUncultured)); + } + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_SP_NOT_UNCULTURED, "%d biosources have taxnames that end with ' sp.' but do not start with 'uncultured'", item_list)); + } +} + + static void RetroviridaeDNACallback (BioseqPtr bsp, Pointer data) { SeqMgrDescContext context; @@ -14511,9 +15513,8 @@ static void RetroviridaeDNACallback (BioseqPtr bsp, Pointer data) } sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL - || biop->org == NULL || biop->org->orgname == NULL || biop->genome == GENOME_proviral - || StringSearch (biop->org->orgname->lineage, "Retroviridae") == NULL) { + || !HasLineage(biop, "Retroviridae")) { return; } else { ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); @@ -15647,7 +16648,7 @@ static void ChangeMoltypeToGenomicDNA (ValNodePtr item_list, Pointer data, LogIn const CharPtr kmRNAVariant = ", transcript variant "; const CharPtr kCDSVariant = ", isoform "; -static Boolean ProductsMatchForRefSeq (CharPtr cds_str, CharPtr mrna_str) +NLM_EXTERN Boolean ProductsMatchForRefSeq (CharPtr cds_str, CharPtr mrna_str) { CharPtr join_mrna, join_cds; Int4 len; @@ -15680,35 
+16681,124 @@ static Boolean ProductsMatchForRefSeq (CharPtr cds_str, CharPtr mrna_str) } -static void ReportCDSWithoutmRNACallback (BioseqPtr bsp, Pointer data) +NLM_EXTERN SeqFeatPtr GetmRNAforCDS (SeqFeatPtr cds) { - SeqMgrFeatContext fcontext, mcontext; - SeqMgrDescContext dcontext; - SeqFeatPtr sfp, mRNA; - SeqDescrPtr sdp; - MolInfoPtr mip; - CharPtr feat_product, mrna_product; - ValNode field; - FeatureFieldPtr ff; + SeqFeatPtr mrna = NULL; + SeqFeatXrefPtr xref; + SeqMgrFeatContext mcontext; - if (bsp == NULL || bsp->mol != Seq_mol_dna || data == NULL) { - return; + /* first, check for mRNA identified by feature xref */ + for (xref = cds->xref; xref != NULL && mrna == NULL; xref = xref->next) { + if (xref->id.choice != 0) { + mrna = SeqMgrGetFeatureByFeatID (cds->idx.entityID, NULL, NULL, xref, NULL); + if (mrna != NULL && mrna->idx.subtype != FEATDEF_mRNA) { + mrna = NULL; + } + } } - if (!IsEukaryotic (bsp)) { - return; - } - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext); - if (sdp == NULL || sdp->data.ptrvalue == NULL) { - return; + /* try by location if not by xref */ + if (mrna == NULL) { + mrna = SeqMgrGetLocationSupersetmRNA (cds->location, &mcontext); + if (mrna == NULL) { + mrna = SeqMgrGetOverlappingmRNA (cds->location, &mcontext); + } } - mip = (MolInfoPtr) sdp->data.ptrvalue; - if (mip->biomol != MOLECULE_TYPE_GENOMIC) { + return mrna; +} + +typedef struct underlyingfeat { + SeqFeatPtr orig_feat; + ValNodePtr matching_features; +} UnderlyingFeatData, PNTR UnderlyingFeatPtr; + +static Boolean LIBCALLBACK FindUnderlyingCDS ( + SeqFeatPtr sfp, + SeqMgrFeatContextPtr context +) + +{ + UnderlyingFeatPtr uf; + + if (sfp == NULL || context == NULL) return TRUE; + uf = context->userdata; + if (uf == NULL) return TRUE; + + if (TestFeatOverlap(uf->orig_feat, sfp, CHECK_INTERVALS) >= 0) { + ValNodeAddPointer (&(uf->matching_features), OBJ_SEQFEAT, sfp); + } + + return TRUE; +} + + +NLM_EXTERN SeqFeatPtr GetCDSformRNA (SeqFeatPtr 
mrna) +{ + SeqFeatPtr cds = NULL; + SeqFeatXrefPtr xref; + Int2 count; + UnderlyingFeatData uf; + + /* first, check for cds identified by feature xref */ + for (xref = mrna->xref; xref != NULL && cds == NULL; xref = xref->next) { + if (xref->id.choice != 0) { + cds = SeqMgrGetFeatureByFeatID (mrna->idx.entityID, NULL, NULL, xref, NULL); + if (cds != NULL && cds->idx.subtype != FEATDEF_CDS) { + cds = NULL; + } + } + } + + /* try by location if not by xref */ + if (cds == NULL) { + MemSet (&uf, 0, sizeof (UnderlyingFeatData)); + uf.orig_feat = mrna; + count = SeqMgrGetAllOverlappingFeatures (mrna->location, FEATDEF_CDS, NULL, 0, + SIMPLE_OVERLAP, &uf, FindUnderlyingCDS); + if (uf.matching_features != NULL) { + cds = uf.matching_features->data.ptrvalue; + uf.matching_features = ValNodeFree (uf.matching_features); + } + } + return cds; +} + + +static void ReportCDSWithoutmRNACallback (BioseqPtr bsp, Pointer data) +{ + SeqMgrFeatContext fcontext; + SeqMgrDescContext dcontext; + SeqFeatPtr sfp, mRNA; + SeqDescrPtr sdp; + MolInfoPtr mip; + CharPtr feat_product, mrna_product; + ValNode field; + FeatureFieldPtr ff; + BioSourcePtr biop; + + if (bsp == NULL || bsp->mol != Seq_mol_dna || data == NULL) { + return; + } + + if (!IsEukaryotic (bsp)) { + return; + } + biop = GetBiopForBsp(bsp); + if (biop != NULL && IsLocationOrganelle(biop->genome)) { + return; + } + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext); + if (sdp == NULL || sdp->data.ptrvalue == NULL) { + return; + } + mip = (MolInfoPtr) sdp->data.ptrvalue; + if (mip->biomol != MOLECULE_TYPE_GENOMIC) { return; } ff = FeatureFieldNew (); - ff->type = Feature_type_any; + ff->type = Macro_feature_type_any; ValNodeAddInt (&(ff->field), FeatQualChoice_legal_qual, Feat_qual_legal_product); field.choice = FieldType_feature_field; field.data.ptrvalue = ff; @@ -15720,10 +16810,8 @@ static void ReportCDSWithoutmRNACallback (BioseqPtr bsp, Pointer data) if (IsPseudo (sfp)) { continue; } - mRNA = 
SeqMgrGetLocationSupersetmRNA (sfp->location, &mcontext); - if (mRNA == NULL) { - mRNA = SeqMgrGetOverlappingmRNA (sfp->location, &mcontext); - } + + mRNA = GetmRNAforCDS(sfp); if (mRNA == NULL) { ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); @@ -17497,9 +18585,17 @@ static CharPtr flatfile_find_list_oncaller[] = { static CharPtr flatfile_find_list_oncaller_wholeword[] = { - "chian", + "caputre", "casette", + "chian", + "cytochome", "diveristy", + "genone", + "muesum", + "musuem", + "nuclear shutting", + "reserach", + "transcirption", "unversity", "varent", NULL @@ -17678,7 +18774,7 @@ static void FindTextInCDSProduct (ValNodePtr PNTR discrepancy_list, ValNodePtr s if (master_list != NULL) { - dip = SuspectPhrase (DISC_CDS_PRODUCT_FIND, "suspect phrase or characters", "coding region product", master_list); + dip = SuspectPhraseEx (DISC_CDS_PRODUCT_FIND, "suspect phrase or characters", FALSE, "coding region product", master_list); if (dip != NULL) { dip->subcategories = subcategories; @@ -18370,6 +19466,7 @@ NLM_EXTERN void RemoveExonsOnMrna (ValNodePtr item_list, Pointer data, LogInfoPt ObjMgrSetDirtyFlag (vnp->data.intvalue, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, vnp->data.intvalue, 0, 0); } + ValNodeFree (entityIDList); } @@ -18627,6 +19724,24 @@ static Boolean IsNameCapitalizationOk (CharPtr str) return rval; } +static Boolean IsAuthorInitialsCapitalizationOk (CharPtr init) +{ + CharPtr cp; + + if (StringHasNoText (init)) { + return TRUE; + } + + cp = init; + while (*cp != 0) { + if (isalpha (*cp) && !isupper(*cp)) { + return FALSE; + } + cp++; + } + return TRUE; +} + static void CheckAuthCapsAuthCallback (NameStdPtr nsp, Pointer userdata) { @@ -18642,7 +19757,7 @@ static void CheckAuthCapsAuthCallback (NameStdPtr nsp, Pointer userdata) } else if(!IsNameCapitalizationOk (nsp->names[1])) { /* first name bad */ *pIsBad = TRUE; - } else if(!IsNameCapitalizationOk (nsp->names[4])) { + } else if(!IsAuthorInitialsCapitalizationOk (nsp->names[4])) { /* 
initials bad */ *pIsBad = TRUE; } @@ -19959,9 +21074,7 @@ static void FindBacterialNonExtendablePartialsCallback (BioseqPtr bsp, Pointer u /* only perform test if associated organism cannot be identified as eukaryote */ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext); - if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL || biop->org == NULL - || biop->org->orgname == NULL - || StringISearch (biop->org->orgname->lineage, "Eukaryota") == NULL) { + if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL || !IsEukaryoticBioSource(biop)) { for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext); sfp != NULL; sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &fcontext)) { @@ -20040,9 +21153,7 @@ static void FindBacterialNonExtendablePartialsWithExceptionsCallback (BioseqPtr /* only perform test if associated organism cannot be identified as eukaryote */ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext); - if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL || biop->org == NULL - || biop->org->orgname == NULL - || StringISearch (biop->org->orgname->lineage, "Eukaryota") == NULL) { + if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL || !IsEukaryoticBioSource(biop)) { for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext); sfp != NULL; sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &fcontext)) { @@ -20215,13 +21326,76 @@ static void FindSuspectrRNAProductsCallback (SeqFeatPtr sfp, Pointer data) } +static StringConstraintPtr MakeSimpleSearchConstraint (CharPtr search, Boolean whole_word) +{ + StringConstraintPtr scp; + scp = StringConstraintNew(); + scp->match_text = StringSave (search); + scp->whole_word = whole_word; + return scp; +} + + +static SuspectRulePtr MakeSimpleSearchRule (CharPtr search, Boolean whole_word) +{ + SuspectRulePtr rule; + + rule = SuspectRuleNew(); + rule->find = ValNodeNew (NULL); + rule->find->choice = SearchFunc_string_constraint; + 
rule->find->data.ptrvalue = MakeSimpleSearchConstraint (search, whole_word); + return rule; +} + + +static SuspectRuleSetPtr MakeSuspectrRNARules (void) +{ + SuspectRuleSetPtr rna_rules = NULL, last_rule = NULL, tmp; + Int4 i; + + for (i = 0; i < num_suspect_rrna_product_names; i++) { + tmp = MakeSimpleSearchRule (suspect_rrna_product_names[i], FALSE); + if (last_rule == NULL) { + rna_rules = tmp; + } else { + last_rule->next = tmp; + } + last_rule = tmp; + } + + tmp = MakeSimpleSearchRule("8S", TRUE); + tmp->except = ValNodeNew (NULL); + tmp->except->choice = SearchFunc_string_constraint; + tmp->except->data.ptrvalue = MakeSimpleSearchConstraint("5.8S", TRUE); + if (last_rule == NULL) { + rna_rules = tmp; + } else { + last_rule->next = tmp; + } + last_rule = tmp; + + return rna_rules; +} + + static void FindSuspectrRNAProducts (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) { - CheckForSuspectPhraseByList (discrepancy_list, sep_list, - suspect_rrna_product_names, num_suspect_rrna_product_names, - FindSuspectrRNAProductsCallback, - DISC_SUSPECT_RRNA_PRODUCTS, - "rRNA product name"); + SuspectRuleSetPtr rna_rules; + ValNodePtr subcat; + ClickableItemPtr cip; + + rna_rules = MakeSuspectrRNARules(); + + while (sep_list != NULL) { + subcat = GetSuspectRuleDiscrepancies (sep_list->data.ptrvalue, rna_rules, FEATDEF_rRNA, DISC_SUSPECT_RRNA_PRODUCTS); + if (subcat != NULL) { + cip = SuspectPhraseEx (DISC_SUSPECT_RRNA_PRODUCTS, "suspect phrase", FALSE, "rRNA product name", ItemListFromSubcategories (subcat)); + cip->subcategories = subcat; + ValNodeAddPointer (discrepancy_list, 0, cip); + } + sep_list = sep_list->next; + } + rna_rules = SuspectRuleSetFree (rna_rules); } @@ -20297,7 +21471,7 @@ static Boolean HasMissingBacteriaStrain (BioSourcePtr biop) return FALSE; } - if (StringCmp (biop->org->orgname->div, "BCT") != 0) { + if (!IsBacterialBioSource(biop)) { return FALSE; } @@ -20328,9 +21502,9 @@ static Boolean IsBacterialIsolate (BioSourcePtr biop) Boolean 
has_bad_isolate = FALSE; if (biop == NULL + || !IsBacterialBioSource(biop) || biop->org == NULL || biop->org->orgname == NULL - || StringISearch (biop->org->orgname->lineage, "Bacteria") == NULL || biop->org->orgname->mod == NULL || HasAmplifiedWithSpeciesSpecificPrimerNote(biop)) { return FALSE; @@ -20339,6 +21513,7 @@ static Boolean IsBacterialIsolate (BioSourcePtr biop) for (mod = biop->org->orgname->mod; mod != NULL && !has_bad_isolate; mod = mod->next) { if (mod->subtype == ORGMOD_isolate && StringNICmp (mod->subname, "DGGE gel band", 13) != 0 + && StringNICmp (mod->subname, "TGGE gel band", 13) != 0 && StringNICmp (mod->subname, "SSCP gel band", 13) != 0) { has_bad_isolate = TRUE; } @@ -20419,38 +21594,31 @@ static void FindMetagenomeSource (ValNodePtr PNTR discrepancy_list, ValNodePtr s } +static void FindBacteriamRNACallback (BioseqPtr bsp, Pointer data) +{ + SeqFeatPtr sfp; + SeqMgrFeatContext context; + + if (bsp == NULL || !BioseqHasLineage(bsp, "Bacteria") || data == NULL) { + return; + } + + sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_mRNA, &context); + if (sfp != NULL) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp); + } +} + static void FindBacteriamRNA (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) { - ValNodePtr vnp, item_list = NULL, constraint = NULL, src_list, vnp_s; + ValNodePtr vnp, item_list = NULL; SeqEntryPtr sep; - SourceConstraintPtr src; - SequenceConstraintPtr seq; - - src = SourceConstraintNew (); - src->field1 = ValNodeNew (NULL); - src->field1->choice = SourceQualChoice_textqual; - src->field1->data.intvalue = Source_qual_lineage; - src->constraint = StringConstraintNew (); - src->constraint->match_text = StringSave ("Bacteria"); - src->constraint->match_location = String_location_starts; - ValNodeAddPointer (&constraint, ConstraintChoice_source, src); - - seq = SequenceConstraintNew (); - seq->feature = Feature_type_mRNA; - ValNodeAddPointer (&constraint, ConstraintChoice_sequence, seq); for (vnp = 
sep_list; vnp != NULL; vnp = vnp->next) { sep = (SeqEntryPtr) vnp->data.ptrvalue; - src_list = GetObjectListForFieldType (FieldType_molinfo_field, sep); - for (vnp_s = src_list; vnp_s != NULL; vnp_s = vnp_s->next) { - if (DoesObjectMatchConstraintChoiceSet (vnp_s->choice, vnp_s->data.ptrvalue, constraint)) { - ValNodeAddPointer (&item_list, vnp_s->choice, vnp_s->data.ptrvalue); - } - } - src_list = FreeObjectList (src_list); + VisitBioseqsInSep (sep, &item_list, FindBacteriamRNACallback); } - constraint = ConstraintChoiceSetFree (constraint); if (item_list != NULL) { ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (DISC_BACTERIA_SHOULD_NOT_HAVE_MRNA, "%d bacterial sequences have mRNA features", item_list)); @@ -20641,7 +21809,7 @@ static Boolean IsTrinomialWithoutQualifier (BioSourcePtr biop) } /* ignore viruses */ - if (biop->org->orgname != NULL && StringICmp (biop->org->orgname->div, "VRL") == 0) { + if (IsViralBioSource(biop)) { return FALSE; } @@ -20734,6 +21902,38 @@ static void FindShortrRNAs (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_lis } +static void FindStandardNameCallback (SeqFeatPtr sfp, Pointer data) +{ + GBQualPtr q; + + if (sfp == NULL || data == NULL) { + return; + } + + for (q = sfp->qual; q != NULL; q = q->next) { + if (StringCmp (q->qual, "standard_name") == 0) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); + return; + } + } +} + + +static void FindStandardName (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr item_list = NULL, vnp; + + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitFeaturesInSep (vnp->data.ptrvalue, &item_list, FindStandardNameCallback); + } + + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (ONCALLER_HAS_STANDARD_NAME, "%d features have standard_name qualifier", item_list)); + } +} + + static Boolean DoAuthorityAndTaxnameConflict (BioSourcePtr biop) { OrgModPtr mod; @@ -21408,7 +22608,7 @@ static void 
FindBadBacterialGeneNamesCallback (BioseqPtr bsp, Pointer data) sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext); if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL || biop->org == NULL || biop->org->orgname == NULL - || StringISearch (biop->org->orgname->lineage, "Bacteria") == NULL) { + || !IsBacterialBioSource (biop)) { return; } @@ -21423,17 +22623,154 @@ static void FindBadBacterialGeneNamesCallback (BioseqPtr bsp, Pointer data) } -static void FindBadBacterialGeneNames (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +typedef Boolean (*BadGeneNameTestFunc) PROTO ((CharPtr, CharPtr, SeqFeatPtr)); + +typedef struct badgenename { + CharPtr pattern; + BadGeneNameTestFunc func; +} BadGeneNameData, PNTR BadGeneNamePtr; + +static Boolean GeneNameLongerThanTenChars (CharPtr pattern, CharPtr search, SeqFeatPtr sfp) +{ + if (StringLen (search) > 10) { + return TRUE; + } else { + return FALSE; + } +} + +static Boolean GeneNameContainsPhrase (CharPtr pattern, CharPtr search, SeqFeatPtr sfp) +{ + if (StringISearch (search, pattern) != NULL) { + return TRUE; + } else { + return FALSE; + } +} + + +static Boolean GeneNameHas4Numbers (CharPtr pattern, CharPtr search, SeqFeatPtr sfp) +{ + CharPtr cp; + Int4 num_digits = 0; + + if (search == NULL) { + return FALSE; + } + + for (cp = search; *cp != 0 && num_digits < 4; cp++) { + if (isdigit (*cp)) { + ++num_digits; + } else { + num_digits = 0; + } + } + if (num_digits >= 4) { + return TRUE; + } else { + return FALSE; + } +} + + +static BadGeneNameData bad_gene_rules[] = { + { "more than 10 characters", GeneNameLongerThanTenChars }, + { "putative", GeneNameContainsPhrase }, + { "fragment", GeneNameContainsPhrase }, + { "gene", GeneNameContainsPhrase }, + { "orf", GeneNameContainsPhrase }, + { "like", GeneNameContainsPhrase }, + { "4 or more consecutive numbers", GeneNameHas4Numbers } +}; + + +static const Int4 kNumBadGeneRules = sizeof (bad_gene_rules) / sizeof (BadGeneNameData); + +static void 
FindBadGeneNameCallback (SeqFeatPtr sfp, Pointer data) +{ + ValNodePtr PNTR feature_lists; + GeneRefPtr grp; + Int4 k; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE + || (grp = (GeneRefPtr) sfp->data.value.ptrvalue) == NULL + || StringHasNoText (grp->locus) + || (feature_lists = (ValNodePtr PNTR) data) == NULL) { + return; + } + + for (k = 0; k < kNumBadGeneRules; k++) { + if (bad_gene_rules[k].func(bad_gene_rules[k].pattern, grp->locus, sfp)) { + ValNodeAddPointer (feature_lists + k, OBJ_SEQFEAT, sfp); + } + } +} + + +static void FindBadGeneNames (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) { - ValNodePtr feature_list = NULL, vnp; - CharPtr fmt = "%d genes do not start with lowercase letters"; + ValNodePtr PNTR feature_lists, vnp; + ValNodePtr bad_bacterial_genes = NULL; + ValNodePtr subcat = NULL; + CharPtr fmt = "%d bacterial genes do not start with lowercase letters"; + Int4 k; + ClickableItemPtr dip; + feature_lists = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * kNumBadGeneRules); + MemSet (feature_lists, 0, sizeof (ValNodePtr) * kNumBadGeneRules); for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { - VisitBioseqsInSep (vnp->data.ptrvalue, &feature_list, FindBadBacterialGeneNamesCallback); + VisitFeaturesInSep (vnp->data.ptrvalue, feature_lists, FindBadGeneNameCallback); + VisitBioseqsInSep (vnp->data.ptrvalue, &bad_bacterial_genes, FindBadBacterialGeneNamesCallback); } - if (feature_list != NULL) { - ValNodeAddPointer ((ValNodePtr PNTR) discrepancy_list, 0, NewClickableItem (DISC_BAD_BACTERIAL_GENE_NAME, fmt, feature_list)); + if (bad_bacterial_genes != NULL) { + ValNodeAddPointer (&subcat, 0, NewClickableItem (DISC_BAD_BACTERIAL_GENE_NAME, fmt, bad_bacterial_genes)); + } + + for (k = 0; k < kNumBadGeneRules; k++) { + if (feature_lists[k] != NULL) { + ValNodeAddPointer (&subcat, 0, SuspectPhraseEx(TEST_BAD_GENE_NAME, bad_gene_rules[k].pattern, FALSE, "gene", feature_lists[k])); + } + } + feature_lists = MemFree (feature_lists); + + if 
(subcat == NULL) { + /* do nothing */ + } else if (subcat->next == NULL) { + ValNodeLink (discrepancy_list, subcat); + } else { + dip = SuspectPhraseEx (TEST_BAD_GENE_NAME, "suspect phrase or characters", FALSE, "gene", ItemListFromSubcategories (subcat)); + if (dip != NULL) + { + dip->subcategories = subcat; + ValNodeAddPointer (discrepancy_list, 0, dip); + } + } +} + + +static void MoveBadGeneNames (ValNodePtr item_list, Pointer data, LogInfoPtr lip) +{ + SeqFeatPtr sfp; + GeneRefPtr grp; + ValNodePtr vnp; + Int4 num = 0; + + for (vnp = item_list; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == OBJ_SEQFEAT && (sfp = (SeqFeatPtr) vnp->data.ptrvalue) != NULL + && sfp->data.choice == SEQFEAT_GENE + && (grp = (GeneRefPtr) sfp->data.value.ptrvalue) != NULL + && !StringHasNoText (grp->locus)) { + SetStringValue (&(sfp->comment), grp->locus, ExistingTextOption_append_semi); + grp->locus = MemFree (grp->locus); + num++; + } + } + if (num > 0 && lip != NULL) { + lip->data_in_log = TRUE; + if (lip->fp != NULL) { + fprintf (lip->fp, "Moved %d bad gene names to gene comment.\n", num); + } } } @@ -21463,6 +22800,7 @@ static BioseqSetClassNameClassValData bioseqsetclassname_classval[] = { {"Eco-set", BioseqseqSet_class_eco_set}, {"Gen-prod-set", BioseqseqSet_class_gen_prod_set}, {"WGS-set", BioseqseqSet_class_wgs_set}, + {"Small-genome-set", BioseqseqSet_class_small_genome_set}, {"Other", BioseqseqSet_class_other}}; #define NUM_bioseqsetclassname_classval sizeof (bioseqsetclassname_classval) / sizeof (BioseqSetClassNameClassValData) @@ -22118,14 +23456,1302 @@ static void FindProjectIdSequences (ValNodePtr PNTR discrepancy_list, ValNodePtr } -static void -RemoveUnwantedDiscrepancyItems -(ValNodePtr PNTR discrepancy_list, - DiscrepancyConfigPtr dcp) +static void FindSeqWithStructuredComments (BioseqPtr bsp, Pointer data) { - ValNodePtr vnp, prev = NULL, vnp_next; - ClickableItemPtr dip; - + SeqDescrPtr sdp; + SeqMgrDescContext context; + Uint1 num_present = 0; + 
UserObjectPtr uop; + + if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL) { + return; + } + + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); + sdp != NULL; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) { + if ((uop = (UserObjectPtr) sdp->data.ptrvalue) != NULL + && uop->type != NULL + && StringICmp (uop->type->str, "StructuredComment") == 0) { + num_present++; + } + } + ValNodeAddPointer ((ValNodePtr PNTR) data, num_present, bsp); +} + + +static void FindMissingStructuredComments (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr count_list = NULL; + ValNodePtr tmp_list = NULL; + ValNodePtr vnp; + CharPtr fmt; + CharPtr num_fmt = "%%d sequences have %d structured comments"; + ClickableItemPtr cip; + ValNodePtr subcat = NULL; + Uint1 orig_choice; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitBioseqsInSep (vnp->data.ptrvalue, &count_list, FindSeqWithStructuredComments); + } + + if (count_list == NULL) { + return; + } + + tmp_list = ValNodeExtractList (&count_list, 0); + if (tmp_list == NULL) { + /* no sequences have 0 */ + tmp_list = ValNodeExtractList (&count_list, count_list->choice); + } + if (count_list == NULL) { + /* all sequences have same number of structured comments, no report */ + tmp_list = ValNodeFree (tmp_list); + } else { + while (tmp_list != NULL) { + orig_choice = tmp_list->choice; + for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { + vnp->choice = OBJ_BIOSEQ; + } + fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_fmt) + 15)); + sprintf (fmt, num_fmt, orig_choice); + cip = NewClickableItem (ONCALLER_MISSING_STRUCTURED_COMMENTS, fmt, tmp_list); + fmt = MemFree (fmt); + ValNodeAddPointer (&subcat, 0, cip); + if (count_list == NULL) { + tmp_list = NULL; + } else { + tmp_list = ValNodeExtractList (&count_list, count_list->choice); + } + } + if (subcat->next == NULL) { + subcat = FreeClickableList (subcat); + } else { + cip = (ClickableItemPtr) MemNew 
(sizeof (ClickableItemData)); + MemSet (cip, 0, sizeof (ClickableItemData)); + cip->clickable_item_type = ONCALLER_MISSING_STRUCTURED_COMMENTS; + cip->subcategories = subcat; + cip->description = StringSave ("Sequences have different numbers of structured comments"); + ValNodeAddPointer (discrepancy_list, 0, cip); + } + } +} + + +static void MissingGenomeAssemblyStructuredCommentCallback (BioseqPtr bsp, Pointer data) +{ + SeqDescrPtr sdp; + SeqMgrDescContext dcontext; + Boolean found = FALSE; + UserObjectPtr uop; + UserFieldPtr ufp; + + if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL) { + return; + } + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext); + sdp != NULL && !found; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext)) { + if ((uop = (UserObjectPtr) sdp->data.ptrvalue) != NULL + && uop->type != NULL + && StringICmp (uop->type->str, "StructuredComment") == 0) { + for (ufp = uop->data; ufp != NULL && !found; ufp = ufp->next) { + if (StringICmp (ufp->label->str, "StructuredCommentPrefix") == 0) { + if (ufp->choice == 1 && StringICmp (ufp->data.ptrvalue, "##Genome-Assembly-Data-START##") == 0) { + found = TRUE; + } + break; + } + } + } + } + if (!found) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp); + } +} + + +static void FindMissingGenomeAssemblyStructuredComments (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitBioseqsInSep (vnp->data.ptrvalue, &item_list, MissingGenomeAssemblyStructuredCommentCallback); + } + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (MISSING_GENOMEASSEMBLY_COMMENTS, "%d bioseqs are missing GenomeAssembly structured comments", item_list)); + } +} + + +static void FindCDSWithCDDXrefCallback (SeqFeatPtr sfp, Pointer data) +{ + ValNodePtr vnp; + DbtagPtr dbtag; + Boolean has_cdd_xref = FALSE; + + if (sfp == NULL || 
sfp->data.choice != SEQFEAT_CDREGION || data == NULL) { + return; + } + + for (vnp = sfp->dbxref; vnp != NULL && !has_cdd_xref; vnp = vnp->next) { + if ((dbtag = (DbtagPtr) vnp->data.ptrvalue) != NULL && StringICmp (dbtag->db, "CDD") == 0) { + has_cdd_xref = TRUE; + } + } + + if (has_cdd_xref) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); + } +} + + +static void FindCDSWithCDDXref (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitFeaturesInSep (vnp->data.ptrvalue, &item_list, FindCDSWithCDDXrefCallback); + } + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_CDS_HAS_CDD_XREF, "%d features have CDD Xrefs", item_list)); + } +} + + +static void LIBCALLBACK CountUnusualNTProc (CharPtr sequence, Pointer userdata) +{ + Int4Ptr p_i; + CharPtr cp; + + if (sequence == NULL || userdata == NULL) return; + p_i = (Int4Ptr) userdata; + + for (cp = sequence; *cp != 0; cp++) + { + if (*cp != 'N' && *cp != 'A' && *cp != 'T' && *cp != 'G' && *cp != 'C') + { + (*p_i) ++; + } + } +} + + +static void FindUnusualNTCallback (BioseqPtr bsp, Pointer data) +{ + Int4 num_bad = 0; + Int4 flags = 0; + + if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL) { + return; + } + + SeqPortStream (bsp, flags, (Pointer) &num_bad, CountUnusualNTProc); + if (num_bad > 0) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp); + } + +} + + +static void FindUnusualNT (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitBioseqsInSep (vnp->data.ptrvalue, &item_list, FindUnusualNTCallback); + } + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_UNUSUAL_NT, "%d sequences contain nucleotides that are not ATCG or N", item_list)); + } +} + + +typedef struct qualityinterval { + Int4 start; + 
Int4 pos; + Int4 num_ns; + FloatLo min_pct; + Int4 min_length; + Boolean found_interval; +} QualityIntervalData, PNTR QualityIntervalPtr; + + +static void LIBCALLBACK FindLowQualityIntervalProc (CharPtr sequence, Pointer userdata) +{ + QualityIntervalPtr p_i; + CharPtr cp; + Int4 len; + + if (sequence == NULL || userdata == NULL) return; + p_i = (QualityIntervalPtr) userdata; + + for (cp = sequence; *cp != 0; cp++) + { + if (*cp != 'A' && *cp != 'T' && *cp != 'G' && *cp != 'C') { + if (p_i->start == -1) { + /* start new interval if we aren't already in one */ + p_i->start = p_i->pos; + p_i->num_ns = 1; + } else { + /* add to number of ns in this interval */ + p_i->num_ns++; + } + } else { + if (p_i->start > -1) { + /* if we are already in an interval, see if we should continue to be */ + len = p_i->pos - p_i->start; + if ((FloatLo) p_i->num_ns / (FloatLo) len >= p_i->min_pct) { + /* yes */ + } else { + /* no */ + /* is the interval long enough to qualify? */ + if (len >= p_i->min_length) { + p_i->found_interval = TRUE; + } + /* reset for next interval */ + p_i->start = -1; + p_i->num_ns = 0; + } + } + } + p_i->pos ++; + } +} + + +static void FindLowQualityRegionsCallback (BioseqPtr bsp, Pointer data) +{ + QualityIntervalData q; + + Int4 flags = 0; + + if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL) { + return; + } + MemSet (&q, 0, sizeof (QualityIntervalData)); + q.start = -1; + q.min_pct = 0.25; + q.min_length = 30; + + SeqPortStream (bsp, flags, (Pointer) &q, FindLowQualityIntervalProc); + /* check final interval, in case the end of the sequence is low quality */ + if (q.start > -1 && q.pos - q.start >= q.min_length) { + q.found_interval = TRUE; + } + + if (q.found_interval) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp); + } + +} + + +static void FindLowQualityRegions (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitBioseqsInSep 
(vnp->data.ptrvalue, &item_list, FindLowQualityRegionsCallback); + } + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_LOW_QUALITY_REGION, "%d sequences contains low quality region", item_list)); + } +} + + +NLM_EXTERN Boolean IsLocationOrganelle (Uint1 genome) +{ + if (genome == GENOME_chloroplast + || genome == GENOME_chromoplast + || genome == GENOME_kinetoplast + || genome == GENOME_mitochondrion + || genome == GENOME_cyanelle + || genome == GENOME_nucleomorph + || genome == GENOME_apicoplast + || genome == GENOME_leucoplast + || genome == GENOME_proplastid + || genome == GENOME_hydrogenosome + || genome == GENOME_plastid + || genome == GENOME_chromatophore) { + return TRUE; + } else { + return FALSE; + } +} + +static void FindOrganelleNotGenomicCallback(BioseqPtr bsp, Pointer data) +{ + SeqDescPtr sdp; + SeqMgrDescContext context; + MolInfoPtr mip; + BioSourcePtr biop; + + if (bsp == NULL || ISA_aa(bsp->mol) || data == NULL) { + return; + } + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context); + if (sdp == NULL || (mip = (MolInfoPtr) sdp->data.ptrvalue) == NULL) { + return; + } else if ((mip->biomol == MOLECULE_TYPE_GENOMIC || mip->biomol == 0) && bsp->mol == Seq_mol_dna) { + return; + } + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); + if (sdp != NULL && (biop = (BioSourcePtr) sdp->data.ptrvalue) != NULL + && IsLocationOrganelle(biop->genome)) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); + } +} + + +static void FindOrganelleNotGenomic (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitBioseqsInSep (vnp->data.ptrvalue, &item_list, FindOrganelleNotGenomicCallback); + } + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_ORGANELLE_NOT_GENOMIC, "%d non-genomic sequences are organelles", item_list)); + } +} + + 
+static Boolean HasUnculturedNonOrganelleName (CharPtr taxname) +{ + if (StringCmp (taxname, "uncultured organism") == 0 + || StringCmp (taxname, "uncultured microorganism") == 0 + || StringCmp (taxname, "uncultured bacterium") == 0 + || StringCmp (taxname, "uncultured archaeon") == 0) { + return TRUE; + } else { + return FALSE; + } +} + + +static CharPtr kIntergenicSpacerNames[] = { + "trnL-trnF intergenic spacer", + "trnH-psbA intergenic spacer", + "trnS-trnG intergenic spacer", + "trnF-trnL intergenic spacer", + "psbA-trnH intergenic spacer", + "trnG-trnS intergenic spacer", + NULL}; + +static Boolean HasIntergenicSpacerName(CharPtr str) +{ + Int4 i; + Boolean rval = FALSE; + + for (i = 0; kIntergenicSpacerNames[i] != NULL && !rval; i++) { + if (StringISearch (str, kIntergenicSpacerNames[i]) != NULL) { + rval = TRUE; + } + } + return rval; +} + + +static void FindUnwantedSpacersCallback(BioseqPtr bsp, Pointer data) +{ + SeqDescPtr sdp; + SeqMgrDescContext context; + BioSourcePtr biop; + SeqMgrFeatContext fcontext; + SeqFeatPtr sfp; + + if (bsp == NULL || data == NULL) { + return; + } + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); + if (sdp == NULL || (biop = (BioSourcePtr) sdp->data.ptrvalue) == NULL + || biop->genome == GENOME_chloroplast || biop->genome == GENOME_plastid) { + return; + } + /* shouldn't be uncultured non-organelle */ + if (biop != NULL && biop->org != NULL && HasUnculturedNonOrganelleName(biop->org->taxname)) { + return; + } + + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_misc_feature, &fcontext); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_misc_feature, &fcontext)) { + if (HasIntergenicSpacerName(sfp->comment)) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); + } + } +} + + +static void FindUnwantedSpacers (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + 
VisitBioseqsInSep (vnp->data.ptrvalue, &item_list, FindUnwantedSpacersCallback); + } + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_UNWANTED_SPACER, "%d suspect intergenic spacer notes not organelle", item_list)); + } +} + + +static SuspectRuleSetPtr OrganelleRules = NULL; +static Boolean OrganelleRuleReadAttempted = FALSE; + +static SuspectRuleSetPtr ReadOrganelleRules(void) +{ + AsnIoPtr aip; + Char buf [PATH_MAX]; + SuspectRuleSetPtr rule_list; + + if (! FindPath("ncbi", "ncbi", "data", buf, sizeof (buf))) + { + Message (MSG_POSTERR, "Failed to find organelle product rules"); + return NULL; + } + + StringCat(buf, "organelle_products.prt"); + + aip = AsnIoOpen (buf, "r"); + if (aip == NULL) { + Message (MSG_POSTERR, "Unable to open %s", buf); + return NULL; + } + + rule_list = SuspectRuleSetAsnRead (aip, NULL); + if (rule_list == NULL) { + Message (MSG_POSTERR, "Unable to read organelle product rule list from %s.", buf); + } + + AsnIoClose (aip); + return rule_list; +} + + +typedef struct findorganelleproducts { + SuspectRuleSetPtr rule_list; + ValNodePtr item_list; +} FindOrganelleProductsData, PNTR FindOrganelleProductsPtr; + +static void FindOrganelleProductsCallback(BioseqPtr bsp, Pointer data) +{ + SeqDescPtr sdp; + SeqMgrDescContext context; + BioSourcePtr biop; + SeqMgrFeatContext fcontext, pcontext; + SeqFeatPtr sfp, protsfp; + ProtRefPtr prp; + SuspectRulePtr rule; + FindOrganelleProductsPtr fop; + Boolean match; + BioseqPtr protbsp; + + if (bsp == NULL || (fop = (FindOrganelleProductsPtr)data) == NULL) { + return; + } + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); + if (sdp == NULL || (biop = (BioSourcePtr) sdp->data.ptrvalue) == NULL + || biop->genome == GENOME_mitochondrion + || biop->genome == GENOME_chloroplast + || biop->genome == GENOME_plastid) { + return; + } + + /* source should not be bacterial or viral */ + if (biop != NULL && biop->org != NULL && biop->org->orgname != 
NULL) { + if (IsBacterialBioSource (biop) || IsViralBioSource(biop)) { + return; + } + } + + /* shouldn't be uncultured non-organelle */ + if (biop != NULL && biop->org != NULL && HasUnculturedNonOrganelleName(biop->org->taxname)) { + return; + } + + /* look for misc_features */ + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_misc_feature, &fcontext); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_misc_feature, &fcontext)) { + if (StringNICmp (sfp->comment, "contains ", 9) == 0) { + match = FALSE; + for (rule = fop->rule_list; rule != NULL && !match; rule = rule->next) { + match = DoesStringMatchSuspectRule (sfp->comment, sfp, rule); + } + if (match) { + ValNodeAddPointer (&(fop->item_list), OBJ_SEQFEAT, sfp); + } + } + } + + /* also look for coding regions */ + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &fcontext); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &fcontext)) { + protbsp = BioseqFindFromSeqLoc (sfp->product); + protsfp = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &pcontext); + if (protsfp != NULL && (prp = (ProtRefPtr) protsfp->data.value.ptrvalue) != NULL + && prp->name != NULL) { + match = FALSE; + for (rule = fop->rule_list; rule != NULL && !match; rule = rule->next) { + match = DoesStringMatchSuspectRule (prp->name->data.ptrvalue, sfp, rule); + } + if (match) { + ValNodeAddPointer (&(fop->item_list), OBJ_SEQFEAT, sfp); + } + } + } +} + + +static void FindOrganelleProducts(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp; + FindOrganelleProductsData fd; + + if (!OrganelleRuleReadAttempted) { + OrganelleRules = ReadOrganelleRules(); + OrganelleRuleReadAttempted = TRUE; + } + if (OrganelleRules == NULL) { + return; + } + + MemSet (&fd, 0, sizeof (FindOrganelleProductsData)); + fd.rule_list = OrganelleRules; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitBioseqsInSep (vnp->data.ptrvalue, &fd, FindOrganelleProductsCallback); + } + 
if (fd.item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_ORGANELLE_PRODUCTS, "%d suspect products not organelle", fd.item_list)); + } +} + + +static void FindBadMrnaQualCallback (BioseqPtr bsp, Pointer data) +{ + SeqDescPtr sdp; + SeqMgrDescContext context; + BioSourcePtr biop; + SubSourcePtr ssp; + Boolean found = FALSE; + + if (!IsMrnaSequence(bsp) || data == NULL) { + return; + } + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); + if (sdp == NULL || (biop = (BioSourcePtr) sdp->data.ptrvalue) == NULL) { + return; + } + + for (ssp = biop->subtype; ssp != NULL && !found; ssp = ssp->next) { + if (ssp->subtype == SUBSRC_germline || ssp->subtype == SUBSRC_rearranged) { + found = TRUE; + } + } + if (found) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); + } +} + + +static void FindBadMrnaQual (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitBioseqsInSep (vnp->data.ptrvalue, &item_list, FindBadMrnaQualCallback); + } + + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_BAD_MRNA_QUAL, "%d mRNA sequences have germline or rearranged qualifier", item_list)); + } +} + + +/* A warning when environmental sample qualifier is present and the organism name + * does not contain 'uncultured' or 'enrichment culture' or 'metagenome' + * and the source does not have note (orgmod or subsrc) + * 'amplified with species-specific primers' + * and the /metagenomic-source qualifier is not used + */ +static Boolean HasUnnecessaryEnvironmental(BioSourcePtr biop) +{ + SubSourcePtr ssp; + OrgModPtr mod; + Boolean found = FALSE; + Boolean has_note = FALSE; + Boolean has_metagenomic = FALSE; + + if (biop == NULL) { + return FALSE; + } + + for (ssp = biop->subtype; ssp != NULL && !has_note && !has_metagenomic; ssp = ssp->next) { + if (ssp->subtype == 
SUBSRC_environmental_sample) { + found = TRUE; + } else if (ssp->subtype == SUBSRC_other && StringISearch (ssp->name, "amplified with species-specific primers") != NULL) { + has_note = TRUE; + } else if (ssp->subtype == SUBSRC_metagenomic) { + has_metagenomic = TRUE; + } + } + + if (!found || has_note || has_metagenomic) { + return FALSE; + } + if (biop->org != NULL) { + if (StringISearch (biop->org->taxname, "uncultured") != NULL + || StringISearch (biop->org->taxname, "enrichment culture") != NULL + || StringISearch (biop->org->taxname, "metagenome") != NULL + || StringISearch (biop->org->taxname, "environmental sample") != NULL) { + return FALSE; + } + if (biop->org->orgname != NULL) { + for (mod = biop->org->orgname->mod; mod != NULL && !has_note; mod = mod->next) { + if (mod->subtype == ORGMOD_other && StringISearch (mod->subname, "amplified with species-specific primers") != NULL) { + has_note = TRUE; + } + } + if (has_note) { + return FALSE; + } + } + } + return TRUE; +} + + +static void FindUnnecessaryEnvironmental (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + ValNodeLink (&item_list, RunBioSourceTest (vnp->data.ptrvalue, HasUnnecessaryEnvironmental)); + } + + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_UNNECESSARY_ENVIRONMENTAL, "%d biosources have unnecessary environmental qualifier", item_list)); + } +} + + +static void FindUnnecessaryVirusGeneCallback(BioseqPtr bsp, Pointer data) +{ + BioSourcePtr biop; + SeqMgrFeatContext context; + SeqFeatPtr sfp; + + if (bsp == NULL || data == NULL || ISA_aa(bsp->mol)) { + return; + } + + biop = GetBiopForBsp(bsp); + if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL) { + return; + } + if (HasLineage (biop, "Picornaviridae") + || HasLineage (biop, "Potyviridae") + || HasLineage (biop, "Flaviviridae") + || HasLineage (biop, "Togaviridae")) { + for 
(sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &context); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_GENE, 0, &context)) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); + } + } +} + + +static void FindUnnecessaryVirusGene (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitBioseqsInSep (vnp->data.ptrvalue, &item_list, FindUnnecessaryVirusGeneCallback); + } + + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_UNNECESSARY_VIRUS_GENE, "%d unnecessary virus genes", item_list)); + } +} + + +typedef struct isunwanted { + Boolean has_sat_feat; + Boolean has_non_sat_feat; + Boolean has_rearranged; +} IsUnwantedData, PNTR IsUnwantedPtr; + + +static Boolean IsMicrosatelliteRepeatRegion (SeqFeatPtr sfp) +{ + GBQualPtr qual; + Boolean rval = FALSE; + + if (sfp == NULL || sfp->idx.subtype != FEATDEF_repeat_region) { + return FALSE; + } + for (qual = sfp->qual; qual != NULL && !rval; qual = qual->next) { + if (StringICmp (qual->qual, "satellite") == 0 && StringNICmp (qual->val, "microsatellite", 14) == 0) { + rval = TRUE; + } + } + return rval; +} + + +static void FindUnwantedSetWrappersCallback(BioseqPtr bsp, Pointer data) +{ + IsUnwantedPtr up; + SeqFeatPtr sfp; + SeqMgrFeatContext context; + BioSourcePtr biop; + SubSourcePtr ssp; + + if (bsp == NULL || ISA_aa(bsp->mol) || (up = (IsUnwantedPtr) data) == NULL) { + return; + } + + biop = GetBiopForBsp(bsp); + if (biop != NULL) { + for (ssp = biop->subtype; ssp != NULL && !up->has_rearranged; ssp = ssp->next) { + if (ssp->subtype == SUBSRC_rearranged) { + up->has_rearranged = TRUE; + } + } + } + + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context); + sfp != NULL && (!up->has_sat_feat || !up->has_non_sat_feat); + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context)) { + if (IsMicrosatelliteRepeatRegion(sfp)) { + up->has_sat_feat 
= TRUE; + } else { + up->has_non_sat_feat = TRUE; + } + } +} + + +static void FindUnwantedSetWrappersInSep(SeqEntryPtr sep, ValNodePtr PNTR pList) +{ + BioseqSetPtr bssp; + IsUnwantedData ud; + + if (sep == NULL || !IS_Bioseq_set(sep) || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL || pList == NULL) { + return; + } + + if (bssp->_class == BioseqseqSet_class_eco_set + || bssp->_class == BioseqseqSet_class_mut_set + || bssp->_class == BioseqseqSet_class_phy_set + || bssp->_class == BioseqseqSet_class_pop_set) { + MemSet (&ud, 0, sizeof (IsUnwantedData)); + VisitBioseqsInSep (sep, &ud, FindUnwantedSetWrappersCallback); + + if (ud.has_rearranged || (ud.has_sat_feat && !ud.has_non_sat_feat)) { + ValNodeAddPointer (pList, OBJ_BIOSEQSET, bssp); + } + } else { + for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { + FindUnwantedSetWrappersInSep (sep, pList); + } + } +} + + +static void FindUnwantedSetWrappers (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + FindUnwantedSetWrappersInSep (vnp->data.ptrvalue, &item_list); + } + + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_UNWANTED_SET_WRAPPER, "%d unwanted set wrappers", item_list)); + } +} + + +static Boolean IsMissingPrimerValue (BioSourcePtr biop) +{ + PCRReactionSetPtr set; + PCRPrimerPtr fwd, rev; + Boolean rval = FALSE; + + if (biop == NULL) { + return FALSE; + } + for (set = biop->pcr_primers; set != NULL && !rval; set = set->next) { + for (fwd = set->forward, rev = set->reverse; + fwd != NULL && rev != NULL && !rval; + fwd = fwd->next, rev = rev->next) { + if ((StringHasNoText(fwd->name) && !StringHasNoText(rev->name)) + || (!StringHasNoText (fwd->name) && StringHasNoText (rev->name)) + || (StringHasNoText(fwd->seq) && !StringHasNoText(rev->seq)) + || (!StringHasNoText (fwd->seq) && StringHasNoText (rev->seq))) { + rval = TRUE; + } + } + if (fwd != NULL || 
rev != NULL) { + rval = TRUE; + } + } + return rval; +} + + +static void FindMissingPrimerValues (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + ValNodeLink (&item_list, RunBioSourceTest (vnp->data.ptrvalue, IsMissingPrimerValue)); + } + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_MISSING_PRIMER, "%d biosources have primer sets with missing values", item_list)); + } +} + + +static void FindUnexpectedMiscRNABioseq (BioseqPtr bsp, Pointer data) +{ + SeqFeatPtr sfp; + SeqMgrFeatContext context; + CharPtr product; + + if (bsp == NULL || data == NULL) { + return; + } + + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_otherRNA, &context); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_otherRNA, &context)) { + product = GetRNARefProductString(sfp->data.value.ptrvalue, NULL); + if (StringSearch (product, "ITS") == NULL && StringSearch (product, "internal transcribed spacer") == NULL) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); + } + product = MemFree (product); + } +} + + +static void FindUnexpectedMiscRNA (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitBioseqsInSep (vnp->data.ptrvalue, &item_list, FindUnexpectedMiscRNABioseq); + } + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_UNUSUAL_MISC_RNA, "%d unexpected misc_RNA features found. 
misc_RNAs are unusual in a genome, consider using ncRNA, misc_binding, or misc_feature as appropriate.", item_list)); + } +} + + +static Boolean AmpPrimersNoEnvSample (BioSourcePtr biop) +{ + OrgModPtr mod; + SubSourcePtr ssp; + Boolean has_note = FALSE; + + if (biop == NULL) { + return FALSE; + } + + for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { + if (ssp->subtype == SUBSRC_environmental_sample) { + return FALSE; + } else if (ssp->subtype == SUBSRC_other + && StringISearch (ssp->name, "amplified with species-specific primers") != NULL) { + has_note = TRUE; + } + } + + if (!has_note && biop->org != NULL && biop->org->orgname != NULL) { + for (mod = biop->org->orgname->mod; mod != NULL && !has_note; mod = mod->next) { + if (mod->subtype == SUBSRC_other + && StringISearch (mod->subname, "amplified with species-specific primers") != NULL) { + has_note = TRUE; + } + } + } + + return has_note; +} + + +static void FindAmpPrimersNoEnvSample (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + ValNodeLink (&item_list, RunBioSourceTest (vnp->data.ptrvalue, AmpPrimersNoEnvSample)); + } + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_AMPLIFIED_PRIMERS_NO_ENVIRONMENTAL_SAMPLE, "%d biosources have 'amplified with species-specific primers' note but no environmental-sample qualifier.", item_list)); + } +} + + +static void FindDuplicateGenesOnOppositeStrandsCallback (BioseqPtr bsp, Pointer data) +{ + SeqFeatPtr sfp, sfp_prev = NULL; + SeqMgrFeatContext context; + Boolean sfp_prev_listed = FALSE; + + if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL) { + return; + } + for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &context); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_GENE, 0, &context)) { + if (sfp_prev != NULL) { + if (SeqLocCompare (sfp_prev->location, sfp->location) == SLC_A_EQ_B + && 
SeqLocStrand (sfp_prev->location) != SeqLocStrand (sfp->location)) { + if (!sfp_prev_listed) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp_prev); + } + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); + sfp_prev_listed = TRUE; + } else { + sfp_prev_listed = FALSE; + } + } + sfp_prev = sfp; + } +} + + +static void FindDuplicateGenesOnOppositeStrands (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, item_list = NULL; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitBioseqsInSep (vnp->data.ptrvalue, &item_list, FindDuplicateGenesOnOppositeStrandsCallback); + } + if (item_list != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_DUP_GENES_OPPOSITE_STRANDS, "%d genes match other genes in the same location, but on the opposite strand", item_list)); + } +} + + +static void FindSmallGenomeSetCallback (BioseqSetPtr bssp, Pointer data) +{ + if (bssp != NULL && bssp->_class == BioseqseqSet_class_small_genome_set && data != NULL) { + *((BoolPtr)data) = TRUE; + } +} + + +static void ListBioSources(SeqDescrPtr sdp, Pointer data) +{ + if (sdp != NULL && sdp->choice == Seq_descr_source) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); + } +} + + +static void FindSmallGenomeSetProblems (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + ValNodePtr vnp, src_list = NULL, item_list = NULL, s; + CharPtr taxname = NULL, strain = NULL, isolate = NULL; + CharPtr tmp; + Boolean has_small_genome_set; + BioSourcePtr biop; + ValNodePtr tax_qual, strain_qual, isolate_qual, segment_qual, div_qual; + ValNodePtr missing_segment = NULL; + Boolean all_taxnames_same = TRUE; + Boolean all_isolates_same = TRUE; + Boolean all_strains_same = TRUE; + + tax_qual = ValNodeNew (NULL); + tax_qual->choice = SourceQualChoice_textqual; + tax_qual->data.intvalue = Source_qual_taxname; + strain_qual = ValNodeNew (NULL); + strain_qual->choice = SourceQualChoice_textqual; + 
strain_qual->data.intvalue = Source_qual_strain; + isolate_qual = ValNodeNew (NULL); + isolate_qual->choice = SourceQualChoice_textqual; + isolate_qual->data.intvalue = Source_qual_isolate; + segment_qual = ValNodeNew (NULL); + segment_qual->choice = SourceQualChoice_textqual; + segment_qual->data.intvalue = Source_qual_segment; + div_qual = ValNodeNew (NULL); + div_qual->choice = SourceQualChoice_textqual; + div_qual->data.intvalue = Source_qual_division; + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + has_small_genome_set = FALSE; + VisitSetsInSep (vnp->data.ptrvalue, &has_small_genome_set, FindSmallGenomeSetCallback); + if (has_small_genome_set) { + VisitDescriptorsInSep (vnp->data.ptrvalue, &src_list, ListBioSources); + for (s = src_list; s != NULL; s = s->next) { + biop = GetBioSourceFromObject(s->choice, s->data.ptrvalue); + if (biop != NULL) { + /* look for segment when required */ + if (IsViralBioSource(biop)) { + tmp = GetSourceQualFromBioSource(biop, segment_qual, NULL); + if (tmp == NULL) { + ValNodeAddPointer (&missing_segment, OBJ_SEQDESC, s->data.ptrvalue); + } + tmp = MemFree (tmp); + } + /* are taxnames all the same */ + if (all_taxnames_same) { + tmp = GetSourceQualFromBioSource(biop, tax_qual, NULL); + if (tmp != NULL) { + if (s == src_list) { + taxname = tmp; + tmp = NULL; + } else if (StringCmp (taxname, tmp) != 0) { + all_taxnames_same = FALSE; + } + tmp = MemFree (tmp); + } + } + /* are isolates all the same */ + if (all_isolates_same) { + tmp = GetSourceQualFromBioSource(biop, isolate_qual, NULL); + if (tmp != NULL) { + if (s == src_list) { + isolate = tmp; + tmp = NULL; + } else if (StringCmp (isolate, tmp) != 0) { + all_isolates_same = FALSE; + } + tmp = MemFree (tmp); + } + } + /* are strains all the same */ + if (all_strains_same) { + tmp = GetSourceQualFromBioSource(biop, strain_qual, NULL); + if (tmp != NULL) { + if (s == src_list) { + strain = tmp; + tmp = NULL; + } else if (StringCmp (strain, tmp) != 0) { + all_strains_same 
= FALSE; + } + tmp = MemFree (tmp); + } + } + } + } + + src_list = FreeObjectList (src_list); + } + } + + taxname = MemFree (taxname); + isolate = MemFree (isolate); + strain = MemFree (strain); + + if (missing_segment != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_SMALL_GENOME_SET_PROBLEM, "%d biosources should have segment qualifier but do not", missing_segment)); + } + if (!all_taxnames_same) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItemNoList (TEST_SMALL_GENOME_SET_PROBLEM, "Not all biosources have same taxname")); + } + if (!all_isolates_same) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItemNoList (TEST_SMALL_GENOME_SET_PROBLEM, "Not all biosources have same isolate")); + } + if (!all_strains_same) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItemNoList (TEST_SMALL_GENOME_SET_PROBLEM, "Not all biosources have same strain")); + } + +} + + +static void FindOverlappingrRNAs (BioseqPtr bsp, Pointer userdata) +{ + SeqFeatPtr sfp, sfp_compare; + SeqMgrFeatContext context; + ValNodePtr PNTR overlapping_rrnas = NULL, non_overlap; + ValNodePtr rrna_list = NULL, vnp, vnp_next; + + if (bsp == NULL || userdata == NULL) + { + return; + } + + overlapping_rrnas = (ValNodePtr PNTR) userdata; + + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_rRNA, &context); + sfp != NULL; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_rRNA, &context)) + { + ValNodeAddPointer (&rrna_list, 0, sfp); + } + + for (vnp = rrna_list; vnp != NULL && vnp->next != NULL; vnp = vnp->next) + { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + for (vnp_next = vnp->next; vnp_next != NULL; vnp_next = vnp_next->next) + { + sfp_compare = (SeqFeatPtr) vnp_next->data.ptrvalue; + + if (SeqLocCompare (sfp->location, sfp_compare->location) != SLC_NO_MATCH) + { + vnp->choice = OBJ_SEQFEAT; + vnp_next->choice = OBJ_SEQFEAT; + } + } + } + + non_overlap = ValNodeExtractList (&rrna_list, 0); + non_overlap = ValNodeFree (non_overlap); + ValNodeLink 
(overlapping_rrnas, rrna_list); + +} + + +extern void AddOverlappingrRNADiscrepancies (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + CharPtr bad_fmt = "%d rRNA features overlap another rRNA feature."; + ValNodePtr overlapping_rrnas = NULL, vnp; + + if (discrepancy_list == NULL) + { + return; + } + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitBioseqsInSep (vnp->data.ptrvalue, &overlapping_rrnas, FindOverlappingrRNAs); + } + + if (overlapping_rrnas != NULL) + { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_OVERLAPPING_RRNAS, bad_fmt, overlapping_rrnas)); + } +} + + +static void FindMrnaSequencesWithMinusStrandFeaturesCallback (BioseqPtr bsp, Pointer data) +{ + SeqMgrFeatContext context; + SeqFeatPtr sfp; + Boolean found = FALSE; + + if (bsp == NULL || !IsMrnaSequence(bsp) || data == NULL) { + return; + } + + for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context); + sfp != NULL && !found; + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context)) { + if (context.strand == Seq_strand_minus) { + found = TRUE; + } + } + if (found) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp); + } +} + + +static void FindMrnaSequencesWithMinusStrandFeatures (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + CharPtr bad_fmt = "%d mRNA sequences have features on the complement strand."; + ValNodePtr seqs = NULL, vnp; + + if (discrepancy_list == NULL) + { + return; + } + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitBioseqsInSep (vnp->data.ptrvalue, &seqs, FindMrnaSequencesWithMinusStrandFeaturesCallback); + } + + if (seqs != NULL) + { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES, bad_fmt, seqs)); + } +} + + +static void FindTaxnameMissingFromDeflineCallback (BioseqPtr bsp, Pointer data) +{ + SeqMgrDescContext context; + SeqDescPtr sdp; + BioSourcePtr biop; + CharPtr cp; + Int4 len; + CharPtr lookfor; + + if (bsp == NULL || ISA_aa(bsp->mol) 
|| data == NULL) { + return; + } + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); + if (sdp == NULL || (biop = (BioSourcePtr) sdp->data.ptrvalue) == NULL + || biop->org == NULL + || StringHasNoText (biop->org->taxname)) { + return; + } + + lookfor = biop->org->taxname; + if (StringICmp (lookfor, "Human immunodeficiency virus 1") == 0) { + lookfor = "HIV-1"; + } else if (StringICmp (lookfor, "Human immunodeficiency virus 2") == 0) { + lookfor = "HIV-2"; + } + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &context); + if (sdp != NULL) { + cp = StringISearch (sdp->data.ptrvalue, lookfor); + if (cp == NULL) { + /* taxname not in defline at all */ + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); + } else { + /* capitalization must match for all but the first letter */ + len = StringLen (lookfor); + if (StringNCmp (cp + 1, lookfor + 1, len - 1) != 0) { + ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); + } + } + } +} + + +static void FindTaxnameMissingFromDefline (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list) +{ + CharPtr bad_fmt = "%d deflines do not contain the complete taxname."; + ValNodePtr seqs = NULL, vnp; + + if (discrepancy_list == NULL) + { + return; + } + + for (vnp = sep_list; vnp != NULL; vnp = vnp->next) { + VisitBioseqsInSep (vnp->data.ptrvalue, &seqs, FindTaxnameMissingFromDeflineCallback); + } + + if (seqs != NULL) + { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_TAXNAME_NOT_IN_DEFLINE, bad_fmt, seqs)); + } +} + + +static void +RemoveUnwantedDiscrepancyItems +(ValNodePtr PNTR discrepancy_list, + DiscrepancyConfigPtr dcp) +{ + ValNodePtr vnp, prev = NULL, vnp_next; + ClickableItemPtr dip; + if (dcp == NULL || discrepancy_list == NULL || *discrepancy_list == NULL) { return; @@ -22174,8 +24800,6 @@ extern void SetDiscrepancyLevels (ValNodePtr discrepancy_list, Int4 level) } -typedef void (*AutofixCallback) (ValNodePtr item_list, Pointer userdata, LogInfoPtr 
lip); - typedef struct discrepancyinfo { CharPtr conf_name; @@ -22207,9 +24831,11 @@ static DiscrepancyInfoData discrepancy_info_list[] = { "Overlapping CDS", "OVERLAPPING_CDS", AddOverlappingCodingRegionDiscrepancies, MarkOverlappingCDSs }, { "Contained CDS", "CONTAINED_CDS", AddContainedCodingRegionDiscrepancies, NULL }, { "CDS RNA Overlap", "RNA_CDS_OVERLAP", AddRNACDSOverlapDiscrepancies, NULL }, - { "Short Contig", "SHORT_CONTIG", FindShortContigs, NULL }, + { "Short Contig", "SHORT_CONTIG", FindShortContigs, RemoveShortContigsWithoutAnnotation }, { "Inconsistent BioSource", "INCONSISTENT_BIOSOURCE", FindNonmatchingContigSources, NULL }, { "Suspect Product Name", "SUSPECT_PRODUCT_NAMES", FindSuspectProductNames, NULL }, + { "Suspect Product Name Typo", "DISC_PRODUCT_NAME_TYPO", FindSuspectProductNames, FixSuspectProductNameTypos }, + { "Suspect Product Name QuickFix", "DISC_PRODUCT_NAME_QUICKFIX", FindSuspectProductNames, FixSuspectProductNameQuickFixes }, { "Inconsistent Source And Definition Line", "INCONSISTENT_SOURCE_DEFLINE", FindInconsistentSourceAndDefline, NULL }, { "Partial CDSs in Complete Sequences", "PARTIAL_CDS_COMPLETE_SEQUENCE", FindParticalCDSsInCompleteSequences, NULL }, { "Hypothetical or Unknown Protein with EC Number", "EC_NUMBER_ON_UNKNOWN_PROTEIN", FindUnknownProteinsWithECNumbers, NULL }, @@ -22235,7 +24861,7 @@ static DiscrepancyInfoData discrepancy_info_list[] = { "Multiple CDS on GenProdSet, same protein", "DUP_GENPRODSET_PROTEIN", CheckListForGenProdSets, NULL}, { "mRNA on GenProdSet without transcript ID", "MISSING_GENPRODSET_TRANSCRIPT_ID", CheckListForGenProdSets, NULL}, { "mRNA on GenProdSet with duplicate ID", "DISC_DUP_GENPRODSET_TRANSCRIPT_ID", CheckListForGenProdSets, NULL}, - { "Greater than 10 percent Ns", "DISC_PERCENT_N", PercentNDiscrepanciesForSeqEntry, NULL}, + { "Greater than 5 percent Ns", "DISC_PERCENT_N", PercentNDiscrepanciesForSeqEntry, NULL}, { "Runs of 20 or more Ns", "N_RUNS", BaseCountAndNRunDiscrepancies, 
NULL}, { "Zero Base Counts", "ZERO_BASECOUNT", BaseCountAndNRunDiscrepancies, NULL}, { "Adjacent PseudoGenes with Identical Text", "ADJACENT_PSEUDOGENES", FindAdjacentPseudoGenes, NULL}, @@ -22307,14 +24933,39 @@ static DiscrepancyInfoData discrepancy_info_list[] = { "Mismatched Comments", "DISC_MISMATCHED_COMMENTS", FindMismatchedComments, FixMismatchedComments}, { "BioSources with the same strain should have the same taxname", "DISC_STRAIN_TAXNAME_MISMATCH", CollectStrainTaxnameDiscrepancies, NULL}, { "'Human' in host should be 'Homo sapiens'", "DISC_HUMAN_HOST", FindHumanHosts, FixHumanHosts}, - { "Genes on bacterial sequences should start with lowercase letters", "DISC_BAD_BACTERIAL_GENE_NAME", FindBadBacterialGeneNames, NULL}, + { "Genes on bacterial sequences should start with lowercase letters", "DISC_BAD_BACTERIAL_GENE_NAME", FindBadGeneNames, MoveBadGeneNames}, + { "Bad gene names", "TEST_BAD_GENE_NAME", FindBadGeneNames, MoveBadGeneNames }, { "Location is ordered (intervals interspersed with gaps)", "ONCALLER_ORDERED_LOCATION", FindOrderedLocations, FixOrderedLocations}, { "Comment descriptor present", "ONCALLER_COMMENT_PRESENT", FindCommentDescriptors, NULL }, { "Titles on sets", "ONCALLER_DEFLINE_ON_SET", FindTitlesOnSets, NULL }, { "HIV RNA location or molecule type inconsistent", "ONCALLER_HIV_RNA_INCONSISTENT", FindInconsistentHIVRNA, NULL }, { "Protein sequences should be at least 50 aa, unless they are partial", "SHORT_PROT_SEQUENCES", FindShortProtSequences, NULL }, { "mRNA sequences should not have exons", "TEST_EXON_ON_MRNA", FindExonsOnMrna, RemoveExonsOnMrna }, - { "Sequences with project IDs", "TEST_HAS_PROJECT_ID", FindProjectIdSequences, NULL } + { "Sequences with project IDs", "TEST_HAS_PROJECT_ID", FindProjectIdSequences, NULL }, + { "Feature has standard_name qualifier", "ONCALLER_HAS_STANDARD_NAME", FindStandardName, NULL }, + { "Missing structured comments", "ONCALLER_MISSING_STRUCTURED_COMMENTS", FindMissingStructuredComments, NULL }, 
+ { "Bacteria should have strain", "DISC_REQUIRED_STRAIN", FindRequiredStrains, NULL}, + { "Bioseqs should have GenomeAssembly structured comments", "MISSING_GENOMEASSEMBLY_COMMENTS", FindMissingGenomeAssemblyStructuredComments, NULL }, + { "Bacterial taxnames should end with strain", "DISC_BACTERIAL_TAX_STRAIN_MISMATCH", FindBacterialTaxStrainMismatch, NULL }, + { "CDS has CDD Xref", "TEST_CDS_HAS_CDD_XREF", FindCDSWithCDDXref, NULL }, + { "Sequence contains unusual nucleotides", "TEST_UNUSUAL_NT", FindUnusualNT, NULL }, + { "Sequence contains regions of low quality", "TEST_LOW_QUALITY_REGION", FindLowQualityRegions, NULL }, + { "Organelle location should have genomic moltype", "TEST_ORGANELLE_NOT_GENOMIC", FindOrganelleNotGenomic, NULL }, + { "Intergenic spacer without plastid location", "TEST_UNWANTED_SPACER", FindUnwantedSpacers, NULL }, + { "Organelle products on non-organelle sequence", "TEST_ORGANELLE_PRODUCTS", FindOrganelleProducts, NULL }, + { "Organism ending in sp. needs tax consult", "TEST_SP_NOT_UNCULTURED", FindSpNotUncultured, NULL }, + { "mRNA sequence contains rearranged or germline", "TEST_BAD_MRNA_QUAL", FindBadMrnaQual, NULL }, + { "Unnecessary environmental qualifier present", "TEST_UNNECESSARY_ENVIRONMENTAL", FindUnnecessaryEnvironmental, NULL }, + { "Unnecessary gene features on virus", "TEST_UNNECESSARY_VIRUS_GENE", FindUnnecessaryVirusGene, NULL }, + { "Set wrapper on microsatellites or rearranged genes", "TEST_UNWANTED_SET_WRAPPER", FindUnwantedSetWrappers, NULL}, + { "Missing values in primer set", "TEST_MISSING_PRIMER", FindMissingPrimerValues, NULL}, + { "Unexpected misc_RNA features", "TEST_UNUSUAL_MISC_RNA", FindUnexpectedMiscRNA, NULL}, + { "Species-specific primers, no environmental sample", "TEST_AMPLIFIED_PRIMERS_NO_ENVIRONMENTAL_SAMPLE", FindAmpPrimersNoEnvSample, NULL}, + { "Duplicate genes on opposite strands", "TEST_DUP_GENES_OPPOSITE_STRANDS", FindDuplicateGenesOnOppositeStrands, NULL}, + { "Problems with small genome sets", 
"TEST_SMALL_GENOME_SET_PROBLEM", FindSmallGenomeSetProblems, NULL}, + { "Overlapping rRNA features", "TEST_OVERLAPPING_RRNAS", AddOverlappingrRNADiscrepancies, NULL}, + { "mRNA sequences have CDS/gene on the complement strand", "TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES", FindMrnaSequencesWithMinusStrandFeatures, NULL}, + { "Complete taxname should be present in definition line", "TEST_TAXNAME_NOT_IN_DEFLINE", FindTaxnameMissingFromDefline, NULL} }; @@ -22384,7 +25035,20 @@ extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyR || test_type == ONCALLER_DEFLINE_ON_SET || test_type == ONCALLER_HIV_RNA_INCONSISTENT || test_type == TEST_EXON_ON_MRNA - || test_type == TEST_HAS_PROJECT_ID) { + || test_type == TEST_HAS_PROJECT_ID + || test_type == ONCALLER_HAS_STANDARD_NAME + || test_type == ONCALLER_MISSING_STRUCTURED_COMMENTS + || test_type == TEST_ORGANELLE_PRODUCTS + || test_type == TEST_SP_NOT_UNCULTURED + || test_type == TEST_BAD_MRNA_QUAL + || test_type == TEST_UNNECESSARY_ENVIRONMENTAL + || test_type == TEST_UNNECESSARY_VIRUS_GENE + || test_type == TEST_UNWANTED_SET_WRAPPER + || test_type == TEST_MISSING_PRIMER + || test_type == TEST_AMPLIFIED_PRIMERS_NO_ENVIRONMENTAL_SAMPLE + || test_type == TEST_SMALL_GENOME_SET_PROBLEM + || test_type == TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES + || test_type == TEST_TAXNAME_NOT_IN_DEFLINE) { rval = FALSE; } else { rval = TRUE; @@ -22456,7 +25120,22 @@ extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyR || test_type == ONCALLER_DEFLINE_ON_SET || test_type == ONCALLER_HIV_RNA_INCONSISTENT || test_type == TEST_EXON_ON_MRNA - || test_type == TEST_HAS_PROJECT_ID) { + || test_type == TEST_HAS_PROJECT_ID + || test_type == ONCALLER_HAS_STANDARD_NAME + || test_type == ONCALLER_MISSING_STRUCTURED_COMMENTS + || test_type == TEST_ORGANELLE_NOT_GENOMIC + || test_type == TEST_UNWANTED_SPACER + || test_type == TEST_ORGANELLE_PRODUCTS + || test_type == TEST_SP_NOT_UNCULTURED + || test_type 
== TEST_BAD_MRNA_QUAL + || test_type == TEST_UNNECESSARY_ENVIRONMENTAL + || test_type == TEST_UNNECESSARY_VIRUS_GENE + || test_type == TEST_UNWANTED_SET_WRAPPER + || test_type == TEST_MISSING_PRIMER + || test_type == TEST_AMPLIFIED_PRIMERS_NO_ENVIRONMENTAL_SAMPLE + || test_type == TEST_SMALL_GENOME_SET_PROBLEM + || test_type == TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES + || test_type == TEST_TAXNAME_NOT_IN_DEFLINE) { rval = TRUE; } break; @@ -22471,6 +25150,7 @@ extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyR extern void PrintDiscrepancyTestList (FILE *fp) { Int4 i; + CharPtr tmp; /* discrepancy report */ fprintf (fp, "Discrepancy Report Tests\n"); @@ -22496,9 +25176,35 @@ extern void PrintDiscrepancyTestList (FILE *fp) fprintf (fp, "Terms searched for by SUSPECT_PRODUCT_NAMES:\n"); for (i = 0; i < num_suspect_product_terms; i++) { - fprintf (fp, "'%s':%s\n", + fprintf (fp, "'%s':%s (Category: %s)\n", suspect_product_terms[i].pattern, - SummarizeSuspectPhraseFunc(suspect_product_terms[i].search_func)); + SummarizeSuspectPhraseFunc(suspect_product_terms[i].search_func), + suspect_name_category_names[suspect_product_terms[i].fix_type]); + } + fprintf (fp, "\n"); + + fprintf (fp, "Replacements for SUSPECT_PRODUCT_NAMES:\n"); + fprintf (fp, "Typos:\n"); + for (i = 0; i < num_suspect_product_terms; i++) { + if (suspect_product_terms[i].replace_func != NULL && suspect_product_terms[i].fix_type == eSuspectNameType_Typo) { + tmp = SummarizeSuspectReplacementPhrase (suspect_product_terms[i].replace_func, suspect_product_terms[i].replace_phrase); + fprintf (fp, "'%s':%s (%s)\n", + suspect_product_terms[i].pattern, + SummarizeSuspectPhraseFunc(suspect_product_terms[i].search_func), + tmp); + tmp = MemFree (tmp); + } + } + fprintf (fp, "QuickFixes:\n"); + for (i = 0; i < num_suspect_product_terms; i++) { + if (suspect_product_terms[i].replace_func != NULL && suspect_product_terms[i].fix_type == eSuspectNameType_QuickFix) { + tmp = 
SummarizeSuspectReplacementPhrase (suspect_product_terms[i].replace_func, suspect_product_terms[i].replace_phrase); + fprintf (fp, "'%s':%s (%s)\n", + suspect_product_terms[i].pattern, + SummarizeSuspectPhraseFunc(suspect_product_terms[i].search_func), + tmp); + tmp = MemFree (tmp); + } } fprintf (fp, "\n"); @@ -22627,6 +25333,7 @@ extern void ConfigureForGenomes (DiscrepancyConfigPtr dcp) dcp->conf_list[ONCALLER_SUPERFLUOUS_GENE] = FALSE; dcp->conf_list[ONCALLER_CONSORTIUM] = FALSE; dcp->conf_list[DISC_FEATURE_LIST] = FALSE; + dcp->conf_list[TEST_ORGANELLE_PRODUCTS] = FALSE; /* mitochondrial tests */ dcp->conf_list[DISC_DUP_TRNA] = FALSE; @@ -22639,6 +25346,21 @@ extern void ConfigureForGenomes (DiscrepancyConfigPtr dcp) /* on-caller specific tests */ dcp->conf_list[DISC_SRC_QUAL_PROBLEM] = FALSE; dcp->conf_list[DISC_CATEGORY_HEADER] = FALSE; + dcp->conf_list[TEST_TAXNAME_NOT_IN_DEFLINE] = FALSE; +} + + +extern void ConfigureForReportType (DiscrepancyConfigPtr dcp, EDiscrepancyReportType report_type) +{ + Int4 i; + + if (dcp == NULL) { + return; + } + + for (i = 0; i < MAX_DISC_TYPE; i++) { + dcp->conf_list[i] = IsTestTypeAppropriateForReportType (i, report_type); + } } @@ -22685,6 +25407,9 @@ extern void AutofixDiscrepancies (ValNodePtr vnp, Boolean fix_all, LogInfoPtr li if (discrepancy_info_list[cip->clickable_item_type].autofix_func != NULL) { (discrepancy_info_list[cip->clickable_item_type].autofix_func) (cip->item_list, NULL, lip); } + if (cip->autofix_func != NULL) { + (cip->autofix_func)(cip->item_list, cip->autofix_data, lip); + } } AutofixDiscrepancies (cip->subcategories, fix_all || cip->chosen, lip); } @@ -22699,7 +25424,8 @@ extern void ChooseFixableDiscrepancies (ValNodePtr vnp) while (vnp != NULL) { cip = (ClickableItemPtr) vnp->data.ptrvalue; if (cip != NULL && !cip->chosen) { - if (discrepancy_info_list[cip->clickable_item_type].autofix_func != NULL) { + if (discrepancy_info_list[cip->clickable_item_type].autofix_func != NULL + || 
cip->autofix_func != NULL) { cip->chosen = TRUE; } else { ChooseFixableDiscrepancies (cip->subcategories); @@ -22718,7 +25444,7 @@ static CharPtr GetLocusTagForFeature (SeqFeatPtr sfp) if (sfp == NULL) { return NULL; } - if (sfp->idx.subtype == FEATDEF_GENE) { + if (sfp->data.choice == SEQFEAT_GENE) { grp = sfp->data.value.ptrvalue; } else { grp = SeqMgrGetGeneXref (sfp); @@ -22788,10 +25514,28 @@ extern CharPtr GetBioseqSetLabel (BioseqSetPtr bssp) } +static void LIBCALLBACK CountNonATGCNTProc (CharPtr sequence, Pointer userdata) +{ + Int4Ptr p_i; + CharPtr cp; + + if (sequence == NULL || userdata == NULL) return; + p_i = (Int4Ptr) userdata; + + for (cp = sequence; *cp != 0; cp++) + { + if (*cp != 'A' && *cp != 'T' && *cp != 'G' && *cp != 'C') + { + (*p_i) ++; + } + } +} + + extern CharPtr GetDiscrepancyItemTextEx (ValNodePtr vnp, CharPtr filename) { CharPtr row_text = NULL, tmp, fmt = "%s:%s"; - SeqFeatPtr sfp, cds, sfp_index; + SeqFeatPtr sfp, cds, sfp_index = NULL; BioseqPtr bsp; SeqMgrFeatContext context; CharPtr location; @@ -22799,13 +25543,14 @@ extern CharPtr GetDiscrepancyItemTextEx (ValNodePtr vnp, CharPtr filename) SeqDescrPtr sdp; CharPtr locus_tag = ""; CharPtr bsp_fmt = "%s (length %d)\n"; + CharPtr bsp_unusual_fmt = "%s (length %d, %d other)\n"; ObjValNodePtr ovn; SeqEntryPtr sep; SeqSubmitPtr ssp; Boolean special_flag = FALSE; Uint1 data_choice; ValNodePtr extra_fields = NULL, field, field_strings = NULL, field_values, val_vnp; - Int4 field_len = 0, label_len; + Int4 field_len = 0, label_len, num_bad; if (vnp == NULL) { @@ -22865,9 +25610,9 @@ extern CharPtr GetDiscrepancyItemTextEx (ValNodePtr vnp, CharPtr filename) + StringLen (location) + StringLen (locus_tag) + 6)); - sprintf (row_text, "%s\t%s\t%s\t%s\n", label == NULL ? "unknown label" : label, - context.label == NULL ? "unknown context label" : context.label, - location == NULL ? 
"unknown location" : location, + sprintf (row_text, "%s\t%s\t%s\t%s\n", label, + context.label, + location, locus_tag == NULL ? "" : locus_tag); location = MemFree (location); } @@ -22879,8 +25624,15 @@ extern CharPtr GetDiscrepancyItemTextEx (ValNodePtr vnp, CharPtr filename) if (bsp != NULL) { tmp = GetBioseqLabel (vnp->data.ptrvalue); - row_text = (CharPtr) MemNew (sizeof(Char) * (StringLen (bsp_fmt) + StringLen (tmp) + 32)); - sprintf (row_text, bsp_fmt, tmp, bsp->length); + num_bad = 0; + SeqPortStream (bsp, 0, (Pointer) &num_bad, CountNonATGCNTProc); + if (num_bad > 0) { + row_text = (CharPtr) MemNew (sizeof(Char) * (StringLen (bsp_unusual_fmt) + StringLen (tmp) + 47)); + sprintf (row_text, bsp_unusual_fmt, tmp, bsp->length, num_bad); + } else { + row_text = (CharPtr) MemNew (sizeof(Char) * (StringLen (bsp_fmt) + StringLen (tmp) + 32)); + sprintf (row_text, bsp_fmt, tmp, bsp->length); + } tmp = MemFree (tmp); } } @@ -24721,9 +27473,12 @@ NLM_EXTERN void WriteGlobalDiscrepancyReport (GlobalDiscrepReportPtr g, FILE *fp /* create report for feature counts */ ValNodeLink (&local_list, CreateGlobalFeatureCountReports (&(g->feature_count_list))); + /* data collected for some tests with global components should not be displayed */ + RemoveUnwantedDiscrepancyItems (&local_list, g->test_config); + /* group discrepany reports from separate files */ CollateDiscrepancyReports (&(g->discrepancy_list)); - + fprintf (fp, "Discrepancy Report Results\n\n"); fprintf (fp, "Summary\n"); WriteDiscrepancyReportSummary (local_list, fp); @@ -24855,18 +27610,37 @@ extern ValNodePtr BarcodeTestResultsListFree (ValNodePtr res_list) { ValNodePtr vnp; - if (res_list != NULL) + while (res_list != NULL) { vnp = res_list->next; res_list->next = NULL; res_list->data.ptrvalue = BarcodeTestResultsFree (res_list->data.ptrvalue); - ValNodeFree (res_list); - BarcodeTestResultsListFree (vnp); + res_list = ValNodeFree (res_list); + res_list = vnp; } return res_list; } +extern ValNodePtr 
BarcodeTestResultsExtractPass (ValNodePtr PNTR res_list) +{ + ValNodePtr vnp, pass_list = NULL; + BarcodeTestResultsPtr res; + + if (res_list == NULL || *res_list == NULL) { + return NULL; + } + for (vnp = *res_list; vnp != NULL; vnp = vnp->next) { + res = (BarcodeTestResultsPtr) vnp->data.ptrvalue; + if (PassBarcodeTests(res)) { + vnp->choice = 1; + } + } + pass_list = ValNodeExtractList (res_list, 1); + return pass_list; +} + + /* determines whether barcode tests should be performed on a sequence - * no barcode keyword, no barcode tests needed. */ @@ -24894,7 +27668,7 @@ extern Boolean HasBARCODETech (BioseqPtr bsp) * Finds the MolInfo descriptor for the Bioseq and removes the BARCODE technique. * Returns true if the BARCODE technique was present before it was removed. */ -static Boolean RemoveBarcodeTechFromBioseq (BioseqPtr bsp) +NLM_EXTERN Boolean RemoveBarcodeTechFromBioseq (BioseqPtr bsp) { SeqDescrPtr sdp; SeqMgrDescContext dcontext; @@ -24916,7 +27690,7 @@ static Boolean RemoveBarcodeTechFromBioseq (BioseqPtr bsp) } -static Boolean RemoveBarcodeKeywordFromBioseq (BioseqPtr bsp) +NLM_EXTERN Boolean RemoveBarcodeKeywordFromBioseq (BioseqPtr bsp) { SeqDescrPtr sdp; SeqMgrDescContext dcontext; @@ -24925,6 +27699,7 @@ static Boolean RemoveBarcodeKeywordFromBioseq (BioseqPtr bsp) StringConstraint sc; ObjValNodePtr ovn; + MemSet (&sc, 0, sizeof (StringConstraint)); sc.case_sensitive = FALSE; sc.match_location = String_location_equals; sc.match_text = "BARCODE"; @@ -24994,114 +27769,91 @@ static void ApplyBarcodeTechToBioseq (BioseqPtr bsp) } -NLM_EXTERN void ApplyBarcodeKeywordToBioseq (BioseqPtr bsp) +NLM_EXTERN Boolean BioseqHasKeyword (BioseqPtr bsp, CharPtr keyword) { SeqDescrPtr sdp; SeqMgrDescContext dcontext; Boolean found = FALSE; GBBlockPtr gb; - SeqEntryPtr sep; + ValNodePtr vnp; + UserObjectPtr uop; - for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext); - sdp != NULL && !found; - sdp = SeqMgrGetNextDescriptor (bsp, NULL, 
Seq_descr_genbank, &dcontext)) + if (StringICmp (keyword, "UNVERIFIED") == 0) { - gb = (GBBlockPtr) sdp->data.ptrvalue; - if (gb == NULL) + /* special case for unverified */ + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext); + sdp != NULL && !found; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext)) { - gb = GBBlockNew (); - sdp->data.ptrvalue = gb; - } - SetStringsInValNodeStringList (&(gb->keywords), NULL, "BARCODE", ExistingTextOption_add_qual); - found = TRUE; - } - - if (!found) { - sep = SeqMgrGetSeqEntryForData (bsp); - sdp = CreateNewDescriptor (sep, Seq_descr_genbank); - gb = GBBlockNew (); - SetStringsInValNodeStringList (&(gb->keywords), NULL, "BARCODE", ExistingTextOption_add_qual); - sdp->data.ptrvalue = gb; - } -} - - -NLM_EXTERN Boolean BioseqHasBarcodeKeyword (BioseqPtr bsp) -{ - SeqDescrPtr sdp; - SeqMgrDescContext context; - GBBlockPtr gb; - ValNodePtr vnp; - Boolean rval = FALSE; - - for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &context); - sdp != NULL && !rval; - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &context)) { - gb = (GBBlockPtr) sdp->data.ptrvalue; - if (gb != NULL) { - for (vnp = gb->keywords; vnp != NULL && !rval; vnp = vnp->next) { - if (StringCmp (vnp->data.ptrvalue, "BARCODE") == 0) { - rval = TRUE; - } + if ((uop = (UserObjectPtr) sdp->data.ptrvalue) != NULL + && uop->type != NULL + && StringICmp (uop->type->str, "Unverified") == 0) + { + found = TRUE; } } } - return rval; -} - - -NLM_EXTERN Boolean HasLowTrace (BioseqPtr bsp) -{ - SeqDescrPtr sdp; - SeqMgrDescContext context; - Boolean rval = TRUE; - UserObjectPtr uop; - UserFieldPtr ufp; - ObjectIdPtr oip; - int num_trace = 0; - if (bsp == NULL) { - return FALSE; - } - for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); - sdp != NULL && rval; - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) { - uop = (UserObjectPtr) sdp->data.ptrvalue; - if (uop != NULL 
&& uop->type != NULL && StringICmp (uop->type->str, "Submission") == 0) { - for (ufp = uop->data; ufp != NULL && rval; ufp = ufp->next) { - oip = ufp->label; - if (oip != NULL && StringCmp (oip->str, "AdditionalComment") == 0 - && sscanf (ufp->data.ptrvalue, "Traces: %d", &num_trace) == 1 - && num_trace > 1) { - rval = FALSE; + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext); + sdp != NULL && !found; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &dcontext)) + { + gb = (GBBlockPtr) sdp->data.ptrvalue; + if (gb != NULL) + { + for (vnp = gb->keywords; vnp != NULL && !found; vnp = vnp->next) + { + if (StringICmp (vnp->data.ptrvalue, keyword) == 0) + { + found = TRUE; } } } } - return rval; + return found; } -static void GetBarcodeLowTraceListCallback (BioseqPtr bsp, Pointer data) +NLM_EXTERN void ApplyBarcodeKeywordToBioseq (BioseqPtr bsp) { - if (bsp == NULL || data == NULL - || ISA_aa (bsp->mol) - || !BioseqHasBarcodeKeyword(bsp) - || !HasLowTrace(bsp)) { + SeqDescrPtr sdp; + SeqMgrDescContext dcontext; + Boolean found = FALSE; + GBBlockPtr gb; + SeqEntryPtr sep; + + if (BioseqHasKeyword (bsp, "UNVERIFIED")) + { return; - } else { - ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp); } -} + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext); + sdp != NULL && !found; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &dcontext)) + { + gb = (GBBlockPtr) sdp->data.ptrvalue; + if (gb == NULL) + { + gb = GBBlockNew (); + sdp->data.ptrvalue = gb; + } + SetStringsInValNodeStringList (&(gb->keywords), NULL, "BARCODE", ExistingTextOption_add_qual); + found = TRUE; + } -NLM_EXTERN ValNodePtr GetBarcodeLowTraceList (SeqEntryPtr sep) -{ - ValNodePtr list = NULL; + if (!found) { + sep = SeqMgrGetSeqEntryForData (bsp); + sdp = CreateNewDescriptor (sep, Seq_descr_genbank); + gb = GBBlockNew (); + SetStringsInValNodeStringList (&(gb->keywords), NULL, "BARCODE", ExistingTextOption_add_qual); + 
sdp->data.ptrvalue = gb; + } +} - VisitBioseqsInSep (sep, &list, GetBarcodeLowTraceListCallback); - return list; +NLM_EXTERN Boolean BioseqHasBarcodeKeyword (BioseqPtr bsp) +{ + return BioseqHasKeyword (bsp, "BARCODE"); } @@ -25136,25 +27888,6 @@ typedef struct barcodesearch { BarcodeTestConfigPtr cfg; } BarcodeSearchData, PNTR BarcodeSearchPtr; -static void FindShortBarcodeSequencesCallback (BioseqPtr bsp, Pointer userdata) -{ - BarcodeSearchPtr bsd; - - if (bsp == NULL || ISA_aa (bsp->mol) - || (bsd = (BarcodeSearchPtr) userdata) == NULL - || bsd->cfg == NULL - || (bsd->cfg->require_keyword && !HasBARCODETech(bsp))) - { - return; - } - - if (bsp->length < bsd->cfg->min_length) - { - ValNodeAddPointer (&(bsd->bioseq_list), OBJ_BIOSEQ, bsp); - } -} - - NLM_EXTERN Boolean IsIBOL (BioseqPtr bsp) { Boolean is_ibol = FALSE; @@ -25231,44 +27964,6 @@ static Boolean HasOrderAssignment (BioseqPtr bsp) } -static void FindMissingOrderAssignment (BioseqPtr bsp, Pointer userdata) -{ - BarcodeSearchPtr bsd; - - if (bsp == NULL || ISA_aa (bsp->mol) - || (bsd = (BarcodeSearchPtr) userdata) == NULL - || bsd->cfg == NULL - || (bsd->cfg->require_keyword && !HasBARCODETech(bsp))) - { - return; - } - - if (!HasOrderAssignment (bsp)) - { - ValNodeAddPointer (&(bsd->bioseq_list), OBJ_BIOSEQ, bsp); - } -} - - -static void FindLowTrace (BioseqPtr bsp, Pointer userdata) -{ - BarcodeSearchPtr bsd; - - if (bsp == NULL || ISA_aa (bsp->mol) - || (bsd = (BarcodeSearchPtr) userdata) == NULL - || bsd->cfg == NULL - || (bsd->cfg->require_keyword && !HasBARCODETech(bsp))) - { - return; - } - - if (HasLowTrace (bsp)) - { - ValNodeAddPointer (&(bsd->bioseq_list), OBJ_BIOSEQ, bsp); - } -} - - static Boolean HasFrameShift (BioseqPtr bsp) { SeqDescrPtr sdp; @@ -25300,48 +27995,15 @@ static Boolean HasFrameShift (BioseqPtr bsp) } -static void FindFrameShift (BioseqPtr bsp, Pointer userdata) -{ - BarcodeSearchPtr bsd; - - if (bsp == NULL || ISA_aa (bsp->mol) - || (bsd = (BarcodeSearchPtr) userdata) == 
NULL - || bsd->cfg == NULL - || (bsd->cfg->require_keyword && !HasBARCODETech(bsp))) - { - return; - } - - if (IsIBOL (bsp) && HasFrameShift (bsp)) - { - ValNodeAddPointer (&(bsd->bioseq_list), OBJ_BIOSEQ, bsp); - } -} - - typedef Boolean (*BarcodeBioSourceTestFunc) PROTO ((BioSourcePtr)); static Boolean HasForwardAndReversePrimers (BioSourcePtr biop) { - Boolean found_fwd_seq = FALSE; - Boolean found_rev_seq = FALSE; - SubSourcePtr ssp; + if (biop == NULL || biop->pcr_primers == NULL) return FALSE; - if (biop == NULL || biop->subtype == NULL) return FALSE; - - for (ssp = biop->subtype; ssp != NULL && (!found_fwd_seq || !found_rev_seq); ssp = ssp->next) - { - if (ssp->subtype == SUBSRC_fwd_primer_seq) - { - found_fwd_seq = TRUE; - } - else if (ssp->subtype == SUBSRC_rev_primer_seq) - { - found_rev_seq = TRUE; - } - } + if (biop->pcr_primers->forward == NULL || biop->pcr_primers->reverse == NULL) return FALSE; + return TRUE; - return found_fwd_seq && found_rev_seq; } @@ -25770,30 +28432,6 @@ static void BarcodeBioSourceTestCallback (BioseqPtr bsp, Pointer userdata, Barco } -static void FindMissingForwardAndReversePrimers (BioseqPtr bsp, Pointer userdata) -{ - BarcodeBioSourceTestCallback (bsp, userdata, HasForwardAndReversePrimers); -} - - -static void FindMissingCountryAndLatLon (BioseqPtr bsp, Pointer userdata) -{ - BarcodeBioSourceTestCallback (bsp, userdata, HasCountry); -} - - -static void FindMissingSpecimenVoucher (BioseqPtr bsp, Pointer userdata) -{ - BarcodeBioSourceTestCallback (bsp, userdata, HasVoucher); -} - - -static void FindBadCollectionDate (BioseqPtr bsp, Pointer userdata) -{ - BarcodeBioSourceTestCallback (bsp, userdata, HasCollectionDate); -} - - static void FindBadGPS (BioseqPtr bsp, Pointer userdata) { BarcodeBioSourceTestCallback (bsp, userdata, BarcodeGPSOkay); @@ -25830,114 +28468,87 @@ BarcodeTestForSeqEntry } -static void BarcodePercentNDiscrepancy (BioseqPtr bsp, Pointer userdata) -{ - FloatLo pct; - BarcodeSearchPtr bs; - - if (bsp == 
NULL || ISA_aa (bsp->mol) - || (bs = (BarcodeSearchPtr) userdata) == NULL - || bs->cfg == NULL - || (bs->cfg->require_keyword && !HasBARCODETech (bsp))) - { - return; - } - - bs = (BarcodeSearchPtr) userdata; - - pct = PercentNInBioseq (bsp, TRUE); - if (pct > bs->cfg->min_n_percent) - { - ValNodeAddPointer (&(bs->bioseq_list), OBJ_BIOSEQ, bsp); - } -} - - -static void BarcodePercentNDiscrepanciesForSeqEntry (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list, BarcodeTestConfigPtr cfg) +static void BarcodePercentNDiscrepanciesForSeqEntry (ValNodePtr results, ValNodePtr PNTR discrepancy_list, FloatLo min_n_percent) { - BarcodeSearchData bsd; - ValNodePtr subcategories = NULL, vnp; + BarcodeTestResultsPtr res; + ValNodePtr subcategories = NULL, bioseq_list = NULL, vnp; ClickableItemPtr cip; CharPtr fmt = "Sequence has %.1f%% percent Ns"; CharPtr top_fmt = "%d sequences have > %.1f%% Ns"; - FloatLo pct; - bsd.bioseq_list = NULL; - bsd.cfg = cfg; - if (bsd.cfg == NULL) + for (vnp = results; vnp != NULL; vnp = vnp->next) { - bsd.cfg = BarcodeTestConfigNew (); + res = (BarcodeTestResultsPtr) vnp->data.ptrvalue; + if (res->n_percent < min_n_percent) { + cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData)); + MemSet (cip, 0, sizeof (ClickableItemData)); + cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + 5)); + sprintf (cip->description, fmt, res->n_percent); + ValNodeAddPointer (&bioseq_list, OBJ_BIOSEQ, res->bsp); + ValNodeAddPointer (&(cip->item_list), OBJ_BIOSEQ, res->bsp); + ValNodeAddPointer (&subcategories, 0, cip); + } } - VisitBioseqsInSep (sep, &bsd, BarcodePercentNDiscrepancy); - - if (bsd.bioseq_list == NULL) return; - for (vnp = bsd.bioseq_list; vnp != NULL; vnp = vnp->next) - { + if (bioseq_list != NULL) { cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData)); MemSet (cip, 0, sizeof (ClickableItemData)); - cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + 5)); - pct = PercentNInBioseq 
(vnp->data.ptrvalue, TRUE); - sprintf (cip->description, fmt, pct); - ValNodeAddPointer (&(cip->item_list), OBJ_BIOSEQ, vnp->data.ptrvalue); - ValNodeAddPointer (&subcategories, 0, cip); - } - - cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData)); - MemSet (cip, 0, sizeof (ClickableItemData)); - cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (top_fmt) + 10)); - sprintf (cip->description, fmt, ValNodeLen (bsd.bioseq_list), bsd.cfg->min_n_percent); - cip->item_list = bsd.bioseq_list; - cip->subcategories = subcategories; - ValNodeAddPointer (discrepancy_list, 0, cip); - - if (bsd.cfg != cfg) - { - bsd.cfg = BarcodeTestConfigFree (bsd.cfg); + cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (top_fmt) + 10)); + sprintf (cip->description, fmt, ValNodeLen (bioseq_list), min_n_percent); + cip->item_list = bioseq_list; + cip->subcategories = subcategories; + ValNodeAddPointer (discrepancy_list, 0, cip); } } static void GetBarcodeDiscrepanciesForSeqEntry (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list, BarcodeTestConfigPtr cfg) { + ValNodePtr results, vnp; + ValNodePtr PNTR lists; + BarcodeTestResultsPtr res; + Int4 i; + CharPtr fmts[] = {"%d sequences are shorter than 500 nucleotides", + "%d sequences are missing forward and/or reverse primers", + "%d sequences are missing country", + "%d sequences are missing specimen voucher", + NULL, + "%d sequences have invalid collection date", + "%d sequences are missing order assignment", + "%d sequences have low trace", + "%d sequences have frameshift" }; + + + if (cfg == NULL) return; - if (cfg->conf_list[eBarcodeTest_Length]) - { - BarcodeTestForSeqEntry (sep, discrepancy_list, FindShortBarcodeSequencesCallback, "%d sequences are shorter than 500 nucleotides", cfg); - } - if (cfg->conf_list[eBarcodeTest_Primers]) - { - BarcodeTestForSeqEntry (sep, discrepancy_list, FindMissingForwardAndReversePrimers, "%d sequences are missing forward and/or reverse primers", cfg); - } - if 
(cfg->conf_list[eBarcodeTest_Country]) - { - BarcodeTestForSeqEntry (sep, discrepancy_list, FindMissingCountryAndLatLon, "%d sequences are missing country", cfg); + results = GetBarcodePassFail(sep, cfg); + + lists = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * eBarcodeTest_LAST); + MemSet (lists, 0, sizeof (ValNodePtr) * eBarcodeTest_LAST); + for (vnp = results; vnp != NULL; vnp = vnp->next) { + res = (BarcodeTestResultsPtr) vnp->data.ptrvalue; + for (i = 0; i < eBarcodeTest_LAST; i++) { + if (cfg->conf_list[i] && res->failed_tests[i] && fmts[i] != NULL) { + ValNodeAddPointer (&(lists[i]), OBJ_BIOSEQ, res->bsp); + } + } } - if (cfg->conf_list[eBarcodeTest_SpecimenVoucher]) - { - BarcodeTestForSeqEntry (sep, discrepancy_list, FindMissingSpecimenVoucher, "%d sequences are missing specimen voucher", cfg); + for (i = 0; i < eBarcodeTest_LAST; i++) { + if (cfg->conf_list[i] && lists[i] != NULL) { + if (fmts[i] != NULL) { + ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (0, fmts[i], lists[i])); + } + } } + lists = MemFree(lists); + if (cfg->conf_list[eBarcodeTest_PercentN]) { - BarcodePercentNDiscrepanciesForSeqEntry (sep, discrepancy_list, cfg); - } - if (cfg->conf_list[eBarcodeTest_CollectionDate]) - { - BarcodeTestForSeqEntry (sep, discrepancy_list, FindBadCollectionDate, "%d sequences have invalid collection date", cfg); - } - if (cfg->conf_list[eBarcodeTest_OrderAssignment]) - { - BarcodeTestForSeqEntry (sep, discrepancy_list, FindMissingOrderAssignment, "%d sequences are missing order assignment", cfg); - } - if (cfg->conf_list[eBarcodeTest_LowTrace]) - { - BarcodeTestForSeqEntry (sep, discrepancy_list, FindLowTrace, "%d sequences have low trace", cfg); - } - if (cfg->conf_list[eBarcodeTest_FrameShift]) - { - BarcodeTestForSeqEntry (sep, discrepancy_list, FindFrameShift, "%d sequences have frameshift", cfg); + BarcodePercentNDiscrepanciesForSeqEntry (sep, discrepancy_list, cfg->min_n_percent); } + + results = BarcodeTestResultsListFree(results); } @@ 
-26184,7 +28795,8 @@ extern Boolean PassBarcodeTests (BarcodeTestResultsPtr res) } -NLM_EXTERN BarcodeTestResultsPtr BarcodeTestResultsForBioseq (BioseqPtr bsp, BarcodeTestConfigPtr cfg) +/* NOTE - this no longer performs the low trace test - that test needs to be done for the seq-entry as a whole */ +static BarcodeTestResultsPtr BarcodeTestResultsForBioseq (BioseqPtr bsp, BarcodeTestConfigPtr cfg) { BarcodeTestResultsPtr res = NULL; @@ -26222,10 +28834,6 @@ NLM_EXTERN BarcodeTestResultsPtr BarcodeTestResultsForBioseq (BioseqPtr bsp, Bar { res->failed_tests[eBarcodeTest_OrderAssignment] = !HasOrderAssignment (bsp); } - if (cfg->conf_list[eBarcodeTest_LowTrace]) - { - res->failed_tests[eBarcodeTest_LowTrace] = HasLowTrace (bsp); - } if (cfg->conf_list[eBarcodeTest_FrameShift]) { res->failed_tests[eBarcodeTest_FrameShift] = IsIBOL(bsp) && HasFrameShift (bsp); @@ -26241,7 +28849,7 @@ NLM_EXTERN BarcodeTestResultsPtr BarcodeTestResultsForBioseq (BioseqPtr bsp, Bar } -static void FailedBarcodeTests (BioseqPtr bsp, Pointer userdata) +static void DoBarcodeTestsExceptLowTrace (BioseqPtr bsp, Pointer userdata) { BarcodeBioseqSearchPtr sp; BarcodeTestResultsPtr res = NULL; @@ -26257,47 +28865,170 @@ static void FailedBarcodeTests (BioseqPtr bsp, Pointer userdata) res = BarcodeTestResultsForBioseq (bsp, sp->cfg); if (res == NULL) return; - if (sp->collect_positives - || !PassBarcodeTests(res)) - { - ValNodeAddPointer (&(sp->results_list), 0, res); - } - else - { - res = BarcodeTestResultsFree (res); - } + ValNodeAddPointer (&(sp->results_list), 0, res); } -extern ValNodePtr GetBarcodeFailedAccessionList (SeqEntryPtr sep, BarcodeTestConfigPtr cfg) -{ - BarcodeBioseqSearchData sd; +#ifdef OS_MSWIN +#include +#include - if (cfg == NULL) - { - sd.cfg = BarcodeTestConfigNew(); - } - else - { - sd.cfg = cfg; - } +NLM_EXTERN Int4 RunSilent(const char *cmdline) { + int status = -1; - sd.results_list = NULL; - sd.collect_positives = FALSE; + STARTUPINFO StartupInfo; + 
PROCESS_INFORMATION ProcessInfo; - VisitBioseqsInSep (sep, &sd, FailedBarcodeTests); + DWORD dwCreateFlags; - if (sd.cfg != cfg) - { - sd.cfg = BarcodeTestConfigFree (sd.cfg); +#ifndef COMP_METRO + /* code warrior headers do not have this, so comment out to allow compilation */ + _flushall(); +#endif + + /* Set startup info */ + memset(&StartupInfo, 0, sizeof(StartupInfo)); + StartupInfo.cb = sizeof(STARTUPINFO); + StartupInfo.dwFlags = STARTF_USESHOWWINDOW; + StartupInfo.wShowWindow = SW_HIDE; + dwCreateFlags = CREATE_NEW_CONSOLE; + + /* Run program */ + if (CreateProcess(NULL, (LPSTR)cmdline, NULL, NULL, FALSE, + dwCreateFlags, NULL, NULL, &StartupInfo, &ProcessInfo)) + { + /* wait running process */ + DWORD exitcode = -1; + WaitForSingleObject(ProcessInfo.hProcess, INFINITE); + GetExitCodeProcess(ProcessInfo.hProcess, &exitcode); + status = exitcode; + CloseHandle(ProcessInfo.hProcess); + CloseHandle(ProcessInfo.hThread); + } + else + { + DWORD dw = GetLastError(); + /* check for common errors first */ + if(dw == ERROR_FILE_NOT_FOUND) + Message(MSG_ERROR, "CreateProcess() failed: file not found."); + else + /* generic error message */ + Message(MSG_ERROR, "CreateProcess() failed, error code %d.", + (int)dw); + } + + return status; +} +#endif + +static CharPtr tracefetchcmd = NULL; + +static void FillInMissingTraces (ValNodePtr trace_check_list) +{ + Char path_in [PATH_MAX]; + Char path_out [PATH_MAX]; + FILE *fp; + Char id_txt[255]; + Char cmmd [256]; + ValNodePtr vnp; + BarcodeTestResultsPtr res; + ReadBufferData rbd; + CharPtr line, cp; + + if (tracefetchcmd == NULL) { + if (GetAppParam ("SEQUIN", "TRACECOUNT", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) { + tracefetchcmd = StringSaveNoNull (cmmd); + } + } + if (tracefetchcmd == NULL) return; + + TmpNam (path_in); + fp = FileOpen (path_in, "w"); + if (fp == NULL) { + Message (MSG_ERROR, "Unable to open temporary file %s, unable to get trace results", path_in); + } else { + /* make list of accessions to check */ 
+ for (vnp = trace_check_list; vnp != NULL; vnp = vnp->next) { + res = (BarcodeTestResultsPtr) vnp->data.ptrvalue; + if (res != NULL) { + SeqIdWrite (SeqIdFindBest (res->bsp->id, SEQID_GENBANK), id_txt, PRINTID_TEXTID_ACC_ONLY, sizeof (id_txt) - 1); + fprintf (fp, "%s\n", id_txt); + } + } + FileClose (fp); + TmpNam (path_out); + /* launch script */ +#ifdef OS_UNIX + sprintf (cmmd, "%s -i %s -o %s", tracefetchcmd, path_in, path_out); + system (cmmd); +#endif +#ifdef OS_MSWIN + sprintf (cmmd, "%s -i %s -o %s", tracefetchcmd, path_in, path_out); + RunSilent (cmmd); +#endif + /* read results */ + fp = FileOpen (path_out, "r"); + if (fp == NULL) { + Message (MSG_ERROR, "Unable to open temporary file %s for results", path_out); + } else { + rbd.current_data = NULL; + rbd.fp = fp; + + line = AbstractReadFunction (&rbd); + vnp = trace_check_list; + res = (BarcodeTestResultsPtr) vnp->data.ptrvalue; + SeqIdWrite (SeqIdFindBest (res->bsp->id, SEQID_GENBANK), id_txt, PRINTID_TEXTID_ACC_ONLY, sizeof (id_txt) - 1); + + while (line != NULL && line[0] != EOF && vnp != NULL) { + if (!StringHasNoText (line)) { + cp = StringChr (line, '\t'); + if (cp != NULL) { + *cp = 0; + while (StringCmp (id_txt, line) != 0 && vnp != NULL) { + if (res->num_trace < 2) { + res->failed_tests[eBarcodeTest_LowTrace] = TRUE; + } + vnp = vnp->next; + res = (BarcodeTestResultsPtr) vnp->data.ptrvalue; + SeqIdWrite (SeqIdFindBest (res->bsp->id, SEQID_GENBANK), id_txt, PRINTID_TEXTID_ACC_ONLY, sizeof (id_txt) - 1); + } + if (vnp != NULL) { + res->num_trace++; + } + } + } + line = MemFree (line); + line = AbstractReadFunction (&rbd); + } + while (vnp != NULL) { + res = (BarcodeTestResultsPtr) vnp->data.ptrvalue; + if (res->num_trace < 2) { + res->failed_tests[eBarcodeTest_LowTrace] = TRUE; + } + vnp = vnp->next; + } + + FileClose (fp); + FileRemove (path_out); + } + FileRemove (path_in); } - return sd.results_list; } extern ValNodePtr GetBarcodePassFail (SeqEntryPtr sep, BarcodeTestConfigPtr cfg) { 
BarcodeBioseqSearchData sd; + ValNodePtr vnp; + BarcodeTestResultsPtr res; + SeqDescPtr sdp; + SeqMgrDescContext context; + UserObjectPtr uop; + UserFieldPtr ufp; + ObjectIdPtr oip; + Boolean has_low_trace, has_object; + int num_trace = 0; + ValNodePtr trace_check_list = NULL; if (cfg == NULL) { @@ -26309,9 +29040,51 @@ extern ValNodePtr GetBarcodePassFail (SeqEntryPtr sep, BarcodeTestConfigPtr cfg) } sd.results_list = NULL; - sd.collect_positives = TRUE; - VisitBioseqsInSep (sep, &sd, FailedBarcodeTests); + VisitBioseqsInSep (sep, &sd, DoBarcodeTestsExceptLowTrace); + + /* now do low trace test */ + /* first, loop through list - if bioseq has submission object with trace statement, + * get result from that. otherwise add to list. */ + for (vnp = sd.results_list; vnp != NULL; vnp = vnp->next) { + res = (BarcodeTestResultsPtr) vnp->data.ptrvalue; + if (res != NULL) { + /* look for user object */ + has_low_trace = FALSE; + has_object = FALSE; + for (sdp = SeqMgrGetNextDescriptor (res->bsp, NULL, Seq_descr_user, &context); + sdp != NULL && !has_low_trace; + sdp = SeqMgrGetNextDescriptor (res->bsp, sdp, Seq_descr_user, &context)) { + uop = (UserObjectPtr) sdp->data.ptrvalue; + if (uop != NULL && uop->type != NULL && StringICmp (uop->type->str, "Submission") == 0) { + for (ufp = uop->data; ufp != NULL && !has_low_trace; ufp = ufp->next) { + oip = ufp->label; + if (oip != NULL && StringCmp (oip->str, "AdditionalComment") == 0) { + if ( sscanf (ufp->data.ptrvalue, "Traces: %d", &num_trace) == 1) { + res->num_trace = num_trace; + if (num_trace < 2) { + has_low_trace = TRUE; + } + has_object = TRUE; + } + } + } + } + } + if (has_low_trace) { + res->failed_tests[eBarcodeTest_LowTrace] = TRUE; + } else if (!has_object) { + ValNodeAddPointer (&trace_check_list, 0, res); + } + } + } + + /* then put IDs in list, use script to collect from trace, add to results. 
*/ + if (trace_check_list != NULL) { + FillInMissingTraces (trace_check_list); + /* NOTE - do NOT free barcode result data, since this list points to data in sd.results list */ + trace_check_list = ValNodeFree (trace_check_list); + } if (sd.cfg != cfg) { @@ -26339,9 +29112,6 @@ extern void WriteBarcodeTestComplianceEx (FILE *fp, ValNodePtr results_list, Boo barcode_id = BarcodeTestBarcodeIdString (res->bsp); genbank_id = BarcodeTestGenbankIdString (res->bsp); pass = PassBarcodeTests (res); - if (pass && low_trace_fail && HasLowTrace (res->bsp)) { - pass = FALSE; - } fprintf (fp, "%s\t%s\t%s\n", barcode_id, genbank_id, pass ? "PASS" : "FAIL"); barcode_id = MemFree (barcode_id); genbank_id = MemFree (genbank_id); @@ -27318,6 +30088,8 @@ static ReplacePairData latlon_replace_list[] = { { "DEG.", " " }, { "DEG", " " }, { "MIN.", "'" }, + { "MINUTES", "'" }, + { "MINUTE", "'" }, { "MIN", "'" }, { "SEC.", "''" }, { "SEC", "''" }, @@ -27327,6 +30099,7 @@ static ReplacePairData latlon_replace_list[] = { { "WEST", "W" }, }; + static Int4 num_latlon_replace = sizeof (latlon_replace_list) / sizeof (ReplacePairData); @@ -27461,7 +30234,7 @@ extern CharPtr FixLatLonFormat (CharPtr orig_lat_lon) bad_letter_found = TRUE; } } - else if (i >= 13) + else if (i >= 15) { if (dtoken1 == NULL) { @@ -27630,1197 +30403,326 @@ extern CharPtr FixLatLonFormat (CharPtr orig_lat_lon) /* allow a dash to separate the two tokens if no spaces and only one dash */ else if (first_space == NULL && second_space == NULL && first_dash != NULL && second_dash == NULL) { - ltoken1 = cpy; - *first_dash = ' '; - ltoken2 = first_dash + 1; - } - else if (dtoken1 != NULL && dtoken2 == NULL && dtoken1 > cpy && dtoken1 < cpy + StringLen (cpy) - 1) - { - word1 = MakeToken (cpy, dtoken1 + 1); - if (ParseFromDToken (word1, &lat, &ns, &prec1)) - { - /* first portion parses ok, assume user just left off direction for second token */ - /* letters end tokens */ - dtoken2 = dtoken1 + 1; - dtoken1 = cpy; - } - else - { - 
bad_letter_found = 1; - } - word1 = MemFree (word1); - } - else - { - bad_letter_found = 1; - } - } - if (first_space == NULL && first_dash != NULL && second_dash == NULL && !comma_sep) - { - /* don't let the dash dividing the tokens be used as minus sign */ - *first_dash = ' '; - } - - if (bad_letter_found) - { - } - else if (ltoken1 != NULL) - { - /* if latitude and longitude are at end of token, change start */ - if (ltoken1 != cpy) - { - ltoken2 = ltoken1 + 3; - if (*ltoken2 == 'G') - { - ltoken2++; - } - ltoken1 = cpy; - } - word1 = MakeToken(ltoken1, ltoken2); - word2 = MakeToken(ltoken2, NULL); - if (ParseFromLToken (word1, TRUE, &lat, &ns, &prec1) - && ParseFromLToken (word2, FALSE, &lon, &ew, &prec2)) - { - rval = MakeLatLonFromParts (lat, ns, prec1, lon, ew, prec2); - } - } - else - { - if (dtoken1 != cpy) - { - /* letters end tokens */ - dtoken2 = dtoken1 + 1; - dtoken1 = cpy; - } - word1 = MakeToken (dtoken1, dtoken2); - word2 = MakeToken (dtoken2, NULL); - if (ParseFromDToken (word1, &lat, &ns, &prec1) - && ParseFromDToken (word2, &lon, &ew, &prec2)) - { - rval = MakeLatLonFromParts (lat, ns, prec1, lon, ew, prec2); - } - } - - word1 = MemFree (word1); - word2 = MemFree (word2); - cpy = MemFree (cpy); - - if (rval != NULL && extra_text != NULL) - { - cpy = (CharPtr) MemNew (sizeof (Char) * (StringLen (rval) + StringLen (extra_text) + 1)); - sprintf (cpy, "%s%s", rval, extra_text); - rval = MemFree (rval); - rval = cpy; - } - return rval; -} - - -static void TestLatLonFormatting (FILE *fp) -{ - CharPtr tests[] = - { "100.12 N 200.12 E", /* already correct */ - "100 N 200 E", /* correctable */ - "100.1 N 200.2 E", /* correctable */ - "1OO.1 N 200.2 E", /* correctable (replace capital o with zero) */ - "100.1 N, 200.2 E", /* correctable (remove comma) */ - "E 100, S 120", /* correctable (remove comma, reverse order, letters before numbers */ - "latitude: 200 N longitude: 100 E", - "latitude: 200 E longitude: 100 N", /* NOT correctable */ - "N 37 45.403', 
119 1.456' W", - "38 52 56 N 84 44 53 W", - "49 29 50 N 80 25 52 W", - "39N 93W", - "42:43:13N 01:0015W", - "02deg 33min 00.7sec S 45deg 01min 38.8sec W", - "42:24:37.9 N 85:22:11.7 W", - "10 N 124 E", - "41deg30'' S 145deg37' E", - "59.30deg N 22.40deg E", - "35 N 134 E", - "2 S 114 E", - "24deg 24.377' N 101deg 23.073' W'", - "26deg 57.9' N 102deg 08.3 W'", - "38 11 44.66 North 0 35 01.93 West", - "62.08 N 129.682", - "64.444 N -164.973", - "62.033 N -146.533", - "67 N -51", - "69.107 N 124.195", - "2:46:00-59:41:00", - "64 degree 55 N 25 degree 05 E", - "64.907 N -166.18", - "2:46:00-59:41:00", - "66 degree 21 N 29 degree 21 E", - "37deg27N 121deg52'W", - "01deg31'25''N 66''33'31''W", - "07deg33'30''N 69deg20'W", - "10.8439,-85.6138", - "11.03,-85.527", - "8 deg 45 min S, 63 deg 26 min W", - "29deg 49' 23.7' N; 106deg 23' 15.8'W", - "7:46S, 12:30E", - "35deg48'50'' N; 82deg5658'' W", - "45deg34.18''N, 122deg12.00 'W", - "37deg27N, 121deg52'W", - "41:00;00N 20:45:00E", - "02 deg 28' 29# S, 56 deg 6' 31# W" -}; - Int4 test_num, num_tests = sizeof (tests) / sizeof (char *); - CharPtr fix; - Int4 num_pass = 0, num_formatted = 0; - Boolean format_ok, lat_in_range, lon_in_range; - - if (fp == NULL) return; - - for (test_num = 0; test_num < num_tests; test_num++) - { - fprintf (fp, "Test %d: %s\n", test_num, tests[test_num]); - fix = FixLatLonFormat (tests[test_num]); - if (fix == NULL) - { - fprintf (fp, "Unable to correct format\n"); - } - else - { - IsCorrectLatLonFormat (fix, &format_ok, &lat_in_range, &lon_in_range); - if (format_ok) - { - num_formatted ++; - fprintf (fp, "Correction succeeded:%s\n", fix); - num_pass++; - } - else - { - num_formatted ++; - fprintf (fp, "Correction failed:%s\n", fix); - } - } - } - fprintf (fp, "Formats %d out of %d, %d succeed\n", num_formatted, num_tests, num_pass); -} - - -static CharPtr StringFromObjectID (ObjectIdPtr oip) -{ - CharPtr str; - if (oip == NULL) return NULL; - - if (oip->id > 0) - { - str = (CharPtr) MemNew 
(sizeof (Char) * 20); - sprintf (str, "%d", oip->id); - } - else - { - str = StringSave (oip->str); - } - return str; -} - -extern void ApplyBarcodeDbxrefToBioSource (BioSourcePtr biop, ObjectIdPtr oip) -{ - ValNodePtr vnp; - DbtagPtr dbt; - CharPtr str, cmp; - Boolean found = FALSE; - - if (biop == NULL || oip == NULL) return; - - if (biop->org == NULL) - { - biop->org = OrgRefNew(); - } - - str = StringFromObjectID (oip); - - for (vnp = biop->org->db; vnp != NULL && !found; vnp = vnp->next) - { - dbt = (DbtagPtr) vnp->data.ptrvalue; - if (dbt == NULL || dbt->tag == NULL) continue; - if (StringCmp (dbt->db, "BOLD") != 0) continue; - cmp = StringFromObjectID (dbt->tag); - if (StringCmp (str, cmp) == 0) found = TRUE; - cmp = MemFree (cmp); - } - if (found) - { - str = MemFree (str); - } - else - { - dbt = DbtagNew (); - dbt->db = StringSave ("BOLD"); - dbt->tag = ObjectIdNew(); - dbt->tag->str = str; - ValNodeAddPointer (&(biop->org->db), 0, dbt); - } -} - - -extern void ApplyBarcodeDbxrefsToBioseq (BioseqPtr bsp, Pointer data) -{ - SeqDescrPtr sdp; - SeqMgrDescContext context; - SeqIdPtr sip; - DbtagPtr dbt; - - if (bsp == NULL) return; - for (sip = bsp->id; sip != NULL; sip = sip->next) - { - if (IsBarcodeID (sip) && sip->choice == SEQID_GENERAL && sip->data.ptrvalue != NULL) - { - dbt = (DbtagPtr) sip->data.ptrvalue; - - sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); - if (sdp != NULL) - { - ApplyBarcodeDbxrefToBioSource ((BioSourcePtr) sdp->data.ptrvalue, dbt->tag); - } + ltoken1 = cpy; + *first_dash = ' '; + ltoken2 = first_dash + 1; } - } -} - - -/* Code for Country Fixup */ -typedef struct countrystatelist { - CharPtr PNTR state_list; - CharPtr country_name; -} CountryStateListData, PNTR CountryStateListPtr; - -static Boolean IsMatchInSecondChoiceLists (CharPtr find_str, Int4 match_len, CountryStateListPtr second_choice_lists, CharPtr whole_string) -{ - Int4 i, j; - Boolean in_lists = FALSE; - CharPtr cp; - Int4 len_second_choice; - - 
if (StringHasNoText (find_str) || match_len < 1 || second_choice_lists == NULL) return FALSE; - - for (i = 0; second_choice_lists[i].state_list != NULL && !in_lists; i++) - { - for (j = 0; second_choice_lists[i].state_list[j] != NULL && !in_lists; j++) + else if (dtoken1 != NULL && dtoken2 == NULL && dtoken1 > cpy && dtoken1 < cpy + StringLen (cpy) - 1) { - len_second_choice = StringLen (second_choice_lists[i].state_list[j]); - if (len_second_choice == match_len - &&StringNCmp (find_str, second_choice_lists[i].state_list[j], match_len) == 0) + word1 = MakeToken (cpy, dtoken1 + 1); + if (ParseFromDToken (word1, &lat, &ns, &prec1)) { - in_lists = TRUE; + /* first portion parses ok, assume user just left off direction for second token */ + /* letters end tokens */ + dtoken2 = dtoken1 + 1; + dtoken1 = cpy; } - else if ((cp = StringSearch (second_choice_lists[i].state_list[j], find_str)) != NULL - && StringSearch (whole_string, second_choice_lists[i].state_list[j]) != NULL) + else { - in_lists = TRUE; - } - } - } - return in_lists; -} - - -static Boolean IsBodyOfWater (CharPtr str) -{ - if (StringHasNoText (str)) return FALSE; - if (StringSearch (str, "Ocean") != NULL) return TRUE; - if (StringSearch (str, "Gulf") != NULL) return TRUE; - if (StringSearch (str, "Sea") != NULL) return TRUE; - return FALSE; -} - - -static Boolean IsSubstringOfStringInList (CharPtr whole_str, CharPtr match_p, CharPtr match_str, CharPtr PNTR list) -{ - CharPtr cp; - Int4 context_len, find_len; - Boolean rval = FALSE; - - if (list == NULL || StringHasNoText (whole_str) || match_p == NULL || match_p < whole_str) { - return FALSE; - } - find_len = StringLen (match_str); - while (*list != NULL && !rval) { - context_len = StringLen (*list); - if (find_len < context_len) { - cp = StringSearch (whole_str, *list); - while (cp != NULL && !rval) { - if (match_p < cp) { - cp = NULL; - } else if (cp + context_len > match_p) { - rval = TRUE; - } else { - cp = StringSearch (cp + 1, *list); - } + 
bad_letter_found = 1; } + word1 = MemFree (word1); } - list++; - } - return rval; -} - - -static CharPtr bad_context_names[] = { - "Gibraltar Range National Park", - "Western Australia", - "WSW Chihuahua", - "Mississippi River", - NULL }; - -static Boolean IsBadContextName (CharPtr whole_str, CharPtr match_p, CharPtr match_str) -{ - Boolean rval; - Int4 len; - - rval = IsSubstringOfStringInList (whole_str, match_p, match_str, bad_context_names); - if (!rval) { - len = StringLen (match_str); - if (StringCmp (match_p + len, " River") == 0) { - rval = TRUE; - } else if (StringCmp (match_p + len, " State University") == 0) { - rval = TRUE; + else + { + bad_letter_found = 1; } } - return rval; -} - - -static Boolean IsPartOfStateName (CharPtr whole_str, CharPtr match_p, CharPtr match_str, CountryStateListPtr second_choice_lists) -{ - Boolean rval = FALSE; - Int4 i; - - if (second_choice_lists == NULL) return FALSE; - - for (i = 0; second_choice_lists[i].state_list != NULL && !rval; i++) { - rval = IsSubstringOfStringInList (whole_str, match_p, match_str, second_choice_lists[i].state_list); - } - return rval; -} - - -static CharPtr -FindStringInStringWithContext -(CharPtr search_str, CharPtr look_for, CharPtr PNTR list, CountryStateListPtr second_choice_lists) -{ - Int4 len_match; - CharPtr cp; - - if (StringHasNoText (search_str) || StringHasNoText (look_for)) { - return NULL; - } - - cp = StringISearch (search_str, look_for); - len_match = StringLen (look_for); - while (cp != NULL) { - /* if character after match is alpha, continue */ - if (isalpha ((Int4)(cp [len_match])) - /* if character before match is alpha, continue */ - || (cp > search_str && isalpha ((Int4)(*(cp - 1)))) - /* if match is part of a known "bad context", continue */ - || IsBadContextName (search_str, cp, look_for) - /* if is shorter match for other item, continue */ - || IsSubstringOfStringInList (search_str, cp, look_for, list) - || IsPartOfStateName (search_str, cp, look_for, 
second_choice_lists)) { - cp = StringSearch (cp + len_match, look_for); - } else { - return cp; - } + if (first_space == NULL && first_dash != NULL && second_dash == NULL && !comma_sep) + { + /* don't let the dash dividing the tokens be used as minus sign */ + *first_dash = ' '; } - return cp; -} - -static ValNodePtr FindBestStringMatch (CharPtr PNTR list, CharPtr find_str, CountryStateListPtr second_choice_lists) -{ - CharPtr PNTR ptr; - Int4 len_find; - CharPtr cp; - Boolean ocean_best, ocean_this; - Boolean best_in_second, this_in_second; - ValNodePtr match_list = NULL, vnp, best_vnp; - - if (list == NULL || find_str == NULL) return NULL; - - len_find = StringLen (find_str); - - /* first, find all matches */ - for (ptr = list; ptr != NULL && *ptr != NULL; ptr++) + if (bad_letter_found) { - cp = FindStringInStringWithContext (find_str, *ptr, list, second_choice_lists); - if (cp != NULL) { - ValNodeAddPointer (&match_list, 1, *ptr); - } - } - - if (match_list == NULL) return NULL; - - /* now eliminate matches where we know we have a preference */ - best_vnp = match_list; - for (vnp = match_list->next; vnp != NULL; vnp = vnp->next) + } + else if (ltoken1 != NULL) { - if (StringSearch (vnp->data.ptrvalue, best_vnp->data.ptrvalue) != NULL) + /* if latitude and longitude are at end of token, change start */ + if (ltoken1 != cpy) { - /* best is inside this one */ - best_vnp->choice = 0; - best_vnp = vnp; - } else if (StringSearch (best_vnp->data.ptrvalue, vnp->data.ptrvalue) != NULL) { - /* this is inside best */ - vnp->choice = 0; - } else { - /* prefer non-ocean to ocean */ - ocean_best = IsBodyOfWater (best_vnp->data.ptrvalue); - ocean_this = IsBodyOfWater (vnp->data.ptrvalue); - if (ocean_this && !ocean_best) - { - /* disregard this one */ - vnp->choice = 0; - continue; - } - else if (!ocean_this && ocean_best) - { - /* definitely take this to replace best */ - best_vnp->choice = 0; - best_vnp = vnp; - } - else if (second_choice_lists != NULL) + ltoken2 = ltoken1 + 
3; + if (*ltoken2 == 'G') { - best_in_second = IsMatchInSecondChoiceLists (best_vnp->data.ptrvalue, StringLen (best_vnp->data.ptrvalue), second_choice_lists, find_str); - this_in_second = IsMatchInSecondChoiceLists (vnp->data.ptrvalue, StringLen (vnp->data.ptrvalue), second_choice_lists, find_str); - /* if this choice is a second choice, but the previous best wasn't, don't bother with this */ - if (this_in_second && !best_in_second) { - vnp->choice = 0; - } else if (!this_in_second && best_in_second) { - /* if previous choice was in the secondary lists, prefer this and ignore previous */ - best_vnp->choice = 0; - best_vnp = vnp; - } + ltoken2++; } + ltoken1 = cpy; + } + word1 = MakeToken(ltoken1, ltoken2); + word2 = MakeToken(ltoken2, NULL); + if (ParseFromLToken (word1, TRUE, &lat, &ns, &prec1) + && ParseFromLToken (word2, FALSE, &lon, &ew, &prec2)) + { + rval = MakeLatLonFromParts (lat, ns, prec1, lon, ew, prec2); } } - vnp = ValNodeExtract (&match_list, 0); - vnp = ValNodeFree (vnp); - return match_list; -} - -static CharPtr usa_state_list[] = -{ - "Alabama", - "Alaska", - "Arizona", - "Arkansas", - "California", - "Colorado", - "Connecticut", - "Delaware", - "Florida", - "Georgia", - "Hawaii", - "Idaho", - "Illinois", - "Indiana", - "Iowa", - "Kansas", - "Kentucky", - "Louisiana", - "Maine", - "Maryland", - "Massachusetts", - "Michigan", - "Minnesota", - "Mississippi", - "Missouri", - "Montana", - "Nebraska", - "Nevada", - "New Hampshire", - "New Jersey", - "New Mexico", - "New York", - "North Carolina", - "North Dakota", - "Ohio", - "Oklahoma", - "Oregon", - "Pennsylvania", - "Rhode Island", - "South Carolina", - "South Dakota", - "Tennessee", - "Texas", - "Utah", - "Vermont", - "Virginia", - "Washington", - "Washington, DC", - "West Virginia", - "Wisconsin", - "Wyoming", - NULL -}; - -static CharPtr uk_state_list[] = { - "England", - "Scotland", - NULL -}; - -static CharPtr canada_province_list[] = { - "Alberta", - "British Columbia", - "Manitoba", - "New 
Brunswick", - "Newfoundland and Labrador", - "Northwest Territories", - "Nova Scotia", - "Nunavut", - "Ontario", - "Prince Edward Island", - "Quebec", - "Saskatchewan", - "Yukon", - NULL -}; - -static CharPtr australia_state_list[] = { - "Australian Capital Territory", - "Jervis Bay Territory", - "New South Wales", - "Northern Territory", - "Queensland", - "South Australia", - "Tasmania", - "Victoria", - "Western Australia", - NULL -}; - -static CharPtr mx_state_list[] = -{ - "Aguascalientes", - "Baja California", - "Baja California Sur", - "Campeche", - "Chiapas", - "Chihuahua", - "Coahuila", - "Colima", - "Distrito Federal", - "Durango", - "Estado de Mexico", - "Guanajuato", - "Guerrero", - "Hidalgo", - "Jalisco", - "Michoacan", - "Morelos", - "Nayarit", - "Nuevo Leon", - "Oaxaca", - "Puebla", - "Queretaro", - "Quintana Roo", - "San Luis Potosi", - "Sinaloa", - "Sonora", - "Tabasco", - "Tamaulipas", - "Tlaxcala", - "Veracruz", - "Yucatan", - "Zacatecas", - NULL -}; - -static CharPtr portugal_state_list[] = { - "Azores", - NULL -}; - -static CharPtr ecuador_state_list[] = { - "Galapagos", - NULL -}; - - -static CountryStateListData country_state_list[] = { -{ usa_state_list, "USA" }, -{ uk_state_list, "United Kingdom"}, -{ canada_province_list, "Canada"}, -{ australia_state_list, "Australia"}, -{ mx_state_list, "Mexico"}, -{ portugal_state_list, "Portugal"}, -{ ecuador_state_list, "Ecuador"}, -{ NULL, NULL} -}; - - -static CharPtr FindStateMatch (CharPtr search, CharPtr PNTR country, Int4Ptr state_len, BoolPtr pMulti) -{ - CharPtr best_match = NULL; - Int4 i; - ValNodePtr state_matches; - - if (StringHasNoText (search)) return NULL; - if (country != NULL) { - *country = NULL; - } - - for (i = 0; country_state_list[i].state_list != NULL; i++) { - state_matches = FindBestStringMatch (country_state_list[i].state_list, search, NULL); - if (state_matches != NULL) { - if (state_matches->next == NULL && best_match == NULL) { - best_match = state_matches->data.ptrvalue; - 
if (country != NULL) { - *country = country_state_list[i].country_name; - } - } else { - *pMulti = TRUE; - return NULL; - } - state_matches = ValNodeFree (state_matches); + else + { + if (dtoken1 != cpy) + { + /* letters end tokens */ + dtoken2 = dtoken1 + 1; + dtoken1 = cpy; + } + word1 = MakeToken (dtoken1, dtoken2); + word2 = MakeToken (dtoken2, NULL); + if (ParseFromDToken (word1, &lat, &ns, &prec1) + && ParseFromDToken (word2, &lon, &ew, &prec2)) + { + rval = MakeLatLonFromParts (lat, ns, prec1, lon, ew, prec2); } } - if (best_match != NULL && state_len != NULL) { - *state_len = StringLen (best_match); + + word1 = MemFree (word1); + word2 = MemFree (word2); + cpy = MemFree (cpy); + + if (rval != NULL && extra_text != NULL) + { + cpy = (CharPtr) MemNew (sizeof (Char) * (StringLen (rval) + StringLen (extra_text) + 1)); + sprintf (cpy, "%s%s", rval, extra_text); + rval = MemFree (rval); + rval = cpy; } - return best_match; + return rval; } -static CharPtr FindStateMatchForCountry (CharPtr search, CharPtr country, BoolPtr pMulti) +static void TestLatLonFormatting (FILE *fp) { - ValNodePtr state_matches; - CharPtr state_match = NULL; - Int4 i; - - if (StringHasNoText (search) || StringHasNoText (country)) return NULL; - - for (i = 0; country_state_list[i].state_list != NULL; i++) { - if (StringCmp (country, country_state_list[i].country_name) == 0) { - state_matches = FindBestStringMatch (country_state_list[i].state_list, search, NULL); - if (state_matches != NULL) { - if (state_matches->next == NULL) { - state_match = state_matches->data.ptrvalue; - } else { - *pMulti = TRUE; - } - state_matches = ValNodeFree (state_matches); - return state_match; - } - } - } - return NULL; -} + CharPtr tests[] = + { "100.12 N 200.12 E", /* already correct */ + "100 N 200 E", /* correctable */ + "100.1 N 200.2 E", /* correctable */ + "1OO.1 N 200.2 E", /* correctable (replace capital o with zero) */ + "100.1 N, 200.2 E", /* correctable (remove comma) */ + "E 100, S 120", /* 
correctable (remove comma, reverse order, letters before numbers */ + "latitude: 200 N longitude: 100 E", + "latitude: 200 E longitude: 100 N", /* NOT correctable */ + "N 37 45.403', 119 1.456' W", + "38 52 56 N 84 44 53 W", + "49 29 50 N 80 25 52 W", + "39N 93W", + "42:43:13N 01:0015W", + "02deg 33min 00.7sec S 45deg 01min 38.8sec W", + "42:24:37.9 N 85:22:11.7 W", + "10 N 124 E", + "41deg30'' S 145deg37' E", + "59.30deg N 22.40deg E", + "35 N 134 E", + "2 S 114 E", + "24deg 24.377' N 101deg 23.073' W'", + "26deg 57.9' N 102deg 08.3 W'", + "38 11 44.66 North 0 35 01.93 West", + "62.08 N 129.682", + "64.444 N -164.973", + "62.033 N -146.533", + "67 N -51", + "69.107 N 124.195", + "2:46:00-59:41:00", + "64 degree 55 N 25 degree 05 E", + "64.907 N -166.18", + "2:46:00-59:41:00", + "66 degree 21 N 29 degree 21 E", + "37deg27N 121deg52'W", + "01deg31'25''N 66''33'31''W", + "07deg33'30''N 69deg20'W", + "10.8439,-85.6138", + "11.03,-85.527", + "8 deg 45 min S, 63 deg 26 min W", + "29deg 49' 23.7' N; 106deg 23' 15.8'W", + "7:46S, 12:30E", + "35deg48'50'' N; 82deg5658'' W", + "45deg34.18''N, 122deg12.00 'W", + "37deg27N, 121deg52'W", + "41:00;00N 20:45:00E", + "02 deg 28' 29# S, 56 deg 6' 31# W" +}; + Int4 test_num, num_tests = sizeof (tests) / sizeof (char *); + CharPtr fix; + Int4 num_pass = 0, num_formatted = 0; + Boolean format_ok, lat_in_range, lon_in_range; + if (fp == NULL) return; -static void FixCountryStringForStateName (CharPtr PNTR pCountry, CharPtr state_name, CharPtr country_name) -{ - CharPtr cp; - Int4 len_state, len_country, len_qual, len_name, len_after; - CharPtr before, newname; - - if (pCountry == NULL - || StringHasNoText (*pCountry) - || StringHasNoText (state_name) - || StringHasNoText (country_name)) - { - return; - } - - cp = StringStr (*pCountry, state_name); - if (cp == NULL) - { - return; - } - len_state = StringLen (state_name); - if (isalpha ((Int4)(cp [len_state]))) - { - return; - } - - len_country = StringLen (country_name); - - len_qual = 
StringLen (*pCountry); - if (cp == *pCountry) - { - len_after = len_qual - len_state; - newname = (CharPtr) MemNew ((5 + len_country + len_state + len_after) * sizeof (Char)); - sprintf (newname, "%s: %s", country_name, state_name); - if (len_after > 0) - { - StringCat (newname, ", "); - StringCat (newname, *pCountry + len_state); - } - *pCountry = MemFree (*pCountry); - *pCountry = newname; - } - else + for (test_num = 0; test_num < num_tests; test_num++) { - newname = (CharPtr) MemNew (len_qual + 5 + len_country); - *(cp - 1) = 0; - before = StringSave (*pCountry); - sprintf (newname, "%s: %s, ", country_name, state_name); - StringNCpy (newname + 4 + len_country + len_state, before, StringLen (before)); - StringCpy (newname + 4 + len_country + len_state + StringLen (before), cp + len_state); - len_name = StringLen (newname); - while (isspace ((Int4)(newname[len_name - 1])) || ispunct ((Int4)(newname [len_name - 1]))) + fprintf (fp, "Test %d: %s\n", test_num, tests[test_num]); + fix = FixLatLonFormat (tests[test_num]); + if (fix == NULL) { - newname [len_name - 1] = 0; - len_name --; + fprintf (fp, "Unable to correct format\n"); } - /* get rid of trailing comma if necessary */ - if (len_name == StringLen (country_name) + 3 + StringLen (state_name) - && newname [len_name - 1] == ',') + else { - newname [len_name - 1] = 0; - len_name --; - } - before = MemFree (before); - MemFree (*pCountry); - *pCountry = newname; - } -} - - -static CharPtr FindCountryMatch (CharPtr search_str, CharPtr PNTR country_list, BoolPtr isMulti) -{ - ValNodePtr match_list; - CharPtr best_match = NULL; - - if (StringSearch (search_str, "Yugoslavia")) { - *isMulti = TRUE; - return NULL; - } - - match_list = FindBestStringMatch (country_list, search_str, country_state_list); - if (match_list != NULL) { - if (match_list->next == NULL) { - best_match = match_list->data.ptrvalue; - } else { - *isMulti = TRUE; + IsCorrectLatLonFormat (fix, &format_ok, &lat_in_range, &lon_in_range); + if 
(format_ok) + { + num_formatted ++; + fprintf (fp, "Correction succeeded:%s\n", fix); + num_pass++; + } + else + { + num_formatted ++; + fprintf (fp, "Correction failed:%s\n", fix); + } } - match_list = ValNodeFree (match_list); } - return best_match; + fprintf (fp, "Formats %d out of %d, %d succeed\n", num_formatted, num_tests, num_pass); } -static ReplacePairData country_name_fixes[] = { - {"Vietnam", "Viet Nam"}, - {"Ivory Coast", "Cote d'Ivoire"}, - {"United States of America", "USA"}, - {"U.S.A.", "USA"}, - {"The Netherlands", "Netherlands"}, - {NULL, NULL} -}; - -static void FixCountryNames (CharPtr PNTR pCountry) +static CharPtr StringFromObjectID (ObjectIdPtr oip) { - ReplacePairPtr fix; + CharPtr str; + if (oip == NULL) return NULL; - if (pCountry == NULL || StringHasNoText (*pCountry)) + if (oip->id > 0) { - return; + str = (CharPtr) MemNew (sizeof (Char) * 20); + sprintf (str, "%d", oip->id); } - - fix = country_name_fixes; - while (fix->find != NULL) + else { - if (StringStr (*pCountry, fix->replace) == NULL || StringSearch (fix->find, fix->replace) != NULL) { - FindReplaceString (pCountry, fix->find, fix->replace, FALSE, TRUE); - } - fix++; + str = StringSave (oip->str); } + return str; } - -NLM_EXTERN CharPtr GetStateAbbreviation (CharPtr state) +static Boolean ApplyBarcodeDbxrefToBioSource (BioSourcePtr biop, ObjectIdPtr oip) { - ReplacePairPtr fix; - CharPtr abbrev = NULL; - - fix = us_state_abbrev_fixes; - while (fix->find != NULL && abbrev == NULL) { - if (StringICmp (fix->replace, state) == 0) { - abbrev = fix->find; - } - fix++; - } - return abbrev; -} - + ValNodePtr vnp; + DbtagPtr dbt; + CharPtr str, cmp; + Boolean found = FALSE; + Boolean rval = FALSE; -static void FixUSStateAbbreviations (CharPtr PNTR pCountry) -{ - ReplacePairPtr fix; + if (biop == NULL || oip == NULL) return FALSE; - if (pCountry == NULL || StringHasNoText (*pCountry)) + if (biop->org == NULL) { - return; + biop->org = OrgRefNew(); } - fix = us_state_abbrev_fixes; - while 
(fix->find != NULL) + str = StringFromObjectID (oip); + + for (vnp = biop->org->db; vnp != NULL && !found; vnp = vnp->next) { - FindReplaceString (pCountry, fix->find, fix->replace, TRUE, TRUE); - fix++; + dbt = (DbtagPtr) vnp->data.ptrvalue; + if (dbt == NULL || dbt->tag == NULL) continue; + if (StringCmp (dbt->db, "BOLD") != 0) continue; + cmp = StringFromObjectID (dbt->tag); + if (StringCmp (str, cmp) == 0) found = TRUE; + cmp = MemFree (cmp); } -} - - -static CharPtr MoveStateAndAddComma (CharPtr cntry_str, CharPtr state_match, Int4 len_cntry) -{ - CharPtr newname = NULL, cp; - Int4 len_state, len_qual, len_after, len_before; - - if (StringHasNoText (cntry_str) || StringHasNoText (state_match) || len_cntry < 1) + if (found) { - return cntry_str; + str = MemFree (str); } - - cp = StringISearch (cntry_str + len_cntry + 2, state_match); - if (cp != NULL) + else { - len_state = StringLen (state_match); - len_qual = StringLen (cntry_str); - - if (cp == cntry_str + len_cntry + 2) - { - /* state is at beginning of string */ - len_after = len_qual - len_cntry - 2 - len_state; - if (len_after == 0 || cntry_str [len_cntry + 2 + len_state] == ',') - { - /* already in correct format, nothing after state name */ - /* just copy in state name, in case we are correcting case */ - StringNCpy (cp, state_match, len_state); - return cntry_str; - } - else - { - /* insert comma */ - newname = (CharPtr) MemNew (StringLen (cntry_str) + 3); - StringNCpy (newname, cntry_str, len_cntry + 2 + len_state); - newname [len_cntry + 2 + len_state] = 0; - StringCat (newname, ","); - StringCat (newname, cntry_str + len_cntry + 2 + len_state); - cntry_str = MemFree (cntry_str); - cntry_str = newname; - } - } - else - { - newname = (CharPtr) MemNew (StringLen (cntry_str) + 3); - StringNCpy (newname, cntry_str, len_cntry + 2); - newname [len_cntry + 2] = 0; - StringCat (newname, state_match); - StringCat (newname, ", "); - len_before = cp - cntry_str - 3 - len_cntry; - StringNCpy (newname + 
len_cntry + 2 + len_state + 2, - cntry_str + len_cntry + 2, - len_before); - newname [len_cntry + 2 + len_state + 2 + len_before] = 0; - StringCat (newname, cp + len_state); - cntry_str = MemFree (cntry_str); - cntry_str = newname; - } + dbt = DbtagNew (); + dbt->db = StringSave ("BOLD"); + dbt->tag = ObjectIdNew(); + dbt->tag->str = str; + ValNodeAddPointer (&(biop->org->db), 0, dbt); + rval = TRUE; } - return cntry_str; + return rval; } -typedef struct namedregion { - CharPtr country; - CharPtr state; - CharPtr region; -} NamedRegionData, PNTR NamedRegionPtr; - - -static NamedRegionData named_regions[] = { -{ "USA", "Alaska", "Aleutian Islands" } -}; - -static Int4 num_named_regions = sizeof (named_regions) / sizeof (NamedRegionData); - -static void TrimInternalSpacesAndLeadingPunct (CharPtr str) -{ - CharPtr src, dst; - - src = str; - dst = str; - - while (*src != 0) { - if (isspace (*src)) { - if (dst > str && !isspace (*(dst - 1))) { - *dst = ' '; - dst++; - } - } else if (ispunct (*src)) { - if (dst > str) { - *dst = *src; - dst++; - } - } else { - *dst = *src; - dst++; - } - src++; - } - if (dst > src && (isspace (*(dst - 1)))) { - *(dst - 1) = 0; - } else { - *dst = 0; - } -} -static void FixForNamedRegions (CharPtr PNTR pCountry) +extern void ApplyBarcodeDbxrefsToBioseq (BioseqPtr bsp, Pointer data) { - Int4 i, country_len, state_len, region_len; - CharPtr region = NULL, country, state, new_str; - - if (pCountry == NULL || StringHasNoText (*pCountry)) return; + SeqDescrPtr sdp; + SeqMgrDescContext context; + SeqIdPtr sip; + DbtagPtr dbt; + Int4Ptr p_num; - for (i = 0; i < num_named_regions && region == NULL; i++) { - region = StringSearch (*pCountry, named_regions[i].region); - if (region != NULL) { - country_len = StringLen (named_regions[i].country); - state_len = StringLen (named_regions[i].state); - country = StringSearch (*pCountry, named_regions[i].country); - region_len = StringLen (named_regions[i].region); - if (country != NULL) { - MemSet 
(country, ' ', country_len); - if (*(country + country_len) == ':') { - *(country + country_len) = ' '; - } - } - state = StringSearch (*pCountry, named_regions[i].state); - if (state != NULL) { - MemSet (state, ' ', state_len); - if (*(state + state_len) == ',') { - *(state + state_len) = ' '; + if (bsp == NULL) return; + for (sip = bsp->id; sip != NULL; sip = sip->next) + { + if (IsBarcodeID (sip) && sip->choice == SEQID_GENERAL && sip->data.ptrvalue != NULL) + { + dbt = (DbtagPtr) sip->data.ptrvalue; + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); + if (sdp != NULL) + { + if (ApplyBarcodeDbxrefToBioSource ((BioSourcePtr) sdp->data.ptrvalue, dbt->tag)) { + if ((p_num = (Int4Ptr) data) != NULL) { + (*p_num)++; + } } } - MemSet (region, ' ', region_len); - if (ispunct (*(region + region_len))) { - *(region + region_len) = ' '; - } - TrimInternalSpacesAndLeadingPunct (*pCountry); - new_str = (CharPtr) MemNew (sizeof (Char) * (country_len + state_len + region_len + 7 + StringLen (*pCountry))); - sprintf (new_str, "%s: %s, %s", named_regions[i].country, named_regions[i].state, named_regions[i].region); - if (!StringHasNoText (*pCountry)) { - StringCat (new_str, ", "); - StringCat (new_str, *pCountry); - } - *pCountry = MemFree (*pCountry); - *pCountry = new_str; } } } -static void FindCountryName (CharPtr PNTR pCountry, CharPtr PNTR country_list) -{ - CharPtr best_match = NULL, state_match, state_country = NULL; - CharPtr cp, before, newname, after; - Int4 len_cntry = 0, len_state = 0, len_qual, len_name; - Boolean state_multi = FALSE, country_multi = FALSE; - - if (pCountry == NULL || StringHasNoText (*pCountry)) - { - return; - } - - best_match = FindCountryMatch (*pCountry, country_list, &country_multi); - if (country_multi) { - *pCountry = MemFree (*pCountry); - return; - } - state_match = FindStateMatch (*pCountry, &state_country, &len_state, &state_multi); - - if ((best_match == NULL && state_match == NULL) || (best_match == NULL && 
state_multi)) { - *pCountry = MemFree (*pCountry); - return; - } else if (best_match != NULL && state_match != NULL && StringCmp (best_match, state_country) != 0) { - state_match = NULL; - } - - /* if match could be a country or a state, treat it as a country */ - if (StringCmp (best_match, state_match) == 0) { - state_match = NULL; - } - - if (IsBodyOfWater (best_match) && state_match != NULL) - { - /* prefer state to body of water */ - best_match = NULL; - } +/* Code for Country Fixup */ - /* if we have a country and a state, but the state is for a different country, drop the state */ - if (state_match != NULL && best_match != NULL && StringNCmp (state_country, best_match, len_cntry) != 0) - { - state_multi = FALSE; - state_match = FindStateMatchForCountry (*pCountry, best_match, &state_multi); - if (state_multi) { - *pCountry = MemFree (*pCountry); - return; - } - } +static Boolean IsSubstringOfStringInList (CharPtr whole_str, CharPtr match_p, CharPtr match_str, CharPtr PNTR list) +{ + CharPtr cp; + Int4 context_len, find_len; + Boolean rval = FALSE; - if (best_match != NULL && StringCmp (best_match, "USA") == 0 && StringLen (*pCountry) > 3 && state_match == NULL) - { - FixUSStateAbbreviations (pCountry); - state_multi = FALSE; - state_match = FindStateMatchForCountry (*pCountry, best_match, &state_multi); - if (state_multi) - { - *pCountry = MemFree (*pCountry); - return; - } - if (state_match != NULL) { - FindReplaceString (pCountry, "USA:", "", TRUE, TRUE); - FindReplaceString (pCountry, "USA", "", TRUE, TRUE); - best_match = NULL; - state_country = "USA"; - } - } - - if (best_match == NULL && state_match == NULL) { - *pCountry = MemFree (*pCountry); - return; - } - else if (best_match == NULL && state_match != NULL) - { - FixCountryStringForStateName (pCountry, state_match, state_country); + if (list == NULL || StringHasNoText (whole_str) || match_p == NULL || match_p < whole_str) { + return FALSE; } - else - { - cp = StringISearch (*pCountry, best_match); - 
len_cntry = StringLen (best_match); - after = cp + len_cntry; - while (isspace (*after) || ispunct(*after)) - { - after++; - } - - if (cp != NULL && !isalpha ((Int4)(cp [len_cntry]))) - { - len_qual = StringLen (*pCountry); - if (cp == *pCountry) - { - newname = (CharPtr) MemNew (len_cntry + StringLen (after) + 3); - sprintf (newname, "%s: %s", best_match, after); - } - else - { - /* strip spaces and punctuation from before */ - *(cp - 1) = 0; - before = cp - 2; - while (before >= *pCountry - && (isspace (*before) || ispunct (*before))) - { - *before = 0; - before--; - } - before = *pCountry; - while (isspace (*before) || ispunct(*before)) - { - before++; - } - - newname = (CharPtr) MemNew (len_cntry + StringLen (before) + StringLen (after) + 4); - sprintf (newname, "%s: %s%s%s", best_match, before, - StringHasNoText (before) || StringHasNoText(after) ? "" : " ", - after); - } - if (state_match != NULL) - { - newname = MoveStateAndAddComma (newname, state_match, len_cntry); + find_len = StringLen (match_str); + while (*list != NULL && !rval) { + context_len = StringLen (*list); + if (find_len < context_len) { + cp = StringSearch (whole_str, *list); + while (cp != NULL && !rval) { + if (match_p < cp) { + cp = NULL; + } else if (cp + context_len > match_p) { + rval = TRUE; + } else { + cp = StringSearch (cp + 1, *list); + } } - - /* remove trailing spaces and punctuation */ - len_name = StringLen (newname); - while (isspace ((Int4)(newname[len_name - 1])) - || newname [len_name - 1] == ',' - || newname [len_name - 1] == ':' - || newname [len_name - 1] == ';') - { - newname [len_name - 1] = 0; - len_name --; - } - MemFree (*pCountry); - *pCountry = newname; } - } + list++; + } + return rval; } -static void CountryColonToComma (CharPtr PNTR country_str) -{ - CharPtr cp, cp1, cp2, new_name; - Int4 pre_len; - - if (country_str == NULL || *country_str == NULL) { - return; - } - - cp = StringChr (*country_str, ':'); - if (cp == NULL) return; - cp = StringChr (cp + 1, ':'); 
- while (cp != NULL) { - cp1 = cp; - while (cp1 > *country_str && (isspace (*(cp1 - 1)) || *(cp1 - 1) == ',')) { - cp1--; - } - pre_len = cp1 - *country_str; - cp2 = cp + 1; - while (isspace (*cp2) || *cp2 == ',') { - cp2++; - } - new_name = (CharPtr) MemNew ((pre_len + StringLen (cp2) + 3) * sizeof (Char)); - StringNCpy (new_name, *country_str, pre_len); - StringCat (new_name, ", "); - StringCat (new_name, cp2); - *country_str = MemFree (*country_str); - *country_str = new_name; - cp = StringChr ((*country_str) + pre_len, ':'); - } -} +static ReplacePairData country_name_fixes[] = { + {"Vietnam", "Viet Nam"}, + {"Ivory Coast", "Cote d'Ivoire"}, + {"United States of America", "USA"}, + {"U.S.A.", "USA"}, + {"The Netherlands", "Netherlands"}, + {"People's Republic of China", "China"}, + {"Pr China", "China" }, + {"Prchina", "China" }, + {"P.R.China", "China" }, + {"P.R. China", "China" }, + {"P, R, China", "China" }, + {NULL, NULL} +}; -static void RemoveDoubleCommas (CharPtr PNTR country_str) +NLM_EXTERN CharPtr GetStateAbbreviation (CharPtr state) { - CharPtr cp, cp1, cp2, new_name; - Int4 pre_len; - Boolean found_second_comma; - - if (country_str == NULL || *country_str == NULL) { - return; + ReplacePairPtr fix; + CharPtr abbrev = NULL; + + fix = us_state_abbrev_fixes; + while (fix->find != NULL && abbrev == NULL) { + if (StringICmp (fix->replace, state) == 0) { + abbrev = fix->find; + } + fix++; } - - cp = StringChr (*country_str, ','); - while (cp != NULL) { - cp1 = cp; - while (cp1 > *country_str && (isspace (*(cp1 - 1)) || *(cp1 - 1) == ',')) { - cp1--; - } - pre_len = cp1 - *country_str; - cp2 = cp + 1; - found_second_comma = FALSE; - while (isspace (*cp2) || *cp2 == ',') { - if (*cp2 == ',') { - found_second_comma = TRUE; - } - cp2++; - } - - if (cp1 < cp || found_second_comma || cp2 > cp + 2) { - new_name = (CharPtr) MemNew ((pre_len + StringLen (cp2) + 3) * sizeof (Char)); - StringNCpy (new_name, *country_str, pre_len); - StringCat (new_name, ", "); - 
StringCat (new_name, cp2); - *country_str = MemFree (*country_str); - *country_str = new_name; - cp = StringChr ((*country_str) + pre_len + 1, ','); - } else { - cp = StringChr (cp2, ','); - } - } + return abbrev; } @@ -28932,7 +30834,15 @@ static CharPtr NewFixCountry (CharPtr country, CharPtr PNTR country_list) too_many_countries = ContainsMultipleCountryNames (country_list, country); } - if (valid_country != NULL && !too_many_countries) { + if (valid_country != NULL && too_many_countries && valid_country == country) { + len_country = StringCSpn (valid_country, separator_list); + if (country[len_country] == ':' && !isspace (country[len_country + 1])) { + new_country = MemNew (sizeof (Char) * (StringLen (country) + 2)); + StringNCpy (new_country, country, len_country + 1); + StringCat (new_country, " "); + StringCat (new_country, country + len_country + 1); + } + } else if (valid_country != NULL && !too_many_countries) { len_country = StringCSpn (valid_country, separator_list); len_before = valid_country - country; @@ -28989,20 +30899,528 @@ extern CharPtr GetCountryFix (CharPtr country, CharPtr PNTR country_list) CharPtr new_country; if (StringHasNoText (country)) return NULL; -#if 1 new_country = NewFixCountry (country, country_list); -#else - new_country = StringSave (country); - FixCountryNames (&new_country); - FindCountryName (&new_country, country_list); - CountryColonToComma (&new_country); - RemoveDoubleCommas (&new_country); - FixForNamedRegions (&new_country); -#endif return new_country; } +typedef struct countryfixup { + CharPtr PNTR country_list; + ValNodePtr warning_list; + Boolean capitalize_after_colon; + Boolean any_changed; + FILE *log_fp; +} CountryFixupData, PNTR CountryFixupPtr; + + +static void CapitalizeFirstLetterOfEveryWord (CharPtr pString) +{ + CharPtr pCh; + + pCh = pString; + if (pCh == NULL) return; + if (*pCh == '\0') return; + + while (*pCh != 0) + { + /* skip over spaces */ + while (isspace(*pCh)) + { + pCh++; + } + + /* capitalize 
first letter after white space */ + if (isalpha (*pCh)) + { + *pCh = toupper (*pCh); + pCh++; + } + /* skip over rest of word */ + while (*pCh != 0 && !isspace (*pCh)) + { + if (isalpha (*pCh)) { + *pCh = tolower (*pCh); + } + pCh++; + } + } +} + + +static void CountryFixupItem (Uint1 choice, Pointer data, CountryFixupPtr c) +{ + BioSourcePtr biop; + SubSourcePtr ssp; + CharPtr new_country; + CharPtr cp; + CharPtr tmp; + Int4 country_len; + + if (data == NULL || c == NULL) return; + + biop = GetBioSourceFromObject (choice, data); + if (biop == NULL) return; + + for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) + { + if (ssp->subtype == SUBSRC_country && !StringHasNoText (ssp->name)) + { + new_country = GetCountryFix (ssp->name, c->country_list); + if (new_country == NULL) { + ValNodeAddPointer (&c->warning_list, choice, data); + } else { + cp = StringChr (new_country, ':'); + if (cp != NULL) { + country_len = cp - new_country; + /* skip colon */ + cp++; + /* skip over space after colon */ + cp += StringSpn (cp, " \t"); + if (c->capitalize_after_colon) { + /* reset capitalization */ + CapitalizeFirstLetterOfEveryWord (cp); + } + if (*(new_country + country_len + 1) != 0 && !isspace (*(new_country + country_len + 1))) { + tmp = (CharPtr) MemNew (sizeof (Char) * (StringLen (new_country) + 2)); + StringNCpy (tmp, new_country, country_len + 1); + StringCat (tmp, " "); + StringCat (tmp, cp + 1); + new_country = MemFree (new_country); + new_country = tmp; + } + } + if (StringCmp (ssp->name, new_country) == 0) { + new_country = MemFree (new_country); + } else { + c->any_changed = TRUE; + if (c->log_fp != NULL) { + fprintf (c->log_fp, "Changed '%s' to '%s'\n", ssp->name, new_country); + } + ssp->name = MemFree (ssp->name); + ssp->name = new_country; + } + } + } + } +} + + +static void CountryFixupDesc (SeqDescrPtr sdp, Pointer userdata) +{ + if (sdp != NULL && userdata != NULL && sdp->choice == Seq_descr_source) { + CountryFixupItem (OBJ_SEQDESC, sdp, 
(CountryFixupPtr) userdata); + } +} + + +static void CountryFixupFeat (SeqFeatPtr sfp, Pointer userdata) +{ + if (sfp != NULL && userdata != NULL && sfp->data.choice == SEQFEAT_BIOSRC) { + CountryFixupItem (OBJ_SEQFEAT, sfp, (CountryFixupPtr) userdata); + } +} + + +NLM_EXTERN ValNodePtr FixupCountryQuals (SeqEntryPtr sep, Boolean fix_after_colon) +{ + CountryFixupData c; + + MemSet (&c, 0, sizeof (CountryFixupData)); + c.country_list = GetValidCountryList (); + if (c.country_list == NULL) return NULL; + c.capitalize_after_colon = fix_after_colon; + c.warning_list = NULL; + VisitDescriptorsInSep (sep, &c, CountryFixupDesc); + VisitFeaturesInSep (sep, &c, CountryFixupFeat); + return c.warning_list; +} + + +NLM_EXTERN Boolean FixupCountryQualsWithLog (SeqEntryPtr sep, Boolean fix_after_colon, FILE *log_fp) +{ + CountryFixupData c; + + MemSet (&c, 0, sizeof (CountryFixupData)); + c.log_fp = log_fp; + c.country_list = GetValidCountryList (); + if (c.country_list == NULL) return FALSE; + c.capitalize_after_colon = fix_after_colon; + c.warning_list = NULL; + VisitDescriptorsInSep (sep, &c, CountryFixupDesc); + VisitFeaturesInSep (sep, &c, CountryFixupFeat); + c.warning_list = ValNodeFree (c.warning_list); + return c.any_changed; +} + + +typedef struct qualfixup { + SourceConstraintPtr scp; + ReplacePairPtr fix_list; + Boolean case_counts; + Boolean whole_word; + Boolean is_orgmod; + Uint1 subtype; + Boolean any_changed; + FILE *log_fp; +} QualFixupData, PNTR QualFixupPtr; + +static void FixupBioSourceQuals (BioSourcePtr biop, Pointer data) +{ + QualFixupPtr qf; + OrgModPtr mod; + SubSourcePtr ssp; + ReplacePairPtr fix; + CharPtr orig = NULL; + + if (biop == NULL || (qf = (QualFixupPtr) data) == NULL + || qf->fix_list == NULL + || !DoesBiosourceMatchConstraint(biop, qf->scp)) { + return; + } + + if (qf->is_orgmod) { + if (biop->org == NULL || biop->org->orgname == NULL) { + return; + } + for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) { + if (mod->subtype 
== qf->subtype) { + for (fix = qf->fix_list; fix->find != NULL; fix++) { + orig = StringSave (mod->subname); + FindReplaceString (&(mod->subname), fix->find, fix->replace, qf->case_counts, qf->whole_word); + if (StringCmp (orig, mod->subname) != 0) { + qf->any_changed = TRUE; + if (qf->log_fp != NULL) { + fprintf (qf->log_fp, "Changed '%s' to '%s'\n", orig, mod->subname); + } + } + orig = MemFree (orig); + } + } + } + } else { + for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { + if (ssp->subtype == qf->subtype) { + for (fix = qf->fix_list; fix->find != NULL; fix++) { + orig = StringSave (ssp->name); + FindReplaceString (&(ssp->name), fix->find, fix->replace, qf->case_counts, qf->whole_word); + if (StringCmp (orig, ssp->name) != 0) { + qf->any_changed = TRUE; + if (qf->log_fp != NULL) { + fprintf (qf->log_fp, "Changed '%s' to '%s'\n", orig, ssp->name); + } + } + orig = MemFree (orig); + } + } + } + } +} + + +static ReplacePairData mouse_strain_fixes[] = { + {"129/Sv", "129/Sv"} , + {"129/SvJ", "129/SvJ"} , + {"BALB/c", "BALB/c"} , + {"C57BL/6", "C57BL/6"} , + {"C57BL/6J", "C57BL/6J"} , + {"CD-1", "CD-1"} , + {"CZECHII", "CZECHII"} , + {"FVB/N", "FVB/N"} , + {"FVB/N-3", "FVB/N-3"} , + {"ICR", "ICR"} , + {"NMRI", "NMRI"} , + {"NOD", "NOD"} , + {"C3H", "C3H"} , + {"C57BL", "C57BL"} , + {"C57BL/6", "C57BL/6"} , + {"C57BL/6J", "C57BL/6J" } , + {"DBA/2", "DBA/2"} , + {NULL, NULL}}; + +NLM_EXTERN Boolean FixupMouseStrains (SeqEntryPtr sep, FILE *log_fp) +{ + QualFixupData qd; + + MemSet (&qd, 0, sizeof (QualFixupData)); + + qd.case_counts = FALSE; + qd.whole_word = TRUE; + qd.is_orgmod = TRUE; + qd.subtype = ORGMOD_strain; + qd.scp = SourceConstraintNew (); + qd.scp->constraint = StringConstraintNew (); + qd.scp->constraint->match_text = StringSave ("Mus musculus"); + qd.scp->constraint->match_location = String_location_starts; + qd.scp->field1 = ValNodeNew (NULL); + qd.scp->field1->choice = SourceQualValChoice_textqual; + qd.scp->field1->data.intvalue = 
Source_qual_taxname; + qd.log_fp = log_fp; + qd.fix_list = mouse_strain_fixes; + + VisitBioSourcesInSep (sep, &qd, FixupBioSourceQuals); + qd.scp = SourceConstraintFree (qd.scp); + return qd.any_changed; +} + + +typedef struct srcqualfixlist { + Int4 src_qual; + CharPtr PNTR fix_list; +} SrcQualFixListData, PNTR SrcQualFixListPtr; + + +static CharPtr src_qual_sex_words[] = { + "male", + "female", + NULL }; + +static CharPtr src_qual_host_words[] = { + "porcine", + "caprine", + "ovine", + "cattle", + "canine", + "feline", + "bovine", + "tomato", + "pepper", + "yak", + "horse", + "pig", + "cow", + "rice", + "turkey", + "chicken", + "sheep", + "yak", + "salmon", + "wolf", + "nematode", + "fox", + "swine", + "fish", + "maize", + "soybean", + "wheat", + NULL }; + + static CharPtr src_qual_lab_host_words[] = { + "porcine", + "caprine", + "ovine", + "cattle", + "canine", + "feline", + "bovine", + "tomato", + "pepper", + "yak", + "horse", + "pig", + "cow", + "rice", + "turkey", + "chicken", + "sheep", + "yak", + "salmon", + "wolf", + "nematode", + "fox", + "swine", + "fish", + "maize", + "soybean", + "wheat", + NULL }; + +static CharPtr src_qual_isolation_source_words[] = { + "porcine", + "caprine", + "ovine", + "cattle", + "canine", + "feline", + "bovine", + "tomato", + "pepper", + "yak", + "horse", + "pig", + "cow", + "rice", + "turkey", + "chicken", + "rhizosphere soil", + "soil", + "agricultural soil", + "seedling", + "fruit", + "leaf", + "leaves", + "stem", + "flower", + "root", + "root tip", + "mammary gland", + "skin", + "serum", + "testis", + "cerbrospinal fluid", + "placenta", + "blood", + "head", + "ovary", + "heart", + "rumen", + "plasma", + "wound", + "sera", + "lymph node", + "lung", + "swab", + "patient", + "feces", + "forest", + "clinical", + "milk", + "leaves", + "oviduct", + "whole blood", + "salivary gland", + "oviduct", + "ovary", + "testes", + "skin", + "brain", + "nasal swab", + "urine", + "intestines", + "stomach", + "muscle", + "muscle tissue", + 
"kidney", + "epithelium", + "acne", + "cornea", + NULL }; + +static CharPtr src_qual_tissue_type_words[] = { + "blood", + "whole blood", + "salivary gland", + "oviduct", + "mammary gland", + "testis", + "placenta", + "heart", + "ovary", + "testes", + "skin", + "brain", + "intestines", + "stomach", + "muscle", + "kidney", + "muscle tissue", + "epithelium", + "lymph node", + "lung", + "mammary gland", + "skin", + "cornea", + "fruit", + "leaf", + "leaves", + "stem", + "flower", + "root", + "root tip", + NULL }; + +static CharPtr src_qual_dev_stage_words[] = { + "adult", + NULL }; + +static SrcQualFixListData src_qual_fixes[] = { + {Source_qual_sex, src_qual_sex_words} , + {Source_qual_nat_host, src_qual_host_words}, + {Source_qual_isolation_source, src_qual_isolation_source_words}, + {Source_qual_lab_host, src_qual_lab_host_words}, + {Source_qual_tissue_type, src_qual_tissue_type_words}, + {Source_qual_dev_stage, src_qual_dev_stage_words}, + {0, NULL} +}; + +typedef struct srcqualfix { + Boolean any_change; + FILE *log_fp; + CharPtr PNTR fix_list; + ValNode vn; +} SrcQualFixData, PNTR SrcQualFixPtr; + + +static void FixSourceQualCaps (BioSourcePtr biop, Pointer data) +{ + CharPtr val, orig; + SrcQualFixPtr sq; + Int4 i; + StringConstraint sd; + + if (biop == NULL || (sq = (SrcQualFixPtr) data) == NULL || sq->fix_list == NULL) { + return; + } + val = GetSourceQualFromBioSource (biop, &(sq->vn), NULL); + if (val == NULL) { + return; + } + orig = StringSave (val); + for (i = 0; sq->fix_list[i] != NULL; i++) { + if (StringICmp (val, sq->fix_list[i]) == 0) { + val = MemFree (val); + val = StringSave (sq->fix_list[i]); + } + } + if (StringCmp (orig, val) != 0) { + MemSet (&sd, 0, sizeof (StringConstraint)); + sd.match_text = orig; + sd.match_location = String_location_equals; + if (SetSourceQualInBioSource (biop, &(sq->vn), &sd, val, ExistingTextOption_replace_old)) { + sq->any_change = TRUE; + if (sq->log_fp != NULL) { + fprintf (sq->log_fp, "Changed '%s' to '%s'\n", orig, 
val); + } + } + } + orig = MemFree (orig); + val = MemFree (val); +} + + +NLM_EXTERN Boolean FixSrcQualCaps (SeqEntryPtr sep, Int4 src_qual, FILE *log_fp) +{ + Int4 i; + SrcQualFixData sd; + + MemSet (&sd, 0, sizeof (SrcQualFixData)); + sd.log_fp = log_fp; + sd.any_change = FALSE; + MemSet (&sd.vn, 0, sizeof (ValNode)); + sd.vn.choice = SourceQualChoice_textqual; + + /* find fix function */ + for (i = 0; src_qual_fixes[i].fix_list != NULL; i++) { + if (src_qual_fixes[i].src_qual == src_qual) { + sd.fix_list = src_qual_fixes[i].fix_list; + sd.vn.data.intvalue = src_qual; + VisitBioSourcesInSep (sep, &sd, FixSourceQualCaps); + } + } + + return sd.any_change; +} + + extern ValNodePtr ListFeaturesInLocation (BioseqPtr bsp, SeqLocPtr slp, Uint1 seqfeatChoice, Uint1 featdefChoice) { ValNodePtr feat_list = NULL; @@ -29099,7 +31517,7 @@ extern ValNodePtr ListCodingRegionsContainedInSourceFeatures (SeqEntryPtr sep) extern void CountNsInSequence (BioseqPtr bsp, Int4Ptr p_total, Int4Ptr p_max_stretch, Boolean expand_gaps) { - Int2 ctr, pos, i; + Int4 ctr, pos, i; Char buf1[51]; Int4 len = 50, total = 0, max_stretch = 0, this_stretch = 0; StreamFlgType flags = STREAM_CORRECT_INVAL; @@ -29223,6 +31641,11 @@ NLM_EXTERN void ParseTaxNameToQuals (OrgRefPtr org, TextFsaPtr tags) Int4 val_len, match_len; if (tags == NULL || org == NULL || StringHasNoText (org->taxname)) return; + + if (StringSearch (org->taxname, " x ") != NULL) { + /* ignore cross, applies only to one parent, do not parse */ + return; + } state = 0; ptr = org->taxname; ch = *ptr; @@ -29276,3 +31699,238 @@ NLM_EXTERN ValNodePtr GetLocusTagPrefixList (SeqEntryPtr sep) return list; } + +static CharPtr RemovableCultureNotes[] = { + "[uncultured (using universal primers)]", + "[uncultured (using universal primers) bacterial source]", + "[cultured bacterial source]", + "[enrichment culture bacterial source]", + "[mixed bacterial source (cultured and uncultured)]", + "[uncultured]; [universal primers]", + "[mixed 
bacterial source]", + NULL +}; + +static CharPtr ReplaceableCultureNotes[] = { + "[uncultured (with species-specific primers)]", + "[uncultured]; [amplified with species-specific primers]", + "[uncultured (using species-specific primers) bacterial source]", + NULL +}; + + +static Boolean RemoveCultureNotesFromText (CharPtr PNTR p_txt) +{ + CharPtr txt, cp, src, dst; + Int4 i, len, extra_len; + Boolean any_removed = FALSE; + + if (p_txt == NULL || (txt = *p_txt) == NULL) { + return FALSE; + } + for (i = 0; RemovableCultureNotes[i] != NULL; i++) { + len = StringLen (RemovableCultureNotes[i]); + cp = StringISearch (txt, RemovableCultureNotes[i]); + while (cp != NULL) { + extra_len = StringSpn (cp + len, " ;"); + src = cp + len + extra_len; + dst = cp; + while (*src != 0) { + *dst = *src; + ++dst; + ++src; + } + *dst = 0; + any_removed = TRUE; + cp = StringISearch (txt, RemovableCultureNotes[i]); + } + } + + for (i = 0; ReplaceableCultureNotes[i] != NULL; i++) { + if (StringICmp (txt, ReplaceableCultureNotes[i]) == 0) { + *p_txt = MemFree (*p_txt); + *p_txt = StringSave ("amplified with species-specific primers"); + txt = *p_txt; + any_removed = TRUE; + break; + } + } + if (StringHasNoText (txt)) { + *p_txt = MemFree (*p_txt); + any_removed = TRUE; + } + return any_removed; +} + + +static void RemoveCultureNotesBioSourceCallback (BioSourcePtr biop, Pointer data) +{ + BoolPtr p_rval; + Boolean rval = FALSE; + SubSourcePtr ssp, ssp_prev = NULL, ssp_next; + + if (biop == NULL) { + return; + } + p_rval = (BoolPtr) data; + + for (ssp = biop->subtype; ssp != NULL; ssp = ssp_next) { + ssp_next = ssp->next; + if (ssp->subtype == 255) { + rval |= RemoveCultureNotesFromText(&(ssp->name)); + if (StringHasNoText (ssp->name)) { + ssp->next = NULL; + ssp = SubSourceFree (ssp); + if (ssp_prev == NULL) { + biop->subtype = ssp_next; + } else { + ssp_prev->next = ssp_next; + } + } else { + ssp_prev = ssp; + } + } else { + ssp_prev = ssp; + } + } + + if (p_rval != NULL) { + *p_rval |= 
rval; + } +} + + +NLM_EXTERN Boolean RemoveCultureNotes (SeqEntryPtr sep) +{ + Boolean rval = FALSE; + + VisitBioSourcesInSep (sep, &rval, RemoveCultureNotesBioSourceCallback); + return rval; +} + + +static CharPtr s_CorrectProductCaps[] = { + "ABC", + "AAA", + "ATP", + "ATPase", + "A/G", + "AMP", + "CDP", + "coproporphyrinogen III", + "cytochrome BD", + "cytochrome C", + "cytochrome C2", + "cytochrome C550", + "cytochrome D", + "cytochrome O", + "cytochrome P450", + "cytochrome P460", + "D-alanine", + "D-alanyl", + "D-amino", + "D-beta", + "D-cysteine", + "D-lactate", + "D-ribulose", + "D-xylulose", + "endonuclease I", + "endonuclease II", + "endonuclease III", + "endonuclease V", + "EPS I", + "Fe-S", + "ferredoxin I", + "ferredoxin II", + "GTP", + "GTPase", + "H+", + "hemolysin I", + "hemolysin II", + "hemolysin III", + "L-allo", + "L-arabinose", + "L-asparaginase", + "L-aspartate", + "L-carnitine", + "L-fuculose", + "L-glutamine", + "L-histidinol", + "L-isoaspartate", + "L-serine", + "MFS", + "FAD/NAD(P)", + "MCP", + "Mg+", + "Mg chelatase", + "Mg-protoporphyrin IX", + "N(5)", + "N,N-", + "N-(", + "N-acetyl", + "N-acyl", + "N-carb", + "N-form", + "N-iso", + "N-succ", + "NADP", + "Na+/H+", + "NAD", + "NAD(P)", + "NADPH", + "O-sial", + "O-succ", + "pH", + "ribonuclease BN", + "ribonuclease D", + "ribonuclease E", + "ribonuclease G", + "ribonuclease H", + "ribonuclease I", + "ribonuclease II", + "ribonuclease III", + "ribonuclease P", + "ribonuclease PH", + "ribonuclease R", + "RNAse", + "S-adeno", + "type I", + "type II", + "type III", + "type IV", + "type V", + "type VI", + "UDP", + "UDP-N", + "Zn", + NULL}; + +NLM_EXTERN void FixProductWordCapitalization (CharPtr PNTR pProduct) +{ + Int4 i; + + if (pProduct == NULL || *pProduct == NULL) { + return; + } + + for (i = 0; s_CorrectProductCaps[i] != NULL; i++) { + FindReplaceString (pProduct, s_CorrectProductCaps[i], s_CorrectProductCaps[i], FALSE, TRUE); + } +} + + +NLM_EXTERN Boolean IsNCBIFileID (SeqIdPtr sip) +{ 
+ DbtagPtr dbt; + + if (sip == NULL || sip->choice != SEQID_GENERAL) return FALSE; + dbt = (DbtagPtr) sip->data.ptrvalue; + if (dbt == NULL) return FALSE; + if (StringCmp (dbt->db, "NCBIFILE") == 0) { + return TRUE; + } else { + return FALSE; + } +} + + diff --git a/api/sqnutil4.c b/api/sqnutil4.c index 6690de04..bcdebf0f 100755 --- a/api/sqnutil4.c +++ b/api/sqnutil4.c @@ -29,7 +29,7 @@ * * Version Creation Date: 12/27/2007 * -* $Revision: 1.111 $ +* $Revision: 1.153 $ * * File Description: * This file contains functions for automatically generating definition lines. @@ -53,6 +53,8 @@ #include #include #include +#include +#include #define NLM_GENERATED_CODE_PROTO #include #include @@ -207,7 +209,8 @@ static void ListClauses ( ValNodePtr clauselist, ValNodePtr PNTR strings, Boolean allow_semicolons, - Boolean suppress_final_and + Boolean suppress_final_and, + Boolean suppress_allele ); static void LabelClauses @@ -480,7 +483,7 @@ static void AddSubtypeFields (ValNodePtr PNTR sq_list, SourceQualDescPtr orig) } -static void AddQualList (ValNodePtr PNTR list, Nlm_QualNameAssocPtr qual_list, Boolean is_orgmod, Boolean use_alternate_note_name) +static void AddQualList (ValNodePtr PNTR list, Nlm_QualNameAssocPtr qual_list, Boolean is_orgmod, Boolean use_alternate_note_name, Boolean get_subfields) { Int4 k; SourceQualDescPtr sqdp; @@ -509,7 +512,9 @@ static void AddQualList (ValNodePtr PNTR list, Nlm_QualNameAssocPtr qual_list, B sqdp->subfield = 0; ValNodeAddPointer (list, 0, sqdp); } - AddSubtypeFields (list, sqdp); + if (get_subfields) { + AddSubtypeFields (list, sqdp); + } } } @@ -545,7 +550,7 @@ static void AddNoteQual (ValNodePtr PNTR list, Boolean is_orgmod, Boolean use_al } -static int LIBCALLBACK SortVnpBySourceQualDesc (VoidPtr ptr1, VoidPtr ptr2) +NLM_EXTERN int LIBCALLBACK SortVnpBySourceQualDesc (VoidPtr ptr1, VoidPtr ptr2) { SourceQualDescPtr str1; @@ -569,27 +574,27 @@ static int LIBCALLBACK SortVnpBySourceQualDesc (VoidPtr ptr1, VoidPtr ptr2) } -extern 
ValNodePtr GetSourceQualDescList (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued) +extern ValNodePtr GetSourceQualDescListEx (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued, Boolean get_subfields) { ValNodePtr source_qual_list = NULL; if (get_orgmod) { - AddQualList (&source_qual_list, current_orgmod_subtype_alist, TRUE, get_subsrc); + AddQualList (&source_qual_list, current_orgmod_subtype_alist, TRUE, get_subsrc, get_subfields); if (get_discouraged) { - AddQualList (&source_qual_list, discouraged_orgmod_subtype_alist, TRUE, get_subsrc); + AddQualList (&source_qual_list, discouraged_orgmod_subtype_alist, TRUE, get_subsrc, get_subfields); } if (get_discontinued) { - AddQualList (&source_qual_list, discontinued_orgmod_subtype_alist, TRUE, get_subsrc); + AddQualList (&source_qual_list, discontinued_orgmod_subtype_alist, TRUE, get_subsrc, get_subfields); } AddNoteQual (&source_qual_list, TRUE, get_subsrc); } if (get_subsrc) { - AddQualList (&source_qual_list, current_subsource_subtype_alist, FALSE, get_orgmod); + AddQualList (&source_qual_list, current_subsource_subtype_alist, FALSE, get_orgmod, get_subfields); if (get_discouraged) { - AddQualList (&source_qual_list, discouraged_subsource_subtype_alist, FALSE, get_orgmod); + AddQualList (&source_qual_list, discouraged_subsource_subtype_alist, FALSE, get_orgmod, get_subfields); } if (get_discontinued) { - AddQualList (&source_qual_list, discontinued_subsource_subtype_alist, FALSE, get_orgmod); + AddQualList (&source_qual_list, discontinued_subsource_subtype_alist, FALSE, get_orgmod, get_subfields); } AddNoteQual (&source_qual_list, FALSE, get_orgmod); } @@ -598,6 +603,11 @@ extern ValNodePtr GetSourceQualDescList (Boolean get_subsrc, Boolean get_orgmod, return source_qual_list; } +extern ValNodePtr GetSourceQualDescList (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued) +{ + return 
GetSourceQualDescListEx (get_subsrc, get_orgmod, get_discouraged, get_discontinued, TRUE); +} + /* * The CountModifiersProc is used as the callback function for * VisitBioSourcesInSep when we are getting a list of all the modifiers @@ -3475,11 +3485,12 @@ static Int4 IsMobileElementGBQual (GBQualPtr gbqual) { Int4 keyword_idx; if (gbqual == NULL || gbqual->qual == NULL || gbqual->val == NULL) return -1; - if (StringCmp (gbqual->qual, "mobile_element") != 0) return -1; + if (StringCmp (gbqual->qual, "mobile_element") != 0 && StringCmp (gbqual->qual, "mobile_element_type") != 0) return -1; keyword_idx = StartsWithMobileElementKeyword (gbqual->val); if (keyword_idx < 0) return -1; if (keyword_idx == eMobileElementOther - && StringStr (gbqual->val, "transposable element") == NULL) { + && StringStr (gbqual->val, "transposable element") == NULL + && StringStr (gbqual->val, "P element") == NULL) { return -1; } else { return keyword_idx; @@ -3508,7 +3519,7 @@ static Boolean FeatureDoesNotGetPartialComplete (SeqFeatPtr sfp) NLM_EXTERN Boolean LIBCALLBACK IsMobileElement (SeqFeatPtr sfp) { GBQualPtr gbqual; - if (sfp == NULL || sfp->idx.subtype != FEATDEF_repeat_region) return FALSE; + if (sfp == NULL || (sfp->idx.subtype != FEATDEF_repeat_region && sfp->idx.subtype != FEATDEF_mobile_element)) return FALSE; for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next) { @@ -3523,7 +3534,7 @@ static Boolean LIBCALLBACK IsRemovableMobileElement (SeqFeatPtr sfp) { GBQualPtr gbqual; Int4 keyword_idx; - if (sfp == NULL || sfp->idx.subtype != FEATDEF_repeat_region) return FALSE; + if (sfp == NULL || (sfp->idx.subtype != FEATDEF_repeat_region && sfp->idx.subtype != FEATDEF_mobile_element)) return FALSE; for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next) { @@ -6524,7 +6535,7 @@ static CharPtr GetGenericInterval prev_feat->next = featlist; } - ListClauses (featlist, &strings, FALSE, suppress_final_and); + ListClauses (featlist, &strings, FALSE, suppress_final_and, 
rp->suppress_allele); subfeatlist = MergeValNodeStrings (strings, FALSE); ValNodeFreeData (strings); len += StringLen (subfeatlist) + 7; @@ -8148,7 +8159,8 @@ static void ListClauses ( ValNodePtr clauselist, ValNodePtr PNTR strings, Boolean allow_semicolons, - Boolean suppress_final_and + Boolean suppress_final_and, + Boolean suppress_allele ) { FeatureClausePtr thisclause, onebefore, twobefore, oneafter, twoafter; @@ -8198,8 +8210,8 @@ static void ListClauses ( onebefore_has_typeword_change = TRUE; } if (onebefore_has_typeword_change || onebefore_has_interval_change - || (DisplayAlleleName (onebefore) && StringLen (onebefore->allelename) != 0) - || (DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) != 0)) + || (!suppress_allele && DisplayAlleleName (onebefore) && StringLen (onebefore->allelename) != 0) + || (!suppress_allele && DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) != 0)) { onebefore_has_detail_change = TRUE; } @@ -8233,8 +8245,8 @@ static void ListClauses ( oneafter_has_typeword_change = TRUE; } if (oneafter_has_typeword_change || oneafter_has_interval_change - || (DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) != 0) - || (DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) != 0)) + || (!suppress_allele && DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) != 0) + || (!suppress_allele && DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) != 0)) { oneafter_has_detail_change = TRUE; } @@ -8397,15 +8409,15 @@ static void ListClauses ( } else if (oneafter != NULL && twoafter != NULL && ! 
oneafter_has_interval_change && StringCmp (thisclause->interval, twoafter->interval) == 0 - && ((DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) > 0) - || (DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) > 0))) + && ((!suppress_allele && DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) > 0) + || (!suppress_allele && DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) > 0))) { print_comma = TRUE; } else if (oneafter != NULL && onebefore != NULL && ! oneafter_has_interval_change && ! onebefore_has_interval_change - && ((DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) > 0) - || (DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) > 0))) + && ((!suppress_allele && DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) > 0) + || (!suppress_allele && DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) > 0))) { print_comma = TRUE; } @@ -8483,7 +8495,7 @@ static void ListClauses ( clause_len += 4; if (print_comma) clause_len += 2; - if (DisplayAlleleName (thisclause)) + if (!suppress_allele && DisplayAlleleName (thisclause)) { clause_len += StringLen (thisclause->allelename) + 10; if (StringLen (thisclause->allelename) > 0) @@ -8525,7 +8537,7 @@ static void ListClauses ( StringCat (clause_string, thisclause->feature_label_data.typeword); if (typeword_is_plural) StringCat (clause_string, "s"); - if (DisplayAlleleName (thisclause) + if (!suppress_allele && DisplayAlleleName (thisclause) && thisclause->allelename != NULL) { StringCat (clause_string, ", "); @@ -9014,6 +9026,7 @@ NLM_EXTERN void InitFeatureRequests ( feature_requests->suppress_locus_tags = FALSE; feature_requests->suppressed_feature_list = NULL; feature_requests->use_ncrna_note = FALSE; + feature_requests->suppress_allele = FALSE; } @@ -10487,6 +10500,10 @@ NLM_EXTERN CharPtr BuildNonFeatureListClause (BioseqPtr bsp, DefLineType feature { str = StringSave (", complete sequence."); 
} + else if (feature_list_type == DEFLINE_PARTIAL_SEQUENCE) + { + str = StringSave (", partial sequence."); + } else if (feature_list_type == DEFLINE_COMPLETE_GENOME) { ending_str [0] = 0; @@ -10705,7 +10722,7 @@ static CharPtr BuildFeatureClauses ( /* SmashTallClauses (feature_list, TRUE); */ clause = *feature_list; - ListClauses (clause, &strings, TRUE, FALSE); + ListClauses (clause, &strings, TRUE, FALSE, feature_requests->suppress_allele); AutoDef_AddEnding (clause, &strings, bsp, product_flag, alternate_splice_flag); @@ -10810,6 +10827,49 @@ static void BuildFeatClauseListForSegSet ( FreeListElement (sdld.parent_feature_list); } + +static Boolean Is5SList (ValNodePtr feature_list) +{ + FeatureClausePtr fcp; + Boolean is_5s_list = TRUE; + SeqFeatPtr sfp; + RnaRefPtr rrp; + + if (feature_list == NULL || feature_list->next == NULL) { + return FALSE; + } + + while (feature_list != NULL && is_5s_list) { + if (feature_list->choice != DEFLINE_CLAUSEPLUS) { + is_5s_list = FALSE; + } else if ((fcp = (FeatureClausePtr) feature_list->data.ptrvalue) == NULL) { + is_5s_list = FALSE; + } else if (fcp->featlist->choice != DEFLINE_FEATLIST + || (sfp = (SeqFeatPtr) fcp->featlist->data.ptrvalue) == NULL) { + is_5s_list = FALSE; + } else if (sfp->idx.subtype == FEATDEF_rRNA) { + rrp = (RnaRefPtr) sfp->data.value.ptrvalue; + if (rrp == NULL) { + is_5s_list = FALSE; + } else if (rrp->ext.choice != 1) { + is_5s_list = FALSE; + } else if (StringCmp (rrp->ext.value.ptrvalue, "5S ribosomal RNA") != 0) { + is_5s_list = FALSE; + } + } else if (sfp->idx.subtype == FEATDEF_misc_feature) { + if (StringCmp (sfp->comment, "nontranscribed spacer") != 0 + && StringCmp (sfp->comment, "contains 5S ribosomal RNA and nontranscribed spacer") != 0) { + is_5s_list = FALSE; + } + } else { + is_5s_list = FALSE; + } + feature_list = feature_list->next; + } + return is_5s_list; +} + + static void BuildOneFeatClauseList ( SeqEntryPtr sep, Uint2 entityID, @@ -10867,16 +10927,21 @@ static void 
BuildOneFeatClauseList ( if (deflist == NULL) return; deflist->sep = SeqMgrGetSeqEntryForData (bsp), deflist->bsp = bsp; - deflist->clauselist = BuildFeatureClauses (bsp, - molecule_type, - SeqMgrGetSeqEntryForData (bsp), - &head, - FALSE, - NULL, - product_flag, - alternate_splice_flag, - gene_cluster_opp_strand, - feature_requests); + if (Is5SList(head)) { + deflist->clauselist = StringSave ("5S ribosomal RNA gene region"); + } else { + deflist->clauselist = BuildFeatureClauses (bsp, + molecule_type, + SeqMgrGetSeqEntryForData (bsp), + &head, + FALSE, + NULL, + product_flag, + alternate_splice_flag, + gene_cluster_opp_strand, + feature_requests); + } + vnp = ValNodeNew (*list); if (vnp == NULL) return; if (*list == NULL) *list = vnp; @@ -11351,7 +11416,7 @@ static Boolean UseHaplotype (OrganismDescriptionModifiersPtr odmp, ValNodePtr PN static Boolean UseAutoDefId (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available) { - Int4 index; + size_t index; ValNodePtr vnp; Boolean found, changed = FALSE; ModifierItemLocalPtr cpy; @@ -11822,7 +11887,8 @@ NLM_EXTERN DefLineClauseOptionsPtr MakeFeatureRequestsMatchExpectedTitle (Bioseq { SeqEntryPtr sep; ValNodePtr defline_clauses = NULL; - Int4 index, mod_index; + size_t index; + Int4 mod_index; ValNodePtr best_modifier_indices, default_modifier_indices, modifier_indices = NULL, tmp_mod_list; ValNodePtr vnp; ModifierItemLocalPtr modList; @@ -12222,6 +12288,33 @@ NLM_EXTERN void BuildDefinitionLinesFromFeatureClauseLists ( } } +NLM_EXTERN void BuildDefLinesFromFeatClauseListsForOneBsp ( + ValNodePtr list, + ModifierItemLocalPtr modList, + ValNodePtr modifier_indices, + OrganismDescriptionModifiersPtr odmp, + BioseqPtr bsp +) +{ + ValNodePtr vnp; + DefLineFeatClausePtr defline_featclause; + CharPtr tmp_str; + + for (vnp = list; vnp != NULL; vnp = vnp->next) + { + if (vnp->data.ptrvalue != NULL) + { + defline_featclause = vnp->data.ptrvalue; + if (defline_featclause == NULL) continue; 
+ if (defline_featclause->bsp != bsp) continue; + tmp_str = BuildOneDefinitionLine (defline_featclause->sep, defline_featclause->bsp, + defline_featclause->clauselist, + modList, modifier_indices, odmp); + ReplaceDefinitionLine (defline_featclause->sep, tmp_str); + } + } +} + /* This removes redundant titles on nuc-prot sets, which will not be * visible in the flat file if all sequences in the nuc-prot set have @@ -12300,6 +12393,40 @@ NLM_EXTERN void RemoveProteinTitles (SeqEntryPtr sep) DeleteMarkedObjects (entityID, 0, NULL); } +static void MRnaTitleRemoveProc (BioseqPtr bsp, Pointer userdata) + +{ + MolInfoPtr mip; + ObjValNodePtr ovp; + SeqDescrPtr sdp; + + if (bsp == NULL) return; + if (! ISA_na (bsp->mol)) return; + + sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL); + if (sdp == NULL) return; + mip = (MolInfoPtr) sdp->data.ptrvalue; + if (mip == NULL || mip->biomol != MOLECULE_TYPE_MRNA) return; + + for (sdp = bsp->descr; sdp != NULL; sdp = sdp->next) { + if (sdp->choice == Seq_descr_title && sdp->extended) { + ovp = (ObjValNodePtr) sdp; + ovp->idx.deleteme = TRUE; + } + } +} + +NLM_EXTERN void RemoveMRnaTitles (SeqEntryPtr sep) + +{ + Uint2 entityID; + + if (sep == NULL) return; + VisitBioseqsInSep (sep, NULL, MRnaTitleRemoveProc); + entityID = ObjMgrGetEntityIDForChoice (sep); + DeleteMarkedObjects (entityID, 0, NULL); +} + typedef struct popsetdefline { DeflineFeatureRequestListPtr feature_requests; @@ -12448,6 +12575,8 @@ NLM_EXTERN void AddPopsetTitles PopsetDeflineData pop; pop.feature_requests = feature_requests; + /* forcibly suppress alleles in popset titles */ + pop.feature_requests->suppress_allele = TRUE; pop.product_flag = product_flag; pop.alternate_splice_flag = alternate_splice_flag; pop.gene_cluster_opp_strand = gene_cluster_opp_strand; @@ -14598,6 +14727,36 @@ NLM_EXTERN SeqFeatPtr GetProtFeature (BioseqPtr protbsp) } +NLM_EXTERN Boolean ConvertMiscFeatToGene (SeqFeatPtr sfp) +{ + GeneRefPtr grp; + CharPtr cp; + + if (sfp == 
NULL || sfp->idx.subtype != FEATDEF_misc_feature) { + return FALSE; + } + sfp->data.value.ptrvalue = ImpFeatFree (sfp->data.value.ptrvalue); + grp = GeneRefNew (); + sfp->data.value.ptrvalue = grp; + sfp->data.choice = SEQFEAT_GENE; + sfp->idx.subtype = 0; + + if (!StringHasNoText (sfp->comment)) { + cp = StringChr (sfp->comment, ';'); + if (cp != NULL) { + *cp = 0; + } + grp->locus = StringSave (sfp->comment); + if (cp != NULL) { + cp = StringSave (cp + 1); + } + sfp->comment = MemFree (sfp->comment); + sfp->comment = cp; + } + return TRUE; +} + + NLM_EXTERN Boolean ConvertMiscFeatToCodingRegion (SeqFeatPtr sfp) { BioseqPtr bsp, prot_bsp; @@ -16776,6 +16935,154 @@ NLM_EXTERN CharPtr ReformatDateStringEx (CharPtr orig_date, Boolean month_first, } +NLM_EXTERN CharPtr ReformatDateWithMonthNames (CharPtr orig_date) +{ + CharPtr reformatted_date = NULL, cp; + Int4 year = 0, day = 0; + CharPtr month = NULL; + CharPtr token_list[3]; + Int4 token_lens[3]; + CharPtr numbers = "0123456789"; + CharPtr letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + Int4 num_tokens = 0; + Int4 token_len; + Int4 month_token = -1; + Boolean is_num; + Int4 num_1, num_2; + + if (StringHasNoText (orig_date)) + { + return NULL; + } + + /* divide our original date into tokens */ + /* skip over any leading spaces */ + cp = orig_date; + while (*cp != 0 && num_tokens < 3) + { + is_num = FALSE; + token_len = StringSpn (cp, numbers); + if (token_len == 0) + { + token_len = StringSpn (cp, letters); + } + else + { + is_num = TRUE; + } + if (token_len == 0) + { + cp++; + } + else + { + if (!is_num) + { + if (month_token == -1) + { + month_token = num_tokens; + } + else + { + /* already found a month string */ + return NULL; + } + } + token_list [num_tokens] = cp; + token_lens [num_tokens] = token_len; + num_tokens ++; + cp += token_len; + } + } + + if (num_tokens == 0 || *cp != 0 || month_token == -1 || num_tokens < 2) + { + return NULL; + } + + if (num_tokens == 2) + { + if (month_token 
== 0) + { + month = GetMonthFromToken (token_list [0], token_lens [0]); + year = GetYearFromToken (token_list [1], token_lens [1]); + } + else if (month_token == 1) + { + month = GetMonthFromToken (token_list [1], token_lens [1]); + year = GetYearFromToken (token_list [0], token_lens [0]); + } + else + { + return NULL; + } + } + else if (num_tokens == 3) + { + if (month_token == 0) + { + month = GetMonthFromToken (token_list [0], token_lens [0]); + num_1 = ReadNumberFromToken (token_list [1], token_lens [1]); + num_2 = ReadNumberFromToken (token_list [2], token_lens [2]); + if (!ChooseDayAndYear (num_1, num_2, month, FALSE, &day, &year)) + { + return NULL; + } + } + else if (month_token == 1) + { + month = GetMonthFromToken (token_list [1], token_lens [1]); + num_1 = ReadNumberFromToken (token_list [0], token_lens [0]); + num_2 = ReadNumberFromToken (token_list [2], token_lens [2]); + if (!ChooseDayAndYear (num_1, num_2, month, FALSE, &day, &year)) + { + return NULL; + } + } + else if (month_token == 2) + { + month = GetMonthFromToken (token_list [2], token_lens [2]); + num_1 = ReadNumberFromToken (token_list [0], token_lens [0]); + num_2 = ReadNumberFromToken (token_list [1], token_lens [1]); + if (!ChooseDayAndYear (num_1, num_2, month, FALSE, &day, &year)) + { + return NULL; + } + } + else + { + return NULL; + } + year = GetYearFromNumber(year); + } + + if (month == NULL && day > 0) + { + return NULL; + } + + reformatted_date = (CharPtr) MemNew (sizeof (Char) * 12); + if (reformatted_date == NULL) + { + return NULL; + } + + if (month == NULL) + { + sprintf (reformatted_date, "%d", year); + } + else if (day == 0) + { + sprintf (reformatted_date, "%s-%d", month, year); + } + else + { + sprintf (reformatted_date, "%02d-%s-%d", day, month, year); + } + return reformatted_date; +} + + NLM_EXTERN Boolean CreateMatPeptideFromCDS (SeqFeatPtr sfp) { SeqFeatPtr orig_prot, new_prot; @@ -16817,6 +17124,7 @@ NLM_EXTERN Boolean ConvertCDSToMatPeptideForOverlappingCDS 
(SeqFeatPtr sfp, SeqF ProtRefPtr prp; Int4 frame; Boolean rval = FALSE; + Boolean partial5, partial3; if (sfp == NULL || top_cds == NULL || sfp->data.choice != SEQFEAT_CDREGION || top_cds->data.choice != SEQFEAT_CDREGION) { return FALSE; @@ -16827,7 +17135,8 @@ NLM_EXTERN Boolean ConvertCDSToMatPeptideForOverlappingCDS (SeqFeatPtr sfp, SeqF { crp = (CdRegionPtr) sfp->data.value.ptrvalue; - prot_loc = dnaLoc_to_aaLoc(top_cds, sfp->location, TRUE, &frame, TRUE); + CheckSeqLocForPartial (sfp->location, &partial5, &partial3); + prot_loc = dnaLoc_to_aaLoc(top_cds, sfp->location, TRUE, &frame, !partial3); if (prot_loc != NULL) { /* Create new feature on prot_bsp */ @@ -17016,82 +17325,105 @@ NLM_EXTERN void AddNewUniqueAnnotations (SeqAnnotPtr PNTR new_set, SeqAnnotPtr p } } -static void AddCategorySeqEntriesToSet (BioseqSetPtr newset, ClickableItemPtr category) + +static void AddItemListToSet (ValNodePtr item_list, BioseqSetPtr newset, Boolean for_segregate) { ValNodePtr vnp_item; SeqEntryPtr sep, last_sep, prev_sep, remove_sep; BioseqSetPtr bssp, orig_parent; BioseqPtr bsp; - if (newset == NULL || category == NULL || category->item_list == NULL) return; + if (newset == NULL || item_list == NULL) return; - if (category->chosen) { - last_sep = newset->seq_set; - while (last_sep != NULL && last_sep->next != NULL) { - last_sep = last_sep->next; - } - - for (vnp_item = category->item_list; vnp_item != NULL; vnp_item = vnp_item->next) { - sep = GetBestSeqEntryForItem (vnp_item); - if (sep == NULL || sep->data.ptrvalue == NULL) continue; - orig_parent = NULL; - if (IS_Bioseq (sep)) { - bsp = sep->data.ptrvalue; - if (bsp->idx.parenttype == OBJ_BIOSEQSET) { - orig_parent = bsp->idx.parentptr; - bsp->idx.parentptr = NULL; - } - } else if (IS_Bioseq_set (sep)) { - bssp = sep->data.ptrvalue; - if (bssp->idx.parenttype == OBJ_BIOSEQSET) { - orig_parent = bssp->idx.parentptr; - bssp->idx.parentptr = NULL; - } - } else { - continue; + last_sep = newset->seq_set; + while (last_sep != 
NULL && last_sep->next != NULL) { + last_sep = last_sep->next; + } + + for (vnp_item = item_list; vnp_item != NULL; vnp_item = vnp_item->next) { + sep = GetBestSeqEntryForItem (vnp_item); + if (sep == NULL || sep->data.ptrvalue == NULL) continue; + orig_parent = NULL; + bsp = NULL; + bssp = NULL; + if (IS_Bioseq (sep)) { + bsp = sep->data.ptrvalue; + if (bsp->idx.parenttype == OBJ_BIOSEQSET) { + orig_parent = bsp->idx.parentptr; + bsp->idx.parentptr = NULL; } - - if (orig_parent != NULL) { - /* remove this seq-entry from the original parent */ - prev_sep = NULL; - for (remove_sep = orig_parent->seq_set; - remove_sep != NULL && remove_sep != sep; - remove_sep = remove_sep->next) { - prev_sep = remove_sep; - } - if (remove_sep == sep) { - if (prev_sep == NULL) { - orig_parent->seq_set = orig_parent->seq_set->next; - if (orig_parent->seq_set == NULL) { - orig_parent->idx.deleteme = TRUE; - } - } else { - prev_sep->next = sep->next; + } else if (IS_Bioseq_set (sep)) { + bssp = sep->data.ptrvalue; + if (bssp->idx.parenttype == OBJ_BIOSEQSET) { + orig_parent = bssp->idx.parentptr; + bssp->idx.parentptr = NULL; + } + } else { + continue; + } + + if (orig_parent != NULL) { + /* remove this seq-entry from the original parent */ + prev_sep = NULL; + for (remove_sep = orig_parent->seq_set; + remove_sep != NULL && remove_sep != sep; + remove_sep = remove_sep->next) { + prev_sep = remove_sep; + } + if (remove_sep == sep) { + if (prev_sep == NULL) { + orig_parent->seq_set = orig_parent->seq_set->next; + if (orig_parent->seq_set == NULL) { + orig_parent->idx.deleteme = TRUE; } + } else { + prev_sep->next = sep->next; } - /* set class type if not already set */ - if (newset->_class == BioseqseqSet_class_genbank) { - newset->_class = orig_parent->_class; - } } - if (orig_parent != NULL) { + /* set class type if not already set */ + if (newset->_class == BioseqseqSet_class_genbank && for_segregate) { + newset->_class = orig_parent->_class; + } + } + if (orig_parent != NULL) { + if 
(for_segregate) { /* add descriptors from the orig_parent to the new parent */ AddNewUniqueDescriptors (&(newset->descr), orig_parent->descr); /* add annotations from the orig_parent to the new parent */ AddNewUniqueAnnotations (&(newset->annot), orig_parent->annot); - } - - /* add to new parent */ - sep->next = NULL; - if (last_sep == NULL) { - newset->seq_set = sep; } else { - last_sep->next = sep; + /* add descriptors from the orig_parent to the bioseq itself (or nuc-prot-set if that's what moved) */ + if (bsp != NULL) { + AddNewUniqueDescriptors (&(bsp->descr), orig_parent->descr); + } else if (bssp != NULL) { + AddNewUniqueDescriptors (&(bssp->descr), orig_parent->descr); + } } - last_sep = sep; - SeqMgrLinkSeqEntry (sep, OBJ_BIOSEQSET, newset); } + + /* add to new parent */ + sep->next = NULL; + if (last_sep == NULL) { + newset->seq_set = sep; + } else { + last_sep->next = sep; + } + last_sep = sep; + SeqMgrLinkSeqEntry (sep, OBJ_BIOSEQSET, newset); + } + +} + + +static void AddCategorySeqEntriesToSet (BioseqSetPtr newset, ClickableItemPtr category) +{ + ValNodePtr vnp_item; + + if (newset == NULL || category == NULL || category->item_list == NULL) return; + + if (category->chosen) { + AddItemListToSet (category->item_list, newset, TRUE); } else { for (vnp_item = category->subcategories; vnp_item != NULL; vnp_item = vnp_item->next) { AddCategorySeqEntriesToSet (newset, vnp_item->data.ptrvalue); @@ -17118,6 +17450,113 @@ static Boolean NeedsNewSet (SeqEntryPtr sep) return FALSE; } + +static Boolean IsSingletonSet (SeqEntryPtr sep) +{ + BioseqSetPtr bssp; + SeqAnnotPtr sap; + + if (sep == NULL + || !IS_Bioseq_set(sep) + || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL + || bssp->seq_set == NULL + || bssp->seq_set->next != NULL) { + return FALSE; + } + + /* not a singleton set if it has an alignment annotation */ + for (sap = bssp->annot; sap != NULL; sap = sap->next) { + if (sap->type == 2) { + return FALSE; + } + } + return TRUE; +} + + +static void 
AddAnnotsToSeqEntry (SeqEntryPtr sep, SeqAnnotPtr sap) +{ + BioseqPtr bsp; + BioseqSetPtr bssp; + SeqAnnotPtr last_sap; + + if (sep == NULL) { + return; + } + if (IS_Bioseq(sep)) { + bsp = (BioseqPtr) sep->data.ptrvalue; + if (bsp->annot == NULL) { + bsp->annot = sap; + } else { + last_sap = bsp->annot; + while (last_sap->next != NULL) { + last_sap = last_sap->next; + } + last_sap->next = sap; + } + } else if (IS_Bioseq_set (sep)) { + bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (bssp->annot == NULL) { + bssp->annot = sap; + } else { + last_sap = bssp->annot; + while (last_sap->next != NULL) { + last_sap = last_sap->next; + } + last_sap->next = sap; + } + } +} + + +static void PromoteSingletonSetsInSet (SeqEntryPtr sep) +{ + ObjMgrDataPtr omdptop; + ObjMgrData omdata; + BioseqSetPtr bssp, child_bssp; + SeqEntryPtr sep_child, child_next, child_prev = NULL; + ValNodePtr titles; + Uint2 top_parenttype; + Pointer top_parentptr; + + if (sep == NULL || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL) { + return; + } + + SaveSeqEntryObjMgrData (sep, &omdptop, &omdata); + GetSeqEntryParent (sep, &top_parentptr, &top_parenttype); + + for (sep_child = bssp->seq_set; sep_child != NULL; sep_child = child_next) { + child_next = sep_child->next; + if (IsSingletonSet(sep_child)) { + child_bssp = (BioseqSetPtr) sep_child->data.ptrvalue; + /* remove set title if any */ + titles = ValNodeExtractList (&(child_bssp->descr), Seq_descr_title); + titles = SeqDescrFree (titles); + /* propagate remaining descriptors */ + SetDescriptorPropagate (child_bssp); + /* push down annotation */ + AddAnnotsToSeqEntry (child_bssp->seq_set, child_bssp->annot); + /* replace in list */ + if (child_prev == NULL) { + bssp->seq_set = child_bssp->seq_set; + } else { + child_prev->next = child_bssp->seq_set; + } + child_bssp->seq_set->next = child_next; + child_prev = child_bssp->seq_set; + child_bssp->seq_set = NULL; + sep_child = SeqEntryFree (sep_child); + } else { + child_prev = sep_child; + } + } + 
+ SeqMgrLinkSeqEntry (sep, top_parenttype, top_parentptr); + RestoreSeqEntryObjMgrData (sep, omdptop, &omdata); +} + + NLM_EXTERN BioseqSetPtr MakeGroupsForUniqueValues (BioseqSetPtr bssp, ValNodePtr value_lists) @@ -17230,6 +17669,9 @@ NLM_EXTERN BioseqSetPtr MakeGroupsForUniqueValues parent_set->descr = SeqDescrFree (parent_set->descr); } + sep = SeqMgrGetSeqEntryForData (parent_set); + PromoteSingletonSetsInSet (sep); + ObjMgrSetDirtyFlag (entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); @@ -17237,6 +17679,123 @@ NLM_EXTERN BioseqSetPtr MakeGroupsForUniqueValues } +static void RemoveBioseqFromAlignmentsCallback (SeqAnnotPtr sap, Pointer data) +{ + BioseqPtr bsp; + SeqAlignPtr salphead, salp, salp_next, prev_salp, remove_salp, last_remove; + SeqIdPtr sip, tmpsip; + Uint4 seqid_order; + + if (sap == NULL || sap->type != 2 + || (bsp = (BioseqPtr) data) == NULL + || (salphead = (SeqAlignPtr) sap->data) == NULL) { + return; + } + + salp = salphead; + prev_salp = NULL; + remove_salp = NULL; + last_remove = NULL; + while (salp != NULL) + { + salp_next = salp->next; + tmpsip = SeqIdPtrFromSeqAlign (salp); + seqid_order = 0; + for (sip = bsp->id; sip != NULL && seqid_order == 0; sip = sip->next) { + seqid_order = SeqIdOrderInBioseqIdList(sip, tmpsip); + } + if (seqid_order == 0) + { + /* do nothing for this subalignment */ + prev_salp = salp; + } + else if (salp->dim == 2 || salphead->segtype ==1) + { + /* This is for a pairwise alignment or a DENDIAG alignment */ + if (prev_salp == NULL) + { + salphead = salp->next; + } + else + { + prev_salp->next = salp->next; + } + /* save the alignments that we want to free in a list and get rid of them + * at the end - freeing them beforehand causes problems with listing the + * IDs in the alignment. 
+ */ + salp->next = NULL; + if (remove_salp == NULL) + { + remove_salp = salp; + } + else + { + last_remove->next = salp; + } + last_remove = salp; + } + else + { + SeqAlignBioseqDeleteById (salphead, sip); + prev_salp = salp; + } + salp = salp_next; + } + /* Now we can free the alignment */ + SeqAlignFree (remove_salp); + + sap->data = salphead; + if (sap->data == NULL) { + sap->idx.deleteme = TRUE; + } +} + + +/* expect that list is a valnode list with choice OBJ_BIOSEQ and data.ptrvalue a bioseq */ +NLM_EXTERN void MoveSequencesFromSetToWrapper (ValNodePtr list, Uint2 entityID) +{ + ObjMgrDataPtr omdptop; + ObjMgrData omdata; + Uint2 parenttype; + Pointer parentptr; + BioseqSetPtr bssp; + SeqEntryPtr sep; + ValNodePtr vnp; + + if (list == NULL) return; + sep = GetTopSeqEntryForEntityID (entityID); + if (sep == NULL + || !IS_Bioseq_set (sep) + || (bssp = sep->data.ptrvalue) == NULL + || bssp->_class != BioseqseqSet_class_genbank) { + return; + } + + /* first, propagate descriptors */ + SetDescriptorPropagate (bssp); + + /* pull sequences out of current positions and add to top-level set */ + SaveSeqEntryObjMgrData (sep, &omdptop, &omdata); + GetSeqEntryParent (sep, &parentptr, &parenttype); + + AddItemListToSet (list, bssp, FALSE); + + RestoreSeqEntryObjMgrData (sep, omdptop, &omdata); + + /* remove sequences from alignments */ + for (vnp = list; vnp != NULL; vnp = vnp->next) + { + VisitAnnotsInSep (sep, vnp->data.ptrvalue, RemoveBioseqFromAlignmentsCallback); + } + DeleteMarkedObjects (entityID, 0, NULL); + + ObjMgrSetDirtyFlag (entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); + +} + + static void GetBioseqListCallback (BioseqPtr bsp, Pointer userdata) { if (bsp != NULL && userdata != NULL && ! 
ISA_aa (bsp->mol)) @@ -17348,55 +17907,7096 @@ NLM_EXTERN void SegregateSetsByNumberPerSet (SeqEntryPtr sep, Int4 num_per_set) } -NLM_EXTERN CharPtr CompressSpaces (CharPtr str) +typedef Boolean (*Nlm_ParseProc) PROTO ((CharPtr, Pointer)); + +static Boolean SkipToken (CharPtr cp, Pointer data) { - Char ch; - CharPtr dst; - Char last; - CharPtr ptr; + return TRUE; +} - if (str != NULL && str [0] != '\0') { - dst = str; - ptr = str; - ch = *ptr; - while (ch != '\0' && ch <= ' ') { - ptr++; - ch = *ptr; - } - while (ch != '\0') { - *dst = ch; - dst++; - ptr++; - last = ch; - ch = *ptr; - if (ch != '\0' && ch < ' ') { - *ptr = ' '; - ch = *ptr; - } - while (ch != '\0' && last <= ' ' && ch <= ' ') { - ptr++; - ch = *ptr; - } - } - *dst = '\0'; - dst = NULL; - ptr = str; - ch = *ptr; - while (ch != '\0') { - if (ch != ' ') { - dst = NULL; - } else if (dst == NULL) { - dst = ptr; + +static Boolean ParseLineOfTokens (CharPtr line, Nlm_ParseProc PNTR token_funcs, Pointer data) +{ + CharPtr cp, cp_next; + Char ch_was; + Int4 token_num = 0; + Boolean rval = TRUE; + + if (StringHasNoText (line) || token_funcs == NULL) { + return FALSE; + } + + cp = line; + cp_next = StringChr (cp, '\t'); + while (cp_next != NULL && rval && token_funcs[token_num] != NULL) { + ch_was = *cp_next; + *cp_next = 0; + rval = token_funcs[token_num] (cp, data); + *cp_next = ch_was; + cp = cp_next + 1; + cp_next = StringChr (cp, '\t'); + token_num++; + } + + if (rval && token_funcs[token_num] != NULL) { + /* last token_func for end of line */ + rval = token_funcs[token_num](cp, data); + token_num++; + while (token_funcs[token_num] != NULL && rval) { + rval = token_funcs[token_num](NULL, data); + token_num++; + } + } + return rval; +} + + +/* Output from Fungal ITS sequence extractor: + * first column is ID (after position in set) and length + * next column is ITS1 or ---- (---- means it's not there) + * next column is ITS2 or ---- (---- means it's not there) + * next column is range for ITS1 (or -----) 
+ * next column is range for ITS2 (or -----) + * next column (if present) indicates reverse complement. + */ + +typedef struct extractorinfo { + CharPtr id; + Int4 length; + Boolean has_its1; + Boolean has_its2; + CharPtr its1_range; + CharPtr its2_range; + Boolean is_complement; +} ExtractorInfoData, PNTR ExtractorInfoPtr; + + +static ExtractorInfoPtr ExtractorInfoNew () +{ + ExtractorInfoPtr ep = (ExtractorInfoPtr) MemNew (sizeof (ExtractorInfoData)); + MemSet (ep, 0, sizeof (ExtractorInfoData)); + return ep; +} + + +static ExtractorInfoPtr ExtractorInfoFree (ExtractorInfoPtr ep) +{ + if (ep != NULL) { + ep->id = MemFree (ep->id); + ep->its1_range = MemFree (ep->its1_range); + ep->its2_range = MemFree (ep->its2_range); + ep = MemFree (ep); + } + return ep; +} + + +static Boolean ParseExtractorIdAndLength (CharPtr cp, Pointer data) +{ + ExtractorInfoPtr ep; + CharPtr div, id_start, id_end; + Int4 len; + Char ch_was; + + if (StringHasNoText (cp) || (ep = (ExtractorInfoPtr) data) == NULL) { + return FALSE; + } + /* separate length and ID */ + len = StringLen (cp); + if (len < 4) { + return FALSE; + } + if (StringCmp (cp + len - 4, " bp.") != 0) { + return FALSE; + } + + div = cp + len - 5; + while (div > cp && isdigit (*div)) { + div--; + } + if (!isdigit (*(div + 1))) { + return FALSE; + } + ep->length = atoi (div + 1); + + /* skip over the part that indicates the position of the sequence (1 of N, 2 of N, etc.) 
*/ + id_start = StringChr (cp, ')'); + if (id_start == NULL) { + return FALSE; + } + id_start++; + while (isspace (*id_start)) { + id_start++; + } + + if (id_start >= div) { + return FALSE; + } + + /* if we have a list of IDs, truncate after just the first one */ + id_end = StringChr (id_start, '|'); + if (id_end != NULL && id_end < div) { + id_end = StringChr (id_end + 1, '|'); + if (id_end != NULL && id_end < div) { + div = id_end; + } + } + + ch_was = *div; + *div = 0; + ep->id = StringSave (id_start); + *div = ch_was; + /* trim spaces from end of ID */ + cp = ep->id + StringLen (ep->id) - 1; + while (cp > ep->id && isspace (*cp)) { + cp--; + } + *(cp + 1) = 0; + return TRUE; +} + + +static Boolean ParseHasITS1 (CharPtr cp, Pointer data) +{ + ExtractorInfoPtr ep; + + if (StringHasNoText (cp) || (ep = (ExtractorInfoPtr) data) == NULL) { + return FALSE; + } + + if (StringCmp (cp, "ITS1") == 0) { + ep->has_its1 = TRUE; + } else if (StringCmp (cp, "----") == 0) { + ep->has_its1 = FALSE; + } else { + return FALSE; + } + + return TRUE; +} + + +static Boolean ParseHasITS2 (CharPtr cp, Pointer data) +{ + ExtractorInfoPtr ep; + + if (StringHasNoText (cp) || (ep = (ExtractorInfoPtr) data) == NULL) { + return FALSE; + } + + if (StringCmp (cp, "ITS2") == 0) { + ep->has_its2 = TRUE; + } else if (StringCmp (cp, "----") == 0) { + ep->has_its2 = FALSE; + } else { + return FALSE; + } + + return TRUE; +} + + +static Boolean ParseITS1Range (CharPtr cp, Pointer data) +{ + ExtractorInfoPtr ep; + Boolean rval = TRUE; + + if (StringHasNoText (cp) || (ep = (ExtractorInfoPtr) data) == NULL) { + return FALSE; + } + + if (*cp == '-') { + if (ep->has_its1) { + rval = FALSE; + } else { + rval = TRUE; + } + } else if (StringNCmp (cp, "ITS1: ", 6) == 0) { + if (ep->has_its1) { + ep->its1_range = StringSave (cp + 6); + } else { + rval = FALSE; + } + } else { + rval = FALSE; + } + return rval; +} + + +static Boolean ParseITS2Range (CharPtr cp, Pointer data) +{ + ExtractorInfoPtr ep; + Boolean 
rval = TRUE; + + if (StringHasNoText (cp) || (ep = (ExtractorInfoPtr) data) == NULL) { + return FALSE; + } + + if (*cp == '-') { + if (ep->has_its2) { + rval = FALSE; + } else { + rval = TRUE; + } + } else if (StringNCmp (cp, "ITS2: ", 6) == 0) { + if (ep->has_its2) { + ep->its2_range = StringSave (cp + 6); + } else { + rval = FALSE; + } + } else { + rval = FALSE; + } + return rval; +} + + +static Boolean ParseIsComplement (CharPtr cp, Pointer data) +{ + ExtractorInfoPtr ep; + Boolean rval = TRUE; + + if ((ep = (ExtractorInfoPtr) data) == NULL) { + return FALSE; + } + if (StringHasNoText (cp)) { + ep->is_complement = FALSE; + } else if (StringNCmp (cp, "Reverse complementary", 21) == 0) { + ep->is_complement = TRUE; + } else { + rval = FALSE; + } + return rval; +} + + +static Nlm_ParseProc token_parsers[] = { + ParseExtractorIdAndLength, + ParseHasITS1, + ParseHasITS2, + SkipToken, + SkipToken, + ParseITS1Range, + ParseITS2Range, + ParseIsComplement, + NULL}; + + +typedef enum { + eExtractorFeat18S = 0, + eExtractorFeatITS1, + eExtractorFeat58S, + eExtractorFeatITS2, + eExtractorFeat28S +} EExtractorFeat; + +CharPtr extractor_feature_labels[] = { + "18S ribosomal RNA", + "internal transcribed spacer 1", + "5.8S ribosomal RNA", + "internal transcribed spacer 2", + "28S ribosomal RNA" +}; + + +static CharPtr MakeLabelFromExtractorInfo (ExtractorInfoPtr ep) +{ + Boolean feat_present[5]; + CharPtr cp, label; + Int4 len, i, num_feat = 0, feat_num = 0; + + if (ep == NULL) { + return NULL; + } + + MemSet (feat_present, 0, sizeof (feat_present)); + if (ep->has_its1) { + feat_present[eExtractorFeatITS1] = TRUE; + if (StringNCmp (ep->its1_range, "1-", 2) == 0) { + feat_present[eExtractorFeat18S] = FALSE; + } else { + feat_present[eExtractorFeat18S] = TRUE; + } + if (ep->has_its2) { + feat_present[eExtractorFeat58S] = TRUE; + feat_present[eExtractorFeatITS2] = TRUE; + cp = StringChr (ep->its2_range, '-'); + if (cp != NULL && StringCmp (cp + 1, "end") == 0) { + 
feat_present[eExtractorFeat28S] = FALSE; + } else { + feat_present[eExtractorFeat28S] = TRUE; } - ptr++; - ch = *ptr; + } else { + cp = StringChr (ep->its1_range, '-'); + if (cp != NULL && StringCmp (cp + 1, "end") == 0) { + feat_present[eExtractorFeat58S] = FALSE; + } else { + feat_present[eExtractorFeat58S] = TRUE; + } + feat_present[eExtractorFeatITS2] = FALSE; + feat_present[eExtractorFeat28S] = FALSE; } - if (dst != NULL) { - *dst = '\0'; + } else { + feat_present[eExtractorFeat18S] = FALSE; + feat_present[eExtractorFeatITS1] = FALSE; + if (StringNCmp (ep->its2_range, "1-", 2) == 0) { + feat_present[eExtractorFeat58S] = FALSE; + } else { + feat_present[eExtractorFeat58S] = TRUE; + } + feat_present[eExtractorFeatITS2] = TRUE; + cp = StringChr (ep->its2_range, '-'); + if (cp != NULL && StringCmp (cp + 1, "end") == 0) { + feat_present[eExtractorFeat28S] = FALSE; + } else { + feat_present[eExtractorFeat28S] = TRUE; } } - return str; + + len = 15; + for (i = 0; i < 5; i++) { + if (feat_present[i]) { + len += StringLen (extractor_feature_labels[i]) + 2; + num_feat++; + } else if (num_feat > 0) { + break; + } + } + label = (CharPtr) MemNew (sizeof (Char) * len); + sprintf (label, "contains "); + for (i = 0; i < 5; i++) { + if (feat_present[i]) { + if (feat_num > 0) { + if (feat_num == num_feat - 1) { + if (num_feat == 2) { + StringCat (label, " and "); + } else { + StringCat (label, ", and "); + } + } else { + StringCat (label, ", "); + } + } + StringCat (label, extractor_feature_labels[i]); + feat_num++; + } else if (feat_num > 0) { + break; + } + } + return label; +} + + +NLM_EXTERN void RevCompFeats (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent) + +{ + BioseqPtr bsp; + BioseqSetPtr bssp; + SeqAnnotPtr sap; + SeqFeatPtr sfp; + + if (mydata == NULL) return; + if (sep == NULL || sep->data.ptrvalue == NULL) return; + if (IS_Bioseq (sep)) { + bsp = (BioseqPtr) sep->data.ptrvalue; + sap = bsp->annot; + } else if (IS_Bioseq_set (sep)) { + bssp = 
(BioseqSetPtr) sep->data.ptrvalue; + sap = bssp->annot; + } else return; + bsp = (BioseqPtr) mydata; + if (bsp == NULL) return; + if (! ISA_na (bsp->mol)) return; + while (sap != NULL) { + if (sap->type == 1) { + sfp = (SeqFeatPtr) sap->data; + while (sfp != NULL) { + RevCompOneFeatForBioseq (sfp, bsp); + sfp = sfp->next; + } + } + sap = sap->next; + } } +static SeqFeatPtr ParseExtractorResultRowToFeatures (CharPtr line, SeqEntryPtr sep) +{ + ExtractorInfoPtr ep; + SeqFeatPtr sfp = NULL; + CharPtr label; + Int4 len; + SeqIdPtr sip; + BioseqPtr bsp; + RnaRefPtr rrp; + RNAGenPtr rgp; + + if (StringHasNoText (line)) { + return NULL; + } + + ep = ExtractorInfoNew (); + if (!ParseLineOfTokens(line, token_parsers, ep)) { + ep = ExtractorInfoFree (ep); + Message (MSG_POSTERR, "Unable to parse extractor line %s", line); + return NULL; + } + if (!ep->has_its1 && !ep->has_its2) { + ep = ExtractorInfoFree (ep); + Message (MSG_POSTERR, "Unable to determine feature list for line %s", line); + return NULL; + } + + /* figure out ID */ + len = StringLen (ep->id); + if (len > 3 && ep->id[len - 1] == '.' && ep->id[len - 2] == '.' 
&& ep->id[len - 3] == '.') { + ep = ExtractorInfoFree (ep); + Message (MSG_POSTERR, "ID was truncated for line %s", line); + return NULL; + } + sip = CreateSeqIdFromText (ep->id, sep); + bsp = BioseqFind (sip); + sip = SeqIdFree (sip); + if (bsp == NULL) { + ep = ExtractorInfoFree (ep); + Message (MSG_POSTERR, "ID for sequence not present in record in line %s", line); + return NULL; + } + + + /* calculate label */ + label = MakeLabelFromExtractorInfo(ep); + + if (ep->is_complement) { + BioseqRevComp (bsp); + SeqEntryExplore (sep, (Pointer) bsp, RevCompFeats); + } + + /* make feature and attach to appropriate annots */ + sfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_RNA, NULL); + rrp = RnaRefNew (); + sfp->data.value.ptrvalue = rrp; + rrp->type = 255; + rgp = RNAGenNew (); + rrp->ext.choice = 3; + rrp->ext.value.ptrvalue = rgp; + sfp->comment = label; + SetSeqLocPartial (sfp->location, TRUE, TRUE); + + ep = ExtractorInfoFree (ep); + return sfp; +} + +NLM_EXTERN void ParseExtractorResultsTableToFeatures (FILE *fp, SeqEntryPtr sep) +{ + ReadBufferData rbd; + CharPtr line; + + rbd.fp = fp; + rbd.current_data = NULL; + line = AbstractReadFunction (&rbd); + while (line != NULL && line[0] != EOF) { + /* TODO: skip intro lines */ + ParseExtractorResultRowToFeatures(line, sep); + line = MemFree (line); + line = AbstractReadFunction (&rbd); + } +} + +extern CharPtr latlon_onedegree []; +CharPtr latlon_onedegree [] = { + "1", + "Afghanistan", + "\t39\t69\t72", + "\t38\t63\t75", + "\t37\t62\t75", + "\t36\t60\t75", + "\t35\t59\t75", + "\t34\t59\t72", + "\t33\t59\t72", + "\t32\t59\t71", + "\t31\t59\t70", + "\t30\t59\t70", + "\t29\t59\t67", + "\t28\t59\t67", + "Albania", + "\t43\t18\t21", + "\t42\t18\t21", + "\t41\t18\t22", + "\t40\t18\t22", + "\t39\t18\t22", + "\t38\t18\t21", + "Algeria", + "\t38\t5\t8", + "\t37\t-1\t9", + "\t36\t-3\t9", + "\t35\t-3\t9", + "\t34\t-3\t9", + "\t33\t-3\t10", + "\t32\t-4\t10", + "\t31\t-6\t10", + "\t30\t-9\t10", + "\t29\t-9\t10", + "\t28\t-9\t10", + 
"\t27\t-9\t10", + "\t26\t-9\t11", + "\t25\t-9\t12", + "\t24\t-7\t12", + "\t23\t-5\t12", + "\t22\t-4\t12", + "\t21\t-2\t12", + "\t20\t-1\t10", + "\t19\t0\t8", + "\t18\t1\t7", + "\t17\t2\t4", + "American Samoa", + "\t-10\t-172\t-170", + "\t-11\t-172\t-170", + "\t-12\t-172\t-170", + "\t-13\t-171\t-167", + "\t-14\t-171\t-167", + "\t-15\t-171\t-167", + "Andorra", + "\t43\t0\t2", + "\t42\t0\t2", + "\t41\t0\t2", + "Angola", + "\t-3\t11\t14", + "\t-4\t11\t17", + "\t-5\t11\t17\t19\t21", + "\t-6\t11\t22", + "\t-7\t11\t22", + "\t-8\t11\t23", + "\t-9\t11\t25", + "\t-10\t11\t25", + "\t-11\t11\t25", + "\t-12\t11\t25", + "\t-13\t11\t25", + "\t-14\t10\t25", + "\t-15\t10\t23", + "\t-16\t10\t24", + "\t-17\t10\t24", + "\t-18\t10\t24", + "\t-19\t19\t22", + "Anguilla", + "\t19\t-64\t-61", + "\t18\t-64\t-61", + "\t17\t-64\t-61", + "Antarctica", + "\t-59\t-47\t-43", + "\t-60\t-59\t-53\t-47\t-43", + "\t-61\t-62\t-53\t-47\t-43", + "\t-62\t-62\t-53", + "\t-63\t-65\t-54", + "\t-64\t-66\t-54\t51\t56\t99\t104\t110\t114", + "\t-65\t-69\t-56\t47\t58\t86\t117\t119\t144", + "\t-66\t-70\t-59\t42\t70\t79\t147", + "\t-67\t-91\t-89\t-73\t-59\t31\t35\t38\t71\t76\t156", + "\t-68\t-91\t-89\t-76\t-60\t31\t161", + "\t-69\t-91\t-89\t-77\t-60\t-11\t168", + "\t-70\t-103\t-95\t-77\t-59\t-13\t171", + "\t-71\t-106\t-87\t-81\t-79\t-77\t-58\t-15\t171", + "\t-72\t-128\t-117\t-115\t-112\t-106\t-58\t-22\t-19\t-17\t171", + "\t-73\t-137\t-109\t-106\t-58\t-23\t171", + "\t-74\t-147\t-58\t-27\t170", + "\t-75\t-150\t-59\t-32\t166", + "\t-76\t-159\t-62\t-48\t-44\t-36\t170", + "\t-77\t-165\t-65\t-51\t-42\t-37\t170", + "\t-78\t-165\t-64\t-62\t-58\t-52\t-41\t-37\t170", + "\t-79\t-165\t-58\t-55\t168", + "\t-80\t-165\t-58\t-55\t164", + "\t-81\t-175\t-170\t-164\t169", + "\t-82\t-175\t177", + "\t-83\t-180\t180", + "\t-84\t-180\t180", + "\t-85\t-180\t180", + "\t-86\t-180\t180", + "\t-87\t-180\t180", + "\t-88\t-180\t180", + "\t-89\t-180\t180", + "\t-90\t-180\t180", + "\t-90\t-180\t180", + "Antigua and Barbuda", + "\t18\t-62\t-60", + 
"\t17\t-62\t-60", + "\t16\t-62\t-60", + "\t15\t-62\t-60", + "Argentina", + "\t-20\t-67\t-61", + "\t-21\t-68\t-60", + "\t-22\t-68\t-59", + "\t-23\t-69\t-57", + "\t-24\t-69\t-52", + "\t-25\t-69\t-52", + "\t-26\t-70\t-52", + "\t-27\t-70\t-52", + "\t-28\t-71\t-52", + "\t-29\t-71\t-54", + "\t-30\t-71\t-55", + "\t-31\t-71\t-56", + "\t-32\t-71\t-56", + "\t-33\t-71\t-56", + "\t-34\t-71\t-56", + "\t-35\t-72\t-55", + "\t-36\t-72\t-55", + "\t-37\t-72\t-55", + "\t-38\t-72\t-55", + "\t-39\t-72\t-56", + "\t-40\t-72\t-60", + "\t-41\t-73\t-61", + "\t-42\t-73\t-61", + "\t-43\t-73\t-62", + "\t-44\t-73\t-63", + "\t-45\t-73\t-64", + "\t-46\t-73\t-64", + "\t-47\t-74\t-64", + "\t-48\t-74\t-64", + "\t-49\t-74\t-64", + "\t-50\t-74\t-66", + "\t-51\t-74\t-66", + "\t-52\t-73\t-66", + "\t-53\t-71\t-62", + "\t-54\t-69\t-62", + "\t-55\t-69\t-62", + "\t-56\t-67\t-65", + "Armenia", + "\t42\t42\t46", + "\t41\t42\t46", + "\t40\t42\t47", + "\t39\t42\t47", + "\t38\t43\t47", + "\t37\t45\t47", + "Aruba", + "\t13\t-71\t-68", + "\t12\t-71\t-68", + "\t11\t-71\t-68", + "Ashmore and Cartier Islands", + "\t-11\t122\t124", + "\t-12\t122\t124", + "\t-13\t122\t124", + "Australia", + "\t-8\t141\t143", + "\t-9\t131\t133\t141\t143", + "\t-10\t129\t137\t140\t144", + "\t-11\t129\t137\t140\t144", + "\t-12\t124\t137\t140\t144", + "\t-13\t123\t137\t140\t146", + "\t-14\t123\t138\t140\t146", + "\t-15\t121\t146", + "\t-16\t121\t147", + "\t-17\t120\t147", + "\t-18\t118\t149", + "\t-19\t114\t150", + "\t-20\t112\t151", + "\t-21\t112\t151", + "\t-22\t112\t152", + "\t-23\t112\t154", + "\t-24\t111\t154", + "\t-25\t111\t154", + "\t-26\t111\t154", + "\t-27\t112\t154", + "\t-28\t112\t154", + "\t-29\t113\t154", + "\t-30\t113\t154\t158\t160", + "\t-31\t113\t154\t158\t160", + "\t-32\t113\t154\t158\t160", + "\t-33\t113\t129\t131\t153", + "\t-34\t113\t125\t133\t152", + "\t-35\t113\t124\t134\t152", + "\t-36\t115\t119\t134\t151", + "\t-37\t135\t151", + "\t-38\t138\t151", + "\t-39\t139\t149", + "\t-40\t142\t149", + "\t-41\t142\t149", + 
"\t-42\t143\t149", + "\t-43\t144\t149", + "\t-44\t144\t149", + "\t-53\t157\t159", + "\t-54\t157\t159", + "\t-55\t157\t159", + "Australia: Australian Capital Territory", + "\t-34\t147\t150", + "\t-35\t147\t150", + "\t-36\t147\t150", + "Australia: Jervis Bay Territory", + "\t-34\t149\t151", + "\t-35\t149\t151", + "\t-36\t149\t151", + "Australia: New South Wales", + "\t-27\t147\t154", + "\t-28\t140\t154", + "\t-29\t140\t154", + "\t-30\t140\t154", + "\t-31\t140\t154", + "\t-32\t140\t154", + "\t-33\t140\t153", + "\t-34\t140\t152", + "\t-35\t140\t152", + "\t-36\t142\t151", + "\t-37\t143\t151", + "\t-38\t147\t151", + "Australia: Northern Territory", + "\t-9\t131\t133", + "\t-10\t129\t137", + "\t-11\t129\t137", + "\t-12\t128\t137", + "\t-13\t128\t137", + "\t-14\t128\t138", + "\t-15\t128\t139", + "\t-16\t128\t139", + "\t-17\t128\t139", + "\t-18\t128\t139", + "\t-19\t128\t139", + "\t-20\t128\t139", + "\t-21\t128\t139", + "\t-22\t128\t139", + "\t-23\t128\t139", + "\t-24\t128\t139", + "\t-25\t128\t139", + "\t-26\t128\t139", + "\t-27\t128\t139", + "Australia: Queensland", + "\t-9\t141\t143", + "\t-10\t140\t144", + "\t-11\t140\t144", + "\t-12\t140\t144", + "\t-13\t140\t146", + "\t-14\t140\t146", + "\t-15\t137\t146", + "\t-16\t137\t147", + "\t-17\t137\t147", + "\t-18\t137\t149", + "\t-19\t137\t150", + "\t-20\t137\t151", + "\t-21\t137\t151", + "\t-22\t137\t152", + "\t-23\t137\t154", + "\t-24\t137\t154", + "\t-25\t137\t154", + "\t-26\t137\t154", + "\t-27\t137\t154", + "\t-28\t140\t154", + "\t-29\t140\t154", + "\t-30\t140\t152", + "Australia: South Australia", + "\t-25\t128\t142", + "\t-26\t128\t142", + "\t-27\t128\t142", + "\t-28\t128\t142", + "\t-29\t128\t142", + "\t-30\t128\t142", + "\t-31\t128\t142", + "\t-32\t128\t142", + "\t-33\t131\t142", + "\t-34\t133\t142", + "\t-35\t134\t142", + "\t-36\t134\t141", + "\t-37\t135\t141", + "\t-38\t138\t141", + "\t-39\t139\t141", + "Australia: Tasmania", + "\t-38\t142\t149", + "\t-39\t142\t149", + "\t-40\t142\t149", + "\t-41\t142\t149", + 
"\t-42\t143\t149", + "\t-43\t144\t149", + "\t-44\t144\t149", + "Australia: Victoria", + "\t-32\t139\t141", + "\t-33\t139\t144", + "\t-34\t139\t148", + "\t-35\t139\t149", + "\t-36\t139\t150", + "\t-37\t139\t150", + "\t-38\t139\t150", + "\t-39\t139\t148", + "\t-40\t145\t147", + "Australia: Western Australia", + "\t-12\t124\t128", + "\t-13\t123\t130", + "\t-14\t123\t130", + "\t-15\t121\t130", + "\t-16\t121\t130", + "\t-17\t120\t130", + "\t-18\t118\t130", + "\t-19\t114\t130", + "\t-20\t112\t130", + "\t-21\t112\t130", + "\t-22\t112\t130", + "\t-23\t112\t130", + "\t-24\t111\t130", + "\t-25\t111\t130", + "\t-26\t111\t130", + "\t-27\t112\t130", + "\t-28\t112\t130", + "\t-29\t113\t130", + "\t-30\t113\t130", + "\t-31\t113\t130", + "\t-32\t113\t130", + "\t-33\t113\t129", + "\t-34\t113\t125", + "\t-35\t113\t124", + "\t-36\t115\t119", + "Austria", + "\t50\t13\t16", + "\t49\t11\t18", + "\t48\t8\t18", + "\t47\t8\t18", + "\t46\t8\t18", + "\t45\t8\t17", + "Azerbaijan", + "\t42\t43\t50", + "\t41\t43\t51", + "\t40\t43\t51", + "\t39\t43\t51", + "\t38\t43\t50", + "\t37\t44\t50", + "Bahamas", + "\t27\t-79\t-76", + "\t26\t-80\t-75", + "\t25\t-80\t-73", + "\t24\t-80\t-72", + "\t23\t-80\t-71", + "\t22\t-80\t-71", + "\t21\t-76\t-71", + "\t20\t-74\t-71", + "\t19\t-74\t-72", + "Bahrain", + "\t27\t49\t51", + "\t26\t49\t51", + "\t25\t49\t51", + "\t24\t49\t51", + "Baker Island", + "\t1\t-177\t-175", + "\t0\t-177\t-175", + "\t-1\t-177\t-175", + "Bangladesh", + "\t27\t87\t90", + "\t26\t87\t93", + "\t25\t87\t93", + "\t24\t87\t93", + "\t23\t87\t93", + "\t22\t87\t93", + "\t21\t87\t93", + "\t20\t88\t93", + "\t19\t91\t93", + "Barbados", + "\t14\t-60\t-58", + "\t13\t-60\t-58", + "\t12\t-60\t-58", + "Bassas da India", + "\t-20\t38\t40", + "\t-21\t38\t40", + "\t-22\t38\t40", + "Belarus", + "\t57\t26\t30", + "\t56\t25\t32", + "\t55\t23\t32", + "\t54\t22\t33", + "\t53\t22\t33", + "\t52\t22\t33", + "\t51\t22\t32", + "\t50\t22\t31", + "Belgium", + "\t52\t1\t6", + "\t51\t1\t7", + "\t50\t1\t7", + "\t49\t1\t7", 
+ "\t48\t3\t6", + "Belize", + "\t19\t-90\t-86", + "\t18\t-90\t-86", + "\t17\t-90\t-86", + "\t16\t-90\t-86", + "\t15\t-90\t-87", + "\t14\t-90\t-87", + "Benin", + "\t13\t1\t4", + "\t12\t-1\t4", + "\t11\t-1\t4", + "\t10\t-1\t4", + "\t9\t-1\t4", + "\t8\t0\t4", + "\t7\t0\t3", + "\t6\t0\t3", + "\t5\t0\t3", + "Bermuda", + "\t33\t-65\t-63", + "\t32\t-65\t-63", + "\t31\t-65\t-63", + "Bhutan", + "\t29\t88\t92", + "\t28\t87\t93", + "\t27\t87\t93", + "\t26\t87\t93", + "\t25\t87\t93", + "Bolivia", + "\t-8\t-67\t-64", + "\t-9\t-70\t-64", + "\t-10\t-70\t-63", + "\t-11\t-70\t-61", + "\t-12\t-70\t-59", + "\t-13\t-70\t-59", + "\t-14\t-70\t-59", + "\t-15\t-70\t-57", + "\t-16\t-70\t-56", + "\t-17\t-70\t-56", + "\t-18\t-70\t-56", + "\t-19\t-70\t-56", + "\t-20\t-69\t-56", + "\t-21\t-69\t-56", + "\t-22\t-69\t-61", + "\t-23\t-69\t-61", + "Borneo", + "\t6\t113\t116", + "\t5\t113\t116", + "\t4\t113\t116", + "\t3\t113\t116", + "Borneo", + "\t5\t114\t118", + "\t4\t107\t109\t114\t118", + "\t3\t107\t110\t113\t119", + "\t2\t107\t120", + "\t1\t107\t120", + "\t0\t107\t120", + "\t-1\t107\t120", + "\t-2\t107\t118", + "\t-3\t109\t117", + "\t-4\t109\t117", + "\t-5\t113\t117", + "Borneo", + "\t8\t115\t118", + "\t7\t115\t119", + "\t6\t114\t120", + "\t5\t112\t120", + "\t4\t111\t120", + "\t3\t108\t119", + "\t2\t108\t116", + "\t1\t108\t116", + "\t0\t108\t115", + "\t-1\t109\t112", + "Bosnia and Herzegovina", + "\t46\t14\t19", + "\t45\t14\t20", + "\t44\t14\t20", + "\t43\t14\t20", + "\t42\t15\t20", + "\t41\t16\t19", + "Botswana", + "\t-16\t22\t26", + "\t-17\t19\t26", + "\t-18\t19\t27", + "\t-19\t19\t28", + "\t-20\t19\t30", + "\t-21\t18\t30", + "\t-22\t18\t30", + "\t-23\t18\t30", + "\t-24\t18\t28", + "\t-25\t18\t27", + "\t-26\t19\t26", + "\t-27\t19\t23", + "Bouvet Island", + "\t-53\t2\t4", + "\t-54\t2\t4", + "\t-55\t2\t4", + "Brazil", + "\t6\t-61\t-58", + "\t5\t-65\t-58\t-52\t-50", + "\t4\t-65\t-58\t-53\t-49", + "\t3\t-69\t-49", + "\t2\t-70\t-48", + "\t1\t-71\t-45\t-30\t-28", + "\t0\t-71\t-43\t-30\t-28", + 
"\t-1\t-71\t-38\t-30\t-28", + "\t-2\t-70\t-37\t-33\t-31", + "\t-3\t-73\t-35\t-33\t-31", + "\t-4\t-74\t-31", + "\t-5\t-74\t-33", + "\t-6\t-75\t-33", + "\t-7\t-75\t-33", + "\t-8\t-75\t-33", + "\t-9\t-74\t-33", + "\t-10\t-74\t-34", + "\t-11\t-73\t-35", + "\t-12\t-71\t-36", + "\t-13\t-65\t-36", + "\t-14\t-63\t-37", + "\t-15\t-61\t-37", + "\t-16\t-61\t-37", + "\t-17\t-61\t-37", + "\t-18\t-59\t-38", + "\t-19\t-59\t-38\t-30\t-27", + "\t-20\t-59\t-38\t-30\t-27", + "\t-21\t-59\t-39\t-30\t-27", + "\t-22\t-58\t-39", + "\t-23\t-58\t-39", + "\t-24\t-56\t-42", + "\t-25\t-56\t-45", + "\t-26\t-56\t-46", + "\t-27\t-57\t-47", + "\t-28\t-58\t-47", + "\t-29\t-58\t-47", + "\t-30\t-58\t-48", + "\t-31\t-58\t-49", + "\t-32\t-57\t-49", + "\t-33\t-54\t-51", + "\t-34\t-54\t-51", + "British Indian Ocean Territory", + "\t-4\t70\t73", + "\t-5\t70\t73", + "\t-6\t70\t73", + "\t-7\t70\t73", + "\t-8\t71\t73", + "British Virgin Islands", + "\t19\t-65\t-63", + "\t18\t-65\t-63", + "\t17\t-65\t-63", + "Brunei", + "\t6\t113\t116", + "\t5\t113\t116", + "\t4\t113\t116", + "\t3\t113\t116", + "Bulgaria", + "\t45\t21\t28", + "\t44\t21\t29", + "\t43\t21\t29", + "\t42\t21\t29", + "\t41\t21\t29", + "\t40\t21\t29", + "Burkina Faso", + "\t16\t-1\t1", + "\t15\t-3\t1", + "\t14\t-5\t2", + "\t13\t-5\t3", + "\t12\t-6\t3", + "\t11\t-6\t3", + "\t10\t-6\t3", + "\t9\t-6\t1", + "\t8\t-5\t-1", + "Burundi", + "\t-1\t27\t31", + "\t-2\t27\t31", + "\t-3\t27\t31", + "\t-4\t28\t31", + "\t-5\t28\t31", + "Cambodia", + "\t15\t101\t108", + "\t14\t101\t108", + "\t13\t101\t108", + "\t12\t101\t108", + "\t11\t101\t108", + "\t10\t101\t107", + "\t9\t102\t107", + "Cameroon", + "\t14\t13\t15", + "\t13\t13\t16", + "\t12\t12\t16", + "\t11\t12\t16", + "\t10\t11\t16", + "\t9\t11\t16", + "\t8\t9\t16", + "\t7\t8\t16", + "\t6\t7\t16", + "\t5\t7\t16", + "\t4\t7\t16", + "\t3\t7\t17", + "\t2\t8\t17", + "\t1\t8\t17", + "\t0\t14\t17", + "Canada", + "\t84\t-78\t-67", + "\t83\t-90\t-60", + "\t82\t-96\t-60", + "\t81\t-101\t-60", + "\t80\t-106\t-61", + 
"\t79\t-115\t-108\t-106\t-66", + "\t78\t-121\t-69", + "\t77\t-124\t-73", + "\t76\t-124\t-74", + "\t75\t-125\t-76", + "\t74\t-125\t-75", + "\t73\t-126\t-73", + "\t72\t-126\t-69", + "\t71\t-132\t-66", + "\t70\t-142\t-65", + "\t69\t-142\t-63", + "\t68\t-142\t-61", + "\t67\t-142\t-60", + "\t66\t-142\t-60", + "\t65\t-142\t-60", + "\t64\t-142\t-61", + "\t63\t-142\t-62", + "\t62\t-142\t-63", + "\t61\t-142\t-89\t-84\t-63", + "\t60\t-142\t-91\t-81\t-62", + "\t59\t-142\t-91\t-81\t-61", + "\t58\t-140\t-88\t-81\t-60", + "\t57\t-138\t-86\t-81\t-59", + "\t56\t-133\t-57", + "\t55\t-134\t-56", + "\t54\t-134\t-54", + "\t53\t-134\t-54", + "\t52\t-134\t-54", + "\t51\t-133\t-54", + "\t50\t-132\t-52", + "\t49\t-129\t-51", + "\t48\t-128\t-51", + "\t47\t-126\t-51", + "\t46\t-90\t-51", + "\t45\t-86\t-58\t-56\t-51", + "\t44\t-84\t-58", + "\t43\t-84\t-73\t-67\t-58", + "\t42\t-84\t-75\t-67\t-63\t-61\t-58", + "\t41\t-84\t-77", + "\t40\t-84\t-80", + "Canada: Alberta", + "\t61\t-121\t-109", + "\t60\t-121\t-109", + "\t59\t-121\t-109", + "\t58\t-121\t-109", + "\t57\t-121\t-109", + "\t56\t-121\t-109", + "\t55\t-121\t-109", + "\t54\t-121\t-109", + "\t53\t-121\t-109", + "\t52\t-121\t-109", + "\t51\t-119\t-109", + "\t50\t-118\t-109", + "\t49\t-116\t-109", + "\t48\t-115\t-109", + "\t47\t-115\t-109", + "Canada: British Columbia", + "\t61\t-140\t-119", + "\t60\t-140\t-119", + "\t59\t-140\t-119", + "\t58\t-140\t-119", + "\t57\t-138\t-119", + "\t56\t-134\t-119", + "\t55\t-134\t-119", + "\t54\t-134\t-117", + "\t53\t-134\t-116", + "\t52\t-134\t-114", + "\t51\t-133\t-113", + "\t50\t-132\t-113", + "\t49\t-129\t-113", + "\t48\t-128\t-113", + "\t47\t-126\t-113", + "Canada: Manitoba", + "\t61\t-103\t-93", + "\t60\t-103\t-93", + "\t59\t-103\t-91", + "\t58\t-103\t-88", + "\t57\t-103\t-87", + "\t56\t-103\t-87", + "\t55\t-103\t-87", + "\t54\t-103\t-89", + "\t53\t-102\t-90", + "\t52\t-102\t-92", + "\t51\t-102\t-93", + "\t50\t-102\t-94", + "\t49\t-102\t-94", + "\t48\t-102\t-94", + "\t47\t-102\t-94", + "Canada: New 
Brunswick", + "\t49\t-67\t-63", + "\t48\t-70\t-63", + "\t47\t-70\t-62", + "\t46\t-70\t-62", + "\t45\t-68\t-62", + "\t44\t-68\t-63", + "\t43\t-67\t-65", + "Canada: Newfoundland and Labrador", + "\t61\t-65\t-63", + "\t60\t-65\t-62", + "\t59\t-65\t-61", + "\t58\t-65\t-60", + "\t57\t-65\t-59", + "\t56\t-68\t-57", + "\t55\t-68\t-56", + "\t54\t-68\t-54", + "\t53\t-68\t-54", + "\t52\t-68\t-54", + "\t51\t-68\t-54", + "\t50\t-65\t-63\t-59\t-52", + "\t49\t-60\t-51", + "\t48\t-60\t-51", + "\t47\t-60\t-51", + "\t46\t-60\t-51", + "\t45\t-56\t-51", + "Canada: Northwest Territories", + "\t79\t-115\t-109", + "\t78\t-121\t-109", + "\t77\t-124\t-109", + "\t76\t-124\t-109", + "\t75\t-125\t-109", + "\t74\t-125\t-109", + "\t73\t-126\t-109", + "\t72\t-126\t-109", + "\t71\t-132\t-109", + "\t70\t-136\t-109", + "\t69\t-137\t-109", + "\t68\t-137\t-115\t-113\t-111", + "\t67\t-137\t-113", + "\t66\t-137\t-108", + "\t65\t-135\t-100", + "\t64\t-134\t-100", + "\t63\t-133\t-100", + "\t62\t-131\t-100", + "\t61\t-130\t-101", + "\t60\t-129\t-101", + "\t59\t-127\t-101", + "Canada: Nova Scotia", + "\t48\t-61\t-59", + "\t47\t-65\t-58", + "\t46\t-66\t-58", + "\t45\t-67\t-58", + "\t44\t-67\t-58", + "\t43\t-67\t-58", + "\t42\t-67\t-63\t-61\t-58", + "Canada: Nunavut", + "\t84\t-78\t-67", + "\t83\t-90\t-60", + "\t82\t-96\t-60", + "\t81\t-101\t-60", + "\t80\t-106\t-61", + "\t79\t-111\t-108\t-106\t-66", + "\t78\t-111\t-69", + "\t77\t-111\t-73", + "\t76\t-111\t-74", + "\t75\t-111\t-76", + "\t74\t-111\t-75", + "\t73\t-111\t-73", + "\t72\t-111\t-69", + "\t71\t-118\t-66", + "\t70\t-121\t-65", + "\t69\t-121\t-63", + "\t68\t-121\t-61", + "\t67\t-121\t-60", + "\t66\t-121\t-60", + "\t65\t-118\t-60", + "\t64\t-114\t-61", + "\t63\t-110\t-62", + "\t62\t-103\t-63", + "\t61\t-103\t-89\t-84\t-63", + "\t60\t-103\t-91\t-81\t-77\t-69\t-63", + "\t59\t-103\t-93\t-81\t-76\t-69\t-63", + "\t58\t-81\t-75", + "\t57\t-81\t-75", + "\t56\t-81\t-75", + "\t55\t-82\t-75", + "\t54\t-83\t-76", + "\t53\t-83\t-77", + "\t52\t-83\t-77", + 
"\t51\t-82\t-77", + "\t50\t-80\t-78", + "Canada: Ontario", + "\t57\t-90\t-86", + "\t56\t-92\t-81", + "\t55\t-94\t-81", + "\t54\t-95\t-81", + "\t53\t-96\t-79", + "\t52\t-96\t-78", + "\t51\t-96\t-78", + "\t50\t-96\t-78", + "\t49\t-96\t-78", + "\t48\t-96\t-78", + "\t47\t-95\t-76", + "\t46\t-90\t-73", + "\t45\t-86\t-73", + "\t44\t-84\t-73", + "\t43\t-84\t-73", + "\t42\t-84\t-75", + "\t41\t-84\t-77", + "\t40\t-84\t-80", + "Canada: Prince Edward Island", + "\t48\t-65\t-62", + "\t47\t-65\t-60", + "\t46\t-65\t-60", + "\t45\t-65\t-60", + "\t44\t-63\t-61", + "Canada: Quebec", + "\t63\t-79\t-71", + "\t62\t-79\t-68", + "\t61\t-79\t-68\t-66\t-63", + "\t60\t-79\t-68\t-66\t-63", + "\t59\t-79\t-62", + "\t58\t-79\t-62", + "\t57\t-79\t-62", + "\t56\t-79\t-62", + "\t55\t-80\t-62", + "\t54\t-80\t-62", + "\t53\t-80\t-56", + "\t52\t-80\t-56", + "\t51\t-80\t-56", + "\t50\t-80\t-56", + "\t49\t-80\t-57", + "\t48\t-80\t-60", + "\t47\t-80\t-60", + "\t46\t-80\t-65\t-63\t-60", + "\t45\t-80\t-68", + "\t44\t-78\t-69", + "\t43\t-75\t-73", + "Canada: Saskatchewan", + "\t61\t-111\t-101", + "\t60\t-111\t-101", + "\t59\t-111\t-101", + "\t58\t-111\t-101", + "\t57\t-111\t-101", + "\t56\t-111\t-100", + "\t55\t-111\t-100", + "\t54\t-111\t-100", + "\t53\t-111\t-100", + "\t52\t-111\t-100", + "\t51\t-111\t-100", + "\t50\t-111\t-100", + "\t49\t-111\t-100", + "\t48\t-111\t-100", + "\t47\t-111\t-100", + "Canada: Yukon", + "\t70\t-142\t-136", + "\t69\t-142\t-135", + "\t68\t-142\t-132", + "\t67\t-142\t-131", + "\t66\t-142\t-131", + "\t65\t-142\t-129", + "\t64\t-142\t-128", + "\t63\t-142\t-127", + "\t62\t-142\t-125", + "\t61\t-142\t-122", + "\t60\t-142\t-122", + "\t59\t-142\t-122", + "Cape Verde", + "\t18\t-26\t-23", + "\t17\t-26\t-21", + "\t16\t-26\t-21", + "\t15\t-26\t-21", + "\t14\t-25\t-21", + "\t13\t-25\t-22", + "Cayman Islands", + "\t20\t-82\t-78", + "\t19\t-82\t-78", + "\t18\t-82\t-78", + "Central African Republic", + "\t12\t21\t23", + "\t11\t20\t24", + "\t10\t18\t24", + "\t9\t17\t25", + "\t8\t14\t26", + 
"\t7\t13\t27", + "\t6\t13\t28", + "\t5\t13\t28", + "\t4\t13\t28", + "\t3\t13\t26", + "\t2\t14\t19", + "\t1\t14\t17", + "Chad", + "\t24\t13\t17", + "\t23\t13\t19", + "\t22\t13\t21", + "\t21\t13\t23", + "\t20\t14\t24", + "\t19\t14\t24", + "\t18\t14\t24", + "\t17\t13\t24", + "\t16\t12\t24", + "\t15\t12\t24", + "\t14\t12\t24", + "\t13\t12\t23", + "\t12\t12\t23", + "\t11\t13\t23", + "\t10\t12\t23", + "\t9\t12\t23", + "\t8\t12\t22", + "\t7\t13\t20", + "\t6\t14\t18", + "Chile", + "\t-16\t-70\t-68", + "\t-17\t-71\t-67", + "\t-18\t-71\t-67", + "\t-19\t-71\t-67", + "\t-20\t-71\t-67", + "\t-21\t-71\t-66", + "\t-22\t-71\t-66", + "\t-23\t-71\t-66", + "\t-24\t-71\t-66", + "\t-25\t-106\t-104\t-81\t-78\t-71\t-66", + "\t-26\t-110\t-108\t-106\t-104\t-81\t-78\t-72\t-67", + "\t-27\t-110\t-108\t-106\t-104\t-81\t-78\t-72\t-67", + "\t-28\t-110\t-108\t-72\t-67", + "\t-29\t-72\t-68", + "\t-30\t-72\t-68", + "\t-31\t-72\t-68", + "\t-32\t-81\t-77\t-72\t-68", + "\t-33\t-81\t-77\t-73\t-68", + "\t-34\t-81\t-77\t-73\t-68", + "\t-35\t-74\t-68", + "\t-36\t-74\t-69", + "\t-37\t-74\t-69", + "\t-38\t-74\t-69", + "\t-39\t-74\t-69", + "\t-40\t-75\t-70", + "\t-41\t-75\t-70", + "\t-42\t-75\t-70", + "\t-43\t-76\t-70", + "\t-44\t-76\t-70", + "\t-45\t-76\t-70", + "\t-46\t-76\t-70", + "\t-47\t-76\t-70", + "\t-48\t-76\t-70", + "\t-49\t-76\t-71", + "\t-50\t-76\t-69", + "\t-51\t-76\t-67", + "\t-52\t-76\t-67", + "\t-53\t-76\t-66", + "\t-54\t-75\t-65", + "\t-55\t-74\t-65", + "\t-56\t-72\t-65", + "China", + "\t54\t119\t126", + "\t53\t119\t127", + "\t52\t118\t127", + "\t51\t118\t128", + "\t50\t85\t88\t115\t130", + "\t49\t84\t90\t114\t135", + "\t48\t81\t91\t114\t135", + "\t47\t81\t92\t114\t135", + "\t46\t79\t94\t110\t135", + "\t45\t78\t96\t110\t135", + "\t44\t78\t96\t109\t134", + "\t43\t78\t132", + "\t42\t75\t132", + "\t41\t72\t132", + "\t40\t72\t129", + "\t39\t72\t127", + "\t38\t72\t125", + "\t37\t72\t123", + "\t36\t73\t123", + "\t35\t73\t123", + "\t34\t74\t121", + "\t33\t77\t122", + "\t32\t77\t122", + 
"\t31\t77\t123", + "\t30\t77\t123", + "\t29\t78\t123", + "\t28\t81\t123", + "\t27\t83\t122", + "\t26\t84\t93\t96\t122", + "\t25\t96\t121", + "\t24\t96\t120", + "\t23\t96\t120", + "\t22\t96\t118", + "\t21\t98\t117", + "\t20\t98\t102\t105\t114", + "\t19\t107\t112", + "\t18\t107\t112", + "\t17\t107\t111", + "China", + "\t25\t117\t119", + "\t24\t117\t119", + "\t23\t117\t119", + "China: Hainan", + "\t21\t108\t111", + "\t20\t107\t112", + "\t19\t107\t112", + "\t18\t107\t112", + "\t17\t107\t111", + "Christmas Island", + "\t-9\t104\t106", + "\t-10\t104\t106", + "\t-11\t104\t106", + "Clipperton Island", + "\t11\t-110\t-108", + "\t10\t-110\t-108", + "\t9\t-110\t-108", + "Cocos Islands", + "\t-11\t95\t97", + "\t-12\t95\t97", + "\t-13\t95\t97", + "Colombia", + "\t14\t-82\t-80", + "\t13\t-82\t-80\t-73\t-70", + "\t12\t-82\t-80\t-75\t-70", + "\t11\t-82\t-80\t-76\t-70", + "\t10\t-77\t-70", + "\t9\t-78\t-71", + "\t8\t-78\t-69", + "\t7\t-78\t-66", + "\t6\t-78\t-66", + "\t5\t-78\t-66", + "\t4\t-78\t-66", + "\t3\t-79\t-66", + "\t2\t-80\t-65", + "\t1\t-80\t-65", + "\t0\t-80\t-65", + "\t-1\t-79\t-68", + "\t-2\t-75\t-68", + "\t-3\t-74\t-68", + "\t-4\t-71\t-68", + "\t-5\t-71\t-68", + "Comoros", + "\t-10\t42\t44", + "\t-11\t42\t45", + "\t-12\t42\t45", + "\t-13\t42\t45", + "Cook Islands", + "\t-7\t-159\t-156", + "\t-8\t-159\t-156", + "\t-9\t-166\t-164\t-162\t-156", + "\t-10\t-166\t-164\t-162\t-159", + "\t-11\t-166\t-164\t-162\t-159", + "\t-17\t-160\t-158", + "\t-18\t-160\t-156", + "\t-19\t-160\t-156", + "\t-20\t-160\t-156", + "\t-21\t-160\t-156", + "\t-22\t-160\t-156", + "Coral Sea Islands", + "\t-15\t146\t151", + "\t-16\t146\t151", + "\t-17\t146\t151", + "\t-18\t147\t149", + "\t-20\t152\t156", + "\t-21\t152\t156", + "\t-22\t152\t156", + "\t-23\t154\t156", + "Costa Rica", + "\t12\t-86\t-83", + "\t11\t-86\t-82", + "\t10\t-86\t-81", + "\t9\t-86\t-81", + "\t8\t-86\t-81", + "\t7\t-84\t-81", + "\t6\t-88\t-86", + "\t5\t-88\t-86", + "\t4\t-88\t-86", + "Cote d'Ivoire", + "\t11\t-9\t-3", + 
"\t10\t-9\t-1", + "\t9\t-9\t-1", + "\t8\t-9\t-1", + "\t7\t-9\t-1", + "\t6\t-9\t-1", + "\t5\t-9\t-1", + "\t4\t-8\t-1", + "\t3\t-8\t-4", + "Croatia", + "\t47\t14\t18", + "\t46\t12\t20", + "\t45\t12\t20", + "\t44\t12\t20", + "\t43\t12\t20", + "\t42\t14\t19", + "\t41\t15\t19", + "Cuba", + "\t24\t-84\t-79", + "\t23\t-85\t-76", + "\t22\t-85\t-74", + "\t21\t-85\t-73", + "\t20\t-85\t-73", + "\t19\t-80\t-73", + "\t18\t-78\t-73", + "Curacao", + "\t13\t-70\t-67", + "\t12\t-70\t-67", + "\t11\t-70\t-67", + "Cyprus", + "\t36\t31\t35", + "\t35\t31\t35", + "\t34\t31\t35", + "\t33\t31\t35", + "Cyprus", + "\t36\t31\t35", + "\t35\t31\t35", + "\t34\t31\t35", + "Cyprus", + "\t35\t31\t34", + "\t34\t31\t34", + "\t33\t31\t34", + "Cyprus", + "\t36\t32\t34", + "\t35\t32\t34", + "\t34\t32\t34", + "\t33\t32\t34", + "Czech Republic", + "\t52\t13\t16", + "\t51\t11\t19", + "\t50\t11\t19", + "\t49\t11\t19", + "\t48\t11\t19", + "\t47\t12\t18", + "Democratic Republic of the Congo", + "\t6\t18\t20\t23\t28", + "\t5\t17\t31", + "\t4\t17\t31", + "\t3\t17\t32", + "\t2\t16\t32", + "\t1\t16\t32", + "\t0\t15\t32", + "\t-1\t15\t31", + "\t-2\t14\t30", + "\t-3\t11\t30", + "\t-4\t11\t30", + "\t-5\t11\t31", + "\t-6\t11\t31", + "\t-7\t11\t13\t15\t31", + "\t-8\t15\t31", + "\t-9\t16\t31", + "\t-10\t20\t29", + "\t-11\t21\t30", + "\t-12\t21\t30", + "\t-13\t25\t30", + "\t-14\t27\t30", + "Denmark", + "\t58\t7\t12", + "\t57\t7\t13", + "\t56\t7\t16", + "\t55\t7\t16", + "\t54\t7\t16", + "\t53\t7\t13", + "Djibouti", + "\t13\t41\t44", + "\t12\t40\t44", + "\t11\t40\t44", + "\t10\t40\t44", + "\t9\t40\t43", + "Dominica", + "\t16\t-62\t-60", + "\t15\t-62\t-60", + "\t14\t-62\t-60", + "Dominican Republic", + "\t20\t-72\t-67", + "\t19\t-73\t-67", + "\t18\t-73\t-67", + "\t17\t-73\t-67", + "\t16\t-72\t-70", + "East Timor", + "\t-7\t123\t128", + "\t-8\t123\t128", + "\t-9\t123\t128", + "\t-10\t123\t127", + "Ecuador", + "\t2\t-80\t-77", + "\t1\t-81\t-74", + "\t0\t-81\t-74", + "\t-1\t-82\t-74", + "\t-2\t-82\t-74", + "\t-3\t-82\t-74", + 
"\t-4\t-81\t-76", + "\t-5\t-81\t-77", + "\t-6\t-80\t-78", + "Ecuador: Galapagos", + "\t2\t-93\t-90", + "\t1\t-93\t-88", + "\t0\t-93\t-88", + "\t-1\t-92\t-88", + "\t-2\t-92\t-88", + "Egypt", + "\t32\t23\t35", + "\t31\t23\t35", + "\t30\t23\t35", + "\t29\t23\t35", + "\t28\t23\t35", + "\t27\t23\t35", + "\t26\t23\t35", + "\t25\t23\t36", + "\t24\t23\t36", + "\t23\t23\t37", + "\t22\t23\t37", + "\t21\t23\t37", + "\t20\t23\t37", + "El Salvador", + "\t15\t-90\t-87", + "\t14\t-91\t-86", + "\t13\t-91\t-86", + "\t12\t-91\t-86", + "Equatorial Guinea", + "\t4\t7\t9", + "\t3\t7\t12", + "\t2\t7\t12", + "\t1\t8\t12", + "\t0\t4\t6\t8\t12", + "\t-1\t4\t6\t8\t10", + "\t-2\t4\t6", + "Eritrea", + "\t19\t37\t39", + "\t18\t35\t40", + "\t17\t35\t41", + "\t16\t35\t41", + "\t15\t35\t42", + "\t14\t35\t43", + "\t13\t35\t44", + "\t12\t39\t44", + "\t11\t40\t44", + "Estonia", + "\t60\t21\t29", + "\t59\t20\t29", + "\t58\t20\t29", + "\t57\t20\t28", + "\t56\t20\t28", + "Ethiopia", + "\t15\t35\t41", + "\t14\t35\t42", + "\t13\t34\t43", + "\t12\t33\t43", + "\t11\t33\t44", + "\t10\t33\t44", + "\t9\t32\t47", + "\t8\t31\t48", + "\t7\t31\t48", + "\t6\t31\t48", + "\t5\t33\t47", + "\t4\t33\t46", + "\t3\t34\t46", + "\t2\t36\t42", + "Europa Island", + "\t-21\t39\t41", + "\t-22\t39\t41", + "\t-23\t39\t41", + "Falkland Islands (Islas Malvinas)", + "\t-50\t-62\t-56", + "\t-51\t-62\t-56", + "\t-52\t-62\t-56", + "\t-53\t-62\t-57", + "Faroe Islands", + "\t63\t-8\t-5", + "\t62\t-8\t-5", + "\t61\t-8\t-5", + "\t60\t-7\t-5", + "Fiji", + "\t-11\t176\t178", + "\t-12\t176\t178", + "\t-13\t176\t178", + "\t-15\t-180\t-178\t176\t180", + "\t-16\t-180\t-177\t176\t180", + "\t-17\t-180\t-177\t176\t180", + "\t-18\t-180\t-177\t176\t180", + "\t-19\t-180\t-177\t176\t180", + "\t-20\t-180\t-177\t173\t180", + "\t-21\t173\t175", + "\t-22\t173\t175", + "Finland", + "\t71\t26\t28", + "\t70\t19\t30", + "\t69\t19\t30", + "\t68\t19\t31", + "\t67\t19\t31", + "\t66\t22\t31", + "\t65\t22\t31", + "\t64\t20\t32", + "\t63\t20\t32", + "\t62\t20\t32", 
+ "\t61\t20\t32", + "\t60\t20\t31", + "\t59\t20\t29", + "\t58\t21\t25", + "Finland", + "\t61\t18\t22", + "\t60\t18\t22", + "\t59\t18\t22", + "\t58\t19\t21", + "France", + "\t52\t0\t3", + "\t51\t0\t5", + "\t50\t-2\t8", + "\t49\t-6\t9", + "\t48\t-6\t9", + "\t47\t-6\t9", + "\t46\t-5\t8", + "\t45\t-3\t8", + "\t44\t-2\t8", + "\t43\t-2\t8", + "\t42\t-2\t8", + "\t41\t-2\t7", + "France: Corsica", + "\t44\t8\t10", + "\t43\t7\t10", + "\t42\t7\t10", + "\t41\t7\t10", + "\t40\t7\t10", + "France: Saint Barthelemy", + "\t18\t-63\t-61", + "\t17\t-63\t-61", + "\t16\t-63\t-61", + "France: Saint Martin", + "\t19\t-64\t-62", + "\t18\t-64\t-62", + "\t17\t-64\t-62", + "French Guiana", + "\t6\t-55\t-51", + "\t5\t-55\t-50", + "\t4\t-55\t-50", + "\t3\t-55\t-50", + "\t2\t-55\t-50", + "\t1\t-55\t-51", + "French Polynesia", + "\t-6\t-141\t-139", + "\t-7\t-141\t-138", + "\t-8\t-141\t-137", + "\t-9\t-141\t-137", + "\t-10\t-141\t-137", + "\t-11\t-140\t-137", + "\t-13\t-149\t-140", + "\t-14\t-149\t-139", + "\t-15\t-152\t-139", + "\t-16\t-152\t-137", + "\t-17\t-152\t-135", + "\t-18\t-150\t-148\t-146\t-135", + "\t-19\t-142\t-135", + "\t-20\t-142\t-134", + "\t-21\t-152\t-150\t-141\t-134", + "\t-22\t-152\t-146\t-141\t-133", + "\t-23\t-152\t-146\t-136\t-133", + "\t-24\t-150\t-146\t-136\t-133", + "\t-26\t-145\t-143", + "\t-27\t-145\t-143", + "\t-28\t-145\t-143", + "French Southern and Antarctic Lands", + "\t-10\t46\t48", + "\t-11\t46\t48", + "\t-12\t46\t48", + "\t-14\t53\t55", + "\t-15\t53\t55", + "\t-16\t41\t43\t53\t55", + "\t-17\t41\t43", + "\t-18\t41\t43", + "\t-20\t38\t40", + "\t-21\t38\t41", + "\t-22\t38\t41", + "\t-23\t39\t41", + "\t-36\t76\t78", + "\t-37\t76\t78", + "\t-38\t76\t78", + "\t-39\t76\t78", + "\t-45\t49\t52", + "\t-46\t49\t52", + "\t-47\t49\t52\t67\t70", + "\t-48\t67\t71", + "\t-49\t67\t71", + "\t-50\t67\t71", + "Gabon", + "\t3\t10\t14", + "\t2\t8\t15", + "\t1\t7\t15", + "\t0\t7\t15", + "\t-1\t7\t15", + "\t-2\t7\t15", + "\t-3\t8\t15", + "\t-4\t9\t12", + "Gambia", + "\t14\t-17\t-12", + 
"\t13\t-17\t-12", + "\t12\t-17\t-12", + "Gaza Strip", + "\t32\t33\t35", + "\t31\t33\t35", + "\t30\t33\t35", + "Georgia", + "\t44\t38\t44", + "\t43\t38\t47", + "\t42\t38\t47", + "\t41\t39\t47", + "\t40\t40\t47", + "Germany", + "\t56\t7\t9", + "\t55\t7\t15", + "\t54\t5\t15", + "\t53\t5\t15", + "\t52\t4\t16", + "\t51\t4\t16", + "\t50\t4\t16", + "\t49\t4\t15", + "\t48\t5\t14", + "\t47\t6\t14", + "\t46\t6\t14", + "Ghana", + "\t12\t-2\t1", + "\t11\t-3\t1", + "\t10\t-3\t1", + "\t9\t-3\t1", + "\t8\t-4\t1", + "\t7\t-4\t2", + "\t6\t-4\t2", + "\t5\t-4\t2", + "\t4\t-4\t2", + "\t3\t-3\t0", + "Gibraltar", + "\t37\t-6\t-4", + "\t36\t-6\t-4", + "\t35\t-6\t-4", + "Glorioso Islands", + "\t-10\t46\t48", + "\t-11\t46\t48", + "\t-12\t46\t48", + "Greece", + "\t42\t20\t27", + "\t41\t19\t27", + "\t40\t18\t27", + "\t39\t18\t27", + "\t38\t18\t28", + "\t37\t19\t29", + "\t36\t19\t29", + "\t35\t20\t29", + "\t34\t22\t28", + "\t33\t23\t26", + "Greenland", + "\t84\t-47\t-23", + "\t83\t-60\t-18", + "\t82\t-65\t-10", + "\t81\t-68\t-10", + "\t80\t-69\t-10", + "\t79\t-74\t-13", + "\t78\t-74\t-16", + "\t77\t-74\t-16", + "\t76\t-73\t-16", + "\t75\t-72\t-16", + "\t74\t-68\t-65\t-61\t-16", + "\t73\t-58\t-16", + "\t72\t-57\t-19", + "\t71\t-57\t-20", + "\t70\t-56\t-20", + "\t69\t-56\t-20", + "\t68\t-55\t-21", + "\t67\t-54\t-24", + "\t66\t-54\t-31", + "\t65\t-54\t-32", + "\t64\t-54\t-34", + "\t63\t-53\t-39", + "\t62\t-52\t-39", + "\t61\t-51\t-40", + "\t60\t-50\t-41", + "\t59\t-49\t-41", + "\t58\t-45\t-42", + "Grenada", + "\t13\t-62\t-60", + "\t12\t-62\t-60", + "\t11\t-62\t-60", + "Guadeloupe", + "\t17\t-62\t-59", + "\t16\t-62\t-59", + "\t15\t-62\t-59", + "\t14\t-62\t-60", + "Guam", + "\t14\t143\t145", + "\t13\t143\t145", + "\t12\t143\t145", + "Guatemala", + "\t18\t-92\t-88", + "\t17\t-92\t-88", + "\t16\t-93\t-87", + "\t15\t-93\t-87", + "\t14\t-93\t-87", + "\t13\t-93\t-88", + "\t12\t-92\t-89", + "Guernsey", + "\t50\t-3\t-1", + "\t49\t-3\t-1", + "\t48\t-3\t-1", + "Guinea", + "\t13\t-14\t-7", + 
"\t12\t-15\t-7", + "\t11\t-16\t-6", + "\t10\t-16\t-6", + "\t9\t-16\t-6", + "\t8\t-14\t-6", + "\t7\t-11\t-6", + "\t6\t-10\t-7", + "Guinea-Bissau", + "\t13\t-17\t-12", + "\t12\t-17\t-12", + "\t11\t-17\t-12", + "\t10\t-17\t-12", + "\t9\t-16\t-13", + "Guyana", + "\t9\t-61\t-58", + "\t8\t-61\t-57", + "\t7\t-62\t-56", + "\t6\t-62\t-56", + "\t5\t-62\t-56", + "\t4\t-62\t-56", + "\t3\t-61\t-55", + "\t2\t-61\t-55", + "\t1\t-61\t-55", + "\t0\t-60\t-55", + "Haiti", + "\t21\t-73\t-71", + "\t20\t-74\t-70", + "\t19\t-75\t-70", + "\t18\t-75\t-70", + "\t17\t-75\t-70", + "Heard Island and McDonald Islands", + "\t-51\t72\t74", + "\t-52\t72\t74", + "\t-53\t72\t74", + "\t-54\t72\t74", + "Honduras", + "\t18\t-84\t-82", + "\t17\t-87\t-82", + "\t16\t-90\t-82", + "\t15\t-90\t-82", + "\t14\t-90\t-82", + "\t13\t-90\t-82", + "\t12\t-89\t-84", + "\t11\t-88\t-86", + "Hong Kong", + "\t23\t112\t115", + "\t22\t112\t115", + "\t21\t112\t115", + "Howland Island", + "\t1\t-177\t-175", + "\t0\t-177\t-175", + "\t-1\t-177\t-175", + "Hungary", + "\t49\t16\t23", + "\t48\t15\t23", + "\t47\t15\t23", + "\t46\t15\t23", + "\t45\t15\t22", + "\t44\t16\t20", + "Iceland", + "\t67\t-24\t-13", + "\t66\t-25\t-12", + "\t65\t-25\t-12", + "\t64\t-25\t-12", + "\t63\t-25\t-12", + "\t62\t-23\t-15", + "India", + "\t36\t76\t79", + "\t35\t72\t79", + "\t34\t72\t80", + "\t33\t72\t80", + "\t32\t72\t80", + "\t31\t72\t82", + "\t30\t71\t82\t93\t97", + "\t29\t69\t82\t87\t89\t91\t98", + "\t28\t68\t98", + "\t27\t68\t98", + "\t26\t68\t98", + "\t25\t67\t96", + "\t24\t67\t96", + "\t23\t67\t95", + "\t22\t67\t95", + "\t21\t67\t94", + "\t20\t68\t93", + "\t19\t69\t88", + "\t18\t71\t87", + "\t17\t71\t85", + "\t16\t72\t84", + "\t15\t72\t83", + "\t14\t72\t82\t91\t95", + "\t13\t73\t81\t91\t95", + "\t12\t71\t81\t91\t95", + "\t11\t71\t81\t91\t94", + "\t10\t71\t80\t91\t94", + "\t9\t71\t80\t91\t94", + "\t8\t72\t80\t91\t94", + "\t7\t72\t79\t92\t94", + "\t6\t92\t94", + "\t5\t92\t94", + "Indonesia", + "\t6\t94\t98\t125\t127", + 
"\t5\t94\t99\t106\t109\t114\t118\t125\t128", + "\t4\t94\t100\t104\t109\t114\t118\t124\t128", + "\t3\t94\t102\t104\t110\t113\t119\t124\t129", + "\t2\t94\t132", + "\t1\t94\t137", + "\t0\t96\t139", + "\t-1\t96\t141", + "\t-2\t97\t141", + "\t-3\t98\t141", + "\t-4\t99\t141", + "\t-5\t101\t141", + "\t-6\t101\t116\t118\t141", + "\t-7\t104\t141", + "\t-8\t105\t132\t136\t141", + "\t-9\t109\t132\t136\t141", + "\t-10\t115\t126\t139\t141", + "\t-11\t119\t125", + "Iran", + "\t40\t43\t49", + "\t39\t43\t49\t54\t58", + "\t38\t43\t61", + "\t37\t43\t62", + "\t36\t43\t62", + "\t35\t43\t62", + "\t34\t44\t62", + "\t33\t44\t62", + "\t32\t44\t62", + "\t31\t45\t62", + "\t30\t46\t62", + "\t29\t46\t63", + "\t28\t47\t64", + "\t27\t49\t64", + "\t26\t50\t64", + "\t25\t52\t64", + "\t24\t53\t62", + "Iraq", + "\t38\t41\t45", + "\t37\t40\t46", + "\t36\t40\t47", + "\t35\t39\t47", + "\t34\t37\t47", + "\t33\t37\t48", + "\t32\t37\t48", + "\t31\t37\t49", + "\t30\t39\t49", + "\t29\t41\t49", + "\t28\t42\t49", + "Ireland", + "\t56\t-9\t-5", + "\t55\t-11\t-5", + "\t54\t-11\t-5", + "\t53\t-11\t-4", + "\t52\t-11\t-4", + "\t51\t-11\t-4", + "\t50\t-11\t-6", + "Isle of Man", + "\t55\t-5\t-3", + "\t54\t-5\t-3", + "\t53\t-5\t-3", + "Israel", + "\t34\t34\t36", + "\t33\t33\t36", + "\t32\t33\t36", + "\t31\t33\t36", + "\t30\t33\t36", + "\t29\t33\t36", + "\t28\t33\t36", + "Italy", + "\t48\t10\t13", + "\t47\t6\t14", + "\t46\t5\t14", + "\t45\t5\t14", + "\t44\t5\t14", + "\t43\t5\t16", + "\t42\t6\t18", + "\t41\t7\t19", + "\t40\t7\t19", + "\t39\t7\t19", + "\t38\t7\t19", + "\t37\t7\t18", + "\t36\t10\t17", + "\t35\t10\t16", + "\t34\t11\t13", + "Jamaica", + "\t19\t-79\t-75", + "\t18\t-79\t-75", + "\t17\t-79\t-75", + "\t16\t-78\t-75", + "Jan Mayen", + "\t72\t-9\t-6", + "\t71\t-10\t-6", + "\t70\t-10\t-6", + "\t69\t-10\t-7", + "Japan", + "\t46\t139\t143", + "\t45\t139\t146", + "\t44\t139\t146", + "\t43\t138\t146", + "\t42\t138\t146", + "\t41\t138\t146", + "\t40\t138\t144", + "\t39\t137\t143", + "\t38\t135\t143", + 
"\t37\t131\t142", + "\t36\t131\t142", + "\t35\t128\t141", + "\t34\t128\t141", + "\t33\t127\t140", + "\t32\t127\t141", + "\t31\t127\t134\t138\t141", + "\t30\t128\t132\t139\t141", + "\t29\t128\t132\t139\t141", + "\t28\t126\t131\t139\t143", + "\t27\t125\t131\t139\t143", + "\t26\t122\t132\t139\t143", + "\t25\t121\t132\t140\t143\t152\t154", + "\t24\t121\t126\t130\t132\t140\t142\t152\t154", + "\t23\t121\t126\t140\t142\t152\t154", + "Jarvis Island", + "\t1\t-161\t-159", + "\t0\t-161\t-159", + "\t-1\t-161\t-159", + "Jersey", + "\t50\t-3\t-1", + "\t49\t-3\t-1", + "\t48\t-3\t-1", + "Johnston Atoll", + "\t17\t-170\t-168", + "\t16\t-170\t-168", + "\t15\t-170\t-168", + "Jordan", + "\t34\t37\t39", + "\t33\t34\t40", + "\t32\t34\t40", + "\t31\t34\t40", + "\t30\t33\t39", + "\t29\t33\t38", + "\t28\t33\t38", + "Juan de Nova Island", + "\t-16\t41\t43", + "\t-17\t41\t43", + "\t-18\t41\t43", + "Kazakhstan", + "\t56\t67\t71", + "\t55\t60\t77", + "\t54\t59\t79", + "\t53\t59\t79", + "\t52\t48\t84", + "\t51\t46\t86", + "\t50\t45\t88", + "\t49\t45\t88", + "\t48\t45\t88", + "\t47\t45\t87", + "\t46\t46\t86", + "\t45\t47\t86", + "\t44\t48\t83", + "\t43\t48\t56\t58\t81", + "\t42\t49\t56\t60\t81", + "\t41\t50\t56\t64\t81", + "\t40\t51\t56\t65\t71", + "\t39\t66\t69", + "Kenya", + "\t6\t33\t36", + "\t5\t32\t42", + "\t4\t32\t42", + "\t3\t32\t42", + "\t2\t33\t42", + "\t1\t32\t42", + "\t0\t32\t42", + "\t-1\t32\t42", + "\t-2\t32\t42", + "\t-3\t34\t42", + "\t-4\t36\t41", + "\t-5\t37\t40", + "Kerguelen Archipelago", + "\t-47\t67\t70", + "\t-48\t67\t71", + "\t-49\t67\t71", + "\t-50\t67\t71", + "Kingman Reef", + "\t7\t-163\t-161", + "\t6\t-163\t-161", + "\t5\t-163\t-161", + "Kiribati", + "\t5\t-161\t-159", + "\t4\t-161\t-158\t171\t173", + "\t3\t-161\t-156\t171\t173", + "\t2\t-160\t-156\t171\t174", + "\t1\t-158\t-156\t171\t175", + "\t0\t-158\t-156\t171\t177", + "\t-1\t-172\t-170\t171\t177", + "\t-2\t-172\t-170\t173\t177", + "\t-3\t-173\t-170\t-156\t-153\t174\t177", + "\t-4\t-173\t-171\t-156\t-153", + 
"\t-5\t-173\t-171\t-156\t-153", + "\t-10\t-152\t-150", + "\t-11\t-152\t-150", + "\t-12\t-152\t-150", + "Kosovo", + "\t44\t19\t22", + "\t43\t19\t22", + "\t42\t19\t22", + "\t41\t19\t22", + "\t40\t19\t21", + "Kuwait", + "\t31\t46\t49", + "\t30\t45\t49", + "\t29\t45\t49", + "\t28\t45\t49", + "\t27\t46\t49", + "Kyrgyzstan", + "\t44\t72\t75", + "\t43\t69\t81", + "\t42\t69\t81", + "\t41\t68\t81", + "\t40\t68\t80", + "\t39\t68\t78", + "\t38\t68\t74", + "Laos", + "\t23\t100\t103", + "\t22\t99\t104", + "\t21\t99\t105", + "\t20\t99\t105", + "\t19\t99\t106", + "\t18\t99\t107", + "\t17\t99\t108", + "\t16\t99\t108", + "\t15\t103\t108", + "\t14\t104\t108", + "\t13\t104\t108", + "\t12\t104\t107", + "Latvia", + "\t59\t23\t26", + "\t58\t20\t28", + "\t57\t19\t29", + "\t56\t19\t29", + "\t55\t19\t29", + "\t54\t24\t28", + "Lebanon", + "\t35\t34\t37", + "\t34\t34\t37", + "\t33\t34\t37", + "\t32\t34\t37", + "Lesotho", + "\t-27\t26\t30", + "\t-28\t26\t30", + "\t-29\t26\t30", + "\t-30\t26\t30", + "\t-31\t26\t29", + "Liberia", + "\t9\t-11\t-8", + "\t8\t-12\t-7", + "\t7\t-12\t-6", + "\t6\t-12\t-6", + "\t5\t-12\t-6", + "\t4\t-11\t-6", + "\t3\t-10\t-6", + "Libya", + "\t34\t10\t12", + "\t33\t9\t16\t19\t25", + "\t32\t9\t26", + "\t31\t8\t26", + "\t30\t8\t26", + "\t29\t8\t25", + "\t28\t8\t25", + "\t27\t8\t25", + "\t26\t8\t25", + "\t25\t8\t25", + "\t24\t8\t25", + "\t23\t9\t25", + "\t22\t10\t25", + "\t21\t12\t25", + "\t20\t17\t25", + "\t19\t20\t25", + "\t18\t21\t25", + "Liechtenstein", + "\t48\t8\t10", + "\t47\t8\t10", + "\t46\t8\t10", + "Lithuania", + "\t57\t20\t26", + "\t56\t19\t27", + "\t55\t19\t27", + "\t54\t19\t27", + "\t53\t21\t27", + "\t52\t22\t25", + "Luxembourg", + "\t51\t4\t7", + "\t50\t4\t7", + "\t49\t4\t7", + "\t48\t4\t7", + "Macau", + "\t23\t112\t114", + "\t22\t112\t114", + "\t21\t112\t114", + "Macedonia", + "\t43\t19\t23", + "\t42\t19\t24", + "\t41\t19\t24", + "\t40\t19\t24", + "\t39\t19\t22", + "Madagascar", + "\t-10\t48\t50", + "\t-11\t47\t50", + "\t-12\t46\t51", + "\t-13\t46\t51", + 
"\t-14\t44\t51", + "\t-15\t43\t51", + "\t-16\t42\t51", + "\t-17\t42\t51", + "\t-18\t42\t50", + "\t-19\t42\t50", + "\t-20\t42\t50", + "\t-21\t42\t49", + "\t-22\t42\t49", + "\t-23\t42\t48", + "\t-24\t42\t48", + "\t-25\t42\t48", + "\t-26\t43\t48", + "Malawi", + "\t-8\t31\t35", + "\t-9\t31\t35", + "\t-10\t31\t35", + "\t-11\t31\t35", + "\t-12\t31\t36", + "\t-13\t31\t36", + "\t-14\t31\t36", + "\t-15\t31\t36", + "\t-16\t33\t36", + "\t-17\t33\t36", + "\t-18\t34\t36", + "Malaysia", + "\t8\t115\t118", + "\t7\t98\t103\t115\t119", + "\t6\t98\t104\t114\t120", + "\t5\t98\t104\t112\t120", + "\t4\t99\t104\t111\t120", + "\t3\t99\t105\t108\t119", + "\t2\t99\t105\t108\t116", + "\t1\t100\t105\t108\t116", + "\t0\t101\t105\t108\t115", + "\t-1\t109\t112", + "Maldives", + "\t8\t71\t73", + "\t7\t71\t74", + "\t6\t71\t74", + "\t5\t71\t74", + "\t4\t71\t74", + "\t3\t71\t74", + "\t2\t71\t74", + "\t1\t71\t74", + "\t0\t71\t74", + "\t-1\t71\t74", + "Mali", + "\t25\t-7\t-2", + "\t24\t-7\t0", + "\t23\t-7\t1", + "\t22\t-7\t2", + "\t21\t-7\t3", + "\t20\t-7\t5", + "\t19\t-7\t5", + "\t18\t-7\t5", + "\t17\t-6\t5", + "\t16\t-12\t5", + "\t15\t-13\t5", + "\t14\t-13\t4", + "\t13\t-13\t1", + "\t12\t-13\t-1", + "\t11\t-12\t-3", + "\t10\t-12\t-3", + "\t9\t-9\t-4", + "Malta", + "\t37\t13\t15", + "\t36\t13\t15", + "\t35\t13\t15", + "\t34\t13\t15", + "Marshall Islands", + "\t15\t167\t170", + "\t14\t167\t170", + "\t13\t167\t170", + "\t12\t164\t167", + "\t11\t164\t167\t169\t171", + "\t10\t164\t167\t169\t171", + "\t9\t166\t171", + "\t8\t166\t172", + "\t7\t166\t173", + "\t6\t167\t173", + "\t5\t167\t173", + "\t4\t167\t170", + "\t3\t167\t169", + "Martinique", + "\t15\t-62\t-59", + "\t14\t-62\t-59", + "\t13\t-62\t-59", + "Mauritania", + "\t28\t-9\t-7", + "\t27\t-9\t-5", + "\t26\t-13\t-3", + "\t25\t-13\t-3", + "\t24\t-14\t-3", + "\t23\t-14\t-3", + "\t22\t-18\t-5", + "\t21\t-18\t-5", + "\t20\t-18\t-4", + "\t19\t-18\t-4", + "\t18\t-17\t-4", + "\t17\t-17\t-4", + "\t16\t-17\t-4", + "\t15\t-17\t-4", + "\t14\t-17\t-4", + 
"\t13\t-13\t-10", + "Mauritius", + "\t-9\t55\t57", + "\t-10\t55\t57", + "\t-11\t55\t57", + "\t-18\t56\t58\t62\t64", + "\t-19\t56\t58\t62\t64", + "\t-20\t56\t58\t62\t64", + "\t-21\t56\t58", + "Mayotte", + "\t-11\t44\t46", + "\t-12\t44\t46", + "\t-13\t44\t46", + "Mexico", + "\t33\t-118\t-112", + "\t32\t-118\t-104", + "\t31\t-118\t-103", + "\t30\t-119\t-99", + "\t29\t-119\t-98", + "\t28\t-119\t-98", + "\t27\t-119\t-96", + "\t26\t-116\t-96", + "\t25\t-115\t-96", + "\t24\t-113\t-96", + "\t23\t-113\t-96", + "\t22\t-111\t-96\t-91\t-85", + "\t21\t-111\t-95\t-91\t-85", + "\t20\t-111\t-109\t-107\t-94\t-92\t-85", + "\t19\t-115\t-109\t-106\t-85", + "\t18\t-115\t-109\t-106\t-86", + "\t17\t-115\t-109\t-105\t-86", + "\t16\t-103\t-87", + "\t15\t-101\t-89", + "\t14\t-98\t-90", + "\t13\t-93\t-91", + "Micronesia", + "\t10\t137\t139", + "\t9\t137\t139\t148\t151\t153\t155", + "\t8\t137\t139\t148\t155", + "\t7\t148\t159", + "\t6\t148\t154\t156\t159\t161\t164", + "\t5\t148\t150\t152\t154\t156\t159\t161\t164", + "\t4\t152\t154\t156\t158\t161\t164", + "Midway Islands", + "\t29\t-178\t-176", + "\t28\t-178\t-176", + "\t27\t-178\t-176", + "Moldova", + "\t49\t25\t29", + "\t48\t25\t30", + "\t47\t25\t31", + "\t46\t26\t31", + "\t45\t27\t31", + "\t44\t27\t29", + "Monaco", + "\t44\t6\t8", + "\t43\t6\t8", + "\t42\t6\t8", + "Mongolia", + "\t53\t97\t100", + "\t52\t96\t103", + "\t51\t88\t108\t112\t117", + "\t50\t86\t117", + "\t49\t86\t119", + "\t48\t86\t120", + "\t47\t86\t120", + "\t46\t88\t120", + "\t45\t89\t120", + "\t44\t89\t117", + "\t43\t91\t115", + "\t42\t94\t112", + "\t41\t95\t111", + "\t40\t102\t106", + "Montenegro", + "\t44\t17\t21", + "\t43\t17\t21", + "\t42\t17\t21", + "\t41\t17\t21", + "\t40\t18\t20", + "Montserrat", + "\t17\t-63\t-61", + "\t16\t-63\t-61", + "\t15\t-63\t-61", + "Morocco", + "\t36\t-7\t-1", + "\t35\t-7\t0", + "\t34\t-9\t0", + "\t33\t-10\t0", + "\t32\t-10\t0", + "\t31\t-10\t0", + "\t30\t-11\t-1", + "\t29\t-13\t-2", + "\t28\t-14\t-4", + "\t27\t-15\t-7", + "\t26\t-15\t-7", + 
"\t25\t-16\t-8", + "\t24\t-17\t-11", + "\t23\t-17\t-11", + "\t22\t-18\t-12", + "\t21\t-18\t-13", + "\t20\t-18\t-13", + "Mozambique", + "\t-9\t38\t41", + "\t-10\t33\t41", + "\t-11\t33\t41", + "\t-12\t33\t41", + "\t-13\t29\t41", + "\t-14\t29\t41", + "\t-15\t29\t41", + "\t-16\t29\t41", + "\t-17\t29\t41", + "\t-18\t31\t39", + "\t-19\t31\t37", + "\t-20\t30\t36", + "\t-21\t30\t36", + "\t-22\t30\t36", + "\t-23\t30\t36", + "\t-24\t30\t36", + "\t-25\t30\t36", + "\t-26\t30\t34", + "\t-27\t31\t33", + "Myanmar", + "\t29\t96\t99", + "\t28\t94\t99", + "\t27\t94\t99", + "\t26\t93\t99", + "\t25\t92\t99", + "\t24\t92\t100", + "\t23\t91\t100", + "\t22\t91\t102", + "\t21\t91\t102", + "\t20\t91\t102", + "\t19\t91\t101", + "\t18\t91\t100", + "\t17\t92\t99", + "\t16\t93\t99", + "\t15\t92\t99", + "\t14\t92\t100", + "\t13\t92\t94\t96\t100", + "\t12\t96\t100", + "\t11\t96\t100", + "\t10\t96\t100", + "\t9\t96\t100", + "\t8\t97\t99", + "Namibia", + "\t-15\t12\t14", + "\t-16\t10\t26", + "\t-17\t10\t26", + "\t-18\t10\t26", + "\t-19\t10\t25", + "\t-20\t11\t21", + "\t-21\t12\t21", + "\t-22\t12\t21", + "\t-23\t13\t21", + "\t-24\t13\t20", + "\t-25\t13\t20", + "\t-26\t13\t20", + "\t-27\t13\t20", + "\t-28\t14\t20", + "\t-29\t14\t20", + "Nauru", + "\t1\t165\t167", + "\t0\t165\t167", + "\t-1\t165\t167", + "Navassa Island", + "\t19\t-76\t-74", + "\t18\t-76\t-74", + "\t17\t-76\t-74", + "Nepal", + "\t31\t79\t83", + "\t30\t79\t85", + "\t29\t79\t87", + "\t28\t79\t89", + "\t27\t79\t89", + "\t26\t80\t89", + "\t25\t83\t89", + "Netherlands", + "\t54\t3\t8", + "\t53\t3\t8", + "\t52\t2\t8", + "\t51\t2\t8", + "\t50\t2\t7", + "\t49\t4\t7", + "\t19\t-64\t-62", + "\t18\t-64\t-61", + "\t17\t-64\t-61", + "\t16\t-64\t-61", + "\t13\t-69\t-67", + "\t12\t-69\t-67", + "\t11\t-69\t-67", + "Netherlands Antilles", + "\t13\t-70\t-67", + "\t12\t-70\t-67", + "\t11\t-70\t-67", + "Netherlands Antilles", + "\t19\t-64\t-62", + "\t18\t-64\t-61", + "\t17\t-64\t-61", + "\t16\t-64\t-61", + "New Caledonia", + "\t-18\t158\t160\t162\t164", 
+ "\t-19\t158\t160\t162\t168", + "\t-20\t158\t160\t162\t169", + "\t-21\t162\t169", + "\t-22\t163\t169", + "\t-23\t165\t168", + "New Zealand", + "\t-7\t-173\t-171", + "\t-8\t-173\t-170", + "\t-9\t-173\t-170", + "\t-10\t-172\t-170", + "\t-28\t-178\t-176", + "\t-29\t-178\t-176", + "\t-30\t-178\t-176", + "\t-33\t171\t174", + "\t-34\t171\t175", + "\t-35\t171\t176", + "\t-36\t172\t179", + "\t-37\t172\t179", + "\t-38\t172\t179", + "\t-39\t171\t179", + "\t-40\t170\t179", + "\t-41\t169\t177", + "\t-42\t-177\t-175\t167\t177", + "\t-43\t-177\t-175\t166\t175", + "\t-44\t-177\t-175\t165\t174", + "\t-45\t-177\t-175\t165\t172", + "\t-46\t165\t172", + "\t-47\t165\t171", + "\t-48\t165\t169\t177\t179", + "\t-49\t164\t167\t177\t179", + "\t-50\t164\t167\t177\t179", + "\t-51\t164\t170", + "\t-52\t168\t170", + "\t-53\t168\t170", + "Nicaragua", + "\t16\t-84\t-82", + "\t15\t-87\t-81", + "\t14\t-88\t-81", + "\t13\t-88\t-81", + "\t12\t-88\t-82", + "\t11\t-88\t-82", + "\t10\t-87\t-82", + "\t9\t-85\t-82", + "Niger", + "\t24\t10\t14", + "\t23\t8\t16", + "\t22\t6\t16", + "\t21\t5\t16", + "\t20\t3\t16", + "\t19\t3\t16", + "\t18\t3\t16", + "\t17\t2\t16", + "\t16\t0\t16", + "\t15\t-1\t16", + "\t14\t-1\t15", + "\t13\t-1\t14", + "\t12\t-1\t14", + "\t11\t0\t10", + "\t10\t1\t4", + "Nigeria", + "\t14\t3\t15", + "\t13\t2\t15", + "\t12\t2\t15", + "\t11\t2\t15", + "\t10\t1\t15", + "\t9\t1\t14", + "\t8\t1\t14", + "\t7\t1\t13", + "\t6\t1\t13", + "\t5\t1\t12", + "\t4\t4\t10", + "\t3\t4\t9", + "Niue", + "\t-17\t-170\t-168", + "\t-18\t-170\t-168", + "\t-19\t-170\t-168", + "\t-20\t-170\t-168", + "Norfolk Island", + "\t-27\t166\t168", + "\t-28\t166\t168", + "\t-29\t166\t168", + "\t-30\t166\t168", + "North Korea", + "\t44\t128\t130", + "\t43\t127\t131", + "\t42\t125\t131", + "\t41\t123\t131", + "\t40\t123\t131", + "\t39\t123\t130", + "\t38\t123\t129", + "\t37\t123\t129", + "\t36\t123\t127", + "Northern Mariana Islands", + "\t21\t143\t146", + "\t20\t143\t146", + "\t19\t143\t146", + "\t18\t144\t146", + 
"\t17\t144\t146", + "\t16\t144\t146", + "\t15\t144\t146", + "\t14\t144\t146", + "\t13\t144\t146", + "Norway", + "\t72\t22\t29", + "\t71\t17\t32", + "\t70\t14\t32", + "\t69\t11\t32", + "\t68\t11\t31", + "\t67\t11\t26", + "\t66\t10\t18", + "\t65\t8\t17", + "\t64\t6\t15", + "\t63\t3\t15", + "\t62\t3\t13", + "\t61\t3\t13", + "\t60\t3\t13", + "\t59\t3\t13", + "\t58\t4\t13", + "\t57\t4\t12", + "\t56\t5\t8", + "Oman", + "\t27\t55\t57", + "\t26\t55\t57", + "\t25\t54\t58", + "\t24\t54\t60", + "\t23\t54\t60", + "\t22\t54\t60", + "\t21\t54\t60", + "\t20\t51\t60", + "\t19\t50\t59", + "\t18\t50\t58", + "\t17\t50\t58", + "\t16\t51\t57", + "\t15\t51\t55", + "Pakistan", + "\t38\t73\t75", + "\t37\t70\t77", + "\t36\t70\t78", + "\t35\t68\t78", + "\t34\t68\t78", + "\t33\t68\t78", + "\t32\t65\t76", + "\t31\t65\t76", + "\t30\t59\t75", + "\t29\t59\t75", + "\t28\t59\t74", + "\t27\t60\t73", + "\t26\t60\t72", + "\t25\t60\t72", + "\t24\t60\t72", + "\t23\t65\t72", + "\t22\t66\t69", + "Palau", + "\t8\t133\t135", + "\t7\t133\t135", + "\t6\t131\t135", + "\t5\t131\t135", + "\t4\t130\t133", + "\t3\t130\t132", + "\t2\t130\t132", + "\t1\t130\t132", + "Palmyra Atoll", + "\t6\t-163\t-161", + "\t5\t-163\t-161", + "\t4\t-163\t-161", + "Panama", + "\t10\t-83\t-76", + "\t9\t-84\t-76", + "\t8\t-84\t-76", + "\t7\t-84\t-76", + "\t6\t-82\t-76", + "Papua New Guinea", + "\t0\t141\t143\t145\t151", + "\t-1\t139\t143\t145\t153", + "\t-2\t139\t155", + "\t-3\t139\t155", + "\t-4\t139\t156", + "\t-5\t139\t156", + "\t-6\t139\t156", + "\t-7\t139\t156", + "\t-8\t139\t154", + "\t-9\t139\t154", + "\t-10\t139\t155", + "\t-11\t146\t155", + "\t-12\t152\t155", + "Paracel Islands", + "\t18\t110\t112", + "\t17\t110\t113", + "\t16\t110\t113", + "\t15\t110\t113", + "Paraguay", + "\t-18\t-62\t-57", + "\t-19\t-63\t-56", + "\t-20\t-63\t-56", + "\t-21\t-63\t-54", + "\t-22\t-63\t-53", + "\t-23\t-63\t-53", + "\t-24\t-62\t-53", + "\t-25\t-61\t-53", + "\t-26\t-59\t-53", + "\t-27\t-59\t-53", + "\t-28\t-59\t-54", + "Peru", + 
"\t1\t-76\t-73", + "\t0\t-76\t-72", + "\t-1\t-78\t-69", + "\t-2\t-81\t-69", + "\t-3\t-82\t-68", + "\t-4\t-82\t-68", + "\t-5\t-82\t-68", + "\t-6\t-82\t-71", + "\t-7\t-82\t-71", + "\t-8\t-80\t-69", + "\t-9\t-80\t-68", + "\t-10\t-79\t-68", + "\t-11\t-79\t-67", + "\t-12\t-78\t-67", + "\t-13\t-78\t-67", + "\t-14\t-77\t-67", + "\t-15\t-77\t-67", + "\t-16\t-76\t-67", + "\t-17\t-75\t-67", + "\t-18\t-73\t-68", + "\t-19\t-71\t-68", + "Philippines", + "\t22\t120\t122", + "\t21\t120\t123", + "\t20\t120\t123", + "\t19\t119\t123", + "\t18\t119\t123", + "\t17\t118\t123", + "\t16\t118\t123", + "\t15\t118\t125", + "\t14\t118\t125", + "\t13\t118\t126", + "\t12\t118\t126", + "\t11\t117\t127", + "\t10\t116\t127", + "\t9\t115\t127", + "\t8\t115\t127", + "\t7\t115\t127", + "\t6\t115\t127", + "\t5\t117\t127", + "\t4\t118\t126", + "\t3\t118\t120", + "Pitcairn Islands", + "\t-22\t-131\t-129", + "\t-23\t-131\t-127\t-125\t-123", + "\t-24\t-131\t-127\t-125\t-123", + "\t-25\t-131\t-127\t-125\t-123", + "\t-26\t-131\t-129", + "Poland", + "\t55\t13\t24", + "\t54\t13\t24", + "\t53\t13\t24", + "\t52\t13\t24", + "\t51\t13\t25", + "\t50\t13\t25", + "\t49\t13\t25", + "\t48\t16\t24", + "\t47\t21\t23", + "Portugal", + "\t43\t-9\t-7", + "\t42\t-9\t-5", + "\t41\t-9\t-5", + "\t40\t-10\t-5", + "\t39\t-10\t-5", + "\t38\t-10\t-5", + "\t37\t-10\t-5", + "\t36\t-9\t-6", + "\t35\t-8\t-6", + "Portugal: Azores", + "\t40\t-32\t-26", + "\t39\t-32\t-26", + "\t38\t-32\t-24", + "\t37\t-29\t-24", + "\t36\t-26\t-24", + "\t35\t-26\t-24", + "Portugal: Madeira", + "\t34\t-17\t-15", + "\t33\t-18\t-15", + "\t32\t-18\t-15", + "\t31\t-18\t-14", + "\t30\t-17\t-14", + "\t29\t-17\t-14", + "Puerto Rico", + "\t19\t-68\t-64", + "\t18\t-68\t-64", + "\t17\t-68\t-64", + "\t16\t-68\t-64", + "Qatar", + "\t27\t50\t52", + "\t26\t49\t52", + "\t25\t49\t52", + "\t24\t49\t52", + "\t23\t49\t52", + "Republic of the Congo", + "\t4\t15\t19", + "\t3\t12\t19", + "\t2\t12\t19", + "\t1\t12\t19", + "\t0\t11\t19", + "\t-1\t10\t18", + "\t-2\t10\t18", + 
"\t-3\t10\t17", + "\t-4\t10\t17", + "\t-5\t10\t16", + "\t-6\t10\t13", + "Reunion", + "\t-19\t54\t56", + "\t-20\t54\t56", + "\t-21\t54\t56", + "\t-22\t54\t56", + "Romania", + "\t49\t21\t28", + "\t48\t20\t29", + "\t47\t19\t29", + "\t46\t19\t30", + "\t45\t19\t30", + "\t44\t19\t30", + "\t43\t20\t30", + "\t42\t21\t29", + "Russia", + "\t82\t49\t51\t53\t66\t88\t97", + "\t81\t35\t37\t43\t66\t77\t81\t88\t100", + "\t80\t35\t37\t43\t66\t75\t81\t88\t105", + "\t79\t35\t37\t43\t66\t75\t81\t89\t108", + "\t78\t49\t52\t57\t60\t66\t68\t75\t78\t88\t108\t155\t157", + "\t77\t59\t70\t88\t114\t136\t143\t147\t153\t155\t157", + "\t76\t54\t70\t80\t114\t134\t153\t155\t157", + "\t75\t53\t70\t78\t117\t134\t153", + "\t74\t52\t130\t134\t151", + "\t73\t50\t61\t67\t130\t134\t151", + "\t72\t-180\t-174\t50\t59\t65\t159\t177\t180", + "\t71\t-180\t-174\t50\t61\t65\t163\t167\t172\t177\t180", + "\t70\t-180\t-174\t27\t37\t47\t180", + "\t69\t-180\t-175\t27\t180", + "\t68\t-180\t-171\t27\t180", + "\t67\t-180\t-168\t27\t180", + "\t66\t-180\t-167\t28\t180", + "\t65\t-180\t-167\t28\t180", + "\t64\t-180\t-167\t28\t180", + "\t63\t-176\t-171\t28\t180", + "\t62\t27\t180", + "\t61\t25\t180", + "\t60\t25\t175", + "\t59\t25\t173", + "\t58\t26\t167\t169\t171", + "\t57\t26\t143\t150\t165", + "\t56\t19\t23\t26\t141\t154\t167", + "\t55\t18\t23\t26\t144\t154\t169", + "\t54\t18\t23\t27\t144\t154\t169", + "\t53\t18\t23\t29\t144\t154\t163\t165\t169", + "\t52\t30\t63\t71\t144\t154\t161", + "\t51\t30\t63\t77\t121\t124\t145\t154\t159", + "\t50\t33\t62\t78\t121\t125\t145\t153\t159", + "\t49\t34\t50\t53\t62\t78\t99\t101\t120\t126\t145\t152\t157", + "\t48\t36\t49\t83\t90\t94\t98\t106\t120\t126\t145\t151\t156", + "\t47\t36\t50\t129\t145\t149\t155", + "\t46\t35\t50\t129\t144\t146\t154", + "\t45\t35\t50\t129\t153", + "\t44\t35\t49\t129\t138\t141\t151", + "\t43\t36\t49\t129\t137\t144\t148", + "\t42\t38\t49\t129\t136\t144\t147", + "\t41\t42\t49\t129\t135", + "\t40\t45\t49", + "Rwanda", + "\t0\t28\t31", + "\t-1\t27\t31", + 
"\t-2\t27\t31", + "\t-3\t27\t31", + "Saint Helena", + "\t-6\t-15\t-13", + "\t-7\t-15\t-13", + "\t-8\t-15\t-13", + "\t-14\t-6\t-4", + "\t-15\t-6\t-4", + "\t-16\t-6\t-4", + "\t-17\t-6\t-4", + "\t-36\t-13\t-11", + "\t-37\t-13\t-11", + "\t-38\t-13\t-11", + "\t-39\t-11\t-8", + "\t-40\t-11\t-8", + "\t-41\t-11\t-8", + "Saint Kitts and Nevis", + "\t18\t-63\t-61", + "\t17\t-63\t-61", + "\t16\t-63\t-61", + "Saint Lucia", + "\t15\t-62\t-59", + "\t14\t-62\t-59", + "\t13\t-62\t-59", + "\t12\t-62\t-59", + "Saint Pierre and Miquelon", + "\t48\t-57\t-55", + "\t47\t-57\t-55", + "\t46\t-57\t-55", + "\t45\t-57\t-55", + "Saint Vincent and the Grenadines", + "\t14\t-62\t-60", + "\t13\t-62\t-60", + "\t12\t-62\t-60", + "\t11\t-62\t-60", + "Samoa", + "\t-12\t-173\t-170", + "\t-13\t-173\t-170", + "\t-14\t-173\t-170", + "\t-15\t-172\t-170", + "San Marino", + "\t44\t11\t13", + "\t43\t11\t13", + "\t42\t11\t13", + "Sao Tome and Principe", + "\t2\t6\t8", + "\t1\t5\t8", + "\t0\t5\t8", + "\t-1\t5\t7", + "Saudi Arabia", + "\t33\t37\t40", + "\t32\t35\t43", + "\t31\t35\t44", + "\t30\t33\t48", + "\t29\t33\t49", + "\t28\t33\t50", + "\t27\t33\t51", + "\t26\t33\t51", + "\t25\t34\t52", + "\t24\t35\t53", + "\t23\t36\t56", + "\t22\t37\t56", + "\t21\t37\t56", + "\t20\t37\t56", + "\t19\t38\t56", + "\t18\t39\t55", + "\t17\t40\t52", + "\t16\t40\t48", + "\t15\t40\t48", + "Senegal", + "\t17\t-17\t-12", + "\t16\t-17\t-11", + "\t15\t-18\t-10", + "\t14\t-18\t-10", + "\t13\t-18\t-10", + "\t12\t-17\t-10", + "\t11\t-17\t-10", + "Serbia", + "\t47\t18\t21", + "\t46\t17\t22", + "\t45\t17\t23", + "\t44\t17\t23", + "\t43\t17\t23", + "\t42\t18\t23", + "\t41\t19\t23", + "Seychelles", + "\t-2\t54\t56", + "\t-3\t54\t56", + "\t-4\t52\t56", + "\t-5\t51\t56", + "\t-6\t51\t57", + "\t-7\t51\t53\t55\t57", + "\t-8\t45\t48\t51\t53\t55\t57", + "\t-9\t45\t48", + "\t-10\t45\t48", + "Sierra Leone", + "\t10\t-14\t-9", + "\t9\t-14\t-9", + "\t8\t-14\t-9", + "\t7\t-14\t-9", + "\t6\t-13\t-9", + "\t5\t-12\t-10", + "Singapore", + 
"\t2\t102\t105", + "\t1\t102\t105", + "\t0\t102\t105", + "Sint Maarten", + "\t19\t-64\t-62", + "\t18\t-64\t-62", + "\t17\t-64\t-62", + "Slovakia", + "\t50\t16\t23", + "\t49\t15\t23", + "\t48\t15\t23", + "\t47\t15\t23", + "\t46\t16\t19", + "Slovenia", + "\t47\t12\t17", + "\t46\t12\t17", + "\t45\t12\t17", + "\t44\t12\t16", + "Solomon Islands", + "\t-5\t154\t158", + "\t-6\t154\t161", + "\t-7\t154\t163", + "\t-8\t154\t163\t166\t168", + "\t-9\t155\t168", + "\t-10\t158\t168", + "\t-11\t158\t169", + "\t-12\t158\t161\t165\t169", + "\t-13\t167\t169", + "Somalia", + "\t12\t47\t52", + "\t11\t47\t52", + "\t10\t47\t52", + "\t9\t47\t52", + "\t8\t45\t51", + "\t7\t44\t51", + "\t6\t44\t50", + "\t5\t40\t50", + "\t4\t40\t49", + "\t3\t39\t49", + "\t2\t39\t48", + "\t1\t39\t47", + "\t0\t39\t46", + "\t-1\t39\t44", + "\t-2\t40\t42", + "Somalia", + "\t12\t42\t44\t46\t49", + "\t11\t41\t49", + "\t10\t41\t49", + "\t9\t41\t49", + "\t8\t42\t49", + "\t7\t43\t49", + "\t6\t45\t48", + "South Africa", + "\t-21\t26\t32", + "\t-22\t25\t32", + "\t-23\t18\t21\t24\t32", + "\t-24\t18\t32", + "\t-25\t18\t33", + "\t-26\t18\t33", + "\t-27\t15\t33", + "\t-28\t15\t33", + "\t-29\t15\t33", + "\t-30\t15\t32", + "\t-31\t16\t31", + "\t-32\t16\t31", + "\t-33\t16\t30", + "\t-34\t16\t28", + "\t-35\t17\t26", + "\t-45\t36\t38", + "\t-46\t36\t38", + "\t-47\t36\t38", + "South Georgia and the South Sandwich Islands", + "\t-52\t-43\t-36", + "\t-53\t-43\t-33", + "\t-54\t-43\t-33", + "\t-55\t-39\t-33\t-29\t-26", + "\t-56\t-35\t-33\t-29\t-25", + "\t-57\t-29\t-25", + "\t-58\t-28\t-25", + "\t-59\t-28\t-25", + "\t-60\t-28\t-25", + "South Korea", + "\t39\t125\t129", + "\t38\t123\t131", + "\t37\t123\t131", + "\t36\t123\t131", + "\t35\t124\t130", + "\t34\t124\t130", + "\t33\t124\t129", + "\t32\t125\t127", + "Spain", + "\t44\t-10\t0", + "\t43\t-10\t4", + "\t42\t-10\t4", + "\t41\t-10\t5", + "\t40\t-9\t5", + "\t39\t-8\t5", + "\t38\t-8\t5", + "\t37\t-8\t2", + "\t36\t-8\t1", + "\t35\t-7\t0", + "\t34\t-6\t-1", + "Spain: Canary Islands", + 
"\t30\t-14\t-12", + "\t29\t-19\t-12", + "\t28\t-19\t-12", + "\t27\t-19\t-12", + "\t26\t-19\t-14", + "Spratly Islands", + "\t12\t113\t115", + "\t11\t112\t115", + "\t10\t112\t116", + "\t9\t110\t116", + "\t8\t110\t116", + "\t7\t110\t116", + "\t6\t112\t114", + "\t5\t112\t114", + "Sri Lanka", + "\t10\t78\t81", + "\t9\t78\t82", + "\t8\t78\t82", + "\t7\t78\t82", + "\t6\t78\t82", + "\t5\t78\t82", + "\t4\t79\t81", + "Sudan", + "\t23\t30\t32", + "\t22\t23\t38", + "\t21\t23\t38", + "\t20\t22\t38", + "\t19\t22\t39", + "\t18\t22\t39", + "\t17\t22\t39", + "\t16\t21\t39", + "\t15\t21\t38", + "\t14\t20\t37", + "\t13\t20\t37", + "\t12\t20\t37", + "\t11\t20\t37", + "\t10\t21\t36", + "\t9\t21\t35", + "\t8\t22\t35", + "\t7\t22\t35", + "\t6\t23\t36", + "\t5\t25\t36", + "\t4\t25\t36", + "\t3\t26\t35", + "\t2\t29\t34", + "Suriname", + "\t7\t-56\t-54", + "\t6\t-58\t-52", + "\t5\t-59\t-52", + "\t4\t-59\t-52", + "\t3\t-59\t-52", + "\t2\t-59\t-52", + "\t1\t-58\t-53", + "\t0\t-57\t-54", + "Svalbard", + "\t81\t15\t28\t30\t34", + "\t80\t9\t34", + "\t79\t9\t34", + "\t78\t9\t31", + "\t77\t9\t31", + "\t76\t12\t26", + "\t75\t14\t20\t23\t26", + "\t74\t17\t20", + "\t73\t17\t20", + "Swaziland", + "\t-24\t30\t33", + "\t-25\t29\t33", + "\t-26\t29\t33", + "\t-27\t29\t33", + "\t-28\t29\t32", + "Sweden", + "\t70\t19\t21", + "\t69\t16\t24", + "\t68\t15\t24", + "\t67\t13\t25", + "\t66\t13\t25", + "\t65\t11\t25", + "\t64\t10\t25", + "\t63\t10\t22", + "\t62\t10\t21", + "\t61\t11\t19", + "\t60\t10\t20", + "\t59\t10\t20", + "\t58\t10\t20", + "\t57\t10\t20", + "\t56\t10\t20", + "\t55\t11\t19", + "\t54\t11\t15", + "Switzerland", + "\t48\t5\t10", + "\t47\t4\t11", + "\t46\t4\t11", + "\t45\t4\t11", + "\t44\t5\t10", + "Syria", + "\t38\t39\t43", + "\t37\t35\t43", + "\t36\t34\t43", + "\t35\t34\t43", + "\t34\t34\t42", + "\t33\t34\t42", + "\t32\t34\t40", + "\t31\t34\t39", + "Taiwan", + "\t26\t120\t123", + "\t25\t119\t123", + "\t24\t118\t123", + "\t23\t118\t122", + "\t22\t118\t122", + "\t21\t119\t122", + "\t20\t119\t121", 
+ "Tajikistan", + "\t42\t69\t71", + "\t41\t67\t71", + "\t40\t66\t74", + "\t39\t66\t75", + "\t38\t66\t76", + "\t37\t66\t76", + "\t36\t66\t76", + "\t35\t66\t73", + "Tanzania", + "\t1\t29\t31", + "\t0\t29\t36", + "\t-1\t29\t38", + "\t-2\t29\t39", + "\t-3\t28\t40", + "\t-4\t28\t40", + "\t-5\t28\t40", + "\t-6\t28\t40", + "\t-7\t28\t40", + "\t-8\t29\t40", + "\t-9\t29\t41", + "\t-10\t30\t41", + "\t-11\t33\t41", + "\t-12\t33\t40", + "Thailand", + "\t21\t98\t101", + "\t20\t96\t102", + "\t19\t96\t105", + "\t18\t96\t105", + "\t17\t96\t106", + "\t16\t96\t106", + "\t15\t97\t106", + "\t14\t97\t106", + "\t13\t97\t106", + "\t12\t97\t103", + "\t11\t97\t103", + "\t10\t96\t103", + "\t9\t96\t101", + "\t8\t96\t101", + "\t7\t97\t103", + "\t6\t97\t103", + "\t5\t98\t103", + "\t4\t99\t102", + "Togo", + "\t12\t-1\t1", + "\t11\t-1\t2", + "\t10\t-1\t2", + "\t9\t-1\t2", + "\t8\t-1\t2", + "\t7\t-1\t2", + "\t6\t-1\t2", + "\t5\t-1\t2", + "Tokelau", + "\t-8\t-172\t-170", + "\t-9\t-172\t-170", + "\t-10\t-172\t-170", + "Tonga", + "\t-14\t-176\t-172", + "\t-15\t-176\t-172", + "\t-16\t-176\t-172", + "\t-17\t-175\t-172", + "\t-18\t-176\t-172", + "\t-19\t-176\t-172", + "\t-20\t-176\t-173", + "\t-21\t-177\t-173", + "\t-22\t-177\t-173", + "\t-23\t-177\t-175", + "Trinidad and Tobago", + "\t12\t-61\t-59", + "\t11\t-62\t-59", + "\t10\t-62\t-59", + "\t9\t-62\t-59", + "Tromelin Island", + "\t-14\t53\t55", + "\t-15\t53\t55", + "\t-16\t53\t55", + "Tunisia", + "\t38\t7\t12", + "\t37\t7\t12", + "\t36\t7\t12", + "\t35\t6\t12", + "\t34\t6\t12", + "\t33\t6\t12", + "\t32\t6\t12", + "\t31\t7\t12", + "\t30\t8\t11", + "\t29\t8\t11", + "Turkey", + "\t43\t25\t28\t32\t36", + "\t42\t25\t44", + "\t41\t24\t45", + "\t40\t24\t45", + "\t39\t24\t45", + "\t38\t24\t45", + "\t37\t25\t45", + "\t36\t26\t45", + "\t35\t26\t41\t43\t45", + "\t34\t34\t37", + "Turkmenistan", + "\t43\t51\t61", + "\t42\t51\t62", + "\t41\t51\t63", + "\t40\t51\t64", + "\t39\t51\t67", + "\t38\t51\t67", + "\t37\t52\t67", + "\t36\t52\t67", + "\t35\t59\t65", + 
"\t34\t60\t65", + "Turks and Caicos Islands", + "\t22\t-73\t-70", + "\t21\t-73\t-70", + "\t20\t-73\t-70", + "Tuvalu", + "\t-4\t175\t177", + "\t-5\t175\t178", + "\t-6\t175\t179", + "\t-7\t175\t180", + "\t-8\t176\t180", + "\t-9\t177\t180", + "\t-10\t178\t180", + "Uganda", + "\t5\t32\t35", + "\t4\t29\t35", + "\t3\t29\t35", + "\t2\t29\t36", + "\t1\t28\t36", + "\t0\t28\t36", + "\t-1\t28\t35", + "\t-2\t28\t34", + "Ukraine", + "\t53\t29\t35", + "\t52\t22\t36", + "\t51\t22\t39", + "\t50\t21\t41", + "\t49\t21\t41", + "\t48\t21\t41", + "\t47\t21\t41", + "\t46\t21\t40", + "\t45\t27\t38", + "\t44\t27\t37", + "\t43\t32\t36", + "United Arab Emirates", + "\t27\t55\t57", + "\t26\t53\t57", + "\t25\t50\t57", + "\t24\t50\t57", + "\t23\t50\t57", + "\t22\t50\t56", + "\t21\t51\t56", + "United Kingdom", + "\t61\t-3\t1", + "\t60\t-4\t1", + "\t59\t-8\t1", + "\t58\t-14\t-12\t-9\t0", + "\t57\t-14\t-12\t-9\t0", + "\t56\t-14\t-12\t-9\t0", + "\t55\t-9\t1", + "\t54\t-9\t1", + "\t53\t-9\t2", + "\t52\t-6\t2", + "\t51\t-6\t2", + "\t50\t-7\t2", + "\t49\t-7\t1", + "\t48\t-7\t-4", + "Uruguay", + "\t-29\t-58\t-54", + "\t-30\t-59\t-52", + "\t-31\t-59\t-52", + "\t-32\t-59\t-52", + "\t-33\t-59\t-52", + "\t-34\t-59\t-52", + "\t-35\t-59\t-52", + "USA", + "\t72\t-158\t-153", + "\t71\t-163\t-141", + "\t70\t-164\t-140", + "\t69\t-167\t-140", + "\t68\t-167\t-140", + "\t67\t-167\t-140", + "\t66\t-169\t-140", + "\t65\t-169\t-140", + "\t64\t-172\t-140", + "\t63\t-172\t-140", + "\t62\t-172\t-140", + "\t61\t-174\t-138", + "\t60\t-174\t-171\t-168\t-133", + "\t59\t-174\t-171\t-168\t-132", + "\t58\t-171\t-131", + "\t57\t-171\t-168\t-163\t-150\t-138\t-129", + "\t56\t-171\t-168\t-164\t-151\t-137\t-128", + "\t55\t-170\t-152\t-136\t-128", + "\t54\t-170\t-154\t-135\t-128\t171\t173", + "\t53\t-177\t-158\t-134\t-129\t171\t180", + "\t52\t-180\t-165\t171\t180", + "\t51\t-180\t-167\t171\t180", + "\t50\t-180\t-174\t-96\t-93\t176\t180", + "\t49\t-125\t-86", + "\t48\t-125\t-84\t-70\t-66", + "\t47\t-125\t-82\t-71\t-66", + 
"\t46\t-125\t-81\t-75\t-66", + "\t45\t-125\t-81\t-77\t-65", + "\t44\t-125\t-65", + "\t43\t-125\t-65", + "\t42\t-125\t-68", + "\t41\t-125\t-68", + "\t40\t-125\t-68", + "\t39\t-125\t-71", + "\t38\t-124\t-73", + "\t37\t-124\t-73", + "\t36\t-124\t-74", + "\t35\t-123\t-74", + "\t34\t-122\t-74", + "\t33\t-121\t-75", + "\t32\t-121\t-76", + "\t31\t-119\t-78", + "\t30\t-114\t-79", + "\t29\t-106\t-79", + "\t28\t-105\t-79", + "\t27\t-174\t-172\t-104\t-94\t-90\t-88\t-83\t-79", + "\t26\t-174\t-166\t-100\t-95\t-83\t-79", + "\t25\t-174\t-166\t-100\t-96\t-83\t-79", + "\t24\t-172\t-160\t-98\t-96\t-83\t-79", + "\t23\t-165\t-158\t-83\t-79", + "\t22\t-165\t-155", + "\t21\t-161\t-154", + "\t20\t-161\t-153", + "\t19\t-158\t-153", + "\t18\t-157\t-153", + "\t17\t-156\t-154", + "USA: Alabama", + "\t36\t-89\t-84", + "\t35\t-89\t-84", + "\t34\t-89\t-84", + "\t33\t-89\t-83", + "\t32\t-89\t-83", + "\t31\t-89\t-83", + "\t30\t-89\t-83", + "\t29\t-89\t-86", + "USA: Alaska", + "\t72\t-158\t-153", + "\t71\t-163\t-141", + "\t70\t-164\t-140", + "\t69\t-167\t-140", + "\t68\t-167\t-140", + "\t67\t-167\t-140", + "\t66\t-169\t-140", + "\t65\t-169\t-140", + "\t64\t-172\t-140", + "\t63\t-172\t-140", + "\t62\t-172\t-140", + "\t61\t-174\t-138", + "\t60\t-174\t-171\t-168\t-133", + "\t59\t-174\t-171\t-168\t-132", + "\t58\t-171\t-131", + "\t57\t-171\t-168\t-163\t-150\t-138\t-129", + "\t56\t-171\t-168\t-164\t-151\t-137\t-128", + "\t55\t-170\t-152\t-136\t-128", + "\t54\t-170\t-154\t-135\t-128\t171\t173", + "\t53\t-177\t-158\t-134\t-129\t171\t180", + "\t52\t-180\t-165\t171\t180", + "\t51\t-180\t-167\t171\t180", + "\t50\t-180\t-174\t176\t180", + "USA: Alaska, Aleutian Islands", + "\t60\t-154\t-149\t-147\t-145", + "\t59\t-162\t-159\t-154\t-149\t-147\t-145", + "\t58\t-171\t-169\t-162\t-159\t-155\t-149\t-147\t-145", + "\t57\t-171\t-168\t-162\t-150", + "\t56\t-171\t-168\t-164\t-151", + "\t55\t-170\t-152", + "\t54\t-170\t-154", + "\t53\t-177\t-158", + "\t52\t-180\t-165", + "\t51\t-180\t-167", + "\t50\t-180\t-174", + 
"USA: Arizona", + "\t38\t-115\t-108", + "\t37\t-115\t-108", + "\t36\t-115\t-108", + "\t35\t-115\t-108", + "\t34\t-115\t-108", + "\t33\t-115\t-108", + "\t32\t-115\t-108", + "\t31\t-115\t-108", + "\t30\t-114\t-108", + "USA: Arkansas", + "\t37\t-95\t-88", + "\t36\t-95\t-88", + "\t35\t-95\t-88", + "\t34\t-95\t-88", + "\t33\t-95\t-89", + "\t32\t-95\t-90", + "USA: California", + "\t43\t-125\t-119", + "\t42\t-125\t-119", + "\t41\t-125\t-119", + "\t40\t-125\t-119", + "\t39\t-125\t-117", + "\t38\t-124\t-116", + "\t37\t-124\t-115", + "\t36\t-124\t-113", + "\t35\t-123\t-113", + "\t34\t-122\t-113", + "\t33\t-121\t-113", + "\t32\t-121\t-113", + "\t31\t-119\t-113", + "USA: Colorado", + "\t42\t-110\t-101", + "\t41\t-110\t-101", + "\t40\t-110\t-101", + "\t39\t-110\t-101", + "\t38\t-110\t-101", + "\t37\t-110\t-101", + "\t36\t-110\t-101", + "USA: Connecticut", + "\t43\t-74\t-70", + "\t42\t-74\t-70", + "\t41\t-74\t-70", + "\t40\t-74\t-70", + "USA: Delaware", + "\t40\t-76\t-74", + "\t39\t-76\t-74", + "\t38\t-76\t-74", + "\t37\t-76\t-74", + "USA: District of Columbia", + "\t40\t-78\t-76", + "\t39\t-78\t-75", + "\t38\t-78\t-75", + "\t37\t-78\t-75", + "USA: Florida", + "\t32\t-88\t-84", + "\t31\t-88\t-80", + "\t30\t-88\t-79", + "\t29\t-88\t-79", + "\t28\t-86\t-79", + "\t27\t-83\t-79", + "\t26\t-83\t-79", + "\t25\t-83\t-79", + "\t24\t-83\t-79", + "\t23\t-83\t-79", + "USA: Georgia", + "\t36\t-86\t-82", + "\t35\t-86\t-81", + "\t34\t-86\t-80", + "\t33\t-86\t-79", + "\t32\t-86\t-79", + "\t31\t-86\t-79", + "\t30\t-86\t-79", + "\t29\t-86\t-80", + "USA: Hawaii", + "\t27\t-174\t-172", + "\t26\t-174\t-166", + "\t25\t-174\t-166", + "\t24\t-172\t-160", + "\t23\t-165\t-158", + "\t22\t-165\t-155", + "\t21\t-161\t-154", + "\t20\t-161\t-153", + "\t19\t-158\t-153", + "\t18\t-157\t-153", + "\t17\t-156\t-154", + "USA: Idaho", + "\t49\t-118\t-115", + "\t48\t-118\t-114", + "\t47\t-118\t-113", + "\t46\t-118\t-112", + "\t45\t-118\t-110", + "\t44\t-118\t-110", + "\t43\t-118\t-110", + "\t42\t-118\t-110", + 
"\t41\t-118\t-110", + "USA: Illinois", + "\t43\t-91\t-86", + "\t42\t-92\t-86", + "\t41\t-92\t-86", + "\t40\t-92\t-86", + "\t39\t-92\t-86", + "\t38\t-92\t-86", + "\t37\t-91\t-86", + "\t36\t-91\t-87", + "\t35\t-90\t-88", + "USA: Indiana", + "\t42\t-88\t-83", + "\t41\t-88\t-83", + "\t40\t-88\t-83", + "\t39\t-89\t-83", + "\t38\t-89\t-83", + "\t37\t-89\t-83", + "\t36\t-89\t-85", + "USA: Iowa", + "\t44\t-97\t-90", + "\t43\t-97\t-89", + "\t42\t-97\t-89", + "\t41\t-97\t-89", + "\t40\t-97\t-89", + "\t39\t-96\t-89", + "USA: Kansas", + "\t41\t-103\t-94", + "\t40\t-103\t-93", + "\t39\t-103\t-93", + "\t38\t-103\t-93", + "\t37\t-103\t-93", + "\t36\t-103\t-93", + "USA: Kentucky", + "\t40\t-85\t-83", + "\t39\t-87\t-81", + "\t38\t-90\t-80", + "\t37\t-90\t-80", + "\t36\t-90\t-80", + "\t35\t-90\t-81", + "USA: Louisiana", + "\t34\t-95\t-90", + "\t33\t-95\t-89", + "\t32\t-95\t-88", + "\t31\t-95\t-87", + "\t30\t-95\t-87", + "\t29\t-94\t-87", + "\t28\t-94\t-87", + "\t27\t-90\t-88", + "USA: Maine", + "\t48\t-70\t-66", + "\t47\t-71\t-66", + "\t46\t-72\t-66", + "\t45\t-72\t-65", + "\t44\t-72\t-65", + "\t43\t-72\t-65", + "\t42\t-71\t-68", + "USA: Maryland", + "\t40\t-80\t-74", + "\t39\t-80\t-74", + "\t38\t-80\t-74", + "\t37\t-78\t-74", + "\t36\t-77\t-74", + "USA: Massachusetts", + "\t43\t-74\t-69", + "\t42\t-74\t-68", + "\t41\t-74\t-68", + "\t40\t-72\t-68", + "USA: Michigan", + "\t49\t-90\t-86", + "\t48\t-91\t-84", + "\t47\t-91\t-82", + "\t46\t-91\t-81", + "\t45\t-91\t-81", + "\t44\t-89\t-81", + "\t43\t-88\t-81", + "\t42\t-88\t-81", + "\t41\t-88\t-81", + "\t40\t-88\t-82", + "USA: Minnesota", + "\t50\t-96\t-93", + "\t49\t-98\t-88", + "\t48\t-98\t-88", + "\t47\t-98\t-88", + "\t46\t-98\t-88", + "\t45\t-97\t-90", + "\t44\t-97\t-90", + "\t43\t-97\t-90", + "\t42\t-97\t-90", + "USA: Mississippi", + "\t36\t-91\t-88", + "\t35\t-92\t-87", + "\t34\t-92\t-87", + "\t33\t-92\t-87", + "\t32\t-92\t-87", + "\t31\t-92\t-87", + "\t30\t-92\t-87", + "\t29\t-90\t-87", + "USA: Missouri", + "\t41\t-96\t-90", + 
"\t40\t-96\t-89", + "\t39\t-96\t-89", + "\t38\t-96\t-88", + "\t37\t-95\t-88", + "\t36\t-95\t-88", + "\t35\t-95\t-88", + "\t34\t-91\t-88", + "USA: Montana", + "\t49\t-117\t-103", + "\t48\t-117\t-103", + "\t47\t-117\t-103", + "\t46\t-117\t-103", + "\t45\t-116\t-103", + "\t44\t-115\t-103", + "\t43\t-114\t-110", + "USA: Nebraska", + "\t44\t-105\t-97", + "\t43\t-105\t-95", + "\t42\t-105\t-94", + "\t41\t-105\t-94", + "\t40\t-105\t-94", + "\t39\t-103\t-94", + "USA: Nevada", + "\t43\t-121\t-113", + "\t42\t-121\t-113", + "\t41\t-121\t-113", + "\t40\t-121\t-113", + "\t39\t-121\t-113", + "\t38\t-121\t-113", + "\t37\t-120\t-113", + "\t36\t-119\t-113", + "\t35\t-118\t-113", + "\t34\t-116\t-113", + "USA: New Hampshire", + "\t46\t-72\t-70", + "\t45\t-73\t-69", + "\t44\t-73\t-69", + "\t43\t-73\t-69", + "\t42\t-73\t-69", + "\t41\t-73\t-69", + "USA: New Jersey", + "\t42\t-76\t-73", + "\t41\t-76\t-72", + "\t40\t-76\t-72", + "\t39\t-76\t-72", + "\t38\t-76\t-73", + "\t37\t-75\t-73", + "USA: New Mexico", + "\t38\t-110\t-102", + "\t37\t-110\t-102", + "\t36\t-110\t-102", + "\t35\t-110\t-102", + "\t34\t-110\t-102", + "\t33\t-110\t-102", + "\t32\t-110\t-102", + "\t31\t-110\t-102", + "\t30\t-110\t-105", + "USA: New York", + "\t46\t-75\t-72", + "\t45\t-77\t-72", + "\t44\t-80\t-72", + "\t43\t-80\t-72", + "\t42\t-80\t-70", + "\t41\t-80\t-70", + "\t40\t-79\t-70", + "\t39\t-75\t-71", + "USA: North Carolina", + "\t37\t-83\t-74", + "\t36\t-85\t-74", + "\t35\t-85\t-74", + "\t34\t-85\t-74", + "\t33\t-85\t-75", + "\t32\t-79\t-76", + "USA: North Dakota", + "\t49\t-105\t-96", + "\t48\t-105\t-95", + "\t47\t-105\t-95", + "\t46\t-105\t-95", + "\t45\t-105\t-95", + "\t44\t-105\t-95", + "USA: Ohio", + "\t43\t-82\t-79", + "\t42\t-85\t-79", + "\t41\t-85\t-79", + "\t40\t-85\t-79", + "\t39\t-85\t-79", + "\t38\t-85\t-79", + "\t37\t-85\t-80", + "USA: Oklahoma", + "\t38\t-104\t-93", + "\t37\t-104\t-93", + "\t36\t-104\t-93", + "\t35\t-104\t-93", + "\t34\t-101\t-93", + "\t33\t-101\t-93", + "\t32\t-98\t-93", + "USA: 
Oregon", + "\t47\t-124\t-115", + "\t46\t-125\t-115", + "\t45\t-125\t-115", + "\t44\t-125\t-115", + "\t43\t-125\t-115", + "\t42\t-125\t-115", + "\t41\t-125\t-116", + "USA: Pennsylvania", + "\t43\t-81\t-77", + "\t42\t-81\t-73", + "\t41\t-81\t-73", + "\t40\t-81\t-73", + "\t39\t-81\t-73", + "\t38\t-81\t-74", + "USA: Rhode Island", + "\t43\t-72\t-70", + "\t42\t-72\t-70", + "\t41\t-72\t-70", + "\t40\t-72\t-70", + "USA: South Carolina", + "\t36\t-84\t-79", + "\t35\t-84\t-77", + "\t34\t-84\t-77", + "\t33\t-84\t-77", + "\t32\t-83\t-77", + "\t31\t-82\t-78", + "USA: South Dakota", + "\t46\t-105\t-95", + "\t45\t-105\t-95", + "\t44\t-105\t-95", + "\t43\t-105\t-95", + "\t42\t-105\t-95", + "\t41\t-99\t-95", + "USA: Tennessee", + "\t37\t-90\t-80", + "\t36\t-91\t-80", + "\t35\t-91\t-80", + "\t34\t-91\t-81", + "\t33\t-90\t-87\t-85\t-83", + "USA: Texas", + "\t37\t-104\t-99", + "\t36\t-104\t-99", + "\t35\t-104\t-97", + "\t34\t-104\t-93", + "\t33\t-107\t-93", + "\t32\t-107\t-92", + "\t31\t-107\t-92", + "\t30\t-107\t-92", + "\t29\t-106\t-92", + "\t28\t-105\t-92", + "\t27\t-104\t-94", + "\t26\t-100\t-95", + "\t25\t-100\t-96", + "\t24\t-98\t-96", + "USA: Utah", + "\t43\t-115\t-110", + "\t42\t-115\t-108", + "\t41\t-115\t-108", + "\t40\t-115\t-108", + "\t39\t-115\t-108", + "\t38\t-115\t-108", + "\t37\t-115\t-108", + "\t36\t-115\t-108", + "USA: Vermont", + "\t46\t-74\t-70", + "\t45\t-74\t-70", + "\t44\t-74\t-70", + "\t43\t-74\t-70", + "\t42\t-74\t-71", + "\t41\t-74\t-71", + "USA: Virginia", + "\t40\t-79\t-76", + "\t39\t-81\t-74", + "\t38\t-83\t-74", + "\t37\t-84\t-74", + "\t36\t-84\t-74", + "\t35\t-84\t-74", + "USA: Washington", + "\t49\t-125\t-116", + "\t48\t-125\t-116", + "\t47\t-125\t-115", + "\t46\t-125\t-115", + "\t45\t-125\t-115", + "\t44\t-123\t-118", + "USA: West Virginia", + "\t41\t-81\t-79", + "\t40\t-83\t-76", + "\t39\t-83\t-76", + "\t38\t-83\t-76", + "\t37\t-83\t-77", + "\t36\t-83\t-79", + "USA: Wisconsin", + "\t48\t-92\t-88", + "\t47\t-93\t-87", + "\t46\t-93\t-85", + 
"\t45\t-93\t-85", + "\t44\t-93\t-85", + "\t43\t-93\t-85", + "\t42\t-92\t-86", + "\t41\t-92\t-86", + "USA: Wyoming", + "\t46\t-112\t-103", + "\t45\t-112\t-103", + "\t44\t-112\t-103", + "\t43\t-112\t-103", + "\t42\t-112\t-103", + "\t41\t-112\t-103", + "\t40\t-112\t-103", + "Uzbekistan", + "\t46\t55\t60", + "\t45\t54\t62", + "\t44\t54\t66", + "\t43\t54\t67\t69\t72", + "\t42\t54\t73", + "\t41\t54\t74", + "\t40\t54\t74", + "\t39\t60\t74", + "\t38\t61\t72", + "\t37\t63\t69", + "\t36\t65\t69", + "Vanuatu", + "\t-12\t165\t168", + "\t-13\t165\t169", + "\t-14\t165\t169", + "\t-15\t165\t169", + "\t-16\t165\t169", + "\t-17\t166\t170", + "\t-18\t167\t170", + "\t-19\t167\t170", + "\t-20\t168\t170", + "\t-21\t168\t170", + "Venezuela", + "\t13\t-71\t-66", + "\t12\t-73\t-62", + "\t11\t-73\t-60", + "\t10\t-74\t-59", + "\t9\t-74\t-58", + "\t8\t-74\t-58", + "\t7\t-73\t-58", + "\t6\t-73\t-59", + "\t5\t-72\t-59", + "\t4\t-68\t-59", + "\t3\t-68\t-59", + "\t2\t-68\t-61", + "\t1\t-68\t-62", + "\t0\t-68\t-62", + "\t-1\t-67\t-64", + "Viet Nam", + "\t24\t103\t106", + "\t23\t101\t107", + "\t22\t101\t108", + "\t21\t101\t108", + "\t20\t101\t108", + "\t19\t102\t108", + "\t18\t102\t108", + "\t17\t103\t109", + "\t16\t104\t109", + "\t15\t105\t110", + "\t14\t106\t110", + "\t13\t105\t110", + "\t12\t104\t110", + "\t11\t102\t110", + "\t10\t102\t110", + "\t9\t102\t109", + "\t8\t103\t107", + "\t7\t103\t107", + "Virgin Islands", + "\t19\t-66\t-63", + "\t18\t-66\t-63", + "\t17\t-66\t-63", + "\t16\t-65\t-63", + "Wake Island", + "\t20\t165\t167", + "\t19\t165\t167", + "\t18\t165\t167", + "Wallis and Futuna", + "\t-12\t-177\t-175", + "\t-13\t-179\t-175", + "\t-14\t-179\t-175", + "\t-15\t-179\t-176", + "West Bank", + "\t33\t33\t36", + "\t32\t33\t36", + "\t31\t33\t36", + "\t30\t33\t36", + "Western Sahara", + "\t28\t-14\t-7", + "\t27\t-15\t-7", + "\t26\t-15\t-7", + "\t25\t-16\t-7", + "\t24\t-17\t-7", + "\t23\t-17\t-11", + "\t22\t-18\t-11", + "\t21\t-18\t-12", + "\t20\t-18\t-12", + "\t19\t-18\t-16", + "Yemen", + 
"\t19\t47\t53", + "\t18\t42\t53", + "\t17\t41\t54", + "\t16\t41\t54", + "\t15\t41\t54", + "\t14\t41\t53", + "\t13\t41\t55", + "\t12\t41\t49\t51\t55", + "\t11\t42\t46\t51\t55", + "Zambia", + "\t-7\t27\t32", + "\t-8\t27\t34", + "\t-9\t22\t25\t27\t34", + "\t-10\t22\t34", + "\t-11\t22\t34", + "\t-12\t20\t34", + "\t-13\t20\t34", + "\t-14\t20\t34", + "\t-15\t20\t34", + "\t-16\t20\t31", + "\t-17\t20\t29", + "\t-18\t21\t28", + "\t-19\t24\t27", + "Zimbabwe", + "\t-14\t27\t32", + "\t-15\t26\t33", + "\t-16\t24\t34", + "\t-17\t24\t34", + "\t-18\t24\t34", + "\t-19\t24\t34", + "\t-20\t24\t34", + "\t-21\t25\t34", + "\t-22\t26\t33", + "\t-23\t28\t32", + NULL +}; + +extern CharPtr water_onedegree []; +CharPtr water_onedegree [] = { + "1", + "Adriatic Sea", + "\t46\t11\t15", + "\t45\t11\t16", + "\t44\t11\t18", + "\t43\t11\t20", + "\t42\t11\t20", + "\t41\t12\t20", + "\t40\t14\t20", + "\t39\t16\t20", + "\t38\t17\t20", + "Aegean Sea", + "\t41\t21\t27", + "\t40\t21\t27", + "\t39\t21\t28", + "\t38\t21\t29", + "\t37\t21\t29", + "\t36\t23\t29", + "\t35\t23\t29", + "Albemarle Sound", + "\t37\t-77\t-74", + "\t36\t-77\t-74", + "\t35\t-77\t-74", + "\t34\t-77\t-74", + "Alboran Sea", + "\t37\t-6\t-1", + "\t36\t-6\t0", + "\t35\t-6\t0", + "\t34\t-6\t0", + "Amundsen Gulf", + "\t72\t-126\t-117", + "\t71\t-128\t-116", + "\t70\t-128\t-116", + "\t69\t-128\t-116", + "\t68\t-127\t-117", + "Amundsen Sea", + "\t-71\t-108\t-101", + "\t-72\t-115\t-97", + "\t-73\t-115\t-97", + "\t-74\t-115\t-97", + "\t-75\t-115\t-97", + "\t-76\t-112\t-97", + "Andaman Sea", + "\t17\t93\t96", + "\t16\t92\t97", + "\t15\t91\t99", + "\t14\t91\t99", + "\t13\t91\t99", + "\t12\t91\t99", + "\t11\t91\t99", + "\t10\t91\t99", + "\t9\t91\t99", + "\t8\t91\t99", + "\t7\t91\t99", + "\t6\t92\t99", + "\t5\t92\t98", + "\t4\t94\t96", + "Arabian Sea", + "\t26\t60\t67", + "\t25\t59\t68", + "\t24\t59\t69", + "\t23\t58\t70", + "\t22\t57\t71", + "\t21\t57\t72", + "\t20\t56\t74", + "\t19\t55\t74", + "\t18\t53\t74", + "\t17\t51\t74", + "\t16\t50\t74", 
+ "\t15\t50\t75", + "\t14\t50\t75", + "\t13\t50\t75", + "\t12\t50\t74", + "\t11\t50\t72", + "\t10\t50\t72", + "\t9\t50\t72", + "\t8\t52\t72", + "\t7\t54\t73", + "\t6\t56\t73", + "\t5\t58\t73", + "\t4\t60\t73", + "\t3\t63\t73", + "\t2\t65\t73", + "\t1\t67\t74", + "\t0\t69\t74", + "\t-1\t71\t74", + "Arafura Sea", + "\t-2\t132\t135", + "\t-3\t132\t138", + "\t-4\t131\t139", + "\t-5\t131\t139", + "\t-6\t130\t141", + "\t-7\t129\t141", + "\t-8\t129\t142", + "\t-9\t129\t143", + "\t-10\t129\t143", + "\t-11\t130\t143", + "\t-12\t130\t143", + "\t-13\t133\t142", + "Aral Sea", + "\t47\t58\t62", + "\t46\t57\t62", + "\t45\t57\t62", + "\t44\t57\t61", + "\t43\t57\t61", + "Arctic Ocean", + "\t90\t-180\t180", + "\t89\t-180\t180", + "\t88\t-180\t180", + "\t87\t-180\t180", + "\t86\t-180\t180", + "\t85\t-180\t180", + "\t84\t-180\t180", + "\t83\t-180\t180", + "\t82\t-180\t180", + "\t81\t-180\t-69\t-18\t180", + "\t80\t-180\t-75\t-4\t180", + "\t79\t-180\t-75\t10\t50\t100\t180", + "\t78\t-180\t-99\t-88\t-79\t108\t180", + "\t77\t-180\t-107\t117\t180", + "\t76\t-180\t-112\t125\t180", + "\t75\t-180\t-119\t133\t148\t157\t180", + "\t74\t-180\t-125\t161\t180", + "\t73\t-180\t-132\t165\t180", + "\t72\t-180\t-138\t169\t180", + "\t71\t-180\t-145\t173\t180", + "\t70\t-180\t-152\t177\t180", + "Atlantic Ocean", + "\t69\t-33\t-29", + "\t68\t-34\t-27", + "\t67\t-39\t-26", + "\t66\t-42\t-24", + "\t65\t-42\t-20\t-17\t-11", + "\t64\t-43\t-8", + "\t63\t-43\t-6", + "\t62\t-43\t-3", + "\t61\t-44\t0", + "\t60\t-44\t0", + "\t59\t-45\t0", + "\t58\t-45\t0", + "\t57\t-46\t-1", + "\t56\t-47\t-5", + "\t55\t-48\t-6", + "\t54\t-48\t-7", + "\t53\t-49\t-5", + "\t52\t-50\t-5", + "\t51\t-51\t-4", + "\t50\t-51\t-4", + "\t49\t-52\t-4", + "\t48\t-56\t-4", + "\t47\t-60\t-4", + "\t46\t-62\t-5", + "\t45\t-65\t-5", + "\t44\t-66\t-6", + "\t43\t-68\t-6", + "\t42\t-74\t-6", + "\t41\t-75\t-7", + "\t40\t-75\t-7", + "\t39\t-76\t-7", + "\t38\t-76\t-5", + "\t37\t-77\t-4", + "\t36\t-77\t-4", + "\t35\t-78\t-4", + "\t34\t-80\t-4", + 
"\t33\t-81\t-5", + "\t32\t-82\t-5", + "\t31\t-82\t-7", + "\t30\t-82\t-8", + "\t29\t-82\t-8", + "\t28\t-82\t-8", + "\t27\t-81\t-9", + "\t26\t-81\t-11", + "\t25\t-81\t-12", + "\t24\t-81\t-13", + "\t23\t-81\t-13", + "\t22\t-81\t-14", + "\t21\t-81\t-15", + "\t20\t-78\t-15", + "\t19\t-76\t-15", + "\t18\t-74\t-15", + "\t17\t-69\t-15", + "\t16\t-62\t-15", + "\t15\t-61\t-15", + "\t14\t-61\t-14", + "\t13\t-60\t-14", + "\t12\t-61\t-14", + "\t11\t-61\t-13", + "\t10\t-62\t-12", + "\t9\t-62\t-11", + "\t8\t-62\t-10", + "\t7\t-61\t-9", + "\t6\t-59\t-8", + "\t5\t-59\t-5", + "\t4\t-58\t-2", + "\t3\t-53\t2", + "\t2\t-52\t5", + "\t1\t-51\t7", + "\t0\t-51\t7", + "\t-1\t-51\t7", + "Atlantic Ocean", + "\t1\t-50\t9", + "\t0\t-50\t10", + "\t-1\t-50\t11", + "\t-2\t-47\t12", + "\t-3\t-44\t13", + "\t-4\t-40\t14", + "\t-5\t-39\t14", + "\t-6\t-37\t14", + "\t-7\t-36\t14", + "\t-8\t-36\t14", + "\t-9\t-37\t14", + "\t-10\t-38\t14", + "\t-11\t-39\t14", + "\t-12\t-40\t14", + "\t-13\t-40\t14", + "\t-14\t-40\t13", + "\t-15\t-40\t13", + "\t-16\t-40\t13", + "\t-17\t-40\t13", + "\t-18\t-41\t13", + "\t-19\t-41\t14", + "\t-20\t-42\t14", + "\t-21\t-45\t15", + "\t-22\t-46\t15", + "\t-23\t-48\t15", + "\t-24\t-49\t15", + "\t-25\t-49\t16", + "\t-26\t-49\t16", + "\t-27\t-50\t17", + "\t-28\t-51\t17", + "\t-29\t-51\t18", + "\t-30\t-52\t19", + "\t-31\t-53\t19", + "\t-32\t-54\t19", + "\t-33\t-55\t20", + "\t-34\t-56\t20", + "\t-35\t-57\t20", + "\t-36\t-58\t20", + "\t-37\t-62\t20", + "\t-38\t-63\t20", + "\t-39\t-63\t20", + "\t-40\t-64\t20", + "\t-41\t-66\t20", + "\t-42\t-66\t20", + "\t-43\t-66\t20", + "\t-44\t-66\t20", + "\t-45\t-66\t20", + "\t-46\t-67\t20", + "\t-47\t-68\t20", + "\t-48\t-68\t20", + "\t-49\t-68\t20", + "\t-50\t-69\t20", + "\t-51\t-69\t20", + "\t-52\t-69\t20", + "\t-53\t-70\t20", + "\t-54\t-70\t20", + "\t-55\t-70\t20", + "\t-56\t-69\t20", + "\t-57\t-69\t20", + "\t-58\t-69\t20", + "\t-59\t-69\t20", + "\t-60\t-69\t20", + "\t-61\t-69\t20", + "Bab el Mandeb", + "\t14\t42\t44", + "\t13\t42\t44", + 
"\t12\t42\t44", + "\t11\t42\t44", + "Baffin Bay", + "\t79\t-77\t-71", + "\t78\t-83\t-70", + "\t77\t-83\t-66", + "\t76\t-83\t-61", + "\t75\t-81\t-55", + "\t74\t-81\t-54", + "\t73\t-81\t-53", + "\t72\t-79\t-53", + "\t71\t-78\t-53", + "\t70\t-76\t-53", + "\t69\t-73\t-53", + "\t68\t-70\t-53", + "Bahia Blanca", + "\t-37\t-63\t-60", + "\t-38\t-63\t-60", + "\t-39\t-63\t-60", + "\t-40\t-63\t-60", + "Bahia de Campeche", + "\t22\t-94\t-89", + "\t21\t-98\t-89", + "\t20\t-98\t-89", + "\t19\t-98\t-89", + "\t18\t-97\t-89", + "\t17\t-96\t-90", + "Bahia Grande", + "\t-48\t-69\t-66", + "\t-49\t-70\t-66", + "\t-50\t-70\t-66", + "\t-51\t-70\t-66", + "\t-52\t-70\t-67", + "\t-53\t-69\t-67", + "Bahia Inutil", + "\t-52\t-71\t-68", + "\t-53\t-71\t-68", + "\t-54\t-71\t-68", + "\t-55\t-71\t-68", + "Baia de Maputo", + "\t-24\t31\t33", + "\t-25\t31\t33", + "\t-26\t31\t33", + "\t-27\t31\t33", + "Baia de Marajo", + "\t1\t-49\t-47", + "\t0\t-50\t-47", + "\t-1\t-50\t-47", + "\t-2\t-50\t-47", + "\t-3\t-50\t-48", + "Baia de Sao Marcos", + "\t0\t-45\t-43", + "\t-1\t-45\t-42", + "\t-2\t-45\t-42", + "\t-3\t-45\t-42", + "\t-4\t-45\t-43", + "Baird Inlet", + "\t61\t-165\t-162", + "\t60\t-165\t-162", + "\t59\t-165\t-162", + "Balearic Sea", + "\t42\t0\t4", + "\t41\t-1\t5", + "\t40\t-1\t5", + "\t39\t-1\t5", + "\t38\t-1\t5", + "\t37\t-1\t3", + "Bali Sea", + "\t-5\t114\t117", + "\t-6\t113\t118", + "\t-7\t113\t118", + "\t-8\t113\t118", + "\t-9\t113\t118", + "\t-10\t115\t117", + "Baltic Sea", + "\t60\t16\t24", + "\t59\t15\t24", + "\t58\t15\t24", + "\t57\t13\t23", + "\t56\t11\t23", + "\t55\t11\t22", + "\t54\t11\t22", + "\t53\t11\t21", + "\t52\t13\t15", + "Banda Sea", + "\t1\t121\t124", + "\t0\t120\t126", + "\t-1\t119\t129", + "\t-2\t119\t131", + "\t-3\t119\t133", + "\t-4\t119\t134", + "\t-5\t119\t134", + "\t-6\t119\t134", + "\t-7\t119\t133", + "\t-8\t119\t132", + "\t-9\t121\t132", + "Barents Sea", + "\t82\t49\t66", + "\t81\t16\t19\t26\t66", + "\t80\t16\t67", + "\t79\t16\t67", + "\t78\t16\t68", + "\t77\t16\t69", 
+ "\t76\t16\t69", + "\t75\t16\t69", + "\t74\t18\t61", + "\t73\t20\t57", + "\t72\t22\t55", + "\t71\t24\t59", + "\t70\t26\t61", + "\t69\t26\t61", + "\t68\t28\t61", + "\t67\t36\t61", + "\t66\t43\t50", + "\t65\t44\t48", + "Bass Strait", + "\t-36\t143\t150", + "\t-37\t142\t150", + "\t-38\t142\t150", + "\t-39\t142\t149", + "\t-40\t142\t149", + "\t-41\t142\t149", + "\t-42\t144\t148", + "Bathurst Inlet", + "\t68\t-109\t-106", + "\t67\t-109\t-106", + "\t66\t-109\t-106", + "\t65\t-109\t-106", + "Bay of Bengal", + "\t24\t89\t91", + "\t23\t86\t92", + "\t22\t85\t93", + "\t21\t85\t94", + "\t20\t83\t95", + "\t19\t82\t95", + "\t18\t81\t95", + "\t17\t80\t95", + "\t16\t79\t95", + "\t15\t79\t95", + "\t14\t79\t94", + "\t13\t78\t93", + "\t12\t78\t93", + "\t11\t78\t93", + "\t10\t78\t93", + "\t9\t78\t93", + "\t8\t79\t94", + "\t7\t79\t95", + "\t6\t80\t96", + "\t5\t84\t96", + "\t4\t91\t96", + "Bay of Biscay", + "\t49\t-6\t-3", + "\t48\t-7\t0", + "\t47\t-7\t0", + "\t46\t-8\t1", + "\t45\t-8\t1", + "\t44\t-9\t1", + "\t43\t-9\t0", + "\t42\t-9\t0", + "Bay of Fundy", + "\t46\t-68\t-62", + "\t45\t-68\t-62", + "\t44\t-68\t-62", + "\t43\t-68\t-64", + "Bay of Plenty", + "\t-35\t174\t177", + "\t-36\t174\t179", + "\t-37\t174\t179", + "\t-38\t174\t179", + "Beaufort Sea", + "\t77\t-126\t-121", + "\t76\t-133\t-121", + "\t75\t-139\t-121", + "\t74\t-146\t-122", + "\t73\t-153\t-122", + "\t72\t-157\t-122", + "\t71\t-157\t-123", + "\t70\t-157\t-124", + "\t69\t-157\t-125", + "\t68\t-145\t-127", + "Bellingshausen Sea", + "\t-67\t-74\t-70", + "\t-68\t-80\t-70", + "\t-69\t-86\t-68", + "\t-70\t-92\t-68", + "\t-71\t-96\t-68", + "\t-72\t-96\t-68", + "\t-73\t-96\t-73", + "\t-74\t-96\t-73", + "Bering Sea", + "\t67\t-171\t-168", + "\t66\t-173\t-165", + "\t65\t-175\t-163", + "\t64\t-177\t-163", + "\t63\t-179\t-163\t174\t180", + "\t62\t-180\t-163\t171\t180", + "\t61\t-180\t-160\t165\t180", + "\t60\t-180\t-160\t165\t180", + "\t59\t-180\t-160\t163\t180", + "\t58\t-180\t-160\t161\t180", + "\t57\t-180\t-160\t161\t180", + 
"\t56\t-180\t-160\t161\t180", + "\t55\t-180\t-160\t161\t180", + "\t54\t-180\t-160\t163\t180", + "\t53\t-180\t-161\t165\t180", + "\t52\t-180\t-163\t167\t180", + "\t51\t-180\t-166\t169\t180", + "\t50\t-180\t-171\t171\t180", + "\t49\t178\t180", + "Bering Strait", + "\t67\t-171\t-168", + "\t66\t-171\t-166", + "\t65\t-171\t-166", + "\t64\t-171\t-166", + "Bight of Benin", + "\t7\t0\t5", + "\t6\t-1\t6", + "\t5\t-1\t6", + "\t4\t-1\t6", + "\t3\t2\t6", + "Bight of Biafra", + "\t5\t5\t10", + "\t4\t5\t10", + "\t3\t5\t10", + "\t2\t7\t10", + "\t1\t8\t10", + "Bismarck Sea", + "\t0\t141\t148", + "\t-1\t140\t152", + "\t-2\t140\t153", + "\t-3\t140\t153", + "\t-4\t141\t153", + "\t-5\t143\t153", + "\t-6\t144\t152", + "Black Sea", + "\t48\t30\t32", + "\t47\t29\t34", + "\t46\t28\t37", + "\t45\t27\t39", + "\t44\t26\t41", + "\t43\t26\t42", + "\t42\t26\t42", + "\t41\t26\t42", + "\t40\t26\t42", + "\t39\t37\t41", + "Bo Hai", + "\t41\t119\t123", + "\t40\t116\t123", + "\t39\t116\t123", + "\t38\t116\t122", + "\t37\t116\t122", + "\t36\t117\t121", + "Boca Grande", + "\t10\t-62\t-59", + "\t9\t-62\t-59", + "\t8\t-62\t-59", + "\t7\t-62\t-59", + "Bohol Sea", + "\t11\t122\t126", + "\t10\t122\t126", + "\t9\t122\t126", + "\t8\t122\t126", + "\t7\t122\t126", + "Boknafjorden", + "\t60\t4\t7", + "\t59\t4\t7", + "\t58\t4\t7", + "\t57\t4\t7", + "Bosporus", + "\t42\t27\t30", + "\t41\t27\t30", + "\t40\t27\t30", + "Bransfield Strait", + "\t-60\t-58\t-53", + "\t-61\t-63\t-53", + "\t-62\t-63\t-53", + "\t-63\t-64\t-53", + "\t-64\t-64\t-54", + "\t-65\t-64\t-59", + "\t-66\t-64\t-62", + "Bristol Bay", + "\t60\t-161\t-155", + "\t59\t-163\t-155", + "\t58\t-163\t-155", + "\t57\t-163\t-155", + "\t56\t-163\t-156", + "\t55\t-162\t-157", + "\t54\t-162\t-159", + "Bristol Channel", + "\t52\t-7\t-1", + "\t51\t-7\t-1", + "\t50\t-7\t-1", + "\t49\t-6\t-3", + "Caribbean Sea", + "\t23\t-84\t-79", + "\t22\t-88\t-77", + "\t21\t-88\t-73", + "\t20\t-88\t-71", + "\t19\t-89\t-60", + "\t18\t-89\t-60", + "\t17\t-89\t-59", + 
"\t16\t-89\t-59", + "\t15\t-88\t-58", + "\t14\t-87\t-58", + "\t13\t-84\t-58", + "\t12\t-84\t-58", + "\t11\t-84\t-58", + "\t10\t-84\t-59", + "\t9\t-84\t-59", + "\t8\t-84\t-74\t-63\t-59", + "\t7\t-83\t-75", + "Caspian Sea", + "\t48\t49\t52", + "\t47\t47\t54", + "\t46\t46\t54", + "\t45\t45\t54", + "\t44\t45\t54", + "\t43\t45\t53", + "\t42\t46\t53", + "\t41\t46\t53", + "\t40\t47\t54", + "\t39\t47\t54", + "\t38\t47\t54", + "\t37\t47\t55", + "\t36\t47\t55", + "\t35\t49\t55", + "Celebes Sea", + "\t8\t121\t125", + "\t7\t120\t126", + "\t6\t117\t126", + "\t5\t116\t126", + "\t4\t116\t126", + "\t3\t116\t126", + "\t2\t116\t126", + "\t1\t116\t126", + "\t0\t116\t126", + "\t-1\t117\t124", + "Ceram Sea", + "\t0\t124\t133", + "\t-1\t124\t134", + "\t-2\t124\t134", + "\t-3\t124\t134", + "\t-4\t124\t126\t129\t134", + "\t-5\t130\t134", + "\t-6\t132\t134", + "Chaun Bay", + "\t70\t167\t171", + "\t69\t167\t171", + "\t68\t167\t171", + "\t67\t168\t171", + "Chesapeake Bay", + "\t40\t-77\t-74", + "\t39\t-78\t-74", + "\t38\t-78\t-74", + "\t37\t-78\t-74", + "\t36\t-78\t-74", + "\t35\t-77\t-75", + "Chukchi Sea", + "\t72\t-179\t-155\t177\t179", + "\t71\t-180\t-155\t175\t180", + "\t70\t-180\t-155\t174\t180", + "\t69\t-180\t-156\t174\t180", + "\t68\t-180\t-161\t174\t180", + "\t67\t-180\t-162\t179\t180", + "\t66\t-176\t-162", + "\t65\t-175\t-163", + "\t64\t-169\t-165", + "Cook Inlet", + "\t62\t-152\t-148", + "\t61\t-154\t-148", + "\t60\t-155\t-148", + "\t59\t-155\t-148", + "\t58\t-155\t-150", + "\t57\t-154\t-151", + "Cook Strait", + "\t-39\t173\t176", + "\t-40\t173\t176", + "\t-41\t173\t176", + "\t-42\t173\t176", + "Coral Sea", + "\t-7\t142\t147", + "\t-8\t141\t148\t164\t168", + "\t-9\t141\t153\t161\t168", + "\t-10\t141\t168", + "\t-11\t141\t168", + "\t-12\t141\t168", + "\t-13\t142\t169", + "\t-14\t142\t169", + "\t-15\t142\t169", + "\t-16\t144\t169", + "\t-17\t144\t170", + "\t-18\t144\t170", + "\t-19\t145\t170", + "\t-20\t145\t170", + "\t-21\t147\t170", + "\t-22\t148\t169", + "\t-23\t148\t168", + 
"\t-24\t149\t167", + "\t-25\t150\t166", + "\t-26\t151\t165", + "\t-27\t152\t164", + "\t-28\t152\t162", + "\t-29\t152\t161", + "\t-30\t152\t160", + "Cordova Bay", + "\t56\t-134\t-131", + "\t55\t-134\t-131", + "\t54\t-134\t-131", + "\t53\t-133\t-131", + "Cumberland Sound", + "\t67\t-69\t-63", + "\t66\t-69\t-62", + "\t65\t-69\t-62", + "\t64\t-69\t-62", + "\t63\t-67\t-62", + "\t62\t-65\t-63", + "Dardanelles", + "\t41\t25\t27", + "\t40\t25\t27", + "\t39\t25\t27", + "\t38\t25\t27", + "Darnley Bay", + "\t70\t-125\t-122", + "\t69\t-125\t-122", + "\t68\t-125\t-122", + "Davao Gulf", + "\t8\t124\t126", + "\t7\t124\t127", + "\t6\t124\t127", + "\t5\t124\t127", + "\t4\t124\t126", + "Davis Sea", + "\t-62\t90\t104", + "\t-63\t86\t111", + "\t-64\t84\t113", + "\t-65\t83\t113", + "\t-66\t82\t113", + "\t-67\t82\t111", + "\t-68\t82\t87", + "Davis Strait", + "\t70\t-70\t-52", + "\t69\t-70\t-50", + "\t68\t-70\t-49", + "\t67\t-70\t-49", + "\t66\t-67\t-49", + "\t65\t-64\t-49", + "\t64\t-66\t-48", + "\t63\t-66\t-47", + "\t62\t-66\t-44", + "\t61\t-66\t-43", + "\t60\t-65\t-43", + "\t59\t-65\t-43", + "Delaware Bay", + "\t40\t-76\t-73", + "\t39\t-76\t-73", + "\t38\t-76\t-73", + "\t37\t-76\t-73", + "Denmark Strait", + "\t71\t-23\t-21", + "\t70\t-26\t-19", + "\t69\t-31\t-18", + "\t68\t-31\t-16", + "\t67\t-31\t-15", + "\t66\t-30\t-15", + "\t65\t-28\t-15", + "\t64\t-27\t-16", + "\t63\t-25\t-22", + "Disko Bay", + "\t71\t-55\t-49", + "\t70\t-55\t-49", + "\t69\t-55\t-49", + "\t68\t-54\t-49", + "\t67\t-54\t-49", + "Dixon Entrance", + "\t55\t-134\t-130", + "\t54\t-134\t-130", + "\t53\t-134\t-130", + "Dmitriy Laptev Strait", + "\t74\t138\t144", + "\t73\t138\t144", + "\t72\t138\t144", + "\t71\t139\t144", + "Drake Passage", + "\t-53\t-67\t-62", + "\t-54\t-69\t-61", + "\t-55\t-69\t-60", + "\t-56\t-69\t-58", + "\t-57\t-69\t-57", + "\t-58\t-69\t-56", + "\t-59\t-69\t-55", + "\t-60\t-69\t-54", + "\t-61\t-69\t-54", + "\t-62\t-69\t-54", + "\t-63\t-69\t-57", + "\t-64\t-69\t-61", + "\t-65\t-69\t-62", + 
"\t-66\t-69\t-64", + "\t-67\t-69\t-65", + "East China Sea", + "\t34\t124\t127\t129\t131", + "\t33\t122\t131", + "\t32\t120\t131", + "\t31\t120\t131", + "\t30\t120\t131", + "\t29\t119\t131", + "\t28\t119\t131", + "\t27\t118\t130", + "\t26\t118\t129", + "\t25\t118\t129", + "\t24\t118\t128", + "\t23\t120\t127", + "\t22\t122\t125", + "East Korea Bay", + "\t41\t127\t129", + "\t40\t126\t129", + "\t39\t126\t129", + "\t38\t126\t129", + "\t37\t126\t129", + "East Siberian Sea", + "\t78\t147\t158", + "\t77\t137\t162", + "\t76\t137\t166", + "\t75\t137\t170", + "\t74\t138\t174", + "\t73\t138\t178", + "\t72\t138\t180", + "\t71\t142\t180", + "\t70\t147\t180", + "\t69\t150\t155\t157\t178", + "\t68\t158\t176", + "\t67\t159\t162", + "Eclipse Sound", + "\t74\t-81\t-79", + "\t73\t-82\t-76", + "\t72\t-82\t-76", + "\t71\t-82\t-76", + "\t70\t-81\t-77", + "English Channel", + "\t52\t0\t2", + "\t51\t-6\t2", + "\t50\t-7\t2", + "\t49\t-7\t2", + "\t48\t-7\t2", + "\t47\t-6\t0", + "Eskimo Lakes", + "\t70\t-134\t-130", + "\t69\t-134\t-130", + "\t68\t-134\t-130", + "\t67\t-134\t-131", + "Estrecho de Magellanes", + "\t-51\t-75\t-67", + "\t-52\t-75\t-67", + "\t-53\t-75\t-67", + "\t-54\t-74\t-69", + "\t-55\t-72\t-69", + "Finger Lakes", + "\t43\t-78\t-75", + "\t42\t-78\t-75", + "\t41\t-78\t-75", + "Flores Sea", + "\t-4\t118\t121", + "\t-5\t117\t121", + "\t-6\t116\t122", + "\t-7\t116\t123", + "\t-8\t116\t123", + "\t-9\t116\t123", + "Foxe Basin", + "\t71\t-80\t-76", + "\t70\t-83\t-73", + "\t69\t-83\t-72", + "\t68\t-85\t-71", + "\t67\t-87\t-71", + "\t66\t-87\t-71", + "\t65\t-87\t-71", + "\t64\t-86\t-72", + "\t63\t-84\t-74", + "\t62\t-81\t-78", + "Franklin Bay", + "\t70\t-126\t-124", + "\t69\t-126\t-124", + "\t68\t-126\t-124", + "Frobisher Bay", + "\t64\t-69\t-64", + "\t63\t-69\t-64", + "\t62\t-69\t-64", + "\t61\t-68\t-64", + "Fury and Hecla Strait", + "\t71\t-86\t-82", + "\t70\t-86\t-81", + "\t69\t-86\t-81", + "\t68\t-86\t-81", + "Garabogaz Bay", + "\t43\t52\t54", + "\t42\t51\t55", + "\t41\t51\t55", + 
"\t40\t51\t55", + "\t39\t51\t55", + "Geographe Bay", + "\t-29\t114\t116", + "\t-30\t114\t116", + "\t-31\t114\t116", + "\t-32\t114\t116", + "\t-33\t114\t116", + "\t-34\t114\t116", + "George VI Sound", + "\t-68\t-70\t-67", + "\t-69\t-70\t-66", + "\t-70\t-74\t-65", + "\t-71\t-75\t-65", + "\t-72\t-75\t-65", + "\t-73\t-75\t-65", + "\t-74\t-75\t-67", + "Goldsmith Channel", + "\t74\t-108\t-104", + "\t73\t-108\t-104", + "\t72\t-108\t-104", + "\t71\t-106\t-104", + "Golfe du Lion", + "\t44\t2\t6", + "\t43\t2\t6", + "\t42\t2\t6", + "\t41\t2\t5", + "\t40\t2\t4", + "Golfo Corcovado", + "\t-40\t-74\t-71", + "\t-41\t-74\t-71", + "\t-42\t-74\t-71", + "\t-43\t-75\t-71", + "\t-44\t-75\t-71", + "\t-45\t-75\t-71", + "\t-46\t-74\t-71", + "Golfo de California", + "\t32\t-115\t-112", + "\t31\t-115\t-111", + "\t30\t-115\t-111", + "\t29\t-115\t-110", + "\t28\t-115\t-108", + "\t27\t-114\t-108", + "\t26\t-113\t-107", + "\t25\t-113\t-106", + "\t24\t-112\t-105", + "\t23\t-111\t-105", + "\t22\t-110\t-105", + "Golfo de Guayaquil", + "\t-1\t-81\t-78", + "\t-2\t-81\t-78", + "\t-3\t-81\t-78", + "\t-4\t-81\t-78", + "Golfo de Panama", + "\t10\t-80\t-78", + "\t9\t-81\t-76", + "\t8\t-81\t-76", + "\t7\t-81\t-76", + "\t6\t-81\t-77", + "Golfo de Penas", + "\t-45\t-76\t-73", + "\t-46\t-76\t-73", + "\t-47\t-76\t-73", + "\t-48\t-76\t-73", + "Golfo de Tehuantepec", + "\t17\t-96\t-92", + "\t16\t-97\t-92", + "\t15\t-97\t-92", + "\t14\t-97\t-92", + "Golfo de Uraba", + "\t9\t-78\t-75", + "\t8\t-78\t-75", + "\t7\t-78\t-75", + "\t6\t-77\t-75", + "Golfo San Jorge", + "\t-43\t-67\t-65", + "\t-44\t-68\t-64", + "\t-45\t-68\t-64", + "\t-46\t-68\t-64", + "\t-47\t-68\t-64", + "\t-48\t-67\t-64", + "Golfo San Matias", + "\t-39\t-66\t-63", + "\t-40\t-66\t-62", + "\t-41\t-66\t-62", + "\t-42\t-66\t-62", + "\t-43\t-65\t-62", + "Great Australian Bight", + "\t-30\t127\t133", + "\t-31\t123\t135", + "\t-32\t118\t136", + "\t-33\t117\t136", + "\t-34\t117\t140", + "\t-35\t117\t140", + "\t-36\t117\t141", + "\t-37\t119\t144", + 
"\t-38\t123\t144", + "\t-39\t126\t145", + "\t-40\t129\t146", + "\t-41\t133\t146", + "\t-42\t136\t147", + "\t-43\t139\t147", + "\t-44\t143\t147", + "Great Barrier Reef", + "\t-8\t141\t146", + "\t-9\t141\t146", + "\t-10\t141\t146", + "\t-11\t141\t146", + "\t-12\t141\t147", + "\t-13\t142\t148", + "\t-14\t142\t148", + "\t-15\t142\t148", + "\t-16\t144\t149", + "\t-17\t144\t150", + "\t-18\t144\t151", + "\t-19\t145\t151", + "\t-20\t145\t152", + "\t-21\t147\t154", + "\t-22\t148\t154", + "\t-23\t148\t154", + "\t-24\t149\t154", + "\t-25\t150\t154", + "\t-26\t151\t154", + "Great Bear Lake", + "\t68\t-121\t-118", + "\t67\t-126\t-116", + "\t66\t-126\t-116", + "\t65\t-126\t-116", + "\t64\t-125\t-116", + "\t63\t-123\t-119", + "Great Salt Lake", + "\t42\t-114\t-110", + "\t41\t-114\t-110", + "\t40\t-114\t-110", + "\t39\t-113\t-110", + "Great Slave Lake", + "\t63\t-117\t-108", + "\t62\t-118\t-108", + "\t61\t-118\t-108", + "\t60\t-118\t-110", + "\t59\t-117\t-113", + "Greenland Sea", + "\t84\t-32\t-17", + "\t83\t-33\t-3", + "\t82\t-33\t11", + "\t81\t-33\t18", + "\t80\t-30\t-27\t-25\t18", + "\t79\t-24\t18", + "\t78\t-22\t18", + "\t77\t-23\t18", + "\t76\t-23\t18", + "\t75\t-23\t17", + "\t74\t-28\t14", + "\t73\t-28\t10", + "\t72\t-28\t5", + "\t71\t-27\t0", + "\t70\t-26\t-4", + "\t69\t-29\t-7", + "\t68\t-29\t-9", + "\t67\t-29\t-10", + "\t66\t-27\t-10", + "\t65\t-25\t-11", + "\t64\t-24\t-12", + "Guba Gusinaya", + "\t73\t144\t148", + "\t72\t143\t148", + "\t71\t143\t148", + "\t70\t143\t147", + "Gulf of Aden", + "\t16\t49\t52", + "\t15\t46\t52", + "\t14\t44\t52", + "\t13\t42\t52", + "\t12\t41\t52", + "\t11\t41\t52", + "\t10\t41\t52", + "\t9\t42\t47", + "Gulf of Alaska", + "\t61\t-150\t-138", + "\t60\t-152\t-137", + "\t59\t-156\t-135", + "\t58\t-157\t-135", + "\t57\t-159\t-135", + "\t56\t-164\t-139", + "\t55\t-164\t-145", + "\t54\t-164\t-152", + "\t53\t-164\t-158", + "Gulf of Anadyr", + "\t67\t-180\t-177", + "\t66\t-180\t-174", + "\t65\t-180\t-172", + "\t64\t-180\t-172", + "\t63\t-180\t-172", 
+ "\t62\t-180\t-174", + "\t61\t-180\t-176", + "\t60\t-180\t-178", + "Gulf of Anadyr", + "\t66\t175\t180", + "\t65\t173\t180", + "\t64\t173\t180", + "\t63\t173\t180", + "\t62\t177\t180", + "\t61\t178\t180", + "Gulf of Aqaba", + "\t30\t33\t35", + "\t29\t33\t35", + "\t28\t33\t35", + "\t27\t33\t35", + "\t26\t33\t35", + "Gulf of Boothia", + "\t72\t-90\t-88", + "\t71\t-93\t-84", + "\t70\t-93\t-83", + "\t69\t-93\t-83", + "\t68\t-93\t-83", + "\t67\t-91\t-83", + "\t66\t-89\t-85", + "Gulf of Bothnia", + "\t66\t20\t26", + "\t65\t20\t26", + "\t64\t17\t26", + "\t63\t16\t26", + "\t62\t16\t24", + "\t61\t16\t24", + "\t60\t16\t24", + "\t59\t16\t24", + "\t58\t17\t24", + "Gulf of Buli", + "\t2\t127\t130", + "\t1\t127\t130", + "\t0\t127\t130", + "\t-1\t127\t130", + "Gulf of Carpentaria", + "\t-11\t135\t142", + "\t-12\t134\t142", + "\t-13\t134\t142", + "\t-14\t134\t142", + "\t-15\t134\t142", + "\t-16\t134\t142", + "\t-17\t136\t142", + "\t-18\t138\t141", + "Gulf of Finland", + "\t61\t23\t31", + "\t60\t21\t31", + "\t59\t21\t31", + "\t58\t21\t31", + "Gulf of Gabes", + "\t36\t9\t12", + "\t35\t9\t12", + "\t34\t9\t12", + "\t33\t9\t12", + "\t32\t9\t11", + "Gulf of Guinea", + "\t6\t-6\t3", + "\t5\t-8\t8", + "\t4\t-8\t9", + "\t3\t-8\t10", + "\t2\t-6\t10", + "\t1\t-3\t11", + "\t0\t1\t11", + "\t-1\t4\t11", + "Gulf of Honduras", + "\t18\t-89\t-87", + "\t17\t-89\t-86", + "\t16\t-89\t-85", + "\t15\t-89\t-85", + "\t14\t-89\t-85", + "Gulf of Kamchatka", + "\t57\t161\t164", + "\t56\t160\t164", + "\t55\t160\t164", + "\t54\t160\t164", + "\t53\t160\t163", + "Gulf of Kau", + "\t3\t127\t129", + "\t2\t126\t130", + "\t1\t126\t130", + "\t0\t126\t130", + "\t-1\t126\t128", + "Gulf of Khambhat", + "\t23\t71\t73", + "\t22\t71\t74", + "\t21\t69\t74", + "\t20\t69\t74", + "\t19\t69\t73", + "\t18\t71\t73", + "Gulf of Kutch", + "\t24\t67\t71", + "\t23\t67\t71", + "\t22\t67\t71", + "\t21\t67\t71", + "Gulf of Maine", + "\t45\t-70\t-65", + "\t44\t-71\t-64", + "\t43\t-71\t-64", + "\t42\t-71\t-64", + "\t41\t-71\t-65", + 
"\t40\t-70\t-67", + "Gulf of Mannar", + "\t10\t77\t80", + "\t9\t76\t80", + "\t8\t76\t80", + "\t7\t76\t80", + "\t6\t78\t80", + "Gulf of Martaban", + "\t18\t95\t98", + "\t17\t94\t98", + "\t16\t94\t98", + "\t15\t94\t98", + "\t14\t94\t98", + "\t13\t96\t98", + "Gulf of Masira", + "\t21\t56\t59", + "\t20\t56\t59", + "\t19\t56\t59", + "\t18\t56\t58", + "Gulf of Mexico", + "\t31\t-90\t-83", + "\t30\t-96\t-81", + "\t29\t-98\t-81", + "\t28\t-98\t-81", + "\t27\t-98\t-80", + "\t26\t-98\t-79", + "\t25\t-98\t-78", + "\t24\t-98\t-78", + "\t23\t-98\t-78", + "\t22\t-98\t-82", + "\t21\t-98\t-82", + "\t20\t-98\t-83", + "\t19\t-98\t-93", + "Gulf of Ob", + "\t73\t71\t76", + "\t72\t70\t76", + "\t71\t70\t76", + "\t70\t70\t77", + "\t69\t71\t78", + "\t68\t70\t79", + "\t67\t68\t79", + "\t66\t68\t79", + "\t65\t68\t74", + "Gulf of Olenek", + "\t74\t117\t124", + "\t73\t117\t124", + "\t72\t117\t124", + "\t71\t118\t124", + "Gulf of Oman", + "\t27\t55\t58", + "\t26\t55\t62", + "\t25\t55\t62", + "\t24\t55\t62", + "\t23\t55\t61", + "\t22\t56\t61", + "\t21\t58\t60", + "Gulf of Papua", + "\t-6\t142\t146", + "\t-7\t141\t147", + "\t-8\t141\t147", + "\t-9\t141\t147", + "Gulf of Riga", + "\t60\t22\t24", + "\t59\t21\t25", + "\t58\t20\t25", + "\t57\t20\t25", + "\t56\t20\t25", + "\t55\t22\t24", + "Gulf of Sakhalin", + "\t55\t138\t143", + "\t54\t138\t143", + "\t53\t138\t143", + "\t52\t139\t143", + "Gulf of Sidra", + "\t33\t14\t20", + "\t32\t14\t21", + "\t31\t14\t21", + "\t30\t14\t21", + "\t29\t16\t21", + "Gulf of St. 
Lawrence", + "\t52\t-59\t-55", + "\t51\t-65\t-55", + "\t50\t-65\t-55", + "\t49\t-67\t-56", + "\t48\t-67\t-53", + "\t47\t-67\t-53", + "\t46\t-67\t-53", + "\t45\t-65\t-54", + "\t44\t-64\t-60", + "Gulf of Suez", + "\t30\t31\t34", + "\t29\t31\t34", + "\t28\t31\t35", + "\t27\t31\t35", + "\t26\t32\t35", + "Gulf of Thailand", + "\t14\t98\t101", + "\t13\t98\t103", + "\t12\t98\t104", + "\t11\t98\t106", + "\t10\t98\t106", + "\t9\t98\t106", + "\t8\t98\t106", + "\t7\t98\t105", + "\t6\t99\t104", + "\t5\t99\t103", + "Gulf of Tomini", + "\t1\t119\t124", + "\t0\t119\t124", + "\t-1\t119\t124", + "\t-2\t119\t122", + "Gulf of Tonkin", + "\t22\t105\t110", + "\t21\t105\t111", + "\t20\t104\t111", + "\t19\t104\t111", + "\t18\t104\t111", + "\t17\t104\t109", + "\t16\t105\t108", + "Gulf of Yana", + "\t76\t135\t138", + "\t75\t135\t141", + "\t74\t135\t141", + "\t73\t133\t142", + "\t72\t131\t142", + "\t71\t131\t142", + "\t70\t131\t140", + "Gulf St. Vincent", + "\t-31\t136\t138", + "\t-32\t135\t138", + "\t-33\t134\t139", + "\t-34\t134\t139", + "\t-35\t134\t139", + "\t-36\t135\t139", + "Hadley Bay", + "\t74\t-109\t-107", + "\t73\t-109\t-106", + "\t72\t-109\t-106", + "\t71\t-109\t-106", + "\t70\t-109\t-106", + "Hall Basin", + "\t83\t-63\t-61", + "\t82\t-69\t-60", + "\t81\t-69\t-60", + "\t80\t-69\t-60", + "\t79\t-64\t-62", + "Halmahera Sea", + "\t1\t126\t131", + "\t0\t126\t132", + "\t-1\t126\t132", + "\t-2\t126\t132", + "Hamilton Inlet", + "\t55\t-59\t-56", + "\t54\t-61\t-56", + "\t53\t-61\t-56", + "\t52\t-61\t-57", + "Hangzhou Bay", + "\t31\t119\t123", + "\t30\t119\t123", + "\t29\t119\t123", + "\t28\t120\t123", + "Hecate Straight", + "\t56\t-133\t-129", + "\t55\t-133\t-128", + "\t54\t-133\t-128", + "\t53\t-133\t-128", + "\t52\t-133\t-128", + "\t51\t-132\t-129", + "Helodranon' Antongila", + "\t-14\t48\t51", + "\t-15\t48\t51", + "\t-16\t48\t51", + "\t-17\t48\t50", + "Hudson Bay", + "\t67\t-87\t-84", + "\t66\t-88\t-84", + "\t65\t-94\t-81", + "\t64\t-94\t-78", + "\t63\t-94\t-77", + "\t62\t-95\t-76", 
+ "\t61\t-95\t-76", + "\t60\t-95\t-76", + "\t59\t-95\t-76", + "\t58\t-95\t-75", + "\t57\t-95\t-75", + "\t56\t-93\t-75", + "\t55\t-93\t-75", + "\t54\t-88\t-75", + "\t53\t-83\t-77", + "Hudson Strait", + "\t65\t-79\t-71", + "\t64\t-81\t-69", + "\t63\t-81\t-64", + "\t62\t-81\t-63", + "\t61\t-79\t-63", + "\t60\t-73\t-63", + "\t59\t-71\t-63", + "IJsselmeer", + "\t54\t4\t6", + "\t53\t3\t6", + "\t52\t3\t6", + "\t51\t3\t6", + "Indian Ocean", + "\t11\t49\t53", + "\t10\t49\t55", + "\t9\t49\t57", + "\t8\t48\t59\t80\t85", + "\t7\t48\t61\t79\t92", + "\t6\t47\t64\t78\t96", + "\t5\t46\t66\t77\t97", + "\t4\t45\t68\t75\t98", + "\t3\t44\t70\t74\t99", + "\t2\t43\t99", + "\t1\t40\t101", + "\t0\t39\t101", + "\t-1\t39\t102", + "\t-2\t38\t103", + "\t-3\t38\t104", + "\t-4\t37\t105", + "\t-5\t37\t107", + "\t-6\t37\t111", + "\t-7\t37\t119", + "\t-8\t38\t120", + "\t-9\t38\t123", + "\t-10\t38\t125", + "\t-11\t38\t126", + "\t-12\t43\t127", + "\t-13\t48\t127", + "\t-14\t48\t127", + "\t-15\t48\t127", + "\t-16\t48\t126", + "\t-17\t48\t125", + "\t-18\t47\t124", + "\t-19\t47\t123", + "\t-20\t47\t122", + "\t-21\t46\t120", + "\t-22\t46\t117", + "\t-23\t46\t115", + "\t-24\t38\t114", + "\t-25\t31\t114", + "\t-26\t31\t115", + "\t-27\t30\t115", + "\t-28\t30\t115", + "\t-29\t29\t116", + "\t-30\t28\t116", + "\t-31\t27\t116", + "\t-32\t22\t116", + "\t-33\t18\t117", + "\t-34\t18\t120", + "\t-35\t18\t124", + "\t-36\t18\t127", + "\t-37\t18\t130", + "\t-38\t18\t134", + "\t-39\t18\t137", + "\t-40\t18\t140", + "\t-41\t18\t144", + "\t-42\t18\t148", + "\t-43\t18\t151", + "\t-44\t18\t153", + "\t-45\t18\t156", + "\t-46\t18\t159", + "\t-47\t18\t161", + "\t-48\t18\t164", + "\t-49\t18\t167", + "\t-50\t18\t167", + "\t-51\t18\t167", + "\t-52\t18\t167", + "\t-53\t18\t167", + "\t-54\t18\t167", + "\t-55\t18\t167", + "\t-56\t18\t167", + "\t-57\t18\t167", + "\t-58\t18\t167", + "\t-59\t18\t167", + "\t-60\t18\t167", + "\t-61\t18\t167", + "Inner Sea", + "\t35\t129\t136", + "\t34\t129\t136", + "\t33\t129\t136", + "\t32\t129\t136", 
+ "\t31\t130\t133", + "Inner Seas", + "\t59\t-7\t-4", + "\t58\t-8\t-4", + "\t57\t-8\t-3", + "\t56\t-9\t-3", + "\t55\t-9\t-3", + "\t54\t-9\t-3", + "\t53\t-8\t-4", + "Internal Canada (B.C.) Waters", + "\t54\t-130\t-126", + "\t53\t-130\t-126", + "\t52\t-130\t-126", + "\t51\t-130\t-126", + "Internal Canada (B.C.) Waters", + "\t55\t-133\t-131", + "\t54\t-133\t-131", + "\t53\t-133\t-131", + "\t52\t-133\t-131", + "Internal Canada (B.C.) Waters", + "\t56\t-131\t-128", + "\t55\t-131\t-128", + "\t54\t-131\t-128", + "\t53\t-131\t-129", + "Internal Canada Arctic Waters", + "\t72\t-119\t-116", + "\t71\t-119\t-116", + "\t70\t-119\t-116", + "Internal Canada Arctic Waters", + "\t69\t-108\t-104", + "\t68\t-108\t-104", + "\t67\t-108\t-104", + "Internal Canada Arctic Waters", + "\t71\t-82\t-79", + "\t70\t-82\t-78", + "\t69\t-82\t-78", + "\t68\t-82\t-78", + "Internal Denmark Waters", + "\t57\t9\t11", + "\t56\t8\t12", + "\t55\t8\t12", + "\t54\t8\t12", + "\t53\t8\t12", + "Internal Philippines Waters", + "\t11\t124\t127", + "\t10\t124\t127", + "\t9\t124\t127", + "\t8\t124\t127", + "Internal Philippines Waters", + "\t11\t122\t124", + "\t10\t122\t124", + "\t9\t122\t124", + "\t8\t122\t124", + "Internal U.S. 
(Alaska) Waters", + "\t60\t-138\t-134", + "\t59\t-138\t-132", + "\t58\t-138\t-131", + "\t57\t-138\t-130", + "\t56\t-136\t-129", + "\t55\t-135\t-129", + "\t54\t-134\t-129", + "Ionian Sea", + "\t41\t15\t18", + "\t40\t15\t22", + "\t39\t14\t24", + "\t38\t14\t24", + "\t37\t14\t24", + "\t36\t14\t23", + "\t35\t14\t23", + "Irish Sea", + "\t55\t-7\t-1", + "\t54\t-7\t-1", + "\t53\t-7\t-1", + "\t52\t-7\t-1", + "\t51\t-7\t-2", + "\t50\t-7\t-4", + "James Bay", + "\t55\t-83\t-77", + "\t54\t-83\t-77", + "\t53\t-83\t-77", + "\t52\t-83\t-77", + "\t51\t-83\t-77", + "\t50\t-81\t-77", + "\t49\t-80\t-78", + "Java Sea", + "\t-1\t105\t114", + "\t-2\t104\t117", + "\t-3\t104\t119", + "\t-4\t103\t120", + "\t-5\t103\t120", + "\t-6\t103\t120", + "\t-7\t104\t119", + "\t-8\t111\t118", + "Jones Sound", + "\t77\t-92\t-77", + "\t76\t-92\t-77", + "\t75\t-92\t-77", + "\t74\t-91\t-78", + "Joseph Bonaparte Gulf", + "\t-12\t126\t130", + "\t-13\t126\t130", + "\t-14\t126\t130", + "\t-15\t126\t130", + "\t-16\t127\t130", + "Kaliningrad", + "\t56\t19\t22", + "\t55\t19\t22", + "\t54\t19\t22", + "\t53\t19\t22", + "Kane Basin", + "\t81\t-73\t-63", + "\t80\t-79\t-63", + "\t79\t-79\t-63", + "\t78\t-79\t-63", + "\t77\t-79\t-67", + "Kangertittivaq", + "\t72\t-29\t-23", + "\t71\t-30\t-20", + "\t70\t-30\t-20", + "\t69\t-30\t-20", + "\t68\t-28\t-26", + "Kara Sea", + "\t82\t64\t96", + "\t81\t64\t98", + "\t80\t64\t103", + "\t79\t64\t103", + "\t78\t65\t103", + "\t77\t65\t102", + "\t76\t59\t102", + "\t75\t56\t102", + "\t74\t55\t100", + "\t73\t54\t88", + "\t72\t54\t88", + "\t71\t54\t80", + "\t70\t54\t69\t74\t80", + "\t69\t55\t70\t77\t80", + "\t68\t59\t70", + "\t67\t65\t70", + "Karaginskiy Gulf", + "\t61\t162\t167", + "\t60\t161\t167", + "\t59\t160\t167", + "\t58\t160\t167", + "\t57\t160\t166", + "\t56\t161\t164", + "Karskiye Vorota Strait", + "\t71\t56\t60", + "\t70\t56\t60", + "\t69\t56\t60", + "Kattegat", + "\t59\t10\t12", + "\t58\t9\t13", + "\t57\t9\t13", + "\t56\t9\t13", + "\t55\t9\t13", + "\t54\t10\t12", + "Kennedy 
Channel", + "\t82\t-67\t-63", + "\t81\t-68\t-63", + "\t80\t-68\t-63", + "\t79\t-68\t-63", + "Khatanga Gulf", + "\t76\t111\t114", + "\t75\t108\t114", + "\t74\t105\t114", + "\t73\t104\t114", + "\t72\t104\t113", + "\t71\t104\t107", + "Korea Strait", + "\t37\t128\t131", + "\t36\t126\t133", + "\t35\t125\t133", + "\t34\t125\t133", + "\t33\t125\t133", + "\t32\t125\t131", + "\t31\t126\t130", + "Kotzebue Sound", + "\t68\t-164\t-160", + "\t67\t-165\t-159", + "\t66\t-165\t-159", + "\t65\t-165\t-159", + "Kronotskiy Gulf", + "\t55\t158\t162", + "\t54\t158\t162", + "\t53\t158\t162", + "\t52\t158\t161", + "La Perouse Strait", + "\t47\t140\t142", + "\t46\t140\t143", + "\t45\t140\t143", + "\t44\t140\t143", + "Labrador Sea", + "\t61\t-65\t-43", + "\t60\t-65\t-42", + "\t59\t-65\t-42", + "\t58\t-65\t-42", + "\t57\t-64\t-43", + "\t56\t-63\t-43", + "\t55\t-63\t-44", + "\t54\t-62\t-45", + "\t53\t-60\t-46", + "\t52\t-58\t-46", + "\t51\t-57\t-47", + "\t50\t-57\t-48", + "\t49\t-57\t-49", + "\t48\t-57\t-49", + "\t47\t-55\t-50", + "\t46\t-54\t-51", + "Laccadive Sea", + "\t15\t73\t75", + "\t14\t70\t75", + "\t13\t70\t76", + "\t12\t70\t76", + "\t11\t70\t77", + "\t10\t70\t77", + "\t9\t70\t79", + "\t8\t70\t80", + "\t7\t70\t81", + "\t6\t70\t81", + "\t5\t71\t81", + "\t4\t71\t80", + "\t3\t71\t79", + "\t2\t71\t78", + "\t1\t71\t76", + "\t0\t71\t75", + "\t-1\t71\t74", + "Lago de Maracaibo", + "\t11\t-72\t-70", + "\t10\t-73\t-70", + "\t9\t-73\t-70", + "\t8\t-73\t-70", + "Lake Baikal", + "\t56\t107\t110", + "\t55\t107\t110", + "\t54\t105\t110", + "\t53\t104\t110", + "\t52\t102\t110", + "\t51\t102\t109", + "\t50\t102\t107", + "Lake Chad", + "\t14\t13\t15", + "\t13\t13\t15", + "\t12\t13\t15", + "\t11\t13\t15", + "Lake Champlain", + "\t46\t-74\t-72", + "\t45\t-74\t-72", + "\t44\t-74\t-72", + "\t43\t-74\t-72", + "\t42\t-74\t-72", + "Lake Erie", + "\t44\t-80\t-77", + "\t43\t-84\t-77", + "\t42\t-84\t-77", + "\t41\t-84\t-77", + "\t40\t-84\t-79", + "Lake Huron", + "\t47\t-82\t-80", + "\t46\t-82\t-78", + 
"\t45\t-82\t-78", + "\t44\t-82\t-78", + "\t43\t-82\t-78", + "Lake Huron", + "\t47\t-85\t-80", + "\t46\t-85\t-78", + "\t45\t-85\t-78", + "\t44\t-85\t-78", + "\t43\t-84\t-78", + "\t42\t-84\t-80", + "Lake Huron", + "\t47\t-84\t-80", + "\t46\t-84\t-80", + "\t45\t-84\t-80", + "\t44\t-84\t-80", + "Lake Huron", + "\t45\t-84\t-82", + "\t44\t-84\t-82", + "\t43\t-84\t-82", + "\t42\t-84\t-82", + "Lake Malawi", + "\t-8\t32\t35", + "\t-9\t32\t35", + "\t-10\t32\t35", + "\t-11\t32\t35", + "\t-12\t33\t36", + "\t-13\t33\t36", + "\t-14\t33\t36", + "\t-15\t33\t36", + "Lake Michigan", + "\t47\t-86\t-84", + "\t46\t-88\t-83", + "\t45\t-89\t-83", + "\t44\t-89\t-83", + "\t43\t-89\t-84", + "\t42\t-88\t-85", + "\t41\t-88\t-85", + "\t40\t-88\t-85", + "Lake Okeechobee", + "\t28\t-82\t-79", + "\t27\t-82\t-79", + "\t26\t-82\t-79", + "\t25\t-82\t-79", + "Lake Ontario", + "\t45\t-78\t-74", + "\t44\t-80\t-74", + "\t43\t-80\t-74", + "\t42\t-80\t-75", + "Lake Pontchartrain", + "\t31\t-91\t-88", + "\t30\t-91\t-88", + "\t29\t-91\t-88", + "Lake Saint Clair", + "\t43\t-84\t-81", + "\t42\t-84\t-81", + "\t41\t-84\t-81", + "Lake Shasta", + "\t41\t-123\t-121", + "\t40\t-123\t-121", + "\t39\t-123\t-121", + "Lake Superior", + "\t50\t-89\t-87", + "\t49\t-90\t-84", + "\t48\t-92\t-83", + "\t47\t-93\t-83", + "\t46\t-93\t-83", + "\t45\t-93\t-83", + "Lake Superior", + "\t48\t-85\t-83", + "\t47\t-86\t-83", + "\t46\t-86\t-83", + "\t45\t-86\t-83", + "Lake Tahoe", + "\t40\t-121\t-118", + "\t39\t-121\t-118", + "\t38\t-121\t-118", + "\t37\t-121\t-118", + "Lake Tanganyika", + "\t-2\t28\t30", + "\t-3\t28\t30", + "\t-4\t28\t30", + "\t-5\t28\t31", + "\t-6\t28\t31", + "\t-7\t28\t32", + "\t-8\t28\t32", + "\t-9\t29\t32", + "Lake Victoria", + "\t1\t30\t35", + "\t0\t30\t35", + "\t-1\t30\t35", + "\t-2\t30\t35", + "\t-3\t30\t34", + "\t-4\t31\t33", + "Lake Winnipeg", + "\t55\t-99\t-96", + "\t54\t-100\t-96", + "\t53\t-100\t-95", + "\t52\t-100\t-95", + "\t51\t-99\t-95", + "\t50\t-99\t-95", + "\t49\t-97\t-95", + "Laptev Sea", + 
"\t82\t95\t101", + "\t81\t95\t109", + "\t80\t95\t118", + "\t79\t95\t126", + "\t78\t96\t134", + "\t77\t101\t139", + "\t76\t103\t139", + "\t75\t104\t139", + "\t74\t111\t138", + "\t73\t111\t137", + "\t72\t111\t137", + "\t71\t112\t114\t126\t136", + "\t70\t127\t134", + "\t69\t129\t132", + "Leyte Gulf", + "\t12\t124\t126", + "\t11\t124\t126", + "\t10\t124\t126", + "\t9\t124\t126", + "Liddon Gulf", + "\t76\t-115\t-110", + "\t75\t-116\t-110", + "\t74\t-116\t-110", + "\t73\t-116\t-111", + "Ligurian Sea", + "\t45\t7\t10", + "\t44\t6\t10", + "\t43\t6\t10", + "\t42\t6\t10", + "Lincoln Sea", + "\t84\t-70\t-36", + "\t83\t-70\t-36", + "\t82\t-70\t-36", + "\t81\t-69\t-37", + "\t80\t-54\t-48\t-46\t-43", + "Long Island Sound", + "\t42\t-74\t-71", + "\t41\t-74\t-71", + "\t40\t-74\t-71", + "\t39\t-74\t-71", + "Lutzow-Holm Bay", + "\t-67\t32\t41", + "\t-68\t32\t41", + "\t-69\t32\t41", + "\t-70\t32\t40", + "\t-71\t37\t39", + "Luzon Strait", + "\t23\t119\t121", + "\t22\t119\t122", + "\t21\t119\t123", + "\t20\t119\t123", + "\t19\t119\t123", + "\t18\t119\t123", + "\t17\t119\t123", + "M'Clure Strait", + "\t77\t-123\t-119", + "\t76\t-124\t-114", + "\t75\t-125\t-113", + "\t74\t-125\t-113", + "\t73\t-125\t-113", + "\t72\t-116\t-114", + "Mackenzie Bay", + "\t70\t-140\t-133", + "\t69\t-140\t-133", + "\t68\t-140\t-133", + "\t67\t-138\t-134", + "Makassar Strait", + "\t2\t116\t122", + "\t1\t116\t122", + "\t0\t115\t122", + "\t-1\t115\t121", + "\t-2\t115\t120", + "\t-3\t115\t120", + "\t-4\t115\t120", + "\t-5\t116\t120", + "\t-6\t118\t120", + "Marguerite Bay", + "\t-66\t-70\t-65", + "\t-67\t-71\t-65", + "\t-68\t-71\t-65", + "\t-69\t-71\t-65", + "\t-70\t-71\t-65", + "Massachusetts Bay", + "\t43\t-72\t-69", + "\t42\t-72\t-69", + "\t41\t-72\t-69", + "\t40\t-71\t-69", + "Matochkin Shar Strait", + "\t74\t53\t57", + "\t73\t53\t57", + "\t72\t53\t57", + "McMurdo Sound", + "\t-71\t165\t167", + "\t-72\t163\t170", + "\t-73\t161\t170", + "\t-74\t159\t170", + "\t-75\t159\t170", + "\t-76\t159\t170", + 
"\t-77\t161\t170", + "\t-78\t161\t170", + "\t-79\t162\t166", + "Mecklenburger Bucht", + "\t55\t9\t13", + "\t54\t9\t13", + "\t53\t9\t13", + "\t52\t9\t12", + "Mediterranean Sea", + "\t38\t10\t15", + "\t37\t9\t24\t26\t37", + "\t36\t9\t37", + "\t35\t9\t37", + "\t34\t9\t36", + "\t33\t9\t36", + "\t32\t9\t36", + "\t31\t11\t36", + "\t30\t23\t35", + "\t29\t27\t30", + "Mediterranean Sea", + "\t44\t4\t10", + "\t43\t3\t10", + "\t42\t2\t10", + "\t41\t2\t10", + "\t40\t2\t10", + "\t39\t-1\t11", + "\t38\t-2\t13", + "\t37\t-3\t13", + "\t36\t-3\t13", + "\t35\t-3\t11", + "\t34\t-2\t1", + "Melville Bay", + "\t77\t-68\t-59", + "\t76\t-68\t-56", + "\t75\t-68\t-55", + "\t74\t-67\t-55", + "\t73\t-62\t-55", + "Minto Inlet", + "\t72\t-119\t-114", + "\t71\t-119\t-114", + "\t70\t-119\t-114", + "Molucca Sea", + "\t5\t125\t127", + "\t4\t124\t128", + "\t3\t124\t129", + "\t2\t123\t129", + "\t1\t122\t129", + "\t0\t122\t129", + "\t-1\t122\t129", + "\t-2\t122\t128", + "Monterey Bay", + "\t37\t-123\t-120", + "\t36\t-123\t-120", + "\t35\t-123\t-120", + "Mozambique Channel", + "\t-9\t39\t44", + "\t-10\t39\t49", + "\t-11\t39\t50", + "\t-12\t39\t50", + "\t-13\t39\t50", + "\t-14\t39\t49", + "\t-15\t38\t48", + "\t-16\t35\t48", + "\t-17\t34\t46", + "\t-18\t33\t45", + "\t-19\t33\t45", + "\t-20\t33\t45", + "\t-21\t33\t45", + "\t-22\t34\t44", + "\t-23\t33\t45", + "\t-24\t31\t46", + "\t-25\t31\t46", + "\t-26\t31\t46", + "\t-27\t31\t39", + "Murchison Sound", + "\t79\t-73\t-71", + "\t78\t-73\t-65", + "\t77\t-73\t-65", + "\t76\t-73\t-65", + "North Sea", + "\t61\t-2\t7", + "\t60\t-3\t7", + "\t59\t-4\t8", + "\t58\t-5\t10", + "\t57\t-5\t10", + "\t56\t-5\t10", + "\t55\t-4\t10", + "\t54\t-4\t10", + "\t53\t-2\t10", + "\t52\t-1\t10", + "\t51\t-1\t5", + "\t50\t-1\t5", + "\t49\t0\t2", + "Norton Sound", + "\t65\t-165\t-159", + "\t64\t-165\t-159", + "\t63\t-165\t-159", + "\t62\t-165\t-159", + "Norwegian Sea", + "\t77\t13\t19", + "\t76\t9\t21", + "\t75\t4\t23", + "\t74\t-1\t25", + "\t73\t-5\t27", + "\t72\t-9\t28", + 
"\t71\t-10\t28", + "\t70\t-11\t28", + "\t69\t-12\t28", + "\t68\t-12\t24", + "\t67\t-13\t18", + "\t66\t-14\t15", + "\t65\t-14\t15", + "\t64\t-14\t14", + "\t63\t-14\t12", + "\t62\t-12\t10", + "\t61\t-9\t9", + "\t60\t-7\t7", + "\t59\t-4\t6", + "Oresund", + "\t57\t11\t13", + "\t56\t11\t13", + "\t55\t11\t13", + "\t54\t11\t13", + "Ozero Mogotoyevo", + "\t73\t143\t147", + "\t72\t143\t147", + "\t71\t143\t147", + "Pacific Ocean", + "\t59\t-140\t-135", + "\t58\t-146\t-134", + "\t57\t-153\t-132\t161\t164", + "\t56\t-159\t-132\t161\t166", + "\t55\t-164\t-131\t160\t168", + "\t54\t-167\t-131\t157\t170", + "\t53\t-172\t-130\t157\t172", + "\t52\t-180\t-129\t156\t179", + "\t51\t-180\t-126\t155\t180", + "\t50\t-180\t-123\t154\t180", + "\t49\t-180\t-123\t153\t180", + "\t48\t-180\t-123\t152\t180", + "\t47\t-180\t-122\t150\t180", + "\t46\t-180\t-122\t148\t180", + "\t45\t-180\t-122\t147\t180", + "\t44\t-180\t-122\t143\t180", + "\t43\t-180\t-123\t142\t180", + "\t42\t-180\t-123\t141\t180", + "\t41\t-180\t-123\t140\t180", + "\t40\t-180\t-122\t140\t180", + "\t39\t-180\t-121\t139\t180", + "\t38\t-180\t-121\t139\t180", + "\t37\t-180\t-120\t139\t180", + "\t36\t-180\t-119\t138\t180", + "\t35\t-180\t-117\t138\t180", + "\t34\t-180\t-116\t138\t180", + "\t33\t-180\t-115\t138\t180", + "\t32\t-180\t-115\t138\t180", + "\t31\t-180\t-114\t139\t180", + "\t30\t-180\t-113\t139\t180", + "\t29\t-180\t-113\t139\t180", + "\t28\t-180\t-113\t140\t180", + "\t27\t-180\t-111\t141\t180", + "\t26\t-180\t-111\t141\t180", + "\t25\t-180\t-110\t141\t180", + "\t24\t-180\t-105\t140\t180", + "\t23\t-180\t-104\t140\t180", + "\t22\t-180\t-104\t140\t180", + "\t21\t-180\t-104\t140\t180", + "\t20\t-180\t-103\t140\t180", + "\t19\t-180\t-101\t143\t180", + "\t18\t-180\t-99\t144\t180", + "\t17\t-180\t-97\t145\t180", + "\t16\t-180\t-91\t145\t180", + "\t15\t-180\t-90\t144\t180", + "\t14\t-180\t-86\t144\t180", + "\t13\t-180\t-85\t143\t180", + "\t12\t-180\t-84\t141\t180", + "\t11\t-180\t-84\t140\t180", + "\t10\t-180\t-82\t138\t180", + 
"\t9\t-180\t-80\t136\t180", + "\t8\t-180\t-76\t135\t180", + "\t7\t-180\t-76\t133\t180", + "\t6\t-180\t-76\t132\t180", + "\t5\t-180\t-76\t130\t180", + "\t4\t-180\t-76\t128\t180", + "\t3\t-180\t-76\t127\t180", + "\t2\t-180\t-76\t127\t180", + "\t1\t-180\t-76\t127\t180", + "\t0\t-180\t-77\t128\t180", + "\t-1\t-180\t-78\t128\t180", + "Pacific Ocean", + "\t4\t171\t173", + "\t3\t-93\t-90\t170\t174", + "\t2\t-93\t-89\t170\t174", + "\t1\t-180\t-79\t130\t180", + "\t0\t-180\t-79\t130\t180", + "\t-1\t-180\t-79\t130\t180", + "\t-2\t-180\t-79\t133\t180", + "\t-3\t-180\t-79\t133\t142\t145\t180", + "\t-4\t-180\t-79\t133\t136\t150\t180", + "\t-5\t-180\t-78\t152\t180", + "\t-6\t-180\t-78\t153\t180", + "\t-7\t-180\t-77\t154\t180", + "\t-8\t-180\t-77\t156\t180", + "\t-9\t-180\t-76\t158\t180", + "\t-10\t-180\t-76\t160\t180", + "\t-11\t-180\t-75\t160\t180", + "\t-12\t-180\t-75\t166\t180", + "\t-13\t-180\t-74\t166\t180", + "\t-14\t-180\t-73\t166\t180", + "\t-15\t-180\t-71\t166\t180", + "\t-16\t-180\t-69\t167\t180", + "\t-17\t-180\t-69\t167\t180", + "\t-18\t-180\t-69\t167\t180", + "\t-19\t-180\t-69\t168\t180", + "\t-20\t-180\t-69\t167\t180", + "\t-21\t-180\t-69\t166\t180", + "\t-22\t-180\t-69\t165\t180", + "\t-23\t-180\t-69\t164\t180", + "\t-24\t-180\t-69\t163\t180", + "\t-25\t-180\t-69\t161\t180", + "\t-26\t-180\t-69\t160\t180", + "\t-27\t-180\t-69\t159\t180", + "\t-28\t-180\t-69\t158\t180", + "\t-29\t-180\t-70\t158\t180", + "\t-30\t-180\t-70\t158\t180", + "\t-31\t-180\t-70\t158\t180", + "\t-32\t-180\t-70\t158\t180", + "\t-33\t-180\t-70\t161\t180", + "\t-34\t-180\t-70\t165\t180", + "\t-35\t-180\t-70\t169\t180", + "\t-36\t-180\t-71\t172\t180", + "\t-37\t-180\t-71\t173\t180", + "\t-38\t-180\t-72\t174\t180", + "\t-39\t-180\t-72\t175\t180", + "\t-40\t-180\t-72\t173\t180", + "\t-41\t-180\t-72\t172\t180", + "\t-42\t-180\t-72\t170\t180", + "\t-43\t-180\t-72\t169\t180", + "\t-44\t-180\t-72\t169\t180", + "\t-45\t-180\t-72\t166\t180", + "\t-46\t-180\t-72\t166\t180", + "\t-47\t-180\t-72\t165\t180", 
+ "\t-48\t-180\t-72\t165\t180", + "\t-49\t-180\t-72\t165\t180", + "\t-50\t-180\t-71\t165\t180", + "\t-51\t-180\t-71\t165\t180", + "\t-52\t-180\t-71\t165\t180", + "\t-53\t-180\t-68\t165\t180", + "\t-54\t-180\t-67\t165\t180", + "\t-55\t-180\t-67\t165\t180", + "\t-56\t-180\t-67\t165\t180", + "\t-57\t-180\t-67\t165\t180", + "\t-58\t-180\t-67\t165\t180", + "\t-59\t-180\t-67\t165\t180", + "\t-60\t-180\t-67\t165\t180", + "\t-61\t-180\t-67\t165\t180", + "Palk Strait", + "\t11\t78\t80", + "\t10\t77\t81", + "\t9\t77\t81", + "\t8\t77\t81", + "\t7\t78\t80", + "Pamlico Sound", + "\t36\t-78\t-74", + "\t35\t-78\t-74", + "\t34\t-78\t-74", + "\t33\t-77\t-75", + "Peacock Sound", + "\t-71\t-103\t-94", + "\t-72\t-103\t-94", + "\t-73\t-103\t-94", + "\t-74\t-102\t-94", + "Persian Gulf", + "\t31\t46\t51", + "\t30\t46\t51", + "\t29\t46\t52", + "\t28\t46\t57", + "\t27\t47\t58", + "\t26\t47\t58", + "\t25\t48\t58", + "\t24\t49\t57", + "\t23\t49\t55", + "\t22\t50\t53", + "Philippine Sea", + "\t36\t135\t139", + "\t35\t135\t140", + "\t34\t132\t141", + "\t33\t130\t141", + "\t32\t129\t141", + "\t31\t129\t141", + "\t30\t129\t142", + "\t29\t128\t143", + "\t28\t127\t143", + "\t27\t126\t143", + "\t26\t126\t143", + "\t25\t120\t143", + "\t24\t120\t143", + "\t23\t119\t143", + "\t22\t119\t144", + "\t21\t119\t146", + "\t20\t119\t146", + "\t19\t120\t147", + "\t18\t121\t147", + "\t17\t120\t147", + "\t16\t120\t147", + "\t15\t120\t147", + "\t14\t120\t147", + "\t13\t120\t146", + "\t12\t121\t146", + "\t11\t123\t145", + "\t10\t124\t144", + "\t9\t124\t142", + "\t8\t125\t141", + "\t7\t125\t139", + "\t6\t124\t137", + "\t5\t124\t136", + "\t4\t124\t134", + "\t3\t124\t133", + "\t2\t124\t131", + "\t1\t127\t129", + "Porpoise Bay", + "\t-65\t125\t131", + "\t-66\t125\t131", + "\t-67\t125\t131", + "\t-68\t126\t130", + "Prince ALbert Sound", + "\t71\t-118\t-110", + "\t70\t-118\t-110", + "\t69\t-118\t-110", + "Prince of Wales Strait", + "\t74\t-118\t-113", + "\t73\t-121\t-113", + "\t72\t-121\t-113", + "\t71\t-121\t-115", + 
"\t70\t-121\t-117", + "Prince William Sound", + "\t62\t-149\t-145", + "\t61\t-149\t-144", + "\t60\t-149\t-144", + "\t59\t-149\t-144", + "\t58\t-148\t-146", + "Prydz Bay", + "\t-66\t68\t75", + "\t-67\t68\t80", + "\t-68\t66\t80", + "\t-69\t66\t80", + "\t-70\t66\t78", + "\t-71\t65\t74", + "\t-72\t65\t72", + "\t-73\t65\t71", + "\t-74\t65\t68", + "Puget Sound", + "\t49\t-123\t-121", + "\t48\t-124\t-121", + "\t47\t-124\t-121", + "\t46\t-124\t-121", + "Qiongzhou Strait", + "\t21\t108\t111", + "\t20\t108\t111", + "\t19\t108\t111", + "\t18\t108\t110", + "Queen Charlotte Sound", + "\t54\t-130\t-128", + "\t53\t-132\t-127", + "\t52\t-132\t-126", + "\t51\t-132\t-126", + "\t50\t-132\t-126", + "\t49\t-130\t-126", + "Queen Charlotte Straight", + "\t52\t-128\t-125", + "\t51\t-128\t-123", + "\t50\t-128\t-123", + "\t49\t-128\t-123", + "Ragay Gulf", + "\t14\t121\t124", + "\t13\t121\t124", + "\t12\t121\t124", + "\t11\t122\t124", + "Red Sea", + "\t29\t33\t36", + "\t28\t32\t36", + "\t27\t32\t37", + "\t26\t32\t38", + "\t25\t32\t39", + "\t24\t33\t39", + "\t23\t34\t40", + "\t22\t34\t40", + "\t21\t34\t41", + "\t20\t35\t42", + "\t19\t36\t42", + "\t18\t36\t43", + "\t17\t36\t43", + "\t16\t37\t43", + "\t15\t38\t44", + "\t14\t38\t44", + "\t13\t39\t44", + "\t12\t40\t44", + "\t11\t41\t44", + "Richard Collinson Inlet", + "\t74\t-115\t-113", + "\t73\t-115\t-112", + "\t72\t-115\t-112", + "\t71\t-115\t-112", + "Rio de la Plata", + "\t-31\t-59\t-57", + "\t-32\t-59\t-57", + "\t-33\t-59\t-53", + "\t-34\t-59\t-53", + "\t-35\t-59\t-53", + "\t-36\t-58\t-54", + "\t-37\t-58\t-55", + "Robeson Channel", + "\t83\t-63\t-56", + "\t82\t-63\t-55", + "\t81\t-63\t-55", + "\t80\t-62\t-55", + "Ronne Entrance", + "\t-70\t-76\t-74", + "\t-71\t-76\t-72", + "\t-72\t-76\t-72", + "\t-73\t-76\t-72", + "Ross Sea", + "\t-70\t169\t180", + "\t-71\t167\t180", + "\t-72\t167\t180", + "\t-73\t167\t180", + "\t-74\t168\t180", + "\t-75\t168\t180", + "\t-76\t165\t180", + "\t-77\t160\t180", + "\t-78\t158\t180", + "\t-79\t157\t180", + 
"\t-80\t157\t180", + "\t-81\t157\t180", + "\t-82\t159\t180", + "\t-83\t160\t180", + "\t-84\t166\t180", + "\t-85\t176\t180", + "Ross Sea", + "\t-70\t-180\t-101", + "\t-71\t-180\t-101", + "\t-72\t-180\t-101", + "\t-73\t-180\t-101", + "\t-74\t-180\t-107", + "\t-75\t-180\t-130\t-125\t-113", + "\t-76\t-180\t-135", + "\t-77\t-180\t-144", + "\t-78\t-180\t-147", + "\t-79\t-180\t-147", + "\t-80\t-180\t-147", + "\t-81\t-180\t-147", + "\t-82\t-180\t-149", + "\t-83\t-180\t-152", + "\t-84\t-180\t-155", + "\t-85\t-180\t-155", + "\t-86\t-159\t-155", + "Salton Sea", + "\t34\t-117\t-114", + "\t33\t-117\t-114", + "\t32\t-117\t-114", + "Samar Sea", + "\t14\t122\t124", + "\t13\t122\t125", + "\t12\t122\t126", + "\t11\t122\t126", + "\t10\t123\t126", + "San Francisco Bay", + "\t39\t-123\t-120", + "\t38\t-123\t-120", + "\t37\t-123\t-120", + "\t36\t-123\t-121", + "Sargasso Sea", + "\t36\t-68\t-51", + "\t35\t-69\t-50", + "\t34\t-70\t-49", + "\t33\t-70\t-49", + "\t32\t-71\t-49", + "\t31\t-71\t-49", + "\t30\t-71\t-49", + "\t29\t-71\t-49", + "\t28\t-71\t-49", + "\t27\t-71\t-49", + "\t26\t-71\t-49", + "\t25\t-71\t-49", + "\t24\t-71\t-49", + "\t23\t-70\t-49", + "\t22\t-69\t-49", + "\t21\t-68\t-49", + "\t20\t-67\t-49", + "\t19\t-64\t-50", + "Savu Sea", + "\t-7\t117\t126", + "\t-8\t117\t126", + "\t-9\t117\t126", + "\t-10\t117\t125", + "\t-11\t119\t124", + "Scotia Sea", + "\t-50\t-59\t-53", + "\t-51\t-60\t-46", + "\t-52\t-60\t-40", + "\t-53\t-60\t-35", + "\t-54\t-60\t-35", + "\t-55\t-59\t-35", + "\t-56\t-59\t-36", + "\t-57\t-58\t-37", + "\t-58\t-58\t-39", + "\t-59\t-57\t-40", + "\t-60\t-57\t-41", + "\t-61\t-56\t-43", + "\t-62\t-56\t-49", + "Sea of Azov", + "\t48\t36\t40", + "\t47\t33\t40", + "\t46\t33\t40", + "\t45\t33\t39", + "\t44\t33\t39", + "Sea of Crete", + "\t39\t22\t24", + "\t38\t21\t25", + "\t37\t21\t29", + "\t36\t21\t29", + "\t35\t22\t29", + "\t34\t22\t28", + "Sea of Japan", + "\t52\t139\t143", + "\t51\t139\t143", + "\t50\t139\t143", + "\t49\t138\t143", + "\t48\t137\t143", + 
"\t47\t137\t143", + "\t46\t135\t143", + "\t45\t134\t143", + "\t44\t130\t143", + "\t43\t129\t142", + "\t42\t128\t142", + "\t41\t127\t141", + "\t40\t127\t141", + "\t39\t127\t141", + "\t38\t127\t141", + "\t37\t127\t140", + "\t36\t126\t139", + "\t35\t125\t138", + "\t34\t125\t137", + "\t33\t125\t133", + "\t32\t125\t131", + "\t31\t126\t130", + "Sea of Marmara", + "\t42\t26\t30", + "\t41\t25\t30", + "\t40\t25\t30", + "\t39\t25\t30", + "Sea of Okhotsk", + "\t60\t141\t156", + "\t59\t139\t156", + "\t58\t137\t157", + "\t57\t137\t157", + "\t56\t137\t157", + "\t55\t136\t157", + "\t54\t136\t157", + "\t53\t136\t157", + "\t52\t136\t139\t141\t158", + "\t51\t142\t158", + "\t50\t142\t158", + "\t49\t141\t157", + "\t48\t141\t156", + "\t47\t141\t155", + "\t46\t140\t154", + "\t45\t140\t153", + "\t44\t140\t151", + "\t43\t141\t149", + "\t42\t143\t148", + "Selat Bali", + "\t-7\t113\t116", + "\t-8\t113\t116", + "\t-9\t113\t116", + "Selat Dampier", + "\t1\t128\t132", + "\t0\t128\t132", + "\t-1\t128\t132", + "\t-2\t130\t132", + "Seno de Skyring", + "\t-51\t-74\t-70", + "\t-52\t-74\t-70", + "\t-53\t-74\t-70", + "\t-54\t-74\t-71", + "Seno Otway", + "\t-51\t-72\t-70", + "\t-52\t-73\t-70", + "\t-53\t-73\t-70", + "\t-54\t-73\t-70", + "Shark Bay", + "\t-23\t112\t114", + "\t-24\t112\t115", + "\t-25\t112\t115", + "\t-26\t112\t115", + "\t-27\t112\t115", + "Shelikhova Gulf", + "\t63\t162\t166", + "\t62\t155\t166", + "\t61\t153\t166", + "\t60\t153\t165", + "\t59\t153\t164", + "\t58\t153\t162", + "\t57\t154\t160", + "\t56\t155\t158", + "Sherman Basin", + "\t69\t-99\t-97", + "\t68\t-99\t-96", + "\t67\t-99\t-96", + "\t66\t-99\t-96", + "Sibuyan Sea", + "\t14\t120\t123", + "\t13\t120\t124", + "\t12\t120\t124", + "\t11\t120\t124", + "\t10\t120\t124", + "Skagerrak", + "\t60\t8\t12", + "\t59\t6\t12", + "\t58\t6\t12", + "\t57\t6\t12", + "\t56\t6\t11", + "\t55\t7\t9", + "Smith Sound", + "\t53\t-128\t-125", + "\t52\t-129\t-125", + "\t51\t-129\t-125", + "\t50\t-129\t-125", + "Sognefjorden", + "\t62\t3\t8", + 
"\t61\t3\t8", + "\t60\t3\t8", + "\t59\t4\t8", + "Solomon Sea", + "\t-3\t151\t155", + "\t-4\t146\t155", + "\t-5\t145\t157", + "\t-6\t145\t160", + "\t-7\t145\t161", + "\t-8\t146\t162", + "\t-9\t147\t163", + "\t-10\t147\t163", + "\t-11\t148\t163", + "\t-12\t152\t162", + "South China Sea", + "\t24\t112\t121", + "\t23\t112\t121", + "\t22\t109\t122", + "\t21\t108\t123", + "\t20\t108\t123", + "\t19\t107\t123", + "\t18\t105\t123", + "\t17\t105\t123", + "\t16\t105\t121", + "\t15\t106\t121", + "\t14\t107\t121", + "\t13\t108\t121", + "\t12\t107\t121", + "\t11\t104\t121", + "\t10\t104\t120", + "\t9\t103\t120", + "\t8\t102\t119", + "\t7\t101\t118", + "\t6\t101\t117", + "\t5\t101\t117", + "\t4\t101\t117", + "\t3\t102\t116", + "\t2\t102\t114", + "\t1\t101\t113", + "\t0\t101\t112", + "\t-1\t101\t111", + "\t-2\t103\t111", + "\t-3\t103\t111", + "\t-4\t105\t107", + "Southern Ocean", + "\t-59\t-180\t180", + "\t-60\t-180\t180", + "\t-61\t-180\t180", + "\t-62\t-180\t180", + "\t-63\t-180\t180", + "\t-64\t-180\t180", + "\t-65\t-180\t91\t103\t180", + "\t-66\t-180\t87\t110\t180", + "\t-67\t-180\t52\t54\t85\t112\t180", + "\t-68\t-180\t51\t54\t84\t112\t122\t141\t180", + "\t-69\t-180\t-69\t-66\t44\t74\t80\t145\t180", + "\t-70\t-180\t-73\t-64\t33\t154\t180", + "\t-71\t-180\t-79\t-63\t33\t159\t180", + "\t-72\t-180\t-85\t-62\t2\t24\t27\t161\t163\t166\t180", + "\t-73\t-100\t-91", + "St. Helena Bay", + "\t-30\t16\t19", + "\t-31\t16\t19", + "\t-32\t16\t19", + "\t-33\t16\t19", + "St. 
Lawrence River", + "\t51\t-67\t-63", + "\t50\t-69\t-63", + "\t49\t-72\t-63", + "\t48\t-72\t-63", + "\t47\t-74\t-66", + "\t46\t-75\t-68", + "\t45\t-75\t-69", + "\t44\t-75\t-72", + "Stettiner Haff", + "\t55\t12\t14", + "\t54\t12\t15", + "\t53\t12\t15", + "\t52\t12\t15", + "Storfjorden", + "\t79\t17\t22", + "\t78\t16\t22", + "\t77\t15\t22", + "\t76\t15\t22", + "\t75\t15\t19", + "Strait of Belle Isle", + "\t53\t-56\t-54", + "\t52\t-58\t-54", + "\t51\t-58\t-54", + "\t50\t-58\t-54", + "Strait of Georgia", + "\t51\t-126\t-122", + "\t50\t-126\t-121", + "\t49\t-126\t-121", + "\t48\t-126\t-121", + "\t47\t-124\t-121", + "Strait of Gibraltar", + "\t37\t-7\t-4", + "\t36\t-7\t-4", + "\t35\t-7\t-4", + "\t34\t-6\t-4", + "Strait of Juan de Fuca", + "\t49\t-125\t-121", + "\t48\t-125\t-121", + "\t47\t-125\t-121", + "Strait of Malacca", + "\t9\t97\t99", + "\t8\t97\t100", + "\t7\t95\t101", + "\t6\t94\t101", + "\t5\t94\t101", + "\t4\t94\t102", + "\t3\t96\t103", + "\t2\t97\t104", + "\t1\t98\t104", + "\t0\t99\t104", + "\t-1\t101\t104", + "Strait of Singapore", + "\t2\t102\t105", + "\t1\t102\t105", + "\t0\t102\t105", + "Straits of Florida", + "\t27\t-81\t-77", + "\t26\t-82\t-77", + "\t25\t-84\t-77", + "\t24\t-84\t-77", + "\t23\t-84\t-77", + "\t22\t-84\t-78", + "Sulu Sea", + "\t13\t118\t122", + "\t12\t118\t123", + "\t11\t118\t123", + "\t10\t117\t124", + "\t9\t116\t124", + "\t8\t115\t124", + "\t7\t115\t124", + "\t6\t115\t123", + "\t5\t115\t123", + "\t4\t116\t121", + "Sulzberger Bay", + "\t-75\t-153\t-144", + "\t-76\t-159\t-144", + "\t-77\t-159\t-144", + "\t-78\t-159\t-144", + "Surigao Strait", + "\t11\t124\t126", + "\t10\t124\t126", + "\t9\t124\t126", + "\t8\t124\t126", + "Taiwan Strait", + "\t26\t117\t122", + "\t25\t116\t122", + "\t24\t116\t122", + "\t23\t116\t121", + "\t22\t116\t121", + "Tasman Sea", + "\t-28\t152\t160", + "\t-29\t152\t160", + "\t-30\t151\t162", + "\t-31\t150\t166", + "\t-32\t150\t170", + "\t-33\t149\t174", + "\t-34\t149\t174", + "\t-35\t148\t175", + "\t-36\t148\t175", + 
"\t-37\t147\t175", + "\t-38\t146\t176", + "\t-39\t146\t176", + "\t-40\t146\t176", + "\t-41\t146\t176", + "\t-42\t145\t175", + "\t-43\t145\t172", + "\t-44\t145\t171", + "\t-45\t147\t169", + "\t-46\t150\t168", + "\t-47\t152\t168", + "\t-48\t155\t168", + "\t-49\t158\t167", + "\t-50\t160\t167", + "\t-51\t163\t167", + "Tatar Strait", + "\t54\t139\t142", + "\t53\t139\t142", + "\t52\t139\t142", + "\t51\t140\t142", + "\t50\t140\t142", + "Tayabas Bay", + "\t14\t119\t123", + "\t13\t119\t123", + "\t12\t119\t123", + "The North Western Passages", + "\t81\t-101\t-95", + "\t80\t-108\t-90\t-88\t-82", + "\t79\t-114\t-80", + "\t78\t-117\t-80", + "\t77\t-120\t-80", + "\t76\t-120\t-81", + "\t75\t-120\t-78", + "\t74\t-120\t-76", + "\t73\t-106\t-76", + "\t72\t-106\t-76", + "\t71\t-118\t-116\t-106\t-83", + "\t70\t-119\t-112\t-108\t-83", + "\t69\t-119\t-85", + "\t68\t-119\t-92", + "\t67\t-118\t-92", + "\t66\t-116\t-106\t-104\t-94", + "\t65\t-97\t-94", + "Timor Sea", + "\t-7\t125\t131", + "\t-8\t123\t131", + "\t-9\t121\t132", + "\t-10\t121\t133", + "\t-11\t121\t133", + "\t-12\t122\t133", + "\t-13\t124\t133", + "\t-14\t125\t131", + "Torres Strait", + "\t-8\t140\t144", + "\t-9\t140\t144", + "\t-10\t140\t144", + "\t-11\t141\t143", + "\t-12\t141\t143", + "Trondheimsfjorden", + "\t65\t10\t12", + "\t64\t7\t12", + "\t63\t7\t12", + "\t62\t7\t12", + "Tsugaru Strait", + "\t42\t139\t142", + "\t41\t139\t142", + "\t40\t139\t142", + "\t39\t139\t142", + "Tyrrhenian Sea", + "\t45\t8\t11", + "\t44\t8\t11", + "\t43\t8\t12", + "\t42\t8\t14", + "\t41\t8\t16", + "\t40\t8\t17", + "\t39\t7\t17", + "\t38\t7\t17", + "\t37\t7\t17", + "\t36\t10\t14", + "Uchiura Bay", + "\t43\t139\t144", + "\t42\t139\t144", + "\t41\t139\t144", + "\t40\t139\t143", + "\t39\t140\t142", + "Uda Bay", + "\t57\t136\t139", + "\t56\t134\t139", + "\t55\t134\t139", + "\t54\t134\t139", + "\t53\t134\t139", + "\t52\t135\t138", + "Ungava Bay", + "\t61\t-71\t-63", + "\t60\t-71\t-63", + "\t59\t-71\t-63", + "\t58\t-71\t-64", + "\t57\t-71\t-64", + 
"\t56\t-70\t-66", + "Uummannaq Fjord", + "\t73\t-54\t-52", + "\t72\t-55\t-50", + "\t71\t-55\t-49", + "\t70\t-55\t-49", + "\t69\t-55\t-49", + "Vestfjorden", + "\t69\t12\t18", + "\t68\t11\t18", + "\t67\t11\t18", + "\t66\t11\t17", + "\t65\t12\t14", + "Vil'kitskogo Strait", + "\t79\t99\t106", + "\t78\t99\t106", + "\t77\t99\t106", + "\t76\t99\t106", + "\t75\t99\t101", + "Vincennes Bay", + "\t-65\t103\t111", + "\t-66\t103\t111", + "\t-67\t103\t111", + "Visayan Sea", + "\t13\t122\t124", + "\t12\t121\t125", + "\t11\t121\t125", + "\t10\t121\t125", + "\t9\t121\t125", + "Viscount Melville Sound", + "\t76\t-110\t-103", + "\t75\t-115\t-103", + "\t74\t-116\t-103", + "\t73\t-116\t-103", + "\t72\t-116\t-104", + "\t71\t-114\t-107", + "Waddenzee", + "\t54\t3\t7", + "\t53\t3\t7", + "\t52\t3\t7", + "\t51\t3\t6", + "Wager Bay", + "\t66\t-92\t-86", + "\t65\t-92\t-86", + "\t64\t-92\t-86", + "Weddell Sea", + "\t-70\t-62\t-9", + "\t-71\t-63\t-9", + "\t-72\t-63\t-9", + "\t-73\t-64\t-10", + "\t-74\t-66\t-13", + "\t-75\t-78\t-14", + "\t-76\t-84\t-17", + "\t-77\t-84\t-25", + "\t-78\t-84\t-22", + "\t-79\t-84\t-22", + "\t-80\t-82\t-22", + "\t-81\t-79\t-23", + "\t-82\t-70\t-36", + "\t-83\t-66\t-50\t-48\t-42", + "\t-84\t-62\t-57", + "White Sea", + "\t69\t37\t45", + "\t68\t30\t33\t37\t45", + "\t67\t30\t45", + "\t66\t30\t45", + "\t65\t31\t45", + "\t64\t33\t41", + "\t63\t33\t41", + "\t62\t35\t38", + "Wrigley Gulf", + "\t-72\t-131\t-124", + "\t-73\t-135\t-123", + "\t-74\t-135\t-123", + "\t-75\t-135\t-123", + "Wynniat Bay", + "\t73\t-112\t-109", + "\t72\t-112\t-109", + "\t71\t-112\t-109", + "Yellow Sea", + "\t41\t123\t125", + "\t40\t120\t126", + "\t39\t120\t126", + "\t38\t119\t127", + "\t37\t119\t127", + "\t36\t118\t127", + "\t35\t118\t127", + "\t34\t118\t127", + "\t33\t118\t127", + "\t32\t119\t127", + "\t31\t119\t125", + "\t30\t120\t123", + "Yellowstone Lake", + "\t45\t-111\t-109", + "\t44\t-111\t-109", + "\t43\t-111\t-109", + "Yenisey Gulf", + "\t74\t77\t81", + "\t73\t77\t83", + "\t72\t77\t84", + 
"\t71\t77\t84", + "\t70\t79\t84", + "\t69\t81\t84", + "Yucatan Channel", + "\t23\t-86\t-84", + "\t22\t-88\t-83", + "\t21\t-88\t-83", + "\t20\t-88\t-83", + NULL +}; + diff --git a/api/sqnutils.h b/api/sqnutils.h index d0c01c53..51206637 100644 --- a/api/sqnutils.h +++ b/api/sqnutils.h @@ -29,7 +29,7 @@ * * Version Creation Date: 9/2/97 * -* $Revision: 6.483 $ +* $Revision: 6.559 $ * * File Description: * @@ -192,10 +192,14 @@ NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, Boo NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean partial3); NLM_EXTERN void FreeAllFuzz (SeqLocPtr location); NLM_EXTERN Boolean LocationHasNullsBetween (SeqLocPtr location); +NLM_EXTERN void NormalizeNullsBetween (SeqLocPtr location); NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location); NLM_EXTERN void SetSeqLocPartialSet (SeqLocPtr location, ValNodePtr vnp); NLM_EXTERN Boolean SeqLocBadSortOrder (BioseqPtr bsp, SeqLocPtr slp); NLM_EXTERN Boolean SeqLocMixedStrands (BioseqPtr bsp, SeqLocPtr slp); +/* Check/SetSeqLocPartialEx take lim argument - 3 is tr, 4 is tl */ +NLM_EXTERN Boolean CheckSeqLocForPartialEx (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr, Int4Ptr limptr); +NLM_EXTERN void SetSeqLocPartialEx (SeqLocPtr location, Boolean partial5, Boolean partial3, Int4 lim); /* GetBioseqGivenSeqLoc returns a segmented bioseq if the SeqLoc is to the parts */ @@ -252,6 +256,7 @@ NLM_EXTERN void ReassignFeatureIDs (SeqEntryPtr sep); NLM_EXTERN void LinkCDSmRNAbyOverlap (SeqEntryPtr sep); NLM_EXTERN void LinkCDSmRNAbyProduct (SeqEntryPtr sep); NLM_EXTERN void LinkCDSmRNAbyLabel (SeqEntryPtr sep); +NLM_EXTERN void LinkCDSmRNAbyLabelAndLocation (SeqEntryPtr sep); NLM_EXTERN void StripFeatIDXrefAsnFilter (AsnIoPtr aip, AsnIoPtr aop); NLM_EXTERN void StripSeqDataGapAsnFilter (AsnIoPtr aip, AsnIoPtr aop); @@ -259,7 +264,7 @@ NLM_EXTERN void StripNewFeatMolInfoFieldsAsnFilter (AsnIoPtr aip, AsnIoPtr aop); NLM_EXTERN void 
StripPCRPrimerAsnFilter (AsnIoPtr aip, AsnIoPtr aop); NLM_EXTERN void StripOrgNamePgcodeAsnFilter (AsnIoPtr aip, AsnIoPtr aop); NLM_EXTERN void StripGeneRnaPcrAsnFilter (AsnIoPtr aip, AsnIoPtr aop); - +NLM_EXTERN void StripSeqFeatSupportAsnFilter (AsnIoPtr aip, AsnIoPtr aop); /* functions to parse [org=Drosophila melanogaster] and [gene=lacZ] from titles */ /* for example, passing "gene" to SqnTagFind returns "lacZ" */ @@ -566,11 +571,29 @@ NLM_EXTERN BioseqPtr ReadDeltaFastaWithEmptyDefline (FILE *fp, Uint2Ptr entityID feature table with ReadAsnFastaOrFlatFile) to stand-alone gene features or protein features and protein bioseqs. It processes ALL features in the list - you give it the FIRST sfp. */ -NLM_EXTERN void PromoteXrefs (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID); -NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, Boolean include_stop, - Boolean remove_trailingX, Boolean gen_prod_set); -NLM_EXTERN void PromoteXrefsExEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, Boolean include_stop, - Boolean remove_trailingX, Boolean gen_prod_set, Boolean force_local_id); +NLM_EXTERN void PromoteXrefs ( + SeqFeatPtr sfp, + BioseqPtr bsp, + Uint2 entityID +); +NLM_EXTERN void PromoteXrefsEx ( + SeqFeatPtr sfp, + BioseqPtr bsp, + Uint2 entityID, + Boolean include_stop, + Boolean remove_trailingX, + Boolean gen_prod_set +); +NLM_EXTERN void PromoteXrefsExEx ( + SeqFeatPtr sfp, + BioseqPtr bsp, + Uint2 entityID, + Boolean include_stop, + Boolean remove_trailingX, + Boolean gen_prod_set, + Boolean force_local_id, + BoolPtr seq_fetch_failP +); /* SetEmptyGeneticCodes imposes genetic code on all coding regions within a feature table */ @@ -620,15 +643,22 @@ NLM_EXTERN void CleanUpSeqFeat (SeqFeatPtr sfp, Boolean isEmblOrDdbj, Boolean is NLM_EXTERN void CleanUpSeqLoc (SeqLocPtr slp); +NLM_EXTERN void CleanupSubSourceOrgModOtherFeat (SeqFeatPtr sfp, Pointer userdata); +NLM_EXTERN void CleanupSubSourceOrgModOtherDesc (SeqDescrPtr sdp, Pointer 
userdata); + NLM_EXTERN void CleanUpPubdescAuthors (PubdescPtr pdp); NLM_EXTERN void CleanUpPubdescBody (PubdescPtr pdp, Boolean stripSerial); +NLM_EXTERN void CleanStructuredComment (UserObjectPtr uop); + NLM_EXTERN void SortSeqEntryQualifiers (SeqEntryPtr sep); /* BasicSeqAnnotCleanup is for cleaning up contents of separate named Seq-annot objects */ NLM_EXTERN void BasicSeqAnnotCleanup (SeqAnnotPtr sap); +NLM_EXTERN void RemoveUnnecessaryGeneXrefs (SeqFeatPtr sfp, Pointer userdata); + /* CautiousSeqEntryCleanup is a gradual consolidation and replacement of functions in SeriousSeqEntryCleanup, which does change the itemID structure, and is intended to be safe for a retrofit of the ID database */ @@ -766,10 +796,12 @@ typedef void (*VisitUserFieldsFunc) (UserFieldPtr ufp, Pointer userdata); NLM_EXTERN Int4 VisitUserFieldsInUfp (UserFieldPtr ufp, Pointer userdata, VisitUserFieldsFunc callback); NLM_EXTERN Int4 VisitUserFieldsInUop (UserObjectPtr uop, Pointer userdata, VisitUserFieldsFunc callback); -/* visits all sub UserObjects if the data type is 12 - needed to pack multiple user objects on a single feature */ +/* visits all sub UserObjects if the data type is 12 - needed to pack multiple user objects on a single feature. Does not visit user objects which contain other user objects. 
*/ typedef void (*VisitUserObjectFunc) (UserObjectPtr uop, Pointer userdata); NLM_EXTERN Int4 VisitUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback); +/* Visits all user objects, even if they contain other user objects */ +NLM_EXTERN Int4 VisitAllUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback); /* explores sub UserObjects including "CombinedFeatureUserObjects" and finds by label */ @@ -914,6 +946,7 @@ NLM_EXTERN void FixNonWGSSets (ValNodePtr item_list, Pointer data, LogInfoPtr li /* structures and functions for the Discrepancy Report */ typedef void (*ClickableCallback) (ValNodePtr item_list, Pointer userdata); typedef void (*ClickableCallbackDataFree) (Pointer userdata); +typedef void (*AutofixCallback) (ValNodePtr item_list, Pointer userdata, LogInfoPtr lip); typedef struct clickableitem { @@ -927,6 +960,12 @@ typedef struct clickableitem ValNodePtr subcategories; Boolean expanded; Int4 level; + AutofixCallback autofix_func; /* note - autofix functions can be set for an + * entire category or for an individual clickable + * item. Don't set autofix functions in both + * places or they will both be called. 
+ */ + Pointer autofix_data; /* data for item-specific autofixes */ } ClickableItemData, PNTR ClickableItemPtr; extern ClickableItemPtr @@ -935,6 +974,11 @@ NewClickableItem CharPtr description_fmt, ValNodePtr item_list); +extern ClickableItemPtr +NewClickableItemNoList +(Uint4 clickable_item_type, + CharPtr description); + extern ValNodePtr ClickableItemObjectListFree (ValNodePtr vnp); extern ValNodePtr ClickableItemObjectListCopy (ValNodePtr orig); extern ClickableItemPtr ClickableItemFree (ClickableItemPtr cip); @@ -982,6 +1026,8 @@ typedef enum { DISC_SHORT_CONTIG, DISC_INCONSISTENT_BIOSRC, DISC_SUSPECT_PRODUCT_NAME, + DISC_PRODUCT_NAME_TYPO, + DISC_PRODUCT_NAME_QUICKFIX, DISC_INCONSISTENT_BIOSRC_DEFLINE, DISC_PARTIAL_CDS_IN_COMPLETE_SEQUENCE, DISC_EC_NUMBER_ON_HYPOTHETICAL_PROTEIN, @@ -1080,6 +1126,7 @@ typedef enum { DISC_STRAIN_TAXNAME_MISMATCH, DISC_HUMAN_HOST, DISC_BAD_BACTERIAL_GENE_NAME, + TEST_BAD_GENE_NAME, ONCALLER_ORDERED_LOCATION, ONCALLER_COMMENT_PRESENT, ONCALLER_DEFLINE_ON_SET, @@ -1087,6 +1134,30 @@ typedef enum { SHORT_PROT_SEQUENCES, TEST_EXON_ON_MRNA, TEST_HAS_PROJECT_ID, + ONCALLER_HAS_STANDARD_NAME, + ONCALLER_MISSING_STRUCTURED_COMMENTS, + DISC_REQUIRED_STRAIN, + MISSING_GENOMEASSEMBLY_COMMENTS, + DISC_BACTERIAL_TAX_STRAIN_MISMATCH, + TEST_CDS_HAS_CDD_XREF, + TEST_UNUSUAL_NT, + TEST_LOW_QUALITY_REGION, + TEST_ORGANELLE_NOT_GENOMIC, + TEST_UNWANTED_SPACER, + TEST_ORGANELLE_PRODUCTS, + TEST_SP_NOT_UNCULTURED, + TEST_BAD_MRNA_QUAL, + TEST_UNNECESSARY_ENVIRONMENTAL, + TEST_UNNECESSARY_VIRUS_GENE, + TEST_UNWANTED_SET_WRAPPER, + TEST_MISSING_PRIMER, + TEST_UNUSUAL_MISC_RNA, + TEST_AMPLIFIED_PRIMERS_NO_ENVIRONMENTAL_SAMPLE, + TEST_DUP_GENES_OPPOSITE_STRANDS, + TEST_SMALL_GENOME_SET_PROBLEM, + TEST_OVERLAPPING_RRNAS, + TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES, + TEST_TAXNAME_NOT_IN_DEFLINE, MAX_DISC_TYPE } DiscrepancyType; @@ -1124,6 +1195,7 @@ extern void DisableTRNATests (DiscrepancyConfigPtr dcp); extern CharPtr 
SetDiscrepancyReportTestsFromString (CharPtr list, Boolean enable, DiscrepancyConfigPtr dcp); extern void ConfigureForBigSequence (DiscrepancyConfigPtr dcp); extern void ConfigureForGenomes (DiscrepancyConfigPtr dcp); +extern void ConfigureForReportType (DiscrepancyConfigPtr dcp, EDiscrepancyReportType report_type); typedef void (*PerformDiscrepancyTest) PROTO ((ValNodePtr PNTR, ValNodePtr)); @@ -1316,12 +1388,14 @@ typedef struct barcodetestresults Boolean failed_tests[eBarcodeTest_LAST]; BioseqPtr bsp; FloatLo n_percent; + Int4 num_trace; } BarcodeTestResultsData, PNTR BarcodeTestResultsPtr; extern BarcodeTestResultsPtr BarcodeTestResultsNew (); extern BarcodeTestResultsPtr BarcodeTestResultsFree (BarcodeTestResultsPtr res); extern BarcodeTestResultsPtr BarcodeTestResultsCopy (BarcodeTestResultsPtr res); extern ValNodePtr BarcodeTestResultsListFree (ValNodePtr res_list); +extern ValNodePtr BarcodeTestResultsExtractPass (ValNodePtr PNTR res_list); extern Boolean IsBarcodeID (SeqIdPtr sip); @@ -1330,11 +1404,8 @@ extern CharPtr BarcodeTestGenbankIdString (BioseqPtr bsp); /* This one gets discrepancies by category */ extern ValNodePtr GetBarcodeDiscrepancies (ValNodePtr sep_list, BarcodeTestConfigPtr cfg); -/* This one lists accessions that fail */ -extern ValNodePtr GetBarcodeFailedAccessionList (SeqEntryPtr sep, BarcodeTestConfigPtr cfg); extern ValNodePtr GetBarcodePassFail (SeqEntryPtr sep, BarcodeTestConfigPtr cfg); NLM_EXTERN CharPtr GetBarcodeTestFailureReasons (BarcodeTestResultsPtr res); -NLM_EXTERN BarcodeTestResultsPtr BarcodeTestResultsForBioseq (BioseqPtr bsp, BarcodeTestConfigPtr cfg); /* This one lists passes and failures, with reasons for failures */ extern void WriteBarcodeTestComprehensive (FILE *fp, ValNodePtr results_list); extern void WriteBarcodeDiscrepancies (FILE *fp, ValNodePtr results_list); @@ -1342,7 +1413,6 @@ extern void WriteBarcodeFailureReport (FILE *fp, ValNodePtr results_list); extern void WriteBarcodeTestCompliance (FILE *fp, 
ValNodePtr results_list); extern void WriteBarcodeTestComplianceEx (FILE *fp, ValNodePtr results_list, Boolean low_trace_fail); extern void WriteBarcodeTagTable (FILE *fp, ValNodePtr results_list); -NLM_EXTERN Boolean HasLowTrace (BioseqPtr bsp); NLM_EXTERN Boolean IsIBOL (BioseqPtr bsp); NLM_EXTERN Boolean @@ -1361,9 +1431,11 @@ extern Boolean PassBarcodeTests (BarcodeTestResultsPtr res); extern Boolean HasBARCODETech (BioseqPtr bsp); NLM_EXTERN void ApplyBarcodeKeywordToBioseq (BioseqPtr bsp); NLM_EXTERN Boolean BioseqHasBarcodeKeyword (BioseqPtr bsp); -NLM_EXTERN ValNodePtr GetBarcodeLowTraceList (SeqEntryPtr sep); +NLM_EXTERN Boolean BioseqHasKeyword (BioseqPtr bsp, CharPtr keyword); NLM_EXTERN void RemoveBarcodeKeywordsFromObjectList (FILE *fp, ValNodePtr object_list); +NLM_EXTERN Boolean RemoveBarcodeTechFromBioseq (BioseqPtr bsp); extern Int4 CountPolymorphismsInBioseq (BioseqPtr bsp); +NLM_EXTERN Boolean RemoveBarcodeKeywordFromBioseq (BioseqPtr bsp); extern CharPtr ExpandDiscrepancyReportTestsFromString (CharPtr list, Boolean expand, DiscReportOutputConfigPtr dcp); @@ -1429,8 +1501,13 @@ FixCapitalizationInElement NLM_EXTERN void FixCapitalizationInAuthor (AuthorPtr pAuthor); NLM_EXTERN void FixCapsInPubAffil (AffilPtr affil); +NLM_EXTERN void FixCapsInPubAffilEx (AffilPtr affil, Boolean punct_only); +NLM_EXTERN void FixCapitalizationInCountryString (CharPtr PNTR pCountry); +NLM_EXTERN void FixCapitalizationInCountryStringEx (CharPtr PNTR pCountry, Boolean punct_only); +NLM_EXTERN void FixStateAbbreviationsInAffil (AffilPtr affil, LogInfoPtr lip); NLM_EXTERN void FixAffiliationShortWordsInElement (CharPtr PNTR pEl); +NLM_EXTERN void FixKnownAbbreviationsInElement (CharPtr PNTR pEl); NLM_EXTERN void FixAbbreviationsInElement (CharPtr PNTR pEl); NLM_EXTERN void FixOrgNamesInString (CharPtr str, ValNodePtr org_names); @@ -1537,6 +1614,8 @@ NLM_EXTERN void ConvertLocalIdsToBarcodeIds (SeqEntryPtr sep); NLM_EXTERN ValNodePtr MakeTokensFromLine (CharPtr line); 
NLM_EXTERN SeqFeatPtr GetGeneForFeature (SeqFeatPtr sfp); +NLM_EXTERN SeqFeatPtr GetmRNAforCDS (SeqFeatPtr cds); +NLM_EXTERN SeqFeatPtr GetCDSformRNA (SeqFeatPtr mrna); NLM_EXTERN Boolean IsStringInSpanInList (CharPtr str, CharPtr list); @@ -1563,6 +1642,7 @@ NLM_EXTERN CharPtr GetRemovableItemName (Int4 i); typedef enum { DEFLINE_USE_FEATURES = 1, DEFLINE_COMPLETE_SEQUENCE, + DEFLINE_PARTIAL_SEQUENCE, DEFLINE_COMPLETE_GENOME, DEFLINE_PARTIAL_GENOME, DEFLINE_SEQUENCE @@ -1578,6 +1658,7 @@ typedef struct deflinefeaturerequestlist { Boolean suppress_locus_tags; ValNodePtr suppressed_feature_list; Boolean use_ncrna_note; + Boolean suppress_allele; } DeflineFeatureRequestList, PNTR DeflineFeatureRequestListPtr; NLM_EXTERN void InitFeatureRequests (DeflineFeatureRequestListPtr feature_requests); @@ -1729,6 +1810,8 @@ typedef struct sourcequaldesc Uint1 subfield; } SourceQualDescData, PNTR SourceQualDescPtr; +NLM_EXTERN int LIBCALLBACK SortVnpBySourceQualDesc (VoidPtr ptr1, VoidPtr ptr2); + NLM_EXTERN void SetRequiredModifiers (ModifierItemLocalPtr modList); NLM_EXTERN void CountModifiers (ModifierItemLocalPtr ItemList, SeqEntryPtr sep); NLM_EXTERN ValNodePtr FindBestModifiersEx(SeqEntryPtr sep, ModifierItemLocalPtr ItemList, Boolean use_new); @@ -1762,6 +1845,14 @@ BuildDefinitionLinesFromFeatureClauseLists ValNodePtr modifier_indices, OrganismDescriptionModifiersPtr odmp); +NLM_EXTERN void +BuildDefLinesFromFeatClauseListsForOneBsp +(ValNodePtr list, + ModifierItemLocalPtr modList, + ValNodePtr modifier_indices, + OrganismDescriptionModifiersPtr odmp, + BioseqPtr bsp); + NLM_EXTERN void AutoDefForSeqEntry (SeqEntryPtr sep, @@ -1806,6 +1897,7 @@ NLM_EXTERN void AddModifierLabel CharPtr modifier_text); NLM_EXTERN Boolean LIBCALLBACK IsMobileElement (SeqFeatPtr sfp); NLM_EXTERN void RemoveNucProtSetTitles (SeqEntryPtr sep); +NLM_EXTERN void RemoveMRnaTitles (SeqEntryPtr sep); NLM_EXTERN void RemoveProteinTitles (SeqEntryPtr sep); NLM_EXTERN void SetAutoDefIDModifiers 
(ModifierItemLocalPtr modList); @@ -1813,19 +1905,36 @@ NLM_EXTERN void SetAutoDefIDModifiers (ModifierItemLocalPtr modList); NLM_EXTERN ValNodePtr ReadTabTableFromFile (FILE *fp); NLM_EXTERN ValNodePtr FlipTabTableAxes (ValNodePtr row_list); NLM_EXTERN ValNodePtr FreeTabTable (ValNodePtr row_list); +NLM_EXTERN ValNodePtr CopyTabTable (ValNodePtr row_list); NLM_EXTERN void WriteTabTableToFile (ValNodePtr table, FILE *fp); NLM_EXTERN ValNodePtr CountTabTableBlanks (ValNodePtr row_list); NLM_EXTERN ValNodePtr ScanTabTableForSpecialCharacters (ValNodePtr row_list); +NLM_EXTERN ValNodePtr AutoReplaceSpecialCharactersInText (CharPtr PNTR text); +NLM_EXTERN void AutoReplaceSpecialCharactersWithMessage (CharPtr PNTR text); +NLM_EXTERN ValNodePtr AutoReplaceSpecialCharactersInTabTable (ValNodePtr row_list); +NLM_EXTERN void AutoFixSpecialCharactersInEntity (Uint2 entityID); + NLM_EXTERN void RemoveQuotesFromTabTable (ValNodePtr row_list); NLM_EXTERN void ReparseTabTableConvertFirstSpaceToTab (ValNodePtr row_list); NLM_EXTERN void ReparseTabTableConvertMultiSpaceToTab (ValNodePtr row_list); NLM_EXTERN void CombineTabTableColumns (ValNodePtr row_list, ValNodePtr column_pos, CharPtr delimiter); +NLM_EXTERN void ReparseTabTableSeparateColumnAtDelimiter (ValNodePtr row_list, Char delimiter, Int4 col, Boolean stop_after_first); NLM_EXTERN void AddTextToTabTableColumn (ValNodePtr row_list, Int4 col, CharPtr text, Uint2 existing_text); NLM_EXTERN ValNodePtr ReadOneColumnList (CharPtr line); +NLM_EXTERN ValNodePtr SortTableRowByAnyColumn (ValNodePtr table, Int4 column); NLM_EXTERN void SpecialCharFindWithContext (CharPtr PNTR strp, Pointer userdata, BoolPtr did_find, BoolPtr did_change); NLM_EXTERN ValNodePtr FreeContextList (ValNodePtr context_list); +typedef struct twostringhash { + CharPtr PNTR table; + Int4 num_lines; +} TwoStringHashData, PNTR TwoStringHashPtr; + +NLM_EXTERN TwoStringHashPtr TwoStringHashFree (TwoStringHashPtr tsh); +NLM_EXTERN TwoStringHashPtr 
MakeTwoStringHashFromTabTable (ValNodePtr line_list, Int4 column1, Int4 column2); +NLM_EXTERN CharPtr GetValueFromTwoStringHash (CharPtr key, TwoStringHashPtr tsh); + NLM_EXTERN Int4 ExtendSeqLocToEnd (SeqLocPtr slp, BioseqPtr bsp, Boolean end5); /* functions for converting features */ @@ -1866,6 +1975,7 @@ NLM_EXTERN Boolean ConvertImpToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to); NLM_EXTERN Boolean ConvertRegionToRNAFunc (SeqFeatPtr sfp, Uint2 featdef_to); NLM_EXTERN Boolean ConvertGeneToMiscFeatFunc (SeqFeatPtr sfp, Uint2 featdef_to); NLM_EXTERN Boolean ConvertProtToProtFunc (SeqFeatPtr sfp, Uint2 featdef_to); +NLM_EXTERN Boolean ConvertMiscFeatToGene (SeqFeatPtr sfp); NLM_EXTERN Boolean ConvertMiscFeatToCodingRegion (SeqFeatPtr sfp); NLM_EXTERN void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep); NLM_EXTERN SeqFeatPtr GetProtFeature (BioseqPtr protbsp); @@ -1877,12 +1987,14 @@ NLM_EXTERN Boolean CodingRegionHasTranslExcept (SeqFeatPtr sfp); NLM_EXTERN SeqEntryPtr SequenceStringToSeqEntry (CharPtr str, SeqIdPtr sip, Uint1 mol_type); NLM_EXTERN void RevCompOneFeatForBioseq (SeqFeatPtr sfp, BioseqPtr bsp); +NLM_EXTERN void RevCompFeats (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent); NLM_EXTERN ValNodePtr SplitPubListFree (ValNodePtr list); NLM_EXTERN ValNodePtr MakeSplitPubListFromTabList (ValNodePtr PNTR tab_table, SeqEntryPtr sep, ValNodePtr PNTR err_list); NLM_EXTERN void SplitPubsByList (ValNodePtr split_list); /* for parsing collection dates */ NLM_EXTERN CharPtr ReformatDateStringEx (CharPtr orig_date, Boolean month_first, BoolPtr month_ambiguous); +NLM_EXTERN CharPtr ReformatDateWithMonthNames (CharPtr orig_date); NLM_EXTERN Int4 GetYearFromToken (CharPtr token, Int4 token_len); NLM_EXTERN Int4 ReadNumberFromToken (CharPtr token, Int4 token_len); NLM_EXTERN CharPtr GetMonthFromToken (CharPtr token, Int4 token_len); @@ -1890,6 +2002,7 @@ NLM_EXTERN Int4 GetMonthNumFromAbbrev (CharPtr month_abbrev); NLM_EXTERN CharPtr 
GetMonthAbbrev (Int4 n); NLM_EXTERN Int4 GetDaysInMonth (Int4 n); +NLM_EXTERN void CreateStructuredCommentsForAllFromTable (SeqEntryPtr sep, ValNodePtr header, ValNodePtr line, ValNodePtr PNTR err_list); NLM_EXTERN ValNodePtr CreateStructuredCommentsFromFile (FILE *fp, SeqEntryPtr sep, Boolean apply_to_all); NLM_EXTERN void AddDatabaseNameToStructuredComment (UserObjectPtr uop, CharPtr dbname); NLM_EXTERN ValNodePtr CreateStructuredCommentTableFromSeqEntry (SeqEntryPtr sep); @@ -1932,6 +2045,7 @@ NLM_EXTERN SeqFeatPtr FindBestProtein (Uint2 entityID, SeqLocPtr product); NLM_EXTERN void AddNonExtendableException (SeqFeatPtr sfp); NLM_EXTERN SeqLocPtr GetmRNALocationFromCDSLocation (SeqLocPtr slp, Uint2 entityID); NLM_EXTERN void AddmRNAForCDS (SeqFeatPtr sfp); +NLM_EXTERN Boolean ProductsMatchForRefSeq (CharPtr cds_str, CharPtr mrna_str); NLM_EXTERN SeqSubmitPtr FindSeqSubmitForSeqEntry (SeqEntryPtr sep); NLM_EXTERN Boolean CreateMatPeptideFromCDS (SeqFeatPtr sfp); NLM_EXTERN Boolean ConvertCDSToMatPeptideForOverlappingCDS (SeqFeatPtr sfp, SeqFeatPtr top_cds, Boolean remove_original); @@ -1954,9 +2068,9 @@ NLM_EXTERN void SegregateSetsByNumber (SeqEntryPtr sep, Int4 num_sets); NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByNumberPerSet (Int4 num_per_set, SeqEntryPtr sep); NLM_EXTERN void SegregateSetsByNumberPerSet (SeqEntryPtr sep, Int4 num_per_set); -NLM_EXTERN ValNodePtr CreateStructuredCommentsFromRow (ValNodePtr header, ValNodePtr values, CharPtr id_str, ValNodePtr PNTR err_list); +NLM_EXTERN void MoveSequencesFromSetToWrapper (ValNodePtr list, Uint2 entityID); -NLM_EXTERN CharPtr CompressSpaces (CharPtr str); +NLM_EXTERN ValNodePtr CreateStructuredCommentsFromRow (ValNodePtr header, ValNodePtr values, CharPtr id_str, ValNodePtr PNTR err_list); NLM_EXTERN void MergeAdjacentAnnotsInList (SeqAnnotPtr sap); @@ -1976,7 +2090,7 @@ NLM_EXTERN void ParseTaxNameToQuals (OrgRefPtr org, TextFsaPtr tags); NLM_EXTERN ValNodePtr GetLocusTagPrefixList (SeqEntryPtr 
sep); NLM_EXTERN Boolean IsProductNameOk (CharPtr product_name); -extern void FindSuspectProductNamesInNameList (FILE *input_file, FILE *output_file); +NLM_EXTERN Boolean ReportProductNameProblems (CharPtr product_name, FILE *output_file, CharPtr prefix); NLM_EXTERN SeqEntryPtr ReadFilteredAsn (FILE *fp, Boolean is_binary, CharPtr accn_list, Uint2Ptr entityIDptr); NLM_EXTERN void ReintegrateFilteredAsn (SeqEntryPtr sep, FILE *orig_file, FILE *output, Boolean is_binary); @@ -1985,6 +2099,8 @@ typedef struct descstream { SeqDescPtr orig; SeqDescPtr replace; SeqIdPtr owners; + SeqIdPtr last_owner; + Boolean on_all; CharPtr text; Int4 num_dependent; } DescStreamData, PNTR DescStreamPtr; @@ -1995,6 +2111,8 @@ NLM_EXTERN ValNodePtr DescStreamListFree (ValNodePtr vnp); NLM_EXTERN ValNodePtr StreamAsnForDescriptors (FILE *fp, Boolean is_binary, Boolean is_batch, Boolean is_submit, SeqIdPtr PNTR sip_list); NLM_EXTERN void WriteAsnWithReplacedDescriptors (ValNodePtr desc_stream_list, FILE *orig_file, FILE *output, Boolean is_binary, Boolean is_batch, Boolean is_submit); +NLM_EXTERN Boolean IdListsMatch (SeqIdPtr sip_list, ValNodePtr all_sip); +NLM_EXTERN void SetOnAllValsForDescStreamList (ValNodePtr desc_list, ValNodePtr all_sip); extern Boolean ParseCodeBreak (SeqFeatPtr sfp, CharPtr val, Int4 offset); @@ -2020,6 +2138,52 @@ FixCapitalizationInTitle NLM_EXTERN Int4 ConvertCommentsWithSpacesToStructuredCommentsForSeqEntry (SeqEntryPtr sep); +NLM_EXTERN void ParseExtractorResultsTableToFeatures (FILE *fp, SeqEntryPtr sep); + +#ifdef OS_MSWIN +NLM_EXTERN Int4 RunSilent(const char *cmdline); +#endif + + +NLM_EXTERN CharPtr ValNodeSeqIdName (ValNodePtr vnp); +NLM_EXTERN void ValNodeSeqIdFree (ValNodePtr vnp); +NLM_EXTERN ValNodePtr ValNodeSeqIdCopy (ValNodePtr vnp); +NLM_EXTERN Boolean ValNodeSeqIdMatch (ValNodePtr vnp1, ValNodePtr vnp2); +NLM_EXTERN ValNodePtr ValNodeSeqIdListFree (ValNodePtr list); +NLM_EXTERN ValNodePtr ValNodeSeqIdListCopy (ValNodePtr list); +NLM_EXTERN 
ValNodePtr SeqIdListToValNodeSeqIdList (SeqIdPtr sip_list); +NLM_EXTERN SeqIdPtr ValNodeSeqIdListToSeqIdList (ValNodePtr vnp_list); + +NLM_EXTERN void StringToLower (CharPtr str); + +NLM_EXTERN ValNodePtr FixupCountryQuals (SeqEntryPtr sep, Boolean fix_after_colon); +NLM_EXTERN Boolean FixupCountryQualsWithLog (SeqEntryPtr sep, Boolean fix_after_colon, FILE *log_fp); +NLM_EXTERN Boolean FixupMouseStrains (SeqEntryPtr sep, FILE *log_fp); + +NLM_EXTERN CharPtr StructuredCommentDbnameFromString (CharPtr string); +NLM_EXTERN ValNodePtr GetStructuredCommentPrefixList (void); +extern ValNodePtr GetSourceQualDescListEx (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued, Boolean get_subfields); + +NLM_EXTERN Boolean RemoveCultureNotes (SeqEntryPtr sep); + +NLM_EXTERN AuthListPtr GetAuthorListForPub (PubPtr the_pub); + +NLM_EXTERN void FixProductWordCapitalization (CharPtr PNTR pProduct); +NLM_EXTERN Boolean FixSrcQualCaps (SeqEntryPtr sep, Int4 src_qual, FILE *log_fp); +NLM_EXTERN Boolean IsNCBIFileID (SeqIdPtr sip); + +NLM_EXTERN Boolean IsLocationOrganelle (Uint1 genome); + +NLM_EXTERN void RemoveFeatureLink (SeqFeatPtr sfp1, SeqFeatPtr sfp2); +NLM_EXTERN void LinkTwoFeatures (SeqFeatPtr dst, SeqFeatPtr sfp); +NLM_EXTERN void MakeFeatureXrefsFromProteinIdQuals (SeqEntryPtr sep); +NLM_EXTERN void MakeFeatureXrefsFromTranscriptIdQuals (SeqEntryPtr sep); +NLM_EXTERN void FinishHalfXrefs (SeqEntryPtr sep); +NLM_EXTERN void FlipCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip); +NLM_EXTERN void RemoveBadCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip); +NLM_EXTERN Uint1 GetAaFromtRNA (tRNAPtr trp); +NLM_EXTERN CharPtr GetCodesFortRNA (SeqFeatPtr sfp, Int2 *pCode); + #ifdef __cplusplus } diff --git a/api/subutil.c b/api/subutil.c index 572c23bf..2536d82f 100644 --- a/api/subutil.c +++ b/api/subutil.c @@ -29,7 +29,7 @@ * * Version Creation Date: 11/3/93 * -* $Revision: 6.87 $ +* $Revision: 6.93 $ * * File Description: 
Utilities for creating ASN.1 submissions * @@ -56,6 +56,7 @@ static char *this_file = __FILE__; #include #include #include +#include /***************************************************************************** * @@ -644,7 +645,8 @@ NLM_EXTERN SeqEntryPtr AddSeqOnlyToSubmission ( targetbssp = (BioseqSetPtr) tmp->data.ptrvalue; if (targetbssp->_class == 7 || (targetbssp->_class >= 13 && targetbssp->_class <= 16) || - targetbssp->_class == BioseqseqSet_class_wgs_set) { + targetbssp->_class == BioseqseqSet_class_wgs_set || + targetbssp->_class == BioseqseqSet_class_small_genome_set) { tmp = targetbssp->seq_set; } } @@ -886,7 +888,8 @@ NLM_EXTERN SeqEntryPtr AddSegmentedSeqToSubmission ( targetbssp = (BioseqSetPtr) tmp->data.ptrvalue; if (targetbssp->_class == 7 || (targetbssp->_class >= 13 && targetbssp->_class <= 16) || - targetbssp->_class == BioseqseqSet_class_wgs_set) { + targetbssp->_class == BioseqseqSet_class_wgs_set || + targetbssp->_class == BioseqseqSet_class_small_genome_set) { tmp = targetbssp->seq_set; } } @@ -1317,7 +1320,8 @@ NLM_EXTERN SeqEntryPtr AddNucProtToSubmission ( targetbssp = (BioseqSetPtr) tmp->data.ptrvalue; if (targetbssp->_class == 7 || (targetbssp->_class >= 13 && targetbssp->_class <= 16) || - targetbssp->_class == BioseqseqSet_class_wgs_set) { + targetbssp->_class == BioseqseqSet_class_wgs_set || + targetbssp->_class == BioseqseqSet_class_small_genome_set) { tmp = targetbssp->seq_set; } } @@ -1652,22 +1656,23 @@ NLM_EXTERN Boolean AddBasesToByteStore (ByteStorePtr bsp, CharPtr the_bases) Uint1 residue; Uint1Ptr dnaconv; CharPtr tmp; + Char ch; dnaconv = GetDNAConv(); buf = MemNew(StringLen(the_bases) + 1); bu = buf; for (tmp = the_bases; *tmp != '\0'; tmp++) { - *tmp = TO_UPPER(*tmp); - if (*tmp == 'U') *tmp = 'T'; - if (*tmp == 'X') *tmp = 'N'; - residue = dnaconv[*tmp]; + ch = TO_UPPER(*tmp); + if (ch == 'U') ch = 'T'; + if (ch == 'X') ch = 'N'; + residue = dnaconv[ch]; if (residue > 2) { *bu++ = residue; } else if (residue == 1 && 
IS_ALPHA(*tmp)) { *bu++ = 'N'; } else { - ErrPostEx(SEV_ERROR, 0,0, "Illegal character in Bioseq [%c]", *tmp); + ErrPostEx(SEV_ERROR, 0,0, "Illegal character in Bioseq [%c]", ch); } } BSWrite(bsp, buf, (Int4) (bu - buf)); @@ -1684,20 +1689,21 @@ NLM_EXTERN Boolean AddAAsToByteStore (ByteStorePtr bsp, CharPtr the_aas) Uint1 residue; Uint1Ptr aaconv; CharPtr tmp; + Char ch; aaconv = GetProteinConv(); buf = MemNew(StringLen(the_aas) + 1); bu = buf; for (tmp = the_aas; *tmp != '\0'; tmp++) { - *tmp = TO_UPPER(*tmp); - residue = aaconv[*tmp]; + ch = TO_UPPER(*tmp); + residue = aaconv[ch]; if (residue > 2) { *bu++ = residue; - } else if (residue == 1 && IS_ALPHA(*tmp)) { + } else if (residue == 1 && IS_ALPHA(ch)) { *bu++ = 'X'; } else { - ErrPostEx(SEV_ERROR, 0,0, "Illegal character in Bioseq [%c]", *tmp); + ErrPostEx(SEV_ERROR, 0,0, "Illegal character in Bioseq [%c]", ch); } } @@ -5811,3 +5817,205 @@ NLM_EXTERN void RemoveAllSeqAnnotCleanupUserObjs ( } } + +static void GetNcbiAutofixDescr(SeqDescrPtr sdp, Pointer data) +{ + UserObjectPtr uop; + UserObjectPtr PNTR p_uop; + + if (sdp != NULL + && sdp->choice == Seq_descr_user + && (uop = (UserObjectPtr)sdp->data.ptrvalue) != NULL + && uop->type != NULL + && StringICmp (uop->type->str, "NcbiAutofix") == 0 + && (p_uop = (UserObjectPtr PNTR) data) != NULL) { + *p_uop = uop; + } +} + + +NLM_EXTERN UserObjectPtr FindNcbiAutofixUserObject ( + SeqEntryPtr sep +) + +{ + UserObjectPtr uop = NULL; + + if (sep == NULL) return NULL; + + VisitDescriptorsInSep (sep, (Pointer) &uop, GetNcbiAutofixDescr); + + return uop; +} + + +NLM_EXTERN void AddNcbiAutofixUserObject ( + SeqEntryPtr sep +) + +{ + SeqDescrPtr sdp; + UserObjectPtr uop; + + sdp = CreateNewDescriptor(sep, Seq_descr_user); + uop = UserObjectNew (); + uop->type = ObjectIdNew(); + uop->type->str = StringSave ("NcbiAutofix"); + sdp->data.ptrvalue = uop; +} + + +static void RemoveNcbiAutofixDescr(SeqDescrPtr sdp, Pointer data) +{ + UserObjectPtr uop; + ObjValNodePtr ovp; + + if 
(sdp != NULL + && sdp->choice == Seq_descr_user + && (uop = (UserObjectPtr)sdp->data.ptrvalue) != NULL + && uop->type != NULL + && StringICmp (uop->type->str, "NcbiAutofix") == 0 + && sdp->extended != 0) { + ovp = (ObjValNodePtr) sdp; + ovp->idx.deleteme = TRUE; + } +} + + +NLM_EXTERN void RemoveNcbiAutofixUserObjects ( + SeqEntryPtr sep +) + +{ + if (sep == NULL) return; + + VisitDescriptorsInSep (sep, (Pointer) NULL, RemoveNcbiAutofixDescr); + DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep); +} + +NLM_EXTERN UserObjectPtr CreateUnverifiedUserObject ( + void +) + +{ + ObjectIdPtr oip; + UserObjectPtr uop; + + uop = UserObjectNew (); + oip = ObjectIdNew (); + oip->str = StringSave ("Unverified"); + uop->type = oip; + + return uop; +} + +static void GetUnverifiedDescr(SeqDescrPtr sdp, Pointer data) +{ + UserObjectPtr uop; + UserObjectPtr PNTR p_uop; + + if (sdp != NULL + && sdp->choice == Seq_descr_user + && (uop = (UserObjectPtr)sdp->data.ptrvalue) != NULL + && IsUnverifiedUserObject(uop) + && (p_uop = (UserObjectPtr PNTR) data) != NULL) { + *p_uop = uop; + } +} + + +NLM_EXTERN UserObjectPtr FindUnverifiedUserObject ( + SeqEntryPtr sep +) + +{ + UserObjectPtr uop = NULL; + + if (sep == NULL) return NULL; + + VisitDescriptorsInSep (sep, (Pointer) &uop, GetUnverifiedDescr); + + return uop; +} + + +NLM_EXTERN void AddUnverifiedUserObject ( + SeqEntryPtr sep +) + +{ + SeqDescrPtr sdp; + UserObjectPtr uop; + + sdp = CreateNewDescriptor(sep, Seq_descr_user); + uop = UserObjectNew (); + uop->type = ObjectIdNew(); + uop->type->str = StringSave ("Unverified"); + sdp->data.ptrvalue = uop; +} + + +NLM_EXTERN void AddUnverifiedUserObjectToBioseq ( + BioseqPtr bsp +) + +{ + SeqDescPtr sdp; + SeqMgrDescContext context; + Boolean found = FALSE; + + if (bsp == NULL || ISA_aa(bsp->mol)) { + return; + } + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); + sdp != NULL && !found; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) { + if 
(IsUnverifiedUserObject(sdp->data.ptrvalue)) { + found = TRUE; + } + } + if (!found) { + sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user); + sdp->data.ptrvalue = CreateUnverifiedUserObject(); + } +} + + +static void RemoveUnverifiedDescr(SeqDescrPtr sdp, Pointer data) +{ + UserObjectPtr uop; + ObjValNodePtr ovp; + + if (sdp != NULL + && sdp->choice == Seq_descr_user + && (uop = (UserObjectPtr)sdp->data.ptrvalue) != NULL + && uop->type != NULL + && StringICmp (uop->type->str, "Unverified") == 0 + && sdp->extended != 0) { + ovp = (ObjValNodePtr) sdp; + ovp->idx.deleteme = TRUE; + } +} + + +NLM_EXTERN void RemoveUnverifiedUserObjects ( + SeqEntryPtr sep +) + +{ + if (sep == NULL) return; + + VisitDescriptorsInSep (sep, (Pointer) NULL, RemoveUnverifiedDescr); + DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep); +} + + +NLM_EXTERN Boolean IsUnverifiedUserObject (UserObjectPtr uop) +{ + if (uop == NULL || uop->type == NULL || StringICmp (uop->type->str, "Unverified") != 0) { + return FALSE; + } else { + return TRUE; + } +} + diff --git a/api/subutil.h b/api/subutil.h index 7dbad98e..07d08126 100644 --- a/api/subutil.h +++ b/api/subutil.h @@ -31,7 +31,7 @@ * * Version Creation Date: 11/3/93 * -* $Revision: 6.78 $ +* $Revision: 6.83 $ * * File Description: Utilities for creating ASN.1 submissions * @@ -692,6 +692,8 @@ NLM_EXTERN Boolean AddGenomeToEntry ( #define SUBSRC_mating_type 38 #define SUBSRC_linkage_group 39 #define SUBSRC_haplogroup 40 +#define SUBSRC_whole_replicon 41 +#define SUBSRC_phenotype 42 #define SUBSRC_other 255 /********************************************* @@ -739,6 +741,8 @@ NLM_EXTERN Boolean AddGenomeToEntry ( mating-type (38) , linkage-group (39) , haplogroup (40) , + whole-replicon (41) , + phenotype (42) , other (255) } , * value is an optional string to give the name (eg. 
of the @@ -1623,6 +1627,44 @@ NLM_EXTERN void RemoveAllSeqAnnotCleanupUserObjs ( SeqAnnotPtr sap ); +NLM_EXTERN UserObjectPtr FindNcbiAutofixUserObject ( + SeqEntryPtr sep +); + +NLM_EXTERN void AddNcbiAutofixUserObject ( + SeqEntryPtr sep +); + +NLM_EXTERN void RemoveNcbiAutofixUserObjects ( + SeqEntryPtr sep +); + +/* Mark unverified sequences */ + +NLM_EXTERN UserObjectPtr CreateUnverifiedUserObject ( + void +); + +NLM_EXTERN UserObjectPtr FindUnverifiedUserObject ( + SeqEntryPtr sep +); + +NLM_EXTERN void AddUnverifiedUserObject ( + SeqEntryPtr sep +); + +NLM_EXTERN void AddUnverifiedUserObjectToBioseq ( + BioseqPtr bsp +); + +NLM_EXTERN void RemoveUnverifiedUserObjects ( + SeqEntryPtr sep +); + +NLM_EXTERN Boolean IsUnverifiedUserObject ( + UserObjectPtr uop +); + #ifdef __cplusplus } diff --git a/api/tofasta.c b/api/tofasta.c index 58797c41..38c1280e 100644 --- a/api/tofasta.c +++ b/api/tofasta.c @@ -29,7 +29,7 @@ * * Version Creation Date: 7/12/91 * -* $Revision: 6.219 $ +* $Revision: 6.230 $ * * File Description: various sequence objects to fasta output * @@ -876,6 +876,7 @@ static SeqIdPtr ChooseFastaID (BioseqPtr bsp, Boolean allow_mult) static Int4 BioseqFastaStreamInternal ( BioseqPtr bsp, SeqLocPtr slp, + SeqLitPtr lit, CharPtr str, FILE *fp, ByteStorePtr bs, @@ -890,16 +891,22 @@ static Int4 BioseqFastaStreamInternal ( ) { + Char acc [41]; + SeqIdPtr accn = NULL; Char buf [4096]; - Char ch; + Char ch, ch1, ch2, ch3; Int4 count = 0; + Int4 gi = -1; + SeqIdPtr gpp = NULL; Char id [128]; + Uint1 id_format = PRINTID_FASTA_LONG; + CharPtr ptr; StreamFsa sf; SeqIdPtr sip = NULL; Char spn [64]; CharPtr tmp; - if (bsp == NULL && slp == NULL && str == NULL) return 0; + if (bsp == NULL && slp == NULL && lit == NULL && str == NULL) return 0; if (fp == NULL && bs == NULL) return 0; if (bsp != NULL && bsp->repr == Seq_repr_virtual) return 0; if (linelen > 128) { @@ -920,6 +927,7 @@ static Int4 BioseqFastaStreamInternal ( if (grouplen < 1) { grouplen = 0; } + acc 
[0] = '\0'; MemSet ((Pointer) &sf, 0, sizeof (StreamFsa)); sf.fp = fp; sf.bs = bs; @@ -932,15 +940,108 @@ static Int4 BioseqFastaStreamInternal ( sf.grouplen = grouplen; sf.skip = skip; sf.gi = 0; - if (bsp != NULL) { - for (sip = bsp->id; sip != NULL; sip = sip->next) { - if (sip->choice != SEQID_GI) continue; - sf.gi = sip->data.intvalue; - } - } sf.start = 0; sf.seqpos = 0; sf.seqspans = (Boolean) ((flags & STREAM_HTML_SPANS) != 0); + if (sf.seqspans) { + if (bsp != NULL) { + for (sip = bsp->id; sip != NULL; sip = sip->next) { + switch (sip->choice) { + case SEQID_GI : + gi = sip->data.intvalue; + break; + case SEQID_GENBANK : + case SEQID_EMBL : + case SEQID_DDBJ : + case SEQID_OTHER : + accn = sip; + break; + case SEQID_PIR : + case SEQID_SWISSPROT : + case SEQID_PRF : + case SEQID_PDB : + accn = sip; + break; + case SEQID_TPG : + case SEQID_TPE : + case SEQID_TPD : + accn = sip; + break; + case SEQID_GPIPE : + /* should not override better accession */ + gpp = sip; + break; + default : + break; + } + } + } else if (slp != NULL) { + /* PUBSEQ_OS will send a SeqInt with a chain of Seq-ids */ + for (sip = SeqLocId (slp); sip != NULL; sip = sip->next) { + switch (sip->choice) { + case SEQID_GI : + gi = sip->data.intvalue; + break; + case SEQID_GENBANK : + case SEQID_EMBL : + case SEQID_DDBJ : + case SEQID_OTHER : + accn = sip; + break; + case SEQID_PIR : + case SEQID_SWISSPROT : + case SEQID_PRF : + case SEQID_PDB : + accn = sip; + break; + case SEQID_TPG : + case SEQID_TPE : + case SEQID_TPD : + accn = sip; + break; + case SEQID_GPIPE : + /* should not override better accession */ + gpp = sip; + break; + default : + break; + } + } + if (sip != NULL && sip->choice == SEQID_GI) { + sf.gi = sip->data.intvalue; + } + } + if (gi > 0) { + sf.gi = gi; + } + if (accn == NULL) { + accn = gpp; + } + if (accn != NULL) { + SeqIdWrite (accn, acc, PRINTID_TEXTID_ACC_ONLY, sizeof (acc) - 1); + + if (accn->choice == SEQID_PDB) { + ptr = StringChr (acc, '_'); + if (ptr != NULL) 
{ + ch1 = ptr [1]; + if (ch1 != '\0') { + ch2 = ptr [2]; + if (ch2 != '\0') { + ch3 = ptr [3]; + if (ch3 == '\0') { + if (ch1 == ch2) { + if (IS_UPPER (ch1)) { + ptr [1] = TO_LOWER (ch1); + ptr [2] = '\0'; + } + } + } + } + } + } + } + } + } if (do_defline) { id [0] = '\0'; if (substitute_ids) { @@ -948,7 +1049,10 @@ static Int4 BioseqFastaStreamInternal ( } else if (bsp != NULL) { sip = bsp->id; } - SeqIdWrite (sip, id, PRINTID_FASTA_LONG, sizeof (id) - 1); + if ((flags & STREAM_ALL_FASTA_IDS) != 0) { + id_format = PRINTID_FASTA_ALL; + } + SeqIdWrite (sip, id, id_format, sizeof (id) - 1); /* no longer need to do feature indexing if title not present to speed up creation */ /* sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_title, NULL); @@ -984,6 +1088,8 @@ static Int4 BioseqFastaStreamInternal ( count = SeqPortStream (bsp, flags, (Pointer) &sf, FsaStreamProc); } else if (slp != NULL) { count = SeqPortStreamLoc (slp, flags, (Pointer) &sf, FsaStreamProc); + } else if (lit != NULL) { + count = SeqPortStreamLit (lit, flags, (Pointer) &sf, FsaStreamProc); } else if (str != NULL) { count = StringLen (str); FsaStreamProc (str, (Pointer) &sf); @@ -1007,6 +1113,12 @@ static Int4 BioseqFastaStreamInternal ( fprintf (sf.fp, ""); } fprintf (sf.fp, "\n"); + if (sf.seqspans) { + fprintf (sf.fp, "\n"); + } } else if (sf.bs != NULL) { if (sf.seqspans) { sprintf (spn, "", (long) sf.gi, (long) (sf.start + 1)); @@ -1017,6 +1129,16 @@ static Int4 BioseqFastaStreamInternal ( BSWrite (sf.bs, "", sizeof ("")); } BSWrite (sf.bs, "\n", sizeof ("\n")); + if (sf.seqspans) { + sprintf (spn, "\n"); + BSWrite (sf.bs, spn, StringLen (spn)); + } } } return count; @@ -1033,7 +1155,7 @@ NLM_EXTERN Int4 BioseqFastaStream ( ) { - return BioseqFastaStreamInternal (bsp, NULL, NULL, fp, NULL, flags, + return BioseqFastaStreamInternal (bsp, NULL, NULL, NULL, fp, NULL, flags, linelen, blocklen, grouplen, do_defline, FALSE, FALSE, 0); } @@ -1051,7 +1173,7 @@ NLM_EXTERN Int4 BioseqFastaStreamEx ( ) { - 
return BioseqFastaStreamInternal (bsp, NULL, NULL, fp, NULL, flags, + return BioseqFastaStreamInternal (bsp, NULL, NULL, NULL, fp, NULL, flags, linelen, blocklen, grouplen, do_defline, substitute_ids, sorted_protein, 0); } @@ -1067,7 +1189,7 @@ NLM_EXTERN Int4 BioseqFastaMemStream ( ) { - return BioseqFastaStreamInternal (bsp, NULL, NULL, NULL, bs, flags, + return BioseqFastaStreamInternal (bsp, NULL, NULL, NULL, NULL, bs, flags, linelen, blocklen, grouplen, do_defline, FALSE, FALSE, 0); } @@ -1084,7 +1206,24 @@ NLM_EXTERN Int4 SeqLocFastaStream ( { if (slp == NULL || fp == NULL) return 0; - return BioseqFastaStreamInternal (NULL, slp, NULL, fp, NULL, flags, + return BioseqFastaStreamInternal (NULL, slp, NULL, NULL, fp, NULL, flags, + linelen, blocklen, grouplen, + FALSE, FALSE, FALSE, 0); +} + +NLM_EXTERN Int4 SeqLitFastaStream ( + SeqLitPtr lit, + FILE *fp, + StreamFlgType flags, + Int2 linelen, + Int2 blocklen, + Int2 grouplen +) + +{ + if (lit == NULL || fp == NULL) return 0; + + return BioseqFastaStreamInternal (NULL, NULL, lit, NULL, fp, NULL, flags, linelen, blocklen, grouplen, FALSE, FALSE, FALSE, 0); } @@ -1275,7 +1414,7 @@ NLM_EXTERN Int4 CdRegionFastaStream ( skip = 2; } - return BioseqFastaStreamInternal (NULL, sfp->location, NULL, fp, NULL, flags, + return BioseqFastaStreamInternal (NULL, sfp->location, NULL, NULL, fp, NULL, flags, linelen, blocklen, grouplen, FALSE, FALSE, FALSE, skip); } @@ -1330,7 +1469,7 @@ NLM_EXTERN Int4 TranslationFastaStream ( } } - count = BioseqFastaStreamInternal (NULL, NULL, str, fp, NULL, flags, + count = BioseqFastaStreamInternal (NULL, NULL, NULL, str, fp, NULL, flags, linelen, blocklen, grouplen, FALSE, FALSE, FALSE, 0); @@ -1339,6 +1478,153 @@ NLM_EXTERN Int4 TranslationFastaStream ( return count; } +static void DoGeneDefline ( + SeqFeatPtr sfp, + FILE *fp, + GeneRefPtr grp, + CharPtr idSuffix +) + +{ + BioseqPtr bsp = NULL; + Char buf [512]; + Boolean do_defline = TRUE; + Uint2 entityID; + SeqMgrFeatContext 
genecontext; + IntAsn2gbJob iaj; + Boolean partial5; + Boolean partial3; + SeqIdPtr sip; + CharPtr str; + Char tmp [64]; + + if (sfp == NULL || fp == NULL || grp == NULL) return; + if (sfp == NULL || fp == NULL || sfp->data.choice != SEQFEAT_GENE) return; + grp = (GeneRefPtr) sfp->data.value.ptrvalue; + if (grp == NULL) return; + + if (do_defline) { + bsp = BioseqFindFromSeqLoc (sfp->location); + if (bsp == NULL) { + do_defline = FALSE; + StringCpy (buf, "lcl|"); + sip = SeqLocId (sfp->location); + if (sip != NULL) { + SeqIdWrite (sip, tmp, PRINTID_TEXTID_ACC_VER, sizeof (tmp) - 1); + StringCat (buf, tmp); + } + if (StringDoesHaveText (idSuffix) && StringLen (idSuffix) < 200) { + StringCat (buf, idSuffix); + } + FastaFileFunc (bsp, FASTA_ID, buf, sizeof (buf), (Pointer) fp); + StringCpy (buf, "?"); + FastaFileFunc (bsp, FASTA_DEFLINE, buf, sizeof (buf), (Pointer) fp); + fflush (fp); + } + } + + if (do_defline && bsp != NULL) { + if (sfp != SeqMgrGetDesiredFeature (0, bsp, 0, 0, sfp, &genecontext)) { + do_defline = FALSE; + StringCpy (buf, "lcl|"); + sip = SeqIdFindWorst (bsp->id); + if (sip != NULL) { + SeqIdWrite (sip, tmp, PRINTID_TEXTID_ACC_VER, sizeof (tmp) - 1); + StringCat (buf, tmp); + } + if (StringDoesHaveText (idSuffix) && StringLen (idSuffix) < 200) { + StringCat (buf, idSuffix); + } + FastaFileFunc (bsp, FASTA_ID, buf, sizeof (buf), (Pointer) fp); + StringCpy (buf, "?"); + FastaFileFunc (bsp, FASTA_DEFLINE, buf, sizeof (buf), (Pointer) fp); + fflush (fp); + } + } + + if (do_defline) { + entityID = ObjMgrGetEntityIDForPointer (bsp); + if (SeqMgrFeaturesAreIndexed (entityID) == 0) { + SeqMgrIndexFeatures (entityID, NULL); + } + + CheckSeqLocForPartial (sfp->location, &partial5, &partial3); + + MemSet ((Pointer) &iaj, 0, sizeof (IntAsn2gbJob)); + iaj.flags.iupacaaOnly = FALSE; + iaj.relModeError = FALSE; + + StringCpy (buf, "lcl|"); + sip = SeqIdFindWorst (bsp->id); + if (sip != NULL) { + SeqIdWrite (sip, tmp, PRINTID_TEXTID_ACC_VER, sizeof (tmp) - 1); + 
StringCat (buf, tmp); + } + if (StringDoesHaveText (idSuffix) && StringLen (idSuffix) < 200) { + StringCat (buf, idSuffix); + } + + FastaFileFunc (bsp, FASTA_ID, buf, sizeof (buf), (Pointer) fp); + + buf [0] = '\0'; + if (StringDoesHaveText (grp->locus)) { + StringCat (buf, "[gene="); + StringCat (buf, grp->locus); + StringCat (buf, "] "); + } + if (StringDoesHaveText (grp->locus_tag)) { + StringCat (buf, "[locus_tag="); + StringCat (buf, grp->locus_tag); + StringCat (buf, "] "); + } + if (StringLen (buf) == 0 && StringDoesHaveText (genecontext.label)) { + StringCat (buf, "[gene="); + StringCat (buf, genecontext.label); + StringCat (buf, "] "); + } + str = FFFlatLoc (&iaj, bsp, sfp->location, FALSE, FALSE); + if (str != NULL && StringLen (str) + StringLen (buf) < sizeof (buf) - 10) { + StringCat (buf, "[location="); + StringCat (buf, str); + StringCat (buf, "] "); + MemFree (str); + } + TrimSpacesAroundString (buf); + + FastaFileFunc (bsp, FASTA_DEFLINE, buf, sizeof (buf), (Pointer) fp); + + fflush (fp); + } +} + +NLM_EXTERN Int4 GeneFastaStream ( + SeqFeatPtr sfp, + FILE *fp, + StreamFlgType flags, + Int2 linelen, + Int2 blocklen, + Int2 grouplen, + Boolean do_defline, + CharPtr idSuffix +) + +{ + GeneRefPtr grp; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return 0; + if (fp == NULL) return 0; + grp = (GeneRefPtr) sfp->data.value.ptrvalue; + if (grp == NULL) return 0; + + if (do_defline) { + DoGeneDefline (sfp, fp, grp, idSuffix); + } + + return BioseqFastaStreamInternal (NULL, sfp->location, NULL, NULL, fp, NULL, flags, + linelen, blocklen, grouplen, + FALSE, FALSE, FALSE, 0); +} + /***************************************************************************** * * SeqEntryFastaStream (bsp, fp, flags, linelen, blocklen, grouplen, @@ -5109,6 +5395,7 @@ typedef struct deflinestruct { /* subsource fields */ CharPtr m_chromosome; CharPtr m_clone; + Boolean m_has_clone; CharPtr m_map; CharPtr m_plasmid; CharPtr m_segment; @@ -5117,6 +5404,9 @@ typedef 
struct deflinestruct { CharPtr m_isolate; CharPtr m_strain; + /* user object fields */ + Boolean m_is_unverified; + /* exception fields */ TextFsaPtr m_low_quality_fsa; } DefLineData, PNTR DefLinePtr; @@ -5176,6 +5466,7 @@ static void x_SetFlags ( SeqIdPtr sip; CharPtr str; TextSeqIdPtr tsip; + UserObjectPtr uop; ValNodePtr vnp; if (dlp == NULL) return; @@ -5307,6 +5598,19 @@ static void x_SetFlags ( } } + /* process Unverified user object */ + for (sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_user, NULL); + sdp != NULL; + sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_user, sdp)) { + if (sdp->choice != Seq_descr_user) continue; + uop = (UserObjectPtr) sdp->data.ptrvalue; + if (uop == NULL) continue; + oip = uop->type; + if (oip == NULL) continue; + if (StringICmp (oip->str, "Unverified") != 0) continue; + dlp->m_is_unverified = TRUE; + } + if (dlp->m_htg_tech || dlp->m_third_party) { /* process keywords */ keywords = NULL; @@ -5365,6 +5669,31 @@ static void x_SetFlags ( } /* set instance variables from BioSource */ +static void x_SetSrcClone ( + SeqFeatPtr sfp, + Pointer userdata +) + +{ + BioSourcePtr biop; + DefLinePtr dlp; + SubSourcePtr ssp; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC) return; + dlp = (DefLinePtr) userdata; + if (dlp == NULL) return; + + biop = (BioSourcePtr) sfp->data.value.ptrvalue; + if (biop == NULL) return; + + /* look for clones on source features */ + for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { + if (StringHasNoText (ssp->name)) continue; + if (ssp->subtype != SUBSRC_clone) continue; + dlp->m_has_clone = TRUE; + } +} + static void x_SetBioSrc ( DefLinePtr dlp ) @@ -5404,6 +5733,7 @@ static void x_SetBioSrc ( break; case SUBSRC_clone : dlp->m_clone = ssp->name; + dlp->m_has_clone = TRUE; break; case SUBSRC_map : dlp->m_map = ssp->name; @@ -5444,6 +5774,10 @@ static void x_SetBioSrc ( } } } + + if (dlp->m_has_clone) return; + + VisitFeaturesOnBsp (bsp, (Pointer) dlp, x_SetSrcClone); } static CharPtr 
x_TrimFirstNCharacters ( @@ -5593,6 +5927,11 @@ static CharPtr x_DescribeClones ( if (dlp == NULL) return NULL; + if (dlp->m_htgs_unfinished && dlp->m_htgs_pooled && dlp->m_has_clone) { + result = StringSave (", pooled multiple clones"); + return result; + } + str = dlp->m_clone; if (StringHasNoText (str)) return NULL; @@ -5606,9 +5945,7 @@ static CharPtr x_DescribeClones ( ch = *str; } - if (dlp->m_htgs_unfinished && dlp->m_htgs_pooled) { - result = StringSave (", pooled multiple clones"); - } else if (count > 3) { + if (count > 3) { sprintf (buf, ", %d clones", (int) count); result = StringSave (buf); } else { @@ -6817,7 +7154,8 @@ static CharPtr x_TitleFromWGS ( } static CharPtr x_SetPrefix ( - DefLinePtr dlp + DefLinePtr dlp, + CharPtr title ) { @@ -6825,7 +7163,11 @@ static CharPtr x_SetPrefix ( if (dlp == NULL) return NULL; - if (dlp->m_is_tsa) { + if (dlp->m_is_unverified) { + if (StringStr (title, "UNVERIFIED") == NULL) { + prefix = "UNVERIFIED: "; + } + } else if (dlp->m_is_tsa) { prefix = "TSA: "; } else if (dlp->m_third_party) { if (dlp->m_tpa_exp) { @@ -7062,6 +7404,8 @@ NLM_EXTERN CharPtr NewCreateDefLine ( x_TrimFirstNCharacters (title, 10); } else if (StringNICmp (title, "TSA:", 4) == 0) { x_TrimFirstNCharacters (title, 4); + } else if (StringNICmp (title, "UNVERIFIED:", 11) == 0) { + x_TrimFirstNCharacters (title, 11); } /* strip leading spaces remaining after removal of old TPA or TSA prefixes */ @@ -7071,7 +7415,7 @@ NLM_EXTERN CharPtr NewCreateDefLine ( x_TrimMostPunctFromEnd (title); /* calcualte prefix */ - prefix = x_SetPrefix (dlp); + prefix = x_SetPrefix (dlp, title); /* calculate suffix */ suffix = x_SetSuffix (dlp, title); @@ -7093,6 +7437,8 @@ NLM_EXTERN CharPtr NewCreateDefLine ( dlp = MemFree (dlp); + Asn2gnbkCompressSpaces (result); + return result; } diff --git a/api/tofasta.h b/api/tofasta.h index e3e43108..af2e15e5 100644 --- a/api/tofasta.h +++ b/api/tofasta.h @@ -29,7 +29,7 @@ * * Version Creation Date: 7/12/91 * -* $Revision: 
6.39 $ +* $Revision: 6.41 $ * * File Description: various sequence objects to fasta output * @@ -148,8 +148,10 @@ NLM_EXTERN Boolean BioseqToFastaX PROTO((BioseqPtr bsp, MyFsaPtr mfp, Boolean is * BioseqFastaStream (bsp, fp, flags, linelen, blocklen, grouplen, do_defline) * BioseqFastaMemStream (bsp, bs, flags, linelen, blocklen, grouplen, do_defline) * SeqLocFastaStream (slp, fp, flags, linelen, blocklen, grouplen) +* SeqLitFastaStream (lit, fp, flags, linelen, blocklen, grouplen) * CdRegionFastaStream (sfp, fp, flags, linelen, blocklen, grouplen) * TranslationFastaStream (sfp, fp, flags, linelen, blocklen, grouplen) +* GeneFastaStream (sfp, fp, flags, linelen, blocklen, grouplen) * SeqEntryFastaStream (sep, fp, flags, linelen, blocklen, grouplen, * do_na, do_aa, master_style) * @@ -197,6 +199,15 @@ NLM_EXTERN Int4 SeqLocFastaStream ( Int2 grouplen ); +NLM_EXTERN Int4 SeqLitFastaStream ( + SeqLitPtr lit, + FILE *fp, + StreamFlgType flags, + Int2 linelen, + Int2 blocklen, + Int2 grouplen +); + NLM_EXTERN Int4 CdRegionFastaStream ( SeqFeatPtr sfp, FILE *fp, @@ -219,6 +230,17 @@ NLM_EXTERN Int4 TranslationFastaStream ( CharPtr idSuffix ); +NLM_EXTERN Int4 GeneFastaStream ( + SeqFeatPtr sfp, + FILE *fp, + StreamFlgType flags, + Int2 linelen, + Int2 blocklen, + Int2 grouplen, + Boolean do_defline, + CharPtr idSuffix +); + NLM_EXTERN Int4 SeqEntryFastaStream ( SeqEntryPtr sep, FILE *fp, diff --git a/api/utilpub.c b/api/utilpub.c index 3d3dd5ad..933ab96a 100644 --- a/api/utilpub.c +++ b/api/utilpub.c @@ -259,7 +259,8 @@ Uint2 entityID, Uint4 itemID, Uint2 itemtype) if (pdp) { descr = AsnIoMemCopy(pdp, (AsnReadFunc) PubdescAsnRead, (AsnWriteFunc) PubdescAsnWrite); - vnp = ValNodeNew(NULL); + if (descr == NULL) return NULL; + vnp = ValNodeNew(NULL); vnp->choice = PUB_Equiv; vnp->data.ptrvalue = descr->pub; psp = (PubStructPtr) MemNew(sizeof(PubStruct)); @@ -1595,6 +1596,12 @@ NLM_EXTERN void EntryStripSerialNumber (SeqEntryPtr sep) } } +NLM_EXTERN void 
ForceStripSerialNumber (SeqEntryPtr sep) +{ + if (sep == NULL) return; + SeqEntryExplore(sep, NULL, StripSerialNumber); +} + NLM_EXTERN ValNodePtr remove_node(ValNodePtr head, ValNodePtr x) { ValNodePtr v, p; diff --git a/api/utilpub.h b/api/utilpub.h index a712195d..4ea30fbf 100644 --- a/api/utilpub.h +++ b/api/utilpub.h @@ -64,6 +64,8 @@ NLM_EXTERN Boolean empty_citgen PROTO((CitGenPtr cit)); NLM_EXTERN void EntryStripSerialNumber PROTO((SeqEntryPtr sep)); +NLM_EXTERN void ForceStripSerialNumber PROTO((SeqEntryPtr sep)); + NLM_EXTERN void VnpHeapSort PROTO ((ValNodePtr PNTR vnp, int (LIBCALLBACK *compar )PROTO ((Nlm_VoidPtr, Nlm_VoidPtr )))); NLM_EXTERN SeqFeatPtr remove_feat PROTO((SeqFeatPtr head, SeqFeatPtr x)); diff --git a/api/valapi.c b/api/valapi.c index e7f589aa..8563697c 100755 --- a/api/valapi.c +++ b/api/valapi.c @@ -29,7 +29,7 @@ * * Version Creation Date: 4/7/2009 * -* $Revision: 1.9 $ +* $Revision: 1.10 $ * * File Description: * @@ -136,7 +136,18 @@ static CharPtr commentRulesStr = "Comment-set ::= {\n" \ " { \n" \ " field-name \"Current Finishing Status\" ,\n" \ " match-expression \"^\\(Standard Draft\\|High Quality Draft\\|Improved High Quality Draft\\|Annotation Directed\\|Non-contiguous Finished\\|Finished\\)$\" } } }\n" \ -"} } }\n"; +"} } , \n" \ +" { \n" \ +" prefix \"##Assembly-Data-START##\" , \n" \ +" fields { \n" \ +" { \n" \ +" field-name \"Assembly Method\" , \n" \ +" match-expression \".+ v\\. 
.+\" , \n" \ +" required TRUE } , \n" \ +" { \n" \ +" field-name \"Sequencing Technology\" , \n" \ +" required TRUE } } } \n" \ +"}\n"; #endif diff --git a/api/valid.c b/api/valid.c index c526bbf1..d3fdba59 100644 --- a/api/valid.c +++ b/api/valid.c @@ -29,7 +29,7 @@ * * Version Creation Date: 1/1/94 * -* $Revision: 6.1388 $ +* $Revision: 6.1533 $ * * File Description: Sequence editing utilities * @@ -70,6 +70,10 @@ static char *this_file = __FILE__; #include #include #include +#include "ecnum_specific.inc" +#include "ecnum_ambiguous.inc" +#include "ecnum_deleted.inc" +#include "ecnum_replaced.inc" /***************************************************************************** * @@ -121,6 +125,24 @@ static Boolean ECnumberWasDeleted (CharPtr str); static Boolean ECnumberWasReplaced (CharPtr str); static void ValidateCitSub (ValidStructPtr vsp, CitSubPtr csp); +static Boolean HasFeatId(SeqFeatPtr sfp, Int4 num) +{ + Boolean rval = FALSE; + ObjectIdPtr oip; + + if (sfp == NULL) { + return FALSE; + } + if (sfp->id.choice == 3) { + oip = (ObjectIdPtr) sfp->id.value.ptrvalue; + if (oip->id == num) { + rval = TRUE; + } + } + return rval; +} + + /* alignment validator */ NLM_EXTERN Boolean ValidateSeqAlignWithinValidator (ValidStructPtr vsp, SeqEntryPtr sep, Boolean find_remote_bsp, Boolean do_hist_assembly); @@ -161,6 +183,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp) Boolean strictLatLonCountry; Boolean rubiscoTest; Boolean indexerVersion; + Boolean disableSuppression; Int2 validationLimit; ValidErrorFunc errfunc; Pointer userdata; @@ -174,6 +197,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp) Boolean is_gpipe_in_sep; Boolean is_gps_in_sep; Boolean is_embl_ddbj_in_sep; + Boolean is_old_gb_in_sep; + Boolean is_patent_in_sep; Boolean other_sets_in_sep; Boolean is_insd_in_sep; Boolean only_lcl_gnl_in_sep; @@ -182,6 +207,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp) Boolean is_smupd_in_sep; Boolean feat_loc_has_gi; Boolean 
feat_prod_has_gi; + Boolean has_multi_int_genes; + Boolean has_seg_bioseqs; Boolean far_fetch_failure; if (vsp == NULL) @@ -214,6 +241,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp) strictLatLonCountry = vsp->strictLatLonCountry; rubiscoTest = vsp->rubiscoTest; indexerVersion = vsp->indexerVersion; + disableSuppression = vsp->disableSuppression; validationLimit = vsp->validationLimit; errfunc = vsp->errfunc; userdata = vsp->userdata; @@ -228,6 +256,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp) is_gps_in_sep = vsp->is_gps_in_sep; other_sets_in_sep = vsp->other_sets_in_sep; is_embl_ddbj_in_sep = vsp->is_embl_ddbj_in_sep; + is_old_gb_in_sep = vsp->is_old_gb_in_sep; + is_patent_in_sep = vsp->is_patent_in_sep; is_insd_in_sep = vsp->is_insd_in_sep; only_lcl_gnl_in_sep = vsp->only_lcl_gnl_in_sep; has_gnl_prot_sep = vsp->has_gnl_prot_sep; @@ -235,6 +265,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp) is_smupd_in_sep = vsp->is_smupd_in_sep; feat_loc_has_gi = vsp->feat_loc_has_gi; feat_prod_has_gi = vsp->feat_prod_has_gi; + has_multi_int_genes = vsp->has_multi_int_genes; + has_seg_bioseqs = vsp->has_seg_bioseqs; far_fetch_failure = vsp->far_fetch_failure; MemSet ((VoidPtr) vsp, 0, sizeof (ValidStruct)); vsp->errbuf = errbuf; @@ -264,6 +296,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp) vsp->strictLatLonCountry = strictLatLonCountry; vsp->rubiscoTest = rubiscoTest; vsp->indexerVersion = indexerVersion; + vsp->disableSuppression = disableSuppression; vsp->validationLimit = validationLimit; vsp->errfunc = errfunc; vsp->userdata = userdata; @@ -278,6 +311,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp) vsp->is_gps_in_sep = is_gps_in_sep; vsp->other_sets_in_sep = other_sets_in_sep; vsp->is_embl_ddbj_in_sep = is_embl_ddbj_in_sep; + vsp->is_old_gb_in_sep = is_old_gb_in_sep; + vsp->is_patent_in_sep = is_patent_in_sep; vsp->is_insd_in_sep = is_insd_in_sep; vsp->only_lcl_gnl_in_sep = only_lcl_gnl_in_sep; vsp->has_gnl_prot_sep 
= has_gnl_prot_sep; @@ -285,6 +320,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp) vsp->is_smupd_in_sep = is_smupd_in_sep; vsp->feat_loc_has_gi = feat_loc_has_gi; vsp->feat_prod_has_gi = feat_prod_has_gi; + vsp->has_multi_int_genes = has_multi_int_genes; + vsp->has_seg_bioseqs = has_seg_bioseqs; vsp->far_fetch_failure = far_fetch_failure; return; } @@ -571,7 +608,8 @@ static CharPtr err1Label [] = { "SeqLitDataLength0", "DSmRNA", "HighNContentStretch", - "HighNContentPercent" + "HighNContentPercent", + "BadSegmentedSeq" }; static CharPtr err2Label [] = { @@ -651,7 +689,12 @@ static CharPtr err2Label [] = { "BadStrucCommMultipleFields", "BioSourceNeedsChromosome", "MolInfoConflictsWithBioSource", - "MissingKeyword" + "MissingKeyword", + "FakeStructuredComment", + "StructuredCommentPrefixOrSuffixMissing", + "LatLonWater", + "LatLonOffshore", + "MissingPersonalCollectionName" }; static CharPtr err3Label [] = { @@ -701,7 +744,9 @@ static CharPtr err4Label [] = { "MissingSetTitle", "NucProtSetHasTitle", "ComponentMissingTitle", - "SingleItemSet" + "SingleItemSet", + "MisplacedMolInfo", + "ImproperlyNestedSets" }; static CharPtr err5Label [] = { @@ -881,7 +926,11 @@ static CharPtr err5Label [] = { "ShortIntron", "GeneXrefStrandProblem", "CDSmRNAXrefLocationProblem", - "LocusCollidesWithLocusTag" + "LocusCollidesWithLocusTag", + "IdenticalGeneSymbolAndSynonym", + "NeedsNote", + "RptUnitRangeProblem", + "TooManyInferenceAccessions" }; static CharPtr err6Label [] = { @@ -1142,6 +1191,13 @@ static void CustValErr (ValidStructPtr vsp, ErrSev severity, int errcode, int su } else if (vsp->descr != NULL) { label = tmp; diff = SeqDescLabel (vsp->descr, tmp, wrklen, OM_LABEL_BOTH); + + if (diff > 100 && vsp->descr->choice == Seq_descr_comment && errcode == 2 && subcode == 77) { + diff = 100; + *(tmp + diff - 3) = '.'; + *(tmp + diff - 2) = '.'; + *(tmp + diff - 1) = '.'; + } buflen -= diff; tmp += diff; *tmp = '\0'; @@ -1296,6 +1352,229 @@ static void CustValErr 
(ValidStructPtr vsp, ErrSev severity, int errcode, int su featureID, message, objtype, label, context, location, product, vsp->userdata); } + +/* framework for suppressing validator errors using a list-based strategy */ +typedef Boolean (*ValidErrSuppressFunc) PROTO ((ValidStructPtr)); + +static Boolean IsGenomicPipeline (ValidStructPtr vsp) +{ + if (vsp == NULL) { + return FALSE; + } else if (vsp->bsp_genomic_in_sep && vsp->is_gpipe_in_sep) { + return TRUE; + } else { + return FALSE; + } +} + + +static Boolean IsUnclassifiedExcept (ValidStructPtr vsp) +{ + Boolean rval = FALSE; + if (vsp == NULL || vsp->sfp == NULL) { + return FALSE; + } + if (vsp->sfp->excpt && (! vsp->ignoreExceptions)) { + if (vsp->sfp->data.choice == SEQFEAT_CDREGION) { + if (StringStr (vsp->sfp->except_text, "unclassified translation discrepancy") != NULL) { + rval = TRUE; + } + } else if (vsp->sfp->idx.subtype == FEATDEF_mRNA) { + if (StringStr (vsp->sfp->except_text, "unclassified transcription discrepancy") != NULL) { + rval = TRUE; + } + } + } + return rval; +} + + +static Boolean IsNotUnclassifiedExcept (ValidStructPtr vsp) +{ + return !IsUnclassifiedExcept(vsp); +} + + +static Boolean IsUnclassifedExceptAndGenomicPipeline (ValidStructPtr vsp) +{ + if (IsGenomicPipeline(vsp) && IsUnclassifiedExcept(vsp)) { + return TRUE; + } else { + return FALSE; + } +} + + +static Boolean NonconsensusExcept (ValidStructPtr vsp) +{ + Boolean rval = FALSE; + if (vsp == NULL || vsp->sfp == NULL) { + return FALSE; + } + if (vsp->sfp->excpt && (! 
vsp->ignoreExceptions)) { + if (StringISearch (vsp->sfp->except_text, "nonconsensus splice site") != NULL || + StringISearch (vsp->sfp->except_text, "heterogeneous population sequenced") != NULL || + StringISearch (vsp->sfp->except_text, "low-quality sequence region") != NULL || + StringISearch (vsp->sfp->except_text, "artificial location") != NULL) { + rval = TRUE; + } + } + return rval; +} + + +typedef struct validerrsuppression { + int code1; + int code2; + CharPtr search_phrase; + CharPtr exclude_phrase; + ValidErrSuppressFunc func; +} ValidErrSuppressionData, PNTR ValidErrSuppressionPtr; + +static ValidErrSuppressionData valid_suppress[] = { + {ERR_SEQ_FEAT_PartialProblem, "When SeqFeat.product is a partial Bioseq, SeqFeat.location should also be partial", NULL, IsGenomicPipeline }, + {ERR_SEQ_FEAT_PartialProblem, "End of location should probably be partial", NULL, IsGenomicPipeline}, + {ERR_SEQ_FEAT_PartialProblem, "This SeqFeat should not be partial", NULL, IsGenomicPipeline}, + {ERR_SEQ_FEAT_PartialProblem, "AND is not at consensus splice site", NULL, IsGenomicPipeline}, + {ERR_SEQ_FEAT_PartialProblem, "PartialLocation: Internal partial intervals do not include first/last residue of sequence", NULL, IsGenomicPipeline}, + {ERR_SEQ_FEAT_PartialProblem, "AND is not at consensus splice site", NULL, NonconsensusExcept}, + {ERR_SEQ_FEAT_PartialProblem, "(but is at consensus splice site)", NULL, IsGenomicPipeline}, + {ERR_SEQ_FEAT_PartialProblem, "PartialLocation: Start does not include first/last residue of sequence", NULL, IsGenomicPipeline}, + {ERR_SEQ_FEAT_PartialProblem, "PartialLocation: Stop does not include first/last residue of sequence", NULL, IsGenomicPipeline}, + {ERR_SEQ_FEAT_PartialsInconsistent, NULL, NULL, IsGenomicPipeline }, + {ERR_SEQ_FEAT_PolyATail, NULL, NULL, IsGenomicPipeline }, + {ERR_SEQ_FEAT_InternalStop, NULL, NULL, IsUnclassifedExceptAndGenomicPipeline}, + {ERR_SEQ_FEAT_StartCodon , NULL, NULL, IsUnclassifiedExcept} + +}; + +const Int4 
kNumSuppressionRules = sizeof (valid_suppress) / sizeof (ValidErrSuppressionData); + +static Boolean ShouldSuppressValidErr (ValidStructPtr vsp, int code1, int code2, const char *fmt) +{ + Int4 i; + Boolean rval = FALSE; + + if (vsp->disableSuppression) return FALSE; + + for (i = 0; i < kNumSuppressionRules && !rval; i++) { + if (code1 == valid_suppress[i].code1 && code2 == valid_suppress[i].code2 + && (valid_suppress[i].search_phrase == NULL || StringISearch (fmt, valid_suppress[i].search_phrase) != NULL) + && (valid_suppress[i].func == NULL || valid_suppress[i].func(vsp)) + && (valid_suppress[i].exclude_phrase == NULL || StringISearch (fmt, valid_suppress[i].exclude_phrase) == NULL)) { + rval = TRUE; + } + } + + return rval; +} + + +/* framework for changing validator warnings using a list-based strategy */ +typedef int (*ValidErrSevChangeFunc) PROTO ((int, ValidStructPtr)); + +typedef struct validerrsevchange { + int code1; + int code2; + CharPtr search_phrase; + CharPtr exclude_phrase; + ValidErrSevChangeFunc func; +} ValidErrSevChangeData, PNTR ValidErrSevChangePtr; + + +static int LowerToInfoForGenomic (int severity, ValidStructPtr vsp) +{ + if (IsGenomicPipeline(vsp)) { + return SEV_INFO; + } else { + return severity; + } +} + + +static int WarnForGPSOrRefSeq (int severity, ValidStructPtr vsp) +{ + Boolean gpsOrRefSeq = FALSE; + SeqEntryPtr sep; + SeqFeatPtr sfp; + BioseqSetPtr bssp; + SeqLocPtr head, slp = NULL, nxt; + SeqIdPtr sip, id; + BioseqPtr bsp; + TextSeqIdPtr tsip; + + sep = vsp->sep; + if (sep != NULL && IS_Bioseq_set (sep)) { + bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) { + gpsOrRefSeq = TRUE; + } + } + + if (!gpsOrRefSeq) { + sfp = vsp->sfp; + head = sfp->location; + slp = SeqLocFindPart (head, slp, EQUIV_IS_ONE); + while (slp != NULL && !gpsOrRefSeq) { + sip = SeqLocId (slp); + if (sip == NULL) + break; + nxt = SeqLocFindPart (head, slp, EQUIV_IS_ONE); + + /* genomic product 
set or NT_ contig always relaxes to SEV_WARNING */ + bsp = BioseqFind (sip); + if (bsp != NULL) { + for (id = bsp->id; id != NULL; id = id->next) { + if (id->choice == SEQID_OTHER) { + tsip = (TextSeqIdPtr) id->data.ptrvalue; + if (tsip != NULL && tsip->accession != NULL) { + gpsOrRefSeq = TRUE; + } + } + } + } + + slp = nxt; + } + } + if (gpsOrRefSeq) { + if (severity > SEV_WARNING) { + severity = SEV_WARNING; + } + } + return severity; +} + + +static ValidErrSevChangeData valid_sevchange[] = { + {ERR_SEQ_FEAT_NotSpliceConsensusDonor, "Splice donor consensus (GT) not found at start of intron, position", NULL, LowerToInfoForGenomic}, + {ERR_SEQ_FEAT_NotSpliceConsensusAcceptor, "Splice acceptor consensus (AG) not found at end of intron, position", NULL, LowerToInfoForGenomic}, + {ERR_SEQ_FEAT_NotSpliceConsensusDonor, "Splice donor consensus (GT) not found after exon", NULL, LowerToInfoForGenomic}, + {ERR_SEQ_FEAT_NotSpliceConsensusDonor, "Splice donor consensus (GT) not found after exon", NULL, WarnForGPSOrRefSeq}, + {ERR_SEQ_FEAT_NotSpliceConsensusAcceptor, "Splice acceptor consensus (AG) not found before exon", NULL, LowerToInfoForGenomic}, + {ERR_SEQ_FEAT_NotSpliceConsensusAcceptor, "Splice acceptor consensus (AG) not found before exon", NULL, WarnForGPSOrRefSeq}, +}; + +const Int4 kNumSevChangeRules = sizeof (valid_sevchange) / sizeof (ValidErrSevChangeData); + +static int AdjustSeverity (int severity, ValidStructPtr vsp, int code1, int code2, const char *fmt) +{ + Int4 i; + int rval = severity; + + for (i = 0; i < kNumSevChangeRules; i++) { + if (code1 == valid_sevchange[i].code1 && code2 == valid_sevchange[i].code2 + && (valid_sevchange[i].search_phrase == NULL || StringISearch (fmt, valid_sevchange[i].search_phrase) != NULL) + && (valid_sevchange[i].exclude_phrase == NULL || StringISearch (fmt, valid_sevchange[i].exclude_phrase) == NULL) + && valid_sevchange[i].func != NULL) { + rval = (valid_sevchange[i].func)(rval, vsp); + } + } + + return rval; +} + + 
#ifdef VAR_ARGS NLM_EXTERN void CDECL ValidErr (vsp, severity, code1, code2, fmt, va_alist) ValidStructPtr vsp; @@ -1322,9 +1601,11 @@ NLM_EXTERN void CDECL ValidErr (ValidStructPtr vsp, int severity, int code1, int SeqFeatPtr sfp; SeqIdPtr sip; - if (vsp == NULL || severity < vsp->cutoff) + if (vsp == NULL || severity < vsp->cutoff || ShouldSuppressValidErr(vsp, code1, code2, fmt)) return; + severity = AdjustSeverity(severity, vsp, code1, code2, fmt); + if (vsp->errbuf == NULL) { vsp->errbuf = MemNew (8192); if (vsp->errbuf == NULL) @@ -1491,9 +1772,27 @@ NLM_EXTERN void CDECL ValidErr (ValidStructPtr vsp, int severity, int code1, int buflen -= diff; tmp += diff; - diff = SeqDescLabel (vsp->descr, tmp, buflen, OM_LABEL_BOTH); - buflen -= diff; - tmp += diff; + if (vsp->descr->choice == Seq_descr_comment) { + diff = SeqDescLabel (vsp->descr, tmp, buflen, OM_LABEL_BOTH); + if (diff > 100) { + /* truncate long comment in message */ + tmp [94] = ' '; + tmp [95] = '.'; + tmp [96] = '.'; + tmp [97] = '.'; + tmp [98] = '\0'; + diff = 98; + buflen -= diff; + tmp += diff; + } else { + buflen -= diff; + tmp += diff; + } + } else { + diff = SeqDescLabel (vsp->descr, tmp, buflen, OM_LABEL_BOTH); + buflen -= diff; + tmp += diff; + } } /* @@ -1653,6 +1952,18 @@ static void StructuredCommentError (EFieldValid err_code, FieldRulePtr field_rul } +static Boolean StringLooksLikeFakeStructuredComment (CharPtr str) +{ + if (StringHasNoText (str)) { + return FALSE; + } + if (StringSearch (str, "::") != NULL) { + return TRUE; + } + return FALSE; +} + + /***************************************************************************** * * Valid1GatherProc(gcp) @@ -1871,6 +2182,10 @@ static Boolean Valid1GatherProc (GatherContextPtr gcp) ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_SerialInComment, "Comment may refer to reference by serial number - attach reference specific comments to the reference REMARK instead."); } + if (StringLooksLikeFakeStructuredComment (str)) { + ValidErr (vsp, SEV_INFO, 
ERR_SEQ_DESCR_FakeStructuredComment, + "Comment may be formatted to look like a structured comment."); + } for (vnp2 = sdp->next; vnp2 != NULL; vnp2 = vnp2->next) { if (vnp2->choice == Seq_descr_comment) { ptr = (CharPtr) vnp2->data.ptrvalue; @@ -2595,13 +2910,21 @@ static void CheckForCollidingSerials ( static void ValidateFeatCits (SeqEntryPtr sep, ValidStructPtr vsp) { + SeqEntryPtr bsep; + BioseqPtr bsp = NULL; GatherContext gc; VfcData vfd; if (vsp == NULL || sep == NULL) return; + + bsep = FindNthBioseq (sep, 1); + if (bsep != NULL && IS_Bioseq (bsep)) { + bsp = (BioseqPtr) bsep->data.ptrvalue; + } + vsp->gcp = &gc; vsp->bssp = NULL; - vsp->bsp = NULL; + vsp->bsp = bsp; vsp->sfp = NULL; vsp->descr = NULL; MemSet ((Pointer) &gc, 0, sizeof (GatherContext)); @@ -2613,7 +2936,7 @@ static void ValidateFeatCits (SeqEntryPtr sep, ValidStructPtr vsp) VisitFeaturesInSep (sep, (Pointer) &vfd, CheckFeatCits); vsp->bssp = NULL; - vsp->bsp = NULL; + vsp->bsp = bsp; vsp->sfp = NULL; vsp->descr = NULL; vfd.serial = ValNodeSort (vfd.serial, SortByIntvalue); @@ -2625,10 +2948,12 @@ static void ValidateFeatCits (SeqEntryPtr sep, ValidStructPtr vsp) ValNodeFree (vfd.serial); } -static void ValidateFeatIDs (Uint2 entityID, ValidStructPtr vsp) +static void ValidateFeatIDs (SeqEntryPtr sep, Uint2 entityID, ValidStructPtr vsp) { SMFidItemPtr PNTR array; + SeqEntryPtr bsep; + BioseqPtr bsp = NULL; BioseqExtraPtr bspextra; SMFeatItemPtr feat; GatherContext gc; @@ -2640,7 +2965,7 @@ static void ValidateFeatIDs (Uint2 entityID, ValidStructPtr vsp) ObjMgrDataPtr omdp; SeqFeatPtr sfp; - if (entityID < 1 || vsp == NULL) return; + if (sep == NULL || entityID < 1 || vsp == NULL) return; omdp = ObjMgrGetData (entityID); if (omdp == NULL) return; bspextra = (BioseqExtraPtr) omdp->extradata; @@ -2649,9 +2974,14 @@ static void ValidateFeatIDs (Uint2 entityID, ValidStructPtr vsp) num = bspextra->numfids; if (array == NULL || num < 1) return; + bsep = FindNthBioseq (sep, 1); + if (bsep != NULL && 
IS_Bioseq (bsep)) { + bsp = (BioseqPtr) bsep->data.ptrvalue; + } + vsp->gcp = &gc; vsp->bssp = NULL; - vsp->bsp = NULL; + vsp->bsp = bsp; vsp->sfp = NULL; vsp->descr = NULL; MemSet ((Pointer) &gc, 0, sizeof (GatherContext)); @@ -2684,21 +3014,6 @@ typedef struct vsicdata { ValNodePtr tailid; } VsicData, PNTR VsicDataPtr; -static Boolean IsNCBIFileID (SeqIdPtr sip) -{ - DbtagPtr dbt; - - if (sip == NULL || sip->choice != SEQID_GENERAL) return FALSE; - dbt = (DbtagPtr) sip->data.ptrvalue; - if (dbt == NULL) return FALSE; - if (StringCmp (dbt->db, "NCBIFILE") == 0) { - return TRUE; - } else { - return FALSE; - } -} - - static void CaptureTextSeqIDs (BioseqPtr bsp, Pointer userdata) { @@ -2755,6 +3070,8 @@ static ValNodePtr UniqueValNodeCaseSensitive (ValNodePtr list) static void ValidateSeqIdCase (SeqEntryPtr sep, ValidStructPtr vsp) { + SeqEntryPtr bsep; + BioseqPtr bsp = NULL; CharPtr curr; GatherContext gc; GatherContextPtr gcp; @@ -2764,13 +3081,18 @@ static void ValidateSeqIdCase (SeqEntryPtr sep, ValidStructPtr vsp) if (vsp == NULL || sep == NULL) return; + bsep = FindNthBioseq (sep, 1); + if (bsep != NULL && IS_Bioseq (bsep)) { + bsp = (BioseqPtr) bsep->data.ptrvalue; + } + MemSet ((Pointer) &gc, 0, sizeof (GatherContext)); MemSet ((Pointer) &vd, 0, sizeof (VsicData)); gcp = &gc; vsp->gcp = &gc; vsp->bssp = NULL; - vsp->bsp = NULL; + vsp->bsp = bsp; vsp->sfp = NULL; vsp->descr = NULL; vd.vsp = vsp; @@ -2806,6 +3128,7 @@ static void LookForBioseqFields (BioseqPtr bsp, Pointer userdata) Boolean has_lcl_gnl = FALSE; Boolean has_others = FALSE; SeqIdPtr sip; + TextSeqIdPtr tsip; ValidStructPtr vsp; if (bsp == NULL || userdata == NULL) return; @@ -2819,10 +3142,21 @@ static void LookForBioseqFields (BioseqPtr bsp, Pointer userdata) /* and fall through */ case SEQID_GENBANK: case SEQID_TPG: + vsp->is_insd_in_sep = TRUE; + tsip = (TextSeqIdPtr) sip->data.ptrvalue; + if (tsip != NULL) { + if (StringLen (tsip->accession) == 6) { + vsp->is_old_gb_in_sep = TRUE; + } + } + 
break; case SEQID_TPE: case SEQID_TPD: vsp->is_insd_in_sep = TRUE; break; + case SEQID_PATENT: + vsp->is_patent_in_sep = TRUE; + break; case SEQID_OTHER: vsp->is_refseq_in_sep = TRUE; break; @@ -2870,6 +3204,7 @@ static void LookForBioseqSetFields (BioseqSetPtr bssp, Pointer userdata) case BioseqseqSet_class_phy_set: case BioseqseqSet_class_eco_set: case BioseqseqSet_class_wgs_set: + case BioseqseqSet_class_small_genome_set: break; vsp->other_sets_in_sep = TRUE; default: @@ -2883,7 +3218,8 @@ static void LookForBioseqSetFields (BioseqSetPtr bssp, Pointer userdata) bssp->_class == BioseqseqSet_class_pop_set || bssp->_class == BioseqseqSet_class_phy_set || bssp->_class == BioseqseqSet_class_eco_set || - bssp->_class == BioseqseqSet_class_wgs_set) { + bssp->_class == BioseqseqSet_class_wgs_set || + bssp->_class == BioseqseqSet_class_small_genome_set) { vsp->other_sets_in_sep = TRUE; } } @@ -2943,6 +3279,46 @@ static void LookForSeqDescrFields (SeqDescrPtr sdp, Pointer userdata) } } +static void FindMultiIntervalGenes ( + SeqFeatPtr sfp, + Pointer userdata +) + +{ + BoolPtr multiIntervalGenesP; + SeqLocPtr slp; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return; + multiIntervalGenesP = (BoolPtr) userdata; + if (multiIntervalGenesP == NULL) return; + + slp = sfp->location; + if (slp == NULL) return; + switch (slp->choice) { + case SEQLOC_PACKED_INT : + case SEQLOC_PACKED_PNT : + case SEQLOC_MIX : + case SEQLOC_EQUIV : + *multiIntervalGenesP = TRUE; + break; + default : + break; + } +} + +static void FindSegmentedBioseqs ( + BioseqPtr bsp, + Pointer userdata +) + +{ + BoolPtr segmentedBioseqsP; + + if (bsp == NULL || bsp->repr != Seq_repr_seg) return; + segmentedBioseqsP = (BoolPtr) userdata; + if (segmentedBioseqsP == NULL) return; + *segmentedBioseqsP = TRUE; +} static void SetPubScratchData (SeqDescrPtr sdp, Pointer userdata) { @@ -3231,6 +3607,7 @@ static void TestDeletedOrReplacedECnumbers (ValidStructPtr vsp) CharPtr ptr; ErrSev sev; CharPtr str; + 
CharPtr tmp; /* only check first time program runs validator */ @@ -3260,14 +3637,22 @@ static void TestDeletedOrReplacedECnumbers (ValidStructPtr vsp) if (! ECnumberNotInList (str)) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "Replaced EC number %s still in live list", str); } - if (ECnumberNotInList (ptr)) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "Replacement EC number %s not in live list", ptr); - } if (ECnumberWasDeleted (str)) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "Replaced EC number %s in deleted list", str); } - if (ECnumberWasDeleted (ptr)) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "Replacement EC number %s in deleted list", ptr); + while (StringDoesHaveText (ptr)) { + tmp = StringChr (ptr, '\t'); + if (tmp != NULL) { + *tmp = '\0'; + tmp++; + } + if (ECnumberNotInList (ptr)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "Replacement EC number %s not in live list", ptr); + } + if (ECnumberWasDeleted (ptr)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "Replacement EC number %s in deleted list", ptr); + } + ptr = tmp; } } } @@ -3334,21 +3719,6 @@ static CollisionInfoPtr CollisionInfoFree (CollisionInfoPtr cip) } -static Boolean IsNcbiFileId(SeqIdPtr sip) -{ - DbtagPtr dbtag; - - if (sip == NULL || sip->choice != SEQID_GENERAL || (dbtag = sip->data.ptrvalue) == NULL) { - return FALSE; - } - if (StringCmp (dbtag->db, "NCBIFILE") == 0) { - return TRUE; - } else { - return FALSE; - } -} - - static void LongCollisionCallback (BioseqPtr bsp, Pointer data) { SeqIdPtr sip; @@ -3358,7 +3728,7 @@ static void LongCollisionCallback (BioseqPtr bsp, Pointer data) } for (sip = bsp->id; sip != NULL; sip = sip->next) { - if (!IsNcbiFileId(sip)) { + if (!IsNCBIFileID(sip)) { ValNodeAddPointer ((ValNodePtr PNTR) data, 0, CollisionInfoNew (sip, bsp)); } } @@ -3469,33 +3839,193 @@ static Boolean ValTooManyFarComponents ( return toomanyfar; } -NLM_EXTERN Boolean 
ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp) +static CharPtr inferencePrefix [] = { + "", + "similar to sequence", + "similar to AA sequence", + "similar to DNA sequence", + "similar to RNA sequence", + "similar to RNA sequence, mRNA", + "similar to RNA sequence, EST", + "similar to RNA sequence, other RNA", + "profile", + "nucleotide motif", + "protein motif", + "ab initio prediction", + "alignment", + NULL +}; + + +static CharPtr NextColonOrVerticalBarPtr (CharPtr ptr) { - AuthListPtr alp; - AuthorPtr ap; - DatePtr cd, dp; - ContactInfoPtr cip; - CitSubPtr csp; - Uint2 entityID = 0; - GatherScope gs; - BioseqSetPtr bssp; - SeqSubmitPtr ssp = NULL; - Boolean do_many = FALSE; - Boolean mult_subs = FALSE; - Boolean farFetchProd; - Boolean first = TRUE; - Int4 errors[6]; - Int2 i; - Boolean suppress_no_pubs = TRUE; - Boolean suppress_no_biosrc = TRUE; - FeatProb featprob; - GatherContextPtr gcp = NULL; - GatherContext gc; - SeqEntryPtr fsep; - BioseqPtr fbsp = NULL; - Int2 limit; - SeqEntryPtr oldsep; + Char ch = '\0'; + + if (ptr == NULL) return NULL; + + ch = *ptr; + while (ch != '\0') { + if (ch == ':' || ch == '|') return ptr; + ptr++; + ch = *ptr; + } + + return NULL; +} + +typedef struct valcountdata { + Int4 numInferences; + Int4 numAccessions; +} ValCountData, PNTR ValCountPtr; + +static void ValCountInfAccnVer (SeqFeatPtr sfp, Pointer userdata) + +{ + Int2 best, j; + Char ch; + GBQualPtr gbq; + size_t len; + CharPtr nxt; + CharPtr ptr; + CharPtr rest; + CharPtr str; + CharPtr tmp; + ValCountPtr vcp; + + + if (sfp == NULL || userdata == NULL) return; + vcp = (ValCountPtr) userdata; + + for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) { + if (StringICmp (gbq->qual, "inference") != 0) continue; + if (StringHasNoText (gbq->val)) continue; + + (vcp->numInferences)++; + + rest = NULL; + best = -1; + for (j = 0; inferencePrefix [j] != NULL; j++) { + len = StringLen (inferencePrefix [j]); + if (StringNICmp (gbq->val, inferencePrefix [j], len) != 0) 
continue; + rest = gbq->val + len; + best = j; + } + if (best < 0 || inferencePrefix [best] == NULL) continue; + if (rest == NULL) continue; + + ch = *rest; + while (IS_WHITESP (ch)) { + rest++; + ch = *rest; + } + if (StringNICmp (rest, "(same species)", 14) == 0) { + rest += 14; + } + ch = *rest; + while (IS_WHITESP (ch) || ch == ':') { + rest++; + ch = *rest; + } + if (StringHasNoText (rest)) continue; + + str = StringSave (rest); + + ptr = str; + if (best == 12) { + ptr = StringRChr (str, ':'); + if (ptr != NULL) { + *ptr = '\0'; + ptr++; + } + } + while (ptr != NULL) { + nxt = StringChr (ptr, ','); + if (nxt != NULL) { + *nxt = '\0'; + nxt++; + } + tmp = NextColonOrVerticalBarPtr (ptr); + if (tmp != NULL) { + *tmp = '\0'; + tmp++; + TrimSpacesAroundString (ptr); + TrimSpacesAroundString (tmp); + if (StringDoesHaveText (tmp)) { + if (StringICmp (ptr, "INSD") == 0 || StringICmp (ptr, "RefSeq") == 0) { + (vcp->numAccessions)++; + } + } + } + ptr = nxt; + } + + MemFree (str); + } +} + +NLM_EXTERN Boolean TooManyInferenceAccessions ( + SeqEntryPtr sep, + Int4Ptr numInferences, + Int4Ptr numAccessions +) + +{ + ValCountData vcd; + + if (numInferences != NULL) { + *numInferences = 0; + } + if (numAccessions != NULL) { + *numAccessions = 0; + } + if (sep == NULL) return FALSE; + + vcd.numInferences = 0; + vcd.numAccessions = 0; + + VisitFeaturesInSep (sep, (Pointer) &vcd, ValCountInfAccnVer); + + if (numInferences != NULL) { + *numInferences = vcd.numInferences; + } + if (numAccessions != NULL) { + *numAccessions = vcd.numAccessions; + } + + if (vcd.numInferences > 1000 || vcd.numAccessions > 1000) return TRUE; + + return FALSE; +} + +NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp) + +{ + AuthListPtr alp; + AuthorPtr ap; + DatePtr cd, dp; + ContactInfoPtr cip; + CitSubPtr csp; + Uint2 entityID = 0; + GatherScope gs; + BioseqSetPtr bssp; + SeqSubmitPtr ssp = NULL; + Boolean do_many = FALSE; + Boolean mult_subs = FALSE; + Boolean farFetchProd; 
+ Boolean first = TRUE; + Int4 errors[6]; + Int2 i; + Boolean inferenceAccnCheck; + Boolean suppress_no_pubs = TRUE; + Boolean suppress_no_biosrc = TRUE; + FeatProb featprob; + GatherContextPtr gcp = NULL; + GatherContext gc; + SeqEntryPtr fsep; + BioseqPtr fbsp = NULL; + Int2 limit; + SeqEntryPtr oldsep; ErrSev oldsev; ObjMgrDataPtr omdp; SeqEntryPtr topsep = NULL; @@ -3504,10 +4034,14 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp) SubmitBlockPtr sbp; ErrSev sev; SeqIdPtr sip; + Boolean has_multi_int_genes = FALSE; + Boolean has_seg_bioseqs = FALSE; Boolean isGPS = FALSE; Boolean isPatent = FALSE; Boolean isPDB = FALSE; FindRepData frd; + Int4 numInferences; + Int4 numAccessions; if (sep == NULL || vsp == NULL) return FALSE; @@ -3595,6 +4129,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp) vsp->is_gps_in_sep = FALSE; vsp->other_sets_in_sep = FALSE; vsp->is_embl_ddbj_in_sep = FALSE; + vsp->is_old_gb_in_sep = FALSE; vsp->is_insd_in_sep = FALSE; vsp->only_lcl_gnl_in_sep = FALSE; vsp->has_gnl_prot_sep = FALSE; @@ -3605,6 +4140,11 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp) VisitSetsInSep (sep, (Pointer) vsp, LookForBioseqSetFields); VisitDescriptorsInSep (sep, (Pointer) vsp, LookForSeqDescrFields); + VisitFeaturesInSep (sep, (Pointer) &has_multi_int_genes, FindMultiIntervalGenes); + vsp->has_multi_int_genes = has_multi_int_genes; + VisitBioseqsInSep (sep, (Pointer) &has_seg_bioseqs, FindSegmentedBioseqs); + vsp->has_seg_bioseqs = has_seg_bioseqs; + /* vsp->is_htg_in_sep = FALSE; VisitDescriptorsInSep (sep, (Pointer) &(vsp->is_htg_in_sep), LookForHTG); @@ -3633,6 +4173,8 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp) globalvsp = vsp; /* for spell checker */ + inferenceAccnCheck = vsp->inferenceAccnCheck; + while (sep != NULL) { vsp->far_fetch_failure = FALSE; @@ -3798,8 +4340,24 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, 
ValidStructPtr vsp) /* AssignIDsInEntity (gc.entityID, 0, NULL); */ + if (inferenceAccnCheck) { + numInferences = 0; + numAccessions = 0; + if (TooManyInferenceAccessions (sep, &numInferences, &numAccessions)) { + ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_TooManyInferenceAccessions, + "Skipping validation of %ld /inference qualifiers with %ld accessions", + (long) numInferences, (long) numAccessions); + + /* suppress inference accession.version check for this record */ + vsp->inferenceAccnCheck = FALSE; + } + } + GatherSeqEntry (sep, (Pointer) vsp, Valid1GatherProc, &gs); + /* restore inferenceAccnCheck flag for next record */ + vsp->inferenceAccnCheck = inferenceAccnCheck; + if (ssp != NULL) { if (ssp->datatype == 1) { vsp->bsp = NULL; @@ -3844,7 +4402,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp) vsp->gcp = NULL; vsp->gcp = NULL; - ValidateFeatIDs (gc.entityID, vsp); + ValidateFeatIDs (sep, gc.entityID, vsp); vsp->gcp = NULL; vsp->gcp = NULL; @@ -3970,6 +4528,8 @@ static CharPtr GetBioseqSetClass (Uint1 cl) return ("gen-prod-set"); if (cl == BioseqseqSet_class_wgs_set) return ("wgs-set"); + if (cl == BioseqseqSet_class_small_genome_set) + return ("small-genome-set"); if (cl == BioseqseqSet_class_other) return ("other"); return ("not-set"); @@ -4100,6 +4660,13 @@ static void ValidateNucProtSet (BioseqSetPtr bssp, ValidStructPtr vsp) "Nuc-prot set has %ld protein with a BioSource descriptor", (long) prot_biosource); } + for (sdp = bssp->descr; sdp != NULL; sdp = sdp->next) { + if (sdp->choice == Seq_descr_title) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_NucProtSetHasTitle, + "Nuc-prot set should not have title descriptor"); + } + } + for (sdp = bssp->descr; sdp != NULL; sdp = sdp->next) { if (sdp->choice == Seq_descr_source) { biop = (BioSourcePtr) sdp->data.ptrvalue; @@ -4107,9 +4674,6 @@ static void ValidateNucProtSet (BioseqSetPtr bssp, ValidStructPtr vsp) orp = biop->org; if (orp != NULL && StringDoesHaveText (orp->taxname)) 
return; } - } else if (sdp->choice == Seq_descr_title) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_NucProtSetHasTitle, - "Nuc-prot set should not have title descriptor"); } } @@ -4447,6 +5011,20 @@ static void LookForMolInfoInconsistency (BioseqSetPtr bssp, ValidStructPtr vsp) } } +static Boolean SetHasMolInfo (BioseqSetPtr bssp) + +{ + SeqDescrPtr sdp; + + if (bssp == NULL) return FALSE; + + for (sdp = bssp->descr; sdp != NULL; sdp = sdp->next) { + if (sdp->choice == Seq_descr_molinfo) return TRUE; + } + + return FALSE; +} + static void ValidatePopSet (BioseqSetPtr bssp, ValidStructPtr vsp) { @@ -4473,6 +5051,10 @@ static void ValidatePopSet (BioseqSetPtr bssp, ValidStructPtr vsp) } } + if (SetHasMolInfo (bssp)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_MisplacedMolInfo, "Pop set has MolInfo on set"); + } + LookForMolInfoInconsistency (bssp, vsp); for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { @@ -4529,6 +5111,10 @@ static void ValidateMutSet (BioseqSetPtr bssp, ValidStructPtr vsp) } } + if (SetHasMolInfo (bssp)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_MisplacedMolInfo, "Mut set has MolInfo on set"); + } + LookForMolInfoInconsistency (bssp, vsp); /* error is currently suppressed @@ -4559,6 +5145,10 @@ static void ValidateGenbankSet (BioseqSetPtr bssp, ValidStructPtr vsp) "Bioseq-set contains internal GenBank Bioseq-set"); } } + + if (SetHasMolInfo (bssp)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_MisplacedMolInfo, "Genbank set has MolInfo on set"); + } } static void ValidatePhyEcoWgsSet (BioseqSetPtr bssp, ValidStructPtr vsp) @@ -4578,6 +5168,10 @@ static void ValidatePhyEcoWgsSet (BioseqSetPtr bssp, ValidStructPtr vsp) } } + if (SetHasMolInfo (bssp)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_MisplacedMolInfo, "Phy/eco/wgs set has MolInfo on set"); + } + LookForMolInfoInconsistency (bssp, vsp); } @@ -4637,10 +5231,53 @@ static void ValidateGenProdSet (BioseqSetPtr bssp, ValidStructPtr vsp) } } + if (SetHasMolInfo (bssp)) { + ValidErr (vsp, 
SEV_WARNING, ERR_SEQ_PKG_MisplacedMolInfo, "GenProd set has MolInfo on set"); + } + gcp->itemID = olditemid; gcp->thistype = olditemtype; } +static void NestedSetProc (BioseqSetPtr bssp, Pointer userdata) + +{ + ValidStructPtr vsp; + GatherContextPtr gcp = NULL; + + if (bssp == NULL) return; + + /* pop/phy/mut/eco set can contain up to nuc-prot sets */ + switch (bssp->_class) { + case BioseqseqSet_class_nuc_prot: + case BioseqseqSet_class_segset: + case BioseqseqSet_class_parts: + return; + default: + break; + } + + vsp = (ValidStructPtr) userdata; + if (vsp == NULL) return; + gcp = vsp->gcp; + if (gcp == NULL) return; + + ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_ImproperlyNestedSets, "Nested sets within Pop/Phy/Mut/Eco/Wgs set"); +} + +static void CheckForNestedSets (BioseqSetPtr bssp, Pointer userdata) + +{ + SeqEntryPtr sep; + + if (bssp == NULL) return; + + for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { + if (!IS_Bioseq_set (sep)) continue; + VisitSetsInSep (sep, userdata, NestedSetProc); + } +} + static void ValidateBioseqSet (GatherContextPtr gcp) { @@ -4706,14 +5343,18 @@ static void ValidateBioseqSet (GatherContextPtr gcp) break; case BioseqseqSet_class_pop_set: ValidatePopSet (bssp, vsp); + CheckForNestedSets (bssp, vsp); break; case BioseqseqSet_class_mut_set: ValidateMutSet (bssp, vsp); + CheckForNestedSets (bssp, vsp); break; case BioseqseqSet_class_phy_set: case BioseqseqSet_class_eco_set: case BioseqseqSet_class_wgs_set: + case BioseqseqSet_class_small_genome_set: ValidatePhyEcoWgsSet (bssp, vsp); + CheckForNestedSets (bssp, vsp); break; case BioseqseqSet_class_gen_prod_set: ValidateGenProdSet (bssp, vsp); @@ -4748,7 +5389,7 @@ static void ValidateBioseqSet (GatherContextPtr gcp) if (sep == NULL) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_EmptySet, "Pop/Phy/Mut/Eco set has no components"); } else if (sep->next == NULL) { - if (VisitAlignmentsInSep (sep, NULL, NULL) == 0) { + if (VisitAlignmentsInSep (gcp->sep, NULL, NULL) == 0) { ValidErr 
(vsp, SEV_WARNING, ERR_SEQ_PKG_SingleItemSet, "Pop/Phy/Mut/Eco set has only one component and no alignments"); } } @@ -4818,10 +5459,12 @@ static void LookForSecondaryConflict (ValidStructPtr vsp, GatherContextPtr gcp, static void CheckSegBspAgainstParts (ValidStructPtr vsp, GatherContextPtr gcp, BioseqPtr bsp) { BioseqSetPtr bssp; + Boolean is_odd; BioseqPtr part; SeqEntryPtr sep; SeqIdPtr sip; SeqLocPtr slp; + BioseqPtr vbsp; if (vsp == NULL || gcp == NULL || bsp == NULL) return; @@ -4845,6 +5488,25 @@ static void CheckSegBspAgainstParts (ValidStructPtr vsp, GatherContextPtr gcp, B if (bssp->_class != BioseqseqSet_class_parts) return; + is_odd = FALSE; + for (slp = (ValNodePtr) bsp->seq_ext; slp != NULL; slp = slp->next) { + is_odd = (! is_odd); + if (is_odd) { + if (slp->choice == SEQLOC_NULL) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSegmentedSeq, "Odd segmented component is not expected to be NULL"); + } + } else { + if (slp->choice != SEQLOC_NULL) { + vbsp = BioseqFindFromSeqLoc (slp); + if (vbsp != NULL) { + if (vbsp->repr != Seq_repr_virtual) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSegmentedSeq, "Even segmented component is expected to be NULL or VIRTUAL"); + } + } + } + } + } + sep = bssp->seq_set; for (slp = (ValNodePtr) bsp->seq_ext; slp != NULL; slp = slp->next) { if (slp->choice == SEQLOC_NULL) @@ -5160,7 +5822,7 @@ static Int4 CountAdjacentNsInInterval (GatherContextPtr gcp, BioseqPtr bsp, Int4 SeqLocPtr slp; RunOfNs ron; - if (bsp == NULL || from < 0 || to < from) { + if (bsp == NULL || from < 0 || to < from || ISA_aa (bsp->mol)) { return 0; } @@ -5375,7 +6037,7 @@ static void ReportLongSeqId (SeqIdPtr sip, ValidStructPtr vsp, Int4 max_len) Int4 id_len = 0; CharPtr id_txt; - if (sip == NULL || vsp == NULL || IsNcbiFileId(sip)) { + if (sip == NULL || vsp == NULL || IsNCBIFileID(sip)) { return; } @@ -5389,6 +6051,23 @@ static void ReportLongSeqId (SeqIdPtr sip, ValidStructPtr vsp, Int4 max_len) } +static Boolean SequenceHasGaps (BioseqPtr 
bsp) +{ + SeqMgrFeatContext context; + SeqFeatPtr sfp; + + if (bsp == NULL) { + return FALSE; + } + sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_gap, &context); + if (sfp == NULL) { + return FALSE; + } else { + return TRUE; + } +} + + static void ValidateBioseqInst (GatherContextPtr gcp) { Boolean retval = TRUE; @@ -5430,7 +6109,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp) SeqMgrFeatContext protctxt; CharPtr protlbl = NULL; TextSeqIdPtr tsip; - CharPtr ptr, last, str, title, buf; + CharPtr ptr, last, str, title, buf, bufplus; Uint1 lastchoice; Char ch; Boolean multitoken; @@ -5459,6 +6138,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp) Boolean is_gps = FALSE; Boolean isRefSeq = FALSE; Boolean isSwissProt = FALSE; + Boolean only_local = TRUE; Boolean isLRG = FALSE; ValNodePtr keywords; Boolean last_is_gap; @@ -5533,6 +6213,9 @@ static void ValidateBioseqInst (GatherContextPtr gcp) } for (sip1 = bsp->id; sip1 != NULL; sip1 = sip1->next) { + if (sip1->choice != SEQID_LOCAL) { + only_local = FALSE; + } if (sip1->choice == SEQID_OTHER) { isRefSeq = TRUE; tsip = (TextSeqIdPtr) sip1->data.ptrvalue; @@ -5778,8 +6461,13 @@ static void ValidateBioseqInst (GatherContextPtr gcp) break; case SEQID_GENERAL: dbt = (DbtagPtr) sip1->data.ptrvalue; - if (dbt != NULL && StringICmp (dbt->db, "LRG") == 0) { - isLRG = TRUE; + if (dbt != NULL) { + if (StringICmp (dbt->db, "LRG") == 0) { + isLRG = TRUE; + } + if (StringLen (dbt->db) > 20) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_BadSeqIdFormat, "Database name longer than 20 characters"); + } } break; default: @@ -6957,9 +7645,20 @@ static void ValidateBioseqInst (GatherContextPtr gcp) gcp->thistype = olditemtype; } } + + if (StringISearch (title, "complete genome") != NULL && SequenceHasGaps (bsp)) { + /* warning if title contains complete genome but sequence contains gap features */ + olditemid = gcp->itemID; + olditemtype = gcp->thistype; + gcp->itemID = bsp->idx.itemID; + gcp->thistype = OBJ_BIOSEQ; + 
ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_CompleteTitleProblem, "Title contains 'complete genome' but sequence has gaps"); + gcp->itemID = olditemid; + gcp->thistype = olditemtype; + } } } else { - if (ISA_na (bsp->mol) && vsp->other_sets_in_sep && vsp->indexerVersion) { + if (ISA_na (bsp->mol) && vsp->other_sets_in_sep && (vsp->is_insd_in_sep || vsp->is_refseq_in_sep) && vsp->indexerVersion) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_ComponentMissingTitle, "Nucleotide component of pop/phy/mut/eco/wgs set is missing its title"); } @@ -7003,7 +7702,18 @@ static void ValidateBioseqInst (GatherContextPtr gcp) if (StringICmp (buf, title) != 0) { /* also check generated protein defline with all prp->names - old convention */ if (NewCreateDefLineBuf (&ii, bsp, buf, buflen, TRUE, TRUE)) { - if (StringICmp (buf, title) != 0) { + bufplus = buf; + if (StringNCmp (bufplus, "PREDICTED: ", 11) == 0) { + bufplus += 11; + } else if (StringNCmp (bufplus, "UNVERIFIED: ", 12) == 0) { + bufplus += 12; + } + if (StringNCmp (title, "PREDICTED: ", 11) == 0) { + title += 11; + } else if (StringNCmp (title, "UNVERIFIED: ", 12) == 0) { + title += 12; + } + if (StringICmp (bufplus, title) != 0) { olditemid = gcp->itemID; olditemtype = gcp->thistype; if (vnp->extended != 0) { @@ -7106,6 +7816,8 @@ static void ValidateBioseqInst (GatherContextPtr gcp) sev = SEV_WARNING; } else if (bsp->topology == TOPOLOGY_CIRCULAR) { sev = SEV_WARNING; + } else if (only_local) { + sev = SEV_WARNING; } else if (StringICmp (str, "NNNNNNNNNN") == 0) { sev = SEV_ERROR; } else { @@ -7138,6 +7850,8 @@ static void ValidateBioseqInst (GatherContextPtr gcp) sev = SEV_WARNING; } else if (bsp->topology == TOPOLOGY_CIRCULAR) { sev = SEV_WARNING; + } else if (only_local) { + sev = SEV_WARNING; } else if (StringICmp (str, "NNNNNNNNNN") == 0) { sev = SEV_ERROR; } else { @@ -7411,7 +8125,9 @@ static void ValidateCitSub (ValidStructPtr vsp, CitSubPtr csp) ValidErr (vsp, SEV_ERROR, ERR_GENERIC_MissingPubInfo, "Submission 
citation has no author names"); } if (!hasAffil) { - ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Submission citation has no affiliation"); + if (! vsp->is_patent_in_sep) { + ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Submission citation has no affiliation"); + } } dp = csp->date; if (dp != NULL) { @@ -7682,7 +8398,9 @@ static void ValidatePubdesc (ValidStructPtr vsp, GatherContextPtr gcp, PubdescPt cgp = (CitGenPtr) vnp->data.ptrvalue; hasName = FALSE; if (cgp != NULL) { - if (!StringHasNoText (cgp->cit)) { + if (StringDoesHaveText (cgp->cit)) { + /* skip if just BackBone id number */ + if (StringNICmp (cgp->cit, "BackBone id_pub = ", 18) == 0 && cgp->journal == NULL && cgp->date == NULL && cgp->serial_number < 0) break; if (StringNICmp (cgp->cit, "submitted", 8) == 0 || StringNICmp (cgp->cit, "unpublished", 11) == 0 || StringNICmp (cgp->cit, "Online Publication", 18) == 0 || @@ -8086,27 +8804,6 @@ static Boolean DeltaOrFarSeg (SeqEntryPtr sep, SeqLocPtr location) } -static Boolean IsLocationOrganelle (Uint1 genome) -{ - if (genome == GENOME_chloroplast - || genome == GENOME_chromoplast - || genome == GENOME_kinetoplast - || genome == GENOME_mitochondrion - || genome == GENOME_cyanelle - || genome == GENOME_nucleomorph - || genome == GENOME_apicoplast - || genome == GENOME_leucoplast - || genome == GENOME_proplastid - || genome == GENOME_hydrogenosome - || genome == GENOME_plastid - || genome == GENOME_chromatophore) { - return TRUE; - } else { - return FALSE; - } -} - - static Boolean IsOrganelleBioseq (BioseqPtr bsp) { SeqDescrPtr sdp; @@ -8136,7 +8833,6 @@ ValidateIntronEndsAtSpliceSiteOrGap Char id_buf[150]; SeqFeatPtr rna; SeqMgrFeatContext rcontext; - ErrSev sev = SEV_WARNING; if (vsp == NULL || slp == NULL) return; CheckSeqLocForPartial (slp, &partial5, &partial3); @@ -8178,10 +8874,6 @@ ValidateIntronEndsAtSpliceSiteOrGap strand = SeqLocStrand (slp); - if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - sev = SEV_INFO; - } - if (!partial5) { 
if (strand == Seq_strand_minus) { SeqPortStreamInt (bsp, stop - 1, stop, Seq_strand_minus, EXPAND_GAPS_TO_DASHES, (Pointer) buf, NULL); @@ -8198,7 +8890,7 @@ ValidateIntronEndsAtSpliceSiteOrGap ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_NotSpliceConsensusDonor, "Splice donor consensus (GT) not found at start of terminal intron, position %ld of %s", (long) (pos + 1), id_buf); } else { - ValidErr (vsp, sev, ERR_SEQ_FEAT_NotSpliceConsensusDonor, + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_NotSpliceConsensusDonor, "Splice donor consensus (GT) not found at start of intron, position %ld of %s", (long) (pos + 1), id_buf); } } @@ -8217,13 +8909,77 @@ ValidateIntronEndsAtSpliceSiteOrGap ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_NotSpliceConsensusAcceptor, "Splice acceptor consensus (AG) not found at end of terminal intron, position %ld of %s, but at end of sequence", (long) (pos + 1), id_buf); } else { - ValidErr (vsp, sev, ERR_SEQ_FEAT_NotSpliceConsensusAcceptor, + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_NotSpliceConsensusAcceptor, "Splice acceptor consensus (AG) not found at end of intron, position %ld of %s", (long) (pos + 1), id_buf); } } BioseqUnlock (bsp); } +static Boolean IsLocInSmallGenomeSet ( + SeqLocPtr loc +) + +{ + BioseqPtr bsp; + SeqIdPtr sip; + SeqLocPtr slp; + + if (loc == NULL) return FALSE; + + slp = SeqLocFindNext (loc, NULL); + while (slp != NULL) { + sip = SeqLocId (slp); + if (sip == NULL) return FALSE; + bsp = BioseqFind (sip); + if (bsp == NULL) return FALSE; + slp = SeqLocFindNext (loc, slp); + } + + return TRUE; +} + +static Boolean AllPartsInSmallGenomeSet ( + SeqLocPtr loc, + ValidStructPtr vsp, + BioseqPtr bsp +) + +{ + BioseqSetPtr bssp; + SeqEntryPtr oldscope; + Boolean rsult = FALSE; + SeqEntryPtr sep; + + if (loc == NULL || vsp == NULL || bsp == NULL) return FALSE; + + sep = vsp->sep; + if (sep == NULL) return FALSE; + if (! 
IS_Bioseq_set (sep)) return FALSE; + bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (bssp == NULL) return FALSE; + + /* if genbank set wraps everything, go down one set level */ + if (bssp->_class == BioseqseqSet_class_genbank) { + sep = bssp->seq_set; + if (sep == NULL) return FALSE; + if (! IS_Bioseq_set (sep)) return FALSE; + bssp = (BioseqSetPtr) sep->data.ptrvalue; + } + + /* check for small genome set */ + if (bssp == NULL || bssp->_class != BioseqseqSet_class_small_genome_set) return FALSE; + + /* scope within small genome set for subsequent BioseqFind calls */ + oldscope = SeqEntrySetScope (sep); + + rsult = IsLocInSmallGenomeSet (loc); + + SeqEntrySetScope (oldscope); + + return rsult; +} + /***************************************************************************** * @@ -8411,7 +9167,7 @@ static Boolean ValidateSeqFeatCommon (SeqFeatPtr sfp, BioseqValidStrPtr bvsp, Va } } - if (farloc && (! is_nc) && (! is_emb)) { + if (farloc && (! is_nc) && (! is_emb) && (! AllPartsInSmallGenomeSet (sfp->location, vsp, bsp))) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FarLocation, "Feature has 'far' location - accession not packaged in record"); } @@ -8619,6 +9375,7 @@ static CharPtr Nlm_valid_country_codes [] = { "Bahamas", "Bahrain", "Baker Island", + "Baltic Sea", "Bangladesh", "Barbados", "Bassas da India", @@ -8990,584 +9747,358 @@ NLM_EXTERN CharPtr GetCorrectedCountryCapitalization (CharPtr name) return NULL; } +static CharPtr bodiesOfWater [] = { + "Basin", + "Bay", + "Bight", + "Canal", + "Channel", + "Coastal", + "Cove", + "Estuary", + "Fjord", + "Freshwater", + "Gulf", + "Harbor", + "Inlet", + "Lagoon", + "Lake", + "Narrows", + "Ocean", + "Offshore", + "Passage", + "Passages", + "Reef", + "River", + "Sea", + "Seawater", + "Sound", + "Strait", + "Trench", + "Trough", + "Water", + "Waters", + NULL +}; + +static TextFsaPtr GetBodiesOfWaterFSA (void) + + +{ + TextFsaPtr fsa; + Int2 i; + CharPtr prop = "BodiesOfWaterFSA"; -static CharPtr ctry_lat_lon [] = { - 
"Afghanistan\tAF\t60.4\t29.3\t74.9\t38.5", - "Albania\tAL\t19.2\t39.6\t21.1\t42.7", - "Algeria\tAG\t-8.7\t18.9\t12.0\t37.1", - "American Samoa\tAQ\t-171.1\t-11.1\t-171.1\t-11.0\t-170.9\t-14.4\t-169.4\t-14.2", - "Andorra\tAN\t1.4\t42.4\t1.8\t42.7", - "Angola\tAO\t11.6\t-18.1\t24.1\t-4.4", - "Anguilla\tAV\t-63.2\t18.1\t-62.9\t18.3", - "Antarctica\tAY\t", - "Antigua and Barbuda\tAC\t-62.4\t16.9\t-62.3\t16.9\t-62.0\t16.9\t-61.7\t17.7", - "Arctic Ocean\tXX\t", - "Argentina\tAR\t-73.6\t-55.1\t-53.6\t-21.8", - "Armenia\tAM\t43.4\t38.8\t46.6\t41.3", - "Aruba\tAA\t-70.1\t12.4\t-69.8\t12.7", - "Ashmore and Cartier Islands\tAT\t122.9\t-12.3\t123.1\t-12.1", - "Atlantic Ocean\tXX\t", - "Australia\tAS\t112.9\t-43.7\t153.6\t-10.0", - "Australia: Australian Capital Territory\tXX\t148.7\t-36.0\t149.4\t-35.1", - "Australia: Jervis Bay Territory\tXX\t150.5\t-35.2\t150.8\t-35.1", - "Australia: New South Wales\tXX\t140.9\t-37.6\t153.6\t-28.2", - "Australia: Northern Territory\tXX\t128.9\t-26.1\t138.0\t-10.9", - "Australia: Queensland\tXX\t137.9\t-29.2\t153.6\t-10.0", - "Australia: South Australia\tXX\t128.9\t-38.1\t141.0\t-26.0", - "Australia: Tasmania\tXX\t143.8\t-43.7\t148.5\t-39.6", - "Australia: Victoria\tXX\t140.9\t-39.6\t150.0\t-34.0", - "Australia: Western Australia\tXX\t112.9\t-35.2\t129.0\t-13.7", - "Austria\tAU\t9.5\t46.3\t17.2\t49.0", - "Azerbaijan\tAJ\t45.0\t38.3\t50.6\t41.9", - "Bahamas\tBF\t-79.7\t20.9\t-72.7\t27.2", - "Bahrain\tBA\t50.3\t25.7\t50.7\t26.3", - "Baker Island\tFQ\t-176.5\t0.1\t-176.5\t0.2", - "Bangladesh\tBG\t88.0\t20.5\t92.7\t26.6", - "Barbados\tBB\t-59.7\t13.0\t-59.4\t13.3", - "Bassas da India\tBS\t39.6\t-21.6\t39.8\t-21.4", - "Belarus\tBO\t23.1\t51.2\t32.8\t56.2", - "Belgium\tBE\t2.5\t49.4\t6.4\t51.5", - "Belize\tBH\t-89.3\t15.8\t-86.9\t18.5", - "Benin\tBN\t0.7\t6.2\t3.9\t12.4", - "Bermuda\tBD\t-64.9\t32.2\t-64.7\t32.4", - "Bhutan\tBT\t88.7\t26.7\t92.1\t28.3", - "Bolivia\tBL\t-69.7\t-22.9\t-57.5\t-9.7", - "Borneo\tXX\t108.6\t-4.2\t119.3\t7.4", - "Bosnia 
and Herzegovina\tBK\t15.7\t42.5\t19.7\t45.3", - "Botswana\tBC\t19.9\t-27.0\t29.4\t-17.8", - "Bouvet Island\tBV\t3.3\t-54.5\t3.5\t-54.4", - "Brazil\tBR\t-74.0\t-33.8\t-34.8\t5.0", - "British Virgin Islands\tVI\t-64.8\t18.2\t-63.2\t18.8", - "Brunei\tBX\t114.0\t4.0\t115.4\t5.0", - "Bulgaria\tBU\t22.3\t41.2\t28.6\t44.2", - "Burkina Faso\tUV\t-5.6\t9.4\t2.4\t15.1", - "Burundi\tBY\t28.9\t-4.5\t30.8\t-2.3", - "Cambodia\tCB\t102.3\t9.2\t107.6\t14.7", - "Cameroon\tCM\t8.4\t1.6\t16.2\t13.1", - "Canada\tCA\t-141.0\t41.7\t-52.6\t83.1", - "Canada: Alberta\tXX\t-120.0\t48.9\t-110.0\t60.0", - "Canada: British Columbia\tXX\t-139.1\t48.3\t-114.1\t60.0", - "Canada: Manitoba\tXX\t-102.1\t48.9\t-89.0\t60.0", - "Canada: New Brunswick\tXX\t-69.1\t44.5\t-63.8\t48.1", - "Canada: Newfoundland and Labrador\tXX\t-67.9\t46.6\t-52.6\t60.4", - "Canada: Northwest Territories\tXX\t-136.5\t60.0\t-102.0\t78.8", - "Canada: Nova Scotia\tXX\t-66.4\t43.3\t-59.7\t47.0", - "Canada: Nunavut\tXX\t-120.4\t60.0\t-61.2\t83.1", - "Canada: Ontario\tXX\t-95.2\t41.6\t-74.3\t56.9", - "Canada: Prince Edward Island\tXX\t-64.5\t45.9\t-62.0\t47.1", - "Canada: Quebec\tXX\t-79.8\t45.0\t-57.1\t62.6", - "Canada: Saskatchewan\tXX\t-110.0\t48.9\t-101.4\t60.0", - "Canada: Yukon\tXX\t-141.0\t60.0\t-124.0\t69.6", - "Cape Verde\tCV\t-25.4\t14.8\t-22.7\t17.2", - "Cayman Islands\tCJ\t-81.5\t19.2\t-81.1\t19.4\t-80.2\t19.6\t-79.7\t19.8", - "Central African Republic\tCT\t14.4\t2.2\t27.5\t11.0", - "Chad\tCD\t13.4\t7.4\t24.0\t23.5", - "Chile\tCI\t-75.8\t-56.0\t-66.4\t-17.5", - "China\tCH\t73.5\t20.2\t134.8\t53.6\t108.6\t18.1\t111.1\t20.2", - "China: Hainan\tXX\t108.6\t18.1\t111.1\t20.2", - "Christmas Island\tKT\t105.5\t-10.6\t105.7\t-10.4", - "Clipperton Island\tIP\t-109.3\t10.2\t-109.2\t10.3", - "Cocos Islands\tCK\t96.8\t-12.2\t96.9\t-11.8", - "Colombia\tCO\t-79.1\t-4.3\t-66.9\t12.5", - "Comoros\tCN\t43.2\t-12.5\t44.5\t-11.4", - "Cook Islands\tCW\t-159.9\t-22.0\t-157.3\t-18.8", - "Coral Sea Islands\tCR\t", - "Costa 
Rica\tCS\t-87.1\t5.4\t-87.0\t5.6\t-86.0\t8.0\t-82.6\t11.2", - "Cote d'Ivoire\tIV\t-8.6\t4.3\t-2.5\t10.7", - "Croatia\tHR\t13.4\t42.3\t19.4\t46.5", - "Cuba\tCU\t-85.0\t19.8\t-74.1\t23.3", - "Cyprus\tCY\t32.2\t34.5\t34.6\t35.7", - "Czech Republic\tEZ\t12.0\t48.5\t18.9\t51.0", - "Democratic Republic of the Congo\tCG\t12.2\t-13.5\t31.3\t5.4", - "Denmark\tDA\t8.0\t54.5\t12.7\t57.7\t14.6\t54.9\t15.2\t55.3", - "Djibouti\tDJ\t41.7\t10.9\t43.4\t12.7", - "Dominica\tDO\t-61.5\t15.2\t-61.2\t15.6", - "Dominican Republic\tDR\t-72.1\t17.4\t-68.3\t19.9", - "East Timor\tTT\t124.9\t-9.5\t127.4\t-8.3", - "Ecuador\tEC\t-92.1\t-1.5\t-89.2\t1.7\t-81.1\t-5.0\t-75.2\t1.4", - "Ecuador: Galapagos\tXX\t-92.1\t-1.5\t-89.2\t1.7", - "Egypt\tEG\t24.6\t21.7\t35.8\t31.7", - "El Salvador\tES\t-90.2\t13.1\t-87.7\t14.4", - "Equatorial Guinea\tEK\t8.4\t3.2\t8.9\t3.8\t9.2\t0.8\t11.3\t2.3", - "Eritrea\tER\t36.4\t12.3\t43.1\t18.0", - "Estonia\tEN\t21.7\t57.5\t28.2\t59.7", - "Ethiopia\tET\t32.9\t3.4\t48.0\t14.9", - "Europa Island\tEU\t40.3\t-22.4\t40.4\t-22.3", - "Falkland Islands (Islas Malvinas)\tFK\t-61.4\t-53.0\t-57.7\t-51.0", - "Faroe Islands\tFO\t-7.7\t61.3\t-6.3\t62.4", - "Fiji\tFJ\t-180.0\t-20.7\t-178.2\t-15.7\t-175.7\t-19.8\t-175.0\t-15.6\t176.8\t-19.3\t180.0\t-12.5", - "Finland\tFI\t19.3\t59.7\t31.6\t70.1", - "France\tFR\t-5.2\t42.3\t8.2\t51.1\t8.5\t41.3\t9.6\t43.1", - "France: Corsica\tXX\t8.5\t41.3\t9.6\t43.1", - "French Guiana\tFG\t-54.6\t2.1\t-51.6\t5.8", - "French Polynesia\tFP\t-154.7\t-27.7\t-134.9\t-7.8", - "French Southern and Antarctic Lands\tFS\t68.6\t-49.8\t70.6\t-48.5", - "Gabon\tGB\t8.6\t-4.0\t14.5\t2.3", - "Gambia\tGA\t-16.9\t13.0\t-13.8\t13.8", - "Gaza Strip\tGZ\t34.2\t31.2\t34.5\t31.6", - "Georgia\tGG\t40.0\t41.0\t46.7\t43.6", - "Germany\tGM\t5.8\t47.2\t15.0\t55.1", - "Ghana\tGH\t-3.3\t4.7\t1.2\t11.2", - "Gibraltar\tGI\t-5.4\t36.1\t-5.3\t36.2", - "Glorioso Islands\tGO\t47.2\t-11.6\t47.4\t-11.5", - "Greece\tGR\t19.3\t34.8\t28.2\t41.8", - "Greenland\tGL\t-73.3\t59.7\t-11.3\t83.6", 
- "Grenada\tGJ\t-61.8\t11.9\t-61.6\t12.3", - "Guadeloupe\tGP\t-63.2\t17.8\t-62.8\t18.1\t-61.9\t15.8\t-61.0\t16.5", - "Guam\tGQ\t144.6\t13.2\t145.0\t13.7", - "Guatemala\tGT\t-92.3\t13.7\t-88.2\t17.8", - "Guernsey\tGK\t-2.7\t49.4\t-2.4\t49.5", - "Guinea\tGV\t-15.1\t7.1\t-7.6\t12.7", - "Guinea-Bissau\tPU\t-16.8\t10.8\t-13.6\t12.7", - "Guyana\tGY\t-61.4\t1.1\t-56.5\t8.6", - "Haiti\tHA\t-74.5\t18.0\t-71.6\t20.1", - "Heard Island and McDonald Islands\tHM\t73.2\t-53.2\t73.7\t-52.9", - "Honduras\tHO\t-89.4\t12.9\t-83.2\t16.5", - "Hong Kong\tHK\t113.8\t22.1\t114.4\t22.6", - "Howland Island\tHQ\t-176.7\t0.7\t-176.6\t0.8", - "Hungary\tHU\t16.1\t45.7\t22.9\t48.6", - "Iceland\tIC\t-24.6\t63.2\t-13.5\t66.6", - "India\tIN\t67.3\t8.0\t97.4\t35.5", - "Indian Ocean\tXX\t", - "Indonesia\tID\t95.0\t-11.1\t141.0\t5.9", - "Iran\tIR\t44.0\t25.0\t63.3\t39.8", - "Iraq\tIZ\t38.8\t29.1\t48.6\t37.4", - "Ireland\tEI\t-10.7\t51.4\t-6.0\t55.4", - "Isle of Man\tIM\t-4.9\t54.0\t-4.3\t54.4", - "Israel\tIS\t34.2\t29.4\t35.7\t33.3", - "Italy\tIT\t6.6\t35.4\t18.5\t47.1", - "Jamaica\tJM\t-78.4\t17.7\t-76.2\t18.5", - "Jan Mayen\tJN\t-9.1\t70.8\t-7.9\t71.2", - "Japan\tJA\t122.9\t24.0\t125.5\t25.9\t126.7\t20.5\t145.8\t45.5", - "Jarvis Island\tDQ\t-160.1\t-0.4\t-160.0\t-0.4", - "Jersey\tJE\t-2.3\t49.1\t-2.0\t49.3", - "Johnston Atoll\tJQ\t-169.6\t16.7\t-169.4\t16.8", - "Jordan\tJO\t34.9\t29.1\t39.3\t33.4", - "Juan de Nova Island\tJU\t42.6\t-17.1\t42.8\t-16.8", - "Kazakhstan\tKZ\t46.4\t40.9\t87.3\t55.4", - "Kenya\tKE\t33.9\t-4.7\t41.9\t4.6", - "Kerguelen Archipelago\tXX\t", - "Kingman Reef\tKQ\t-162.9\t6.1\t-162.4\t6.7", - "Kiribati\tKR\t172.6\t0.1\t173.9\t3.4\t174.2\t-2.7\t176.9\t-0.5", - "Kosovo\tKV\t20.0\t41.8\t43.3\t21.9", - "Kuwait\tKU\t46.5\t28.5\t48.4\t30.1", - "Kyrgyzstan\tKG\t69.2\t39.1\t80.3\t43.2", - "Laos\tLA\t100.0\t13.9\t107.7\t22.5", - "Latvia\tLG\t20.9\t55.6\t28.2\t58.1", - "Lebanon\tLE\t35.1\t33.0\t36.6\t34.7", - "Lesotho\tLT\t27.0\t-30.7\t29.5\t-28.6", - 
"Liberia\tLI\t-11.5\t4.3\t-7.4\t8.6", - "Libya\tLY\t9.3\t19.5\t25.2\t33.2", - "Liechtenstein\tLS\t9.4\t47.0\t9.6\t47.3", - "Lithuania\tLH\t20.9\t53.9\t26.9\t56.4", - "Luxembourg\tLU\t5.7\t49.4\t6.5\t50.2", - "Macau\tMC\t113.5\t22.1\t113.6\t22.2", - "Macedonia\tMK\t20.4\t40.8\t23.0\t42.4", - "Madagascar\tMA\t43.1\t-25.7\t50.5\t-11.9", - "Malawi\tMI\t32.6\t-17.2\t35.9\t-9.4", - "Malaysia\tMY\t98.9\t5.6\t98.9\t5.7\t99.6\t1.2\t104.5\t6.7\t109.5\t0.8\t119.3\t7.4", - "Maldives\tMV\t72.6\t-0.7\t73.7\t7.1", - "Mali\tML\t-12.3\t10.1\t4.2\t25.0", - "Malta\tMT\t14.1\t35.8\t14.6\t36.1", - "Marshall Islands\tRM\t160.7\t4.5\t172.0\t14.8", - "Martinique\tMB\t-61.3\t14.3\t-60.8\t14.9", - "Mauritania\tMR\t-17.1\t14.7\t-4.8\t27.3", - "Mauritius\tMP\t57.3\t-20.6\t57.8\t-20.0\t59.5\t-16.9\t59.6\t-16.7", - "Mayotte\tMF\t45.0\t-13.1\t45.3\t-12.6", - "Mediterranean Sea\tXX\t", - "Mexico\tMX\t-118.5\t28.8\t-118.3\t29.2\t-117.3\t14.5\t-86.7\t32.7", - "Micronesia\tFM\t138.0\t9.4\t138.2\t9.6\t139.6\t9.8\t139.8\t10.0\t140.5\t9.7\t140.5\t9.8\t147.0\t7.3\t147.0\t7.4\t149.3\t6.6\t149.3\t6.7\t151.5\t7.1\t152.0\t7.5\t153.5\t5.2\t153.8\t5.6\t157.1\t5.7\t160.7\t7.1\t162.9\t5.2\t163.0\t5.4", - "Midway Islands\tMQ\t-178.4\t28.3\t-178.3\t28.4\t-177.4\t28.1\t-177.3\t28.2\t-174.0\t26.0\t-174.0\t26.1\t-171.8\t25.7\t-171.7\t25.8", - "Moldova\tMD\t26.6\t45.4\t30.2\t48.5", - "Monaco\tMN\t7.3\t43.7\t7.5\t43.8", - "Mongolia\tMG\t87.7\t41.5\t119.9\t52.2", - "Montenegro\tMJ\t18.4\t42.2\t20.4\t43.6", - "Montserrat\tMH\t-62.3\t16.6\t-62.1\t16.8", - "Morocco\tMO\t-13.2\t27.6\t-1.0\t35.9", - "Mozambique\tMZ\t30.2\t-26.9\t40.8\t-10.5", - "Myanmar\tBM\t92.1\t9.6\t101.2\t28.5", - "Namibia\tWA\t11.7\t-29.0\t25.3\t-17.0", - "Nauru\tNR\t166.8\t-0.6\t166.9\t-0.5", - "Navassa Island\tBQ\t-75.1\t18.3\t-75.0\t18.4", - "Nepal\tNP\t80.0\t26.3\t88.2\t30.4", - "Netherlands\tNL\t3.3\t50.7\t7.2\t53.6", - "Netherlands Antilles\tNT\t-69.2\t11.9\t-68.2\t12.4\t-63.3\t17.4\t-62.9\t18.1", - "New 
Caledonia\tNC\t163.5\t-22.8\t169.0\t-19.5", - "New Zealand\tNZ\t166.4\t-48.1\t178.6\t-34.1", - "Nicaragua\tNU\t-87.7\t10.7\t-82.6\t15.0", - "Niger\tNG\t0.1\t11.6\t16.0\t23.5", - "Nigeria\tNI\t2.6\t4.2\t14.7\t13.9", - "Niue\tNE\t-170.0\t-19.2\t-169.8\t-19.0", - "Norfolk Island\tNF\t168.0\t-29.2\t168.1\t-29.0", - "North Korea\tKN\t124.1\t37.5\t130.7\t43.0", - "North Sea\tXX\t", - "Northern Mariana Islands\tCQ\t144.8\t14.1\t146.1\t20.6", - "Norway\tNO\t4.6\t57.9\t31.1\t71.2", - "Oman\tMU\t51.8\t16.6\t59.8\t25.0", - "Pacific Ocean\tXX\t", - "Pakistan\tPK\t60.8\t23.6\t77.8\t37.1", - "Palau\tPS\t132.3\t4.3\t132.3\t4.3\t134.1\t6.8\t134.7\t7.7", - "Palmyra Atoll\tLQ\t-162.2\t5.8\t-162.0\t5.9", - "Panama\tPM\t-83.1\t7.1\t-77.2\t9.6", - "Papua New Guinea\tPP\t140.8\t-11.7\t156.0\t-0.9\t157.0\t-4.9\t157.1\t-4.8\t159.4\t-4.7\t159.5\t-4.5", - "Paracel Islands\tPF\t111.1\t15.7\t111.2\t15.8", - "Paraguay\tPA\t-62.7\t-27.7\t-54.3\t-19.3", - "Peru\tPE\t-81.4\t-18.4\t-68.7\t0.0", - "Philippines\tRP\t116.9\t4.9\t126.6\t21.1", - "Pitcairn Islands\tPC\t-128.4\t-24.5\t-128.3\t-24.3", - "Poland\tPL\t14.1\t49.0\t24.2\t54.8", - "Portugal\tPO\t-9.5\t36.9\t-6.2\t42.1\t-31.3\t36.9\t-25.0\t39.8\t-17.3\t32.4\t-16.2\t33.2", - "Portugal: Azores\tXX\t-31.3\t36.9\t-25.0\t39.8", - "Portugal: Madeira\tXX\t-17.3\t32.4\t-16.2\t33.2", - "Puerto Rico\tRQ\t-68.0\t17.8\t-65.2\t18.5", - "Qatar\tQA\t50.7\t24.4\t52.4\t26.2", - "Republic of the Congo\tCF\t11.2\t-5.1\t18.6\t3.7", - "Reunion\tRE\t55.2\t-21.4\t55.8\t-20.9", - "Romania\tRO\t20.2\t43.6\t29.7\t48.3", - "Ross Sea\tXX\t", - "Russia\tRS\t-180.0\t64.2\t-169.0\t71.6\t19.7\t54.3\t22.9\t55.3\t26.9\t41.1\t180.0\t81.3", - "Rwanda\tRW\t28.8\t-2.9\t30.9\t-1.1", - "Saint Helena\tSH\t-5.8\t-16.1\t-5.6\t-15.9", - "Saint Kitts and Nevis\tSC\t62.9\t17.0\t62.5\t17.5", - "Saint Lucia\tST\t-61.1\t13.7\t-60.9\t14.1", - "Saint Pierre and Miquelon\tSB\t-56.5\t46.7\t-56.2\t47.1", - "Saint Vincent and the Grenadines\tVC\t-61.6\t12.4\t-61.1\t13.4", - 
"Samoa\tWS\t-172.8\t-14.1\t-171.4\t-13.4", - "San Marino\tSM\t12.4\t43.8\t12.5\t44.0", - "Sao Tome and Principe\tTP\t6.4\t0.0\t1.7\t7.5", - "Saudi Arabia\tSA\t34.4\t15.6\t55.7\t32.2", - "Senegal\tSG\t-17.6\t12.3\t-11.4\t16.7", - "Serbia\tRB\t18.8\t42.2\t23.1\t46.2", - "Seychelles\tSE\t50.7\t-9.6\t51.1\t-9.2\t52.7\t-7.2\t52.8\t-7.0\t53.0\t-6.3\t53.7\t-5.1\t55.2\t-5.9\t56.0\t-3.7\t56.2\t-7.2\t56.3\t-7.1", - "Sierra Leone\tSL\t-13.4\t6.9\t-10.3\t10.0", - "Singapore\tSN\t103.6\t1.1\t104.1\t1.5", - "Slovakia\tLO\t16.8\t47.7\t22.6\t49.6", - "Slovenia\tSI\t13.3\t45.4\t16.6\t46.9", - "Solomon Islands\tBP\t155.5\t-11.9\t162.8\t-5.1\t165.6\t-11.8\t167.0\t-10.1\t167.1\t-10.0\t167.3\t-9.8\t168.8\t-12.3\t168.8\t-12.3", - "Somalia\tSO\t40.9\t-1.7\t51.4\t12.0", - "South Africa\tSF\t16.4\t-34.9\t32.9\t-22.1", - "South Georgia and the South Sandwich Islands\tSX\t-38.3\t-54.9\t-35.7\t-53.9", - "South Korea\tKS\t125.0\t33.1\t129.6\t38.6", - "Southern Ocean\tXX\t", - "Spain\tSP\t-9.3\t35.1\t4.3\t43.8\t-18.2\t27.6\t-13.4\t29.5", - "Spain: Canary Islands\tXX\t-18.2\t27.6\t-13.4\t29.5", - "Spratly Islands\tPG\t114.0\t9.6\t115.8\t11.1", - "Sri Lanka\tCE\t79.6\t5.9\t81.9\t9.8", - "Sudan\tSU\t21.8\t3.4\t38.6\t23.6", - "Suriname\tNS\t-58.1\t1.8\t-54.0\t6.0", - "Svalbard\tSV\t10.4\t76.4\t33.5\t80.8", - "Swaziland\tWZ\t30.7\t-27.4\t32.1\t-25.7", - "Sweden\tSW\t10.9\t55.3\t24.2\t69.1", - "Switzerland\tSZ\t5.9\t45.8\t10.5\t47.8", - "Syria\tSY\t35.7\t32.3\t42.4\t37.3", - "Taiwan\tTW\t119.3\t21.9\t122.0\t25.3", - "Tajikistan\tTI\t67.3\t36.6\t75.1\t41.0", - "Tanzania\tTZ\t29.3\t-11.8\t40.4\t-1.0", - "Tasman Sea\tXX\t", - "Thailand\tTH\t97.3\t5.6\t105.6\t20.5", - "Togo\tTO\t-0.2\t6.1\t1.8\t11.1", - "Tokelau\tTL\t-172.6\t-9.5\t-171.1\t-8.5", - "Tonga\tTN\t-176.3\t-22.4\t-176.2\t-22.3\t-175.5\t-21.5\t-174.5\t-20.0", - "Trinidad and Tobago\tTD\t-62.0\t10.0\t-60.5\t11.3", - "Tromelin Island\tTE\t54.5\t-15.9\t54.5\t-15.9", - "Tunisia\tTS\t7.5\t30.2\t11.6\t37.5", - "Turkey\tTU\t25.6\t35.8\t44.8\t42.1", - 
"Turkmenistan\tTX\t52.4\t35.1\t66.7\t42.8", - "Turks and Caicos Islands\tTK\t-73.8\t20.9\t-73.0\t21.3", - "Tuvalu\tTV\t176.0\t-7.3\t177.3\t-5.6\t178.4\t-8.0\t178.7\t-7.4\t179.0\t-9.5\t179.9\t-8.5", - "Uganda\tUG\t29.5\t-1.5\t35.0\t4.2", - "Ukraine\tUP\t22.1\t44.3\t40.2\t52.4", - "United Arab Emirates\tAE\t51.1\t22.4\t56.4\t26.1", - "United Kingdom\tUK\t-8.7\t49.7\t1.8\t60.8", - "Uruguay\tUY\t-58.5\t-35.0\t-53.1\t-30.1", - "USA\tUS\t-124.8\t24.5\t-66.9\t49.4\t-168.2\t54.3\t-130.0\t71.4\t172.4\t52.3\t176.0\t53.0\t177.2\t51.3\t179.8\t52.1\t-179.5\t51.0\t-172.0\t52.5\t-171.5\t52.0\t-164.5\t54.5\t-164.8\t23.5\t-164.7\t23.6\t-162.0\t23.0\t-161.9\t23.1\t-160.6\t18.9\t-154.8\t22.2", - "USA: Alabama\tXX\t-88.8\t30.1\t-84.9\t35.0", - "USA: Alaska\tXX\t-168.2\t54.3\t-130.0\t71.4\t172.4\t52.3\t176.0\t53.0\t177.2\t51.3\t179.8\t52.1\t-179.5\t51.0\t-172.0\t52.5\t-171.5\t52.0\t-164.5\t54.5", - "USA: Alaska, Aleutian Islands\tXX\t172.4\t52.3\t176.0\t53.0\t177.2\t51.3\t179.8\t52.1\t-179.5\t51.0\t-172.0\t52.5\t-171.5\t52.0\t-164.5\t54.5", - "USA: Arizona\tXX\t-114.9\t31.3\t-109.0\t37.0", - "USA: Arkansas\tXX\t-94.7\t33.0\t-89.6\t36.5", - "USA: California\tXX\t-124.5\t32.5\t-114.1\t42.0", - "USA: Colorado\tXX\t-109.1\t36.9\t-102.0\t41.0", - "USA: Connecticut\tXX\t-73.8\t40.9\t-71.8\t42.1", - "USA: Delaware\tXX\t-75.8\t38.4\t-74.9\t39.8", - "USA: Florida\tXX\t-87.7\t24.5\t-80.0\t31.0", - "USA: Georgia\tXX\t-85.7\t30.3\t-80.8\t35.0", - "USA: Hawaii\tXX\t-164.8\t23.5\t-164.7\t23.6\t-162.0\t23.0\t-161.9\t23.1\t-160.6\t18.9\t-154.8\t22.2", - "USA: Idaho\tXX\t-117.3\t41.9\t-111.0\t49.0", - "USA: Illinois\tXX\t-91.6\t36.9\t-87.0\t42.5", - "USA: Indiana\tXX\t-88.1\t37.7\t-84.8\t41.8", - "USA: Iowa\tXX\t-96.7\t40.3\t-90.1\t43.5", - "USA: Kansas\tXX\t-102.1\t36.9\t-94.6\t40.0", - "USA: Kentucky\tXX\t-89.5\t36.5\t-82.0\t39.1", - "USA: Louisiana\tXX\t-94.1\t28.9\t-88.8\t33.0", - "USA: Maine\tXX\t-71.1\t43.0\t-66.9\t47.5", - "USA: Maryland\tXX\t-79.5\t37.8\t-75.1\t39.7", - "USA: 
Massachusetts\tXX\t-73.6\t41.2\t-69.9\t42.9", - "USA: Michigan\tXX\t-90.5\t41.6\t-82.1\t48.3", - "USA: Minnesota\tXX\t-97.3\t43.4\t-90.0\t49.4", - "USA: Mississippi\tXX\t-91.7\t30.1\t-88.1\t35.0", - "USA: Missouri\tXX\t-95.8\t36.0\t-89.1\t40.6", - "USA: Montana\tXX\t-116.1\t44.3\t-104.0\t49.0", - "USA: Nebraska\tXX\t-104.1\t40.0\t-95.3\t43.0", - "USA: Nevada\tXX\t-120.0\t35.0\t-114.0\t42.0", - "USA: New Hampshire\tXX\t-72.6\t42.6\t-70.7\t45.3", - "USA: New Jersey\tXX\t-75.6\t38.9\t-73.9\t41.4", - "USA: New Mexico\tXX\t-109.1\t31.3\t-103.0\t37.0", - "USA: New York\tXX\t-79.8\t40.4\t-71.9\t45.0", - "USA: North Carolina\tXX\t-84.4\t33.8\t-75.5\t36.6", - "USA: North Dakota\tXX\t-104.1\t45.9\t-96.6\t49.0", - "USA: Ohio\tXX\t-84.9\t38.3\t-80.5\t42.3", - "USA: Oklahoma\tXX\t-103.1\t33.6\t-94.4\t37.0", - "USA: Oregon\tXX\t-124.6\t41.9\t-116.5\t46.3", - "USA: Pennsylvania\tXX\t-80.6\t39.7\t-74.7\t42.5", - "USA: Rhode Island\tXX\t-71.9\t41.1\t-71.1\t42.0", - "USA: South Carolina\tXX\t-83.4\t32.0\t-78.6\t35.2", - "USA: South Dakota\tXX\t-104.1\t42.4\t-96.4\t45.9", - "USA: Tennessee\tXX\t-90.4\t35.0\t-81.7\t36.7", - "USA: Texas\tXX\t-106.7\t25.8\t-93.5\t36.5", - "USA: Utah\tXX\t-114.1\t37.0\t-109.1\t42.0", - "USA: Vermont\tXX\t-73.5\t42.7\t-71.5\t45.0", - "USA: Virginia\tXX\t-83.7\t36.5\t-75.2\t39.5", - "USA: Washington\tXX\t-124.8\t45.5\t-116.9\t49.0", - "USA: West Virginia\tXX\t-82.7\t37.1\t-77.7\t40.6", - "USA: Wisconsin\tXX\t-92.9\t42.4\t-86.3\t47.3", - "USA: Wyoming\tXX\t-111.1\t40.9\t-104.1\t45.0", - "Uzbekistan\tUZ\t55.9\t37.1\t73.1\t45.6", - "Vanuatu\tNH\t166.5\t-20.3\t170.2\t-13.1", - "Venezuela\tVE\t-73.4\t0.7\t-59.8\t12.2", - "Viet Nam\tVM\t102.1\t8.4\t109.5\t23.4", - "Virgin Islands\tVQ\t-65.1\t17.6\t-64.6\t18.5", - "Wake Island\tWQ\t166.5\t19.2\t166.7\t19.3", - "Wallis and Futuna\tWF\t-178.3\t-14.4\t-178.0\t-14.2\t-176.3\t-13.4\t-176.1\t-13.2", - "West Bank\tWE\t34.8\t31.3\t35.6\t32.6", - "Western Sahara\tWI\t-17.2\t20.7\t-8.7\t27.7", - 
"Yemen\tYM\t41.8\t11.7\t54.5\t19.0", - "Zambia\tZA\t21.9\t-18.1\t33.7\t-8.2", - "Zimbabwe\tZI\t25.2\t-22.5\t33.1\t-15.6", - NULL -}; - - -/* one CtBlock for each discontiguous area per country */ - -typedef struct ctblock { - CharPtr country; /* points to instance in countries list */ - FloatHi minx; - FloatHi miny; - FloatHi maxx; - FloatHi maxy; -} CtBlock, PNTR CtBlockPtr; - -/* one CtGrid for each 10-degree-by-10-degree area touched by a CtBlock */ + fsa = (TextFsaPtr) GetAppProperty (prop); + if (fsa != NULL) return fsa; -typedef struct ctgrid { - CtBlockPtr cbp; - Int2 xindex; - Int2 yindex; -} CtGrid, PNTR CtGridPtr; + fsa = TextFsaNew (); + if (fsa != NULL) { + for (i = 0; bodiesOfWater [i] != NULL; i++) { + TextFsaAdd (fsa, bodiesOfWater [i]); + } + } -/* main structure for country/lat-lon lookup */ + SetAppProperty (prop, (Pointer) fsa); -typedef struct ctset { - ValNodePtr countries; - ValNodePtr blocks; - ValNodePtr grids; - CtBlockPtr PNTR bkarray; /* sorted by country name */ - CtGridPtr PNTR gdarray; /* sorted by geographic index */ - Int4 num_blocks; - Int4 num_grids; -} CtSet, PNTR CtSetPtr; + return fsa; +} -static int LIBCALLBACK SortCbpByCountry ( - VoidPtr ptr1, - VoidPtr ptr2 -) +NLM_EXTERN Boolean StringContainsBodyOfWater (CharPtr str) { - int compare; - CtBlockPtr cbp1, cbp2; + Char ch; + TextFsaPtr fsa; + CharPtr ptr; + Int4 state; + ValNodePtr matches; - if (ptr1 == NULL || ptr2 == NULL) return 0; - cbp1 = *((CtBlockPtr PNTR) ptr1); - cbp2 = *((CtBlockPtr PNTR) ptr2); - if (cbp1 == NULL || cbp2 == NULL) return 0; + if (StringHasNoText (str)) return FALSE; - compare = StringICmp (cbp1->country, cbp2->country); - if (compare > 0) { - return 1; - } else if (compare < 0) { - return -1; + fsa = GetBodiesOfWaterFSA (); + if (fsa == NULL) return FALSE; + + state = 0; + ptr = str; + ch = *ptr; + + while (ch != '\0') { + matches = NULL; + state = TextFsaNext (fsa, state, ch, &matches); + ptr++; + ch = *ptr; + if (ch == '\0' || ch == ',' || ch == 
':' || ch == ';' || ch == ' ') { + if (matches != NULL) return TRUE; + state = 0; + } } - return 0; + return FALSE; } -static int CgpGridComp ( - CtGridPtr cgp1, - Int2 xindex, - Int2 yindex +/* BEGINNING OF NEW LATITUDE-LONGITUDE COUNTRY VALIDATION CODE */ + +/* latitude-longitude to country conversion */ + +typedef struct ctyblock { + CharPtr name; /* name of country or country: subregion */ + CharPtr level0; /* just the country */ + CharPtr level1; /* just the subregion */ + Int4 area; /* pixel area for choosing smallest overlapping subregion */ + Int4 minlat; /* minimum latitude */ + Int4 maxlat; /* maximum latitude */ + Int4 minlon; /* minimum longitude */ + Int4 maxlon; /* maximum longitude */ +} CtyBlock, PNTR CtyBlockPtr; + +typedef struct latblock { + CtyBlockPtr landmass; /* points to instance in countries list */ + Int4 lat; /* latitude (integer in 10ths of a degree) */ + Int4 minlon; /* minimum longitude */ + Int4 maxlon; /* maximum longitude */ +} LatBlock, PNTR LatBlockPtr; + +typedef struct ctryset { + ValNodePtr ctyblocks; /* linked list of country blocks */ + CtyBlockPtr PNTR ctyarray; /* country blocks sorted by name */ + Int4 numCtyBlocks; + ValNodePtr latblocks; /* linked list of latitude blocks */ + LatBlockPtr PNTR latarray; /* latitude blocks sorted by latitude then longitude */ + Int4 numLatBlocks; + FloatHi scale; +} CtrySet, PNTR CtrySetPtr; + +static int LIBCALLBACK SortByCountry ( + VoidPtr ptr1, + VoidPtr ptr2 ) { - if (cgp1 == NULL) return 0; + CtyBlockPtr cbp1; + CtyBlockPtr cbp2; + int cmp; + ValNodePtr vnp1; + ValNodePtr vnp2; - if (cgp1->xindex > xindex) { - return 1; - } else if (cgp1->xindex < xindex) { - return -1; - } + if (ptr1 == NULL || ptr2 == NULL) return 0; + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + if (vnp1 == NULL || vnp2 == NULL) return 0; + cbp1 = (CtyBlockPtr) vnp1->data.ptrvalue; + cbp2 = (CtyBlockPtr) vnp2->data.ptrvalue; + if (cbp1 == NULL || cbp2 == NULL) return 0; - if 
(cgp1->yindex > yindex) { + cmp = StringICmp (cbp1->name, cbp2->name); + if (cmp > 0) { return 1; - } else if (cgp1->yindex < yindex) { + } else if (cmp < 0) { return -1; } return 0; } -static int LIBCALLBACK SortCgpByGrid ( +static int LIBCALLBACK SortByLatLon ( VoidPtr ptr1, VoidPtr ptr2 ) { - CtBlockPtr cbp1, cbp2; - CtGridPtr cgp1, cgp2; - int compare; + CtyBlockPtr cbp1; + CtyBlockPtr cbp2; + int cmp; + LatBlockPtr lbp1; + LatBlockPtr lbp2; + ValNodePtr vnp1; + ValNodePtr vnp2; if (ptr1 == NULL || ptr2 == NULL) return 0; - cgp1 = *((CtGridPtr PNTR) ptr1); - cgp2 = *((CtGridPtr PNTR) ptr2); - if (cgp1 == NULL || cgp2 == NULL) return 0; + vnp1 = *((ValNodePtr PNTR) ptr1); + vnp2 = *((ValNodePtr PNTR) ptr2); + if (vnp1 == NULL || vnp2 == NULL) return 0; + lbp1 = (LatBlockPtr) vnp1->data.ptrvalue; + lbp2 = (LatBlockPtr) vnp2->data.ptrvalue; + if (lbp1 == NULL || lbp2 == NULL) return 0; - compare = CgpGridComp (cgp1, cgp2->xindex, cgp2->yindex); - if (compare > 0) { - return 1; - } else if (compare < 0) { + if (lbp1->lat < lbp2->lat) { return -1; - } - - cbp1 = cgp1->cbp; - cbp2 = cgp2->cbp; - if (cbp1 == NULL || cbp2 == NULL) return 0; - - if (cbp1->minx > cbp2->minx) { + } else if (lbp1->lat > lbp2->lat) { return 1; - } else if (cbp1->minx < cbp2->minx) { - return -1; } - if (cbp1->maxx > cbp2->maxx) { + if (lbp1->minlon < lbp2->minlon) { return -1; - } else if (cbp1->maxx < cbp2->maxx) { + } else if (lbp1->minlon > lbp2->minlon) { return 1; } - if (cbp1->miny > cbp2->miny) { + if (lbp1->maxlon < lbp2->maxlon) { return 1; - } else if (cbp1->miny < cbp2->miny) { + } else if (lbp1->maxlon > lbp2->maxlon) { return -1; } - if (cbp1->maxy > cbp2->maxy) { + cbp1 = lbp1->landmass; + cbp2 = lbp2->landmass; + if (cbp1 == NULL || cbp2 == NULL) return 0; + + if (cbp1->area < cbp2->area) { return -1; - } else if (cbp1->maxy < cbp2->maxy) { + } else if (cbp1->area > cbp2->area) { return 1; } - compare = StringICmp (cbp1->country, cbp2->country); - if (compare > 0) { + cmp = 
StringICmp (cbp1->name, cbp2->name); + if (cmp > 0) { return 1; - } else if (compare < 0) { + } else if (cmp < 0) { return -1; } return 0; } -static Int2 LatLonDegreeToIndex ( - FloatHi coord -) +#define EPSILON 0.001 -{ - double fval; - long ival; +static Int4 ConvertLat (FloatHi lat, FloatHi scale) { + + Int4 val = 0; + + if (lat < -90.0) { + lat = -90.0; + } + if (lat > 90.0) { + lat = 90.0; + } + + if (lat > 0) { + val = (Int4) (lat * scale + EPSILON); + } else { + val = (Int4) (-(-lat * scale + EPSILON)); + } + + return val; +} + +static Int4 ConvertLon (FloatHi lon, FloatHi scale) { + + Int4 val = 0; + + if (lon < -180.0) { + lon = -180.0; + } + if (lon > 180.0) { + lon = 180.0; + } - fval = coord; - fval += 200.0; - fval /= 10.0; - ival = (long) fval; - ival -= 20; + if (lon > 0) { + val = (Int4) (lon * scale + EPSILON); + } else { + val = (Int4) (-(-lon * scale + EPSILON)); + } - return (Int2) ival; + return val; } -static CtSetPtr CtSetDataFree ( - CtSetPtr csp +static CtrySetPtr FreeLatLonCountryData ( + CtrySetPtr csp ) { + CtyBlockPtr cbp; + ValNodePtr vnp; + if (csp == NULL) return NULL; - ValNodeFreeData (csp->countries); - ValNodeFreeData (csp->blocks); - ValNodeFreeData (csp->grids); + for (vnp = csp->ctyblocks; vnp != NULL; vnp = vnp->next) { + cbp = (CtyBlockPtr) vnp->data.ptrvalue; + if (cbp == NULL) continue; + MemFree (cbp->name); + MemFree (cbp->level0); + MemFree (cbp->level1); + } + + ValNodeFreeData (csp->ctyblocks); + ValNodeFreeData (csp->latblocks); - MemFree (csp->bkarray); - MemFree (csp->gdarray); + MemFree (csp->ctyarray); + MemFree (csp->latarray); MemFree (csp); return NULL; } -static Boolean ct_set_not_found = FALSE; +/* Original data source is Natural Earth. 
Free vector and raster map data @ http://naturalearthdata.com */ + +static CharPtr LatLonCountryReadNextLine ( + FileCache PNTR fcp, + CharPtr buf, + size_t bufsize, + CharPtr PNTR local, + Int4Ptr idxP +) + +{ + Int4 idx; + CharPtr str = NULL; + + if (fcp != NULL) { + str = FileCacheReadLine (fcp, buf, bufsize, NULL); + } + + if (local != NULL && idxP != NULL) { + idx = *idxP; + str = local [idx]; + if (str != NULL) { + StringNCpy_0 (buf, local [idx], bufsize); + str = buf; + } + idx++; + *idxP = idx; + } + + return str; +} -static CtSetPtr GetCtSetLatLonDataInt ( +static CtrySetPtr ReadLatLonCountryData ( CharPtr prop, CharPtr file, CharPtr PNTR local ) { - CtBlockPtr PNTR bkarray; - ValNodePtr blocks = NULL; - FloatHi bounds [4]; - CtBlockPtr cbp; - CtGridPtr cgp; - ValNodePtr countries = NULL; - CharPtr country; - CtSetPtr csp; - FileCache fc; - FILE *fp = NULL; - CtGridPtr PNTR gdarray; - ValNodePtr grids = NULL; - Int2 hix; - Int2 hiy; - Int2 i; - Int2 j = 0; - ValNodePtr lastblk = NULL; - ValNodePtr lastctry = NULL; - ValNodePtr lastgrd = NULL; - Char line [1024]; - Int2 lox; - Int2 loy; - Int4 num; - Char path [PATH_MAX]; - CharPtr ptr; - ErrSev sev; - CharPtr str = NULL; - double val; - ValNodePtr vnp; - CharPtr wrk; - Int2 x; - Int2 y; - - csp = (CtSetPtr) GetAppProperty (prop); - if (csp != NULL) return csp; - - if (ct_set_not_found) return NULL; + Char buf [128]; + Char ch; + CtyBlockPtr cbp = NULL; + CtrySetPtr csp = NULL; + CtyBlockPtr PNTR ctyarray; + ValNodePtr ctyblocks = NULL; + FileCache fc; + FileCache PNTR fcp = NULL; + FILE *fp = NULL; + Int4 i; + Int4 idx = 0; + ValNodePtr lastlatblock = NULL; + ValNodePtr lastctyblock = NULL; + FloatHi latitude; + LatBlockPtr PNTR latarray; + ValNodePtr latblocks = NULL; + LatBlockPtr lbp; + Char line [1024]; + FloatHi maxlongitude; + FloatHi minlongitude; + Char path [PATH_MAX]; + CharPtr ptr; + CharPtr recentCountry = NULL; + FloatHi scale = 0.0; + Boolean scale_not_set = TRUE; + ErrSev sev; + CharPtr str; 
+ Char tmp [128]; + double val; + ValNodePtr vnp; + CharPtr wrk; if (FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) { FileBuildPath (path, NULL, file); @@ -9578,292 +10109,488 @@ static CtSetPtr GetCtSetLatLonDataInt ( if (fp != NULL) { FileCacheSetup (&fc, fp); - str = FileCacheReadLine (&fc, line, sizeof (line), NULL); - } else if (local != NULL) { - str = local [j]; - if (str != NULL) { - StringNCpy_0 (line, str, sizeof (line)); - str = line; + fcp = &fc; + local = NULL; + } else if (local == NULL) { + return NULL; + } + + for (str = LatLonCountryReadNextLine (fcp, line, sizeof (line), local, &idx); + str != NULL; + str = LatLonCountryReadNextLine (fcp, line, sizeof (line), local, &idx)) { + if (StringHasNoText (str)) continue; + + /* if reading from local copy, str cannot be modified, so copy to local buf and reset pointer */ + + StringNCpy_0 (buf, str, sizeof (buf)); + str = buf; + + ch = str [0]; + + /* ignore comment lines starting with hyphen */ + + if (ch == '-') continue; + + /* Scale should be at top of file, after comments */ + + if (IS_DIGIT (ch)) { + if (scale_not_set && sscanf (str, "%lf", &val) == 1) { + scale = (FloatHi) val; + scale_not_set = FALSE; + } + + continue; } - } else return NULL; - while (str != NULL) { - if (StringDoesHaveText (str)) { + /* Country starts on first column */ + + if (IS_ALPHA (ch)) { + + if (scale_not_set) { + scale = 20.0; + scale_not_set = FALSE; + } + ptr = StringChr (str, '\t'); + if (ptr != NULL) { + *ptr = '\0'; + } + + if (StringCmp (str, recentCountry) == 0) continue; + + cbp = (CtyBlockPtr) MemNew (sizeof (CtyBlock)); + if (cbp == NULL) continue; + + TrimSpacesAroundString (str); + cbp->name = StringSave (str); + StringNCpy_0 (tmp, str, sizeof (tmp)); + ptr = StringChr (tmp, ':'); if (ptr != NULL) { *ptr = '\0'; ptr++; - ptr = StringChr (ptr, '\t'); - if (ptr != NULL) { - ptr++; - if (StringDoesHaveText (str) && StringDoesHaveText (ptr)) { + TrimSpacesAroundString (ptr); + if (StringDoesHaveText (ptr)) 
{ + cbp->level1 = StringSave (ptr); + } + TrimSpacesAroundString (tmp); + cbp->level0 = StringSave (tmp); + } else { + TrimSpacesAroundString (str); + cbp->level0 = StringSave (str); + } + cbp->area = 0; + cbp->minlat = INT4_MAX; + cbp->maxlat = INT4_MIN; + cbp->minlon = INT4_MAX; + cbp->maxlon = INT4_MIN; + vnp = ValNodeAddPointer (&lastctyblock, 0, (Pointer) cbp); + if (ctyblocks == NULL) { + ctyblocks = vnp; + } + lastctyblock = vnp; - country = StringSave (str); + recentCountry = cbp->name; - vnp = ValNodeAddPointer (&lastctry, 0, (Pointer) country); - if (countries == NULL) { - countries = vnp; - } - lastctry = vnp; + continue; + } - wrk = StringSave (ptr); - str = wrk; - i = 0; + /* Latitude with longitude min/max pairs on line starting with tab */ - while (StringDoesHaveText (str)) { + if (ch != '\t') continue; + + wrk = StringSave (str + 1); + if (wrk == NULL) continue; + + ptr = StringChr (wrk, '\t'); + if (ptr != NULL) { + *ptr = '\0'; + ptr++; + if (sscanf (wrk, "%lf", &val) == 1) { + latitude = (FloatHi) val; + + str = ptr; + while (StringDoesHaveText (str)) { + ptr = StringChr (str, '\t'); + if (ptr != NULL) { + *ptr = '\0'; + ptr++; + } + if (sscanf (str, "%lf", &val) != 1) { + /* prevent infinite loop if it fails */ + str = NULL; + } else { + minlongitude = (FloatHi) val; + str = ptr; + if (StringDoesHaveText (str)) { ptr = StringChr (str, '\t'); if (ptr != NULL) { *ptr = '\0'; ptr++; } - if (sscanf (str, "%lf", &val) == 1) { - bounds [i] = (FloatHi) val; - i++; - if (i > 3) { - - cbp = (CtBlockPtr) MemNew (sizeof (CtBlock)); - if (cbp != NULL) { - cbp->country = country; - cbp->minx = bounds [0]; - cbp->miny = bounds [1]; - cbp->maxx = bounds [2]; - cbp->maxy = bounds [3]; - - vnp = ValNodeAddPointer (&lastblk, 0, (Pointer) cbp); - if (blocks == NULL) { - blocks = vnp; - } - lastblk = vnp; - - lox = LatLonDegreeToIndex (cbp->minx); - loy = LatLonDegreeToIndex (cbp->miny); - hix = LatLonDegreeToIndex (cbp->maxx); - hiy = LatLonDegreeToIndex 
(cbp->maxy); - - for (x = lox; x <= hix; x++) { - for (y = loy; y <= hiy; y++) { - cgp = (CtGridPtr) MemNew (sizeof (CtGrid)); - if (cgp != NULL) { - cgp->cbp = cbp; - cgp->xindex = x; - cgp->yindex = y; - - vnp = ValNodeAddPointer (&lastgrd, 0, (Pointer) cgp); - if (grids == NULL) { - grids = vnp; - } - lastgrd = vnp; - } - } - } + maxlongitude = (FloatHi) val; + + lbp = (LatBlockPtr) MemNew (sizeof (LatBlock)); + if (lbp != NULL) { + lbp->landmass = cbp; + lbp->lat = ConvertLat (latitude, scale); + lbp->minlon = ConvertLon (minlongitude, scale); + lbp->maxlon = ConvertLon (maxlongitude, scale); + + vnp = ValNodeAddPointer (&lastlatblock, 0, (Pointer) lbp); + if (latblocks == NULL) { + latblocks = vnp; } - - i = 0; + lastlatblock = vnp; } } - - str = ptr; } - - MemFree (wrk); + str = ptr; } } } } - if (fp != NULL) { - str = FileCacheReadLine (&fc, line, sizeof (line), NULL); - } else { - j++; - str = local [j]; - if (str != NULL) { - StringNCpy_0 (line, str, sizeof (line)); - str = line; - } - } + MemFree (wrk); } if (fp != NULL) { FileClose (fp); } - if (countries == NULL || blocks == NULL || grids == NULL) { - ct_set_not_found = TRUE; + if (ctyblocks == NULL || latblocks == NULL) { return NULL; } - csp = (CtSetPtr) MemNew (sizeof (CtSet)); + csp = (CtrySetPtr) MemNew (sizeof (CtrySet)); if (csp == NULL) return NULL; - /* now populate, heap sort arrays */ - - num = ValNodeLen (blocks); + for (vnp = latblocks; vnp != NULL; vnp = vnp->next) { + lbp = (LatBlockPtr) vnp->data.ptrvalue; + if (lbp == NULL) continue; + cbp = lbp->landmass; + if (cbp == NULL) continue; + cbp->area += lbp->maxlon - lbp->minlon + 1; + if (cbp->minlat > lbp->lat) { + cbp->minlat = lbp->lat; + } + if (cbp->maxlat < lbp->lat) { + cbp->maxlat = lbp->lat; + } + if (cbp->minlon > lbp->minlon) { + cbp->minlon = lbp->minlon; + } + if (cbp->maxlon < lbp->maxlon) { + cbp->maxlon = lbp->maxlon; + } + } - csp->countries = countries; - csp->blocks = blocks; - csp->num_blocks = (Int2) num; + ctyblocks = 
ValNodeSort (ctyblocks, SortByCountry); + csp->ctyblocks = ctyblocks; + csp->numCtyBlocks = ValNodeLen (ctyblocks); - bkarray = (CtBlockPtr PNTR) MemNew (sizeof (CtBlockPtr) * (num + 1)); - if (bkarray != NULL) { - for (vnp = blocks, i = 0; vnp != NULL; vnp = vnp->next, i++) { - cbp = (CtBlockPtr) vnp->data.ptrvalue; - bkarray [i] = cbp; - } + latblocks = ValNodeSort (latblocks, SortByLatLon); + csp->latblocks = latblocks; + csp->numLatBlocks = ValNodeLen (latblocks); - HeapSort (bkarray, (size_t) num, sizeof (CtBlockPtr), SortCbpByCountry); - csp->bkarray = bkarray; + if (scale_not_set) { + scale = 20.0; } + csp->scale = scale; - num = ValNodeLen (grids); + ctyarray = (CtyBlockPtr PNTR) MemNew (sizeof (CtyBlockPtr) * (csp->numCtyBlocks + 1)); + if (ctyarray != NULL) { + for (vnp = ctyblocks, i = 0; vnp != NULL; vnp = vnp->next, i++) { + cbp = (CtyBlockPtr) vnp->data.ptrvalue; + ctyarray [i] = cbp; + } - csp->num_grids = (Int2) num; + csp->ctyarray = ctyarray; + } - gdarray = (CtGridPtr PNTR) MemNew (sizeof (CtGridPtr) * (num + 1)); - if (gdarray != NULL) { - for (vnp = grids, i = 0; vnp != NULL; vnp = vnp->next, i++) { - cgp = (CtGridPtr) vnp->data.ptrvalue; - gdarray [i] = cgp; + latarray = (LatBlockPtr PNTR) MemNew (sizeof (LatBlockPtr) * (csp->numLatBlocks + 1)); + if (latarray != NULL) { + for (vnp = latblocks, i = 0; vnp != NULL; vnp = vnp->next, i++) { + lbp = (LatBlockPtr) vnp->data.ptrvalue; + latarray [i] = lbp; } - HeapSort (gdarray, (size_t) num, sizeof (CtGridPtr), SortCgpByGrid); - csp->gdarray = gdarray; + csp->latarray = latarray; } - SetAppProperty (prop, (Pointer) csp); +/* +{ + FILE *fp; + fp = FileOpen ("ctrymap.txt", "w"); + if (fp != NULL) { + for (vnp = latblocks; vnp != NULL; vnp = vnp->next) { + lbp = (LatBlockPtr) vnp->data.ptrvalue; + if (lbp == NULL) continue; + cbp = lbp->landmass; + if (cbp == NULL) continue; + fprintf (fp, "%s\t[%d]\t%d\t%d\t%d\n", cbp->name, (int) cbp->area, + (int) lbp->lat, (int) lbp->minlon, (int) lbp->maxlon); + 
} + FileClose (fp); + } +} +*/ return csp; } -static CtSetPtr GetCtSetLatLonData ( - void -) +static Boolean ctryset_not_found = FALSE; +static Boolean watrset_not_found = FALSE; -{ - return GetCtSetLatLonDataInt ("CountryLatLonList", "country_lat_lon.txt", ctry_lat_lon); -} +extern CharPtr latlon_onedegree []; +extern CharPtr water_onedegree []; -NLM_EXTERN Boolean IsCountryInLatLonList ( - CharPtr country -) +static CtrySetPtr GetLatLonCountryData (void) { - CtBlockPtr cbp; - CtBlockPtr PNTR bkarray; - CtSetPtr csp; - Int2 L, R, mid; + CtrySetPtr csp = NULL; + CharPtr prop = "CountryLatLonData"; - if (StringHasNoText (country)) return FALSE; - - csp = GetCtSetLatLonData (); - if (csp == NULL) return FALSE; + csp = (CtrySetPtr) GetAppProperty (prop); + if (csp != NULL) return csp; - bkarray = csp->bkarray; - if (bkarray == NULL) return FALSE; + if (ctryset_not_found) return NULL; - L = 0; - R = csp->num_blocks - 1; + csp = ReadLatLonCountryData (prop, "lat_lon_country.txt", latlon_onedegree); - while (L < R) { - mid = (L + R) / 2; - cbp = bkarray [mid]; - if (cbp != NULL && StringICmp (cbp->country, country) < 0) { - L = mid + 1; - } else { - R = mid; - } + if (csp == NULL) { + ctryset_not_found = TRUE; + return NULL; } - cbp = bkarray [R]; - if (cbp != NULL && StringICmp (cbp->country, country) == 0) return TRUE; + SetAppProperty (prop, (Pointer) csp); - return FALSE; + return csp; } -static Int2 GetCountryBlockIndex ( - CharPtr country -) +static CtrySetPtr GetLatLonWaterData (void) { - CtBlockPtr cbp; - CtBlockPtr PNTR bkarray; - CtSetPtr csp; - Int2 L, R, mid; - - if (StringHasNoText (country)) return -1; + CtrySetPtr csp = NULL; + CharPtr prop = "WaterLatLonData"; - csp = GetCtSetLatLonData (); - if (csp == NULL) return -1; + csp = (CtrySetPtr) GetAppProperty (prop); + if (csp != NULL) return csp; - bkarray = csp->bkarray; - if (bkarray == NULL) return -1; + if (watrset_not_found) return NULL; - L = 0; - R = csp->num_blocks - 1; + csp = ReadLatLonCountryData 
(prop, "lat_lon_water.txt", water_onedegree); + + if (csp == NULL) { + watrset_not_found = TRUE; + return NULL; + } + + SetAppProperty (prop, (Pointer) csp); + + return csp; +} + +static CtyBlockPtr GetEntryInLatLonListIndex ( + CharPtr country, + CtrySetPtr csp +) + +{ + CtyBlockPtr PNTR array; + CtyBlockPtr cbp; + Int2 L, R, mid; + + if (StringHasNoText (country)) return NULL; + if (csp == NULL) return NULL; + + array = csp->ctyarray; + if (array == NULL) return NULL; + + L = 0; + R = csp->numCtyBlocks - 1; while (L < R) { mid = (L + R) / 2; - cbp = bkarray [mid]; - if (cbp != NULL && StringICmp (cbp->country, country) < 0) { + cbp = array [mid]; + if (cbp != NULL && cbp->name != NULL && StringICmp (cbp->name, country) < 0) { L = mid + 1; } else { R = mid; } } - if (R < csp->num_blocks) { - cbp = bkarray [R]; - if (cbp == NULL) return -1; - if (StringICmp (cbp->country, country) != 0) return -1; - return R; - } + cbp = array [R]; + if (cbp != NULL && cbp->name != NULL && StringICmp (cbp->name, country) == 0) return cbp; - return -1; + return NULL; } -NLM_EXTERN Boolean CountryBoxesOverlap ( - CharPtr country1, - CharPtr country2 +NLM_EXTERN Boolean CountryIsInLatLonList ( + CharPtr country ) { - CtBlockPtr cbp1, cbp2; - CtBlockPtr PNTR bkarray; - CtSetPtr csp; - Int4 num_blocks; - Int2 R1, R2, x1, x2; + CtyBlockPtr cbp; + CtrySetPtr csp; - R1 = GetCountryBlockIndex (country1); - R2 = GetCountryBlockIndex (country2); + if (StringHasNoText (country)) return FALSE; + csp = GetLatLonCountryData (); + if (csp == NULL) return FALSE; - if (R1 < 0 || R2 < 0) return FALSE; + cbp = GetEntryInLatLonListIndex (country, csp); + if (cbp != NULL && cbp->name != NULL && StringICmp (cbp->name, country) == 0) return TRUE; - csp = GetCtSetLatLonData (); + return FALSE; +} + +NLM_EXTERN Boolean IsCountryInLatLonList ( + CharPtr country +) + +{ + return CountryIsInLatLonList (country); +} + +NLM_EXTERN Boolean WaterIsInLatLonList ( + CharPtr country +) + +{ + CtyBlockPtr cbp; + 
CtrySetPtr csp; + + if (StringHasNoText (country)) return FALSE; + csp = GetLatLonWaterData (); if (csp == NULL) return FALSE; - num_blocks = csp->num_blocks; - bkarray = csp->bkarray; - if (bkarray == NULL) return FALSE; + cbp = GetEntryInLatLonListIndex (country, csp); + if (cbp != NULL && cbp->name != NULL && StringICmp (cbp->name, country) == 0) return TRUE; - for (x1 = R1; x1 < num_blocks; x1++) { - cbp1 = bkarray [x1]; - if (cbp1 == NULL) return FALSE; - if (StringICmp (cbp1->country, country1) != 0) break; + return FALSE; +} - for (x2 = R2; x2 < num_blocks; x2++) { - cbp2 = bkarray [x2]; - if (cbp2 == NULL) return FALSE; - if (StringICmp (cbp2->country, country2) != 0) break; +static int LatLonCmp ( + LatBlockPtr lbp, + Int2 latitude +) - if (cbp1->maxx >= cbp2->minx && cbp1->minx <= cbp2->maxx) { - if (cbp1->maxy >= cbp2->miny && cbp1->miny <= cbp2->maxy) return TRUE; - } +{ + if (lbp == NULL) return 0; + + if (lbp->lat < latitude) { + return -1; + } else if (lbp->lat > latitude) { + return 1; + } + + return 0; +} + +static Int4 GetLatLonIndex ( + CtrySetPtr csp, + LatBlockPtr PNTR array, + Int2 latitude +) + +{ + LatBlockPtr lbp; + Int4 L, R, mid; + + if (csp == NULL || array == NULL) return 0; + + L = 0; + R = csp->numLatBlocks - 1; + + while (L < R) { + mid = (L + R) / 2; + lbp = array [mid]; + if (lbp != NULL && LatLonCmp (lbp, latitude) < 0) { + L = mid + 1; + } else { + R = mid; } } + return R; +} + +static Boolean SubregionStringICmp ( + CharPtr region, + CharPtr country +) + +{ + Char possible [256]; + CharPtr ptr; + + if (StringHasNoText (region) || StringHasNoText (country)) return FALSE; + StringNCpy_0 (possible, region, sizeof (possible)); + ptr = StringChr (possible, ':'); + if (ptr == NULL) return FALSE; + *ptr = '\0'; + if (StringICmp (possible, country) == 0) return TRUE; + return FALSE; +} + +static Boolean RegionContainsLatLon ( + CharPtr country, + FloatHi lat, + FloatHi lon, + CtrySetPtr csp +) + +{ + LatBlockPtr PNTR array; + 
CtyBlockPtr cbp; + Int4 latitude; + Int4 longitude; + LatBlockPtr lbp; + Int4 R; + + if (StringHasNoText (country)) return FALSE; + if (csp == NULL) return FALSE; + + array = csp->latarray; + if (array == NULL) return FALSE; + + latitude = ConvertLat (lat, csp->scale); + longitude = ConvertLon (lon, csp->scale); + + for (R = GetLatLonIndex (csp, array, latitude); R < csp->numLatBlocks; R++) { + lbp = array [R]; + if (lbp == NULL) break; + if (latitude != lbp->lat) break; + + if (longitude < lbp->minlon) continue; + if (longitude > lbp->maxlon) continue; + + cbp = lbp->landmass; + if (cbp == NULL) continue; + if (StringICmp (cbp->name, country) == 0) return TRUE; + if (SubregionStringICmp (cbp->name, country)) return TRUE; + } + return FALSE; } +NLM_EXTERN Boolean CountryContainsLatLon ( + CharPtr country, + FloatHi lat, + FloatHi lon +) + +{ + CtrySetPtr csp; + + if (StringHasNoText (country)) return FALSE; + + csp = GetLatLonCountryData (); + if (csp == NULL) return FALSE; + + return RegionContainsLatLon (country, lat, lon, csp); +} + NLM_EXTERN Boolean TestLatLonForCountry ( CharPtr country, FloatHi lat, @@ -9871,41 +10598,153 @@ NLM_EXTERN Boolean TestLatLonForCountry ( ) { - CtBlockPtr cbp; - CtBlockPtr PNTR bkarray; - CtSetPtr csp; - Int2 L, R, mid; + return CountryContainsLatLon (country, lat, lon); +} + +NLM_EXTERN Boolean WaterContainsLatLon ( + CharPtr country, + FloatHi lat, + FloatHi lon +) + +{ + CtrySetPtr csp; if (StringHasNoText (country)) return FALSE; - csp = GetCtSetLatLonData (); + csp = GetLatLonWaterData (); if (csp == NULL) return FALSE; - bkarray = csp->bkarray; - if (bkarray == NULL) return FALSE; + return RegionContainsLatLon (country, lat, lon, csp); +} - L = 0; - R = csp->num_blocks - 1; +static Boolean NewLatLonCandidateIsBetter ( + CharPtr country, + CharPtr province, + CtyBlockPtr best, + CtyBlockPtr cbp, + Boolean newer_is_smaller +) - while (L < R) { - mid = (L + R) / 2; - cbp = bkarray [mid]; - if (cbp != NULL && StringICmp 
(cbp->country, country) < 0) { - L = mid + 1; - } else { - R = mid; +{ + if (cbp == NULL) return FALSE; + if (best == NULL) return TRUE; + + /* if no preferred country, just look for smallest area */ + if (country == NULL) { + return newer_is_smaller; + } + + /* if match to preferred country */ + if (StringICmp (country, cbp->level0) == 0) { + + /* if best was not preferred country, take new match */ + if (StringICmp (country, best->level0) != 0) return TRUE; + + /* if match to preferred province */ + if (province != NULL && StringICmp (province, cbp->level1) == 0) { + + /* if best was not preferred province, take new match */ + if (StringICmp (province, best->level1) != 0) return TRUE; } + + /* if both match province, or neither does, or no preferred province, take smallest */ + return newer_is_smaller; } - while (R < csp->num_blocks) { - cbp = bkarray [R]; - if (cbp == NULL) return FALSE; - if (StringICmp (cbp->country, country) != 0) return FALSE; - if (lon >= cbp->minx && lat >= cbp->miny && lon <= cbp->maxx && lat <= cbp->maxy) return TRUE; - R++; + /* if best matches preferred country, keep */ + if (StringICmp (country, best->level0) == 0) return FALSE; + + /* otherwise take smallest */ + return newer_is_smaller; +} + +static CtyBlockPtr LookupRegionByLatLon ( + FloatHi lat, + FloatHi lon, + CharPtr country, + CharPtr province, + CtrySetPtr csp +) + +{ + LatBlockPtr PNTR array; + CtyBlockPtr cbp, best = NULL; + Int4 latitude; + Int4 longitude; + LatBlockPtr lbp; + Int4 R; + + if (csp == NULL) return NULL; + + array = csp->latarray; + if (array == NULL) return NULL; + + latitude = ConvertLat (lat, csp->scale); + longitude = ConvertLon (lon, csp->scale); + + for (R = GetLatLonIndex (csp, array, latitude); R < csp->numLatBlocks; R++) { + lbp = array [R]; + if (lbp == NULL) break; + if (latitude != lbp->lat) break; + + if (longitude < lbp->minlon) continue; + if (longitude > lbp->maxlon) continue; + + cbp = lbp->landmass; + if (cbp == NULL) continue; + + if (best 
== NULL || NewLatLonCandidateIsBetter (country, province, best, cbp, (Boolean) (cbp->area < best->area))) { + best = cbp; + } } - return FALSE; + return best; +} + +static CtyBlockPtr GuessCountryByLatLon ( + FloatHi lat, + FloatHi lon, + CharPtr country, + CharPtr province +) + +{ + CtrySetPtr csp; + + csp = GetLatLonCountryData (); + if (csp == NULL) return NULL; + + return LookupRegionByLatLon (lat, lon, country, province, csp); +} + +static CtyBlockPtr GuessWaterByLatLon ( + FloatHi lat, + FloatHi lon, + CharPtr country +) + +{ + CtrySetPtr csp; + + csp = GetLatLonWaterData (); + if (csp == NULL) return NULL; + + return LookupRegionByLatLon (lat, lon, country, NULL, csp); +} + +NLM_EXTERN CharPtr LookupCountryByLatLon ( + FloatHi lat, + FloatHi lon +) + +{ + CtyBlockPtr cbp; + + cbp = GuessCountryByLatLon (lat, lon, NULL, NULL); + if (cbp == NULL) return NULL; + + return cbp->name; } NLM_EXTERN CharPtr GuessCountryForLatLon ( @@ -9914,138 +10753,550 @@ NLM_EXTERN CharPtr GuessCountryForLatLon ( ) { - CtBlockPtr cbp; - CtGridPtr cgp; - CharPtr country = NULL; - CtSetPtr csp; - CtGridPtr PNTR gdarray; - Int2 L, R, mid; - Int2 x; - Int2 y; + return LookupCountryByLatLon (lat, lon); +} + +NLM_EXTERN CharPtr LookupWaterByLatLon ( + FloatHi lat, + FloatHi lon +) + +{ + CtyBlockPtr cbp; + + cbp = GuessWaterByLatLon (lat, lon, NULL); + if (cbp == NULL) return NULL; + + return cbp->name; +} + +NLM_EXTERN FloatHi CountryDataScaleIs (void) + +{ + CtrySetPtr csp; + + csp = GetLatLonCountryData (); + if (csp == NULL) return 0.0; + + return csp->scale; +} + +NLM_EXTERN FloatHi WaterDataScaleIs (void) + +{ + CtrySetPtr csp; + + csp = GetLatLonWaterData (); + if (csp == NULL) return 0.0; + + return csp->scale; +} + + +static Boolean RegionExtremesOverlap ( + CharPtr first, + CharPtr second, + CtrySetPtr csp +) + +{ + CtyBlockPtr cbp1, cbp2; + + if (StringHasNoText (first) || StringHasNoText (second)) return FALSE; + if (csp == NULL) return FALSE; + + cbp1 = 
GetEntryInLatLonListIndex (first, csp); + if (cbp1 == NULL || cbp1->name == NULL || StringICmp (cbp1->name, first) != 0) return FALSE; + + cbp2 = GetEntryInLatLonListIndex (second, csp); + if (cbp2 == NULL || cbp2->name == NULL || StringICmp (cbp2->name, second) != 0) return FALSE; + + if (cbp1->minlat > cbp2->maxlat) return FALSE; + if (cbp2->minlat > cbp1->maxlat) return FALSE; + if (cbp1->minlon > cbp2->maxlon) return FALSE; + if (cbp2->minlon > cbp1->maxlon) return FALSE; + + return TRUE; +} + +NLM_EXTERN Boolean CountryExtremesOverlap ( + CharPtr first, + CharPtr second +) + +{ + CtrySetPtr csp; + + if (StringHasNoText (first) || StringHasNoText (second)) return FALSE; + csp = GetLatLonCountryData (); + if (csp == NULL) return FALSE; + + return RegionExtremesOverlap (first, second, csp); +} + +NLM_EXTERN Boolean CountryBoxesOverlap ( + CharPtr country1, + CharPtr country2 +) + +{ + return CountryExtremesOverlap (country1, country2); +} + +NLM_EXTERN Boolean WaterExtremesOverlap ( + CharPtr first, + CharPtr second +) + +{ + CtrySetPtr csp; + + if (StringHasNoText (first) || StringHasNoText (second)) return FALSE; + csp = GetLatLonWaterData (); + if (csp == NULL) return FALSE; + + return RegionExtremesOverlap (first, second, csp); +} + +/* +Distance on a spherical surface calculation adapted from +http://www.linuxjournal.com/magazine/ +work-shell-calculating-distance-between-two-latitudelongitude-points +*/ + +#define EARTH_RADIUS 6371.0 /* average radius of non-spherical earth in kilometers */ +#define CONST_PI 3.14159265359 + +static double DegreesToRadians ( + FloatHi degrees +) + +{ + return (degrees * (CONST_PI / 180.0)); +} + +static FloatHi DistanceOnGlobe ( + FloatHi latA, + FloatHi lonA, + FloatHi latB, + FloatHi lonB +) + +{ + double lat1, lon1, lat2, lon2; + double dLat, dLon, a, c; + + lat1 = DegreesToRadians (latA); + lon1 = DegreesToRadians (lonA); + lat2 = DegreesToRadians (latB); + lon2 = DegreesToRadians (lonB); + + dLat = lat2 - lat1; + dLon = 
lon2 - lon1; + + a = sin (dLat / 2) * sin (dLat / 2) + + cos (lat1) * cos (lat2) * sin (dLon / 2) * sin (dLon / 2); + c = 2 * atan2 (sqrt (a), sqrt (1 - a)); + + return (FloatHi) (EARTH_RADIUS * c); +} + +static FloatHi ErrorDistance ( + FloatHi latA, + FloatHi lonA, + FloatHi scale) +{ + double lat1, lon1, lat2, lon2; + double dLat, dLon, a, c; + + lat1 = DegreesToRadians (latA); + lon1 = DegreesToRadians (lonA); + lat2 = DegreesToRadians (latA + (1.0 / scale)); + lon2 = DegreesToRadians (lonA + (1.0 / scale)); + + dLat = lat2 - lat1; + dLon = lon2 - lon1; + + a = sin (dLat / 2) * sin (dLat / 2) + + cos (lat1) * cos (lat2) * sin (dLon / 2) * sin (dLon / 2); + c = 2 * atan2 (sqrt (a), sqrt (1 - a)); + + return (FloatHi) (EARTH_RADIUS * c); + +} + + +static CtyBlockPtr RegionClosestToLatLon ( + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP, + CtrySetPtr csp +) + +{ + LatBlockPtr PNTR array; + CtyBlockPtr cbp, best = NULL; + FloatHi closest = EARTH_RADIUS * CONST_PI * 2; + FloatHi delta; + Int4 latitude; + Int4 longitude; + Int4 maxDelta; + LatBlockPtr lbp; + Int4 R; + Int4 x; + Int4 y; + Boolean is_geographically_better; + + if (distanceP != NULL) { + *distanceP = 0.0; + } - csp = GetCtSetLatLonData (); if (csp == NULL) return NULL; - gdarray = csp->gdarray; - if (gdarray == NULL) return NULL; + array = csp->latarray; + if (array == NULL) return NULL; - L = 0; - R = csp->num_grids - 1; + latitude = ConvertLat (lat, csp->scale); + longitude = ConvertLon (lon, csp->scale); - x = LatLonDegreeToIndex (lon); - y = LatLonDegreeToIndex (lat); + maxDelta = (Int4) (range * csp->scale + EPSILON); - while (L < R) { - mid = (L + R) / 2; - cgp = gdarray [mid]; - if (cgp != NULL && CgpGridComp (cgp, x, y) < 0) { - L = mid + 1; + for (R = GetLatLonIndex (csp, array, latitude - maxDelta); R < csp->numLatBlocks; R++) { + lbp = array [R]; + if (lbp == NULL) break; + if (latitude + maxDelta < lbp->lat) break; + + if (longitude < lbp->minlon - maxDelta) 
continue; + if (longitude > lbp->maxlon + maxDelta) continue; + + cbp = lbp->landmass; + if (cbp == NULL) continue; + + if (longitude < lbp->minlon) { + x = lbp->minlon; + } else if (longitude > lbp->maxlon) { + x = lbp->maxlon; } else { - R = mid; + x = longitude; + } + + y = lbp->lat; + + delta = DistanceOnGlobe (lat, lon, (FloatHi) (y / csp->scale), (FloatHi) (x / csp->scale)); + + is_geographically_better = FALSE; + if (delta < closest) { + is_geographically_better = TRUE; + } else if (delta - closest < 0.000001) { + if (best == NULL || cbp->area < best->area) { + is_geographically_better = TRUE; + } + } + + if (best == NULL || NewLatLonCandidateIsBetter (NULL, NULL, best, cbp, is_geographically_better)) { + best = cbp; + closest = delta; } } - while (R < csp->num_grids) { - cgp = gdarray [R]; - if (cgp == NULL) return country; - if (cgp->xindex != x || cgp->yindex != y) return country; - cbp = cgp->cbp; - if (cbp == NULL) return country; - if (lon >= cbp->minx && lat >= cbp->miny && lon <= cbp->maxx && lat <= cbp->maxy) { - country = cbp->country; + if (best != NULL) { + if (distanceP != NULL) { + *distanceP = closest; } - R++; } - return country; + return best; } +static CtyBlockPtr NearestCountryByLatLon ( + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP +) -static CharPtr bodiesOfWater [] = { - "Bay", - "Canal", - "Channel", - "Coastal", - "Cove", - "Estuary", - "Fjord", - "Freshwater", - "Gulf", - "Harbor", - "Inlet", - "Lagoon", - "Lake", - "Narrows", - "Ocean", - "Offshore", - "Passage", - "River", - "Sea", - "Seawater", - "Sound", - "Strait", - "Water", - "Waters", - NULL -}; +{ + CtrySetPtr csp; -static TextFsaPtr GetBodiesOfWaterFSA (void) + csp = GetLatLonCountryData (); + if (csp == NULL) return NULL; + return RegionClosestToLatLon (lat, lon, range, distanceP, csp); +} + +static CtyBlockPtr NearestWaterByLatLon ( + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP +) { - TextFsaPtr fsa; - Int2 i; - CharPtr 
prop = "BodiesOfWaterFSA"; + CtrySetPtr csp; - fsa = (TextFsaPtr) GetAppProperty (prop); - if (fsa != NULL) return fsa; + csp = GetLatLonWaterData (); + if (csp == NULL) return NULL; - fsa = TextFsaNew (); - if (fsa != NULL) { - for (i = 0; bodiesOfWater [i] != NULL; i++) { - TextFsaAdd (fsa, bodiesOfWater [i]); + return RegionClosestToLatLon (lat, lon, range, distanceP, csp); +} + +NLM_EXTERN CharPtr CountryClosestToLatLon ( + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP +) + +{ + CtyBlockPtr cbp; + + cbp = NearestCountryByLatLon (lat, lon, range, distanceP); + if (cbp == NULL) return NULL; + + return cbp->name; +} + +NLM_EXTERN CharPtr WaterClosestToLatLon ( + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP +) + +{ + CtyBlockPtr cbp; + + cbp = NearestWaterByLatLon (lat, lon, range, distanceP); + if (cbp == NULL) return NULL; + + return cbp->name; +} + +static CtyBlockPtr RegionIsNearLatLon ( + CharPtr country, + CharPtr province, + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP, + CtrySetPtr csp +) + +{ + LatBlockPtr PNTR array; + CtyBlockPtr cbp, best = NULL; + FloatHi closest = EARTH_RADIUS * CONST_PI * 2; + FloatHi delta; + Int4 latitude; + Int4 longitude; + Int4 maxDelta; + LatBlockPtr lbp; + Int4 R; + Int4 x; + Int4 y; + + if (distanceP != NULL) { + *distanceP = 0.0; + } + + if (StringHasNoText (country)) return NULL; + if (csp == NULL) return NULL; + + array = csp->latarray; + if (array == NULL) return NULL; + + latitude = ConvertLat (lat, csp->scale); + longitude = ConvertLon (lon, csp->scale); + + maxDelta = (Int4) (range * csp->scale + EPSILON); + + for (R = GetLatLonIndex (csp, array, latitude - maxDelta); R < csp->numLatBlocks; R++) { + lbp = array [R]; + if (lbp == NULL) break; + if (latitude + maxDelta < lbp->lat) break; + + if (longitude < lbp->minlon - maxDelta) continue; + if (longitude > lbp->maxlon + maxDelta) continue; + + cbp = lbp->landmass; + if (cbp == NULL) continue; + + 
if (StringICmp (country, cbp->level0) != 0) continue; + if (/* province != NULL && */ StringICmp (province, cbp->level1) != 0) continue; + + if (longitude < lbp->minlon) { + x = lbp->minlon; + } else if (longitude > lbp->maxlon) { + x = lbp->maxlon; + } else { + x = longitude; + } + + y = lbp->lat; + + delta = DistanceOnGlobe (lat, lon, (FloatHi) (y / csp->scale), (FloatHi) (x / csp->scale)); + + if (best == NULL || delta < closest) { + best = cbp; + closest = delta; } } - SetAppProperty (prop, (Pointer) fsa); + if (best != NULL) { + if (distanceP != NULL) { + *distanceP = closest; + } + } + + return best; +} + +static CtyBlockPtr CountryToLatLonDistance ( + CharPtr country, + CharPtr province, + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP +) + +{ + CtrySetPtr csp; + + csp = GetLatLonCountryData (); + if (csp == NULL) return NULL; + + return RegionIsNearLatLon (country, province, lat, lon, range, distanceP, csp); +} + +static CtyBlockPtr WaterToLatLonDistance ( + CharPtr country, + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP +) + +{ + CtrySetPtr csp; + + csp = GetLatLonWaterData (); + if (csp == NULL) return NULL; + + return RegionIsNearLatLon (country, NULL, lat, lon, range, distanceP, csp); +} + +NLM_EXTERN Boolean CountryIsNearLatLon ( + CharPtr country, + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP +) + +{ + CtyBlockPtr cbp; + + cbp = CountryToLatLonDistance (country, NULL, lat, lon, range, distanceP); + if (cbp == NULL) return FALSE; + + return TRUE; +} + +NLM_EXTERN Boolean WaterIsNearLatLon ( + CharPtr country, + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP +) + +{ + CtyBlockPtr cbp; + + cbp = WaterToLatLonDistance (country, lat, lon, range, distanceP); + if (cbp == NULL) return FALSE; + + return TRUE; +} + +/* +static void WriteLatLonRegionData ( + CtrySetPtr csp, + FILE* fp +) + +{ + Char buf [150]; + CtyBlockPtr cbp; + LatBlockPtr lbp; + ValNodePtr vnp; 
+ + if (csp == NULL || fp == NULL) return; + + for (vnp = csp->latblocks; vnp != NULL; vnp = vnp->next) { + lbp = (LatBlockPtr) vnp->data.ptrvalue; + if (lbp == NULL) { + fprintf (fp, "NULL LatBlockPtr\n"); + continue; + } + cbp = lbp->landmass; + if (cbp == NULL) { + fprintf (fp, "NULL CtyBlockPtr\n"); + continue; + } + + if (StringHasNoText (cbp->name)) { + fprintf (fp, "NULL cbp->name\n"); + continue; + } + + StringNCpy_0 (buf, cbp->name, 50); + StringCat (buf, " "); + buf [50] = '\0'; - return fsa; + fprintf (fp, "%s %4d : %4d .. %4d\n", buf, (int) lbp->lat, (int) lbp->minlon, (int) lbp->maxlon); + } + + fprintf (fp, "\n\n"); } -NLM_EXTERN Boolean StringContainsBodyOfWater (CharPtr str) +static void TestLatLonCountryData (void) { - Char ch; - TextFsaPtr fsa; - CharPtr ptr; - Int4 state; - ValNodePtr matches; + CtrySetPtr csp; + FILE *fp; - if (StringHasNoText (str)) return FALSE; + fp = FileOpen ("stdout", "w"); + if (fp == NULL) { + Message (MSG_OK, "Unable to open output file"); + return; + } - fsa = GetBodiesOfWaterFSA (); - if (fsa == NULL) return FALSE; + csp = GetLatLonCountryData (); + if (csp == NULL) { + fprintf (fp, "GetLatLonCountryData failed\n"); + FileClose (fp); + return; + } - state = 0; - ptr = str; - ch = *ptr; + WriteLatLonRegionData (csp, fp); - while (ch != '\0') { - matches = NULL; - state = TextFsaNext (fsa, state, ch, &matches); - ptr++; - ch = *ptr; - if (ch == '\0' || ch == ',' || ch == ':' || ch == ';' || ch == ' ') { - if (matches != NULL) return TRUE; - state = 0; - } + csp = GetLatLonWaterData (); + if (csp == NULL) { + fprintf (fp, "GetLatLonWaterData failed\n"); + FileClose (fp); + return; } - return FALSE; -} - + WriteLatLonRegionData (csp, fp); + FileClose (fp); +} +*/ +/* END OF NEW LATITUDE-LONGITUDE COUNTRY VALIDATION CODE */ static Boolean StringListIsUnique (ValNodePtr list) @@ -10143,6 +11394,7 @@ static Boolean PrimerSeqIsValid (ValidStructPtr vsp, CharPtr name, Char PNTR bad ValNodePtr matches; CharPtr ptr; Int4 state; 
+ Boolean first; if (badch != NULL) { *badch = '\0'; @@ -10175,12 +11427,14 @@ static Boolean PrimerSeqIsValid (ValidStructPtr vsp, CharPtr name, Char PNTR bad if (ch == '<') { state = 0; matches = NULL; - while (ch != '\0' && ch != '>') { + first = TRUE; + while (ch != '\0' && ch != '>' && (first || ch != '<')) { state = TextFsaNext (fsa, state, ch, &matches); ptr++; ch = *ptr; + first = FALSE; } - if (ch != '>') { + if (ch != '>' || ch == '<') { if (badch != NULL) { *badch = ch; } @@ -10740,8 +11994,14 @@ static void ValidateOrgModVoucher (ValidStructPtr vsp, OrgModPtr mod) return; } - /* ignore personal collections */ - if (StringNICmp (inst, "personal", 8) == 0) return; + /* previously ignored personal collections, now complain if name missing */ + if (StringNICmp (inst, "personal", 8) == 0) { + if (StringICmp (inst, "personal") == 0 && StringLen (str) > 0) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MissingPersonalCollectionName, + "Personal collection does not have name of collector"); + } + return; + } len1 = StringLen (inst); len2 = StringLen (str); @@ -10996,96 +12256,793 @@ static CharPtr sgml_strings [] = { NULL }; -static void InitializeSgmlStringsFSA (ValidStructPtr vsp) +static void InitializeSgmlStringsFSA (ValidStructPtr vsp) + +{ + Int2 i; + + vsp->sgmlStrings = TextFsaNew (); + for (i = 0; sgml_strings [i] != NULL; i++) { + TextFsaAdd (vsp->sgmlStrings, sgml_strings [i]); + } +} + +static Boolean StringHasSgml (ValidStructPtr vsp, CharPtr str) + +{ + Int2 ascii_len; + Char buf [256]; + Char ch; + TextFsaPtr fsa; + ValNodePtr matches; + Boolean not_sgml; + CharPtr ptr; + ErrSev sev; + Int4 state; + + if (StringHasNoText (str)) return FALSE; + if (StringChr (str, '&') == NULL) return FALSE; + + if (vsp == NULL) return FALSE; + if (vsp->sgmlStrings == NULL) { + InitializeSgmlStringsFSA (vsp); + } + fsa = vsp->sgmlStrings; + if (fsa == NULL) return FALSE; + + not_sgml = TRUE; + state = 0; + matches = NULL; + for (ptr = str, ch = *ptr; ch != '\0'; 
ptr++, ch = *ptr) { + state = TextFsaNext (fsa, state, ch, &matches); + if (matches != NULL) { + not_sgml = FALSE; + } + } + if (not_sgml) return FALSE; + + sev = ErrSetMessageLevel (SEV_REJECT); + ascii_len = Sgml2AsciiLen (str); + if (ascii_len + 2 >= sizeof (buf)) { + ErrSetMessageLevel (sev); + return FALSE; + } + + buf [0] = '\0'; + Sgml2Ascii (str, buf, ascii_len + 1); + ErrSetMessageLevel (sev); + + if (StringHasNoText (buf)) return FALSE; + if (StringCmp (str, buf) == 0) return FALSE; + + return TRUE; +} + +static CharPtr valid_sex_values [] = { + "female", + "male", + "hermaphrodite", + "unisexual", + "bisexual", + "asexual", + "monoecious", + "monecious", + "dioecious", + "diecious", + NULL +}; + +static Boolean IsValidSexValue (CharPtr str) + +{ + int i; + + if (StringHasNoText (str)) return FALSE; + + for (i = 0; valid_sex_values [i] != NULL; i++) { + if (StringICmp (str, valid_sex_values [i]) == 0) return TRUE; + } + + return FALSE; +} + +static Boolean LatLonInRange ( + FloatHi lat, + FloatHi lon +) + +{ + if (lat < -90.0001 || lat > 90.0001) return FALSE; + if (lon < -180.0001 || lon > 180.0001) return FALSE; + + return TRUE; +} + +static Boolean RegionIsClosestToLatLon ( + CharPtr country, + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP, + CtrySetPtr csp +) + +{ + LatBlockPtr PNTR array; + CtyBlockPtr cbp; + FloatHi closest = EARTH_RADIUS * CONST_PI * 2; + CharPtr guess = NULL; + FloatHi delta; + Int4 latitude; + Int4 longitude; + Int4 maxDelta; + LatBlockPtr lbp; + Int4 R; + Int4 x; + Int4 y; + + + if (StringHasNoText (country)) return FALSE; + + if (distanceP != NULL) { + *distanceP = 0.0; + } + + if (csp == NULL) return FALSE; + + array = csp->latarray; + if (array == NULL) return FALSE; + + latitude = ConvertLat (lat, csp->scale); + longitude = ConvertLon (lon, csp->scale); + + maxDelta = (Int4) (range * csp->scale + EPSILON); + + for (R = GetLatLonIndex (csp, array, latitude - maxDelta); R < csp->numLatBlocks; R++) { + 
lbp = array [R]; + if (lbp == NULL) break; + if (latitude + maxDelta < lbp->lat) break; + + if (longitude < lbp->minlon - maxDelta) continue; + if (longitude > lbp->maxlon + maxDelta) continue; + + cbp = lbp->landmass; + if (cbp == NULL) continue; + + if (longitude < lbp->minlon) { + x = lbp->minlon; + } else if (longitude > lbp->maxlon) { + x = lbp->maxlon; + } else { + x = longitude; + } + + y = lbp->lat; + + delta = DistanceOnGlobe (lat, lon, (FloatHi) (y / csp->scale), (FloatHi) (x / csp->scale)); + + if (delta < closest) { + guess = cbp->name; + closest = delta; + } else if (delta == closest) { + if (StringCmp (country, cbp->name) == 0) { + guess = cbp->name; + } + } + } + + if (guess != NULL) { + if (distanceP != NULL) { + *distanceP = closest; + } + } + + if (StringCmp (guess, country) == 0) return TRUE; + + return FALSE; +} + + +static Boolean CountryIsClosestToLatLon ( + CharPtr country, + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP +) + +{ + CtrySetPtr csp; + + csp = GetLatLonCountryData (); + if (csp == NULL) return FALSE; + + return RegionIsClosestToLatLon (country, lat, lon, range, distanceP, csp); +} + + +static int AdjustAndRoundDistance ( + FloatHi distance, + FloatHi scale +) + +{ + if (scale < 1.1) { + distance += 111.19; + } else if (scale > 19.5 && scale < 20.5) { + distance += 5.56; + } else if (scale > 99.5 && scale < 100.5) { + distance += 1.11; + } + + return (int) (distance + 0.5); +} + +typedef struct latlonmap { + FloatHi lat; + FloatHi lon; + CharPtr fullguess; + CharPtr guesscountry; + CharPtr guessprovince; + CharPtr guesswater; + CharPtr closestfull; + CharPtr closestcountry; + CharPtr closestprovince; + CharPtr closestwater; + CharPtr claimedfull; + int landdistance; + int waterdistance; + int claimeddistance; +} LatLonMap, PNTR LatLonMapPtr; + +static void CalculateLatLonMap ( + FloatHi lat, + FloatHi lon, + CharPtr country, + CharPtr province, + FloatHi scale, + LatLonMapPtr lmp +) + +{ + CtyBlockPtr cbp; 
+ FloatHi landdistance = 0.0, waterdistance = 0.0, claimeddistance = 0.0; + Boolean goodmatch = FALSE; + + if (lmp == NULL) return; + + /* initialize result values */ + MemSet ((Pointer) lmp, 0, sizeof (LatLonMap)); + + lmp->lat = lat; + lmp->lon = lon; + + /* lookup region by coordinates, or find nearest region and calculate distance */ + cbp = GuessCountryByLatLon (lat, lon, country, province); + if (cbp != NULL) { + /* successfully found inside some country */ + lmp->fullguess = cbp->name; + lmp->guesscountry = cbp->level0; + lmp->guessprovince = cbp->level1; + if (StringICmp (country, lmp->guesscountry) == 0 && (province == NULL || StringICmp (province, lmp->guessprovince) == 0)) { + goodmatch = TRUE; + } + } else { + /* not inside a country, check water */ + cbp = GuessWaterByLatLon (lat, lon, country); + if (cbp != NULL) { + /* found inside water */ + lmp->guesswater = cbp->name; + if (StringICmp (country, lmp->guesswater) == 0) { + goodmatch = TRUE; + } + /* + also see if close to land for coastal warning (if country is land) + or proximity message (if country is water) + */ + cbp = NearestCountryByLatLon (lat, lon, 5.0, &landdistance); + if (cbp != NULL) { + lmp->closestfull = cbp->name; + lmp->closestcountry = cbp->level0; + lmp->closestprovince = cbp->level1; + lmp->landdistance = AdjustAndRoundDistance (landdistance, scale); + if (StringICmp (country, lmp->closestcountry) == 0 && (province == NULL || StringICmp (province, lmp->closestprovince) == 0)) { + goodmatch = TRUE; + } + } + } else { + /* may be coastal inlet, area of data insufficiency */ + cbp = NearestCountryByLatLon (lat, lon, 5.0, &landdistance); + if (cbp != NULL) { + lmp->closestfull = cbp->name; + lmp->closestcountry = cbp->level0; + lmp->closestprovince = cbp->level1; + lmp->landdistance = AdjustAndRoundDistance (landdistance, scale); + if (StringICmp (country, lmp->closestcountry) == 0 && (province == NULL || StringICmp (province, lmp->closestprovince) == 0)) { + goodmatch = TRUE; + } + 
} + cbp = NearestWaterByLatLon (lat, lon, 5.0, &waterdistance); + if (cbp != NULL) { + lmp->closestwater = cbp->level0; + lmp->waterdistance = AdjustAndRoundDistance (waterdistance, scale); + if (StringICmp (country, lmp->closestwater) == 0) { + goodmatch = TRUE; + } + } + } + } + /* if guess is not the provided country or province, calculate distance to claimed country */ + if (! goodmatch) { + cbp = CountryToLatLonDistance (country, province, lat, lon, 5.0, &claimeddistance); + if (cbp != NULL) { + if (claimeddistance < ErrorDistance(lmp->lat, lmp->lon, scale)) { + lmp->guesscountry = country; + lmp->guessprovince = province; + lmp->fullguess = cbp->name; + } else { + lmp->claimedfull = cbp->name; + lmp->claimeddistance = AdjustAndRoundDistance (claimeddistance, scale); + } + } else if (province == NULL) { + cbp = WaterToLatLonDistance (country, lat, lon, 5.0, &claimeddistance); + if (cbp != NULL) { + lmp->claimedfull = cbp->name; + lmp->claimeddistance = AdjustAndRoundDistance (claimeddistance, scale); + } + } + } +} + + +enum { + eLatLonClassify_CountryMatch = 1 , + eLatLonClassify_ProvinceMatch = 2 , + eLatLonClassify_WaterMatch = 4 , + eLatLonClassify_CountryClosest = 8 , + eLatLonClassify_ProvinceClosest = 16 , + eLatLonClassify_WaterClosest = 32 , + eLatLonClassify_Error = 256 +} ELatLonClassify; + + +static Uint4 ClassifyLatLonMap ( + CharPtr fullname, + CharPtr country, + CharPtr province, + LatLonMapPtr lmp +) + +{ + Uint4 rval = 0; + + if (lmp == NULL) return eLatLonClassify_Error; + + /* compare guesses or closest regions to indicated country and province */ + if (lmp->guesscountry != NULL) { + + /* if top level countries match */ + if (StringICmp (country, lmp->guesscountry) == 0) { + rval |= eLatLonClassify_CountryMatch; + /* if both are null, call it a match */ + if (StringICmp (province, lmp->guessprovince) == 0) { + rval |= eLatLonClassify_ProvinceMatch; + } + } + /* if they don't match, do they overlap or are closest? 
*/ + if (!(rval & eLatLonClassify_CountryMatch)) { + if (StringICmp (country, lmp->closestcountry) == 0) { + rval |= eLatLonClassify_CountryClosest; + if (StringICmp (province, lmp->closestprovince) == 0) { + rval |= eLatLonClassify_ProvinceClosest; + } + } + } else if (!(rval & eLatLonClassify_ProvinceMatch) && province != NULL) { + if (StringICmp (province, lmp->closestprovince) == 0) { + rval |= eLatLonClassify_ProvinceClosest; + } + } + } + if (lmp->guesswater != NULL) { + /* was the non-approved body of water correctly indicated? */ + if (StringICmp (country, lmp->guesswater) == 0) { + rval |= eLatLonClassify_WaterMatch; + } else if (StringICmp (country, lmp->closestwater) == 0) { + rval |= eLatLonClassify_WaterClosest; + } + } + if (lmp->closestcountry != NULL && StringICmp (country, lmp->closestcountry) == 0) { + if (lmp->guesscountry == NULL && lmp->guesswater == NULL) { + /* coastal area */ + rval |= eLatLonClassify_CountryMatch; + lmp->guesscountry = lmp->closestcountry; + lmp->fullguess = lmp->closestcountry; + if (lmp->closestprovince != NULL && StringICmp (province, lmp->closestprovince) == 0) { + rval |= eLatLonClassify_ProvinceMatch; + lmp->guessprovince = lmp->closestprovince; + lmp->fullguess = lmp->closestfull; + } + } else { + rval |= eLatLonClassify_CountryClosest; + if (lmp->closestprovince != NULL && StringICmp (province, lmp->closestprovince) == 0) { + rval |= eLatLonClassify_ProvinceClosest; + } + } + } + return rval; +} + + +static void LatLonWaterErrors ( + ValidStructPtr vsp, + LatLonMapPtr lmp, + Uint4 test, + FloatHi neardist, + CharPtr country, + CharPtr province, + CharPtr lat_lon, + CharPtr fullname, + FloatHi scale + ) +{ + CharPtr fmt = "Lat_lon '%s' is closest to %s'%s' at distance %d km, but in water '%s'"; + CharPtr claimed_fmt = "Lat_lon '%s' is closest to %s'%s' at distance %d km, but in water '%s' - claimed region '%s' is at distance %d km"; + + Boolean suppress = FALSE; + CharPtr reportregion; + CharPtr nosubphrase = ""; + 
CharPtr desphrase = "designated subregion "; + CharPtr subphrase = "another subregion "; + CharPtr phrase = nosubphrase; + Boolean show_claimed = FALSE; + + if (test & (eLatLonClassify_CountryClosest | eLatLonClassify_ProvinceClosest)) { + + if (lmp->landdistance < 22) { + /* for now, will not report */ + /* this is a policy decision */ + suppress = TRUE; + } else if (StringStr (fullname, "Island") != NULL) { + suppress = TRUE; + } + + if (test & eLatLonClassify_ProvinceClosest) { + reportregion = fullname; + phrase = desphrase; + } else { + /* wasn't closest province, so must be closest country */ + if (province != NULL && vsp->testLatLonSubregion) { + phrase = subphrase; + reportregion = lmp->closestfull; + } else { + reportregion = lmp->closestcountry; + } + if (lmp->claimedfull != NULL) { + show_claimed = TRUE; + } + } + + if (!suppress) { + if (show_claimed) { + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonWater, claimed_fmt, lat_lon, + phrase, reportregion, + lmp->landdistance, lmp->guesswater, + lmp->claimedfull, lmp->claimeddistance); + } else { + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonWater, + fmt, lat_lon, + phrase, reportregion, + lmp->landdistance, lmp->guesswater); + } + } + + } else if (neardist > 0) { + fmt = "Lat_lon '%s' is in water '%s', '%s' is %d km away"; + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonWater, fmt, lat_lon, lmp->guesswater, fullname, AdjustAndRoundDistance (neardist, scale)); + } else { + fmt = "Lat_lon '%s' is in water '%s'"; + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonWater, fmt, lat_lon, lmp->guesswater); + } +} + +static void LatLonLandErrors ( + ValidStructPtr vsp, + LatLonMapPtr lmp, + CharPtr country, + CharPtr province, + CharPtr lat_lon, + CharPtr fullname + ) { - Int2 i; + CharPtr fmt; - vsp->sgmlStrings = TextFsaNew (); - for (i = 0; sgml_strings [i] != NULL; i++) { - TextFsaAdd (vsp->sgmlStrings, sgml_strings [i]); + if (lmp->claimedfull != NULL) { + fmt = "Lat_lon '%s' maps to '%s' instead of '%s' - claimed 
region '%s' is at distance %d km"; + if (province != NULL) { + if (StringICmp (lmp->guesscountry, country) == 0) { + if (vsp->testLatLonSubregion) { + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonState, fmt, lat_lon, lmp->fullguess, fullname, lmp->claimedfull, lmp->claimeddistance); + } + } else { + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, fmt, lat_lon, lmp->fullguess, fullname, lmp->claimedfull, lmp->claimeddistance); + } + } else { + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, fmt, lat_lon, lmp->fullguess, country, lmp->claimedfull, lmp->claimeddistance); + } + } else { + fmt = "Lat_lon '%s' maps to '%s' instead of '%s'"; + if (StringICmp (lmp->guesscountry, country) == 0) { + if (vsp->testLatLonSubregion) { + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonState, fmt, lat_lon, lmp->fullguess, fullname); + } + } else { + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, fmt, lat_lon, lmp->fullguess, fullname); + } } } -static Boolean StringHasSgml (ValidStructPtr vsp, CharPtr str) + +typedef enum { + eLatLonAdjust_none = 0 , + eLatLonAdjust_flip = 1 , + eLatLonAdjust_negate_lat = 2 , + eLatLonAdjust_negate_lon = 4 +} ELatLonAdjust; + +static void NewerValidateCountryLatLon ( + ValidStructPtr vsp, + GatherContextPtr gcp, + CharPtr countryname, + CharPtr lat_lon +) { - Int2 ascii_len; - Char buf [256]; - Char ch; - TextFsaPtr fsa; - ValNodePtr matches; - Boolean not_sgml; + Char buf0 [256], buf1 [256], buf2 [256]; + CharPtr country = NULL, province = NULL, fullname = NULL; + CtrySetPtr csp; + Boolean format_ok = FALSE, lat_in_range = FALSE, lon_in_range = FALSE; + FloatHi lat = 0.0; + FloatHi lon = 0.0; + LatLonMap llm, adjusted; CharPtr ptr; - ErrSev sev; - Int4 state; + FloatHi scale = 1.0; + FloatHi neardist = 0.0; + ELatLonAdjust adjust = eLatLonAdjust_none; + Uint4 test, adjust_test = 0; + CharPtr fmt; - if (StringHasNoText (str)) return FALSE; - if (StringChr (str, '&') == NULL) return FALSE; + if (vsp == NULL || gcp == NULL) return; + 
if (StringHasNoText (countryname)) return; + if (StringHasNoText (lat_lon)) return; - if (vsp == NULL) return FALSE; - if (vsp->sgmlStrings == NULL) { - InitializeSgmlStringsFSA (vsp); + IsCorrectLatLonFormat (lat_lon, &format_ok, &lat_in_range, &lon_in_range); + if (! format_ok) { + /* may have comma and then altitude, so just get lat_lon component */ + StringNCpy_0 (buf0, lat_lon, sizeof (buf0)); + ptr = StringChr (buf0, ','); + if (ptr != NULL) { + *ptr = '\0'; + lat_lon = buf0; + IsCorrectLatLonFormat (lat_lon, &format_ok, &lat_in_range, &lon_in_range); + } } - fsa = vsp->sgmlStrings; - if (fsa == NULL) return FALSE; - not_sgml = TRUE; - state = 0; - matches = NULL; - for (ptr = str, ch = *ptr; ch != '\0'; ptr++, ch = *ptr) { - state = TextFsaNext (fsa, state, ch, &matches); - if (matches != NULL) { - not_sgml = FALSE; + /* reality checks */ + if (! format_ok) { + /* incorrect lat_lon format should be reported elsewhere */ + return; + } + if (! lat_in_range) { + /* incorrect latitude range should be reported elsewhere */ + return; + } + if (! lon_in_range) { + /* incorrect longitude range should be reported elsewhere */ + return; + } + + if (! 
ParseLatLon (lat_lon, &lat, &lon)) { + /* report unable to parse lat_lon */ + return; + } + + StringNCpy_0 (buf1, countryname, sizeof (buf1)); + /* trim at comma or semicolon, leaving only country/ocean and possibly state/province */ + ptr = StringChr (buf1, ','); + if (ptr != NULL) { + *ptr = '\0'; + } + ptr = StringChr (buf1, ';'); + if (ptr != NULL) { + *ptr = '\0'; + } + TrimSpacesAroundString (buf1); + if (StringDoesHaveText (buf1)) { + fullname = buf1; + } + + StringNCpy_0 (buf2, buf1, sizeof (buf2)); + /* separate country from state/province */ + ptr = StringChr (buf2, ':'); + if (ptr != NULL) { + if (CountryIsInLatLonList (buf2)) { + /* store province if in data list as subregion of designated country */ + *ptr = '\0'; + ptr++; + TrimSpacesAroundString (ptr); + if (StringDoesHaveText (ptr)) { + province = ptr; + } + } else { + /* otherwise just truncate country at colon, trimming further descriptive information */ + *ptr = '\0'; + ptr++; } } - if (not_sgml) return FALSE; + TrimSpacesAroundString (buf2); + if (StringDoesHaveText (buf2)) { + country = buf2; + } - sev = ErrSetMessageLevel (SEV_REJECT); - ascii_len = Sgml2AsciiLen (str); - if (ascii_len + 2 >= sizeof (buf)) { - ErrSetMessageLevel (sev); - return FALSE; + if (StringHasNoText (country)) { + /* report leading colon without country */ + return; } - buf [0] = '\0'; - Sgml2Ascii (str, buf, ascii_len + 1); - ErrSetMessageLevel (sev); + /* known exceptions - don't even bother calculating any further */ + if (StringCmp (country, "Antarctica") == 0 && lat < -60.0) { + return; + } - if (StringHasNoText (buf)) return FALSE; - if (StringCmp (str, buf) == 0) return FALSE; + if (! CountryIsInLatLonList (country)) { + if (! 
WaterIsInLatLonList (country)) { + /* report unrecognized country */ + return; + } else { + /* report that it may refer to specific small body of water */ + /* continue to look for nearby country for proximity report */ + /* (do not return) */ + } + } - return TRUE; + csp = GetLatLonCountryData (); + if (csp == NULL) { + /* report unable to find data */ + return; + } + + /* scale (reciprocal of degree resolution) needed for adjusting offshore distance calculation */ + scale = csp->scale; + + /* calculate assignment or proximity by coordinates */ + CalculateLatLonMap (lat, lon, country, province, scale, &llm); + + /* compare indicated country/province to guess/proximate country/water */ + test = ClassifyLatLonMap (fullname, country, province, &llm); + + if (!test && CountryIsNearLatLon(country, lat, lon, 2.0, &neardist) && neardist < 5.0) { + llm.guesscountry = country; + llm.guessprovince = NULL; + test = ClassifyLatLonMap (fullname, country, province, &llm); + } + + if (!test && !CountryIsNearLatLon(country, lat, lon, 20.0, &neardist) && !WaterIsNearLatLon(country, lat, lon, 20.0, &neardist)) { + CalculateLatLonMap (lon, lat, country, province, scale, &adjusted); + adjust_test = ClassifyLatLonMap (fullname, country, province, &adjusted); + if (adjust_test) { + adjust = eLatLonAdjust_flip; + } else { + CalculateLatLonMap (-lat, lon, country, province, scale, &adjusted); + adjust_test = ClassifyLatLonMap (fullname, country, province, &adjusted); + if (adjust_test) { + adjust = eLatLonAdjust_negate_lat; + } else { + CalculateLatLonMap (lat, -lon, country, province, scale, &adjusted); + adjust_test = ClassifyLatLonMap (fullname, country, province, &adjusted); + if (adjust_test) { + adjust = eLatLonAdjust_negate_lon; + } + } + } + + if (adjust_test) { + test = adjust_test; + MemCopy (&llm, &adjusted, sizeof (LatLonMap)); + } + } + + if (adjust) { + if (adjust == eLatLonAdjust_flip) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Latitude and longitude 
values appear to be exchanged"); + } else if (adjust == eLatLonAdjust_negate_lat) { + if (lat < 0.0) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Latitude should be set to N (northern hemisphere)"); + } else { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Latitude should be set to S (southern hemisphere)"); + } + } else if (adjust == eLatLonAdjust_negate_lon) { + if (lon < 0.0) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Longitude should be set to E (eastern hemisphere)"); + } else { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Longitude should be set to W (western hemisphere)"); + } + } + } else { + if ((test & eLatLonClassify_CountryMatch) && (test & eLatLonClassify_ProvinceMatch)) { + /* success! nothing to report */ + } else if (test & eLatLonClassify_WaterMatch) { + /* success! nothing to report */ + } else if (test & eLatLonClassify_CountryMatch && province == NULL) { + if (vsp->testLatLonSubregion) { + fmt = "Lat_lon %s is in %s (more specific than %s)"; + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonState, fmt, lat_lon, llm.fullguess, country); + } + } else if (llm.guesswater != NULL) { + LatLonWaterErrors(vsp, &llm, test, neardist, country, province, lat_lon, fullname, scale); + } else if (llm.guesscountry != NULL) { + LatLonLandErrors (vsp, &llm, country, province, lat_lon, fullname); + } else if (llm.closestcountry != NULL) { + fmt = "Lat_lon '%s' is closest to '%s' instead of '%s'"; + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, fmt, lat_lon, llm.closestcountry, fullname); + } else if (llm.closestwater != NULL) { + fmt = "Lat_lon '%s' is closest to '%s' instead of '%s'"; + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonWater, fmt, lat_lon, llm.closestwater, fullname); + } else { + fmt = "Unable to determine mapping for lat_lon '%s' and country '%s'"; + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, fmt, lat_lon, fullname); + } + } } -static CharPtr valid_sex_values [] = { - "female", 
- "male", - "hermaphrodite", - "unisexual", - "bisexual", - "asexual", - "monoecious", - "monecious", - "dioecious", - "diecious", - NULL + +/* note - special case for sex because it prevents a different message from being displayed, do not list here */ +static const Uint1 sUnexpectedViralSubSourceQualifiers[] = { + SUBSRC_cell_line, + SUBSRC_cell_type, + SUBSRC_tissue_type, + SUBSRC_dev_stage }; -static Boolean IsValidSexValue (CharPtr str) +static const Int4 sNumUnexpectedViralSubSourceQualifiers = sizeof (sUnexpectedViralSubSourceQualifiers) / sizeof (Uint1); -{ - int i; - if (StringHasNoText (str)) return FALSE; +static Boolean IsUnexpectedViralSubSourceQualifier (Uint1 subtype) +{ + Int4 i; + Boolean rval = FALSE; - for (i = 0; valid_sex_values [i] != NULL; i++) { - if (StringICmp (str, valid_sex_values [i]) == 0) return TRUE; + for (i = 0; i < sNumUnexpectedViralSubSourceQualifiers && !rval; i++) { + if (subtype == sUnexpectedViralSubSourceQualifiers[i]) { + rval = TRUE; + } } + return rval; +} - return FALSE; +static const Uint1 sUnexpectedViralOrgModQualifiers[] = { + ORGMOD_breed, + ORGMOD_cultivar, + ORGMOD_specimen_voucher +}; + +static const Int4 sNumUnexpectedViralOrgModQualifiers = sizeof (sUnexpectedViralOrgModQualifiers) / sizeof (Uint1); + + +static Boolean IsUnexpectedViralOrgModQualifier (Uint1 subtype) +{ + Int4 i; + Boolean rval = FALSE; + + for (i = 0; i < sNumUnexpectedViralOrgModQualifiers && !rval; i++) { + if (subtype == sUnexpectedViralOrgModQualifiers[i]) { + rval = TRUE; + } + } + return rval; } + static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSourcePtr biop, SeqFeatPtr sfp, ValNodePtr sdp) { Char badch; @@ -11093,24 +13050,23 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour Boolean bad_frequency; BioseqPtr bsp; BioseqSetPtr bssp; - Char buf [256]; Char ch; Boolean chromconf = FALSE; Int2 chromcount = 0; SubSourcePtr chromosome = NULL; CharPtr countryname = NULL; + 
CtrySetPtr csp; ValNodePtr db; DbtagPtr dbt; - Boolean format_ok; CharPtr gb_synonym = NULL; Boolean germline = FALSE; CharPtr good; - CharPtr guess = NULL; Boolean has_strain = FALSE; Boolean has_fwd_pcr_seq = FALSE; Boolean has_rev_pcr_seq = FALSE; Boolean has_pcr_name = FALSE; Boolean has_metagenome_source = FALSE; + Boolean has_plasmid = FALSE; Int4 id; Boolean is_env_sample = FALSE; Boolean is_iso_source = FALSE; @@ -11129,11 +13085,7 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour Boolean is_rf; Boolean is_sc; CharPtr last_db = NULL; - FloatHi lat = 0.0; - FloatHi lon = 0.0; CharPtr lat_lon = NULL; - Boolean lat_in_range; - Boolean lon_in_range; Int2 num_bio_material = 0; Int2 num_culture_collection = 0; Int2 num_specimen_voucher = 0; @@ -11152,18 +13104,17 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour Int4 primer_len_before; Int4 primer_len_after; ValNodePtr pset; - CharPtr ptr; Boolean rearranged = FALSE; SeqEntryPtr sep; ErrSev sev; SubSourcePtr ssp; CharPtr str; - Boolean strict = TRUE; CharPtr synonym = NULL; - Char tmp [128]; Boolean varietyOK; CharPtr inst1, inst2, id1, id2, coll1, coll2; Char buf1 [512], buf2 [512]; + PCRPrimerPtr ppp; + PCRReactionSetPtr prp; if (vsp->sourceQualTags == NULL) { InitializeSourceQualTags (vsp); @@ -11302,6 +13253,7 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Unexpected use of /mating_type qualifier"); } } else if (ssp->subtype == SUBSRC_plasmid_name) { + has_plasmid = TRUE; if (biop->genome != GENOME_plasmid) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plasmid subsource but not plasmid location"); } @@ -11441,15 +13393,20 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "bad frequency qualifier value %s", ssp->name); } 
} - } else if (ssp->subtype == SUBSRC_cell_line && isViral) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Virus has unexpected cell_line qualifier"); - } else if (ssp->subtype == SUBSRC_cell_type && isViral) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Virus has unexpected cell_type qualifier"); - } else if (ssp->subtype == SUBSRC_tissue_type && isViral) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Virus has unexpected tissue_type qualifier"); + } + + if (isViral && IsUnexpectedViralSubSourceQualifier(ssp->subtype)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Virus has unexpected %s qualifier", GetSubsourceQualName (ssp->subtype)); } ssp = ssp->next; } + + if (biop->genome == GENOME_plasmid) { + if (! has_plasmid) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plasmid location but not plasmid subsource"); + } + } + if (num_country > 1) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple country qualifiers present"); } @@ -11470,112 +13427,9 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour } if (countryname != NULL && lat_lon != NULL) { - IsCorrectLatLonFormat (lat_lon, &format_ok, &lat_in_range, &lon_in_range); - if (! format_ok) { - /* may have comma and then altitude, so just get lat_lon component */ - StringNCpy_0 (tmp, lat_lon, sizeof (tmp)); - ptr = StringChr (tmp, ','); - if (ptr != NULL) { - *ptr = '\0'; - lat_lon = tmp; - IsCorrectLatLonFormat (tmp, &format_ok, &lat_in_range, &lon_in_range); - } - } - if (format_ok && ParseLatLon (lat_lon, &lat, &lon)) { - StringNCpy_0 (buf, countryname, sizeof (buf)); - ptr = StringChr (buf, ':'); - if (ptr != NULL) { - *ptr = '\0'; - strict = FALSE; - } - if (IsCountryInLatLonList (buf)) { - if (TestLatLonForCountry (buf, lat, lon)) { - /* match */ - if (! 
strict) { - StringNCpy_0 (buf, countryname, sizeof (buf)); - ptr = StringChr (buf, ','); - if (ptr != NULL) { - *ptr = '\0'; - } - ptr = StringChr (buf, ';'); - if (ptr != NULL) { - *ptr = '\0'; - } - if (IsCountryInLatLonList (buf)) { - if (TestLatLonForCountry (buf, lat, lon)) { - /* match */ - } else { - if (vsp->strictLatLonCountry || (vsp->testLatLonSubregion && (! StringContainsBodyOfWater (countryname)))) { - /* passed unqualified but failed qualified country name, report at info level for now */ - guess = GuessCountryForLatLon (lat, lon); - if (StringDoesHaveText (guess)) { - if (CountryBoxesOverlap (buf, guess)) { - if (vsp->indexerVersion) { - ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonAdjacent, - "Lat_lon '%s' MIGHT be in '%s' instead of adjacent '%s' - SHIFT DOUBLE CLICK TO LAUNCH GOOGLE EARTH -", - lat_lon, guess, buf); - } else { - ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonState, - "Lat_lon '%s' MIGHT be in '%s' instead of '%s'", lat_lon, guess, buf); - } - } else { - ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonState, - "Lat_lon '%s' does not map to subregion '%s', but may be in '%s'", lat_lon, buf, guess); - } - } else { - ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonState, - "Lat_lon '%s' does not map to subregion '%s'", lat_lon, buf); - } - } - } - } - } - } else if (TestLatLonForCountry (buf, -lat, lon)) { - if (lat < 0.0) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Latitude should be set to N (northern hemisphere)"); - } else { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Latitude should be set to S (southern hemisphere)"); - } - } else if (TestLatLonForCountry (buf, lat, -lon)) { - if (lon < 0.0) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Longitude should be set to E (eastern hemisphere)"); - } else { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Longitude should be set to W (western hemisphere)"); - } - /* - } else if (TestLatLonForCountry (buf, -lat, -lon)) { - ValidErr (vsp, 
SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Both latitude and longitude appear to be in wrong hemispheres"); - */ - } else if (TestLatLonForCountry (buf, lon, lat)) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Latitude and longitude values appear to be exchanged"); - /* - } else if (strict) { - ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, "Lat_lon '%s' does not map to '%s'", lat_lon, buf); - */ - } else { - if (vsp->strictLatLonCountry || (! StringContainsBodyOfWater (countryname))) { - guess = GuessCountryForLatLon (lat, lon); - if (guess != NULL) { - if (CountryBoxesOverlap (buf, guess)) { - if (vsp->indexerVersion) { - ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonAdjacent, - "Lat_lon '%s' MIGHT be in '%s' instead of adjacent '%s' - SHIFT DOUBLE CLICK TO LAUNCH GOOGLE EARTH -", - lat_lon, guess, buf); - } else { - ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, - "Lat_lon '%s' MIGHT be in '%s' instead of '%s'", lat_lon, guess, buf); - } - } else { - ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, - "Lat_lon '%s' does not map to '%s', but may be in '%s'", lat_lon, buf, guess); - } - } else { - ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, - "Lat_lon '%s' does not map to '%s'", lat_lon, buf); - } - } - } - } + csp = GetLatLonCountryData (); + if (csp != NULL) { + NewerValidateCountryLatLon (vsp, gcp, countryname, lat_lon); } } @@ -11604,6 +13458,41 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour } } + for (prp = biop->pcr_primers; prp != NULL; prp = prp->next) { + + for (ppp = prp->forward; ppp != NULL; ppp = ppp->next) { + if (StringDoesHaveText (ppp->seq) && (! 
PrimerSeqIsValid (vsp, ppp->seq, &badch))) { + if (badch < ' ' || badch > '~') { + badch = '?'; + } + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadPCRPrimerSequence, + "PCR forward primer sequence format is incorrect, first bad character is '%c'", (char) badch); + } + if (StringLen (ppp->name) > 10 && PrimerSeqIsValid (vsp, ppp->name, &badch)) { + if (badch < ' ' || badch > '~') { + badch = '?'; + } + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadPCRPrimerName, "PCR forward primer name appears to be a sequence"); + } + } + + for (ppp = prp->reverse; ppp != NULL; ppp = ppp->next) { + if (StringDoesHaveText (ppp->seq) && (! PrimerSeqIsValid (vsp, ppp->seq, &badch))) { + if (badch < ' ' || badch > '~') { + badch = '?'; + } + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadPCRPrimerSequence, + "PCR reverse primer sequence format is incorrect, first bad character is '%c'", (char) badch); + } + if (StringLen (ppp->name) > 10 && PrimerSeqIsValid (vsp, ppp->name, &badch)) { + if (badch < ' ' || badch > '~') { + badch = '?'; + } + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadPCRPrimerName, "PCR reverse primer name appears to be a sequence"); + } + } + } + if (germline && rearranged) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Germline and rearranged should not both be present"); } @@ -11734,15 +13623,23 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour if (StringStr (onp->lineage, "Chlorarachniophyceae") == 0 && StringStr (onp->lineage, "Cryptophyta") == 0) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadOrganelle, "Only Chlorarachniophyceae and Cryptophyta have nucleomorphs"); } + } else if (biop->genome == GENOME_macronuclear) { + if (StringStr (onp->lineage, "Ciliophora") == 0) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadOrganelle, "Only Ciliophora have macronuclear locations"); + } } /* warn if bacteria has organelle location */ - if (StringCmp (onp->div, "BCT") == 0 - && biop->genome != GENOME_unknown - && 
biop->genome != GENOME_genomic - && biop->genome != GENOME_plasmid - && biop->genome != GENOME_chromosome) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Bacterial source should not have organelle location"); + if (StringCmp (onp->div, "BCT") == 0 || StringCmp (onp->div, "VRL") == 0) { + if (biop->genome == GENOME_unknown + || biop->genome == GENOME_genomic + || biop->genome == GENOME_plasmid + || biop->genome == GENOME_chromosome + || (biop->genome == GENOME_proviral && StringCmp (onp->div, "VRL") == 0)) { + /* it's ok */ + } else { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Bacterial or viral source should not have organelle location"); + } } if (StringCmp (onp->div, "ENV") == 0 && (! is_env_sample)) { @@ -11812,6 +13709,11 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour } else if (omp->subtype == ORGMOD_gb_synonym) { gb_synonym = omp->subname; } + + if (isViral && IsUnexpectedViralOrgModQualifier(omp->subtype)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Virus has unexpected %s qualifier", GetOrgModQualName (omp->subtype)); + } + omp = omp->next; } @@ -12075,10 +13977,27 @@ static Boolean StringHasPMID (CharPtr str) ptr++; ch = *ptr; } - + + return FALSE; +} + + +static Boolean HasStructuredCommentPrefix (UserObjectPtr uop) +{ + UserFieldPtr ufp; + + if (uop == NULL) { + return FALSE; + } + for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { + if (ufp->label != NULL && StringCmp (ufp->label->str, "StructuredCommentPrefix") == 0) { + return TRUE; + } + } return FALSE; } + static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, ValidStructPtr vsp, Uint4 descitemid) { ValNodePtr vnp, vnp2; @@ -12217,6 +14136,10 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_SerialInComment, "Comment may refer to reference by serial number - attach reference specific 
comments to the reference REMARK instead."); } + if (StringLooksLikeFakeStructuredComment (str)) { + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_FakeStructuredComment, + "Comment may be formatted to look like a structured comment."); + } for (vnp2 = vnp->next; vnp2 != NULL; vnp2 = vnp2->next) { if (vnp2->choice == Seq_descr_comment) { ptr = (CharPtr) vnp2->data.ptrvalue; @@ -12428,6 +14351,9 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V if (uop->data == NULL) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_UserObjectProblem, "Structured Comment user object descriptor is empty"); } + if (!HasStructuredCommentPrefix (uop)) { + ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_StructuredCommentPrefixOrSuffixMissing, "Structured Comment lacks prefix"); + } } } } @@ -13234,7 +15160,7 @@ static Int2 IdXrefsNotReciprocal ( for (xref = cds->xref; xref != NULL; xref = xref->next) { if (xref->id.choice != 0) { matchsfp = SeqMgrGetFeatureByFeatID (cds->idx.entityID, NULL, NULL, xref, NULL); - if (matchsfp != mrna) { + if (matchsfp != NULL && matchsfp->idx.subtype == FEATDEF_mRNA && matchsfp != mrna) { return 1; } } @@ -13243,7 +15169,7 @@ static Int2 IdXrefsNotReciprocal ( for (xref = mrna->xref; xref != NULL; xref = xref->next) { if (xref->id.choice != 0) { matchsfp = SeqMgrGetFeatureByFeatID (mrna->idx.entityID, NULL, NULL, xref, NULL); - if (matchsfp != cds) { + if (matchsfp != NULL && matchsfp->idx.subtype == FEATDEF_CDS && matchsfp != cds) { return 1; } } @@ -13709,21 +15635,23 @@ static void ValidateCDSmRNAmatch ( rpt_region = SeqMgrGetOverlappingFeature (sfp->location, 0, repeat_region_array, num_repeat_regions, NULL, CONTAINED_WITHIN, &rcontext); if (rpt_region == NULL) { - /* - if (gcp != NULL) { - gcp->itemID = sfp->idx.itemID; - gcp->thistype = OBJ_SEQFEAT; - } - vsp->descr = NULL; - vsp->sfp = sfp; - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_CDSwithNoMRNAOverlap, "CDS overlapped by 0 mRNAs"); - */ - vnp = ValNodeAddPointer (&cdstail, 0, (Pointer) 
sfp); - if (cdshead == NULL) { - cdshead = vnp; + if (StringStr (sfp->except_text, "rearrangement required for product") == NULL) { + /* + if (gcp != NULL) { + gcp->itemID = sfp->idx.itemID; + gcp->thistype = OBJ_SEQFEAT; + } + vsp->descr = NULL; + vsp->sfp = sfp; + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_CDSwithNoMRNAOverlap, "CDS overlapped by 0 mRNAs"); + */ + vnp = ValNodeAddPointer (&cdstail, 0, (Pointer) sfp); + if (cdshead == NULL) { + cdshead = vnp; + } + cdstail = vnp; + num_no_mrna++; } - cdstail = vnp; - num_no_mrna++; } } } @@ -14226,6 +16154,7 @@ static void CheckForNonViralComplete (BioseqPtr bsp, ValidStructPtr vsp, GatherC ObjValNodePtr ovp; SeqDescrPtr sdp; CharPtr title = NULL; + SubSourcePtr ssp; if (bsp == NULL || vsp == NULL) return; @@ -14258,6 +16187,12 @@ static void CheckForNonViralComplete (BioseqPtr bsp, ValidStructPtr vsp, GatherC if (StringNICmp (onp->lineage, "Viroids; ", 9) == 0) return; if (StringICmp (onp->lineage, "Viruses") == 0 && StringICmp (onp->div, "PHG") == 0) return; + for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { + if (ssp->subtype == SUBSRC_endogenous_virus_name) { + return; + } + } + if (gcp != NULL) { olditemid = gcp->itemID; olditemtype = gcp->thistype; @@ -14404,7 +16339,8 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv Int2 numBadFullSource; SubSourcePtr sbsp; Int2 numgene, numcds, nummrna, numcdsproducts, nummrnaproducts, - numcdspseudo, nummrnapseudo, lastrnatype, thisrnatype; + numcdspseudo, nummrnapseudo, numrearrangedcds, lastrnatype, + thisrnatype; Boolean cds_products_unique = TRUE, mrna_products_unique = TRUE, suppress_duplicate_messages = FALSE, pseudo; SeqIdPtr sip; @@ -14460,6 +16396,7 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv nummrnaproducts = 0; numcdspseudo = 0; nummrnapseudo = 0; + numrearrangedcds = 0; sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext); while (sfp != NULL) { @@ -14469,6 +16406,9 @@ static 
Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv break; case FEATDEF_CDS : numcds++; + if (StringStr (sfp->except_text, "rearrangement required for product") != NULL) { + numrearrangedcds++; + } if (sfp->product != NULL) { numcdsproducts++; sip = SeqLocId (sfp->product); @@ -14567,7 +16507,7 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv cds_products_unique && mrna_products_unique) { suppress_duplicate_messages = TRUE; } - if (numcdsproducts > 0 && numcdsproducts + numcdspseudo != numcds) { + if (numcdsproducts > 0 && numcdsproducts + numcdspseudo != numcds && numcdsproducts + numcdspseudo + numrearrangedcds != numcds) { if (gcp != NULL) { gcp->itemID = olditemid; gcp->thistype = olditemtype; @@ -14646,6 +16586,7 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext); last_reported = FALSE; while (sfp != NULL) { + HasFeatId(sfp, 932); leave = TRUE; if (last != NULL) { ivalssame = FALSE; @@ -15097,9 +17038,9 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv if (cdsRight + 1 != fcontext.left) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "CDS does not abut 3'UTR"); } - if (bvsp->is_mrna && cdscount == 1 && utr3count == 1 && fcontext.right != bsp->length - 1) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotExtendToEnd, "3'UTR does not extend to end of mRNA"); - } + } + if (bvsp->is_mrna && cdscount == 1 && utr3count == 1 && fcontext.right != bsp->length - 1) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotExtendToEnd, "3'UTR does not extend to end of mRNA"); } threeUTRright = fcontext.right; } @@ -15710,6 +17651,9 @@ static void ValidateTSASequenceForNs (BioseqPtr bsp, ValidStructPtr vsp) CharPtr str; ValNode vn; + if (ISA_aa (bsp->mol)) { + return; + } gcp = vsp->gcp; oldEntityID = gcp->entityID; @@ -15768,6 +17712,19 @@ static void ValidateTSASequenceForNs 
(BioseqPtr bsp, ValidStructPtr vsp) } MemFree (str); } + } else { + CountNsInSequence (bsp, &total, &max_stretch, FALSE); + percent_N = (total * 100) / bsp->length; + if (percent_N > 50) { + vsp->bsp = bsp; + vsp->descr = NULL; + vsp->sfp = NULL; + gcp->entityID = bsp->idx.entityID; + gcp->itemID = bsp->idx.itemID; + gcp->thistype = OBJ_BIOSEQ; + + ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_HighNContentPercent, "Sequence contains %d percent Ns", percent_N); + } } gcp->entityID = oldEntityID; gcp->itemID = oldItemID; @@ -15814,6 +17771,9 @@ static void ValidateRefSeqTitle (BioseqPtr bsp, ValidStructPtr vsp, Boolean is_v if (sdp != NULL) { title = (CharPtr) sdp->data.ptrvalue; if (StringDoesHaveText (title)) { + if (StringNCmp (title, "PREDICTED: ", 11) == 0) { + title += 11; + } len = StringLen (taxname); tlen = StringLen (title); if (ISA_na (bsp->mol)) { @@ -15834,6 +17794,87 @@ static void ValidateRefSeqTitle (BioseqPtr bsp, ValidStructPtr vsp, Boolean is_v } +static Boolean EndsWithSuffixPlusFieldValue (CharPtr str, CharPtr suffix, CharPtr val) +{ + CharPtr cp, last_word; + + cp = StringSearch (str, suffix); + if (cp == NULL) { + return FALSE; + } + last_word = StringRChr (str, ' '); + if (last_word == NULL || last_word < cp) { + return FALSE; + } + if (StringCmp (last_word + 1, val) == 0) { + return TRUE; + } else { + return FALSE; + } + +} + + +static void ValidateBarcodeIndexNumber (CharPtr bin, BioseqPtr bsp, ValidStructPtr vsp) +{ + SeqDescPtr sdp; + SeqMgrDescContext context; + BioSourcePtr biop; + Int4 bin_len; + + if (StringHasNoText (bin) || bsp == NULL || vsp == NULL) { + return; + } + + bin_len = StringLen (bin); + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); + if (sdp == NULL || (biop = (BioSourcePtr) sdp->data.ptrvalue) == NULL || biop->org == NULL) { + return; + } + /* only check if name contains "sp." 
or "bacterium" */ + if (StringISearch (biop->org->taxname, "sp.") == NULL && StringISearch (biop->org->taxname, "bacterium") == NULL) { + return; + } + /* only check if name contains BOLD */ + if (StringSearch (biop->org->taxname, "BOLD") == NULL) { + return; + } + if (!EndsWithSuffixPlusFieldValue(biop->org->taxname, "sp. ", bin) + && !EndsWithSuffixPlusFieldValue(biop->org->taxname, "bacterium ", bin)) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_BadStrucCommInvalidFieldValue, "Organism name should end with sp. plus Barcode Index Number (%s)", bin); + } +} + + +static void ValidateStructuredCommentsInContext (BioseqPtr bsp, ValidStructPtr vsp) +{ + SeqDescPtr sdp; + SeqMgrDescContext dcontext; + UserObjectPtr uop; + ObjectIdPtr oip; + UserFieldPtr curr; + + /* validate structured comments in context */ + for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext); + sdp != NULL; + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext)) + { + uop = sdp->data.ptrvalue; + if (uop != NULL && uop->type != NULL && StringICmp (uop->type->str, "StructuredComment") == 0) + { + for (curr = uop->data; curr != NULL; curr = curr->next) + { + if (curr->choice != 1) continue; + oip = curr->label; + if (oip == NULL || StringCmp (oip->str, "Barcode Index Number") != 0) continue; + ValidateBarcodeIndexNumber ((CharPtr) curr->data.ptrvalue, bsp, vsp); + } + } + } +} + + /***************************************************************************** * * ValidateBioseqContext(gcp) @@ -15889,7 +17930,7 @@ static void ValidateBioseqContext (GatherContextPtr gcp) CharPtr str; CharPtr taxname = NULL; TextSeqIdPtr tsip; - BioSourcePtr biop; + BioSourcePtr biop = NULL; OrgRefPtr orp; OrgNamePtr onp; OrgModPtr omp; @@ -16082,6 +18123,9 @@ static void ValidateBioseqContext (GatherContextPtr gcp) gcp->itemID = oldItemID; gcp->thistype = oldItemtype; } + if (BioseqHasKeyword(bsp, "BARCODE") && BioseqHasKeyword(bsp, "UNVERIFIED")) { + ValidErr (vsp, SEV_WARNING, 
ERR_SEQ_DESCR_BadKeyword, "Sequence has both BARCODE and UNVERIFIED keywords"); + } } if (is_neg_strand_virus && mip != NULL) { @@ -16393,6 +18437,9 @@ static void ValidateBioseqContext (GatherContextPtr gcp) /* TSA checks */ ValidateTSASequenceForNs (bsp, vsp); + + /* validate structured comments in context */ + ValidateStructuredCommentsInContext (bsp, vsp); } /***************************************************************************** @@ -16424,6 +18471,9 @@ static void CheckPeptideOnCodonBoundary (ValidStructPtr vsp, GatherContextPtr gc Boolean partial5, partial3; Int4 pos1, pos2, adjust = 0, mod1, mod2; + if (SeqLocStop (sfp->location) == 2150166) { + mod1 = 0; + } cds = SeqMgrGetOverlappingCDS (sfp->location, NULL); if (cds == NULL) return; @@ -16500,969 +18550,6 @@ static CharPtr legal_mobile_element_strings [] = { NULL }; -static CharPtr ecnum_ambig [] = { - "1.-.-.-", "1.1.-.-", "1.1.1.-", "1.1.1.n", "1.1.2.-", "1.1.2.n", - "1.1.3.-", "1.1.3.n", "1.1.4.-", "1.1.4.n", "1.1.5.-", "1.1.5.n", - "1.1.98.-", "1.1.98.n", "1.1.99.-", "1.1.99.n", "1.1.n.n", - "1.2.-.-", "1.2.1.-", "1.2.1.n", "1.2.2.-", "1.2.2.n", "1.2.3.-", - "1.2.3.n", "1.2.4.-", "1.2.4.n", "1.2.7.-", "1.2.7.n", "1.2.99.-", - "1.2.99.n", "1.2.n.n", "1.3.-.-", "1.3.1.-", "1.3.1.n", "1.3.2.-", - "1.3.2.n", "1.3.3.-", "1.3.3.n", "1.3.5.-", "1.3.5.n", "1.3.7.-", - "1.3.7.n", "1.3.99.-", "1.3.99.n", "1.3.n.n", "1.4.-.-", "1.4.1.-", - "1.4.1.n", "1.4.2.-", "1.4.2.n", "1.4.3.-", "1.4.3.n", "1.4.4.-", - "1.4.4.n", "1.4.5.-", "1.4.5.n", "1.4.7.-", "1.4.7.n", "1.4.99.-", - "1.4.99.n", "1.4.n.n", "1.5.-.-", "1.5.1.-", "1.5.1.n", "1.5.3.-", - "1.5.3.n", "1.5.4.-", "1.5.4.n", "1.5.5.-", "1.5.5.n", "1.5.7.-", - "1.5.7.n", "1.5.8.-", "1.5.8.n", "1.5.99.-", "1.5.99.n", "1.5.n.n", - "1.6.-.-", "1.6.1.-", "1.6.1.n", "1.6.2.-", "1.6.2.n", "1.6.3.-", - "1.6.3.n", "1.6.4.-", "1.6.4.n", "1.6.5.-", "1.6.5.n", "1.6.6.-", - "1.6.6.n", "1.6.7.-", "1.6.7.n", "1.6.8.-", "1.6.8.n", "1.6.99.-", - "1.6.99.n", "1.6.n.n", 
"1.7.-.-", "1.7.1.-", "1.7.1.n", "1.7.2.-", - "1.7.2.n", "1.7.3.-", "1.7.3.n", "1.7.5.-", "1.7.5.n", "1.7.7.-", - "1.7.7.n", "1.7.99.-", "1.7.99.n", "1.7.n.n", "1.8.-.-", "1.8.1.-", - "1.8.1.n", "1.8.2.-", "1.8.2.n", "1.8.3.-", "1.8.3.n", "1.8.4.-", - "1.8.4.n", "1.8.5.-", "1.8.5.n", "1.8.6.-", "1.8.6.n", "1.8.7.-", - "1.8.7.n", "1.8.98.-", "1.8.98.n", "1.8.99.-", "1.8.99.n", - "1.8.n.n", "1.9.-.-", "1.9.3.-", "1.9.3.n", "1.9.6.-", "1.9.6.n", - "1.9.99.-", "1.9.99.n", "1.9.n.n", "1.10.-.-", "1.10.1.-", - "1.10.1.n", "1.10.2.-", "1.10.2.n", "1.10.3.-", "1.10.3.n", - "1.10.99.-", "1.10.99.n", "1.10.n.n", "1.11.-.-", "1.11.1.-", - "1.11.1.n", "1.11.n.n", "1.12.-.-", "1.12.1.-", "1.12.1.n", - "1.12.2.-", "1.12.2.n", "1.12.5.-", "1.12.5.n", "1.12.7.-", - "1.12.7.n", "1.12.98.-", "1.12.98.n", "1.12.99.-", "1.12.99.n", - "1.12.n.n", "1.13.-.-", "1.13.1.-", "1.13.1.n", "1.13.11.-", - "1.13.11.n", "1.13.12.-", "1.13.12.n", "1.13.99.-", "1.13.99.n", - "1.13.n.n", "1.14.-.-", "1.14.1.-", "1.14.1.n", "1.14.2.-", - "1.14.2.n", "1.14.3.-", "1.14.3.n", "1.14.11.-", "1.14.11.n", - "1.14.12.-", "1.14.12.n", "1.14.13.-", "1.14.13.n", "1.14.14.-", - "1.14.14.n", "1.14.15.-", "1.14.15.n", "1.14.16.-", "1.14.16.n", - "1.14.17.-", "1.14.17.n", "1.14.18.-", "1.14.18.n", "1.14.19.-", - "1.14.19.n", "1.14.20.-", "1.14.20.n", "1.14.21.-", "1.14.21.n", - "1.14.99.-", "1.14.99.n", "1.14.n.n", "1.15.-.-", "1.15.1.-", - "1.15.1.n", "1.15.n.n", "1.16.-.-", "1.16.1.-", "1.16.1.n", - "1.16.3.-", "1.16.3.n", "1.16.8.-", "1.16.8.n", "1.16.n.n", - "1.17.-.-", "1.17.1.-", "1.17.1.n", "1.17.3.-", "1.17.3.n", - "1.17.4.-", "1.17.4.n", "1.17.5.-", "1.17.5.n", "1.17.7.-", - "1.17.7.n", "1.17.99.-", "1.17.99.n", "1.17.n.n", "1.18.-.-", - "1.18.1.-", "1.18.1.n", "1.18.2.-", "1.18.2.n", "1.18.3.-", - "1.18.3.n", "1.18.6.-", "1.18.6.n", "1.18.96.-", "1.18.96.n", - "1.18.99.-", "1.18.99.n", "1.18.n.n", "1.19.-.-", "1.19.6.-", - "1.19.6.n", "1.19.n.n", "1.20.-.-", "1.20.1.-", "1.20.1.n", - "1.20.4.-", 
"1.20.4.n", "1.20.98.-", "1.20.98.n", "1.20.99.-", - "1.20.99.n", "1.20.n.n", "1.21.-.-", "1.21.3.-", "1.21.3.n", - "1.21.4.-", "1.21.4.n", "1.21.99.-", "1.21.99.n", "1.21.n.n", - "1.22.-.-", "1.22.1.-", "1.22.1.n", "1.22.n.n", "1.97.-.-", - "1.97.1.-", "1.97.1.n", "1.97.n.n", "1.98.-.-", "1.98.1.-", - "1.98.1.n", "1.98.n.n", "1.99.-.-", "1.99.1.-", "1.99.1.n", - "1.99.2.-", "1.99.2.n", "1.99.n.n", "1.n.n.n", "2.-.-.-", "2.1.-.-", - "2.1.1.-", "2.1.1.n", "2.1.2.-", "2.1.2.n", "2.1.3.-", "2.1.3.n", - "2.1.4.-", "2.1.4.n", "2.1.n.n", "2.2.-.-", "2.2.1.-", "2.2.1.n", - "2.2.n.n", "2.3.-.-", "2.3.1.-", "2.3.1.n", "2.3.2.-", "2.3.2.n", - "2.3.3.-", "2.3.3.n", "2.3.n.n", "2.4.-.-", "2.4.1.-", "2.4.1.n", - "2.4.2.-", "2.4.2.n", "2.4.99.-", "2.4.99.n", "2.4.n.n", "2.5.-.-", - "2.5.1.-", "2.5.1.n", "2.5.n.n", "2.6.-.-", "2.6.1.-", "2.6.1.n", - "2.6.2.-", "2.6.2.n", "2.6.3.-", "2.6.3.n", "2.6.99.-", "2.6.99.n", - "2.6.n.n", "2.7.-.-", "2.7.1.-", "2.7.1.n", "2.7.2.-", "2.7.2.n", - "2.7.3.-", "2.7.3.n", "2.7.4.-", "2.7.4.n", "2.7.5.-", "2.7.5.n", - "2.7.6.-", "2.7.6.n", "2.7.7.-", "2.7.7.n", "2.7.8.-", "2.7.8.n", - "2.7.9.-", "2.7.9.n", "2.7.10.-", "2.7.10.n", "2.7.11.-", - "2.7.11.n", "2.7.12.-", "2.7.12.n", "2.7.13.-", "2.7.13.n", - "2.7.99.-", "2.7.99.n", "2.7.n.n", "2.8.-.-", "2.8.1.-", "2.8.1.n", - "2.8.2.-", "2.8.2.n", "2.8.3.-", "2.8.3.n", "2.8.4.-", "2.8.4.n", - "2.8.n.n", "2.9.-.-", "2.9.1.-", "2.9.1.n", "2.9.n.n", "2.n.n.n", - "3.-.-.-", "3.1.-.-", "3.1.1.-", "3.1.1.n", "3.1.2.-", "3.1.2.n", - "3.1.3.-", "3.1.3.n", "3.1.4.-", "3.1.4.n", "3.1.5.-", "3.1.5.n", - "3.1.6.-", "3.1.6.n", "3.1.7.-", "3.1.7.n", "3.1.8.-", "3.1.8.n", - "3.1.11.-", "3.1.11.n", "3.1.13.-", "3.1.13.n", "3.1.14.-", - "3.1.14.n", "3.1.15.-", "3.1.15.n", "3.1.16.-", "3.1.16.n", - "3.1.21.-", "3.1.21.n", "3.1.22.-", "3.1.22.n", "3.1.23.-", - "3.1.23.n", "3.1.24.-", "3.1.24.n", "3.1.25.-", "3.1.25.n", - "3.1.26.-", "3.1.26.n", "3.1.27.-", "3.1.27.n", "3.1.30.-", - "3.1.30.n", "3.1.31.-", "3.1.31.n", 
"3.1.n.n", "3.2.-.-", "3.2.1.-", - "3.2.1.n", "3.2.2.-", "3.2.2.n", "3.2.3.-", "3.2.3.n", "3.2.n.n", - "3.3.-.-", "3.3.1.-", "3.3.1.n", "3.3.2.-", "3.3.2.n", "3.3.n.n", - "3.4.-.-", "3.4.1.-", "3.4.1.n", "3.4.2.-", "3.4.2.n", "3.4.3.-", - "3.4.3.n", "3.4.4.-", "3.4.4.n", "3.4.11.-", "3.4.11.n", "3.4.12.-", - "3.4.12.n", "3.4.13.-", "3.4.13.n", "3.4.14.-", "3.4.14.n", - "3.4.15.-", "3.4.15.n", "3.4.16.-", "3.4.16.n", "3.4.17.-", - "3.4.17.n", "3.4.18.-", "3.4.18.n", "3.4.19.-", "3.4.19.n", - "3.4.21.-", "3.4.21.n", "3.4.22.-", "3.4.22.n", "3.4.23.-", - "3.4.23.n", "3.4.24.-", "3.4.24.n", "3.4.25.-", "3.4.25.n", - "3.4.99.-", "3.4.99.n", "3.4.n.n", "3.5.-.-", "3.5.1.-", "3.5.1.n", - "3.5.2.-", "3.5.2.n", "3.5.3.-", "3.5.3.n", "3.5.4.-", "3.5.4.n", - "3.5.5.-", "3.5.5.n", "3.5.99.-", "3.5.99.n", "3.5.n.n", "3.6.-.-", - "3.6.1.-", "3.6.1.n", "3.6.2.-", "3.6.2.n", "3.6.3.-", "3.6.3.n", - "3.6.4.-", "3.6.4.n", "3.6.5.-", "3.6.5.n", "3.6.n.n", "3.7.-.-", - "3.7.1.-", "3.7.1.n", "3.7.n.n", "3.8.-.-", "3.8.1.-", "3.8.1.n", - "3.8.2.-", "3.8.2.n", "3.8.n.n", "3.9.-.-", "3.9.1.-", "3.9.1.n", - "3.9.n.n", "3.10.-.-", "3.10.1.-", "3.10.1.n", "3.10.n.n", - "3.11.-.-", "3.11.1.-", "3.11.1.n", "3.11.n.n", "3.12.-.-", - "3.12.1.-", "3.12.1.n", "3.12.n.n", "3.13.-.-", "3.13.1.-", - "3.13.1.n", "3.13.n.n", "3.n.n.n", "4.-.-.-", "4.1.-.-", "4.1.1.-", - "4.1.1.n", "4.1.2.-", "4.1.2.n", "4.1.3.-", "4.1.3.n", "4.1.99.-", - "4.1.99.n", "4.1.n.n", "4.2.-.-", "4.2.1.-", "4.2.1.n", "4.2.2.-", - "4.2.2.n", "4.2.3.-", "4.2.3.n", "4.2.99.-", "4.2.99.n", "4.2.n.n", - "4.3.-.-", "4.3.1.-", "4.3.1.n", "4.3.2.-", "4.3.2.n", "4.3.3.-", - "4.3.3.n", "4.3.99.-", "4.3.99.n", "4.3.n.n", "4.4.-.-", "4.4.1.-", - "4.4.1.n", "4.4.n.n", "4.5.-.-", "4.5.1.-", "4.5.1.n", "4.5.n.n", - "4.6.-.-", "4.6.1.-", "4.6.1.n", "4.6.n.n", "4.99.-.-", "4.99.1.-", - "4.99.1.n", "4.99.n.n", "4.n.n.n", "5.-.-.-", "5.1.-.-", "5.1.1.-", - "5.1.1.n", "5.1.2.-", "5.1.2.n", "5.1.3.-", "5.1.3.n", "5.1.99.-", - "5.1.99.n", 
"5.1.n.n", "5.2.-.-", "5.2.1.-", "5.2.1.n", "5.2.n.n", - "5.3.-.-", "5.3.1.-", "5.3.1.n", "5.3.2.-", "5.3.2.n", "5.3.3.-", - "5.3.3.n", "5.3.4.-", "5.3.4.n", "5.3.99.-", "5.3.99.n", "5.3.n.n", - "5.4.-.-", "5.4.1.-", "5.4.1.n", "5.4.2.-", "5.4.2.n", "5.4.3.-", - "5.4.3.n", "5.4.4.-", "5.4.4.n", "5.4.99.-", "5.4.99.n", "5.4.n.n", - "5.5.-.-", "5.5.1.-", "5.5.1.n", "5.5.n.n", "5.99.-.-", "5.99.1.-", - "5.99.1.n", "5.99.n.n", "5.n.n.n", "6.-.-.-", "6.1.-.-", "6.1.1.-", - "6.1.1.n", "6.1.n.n", "6.2.-.-", "6.2.1.-", "6.2.1.n", "6.2.n.n", - "6.3.-.-", "6.3.1.-", "6.3.1.n", "6.3.2.-", "6.3.2.n", "6.3.3.-", - "6.3.3.n", "6.3.4.-", "6.3.4.n", "6.3.5.-", "6.3.5.n", "6.3.n.n", - "6.4.-.-", "6.4.1.-", "6.4.1.n", "6.4.n.n", "6.5.-.-", "6.5.1.-", - "6.5.1.n", "6.5.n.n", "6.6.-.-", "6.6.1.-", "6.6.1.n", "6.6.n.n", - "6.n.n.n", - NULL -}; - -static CharPtr ecnum_specif [] = { - "1.1.1.1", "1.1.1.2", "1.1.1.3", "1.1.1.4", "1.1.1.6", "1.1.1.7", - "1.1.1.8", "1.1.1.9", "1.1.1.10", "1.1.1.11", "1.1.1.12", - "1.1.1.13", "1.1.1.14", "1.1.1.15", "1.1.1.16", "1.1.1.17", - "1.1.1.18", "1.1.1.19", "1.1.1.20", "1.1.1.21", "1.1.1.22", - "1.1.1.23", "1.1.1.24", "1.1.1.25", "1.1.1.26", "1.1.1.27", - "1.1.1.28", "1.1.1.29", "1.1.1.30", "1.1.1.31", "1.1.1.32", - "1.1.1.33", "1.1.1.34", "1.1.1.35", "1.1.1.36", "1.1.1.37", - "1.1.1.38", "1.1.1.39", "1.1.1.40", "1.1.1.41", "1.1.1.42", - "1.1.1.43", "1.1.1.44", "1.1.1.45", "1.1.1.46", "1.1.1.47", - "1.1.1.48", "1.1.1.49", "1.1.1.50", "1.1.1.51", "1.1.1.52", - "1.1.1.53", "1.1.1.54", "1.1.1.55", "1.1.1.56", "1.1.1.57", - "1.1.1.58", "1.1.1.59", "1.1.1.60", "1.1.1.61", "1.1.1.62", - "1.1.1.63", "1.1.1.64", "1.1.1.65", "1.1.1.66", "1.1.1.67", - "1.1.1.69", "1.1.1.71", "1.1.1.72", "1.1.1.73", "1.1.1.75", - "1.1.1.76", "1.1.1.77", "1.1.1.78", "1.1.1.79", "1.1.1.80", - "1.1.1.81", "1.1.1.82", "1.1.1.83", "1.1.1.84", "1.1.1.85", - "1.1.1.86", "1.1.1.87", "1.1.1.88", "1.1.1.90", "1.1.1.91", - "1.1.1.92", "1.1.1.93", "1.1.1.94", "1.1.1.95", "1.1.1.96", - 
"1.1.1.97", "1.1.1.98", "1.1.1.99", "1.1.1.100", "1.1.1.101", - "1.1.1.102", "1.1.1.103", "1.1.1.104", "1.1.1.105", "1.1.1.106", - "1.1.1.107", "1.1.1.108", "1.1.1.110", "1.1.1.111", "1.1.1.112", - "1.1.1.113", "1.1.1.114", "1.1.1.115", "1.1.1.116", "1.1.1.117", - "1.1.1.118", "1.1.1.119", "1.1.1.120", "1.1.1.121", "1.1.1.122", - "1.1.1.123", "1.1.1.124", "1.1.1.125", "1.1.1.126", "1.1.1.127", - "1.1.1.128", "1.1.1.129", "1.1.1.130", "1.1.1.131", "1.1.1.132", - "1.1.1.133", "1.1.1.134", "1.1.1.135", "1.1.1.136", "1.1.1.137", - "1.1.1.138", "1.1.1.140", "1.1.1.141", "1.1.1.142", "1.1.1.143", - "1.1.1.144", "1.1.1.145", "1.1.1.146", "1.1.1.147", "1.1.1.148", - "1.1.1.149", "1.1.1.150", "1.1.1.151", "1.1.1.152", "1.1.1.153", - "1.1.1.154", "1.1.1.156", "1.1.1.157", "1.1.1.158", "1.1.1.159", - "1.1.1.160", "1.1.1.161", "1.1.1.162", "1.1.1.163", "1.1.1.164", - "1.1.1.165", "1.1.1.166", "1.1.1.167", "1.1.1.168", "1.1.1.169", - "1.1.1.170", "1.1.1.172", "1.1.1.173", "1.1.1.174", "1.1.1.175", - "1.1.1.176", "1.1.1.177", "1.1.1.178", "1.1.1.179", "1.1.1.181", - "1.1.1.183", "1.1.1.184", "1.1.1.185", "1.1.1.186", "1.1.1.187", - "1.1.1.188", "1.1.1.189", "1.1.1.190", "1.1.1.191", "1.1.1.192", - "1.1.1.193", "1.1.1.194", "1.1.1.195", "1.1.1.196", "1.1.1.197", - "1.1.1.198", "1.1.1.199", "1.1.1.200", "1.1.1.201", "1.1.1.202", - "1.1.1.203", "1.1.1.205", "1.1.1.206", "1.1.1.207", "1.1.1.208", - "1.1.1.209", "1.1.1.210", "1.1.1.211", "1.1.1.212", "1.1.1.213", - "1.1.1.214", "1.1.1.215", "1.1.1.216", "1.1.1.217", "1.1.1.218", - "1.1.1.219", "1.1.1.220", "1.1.1.221", "1.1.1.222", "1.1.1.223", - "1.1.1.224", "1.1.1.225", "1.1.1.226", "1.1.1.227", "1.1.1.228", - "1.1.1.229", "1.1.1.230", "1.1.1.231", "1.1.1.232", "1.1.1.233", - "1.1.1.234", "1.1.1.235", "1.1.1.236", "1.1.1.237", "1.1.1.238", - "1.1.1.239", "1.1.1.240", "1.1.1.241", "1.1.1.243", "1.1.1.244", - "1.1.1.245", "1.1.1.246", "1.1.1.247", "1.1.1.248", "1.1.1.250", - "1.1.1.251", "1.1.1.252", "1.1.1.254", "1.1.1.255", 
"1.1.1.256", - "1.1.1.257", "1.1.1.258", "1.1.1.259", "1.1.1.260", "1.1.1.261", - "1.1.1.262", "1.1.1.263", "1.1.1.264", "1.1.1.265", "1.1.1.266", - "1.1.1.267", "1.1.1.268", "1.1.1.269", "1.1.1.270", "1.1.1.271", - "1.1.1.272", "1.1.1.273", "1.1.1.274", "1.1.1.275", "1.1.1.276", - "1.1.1.277", "1.1.1.278", "1.1.1.279", "1.1.1.280", "1.1.1.281", - "1.1.1.282", "1.1.1.283", "1.1.1.284", "1.1.1.285", "1.1.1.286", - "1.1.1.287", "1.1.1.288", "1.1.1.289", "1.1.1.290", "1.1.1.291", - "1.1.1.292", "1.1.1.294", "1.1.1.295", "1.1.1.296", "1.1.1.297", - "1.1.1.298", "1.1.1.299", "1.1.1.300", "1.1.1.301", "1.1.1.302", - "1.1.1.303", "1.1.1.304", "1.1.2.2", "1.1.2.3", "1.1.2.4", - "1.1.2.5", "1.1.3.3", "1.1.3.4", "1.1.3.5", "1.1.3.6", "1.1.3.7", - "1.1.3.8", "1.1.3.9", "1.1.3.10", "1.1.3.11", "1.1.3.12", - "1.1.3.13", "1.1.3.14", "1.1.3.15", "1.1.3.16", "1.1.3.17", - "1.1.3.18", "1.1.3.19", "1.1.3.20", "1.1.3.21", "1.1.3.23", - "1.1.3.27", "1.1.3.28", "1.1.3.29", "1.1.3.30", "1.1.3.37", - "1.1.3.38", "1.1.3.39", "1.1.3.40", "1.1.3.41", "1.1.4.1", - "1.1.4.2", "1.1.5.2", "1.1.5.3", "1.1.5.4", "1.1.5.5", "1.1.5.6", - "1.1.5.7", "1.1.99.1", "1.1.99.2", "1.1.99.3", "1.1.99.4", - "1.1.99.6", "1.1.99.7", "1.1.99.8", "1.1.99.9", "1.1.99.10", - "1.1.99.11", "1.1.99.12", "1.1.99.13", "1.1.99.14", "1.1.99.18", - "1.1.99.20", "1.1.99.21", "1.1.99.22", "1.1.99.23", "1.1.99.24", - "1.1.99.25", "1.1.99.26", "1.1.99.27", "1.1.99.28", "1.1.99.29", - "1.1.99.30", "1.1.99.31", "1.1.99.32", "1.1.99.33", "1.2.1.2", - "1.2.1.3", "1.2.1.4", "1.2.1.5", "1.2.1.7", "1.2.1.8", "1.2.1.9", - "1.2.1.10", "1.2.1.11", "1.2.1.12", "1.2.1.13", "1.2.1.15", - "1.2.1.16", "1.2.1.17", "1.2.1.18", "1.2.1.19", "1.2.1.20", - "1.2.1.21", "1.2.1.22", "1.2.1.23", "1.2.1.24", "1.2.1.25", - "1.2.1.26", "1.2.1.27", "1.2.1.28", "1.2.1.29", "1.2.1.30", - "1.2.1.31", "1.2.1.32", "1.2.1.33", "1.2.1.36", "1.2.1.38", - "1.2.1.39", "1.2.1.40", "1.2.1.41", "1.2.1.42", "1.2.1.43", - "1.2.1.44", "1.2.1.45", "1.2.1.46", "1.2.1.47", 
"1.2.1.48", - "1.2.1.49", "1.2.1.50", "1.2.1.51", "1.2.1.52", "1.2.1.53", - "1.2.1.54", "1.2.1.57", "1.2.1.58", "1.2.1.59", "1.2.1.60", - "1.2.1.61", "1.2.1.62", "1.2.1.63", "1.2.1.64", "1.2.1.65", - "1.2.1.66", "1.2.1.67", "1.2.1.68", "1.2.1.69", "1.2.1.70", - "1.2.1.71", "1.2.1.72", "1.2.1.73", "1.2.1.74", "1.2.1.75", - "1.2.1.76", "1.2.1.77", "1.2.1.78", "1.2.2.1", "1.2.2.2", "1.2.2.3", - "1.2.2.4", "1.2.3.1", "1.2.3.3", "1.2.3.4", "1.2.3.5", "1.2.3.6", - "1.2.3.7", "1.2.3.8", "1.2.3.9", "1.2.3.11", "1.2.3.13", "1.2.3.14", - "1.2.4.1", "1.2.4.2", "1.2.4.4", "1.2.7.1", "1.2.7.2", "1.2.7.3", - "1.2.7.4", "1.2.7.5", "1.2.7.6", "1.2.7.7", "1.2.7.8", "1.2.99.2", - "1.2.99.3", "1.2.99.4", "1.2.99.5", "1.2.99.6", "1.2.99.7", - "1.3.1.1", "1.3.1.2", "1.3.1.3", "1.3.1.4", "1.3.1.5", "1.3.1.6", - "1.3.1.7", "1.3.1.8", "1.3.1.9", "1.3.1.10", "1.3.1.11", "1.3.1.12", - "1.3.1.13", "1.3.1.14", "1.3.1.15", "1.3.1.16", "1.3.1.17", - "1.3.1.18", "1.3.1.19", "1.3.1.20", "1.3.1.21", "1.3.1.22", - "1.3.1.24", "1.3.1.25", "1.3.1.26", "1.3.1.27", "1.3.1.28", - "1.3.1.29", "1.3.1.30", "1.3.1.31", "1.3.1.32", "1.3.1.33", - "1.3.1.34", "1.3.1.35", "1.3.1.36", "1.3.1.37", "1.3.1.38", - "1.3.1.39", "1.3.1.40", "1.3.1.41", "1.3.1.42", "1.3.1.43", - "1.3.1.44", "1.3.1.45", "1.3.1.46", "1.3.1.47", "1.3.1.48", - "1.3.1.49", "1.3.1.51", "1.3.1.52", "1.3.1.53", "1.3.1.54", - "1.3.1.56", "1.3.1.57", "1.3.1.58", "1.3.1.60", "1.3.1.62", - "1.3.1.63", "1.3.1.64", "1.3.1.65", "1.3.1.66", "1.3.1.67", - "1.3.1.68", "1.3.1.69", "1.3.1.70", "1.3.1.71", "1.3.1.72", - "1.3.1.73", "1.3.1.74", "1.3.1.75", "1.3.1.76", "1.3.1.77", - "1.3.1.78", "1.3.1.79", "1.3.1.80", "1.3.1.81", "1.3.1.82", - "1.3.1.83", "1.3.1.84", "1.3.2.3", "1.3.3.1", "1.3.3.3", "1.3.3.4", - "1.3.3.5", "1.3.3.6", "1.3.3.7", "1.3.3.8", "1.3.3.9", "1.3.3.10", - "1.3.3.11", "1.3.3.12", "1.3.5.1", "1.3.5.2", "1.3.7.1", "1.3.7.2", - "1.3.7.3", "1.3.7.4", "1.3.7.5", "1.3.7.6", "1.3.99.1", "1.3.99.2", - "1.3.99.3", "1.3.99.4", "1.3.99.5", 
"1.3.99.6", "1.3.99.7", - "1.3.99.8", "1.3.99.10", "1.3.99.12", "1.3.99.13", "1.3.99.14", - "1.3.99.15", "1.3.99.16", "1.3.99.17", "1.3.99.18", "1.3.99.19", - "1.3.99.20", "1.3.99.21", "1.3.99.22", "1.3.99.23", "1.3.99.24", - "1.3.99.25", "1.4.1.1", "1.4.1.2", "1.4.1.3", "1.4.1.4", "1.4.1.5", - "1.4.1.7", "1.4.1.8", "1.4.1.9", "1.4.1.10", "1.4.1.11", "1.4.1.12", - "1.4.1.13", "1.4.1.14", "1.4.1.15", "1.4.1.16", "1.4.1.17", - "1.4.1.18", "1.4.1.19", "1.4.1.20", "1.4.1.21", "1.4.2.1", - "1.4.3.1", "1.4.3.2", "1.4.3.3", "1.4.3.4", "1.4.3.5", "1.4.3.7", - "1.4.3.8", "1.4.3.10", "1.4.3.11", "1.4.3.12", "1.4.3.13", - "1.4.3.14", "1.4.3.15", "1.4.3.16", "1.4.3.19", "1.4.3.20", - "1.4.3.21", "1.4.3.22", "1.4.3.23", "1.4.4.2", "1.4.5.1", "1.4.7.1", - "1.4.99.1", "1.4.99.2", "1.4.99.3", "1.4.99.4", "1.4.99.5", - "1.5.1.1", "1.5.1.2", "1.5.1.3", "1.5.1.5", "1.5.1.6", "1.5.1.7", - "1.5.1.8", "1.5.1.9", "1.5.1.10", "1.5.1.11", "1.5.1.12", - "1.5.1.15", "1.5.1.16", "1.5.1.17", "1.5.1.18", "1.5.1.19", - "1.5.1.20", "1.5.1.21", "1.5.1.22", "1.5.1.23", "1.5.1.24", - "1.5.1.25", "1.5.1.26", "1.5.1.27", "1.5.1.28", "1.5.1.29", - "1.5.1.30", "1.5.1.31", "1.5.1.32", "1.5.1.33", "1.5.1.34", - "1.5.3.1", "1.5.3.2", "1.5.3.4", "1.5.3.5", "1.5.3.6", "1.5.3.7", - "1.5.3.10", "1.5.3.11", "1.5.3.12", "1.5.3.13", "1.5.3.14", - "1.5.3.15", "1.5.3.16", "1.5.3.17", "1.5.4.1", "1.5.5.1", "1.5.7.1", - "1.5.8.1", "1.5.8.2", "1.5.99.1", "1.5.99.2", "1.5.99.3", - "1.5.99.4", "1.5.99.5", "1.5.99.6", "1.5.99.8", "1.5.99.9", - "1.5.99.11", "1.5.99.12", "1.5.99.13", "1.6.1.1", "1.6.1.2", - "1.6.2.2", "1.6.2.4", "1.6.2.5", "1.6.2.6", "1.6.3.1", "1.6.5.2", - "1.6.5.3", "1.6.5.4", "1.6.5.5", "1.6.5.6", "1.6.5.7", "1.6.6.9", - "1.6.99.1", "1.6.99.3", "1.6.99.5", "1.6.99.6", "1.7.1.1", - "1.7.1.2", "1.7.1.3", "1.7.1.4", "1.7.1.5", "1.7.1.6", "1.7.1.7", - "1.7.1.9", "1.7.1.10", "1.7.1.11", "1.7.1.12", "1.7.1.13", - "1.7.2.1", "1.7.2.2", "1.7.2.3", "1.7.3.1", "1.7.3.2", "1.7.3.3", - "1.7.3.4", "1.7.3.5", 
"1.7.5.1", "1.7.7.1", "1.7.7.2", "1.7.99.1", - "1.7.99.4", "1.7.99.6", "1.7.99.7", "1.7.99.8", "1.8.1.2", - "1.8.1.3", "1.8.1.4", "1.8.1.5", "1.8.1.6", "1.8.1.7", "1.8.1.8", - "1.8.1.9", "1.8.1.10", "1.8.1.11", "1.8.1.12", "1.8.1.13", - "1.8.1.14", "1.8.1.15", "1.8.1.16", "1.8.2.1", "1.8.2.2", "1.8.3.1", - "1.8.3.2", "1.8.3.3", "1.8.3.4", "1.8.3.5", "1.8.4.1", "1.8.4.2", - "1.8.4.3", "1.8.4.4", "1.8.4.7", "1.8.4.8", "1.8.4.9", "1.8.4.10", - "1.8.4.11", "1.8.4.12", "1.8.4.13", "1.8.4.14", "1.8.5.1", - "1.8.5.2", "1.8.7.1", "1.8.98.1", "1.8.98.2", "1.8.99.1", - "1.8.99.2", "1.8.99.3", "1.9.3.1", "1.9.6.1", "1.9.99.1", - "1.10.1.1", "1.10.2.1", "1.10.2.2", "1.10.3.1", "1.10.3.2", - "1.10.3.3", "1.10.3.4", "1.10.3.5", "1.10.3.6", "1.10.99.1", - "1.10.99.2", "1.10.99.3", "1.11.1.1", "1.11.1.2", "1.11.1.3", - "1.11.1.5", "1.11.1.6", "1.11.1.7", "1.11.1.8", "1.11.1.9", - "1.11.1.10", "1.11.1.11", "1.11.1.12", "1.11.1.13", "1.11.1.14", - "1.11.1.15", "1.11.1.16", "1.11.1.17", "1.12.1.2", "1.12.1.3", - "1.12.2.1", "1.12.5.1", "1.12.7.2", "1.12.98.1", "1.12.98.2", - "1.12.98.3", "1.12.99.6", "1.13.11.1", "1.13.11.2", "1.13.11.3", - "1.13.11.4", "1.13.11.5", "1.13.11.6", "1.13.11.8", "1.13.11.9", - "1.13.11.10", "1.13.11.11", "1.13.11.12", "1.13.11.13", - "1.13.11.14", "1.13.11.15", "1.13.11.16", "1.13.11.17", - "1.13.11.18", "1.13.11.19", "1.13.11.20", "1.13.11.22", - "1.13.11.23", "1.13.11.24", "1.13.11.25", "1.13.11.26", - "1.13.11.27", "1.13.11.28", "1.13.11.29", "1.13.11.30", - "1.13.11.31", "1.13.11.33", "1.13.11.34", "1.13.11.35", - "1.13.11.36", "1.13.11.37", "1.13.11.38", "1.13.11.39", - "1.13.11.40", "1.13.11.41", "1.13.11.43", "1.13.11.44", - "1.13.11.45", "1.13.11.46", "1.13.11.47", "1.13.11.48", - "1.13.11.49", "1.13.11.50", "1.13.11.51", "1.13.11.52", - "1.13.11.53", "1.13.11.54", "1.13.11.55", "1.13.11.56", "1.13.12.1", - "1.13.12.2", "1.13.12.3", "1.13.12.4", "1.13.12.5", "1.13.12.6", - "1.13.12.7", "1.13.12.8", "1.13.12.9", "1.13.12.12", "1.13.12.13", - 
"1.13.12.14", "1.13.12.15", "1.13.12.16", "1.13.12.17", "1.13.99.1", - "1.13.99.3", "1.14.11.1", "1.14.11.2", "1.14.11.3", "1.14.11.4", - "1.14.11.6", "1.14.11.7", "1.14.11.8", "1.14.11.9", "1.14.11.10", - "1.14.11.11", "1.14.11.12", "1.14.11.13", "1.14.11.14", - "1.14.11.15", "1.14.11.16", "1.14.11.17", "1.14.11.18", - "1.14.11.19", "1.14.11.20", "1.14.11.21", "1.14.11.22", - "1.14.11.23", "1.14.11.24", "1.14.11.25", "1.14.11.26", - "1.14.11.27", "1.14.11.28", "1.14.12.1", "1.14.12.3", "1.14.12.4", - "1.14.12.5", "1.14.12.7", "1.14.12.8", "1.14.12.9", "1.14.12.10", - "1.14.12.11", "1.14.12.12", "1.14.12.13", "1.14.12.14", - "1.14.12.15", "1.14.12.16", "1.14.12.17", "1.14.12.18", - "1.14.12.19", "1.14.12.20", "1.14.12.21", "1.14.13.1", "1.14.13.2", - "1.14.13.3", "1.14.13.4", "1.14.13.5", "1.14.13.6", "1.14.13.7", - "1.14.13.8", "1.14.13.9", "1.14.13.10", "1.14.13.11", "1.14.13.12", - "1.14.13.13", "1.14.13.14", "1.14.13.15", "1.14.13.16", - "1.14.13.17", "1.14.13.18", "1.14.13.19", "1.14.13.20", - "1.14.13.21", "1.14.13.22", "1.14.13.23", "1.14.13.24", - "1.14.13.25", "1.14.13.26", "1.14.13.27", "1.14.13.28", - "1.14.13.29", "1.14.13.30", "1.14.13.31", "1.14.13.32", - "1.14.13.33", "1.14.13.34", "1.14.13.35", "1.14.13.36", - "1.14.13.37", "1.14.13.38", "1.14.13.39", "1.14.13.40", - "1.14.13.41", "1.14.13.42", "1.14.13.43", "1.14.13.44", - "1.14.13.46", "1.14.13.47", "1.14.13.48", "1.14.13.49", - "1.14.13.50", "1.14.13.51", "1.14.13.52", "1.14.13.53", - "1.14.13.54", "1.14.13.55", "1.14.13.56", "1.14.13.57", - "1.14.13.58", "1.14.13.59", "1.14.13.60", "1.14.13.61", - "1.14.13.62", "1.14.13.63", "1.14.13.64", "1.14.13.66", - "1.14.13.67", "1.14.13.68", "1.14.13.69", "1.14.13.70", - "1.14.13.71", "1.14.13.72", "1.14.13.73", "1.14.13.74", - "1.14.13.75", "1.14.13.76", "1.14.13.77", "1.14.13.78", - "1.14.13.79", "1.14.13.80", "1.14.13.81", "1.14.13.82", - "1.14.13.83", "1.14.13.84", "1.14.13.85", "1.14.13.86", - "1.14.13.87", "1.14.13.88", "1.14.13.89", "1.14.13.90", - 
"1.14.13.91", "1.14.13.92", "1.14.13.93", "1.14.13.94", - "1.14.13.95", "1.14.13.96", "1.14.13.97", "1.14.13.98", - "1.14.13.99", "1.14.13.100", "1.14.13.101", "1.14.13.102", - "1.14.13.103", "1.14.13.104", "1.14.13.105", "1.14.13.106", - "1.14.13.107", "1.14.13.108", "1.14.13.109", "1.14.13.110", - "1.14.13.111", "1.14.13.112", "1.14.13.113", "1.14.14.1", - "1.14.14.3", "1.14.14.5", "1.14.14.7", "1.14.15.1", "1.14.15.2", - "1.14.15.3", "1.14.15.4", "1.14.15.5", "1.14.15.6", "1.14.15.7", - "1.14.15.8", "1.14.16.1", "1.14.16.2", "1.14.16.3", "1.14.16.4", - "1.14.16.5", "1.14.16.6", "1.14.17.1", "1.14.17.3", "1.14.17.4", - "1.14.18.1", "1.14.18.2", "1.14.19.1", "1.14.19.2", "1.14.19.3", - "1.14.19.4", "1.14.19.5", "1.14.19.6", "1.14.20.1", "1.14.21.1", - "1.14.21.2", "1.14.21.3", "1.14.21.4", "1.14.21.5", "1.14.21.6", - "1.14.21.7", "1.14.99.1", "1.14.99.2", "1.14.99.3", "1.14.99.4", - "1.14.99.7", "1.14.99.9", "1.14.99.10", "1.14.99.11", "1.14.99.12", - "1.14.99.14", "1.14.99.15", "1.14.99.19", "1.14.99.20", - "1.14.99.21", "1.14.99.22", "1.14.99.23", "1.14.99.24", - "1.14.99.26", "1.14.99.27", "1.14.99.28", "1.14.99.29", - "1.14.99.30", "1.14.99.31", "1.14.99.32", "1.14.99.33", - "1.14.99.34", "1.14.99.35", "1.14.99.36", "1.14.99.37", - "1.14.99.38", "1.14.99.39", "1.14.99.40", "1.15.1.1", "1.15.1.2", - "1.16.1.1", "1.16.1.2", "1.16.1.3", "1.16.1.4", "1.16.1.5", - "1.16.1.6", "1.16.1.7", "1.16.1.8", "1.16.3.1", "1.16.8.1", - "1.17.1.1", "1.17.1.2", "1.17.1.3", "1.17.1.4", "1.17.1.5", - "1.17.3.1", "1.17.3.2", "1.17.3.3", "1.17.4.1", "1.17.4.2", - "1.17.5.1", "1.17.7.1", "1.17.99.1", "1.17.99.2", "1.17.99.3", - "1.17.99.4", "1.17.99.5", "1.18.1.1", "1.18.1.2", "1.18.1.3", - "1.18.1.4", "1.18.6.1", "1.19.6.1", "1.20.1.1", "1.20.4.1", - "1.20.4.2", "1.20.4.3", "1.20.98.1", "1.20.99.1", "1.21.3.1", - "1.21.3.2", "1.21.3.3", "1.21.3.4", "1.21.3.5", "1.21.3.6", - "1.21.4.1", "1.21.4.2", "1.21.4.3", "1.21.4.4", "1.21.99.1", - "1.22.1.1", "1.97.1.1", "1.97.1.2", 
"1.97.1.3", "1.97.1.4", - "1.97.1.8", "1.97.1.9", "1.97.1.10", "1.97.1.11", "2.1.1.1", - "2.1.1.2", "2.1.1.3", "2.1.1.4", "2.1.1.5", "2.1.1.6", "2.1.1.7", - "2.1.1.8", "2.1.1.9", "2.1.1.10", "2.1.1.11", "2.1.1.12", - "2.1.1.13", "2.1.1.14", "2.1.1.15", "2.1.1.16", "2.1.1.17", - "2.1.1.18", "2.1.1.19", "2.1.1.20", "2.1.1.21", "2.1.1.22", - "2.1.1.25", "2.1.1.26", "2.1.1.27", "2.1.1.28", "2.1.1.29", - "2.1.1.31", "2.1.1.32", "2.1.1.33", "2.1.1.34", "2.1.1.35", - "2.1.1.36", "2.1.1.37", "2.1.1.38", "2.1.1.39", "2.1.1.40", - "2.1.1.41", "2.1.1.42", "2.1.1.43", "2.1.1.44", "2.1.1.45", - "2.1.1.46", "2.1.1.47", "2.1.1.48", "2.1.1.49", "2.1.1.50", - "2.1.1.51", "2.1.1.52", "2.1.1.53", "2.1.1.54", "2.1.1.55", - "2.1.1.56", "2.1.1.57", "2.1.1.59", "2.1.1.60", "2.1.1.61", - "2.1.1.62", "2.1.1.63", "2.1.1.64", "2.1.1.65", "2.1.1.66", - "2.1.1.67", "2.1.1.68", "2.1.1.69", "2.1.1.70", "2.1.1.71", - "2.1.1.72", "2.1.1.74", "2.1.1.75", "2.1.1.76", "2.1.1.77", - "2.1.1.78", "2.1.1.79", "2.1.1.80", "2.1.1.82", "2.1.1.83", - "2.1.1.84", "2.1.1.85", "2.1.1.86", "2.1.1.87", "2.1.1.88", - "2.1.1.89", "2.1.1.90", "2.1.1.91", "2.1.1.94", "2.1.1.95", - "2.1.1.96", "2.1.1.97", "2.1.1.98", "2.1.1.99", "2.1.1.100", - "2.1.1.101", "2.1.1.102", "2.1.1.103", "2.1.1.104", "2.1.1.105", - "2.1.1.106", "2.1.1.107", "2.1.1.108", "2.1.1.109", "2.1.1.110", - "2.1.1.111", "2.1.1.112", "2.1.1.113", "2.1.1.114", "2.1.1.115", - "2.1.1.116", "2.1.1.117", "2.1.1.118", "2.1.1.119", "2.1.1.120", - "2.1.1.121", "2.1.1.122", "2.1.1.123", "2.1.1.124", "2.1.1.125", - "2.1.1.126", "2.1.1.127", "2.1.1.128", "2.1.1.129", "2.1.1.130", - "2.1.1.131", "2.1.1.132", "2.1.1.133", "2.1.1.136", "2.1.1.137", - "2.1.1.139", "2.1.1.140", "2.1.1.141", "2.1.1.142", "2.1.1.143", - "2.1.1.144", "2.1.1.145", "2.1.1.146", "2.1.1.147", "2.1.1.148", - "2.1.1.149", "2.1.1.150", "2.1.1.151", "2.1.1.152", "2.1.1.153", - "2.1.1.154", "2.1.1.155", "2.1.1.156", "2.1.1.157", "2.1.1.158", - "2.1.1.159", "2.1.1.160", "2.1.1.161", "2.1.1.162", 
"2.1.1.163", - "2.1.1.164", "2.1.1.165", "2.1.2.1", "2.1.2.2", "2.1.2.3", - "2.1.2.4", "2.1.2.5", "2.1.2.7", "2.1.2.8", "2.1.2.9", "2.1.2.10", - "2.1.2.11", "2.1.3.1", "2.1.3.2", "2.1.3.3", "2.1.3.5", "2.1.3.6", - "2.1.3.7", "2.1.3.8", "2.1.3.9", "2.1.3.10", "2.1.3.11", "2.1.4.1", - "2.1.4.2", "2.2.1.1", "2.2.1.2", "2.2.1.3", "2.2.1.4", "2.2.1.5", - "2.2.1.6", "2.2.1.7", "2.2.1.8", "2.2.1.9", "2.3.1.1", "2.3.1.2", - "2.3.1.3", "2.3.1.4", "2.3.1.5", "2.3.1.6", "2.3.1.7", "2.3.1.8", - "2.3.1.9", "2.3.1.10", "2.3.1.11", "2.3.1.12", "2.3.1.13", - "2.3.1.14", "2.3.1.15", "2.3.1.16", "2.3.1.17", "2.3.1.18", - "2.3.1.19", "2.3.1.20", "2.3.1.21", "2.3.1.22", "2.3.1.23", - "2.3.1.24", "2.3.1.25", "2.3.1.26", "2.3.1.27", "2.3.1.28", - "2.3.1.29", "2.3.1.30", "2.3.1.31", "2.3.1.32", "2.3.1.33", - "2.3.1.34", "2.3.1.35", "2.3.1.36", "2.3.1.37", "2.3.1.38", - "2.3.1.39", "2.3.1.40", "2.3.1.41", "2.3.1.42", "2.3.1.43", - "2.3.1.44", "2.3.1.45", "2.3.1.46", "2.3.1.47", "2.3.1.48", - "2.3.1.49", "2.3.1.50", "2.3.1.51", "2.3.1.52", "2.3.1.53", - "2.3.1.54", "2.3.1.56", "2.3.1.57", "2.3.1.58", "2.3.1.59", - "2.3.1.60", "2.3.1.61", "2.3.1.62", "2.3.1.63", "2.3.1.64", - "2.3.1.65", "2.3.1.66", "2.3.1.67", "2.3.1.68", "2.3.1.69", - "2.3.1.71", "2.3.1.72", "2.3.1.73", "2.3.1.74", "2.3.1.75", - "2.3.1.76", "2.3.1.77", "2.3.1.78", "2.3.1.79", "2.3.1.80", - "2.3.1.81", "2.3.1.82", "2.3.1.83", "2.3.1.84", "2.3.1.85", - "2.3.1.86", "2.3.1.87", "2.3.1.88", "2.3.1.89", "2.3.1.90", - "2.3.1.91", "2.3.1.92", "2.3.1.93", "2.3.1.94", "2.3.1.95", - "2.3.1.96", "2.3.1.97", "2.3.1.98", "2.3.1.99", "2.3.1.100", - "2.3.1.101", "2.3.1.102", "2.3.1.103", "2.3.1.104", "2.3.1.105", - "2.3.1.106", "2.3.1.107", "2.3.1.108", "2.3.1.109", "2.3.1.110", - "2.3.1.111", "2.3.1.112", "2.3.1.113", "2.3.1.114", "2.3.1.115", - "2.3.1.116", "2.3.1.117", "2.3.1.118", "2.3.1.119", "2.3.1.121", - "2.3.1.122", "2.3.1.123", "2.3.1.125", "2.3.1.126", "2.3.1.127", - "2.3.1.128", "2.3.1.129", "2.3.1.130", "2.3.1.131", 
"2.3.1.132", - "2.3.1.133", "2.3.1.134", "2.3.1.135", "2.3.1.136", "2.3.1.137", - "2.3.1.138", "2.3.1.139", "2.3.1.140", "2.3.1.141", "2.3.1.142", - "2.3.1.143", "2.3.1.144", "2.3.1.145", "2.3.1.146", "2.3.1.147", - "2.3.1.148", "2.3.1.149", "2.3.1.150", "2.3.1.151", "2.3.1.152", - "2.3.1.153", "2.3.1.154", "2.3.1.155", "2.3.1.156", "2.3.1.157", - "2.3.1.158", "2.3.1.159", "2.3.1.160", "2.3.1.161", "2.3.1.162", - "2.3.1.163", "2.3.1.164", "2.3.1.165", "2.3.1.166", "2.3.1.167", - "2.3.1.168", "2.3.1.169", "2.3.1.170", "2.3.1.171", "2.3.1.172", - "2.3.1.173", "2.3.1.174", "2.3.1.175", "2.3.1.176", "2.3.1.177", - "2.3.1.178", "2.3.1.179", "2.3.1.180", "2.3.1.181", "2.3.1.182", - "2.3.1.183", "2.3.1.184", "2.3.1.185", "2.3.1.186", "2.3.1.187", - "2.3.1.188", "2.3.1.189", "2.3.1.190", "2.3.2.1", "2.3.2.2", - "2.3.2.3", "2.3.2.4", "2.3.2.5", "2.3.2.6", "2.3.2.7", "2.3.2.8", - "2.3.2.9", "2.3.2.10", "2.3.2.11", "2.3.2.12", "2.3.2.13", - "2.3.2.14", "2.3.2.15", "2.3.3.1", "2.3.3.2", "2.3.3.3", "2.3.3.4", - "2.3.3.5", "2.3.3.6", "2.3.3.7", "2.3.3.8", "2.3.3.9", "2.3.3.10", - "2.3.3.11", "2.3.3.12", "2.3.3.13", "2.3.3.14", "2.3.3.15", - "2.4.1.1", "2.4.1.2", "2.4.1.4", "2.4.1.5", "2.4.1.7", "2.4.1.8", - "2.4.1.9", "2.4.1.10", "2.4.1.11", "2.4.1.12", "2.4.1.13", - "2.4.1.14", "2.4.1.15", "2.4.1.16", "2.4.1.17", "2.4.1.18", - "2.4.1.19", "2.4.1.20", "2.4.1.21", "2.4.1.22", "2.4.1.23", - "2.4.1.24", "2.4.1.25", "2.4.1.26", "2.4.1.27", "2.4.1.28", - "2.4.1.29", "2.4.1.30", "2.4.1.31", "2.4.1.32", "2.4.1.33", - "2.4.1.34", "2.4.1.35", "2.4.1.36", "2.4.1.37", "2.4.1.38", - "2.4.1.39", "2.4.1.40", "2.4.1.41", "2.4.1.43", "2.4.1.44", - "2.4.1.45", "2.4.1.46", "2.4.1.47", "2.4.1.48", "2.4.1.49", - "2.4.1.50", "2.4.1.52", "2.4.1.53", "2.4.1.54", "2.4.1.56", - "2.4.1.57", "2.4.1.58", "2.4.1.60", "2.4.1.62", "2.4.1.63", - "2.4.1.64", "2.4.1.65", "2.4.1.66", "2.4.1.67", "2.4.1.68", - "2.4.1.69", "2.4.1.70", "2.4.1.71", "2.4.1.73", "2.4.1.74", - "2.4.1.78", "2.4.1.79", "2.4.1.80", 
"2.4.1.81", "2.4.1.82", - "2.4.1.83", "2.4.1.85", "2.4.1.86", "2.4.1.87", "2.4.1.88", - "2.4.1.90", "2.4.1.91", "2.4.1.92", "2.4.1.94", "2.4.1.95", - "2.4.1.96", "2.4.1.97", "2.4.1.99", "2.4.1.100", "2.4.1.101", - "2.4.1.102", "2.4.1.103", "2.4.1.104", "2.4.1.105", "2.4.1.106", - "2.4.1.109", "2.4.1.110", "2.4.1.111", "2.4.1.113", "2.4.1.114", - "2.4.1.115", "2.4.1.116", "2.4.1.117", "2.4.1.118", "2.4.1.119", - "2.4.1.120", "2.4.1.121", "2.4.1.122", "2.4.1.123", "2.4.1.125", - "2.4.1.126", "2.4.1.127", "2.4.1.128", "2.4.1.129", "2.4.1.130", - "2.4.1.131", "2.4.1.132", "2.4.1.133", "2.4.1.134", "2.4.1.135", - "2.4.1.136", "2.4.1.137", "2.4.1.138", "2.4.1.139", "2.4.1.140", - "2.4.1.141", "2.4.1.142", "2.4.1.143", "2.4.1.144", "2.4.1.145", - "2.4.1.146", "2.4.1.147", "2.4.1.148", "2.4.1.149", "2.4.1.150", - "2.4.1.152", "2.4.1.153", "2.4.1.155", "2.4.1.156", "2.4.1.157", - "2.4.1.158", "2.4.1.159", "2.4.1.160", "2.4.1.161", "2.4.1.162", - "2.4.1.163", "2.4.1.164", "2.4.1.165", "2.4.1.166", "2.4.1.167", - "2.4.1.168", "2.4.1.170", "2.4.1.171", "2.4.1.172", "2.4.1.173", - "2.4.1.174", "2.4.1.175", "2.4.1.176", "2.4.1.177", "2.4.1.178", - "2.4.1.179", "2.4.1.180", "2.4.1.181", "2.4.1.182", "2.4.1.183", - "2.4.1.184", "2.4.1.185", "2.4.1.186", "2.4.1.187", "2.4.1.188", - "2.4.1.189", "2.4.1.190", "2.4.1.191", "2.4.1.192", "2.4.1.193", - "2.4.1.194", "2.4.1.195", "2.4.1.196", "2.4.1.197", "2.4.1.198", - "2.4.1.199", "2.4.1.201", "2.4.1.202", "2.4.1.203", "2.4.1.205", - "2.4.1.206", "2.4.1.207", "2.4.1.208", "2.4.1.209", "2.4.1.210", - "2.4.1.211", "2.4.1.212", "2.4.1.213", "2.4.1.214", "2.4.1.215", - "2.4.1.216", "2.4.1.217", "2.4.1.218", "2.4.1.219", "2.4.1.220", - "2.4.1.221", "2.4.1.222", "2.4.1.223", "2.4.1.224", "2.4.1.225", - "2.4.1.226", "2.4.1.227", "2.4.1.228", "2.4.1.229", "2.4.1.230", - "2.4.1.231", "2.4.1.232", "2.4.1.234", "2.4.1.236", "2.4.1.237", - "2.4.1.238", "2.4.1.239", "2.4.1.240", "2.4.1.241", "2.4.1.242", - "2.4.1.243", "2.4.1.244", "2.4.1.245", 
"2.4.1.246", "2.4.1.247", - "2.4.1.248", "2.4.1.249", "2.4.1.250", "2.4.2.1", "2.4.2.2", - "2.4.2.3", "2.4.2.4", "2.4.2.5", "2.4.2.6", "2.4.2.7", "2.4.2.8", - "2.4.2.9", "2.4.2.10", "2.4.2.11", "2.4.2.12", "2.4.2.14", - "2.4.2.15", "2.4.2.16", "2.4.2.17", "2.4.2.18", "2.4.2.19", - "2.4.2.20", "2.4.2.21", "2.4.2.22", "2.4.2.23", "2.4.2.24", - "2.4.2.25", "2.4.2.26", "2.4.2.27", "2.4.2.28", "2.4.2.29", - "2.4.2.30", "2.4.2.31", "2.4.2.32", "2.4.2.33", "2.4.2.34", - "2.4.2.35", "2.4.2.36", "2.4.2.37", "2.4.2.38", "2.4.2.39", - "2.4.2.40", "2.4.2.41", "2.4.2.42", "2.4.99.1", "2.4.99.2", - "2.4.99.3", "2.4.99.4", "2.4.99.5", "2.4.99.6", "2.4.99.7", - "2.4.99.8", "2.4.99.9", "2.4.99.10", "2.4.99.11", "2.5.1.1", - "2.5.1.2", "2.5.1.3", "2.5.1.4", "2.5.1.5", "2.5.1.6", "2.5.1.7", - "2.5.1.9", "2.5.1.10", "2.5.1.11", "2.5.1.15", "2.5.1.16", - "2.5.1.17", "2.5.1.18", "2.5.1.19", "2.5.1.20", "2.5.1.21", - "2.5.1.22", "2.5.1.23", "2.5.1.24", "2.5.1.25", "2.5.1.26", - "2.5.1.27", "2.5.1.28", "2.5.1.29", "2.5.1.30", "2.5.1.31", - "2.5.1.32", "2.5.1.33", "2.5.1.34", "2.5.1.35", "2.5.1.36", - "2.5.1.38", "2.5.1.39", "2.5.1.41", "2.5.1.42", "2.5.1.43", - "2.5.1.44", "2.5.1.45", "2.5.1.46", "2.5.1.47", "2.5.1.48", - "2.5.1.49", "2.5.1.50", "2.5.1.51", "2.5.1.52", "2.5.1.53", - "2.5.1.54", "2.5.1.55", "2.5.1.56", "2.5.1.57", "2.5.1.58", - "2.5.1.59", "2.5.1.60", "2.5.1.61", "2.5.1.62", "2.5.1.63", - "2.5.1.65", "2.5.1.66", "2.5.1.67", "2.5.1.68", "2.5.1.69", - "2.5.1.70", "2.5.1.71", "2.5.1.72", "2.5.1.73", "2.5.1.74", - "2.5.1.75", "2.5.1.76", "2.5.1.77", "2.5.1.78", "2.5.1.79", - "2.5.1.80", "2.6.1.1", "2.6.1.2", "2.6.1.3", "2.6.1.4", "2.6.1.5", - "2.6.1.6", "2.6.1.7", "2.6.1.8", "2.6.1.9", "2.6.1.11", "2.6.1.12", - "2.6.1.13", "2.6.1.14", "2.6.1.15", "2.6.1.16", "2.6.1.17", - "2.6.1.18", "2.6.1.19", "2.6.1.21", "2.6.1.22", "2.6.1.23", - "2.6.1.24", "2.6.1.26", "2.6.1.27", "2.6.1.28", "2.6.1.29", - "2.6.1.30", "2.6.1.31", "2.6.1.32", "2.6.1.33", "2.6.1.34", - "2.6.1.35", 
"2.6.1.36", "2.6.1.37", "2.6.1.38", "2.6.1.39", - "2.6.1.40", "2.6.1.41", "2.6.1.42", "2.6.1.43", "2.6.1.44", - "2.6.1.45", "2.6.1.46", "2.6.1.47", "2.6.1.48", "2.6.1.49", - "2.6.1.50", "2.6.1.51", "2.6.1.52", "2.6.1.54", "2.6.1.55", - "2.6.1.56", "2.6.1.57", "2.6.1.58", "2.6.1.59", "2.6.1.60", - "2.6.1.62", "2.6.1.63", "2.6.1.64", "2.6.1.65", "2.6.1.66", - "2.6.1.67", "2.6.1.68", "2.6.1.70", "2.6.1.71", "2.6.1.72", - "2.6.1.73", "2.6.1.74", "2.6.1.75", "2.6.1.76", "2.6.1.77", - "2.6.1.78", "2.6.1.79", "2.6.1.80", "2.6.1.81", "2.6.1.82", - "2.6.1.83", "2.6.1.84", "2.6.1.85", "2.6.1.86", "2.6.3.1", - "2.6.99.1", "2.6.99.2", "2.7.1.1", "2.7.1.2", "2.7.1.3", "2.7.1.4", - "2.7.1.5", "2.7.1.6", "2.7.1.7", "2.7.1.8", "2.7.1.10", "2.7.1.11", - "2.7.1.12", "2.7.1.13", "2.7.1.14", "2.7.1.15", "2.7.1.16", - "2.7.1.17", "2.7.1.18", "2.7.1.19", "2.7.1.20", "2.7.1.21", - "2.7.1.22", "2.7.1.23", "2.7.1.24", "2.7.1.25", "2.7.1.26", - "2.7.1.27", "2.7.1.28", "2.7.1.29", "2.7.1.30", "2.7.1.31", - "2.7.1.32", "2.7.1.33", "2.7.1.34", "2.7.1.35", "2.7.1.36", - "2.7.1.39", "2.7.1.40", "2.7.1.41", "2.7.1.42", "2.7.1.43", - "2.7.1.44", "2.7.1.45", "2.7.1.46", "2.7.1.47", "2.7.1.48", - "2.7.1.49", "2.7.1.50", "2.7.1.51", "2.7.1.52", "2.7.1.53", - "2.7.1.54", "2.7.1.55", "2.7.1.56", "2.7.1.58", "2.7.1.59", - "2.7.1.60", "2.7.1.61", "2.7.1.62", "2.7.1.63", "2.7.1.64", - "2.7.1.65", "2.7.1.66", "2.7.1.67", "2.7.1.68", "2.7.1.69", - "2.7.1.71", "2.7.1.72", "2.7.1.73", "2.7.1.74", "2.7.1.76", - "2.7.1.77", "2.7.1.78", "2.7.1.79", "2.7.1.80", "2.7.1.81", - "2.7.1.82", "2.7.1.83", "2.7.1.84", "2.7.1.85", "2.7.1.86", - "2.7.1.87", "2.7.1.88", "2.7.1.89", "2.7.1.90", "2.7.1.91", - "2.7.1.92", "2.7.1.93", "2.7.1.94", "2.7.1.95", "2.7.1.100", - "2.7.1.101", "2.7.1.102", "2.7.1.103", "2.7.1.105", "2.7.1.106", - "2.7.1.107", "2.7.1.108", "2.7.1.113", "2.7.1.114", "2.7.1.118", - "2.7.1.119", "2.7.1.121", "2.7.1.122", "2.7.1.127", "2.7.1.130", - "2.7.1.134", "2.7.1.136", "2.7.1.137", "2.7.1.138", 
"2.7.1.140", - "2.7.1.142", "2.7.1.143", "2.7.1.144", "2.7.1.145", "2.7.1.146", - "2.7.1.147", "2.7.1.148", "2.7.1.149", "2.7.1.150", "2.7.1.151", - "2.7.1.153", "2.7.1.154", "2.7.1.156", "2.7.1.157", "2.7.1.158", - "2.7.1.159", "2.7.1.160", "2.7.1.161", "2.7.1.162", "2.7.1.163", - "2.7.1.164", "2.7.1.165", "2.7.2.1", "2.7.2.2", "2.7.2.3", - "2.7.2.4", "2.7.2.6", "2.7.2.7", "2.7.2.8", "2.7.2.10", "2.7.2.11", - "2.7.2.12", "2.7.2.13", "2.7.2.14", "2.7.2.15", "2.7.3.1", - "2.7.3.2", "2.7.3.3", "2.7.3.4", "2.7.3.5", "2.7.3.6", "2.7.3.7", - "2.7.3.8", "2.7.3.9", "2.7.3.10", "2.7.4.1", "2.7.4.2", "2.7.4.3", - "2.7.4.4", "2.7.4.6", "2.7.4.7", "2.7.4.8", "2.7.4.9", "2.7.4.10", - "2.7.4.11", "2.7.4.12", "2.7.4.13", "2.7.4.14", "2.7.4.15", - "2.7.4.16", "2.7.4.17", "2.7.4.18", "2.7.4.19", "2.7.4.20", - "2.7.4.21", "2.7.4.22", "2.7.4.23", "2.7.4.24", "2.7.6.1", - "2.7.6.2", "2.7.6.3", "2.7.6.4", "2.7.6.5", "2.7.7.1", "2.7.7.2", - "2.7.7.3", "2.7.7.4", "2.7.7.5", "2.7.7.6", "2.7.7.7", "2.7.7.8", - "2.7.7.9", "2.7.7.10", "2.7.7.11", "2.7.7.12", "2.7.7.13", - "2.7.7.14", "2.7.7.15", "2.7.7.18", "2.7.7.19", "2.7.7.21", - "2.7.7.22", "2.7.7.23", "2.7.7.24", "2.7.7.25", "2.7.7.27", - "2.7.7.28", "2.7.7.30", "2.7.7.31", "2.7.7.32", "2.7.7.33", - "2.7.7.34", "2.7.7.35", "2.7.7.36", "2.7.7.37", "2.7.7.38", - "2.7.7.39", "2.7.7.40", "2.7.7.41", "2.7.7.42", "2.7.7.43", - "2.7.7.44", "2.7.7.45", "2.7.7.46", "2.7.7.47", "2.7.7.48", - "2.7.7.49", "2.7.7.50", "2.7.7.51", "2.7.7.52", "2.7.7.53", - "2.7.7.54", "2.7.7.55", "2.7.7.56", "2.7.7.57", "2.7.7.58", - "2.7.7.59", "2.7.7.60", "2.7.7.61", "2.7.7.62", "2.7.7.63", - "2.7.7.64", "2.7.7.65", "2.7.7.66", "2.7.7.67", "2.7.7.68", - "2.7.8.1", "2.7.8.2", "2.7.8.3", "2.7.8.4", "2.7.8.5", "2.7.8.6", - "2.7.8.7", "2.7.8.8", "2.7.8.9", "2.7.8.10", "2.7.8.11", "2.7.8.12", - "2.7.8.13", "2.7.8.14", "2.7.8.15", "2.7.8.17", "2.7.8.18", - "2.7.8.19", "2.7.8.20", "2.7.8.21", "2.7.8.22", "2.7.8.23", - "2.7.8.24", "2.7.8.25", "2.7.8.26", "2.7.8.27", 
"2.7.8.28", - "2.7.9.1", "2.7.9.2", "2.7.9.3", "2.7.9.4", "2.7.9.5", "2.7.10.1", - "2.7.10.2", "2.7.11.1", "2.7.11.2", "2.7.11.3", "2.7.11.4", - "2.7.11.5", "2.7.11.6", "2.7.11.7", "2.7.11.8", "2.7.11.9", - "2.7.11.10", "2.7.11.11", "2.7.11.12", "2.7.11.13", "2.7.11.14", - "2.7.11.15", "2.7.11.16", "2.7.11.17", "2.7.11.18", "2.7.11.19", - "2.7.11.20", "2.7.11.21", "2.7.11.22", "2.7.11.23", "2.7.11.24", - "2.7.11.25", "2.7.11.26", "2.7.11.27", "2.7.11.28", "2.7.11.29", - "2.7.11.30", "2.7.11.31", "2.7.12.1", "2.7.12.2", "2.7.13.1", - "2.7.13.2", "2.7.13.3", "2.7.99.1", "2.8.1.1", "2.8.1.2", "2.8.1.3", - "2.8.1.4", "2.8.1.5", "2.8.1.6", "2.8.1.7", "2.8.1.8", "2.8.2.1", - "2.8.2.2", "2.8.2.3", "2.8.2.4", "2.8.2.5", "2.8.2.6", "2.8.2.7", - "2.8.2.8", "2.8.2.9", "2.8.2.10", "2.8.2.11", "2.8.2.13", - "2.8.2.14", "2.8.2.15", "2.8.2.16", "2.8.2.17", "2.8.2.18", - "2.8.2.19", "2.8.2.20", "2.8.2.21", "2.8.2.22", "2.8.2.23", - "2.8.2.24", "2.8.2.25", "2.8.2.26", "2.8.2.27", "2.8.2.28", - "2.8.2.29", "2.8.2.30", "2.8.2.31", "2.8.2.32", "2.8.2.33", - "2.8.2.34", "2.8.3.1", "2.8.3.2", "2.8.3.3", "2.8.3.5", "2.8.3.6", - "2.8.3.7", "2.8.3.8", "2.8.3.9", "2.8.3.10", "2.8.3.11", "2.8.3.12", - "2.8.3.13", "2.8.3.14", "2.8.3.15", "2.8.3.16", "2.8.3.17", - "2.8.4.1", "2.8.4.2", "2.9.1.1", "2.9.1.2", "3.1.1.1", "3.1.1.2", - "3.1.1.3", "3.1.1.4", "3.1.1.5", "3.1.1.6", "3.1.1.7", "3.1.1.8", - "3.1.1.10", "3.1.1.11", "3.1.1.13", "3.1.1.14", "3.1.1.15", - "3.1.1.17", "3.1.1.19", "3.1.1.20", "3.1.1.21", "3.1.1.22", - "3.1.1.23", "3.1.1.24", "3.1.1.25", "3.1.1.26", "3.1.1.27", - "3.1.1.28", "3.1.1.29", "3.1.1.30", "3.1.1.31", "3.1.1.32", - "3.1.1.33", "3.1.1.34", "3.1.1.35", "3.1.1.36", "3.1.1.37", - "3.1.1.38", "3.1.1.39", "3.1.1.40", "3.1.1.41", "3.1.1.42", - "3.1.1.43", "3.1.1.44", "3.1.1.45", "3.1.1.46", "3.1.1.47", - "3.1.1.48", "3.1.1.49", "3.1.1.50", "3.1.1.51", "3.1.1.52", - "3.1.1.53", "3.1.1.54", "3.1.1.55", "3.1.1.56", "3.1.1.57", - "3.1.1.58", "3.1.1.59", "3.1.1.60", "3.1.1.61", 
"3.1.1.63", - "3.1.1.64", "3.1.1.65", "3.1.1.66", "3.1.1.67", "3.1.1.68", - "3.1.1.70", "3.1.1.71", "3.1.1.72", "3.1.1.73", "3.1.1.74", - "3.1.1.75", "3.1.1.76", "3.1.1.77", "3.1.1.78", "3.1.1.79", - "3.1.1.80", "3.1.1.81", "3.1.1.82", "3.1.1.83", "3.1.1.84", - "3.1.2.1", "3.1.2.2", "3.1.2.3", "3.1.2.4", "3.1.2.5", "3.1.2.6", - "3.1.2.7", "3.1.2.10", "3.1.2.11", "3.1.2.12", "3.1.2.13", - "3.1.2.14", "3.1.2.15", "3.1.2.16", "3.1.2.17", "3.1.2.18", - "3.1.2.19", "3.1.2.20", "3.1.2.21", "3.1.2.22", "3.1.2.23", - "3.1.2.25", "3.1.2.26", "3.1.2.27", "3.1.3.1", "3.1.3.2", "3.1.3.3", - "3.1.3.4", "3.1.3.5", "3.1.3.6", "3.1.3.7", "3.1.3.8", "3.1.3.9", - "3.1.3.10", "3.1.3.11", "3.1.3.12", "3.1.3.13", "3.1.3.14", - "3.1.3.15", "3.1.3.16", "3.1.3.17", "3.1.3.18", "3.1.3.19", - "3.1.3.20", "3.1.3.21", "3.1.3.22", "3.1.3.23", "3.1.3.24", - "3.1.3.25", "3.1.3.26", "3.1.3.27", "3.1.3.28", "3.1.3.29", - "3.1.3.31", "3.1.3.32", "3.1.3.33", "3.1.3.34", "3.1.3.35", - "3.1.3.36", "3.1.3.37", "3.1.3.38", "3.1.3.39", "3.1.3.40", - "3.1.3.41", "3.1.3.42", "3.1.3.43", "3.1.3.44", "3.1.3.45", - "3.1.3.46", "3.1.3.47", "3.1.3.48", "3.1.3.49", "3.1.3.50", - "3.1.3.51", "3.1.3.52", "3.1.3.53", "3.1.3.54", "3.1.3.55", - "3.1.3.56", "3.1.3.57", "3.1.3.58", "3.1.3.59", "3.1.3.60", - "3.1.3.62", "3.1.3.63", "3.1.3.64", "3.1.3.66", "3.1.3.67", - "3.1.3.68", "3.1.3.69", "3.1.3.70", "3.1.3.71", "3.1.3.72", - "3.1.3.73", "3.1.3.74", "3.1.3.75", "3.1.3.76", "3.1.3.77", - "3.1.3.78", "3.1.3.79", "3.1.3.80", "3.1.4.1", "3.1.4.2", "3.1.4.3", - "3.1.4.4", "3.1.4.11", "3.1.4.12", "3.1.4.13", "3.1.4.14", - "3.1.4.15", "3.1.4.16", "3.1.4.17", "3.1.4.35", "3.1.4.37", - "3.1.4.38", "3.1.4.39", "3.1.4.40", "3.1.4.41", "3.1.4.42", - "3.1.4.43", "3.1.4.44", "3.1.4.45", "3.1.4.46", "3.1.4.48", - "3.1.4.49", "3.1.4.50", "3.1.4.51", "3.1.4.52", "3.1.4.53", - "3.1.5.1", "3.1.6.1", "3.1.6.2", "3.1.6.3", "3.1.6.4", "3.1.6.6", - "3.1.6.7", "3.1.6.8", "3.1.6.9", "3.1.6.10", "3.1.6.11", "3.1.6.12", - "3.1.6.13", 
"3.1.6.14", "3.1.6.15", "3.1.6.16", "3.1.6.17", - "3.1.6.18", "3.1.7.1", "3.1.7.2", "3.1.7.3", "3.1.7.4", "3.1.7.5", - "3.1.8.1", "3.1.8.2", "3.1.11.1", "3.1.11.2", "3.1.11.3", - "3.1.11.4", "3.1.11.5", "3.1.11.6", "3.1.13.1", "3.1.13.2", - "3.1.13.3", "3.1.13.4", "3.1.13.5", "3.1.14.1", "3.1.15.1", - "3.1.16.1", "3.1.21.1", "3.1.21.2", "3.1.21.3", "3.1.21.4", - "3.1.21.5", "3.1.21.6", "3.1.21.7", "3.1.22.1", "3.1.22.2", - "3.1.22.4", "3.1.22.5", "3.1.25.1", "3.1.26.1", "3.1.26.2", - "3.1.26.3", "3.1.26.4", "3.1.26.5", "3.1.26.6", "3.1.26.7", - "3.1.26.8", "3.1.26.9", "3.1.26.10", "3.1.26.11", "3.1.26.12", - "3.1.26.13", "3.1.27.1", "3.1.27.2", "3.1.27.3", "3.1.27.4", - "3.1.27.5", "3.1.27.6", "3.1.27.7", "3.1.27.8", "3.1.27.9", - "3.1.27.10", "3.1.30.1", "3.1.30.2", "3.1.31.1", "3.2.1.1", - "3.2.1.2", "3.2.1.3", "3.2.1.4", "3.2.1.6", "3.2.1.7", "3.2.1.8", - "3.2.1.10", "3.2.1.11", "3.2.1.14", "3.2.1.15", "3.2.1.17", - "3.2.1.18", "3.2.1.20", "3.2.1.21", "3.2.1.22", "3.2.1.23", - "3.2.1.24", "3.2.1.25", "3.2.1.26", "3.2.1.28", "3.2.1.31", - "3.2.1.32", "3.2.1.33", "3.2.1.35", "3.2.1.36", "3.2.1.37", - "3.2.1.38", "3.2.1.39", "3.2.1.40", "3.2.1.41", "3.2.1.42", - "3.2.1.43", "3.2.1.44", "3.2.1.45", "3.2.1.46", "3.2.1.47", - "3.2.1.48", "3.2.1.49", "3.2.1.50", "3.2.1.51", "3.2.1.52", - "3.2.1.53", "3.2.1.54", "3.2.1.55", "3.2.1.56", "3.2.1.57", - "3.2.1.58", "3.2.1.59", "3.2.1.60", "3.2.1.61", "3.2.1.62", - "3.2.1.63", "3.2.1.64", "3.2.1.65", "3.2.1.66", "3.2.1.67", - "3.2.1.68", "3.2.1.70", "3.2.1.71", "3.2.1.72", "3.2.1.73", - "3.2.1.74", "3.2.1.75", "3.2.1.76", "3.2.1.77", "3.2.1.78", - "3.2.1.80", "3.2.1.81", "3.2.1.82", "3.2.1.83", "3.2.1.84", - "3.2.1.85", "3.2.1.86", "3.2.1.87", "3.2.1.88", "3.2.1.89", - "3.2.1.91", "3.2.1.92", "3.2.1.93", "3.2.1.94", "3.2.1.95", - "3.2.1.96", "3.2.1.97", "3.2.1.98", "3.2.1.99", "3.2.1.100", - "3.2.1.101", "3.2.1.102", "3.2.1.103", "3.2.1.104", "3.2.1.105", - "3.2.1.106", "3.2.1.107", "3.2.1.108", "3.2.1.109", "3.2.1.111", - 
"3.2.1.112", "3.2.1.113", "3.2.1.114", "3.2.1.115", "3.2.1.116", - "3.2.1.117", "3.2.1.118", "3.2.1.119", "3.2.1.120", "3.2.1.121", - "3.2.1.122", "3.2.1.123", "3.2.1.124", "3.2.1.125", "3.2.1.126", - "3.2.1.127", "3.2.1.128", "3.2.1.129", "3.2.1.130", "3.2.1.131", - "3.2.1.132", "3.2.1.133", "3.2.1.134", "3.2.1.135", "3.2.1.136", - "3.2.1.137", "3.2.1.139", "3.2.1.140", "3.2.1.141", "3.2.1.142", - "3.2.1.143", "3.2.1.144", "3.2.1.145", "3.2.1.146", "3.2.1.147", - "3.2.1.149", "3.2.1.150", "3.2.1.151", "3.2.1.152", "3.2.1.153", - "3.2.1.154", "3.2.1.155", "3.2.1.156", "3.2.1.157", "3.2.1.158", - "3.2.1.159", "3.2.1.161", "3.2.1.162", "3.2.1.163", "3.2.1.164", - "3.2.1.165", "3.2.2.1", "3.2.2.2", "3.2.2.3", "3.2.2.4", "3.2.2.5", - "3.2.2.6", "3.2.2.7", "3.2.2.8", "3.2.2.9", "3.2.2.10", "3.2.2.11", - "3.2.2.12", "3.2.2.13", "3.2.2.14", "3.2.2.15", "3.2.2.16", - "3.2.2.17", "3.2.2.19", "3.2.2.20", "3.2.2.21", "3.2.2.22", - "3.2.2.23", "3.2.2.24", "3.2.2.25", "3.2.2.26", "3.2.2.27", - "3.2.2.28", "3.2.2.29", "3.3.1.1", "3.3.1.2", "3.3.2.1", "3.3.2.2", - "3.3.2.4", "3.3.2.5", "3.3.2.6", "3.3.2.7", "3.3.2.8", "3.3.2.9", - "3.3.2.10", "3.3.2.11", "3.4.11.1", "3.4.11.2", "3.4.11.3", - "3.4.11.4", "3.4.11.5", "3.4.11.6", "3.4.11.7", "3.4.11.9", - "3.4.11.10", "3.4.11.13", "3.4.11.14", "3.4.11.15", "3.4.11.16", - "3.4.11.17", "3.4.11.18", "3.4.11.19", "3.4.11.20", "3.4.11.21", - "3.4.11.22", "3.4.11.23", "3.4.11.24", "3.4.13.3", "3.4.13.4", - "3.4.13.5", "3.4.13.7", "3.4.13.9", "3.4.13.12", "3.4.13.17", - "3.4.13.18", "3.4.13.19", "3.4.13.20", "3.4.13.21", "3.4.13.22", - "3.4.14.1", "3.4.14.2", "3.4.14.4", "3.4.14.5", "3.4.14.6", - "3.4.14.9", "3.4.14.10", "3.4.14.11", "3.4.14.12", "3.4.15.1", - "3.4.15.4", "3.4.15.5", "3.4.15.6", "3.4.16.2", "3.4.16.4", - "3.4.16.5", "3.4.16.6", "3.4.17.1", "3.4.17.2", "3.4.17.3", - "3.4.17.4", "3.4.17.6", "3.4.17.8", "3.4.17.10", "3.4.17.11", - "3.4.17.12", "3.4.17.13", "3.4.17.14", "3.4.17.15", "3.4.17.16", - "3.4.17.17", "3.4.17.18", 
"3.4.17.19", "3.4.17.20", "3.4.17.21", - "3.4.17.22", "3.4.17.23", "3.4.18.1", "3.4.19.1", "3.4.19.2", - "3.4.19.3", "3.4.19.5", "3.4.19.6", "3.4.19.7", "3.4.19.9", - "3.4.19.11", "3.4.19.12", "3.4.21.1", "3.4.21.2", "3.4.21.3", - "3.4.21.4", "3.4.21.5", "3.4.21.6", "3.4.21.7", "3.4.21.9", - "3.4.21.10", "3.4.21.12", "3.4.21.19", "3.4.21.20", "3.4.21.21", - "3.4.21.22", "3.4.21.25", "3.4.21.26", "3.4.21.27", "3.4.21.32", - "3.4.21.34", "3.4.21.35", "3.4.21.36", "3.4.21.37", "3.4.21.38", - "3.4.21.39", "3.4.21.41", "3.4.21.42", "3.4.21.43", "3.4.21.45", - "3.4.21.46", "3.4.21.47", "3.4.21.48", "3.4.21.49", "3.4.21.50", - "3.4.21.53", "3.4.21.54", "3.4.21.55", "3.4.21.57", "3.4.21.59", - "3.4.21.60", "3.4.21.61", "3.4.21.62", "3.4.21.63", "3.4.21.64", - "3.4.21.65", "3.4.21.66", "3.4.21.67", "3.4.21.68", "3.4.21.69", - "3.4.21.70", "3.4.21.71", "3.4.21.72", "3.4.21.73", "3.4.21.74", - "3.4.21.75", "3.4.21.76", "3.4.21.77", "3.4.21.78", "3.4.21.79", - "3.4.21.80", "3.4.21.81", "3.4.21.82", "3.4.21.83", "3.4.21.84", - "3.4.21.85", "3.4.21.86", "3.4.21.88", "3.4.21.89", "3.4.21.90", - "3.4.21.91", "3.4.21.92", "3.4.21.93", "3.4.21.94", "3.4.21.95", - "3.4.21.96", "3.4.21.97", "3.4.21.98", "3.4.21.99", "3.4.21.100", - "3.4.21.101", "3.4.21.102", "3.4.21.103", "3.4.21.104", - "3.4.21.105", "3.4.21.106", "3.4.21.107", "3.4.21.108", - "3.4.21.109", "3.4.21.110", "3.4.21.111", "3.4.21.112", - "3.4.21.113", "3.4.21.114", "3.4.21.115", "3.4.21.116", - "3.4.21.117", "3.4.21.118", "3.4.21.119", "3.4.21.120", "3.4.22.1", - "3.4.22.2", "3.4.22.3", "3.4.22.6", "3.4.22.7", "3.4.22.8", - "3.4.22.10", "3.4.22.14", "3.4.22.15", "3.4.22.16", "3.4.22.24", - "3.4.22.25", "3.4.22.26", "3.4.22.27", "3.4.22.28", "3.4.22.29", - "3.4.22.30", "3.4.22.31", "3.4.22.32", "3.4.22.33", "3.4.22.34", - "3.4.22.35", "3.4.22.36", "3.4.22.37", "3.4.22.38", "3.4.22.39", - "3.4.22.40", "3.4.22.41", "3.4.22.42", "3.4.22.43", "3.4.22.44", - "3.4.22.45", "3.4.22.46", "3.4.22.47", "3.4.22.48", "3.4.22.49", - 
"3.4.22.50", "3.4.22.51", "3.4.22.52", "3.4.22.53", "3.4.22.54", - "3.4.22.55", "3.4.22.56", "3.4.22.57", "3.4.22.58", "3.4.22.59", - "3.4.22.60", "3.4.22.61", "3.4.22.62", "3.4.22.63", "3.4.22.64", - "3.4.22.65", "3.4.22.66", "3.4.22.67", "3.4.22.68", "3.4.22.69", - "3.4.22.70", "3.4.22.71", "3.4.23.1", "3.4.23.2", "3.4.23.3", - "3.4.23.4", "3.4.23.5", "3.4.23.12", "3.4.23.15", "3.4.23.16", - "3.4.23.17", "3.4.23.18", "3.4.23.19", "3.4.23.20", "3.4.23.21", - "3.4.23.22", "3.4.23.23", "3.4.23.24", "3.4.23.25", "3.4.23.26", - "3.4.23.28", "3.4.23.29", "3.4.23.30", "3.4.23.31", "3.4.23.32", - "3.4.23.34", "3.4.23.35", "3.4.23.36", "3.4.23.38", "3.4.23.39", - "3.4.23.40", "3.4.23.41", "3.4.23.42", "3.4.23.43", "3.4.23.44", - "3.4.23.45", "3.4.23.46", "3.4.23.47", "3.4.23.48", "3.4.23.49", - "3.4.23.50", "3.4.23.51", "3.4.24.1", "3.4.24.3", "3.4.24.6", - "3.4.24.7", "3.4.24.11", "3.4.24.12", "3.4.24.13", "3.4.24.14", - "3.4.24.15", "3.4.24.16", "3.4.24.17", "3.4.24.18", "3.4.24.19", - "3.4.24.20", "3.4.24.21", "3.4.24.22", "3.4.24.23", "3.4.24.24", - "3.4.24.25", "3.4.24.26", "3.4.24.27", "3.4.24.28", "3.4.24.29", - "3.4.24.30", "3.4.24.31", "3.4.24.32", "3.4.24.33", "3.4.24.34", - "3.4.24.35", "3.4.24.36", "3.4.24.37", "3.4.24.38", "3.4.24.39", - "3.4.24.40", "3.4.24.41", "3.4.24.42", "3.4.24.43", "3.4.24.44", - "3.4.24.45", "3.4.24.46", "3.4.24.47", "3.4.24.48", "3.4.24.49", - "3.4.24.50", "3.4.24.51", "3.4.24.52", "3.4.24.53", "3.4.24.54", - "3.4.24.55", "3.4.24.56", "3.4.24.57", "3.4.24.58", "3.4.24.59", - "3.4.24.60", "3.4.24.61", "3.4.24.62", "3.4.24.63", "3.4.24.64", - "3.4.24.65", "3.4.24.66", "3.4.24.67", "3.4.24.68", "3.4.24.69", - "3.4.24.70", "3.4.24.71", "3.4.24.72", "3.4.24.73", "3.4.24.74", - "3.4.24.75", "3.4.24.76", "3.4.24.77", "3.4.24.78", "3.4.24.79", - "3.4.24.80", "3.4.24.81", "3.4.24.82", "3.4.24.83", "3.4.24.84", - "3.4.24.85", "3.4.24.86", "3.4.24.87", "3.4.25.1", "3.4.25.2", - "3.5.1.1", "3.5.1.2", "3.5.1.3", "3.5.1.4", "3.5.1.5", "3.5.1.6", - 
"3.5.1.7", "3.5.1.8", "3.5.1.9", "3.5.1.10", "3.5.1.11", "3.5.1.12", - "3.5.1.13", "3.5.1.14", "3.5.1.15", "3.5.1.16", "3.5.1.17", - "3.5.1.18", "3.5.1.19", "3.5.1.20", "3.5.1.21", "3.5.1.22", - "3.5.1.23", "3.5.1.24", "3.5.1.25", "3.5.1.26", "3.5.1.27", - "3.5.1.28", "3.5.1.29", "3.5.1.30", "3.5.1.31", "3.5.1.32", - "3.5.1.33", "3.5.1.35", "3.5.1.36", "3.5.1.38", "3.5.1.39", - "3.5.1.40", "3.5.1.41", "3.5.1.42", "3.5.1.43", "3.5.1.44", - "3.5.1.46", "3.5.1.47", "3.5.1.48", "3.5.1.49", "3.5.1.50", - "3.5.1.51", "3.5.1.52", "3.5.1.53", "3.5.1.54", "3.5.1.55", - "3.5.1.56", "3.5.1.57", "3.5.1.58", "3.5.1.59", "3.5.1.60", - "3.5.1.61", "3.5.1.62", "3.5.1.63", "3.5.1.64", "3.5.1.65", - "3.5.1.66", "3.5.1.67", "3.5.1.68", "3.5.1.69", "3.5.1.70", - "3.5.1.71", "3.5.1.72", "3.5.1.73", "3.5.1.74", "3.5.1.75", - "3.5.1.76", "3.5.1.77", "3.5.1.78", "3.5.1.79", "3.5.1.81", - "3.5.1.82", "3.5.1.83", "3.5.1.84", "3.5.1.85", "3.5.1.86", - "3.5.1.87", "3.5.1.88", "3.5.1.89", "3.5.1.90", "3.5.1.91", - "3.5.1.92", "3.5.1.93", "3.5.1.94", "3.5.1.95", "3.5.1.96", - "3.5.1.97", "3.5.1.98", "3.5.1.99", "3.5.1.100", "3.5.1.101", - "3.5.1.102", "3.5.1.103", "3.5.2.1", "3.5.2.2", "3.5.2.3", - "3.5.2.4", "3.5.2.5", "3.5.2.6", "3.5.2.7", "3.5.2.9", "3.5.2.10", - "3.5.2.11", "3.5.2.12", "3.5.2.13", "3.5.2.14", "3.5.2.15", - "3.5.2.16", "3.5.2.17", "3.5.2.18", "3.5.3.1", "3.5.3.2", "3.5.3.3", - "3.5.3.4", "3.5.3.5", "3.5.3.6", "3.5.3.7", "3.5.3.8", "3.5.3.9", - "3.5.3.10", "3.5.3.11", "3.5.3.12", "3.5.3.13", "3.5.3.14", - "3.5.3.15", "3.5.3.16", "3.5.3.17", "3.5.3.18", "3.5.3.19", - "3.5.3.20", "3.5.3.21", "3.5.3.22", "3.5.3.23", "3.5.4.1", - "3.5.4.2", "3.5.4.3", "3.5.4.4", "3.5.4.5", "3.5.4.6", "3.5.4.7", - "3.5.4.8", "3.5.4.9", "3.5.4.10", "3.5.4.11", "3.5.4.12", - "3.5.4.13", "3.5.4.14", "3.5.4.15", "3.5.4.16", "3.5.4.17", - "3.5.4.18", "3.5.4.19", "3.5.4.20", "3.5.4.21", "3.5.4.22", - "3.5.4.23", "3.5.4.24", "3.5.4.25", "3.5.4.26", "3.5.4.27", - "3.5.4.28", "3.5.4.29", "3.5.4.30", 
"3.5.5.1", "3.5.5.2", "3.5.5.4", - "3.5.5.5", "3.5.5.6", "3.5.5.7", "3.5.5.8", "3.5.99.1", "3.5.99.2", - "3.5.99.3", "3.5.99.4", "3.5.99.5", "3.5.99.6", "3.5.99.7", - "3.6.1.1", "3.6.1.2", "3.6.1.3", "3.6.1.5", "3.6.1.6", "3.6.1.7", - "3.6.1.8", "3.6.1.9", "3.6.1.10", "3.6.1.11", "3.6.1.12", - "3.6.1.13", "3.6.1.14", "3.6.1.15", "3.6.1.16", "3.6.1.17", - "3.6.1.18", "3.6.1.19", "3.6.1.20", "3.6.1.21", "3.6.1.22", - "3.6.1.23", "3.6.1.24", "3.6.1.25", "3.6.1.26", "3.6.1.27", - "3.6.1.28", "3.6.1.29", "3.6.1.30", "3.6.1.31", "3.6.1.39", - "3.6.1.40", "3.6.1.41", "3.6.1.42", "3.6.1.43", "3.6.1.44", - "3.6.1.45", "3.6.1.52", "3.6.1.53", "3.6.2.1", "3.6.2.2", "3.6.3.1", - "3.6.3.2", "3.6.3.3", "3.6.3.4", "3.6.3.5", "3.6.3.6", "3.6.3.7", - "3.6.3.8", "3.6.3.9", "3.6.3.10", "3.6.3.11", "3.6.3.12", - "3.6.3.14", "3.6.3.15", "3.6.3.16", "3.6.3.17", "3.6.3.18", - "3.6.3.19", "3.6.3.20", "3.6.3.21", "3.6.3.22", "3.6.3.23", - "3.6.3.24", "3.6.3.25", "3.6.3.26", "3.6.3.27", "3.6.3.28", - "3.6.3.29", "3.6.3.30", "3.6.3.31", "3.6.3.32", "3.6.3.33", - "3.6.3.34", "3.6.3.35", "3.6.3.36", "3.6.3.37", "3.6.3.38", - "3.6.3.39", "3.6.3.40", "3.6.3.41", "3.6.3.42", "3.6.3.43", - "3.6.3.44", "3.6.3.46", "3.6.3.47", "3.6.3.48", "3.6.3.49", - "3.6.3.50", "3.6.3.51", "3.6.3.52", "3.6.3.53", "3.6.4.1", - "3.6.4.2", "3.6.4.3", "3.6.4.4", "3.6.4.5", "3.6.4.6", "3.6.4.7", - "3.6.4.8", "3.6.4.9", "3.6.4.10", "3.6.4.11", "3.6.4.12", - "3.6.4.13", "3.6.5.1", "3.6.5.2", "3.6.5.3", "3.6.5.4", "3.6.5.5", - "3.6.5.6", "3.7.1.1", "3.7.1.2", "3.7.1.3", "3.7.1.4", "3.7.1.5", - "3.7.1.6", "3.7.1.7", "3.7.1.8", "3.7.1.9", "3.7.1.10", "3.7.1.11", - "3.8.1.1", "3.8.1.2", "3.8.1.3", "3.8.1.5", "3.8.1.6", "3.8.1.7", - "3.8.1.8", "3.8.1.9", "3.8.1.10", "3.8.1.11", "3.9.1.1", "3.10.1.1", - "3.10.1.2", "3.11.1.1", "3.11.1.2", "3.11.1.3", "3.12.1.1", - "3.13.1.1", "3.13.1.3", "4.1.1.1", "4.1.1.2", "4.1.1.3", "4.1.1.4", - "4.1.1.5", "4.1.1.6", "4.1.1.7", "4.1.1.8", "4.1.1.9", "4.1.1.11", - "4.1.1.12", "4.1.1.14", 
"4.1.1.15", "4.1.1.16", "4.1.1.17", - "4.1.1.18", "4.1.1.19", "4.1.1.20", "4.1.1.21", "4.1.1.22", - "4.1.1.23", "4.1.1.24", "4.1.1.25", "4.1.1.28", "4.1.1.29", - "4.1.1.30", "4.1.1.31", "4.1.1.32", "4.1.1.33", "4.1.1.34", - "4.1.1.35", "4.1.1.36", "4.1.1.37", "4.1.1.38", "4.1.1.39", - "4.1.1.40", "4.1.1.41", "4.1.1.42", "4.1.1.43", "4.1.1.44", - "4.1.1.45", "4.1.1.46", "4.1.1.47", "4.1.1.48", "4.1.1.49", - "4.1.1.50", "4.1.1.51", "4.1.1.52", "4.1.1.53", "4.1.1.54", - "4.1.1.55", "4.1.1.56", "4.1.1.57", "4.1.1.58", "4.1.1.59", - "4.1.1.60", "4.1.1.61", "4.1.1.62", "4.1.1.63", "4.1.1.64", - "4.1.1.65", "4.1.1.66", "4.1.1.67", "4.1.1.68", "4.1.1.69", - "4.1.1.70", "4.1.1.71", "4.1.1.72", "4.1.1.73", "4.1.1.74", - "4.1.1.75", "4.1.1.76", "4.1.1.77", "4.1.1.78", "4.1.1.79", - "4.1.1.80", "4.1.1.81", "4.1.1.82", "4.1.1.83", "4.1.1.84", - "4.1.1.85", "4.1.1.86", "4.1.1.87", "4.1.1.88", "4.1.1.89", - "4.1.1.90", "4.1.2.2", "4.1.2.4", "4.1.2.5", "4.1.2.8", "4.1.2.9", - "4.1.2.10", "4.1.2.11", "4.1.2.12", "4.1.2.13", "4.1.2.14", - "4.1.2.17", "4.1.2.18", "4.1.2.19", "4.1.2.20", "4.1.2.21", - "4.1.2.22", "4.1.2.23", "4.1.2.24", "4.1.2.25", "4.1.2.26", - "4.1.2.27", "4.1.2.28", "4.1.2.29", "4.1.2.30", "4.1.2.32", - "4.1.2.33", "4.1.2.34", "4.1.2.35", "4.1.2.36", "4.1.2.37", - "4.1.2.38", "4.1.2.40", "4.1.2.41", "4.1.2.42", "4.1.2.43", - "4.1.2.44", "4.1.2.45", "4.1.3.1", "4.1.3.3", "4.1.3.4", "4.1.3.6", - "4.1.3.13", "4.1.3.14", "4.1.3.16", "4.1.3.17", "4.1.3.22", - "4.1.3.24", "4.1.3.25", "4.1.3.26", "4.1.3.27", "4.1.3.30", - "4.1.3.32", "4.1.3.34", "4.1.3.35", "4.1.3.36", "4.1.3.38", - "4.1.3.39", "4.1.3.40", "4.1.99.1", "4.1.99.2", "4.1.99.3", - "4.1.99.5", "4.1.99.11", "4.1.99.12", "4.1.99.13", "4.1.99.14", - "4.1.99.15", "4.2.1.1", "4.2.1.2", "4.2.1.3", "4.2.1.4", "4.2.1.5", - "4.2.1.6", "4.2.1.7", "4.2.1.8", "4.2.1.9", "4.2.1.10", "4.2.1.11", - "4.2.1.12", "4.2.1.17", "4.2.1.18", "4.2.1.19", "4.2.1.20", - "4.2.1.22", "4.2.1.24", "4.2.1.25", "4.2.1.27", "4.2.1.28", - 
"4.2.1.30", "4.2.1.31", "4.2.1.32", "4.2.1.33", "4.2.1.34", - "4.2.1.35", "4.2.1.36", "4.2.1.39", "4.2.1.40", "4.2.1.41", - "4.2.1.42", "4.2.1.43", "4.2.1.44", "4.2.1.45", "4.2.1.46", - "4.2.1.47", "4.2.1.48", "4.2.1.49", "4.2.1.50", "4.2.1.51", - "4.2.1.52", "4.2.1.53", "4.2.1.54", "4.2.1.55", "4.2.1.56", - "4.2.1.57", "4.2.1.58", "4.2.1.59", "4.2.1.60", "4.2.1.61", - "4.2.1.62", "4.2.1.65", "4.2.1.66", "4.2.1.67", "4.2.1.68", - "4.2.1.69", "4.2.1.70", "4.2.1.73", "4.2.1.74", "4.2.1.75", - "4.2.1.76", "4.2.1.77", "4.2.1.78", "4.2.1.79", "4.2.1.80", - "4.2.1.81", "4.2.1.82", "4.2.1.83", "4.2.1.84", "4.2.1.85", - "4.2.1.87", "4.2.1.88", "4.2.1.89", "4.2.1.90", "4.2.1.91", - "4.2.1.92", "4.2.1.93", "4.2.1.94", "4.2.1.95", "4.2.1.96", - "4.2.1.97", "4.2.1.98", "4.2.1.99", "4.2.1.100", "4.2.1.101", - "4.2.1.103", "4.2.1.104", "4.2.1.105", "4.2.1.106", "4.2.1.107", - "4.2.1.108", "4.2.1.109", "4.2.1.110", "4.2.1.111", "4.2.1.112", - "4.2.1.113", "4.2.1.114", "4.2.1.115", "4.2.1.116", "4.2.1.117", - "4.2.1.118", "4.2.1.119", "4.2.1.120", "4.2.2.1", "4.2.2.2", - "4.2.2.3", "4.2.2.5", "4.2.2.6", "4.2.2.7", "4.2.2.8", "4.2.2.9", - "4.2.2.10", "4.2.2.11", "4.2.2.12", "4.2.2.13", "4.2.2.14", - "4.2.2.15", "4.2.2.16", "4.2.2.17", "4.2.2.18", "4.2.2.19", - "4.2.2.20", "4.2.2.21", "4.2.2.22", "4.2.3.1", "4.2.3.2", "4.2.3.3", - "4.2.3.4", "4.2.3.5", "4.2.3.6", "4.2.3.7", "4.2.3.8", "4.2.3.9", - "4.2.3.10", "4.2.3.11", "4.2.3.12", "4.2.3.13", "4.2.3.14", - "4.2.3.15", "4.2.3.16", "4.2.3.17", "4.2.3.18", "4.2.3.19", - "4.2.3.20", "4.2.3.21", "4.2.3.22", "4.2.3.23", "4.2.3.24", - "4.2.3.25", "4.2.3.26", "4.2.3.27", "4.2.3.28", "4.2.3.29", - "4.2.3.30", "4.2.3.31", "4.2.3.32", "4.2.3.33", "4.2.3.34", - "4.2.3.35", "4.2.3.36", "4.2.3.37", "4.2.3.38", "4.2.3.39", - "4.2.3.40", "4.2.3.41", "4.2.3.42", "4.2.3.43", "4.2.3.44", - "4.2.3.45", "4.2.99.12", "4.2.99.18", "4.2.99.20", "4.3.1.1", - "4.3.1.2", "4.3.1.3", "4.3.1.4", "4.3.1.6", "4.3.1.7", "4.3.1.9", - "4.3.1.10", "4.3.1.12", 
"4.3.1.13", "4.3.1.14", "4.3.1.15", - "4.3.1.16", "4.3.1.17", "4.3.1.18", "4.3.1.19", "4.3.1.20", - "4.3.1.22", "4.3.1.23", "4.3.1.24", "4.3.1.25", "4.3.1.26", - "4.3.2.1", "4.3.2.2", "4.3.2.3", "4.3.2.4", "4.3.2.5", "4.3.3.1", - "4.3.3.2", "4.3.3.3", "4.3.3.4", "4.3.3.5", "4.3.99.2", "4.4.1.1", - "4.4.1.2", "4.4.1.3", "4.4.1.4", "4.4.1.5", "4.4.1.6", "4.4.1.8", - "4.4.1.9", "4.4.1.10", "4.4.1.11", "4.4.1.13", "4.4.1.14", - "4.4.1.15", "4.4.1.16", "4.4.1.17", "4.4.1.19", "4.4.1.20", - "4.4.1.21", "4.4.1.22", "4.4.1.23", "4.4.1.24", "4.4.1.25", - "4.5.1.1", "4.5.1.2", "4.5.1.3", "4.5.1.4", "4.5.1.5", "4.6.1.1", - "4.6.1.2", "4.6.1.6", "4.6.1.12", "4.6.1.13", "4.6.1.14", - "4.6.1.15", "4.99.1.1", "4.99.1.2", "4.99.1.3", "4.99.1.4", - "4.99.1.5", "4.99.1.6", "4.99.1.7", "4.99.1.8", "5.1.1.1", - "5.1.1.2", "5.1.1.3", "5.1.1.4", "5.1.1.5", "5.1.1.6", "5.1.1.7", - "5.1.1.8", "5.1.1.9", "5.1.1.10", "5.1.1.11", "5.1.1.12", - "5.1.1.13", "5.1.1.14", "5.1.1.15", "5.1.1.16", "5.1.1.17", - "5.1.1.18", "5.1.2.1", "5.1.2.2", "5.1.2.3", "5.1.2.4", "5.1.2.5", - "5.1.2.6", "5.1.3.1", "5.1.3.2", "5.1.3.3", "5.1.3.4", "5.1.3.5", - "5.1.3.6", "5.1.3.7", "5.1.3.8", "5.1.3.9", "5.1.3.10", "5.1.3.11", - "5.1.3.12", "5.1.3.13", "5.1.3.14", "5.1.3.15", "5.1.3.16", - "5.1.3.17", "5.1.3.18", "5.1.3.19", "5.1.3.20", "5.1.3.21", - "5.1.3.22", "5.1.3.23", "5.1.99.1", "5.1.99.2", "5.1.99.3", - "5.1.99.4", "5.1.99.5", "5.2.1.1", "5.2.1.2", "5.2.1.3", "5.2.1.4", - "5.2.1.5", "5.2.1.6", "5.2.1.7", "5.2.1.8", "5.2.1.9", "5.2.1.10", - "5.3.1.1", "5.3.1.3", "5.3.1.4", "5.3.1.5", "5.3.1.6", "5.3.1.7", - "5.3.1.8", "5.3.1.9", "5.3.1.12", "5.3.1.13", "5.3.1.14", - "5.3.1.15", "5.3.1.16", "5.3.1.17", "5.3.1.20", "5.3.1.21", - "5.3.1.22", "5.3.1.23", "5.3.1.24", "5.3.1.25", "5.3.1.26", - "5.3.1.27", "5.3.2.1", "5.3.2.2", "5.3.3.1", "5.3.3.2", "5.3.3.3", - "5.3.3.4", "5.3.3.5", "5.3.3.6", "5.3.3.7", "5.3.3.8", "5.3.3.9", - "5.3.3.10", "5.3.3.11", "5.3.3.12", "5.3.3.13", "5.3.3.14", - "5.3.3.15", "5.3.4.1", 
"5.3.99.2", "5.3.99.3", "5.3.99.4", - "5.3.99.5", "5.3.99.6", "5.3.99.7", "5.3.99.8", "5.3.99.9", - "5.4.1.1", "5.4.1.2", "5.4.2.1", "5.4.2.2", "5.4.2.3", "5.4.2.4", - "5.4.2.5", "5.4.2.6", "5.4.2.7", "5.4.2.8", "5.4.2.9", "5.4.2.10", - "5.4.3.2", "5.4.3.3", "5.4.3.4", "5.4.3.5", "5.4.3.6", "5.4.3.7", - "5.4.3.8", "5.4.4.1", "5.4.4.2", "5.4.4.3", "5.4.99.1", "5.4.99.2", - "5.4.99.3", "5.4.99.4", "5.4.99.5", "5.4.99.7", "5.4.99.8", - "5.4.99.9", "5.4.99.11", "5.4.99.12", "5.4.99.13", "5.4.99.14", - "5.4.99.15", "5.4.99.16", "5.4.99.17", "5.4.99.18", "5.5.1.1", - "5.5.1.2", "5.5.1.3", "5.5.1.4", "5.5.1.5", "5.5.1.6", "5.5.1.7", - "5.5.1.8", "5.5.1.9", "5.5.1.10", "5.5.1.11", "5.5.1.12", - "5.5.1.13", "5.5.1.14", "5.5.1.15", "5.5.1.16", "5.99.1.1", - "5.99.1.2", "5.99.1.3", "5.99.1.4", "6.1.1.1", "6.1.1.2", "6.1.1.3", - "6.1.1.4", "6.1.1.5", "6.1.1.6", "6.1.1.7", "6.1.1.9", "6.1.1.10", - "6.1.1.11", "6.1.1.12", "6.1.1.13", "6.1.1.14", "6.1.1.15", - "6.1.1.16", "6.1.1.17", "6.1.1.18", "6.1.1.19", "6.1.1.20", - "6.1.1.21", "6.1.1.22", "6.1.1.23", "6.1.1.24", "6.1.1.25", - "6.1.1.26", "6.1.1.27", "6.2.1.1", "6.2.1.2", "6.2.1.3", "6.2.1.4", - "6.2.1.5", "6.2.1.6", "6.2.1.7", "6.2.1.8", "6.2.1.9", "6.2.1.10", - "6.2.1.11", "6.2.1.12", "6.2.1.13", "6.2.1.14", "6.2.1.15", - "6.2.1.16", "6.2.1.17", "6.2.1.18", "6.2.1.19", "6.2.1.20", - "6.2.1.22", "6.2.1.23", "6.2.1.24", "6.2.1.25", "6.2.1.26", - "6.2.1.27", "6.2.1.28", "6.2.1.30", "6.2.1.31", "6.2.1.32", - "6.2.1.33", "6.2.1.34", "6.2.1.35", "6.2.1.36", "6.3.1.1", - "6.3.1.2", "6.3.1.4", "6.3.1.5", "6.3.1.6", "6.3.1.7", "6.3.1.8", - "6.3.1.9", "6.3.1.10", "6.3.1.11", "6.3.1.12", "6.3.1.13", - "6.3.2.1", "6.3.2.2", "6.3.2.3", "6.3.2.4", "6.3.2.5", "6.3.2.6", - "6.3.2.7", "6.3.2.8", "6.3.2.9", "6.3.2.10", "6.3.2.11", "6.3.2.12", - "6.3.2.13", "6.3.2.14", "6.3.2.16", "6.3.2.17", "6.3.2.18", - "6.3.2.19", "6.3.2.20", "6.3.2.21", "6.3.2.22", "6.3.2.23", - "6.3.2.24", "6.3.2.25", "6.3.2.26", "6.3.2.27", "6.3.2.28", - "6.3.2.29", 
"6.3.2.30", "6.3.2.31", "6.3.2.32", "6.3.2.33", - "6.3.2.34", "6.3.3.1", "6.3.3.2", "6.3.3.3", "6.3.3.4", "6.3.4.1", - "6.3.4.2", "6.3.4.3", "6.3.4.4", "6.3.4.5", "6.3.4.6", "6.3.4.7", - "6.3.4.8", "6.3.4.9", "6.3.4.10", "6.3.4.11", "6.3.4.12", - "6.3.4.13", "6.3.4.14", "6.3.4.15", "6.3.4.16", "6.3.4.17", - "6.3.4.18", "6.3.5.1", "6.3.5.2", "6.3.5.3", "6.3.5.4", "6.3.5.5", - "6.3.5.6", "6.3.5.7", "6.3.5.9", "6.3.5.10", "6.4.1.1", "6.4.1.2", - "6.4.1.3", "6.4.1.4", "6.4.1.5", "6.4.1.6", "6.4.1.7", "6.5.1.1", - "6.5.1.2", "6.5.1.3", "6.5.1.4", "6.6.1.1", "6.6.1.2", - NULL -}; - NLM_EXTERN Boolean LookForECnumberPattern (CharPtr str) { @@ -17615,7 +18702,7 @@ static Boolean ValidateECnumber (CharPtr str) NLM_EXTERN void ECNumberFSAFreeAll (void) { - CtSetPtr csp; + CtrySetPtr ctsp; TextFsaPtr fsa; fsa = (TextFsaPtr) GetAppProperty ("SpecificECNumberFSA"); @@ -17648,17 +18735,23 @@ NLM_EXTERN void ECNumberFSAFreeAll (void) TextFsaFree (fsa); } - csp = (CtSetPtr) GetAppProperty ("CountryLatLonList"); - if (csp != NULL) { - SetAppProperty ("CountryLatLonList", NULL); - CtSetDataFree (csp); + ctsp = (CtrySetPtr) GetAppProperty ("CountryLatLonData"); + if (ctsp != NULL) { + SetAppProperty ("CountryLatLonData", NULL); + FreeLatLonCountryData (ctsp); + } + + ctsp = (CtrySetPtr) GetAppProperty ("WaterLatLonData"); + if (ctsp != NULL) { + SetAppProperty ("WaterLatLonData", NULL); + FreeLatLonCountryData (ctsp); } ic_code_data = MemFree (ic_code_data); ic_code_list = ValNodeFreeData (ic_code_list); } -static TextFsaPtr GetECNumberFSA (CharPtr prop, CharPtr file, CharPtr PNTR local, Boolean trimAtTab) +static TextFsaPtr GetECNumberFSA (CharPtr prop, CharPtr file, CharPtr PNTR local, size_t numitems, Boolean trimAtTab) { FileCache fc; @@ -17707,7 +18800,7 @@ static TextFsaPtr GetECNumberFSA (CharPtr prop, CharPtr file, CharPtr PNTR local } } else if (local != NULL) { - for (i = 0; local [i] != NULL; i++) { + for (i = 0; /* local [i] != NULL */ i < numitems; i++) { str = local 
[i]; if (StringDoesHaveText (str)) { if (StringLen (str) + 3 < sizeof (tmp)) { @@ -17733,25 +18826,25 @@ static TextFsaPtr GetECNumberFSA (CharPtr prop, CharPtr file, CharPtr PNTR local static TextFsaPtr GetSpecificECNumberFSA (void) { - return (GetECNumberFSA ("SpecificECNumberFSA", "ecnum_specific.txt", ecnum_specif, FALSE)); + return (GetECNumberFSA ("SpecificECNumberFSA", "ecnum_specific.txt", (CharPtr PNTR) kECNum_specific, sizeof (kECNum_specific) / sizeof (char*), TRUE)); } static TextFsaPtr GetAmbiguousECNumberFSA (void) { - return (GetECNumberFSA ("AmbiguousECNumberFSA", "ecnum_ambiguous.txt", ecnum_ambig, FALSE)); + return (GetECNumberFSA ("AmbiguousECNumberFSA", "ecnum_ambiguous.txt", (CharPtr PNTR) kECNum_ambiguous, sizeof (kECNum_ambiguous) / sizeof (char*), TRUE)); } static TextFsaPtr GetDeletedECNumberFSA (void) { - return (GetECNumberFSA ("DeletedECNumberFSA", "ecnum_deleted.txt", NULL, FALSE)); + return (GetECNumberFSA ("DeletedECNumberFSA", "ecnum_deleted.txt", (CharPtr PNTR) kECNum_deleted, sizeof (kECNum_deleted) / sizeof (char*), TRUE)); } static TextFsaPtr GetReplacedECNumberFSA (void) { - return (GetECNumberFSA ("ReplacedEECNumberFSA", "ecnum_replaced.txt", NULL, TRUE)); + return (GetECNumberFSA ("ReplacedEECNumberFSA", "ecnum_replaced.txt", (CharPtr PNTR) kECNum_replaced, sizeof (kECNum_replaced) / sizeof (char*), TRUE)); } static Boolean ECnumberNotInList (CharPtr str) @@ -17933,7 +19026,7 @@ static void ValidateRptUnit (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt fto = ftmp; } if (from < ffrom || from > fto || to < ffrom || to > fto) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidQualifierValue, "/rpt_unit_range is not within sequence length"); + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_RptUnitRangeProblem, "/rpt_unit_range is not within sequence length"); } } } @@ -17959,6 +19052,224 @@ static void ValidateRptUnit (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt } } + +static Boolean IsGbIndexQualPairValid (Int2 index, 
Int2 val) +{ + Int2 i; + Boolean found = FALSE; + + for (i = 0; i < ParFlat_GBFeat[index].opt_num && !found; i++) { + if (ParFlat_GBFeat[index].opt_qual[i] == val) { + found = TRUE; + } + } + for (i = 0; i < ParFlat_GBFeat[index].mand_num && !found; i++) { + if (ParFlat_GBFeat[index].mand_qual[i] == val) { + found = TRUE; + } + } + return found; +} + + +NLM_EXTERN CharPtr GetGBFeatKeyForFeature (SeqFeatPtr sfp) +{ + CharPtr key = NULL; + ImpFeatPtr ifp; + + if (sfp == NULL) { + return NULL; + } + + if (sfp->data.choice == SEQFEAT_IMP) { + ifp = (ImpFeatPtr) sfp->data.value.ptrvalue; + if (StringCmp (ifp->key, "-") == 0) { + key = StringSave ("misc_feature"); + } else { + key = StringSaveNoNull (ifp->key); + } + } else { + key = StringSaveNoNull (FeatDefTypeLabel (sfp)); + if (StringCmp (key, "Gene") == 0) { + *key = 'g'; + } else if (StringCmp (key, "preRNA") == 0) { + key = MemFree (key); + key = StringSave ("precursor_RNA"); + } + } + return key; +} + + +NLM_EXTERN Boolean ShouldSuppressGBQual(Uint1 subtype, CharPtr qual_name) +{ + if (StringHasNoText (qual_name)) { + return FALSE; + } + + /* always suppress experiment and inference quals */ + if (StringCmp (qual_name, "experiment") == 0 || StringCmp (qual_name, "inference") == 0) { + return TRUE; + } + + if (subtype == FEATDEF_ncRNA) { + if (StringCmp (qual_name, "product") == 0 + || StringCmp (qual_name, "ncRNA_class") == 0) { + return TRUE; + } + } else if (subtype == FEATDEF_tmRNA) { + if (StringCmp (qual_name, "product") == 0 + || StringCmp (qual_name, "tag_peptide") == 0) { + return TRUE; + } + } else if (subtype == FEATDEF_otherRNA) { + if (StringCmp (qual_name, "product") == 0) { + return TRUE; + } + } + + return FALSE; +} + + +NLM_EXTERN Boolean ShouldBeAGBQual (Uint1 subtype, Int2 qual, Boolean allowProductGBQual) + +{ + if (qual < 0) return FALSE; + if (allowProductGBQual && qual == GBQUAL_product) return TRUE; + if (qual == GBQUAL_citation || + qual == GBQUAL_db_xref || + qual == GBQUAL_evidence || + 
qual == GBQUAL_exception || + qual == GBQUAL_gene || + qual == GBQUAL_gene_synonym || + qual == GBQUAL_insertion_seq || + qual == GBQUAL_label || + qual == GBQUAL_locus_tag || + qual == GBQUAL_non_functional || + qual == GBQUAL_note || + qual == GBQUAL_partial || + qual == GBQUAL_product || + qual == GBQUAL_pseudo || + qual == GBQUAL_pseudogene || + qual == GBQUAL_rpt_unit || + qual == GBQUAL_transposon || + qual == GBQUAL_experiment || + qual == GBQUAL_trans_splicing || + qual == GBQUAL_ribosomal_slippage || + qual == GBQUAL_standard_name || + qual == GBQUAL_inference) + { + return FALSE; + } + if (subtype == FEATDEF_CDS) + { + if (qual == GBQUAL_codon_start + || qual == GBQUAL_codon + || qual == GBQUAL_EC_number + || qual == GBQUAL_gdb_xref + || qual == GBQUAL_number + || qual == GBQUAL_protein_id + || qual == GBQUAL_transl_except + || qual == GBQUAL_transl_table + || qual == GBQUAL_translation + || qual == GBQUAL_allele + || qual == GBQUAL_function + || qual == GBQUAL_old_locus_tag) + { + return FALSE; + } + } + if (qual == GBQUAL_map && subtype != FEATDEF_ANY && subtype != FEATDEF_repeat_region && subtype != FEATDEF_gap) return FALSE; + if (qual == GBQUAL_operon && subtype != FEATDEF_ANY && subtype != FEATDEF_operon) return FALSE; + if (Nlm_GetAppProperty ("SequinUseEMBLFeatures") == NULL) + { + if (qual == GBQUAL_usedin) + { + return FALSE; + } + } + + if (qual > -1 && ShouldSuppressGBQual (subtype, ParFlat_GBQual_names [qual].name)) { + return FALSE; + } + + return TRUE; +} + + +static CharPtr sWrongQualReasons[] = { + "conflicting codon_start values", + "codon_start value should be 1, 2, or 3" +}; + +typedef enum { + eWrongQualReason_conflicting_codon_start = 0, + eWrongQualReason_bad_codon_start_value +} EWrongQualReason; + +/* + * Return values: + * 1: yes + * 0: no + * -1: don't know + * 2: no for special reasons + */ +NLM_EXTERN Int4 IsQualValidForFeature (GBQualPtr gbqual, SeqFeatPtr sfp) +{ + CharPtr key = NULL; + Int2 val; + Int4 rval = -1; + Int2 
index; + CdRegionPtr crp; + + if (sfp == NULL || gbqual == NULL) { + return -1; + } + + key = GetGBFeatKeyForFeature (sfp); + index = GBFeatKeyNameValid (&key, FALSE); + key = MemFree (key); + + if (index == -1) { + /* unknown */ + rval = -1; + } else if (StringCmp (gbqual->qual, "gsdb_id") == 0) { + /* force good */ + rval = 1; + } else if (sfp->data.choice == SEQFEAT_GENE && + (StringCmp (gbqual->qual, "gen_map") == 0 || + StringCmp (gbqual->qual, "cyt_map") == 0 || + StringCmp (gbqual->qual, "rad_map") == 0)) { + rval = 1; + } else if (sfp->data.choice == SEQFEAT_CDREGION + && StringCmp (gbqual->qual, "orig_transcript_id") == 0) { + rval = 1; + } else if (sfp->data.choice == SEQFEAT_RNA && + (StringCmp (gbqual->qual, "orig_protein_id") == 0 || + StringCmp (gbqual->qual, "orig_transcript_id") == 0)) { + rval = 1; + } else if ((val = GBQualNameValid (gbqual->qual)) == -1) { + rval = -1; + } else if (sfp->data.choice == SEQFEAT_CDREGION + && val == GBQUAL_codon_start) { + crp = (CdRegionPtr) sfp->data.value.ptrvalue; + if (crp != NULL) { + if (crp->frame > 0) { + rval = eWrongQualReason_conflicting_codon_start + 2; + } else { + rval = eWrongQualReason_bad_codon_start_value + 2; + } + } + } else if (IsGbIndexQualPairValid (index, val)) { + rval = 1; + } else { + rval = 0; + } + return rval; +} + + static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr sfp, ImpFeatPtr ifp) { @@ -17992,6 +19303,7 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt CharPtr str; CharPtr tmp; Int2 val; + Int4 qvalid; if (vsp == NULL || gcp == NULL || sfp == NULL || ifp == NULL) return; @@ -18092,8 +19404,16 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt } } } + if (StringHasNoText(sfp->comment) && sfp->qual == NULL && sfp->dbxref == NULL) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_NeedsNote, "A note or other qualifier is required for a misc_feature"); + } } for (gbqual = sfp->qual; gbqual != 
NULL; gbqual = gbqual->next) { + qvalid = IsQualValidForFeature (gbqual, sfp); + if (qvalid == 0) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnImpFeat, "Wrong qualifier %s for feature %s", gbqual->qual, key); + } + if (StringCmp (gbqual->qual, "gsdb_id") == 0) { continue; } @@ -18105,26 +19425,6 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnknownImpFeatQual, "NULL qualifier"); } } else if (index != -1) { - found = FALSE; - for (i = 0; i < ParFlat_GBFeat[index].opt_num; i++) { - qual = ParFlat_GBFeat[index].opt_qual[i]; - if (qual == val) { - found = TRUE; - break; - } - } - if (!found) { - for (i = 0; i < ParFlat_GBFeat[index].mand_num; i++) { - qual = ParFlat_GBFeat[index].mand_qual[i]; - if (qual == val) { - found = TRUE; - break; - } - } - if (!found) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnImpFeat, "Wrong qualifier %s for feature %s", gbqual->qual, key); - } - } if (gbqual->val != NULL) { if (val == GBQUAL_rpt_type) { failed = FALSE; @@ -18245,7 +19545,7 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt if (!found) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "%s is not a legal value for qualifier %s", gbqual->val, gbqual->qual); } - } else if (val == GBQUAL_mobile_element) { + } else if (val == GBQUAL_mobile_element_type) { found = FALSE; str = NULL; for (i = 0; legal_mobile_element_strings[i] != NULL; i++) { @@ -18383,6 +19683,7 @@ static void ValidateNonImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFea CharPtr str; CharPtr tmp; Int2 val; + Int4 qvalid; if (vsp == NULL || gcp == NULL || sfp == NULL) return; @@ -18395,6 +19696,13 @@ static void ValidateNonImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFea } index = GBFeatKeyNameValid (&key, FALSE); for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next) { + qvalid = IsQualValidForFeature (gbqual, sfp); + if (qvalid == 0) { + 
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnFeature, "Wrong qualifier %s for feature %s", gbqual->qual, key); + } else if (qvalid > 1) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnFeature, sWrongQualReasons[qvalid - 2]); + } + if (StringCmp (gbqual->qual, "gsdb_id") == 0) { continue; } @@ -18406,31 +19714,18 @@ static void ValidateNonImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFea if (StringCmp (gbqual->qual, "cyt_map") == 0) continue; if (StringCmp (gbqual->qual, "rad_map") == 0) continue; } + if (sfp->data.choice == SEQFEAT_CDREGION) { + if (StringCmp (gbqual->qual, "orig_transcript_id") == 0) continue; + } + if (sfp->data.choice == SEQFEAT_RNA) { + if (StringCmp (gbqual->qual, "orig_protein_id") == 0) continue; + if (StringCmp (gbqual->qual, "orig_transcript_id") == 0) continue; + } ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnknownFeatureQual, "Unknown qualifier %s", gbqual->qual); } else { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnknownFeatureQual, "NULL qualifier"); } } else if (index != -1) { - found = FALSE; - for (i = 0; i < ParFlat_GBFeat[index].opt_num; i++) { - qual = ParFlat_GBFeat[index].opt_qual[i]; - if (qual == val) { - found = TRUE; - break; - } - } - if (!found) { - for (i = 0; i < ParFlat_GBFeat[index].mand_num; i++) { - qual = ParFlat_GBFeat[index].mand_qual[i]; - if (qual == val) { - found = TRUE; - break; - } - } - if (!found) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnFeature, "Wrong qualifier %s for feature %s", gbqual->qual, key); - } - } if (gbqual->val != NULL) { if (val == GBQUAL_rpt_type) { failed = FALSE; @@ -18674,6 +19969,7 @@ static Boolean PartialAtSpliceSiteOrGap (ValidStructPtr vsp, SeqLocPtr head, Uin ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_Range, "Unable to check splice consensus because feature outside range of sequence"); */ + BioseqUnlock (bsp); return FALSE; } @@ -18940,6 +20236,7 @@ static Boolean TwoListsHaveCommonItem ( return FALSE; } + static void CheckTrnaCodons ( 
ValidStructPtr vsp, GatherContextPtr gcp, @@ -18950,15 +20247,12 @@ static void CheckTrnaCodons ( { Uint1 aa = 0; Uint1 anticodon [4]; - BioseqPtr bsp; Char ch; Int2 code = 0; CharPtr codes = NULL; Uint1 codon [4]; CharPtr complementBase = " TVGH CD M KN YSAABW R "; - Uint1 from; CharPtr gen_code_name = NULL; - GeneticCodePtr gncp; Int2 i; Uint2 idx; Uint1 index; @@ -18972,7 +20266,6 @@ static void CheckTrnaCodons ( StreamCache sc; ErrSev sev = SEV_ERROR; SeqLocPtr slp; - SeqMapTablePtr smtp; CharPtr str; Uint1 taa; CharPtr three_letter_aa = NULL; @@ -18986,35 +20279,7 @@ static void CheckTrnaCodons ( /* extract indicated amino acid */ - aa = 0; - if (trp->aatype == 2) { - aa = trp->aa; - } else { - from = 0; - switch (trp->aatype) { - case 0: - from = 0; - break; - case 1: - from = Seq_code_iupacaa; - break; - case 2: - from = Seq_code_ncbieaa; - break; - case 3: - from = Seq_code_ncbi8aa; - break; - case 4: - from = Seq_code_ncbistdaa; - break; - default: - break; - } - smtp = SeqMapTableFind (Seq_code_ncbieaa, from); - if (smtp != NULL) { - aa = SeqMapTableConvert (smtp, trp->aa); - } - } + aa = GetAaFromtRNA (trp); three_letter_aa = Get3LetterSymbol (NULL, Seq_code_ncbieaa, NULL, aa); if (StringHasNoText (three_letter_aa)) { @@ -19022,22 +20287,8 @@ static void CheckTrnaCodons ( } /* find genetic code table */ + codes = GetCodesFortRNA(sfp, &code); - bsp = GetBioseqGivenSeqLoc (sfp->location, gcp->entityID); - BioseqToGeneticCode (bsp, &code, NULL, NULL, NULL, 0, NULL); - - gncp = GeneticCodeFind (code, NULL); - if (gncp == NULL) { - gncp = GeneticCodeFind (1, NULL); - code = 1; - } - if (gncp == NULL) return; - - for (vnp = (ValNodePtr) gncp->data.ptrvalue; vnp != NULL; vnp = vnp->next) { - if (vnp->choice != 3) continue; - codes = (CharPtr) vnp->data.ptrvalue; - break; - } if (codes == NULL) return; for (vnp = genetic_code_name_list; vnp != NULL; vnp = vnp->next) { @@ -19413,17 +20664,35 @@ static void CheckCDSPartial (ValidStructPtr vsp, SeqFeatPtr sfp) 
Boolean partial3; SeqDescrPtr sdp; ErrSev sev; + Boolean need_to_unlock = FALSE; if (vsp == NULL || sfp == NULL) return; if (sfp->product == NULL) return; if (!vsp->useSeqMgrIndexes) return; bsp = BioseqFindFromSeqLoc (sfp->product); + if (bsp == NULL && vsp->farFetchCDSproducts) { + bsp = BioseqLockById (SeqLocId(sfp->product)); + if (bsp != NULL) { + need_to_unlock = TRUE; + } + } if (bsp == NULL) return; sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context); - if (sdp == NULL) return; + if (sdp == NULL) { + if (need_to_unlock) { + BioseqUnlock(bsp); + } + return; + } mip = (MolInfoPtr) sdp->data.ptrvalue; - if (mip == NULL) return; + if (mip == NULL) { + if (need_to_unlock) { + BioseqUnlock (bsp); + } + return; + } CheckSeqLocForPartial (sfp->location, &partial5, &partial3); + switch (mip->completeness) { case 0 : /* unknown */ break; @@ -19483,6 +20752,9 @@ static void CheckCDSPartial (ValidStructPtr vsp, SeqFeatPtr sfp) default : break; } + if (need_to_unlock) { + BioseqUnlock (bsp); + } } static void CheckForCommonCDSProduct (ValidStructPtr vsp, SeqFeatPtr sfp) @@ -19509,6 +20781,7 @@ static void CheckForCommonCDSProduct (ValidStructPtr vsp, SeqFeatPtr sfp) crp = (CdRegionPtr) sfp->data.value.ptrvalue; if (crp != NULL && crp->orf) return; + grp = SeqMgrGetGeneXref (sfp); if (grp == NULL || (!SeqMgrGeneIsSuppressed (grp))) { gene = SeqMgrGetOverlappingGene (sfp->location, NULL); @@ -20970,7 +22243,7 @@ static ValNodePtr ValidateGoTermQualifier ( ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_BadGeneOntologyFormat, "Bad data format for GO term qualifier PMID"); } break; - case 4 : + case 5 : if (ufp->choice == 1) { evidence = (CharPtr) ufp->data.ptrvalue; } else { @@ -21778,10 +23051,94 @@ static void ValidateRna (SeqFeatPtr sfp, ValidStructPtr vsp, GatherContextPtr gc } +NLM_EXTERN Boolean IsGeneXrefRedundant (SeqFeatPtr sfp) +{ + GeneRefPtr grp; + SeqFeatPtr sfpx; + GeneRefPtr grpx; + Boolean redundantgenexref = FALSE; + CharPtr syn1, syn2; + 
DummySmfeData dsd; + Int2 count; + SeqMgrFeatContext fcontext; + + grp = SeqMgrGetGeneXref (sfp); + if (grp == NULL) { + return FALSE; + } + if (grp != NULL && SeqMgrGeneIsSuppressed (grp)) return FALSE; + + sfpx = SeqMgrGetOverlappingGene (sfp->location, &fcontext); + if (sfpx == NULL || sfpx->data.choice != SEQFEAT_GENE) + return FALSE; + grpx = (GeneRefPtr) sfpx->data.value.ptrvalue; + if (grpx == NULL) + return FALSE; + + if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grp->locus_tag)) { + if (StringICmp (grp->locus_tag, grpx->locus_tag) == 0) { + redundantgenexref = TRUE; + } + } else if (StringDoesHaveText (grp->locus) && StringDoesHaveText (grp->locus)) { + if (StringICmp (grp->locus, grpx->locus) == 0) { + redundantgenexref = TRUE; + } + } else if (grp->syn != NULL && grpx->syn != NULL) { + syn1 = (CharPtr) grp->syn->data.ptrvalue; + syn2 = (CharPtr) grpx->syn->data.ptrvalue; + if ((StringDoesHaveText (syn1)) && StringDoesHaveText (syn2)) { + if (StringICmp (syn1, syn2) == 0) { + redundantgenexref = TRUE; + } + } + } + if (redundantgenexref) { + MemSet ((Pointer) &dsd, 0, sizeof (DummySmfeData)); + dsd.max = INT4_MAX; + dsd.num_at_max = 0; + dsd.equivalent_genes = FALSE; + dsd.grp_at_max = NULL; + count = SeqMgrGetAllOverlappingFeatures (sfp->location, FEATDEF_GENE, NULL, 0, + LOCATION_SUBSET, (Pointer) &dsd, DummySMFEProc); + if (dsd.num_at_max > 1) { + redundantgenexref = FALSE; + } + } + return redundantgenexref; +} + + +static void CheckCodingRegionAndProteinFeaturePartials (SeqFeatPtr cds, ValidStructPtr vsp) +{ + BioseqPtr protbsp; + SeqFeatPtr prot; + SeqMgrFeatContext context; + Boolean cds_partial5, cds_partial3, prot_partial5, prot_partial3; + + if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION || vsp == NULL) { + return; + } + + protbsp = BioseqFindFromSeqLoc (cds->product); + if (protbsp == NULL) { + return; + } + prot = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &context); + if (prot == NULL) { + return; + } + 
CheckSeqLocForPartial (cds->location, &cds_partial5, &cds_partial3); + CheckSeqLocForPartial (prot->location, &prot_partial5, &prot_partial3); + if ((cds_partial5 && !prot_partial5) || (!cds_partial5 && prot_partial5) + || (cds_partial3 && !prot_partial3) || (!cds_partial3 && prot_partial3)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialsInconsistent, "Coding region and protein feature partials conflict"); + } +} + + NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) { Int2 type, i, j; - static char *parterr[2] = { "PartialProduct", "PartialLocation" }; static char *parterrs[4] = { "Start does not include first/last residue of sequence", "Stop does not include first/last residue of sequence", @@ -21826,7 +23183,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) SeqFeatPtr sfpx = NULL, sfpy = NULL, prt; SeqFeatPtr operon; Boolean redundantgenexref; - SeqMgrFeatContext fcontext; + SeqMgrFeatContext fcontext, gcontext; CharPtr syn1, syn2, label = NULL, genexref_label; Uint2 oldEntityID; Uint4 oldItemID; @@ -21862,10 +23219,8 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) Boolean farFetchProd; Boolean skip; Boolean is_nc = FALSE; - Boolean no_nonconsensus_except = TRUE; VariationRefPtr vrfp; - vsp = (ValidStructPtr) (gcp->userdata); sfp = (SeqFeatPtr) (gcp->thisitem); vsp->descr = NULL; @@ -21902,14 +23257,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) partials[0] = SeqLocPartialCheckEx (sfp->product, farFetchProd); partials[1] = SeqLocPartialCheck (sfp->location); - if (sfp->excpt) { - if (StringISearch (sfp->except_text, "nonconsensus splice site") != NULL || - StringISearch (sfp->except_text, "heterogeneous population sequenced") != NULL || - StringISearch (sfp->except_text, "low-quality sequence region") != NULL || - StringISearch (sfp->except_text, "artificial location") != NULL) { - no_nonconsensus_except = FALSE; - } - } + CheckCodingRegionAndProteinFeaturePartials (sfp, vsp); if ((partials[0] != SLP_COMPLETE) || 
(partials[1] != SLP_COMPLETE) || (sfp->partial)) { /* partialness */ /* a feature on a partial sequence should be partial -- if often isn't */ @@ -21919,11 +23267,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) /* a partial feature, with complete location, but partial product */ else if ((sfp->partial) && (sfp->product != NULL) && (partials[1] == SLP_COMPLETE) && (sfp->product->choice == SEQLOC_WHOLE) && (partials[0] != SLP_COMPLETE)) { - if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - /* skip in gpipe genomic */ - } else { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, "When SeqFeat.product is a partial Bioseq, SeqFeat.location should also be partial"); - } + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, "When SeqFeat.product is a partial Bioseq, SeqFeat.location should also be partial"); } /* gene on segmented set is now 'order', should also be partial */ else if (type == SEQFEAT_GENE && sfp->product == NULL && partials[1] == SLP_INTERNAL) { @@ -21959,8 +23303,6 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) tmp = StringMove (tmp, "FALSE"); if (bsp == NULL && LocationIsFar (sfp->product) && NoFetchFunctions ()) { vsp->far_fetch_failure = TRUE; - } else if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - /* ignore inconsistent partial warnings in genomic gpipe sequence */ } else { ValidErr (vsp, sev, ERR_SEQ_FEAT_PartialsInconsistent, buf); } @@ -21984,11 +23326,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) tmp = StringMove (tmp, "TRUE"); else tmp = StringMove (tmp, "FALSE"); - if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - /* ignore inconsistent partial warnings in genomic gpipe sequence */ - } else { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialsInconsistent, buf); - } + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialsInconsistent, buf); } /* 5' or 3' partial location giving unclassified partial product */ else if (((partials [1] & SLP_START) != 0 || ((partials [1] & 
SLP_STOP) != 0)) && ((partials [0] & SLP_OTHER) != 0) && sfp->partial) { @@ -21996,40 +23334,67 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) } /* may have other error bits set as well */ - for (i = 0; i < 2; i++) { - errtype = SLP_NOSTART; - for (j = 0; j < 4; j++) { - bypassGeneTest = FALSE; - if (partials[i] & errtype) { - if (i == 1 && j < 2 && IsCddFeat (sfp)) { - /* suppresses warning */ - } else if (i == 1 && j < 2 && sfp->data.choice == SEQFEAT_GENE && SameAsCDS (sfp, errtype, NULL)) { - /* - ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem, - "%s: %s", - parterr[i], parterrs[j]); - */ - } else if (i == 1 && j < 2 && sfp->data.choice == SEQFEAT_GENE && SameAsMRNA (sfp, errtype)) { - } else if (i == 1 && j < 2 && sfp->idx.subtype == FEATDEF_mRNA && SameAsCDS (sfp, errtype, &bypassGeneTest)) { - } else if (i == 1 && j < 2 && sfp->idx.subtype == FEATDEF_mRNA && (! bypassGeneTest) && SameAsGene (sfp)) { - } else if (i == 1 && j < 2 && sfp->idx.subtype == FEATDEF_exon && SameAsMRNA (sfp, errtype)) { + /* PartialProduct */ + errtype = SLP_NOSTART; + for (j = 0; j < 4; j++) { + bypassGeneTest = FALSE; + if (partials[0] & errtype) { + if (sfp->data.choice == SEQFEAT_CDREGION && sfp->excpt && + StringStr (sfp->except_text, "rearrangement required for product") != NULL) { + } else if (sfp->data.choice == SEQFEAT_CDREGION && j == 0) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, + "PartialProduct: 5' partial is not at start AND is not at consensus splice site"); + } else if (sfp->data.choice == SEQFEAT_CDREGION && j == 1) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, + "PartialProduct: 3' partial is not at stop AND is not at consensus splice site"); + } else { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, + "PartialProduct: %s", parterrs[j]); + } + } + errtype <<= 1; + } + /* PartialLocation */ + errtype = SLP_NOSTART; + for (j = 0; j < 4; j++) { + bypassGeneTest = FALSE; + if (partials[1] & errtype) { + if (j 
== 3) { + if (LocationIsFar (sfp->location) && NoFetchFunctions ()) { + vsp->far_fetch_failure = TRUE; + } else if (sfp->data.choice == SEQFEAT_CDREGION && sfp->excpt && + StringStr (sfp->except_text, "rearrangement required for product") != NULL) { + } else { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, + "PartialLocation: Improper use of partial (greater than or less than)"); + } + } else if (j == 2) { + if (LocationIsFar (sfp->location) && NoFetchFunctions ()) { + vsp->far_fetch_failure = TRUE; + } else { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, + "PartialLocation: Internal partial intervals do not include first/last residue of sequence"); + } + } else { + if (IsCddFeat (sfp)) { + /* suppresses warning */ + } else if (sfp->data.choice == SEQFEAT_GENE && SameAsCDS (sfp, errtype, NULL)) { + } else if (sfp->data.choice == SEQFEAT_GENE && SameAsMRNA (sfp, errtype)) { + } else if (sfp->idx.subtype == FEATDEF_mRNA && SameAsCDS (sfp, errtype, &bypassGeneTest)) { + } else if (sfp->idx.subtype == FEATDEF_mRNA && (! bypassGeneTest) && SameAsGene (sfp)) { + } else if (sfp->idx.subtype == FEATDEF_exon && SameAsMRNA (sfp, errtype)) { } else if (LocationIsFar (sfp->location) && NoFetchFunctions ()) { vsp->far_fetch_failure = TRUE; - - } else if (i == 1 && j < 2 && sfp->data.choice == SEQFEAT_CDREGION && SameAsMRNA (sfp, errtype) && + } else if (sfp->data.choice == SEQFEAT_CDREGION && SameAsMRNA (sfp, errtype) && PartialAtSpliceSiteOrGap (vsp, sfp->location, errtype, &isgap, &badseq)) { - } else if (i == 1 && j < 2 && PartialAtSpliceSiteOrGap (vsp, sfp->location, errtype, &isgap, &badseq)) { + } else if (PartialAtSpliceSiteOrGap (vsp, sfp->location, errtype, &isgap, &badseq)) { if (! 
isgap) { if (sfp->idx.subtype != FEATDEF_CDS || SplicingNotExpected (sfp)) { - if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep && i == 1 && (j == 0 || j == 1 || j == 2)) { - /* ignore in genomic gpipe sequence */ - } else { - ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem, - "%s: %s (but is at consensus splice site)", - parterr[i], parterrs[j]); - } + ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem, + "PartialLocation: %s (but is at consensus splice site)", + parterrs[j]); } else if (sfp->idx.subtype == FEATDEF_CDS) { bsp = BioseqFindFromSeqLoc (sfp->location); if (bsp != NULL) { @@ -22039,71 +23404,41 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) if (mip != NULL) { if (mip->biomol == MOLECULE_TYPE_MRNA) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, - "%s: %s (but is at consensus splice site, but is on an mRNA that is already spliced)", - parterr[i], parterrs[j]); + "PartialLocation: %s (but is at consensus splice site, but is on an mRNA that is already spliced)", + parterrs[j]); } } } } } } - } else if (i == 1 && j < 2 && badseq) { + } else if (badseq) { ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem, - "%s: %s (and is at bad sequence)", - parterr[i], parterrs[j]); + "PartialLocation: %s (and is at bad sequence)", + parterrs[j]); } else if (sfp->data.choice == SEQFEAT_CDREGION && sfp->excpt && StringStr (sfp->except_text, "rearrangement required for product") != NULL) { } else if (sfp->data.choice == SEQFEAT_CDREGION && j == 0) { - if (no_nonconsensus_except) { - if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - /* skip in gpipe genomic */ - } else { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, - "%s: %s", parterr[i], "5' partial is not at start AND" - " is not at consensus splice site"); - } - } + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, + "PartialLocation: 5' partial is not at start AND is not at consensus splice site"); } else if (sfp->data.choice == SEQFEAT_CDREGION && j == 1) { 
- if (no_nonconsensus_except) { - if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - /* skip in gpipe genomic */ - } else { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, - "%s: %s", parterr[i], "3' partial is not at stop AND" - " is not at consensus splice site"); - } - } - } else if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep && i == 1 && (j == 0 || j == 1 || j == 2)) { - /* ignore start/stop not at end in genomic gpipe sequence */ - } else { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, - "%s: %s", parterr[i], parterrs[j]); + "PartialLocation: 3' partial is not at stop AND is not at consensus splice site"); + } else if (j == 0) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, + "PartialLocation: Start does not include first/last residue of sequence"); + } else if (j == 1) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, + "PartialLocation: Stop does not include first/last residue of sequence"); } } - errtype <<= 1; } + errtype <<= 1; } } CheckForIllegalDbxref (vsp, gcp, sfp->dbxref); - /* - for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) { - id = -1; - db = vnp->data.ptrvalue; - if (db && db->db) { - for (i = 0; i < DBNUM; i++) { - if (StringCmp (db->db, dbtag[i]) == 0) { - id = i; - break; - } - } - if (id == -1 || (type != SEQFEAT_CDREGION && id < 4)) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref, "Illegal db_xref type %s", db->db); - } - } - } - */ switch (type) { case 1: /* Gene-ref */ @@ -22212,6 +23547,17 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) } } } + if (grp->syn != NULL) { + bsp = BioseqFindFromSeqLoc (sfp->location); + for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) { + str = (CharPtr) vnp->data.ptrvalue; + if (StringHasNoText (str)) continue; + sfpx = SeqMgrGetFeatureByLabel (bsp, str, SEQFEAT_GENE, 0, NULL); + if (sfpx != NULL && sfpx != sfp) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IdenticalGeneSymbolAndSynonym, "gene synonym has same value (%s) as 
locus of another gene feature", str); + } + } + } if (StringDoesHaveText (grp->locus) && StringDoesHaveText (grp->desc) && StringCmp (grp->locus, grp->desc) == 0) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UndesiredGeneSynonym, "gene description has same value as gene locus"); } @@ -22325,6 +23671,12 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) i = SeqLocCompare (cbp->loc, sfp->location); if ((i != SLC_A_IN_B) && (i != SLC_A_EQ_B)) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_Range, "Code-break location not in coding region"); + } else if (sfp->product != NULL) { + slp = dnaLoc_to_aaLoc (sfp, cbp->loc, TRUE, NULL, TRUE); + if (slp == NULL) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_Range, "Code-break location not in coding region - may be frame problem"); + } + SeqLocFree (slp); } if (prevcbp != NULL) { i = SeqLocCompare (cbp->loc, prevcbp->loc); @@ -23005,7 +24357,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) dsd.grp_at_max = NULL; count = SeqMgrGetAllOverlappingFeatures (sfp->location, FEATDEF_GENE, NULL, 0, LOCATION_SUBSET, (Pointer) &dsd, DummySMFEProc); - if (dsd.num_at_max > 1) { + if (dsd.num_at_max > 1 && sfp->idx.subtype != FEATDEF_repeat_region && sfp->idx.subtype != FEATDEF_mobile_element) { if (dsd.equivalent_genes) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_GeneXrefNeeded, "Feature overlapped by %d identical-length equivalent genes but has no cross-reference", (int) dsd.num_at_max); @@ -23062,14 +24414,28 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) } } - sfpx = SeqMgrGetOverlappingGene (sfp->location, &fcontext); + sfpx = NULL; + if (SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, 0, 0, sfp, &fcontext) == sfp) { + if (fcontext.bad_order || fcontext.mixed_strand) { + sfpx = SeqMgrGetOverlappingFeatureEx (sfp->location, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, &gcontext, TRUE); + } else if (vsp->has_multi_int_genes) { + sfpx = SeqMgrGetOverlappingFeatureEx (sfp->location, FEATDEF_GENE, NULL, 0, NULL, 
LOCATION_SUBSET, &gcontext, TRUE); + if (sfpx == NULL && (vsp->has_seg_bioseqs || vsp->is_embl_ddbj_in_sep || vsp->is_old_gb_in_sep)) { + sfpx = SeqMgrGetOverlappingGene (sfp->location, &gcontext); + } + } else { + sfpx = SeqMgrGetOverlappingGene (sfp->location, &gcontext); + } + } else { + sfpx = SeqMgrGetOverlappingGene (sfp->location, &gcontext); + } if (sfpx == NULL || sfpx->data.choice != SEQFEAT_GENE) return; grpx = (GeneRefPtr) sfpx->data.value.ptrvalue; if (grpx == NULL) return; redundantgenexref = FALSE; - label = fcontext.label; + label = gcontext.label; if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grp->locus_tag)) { if (StringICmp (grp->locus_tag, grpx->locus_tag) == 0) { redundantgenexref = TRUE; @@ -23356,22 +24722,14 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) has_errors = TRUE; other_than_mismatch = TRUE; if (report_errors || rna_editing) { - if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - /* suppress if gpipe genomic */ - } else { - ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PolyATail, "Transcript length [%ld] less than %sproduct length [%ld], but tail is 100%s polyA", (long) mlen, farstr, (long) plen, "%"); - } + ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PolyATail, "Transcript length [%ld] less than %sproduct length [%ld], but tail is 100%s polyA", (long) mlen, farstr, (long) plen, "%"); } plen = mlen; /* if it passes polyA test, allow base-by-base comparison on common length */ } else { has_errors = TRUE; other_than_mismatch = TRUE; if (report_errors || rna_editing) { - if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - /* suppress if gpipe genomic */ - } else { - ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PolyATail, "Transcript length [%ld] less than %sproduct length [%ld], but tail >= 95%s polyA", (long) mlen, farstr, (long) plen, "%"); - } + ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PolyATail, "Transcript length [%ld] less than %sproduct length [%ld], but tail >= 95%s polyA", (long) mlen, farstr, (long) 
plen, "%"); } plen = mlen; /* if it passes polyA test, allow base-by-base comparison on common length */ } @@ -23407,14 +24765,6 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) MemFree (pdseq); } -erret: - - MemFree (mrseq); - - if (unlockProd) { - BioseqUnlock (bsp); - } - if (! report_errors) { if (! has_errors) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryException, "mRNA has exception but passes transcription test"); @@ -23428,6 +24778,15 @@ erret: ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnqualifiedException, "mRNA has unqualified transcribed product replaced exception"); } } + +erret: + + MemFree (mrseq); + + if (unlockProd) { + BioseqUnlock (bsp); + } + } /***************************************************************************** @@ -23605,6 +24964,12 @@ static void ValidateTranslExcept ( MemFree (protseq); } +typedef struct cdsmismatch { + Int4 pos; + Int2 cds_residue; + Int2 prot_residue; +} CDSMismatchData, PNTR CDSMismatchPtr; + NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) { @@ -23618,6 +24983,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) CdRegionPtr crp; SeqIdPtr protid = NULL; Int2 residue1, residue2, stop_count = 0, mismatch = 0, ragged = 0; + CDSMismatchData mismatches[11]; Boolean got_stop = FALSE; /* SeqPortPtr spp = NULL; @@ -23638,7 +25004,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) Boolean partial5 = FALSE; Boolean partial3 = FALSE; Boolean rna_editing = FALSE; - CharPtr nuclocstr, farstr = ""; + CharPtr nuclocstr, farstr = "", loc2str; CodeBreakPtr cbp; Int4 pos1, pos2, pos; SeqLocPtr tmp; @@ -23803,12 +25169,6 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) prot1len = prot1seq->length; } - if (annotated_by_transcript_or_proteomic) { - if (1.2 * prot2len < prot1len) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TransLen, "Protein product length [%ld] is more than 120%% of the %stranslation length [%ld]", prot1len, 
farstr, prot2len); - } - } - if (alt_start && gccode == 1) { /* sev = SEV_WARNING; */ sev = SEV_NONE; /* only enable for RefSeq, leave old code in for now */ @@ -23969,6 +25329,12 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) } } + if (annotated_by_transcript_or_proteomic) { + if (1.2 * prot2len < prot1len) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TransLen, "Protein product length [%ld] is more than 120%% of the %stranslation length [%ld]", prot1len, farstr, prot2len); + } + } + /* prot2len = BSLen (newprot); len = prot2len; @@ -23995,15 +25361,9 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) sev = SEV_WARNING; } if (report_errors || unclassified_except) { - if (! unclassified_except) { - ValidErr (vsp, sev, ERR_SEQ_FEAT_StartCodon, - "Illegal start codon (and %ld internal stops). Probably wrong genetic code [%d]", (long) stop_count, gccode); - } - if (unclassified_except && vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - /* suppress if gpipe genomic */ - } else { - ValidErr (vsp, sev, ERR_SEQ_FEAT_InternalStop, "%ld internal stops (and illegal start codon). Genetic code [%d]", (long) stop_count, gccode); - } + ValidErr (vsp, sev, ERR_SEQ_FEAT_StartCodon, + "Illegal start codon (and %ld internal stops). Probably wrong genetic code [%d]", (long) stop_count, gccode); + ValidErr (vsp, sev, ERR_SEQ_FEAT_InternalStop, "%ld internal stops (and illegal start codon). Genetic code [%d]", (long) stop_count, gccode); } } else if (got_x) { has_errors = TRUE; @@ -24013,15 +25373,9 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) sev = SEV_WARNING; } if (report_errors || unclassified_except) { - if (! unclassified_except) { - ValidErr (vsp, sev, ERR_SEQ_FEAT_StartCodon, + ValidErr (vsp, sev, ERR_SEQ_FEAT_StartCodon, "Ambiguous start codon (and %ld internal stops). 
Possibly wrong genetic code [%d]", (long) stop_count, gccode); - } - if (unclassified_except && vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - /* suppress if gpipe genomic */ - } else { - ValidErr (vsp, sev, ERR_SEQ_FEAT_InternalStop, "%ld internal stops (and ambiguous start codon). Genetic code [%d]", (long) stop_count, gccode); - } + ValidErr (vsp, sev, ERR_SEQ_FEAT_InternalStop, "%ld internal stops (and ambiguous start codon). Genetic code [%d]", (long) stop_count, gccode); } } else { has_errors = TRUE; @@ -24057,11 +25411,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) sev = SEV_REJECT; } } - if (unclassified_except && vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - /* suppress if gpipe genomic */ - } else { - ValidErr (vsp, sev, ERR_SEQ_FEAT_InternalStop, "%ld internal stops. Genetic code [%d]", (long) stop_count, gccode); - } + ValidErr (vsp, sev, ERR_SEQ_FEAT_InternalStop, "%ld internal stops. Genetic code [%d]", (long) stop_count, gccode); } } prot_ok = FALSE; @@ -24070,13 +25420,13 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) } else if (got_dash) { has_errors = TRUE; other_than_mismatch = TRUE; - if (report_errors && ! unclassified_except) { + if (report_errors) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Illegal start codon used. Wrong genetic code [%d] or protein should be partial", gccode); } } else if (got_x && (! partial5)) { has_errors = TRUE; other_than_mismatch = TRUE; - if (report_errors && ! unclassified_except) { + if (report_errors) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Ambiguous start codon used. Wrong genetic code [%d] or protein should be partial", gccode); } } @@ -24205,12 +25555,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) sev = SEV_WARNING; } } - if (mismatch == 10) { - has_errors = TRUE; - if (report_errors && (! mismatch_except)) { - ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_MisMatchAA, "More than 10 mismatches. 
Genetic code [%d]", gccode); - } - } else if (i == 0) { + if (i == 0) { if ((sfp->partial) && (!no_beg) && (!no_end)) { /* ok, it's partial */ has_errors = TRUE; other_than_mismatch = TRUE; @@ -24222,9 +25567,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) other_than_mismatch = TRUE; if (report_errors) { if (! got_dash) { - if (! unclassified_except){ - ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Illegal start codon used. Wrong genetic code [%d] or protein should be partial", gccode); - } + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Illegal start codon used. Wrong genetic code [%d] or protein should be partial", gccode); } } } else if (residue1 == 'X') { @@ -24232,51 +25575,63 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) other_than_mismatch = TRUE; if (report_errors) { if (! got_x) { - if (! unclassified_except){ - ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Ambiguous start codon used. Wrong genetic code [%d] or protein should be partial", gccode); - } + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Ambiguous start codon used. Wrong genetic code [%d] or protein should be partial", gccode); } } } else { - nuclocstr = MapToNTCoords (sfp, protid, i); - if (nuclocstr != NULL) { - has_errors = TRUE; - if (report_errors && (! mismatch_except)) { - ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA, - "%sResidue %ld in protein [%c] != translation [%c] at %s", farstr, (long) (i + 1), (char) residue2, (char) residue1, nuclocstr); - } - } else { - has_errors = TRUE; - if (report_errors && (! mismatch_except)) { - ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA, - "%sResidue %ld in protein [%c] != translation [%c]", farstr, (long) (i + 1), (char) residue2, (char) residue1); - } - } - MemFree (nuclocstr); - } - } else if (mismatch < 10) { - nuclocstr = MapToNTCoords (sfp, protid, i); - if (nuclocstr != NULL) { has_errors = TRUE; - if (report_errors && (! 
mismatch_except)) { - ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA, - "%sResidue %ld in protein [%c] != translation [%c] at %s", farstr, (long) (i + 1), (char) residue2, (char) residue1, nuclocstr); - } + mismatches[mismatch].pos = i; + mismatches[mismatch].cds_residue = residue1; + mismatches[mismatch].prot_residue = residue2; + mismatch++; + } + } else { + has_errors = TRUE; + if (mismatch >= 10) { + mismatches[10].pos = i; + mismatches[10].cds_residue = residue1; + mismatches[10].prot_residue = residue2; } else { - has_errors = TRUE; - if (report_errors && (! mismatch_except)) { - ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA, - "%sResidue %ld in protein [%c] != translation [%c]", farstr, (long) (i + 1), (char) residue2, (char) residue1); - } + mismatches[mismatch].pos = i; + mismatches[mismatch].cds_residue = residue1; + mismatches[mismatch].prot_residue = residue2; } - MemFree (nuclocstr); + mismatch++; } - mismatch++; } } - /* - spp = SeqPortFree (spp); - */ + + if (report_errors && !mismatch_except) { + if (mismatch > 10) { + if (report_errors && !mismatch_except) { + nuclocstr = MapToNTCoords (sfp, protid, mismatches[0].pos); + loc2str = MapToNTCoords (sfp, protid, mismatches[10].pos); + ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA, + "%d mismatches found. First mismatch at %ld, residue in protein [%c] != translation [%c]%s%s. Last mismatch at %ld, residue in protein [%c] != translation [%c]%s%s. Genetic code [%d]", + mismatch, + (long) (mismatches[0].pos + 1), mismatches[0].prot_residue, mismatches[0].cds_residue, + nuclocstr == NULL ? "" : " at ", nuclocstr == NULL ? "" : nuclocstr, + (long) (mismatches[10].pos + 1), mismatches[10].prot_residue, mismatches[10].cds_residue, + loc2str == NULL ? "" : " at ", loc2str == NULL ? 
"" : loc2str, + gccode); + nuclocstr = MemFree (nuclocstr); + loc2str = MemFree (loc2str); + } + } else { + for (i = 0; i < mismatch; i++) { + nuclocstr = MapToNTCoords (sfp, protid, mismatches[i].pos); + ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA, + "%sResidue %ld in protein [%c] != translation [%c]%s%s", farstr, + (long) (mismatches[i].pos + 1), + (char) mismatches[i].prot_residue, + (char) mismatches[i].cds_residue, + nuclocstr == NULL ? "" : " at ", + nuclocstr == NULL ? "" : nuclocstr); + nuclocstr = MemFree (nuclocstr); + } + } + } + } else { has_errors = TRUE; other_than_mismatch = TRUE; @@ -24291,21 +25646,13 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) has_errors = TRUE; other_than_mismatch = TRUE; if (report_errors) { - if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - /* suppress if gpipe genomic */ - } else { - ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "End of location should probably be partial"); - } + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "End of location should probably be partial"); } } else { has_errors = TRUE; other_than_mismatch = TRUE; if (report_errors) { - if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - /* suppress if gpipe genomic */ - } else { - ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "This SeqFeat should not be partial"); - } + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "This SeqFeat should not be partial"); } } show_stop = FALSE; @@ -24390,6 +25737,55 @@ erret: } } + +static void mRNAMatchesCompleteCDSEnd (SeqFeatPtr mrna, BoolPtr p5, BoolPtr p3) +{ + Boolean partial5, partial3; + SeqFeatPtr cds; + Uint2 strand; + + if (p5 != NULL) { + *p5 = FALSE; + } + if (p3 != NULL) { + *p3 = FALSE; + } + + cds = GetCDSformRNA (mrna); + + if (mrna == NULL || cds == NULL) { + return; + } + + strand = SeqLocStrand (mrna->location); + + CheckSeqLocForPartial (cds->location, &partial5, &partial3); + if (p5 != NULL && !partial5) { + if (strand == 
Seq_strand_minus) { + if (SeqLocStop (cds->location) == SeqLocStop (mrna->location)) { + *p5 = TRUE; + } + } else { + if (SeqLocStart (cds->location) == SeqLocStart (mrna->location)) { + *p5 = TRUE; + } + } + } + + if (p3 != NULL && !partial3) { + if (strand == Seq_strand_minus) { + if (SeqLocStart (cds->location) == SeqLocStart (mrna->location)) { + *p3 = TRUE; + } + } else { + if (SeqLocStop (cds->location) == SeqLocStop (mrna->location)) { + *p3 = TRUE; + } + } + } +} + + /***************************************************************************** * * SpliceCheck(sfp) @@ -24407,7 +25803,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll) /* SeqPortPtr spp = NULL; */ - SeqIdPtr last_sip = NULL, sip, id; + SeqIdPtr last_sip = NULL, sip; Int2 total, ctr; BioseqPtr bsp = NULL; Int4 strt, stp, len = 0, donor, acceptor; @@ -24417,16 +25813,14 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll) report_errors = TRUE, checkExonDonor, checkExonAcceptor, pseudo; int severity; Uint2 partialflag; - Boolean gpsOrRefSeq = FALSE; SeqEntryPtr sep; - BioseqSetPtr bssp; - TextSeqIdPtr tsip; StreamCache sc; SeqInt sint; ValNode vn; SeqMgrFeatContext context; SeqFeatPtr mrna, gene; GeneRefPtr grp; + Boolean ignore_partial_mrna_5 = FALSE, ignore_partial_mrna_3 = FALSE; if (sfp == NULL) return; @@ -24498,15 +25892,14 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll) firstPartial = FALSE; lastPartial = FALSE; + if (sfp->idx.subtype == FEATDEF_mRNA) { + mRNAMatchesCompleteCDSEnd (sfp, &ignore_partial_mrna_5, &ignore_partial_mrna_3); + } + + /* genomic product set or NT_ contig always relaxes to SEV_WARNING */ sep = vsp->sep; - if (sep != NULL && IS_Bioseq_set (sep)) { - bssp = (BioseqSetPtr) sep->data.ptrvalue; - if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) { - gpsOrRefSeq = TRUE; - } - } slp = SeqLocFindPart (head, slp, EQUIV_IS_ONE); while (slp != NULL) { @@ 
-24520,31 +25913,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll) if (sip == NULL) break; - /* genomic product set or NT_ contig always relaxes to SEV_WARNING */ bsp = BioseqFind (sip); - if (bsp != NULL) { - for (id = bsp->id; id != NULL; id = id->next) { - if (id->choice == SEQID_OTHER) { - tsip = (TextSeqIdPtr) id->data.ptrvalue; - if (tsip != NULL && tsip->accession != NULL) { - /* - if (StringNICmp (tsip->accession, "NT_", 3) == 0) { - gpsOrRefSeq = TRUE; - } else if (StringNICmp (tsip->accession, "NC_", 3) == 0) { - gpsOrRefSeq = TRUE; - } else if (StringNICmp (tsip->accession, "NG_", 3) == 0) { - gpsOrRefSeq = TRUE; - } else if (StringNICmp (tsip->accession, "NM_", 3) == 0) { - gpsOrRefSeq = TRUE; - } else if (StringNICmp (tsip->accession, "NR_", 3) == 0) { - gpsOrRefSeq = TRUE; - } - */ - gpsOrRefSeq = TRUE; - } - } - } - } if ((ctr == 1) || (!SeqIdMatch (sip, last_sip))) { /* spp = SeqPortFree (spp); */ @@ -24639,7 +26008,11 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll) } } - if (((checkExonDonor && (!lastPartial)) || ctr < total) && (stp < (len - 2))) { /* check donor on all but last exon and on sequence */ + if (((checkExonDonor && (!lastPartial)) + || ctr < total + || (ctr == total && lastPartial && (sfp->idx.subtype != FEATDEF_mRNA || !ignore_partial_mrna_3))) + && (stp < (len - 2))) + { /* check donor on all but last exon and on sequence */ tbuf[0] = '\0'; StreamCacheSetPosition (&sc, stp + 1); residue1 = StreamCacheGetResidue (&sc); @@ -24672,11 +26045,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll) } } } else { - if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - severity = SEV_INFO; - } else if (gpsOrRefSeq) { - severity = SEV_WARNING; - } else if (checkExonDonor) { + if (checkExonDonor) { severity = SEV_WARNING; } else if (reportAsError) { severity = SEV_ERROR; @@ -24708,7 +26077,11 @@ static void SpliceCheckEx (ValidStructPtr 
vsp, SeqFeatPtr sfp, Boolean checkAll) } } - if (((checkExonAcceptor && (!firstPartial)) || ctr != 1) && (strt > 1)) { + if (((checkExonAcceptor && (!firstPartial)) + || ctr != 1 + || (ctr == 1 && firstPartial && (sfp->idx.subtype != FEATDEF_mRNA || !ignore_partial_mrna_5))) + && (strt > 1)) + { StreamCacheSetPosition (&sc, strt - 2); residue1 = StreamCacheGetResidue (&sc); residue2 = StreamCacheGetResidue (&sc); @@ -24722,11 +26095,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll) has_errors = TRUE; } else if (IS_residue (residue1) && IS_residue (residue2)) { if (residue1 != 'A' || residue2 != 'G') { - if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) { - severity = SEV_INFO; - } else if (gpsOrRefSeq) { - severity = SEV_WARNING; - } else if (checkExonAcceptor) { + if (checkExonAcceptor) { severity = SEV_WARNING; } else if (reportAsError) { severity = SEV_ERROR; @@ -24876,6 +26245,7 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi Uint1 strand2 = 0, strand1; ErrSev sev, oldsev; SeqIntPtr sip1, sip2, prevsip; + SeqBondPtr sbp; SeqPntPtr spp; PackSeqPntPtr pspp; SeqIdPtr id1 = NULL, id2 = NULL; @@ -24979,6 +26349,21 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi tmpval = PackSeqPntCheck (pspp); prevsip = NULL; break; + case SEQLOC_BOND: + sbp = (SeqBondPtr) tmp->data.ptrvalue; + if (sbp != NULL) { + spp = (SeqPntPtr) sbp->a; + if (spp != NULL) { + tmpval = SeqPntCheck (spp); + } + /* if already failed, no need to check second point */ + if (tmpval) { + spp = (SeqPntPtr) sbp->b; + if (spp != NULL) { + tmpval = SeqPntCheck (spp); + } + } + } case SEQLOC_NULL: break; default: diff --git a/api/valid.h b/api/valid.h index edd532c4..ff1f0208 100644 --- a/api/valid.h +++ b/api/valid.h @@ -29,7 +29,7 @@ * * Version Creation Date: 1/1/94 * -* $Revision: 6.56 $ +* $Revision: 6.73 $ * * File Description: Sequence editing utilities * @@ -153,6 +153,7 @@ typedef struct 
validstruct { Boolean strictLatLonCountry; /* bodies of water do not relax country vs. lat_lon mismatch */ Boolean rubiscoTest; /* look for ribulose bisphosphate variants */ Boolean indexerVersion; /* special tests for GenBank indexers */ + Boolean disableSuppression; /* disables suppression of message by ShouldSuppressValidErr */ Int2 validationLimit; /* limit validation to major classes in Valid1GatherProc */ /* this section used for finer error reporting callback */ ValidErrorFunc errfunc; @@ -169,6 +170,8 @@ typedef struct validstruct { Boolean is_gps_in_sep; /* record has genomic product set */ Boolean other_sets_in_sep; /* record has pop/phy/mut/eco/wgs set */ Boolean is_embl_ddbj_in_sep; /* record has embl or ddbj seqid */ + Boolean is_old_gb_in_sep; /* record has old style GenBank accession */ + Boolean is_patent_in_sep; /* record has patent seqid */ Boolean is_insd_in_sep; /* record has genbank/embl/ddbj or tpg/tpe/tpd seqid */ Boolean only_lcl_gnl_in_sep; /* record has seqid of only local or general */ Boolean has_gnl_prot_sep; /* protein Bioseq has general seqid */ @@ -176,6 +179,8 @@ typedef struct validstruct { Boolean is_smupd_in_sep; /* record in INSD internal processing */ Boolean feat_loc_has_gi; /* at least one feature has a gi location reference */ Boolean feat_prod_has_gi; /* at least one feature has a gi product reference */ + Boolean has_multi_int_genes; /* record has multi-interval genes */ + Boolean has_seg_bioseqs; /* record has segmented Bioseqs */ Boolean far_fetch_failure; /* a far location or bioseq with no fetch function */ VoidPtr rrna_array; /* sorted feature index array of rRNA features */ VoidPtr trna_array; /* sorted feature index array of tRNA features */ @@ -199,10 +204,86 @@ NLM_EXTERN Boolean CountryIsValid (CharPtr name, BoolPtr old_countryP, BoolPtr b NLM_EXTERN CharPtr GetCorrectedCountryCapitalization (CharPtr name); NLM_EXTERN Boolean LookForECnumberPattern (CharPtr str); +NLM_EXTERN Boolean StringContainsBodyOfWater 
(CharPtr str); + +/* original country latitude-longitude tests */ NLM_EXTERN Boolean IsCountryInLatLonList (CharPtr country); NLM_EXTERN Boolean TestLatLonForCountry (CharPtr country, FloatHi lat, FloatHi lon); NLM_EXTERN CharPtr GuessCountryForLatLon (FloatHi lat, FloatHi lon); -NLM_EXTERN Boolean StringContainsBodyOfWater (CharPtr str); + +/* improved country latitude-longitude tests */ +/* + for proximity tests, range is a maximum bounding search box in degrees, + distanceP is filled in with a minimum distance in kilometers (subject + to non-spherical earth calculation error) +*/ + +NLM_EXTERN Boolean CountryIsInLatLonList ( + CharPtr country +); +NLM_EXTERN Boolean WaterIsInLatLonList ( + CharPtr country +); + +NLM_EXTERN Boolean CountryContainsLatLon ( + CharPtr country, + FloatHi lat, + FloatHi lon +); +NLM_EXTERN Boolean WaterContainsLatLon ( + CharPtr country, + FloatHi lat, + FloatHi lon +); + +NLM_EXTERN CharPtr LookupCountryByLatLon ( + FloatHi lat, + FloatHi lon +); +NLM_EXTERN CharPtr LookupWaterByLatLon ( + FloatHi lat, + FloatHi lon +); + +NLM_EXTERN CharPtr CountryClosestToLatLon ( + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP +); +NLM_EXTERN CharPtr WaterClosestToLatLon ( + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP +); + +NLM_EXTERN Boolean CountryIsNearLatLon ( + CharPtr country, + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP +); +NLM_EXTERN Boolean WaterIsNearLatLon ( + CharPtr country, + FloatHi lat, + FloatHi lon, + FloatHi range, + FloatHi PNTR distanceP +); + +NLM_EXTERN Boolean CountryExtremesOverlap ( + CharPtr first, + CharPtr second +); +NLM_EXTERN Boolean WaterExtremesOverlap ( + CharPtr first, + CharPtr second +); + +NLM_EXTERN FloatHi CountryDataScaleIs (void); +NLM_EXTERN FloatHi WaterDataScaleIs (void); NLM_EXTERN Boolean ParseStructuredVoucher (CharPtr subname, CharPtr PNTR inst, CharPtr PNTR id); NLM_EXTERN Boolean VoucherInstitutionIsValid (CharPtr 
inst); @@ -213,6 +294,20 @@ NLM_EXTERN void ECNumberFSAFreeAll (void); NLM_EXTERN Boolean HasTpaUserObject (BioseqPtr bsp); NLM_EXTERN Boolean CountryBoxesOverlap (CharPtr country1, CharPtr country2); +NLM_EXTERN Boolean IsGeneXrefRedundant (SeqFeatPtr sfp); + +/* warns if over 1000 /inference qualifiers or accessions in inference qualifiers */ +NLM_EXTERN Boolean TooManyInferenceAccessions ( + SeqEntryPtr sep, + Int4Ptr numInferences, + Int4Ptr numAccessions +); + +NLM_EXTERN Int4 IsQualValidForFeature (GBQualPtr gbqual, SeqFeatPtr sfp); +NLM_EXTERN CharPtr GetGBFeatKeyForFeature (SeqFeatPtr sfp); +NLM_EXTERN Boolean ShouldSuppressGBQual(Uint1 subtype, CharPtr qual_name); +NLM_EXTERN Boolean ShouldBeAGBQual (Uint1 subtype, Int2 qual, Boolean allowProductGBQual); + #ifdef __cplusplus } diff --git a/api/valid.msg b/api/valid.msg index d8669dc3..d18b7366 100644 --- a/api/valid.msg +++ b/api/valid.msg @@ -243,6 +243,10 @@ This sequence contains long stretches of Ns. $^ HighNContentPercent, 69 This sequence contains a high percentage of Ns. +$^ BadSegmentedSeq, 70 +Segmented sequences should have gap or virtual in between real components. + + $$ SEQ_DESCR, 2 $^ BioSourceMissing, 1 @@ -510,6 +514,21 @@ Viral lineage information conflicts with MolInfo. $^ MissingKeyword, 76 Expected keyword was not found. +$^ FakeStructuredComment, 77 +Comment descriptor may have been formatted to look like structured comment. + +$^ StructuredCommentPrefixOrSuffixMissing, 78 +Structured comments should have a prefix or suffix. + +$^ LatLonWater, 79 +The lat_lon coordinate maps to a body of water. + +$^ LatLonOffshore, 80 +The lat_lon coordinate is probably in a minor or unnamed body of water. + +$^ MissingPersonalCollectionName, 81 +The personal collection does not indicate the name of the collector. + $$ GENERIC, 3 @@ -656,6 +675,13 @@ pop/phy/mut/eco set. $^ SingleItemSet, 27 Only a single Bioseq was found in this BioseqSet. Is that what was intended?
+$^ MisplacedMolInfo, 28 +Mol-info should not be on a pop/phy/mut/eco/wgs/genbank/genprod set. + +$^ ImproperlyNestedSets, 29 +A pop/phy/mut/eco/wgs set has an unexpected internal set other than nuc-prot, +seg-set, or parts set. + $$ SEQ_FEAT, 5 @@ -1334,6 +1360,20 @@ The CDS is not contained within the cross-referenced mRNA. $^ LocusCollidesWithLocusTag, 176 A gene locus is identical with a gene locus_tag. +$^ IdenticalGeneSymbolAndSynonym, 177 +The gene synonym is the same as the locus of a different gene. + +$^ NeedsNote, 178 +A misc_feature requires a note. + +$^ RptUnitRangeProblem, 179 +The value of the rpt_unit_range qualifier is not inside the parent feature location. + +$^ TooManyInferenceAccessions, 180 +There are too many inference qualifier accessions to have their versions verified by +network access. + + $$ SEQ_ALIGN, 6 $^ SeqIdProblem, 1 diff --git a/api/validerr.h b/api/validerr.h index 515f59b1..7376ec29 100644 --- a/api/validerr.h +++ b/api/validerr.h @@ -71,6 +71,7 @@ #define ERR_SEQ_INST_DSmRNA 1,67 #define ERR_SEQ_INST_HighNContentStretch 1,68 #define ERR_SEQ_INST_HighNContentPercent 1,69 +#define ERR_SEQ_INST_BadSegmentedSeq 1,70 #define ERR_SEQ_DESCR 2,0 #define ERR_SEQ_DESCR_BioSourceMissing 2,1 #define ERR_SEQ_DESCR_InvalidForType 2,2 @@ -148,6 +149,11 @@ #define ERR_SEQ_DESCR_BioSourceNeedsChromosome 2,74 #define ERR_SEQ_DESCR_MolInfoConflictsWithBioSource 2,75 #define ERR_SEQ_DESCR_MissingKeyword 2,76 +#define ERR_SEQ_DESCR_FakeStructuredComment 2,77 +#define ERR_SEQ_DESCR_StructuredCommentPrefixOrSuffixMissing 2,78 +#define ERR_SEQ_DESCR_LatLonWater 2,79 +#define ERR_SEQ_DESCR_LatLonOffshore 2,80 +#define ERR_SEQ_DESCR_MissingPersonalCollectionName 2,81 #define ERR_GENERIC 3,0 #define ERR_GENERIC_NonAsciiAsn 3,1 #define ERR_GENERIC_Spell 3,2 @@ -192,6 +198,8 @@ #define ERR_SEQ_PKG_NucProtSetHasTitle 4,25 #define ERR_SEQ_PKG_ComponentMissingTitle 4,26 #define ERR_SEQ_PKG_SingleItemSet 4,27 +#define ERR_SEQ_PKG_MisplacedMolInfo 4,28 
+#define ERR_SEQ_PKG_ImproperlyNestedSets 4,29 #define ERR_SEQ_FEAT 5,0 #define ERR_SEQ_FEAT_InvalidForType 5,1 #define ERR_SEQ_FEAT_PartialProblem 5,2 @@ -369,6 +377,10 @@ #define ERR_SEQ_FEAT_GeneXrefStrandProblem 5,174 #define ERR_SEQ_FEAT_CDSmRNAXrefLocationProblem 5,175 #define ERR_SEQ_FEAT_LocusCollidesWithLocusTag 5,176 +#define ERR_SEQ_FEAT_IdenticalGeneSymbolAndSynonym 5,177 +#define ERR_SEQ_FEAT_NeedsNote 5,178 +#define ERR_SEQ_FEAT_RptUnitRangeProblem 5,179 +#define ERR_SEQ_FEAT_TooManyInferenceAccessions 5,180 #define ERR_SEQ_ALIGN 6,0 #define ERR_SEQ_ALIGN_SeqIdProblem 6,1 #define ERR_SEQ_ALIGN_StrandRev 6,2 -- cgit v1.2.3