diff options
Diffstat (limited to 'demo/tbl2asn.c')
-rw-r--r-- | demo/tbl2asn.c | 152 |
1 files changed, 132 insertions, 20 deletions
diff --git a/demo/tbl2asn.c b/demo/tbl2asn.c index 1b36b887..a969f40e 100644 --- a/demo/tbl2asn.c +++ b/demo/tbl2asn.c @@ -29,7 +29,7 @@ * * Version Creation Date: 5/5/00 * -* $Revision: 6.71 $ +* $Revision: 6.79 $ * * File Description: * @@ -61,7 +61,7 @@ #include <simple.h> #include <aliparse.h> -#define TBL2ASN_APP_VER "2.5" +#define TBL2ASN_APP_VER "2.8" CharPtr TBL2ASN_APPLICATION = TBL2ASN_APP_VER; @@ -162,6 +162,7 @@ static void ValidateOneFile ( if (vsp != NULL) { vsp->useSeqMgrIndexes = TRUE; vsp->suppressContext = TRUE; + vsp->seqSubmitParent = TRUE; oldErrSev = ErrSetMessageLevel (SEV_NONE); ValidateSeqEntry (sep, vsp); ValidStructFree (vsp); @@ -552,6 +553,16 @@ static OrgStuff commonOrgStuff [] = { "PLN", 1, 1, 39947 }, { + "Aspergillus nidulans FGSC A4", "", + "Eukaryota; Fungi; Ascomycota; Pezizomycotina; Eurotiomycetes; Eurotiales; Trichocomaceae; Emericella", + "PLN", 1, 4, 227321 + }, + { + "environmental sequence", "", + "unclassified; environmental samples", + "UNA", 1, 2, 256318 + }, + { NULL, NULL, NULL, NULL, 0, 0, 0 } }; @@ -778,42 +789,62 @@ static BioseqPtr AttachSeqAnnotEntity (Uint2 entityID, SeqAnnotPtr sap, Boolean return bsp; } -static CharPtr TrimBracketsFromString (CharPtr str) +static CharPtr TrimBracketsFromString (CharPtr str, SqnTagPtr stp) { Uchar ch; /* to use 8bit characters in multibyte languages */ + Int2 count; CharPtr dst; CharPtr ptr; - if (StringHasNoText (str)) return str; + if (StringHasNoText (str) || stp == NULL) return str; /* remove bracketed fields */ + count = 0; dst = str; ptr = str; ch = *ptr; while (ch != '\0') { if (ch == '[') { - ptr++; - ch = *ptr; - while (ch != '\0' && ch != ']' && ch != '"') { + if (count < stp->num_tags && (! stp->used [count])) { + *dst = ch; + dst++; ptr++; ch = *ptr; - } - if (ch == '"') { + while (ch != '\0' && ch != ']') { + *dst = ch; + dst++; + ptr++; + ch = *ptr; + } + *dst = ch; + dst++; + ptr++; + ch = *ptr; + } else { ptr++; ch = *ptr; - while (ch != '\0' && ch != '"') { + while (ch != '\0' && ch != ']' && ch != '"') { + ptr++; + ch = *ptr; + } + if (ch == '"') { + ptr++; + ch = *ptr; + while (ch != '\0' && ch != '"') { + ptr++; + ch = *ptr; + } + } + while (ch != '\0' && ch != ']') { ptr++; ch = *ptr; } - } - while (ch != '\0' && ch != ']') { ptr++; ch = *ptr; } - ptr++; - ch = *ptr; + count++; } else { *dst = ch; dst++; @@ -1037,16 +1068,16 @@ static void ProcessOneNuc ( } } - if (stp != NULL) { - SqnTagFree (stp); - } - - TrimBracketsFromString (ttl); + TrimBracketsFromString (ttl, stp); if (! StringHasNoText (ttl)) { str = StringSave (ttl); SeqDescrAddPointer (&(bsp->descr), Seq_descr_title, (Pointer) str); } + if (stp != NULL) { + SqnTagFree (stp); + } + ValNodeFreeData (vnp); } @@ -1179,6 +1210,60 @@ static void ReplaceOnePeptide ( MemFree (str2); } +static void ReplaceOneRNA ( + SimpleSeqPtr ssp, + Boolean conflict +) + +{ + ByteStorePtr bs; + BioseqPtr bsp; + SeqIdPtr sip; + CharPtr str1, str2; + + if (ssp == NULL || ssp->numid < 1) return; + + sip = MakeSeqID (ssp->id [0]); + bsp = BioseqFind (sip); + SeqIdFree (sip); + if (bsp == NULL || bsp->repr != Seq_repr_raw) return; + + /* remove trailing X and * */ + + bs = ssp->seq; + ssp->seqlen = BSLen (bs); + + str1 = BSMerge (ssp->seq, NULL); + str2 = GetSequenceByBsp (bsp); + + if (StringCmp (str1, str2) != 0) { + + /* swap sequence byte stores */ + + bs = bsp->seq_data; + bsp->seq_data = ssp->seq; + ssp->seq = bs; + bsp->length = BSLen (bsp->seq_data); + bsp->seq_data_type = Seq_code_iupacna; + + /* + mrna = SeqMgrGetRNAgivenProduct (bsp, NULL); + if (mrna != NULL) { + + if (conflict) { + mrna->excpt = TRUE; + if (StringHasNoText (mrna->except_text)) { + mrna->except_text = StringSave ("RNA editing"); + } + } + } + */ + } + + MemFree (str1); + MemFree (str2); +} + static Uint2 ProcessOneAsn ( FILE* fp, BioSourcePtr src, @@ -2128,6 +2213,29 @@ static void ProcessOneRecord ( FileClose (fp); } + /* read one or more feature tables from .rna file */ + + fp = OpenOneFile (directory, base, ".rna"); + if (fp != NULL) { + + /* indexing needed to find mRNA from transcript product to set RNA editing exception */ + + SeqMgrIndexFeatures (entityID, NULL); + + while ((dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, TRUE, TRUE, TRUE)) != NULL) { + if (datatype == OBJ_FASTA) { + + ssp = (SimpleSeqPtr) dataptr; + ReplaceOneRNA (ssp, tbl->conflict); + SimpleSeqFree (ssp); + + } else { + ObjMgrFree (datatype, dataptr); + } + } + FileClose (fp); + } + /* read one or more quality score blocks from .qvl file */ fp = OpenOneFile (directory, base, ".qvl"); @@ -2222,7 +2330,11 @@ static void ProcessOneRecord ( if (! tbl->genprodset) { VisitFeaturesInSep (sep, NULL, ClearRnaProducts); } - InstantiateProteinTitles (entityID, NULL); + if (SeqMgrFeaturesAreIndexed (entityID)) { + InstantiateProteinTitles (entityID, NULL); + } else { + Message (MSG_POSTERR, "Unable to instantiate protein titles due to dropped index"); + } if (tbl->genprodset) { /* need to reindex before instantiating mRNA titles */ SeqMgrIndexFeatures (entityID, NULL); |