/* macro.c * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information (NCBI) * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government do not place any restriction on its use or reproduction. * We would, however, appreciate having the NCBI and the author cited in * any work or product based on this material * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. 
* * =========================================================================== * * File Name: macro.c * * Author: Colleen Bollin * * Version Creation Date: 11/8/2007 * * $Revision: 1.598 $ * * File Description: * * Modifications: * -------------------------------------------------------------------------- * Date Name Description of modification * ------- ---------- ----------------------------------------------------- * * * ========================================================================== */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define NLM_GENERATED_CODE_PROTO #include #include #include #include #include #include #include #include #include #include /* static void CollectNucBioseqCallback (BioseqPtr bsp, Pointer data); */ static void AddCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list); static void AddDeflineDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list); static void AddGenbankBlockDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list); static void GetNucBioseqCallback (BioseqPtr bsp, Pointer userdata) { ValNodeBlockPtr vbp; if (bsp == NULL) return; if (! 
ISA_na (bsp->mol)) return; vbp = (ValNodeBlockPtr) userdata; if (vbp == NULL) return; ValNodeAddPointerEx (&(vbp->head), &(vbp->tail), OBJ_BIOSEQ, bsp); } static ValNodePtr CollectNucBioseqs (SeqEntryPtr sep) { ValNodeBlock vnb; if (sep == NULL) return NULL; vnb.head = NULL; vnb.tail = NULL; VisitBioseqsInSep (sep, &vnb, GetNucBioseqCallback); return vnb.head; } static Boolean IsAllCaps (CharPtr str) { CharPtr cp; if (StringHasNoText (str)) return FALSE; cp = str; while (*cp != 0) { if (isalpha (*cp)) { if (islower (*cp)) { return FALSE; } } cp++; } return TRUE; } static Boolean IsAllLowerCase (CharPtr str) { CharPtr cp; if (StringHasNoText (str)) return FALSE; cp = str; while (*cp != 0) { if (isalpha (*cp)) { if (isupper (*cp)) { return FALSE; } } cp++; } return TRUE; } static Boolean IsAllPunctuation (CharPtr str) { CharPtr cp; if (StringHasNoText (str)) return FALSE; cp = str; while (*cp != 0) { if (!ispunct (*cp)) { return FALSE; } cp++; } return TRUE; } static CharPtr PrintPartialOrCompleteDate(DatePtr date) { CharPtr str = NULL; Char year[5]; Char result[15]; if (date == NULL) { return NULL; } str = PrintDate(date); if (str == NULL && date->data[0] > 0 && date->data[1]) { if ((int) (date -> data[1]) < 30) { sprintf(year, "%4d", (int) (date -> data[1] + 2000)); } else { sprintf(year, "%4d", (int) (date -> data[1] + 1900)); } if (date->data[2]) { sprintf(result, "%s %s", NCBI_months[date->data[2] -1 ], year); } else { StringCpy (result, year); } str = StringSave (result); } return str; } static Boolean DoesFeatureMatchRnaType (SeqFeatPtr sfp, RnaFeatTypePtr rt); static Int4 CompareRnaTypes (RnaFeatTypePtr rt1, RnaFeatTypePtr rt2); /* NOTES */ /* When adding a new field type, add implementation to the following functions: * GetFromFieldFromFieldPair * GetToFieldFromFieldPair * BuildFieldPairFromFromField * FieldTypeChoiceFromFieldPairTypeChoice * CompareFieldTypes * IsObjectAppropriateForFieldValue * GetFieldValueForObject * RemoveFieldValueForObject * 
SetFieldValueForObject * SortFieldsForObject * GetObjectListForFieldType * GetFieldListForFieldType * IsFieldTypeEmpty * AllowFieldMulti * SummarizeFieldType * GetTargetListForRowAndColumn * ReportMissingTargets * CountObjectsForColumnFields */ NLM_EXTERN FeatureFieldPtr FeatureFieldCopy (FeatureFieldPtr orig) { FeatureFieldPtr ff = NULL; if (orig != NULL) { ff = FeatureFieldNew(); ff->type = orig->type; if (orig->field != NULL) { ff->field = AsnIoMemCopy (orig->field, (AsnReadFunc) FeatQualChoiceAsnRead, (AsnWriteFunc) FeatQualChoiceAsnWrite); } } return ff; } NLM_EXTERN FieldTypePtr FieldTypeCopy (FieldTypePtr orig) { FieldTypePtr ft = NULL; RnaQualPtr rq, rq_orig; if (orig != NULL) { if (orig->data.ptrvalue == NULL) { ft = ValNodeNew (NULL); ft->choice = orig->choice; } else if (orig->choice == FieldType_feature_field) { ft = ValNodeNew (NULL); ft->choice = FieldType_feature_field; ft->data.ptrvalue = FeatureFieldCopy (orig->data.ptrvalue); } else if (orig->choice == FieldType_rna_field) { ft = ValNodeNew (NULL); ft->choice = FieldType_rna_field; rq_orig = (RnaQualPtr) orig->data.ptrvalue; rq = RnaQualNew (); rq->field = rq_orig->field; rq->type = AsnIoMemCopy (rq_orig->type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); ft->data.ptrvalue = rq; } else { ft = AsnIoMemCopy (orig, (AsnReadFunc) FieldTypeAsnRead, (AsnWriteFunc) FieldTypeAsnWrite); } } return ft; } /* Functions for handling FieldPairs */ NLM_EXTERN FieldTypePtr GetFromFieldFromFieldPair (FieldPairTypePtr fieldpair) { SourceQualChoicePtr ss = NULL; SourceQualPairPtr sqpp; FeatureFieldPairPtr fp; FeatureFieldPtr fs; RnaQualPairPtr rqp; RnaQualPtr rq; FieldTypePtr f = NULL; CDSGeneProtFieldPairPtr cp; MolinfoFieldPairPtr mp; StructuredCommentFieldPairPtr scfp; DBLinkFieldPairPtr dbfp; ValNodePtr vnp; if (fieldpair == NULL) return NULL; switch (fieldpair->choice) { case FieldPairType_source_qual: sqpp = (SourceQualPairPtr) fieldpair->data.ptrvalue; if (sqpp != NULL) { ss = 
ValNodeNew (NULL); ss->choice = SourceQualChoice_textqual; ss->data.intvalue = sqpp->field_from; f = ValNodeNew (NULL); f->choice = FieldType_source_qual; f->data.ptrvalue = ss; } break; case FieldPairType_feature_field: fp = (FeatureFieldPairPtr) fieldpair->data.ptrvalue; if (fp != NULL) { fs = FeatureFieldNew (); fs->type = fp->type; fs->field = (FeatQualChoicePtr) AsnIoMemCopy (fp->field_from, (AsnReadFunc) FeatQualChoiceAsnRead, (AsnWriteFunc) FeatQualChoiceAsnWrite); f = ValNodeNew (NULL); f->choice = FieldType_feature_field; f->data.ptrvalue = fs; } break; case FieldPairType_rna_field: rqp = (RnaQualPairPtr) fieldpair->data.ptrvalue; if (rqp != NULL) { rq = RnaQualNew (); if (rqp->type != NULL) { rq->type = AsnIoMemCopy (rqp->type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); } rq->field = rqp->field_from; f = ValNodeNew (NULL); f->choice = FieldType_rna_field; f->data.ptrvalue = rq; } break; case FieldPairType_cds_gene_prot: cp = (CDSGeneProtFieldPairPtr) fieldpair->data.ptrvalue; if (cp != NULL) { f = ValNodeNew (NULL); f->choice = FieldType_cds_gene_prot; f->data.intvalue = cp->field_from; } break; case FieldPairType_molinfo_field: mp = (MolinfoFieldPairPtr) fieldpair->data.ptrvalue; if (mp != NULL && mp->data.ptrvalue != NULL) { vnp = NULL; switch (mp->choice) { case MolinfoFieldPair_molecule: vnp = ValNodeNew (NULL); vnp->choice = MolinfoField_molecule; vnp->data.intvalue = ((MolinfoMoleculePairPtr)mp->data.ptrvalue)->from; break; case MolinfoFieldPair_technique: vnp = ValNodeNew (NULL); vnp->choice = MolinfoField_technique; vnp->data.intvalue = ((MolinfoTechniquePairPtr)mp->data.ptrvalue)->from; break; case MolinfoFieldPair_completedness: vnp = ValNodeNew (NULL); vnp->choice = MolinfoField_completedness; vnp->data.intvalue = ((MolinfoCompletednessPairPtr)mp->data.ptrvalue)->from; break; case MolinfoFieldPair_mol_class: vnp = ValNodeNew (NULL); vnp->choice = MolinfoField_mol_class; vnp->data.intvalue = 
((MolinfoMolClassPairPtr)mp->data.ptrvalue)->from; break; case MolinfoFieldPair_topology: vnp = ValNodeNew (NULL); vnp->choice = MolinfoField_topology; vnp->data.intvalue = ((MolinfoTopologyPairPtr)mp->data.ptrvalue)->from; break; case MolinfoFieldPair_strand: vnp = ValNodeNew (NULL); vnp->choice = MolinfoField_strand; vnp->data.intvalue = ((MolinfoStrandPairPtr)mp->data.ptrvalue)->from; break; } if (vnp != NULL) { f = ValNodeNew (NULL); f->choice = FieldType_molinfo_field; f->data.ptrvalue = vnp; } } break; case FieldPairType_struc_comment_field: scfp = (StructuredCommentFieldPairPtr) fieldpair->data.ptrvalue; if (scfp != NULL) { f = ValNodeNew (NULL); f->choice = FieldType_struc_comment_field; f->data.ptrvalue = AsnIoMemCopy (scfp->from, (AsnReadFunc) StructuredCommentFieldAsnRead, (AsnWriteFunc) StructuredCommentFieldAsnWrite); } break; case FieldPairType_dblink: dbfp = (DBLinkFieldPairPtr) fieldpair->data.ptrvalue; if (dbfp != NULL) { f = ValNodeNew (NULL); f->choice = FieldType_dblink; f->data.intvalue = dbfp->from; } break; } return f; } NLM_EXTERN FieldTypePtr GetToFieldFromFieldPair (FieldPairTypePtr fieldpair) { SourceQualChoicePtr ss = NULL; SourceQualPairPtr sqpp; FeatureFieldPairPtr fp; FeatureFieldPtr fs; FieldTypePtr f = NULL; RnaQualPairPtr rqp; RnaQualPtr rq; CDSGeneProtFieldPairPtr cp; MolinfoFieldPairPtr mp; StructuredCommentFieldPairPtr scfp; DBLinkFieldPairPtr dbfp; ValNodePtr vnp; if (fieldpair == NULL) return NULL; switch (fieldpair->choice) { case FieldPairType_source_qual: sqpp = (SourceQualPairPtr) fieldpair->data.ptrvalue; if (sqpp != NULL) { ss = ValNodeNew (NULL); ss->choice = SourceQualChoice_textqual; ss->data.intvalue = sqpp->field_to; f = ValNodeNew (NULL); f->choice = FieldType_source_qual; f->data.ptrvalue = ss; } break; case FieldPairType_feature_field: fp = (FeatureFieldPairPtr) fieldpair->data.ptrvalue; if (fp != NULL) { fs = FeatureFieldNew (); fs->type = fp->type; fs->field = (FeatQualChoicePtr) AsnIoMemCopy (fp->field_to, 
(AsnReadFunc) FeatQualChoiceAsnRead, (AsnWriteFunc) FeatQualChoiceAsnWrite); f = ValNodeNew (NULL); f->choice = FieldType_feature_field; f->data.ptrvalue = fs; } break; case FieldPairType_rna_field: rqp = (RnaQualPairPtr) fieldpair->data.ptrvalue; if (rqp != NULL) { rq = RnaQualNew (); if (rqp->type != NULL) { rq->type = AsnIoMemCopy (rqp->type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); } rq->field = rqp->field_to; f = ValNodeNew (NULL); f->choice = FieldType_rna_field; f->data.ptrvalue = rq; } break; case FieldPairType_cds_gene_prot: cp = (CDSGeneProtFieldPairPtr) fieldpair->data.ptrvalue; if (cp != NULL) { f = ValNodeNew (NULL); f->choice = FieldType_cds_gene_prot; f->data.intvalue = cp->field_to; } break; case FieldPairType_molinfo_field: mp = (MolinfoFieldPairPtr) fieldpair->data.ptrvalue; if (mp != NULL && mp->data.ptrvalue != NULL) { vnp = NULL; switch (mp->choice) { case MolinfoFieldPair_molecule: vnp = ValNodeNew (NULL); vnp->choice = MolinfoField_molecule; vnp->data.intvalue = ((MolinfoMoleculePairPtr)mp->data.ptrvalue)->to; break; case MolinfoFieldPair_technique: vnp = ValNodeNew (NULL); vnp->choice = MolinfoField_technique; vnp->data.intvalue = ((MolinfoTechniquePairPtr)mp->data.ptrvalue)->to; break; case MolinfoFieldPair_completedness: vnp = ValNodeNew (NULL); vnp->choice = MolinfoField_completedness; vnp->data.intvalue = ((MolinfoCompletednessPairPtr)mp->data.ptrvalue)->to; break; case MolinfoFieldPair_mol_class: vnp = ValNodeNew (NULL); vnp->choice = MolinfoField_mol_class; vnp->data.intvalue = ((MolinfoMolClassPairPtr)mp->data.ptrvalue)->to; break; case MolinfoFieldPair_topology: vnp = ValNodeNew (NULL); vnp->choice = MolinfoField_topology; vnp->data.intvalue = ((MolinfoTopologyPairPtr)mp->data.ptrvalue)->to; break; case MolinfoFieldPair_strand: vnp = ValNodeNew (NULL); vnp->choice = MolinfoField_strand; vnp->data.intvalue = ((MolinfoStrandPairPtr)mp->data.ptrvalue)->to; break; } if (vnp != NULL) { f = ValNodeNew (NULL); 
f->choice = FieldType_molinfo_field; f->data.ptrvalue = vnp; } } break; case FieldPairType_struc_comment_field: scfp = (StructuredCommentFieldPairPtr) fieldpair->data.ptrvalue; if (scfp != NULL) { f = ValNodeNew (NULL); f->choice = FieldType_struc_comment_field; f->data.ptrvalue = AsnIoMemCopy (scfp->to, (AsnReadFunc) StructuredCommentFieldAsnRead, (AsnWriteFunc) StructuredCommentFieldAsnWrite); } break; case FieldPairType_dblink: dbfp = (DBLinkFieldPairPtr) fieldpair->data.ptrvalue; if (dbfp != NULL) { f = ValNodeNew (NULL); f->choice = FieldType_dblink; f->data.intvalue = dbfp->to; } break; } return f; } NLM_EXTERN FieldPairTypePtr BuildFieldPairFromFromField (FieldTypePtr field_from) { SourceQualChoicePtr ss = NULL; SourceQualPairPtr sqpp; FeatureFieldPairPtr fp; FeatureFieldPtr fs; RnaQualPairPtr rqp; RnaQualPtr rq; CDSGeneProtFieldPairPtr cp; StructuredCommentFieldPairPtr scfp; DBLinkFieldPairPtr dbfp; ValNodePtr mp; MolinfoMoleculePairPtr mol_p; MolinfoTechniquePairPtr tech_p; MolinfoCompletednessPairPtr comp_p; MolinfoMolClassPairPtr class_p; MolinfoTopologyPairPtr topo_p; MolinfoStrandPairPtr strand_p; ValNodePtr vnp; FieldPairTypePtr pair = NULL; if (field_from == NULL) return NULL; switch (field_from->choice) { case FieldType_source_qual: pair = ValNodeNew (NULL); pair->choice = FieldPairType_source_qual; ss = (SourceQualChoicePtr) field_from->data.ptrvalue; if (ss != NULL && ss->choice == SourceQualChoice_textqual) { sqpp = SourceQualPairNew (); sqpp->field_from = ss->data.intvalue; pair->data.ptrvalue = sqpp; } break; case FieldType_feature_field: pair = ValNodeNew (NULL); pair->choice = FieldPairType_feature_field; fs = (FeatureFieldPtr) field_from->data.ptrvalue; if (fs != NULL) { fp = FeatureFieldPairNew (); fp->type = fs->type; fp->field_from = (FeatQualChoicePtr) AsnIoMemCopy (fs->field, (AsnReadFunc) FeatQualChoiceAsnRead, (AsnWriteFunc) FeatQualChoiceAsnWrite); pair->data.ptrvalue = fp; } break; case FieldType_rna_field: pair = ValNodeNew (NULL); 
pair->choice = FieldPairType_rna_field; rq = (RnaQualPtr) field_from->data.ptrvalue; if (rq != NULL) { rqp = RnaQualPairNew (); if (rq->type != NULL) { rqp->type = AsnIoMemCopy (rq->type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); } rqp->field_from = rq->field; pair->data.ptrvalue = rqp; } break; case FieldType_cds_gene_prot: pair = ValNodeNew (NULL); pair->choice = FieldPairType_cds_gene_prot; cp = CDSGeneProtFieldPairNew (); cp->field_from = field_from->data.intvalue; pair->data.ptrvalue = cp; break; case FieldType_molinfo_field: pair = ValNodeNew (NULL); pair->choice = FieldPairType_molinfo_field; vnp = field_from->data.ptrvalue; if (vnp != NULL) { switch (vnp->choice) { case MolinfoField_molecule: mol_p = MolinfoMoleculePairNew (); mol_p->from = vnp->data.intvalue; mp = ValNodeNew (NULL); mp->choice = MolinfoFieldPair_molecule; mp->data.ptrvalue = mol_p; pair->data.ptrvalue = mp; break; case MolinfoField_technique: tech_p = MolinfoTechniquePairNew (); tech_p->from = vnp->data.intvalue; mp = ValNodeNew (NULL); mp->choice = MolinfoFieldPair_molecule; mp->data.ptrvalue = tech_p; pair->data.ptrvalue = mp; break; case MolinfoField_completedness: comp_p = MolinfoCompletednessPairNew (); comp_p->from = vnp->data.intvalue; mp = ValNodeNew (NULL); mp->choice = MolinfoFieldPair_molecule; mp->data.ptrvalue = comp_p; pair->data.ptrvalue = mp; break; case MolinfoField_mol_class: class_p = MolinfoMolClassPairNew (); class_p->from = vnp->data.intvalue; mp = ValNodeNew (NULL); mp->choice = MolinfoFieldPair_molecule; mp->data.ptrvalue = class_p; pair->data.ptrvalue = mp; break; case MolinfoField_topology: topo_p = MolinfoTopologyPairNew (); topo_p->from = vnp->data.intvalue; mp = ValNodeNew (NULL); mp->choice = MolinfoFieldPair_molecule; mp->data.ptrvalue = topo_p; pair->data.ptrvalue = mp; break; case MolinfoFieldPair_strand: strand_p = MolinfoStrandPairNew (); strand_p->from = vnp->data.intvalue; mp = ValNodeNew (NULL); mp->choice = 
MolinfoFieldPair_molecule; mp->data.ptrvalue = strand_p; pair->data.ptrvalue = mp; break; } } break; case FieldType_struc_comment_field: pair = ValNodeNew (NULL); pair->choice = FieldPairType_struc_comment_field; scfp = StructuredCommentFieldPairNew (); scfp->from = AsnIoMemCopy (field_from, (AsnReadFunc) StructuredCommentFieldAsnRead, (AsnWriteFunc) StructuredCommentFieldAsnWrite); pair->data.ptrvalue = scfp; break; case FieldType_dblink: pair = ValNodeNew (NULL); pair->choice = FieldPairType_dblink; dbfp = DBLinkFieldPairNew (); dbfp->from = field_from->data.intvalue; pair->data.ptrvalue = dbfp; break; } return pair; } NLM_EXTERN Uint1 FieldTypeChoiceFromFieldPairTypeChoice (Uint1 field_pair_choice) { Uint1 field_type_choice = 0; switch (field_pair_choice) { case FieldPairType_source_qual: field_type_choice = FieldType_source_qual; break; case FieldPairType_feature_field: field_type_choice = FieldType_feature_field; break; case FieldPairType_rna_field: field_type_choice = FieldType_rna_field; break; case FieldPairType_cds_gene_prot: field_type_choice = FieldType_cds_gene_prot; break; case FieldPairType_molinfo_field: field_type_choice = FieldType_molinfo_field; break; case FieldPairType_struc_comment_field: field_type_choice = FieldType_struc_comment_field; break; case FieldPairType_dblink: field_type_choice = FieldType_dblink; break; } return field_type_choice; } /* functions for handling single fields */ static int CompareSourceQuals (VoidPtr ptr1, VoidPtr ptr2) { ValNodePtr vnp1; ValNodePtr vnp2; CharPtr tmp1, tmp2; int rval = 0; if (ptr1 != NULL && ptr2 != NULL) { vnp1 = *((ValNodePtr PNTR) ptr1); vnp2 = *((ValNodePtr PNTR) ptr2); if (vnp1 == NULL && vnp2 == NULL) { rval = 0; } else if (vnp1 == NULL) { rval = -1; } else if (vnp2 == NULL) { rval = 1; } else if (vnp1->choice > vnp2->choice) { rval = 1; } else if (vnp1->choice < vnp2->choice) { rval = -1; } else if (vnp1->choice == SourceQualChoice_textqual) { if (vnp1->data.intvalue == vnp2->data.intvalue) { 
return 0; } else if (vnp1->data.intvalue == Source_qual_taxname) { return -1; } else if (vnp2->data.intvalue == Source_qual_taxname) { return 1; } else if (vnp1->data.intvalue == Source_qual_taxid) { return -1; } else if (vnp2->data.intvalue == Source_qual_taxid) { return 1; } else { tmp1 = GetSourceQualName(vnp1->data.intvalue); tmp2 = GetSourceQualName (vnp2->data.intvalue); rval = StringCmp (tmp1, tmp2); } } else if (vnp1->data.intvalue > vnp2->data.intvalue) { rval = 1; } else if (vnp1->data.intvalue < vnp2->data.intvalue) { rval = -1; } else { rval = 0; } } return rval; } static int LIBCALLBACK SortVnpByChoiceAndIntvalue (VoidPtr ptr1, VoidPtr ptr2); NLM_EXTERN int CompareFieldTypesEx (FieldTypePtr vnp1, FieldTypePtr vnp2, Boolean use_source_qual_sort) { int rval = 0; FeatureFieldPtr field1, field2; RnaQualPtr rq1, rq2; StructuredCommentFieldPtr scf1, scf2; Int4 v1, v2; if (vnp1 == NULL && vnp2 == NULL) { rval = 0; } else if (vnp1 == NULL) { rval = -1; } else if (vnp2 == NULL) { rval = 1; } else if (vnp1->choice > vnp2->choice) { rval = 1; } else if (vnp1->choice < vnp2->choice) { rval = -1; } else { switch (vnp1->choice) { case FieldType_source_qual: vnp1 = vnp1->data.ptrvalue; vnp2 = vnp2->data.ptrvalue; if (use_source_qual_sort) { rval = CompareSourceQuals(&vnp1, &vnp2); } else { rval = SortVnpByChoiceAndIntvalue (&vnp1, &vnp2); } break; case FieldType_molinfo_field: vnp1 = vnp1->data.ptrvalue; vnp2 = vnp2->data.ptrvalue; rval = SortVnpByChoiceAndIntvalue (&vnp1, &vnp2); break; case FieldType_feature_field: field1 = (FeatureFieldPtr) vnp1->data.ptrvalue; field2 = (FeatureFieldPtr) vnp2->data.ptrvalue; if (field1 == NULL && field2 == NULL) { rval = 0; } else if (field1 == NULL) { rval = -1; } else if (field2 == NULL) { rval = 1; } else if (field1->type < field2->type) { rval = -1; } else if (field1->type > field2->type) { rval = 1; } else if (field1->field == NULL && field2->field == NULL) { rval = 0; } else if (field1->field == NULL) { rval = -1; } else if 
(field2->field == NULL) { rval = 1; } else if (field1->field->choice < field2->field->choice) { rval = -1; } else if (field1->field->choice > field2->field->choice) { rval = 1; } else { switch (field1->field->choice) { case FeatQualChoice_legal_qual: if (field1->field->data.intvalue < field2->field->data.intvalue) { rval = -1; } else if (field1->field->data.intvalue > field2->field->data.intvalue) { rval = 1; } break; case FeatQualChoice_illegal_qual: rval = 0; break; } } break; case FieldType_cds_gene_prot: case FieldType_pub: case FieldType_misc: if (vnp1->data.intvalue > vnp2->data.intvalue) { rval = 1; } else if (vnp1->data.intvalue < vnp2->data.intvalue) { rval = -1; } break; case FieldType_rna_field: rq1 = (RnaQualPtr) vnp1->data.ptrvalue; rq2 = (RnaQualPtr) vnp2->data.ptrvalue; if (rq1 == NULL && rq2 == NULL) { rval = 0; } else if (rq1 == NULL) { rval = -1; } else if (rq2 == NULL) { rval = 1; } else if ((rval = CompareRnaTypes (rq1->type, rq2->type)) == 0) { if (rq1->field < rq2->field) { rval = -1; } else if (rq1->field > rq2->field) { rval = 1; } else { rval = 0; } } break; case FieldType_struc_comment_field: scf1 = (StructuredCommentFieldPtr) vnp1->data.ptrvalue; scf2 = (StructuredCommentFieldPtr) vnp2->data.ptrvalue; if (scf1 == NULL && scf2 == NULL) { rval = 0; } else if (scf1 == NULL) { rval = -1; } else if (scf2 == NULL) { rval = 1; } else if (scf1->choice < scf2->choice) { rval = -1; } else if (scf1->choice > scf2->choice) { rval = 1; } else if (scf1->choice == StructuredCommentField_named) { rval = StringCmp (scf1->data.ptrvalue, scf2->data.ptrvalue); } break; case FieldType_dblink: v1 = vnp1->data.intvalue; v2 = vnp2->data.intvalue; if (v1 == v2) { rval = 0; } else if (v1 < v2) { rval = -1; } else { rval = 1; } break; } } return rval; } NLM_EXTERN int CompareFieldTypes (FieldTypePtr vnp1, FieldTypePtr vnp2) { return CompareFieldTypesEx (vnp1, vnp2, FALSE); } static Boolean DoFieldTypesMatch (FieldTypePtr field1, FieldTypePtr field2) { if 
(CompareFieldTypes (field1, field2) == 0) { return TRUE; } else { return FALSE; } } static Int2 FeatureTypeFromCDSGeneProtField (Uint2 cds_gene_prot_field); NLM_EXTERN Int2 FeatureTypeFromFieldType (FieldTypePtr field) { Int2 feat_type = Macro_feature_type_any; FeatureFieldPtr ffp; RnaQualPtr rq; if (field == NULL) { feat_type = Macro_feature_type_any; } else { switch (field->choice) { case FieldType_source_qual: feat_type = Macro_feature_type_biosrc; break; case FieldType_feature_field: ffp = (FeatureFieldPtr) field->data.ptrvalue; if (ffp != NULL) { feat_type = ffp->type; } break; case FieldType_rna_field: rq = (RnaQualPtr) field->data.ptrvalue; if (rq != NULL) { feat_type = GetFeatureTypeForRnaType (rq->type->choice); } break; case FieldType_cds_gene_prot: feat_type = FeatureTypeFromCDSGeneProtField (field->data.intvalue); break; } } return feat_type; } NLM_EXTERN Boolean IsFeatureFieldEmpty (FeatureFieldPtr field) { if (field == NULL) return TRUE; if (field->field == NULL) return TRUE; return FALSE; } NLM_EXTERN ValNodePtr MakeFeatureFieldField (Uint2 ftype, Int4 legalqual) { FeatureFieldPtr ff; ValNodePtr field; ff = FeatureFieldNew(); ff->type = ftype; ff->field = ValNodeNew (NULL); ff->field->choice = FeatQualChoice_legal_qual; ff->field->data.intvalue = legalqual; field = ValNodeNew (NULL); field->choice = FieldType_feature_field; field->data.ptrvalue = ff; return field; } NLM_EXTERN Boolean IsRnaQualEmpty (RnaQualPtr rq) { if (rq == NULL) return TRUE; return FALSE; } NLM_EXTERN Boolean IsFieldTypeEmpty (FieldTypePtr field) { Boolean rval = TRUE; ValNodePtr vnp; if (field == NULL) return TRUE; switch (field->choice) { case FieldType_source_qual: if (field->data.ptrvalue != NULL) { rval = FALSE; } break; case FieldType_feature_field: if (!IsFeatureFieldEmpty (field->data.ptrvalue)) { rval = FALSE; } break; case FieldType_cds_gene_prot: rval = FALSE; break; case FieldType_pub: rval = FALSE; break; case FieldType_rna_field: rval = IsRnaQualEmpty 
(field->data.ptrvalue); break; case FieldType_struc_comment_field: vnp = field->data.ptrvalue; if (vnp == NULL || (vnp->choice == StructuredCommentField_named && StringHasNoText (vnp->data.ptrvalue)) || (vnp->choice != StructuredCommentField_named && vnp->choice != StructuredCommentField_database)) { rval = TRUE; } else { rval = FALSE; } break; case FieldType_dblink: if (field->data.intvalue < 1) { rval = TRUE; } else { rval = FALSE; } case FieldType_misc: rval = FALSE; break; case FieldType_molinfo_field: rval = FALSE; break; } return rval; } NLM_EXTERN Boolean AllowFieldMulti (FieldTypePtr field) { Boolean rval = FALSE; FeatureFieldPtr feature_field; if (field == NULL) return FALSE; switch (field->choice) { case FieldType_source_qual: rval = AllowSourceQualMulti (field->data.ptrvalue); break; case FieldType_feature_field: feature_field = (FeatureFieldPtr) field->data.ptrvalue; if (feature_field != NULL && feature_field->field != NULL && feature_field->field->choice == FeatQualChoice_legal_qual && (feature_field->field->data.intvalue == Feat_qual_legal_db_xref || feature_field->field->data.intvalue == Feat_qual_legal_ec_number)) { rval = TRUE; } break; case FieldType_cds_gene_prot: if (field->data.intvalue == CDSGeneProt_field_prot_ec_number || field->data.intvalue == CDSGeneProt_field_mat_peptide_ec_number || field->data.intvalue == CDSGeneProt_field_gene_synonym) { rval = TRUE; } break; case FieldType_pub: break; case FieldType_rna_field: if (field->data.intvalue == Rna_field_gene_synonym) { rval = TRUE; } break; case FieldType_struc_comment_field: break; case FieldType_dblink: rval = TRUE; break; case FieldType_misc: if (field->data.intvalue == Misc_field_keyword) { rval = TRUE; } break; } return rval; } NLM_EXTERN Boolean IsUserObjectStructuredComment (UserObjectPtr uop) { if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "StructuredComment") == 0) { return TRUE; } else { return FALSE; } } static Boolean IsEmptyStructuredComment (UserObjectPtr 
uop) { if (!IsUserObjectStructuredComment(uop)) { return FALSE; } if (uop->data == NULL) { return TRUE; } else { return FALSE; } } static Boolean IsUserObjectDBLink (UserObjectPtr uop) { if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "DBLink") == 0) { return TRUE; } else { return FALSE; } } static Boolean IsEmptyDBLink (UserObjectPtr uop) { if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "DBLink") == 0) { if (uop->data == NULL) { return TRUE; } else { return FALSE; } } else { return FALSE; } } static Boolean IsObjectAppropriateForFieldValue (Uint1 choice, Pointer data, FieldTypePtr field) { SeqFeatPtr sfp; SeqDescrPtr sdp; FeatureFieldPtr fp; RnaQualPtr rq; Boolean rval = FALSE; if (data == NULL || field == NULL) return FALSE; switch (field->choice) { case FieldType_source_qual : if (choice == OBJ_SEQFEAT) { sfp = (SeqFeatPtr) data; if (sfp->data.choice == SEQFEAT_BIOSRC) { rval = TRUE; } } else if (choice == OBJ_SEQDESC) { sdp = (SeqDescrPtr) data; if (sdp->choice == Seq_descr_source) { rval = TRUE; } } break; case FieldType_feature_field : if (choice == OBJ_SEQFEAT) { sfp = (SeqFeatPtr) data; fp = (FeatureFieldPtr) field->data.ptrvalue; if (fp != NULL && (fp->type == Macro_feature_type_any || GetFeatdefFromFeatureType (fp->type) == sfp->idx.subtype)) { rval = TRUE; } } break; case FieldType_rna_field : if (choice == OBJ_SEQFEAT) { sfp = (SeqFeatPtr) data; rq = (RnaQualPtr) field->data.ptrvalue; if (rq != NULL && DoesFeatureMatchRnaType (sfp, rq->type)) { rval = TRUE; } } break; case FieldType_cds_gene_prot : if (choice == 0) { rval = TRUE; } break; case FieldType_molinfo_field : if (choice == OBJ_BIOSEQ) { rval = TRUE; } break; case FieldType_pub: if (choice == OBJ_SEQFEAT) { sfp = (SeqFeatPtr) data; if (sfp->data.choice == SEQFEAT_PUB) { rval = TRUE; } } else if (choice == OBJ_SEQDESC) { sdp = (SeqDescrPtr) data; if (sdp->choice == Seq_descr_pub) { rval = TRUE; } } break; case FieldType_struc_comment_field: if (choice == 
OBJ_SEQDESC) { sdp = (SeqDescrPtr) data; if (sdp->choice == Seq_descr_user && IsUserObjectStructuredComment (sdp->data.ptrvalue)) { rval = TRUE; } } break; case FieldType_dblink: if (choice == OBJ_SEQDESC) { sdp = (SeqDescrPtr) data; if (sdp->choice == Seq_descr_user && IsUserObjectDBLink (sdp->data.ptrvalue)) { rval = TRUE; } } break; case FieldType_misc: if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) { rval = TRUE; } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor && (sdp = (SeqDescrPtr) data) != NULL && sdp->choice == Seq_descr_comment) { rval = TRUE; } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_defline && (sdp = (SeqDescrPtr) data) != NULL && sdp->choice == Seq_descr_title) { rval = TRUE; } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword && (sdp = (SeqDescrPtr) data) != NULL && sdp->choice == Seq_descr_genbank) { rval = TRUE; } break; } return rval; } static Boolean IsObjectAppropriateForFieldPair (Uint1 choice, Pointer data, FieldPairTypePtr fieldpair) { FieldTypePtr f; Boolean rval; f = GetFromFieldFromFieldPair(fieldpair); rval = IsObjectAppropriateForFieldValue(choice, data, f); f = FieldTypeFree (f); return rval; } /* structure and create/free functions for CGPSet, used for handling CDS-Gene-Prot sets */ typedef struct cgpset { ValNodePtr cds_list; ValNodePtr gene_list; ValNodePtr prot_list; ValNodePtr mrna_list; } CGPSetData, PNTR CGPSetPtr; static CGPSetPtr CGPSetNew (void) { CGPSetPtr c; c = (CGPSetPtr) MemNew (sizeof (CGPSetData)); c->cds_list = NULL; c->gene_list = NULL; c->prot_list = NULL; c->mrna_list = NULL; return c; } static CGPSetPtr CGPSetFree (CGPSetPtr c) { if (c != NULL) { c->cds_list = ValNodeFree (c->cds_list); c->gene_list = ValNodeFree (c->gene_list); c->prot_list = ValNodeFree (c->prot_list); c->mrna_list = ValNodeFree (c->mrna_list); c = MemFree (c); } return c; } static ValNodePtr FreeCGPSetList 
/* One row of the feattype_featdef table: pairs a Macro_feature_type_*
 * value with a FEATDEF_* value and a name string.  The table's rows are
 * positional initialisers, so the member order here must not change.
 */
typedef struct feattypefeatdef {
  Int4 feattype;      /* Macro_feature_type_* value */
  Int4 featdef;       /* FEATDEF_* value */
  CharPtr featname;   /* name string for this feature type, e.g. "gene" */
} FeatTypeFeatDefData, PNTR FeatTypeFeatDefPtr;
Macro_feature_type_exon , FEATDEF_exon , "exon" } ,
  /* Continuation of feattype_featdef.  Each initializer supplies, in order,
   * the feattype, featdef and featname members of struct feattypefeatdef.
   * Some FEATDEF values and names occur on more than one row of the table
   * (FEATDEF_otherRNA / "misc_RNA" and FEATDEF_preRNA below repeat values
   * used on earlier rows); lookups that scan from the first row return the
   * earliest match.
   */
  { Macro_feature_type_gC_signal , FEATDEF_GC_signal , "gC_signal" } ,
  { Macro_feature_type_iDNA , FEATDEF_iDNA , "iDNA" } ,
  { Macro_feature_type_intron , FEATDEF_intron , "intron" } ,
  { Macro_feature_type_j_segment , FEATDEF_J_segment , "j_segment" } ,
  { Macro_feature_type_ltr , FEATDEF_LTR , "LTR" } ,
  { Macro_feature_type_mat_peptide , FEATDEF_mat_peptide , "mat_peptide" } ,
  { Macro_feature_type_misc_binding , FEATDEF_misc_binding , "misc_binding" } ,
  { Macro_feature_type_misc_difference , FEATDEF_misc_difference , "misc_difference" } ,
  { Macro_feature_type_misc_feature , FEATDEF_misc_feature , "misc_feature" } ,
  { Macro_feature_type_misc_recomb , FEATDEF_misc_recomb , "misc_recomb" } ,
  { Macro_feature_type_misc_RNA , FEATDEF_otherRNA , "misc_RNA" } ,
  { Macro_feature_type_misc_signal , FEATDEF_misc_signal , "misc_signal" } ,
  { Macro_feature_type_misc_structure , FEATDEF_misc_structure , "misc_structure" } ,
  { Macro_feature_type_modified_base , FEATDEF_modified_base , "modified_base" } ,
  { Macro_feature_type_mutation , FEATDEF_mutation , "mutation" } ,
  { Macro_feature_type_n_region , FEATDEF_N_region , "n_region" } ,
  { Macro_feature_type_old_sequence , FEATDEF_old_sequence , "old_sequence" } ,
  { Macro_feature_type_polyA_signal , FEATDEF_polyA_signal , "polyA_signal" } ,
  { Macro_feature_type_polyA_site , FEATDEF_polyA_site , "polyA_site" } ,
  { Macro_feature_type_precursor_RNA , FEATDEF_preRNA , "precursor_RNA" } ,
  { Macro_feature_type_prim_transcript , FEATDEF_prim_transcript , "prim_transcript" } ,
  { Macro_feature_type_primer_bind , FEATDEF_primer_bind , "primer_bind" } ,
  { Macro_feature_type_promoter , FEATDEF_promoter , "promoter" } ,
  { Macro_feature_type_protein_bind , FEATDEF_protein_bind , "protein_bind" } ,
  { Macro_feature_type_rbs , FEATDEF_RBS , "rbs" } ,
  { Macro_feature_type_repeat_region , FEATDEF_repeat_region , "repeat_region" } ,
  { Macro_feature_type_rep_origin , FEATDEF_rep_origin , "rep_origin" } ,
  { Macro_feature_type_s_region , FEATDEF_S_region , "s_region" } ,
  { Macro_feature_type_sig_peptide , FEATDEF_sig_peptide , "sig_peptide" } ,
  { Macro_feature_type_source , FEATDEF_source , "source" } ,
  { Macro_feature_type_stem_loop , FEATDEF_stem_loop , "stem_loop" } ,
  { Macro_feature_type_sts , FEATDEF_STS , "sts" } ,
  { Macro_feature_type_tata_signal , FEATDEF_TATA_signal , "tata_signal" } ,
  { Macro_feature_type_terminator , FEATDEF_terminator , "terminator" } ,
  { Macro_feature_type_transit_peptide , FEATDEF_transit_peptide , "transit_peptide" } ,
  { Macro_feature_type_unsure , FEATDEF_unsure , "unsure" } ,
  { Macro_feature_type_v_region , FEATDEF_V_region , "v_region" } ,
  { Macro_feature_type_v_segment , FEATDEF_V_segment , "v_segment" } ,
  { Macro_feature_type_variation , FEATDEF_variation , "variation" } ,
  { Macro_feature_type_virion , FEATDEF_virion , "virion" } ,
  { Macro_feature_type_n3clip , FEATDEF_3clip , "3'clip" } ,
  { Macro_feature_type_n3UTR , FEATDEF_3UTR , "3'UTR" } ,
  { Macro_feature_type_n5clip , FEATDEF_5clip , "5'clip" } ,
  { Macro_feature_type_n5UTR , FEATDEF_5UTR , "5'UTR" } ,
  { Macro_feature_type_n10_signal , FEATDEF_10_signal , "10_signal" } ,
  { Macro_feature_type_n35_signal , FEATDEF_35_signal , "35_signal" } ,
  { Macro_feature_type_site_ref , FEATDEF_site_ref , "site_ref" } ,
  { Macro_feature_type_region , FEATDEF_REGION , "region" } ,
  { Macro_feature_type_comment , FEATDEF_COMMENT , "comment" } ,
  { Macro_feature_type_bond , FEATDEF_BOND , "bond" } ,
  { Macro_feature_type_site , FEATDEF_SITE , "site" } ,
  { Macro_feature_type_rsite , FEATDEF_RSITE , "rsite" } ,
  { Macro_feature_type_user , FEATDEF_USER , "user" } ,
  { Macro_feature_type_txinit , FEATDEF_TXINIT , "txinit" } ,
  { Macro_feature_type_num , FEATDEF_NUM , "num" } ,
  { Macro_feature_type_psec_str , FEATDEF_PSEC_STR , "psec_str" } ,
  { Macro_feature_type_non_std_residue , FEATDEF_NON_STD_RESIDUE , "non_std_residue" } ,
  { Macro_feature_type_het , FEATDEF_HET , "het" } ,
  { Macro_feature_type_biosrc ,
FEATDEF_BIOSRC , "biosrc" } , { Macro_feature_type_preprotein , FEATDEF_preprotein , "preprotein" } , { Macro_feature_type_mat_peptide_aa , FEATDEF_mat_peptide_aa , "mat_peptide_aa" } , { Macro_feature_type_sig_peptide_aa , FEATDEF_sig_peptide_aa , "sig_peptide_aa" } , { Macro_feature_type_transit_peptide_aa , FEATDEF_transit_peptide_aa , "transit_peptide_aa" } , { Macro_feature_type_snoRNA , FEATDEF_snoRNA , "snoRNA" } , { Macro_feature_type_gap , FEATDEF_gap , "gap" } , { Macro_feature_type_operon , FEATDEF_operon , "operon" } , { Macro_feature_type_oriT , FEATDEF_oriT , "oriT" } , { Macro_feature_type_ncRNA , FEATDEF_ncRNA , "ncRNA" } , { Macro_feature_type_tmRNA , FEATDEF_tmRNA , "tmRNA" } , { Macro_feature_type_mobile_element, FEATDEF_mobile_element, "mobile_element" } , { Macro_feature_type_regulatory, FEATDEF_regulatory, "regulatory" } }; #define NUM_feattype_featdef sizeof (feattype_featdef) / sizeof (FeatTypeFeatDefData) NLM_EXTERN Int4 GetFeatdefFromFeatureType (Int4 feature_type) { Int4 i; for (i = 0; i < NUM_feattype_featdef; i++) { if (feature_type == feattype_featdef[i].feattype) { return feattype_featdef[i].featdef; } } return FEATDEF_BAD; } NLM_EXTERN Int4 GetFeatureTypeFromFeatdef (Int4 featdef) { Int4 i; for (i = 0; i < NUM_feattype_featdef; i++) { if (featdef == feattype_featdef[i].featdef) { return feattype_featdef[i].feattype; } } return FEATDEF_BAD; } NLM_EXTERN CharPtr GetFeatureNameFromFeatureType (Int4 feature_type) { CharPtr str = NULL; Int4 i; for (i = 0; i < NUM_feattype_featdef && str == NULL; i++) { if (feature_type == feattype_featdef[i].feattype) { str = feattype_featdef[i].featname; } } if (str == NULL) { str = "Unknown feature type"; } return str; } static Boolean Matchnamestring (CharPtr name1, CharPtr name2) { if (name1 == NULL && name2 == NULL) { return TRUE; } else if (name1 == NULL || name2 == NULL) { return FALSE; } else { while (*name1 != 0 && *name2 != 0) { while (*name1 == ' ' || *name1 == '-' || *name1 == '_') { name1++; 
} while (*name2 == ' ' || *name2 == '-' || *name2 == '_') { name2++; } if (tolower (*name1) != tolower(*name2)) { return FALSE; } name1++; name2++; } if (*name1 == 0 && *name2 == 0) { return TRUE; } else { return FALSE; } } } typedef struct stringalias { CharPtr alias; CharPtr canonical; } StringAliasData, PNTR StringAliasPtr; static CharPtr GetCanonical (CharPtr str, StringAliasPtr alias_list) { Int4 i; if (alias_list == NULL) { return str; } for (i = 0; alias_list[i].alias != NULL; i++) { if (Matchnamestring (str, alias_list[i].alias)) { return alias_list[i].canonical; } } return str; } NLM_EXTERN Int4 GetFeatureTypeByName (CharPtr feat_name) { Int4 i; for (i = 0; i < NUM_feattype_featdef; i++) { if (Matchnamestring (feattype_featdef[i].featname, feat_name)) { return feattype_featdef[i].feattype; } } return -1; } NLM_EXTERN void AddImportFeaturesToChoiceList (ValNodePtr PNTR feature_type_list) { Int4 i, seqfeattype; CharPtr featname; ValNodePtr tmp_list = NULL; for (i = 1; i < NUM_feattype_featdef; i++) { if (feattype_featdef[i].feattype == Macro_feature_type_gap) continue; if (feattype_featdef[i].feattype == Macro_feature_type_conflict) continue; if (IsRegulatorySubtype(feattype_featdef[i].featdef)) continue; seqfeattype = FindFeatFromFeatDefType (feattype_featdef[i].featdef); if (seqfeattype == SEQFEAT_IMP) { featname = GetFeatureNameFromFeatureType (feattype_featdef[i].feattype); if (featname != NULL) { ValNodeAddPointer (&tmp_list, feattype_featdef[i].feattype, StringSave (featname)); } } } tmp_list = ValNodeSort (tmp_list, SortVnpByString); ValNodeLink (feature_type_list, tmp_list); } static Boolean IsMostUsedFeature (Uint1 val) { if (val == Macro_feature_type_gene || val == Macro_feature_type_cds || val == Macro_feature_type_prot || val == Macro_feature_type_exon || val == Macro_feature_type_intron || val == Macro_feature_type_mRNA || val == Macro_feature_type_rRNA || val == Macro_feature_type_otherRNA || val == Macro_feature_type_misc_feature) { return 
TRUE; } else { return FALSE; } } static int LIBCALLBACK SortVnpByFeatureName (VoidPtr ptr1, VoidPtr ptr2) { CharPtr str1; CharPtr str2; ValNodePtr vnp1; ValNodePtr vnp2; Boolean most_used1, most_used2; if (ptr1 != NULL && ptr2 != NULL) { vnp1 = *((ValNodePtr PNTR) ptr1); vnp2 = *((ValNodePtr PNTR) ptr2); if (vnp1 != NULL && vnp2 != NULL) { most_used1 = IsMostUsedFeature (vnp1->choice); most_used2 = IsMostUsedFeature (vnp2->choice); if (most_used1 && !most_used2) { return -1; } else if (!most_used1 && most_used2) { return 1; } else { str1 = (CharPtr) vnp1->data.ptrvalue; str2 = (CharPtr) vnp2->data.ptrvalue; if (str1 != NULL && str2 != NULL) { return StringICmp (str1, str2); } } } } return 0; } NLM_EXTERN void AddAllFeaturesToChoiceList (ValNodePtr PNTR feature_type_list) { Int4 i; CharPtr featname; ValNodePtr tmp_list = NULL; for (i = 1; i < NUM_feattype_featdef; i++) { if (feattype_featdef[i].feattype == Macro_feature_type_gap) continue; if (IsRegulatorySubtype(feattype_featdef[i].featdef)) continue; featname = GetFeatureNameFromFeatureType (feattype_featdef[i].feattype); if (featname != NULL) { ValNodeAddPointer (&tmp_list, feattype_featdef[i].feattype, StringSave (featname)); } } tmp_list = ValNodeSort (tmp_list, SortVnpByFeatureName); ValNodeLink (feature_type_list, tmp_list); } typedef struct featqualgbqual { Int4 featqual; Int4 gbqual; Int4 subfield; CharPtr qualname; } FeatQualGBQualData, PNTR FeatQualGBQualPtr; static FeatQualGBQualData featqual_gbqual[] = { { Feat_qual_legal_allele , GBQUAL_allele , 0, "allele" } , { Feat_qual_legal_anticodon , GBQUAL_anticodon , 0, "anticodon" } , { Feat_qual_legal_bound_moiety , GBQUAL_bound_moiety , 0, "bound-moiety" } , { Feat_qual_legal_chromosome , GBQUAL_chromosome , 0, "chromosome" } , { Feat_qual_legal_citation , GBQUAL_citation , 0, "citation" } , { Feat_qual_legal_codon , GBQUAL_codon , 0, "codon" } , { Feat_qual_legal_codon_start , GBQUAL_codon_start , 0, "codon-start" } , { Feat_qual_legal_compare , 
GBQUAL_compare , 0, "compare" } ,
  /* Continuation of featqual_gbqual.  Each initializer supplies, in order,
   * the featqual, gbqual, subfield and qualname members of
   * struct featqualgbqual.  Rows with a non-zero subfield share their gbqual
   * with a subfield-0 row (mobile-element-type, satellite).  The "location"
   * and "name" rows use -1 in the gbqual column.
   */
  { Feat_qual_legal_cons_splice , GBQUAL_cons_splice , 0, "cons-splice" } ,
  { Feat_qual_legal_db_xref , GBQUAL_db_xref , 0, "db-xref" } ,
  { Feat_qual_legal_direction , GBQUAL_direction , 0, "direction" } ,
  { Feat_qual_legal_ec_number , GBQUAL_EC_number , 0, "EC number" } ,
  { Feat_qual_legal_environmental_sample , GBQUAL_environmental_sample , 0, "environmental-sample" } ,
  { Feat_qual_legal_evidence , GBQUAL_evidence , 0, "evidence" } ,
  { Feat_qual_legal_exception , GBQUAL_exception , 0, "exception" } ,
  { Feat_qual_legal_experiment , GBQUAL_experiment , 0, "experiment" } ,
  { Feat_qual_legal_focus , GBQUAL_focus , 0, "focus" } ,
  { Feat_qual_legal_frequency , GBQUAL_frequency , 0, "frequency" } ,
  { Feat_qual_legal_function , GBQUAL_function , 0, "function" } ,
  { Feat_qual_legal_gene , GBQUAL_gene , 0, "locus" } ,
  { Feat_qual_legal_inference , GBQUAL_inference , 0, "inference" } ,
  { Feat_qual_legal_location , -1 , 0, "location" } ,
  { Feat_qual_legal_locus_tag , GBQUAL_locus_tag , 0, "locus-tag" } ,
  { Feat_qual_legal_map , GBQUAL_map , 0, "map" } ,
  { Feat_qual_legal_mobile_element_type , GBQUAL_mobile_element_type , 0, "mobile-element-type" } ,
  { Feat_qual_legal_mobile_element_type_type , GBQUAL_mobile_element_type , 1, "mobile-element-type-type"} ,
  { Feat_qual_legal_mobile_element_name , GBQUAL_mobile_element_type , 2, "mobile-element-name"} ,
  { Feat_qual_legal_mod_base , GBQUAL_mod_base , 0, "mod-base" } ,
  { Feat_qual_legal_mol_type , GBQUAL_mol_type , 0, "mol-type" } ,
  { Feat_qual_legal_name, -1 , 0 , "name" } ,
  { Feat_qual_legal_ncRNA_class , GBQUAL_ncRNA_class , 0, "ncRNA-class" } ,
  { Feat_qual_legal_note , GBQUAL_note , 0, "note" } ,
  { Feat_qual_legal_number , GBQUAL_number , 0, "number" } ,
  { Feat_qual_legal_old_locus_tag , GBQUAL_old_locus_tag , 0, "old-locus-tag" } ,
  { Feat_qual_legal_operon , GBQUAL_operon , 0, "operon" } ,
  { Feat_qual_legal_organism , GBQUAL_organism , 0, "organism" } ,
  { Feat_qual_legal_organelle , GBQUAL_organelle , 0, "organelle" } ,
  { Feat_qual_legal_partial , GBQUAL_partial , 0, "partial" } ,
  { Feat_qual_legal_pcr_conditions, GBQUAL_PCR_conditions , 0, "pcr-conditions" } ,
  { Feat_qual_legal_phenotype , GBQUAL_phenotype , 0, "phenotype" } ,
  { Feat_qual_legal_plasmid , GBQUAL_plasmid , 0, "plasmid" } ,
  { Feat_qual_legal_product , GBQUAL_product , 0, "product" } ,
  { Feat_qual_legal_protein_id , GBQUAL_protein_id , 0, "protein-id" } ,
  { Feat_qual_legal_pseudo , GBQUAL_pseudogene , 0, "pseudogene" } ,
  { Feat_qual_legal_rearranged , GBQUAL_rearranged , 0, "rearranged" } ,
  { Feat_qual_legal_regulatory_class , GBQUAL_regulatory_class , 0, "regulatory-class" } ,
  { Feat_qual_legal_replace , GBQUAL_replace , 0, "replace" } ,
  { Feat_qual_legal_rpt_family , GBQUAL_rpt_family , 0, "rpt-family" } ,
  { Feat_qual_legal_rpt_type , GBQUAL_rpt_type , 0, "rpt-type" } ,
  { Feat_qual_legal_rpt_unit , GBQUAL_rpt_unit , 0, "rpt-unit" } ,
  { Feat_qual_legal_rpt_unit_seq , GBQUAL_rpt_unit_seq , 0, "rpt-unit-seq" } ,
  { Feat_qual_legal_rpt_unit_range , GBQUAL_rpt_unit_range , 0, "rpt-unit-range" } ,
  { Feat_qual_legal_satellite , GBQUAL_satellite , 0, "satellite" } ,
  { Feat_qual_legal_satellite_type , GBQUAL_satellite, 1, "satellite-type"} ,
  { Feat_qual_legal_satellite_name , GBQUAL_satellite, 2, "satellite-name"} ,
  { Feat_qual_legal_segment , GBQUAL_segment , 0, "segment" } ,
  { Feat_qual_legal_sequenced_mol , GBQUAL_sequenced_mol , 0, "sequenced-mol" } ,
  { Feat_qual_legal_standard_name , GBQUAL_standard_name , 0, "standard-name" } ,
  { Feat_qual_legal_tag_peptide , GBQUAL_tag_peptide , 0, "tag-peptide" } ,
  { Feat_qual_legal_transcript_id , GBQUAL_transcript_id , 0, "transcript-id" } ,
  { Feat_qual_legal_transgenic , GBQUAL_transgenic , 0, "transgenic" } ,
  { Feat_qual_legal_translation , GBQUAL_translation , 0, "translation" } ,
  { Feat_qual_legal_transl_except , GBQUAL_transl_except , 0, "transl-except" } ,
  { Feat_qual_legal_transl_table , GBQUAL_transl_table , 0, "transl-table" } ,
  {
Feat_qual_legal_usedin , GBQUAL_usedin , 0, "usedin" } }; #define NUM_featqual_gbqual sizeof (featqual_gbqual) / sizeof (FeatQualGBQualData) NLM_EXTERN Int4 GetNumFeatQual (void) { return NUM_featqual_gbqual; } static Int4 GetGBQualFromFeatQual (Int4 featqual, Int4Ptr subfield) { Int4 i; for (i = 0; i < NUM_featqual_gbqual; i++) { if (featqual == featqual_gbqual[i].featqual) { if (subfield != NULL) { *subfield = featqual_gbqual[i].subfield; } return featqual_gbqual[i].gbqual; } } return -1; } static Int4 GetFeatQualByGBQualAndSubfield (Int4 gbqual, Int4 subfield) { Int4 i; for (i = 0; i < NUM_featqual_gbqual; i++) { if (featqual_gbqual[i].gbqual == gbqual && featqual_gbqual[i].subfield == subfield) { return featqual_gbqual[i].featqual; } } return -1; } NLM_EXTERN CharPtr GetFeatQualName (Int4 featqual) { Int4 i; for (i = 0; i < NUM_featqual_gbqual; i++) { if (featqual == featqual_gbqual[i].featqual) { return featqual_gbqual[i].qualname; } } return NULL; } NLM_EXTERN Int4 GetFeatQualByName (CharPtr qualname) { Int4 i; for (i = 0; i < NUM_featqual_gbqual; i++) { if (Matchnamestring (featqual_gbqual[i].qualname, qualname)) { return featqual_gbqual[i].featqual; } } return -1; } static Int4 NumGbQualSubfields (Int4 gbqual) { Int4 i, num_subfields = 0; for (i = 0; i < NUM_featqual_gbqual; i++) { if (featqual_gbqual[i].gbqual == gbqual) { if (featqual_gbqual[i].subfield > num_subfields) { num_subfields = featqual_gbqual[i].subfield; } } } return num_subfields; } NLM_EXTERN void AddAllFeatureFieldsToChoiceList (ValNodePtr PNTR field_list) { Int4 i; for (i = 0; i < NUM_featqual_gbqual; i++) { ValNodeAddPointer (field_list, featqual_gbqual[i].featqual, StringSave (featqual_gbqual[i].qualname)); } } NLM_EXTERN CharPtr SummarizeFeatQual (ValNodePtr qual) { if (qual == NULL) { return StringSave ("unspecified qualifier"); } else if (qual->choice == FeatQualChoice_legal_qual) { return StringSave (GetFeatQualName (qual->data.intvalue)); } else if (qual->choice == 
FeatQualChoice_illegal_qual) { return StringSave (qual->data.ptrvalue); } else { return StringSave ("unspecified qualifier"); } } /* functions for RnaQual values */ /* functions for RnaType values */ typedef struct rnatypemap { Int4 rnatype; Int4 rnaval; Int4 featuretype; CharPtr rnaname; } RnaTypeMapData, PNTR RnaTypeMapPtr; static RnaTypeMapData rnatypemap[] = { { RnaFeatType_preRNA , RNA_TYPE_premsg, Macro_feature_type_preRNA, "preRNA" } , { RnaFeatType_mRNA , RNA_TYPE_mRNA, Macro_feature_type_mRNA, "mRNA" } , { RnaFeatType_tRNA , RNA_TYPE_tRNA, Macro_feature_type_tRNA, "tRNA" } , { RnaFeatType_rRNA , RNA_TYPE_rRNA, Macro_feature_type_rRNA, "rRNA" } , { RnaFeatType_ncRNA , RNA_TYPE_ncRNA , Macro_feature_type_ncRNA, "ncRNA" } , { RnaFeatType_tmRNA , RNA_TYPE_tmRNA , Macro_feature_type_tmRNA, "tmRNA" } , { RnaFeatType_miscRNA , RNA_TYPE_misc_RNA , Macro_feature_type_misc_RNA, "misc_RNA" } }; #define NUM_rnatypemap sizeof (rnatypemap) / sizeof (RnaTypeMapData) static CharPtr GetNameForRnaType (Int4 rnatype) { Int4 i; for (i = 0; i < NUM_rnatypemap; i++) { if (rnatypemap[i].rnatype == rnatype) { return rnatypemap[i].rnaname; } } return NULL; } static Int4 GetRnaTypeForName (CharPtr rnaname) { Int4 i; for (i = 0; i < NUM_rnatypemap; i++) { if (StringCmp (rnatypemap[i].rnaname, rnaname) == 0) { return rnatypemap[i].rnatype; } } return -1; } static Int4 GetRnaValForRnaType (Int4 rnatype) { Int4 i; for (i = 0; i < NUM_rnatypemap; i++) { if (rnatypemap[i].rnatype == rnatype) { return rnatypemap[i].rnaval; } } return -1; } NLM_EXTERN Int4 GetFeatureTypeForRnaType (Int4 rnatype) { Int4 i; for (i = 0; i < NUM_rnatypemap; i++) { if (rnatypemap[i].rnatype == rnatype) { return rnatypemap[i].featuretype; } } return -1; } NLM_EXTERN ValNodePtr GetRNATypeList (void) { Int4 i; ValNodePtr list = NULL; for (i = 0; i < NUM_rnatypemap; i++) { ValNodeAddPointer (&list, rnatypemap[i].rnatype, StringSave (rnatypemap[i].rnaname)); } return list; } static Boolean DoesFeatureMatchRnaType 
(SeqFeatPtr sfp, RnaFeatTypePtr rt) { Boolean rval = FALSE; RnaRefPtr rrp; RNAGenPtr rgp; Int4 rnaval; if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) { return FALSE; } if (rt == NULL || rt->choice == RnaFeatType_any) return TRUE; rrp = (RnaRefPtr) sfp->data.value.ptrvalue; if (rrp == NULL) return FALSE; rnaval = GetRnaValForRnaType (rt->choice); if (rnaval == rrp->type) { switch (rt->choice) { case RnaFeatType_ncRNA: if (rt->data.ptrvalue == NULL) { rval = TRUE; } else if ((rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL && StringCmp (rgp->_class, rt->data.ptrvalue) == 0) { rval = TRUE; } break; case RnaFeatType_tmRNA: rval = TRUE; break; case RnaFeatType_miscRNA: rval = TRUE; break; default: rval = TRUE; break; } } return rval; } static Int4 CompareRnaTypes (RnaFeatTypePtr rt1, RnaFeatTypePtr rt2) { Int4 rval = 0; if (rt1 == NULL && rt2 == NULL) { rval = 0; } else if (rt1 == NULL) { rval = -1; } else if (rt2 == NULL) { rval = 1; } else if (rt1->choice < rt2->choice) { rval = -1; } else if (rt1->choice > rt2->choice) { rval = 1; } else if (rt1->choice == RnaFeatType_ncRNA) { if (rt2->data.ptrvalue == NULL) { rval = 0; } else { rval = StringCmp (rt1->data.ptrvalue, rt2->data.ptrvalue); } } else { rval = 0; } return rval; } static RnaFeatTypePtr RnaFeatTypeFromSeqFeat (SeqFeatPtr sfp) { RnaRefPtr rrp; RnaFeatTypePtr rt = NULL; RNAGenPtr rgp; if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) { return NULL; } rrp = (RnaRefPtr) sfp->data.value.ptrvalue; switch (rrp->type) { case RNA_TYPE_premsg: rt = ValNodeNew (NULL); rt->choice = RnaFeatType_preRNA; break; case RNA_TYPE_mRNA: rt = ValNodeNew (NULL); rt->choice = RnaFeatType_mRNA; break; case RNA_TYPE_tRNA: rt = ValNodeNew (NULL); rt->choice = RnaFeatType_tRNA; break; case RNA_TYPE_rRNA: rt = ValNodeNew (NULL); rt->choice = RnaFeatType_rRNA; break; case RNA_TYPE_ncRNA: rt = ValNodeNew (NULL); rt->choice = RnaFeatType_ncRNA; if (rrp->ext.choice == 3) { rgp = (RNAGenPtr) 
rrp->ext.value.ptrvalue; if (rgp != NULL && !StringHasNoText (rgp->_class)) { rt->data.ptrvalue = StringSave (rgp->_class); } } break; case RNA_TYPE_tmRNA: rt = ValNodeNew (NULL); rt->choice = RnaFeatType_tmRNA; break; case RNA_TYPE_misc_RNA: case 255: rt = ValNodeNew (NULL); rt->choice = RnaFeatType_miscRNA; break; } return rt; } typedef struct rnafieldname { Int4 field; Int4 featqual; CharPtr fieldname; } RnaFieldNameData, PNTR RnaFieldNamePtr; static RnaFieldNameData rnafieldnames[] = { { Rna_field_product , Feat_qual_legal_product, "product" } , { Rna_field_comment , Feat_qual_legal_note, "comment" } , { Rna_field_codons_recognized , Feat_qual_legal_codons_recognized, "codons recognized" } , { Rna_field_ncrna_class , Feat_qual_legal_ncRNA_class, "ncRNA class" } , { Rna_field_tag_peptide , Feat_qual_legal_tag_peptide, "tag-peptide" } , { Rna_field_anticodon , Feat_qual_legal_anticodon, "anticodon" } , { Rna_field_transcript_id , Feat_qual_legal_transcript_id, "transcript ID" } , { Rna_field_gene_locus , Feat_qual_legal_gene, "gene locus" } , { Rna_field_gene_description , Feat_qual_legal_gene_description, "gene description" } , { Rna_field_gene_maploc , Feat_qual_legal_map, "gene maploc" } , { Rna_field_gene_locus_tag , Feat_qual_legal_locus_tag, "gene locus tag" } , { Rna_field_gene_synonym , Feat_qual_legal_synonym, "gene synonym" } , { Rna_field_gene_comment , Feat_qual_legal_gene_comment, "gene comment" } }; #define NUM_rnafieldnames sizeof (rnafieldnames) / sizeof (RnaFieldNameData) NLM_EXTERN CharPtr GetNameForRnaField (Int4 rnafield) { Int4 i; for (i = 0; i < NUM_rnafieldnames; i++) { if (rnafieldnames[i].field == rnafield) { return rnafieldnames[i].fieldname; } } return NULL; } static Int4 GetRnaFieldForName (CharPtr fieldname) { Int4 i; for (i = 0; i < NUM_rnafieldnames; i++) { if (StringCmp (rnafieldnames[i].fieldname, fieldname) == 0) { return rnafieldnames[i].field; } } return -1; } NLM_EXTERN ValNodePtr GetRnaFieldList (void) { ValNodePtr list = 
NULL; Int4 i; for (i = 0; i < NUM_rnafieldnames; i++) { ValNodeAddPointer (&list, rnafieldnames[i].field, StringSave (rnafieldnames[i].fieldname)); } return list; } static Int4 GetFeatQualForRnaField (Int4 field) { Int4 i; for (i = 0; i < NUM_rnafieldnames; i++) { if (rnafieldnames[i].field == field) { return rnafieldnames[i].featqual; } } return -1; } NLM_EXTERN FeatureFieldPtr FeatureFieldFromRnaQual (RnaQualPtr rq) { FeatureFieldPtr ffp = NULL; Int4 type, qual; if (rq == NULL || rq->type == NULL) return NULL; type = GetFeatureTypeForRnaType (rq->type->choice); qual = GetFeatQualForRnaField (rq->field); if (type >= 0 && qual >= 0) { ffp = FeatureFieldNew (); ffp->type = type; ValNodeAddInt (&(ffp->field), FeatQualChoice_legal_qual, qual); } return ffp; } NLM_EXTERN RnaQualPtr RnaQualFromFeatureField (FeatureFieldPtr ffp) { RnaQualPtr rq = NULL; Int4 i; if (ffp != NULL && ffp->field != NULL && ffp->field->choice == FeatQualChoice_legal_qual) { for (i = 0; i < NUM_rnafieldnames && rnafieldnames[i].featqual != ffp->field->choice; i++) { } if (i < NUM_rnafieldnames) { rq = RnaQualNew (); rq->field = rnafieldnames[i].featqual; rq->type = ValNodeNew (NULL); switch (ffp->type) { case Macro_feature_type_preRNA: case Macro_feature_type_precursor_RNA: rq->type->choice = RnaFeatType_preRNA; break; case Macro_feature_type_mRNA: rq->type->choice = RnaFeatType_mRNA; break; case Macro_feature_type_tRNA: rq->type->choice = RnaFeatType_tRNA; break; case Macro_feature_type_rRNA: rq->type->choice = RnaFeatType_rRNA; break; case Macro_feature_type_snRNA: rq->type->choice = RnaFeatType_ncRNA; rq->type->data.ptrvalue = StringSave ("snRNA"); break; case Macro_feature_type_scRNA: rq->type->choice = RnaFeatType_ncRNA; rq->type->data.ptrvalue = StringSave ("scRNA"); break; case Macro_feature_type_snoRNA: rq->type->choice = RnaFeatType_ncRNA; rq->type->data.ptrvalue = StringSave ("snoRNA"); break; case Macro_feature_type_otherRNA: case Macro_feature_type_misc_RNA: rq->type->choice = 
RnaFeatType_miscRNA; break; case Macro_feature_type_ncRNA: rq->type->choice = RnaFeatType_ncRNA; break; case Macro_feature_type_tmRNA: rq->type->choice = RnaFeatType_tmRNA; break; default: rq = RnaQualFree (rq); break; } } } return rq; } NLM_EXTERN CharPtr SummarizeRnaType (RnaFeatTypePtr rt) { CharPtr rnatypename = NULL; CharPtr fmt = "%s ncRNA"; if (rt == NULL || rt->choice == RnaFeatType_any) { rnatypename = StringSave ("Any RNA"); } else if (rt->choice == RnaFeatType_ncRNA) { if (StringHasNoText (rt->data.ptrvalue)) { return StringSave ("ncRNA"); } else { rnatypename = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (rt->data.ptrvalue))); sprintf (rnatypename, fmt, rt->data.ptrvalue); } } else { rnatypename = StringSave (GetNameForRnaType (rt->choice)); } return rnatypename; } static CharPtr SummarizeRnaQual (RnaQualPtr rq) { CharPtr rnatypename, qualname; CharPtr any_fmt = "RNA %s"; CharPtr fmt = "%s %s"; CharPtr s = NULL; if (rq == NULL) return NULL; qualname = GetNameForRnaField (rq->field); if (qualname == NULL) { return NULL; } rnatypename = SummarizeRnaType (rq->type); if (rnatypename == NULL) { s = (CharPtr) MemNew (sizeof (Char) * (StringLen (any_fmt) + StringLen (qualname))); sprintf (s, any_fmt, qualname); } else { s = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (rnatypename) + StringLen (qualname))); sprintf (s, fmt, rnatypename, qualname); rnatypename = MemFree (rnatypename); } return s; } static CharPtr SummarizeStructuredCommentField (StructuredCommentFieldPtr field) { CharPtr summ = NULL; CharPtr fmt = "structured comment field %s"; if (field == NULL) return NULL; if (field->choice == StructuredCommentField_database) { summ = StringSave ("structured comment database"); } else if (field->choice == StructuredCommentField_named) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (field->data.ptrvalue))); sprintf (summ, fmt, field->data.ptrvalue == NULL ? 
"" : field->data.ptrvalue);
  }
  return summ;
}


/* Values of the typeflag column of srcqual_scqual. */
#define IS_ORGMOD 1
#define IS_SUBSRC 2
#define IS_OTHER 3

/* One row of the source-qualifier table: the Source_qual id, the value in
 * the subtype column (an ORGMOD_ or SUBSRC_ constant on IS_ORGMOD/IS_SUBSRC
 * rows), the typeflag saying which kind of row it is, a subfield number
 * (1..3 on the -inst / -coll / -specid rows, 0 elsewhere) and the display
 * name.
 */
typedef struct srcqualscqual {
  Int4 srcqual;
  Int4 subtype;
  Int4 typeflag;
  Int4 subfield;
  CharPtr qualname;
} SrcQualSCQualData, PNTR SrcQualSCQualPtr;

/* Display names used for the "all ..." rows of the table. */
#define kAllNotesStr "All Notes"
#define kAllQualsStr "All"
#define kAllPrimersStr "All Primers"

static SrcQualSCQualData srcqual_scqual[] = {
  { Source_qual_acronym , ORGMOD_acronym , IS_ORGMOD , 0 , "acronym" } ,
  { Source_qual_anamorph , ORGMOD_anamorph , IS_ORGMOD , 0 , "anamorph" } ,
  { Source_qual_authority , ORGMOD_authority , IS_ORGMOD , 0 , "authority" } ,
  { Source_qual_bio_material , ORGMOD_bio_material , IS_ORGMOD , 0 , "bio-material" } ,
  { Source_qual_bio_material_INST , ORGMOD_bio_material , IS_ORGMOD , 1 , "bio-material-inst" } ,
  { Source_qual_bio_material_COLL , ORGMOD_bio_material , IS_ORGMOD , 2 , "bio-material-coll" } ,
  { Source_qual_bio_material_SpecID , ORGMOD_bio_material , IS_ORGMOD , 3 , "bio-material-specid" } ,
  { Source_qual_biotype , ORGMOD_biotype , IS_ORGMOD , 0 , "biotype" } ,
  { Source_qual_biovar , ORGMOD_biovar , IS_ORGMOD , 0 , "biovar" } ,
  { Source_qual_breed , ORGMOD_breed , IS_ORGMOD , 0 , "breed" } ,
  { Source_qual_cell_line , SUBSRC_cell_line , IS_SUBSRC , 0 , "cell-line" } ,
  { Source_qual_cell_type , SUBSRC_cell_type , IS_SUBSRC , 0 , "cell-type" } ,
  { Source_qual_chemovar , ORGMOD_chemovar , IS_ORGMOD , 0 , "chemovar" } ,
  { Source_qual_chromosome , SUBSRC_chromosome , IS_SUBSRC , 0 , "chromosome" } ,
  { Source_qual_clone , SUBSRC_clone , IS_SUBSRC , 0 , "clone" } ,
  { Source_qual_clone_lib , SUBSRC_clone_lib , IS_SUBSRC , 0 , "clone-lib" } ,
  { Source_qual_collected_by , SUBSRC_collected_by , IS_SUBSRC , 0 , "collected-by" } ,
  { Source_qual_collection_date , SUBSRC_collection_date , IS_SUBSRC , 0 , "collection-date" } ,
  { Source_qual_common , ORGMOD_common , IS_ORGMOD , 0 , "common" } ,
  { Source_qual_common_name , 0 , IS_OTHER , 0 , "common name" } ,
  { Source_qual_country , SUBSRC_country , IS_SUBSRC , 0 , "country" } ,
  { Source_qual_cultivar , ORGMOD_cultivar , IS_ORGMOD , 0 , "cultivar" } ,
  { Source_qual_culture_collection , ORGMOD_culture_collection , IS_ORGMOD , 0 , "culture-collection" } ,
  { Source_qual_culture_collection_INST , ORGMOD_culture_collection , IS_ORGMOD , 1 , "culture-collection-inst" } ,
  { Source_qual_culture_collection_COLL , ORGMOD_culture_collection , IS_ORGMOD , 2 , "culture-collection-coll" } ,
  { Source_qual_culture_collection_SpecID , ORGMOD_culture_collection , IS_ORGMOD , 3 , "culture-collection-specid" } ,
  { Source_qual_dbxref , 0 , IS_OTHER , 0 , "dbxref" } ,
  { Source_qual_dev_stage , SUBSRC_dev_stage , IS_SUBSRC , 0 , "dev-stage" } ,
  { Source_qual_division , 0 , IS_OTHER, 0 , "division" } ,
  { Source_qual_dosage , ORGMOD_dosage , IS_ORGMOD , 0 , "dosage" } ,
  { Source_qual_ecotype , ORGMOD_ecotype , IS_ORGMOD , 0 , "ecotype" } ,
  { Source_qual_endogenous_virus_name , SUBSRC_endogenous_virus_name , IS_SUBSRC , 0 , "endogenous-virus-name" } ,
  { Source_qual_environmental_sample , SUBSRC_environmental_sample , IS_SUBSRC , 0 , "environmental-sample" } ,
  { Source_qual_forma , ORGMOD_forma , IS_ORGMOD , 0 , "forma" } ,
  { Source_qual_forma_specialis , ORGMOD_forma_specialis , IS_ORGMOD , 0 , "forma-specialis" } ,
  { Source_qual_frequency , SUBSRC_frequency , IS_SUBSRC , 0 , "frequency" } ,
  { Source_qual_fwd_primer_name , SUBSRC_fwd_primer_name , IS_SUBSRC , 0 , "fwd-primer-name" } ,
  { Source_qual_fwd_primer_seq , SUBSRC_fwd_primer_seq , IS_SUBSRC , 0 , "fwd-primer-seq" } ,
  { Source_qual_gb_acronym , ORGMOD_gb_acronym , IS_ORGMOD , 0 , "gb-acronym" } ,
  { Source_qual_gb_anamorph , ORGMOD_gb_anamorph , IS_ORGMOD , 0 , "gb-anamorph" } ,
  { Source_qual_gb_synonym , ORGMOD_gb_synonym , IS_ORGMOD , 0 , "gb-synonym" } ,
  { Source_qual_genotype , SUBSRC_genotype , IS_SUBSRC , 0 , "genotype" } ,
  { Source_qual_germline , SUBSRC_germline , IS_SUBSRC , 0 , "germline" } ,
  { Source_qual_group , ORGMOD_group , IS_ORGMOD , 0 , "group" } ,
  { Source_qual_haplotype , SUBSRC_haplotype , IS_SUBSRC , 0 , "haplotype" } ,
  { Source_qual_identified_by , SUBSRC_identified_by , IS_SUBSRC , 0 , "identified-by" } ,
  { Source_qual_insertion_seq_name , SUBSRC_insertion_seq_name , IS_SUBSRC , 0 , "insertion-seq-name" } ,
  { Source_qual_isolate , ORGMOD_isolate , IS_ORGMOD , 0 , "isolate" } ,
  { Source_qual_isolation_source , SUBSRC_isolation_source , IS_SUBSRC , 0 , "isolation-source" } ,
  { Source_qual_lab_host , SUBSRC_lab_host , IS_SUBSRC , 0 , "lab-host" } ,
  { Source_qual_lat_lon , SUBSRC_lat_lon , IS_SUBSRC , 0 , "lat-lon" } ,
  { Source_qual_lineage , 0, IS_OTHER, 0 , "lineage" } ,
  { Source_qual_map , SUBSRC_map , IS_SUBSRC , 0 , "map" } ,
  { Source_qual_metagenome_source , ORGMOD_metagenome_source , IS_ORGMOD , 0 , "metagenome-source" } ,
  { Source_qual_metagenomic , SUBSRC_metagenomic , IS_SUBSRC , 0 , "metagenomic" } ,
  { Source_qual_old_lineage , ORGMOD_old_lineage , IS_ORGMOD , 0 , "old-lineage" } ,
  { Source_qual_old_name , ORGMOD_old_name , IS_ORGMOD , 0 , "old-name" } ,
  { Source_qual_orgmod_note , ORGMOD_other, IS_ORGMOD, 0 , "note-orgmod" } ,
  { Source_qual_pathovar , ORGMOD_pathovar , IS_ORGMOD , 0 , "pathovar" } ,
  { Source_qual_plasmid_name , SUBSRC_plasmid_name , IS_SUBSRC , 0 , "plasmid-name" } ,
  { Source_qual_plastid_name , SUBSRC_plastid_name , IS_SUBSRC , 0 , "plastid-name" } ,
  { Source_qual_pop_variant , SUBSRC_pop_variant , IS_SUBSRC , 0 , "pop-variant" } ,
  { Source_qual_rearranged , SUBSRC_rearranged , IS_SUBSRC , 0 , "rearranged" } ,
  { Source_qual_rev_primer_name , SUBSRC_rev_primer_name , IS_SUBSRC , 0 , "rev-primer-name" } ,
  { Source_qual_rev_primer_seq , SUBSRC_rev_primer_seq , IS_SUBSRC , 0 , "rev-primer-seq" } ,
  { Source_qual_segment , SUBSRC_segment , IS_SUBSRC , 0 , "segment" } ,
  { Source_qual_serogroup , ORGMOD_serogroup , IS_ORGMOD , 0 , "serogroup" } ,
  { Source_qual_serotype , ORGMOD_serotype , IS_ORGMOD , 0 , "serotype" } ,
  { Source_qual_serovar , ORGMOD_serovar , IS_ORGMOD , 0 , "serovar" } ,
  { Source_qual_sex , SUBSRC_sex , IS_SUBSRC , 0 , "sex" } ,
  { Source_qual_nat_host , ORGMOD_nat_host , IS_ORGMOD , 0 , "host" } ,
  { Source_qual_specimen_voucher , ORGMOD_specimen_voucher , IS_ORGMOD , 0 , "specimen-voucher" } ,
  { Source_qual_specimen_voucher_INST , ORGMOD_specimen_voucher , IS_ORGMOD , 1 , "specimen-voucher-inst" } ,
  { Source_qual_specimen_voucher_COLL , ORGMOD_specimen_voucher , IS_ORGMOD , 2 , "specimen-voucher-coll" } ,
  { Source_qual_specimen_voucher_SpecID , ORGMOD_specimen_voucher , IS_ORGMOD , 3 , "specimen-voucher-specid" } ,
  { Source_qual_strain , ORGMOD_strain , IS_ORGMOD , 0 , "strain" } ,
  { Source_qual_subclone , SUBSRC_subclone , IS_SUBSRC , 0 , "subclone" } ,
  { Source_qual_subgroup , ORGMOD_subgroup , IS_ORGMOD , 0 , "subgroup" } ,
  { Source_qual_subsource_note , SUBSRC_other , IS_SUBSRC , 0 , "note-subsrc" } ,
  { Source_qual_sub_species , ORGMOD_sub_species , IS_ORGMOD , 0 , "sub-species" } ,
  { Source_qual_substrain , ORGMOD_substrain , IS_ORGMOD , 0 , "substrain" } ,
  { Source_qual_subtype , ORGMOD_subtype , IS_ORGMOD , 0 , "subtype" } ,
  { Source_qual_synonym , ORGMOD_synonym , IS_ORGMOD , 0 , "synonym" } ,
  { Source_qual_taxname , 0 , IS_OTHER , 0 , "taxname" } ,
  { Source_qual_teleomorph , ORGMOD_teleomorph , IS_ORGMOD , 0 , "teleomorph" } ,
  { Source_qual_tissue_lib , SUBSRC_tissue_lib , IS_SUBSRC , 0 , "tissue-lib" } ,
  { Source_qual_tissue_type , SUBSRC_tissue_type , IS_SUBSRC , 0 , "tissue-type" } ,
  { Source_qual_transgenic , SUBSRC_transgenic , IS_SUBSRC , 0 , "transgenic" } ,
  { Source_qual_transposon_name , SUBSRC_transposon_name , IS_SUBSRC , 0 , "transposon-name" } ,
  { Source_qual_type , ORGMOD_type , IS_ORGMOD , 0 , "type" } ,
  { Source_qual_type_material , ORGMOD_type_material , IS_ORGMOD , 0 , "type-material" } ,
  { Source_qual_variety , ORGMOD_variety , IS_ORGMOD , 0 , "variety" } ,
  { Source_qual_all_notes , 255 , IS_OTHER , 0 , kAllNotesStr } ,
  { Source_qual_all_quals , 0 , IS_OTHER , 0, kAllQualsStr } ,
  {
Source_qual_mating_type , SUBSRC_mating_type , IS_SUBSRC , 0 , "mating-type" } , { Source_qual_linkage_group , SUBSRC_linkage_group , IS_SUBSRC , 0 , "linkage-group" } , { Source_qual_haplogroup , SUBSRC_haplogroup, IS_SUBSRC, 0, "haplogroup"} , { Source_qual_taxid , 0 , IS_OTHER , 0 , "taxid" } , { Source_qual_all_primers , 0, IS_OTHER , 0, kAllPrimersStr } , { Source_qual_altitude , SUBSRC_altitude, IS_SUBSRC , 0 , "altitude"} }; #define NUM_srcqual_scqual sizeof (srcqual_scqual) / sizeof (SrcQualSCQualData) static StringAliasData src_qual_alias_list[] = { {"organism", "taxname"}, {"organism name", "taxname"}, {"date", "collection-date"}, {"voucher", "specimen-voucher"}, {"specific-host", "host"}, {"note sub-source", "note-subsrc"}, { NULL, NULL} }; NLM_EXTERN Int4 GetSubSrcQualFromSrcQual (Int4 srcqual, Int4Ptr subfield) { Int4 i; for (i = 0; i < NUM_srcqual_scqual; i++) { if (srcqual == srcqual_scqual[i].srcqual) { if (srcqual_scqual[i].typeflag == IS_SUBSRC) { if (subfield != NULL) { *subfield = srcqual_scqual[i].subfield; } return srcqual_scqual[i].subtype; } else { return -1; } } } return -1; } NLM_EXTERN Int4 GetOrgModQualFromSrcQual (Int4 srcqual, Int4Ptr subfield) { Int4 i; for (i = 0; i < NUM_srcqual_scqual; i++) { if (srcqual == srcqual_scqual[i].srcqual) { if (srcqual_scqual[i].typeflag == IS_ORGMOD) { if (subfield != NULL) { *subfield = srcqual_scqual[i].subfield; } return srcqual_scqual[i].subtype; } else { return -1; } } } return -1; } NLM_EXTERN Int4 GetSrcQualFromSubSrcOrOrgMod (Int4 qual, Boolean is_org_mod) { Int4 i; for (i = 0; i < NUM_srcqual_scqual; i++) { if (qual == srcqual_scqual[i].subtype && ((is_org_mod && srcqual_scqual[i].typeflag == IS_ORGMOD) || (!is_org_mod && srcqual_scqual[i].typeflag == IS_SUBSRC))) { return srcqual_scqual[i].srcqual; } } return -1; } NLM_EXTERN Boolean IsNonTextSourceQual (Int4 srcqual) { if (srcqual == Source_qual_transgenic || srcqual == Source_qual_germline || srcqual == Source_qual_metagenomic || srcqual == 
Source_qual_environmental_sample || srcqual == Source_qual_rearranged) { return TRUE; } else { return FALSE; } } NLM_EXTERN Boolean IsNonTextFieldType (FieldTypePtr field) { ValNodePtr vnp; if (field == NULL) { return FALSE; } else if (field->choice == FieldType_molinfo_field) { return TRUE; } else if (field->choice != FieldType_source_qual) { return FALSE; } else if ((vnp = field->data.ptrvalue) == NULL) { return FALSE; } else if (vnp->choice != SourceQualChoice_textqual) { return FALSE; } else { return IsNonTextSourceQual (vnp->data.intvalue); } } NLM_EXTERN CharPtr GetSourceQualName (Int4 srcqual) { CharPtr str = NULL; Int4 i; for (i = 0; i < NUM_srcqual_scqual && str == NULL; i++) { if (srcqual_scqual[i].srcqual == srcqual) { str = srcqual_scqual[i].qualname; } } if (str == NULL) { str = "Unknown source qualifier"; } return str; } NLM_EXTERN Int4 GetSourceQualTypeByName (CharPtr qualname) { Int4 i; qualname = GetCanonical (qualname, src_qual_alias_list); for (i = 0; i < NUM_srcqual_scqual; i++) { if (Matchnamestring(srcqual_scqual[i].qualname, qualname)) { return srcqual_scqual[i].srcqual; } } if (StringICmp (qualname, "subsp.") == 0) { return Source_qual_sub_species; } else if (StringICmp (qualname, "var.") == 0) { return Source_qual_variety; } else if (StringICmp (qualname, "str.") == 0) { return Source_qual_strain; } else if (StringICmp (qualname, "note") == 0) { return Source_qual_orgmod_note; } else if (Matchnamestring (qualname, "latitude-longitude") || Matchnamestring (qualname, "lat-long")) { return Source_qual_lat_lon; } return -1; } NLM_EXTERN ValNodePtr GetSourceQualList (Boolean for_remove) { ValNodePtr list = NULL, tmp = NULL, last = NULL; Int4 i; if (for_remove) { ValNodeAddPointer (&list, 0, StringSave (kAllQualsStr)); last = ValNodeAddPointer (&list, 0, StringSave (kAllNotesStr)); last = ValNodeAddPointer (&list, 0, StringSave (kAllPrimersStr)); } for (i = 0; i < NUM_srcqual_scqual; i++) { if (srcqual_scqual[i].srcqual != Source_qual_all_notes 
&& srcqual_scqual[i].srcqual != Source_qual_all_quals && srcqual_scqual[i].srcqual != Source_qual_all_primers) { ValNodeAddPointer (&tmp, 0, StringSave (srcqual_scqual[i].qualname)); } } tmp = ValNodeSort (tmp, SortVnpByString); if (last == NULL) { list = tmp; } else { last->next = tmp; } return list; } NLM_EXTERN ValNodePtr GetSourceQualFieldListFromBioSource (BioSourcePtr biop) { SubSourcePtr ssp; OrgModPtr mod; ValNodePtr list = NULL, vnp; Int4 i; PCRReactionSetPtr ps; PCRPrimerPtr pp; if (biop == NULL) { return NULL; } vnp = ValNodeNew (NULL); vnp->choice = SourceQualChoice_textqual; vnp->data.intvalue = Source_qual_taxname; ValNodeAddPointer (&list, FieldType_source_qual, vnp); /* add other tax values */ if (biop->org != NULL && !StringHasNoText (biop->org->common)) { vnp = ValNodeNew (NULL); vnp->choice = SourceQualChoice_textqual; vnp->data.intvalue = Source_qual_common_name; ValNodeAddPointer (&list, FieldType_source_qual, vnp); } if (biop->org != NULL && biop->org->orgname != NULL) { if (!StringHasNoText (biop->org->orgname->lineage)) { vnp = ValNodeNew (NULL); vnp->choice = SourceQualChoice_textqual; vnp->data.intvalue = Source_qual_lineage; ValNodeAddPointer (&list, FieldType_source_qual, vnp); } if (!StringHasNoText (biop->org->orgname->div)) { vnp = ValNodeNew (NULL); vnp->choice = SourceQualChoice_textqual; vnp->data.intvalue = Source_qual_division; ValNodeAddPointer (&list, FieldType_source_qual, vnp); } } /* add taxid */ if (HasTaxonomyID(biop)) { vnp = ValNodeNew (NULL); vnp->choice = SourceQualChoice_textqual; vnp->data.intvalue = Source_qual_taxid; ValNodeAddPointer (&list, FieldType_source_qual, vnp); } /* add subtypes */ for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { for (i = 0; i < NUM_srcqual_scqual && (srcqual_scqual[i].typeflag != IS_SUBSRC || srcqual_scqual[i].subtype != ssp->subtype); i++) {} if (i < NUM_srcqual_scqual) { vnp = ValNodeNew (NULL); vnp->choice = SourceQualChoice_textqual; vnp->data.intvalue = 
srcqual_scqual[i].srcqual; ValNodeAddPointer (&list, FieldType_source_qual, vnp); } } /* add orgmods */ if (biop->org != NULL && biop->org->orgname != NULL) { for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) { for (i = 0; i < NUM_srcqual_scqual && (srcqual_scqual[i].typeflag != IS_ORGMOD || srcqual_scqual[i].subtype != mod->subtype); i++) {} if (i < NUM_srcqual_scqual) { vnp = ValNodeNew (NULL); vnp->choice = SourceQualChoice_textqual; vnp->data.intvalue = srcqual_scqual[i].srcqual; ValNodeAddPointer (&list, FieldType_source_qual, vnp); } } } /* add PCR primers */ for (ps = biop->pcr_primers; ps != NULL; ps = ps->next) { for (pp = ps->forward; pp != NULL; pp = pp->next) { if (!StringHasNoText (pp->name)) { vnp = ValNodeNew (NULL); vnp->choice = SourceQualChoice_textqual; vnp->data.intvalue = Source_qual_fwd_primer_name; ValNodeAddPointer (&list, FieldType_source_qual, vnp); } if (!StringHasNoText (pp->seq)) { vnp = ValNodeNew (NULL); vnp->choice = SourceQualChoice_textqual; vnp->data.intvalue = Source_qual_fwd_primer_seq; ValNodeAddPointer (&list, FieldType_source_qual, vnp); } } for (pp = ps->reverse; pp != NULL; pp = pp->next) { if (!StringHasNoText (pp->name)) { vnp = ValNodeNew (NULL); vnp->choice = SourceQualChoice_textqual; vnp->data.intvalue = Source_qual_rev_primer_name; ValNodeAddPointer (&list, FieldType_source_qual, vnp); } if (!StringHasNoText (pp->seq)) { vnp = ValNodeNew (NULL); vnp->choice = SourceQualChoice_textqual; vnp->data.intvalue = Source_qual_rev_primer_seq; ValNodeAddPointer (&list, FieldType_source_qual, vnp); } } } return list; } NLM_EXTERN Boolean AllowSourceQualMulti (SourceQualChoicePtr s) { Boolean rval = FALSE; if (s == NULL || s->choice != SourceQualChoice_textqual || s->data.ptrvalue == NULL) { return FALSE; } else if (s->data.intvalue == Source_qual_culture_collection || s->data.intvalue == Source_qual_bio_material || s->data.intvalue == Source_qual_specimen_voucher || s->data.intvalue == Source_qual_dbxref || 
s->data.intvalue == Source_qual_fwd_primer_name || s->data.intvalue == Source_qual_fwd_primer_seq || s->data.intvalue == Source_qual_rev_primer_name || s->data.intvalue == Source_qual_rev_primer_seq) { rval = TRUE; } return rval; } NLM_EXTERN TextFsaPtr GetOrgModSearch (void) { TextFsaPtr tags; tags = TextFsaNew(); TextFsaAdd (tags, "pathovar"); TextFsaAdd (tags, "serovar"); TextFsaAdd (tags, "strain"); TextFsaAdd (tags, "sub-species"); TextFsaAdd (tags, "variety"); /* abbreviations */ TextFsaAdd (tags, "subsp."); TextFsaAdd (tags, "var."); TextFsaAdd (tags, "str."); return tags; } typedef struct srclocgenome { Int4 srcloc; Int4 genome; CharPtr name; } SrcLocGenomeData, PNTR SrcLocGenomePtr; static SrcLocGenomeData srcloc_genome[] = { { Source_location_unknown , GENOME_unknown , " " } , { Source_location_genomic , GENOME_genomic , "genomic" } , { Source_location_chloroplast , GENOME_chloroplast , "chloroplast" } , { Source_location_chromoplast , GENOME_chromoplast , "chromoplast" } , { Source_location_kinetoplast , GENOME_kinetoplast , "kinetoplast" } , { Source_location_mitochondrion , GENOME_mitochondrion , "mitochondrion" } , { Source_location_plastid , GENOME_plastid , "plastid" } , { Source_location_macronuclear , GENOME_macronuclear , "macronuclear" } , { Source_location_extrachrom , GENOME_extrachrom , "extrachromosomal" } , { Source_location_plasmid , GENOME_plasmid , "plasmid" } , { Source_location_transposon , GENOME_transposon , "transposon" } , { Source_location_insertion_seq , GENOME_insertion_seq , "insertion-seq" } , { Source_location_cyanelle , GENOME_cyanelle , "cyanelle" } , { Source_location_proviral , GENOME_proviral , "proviral" } , { Source_location_virion , GENOME_virion , "virion" } , { Source_location_nucleomorph , GENOME_nucleomorph , "nucleomorph" } , { Source_location_apicoplast , GENOME_apicoplast , "apicoplast" } , { Source_location_leucoplast , GENOME_leucoplast , "leucoplast" } , { Source_location_proplastid , GENOME_proplastid , 
"proplastid" } , { Source_location_endogenous_virus , GENOME_endogenous_virus , "endogenous-virus" } , { Source_location_hydrogenosome , GENOME_hydrogenosome , "hydrogenosome" } , { Source_location_chromosome , GENOME_chromosome , "chromosome" } , { Source_location_chromatophore , GENOME_chromatophore , "chromatophore" } }; #define NUM_srcloc_genome sizeof (srcloc_genome) / sizeof (SrcLocGenomeData) NLM_EXTERN Int4 GenomeFromSrcLoc (Int4 srcloc) { Int4 i; for (i = 0; i < NUM_srcloc_genome; i++) { if (srcloc_genome[i].srcloc == srcloc) { return srcloc_genome[i].genome; } } return -1; } NLM_EXTERN Int4 SrcLocFromGenome (Int4 genome) { Int4 i; for (i = 0; i < NUM_srcloc_genome; i++) { if (srcloc_genome[i].genome == genome) { return srcloc_genome[i].srcloc; } } return -1; } NLM_EXTERN CharPtr LocNameFromGenome (Int4 genome) { Int4 i; for (i = 0; i < NUM_srcloc_genome; i++) { if (srcloc_genome[i].genome == genome) { return srcloc_genome[i].name; } } return NULL; } NLM_EXTERN Int4 GenomeFromLocName (CharPtr loc_name) { Int4 i; for (i = 0; i < NUM_srcloc_genome; i++) { if (StringICmp (srcloc_genome[i].name, loc_name) == 0) { return srcloc_genome[i].genome; } } return -1; } NLM_EXTERN ValNodePtr GetLocationList (Boolean for_remove) { ValNodePtr list = NULL, start = NULL; Int4 i; for (i = 0; i < NUM_srcloc_genome; i++) { if (for_remove && srcloc_genome[i].srcloc == Source_location_unknown) { ValNodeAddPointer (&list, srcloc_genome[i].srcloc, StringSave ("any")); } else { ValNodeAddPointer (&list, srcloc_genome[i].srcloc, StringSave (srcloc_genome[i].name)); } } list = ValNodeSort (list, SortVnpByString); /* put mitochondrion and chloroplast at top of list */ ValNodeAddPointer (&start, Source_location_mitochondrion, StringSave ("mitochondrion")); ValNodeAddPointer (&start, Source_location_chloroplast, StringSave ("chloroplast")); ValNodeLink (&start, list); list = start; return list; } static ValNodePtr SrcLocationFieldFromValue (CharPtr value) { ValNodePtr field, sq; Int4 
genome; genome = GenomeFromLocName(value); if (genome < 0) { return NULL; } sq = ValNodeNew (NULL); sq->choice = SourceQualValChoice_location; sq->data.intvalue = genome; field = ValNodeNew (NULL); field->choice = FieldType_source_qual; field->data.ptrvalue = sq; return field; } typedef struct srcorigorigin { Int4 srcorig; Int4 origin; CharPtr name; } SrcOrigOriginData, PNTR SrcrigOriginPtr; static SrcOrigOriginData srcorig_origin[] = { { Source_origin_unknown , 0 , "unknown" } , { Source_origin_natural , 1 , "natural" } , { Source_origin_natmut , 2 , "natmut" } , { Source_origin_mut , 3 , "mut" } , { Source_origin_artificial , 4 , "artificial" } , { Source_origin_synthetic , 5 , "synthetic" } , { Source_origin_other , 255 , "other" } }; #define NUM_srcorig_origin sizeof (srcorig_origin) / sizeof (SrcOrigOriginData) NLM_EXTERN Int4 OriginFromSrcOrig (Int4 srcorig) { Int4 i; for (i = 0; i < NUM_srcorig_origin; i++) { if (srcorig_origin[i].srcorig == srcorig) { return srcorig_origin[i].origin; } } return -1; } NLM_EXTERN Int4 SrcOrigFromOrigin (Int4 origin) { Int4 i; for (i = 0; i < NUM_srcorig_origin; i++) { if (srcorig_origin[i].origin == origin) { return srcorig_origin[i].srcorig; } } return -1; } NLM_EXTERN CharPtr OriginNameFromOrigin (Int4 origin) { Int4 i; for (i = 0; i < NUM_srcorig_origin; i++) { if (srcorig_origin[i].origin == origin) { return srcorig_origin[i].name; } } return NULL; } static Int4 OriginFromOriginName (CharPtr origin_name) { Int4 i; for (i = 0; i < NUM_srcorig_origin; i++) { if (StringCmp (srcorig_origin[i].name, origin_name) == 0) { return srcorig_origin[i].origin; } } return -1; } NLM_EXTERN ValNodePtr GetOriginList (Boolean for_remove) { ValNodePtr list = NULL; Int4 i; for (i = 0; i < NUM_srcorig_origin; i++) { if (for_remove && srcorig_origin[i].srcorig == Source_origin_unknown) { ValNodeAddPointer (&list, srcorig_origin[i].srcorig, StringSave ("any")); } else { ValNodeAddPointer (&list, srcorig_origin[i].srcorig, StringSave 
(srcorig_origin[i].name)); } } return list; } /* special code for converting source features to source qualifier val lists */ static void SetSrcQualTextValue (ValNodePtr PNTR fields, Int4 srcqual, CharPtr val) { SourceQualTextValPtr st; st = SourceQualTextValNew (); st->srcqual = srcqual; st->val = StringSave (val); ValNodeAddPointer (fields, SourceQualValChoice_textqual, st); } static ValNodePtr SourceQualValsFromOrgMods (OrgModPtr mod) { Int4 src_qual; ValNodePtr fields = NULL; while (mod != NULL) { src_qual = GetSrcQualFromSubSrcOrOrgMod (mod->subtype, TRUE); if (src_qual > -1) { SetSrcQualTextValue (&fields, src_qual, mod->subname); } mod = mod->next; } return fields; } static ValNodePtr SourceQualValsFromSubSrcs (SubSourcePtr ssp) { Int4 src_qual; ValNodePtr fields = NULL; while (ssp != NULL) { src_qual = GetSrcQualFromSubSrcOrOrgMod (ssp->subtype, FALSE); if (src_qual > -1) { SetSrcQualTextValue (&fields, src_qual, ssp->name); } ssp = ssp->next; } return fields; } static ValNodePtr SourceQualValsFromSynonyms (ValNodePtr syn) { ValNodePtr fields = NULL; while (syn != NULL) { SetSrcQualTextValue (&fields, Source_qual_synonym, syn->data.ptrvalue); syn = syn->next; } return fields; } NLM_EXTERN CharPtr GetDbtagString (DbtagPtr db_tag); static ValNodePtr SourceQualValsFromDbxrefs (ValNodePtr dbxref) { ValNodePtr fields = NULL; CharPtr tmp; while (dbxref != NULL) { tmp = GetDbtagString (dbxref->data.ptrvalue); SetSrcQualTextValue (&fields, Source_qual_dbxref, tmp); dbxref = dbxref->next; } return fields; } NLM_EXTERN ValNodePtr SourceQualValsFromBioSourcePtr (BioSourcePtr biop) { ValNodePtr fields = NULL; Int4 loc, origin; if (biop == NULL) { return NULL; } ValNodeLink (&fields, SourceQualValsFromSubSrcs (biop->subtype)); /* genome */ if (biop->genome != GENOME_unknown) { loc = SrcLocFromGenome (biop->genome); if (loc > -1) { ValNodeAddInt (&fields, SourceQualValChoice_location, loc); } } /* origin */ if (biop->origin > 0) { origin = SrcOrigFromOrigin 
(biop->origin); if (origin > -1) { ValNodeAddInt (&fields, SourceQualValChoice_origin, origin); } } /* TODO: need focus */ if (biop->org != NULL) { if (!StringHasNoText (biop->org->taxname)) { SetSrcQualTextValue (&fields, Source_qual_taxname, biop->org->taxname); } /* need common */ if (!StringHasNoText (biop->org->common)) { SetSrcQualTextValue (&fields, Source_qual_common, biop->org->common); } /* dbxrefs */ ValNodeLink (&fields, SourceQualValsFromDbxrefs (biop->org->db)); /* add synonyms */ SourceQualValsFromSynonyms (biop->org->syn); if (biop->org->orgname != NULL) { ValNodeLink (&fields, SourceQualValsFromOrgMods (biop->org->orgname->mod)); /* lineage */ if (!StringHasNoText (biop->org->orgname->lineage)) { SetSrcQualTextValue (&fields, Source_qual_lineage, biop->org->orgname->lineage); } /* div */ if (!StringHasNoText (biop->org->orgname->div)) { SetSrcQualTextValue (&fields, Source_qual_division, biop->org->orgname->div); } /* gcode, mgcode */ if (biop->org->orgname->gcode > 0) { ValNodeAddInt (&fields, SourceQualChoice_gcode, biop->org->orgname->gcode); } if (biop->org->orgname->mgcode > 0) { ValNodeAddInt (&fields, SourceQualChoice_mgcode, biop->org->orgname->mgcode); } } } return fields; } static void SetSourceQualValOnBioSource (BioSourcePtr biop, ValNodePtr src_qual) { ValNode vn; SourceQualTextValPtr st; if (biop == NULL || src_qual == NULL) { return; } vn.next = NULL; switch (src_qual->choice) { case SourceQualValChoice_textqual: st = (SourceQualTextValPtr) src_qual->data.ptrvalue; if (st != NULL) { vn.choice = SourceQualChoice_textqual; vn.data.intvalue = st->srcqual; if (AllowSourceQualMulti (src_qual)) { SetSourceQualInBioSource (biop, &vn, NULL, st->val, ExistingTextOption_add_qual); } else { SetSourceQualInBioSource (biop, &vn, NULL, st->val, ExistingTextOption_replace_old); } } break; case SourceQualValChoice_location: vn.choice = SourceQualChoice_location; vn.data.intvalue = src_qual->data.intvalue; SetSourceQualInBioSource (biop, &vn, NULL, 
NULL, ExistingTextOption_replace_old); break; case SourceQualValChoice_origin: vn.choice = SourceQualChoice_origin; vn.data.intvalue = src_qual->data.intvalue; SetSourceQualInBioSource (biop, &vn, NULL, NULL, ExistingTextOption_replace_old); break; case SourceQualValChoice_gcode: vn.choice = SourceQualChoice_gcode; vn.data.intvalue = src_qual->data.intvalue; SetSourceQualInBioSource (biop, &vn, NULL, NULL, ExistingTextOption_replace_old); break; case SourceQualValChoice_mgcode: vn.choice = SourceQualChoice_mgcode; vn.data.intvalue = src_qual->data.intvalue; SetSourceQualInBioSource (biop, &vn, NULL, NULL, ExistingTextOption_replace_old); break; } } NLM_EXTERN BioSourcePtr BioSourceFromSourceQualVals (ValNodePtr fields) { BioSourcePtr biop = NULL; ValNodePtr vnp; if (fields != NULL) { biop = BioSourceNew (); for (vnp = fields; vnp != NULL; vnp = vnp->next) { SetSourceQualValOnBioSource (biop, vnp); } } return biop; } typedef struct cdsgeneprotfieldname { Int4 field; CharPtr name; } CDSGeneProtFieldNameData, PNTR CDSGeneProtFieldNamePtr; static CDSGeneProtFieldNameData cdsgeneprotfield_name[] = { { CDSGeneProt_field_cds_comment , "CDS comment" } , { CDSGeneProt_field_cds_inference , "CDS inference" } , { CDSGeneProt_field_codon_start , "codon-start" } , { CDSGeneProt_field_gene_locus , "gene locus" } , { CDSGeneProt_field_gene_description , "gene description" } , { CDSGeneProt_field_gene_comment , "gene comment" } , { CDSGeneProt_field_gene_inference, "gene inference" } , { CDSGeneProt_field_gene_allele , "gene allele" } , { CDSGeneProt_field_gene_maploc , "gene maploc" } , { CDSGeneProt_field_gene_locus_tag , "gene locus tag" } , { CDSGeneProt_field_gene_synonym , "gene synonym" } , { CDSGeneProt_field_gene_old_locus_tag , "gene old locus tag" } , { CDSGeneProt_field_mrna_product , "mRNA product" } , { CDSGeneProt_field_mrna_comment , "mRNA comment" } , { CDSGeneProt_field_prot_name , "protein name" } , { CDSGeneProt_field_prot_description , "protein description" } 
, { CDSGeneProt_field_prot_ec_number , "protein EC number" } , { CDSGeneProt_field_prot_activity , "protein activity" } , { CDSGeneProt_field_prot_comment , "protein comment" } , { CDSGeneProt_field_mat_peptide_name , "mat-peptide name" } , { CDSGeneProt_field_mat_peptide_description , "mat-peptide description" } , { CDSGeneProt_field_mat_peptide_ec_number , "mat-peptide EC number" } , { CDSGeneProt_field_mat_peptide_activity , "mat-peptide activity" } , { CDSGeneProt_field_mat_peptide_comment , "mat-peptide comment" } }; #define NUM_cdsgeneprotfield_name sizeof (cdsgeneprotfield_name) / sizeof (CDSGeneProtFieldNameData) NLM_EXTERN CharPtr CDSGeneProtNameFromField (Int4 field) { Int4 i; for (i = 0; i < NUM_cdsgeneprotfield_name; i++) { if (cdsgeneprotfield_name[i].field == field) { return cdsgeneprotfield_name[i].name; } } return NULL; } static Int4 CDSGeneProtFieldFromName (CharPtr str) { Int4 i; for (i = 0; i < NUM_cdsgeneprotfield_name; i++) { if (Matchnamestring (cdsgeneprotfield_name[i].name, str)) { return cdsgeneprotfield_name[i].field; } } return -1; } NLM_EXTERN void AddAllCDSGeneProtFieldsToChoiceList (ValNodePtr PNTR field_list) { Int4 i; ValNodeAddPointer (field_list, CDSGeneProt_field_prot_name, StringSave ("protein name")); ValNodeAddPointer (field_list, CDSGeneProt_field_prot_description, StringSave ("protein description")); for (i = 0; i < NUM_cdsgeneprotfield_name; i++) { ValNodeAddPointer (field_list, cdsgeneprotfield_name[i].field, StringSave (cdsgeneprotfield_name[i].name)); } } static ValNodePtr MakeCDSGeneProtFieldTypeList (void) { Int4 i; ValNodePtr field_list = NULL; for (i = 0; i < NUM_cdsgeneprotfield_name; i++) { ValNodeAddInt (&field_list, FieldType_cds_gene_prot, cdsgeneprotfield_name[i].field); } return field_list; } typedef struct cdsgeneprotfeatname { Int4 feature_type; CharPtr name; } CDSGeneProtFeatNameData, PNTR CDSGeneProtFeatNamePtr; static CDSGeneProtFeatNameData cdsgeneprotfeat_name[] = { { 
CDSGeneProt_feature_type_constraint_gene , "gene" } , { CDSGeneProt_feature_type_constraint_mRNA , "mRNA" } , { CDSGeneProt_feature_type_constraint_cds , "CDS" } , { CDSGeneProt_feature_type_constraint_prot , "protein" } , { CDSGeneProt_feature_type_constraint_mat_peptide , "mat-peptide" }}; #define NUM_cdsgeneprotfeat_name sizeof (cdsgeneprotfeat_name) / sizeof (CDSGeneProtFeatNameData) NLM_EXTERN CharPtr CDSGeneProtFeatureNameFromFeatureType (Int4 feature_type) { Int4 i; for (i = 0; i < NUM_cdsgeneprotfeat_name; i++) { if (cdsgeneprotfeat_name[i].feature_type == feature_type) { return cdsgeneprotfeat_name[i].name; } } return NULL; } NLM_EXTERN void AddAllCDSGeneProtFeaturesToChoiceList (ValNodePtr PNTR field_list) { Int4 i; for (i = 0; i < NUM_cdsgeneprotfeat_name; i++) { ValNodeAddPointer (field_list, cdsgeneprotfeat_name[i].feature_type, StringSave (cdsgeneprotfeat_name[i].name)); } } static Boolean IsCDSGeneProtFieldMatPeptideRelated (Int4 val) { if (val == CDSGeneProt_field_mat_peptide_name || val == CDSGeneProt_field_mat_peptide_description || val == CDSGeneProt_field_mat_peptide_ec_number || val == CDSGeneProt_field_mat_peptide_activity || val == CDSGeneProt_field_mat_peptide_comment) { return TRUE; } else { return FALSE; } } static Boolean IsFieldTypeMatPeptideRelated (FieldTypePtr field) { Boolean rval = FALSE; FeatureFieldPtr ff; if (field == NULL) { rval = FALSE; } else if ((field->choice == FieldType_feature_field && (ff = field->data.ptrvalue) != NULL && ff->type == Macro_feature_type_mat_peptide_aa) || (field->choice == FieldType_cds_gene_prot && IsCDSGeneProtFieldMatPeptideRelated(field->data.intvalue))) { rval = TRUE; } else { rval = FALSE; } return rval; } static Boolean IsConstraintChoiceMatPeptideRelated (ConstraintChoicePtr constraint) { CDSGeneProtQualConstraintPtr cq; FieldConstraintPtr fq; Boolean rval = FALSE; if (constraint == NULL) { rval = FALSE; } else if (constraint->choice == ConstraintChoice_cdsgeneprot_qual) { cq = 
(CDSGeneProtQualConstraintPtr) constraint->data.ptrvalue; if (cq != NULL && cq->field1 != NULL && IsCDSGeneProtFieldMatPeptideRelated (cq->field1->data.intvalue)) { rval = TRUE; } else { rval = FALSE; } } else if (constraint->choice == ConstraintChoice_field) { fq = (FieldConstraintPtr) constraint->data.ptrvalue; if (fq != NULL && IsFieldTypeMatPeptideRelated (fq->field)) { rval = TRUE; } else { rval = FALSE; } } else { rval = FALSE; } return rval; } static Int2 FeatureTypeFromCDSGeneProtField (Uint2 cds_gene_prot_field) { Int2 feat_type = Macro_feature_type_any; switch (cds_gene_prot_field) { case CDSGeneProt_field_cds_comment: case CDSGeneProt_field_cds_inference: case CDSGeneProt_field_codon_start: feat_type = Macro_feature_type_cds; break; case CDSGeneProt_field_gene_locus: case CDSGeneProt_field_gene_description: case CDSGeneProt_field_gene_comment: case CDSGeneProt_field_gene_allele: case CDSGeneProt_field_gene_maploc: case CDSGeneProt_field_gene_locus_tag: case CDSGeneProt_field_gene_synonym: case CDSGeneProt_field_gene_old_locus_tag: case CDSGeneProt_field_gene_inference: feat_type = Macro_feature_type_gene; break; case CDSGeneProt_field_mrna_product: case CDSGeneProt_field_mrna_comment: feat_type = Macro_feature_type_mRNA; break; case CDSGeneProt_field_prot_name: case CDSGeneProt_field_prot_description: case CDSGeneProt_field_prot_ec_number: case CDSGeneProt_field_prot_activity: case CDSGeneProt_field_prot_comment: feat_type = Macro_feature_type_prot; break; case CDSGeneProt_field_mat_peptide_name: case CDSGeneProt_field_mat_peptide_description: case CDSGeneProt_field_mat_peptide_ec_number: case CDSGeneProt_field_mat_peptide_activity: case CDSGeneProt_field_mat_peptide_comment: feat_type = Macro_feature_type_mat_peptide_aa; break; } return feat_type; } NLM_EXTERN FeatureFieldPtr FeatureFieldFromCDSGeneProtField (Uint2 cds_gene_prot_field) { FeatureFieldPtr f = NULL; switch (cds_gene_prot_field) { case CDSGeneProt_field_cds_comment: f = FeatureFieldNew (); 
f->type = Macro_feature_type_cds; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_note; break; case CDSGeneProt_field_cds_inference: f = FeatureFieldNew (); f->type = Macro_feature_type_cds; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_inference; break; case CDSGeneProt_field_codon_start: f = FeatureFieldNew (); f->type = Macro_feature_type_cds; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_codon_start; break; case CDSGeneProt_field_gene_locus: f = FeatureFieldNew (); f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_gene; break; case CDSGeneProt_field_gene_description: f = FeatureFieldNew (); f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_gene_description; break; case CDSGeneProt_field_gene_comment: f = FeatureFieldNew (); f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_note; break; case CDSGeneProt_field_gene_allele: f = FeatureFieldNew (); f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_allele; break; case CDSGeneProt_field_gene_maploc: f = FeatureFieldNew (); f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_map; break; case CDSGeneProt_field_gene_locus_tag: f = FeatureFieldNew (); f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = 
Feat_qual_legal_locus_tag; break; case CDSGeneProt_field_gene_synonym: f = FeatureFieldNew (); f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_synonym; break; case CDSGeneProt_field_gene_old_locus_tag: f = FeatureFieldNew (); f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_old_locus_tag; break; case CDSGeneProt_field_gene_inference: f = FeatureFieldNew (); f->type = Macro_feature_type_gene; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_inference; break; case CDSGeneProt_field_mrna_product: f = FeatureFieldNew (); f->type = Macro_feature_type_mRNA; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_product; break; case CDSGeneProt_field_mrna_comment: f = FeatureFieldNew (); f->type = Macro_feature_type_mRNA; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_note; break; case CDSGeneProt_field_prot_name: f = FeatureFieldNew (); f->type = Macro_feature_type_prot; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_product; break; case CDSGeneProt_field_prot_description: f = FeatureFieldNew (); f->type = Macro_feature_type_prot; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_description; break; case CDSGeneProt_field_prot_ec_number: f = FeatureFieldNew (); f->type = Macro_feature_type_prot; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_ec_number; break; case CDSGeneProt_field_prot_activity: f = FeatureFieldNew (); f->type = Macro_feature_type_prot; 
f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_activity; break; case CDSGeneProt_field_prot_comment: f = FeatureFieldNew (); f->type = Macro_feature_type_prot; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_note; break; case CDSGeneProt_field_mat_peptide_name: f = FeatureFieldNew (); f->type = Macro_feature_type_mat_peptide_aa; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_product; break; case CDSGeneProt_field_mat_peptide_description: f = FeatureFieldNew (); f->type = Macro_feature_type_mat_peptide_aa; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_description; break; case CDSGeneProt_field_mat_peptide_ec_number: f = FeatureFieldNew (); f->type = Macro_feature_type_mat_peptide_aa; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_ec_number; break; case CDSGeneProt_field_mat_peptide_activity: f = FeatureFieldNew (); f->type = Macro_feature_type_mat_peptide_aa; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_activity; break; case CDSGeneProt_field_mat_peptide_comment: f = FeatureFieldNew (); f->type = Macro_feature_type_mat_peptide_aa; f->field = ValNodeNew (NULL); f->field->choice = FeatQualChoice_legal_qual; f->field->data.intvalue = Feat_qual_legal_note; break; } return f; } static Uint2 CDSGeneProtFieldFromFeatureField (FeatureFieldPtr ffp) { Uint2 cds_gene_prot_field = 0; if (ffp != NULL && ffp->field != NULL && ffp->field->choice == FeatQualChoice_legal_qual) { switch (ffp->field->data.intvalue) { case Feat_qual_legal_note: switch (ffp->type) { case Macro_feature_type_cds: cds_gene_prot_field = CDSGeneProt_field_cds_comment; break; case 
Macro_feature_type_gene: cds_gene_prot_field = CDSGeneProt_field_gene_comment; break; case Macro_feature_type_mRNA: cds_gene_prot_field = CDSGeneProt_field_mrna_comment; break; case Macro_feature_type_prot: cds_gene_prot_field = CDSGeneProt_field_prot_comment; break; case Macro_feature_type_mat_peptide_aa: cds_gene_prot_field = CDSGeneProt_field_mat_peptide_comment; break; } break; case Feat_qual_legal_inference: switch (ffp->type) { case Macro_feature_type_cds: cds_gene_prot_field = CDSGeneProt_field_cds_inference; break; case Macro_feature_type_gene: cds_gene_prot_field = CDSGeneProt_field_gene_inference; break; } break; case Feat_qual_legal_codon_start: cds_gene_prot_field = CDSGeneProt_field_codon_start; break; case Feat_qual_legal_gene: cds_gene_prot_field = CDSGeneProt_field_gene_locus; break; case Feat_qual_legal_gene_description: cds_gene_prot_field = CDSGeneProt_field_gene_description; break; case Feat_qual_legal_allele: cds_gene_prot_field = CDSGeneProt_field_gene_allele; break; case Feat_qual_legal_map: cds_gene_prot_field = CDSGeneProt_field_gene_maploc; break; case Feat_qual_legal_locus_tag: cds_gene_prot_field = CDSGeneProt_field_gene_locus_tag; break; case Feat_qual_legal_synonym: cds_gene_prot_field = CDSGeneProt_field_gene_synonym; break; case Feat_qual_legal_old_locus_tag: cds_gene_prot_field = CDSGeneProt_field_gene_old_locus_tag; break; case Feat_qual_legal_product: switch (ffp->type) { case Macro_feature_type_mRNA: cds_gene_prot_field = CDSGeneProt_field_mrna_product; break; case Macro_feature_type_prot: cds_gene_prot_field = CDSGeneProt_field_prot_name; break; case Macro_feature_type_mat_peptide_aa: cds_gene_prot_field = CDSGeneProt_field_mat_peptide_name; break; } break; case Feat_qual_legal_description: switch (ffp->type) { case Macro_feature_type_gene: cds_gene_prot_field = CDSGeneProt_field_gene_description; break; case Macro_feature_type_prot: cds_gene_prot_field = CDSGeneProt_field_prot_description; break; case 
Macro_feature_type_mat_peptide_aa: cds_gene_prot_field = CDSGeneProt_field_mat_peptide_description; break; } break; case Feat_qual_legal_ec_number: switch (ffp->type) { case Macro_feature_type_prot: cds_gene_prot_field = CDSGeneProt_field_prot_ec_number; break; case Macro_feature_type_mat_peptide_aa: cds_gene_prot_field = CDSGeneProt_field_mat_peptide_ec_number; break; } break; case Feat_qual_legal_activity: switch (ffp->type) { case Macro_feature_type_prot: cds_gene_prot_field = CDSGeneProt_field_prot_activity; break; case Macro_feature_type_mat_peptide_aa: cds_gene_prot_field = CDSGeneProt_field_mat_peptide_activity; break; } break; } } return cds_gene_prot_field; } /* Molinfo fields */ typedef struct moleculetypebiomol { Int4 molecule_type; Int4 biomol; CharPtr name; } MoleculeTypeBiomolData, PNTR MoleculeTypeBiomolPtr; static MoleculeTypeBiomolData moleculetype_biomol[] = { { Molecule_type_unknown , 0, " " } , { Molecule_type_genomic , MOLECULE_TYPE_GENOMIC , "genomic" } , { Molecule_type_precursor_RNA , MOLECULE_TYPE_PRE_MRNA , "precursor RNA" } , { Molecule_type_mRNA , MOLECULE_TYPE_MRNA , "mRNA" } , { Molecule_type_rRNA , MOLECULE_TYPE_RRNA , "rRNA" } , { Molecule_type_tRNA , MOLECULE_TYPE_TRNA , "tRNA" } , { Molecule_type_genomic_mRNA , MOLECULE_TYPE_GENOMIC_MRNA_MIX , "genomic mRNA" } , { Molecule_type_cRNA , MOLECULE_TYPE_CRNA , "cRNA" } , { Molecule_type_transcribed_RNA, MOLECULE_TYPE_TRANSCRIBED_RNA, "transcribed RNA" } , { Molecule_type_ncRNA, MOLECULE_TYPE_NCRNA, "ncRNA" } , { Molecule_type_transfer_messenger_RNA, MOLECULE_TYPE_TMRNA, "tmRNA" } , { Molecule_type_macro_other, MOLECULE_TYPE_OTHER_GENETIC_MATERIAL, "other-genetic" } }; #define NUM_moleculetype_biomol sizeof (moleculetype_biomol) / sizeof (MoleculeTypeBiomolData) NLM_EXTERN Int4 BiomolFromMoleculeType (Int4 molecule_type) { Int4 i; for (i = 0; i < NUM_moleculetype_biomol; i++) { if (moleculetype_biomol[i].molecule_type == molecule_type) { return moleculetype_biomol[i].biomol; } } return 
-1; } NLM_EXTERN CharPtr BiomolNameFromBiomol (Int4 biomol) { Int4 i; for (i = 0; i < NUM_moleculetype_biomol; i++) { if (moleculetype_biomol[i].biomol == biomol) { return moleculetype_biomol[i].name; } } return NULL; } static Int4 BiomolFromBiomolName (CharPtr biomol_name) { Int4 i; for (i = 0; i < NUM_moleculetype_biomol; i++) { if (StringICmp (moleculetype_biomol[i].name, biomol_name) == 0) { return moleculetype_biomol[i].biomol; } } return -1; } NLM_EXTERN ValNodePtr GetMoleculeTypeList (void) { ValNodePtr list = NULL; Int4 i; for (i = 0; i < NUM_moleculetype_biomol; i++) { ValNodeAddPointer (&list, moleculetype_biomol[i].molecule_type, StringSave (moleculetype_biomol[i].name)); } return list; } /* Technique fields */ typedef struct techniquetypetech { Int4 technique_type; Int4 tech; CharPtr name; } TechniqueTypeTechData, PNTR TechniqueTypeTechPtr; static TechniqueTypeTechData techniquetype_tech[] = { { Technique_type_unknown , MI_TECH_unknown , " " } , { Technique_type_standard , MI_TECH_standard , "standard" } , { Technique_type_est , MI_TECH_est , "EST" } , { Technique_type_sts , MI_TECH_sts , "STS" } , { Technique_type_survey , MI_TECH_survey , "survey" } , { Technique_type_genetic_map , MI_TECH_genemap , "genetic map" } , { Technique_type_physical_map , MI_TECH_physmap , "physical map" } , { Technique_type_derived , MI_TECH_derived , "derived" } , { Technique_type_concept_trans , MI_TECH_concept_trans , "concept-trans" } , { Technique_type_seq_pept , MI_TECH_seq_pept , "seq-pept" } , { Technique_type_both , MI_TECH_both , "both" } , { Technique_type_seq_pept_overlap , MI_TECH_seq_pept_overlap , "seq-pept-overlap" } , { Technique_type_seq_pept_homol , MI_TECH_seq_pept_homol, "seq-pept-homol" } , { Technique_type_concept_trans_a, MI_TECH_concept_trans_a, "concept-trans-a" } , { Technique_type_htgs_1, MI_TECH_htgs_1, "HTGS-1" } , { Technique_type_htgs_2, MI_TECH_htgs_2, "HTGS-2" } , { Technique_type_htgs_3, MI_TECH_htgs_3, "HTGS-3" } , { 
Technique_type_fli_cDNA, MI_TECH_fli_cdna, "fli-cDNA" } , { Technique_type_htgs_0, MI_TECH_htgs_0, "HTGS-0" } , { Technique_type_htc, MI_TECH_htc, "HTC" } , { Technique_type_wgs, MI_TECH_wgs, "WGS" } , { Technique_type_barcode, MI_TECH_barcode, "BARCODE" } , { Technique_type_composite_wgs_htgs, MI_TECH_composite_wgs_htgs, "composite WGS-HTGS" } , { Technique_type_tsa, MI_TECH_tsa, "TSA" } , { Technique_type_targeted, MI_TECH_targeted, "targeted" } , { Technique_type_other, MI_TECH_other, "other" } }; #define NUM_techniquetype_tech sizeof (techniquetype_tech) / sizeof (TechniqueTypeTechData) NLM_EXTERN Int4 TechFromTechniqueType (Int4 technique_type) { Int4 i; for (i = 0; i < NUM_techniquetype_tech; i++) { if (techniquetype_tech[i].technique_type == technique_type) { return techniquetype_tech[i].tech; } } return -1; } NLM_EXTERN CharPtr TechNameFromTech (Int4 tech) { Int4 i; for (i = 0; i < NUM_techniquetype_tech; i++) { if (techniquetype_tech[i].tech == tech) { return techniquetype_tech[i].name; } } return NULL; } NLM_EXTERN Int4 TechFromTechName (CharPtr tech_name) { Int4 i; for (i = 0; i < NUM_techniquetype_tech; i++) { if (StringsAreEquivalent (techniquetype_tech[i].name, tech_name)) { return techniquetype_tech[i].tech; } } return -1; } NLM_EXTERN ValNodePtr GetTechniqueTypeList (void) { ValNodePtr list = NULL; Int4 i; for (i = 0; i < NUM_techniquetype_tech; i++) { ValNodeAddPointer (&list, techniquetype_tech[i].technique_type, StringSave (techniquetype_tech[i].name)); } return list; } /* Completedness fields */ typedef struct completednesstypecompleteness { Int4 completedness_type; Int4 completeness; CharPtr name; } CompletednessTypeCompletenessData, PNTR CompletednessTypeCompletenessPtr; static CompletednessTypeCompletenessData completednesstype_completeness[] = { { Completedness_type_unknown, 0, " " } , { Completedness_type_complete, 1, "complete" } , { Completedness_type_partial, 2, "partial" } , { Completedness_type_no_left, 3, "no left" } , { 
Completedness_type_no_right, 4, "no right" } , { Completedness_type_no_ends, 5, "no ends" } , { Completedness_type_has_left, 6, "has left" } , { Completedness_type_has_right, 7, "has right" } , { Completedness_type_other, 255, "other" } }; #define NUM_completednesstype_completeness sizeof (completednesstype_completeness) / sizeof (CompletednessTypeCompletenessData) NLM_EXTERN Int4 CompletenessFromCompletednessType (Int4 completedness_type) { Int4 i; for (i = 0; i < NUM_completednesstype_completeness; i++) { if (completednesstype_completeness[i].completedness_type == completedness_type) { return completednesstype_completeness[i].completeness; } } return -1; } NLM_EXTERN CharPtr CompletenessNameFromCompleteness (Int4 completeness) { Int4 i; for (i = 0; i < NUM_completednesstype_completeness; i++) { if (completednesstype_completeness[i].completeness == completeness) { return completednesstype_completeness[i].name; } } return NULL; } static Int4 CompletenessFromCompletenessName (CharPtr completeness_name) { Int4 i; for (i = 0; i < NUM_completednesstype_completeness; i++) { if (StringICmp (completednesstype_completeness[i].name, completeness_name) == 0) { return completednesstype_completeness[i].completeness; } } return -1; } NLM_EXTERN ValNodePtr GetCompletednessTypeList (void) { ValNodePtr list = NULL; Int4 i; for (i = 0; i < NUM_completednesstype_completeness; i++) { ValNodeAddPointer (&list, completednesstype_completeness[i].completedness_type, StringSave (completednesstype_completeness[i].name)); } return list; } /* Molecule class fields */ typedef struct moleculeclasstypemol { Int4 moleculeclass_type; Int4 mol; CharPtr name; } MoleculeClassTypeMolData, PNTR MoleculeClassTypeMolPtr; static MoleculeClassTypeMolData moleculeclasstype_mol[] = { { Molecule_class_type_unknown, 0, " " } , { Molecule_class_type_dna, MOLECULE_CLASS_DNA, "DNA" } , { Molecule_class_type_rna, MOLECULE_CLASS_RNA, "RNA" } , { Molecule_class_type_protein, MOLECULE_CLASS_PROTEIN, "protein" } , { 
Molecule_class_type_nucleotide, MOLECULE_CLASS_NUC, "nucleotide" } , { Molecule_class_type_other, 255, "other" } }; #define NUM_moleculeclasstype_mol sizeof (moleculeclasstype_mol) / sizeof (MoleculeClassTypeMolData) NLM_EXTERN Int4 MolFromMoleculeClassType (Int4 moleculeclass_type) { Int4 i; for (i = 0; i < NUM_moleculeclasstype_mol; i++) { if (moleculeclasstype_mol[i].moleculeclass_type == moleculeclass_type) { return moleculeclasstype_mol[i].mol; } } return -1; } NLM_EXTERN CharPtr MolNameFromMol (Int4 mol) { Int4 i; for (i = 0; i < NUM_moleculeclasstype_mol; i++) { if (moleculeclasstype_mol[i].mol == mol) { return moleculeclasstype_mol[i].name; } } return NULL; } static Int4 MolFromMolName (CharPtr mol_name) { Int4 i; for (i = 0; i < NUM_moleculeclasstype_mol; i++) { if (StringICmp (moleculeclasstype_mol[i].name, mol_name) == 0) { return moleculeclasstype_mol[i].mol; } } return -1; } NLM_EXTERN ValNodePtr GetMoleculeClassTypeList (void) { ValNodePtr list = NULL; Int4 i; for (i = 0; i < NUM_moleculeclasstype_mol; i++) { ValNodeAddPointer (&list, moleculeclasstype_mol[i].moleculeclass_type, StringSave (moleculeclasstype_mol[i].name)); } return list; } /* Topology fields */ typedef struct topologytypetopology { Int4 topology_type; Int4 topology; CharPtr name; } TopologyTypeTopologyData, PNTR TopologyTypeTopologyPtr; static TopologyTypeTopologyData topologytype_topology[] = { { Topology_type_unknown, 0, " " } , { Topology_type_linear, TOPOLOGY_LINEAR, "linear" } , { Topology_type_circular, TOPOLOGY_CIRCULAR, "circular" } , { Topology_type_tandem, TOPOLOGY_TANDEM, "tandem" } , { Topology_type_other, 255, "other" } }; #define NUM_topologytype_topology sizeof (topologytype_topology) / sizeof (TopologyTypeTopologyData) NLM_EXTERN Int4 TopologyFromTopologyType (Int4 topology_type) { Int4 i; for (i = 0; i < NUM_topologytype_topology; i++) { if (topologytype_topology[i].topology_type == topology_type) { return topologytype_topology[i].topology; } } return -1; } NLM_EXTERN 
CharPtr TopologyNameFromTopology (Int4 topology) { Int4 i; for (i = 0; i < NUM_topologytype_topology; i++) { if (topologytype_topology[i].topology == topology) { return topologytype_topology[i].name; } } return NULL; } static Int4 TopologyFromTopologyName (CharPtr topology_name) { Int4 i; for (i = 0; i < NUM_topologytype_topology; i++) { if (StringICmp (topologytype_topology[i].name, topology_name) == 0) { return topologytype_topology[i].topology; } } return -1; } NLM_EXTERN ValNodePtr GetTopologyTypeList (void) { ValNodePtr list = NULL; Int4 i; for (i = 0; i < NUM_topologytype_topology; i++) { ValNodeAddPointer (&list, topologytype_topology[i].topology_type, StringSave (topologytype_topology[i].name)); } return list; } /* strand fields */ typedef struct strandtypestrand { Int4 strand_type; Int4 strand; CharPtr name; } StrandTypeStrandData, PNTR StrandTypeStrandPtr; static StrandTypeStrandData strandtype_strand[] = { { Strand_type_unknown, 0, " " } , { Strand_type_single, STRANDEDNESS_SINGLE, "single" } , { Strand_type_double__, STRANDEDNESS_DOUBLE, "double" } , { Strand_type_mixed, 3, "mixed" } , { Strand_type_mixed_rev, 4, "mixed-rev" } , { Strand_type_other, 255, "other" } }; #define NUM_strandtype_strand sizeof (strandtype_strand) / sizeof (StrandTypeStrandData) NLM_EXTERN Int4 StrandFromStrandType (Int4 strand_type) { Int4 i; for (i = 0; i < NUM_strandtype_strand; i++) { if (strandtype_strand[i].strand_type == strand_type) { return strandtype_strand[i].strand; } } return -1; } NLM_EXTERN CharPtr StrandNameFromStrand (Int4 strand) { Int4 i; for (i = 0; i < NUM_strandtype_strand; i++) { if (strandtype_strand[i].strand == strand) { return strandtype_strand[i].name; } } return NULL; } static Int4 StrandFromStrandName (CharPtr strand_name) { Int4 i; for (i = 0; i < NUM_strandtype_strand; i++) { if (StringICmp (strandtype_strand[i].name, strand_name) == 0) { return strandtype_strand[i].strand; } } return -1; } NLM_EXTERN ValNodePtr GetStrandTypeList (void) { 
ValNodePtr list = NULL; Int4 i; for (i = 0; i < NUM_strandtype_strand; i++) { ValNodeAddPointer (&list, strandtype_strand[i].strand_type, StringSave (strandtype_strand[i].name)); } return list; } static CharPtr GetSequenceQualValName (ValNodePtr field) { CharPtr val = NULL; if (field == NULL) return NULL; switch (field->choice) { case MolinfoField_molecule: val = BiomolNameFromBiomol (BiomolFromMoleculeType (field->data.intvalue)); break; case MolinfoField_technique: val = TechNameFromTech (TechFromTechniqueType (field->data.intvalue)); break; case MolinfoField_completedness: val = CompletenessNameFromCompleteness (CompletenessFromCompletednessType (field->data.intvalue)); break; case MolinfoField_mol_class: val = MolNameFromMol (MolFromMoleculeClassType (field->data.intvalue)); break; case MolinfoField_topology: val = TopologyNameFromTopology (TopologyFromTopologyType (field->data.intvalue)); break; case MolinfoField_strand: val = StrandNameFromStrand (StrandFromStrandType (field->data.intvalue)); break; } return val; } static CharPtr GetSequenceQualName (ValNodePtr field) { CharPtr str = NULL, fieldname = "invalid field", val = "invalid value"; CharPtr fmt = "%s %s"; if (field == NULL) return NULL; switch (field->choice) { case MolinfoField_molecule: fieldname = "molecule"; val = BiomolNameFromBiomol (BiomolFromMoleculeType (field->data.intvalue)); break; case MolinfoField_technique: fieldname = "technique"; val = TechNameFromTech (TechFromTechniqueType (field->data.intvalue)); break; case MolinfoField_completedness: fieldname = "completeness"; val = CompletenessNameFromCompleteness (CompletenessFromCompletednessType (field->data.intvalue)); break; case MolinfoField_mol_class: fieldname = "class"; val = MolNameFromMol (MolFromMoleculeClassType (field->data.intvalue)); break; case MolinfoField_topology: fieldname = "topology"; val = TopologyNameFromTopology (TopologyFromTopologyType (field->data.intvalue)); break; case MolinfoField_strand: fieldname = "strand"; 
val = StrandNameFromStrand (StrandFromStrandType (field->data.intvalue)); break; } if (val == NULL) { val = "Invalid value"; } str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fieldname) + StringLen (val))); sprintf (str, fmt, fieldname, val); return str; } static ValNodePtr MakeSequenceQualFieldTypeList (void) { ValNodePtr field_list = NULL; ValNodePtr field; field = ValNodeNew (NULL); field->choice = MolinfoField_molecule; field->data.ptrvalue = NULL; ValNodeAddPointer (&field_list, FieldType_molinfo_field, field); field = ValNodeNew (NULL); field->choice = MolinfoField_technique; field->data.ptrvalue = NULL; ValNodeAddPointer (&field_list, FieldType_molinfo_field, field); field = ValNodeNew (NULL); field->choice = MolinfoField_completedness; field->data.ptrvalue = NULL; ValNodeAddPointer (&field_list, FieldType_molinfo_field, field); field = ValNodeNew (NULL); field->choice = MolinfoField_mol_class; field->data.ptrvalue = NULL; ValNodeAddPointer (&field_list, FieldType_molinfo_field, field); field = ValNodeNew (NULL); field->choice = MolinfoField_topology; field->data.ptrvalue = NULL; ValNodeAddPointer (&field_list, FieldType_molinfo_field, field); field = ValNodeNew (NULL); field->choice = MolinfoField_strand; field->data.ptrvalue = NULL; ValNodeAddPointer (&field_list, FieldType_molinfo_field, field); return field_list; } static ValNodePtr MolinfoFieldFromFieldAndStringValue (ValNodePtr field, CharPtr val) { ValNodePtr mp = NULL; Int4 enum_val; if (field == NULL) { return NULL; } switch (field->choice) { case MolinfoField_molecule: enum_val = BiomolFromBiomolName(val); if (enum_val > -1) { mp = ValNodeNew (NULL); mp->choice = MolinfoField_molecule; mp->data.intvalue = enum_val; } break; case MolinfoField_technique: enum_val = TechFromTechName(val); if (enum_val > -1) { mp = ValNodeNew (NULL); mp->choice = MolinfoField_technique; mp->data.intvalue = enum_val; } break; case MolinfoField_completedness: enum_val = 
CompletenessFromCompletenessName(val); if (enum_val > -1) { mp = ValNodeNew (NULL); mp->choice = MolinfoField_completedness; mp->data.intvalue = enum_val; } break; case MolinfoField_mol_class: enum_val = MolFromMolName(val); if (enum_val > -1) { mp = ValNodeNew (NULL); mp->choice = MolinfoField_mol_class; mp->data.intvalue = enum_val; } break; case MolinfoField_topology: enum_val = TopologyFromTopologyName(val); if (enum_val > -1) { mp = ValNodeNew (NULL); mp->choice = MolinfoField_topology; mp->data.intvalue = enum_val; } break; case MolinfoFieldPair_strand: enum_val = StrandFromStrandName(val); if (enum_val > -1) { mp = ValNodeNew (NULL); mp->choice = MolinfoFieldPair_strand; mp->data.intvalue = enum_val; } break; } return mp; } /* bond types */ typedef struct bondtype { Int4 macro_bond_type; Int4 asn1_bond_type; CharPtr name; } BondTypeData, PNTR BondTypePtr; static BondTypeData bond_type[] = { { Bond_type_disulfide, 1, "Disulfide" } , { Bond_type_thioester, 2, "Thioester" } , { Bond_type_crosslink, 3, "Crosslink" } , { Bond_type_thioether, 4, "Thioether" } , { Bond_type_other, 255, "Other" } }; #define NUM_bond_type sizeof (bond_type) / sizeof (BondTypeData) NLM_EXTERN Int4 Asn1BondTypeFromMacroBondType (Int4 macro_bond_type) { Int4 i; for (i = 0; i < NUM_bond_type; i++) { if (bond_type[i].macro_bond_type == macro_bond_type) { return bond_type[i].asn1_bond_type; } } return -1; } NLM_EXTERN Int4 MacroBondTypeFromAsn1BondType (Int4 asn1_bond_type) { Int4 i; for (i = 0; i < NUM_bond_type; i++) { if (bond_type[i].asn1_bond_type == asn1_bond_type) { return bond_type[i].macro_bond_type; } } return -1; } NLM_EXTERN CharPtr GetMacroBondTypeName (Int4 macro_bond_type) { Int4 i; for (i = 0; i < NUM_bond_type; i++) { if (bond_type[i].macro_bond_type == macro_bond_type) { return bond_type[i].name; } } return NULL; } NLM_EXTERN ValNodePtr GetBondTypeList (void) { ValNodePtr list = NULL; Int4 i; for (i = 0; i < NUM_bond_type; i++) { ValNodeAddPointer (&list, 
bond_type[i].macro_bond_type, StringSave (bond_type[i].name)); } return list; } /* site types */ typedef struct sitetype { Int4 macro_site_type; Int4 asn1_site_type; CharPtr name; } SiteTypeData, PNTR SiteTypePtr; static SiteTypeData site_type[] = { {Site_type_active, 1, "Active"}, {Site_type_binding, 2, "Binding"}, {Site_type_cleavage, 3, "Cleavage"}, {Site_type_inhibit, 4, "Inhibit"}, {Site_type_modified, 5, "Modified"}, {Site_type_glycosylation, 6, "Glycosylation"}, {Site_type_myristoylation, 7, "Myristoylation"}, {Site_type_mutagenized, 8, "Mutagenized"}, {Site_type_metal_binding, 9, "Metal-binding"}, {Site_type_phosphorylation, 10, "Phosphorylation"}, {Site_type_acetylation, 11, "Acetylation"}, {Site_type_amidation, 12, "Amidation"}, {Site_type_methylation, 13, "Methylation"}, {Site_type_hydroxylation, 14, "Hydroxylation"}, {Site_type_sulfatation, 15, "Sulfatation"}, {Site_type_oxidative_deamination, 16, "Oxidative-deamination"}, {Site_type_pyrrolidone_carboxylic_acid, 17, "Pyrrolidone-carboxylic-acid"}, {Site_type_gamma_carboxyglutamic_acid, 18, "Gamma-carboxyglutamic-acid"}, {Site_type_blocked, 19, "Blocked"}, {Site_type_lipid_binding, 20, "Lipid-binding"}, {Site_type_np_binding, 21, "np-binding"}, {Site_type_dna_binding, 22, "DNA-binding"}, {Site_type_signal_peptide, 23, "Signal-peptide"}, {Site_type_transit_peptide, 24, "Transit-peptide"}, {Site_type_transmembrane_region, 25, "Transmembrane-region"}, {Site_type_nitrosylation, 26, "Nitrosylation"}, {Site_type_other, 255, "Other"}, }; #define NUM_site_type sizeof (site_type) / sizeof (SiteTypeData) NLM_EXTERN Int4 Asn1SiteTypeFromMacroSiteType (Int4 macro_site_type) { Int4 i; for (i = 0; i < NUM_site_type; i++) { if (site_type[i].macro_site_type == macro_site_type) { return site_type[i].asn1_site_type; } } return -1; } NLM_EXTERN Int4 MacroSiteTypeFromAsn1SiteType (Int4 asn1_site_type) { Int4 i; for (i = 0; i < NUM_site_type; i++) { if (site_type[i].asn1_site_type == asn1_site_type) { return 
site_type[i].macro_site_type; } } return -1; } NLM_EXTERN CharPtr GetMacroSiteTypeName (Int4 macro_site_type) { Int4 i; for (i = 0; i < NUM_site_type; i++) { if (site_type[i].macro_site_type == macro_site_type) { return site_type[i].name; } } return NULL; } NLM_EXTERN ValNodePtr GetSiteTypeList (void) { ValNodePtr list = NULL; Int4 i; for (i = 0; i < NUM_site_type; i++) { ValNodeAddPointer (&list, site_type[i].macro_site_type, StringSave (site_type[i].name)); } return list; } /* Simple constraints */ static Boolean DisallowCharacter (Char ch, Boolean disallow_slash) { if (isalpha ((Int4) ch) || isdigit ((Int4) ch) || ch == '_' || ch == '-') { return TRUE; } else if (disallow_slash && ch == '/') { return TRUE; } else { return FALSE; } } static Boolean IsWholeWordMatchEx (CharPtr start, CharPtr found, Int4 match_len, Boolean disallow_slash) { Boolean rval = TRUE; Char char_after; Char char_before; if (match_len == 0) { rval = TRUE; } else if (start == NULL || found == NULL) { rval = FALSE; } else { char_after = *(found + match_len); if (found != start) { char_before = *(found - 1); if (DisallowCharacter (char_before, disallow_slash)) { rval = FALSE; } } if (char_after != 0 && DisallowCharacter (char_after, disallow_slash)) { rval = FALSE; } } return rval; } static Boolean IsWholeWordMatch (CharPtr start, CharPtr found, Int4 match_len) { return IsWholeWordMatchEx (start, found, match_len, FALSE); } NLM_EXTERN Boolean IsStringConstraintEmpty (StringConstraintPtr scp) { if (scp == NULL) { return TRUE; } if (scp->is_all_caps || scp->is_all_lower || scp->is_all_punct) { return FALSE; } else if (scp->match_text == NULL || scp->match_text[0] == 0) { return TRUE; } else { return FALSE; } } static void StripUnimportantCharacters (CharPtr str, Boolean strip_space, Boolean strip_punct) { CharPtr src, dst; if (str == NULL) { return; } src = str; dst = str; while (*src != 0) { if ((strip_space && isspace (*src)) || (strip_punct && ispunct (*src))) { /* don't copy this character 
*/ } else { if (src > dst) { *dst = *src; } dst++; } src++; } *dst = 0; } static Boolean IsWholeWordAtStart (CharPtr str, CharPtr cp, Boolean is_start) { if (cp == str) { return is_start; } else { return !isalpha (*(cp - 1)); } } static int CaseNCompare (CharPtr str1, CharPtr str2, Int4 n, Boolean case_sensitive) { if (n == 0) { return 0; } else if (case_sensitive) { return StringNCmp (str1, str2, n); } else { return StringNICmp (str1, str2, n); } } static Boolean AdvancedStringCompare (CharPtr str, CharPtr str_match, StringConstraintPtr scp, Boolean is_start, Int4Ptr p_target_match_len) { CharPtr cp_s, cp_m; Boolean match = TRUE, recursive_match = FALSE; Boolean word_start_s, word_start_m; WordSubstitutionPtr word; Int4 len1, len2, init_target_match_len = 0, target_match_len = 0; ValNodePtr syn; if (str == NULL) { return FALSE; } else if (scp == NULL || str_match == NULL) { return TRUE; } cp_s = str; cp_m = str_match; if (p_target_match_len != NULL) { init_target_match_len = *p_target_match_len; } while (match && *cp_m != 0 && !recursive_match) { /* first, check to see if we're skipping synonyms */ for (word = scp->ignore_words; word != NULL && !recursive_match; word = word->next) { len1 = StringLen (word->word); if (CaseNCompare(word->word, cp_m, len1, word->case_sensitive) == 0) { /* text match */ word_start_m = IsWholeWordAtStart (str_match, cp_m, is_start); if (!word->whole_word || (!isalpha (*(cp_m + len1)) && word_start_m)) { /* whole word match */ if (word->synonyms == NULL) { if (AdvancedStringCompare (cp_s, cp_m + len1, scp, word_start_m, &target_match_len)) { recursive_match = TRUE; } } else { for (syn = word->synonyms; syn != NULL && !recursive_match; syn = syn->next) { len2 = StringLen (syn->data.ptrvalue); if (CaseNCompare(syn->data.ptrvalue, cp_s, len2, word->case_sensitive) == 0) { /* text match */ word_start_s = IsWholeWordAtStart (str, cp_s, is_start); if (!word->whole_word || (!isalpha (*(cp_s + len2)) && word_start_s)) { /* whole word match */ 
if (AdvancedStringCompare (cp_s + len2, cp_m + len1, scp, word_start_m && word_start_s, &target_match_len)) { recursive_match = TRUE; } } } } } } } } if (!recursive_match) { if (CaseNCompare(cp_m, cp_s, 1, scp->case_sensitive) == 0) { cp_m++; cp_s++; target_match_len++; } else if (scp->ignore_space && (isspace (*cp_m) || isspace (*cp_s))) { if (isspace (*cp_m)) { cp_m++; } if (isspace (*cp_s)) { cp_s++; target_match_len++; } } else if (scp->ignore_punct && (ispunct (*cp_m) || ispunct (*cp_s))) { if (ispunct (*cp_m)) { cp_m++; } if (ispunct (*cp_s)) { cp_s++; target_match_len++; } } else { match = FALSE; } } } if (match && !recursive_match) { while ((scp->ignore_space && isspace (*cp_s)) || (scp->ignore_punct && ispunct (*cp_s))) { cp_s++; target_match_len++; } while ((scp->ignore_space && isspace (*cp_m)) || (scp->ignore_punct && ispunct (*cp_m))) { cp_m++; } if (*cp_m != 0) { match = FALSE; } else if ((scp->match_location == String_location_ends || scp->match_location == String_location_equals) && *cp_s != 0) { match = FALSE; } else if (scp->whole_word && (!is_start || isalpha (*cp_s))) { match = FALSE; } } if (match && p_target_match_len != NULL) { (*p_target_match_len) += target_match_len; } return match; } static Boolean AdvancedStringMatch (CharPtr str, StringConstraintPtr scp) { CharPtr cp; Boolean rval = FALSE; if (str == NULL) { rval = FALSE; } else if (scp == NULL) { rval = TRUE; } else if (AdvancedStringCompare (str, scp->match_text, scp, TRUE, NULL)) { rval = TRUE; } else if (scp->match_location == String_location_starts || scp->match_location == String_location_equals) { rval = FALSE; } else { cp = str + 1; while (!rval && *cp != 0) { if (scp->whole_word) { while (*cp != 0 && isalpha (*(cp-1))) { cp++; } } if (*cp != 0) { if (AdvancedStringCompare (cp, scp->match_text, scp, TRUE, NULL)) { rval = TRUE; } else { cp++; } } } } return rval; } static void TestAdvancedStringMatch (void) { StringConstraintPtr scp; CharPtr text = "The quick brown fox jumped 
over the lazy dog."; CharPtr summ; scp = StringConstraintNew (); scp->match_location = String_location_contains; scp->match_text = StringSave ("dog leaped"); scp->ignore_words = WordSubstitutionNew(); scp->ignore_words->word = StringSave ("leap"); ValNodeAddPointer (&scp->ignore_words->synonyms, 0, StringSave ("jump")); scp->ignore_words->next = WordSubstitutionNew(); scp->ignore_words->next->word = StringSave ("dog"); ValNodeAddPointer (&scp->ignore_words->next->synonyms, 0, StringSave ("fox")); AdvancedStringMatch(text, scp); summ = SummarizeStringConstraint (scp); summ = MemFree (summ); scp = StringConstraintFree (scp); scp = StringConstraintNew (); scp->match_location = String_location_equals; scp->match_text = StringSave ("A fast beige wolf leaped across a sleepy beagle."); scp->ignore_words = WordSubstitutionNew(); scp->ignore_words->word = StringSave ("a"); scp->ignore_words->whole_word = TRUE; ValNodeAddPointer (&scp->ignore_words->synonyms, 0, StringSave ("the")); scp->ignore_words->next = WordSubstitutionNew(); scp->ignore_words->next->word = StringSave ("fast"); ValNodeAddPointer (&scp->ignore_words->next->synonyms, 0, StringSave ("quick")); scp->ignore_words->next->next = WordSubstitutionNew(); scp->ignore_words->next->next->word = StringSave ("beige"); ValNodeAddPointer (&scp->ignore_words->next->next->synonyms, 0, StringSave ("brown")); scp->ignore_words->next->next->next = WordSubstitutionNew(); scp->ignore_words->next->next->next->word = StringSave ("wolf"); ValNodeAddPointer (&scp->ignore_words->next->next->next->synonyms, 0, StringSave ("fox")); scp->ignore_words->next->next->next->next = WordSubstitutionNew(); scp->ignore_words->next->next->next->next->word = StringSave ("across"); ValNodeAddPointer (&scp->ignore_words->next->next->next->next->synonyms, 0, StringSave ("over")); scp->ignore_words->next->next->next->next->next = WordSubstitutionNew(); scp->ignore_words->next->next->next->next->next->word = StringSave ("beagle"); ValNodeAddPointer 
(&scp->ignore_words->next->next->next->next->next->synonyms, 0, StringSave ("dog")); AdvancedStringMatch(text, scp); summ = SummarizeStringConstraint (scp); summ = MemFree (summ); scp = StringConstraintFree (scp); } static const CharPtr kPutative = "putative"; static CharPtr s_weasels[] = { "candidate", "hypothetical", "novel", "possible", "potential", "predicted", "probable", "putative", "candidate", "uncharacterized", "unique", NULL }; static CharPtr SkipOneWeasel (CharPtr str) { Int4 i, len; CharPtr cp = str; for (i = 0; s_weasels[i] != NULL; i++) { len = StringLen (s_weasels[i]); if (StringNICmp (str, s_weasels[i], len) == 0 && isspace (*(str + len))) { cp = str + len + 1; while (isspace (*cp)) { cp++; } return cp; } } return cp; } static CharPtr SkipWeasel (CharPtr str) { CharPtr cp = str; cp = SkipOneWeasel (str); while (cp != str) { str = cp; cp = SkipOneWeasel (str); } return cp; } NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstraintPtr scp) { CharPtr pFound; Boolean rval = FALSE; Char char_after = 0; CharPtr search, pattern, tmp_match; if (IsStringConstraintEmpty (scp)) return TRUE; if (StringHasNoText (str)) return FALSE; if (scp->ignore_weasel) { str = SkipWeasel(str); } if (scp->is_all_caps && !IsAllCaps(str)) { return FALSE; } if (scp->is_all_lower && !IsAllLowerCase(str)) { return FALSE; } if (scp->is_all_punct && !IsAllPunctuation(str)) { return FALSE; } if (scp->match_text == NULL) { return TRUE; } tmp_match = scp->match_text; if (scp->ignore_weasel) { scp->match_text = SkipWeasel (scp->match_text); } if (scp->match_location != String_location_inlist && scp->ignore_words != NULL) { scp->match_text = tmp_match; return AdvancedStringMatch(str, scp); } if (scp->match_location != String_location_inlist && (scp->ignore_space || scp->ignore_punct)) { search = StringSave (str); StripUnimportantCharacters (search, scp->ignore_space, scp->ignore_punct); pattern = StringSave (scp->match_text); StripUnimportantCharacters (pattern, 
scp->ignore_space, scp->ignore_punct); } else { search = str; pattern = scp->match_text; } switch (scp->match_location) { case String_location_contains: if (scp->case_sensitive) { pFound = StringSearch (search, pattern); } else { pFound = StringISearch (search, pattern); } if (pFound == NULL) { rval = FALSE; } else if (scp->whole_word) { rval = IsWholeWordMatch (search, pFound, StringLen (pattern)); while (!rval && pFound != NULL) { if (scp->case_sensitive) { pFound = StringSearch (pFound + 1, pattern); } else { pFound = StringISearch (pFound + 1, pattern); } if (pFound != NULL) { rval = IsWholeWordMatch (search, pFound, StringLen (pattern)); } } } else { rval = TRUE; } break; case String_location_starts: if (scp->case_sensitive) { pFound = StringSearch (search, pattern); } else { pFound = StringISearch (search, pattern); } if (pFound == search) { if (scp->whole_word) { rval = IsWholeWordMatch (search, pFound, StringLen (pattern)); } else { rval = TRUE; } } break; case String_location_ends: if (scp->case_sensitive) { pFound = StringSearch (search, pattern); } else { pFound = StringISearch (search, pattern); } while (pFound != NULL && !rval) { char_after = *(pFound + StringLen (pattern)); if (char_after == 0) { if (scp->whole_word) { rval = IsWholeWordMatch (search, pFound, StringLen (pattern)); } else { rval = TRUE; } /* stop the search, we're at the end of the string */ pFound = NULL; } else { if (scp->case_sensitive) { pFound = StringSearch (pFound + 1, pattern); } else { pFound = StringISearch (pFound + 1, pattern); } } } break; case String_location_equals: if (scp->case_sensitive) { if (StringCmp (search, pattern) == 0) { rval = TRUE; } } else { if (StringICmp (search, pattern) == 0) { rval = TRUE; } } break; case String_location_inlist: if (scp->case_sensitive) { pFound = StringSearch (pattern, search); } else { pFound = StringISearch (pattern, search); } if (pFound == NULL) { rval = FALSE; } else { rval = IsWholeWordMatchEx (pattern, pFound, StringLen 
(search), TRUE); while (!rval && pFound != NULL) { if (scp->case_sensitive) { pFound = StringSearch (pFound + 1, search); } else { pFound = StringISearch (pFound + 1, search); } if (pFound != NULL) { rval = IsWholeWordMatchEx (pattern, pFound, StringLen (str), TRUE); } } } if (!rval) { /* look for spans */ rval = IsStringInSpanInList (search, pattern); } break; } if (search != str) { search = MemFree (search); } if (pattern != scp->match_text) { pattern = MemFree (pattern); } scp->match_text = tmp_match; return rval; } NLM_EXTERN Boolean DoesStringMatchConstraint (CharPtr str, StringConstraintPtr scp) { Boolean rval; rval = DoesSingleStringMatchConstraint (str, scp); if (scp != NULL && scp->not_present) { rval = !rval; } return rval; } static Boolean DoesStringListMatchConstraint (ValNodePtr list, StringConstraintPtr scp) { Int4 len = 1; CharPtr tmp; Boolean rval = FALSE; ValNodePtr vnp; if (IsStringConstraintEmpty (scp)) { return TRUE; } if (list == NULL) return FALSE; for (vnp = list; vnp != NULL; vnp = vnp->next) { len += StringLen (vnp->data.ptrvalue) + 2; } tmp = (CharPtr) MemNew (sizeof (Char) * len); for (vnp = list; vnp != NULL; vnp = vnp->next) { StringCat (tmp, vnp->data.ptrvalue); if (vnp->next != NULL) { StringCat (tmp, "; "); } } rval = DoesStringMatchConstraint (tmp, scp); tmp = MemFree (tmp); return rval; } NLM_EXTERN Boolean ReplaceStringConstraintPortionInString (CharPtr PNTR str, CharPtr replace, StringConstraintPtr scp) { Boolean rval = FALSE; CharPtr match_start, new_str; Int4 match_len, front_len; if (str == NULL) { return FALSE; } else if (*str == NULL) { if (IsStringConstraintEmpty (scp) || scp->not_present) { *str = StringSave (replace); rval = TRUE; } } else if (IsStringConstraintEmpty (scp)) { *str = MemFree (*str); *str = StringSave (replace); rval = TRUE; } else { switch (scp->match_location) { case String_location_equals: case String_location_inlist: if (DoesStringMatchConstraint (*str, scp)) { *str = MemFree (*str); *str = StringSave 
(replace); rval = TRUE; } break; case String_location_starts: match_len = 0; if (AdvancedStringCompare (*str, scp->match_text, scp, TRUE, &match_len)) { new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (*str) - match_len + StringLen (replace) + 1)); StringCpy (new_str, replace); StringCat (new_str, (*str) + match_len); *str = MemFree (*str); *str = new_str; rval = TRUE; } break; case String_location_contains: match_start = *str; while (*match_start != 0) { match_len = 0; if (AdvancedStringCompare (match_start, scp->match_text, scp, (match_start == *str || !isalpha (*(match_start - 1))), &match_len)) { new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (*str) - match_len + StringLen (replace) + 1)); front_len = match_start - *str; StringNCpy (new_str, *str, front_len); StringCat (new_str, replace); StringCat (new_str, match_start + match_len); *str = MemFree (*str); *str = new_str; match_start = (*str) + front_len + StringLen (replace); rval = TRUE; } else { match_start++; } } break; case String_location_ends: match_start = *str; while (!rval && *match_start != 0) { match_len = 0; if (AdvancedStringCompare (match_start, scp->match_text, scp, (match_start == *str), &match_len) && *(match_start + match_len) == 0) { new_str = (CharPtr) MemNew (sizeof (Char) * ((match_start - *str) + StringLen (replace) + 1)); StringNCpy (new_str, *str, match_start - *str); StringCat (new_str, replace); *str = MemFree (*str); *str = new_str; rval = TRUE; } else { match_start++; } } break; } } return rval; } NLM_EXTERN Boolean RemoveStringConstraintPortionFromString (CharPtr PNTR str, StringConstraintPtr scp) { CharPtr pFound, src, dst, cp; Boolean rval = FALSE; Int4 match_len; if (str == NULL || *str == NULL) return FALSE; if (IsStringConstraintEmpty (scp) || scp->not_present) return FALSE; if (scp->match_location == String_location_equals) { if (scp->case_sensitive) { if (StringCmp (*str, scp->match_text) == 0) { rval = TRUE; } } else { if (StringICmp (*str, scp->match_text) 
== 0) { rval = TRUE; } } if (rval == TRUE) { **str = 0; } } else { match_len = StringLen (scp->match_text); if (scp->case_sensitive) { pFound = StringSearch (*str, scp->match_text); } else { pFound = StringISearch (*str, scp->match_text); } while (pFound != NULL) { switch (scp->match_location) { case String_location_contains: case String_location_inlist: if ((!scp->whole_word && scp->match_location != String_location_inlist) || IsWholeWordMatch (*str, pFound, match_len)) { src = pFound + match_len; dst = pFound; while (*src != 0) { *dst = *src; dst++; src++; } *dst = 0; rval = TRUE; cp = pFound; } else { cp = pFound + 1; } if (scp->case_sensitive) { pFound = StringSearch (cp, scp->match_text); } else { pFound = StringISearch (cp, scp->match_text); } break; case String_location_starts: if (pFound == *str && (!scp->whole_word || IsWholeWordMatch (*str, pFound, match_len))) { src = pFound + match_len; dst = pFound; while (*src != 0) { *dst = *src; dst++; src++; } *dst = 0; rval = TRUE; } pFound = NULL; break; case String_location_ends: if (*(pFound + match_len) == 0 && (!scp->whole_word || IsWholeWordMatch (*str, pFound, match_len))) { *pFound = 0; rval = TRUE; pFound = NULL; } else { if (scp->case_sensitive) { pFound = StringSearch (pFound + 1, scp->match_text); } else { pFound = StringISearch (pFound + 1, scp->match_text); } } break; } } } if (rval && StringHasNoText (*str)) { *str = MemFree (*str); } return rval; } NLM_EXTERN Boolean IsLocationConstraintEmpty (LocationConstraintPtr lcp) { Boolean rval = TRUE; if (lcp == NULL) { rval = TRUE; } else if (lcp->strand != Strand_constraint_any) { rval = FALSE; } else if (lcp->seq_type != Seqtype_constraint_any) { rval = FALSE; } else if (lcp->partial5 != Partial_constraint_either) { rval = FALSE; } else if (lcp->partial3 != Partial_constraint_either) { rval = FALSE; } else if (lcp->location_type != Location_type_constraint_any) { rval = FALSE; } else if (lcp->end5 != NULL || lcp->end3 != NULL) { rval = FALSE; } return 
rval; } static Boolean DoesStrandMatchConstraint (SeqLocPtr slp, LocationConstraintPtr lcp) { Uint2 strand; Boolean rval = FALSE; if (slp == NULL) { rval = FALSE; } else if (lcp == NULL || lcp->strand == Strand_constraint_any) { rval = TRUE; } else { strand = SeqLocStrand (slp); if (strand == Seq_strand_minus) { if (lcp->strand == Strand_constraint_minus) { rval = TRUE; } else { rval = FALSE; } } else { if (lcp->strand == Strand_constraint_plus) { rval = TRUE; } else { rval = FALSE; } } } return rval; } static Boolean DoesBioseqMatchSequenceType (BioseqPtr bsp, Uint2 seq_type) { Boolean rval = FALSE; if (bsp == NULL) return FALSE; if (seq_type == Seqtype_constraint_any) return TRUE; if (ISA_na (bsp->mol) && seq_type == Seqtype_constraint_nuc) { rval = TRUE; } else if (ISA_aa (bsp->mol) && seq_type == Seqtype_constraint_prot) { rval = TRUE; } return rval; } static Boolean DoesSequenceTypeMatchContraint (SeqLocPtr slp, LocationConstraintPtr lcp) { Boolean rval = FALSE; BioseqPtr bsp; if (slp == NULL) { rval = FALSE; } else if (lcp == NULL || lcp->seq_type == Seqtype_constraint_any) { rval = TRUE; } else { bsp = BioseqFindFromSeqLoc (slp); rval = DoesBioseqMatchSequenceType (bsp, lcp->seq_type); } return rval; } static Boolean DoesLocationMatchPartialnessConstraint (SeqLocPtr slp, LocationConstraintPtr lcp) { Boolean rval = FALSE; Boolean partial5, partial3; if (slp == NULL) { rval = FALSE; } else if (lcp == NULL) { rval = TRUE; } else { CheckSeqLocForPartial (slp, &partial5, &partial3); if (lcp->partial5 == Partial_constraint_partial && !partial5) { rval = FALSE; } else if (lcp->partial5 == Partial_constraint_complete && partial5) { rval = FALSE; } else if (lcp->partial3 == Partial_constraint_partial && !partial3) { rval = FALSE; } else if (lcp->partial3 == Partial_constraint_complete && partial3) { rval = FALSE; } else { rval = TRUE; } } return rval; } static Boolean DoesLocationMatchTypeConstraint (SeqLocPtr slp, LocationConstraintPtr lcp) { Boolean rval = FALSE, 
has_null = FALSE; Int4 num_intervals = 0; SeqLocPtr slp_tmp = NULL; if (slp == NULL) { rval = FALSE; } else if (lcp->location_type == Location_type_constraint_any) { rval = TRUE; } else { while ((slp_tmp = SeqLocFindNext (slp, slp_tmp)) != NULL) { if (slp_tmp->choice == SEQLOC_NULL) { has_null = TRUE; } else if (slp->choice != SEQLOC_EMPTY) { num_intervals++; } } if (lcp->location_type == Location_type_constraint_single_interval) { if (num_intervals == 1) { rval = TRUE; } } else if (lcp->location_type == Location_type_constraint_joined) { if (num_intervals > 1 && !has_null) { rval = TRUE; } } else if (lcp->location_type == Location_type_constraint_ordered) { if (num_intervals > 1 && has_null) { rval = TRUE; } } } return rval; } static Boolean DoesLocationMatchDistanceConstraint (SeqLocPtr slp, LocationConstraintPtr lcp) { Boolean rval = TRUE; Uint1 strand; BioseqPtr bsp = NULL; Int4 pos; if (slp == NULL) { return FALSE; } else if (lcp->end5 == NULL && lcp->end3 == NULL) { return TRUE; } strand = SeqLocStrand (slp); if (strand == Seq_strand_minus) { if (lcp->end5 != NULL) { bsp = BioseqFindFromSeqLoc (slp); if (bsp == NULL) { rval = FALSE; } else { pos = SeqLocStop (slp); switch (lcp->end5->choice) { case LocationPosConstraint_dist_from_end: if (bsp->length - pos - 1 != lcp->end5->data.intvalue) { rval = FALSE; } break; case LocationPosConstraint_max_dist_from_end: if (bsp->length - pos - 1 > lcp->end5->data.intvalue) { rval = FALSE; } break; case LocationPosConstraint_min_dist_from_end: if (bsp->length - pos - 1 < lcp->end5->data.intvalue) { rval = FALSE; } break; } } } if (lcp->end3 != NULL && rval) { pos = SeqLocStart (slp); switch (lcp->end3->choice) { case LocationPosConstraint_dist_from_end: if (pos != lcp->end3->data.intvalue) { rval = FALSE; } break; case LocationPosConstraint_max_dist_from_end: if (pos > lcp->end3->data.intvalue) { rval = FALSE; } break; case LocationPosConstraint_min_dist_from_end: if (pos < lcp->end3->data.intvalue) { rval = FALSE; } 
break; } } } else { if (lcp->end5 != NULL) { pos = SeqLocStart (slp); switch (lcp->end5->choice) { case LocationPosConstraint_dist_from_end: if (pos != lcp->end5->data.intvalue) { rval = FALSE; } break; case LocationPosConstraint_max_dist_from_end: if (pos > lcp->end5->data.intvalue) { rval = FALSE; } break; case LocationPosConstraint_min_dist_from_end: if (pos < lcp->end5->data.intvalue) { rval = FALSE; } break; } } if (lcp->end3 != NULL && rval) { bsp = BioseqFindFromSeqLoc (slp); if (bsp == NULL) { rval = FALSE; } else { pos = SeqLocStop (slp); switch (lcp->end3->choice) { case LocationPosConstraint_dist_from_end: if (bsp->length - pos - 1 != lcp->end3->data.intvalue) { rval = FALSE; } break; case LocationPosConstraint_max_dist_from_end: if (bsp->length - pos - 1 > lcp->end3->data.intvalue) { rval = FALSE; } break; case LocationPosConstraint_min_dist_from_end: if (bsp->length - pos - 1 < lcp->end3->data.intvalue) { rval = FALSE; } break; } } } } return rval; } static Boolean DoesLocationMatchConstraint (SeqLocPtr slp, LocationConstraintPtr lcp) { Boolean rval = FALSE; if (slp == NULL) { rval = FALSE; } else if (IsLocationConstraintEmpty(lcp)) { rval = TRUE; } else if (DoesStrandMatchConstraint (slp, lcp) && DoesSequenceTypeMatchContraint (slp, lcp) && DoesLocationMatchPartialnessConstraint (slp, lcp) && DoesLocationMatchTypeConstraint(slp, lcp) && DoesLocationMatchDistanceConstraint(slp, lcp)) { rval = TRUE; } return rval; } static Boolean DoesFeatureMatchLocationConstraint (SeqFeatPtr sfp, LocationConstraintPtr constraint) { BioseqPtr bsp; SeqFeatPtr cds; SeqMgrFeatContext context; Boolean rval = TRUE; if (sfp == NULL) { return FALSE; } else if (IsLocationConstraintEmpty (constraint)) { return TRUE; } bsp = BioseqFindFromSeqLoc (sfp->location); if (constraint->strand != Strand_constraint_any) { if (bsp == NULL) { rval = FALSE; } else if (ISA_aa (bsp->mol)) { cds = SeqMgrGetCDSgivenProduct (bsp, &context); if (cds == NULL) { rval = FALSE; } else if 
(!DoesStrandMatchConstraint (cds->location, constraint)) { rval = FALSE; } } else { if (!DoesStrandMatchConstraint (sfp->location, constraint)) { rval = FALSE; } } } if (!DoesBioseqMatchSequenceType (bsp, constraint->seq_type)) { rval = FALSE; } if (!DoesLocationMatchPartialnessConstraint (sfp->location, constraint)) { rval = FALSE; } if (!DoesLocationMatchTypeConstraint (sfp->location, constraint)) { rval = FALSE; } if (!DoesLocationMatchDistanceConstraint(sfp->location, constraint)) { rval = FALSE; } return rval; } static Boolean DoesSeqFeatMatchLocationConstraint (SeqFeatPtr sfp, LocationConstraintPtr constraint) { if (sfp == NULL) { return FALSE; } else if (IsLocationConstraintEmpty(constraint)) { return TRUE; } else if (!DoesLocationMatchPartialnessConstraint (sfp->location, constraint)) { return FALSE; } else if (!DoesStrandMatchConstraint (sfp->location, constraint)) { return FALSE; } else if (!DoesLocationMatchTypeConstraint (sfp->location, constraint)) { return FALSE; } else if (!DoesLocationMatchDistanceConstraint(sfp->location, constraint)) { return FALSE; } else { return TRUE; } } static Boolean DoesBioseqMatchLocationConstraint (BioseqPtr bsp, LocationConstraintPtr constraint) { Boolean at_least_one = FALSE; Boolean rval = TRUE; SeqFeatPtr sfp; SeqMgrFeatContext context; if (bsp == NULL) return FALSE; if (IsLocationConstraintEmpty(constraint)) { return TRUE; } if (!DoesBioseqMatchSequenceType(bsp, constraint->seq_type)) { return FALSE; } if (constraint->strand != Strand_constraint_any || constraint->partial5 != Partial_constraint_either || constraint->partial3 != Partial_constraint_either) { if (ISA_aa (bsp->mol)) { sfp = SeqMgrGetCDSgivenProduct (bsp, &context); return DoesSeqFeatMatchLocationConstraint(sfp, constraint); } else { at_least_one = FALSE; for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context); sfp != NULL && rval; sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context)) { rval = DoesSeqFeatMatchLocationConstraint (sfp, constraint); 
at_least_one = TRUE; } return rval && at_least_one; } } else { return TRUE; } } static Boolean DoesObjectMatchLocationConstraint (Uint1 choice, Pointer data, LocationConstraintPtr constraint) { SeqFeatPtr sfp; SeqDescrPtr sdp; CGPSetPtr cgp; BioseqPtr bsp = NULL; BioseqSetPtr bssp; ValNodePtr vnp; ObjValNodePtr ovp; if (data == NULL) return FALSE; if (IsLocationConstraintEmpty(constraint)) { return TRUE; } if (choice == OBJ_SEQFEAT) { sfp = (SeqFeatPtr) data; return DoesFeatureMatchLocationConstraint (sfp, constraint); } else if (choice == OBJ_SEQDESC) { sdp = (SeqDescrPtr) data; if (sdp->extended != 0) { ovp = (ObjValNodePtr) sdp; if (ovp->idx.parenttype == OBJ_BIOSEQSET) { bssp = (BioseqSetPtr) ovp->idx.parentptr; if (bssp != NULL && bssp->seq_set != NULL && IS_Bioseq_set (bssp->seq_set)) { bsp = (BioseqPtr) bssp->seq_set->data.ptrvalue; } } else if (ovp->idx.parenttype == OBJ_BIOSEQ) { bsp = (BioseqPtr) ovp->idx.parentptr; } } return DoesBioseqMatchLocationConstraint(bsp, constraint); } else if (choice == 0) { if (constraint->seq_type != Seqtype_constraint_any) { return FALSE; } cgp = (CGPSetPtr) data; for (vnp = cgp->cds_list; vnp != NULL; vnp = vnp->next) { if (DoesFeatureMatchLocationConstraint (vnp->data.ptrvalue, constraint)) { return TRUE; } } for (vnp = cgp->gene_list; vnp != NULL; vnp = vnp->next) { if (DoesFeatureMatchLocationConstraint (vnp->data.ptrvalue, constraint)) { return TRUE; } } for (vnp = cgp->mrna_list; vnp != NULL; vnp = vnp->next) { if (DoesFeatureMatchLocationConstraint (vnp->data.ptrvalue, constraint)) { return TRUE; } } for (vnp = cgp->prot_list; vnp != NULL; vnp = vnp->next) { if (DoesFeatureMatchLocationConstraint (vnp->data.ptrvalue, constraint)) { return TRUE; } } return FALSE; } else if (choice == OBJ_BIOSEQ) { return DoesBioseqMatchLocationConstraint((BioseqPtr)data, constraint); } else { return FALSE; } } NLM_EXTERN Boolean IsTextMarkerEmpty (TextMarkerPtr marker) { CharPtr cp; Boolean rval = FALSE; if (marker == NULL) { rval = 
TRUE; } else if (marker->choice == TextMarker_free_text) { cp = (CharPtr) marker->data.ptrvalue; if (cp == NULL || *cp == 0) { rval = TRUE; } } return rval; } NLM_EXTERN TextMarkerPtr MakeTextTextMarker (CharPtr text) { TextMarkerPtr text_marker = ValNodeNew (NULL); text_marker->choice = TextMarker_free_text; text_marker->data.ptrvalue = StringSave (text); return text_marker; } static CharPtr FindTextMarker(CharPtr str, Int4Ptr len, TextMarkerPtr marker, Boolean case_sensitive, Boolean whole_word) { CharPtr search; CharPtr rval = NULL; Int4 search_len = 0; Int4 tmp; CharPtr digits = "0123456789"; CharPtr letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; if (str == NULL) { return NULL; } if (marker == NULL) { if (len != NULL) { *len = StringLen (str); } rval = str; } else if (marker->choice == TextMarker_free_text) { search = (CharPtr) marker->data.ptrvalue; if (search == NULL || search[0] == 0) { if (len != NULL) { *len = StringLen (str); } rval = str; } else { if (case_sensitive) { rval = StringSearch (str, search); } else { rval = StringISearch (str, search); } if (rval != NULL) { search_len = StringLen (search); if (whole_word && ! 
IsWholeWordMatch (str, rval, search_len)) { rval = NULL; } else { if (len != NULL) { *len = search_len; } } } } } else if (marker->choice == TextMarker_digits) { tmp = StringCSpn(str, digits); if (*(str + tmp) != 0) { rval = str + tmp; if (len != NULL) { *len = StringSpn (rval, digits); } } } else if (marker->choice == TextMarker_letters) { tmp = StringCSpn(str, letters); if (*(str + tmp) != 0) { rval = str + tmp; if (len != NULL) { *len = StringSpn (rval, letters); } } } return rval; } static CharPtr ApplyEditToString (CharPtr str, FieldEditPtr edit); /* for parsing and editing */ NLM_EXTERN void ApplyTextTransformsToString (CharPtr PNTR str, ValNodePtr transform_list) { CharPtr tmp; if (str == NULL || *str == NULL) { return; } while (transform_list != NULL) { switch (transform_list->choice) { case TextTransform_edit: tmp = ApplyEditToString (*str, transform_list->data.ptrvalue); *str = MemFree (*str); *str = tmp; break; case TextTransform_caps: FixCapitalizationInString (str, transform_list->data.intvalue, NULL); break; case TextTransform_remove: RemoveTextPortionFromString (*str, (TextPortionPtr)transform_list->data.ptrvalue); break; } transform_list = transform_list->next; } } static Boolean IsTextPortionEmpty (TextPortionPtr text_portion) { if (text_portion == NULL || (IsTextMarkerEmpty (text_portion->left_marker) && IsTextMarkerEmpty (text_portion->right_marker))) { return TRUE; } else { return FALSE; } } NLM_EXTERN Boolean IsTextTransformEmpty (ValNodePtr vnp) { Boolean rval = TRUE; FieldEditPtr edit; if (vnp == NULL) { return TRUE; } switch (vnp->choice) { case TextTransform_edit: if ((edit = (FieldEditPtr) vnp->data.ptrvalue) != NULL && edit->find_txt != NULL) { rval = FALSE; } break; case TextTransform_caps: if (vnp->data.intvalue > Cap_change_none) { rval = FALSE; } break; case TextTransform_remove: if (!IsTextPortionEmpty (vnp->data.ptrvalue)) { rval = FALSE; } break; } return rval; } NLM_EXTERN CharPtr GetTextPortionFromString (CharPtr str, 
TextPortionPtr text_portion) { CharPtr portion = NULL; CharPtr found_start, found_end; Int4 left_len = 0, right_len = 0, found_len; if (StringHasNoText (str)) { return NULL; } if (text_portion == NULL) { return StringSave (str); } found_start = FindTextMarker(str, &left_len, text_portion->left_marker, text_portion->case_sensitive, text_portion->whole_word); if (found_start == NULL) { return NULL; } if (!IsTextMarkerEmpty(text_portion->left_marker)) { if (text_portion->inside && !text_portion->include_left) { found_start += left_len; } else if (!text_portion->inside && text_portion->include_left) { found_start += left_len; } } found_end = FindTextMarker (found_start, &right_len, text_portion->right_marker, text_portion->case_sensitive, text_portion->whole_word); if (found_end == NULL) { return NULL; } if ((text_portion->inside && text_portion->include_right) || (!text_portion->inside && !text_portion->include_right) || IsTextMarkerEmpty(text_portion->right_marker)) { found_end += right_len; } found_len = found_end - found_start; if (found_len > 0) { portion = (CharPtr) MemNew (sizeof (Char) * (found_len + 1)); StringNCpy (portion, found_start, found_len); portion[found_len] = 0; } return portion; } static CharPtr FindTextPortionLocationInString (CharPtr str, TextPortionPtr text_portion) { CharPtr start, stop; if (str == NULL || text_portion == NULL) return FALSE; start = FindTextMarker(str, NULL, text_portion->left_marker, text_portion->case_sensitive, text_portion->whole_word); if (start != NULL && !IsTextMarkerEmpty (text_portion->right_marker)) { stop = FindTextMarker(start, NULL, text_portion->right_marker, text_portion->case_sensitive, text_portion->whole_word); if (stop == NULL) { start = NULL; } } return start; } static Boolean ReplaceStringForParse(CharPtr src_text, TextPortionPtr text_portion) { CharPtr src, dst; Int4 right_len; if (src_text == NULL || text_portion == NULL) { return FALSE; } dst = FindTextPortionLocationInString (src_text, text_portion); if 
(dst == NULL) return FALSE; if (IsTextMarkerEmpty (text_portion->right_marker)) { *dst = 0; } else { src = FindTextMarker(dst, &right_len, text_portion->right_marker, text_portion->case_sensitive, text_portion->whole_word); if (src != NULL) { if (text_portion->include_right) { src += right_len; } while (*src != 0) { *dst = *src; dst++; src++; } *dst = 0; } } return TRUE; } NLM_EXTERN Boolean RemoveTextPortionFromString (CharPtr str, TextPortionPtr text_portion) { CharPtr before = NULL, after = NULL, src, dst; Boolean rval = FALSE; Int4 left_len, right_len; if (str == NULL || text_portion == NULL) { return FALSE; } if (text_portion->inside) { rval = ReplaceStringForParse (str, text_portion); } else { if ((before = FindTextMarker (str, &left_len, text_portion->left_marker, text_portion->case_sensitive, text_portion->whole_word)) != NULL && (after = FindTextMarker (before, &right_len, text_portion->right_marker, text_portion->case_sensitive, text_portion->whole_word)) != NULL) { if (!IsTextMarkerEmpty (text_portion->right_marker)) { if (text_portion->include_right) { *after = 0; } else { *(after + right_len) = 0; } rval = TRUE; } if (!IsTextMarkerEmpty (text_portion->left_marker)) { dst = str; if (text_portion->include_left) { src = before + left_len; } else { src = before; } while (*src != 0) { *dst = *src; ++dst; ++src; } *dst = 0; rval = TRUE; } } } return rval; } /* generic functions for setting field values */ NLM_EXTERN Boolean SetStringValue (CharPtr PNTR existing_val, CharPtr new_val, Uint2 existing_text) { Boolean rval = FALSE; Int4 len; CharPtr tmp; if (existing_val == NULL) { return FALSE; } if (StringHasNoText (*existing_val)) { *existing_val = MemFree (*existing_val); *existing_val = StringSave (new_val); rval = TRUE; } else { if (existing_text != ExistingTextOption_replace_old && (new_val == NULL || *new_val == 0)) { return FALSE; } if (existing_text == ExistingTextOption_replace_old && StringCmp (*existing_val, new_val) == 0) { return FALSE; } switch 
(existing_text) { case ExistingTextOption_replace_old : *existing_val = MemFree (*existing_val); *existing_val = StringSave (new_val); rval = TRUE; break; case ExistingTextOption_append_semi : len = StringLen (new_val) + StringLen (*existing_val) + 3; tmp = (CharPtr) MemNew (sizeof (Char) * len); if (tmp != NULL) { sprintf (tmp, "%s; %s", *existing_val, new_val); MemFree (*existing_val); *existing_val = tmp; rval = TRUE; } break; case ExistingTextOption_append_space : len = StringLen (new_val) + StringLen (*existing_val) + 2; tmp = (CharPtr) MemNew (sizeof (Char) * len); if (tmp != NULL) { sprintf (tmp, "%s %s", *existing_val, new_val); MemFree (*existing_val); *existing_val = tmp; rval = TRUE; } break; case ExistingTextOption_append_colon : len = StringLen (new_val) + StringLen (*existing_val) + 3; tmp = (CharPtr) MemNew (sizeof (Char) * len); if (tmp != NULL) { sprintf (tmp, "%s: %s", *existing_val, new_val); MemFree (*existing_val); *existing_val = tmp; rval = TRUE; } break; case ExistingTextOption_append_comma : len = StringLen (new_val) + StringLen (*existing_val) + 3; tmp = (CharPtr) MemNew (sizeof (Char) * len); if (tmp != NULL) { sprintf (tmp, "%s, %s", *existing_val, new_val); MemFree (*existing_val); *existing_val = tmp; rval = TRUE; } break; case ExistingTextOption_append_none : len = StringLen (new_val) + StringLen (*existing_val) + 1; tmp = (CharPtr) MemNew (sizeof (Char) * len); if (tmp != NULL) { sprintf (tmp, "%s%s", *existing_val, new_val); MemFree (*existing_val); *existing_val = tmp; rval = TRUE; } break; case ExistingTextOption_prefix_semi : len = StringLen (new_val) + StringLen (*existing_val) + 3; tmp = (CharPtr) MemNew (sizeof (Char) * len); if (tmp != NULL) { sprintf (tmp, "%s; %s", new_val, *existing_val); MemFree (*existing_val); *existing_val = tmp; rval = TRUE; } break; case ExistingTextOption_prefix_space : len = StringLen (new_val) + StringLen (*existing_val) + 2; tmp = (CharPtr) MemNew (sizeof (Char) * len); if (tmp != NULL) { sprintf 
(tmp, "%s %s", new_val, *existing_val); MemFree (*existing_val); *existing_val = tmp; rval = TRUE; } break; case ExistingTextOption_prefix_colon : len = StringLen (new_val) + StringLen (*existing_val) + 3; tmp = (CharPtr) MemNew (sizeof (Char) * len); if (tmp != NULL) { sprintf (tmp, "%s: %s", new_val, *existing_val); MemFree (*existing_val); *existing_val = tmp; rval = TRUE; } break; case ExistingTextOption_prefix_comma : len = StringLen (new_val) + StringLen (*existing_val) + 3; tmp = (CharPtr) MemNew (sizeof (Char) * len); if (tmp != NULL) { sprintf (tmp, "%s, %s", new_val, *existing_val); MemFree (*existing_val); *existing_val = tmp; rval = TRUE; } break; case ExistingTextOption_prefix_none : len = StringLen (new_val) + StringLen (*existing_val) + 1; tmp = (CharPtr) MemNew (sizeof (Char) * len); if (tmp != NULL) { sprintf (tmp, "%s%s", new_val, *existing_val); MemFree (*existing_val); *existing_val = tmp; rval = TRUE; } break; case ExistingTextOption_leave_old : rval = FALSE; } } return rval; } /* NOTE: The following functions, GetTwoFieldSubfield, SetTwoFieldSubfield, and RemoveTwoFieldSubfield, * all assume that if only one field is present, it is subfield 1. 
*/ static CharPtr GetTwoFieldSubfield (CharPtr str, Uint1 subfield) { CharPtr cp; CharPtr new_val = NULL; Int4 len; if (StringHasNoText (str) || subfield > 2) { return NULL; } if (subfield == 0) { new_val = StringSave (str); } else { cp = StringChr (str, ':'); if (cp == NULL) { if (subfield == 1) { new_val = StringSave (str); } else { new_val = NULL; } } else { if (subfield == 1) { len = cp - str + 1; new_val = (CharPtr) MemNew (sizeof (Char) * len); StringNCpy (new_val, str, len - 1); new_val[len - 1] = 0; } else if (!StringHasNoText (cp + 1)) { new_val = StringSave (cp + 1); } } } return new_val; } static CharPtr MakeValFromTwoFields (CharPtr PNTR fields) { Boolean empty1, empty2; CharPtr val = NULL; if (fields == NULL) return NULL; empty1 = StringHasNoText (fields[0]); empty2 = StringHasNoText (fields[1]); if (empty1 && empty2) { val = NULL; } else if (empty1) { val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[1]) + 2)); sprintf (val, ":%s", fields[1]); } else if (empty2) { val = StringSave (fields[0]); } else { val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[0]) + StringLen (fields[1]) + 2)); sprintf (val, "%s:%s", fields[0], fields[1]); } return val; } static Boolean RemoveTwoFieldSubfield (CharPtr PNTR existing_val, Uint1 subfield) { Boolean rval = FALSE; CharPtr fields[2]; if (existing_val == NULL || StringHasNoText (*existing_val) || subfield > 2) { return FALSE; } if (subfield == 0) { *existing_val = MemFree (*existing_val); rval = TRUE; } else { fields[0] = GetTwoFieldSubfield (*existing_val, 1); fields[1] = GetTwoFieldSubfield (*existing_val, 2); if (!StringHasNoText (fields[subfield - 1])) { fields[subfield - 1] = MemFree (fields[subfield - 1]); *existing_val = MemFree (*existing_val); *existing_val = MakeValFromTwoFields (fields); rval = TRUE; } fields[0] = MemFree (fields[0]); fields[1] = MemFree (fields[1]); } return rval; } static Boolean SetTwoFieldSubfield (CharPtr PNTR existing_val, Int4 subfield, CharPtr new_field, Uint2 
existing_text) { Boolean rval = FALSE; CharPtr fields[2]; if (existing_val == NULL || subfield > 2 || StringHasNoText (new_field)) { return FALSE; } if (subfield == 0) { rval = SetStringValue (existing_val, new_field, existing_text); } else { fields[0] = GetTwoFieldSubfield (*existing_val, 1); fields[1] = GetTwoFieldSubfield (*existing_val, 2); if (SetStringValue (&(fields[subfield - 1]), new_field, existing_text)) { *existing_val = MemFree (*existing_val); *existing_val = MakeValFromTwoFields (fields); rval = TRUE; } fields[0] = MemFree (fields[0]); fields[1] = MemFree (fields[1]); } return rval; } /* NOTE: The following functions, GetThreeFieldSubfield, SetThreeFieldSubfield, and RemoveThreeFieldSubfield * all assume that if only one field is present, it is subfield 3. If two fields are present, they are subfields 1 and 3. */ static CharPtr GetThreeFieldSubfield (CharPtr str, Uint1 subfield) { CharPtr cp, cp2; Int4 num_colons = 0; CharPtr new_val = NULL; if (StringHasNoText (str)) { return NULL; } cp = StringChr (str, ':'); while (cp != NULL) { num_colons ++; cp = StringChr (cp + 1, ':'); } if (subfield == 0) { new_val = StringSave (str); } else if (subfield == 1) { if (num_colons == 0) { return NULL; } else { cp = StringChr (str, ':'); new_val = (CharPtr) MemNew (sizeof (Char) * (cp - str + 1)); StringNCpy (new_val, str, cp - str); new_val[cp - str] = 0; } } else if (subfield == 2) { if (num_colons == 0 || num_colons == 1) { return NULL; } else { cp = StringChr (str, ':'); cp2 = StringChr (cp + 1, ':'); new_val = (CharPtr) MemNew (sizeof (Char) * (cp2 - cp)); StringNCpy (new_val, cp + 1, cp2 - cp - 1); new_val[cp2 - cp - 1] = 0; } } else { if (num_colons == 0) { new_val = StringSave (str); } else { cp = StringRChr (str, ':'); new_val = StringSave (cp + 1); } } return new_val; } static CharPtr MakeValFromThreeFields (CharPtr PNTR fields) { Int4 i; Boolean empty[3]; CharPtr val = NULL; if (fields == NULL) return NULL; for (i = 0; i < 3; i++) { empty[i] = 
StringHasNoText (fields[i]); } if (empty[0] && empty[1] && empty[2]) { /* do nothing, value is now empty */ } else if (empty[0] && empty[1]) { val = StringSave (fields[2]); } else if (empty[0] && empty[2]) { val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[1]) + 2)); sprintf (val, ":%s:", fields[1]); } else if (empty[1] && empty[2]) { val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[0]) + 2)); sprintf (val, "%s:", fields[0]); } else if (empty[0]) { val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[1]) + StringLen (fields[2]) + 3)); sprintf (val, ":%s:%s", fields[1], fields[2]); } else if (empty[1]) { val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[0]) + StringLen (fields[2]) + 3)); sprintf (val, "%s:%s", fields[0], fields[2]); } else if (empty[2]) { val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[0]) + StringLen (fields[1]) + 3)); sprintf (val, "%s:%s:", fields[0], fields[1]); } else { val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[0]) + StringLen (fields[1]) + StringLen (fields[2]) + 3)); sprintf (val, "%s:%s:%s", fields[0], fields[1], fields[2]); } return val; } static Boolean RemoveThreeFieldSubfield (CharPtr PNTR existing_val, Uint1 subfield) { Int4 i; CharPtr fields[3]; Boolean rval = FALSE; if (existing_val == NULL || subfield > 3 || StringHasNoText (*existing_val)) return FALSE; if (subfield == 0) { *existing_val = MemFree (*existing_val); rval = TRUE; } else { for (i = 0; i < 3; i++) { fields[i] = GetThreeFieldSubfield (*existing_val, i + 1); } if (!StringHasNoText (fields[subfield - 1])) { fields[subfield - 1] = MemFree (fields[subfield - 1]); *existing_val = MakeValFromThreeFields (fields); rval = TRUE; } for (i = 0; i < 3; i++) { fields[i] = MemFree (fields[i]); } } return rval; } static Boolean SetThreeFieldSubfield (CharPtr PNTR existing_val, Int4 subfield, CharPtr new_field, Uint2 existing_text) { Int4 i; CharPtr fields[3]; Boolean rval = FALSE; if (existing_val == NULL || StringHasNoText 
(new_field) || subfield < 0 || subfield > 3) return FALSE; if (subfield == 0) { rval = SetStringValue (existing_val, new_field, existing_text); } else { for (i = 0; i < 3; i++) { fields[i] = GetThreeFieldSubfield (*existing_val, i + 1); } if (SetStringValue (&(fields[subfield - 1]), new_field, existing_text)) { *existing_val = MemFree (*existing_val); *existing_val = MakeValFromThreeFields (fields); rval = TRUE; } for (i = 0; i < 3; i++) { fields[i] = MemFree (fields[i]); } } return rval; } NLM_EXTERN Boolean SetStringsInValNodeStringList (ValNodePtr PNTR list, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text) { ValNodePtr vnp; CharPtr cp; Boolean rval = FALSE, found = FALSE; if (list == NULL) { return FALSE; } if (*list == NULL && (scp == NULL || StringHasNoText (scp->match_text))) { ValNodeAddPointer (list, 0, StringSave (new_val)); rval = TRUE; } else if (existing_text == ExistingTextOption_add_qual) { for (vnp = *list; vnp != NULL; vnp = vnp->next) { if (StringCmp (new_val, vnp->data.ptrvalue) == 0) { found = TRUE; break; } } if (!found) { ValNodeAddPointer (list, 0, StringSave (new_val)); rval = TRUE; } } else if (existing_text == ExistingTextOption_replace_old) { found = FALSE; for (vnp = *list; vnp != NULL; vnp = vnp->next) { cp = (CharPtr) vnp->data.ptrvalue; if (DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) { rval |= SetStringValue (&cp, new_val, existing_text); vnp->data.ptrvalue = cp; found = TRUE; } } if (!found && DoesStringListMatchConstraint (*list, scp)) { *list = ValNodeFreeData (*list); vnp = ValNodeNew (NULL); vnp->data.ptrvalue = StringSave (new_val); *list = vnp; rval = TRUE; } } else if (existing_text == ExistingTextOption_leave_old) { rval = FALSE; } else { for (vnp = *list; vnp != NULL; vnp = vnp->next) { cp = (CharPtr) vnp->data.ptrvalue; if (DoesStringMatchConstraint (cp, scp)) { rval |= SetStringValue (&cp, new_val, existing_text); vnp->data.ptrvalue = cp; } } } return rval; } NLM_EXTERN Boolean SetStringInGBQualList 
(GBQualPtr PNTR list, ValNodePtr field, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text) { Boolean rval = FALSE, does_match, any_found = FALSE; Int4 gbqual, subfield; CharPtr qual_name = NULL, tmp; GBQualPtr gbq, last_gbq = NULL; if (field == NULL) return FALSE; if (field->choice == FeatQualChoice_legal_qual) { gbqual = GetGBQualFromFeatQual (field->data.intvalue, &subfield); if (gbqual > -1) { qual_name = ParFlat_GBQual_names [gbqual].name; if (existing_text == ExistingTextOption_add_qual) { gbq = GBQualNew (); gbq->qual = StringSave (qual_name); gbq->val = StringSave (new_val); if (last_gbq == NULL) { *list = gbq; } else { last_gbq->next = gbq; } rval = TRUE; } else { for (gbq = *list; gbq != NULL; gbq = gbq->next) { if (StringCmp (gbq->qual, qual_name) == 0) { if (subfield > 0) { does_match = TRUE; if (!IsStringConstraintEmpty (scp)) { tmp = GetTwoFieldSubfield (gbq->val, subfield); does_match = DoesStringMatchConstraint (tmp, scp); tmp = MemFree (tmp); } if (does_match) { rval |= SetTwoFieldSubfield (&(gbq->val), subfield, new_val, existing_text); } } else if (DoesStringMatchConstraint (gbq->val, scp)) { rval |= SetStringValue (&(gbq->val), new_val, existing_text); } any_found = TRUE; } last_gbq = gbq; } if (!rval && (scp == NULL || scp->match_text == NULL || (any_found == FALSE && scp->not_present))) { gbq = GBQualNew (); gbq->qual = StringSave (qual_name); gbq->val = StringSave (new_val); if (last_gbq == NULL) { *list = gbq; } else { last_gbq->next = gbq; } rval = TRUE; } } } } else if (field->choice == FeatQualChoice_illegal_qual) { for (gbq = *list; gbq != NULL; gbq = gbq->next) { if (DoesStringMatchConstraint (gbq->qual, field->data.ptrvalue) && DoesStringMatchConstraint (gbq->val, scp)) { rval |= SetStringValue (&(gbq->val), new_val, existing_text); } } } return rval; } static Boolean SetStringInRNAQualList (RNAQualPtr PNTR list, CharPtr qual_name, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text) { Boolean rval = FALSE; 
RNAQualPtr rq, last_rq = NULL; if (StringHasNoText (qual_name)) { return FALSE; } if (existing_text == ExistingTextOption_add_qual) { rq = RNAQualNew (); rq->qual = StringSave (qual_name); rq->val = StringSave (new_val); if (last_rq == NULL) { *list = rq; } else { last_rq->next = rq; } rval = TRUE; } else { for (rq = *list; rq != NULL; rq = rq->next) { if (StringCmp (rq->qual, qual_name) == 0 && DoesStringMatchConstraint (rq->val, scp)) { rval |= SetStringValue (&(rq->val), new_val, existing_text); } last_rq = rq; } if (!rval && (scp == NULL || scp->match_text == NULL)) { rq = RNAQualNew (); rq->qual = StringSave (qual_name); rq->val = StringSave (new_val); if (last_rq == NULL) { *list = rq; } else { last_rq->next = rq; } rval = TRUE; } } return rval; } static CharPtr GetFirstRNAQualMatchName (RNAQualPtr qual, CharPtr qual_name, StringConstraintPtr scp) { CharPtr str = NULL; while (qual != NULL && str == NULL) { if (StringCmp (qual->qual, qual_name) == 0 && !StringHasNoText (qual->val) && DoesStringMatchConstraint (qual->val, scp)) { str = StringSave (qual->val); } qual = qual->next; } return str; } static Boolean RemoveRNAQualMatch (RNAQualPtr PNTR list, CharPtr qual_name, StringConstraintPtr scp) { RNAQualPtr qual_prev = NULL, qual_next, qual; Boolean rval = FALSE; if (list == NULL) return FALSE; qual = *list; while (qual != NULL) { qual_next = qual->next; if (StringICmp (qual->qual, qual_name) == 0 && DoesStringMatchConstraint (qual->val, scp)) { if (qual_prev == NULL) { *list = qual->next; } else { qual_prev->next = qual->next; } qual->next = NULL; qual = RNAQualFree (qual); rval = TRUE; } else { qual_prev = qual; } qual = qual_next; } return rval; } static Boolean SetInt2ValueWithString (Int2Ptr val, CharPtr val_str, Uint2 existing_text) { Char num[15]; CharPtr tmp = NULL; Boolean rval = FALSE; if (val == NULL) return FALSE; sprintf (num, "%d", *val); tmp = StringSave (num); if (SetStringValue (&tmp, val_str, existing_text) && StringIsAllDigits (tmp)) { *val = 
atoi (tmp); rval = TRUE; } tmp = MemFree (tmp); return rval; } static CharPtr GetInt2ValueFromString (Int2 val, StringConstraintPtr scp) { Char num[15]; sprintf (num, "%d", val); if (DoesStringMatchConstraint (num, scp)) { return StringSave (num); } else { return NULL; } } NLM_EXTERN Boolean SetObjectIdString (ObjectIdPtr oip, CharPtr value, Uint2 existing_text) { Boolean rval = FALSE; Char num[15]; CharPtr tmp = NULL; if (oip == NULL) { return FALSE; } if (oip->id > 0) { sprintf (num, "%d", oip->id); tmp = StringSave (num); } else { tmp = StringSaveNoNull (oip->str); } if (SetStringValue (&tmp, value, existing_text)) { oip->str = MemFree (oip->str); oip->id = 0; if (StringIsAllDigits (tmp) && StringLen (tmp) < 8 && *tmp != '0') { oip->id = atoi (tmp); } else { oip->str = tmp; tmp = NULL; } rval = TRUE; } tmp = MemFree (tmp); return rval; } NLM_EXTERN CharPtr GetObjectIdString (ObjectIdPtr oip) { CharPtr rval = NULL; Char num[15]; if (oip == NULL) { return FALSE; } if (oip->id > 0) { sprintf (num, "%d", oip->id); rval = StringSave (num); } else { rval = StringSaveNoNull (oip->str); } return rval; } static Boolean DoesNumberMatchStringConstraint (Int4 num, StringConstraintPtr scp) { Char tmp[15]; if (IsStringConstraintEmpty (scp)) { return TRUE; } sprintf (tmp, "%d", num); return DoesStringMatchConstraint(tmp, scp); } static Boolean DoesObjectIdMatchStringConstraint (ObjectIdPtr oip, StringConstraintPtr scp) { Boolean rval = FALSE; if (oip == NULL) { return FALSE; } else if (IsStringConstraintEmpty (scp)) { return TRUE; } else if (oip->id > 0) { rval = DoesNumberMatchStringConstraint (oip->id, scp); } else { rval = DoesStringMatchConstraint (oip->str, scp); } return rval; } /* generic functions for getting string values */ static Int4 GetDbtagStringLen (DbtagPtr db_tag) { Int4 len; if (db_tag == NULL) { return 0; } len = StringLen (db_tag->db) + 2; if (db_tag->tag != NULL) { if (db_tag->tag->str != NULL) { len += StringLen (db_tag->tag->str); } else { len += 10; } } 
return len; } NLM_EXTERN CharPtr GetDbtagString (DbtagPtr db_tag) { Int4 len; CharPtr str; if (db_tag == NULL) { return NULL; } len = GetDbtagStringLen (db_tag); if (len == 0) { return NULL; } str = (CharPtr) MemNew (len * sizeof (Char)); if (str != NULL) { StringCpy (str, db_tag->db); StringCat (str, ":"); if (db_tag->tag != NULL) { if (db_tag->tag->str != NULL) { StringCat (str, db_tag->tag->str); } else { sprintf (str + StringLen (str), "%d", db_tag->tag->id); } } } return str; } NLM_EXTERN Boolean SetDbtagString (DbtagPtr db_tag, CharPtr value, Uint2 existing_text) { Boolean rval = FALSE; CharPtr cp; Int4 dbxvalid; CharPtr tmp; CharPtr twoval; if (db_tag == NULL || StringHasNoText (value)) { return FALSE; } cp = StringChr (value, ':'); if (cp == NULL) { tmp = StringSave (db_tag->db); if (SetStringValue (&tmp, value, existing_text)) { dbxvalid = DbxrefIsValid (tmp, NULL, NULL, NULL, NULL); if (dbxvalid != 0) { db_tag->db = MemFree (db_tag->db); db_tag->db = tmp; tmp = NULL; rval = TRUE; } } if (!rval) { if (db_tag->tag == NULL) { db_tag->tag = ObjectIdNew(); } rval = SetObjectIdString (db_tag->tag, value, existing_text); } tmp = MemFree (tmp); } else { twoval = StringSave (value); cp = StringChr (twoval, ':'); *cp = 0; cp++; rval = SetStringValue (&(db_tag->db), twoval, existing_text); if (db_tag->tag == NULL) { db_tag->tag = ObjectIdNew (); } rval |= SetObjectIdString (db_tag->tag, cp, existing_text); twoval = MemFree (twoval); } return rval; } static Boolean SetDbxrefString (ValNodePtr PNTR list, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { ValNodePtr vnp; Boolean rval = FALSE, skip; DbtagPtr dbtag; CharPtr cp; if (list == NULL) { return FALSE; } if (existing_text == ExistingTextOption_add_qual || (*list == NULL && (scp == NULL || StringHasNoText (scp->match_text)))) { dbtag = DbtagNew (); rval = SetDbtagString (dbtag, value, existing_text); if (rval) { ValNodeAddPointer (list, 0, dbtag); } else { dbtag = DbtagFree (dbtag); } } else { for 
(vnp = *list; vnp != NULL; vnp = vnp->next) { skip = FALSE; if (scp != NULL) { cp = GetDbtagString (vnp->data.ptrvalue); if (!DoesStringMatchConstraint (cp, scp)) { skip = TRUE; } cp = MemFree (cp); } if (!skip) { rval |= SetDbtagString (vnp->data.ptrvalue, value, existing_text); } } } return rval; } static CharPtr GetFirstValNodeStringMatch (ValNodePtr vnp, StringConstraintPtr scp) { CharPtr str = NULL; while (vnp != NULL && str == NULL) { if (!StringHasNoText (vnp->data.ptrvalue) && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) { str = StringSave (vnp->data.ptrvalue); } vnp = vnp->next; } return str; } NLM_EXTERN Boolean RemoveValNodeStringMatch (ValNodePtr PNTR list, StringConstraintPtr scp) { ValNodePtr vnp_prev = NULL, vnp_next, vnp; Boolean rval = FALSE; if (list == NULL) return FALSE; vnp = *list; while (vnp != NULL) { vnp_next = vnp->next; if (!StringHasNoText (vnp->data.ptrvalue) && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) { if (vnp_prev == NULL) { *list = vnp->next; } else { vnp_prev->next = vnp->next; } vnp->next = NULL; vnp = ValNodeFreeData (vnp); rval = TRUE; } else { vnp_prev = vnp; } vnp = vnp_next; } return rval; } NLM_EXTERN CharPtr GetFirstGBQualMatch (GBQualPtr qual, CharPtr qual_name, Int4 subfield, StringConstraintPtr scp) { CharPtr str = NULL; while (qual != NULL && str == NULL) { if (StringICmp (qual->qual, qual_name) == 0) { str = GetTwoFieldSubfield (qual->val, subfield); if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) { str = MemFree (str); } } qual = qual->next; } return str; } static CharPtr GetFirstGBQualMatchConstraintName (GBQualPtr qual, StringConstraintPtr qual_name, StringConstraintPtr scp) { CharPtr str = NULL; while (qual != NULL && str == NULL) { if (DoesStringMatchConstraint (qual->qual, qual_name) &&!StringHasNoText (qual->val) && DoesStringMatchConstraint (qual->val, scp)) { str = StringSave (qual->val); } qual = qual->next; } return str; } NLM_EXTERN Boolean RemoveGBQualMatch 
(GBQualPtr PNTR list, CharPtr qual_name, Int4 subfield, StringConstraintPtr scp) { GBQualPtr qual_prev = NULL, qual_next, qual; CharPtr tmp; Boolean rval = FALSE, does_match, do_remove; if (list == NULL) return FALSE; qual = *list; while (qual != NULL) { qual_next = qual->next; do_remove = FALSE; if (StringICmp (qual->qual, qual_name) == 0) { if (subfield > 0) { does_match = TRUE; if (!IsStringConstraintEmpty (scp)) { tmp = GetTwoFieldSubfield (qual->val, subfield); does_match = DoesStringMatchConstraint (tmp, scp); tmp = MemFree (tmp); } if (RemoveTwoFieldSubfield (&(qual->val), subfield)) { rval = TRUE; if (StringHasNoText (qual->val)) { do_remove = TRUE; } } } else if (DoesStringMatchConstraint (qual->val, scp)) { do_remove = TRUE; } } if (do_remove) { if (qual_prev == NULL) { *list = qual->next; } else { qual_prev->next = qual->next; } qual->next = NULL; qual = GBQualFree (qual); rval = TRUE; } else { qual_prev = qual; } qual = qual_next; } return rval; } static Boolean RemoveGBQualMatchConstraintName (GBQualPtr PNTR list, StringConstraintPtr qual_name, StringConstraintPtr scp) { GBQualPtr qual_prev = NULL, qual_next, qual; Boolean rval = FALSE; if (list == NULL) return FALSE; qual = *list; while (qual != NULL) { qual_next = qual->next; if (DoesStringMatchConstraint (qual->qual, qual_name) && !StringHasNoText (qual->val) && DoesStringMatchConstraint (qual->val, scp)) { if (qual_prev == NULL) { *list = qual->next; } else { qual_prev->next = qual->next; } qual->next = NULL; qual = GBQualFree (qual); rval = TRUE; } else { qual_prev = qual; } qual = qual_next; } return rval; } static CharPtr GetDbxrefString (ValNodePtr list, StringConstraintPtr scp) { ValNodePtr vnp; Int4 len = 0; CharPtr str = NULL, cp; if (list == NULL) { return NULL; } for (vnp = list; vnp != NULL; vnp = vnp->next) { cp = GetDbtagString (vnp->data.ptrvalue); if (cp != NULL && DoesStringMatchConstraint(cp, scp)) { len += StringLen (cp) + 1; } cp = MemFree (cp); } if (len == 0) { return NULL; } 
str = (CharPtr) MemNew ((len + 1) * sizeof (Char)); if (str != NULL) { for (vnp = list; vnp != NULL; vnp = vnp->next) { cp = GetDbtagString (vnp->data.ptrvalue); if (cp != NULL && DoesStringMatchConstraint(cp, scp)) { StringCat (str, cp); StringCat (str, ";"); } cp = MemFree (cp); } } if (StringLen (str) >1) { /* remove final semicolon */ str [StringLen (str) - 1] = 0; } return str; } static ValNodePtr GetMultipleDbxrefStrings (ValNodePtr list, StringConstraintPtr scp) { ValNodePtr vnp, val_list = NULL; CharPtr cp; for (vnp = list; vnp != NULL; vnp = vnp->next) { cp = GetDbtagString (vnp->data.ptrvalue); if (cp != NULL && DoesStringMatchConstraint(cp, scp)) { ValNodeAddPointer (&val_list, 0, cp); } } return val_list; } static Boolean RemoveDbxrefString (ValNodePtr PNTR list, StringConstraintPtr scp) { ValNodePtr vnp, vnp_prev = NULL, vnp_next; CharPtr cp; Boolean rval = FALSE; if (list == NULL || *list == NULL) { return FALSE; } vnp = *list; while (vnp != NULL) { vnp_next = vnp->next; cp = GetDbtagString (vnp->data.ptrvalue); if (DoesStringMatchConstraint(cp, scp)) { if (vnp_prev == NULL) { *list = vnp->next; } else { vnp_prev->next = vnp->next; } vnp->next = NULL; vnp->data.ptrvalue = DbtagFree (vnp->data.ptrvalue); vnp = ValNodeFree (vnp); rval = TRUE; } else { vnp_prev = vnp; } vnp = vnp_next; } return rval; } static CharPtr trnaList [] = { "tRNA-Gap", "tRNA-Ala", "tRNA-Asx", "tRNA-Cys", "tRNA-Asp", "tRNA-Glu", "tRNA-Phe", "tRNA-Gly", "tRNA-His", "tRNA-Ile", "tRNA-Xle", "tRNA-Lys", "tRNA-Leu", "tRNA-Met", "tRNA-Asn", "tRNA-Pyl", "tRNA-Pro", "tRNA-Gln", "tRNA-Arg", "tRNA-Ser", "tRNA-Thr", "tRNA-Sec", "tRNA-Val", "tRNA-Trp", "tRNA-OTHER", "tRNA-Tyr", "tRNA-Glx", "tRNA-TERM", NULL }; static CharPtr GetTrnaProductString (tRNAPtr trna) { Uint1 aa; Uint1 from; SeqMapTablePtr smtp; Uint2 idx; CharPtr str = NULL; if (trna == NULL) { return NULL; } aa = 0; if (trna->aatype == 2) { aa = trna->aa; } else { from = 0; switch (trna->aatype) { case 0 : from = 0; break; case 1 
: from = Seq_code_iupacaa; break; case 2 : from = Seq_code_ncbieaa; break; case 3 : from = Seq_code_ncbi8aa; break; case 4 : from = Seq_code_ncbistdaa; break; default: break; } smtp = SeqMapTableFind (Seq_code_ncbieaa, from); if (smtp != NULL) { aa = SeqMapTableConvert (smtp, trna->aa); if (aa == 255 && from == Seq_code_iupacaa) { if (trna->aa == 'U') { aa = 'U'; } else if (trna->aa == 'O') { aa = 'O'; } } } } if (aa > 0 && aa != 255) { if (aa != '*') { idx = aa - (64 /* + shift */); } else { idx = 25; } if (idx > 0 && idx < 28) { str = trnaList [idx]; } } return str; } NLM_EXTERN CharPtr GetRNARefProductString (RnaRefPtr rrp, StringConstraintPtr scp) { CharPtr str = NULL; RNAGenPtr rgp; if (rrp == NULL || rrp->ext.choice == 0) { return NULL; } if (rrp->ext.choice == 1) { str = StringSave (rrp->ext.value.ptrvalue); } else if (rrp->ext.choice == 2) { str = StringSaveNoNull (GetTrnaProductString (rrp->ext.value.ptrvalue)); } else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL) { if (!StringHasNoText (rgp->product)) { str = StringSave (rgp->product); } } if (!DoesStringMatchConstraint(str, scp)) { str = MemFree (str); } return str; } NLM_EXTERN CharPtr GetRNAProductString (SeqFeatPtr sfp, StringConstraintPtr scp) { RnaRefPtr rrp; RNAGenPtr rgp; SeqMgrFeatContext context; CharPtr str = NULL; if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) { return NULL; } rrp = sfp->data.value.ptrvalue; if (rrp->ext.choice == 0 || (rrp->ext.choice == 1 && StringHasNoText (rrp->ext.value.ptrvalue)) || (rrp->ext.choice == 1 && (StringCmp (rrp->ext.value.ptrvalue, "ncRNA") == 0 || StringCmp (rrp->ext.value.ptrvalue, "tmRNA") == 0 || StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") == 0))) { str = GetFirstGBQualMatch (sfp->qual, "product", 0, scp); } if (str == NULL) { if (rrp->ext.choice == 1 && !StringHasNoText (rrp->ext.value.ptrvalue) && StringCmp (rrp->ext.value.ptrvalue, "ncRNA") != 0 && StringCmp 
(rrp->ext.value.ptrvalue, "tmRNA") != 0 && StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") != 0) { str = StringSave (rrp->ext.value.ptrvalue); } else if (rrp->ext.choice == 2 && rrp->ext.value.ptrvalue != NULL) { if (SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, 0, 0, sfp, &context) != NULL && !StringHasNoText (context.label) && StringCmp (context.label, "tRNA") != 0) { str = (CharPtr) MemNew (sizeof (Char) + (StringLen (context.label) + 6)); sprintf (str, "tRNA-%s", context.label); } } else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL) { if (!StringHasNoText (rgp->product)) { str = StringSave (rgp->product); } } if (!DoesStringMatchConstraint(str, scp)) { str = MemFree (str); } } return str; } static Boolean IsParseabletRNAName (CharPtr name_string) { if (StringHasNoText(name_string)) { return TRUE; } else if (StringNICmp (name_string, "trna-", 5) != 0) { return FALSE; } else if (StringLen (name_string) != 8) { return FALSE; } else if (ParseTRnaString (name_string, NULL, NULL, TRUE) == 0) { return FALSE; } else { return TRUE; } } NLM_EXTERN Boolean SetRNARefProductString (RnaRefPtr rrp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text) { Boolean rval = FALSE; RNAGenPtr rgp; CharPtr cp, tmp; tRNAPtr trp; Boolean justTrnaText = FALSE; Uint1 codon [6]; if (rrp == NULL) { return FALSE; } if (rrp->ext.choice == 0) { if (scp == NULL || scp->match_text == NULL) { if (rrp->type == 5 || rrp->type == 6 || rrp->type == 7 || rrp->type == 8 || rrp->type == 9 || rrp->type == 10) { rgp = RNAGenNew (); rgp->product = StringSave (new_val); rrp->ext.choice = 3; rrp->ext.value.ptrvalue = rgp; } else { rrp->ext.choice = 1; rrp->ext.value.ptrvalue = StringSave (new_val); } rval = TRUE; } } else if (rrp->ext.choice == 1) { cp = rrp->ext.value.ptrvalue; rval = SetStringValue (&cp, new_val, existing_text); rrp->ext.value.ptrvalue = cp; } else if (rrp->ext.choice == 3) { rgp = (RNAGenPtr) rrp->ext.value.ptrvalue; rval = SetStringValue 
(&(rgp->product), new_val, existing_text); } else if (rrp->ext.choice == 2) { tmp = StringSaveNoNull (GetTrnaProductString (rrp->ext.value.ptrvalue)); if (DoesStringMatchConstraint (tmp, scp) && SetStringValue (&tmp, new_val, existing_text)) { trp = (tRNAPtr) rrp->ext.value.ptrvalue; if (trp == NULL) { trp = MemNew (sizeof (tRNA)); trp->aatype = 0; MemSet (trp->codon, 255, sizeof (trp->codon)); trp->anticodon = NULL; rrp->ext.value.ptrvalue = trp; } if (!IsParseabletRNAName(tmp)) { if (trp->anticodon == NULL && trp->codon[0] == 255 && trp->codon[1] == 255 && trp->codon[2] == 255 && trp->codon[3] == 255 && trp->codon[4] == 255 && trp->codon[5] == 255) { trp = MemFree (trp); rrp->ext.choice = 1; rrp->ext.value.ptrvalue = tmp; tmp = NULL; rval = TRUE; } } else { trp->aa = ParseTRnaString (tmp, &justTrnaText, codon, TRUE); trp->aatype = 2; rval = TRUE; } tmp = MemFree (tmp); } } return rval; } NLM_EXTERN Boolean SetRNAProductString (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text) { RnaRefPtr rrp; RNAGenPtr rgp; Boolean rval = FALSE; ValNode vn; CharPtr cp, tmp; tRNAPtr trp; Boolean justTrnaText = FALSE; Uint1 codon [6]; if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) { return FALSE; } rrp = sfp->data.value.ptrvalue; if ((rrp->ext.choice == 0 && rrp->type != 5 && rrp->type != 6 && rrp->type != 7 && rrp->type != 8 && rrp->type != 9 && rrp->type != 10) || (rrp->ext.choice == 1 && StringHasNoText (rrp->ext.value.ptrvalue)) || (rrp->ext.choice == 1 && (StringCmp (rrp->ext.value.ptrvalue, "ncRNA") == 0 || StringCmp (rrp->ext.value.ptrvalue, "tmRNA") == 0 || StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") == 0))) { vn.choice = FeatQualChoice_legal_qual; vn.data.intvalue = Feat_qual_legal_product; rval = SetStringInGBQualList (&(sfp->qual), &vn, scp, new_val, existing_text); } if (!rval) { if (rrp->ext.choice == 0 && (rrp->type == 5 || rrp->type == 6 || rrp->type == 7 || rrp->type == 8 || rrp->type == 9 || 
rrp->type == 10)) { rrp->ext.choice = 3; } if ((rrp->ext.choice == 0 || (rrp->ext.choice == 1 && StringHasNoText (rrp->ext.value.ptrvalue))) && (scp == NULL || scp->match_text == NULL)) { rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue); rrp->ext.value.ptrvalue = StringSave (new_val); rrp->ext.choice = 1; rval = TRUE; } else if (rrp->ext.choice == 1 && StringCmp (rrp->ext.value.ptrvalue, "ncRNA") != 0 && StringCmp (rrp->ext.value.ptrvalue, "tmRNA") != 0 && StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") != 0 && DoesStringMatchConstraint (rrp->ext.value.ptrvalue, scp)) { cp = rrp->ext.value.ptrvalue; rval = SetStringValue (&cp, new_val, existing_text); rrp->ext.value.ptrvalue = cp; rval = TRUE; } else if (rrp->ext.choice == 3) { rgp = (RNAGenPtr) rrp->ext.value.ptrvalue; if (rgp == NULL) { rgp = RNAGenNew (); rrp->ext.value.ptrvalue = rgp; } rval = SetStringValue (&(rgp->product), new_val, existing_text); } else if (rrp->ext.choice == 2) { tmp = GetRNAProductString (sfp, NULL); if (DoesStringMatchConstraint (tmp, scp) && SetStringValue (&tmp, new_val, existing_text)) { trp = (tRNAPtr) rrp->ext.value.ptrvalue; if (trp == NULL) { trp = MemNew (sizeof (tRNA)); trp->aatype = 0; MemSet (trp->codon, 255, sizeof (trp->codon)); trp->anticodon = NULL; rrp->ext.value.ptrvalue = trp; } if (!IsParseabletRNAName(tmp)) { if (trp->anticodon == NULL && trp->codon[0] == 255 && trp->codon[1] == 255 && trp->codon[2] == 255 && trp->codon[3] == 255 && trp->codon[4] == 255 && trp->codon[5] == 255) { trp = MemFree (trp); rrp->ext.choice = 1; rrp->ext.value.ptrvalue = tmp; tmp = NULL; rval = TRUE; } else { vn.choice = FeatQualChoice_legal_qual; vn.data.intvalue = Feat_qual_legal_product; if (SetStringInGBQualList (&(sfp->qual), &vn, scp, new_val, existing_text)) { trp->aa = 0; rval = TRUE; } } } else { trp->aa = ParseTRnaString (tmp, &justTrnaText, codon, TRUE); trp->aatype = 2; rval = TRUE; } tmp = MemFree (tmp); } } } return rval; } NLM_EXTERN Boolean RemoveRNAProductString 
(SeqFeatPtr sfp, StringConstraintPtr scp) { RnaRefPtr rrp; RNAGenPtr rgp; Boolean rval = FALSE; if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) { return FALSE; } rrp = sfp->data.value.ptrvalue; if (rrp->ext.choice == 0 || (rrp->ext.choice == 1 && StringHasNoText (rrp->ext.value.ptrvalue)) || (rrp->ext.choice == 1 && (StringCmp (rrp->ext.value.ptrvalue, "ncRNA") == 0 || StringCmp (rrp->ext.value.ptrvalue, "tmRNA") == 0 || StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") == 0))) { rval = RemoveGBQualMatch (&(sfp->qual), "product", 0, scp); } if (!rval) { if (rrp->ext.choice == 1) { if (!StringHasNoText (rrp->ext.value.ptrvalue) && StringCmp (rrp->ext.value.ptrvalue, "ncRNA") != 0 && StringCmp (rrp->ext.value.ptrvalue, "tmRNA") != 0 && StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") != 0 && DoesStringMatchConstraint(rrp->ext.value.ptrvalue, scp)) { rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue); rrp->ext.choice = 0; rval = TRUE; } } else if (rrp->ext.choice == 3) { rgp = (RNAGenPtr) rrp->ext.value.ptrvalue; if (rgp != NULL && !StringHasNoText (rgp->product) && DoesStringMatchConstraint (rgp->product, scp)) { rgp->product = MemFree (rgp->product); rval = TRUE; } } } return rval; } static Boolean RemovetRNACodons_Recognized (SeqFeatPtr sfp) { RnaRefPtr rrp; tRNAPtr trp; if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) { return FALSE; } rrp = (RnaRefPtr) sfp->data.value.ptrvalue; if (rrp->ext.choice != 2) { return FALSE; } trp = (tRNAPtr) rrp->ext.value.ptrvalue; if (trp == NULL) { return FALSE; } trp->codon [0] = 255; trp->codon [1] = 255; trp->codon [2] = 255; trp->codon [3] = 255; trp->codon [4] = 255; trp->codon [5] = 255; return TRUE; } /* M A or C R A or G W A or T S C or G Y C or T K G or T V A or C or G H A or C or T D A or G or T B C or G or T X G or A or T or C N G or A or T or C */ typedef struct ambiguitychar { Char ch; CharPtr replacements; } AmbiguityCharData, PNTR 
AmbiguityCharPtr; static AmbiguityCharData s_AmbiguityChars[] = { {'M', "AC"}, {'R', "AG"}, {'W', "AT"}, {'S', "CG"}, {'Y', "CT"}, {'K', "GT"}, {'V', "ACG"}, {'H', "ACT"}, {'D', "AGT"}, {'B', "CGT"}, {'X', "GATC"}, {'N', "GATC"}, {'\0', NULL} }; static ValNodePtr ExpandWobbleCodon (CharPtr codon) { ValNodePtr list = NULL, vnp, new_list; Int4 i, j, len; CharPtr this_codon, cp, new_codon; Boolean any; if (StringHasNoText (codon)) { return NULL; } len = StringLen (codon); ValNodeAddPointer (&list, 0, StringSave (codon)); for (j = 0; j < len; j++) { new_list = NULL; for (vnp = list; vnp != NULL; vnp = vnp->next) { this_codon = vnp->data.ptrvalue; any = FALSE; for (i = 0; s_AmbiguityChars[i].ch != 0 && !any; i++) { if (this_codon[j] == s_AmbiguityChars[i].ch) { cp = s_AmbiguityChars[i].replacements; while (*cp != 0) { new_codon = StringSave (this_codon); new_codon[j] = *cp; ValNodeAddPointer (&new_list, 0, new_codon); cp++; } any = TRUE; } } if (!any) { ValNodeAddPointer (&new_list, 0, StringSave (this_codon)); } } list = ValNodeFreeData (list); list = new_list; } for (vnp = list; vnp != NULL; vnp = vnp->next) { vnp->choice = IndexForCodon (vnp->data.ptrvalue, Seq_code_iupacna); } return list; } static Boolean ParseCodonsRecognizedFromCommaDelimitedList (CharPtr str, Uint1Ptr codons) { Int4 codon_num, k = 0, q; Char ch; Boolean rval = TRUE; Uint1 codon[4]; ValNodePtr wobble_list, vnp; if (StringHasNoText (str) || codons == NULL) { return FALSE; } for (codon_num = 0; codon_num < 6; codon_num++) { codons[codon_num] = 255; } codon_num = 0; while (isspace (*str)) { str++; } while (*str != 0 && codon_num < 6 && rval) { k = 0; q = 0; ch = str [k]; while (ch != '\0' && q < 3 && rval) { ch = TO_UPPER (ch); if (StringChr ("ACGTUYNKMRYSWBVHD", ch) != NULL) { if (ch == 'U') { ch = 'T'; } codon [q] = (Uint1) ch; q++; } else { rval = FALSE; } k++; ch = str [k]; } if (q < 3 || isalpha (ch)) { rval = FALSE; } if (rval) { codon [q] = 0; if (q == 3) { wobble_list = 
ExpandWobbleCodon(codon); for (vnp = wobble_list; vnp != NULL && codon_num < 6 && rval; vnp = vnp->next) { if (vnp->choice == INVALID_RESIDUE) { rval = FALSE; } else { codons [codon_num++] = vnp->choice; } } if (vnp != NULL) { /* too many ambiguities */ rval = FALSE; } wobble_list = ValNodeFreeData (wobble_list); } str += 3; while (isspace (*str)) { str++; } while (*str == ',') { str++; } while (isspace (*str)) { str++; } } } if (*str != 0) { rval = FALSE; } return rval; } NLM_EXTERN Boolean SettRNACodons_Recognized (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text) { RnaRefPtr rrp; tRNAPtr trp; Uint1 codon[6]; Uint1 new_codons[6]; Int4 codon_num, num_new, num_old = 0, i; Boolean rval = FALSE, already_have; if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) { return FALSE; } if (StringHasNoText (new_val)) { return FALSE; } rrp = (RnaRefPtr) sfp->data.value.ptrvalue; if (rrp->ext.choice != 2) { return FALSE; } trp = (tRNAPtr) rrp->ext.value.ptrvalue; if (trp == NULL) { return FALSE; } if (ParseCodonsRecognizedFromCommaDelimitedList (new_val, codon)) { switch (existing_text) { case ExistingTextOption_replace_old : for (codon_num = 0; codon_num < 6; codon_num++) { trp->codon[codon_num] = codon[codon_num]; } rval = TRUE; break; case ExistingTextOption_append_semi : case ExistingTextOption_append_space : case ExistingTextOption_append_colon : case ExistingTextOption_append_comma : case ExistingTextOption_append_none : case ExistingTextOption_prefix_semi : case ExistingTextOption_prefix_space : case ExistingTextOption_prefix_colon : case ExistingTextOption_prefix_comma : case ExistingTextOption_prefix_none : case ExistingTextOption_add_qual : for (num_old = 0; num_old < 6 && trp->codon[num_old] != 255; num_old++) { new_codons[num_old] = trp->codon[num_old]; } codon_num = num_old; rval = TRUE; for (num_new = 0; num_new < 6 && codon[num_new] != 255 && rval; num_new++) { already_have = FALSE; for (i = 0; i < 
codon_num && !already_have; i++) { if (codon[num_new] == new_codons[i]) { already_have = TRUE; } } if (!already_have) { if (codon_num < 6) { new_codons[codon_num] = codon[num_new]; codon_num++; } else { rval = FALSE; } } } if (rval) { for (i = 0; i < codon_num; i++) { trp->codon[i] = new_codons[i]; } while (codon_num < 6) { trp->codon[codon_num++] = 255; } } break; case ExistingTextOption_leave_old : if (trp->codon[0] == 255) { for (i = 0; i < 6; i++) { trp->codon[i] = codon[i]; } rval = TRUE; } break; } } return TRUE; } static CharPtr GettRNACodonsRecognized (SeqFeatPtr sfp, StringConstraintPtr scp) { RnaRefPtr rrp; tRNAPtr trp; Int4 j; Char buf[31]; Uint1 codon [4]; if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) { return NULL; } rrp = (RnaRefPtr) sfp->data.value.ptrvalue; if (rrp->ext.choice != 2) { return NULL; } trp = (tRNAPtr) rrp->ext.value.ptrvalue; if (trp == NULL) { return NULL; } buf[0] = 0; for (j = 0; j < 6; j++) { if (trp->codon [j] < 64) { /* Note - it is important to set the fourth character in the codon array to NULL * because CodonForIndex only fills in the three characters of actual codon, * so if you StringCpy the codon array and the NULL character is not found after * the three codon characters, you will write in memory you did not intend to. 
*/ codon [3] = 0; if (CodonForIndex (trp->codon [j], Seq_code_iupacna, codon)) { if (buf[0] != 0) { StringCat (buf, ", "); } StringCat (buf, (CharPtr) codon); } } } if (buf[0] == 0) { return NULL; } else { return StringSave (buf); } } NLM_EXTERN Boolean SettmRNATagPeptide (RnaRefPtr rrp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text) { RNAGenPtr rgp; Boolean rval = FALSE; if (rrp == NULL) { return FALSE; } if (rrp->ext.choice == 0) { rrp->ext.choice = 3; } if (rrp->ext.choice == 1) { rgp = RNAGenNew (); rgp->product = rrp->ext.value.ptrvalue; rrp->ext.value.ptrvalue = rgp; rrp->ext.choice = 3; } if (rrp->ext.choice == 3) { rgp = (RNAGenPtr) rrp->ext.value.ptrvalue; if (rgp == NULL) { rgp = RNAGenNew (); rrp->ext.value.ptrvalue = rgp; } rval = SetStringInRNAQualList (&(rgp->quals), "tag_peptide", scp, new_val, existing_text); } return rval; } NLM_EXTERN CharPtr GettmRNATagPeptide (RnaRefPtr rrp, StringConstraintPtr scp) { RNAGenPtr rgp; if (rrp == NULL || rrp->ext.choice != 3 || (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) == NULL) { return NULL; } return GetFirstRNAQualMatchName (rgp->quals, "tag_peptide", scp); } static Boolean RemovetmRNATagPeptide (RnaRefPtr rrp, StringConstraintPtr scp) { RNAGenPtr rgp; if (rrp == NULL || rrp->ext.choice != 3 || (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) == NULL) { return FALSE; } return RemoveRNAQualMatch (&(rgp->quals), "tag_peptide", scp); } NLM_EXTERN Boolean SetncRNAClass (RnaRefPtr rrp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text) { RNAGenPtr rgp; Boolean rval = FALSE; if (rrp == NULL) { return FALSE; } if (rrp->ext.choice == 0) { rrp->ext.choice = 3; } if (rrp->ext.choice == 1) { rgp = RNAGenNew (); rgp->product = rrp->ext.value.ptrvalue; rrp->ext.value.ptrvalue = rgp; rrp->ext.choice = 3; } if (rrp->ext.choice == 3) { rgp = (RNAGenPtr) rrp->ext.value.ptrvalue; if (rgp == NULL) { rgp = RNAGenNew (); rrp->ext.value.ptrvalue = rgp; } rval = SetStringValue (&(rgp->_class), new_val, 
existing_text); } return rval; } NLM_EXTERN CharPtr GetncRNAClass (RnaRefPtr rrp, StringConstraintPtr scp) { RNAGenPtr rgp; if (rrp == NULL || rrp->ext.choice != 3 || (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) == NULL) { return NULL; } if (DoesStringMatchConstraint (rgp->_class, scp)) { return StringSave (rgp->_class); } else { return NULL; } } static Boolean RemovencRNAClass (RnaRefPtr rrp, StringConstraintPtr scp) { RNAGenPtr rgp; if (rrp == NULL || rrp->ext.choice != 3 || (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) == NULL) { return FALSE; } if (!StringHasNoText (rgp->_class) && DoesStringMatchConstraint (rgp->_class, scp)) { rgp->_class = MemFree (rgp->_class); return TRUE; } else { return FALSE; } } static SeqLocPtr ParseSimpleInterval (CharPtr str, BioseqPtr bsp, CharPtr PNTR end) { Boolean partial_left = FALSE, partial_right = FALSE; Int4 left_num, right_num, swap_num; SeqLocPtr slp = NULL; Uint1 strand = Seq_strand_plus; if (StringHasNoText (str)) { return NULL; } while (isspace (*str)) { str++; } if (*str == '<' || *str == '>') { partial_left = TRUE; str++; } if (!isdigit (*str)) { return NULL; } left_num = atoi (str); while (isdigit (*str)) { str++; } while (isspace (*str) || *str == '.' 
|| *str == '-') { str++; } if (*str == '<' || *str == '>') { partial_right = TRUE; str++; } if (!isdigit (*str)) { return NULL; } right_num = atoi (str); while (isdigit (*str)) { str++; } if (left_num > right_num) { swap_num = left_num; left_num = right_num; right_num = swap_num; strand = Seq_strand_minus; } slp = SeqLocIntNew (left_num - 1, right_num - 1, strand, SeqIdDup (SeqIdFindWorst (bsp->id))); SetSeqLocPartial (slp, partial_left, partial_right); if (end != NULL) { *end = str; } return slp; } static void ComplementSeqLoc (SeqLocPtr slp) { SeqIntPtr sip; Boolean partial5 = FALSE, partial3 = FALSE; if (slp != NULL && slp->choice == SEQLOC_INT && slp->data.ptrvalue != NULL) { sip = (SeqIntPtr) slp->data.ptrvalue; if (sip->strand != Seq_strand_minus) { CheckSeqLocForPartial (slp, &partial5, &partial3); SetSeqLocPartial (slp, partial3, partial5); sip->strand = Seq_strand_minus; } } } NLM_EXTERN SeqLocPtr ParseSimpleSeqLoc (CharPtr str, BioseqPtr bsp) { CharPtr cp, cp_next; SeqLocPtr slp = NULL, slp_first = NULL, slp_tmp; Boolean is_complement = FALSE; if (StringHasNoText (str) || bsp == NULL) { return NULL; } cp = str; while (isspace (*cp)) { cp ++; } while (*cp != 0) { is_complement = FALSE; if (StringNICmp (cp, "complement", 10) == 0) { cp += 10; is_complement = TRUE; } else if (StringNICmp (cp, "comp", 4) == 0) { cp += 4; is_complement = TRUE; } if (*cp == '(') { cp++; } slp_tmp = ParseSimpleInterval (cp, bsp, &cp_next); if (slp_tmp == NULL) { slp = SeqLocFree (slp); return NULL; } if (is_complement) { ComplementSeqLoc (slp_tmp); } if (slp == NULL) { slp = slp_tmp; } else if (slp->choice == SEQLOC_INT) { slp_first = slp; slp_first->next = slp_tmp; slp = ValNodeNew (NULL); slp->choice = SEQLOC_MIX; slp->data.ptrvalue = slp_first; } else { ValNodeLink ((ValNodePtr PNTR) slp->data.ptrvalue, slp_tmp); } cp = cp_next; while (isspace (*cp)) { cp++; } if (*cp == ')') { cp++; } while (isspace (*cp)) { cp++; } if (*cp == ',') { cp++; } while (isspace (*cp)) { cp++; } } 
if (*cp != 0) { slp = SeqLocFree (slp); } return slp; } static Boolean SetAnticodon (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text) { RnaRefPtr rrp; tRNAPtr trp; Boolean rval = FALSE; SeqLocPtr slp, slp_merge; BioseqPtr bsp; if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) { return FALSE; } if (StringHasNoText (new_val)) { return FALSE; } rrp = (RnaRefPtr) sfp->data.value.ptrvalue; if (rrp->ext.choice != 2) { return FALSE; } trp = (tRNAPtr) rrp->ext.value.ptrvalue; if (trp == NULL) { return FALSE; } if (trp->anticodon != NULL && existing_text == ExistingTextOption_leave_old) { return FALSE; } bsp = BioseqFindFromSeqLoc (sfp->location); if (bsp == NULL) { return FALSE; } slp = ParseSimpleSeqLoc (new_val, bsp); if (slp == NULL) { return FALSE; } if (trp->anticodon == NULL) { trp->anticodon = slp; rval = TRUE; } else if (existing_text == ExistingTextOption_replace_old) { trp->anticodon = SeqLocFree (trp->anticodon); trp->anticodon = slp; rval = TRUE; } else { slp_merge = SeqLocMerge (bsp, trp->anticodon, slp, FALSE, FALSE, FALSE); slp = SeqLocFree (slp); trp->anticodon = SeqLocFree (trp->anticodon); trp->anticodon = slp_merge; rval = TRUE; } return rval; } static CharPtr GetIntervalString (SeqLocPtr slp) { CharPtr fmt = "%s%d..%s%d"; CharPtr complement_fmt = "complement(%s%d..%s%d)"; CharPtr str = NULL; SeqIntPtr sip; Boolean partial5 = FALSE, partial3 = FALSE; if (slp == NULL || slp->choice != SEQLOC_INT || slp->data.ptrvalue == NULL) { return NULL; } sip = (SeqIntPtr) slp->data.ptrvalue; CheckSeqLocForPartial (slp, &partial5, &partial3); if (sip->strand == Seq_strand_minus) { str = (CharPtr) MemNew (sizeof (Char) * (StringLen (complement_fmt) + 30)); sprintf (str, complement_fmt, partial3 ? "<" : "", sip->from + 1, partial5 ? ">" : "", sip->to + 1); } else { str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + 30)); sprintf (str, fmt, partial5 ? "<" : "", sip->from + 1, partial3 ? 
">" : "", sip->to + 1); } return str; } static CharPtr GetAnticodonLocString (SeqFeatPtr sfp) { RnaRefPtr rrp; tRNAPtr trp; SeqLocPtr slp; CharPtr str = NULL, tmp; ValNodePtr str_list = NULL, vnp; Int4 len = 0; if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) { return NULL; } rrp = (RnaRefPtr) sfp->data.value.ptrvalue; if (rrp->ext.choice != 2) { return NULL; } trp = (tRNAPtr) rrp->ext.value.ptrvalue; if (trp == NULL || trp->anticodon == NULL) { return NULL; } if (trp->anticodon->choice == SEQLOC_INT) { str = GetIntervalString (trp->anticodon); } else if (trp->anticodon->choice == SEQLOC_MIX) { for (slp = trp->anticodon->data.ptrvalue; slp != NULL; slp = slp->next) { tmp = GetIntervalString (slp); if (tmp == NULL) { str_list = ValNodeFreeData (str_list); return StringSave ("complex location"); } else { len += StringLen (tmp) + 2; ValNodeAddPointer (&str_list, 0, tmp); } } str = (CharPtr) MemNew (sizeof (Char) * len); str[0] = 0; for (vnp = str_list; vnp != NULL; vnp = vnp->next) { StringCat (str, vnp->data.ptrvalue); if (vnp->next != NULL) { StringCat (str, ", "); } } str_list = ValNodeFreeData (str_list); } return str; } NLM_EXTERN ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp) { BioseqPtr protbsp; SeqFeatPtr protsfp; ProtRefPtr prp = NULL; SeqFeatXrefPtr xref; if (sfp == NULL) return NULL; if (sfp->data.choice == SEQFEAT_PROT) { prp = (ProtRefPtr) sfp->data.value.ptrvalue; } else if (sfp->data.choice == SEQFEAT_CDREGION) { xref = sfp->xref; while (xref != NULL && xref->data.choice != SEQFEAT_PROT) { xref = xref->next; } if (xref != NULL) { prp = xref->data.value.ptrvalue; } if (prp == NULL && sfp->product != NULL) { protbsp = BioseqFindFromSeqLoc (sfp->product); protsfp = GetProtFeature (protbsp); if (protsfp != NULL) { prp = protsfp->data.value.ptrvalue; } } } return prp; } NLM_EXTERN void GetGeneInfoForFeature (SeqFeatPtr sfp, GeneRefPtr PNTR p_grp, SeqFeatPtr PNTR p_gene) { GeneRefPtr grp = NULL; SeqFeatPtr gene = NULL; 
SeqMgrFeatContext fcontext; if (p_grp != NULL) { *p_grp = NULL; } if (p_gene != NULL) { *p_gene = NULL; } if (sfp == NULL) { return; } if (sfp->idx.subtype == FEATDEF_GENE) { grp = sfp->data.value.ptrvalue; gene = sfp; } else { grp = SeqMgrGetGeneXref (sfp); if (grp == NULL) { gene = SeqMgrGetOverlappingGene (sfp->location, &fcontext); if (gene != NULL) { grp = gene->data.value.ptrvalue; } } else if (SeqMgrGeneIsSuppressed (grp)) { grp = NULL; } } if (p_grp != NULL) { *p_grp = grp; } if (p_gene != NULL) { *p_gene = gene; } } static CharPtr GetCitationTextFromFeature (SeqFeatPtr sfp, StringConstraintPtr scp, ValNodePtr cit_list) { SeqEntryPtr sep; BioseqPtr bsp; ValNodePtr list = NULL, vnp; CharPtr rval = NULL; Int4 serial_number; Char buf[100]; ValNodePtr psp; if (sfp == NULL || sfp->cit == NULL) { return NULL; } bsp = GetSequenceForObject (OBJ_SEQFEAT, sfp); if (cit_list == NULL) { /* list not provided - must create now */ sep = SeqMgrGetSeqEntryForData (bsp); list = GetCitListsForSeqEntry (sep); cit_list = list; } psp = sfp->cit->data.ptrvalue; for (vnp = psp; vnp != NULL && rval == NULL; vnp = vnp->next) { serial_number = GetCitationNumberForMinPub (bsp, vnp, cit_list); if (serial_number > -1) { sprintf (buf, "%d", serial_number); if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { rval = StringSave (buf); } } } list = PubSerialNumberListFree (list); return rval; } static CharPtr GetCodeBreakString (SeqFeatPtr sfp) { CdRegionPtr crp; ValNodePtr list = NULL, vnp; BioseqPtr bsp; Int4 len = 0; CharPtr str = NULL; if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION || (crp = (CdRegionPtr) sfp->data.value.ptrvalue) == NULL || crp->code_break == NULL) { return NULL; } bsp = BioseqFindFromSeqLoc (sfp->location); PrintFTCodeBreak (&list, crp->code_break, bsp); for (vnp = list; vnp != NULL; vnp = vnp->next) { if (StringNCmp (vnp->data.ptrvalue, "\t\t\ttransl_except\t", 17) == 0) { len += StringLen (vnp->data.ptrvalue) - 17; } } if (len > 0) { 
str = (CharPtr) MemNew (sizeof (Char) * (len + 1)); str[0] = 0; for (vnp = list; vnp != NULL; vnp = vnp->next) { if (StringNCmp (vnp->data.ptrvalue, "\t\t\ttransl_except\t", 17) == 0) { StringCat (str, ((CharPtr) vnp->data.ptrvalue) + 17); if (vnp->next == NULL) { str[StringLen(str) - 1] = 0; } else { str[StringLen(str) - 1] = ';'; } } } } list = ValNodeFreeData (list); return str; } static CharPtr GetQualFromFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra) { CharPtr str = NULL; GeneRefPtr grp = NULL; ProtRefPtr prp = NULL; Int4 gbqual, subfield; SeqFeatPtr gene = NULL; CdRegionPtr crp; ValNodePtr vnp; Char buf[20]; BioseqPtr protbsp; if (sfp == NULL || field == NULL) { return NULL; } // for gene fields GetGeneInfoForFeature (sfp, &grp, &gene); // for protein fields prp = GetProtRefForFeature (sfp); /* fields common to all features */ /* note, also known as comment */ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_note) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->data.ptrvalue))) { if (!StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { str = StringSave (sfp->comment); } } /* db-xref */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_db_xref) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->data.ptrvalue)))) { str = GetDbxrefString (sfp->dbxref, scp); } /* exception */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_exception) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->data.ptrvalue)))) { if (!StringHasNoText (sfp->except_text) && DoesStringMatchConstraint(sfp->except_text, scp)) { str = StringSave (sfp->except_text); } } /* evidence */ if (str == NULL && ((field->choice 
== FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_evidence) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->data.ptrvalue)))) { if (sfp->exp_ev == 1) { str = StringSave ("experimental"); } else if (sfp->exp_ev == 2) { str = StringSave ("non-experimental"); } if (!DoesStringMatchConstraint(str, scp)) { str = MemFree (str); } } /* citation */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_citation) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->data.ptrvalue)))) { str = GetCitationTextFromFeature (sfp, scp, batch_extra == NULL ? NULL : batch_extra->cit_list); } /* location */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_location) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->data.ptrvalue)))) { str = SeqLocPrintUseBestID (sfp->location); } /* pseudo */ if (str == NULL && (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_pseudo) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudogene", field->data.ptrvalue))) { str = GetFirstGBQualMatch (sfp->qual, "pseudogene", 0, scp); if (str == NULL && sfp->pseudo) { str = StringSave ("unqualified"); } } /* fields common to some features */ /* product */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_product) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->data.ptrvalue)))) { if (prp != NULL) { str = GetFirstValNodeStringMatch (prp->name, scp); } else if (sfp->data.choice == SEQFEAT_RNA) { str = GetRNAProductString (sfp, scp); } } /* Gene fields */ /* locus */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == 
Feat_qual_legal_gene) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->locus) && DoesStringMatchConstraint(grp->locus, scp)) { str = StringSave (grp->locus); } } /* description */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_description) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->desc) && DoesStringMatchConstraint(grp->desc, scp)) { str = StringSave (grp->desc); } } /* maploc */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_map) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp)) { str = StringSave (grp->maploc); } } /* allele */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_allele) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->data.ptrvalue))) && grp != NULL && sfp->idx.subtype != FEATDEF_variation) { if (!StringHasNoText (grp->allele) && DoesStringMatchConstraint(grp->allele, scp)) { str = StringSave (grp->allele); } } /* locus_tag */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_locus_tag) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp)) { str = StringSave (grp->locus_tag); } } /* synonym */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_synonym) || 
(field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->data.ptrvalue))) && grp != NULL) { str = GetFirstValNodeStringMatch (grp->syn, scp); } /* gene comment */ if (str == NULL && field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_comment && gene != NULL && !StringHasNoText (gene->comment) && DoesStringMatchConstraint (gene->comment, scp)) { str = StringSave (gene->comment); } /* protein fields */ /* note - product handled above */ /* description */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_description) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue))) && prp != NULL) { if (!StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { str = StringSave (prp->desc); } } /* ec_number */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ec_number) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->data.ptrvalue))) && prp != NULL) { str = GetFirstValNodeStringMatch (prp->ec, scp); } /* activity */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_activity) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->data.ptrvalue))) && prp != NULL) { str = GetFirstValNodeStringMatch (prp->activity, scp); } /* coding region fields */ /* transl_except */ if (str == NULL && field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_except && sfp->data.choice == SEQFEAT_CDREGION) { str = GetCodeBreakString (sfp); } /* transl_table */ if (str == NULL && field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_table && sfp->data.choice == SEQFEAT_CDREGION && (crp = (CdRegionPtr) 
sfp->data.value.ptrvalue) != NULL) { if (crp->genetic_code != NULL && (vnp = crp->genetic_code->data.ptrvalue) != NULL && vnp->choice == 2) { sprintf (buf, "%d", vnp->data.intvalue); str = StringSave (buf); } } /* translation */ if (str == NULL && field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_translation && sfp->data.choice == SEQFEAT_CDREGION) { if (sfp->product != NULL) { protbsp = BioseqFindFromSeqLoc (sfp->product); str = GetSequenceByBsp (protbsp); } } /* special RNA qualifiers */ /* tRNA qualifiers */ /* codon-recognized */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codons_recognized) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("codon-recognized", field->data.ptrvalue)))) { str = GettRNACodonsRecognized (sfp, scp); } /* anticodon */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_anticodon) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->data.ptrvalue)))) { str = GetAnticodonLocString (sfp); } /* tag-peptide */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_tag_peptide) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("tag-peptide", field->data.ptrvalue)))) { str = GettmRNATagPeptide (sfp->data.value.ptrvalue, scp); } /* ncRNA_class */ if (str == NULL && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ncRNA_class) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ncRNA_class", field->data.ptrvalue)))) { str = GetncRNAClass (sfp->data.value.ptrvalue, scp); } /* codon-start */ if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codon_start && sfp->data.choice == SEQFEAT_CDREGION) { crp = (CdRegionPtr) 
sfp->data.value.ptrvalue; if (crp->frame == 1 || crp->frame == 0) { str = StringSave ("1"); } else { str = (CharPtr) MemNew (sizeof (Char) * 15); sprintf (str, "%d", crp->frame); } if (!DoesStringMatchConstraint (str, scp)) { str = MemFree (str); } } /* special region qualifiers */ if (sfp->idx.subtype == FEATDEF_REGION && field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_name && DoesStringMatchConstraint (sfp->data.value.ptrvalue, scp)) { str = StringSave (sfp->data.value.ptrvalue); } /* actual GenBank qualifiers */ if (str == NULL) { if (field->choice == FeatQualChoice_legal_qual) { gbqual = GetGBQualFromFeatQual (field->data.intvalue, &subfield); if (gbqual > -1) { str = GetFirstGBQualMatch (sfp->qual, ParFlat_GBQual_names [gbqual].name, subfield, scp); } else { /* need to do something with non-qualifier qualifiers */ } } else { str = GetFirstGBQualMatchConstraintName (sfp->qual, field->data.ptrvalue, scp); } } return str; } NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra) { if (sfp == NULL || field == NULL || field->field == NULL) { return NULL; } if (field->type != Macro_feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type)) { return NULL; } return GetQualFromFeatureAnyType (sfp, field->field, scp, batch_extra); } NLM_EXTERN CharPtr GetQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp) { return GetQualFromFeatureEx (sfp, field, scp, NULL); } static Boolean RemoveCodeBreak (CdRegionPtr crp) { CodeBreakPtr cbp, nextcbp; if (crp == NULL || crp->code_break == NULL) { return FALSE; } cbp = crp->code_break; while (cbp != NULL) { nextcbp = cbp->next; cbp->next = NULL; cbp = CodeBreakFree (cbp); cbp = nextcbp; } crp->code_break = NULL; return TRUE; } static Boolean RemoveQualFromFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, StringConstraintPtr scp) { Boolean rval = FALSE; GeneRefPtr grp = 
NULL; ProtRefPtr prp = NULL; RnaRefPtr rrp = NULL; CdRegionPtr crp; tRNAPtr trp; Int4 gbqual, subfield; SeqFeatPtr gene = NULL; SeqMgrFeatContext fcontext; if (sfp == NULL || field == NULL) { return FALSE; } /* for gene fields */ if (sfp->idx.subtype == FEATDEF_GENE) { grp = sfp->data.value.ptrvalue; gene = sfp; } else { grp = SeqMgrGetGeneXref (sfp); if (grp == NULL) { gene = SeqMgrGetOverlappingGene (sfp->location, &fcontext); if (gene != NULL) { grp = gene->data.value.ptrvalue; } } else if (SeqMgrGeneIsSuppressed (grp)) { grp = NULL; } } /* for protein fields */ prp = GetProtRefForFeature (sfp); /* for RNA fields */ if (sfp->data.choice == SEQFEAT_RNA) { rrp = (RnaRefPtr) sfp->data.value.ptrvalue; } /* fields common to all features */ /* note, also known as comment */ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_note) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->data.ptrvalue))) { if (!StringHasNoText (sfp->comment) && DoesStringMatchConstraint (sfp->comment, scp)) { sfp->comment = MemFree (sfp->comment); rval = TRUE; } } /* db-xref */ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_db_xref) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->data.ptrvalue))) { rval = RemoveDbxrefString (&(sfp->dbxref), scp); } /* exception */ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_exception) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->data.ptrvalue))) { if (!StringHasNoText (sfp->except_text) && DoesStringMatchConstraint (sfp->except_text, scp)) { sfp->except_text = MemFree (sfp->except_text); sfp->excpt = FALSE; rval = TRUE; } } /* evidence */ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_evidence) || (field->choice == FeatQualChoice_illegal_qual 
&& DoesStringMatchConstraint ("evidence", field->data.ptrvalue))) { if ((sfp->exp_ev == 1 && DoesStringMatchConstraint("experimental", scp)) || (sfp->exp_ev == 2 && DoesStringMatchConstraint("non-experimental", scp))) { sfp->exp_ev = 0; rval = TRUE; } } /* citation */ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_citation) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->data.ptrvalue))) { if (sfp->cit != NULL) { sfp->cit = PubSetFree (sfp->cit); rval = TRUE; } } /* location */ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_location) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->data.ptrvalue))) { if (sfp->location != NULL) { sfp->location = SeqLocFree (sfp->location); rval = TRUE; } } /* pseudo */ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_pseudo) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudogene", field->data.ptrvalue))) { if (gene != NULL) { if (gene->pseudo) { gene->pseudo = FALSE; rval = TRUE; } rval |= RemoveGBQualMatch (&(gene->qual), "pseudogene", 0, NULL); } if (sfp->pseudo) { sfp->pseudo = FALSE; rval = TRUE; } rval |= RemoveGBQualMatch (&(sfp->qual), "pseudogene", 0, NULL); return rval; } /* fields common to some features */ /* product */ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_product) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->data.ptrvalue))) { if (prp != NULL) { rval = RemoveValNodeStringMatch (&(prp->name), scp); } else if (sfp->data.choice == SEQFEAT_RNA) { rval = RemoveRNAProductString (sfp, scp); } } /* Gene fields */ /* locus */ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene) || (field->choice == 
FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->locus) && DoesStringMatchConstraint (grp->locus, scp)) { grp->locus = MemFree (grp->locus); rval = TRUE; } } /* description */ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_description) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->desc) && DoesStringMatchConstraint(grp->desc, scp)) { grp->desc = MemFree (grp->desc); rval = TRUE; } } /* maploc */ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_map) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp)) { grp->maploc = MemFree (grp->maploc); rval = TRUE; } } /* allele */ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_allele) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->data.ptrvalue))) && grp != NULL && sfp->idx.subtype != FEATDEF_variation) { if (!StringHasNoText (grp->allele) && DoesStringMatchConstraint(grp->allele, scp)) { grp->allele = MemFree (grp->allele); rval = TRUE; } } /* locus_tag */ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_locus_tag) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->data.ptrvalue))) && grp != NULL) { if (!StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp)) { grp->locus_tag = MemFree (grp->locus_tag); rval = TRUE; } } /* synonym */ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_synonym) || (field->choice == 
FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->data.ptrvalue))) && grp != NULL) { rval = RemoveValNodeStringMatch (&(grp->syn), scp); } /* gene comment */ if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_comment && gene != NULL && !StringHasNoText (gene->comment) && DoesStringMatchConstraint (gene->comment, scp)) { gene->comment = MemFree (gene->comment); rval = TRUE; } /* protein fields */ /* note - product handled above */ /* description */ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_description) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue))) && prp != NULL) { if (!StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { prp->desc = MemFree (prp->desc); rval = TRUE; } } /* ec_number */ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ec_number) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->data.ptrvalue))) && prp != NULL) { rval = RemoveValNodeStringMatch (&(prp->ec), scp); } /* activity */ if (((field->choice == FeatQualChoice_legal_qual && (field->data.intvalue == Feat_qual_legal_activity || field->data.intvalue == Feat_qual_legal_function)) || (field->choice == FeatQualChoice_illegal_qual && (DoesStringMatchConstraint ("activity", field->data.ptrvalue) || DoesStringMatchConstraint ("function", field->data.ptrvalue)))) && prp != NULL) { rval = RemoveValNodeStringMatch (&(prp->activity), scp); } /* special coding region fields */ /* transl_except */ if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_except && sfp->data.choice == SEQFEAT_CDREGION) { crp = (CdRegionPtr) sfp->data.value.ptrvalue; rval = RemoveCodeBreak (crp); } /* transl_table */ if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue 
== Feat_qual_legal_transl_table && sfp->data.choice == SEQFEAT_CDREGION && (crp = (CdRegionPtr) sfp->data.value.ptrvalue) != NULL) { if (crp->genetic_code != NULL) { crp->genetic_code = GeneticCodeFree (crp->genetic_code); rval = TRUE; } } /* special RNA fields */ /* anticodon */ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_anticodon) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->data.ptrvalue))) && rrp != NULL && rrp->ext.choice == 2) { trp = (tRNAPtr) rrp->ext.value.ptrvalue; if (trp != NULL && trp->anticodon != NULL) { trp->anticodon = SeqLocFree (trp->anticodon); rval = TRUE; } } /* codons recognized */ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codons_recognized) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("codon-recognized", field->data.ptrvalue))) && rrp != NULL && rrp->ext.choice == 2) { rval = RemovetRNACodons_Recognized (sfp); } /* tag_peptide */ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_tag_peptide) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("tag-peptide", field->data.ptrvalue))) && rrp != NULL && rrp->ext.choice == 3) { rval = RemovetmRNATagPeptide (rrp, scp); } if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ncRNA_class) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ncRNA_class", field->data.ptrvalue))) && rrp != NULL && rrp->ext.choice == 3) { rval = RemovencRNAClass (rrp, scp); } /* special region qualifiers */ if (sfp->idx.subtype == FEATDEF_REGION && field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_name && !StringHasNoText (sfp->data.value.ptrvalue) && DoesStringMatchConstraint (sfp->data.value.ptrvalue, scp)) { sfp->data.value.ptrvalue = MemFree 
(sfp->data.value.ptrvalue); rval = TRUE; } if (!rval) { /* actual GenBank qualifiers */ if (field->choice == FeatQualChoice_legal_qual) { gbqual = GetGBQualFromFeatQual (field->data.intvalue, &subfield); if (gbqual > -1) { rval = RemoveGBQualMatch (&(sfp->qual), ParFlat_GBQual_names [gbqual].name, subfield, scp); } else { /* need to do something with non-qualifier qualifiers */ } } else { rval = RemoveGBQualMatchConstraintName (&(sfp->qual), field->data.ptrvalue, scp); } } return rval; } NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp) { if (sfp == NULL || field == NULL || field->field == NULL) { return FALSE; } if (field->type != Macro_feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type)) { return FALSE; } return RemoveQualFromFeatureAnyType (sfp, field->field, scp); } static Boolean ChooseBestFrame (SeqFeatPtr sfp) { CdRegionPtr crp; Uint1 new_frame = 0, i, orig_frame; ByteStorePtr bs; Int4 lens [3]; Int4 max; Boolean retval = TRUE; if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return FALSE; crp = sfp->data.value.ptrvalue; if (crp == NULL) return FALSE; orig_frame = crp->frame; max = 0; for (i = 1; i <= 3; i++) { crp->frame = i; bs = ProteinFromCdRegionEx (sfp, FALSE, FALSE); lens[i - 1] = BSLen (bs); BSFree (bs); if (lens[i - 1] > max) { max = lens[i - 1]; new_frame = i; } } for (i = 1; i <= 3; i++) { if (lens [i - 1] == max && i != new_frame) { retval = FALSE; } } if (retval) { crp->frame = new_frame; } else { crp->frame = orig_frame; } return retval; } static Boolean ChooseMatchingFrame (SeqFeatPtr sfp) { CdRegionPtr crp; BioseqPtr protbsp; CharPtr expected_translation, frame_translation; Uint1 new_frame = 0, i, orig_frame; ByteStorePtr bs; Boolean retval = FALSE; if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION || sfp->product == NULL || (protbsp = BioseqFindFromSeqLoc (sfp->product)) == NULL || (crp = sfp->data.value.ptrvalue) == NULL) { return FALSE; } 
expected_translation = GetSequenceByBsp (protbsp); if (StringHasNoText (expected_translation)) { expected_translation = MemFree (expected_translation); return FALSE; } orig_frame = crp->frame; for (i = 1; i <= 3 && !retval; i++) { crp->frame = i; bs = ProteinFromCdRegionEx (sfp, FALSE, FALSE); frame_translation = BSMerge (bs, NULL); if (StringCmp (frame_translation, expected_translation) == 0) { new_frame = i; retval = TRUE; } BSFree (bs); frame_translation = MemFree (frame_translation); } expected_translation = MemFree (expected_translation); if (new_frame == 1 && orig_frame == 0) { new_frame = 0; } if (retval) { crp->frame = new_frame; if (new_frame == orig_frame) { /* didn't actually change the frame */ retval = FALSE; } } else { crp->frame = orig_frame; } return retval; } static SeqFeatPtr CreateGeneForFeature (SeqFeatPtr sfp) { BioseqPtr bsp; SeqFeatPtr gene = NULL; SeqLocPtr slp_new; if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE) { return NULL; } else { bsp = BioseqFindFromSeqLoc (sfp->location); if (bsp != NULL) { gene = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, sfp->location); if (gene != NULL) { slp_new = SeqLocMerge (bsp, gene->location, NULL, TRUE, FALSE, FALSE); if (slp_new != NULL && slp_new != gene->location) { gene->location = SeqLocFree (gene->location); gene->location = slp_new; } gene->data.value.ptrvalue = GeneRefNew(); } } } return gene; } static void AdjustProteinSequenceForReadingFrame (SeqFeatPtr cds); static Boolean SetCitationTextOnFeature (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, ValNodePtr cit_list) { SeqEntryPtr sep; BioseqPtr bsp; ValNodePtr list = NULL, vnp; Boolean rval = FALSE, already_present = FALSE; Int4 new_number, serial_number; ValNodePtr min_pub, new_list; if (sfp == NULL) { return FALSE; } if (sfp->cit != NULL && existing_text == ExistingTextOption_leave_old) { return FALSE; } if (!StringIsAllDigits (value)) { return FALSE; } new_number = atoi (value); bsp = GetSequenceForObject 
(OBJ_SEQFEAT, sfp); if (cit_list == NULL) { /* list not provided - must create now */ sep = SeqMgrGetSeqEntryForData (bsp); list = GetCitListsForSeqEntry (sep); cit_list = list; } min_pub = GetMinPubForCitationNumber (bsp, new_number, cit_list); if (min_pub == NULL) { list = PubSerialNumberListFree (list); return FALSE; } if (existing_text == ExistingTextOption_replace_old) { sfp->cit = PubSetFree (sfp->cit); sfp->cit = ValNodeNew (NULL); sfp->cit->choice = 1; new_list = NULL; ValNodeLink (&new_list, AsnIoMemCopy (min_pub->data.ptrvalue, (AsnReadFunc) PubAsnRead, (AsnWriteFunc) PubAsnWrite)); sfp->cit->data.ptrvalue = new_list; rval = TRUE; } else { for (vnp = sfp->cit->data.ptrvalue; vnp != NULL && !already_present; vnp = vnp->next) { serial_number = GetCitationNumberForMinPub (bsp, vnp, cit_list); if (serial_number == new_number) { already_present = TRUE; } } if (!already_present) { new_list = sfp->cit->data.ptrvalue; ValNodeLink (&new_list, AsnIoMemCopy (min_pub->data.ptrvalue, (AsnReadFunc) PubAsnRead, (AsnWriteFunc) PubAsnWrite)); sfp->cit->data.ptrvalue = new_list; rval = TRUE; } } list = PubSerialNumberListFree (list); return rval; } static Boolean SetFeatureLocation (SeqFeatPtr sfp, CharPtr value, Uint2 existing_text) { SeqLocPtr loc; Boolean locmap; int num_errs; Boolean sitesmap; SeqIdPtr sip; Boolean rval = FALSE; sip = SeqLocId (sfp->location); loc = Nlm_gbparseint (value, &locmap, &sitesmap, &num_errs, sip); if (loc != NULL) { switch (existing_text) { case ExistingTextOption_replace_old: sfp->location = SeqLocFree (sfp->location); sfp->location = loc; loc = NULL; rval = TRUE; break; case ExistingTextOption_append_semi: case ExistingTextOption_append_space: case ExistingTextOption_append_colon: case ExistingTextOption_append_comma: case ExistingTextOption_append_none: SeqLocAdd (&(sfp->location), loc, FALSE, FALSE); SeqLocPackage (sfp->location); loc = NULL; rval = TRUE; break; case ExistingTextOption_prefix_semi: case ExistingTextOption_prefix_space: 
case ExistingTextOption_prefix_colon: case ExistingTextOption_prefix_comma: case ExistingTextOption_prefix_none: SeqLocAdd (&loc, sfp->location, FALSE, FALSE); SeqLocPackage (loc); sfp->location = loc; loc = NULL; rval = TRUE; break; case ExistingTextOption_leave_old: if (sfp->location == NULL) { sfp->location = loc; loc = NULL; rval = TRUE; } break; } } loc = SeqLocFree (loc); return rval; } static Boolean SetGeneticCode (CdRegionPtr crp, Int4 value) { ValNodePtr vnp; if (crp == NULL) { return FALSE; } if (crp->genetic_code != NULL) { crp->genetic_code = GeneticCodeFree (crp->genetic_code); } crp->genetic_code = GeneticCodeNew (); vnp = ValNodeNew (NULL); vnp->choice = 2; vnp->data.intvalue = value; crp->genetic_code->data.ptrvalue = vnp; return TRUE; } static Boolean SetQualOnFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra) { Boolean rval = FALSE; Boolean matched_term = FALSE; GeneRefPtr grp = NULL; ProtRefPtr prp = NULL; CharPtr tmp; CdRegionPtr crp; SeqFeatPtr gene = NULL; SeqMgrFeatContext fcontext; if (sfp == NULL || field == NULL) { return FALSE; } // for gene fields if (sfp->idx.subtype == FEATDEF_GENE) { grp = sfp->data.value.ptrvalue; gene = sfp; } else { grp = SeqMgrGetGeneXref (sfp); if (grp == NULL) { gene = SeqMgrGetOverlappingGene (sfp->location, &fcontext); if (gene != NULL) { grp = gene->data.value.ptrvalue; } } } // for protein fields prp = GetProtRefForFeature (sfp); /* fields common to all features */ /* note, also known as comment */ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_note) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->data.ptrvalue))) { if (DoesStringMatchConstraint(sfp->comment, scp)) { rval = SetStringValue ( &(sfp->comment), value, existing_text); } matched_term = TRUE; } /* db-xref */ if ((field->choice == FeatQualChoice_legal_qual && 
field->data.intvalue == Feat_qual_legal_db_xref)
      || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->data.ptrvalue))) {
    /* matched_term is not set in this branch */
    rval = SetDbxrefString (&(sfp->dbxref), scp, value, existing_text);
  }
  /* exception */
  if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_exception)
      || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->data.ptrvalue))) {
    if (DoesStringMatchConstraint(sfp->except_text, scp)) {
      rval = SetStringValue ( &(sfp->except_text), value, existing_text);
      /* keep the exception flag in step with whether any exception text remains */
      if (StringHasNoText(sfp->except_text)) {
        sfp->excpt = FALSE;
      } else {
        sfp->excpt = TRUE;
      }
    }
    matched_term = TRUE;
  }
  /* evidence */
  if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_evidence)
      || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->data.ptrvalue))) {
    /* render the current exp_ev code as text, edit the text, then map the
     * edited text back to a code; text that maps to no code yields FALSE */
    tmp = NULL;
    if (sfp->exp_ev == 1) {
      tmp = StringSave ("experimental");
    } else if (sfp->exp_ev == 2) {
      tmp = StringSave ("non-experimental");
    }
    if (DoesStringMatchConstraint(tmp, scp)) {
      rval = SetStringValue (&tmp, value, existing_text);
      if (rval) {
        rval = FALSE;
        if (StringICmp (tmp, "experimental") == 0) {
          sfp->exp_ev = 1;
          rval = TRUE;
        } else if (StringICmp (tmp, "non-experimental") == 0) {
          sfp->exp_ev = 2;
          rval = TRUE;
        } else if (StringHasNoText (tmp)) {
          sfp->exp_ev = 0;
          rval = TRUE;
        }
      }
    }
    tmp = MemFree (tmp);
  }
  /* citation */
  if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_citation)
      || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->data.ptrvalue))) {
    rval = SetCitationTextOnFeature (sfp, scp, value, existing_text, batch_extra == NULL ? NULL : batch_extra->cit_list);
  }
  /* location */
  if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_location)
      || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->data.ptrvalue))) {
    /* returns immediately: no later branch is evaluated for a location field */
    rval = SetFeatureLocation (sfp, value, existing_text);
    return rval;
  }
  /* pseudo */
  if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_pseudo)
      || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudogene", field->data.ptrvalue))) {
    /* when a gene feature is available the flag and qualifier go on the gene,
     * otherwise on the feature itself; both paths return immediately */
    if (gene != NULL) {
      if (!gene->pseudo) {
        gene->pseudo = TRUE;
        rval = TRUE;
      }
      /* the value "Unqualified" sets only the flag, no GBQual is written */
      if (StringICmp (value, "Unqualified") != 0) {
        rval |= SetStringInGBQualList (&(gene->qual), field, scp, value, existing_text);
      }
      return rval;
    } else {
      if (!sfp->pseudo) {
        sfp->pseudo = TRUE;
        rval = TRUE;
      }
      if (StringICmp (value, "Unqualified") != 0) {
        rval |= SetStringInGBQualList (&(sfp->qual), field, scp, value, existing_text);
      }
      return rval;
    }
  }

  /* fields common to some features */
  /* product */
  if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_product)
      || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->data.ptrvalue))) {
    if (prp != NULL) {
      rval = SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text);
    } else if (sfp->data.choice == SEQFEAT_RNA) {
      rval = SetRNAProductString (sfp, scp, value, existing_text);
    }
    matched_term = TRUE;
  }

  /* Gene fields */
  /* locus */
  if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene)
      || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->data.ptrvalue))) {
    /* a gene is only created when there is none yet and no constraint was given */
    if (grp == NULL && IsStringConstraintEmpty (scp)) {
      /* create new gene feature */
      gene = CreateGeneForFeature (sfp);
      if (gene != NULL) {
        grp = (GeneRefPtr) gene->data.value.ptrvalue;
      }
    }
    if (grp != NULL && DoesStringMatchConstraint(grp->locus, scp)) {
      rval = SetStringValue (&(grp->locus), value, existing_text);
    }
    matched_term = TRUE;
  }
  /* description */
  if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_description)
       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
      && grp != NULL) {
    if (DoesStringMatchConstraint(grp->desc, scp)) {
      rval = SetStringValue (&(grp->desc), value, existing_text);
    }
    matched_term = TRUE;
  }
  /* maploc */
  if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_map)
       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->data.ptrvalue)))
      && grp != NULL) {
    if (DoesStringMatchConstraint(grp->maploc, scp)) {
      rval = SetStringValue (&(grp->maploc), value, existing_text);
    }
    matched_term = TRUE;
  }
  /* allele */
  /* variation features are excluded here, so their allele is not written to the gene */
  if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_allele)
       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->data.ptrvalue)))
      && grp != NULL
      && sfp->idx.subtype != FEATDEF_variation) {
    if (DoesStringMatchConstraint(grp->allele, scp)) {
      rval = SetStringValue (&(grp->allele), value, existing_text);
    }
    matched_term = TRUE;
  }
  /* locus_tag */
  if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_locus_tag)
       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->data.ptrvalue)))
      && grp != NULL) {
    if (DoesStringMatchConstraint(grp->locus_tag, scp)) {
      rval = SetStringValue (&(grp->locus_tag), value, existing_text);
    }
    matched_term = TRUE;
  }
  /* synonym */
  if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_synonym)
       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->data.ptrvalue)))
      && grp != NULL) {
    rval = SetStringsInValNodeStringList (&(grp->syn), scp, value, existing_text);
    matched_term = TRUE;
  }
  /* gene comment */
  /* NOTE(review): unlike the other string fields, scp is not consulted here */
  if (field->choice == FeatQualChoice_legal_qual
      && field->data.intvalue == Feat_qual_legal_gene_comment
      && gene != NULL) {
    rval = SetStringValue (&(gene->comment), value, existing_text);
    matched_term = TRUE;
  }

  /* protein fields */
  /* note - product handled above */
  /* description */
  if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_description)
       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
      && prp != NULL) {
    if (DoesStringMatchConstraint(prp->desc, scp)) {
      rval = SetStringValue (&(prp->desc), value, existing_text);
    }
  }
  /* ec_number */
  if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ec_number)
       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->data.ptrvalue)))
      && prp != NULL) {
    rval = SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text);
  }
  /* activity */
  if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_activity)
       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->data.ptrvalue)))
      && prp != NULL) {
    rval = SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text);
  }

  /* special coding region fields */
  /* codon start */
  /* note - if product existed before, it will be retranslated */
  /* NOTE(review): crp is dereferenced for "1"/"2"/"3" without a NULL check
   * (the transl_table branch below does check) - confirm it cannot be NULL */
  if (field->choice == FeatQualChoice_legal_qual
      && field->data.intvalue == Feat_qual_legal_codon_start
      && sfp->data.choice == SEQFEAT_CDREGION) {
    crp = (CdRegionPtr) sfp->data.value.ptrvalue;
    if (StringICmp (value, "best") == 0) {
      rval = ChooseBestFrame (sfp);
    } else if (StringICmp (value, "match") == 0) {
      rval = ChooseMatchingFrame (sfp);
    } else if (StringCmp (value, "1") == 0) {
      crp->frame = 1;
      rval = TRUE;
    } else if (StringCmp (value, "2") == 0) {
      crp->frame = 2;
      rval = TRUE;
    } else if (StringCmp (value, "3") == 0) {
      crp->frame = 3;
      rval = TRUE;
    }
    if (rval && sfp->product != NULL) {
      AdjustProteinSequenceForReadingFrame (sfp);
    }
    matched_term = TRUE;
  }
  /* transl_except */
  /* NOTE(review): crp->code_break is read without a NULL check on crp */
  if (field->choice == FeatQualChoice_legal_qual
      && field->data.intvalue == Feat_qual_legal_transl_except
      && sfp->data.choice == SEQFEAT_CDREGION) {
    crp = (CdRegionPtr) sfp->data.value.ptrvalue;
    if (crp->code_break != NULL && existing_text == ExistingTextOption_leave_old) {
      matched_term = TRUE;
    } else {
      if (crp->code_break != NULL && existing_text == ExistingTextOption_replace_old) {
        RemoveCodeBreak (crp);
      }
      rval = ParseCodeBreak (sfp, value, 0);
    }
  }
  /* transl_table */
  if (field->choice == FeatQualChoice_legal_qual
      && field->data.intvalue == Feat_qual_legal_transl_table
      && sfp->data.choice == SEQFEAT_CDREGION
      && (crp = (CdRegionPtr) sfp->data.value.ptrvalue) != NULL
      && StringIsAllDigits (value)) {
    if (crp->genetic_code != NULL && existing_text == ExistingTextOption_leave_old) {
      matched_term = TRUE;
    } else {
      rval = SetGeneticCode (crp, atoi (value));
    }
  }

  /* special RNA fields */
  /* tRNA fields */
  if (sfp->idx.subtype == FEATDEF_tRNA
      && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codons_recognized)
          || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("codon-recognized", field->data.ptrvalue)))) {
    rval = SettRNACodons_Recognized (sfp, scp, value, existing_text);
  }
  if (sfp->idx.subtype == FEATDEF_tRNA
      && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_anticodon)
          || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->data.ptrvalue)))) {
    rval = SetAnticodon (sfp, scp, value, existing_text);
  }
  if (sfp->idx.subtype == FEATDEF_tmRNA
      && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_tag_peptide)
          || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("tag-peptide", field->data.ptrvalue)))) {
    rval =
SettmRNATagPeptide (sfp->data.value.ptrvalue, scp, value, existing_text); } if (sfp->idx.subtype == FEATDEF_ncRNA && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ncRNA_class) || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ncRNA_class", field->data.ptrvalue)))) { rval = SetncRNAClass (sfp->data.value.ptrvalue, scp, value, existing_text); matched_term = TRUE; } /* special region qualifiers */ if (sfp->idx.subtype == FEATDEF_REGION && field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_name && DoesStringMatchConstraint(sfp->data.value.ptrvalue, scp)) { rval = SetStringValue ((CharPtr PNTR)(&(sfp->data.value.ptrvalue)), value, existing_text); matched_term = TRUE; } /* actual GenBank qualifiers */ if (!rval && !matched_term) { rval = SetStringInGBQualList (&(sfp->qual), field, scp, value, existing_text); } return rval; } static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra) { if (sfp == NULL || field == NULL || field->field == NULL) { return FALSE; } if (field->type != Macro_feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type)) { return FALSE; } return SetQualOnFeatureAnyType (sfp, field->field, scp, value, existing_text, batch_extra); } NLM_EXTERN Boolean SetQualOnFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { return SetQualOnFeatureEx (sfp, field, scp, value, existing_text, NULL); } NLM_EXTERN CharPtr GetRNAQualFromFeature (SeqFeatPtr sfp, RnaQualPtr rq, StringConstraintPtr scp, BatchExtraPtr batch_extra) { ValNode vn; if (sfp == NULL || rq == NULL || !DoesFeatureMatchRnaType(sfp, rq->type)) { return NULL; } MemSet (&vn, 0, sizeof (ValNode)); vn.choice = FeatQualChoice_legal_qual; vn.data.intvalue = GetFeatQualForRnaField (rq->field); return 
GetQualFromFeatureAnyType (sfp, &vn, scp, batch_extra); } NLM_EXTERN Boolean RemoveRNAQualFromFeature (SeqFeatPtr sfp, RnaQualPtr rq, StringConstraintPtr scp) { ValNode vn; if (sfp == NULL || rq == NULL || !DoesFeatureMatchRnaType(sfp, rq->type)) { return FALSE; } MemSet (&vn, 0, sizeof (ValNode)); vn.choice = FeatQualChoice_legal_qual; vn.data.intvalue = GetFeatQualForRnaField (rq->field); return RemoveQualFromFeatureAnyType (sfp, &vn, scp); } NLM_EXTERN Boolean SetRNAQualOnFeature (SeqFeatPtr sfp, RnaQualPtr rq, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { ValNode vn; if (sfp == NULL || rq == NULL || !DoesFeatureMatchRnaType(sfp, rq->type)) { return FALSE; } MemSet (&vn, 0, sizeof (ValNode)); vn.choice = FeatQualChoice_legal_qual; vn.data.intvalue = GetFeatQualForRnaField (rq->field); return SetQualOnFeatureAnyType (sfp, &vn, scp, value, existing_text, NULL); } static int LIBCALLBACK SortVnpByStringLenShortToLong (VoidPtr ptr1, VoidPtr ptr2) { ValNodePtr vnp1; ValNodePtr vnp2; Int4 len1, len2; if (ptr1 != NULL && ptr2 != NULL) { vnp1 = *((ValNodePtr PNTR) ptr1); vnp2 = *((ValNodePtr PNTR) ptr2); if (vnp1 != NULL && vnp2 != NULL) { len1 = StringLen (vnp1->data.ptrvalue); len2 = StringLen (vnp2->data.ptrvalue); if (len1 < len2) { return -1; } else if (len1 > len2) { return 1; } else { return StringCmp (vnp1->data.ptrvalue, vnp2->data.ptrvalue); } } } return 0; } static int LIBCALLBACK SortVnpByStringLenLongToShort (VoidPtr ptr1, VoidPtr ptr2) { ValNodePtr vnp1; ValNodePtr vnp2; Int4 len1, len2; if (ptr1 != NULL && ptr2 != NULL) { vnp1 = *((ValNodePtr PNTR) ptr1); vnp2 = *((ValNodePtr PNTR) ptr2); if (vnp1 != NULL && vnp2 != NULL) { len1 = StringLen (vnp1->data.ptrvalue); len2 = StringLen (vnp2->data.ptrvalue); if (len1 < len2) { return 1; } else if (len1 > len2) { return -1; } else { return StringCmp (vnp1->data.ptrvalue, vnp2->data.ptrvalue); } } } return 0; } static Boolean SortProtNames (SeqFeatPtr sfp, Uint2 order) { ProtRefPtr prp; Boolean 
rval = FALSE; if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT || (prp = (ProtRefPtr) sfp->data.value.ptrvalue) == NULL || prp->name == NULL || prp->name->next == NULL) { return FALSE; } switch (order) { case Sort_order_short_to_long: if (!ValNodeIsSorted(prp->name, SortVnpByStringLenShortToLong)) { prp->name = ValNodeSort (prp->name, SortVnpByStringLenShortToLong); rval = TRUE; } break; case Sort_order_long_to_short: if (!ValNodeIsSorted(prp->name, SortVnpByStringLenLongToShort)) { prp->name = ValNodeSort (prp->name, SortVnpByStringLenLongToShort); rval = TRUE; } break; case Sort_order_alphabetical: if (!ValNodeIsSorted(prp->name, SortVnpByStringCS)) { prp->name = ValNodeSort (prp->name, SortVnpByStringCS); rval = TRUE; } break; } return rval; } NLM_EXTERN Boolean SortQualOnFeature (SeqFeatPtr sfp, FeatureFieldPtr field, Uint2 order) { SeqFeatPtr prot = NULL; BioseqPtr protbsp; SeqMgrFeatContext context; Boolean rval = FALSE; if (sfp == NULL || field == NULL) { return FALSE; } if (field->type == Macro_feature_type_cds || field->type == Macro_feature_type_prot) { if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_product) { if (sfp->data.choice == SEQFEAT_CDREGION) { protbsp = BioseqFindFromSeqLoc (sfp->product); prot = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &context); } else if (sfp->idx.subtype == FEATDEF_PROT) { prot = sfp; } rval = SortProtNames (prot, order); } } return rval; } static void AddLegalFeatureField (ValNodePtr PNTR list, Uint2 featdef, Uint2 qual) { FeatureFieldPtr ffield; Int4 gbqual, num_subfields, i, legal_qual; if (list == NULL) return; ffield = FeatureFieldNew (); ffield->type = GetFeatureTypeFromFeatdef (featdef); ValNodeAddInt (&(ffield->field), FeatQualChoice_legal_qual, qual); ValNodeAddPointer (list, FieldType_feature_field, ffield); /* also add subfields */ gbqual = GetGBQualFromFeatQual (qual, NULL); num_subfields = NumGbQualSubfields (gbqual); for (i = 1; i <= 
num_subfields; i++) { legal_qual = GetFeatQualByGBQualAndSubfield (gbqual, i); if (legal_qual > -1) { ffield = FeatureFieldNew (); ffield->type = GetFeatureTypeFromFeatdef (featdef); ValNodeAddInt (&(ffield->field), FeatQualChoice_legal_qual, legal_qual); ValNodeAddPointer (list, FieldType_feature_field, ffield); } } } static ValNodePtr GetFieldListFromFeature (SeqFeatPtr sfp) { GeneRefPtr grp = NULL; SeqFeatPtr gene = NULL; ProtRefPtr prp = NULL; ValNodePtr list = NULL; GBQualPtr qual; Int4 qual_num; if (sfp == NULL) { return NULL; } // for gene fields GetGeneInfoForFeature (sfp, &grp, &gene); /* add gene-specific fields */ if (grp != NULL) { if (!StringHasNoText (grp->locus)) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_gene); } if (!StringHasNoText (grp->allele)) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_allele); } if (!StringHasNoText (grp->desc)) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_gene_description); } if (!StringHasNoText (grp->maploc)) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_map); } if (!StringHasNoText (grp->locus_tag)) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_locus_tag); } if (grp->syn != NULL) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_synonym); } } /* add protein-specific fields */ prp = GetProtRefForFeature (sfp); if (prp != NULL) { /* product name */ if (prp->name != NULL) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_product); } /* protein description */ if (!StringHasNoText (prp->desc)) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_description); } /* ec_number */ if (prp->ec != NULL) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_ec_number); } /* activity */ if (prp->activity != NULL) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_activity); } } /* fields common to all features */ /* note, also known as comment */ if (!StringHasNoText 
(sfp->comment)) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_note); } /* db-xref */ if (sfp->dbxref != NULL) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_db_xref); } /* exception */ if (!StringHasNoText (sfp->except_text)) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_exception); } /* evidence */ if (sfp->exp_ev > 0) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_evidence); } /* citation */ if (sfp->cit != NULL) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_citation); } /* RNA specific */ if (sfp->data.choice == SEQFEAT_RNA) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_product); } /* coding regions */ if (sfp->data.choice == SEQFEAT_CDREGION) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_codon_start); } /* regions */ if (sfp->idx.subtype == FEATDEF_REGION) { AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_name); } /* actual GenBank qualifiers */ for (qual = sfp->qual; qual != NULL; qual = qual->next) { qual_num = GetFeatQualByName (qual->qual); if (qual_num > -1) { AddLegalFeatureField (&list, sfp->idx.subtype, qual_num); } } return list; } /* Functions for handling new PCR primer sets: * GetPrimerValueFromBioSource * GetMultiplePrimerValuesFromBioSource * RemovePrimerValueFromBioSource * SetPrimerValueInBioSource */ static CharPtr GetPrimerValueFromBioSource (BioSourcePtr biop, Int4 field, StringConstraintPtr constraint) { PCRReactionSetPtr ps; PCRPrimerPtr pp; CharPtr str = NULL; if (biop == NULL) { return NULL; } ps = biop->pcr_primers; while (ps != NULL && str == NULL) { switch (field) { case Source_qual_fwd_primer_name: pp = ps->forward; while (pp != NULL && str == NULL) { if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) { str = StringSave (pp->name); } pp = pp->next; } break; case Source_qual_fwd_primer_seq: pp = ps->forward; while (pp != NULL && str == NULL) { if 
(!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) { str = StringSave (pp->seq); } pp = pp->next; } break; case Source_qual_rev_primer_name: pp = ps->reverse; while (pp != NULL && str == NULL) { if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) { str = StringSave (pp->name); } pp = pp->next; } break; case Source_qual_rev_primer_seq: pp = ps->reverse; while (pp != NULL && str == NULL) { if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) { str = StringSave (pp->seq); } pp = pp->next; } break; } ps = ps->next; } return str; } static ValNodePtr GetMultiplePrimerValuesFromBioSource (BioSourcePtr biop, Int4 field, StringConstraintPtr constraint) { PCRReactionSetPtr ps; PCRPrimerPtr pp; ValNodePtr list = NULL; if (biop == NULL) { return NULL; } ps = biop->pcr_primers; while (ps != NULL) { switch (field) { case Source_qual_fwd_primer_name: pp = ps->forward; while (pp != NULL) { if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) { ValNodeAddPointer (&list, 0, StringSave (pp->name)); } pp = pp->next; } break; case Source_qual_fwd_primer_seq: pp = ps->forward; while (pp != NULL) { if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) { ValNodeAddPointer (&list, 0, StringSave (pp->seq)); } pp = pp->next; } break; case Source_qual_rev_primer_name: pp = ps->reverse; while (pp != NULL) { if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) { ValNodeAddPointer (&list, 0, StringSave (pp->name)); } pp = pp->next; } break; case Source_qual_rev_primer_seq: pp = ps->reverse; while (pp != NULL) { if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) { ValNodeAddPointer (&list, 0, StringSave (pp->seq)); } pp = pp->next; } break; } ps = ps->next; } return list; } static Boolean PCRPrimerIsEmpty (PCRPrimerPtr primer) { if (primer == NULL) { return TRUE; } else if 
(StringHasNoText (primer->name) && StringHasNoText (primer->seq)) { return TRUE; } else { return FALSE; } } static Boolean PCRPrimerListIsEmpty (PCRPrimerPtr primer) { Boolean rval = TRUE; while (primer != NULL && rval) { rval = PCRPrimerIsEmpty(primer); primer = primer->next; } return rval; } NLM_EXTERN Boolean PCRReactionIsEmpty (PCRReactionPtr pr) { if (pr == NULL) { return TRUE; } else if (PCRPrimerListIsEmpty(pr->forward) && PCRPrimerListIsEmpty(pr->reverse)) { return TRUE; } else { return FALSE; } } static Boolean RemoveNameFromPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint) { PCRPrimerPtr pp, prev_pp = NULL, next_pp; Boolean rval = FALSE; if (pp_list == NULL || (pp = (PCRPrimerPtr) *pp_list) == NULL) { return FALSE; } while (pp != NULL) { if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) { pp->name = MemFree (pp->name); rval = TRUE; } next_pp = pp->next; if (PCRPrimerIsEmpty(pp)) { pp->next = NULL; pp = PCRPrimerFree (pp); if (prev_pp == NULL) { *pp_list = next_pp; } else { prev_pp->next = next_pp; } } else { prev_pp = pp; } pp = next_pp; } return rval; } static Boolean RemoveSeqFromPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint) { PCRPrimerPtr pp, prev_pp = NULL, next_pp; Boolean rval = FALSE; if (pp_list == NULL || (pp = (PCRPrimerPtr) *pp_list) == NULL) { return FALSE; } while (pp != NULL) { if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) { pp->seq = MemFree (pp->seq); rval = TRUE; } next_pp = pp->next; if (PCRPrimerIsEmpty(pp)) { pp->next = NULL; pp = PCRPrimerFree (pp); if (prev_pp == NULL) { *pp_list = next_pp; } else { prev_pp->next = next_pp; } } else { prev_pp = pp; } pp = next_pp; } return rval; } static Boolean RemovePrimerValueFromBioSource (BioSourcePtr biop, Int4 field, StringConstraintPtr constraint) { PCRReactionSetPtr ps, prev_ps = NULL, next_ps; Boolean rval = FALSE; if (biop == NULL) { return FALSE; } ps = biop->pcr_primers; 
while (ps != NULL) { switch (field) { case Source_qual_fwd_primer_name: rval |= RemoveNameFromPrimerList (&(ps->forward), constraint); break; case Source_qual_fwd_primer_seq: rval |= RemoveSeqFromPrimerList (&(ps->forward), constraint); break; case Source_qual_rev_primer_name: rval |= RemoveNameFromPrimerList (&(ps->reverse), constraint); break; case Source_qual_rev_primer_seq: rval |= RemoveSeqFromPrimerList (&(ps->reverse), constraint); break; } next_ps = ps->next; if (PCRReactionIsEmpty(ps)) { ps->next = NULL; ps = PCRReactionFree (ps); if (prev_ps == NULL) { biop->pcr_primers = next_ps; } else { prev_ps->next = next_ps; } } else { prev_ps = ps; } ps = next_ps; } return rval; } static Boolean IsCompoundPrimerValue (CharPtr value) { Int4 len; if (StringHasNoText (value)) { return FALSE; } else if (StringChr (value, ':') != NULL || StringChr (value, ',') != NULL) { return TRUE; } len = StringLen (value); if (*value == '(' && value[len - 1] == ')') { return TRUE; } else { return FALSE; } } static Boolean HasMultiplePrimerSets (CharPtr value) { if (StringChr (value, ',')) { return TRUE; } else { return FALSE; } } static ValNodePtr GetPrimerSetComponents (CharPtr value) { CharPtr cp, last_cp, tmp, src, dst; ValNodePtr list = NULL; last_cp = value; for (cp = StringChr (value, ','); cp != NULL; cp = StringChr (last_cp, ',')) { tmp = (CharPtr) MemNew (sizeof (Char) * (cp - last_cp + 1)); src = last_cp; dst = tmp; while (src < cp) { if (*src != '(' && *src != ')') { *dst = *src; dst++; } src++; } *dst = 0; ValNodeAddPointer (&list, 0, tmp); last_cp = cp + 1; } if (*last_cp != 0) { tmp = (CharPtr) MemNew (sizeof (Char) * (StringLen (last_cp) + 1)); src = last_cp; dst = tmp; while (*src != 0) { if (*src != '(' && *src != ')') { *dst = *src; dst++; } src++; } *dst = 0; ValNodeAddPointer (&list, 0, tmp); } return list; } static ValNodePtr GetPrimerElements (CharPtr value) { CharPtr cp, last_cp, tmp; ValNodePtr list = NULL; Int4 len; last_cp = value; for (cp = StringChr 
(value, ':'); cp != NULL; cp = StringChr (last_cp, ':')) { len = cp - last_cp + 1; tmp = (CharPtr) MemNew (sizeof (Char) * len); StringNCpy (tmp, last_cp, len - 1); tmp[len - 1] = 0; ValNodeAddPointer (&list, 0, tmp); last_cp = cp + 1; } if (*last_cp != 0) { ValNodeAddPointer (&list, 0, StringSave (last_cp)); } return list; } static Boolean OverwriteNameStringIntoPrimerList (CharPtr value, PCRPrimerPtr PNTR p_list) { ValNodePtr elements, vnp; PCRPrimerPtr pp, prev_pp = NULL; Boolean any_change = FALSE; if (p_list == NULL) { return FALSE; } elements = GetPrimerElements (value); for (vnp = elements, pp = *p_list; vnp != NULL; vnp = vnp->next) { if (pp == NULL) { pp = PCRPrimerNew (); if (prev_pp == NULL) { *p_list = pp; } else { prev_pp->next = pp; } any_change = TRUE; } if (StringCmp (pp->name, vnp->data.ptrvalue) != 0) { pp->name = MemFree (pp->name); pp->name = vnp->data.ptrvalue; vnp->data.ptrvalue = NULL; any_change = TRUE; } prev_pp = pp; pp = pp->next; } while (pp != NULL) { if (!StringHasNoText (pp->name)) { any_change = TRUE; } pp->name = MemFree (pp->name); pp = pp->next; } elements = ValNodeFreeData (elements); return any_change; } static Boolean OverwriteSeqStringIntoPrimerList (CharPtr value, PCRPrimerPtr PNTR p_list) { ValNodePtr elements, vnp; PCRPrimerPtr pp, prev_pp = NULL; Boolean any_change = FALSE; if (p_list == NULL) { return FALSE; } elements = GetPrimerElements (value); for (vnp = elements, pp = *p_list; vnp != NULL; vnp = vnp->next) { if (pp == NULL) { pp = PCRPrimerNew (); if (prev_pp == NULL) { *p_list = pp; } else { prev_pp->next = pp; } any_change = TRUE; } if (StringCmp (pp->seq, vnp->data.ptrvalue) != 0) { pp->seq = MemFree (pp->seq); pp->seq = vnp->data.ptrvalue; vnp->data.ptrvalue = NULL; any_change = TRUE; } prev_pp = pp; pp = pp->next; } while (pp != NULL) { if (!StringHasNoText (pp->seq)) { any_change = TRUE; } pp->seq = MemFree (pp->seq); pp = pp->next; } elements = ValNodeFreeData (elements); return any_change; } static Boolean 
OverwriteFwdNameStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list) { ValNodePtr sets, vnp; PCRReactionPtr ps, prev_ps = NULL; Boolean any_change = FALSE; if (p_list == NULL) { return FALSE; } sets = GetPrimerSetComponents (value); for (vnp = sets, ps = *p_list; vnp != NULL; vnp = vnp->next) { if (ps == NULL) { ps = PCRReactionNew (); if (prev_ps == NULL) { *p_list = ps; } else { prev_ps->next = ps; } any_change = TRUE; } any_change |= OverwriteNameStringIntoPrimerList (vnp->data.ptrvalue, &(ps->forward)); prev_ps = ps; ps = ps->next; } while (ps != NULL) { any_change |= RemoveNameFromPrimerList (&(ps->forward), NULL); ps = ps->next; } sets = ValNodeFreeData (sets); return any_change; } static Boolean OverwriteRevNameStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list) { ValNodePtr sets, vnp; PCRReactionPtr ps, prev_ps = NULL; Boolean any_change = FALSE; if (p_list == NULL) { return FALSE; } sets = GetPrimerSetComponents (value); for (vnp = sets, ps = *p_list; vnp != NULL; vnp = vnp->next) { if (ps == NULL) { ps = PCRReactionNew (); if (prev_ps == NULL) { *p_list = ps; } else { prev_ps->next = ps; } any_change = TRUE; } any_change |= OverwriteNameStringIntoPrimerList (vnp->data.ptrvalue, &(ps->reverse)); prev_ps = ps; ps = ps->next; } while (ps != NULL) { any_change |= RemoveNameFromPrimerList (&(ps->reverse), NULL); ps = ps->next; } sets = ValNodeFreeData (sets); return any_change; } static Boolean OverwriteFwdSeqStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list) { ValNodePtr sets, vnp; PCRReactionPtr ps, prev_ps = NULL; Boolean any_change = FALSE; if (p_list == NULL) { return FALSE; } sets = GetPrimerSetComponents (value); for (vnp = sets, ps = *p_list; vnp != NULL; vnp = vnp->next) { if (ps == NULL) { ps = PCRReactionNew (); if (prev_ps == NULL) { *p_list = ps; } else { prev_ps->next = ps; } any_change = TRUE; } any_change |= OverwriteSeqStringIntoPrimerList (vnp->data.ptrvalue, &(ps->forward)); prev_ps = ps; ps 
= ps->next; } while (ps != NULL) { any_change |= RemoveSeqFromPrimerList (&(ps->forward), NULL); ps = ps->next; } sets = ValNodeFreeData (sets); return any_change; } static Boolean OverwriteRevSeqStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list) { ValNodePtr sets, vnp; PCRReactionPtr ps, prev_ps = NULL; Boolean any_change = FALSE; if (p_list == NULL) { return FALSE; } sets = GetPrimerSetComponents (value); for (vnp = sets, ps = *p_list; vnp != NULL; vnp = vnp->next) { if (ps == NULL) { ps = PCRReactionNew (); if (prev_ps == NULL) { *p_list = ps; } else { prev_ps->next = ps; } any_change = TRUE; } any_change |= OverwriteSeqStringIntoPrimerList (vnp->data.ptrvalue, &(ps->reverse)); prev_ps = ps; ps = ps->next; } while (ps != NULL) { any_change |= RemoveSeqFromPrimerList (&(ps->reverse), NULL); ps = ps->next; } sets = ValNodeFreeData (sets); return any_change; } static Boolean SetNameInPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text) { PCRPrimerPtr pp, prev_pp = NULL; Boolean rval = FALSE; if (pp_list == NULL) { return FALSE; } pp = *pp_list; while (pp != NULL) { if (DoesStringMatchConstraint (pp->name, constraint)) { rval = SetStringValue (&(pp->name), value, existing_text); } prev_pp = pp; pp = pp->next; } return rval; } static Boolean SetSeqInPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text) { PCRPrimerPtr pp, prev_pp = NULL; Boolean rval = FALSE; if (pp_list == NULL) { return FALSE; } pp = *pp_list; while (pp != NULL) { if (DoesStringMatchConstraint (pp->seq, constraint)) { rval = SetStringValue (&(pp->seq), value, existing_text); } prev_pp = pp; pp = pp->next; } return rval; } static Boolean SetPrimerValueInBioSource(BioSourcePtr biop, Int4 field, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text) { PCRReactionSetPtr ps, prev_ps = NULL; Boolean rval = FALSE; ps = biop->pcr_primers; if (IsCompoundPrimerValue(value)) 
{ if (existing_text != ExistingTextOption_leave_old || biop->pcr_primers == NULL) { switch (field) { case Source_qual_fwd_primer_name: rval = OverwriteFwdNameStringIntoPCRReactionSet (value, &(biop->pcr_primers)); break; case Source_qual_fwd_primer_seq: rval = OverwriteFwdSeqStringIntoPCRReactionSet (value, &(biop->pcr_primers)); break; case Source_qual_rev_primer_name: rval = OverwriteRevNameStringIntoPCRReactionSet (value, &(biop->pcr_primers)); break; case Source_qual_rev_primer_seq: rval = OverwriteRevSeqStringIntoPCRReactionSet (value, &(biop->pcr_primers)); break; } } } else { while (ps != NULL) { switch (field) { case Source_qual_fwd_primer_name: rval |= SetNameInPrimerList (&(ps->forward), constraint, value, existing_text); break; case Source_qual_fwd_primer_seq: rval |= SetSeqInPrimerList (&(ps->forward), constraint, value, existing_text); break; case Source_qual_rev_primer_name: rval |= SetNameInPrimerList (&(ps->reverse), constraint, value, existing_text); break; case Source_qual_rev_primer_seq: rval |= SetSeqInPrimerList (&(ps->reverse), constraint, value, existing_text); break; } prev_ps = ps; ps = ps->next; } if (IsStringConstraintEmpty (constraint) && !rval && (existing_text != ExistingTextOption_leave_old || biop->pcr_primers == NULL)) { if (prev_ps == NULL) { ps = PCRReactionSetNew (); biop->pcr_primers = ps; } else if ((PCRPrimerListIsEmpty(prev_ps->forward) && (field == Source_qual_fwd_primer_name || field == Source_qual_fwd_primer_seq)) || (PCRPrimerListIsEmpty(prev_ps->reverse) && (field == Source_qual_rev_primer_name || field == Source_qual_rev_primer_seq))) { /* add to previous set */ ps = prev_ps; } else { /* field is filled on previous, build a new one */ ps = PCRReactionSetNew (); prev_ps->next = ps; } switch (field) { case Source_qual_fwd_primer_name: ps->forward = PCRPrimerNew (); ps->forward->name = StringSave (value); rval = TRUE; break; case Source_qual_fwd_primer_seq: ps->forward = PCRPrimerNew (); ps->forward->seq = StringSave 
(value); rval = TRUE; break; case Source_qual_rev_primer_name: ps->reverse = PCRPrimerNew (); ps->reverse->name = StringSave (value); rval = TRUE; break; case Source_qual_rev_primer_seq: ps->reverse = PCRPrimerNew (); ps->reverse->seq = StringSave (value); rval = TRUE; break; } } } return rval; } /* functions for source qualifiers */ NLM_EXTERN Boolean HasTaxonomyID (BioSourcePtr biop) { ValNodePtr db; DbtagPtr dbt; Boolean rval = FALSE; if (biop == NULL || biop->org == NULL) { return FALSE; } for (db = biop->org->db; db != NULL && !rval; db = db->next) { dbt = (DbtagPtr) db->data.ptrvalue; if (dbt != NULL && dbt->db != NULL && StringICmp (dbt->db, "taxon") == 0) { rval = TRUE; } } return rval; } static CharPtr GetTaxonomyId (BioSourcePtr biop, StringConstraintPtr scp) { ValNodePtr db; DbtagPtr dbt; CharPtr str = NULL; Char buf[15]; if (biop == NULL || biop->org == NULL) { return NULL; } for (db = biop->org->db; db != NULL && str == NULL; db = db->next) { dbt = (DbtagPtr) db->data.ptrvalue; if (dbt != NULL && dbt->db != NULL && StringICmp (dbt->db, "taxon") == 0) { if (dbt->tag->id > 0) { sprintf (buf, "%d", dbt->tag->id); if (DoesStringMatchConstraint (buf, scp)) { str = StringSave (buf); } } else if (!StringHasNoText (dbt->tag->str) && DoesStringMatchConstraint (dbt->tag->str, scp)) { str = StringSave (dbt->tag->str); } } } return str; } static ValNodePtr GetMultipleTaxidStrings (ValNodePtr list, StringConstraintPtr scp) { ValNodePtr vnp, val_list = NULL; DbtagPtr dbt; CharPtr str = NULL; Char buf[15]; for (vnp = list; vnp != NULL; vnp = vnp->next) { dbt = (DbtagPtr) vnp->data.ptrvalue; if (dbt != NULL && StringCmp (dbt->db, "taxon") == 0) { if (dbt->tag->id > 0) { sprintf (buf, "%d", dbt->tag->id); if (DoesStringMatchConstraint (buf, scp)) { str = StringSave (buf); } } else if (!StringHasNoText (dbt->tag->str) && DoesStringMatchConstraint (dbt->tag->str, scp)) { str = StringSave (dbt->tag->str); } if (str != NULL) { ValNodeAddPointer (&val_list, 0, str); } } } 
return val_list; } static Boolean SetTaxonomyId (BioSourcePtr biop, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { CharPtr tmp; CharPtr fmt = "taxon:%s"; Boolean rval; if (biop == NULL) { return FALSE; } if (biop->org == NULL) { biop->org = OrgRefNew(); } tmp = (CharPtr) MemNew (sizeof (Char) * (StringLen (value) + StringLen (fmt))); sprintf (tmp, fmt, value == NULL ? "" : value); rval = SetDbxrefString (&(biop->org->db), scp, tmp, existing_text); tmp = MemFree (tmp); return rval; } static Boolean RemoveTaxonomyId (BioSourcePtr biop, StringConstraintPtr scp) { ValNodePtr db, db_prev = NULL, db_next; DbtagPtr dbt; Boolean rval = FALSE, do_remove; Char buf[15]; if (biop == NULL || biop->org == NULL) { return FALSE; } for (db = biop->org->db; db != NULL; db = db_next) { db_next = db->next; dbt = (DbtagPtr) db->data.ptrvalue; do_remove = FALSE; if (dbt != NULL && dbt->db != NULL && StringICmp (dbt->db, "taxon") == 0) { if (dbt->tag->id > 0) { sprintf (buf, "%d", dbt->tag->id); if (DoesStringMatchConstraint (buf, scp)) { do_remove = TRUE; } } else if (!StringHasNoText (dbt->tag->str) && DoesStringMatchConstraint (dbt->tag->str, scp)) { do_remove = TRUE; } } if (do_remove) { if (db_prev == NULL) { biop->org->db = db_next; } else { db_prev->next = db_next; } db->next = NULL; db->data.ptrvalue = DbtagFree (db->data.ptrvalue); db = ValNodeFree (db); rval = TRUE; } else { db_prev = db; } } return rval; } NLM_EXTERN CharPtr GetSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint) { CharPtr str = NULL; SubSourcePtr ssp; OrgModPtr mod; Int4 orgmod_subtype = -1, subsrc_subtype = -1; Int4 subfield; ValNode vn; Char buf[15]; if (biop == NULL || scp == NULL) return NULL; switch (scp->choice) { case SourceQualChoice_textqual: if (scp->data.intvalue == Source_qual_taxname) { if (biop->org != NULL && !StringHasNoText (biop->org->taxname) && DoesStringMatchConstraint (biop->org->taxname, constraint)) { str = StringSave 
(biop->org->taxname); } } else if (scp->data.intvalue == Source_qual_common_name) { if (biop->org != NULL && !StringHasNoText (biop->org->common) && DoesStringMatchConstraint (biop->org->common, constraint)) { str = StringSave (biop->org->common); } } else if (scp->data.intvalue == Source_qual_lineage) { if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->lineage) && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint)) { str = StringSave (biop->org->orgname->lineage); } } else if (scp->data.intvalue == Source_qual_division) { if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->div) && DoesStringMatchConstraint (biop->org->orgname->div, constraint)) { str = StringSave (biop->org->orgname->div); } } else if (scp->data.intvalue == Source_qual_dbxref) { if (biop->org != NULL) { str = GetDbxrefString (biop->org->db, constraint); } } else if (scp->data.intvalue == Source_qual_taxid) { str = GetTaxonomyId (biop, constraint); } else if (scp->data.intvalue == Source_qual_all_notes) { vn.choice = SourceQualChoice_textqual; vn.data.intvalue = Source_qual_subsource_note; vn.next = NULL; str = GetSourceQualFromBioSource (biop, &vn, constraint); if (str == NULL) { vn.data.intvalue = Source_qual_orgmod_note; str = GetSourceQualFromBioSource (biop, &vn, constraint); } } else if (scp->data.intvalue == Source_qual_all_quals || scp->data.intvalue == Source_qual_all_primers) { /* will not do */ } else if (scp->data.intvalue == Source_qual_fwd_primer_name || scp->data.intvalue == Source_qual_fwd_primer_seq || scp->data.intvalue == Source_qual_rev_primer_name || scp->data.intvalue == Source_qual_rev_primer_seq) { /* fetch from new primer object */ str = GetPrimerValueFromBioSource (biop, scp->data.intvalue, constraint); } else { orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield); if (orgmod_subtype == -1) { subsrc_subtype = GetSubSrcQualFromSrcQual 
(scp->data.intvalue, &subfield); for (ssp = biop->subtype; ssp != NULL && str == NULL; ssp = ssp->next) { if (ssp->subtype == subsrc_subtype) { if (StringHasNoText (ssp->name)) { if (IsNonTextSourceQual (scp->data.intvalue) && DoesStringMatchConstraint ("TRUE", constraint)) { str = StringSave ("TRUE"); } } else { if (subfield == 0) { if (DoesStringMatchConstraint (ssp->name, constraint)) { str = StringSave (ssp->name); } } else { str = GetThreeFieldSubfield (ssp->name, subfield); if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) { str = MemFree (str); } } } } } } else { if (biop->org != NULL && biop->org->orgname != NULL) { for (mod = biop->org->orgname->mod; mod != NULL && str == NULL; mod = mod->next) { if (mod->subtype == orgmod_subtype) { if (StringHasNoText (mod->subname)) { if (IsNonTextSourceQual (scp->data.intvalue) && DoesStringMatchConstraint ("TRUE", constraint)) { str = StringSave ("TRUE"); } } else { if (subfield == 0) { if (DoesStringMatchConstraint (mod->subname, constraint)) { str = StringSave (mod->subname); } } else { str = GetThreeFieldSubfield (mod->subname, subfield); if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) { str = MemFree (str); } } } } } } } } break; case SourceQualChoice_location: str = LocNameFromGenome (biop->genome); if (DoesStringMatchConstraint (str, constraint)) { str = StringSave (str); } else { str = NULL; } break; case SourceQualChoice_origin: str = OriginNameFromOrigin (biop->origin); if (DoesStringMatchConstraint (str, constraint)) { str = StringSave (str); } else { str = NULL; } break; case SourceQualChoice_gcode: if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->gcode != 0) { sprintf (buf, "%d", biop->org->orgname->gcode); str = StringSave (buf); } break; case SourceQualChoice_mgcode: if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->mgcode != 0) { sprintf (buf, "%d", biop->org->orgname->mgcode); str = StringSave (buf); 
} break; } return str; } NLM_EXTERN ValNodePtr GetMultipleSourceQualsFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint) { ValNodePtr val_list = NULL; CharPtr str = NULL; SubSourcePtr ssp; OrgModPtr mod; Int4 orgmod_subtype = -1, subsrc_subtype = -1; Int4 subfield; ValNode vn; if (biop == NULL || scp == NULL) return NULL; if (scp->choice == SourceQualChoice_textqual) { if (scp->data.intvalue == Source_qual_taxname) { if (biop->org != NULL && !StringHasNoText (biop->org->taxname) && DoesStringMatchConstraint (biop->org->taxname, constraint)) { ValNodeAddPointer (&val_list, 0, StringSave (biop->org->taxname)); } } else if (scp->data.intvalue == Source_qual_common_name) { if (biop->org != NULL && !StringHasNoText (biop->org->common) && DoesStringMatchConstraint (biop->org->common, constraint)) { ValNodeAddPointer (&val_list, 0, StringSave (biop->org->common)); } } else if (scp->data.intvalue == Source_qual_lineage) { if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->lineage) && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint)) { ValNodeAddPointer (&val_list, 0, StringSave (biop->org->orgname->lineage)); } } else if (scp->data.intvalue == Source_qual_division) { if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->div) && DoesStringMatchConstraint (biop->org->orgname->div, constraint)) { ValNodeAddPointer (&val_list, 0, StringSave (biop->org->orgname->div)); } } else if (scp->data.intvalue == Source_qual_dbxref) { if (biop->org != NULL) { ValNodeLink (&val_list, GetMultipleDbxrefStrings (biop->org->db, constraint)); } } else if (scp->data.intvalue == Source_qual_taxid) { if (biop->org != NULL) { ValNodeLink (&val_list, GetMultipleTaxidStrings (biop->org->db, constraint)); } } else if (scp->data.intvalue == Source_qual_fwd_primer_name || scp->data.intvalue == Source_qual_fwd_primer_seq || scp->data.intvalue == 
Source_qual_rev_primer_name
               || scp->data.intvalue == Source_qual_rev_primer_seq) {
      /* fetch from new primer object */
      ValNodeLink (&val_list, GetMultiplePrimerValuesFromBioSource (biop, scp->data.intvalue, constraint));
    } else if (scp->data.intvalue == Source_qual_all_notes) {
      /* collect the subsource note first, then the orgmod note */
      vn.choice = SourceQualChoice_textqual;
      vn.data.intvalue = Source_qual_subsource_note;
      vn.next = NULL;
      str = GetSourceQualFromBioSource (biop, &vn, constraint);
      if (str != NULL) {
        ValNodeAddPointer (&val_list, 0, str);
      }
      vn.data.intvalue = Source_qual_orgmod_note;
      str = GetSourceQualFromBioSource (biop, &vn, constraint);
      if (str != NULL) {
        ValNodeAddPointer (&val_list, 0, str);
      }
    } else if (scp->data.intvalue == Source_qual_all_quals
               || scp->data.intvalue == Source_qual_all_primers) {
      /* will not do */
    } else {
      orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield);
      if (orgmod_subtype == -1) {
        /* not an OrgMod qualifier: look through the SubSource list */
        subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield);
        for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
          if (ssp->subtype == subsrc_subtype) {
            if (StringHasNoText (ssp->name)) {
              if (IsNonTextSourceQual (scp->data.intvalue)
                  && DoesStringMatchConstraint ("TRUE", constraint)) {
                ValNodeAddPointer (&val_list, 0, StringSave ("TRUE"));
              }
            } else {
              if (subfield == 0) {
                if (DoesStringMatchConstraint (ssp->name, constraint)) {
                  ValNodeAddPointer (&val_list, 0, StringSave (ssp->name));
                }
              } else {
                str = GetThreeFieldSubfield (ssp->name, subfield);
                if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) {
                  str = MemFree (str);
                } else {
                  /* ownership of str passes to val_list */
                  ValNodeAddPointer (&val_list, 0, str);
                }
              }
            }
          }
        }
      } else {
        if (biop->org != NULL && biop->org->orgname != NULL) {
          /* Visit every OrgMod, as the SubSource loop above does: stopping
           * once str is set would drop all but the first subfield match. */
          for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) {
            if (mod->subtype == orgmod_subtype) {
              if (StringHasNoText (mod->subname)) {
                if (IsNonTextSourceQual (scp->data.intvalue)
                    && DoesStringMatchConstraint ("TRUE", constraint)) {
                  ValNodeAddPointer (&val_list, 0, StringSave ("TRUE"));
                }
} else { if (subfield == 0) { if (DoesStringMatchConstraint (mod->subname, constraint)) { ValNodeAddPointer (&val_list, 0, StringSave (mod->subname)); } } else { str = GetThreeFieldSubfield (mod->subname, subfield); if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) { str = MemFree (str); } else { ValNodeAddPointer (&val_list, 0, str); } } } } } } } } } else { str = GetSourceQualFromBioSource (biop, scp, constraint); if (str != NULL) { ValNodeAddPointer (&val_list, 0, str); } } return val_list; } static Boolean RemoveAllSourceQualsFromBioSource (BioSourcePtr biop, StringConstraintPtr constraint) { Int4 i; Boolean rval = FALSE; ValNode vn; vn.next = NULL; vn.choice = SourceQualChoice_textqual; for (i = 0; i < NUM_srcqual_scqual; i++) { if (srcqual_scqual[i].srcqual != Source_qual_all_quals && srcqual_scqual[i].srcqual != Source_qual_all_notes && srcqual_scqual[i].srcqual != Source_qual_all_primers) { vn.data.intvalue = srcqual_scqual[i].srcqual; rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint); } } return rval; } static void Lcl_RemoveOldName (OrgRefPtr orp) { OrgModPtr prev = NULL, curr, next_mod; if (orp == NULL || orp->orgname == NULL) return; curr = orp->orgname->mod; while (curr != NULL) { next_mod = curr->next; if (curr->subtype == ORGMOD_old_name) { if (prev == NULL) { orp->orgname->mod = curr->next; } else { prev->next = curr->next; } curr->next = NULL; OrgModFree (curr); } else { prev = curr; } curr = next_mod; } } NLM_EXTERN Boolean RemoveSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint) { SubSourcePtr ssp, ssp_prev = NULL, ssp_next; OrgModPtr mod, mod_prev = NULL, mod_next; Int4 orgmod_subtype = -1, subsrc_subtype = -1, subfield; CharPtr str, tmp; Boolean rval = FALSE, do_remove, does_match; ValNode vn; if (biop == NULL || scp == NULL) return FALSE; switch (scp->choice) { case SourceQualChoice_textqual: if (scp->data.intvalue == Source_qual_taxname) { if (biop->org != 
NULL && !StringHasNoText (biop->org->taxname) && DoesStringMatchConstraint (biop->org->taxname, constraint)) { biop->org->taxname = MemFree (biop->org->taxname); RemoveTaxRef (biop->org); Lcl_RemoveOldName (biop->org); rval = TRUE; } } else if (scp->data.intvalue == Source_qual_common_name) { if (biop->org != NULL && !StringHasNoText (biop->org->common) && DoesStringMatchConstraint (biop->org->common, constraint)) { biop->org->common = MemFree (biop->org->common); rval = TRUE; } } else if (scp->data.intvalue == Source_qual_lineage) { if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->lineage) && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint)) { biop->org->orgname->lineage = MemFree (biop->org->orgname->lineage); rval = TRUE; } } else if (scp->data.intvalue == Source_qual_division) { if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->div) && DoesStringMatchConstraint (biop->org->orgname->div, constraint)) { biop->org->orgname->div = MemFree (biop->org->orgname->div); rval = TRUE; } } else if (scp->data.intvalue == Source_qual_dbxref) { if (biop->org != NULL) { rval = RemoveDbxrefString (&(biop->org->db), constraint); } } else if (scp->data.intvalue == Source_qual_taxid) { rval = RemoveTaxonomyId (biop, constraint); } else if (scp->data.intvalue == Source_qual_fwd_primer_name || scp->data.intvalue == Source_qual_fwd_primer_seq || scp->data.intvalue == Source_qual_rev_primer_name || scp->data.intvalue == Source_qual_rev_primer_seq) { /* remove from new primer object */ rval = RemovePrimerValueFromBioSource (biop, scp->data.intvalue, constraint); } else if (scp->data.intvalue == Source_qual_all_notes) { vn.choice = SourceQualChoice_textqual; vn.data.intvalue = Source_qual_subsource_note; vn.next = NULL; rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint); vn.data.intvalue = Source_qual_orgmod_note; rval |= RemoveSourceQualFromBioSource (biop, &vn, 
constraint); } else if (scp->data.intvalue == Source_qual_all_primers) { vn.choice = SourceQualChoice_textqual; vn.data.intvalue = Source_qual_fwd_primer_name; vn.next = NULL; rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint); vn.data.intvalue = Source_qual_rev_primer_name; rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint); vn.data.intvalue = Source_qual_fwd_primer_seq; rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint); vn.data.intvalue = Source_qual_rev_primer_seq; rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint); } else if (scp->data.intvalue == Source_qual_all_quals) { rval |= RemoveAllSourceQualsFromBioSource (biop, constraint); } else { orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield); if (orgmod_subtype == -1) { subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield); ssp = biop->subtype; while (ssp != NULL) { ssp_next = ssp->next; do_remove = FALSE; if (ssp->subtype == subsrc_subtype) { if (subfield == 0) { if (DoesStringMatchConstraint (ssp->name, constraint)) { do_remove = TRUE; } } else { does_match = TRUE; if (!IsStringConstraintEmpty (constraint)) { tmp = GetThreeFieldSubfield (ssp->name, subfield); does_match = DoesStringMatchConstraint (tmp, constraint); tmp = MemFree (tmp); } if (does_match) { rval |= RemoveThreeFieldSubfield (&(ssp->name), subfield); if (StringHasNoText (ssp->name)) { do_remove = TRUE; } } } } if (do_remove) { if (ssp_prev == NULL) { biop->subtype = ssp->next; } else { ssp_prev->next = ssp->next; } ssp->next = NULL; ssp = SubSourceFree (ssp); rval = TRUE; } else { ssp_prev = ssp; } ssp = ssp_next; } } else { if (biop->org != NULL && biop->org->orgname != NULL) { mod = biop->org->orgname->mod; while (mod != NULL) { mod_next = mod->next; do_remove = FALSE; if (mod->subtype == orgmod_subtype) { if (subfield == 0) { if (DoesStringMatchConstraint (mod->subname, constraint)) { do_remove = TRUE; } } else { does_match = TRUE; if 
(!IsStringConstraintEmpty (constraint)) { tmp = GetThreeFieldSubfield (mod->subname, subfield); does_match = DoesStringMatchConstraint (tmp, constraint); tmp = MemFree (tmp); } if (does_match) { rval |= RemoveThreeFieldSubfield (&(mod->subname), subfield); } if (StringHasNoText (mod->subname)) { do_remove = TRUE; } } } if (do_remove) { if (mod_prev == NULL) { biop->org->orgname->mod = mod->next; } else { mod_prev->next = mod->next; } mod->next = NULL; mod = OrgModFree (mod); rval = TRUE; } else { mod_prev = mod; } mod = mod_next; } } } } break; case SourceQualChoice_location: str = LocNameFromGenome (biop->genome); if (DoesStringMatchConstraint (str, constraint)) { if (scp->data.intvalue == 0 || biop->genome == GenomeFromSrcLoc (scp->data.intvalue)) { biop->genome = 0; rval = TRUE; } } break; case SourceQualChoice_origin: str = OriginNameFromOrigin (biop->origin); if (DoesStringMatchConstraint (str, constraint)) { if (scp->data.intvalue == 0 || biop->origin == OriginFromSrcOrig (scp->data.intvalue)) { biop->origin = 0; rval = TRUE; } } break; case SourceQualChoice_gcode: if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->gcode != 0) { biop->org->orgname->gcode = 0; rval = TRUE; } break; case SourceQualChoice_mgcode: if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->mgcode != 0) { biop->org->orgname->mgcode = 0; rval = TRUE; } break; } return rval; } NLM_EXTERN Boolean SetSourceQualInBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text) { SubSourcePtr ssp, ssp_prev = NULL, ssp_next; OrgModPtr mod, mod_prev = NULL, mod_next; Int4 orgmod_subtype = -1, subsrc_subtype = -1, subfield; CharPtr str, tmp; Boolean rval = FALSE, found = FALSE, does_match; ValNode vn; if (biop == NULL || scp == NULL) return FALSE; switch (scp->choice) { case SourceQualChoice_textqual: if (scp->data.intvalue == Source_qual_taxname) { if ((biop->org == NULL && 
IsStringConstraintEmpty (constraint)) || (biop->org != NULL && DoesStringMatchConstraint (biop->org->taxname, constraint))) { if (biop->org == NULL) { biop->org = OrgRefNew(); } rval = SetStringValue (&(biop->org->taxname), value, existing_text); if (rval) { RemoveTaxRef (biop->org); Lcl_RemoveOldName (biop->org); } } } else if (scp->data.intvalue == Source_qual_common_name) { if ((biop->org == NULL && IsStringConstraintEmpty (constraint)) || (biop->org != NULL && DoesStringMatchConstraint (biop->org->common, constraint))) { if (biop->org == NULL) { biop->org = OrgRefNew(); } rval = SetStringValue (&(biop->org->common), value, existing_text); } } else if (scp->data.intvalue == Source_qual_lineage) { if ((biop->org == NULL && IsStringConstraintEmpty (constraint)) ||(biop->org != NULL && biop->org->orgname == NULL && IsStringConstraintEmpty (constraint)) ||(biop->org != NULL && biop->org->orgname != NULL && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint))) { if (biop->org == NULL) { biop->org = OrgRefNew(); } if (biop->org->orgname == NULL) { biop->org->orgname = OrgNameNew (); } rval = SetStringValue (&(biop->org->orgname->lineage), value, existing_text); } } else if (scp->data.intvalue == Source_qual_division) { if ((biop->org == NULL && IsStringConstraintEmpty (constraint)) || (biop->org != NULL && biop->org->orgname == NULL && IsStringConstraintEmpty (constraint)) || (biop->org != NULL && biop->org->orgname != NULL && DoesStringMatchConstraint (biop->org->orgname->div, constraint))) { if (biop->org == NULL) { biop->org = OrgRefNew(); } if (biop->org->orgname == NULL) { biop->org->orgname = OrgNameNew (); } rval = SetStringValue (&(biop->org->orgname->div), value, existing_text); } } else if (scp->data.intvalue == Source_qual_dbxref) { if (biop->org == NULL) { biop->org = OrgRefNew (); } rval = SetDbxrefString (&(biop->org->db), constraint, value, existing_text); } else if (scp->data.intvalue == Source_qual_taxid) { rval = SetTaxonomyId(biop, 
constraint, value, existing_text); } else if (scp->data.intvalue == Source_qual_all_notes) { vn.choice = SourceQualChoice_textqual; vn.data.intvalue = Source_qual_subsource_note; vn.next = NULL; rval |= SetSourceQualInBioSource (biop, &vn, constraint, value, existing_text); vn.data.intvalue = Source_qual_orgmod_note; rval |= SetSourceQualInBioSource (biop, &vn, constraint, value, existing_text); } else if (scp->data.intvalue == Source_qual_all_primers) { rval = SetPrimerValueInBioSource (biop, Source_qual_fwd_primer_name, constraint, value, existing_text); rval |= SetPrimerValueInBioSource (biop, Source_qual_fwd_primer_seq, constraint, value, existing_text); rval |= SetPrimerValueInBioSource (biop, Source_qual_rev_primer_name, constraint, value, existing_text); rval |= SetPrimerValueInBioSource (biop, Source_qual_rev_primer_seq, constraint, value, existing_text); } else if (scp->data.intvalue == Source_qual_fwd_primer_name || scp->data.intvalue == Source_qual_fwd_primer_seq || scp->data.intvalue == Source_qual_rev_primer_name || scp->data.intvalue == Source_qual_rev_primer_seq) { /* remove from new primer object */ rval = SetPrimerValueInBioSource (biop, scp->data.intvalue, constraint, value, existing_text); } else if (scp->data.intvalue == Source_qual_all_quals) { /* will not do this */ } else { orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield); if (orgmod_subtype == -1) { subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield); if (subsrc_subtype > -1) { if (existing_text == ExistingTextOption_add_qual) { /* create new subsource */ ssp = SubSourceNew (); ssp->subtype = subsrc_subtype; rval = SetThreeFieldSubfield (&(ssp->name), subfield, value, existing_text); /* find last in current list */ ssp_prev = biop->subtype; while (ssp_prev != NULL && ssp_prev->next != NULL) { ssp_prev = ssp_prev->next; } /* add to end of list */ if (ssp_prev == NULL) { biop->subtype = ssp; } else { ssp_prev->next = ssp; } } else { ssp = 
biop->subtype; while (ssp != NULL) { ssp_next = ssp->next; if (ssp->subtype == subsrc_subtype) { if (subfield == 0) { if (DoesStringMatchConstraint (ssp->name, constraint)) { rval = SetStringValue (&(ssp->name), value, existing_text); found = TRUE; } } else { does_match = TRUE; if (!IsStringConstraintEmpty (constraint)) { tmp = GetThreeFieldSubfield (ssp->name, subfield); does_match = DoesStringMatchConstraint (tmp, constraint); } if (does_match) { rval = SetThreeFieldSubfield (&(ssp->name), subfield, value, existing_text); found = TRUE; } } if (rval && StringHasNoText (ssp->name) && !IsNonTextSourceQual(scp->data.intvalue)) { if (ssp_prev == NULL) { biop->subtype = ssp->next; } else { ssp_prev->next = ssp->next; } ssp->next = NULL; ssp = SubSourceFree (ssp); } else { ssp_prev = ssp; } } else { ssp_prev = ssp; } ssp = ssp_next; } if (!found && IsStringConstraintEmpty (constraint)) { ssp = SubSourceNew (); ssp->subtype = subsrc_subtype; if (StringHasNoText (value) && IsNonTextSourceQual(scp->data.intvalue)) { ssp->name = StringSave (""); } else { rval = SetThreeFieldSubfield (&(ssp->name), subfield, value, existing_text); } if (ssp_prev == NULL) { biop->subtype = ssp; } else { ssp_prev->next = ssp; } } } } } else { if (existing_text == ExistingTextOption_add_qual) { if (biop->org == NULL) { biop->org = OrgRefNew(); } if (biop->org->orgname == NULL) { biop->org->orgname = OrgNameNew(); } /* create new orgmod */ mod = OrgModNew (); mod->subtype = orgmod_subtype; rval = SetThreeFieldSubfield (&(mod->subname), subfield, value, existing_text); /* find last in current list */ mod_prev = biop->org->orgname->mod; while (mod_prev != NULL && mod_prev->next != NULL) { mod_prev = mod_prev->next; } /* add to end of list */ if (mod_prev == NULL) { biop->org->orgname->mod = mod; } else { mod_prev->next = mod; } } else { if (biop->org != NULL && biop->org->orgname != NULL) { mod = biop->org->orgname->mod; while (mod != NULL) { mod_next = mod->next; if (mod->subtype == 
orgmod_subtype) { if (subfield == 0) { if (DoesStringMatchConstraint (mod->subname, constraint)) { rval = SetStringValue (&(mod->subname), value, existing_text); found = TRUE; } } else { does_match = TRUE; if (!IsStringConstraintEmpty (constraint)) { tmp = GetThreeFieldSubfield (mod->subname, subfield); does_match = DoesStringMatchConstraint (tmp, constraint); tmp = MemFree (tmp); } if (does_match) { rval = SetThreeFieldSubfield (&(mod->subname), subfield, value, existing_text); found = TRUE; } } if (rval && StringHasNoText (mod->subname) && !IsNonTextSourceQual(scp->data.intvalue)) { if (mod_prev == NULL) { biop->org->orgname->mod = mod->next; } else { mod_prev->next = mod->next; } mod->next = NULL; mod = OrgModFree (mod); } else { mod_prev = mod; } } else { mod_prev = mod; } mod = mod_next; } } if (!found && IsStringConstraintEmpty (constraint)) { if (biop->org == NULL) { biop->org = OrgRefNew(); } if (biop->org->orgname == NULL) { biop->org->orgname = OrgNameNew(); } mod = OrgModNew (); mod->subtype = orgmod_subtype; rval = SetThreeFieldSubfield (&(mod->subname), subfield, value, existing_text); if (mod_prev == NULL) { biop->org->orgname->mod = mod; } else { mod_prev->next = mod; } } } } } break; case SourceQualChoice_location: str = LocNameFromGenome (biop->genome); if (DoesStringMatchConstraint (str, constraint)) { biop->genome = GenomeFromSrcLoc (scp->data.intvalue); rval = TRUE; } break; case SourceQualChoice_origin: str = OriginNameFromOrigin (biop->origin); if (DoesStringMatchConstraint (str, constraint)) { biop->origin = OriginFromSrcOrig(scp->data.intvalue); rval = TRUE; } break; case SourceQualChoice_gcode: if (biop->org == NULL) { biop->org = OrgRefNew(); } if (biop->org->orgname == NULL) { biop->org->orgname = OrgNameNew(); } biop->org->orgname->gcode = scp->data.intvalue; rval = TRUE; break; case SourceQualChoice_mgcode: if (biop->org == NULL) { biop->org = OrgRefNew(); } if (biop->org->orgname == NULL) { biop->org->orgname = OrgNameNew(); } 
biop->org->orgname->mgcode = scp->data.intvalue; rval = TRUE; break; } return rval; } NLM_EXTERN BioseqPtr GetRepresentativeBioseqFromBioseqSet (BioseqSetPtr bssp) { SeqEntryPtr sep; BioseqPtr bsp = NULL; if (bssp == NULL || (bssp->_class != BioseqseqSet_class_segset && bssp->_class != BioseqseqSet_class_nuc_prot)) { return NULL; } sep = bssp->seq_set; if (sep->data.ptrvalue == NULL) { bsp = NULL; } else if (IS_Bioseq(sep)) { bsp = sep->data.ptrvalue; } else if (IS_Bioseq_set (sep)) { bsp = GetRepresentativeBioseqFromBioseqSet (sep->data.ptrvalue); } return bsp; } NLM_EXTERN BioseqPtr GetSequenceForObject (Uint1 choice, Pointer data) { BioseqPtr bsp = NULL; SeqFeatPtr sfp; SeqDescrPtr sdp; ObjValNodePtr ovp; CGPSetPtr cgp; ValNodePtr vnp; if (data == NULL) return NULL; switch (choice) { case OBJ_BIOSEQ: bsp = (BioseqPtr) data; break; case OBJ_SEQFEAT: sfp = (SeqFeatPtr) data; bsp = BioseqFindFromSeqLoc (sfp->location); break; case OBJ_SEQDESC: sdp = (SeqDescrPtr) data; if (sdp->extended) { ovp = (ObjValNodePtr) sdp; if (ovp->idx.parenttype == OBJ_BIOSEQ && ovp->idx.parentptr != NULL) { bsp = ovp->idx.parentptr; } else if (ovp->idx.parenttype == OBJ_BIOSEQSET) { bsp = GetRepresentativeBioseqFromBioseqSet (ovp->idx.parentptr); } } break; case 0: cgp = (CGPSetPtr) data; for (vnp = cgp->cds_list; vnp != NULL && bsp == NULL; vnp = vnp->next) { sfp = vnp->data.ptrvalue; if (sfp != NULL) { bsp = BioseqFindFromSeqLoc (sfp->location); } } for (vnp = cgp->mrna_list; vnp != NULL && bsp == NULL; vnp = vnp->next) { sfp = vnp->data.ptrvalue; if (sfp != NULL) { bsp = BioseqFindFromSeqLoc (sfp->location); } } for (vnp = cgp->gene_list; vnp != NULL && bsp == NULL; vnp = vnp->next) { sfp = vnp->data.ptrvalue; if (sfp != NULL) { bsp = BioseqFindFromSeqLoc (sfp->location); } } break; } return bsp; } NLM_EXTERN BioSourcePtr GetBioSourceFromObject (Uint1 choice, Pointer data) { BioSourcePtr biop = NULL; SeqDescrPtr sdp; SeqFeatPtr sfp; BioseqPtr bsp = NULL; SeqMgrDescContext context; if 
(data == NULL) return NULL; switch (choice) { case OBJ_SEQDESC: sdp = (SeqDescrPtr) data; if (sdp->choice == Seq_descr_source) { biop = sdp->data.ptrvalue; } break; case OBJ_SEQFEAT: sfp = (SeqFeatPtr) data; if (sfp->data.choice == SEQFEAT_BIOSRC) { biop = sfp->data.value.ptrvalue; } break; } if (biop == NULL) { bsp = GetSequenceForObject (choice, data); sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); if (sdp != NULL && sdp->choice == Seq_descr_source) { biop = sdp->data.ptrvalue; } } return biop; } NLM_EXTERN Uint2 GetEntityIdFromObject (Uint1 choice, Pointer data) { Uint2 entityID = 0; SeqDescrPtr sdp; ObjValNodePtr ovp; SeqFeatPtr sfp; BioseqPtr bsp; if (data == NULL) return 0; switch (choice) { case OBJ_SEQDESC: sdp = (SeqDescrPtr) data; if (sdp->extended) { ovp = (ObjValNodePtr) sdp; entityID = ovp->idx.entityID; } break; case OBJ_SEQFEAT: sfp = (SeqFeatPtr) data; entityID = sfp->idx.entityID; break; default: bsp = GetSequenceForObject (choice, data); if (bsp != NULL) { entityID = bsp->idx.entityID; } break; } return entityID; } /* functions for dealing with CDS-Gene-Prot sets */ static CharPtr GetFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp) { CharPtr str = NULL; ValNodePtr vnp; SeqFeatPtr sfp; GeneRefPtr grp; RnaRefPtr rrp; ProtRefPtr prp; FeatureFieldPtr ffield; if (c == NULL) return NULL; switch (field) { case CDSGeneProt_field_cds_comment: case CDSGeneProt_field_cds_inference: case CDSGeneProt_field_codon_start: ffield = FeatureFieldFromCDSGeneProtField (field); for (vnp = c->cds_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; str = GetQualFromFeature (sfp, ffield, scp); } ffield = FeatureFieldFree (ffield); break; case CDSGeneProt_field_gene_locus: case CDSGeneProt_field_gene_inference: ffield = FeatureFieldFromCDSGeneProtField (field); for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; str = 
GetQualFromFeature (sfp, ffield, scp); } ffield = FeatureFieldFree (ffield); break; case CDSGeneProt_field_gene_description: for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL && !StringHasNoText (grp->desc) && DoesStringMatchConstraint(grp->desc, scp)) { str = StringSave (grp->desc); } } break; case CDSGeneProt_field_gene_comment: for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { str = StringSave (sfp->comment); } } break; case CDSGeneProt_field_gene_allele: for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL && !StringHasNoText (grp->allele) && DoesStringMatchConstraint(grp->allele, scp)) { str = StringSave (grp->allele); } } break; case CDSGeneProt_field_gene_maploc: for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL && !StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp)) { str = StringSave (grp->maploc); } } break; case CDSGeneProt_field_gene_locus_tag: for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL && !StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp)) { str = StringSave (grp->locus_tag); } } break; case CDSGeneProt_field_gene_synonym: for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) 
vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL) { str = GetFirstValNodeStringMatch (grp->syn, scp); } } break; case CDSGeneProt_field_gene_old_locus_tag: for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL) { str = GetFirstGBQualMatch (sfp->qual, "old_locus_tag", 0, scp); } } break; case CDSGeneProt_field_mrna_product: for (vnp = c->mrna_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA && (rrp = sfp->data.value.ptrvalue) != NULL && rrp->ext.choice == 1 && !StringHasNoText (rrp->ext.value.ptrvalue) && DoesStringMatchConstraint(rrp->ext.value.ptrvalue, scp)) { str = StringSave (rrp->ext.value.ptrvalue); } } break; case CDSGeneProt_field_mrna_comment: for (vnp = c->mrna_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { str = StringSave (sfp->comment); } } break; case CDSGeneProt_field_prot_name: for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL) { str = GetFirstValNodeStringMatch (prp->name, scp); } } break; case CDSGeneProt_field_prot_description: for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { str = StringSave (prp->desc); } } break; case CDSGeneProt_field_prot_ec_number: for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = 
vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL) { str = GetFirstValNodeStringMatch (prp->ec, scp); } } break; case CDSGeneProt_field_prot_activity: for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL) { str = GetFirstValNodeStringMatch (prp->activity, scp); } } break; case CDSGeneProt_field_prot_comment: for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { str = StringSave (sfp->comment); } } break; case CDSGeneProt_field_mat_peptide_name: for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL) { str = GetFirstValNodeStringMatch (prp->name, scp); } } break; case CDSGeneProt_field_mat_peptide_description: for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { str = StringSave (prp->desc); } } break; case CDSGeneProt_field_mat_peptide_ec_number: for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) 
!= NULL) { str = GetFirstValNodeStringMatch (prp->ec, scp); } } break; case CDSGeneProt_field_mat_peptide_activity: for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL) { str = GetFirstValNodeStringMatch (prp->activity, scp); } } break; case CDSGeneProt_field_mat_peptide_comment: for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { str = StringSave (sfp->comment); } } break; } return str; }
/* RemoveFieldValueFromCGPSet
 *
 * Removes the requested CDSGeneProt field from the features of a set.
 *   c      the CDS-Gene-Prot set to edit; NULL returns FALSE
 *   field  one of the CDSGeneProt_field_* values
 *   scp    string constraint the current value has to satisfy
 * Every feature in the list belonging to the field is visited (the loops
 * run to the end of the list), and each plain string value that has text
 * and matches scp is freed.
 * Returns TRUE when at least one value was removed.
 */
static Boolean RemoveFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp) { Boolean rval = FALSE; ValNodePtr vnp; SeqFeatPtr sfp; GeneRefPtr grp; RnaRefPtr rrp; ProtRefPtr prp; FeatureFieldPtr ffield; if (c == NULL) return FALSE; switch (field) { case CDSGeneProt_field_cds_comment: case CDSGeneProt_field_cds_inference: case CDSGeneProt_field_codon_start: ffield = FeatureFieldFromCDSGeneProtField (field); for (vnp = c->cds_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; rval |= RemoveQualFromFeature (sfp, ffield, scp); } ffield = FeatureFieldFree (ffield); break; case CDSGeneProt_field_gene_locus: case CDSGeneProt_field_gene_inference: ffield = FeatureFieldFromCDSGeneProtField (field); for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; rval |= RemoveQualFromFeature (sfp, ffield, scp); } ffield = FeatureFieldFree (ffield); break; case CDSGeneProt_field_gene_description: for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL && !StringHasNoText (grp->desc)
&& DoesStringMatchConstraint(grp->desc, scp)) { grp->desc = MemFree(grp->desc); rval = TRUE; } } break; case CDSGeneProt_field_gene_comment: for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { sfp->comment = MemFree (sfp->comment); rval = TRUE; } } break; case CDSGeneProt_field_gene_allele: for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL && !StringHasNoText (grp->allele) && DoesStringMatchConstraint(grp->allele, scp)) { grp->allele = MemFree (grp->allele); rval = TRUE; } } break; case CDSGeneProt_field_gene_maploc: for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL && !StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp)) { grp->maploc = MemFree (grp->maploc); rval = TRUE; } } break; case CDSGeneProt_field_gene_locus_tag: for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL && !StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp)) { grp->locus_tag = MemFree (grp->locus_tag); rval = TRUE; } } break; case CDSGeneProt_field_gene_synonym: for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL) { rval |= RemoveValNodeStringMatch (&(grp->syn), scp); } } break; case CDSGeneProt_field_gene_old_locus_tag: for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL) { rval |= 
RemoveGBQualMatch (&(sfp->qual), "old_locus_tag", 0, scp); } } break; case CDSGeneProt_field_mrna_product: for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA && (rrp = sfp->data.value.ptrvalue) != NULL && rrp->ext.choice == 1 && !StringHasNoText (rrp->ext.value.ptrvalue) && DoesStringMatchConstraint(rrp->ext.value.ptrvalue, scp)) { rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue); rrp->ext.choice = 0; rval = TRUE; } } break; case CDSGeneProt_field_mrna_comment: for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { sfp->comment = MemFree (sfp->comment); rval = TRUE; } } break; case CDSGeneProt_field_prot_name: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL) { rval |= RemoveValNodeStringMatch (&(prp->name), scp); } } break; case CDSGeneProt_field_prot_description: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { prp->desc = MemFree (prp->desc); rval = TRUE; } } break; case CDSGeneProt_field_prot_ec_number: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL) { rval |= RemoveValNodeStringMatch (&(prp->ec), scp); } } break; case CDSGeneProt_field_prot_activity: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { 
sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL) { rval |= RemoveValNodeStringMatch (&(prp->activity), scp); } } break; case CDSGeneProt_field_prot_comment: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { sfp->comment = MemFree (sfp->comment); rval = TRUE; } } break; case CDSGeneProt_field_mat_peptide_name: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL) { rval |= RemoveValNodeStringMatch (&(prp->name), scp); } } break; case CDSGeneProt_field_mat_peptide_description: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { prp->desc = MemFree (prp->desc); rval = TRUE; } } break; case CDSGeneProt_field_mat_peptide_ec_number: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL) { rval |= RemoveValNodeStringMatch (&(prp->ec), scp); } } break; case CDSGeneProt_field_mat_peptide_activity: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL) { rval |= 
RemoveValNodeStringMatch (&(prp->activity), scp); } } break; case CDSGeneProt_field_mat_peptide_comment: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) { sfp->comment = MemFree (sfp->comment); rval = TRUE; } } break; } return rval; } static SeqFeatPtr CreateGeneForCGPSet (CGPSetPtr c) { SeqFeatPtr gene = NULL, sfp = NULL; ValNodePtr vnp; if (c == NULL) return NULL; for (vnp = c->cds_list; vnp != NULL && sfp == NULL; vnp = vnp->next) { sfp = vnp->data.ptrvalue; } for (vnp = c->mrna_list; vnp != NULL && sfp == NULL; vnp = vnp->next) { sfp = vnp->data.ptrvalue; } gene = CreateGeneForFeature (sfp); return gene; } static Boolean SetFieldValueInCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { Boolean rval = FALSE; ValNodePtr vnp; SeqFeatPtr sfp; GeneRefPtr grp; ProtRefPtr prp; FeatureFieldPtr ffield; if (c == NULL) return FALSE; switch (field) { case CDSGeneProt_field_cds_comment: case CDSGeneProt_field_cds_inference: case CDSGeneProt_field_codon_start: ffield = FeatureFieldFromCDSGeneProtField (field); for (vnp = c->cds_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; rval |= SetQualOnFeature (sfp, ffield, scp, value, existing_text); } ffield = FeatureFieldFree (ffield); break; case CDSGeneProt_field_gene_locus: if (c->gene_list == NULL && scp == NULL) { sfp = CreateGeneForCGPSet (c); if (sfp != NULL) { ValNodeAddPointer (&(c->gene_list), 0, sfp); } } for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL && DoesStringMatchConstraint(grp->locus, scp)) { rval |= SetStringValue ( &(grp->locus), value, existing_text); } } break; case CDSGeneProt_field_gene_description: 
case CDSGeneProt_field_gene_inference: ffield = FeatureFieldFromCDSGeneProtField (field); for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; rval |= SetQualOnFeature (sfp, ffield, scp, value, existing_text); } ffield = FeatureFieldFree (ffield); break; case CDSGeneProt_field_gene_comment: for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && DoesStringMatchConstraint(sfp->comment, scp)) { rval |= SetStringValue ( &(sfp->comment), value, existing_text); } } break; case CDSGeneProt_field_gene_allele: for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL && DoesStringMatchConstraint(grp->allele, scp)) { rval |= SetStringValue (&(grp->allele), value, existing_text); } } break; case CDSGeneProt_field_gene_maploc: for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL && DoesStringMatchConstraint(grp->maploc, scp)) { rval |= SetStringValue ( &(grp->maploc), value, existing_text); } } break; case CDSGeneProt_field_gene_locus_tag: for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL && DoesStringMatchConstraint(grp->locus_tag, scp)) { rval |= SetStringValue ( &(grp->locus_tag), value, existing_text); } } break; case CDSGeneProt_field_gene_synonym: for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && (grp = sfp->data.value.ptrvalue) != NULL) { rval |= SetStringsInValNodeStringList (&(grp->syn), scp, value, existing_text); } } break; case 
CDSGeneProt_field_gene_old_locus_tag: ffield = FeatureFieldFromCDSGeneProtField (field); for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL) { rval |= SetStringInGBQualList (&(sfp->qual), ffield->field, scp, value, existing_text); } } ffield = FeatureFieldFree (ffield); break; case CDSGeneProt_field_mrna_product: for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; rval |= SetRNAProductString (sfp, scp, value, existing_text); } break; case CDSGeneProt_field_mrna_comment: for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL&& DoesStringMatchConstraint(sfp->comment, scp)) { rval |= SetStringValue ( &(sfp->comment), value, existing_text); } } break; case CDSGeneProt_field_prot_name: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL) { rval |= SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text); } } break; case CDSGeneProt_field_prot_description: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL && DoesStringMatchConstraint(prp->desc, scp)) { rval |= SetStringValue ( &(prp->desc), value, existing_text); } } break; case CDSGeneProt_field_prot_ec_number: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL) { rval |= SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text); } } break; case CDSGeneProt_field_prot_activity: for (vnp = c->prot_list; vnp != 
NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_PROT && (prp = sfp->data.value.ptrvalue) != NULL) { rval |= SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text); } } break; case CDSGeneProt_field_prot_comment: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT && DoesStringMatchConstraint(sfp->comment, scp)) { rval |= SetStringValue ( &(sfp->comment), value, existing_text); } } break; case CDSGeneProt_field_mat_peptide_name: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL) { rval |= SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text); } } break; case CDSGeneProt_field_mat_peptide_description: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL && DoesStringMatchConstraint(prp->desc, scp)) { rval |= SetStringValue ( &(prp->desc), value, existing_text); } } break; case CDSGeneProt_field_mat_peptide_ec_number: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL) { rval |= SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text); } } break; case CDSGeneProt_field_mat_peptide_activity: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT && sfp->idx.subtype == 
FEATDEF_mat_peptide_aa && (prp = sfp->data.value.ptrvalue) != NULL) { rval |= SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text); } } break; case CDSGeneProt_field_mat_peptide_comment: for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa && DoesStringMatchConstraint(sfp->comment, scp)) { rval |= SetStringValue ( &(sfp->comment), value, existing_text); } } break; } return rval; } static Boolean SortFieldInCGPSet (CGPSetPtr c, Uint2 field, Uint2 order) { ValNodePtr vnp; SeqFeatPtr sfp; Boolean rval = FALSE; if (c == NULL) { return FALSE; } if (field == CDSGeneProt_field_prot_name) { for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT) { rval |= SortProtNames (sfp, order); } } } return rval; } static MolInfoPtr GetMolInfoForBioseq (BioseqPtr bsp) { MolInfoPtr m = NULL; SeqDescrPtr sdp; if (bsp == NULL) return NULL; sdp = bsp->descr; while (sdp != NULL && sdp->choice != Seq_descr_molinfo) { sdp = sdp->next; } if (sdp != NULL) { m = (MolInfoPtr) sdp->data.ptrvalue; } return m; } static CharPtr GetSequenceQualFromBioseq (BioseqPtr bsp, ValNodePtr field) { CharPtr rval = NULL; MolInfoPtr m; if (bsp == NULL || field == NULL) return NULL; switch (field->choice) { case MolinfoField_molecule: m = GetMolInfoForBioseq (bsp); if (m != NULL) { rval = BiomolNameFromBiomol (m->biomol); } break; case MolinfoField_technique: m = GetMolInfoForBioseq (bsp); if (m != NULL) { rval = TechNameFromTech (m->tech); } break; case MolinfoField_completedness: m = GetMolInfoForBioseq (bsp); if (m != NULL) { rval = CompletenessNameFromCompleteness (m->completeness); } break; case MolinfoField_mol_class: rval = MolNameFromMol (bsp->mol); break; case MolinfoField_topology: rval = TopologyNameFromTopology (bsp->topology); break; case MolinfoField_strand: rval = 
StrandNameFromStrand (bsp->strand); break; } if (rval != NULL) rval = StringSave (rval); return rval; } static Boolean RemoveSequenceQualFromBioseq (BioseqPtr bsp, ValNodePtr field) { MolInfoPtr m; Boolean rval = FALSE; if (bsp == NULL || field == NULL) return FALSE; switch (field->choice) { case MolinfoField_molecule: m = GetMolInfoForBioseq (bsp); if (m != NULL) { m->biomol = 0; rval = TRUE; } break; case MolinfoField_technique: m = GetMolInfoForBioseq (bsp); if (m != NULL) { m->tech = 0; rval = TRUE; } break; case MolinfoField_completedness: m = GetMolInfoForBioseq (bsp); if (m != NULL) { m->completeness = 0; rval = TRUE; } break; case MolinfoField_mol_class: bsp->mol = 0; rval = TRUE; break; case MolinfoField_topology: bsp->topology = 0; rval = TRUE; break; case MolinfoField_strand: bsp->strand = 0; rval = TRUE; break; } return rval; } static MolInfoPtr AddMolInfoToBioseq (BioseqPtr bsp) { SeqDescrPtr sdp; MolInfoPtr m; sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_molinfo); m = MolInfoNew (); sdp->data.ptrvalue = m; return m; } static Boolean SetSequenceQualOnBioseq (BioseqPtr bsp, ValNodePtr field) { MolInfoPtr m = NULL; Boolean rval = FALSE; Int4 new_val; if (bsp == NULL || field == NULL) return FALSE; switch (field->choice) { case MolinfoField_molecule: if (m == NULL) { m = GetMolInfoForBioseq (bsp); if (m == NULL) { m = AddMolInfoToBioseq (bsp); rval = TRUE; } } new_val = BiomolFromMoleculeType (field->data.intvalue); if (m->biomol != new_val) { m->biomol = new_val; rval = TRUE; } break; case MolinfoField_technique: if (m == NULL) { m = GetMolInfoForBioseq (bsp); if (m == NULL) { m = AddMolInfoToBioseq (bsp); } } new_val = TechFromTechniqueType (field->data.intvalue); if (m->tech != new_val) { m->tech = new_val; rval = TRUE; } break; case MolinfoField_completedness: if (m == NULL) { m = GetMolInfoForBioseq (bsp); if (m == NULL) { m = AddMolInfoToBioseq (bsp); } } new_val = CompletenessFromCompletednessType (field->data.intvalue); if (m->completeness != 
new_val) { m->completeness = new_val; rval = TRUE; } break; case MolinfoField_mol_class: new_val = MolFromMoleculeClassType (field->data.intvalue); if (bsp->mol != new_val) { bsp->mol = new_val; rval = TRUE; } break; case MolinfoField_topology: new_val = TopologyFromTopologyType (field->data.intvalue); if (bsp->topology != new_val) { bsp->topology = new_val; rval = TRUE; } break; case MolinfoField_strand: new_val = StrandFromStrandType (field->data.intvalue); if (bsp->strand != new_val) { bsp->strand = new_val; rval = TRUE; } break; } return rval; }


/* GetGenomeProjectIdFromBioseq
 *
 * Looks through the user descriptors returned by SeqMgrGetNextDescriptor
 * for "GenomeProjectsDB" user objects and returns the first integer
 * "ProjectID" field, printed in decimal, that satisfies scp (an empty
 * constraint accepts any value).
 * Returns a StringSave copy, or NULL when nothing qualifies.
 */
static CharPtr GetGenomeProjectIdFromBioseq (BioseqPtr bsp, StringConstraintPtr scp)
{
  SeqDescrPtr       sdp;
  SeqMgrDescContext context;
  Char              buf[50];
  UserObjectPtr     uop;
  UserFieldPtr      ufp;

  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
  while (sdp != NULL) {
    uop = (UserObjectPtr) sdp->data.ptrvalue;
    if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "GenomeProjectsDB") == 0) {
      ufp = uop->data;
      while (ufp != NULL) {
        /* choice 2 is the only field type whose intvalue is read here */
        if (ufp->label != NULL && StringCmp (ufp->label->str, "ProjectID") == 0 && ufp->choice == 2) {
          sprintf (buf, "%d", ufp->data.intvalue);
          if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
            return StringSave (buf);
          }
        }
        ufp = ufp->next;
      }
    }
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context);
  }
  return NULL;
}


/* RemoveGenomeProjectIdFromBioseq
 *
 * Flags for deletion every "GenomeProjectsDB" user descriptor that holds
 * an integer "ProjectID" field satisfying scp.  The whole descriptor is
 * flagged (idx.deleteme), not just the field, and only when the descriptor
 * is marked as extended.
 * Returns TRUE when at least one descriptor was flagged.
 */
static Boolean RemoveGenomeProjectIdFromBioseq (BioseqPtr bsp, StringConstraintPtr scp)
{
  SeqDescrPtr       sdp;
  SeqMgrDescContext context;
  Char              buf[50];
  UserObjectPtr     uop;
  UserFieldPtr      ufp;
  ObjValNodePtr     ovn;
  Boolean           rval = FALSE;

  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
  while (sdp != NULL) {
    uop = (UserObjectPtr) sdp->data.ptrvalue;
    if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "GenomeProjectsDB") == 0) {
      ufp = uop->data;
      while (ufp != NULL) {
        if (ufp->label != NULL && StringCmp (ufp->label->str, "ProjectID") == 0 && ufp->choice == 2) {
          sprintf (buf, "%d", ufp->data.intvalue);
          if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
            if (sdp->extended != 0) {
              ovn = (ObjValNodePtr) sdp;
              ovn->idx.deleteme = TRUE;
              rval = TRUE;
            }
          }
        }
        ufp = ufp->next;
      }
    }
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context);
  }
  return rval;
}


/* SetGenomeProjectIdOnBioseq
 *
 * Applies value to the integer "ProjectID" fields of the Bioseq's
 * "GenomeProjectsDB" user descriptors.
 *   bsp            the Bioseq to edit; NULL returns FALSE
 *   scp            constraint on the current ID, printed in decimal
 *   value          new text; must consist of digits only
 *   existing_text  passed through to SetStringValue, which combines value
 *                  with the current ID
 * A field is updated only when SetStringValue reports a change and the
 * combined text is still all digits.  When nothing was updated and the
 * constraint is empty, a new descriptor holding the ID is created.
 * Returns TRUE when a field was updated or a descriptor was created.
 */
static Boolean SetGenomeProjectIdOnBioseq (BioseqPtr bsp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
{
  SeqDescrPtr       sdp;
  SeqMgrDescContext context;
  Char              buf[50];
  CharPtr           tmp;
  UserObjectPtr     uop;
  UserFieldPtr      ufp;
  Boolean           rval = FALSE;

  if (bsp == NULL || !StringIsAllDigits (value)) {
    return FALSE;
  }

  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
  while (sdp != NULL) {
    uop = (UserObjectPtr) sdp->data.ptrvalue;
    if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "GenomeProjectsDB") == 0) {
      ufp = uop->data;
      while (ufp != NULL) {
        if (ufp->label != NULL && StringCmp (ufp->label->str, "ProjectID") == 0 && ufp->choice == 2) {
          sprintf (buf, "%d", ufp->data.intvalue);
          if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
            /* work on a heap copy so SetStringValue can replace it */
            tmp = StringSave (buf);
            if (SetStringValue (&tmp, value, existing_text) && StringIsAllDigits (tmp)) {
              ufp->data.intvalue = atoi (tmp);
              rval = TRUE;
            }
            tmp = MemFree (tmp);
          }
        }
        ufp = ufp->next;
      }
    }
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context);
  }

  /* NOTE(review): this branch also runs when an existing ProjectID was found
   * but left unchanged above, which adds a second GenomeProjectsDB
   * descriptor - confirm that this is intended.
   */
  if (!rval && IsStringConstraintEmpty (scp)) {
    sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user);
    /* without a descriptor there is nowhere to attach the user object */
    if (sdp != NULL) {
      uop = CreateGenomeProjectsDBUserObject ();
      AddIDsToGenomeProjectsDBUserObject (uop, atoi (value), 0);
      sdp->data.ptrvalue = uop;
      rval = TRUE;
    }
  }
  return rval;
}


/* GetBioProjectIdFromBioseq
 *
 * Looks through the user descriptors returned by SeqMgrGetNextDescriptor
 * for "DBLink" user objects and returns the first "BioProject" value that
 * satisfies scp (an empty constraint accepts any value).  The field may
 * hold a single string (choice 1) or an array of ufp->num strings
 * (choice 7).
 * Returns a StringSave copy, or NULL when nothing qualifies.
 */
NLM_EXTERN CharPtr GetBioProjectIdFromBioseq (BioseqPtr bsp, StringConstraintPtr scp)
{
  SeqDescrPtr       sdp;
  SeqMgrDescContext context;
  CharPtr           val;
  UserObjectPtr     uop;
  UserFieldPtr      ufp;
  CharPtr PNTR      cpp;
  Int4              i;

  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
  while (sdp != NULL) {
    uop = (UserObjectPtr) sdp->data.ptrvalue;
    if (uop != NULL && uop->type != NULL && StringCmp
(uop->type->str, "DBLink") == 0) { ufp = uop->data; while (ufp != NULL) { if (ufp->label != NULL && StringCmp (ufp->label->str, "BioProject") == 0) { if (ufp->choice == 1) { val = ufp->data.ptrvalue; if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (val, scp)) { return StringSave (val); } } else if (ufp->choice == 7 && ufp->num > 0 && (cpp = (CharPtr PNTR) ufp->data.ptrvalue) != NULL) { for (i = 0; i < ufp->num; i++) { if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp)) { return StringSave (cpp[i]); } } } } ufp = ufp->next; } } sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context); } return NULL; } static Boolean SetTextDescriptor (SeqDescrPtr sdp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { Boolean rval = FALSE; CharPtr cp; ObjValNodePtr ovp; Boolean was_empty; if (sdp == NULL) { return FALSE; } if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (sdp->data.ptrvalue, scp)) { if (StringHasNoText (sdp->data.ptrvalue)) { was_empty = TRUE; } else { was_empty = FALSE; } cp = sdp->data.ptrvalue; if (SetStringValue (&cp, value, existing_text)) { rval = TRUE; } sdp->data.ptrvalue = cp; if (was_empty) { ovp = (ObjValNodePtr) sdp; ovp->idx.deleteme = FALSE; } } return rval; } static CharPtr s_StringEndsWith (CharPtr str, CharPtr end) { Int4 str_len, end_len; if (end == NULL || str == NULL) { return NULL; } str_len = StringLen (str); end_len = StringLen (end); if (end_len > str_len) { return NULL; } if (StringCmp (str + str_len - end_len, end) == 0) { return str + str_len - end_len; } else { return NULL; } } static CharPtr DbnameValFromPrefixOrSuffix (CharPtr val) { CharPtr rval = NULL, stop; if (val == NULL) { return NULL; } if (StringNCmp (val, "##", 2) == 0) { val += 2; } rval = StringSave (val); if ((stop = s_StringEndsWith (rval, "-START##")) != NULL || (stop = s_StringEndsWith (rval, "-START##")) != NULL || (stop = s_StringEndsWith (rval, "START##")) != NULL || (stop = s_StringEndsWith 
(rval, "-END##")) != NULL || (stop = s_StringEndsWith (rval, "END##")) != NULL) { *stop = 0; } return rval; } NLM_EXTERN Boolean IsUserFieldStructuredCommentPrefixOrSuffix (UserFieldPtr ufp) { if (ufp == NULL || ufp->label == NULL) { return FALSE; } else if (StringCmp (ufp->label->str, "StructuredCommentPrefix") == 0 || StringCmp (ufp->label->str, "StructuredCommentSuffix") == 0) { return TRUE; } else { return FALSE; } } NLM_EXTERN CharPtr GetStructuredCommentFieldFromUserObject (UserObjectPtr uop, StructuredCommentFieldPtr field, StringConstraintPtr scp) { UserFieldPtr curr; CharPtr rval = NULL; if (!IsUserObjectStructuredComment(uop) || field == NULL) { return NULL; } if (field->choice == StructuredCommentField_database) { for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) { if (IsUserFieldStructuredCommentPrefixOrSuffix(curr) && curr->choice == 1) { rval = DbnameValFromPrefixOrSuffix (curr->data.ptrvalue); if (!IsStringConstraintEmpty (scp) && !DoesStringMatchConstraint (rval, scp)) { rval = MemFree (rval); } } } } else if (field->choice == StructuredCommentField_named) { for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) { if (curr->label != NULL && StringICmp (curr->label->str, field->data.ptrvalue) == 0) { if (curr->choice == 1) { if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (curr->data.ptrvalue, scp)) { rval = StringSave (curr->data.ptrvalue); } } } } } else if (field->choice == StructuredCommentField_field_name) { for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) { if (!IsUserFieldStructuredCommentPrefixOrSuffix (curr) && DoesObjectIdMatchStringConstraint(curr->label, scp)) { rval = GetObjectIdString (curr->label); } } } return rval; } static Boolean RemoveStructuredCommentFieldFromUserObject (UserObjectPtr uop, ValNodePtr field, StringConstraintPtr scp) { UserFieldPtr curr, prev = NULL, ufp_next; Boolean rval = FALSE, do_remove; CharPtr val; if 
(!IsUserObjectStructuredComment(uop) || field == NULL) { return FALSE; } if (field->choice == StructuredCommentField_database) { for (curr = uop->data; curr != NULL; curr = ufp_next) { do_remove = FALSE; ufp_next = curr->next; if (IsUserFieldStructuredCommentPrefixOrSuffix (curr) && curr->choice == 1) { val = DbnameValFromPrefixOrSuffix (curr->data.ptrvalue); if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (val, scp)) { do_remove = TRUE; } val = MemFree (val); } if (do_remove) { if (prev == NULL) { uop->data = curr->next; } else { prev->next = curr->next; } curr->next = NULL; curr = UserFieldFree (curr); rval = TRUE; } else { prev = curr; } } } else if (field->choice == StructuredCommentField_named) { for (curr = uop->data; curr != NULL; curr = ufp_next) { do_remove = FALSE; ufp_next = curr->next; if (curr->label != NULL && StringICmp (curr->label->str, field->data.ptrvalue) == 0) { if (curr->choice == 1) { if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (curr->data.ptrvalue, scp)) { do_remove = TRUE; } } } if (do_remove) { if (prev == NULL) { uop->data = curr->next; } else { prev->next = curr->next; } curr->next = NULL; curr = UserFieldFree (curr); rval = TRUE; } else { prev = curr; } } } else if (field->choice == StructuredCommentField_field_name) { for (curr = uop->data; curr != NULL; curr = ufp_next) { do_remove = FALSE; ufp_next = curr->next; if (!IsUserFieldStructuredCommentPrefixOrSuffix (curr) && DoesObjectIdMatchStringConstraint (curr->label, scp)) { if (prev == NULL) { uop->data = curr->next; } else { prev->next = curr->next; } curr->next = NULL; curr = UserFieldFree (curr); rval = TRUE; } else { prev = curr; } } } return rval; } static Boolean SetStructuredCommentFieldOnUserObject (UserObjectPtr uop, StructuredCommentFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { UserFieldPtr curr, first = NULL, last = NULL, ufp; Boolean rval = FALSE; CharPtr oldval, newval, fmt; CharPtr prefix_fmt = 
"##%s-START##"; CharPtr suffix_fmt = "##%s-END##"; if (!IsUserObjectStructuredComment(uop) || field == NULL) { return FALSE; } if (field->choice == StructuredCommentField_database) { first = uop->data; curr = first; while (curr != NULL) { if (IsUserFieldStructuredCommentPrefixOrSuffix (curr) && curr->choice == 1) { oldval = DbnameValFromPrefixOrSuffix (curr->data.ptrvalue); if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (oldval, scp)) { if (StringCmp (curr->label->str, "StructuredCommentPrefix") == 0) { fmt = prefix_fmt; } else { fmt = suffix_fmt; } SetStringValue (&oldval, value, existing_text); newval = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (oldval))); sprintf (newval, fmt, oldval); curr->data.ptrvalue = MemFree (curr->data.ptrvalue); curr->data.ptrvalue = newval; rval = TRUE; } oldval = MemFree (oldval); } last = curr; curr = curr->next; } if (!rval && IsStringConstraintEmpty (scp)) { /* make prefix */ curr = UserFieldNew (); curr->label = ObjectIdNew (); curr->label->str = StringSave ("StructuredCommentPrefix"); curr->choice = 1; newval = (CharPtr) MemNew (sizeof (Char) * (StringLen (prefix_fmt) + StringLen (value))); sprintf (newval, prefix_fmt, value); curr->data.ptrvalue = newval; curr->next = first; uop->data = curr; first = curr; /* make suffix */ curr = UserFieldNew (); curr->label = ObjectIdNew (); curr->label->str = StringSave ("StructuredCommentSuffix"); curr->choice = 1; newval = (CharPtr) MemNew (sizeof (Char) * (StringLen (suffix_fmt) + StringLen (value))); sprintf (newval, suffix_fmt, value); curr->data.ptrvalue = newval; if (last == NULL) { first->next = curr; } else { last->next = curr; } rval = TRUE; } } else if (field->choice == StructuredCommentField_named) { last = uop->data; for (curr = uop->data; curr != NULL; curr = curr->next) { if (curr->label != NULL && StringICmp (curr->label->str, field->data.ptrvalue) == 0) { if (curr->choice == 1) { if (IsStringConstraintEmpty (scp) || 
DoesStringMatchConstraint (curr->data.ptrvalue, scp)) { newval = (CharPtr) curr->data.ptrvalue; SetStringValue (&newval, value, existing_text); curr->data.ptrvalue = newval; rval = TRUE; } } } last = curr; } if (!rval && IsStringConstraintEmpty (scp)) { curr = UserFieldNew (); curr->label = ObjectIdNew (); curr->label->str = StringSave (field->data.ptrvalue); curr->choice = 1; curr->data.ptrvalue = StringSave (value); if (last == NULL) { uop->data = curr; } else { last->next = curr; } rval = TRUE; } } else if (field->choice == StructuredCommentField_field_name) { last = uop->data; for (curr = uop->data; curr != NULL; curr = curr->next) { if (!IsUserFieldStructuredCommentPrefixOrSuffix (curr)) { if (DoesObjectIdMatchStringConstraint (curr->label, scp)) { rval = SetObjectIdString (curr->label, value, existing_text); } last = curr; } } if (!rval && IsStringConstraintEmpty (scp)) { curr = UserFieldNew (); curr->label = ObjectIdNew (); curr->label->str = StringSave (value); curr->choice = 1; curr->data.ptrvalue = StringSave (""); if (last == NULL) { ufp = uop->data; if (ufp != NULL) { curr->next = ufp->next; ufp->next = curr; } } else { curr->next = last->next; last->next = curr; } rval = TRUE; } } return rval; } typedef struct dblinkname { Int4 field_type; CharPtr field_name; } DBLinkNameData, PNTR DBLinkNamePtr; static DBLinkNameData dblink_names[] = { { DBLink_field_type_trace_assembly , "Trace Assembly Archive" } , { DBLink_field_type_bio_sample , "BioSample" } , { DBLink_field_type_probe_db , "ProbeDB" } , { DBLink_field_type_sequence_read_archve , "Sequence Read Archive" } , { DBLink_field_type_bio_project , "BioProject" } , { DBLink_field_type_assembly , "Assembly" } }; #define NUM_dblinkname sizeof (dblink_names) / sizeof (DBLinkNameData) NLM_EXTERN CharPtr GetDBLinkNameFromDBLinkFieldType (Int4 field_type) { CharPtr str = NULL; Int4 i; for (i = 0; i < NUM_dblinkname && str == NULL; i++) { if (field_type == dblink_names[i].field_type) { str = 
dblink_names[i].field_name; } } if (str == NULL) { str = "Unknown field type"; } return str; } NLM_EXTERN Int4 GetDBLinkFieldTypeFromDBLinkName (CharPtr field_name) { Int4 rval = -1; Int4 i; for (i = 0; i < NUM_dblinkname && rval < 0; i++) { if (StringCmp (field_name, dblink_names[i].field_name) == 0) { rval = dblink_names[i].field_type; } } return rval; } NLM_EXTERN Int4 GetNumDBLinkFields (void) { return NUM_dblinkname; } static CharPtr GetDBLinkFieldFromUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp) { UserFieldPtr curr; CharPtr rval = NULL; CharPtr field_name; Char buf[15]; CharPtr PNTR cpp; Int4Ptr ipp; Int4 i; if (!IsUserObjectDBLink(uop) || field < 1) { return NULL; } field_name = GetDBLinkNameFromDBLinkFieldType (field); for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) { if (curr->label != NULL && StringCmp (curr->label->str, field_name) == 0) { if (curr->choice == 7) { if (curr->num > 0 && (cpp = (CharPtr PNTR) curr->data.ptrvalue) != NULL) { for (i = 0; i < curr->num && rval == NULL; i++) { if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp)) { rval = StringSave (cpp[i]); } } } } else if (curr->choice == 8) { if (curr->num > 0 && (ipp = (Int4Ptr) curr->data.ptrvalue) != NULL) { for (i = 0; i < curr->num && rval == NULL; i++) { sprintf (buf, "%d", ipp[i]); if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { rval = StringSave (buf); } } } } } } return rval; } static ValNodePtr GetMultipleDBLinkFieldValuesFromUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp) { UserFieldPtr curr; ValNodePtr rval = NULL; CharPtr field_name; Char buf[15]; CharPtr PNTR cpp; Int4Ptr ipp; Int4 i; if (!IsUserObjectDBLink(uop) || field < 1) { return NULL; } field_name = GetDBLinkNameFromDBLinkFieldType (field); for (curr = uop->data; curr != NULL; curr = curr->next) { if (curr->label != NULL && StringCmp (curr->label->str, field_name) == 0) { if (curr->choice == 7) { if 
(curr->num > 0 && (cpp = (CharPtr PNTR) curr->data.ptrvalue) != NULL) { for (i = 0; i < curr->num; i++) { if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp)) { ValNodeAddPointer (&rval, 0, StringSave (cpp[i])); } } } } else if (curr->choice == 8) { if (curr->num > 0 && (ipp = (Int4Ptr) curr->data.ptrvalue) != NULL) { for (i = 0; i < curr->num; i++) { sprintf (buf, "%d", ipp[i]); if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { ValNodeAddPointer (&rval, 0, StringSave (buf)); } } } } } } return rval; } static Boolean RemoveDBLinkFieldFromUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp) { UserFieldPtr curr, prev_type = NULL, next_type; Boolean rval = FALSE; Char buf[15]; CharPtr field_name; CharPtr PNTR cpp; Int4Ptr ipp; Int4 i, j; if (!IsUserObjectDBLink(uop) || field < 1) { return FALSE; } field_name = GetDBLinkNameFromDBLinkFieldType (field); for (curr = uop->data; curr != NULL; curr = next_type) { next_type = curr->next; if (curr->label != NULL && StringCmp (curr->label->str, field_name) == 0) { if (curr->choice == 7) { if (curr->num > 0 && (cpp = (CharPtr PNTR) curr->data.ptrvalue) != NULL) { for (i = 0; i < curr->num; i++) { if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp)) { cpp[i] = MemFree (cpp[i]); for (j = i + 1; j < curr->num; j++) { cpp[j - 1] = cpp[j]; } curr->num--; rval = TRUE; i--; } } } } else if (curr->choice == 8) { if (curr->num > 0 && (ipp = (Int4Ptr) curr->data.ptrvalue) != NULL) { for (i = 0; i < curr->num; i++) { sprintf (buf, "%d", ipp[i]); if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { for (j = i + 1; j < curr->num; j++) { ipp[j - 1] = ipp[j]; } curr->num--; rval = TRUE; i--; } } } } } if (curr->num == 0) { if (prev_type == NULL) { uop->data = next_type; } else { prev_type->next = next_type; } curr->next = NULL; curr = UserFieldFree (curr); } else { prev_type = curr; } } return rval; } static Boolean 
SetDBLinkFieldOnUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { UserFieldPtr curr, last = NULL; Boolean rval = FALSE; CharPtr newval; CharPtr field_name; CharPtr PNTR cpp = NULL; CharPtr PNTR new_cpp; Int4Ptr ipp = NULL, new_ipp; Int4 i; Char buf[15]; if (!IsUserObjectDBLink(uop) || field < 1) { return FALSE; } field_name = GetDBLinkNameFromDBLinkFieldType (field); for (curr = uop->data; curr != NULL; curr = curr->next) { if (curr->label != NULL && StringCmp (curr->label->str, field_name) == 0) { if (curr->choice == 7) { if (curr->num > 0 && (cpp = (CharPtr PNTR) curr->data.ptrvalue) != NULL && existing_text != ExistingTextOption_add_qual) { for (i = 0; i < curr->num; i++) { if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp)) { newval = cpp[i]; SetStringValue (&newval, value, existing_text); cpp[i] = newval; rval = TRUE; } } } if (!rval && IsStringConstraintEmpty (scp)) { new_cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (curr->num + 1)); if (cpp != NULL) { for (i = 0; i < curr->num; i++) { new_cpp[i] = cpp[i]; cpp[i] = NULL; } new_cpp[i] = StringSave (value); } cpp = MemFree (cpp); curr->data.ptrvalue = new_cpp; curr->num++; rval = TRUE; } } else if (curr->choice == 8 && StringIsAllDigits (value)) { if (curr->num > 0 && (ipp = (Int4Ptr) curr->data.ptrvalue) != NULL && existing_text != ExistingTextOption_add_qual) { for (i = 0; i < curr->num; i++) { sprintf (buf, "%d", ipp[i]); if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) { newval = StringSave (buf); SetStringValue (&newval, value, existing_text); if (StringIsAllDigits (newval)) { ipp[i] = atoi (newval); rval = TRUE; } newval = MemFree (newval); } } } if (!rval && IsStringConstraintEmpty (scp)) { new_ipp = (Int4Ptr) MemNew (sizeof (Int4) * (curr->num + 1)); if (ipp != NULL) { for (i = 0; i < curr->num; i++) { new_ipp[i] = ipp[i]; } new_ipp[i] = atoi (value); } ipp = MemFree (ipp); curr->data.ptrvalue 
= new_ipp; curr->num++; rval = TRUE; } } } last = curr; } if (!rval && IsStringConstraintEmpty (scp) && (field != DBLink_field_type_trace_assembly || StringIsAllDigits (value))) { curr = UserFieldNew (); curr->label = ObjectIdNew (); curr->label->str = StringSave (field_name); if (field == DBLink_field_type_trace_assembly) { curr->choice = 8; curr->num = 1; ipp = (Int4Ptr) MemNew (sizeof (Int4) * curr->num); ipp[0] = atoi (value); curr->data.ptrvalue = ipp; } else { curr->choice = 7; curr->num = 1; cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * curr->num); cpp[0] = StringSave (value); curr->data.ptrvalue = cpp; } if (last == NULL) { uop->data = curr; } else { last->next = curr; } rval = TRUE; } return rval; } /* The following functions are used for getting and setting various types of data * in publications. */ static CharPtr legalMonths [] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", NULL }; static DatePtr ReadDateFromString (CharPtr date_str) { Char ch; Int2 i; CharPtr ptr1, ptr2, month = NULL, day = NULL, year = NULL; CharPtr str; Int4 day_val = 0; Uint1 month_num = 0; long val; Int4 year_val = 0; DatePtr dp = NULL; Boolean critical_error = FALSE; if (StringHasNoText (date_str)) return NULL; str = StringSave (date_str); ptr1 = StringChr (str, '-'); if (ptr1 != NULL) { *ptr1 = '\0'; ptr1++; ptr2 = StringChr (ptr1, '-'); if (ptr2 != NULL) { *ptr2 = '\0'; ptr2++; day = str; month = ptr1; year = ptr2; } else { month = str; year = ptr1; } } else { year = str; } if (day != NULL) { if (sscanf (day, "%ld", &val) != 1 || val < 1 || val > 31) { critical_error = TRUE; } day_val = val; } if (month != NULL) { for (i = 0; legalMonths [i] != NULL; i++) { if (StringCmp (month, legalMonths [i]) == 0) { month_num = i + 1; break; } } if (legalMonths [i] == NULL) critical_error = TRUE; } if (year != NULL) { ptr1 = year; ch = *ptr1; while (ch != '\0') { if (! 
(IS_DIGIT (ch))) critical_error = TRUE; ptr1++; ch = *ptr1; } if (sscanf (year, "%ld", &val) == 1) { if (val < 1700 || val > 2100) critical_error = TRUE; year_val = val - 1900; } else { critical_error = TRUE; } } str = MemFree (str); if (!critical_error) { dp = DateNew(); dp->data[0] = 1; dp->data[1] = (Uint1) year_val; dp->data[2] = month_num; dp->data[3] = (Uint1) day_val; } return dp; } static CharPtr GetAuthorStringEx (AuthorPtr author, Boolean use_initials) { CharPtr str = NULL; NameStdPtr n; Int4 len; Boolean has_middle = FALSE; if (author == NULL || author->name == NULL) return NULL; switch (author->name->choice) { case 1: /* dbtag */ str = GetDbtagString (author->name->data); break; case 2: /* name */ n = (NameStdPtr) author->name->data; if (n != NULL) { if (use_initials) { len = StringLen (n->names[0]) + StringLen (n->names[4]) + 2; str = (CharPtr) MemNew (sizeof (Char) * (len)); sprintf (str, "%s%s", StringHasNoText (n->names[4]) ? "" : n->names[4], StringHasNoText (n->names[0]) ? "" : n->names[0]); } else { len = StringLen (n->names[1]) + StringLen (n->names[0]) + 2; if (StringLen (n->names[4]) > 2) { len += StringLen (n->names[4]) - 1; has_middle = TRUE; } str = (CharPtr) MemNew (sizeof (Char) * (len)); sprintf (str, "%s%s%s%s%s", StringHasNoText (n->names[1]) ? "" : n->names[1], StringHasNoText (n->names[1]) ? "" : " ", has_middle ? n->names[4] + 2 : "", has_middle ? " " : "", StringHasNoText (n->names[0]) ? 
"" : n->names[0]); } } break; case 3: /* ml */ case 4: /* str */ case 5: /* consortium */ str = StringSave (author->name->data); break; } return str; } static CharPtr GetAuthorString (AuthorPtr author) { return GetAuthorStringEx (author, FALSE); } static CharPtr GetAuthorListStringEx (AuthListPtr alp, StringConstraintPtr scp, Boolean use_initials) { CharPtr str = NULL, tmp; Int4 len = 0; ValNodePtr list = NULL, vnp; if (alp == NULL) return NULL; switch (alp->choice) { case 1: for (vnp = alp->names; vnp != NULL; vnp = vnp->next) { tmp = GetAuthorStringEx (vnp->data.ptrvalue, use_initials); if (tmp != NULL) { if (DoesStringMatchConstraint (tmp, scp)) { ValNodeAddPointer (&list, 0, tmp); len += StringLen (tmp) + 2; } else { tmp = MemFree (tmp); } } } break; case 2: case 3: for (vnp = alp->names; vnp != NULL; vnp = vnp->next) { if (vnp->data.ptrvalue != NULL && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) { ValNodeAddPointer (&list, 0, StringSave (vnp->data.ptrvalue)); len += StringLen (vnp->data.ptrvalue) + 2; } } break; } if (len > 0) { str = (CharPtr) MemNew (sizeof (Char) * (len + 1)); for (vnp = list; vnp != NULL; vnp = vnp->next) { StringCat (str, vnp->data.ptrvalue); if (vnp->next != NULL) { StringCat (str, ", "); } } } return str; } NLM_EXTERN CharPtr GetAuthorListString (AuthListPtr alp, StringConstraintPtr scp) { return GetAuthorListStringEx (alp, scp, FALSE); } static Boolean RemoveAuthorListString (AuthListPtr alp, StringConstraintPtr scp) { CharPtr tmp; Boolean rval = FALSE; ValNodePtr vnp, vnp_next, vnp_prev = NULL; if (alp == NULL) return FALSE; switch (alp->choice) { case 1: for (vnp = alp->names; vnp != NULL; vnp = vnp_next) { vnp_next = vnp->next; tmp = GetAuthorString (vnp->data.ptrvalue); if (tmp != NULL) { if (DoesStringMatchConstraint (tmp, scp)) { if (vnp_prev == NULL) { alp->names = vnp->next; } else { vnp_prev->next = vnp->next; } vnp->next = NULL; vnp->data.ptrvalue = AuthorFree (vnp->data.ptrvalue); vnp = ValNodeFree (vnp); rval = 
TRUE; } else { vnp_prev = vnp; } tmp = MemFree (tmp); } else { vnp_prev = vnp; } } break; case 2: case 3: for (vnp = alp->names; vnp != NULL; vnp = vnp_next) { vnp_next = vnp->next; if (vnp->data.ptrvalue != NULL && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) { if (vnp_prev == NULL) { alp->names = vnp->next; } else { vnp_prev->next = vnp->next; } vnp->next = NULL; vnp = ValNodeFreeData (vnp); rval = TRUE; } else { vnp_prev = vnp; } } break; } return rval; } static NameStdPtr ReadNameFromString (CharPtr str, CharPtr PNTR next_name) { CharPtr cp_end, cp_space; CharPtr p_repl1 = NULL, p_repl2 = NULL, p_repl3 = NULL; Char ch_r1, ch_r2, ch_r3; NameStdPtr n; if (StringHasNoText (str)) { if (next_name != NULL) { *next_name = NULL; } return NULL; } /* skip over any leading spaces */ str += StringSpn (str, " \t"); /* skip over "and" if found */ if (StringNCmp (str, "and ", 4) == 0) { str += 4; } if (StringHasNoText (str)) { str = MemFree (str); return NULL; } cp_end = StringChr (str, ','); if (cp_end != NULL) { p_repl1 = cp_end; ch_r1 = *p_repl1; *cp_end = 0; if (next_name != NULL) { if (StringHasNoText (cp_end + 1)) { *next_name = NULL; } else { *next_name = cp_end + 1; } } } else if (next_name != NULL) { *next_name = NULL; } n = NameStdNew (); /* look for elements in name */ cp_space = StringRChr (str, ' '); if (cp_space == NULL) { n->names[0] = StringSave (str); } else { n->names[0] = StringSave (cp_space + 1); while (isspace (*cp_space)) { cp_space--; } p_repl2 = cp_space + 1; ch_r2 = *p_repl2; *(cp_space + 1) = 0; cp_space = StringChr (str, ' '); if (cp_space == NULL) { n->names[1] = StringSave (str); n->names[4] = (CharPtr) MemNew (sizeof (Char) * 3); sprintf (n->names[4], "%c.", *(n->names[1])); } else { p_repl3 = cp_space; ch_r3 = *p_repl3; *(cp_space) = 0; n->names[1] = StringSave (str); cp_space++; while (isspace (*cp_space)) { cp_space++; } n->names[4] = (CharPtr) MemNew (sizeof (Char) * (4 + StringLen (cp_space))); sprintf (n->names[4], "%c.%s.", 
*(n->names[1]), cp_space); } } if (p_repl1 != NULL) { *p_repl1 = ch_r1; } if (p_repl2 != NULL) { *p_repl2 = ch_r2; } if (p_repl3 != NULL) { *p_repl3 = ch_r3; } return n; } NLM_EXTERN ValNodePtr ReadNameListFromString (CharPtr value) { ValNodePtr names = NULL; AuthorPtr ap; NameStdPtr n; CharPtr next_cp, cp; cp = value; next_cp = NULL; while (cp != NULL) { n = ReadNameFromString (cp, &next_cp); if (n != NULL) { ap = AuthorNew (); ap->name = PersonIdNew (); ap->name->choice = 2; ap->name->data = n; ValNodeAddPointer (&names, 1, ap); } cp = next_cp; } return names; } static ValNodePtr FreeNameList (Uint1 choice, ValNodePtr name_list) { ValNodePtr curr, next; curr = name_list; while (curr != NULL) { if (choice == 1) /* std type */ AuthorFree((AuthorPtr) curr->data.ptrvalue); else /* ml or str */ MemFree(curr->data.ptrvalue); next = curr->next; MemFree(curr); curr = next; } return curr; } static Boolean SetAuthorListFromString (AuthListPtr alp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { ValNodePtr name_list = NULL, vnp, vnp_prev, vnp_next, vnp_tmp; CharPtr tmp; Boolean rval = FALSE, found, ok_to_set = FALSE; if (alp == NULL || StringHasNoText (value)) return FALSE; /* can only combine lists if existing list is same type */ if (alp->names == NULL || alp->choice == 1) { ok_to_set = TRUE; } else { switch (existing_text) { case ExistingTextOption_replace_old: if (IsStringConstraintEmpty (scp)) { ok_to_set = TRUE; } break; case ExistingTextOption_append_space: case ExistingTextOption_append_colon: case ExistingTextOption_append_none: case ExistingTextOption_prefix_space: case ExistingTextOption_prefix_colon: case ExistingTextOption_prefix_none: ok_to_set = TRUE; break; } } if (!ok_to_set) { return FALSE; } if (alp->names == NULL && IsStringConstraintEmpty (scp)) { /* no prior values - just add new list */ name_list = ReadNameListFromString (value); if (name_list != NULL) { ValNodeLink (&alp->names, name_list); alp->choice = 1; rval = TRUE; } } else { 
switch (existing_text) { case ExistingTextOption_append_semi: case ExistingTextOption_append_comma: name_list = ReadNameListFromString (value); if (IsStringConstraintEmpty (scp)) { /* append to list */ ValNodeLink (&(alp->names), name_list); rval = TRUE; } else { /* insert in list after first match */ vnp = alp->names; found = FALSE; while (vnp != NULL && !found) { tmp = GetAuthorString (vnp->data.ptrvalue); if (tmp != NULL && DoesStringMatchConstraint (tmp, scp)) { found = TRUE; } tmp = MemFree (tmp); if (!found) { vnp = vnp->next; } } if (found) { ValNodeLink (&name_list, vnp->next); vnp->next = name_list; rval = TRUE; } } break; case ExistingTextOption_prefix_semi: case ExistingTextOption_prefix_comma: name_list = ReadNameListFromString (value); if (IsStringConstraintEmpty (scp)) { /* prepend to list */ ValNodeLink (&name_list, alp->names); alp->names = name_list; rval = TRUE; } else { /* insert in list before first match */ vnp = alp->names; vnp_prev = NULL; found = FALSE; while (vnp != NULL && !found) { tmp = GetAuthorString (vnp->data.ptrvalue); if (tmp != NULL && DoesStringMatchConstraint (tmp, scp)) { found = TRUE; } tmp = MemFree (tmp); if (!found) { vnp_prev = vnp; vnp = vnp->next; } } if (found) { if (vnp_prev == NULL) { ValNodeLink (&name_list, alp->names); alp->names = name_list; } else { ValNodeLink (&name_list, vnp_prev->next); vnp_prev->next = name_list; } rval = TRUE; } } break; case ExistingTextOption_replace_old: name_list = ReadNameListFromString (value); if (IsStringConstraintEmpty (scp)) { /* replace entire list */ alp->names = FreeNameList (alp->choice, alp->names); alp->names = name_list; alp->choice = 1; rval = TRUE; } else { /* replace first author that matches with new match, remove others that match */ vnp = alp->names; vnp_prev = NULL; found = FALSE; while (vnp != NULL) { vnp_next = vnp->next; tmp = GetAuthorString (vnp->data.ptrvalue); if (tmp != NULL && DoesStringMatchConstraint (tmp, scp)) { if (found) { if (vnp_prev == NULL) { 
alp->names = vnp->next; } else { vnp_prev->next = vnp->next; } } else { vnp_tmp = name_list; while (vnp_tmp->next != NULL) { vnp_tmp = vnp_tmp->next; } ValNodeLink (&name_list, vnp->next); if (vnp_prev == NULL) { alp->names = name_list; } else { vnp_prev->next = name_list; } vnp_prev = vnp_tmp; found = TRUE; rval = TRUE; } vnp->next = NULL; vnp = FreeNameList (alp->choice, vnp); } else { vnp_prev = vnp; } tmp = MemFree (tmp); vnp = vnp_next; } } break; case ExistingTextOption_append_space: case ExistingTextOption_append_colon: case ExistingTextOption_append_none: case ExistingTextOption_prefix_space: case ExistingTextOption_prefix_colon: case ExistingTextOption_prefix_none: vnp_prev = NULL; for (vnp = alp->names; vnp != NULL; vnp = vnp_next) { vnp_next = vnp->next; if (alp->choice == 1) { tmp = GetAuthorString (vnp->data.ptrvalue); if (tmp != NULL && DoesStringMatchConstraint (tmp, scp) && SetStringValue (&tmp, value, existing_text)) { name_list = ReadNameListFromString (tmp); if (name_list != NULL) { vnp_tmp = name_list; while (vnp_tmp->next != NULL) { vnp_tmp = vnp_tmp->next; } ValNodeLink (&name_list, vnp_next); if (vnp_prev == NULL) { alp->names = name_list; } else { vnp_prev->next = name_list; } vnp_prev = vnp_tmp; vnp->next = NULL; vnp = FreeNameList (alp->choice, vnp); rval = TRUE; name_list = NULL; } else { vnp_prev = vnp; } } else { vnp_prev = vnp; } tmp = MemFree (tmp); } else { if (vnp->data.ptrvalue != NULL && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) { tmp = (CharPtr) vnp->data.ptrvalue; rval |= SetStringValue (&tmp, value, existing_text); vnp->data.ptrvalue = tmp; } } } break; } } if (!rval && name_list != NULL) { name_list = FreeNameList (1, vnp); } return rval; } static CharPtr GetPubFieldFromAffil (AffilPtr ap, Int4 field, StringConstraintPtr scp) { CharPtr str = NULL; if (ap == NULL) return NULL; switch (field) { case Publication_field_affiliation: if (!StringHasNoText (ap->affil) && DoesStringMatchConstraint (ap->affil, scp)) { str = 
StringSave (ap->affil); } break; case Publication_field_affil_div: if (!StringHasNoText (ap->div) && DoesStringMatchConstraint (ap->div, scp)) { str = StringSave (ap->div); } break; case Publication_field_affil_city: if (!StringHasNoText (ap->city) && DoesStringMatchConstraint (ap->city, scp)) { str = StringSave (ap->city); } break; case Publication_field_affil_sub: if (!StringHasNoText (ap->sub) && DoesStringMatchConstraint (ap->sub, scp)) { str = StringSave (ap->sub); } break; case Publication_field_affil_country: if (!StringHasNoText (ap->country) && DoesStringMatchConstraint (ap->country, scp)) { str = StringSave (ap->country); } break; case Publication_field_affil_street: if (!StringHasNoText (ap->street) && DoesStringMatchConstraint (ap->street, scp)) { str = StringSave (ap->street); } break; case Publication_field_affil_email: if (!StringHasNoText (ap->email) && DoesStringMatchConstraint (ap->email, scp)) { str = StringSave (ap->email); } break; case Publication_field_affil_fax: if (!StringHasNoText (ap->fax) && DoesStringMatchConstraint (ap->fax, scp)) { str = StringSave (ap->fax); } break; case Publication_field_affil_phone: if (!StringHasNoText (ap->phone) && DoesStringMatchConstraint (ap->phone, scp)) { str = StringSave (ap->phone); } break; case Publication_field_affil_zipcode: if (!StringHasNoText (ap->postal_code) && DoesStringMatchConstraint (ap->postal_code, scp)) { str = StringSave (ap->postal_code); } break; } return str; } static Boolean RemovePubFieldFromAffil (AffilPtr ap, Int4 field, StringConstraintPtr scp) { Boolean rval = FALSE; if (ap == NULL) return FALSE; switch (field) { case Publication_field_affiliation: if (!StringHasNoText (ap->affil) && DoesStringMatchConstraint (ap->affil, scp)) { ap->affil = MemFree (ap->affil); rval = TRUE; } break; case Publication_field_affil_div: if (!StringHasNoText (ap->div) && DoesStringMatchConstraint (ap->div, scp)) { ap->div = MemFree (ap->div); rval = TRUE; } break; case Publication_field_affil_city: 
if (!StringHasNoText (ap->city) && DoesStringMatchConstraint (ap->city, scp)) { ap->city = MemFree (ap->city); rval = TRUE; } break; case Publication_field_affil_sub: if (!StringHasNoText (ap->sub) && DoesStringMatchConstraint (ap->sub, scp)) { ap->sub = MemFree (ap->sub); rval = TRUE; } break; case Publication_field_affil_country: if (!StringHasNoText (ap->country) && DoesStringMatchConstraint (ap->country, scp)) { ap->country = MemFree (ap->country); rval = TRUE; } break; case Publication_field_affil_street: if (!StringHasNoText (ap->street) && DoesStringMatchConstraint (ap->street, scp)) { ap->street = MemFree (ap->street); rval = TRUE; } break; case Publication_field_affil_email: if (!StringHasNoText (ap->email) && DoesStringMatchConstraint (ap->email, scp)) { ap->email = MemFree (ap->email); rval = TRUE; } break; case Publication_field_affil_fax: if (!StringHasNoText (ap->fax) && DoesStringMatchConstraint (ap->fax, scp)) { ap->fax = MemFree (ap->fax); rval = TRUE; } break; case Publication_field_affil_phone: if (!StringHasNoText (ap->phone) && DoesStringMatchConstraint (ap->phone, scp)) { ap->phone = MemFree (ap->phone); rval = TRUE; } break; case Publication_field_affil_zipcode: if (!StringHasNoText (ap->postal_code) && DoesStringMatchConstraint (ap->postal_code, scp)) { ap->postal_code = MemFree (ap->postal_code); rval = TRUE; } break; } return rval; } static Boolean SetAffilPubField (AffilPtr ap, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { Boolean rval = FALSE; if (ap == NULL) return FALSE; switch (field) { case Publication_field_affiliation: if (!StringHasNoText (ap->affil) || DoesStringMatchConstraint (ap->affil, scp)) { rval = SetStringValue (&(ap->affil), value, existing_text); } break; case Publication_field_affil_div: if (!StringHasNoText (ap->div) || DoesStringMatchConstraint (ap->div, scp)) { rval = SetStringValue (&(ap->div), value, existing_text); } break; case Publication_field_affil_city: if (!StringHasNoText 
(ap->city) || DoesStringMatchConstraint (ap->city, scp)) { rval = SetStringValue (&(ap->city), value, existing_text); } break; case Publication_field_affil_sub: if (!StringHasNoText (ap->sub) || DoesStringMatchConstraint (ap->sub, scp)) { rval = SetStringValue (&(ap->sub), value, existing_text); } break; case Publication_field_affil_country: if (!StringHasNoText (ap->country) || DoesStringMatchConstraint (ap->country, scp)) { rval = SetStringValue (&(ap->country), value, existing_text); } break; case Publication_field_affil_street: if (!StringHasNoText (ap->street) || DoesStringMatchConstraint (ap->street, scp)) { rval = SetStringValue (&(ap->street), value, existing_text); } break; case Publication_field_affil_email: if (!StringHasNoText (ap->email) || DoesStringMatchConstraint (ap->email, scp)) { rval = SetStringValue (&(ap->email), value, existing_text); } break; case Publication_field_affil_fax: if (!StringHasNoText (ap->fax) || DoesStringMatchConstraint (ap->fax, scp)) { rval = SetStringValue (&(ap->fax), value, existing_text); } break; case Publication_field_affil_phone: if (!StringHasNoText (ap->phone) || DoesStringMatchConstraint (ap->phone, scp)) { rval = SetStringValue (&(ap->phone), value, existing_text); } break; case Publication_field_affil_zipcode: if (!StringHasNoText (ap->postal_code) || DoesStringMatchConstraint (ap->postal_code, scp)) { rval = SetStringValue (&(ap->postal_code), value, existing_text); } break; } return rval; } static CharPtr GetPubFieldFromImprint (ImprintPtr imprint, Int4 field, StringConstraintPtr scp) { CharPtr str = NULL; if (imprint == NULL) return NULL; switch (field) { case Publication_field_volume: if (!StringHasNoText (imprint->volume) && DoesStringMatchConstraint (imprint->volume, scp)) { str = StringSave (imprint->volume); } break; case Publication_field_issue: if (!StringHasNoText (imprint->issue) && DoesStringMatchConstraint (imprint->issue, scp)) { str = StringSave (imprint->issue); } break; case 
Publication_field_pages: if (!StringHasNoText (imprint->pages) && DoesStringMatchConstraint (imprint->pages, scp)) { str = StringSave (imprint->pages); } break; case Publication_field_date: if (imprint->date != NULL) { str = PrintPartialOrCompleteDate (imprint->date); if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) { str = MemFree (str); } } break; } return str; } static Boolean RemovePubDate (DatePtr PNTR pDate, StringConstraintPtr scp) { CharPtr str; Boolean rval = FALSE; if (pDate == NULL || *pDate == NULL) { return FALSE; } str = PrintPartialOrCompleteDate (*pDate); if (!StringHasNoText (str) && DoesStringMatchConstraint (str, scp)) { *pDate = DateFree (*pDate); rval = TRUE; } str = MemFree (str); return rval; } static Boolean SetPubDate (DatePtr PNTR pDate, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { CharPtr tmp; DatePtr dp = NULL; Boolean made_new_date = FALSE; Boolean rval = FALSE; if (pDate == NULL) { return FALSE; } if (*pDate == NULL) { *pDate = DateNew(); made_new_date = TRUE; } tmp = PrintPartialOrCompleteDate (*pDate); if (DoesStringMatchConstraint (tmp, scp) && SetStringValue (&tmp, value, existing_text)) { dp = ReadDateFromString (tmp); if (dp != NULL) { *pDate = DateFree (*pDate); *pDate = dp; rval = TRUE; } } tmp = MemFree (tmp); if (!rval && made_new_date) { *pDate = DateFree (*pDate); } return rval; } static Boolean RemovePubFieldFromImprint (ImprintPtr imprint, Int4 field, StringConstraintPtr scp) { Boolean rval = FALSE; if (imprint == NULL) return FALSE; switch (field) { case Publication_field_volume: if (!StringHasNoText (imprint->volume) && DoesStringMatchConstraint (imprint->volume, scp)) { imprint->volume = MemFree (imprint->volume); rval = TRUE; } break; case Publication_field_issue: if (!StringHasNoText (imprint->issue) && DoesStringMatchConstraint (imprint->issue, scp)) { imprint->issue = MemFree (imprint->issue); rval = TRUE; } break; case Publication_field_pages: if (!StringHasNoText 
(imprint->pages) && DoesStringMatchConstraint (imprint->pages, scp)) { imprint->pages = MemFree (imprint->pages); rval = TRUE; } break; case Publication_field_date: rval = RemovePubDate (&(imprint->date), scp); break; } return rval; } static Boolean SetPubFieldOnImprint (ImprintPtr imprint, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { Boolean rval = FALSE; if (imprint == NULL) return FALSE; switch (field) { case Publication_field_volume: if (DoesStringMatchConstraint (imprint->volume, scp)) { rval = SetStringValue (&(imprint->volume), value, existing_text); } break; case Publication_field_issue: if (StringHasNoText (imprint->issue) || DoesStringMatchConstraint (imprint->issue, scp)) { rval = SetStringValue (&(imprint->issue), value, existing_text); } break; case Publication_field_pages: if (StringHasNoText (imprint->pages) || DoesStringMatchConstraint (imprint->pages, scp)) { rval = SetStringValue (&(imprint->pages), value, existing_text); } break; case Publication_field_date: rval = SetPubDate (&(imprint->date), scp, value, existing_text); break; } return rval; } static void SetValNodeChoices (ValNodePtr list, Uint1 new_choice) { while (list != NULL) { list->choice = new_choice; list = list->next; } } static CharPtr GetPubFieldFromCitJour (CitJourPtr cjp, Int4 field, StringConstraintPtr scp) { CharPtr str = NULL; if (cjp == NULL) return NULL; switch (field) { case Publication_field_journal: str = GetFirstValNodeStringMatch (cjp->title, scp); break; case Publication_field_volume: case Publication_field_issue: case Publication_field_pages: case Publication_field_date: str = GetPubFieldFromImprint (cjp->imp, field, scp); break; } return str; } static Boolean RemovePubFieldFromCitJour (CitJourPtr cjp, Int4 field, StringConstraintPtr scp) { Boolean rval = FALSE; if (cjp == NULL) return FALSE; switch (field) { case Publication_field_journal: rval = RemoveValNodeStringMatch (&(cjp->title), scp); break; case Publication_field_volume: case 
Publication_field_issue: case Publication_field_pages: case Publication_field_date: rval = RemovePubFieldFromImprint (cjp->imp, field, scp); break; } return rval; } static Boolean SetPubFieldOnCitJour (CitJourPtr cjp, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { Boolean rval = FALSE; if (cjp == NULL) return FALSE; switch (field) { case Publication_field_journal: rval = SetStringsInValNodeStringList (&(cjp->title), scp, value, existing_text); SetValNodeChoices (cjp->title, 1); break; case Publication_field_volume: case Publication_field_issue: case Publication_field_pages: case Publication_field_date: rval = SetPubFieldOnImprint (cjp->imp, field, scp, value, existing_text); break; } return rval; } static CharPtr GetPubFieldFromCitBook (CitBookPtr cbp, Int4 field, StringConstraintPtr scp) { CharPtr str = NULL; if (cbp == NULL) return NULL; switch (field) { case Publication_field_title: str = GetFirstValNodeStringMatch (cbp->title, scp); break; case Publication_field_authors: str = GetAuthorListString (cbp->authors, scp); break; case Publication_field_authors_initials: str = GetAuthorListStringEx (cbp->authors, scp, TRUE); break; case Publication_field_affiliation: case Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (cbp->authors != NULL) { str = GetPubFieldFromAffil (cbp->authors->affil, field, scp); } break; case Publication_field_volume: case Publication_field_issue: case Publication_field_pages: case Publication_field_date: str = GetPubFieldFromImprint (cbp->imp, field, scp); break; } return str; } static Boolean RemovePubFieldFromCitBook (CitBookPtr cbp, Int4 field, StringConstraintPtr scp) { Boolean rval = FALSE; if (cbp == NULL) return FALSE; switch (field) { case 
Publication_field_title: rval = RemoveValNodeStringMatch (&(cbp->title), scp); break; case Publication_field_authors: rval = RemoveAuthorListString (cbp->authors, scp); break; case Publication_field_affiliation: case Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (cbp->authors != NULL) { rval = RemovePubFieldFromAffil(cbp->authors->affil, field, scp); } break; case Publication_field_volume: case Publication_field_issue: case Publication_field_pages: case Publication_field_date: rval = RemovePubFieldFromImprint (cbp->imp, field, scp); break; } return rval; } static Boolean SetPubFieldOnCitBook (CitBookPtr cbp, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { Boolean rval = FALSE; if (cbp == NULL) return FALSE; switch (field) { case Publication_field_title: rval = SetStringsInValNodeStringList (&(cbp->title), scp, value, existing_text); SetValNodeChoices (cbp->title, 1); break; case Publication_field_authors: if (cbp->authors == NULL) { cbp->authors = AuthListNew(); } rval = SetAuthorListFromString (cbp->authors, scp, value, existing_text); break; case Publication_field_affiliation: case Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (cbp->authors == NULL) { cbp->authors = AuthListNew(); } if (cbp->authors->affil == NULL) { cbp->authors->affil = AffilNew(); } rval = SetAffilPubField (cbp->authors->affil, field, scp, value, existing_text); break; case Publication_field_volume: case Publication_field_issue: 
case Publication_field_pages: case Publication_field_date: if (cbp->imp == NULL) { cbp->imp = ImprintNew(); } rval = SetPubFieldOnImprint (cbp->imp, field, scp, value, existing_text); break; } return rval; } NLM_EXTERN CharPtr GetPubFieldFromPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp) { CitGenPtr cgp; CitArtPtr cap; CitBookPtr cbp; CitPatPtr cpp; CitSubPtr csp; CitJourPtr cjp; CharPtr str = NULL; if (the_pub == NULL || the_pub->data.ptrvalue == NULL) return NULL; if (field == Publication_field_pub_class) { return GetPubclassFromPub(the_pub); } switch (the_pub->choice) { case PUB_Gen : cgp = (CitGenPtr) the_pub->data.ptrvalue; switch (field) { case Publication_field_cit: if (!StringHasNoText (cgp->cit) && DoesStringMatchConstraint (cgp->title, scp)) { str = StringSave (cgp->cit); } break; case Publication_field_authors: str = GetAuthorListString (cgp->authors, scp); break; case Publication_field_authors_initials: str = GetAuthorListStringEx (cgp->authors, scp, TRUE); break; case Publication_field_affiliation: case Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (cgp->authors != NULL && cgp->authors->affil != NULL) { str = GetPubFieldFromAffil (cgp->authors->affil, field, scp); } break; case Publication_field_journal: str = GetFirstValNodeStringMatch (cgp->journal, scp); break; case Publication_field_volume: if (!StringHasNoText (cgp->volume) && DoesStringMatchConstraint (cgp->volume, scp)) { str = StringSave (cgp->volume); } break; case Publication_field_issue: if (!StringHasNoText (cgp->issue) && DoesStringMatchConstraint (cgp->issue, scp)) { str = StringSave (cgp->issue); } break; case Publication_field_pages: if (!StringHasNoText (cgp->pages) && DoesStringMatchConstraint 
(cgp->pages, scp)) { str = StringSave (cgp->pages); } break; case Publication_field_date: if (cgp->date != NULL) { str = PrintPartialOrCompleteDate (cgp->date); if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) { str = MemFree (str); } } break; case Publication_field_serial_number: str = GetInt2ValueFromString (cgp->serial_number, scp); break; case Publication_field_title: if (!StringHasNoText (cgp->title) && DoesStringMatchConstraint (cgp->title, scp)) { str = StringSave (cgp->title); } break; } break; case PUB_Sub : csp = (CitSubPtr) the_pub->data.ptrvalue; switch (field) { case Publication_field_title: if (!StringHasNoText (csp->descr) && DoesStringMatchConstraint (csp->descr, scp)) { str = StringSave (csp->descr); } break; case Publication_field_authors: str = GetAuthorListString (csp->authors, scp); break; case Publication_field_authors_initials: str = GetAuthorListStringEx (csp->authors, scp, TRUE); break; case Publication_field_affiliation: case Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (csp->authors != NULL) { str = GetPubFieldFromAffil (csp->authors->affil, field, scp); } break; case Publication_field_date: str = PrintPartialOrCompleteDate (csp->date); if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) { str = MemFree (str); } break; } break; case PUB_Article : cap = (CitArtPtr) the_pub->data.ptrvalue; switch (field) { case Publication_field_title: str = GetFirstValNodeStringMatch (cap->title, scp); break; case Publication_field_authors: str = GetAuthorListString (cap->authors, scp); break; case Publication_field_authors_initials: str = GetAuthorListStringEx (cap->authors, scp, TRUE); break; case Publication_field_affiliation: case 
Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (cap->authors != NULL) { str = GetPubFieldFromAffil (cap->authors->affil, field, scp); } break; default: if (cap->from == 1) { str = GetPubFieldFromCitJour (cap->fromptr, field, scp); } else if (cap->from == 2) { str = GetPubFieldFromCitBook (cap->fromptr, field, scp); } break; } break; case PUB_Journal: cjp = (CitJourPtr) the_pub->data.ptrvalue; str = GetPubFieldFromCitJour (cjp, field, scp); break; case PUB_Book : case PUB_Man : cbp = (CitBookPtr) the_pub->data.ptrvalue; str = GetPubFieldFromCitBook (cbp, field, scp); break; case PUB_Patent : cpp = (CitPatPtr) the_pub->data.ptrvalue; switch (field) { case Publication_field_title: if (!StringHasNoText (cpp->title) && DoesStringMatchConstraint (cpp->title, scp)) { str = StringSave (cpp->title); } break; case Publication_field_authors: str = GetAuthorListString (cpp->authors, scp); break; case Publication_field_authors_initials: str = GetAuthorListStringEx (cpp->authors, scp, TRUE); break; case Publication_field_affiliation: case Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (cpp->authors != NULL) { str = GetPubFieldFromAffil (cpp->authors->affil, field, scp); } break; } break; case PUB_PMid: if (field == Publication_field_pmid) { str = (CharPtr) MemNew (sizeof (Char) * 15); sprintf (str, "%d", the_pub->data.intvalue); } break; default : break; } return str; } static Boolean RemovePMIDOnCitArt (CitArtPtr cap, StringConstraintPtr scp) 
{ Boolean rval = FALSE; ValNodePtr vnp, vnp_prev = NULL, vnp_next; if (cap == NULL) { return FALSE; } for (vnp = cap->ids; vnp != NULL; vnp = vnp_next) { vnp_next = vnp->next; if (vnp->choice == ARTICLEID_PUBMED && DoesNumberMatchStringConstraint (vnp->data.intvalue, scp)) { if (vnp_prev == NULL) { cap->ids->next = vnp_next; } else { vnp_prev->next = vnp_next; } vnp->next = NULL; vnp = ArticleIdFree (vnp); rval = TRUE; } else { vnp_prev = vnp; } } return rval; } static Boolean RemovePubFieldFromPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp) { CitGenPtr cgp; CitArtPtr cap; CitBookPtr cbp; CitPatPtr cpp; CitSubPtr csp; Boolean rval = FALSE; Char num[15]; if (the_pub == NULL) return FALSE; if (field == Publication_field_pub_class) { return SetPubclassOnPub(the_pub, "unpublished"); } switch (the_pub->choice) { case PUB_Gen : cgp = (CitGenPtr) the_pub->data.ptrvalue; switch (field) { case Publication_field_cit: if (!StringHasNoText (cgp->cit) && DoesStringMatchConstraint (cgp->title, scp)) { cgp->cit = MemFree (cgp->cit); rval = TRUE; } break; case Publication_field_authors: rval = RemoveAuthorListString (cgp->authors, scp); break; case Publication_field_affiliation: case Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (cgp->authors != NULL) { rval = RemovePubFieldFromAffil(cgp->authors->affil, field, scp); } break; case Publication_field_journal: rval = RemoveValNodeStringMatch (&(cgp->journal), scp); break; case Publication_field_volume: if (!StringHasNoText (cgp->volume) && DoesStringMatchConstraint (cgp->volume, scp)) { cgp->volume = MemFree (cgp->volume); rval = TRUE; } break; case Publication_field_issue: if (!StringHasNoText (cgp->issue) && DoesStringMatchConstraint (cgp->issue, scp)) 
{ cgp->issue = MemFree (cgp->issue); rval = TRUE; } break; case Publication_field_pages: if (!StringHasNoText (cgp->pages) && DoesStringMatchConstraint (cgp->pages, scp)) { cgp->pages = MemFree (cgp->pages); rval = TRUE; } break; case Publication_field_date: rval = RemovePubDate (&(cgp->date), scp); break; case Publication_field_serial_number: if (cgp->serial_number > 0) { sprintf (num, "%d", cgp->serial_number); if (DoesStringMatchConstraint (num, scp)) { cgp->serial_number = 0; rval = TRUE; } } break; case Publication_field_title: if (!StringHasNoText (cgp->title) && DoesStringMatchConstraint (cgp->title, scp)) { cgp->title = MemFree (cgp->title); rval = TRUE; } break; } break; case PUB_Sub : csp = (CitSubPtr) the_pub->data.ptrvalue; switch (field) { case Publication_field_title: if (!StringHasNoText (csp->descr) && DoesStringMatchConstraint (csp->descr, scp)) { csp->descr = MemFree (csp->descr); rval = TRUE; } break; case Publication_field_authors: rval = RemoveAuthorListString (csp->authors, scp); break; case Publication_field_affiliation: case Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (csp->authors != NULL) { rval = RemovePubFieldFromAffil(csp->authors->affil, field, scp); } break; case Publication_field_date: rval = RemovePubDate (&(csp->date), scp); break; } break; case PUB_Article : cap = (CitArtPtr) the_pub->data.ptrvalue; switch (field) { case Publication_field_pmid: rval = RemovePMIDOnCitArt (cap, scp); break; case Publication_field_title: rval = RemoveValNodeStringMatch (&(cap->title), scp); break; case Publication_field_authors: rval = RemoveAuthorListString (cap->authors, scp); break; case Publication_field_affiliation: case Publication_field_affil_div: case 
Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (cap->authors != NULL) { rval = RemovePubFieldFromAffil(cap->authors->affil, field, scp); } break; default: if (cap->from == 1) { rval = RemovePubFieldFromCitJour (cap->fromptr, field, scp); } else if (cap->from == 2) { rval = RemovePubFieldFromCitBook (cap->fromptr, field, scp); } break; } break; case PUB_Journal: rval = RemovePubFieldFromCitJour (the_pub->data.ptrvalue, field, scp); break; case PUB_Book : case PUB_Man : cbp = (CitBookPtr) the_pub->data.ptrvalue; rval = RemovePubFieldFromCitBook (cbp, field, scp); break; case PUB_Patent : cpp = (CitPatPtr) the_pub->data.ptrvalue; switch (field) { case Publication_field_title: if (!StringHasNoText (cpp->title) && DoesStringMatchConstraint (cpp->title, scp)) { cpp->title = MemFree (cpp->title); rval = TRUE; } break; case Publication_field_authors: rval = RemoveAuthorListString (cpp->authors, scp); break; case Publication_field_affiliation: case Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (cpp->authors != NULL) { rval = RemovePubFieldFromAffil(cpp->authors->affil, field, scp); } break; } break; case PUB_PMid: if (field == Publication_field_pmid) { the_pub->data.intvalue = 0; } break; default : break; } return rval; } static Boolean SetPMIDOnCitArt (CitArtPtr cap, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { Boolean found = FALSE, rval = FALSE; ValNodePtr vnp; if (cap == NULL || !StringIsAllDigits(value)) { return FALSE; } for (vnp = cap->ids; vnp 
!= NULL; vnp = vnp->next) { if (vnp->choice == ARTICLEID_PUBMED) { found = TRUE; if (existing_text == ExistingTextOption_replace_old && DoesNumberMatchStringConstraint(vnp->data.intvalue, scp)) { vnp->data.intvalue = atoi (value); rval = TRUE; } } } if (!found && IsStringConstraintEmpty (scp)) { ValNodeAddInt (&(cap->ids), ARTICLEID_PUBMED, atoi (value)); rval = TRUE; } return rval; } static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { CitGenPtr cgp; CitArtPtr cap; CitBookPtr cbp; CitPatPtr cpp; CitSubPtr csp; Boolean rval = FALSE; if (the_pub == NULL || value == NULL) return FALSE; if (field == Publication_field_pub_class) { return SetPubclassOnPub(the_pub, value); } switch (the_pub->choice) { case PUB_Gen : cgp = (CitGenPtr) the_pub->data.ptrvalue; switch (field) { case Publication_field_cit: if (DoesStringMatchConstraint (cgp->cit, scp)) { rval = SetStringValue ( &(cgp->cit), value, existing_text); } break; case Publication_field_authors: if (cgp->authors == NULL) { cgp->authors = AuthListNew(); } rval = SetAuthorListFromString (cgp->authors, scp, value, existing_text); break; case Publication_field_affiliation: case Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (cgp->authors == NULL) { cgp->authors = AuthListNew(); } if (cgp->authors->affil == NULL) { cgp->authors->affil = AffilNew(); } rval = SetAffilPubField (cgp->authors->affil, field, scp, value, existing_text); break; case Publication_field_journal: rval = SetStringsInValNodeStringList (&(cgp->journal), scp, value, existing_text); SetValNodeChoices (cgp->journal, 1); break; case Publication_field_volume: if (DoesStringMatchConstraint (cgp->volume, scp)) { rval = 
SetStringValue ( &(cgp->volume), value, existing_text); } break; case Publication_field_issue: if (DoesStringMatchConstraint (cgp->issue, scp)) { rval = SetStringValue ( &(cgp->issue), value, existing_text); } break; case Publication_field_pages: if (DoesStringMatchConstraint (cgp->pages, scp)) { rval = SetStringValue ( &(cgp->pages), value, existing_text); } break; case Publication_field_date: rval = SetPubDate (&(cgp->date), scp, value, existing_text); break; case Publication_field_serial_number: rval = SetInt2ValueWithString (&(cgp->serial_number), value, existing_text); break; case Publication_field_title: if (DoesStringMatchConstraint(cgp->title, scp)) { rval = SetStringValue ( &(cgp->title), value, existing_text); } break; } break; case PUB_Sub : csp = (CitSubPtr) the_pub->data.ptrvalue; switch (field) { case Publication_field_title: if (DoesStringMatchConstraint (csp->descr, scp)) { rval = SetStringValue (&(csp->descr), value, existing_text); } break; case Publication_field_authors: if (csp->authors == NULL) { csp->authors = AuthListNew(); } rval = SetAuthorListFromString (csp->authors, scp, value, existing_text); break; case Publication_field_affiliation: case Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (csp->authors == NULL) { csp->authors = AuthListNew(); } if (csp->authors->affil == NULL) { csp->authors->affil = AffilNew(); } rval = SetAffilPubField (csp->authors->affil, field, scp, value, existing_text); break; case Publication_field_date: rval = SetPubDate (&(csp->date), scp, value, existing_text); break; } break; case PUB_Article : cap = (CitArtPtr) the_pub->data.ptrvalue; switch (field) { case Publication_field_pmid: rval = SetPMIDOnCitArt (cap, scp, value, existing_text); break; 
case Publication_field_title: rval = SetStringsInValNodeStringList (&(cap->title), scp, value, existing_text); SetValNodeChoices (cap->title, 1); break; case Publication_field_authors: if (cap->authors == NULL) { cap->authors = AuthListNew(); } rval = SetAuthorListFromString (cap->authors, scp, value, existing_text); break; case Publication_field_affiliation: case Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (cap->authors == NULL) { cap->authors = AuthListNew(); } if (cap->authors->affil == NULL) { cap->authors->affil = AffilNew(); } rval = SetAffilPubField (cap->authors->affil, field, scp, value, existing_text); break; default: if (cap->from == 1) { rval = SetPubFieldOnCitJour (cap->fromptr, field, scp, value, existing_text); } else if (cap->from == 2) { rval = SetPubFieldOnCitBook (cap->fromptr, field, scp, value, existing_text); } break; } break; case PUB_Journal: rval = SetPubFieldOnCitJour (the_pub->data.ptrvalue, field, scp, value, existing_text); break; case PUB_Book : case PUB_Man : cbp = (CitBookPtr) the_pub->data.ptrvalue; rval = SetPubFieldOnCitBook (cbp, field, scp, value, existing_text); break; case PUB_Patent : cpp = (CitPatPtr) the_pub->data.ptrvalue; switch (field) { case Publication_field_title: if (DoesStringMatchConstraint(cpp->title, scp)) { rval = SetStringValue ( &(cpp->title), value, existing_text); } break; case Publication_field_authors: if (cpp->authors == NULL) { cpp->authors = AuthListNew(); } rval = SetAuthorListFromString (cpp->authors, scp, value, existing_text); break; case Publication_field_affiliation: case Publication_field_affil_div: case Publication_field_affil_city: case Publication_field_affil_sub: case Publication_field_affil_country: case 
Publication_field_affil_street: case Publication_field_affil_email: case Publication_field_affil_fax: case Publication_field_affil_phone: case Publication_field_affil_zipcode: if (cpp->authors == NULL) { cpp->authors = AuthListNew(); } if (cpp->authors->affil == NULL) { cpp->authors->affil = AffilNew(); } rval = SetAffilPubField (cpp->authors->affil, field, scp, value, existing_text); break; } break; case PUB_PMid: if (field == Publication_field_pmid && StringIsAllDigits (value) && DoesNumberMatchStringConstraint(the_pub->data.intvalue, scp) && existing_text == ExistingTextOption_replace_old) { the_pub->data.intvalue = atoi (value); rval = TRUE; } break; default : break; } return rval; } static CharPtr GetPubFieldFromObject (Uint1 choice, Pointer data, Int4 field, StringConstraintPtr scp) { CharPtr rval = NULL; PubdescPtr pdp = NULL; PubPtr pub; SeqFeatPtr sfp; SeqDescrPtr sdp; if (data == NULL) return NULL; if (choice == OBJ_SEQFEAT) { sfp = (SeqFeatPtr) data; if (sfp->data.choice == SEQFEAT_PUB) { pdp = sfp->data.value.ptrvalue; } } else if (choice == OBJ_SEQDESC) { sdp = (SeqDescrPtr) data; if (sdp->choice == Seq_descr_pub) { pdp = sdp->data.ptrvalue; } } if (pdp == NULL) return NULL; for (pub = pdp->pub; pub != NULL && rval == NULL; pub = pub->next) { rval = GetPubFieldFromPub (pub, field, scp); } return rval; } static Boolean RemovePubFieldFromObject (Uint1 choice, Pointer data, Int4 field, StringConstraintPtr scp) { Boolean rval = FALSE; PubdescPtr pdp = NULL; PubPtr pub, pub_prev = NULL, pub_next; SeqFeatPtr sfp; SeqDescrPtr sdp; if (data == NULL) return FALSE; if (choice == OBJ_SEQFEAT) { sfp = (SeqFeatPtr) data; if (sfp->data.choice == SEQFEAT_PUB) { pdp = sfp->data.value.ptrvalue; } } else if (choice == OBJ_SEQDESC) { sdp = (SeqDescrPtr) data; if (sdp->choice == Seq_descr_pub) { pdp = sdp->data.ptrvalue; } } if (pdp == NULL) return FALSE; pub = pdp->pub; while (pub != NULL) { pub_next = pub->next; rval |= RemovePubFieldFromPub (pub, field, scp); if (field 
== Publication_field_pmid && pub->choice == PUB_PMid && pub->data.intvalue == 0) { if (pub_prev == NULL) { pdp->pub = pub_next; } else { pub_prev->next = pub_next; } pub->next = NULL; pub = PubFree (pub); } else { pub_prev = pub; } pub = pub->next; } return rval; } static Boolean SetPubFieldOnObject (Uint1 choice, Pointer data, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { Boolean rval = FALSE; PubdescPtr pdp = NULL; PubPtr pub; SeqFeatPtr sfp; SeqDescrPtr sdp = NULL; if (data == NULL) return FALSE; if (choice == OBJ_SEQFEAT) { sfp = (SeqFeatPtr) data; if (sfp->data.choice == SEQFEAT_PUB) { pdp = sfp->data.value.ptrvalue; } } else if (choice == OBJ_SEQDESC) { sdp = (SeqDescrPtr) data; if (sdp->choice == Seq_descr_pub) { pdp = sdp->data.ptrvalue; } } if (pdp == NULL) return FALSE; for (pub = pdp->pub; pub != NULL; pub = pub->next) { rval |= SetPubFieldOnPub (pub, field, scp, value, existing_text); } if (!rval && field == Publication_field_pmid && IsStringConstraintEmpty (scp) && StringIsAllDigits(value)) { /* first, set pub class to published for pre-existing pub */ if (pdp->pub != NULL && pdp->pub->choice == PUB_Gen) { SetPubclassOnPub(pdp->pub, "journal"); } ValNodeAddInt (&pdp->pub, PUB_PMid, atoi (value)); } return rval; } NLM_EXTERN Uint1 FieldTypeFromAECRAction (AECRActionPtr action) { Uint1 field_type = 0; ApplyActionPtr a; EditActionPtr e; ConvertActionPtr v; CopyActionPtr c; SwapActionPtr s; RemoveActionPtr r; AECRParseActionPtr p; RemoveOutsideActionPtr ro; if (action == NULL || action->action == NULL || action->action->data.ptrvalue == NULL) { return 0; } switch (action->action->choice) { case ActionChoice_apply: a = (ApplyActionPtr) action->action->data.ptrvalue; if (a->field != NULL) { field_type = a->field->choice; } break; case ActionChoice_edit: e = (EditActionPtr) action->action->data.ptrvalue; if (e->field != NULL) { field_type = e->field->choice; } break; case ActionChoice_remove_outside: ro = (RemoveOutsideActionPtr) 
action->action->data.ptrvalue; if (ro != NULL && ro->field != NULL) { field_type = ro->field->choice; } break; case ActionChoice_convert: v = (ConvertActionPtr) action->action->data.ptrvalue; if (v->fields != NULL) { field_type = FieldTypeChoiceFromFieldPairTypeChoice (v->fields->choice); } break; case ActionChoice_copy: c = (CopyActionPtr) action->action->data.ptrvalue; if (c->fields != NULL) { field_type = FieldTypeChoiceFromFieldPairTypeChoice (c->fields->choice); } break; case ActionChoice_swap: s = (SwapActionPtr) action->action->data.ptrvalue; if (s->fields != NULL) { field_type = FieldTypeChoiceFromFieldPairTypeChoice (s->fields->choice); } break; case ActionChoice_remove: r = (RemoveActionPtr) action->action->data.ptrvalue; if (r->field != NULL) { field_type = r->field->choice; } break; case ActionChoice_parse: p = (AECRParseActionPtr) action->action->data.ptrvalue; if (p->fields != NULL) { field_type = FieldTypeChoiceFromFieldPairTypeChoice (p->fields->choice); } break; } return field_type; }


/*
 * PubSerialNumberData
 *
 * Associates a minimized publication with the reference number it was given
 * on a particular sequence.
 */
typedef struct pubserialnumber {
  BioseqPtr  bsp;            /* sequence the reference was listed for */
  Int4       serial_number;  /* number parsed from the REFERENCE line */
  ValNodePtr min_pub;        /* PUB_Equiv node wrapping the minimized pub; freed with PubSetFree */
} PubSerialNumberData, PNTR PubSerialNumberPtr;


/*
 * PubSerialNumberNew
 *
 * Allocates a record with no sequence, serial number 0 and no publication.
 * Returns NULL when the allocation fails.
 */
static PubSerialNumberPtr PubSerialNumberNew (void)
{
  PubSerialNumberPtr psn;

  psn = (PubSerialNumberPtr) MemNew (sizeof (PubSerialNumberData));
  if (psn == NULL) {
    return NULL;
  }
  psn->bsp = NULL;
  psn->serial_number = 0;
  psn->min_pub = NULL;
  return psn;
}


/*
 * PubSerialNumberFree
 *
 * Frees a record together with the publication it holds.
 * Returns the value returned by MemFree, or `psn` unchanged when it is NULL.
 */
static PubSerialNumberPtr PubSerialNumberFree (PubSerialNumberPtr psn)
{
  if (psn != NULL) {
    psn->min_pub = PubSetFree (psn->min_pub);
    psn = MemFree (psn);
  }
  return psn;
}


/*
 * PubSerialNumberListFree
 *
 * Frees a list whose nodes each hold a PubSerialNumberPtr: every record is
 * released with PubSerialNumberFree and every node with ValNodeFree.
 * Always returns NULL.
 */
NLM_EXTERN ValNodePtr PubSerialNumberListFree (ValNodePtr vnp)
{
  ValNodePtr vnp_next;

  while (vnp != NULL) {
    vnp_next = vnp->next;
    /* detach first so that ValNodeFree releases this node only */
    vnp->next = NULL;
    vnp->data.ptrvalue = PubSerialNumberFree (vnp->data.ptrvalue);
    vnp = ValNodeFree (vnp);
    vnp = vnp_next;
  }
  return vnp;
}


/*
 * CaptureRefBlockSerialNumbers
 *
 * Flat-file writer callback (installed through XtraBlock.ffwrite by
 * GetCitListsForSeqEntry).  For every REFERENCE block it parses the number
 * that follows the "REFERENCE" keyword, locates the publication feature or
 * descriptor the block was generated from, and appends a PubSerialNumber
 * record (sequence, number, minimized publication) to the list whose
 * address is passed in `userdata`.
 *
 *   str       - text of the block; ignored unless it starts with "REFERENCE"
 *               (case-insensitively) followed by optional white space and a digit
 *   userdata  - really a ValNodePtr PNTR; the list being built
 *   blocktype - only REFERENCE_BLOCK is processed
 *   entityID, itemtype, itemID - identify the source object
 *   left, right - not used
 */
static void CaptureRefBlockSerialNumbers (
  CharPtr   str,
  Pointer   userdata,
  BlockType blocktype,
  Uint2     entityID,
  Uint2     itemtype,
  Uint4     itemID,
  Int4      left,
  Int4      right
)
{
  CharPtr            cp;
  Int4               serial_number;
  ValNodePtr         vnp;
  BioseqPtr          bsp = NULL;
  SeqFeatPtr         sfp;
  SeqDescrPtr        sdp;
  SeqMgrFeatContext  fcontext;
  SeqMgrDescContext  dcontext;
  PubSerialNumberPtr psn;
  ValNodePtr         ppr = NULL;
  PubdescPtr         pdp = NULL;

  if (blocktype != REFERENCE_BLOCK || userdata == NULL || str == NULL) return;

  if (StringNICmp (str, "REFERENCE", 9) != 0) {
    return;
  }

  /* <ctype.h> classifiers require a value representable as unsigned char */
  cp = str + 9;
  while (isspace ((unsigned char) *cp)) {
    cp++;
  }
  if (!isdigit ((unsigned char) *cp)) {
    return;
  }
  serial_number = atoi (cp);

  if (itemtype == OBJ_SEQFEAT) {
    sfp = SeqMgrGetDesiredFeature (entityID, NULL, itemID, 0, NULL, &fcontext);
    if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) {
      pdp = (PubdescPtr) sfp->data.value.ptrvalue;
      bsp = GetSequenceForObject (OBJ_SEQFEAT, sfp);
    }
  } else if (itemtype == OBJ_SEQDESC) {
    sdp = SeqMgrGetDesiredDescriptor (entityID, NULL, itemID, 0, NULL, &dcontext);
    if (sdp != NULL && sdp->choice == Seq_descr_pub) {
      pdp = (PubdescPtr) sdp->data.ptrvalue;
      bsp = GetSequenceForObject (OBJ_SEQDESC, sdp);
    }
  }

  if (pdp == NULL || bsp == NULL) {
    return;
  }

  /* MinimizePub is handed a temporary PUB_Equiv wrapper around the pub list;
   * only the wrapper node is released afterwards
   */
  vnp = ValNodeNew (NULL);
  if (vnp != NULL) {
    vnp->choice = PUB_Equiv;
    vnp->data.ptrvalue = pdp->pub;
    ppr = MinimizePub (vnp);
    ValNodeFree (vnp);
  }

  /* wrap the minimized pub and hand it to a new record */
  vnp = ValNodeNew (NULL);
  if (vnp != NULL) {
    vnp->choice = PUB_Equiv;
    vnp->data.ptrvalue = ppr;
    psn = PubSerialNumberNew ();
    if (psn == NULL) {
      /* no record to own the wrapper: release it the same way
       * PubSerialNumberFree would have
       */
      vnp = PubSetFree (vnp);
      return;
    }
    psn->bsp = bsp;
    psn->serial_number = serial_number;
    psn->min_pub = vnp;
    ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, psn);
  }
}


/*
 * GetCitListsForSeqEntry
 *
 * Generates the GenBank flat file for `sep` with CaptureRefBlockSerialNumbers
 * installed as the writer callback, and returns the resulting list of
 * PubSerialNumber records (release it with PubSerialNumberListFree).
 * The message level and sequence scope are restored before returning.
 */
NLM_EXTERN ValNodePtr GetCitListsForSeqEntry (SeqEntryPtr sep) { XtraBlock xtra; ValNodePtr head = NULL; ErrSev level; Boolean okay; SeqEntryPtr oldscope; Uint2 entityID; if (sep == NULL) return NULL; MemSet ((Pointer) &xtra, 0, sizeof (XtraBlock)); xtra.ffwrite = CaptureRefBlockSerialNumbers; xtra.userdata = (Pointer) &head; level = ErrSetMessageLevel (SEV_MAX); oldscope = SeqEntrySetScope (sep); okay = SeqEntryToGnbk (sep, NULL, GENBANK_FMT, SEQUIN_MODE, NORMAL_STYLE, SHOW_CONTIG_FEATURES, 0, 0, &xtra, NULL); entityID = SeqMgrGetEntityIDForSeqEntry (sep); SeqEntrySetScope (oldscope); ErrSetMessageLevel
(level); return head; } NLM_EXTERN Int4 GetCitationNumberForMinPub (BioseqPtr bsp, ValNodePtr min_pub, ValNodePtr pub_list) { Int4 rval = -1; PubSerialNumberPtr psn; ValNodePtr vnp, tmp; if (bsp == NULL || min_pub == NULL || pub_list == NULL) { return -1; } tmp = ValNodeNew (NULL); tmp->choice = PUB_Equiv; tmp->data.ptrvalue = min_pub; for (vnp = pub_list; vnp != NULL && rval == -1; vnp = vnp->next) { psn = (PubSerialNumberPtr) vnp->data.ptrvalue; if (psn->bsp == bsp) { if (PubLabelMatch (tmp, psn->min_pub) == 0) { rval = psn->serial_number; } } } tmp = ValNodeFree (tmp); return rval; } NLM_EXTERN ValNodePtr GetMinPubForCitationNumber (BioseqPtr bsp, Int4 number, ValNodePtr pub_list) { ValNodePtr rval = NULL; PubSerialNumberPtr psn; ValNodePtr vnp; if (bsp == NULL || number < 0 || pub_list == NULL) { return NULL; } for (vnp = pub_list; vnp != NULL && rval == NULL; vnp = vnp->next) { psn = (PubSerialNumberPtr) vnp->data.ptrvalue; if (psn->bsp == bsp && psn->serial_number == number) { rval = psn->min_pub; } } return rval; } /* * Some batch operations will be faster if information about the entire record is collected once * and reused. The BatchExtra structure is where such data belongs. 
*/ NLM_EXTERN BatchExtraPtr BatchExtraNew () { BatchExtraPtr b; b = (BatchExtraPtr) MemNew (sizeof (BatchExtraData)); b->cit_list = NULL; return b; } NLM_EXTERN BatchExtraPtr BatchExtraFree (BatchExtraPtr b) { if (b != NULL) { b->cit_list = PubSerialNumberListFree (b->cit_list); b = MemFree (b); } return b; } static Boolean IsCitationField (FieldTypePtr field) { FeatureFieldPtr feature_field; if (field != NULL && field->choice == FieldType_feature_field && (feature_field = field->data.ptrvalue) != NULL && feature_field->field != NULL && ((feature_field->field->choice == FeatQualChoice_legal_qual && feature_field->field->data.intvalue == Feat_qual_legal_citation) || (feature_field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", feature_field->field->data.ptrvalue)))) { return TRUE; } else { return FALSE; } } static void InitBatchExtraForField (BatchExtraPtr batch_extra, FieldTypePtr field, SeqEntryPtr sep) { if (batch_extra == NULL) { return; } /* only need to collect citations if citation is in the list of applicable fields */ if (IsCitationField (field)) { ValNodeLink (&(batch_extra->cit_list), GetCitListsForSeqEntry (sep)); } } static void InitBatchExtraForAECRAction (BatchExtraPtr batch_extra, AECRActionPtr action, SeqEntryPtr sep) { ValNodePtr field_list, field; if (batch_extra == NULL || action == NULL) { return; } field_list = GetFieldTypeListFromAECRAction (action); for (field = field_list; field != NULL; field = field->next) { InitBatchExtraForField (batch_extra, field, sep); } field_list = FieldTypeListFree (field_list); } NLM_EXTERN int LIBCALLBACK SortVnpByObject (VoidPtr ptr1, VoidPtr ptr2) { ValNodePtr vnp1; ValNodePtr vnp2; CharPtr str1, str2; int rval = 0; if (ptr1 != NULL && ptr2 != NULL) { vnp1 = *((ValNodePtr PNTR) ptr1); vnp2 = *((ValNodePtr PNTR) ptr2); if (vnp1 != NULL && vnp2 != NULL) { if (vnp1->choice < vnp2->choice) { rval = -1; } else if (vnp1->choice > vnp2->choice) { rval = 1; } else { str1 = 
GetDiscrepancyItemText (vnp1); str2 = GetDiscrepancyItemText (vnp2); rval = StringCmp (str1, str1); str1 = MemFree (str1); str2 = MemFree (str2); } } } return rval; } static ValNodePtr BioseqListForObjectList (ValNodePtr object_list) { ValNodePtr vnp, bsp_list = NULL; BioseqPtr bsp; for (vnp = object_list; vnp != NULL; vnp = vnp->next) { bsp = GetSequenceForObject (vnp->choice, vnp->data.ptrvalue); if (bsp != NULL) { ValNodeAddPointer (&bsp_list, OBJ_BIOSEQ, bsp); } } bsp_list = ValNodeSort (bsp_list, SortVnpByObject); ValNodeUnique (&bsp_list, SortVnpByObject, ValNodeFree); return bsp_list; } static void InitBatchExtraForAECRActionAndObjectList (BatchExtraPtr batch_extra, AECRActionPtr action, ValNodePtr object_list) { ValNodePtr field_list, field; ValNodePtr bsp_list = NULL, vnp; SeqEntryPtr sep; if (batch_extra == NULL || action == NULL) { return; } field_list = GetFieldTypeListFromAECRAction (action); bsp_list = BioseqListForObjectList (object_list); for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { sep = SeqMgrGetSeqEntryForData (vnp->data.ptrvalue); for (field = field_list; field != NULL; field = field->next) { InitBatchExtraForField (batch_extra, field, sep); } } bsp_list = ValNodeFree (bsp_list); field_list = FieldTypeListFree (field_list); } NLM_EXTERN CharPtr GetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra) { CharPtr str = NULL; FeatureFieldPtr feature_field; SeqDescrPtr sdp; GBBlockPtr gb; SeqMgrDescContext context; if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return NULL; switch (field->choice) { case FieldType_source_qual : str = GetSourceQualFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp); break; case FieldType_feature_field : if (choice == OBJ_SEQFEAT) { str = GetQualFromFeatureEx ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp, batch_extra); } break; case FieldType_cds_gene_prot 
: if (choice == 0) { str = GetFieldValueFromCGPSet ((CGPSetPtr) data, field->data.intvalue, scp); } else if (choice == OBJ_SEQFEAT) { feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue); str = GetQualFromFeature ((SeqFeatPtr) data, feature_field, scp); feature_field = FeatureFieldFree (feature_field); } break; case FieldType_molinfo_field : if (choice == OBJ_BIOSEQ) { str = GetSequenceQualFromBioseq ((BioseqPtr) data, field->data.ptrvalue); } break; case FieldType_pub : str = GetPubFieldFromObject (choice, data, field->data.intvalue, scp); break; case FieldType_rna_field : if (choice == OBJ_SEQFEAT) { str = GetRNAQualFromFeature ((SeqFeatPtr) data, field->data.ptrvalue, scp, NULL); } break; case FieldType_struc_comment_field: if (choice == OBJ_SEQDESC && data != NULL) { sdp = (SeqDescrPtr) data; if (sdp != NULL && sdp->choice == Seq_descr_user) { str = GetStructuredCommentFieldFromUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp); } } break; case FieldType_dblink: if (choice == OBJ_SEQDESC && data != NULL) { sdp = (SeqDescrPtr) data; if (sdp != NULL && sdp->choice == Seq_descr_user) { str = GetDBLinkFieldFromUserObject (sdp->data.ptrvalue, field->data.intvalue, scp); } } else if (choice == OBJ_BIOSEQ) { for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_user, &context); sdp != NULL && str == NULL; sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_user, &context)) { str = GetDBLinkFieldFromUserObject (sdp->data.ptrvalue, field->data.intvalue, scp); } } break; case FieldType_misc: if (choice == OBJ_BIOSEQ) { if (field->data.intvalue == Misc_field_genome_project_id) { str = GetGenomeProjectIdFromBioseq ((BioseqPtr) data, scp); } else if (field->data.intvalue == Misc_field_comment_descriptor) { str = NULL; for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_comment, &context); sdp != NULL && str == NULL; sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_comment, &context)) { 
if (DoesStringMatchConstraint (sdp->data.ptrvalue, scp)) { str = StringSave (sdp->data.ptrvalue); } } } else if (field->data.intvalue == Misc_field_defline) { str = NULL; for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_title, &context); sdp != NULL && str == NULL; sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_title, &context)) { if (DoesStringMatchConstraint (sdp->data.ptrvalue, scp)) { str = StringSave (sdp->data.ptrvalue); } } } else if (field->data.intvalue == Misc_field_keyword) { str = NULL; for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_genbank, &context); sdp != NULL && str == NULL; sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_genbank, &context)) { gb = (GBBlockPtr) sdp->data.ptrvalue; str = GetFirstValNodeStringMatch (gb->keywords, scp); } } } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor) { sdp = (SeqDescrPtr) data; if (sdp != NULL && sdp->choice == Seq_descr_comment && !StringHasNoText (sdp->data.ptrvalue)) { str = StringSave (sdp->data.ptrvalue); } } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_defline) { sdp = (SeqDescrPtr) data; if (sdp != NULL && sdp->choice == Seq_descr_title && !StringHasNoText (sdp->data.ptrvalue)) { str = StringSave (sdp->data.ptrvalue); } } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword) { sdp = (SeqDescrPtr) data; if (sdp != NULL && sdp->choice == Seq_descr_genbank && (gb = (GBBlockPtr) sdp->data.ptrvalue) != NULL) { str = GetFirstValNodeStringMatch (gb->keywords, scp); } } break; } return str; }


/* Convenience wrapper: GetFieldValueForObjectEx without batch data. */
NLM_EXTERN CharPtr GetFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp)
{
  return GetFieldValueForObjectEx (choice, data, field, scp, NULL);
}


/* Collects the values of field on the given object as a ValNode list.
 *
 *   choice, data - object type and object
 *   field        - field to read
 *   scp          - optional string constraint passed to the getters
 *   batch_extra  - optional batch data, forwarded to GetFieldValueForObjectEx
 *
 * Source qualifiers and DBLink fields on a user-object descriptor use their
 * dedicated multi-value getters; every other field contributes at most the
 * single string returned by GetFieldValueForObjectEx.
 * Returns NULL when there is nothing to report or an argument is missing.
 */
NLM_EXTERN ValNodePtr GetMultipleFieldValuesForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra)
{
  CharPtr    str = NULL;
  ValNodePtr val_list = NULL;
  SeqDescPtr sdp;

  /* this function returns a pointer, so the "nothing" value is NULL, not the Boolean FALSE */
  if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return NULL;

  if (field->choice == FieldType_source_qual) {
    val_list = GetMultipleSourceQualsFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp);
  } else if (field->choice == FieldType_dblink
             && choice == OBJ_SEQDESC
             && (sdp = (SeqDescPtr) data) != NULL
             && sdp->choice == Seq_descr_user) {
    val_list = GetMultipleDBLinkFieldValuesFromUserObject ((UserObjectPtr) sdp->data.ptrvalue, field->data.intvalue, scp);
  } else {
    str = GetFieldValueForObjectEx (choice, data, field, scp, batch_extra);
    if (str != NULL) {
      /* the list takes over the string */
      ValNodeAddPointer (&val_list, 0, str);
    }
  }
  return val_list;
}


/* Returns TRUE when gb is non-NULL and every member examined here
 * (extra_accessions, keywords, source, origin, date, div, taxonomy,
 * entry_date) is NULL.  A NULL gb yields FALSE.
 */
NLM_EXTERN Boolean GBBlockIsCompletelyEmpty (GBBlockPtr gb)
{
  if (gb != NULL
      && gb->extra_accessions == NULL
      && gb->keywords == NULL
      && gb->source == NULL
      && gb->origin == NULL
      && gb->date == NULL
      && gb->div == NULL
      && gb->taxonomy == NULL
      && gb->entry_date == NULL) {
    return TRUE;
  } else {
    return FALSE;
  }
}


static Boolean RemoveFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp) { Boolean rval = FALSE; FeatureFieldPtr feature_field; SeqDescrPtr sdp; ObjValNodePtr ovp; GBBlockPtr gb; if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE; switch (field->choice) { case FieldType_source_qual : rval = RemoveSourceQualFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp); break; case FieldType_feature_field : if (choice == OBJ_SEQFEAT) { rval = RemoveQualFromFeature ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp); } break; case FieldType_cds_gene_prot: if (choice == 0) { rval = RemoveFieldValueFromCGPSet ((CGPSetPtr) data, field->data.intvalue, scp); } else if (choice == OBJ_SEQFEAT) { feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue); rval = RemoveQualFromFeature ((SeqFeatPtr) data, feature_field, 
scp); feature_field = FeatureFieldFree (feature_field); } break; case FieldType_molinfo_field : if (choice == OBJ_BIOSEQ) { rval = RemoveSequenceQualFromBioseq ((BioseqPtr) data, field->data.ptrvalue); } break; case FieldType_pub : rval = RemovePubFieldFromObject (choice, data, field->data.intvalue, scp); break; case FieldType_rna_field : if (choice == OBJ_SEQFEAT) { rval = RemoveRNAQualFromFeature ((SeqFeatPtr) data, field->data.ptrvalue, scp); } break; case FieldType_struc_comment_field: if (choice == OBJ_SEQDESC && data != NULL) { sdp = (SeqDescrPtr) data; if (sdp != NULL && sdp->choice == Seq_descr_user) { rval = RemoveStructuredCommentFieldFromUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp); if (rval && IsEmptyStructuredComment (sdp->data.ptrvalue)) { ovp = (ObjValNodePtr) sdp; ovp->idx.deleteme = TRUE; } } } break; case FieldType_dblink: if (choice == OBJ_SEQDESC && data != NULL) { sdp = (SeqDescrPtr) data; if (sdp != NULL && sdp->choice == Seq_descr_user) { rval = RemoveDBLinkFieldFromUserObject (sdp->data.ptrvalue, field->data.intvalue, scp); if (rval && IsEmptyDBLink (sdp->data.ptrvalue)) { ovp = (ObjValNodePtr) sdp; ovp->idx.deleteme = TRUE; } } } break; case FieldType_misc: if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) { rval = RemoveGenomeProjectIdFromBioseq ((BioseqPtr) data, scp); } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor) { sdp = (SeqDescrPtr) data; ovp = (ObjValNodePtr) sdp; if (sdp->choice == Seq_descr_comment) { ovp->idx.deleteme = TRUE; rval = TRUE; } } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_defline) { sdp = (SeqDescrPtr) data; ovp = (ObjValNodePtr) sdp; if (sdp->choice == Seq_descr_title) { ovp->idx.deleteme = TRUE; rval = TRUE; } } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword) { sdp = (SeqDescrPtr) data; ovp = (ObjValNodePtr) sdp; if (sdp->choice == Seq_descr_genbank && (gb = (GBBlockPtr) 
sdp->data.ptrvalue) != NULL) { if (RemoveValNodeStringMatch (&(gb->keywords), scp)) { rval = TRUE; if (GBBlockIsCompletelyEmpty(gb)) { ovp->idx.deleteme = TRUE; } } } } break; } return rval; }


/* Sets the value of field on an object, dispatching on field->choice.
 *
 *   choice        - object type of data (OBJ_SEQFEAT, OBJ_SEQDESC,
 *                   OBJ_BIOSEQ); 0 means data is a CGPSetPtr
 *   data          - the object to modify
 *   field         - which field to set
 *   scp           - optional string constraint, forwarded to the setters
 *   value         - new text
 *   existing_text - forwarded to the setters (how to combine with existing text)
 *   batch_extra   - optional batch data, forwarded to SetQualOnFeatureEx
 *
 * Returns the result of the setter that was called; FALSE when data or
 * field is missing or the field does not apply to this kind of object.
 */
NLM_EXTERN Boolean SetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra)
{
  Boolean rval = FALSE;
  FeatureFieldPtr feature_field;
  SeqDescrPtr sdp;
  ObjValNodePtr ovp;
  GBBlockPtr gb;
  Boolean was_empty;

  if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE;

  switch (field->choice) {
    case FieldType_source_qual :
      rval = SetSourceQualInBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp, value, existing_text);
      break;
    case FieldType_feature_field :
      if (choice == OBJ_SEQFEAT) {
        rval = SetQualOnFeatureEx ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp, value, existing_text, batch_extra);
      }
      break;
    case FieldType_cds_gene_prot:
      if (choice == 0) {
        /* data is a CDS-gene-prot set */
        rval = SetFieldValueInCGPSet ((CGPSetPtr) data, field->data.intvalue, scp, value, existing_text);
      } else if (choice == OBJ_SEQFEAT) {
        /* translate the CDS-gene-prot field into a temporary feature field */
        feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
        rval = SetQualOnFeatureEx ((SeqFeatPtr) data, feature_field, scp, value, existing_text, batch_extra);
        feature_field = FeatureFieldFree (feature_field);
      }
      break;
    case FieldType_molinfo_field:
      if (choice == OBJ_BIOSEQ) {
        rval = SetSequenceQualOnBioseq ((BioseqPtr) data, field->data.ptrvalue);
      }
      break;
    case FieldType_pub :
      rval = SetPubFieldOnObject (choice, data, field->data.intvalue, scp, value, existing_text);
      break;
    case FieldType_rna_field :
      if (choice == OBJ_SEQFEAT) {
        rval = SetRNAQualOnFeature ((SeqFeatPtr) data, field->data.ptrvalue, scp, value, existing_text);
      }
      break;
    case FieldType_struc_comment_field:
      if (choice == OBJ_SEQDESC && data != NULL) {
        sdp = (SeqDescrPtr) data;
        if (sdp != NULL && sdp->choice == Seq_descr_user) {
          was_empty = IsEmptyStructuredComment (sdp->data.ptrvalue);
          rval = SetStructuredCommentFieldOnUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp, value, existing_text);
          /* a comment that just gained content must not stay marked for deletion */
          if (was_empty && !IsEmptyStructuredComment (sdp->data.ptrvalue)) {
            ovp = (ObjValNodePtr) sdp;
            ovp->idx.deleteme = FALSE;
          }
        }
      }
      break;
    case FieldType_dblink:
      if (choice == OBJ_SEQDESC && data != NULL) {
        sdp = (SeqDescrPtr) data;
        if (sdp != NULL && sdp->choice == Seq_descr_user) {
          was_empty = IsEmptyDBLink (sdp->data.ptrvalue);
          rval = SetDBLinkFieldOnUserObject (sdp->data.ptrvalue, field->data.intvalue, scp, value, existing_text);
          /* same un-delete rule as for structured comments */
          if (was_empty && !IsEmptyDBLink (sdp->data.ptrvalue)) {
            ovp = (ObjValNodePtr) sdp;
            ovp->idx.deleteme = FALSE;
          }
        }
      }
      break;
    case FieldType_misc:
      if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) {
        rval = SetGenomeProjectIdOnBioseq ((BioseqPtr) data, scp, value, existing_text);
      } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor) {
        sdp = (SeqDescrPtr) data;
        if (sdp->choice == Seq_descr_comment) {
          rval = SetTextDescriptor (sdp, scp, value, existing_text);
        }
      } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_defline) {
        sdp = (SeqDescrPtr) data;
        if (sdp->choice == Seq_descr_title) {
          rval = SetTextDescriptor (sdp, scp, value, existing_text);
          /* called whether or not the title text was changed */
          RemoveAutodefObjectsForDesc(sdp);
        }
      } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword) {
        sdp = (SeqDescrPtr) data;
        if (sdp->choice == Seq_descr_genbank && (gb = (GBBlockPtr) sdp->data.ptrvalue) != NULL) {
          was_empty = GBBlockIsCompletelyEmpty (gb);
          if (SetStringsInValNodeStringList (&(gb->keywords), scp, value, existing_text)) {
            rval = TRUE;
            /* the deleteme flag is only touched when the descriptor is marked extended */
            if (sdp->extended) {
              ovp = (ObjValNodePtr) sdp;
              if (GBBlockIsCompletelyEmpty(gb)) {
                ovp->idx.deleteme = TRUE;
              } else if (was_empty) {
                ovp->idx.deleteme = FALSE;
              }
            }
          }
        }
      }
      break;
  }
  return rval;
}


NLM_EXTERN Boolean SetFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, 
StringConstraintPtr scp, CharPtr value, Uint2 existing_text) { return SetFieldValueForObjectEx (choice, data, field, scp, value, existing_text, NULL); } NLM_EXTERN Boolean SortFieldsForObject (Uint1 choice, Pointer data, FieldTypePtr field, Uint2 order) { Boolean rval = FALSE; FeatureFieldPtr feature_field; if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE; switch (field->choice) { case FieldType_source_qual : break; case FieldType_feature_field : if (choice == OBJ_SEQFEAT) { rval = SortQualOnFeature ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, order); } break; case FieldType_cds_gene_prot: if (choice == 0) { rval = SortFieldInCGPSet ((CGPSetPtr) data, field->data.intvalue, order); } else if (choice == OBJ_SEQFEAT) { feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue); rval = SortQualOnFeature ((SeqFeatPtr) data, feature_field, order); feature_field = FeatureFieldFree (feature_field); } break; case FieldType_molinfo_field: break; case FieldType_pub : break; case FieldType_rna_field : break; case FieldType_struc_comment_field: break; case FieldType_dblink: break; case FieldType_misc: break; } return rval; } NLM_EXTERN ValNodePtr GetFieldTypeListFromAECRAction (AECRActionPtr action) { ValNodePtr field_list = NULL; ApplyActionPtr apply; EditActionPtr edit; ConvertActionPtr convert; CopyActionPtr copy; SwapActionPtr swap; RemoveActionPtr remove; AECRParseActionPtr parse; RemoveOutsideActionPtr ro; if (action == NULL) { return NULL; } /* todo - add fields from constraints ? 
*/ /* get fields from action */ if (action->action != NULL) { switch (action->action->choice) { case ActionChoice_apply: apply = (ApplyActionPtr) action->action->data.ptrvalue; if (apply != NULL) { ValNodeLink (&field_list, FieldTypeCopy (apply->field)); } break; case ActionChoice_edit: edit = (EditActionPtr) action->action->data.ptrvalue; if (edit != NULL) { ValNodeLink (&field_list, FieldTypeCopy (edit->field)); } break; case ActionChoice_remove_outside: ro = (RemoveOutsideActionPtr) action->action->data.ptrvalue; if (ro != NULL) { ValNodeLink (&field_list, FieldTypeCopy (ro->field)); } break; case ActionChoice_convert: convert = (ConvertActionPtr) action->action->data.ptrvalue; if (convert != NULL) { ValNodeLink (&field_list, GetFromFieldFromFieldPair (convert->fields)); ValNodeLink (&field_list, GetToFieldFromFieldPair (convert->fields)); } break; case ActionChoice_copy: copy = (CopyActionPtr) action->action->data.ptrvalue; if (copy != NULL) { ValNodeLink (&field_list, GetFromFieldFromFieldPair (copy->fields)); ValNodeLink (&field_list, GetToFieldFromFieldPair (copy->fields)); } break; case ActionChoice_swap: swap = (SwapActionPtr) action->action->data.ptrvalue; if (swap != NULL) { ValNodeLink (&field_list, GetFromFieldFromFieldPair (swap->fields)); ValNodeLink (&field_list, GetToFieldFromFieldPair (swap->fields)); } break; case ActionChoice_remove: remove = (RemoveActionPtr) action->action->data.ptrvalue; if (remove != NULL) { ValNodeLink (&field_list, FieldTypeCopy (remove->field)); } break; case ActionChoice_parse: parse = (AECRParseActionPtr) action->action->data.ptrvalue; if (parse != NULL) { ValNodeLink (&field_list, GetFromFieldFromFieldPair (parse->fields)); ValNodeLink (&field_list, GetToFieldFromFieldPair (parse->fields)); } break; } } return field_list; } NLM_EXTERN Boolean AreAECRActionFieldsEqual (AECRActionPtr action1, AECRActionPtr action2) { ApplyActionPtr a1, a2; EditActionPtr e1, e2; ConvertActionPtr v1, v2; CopyActionPtr c1, c2; SwapActionPtr 
s1, s2; RemoveActionPtr r1, r2; AECRParseActionPtr p1, p2; RemoveOutsideActionPtr ro1, ro2; FieldTypePtr field1, field2; Boolean rval = FALSE; if (action1 == NULL && action2 == NULL) { return TRUE; } else if (action1 == NULL || action2 == NULL) { return FALSE; } else if (action1->action == NULL && action2->action == NULL) { return TRUE; } else if (action1->action == NULL || action2->action == NULL) { return FALSE; } else if (action1->action->choice != action2->action->choice) { return FALSE; } else if (action1->action->data.ptrvalue == NULL && action2->action->data.ptrvalue == NULL) { return TRUE; } else if (action1->action->data.ptrvalue == NULL || action2->action->data.ptrvalue == NULL) { return FALSE; } else { switch (action1->action->choice) { case ActionChoice_apply: a1 = (ApplyActionPtr) action1->action->data.ptrvalue; a2 = (ApplyActionPtr) action2->action->data.ptrvalue; rval = DoFieldTypesMatch (a1->field, a2->field); break; case ActionChoice_edit: e1 = (EditActionPtr) action1->action->data.ptrvalue; e2 = (EditActionPtr) action2->action->data.ptrvalue; rval = DoFieldTypesMatch (e1->field, e2->field); break; case ActionChoice_remove_outside: ro1 = (RemoveOutsideActionPtr) action1->action->data.ptrvalue; ro2 = (RemoveOutsideActionPtr) action2->action->data.ptrvalue; rval = DoFieldTypesMatch (ro1->field, ro2->field); break; case ActionChoice_convert: v1 = (ConvertActionPtr) action1->action->data.ptrvalue; v2 = (ConvertActionPtr) action2->action->data.ptrvalue; field1 = GetFromFieldFromFieldPair (v1->fields); field2 = GetFromFieldFromFieldPair (v2->fields); rval = DoFieldTypesMatch (field1, field2); if (rval) { field1 = FieldTypeFree (field1); field2 = FieldTypeFree (field2); field1 = GetToFieldFromFieldPair (v1->fields); field2 = GetToFieldFromFieldPair (v2->fields); rval = DoFieldTypesMatch (field1, field2); } field1 = FieldTypeFree (field1); field2 = FieldTypeFree (field2); if (rval) { if ((v1->keep_original && !v2->keep_original) || (!v1->keep_original && 
v2->keep_original)) { rval = FALSE; } } break; case ActionChoice_copy: c1 = (CopyActionPtr) action1->action->data.ptrvalue; c2 = (CopyActionPtr) action2->action->data.ptrvalue; field1 = GetFromFieldFromFieldPair (c1->fields); field2 = GetFromFieldFromFieldPair (c2->fields); rval = DoFieldTypesMatch (field1, field2); if (rval) { field1 = FieldTypeFree (field1); field2 = FieldTypeFree (field2); field1 = GetToFieldFromFieldPair (c1->fields); field2 = GetToFieldFromFieldPair (c2->fields); rval = DoFieldTypesMatch (field1, field2); } field1 = FieldTypeFree (field1); field2 = FieldTypeFree (field2); break; case ActionChoice_swap: s1 = (SwapActionPtr) action1->action->data.ptrvalue; s2 = (SwapActionPtr) action2->action->data.ptrvalue; field1 = GetFromFieldFromFieldPair (s1->fields); field2 = GetFromFieldFromFieldPair (s2->fields); rval = DoFieldTypesMatch (field1, field2); if (rval) { field1 = FieldTypeFree (field1); field2 = FieldTypeFree (field2); field1 = GetToFieldFromFieldPair (s1->fields); field2 = GetToFieldFromFieldPair (s2->fields); rval = DoFieldTypesMatch (field1, field2); } field1 = FieldTypeFree (field1); field2 = FieldTypeFree (field2); break; case ActionChoice_remove: r1 = (RemoveActionPtr) action1->action->data.ptrvalue; r2 = (RemoveActionPtr) action2->action->data.ptrvalue; rval = DoFieldTypesMatch (r1->field, r2->field); break; case ActionChoice_parse: p1 = (AECRParseActionPtr) action1->action->data.ptrvalue; p2 = (AECRParseActionPtr) action2->action->data.ptrvalue; field1 = GetFromFieldFromFieldPair (p1->fields); field2 = GetFromFieldFromFieldPair (p2->fields); rval = DoFieldTypesMatch (field1, field2); if (rval) { field1 = FieldTypeFree (field1); field2 = FieldTypeFree (field2); field1 = GetToFieldFromFieldPair (p1->fields); field2 = GetToFieldFromFieldPair (p2->fields); rval = DoFieldTypesMatch (field1, field2); } field1 = FieldTypeFree (field1); field2 = FieldTypeFree (field2); break; } } return rval; } static Boolean IsNonTextSourceQualPresent 
(BioSourcePtr biop, Int4 srcqual) { Int4 orgmod_subtype, subsrc_subtype, subfield; OrgModPtr mod; SubSourcePtr ssp; Boolean rval = FALSE; if (biop == NULL) return FALSE; orgmod_subtype = GetOrgModQualFromSrcQual (srcqual, &subfield); if (orgmod_subtype == -1) { subsrc_subtype = GetSubSrcQualFromSrcQual (srcqual, &subfield); for (ssp = biop->subtype; ssp != NULL && !rval; ssp = ssp->next) { if (ssp->subtype == subsrc_subtype) { rval = TRUE; } } } else { if (biop->org != NULL && biop->org->orgname != NULL) { for (mod = biop->org->orgname->mod; mod != NULL && !rval; mod = mod->next) { if (mod->subtype == orgmod_subtype) { rval = TRUE; } } } } return rval; } static Boolean IsSourceQualPresent (BioSourcePtr biop, SourceQualChoicePtr scp) { Boolean rval = FALSE; CharPtr str; if (biop == NULL) return FALSE; if (scp == NULL) return TRUE; switch (scp->choice) { case SourceQualChoice_textqual: if (IsNonTextSourceQual (scp->data.intvalue)) { rval = IsNonTextSourceQualPresent (biop, scp->data.intvalue); } else { str = GetSourceQualFromBioSource (biop, scp, NULL); if (!StringHasNoText (str)) { rval = TRUE; } str = MemFree (str); } break; case SourceQualChoice_location: if (biop->genome != 0) { rval = TRUE; } break; case SourceQualChoice_origin: if (biop->origin != 0) { rval = TRUE; } break; } return rval; } typedef struct objecthasstring { StringConstraintPtr scp; Boolean found; } ObjectHasStringData, PNTR ObjectHasStringPtr; static void LIBCALLBACK AsnWriteConstraintCallBack (AsnExpOptStructPtr pAEOS) { CharPtr pchSource; ObjectHasStringPtr ohsp; ohsp = (ObjectHasStringPtr) pAEOS->data; if (ISA_STRINGTYPE (AsnFindBaseIsa (pAEOS->atp))) { pchSource = (CharPtr) pAEOS->dvp->ptrvalue; ohsp->found |= DoesSingleStringMatchConstraint (pchSource, ohsp->scp); } } static Boolean DoesObjectMatchStringConstraint (Uint1 choice, Pointer data, StringConstraintPtr scp) { ObjMgrPtr omp; ObjMgrTypePtr omtp; AsnIoPtr aip; AsnExpOptPtr aeop; ObjectHasStringData ohsd; SeqFeatPtr sfp, prot; 
SeqMgrFeatContext fcontext; CharPtr search_txt; CGPSetPtr c; ValNodePtr vnp; Boolean all_match = TRUE, any_match = FALSE, rval; BioseqPtr protbsp; ImpFeatPtr imp; if (data == NULL) return FALSE; if (scp == NULL) return TRUE; if (choice == 0) { /* CDS-Gene-Prot set */ c = (CGPSetPtr) data; for (vnp = c->gene_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) { if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) { any_match = TRUE; } else { all_match = FALSE; } } for (vnp = c->cds_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) { if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) { any_match = TRUE; } else { all_match = FALSE; } } for (vnp = c->mrna_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) { if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) { any_match = TRUE; } else { all_match = FALSE; } } for (vnp = c->prot_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) { if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) { any_match = TRUE; } else { all_match = FALSE; } } if (scp->not_present) { rval = all_match; } else { rval = any_match; } } else { omp = ObjMgrGet (); omtp = ObjMgrTypeFind (omp, choice, NULL, NULL); if (omtp == NULL) return FALSE; aip = AsnIoNullOpen (); aeop = AsnExpOptNew (aip, NULL, NULL, AsnWriteConstraintCallBack); ohsd.found = FALSE; ohsd.scp = scp; if (aeop != NULL) { aeop->user_data = (Pointer) &ohsd; } (omtp->asnwrite) (data, aip, NULL); if (!ohsd.found && omtp->datatype == OBJ_SEQFEAT) { sfp = (SeqFeatPtr) data; if (sfp->data.choice == SEQFEAT_CDREGION) { protbsp = BioseqFindFromSeqLoc (sfp->product); prot = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext); if (prot != NULL) { (omtp->asnwrite) (prot, aip, NULL); } } else { if (SeqMgrFeaturesAreIndexed(sfp->idx.entityID) == 0) { SeqMgrIndexFeatures (sfp->idx.entityID, NULL); } if (sfp->idx.subtype == FEATDEF_tRNA) { 
sfp = SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, sfp->idx.itemID, 0, sfp, &fcontext); ohsd.found = DoesSingleStringMatchConstraint (fcontext.label, ohsd.scp); if (!ohsd.found && sfp != NULL && sfp->idx.subtype == FEATDEF_tRNA) { search_txt = (CharPtr) MemNew ((StringLen (fcontext.label) + 6) * sizeof (Char)); if (search_txt != NULL) { sprintf (search_txt, "tRNA-%s", fcontext.label); ohsd.found = DoesSingleStringMatchConstraint (search_txt, ohsd.scp); search_txt = MemFree (search_txt); } } } else if (!ohsd.found && sfp != NULL && sfp->data.choice == SEQFEAT_IMP && (imp = (ImpFeatPtr) sfp->data.value.ptrvalue) != NULL) { ohsd.found = DoesSingleStringMatchConstraint (imp->key, ohsd.scp); } } } AsnIoClose (aip); if (scp->not_present) { rval = !ohsd.found; } else { rval = ohsd.found; } } return rval; } NLM_EXTERN Boolean IsSourceConstraintEmpty (SourceConstraintPtr scp) { if (scp == NULL) return TRUE; if (scp->field1 == NULL && scp->field2 == NULL && IsStringConstraintEmpty(scp->constraint)) { return TRUE; } else { return FALSE; } } NLM_EXTERN Boolean DoesBiosourceMatchConstraint (BioSourcePtr biop, SourceConstraintPtr scp) { Boolean rval = FALSE; CharPtr str1, str2; ValNode vn; if (biop == NULL) return FALSE; if (scp == NULL) return TRUE; if (IsStringConstraintEmpty(scp->constraint)) { /* looking for qual present */ if (scp->field1 != NULL && scp->field2 == NULL) { rval = IsSourceQualPresent (biop, scp->field1); } else if (scp->field2 != NULL && scp->field1 == NULL) { rval = IsSourceQualPresent (biop, scp->field2); /* looking for quals to match */ } else if (scp->field1 != NULL && scp->field2 != NULL) { str1 = GetSourceQualFromBioSource (biop, scp->field1, NULL); str2 = GetSourceQualFromBioSource (biop, scp->field2, NULL); if (StringCmp (str1, str2) == 0) { rval = TRUE; } str1 = MemFree (str1); str2 = MemFree (str2); } else { /* nothing specified, automatic match */ rval = TRUE; } } else { if (scp->field1 != NULL && scp->field2 == NULL) { if 
(AllowSourceQualMulti(scp->field1) && scp->constraint->not_present) { scp->constraint->not_present = FALSE; str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint); scp->constraint->not_present = TRUE; if (str1 != NULL) { rval = FALSE; } else { rval = TRUE; } str1 = MemFree (str1); } else { str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint); if (str1 == NULL) { if (scp->constraint->not_present) { str1 = GetSourceQualFromBioSource (biop, scp->field1, NULL); if (str1 == NULL) { rval = TRUE; } } } else if (!StringHasNoText (str1)) { rval = TRUE; } str1 = MemFree (str1); } } else if (scp->field2 != NULL && scp->field1 == NULL) { str2 = GetSourceQualFromBioSource (biop, scp->field2, scp->constraint); if (str2 == NULL) { if (scp->constraint->not_present) { str2 = GetSourceQualFromBioSource (biop, scp->field2, NULL); if (str2 == NULL) { rval = TRUE; } } } else if (!StringHasNoText (str2)) { rval = TRUE; } str2 = MemFree (str2); } else if (scp->field1 != NULL && scp->field2 != NULL) { str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint); str2 = GetSourceQualFromBioSource (biop, scp->field2, scp->constraint); if (StringCmp (str1, str2) == 0) { rval = TRUE; } str1 = MemFree (str1); str2 = MemFree (str2); } else { /* generic string constraint */ vn.choice = Seq_descr_source; vn.next = NULL; vn.extended = 0; vn.data.ptrvalue = biop; rval = DoesObjectMatchStringConstraint (OBJ_SEQDESC, &vn, scp->constraint); } } return rval; }


/* Returns TRUE if any feature in feat_list has its pseudo flag set.
 * When check_subtype is TRUE only features whose idx.subtype equals
 * required_subtype are considered.  NULL entries are skipped.
 */
static Boolean AnyPseudoFeatureInList (ValNodePtr feat_list, Boolean check_subtype, Int4 required_subtype)
{
  ValNodePtr vnp;
  SeqFeatPtr sfp;

  for (vnp = feat_list; vnp != NULL; vnp = vnp->next) {
    sfp = (SeqFeatPtr) vnp->data.ptrvalue;
    if (sfp == NULL || !sfp->pseudo) {
      continue;
    }
    if (!check_subtype || sfp->idx.subtype == required_subtype) {
      return TRUE;
    }
  }
  return FALSE;
}


/* Tests a CDS-gene-prot set against a pseudo constraint.
 *
 *   c          - the set; NULL never matches
 *   constraint - names a feature type and the wanted pseudo state;
 *                NULL always matches
 *
 * The set matches when "some feature of the named type is pseudo" agrees
 * with constraint->is_pseudo.  Each feature type is looked up in its own
 * list: genes in gene_list, mRNAs in mrna_list, coding regions in cds_list,
 * and proteins / mature peptides in prot_list (selected by subtype).
 * Previously the cds, prot and mat_peptide cases all scanned mrna_list, so
 * the protein cases could never find a pseudo feature.
 */
static Boolean DoesCGPSetMatchPseudoConstraint (CGPSetPtr c, CDSGeneProtPseudoConstraintPtr constraint)
{
  Boolean any_pseudo = FALSE;
  Boolean rval = FALSE;

  if (c == NULL) return FALSE;
  if (constraint == NULL) return TRUE;

  switch (constraint->feature) {
    case CDSGeneProt_feature_type_constraint_gene :
      any_pseudo = AnyPseudoFeatureInList (c->gene_list, FALSE, 0);
      break;
    case CDSGeneProt_feature_type_constraint_mRNA :
      any_pseudo = AnyPseudoFeatureInList (c->mrna_list, FALSE, 0);
      break;
    case CDSGeneProt_feature_type_constraint_cds :
      any_pseudo = AnyPseudoFeatureInList (c->cds_list, FALSE, 0);
      break;
    case CDSGeneProt_feature_type_constraint_prot :
      any_pseudo = AnyPseudoFeatureInList (c->prot_list, TRUE, FEATDEF_PROT);
      break;
    case CDSGeneProt_feature_type_constraint_mat_peptide :
      any_pseudo = AnyPseudoFeatureInList (c->prot_list, TRUE, FEATDEF_mat_peptide_aa);
      break;
  }

  if ((any_pseudo && constraint->is_pseudo)
      || (!any_pseudo && !constraint->is_pseudo)) {
    rval = TRUE;
  }
  return rval;
}


static Boolean DoesFeatureMatchCGPPseudoConstraint (SeqFeatPtr sfp, CDSGeneProtPseudoConstraintPtr constraint) { Boolean any_pseudo = FALSE; ValNodePtr feat_list, vnp; SeqFeatPtr gene, mrna, cds, prot; Boolean rval = FALSE; SeqMgrFeatContext fcontext; if (sfp == NULL) return FALSE; if (constraint == NULL) return TRUE; switch (constraint->feature) { case CDSGeneProt_feature_type_constraint_gene : if (sfp->data.choice == SEQFEAT_GENE) { if (sfp->pseudo) { any_pseudo = TRUE; } } else if (sfp->data.choice == SEQFEAT_PROT) { cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &fcontext); if (cds != NULL) { gene = GetGeneForFeature (cds); if (gene != NULL && gene->pseudo) { any_pseudo = TRUE; } } } else { gene = GetGeneForFeature (sfp); if (gene != NULL && gene->pseudo) { any_pseudo = TRUE; } } break; case CDSGeneProt_feature_type_constraint_mRNA : if (sfp->idx.subtype == 
FEATDEF_mRNA) { if (sfp->pseudo) { any_pseudo = TRUE; } } else if (sfp->data.choice == SEQFEAT_PROT) { cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &fcontext); if (cds != NULL) { mrna = GetmRNAforCDS (cds); if (mrna != NULL && mrna->pseudo) { any_pseudo = TRUE; } } } else { mrna = GetmRNAforCDS (sfp); if (mrna != NULL && mrna->pseudo) { any_pseudo = TRUE; } } break; case CDSGeneProt_feature_type_constraint_cds : if (sfp->idx.subtype == FEATDEF_CDS) { if (sfp->pseudo) { any_pseudo = TRUE; } } else if (sfp->data.choice == SEQFEAT_PROT) { cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &fcontext); if (cds != NULL && cds->pseudo) { any_pseudo = TRUE; } } else { feat_list = ListFeaturesInLocation (BioseqFindFromSeqLoc (sfp->location), sfp->location, SEQFEAT_CDREGION, FEATDEF_CDS); for (vnp = feat_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { cds = vnp->data.ptrvalue; if (cds != NULL && cds->pseudo) { any_pseudo = TRUE; } } feat_list = ValNodeFree (feat_list); } break; case CDSGeneProt_feature_type_constraint_prot : if (sfp->idx.subtype == FEATDEF_PROT) { if (sfp->pseudo) { any_pseudo = TRUE; } } else if (sfp->data.choice == SEQFEAT_PROT) { prot = SeqMgrGetNextFeature (BioseqFindFromSeqLoc (sfp->location), NULL, 0, FEATDEF_PROT, &fcontext); if (prot != NULL && prot->pseudo) { any_pseudo = TRUE; } } else if (sfp->idx.subtype == FEATDEF_CDS) { prot = SeqMgrGetNextFeature (BioseqFindFromSeqLoc (sfp->product), NULL, 0, FEATDEF_PROT, &fcontext); if (prot != NULL && prot->pseudo) { any_pseudo = TRUE; } } else { feat_list = ListFeaturesInLocation (BioseqFindFromSeqLoc (sfp->location), sfp->location, SEQFEAT_CDREGION, FEATDEF_CDS); for (vnp = feat_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { cds = vnp->data.ptrvalue; if (cds != NULL) { prot = SeqMgrGetNextFeature (BioseqFindFromSeqLoc (cds->product), NULL, 0, FEATDEF_PROT, &fcontext); if (prot != NULL && prot->pseudo) { any_pseudo = TRUE; } } } feat_list = ValNodeFree 
(feat_list); } break; case CDSGeneProt_feature_type_constraint_mat_peptide : if (sfp->idx.subtype == FEATDEF_mat_peptide_aa) { if (sfp->pseudo) { any_pseudo = TRUE; } } break; } if ((any_pseudo && constraint->is_pseudo) || (!any_pseudo && !constraint->is_pseudo)) { rval = TRUE; } return rval; } NLM_EXTERN Boolean IsCDSGeneProtQualConstraintEmpty (CDSGeneProtQualConstraintPtr constraint) { if (constraint == NULL) return TRUE; if (constraint->field1 == NULL && constraint->field2 == NULL && IsStringConstraintEmpty (constraint->constraint)) { return TRUE; } else { return FALSE; } } static Boolean DoesCGPSetMatchQualConstraint (CGPSetPtr c, CDSGeneProtQualConstraintPtr constraint) { Boolean rval = FALSE; CharPtr str, str1, str2; if (c == NULL) return FALSE; if (constraint == NULL) return TRUE; if (IsStringConstraintEmpty (constraint->constraint)) { /* looking for qual present */ if (constraint->field1 != NULL && constraint->field2 == NULL) { str = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL); if (str != NULL) { rval = TRUE; str = MemFree (str); } } else if (constraint->field2 != NULL && constraint->field1 == NULL) { str = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL); if (str == NULL) { rval = FALSE; } else { str = MemFree (str); } /* looking for quals to match */ } else if (constraint->field1 != NULL && constraint->field2 != NULL) { str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL); str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL); if (StringCmp (str1, str2) == 0) { rval = TRUE; } str1 = MemFree (str1); str2 = MemFree (str2); } else { /* nothing specified, automatic match */ rval = TRUE; } } else { if (constraint->field1 != NULL && constraint->field2 == NULL) { str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint); if (str1 == NULL) { if (constraint->constraint->not_present) { str1 = GetFieldValueFromCGPSet (c, 
constraint->field1->data.intvalue, NULL); if (str1 == NULL) { rval = TRUE; } } } else if (!StringHasNoText (str1)) { rval = TRUE; } str1 = MemFree (str1); } else if (constraint->field2 != NULL && constraint->field1 == NULL) { str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint); if (str2 == NULL) { if (constraint->constraint->not_present) { str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL); if (str2 == NULL) { rval = TRUE; } } } else if (!StringHasNoText (str2)) { rval = TRUE; } str2 = MemFree (str2); } else if (constraint->field1 != NULL && constraint->field2 != NULL) { str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint); str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint); if (StringCmp (str1, str2) == 0) { rval = TRUE; } str1 = MemFree (str1); str2 = MemFree (str2); } else { /* generic string constraint */ rval = DoesObjectMatchStringConstraint (0, c, constraint->constraint); } } return rval; } static Boolean DoesSequenceHaveFeatureWithQualPresent (BioseqPtr bsp, FeatureFieldPtr feature_field, StringConstraintPtr scp) { Boolean rval = FALSE; SeqFeatPtr sfp, sfp_p; SeqMgrFeatContext context1, context2; Int4 featdef; Uint1 seqfeattype; CharPtr str; BioseqPtr prot_bsp; if (bsp == NULL) { return FALSE; } else if (feature_field == NULL) { return TRUE; } featdef = GetFeatdefFromFeatureType(feature_field->type); seqfeattype = FindFeatFromFeatDefType (featdef); if (seqfeattype == SEQFEAT_PROT) { for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &context1); sfp != NULL && !rval; sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &context1)) { prot_bsp = BioseqFindFromSeqLoc (sfp->product); for (sfp_p = SeqMgrGetNextFeature (prot_bsp, NULL, 0, featdef, &context2); sfp_p != NULL && !rval; sfp_p = SeqMgrGetNextFeature (prot_bsp, sfp_p, 0, featdef, &context2)) { str = GetQualFromFeature (sfp_p, feature_field, 
scp); if (str == NULL && scp != NULL) { if (scp->not_present) { str = GetQualFromFeature (sfp_p, feature_field, NULL); if (str == NULL) { rval = TRUE; } } } else if (!StringHasNoText (str)) { rval = TRUE; } str = MemFree (str); } } } else { for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context1); sfp != NULL && !rval; sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &context1)) { str = GetQualFromFeature (sfp, feature_field, scp); if (str == NULL && scp != NULL) { if (scp->not_present) { str = GetQualFromFeature (sfp, feature_field, NULL); if (str == NULL) { rval = TRUE; } } } else if (!StringHasNoText (str)) { rval = TRUE; } str = MemFree (str); } } return rval; } static Boolean DoesSequenceHaveFeatureWithMatchingQuals (BioseqPtr bsp, CDSGeneProtConstraintFieldPtr f1, CDSGeneProtConstraintFieldPtr f2, StringConstraintPtr scp) { Int4 featdef; Uint1 seqfeattype; SeqFeatPtr sfp, sfp_p; CharPtr str, str2; SeqMgrFeatContext context1, context2; FeatureFieldPtr feature_field1 = NULL, feature_field2 = NULL; CGPSetPtr c; Boolean b = FALSE; Boolean rval = FALSE; BioseqPtr prot_bsp; if (bsp == NULL || f1 == NULL || f2 == NULL) { return FALSE; } feature_field1 = FeatureFieldFromCDSGeneProtField(f1->data.intvalue); feature_field2 = FeatureFieldFromCDSGeneProtField(f2->data.intvalue); if (feature_field1 == NULL || feature_field2 == NULL) { feature_field1 = FeatureFieldFree (feature_field1); feature_field2 = FeatureFieldFree (feature_field2); return FALSE; } if (feature_field1->type == feature_field2->type) { featdef = GetFeatdefFromFeatureType(feature_field1->type); seqfeattype = FindFeatFromFeatDefType (featdef); if (seqfeattype == SEQFEAT_PROT) { for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &context1); sfp != NULL && !rval; sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &context1)) { prot_bsp = BioseqFindFromSeqLoc (sfp->product); for (sfp_p = SeqMgrGetNextFeature (prot_bsp, NULL, 0, featdef, &context2); sfp_p != NULL && !rval; sfp_p = 
SeqMgrGetNextFeature (prot_bsp, sfp_p, 0, featdef, &context2)) { str = GetQualFromFeature (sfp_p, feature_field1, scp); str2 = GetQualFromFeature (sfp_p, feature_field2, scp); if (str != NULL && str2 != NULL && StringCmp (str, str2) == 0) { rval = TRUE; } str = MemFree (str); str2 = MemFree (str2); } } } else { for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context1); sfp != NULL && !rval; sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &context1)) { str = GetQualFromFeature (sfp, feature_field1, scp); str2 = GetQualFromFeature (sfp, feature_field2, scp); if (str != NULL && str2 != NULL && StringCmp (str, str2) == 0) { rval = TRUE; } str = MemFree (str); str2 = MemFree (str2); } } } else { for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &context1); sfp != NULL && !rval; sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &context1)) { c = BuildCGPSetFromCodingRegion (sfp, &b); str = GetFieldValueFromCGPSet (c, f1->data.intvalue, scp); str2 = GetFieldValueFromCGPSet (c, f2->data.intvalue, scp); if (str != NULL && str2 != NULL && StringCmp (str, str2) == 0) { rval = TRUE; } str = MemFree (str); str2 = MemFree (str2); c = CGPSetFree (c); } } return rval; } static Boolean DoesSequenceMatchCGPQualConstraint (BioseqPtr bsp, CDSGeneProtQualConstraintPtr constraint) { FeatureFieldPtr feature_field; Boolean rval = FALSE; if (bsp == NULL) { return FALSE; } else if (constraint == NULL) { return TRUE; } if (IsStringConstraintEmpty (constraint->constraint)) { /* looking for qual present */ if ((constraint->field1 != NULL && constraint->field2 == NULL) || (constraint->field2 != NULL && constraint->field1 == NULL)) { if (constraint->field1 != NULL) { feature_field = FeatureFieldFromCDSGeneProtField (constraint->field1->data.intvalue); } else { feature_field = FeatureFieldFromCDSGeneProtField (constraint->field2->data.intvalue); } if (feature_field != NULL) { rval = DoesSequenceHaveFeatureWithQualPresent (bsp, feature_field, NULL); feature_field = 
FeatureFieldFree (feature_field); } /* looking for quals to match */ } else if (constraint->field1 != NULL && constraint->field2 != NULL) { rval = DoesSequenceHaveFeatureWithMatchingQuals (bsp, constraint->field1, constraint->field2, NULL); } else { /* nothing specified, automatic match */ rval = TRUE; } } else if ((constraint->field1 != NULL && constraint->field2 == NULL) || (constraint->field1 == NULL && constraint->field2 != NULL)) { /* one field must match constraint */ if (constraint->field1 != NULL) { feature_field = FeatureFieldFromCDSGeneProtField (constraint->field1->data.intvalue); } else { feature_field = FeatureFieldFromCDSGeneProtField (constraint->field2->data.intvalue); } if (feature_field != NULL) { rval = DoesSequenceHaveFeatureWithQualPresent (bsp, feature_field, constraint->constraint); feature_field = FeatureFieldFree (feature_field); } } else if (constraint->field1 != NULL && constraint->field2 != NULL) { /* two fields must match and match constraint */ rval = DoesSequenceHaveFeatureWithMatchingQuals (bsp, constraint->field1, constraint->field2, constraint->constraint); } else { /* generic string constraint */ rval = DoesObjectMatchStringConstraint (OBJ_BIOSEQ, bsp, constraint->constraint); } return rval; } static Boolean DoesSequenceInSetMatchCGPQualConstraint (BioseqSetPtr bssp, CDSGeneProtQualConstraintPtr constraint) { Boolean rval = FALSE; SeqEntryPtr sep; if (bssp == NULL) return FALSE; if (constraint == NULL) return TRUE; for (sep = bssp->seq_set; sep != NULL && !rval; sep = sep->next) { if (IS_Bioseq (sep)) { rval = DoesSequenceMatchCGPQualConstraint ((BioseqPtr) sep->data.ptrvalue, constraint); } else if (IS_Bioseq_set (sep)) { rval = DoesSequenceInSetMatchCGPQualConstraint ((BioseqSetPtr) sep->data.ptrvalue, constraint); } } return rval; } static Boolean DoesSeqDescMatchCGPQualConstraint (SeqDescrPtr sdp, CDSGeneProtQualConstraintPtr constraint) { Boolean rval = FALSE; BioseqPtr bsp; ObjValNodePtr ovp; if (sdp == NULL) return FALSE; 
if (constraint == NULL) return TRUE; bsp = GetSequenceForObject (OBJ_SEQDESC, sdp); if (bsp == NULL) { if (sdp->extended) { ovp = (ObjValNodePtr) sdp; if (ovp->idx.parenttype == OBJ_BIOSEQSET && ovp->idx.parentptr != NULL) { rval = DoesSequenceInSetMatchCGPQualConstraint ((BioseqSetPtr) ovp->idx.parentptr, constraint); } } } else { rval = DoesSequenceMatchCGPQualConstraint (bsp, constraint); } return rval; } static void UnmarkFeatureList (ValNodePtr list) { SeqFeatPtr sfp; while (list != NULL) { sfp = list->data.ptrvalue; if (sfp != NULL) { sfp->idx.deleteme = FALSE; } list = list->next; } } static void FillOutCGPSetForGene (CGPSetPtr c, SeqFeatPtr gene) { SeqFeatPtr cds, mrna, prot; SeqMgrFeatContext fcontext, pcontext; BioseqPtr bsp, protbsp; Int4 left, right, tmp; ValNodeBlock cds_list, mrna_list, prot_list; if (c == NULL || gene == NULL || (bsp = BioseqFindFromSeqLoc (gene->location)) == NULL) { return; } InitValNodeBlock(&cds_list, c->cds_list); InitValNodeBlock(&mrna_list, c->mrna_list); InitValNodeBlock(&prot_list, c->prot_list); left = SeqLocStart (gene->location); right = SeqLocStop (gene->location); if (left > right) { tmp = left; left = right; right = tmp; } for (cds = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext); cds != NULL && fcontext.left <= right; cds = SeqMgrGetNextFeature (bsp, cds, SEQFEAT_CDREGION, 0, &fcontext)) { if (gene == GetGeneForFeature (cds)) { ValNodeAddPointerToEnd (&cds_list, 0, cds); mrna = GetmRNAforCDS (cds); if (mrna != NULL) { ValNodeAddPointerToEnd (&mrna_list, 0, mrna); } if (cds->product != NULL) { protbsp = BioseqFindFromSeqLoc (cds->product); if (protbsp != NULL) { prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_PROT, &pcontext); if (prot != NULL) { ValNodeAddPointerToEnd (&prot_list, 0, prot); } /* also add in mat_peptides from protein feature */ prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &pcontext); while (prot != NULL) { ValNodeAddPointerToEnd 
(&prot_list, 0, prot); prot = SeqMgrGetNextFeature (protbsp, prot, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &pcontext); } } } } } for (mrna = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_mRNA, &fcontext); mrna != NULL && fcontext.left <= right; mrna = SeqMgrGetNextFeature (bsp, mrna, 0, FEATDEF_mRNA, &fcontext)) { if (gene == GetGeneForFeature (mrna)) { ValNodeAddPointerToEnd (&mrna_list, 0, mrna); } } c->cds_list = cds_list.head; c->mrna_list = mrna_list.head; c->prot_list = prot_list.head; } static void FillOutCGPSetForGeneList (CGPSetPtr c) { ValNodePtr vnp; if (c == NULL) { return; } c->gene_list = ValNodeSort (c->gene_list, SortVnpByChoiceAndPtrvalue); ValNodeUnique (&c->gene_list, SortVnpByChoiceAndPtrvalue, ValNodeFree); for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { FillOutCGPSetForGene (c, vnp->data.ptrvalue); } c->cds_list = ValNodeSort (c->cds_list, SortVnpByChoiceAndPtrvalue); ValNodeUnique (&c->cds_list, SortVnpByChoiceAndPtrvalue, ValNodeFree); c->mrna_list = ValNodeSort (c->mrna_list, SortVnpByChoiceAndPtrvalue); ValNodeUnique (&c->mrna_list, SortVnpByChoiceAndPtrvalue, ValNodeFree); c->prot_list = ValNodeSort (c->prot_list, SortVnpByChoiceAndPtrvalue); ValNodeUnique (&c->prot_list, SortVnpByChoiceAndPtrvalue, ValNodeFree); } static Boolean DoesFeatureMatchCGPQualConstraint (SeqFeatPtr sfp, CDSGeneProtQualConstraintPtr constraint) { CGPSetPtr c = NULL; Boolean b = FALSE; SeqMgrFeatContext context; Boolean rval = FALSE; FeatureFieldPtr ff; SeqFeatPtr cds; CharPtr str1 = NULL, str2 = NULL; if (sfp == NULL) { return FALSE; } else if (constraint == NULL) { return TRUE; } if (sfp->data.choice == SEQFEAT_CDREGION) { c = BuildCGPSetFromCodingRegion (sfp, &b); } else if (sfp->data.choice == SEQFEAT_PROT) { cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &context); c = BuildCGPSetFromCodingRegion (cds, &b); } else if (sfp->data.choice == SEQFEAT_GENE) { c = BuildCGPSetFromGene (sfp); FillOutCGPSetForGeneList (c); } else if 
(sfp->data.choice == SEQFEAT_RNA) { c = BuildCGPSetFrommRNA (sfp); } if (c == NULL) { return FALSE; } UnmarkFeatureList (c->cds_list); UnmarkFeatureList (c->mrna_list); UnmarkFeatureList (c->gene_list); rval = DoesCGPSetMatchQualConstraint (c, constraint); if (rval && sfp->idx.subtype == FEATDEF_mat_peptide_aa) { if (constraint->field1 != NULL) { if (IsCDSGeneProtFieldMatPeptideRelated (constraint->field1->data.intvalue)) { ff = FeatureFieldFromCDSGeneProtField (constraint->field1->data.intvalue); str1 = GetQualFromFeature (sfp, ff, constraint->constraint); ff = FeatureFieldFree (ff); } else { str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint); } if (str1 == NULL) { rval = FALSE; } } if (constraint->field2 != NULL) { if (IsCDSGeneProtFieldMatPeptideRelated (constraint->field2->data.intvalue)) { ff = FeatureFieldFromCDSGeneProtField (constraint->field2->data.intvalue); str2 = GetQualFromFeature (sfp, ff, constraint->constraint); ff = FeatureFieldFree (ff); } else { str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint); } if (str2 == NULL) { rval = FALSE; } } if (rval && constraint->field1 != NULL && constraint->field2 != NULL && StringCmp (str1, str2) != 0) { rval = FALSE; } str1 = MemFree (str1); str2 = MemFree (str2); } c = CGPSetFree (c); return rval; } NLM_EXTERN Boolean IsSequenceConstraintEmpty (SequenceConstraintPtr constraint) { if (constraint == NULL) return TRUE; if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) return FALSE; if (constraint->feature != Macro_feature_type_any) return FALSE; /* note - having a num_type_features not be null isn't enough to make the constraint non-empty */ if (!IsStringConstraintEmpty (constraint->id)) return FALSE; if (constraint->num_features != NULL) return FALSE; if (constraint->length != NULL) return FALSE; if (constraint->strandedness != Feature_strandedness_constraint_any) return 
FALSE; return TRUE; } static CharPtr CopyListWithoutBankIt (CharPtr orig) { CharPtr cpy, src, dst; if (orig == NULL) { return NULL; } cpy = StringSave (orig); src = orig; dst = cpy; while (*src != 0) { if ((*src == 'B' || *src == 'b') && (src == orig || isspace (*(src - 1)) || *(src - 1) == ',' || *(src - 1) == ';') && StringNICmp (src, "BankIt", 6) == 0) { src += 6; while (*src == '/' || *src == ':' || *src == ' ') { src++; } } else { *dst = *src; ++dst; ++src; } } *dst = 0; return cpy; } static Boolean DoesTextMatchBankItId (SeqIdPtr sip, StringConstraintPtr scp) { Boolean rval = FALSE; Int4 offset; CharPtr text, tmp, cp, partial_match; Char ch_orig = 0; DbtagPtr dbtag; if (scp == NULL || scp->match_text == NULL || sip == NULL || sip->choice != SEQID_GENERAL) { return FALSE; } dbtag = (DbtagPtr) sip->data.ptrvalue; if (dbtag == NULL || StringCmp (dbtag->db, "BankIt") != 0 || dbtag->tag == NULL) { return FALSE; } text = CopyListWithoutBankIt (scp->match_text); tmp = scp->match_text; scp->match_text = text; rval = DoesObjectIdMatchStringConstraint (dbtag->tag, scp); if (!rval) { offset = StringCSpn (text, "/ "); if (text[offset] != 0) { ch_orig = text[offset]; text[offset] = '_'; rval = DoesObjectIdMatchStringConstraint (dbtag->tag, scp); text[offset] = ch_orig; } } if (!rval && ch_orig != '/' && dbtag->tag->str != NULL && (cp = StringChr (dbtag->tag->str, '/')) != NULL) { partial_match = StringSave (dbtag->tag->str); partial_match[cp - dbtag->tag->str] = 0; rval = DoesStringMatchConstraint (partial_match, scp); partial_match = MemFree (partial_match); } scp->match_text = tmp; text = MemFree (text); return rval; } NLM_EXTERN Boolean DoesSeqIDListMeetStringConstraint (SeqIdPtr sip, StringConstraintPtr string_constraint) { CharPtr id; CharPtr cp, cp_dst; SeqIdPtr tmp; Boolean match, changed; DbtagPtr dbtag; CharPtr tmp_id; if (sip == NULL) { return FALSE; } if (string_constraint == NULL) { return TRUE; } while (sip != NULL) { /* temporary disconnect ID from list */ 
tmp = sip->next; sip->next = NULL; id = SeqIdWholeLabel (sip, PRINTID_FASTA_LONG); match = DoesSingleStringMatchConstraint (id, string_constraint); if (!match) { changed = FALSE; /* remove terminating pipe character */ if (id[StringLen(id) - 1] == '|') { id[StringLen(id) - 1] = 0; changed = TRUE; } /* remove leading pipe identifier */ cp = StringChr (id, '|'); if (cp != NULL) { changed = TRUE; cp++; cp_dst = id; while (*cp != 0) { *cp_dst = *cp; cp_dst++; cp++; } *cp_dst = 0; } if (changed) { match = DoesSingleStringMatchConstraint (id, string_constraint); } /* if search text doesn't have ., try ID without version */ if (!match && StringChr (string_constraint->match_text, '.') == NULL) { cp = StringChr (id, '.'); if (cp != NULL) { *cp = 0; match = DoesSingleStringMatchConstraint (id, string_constraint); *cp = '.'; } } /* Bankit? */ if (!match && DoesTextMatchBankItId (sip, string_constraint)) { match = TRUE; } if (!match && sip->choice == SEQID_GENERAL && sip->data.ptrvalue != NULL) { dbtag = (DbtagPtr) sip->data.ptrvalue; if (StringCmp (dbtag->db, "NCBIFILE") == 0 && dbtag->tag != NULL) { if (DoesSingleStringMatchConstraint (dbtag->tag->str, string_constraint)) { match = TRUE; } else if ((cp = StringRChr (dbtag->tag->str, '/')) != NULL) { tmp_id = (CharPtr) MemNew (sizeof (Char) * (cp - dbtag->tag->str + 1)); StringNCpy (tmp_id, dbtag->tag->str, cp - dbtag->tag->str); tmp_id[cp - dbtag->tag->str] = 0; if (DoesSingleStringMatchConstraint (tmp_id, string_constraint)) { match = TRUE; } tmp_id = MemFree (tmp_id); } } } } id = MemFree (id); sip->next = tmp; if (match) { if (string_constraint->not_present) { return FALSE; } else { return TRUE; } } sip = sip->next; } if (string_constraint->not_present) { return TRUE; } else { return FALSE; } } typedef struct rnatypebiomol { Int4 rnatype; Uint1 biomol; CharPtr rnamolname; } RnaTypeBiomolData, PNTR RnaTypeBiomolPtr; static RnaTypeBiomolData rna_type_biomol[] = { { Sequence_constraint_rnamol_genomic , MOLECULE_TYPE_GENOMIC, 
"Genomic RNA" } , { Sequence_constraint_rnamol_precursor_RNA , MOLECULE_TYPE_PRE_MRNA , "Precursor RNA" } , { Sequence_constraint_rnamol_mRNA , MOLECULE_TYPE_MRNA , "mRNA [cDNA]" } , { Sequence_constraint_rnamol_rRNA , MOLECULE_TYPE_RRNA , "Ribosomal RNA" } , { Sequence_constraint_rnamol_tRNA , MOLECULE_TYPE_TRNA , "Transfer RNA" } , { Sequence_constraint_rnamol_genomic_mRNA , MOLECULE_TYPE_GENOMIC_MRNA_MIX , "Genomic-mRNA" } , { Sequence_constraint_rnamol_cRNA , MOLECULE_TYPE_CRNA , "cRNA" } , { Sequence_constraint_rnamol_transcribed_RNA , MOLECULE_TYPE_TRANSCRIBED_RNA , "Transcribed RNA" } , { Sequence_constraint_rnamol_ncRNA , MOLECULE_TYPE_NCRNA , "Non-coding RNA" } , { Sequence_constraint_rnamol_transfer_messenger_RNA , MOLECULE_TYPE_TMRNA , "Transfer-messenger RNA" } } ; #define NUM_rna_type_biomol sizeof (rna_type_biomol) / sizeof (RnaTypeBiomolData) NLM_EXTERN Uint1 GetBiomolForRnaType (Int4 rnatype) { Int4 i; for (i = 0; i < NUM_rna_type_biomol; i++) { if (rna_type_biomol[i].rnatype == rnatype) { return rna_type_biomol[i].biomol; } } return 0; } NLM_EXTERN CharPtr GetBiomolNameForRnaType (Int4 rnatype) { Int4 i; for (i = 0; i < NUM_rna_type_biomol; i++) { if (rna_type_biomol[i].rnatype == rnatype) { return rna_type_biomol[i].rnamolname; } } return "invalid RNA type"; } NLM_EXTERN void AddAllRNASubtypesToChoiceList (ValNodePtr PNTR field_list) { Int4 i; if (field_list == NULL) return; ValNodeAddPointer (field_list, Sequence_constraint_rnamol_any, StringSave ("Any RNA")); for (i = 0; i < NUM_rna_type_biomol; i++) { ValNodeAddPointer (field_list, rna_type_biomol[i].rnatype, StringSave (rna_type_biomol[i].rnamolname)); } } static Boolean DoesValueMatchQuantityConstraint (Int4 val, ValNodePtr quantity) { Boolean rval = TRUE; if (quantity == NULL) { rval = TRUE; } else if (quantity->choice == QuantityConstraint_equals && val != quantity->data.intvalue) { return FALSE; } else if (quantity->choice == QuantityConstraint_greater_than && val <= 
quantity->data.intvalue) { return FALSE; } else if (quantity->choice == QuantityConstraint_less_than && val >= quantity->data.intvalue) { return FALSE; } return rval; } static Boolean DoesSequenceMatchStrandednessConstraint (BioseqPtr bsp, Uint2 strandedness) { SeqMgrFeatContext context; SeqFeatPtr sfp; Int4 num_minus = 0; Int4 num_plus = 0; Boolean rval = FALSE; if (bsp == NULL) { return FALSE; } else if (strandedness == Feature_strandedness_constraint_any) { return TRUE; } sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context); while (sfp != NULL) { if (context.strand == Seq_strand_minus) { num_minus++; if (strandedness == Feature_strandedness_constraint_plus_only || strandedness == Feature_strandedness_constraint_no_minus) { return FALSE; } else if (strandedness == Feature_strandedness_constraint_at_least_one_minus) { return TRUE; } } else { num_plus++; if (strandedness == Feature_strandedness_constraint_minus_only || strandedness == Feature_strandedness_constraint_no_plus) { return FALSE; } else if (strandedness == Feature_strandedness_constraint_at_least_one_plus) { return TRUE; } } sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context); } switch (strandedness) { case Feature_strandedness_constraint_minus_only: if (num_minus > 0 && num_plus == 0) { rval = TRUE; } break; case Feature_strandedness_constraint_plus_only: if (num_plus > 0 && num_minus == 0) { rval = TRUE; } break; case Feature_strandedness_constraint_at_least_one_minus: if (num_minus > 0) { rval = TRUE; } break; case Feature_strandedness_constraint_at_least_one_plus: if (num_plus > 0) { rval = TRUE; } break; case Feature_strandedness_constraint_no_minus: if (num_minus == 0) { rval = TRUE; } break; case Feature_strandedness_constraint_no_plus: if (num_plus == 0) { rval = TRUE; } break; } return rval; } static Boolean DoesFeatureCountMatchQuantityConstraint (BioseqPtr bsp, Uint2 featdef, ValNodePtr quantity) { Int4 num_features = 0; SeqFeatPtr sfp; SeqMgrFeatContext fcontext; for (sfp = 
SeqMgrGetNextFeature (bsp, NULL, 0, (Uint1)featdef, &fcontext); sfp != NULL; sfp = SeqMgrGetNextFeature (bsp, sfp, 0, (Uint1)featdef, &fcontext)) { num_features++; /* note - break out of loop or return as soon as we know constraint * succeeds or passes - no need to iterate through all features */ if (quantity == NULL) { return TRUE; } else if (quantity->choice == QuantityConstraint_equals && num_features > quantity->data.intvalue) { return FALSE; } else if (quantity->choice == QuantityConstraint_greater_than && num_features > quantity->data.intvalue) { break; } else if (quantity->choice == QuantityConstraint_less_than && num_features >= quantity->data.intvalue) { return FALSE; } } if (quantity == NULL) { return FALSE; } else if (!DoesValueMatchQuantityConstraint(num_features, quantity)) { return FALSE; } else { return TRUE; } } NLM_EXTERN Boolean DoesSequenceMatchSequenceConstraint (BioseqPtr bsp, SequenceConstraintPtr constraint) { SeqDescrPtr sdp; SeqMgrDescContext dcontext; MolInfoPtr mip; if (bsp == NULL) return FALSE; if (IsSequenceConstraintEmpty (constraint)) return TRUE; if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) { switch (constraint->seqtype->choice) { case SequenceConstraintMolTypeConstraint_nucleotide : if (ISA_aa (bsp->mol)) { return FALSE; } break; case SequenceConstraintMolTypeConstraint_dna : if (bsp->mol != Seq_mol_dna) { return FALSE; } break; case SequenceConstraintMolTypeConstraint_rna : if (bsp->mol != Seq_mol_rna) { return FALSE; } if (constraint->seqtype->data.intvalue != Sequence_constraint_rnamol_any) { sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext); if (sdp == NULL || sdp->data.ptrvalue == NULL || sdp->choice != Seq_descr_molinfo) { return FALSE; } mip = (MolInfoPtr) sdp->data.ptrvalue; if (GetBiomolForRnaType (constraint->seqtype->data.intvalue) != mip->biomol) { return FALSE; } } break; case SequenceConstraintMolTypeConstraint_protein : if (!ISA_aa 
(bsp->mol)) { return FALSE; } break; } } if (constraint->feature != Macro_feature_type_any) { if (!DoesFeatureCountMatchQuantityConstraint (bsp, GetFeatdefFromFeatureType (constraint->feature), constraint->num_type_features)) { return FALSE; } } if (!IsStringConstraintEmpty (constraint->id) && !DoesSeqIDListMeetStringConstraint (bsp->id, constraint->id)) { return FALSE; } if (constraint->num_features != NULL) { if (!DoesFeatureCountMatchQuantityConstraint (bsp, 0, constraint->num_features)) { return FALSE; } } if (!DoesValueMatchQuantityConstraint(bsp->length, constraint->length)) { return FALSE; } if (!DoesSequenceMatchStrandednessConstraint(bsp, constraint->strandedness)) { return FALSE; } return TRUE; } static Boolean DoesSequenceInSetMatchSequenceConstraint (BioseqSetPtr bssp, SequenceConstraintPtr constraint) { Boolean rval = FALSE; SeqEntryPtr sep; if (bssp == NULL) return FALSE; if (IsSequenceConstraintEmpty (constraint)) return TRUE; for (sep = bssp->seq_set; sep != NULL && !rval; sep = sep->next) { if (IS_Bioseq (sep)) { rval = DoesSequenceMatchSequenceConstraint ((BioseqPtr) sep->data.ptrvalue, constraint); } else if (IS_Bioseq_set (sep)) { rval = DoesSequenceInSetMatchSequenceConstraint ((BioseqSetPtr) sep->data.ptrvalue, constraint); } } return rval; } static Boolean DoesObjectMatchSequenceConstraint (Uint1 choice, Pointer data, SequenceConstraintPtr constraint) { BioseqPtr bsp; SeqDescrPtr sdp; ObjValNodePtr ovp; Boolean rval = FALSE; if (data == NULL) return FALSE; if (IsSequenceConstraintEmpty (constraint)) return TRUE; bsp = GetSequenceForObject (choice, data); if (bsp == NULL) { if (choice == OBJ_SEQDESC) { sdp = (SeqDescrPtr) data; if (sdp->extended) { ovp = (ObjValNodePtr) sdp; if (ovp->idx.parenttype == OBJ_BIOSEQSET && ovp->idx.parentptr != NULL) { rval = DoesSequenceInSetMatchSequenceConstraint ((BioseqSetPtr) ovp->idx.parentptr, constraint); } } } } else { rval = DoesSequenceMatchSequenceConstraint (bsp, constraint); } return rval; } /* Pub 
fields */ typedef struct pubfieldlabel { Int4 pub_field; CharPtr name; } PubFieldLabelData, PNTR PubFieldLabelPtr; static PubFieldLabelData pubfield_labels[] = { { Publication_field_cit, "citation" } , { Publication_field_authors, "authors" } , { Publication_field_journal, "journal" } , { Publication_field_volume, "volume" } , { Publication_field_issue, "issue" } , { Publication_field_pages, "pages" } , { Publication_field_date, "date" } , { Publication_field_serial_number, "serial number" } , { Publication_field_title, "title" } , { Publication_field_affiliation, "affiliation" } , { Publication_field_affil_div, "department" } , { Publication_field_affil_city, "city" } , { Publication_field_affil_sub, "state" } , { Publication_field_affil_country, "country" } , { Publication_field_affil_street, "street" } , { Publication_field_affil_email, "email" } , { Publication_field_affil_fax, "fax" } , { Publication_field_affil_phone, "phone" } , { Publication_field_affil_zipcode, "postal code" } , { Publication_field_pmid, "PMID"} , { Publication_field_pub_class, "class" } }; #define NUM_pubfield_labels sizeof (pubfield_labels) / sizeof (PubFieldLabelData) NLM_EXTERN CharPtr GetPubFieldLabel (Int4 pub_field) { CharPtr rval = NULL; Int4 i; for (i = 0; i < NUM_pubfield_labels; i++) { if (pubfield_labels[i].pub_field == pub_field) { rval = pubfield_labels[i].name; break; } } return rval; } NLM_EXTERN Int4 GetPubFieldFromLabel(CharPtr label) { Int4 rval = -1; Int4 i; if (StringNICmp (label, "publication", 11) == 0) { label = label + 11; while (*label == '-' || *label == ' ') { label++; } } for (i = 0; i < NUM_pubfield_labels; i++) { if (StringsAreEquivalent(pubfield_labels[i].name, label)) { rval = pubfield_labels[i].pub_field; break; } } return rval; } NLM_EXTERN ValNodePtr GetPubFieldList (void) { ValNodePtr val_list = NULL; Int4 i; for (i = 0; i < NUM_pubfield_labels; i++) { ValNodeAddPointer (&val_list, pubfield_labels[i].pub_field, StringSave (pubfield_labels[i].name)); } 
return val_list; } static ValNodePtr MakePubFieldTypeList (void) { ValNodePtr field_list = NULL; Int4 i; for (i = 0; i < NUM_pubfield_labels; i++) { ValNodeAddInt (&field_list, FieldType_pub, pubfield_labels[i].pub_field); } return field_list; } typedef struct pub_class_qual { Uint1 pub_choice; Int4 status; Uint1 art_from; CharPtr name; } PubClassQualData, PNTR PubClassQualPtr; static PubClassQualData pub_class_quals[] = { { PUB_Gen, Pub_type_unpublished, 0, "unpublished" } , { PUB_Sub, Pub_type_in_press, 0, "in-press submission" } , { PUB_Sub, Pub_type_published, 0, "submission" } , { PUB_Article, Pub_type_in_press, 1, "in-press journal" } , { PUB_Article, Pub_type_published, 1, "journal" } , { PUB_Article, Pub_type_in_press, 2, "in-press book chapter" } , { PUB_Article, Pub_type_published, 2, "book chapter" } , { PUB_Article, Pub_type_in_press, 3, "in-press proceedings chapter" } , { PUB_Article, Pub_type_published, 3, "proceedings chapter" } , { PUB_Book, Pub_type_in_press, 0, "in-press book" } , { PUB_Book, Pub_type_published, 0, "book" } , { PUB_Man, Pub_type_in_press, 0, "in-press thesis" } , { PUB_Man, Pub_type_published, 0, "thesis" } , { PUB_Proc, Pub_type_in_press, 0, "in-press proceedings" } , { PUB_Proc, Pub_type_published, 0, "proceedings" } , { PUB_Patent, Pub_type_any, 0, "patent" } }; #define NUM_pub_class_quals sizeof (pub_class_quals) / sizeof (PubClassQualData) NLM_EXTERN ValNodePtr GetPubClassList () { ValNodePtr list = NULL; Int4 i; for (i = 0; i < NUM_pub_class_quals; i++) { ValNodeAddPointer (&list, Publication_field_pub_class, StringSave (pub_class_quals[i].name)); } return list; } static PubClassQualPtr GetPubclassQualFromPub (PubPtr the_pub) { CharPtr str = NULL; CitArtPtr art; Int4 ml_class; Int4 art_from = 0; Int4 i; if (the_pub == NULL) { return NULL; } ml_class = GetPubMLStatus(the_pub); if (the_pub->choice == PUB_Article && (art = (CitArtPtr) the_pub->data.ptrvalue) != NULL) { art_from = art->from; } for (i = 0; i < 
NUM_pub_class_quals; i++) { if (the_pub->choice == pub_class_quals[i].pub_choice && (ml_class == pub_class_quals[i].status || ml_class == 0 || pub_class_quals[i].status == 0) && (art_from == 0 || pub_class_quals[i].art_from == 0 || art_from == pub_class_quals[i].art_from)) { return pub_class_quals + i; } } return NULL; } NLM_EXTERN CharPtr GetPubclassFromPub (PubPtr the_pub) { PubClassQualPtr pq = GetPubclassQualFromPub (the_pub); if (pq == NULL) { return NULL; } else { return StringSave(pq->name); } } static PubClassQualPtr GetPubclassFromString(CharPtr str) { Int4 i; PubClassQualPtr pq = NULL; for (i = 0; i < NUM_pub_class_quals; i++) { if (StringsAreEquivalent (pub_class_quals[i].name, str)) { pq = pub_class_quals + i; break; } } return pq; } static Boolean FreePubDataForConversion (PubPtr the_pub) { Boolean rval = FALSE; if (the_pub == NULL) { return FALSE; } switch (the_pub->choice) { case PUB_Gen: the_pub->data.ptrvalue = CitGenFree (the_pub->data.ptrvalue); rval = TRUE; break; case PUB_Sub: the_pub->data.ptrvalue = CitSubFree (the_pub->data.ptrvalue); rval = TRUE; break; case PUB_Article: the_pub->data.ptrvalue = CitArtFree (the_pub->data.ptrvalue); rval = TRUE; break; case PUB_Journal: the_pub->data.ptrvalue = CitJourFree (the_pub->data.ptrvalue); rval = TRUE; break; case PUB_Book: case PUB_Man: case PUB_Proc: the_pub->data.ptrvalue = CitBookFree (the_pub->data.ptrvalue); rval = TRUE; break; case PUB_Patent: the_pub->data.ptrvalue = CitPatFree (the_pub->data.ptrvalue); rval = TRUE; break; } return rval; } static void SetArtFrom(PubPtr the_pub, Uint1 art_from) { CitArtPtr cap; CitJourPtr cjp; CitBookPtr cbp; if (the_pub == NULL) { return; } if (the_pub->choice == PUB_Article) { if ((cap = (CitArtPtr)the_pub->data.ptrvalue) == NULL) { cap = CitArtNew(); the_pub->data.ptrvalue = cap; } cap->from = art_from; switch (cap->from) { case 1: cjp = CitJourNew(); cjp->imp = ImprintNew(); cap->fromptr = cjp; break; case 2: case 3: cbp = CitBookNew(); cbp->imp = 
ImprintNew(); cap->fromptr = cbp; break; } } } static Boolean NewPubDataForConversion (PubPtr the_pub, Uint1 art_from) { CitBookPtr cbp; Boolean rval = FALSE; if (the_pub == NULL) { return FALSE; } switch (the_pub->choice) { case PUB_Gen: the_pub->data.ptrvalue = CitGenNew(); rval = TRUE; break; case PUB_Sub: the_pub->data.ptrvalue = CitSubNew(); rval = TRUE; break; case PUB_Article: the_pub->data.ptrvalue = CitArtNew(); SetArtFrom(the_pub, art_from); rval = TRUE; break; case PUB_Journal: the_pub->data.ptrvalue = CitJourNew(); rval = TRUE; break; case PUB_Book: case PUB_Man: case PUB_Proc: cbp = CitBookNew(); cbp->imp = ImprintNew(); cbp->imp->date = DateNew(); the_pub->data.ptrvalue = cbp; rval = TRUE; break; case PUB_Patent: the_pub->data.ptrvalue = CitPatNew(); rval = TRUE; break; } return rval; } static ImprintPtr GetPubImprint (PubPtr the_pub) { CitArtPtr cap; CitBookPtr cbp; CitJourPtr cjp; ImprintPtr imp = NULL; if (the_pub == NULL || the_pub->data.ptrvalue == NULL) { return NULL; } switch (the_pub->choice) { case PUB_Article : cap = (CitArtPtr) the_pub->data.ptrvalue; if (cap->from == 1) { cjp = (CitJourPtr) cap->fromptr; if (cjp != NULL) { imp = cjp->imp; } } else if (cap->from == 2 || cap->from == 3) { cbp = (CitBookPtr) cap->fromptr; if (cbp != NULL) { imp = cbp->imp; } } break; case PUB_Journal : cjp = (CitJourPtr) the_pub->data.ptrvalue; imp = cjp->imp; case PUB_Book : case PUB_Man : cbp = (CitBookPtr) the_pub->data.ptrvalue; imp = cbp->imp; break; default : break; } return imp; } static Boolean SetPubStatusOnPub (PubPtr the_pub, Int4 status) { ImprintPtr imp; CitGenPtr cgp; Boolean rval = FALSE; imp = GetPubImprint(the_pub); if (imp != NULL) { switch (status) { case Pub_type_unpublished: imp->prepub = 255; rval = TRUE; break; case Pub_type_published: imp->prepub = 0; rval = TRUE; break; case Pub_type_in_press: imp->prepub = 2; rval = TRUE; break; case Pub_type_submitter_block: imp->prepub = 1; rval = TRUE; break; } } else if (the_pub->choice == 
PUB_Gen) { if ((cgp = (CitGenPtr) the_pub->data.ptrvalue) == NULL) { cgp = CitGenNew(); the_pub->data.ptrvalue = cgp; } if (status == Pub_type_unpublished) { cgp->cit = MemFree (cgp->cit); cgp->cit = StringSave("unpublished"); } else { if (StringICmp (cgp->cit, "unpublished") == 0) { cgp->cit = MemFree (cgp->cit); } } } else { } return rval; } static void CopyRelevantPubDetails (PubPtr orig_pub, PubPtr new_pub) { Int4 i; CharPtr val; if (orig_pub == NULL || new_pub == NULL) { return; } for (i = 0; i < NUM_pubfield_labels; i++) { if (pubfield_labels[i].pub_field != Publication_field_pub_class /* field we are copying now */ && pubfield_labels[i].pub_field != Publication_field_authors /* already copying this elsewhere */) { val = GetPubFieldFromPub(orig_pub, pubfield_labels[i].pub_field, NULL); if (!StringHasNoText (val)) { SetPubFieldOnPub(new_pub, pubfield_labels[i].pub_field, NULL, val, ExistingTextOption_replace_old); } } } } NLM_EXTERN Boolean SetPubclassOnPub (PubPtr the_pub, CharPtr pub_class) { PubClassQualPtr orig_pq = NULL, new_pq = NULL; AuthListPtr PNTR palp; AuthListPtr PNTR new_palp; Boolean rval = FALSE; ValNode new_pub; if (the_pub == NULL) { return FALSE; } new_pq = GetPubclassFromString(pub_class); orig_pq = GetPubclassQualFromPub(the_pub); if (new_pq == NULL || orig_pq == NULL || new_pq == orig_pq) { return FALSE; } if (new_pq->pub_choice == the_pub->choice && new_pq->art_from == orig_pq->art_from) { /* only thing changing is in-press/published */ if (new_pq->status != orig_pq->status) { rval = SetPubStatusOnPub(the_pub, new_pq->status); } } else { MemSet (&new_pub, 0, sizeof (ValNode)); new_pub.choice = new_pq->pub_choice; NewPubDataForConversion(&new_pub, new_pq->art_from); palp = GetAuthListForPub(the_pub); new_palp = GetAuthListForPub(&new_pub); if (palp && *palp && new_palp) { *new_palp = AsnIoMemCopy (*palp, (AsnReadFunc) AuthListAsnRead, (AsnWriteFunc) AuthListAsnWrite); } /* TODO: Copy over other relevant details */ 
CopyRelevantPubDetails(the_pub, &new_pub); SetPubStatusOnPub(&new_pub, new_pq->status); rval = FreePubDataForConversion(the_pub); if (rval) { the_pub->choice = new_pub.choice; the_pub->data.ptrvalue = new_pub.data.ptrvalue; } else { FreePubDataForConversion(&new_pub); } } return FALSE; } NLM_EXTERN Boolean IsPublicationConstraintEmpty (PublicationConstraintPtr constraint) { Boolean rval = FALSE; if (constraint == NULL || (constraint->type == Pub_type_any && (constraint->field == NULL || IsStringConstraintEmpty (constraint->field->constraint)) && (constraint->special_field == NULL || constraint->special_field->constraint == NULL))) { rval = TRUE; } return rval; } NLM_EXTERN Int4 GetPubMLStatus (PubPtr the_pub) { CitGenPtr cgp; CitSubPtr csp; CitArtPtr cap; CitBookPtr cbp; CitJourPtr cjp; ImprintPtr imp = NULL; Int4 status = Pub_type_any; if (the_pub == NULL || the_pub->data.ptrvalue == NULL) { return Pub_type_any; } switch (the_pub->choice) { case PUB_Gen : cgp = (CitGenPtr) the_pub->data.ptrvalue; if (cgp->cit != NULL && StringICmp (cgp->cit, "unpublished") == 0) { status = Pub_type_unpublished; } else { status = Pub_type_published; } break; case PUB_Sub : csp = (CitSubPtr) the_pub->data.ptrvalue; status = Pub_type_submitter_block; break; case PUB_Article : cap = (CitArtPtr) the_pub->data.ptrvalue; if (cap->from == 1) { cjp = (CitJourPtr) cap->fromptr; if (cjp != NULL) { imp = cjp->imp; } } else if (cap->from == 2 || cap->from == 3) { cbp = (CitBookPtr) cap->fromptr; if (cbp != NULL) { imp = cbp->imp; } } break; case PUB_Journal : cjp = (CitJourPtr) the_pub->data.ptrvalue; imp = cjp->imp; case PUB_Book : case PUB_Man : cbp = (CitBookPtr) the_pub->data.ptrvalue; imp = cbp->imp; break; case PUB_Patent : status = Pub_type_published; break; default : break; } if (imp != NULL) { if (imp->prepub == 0) { status = Pub_type_published; } else if (imp->prepub == 2) { status = Pub_type_in_press; } else if (imp->prepub == 1 && the_pub->choice == PUB_Sub) { status = 
Pub_type_submitter_block; } else { status = Pub_type_unpublished; } } return status; } static Boolean DoesPubFieldMatch (PubdescPtr pdp, PubFieldConstraintPtr field) { Boolean rval = FALSE, match_all = TRUE; PubPtr pub; CharPtr tmp; if (pdp == NULL) return FALSE; if (field == NULL) return TRUE; if (field->constraint->not_present) { match_all = TRUE; for (pub = pdp->pub; pub != NULL && match_all; pub = pub->next) { tmp = GetPubFieldFromPub (pub, field->field, NULL); if (!DoesStringMatchConstraint (tmp, field->constraint)) { match_all = FALSE; } tmp = MemFree (tmp); } rval = match_all; } else { for (pub = pdp->pub; pub != NULL && !rval; pub = pub->next) { tmp = GetPubFieldFromPub (pub, field->field, field->constraint); if (tmp != NULL) { rval = TRUE; } tmp = MemFree (tmp); } } return rval; } static Boolean DoesPubFieldSpecialMatch (PubdescPtr pdp, PubFieldSpecialConstraintPtr field) { Boolean rval = FALSE; PubPtr pub; CharPtr tmp; if (pdp == NULL) return FALSE; if (field == NULL) return TRUE; if (field->constraint->choice == PubFieldSpecialConstraintType_is_present) { for (pub = pdp->pub; pub != NULL && !rval; pub = pub->next) { tmp = GetPubFieldFromPub (pub, field->field, NULL); if (!StringHasNoText (tmp)) { /* at least one is present and non-empty */ rval = TRUE; } tmp = MemFree (tmp); } } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_not_present) { rval = TRUE; for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) { tmp = GetPubFieldFromPub (pub, field->field, NULL); if (!StringHasNoText (tmp)) { /* at least one is present and non-empty */ rval = FALSE; } tmp = MemFree (tmp); } } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_all_caps) { rval = TRUE; for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) { tmp = GetPubFieldFromPub (pub, field->field, NULL); if (tmp != NULL && !IsAllCaps (tmp)) { /* at least one is not all caps */ rval = FALSE; } tmp = MemFree (tmp); } } else if (field->constraint->choice == 
PubFieldSpecialConstraintType_is_all_lower) { rval = TRUE; for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) { tmp = GetPubFieldFromPub (pub, field->field, NULL); if (tmp != NULL && !IsAllLowerCase (tmp)) { /* at least one is not all caps */ rval = FALSE; } tmp = MemFree (tmp); } } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_all_punct) { rval = TRUE; for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) { tmp = GetPubFieldFromPub (pub, field->field, NULL); if (tmp != NULL && !IsAllPunctuation (tmp)) { /* at least one is not all punctuation */ rval = FALSE; } tmp = MemFree (tmp); } } return rval; } static Boolean DoesPubMatchPublicationConstraint (PubdescPtr pdp, PublicationConstraintPtr constraint) { Boolean type_ok = TRUE, rval = FALSE; PubPtr pub; if (pdp == NULL) return FALSE; if (IsPublicationConstraintEmpty (constraint)) return TRUE; if (constraint->type != Pub_type_any) { type_ok = FALSE; for (pub = pdp->pub; pub != NULL && !type_ok; pub = pub->next) { if (GetPubMLStatus (pub) == constraint->type) { type_ok = TRUE; } } } if (type_ok) { rval = (constraint->field == NULL || DoesPubFieldMatch (pdp, constraint->field)) && (constraint->special_field == NULL || DoesPubFieldSpecialMatch (pdp, constraint->special_field)); } return rval; } static Boolean DoesObjectMatchPublicationConstraint (Uint1 choice, Pointer data, PublicationConstraintPtr constraint) { Boolean rval = TRUE; SeqFeatPtr sfp; SeqDescrPtr sdp; if (data == NULL) return FALSE; if (IsPublicationConstraintEmpty (constraint)) return TRUE; switch (choice) { case OBJ_SEQFEAT: sfp = (SeqFeatPtr) data; if (sfp->data.choice == SEQFEAT_PUB) { rval = DoesPubMatchPublicationConstraint (sfp->data.value.ptrvalue, constraint); } break; case OBJ_SEQDESC: sdp = (SeqDescrPtr) data; if (sdp->choice == Seq_descr_pub) { rval = DoesPubMatchPublicationConstraint (sdp->data.ptrvalue, constraint); } break; } return rval; } NLM_EXTERN Boolean IsFieldConstraintEmpty (FieldConstraintPtr 
constraint) { RnaQualPtr rq; FeatureFieldPtr ffp; if (constraint == NULL || constraint->field == NULL || IsStringConstraintEmpty (constraint->string_constraint)) { return TRUE; } else if (constraint->field->choice == FieldType_rna_field && ((rq = (RnaQualPtr)constraint->field->data.ptrvalue) == NULL || rq->type == NULL)) { return TRUE; } else if (constraint->field->choice == FieldType_feature_field && (ffp = (FeatureFieldPtr)constraint->field->data.ptrvalue) == NULL) { return TRUE; } else { return FALSE; } } static Boolean DoesObjectMatchFeatureFieldConstraint (Uint1 choice, Pointer data, FeatureFieldPtr ffp, StringConstraintPtr string_constraint) { Boolean rval = FALSE; CharPtr str; BioseqPtr bsp; Int4 subtype; SeqFeatPtr sfp; SeqMgrFeatContext fcontext; Boolean not_present; CGPSetPtr cgp; Uint2 cds_gene_prot_field; if (data == NULL) { return FALSE; } if (IsStringConstraintEmpty (string_constraint)) { return TRUE; } switch (choice) { case OBJ_SEQFEAT: not_present = string_constraint->not_present; string_constraint->not_present = FALSE; str = GetQualFromFeature ((SeqFeatPtr) data, ffp, string_constraint); if (str != NULL) { rval = TRUE; str = MemFree (str); } if (not_present) { rval = !rval; string_constraint->not_present = TRUE; } break; case OBJ_SEQDESC: case OBJ_BIOSEQ: bsp = GetSequenceForObject (choice, data); if (bsp != NULL) { subtype = GetFeatdefFromFeatureType (ffp->type); not_present = string_constraint->not_present; string_constraint->not_present = FALSE; for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, subtype, &fcontext); !rval && sfp != NULL; sfp = SeqMgrGetNextFeature (bsp, sfp, 0, subtype, &fcontext)) { str = GetQualFromFeature (sfp, ffp, string_constraint); if (str != NULL) { rval = TRUE; str = MemFree (str); } } if (not_present) { rval = !rval; string_constraint->not_present = TRUE; } } break; case 0: cgp = (CGPSetPtr) data; cds_gene_prot_field = CDSGeneProtFieldFromFeatureField (ffp); if (cds_gene_prot_field > 0) { not_present = 
string_constraint->not_present; string_constraint->not_present = FALSE; str = GetFieldValueFromCGPSet (cgp, cds_gene_prot_field, string_constraint); if (str != NULL) { rval = TRUE; str = MemFree (str); } if (not_present) { rval = !rval; string_constraint->not_present = TRUE; } } break; } return rval; } static Boolean DoesObjectMatchRnaQualConstraint (Uint1 choice, Pointer data, RnaQualPtr rq, StringConstraintPtr string_constraint) { Boolean rval = FALSE; CharPtr str; BioseqPtr bsp; Int4 subtype; SeqFeatPtr sfp; SeqMgrFeatContext fcontext; Boolean not_present; Uint1 feat_choice = 0; if (data == NULL) { return FALSE; } if (IsStringConstraintEmpty (string_constraint)) { return TRUE; } switch (choice) { case OBJ_SEQFEAT: not_present = string_constraint->not_present; string_constraint->not_present = FALSE; str = GetRNAQualFromFeature ((SeqFeatPtr) data, rq, string_constraint, NULL); if (str != NULL) { rval = TRUE; str = MemFree (str); } if (not_present) { rval = !rval; string_constraint->not_present = TRUE; } break; case OBJ_SEQDESC: case OBJ_BIOSEQ: bsp = GetSequenceForObject (choice, data); if (bsp != NULL) { if (rq->type == NULL || rq->type->choice == RnaFeatType_any) { feat_choice = SEQFEAT_RNA; subtype = 0; } else { feat_choice = 0; subtype = GetFeatdefFromFeatureType(GetFeatureTypeForRnaType(rq->type->choice)); } not_present = string_constraint->not_present; string_constraint->not_present = FALSE; for (sfp = SeqMgrGetNextFeature (bsp, NULL, feat_choice, subtype, &fcontext); !rval && sfp != NULL; sfp = SeqMgrGetNextFeature (bsp, sfp, feat_choice, subtype, &fcontext)) { str = GetRNAQualFromFeature (sfp, rq, string_constraint, NULL); if (str != NULL) { rval = TRUE; str = MemFree (str); } } if (not_present) { rval = !rval; string_constraint->not_present = TRUE; } } break; } return rval; } static Boolean DoesObjectMatchFieldConstraint (Uint1 choice, Pointer data, FieldConstraintPtr constraint) { Boolean rval = FALSE; BioSourcePtr biop; BioseqPtr bsp; CharPtr str; 
FeatureFieldPtr ffp; if (data == NULL) return FALSE; if (IsFieldConstraintEmpty (constraint)) { return TRUE; } switch (constraint->field->choice) { case FieldType_source_qual: biop = GetBioSourceFromObject (choice, data); if (biop != NULL) { str = GetSourceQualFromBioSource (biop, constraint->field->data.ptrvalue, constraint->string_constraint); if (str != NULL) { rval = TRUE; str = MemFree (str); } } break; case FieldType_feature_field: ffp = (FeatureFieldPtr) constraint->field->data.ptrvalue; rval = DoesObjectMatchFeatureFieldConstraint (choice, data, ffp, constraint->string_constraint); break; case FieldType_rna_field: rval = DoesObjectMatchRnaQualConstraint (choice, data, constraint->field->data.ptrvalue, constraint->string_constraint); break; case FieldType_cds_gene_prot: ffp = FeatureFieldFromCDSGeneProtField (constraint->field->data.intvalue); rval = DoesObjectMatchFeatureFieldConstraint (choice, data, ffp, constraint->string_constraint); ffp = FeatureFieldFree (ffp); break; case FieldType_molinfo_field: bsp = GetSequenceForObject (choice, data); if (bsp != NULL) { str = GetSequenceQualFromBioseq (bsp, constraint->field->data.ptrvalue); if (str == NULL && constraint->string_constraint->not_present) { rval = TRUE; } else if (str != NULL && DoesStringMatchConstraint (str, constraint->string_constraint)) { rval = TRUE; } str = MemFree (str); } break; case FieldType_misc: case FieldType_dblink: bsp = GetSequenceForObject (choice, data); if (bsp != NULL) { str = GetFieldValueForObjectEx (OBJ_BIOSEQ, bsp, constraint->field, constraint->string_constraint, NULL); if (str != NULL) { rval = TRUE; } str = MemFree (str); } break; /* TODO LATER */ case FieldType_pub: break; } return rval; } static CharPtr GetFeatureFieldFromObject (Uint1 choice, Pointer data, FeatureFieldPtr ffp, StringConstraintPtr scp) { CharPtr rval = NULL; BioseqPtr bsp; CGPSetPtr cgp; SeqFeatPtr sfp; SeqMgrFeatContext fcontext; Int4 subtype; Uint2 cds_gene_prot_field; if (ffp == NULL || data == 
NULL) { return NULL; } switch (choice) { case OBJ_SEQFEAT: rval = GetQualFromFeature ((SeqFeatPtr) data, ffp, scp); break; case OBJ_SEQDESC: case OBJ_BIOSEQ: bsp = GetSequenceForObject (choice, data); if (bsp != NULL) { subtype = GetFeatdefFromFeatureType (ffp->type); for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, subtype, &fcontext); rval == NULL && sfp != NULL; sfp = SeqMgrGetNextFeature (bsp, sfp, 0, subtype, &fcontext)) { rval = GetQualFromFeature (sfp, ffp, scp); } } break; case 0: cgp = (CGPSetPtr) data; cds_gene_prot_field = CDSGeneProtFieldFromFeatureField (ffp); if (cds_gene_prot_field > 0) { rval = GetFieldValueFromCGPSet (cgp, cds_gene_prot_field, scp); } break; } return rval; } static CharPtr GetConstraintFieldFromObject (Uint1 choice, Pointer data, ValNodePtr field, StringConstraintPtr scp) { BioSourcePtr biop; BioseqPtr bsp; SeqFeatPtr sfp; SeqMgrFeatContext fcontext; Int4 subtype; FeatureFieldPtr ffp; RnaQualPtr rq; Uint1 feat_choice = 0; CharPtr rval = NULL; if (data == NULL || field == NULL) { return NULL; } switch (field->choice) { case FieldType_source_qual: biop = GetBioSourceFromObject (choice, data); if (biop != NULL) { rval = GetSourceQualFromBioSource (biop, field->data.ptrvalue, scp); } break; case FieldType_feature_field: rval = GetFeatureFieldFromObject(choice, data, (FeatureFieldPtr) field->data.ptrvalue, scp); break; case FieldType_rna_field: rq = (RnaQualPtr) field->data.ptrvalue; switch (choice) { case OBJ_SEQFEAT: rval = GetRNAQualFromFeature ((SeqFeatPtr) data, rq, scp, NULL); break; case OBJ_SEQDESC: case OBJ_BIOSEQ: bsp = GetSequenceForObject (choice, data); if (bsp != NULL) { if (rq->type == NULL || rq->type->choice == RnaFeatType_any) { feat_choice = SEQFEAT_RNA; subtype = 0; } else { feat_choice = 0; subtype = GetFeatdefFromFeatureType(GetFeatureTypeForRnaType(rq->type->choice)); } for (sfp = SeqMgrGetNextFeature (bsp, NULL, feat_choice, subtype, &fcontext); rval == NULL && sfp != NULL; sfp = SeqMgrGetNextFeature (bsp, sfp, 
feat_choice, subtype, &fcontext)) { rval = GetRNAQualFromFeature (sfp, rq, scp, NULL); } } break; } break; case FieldType_cds_gene_prot: ffp = FeatureFieldFromCDSGeneProtField (field->data.intvalue); rval = GetFeatureFieldFromObject (choice, data, ffp, scp); ffp = FeatureFieldFree (ffp); break; case FieldType_molinfo_field: bsp = GetSequenceForObject (choice, data); if (bsp != NULL) { rval = GetSequenceQualFromBioseq (bsp, field->data.ptrvalue); if (rval != NULL && scp != NULL && !DoesStringMatchConstraint (rval, scp)) { rval = MemFree (rval); } } break; case FieldType_misc: bsp = GetSequenceForObject (choice, data); if (bsp != NULL) { rval = GetFieldValueForObjectEx (OBJ_BIOSEQ, bsp, field, scp, NULL); } break; } return rval; } static Boolean DoesObjectMatchFieldMissingConstraint(Uint1 choice, Pointer data, ValNodePtr field) { Boolean rval = FALSE; CharPtr str; if (data == NULL || field == NULL) return FALSE; str = GetConstraintFieldFromObject(choice, data, field, NULL); if (str == NULL) { rval = TRUE; } str = MemFree (str); return rval; } NLM_EXTERN Boolean IsMolinfoFieldConstraintEmpty (MolinfoFieldConstraintPtr constraint) { if (constraint == NULL || constraint->field == NULL) { return TRUE; } else { return FALSE; } } static Boolean DoesObjectMatchMolinfoFieldConstraint (Uint1 choice, Pointer data, MolinfoFieldConstraintPtr constraint) { BioseqPtr bsp; MolInfoPtr mip; Boolean rval = FALSE; bsp = GetSequenceForObject (choice, data); if (bsp == NULL) { rval = FALSE; } else if (IsMolinfoFieldConstraintEmpty(constraint)) { rval = TRUE; } else { mip = GetMolInfoForBioseq (bsp); rval = FALSE; switch (constraint->field->choice) { case MolinfoField_molecule: if (mip == NULL && constraint->field->data.intvalue == 0) { rval = TRUE; } else if (mip != NULL && mip->biomol == BiomolFromMoleculeType (constraint->field->data.intvalue)) { rval = TRUE; } break; case MolinfoField_technique: if (mip == NULL && constraint->field->data.intvalue == 0) { rval = TRUE; } else if (mip != 
NULL && mip->tech == TechFromTechniqueType (constraint->field->data.intvalue)) { rval = TRUE; } break; case MolinfoField_completedness: if (mip == NULL && constraint->field->data.intvalue == 0) { rval = TRUE; } else if (mip != NULL && mip->completeness == CompletenessFromCompletednessType (constraint->field->data.intvalue)) { rval = TRUE; } break; case MolinfoField_mol_class: if (bsp->mol == MolFromMoleculeClassType (constraint->field->data.intvalue)) { rval = TRUE; } break; case MolinfoField_topology: if (bsp->topology == TopologyFromTopologyType (constraint->field->data.intvalue)) { rval = TRUE; } break; case MolinfoField_strand: if (bsp->strand == StrandFromStrandType (constraint->field->data.intvalue)) { rval = TRUE; } break; } if (constraint->is_not) { rval = !rval; } } return rval; } static Boolean DoesCodingRegionMatchTranslationConstraint (SeqFeatPtr sfp, TranslationConstraintPtr constraint) { ByteStorePtr trans_prot = NULL; BioseqPtr actual_prot = NULL; CharPtr translation = NULL; Int4 translation_len = 0; CharPtr actual = NULL; Int4 actual_len = 0; CharPtr stop, cp1, cp2; Boolean rval = TRUE, alt_start = FALSE; StringConstraintPtr scp; Int4 pos, comp_len; Int4 num = 0; if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) { return FALSE; } else if (constraint == NULL) { return TRUE; } if (constraint->actual_strings != NULL || constraint->num_mismatches != NULL) { actual_prot = BioseqLockById(SeqLocId(sfp->product)); if (actual_prot != NULL) { actual = (CharPtr) MemNew (sizeof (Char) * (actual_prot->length + 1)); SeqPortStreamInt (actual_prot, 0, actual_prot->length - 1, Seq_strand_plus, EXPAND_GAPS_TO_DASHES, (Pointer) (actual), NULL); actual_len = StringLen (actual); } } for (scp = constraint->actual_strings; scp != NULL && rval; scp = scp->next) { rval = DoesStringMatchConstraint (actual, scp); } if (rval) { if (constraint->transl_strings != NULL || constraint->internal_stops != Match_type_constraint_dont_care || constraint->num_mismatches != NULL) { 
trans_prot = ProteinFromCdRegionExEx (sfp, TRUE, FALSE, &alt_start, TRUE); /* include stop codons, do not remove trailing X/B/Z */ if (trans_prot != NULL) { translation = BSMerge (trans_prot, NULL); translation_len = StringLen (translation); } BSFree (trans_prot); } for (scp = constraint->transl_strings; scp != NULL && rval; scp = scp->next) { rval = DoesStringMatchConstraint (translation, scp); } if (rval && constraint->internal_stops != Match_type_constraint_dont_care) { stop = StringChr (translation, '*'); if (stop != NULL && stop != translation + translation_len - 1) { if (constraint->internal_stops == Match_type_constraint_no) { rval = FALSE; } } else { if (constraint->internal_stops == Match_type_constraint_yes) { rval = FALSE; } } } } if (rval && constraint->num_mismatches != NULL) { stop = StringRChr (translation, '*'); if (stop != NULL && stop == translation + translation_len - 1) { translation_len--; } stop = StringRChr (actual, '*'); if (stop != NULL && stop == actual + actual_len - 1) { actual_len--; } if (translation_len > actual_len) { num = translation_len - actual_len; comp_len = actual_len; } else { num = actual_len - translation_len; comp_len = translation_len; } cp1 = actual; cp2 = translation; if (cp1 != NULL && cp2 != NULL) { for (pos = 0; pos < comp_len && rval; pos++) { if (*cp1 != *cp2) { num++; if (constraint->num_mismatches->choice == QuantityConstraint_equals && num > constraint->num_mismatches->data.intvalue) { rval = FALSE; } else if (constraint->num_mismatches->choice == QuantityConstraint_less_than && num >= constraint->num_mismatches->data.intvalue) { rval = FALSE; } } cp1++; cp2++; } } if (rval) { if (constraint->num_mismatches->choice == QuantityConstraint_greater_than && num <= constraint->num_mismatches->data.intvalue) { rval = FALSE; } else if (constraint->num_mismatches->choice == QuantityConstraint_equals && num != constraint->num_mismatches->data.intvalue) { rval = FALSE; } else if (constraint->num_mismatches->choice == 
QuantityConstraint_less_than && num >= constraint->num_mismatches->data.intvalue) { rval = FALSE; } } } if (actual_prot != NULL) { BioseqUnlock(actual_prot); } actual = MemFree (actual); translation = MemFree (translation); return rval; } static Boolean DoesObjectMatchTranslationConstraint (Uint1 choice, Pointer data, TranslationConstraintPtr constraint) { Boolean rval = FALSE; SeqFeatPtr sfp = NULL; BioseqPtr bsp; SeqMgrFeatContext context; if (data == NULL) { return FALSE; } else if (constraint == NULL) { return TRUE; } switch (choice) { case OBJ_SEQFEAT: /* must be coding region or protein feature */ sfp = (SeqFeatPtr) data; if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT) { bsp = BioseqFindFromSeqLoc (sfp->location); sfp = SeqMgrGetCDSgivenProduct (bsp, &context); } rval = DoesCodingRegionMatchTranslationConstraint (sfp, constraint); break; case OBJ_BIOSEQ: /* must be protein sequence, or nucleotide bioseq with only one coding region */ bsp = data; if (bsp != NULL) { if (ISA_aa (bsp->mol)) { sfp = SeqMgrGetCDSgivenProduct (bsp, &context); } else { sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &context); if (SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &context) != NULL) { sfp = NULL; } } rval = DoesCodingRegionMatchTranslationConstraint (sfp, constraint); } break; } return rval; } static Boolean DoesObjectMatchConstraint (Uint1 choice, Pointer data, ConstraintChoicePtr constraint) { Boolean rval = TRUE; if (data == NULL) return FALSE; if (constraint == NULL) return TRUE; switch (constraint->choice) { case ConstraintChoice_string : rval = DoesObjectMatchStringConstraint (choice, data, constraint->data.ptrvalue); break; case ConstraintChoice_location : rval = DoesObjectMatchLocationConstraint (choice, data, constraint->data.ptrvalue); break; case ConstraintChoice_field : rval = DoesObjectMatchFieldConstraint (choice, data, constraint->data.ptrvalue); break; case ConstraintChoice_source : rval = DoesBiosourceMatchConstraint 
(GetBioSourceFromObject (choice, data), constraint->data.ptrvalue); break; case ConstraintChoice_cdsgeneprot_qual : if (choice == 0) { rval = DoesCGPSetMatchQualConstraint (data, constraint->data.ptrvalue); } else if (choice == OBJ_SEQDESC) { rval = DoesSeqDescMatchCGPQualConstraint (data, constraint->data.ptrvalue); } else if (choice == OBJ_SEQFEAT) { rval = DoesFeatureMatchCGPQualConstraint (data, constraint->data.ptrvalue); } else if (choice == OBJ_BIOSEQ) { rval = DoesSequenceMatchCGPQualConstraint (data, constraint->data.ptrvalue); } else { rval = FALSE; } break; case ConstraintChoice_cdsgeneprot_pseudo : if (choice == 0) { rval = DoesCGPSetMatchPseudoConstraint (data, constraint->data.ptrvalue); } else if (choice == OBJ_SEQFEAT) { rval = DoesFeatureMatchCGPPseudoConstraint (data, constraint->data.ptrvalue); } break; case ConstraintChoice_sequence : rval = DoesObjectMatchSequenceConstraint (choice, data, constraint->data.ptrvalue); break; case ConstraintChoice_pub: rval = DoesObjectMatchPublicationConstraint (choice, data, constraint->data.ptrvalue); break; case ConstraintChoice_molinfo: rval = DoesObjectMatchMolinfoFieldConstraint (choice, data, constraint->data.ptrvalue); break; case ConstraintChoice_field_missing: rval = DoesObjectMatchFieldMissingConstraint (choice, data, constraint->data.ptrvalue); break; case ConstraintChoice_translation: rval = DoesObjectMatchTranslationConstraint (choice, data, constraint->data.ptrvalue); break; } return rval; } NLM_EXTERN Boolean DoesObjectMatchConstraintChoiceSet (Uint1 choice, Pointer data, ConstraintChoiceSetPtr csp) { Boolean rval = TRUE; if (data == NULL) return FALSE; while (csp != NULL && rval) { rval = DoesObjectMatchConstraint (choice, data, csp); csp = csp->next; } return rval; } NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForField (FieldTypePtr field, ConstraintChoiceSetPtr csp) { StringConstraintPtr scp = NULL; SourceConstraintPtr source_constraint; CDSGeneProtQualConstraintPtr 
cgp_constraint; PublicationConstraintPtr pub_constraint; FieldConstraintPtr field_constraint; FieldType ft; while (csp != NULL) { switch (csp->choice) { case ConstraintChoice_string : scp = csp->data.ptrvalue; break; case ConstraintChoice_source : source_constraint = (SourceConstraintPtr) csp->data.ptrvalue; if (source_constraint != NULL && source_constraint->constraint != NULL) { if (source_constraint->field1 != NULL) { ft.choice = FieldType_source_qual; ft.data.ptrvalue = source_constraint->field1; ft.next = NULL; if (DoFieldTypesMatch (field, &ft)) { scp = source_constraint->constraint; } } if (scp == NULL && source_constraint->field2 == NULL) { ft.choice = FieldType_source_qual; ft.data.ptrvalue = source_constraint->field2; ft.next = NULL; if (DoFieldTypesMatch (field, &ft)) { scp = source_constraint->constraint; } } } break; case ConstraintChoice_cdsgeneprot_qual : cgp_constraint = (CDSGeneProtQualConstraintPtr) csp->data.ptrvalue; if (field->choice == FieldType_cds_gene_prot && cgp_constraint != NULL && cgp_constraint->constraint != NULL && ((cgp_constraint->field1 != NULL && cgp_constraint->field1->data.intvalue == field->data.intvalue) || (cgp_constraint->field2 != NULL && cgp_constraint->field2->data.intvalue == field->data.intvalue))) { scp = cgp_constraint->constraint; } break; case ConstraintChoice_pub : pub_constraint = csp->data.ptrvalue; if (pub_constraint != NULL && pub_constraint->field != NULL) { if (field->data.intvalue == pub_constraint->field->field && !IsStringConstraintEmpty (pub_constraint->field->constraint)) { scp = pub_constraint->field->constraint; } } break; case ConstraintChoice_field : field_constraint = csp->data.ptrvalue; if (field_constraint != NULL && field_constraint->field != NULL && DoFieldTypesMatch (field, field_constraint->field)) { scp = field_constraint->string_constraint; } break; } csp = csp->next; } return scp; } NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForFieldPair (FieldPairTypePtr fieldpair, 
ConstraintChoiceSetPtr csp) { StringConstraintPtr scp; FieldTypePtr f; f = GetFromFieldFromFieldPair (fieldpair); scp = FindStringConstraintInConstraintSetForField (f, csp); f = FieldTypeFree (f); return scp; } NLM_EXTERN StringConstraintPtr StringConstraintFromFieldEdit (FieldEditPtr edit) { StringConstraintPtr scp; if (edit == NULL || edit->find_txt == NULL) return NULL; scp = StringConstraintNew (); scp->match_text = StringSave (edit->find_txt); switch (edit->location) { case Field_edit_location_anywhere : scp->match_location = String_location_contains; break; case Field_edit_location_beginning : scp->match_location = String_location_starts; break; case Field_edit_location_end : scp->match_location = String_location_ends; break; } scp->case_sensitive = !(edit->case_insensitive); scp->whole_word = FALSE; scp->not_present = FALSE; return scp; } static CharPtr ApplyEditToString (CharPtr str, FieldEditPtr edit) { CharPtr cp_found, new_str; Int4 found_len, replace_len, new_len; if (edit == NULL) return StringSave (str); str = StringSave (str); if (edit->case_insensitive) { cp_found = StringISearch (str, edit->find_txt); } else { cp_found = StringSearch (str, edit->find_txt); } found_len = StringLen (edit->find_txt); replace_len = StringLen (edit->repl_txt); while (cp_found != NULL) { if (edit->location == Field_edit_location_beginning && cp_found != str) { cp_found = NULL; } else if (edit->location == Field_edit_location_end && cp_found != str + StringLen (str) - found_len) { if (edit->case_insensitive) { cp_found = StringISearch (cp_found + found_len, edit->find_txt); } else { cp_found = StringSearch (cp_found + found_len, edit->find_txt); } } else { new_len = StringLen (str) + 1 - found_len + replace_len; new_str = (CharPtr) MemNew (new_len * sizeof (Char)); if (new_str != NULL) { if (cp_found != str) { StringNCpy (new_str, str, cp_found - str); } StringCat (new_str, edit->repl_txt); StringCat (new_str, cp_found + found_len); cp_found = new_str + (cp_found - str) + 
replace_len; str = MemFree (str); str = new_str; } if (edit->case_insensitive) { cp_found = StringISearch (cp_found, edit->find_txt); } else { cp_found = StringSearch (cp_found, edit->find_txt); } } } return str; } static void RemoveFieldNameFromString (CharPtr field_name, CharPtr str) { Uint4 field_name_len; CharPtr src, dst; if (StringHasNoText (field_name) || StringHasNoText (str)) { return; } field_name_len = StringLen (field_name); if (!StringHasNoText (field_name) && StringNICmp(str, field_name, field_name_len) == 0 && StringLen (str) > field_name_len && str[field_name_len] == ' ') { src = str + field_name_len + 1; while (*src == ' ') { src++; } dst = str; while (*src != 0) { *dst = *src; dst++; src++; } *dst = 0; } } typedef struct objectcollection { AECRActionPtr action; ValNodePtr object_list; ValNodePtr object_tail; BatchExtraPtr batch_extra; } ObjectCollectionData, PNTR ObjectCollectionPtr; static void AECRActionObjectCollectionItemCallback (Uint1 objecttype, Pointer objectdata, ObjectCollectionPtr o) { ApplyActionPtr a; EditActionPtr e; ConvertActionPtr v; CopyActionPtr c; SwapActionPtr s; RemoveActionPtr r; AECRParseActionPtr p; RemoveOutsideActionPtr ro; CharPtr str, portion, field_name; StringConstraintPtr scp; FieldTypePtr field_from = NULL, field_to = NULL; if (objectdata == NULL || o == NULL) return; /* check to make sure object is appropriate for field and meets filter */ switch (o->action->action->choice) { case ActionChoice_apply : a = (ApplyActionPtr) o->action->action->data.ptrvalue; if (a != NULL && IsObjectAppropriateForFieldValue (objecttype, objectdata, a->field) && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); } break; case ActionChoice_edit : e = (EditActionPtr) o->action->action->data.ptrvalue; if (e != NULL && IsObjectAppropriateForFieldValue (objecttype, objectdata, e->field) && 
DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { scp = StringConstraintFromFieldEdit (e->edit); str = GetFieldValueForObjectEx (objecttype, objectdata, e->field, scp, o->batch_extra); if (!StringHasNoText (str)) { ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); } str = MemFree (str); } break; case ActionChoice_remove_outside : ro = (RemoveOutsideActionPtr) o->action->action->data.ptrvalue; if (ro != NULL && IsObjectAppropriateForFieldValue (objecttype, objectdata, ro->field) && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); } break; case ActionChoice_convert : v = (ConvertActionPtr) o->action->action->data.ptrvalue; if (v != NULL && (field_from = GetFromFieldFromFieldPair(v->fields)) != NULL && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from) && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { scp = FindStringConstraintInConstraintSetForField (field_from, o->action->constraint); str = GetFieldValueForObjectEx (objecttype, objectdata, field_from, scp, o->batch_extra); if (v->strip_name) { field_to = GetToFieldFromFieldPair (v->fields); field_name = SummarizeFieldType (field_to); RemoveFieldNameFromString (field_name, str); field_name = MemFree (field_name); field_to = FieldTypeFree (field_to); } if (!StringHasNoText (str)) { ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); } str = MemFree (str); } field_from = FieldTypeFree (field_from); break; case ActionChoice_copy : c = (CopyActionPtr) o->action->action->data.ptrvalue; if (c != NULL && (field_from = GetFromFieldFromFieldPair(c->fields)) != NULL && (field_to = GetFromFieldFromFieldPair(c->fields)) != NULL && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from) && IsObjectAppropriateForFieldValue (objecttype, 
objectdata, field_to) && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); } field_from = FieldTypeFree (field_from); field_to = FieldTypeFree (field_to); break; case ActionChoice_swap : s = (SwapActionPtr) o->action->action->data.ptrvalue; if (s != NULL && (field_from = GetFromFieldFromFieldPair(s->fields)) != NULL && (field_to = GetFromFieldFromFieldPair(s->fields)) != NULL && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from) && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to) && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); } field_from = FieldTypeFree (field_from); field_to = FieldTypeFree (field_to); break; case ActionChoice_remove : r = (RemoveActionPtr) o->action->action->data.ptrvalue; if (r != NULL && IsObjectAppropriateForFieldValue (objecttype, objectdata, r->field) && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); } break; case ActionChoice_parse : p = (AECRParseActionPtr) o->action->action->data.ptrvalue; if (p != NULL && (field_from = GetFromFieldFromFieldPair(p->fields)) != NULL && (field_to = GetFromFieldFromFieldPair(p->fields)) != NULL && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from) && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to) && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) { scp = FindStringConstraintInConstraintSetForField (field_from, o->action->constraint); str = GetFieldValueForObjectEx (objecttype, objectdata, field_from, scp, o->batch_extra); portion = GetTextPortionFromString (str, p->portion); ApplyTextTransformsToString (&portion, p->transform); if 
(!StringHasNoText (portion)) { ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata); } portion = MemFree (portion); str = MemFree (str); } field_from = FieldTypeFree (field_from); field_to = FieldTypeFree (field_to); break; } } static void AECRActionObjectCollectionFeatureCallback (SeqFeatPtr sfp, Pointer data) { ObjectCollectionPtr o; if (sfp == NULL || data == NULL) return; o = (ObjectCollectionPtr) data; AECRActionObjectCollectionItemCallback (OBJ_SEQFEAT, sfp, o); } static void AECRActionObjectCollectionDescriptorCallback (SeqDescrPtr sdp, Pointer data) { ObjectCollectionPtr o; if (sdp == NULL || data == NULL) return; o = (ObjectCollectionPtr) data; AECRActionObjectCollectionItemCallback (OBJ_SEQDESC, sdp, o); } static void AECRObjectCollectionBioseqCallback (BioseqPtr bsp, Pointer data) { ObjectCollectionPtr o; if (bsp == NULL || data == NULL) return; o = (ObjectCollectionPtr) data; AECRActionObjectCollectionItemCallback (OBJ_BIOSEQ, bsp, o); } static ValNodePtr CollectMiscObjectsForApply (SeqEntryPtr sep, Int4 misc_type, ValNodePtr constraint) { ValNodePtr target_list = NULL, bsp_list = NULL, tmp_list = NULL, tmp_tail = NULL, vnp; if (sep == NULL) { return NULL; } /* VisitBioseqsInSep (sep, &bsp_list, CollectNucBioseqCallback); */ bsp_list = CollectNucBioseqs (sep); for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { if (DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, constraint)) { ValNodeAddPointerEx (&tmp_list, &tmp_tail, vnp->choice, vnp->data.ptrvalue); } } bsp_list = ValNodeFree (bsp_list); if (misc_type == Misc_field_genome_project_id) { target_list = tmp_list; tmp_list = NULL; } else if (misc_type == Misc_field_comment_descriptor) { for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { AddCommentDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); } } else if (misc_type == Misc_field_defline) { for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { 
AddDeflineDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); } tmp_list = ValNodeFree (tmp_list); } else if (misc_type == Misc_field_keyword) { for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { AddGenbankBlockDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); } tmp_list = ValNodeFree (tmp_list); } tmp_list = ValNodeFree (tmp_list); return target_list; } static void AddDBLinkDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list) { SeqDescrPtr sdp; UserObjectPtr uop; SeqMgrDescContext context; Boolean found = FALSE; ObjValNodePtr ovp; if (bsp == NULL || dest_list == NULL) { return; } for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); sdp != NULL; sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) { if ((uop = sdp->data.ptrvalue) != NULL && IsUserObjectDBLink (uop)) { ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); found = TRUE; } } if (!found) { /* if no existing comment descriptor, create one, marked for delete. * unmark it for deletion when it gets populated. 
*/ sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user); sdp->data.ptrvalue = CreateDBLinkUserObject (); ovp = (ObjValNodePtr) sdp; ovp->idx.deleteme = TRUE; ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); } } static ValNodePtr CollectDBLinkObjectsForApply (SeqEntryPtr sep, Int4 misc_type, ValNodePtr constraint) { ValNodePtr target_list = NULL, bsp_list = NULL, tmp_list = NULL, tmp_tail = NULL, vnp; if (sep == NULL) { return NULL; } /* VisitBioseqsInSep (sep, &bsp_list, CollectNucBioseqCallback); */ bsp_list = CollectNucBioseqs (sep); for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { if (DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, constraint)) { AddDBLinkDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list); } } bsp_list = ValNodeFree (bsp_list); return target_list; } static void AddStructuredCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list, ValNodePtr PNTR dest_tail) { SeqDescrPtr sdp; SeqMgrDescContext context; Boolean found = FALSE; ObjValNodePtr ovp; UserObjectPtr uop; if (bsp == NULL || dest_list == NULL || dest_tail == NULL) { return; } for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); sdp != NULL; sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) { if (IsUserObjectStructuredComment (sdp->data.ptrvalue)) { ValNodeAddPointerEx (dest_list, dest_tail, OBJ_SEQDESC, sdp); found = TRUE; } } if (!found) { /* if no existing structured comment descriptor, create one, marked for delete. * unmark it for deletion when it gets populated. 
*/ sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user); uop = UserObjectNew (); uop->type = ObjectIdNew (); uop->type->str = StringSave ("StructuredComment"); sdp->data.ptrvalue = uop; ovp = (ObjValNodePtr) sdp; ovp->idx.deleteme = TRUE; ValNodeAddPointerEx (dest_list, dest_tail, OBJ_SEQDESC, sdp); } } static ValNodePtr CollectStructuredCommentsForApply (SeqEntryPtr sep, ValNodePtr constraint) { ValNodePtr target_list = NULL, target_tail = NULL, bsp_list = NULL, tmp_list = NULL, tmp_tail = NULL, vnp; if (sep == NULL) { return NULL; } /* VisitBioseqsInSep (sep, &bsp_list, CollectNucBioseqCallback); */ bsp_list = CollectNucBioseqs (sep); for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { if (DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, constraint)) { ValNodeAddPointerEx (&tmp_list, &tmp_tail, vnp->choice, vnp->data.ptrvalue); } } bsp_list = ValNodeFree (bsp_list); for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { AddStructuredCommentDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list, &target_tail); } tmp_list = ValNodeFree (tmp_list); return target_list; } NLM_EXTERN ValNodePtr GetObjectListForAECRActionEx (SeqEntryPtr sep, AECRActionPtr action, BatchExtraPtr batch_extra) { ObjectCollectionData ocd; ApplyActionPtr apply; Uint1 field_type; if (action == NULL) return NULL; ocd.action = action; ocd.object_list = NULL; ocd.object_tail = NULL; if (batch_extra == NULL) { ocd.batch_extra = BatchExtraNew (); InitBatchExtraForAECRAction (ocd.batch_extra, action, sep); } else { ocd.batch_extra = batch_extra; } field_type = FieldTypeFromAECRAction (action); if (field_type == FieldType_molinfo_field) { VisitBioseqsInSep (sep, &ocd, AECRObjectCollectionBioseqCallback); } else if (field_type == FieldType_misc && action->action != NULL && action->action->choice == ActionChoice_apply && (apply = action->action->data.ptrvalue) != NULL) { ocd.object_list = CollectMiscObjectsForApply (sep, apply->field->data.intvalue, action->constraint); 
} else if (field_type == FieldType_dblink && action->action != NULL && action->action->choice == ActionChoice_apply && (apply = action->action->data.ptrvalue) != NULL) { ocd.object_list = CollectDBLinkObjectsForApply (sep, apply->field->data.intvalue, action->constraint); } else if (field_type == FieldType_struc_comment_field) { ocd.object_list = CollectStructuredCommentsForApply (sep, action->constraint); } else { VisitFeaturesInSep (sep, &ocd, AECRActionObjectCollectionFeatureCallback); VisitDescriptorsInSep (sep, &ocd, AECRActionObjectCollectionDescriptorCallback); if (field_type == FieldType_misc) { VisitBioseqsInSep (sep, &ocd, AECRObjectCollectionBioseqCallback); } } if (batch_extra != ocd.batch_extra) { ocd.batch_extra = BatchExtraFree (ocd.batch_extra); } return ocd.object_list; } NLM_EXTERN ValNodePtr GetObjectListForAECRAction (SeqEntryPtr sep, AECRActionPtr action) { return GetObjectListForAECRActionEx (sep, action, NULL); } NLM_EXTERN ValNodePtr FreeObjectList (ValNodePtr vnp) { ValNodePtr vnp_next; while (vnp != NULL) { vnp_next = vnp->next; vnp->next = NULL; if (vnp->choice == 0) { vnp->data.ptrvalue = CGPSetFree (vnp->data.ptrvalue); } vnp = ValNodeFree (vnp); vnp = vnp_next; } return vnp; } typedef struct buildcgpset { ValNodePtr cds_list; ValNodePtr mrna_list; ValNodePtr gene_list; } BuildCGPSetData, PNTR BuildCGPSetPtr; static void BuildCGPSetCallback (SeqFeatPtr sfp, Pointer userdata) { BuildCGPSetPtr b; if (sfp == NULL || sfp->idx.deleteme || userdata == NULL) return; b = (BuildCGPSetPtr) userdata; if (sfp->data.choice == SEQFEAT_CDREGION) { ValNodeAddPointer (&(b->cds_list), OBJ_SEQFEAT, sfp); } else if (sfp->data.choice == SEQFEAT_GENE) { ValNodeAddPointer (&(b->gene_list), OBJ_SEQFEAT, sfp); } else if (sfp->idx.subtype == FEATDEF_mRNA) { ValNodeAddPointer (&(b->mrna_list), OBJ_SEQFEAT, sfp); } else if (SeqMgrGetGeneXref (sfp) != NULL) { ValNodeAddPointer (&(b->gene_list), OBJ_SEQFEAT, sfp); } } static SeqFeatPtr AddProtFeatForCds (SeqFeatPtr 
cds, BioseqPtr protbsp) { ProtRefPtr prp; SeqFeatPtr prot; Boolean partial5, partial3; if (cds == NULL || protbsp == NULL) { return NULL; } prp = ProtRefNew (); prot = CreateNewFeatureOnBioseq (protbsp, SEQFEAT_PROT, NULL); if (prot != NULL) { prot->data.value.ptrvalue = prp; CheckSeqLocForPartial (cds->location, &partial5, &partial3); SetSeqLocPartial (prot->location, partial5, partial3); prot->partial = partial5 || partial3; } return prot; } static CGPSetPtr BuildCGPSetFromCodingRegion (SeqFeatPtr cds, BoolPtr indexing_needed) { SeqMgrFeatContext fcontext; SeqFeatPtr gene = NULL, mrna, prot; BioseqPtr protbsp; CGPSetPtr cdsp; if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return NULL; cdsp = (CGPSetPtr) MemNew (sizeof (CGPSetData)); ValNodeAddPointer (&(cdsp->cds_list), 0, cds); gene = GetGeneForFeature (cds); if (gene != NULL) { ValNodeAddPointer (&(cdsp->gene_list), 0, gene); /* mark gene, so that we'll know it isn't lonely */ gene->idx.deleteme = TRUE; } mrna = GetmRNAforCDS (cds); if (mrna != NULL) { ValNodeAddPointer (&(cdsp->mrna_list), 0, mrna); /* mark mrna, so that we'll know it's already in a set */ mrna->idx.deleteme = TRUE; } if (cds->product != NULL) { protbsp = BioseqFindFromSeqLoc (cds->product); if (protbsp != NULL) { prot = SeqMgrGetBestProteinFeature (protbsp, NULL); if (prot == NULL) { prot = GetBestProteinFeatureUnindexed (cds->product); } /* if there is no full-length protein feature, make one */ if (prot == NULL) { prot = AddProtFeatForCds (cds, protbsp); if (prot != NULL) { ResynchCDSPartials (cds, NULL); if (indexing_needed != NULL) { *indexing_needed = TRUE; } } } if (prot != NULL) { ValNodeAddPointer (&(cdsp->prot_list), 0, prot); } /* also add in mat_peptides from protein feature */ prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &fcontext); while (prot != NULL) { ValNodeAddPointer (&(cdsp->prot_list), 0, prot); prot = SeqMgrGetNextFeature (protbsp, prot, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, 
&fcontext); } } } return cdsp; } static CGPSetPtr BuildCGPSetFrommRNA (SeqFeatPtr mrna) { SeqFeatPtr gene; CGPSetPtr cdsp; if (mrna == NULL || mrna->idx.deleteme || mrna->idx.subtype != FEATDEF_mRNA) return NULL; cdsp = (CGPSetPtr) MemNew (sizeof (CGPSetData)); ValNodeAddPointer (&(cdsp->mrna_list), 0, mrna); gene = GetGeneForFeature (mrna); if (gene != NULL) { ValNodeAddPointer (&(cdsp->gene_list), 0, gene); /* mark gene, so that we'll know it isn't lonely */ gene->idx.deleteme = TRUE; } return cdsp; } static CGPSetPtr BuildCGPSetFromGene (SeqFeatPtr gene) { CGPSetPtr cdsp; if (gene == NULL || gene->idx.deleteme || gene->idx.subtype != FEATDEF_GENE) { return NULL; } cdsp = CGPSetNew (); ValNodeAddPointer (&(cdsp->gene_list), 0, gene); return cdsp; } static void AdjustCGPObjectListForMatPeptides (ValNodePtr PNTR cgp_list, FieldTypePtr field1, FieldTypePtr field2, ConstraintChoiceSetPtr constraints) { ConstraintChoiceSetPtr mat_peptide_constraints = NULL; ValNodePtr vnp, vnp_prev, vnp_next; ValNodePtr m_vnp, m_vnp_prev, m_vnp_next, mat_peptide_list; CGPSetPtr cdsp; SeqFeatPtr sfp; if (cgp_list == NULL || *cgp_list == NULL || constraints == NULL || (field1 == NULL && field2 == NULL) /* no fields specified */ || (!IsFieldTypeMatPeptideRelated (field1) && !IsFieldTypeMatPeptideRelated(field2))) { return; } /* get list of constraints that apply to mat-peptide features */ while (constraints != NULL) { if (IsConstraintChoiceMatPeptideRelated (constraints)) { ValNodeLink (&mat_peptide_constraints, AsnIoMemCopy (constraints, (AsnReadFunc) ConstraintChoiceAsnRead, (AsnWriteFunc) ConstraintChoiceAsnWrite)); } constraints = constraints->next; } if (mat_peptide_constraints == NULL) { return; } /* if both fields are mat-peptide related, or one is mat-peptide related and the other is NULL, * convert sets to lists of mat-peptide features * otherwise just remove mat-peptide features from the prot-list that do not match the constraints. 
*/ if ((field1 != NULL && !IsFieldTypeMatPeptideRelated (field1)) || (field2 != NULL && !IsFieldTypeMatPeptideRelated (field2))) { for (vnp = *cgp_list; vnp != NULL; vnp = vnp->next) { if (vnp->choice == 0) { cdsp = (CGPSetPtr) vnp->data.ptrvalue; m_vnp_prev = NULL; for (m_vnp = cdsp->prot_list; m_vnp != NULL; m_vnp = m_vnp_next) { m_vnp_next = m_vnp->next; sfp = m_vnp->data.ptrvalue; if (sfp == NULL || (sfp->idx.subtype == FEATDEF_mat_peptide_aa && !DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, mat_peptide_constraints))) { if (m_vnp_prev == NULL) { cdsp->prot_list = m_vnp->next; } else { m_vnp_prev->next = m_vnp->next; } m_vnp->next = NULL; m_vnp = ValNodeFree (m_vnp); } else { m_vnp_prev = m_vnp; } } } } } else { vnp_prev = NULL; for (vnp = *cgp_list; vnp != NULL; vnp = vnp_next) { vnp_next = vnp->next; if (vnp->choice == 0) { mat_peptide_list = NULL; cdsp = (CGPSetPtr) vnp->data.ptrvalue; for (m_vnp = cdsp->prot_list; m_vnp != NULL; m_vnp = m_vnp->next) { sfp = m_vnp->data.ptrvalue; if (sfp->idx.subtype == FEATDEF_mat_peptide_aa && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, mat_peptide_constraints)) { ValNodeAddPointer (&mat_peptide_list, OBJ_SEQFEAT, sfp); } } if (mat_peptide_list == NULL) { if (vnp_prev == NULL) { *cgp_list = vnp->next; } else { vnp_prev->next = vnp->next; } vnp->next = NULL; vnp = FreeObjectList (vnp); } else { m_vnp = mat_peptide_list; while (m_vnp->next != NULL) { m_vnp = m_vnp->next; } if (vnp_prev == NULL) { *cgp_list = mat_peptide_list; } else { vnp_prev->next = mat_peptide_list; } m_vnp->next = vnp_next; vnp_prev = m_vnp; vnp->next = NULL; vnp = FreeObjectList (vnp); } } else { vnp_prev = vnp; } } } mat_peptide_constraints = ConstraintChoiceSetFree (mat_peptide_constraints); } static ValNodePtr BuildCGPSetList (Uint2 entityID, AECRActionPtr act, BoolPtr created_protein_features) { SeqEntryPtr sep; BuildCGPSetData b; CGPSetPtr cdsp; ValNodePtr vnp, vnp_next, vnp_prev; ValNodePtr cdset_list = NULL; SeqFeatPtr cds, gene, 
mrna; Boolean need_indexing = FALSE; ApplyActionPtr a; EditActionPtr e; ConvertActionPtr c; CopyActionPtr cp; SwapActionPtr s; AECRParseActionPtr pa; RemoveActionPtr r; RemoveOutsideActionPtr ro; FieldTypePtr field_from, field_to; sep = GetTopSeqEntryForEntityID (entityID); b.cds_list = NULL; b.gene_list = NULL; b.mrna_list = NULL; if (created_protein_features != NULL) { *created_protein_features = FALSE; } VisitFeaturesInSep (sep, &b, BuildCGPSetCallback); /* build cdsets that have coding regions */ for (vnp = b.cds_list; vnp != NULL; vnp = vnp->next) { cds = (SeqFeatPtr) vnp->data.ptrvalue; if (cds == NULL) continue; cdsp = BuildCGPSetFromCodingRegion (cds, &need_indexing); if (cdsp != NULL) { ValNodeAddPointer (&cdset_list, 0, cdsp); } } if (need_indexing) { /* indexing because we have created full-length protein features */ SeqMgrIndexFeatures (entityID, NULL); if (created_protein_features != NULL) { *created_protein_features = TRUE; } } /* build cdsets for mrna features that don't have coding regions */ for (vnp = b.mrna_list; vnp != NULL; vnp = vnp->next) { mrna = (SeqFeatPtr) vnp->data.ptrvalue; if (mrna == NULL || mrna->idx.deleteme) continue; cdsp = BuildCGPSetFrommRNA (mrna); if (cdsp != NULL) { ValNodeAddPointer (&cdset_list, 0, cdsp); } } /* build cdsets for lonely genes / features with gene xrefs that are not coding regions or mrnas */ for (vnp = b.gene_list; vnp != NULL; vnp = vnp->next) { gene = (SeqFeatPtr) vnp->data.ptrvalue; if (gene == NULL || gene->idx.deleteme) continue; cdsp = BuildCGPSetFromGene (gene); if (cdsp != NULL) { ValNodeAddPointer (&cdset_list, 0, cdsp); } } /* now unmark features */ UnmarkFeatureList (b.cds_list); UnmarkFeatureList (b.mrna_list); UnmarkFeatureList (b.gene_list); b.cds_list = ValNodeFree (b.cds_list); b.mrna_list = ValNodeFree (b.mrna_list); b.gene_list = ValNodeFree (b.gene_list); /* now remove sets that don't match our choice constraint */ if (act != NULL && act->constraint != NULL) { vnp_prev = NULL; for (vnp = 
cdset_list; vnp != NULL; vnp = vnp_next) { vnp_next = vnp->next; if (!DoesObjectMatchConstraintChoiceSet (0, vnp->data.ptrvalue, act->constraint)) { if (vnp_prev == NULL) { cdset_list = vnp->next; } else { vnp_prev->next = vnp->next; } vnp->next = NULL; FreeCGPSetList (vnp); } else { vnp_prev = vnp; } } } /* adjust if action fields are mat-peptide specific */ if (act != NULL && act->action != NULL && act->action->data.ptrvalue != NULL) { switch (act->action->choice) { case ActionChoice_apply: a = (ApplyActionPtr) act->action->data.ptrvalue; AdjustCGPObjectListForMatPeptides (&cdset_list, a->field, NULL, act->constraint); break; case ActionChoice_edit: e = (EditActionPtr) act->action->data.ptrvalue; AdjustCGPObjectListForMatPeptides (&cdset_list, e->field, NULL, act->constraint); break; case ActionChoice_remove_outside: ro = (RemoveOutsideActionPtr) act->action->data.ptrvalue; AdjustCGPObjectListForMatPeptides (&cdset_list, ro->field, NULL, act->constraint); break; case ActionChoice_convert: c = (ConvertActionPtr) act->action->data.ptrvalue; field_from = GetFromFieldFromFieldPair (c->fields); field_to = GetToFieldFromFieldPair (c->fields); AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint); field_from = FieldTypeFree (field_from); field_to = FieldTypeFree (field_to); break; case ActionChoice_copy: cp = (CopyActionPtr) act->action->data.ptrvalue; field_from = GetFromFieldFromFieldPair (cp->fields); field_to = GetToFieldFromFieldPair (cp->fields); AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint); field_from = FieldTypeFree (field_from); field_to = FieldTypeFree (field_to); break; case ActionChoice_swap: s = (SwapActionPtr) act->action->data.ptrvalue; field_from = GetFromFieldFromFieldPair (s->fields); field_to = GetToFieldFromFieldPair (s->fields); AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint); field_from = FieldTypeFree (field_from); field_to = 
FieldTypeFree (field_to); break; case ActionChoice_remove: r = (RemoveActionPtr) act->action->data.ptrvalue; AdjustCGPObjectListForMatPeptides (&cdset_list, r->field, NULL, act->constraint); break; case ActionChoice_parse: pa = (AECRParseActionPtr) act->action->data.ptrvalue; field_from = GetFromFieldFromFieldPair (pa->fields); field_to = GetToFieldFromFieldPair (pa->fields); AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint); field_from = FieldTypeFree (field_from); field_to = FieldTypeFree (field_to); break; } } return cdset_list; } static Boolean AlsoChangeMrnaForObject (Uint1 choice, Pointer data) { CharPtr str; SeqFeatPtr sfp, mrna, cds; BioseqPtr prot; FeatureField f; Boolean rval = FALSE; if (choice == 0) { str = GetFieldValueFromCGPSet (data, CDSGeneProt_field_prot_name, NULL); rval = SetFieldValueInCGPSet (data, CDSGeneProt_field_mrna_product, NULL, str, ExistingTextOption_replace_old); str = MemFree (str); } else if (choice == OBJ_SEQFEAT) { sfp = (SeqFeatPtr) data; if (sfp != NULL) { if (sfp->data.choice == SEQFEAT_CDREGION) { mrna = GetmRNAforCDS (sfp); } else if (sfp->data.choice == SEQFEAT_PROT) { prot = BioseqFindFromSeqLoc(sfp->location); cds = SeqMgrGetCDSgivenProduct (prot, NULL); mrna = GetmRNAforCDS (cds); } if (mrna != NULL) { if (sfp->data.choice == SEQFEAT_CDREGION) { f.type = Macro_feature_type_cds; } else { f.type = Macro_feature_type_prot; } f.field = ValNodeNew(NULL); f.field->next = NULL; f.field->choice = FeatQualChoice_legal_qual; f.field->data.intvalue = Feat_qual_legal_product; str = GetQualFromFeature (sfp, &f, NULL); f.type = Macro_feature_type_mRNA; rval = SetQualOnFeature (mrna, &f, NULL, str, ExistingTextOption_replace_old); str = MemFree (str); f.field = ValNodeFree (f.field); } } } return rval; } NLM_EXTERN Int4 DoApplyActionToObjectListEx (ApplyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) { ValNodePtr vnp; Int4 
num_succeed = 0, num_fail = 0; CharPtr old_str, new_str; if (action == NULL || object_list == NULL) return 0; for (vnp = object_list; vnp != NULL; vnp = vnp->next) { old_str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, batch_extra); if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, action->value, action->existing_text, batch_extra)) { new_str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, batch_extra); if (StringCmp (old_str, new_str) != 0) { if (also_change_mrna) { AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); } num_succeed ++; } new_str = MemFree (new_str); } else { num_fail++; } old_str = MemFree (old_str); } return num_succeed; } NLM_EXTERN Int4 DoApplyActionToObjectList (ApplyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) { return DoApplyActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); } NLM_EXTERN Int4 DoEditActionToObjectListEx (EditActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, BatchExtraPtr batch_extra) { ValNodePtr vnp; Int4 num_succeed = 0, num_fail = 0; StringConstraintPtr scp; CharPtr str, new_str; if (action == NULL || object_list == NULL) return 0; scp = StringConstraintFromFieldEdit (action->edit); for (vnp = object_list; vnp != NULL; vnp = vnp->next) { str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, batch_extra); new_str = ApplyEditToString (str, action->edit); if (StringCmp (str, new_str) != 0 && SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, new_str, ExistingTextOption_replace_old, batch_extra)) { if (also_change_mrna) { AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); } num_succeed ++; } else { num_fail++; } new_str = MemFree (new_str); str = MemFree (str); } return num_succeed; } NLM_EXTERN Int4 DoEditActionToObjectList (EditActionPtr action, ValNodePtr 
object_list, Boolean also_change_mrna) { return DoEditActionToObjectListEx (action, object_list, also_change_mrna, NULL); } static Boolean HasMarkers (CharPtr str, TextPortionPtr text_portion) { Boolean rval = FALSE; Int4 left_len = 0, right_len = 0; if (IsTextMarkerEmpty(text_portion->left_marker)) { if (IsTextMarkerEmpty(text_portion->right_marker)) { /* both markers empty, badly formatted command */ rval = FALSE; } else if (NULL != FindTextMarker(str, &right_len, text_portion->right_marker, text_portion->case_sensitive, text_portion->whole_word)) { rval = TRUE; } } else if (NULL == FindTextMarker(str, &left_len, text_portion->left_marker, text_portion->case_sensitive, text_portion->whole_word)) { rval = FALSE; } else if (IsTextMarkerEmpty(text_portion->right_marker) || NULL != FindTextMarker(str + left_len, &right_len, text_portion->right_marker, text_portion->case_sensitive, text_portion->whole_word)) { rval = TRUE; } return rval; } NLM_EXTERN Int4 DoRemoveOutsideToObjectList (RemoveOutsideActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, BatchExtraPtr batch_extra) { ValNodePtr vnp; Int4 num_succeed = 0, num_fail = 0; CharPtr str, new_str; if (action == NULL || object_list == NULL) return 0; for (vnp = object_list; vnp != NULL; vnp = vnp->next) { str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, NULL, batch_extra); if (!action->remove_if_not_found && !HasMarkers (str, action->portion)) { /* do nothing */ } else { new_str = GetTextPortionFromString (str, action->portion); if (StringCmp (str, new_str) != 0 && SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, NULL, new_str, ExistingTextOption_replace_old, batch_extra)) { if (also_change_mrna) { AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); } num_succeed ++; } else { num_fail++; } new_str = MemFree (new_str); } str = MemFree (str); } return num_succeed; } static Boolean IsFieldTypeProteinDesc (FieldTypePtr ft); static Boolean 
NoFieldChange (CharPtr new_val, ValNodePtr vnp, FieldTypePtr field_from, StringConstraintPtr scp, BatchExtraPtr batch_extra) { Boolean rval = FALSE; CharPtr orig = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra); if (StringCmp (orig, new_val) == 0) { rval = TRUE; } orig = MemFree (orig); return rval; } static Boolean AddValuesToList(ValNodePtr apply, ValNodePtr PNTR current, Uint2 existing_text) { ValNodePtr vnp_a, vnp_c; Boolean rval = FALSE; CharPtr str; if (apply == NULL) { return FALSE; } else if (existing_text == ExistingTextOption_leave_old && current != NULL && *current != NULL) { return FALSE; } else if (existing_text == ExistingTextOption_add_qual) { for (vnp_a = apply; vnp_a != NULL; vnp_a = vnp_a->next) { ValNodeAddPointer (current, vnp_a->choice, StringSave ((CharPtr)(vnp_a->data.ptrvalue))); } rval = TRUE; } else if (existing_text == ExistingTextOption_replace_old) { *current = ValNodeFreeData (*current); for (vnp_a = apply; vnp_a != NULL; vnp_a = vnp_a->next) { ValNodeAddPointer (current, vnp_a->choice, StringSave ((CharPtr)(vnp_a->data.ptrvalue))); } rval = TRUE; } else { for (vnp_a = apply; vnp_a != NULL; vnp_a = vnp_a->next) { if (*current == NULL) { ValNodeAddPointer (current, vnp_a->choice, StringSave ((CharPtr)(vnp_a->data.ptrvalue))); rval = TRUE; } else { for (vnp_c = *current; vnp_c != NULL; vnp_c = vnp_c->next) { str = (CharPtr)(vnp_c->data.ptrvalue); rval |= SetStringValue(&str, (CharPtr)(vnp_a->data.ptrvalue), existing_text); vnp_c->data.ptrvalue = str; } } } } return rval; } NLM_EXTERN Int4 DoConvertActionToObjectListEx (ConvertActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) { ValNodePtr vnp; Int4 num_succeed = 0; CharPtr str, from_val, field_name = NULL; FieldTypePtr field_from, field_to; Boolean already_added, field_change; ValNodePtr val_list_from, val_list_to, val_vnp; if (action == NULL || object_list == NULL || 
action->fields == NULL) return 0; field_from = GetFromFieldFromFieldPair (action->fields); field_to = GetToFieldFromFieldPair (action->fields); if (action->strip_name) { field_name = SummarizeFieldType (field_to); } if (action->fields->choice == FieldPairType_molinfo_field) { for (vnp = object_list; vnp != NULL; vnp = vnp->next) { str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, NULL, batch_extra); from_val = GetSequenceQualValName (field_from->data.ptrvalue); if (StringCmp (str, from_val) == 0 && SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, ExistingTextOption_replace_old, batch_extra)) { num_succeed ++; } str = MemFree (str); } } else { for (vnp = object_list; vnp != NULL; vnp = vnp->next) { /* there may be multiple qualifiers */ val_list_from = GetMultipleFieldValuesForObject (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra); val_list_to = GetMultipleFieldValuesForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL, batch_extra); for (val_vnp = val_list_from; val_vnp != NULL; val_vnp = val_vnp->next) { str = (CharPtr)(val_vnp->data.ptrvalue); if (action->strip_name) { RemoveFieldNameFromString (field_name, str); } FixCapitalizationInString(&str, action->capitalization, NULL); val_vnp->data.ptrvalue = str; } field_change = AddValuesToList(val_list_from, &val_list_to, action->existing_text); if (field_change) { if (!action->keep_original) { RemoveFieldValueForObject(vnp->choice, vnp->data.ptrvalue, field_from, scp); } RemoveFieldValueForObject(vnp->choice, vnp->data.ptrvalue, field_to, NULL); for (val_vnp = val_list_to; val_vnp != NULL; val_vnp = val_vnp->next) { SetFieldValueForObjectEx(vnp->choice, vnp->data.ptrvalue, field_to, NULL, (CharPtr) (val_vnp->data.ptrvalue), ExistingTextOption_add_qual, batch_extra); } } if (also_change_mrna) { field_change |= AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); } if (field_change) { num_succeed++; } val_list_from = 
ValNodeFreeData(val_list_from); val_list_to = ValNodeFreeData(val_list_to); } } field_from = FieldTypeFree (field_from); field_to = FieldTypeFree (field_to); field_name = MemFree (field_name); return num_succeed; } NLM_EXTERN Int4 DoConvertActionToObjectList (ConvertActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) { return DoConvertActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); } NLM_EXTERN Int4 DoCopyActionToObjectListEx (CopyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) { ValNodePtr vnp; Int4 num_succeed = 0, num_fail = 0; CharPtr str; FieldTypePtr field_from, field_to; if (action == NULL || object_list == NULL) return 0; field_from = GetFromFieldFromFieldPair (action->fields); field_to = GetToFieldFromFieldPair (action->fields); for (vnp = object_list; vnp != NULL; vnp = vnp->next) { str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra); if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, action->existing_text, batch_extra)) { if (also_change_mrna) { AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); } num_succeed ++; } else { num_fail++; } str = MemFree (str); } field_from = FieldTypeFree (field_from); field_to = FieldTypeFree (field_to); return num_succeed; } NLM_EXTERN Int4 DoCopyActionToObjectList (CopyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) { return DoCopyActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); } NLM_EXTERN Int4 DoSwapActionToObjectListEx (SwapActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) { ValNodePtr vnp; Int4 num_succeed = 0, num_fail = 0; CharPtr str1, str2; FieldTypePtr field_from, field_to; if (action == NULL || object_list == NULL) return 0; field_from = 
GetFromFieldFromFieldPair (action->fields); field_to = GetToFieldFromFieldPair (action->fields); for (vnp = object_list; vnp != NULL; vnp = vnp->next) { str1 = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra); str2 = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, batch_extra); if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str1, ExistingTextOption_replace_old, batch_extra) && SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, str2, ExistingTextOption_replace_old, batch_extra)) { if (also_change_mrna) { AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); } num_succeed ++; } else { num_fail++; } str1 = MemFree (str1); str2 = MemFree (str2); } field_from = FieldTypeFree (field_from); field_to = FieldTypeFree (field_to); return num_succeed; } NLM_EXTERN Int4 DoSwapActionToObjectList (SwapActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) { return DoSwapActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); } NLM_EXTERN Int4 DoRemoveActionToObjectList (RemoveActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp) { ValNodePtr vnp; Int4 num_succeed = 0, num_fail = 0; if (action == NULL || object_list == NULL) return 0; for (vnp = object_list; vnp != NULL; vnp = vnp->next) { if (RemoveFieldValueForObject (vnp->choice, vnp->data.ptrvalue, action->field, scp)) { if (also_change_mrna) { AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); } num_succeed ++; } else { num_fail++; } } return num_succeed; } NLM_EXTERN Int4 DoParseActionToObjectListEx (AECRParseActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra) { ValNodePtr vnp; CharPtr str1, str2, str3, cp, tmp; Int4 len, num_succeed = 0, diff, left_len, right_len; FieldTypePtr field_from, field_to; if (action == NULL || 
object_list == NULL) return 0; field_from = GetFromFieldFromFieldPair (action->fields); field_to = GetToFieldFromFieldPair (action->fields); for (vnp = object_list; vnp != NULL; vnp = vnp->next) { str1 = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra); str2 = GetTextPortionFromString (str1, action->portion); str3 = StringSave (str2); ApplyTextTransformsToString (&str3, action->transform); if (str3 != NULL) { if (action->remove_from_parsed) { cp = FindTextPortionLocationInString (str1, action->portion); if (cp != NULL) { len = StringLen (str2); tmp = cp; if (action->portion != NULL && !IsTextMarkerEmpty (action->portion->left_marker) && (tmp = FindTextMarker (str1, &left_len, action->portion->left_marker, action->portion->case_sensitive, action->portion->whole_word)) != NULL) { if (action->portion->include_left) { /* adjust */ } else if (!action->portion->include_left) { /* adjust */ if (action->remove_left) { len += left_len; } else { cp += left_len; } } } if (action->portion != NULL && !IsTextMarkerEmpty (action->portion->right_marker) && action->remove_right && !action->portion->include_right && action->portion != NULL && (tmp = FindTextMarker (tmp, &right_len, action->portion->right_marker, action->portion->case_sensitive, action->portion->whole_word)) != NULL) { diff = right_len; len += diff; } StringCpy (cp, cp + len); SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, str1, ExistingTextOption_replace_old, batch_extra); } } if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str3, action->existing_text, batch_extra)) { if (also_change_mrna) { AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue); } num_succeed++; } } str1 = MemFree (str1); str2 = MemFree (str2); str3 = MemFree (str3); } field_from = FieldTypeFree (field_from); field_to = FieldTypeFree (field_to); return num_succeed; } NLM_EXTERN Int4 DoParseActionToObjectList (AECRParseActionPtr action, ValNodePtr 
object_list, Boolean also_change_mrna, StringConstraintPtr scp) { return DoParseActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL); } static Int4 ApplyAECRActionToSeqEntry (AECRActionPtr act, SeqEntryPtr sep, BoolPtr created_protein_features) { StringConstraintPtr scp; ApplyActionPtr a; ConvertActionPtr c; RemoveActionPtr r; EditActionPtr e; RemoveOutsideActionPtr ro; ValNodePtr object_list = NULL; Uint1 field_type; Uint2 entityID; Int4 num_succeed = 0; FieldTypePtr field_from; BatchExtraPtr batch_extra; AECRActionPtr act_cpy = NULL; FeatureFieldPtr field_cpy; if (act == NULL || act->action == NULL) return 0; field_type = FieldTypeFromAECRAction (act); if (field_type == FieldType_cds_gene_prot) { if (act->action->choice == ActionChoice_edit) { act_cpy = AsnIoMemCopy (act, (AsnReadFunc) AECRActionAsnRead, (AsnWriteFunc) AECRActionAsnWrite); e = (EditActionPtr)act_cpy->action->data.ptrvalue; field_cpy = FeatureFieldFromCDSGeneProtField (e->field->data.intvalue); e->field->choice = FieldType_feature_field; e->field->data.ptrvalue = field_cpy; act = act_cpy; field_type = FieldTypeFromAECRAction (act); } } batch_extra = BatchExtraNew (); InitBatchExtraForAECRAction (batch_extra, act, sep); if (field_type == FieldType_cds_gene_prot) { entityID = ObjMgrGetEntityIDForChoice(sep); object_list = BuildCGPSetList (entityID, act, created_protein_features); } else { object_list = GetObjectListForAECRActionEx (sep, act, batch_extra); } if (object_list == NULL) { return 0; } switch (act->action->choice) { case ActionChoice_apply: a = (ApplyActionPtr) act->action->data.ptrvalue; scp = FindStringConstraintInConstraintSetForField (a->field, act->constraint); num_succeed = DoApplyActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, scp, batch_extra); if (a->field->choice == FieldType_misc || a->field->choice == FieldType_dblink) { DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); } break; case ActionChoice_edit: e = 
(EditActionPtr) act->action->data.ptrvalue; num_succeed = DoEditActionToObjectListEx (e, object_list, act->also_change_mrna, batch_extra); if (e->field->choice == FieldType_misc) { DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); } break; case ActionChoice_remove_outside: ro = (RemoveOutsideActionPtr) act->action->data.ptrvalue; num_succeed = DoRemoveOutsideToObjectList (ro, object_list, act->also_change_mrna, batch_extra); if (ro->field->choice == FieldType_misc) { DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); } break; case ActionChoice_convert: scp = NULL; if (act->constraint != NULL) { c = (ConvertActionPtr) act->action->data.ptrvalue; field_from = GetFromFieldFromFieldPair (c->fields); scp = FindStringConstraintInConstraintSetForField (field_from, act->constraint); field_from = FieldTypeFree (field_from); } num_succeed = DoConvertActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, scp, batch_extra); break; case ActionChoice_swap: num_succeed = DoSwapActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, NULL, batch_extra); break; case ActionChoice_copy: num_succeed = DoCopyActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, NULL, batch_extra); break; case ActionChoice_remove: r = (RemoveActionPtr) act->action->data.ptrvalue; scp = FindStringConstraintInConstraintSetForField (r->field, act->constraint); num_succeed = DoRemoveActionToObjectList (act->action->data.ptrvalue, object_list, act->also_change_mrna, scp); if (r->field->choice == FieldType_misc) { DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); } else if (r->field->choice == FieldType_dblink) { DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); } break; case ActionChoice_parse: num_succeed = DoParseActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, NULL, batch_extra); break; } object_list = FreeObjectList 
(object_list); batch_extra = BatchExtraFree (batch_extra); act_cpy = AECRActionFree (act_cpy); return num_succeed; } static AECRSamplePtr AECRSampleNew (void) { AECRSamplePtr sample; sample = (AECRSamplePtr) MemNew (sizeof (AECRSampleData)); MemSet (sample, 0, sizeof (AECRSampleData)); sample->all_same = TRUE; return sample; } NLM_EXTERN AECRSamplePtr AECRSampleFree (AECRSamplePtr sample) { if (sample != NULL) { sample->field = FieldTypeFree (sample->field); sample->first_value = MemFree (sample->first_value); sample = MemFree (sample); } return sample; } NLM_EXTERN ValNodePtr AECRSampleListFree (ValNodePtr list) { ValNodePtr list_next; while (list != NULL) { list_next = list->next; list->next = NULL; list->data.ptrvalue = AECRSampleFree (list->data.ptrvalue); list = ValNodeFree (list); list = list_next; } return list; } static void AddTextToAECRSample (AECRSamplePtr sample, CharPtr txt) { if (StringHasNoText (txt)) { txt = MemFree (txt); } else if (sample != NULL) { sample->num_found ++; if (sample->first_value == NULL) { sample->first_value = txt; } else { if (sample->all_same && StringCmp (sample->first_value, txt) != 0) { sample->all_same = FALSE; } txt = MemFree (txt); } } } NLM_EXTERN AECRSamplePtr GetAECRSampleFromObjectListEx (ValNodePtr object_list, FieldTypePtr field, BatchExtraPtr batch_extra) { AECRSamplePtr sample; ValNodePtr vnp, prot_vnp, bsp_list; CharPtr txt; CGPSetPtr cgp; SeqFeatPtr sfp; BatchExtraPtr b = NULL; SeqEntryPtr sep; if (object_list == NULL || field == NULL) return NULL; if (batch_extra == NULL) { b = BatchExtraNew (); batch_extra = b; bsp_list = BioseqListForObjectList (object_list); for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { sep = SeqMgrGetSeqEntryForData (vnp->data.ptrvalue); InitBatchExtraForField (batch_extra, field, sep); } bsp_list = ValNodeFree (bsp_list); } sample = AECRSampleNew (); sample->field = FieldTypeCopy (field); for (vnp = object_list; vnp != NULL; vnp = vnp->next) { if (vnp->choice == 0 && 
IsFieldTypeMatPeptideRelated (field)) {
      /* choice 0 entries carry a CGPSet; sample each mat-peptide feature in its protein list */
      cgp = (CGPSetPtr) vnp->data.ptrvalue;
      if (cgp != NULL) {
        for (prot_vnp = cgp->prot_list; prot_vnp != NULL; prot_vnp = prot_vnp->next) {
          sfp = (SeqFeatPtr) prot_vnp->data.ptrvalue;
          if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa) {
            txt = GetFieldValueForObjectEx (OBJ_SEQFEAT, sfp, field, NULL, batch_extra);
            AddTextToAECRSample (sample, txt);
          }
        }
      }
    } else {
      txt = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field, NULL, batch_extra);
      AddTextToAECRSample (sample, txt);
    }
  }

  /* b is non-NULL only when the batch data was created locally */
  b = BatchExtraFree (b);
  return sample;
}


/* Convenience wrapper: same as GetAECRSampleFromObjectListEx with a NULL batch_extra. */
NLM_EXTERN AECRSamplePtr GetAECRSampleFromObjectList (ValNodePtr object_list, FieldTypePtr field)
{
  return GetAECRSampleFromObjectListEx (object_list, field, NULL);
}


/* Extracts the field (apply, edit, remove-outside, remove) or the field pair
 * (convert, swap, copy, parse) referenced by act. Either output pointer may be NULL
 * when the caller is not interested; outputs that are supplied are always reset to
 * NULL first. The returned pointers refer to members of act - they are not copies.
 */
static void GetFieldsFromAECR (AECRActionPtr act, FieldTypePtr PNTR pField, ValNodePtr PNTR pFieldPair)
{
  ApplyActionPtr a;
  EditActionPtr e;
  ConvertActionPtr c;
  SwapActionPtr s;
  CopyActionPtr cp;
  RemoveActionPtr r;
  AECRParseActionPtr p;
  RemoveOutsideActionPtr ro;

  if (pField != NULL) {
    *pField = NULL;
  }
  if (pFieldPair != NULL) {
    *pFieldPair = NULL;
  }
  if (act == NULL || act->action == NULL || act->action->data.ptrvalue == NULL) {
    return;
  }

  switch (act->action->choice) {
    case ActionChoice_apply:
      if (pField != NULL) {
        a = (ApplyActionPtr) act->action->data.ptrvalue;
        *pField = a->field;
      }
      break;
    case ActionChoice_edit:
      if (pField != NULL) {
        e = (EditActionPtr) act->action->data.ptrvalue;
        *pField = e->field;
      }
      break;
    case ActionChoice_remove_outside:
      if (pField != NULL) {
        ro = (RemoveOutsideActionPtr) act->action->data.ptrvalue;
        *pField = ro->field;
      }
      break;
    case ActionChoice_convert:
      if (pFieldPair != NULL) {
        c = (ConvertActionPtr) act->action->data.ptrvalue;
        *pFieldPair = c->fields;
      }
      break;
    case ActionChoice_swap:
      if (pFieldPair != NULL) {
        s = (SwapActionPtr) act->action->data.ptrvalue;
        *pFieldPair = s->fields;
      }
      break;
    case ActionChoice_copy:
      if (pFieldPair != NULL) {
        cp = (CopyActionPtr) act->action->data.ptrvalue;
        *pFieldPair = cp->fields;
      }
      break;
    case ActionChoice_remove:
      if (pField != NULL) {
        r = (RemoveActionPtr) act->action->data.ptrvalue;
        *pField = r->field;
      }
      break;
    case ActionChoice_parse:
      if (pFieldPair != NULL) {
        p = (AECRParseActionPtr) act->action->data.ptrvalue;
        *pFieldPair = p->fields;
      }
      break;
  }
}


/* Frees a list of field types one node at a time. Returns NULL. */
NLM_EXTERN ValNodePtr LIBCALLBACK FieldTypeListFree (ValNodePtr list)
{
  ValNodePtr list_next;

  while (list != NULL) {
    list_next = list->next;
    /* detach the node before handing it to FieldTypeFree */
    list->next = NULL;
    list = FieldTypeFree (list);
    list = list_next;
  }
  return list;
}


/* Builds a new list by passing every node of orig to FieldTypeCopy and chaining
 * the results in the same order. Returns the head of the new list.
 */
NLM_EXTERN ValNodePtr LIBCALLBACK FieldTypeListCopy (ValNodePtr orig)
{
  ValNodePtr prev = NULL, new_list = NULL, vnp;

  while (orig != NULL) {
    vnp = FieldTypeCopy (orig);
    if (prev == NULL) {
      new_list = vnp;
    } else {
      prev->next = vnp;
    }
    prev = vnp;
    orig = orig->next;
  }
  return new_list;
}


/* Sort callback: orders ValNodes by choice, then by data.intvalue. A NULL node
 * sorts before a non-NULL node. Returns 0 when either argument pointer is NULL.
 */
static int LIBCALLBACK SortVnpByChoiceAndIntvalue (VoidPtr ptr1, VoidPtr ptr2)
{
  ValNodePtr vnp1;
  ValNodePtr vnp2;
  int rval = 0;

  if (ptr1 != NULL && ptr2 != NULL) {
    /* the arguments point at list slots, i.e. at ValNodePtr values */
    vnp1 = *((ValNodePtr PNTR) ptr1);
    vnp2 = *((ValNodePtr PNTR) ptr2);
    if (vnp1 == NULL && vnp2 == NULL) {
      rval = 0;
    } else if (vnp1 == NULL) {
      rval = -1;
    } else if (vnp2 == NULL) {
      rval = 1;
    } else if (vnp1->choice > vnp2->choice) {
      rval = 1;
    } else if (vnp1->choice < vnp2->choice) {
      rval = -1;
    } else if (vnp1->data.intvalue > vnp2->data.intvalue) {
      rval = 1;
    } else if (vnp1->data.intvalue < vnp2->data.intvalue) {
      rval = -1;
    } else {
      rval = 0;
    }
  }

  return rval;
}


/* Callback function used for sorting and uniquing field type lists:
 * delegates the comparison to CompareFieldTypes.
 */
NLM_EXTERN int LIBCALLBACK SortVnpByFieldType (VoidPtr ptr1, VoidPtr ptr2)
{
  ValNodePtr vnp1;
  ValNodePtr vnp2;
  int rval = 0;

  if (ptr1 != NULL && ptr2 != NULL) {
    vnp1 = *((ValNodePtr PNTR) ptr1);
    vnp2 = *((ValNodePtr PNTR) ptr2);
    rval = CompareFieldTypes (vnp1, vnp2);
  }
  return rval;
}


/* Same as SortVnpByFieldType, but compares with CompareFieldTypesEx and TRUE as its
 * third argument.
 */
NLM_EXTERN int LIBCALLBACK SortVnpByFieldTypeAndSourceQualifier (VoidPtr ptr1, VoidPtr ptr2)
{
  ValNodePtr vnp1;
  ValNodePtr vnp2;
  int rval = 0;

  if (ptr1 != NULL && ptr2 != NULL) {
    vnp1 = *((ValNodePtr PNTR) ptr1);
    vnp2 = *((ValNodePtr PNTR) ptr2);
    rval = CompareFieldTypesEx (vnp1, vnp2, TRUE);
  }
  return rval;
}


/* BioSource visitor: adds the source qualifier fields of biop to the list that
 * userdata (a ValNodePtr PNTR) points to.
 */
static void GetBioSourceFields (BioSourcePtr biop, Pointer userdata)
{
  ValNodePtr new_list = NULL;

  if (biop == NULL || userdata == NULL)
  {
    return;
  }

  /* Although the following call does not preserve order (the arguments are
   * reversed), that is fine: every function that uses GetBioSourceFields sorts
   * the results at the end anyway, and we want the shorter list as the first
   * argument because ValNodeLink is linear in the length of its first argument.
   */
  new_list = GetSourceQualFieldListFromBioSource (biop);
  *(ValNodePtr PNTR) userdata = ValNodeLink ( &new_list, *(ValNodePtr PNTR) userdata);
}


/* Sorts *field_list with SortVnpByFieldType and removes duplicates in place. */
NLM_EXTERN void SortUniqueFieldTypeList (ValNodePtr PNTR field_list)
{
  if (field_list == NULL) return;
  *field_list = ValNodeSort (*field_list, SortVnpByFieldType);
  ValNodeUnique (field_list, SortVnpByFieldType, FieldTypeListFree);
}


/* Returns the sorted, duplicate-free list of source qualifier fields found on the
 * BioSources in sep.
 */
NLM_EXTERN ValNodePtr GetSourceQualSampleFieldList (SeqEntryPtr sep)
{
  ValNodePtr field_list = NULL;

  VisitBioSourcesInSep (sep, &field_list, GetBioSourceFields);
  field_list = ValNodeSort (field_list, SortVnpByFieldTypeAndSourceQualifier);
  ValNodeUnique (&field_list, SortVnpByFieldTypeAndSourceQualifier, FieldTypeListFree);
  return field_list;
}


/* Same as GetSourceQualSampleFieldList, but collects over every SeqEntry stored in
 * the data pointers of list before sorting and uniquing once.
 */
NLM_EXTERN ValNodePtr GetSourceQualSampleFieldListForSeqEntryList (ValNodePtr list)
{
  ValNodePtr field_list = NULL;
  ValNodePtr vnp;

  if (list == NULL) {
    return NULL;
  }

  for (vnp = list; vnp != NULL; vnp = vnp->next) {
    VisitBioSourcesInSep (vnp->data.ptrvalue, &field_list, GetBioSourceFields);
  }
  field_list = ValNodeSort (field_list, SortVnpByFieldTypeAndSourceQualifier);
  ValNodeUnique (&field_list, SortVnpByFieldTypeAndSourceQualifier, FieldTypeListFree);
  return field_list;
}


/* Feature visitor: appends the field list of sfp to the list data points to. */
static void GetFeatureQualFieldListForAECRSampleCallback (SeqFeatPtr sfp, Pointer data)
{
  ValNodePtr PNTR list;

  list = (ValNodePtr PNTR) data;
  if (list == NULL || sfp == NULL) return;

  ValNodeLink (list, GetFieldListFromFeature (sfp));
}


static ValNodePtr
GetFeatureQualFieldList (SeqEntryPtr sep) { ValNodePtr field_list = NULL; VisitFeaturesInSep (sep, &field_list, GetFeatureQualFieldListForAECRSampleCallback); field_list = ValNodeSort (field_list, SortVnpByFieldType); ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree); return field_list; } static void GetRnaQualFieldListForAECRSampleCallback (SeqFeatPtr sfp, Pointer userdata) { RnaFeatTypePtr type; RnaRefPtr rrp; RnaQualPtr rq; RNAGenPtr rgp; GeneRefPtr grp = NULL; SeqFeatPtr gene = NULL; SeqMgrFeatContext fcontext; if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL || userdata == NULL) { return; } rrp = (RnaRefPtr) sfp->data.value.ptrvalue; type = RnaFeatTypeFromSeqFeat (sfp); if (type == NULL) return; /* add product if appropriate */ if ((type->choice == RnaFeatType_preRNA || type->choice == RnaFeatType_mRNA || type->choice == RnaFeatType_rRNA || type->choice == RnaFeatType_miscRNA || type->choice == RnaFeatType_any) && rrp->ext.choice == 1 && !StringHasNoText (rrp->ext.value.ptrvalue)) { rq = RnaQualNew (); rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); rq->field = Rna_field_product; ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL && !StringHasNoText (rgp->product)) { rq = RnaQualNew (); rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); rq->field = Rna_field_product; ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } /* add comment if present */ if (!StringHasNoText (sfp->comment)) { rq = RnaQualNew (); rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); rq->field = Rna_field_comment; ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } /* add tRNA specific if appropriate */ if (type->choice == 
RnaFeatType_tRNA || (type->choice == RnaFeatType_any && rrp->type == 2)) { /* codons recognized */ rq = RnaQualNew (); rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); rq->field = Rna_field_codons_recognized; ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); /* anticodon */ rq = RnaQualNew (); rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); rq->field = Rna_field_anticodon; ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } /* add ncRNA class if appropriate and present */ if ((type->choice == RnaFeatType_ncRNA || type->choice == RnaFeatType_any) && rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL && !StringHasNoText (rgp->_class)) { rq = RnaQualNew (); rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); rq->field = Rna_field_ncrna_class; ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } /* add transcript ID if present */ if (sfp->product != NULL) { rq = RnaQualNew (); rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); rq->field = Rna_field_transcript_id; ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } /* add gene fields */ grp = SeqMgrGetGeneXref (sfp); if (grp == NULL) { gene = SeqMgrGetOverlappingGene (sfp->location, &fcontext); if (gene != NULL) { grp = gene->data.value.ptrvalue; } } if (grp != NULL && !SeqMgrGeneIsSuppressed (grp)) { /* gene locus */ if (!StringHasNoText (grp->locus)) { rq = RnaQualNew (); rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); rq->field = Rna_field_gene_locus; ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } /* gene description */ if (!StringHasNoText (grp->desc)) { rq = RnaQualNew (); rq->type = AsnIoMemCopy (type, (AsnReadFunc) 
RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); rq->field = Rna_field_gene_locus; ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } /* maploc */ if (!StringHasNoText (grp->maploc)) { rq = RnaQualNew (); rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); rq->field = Rna_field_gene_maploc; ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } /* locus tag */ if (!StringHasNoText (grp->locus_tag)) { rq = RnaQualNew (); rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); rq->field = Rna_field_gene_locus_tag; ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } /* synonym */ if (grp->syn != NULL) { rq = RnaQualNew (); rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); rq->field = Rna_field_gene_synonym; ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } } /* gene comment */ if (gene != NULL && !StringHasNoText (gene->comment)) { rq = RnaQualNew (); rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite); rq->field = Rna_field_gene_comment; ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq); } } static ValNodePtr GetRnaQualFieldList (SeqEntryPtr sep) { ValNodePtr field_list = NULL; VisitFeaturesInSep (sep, &field_list, GetRnaQualFieldListForAECRSampleCallback); field_list = ValNodeSort (field_list, SortVnpByFieldType); ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree); return field_list; } NLM_EXTERN ValNodePtr GetStructuredCommentFieldListFromUserObject (UserObjectPtr uop) { ValNodePtr list = NULL; UserFieldPtr ufp; ValNodePtr vnp; if (uop != NULL && IsUserObjectStructuredComment (uop)) { ufp = uop->data; while (ufp != NULL) { if (ufp->label != NULL && ufp->label->str != NULL && StringCmp (ufp->label->str, "StructuredCommentPrefix") != 0 && 
StringCmp (ufp->label->str, "StructuredCommentSuffix") != 0) { vnp = ValNodeNew (NULL); vnp->choice = StructuredCommentField_named; vnp->data.ptrvalue = StringSave (ufp->label->str); ValNodeAddPointer (&list, FieldType_struc_comment_field, vnp); } ufp = ufp->next; } } return list; } static void GetStructuredCommentFieldsCallback (SeqDescrPtr sdp, Pointer data) { UserObjectPtr uop; if (sdp != NULL && data != NULL && sdp->choice == Seq_descr_user && (uop = sdp->data.ptrvalue) != NULL && IsUserObjectStructuredComment (uop)) { ValNodeLink ((ValNodePtr PNTR) data, GetStructuredCommentFieldListFromUserObject(uop)); } } NLM_EXTERN ValNodePtr GetStructuredCommentFieldList (SeqEntryPtr sep) { ValNodePtr field_list = NULL; ValNodePtr dbname, field_name; dbname = ValNodeNew (NULL); dbname->choice = StructuredCommentField_database; ValNodeAddPointer (&field_list, FieldType_struc_comment_field, dbname); field_name = ValNodeNew (NULL); field_name->choice = StructuredCommentField_field_name; ValNodeAddPointer (&field_list, FieldType_struc_comment_field, field_name); VisitDescriptorsInSep (sep, &field_list, GetStructuredCommentFieldsCallback); field_list = ValNodeSort (field_list, SortVnpByFieldType); ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree); return field_list; } static void CollectBioSourceDescCallback (SeqDescrPtr sdp, Pointer data) { if (sdp != NULL && sdp->choice == Seq_descr_source && data != NULL) { ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); } } static void CollectBioSourceFeatCallback (SeqFeatPtr sfp, Pointer data) { if (sfp != NULL && sfp->data.choice == SEQFEAT_BIOSRC) { ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); } } static void CollectFeaturesCallback (SeqFeatPtr sfp, Pointer data) { if (sfp != NULL && data != NULL && sfp->data.choice != SEQFEAT_BIOSRC && sfp->data.choice != SEQFEAT_PUB) { ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); } } static void CollectPubDescCallback (SeqDescrPtr sdp, 
Pointer data) { if (sdp != NULL && sdp->choice == Seq_descr_pub && data != NULL) { ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); } } static void CollectPubFeatCallback (SeqFeatPtr sfp, Pointer data) { if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) { ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp); } } static void CollectBioseqCallback (BioseqPtr bsp, Pointer data) { if (bsp != NULL && data != NULL) { ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp); } } /* static void CollectNucBioseqCallback (BioseqPtr bsp, Pointer data) { if (bsp != NULL && data != NULL && !ISA_aa (bsp->mol)) { ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp); } } */ static void AddCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list) { SeqDescrPtr sdp; SeqMgrDescContext context; Boolean found = FALSE; ObjValNodePtr ovp; if (bsp == NULL || dest_list == NULL) { return; } for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &context); sdp != NULL; sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_comment, &context)) { ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); found = TRUE; } if (!found) { /* if no existing comment descriptor, create one, marked for delete. * unmark it for deletion when it gets populated. 
*/ sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_comment); sdp->data.ptrvalue = StringSave (""); ovp = (ObjValNodePtr) sdp; ovp->idx.deleteme = TRUE; ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); } } static ValNodePtr CollectCommentDescriptors (SeqEntryPtr sep) { ValNodePtr seq_list = NULL, vnp, desc_list = NULL; if (sep == NULL) { return NULL; } /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */ seq_list = CollectNucBioseqs (sep); for (vnp = seq_list; vnp != NULL; vnp = vnp->next) { AddCommentDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list); } seq_list = ValNodeFree (seq_list); return desc_list; } static void CollectStructuredCommentsCallback (SeqDescrPtr sdp, Pointer data) { UserObjectPtr uop; if (sdp != NULL && data != NULL && sdp->choice == Seq_descr_user && (uop = sdp->data.ptrvalue) != NULL && IsUserObjectStructuredComment (uop)) { ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); } } static ValNodePtr CollectDBLinkDescriptors (SeqEntryPtr sep) { ValNodePtr seq_list = NULL, vnp, desc_list = NULL; if (sep == NULL) { return NULL; } /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */ seq_list = CollectNucBioseqs (sep); for (vnp = seq_list; vnp != NULL; vnp = vnp->next) { AddDBLinkDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list); } seq_list = ValNodeFree (seq_list); return desc_list; } static void AddDeflineDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list) { SeqDescrPtr sdp; SeqMgrDescContext context; Boolean found = FALSE; ObjValNodePtr ovp; if (bsp == NULL || dest_list == NULL) { return; } for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &context); sdp != NULL; sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_title, &context)) { ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); found = TRUE; } if (!found) { /* if no existing comment descriptor, create one, marked for delete. * unmark it for deletion when it gets populated. 
*/ sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_title); sdp->data.ptrvalue = StringSave (""); ovp = (ObjValNodePtr) sdp; ovp->idx.deleteme = TRUE; ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); } } static ValNodePtr CollectDeflineDescriptors (SeqEntryPtr sep) { ValNodePtr seq_list = NULL, vnp, desc_list = NULL; if (sep == NULL) { return NULL; } /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */ seq_list = CollectNucBioseqs (sep); for (vnp = seq_list; vnp != NULL; vnp = vnp->next) { AddDeflineDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list); } seq_list = ValNodeFree (seq_list); return desc_list; } static void AddGenbankBlockDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list) { SeqDescrPtr sdp; SeqMgrDescContext context; Boolean found = FALSE; ObjValNodePtr ovp; if (bsp == NULL || dest_list == NULL) { return; } for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &context); sdp != NULL; sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &context)) { ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); found = TRUE; } if (!found) { /* if no existing comment descriptor, create one, marked for delete. * unmark it for deletion when it gets populated. 
*/ sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_genbank); sdp->data.ptrvalue = GBBlockNew (); ovp = (ObjValNodePtr) sdp; ovp->idx.deleteme = TRUE; ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); } } static ValNodePtr CollectGenbankBlockDescriptors (SeqEntryPtr sep) { ValNodePtr seq_list = NULL, vnp, desc_list = NULL; if (sep == NULL) { return NULL; } /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */ seq_list = CollectNucBioseqs (sep); for (vnp = seq_list; vnp != NULL; vnp = vnp->next) { AddGenbankBlockDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list); } seq_list = ValNodeFree (seq_list); return desc_list; } static void CollectDblinkCallback (SeqDescPtr sdp, Pointer data) { UserObjectPtr uop; if (sdp == NULL || data == NULL || sdp->choice != Seq_descr_user || (uop = (UserObjectPtr)sdp->data.ptrvalue) == NULL || uop->type == NULL || StringCmp (uop->type->str, "DBLink") != 0) { return; } else { ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp); } } NLM_EXTERN ValNodePtr GetObjectListForFieldType (Uint1 field_type, SeqEntryPtr sep) { ValNodePtr object_list = NULL; Uint2 entityID; switch (field_type) { case FieldType_source_qual: VisitDescriptorsInSep (sep, &object_list, CollectBioSourceDescCallback); VisitFeaturesInSep (sep, &object_list, CollectBioSourceFeatCallback); break; case FieldType_cds_gene_prot: entityID = ObjMgrGetEntityIDForChoice(sep); object_list = BuildCGPSetList (entityID, NULL, NULL); break; case FieldType_feature_field: VisitFeaturesInSep (sep, &object_list, CollectFeaturesCallback); break; case FieldType_molinfo_field: VisitBioseqsInSep (sep, &object_list, CollectBioseqCallback); break; case FieldType_pub: VisitDescriptorsInSep (sep, &object_list, CollectPubDescCallback); VisitFeaturesInSep (sep, &object_list, CollectPubFeatCallback); break; case FieldType_rna_field: VisitFeaturesInSep (sep, &object_list, CollectFeaturesCallback); break; case FieldType_struc_comment_field: VisitDescriptorsInSep (sep, 
&object_list, CollectStructuredCommentsCallback);
      break;
    case FieldType_misc:
      /* VisitBioseqsInSep (sep, &object_list, CollectNucBioseqCallback); */
      object_list = CollectNucBioseqs (sep);
      ValNodeLink (&object_list, CollectCommentDescriptors (sep));
      break;
    case FieldType_dblink:
      VisitDescriptorsInSep (sep, &object_list, CollectDblinkCallback);
      break;
  }
  return object_list;
}


/* State shared with SeqCollectorCallback: the list being built and the
 * constraint set each Bioseq is matched against.
 */
typedef struct seqcollector {
  ValNodePtr object_list;
  ConstraintChoiceSetPtr csp;
} SeqCollectorData, PNTR SeqCollectorPtr;


/* Bioseq visitor: appends bsp to the collector's list when it matches
 * the collector's constraint set.  data is a SeqCollectorPtr.
 */
static void SeqCollectorCallback (BioseqPtr bsp, Pointer data)
{
  SeqCollectorPtr s;

  if ((s = (SeqCollectorPtr) data) == NULL) {
    return;
  }
  if (DoesObjectMatchConstraintChoiceSet (OBJ_BIOSEQ, bsp, s->csp)) {
    ValNodeAddPointer (&(s->object_list), OBJ_BIOSEQ, bsp);
  }
}


/* Returns a list (choice OBJ_BIOSEQ) of the Bioseqs visited in sep that
 * match csp.
 */
NLM_EXTERN ValNodePtr GetSequenceListForConstraint (SeqEntryPtr sep, ConstraintChoiceSetPtr csp)
{
  SeqCollectorData s;

  MemSet (&s, 0, sizeof (SeqCollectorData));
  s.csp = csp;
  VisitBioseqsInSep (sep, &s, SeqCollectorCallback);
  return s.object_list;
}


/* Builds the list of fields offered for field_type.  Some lists come from
 * fixed tables, others are derived from sep; unrecognized types yield NULL.
 */
NLM_EXTERN ValNodePtr GetFieldListForFieldType (Uint1 field_type, SeqEntryPtr sep)
{
  ValNodePtr fields = NULL;

  /* get a list of the fields that are appropriate for the objects collected */
  switch (field_type) {
    case FieldType_cds_gene_prot:
      fields = MakeCDSGeneProtFieldTypeList ();
      break;
    case FieldType_source_qual:
      fields = GetSourceQualSampleFieldList (sep);
      break;
    case FieldType_feature_field:
      fields = GetFeatureQualFieldList (sep);
      break;
    case FieldType_molinfo_field:
      fields = MakeSequenceQualFieldTypeList ();
      break;
    case FieldType_pub:
      fields = MakePubFieldTypeList ();
      break;
    case FieldType_rna_field:
      fields = GetRnaQualFieldList (sep);
      break;
    case FieldType_struc_comment_field:
      fields = GetStructuredCommentFieldList (sep);
      break;
    case FieldType_misc:
      ValNodeAddInt (&fields, FieldType_misc, Misc_field_genome_project_id);
      ValNodeAddInt (&fields, FieldType_misc, Misc_field_comment_descriptor);
      ValNodeAddInt (&fields, FieldType_misc, Misc_field_defline);
      ValNodeAddInt (&fields, FieldType_misc, Misc_field_keyword);
      break;
    case FieldType_dblink:
      ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_trace_assembly);
      ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_bio_sample);
      ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_probe_db);
      ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_sequence_read_archve);
      ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_bio_project);
      break;
  }
  return fields;
}


/* Returns one AECRSample per field of field_type for which at least one
 * value was found (num_found > 0) among the objects collected from sep.
 * The object and field lists built here are freed before returning.
 */
NLM_EXTERN ValNodePtr GetAECRSampleListForSeqEntry (Uint1 field_type, SeqEntryPtr sep)
{
  ValNodePtr     object_list;
  ValNodePtr     fields = NULL, vnp;
  ValNodePtr     list = NULL;
  AECRSamplePtr  sample;
  BatchExtraPtr  batch_extra;

  object_list = GetObjectListForFieldType (field_type, sep);

  /* get a list of the fields that are appropriate for the objects collected */
  fields = GetFieldListForFieldType (field_type, sep);

  batch_extra = BatchExtraNew ();
  for (vnp = fields; vnp != NULL; vnp = vnp->next) {
    InitBatchExtraForField (batch_extra, vnp, sep);
  }

  for (vnp = fields; vnp != NULL; vnp = vnp->next) {
    sample = GetAECRSampleFromObjectListEx (object_list, vnp, batch_extra);
    if (sample != NULL && sample->num_found > 0) {
      ValNodeAddPointer (&list, 0, sample);
    } else {
      sample = AECRSampleFree (sample);
    }
  }

  batch_extra = BatchExtraFree (batch_extra);
  fields = FieldTypeListFree (fields);
  object_list = FreeObjectList (object_list);
  return list;
}


/* Returns one AECRSample per field used by act for which at least one
 * value was found among the objects the action applies to.  Objects
 * flagged for deletion in sep's entity are removed before returning.
 */
NLM_EXTERN ValNodePtr GetAECRSampleList (AECRActionPtr act, SeqEntryPtr sep)
{
  Uint1          field_type;
  Uint2          entityID;
  ValNodePtr     object_list;
  ValNodePtr     fields = NULL, vnp;
  ValNodePtr     list = NULL;
  AECRSamplePtr  sample;
  BatchExtraPtr  batch_extra;

  batch_extra = BatchExtraNew ();
  InitBatchExtraForAECRAction (batch_extra, act, sep);

  field_type = FieldTypeFromAECRAction (act);
  if (field_type == FieldType_cds_gene_prot) {
    entityID = ObjMgrGetEntityIDForChoice(sep);
    object_list = BuildCGPSetList (entityID, act, NULL);
  } else {
    object_list = GetObjectListForAECRActionEx (sep, act, batch_extra);
  }

  /* get fields used in action */
  fields = GetFieldTypeListFromAECRAction (act);

  for (vnp = fields; vnp != NULL; vnp = vnp->next) {
    sample = GetAECRSampleFromObjectListEx (object_list, vnp, batch_extra);
    if (sample != NULL && sample->num_found > 0) {
      ValNodeAddPointer (&list, 0, sample);
    } else {
      sample = AECRSampleFree (sample);
    }
  }

  fields = FieldTypeListFree (fields);

  batch_extra = BatchExtraFree (batch_extra);

  DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);

  FreeObjectList (object_list);
  return list;
}


/* Returns the first sample in list whose field matches field according to
 * DoFieldTypesMatch, or NULL when there is none.  The sample is not copied.
 */
NLM_EXTERN AECRSamplePtr GetFieldSampleFromList (ValNodePtr list, FieldTypePtr field)
{
  AECRSamplePtr sample = NULL;

  while (list != NULL && sample == NULL) {
    sample = list->data.ptrvalue;
    if (sample != NULL && !DoFieldTypesMatch (sample->field, field)) {
      sample = NULL;
    }
    list = list->next;
  }
  return sample;
}


/* Unlinks and frees every field in *field_list for which sampling
 * object_list finds no values.  Source-qualifier, feature and RNA fields
 * are always kept without being sampled.
 */
static void RemoveFieldsForWhichThereAreNoData (ValNodePtr PNTR field_list, ValNodePtr object_list)
{
  ValNodePtr     vnp_prev = NULL, vnp_f, vnp_next;
  AECRSamplePtr  sample;

  if (field_list == NULL || *field_list == NULL) {
    return;
  }

  vnp_prev = NULL;
  vnp_f = *field_list;
  while (vnp_f != NULL) {
    vnp_next = vnp_f->next;
    if (vnp_f->choice == FieldType_source_qual
        || vnp_f->choice == FieldType_feature_field
        || vnp_f->choice == FieldType_rna_field) {
      vnp_prev = vnp_f;
    } else {
      sample = GetAECRSampleFromObjectList (object_list, vnp_f);
      if (sample == NULL || sample->num_found == 0) {
        /* unlink the node before freeing it so the rest of the list survives */
        if (vnp_prev == NULL) {
          *field_list = vnp_next;
        } else {
          vnp_prev->next = vnp_next;
        }
        vnp_f->next = NULL;
        vnp_f = FieldTypeFree (vnp_f);
      } else {
        vnp_prev = vnp_f;
      }
      sample = AECRSampleFree (sample);
    }
    vnp_f = vnp_next;
  }
}


/* Writes a tab-delimited table to fp: a header row ("Accession" followed
 * by one column per field that has data), then one row per object of
 * field_type collected from sep, giving its sequence id and field values.
 */
NLM_EXTERN void GetAECRExistingTextList (Uint1 field_type, SeqEntryPtr sep, FILE *fp)
{
  ValNodePtr     object_list, vnp_f, vnp_o;
  ValNodePtr     fields = NULL;
  BioseqPtr      bsp;
  Char           id_buf[255];
  CharPtr        txt1 = NULL;

  object_list = GetObjectListForFieldType (field_type, sep);

  /* get a list of the fields that are appropriate for the objects collected */
  fields = GetFieldListForFieldType (field_type, sep);

  /* remove fields for which there is no data */
  RemoveFieldsForWhichThereAreNoData (&fields, object_list);

  /* add header */
  fprintf (fp, "Accession");
  for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) {
    txt1 = SummarizeFieldType (vnp_f);
    fprintf (fp, "\t%s", txt1);
    txt1 = MemFree (txt1);
  }
  fprintf (fp, "\n");

  for (vnp_o = object_list; vnp_o != NULL; vnp_o = vnp_o->next) {
    bsp = GetSequenceForObject (vnp_o->choice, vnp_o->data.ptrvalue);
    if (bsp == NULL) {
      id_buf[0] = 0;
    } else {
      SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
    }
    fprintf (fp, "%s", id_buf);
    for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) {
      txt1 = GetFieldValueForObject (vnp_o->choice, vnp_o->data.ptrvalue, vnp_f, NULL);
      fprintf (fp, "\t%s", txt1 == NULL ? "" : txt1);
      txt1 = MemFree (txt1);
    }
    fprintf (fp, "\n");
  }

  fields = FieldTypeListFree (fields);

  object_list = FreeObjectList (object_list);
}


/* Inserts num_blanks empty cells into row after the last cell belonging
 * to column group insert_pos.  num_field_per_pos[i] is the number of
 * cells group i currently occupies; the first cell of row is skipped.
 */
static void InsertBlanksInRow (ValNodePtr row, Int4 insert_pos, Int4Ptr num_field_per_pos, Int4 num_blanks)
{
  ValNodePtr vnp, prev, vnp_blank;
  Int4       pos = 0, skip;

  /* first, skip accession */
  prev = row;
  vnp = row->next;

  while (vnp != NULL && pos <= insert_pos) {
    for (skip = 0; skip < num_field_per_pos[pos] && vnp != NULL; skip++, vnp = vnp->next) {
      prev = vnp;
    }
    pos++;
  }

  for (skip = 0; skip < num_blanks; skip++) {
    vnp_blank = ValNodeNew (NULL);
    vnp_blank->next = prev->next;
    prev->next = vnp_blank;
  }
}


/* Appends the cells in vals to this_row as column group pos.  If vals has
 * more cells than the group held so far, all rows already in text_table
 * are padded with blanks first; if it has fewer, this_row is padded.
 */
static void AddListToTabTable (ValNodePtr vals, ValNodePtr text_table, ValNodePtr this_row, Int4 pos, Int4Ptr num_field_per_pos)
{
  Int4       num_new_fields;
  ValNodePtr vnp;

  num_new_fields = ValNodeLen (vals);
  if (num_new_fields > num_field_per_pos[pos]) {
    /* go back and insert blanks in all the previous rows */
    for (vnp = text_table; vnp != NULL; vnp = vnp->next) {
      InsertBlanksInRow (vnp->data.ptrvalue, pos, num_field_per_pos, num_new_fields - num_field_per_pos[pos]);
    }
    num_field_per_pos[pos] =
num_new_fields; } ValNodeLink (&this_row, vals); while (num_new_fields < num_field_per_pos[pos]) { ValNodeAddPointer (&this_row, 0, NULL); num_new_fields++; } } static ValNodePtr StartRowWithSourceFields (CharPtr id, BioseqPtr bsp, ValNodePtr src_field_list, Int4Ptr num_field_per_pos, ValNodePtr text_table) { ValNodePtr text_row = NULL; SeqDescPtr sdp; ValNodePtr vals, vnp_f; Int4 pos; SeqMgrDescContext context; /* add accession */ ValNodeAddPointer (&text_row, 0, StringSave (id)); /* add source fields */ if (src_field_list != NULL) { sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); for (vnp_f = src_field_list, pos = 0; vnp_f != NULL; vnp_f = vnp_f->next, pos++) { vals = GetMultipleFieldValuesForObject (OBJ_SEQDESC, sdp, vnp_f, NULL, NULL); AddListToTabTable (vals, text_table, text_row, pos, num_field_per_pos); } } return text_row; } NLM_EXTERN void ExportFieldTable (Uint1 field_type, ValNodePtr src_field_list, SeqEntryPtr sep, FILE *fp) { ValNodePtr object_list, vnp_f, vnp_o; ValNodePtr fields = NULL; ValNodePtr text_table = NULL, text_row; BioseqPtr bsp; Char id_buf[255]; CharPtr txt1 = NULL, title; SeqDescrPtr pub_sdp; SeqMgrDescContext pub_context; Int4 num_orig_fields; Int4Ptr num_field_per_pos; Int4 pos, i; if (field_type == 0) { object_list = GetObjectListForFieldType (FieldType_source_qual, sep); } else if (field_type == FieldType_misc) { object_list = CollectDeflineDescriptors (sep); ValNodeAddInt (&fields, FieldType_misc, Misc_field_defline); } else if (field_type == FieldType_pub) { object_list = GetObjectListForFieldType (FieldType_source_qual, sep); /* only get publication titles */ ValNodeAddInt (&fields, FieldType_pub, Publication_field_title); } else { object_list = GetObjectListForFieldType (field_type, sep); /* get a list of the fields that are appropriate for the objects collected */ fields = GetFieldListForFieldType (field_type, sep); /* remove fields for which there is no data */ RemoveFieldsForWhichThereAreNoData 
(&fields, object_list); } num_orig_fields = ValNodeLen (src_field_list); num_field_per_pos = (Int4Ptr) MemNew (sizeof (Int4) * num_orig_fields); for (pos = 0; pos < num_orig_fields; pos++) { num_field_per_pos[pos] = 1; } /* get text table */ for (vnp_o = object_list; vnp_o != NULL; vnp_o = vnp_o->next) { bsp = GetSequenceForObject (vnp_o->choice, vnp_o->data.ptrvalue); if (bsp != NULL) { /* first column is accession */ SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1); if (field_type == FieldType_pub) { for (pub_sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &pub_context); pub_sdp != NULL; pub_sdp = SeqMgrGetNextDescriptor (bsp, pub_sdp, Seq_descr_pub, &pub_context)) { /* Get Publication Title */ title = GetFieldValueForObject (OBJ_SEQDESC, pub_sdp, fields, NULL); if (!StringHasNoText (title)) { text_row = StartRowWithSourceFields (id_buf, bsp, src_field_list, num_field_per_pos, text_table); /* add publication title */ ValNodeAddPointer (&text_row, 0, title); /* add row to table */ ValNodeAddPointer (&text_table, 0, text_row); } title = MemFree (title); } } else { text_row = StartRowWithSourceFields (id_buf, bsp, src_field_list, num_field_per_pos, text_table); /* get requested fields */ for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) { txt1 = GetFieldValueForObject (vnp_o->choice, vnp_o->data.ptrvalue, vnp_f, NULL); ValNodeAddPointer (&text_row, 0, txt1); } /* add row to table */ ValNodeAddPointer (&text_table, 0, text_row); } } } /* add header */ /* accession is first column */ fprintf (fp, "Accession"); /* list source fields first */ for (vnp_f = src_field_list, pos = 0; vnp_f != NULL; vnp_f = vnp_f->next, pos++) { txt1 = SummarizeFieldType (vnp_f); for (i = 0; i < num_field_per_pos[pos]; i++) { fprintf (fp, "\t%s", txt1); } txt1 = MemFree (txt1); } /* list fields */ for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) { txt1 = SummarizeFieldType (vnp_f); fprintf (fp, "\t%s", txt1); txt1 = 
MemFree (txt1); } fprintf (fp, "\n"); WriteTabTableToFile (text_table, fp); FreeTabTable(text_table); fields = FieldTypeListFree (fields); object_list = FreeObjectList (object_list); num_field_per_pos = MemFree (num_field_per_pos); } /* This section handles parsing where the source field and destination field may not be on the same * group of objects. */ typedef struct parsesourceinfo { BioseqPtr bsp; SeqFeatPtr sfp; SeqDescrPtr sdp; SeqIdPtr sip; ValNodePtr dest_list; CharPtr parse_src_txt; } ParseSourceInfoData, PNTR ParseSourceInfoPtr; static ParseSourceInfoPtr ParseSourceInfoNew (BioseqPtr bsp, SeqFeatPtr sfp, SeqDescrPtr sdp, SeqIdPtr sip, CharPtr parse_src_txt) { ParseSourceInfoPtr psip; psip = (ParseSourceInfoPtr) MemNew (sizeof (ParseSourceInfoData)); if (psip != NULL) { psip->bsp = bsp; psip->sdp = sdp; psip->sfp = sfp; psip->sip = sip; psip->dest_list = NULL; psip->parse_src_txt = parse_src_txt; } return psip; } static ParseSourceInfoPtr ParseSourceInfoFree (ParseSourceInfoPtr psip) { if (psip != NULL) { psip->dest_list = ValNodeFree (psip->dest_list); psip->parse_src_txt = MemFree (psip->parse_src_txt); psip = MemFree (psip); } return psip; } static ParseSourceInfoPtr ParseSourceInfoCopy (ParseSourceInfoPtr psip) { ParseSourceInfoPtr pcopy = NULL; if (psip != NULL) { pcopy = (ParseSourceInfoPtr) MemNew (sizeof (ParseSourceInfoData)); if (pcopy != NULL) { pcopy->bsp = psip->bsp; pcopy->sfp = psip->sfp; pcopy->sdp = psip->sdp; pcopy->sip = psip->sip; pcopy->dest_list = NULL; pcopy->parse_src_txt = NULL; } } return pcopy; } static ValNodePtr ParseSourceListFree (ValNodePtr vnp) { ValNodePtr vnp_next; while (vnp != NULL) { vnp_next = vnp->next; vnp->next = NULL; vnp->data.ptrvalue = ParseSourceInfoFree (vnp->data.ptrvalue); vnp = ValNodeFree (vnp); vnp = vnp_next; } return vnp; } static void GetDeflineSourcesForBioseq (BioseqPtr bsp, TextPortionPtr portion, ValNodePtr PNTR source_list) { SeqDescrPtr sdp; SeqMgrDescContext dcontext; CharPtr str; 
ParseSourceInfoPtr psip;

  if (bsp == NULL || source_list == NULL) {
    return;
  }

  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &dcontext);
  while (sdp != NULL) {
    str = GetTextPortionFromString (sdp->data.ptrvalue, portion);
    if (str != NULL) {
      psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str);
      if (psip != NULL) {
        ValNodeAddPointer (source_list, 0, psip);
      } else {
        str = MemFree (str);
      }
    }
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_title, &dcontext);
  }
}


/* Returns a newly allocated string for sip when its choice equals id_type,
 * otherwise NULL.  For general ids the db must equal tag (when tag is not
 * NULL) and the tag object supplies the text; for local ids the object id
 * supplies it.  An object id without a string is printed as its integer;
 * when there is no object id the whole id is written with SeqIdWrite.
 */
static CharPtr GetIDSrc (SeqIdPtr sip, Uint1 id_type, CharPtr tag)
{
  DbtagPtr    dbt = NULL;
  ObjectIdPtr oip = NULL;
  Char        id_str[128];
  CharPtr     str_src = NULL;

  if (sip == NULL || sip->choice != id_type) return NULL;

  if (id_type == SEQID_GENERAL) {
    dbt = (DbtagPtr) sip->data.ptrvalue;
    if (dbt == NULL || (tag != NULL && StringCmp (dbt->db, tag) != 0)) return NULL;
    oip = dbt->tag;
  } else if (id_type == SEQID_LOCAL) {
    oip = sip->data.ptrvalue;
  }

  if (oip == NULL) {
    SeqIdWrite (sip, id_str, PRINTID_REPORT, sizeof (id_str));
    str_src = StringSave (id_str);
  } else {
    if (oip->str == NULL) {
      sprintf (id_str, "%d", oip->id);
      str_src = StringSave (id_str);
    } else {
      str_src = StringSave (oip->str);
    }
  }
  return str_src;
}


/* For each id of bsp accepted by GetIDSrc (id_type, tag), extracts the
 * requested text portion and, when one is found, appends a
 * ParseSourceInfo referring to that id to source_list.
 */
static void GetIDSourcesForBioseq (BioseqPtr bsp, TextPortionPtr portion, Uint1 id_type, CharPtr tag, ValNodePtr PNTR source_list)
{
  SeqIdPtr           sip;
  ParseSourceInfoPtr psip;
  CharPtr            src_str = NULL, str;

  if (bsp == NULL || source_list == NULL) {
    return;
  }

  sip = bsp->id;
  while (sip != NULL) {
    if ((src_str = GetIDSrc (sip, id_type, tag)) != NULL) {
      str = GetTextPortionFromString (src_str, portion);
      if (str != NULL) {
        psip = ParseSourceInfoNew (bsp, NULL, NULL, sip, str);
        if (psip != NULL) {
          ValNodeAddPointer (source_list, 0, psip);
        } else {
          str = MemFree (str);
        }
      }
      src_str = MemFree (src_str);
    }
    sip = sip->next;
  }
}


/* Collects parse sources from the local ids of bsp. */
static void GetLocalIDSourcesForBioseq (BioseqPtr bsp, TextPortionPtr tp, ValNodePtr PNTR source_list)
{
  GetIDSourcesForBioseq (bsp, tp, SEQID_LOCAL, NULL, source_list);
}


/* Collects parse sources from general ids of bsp whose db is "NCBIFILE". */
static void
GetNcbiFileSourceForBioseq (BioseqPtr bsp, TextPortionPtr tp, ValNodePtr PNTR source_list)
{
  GetIDSourcesForBioseq (bsp, tp, SEQID_GENERAL, "NCBIFILE", source_list);
}


/* Collects parse sources from the general ids of bsp.  The text examined
 * is the db name alone when db_only is TRUE, otherwise the string
 * produced by GetDbtagString.
 */
static void GetGeneralIdTextSourcesForBioseq (BioseqPtr bsp, Boolean db_only, TextPortionPtr portion, ValNodePtr PNTR source_list)
{
  SeqIdPtr           sip;
  ParseSourceInfoPtr psip;
  DbtagPtr           dbtag;
  CharPtr            src_str = NULL, str;

  if (bsp == NULL || source_list == NULL) {
    return;
  }

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    if (sip->choice == SEQID_GENERAL && (dbtag = (DbtagPtr) sip->data.ptrvalue) != NULL) {
      if (db_only) {
        str = GetTextPortionFromString (dbtag->db, portion);
      } else {
        src_str = GetDbtagString (dbtag);
        str = GetTextPortionFromString (src_str, portion);
        src_str = MemFree (src_str);
      }
      if (str != NULL) {
        psip = ParseSourceInfoNew (bsp, NULL, NULL, sip, str);
        if (psip != NULL) {
          ValNodeAddPointer (source_list, 0, psip);
        } else {
          str = MemFree (str);
        }
      }
    }
  }
}


/* Dispatches on general_id->choice: whole text, db name only, or tag.
 * For the tag choice, a blank db name in general_id->data.ptrvalue means
 * general ids of any db are accepted.
 */
static void GetGeneralIDSourcesForBioseq (BioseqPtr bsp, ValNodePtr general_id, TextPortionPtr tp, ValNodePtr PNTR source_list)
{
  if (general_id == NULL) {
    return;
  }

  switch (general_id->choice) {
    case ParseSrcGeneralId_whole_text:
      GetGeneralIdTextSourcesForBioseq (bsp, FALSE, tp, source_list);
      break;
    case ParseSrcGeneralId_db:
      GetGeneralIdTextSourcesForBioseq (bsp, TRUE, tp, source_list);
      break;
    case ParseSrcGeneralId_tag:
      if (StringHasNoText (general_id->data.ptrvalue)) {
        GetIDSourcesForBioseq (bsp, tp, SEQID_GENERAL, NULL, source_list);
      } else {
        GetIDSourcesForBioseq (bsp, tp, SEQID_GENERAL, general_id->data.ptrvalue, source_list);
      }
      break;
    default:
      break;
  }
}


/* Calls ReplaceStringForParse on every "AdditionalComment" field of a
 * "Submission" user object held by sdp, unless the object's class is
 * "SMART_V1.0".
 */
static void StripBankitCommentForParse (SeqDescrPtr sdp, TextPortionPtr tp)
{
  UserObjectPtr uop;
  ObjectIdPtr   oip;
  UserFieldPtr  ufp;

  if (sdp == NULL || sdp->choice != Seq_descr_user || tp == NULL) {
    return;
  }

  /* Bankit Comments */
  uop = (UserObjectPtr) sdp->data.ptrvalue;
  if (uop != NULL && StringCmp (uop->_class, "SMART_V1.0") != 0) {
    oip = uop->type;
    if (oip != NULL && StringCmp
(oip->str, "Submission") == 0) {
      for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
        oip = ufp->label;
        if (oip != NULL && StringCmp (oip->str, "AdditionalComment") == 0) {
          ReplaceStringForParse (ufp->data.ptrvalue, tp);
        }
      }
    }
  }
}


/* Calls ReplaceStringForParse on every field labelled comment_field in
 * the structured-comment user object held by sdp.
 */
static void StripStructuredCommentForParse (SeqDescrPtr sdp, CharPtr comment_field, TextPortionPtr tp)
{
  UserObjectPtr uop;
  ObjectIdPtr   oip;
  UserFieldPtr  ufp;

  if (sdp == NULL || sdp->choice != Seq_descr_user || tp == NULL || StringHasNoText (comment_field)) {
    return;
  }

  uop = (UserObjectPtr) sdp->data.ptrvalue;
  if (IsUserObjectStructuredComment (uop)) {
    for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
      oip = ufp->label;
      if (oip != NULL && StringCmp (oip->str, comment_field) == 0) {
        ReplaceStringForParse (ufp->data.ptrvalue, tp);
      }
    }
  }
}


/* Collects parse sources from the "AdditionalComment" fields of
 * "Submission" user-object descriptors on bsp (class other than
 * "SMART_V1.0").
 */
static void GetBankitCommentSourcesForBioseq (BioseqPtr bsp, TextPortionPtr tp, ValNodePtr PNTR source_list)
{
  SeqDescrPtr        sdp;
  SeqMgrDescContext  dcontext;
  ParseSourceInfoPtr psip;
  UserObjectPtr      uop;
  ObjectIdPtr        oip;
  UserFieldPtr       ufp;
  CharPtr            str = NULL;

  if (bsp == NULL || source_list == NULL) {
    return;
  }

  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
  while (sdp != NULL) {
    if (sdp->extended != 0) {
      /* Bankit Comments */
      uop = (UserObjectPtr) sdp->data.ptrvalue;
      if (uop != NULL && StringCmp (uop->_class, "SMART_V1.0") != 0) {
        oip = uop->type;
        if (oip != NULL && StringCmp (oip->str, "Submission") == 0) {
          for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
            oip = ufp->label;
            if (oip != NULL && StringCmp (oip->str, "AdditionalComment") == 0) {
              str = GetTextPortionFromString (ufp->data.ptrvalue, tp);
              if (str != NULL) {
                psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str);
                if (psip == NULL) {
                  str = MemFree (str);
                } else {
                  ValNodeAddPointer (source_list, 0, psip);
                }
              }
            }
          }
        }
      }
    }
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
  }
}


/* Collects parse sources from the comment descriptors and comment
 * features of bsp, then from its Bankit comments.
 */
static void GetCommentSourcesForBioseq (BioseqPtr bsp, TextPortionPtr tp, ValNodePtr PNTR source_list)
{
  SeqDescrPtr        sdp;
  SeqFeatPtr         sfp;
  SeqMgrFeatContext  fcontext;
  SeqMgrDescContext  dcontext;
  ParseSourceInfoPtr psip;
  CharPtr            str;

  if (bsp == NULL || source_list == NULL) {
    return;
  }

  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext);
  while (sdp != NULL) {
    str = GetTextPortionFromString (sdp->data.ptrvalue, tp);
    if (str != NULL) {
      psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str);
      if (psip == NULL) {
        str = MemFree (str);
      } else {
        ValNodeAddPointer (source_list, 0, psip);
      }
    }
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_comment, &dcontext);
  }

  sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_COMMENT, 0, &fcontext);
  while (sfp != NULL) {
    str = GetTextPortionFromString (sfp->data.value.ptrvalue, tp);
    if (str != NULL) {
      psip = ParseSourceInfoNew (bsp, sfp, NULL, NULL, str);
      if (psip == NULL) {
        str = MemFree (str);
      } else {
        ValNodeAddPointer (source_list, 0, psip);
      }
    }
    sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_COMMENT, 0, &fcontext);
  }
  GetBankitCommentSourcesForBioseq (bsp, tp, source_list);
}


/* Collects parse sources from the fields labelled comment_field in the
 * structured-comment user-object descriptors on bsp.
 */
static void GetStructuredCommentSourcesForBioseq (BioseqPtr bsp, TextPortionPtr tp, CharPtr comment_field, ValNodePtr PNTR source_list)
{
  SeqDescrPtr        sdp;
  UserObjectPtr      uop;
  ObjectIdPtr        oip;
  UserFieldPtr       ufp;
  SeqMgrDescContext  dcontext;
  CharPtr            str;
  ParseSourceInfoPtr psip;

  if (bsp == NULL || source_list == NULL) {
    return;
  }

  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
  while (sdp != NULL) {
    if (sdp->extended != 0 && sdp->data.ptrvalue != NULL) {
      uop = (UserObjectPtr) sdp->data.ptrvalue;
      if (IsUserObjectStructuredComment (uop)) {
        for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
          oip = ufp->label;
          if (oip != NULL && StringCmp (oip->str, comment_field) == 0) {
            str = GetTextPortionFromString (ufp->data.ptrvalue, tp);
            if (str != NULL) {
              psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str);
              if (psip == NULL) {
                str = MemFree (str);
              } else {
                ValNodeAddPointer (source_list, 0, psip);
              }
            }
          }
        }
      }
    }
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
  }
}


/* Collects parse sources from the GenBank flat file of bsp: each non-empty
 * paragraph produced by asn2gnbk_format has its spaces compressed and is
 * searched for the requested text portion.  The error message level is
 * raised to SEV_MAX while formatting and restored afterwards.
 */
static void GetFlatFileSourcesForBioseq (BioseqPtr bsp, TextPortionPtr tp, ValNodePtr PNTR source_list)
{
  SeqEntryPtr        sep;
  Asn2gbJobPtr       ajp;
  Int4               index;
  ErrSev             level;
  CharPtr            string, str;
  ParseSourceInfoPtr psip;

  if (bsp == NULL || source_list == NULL) {
    return;
  }

  sep = SeqMgrGetSeqEntryForData (bsp);
  if (sep == NULL) {
    return;
  }

  level = ErrSetMessageLevel (SEV_MAX);

  ajp = asn2gnbk_setup (bsp, NULL, NULL, (FmtType)GENBANK_FMT, SEQUIN_MODE, NORMAL_STYLE, 0, 0, 0, NULL);
  if (ajp != NULL) {
    for (index = 0; index < ajp->numParagraphs; index++) {
      string = asn2gnbk_format (ajp, (Int4) index);
      if (string != NULL && *string != '\0') {
        CompressSpaces (string);
        str = GetTextPortionFromString (string, tp);
        if (str != NULL) {
          psip = ParseSourceInfoNew (bsp, NULL, NULL, NULL, str);
          if (psip == NULL) {
            str = MemFree (str);
          } else {
            ValNodeAddPointer (source_list, 0, psip);
          }
        }
      }
      MemFree (string);
    }
    asn2gnbk_cleanup (ajp);
  }

  ErrSetMessageLevel (level);
}


/* Words that GetTextAfterNomial treats as continuing the organism name;
 * each is skipped together with the word that follows it.
 */
const CharPtr nomial_keywords[] = {
  "f. sp. ",
  "var.",
  "pv.",
  "bv.",
  "serovar",
  "subsp." };

const Int4 num_nomial_keywords = sizeof(nomial_keywords) / sizeof (CharPtr);


/* Returns a pointer into taxname just past the leading name: initially the
 * space that ends the second word, moved past each nomial keyword and the
 * word after it.  Returns NULL when taxname has no space after its second
 * word.  The result points into taxname and is not a copy.
 */
static CharPtr GetTextAfterNomial (CharPtr taxname)
{
  CharPtr ptr, nomial_end;
  Int4    i;
  Boolean found_keyword = TRUE;

  ptr = StringChr (taxname, ' ');
  if (ptr == NULL) return NULL;
  /* skip over the first word and the spaces after it. */
  while (*ptr == ' ') {
    ptr++;
  }
  ptr = StringChr (ptr, ' ');
  /* if there are only two words, give up. */
  if (ptr == NULL) {
    return NULL;
  }
  nomial_end = ptr;
  while (*ptr == ' ') {
    ptr++;
  }

  /* rescan the keyword table until a full pass finds no further keyword */
  while (found_keyword) {
    found_keyword = FALSE;
    /* if the next word is a nomial keyword, skip that plus the first word that follows it.
     */
    for (i = 0; i < num_nomial_keywords && *nomial_end != 0; i++) {
      if (StringNCmp (ptr, nomial_keywords[i], StringLen(nomial_keywords[i])) == 0) {
        ptr += StringLen(nomial_keywords[i]);
        while (*ptr == ' ' ) {
          ptr++;
        }
        nomial_end = StringChr (ptr, ' ');
        if (nomial_end == NULL) {
          /* the name runs to the end of the string */
          nomial_end = ptr + StringLen (ptr);
        } else {
          ptr = nomial_end;
          while (*ptr == ' ') {
            ptr++;
          }
          found_keyword = TRUE;
        }
      }
    }
  }
  return nomial_end;
}


/* Reads the text selected by o->field from biop (a source qualifier, or
 * the part of the taxname after the binomial), extracts the portion
 * described by tp and, when one is found, appends a ParseSourceInfo
 * carrying bsp/sfp/sdp to source_list.
 */
static void GetOrgParseSourcesForBioSource (BioSourcePtr biop, BioseqPtr bsp, SeqDescrPtr sdp, SeqFeatPtr sfp, ParseSrcOrgPtr o, TextPortionPtr tp, ValNodePtr PNTR source_list)
{
  CharPtr            str = NULL, portion, tmp;
  ValNode            vn;
  ParseSourceInfoPtr psip;

  if (biop == NULL || o == NULL || o->field == NULL || source_list == NULL) return;

  switch (o->field->choice) {
    case ParseSrcOrgChoice_source_qual :
      vn.choice = SourceQualChoice_textqual;
      vn.data.intvalue = o->field->data.intvalue;
      vn.next = NULL;
      str = GetSourceQualFromBioSource (biop, &vn, NULL);
      break;
    case ParseSrcOrgChoice_taxname_after_binomial :
      vn.choice = SourceQualChoice_textqual;
      vn.data.intvalue = Source_qual_taxname;
      vn.next = NULL;
      str = GetSourceQualFromBioSource (biop, &vn, NULL);
      /* tmp points into str, so copy it before str is released */
      tmp = GetTextAfterNomial (str);
      tmp = StringSave (tmp);
      str = MemFree (str);
      str = tmp;
      break;
  }
  portion = GetTextPortionFromString (str, tp);
  if (portion != NULL) {
    psip = ParseSourceInfoNew (bsp, sfp, sdp, NULL, portion);
    if (psip == NULL) {
      portion = MemFree (portion);
    } else {
      ValNodeAddPointer (source_list, 0, psip);
    }
  }
  str = MemFree (str);
}


static void GetOrgParseSourcesForBioseq (BioseqPtr bsp, ParseSrcOrgPtr o, TextPortionPtr tp, ValNodePtr PNTR source_list)
{
  SeqDescrPtr        sdp;
  SeqFeatPtr         sfp;
  SeqMgrFeatContext  fcontext;
  SeqMgrDescContext  dcontext;

  if (bsp == NULL || o == NULL || source_list == NULL) return;

  if (o->type == Object_type_constraint_any || o->type == Object_type_constraint_descriptor) {
    for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
         sdp != NULL;
         sdp =
SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext)) { GetOrgParseSourcesForBioSource (sdp->data.ptrvalue, bsp, sdp, NULL, o, tp, source_list); } } if (o->type == Object_type_constraint_any || o->type == Object_type_constraint_feature) { for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext); sfp != NULL; sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_BIOSRC, 0, &fcontext)) { GetOrgParseSourcesForBioSource (sfp->data.value.ptrvalue, bsp, NULL, sfp, o, tp, source_list); } } } typedef struct parsesrccollection { ParseSrcPtr src; TextPortionPtr portion; ValNodePtr src_list; } ParseSrcCollectionData, PNTR ParseSrcCollectionPtr; static void FindParseSourceBioseqCallback (BioseqPtr bsp, Pointer userdata) { ParseSrcCollectionPtr psp; if (bsp == NULL || ISA_aa (bsp->mol) || userdata == NULL) { return; } psp = (ParseSrcCollectionPtr) userdata; if (psp->src == NULL) return; switch (psp->src->choice) { case ParseSrc_defline: if (!ISA_aa (bsp->mol)) { GetDeflineSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); } break; case ParseSrc_flatfile: GetFlatFileSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); break; case ParseSrc_local_id: if (! 
ISA_aa (bsp->mol) && bsp->repr != Seq_repr_seg) { GetLocalIDSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); } break; case ParseSrc_file_id: GetNcbiFileSourceForBioseq (bsp, psp->portion, &(psp->src_list)); break; case ParseSrc_general_id: GetGeneralIDSourcesForBioseq (bsp, psp->src->data.ptrvalue, psp->portion, &(psp->src_list)); break; case ParseSrc_org: GetOrgParseSourcesForBioseq (bsp, psp->src->data.ptrvalue, psp->portion, &(psp->src_list)); break; case ParseSrc_comment: GetCommentSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); break; case ParseSrc_structured_comment: GetStructuredCommentSourcesForBioseq(bsp, psp->portion, psp->src->data.ptrvalue, &(psp->src_list)); break; case ParseSrc_bankit_comment: if (!ISA_aa (bsp->mol)) { GetBankitCommentSourcesForBioseq (bsp, psp->portion, &(psp->src_list)); } break; } } static void GetOrgNamesInRecordCallback (BioSourcePtr biop, Pointer userdata) { ValNodePtr PNTR org_names; if (biop == NULL || biop->org == NULL || StringHasNoText (biop->org->taxname) || userdata == NULL) { return; } org_names = (ValNodePtr PNTR) userdata; ValNodeAddPointer (org_names, 0, biop->org->taxname); } static void SetToUpper (CharPtr cp) { if (cp == NULL) return; while (*cp != 0) { if (isalpha (*cp)) { *cp = toupper (*cp); } cp++; } } static void CapitalizeWords (CharPtr string, Boolean punc) { CharPtr cp; Boolean send_upper = TRUE; if (string == NULL) { return; } cp = string; while (*cp != 0) { if (isspace (*cp) || (punc && ispunct (*cp))) { send_upper = TRUE; } else if (isalpha (*cp)) { if (send_upper) { *cp = toupper (*cp); } else { *cp = tolower (*cp); } send_upper = FALSE; } else { send_upper = FALSE; } cp++; } } NLM_EXTERN void FixCapitalizationInString (CharPtr PNTR pTitle, Uint2 capitalization, ValNodePtr org_names) { if (pTitle == NULL || capitalization == Cap_change_none) return; switch (capitalization) { case Cap_change_tolower: ResetCapitalization (FALSE, *pTitle); FixAbbreviationsInElement (pTitle); 
FixOrgNamesInString (*pTitle, org_names); break; case Cap_change_toupper: SetToUpper (*pTitle); FixAbbreviationsInElement (pTitle); FixOrgNamesInString (*pTitle, org_names); break; case Cap_change_firstcap: ResetCapitalization (TRUE, *pTitle); FixAbbreviationsInElement (pTitle); FixOrgNamesInString (*pTitle, org_names); break; case Cap_change_firstcaprestnochange: if (*pTitle != NULL && isalpha (**pTitle)) { **pTitle = toupper (**pTitle); } break; case Cap_change_firstlower_restnochange: if (*pTitle != NULL && isalpha (**pTitle)) { **pTitle = tolower (**pTitle); } break; case Cap_change_cap_word_space: CapitalizeWords (*pTitle, FALSE); FixAbbreviationsInElement (pTitle); FixOrgNamesInString (*pTitle, org_names); break; case Cap_change_cap_word_space_punc: CapitalizeWords (*pTitle, TRUE); FixAbbreviationsInElement (pTitle); FixOrgNamesInString (*pTitle, org_names); break; } } static void AddDeflineDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list) { SeqDescrPtr sdp; SeqMgrDescContext dcontext; if (bsp == NULL || dest_list == NULL) { return; } sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &dcontext); while (sdp != NULL) { ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_title, &dcontext); } } static ValNodePtr GetFeatureListForNucleotideBioseq (Uint1 featdef, BioseqPtr bsp); static ValNodePtr GetFeatureListForProteinBioseq (Uint1 featdef, BioseqPtr bsp); static void AddFeatureDestinationsForBioseq (BioseqPtr bsp, FeatureFieldLegalPtr featfield, ValNodePtr PNTR dest_list) { Int4 featdef; if (bsp == NULL || featfield == NULL || dest_list == NULL) return; featdef = GetFeatdefFromFeatureType (featfield->type); if (ISA_aa (bsp->mol)) { ValNodeLink (dest_list, GetFeatureListForProteinBioseq (featdef, bsp)); } else { ValNodeLink (dest_list, GetFeatureListForNucleotideBioseq (featdef, bsp)); } } static void GetBioSourceDestinationsForBioseq (BioseqPtr bsp, Uint2 object_type, ValNodePtr PNTR 
dest_list) { SeqDescrPtr sdp; SeqFeatPtr sfp; SeqMgrFeatContext fcontext; SeqMgrDescContext dcontext; if (bsp == NULL || dest_list == NULL) { return; } if (object_type == Object_type_constraint_any || object_type == Object_type_constraint_descriptor) { sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext); while (sdp != NULL) { ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp); sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext); } } if (object_type == Object_type_constraint_any || object_type == Object_type_constraint_feature) { sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext); while (sfp != NULL) { ValNodeAddPointer (dest_list, OBJ_SEQFEAT, sfp); sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_BIOSRC, 0, &fcontext); } } } static void AddParseDestinations (ParseSourceInfoPtr psip, ParseDestPtr dst) { ParseDstOrgPtr o; if (psip == NULL || dst == NULL) return; switch (dst->choice) { case ParseDest_defline : AddDeflineDestinationsForBioseq (psip->bsp, &(psip->dest_list)); break; case ParseDest_org : o = (ParseDstOrgPtr) dst->data.ptrvalue; if ((o->type == Object_type_constraint_any || o->type == Object_type_constraint_descriptor) && psip->sdp != NULL && psip->sdp->choice == Seq_descr_source) { ValNodeAddPointer (&(psip->dest_list), OBJ_SEQDESC, psip->sdp); } else if ((o->type == Object_type_constraint_any || o->type == Object_type_constraint_feature) && psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_BIOSRC) { ValNodeAddPointer (&(psip->dest_list), OBJ_SEQFEAT, psip->sfp); } else { GetBioSourceDestinationsForBioseq (psip->bsp, o->type, &(psip->dest_list)); } break; case ParseDest_featqual : AddFeatureDestinationsForBioseq (psip->bsp, dst->data.ptrvalue, &(psip->dest_list)); break; case ParseDest_comment_descriptor : AddCommentDescriptorDestinationsForBioseq (psip->bsp, &(psip->dest_list)); break; case ParseDest_dbxref : GetBioSourceDestinationsForBioseq (psip->bsp, Object_type_constraint_any, 
&(psip->dest_list)); break; } } static Boolean SourceHasOneUndeletedDestination (ParseSourceInfoPtr source) { Int4 num_seen = 0; ValNodePtr vnp; if (source == NULL || source->dest_list == NULL) { return FALSE; } vnp = source->dest_list; while (vnp != NULL && num_seen < 2) { if (vnp->choice > 1) { num_seen ++; } vnp = vnp->next; } if (num_seen == 1) { return TRUE; } else { return FALSE; } } static void CombineSourcesForDestinations (ValNodePtr PNTR source_list) { ValNodePtr source1_vnp, source2_vnp, dest1_vnp, dest2_vnp; ValNodePtr source_new, del_vnp; ParseSourceInfoPtr psip1, psip2, new_psip; CharPtr comb_txt; for (source1_vnp = *source_list; source1_vnp != NULL; source1_vnp = source1_vnp->next) { psip1 = (ParseSourceInfoPtr) source1_vnp->data.ptrvalue; if (psip1 == NULL || psip1->dest_list == NULL) { continue; } for (source2_vnp = source1_vnp->next; source2_vnp != NULL; source2_vnp = source2_vnp->next) { if (source2_vnp->choice > 0) { /* already marked for deletion */ continue; } psip2 = (ParseSourceInfoPtr) source2_vnp->data.ptrvalue; if (psip2 == NULL || psip2->dest_list == NULL) { continue; } for (dest1_vnp = psip1->dest_list; dest1_vnp != NULL; dest1_vnp = dest1_vnp->next) { if (dest1_vnp->choice == 0) { /* already marked for deletion */ continue; } for (dest2_vnp = psip2->dest_list; dest2_vnp != NULL; dest2_vnp = dest2_vnp->next) { if (dest2_vnp->choice == 0) { /* already marked for deletion */ continue; } if (dest1_vnp->choice == dest2_vnp->choice && dest1_vnp->data.ptrvalue == dest2_vnp->data.ptrvalue) { comb_txt = (CharPtr) (MemNew (sizeof (Char) * (StringLen (psip1->parse_src_txt) + StringLen (psip2->parse_src_txt) + 2))); StringCpy (comb_txt, psip1->parse_src_txt); StringCat (comb_txt, ";"); StringCat (comb_txt, psip2->parse_src_txt); /* If the first source has a single destination, then we can * add the text from the second source to the first and remove * the destination from the second source. 
*/ if (SourceHasOneUndeletedDestination (psip1)) { psip1->parse_src_txt = MemFree (psip1->parse_src_txt); psip1->parse_src_txt = comb_txt; dest2_vnp->choice = 0; } /* If the first source has more than one destination and * the second source has a single destination, then we can * remove the repeated desination from the first source * and add the text from the first source to the second source. */ else if (SourceHasOneUndeletedDestination (psip2)) { psip2->parse_src_txt = MemFree (psip2->parse_src_txt); psip2->parse_src_txt = comb_txt; dest1_vnp->choice = 0; } /* If the first and second sources have multiple destinations, * we need to remove the repeated destination from both the first * and second source and create a new source with the combined * text for just the repeated destination. */ else { new_psip = ParseSourceInfoNew (NULL, NULL, NULL, NULL, comb_txt); ValNodeAddPointer (&(new_psip->dest_list), dest1_vnp->choice, dest1_vnp->data.ptrvalue); dest1_vnp->choice = 0; dest2_vnp->choice = 0; source_new = ValNodeNew (NULL); source_new->choice = 0; source_new->data.ptrvalue = new_psip; source_new->next = source1_vnp->next; source1_vnp->next = source_new; } } } } del_vnp = ValNodeExtractList (&(psip1->dest_list), 0); del_vnp = ValNodeFree (del_vnp); if (psip1->dest_list == NULL) { source1_vnp->choice = 1; } del_vnp = ValNodeExtractList (&(psip2->dest_list), 0); del_vnp = ValNodeFree (del_vnp); if (psip2->dest_list == NULL) { source2_vnp->choice = 1; } } } /* now remove sources deleted */ del_vnp = ValNodeExtractList (source_list, 1); del_vnp = ParseSourceListFree (del_vnp); } static BioseqSetPtr GetPartsForSourceDescriptorOnSegSet (SeqDescrPtr sdp) { ObjValNodePtr ovp; BioseqSetPtr bssp; SeqEntryPtr sep; if (sdp == NULL || sdp->extended != 1) { return NULL; } ovp = (ObjValNodePtr) sdp; if (ovp->idx.parenttype != OBJ_BIOSEQSET || ovp->idx.parentptr == NULL) { return NULL; } bssp = (BioseqSetPtr) ovp->idx.parentptr; if (bssp->_class == BioseqseqSet_class_nuc_prot && 
IS_Bioseq_set (bssp->seq_set) && bssp->seq_set->data.ptrvalue != NULL) { bssp = (BioseqSetPtr) bssp->seq_set->data.ptrvalue; } if (bssp->_class == BioseqseqSet_class_segset) { sep = bssp->seq_set; while (sep != NULL) { if (IS_Bioseq_set (sep) && sep->data.ptrvalue != NULL) { bssp = (BioseqSetPtr) sep->data.ptrvalue; if (bssp->_class == BioseqseqSet_class_parts) { return bssp; } } sep = sep->next; } } return NULL; } static SeqDescrPtr FindSourceDescriptorInSeqEntry (SeqEntryPtr sep) { BioseqPtr bsp; BioseqSetPtr bssp; SeqDescrPtr sdp = NULL; if (sep != NULL && sep->data.ptrvalue != NULL) { if (IS_Bioseq (sep)) { bsp = (BioseqPtr) sep->data.ptrvalue; sdp = bsp->descr; } else if (IS_Bioseq_set (sep)) { bssp = (BioseqSetPtr) sep->data.ptrvalue; sdp = bssp->descr; } while (sdp != NULL && sdp->choice != Seq_descr_source) { sdp = sdp->next; } } return sdp; } static SeqDescrPtr PropagateToSeqEntry (SeqEntryPtr sep, SeqDescrPtr sdp) { BioseqPtr bsp; BioseqSetPtr bssp; SeqDescrPtr new_sdp = NULL; if (sep != NULL && sep->data.ptrvalue != NULL) { if (IS_Bioseq (sep)) { bsp = (BioseqPtr) sep->data.ptrvalue; new_sdp = AsnIoMemCopy ((Pointer) sdp, (AsnReadFunc) SeqDescrAsnRead, (AsnWriteFunc) SeqDescrAsnWrite); ValNodeLink (&(bsp->descr), new_sdp); } else if (IS_Bioseq_set (sep)) { bssp = (BioseqSetPtr) sep->data.ptrvalue; new_sdp = AsnIoMemCopy ((Pointer) sdp, (AsnReadFunc) SeqDescrAsnRead, (AsnWriteFunc) SeqDescrAsnWrite); ValNodeLink (&(bssp->descr), new_sdp); } } return new_sdp; } static void PropagateSourceOnSegSetForParse (ValNodePtr parse_source_list) { ParseSourceInfoPtr psip; ValNodePtr vnp_src, vnp_dst; SeqDescrPtr sdp, other_sdp; SeqEntryPtr sep; ValNodePtr extra_dests = NULL; BioseqSetPtr parts_bssp; for (vnp_src = parse_source_list; vnp_src != NULL; vnp_src = vnp_src->next) { psip = (ParseSourceInfoPtr) vnp_src->data.ptrvalue; if (psip != NULL) { for (vnp_dst = psip->dest_list; vnp_dst != NULL; vnp_dst = vnp_dst->next) { if (vnp_dst->choice == OBJ_SEQDESC) { sdp = 
(SeqDescrPtr) vnp_dst->data.ptrvalue; if (sdp != NULL && sdp->choice == Seq_descr_source) { parts_bssp = GetPartsForSourceDescriptorOnSegSet (sdp); if (parts_bssp != NULL) { for (sep = parts_bssp->seq_set; sep != NULL; sep = sep->next) { if (IS_Bioseq(sep) && sep->data.ptrvalue == psip->bsp) { other_sdp = FindSourceDescriptorInSeqEntry (sep); if (other_sdp == NULL) { other_sdp = PropagateToSeqEntry (sep, sdp); ValNodeAddPointer (&extra_dests, OBJ_SEQDESC, other_sdp); } } } /* set choice to 0 so master won't be a destination */ vnp_dst->choice = 0; } } } } /* add extra destinations to list */ ValNodeLink (&psip->dest_list, extra_dests); extra_dests = NULL; } } } NLM_EXTERN CharPtr GetDBxrefFromBioSource (BioSourcePtr biop, CharPtr db_name) { CharPtr rval = NULL; ValNodePtr vnp; DbtagPtr dbtag; if (biop == NULL || biop->org == NULL || StringHasNoText (db_name)) { return NULL; } for (vnp = biop->org->db; vnp != NULL && rval == NULL; vnp = vnp->next) { dbtag = (DbtagPtr) vnp->data.ptrvalue; if (dbtag != NULL && StringCmp (db_name, dbtag->db) == 0) { rval = GetObjectIdString (dbtag->tag); } } return rval; } NLM_EXTERN Boolean SetDBxrefForBioSource (BioSourcePtr biop, CharPtr db_name, CharPtr str, Uint2 existing_text) { ValNodePtr dbx; DbtagPtr dbtag; Boolean found = FALSE; Char buf[20]; Boolean rval = FALSE; if (biop == NULL || StringHasNoText (db_name) || StringHasNoText (str)) { return FALSE; } if (biop->org == NULL) { biop->org = OrgRefNew(); } dbx = biop->org->db; while (dbx != NULL && !found) { dbtag = (DbtagPtr) dbx->data.ptrvalue; if (dbtag != NULL && dbtag->tag != NULL && StringCmp (dbtag->db, db_name) == 0) { found = TRUE; } if (!found) { dbx = dbx->next; } } if (!found) { dbtag = DbtagNew(); dbtag->db = StringSave (db_name); ValNodeAddPointer (&(biop->org->db), 0, dbtag); } if (dbtag->tag == NULL) { dbtag->tag = ObjectIdNew(); } /* if it was a number before, make it a string now */ if (dbtag->tag->id > 0 && dbtag->tag->str == NULL) { sprintf (buf, "%d", 
dbtag->tag->id); dbtag->tag->id = 0; dbtag->tag->str = StringSave (buf); } rval = SetStringValue (&(dbtag->tag->str), str, existing_text); return rval; } NLM_EXTERN Boolean RemoveDBxrefForBioSource (BioSourcePtr biop, CharPtr db_name, StringConstraintPtr scp) { ValNodePtr dbx, prev = NULL, dbx_next; DbtagPtr dbtag; CharPtr str; Boolean found = FALSE; if (biop == NULL || StringHasNoText (db_name)) { return FALSE; } if (biop->org == NULL) { biop->org = OrgRefNew(); } dbx = biop->org->db; for (dbx = biop->org->db; dbx != NULL; dbx = dbx_next) { dbx_next = dbx->next; dbtag = (DbtagPtr) dbx->data.ptrvalue; str = NULL; if (dbtag != NULL && dbtag->tag != NULL && StringCmp (dbtag->db, db_name) == 0 && (scp == NULL || ((str = GetDbtagString(dbtag)) != NULL && DoesStringMatchConstraint (str, scp)))) { if (prev == NULL) { biop->org->db = dbx->next; } else { prev->next = dbx->next; } dbx->data.ptrvalue = DbtagFree (dbx->data.ptrvalue); dbx = ValNodeFree (dbx); found = TRUE; } else { prev = dbx; } str = MemFree (str); } return found; } static Int4 SetFieldForDestList (ValNodePtr dest_list, ParseDestPtr field, CharPtr str, Uint2 existing_text) { ValNodePtr vnp; SeqDescrPtr sdp; ObjValNodePtr ovp; CharPtr cp; BioSourcePtr biop; ParseDstOrgPtr o; FeatureFieldLegalPtr fl; FeatureField f; Boolean was_empty; Int4 num_succeeded = 0; if (dest_list == NULL || field == NULL) return 0; switch (field->choice) { case ParseDest_defline : for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { if (vnp->choice == OBJ_SEQDESC && vnp->data.ptrvalue != NULL) { sdp = (SeqDescrPtr) vnp->data.ptrvalue; if (sdp->choice == Seq_descr_title) { cp = sdp->data.ptrvalue; if (SetStringValue (&cp, str, existing_text)) { num_succeeded++; } sdp->data.ptrvalue = cp; RemoveAutodefObjectsForDesc(sdp); } } } break; case ParseDest_org : o = (ParseDstOrgPtr) field->data.ptrvalue; if (o != NULL) { for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue); if 
(SetSourceQualInBioSource (biop, o->field, NULL, str, existing_text)) { num_succeeded++; } } } break; case ParseDest_featqual: fl = (FeatureFieldLegalPtr) field->data.ptrvalue; if (fl != NULL) { f.type = fl->type; f.field = ValNodeNew(NULL); f.field->next = NULL; f.field->choice = FeatQualChoice_legal_qual; f.field->data.intvalue = fl->field; for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { if (SetQualOnFeature (vnp->data.ptrvalue, &f, NULL, str, existing_text)) { num_succeeded++; } } f.field = ValNodeFree (f.field); } break; case ParseDest_comment_descriptor: for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { sdp = vnp->data.ptrvalue; if (StringHasNoText (sdp->data.ptrvalue)) { was_empty = TRUE; } else { was_empty = FALSE; } cp = sdp->data.ptrvalue; if (SetStringValue (&cp, str, existing_text)) { num_succeeded++; } sdp->data.ptrvalue = cp; if (was_empty) { ovp = (ObjValNodePtr) sdp; ovp->idx.deleteme = FALSE; } } break; case ParseDest_dbxref: if (!StringHasNoText (field->data.ptrvalue)) { for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue); if (SetDBxrefForBioSource (biop, field->data.ptrvalue, str, existing_text)) { num_succeeded++; } } } break; } return num_succeeded; } static void AddToSampleForDestList (AECRSamplePtr sample, ValNodePtr dest_list, ParseDestPtr field) { ValNodePtr vnp; SeqDescrPtr sdp; BioSourcePtr biop; ParseDstOrgPtr o; FeatureFieldLegalPtr fl; FeatureField f; if (dest_list == NULL || field == NULL || sample == NULL) return; switch (field->choice) { case ParseDest_defline : for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { if (vnp->choice == OBJ_SEQDESC && vnp->data.ptrvalue != NULL) { sdp = (SeqDescrPtr) vnp->data.ptrvalue; if (sdp->choice == Seq_descr_title) { AddTextToAECRSample (sample, StringSave (sdp->data.ptrvalue)); } } } break; case ParseDest_org : o = (ParseDstOrgPtr) field->data.ptrvalue; if (o != NULL) { for (vnp = dest_list; vnp != NULL; vnp = vnp->next) 
{ biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue); AddTextToAECRSample (sample, GetSourceQualFromBioSource (biop, o->field, NULL)); } } break; case ParseDest_featqual: fl = (FeatureFieldLegalPtr) field->data.ptrvalue; if (fl != NULL) { f.type = fl->type; f.field = ValNodeNew(NULL); f.field->next = NULL; f.field->choice = FeatQualChoice_legal_qual; f.field->data.intvalue = fl->field; for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { AddTextToAECRSample (sample, GetQualFromFeature (vnp->data.ptrvalue, &f, NULL)); } f.field = ValNodeFree (f.field); } break; case ParseDest_comment_descriptor: for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { sdp = (SeqDescrPtr) vnp->data.ptrvalue; AddTextToAECRSample (sample, StringSave (sdp->data.ptrvalue)); } break; case ParseDest_dbxref: if (!StringHasNoText (field->data.ptrvalue)) { for (vnp = dest_list; vnp != NULL; vnp = vnp->next) { biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue); AddTextToAECRSample (sample, GetDBxrefFromBioSource (biop, field->data.ptrvalue)); } } break; } } static CharPtr GetParseBioSourceField (ValNodePtr field, BioSourcePtr biop) { CharPtr str = NULL; if (field == NULL || biop == NULL) { return NULL; } if (field->choice == ParseSrcOrgChoice_source_qual) { str = GetSourceQualFromBioSource (biop, field, NULL); } else if (field->choice == ParseSrcOrgChoice_taxname_after_binomial) { if (biop->org != NULL) { str = StringSave (GetTextAfterNomial (biop->org->taxname)); } } return str; } static Boolean SetParseBioSourceField (ValNodePtr field, CharPtr str, BioSourcePtr biop) { Boolean rval = FALSE; CharPtr after, new_val; Int4 len, new_len; if (field == NULL || biop == NULL) { return FALSE; } if (field->choice == ParseSrcOrgChoice_source_qual) { rval = SetSourceQualInBioSource (biop, field, NULL, str, ExistingTextOption_replace_old); } else if (field->choice == ParseSrcOrgChoice_taxname_after_binomial) { if (biop->org != NULL) { after = GetTextAfterNomial 
(biop->org->taxname); len = after - biop->org->taxname; new_len = len + StringLen (str) + 2; /* note - do not need to free after, because after is a pointer to a position in biop->org->taxname */ new_val = (CharPtr) MemNew (sizeof (Char) * new_len); StringNCpy (new_val, biop->org->taxname, len); new_val[len] = 0; if (!StringHasNoText (str)) { StringCat (new_val, " "); StringCat (new_val, str); } biop->org->taxname = MemFree (biop->org->taxname); biop->org->taxname = new_val; rval = TRUE; } } return rval; } static void StripFieldForSrcList (ParseSourceInfoPtr psip, ParseSrcPtr field, TextPortionPtr text_portion) { CharPtr str; ParseSrcOrgPtr o; BioSourcePtr biop; if (psip == NULL || field == NULL || text_portion == NULL) return; switch (field->choice) { case ParseSrc_defline : if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_title) { ReplaceStringForParse (psip->sdp->data.ptrvalue, text_portion); } break; case ParseSrc_org : o = (ParseSrcOrgPtr) field->data.ptrvalue; if (o != NULL && o->field != NULL) { biop = NULL; if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_source) { biop = (BioSourcePtr) psip->sdp->data.ptrvalue; } else if (psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_BIOSRC) { biop = (BioSourcePtr) psip->sfp->data.value.ptrvalue; } if (biop != NULL) { str = GetParseBioSourceField (o->field, biop); ReplaceStringForParse (str, text_portion); SetParseBioSourceField (o->field, str, biop); str = MemFree (str); } } break; case ParseSrc_comment: if (psip->sdp != NULL) { if (psip->sdp->choice == Seq_descr_user) { StripBankitCommentForParse (psip->sdp, text_portion); } else if (psip->sdp->choice == Seq_descr_comment) { ReplaceStringForParse (psip->sdp->data.ptrvalue, text_portion); } } if (psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_COMMENT) { ReplaceStringForParse (psip->sfp->data.value.ptrvalue, text_portion); } break; case ParseSrc_bankit_comment: if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_user) { 
StripBankitCommentForParse (psip->sdp, text_portion); } break; case ParseSrc_structured_comment: if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_user) { StripStructuredCommentForParse (psip->sdp, field->data.ptrvalue, text_portion); } break; } } NLM_EXTERN AECRSamplePtr GetExistingTextForParseAction (ParseActionPtr action, SeqEntryPtr sep) { ParseSrcCollectionData psd; ParseSourceInfoPtr psip; ValNodePtr vnp; ValNodePtr dest_list = NULL; AECRSamplePtr sample; if (action == NULL || sep == NULL) return 0; psd.src = action->src; psd.portion = action->portion; psd.src_list = NULL; /* first, we need to get a list of the parse sources */ VisitBioseqsInSep (sep, &psd, FindParseSourceBioseqCallback); /* for each parse source, get a list of the destinations */ for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next) { if (vnp->data.ptrvalue == NULL) continue; psip = (ParseSourceInfoPtr) vnp->data.ptrvalue; /* find destinations */ AddParseDestinations (psip, action->dest); /* add destinations to list */ ValNodeLink (&dest_list, psip->dest_list); psip->dest_list = NULL; } psd.src_list = ParseSourceListFree (psd.src_list); /* get sample for dest_list */ sample = AECRSampleNew (); AddToSampleForDestList (sample, dest_list, action->dest); dest_list = ValNodeFree (dest_list); return sample; } static Int4 ApplyParseActionToSeqEntry (ParseActionPtr action, SeqEntryPtr sep) { ParseSrcCollectionData psd; ParseSourceInfoPtr psip; ValNodePtr orgnames = NULL, source_list_for_removal = NULL, vnp; Int4 num_succeeded = 0; if (action == NULL || sep == NULL) return 0; psd.src = action->src; psd.portion = action->portion; psd.src_list = NULL; /* first, we need to get a list of the parse sources */ VisitBioseqsInSep (sep, &psd, FindParseSourceBioseqCallback); if (action->capitalization != Cap_change_none) { /* if we will be fixing capitalization, get org names to use in fixes */ VisitBioSourcesInSep (sep, &orgnames, GetOrgNamesInRecordCallback); } /* for each parse source, we need to get 
a list of the destinations */ for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next) { if (vnp->data.ptrvalue == NULL) continue; psip = (ParseSourceInfoPtr) vnp->data.ptrvalue; if (action->remove_from_parsed) { ValNodeAddPointer (&source_list_for_removal, 0, ParseSourceInfoCopy (psip)); } /* fix source text */ FixCapitalizationInString (&(psip->parse_src_txt), action->capitalization, orgnames); ApplyTextTransformsToString (&(psip->parse_src_txt), action->transform); /* find destinations */ AddParseDestinations (psip, action->dest); } /* free orgname list if we created it */ orgnames = ValNodeFree (orgnames); CombineSourcesForDestinations (&(psd.src_list)); if (action->dest->choice == ParseDest_org) { PropagateSourceOnSegSetForParse (psd.src_list); } /* now do the parsing */ for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next) { psip = (ParseSourceInfoPtr) vnp->data.ptrvalue; num_succeeded += SetFieldForDestList (psip->dest_list, action->dest, psip->parse_src_txt, action->existing_text); } /* now remove strings from sources */ for (vnp = source_list_for_removal; vnp != NULL; vnp = vnp->next) { if (vnp->data.ptrvalue == NULL) continue; psip = (ParseSourceInfoPtr) vnp->data.ptrvalue; StripFieldForSrcList (psip, action->src, action->portion); } psd.src_list = ParseSourceListFree (psd.src_list); return num_succeeded; } static void SetCdRegionGeneticCode (SeqFeatPtr cds) { CdRegionPtr crp; SeqEntryPtr parent_sep; BioseqPtr bsp; Int4 genCode; ValNodePtr code, vnp; if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return; if (cds->data.value.ptrvalue == NULL) { cds->data.value.ptrvalue = CdRegionNew(); } crp = (CdRegionPtr) cds->data.value.ptrvalue; bsp = BioseqFindFromSeqLoc (cds->location); if (bsp == NULL) return; parent_sep = GetBestTopParentForData (bsp->idx.entityID, bsp); genCode = SeqEntryToGeneticCode (parent_sep, NULL, NULL, 0); code = ValNodeNew (NULL); if (code != NULL) { code->choice = 254; vnp = ValNodeNew (NULL); code->data.ptrvalue = vnp; if (vnp 
!= NULL) { vnp->choice = 2; vnp->data.intvalue = genCode; } } crp->genetic_code = code; } static void CreateDataForFeature (SeqFeatPtr sfp, Int4 feature_type) { Int4 featdef, seqfeattype; CharPtr label = NULL; RnaRefPtr rrp; RNAGenPtr rgp; ImpFeatPtr ifp; featdef = GetFeatdefFromFeatureType (feature_type); sfp->idx.subtype = featdef; seqfeattype = FindFeatFromFeatDefType (featdef); switch (seqfeattype) { case SEQFEAT_GENE: sfp->data.value.ptrvalue = GeneRefNew(); break; case SEQFEAT_CDREGION: sfp->data.value.ptrvalue = CdRegionNew(); SetCdRegionGeneticCode (sfp); break; case SEQFEAT_RNA: rrp = RnaRefNew(); rrp->ext.choice = 0; sfp->data.value.ptrvalue = rrp; switch (featdef) { case FEATDEF_preRNA: rrp->type = RNA_TYPE_premsg; break; case FEATDEF_mRNA: rrp->type = RNA_TYPE_mRNA; break; case FEATDEF_tRNA: rrp->type = RNA_TYPE_tRNA; break; case FEATDEF_rRNA: rrp->type = RNA_TYPE_rRNA; break; case FEATDEF_snRNA: rrp->type = RNA_TYPE_ncRNA; SetncRNAClass (rrp, NULL, "snRNA", ExistingTextOption_replace_old); break; case FEATDEF_scRNA: rrp->type = RNA_TYPE_ncRNA; SetncRNAClass (rrp, NULL, "scRNA", ExistingTextOption_replace_old); break; case FEATDEF_tmRNA: rrp->type = RNA_TYPE_tmRNA; rgp = RNAGenNew (); rrp->ext.choice = 3; rrp->ext.value.ptrvalue = rgp; break; case FEATDEF_ncRNA: rrp->type = RNA_TYPE_ncRNA; rgp = RNAGenNew (); rrp->ext.choice = 3; rrp->ext.value.ptrvalue = rgp; break; case FEATDEF_otherRNA: rrp->type = RNA_TYPE_misc_RNA; rgp = RNAGenNew(); rrp->ext.choice = 3; rrp->ext.value.ptrvalue = rgp; break; } break; case SEQFEAT_IMP: ifp = ImpFeatNew(); sfp->data.value.ptrvalue = ifp; label = GetFeatureNameFromFeatureType (feature_type); ifp->key = StringSave (label); break; } } static SeqLocPtr LocationFromApplyFeatureAction (BioseqPtr bsp, ApplyFeatureActionPtr action) { LocationIntervalPtr l; SeqLocPtr slp = NULL; Uint1 strand = Seq_strand_plus; Int4 from, to; if (bsp == NULL || action == NULL || action->location == NULL) return NULL; if (!action->plus_strand) 
{ strand = Seq_strand_minus; } if (action->location->choice == LocationChoice_interval) { l = (LocationIntervalPtr) action->location->data.ptrvalue; if (l != NULL) { from = MIN (l->from, l->to) - 1; to = MAX (l->from, l->to) - 1; slp = SeqLocIntNew (from, to, strand, SeqIdFindWorst (bsp->id)); } SetSeqLocPartial (slp, action->partial5, action->partial3); } else if (action->location->choice == LocationChoice_whole_sequence) { slp = SeqLocIntNew (0, bsp->length - 1, strand, SeqIdFindWorst (bsp->id)); SetSeqLocPartial (slp, action->partial5, action->partial3); } else if (action->location->choice == LocationChoice_point) { AddSeqLocPoint (&slp, SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0))), action->location->data.intvalue, FALSE, TRUE, strand); } return slp; } static Boolean OkToApplyToBioseq (ApplyFeatureActionPtr action, BioseqPtr bsp) { SeqFeatPtr sfp; SeqMgrFeatContext context; Int4 featdef; Boolean rval = TRUE; if (action == NULL || bsp == NULL) return FALSE; if (!action->add_redundant) { featdef = GetFeatdefFromFeatureType (action->type); sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context); if (sfp != NULL) { rval = FALSE; } } return rval; } static void AddParts (ApplyFeatureActionPtr action, BioseqSetPtr parts, ValNodePtr PNTR bsp_list) { SeqEntryPtr sep; Int4 seg_num; if (action == NULL || !action->apply_to_parts || parts == NULL || parts->_class != BioseqseqSet_class_parts || bsp_list == NULL) { return; } if (action->only_seg_num > -1) { seg_num = 0; sep = parts->seq_set; while (seg_num < action->only_seg_num && sep != NULL) { sep = sep->next; seg_num++; } if (sep != NULL && IS_Bioseq (sep) && OkToApplyToBioseq (action, sep->data.ptrvalue)) { ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, sep->data.ptrvalue); } } else { for (sep = parts->seq_set; sep != NULL; sep = sep->next) { if (IS_Bioseq (sep) && OkToApplyToBioseq (action, sep->data.ptrvalue)) { ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, sep->data.ptrvalue); } } } } static void 
AddSequenceOrParts (ApplyFeatureActionPtr action, BioseqPtr bsp, ValNodePtr PNTR bsp_list) { BioseqSetPtr bssp, parts; SeqEntryPtr sep; if (action == NULL || bsp == NULL || bsp_list == NULL) return; if (bsp->idx.parenttype == OBJ_BIOSEQSET && bsp->idx.parentptr != NULL) { bssp = (BioseqSetPtr) bsp->idx.parentptr; if (bssp->_class == BioseqseqSet_class_segset) { if (action->apply_to_parts) { sep = bssp->seq_set; while (sep != NULL && !IS_Bioseq_set (sep)) { sep = sep->next; } if (sep != NULL) { AddParts (action, sep->data.ptrvalue, bsp_list); } } else { if (OkToApplyToBioseq (action, bsp)) { ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp); } } } else if (bssp->_class == BioseqseqSet_class_parts) { if (action->apply_to_parts) { AddParts (action, bssp, bsp_list); } else { parts = bssp; if (parts->idx.parenttype == OBJ_BIOSEQSET && parts->idx.parentptr != NULL) { bssp = (BioseqSetPtr) parts->idx.parentptr; if (IS_Bioseq (bssp->seq_set) && OkToApplyToBioseq (action, bssp->seq_set->data.ptrvalue)) { ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp_list); } } } } else { if (OkToApplyToBioseq (action, bsp)) { ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp); } } } else { if (OkToApplyToBioseq (action, bsp)) { ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp); } } } static void AddSequenceOrPartsFromSeqEntry (ApplyFeatureActionPtr action, SeqEntryPtr sep, ValNodePtr PNTR bsp_list) { BioseqSetPtr bssp; SeqEntryPtr seq_set; if (action == NULL || sep == NULL) return; while (sep != NULL) { if (IS_Bioseq (sep)) { AddSequenceOrParts (action, sep->data.ptrvalue, bsp_list); } else if (IS_Bioseq_set (sep)) { bssp = (BioseqSetPtr) sep->data.ptrvalue; if (bssp->_class == BioseqseqSet_class_segset) { /* find master segment */ seq_set = bssp->seq_set; while (seq_set != NULL && !IS_Bioseq (seq_set)) { seq_set = seq_set->next; } if (seq_set != NULL) { AddSequenceOrParts (action, seq_set->data.ptrvalue, bsp_list); } } else if (bssp->_class == BioseqseqSet_class_nuc_prot) { /* find nucleotide sequence 
*/ seq_set = bssp->seq_set; if (seq_set != NULL) { if (IS_Bioseq_set (seq_set)) { /* nucleotide is segmented set */ bssp = (BioseqSetPtr) seq_set->data.ptrvalue; if (bssp != NULL && bssp->_class == BioseqseqSet_class_segset && bssp->seq_set != NULL && IS_Bioseq (bssp->seq_set)) { AddSequenceOrParts (action, bssp->seq_set->data.ptrvalue, bsp_list); } } else if (IS_Bioseq (seq_set)) { AddSequenceOrParts (action, seq_set->data.ptrvalue, bsp_list); } } } else { /* add from set members */ AddSequenceOrPartsFromSeqEntry (action, bssp->seq_set, bsp_list); } } sep = sep->next; } } static void AdjustProteinSequenceForReadingFrame (SeqFeatPtr cds) { BioseqPtr protbsp, bsp; ByteStorePtr bs; SeqFeatPtr prot_sfp; Boolean partial5, partial3; if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return; protbsp = BioseqFindFromSeqLoc (cds->product); if (protbsp == NULL) { bsp = BioseqFindFromSeqLoc (cds->location); if (bsp != NULL) { ExtraCDSCreationActions (cds, GetBestTopParentForData (bsp->idx.entityID, bsp)); } } else { bs = ProteinFromCdRegionExWithTrailingCodonHandling (cds, TRUE, FALSE, TRUE); protbsp->seq_data = (SeqDataPtr) BSFree ((ByteStorePtr)(protbsp->seq_data)); protbsp->seq_data = (SeqDataPtr) bs; protbsp->length = BSLen (bs); prot_sfp = GetProtFeature (protbsp); if (prot_sfp == NULL) { prot_sfp = CreateNewFeatureOnBioseq (protbsp, SEQFEAT_PROT, NULL); prot_sfp->data.value.ptrvalue = ProtRefNew (); CheckSeqLocForPartial (cds->location, &partial5, &partial3); SetSeqLocPartial (prot_sfp->location, partial5, partial3); prot_sfp->partial = (partial5 || partial3); } else { if (SeqLocLen (prot_sfp->location) != protbsp->length) { prot_sfp->location = SeqLocFree (prot_sfp->location); prot_sfp->location = SeqLocIntNew (0, protbsp->length - 1, Seq_strand_plus, SeqIdFindWorst (protbsp->id)); CheckSeqLocForPartial (cds->location, &partial5, &partial3); SetSeqLocPartial (prot_sfp->location, partial5, partial3); prot_sfp->partial = (partial5 || partial3); } } } } NLM_EXTERN 
SeqFeatPtr ApplyOneFeatureToBioseq (BioseqPtr bsp, Uint1 featdef, SeqLocPtr slp, ValNodePtr fields, ValNodePtr src_fields, Boolean add_mrna) { Int4 seqfeattype; SeqFeatPtr sfp, gene = NULL, mrna = NULL; FeatQualLegalValPtr q; FeatureField f; ValNodePtr field_vnp; Int4 feature_type; seqfeattype = FindFeatFromFeatDefType (featdef); sfp = CreateNewFeatureOnBioseq (bsp, seqfeattype, slp); if (sfp == NULL) return NULL; feature_type = GetFeatureTypeFromFeatdef(featdef); CreateDataForFeature (sfp, feature_type); /* any extra actions */ switch (featdef) { case FEATDEF_CDS : ExtraCDSCreationActions (sfp, GetBestTopParentForData (bsp->idx.entityID, bsp)); break; case FEATDEF_source : if (src_fields != NULL) { sfp->data.value.ptrvalue = ImpFeatFree (sfp->data.value.ptrvalue); sfp->data.choice = SEQFEAT_BIOSRC; sfp->data.value.ptrvalue = BioSourceFromSourceQualVals (src_fields); } break; } for (field_vnp = fields; field_vnp != NULL; field_vnp = field_vnp->next) { q = (FeatQualLegalValPtr) field_vnp->data.ptrvalue; if (q != NULL) { f.field = ValNodeNew(NULL); f.field->next = NULL; f.field->choice = FeatQualChoice_legal_qual; f.field->data.intvalue = q->qual; if (sfp->data.choice != SEQFEAT_GENE && (q->qual == Feat_qual_legal_gene || q->qual == Feat_qual_legal_gene_description)) { if (gene == NULL) { gene = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, slp); CreateDataForFeature (gene, Macro_feature_type_gene); } f.type = Macro_feature_type_gene; SetQualOnFeature (gene, &f, NULL, q->val, ExistingTextOption_replace_old); } else { f.type = feature_type; SetQualOnFeature (sfp, &f, NULL, q->val, ExistingTextOption_replace_old); } } } if (featdef == FEATDEF_CDS) { /* retranslate, to account for change in reading frame */ AdjustProteinSequenceForReadingFrame (sfp); /* after the feature has been created, then adjust it for gaps */ /* Note - this step may result in multiple coding regions being created. 
*/ AdjustCDSLocationsForUnknownGapsCallback (sfp, NULL); if (add_mrna) { slp = SeqLocCopy (slp); mrna = CreateNewFeatureOnBioseq (bsp, SEQFEAT_RNA, slp); CreateDataForFeature (mrna, Macro_feature_type_mRNA); for (field_vnp = fields; field_vnp != NULL; field_vnp = field_vnp->next) { q = (FeatQualLegalValPtr) field_vnp->data.ptrvalue; if (q != NULL && q->qual == Feat_qual_legal_product) { f.field = ValNodeNew(NULL); f.field->next = NULL; f.field->choice = FeatQualChoice_legal_qual; f.field->data.intvalue = q->qual; f.type = Macro_feature_type_mRNA; SetQualOnFeature (mrna, &f, NULL, q->val, ExistingTextOption_replace_old); } } } } return sfp; } static Int4 ApplyApplyFeatureActionToSeqEntry (ApplyFeatureActionPtr action, SeqEntryPtr sep) { ValNodePtr bsp_list = NULL, vnp; Int4 featdef; BioseqPtr bsp; SeqFeatPtr sfp; SeqLocPtr slp; SeqIdPtr sip; Int4 num_created = 0; Int4 len; CharPtr list_delimiters = " ,\t;"; CharPtr cp, tmp; if (sep == NULL || action == NULL) return 0; /* first, get list of Bioseqs to apply features to */ /* relevant values : seq_list, add_redundant, apply_to_parts, only_seg_num */ if (action->seq_list != NULL && action->seq_list->choice == SequenceListChoice_list) { for (vnp = action->seq_list->data.ptrvalue; vnp != NULL; vnp = vnp->next) { cp = (CharPtr) vnp->data.ptrvalue; while (cp != NULL && *cp != 0) { len = StringCSpn (cp, list_delimiters); if (len > 0) { tmp = (CharPtr) MemNew (sizeof (Char) * (len + 1)); StringNCpy (tmp, cp, len); tmp[len] = 0; sip = CreateSeqIdFromText (tmp, sep); bsp = BioseqFind (sip); if (bsp != NULL) { AddSequenceOrParts (action, bsp, &bsp_list); } cp += len; } cp += StringSpn (cp, list_delimiters); } } } else { AddSequenceOrPartsFromSeqEntry (action, sep, &bsp_list); } /* now add feature to each bioseq in list */ for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { bsp = vnp->data.ptrvalue; if (bsp == NULL) continue; featdef = GetFeatdefFromFeatureType (action->type); slp = LocationFromApplyFeatureAction (bsp, action); 
sfp = ApplyOneFeatureToBioseq (bsp, featdef, slp, action->fields, action->src_fields, action->add_mrna); if (sfp != NULL) { num_created++; } } return num_created; } typedef struct convertandremovefeaturecollection { Uint1 featdef; ValNodePtr constraint_set; ValNodePtr feature_list; } ConvertAndRemoveFeatureCollectionData, PNTR ConvertAndRemoveFeatureCollectionPtr; static void ConvertAndRemoveFeatureCollectionCallback (SeqFeatPtr sfp, Pointer data) { ConvertAndRemoveFeatureCollectionPtr p; if (sfp == NULL || data == NULL) return; p = (ConvertAndRemoveFeatureCollectionPtr) data; if ((p->featdef == FEATDEF_ANY || sfp->idx.subtype == p->featdef ) && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, p->constraint_set)) { ValNodeAddPointer (&(p->feature_list), OBJ_SEQFEAT, sfp); } } static Int4 ApplyRemoveFeatureActionToSeqEntry (RemoveFeatureActionPtr action, SeqEntryPtr sep) { ConvertAndRemoveFeatureCollectionData d; ValNodePtr vnp; SeqFeatPtr sfp; Int4 num_deleted = 0, num_products_deleted = 0; BioseqPtr bsp; if (action == NULL) return 0; d.featdef = GetFeatdefFromFeatureType (action->type); d.constraint_set = action->constraint; d.feature_list = NULL; VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback); for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) { sfp = vnp->data.ptrvalue; if (sfp != NULL) { sfp->idx.deleteme = TRUE; if (sfp->product != NULL && (bsp = BioseqFind(SeqLocId(sfp->product))) != NULL) { bsp->idx.deleteme = TRUE; num_products_deleted++; } num_deleted ++; } } d.feature_list = ValNodeFree (d.feature_list); DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); RenormalizeNucProtSets (sep, TRUE); return num_deleted + num_products_deleted; } /* functions for converting features */ static Boolean ApplyConvertFeatureSrcOptions (SeqFeatPtr sfp, ValNodePtr src_options, Boolean keep_original) { ConvertFromCDSOptionsPtr options = NULL; Boolean rval = FALSE; if (sfp == NULL) return FALSE; if (src_options == NULL) return 
TRUE; if (src_options->choice == ConvertFeatureSrcOptions_cds) { options = (ConvertFromCDSOptionsPtr) src_options->data.ptrvalue; if (options != NULL) { ApplyCDSOptionsToFeature (sfp, options->remove_mRNA, options->remove_gene, options->remove_transcript_id, keep_original); rval = TRUE; } } return rval; } typedef Boolean (*ConvertFeatureFunc) PROTO ((SeqFeatPtr, Int4, ConvertFeatureDstOptionsPtr)); static void ApplyRNADestinationOptions (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { CharPtr existing_class; FeatureField ff; /* apply destination options */ if (featdef_to == FEATDEF_ncRNA && dst_options != NULL && dst_options->choice == ConvertFeatureDstOptions_ncrna_class && !StringHasNoText (dst_options->data.ptrvalue)) { ff.type = Macro_feature_type_ncRNA; ff.field = ValNodeNew (NULL); ff.field->choice = FeatQualChoice_legal_qual; ff.field->data.intvalue = Feat_qual_legal_ncRNA_class; existing_class = GetQualFromFeature (sfp, &ff, NULL); if (StringCmp (dst_options->data.ptrvalue, existing_class) != 0) { sfp->idx.subtype = FEATDEF_ncRNA; SetQualOnFeature (sfp, &ff, NULL, dst_options->data.ptrvalue, ExistingTextOption_append_semi); } existing_class = MemFree (existing_class); ff.field = ValNodeFree (ff.field); } } static Boolean ConvertCDSToRNAFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { Boolean rval; if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) { return FALSE; } rval = ConvertCDSToRNA (sfp, featdef_to); if (rval) { ApplyRNADestinationOptions (sfp, featdef_to, dst_options); } return rval; } static Boolean ConvertGeneToRNAFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { Boolean rval; rval = ConvertGeneToRNA (sfp, featdef_to); if (rval) { ApplyRNADestinationOptions (sfp, featdef_to, dst_options); } return rval; } static Boolean ConvertBioSrcToRegionFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { return 
ConvertBioSrcToRepeatRegion (sfp, featdef_to); } static Boolean ConvertCDSToMiscFeatFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { Boolean rval = FALSE; if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) { return FALSE; } else if (sfp->pseudo) { rval = ConvertOnePseudoCDSToMiscFeatEx (sfp, FALSE); } else { /* do other here */ rval = ConvertNonPseudoCDSToMiscFeat (sfp, FALSE); } return rval; } static Boolean ConvertImpToProtFuncEx (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { return ConvertImpToProtFunc (sfp, featdef_to); } static Boolean ConvertProtToImpFuncEx (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { return ConvertProtToImpFunc (sfp, featdef_to); } static Boolean ConvertProtToProt (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { return ConvertProtToProtFunc (sfp, featdef_to); } static Boolean ConvertCDSToMatPeptide (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { return AutoConvertCDSToMiscFeat (sfp, (dst_options == NULL || dst_options->choice != ConvertFeatureDstOptions_remove_original) ? 
FALSE : dst_options->data.boolvalue); } static Boolean ConvertImpToRNAFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { RnaRefPtr rrp; GBQualPtr qual, qual_prev = NULL; Boolean add_to_comment = FALSE; CharPtr old_comment = NULL; if (sfp == NULL || sfp->data.choice != SEQFEAT_IMP) { return FALSE; } for (qual = sfp->qual; qual != NULL && StringCmp (qual->qual, "product") != 0; qual = qual->next) { qual_prev = qual; } if (qual != NULL) { old_comment = StringSave (qual->val); if (qual_prev == NULL) { sfp->qual = qual->next; } else { qual_prev->next = qual->next; } qual->next = NULL; qual = GBQualFree (qual); } else { old_comment = sfp->comment; sfp->comment = NULL; } rrp = RnaRefFromLabel (featdef_to, old_comment, &add_to_comment); sfp->data.value.ptrvalue = ImpFeatFree ((ImpFeatPtr) sfp->data.value.ptrvalue); sfp->data.choice = SEQFEAT_RNA; sfp->data.value.ptrvalue = (Pointer) rrp; SetRNAProductString (sfp, NULL, old_comment, ExistingTextOption_replace_old); if (add_to_comment) { SetStringValue (&(sfp->comment), old_comment, ExistingTextOption_append_semi); } old_comment = MemFree (old_comment); ApplyRNADestinationOptions (sfp, featdef_to, dst_options); return TRUE; } static Boolean ConvertRegionToImp (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { return ConvertRegionToImpFunc (sfp, featdef_to); } static Boolean ConvertImpToImp (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { return ConvertImpToImpFunc (sfp, featdef_to); } static Boolean ConvertRegionToRNA (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { Boolean rval; rval = ConvertRegionToRNAFunc (sfp, featdef_to); if (rval) { ApplyRNADestinationOptions (sfp, featdef_to, dst_options); } return rval; } static Boolean ConvertncRNAToMiscBinding (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { RnaRefPtr rrp; RNAGenPtr rgp; ImpFeatPtr ifp; rrp = (RnaRefPtr) 
sfp->data.value.ptrvalue; if (NULL == rrp) return FALSE; if (rrp->ext.choice == 1) { /* move product to note */ SetStringValue (&(sfp->comment), rrp->ext.value.ptrvalue, ExistingTextOption_append_semi); } else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL && !StringHasNoText (rgp->product)) { SetStringValue (&(sfp->comment), rgp->product, ExistingTextOption_append_semi); } rrp = RnaRefFree (rrp); sfp->data.choice = SEQFEAT_IMP; ifp = ImpFeatNew (); ifp->key = StringSave ("misc_binding"); sfp->data.value.ptrvalue = ifp; sfp->idx.subtype = 0; return TRUE; } static Boolean ConvertCommentToMiscFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { ImpFeatPtr ifp; if (sfp == NULL || sfp->data.choice != SEQFEAT_COMMENT || sfp->data.value.ptrvalue != NULL) { return FALSE; } ifp = ImpFeatNew (); if (ifp != NULL) { ifp->key = StringSave ("misc_feature"); sfp->data.choice = SEQFEAT_IMP; sfp->data.value.ptrvalue = (Pointer) ifp; return TRUE; } return FALSE; } static Boolean ConvertGeneToImpFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { return ConvertGeneToImpFeatFunc (sfp, featdef_to); } static Boolean ConvertRNAToImpFeatEx (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { CharPtr product = NULL; ImpFeatPtr ifp; Uint1 seqfeattype; if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) { return FALSE; } seqfeattype = FindFeatFromFeatDefType (featdef_to); if (seqfeattype != SEQFEAT_IMP) { return FALSE; } product = GetRNAProductString (sfp, NULL); RemoveRNAProductString (sfp, NULL); sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue); ifp = ImpFeatNew (); ifp->key = StringSave (GetImportFeatureName (featdef_to)); sfp->data.choice = SEQFEAT_IMP; sfp->data.value.ptrvalue = (Pointer) ifp; SetStringValue (&(sfp->comment), product, ExistingTextOption_append_semi); product = MemFree (product); return TRUE; } NLM_EXTERN Boolean ConvertRNAToImpFeat 
(SeqFeatPtr sfp, Int4 featdef_to)
{
  return ConvertRNAToImpFeatEx (sfp, featdef_to, NULL);
}


/* Site -> import feature of subtype featdef_to.  When a name is available
 * for the original site type it is added as a "note" GenBank qualifier at
 * the head of the qualifier list.
 */
static Boolean ConvertSiteToImpFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
  GBQualPtr  gbqual;
  ImpFeatPtr ifp;
  Int2       sitetype;
  CharPtr    str;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_SITE) {
    return FALSE;
  }

  ifp = ImpFeatNew ();
  if (NULL == ifp) {
    return FALSE;
  }

  /* read the site type before the data union is overwritten */
  sitetype = (Int2) sfp->data.value.intvalue;
  sfp->data.choice = SEQFEAT_IMP;
  sfp->data.value.ptrvalue = (Pointer) ifp;
  ifp->key = StringSave (GetImportFeatureName (featdef_to));

  str = GetMacroSiteTypeName (MacroSiteTypeFromAsn1SiteType (sitetype));
  if (str == NULL) {
    return TRUE;
  }
  gbqual = GBQualNew ();
  if (gbqual != NULL) {
    gbqual->qual = StringSave ("note");
    gbqual->val = StringSave (str);
    gbqual->next = sfp->qual;
    sfp->qual = gbqual;
  }
  return TRUE;
}


/* Protein feature -> Region, using the protein name as the region name.
 *
 * The conversion is only possible when the ProtRef has exactly one name and
 * that name is not blank.  Returns TRUE when the feature was converted and
 * FALSE (feature untouched) otherwise, so that the caller does not count or
 * log a conversion that did not happen.
 */
static Boolean ConvertProtToRegion (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
  ProtRefPtr prp;
  ValNodePtr vnp;
  CharPtr    str;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) {
    return FALSE;
  }
  prp = (ProtRefPtr) sfp->data.value.ptrvalue;
  if (NULL == prp) {
    return FALSE;
  }

  vnp = prp->name;
  if (vnp == NULL || vnp->next != NULL) {
    return FALSE;
  }
  str = (CharPtr) vnp->data.ptrvalue;
  if (StringHasNoText (str)) {
    return FALSE;
  }

  /* detach the name so it survives ProtRefFree and becomes the region name */
  vnp->data.ptrvalue = NULL;
  sfp->data.value.ptrvalue = ProtRefFree (prp);
  sfp->data.choice = SEQFEAT_REGION;
  sfp->data.value.ptrvalue = (Pointer) str;
  return TRUE;
}


/* Table adapter: region -> protein feature (dst_options unused). */
static Boolean ConvertRegionToProt (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
  return ConvertRegionToProtFunc (sfp, featdef_to);
}


/* Any feature -> Bond feature of the bond type given in dst_options.
 *
 * A feature on a non-protein sequence is first moved to the protein
 * location obtained from GetProteinLocationForNucleotideFeatureConversion.
 * A location that is not already a SEQLOC_BOND is replaced by one whose two
 * points are the start and stop of the current location.  The converted
 * feature is then copied into a new feature on the SeqEntry of the Bioseq
 * the location refers to, and sfp itself is marked for deletion.
 *
 * Returns FALSE when the options do not describe a bond conversion or when
 * no Bioseq / protein location / SeqEntry can be found.
 */
static Boolean ConvertToBond (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
  SeqLocPtr   slp = NULL;
  BioseqPtr   bsp;
  SeqEntryPtr sep;
  Boolean     no_cds = FALSE;
  SeqFeatPtr  new_sfp;
  SeqIdPtr    sip;
  SeqBondPtr  sbp;
  SeqPntPtr   spp;

  if (sfp == NULL || featdef_to != FEATDEF_BOND || dst_options == NULL
      || dst_options->choice != ConvertFeatureDstOptions_bond) {
    return FALSE;
  }

  SeqFeatDataFree (&(sfp->data));
  sfp->data.choice = SEQFEAT_BOND;
  sfp->data.value.intvalue = Asn1BondTypeFromMacroBondType (dst_options->data.intvalue);

  bsp = BioseqFindFromSeqLoc (sfp->location);
  if (bsp == NULL) {
    return FALSE;
  }
  if (!ISA_aa (bsp->mol)) {
    slp = GetProteinLocationForNucleotideFeatureConversion (sfp->location, &no_cds);
    if (no_cds || slp == NULL) {
      return FALSE;
    }
    sfp->location = SeqLocFree (sfp->location);
    sfp->location = slp;
  }

  if (sfp->location->choice != SEQLOC_BOND) {
    sip = SeqLocId (sfp->location);
    if (sip != NULL) {
      sbp = SeqBondNew ();
      if (sbp != NULL) {
        slp = ValNodeNew (NULL);
        if (slp != NULL) {
          slp->choice = SEQLOC_BOND;
          slp->data.ptrvalue = (Pointer) sbp;
          spp = SeqPntNew ();
          if (spp != NULL) {
            spp->strand = SeqLocStrand (sfp->location);
            spp->id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (sip, 0)));
            spp->point = SeqLocStart (sfp->location);
            sbp->a = spp;
          }
          spp = SeqPntNew ();
          if (spp != NULL) {
            spp->strand = SeqLocStrand (sfp->location);
            spp->id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (sip, 0)));
            spp->point = SeqLocStop (sfp->location);
            sbp->b = spp;
          }
          sfp->location = SeqLocFree (sfp->location);
          sfp->location = slp;
        }
      }
    }
  }

  sfp->idx.subtype = 0;
  /* look up the Bioseq from the feature's current location: the local slp
   * is only set on some of the paths above */
  bsp = GetBioseqGivenSeqLoc (sfp->location, sfp->idx.entityID);
  if (bsp == NULL) {
    return FALSE;
  }
  sep = SeqMgrGetSeqEntryForData (bsp);
  if (sep == NULL) {
    return FALSE;
  }

  new_sfp = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite);
  sfp->idx.deleteme = TRUE;
  CreateNewFeature (sep, NULL, SEQFEAT_BOND, new_sfp);
  return TRUE;
}


/* Any feature -> Site feature of the site type given in dst_options.
 *
 * A feature on a non-protein sequence is first moved to the protein
 * location obtained from GetProteinLocationForNucleotideFeatureConversion.
 * The converted feature is then copied into a new feature on the SeqEntry of
 * the Bioseq the location refers to, and sfp itself is marked for deletion.
 *
 * Returns FALSE when the options do not describe a site conversion or when
 * no Bioseq / protein location / SeqEntry can be found.
 */
static Boolean ConvertToSite (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
  SeqLocPtr   slp = NULL;
  BioseqPtr   bsp;
  SeqEntryPtr sep;
  Boolean     no_cds = FALSE;
  SeqFeatPtr  new_sfp;

  if (sfp == NULL || featdef_to != FEATDEF_SITE || dst_options == NULL
      || dst_options->choice != ConvertFeatureDstOptions_site) {
    return FALSE;
  }

  SeqFeatDataFree (&(sfp->data));
  sfp->data.choice = SEQFEAT_SITE;
  sfp->data.value.intvalue = Asn1SiteTypeFromMacroSiteType (dst_options->data.intvalue);

  bsp = BioseqFindFromSeqLoc (sfp->location);
  if (bsp == NULL) {
    return FALSE;
  }
  if (!ISA_aa (bsp->mol)) {
    slp = GetProteinLocationForNucleotideFeatureConversion (sfp->location, &no_cds);
    if (no_cds || slp == NULL) {
      return FALSE;
    }
    sfp->location = SeqLocFree (sfp->location);
    sfp->location = slp;
  }

  sfp->idx.subtype = 0;
  /* look up the Bioseq from the feature's current location: the local slp
   * stays NULL when the feature was already on a protein sequence */
  bsp = GetBioseqGivenSeqLoc (sfp->location, sfp->idx.entityID);
  if (bsp == NULL) {
    return FALSE;
  }
  sep = SeqMgrGetSeqEntryForData (bsp);
  if (sep == NULL) {
    return FALSE;
  }

  new_sfp = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite);
  sfp->idx.deleteme = TRUE;
  CreateNewFeature (sep, NULL, SEQFEAT_SITE, new_sfp);
  return TRUE;
}


/* Any feature -> Region feature, with the feature comment becoming the
 * region name.
 *
 * The RegionType in dst_options selects whether the result lives on the
 * nucleotide or the protein sequence; the location is mapped across when
 * the feature is currently on the other kind of sequence.  The converted
 * feature is copied into a new feature on the SeqEntry of the Bioseq the
 * location refers to, and sfp itself is marked for deletion.
 */
static Boolean ConvertToRegion (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
  BioseqPtr     bsp;
  RegionTypePtr r;
  Boolean       create_prot_feats, no_cds = FALSE;
  SeqLocPtr     slp;
  SeqEntryPtr   sep;
  SeqFeatPtr    new_sfp;

  if (sfp == NULL || featdef_to != FEATDEF_REGION || dst_options == NULL
      || dst_options->choice != ConvertFeatureDstOptions_region
      || dst_options->data.ptrvalue == NULL) {
    return FALSE;
  }

  r = (RegionTypePtr) dst_options->data.ptrvalue;
  create_prot_feats = !r->create_nucleotide;

  bsp = BioseqFindFromSeqLoc (sfp->location);
  if (bsp == NULL) return FALSE;

  if (ISA_aa (bsp->mol)) {
    if (create_prot_feats) {
      slp = (SeqLocPtr) AsnIoMemCopy (sfp->location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
    } else {
      slp = FindNucleotideLocationForProteinFeatureConversion (sfp->location);
    }
    sfp->location = SeqLocFree (sfp->location);
    sfp->location = slp;
  } else if (create_prot_feats) {
    slp = GetProteinLocationForNucleotideFeatureConversion (sfp->location, &no_cds);
    if (no_cds) {
      return FALSE;
    }
    sfp->location = SeqLocFree (sfp->location);
    sfp->location = slp;
  }

  bsp = GetBioseqGivenSeqLoc (sfp->location, sfp->idx.entityID);
  if (bsp == NULL) {
    return FALSE;
  }
  sep = SeqMgrGetSeqEntryForData (bsp);
  if (sep == NULL) {
    return FALSE;
  }

  /* the comment string is handed over to the region data, not copied */
  SeqFeatDataFree (&(sfp->data));
  sfp->data.choice = SEQFEAT_REGION;
  sfp->data.value.ptrvalue = sfp->comment;
  sfp->comment = NULL;

  new_sfp = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite);
  sfp->idx.deleteme = TRUE;
  CreateNewFeature (sep, NULL, SEQFEAT_REGION, new_sfp);
  return TRUE;
}


/* RNA -> RNA of a different subtype.  The product string is carried over to
 * a new RnaRef built by RnaRefFromLabel, and re-appended to the comment when
 * that function asks for it.
 *
 * Returns FALSE when sfp is NULL, is not an RNA feature, or has no RnaRef.
 */
static Boolean ConvertRNAToRNA (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
  RnaRefPtr rrp;
  Boolean   add_to_comment = FALSE;
  CharPtr   product;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) {
    return FALSE;
  }
  rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
  if (NULL == rrp) {
    return FALSE;
  }

  product = GetRNAProductString (sfp, NULL);
  RemoveRNAProductString (sfp, NULL);

  sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue);
  sfp->data.value.ptrvalue = RnaRefFromLabel (featdef_to, product, &add_to_comment);

  SetRNAProductString (sfp, NULL, product, ExistingTextOption_replace_old);
  if (add_to_comment) {
    SetStringValue (&(sfp->comment), product, ExistingTextOption_append_semi);
  }
  product = MemFree (product);

  /* apply destination options */
  ApplyRNADestinationOptions (sfp, featdef_to, dst_options);
  sfp->idx.subtype = 0;
  return TRUE;
}


static Boolean MiscFeatToCodingRegionConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
  return ConvertMiscFeatToCodingRegion
(sfp); } static Boolean mRNAToCodingRegionConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { return ConvertmRNAToCodingRegion (sfp); } static Boolean tRNAToGeneConvertFunc(SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { return ConverttRNAToGene (sfp); } static Boolean MiscFeatToGeneConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) { return ConvertMiscFeatToGene (sfp); } typedef struct convertfeattable { Uint2 seqfeat_from; Uint2 featdef_from; Uint2 seqfeat_to; Uint2 featdef_to; ConvertFeatureFunc func; CharPtr help_text; } ConvertFeatTableData, PNTR ConvertFeatTablePtr; static ConvertFeatTableData conversion_functions[] = { { SEQFEAT_CDREGION, FEATDEF_CDS, SEQFEAT_RNA, FEATDEF_ANY, ConvertCDSToRNAFunc, "Delete protein product sequence.\nClear product field if transcript ID removal was requested.\nIf converting to tRNA and anticodon value can be parsed from label, set aa value, and add any text that could not be parsed into an anticodon value to the feature note.\nIf converting to other RNA, put label in RNA product." }, { SEQFEAT_GENE, FEATDEF_GENE, SEQFEAT_RNA, FEATDEF_ANY, ConvertGeneToRNAFunc, "If converting to tRNA and anticodon value can be parsed from label, set aa value, and add any text that could not be parsed into an anticodon value to the feature note. If converting to other RNA, put label in RNA product. Also append gene locus, allele, description, map location, and locus tag to comment (as long as these values are not already in the label and therefore in the RNA product)." }, { SEQFEAT_BIOSRC, FEATDEF_BIOSRC, SEQFEAT_IMP, FEATDEF_repeat_region, ConvertBioSrcToRegionFunc, "Creates a repeat_region with mobile_element qualifiers for the transposon and/or insertion sequence qualifiers on the BioSource. All other BioSource information is discarded." 
}, { SEQFEAT_CDREGION, FEATDEF_CDS, SEQFEAT_IMP, FEATDEF_misc_feature, ConvertCDSToMiscFeatFunc, "Copy comment from coding region to new misc_feature and remove product field. If not pseudo coding region, add product name from protein feature to new misc_feature comment and delete product sequence." }, { SEQFEAT_IMP, FEATDEF_ANY, SEQFEAT_PROT, FEATDEF_ANY, ConvertImpToProtFuncEx, "Original feature must be on nucleotide sequence and be contained in coding region location. Coding region must have product protein sequence. New feature is created on product protein sequence so that the translated location will be as close as possible to the original nucleotide location (may not be exact because of codon boundaries)." }, { SEQFEAT_PROT, FEATDEF_mat_peptide_aa, SEQFEAT_IMP, FEATDEF_ANY, ConvertProtToImpFuncEx, "Original feature must be on a protein sequence that is a product of a coding region.\nNew feature will be created on same sequence as coding region.\n" "If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n" "EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on the new feature.\n" "Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." 
}, { SEQFEAT_PROT, FEATDEF_sig_peptide_aa, SEQFEAT_IMP, FEATDEF_ANY, ConvertProtToImpFuncEx, "Original feature must be on a protein sequence that is a product of a coding region.\nNew feature will be created on same sequence as coding region.\n" "If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n" "EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on the new feature.\n" "Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." }, { SEQFEAT_PROT, FEATDEF_transit_peptide_aa, SEQFEAT_IMP, FEATDEF_ANY, ConvertProtToImpFuncEx, "Original feature must be on a protein sequence that is a product of a coding region.\nNew feature will be created on same sequence as coding region.\n" "If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n" "EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on the new feature.\n" "Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." }, { SEQFEAT_IMP, FEATDEF_ANY, SEQFEAT_RNA, FEATDEF_ANY, ConvertImpToRNAFunc, "Creates an RNA feature of the specified subtype. Import feature key is discarded." }, { SEQFEAT_IMP, FEATDEF_misc_feature, SEQFEAT_CDREGION, FEATDEF_CDS, MiscFeatToCodingRegionConvertFunc, "Use misc_feature comment for coding region product name." }, { SEQFEAT_IMP, FEATDEF_misc_feature, SEQFEAT_GENE, FEATDEF_GENE, MiscFeatToGeneConvertFunc, "Creates gene with locus value from misc_feature comment." 
}, { SEQFEAT_REGION, FEATDEF_REGION, SEQFEAT_IMP, FEATDEF_ANY, ConvertRegionToImp, "Creates a misc_feature with the region name saved as a /note qualifier." }, { SEQFEAT_REGION, FEATDEF_REGION, SEQFEAT_RNA, FEATDEF_ANY, ConvertRegionToRNA, "Creates an RNA feature with the region name as the product name." }, { SEQFEAT_COMMENT, FEATDEF_ANY, SEQFEAT_IMP, FEATDEF_misc_feature, ConvertCommentToMiscFeat, "Creates a misc_feature with the same note as the original. Note - the flatfile display for the feature is the same." }, { SEQFEAT_GENE, FEATDEF_GENE, SEQFEAT_IMP, FEATDEF_ANY, ConvertGeneToImpFeat, "Creates an import feature with the gene description and locus prepended to the original comment, separated by semicolons." }, { SEQFEAT_RNA, FEATDEF_ANY, SEQFEAT_IMP, FEATDEF_ANY, ConvertRNAToImpFeatEx, "Creates an import feature of the specified subtype and adds the RNA product name to the comment." } , { SEQFEAT_RNA, FEATDEF_mRNA, SEQFEAT_CDREGION, FEATDEF_CDS, mRNAToCodingRegionConvertFunc, "Convert mRNA to coding region, use mRNA product for protein feature" }, { SEQFEAT_RNA, FEATDEF_tRNA, SEQFEAT_GENE, FEATDEF_GENE, tRNAToGeneConvertFunc, "Convert tRNA to gene, use tRNA product for gene description" }, { SEQFEAT_SITE, FEATDEF_ANY, SEQFEAT_IMP, FEATDEF_ANY, ConvertSiteToImpFeat, "Creates an import feature of the specified subtype with the site type name as a /note qualifier." } , { SEQFEAT_PROT, FEATDEF_mat_peptide_aa, SEQFEAT_REGION, FEATDEF_REGION, NULL, "Creates a Region feature with the protein name as the region name." }, { SEQFEAT_PROT, FEATDEF_ANY, SEQFEAT_REGION, FEATDEF_REGION, ConvertProtToRegion, "Creates a Region feature with the protein name as the region name." }, { SEQFEAT_REGION, FEATDEF_REGION, SEQFEAT_PROT, FEATDEF_ANY, ConvertRegionToProt, "If feature is on nucleotide sequence, will create feature on protein product sequence for overlapping coding region. Protein name will be region name." 
}, { 0, FEATDEF_ANY, SEQFEAT_BOND, FEATDEF_BOND, ConvertToBond, "Create Bond feature with specified bond type. Location is a SeqLocBond with a point at the start of the original location and a point at the end of the original location. All feature ID, partialness, except, comment, product, location, genbank qualifiers, title, citation, experimental evidence, gene xrefs, db xrefs, and pseudo-ness information is discarded." }, { 0, FEATDEF_ANY, SEQFEAT_SITE, FEATDEF_SITE, ConvertToSite, "Create Site feature with specified site type. All feature ID, partialness, except, comment, product, location, genbank qualifiers, title, citation, experimental evidence, gene xrefs, db xrefs, and pseudo-ness information is discarded." }, { 0, FEATDEF_ANY, SEQFEAT_REGION, FEATDEF_REGION, ConvertToRegion, "Create Region feature on nucleotide sequence or protein product sequence of overlapping coding region as specified. Use comment on feature for region name.\n" "All feature ID, partialness, except, comment, product, location, genbank qualifiers, title, citation, experimental evidence, gene xrefs, db xrefs, and pseudo-ness information is discarded." }, { SEQFEAT_IMP, FEATDEF_ANY, SEQFEAT_IMP, FEATDEF_ANY, ConvertImpToImp, "Changes type of import feature." }, { SEQFEAT_RNA, FEATDEF_ANY, SEQFEAT_RNA, FEATDEF_ANY, ConvertRNAToRNA, "Changes type of RNA feature." }, { SEQFEAT_RNA, FEATDEF_ncRNA, SEQFEAT_IMP, FEATDEF_misc_binding, ConvertncRNAToMiscBinding, "Changes ncRNA to misc_binding." }, { SEQFEAT_PROT, FEATDEF_ANY, SEQFEAT_PROT, FEATDEF_ANY, ConvertProtToProt, "Changes type of protein feature." 
}, { SEQFEAT_CDREGION, FEATDEF_CDS, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, ConvertCDSToMatPeptide, "If coding region is overlapped by another coding region, will convert the coding region to a mat-peptide on the overlapping coding region's protein sequence, otherwise if you have checked \"Leave Original Feature\" it will create a mat-peptide with the same protein names and description on the protein sequence for the coding region." } }; static Int4 num_convert_feature_table_lines = sizeof (conversion_functions) / sizeof (ConvertFeatTableData); static Int4 GetConversionFunctionTableLine (Uint2 seqfeat_from, Uint2 featdef_from, Uint2 seqfeat_to, Uint2 featdef_to) { Int4 i, table_line_num = -1; for (i = 0; i < num_convert_feature_table_lines && table_line_num == -1; i++) { if ((conversion_functions[i].seqfeat_from == 0 || conversion_functions[i].seqfeat_from == seqfeat_from) && (conversion_functions[i].featdef_from == FEATDEF_ANY || conversion_functions[i].featdef_from == featdef_from) && (conversion_functions[i].seqfeat_to == 0 || conversion_functions[i].seqfeat_to == seqfeat_to) && (conversion_functions[i].featdef_to == FEATDEF_ANY || conversion_functions[i].featdef_to == featdef_to)) { table_line_num = i; } } return table_line_num; } NLM_EXTERN Boolean IsConversionSupported (Uint2 type_from, Uint2 type_to) { Int4 line; Uint2 featdef_from, featdef_to, seqfeat_from, seqfeat_to; featdef_from = GetFeatdefFromFeatureType (type_from); seqfeat_from = FindFeatFromFeatDefType (featdef_from); featdef_to = GetFeatdefFromFeatureType (type_to); seqfeat_to = FindFeatFromFeatDefType (featdef_to); line = GetConversionFunctionTableLine (seqfeat_from, featdef_from, seqfeat_to, featdef_to); if (line > -1 && conversion_functions[line].func != NULL) { return TRUE; } else { return FALSE; } } static CharPtr GetFeatureTextForLogging (SeqFeatPtr sfp) { ValNode vn; Int4 len; CharPtr txt = NULL; MemSet (&vn, 0, sizeof (ValNode)); vn.choice = OBJ_SEQFEAT; vn.data.ptrvalue = sfp; txt = 
GetDiscrepancyItemText (&vn); if (txt == NULL) { txt = StringSave ("(null)"); } else { len = StringLen (txt); if (len > 0 && txt[len - 1] == '\n') { txt[len - 1] = 0; } } return txt; } static Int4 ApplyConvertFeatureActionToSeqEntry (ConvertFeatureActionPtr action, SeqEntryPtr sep, FILE *log_fp) { ConvertAndRemoveFeatureCollectionData d; ValNodePtr vnp; SeqFeatPtr sfp, sfp_copy; Int4 num_affected = 0, table_line; Uint2 seqfeat_from, featdef_from, seqfeat_to, featdef_to; /* variables for logging */ CharPtr txt_old, txt_new; if (action == NULL) return 0; featdef_from = GetFeatdefFromFeatureType (action->type_from); seqfeat_from = FindFeatFromFeatDefType(featdef_from); featdef_to = GetFeatdefFromFeatureType (action->type_to); seqfeat_to = FindFeatFromFeatDefType (featdef_to); table_line = GetConversionFunctionTableLine (seqfeat_from, featdef_from, seqfeat_to, featdef_to); if (table_line < 0 || conversion_functions[table_line].func == NULL) { return 0; } d.featdef = GetFeatdefFromFeatureType (action->type_from); d.constraint_set = action->src_feat_constraint; d.feature_list = NULL; VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback); if (d.feature_list == NULL) { return 0; } for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) { sfp = vnp->data.ptrvalue; if (sfp != NULL) { sfp_copy = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite); /* add subtype value to copy */ sfp_copy->idx.subtype = sfp->idx.subtype; sfp_copy->next = sfp->next; sfp->next = sfp_copy; if (conversion_functions[table_line].func (sfp_copy, featdef_to, action->dst_options)) { ApplyConvertFeatureSrcOptions (sfp_copy, action->src_options, action->leave_original); num_affected ++; if (!action->leave_original) { sfp->idx.deleteme = TRUE; } if (log_fp != NULL) { txt_old = GetFeatureTextForLogging (sfp); txt_new = GetFeatureTextForLogging (sfp_copy); if (action->leave_original) { fprintf (log_fp, "Added new feature %s based on %s\n", 
txt_new, txt_old); } else { fprintf (log_fp, "Replaced feature %s with %s\n", txt_old, txt_new); } txt_old = MemFree (txt_old); txt_new = MemFree (txt_new); } sfp_copy->idx.subtype = 0; } else { sfp_copy->idx.deleteme = TRUE; } } } DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); RenormalizeNucProtSets (sep, TRUE); return num_affected; } /* Functions for editing feature locations */ static Boolean DoesStrandMatch (Int4 strand_choice, Uint1 strand_val) { Boolean rval = FALSE; switch (strand_choice) { case Feature_location_strand_from_any: rval = TRUE; break; case Feature_location_strand_from_unknown: if (strand_val == Seq_strand_unknown) { rval = TRUE; } break; case Feature_location_strand_from_plus: if (strand_val != Seq_strand_minus) { rval = TRUE; } break; case Feature_location_strand_from_minus: if (strand_val == Seq_strand_minus) { rval = TRUE; } break; case Feature_location_strand_from_both: if (strand_val == Seq_strand_both) { rval = TRUE; } break; } return rval; } static Uint1 GetNewStrandValue (Int4 strand_choice, Uint1 strand_val) { Uint1 rval = Seq_strand_unknown; switch (strand_choice) { case Feature_location_strand_to_reverse: switch (strand_val) { case Seq_strand_plus: case Seq_strand_unknown: rval = Seq_strand_minus; break; case Seq_strand_minus: rval = Seq_strand_plus; break; default: rval = strand_val; break; } break; case Feature_location_strand_to_unknown: rval = Seq_strand_unknown; break; case Feature_location_strand_to_plus: rval = Seq_strand_plus; break; case Feature_location_strand_to_minus: rval = Seq_strand_minus; break; case Feature_location_strand_to_both: rval = Seq_strand_both; break; } return rval; } static Boolean ConvertLocationStrand (SeqLocPtr slp, Int4 fromStrand, Int4 toStrand) { SeqLocPtr loc; PackSeqPntPtr psp; SeqBondPtr sbp; SeqIntPtr sinp; SeqPntPtr spp; Boolean rval = FALSE; Uint1 strand_orig; while (slp != NULL) { switch (slp->choice) { case SEQLOC_NULL : break; case SEQLOC_EMPTY : case SEQLOC_WHOLE : break; 
case SEQLOC_INT : sinp = (SeqIntPtr) slp->data.ptrvalue; if (sinp != NULL && DoesStrandMatch (fromStrand, sinp->strand)) { strand_orig = sinp->strand; sinp->strand = GetNewStrandValue (toStrand, sinp->strand); if (strand_orig != sinp->strand) { rval = TRUE; } } break; case SEQLOC_PNT : spp = (SeqPntPtr) slp->data.ptrvalue; if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand)) { strand_orig = spp->strand; spp->strand = GetNewStrandValue (toStrand, spp->strand); if (strand_orig != spp->strand) { rval = TRUE; } } break; case SEQLOC_PACKED_PNT : psp = (PackSeqPntPtr) slp->data.ptrvalue; if (psp != NULL && DoesStrandMatch (fromStrand, psp->strand)) { strand_orig = psp->strand; psp->strand = GetNewStrandValue (toStrand, psp->strand); if (strand_orig != psp->strand) { rval = TRUE; } } break; case SEQLOC_PACKED_INT : case SEQLOC_MIX : case SEQLOC_EQUIV : loc = (SeqLocPtr) slp->data.ptrvalue; while (loc != NULL) { rval |= ConvertLocationStrand (loc, fromStrand, toStrand); loc = loc->next; } break; case SEQLOC_BOND : sbp = (SeqBondPtr) slp->data.ptrvalue; if (sbp != NULL) { spp = (SeqPntPtr) sbp->a; if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand)) { strand_orig = spp->strand; spp->strand = GetNewStrandValue (toStrand, spp->strand); if (strand_orig != spp->strand) { rval = TRUE; } } spp = (SeqPntPtr) sbp->b; if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand)) { strand_orig = spp->strand; spp->strand = GetNewStrandValue (toStrand, spp->strand); if (strand_orig != spp->strand) { rval = TRUE; } } } break; case SEQLOC_FEAT : break; default : break; } slp = slp->next; } return rval; } static Boolean ApplyEditLocationStrandToSeqFeat (EditLocationStrandPtr edit, SeqFeatPtr sfp) { Boolean rval = FALSE; if (edit == NULL || sfp == NULL) { return FALSE; } rval = ConvertLocationStrand (sfp->location, edit->strand_from, edit->strand_to); return rval; } static Boolean At5EndOfSequence (SeqLocPtr slp, BioseqPtr bsp) { Uint1 strand; Int4 start; Boolean at_end = 
FALSE; if (slp == NULL || bsp == NULL) return FALSE; strand = SeqLocStrand (slp); if (strand == Seq_strand_minus) { start = SeqLocStop (slp); if (start == bsp->length - 1) { at_end = TRUE; } } else { start = SeqLocStart (slp); if (start == 0) { at_end = TRUE; } } return at_end; } static Boolean HasGoodStartCodon (SeqFeatPtr sfp) { ByteStorePtr bs; CharPtr prot; Boolean has_start = FALSE; if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) { bs = ProteinFromCdRegionEx (sfp, TRUE, FALSE); if (bs != NULL) { prot = BSMerge (bs, NULL); bs = BSFree (bs); if (prot != NULL && *prot == 'M') { has_start = TRUE; } prot = MemFree (prot); } } return has_start; } static Boolean ApplyPartial5SetActionToSeqFeat (Partial5SetActionPtr action, SeqFeatPtr sfp) { Boolean rval = FALSE; Boolean make_partial = FALSE; Uint1 strand; BioseqPtr bsp; CdRegionPtr crp; Boolean partial5, partial3; if (action == NULL || sfp == NULL) return FALSE; bsp = BioseqFindFromSeqLoc (sfp->location); strand = SeqLocStrand (sfp->location); switch (action->constraint) { case Partial_5_set_constraint_all: make_partial = TRUE; break; case Partial_5_set_constraint_at_end: make_partial = At5EndOfSequence (sfp->location, bsp); break; case Partial_5_set_constraint_bad_start: make_partial = HasGoodStartCodon (sfp); break; case Partial_5_set_constraint_frame_not_one: if (sfp->data.choice == SEQFEAT_CDREGION && (crp = sfp->data.value.ptrvalue) != NULL && crp->frame != 0 && crp->frame != 1) { make_partial = TRUE; } break; } if (make_partial) { CheckSeqLocForPartial (sfp->location, &partial5, &partial3); if (!partial5) { SetSeqLocPartial (sfp->location, TRUE, partial3); if (action->extend && bsp != NULL) { if (ExtendSeqLocToEnd (sfp->location, bsp, TRUE)) { ChooseBestFrame (sfp); } } rval = TRUE; } } return rval; } static Boolean ApplyClear5PartialToSeqFeat (Int4 action, SeqFeatPtr sfp) { Boolean rval = FALSE, clear_partial = FALSE; Boolean partial5, partial3; if (sfp == NULL) return FALSE; switch (action) { case 
Partial_5_clear_constraint_all: clear_partial = TRUE; break; case Partial_5_clear_constraint_not_at_end: clear_partial = !At5EndOfSequence(sfp->location, BioseqFindFromSeqLoc (sfp->location)); break; case Partial_5_clear_constraint_good_start: clear_partial = !HasGoodStartCodon(sfp); break; } if (clear_partial) { CheckSeqLocForPartial (sfp->location, &partial5, &partial3); if (partial5) { SetSeqLocPartial (sfp->location, FALSE, partial3); rval = TRUE; } } return rval; } static Boolean At3EndOfSequence (SeqLocPtr slp, BioseqPtr bsp) { Uint1 strand; Int4 stop; Boolean at_end = FALSE; if (slp == NULL || bsp == NULL) return FALSE; strand = SeqLocStrand (slp); if (strand == Seq_strand_minus) { stop = SeqLocStart (slp); if (stop == 0) { at_end = TRUE; } } else { stop = SeqLocStop (slp); if (stop == bsp->length - 1) { at_end = TRUE; } } return at_end; } static Boolean HasGoodStopCodon (SeqFeatPtr sfp) { ByteStorePtr bs; CharPtr prot; Boolean has_stop = FALSE; if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) { bs = ProteinFromCdRegionEx (sfp, TRUE, FALSE); if (bs != NULL) { prot = BSMerge (bs, NULL); bs = BSFree (bs); if (prot != NULL && prot[StringLen (prot) - 1] == '*') { has_stop = TRUE; } prot = MemFree (prot); } } return has_stop; } static Boolean ApplyPartial3SetActionToSeqFeat (Partial3SetActionPtr action, SeqFeatPtr sfp) { Boolean rval = FALSE; Boolean make_partial = FALSE; Uint1 strand; BioseqPtr bsp; Boolean partial5, partial3; if (action == NULL || sfp == NULL) return FALSE; bsp = BioseqFindFromSeqLoc (sfp->location); strand = SeqLocStrand (sfp->location); switch (action->constraint) { case Partial_3_set_constraint_all: make_partial = TRUE; break; case Partial_3_set_constraint_at_end: make_partial = At3EndOfSequence (sfp->location, bsp); break; case Partial_3_set_constraint_bad_end: make_partial = HasGoodStopCodon (sfp); break; } if (make_partial) { CheckSeqLocForPartial (sfp->location, &partial5, &partial3); if (!partial3) { SetSeqLocPartial 
(sfp->location, partial5, TRUE); if (action->extend && bsp != NULL) { ExtendSeqLocToEnd (sfp->location, bsp, FALSE); } rval = TRUE; } } return rval; } static Boolean ApplyClear3PartialToSeqFeat (Int4 action, SeqFeatPtr sfp) { Boolean rval = FALSE, clear_partial = FALSE; Boolean partial5, partial3; if (sfp == NULL) return FALSE; switch (action) { case Partial_3_clear_constraint_all: clear_partial = TRUE; break; case Partial_3_clear_constraint_not_at_end: clear_partial = !At3EndOfSequence(sfp->location, BioseqFindFromSeqLoc (sfp->location)); break; case Partial_3_clear_constraint_good_end: clear_partial = HasGoodStopCodon(sfp); break; } if (clear_partial) { CheckSeqLocForPartial (sfp->location, &partial5, &partial3); if (partial3) { SetSeqLocPartial (sfp->location, partial5, FALSE); rval = TRUE; } } return rval; } static Boolean ApplyPartialBothSetActionToSeqFeat (PartialBothSetActionPtr action, SeqFeatPtr sfp) { Boolean rval = FALSE; Boolean make_partial = FALSE; Uint1 strand; BioseqPtr bsp; Boolean partial5, partial3; if (action == NULL || sfp == NULL) return FALSE; bsp = BioseqFindFromSeqLoc (sfp->location); strand = SeqLocStrand (sfp->location); switch (action->constraint) { case Partial_both_set_constraint_all: make_partial = TRUE; break; case Partial_both_set_constraint_at_end: make_partial = At5EndOfSequence (sfp->location, bsp) && At3EndOfSequence (sfp->location, bsp); break; } if (make_partial) { CheckSeqLocForPartial (sfp->location, &partial5, &partial3); if (!partial5 || !partial3) { SetSeqLocPartial (sfp->location, TRUE, TRUE); if (action->extend && bsp != NULL) { ExtendSeqLocToEnd (sfp->location, bsp, FALSE); if (ExtendSeqLocToEnd (sfp->location, bsp, TRUE)) { ChooseBestFrame (sfp); } } rval = TRUE; } } return rval; } static Boolean ApplyClearBothPartialToSeqFeat (Int4 action, SeqFeatPtr sfp) { Boolean rval = FALSE, clear_partial = FALSE; Boolean partial5, partial3; BioseqPtr bsp; if (sfp == NULL) return FALSE; switch (action) { case 
Partial_both_clear_constraint_all:
      clear_partial = TRUE;
      break;
    case Partial_both_clear_constraint_not_at_end:
      /* clear only when neither end of the location is at a sequence end */
      bsp = BioseqFindFromSeqLoc (sfp->location);
      clear_partial = !At5EndOfSequence (sfp->location, bsp) && !At3EndOfSequence(sfp->location, bsp);
      break;
    /* NOTE(review): this switch is over Partial_both_clear_constraint_*
     * values, but the label below is a Partial_3_clear_constraint_* constant
     * and the test is negated, unlike ApplyClear3PartialToSeqFeat.  Confirm
     * which constraint was intended before relying on this branch.
     */
    case Partial_3_clear_constraint_good_end:
      clear_partial = !HasGoodStopCodon(sfp);
      break;
  }
  if (clear_partial) {
    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
    if (partial5 || partial3) {
      SetSeqLocPartial (sfp->location, FALSE, FALSE);
      rval = TRUE;
    }
  }
  return rval;
}


/* Rebuilds the location of sfp according to convert_location (join, order
 * or merge) by replacing sfp->location with the result of SeqLocMerge.
 * The 5'/3' partial state is read before the conversion and re-applied to
 * the new location afterwards.
 *
 * Returns FALSE when sfp is NULL or no Bioseq can be found for its location,
 * and TRUE when the location was replaced.
 */
static Boolean ApplyConvertLocationToSeqFeat (Int4 convert_location, SeqFeatPtr sfp)
{
  Boolean hasNulls, rval = FALSE;
  SeqLocPtr slp;
  BioseqPtr bsp;
  Boolean partial5, partial3;

  if (sfp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location))== NULL) {
    return FALSE;
  }

  /* remember the partial state; it is restored on the rebuilt location */
  CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
  hasNulls = LocationHasNullsBetween (sfp->location);
  switch (convert_location) {
    case Convert_location_type_join :
      /* only locations that currently have NULLs between parts are converted */
      if (hasNulls) {
        slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, FALSE);
        sfp->location = SeqLocFree (sfp->location);
        sfp->location = slp;
        if (bsp->repr == Seq_repr_seg) {
          /* segmented Bioseq: map the merged location onto the parts */
          slp = SegLocToPartsEx (bsp, sfp->location, FALSE);
          sfp->location = SeqLocFree (sfp->location);
          sfp->location = slp;
          hasNulls = LocationHasNullsBetween (sfp->location);
          sfp->partial = (sfp->partial || hasNulls);
        }
        FreeAllFuzz (sfp->location);
        SetSeqLocPartial (sfp->location, partial5, partial3);
        rval = TRUE;
      }
      break;
    case Convert_location_type_order :
      /* only locations that currently have no NULLs between parts are converted */
      if (!hasNulls) {
        slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, TRUE);
        sfp->location = SeqLocFree (sfp->location);
        sfp->location = slp;
        if (bsp->repr == Seq_repr_seg) {
          slp = SegLocToPartsEx (bsp, sfp->location, TRUE);
          sfp->location = SeqLocFree (sfp->location);
          sfp->location = slp;
          hasNulls = LocationHasNullsBetween (sfp->location);
          sfp->partial = (sfp->partial || hasNulls);
        }
        FreeAllFuzz (sfp->location);
        SetSeqLocPartial (sfp->location, partial5, partial3);
        rval = 
TRUE; } break; case Convert_location_type_merge : if (sfp->location->choice != SEQLOC_INT) { slp = SeqLocMerge (bsp, sfp->location, NULL, TRUE, FALSE, FALSE); sfp->location = SeqLocFree (sfp->location); sfp->location = slp; SetSeqLocPartial (sfp->location, partial5, partial3); rval = TRUE; } default: break; } return rval; } static Boolean ExtendSeqFeat5 (SeqFeatPtr sfp) { BioseqPtr bsp; CdRegionPtr crp; Int4 start_diff; Boolean partial5, partial3; if (sfp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location)) == NULL) { return FALSE; } if ((start_diff = ExtendSeqLocToEnd (sfp->location, bsp, TRUE)) > 0) { if (sfp->data.choice == SEQFEAT_CDREGION) { CheckSeqLocForPartial (sfp->location, &partial5, &partial3); if (partial5) { crp = (CdRegionPtr) sfp->data.value.ptrvalue; if (crp != NULL) { if (crp->frame == 0) { crp->frame = 1; } crp->frame = (crp->frame + start_diff - 1) % 3 + 1; } } } return TRUE; } else { return FALSE; } } static Boolean ExtendSeqFeat3 (SeqFeatPtr sfp) { BioseqPtr bsp; Uint1 strand; Int4 stop_before, stop_after; if (sfp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location)) == NULL) { return FALSE; } strand = SeqLocStrand (sfp->location); if (strand == Seq_strand_minus) { stop_before = SeqLocStart (sfp->location); } else { stop_before = SeqLocStop (sfp->location); } ExtendSeqLocToEnd (sfp->location, bsp, FALSE); if (strand == Seq_strand_minus) { stop_after = SeqLocStart (sfp->location); } else { stop_after = SeqLocStop (sfp->location); } if (stop_before == stop_after) { return FALSE; } else { return TRUE; } } static Int4 ExtendSeqInt5ToPos (SeqIntPtr sint, Int4 pos) { Int4 distance = 0; if (sint == NULL) { return FALSE; } if (sint->strand == Seq_strand_minus) { distance = pos - sint->to; sint->to = pos; } else { distance = sint->from - pos; sint->from = pos; } return distance; } static Int4 ExtendSeqInt3ToPos (SeqIntPtr sint, Int4 pos) { Int4 distance = 0; if (sint == NULL) { return FALSE; } if (sint->strand == Seq_strand_minus) { distance = 
sint->from - pos; sint->from = pos; } else { distance = pos - sint->to; sint->to = pos; } return distance; } static Int4 ExtendSeqLocToPos (SeqLocPtr slp, Int4 pos, Boolean end5) { Int4 diff = 0; SeqLocPtr slp_index; if (slp == NULL) return 0; switch (slp->choice) { case SEQLOC_INT: if (end5) { diff = ExtendSeqInt5ToPos (slp->data.ptrvalue, pos); } else { diff = ExtendSeqInt3ToPos (slp->data.ptrvalue, pos); } break; case SEQLOC_MIX: case SEQLOC_PACKED_INT: if (end5) { /* take the first one */ diff = ExtendSeqLocToPos (slp->data.ptrvalue, pos, end5); } else { /* take the last one */ for (slp_index = slp->data.ptrvalue; slp_index != NULL && slp_index->next != NULL; slp_index = slp_index->next) { } if (slp_index != NULL) { diff = ExtendSeqLocToPos (slp_index, pos, end5); } } break; } return diff; } static Boolean s_StrandsMatch (Uint1 strand1, Uint1 strand2) { Boolean rval = FALSE; if (strand1 == Seq_strand_minus) { if (strand2 == Seq_strand_minus) { rval = TRUE; } } else { if (strand2 != Seq_strand_minus) { rval = TRUE; } } return rval; } static Int4 FindPosBeforeFeat (SeqFeatPtr sfp, ExtendToFeaturePtr efp) { BioseqPtr bsp; Int4 featdef, start, stop, tmp, pos = -1; SeqMgrFeatContext context; SeqFeatPtr regulator = NULL, candidate; Uint1 strand; if (sfp == NULL || efp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location)) == NULL) { return -1; } featdef = GetFeatdefFromFeatureType (efp->type); start = SeqLocStart (sfp->location); stop = SeqLocStop (sfp->location); if (stop < start) { tmp = stop; stop = start; start = tmp; } strand = SeqLocStrand (sfp->location); candidate = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context); /* take last match before feature */ while (candidate != NULL && (context.right < start || (efp->include_feat && context.left < start))) { if (s_StrandsMatch(strand, context.strand) && DoesValueMatchQuantityConstraint (start - context.right, efp->distance)) { regulator = candidate; if (efp->include_feat) { pos = context.left; } else { pos = 
context.right + 1;
      }
    }
    candidate = SeqMgrGetNextFeature (bsp, candidate, 0, featdef, &context);
  }
  return pos;
}


/* Looks, among the indexed features of type efp->type on the Bioseq of sfp,
 * for the first strand-compatible feature after sfp whose distance from the
 * end of sfp satisfies efp->distance.
 *
 * Returns the position to extend to: the right end of the found feature when
 * efp->include_feat is set, otherwise the position just before its left end.
 * Returns -1 when sfp or efp is NULL, no Bioseq is found, or nothing matches.
 */
static Int4 FindPosAfterFeat (SeqFeatPtr sfp, ExtendToFeaturePtr efp)
{
  BioseqPtr bsp;
  Int4 featdef, start, stop, tmp, pos = -1;
  SeqMgrFeatContext context;
  SeqFeatPtr regulator = NULL, candidate;
  Uint1 strand;

  if (sfp == NULL || efp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location)) == NULL) {
    return -1;
  }

  featdef = GetFeatdefFromFeatureType (efp->type);
  start = SeqLocStart (sfp->location);
  stop = SeqLocStop (sfp->location);
  /* normalize so that start <= stop */
  if (stop < start) {
    tmp = stop;
    stop = start;
    start = tmp;
  }
  strand = SeqLocStrand (sfp->location);
  candidate = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context);
  /* take first match after feature */
  /* first loop: skip candidates that start before the end of sfp or are on
   * an incompatible strand
   */
  while (candidate != NULL && (context.left < stop || (efp->include_feat && context.right < stop) || !s_StrandsMatch(strand, context.strand))) {
    candidate = SeqMgrGetNextFeature (bsp, candidate, 0, featdef, &context);
  }
  /* second loop: stop at the first remaining candidate that also satisfies
   * the distance constraint
   */
  while (candidate != NULL && regulator == NULL) {
    if (DoesValueMatchQuantityConstraint (context.left - stop, efp->distance) && s_StrandsMatch(strand, context.strand)) {
      regulator = candidate;
      if (efp->include_feat) {
        pos = context.right;
      } else {
        pos = context.left - 1;
      }
    } else {
      candidate = SeqMgrGetNextFeature (bsp, candidate, 0, featdef, &context);
    }
  }
  return pos;
}


/* Extends the 5' end (end5 TRUE) or 3' end (end5 FALSE) of the location of
 * sfp up to a neighbouring feature described by efp.  Which neighbour search
 * is used depends on the strand: on the minus strand the 5' end looks after
 * the feature and the 3' end looks before it; otherwise the reverse.
 */
static Boolean ExtendSeqFeatToFeat (SeqFeatPtr sfp, ExtendToFeaturePtr efp, Boolean end5)
{
  Int4 pos = -1;
  Uint2 strand;
  CdRegionPtr crp;
  Int4 start_diff;
  Boolean partial5, partial3;

  if (sfp == NULL) {
    return FALSE;
  }
  strand = SeqLocStrand (sfp->location);
  if (end5) {
    if (strand == Seq_strand_minus) {
      pos = FindPosAfterFeat (sfp, efp);
    } else {
      pos = FindPosBeforeFeat (sfp, efp);
    }
  } else {
    if (strand == Seq_strand_minus) {
      pos = FindPosBeforeFeat (sfp, efp);
    } else {
      pos = FindPosAfterFeat (sfp, efp);
    }
  }
  /* only a found position that actually moves the end counts as a change */
  if (pos > -1 && (start_diff = ExtendSeqLocToPos (sfp->location, pos, end5)) > 0) {
    if (end5 && sfp->data.choice == SEQFEAT_CDREGION) {
      
CheckSeqLocForPartial (sfp->location, &partial5, &partial3); if (partial5) { crp = (CdRegionPtr) sfp->data.value.ptrvalue; if (crp != NULL) { if (crp->frame == 0) { crp->frame = 1; } crp->frame = (crp->frame + start_diff - 1) % 3 + 1; } } } return TRUE; } else { return FALSE; } } static Boolean ApplyLocationEditTypeToSeqFeat (ValNodePtr action, SeqFeatPtr sfp) { Boolean rval = FALSE; if (action == NULL || sfp == NULL) { return FALSE; } switch (action->choice) { case LocationEditType_strand: rval = ApplyEditLocationStrandToSeqFeat (action->data.ptrvalue, sfp); break; case LocationEditType_set_5_partial: rval = ApplyPartial5SetActionToSeqFeat (action->data.ptrvalue, sfp); break; case LocationEditType_clear_5_partial: rval = ApplyClear5PartialToSeqFeat (action->data.intvalue, sfp); break; case LocationEditType_set_3_partial: rval = ApplyPartial3SetActionToSeqFeat (action->data.ptrvalue, sfp); break; case LocationEditType_clear_3_partial: rval = ApplyClear3PartialToSeqFeat (action->data.intvalue, sfp); break; case LocationEditType_set_both_partial: rval = ApplyPartialBothSetActionToSeqFeat (action->data.ptrvalue, sfp); break; case LocationEditType_clear_both_partial: rval = ApplyClearBothPartialToSeqFeat (action->data.intvalue, sfp); break; case LocationEditType_convert: rval = ApplyConvertLocationToSeqFeat (action->data.intvalue, sfp); break; case LocationEditType_extend_5: rval = ExtendSeqFeat5 (sfp); break; case LocationEditType_extend_3: rval = ExtendSeqFeat3 (sfp); break; case LocationEditType_extend_5_to_feat: rval = ExtendSeqFeatToFeat (sfp, action->data.ptrvalue, TRUE); break; case LocationEditType_extend_3_to_feat: rval = ExtendSeqFeatToFeat (sfp, action->data.ptrvalue, FALSE); break; } return rval; } static Int4 ApplyEditFeatureLocationActionToSeqEntry (EditFeatureLocationActionPtr action, SeqEntryPtr sep, FILE *log_fp) { ConvertAndRemoveFeatureCollectionData d; ValNodePtr vnp; SeqFeatPtr sfp, gene; Int4 num_affected = 0; /* variables for logging */ CharPtr 
old_loc = NULL, new_loc; Boolean retranslated, adjusted_gene; if (action == NULL) return 0; d.featdef = GetFeatdefFromFeatureType (action->type); d.constraint_set = action->constraint; d.feature_list = NULL; VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback); for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) { sfp = vnp->data.ptrvalue; if (sfp != NULL) { if (log_fp != NULL) { old_loc = SeqLocPrintUseBestID (sfp->location); } if (sfp->data.choice != SEQFEAT_GENE && action->also_edit_gene) { gene = GetGeneForFeature (sfp); } else { gene = NULL; } if (ApplyLocationEditTypeToSeqFeat (action->action, sfp)) { adjusted_gene = FALSE; if (gene != NULL && ApplyLocationEditTypeToSeqFeat (action->action, gene)) { adjusted_gene = TRUE; } retranslated = FALSE; if (sfp->data.choice == SEQFEAT_CDREGION && action->retranslate_cds) { SeqMgrIndexFeatures(sfp->idx.entityID, NULL); retranslated = RetranslateOneCDS (sfp, sfp->idx.entityID, TRUE, TRUE); } num_affected++; if (log_fp != NULL) { new_loc = SeqLocPrintUseBestID (sfp->location); fprintf (log_fp, "Changed location %s to %s%s%s\n", old_loc, new_loc, retranslated ? " and retranslated protein" : "", adjusted_gene ? 
" and adjusted gene location" : ""); new_loc = MemFree (new_loc); } } old_loc = MemFree (old_loc); } } return num_affected; } typedef struct molinfoblocklog { MolinfoBlockPtr mib; FILE *log_fp; Boolean any_change; } MolInfoBlockLogData, PNTR MolInfoBlockLogPtr; static void ApplyMolinfoBlockCallback (BioseqPtr bsp, Pointer data) { MolInfoBlockLogPtr ml; MolinfoBlockPtr mib; ValNodePtr field; MolInfoPtr mip; Char id_buf[100]; CharPtr field_name; if (bsp == NULL) { return; } ml = (MolInfoBlockLogPtr) data; if (ml == NULL || ml->mib == NULL) { return; } mib = ml->mib; if (!DoesObjectMatchConstraintChoiceSet (OBJ_BIOSEQ, bsp, mib->constraint)) { return; } mip = GetMolInfoForBioseq (bsp); for (field = mib->from_list; field != NULL; field = field->next) { switch (field->choice) { case MolinfoField_molecule: if (mip == NULL || mip->biomol != BiomolFromMoleculeType (field->data.intvalue)) { return; } break; case MolinfoField_technique: if (mip == NULL || mip->tech != TechFromTechniqueType (field->data.intvalue)) { return; } break; case MolinfoField_completedness: if (mip == NULL || mip->completeness != CompletenessFromCompletednessType (field->data.intvalue)) { return; } break; case MolinfoField_mol_class: if (bsp->mol != MolFromMoleculeClassType (field->data.intvalue)) { return; } break; case MolinfoField_topology: if (bsp->topology != TopologyFromTopologyType (field->data.intvalue)) { return; } break; case MolinfoField_strand: if (bsp->strand != StrandFromStrandType (field->data.intvalue)) { return; } break; } } for (field = mib->to_list; field != NULL; field = field->next) { if (SetSequenceQualOnBioseq (bsp, field)) { if (ml->log_fp != NULL) { SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1); field_name = GetSequenceQualName (field); fprintf (ml->log_fp, "Changed to %s for %s\n", field_name, id_buf); field_name = MemFree (field_name); } ml->any_change = TRUE; } } } static Boolean ApplyMolinfoBlockToSeqEntryEx (SeqEntryPtr 
sep, MolinfoBlockPtr mib, FILE *log_fp) { MolInfoBlockLogData md; md.any_change = FALSE; md.log_fp = log_fp; md.mib = mib; VisitBioseqsInSep (sep, &md, ApplyMolinfoBlockCallback); return md.any_change; } NLM_EXTERN void ApplyMolinfoBlockToSeqEntry (SeqEntryPtr sep, MolinfoBlockPtr mib) { ApplyMolinfoBlockToSeqEntryEx (sep, mib, NULL); } static Boolean ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPtr sep, FILE *log_fp); static Boolean FixAuthorNamesCaps (FixAuthorCapsPtr action, SeqEntryPtr sep, FILE *log_fp); static Boolean ApplyFixCapsToSeqEntry (SeqEntryPtr sep, FixCapsActionPtr action, FILE *log_fp) { Boolean any_change = FALSE; if (sep == NULL || action == NULL) { return FALSE; } switch (action->choice) { case FixCapsAction_pub: any_change = ApplyFixPubCapsToSeqEntry (action->data.ptrvalue, sep, log_fp); break; case FixCapsAction_src_country: any_change = FixupCountryQualsWithLog (sep, FALSE, log_fp); break; case FixCapsAction_mouse_strain: any_change = FixupMouseStrains (sep, log_fp); break; case FixCapsAction_src_qual: any_change = FixSrcQualCaps (sep, action->data.intvalue, log_fp); break; case FixCapsAction_author: any_change = FixAuthorNamesCaps (action->data.ptrvalue, sep, log_fp); break; } return any_change; } static void FixCollectionDatesCallback (BioSourcePtr biop, Pointer data) { LogInfoPtr lip; SubSourcePtr ssp; CharPtr new_date; if (biop == NULL) { return; } lip = (LogInfoPtr) data; for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { if (ssp->subtype == SUBSRC_collection_date) { new_date = ReformatDateWithMonthNames (ssp->name); if (new_date != NULL && StringCmp (new_date, ssp->name) != 0) { if (lip != NULL) { if (lip->fp != NULL) { fprintf (lip->fp, "Changed '%s' to '%s'\n", ssp->name, new_date); } lip->data_in_log = TRUE; } ssp->name = MemFree (ssp->name); ssp->name = new_date; new_date = NULL; } new_date = MemFree (new_date); } } } NLM_EXTERN SubSourcePtr FindBadLatLon (BioSourcePtr biop) { SubSourcePtr ssp, ssp_bad = 
NULL; Boolean format_ok, lat_in_range, lon_in_range, precision_ok; if (biop == NULL) { return NULL; } for (ssp = biop->subtype; ssp != NULL && ssp_bad == NULL; ssp = ssp->next) { if (ssp->subtype == SUBSRC_lat_lon) { IsCorrectLatLonFormat (ssp->name, &format_ok, &precision_ok, &lat_in_range, &lon_in_range); if (!format_ok || !lat_in_range || !lon_in_range) { ssp_bad = ssp; } } } return ssp_bad; } static void FindBadLatLonDesc (SeqDescrPtr sdp, Pointer userdata) { if (sdp == NULL || sdp->choice != Seq_descr_source || userdata == NULL) { return; } if (FindBadLatLon (sdp->data.ptrvalue) != NULL) { ValNodeAddPointer ((ValNodePtr PNTR) userdata, OBJ_SEQDESC, sdp); } } static void FindBadLatLonFeat (SeqFeatPtr sfp, Pointer userdata) { if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || userdata == NULL) { return; } if (FindBadLatLon (sfp->data.value.ptrvalue) != NULL) { ValNodeAddPointer ((ValNodePtr PNTR) userdata, OBJ_SEQFEAT, sfp); } } NLM_EXTERN ValNodePtr FindBadLatLonObjects (SeqEntryPtr sep) { ValNodePtr list = NULL; VisitDescriptorsInSep (sep, &list, FindBadLatLonDesc); VisitFeaturesInSep (sep, &list, FindBadLatLonFeat); return list; } static void AddAltitudeToSubSourceNote (BioSourcePtr biop, CharPtr extra_text) { SubSourcePtr ssp; CharPtr new_note, new_note_fmt = "%s%saltitude:%s"; if (biop == NULL || StringHasNoText (extra_text)) { return; } ssp = biop->subtype; while (ssp != NULL && ssp->subtype != SUBSRC_other) { ssp = ssp->next; } if (ssp == NULL) { ssp = SubSourceNew (); ssp->subtype = SUBSRC_other; ssp->next = biop->subtype; biop->subtype = ssp; } new_note = (CharPtr) MemNew (sizeof (Char) * (StringLen (ssp->name) + StringLen (extra_text) + StringLen (new_note_fmt))); sprintf (new_note, new_note_fmt, ssp->name == NULL ? "" : ssp->name, ssp->name == NULL ? 
"" : "; ", extra_text); ssp->name = MemFree (ssp->name); ssp->name = new_note; } NLM_EXTERN Boolean LatLonAutocorrectList (FILE *fp, ValNodePtr object_list) { ValNodePtr vnp; SeqDescrPtr sdp; BioSourcePtr biop; SubSourcePtr bad_ssp; CharPtr fix, extra_text; Boolean any_change = FALSE; if (object_list == NULL) return FALSE; for (vnp = object_list; vnp != NULL; vnp = vnp->next) { if (vnp->choice != OBJ_SEQDESC) continue; sdp = vnp->data.ptrvalue; if (sdp != NULL && sdp->choice == Seq_descr_source) { biop = (BioSourcePtr) sdp->data.ptrvalue; bad_ssp = FindBadLatLon (biop); if (bad_ssp != NULL) { fix = FixLatLonFormat (bad_ssp->name); if (fix != NULL) { extra_text = StringChr (fix, ','); if (extra_text != NULL) { *extra_text = 0; extra_text++; while (isspace (*extra_text)) { extra_text++; } } if (fp != NULL) { fprintf (fp, "Corrected %s to %s\n", bad_ssp->name, fix); } bad_ssp->name = MemFree (bad_ssp->name); bad_ssp->name = fix; if (extra_text != NULL) { AddAltitudeToSubSourceNote (biop, extra_text); if (fp != NULL) { fprintf (fp, "Moved %s to subsource note\n", extra_text); } } any_change = TRUE; } else { if (fp != NULL) { fprintf (fp, "Unable to correct %s\n", bad_ssp->name); } } } } } return any_change; } static void ReplaceiInSeq (CharPtr PNTR seq, LogInfoPtr lip) { CharPtr cp, new_seq, src, dst; Int4 num_i = 0, num_extra = 0; if (seq == NULL) { return; } cp = StringISearch (*seq, "i"); while (cp != NULL) { if (cp == *seq || *(cp - 1) != '<') { num_extra++; } if (*(cp + 1) != '>') { num_extra++; } num_i++; cp = StringISearch (cp + 1, "i"); } if (num_extra != 0) { new_seq = (CharPtr) MemNew (sizeof (Char) * (StringLen (*seq) + 1 + num_extra)); src = *seq; dst = new_seq; while (*src != 0) { if (*src == 'i' || *src == 'I') { if (src == *seq || *(src - 1) != '<') { *dst = '<'; dst++; } *dst = 'i'; dst++; if (*(src + 1) != '>') { *dst = '>'; dst++; } } else { *dst = *src; dst++; } src++; } *dst = 0; if (lip != NULL) { if (lip->fp != NULL) { fprintf (lip->fp, "Changed 
primer sequence from '%s' to '%s'\n", *seq, new_seq); } lip->data_in_log = TRUE; } *seq = MemFree (*seq); *seq = new_seq; } } NLM_EXTERN void FixiPCRPrimerSeqsCallback (BioSourcePtr biop, Pointer data) { PCRReactionSetPtr ps; PCRPrimerPtr p; LogInfoPtr lip; if (biop == NULL) { return; } lip = (LogInfoPtr) data; for (ps = biop->pcr_primers; ps != NULL; ps = ps->next) { for (p = ps->forward; p != NULL; p = p->next) { ReplaceiInSeq (&(p->seq), lip); } for (p = ps->reverse; p != NULL; p = p->next) { ReplaceiInSeq (&(p->seq), lip); } } } typedef struct fixproteinnameformat { Boolean any_change; FILE *fp; ValNodePtr orgnames; } FixProteinNameFormatData, PNTR FixProteinNameFormatPtr; static void FixProteinNameFormatCallback (SeqFeatPtr sfp, Pointer data) { FixProteinNameFormatPtr f; ProtRefPtr prp; ValNodePtr vnp_n, vnp_p; CharPtr cp; Int4 len; if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT || (prp = (ProtRefPtr) sfp->data.value.ptrvalue) == NULL || (f = (FixProteinNameFormatPtr) data) == NULL) { return; } for (vnp_n = f->orgnames; vnp_n != NULL; vnp_n = vnp_n->next) { for (vnp_p = prp->name; vnp_p != NULL; vnp_p = vnp_p->next) { if ((cp = StringISearch (vnp_p->data.ptrvalue, vnp_n->data.ptrvalue)) != NULL) { len = StringLen (vnp_n->data.ptrvalue); if (cp != vnp_p->data.ptrvalue && ((*(cp - 1) == '(' && *(cp + len) == ')') || (*(cp - 1) == '[' && *(cp + len) == ']'))) { cp--; len+= 2; } if (*(cp + len) == 0 && isspace (*(cp - 1))) { *(cp - 1) = 0; f->any_change = TRUE; if (f->fp != NULL) { fprintf (f->fp, "Removed '%s' from protein name (now '%s')\n", (CharPtr) vnp_n->data.ptrvalue, (CharPtr) vnp_p->data.ptrvalue); } } else { if (isspace (*(cp + len))) { len ++; } StringCpy (cp, cp + len); f->any_change = TRUE; if (f->fp != NULL) { fprintf (f->fp, "Removed '%s' from protein name (now '%s')\n", (CharPtr) vnp_n->data.ptrvalue, (CharPtr) vnp_p->data.ptrvalue); } } } } } } static Boolean ApplyFixFormatToSeqEntry (SeqEntryPtr sep, FixFormatActionPtr action, FILE *log_fp) 
{ LogInfoData lid; FixProteinNameFormatData protformat; ValNodePtr list; if (sep == NULL || action == NULL) { return FALSE; } MemSet (&lid, 0, sizeof (LogInfoData)); lid.fp = log_fp; switch (action->choice) { case FixFormatAction_collection_date: VisitBioSourcesInSep (sep, &lid, FixCollectionDatesCallback); break; case FixFormatAction_lat_lon: list = FindBadLatLonObjects (sep); lid.data_in_log = LatLonAutocorrectList (lid.fp, list); list = FreeObjectList (list); break; case FixFormatAction_primers: VisitBioSourcesInSep (sep, &lid, FixiPCRPrimerSeqsCallback); break; case FixFormatAction_protein_name: MemSet (&protformat, 0, sizeof (FixProteinNameFormatData)); protformat.fp = log_fp; VisitBioSourcesInSep (sep, &(protformat.orgnames), GetOrgNamesInRecordCallback); VisitFeaturesInSep (sep, &protformat, FixProteinNameFormatCallback); protformat.orgnames = ValNodeFree (protformat.orgnames); lid.data_in_log = protformat.any_change; break; } return lid.data_in_log; } typedef struct replacepair { CharPtr find; CharPtr replace; } ReplacePairData, PNTR ReplacePairPtr; static ReplacePairData macro_spell_fixes[] = { {"Agricultrual", "Agricultural"}, {"Agricultureal", "Agricultural"}, {"Agricultrure", "Agriculture"}, {"bioremidiation", "bioremediation"}, {"Colledge", "College"}, {"Insitiute", "Institute" }, {"Instutite", "Institute" }, {"instute", "Institute" }, {"institue", "Institute" }, {"insitute", "Institute" }, {"insititute","Institute" }, {"Instiute","Institute" }, {"hpothetical", "hypothetical" }, {"hyphotetical", "hypothetical" }, {"hyphotheical", "hypothetical" }, {"hypotehtical", "hypothetical" }, {"hypotethical", "hypothetical" }, {"hypotetical", "hypothetical" }, {"hypotheical", "hypothetical" }, {"hypotheitcal", "hypothetical" }, {"hypothetcial", "hypothetical" }, {"hypothetica", "hypothetical" }, {"hypothteical", "hypothetical" }, {"hypothtical", "hypothetical" }, {"hypthetical", "hypothetical" }, {"hyptothetical", "hypothetical" }, {"idendification", 
"identification" }, {"protien", "protein" }, {"puatative", "putative" }, {"puative", "putative" }, {"puative", "putative" }, {"putaitive", "putative" }, {"putaitve", "putative" }, {"putaive", "putative" }, {"putataive", "putative" }, {"putatitve", "putative" }, {"putitive", "putative" }, {"reseach", "research"}, {"sequene", "sequence"}, {"univeristy", "University" }, {"univerisity", "University" }, {"univercity", "University" }, {"uiniversity", "University" }, {"uinversity", "University" }, {"univesity", "University" }, {"uviversity", "University" }, {"universtiy", "University" }, {"unvierstity", "University" }, {"univiersity", "University" }, {"universtity", "University" }, {"Unversity", "University" }, {"Univresity", "University" }, {NULL, NULL}}; static void SetFlagWhenChanged (Uint2 entityID, Uint4 itemID, Uint2 itemtype, Pointer userdata) { BoolPtr flag; if ((flag = (BoolPtr) userdata) != NULL) { *flag = TRUE; } } static Boolean SpellFixSeqEntry (SeqEntryPtr sep, Pointer data, FILE *log_fp) { Boolean any_changes = FALSE, this_change; Uint2 entityID; Int4 i; entityID = ObjMgrGetEntityIDForChoice (sep); for (i = 0; macro_spell_fixes[i].find != NULL; i++) { this_change = FALSE; FindReplaceInEntity (entityID, macro_spell_fixes[i].find, macro_spell_fixes[i].replace, FALSE, TRUE, TRUE, FALSE, 0, NULL, NULL, NULL, FALSE, SetFlagWhenChanged, &this_change); if (this_change) { if (log_fp != NULL) { fprintf (log_fp, "Replaced '%s' with '%s'\n", macro_spell_fixes[i].find, macro_spell_fixes[i].replace); } any_changes = TRUE; } } return any_changes; } typedef struct descriptortypename { Int4 descriptortype; Uint1 descriptor_choice; CharPtr descriptorname; } DescriptorTypeNameData, PNTR DescriptorTypeNamePtr; static DescriptorTypeNameData descriptortypename[] = { { Descriptor_type_all , 0 , "Any" } , { Descriptor_type_title , Seq_descr_title , "Title" } , { Descriptor_type_source , Seq_descr_source , "Source" } , { Descriptor_type_publication , Seq_descr_pub , "Publication" 
} , { Descriptor_type_comment , Seq_descr_comment , "Comment" } , { Descriptor_type_genbank , Seq_descr_genbank , "GenBank" } , { Descriptor_type_user , Seq_descr_user , "User" } , { Descriptor_type_create_date , Seq_descr_create_date , "CreateDate" } , { Descriptor_type_update_date , Seq_descr_update_date , "UpdateDate" } , { Descriptor_type_mol_info , Seq_descr_molinfo , "MolInfo" } , { Descriptor_type_structured_comment , Seq_descr_user , "StructuredComment" } , { Descriptor_type_genome_project_id , Seq_descr_user , "GenomeProjectID" } }; #define NUM_descriptortypename sizeof (descriptortypename) / sizeof (DescriptorTypeNameData) static Int4 GetDescriptorTypeFromDescriptorChoice (Uint1 descriptor_choice) { Int4 i; for (i = 0; i < NUM_descriptortypename; i++) { if (descriptor_choice == descriptortypename[i].descriptor_choice) { return descriptortypename[i].descriptortype; } } return -1; } static Uint1 GetDescriptorChoiceFromDescriptorType (Int4 descriptortype) { Int4 i; for (i = 0; i < NUM_descriptortypename; i++) { if (descriptortype == descriptortypename[i].descriptortype) { return descriptortypename[i].descriptor_choice; } } return SEQDESCR_MAX; } NLM_EXTERN CharPtr GetDescriptorNameFromDescriptorType (Int4 descriptortype) { CharPtr str = NULL; Int4 i; for (i = 0; i < NUM_descriptortypename && str == NULL; i++) { if (descriptortype == descriptortypename[i].descriptortype) { str = descriptortypename[descriptortype].descriptorname; } } if (str == NULL) { str = "Unknown descriptor type"; } return str; } NLM_EXTERN void AddAllDescriptorsToChoiceList (ValNodePtr PNTR descriptor_type_list) { Int4 i; ValNodePtr tmp_list = NULL; for (i = 0; i < NUM_descriptortypename; i++) { ValNodeAddPointer (&tmp_list, descriptortypename[i].descriptortype, StringSave (descriptortypename[i].descriptorname)); } tmp_list = ValNodeSort (tmp_list, SortVnpByString); ValNodeLink (descriptor_type_list, tmp_list); } static Boolean DoesDescriptorMatchType (SeqDescrPtr sdp, Int4 
descriptortype) { Uint1 descriptorchoice; UserObjectPtr uop; if (sdp == NULL) { return FALSE; } else if (descriptortype == Descriptor_type_all) { return TRUE; } else if ((descriptorchoice = GetDescriptorChoiceFromDescriptorType (descriptortype)) == SEQDESCR_MAX) { return FALSE; } else if (descriptorchoice != sdp->choice) { return FALSE; } else if (descriptortype == Descriptor_type_structured_comment) { if (sdp->choice == Seq_descr_user && ((uop = (UserObjectPtr) sdp->data.ptrvalue) == NULL || uop->type == NULL || StringCmp (uop->type->str, "StructuredComment") != 0)) { return FALSE; } else { return TRUE; } } else if (descriptortype == Descriptor_type_genome_project_id) { if (sdp->choice == Seq_descr_user && ((uop = (UserObjectPtr) sdp->data.ptrvalue) == NULL || uop->type == NULL || StringCmp (uop->type->str, "GenomeProjectsDB") != 0)) { return FALSE; } else { return TRUE; } } else { return TRUE; } } typedef struct removedescriptoractioncollection { RemoveDescriptorActionPtr action; ValNodePtr obj_list; } RemoveDescriptorActionCollectionData, PNTR RemoveDescriptorActionCollectionPtr; static void RemoveDescriptorCollectionCallback (SeqDescrPtr sdp, Pointer data) { RemoveDescriptorActionCollectionPtr d; if (sdp == NULL || (d = (RemoveDescriptorActionCollectionPtr) data) == NULL || d->action == NULL) { return; } if (DoesDescriptorMatchType (sdp, d->action->type) && DoesObjectMatchConstraintChoiceSet (OBJ_SEQDESC, sdp, d->action->constraint)) { ValNodeAddPointer (&(d->obj_list), OBJ_SEQDESC, sdp); } } static Int4 ApplyRemoveDescriptorActionToSeqEntry (RemoveDescriptorActionPtr action, SeqEntryPtr sep) { RemoveDescriptorActionCollectionData d; SeqDescrPtr sdp; ObjValNodePtr ovp; ValNodePtr vnp; Int4 num_deleted = 0; if (action == NULL) return 0; d.action = action; d.obj_list = NULL; VisitDescriptorsInSep (sep, &d, RemoveDescriptorCollectionCallback); if (d.obj_list == NULL) { return 0; } for (vnp = d.obj_list; vnp != NULL; vnp = vnp->next) { sdp = vnp->data.ptrvalue; if 
(sdp != NULL && sdp->extended != 0) { ovp = (ObjValNodePtr) sdp; ovp->idx.deleteme = TRUE; num_deleted ++; } } DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL); return num_deleted; } static void TrimStopsFromCompleteCodingRegionsCallback (SeqFeatPtr sfp, Pointer data) { Boolean p5, p3; BioseqPtr protbsp; CharPtr prot_str; Int4 len; /* variables for shortening protein features */ SeqFeatPtr prot_sfp; SeqMgrFeatContext fcontext; SeqIntPtr sintp; /* variables for logging */ LogInfoPtr lip; Char id_buf[100]; if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION || sfp->product == NULL) { return; } CheckSeqLocForPartial (sfp->location, &p5, &p3); if (p3) { return; } protbsp = BioseqFindFromSeqLoc (sfp->product); if (protbsp == NULL) { return; } prot_str = GetSequenceByBsp (protbsp); if (prot_str == NULL || (len = StringLen (prot_str)) == 0 || prot_str[len - 1] != '*') { prot_str = MemFree (prot_str); return; } BSSeek ((ByteStorePtr) protbsp->seq_data, -1, SEEK_END); BSDelete ((ByteStorePtr) protbsp->seq_data, 1); protbsp->length -= 1; prot_str = MemFree (prot_str); for (prot_sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, 0, &fcontext); prot_sfp != NULL; prot_sfp = SeqMgrGetNextFeature (protbsp, prot_sfp, 0, 0, &fcontext)) { if (prot_sfp->location != NULL && prot_sfp->location->choice == SEQLOC_INT && (sintp = (SeqIntPtr)prot_sfp->location->data.ptrvalue) != NULL) { if (sintp->to > protbsp->length - 1) { sintp->to = protbsp->length - 1; } } } lip = (LogInfoPtr) data; if (lip != NULL) { if (lip->fp != NULL) { SeqIdWrite (SeqIdFindBest (protbsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1); fprintf (lip->fp, "Trimmed trailing * from %s\n", id_buf); } lip->data_in_log = TRUE; } } NLM_EXTERN Boolean TrimStopsFromCompleteCodingRegions (SeqEntryPtr sep, FILE *log_fp) { LogInfoData lid; MemSet (&lid, 0, sizeof (LogInfoData)); lid.fp = log_fp; VisitFeaturesInSep (sep, &lid, TrimStopsFromCompleteCodingRegionsCallback); return 
lid.data_in_log; } static DefLineType DefLineTypeFromAutodefListType(Uint2 list_type) { DefLineType deflinetype = DEFLINE_USE_FEATURES; switch (list_type) { case Autodef_list_type_feature_list: deflinetype = DEFLINE_USE_FEATURES; break; case Autodef_list_type_complete_sequence: deflinetype = DEFLINE_COMPLETE_SEQUENCE; break; case Autodef_list_type_complete_genome: deflinetype = DEFLINE_COMPLETE_GENOME; break; case Autodef_list_type_sequence: deflinetype = DEFLINE_SEQUENCE; break; } return deflinetype; } static void ApplyAutodefActionToSeqEntry (AutodefActionPtr action, SeqEntryPtr sep) { OrganismDescriptionModifiers od; ModifierItemLocalPtr modList; DeflineFeatureRequestList dfrl; ValNodePtr vnp, modifier_indices = NULL; ValNode field_type, source_qual_choice; Uint4 i; Int4 defline_pos; InitOrganismDescriptionModifiers (&od, NULL); od.use_modifiers = TRUE; modList = MemNew (NumDefLineModifiers () * sizeof (ModifierItemLocalData)); for (i = 0; i < NumDefLineModifiers(); i++) { modList[i].any_present = FALSE; modList[i].all_present = FALSE; modList[i].is_unique = FALSE; modList[i].first_value_seen = NULL; modList[i].values_seen = NULL; modList[i].all_unique = FALSE; modList[i].status = NULL; modList[i].required = FALSE; } SetRequiredModifiers (modList); /* add modifiers specified in action */ source_qual_choice.next = NULL; source_qual_choice.choice = SourceQualChoice_textqual; field_type.next = NULL; field_type.choice = FieldType_source_qual; field_type.data.ptrvalue = &source_qual_choice; for (vnp = action->modifiers; vnp != NULL; vnp = vnp->next) { source_qual_choice.data.intvalue = vnp->data.intvalue; defline_pos = GetDeflinePosForFieldType (&field_type); if (defline_pos > -1) { modList[defline_pos].required = TRUE; modList[defline_pos].any_present = TRUE; ValNodeAddInt (&modifier_indices, 0, defline_pos); } } InitFeatureRequests (&dfrl); dfrl.feature_list_type = DefLineTypeFromAutodefListType (action->clause_list_type); if (action->misc_feat_parse_rule > 0) { 
dfrl.keep_items[RemovableNoncodingProductFeat] = TRUE; dfrl.misc_feat_parse_rule = action->misc_feat_parse_rule; } AutoDefForSeqEntry (sep, SeqMgrGetEntityIDForSeqEntry (sep), &od, modList, modifier_indices, &dfrl, DEFAULT_ORGANELLE_CLAUSE, FALSE, FALSE); modList = MemFree (modList); modifier_indices = ValNodeFree (modifier_indices); } NLM_EXTERN Boolean IsFixPubCapsActionEmpty (FixPubCapsActionPtr action) { if (action == NULL) { return TRUE; } if (action->affiliation || action->authors || action->title || action->affil_country) { return FALSE; } else { return TRUE; } } typedef struct fixpubcaps { FixPubCapsActionPtr action; ValNodePtr orgnames; Int4 num_pub_fields; Int4 num_sub_fields; ValNodePtr object_list; } FixPubCapsData, PNTR FixPubCapsPtr; static Boolean IsPubASub (ValNodePtr pub) { if (pub == NULL) { return FALSE; } else if (pub->choice == PUB_Sub) { return TRUE; } else if (pub->choice == PUB_Equiv) { return IsPubASub(pub->data.ptrvalue); } else { return FALSE; } } static void ApplyFixPubCapsCallback (PubdescPtr pdp, Pointer data) { FixPubCapsPtr f; CharPtr orig, tmp; ValNodePtr pub; AuthListPtr alp = NULL; ValNodePtr names; AuthorPtr ap, ap_orig; AffilPtr affil_orig; f = (FixPubCapsPtr)data; if (f == NULL || f->action == NULL) { return; } if (f->action->title) { for (pub = pdp->pub; pub != NULL; pub = pub->next) { orig = GetPubFieldFromPub (pub, Publication_field_title, NULL); if (orig != NULL) { tmp = StringSave (orig); if (!f->action->punct_only) { FixCapitalizationInTitle (&tmp, TRUE, f->orgnames); } if (StringCmp (orig, tmp) != 0) { SetPubFieldOnPub (pub, Publication_field_title, NULL, tmp, ExistingTextOption_replace_old); if (IsPubASub(pub)) { f->num_sub_fields++; } else { f->num_pub_fields++; } } tmp = MemFree (tmp); orig = MemFree (orig); } } } if (f->action->authors && !f->action->punct_only) { alp = GetAuthListPtr (pdp, NULL); if (alp != NULL && alp->choice == 1) { for (names = alp->names; names != NULL; names = names->next) { ap = 
names->data.ptrvalue; ap_orig = AsnIoMemCopy (ap, (AsnReadFunc) AuthorAsnRead, (AsnWriteFunc) AuthorAsnWrite); FixCapitalizationInAuthor (ap); if (!AsnIoMemComp (ap, ap_orig, (AsnWriteFunc) AuthorAsnWrite)) { if (IsPubASub(pdp->pub)) { f->num_sub_fields++; } else { f->num_pub_fields++; } } ap_orig = AuthorFree (ap_orig); } } } if (f->action->affiliation) { if (alp == NULL) { alp = GetAuthListPtr (pdp, NULL); } if (alp != NULL && alp->affil != NULL) { affil_orig = AsnIoMemCopy (alp->affil, (AsnReadFunc) AffilAsnRead, (AsnWriteFunc) AffilAsnWrite); FixCapsInPubAffilEx (alp->affil, f->action->punct_only); if (!AsnIoMemComp (alp->affil, affil_orig, (AsnWriteFunc) AffilAsnWrite)) { if (IsPubASub(pdp->pub)) { f->num_sub_fields++; } else { f->num_pub_fields++; } } affil_orig = AffilFree (affil_orig); } } else if (f->action->affil_country) { if (alp == NULL) { alp = GetAuthListPtr (pdp, NULL); } if (alp != NULL && alp->affil != NULL && !StringHasNoText (alp->affil->country)) { orig = StringSave (alp->affil->country); FixCapitalizationInCountryStringEx (&(alp->affil->country), f->action->punct_only); if (StringCmp (orig, alp->affil->country) != 0) { if (IsPubASub(pdp->pub)) { f->num_sub_fields++; } else { f->num_pub_fields++; } } if (StringCmp (alp->affil->country, "USA") == 0 && !StringHasNoText (alp->affil->sub) && !f->action->punct_only) { orig = StringSave (alp->affil->sub); FixStateAbbreviationsInAffil (alp->affil, NULL); if (StringCmp (orig, alp->affil->sub) != 0) { if (IsPubASub(pdp->pub)) { f->num_sub_fields++; } else { f->num_pub_fields++; } } orig = MemFree (orig); } orig = MemFree (orig); } } } static void CollectPubObjectsFeatCallback (SeqFeatPtr sfp, Pointer data) { FixPubCapsPtr f; if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB || (f = (FixPubCapsPtr) data) == NULL) { return; } if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, f->action->constraint)) { ValNodeAddPointer (&(f->object_list), OBJ_SEQFEAT, sfp); } } static void 
CollectPubObjectsDescCallback (SeqDescPtr sdp, Pointer data) { FixPubCapsPtr f; if (sdp == NULL || sdp->choice != Seq_descr_pub || (f = (FixPubCapsPtr) data) == NULL) { return; } if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQDESC, sdp, f->action->constraint)) { ValNodeAddPointer (&(f->object_list), OBJ_SEQDESC, sdp); } } static Boolean ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPtr sep, FILE *log_fp) { FixPubCapsData f; ValNodePtr vnp; PubdescPtr pdp; SeqFeatPtr sfp; SeqDescPtr sdp; CharPtr summ; Boolean rval = FALSE; AuthListPtr alp; ValNodePtr names; AuthorPtr ap; SeqSubmitPtr ssp; SubmitBlockPtr sbp; CitSubPtr csp; if (action == NULL || sep == NULL) return FALSE; MemSet (&f, 0, sizeof (FixPubCapsData)); f.action = action; /* collect pub objects that match constraint */ VisitDescriptorsInSep (sep, &f, CollectPubObjectsDescCallback); VisitFeaturesInSep (sep, &f, CollectPubObjectsFeatCallback); if (f.object_list == NULL) { /* nothing to change */ return FALSE; } if (action->title) { /* get org names to use in fixes */ VisitBioSourcesInSep (sep, &f.orgnames, GetOrgNamesInRecordCallback); } for (vnp = f.object_list; vnp != NULL; vnp = vnp->next) { pdp = NULL; if (vnp->choice == OBJ_SEQFEAT) { sfp = vnp->data.ptrvalue; pdp = sfp->data.value.ptrvalue; } else if (vnp->choice == OBJ_SEQDESC) { sdp = vnp->data.ptrvalue; pdp = sdp->data.ptrvalue; } ApplyFixPubCapsCallback (pdp, &f); } ssp = FindSeqSubmitForSeqEntry (sep); if (ssp != NULL) { sbp = ssp->sub; if (sbp != NULL) { csp = sbp->cit; if (csp != NULL) { alp = csp->authors; if (alp != NULL && alp->choice == 1) { for (names = alp->names; names != NULL; names = names->next) { ap = names->data.ptrvalue; if (f.action->authors && !f.action->punct_only) { FixCapitalizationInAuthor (ap); f.num_sub_fields++; } } } } } } f.orgnames = ValNodeFree (f.orgnames); if (f.num_sub_fields > 0 || f.num_pub_fields > 0) { rval = TRUE; if (log_fp != NULL) { summ = SummarizeFixPubCapsAction (action); if 
(f.num_sub_fields > 0) { fprintf (log_fp, "Fixed capitalization in %d publication fields in submitter blocks during %s\n", f.num_sub_fields, summ); } if (f.num_pub_fields > 0) { fprintf (log_fp, "Fixed capitalization in %d publication fields in publication blocks during %s\n", f.num_pub_fields, summ); } summ = MemFree (summ); } } return rval; } static void FixAuthorLastNamesAuthor (AuthorPtr author, ValNodeBlockPtr block) { NameStdPtr pNameStandard; CharPtr newval; CharPtr str; CharPtr fmt = "%s to %s"; if (author == NULL || author->name == NULL || author->name->choice != 2) { return; } pNameStandard = author->name->data; if (pNameStandard != NULL && pNameStandard->names[0] != NULL) { if (IsAllCaps(pNameStandard->names[0])) { newval = StringSave (pNameStandard->names[0]); FixCapitalizationInElement (&newval, FALSE, FALSE, TRUE); if (StringCmp (pNameStandard->names[0], newval) != 0) { str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (pNameStandard->names[0]) + StringLen (newval))); sprintf (str, fmt, pNameStandard->names[0], newval); ValNodeAddPointerToEnd (block, 0, str); pNameStandard->names[0] = MemFree (pNameStandard->names[0]); pNameStandard->names[0] = newval; newval = NULL; } else { newval = MemFree (newval); } } } } static void FixAuthorNameAuthor (AuthorPtr author, ValNodeBlockPtr block) { CharPtr oldval, newval; CharPtr str; CharPtr fmt = "%s to %s"; oldval = GetAuthorStringEx (author, FALSE); if (IsAllCaps (oldval)) { FixCapitalizationInAuthor (author); newval = GetAuthorStringEx (author, FALSE); str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (oldval) + StringLen (newval))); sprintf (str, fmt, oldval, newval); ValNodeAddPointerToEnd (block, 0, str); } } typedef struct fixsingleauthor { Boolean last_name_only; ValNodeBlock block; } FixSingleAuthorData, PNTR FixSingleAuthorPtr; static void FixAuthorNamesPub (PubPtr pub, FixSingleAuthorPtr f) { AuthListPtr alp; ValNodePtr names; if (f == NULL) { return; } alp = 
GetAuthorListForPub (pub); if (alp != NULL && alp->choice == 1) { for (names = alp->names; names != NULL; names = names->next) { if (f->last_name_only) { FixAuthorLastNamesAuthor(names->data.ptrvalue, &(f->block)); } else { FixAuthorNameAuthor (names->data.ptrvalue, &(f->block)); } } } } static void FixAuthorNamesPubdesc (PubdescPtr pdp, FixSingleAuthorPtr f) { ValNodePtr vnp; if (pdp != NULL) { for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) { FixAuthorNamesPub (vnp, f); } } } static void FixAuthorNamesCapsDescCallback (SeqDescPtr sdp, Pointer data) { if (sdp != NULL && sdp->choice == Seq_descr_pub) { FixAuthorNamesPubdesc(sdp->data.ptrvalue, data); } } static void FixAuthorNamesCapsFeatCallback (SeqFeatPtr sfp, Pointer data) { if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) { FixAuthorNamesPubdesc(sfp->data.value.ptrvalue, data); } } static Boolean FixAuthorNamesCaps (FixAuthorCapsPtr action, SeqEntryPtr sep, FILE *log_fp) { FixSingleAuthorData fix; ValNodePtr vnp; CharPtr fmt = "Fix Author Last Names Capitalization: Changed %s %d time%s\n"; Int4 count; CharPtr curr; if (action == NULL) { return FALSE; } fix.last_name_only = action->last_name_only; InitValNodeBlock (&(fix.block), NULL); /* collect pub objects that match constraint */ VisitDescriptorsInSep (sep, &fix, FixAuthorNamesCapsDescCallback); VisitFeaturesInSep (sep, &fix, FixAuthorNamesCapsFeatCallback); if (fix.block.head == NULL) { /* nothing changed */ return FALSE; } else { /* report changes */ if (log_fp != NULL) { fix.block.head = ValNodeSort (fix.block.head, SortVnpByString); curr = fix.block.head->data.ptrvalue; count = 1; for (vnp = fix.block.head->next; vnp != NULL; vnp = vnp->next) { if (StringCmp (curr, vnp->data.ptrvalue) == 0) { count++; } else { fprintf (log_fp, fmt, curr, count, count > 1 ? "s" : ""); curr = vnp->data.ptrvalue; count = 1; } } fprintf (log_fp, fmt, curr, count, count > 1 ? 
"s" : ""); } fix.block.head = ValNodeFreeData (fix.block.head); return TRUE; } } NLM_EXTERN Boolean IsFieldSortable (FieldTypePtr field) { Boolean rval = FALSE; FeatureFieldPtr ffield; if (field == NULL) { return FALSE; } if (field->choice == FieldType_feature_field) { ffield = field->data.ptrvalue; if (ffield != NULL) { if ((ffield->type == Macro_feature_type_cds || ffield->type == Macro_feature_type_prot) && ffield->field->choice == FeatQualChoice_legal_qual && ffield->field->data.intvalue == Feat_qual_legal_product) { rval = TRUE; } } } else if (field->choice == FieldType_cds_gene_prot) { if (field->data.intvalue == CDSGeneProt_field_prot_name) { rval = TRUE; } } return rval; } static Int4 SortFieldsInSeqEntry (SortFieldsActionPtr action, SeqEntryPtr sep) { ValNodePtr object_list = NULL, vnp; Int4 num = 0; if (action == NULL || action->field == NULL || !IsFieldSortable(action->field) || sep == NULL) { return 0; } object_list = GetObjectListForFieldType (action->field->choice, sep); for (vnp = object_list; vnp != NULL; vnp = vnp->next) { if (DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, action->constraint) && IsObjectAppropriateForFieldValue(vnp->choice, vnp->data.ptrvalue, action->field)) { if (SortFieldsForObject (vnp->choice, vnp->data.ptrvalue, action->field, action->order)) { num++; } } } return num; } static Boolean DoStringsMatch (CharPtr str1, CharPtr str2, Boolean case_sensitive) { Boolean rval = FALSE; if (case_sensitive) { if (StringCmp (str1, str2) == 0) { rval = TRUE; } } else if (StringICmp (str1, str2) == 0) { rval = TRUE; } return rval; } static Boolean DoGBQualListsMatch (GBQualPtr gbq1, GBQualPtr gbq2, Boolean case_sensitive) { Boolean rval = TRUE; while (rval && gbq1 != NULL && gbq2 != NULL) { if (!DoStringsMatch (gbq1->qual, gbq2->qual, case_sensitive)) { rval = FALSE; } else if (!DoStringsMatch (gbq1->val, gbq2->val, case_sensitive)) { rval = FALSE; } else { gbq1 = gbq1->next; gbq2 = gbq2->next; } } if (gbq1 != NULL || 
gbq2 != NULL) { rval = FALSE; } return rval; } static Boolean CheckBioseqForPartial (BioseqPtr bsp, BoolPtr partial5, BoolPtr partial3) { SeqMgrDescContext context; SeqDescrPtr sdp; MolInfoPtr mip; Boolean rval = FALSE; if (bsp == NULL) { return FALSE; } if (partial5 != NULL) { *partial5 = FALSE; } if (partial3 != NULL) { *partial3 = FALSE; } sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context); if (sdp != NULL && (mip = (MolInfoPtr) sdp->data.ptrvalue) != NULL) { /* partial 5 */ if (mip->completeness == 3 || mip->completeness == 5) { if (partial5 != NULL) { *partial5 = TRUE; } rval = TRUE; } /* partial 3 */ if (mip->completeness == 4 || mip->completeness == 5) { if (partial3 != NULL) { *partial3 = TRUE; } rval = TRUE; } if (mip->completeness == 2) { rval = TRUE; } } return rval; } static Boolean ProductsMatch (SeqLocPtr slp1, SeqLocPtr slp2, Boolean case_sensitive, Boolean ignore_partial) { BioseqPtr bsp1, bsp2; Int2 ctr, pos1, pos2; Char buf1[51]; Char buf2[51]; Int4 len = 50; SeqFeatPtr sfp1, sfp2; SeqMgrFeatContext fcontext1, fcontext2; Boolean partial5_1, partial5_2, partial3_1, partial3_2; if (slp1 == NULL && slp2 == NULL) { return TRUE; } else if (slp1 == NULL || slp2 == NULL) { return FALSE; } else if (SeqLocCompare (slp1, slp2) == SLC_A_EQ_B) { return TRUE; } else { bsp1 = BioseqFindFromSeqLoc (slp1); bsp2 = BioseqFindFromSeqLoc (slp2); if (bsp1 == NULL || bsp2 == NULL) { /* can't compare, assume they don't match */ return FALSE; } else if (bsp1->length != bsp2->length) { return FALSE; } else { CheckBioseqForPartial (bsp1, &partial5_1, &partial3_1); CheckBioseqForPartial (bsp2, &partial5_2, &partial3_2); if (!ignore_partial && ((partial5_1 && !partial5_2) || (!partial5_1 && partial5_2) || (partial3_1 && !partial3_2) || (!partial3_1 && partial3_2))) { return FALSE; } /* check that translation sequences match */ pos1 = 0; pos2 = 0; if (ignore_partial) { if (partial5_1 || partial5_2) { pos1++; pos2++; } } while (pos1 < bsp1->length && pos2 < 
bsp2->length) { ctr = SeqPortStreamInt (bsp1, pos1, MIN(pos1 + len - 1, bsp1->length - 1), Seq_strand_plus, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) buf1, NULL); ctr = SeqPortStreamInt (bsp2, pos2, MIN(pos2 + len - 1, bsp2->length - 1), Seq_strand_plus, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) buf2, NULL); if (StringNCmp (buf1, buf2, ctr) != 0) { return FALSE; } pos1 += len; pos2 += len; } /* now check that protein features match */ sfp1 = SeqMgrGetNextFeature (bsp1, NULL, 0, 0, &fcontext1); sfp2 = SeqMgrGetNextFeature (bsp2, NULL, 0, 0, &fcontext2); while (sfp1 != NULL && sfp2 != NULL) { if (!DoFeaturesMatch (sfp1, sfp2, TRUE, case_sensitive, ignore_partial)) { return FALSE; } sfp1 = SeqMgrGetNextFeature (bsp1, sfp1, SEQFEAT_PROT, 0, &fcontext1); sfp2 = SeqMgrGetNextFeature (bsp2, sfp2, SEQFEAT_PROT, 0, &fcontext2); } if (sfp1 != NULL || sfp2 != NULL) { return FALSE; } else { return TRUE; } } } } static Boolean DoLocationPartialsMatch (SeqLocPtr slp1, SeqLocPtr slp2) { Boolean partial5_1, partial3_1, partial1; Boolean partial5_2, partial3_2, partial2; partial1 = CheckSeqLocForPartial (slp1, &partial5_1, &partial3_1); partial2 = CheckSeqLocForPartial (slp2, &partial5_2, &partial3_2); if ((partial1 && !partial2) || (!partial1 && partial2)) { return FALSE; } if ((partial5_1 && !partial5_2) || (!partial5_1 && partial5_2)) { return FALSE; } if ((partial3_1 && !partial3_2) || (!partial3_1 && partial3_2)) { return FALSE; } return TRUE; } static Boolean DoLocationsMatch (SeqLocPtr slp1, SeqLocPtr slp2, Boolean allow_different_sequences, Boolean ignore_partial) { SeqLocPtr slp_tmp1, slp_tmp2; if (slp1 == NULL && slp2 == NULL) { return TRUE; } else if (slp1 == NULL || slp2 == NULL) { return FALSE; } if (!ignore_partial && !DoLocationPartialsMatch (slp1, slp2)) { return FALSE; } if (allow_different_sequences) { for (slp_tmp1 = SeqLocFindNext (slp1, NULL), slp_tmp2 = SeqLocFindNext (slp2, NULL); slp_tmp1 != NULL && slp_tmp2 != NULL; slp_tmp1 = 
SeqLocFindNext (slp1, slp_tmp1), slp_tmp2 = SeqLocFindNext (slp2, slp_tmp2)) { if (SeqLocStart (slp_tmp1) != SeqLocStart (slp_tmp2) || SeqLocStop (slp_tmp1) != SeqLocStop (slp_tmp2) || (!ignore_partial && !DoLocationPartialsMatch (slp_tmp1, slp_tmp2))) { return FALSE; } } } else if (SeqLocCompare (slp1, slp2) != SLC_A_EQ_B) { return FALSE; } return TRUE; } static Boolean DoCdRegionsMatch (CdRegionPtr crp1, CdRegionPtr crp2) { if (crp1 == NULL && crp2 == NULL) { return TRUE; } else if (crp1 == NULL || crp2 == NULL) { return FALSE; } else if ((crp1->orf && !crp2->orf) || (!crp1->orf && crp2->orf)){ return FALSE; } else if ((crp1->conflict && !crp2->conflict) || (!crp1->conflict && crp2->conflict)){ return FALSE; } else if (crp1->gaps != crp2->gaps) { return FALSE; } else if (crp1->mismatch != crp2->mismatch) { return FALSE; } else if (crp1->stops != crp2->stops) { return FALSE; } else if ((crp1->genetic_code == NULL && crp2->genetic_code != NULL) || (crp1->genetic_code != NULL && crp2->genetic_code == NULL) || (crp1->genetic_code != NULL && crp2->genetic_code != NULL && !AsnIoMemComp (crp1->genetic_code, crp2->genetic_code, (AsnWriteFunc) GeneticCodeAsnWrite))) { return FALSE; } else if ((crp1->code_break == NULL && crp2->code_break != NULL) || (crp1->code_break != NULL && crp2->code_break == NULL) || (crp1->code_break != NULL && crp2->code_break != NULL && !AsnIoMemComp (crp1->code_break, crp2->code_break, (AsnWriteFunc) CodeBreakAsnWrite))) { return FALSE; } else if (crp1->frame != crp2->frame) { if ((crp1->frame == 0 || crp1->frame == 1) && (crp2->frame == 0 || crp2->frame == 1)) { /* both effectively frame 1, ignore this difference */ } else { return FALSE; } } return TRUE; } static Boolean DoesSeqFeatDataMatch (ChoicePtr d1, ChoicePtr d2) { if (d1 == NULL && d2 == NULL) { return TRUE; } else if (d1 == NULL || d2 == NULL) { return FALSE; } else if (d1->choice != d2->choice) { return FALSE; } else if (d1->choice == SEQFEAT_CDREGION) { return 
DoCdRegionsMatch(d1->value.ptrvalue, d2->value.ptrvalue); } else { return AsnIoMemComp(d1, d2, (AsnWriteFunc) SeqFeatDataAsnWrite); } } NLM_EXTERN Boolean DoFeaturesMatch (SeqFeatPtr sfp1, SeqFeatPtr sfp2, Boolean allow_different_sequences, Boolean case_sensitive, Boolean ignore_partial) { if (sfp1 == NULL && sfp2 == NULL) { return TRUE; } else if (sfp1 == NULL || sfp2 == NULL) { return FALSE; } if (sfp1->data.choice != sfp2->data.choice) { return FALSE; } else if (sfp1->idx.subtype != sfp2->idx.subtype) { return FALSE; } else if (!ignore_partial && ((sfp1->partial && !sfp2->partial) || (!sfp1->partial && sfp2->partial))) { return FALSE; } else if ((sfp1->pseudo && !sfp2->pseudo) || (!sfp1->pseudo && sfp2->pseudo)) { return FALSE; } else if ((sfp1->excpt && !sfp2->excpt) || (!sfp1->excpt && sfp2->excpt)) { return FALSE; } else if (!DoLocationsMatch (sfp1->location, sfp2->location, allow_different_sequences, ignore_partial)) { return FALSE; } else if (!DoStringsMatch (sfp1->comment, sfp2->comment, case_sensitive)) { return FALSE; } else if (!DoStringsMatch (sfp1->title, sfp2->title, case_sensitive)) { return FALSE; } else if (sfp1->ext != NULL || sfp2->ext != NULL) { return FALSE; } else if (sfp1->exts != NULL || sfp2->exts != NULL) { return FALSE; } else if (!DoStringsMatch (sfp1->except_text, sfp2->except_text, case_sensitive)) { return FALSE; } else if (sfp1->exp_ev != sfp2->exp_ev) { return FALSE; } else if (!DoGBQualListsMatch (sfp1->qual, sfp2->qual, case_sensitive)) { return FALSE; } else if ((sfp1->cit != NULL || sfp2->cit != NULL) && PubMatch (sfp1->cit, sfp2->cit) != 0) { return FALSE; } else if (!DbxrefsMatch (sfp1->dbxref, sfp2->dbxref, case_sensitive)) { return FALSE; } else if (!DoesSeqFeatDataMatch(&(sfp1->data), &(sfp2->data))) { return FALSE; } else if (!XrefsMatch (sfp1->xref, sfp2->xref)) { return FALSE; } else if (!ProductsMatch (sfp1->product, sfp2->product, case_sensitive, ignore_partial)) { return FALSE; } else { return TRUE; } } typedef 
struct dupfeats { ValNodePtr delete_list; RemoveDuplicateFeatureActionPtr action; } DupFeatsData, PNTR DupFeatsPtr; static void FindDuplicateFeatsCallback (BioseqPtr bsp, Pointer data) { DupFeatsPtr dfp; SeqFeatPtr sfp1, sfp2; SeqMgrFeatContext fcontext; Uint1 featdef; ValNodePtr vnp_prev = NULL; if (bsp == NULL || (dfp = (DupFeatsPtr) data) == NULL) { return; } if (dfp->action->type == Macro_feature_type_any) { featdef = 0; } else { featdef = GetFeatdefFromFeatureType (dfp->action->type); } sfp1 = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext); while (sfp1 != NULL) { sfp2 = SeqMgrGetNextFeature (bsp, sfp1, 0, featdef, &fcontext); if (sfp1 == sfp2) { break; } if (DoFeaturesMatch (sfp1, sfp2, FALSE, dfp->action->case_sensitive, dfp->action->ignore_partials)) { if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp2, dfp->action->rd_constraint)) { vnp_prev = ValNodeAddPointer (&(dfp->delete_list), OBJ_SEQFEAT, sfp2); } else if ((vnp_prev == NULL || vnp_prev->data.ptrvalue != sfp1) && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp1, dfp->action->rd_constraint)) { ValNodeAddPointer (&(dfp->delete_list), OBJ_SEQFEAT, sfp1); } } sfp1 = sfp2; } } NLM_EXTERN ValNodePtr GetDuplicateFeaturesForRemoval (SeqEntryPtr sep, RemoveDuplicateFeatureActionPtr action) { DupFeatsData df; MemSet (&df, 0, sizeof (DupFeatsData)); df.action = action; VisitBioseqsInSep (sep, &df, FindDuplicateFeatsCallback); return df.delete_list; } NLM_EXTERN void RemoveDuplicateFeaturesInList (ValNodePtr delete_list, Uint2 entityID, Boolean remove_proteins) { ValNodePtr vnp; SeqFeatPtr sfp; BioseqPtr protbsp; SeqEntryPtr sep; for (vnp = delete_list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL) { if (remove_proteins && sfp->data.choice == SEQFEAT_CDREGION && sfp->product != NULL) { protbsp = BioseqFindFromSeqLoc (sfp->product); if (protbsp != NULL) { protbsp->idx.deleteme = TRUE; } } sfp->idx.deleteme = TRUE; } } DeleteMarkedObjects (entityID, 0, 
NULL); if (remove_proteins) { sep = GetTopSeqEntryForEntityID (entityID); RenormalizeNucProtSets (sep, TRUE); } } NLM_EXTERN Boolean RemoveDuplicateFeaturesInSeqEntry (SeqEntryPtr sep, RemoveDuplicateFeatureActionPtr action, FILE *log_fp) { ValNodePtr delete_list; Int4 num; delete_list = GetDuplicateFeaturesForRemoval (sep, action); if (delete_list == NULL) { return FALSE; } if (log_fp != NULL) { num = ValNodeLen (delete_list); fprintf (log_fp, "Removed %d duplicate features\n", num); } RemoveDuplicateFeaturesInList (delete_list, ObjMgrGetEntityIDForChoice(sep), action->remove_proteins); return TRUE; } NLM_EXTERN Boolean DoesTextContainOnlyTheseWords (CharPtr txt, ValNodePtr word_list) { CharPtr cp; ValNodePtr vnp; Boolean match; Boolean at_least_one = FALSE; Int4 len; if (StringHasNoText(txt)) { return FALSE; } cp = txt; while (isspace (*cp) || ispunct(*cp)) { cp++; } match = TRUE; while (*cp != 0 && match) { match = FALSE; for (vnp = word_list; vnp != NULL && !match; vnp = vnp->next) { len = StringLen (vnp->data.ptrvalue); if (StringNICmp (cp, vnp->data.ptrvalue, len) == 0 && (*(cp + len) == 0 || isspace(*(cp + len)) || ispunct(*(cp + len)))) { match = TRUE; cp += len; at_least_one = TRUE; } } while (isspace (*cp) || ispunct(*cp)) { cp++; } } return (match && at_least_one); } static ValNodePtr WordListFromText (CharPtr txt) { ValNodePtr list = NULL; CharPtr start, end, word; Int4 len; if (StringHasNoText(txt)) { return NULL; } start = txt; while (isspace (*start) || ispunct(*start)) { start++; } while (*start != 0) { end = start + 1; len = 1; while (*end != 0 && !isspace (*end) && !ispunct(*end)) { end++; len++; } word = (CharPtr) MemNew (sizeof (Char) * (len + 1)); StringNCpy (word, start, len); word[len] = 0; ValNodeAddPointer (&list, 0, word); start = end; while (isspace (*start) || ispunct(*start)) { start++; } } return list; } static CharPtr s_SpecialLineageWords[] = { "Class", "Classification", "Domain", "Family", "Genus", "Kingdom", "Lineage", "Note", 
"Order", "Organism", "Phylum", "Species", "Superfamily", "Tax class/lineage", "Taxonomic classification", "Taxonomic Classification is", "Taxonomy", NULL }; static Boolean RemoveLineageNoteFromBioSource (BioSourcePtr biop, FILE *fp) { SubSourcePtr ssp, ssp_prev = NULL, ssp_next; OrgModPtr mod, mod_prev = NULL, mod_next; Boolean any_removed = FALSE; ValNodePtr word_list = NULL; Int4 i; if (!HasTaxonomyID (biop) || biop->org == NULL || biop->org->orgname == NULL || StringHasNoText (biop->org->orgname->lineage)) { return FALSE; } word_list = WordListFromText(biop->org->orgname->lineage); ValNodeLink (&word_list, WordListFromText(biop->org->taxname)); for (i = 0; s_SpecialLineageWords[i] != NULL; i++) { ValNodeAddPointer (&word_list, 0, StringSave (s_SpecialLineageWords[i])); } for (ssp = biop->subtype; ssp != NULL; ssp = ssp_next) { ssp_next = ssp->next; if (ssp->subtype == SUBSRC_other && DoesTextContainOnlyTheseWords(ssp->name, word_list)) { if (ssp_prev == NULL) { biop->subtype = ssp_next; } else { ssp_prev->next = ssp_next; } ssp->next = NULL; if (fp != NULL) { fprintf (fp, "Removed note %s where lineage is %s\n", ssp->name, biop->org->orgname->lineage); } ssp = SubSourceFree (ssp); any_removed = TRUE; } else { ssp_prev = ssp; } } for (mod = biop->org->orgname->mod; mod != NULL; mod = mod_next) { mod_next = mod->next; if (mod->subtype == ORGMOD_other && DoesTextContainOnlyTheseWords(mod->subname, word_list)) { if (mod_prev == NULL) { biop->org->orgname->mod = mod_next; } else { mod_prev->next = mod_next; } mod->next = NULL; if (fp != NULL) { fprintf (fp, "Removed note %s where lineage is %s\n", mod->subname, biop->org->orgname->lineage); } mod = OrgModFree (mod); any_removed = TRUE; } else { mod_prev = mod; } } word_list = ValNodeFreeData (word_list); return any_removed; } static void RemoveLineageNotesCallback (BioSourcePtr biop, Pointer data) { LogInfoPtr lip; if (biop == NULL) { return; } lip = (LogInfoPtr) data; if (RemoveLineageNoteFromBioSource(biop, lip == 
NULL ? NULL : lip->fp)) { if (lip) { lip->data_in_log = TRUE; } } } static Boolean RemoveLineageNotesInSeqEntry (SeqEntryPtr sep, FILE *log_fp) { LogInfoData lid; MemSet (&lid, 0, sizeof (LogInfoData)); lid.fp = log_fp; VisitBioSourcesInSep (sep, &lid, RemoveLineageNotesCallback); return lid.data_in_log; } typedef struct logandpointer { LogInfoData lid; Pointer action; } LogAndPointerData, PNTR LogAndPointerPtr; static Boolean GeneXrefMatchesSuppression (GeneRefPtr grp, Uint2 suppression) { Boolean rval = FALSE; if (grp == NULL) { return FALSE; } switch (suppression) { case Gene_xref_suppression_type_any: rval = TRUE; break; case Gene_xref_suppression_type_suppressing: if (SeqMgrGeneIsSuppressed(grp)) { rval = TRUE; } break; case Gene_xref_suppression_type_non_suppressing: if (!SeqMgrGeneIsSuppressed(grp)) { rval = TRUE; } break; } return rval; } static Boolean GeneXrefMatchesNecessary (SeqFeatPtr sfp, GeneRefPtr grp, Uint2 necessary) { Boolean rval = FALSE; if (sfp == NULL || grp == NULL) { return FALSE; } switch (necessary) { case Gene_xref_necessary_type_any: rval = TRUE; break; case Gene_xref_necessary_type_necessary: if (!SeqMgrGeneIsSuppressed (grp) && !IsGeneXrefRedundant (sfp)) { rval = TRUE; } break; case Gene_xref_necessary_type_unnecessary: if (!SeqMgrGeneIsSuppressed (grp) && IsGeneXrefRedundant (sfp)) { rval = TRUE; } break; } return rval; } static Boolean RemoveXref (SeqFeatPtr sfp, Uint2 choice, Pointer data) { SeqFeatXrefPtr xref, xref_next, xref_prev = NULL; Boolean removed = FALSE; if (sfp == NULL) return FALSE; for (xref = sfp->xref; xref != NULL; xref = xref_next) { xref_next = xref->next; if ((xref->data.choice == choice || choice == 0) && (xref->data.value.ptrvalue == data || data == NULL)) { if (xref_prev == NULL) { sfp->xref = xref_next; } else { xref_prev->next = xref_next; } xref->next = NULL; xref = SeqFeatXrefFree (xref); removed = TRUE; } else { xref_prev = xref; } } return removed; } static void MacroRemoveXrefsCallback(SeqFeatPtr sfp, 
Pointer data) { LogAndPointerPtr lp; RemoveXrefsActionPtr action; GeneXrefTypePtr gene; GeneRefPtr grp; CharPtr text; ValNode vn; if (sfp == NULL || (lp = (LogAndPointerPtr)data) == NULL || (action = (RemoveXrefsActionPtr)lp->action) == NULL || action->xref_type == NULL) { return; } if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, action->constraint)) { return; } switch (action->xref_type->choice) { case XrefType_gene: grp = SeqMgrGetGeneXref (sfp); if (grp != NULL) { gene = (GeneXrefTypePtr) action->xref_type->data.ptrvalue; if (gene != NULL) { if ((gene->feature == Macro_feature_type_any || gene->feature == GetFeatureTypeFromFeatdef(sfp->idx.subtype)) && GeneXrefMatchesSuppression(grp, gene->suppression) && GeneXrefMatchesNecessary(sfp, grp, gene->necessary)) { if (RemoveXref(sfp, SEQFEAT_GENE, grp)) { lp->lid.data_in_log = TRUE; if (lp->lid.fp != NULL) { MemSet (&vn, 0, sizeof (ValNode)); vn.choice = OBJ_SEQFEAT; vn.data.ptrvalue = sfp; text = GetDiscrepancyItemText (&vn); fprintf (lp->lid.fp, "Removed Gene xref from %s\n", text); text = MemFree (text); } } } } } break; } } static Boolean MacroRemoveXrefs (SeqEntryPtr sep, RemoveXrefsActionPtr action, FILE *log_fp) { LogAndPointerData ld; MemSet (&ld.lid, 0, sizeof (LogAndPointerData)); ld.lid.fp = log_fp; ld.action = action; VisitFeaturesInSep (sep, &ld, MacroRemoveXrefsCallback); return ld.lid.data_in_log; } static void MacroMakeGeneXrefsCallback(SeqFeatPtr sfp, Pointer data) { LogAndPointerPtr lp; MakeGeneXrefActionPtr action; SeqFeatPtr gene; GeneRefPtr grp; CharPtr text; ValNode vn; SeqMgrFeatContext context; SeqFeatXrefPtr xref; if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE || (lp = (LogAndPointerPtr)data) == NULL || (action = (MakeGeneXrefActionPtr) lp->action) == NULL) { return; } if (action->feature != Macro_feature_type_any && action->feature != GetFeatureTypeFromFeatdef(sfp->idx.subtype)) { return; } if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, action->constraint)) { 
return; } grp = SeqMgrGetGeneXref (sfp); if (grp != NULL) { return; } gene = SeqMgrGetOverlappingGene (sfp->location, &context); if (gene != NULL && (grp = (GeneRefPtr) gene->data.value.ptrvalue) != NULL) { grp = (GeneRefPtr) AsnIoMemCopy (grp, (AsnReadFunc)GeneRefAsnRead, (AsnWriteFunc)GeneRefAsnWrite); xref = SeqFeatXrefNew (); xref->data.choice = SEQFEAT_GENE; xref->data.value.ptrvalue = grp; xref->next = sfp->xref; sfp->xref = xref; lp->lid.data_in_log = TRUE; if (lp->lid.fp != NULL) { MemSet (&vn, 0, sizeof (ValNode)); vn.choice = OBJ_SEQFEAT; vn.data.ptrvalue = sfp; text = GetDiscrepancyItemText (&vn); fprintf (lp->lid.fp, "Added Gene xref to %s\n", text); text = MemFree (text); } } } static Boolean MacroMakeGeneXrefs (SeqEntryPtr sep, MakeGeneXrefActionPtr action, FILE *log_fp) { LogAndPointerData ld; MemSet (&ld.lid, 0, sizeof (LogAndPointerData)); ld.lid.fp = log_fp; ld.action = action; VisitFeaturesInSep (sep, &ld, MacroMakeGeneXrefsCallback); return ld.lid.data_in_log; } static Boolean MacroMakeBoldXrefs (SeqEntryPtr sep, FILE *log_fp) { Int4 num_created = 0; VisitBioseqsInSep (sep, &num_created, ApplyBarcodeDbxrefsToBioseq); if (num_created > 0) { if (log_fp != NULL) { fprintf (log_fp, "Created %d BARCODE dbxrefs\n", num_created); } return TRUE; } else { return FALSE; } } NLM_EXTERN Boolean StripSuffixFromAuthor (AuthorPtr pAuthor) { NameStdPtr pNameStandard; Boolean rval = FALSE; if (pAuthor == NULL) return FALSE; else if(pAuthor->name->choice != 2) return FALSE; pNameStandard = pAuthor->name->data; if (pNameStandard != NULL && pNameStandard->names[5] != NULL) { pNameStandard->names[5][0] = 0; rval = TRUE; } return rval; } NLM_EXTERN Boolean TruncateAuthorMiddleInitials (AuthorPtr pAuthor) { NameStdPtr pNameStandard; CharPtr cp; Boolean rval = FALSE; if (pAuthor == NULL) return FALSE; else if(pAuthor->name->choice != 2) return FALSE; pNameStandard = pAuthor->name->data; if (pNameStandard != NULL) { cp = StringChr (pNameStandard->names[4], '.'); if (cp 
== NULL || StringChr (cp + 1, '.') == NULL) { if (StringLen (pNameStandard->names[4]) > 3) { pNameStandard->names[4][3] = 0; pNameStandard->names[4][2] = '.'; rval = TRUE; } } else if (StringLen (pNameStandard->names[4]) > 4) { pNameStandard->names[4][4] = 0; pNameStandard->names[4][3] = '.'; rval = TRUE; } } return rval; } static Boolean MoveAuthorMiddleToFirst (AuthorPtr pAuthor) { NameStdPtr pNameStandard; CharPtr cp; Int4 num_letters = 0; Boolean rval = FALSE; if (pAuthor == NULL) return FALSE; else if(pAuthor->name->choice != 2) return FALSE; pNameStandard = pAuthor->name->data; if (pNameStandard != NULL) { cp = StringChr (pNameStandard->names[4], '.'); if (cp != NULL) { cp++; while (isalpha(*(cp + num_letters))) { num_letters++; } if (num_letters > 1) { SetStringValue (&(pNameStandard->names[1]), cp, ExistingTextOption_append_space); *cp = 0; rval = TRUE; } } } return rval; } const CharPtr s_AuthorFixActionNames[] = { "Truncate middle initials", "Strip author suffix", "Move middle name to first name" }; NLM_EXTERN CharPtr SummarizeAuthorFixAction (AuthorFixActionPtr a) { CharPtr rval = NULL; CharPtr constraint; if (a == NULL) { return StringSave("Unknown action"); } if (a->fix_type < 1 || a->fix_type > sizeof (s_AuthorFixActionNames) / sizeof (CharPtr)) { return StringSave("Unknown action"); } constraint = SummarizeConstraintSet (a->constraint); if (constraint == NULL) { rval = StringSave (s_AuthorFixActionNames[a->fix_type - 1]); } else { rval = (CharPtr) MemNew (sizeof (Char) * (StringLen (s_AuthorFixActionNames[a->fix_type - 1]) + StringLen (constraint) + 2)); StringCpy (rval, s_AuthorFixActionNames[a->fix_type - 1]); StringCat (rval, " "); StringCat (rval, constraint); constraint = MemFree (constraint); } return rval; } typedef struct pubcollect { ValNodePtr list; ValNodePtr constraint; } PubCollectData, PNTR PubCollectPtr; static void GetPubsForAuthorFixDesc (SeqDescPtr sdp, Pointer data) { PubCollectPtr p; if (sdp == NULL || sdp->choice != Seq_descr_pub 
|| (p = (PubCollectPtr) data) == NULL) { return; } if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQDESC, sdp, p->constraint)) { ValNodeAddPointer (&(p->list), OBJ_SEQDESC, sdp); } } static void GetPubsForAuthorFixFeat (SeqFeatPtr sfp, Pointer data) { PubCollectPtr p; if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB || (p = (PubCollectPtr) data) == NULL) { return; } if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, p->constraint)) { ValNodeAddPointer (&(p->list), OBJ_SEQFEAT, sfp); } } static Boolean ApplyAuthorFixToSeqEntry (SeqEntryPtr sep, AuthorFixActionPtr action, FILE *log_fp) { PubCollectData p; ValNodePtr vnp, pub; PubdescPtr pdp; SeqFeatPtr sfp; SeqDescPtr sdp; AuthListPtr alp; ValNodePtr names; AuthorPtr ap; SeqSubmitPtr ssp; SubmitBlockPtr sbp; ContactInfoPtr cip; CitSubPtr csp; Int4 num_changed = 0; if (sep == NULL || action == NULL) { return FALSE; } MemSet (&p, 0, sizeof (PubCollectData)); p.constraint = action->constraint; VisitDescriptorsInSep (sep, &p, GetPubsForAuthorFixDesc); VisitFeaturesInSep (sep, &p, GetPubsForAuthorFixFeat); for (vnp = p.list; vnp != NULL; vnp = vnp->next) { pdp = NULL; if (vnp->choice == OBJ_SEQFEAT) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) { pdp = sfp->data.value.ptrvalue; } } else if (vnp->choice == OBJ_SEQDESC) { sdp = (SeqDescPtr) vnp->data.ptrvalue; if (sdp != NULL && sdp->choice == Seq_descr_pub) { pdp = sdp->data.ptrvalue; } } if (pdp != NULL) { for (pub = pdp->pub; pub != NULL; pub = pub->next) { alp = GetAuthorListForPub (pub); if (alp != NULL && alp->choice == 1) { for (names = alp->names; names != NULL; names = names->next) { ap = names->data.ptrvalue; switch (action->fix_type) { case Author_fix_type_truncate_middle_initials: if (TruncateAuthorMiddleInitials(ap)) { num_changed++; } break; case Author_fix_type_strip_suffix: if (StripSuffixFromAuthor(ap)) { num_changed++; } break; case Author_fix_type_move_middle_to_first: if (MoveAuthorMiddleToFirst (ap)) 
{ num_changed++; } break; } } } } } } ssp = FindSeqSubmitForSeqEntry (sep); if (ssp != NULL) { sbp = ssp->sub; if (sbp != NULL) { csp = sbp->cit; if (csp != NULL) { alp = csp->authors; if (alp != NULL && alp->choice == 1) { for (names = alp->names; names != NULL; names = names->next) { ap = names->data.ptrvalue; switch (action->fix_type) { case Author_fix_type_truncate_middle_initials: if (TruncateAuthorMiddleInitials(ap)) { num_changed++; } break; case Author_fix_type_strip_suffix: if (StripSuffixFromAuthor(ap)) { num_changed++; } break; case Author_fix_type_move_middle_to_first: if (MoveAuthorMiddleToFirst (ap)) { num_changed++; } break; } } } } cip = sbp->contact; if (cip != NULL) { ap = cip->contact; if (ap != NULL) { /* switch (action->fix_type) { case Author_fix_type_truncate_middle_initials: if (TruncateAuthorMiddleInitials(ap)) { num_changed++; } break; case Author_fix_type_strip_suffix: if (StripSuffixFromAuthor(ap)) { num_changed++; } break; case Author_fix_type_move_middle_to_first: if (MoveAuthorMiddleToFirst (ap)) { num_changed++; } break; } */ } } } } p.list = ValNodeFree (p.list); if (num_changed > 0) { if (log_fp != NULL) { fprintf (log_fp, "%s for %d names\n", s_AuthorFixActionNames[action->fix_type - 1], num_changed); } return TRUE; } else { return FALSE; } } static Boolean UpdateSequencesInSeqEntry (SeqEntryPtr sep, UpdateSequencesActionPtr a, FILE *log_fp, GlobalAlignFunc align_func) { FILE *fp; SeqEntryPtr update_sequences; SeqEntryPtr update_sep, orig_scope; ValNodePtr err_msg_list = NULL, vnp; Boolean chars_stripped = FALSE; Int4 orig_seq_num = 0, update_seq_num = 0; ValNodePtr orig_list = NULL, update_list = NULL; ValNodePtr unmatched_updates; BioseqSetPtr top_bssp; Uint2 update_entityID; ValNodePtr vnp_o, vnp_u; SeqAlignPtr salp = NULL; Boolean revcomp, data_in_log; BioseqPtr update_bsp, orig_bsp; Boolean rval = FALSE; Char id_buf[255]; if (sep == NULL || a == NULL || StringHasNoText (a->filename)) { return FALSE; } fp = FileOpen 
(a->filename, "r"); if (fp == NULL) { if (log_fp != NULL) { fprintf (log_fp, "Unable to open %s for update sequences\n", a->filename); } return FALSE; } update_sequences = ImportNucleotideFASTASequencesFromFile (fp, TRUE, NULL, &err_msg_list, &chars_stripped, TRUE); FileClose (fp); ValNodeFreeData (err_msg_list); AddUniqueUpdateSequenceIDs (update_sequences); if (update_sequences == NULL) { if (log_fp != NULL) { fprintf (log_fp, "Unable to read FASTA update sequences from %s\n", a->filename); } return FALSE; } if (chars_stripped && log_fp != NULL) { fprintf (log_fp, "Characters were stripped from FASTA update sequences in %s\n", a->filename); } top_bssp = BioseqSetNew (); top_bssp->_class = BioseqseqSet_class_genbank; top_bssp->seq_set = update_sequences; update_sep = SeqEntryNew (); update_sep->choice = 2; update_sep->data.ptrvalue = top_bssp; update_entityID = ObjMgrGetEntityIDForPointer (top_bssp); AssignIDsInEntityEx (update_entityID, 0, NULL, NULL); ListBioseqsInSeqEntry (sep, TRUE, &orig_seq_num, &orig_list); ListBioseqsInSeqEntry (update_sep, TRUE, &update_seq_num, &update_list); orig_scope = SeqEntrySetScope (sep); unmatched_updates = ShuffleUpdateBioseqListWithIndex (&update_list, orig_list); SeqEntrySetScope (orig_scope); if (log_fp != NULL && unmatched_updates != NULL) { for (vnp = unmatched_updates; vnp != NULL; vnp = vnp->next) { /* TODO - log unmatched update sequences? */ } } RemoveSequencesWithoutUpdates (&orig_list, &update_list); for (vnp_o = orig_list, vnp_u = update_list; vnp_o != NULL && vnp_u != NULL; vnp_o = vnp_o->next, vnp_u = vnp_u->next) { orig_bsp = vnp_o->data.ptrvalue; update_bsp = vnp_u->data.ptrvalue; revcomp = FALSE; salp = AlignForSequenceUpdate (orig_bsp, update_bsp, &revcomp, align_func); /* TODO - warn about no alignment? */ ReplaceOneSequence (salp, orig_bsp, update_bsp); if (revcomp) { BioseqRevComp (orig_bsp); SeqEntryExplore (sep, (Pointer) orig_bsp, RevCompFeats); } if (! 
AreSequenceResiduesIdentical (orig_bsp, update_bsp)) { if (a->add_cit_subs) { AddCitSubToUpdatedSequence (orig_bsp, orig_bsp->idx.entityID, kSubmitterUpdateText); } RemoveQualityScores (orig_bsp, log_fp, &data_in_log); if (log_fp != NULL) { SeqIdWrite (SeqIdFindWorst (orig_bsp->id), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1); fprintf (log_fp, "Updated sequence %s.\n", id_buf); } rval = TRUE; } salp = SeqAlignFree (salp); } top_bssp->idx.deleteme = TRUE; DeleteMarkedObjects (update_entityID, 0, NULL); return rval; } static void AddTransSplicingToGene (SeqFeatPtr sfp, Pointer userdata) { Int4Ptr countP; Int4 numivals = 0; SeqLocPtr slp = NULL; CharPtr str; if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return; countP = (Int4Ptr) userdata; if (sfp->excpt) { if (StringISearch (sfp->except_text, "trans-splicing") != NULL) return; } while ((slp = SeqLocFindNext (sfp->location, slp)) != NULL) { numivals++; } if (numivals < 2) return; sfp->excpt = TRUE; if (sfp->except_text == NULL) { sfp->except_text = StringSave ("trans-splicing"); } else { str = (CharPtr) MemNew (sizeof (Char) * (sizeof (sfp->except_text) + 20)); if (str != NULL) { sprintf (str, "%s,trans-splicing", sfp->except_text); MemFree (sfp->except_text); sfp->except_text = str; } } if (countP != NULL) { (*countP)++; } } static void LookForBioseqSetFields (BioseqSetPtr bssp, Pointer userdata) { BoolPtr bp; if (bssp == NULL || bssp->_class != BioseqseqSet_class_small_genome_set) return; bp = (BoolPtr) userdata; if (bp == NULL) return; *bp = TRUE; } static Boolean AddTransSplicingInSeqEntry (SeqEntryPtr sep, FILE *log_fp) { Int4 count = 0; Boolean is_small_genome_set = FALSE; if (sep == NULL) return FALSE; VisitSetsInSep (sep, (Pointer) &is_small_genome_set, LookForBioseqSetFields); if (! 
is_small_genome_set) return FALSE; VisitFeaturesInSep (sep, (Pointer) &count, AddTransSplicingToGene); return (Boolean) (count > 0); } static Boolean RemoveInvalidECnumbersInSeqEntry (SeqEntryPtr sep, FILE *log_fp) { Int4 count = 0; if (sep == NULL) return FALSE; count += UpdateReplacedECNumbers (sep); count += DeleteBadECNumbers (sep); return (Boolean) (count > 0); } typedef struct tsaidfromdefline { TextPortionPtr text_portion; CharPtr suffix; Int4 num_created; } TSAIdFromDeflineData, PNTR TSAIdFromDeflinePtr; static void CreateTSAIDsFromDeflineCallback (BioseqPtr bsp, Pointer data) { TSAIdFromDeflinePtr t; CharPtr db; SeqDescrPtr sdp; SeqMgrDescContext dcontext; CharPtr str; SeqIdPtr sip_new; DbtagPtr dbtag; if (bsp == NULL || ISA_aa (bsp->mol) || (t = (TSAIdFromDeflinePtr) data) == NULL || (db = GetTSAIDDB(bsp)) == NULL) { return; } sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &dcontext); if (sdp == NULL || (str = GetTextPortionFromString ((CharPtr) sdp->data.ptrvalue, t->text_portion)) == NULL) { db = MemFree (db); return; } dbtag = DbtagNew (); dbtag->db = db; dbtag->tag = ObjectIdNew (); if (t->suffix == NULL) { dbtag->tag->str = str; str = NULL; } else { dbtag->tag->str = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + StringLen (t->suffix) + 2)); sprintf (dbtag->tag->str, "%s.%s", str, t->suffix); str = MemFree (str); } sip_new = ValNodeNew (NULL); sip_new->choice = SEQID_GENERAL; sip_new->data.ptrvalue = dbtag; sip_new->next = bsp->id; bsp->id = sip_new; SeqMgrReplaceInBioseqIndex (bsp); t->num_created ++; } NLM_EXTERN Int4 CreateTSAIDsFromDeflineInSep (SeqEntryPtr sep, CharPtr suffix, TextPortionPtr t) { TSAIdFromDeflineData td; td.text_portion = t; td.suffix = suffix; td.num_created = 0; VisitBioseqsInSep (sep, &td, CreateTSAIDsFromDeflineCallback); return td.num_created; } typedef struct tsa_id_callback { SeqEntryPtr top_sep; CharPtr suffix; TextPortionPtr text_portion; Int4 num_created; } TSAIdCallbackData, PNTR TSAIdCallbackPtr; 
static void MakeTSAIdsCallback (BioseqPtr bsp, Pointer data) { SeqIdPtr sip, sip_tsa = NULL, sip_local = NULL; TSAIdCallbackPtr t; SeqIdPtr sip_new = NULL; DbtagPtr dbtag, dbtag_old = NULL; CharPtr db = NULL; ObjectIdPtr oip = NULL; Int4 id_num = 0; CharPtr id_str = NULL, tmp_str = NULL; CharPtr cp; if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL || !IsTSA (bsp)) { return; } t = (TSAIdCallbackPtr) data; for (sip = bsp->id; sip != NULL; sip = sip->next) { if (sip->choice == SEQID_LOCAL) { sip_local = sip; } else if (sip->choice == SEQID_GENERAL && (dbtag_old = (DbtagPtr) sip->data.ptrvalue) != NULL && (StringNCmp (dbtag_old->db, "gpid:", 5) == 0 || StringNCmp (dbtag_old->db, "bpid:", 5) == 0)) { sip_tsa = sip; } } if (sip_tsa == NULL && sip_local == NULL) { return; } db = GetTSAIDDB (bsp); if (db == NULL) { return; } dbtag = DbtagNew (); dbtag->db = db; dbtag->tag = ObjectIdNew (); if (sip_tsa != NULL && (dbtag_old = (DbtagPtr) sip_tsa->data.ptrvalue) != NULL) { oip = dbtag_old->tag; sip = sip_tsa; } else if (sip_local != NULL) { oip = sip_local->data.ptrvalue; sip = sip_local; } if (oip == NULL) { return; } if (oip->str == NULL) { if (t->text_portion != NULL) { tmp_str = (CharPtr) MemNew (sizeof (Char) * 16); sprintf (tmp_str, "%d", oip->id); id_str = GetTextPortionFromString (tmp_str, t->text_portion); tmp_str = MemFree (tmp_str); } else { id_num = oip->id; } } else { if (t->text_portion == NULL) { id_str = StringSave (oip->str); if (sip == sip_tsa && (cp = StringRChr (id_str, '.')) != NULL) { *cp = 0; } } else { id_str = GetTextPortionFromString (oip->str, t->text_portion); } } if (id_num == 0 && id_str == NULL && StringHasNoText (t->suffix)) { return; } if (t->suffix == NULL) { if (id_str == NULL) { dbtag->tag->id = id_num; } else { dbtag->tag->str = StringSave (id_str); } } else { if (id_str == NULL) { dbtag->tag->str = (CharPtr) MemNew (sizeof (Char) * (16 + StringLen (t->suffix))); sprintf (dbtag->tag->str, "%d.%s", id_num, t->suffix); } else { 
dbtag->tag->str = (CharPtr) MemNew (sizeof (Char) * (StringLen (id_str) + StringLen (t->suffix) + 2)); sprintf (dbtag->tag->str, "%s.%s", id_str, t->suffix); } } id_str = MemFree (id_str); sip_new = ValNodeNew (NULL); sip_new->choice = SEQID_GENERAL; sip_new->data.ptrvalue = dbtag; sip = SeqIdDup (sip); ReplaceSeqIdWithSeqId (sip, sip_new, t->top_sep); sip = SeqIdFree (sip); sip_new = SeqIdFree (sip_new); t->num_created++; } NLM_EXTERN Int4 ConvertLocalIdsToTSAIds (SeqEntryPtr sep, CharPtr suffix, TextPortionPtr tp) { TSAIdCallbackData t; t.top_sep = sep; t.suffix = suffix; t.text_portion = tp; t.num_created = 0; VisitBioseqsInSep (sep, &t, MakeTSAIdsCallback); return t.num_created; } static void EditTSAIdsCallback (BioseqPtr bsp, Pointer data) { SeqIdPtr sip, sip_tsa = NULL; TSAIdCallbackPtr t; SeqIdPtr sip_new = NULL; DbtagPtr dbtag, dbtag_old = NULL; ObjectIdPtr oip = NULL; Int4 id_num = 0; CharPtr id_str = NULL, tmp_str = NULL; CharPtr cp; if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL || !IsTSA (bsp)) { return; } t = (TSAIdCallbackPtr) data; for (sip = bsp->id; sip != NULL && sip_tsa == NULL; sip = sip->next) { if (sip->choice == SEQID_GENERAL && (dbtag_old = (DbtagPtr) sip->data.ptrvalue) != NULL && (StringNCmp (dbtag_old->db, "gpid:", 5) == 0 || StringNCmp (dbtag_old->db, "bpid:", 5) == 0)) { sip_tsa = sip; } } if (sip_tsa == NULL) { return; } oip = dbtag_old->tag; if (oip->str == NULL) { if (t->text_portion != NULL) { tmp_str = (CharPtr) MemNew (sizeof (Char) * 16); sprintf (tmp_str, "%d", oip->id); id_str = GetTextPortionFromString (tmp_str, t->text_portion); tmp_str = MemFree (tmp_str); } else { id_num = oip->id; } } else { if (t->text_portion == NULL) { id_str = StringSave (oip->str); if ((cp = StringRChr (id_str, '.')) != NULL) { *cp = 0; } } else { id_str = GetTextPortionFromString (oip->str, t->text_portion); } } if (id_num == 0 && id_str == NULL && StringHasNoText (t->suffix)) { return; } dbtag = DbtagNew (); dbtag->db = StringSave 
(dbtag_old->db); dbtag->tag = ObjectIdNew (); if (t->suffix == NULL) { if (id_str == NULL) { dbtag->tag->id = id_num; } else { dbtag->tag->str = StringSave (id_str); } } else { if (id_str == NULL) { dbtag->tag->str = (CharPtr) MemNew (sizeof (Char) * (16 + StringLen (t->suffix))); sprintf (dbtag->tag->str, "%d.%s", id_num, t->suffix); } else { dbtag->tag->str = (CharPtr) MemNew (sizeof (Char) * (StringLen (oip->str) + StringLen (t->suffix) + 2)); sprintf (dbtag->tag->str, "%s.%s", id_str, t->suffix); } } id_str = MemFree (id_str); sip_tsa = SeqIdDup (sip_tsa); sip_new = ValNodeNew (NULL); sip_new->choice = SEQID_GENERAL; sip_new->data.ptrvalue = dbtag; ReplaceSeqIdWithSeqId (sip_tsa, sip_new, t->top_sep); sip_new = SeqIdFree (sip_new); sip_tsa = SeqIdFree (sip_tsa); t->num_created++; } NLM_EXTERN Int4 EditTSAIds (SeqEntryPtr sep, CharPtr suffix, TextPortionPtr tp) { TSAIdCallbackData t; t.top_sep = sep; t.suffix = suffix; t.text_portion = tp; t.num_created = 0; VisitBioseqsInSep (sep, &t, EditTSAIdsCallback); return t.num_created; } static Boolean CreateTsaIDsInSeqEntry (SeqEntryPtr sep, CreateTSAIdsActionPtr action, FILE * log_fp) { Int4 num_created = 0; if (sep == NULL || action == NULL || action->src == NULL) { return FALSE; } switch (action->src->choice) { case CreateTSAIdsSrc_local_id: num_created = ConvertLocalIdsToTSAIds (sep, action->suffix, action->id_text_portion); break; case CreateTSAIdsSrc_defline: num_created = CreateTSAIDsFromDeflineInSep (sep, action->suffix, action->src->data.ptrvalue); break; } if (num_created > 0) { if (log_fp != NULL) { fprintf (log_fp, "Created %d TSA IDs\n", num_created); } return TRUE; } else { return FALSE; } } static Boolean PerformAutofixInSeqEntry (SeqEntryPtr sep, AutofixActionPtr action, FILE * log_fp) { Int4 num_created = 0; DiscrepancyType test_type; DiscrepancyConfigPtr dcp; LogInfoData lid; Int4 i; ValNodePtr results; ValNodePtr sep_list = NULL; if (sep == NULL || action == NULL || (test_type = 
GetDiscrepancyTypeFromSettingName (action->test_name)) == MAX_DISC_TYPE) { return FALSE; } dcp = DiscrepancyConfigNew(); for (i = 0; i < MAX_DISC_TYPE; i++) { dcp->conf_list[i] = FALSE; } dcp->conf_list[test_type] = TRUE; ValNodeAddPointer (&sep_list, 0, sep); results = CollectDiscrepancies (dcp, sep_list, NULL); sep_list = ValNodeFree (sep_list); dcp = DiscrepancyConfigFree (dcp); MemSet (&lid, 0, sizeof (LogInfoData)); lid.fp = log_fp; AutofixDiscrepancies (results, TRUE, &lid); results = FreeClickableList (results); return lid.data_in_log; } typedef struct taxnameconsistencydata { CharPtr taxname; Boolean first; Boolean consistent; } TaxnameConsistencyData, PNTR TaxnameConsistencyPtr; static void TaxnameConsistencyBiosourceCallback (BioSourcePtr biop, Pointer data) { TaxnameConsistencyPtr tp; if (biop == NULL || (tp = (TaxnameConsistencyPtr)data) == NULL) { return; } if (biop->org == NULL || StringHasNoText (biop->org->taxname)) { if (tp->first) { tp->first = FALSE; } else if (tp->taxname != NULL) { tp->consistent = FALSE; } } else if (tp->first) { tp->taxname = biop->org->taxname; tp->first = FALSE; } else if (StringCmp (tp->taxname, biop->org->taxname) != 0) { tp->consistent = FALSE; } } static Boolean AreTaxnamesConsistent (BioseqSetPtr bssp) { TaxnameConsistencyData td; td.taxname = NULL; td.first = TRUE; td.consistent = TRUE; VisitBioSourcesInSet (bssp, &td, TaxnameConsistencyBiosourceCallback); return td.consistent; } static Int4 FixPopToPhySets (SeqEntryPtr sep) { BioseqSetPtr bssp; Int4 rval = 0; if (sep == NULL || !IS_Bioseq_set (sep) || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL) { return 0; } if (bssp->_class == BioseqseqSet_class_pop_set && !AreTaxnamesConsistent(bssp)) { bssp->_class = BioseqseqSet_class_phy_set; rval++; } for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { rval += FixPopToPhySets(sep); } return rval; } static Boolean PerformFixSetsInSeqEntry (SeqEntryPtr sep, FixSetsActionPtr action, FILE * log_fp) { Int4 
num_renormalized; Boolean rval = FALSE; if (sep == NULL || action == NULL) { return FALSE; } switch (action->choice) { case FixSetsAction_remove_single_item_set: num_renormalized = RemoveSingleItemSet (sep, TRUE); if (num_renormalized > 0) { if (log_fp != NULL) { fprintf (log_fp, "Removed %d wrapper sets\n", num_renormalized); } rval = TRUE; } break; case FixSetsAction_renormalize_nuc_prot_sets: num_renormalized = RenormalizeNucProtSets (sep, TRUE); if (num_renormalized > 0) { if (log_fp != NULL) { fprintf (log_fp, "Renormalized %d sets\n", num_renormalized); } rval = TRUE; } break; case FixSetsAction_fix_pop_to_phy: num_renormalized = FixPopToPhySets (sep); if (num_renormalized > 0) { if (log_fp != NULL) { fprintf (log_fp, "Converted %d sets\n", num_renormalized); } rval = TRUE; } break; } return rval; } static Boolean PerformApplyTableInSeqEntry (SeqEntryPtr sep, ApplyTableActionPtr action, FILE * log_fp) { Boolean rval = FALSE; ValNodePtr table, data_table; FILE *fp; ValNodePtr err_list = NULL; ValNodePtr val, vnp, obj_table = NULL, dup_dest_errs; ValNodePtr columns = NULL, dup_col_list; TabColumnConfigPtr t; if (action == NULL) { return FALSE; } if (action->in_memory_table != NULL && action->in_memory_table->data.ptrvalue == NULL) { if (log_fp != NULL) { fprintf (log_fp, "In memory table missing from apply table action.\n"); } return FALSE; } if (action->in_memory_table == NULL) { if (StringHasNoText (action->filename)) { return FALSE; } fp = FileOpen (action->filename, "r"); if (fp == NULL) { if (log_fp != NULL) { fprintf (log_fp, "Unable to open %s\n", action->filename); } return FALSE; } table = ReadTabTableFromFile (fp); FileClose (fp); if (table == NULL) { if (log_fp != NULL) { fprintf (log_fp, "Unable to read table from %s\n", action->filename); } return FALSE; } } else { table = (ValNodePtr) action->in_memory_table->data.ptrvalue; } if (table->next == NULL) { if (log_fp != NULL) { fprintf (log_fp, "Table must have at least two rows, one header and one 
data, unable to apply table from %s\n", action->filename); } if (action->in_memory_table == NULL) { table = FreeTabTable (table); } return FALSE; } data_table = table; t = TabColumnConfigNew (); t->match_type = MatchTypeFromTableMatchType (action->match_type); if (t->match_type == NULL) { if (log_fp != NULL) { fprintf (log_fp, "No match type for table, unable to apply table from %s\n", action->filename); } t = TabColumnConfigFree (t); if (action->in_memory_table == NULL) { table = FreeTabTable (table); } return FALSE; } else if (t->match_type->choice == eTableMatchAny) { if (table->next->next != NULL) { if (log_fp != NULL) { fprintf (log_fp, "Table must only have two rows for Match All Rows option, unable to apply table from %s\n", action->filename); } t = TabColumnConfigFree (t); if (action->in_memory_table == NULL) { table = FreeTabTable (table); } return FALSE; } else { data_table = table->next; } } ValNodeAddPointer (&columns, 0, t); rval = TRUE; for (val = table->data.ptrvalue, vnp = columns; val != NULL && rval; val = val->next, vnp = vnp->next) { if (vnp == NULL) { vnp = ValNodeNew (columns); } t = vnp->data.ptrvalue; if (t == NULL) { t = TabColumnConfigNew (); vnp->data.ptrvalue = t; } if (t->match_type == NULL && t->field == NULL) { t->field = FieldTypeFromString (val->data.ptrvalue); if (t->field == NULL) { t = TabColumnConfigFree (t); vnp->data.ptrvalue = NULL; rval = FALSE; if (log_fp != NULL) { fprintf (log_fp, "%s not recognized as qualifier name, unable to apply table from %s\n", (CharPtr) val->data.ptrvalue, action->filename); } } else { if (IsFieldTypeCDSProduct(t->field)) { t->match_mrna = action->also_change_mrna; } t->skip_blank = action->skip_blanks; } } } if (rval) { dup_col_list = CheckForDuplicateColumns (columns); if (dup_col_list != NULL) { FixDuplicateColumns (columns); dup_col_list = ValNodeFreeData(dup_col_list); } if (log_fp != NULL) { err_list = ValidateTabTableValues (data_table, columns); for (vnp = err_list; vnp != NULL; vnp = 
vnp->next) { fprintf (log_fp, "%s\n", (CharPtr) vnp->data.ptrvalue); } err_list = ValNodeFreeData (err_list); } obj_table = GetObjectTableForTabTable (sep, data_table, columns, &err_list); dup_dest_errs = CheckObjTableForRowsThatApplyToTheSameDestination (obj_table); if (dup_dest_errs != NULL) { if (log_fp != NULL) { for (vnp = dup_dest_errs; vnp != NULL; vnp = vnp->next) { fprintf (log_fp, "%s\n", (CharPtr) vnp->data.ptrvalue); } fprintf (log_fp, "For one or more columns, two or more rows in the table apply to the same object. Cannot apply table."); } dup_dest_errs = ValNodeFreeData (dup_dest_errs); rval = FALSE; } else { ValNodeLink (&err_list, CheckObjTableForExistingText (sep, data_table, columns, obj_table)); /* look for errors with choice 1 */ for (vnp = err_list; vnp != NULL; vnp = vnp->next) { if (vnp->choice == 1) { if (log_fp != NULL) { fprintf (log_fp, "%s\n", (CharPtr) vnp->data.ptrvalue); } } } /* look for errors with choice 0 */ for (vnp = err_list; vnp != NULL; vnp = vnp->next) { if (vnp->choice == 0) { if (log_fp != NULL) { fprintf (log_fp, "%s\n", (CharPtr) vnp->data.ptrvalue); } } } err_list = ValNodeFreeData (err_list); if (!rval) { DeleteMarkedObjects (SeqMgrGetEntityIDForSeqEntry (sep), 0, NULL); } else { err_list = ApplyTableValuesToObjectTable (sep, data_table, columns, obj_table); if (log_fp != NULL) { for (vnp = err_list; vnp != NULL; vnp = vnp->next) { fprintf (log_fp, "%s\n", (CharPtr) vnp->data.ptrvalue); } } } } obj_table = FreeObjectTableForTabTable (obj_table); } err_list = ValNodeFreeData (err_list); if (action->in_memory_table == NULL) { table = FreeTabTable (table); } columns = TabColumnConfigListFree (columns); return rval; } typedef struct addfiledescriptorsdata { SeqDescPtr sdp_list; ValNodePtr constraint; Int4 num_affected; } AddFileDescriptorsData, PNTR AddFileDescriptorsPtr; static void AddFileDescriptorsCallback (BioseqPtr bsp, Pointer data) { AddFileDescriptorsPtr a; BioseqSetPtr bssp = NULL; if (bsp == NULL || (a = 
(AddFileDescriptorsPtr) data) == NULL || ISA_aa (bsp->mol)) { return; } if (!DoesObjectMatchConstraintChoiceSet (OBJ_BIOSEQ, bsp, a->constraint)) { return; } if (bsp->idx.parenttype == OBJ_BIOSEQSET) { bssp = (BioseqSetPtr) bsp->idx.parentptr; } if (bssp != NULL && bssp->_class == BioseqseqSet_class_nuc_prot) { ValNodeLink (&(bssp->descr), AsnIoMemCopy (a->sdp_list, (AsnReadFunc) SeqDescrAsnRead, (AsnWriteFunc) SeqDescrAsnWrite)); } else { ValNodeLink (&(bsp->descr), AsnIoMemCopy (a->sdp_list, (AsnReadFunc) SeqDescrAsnRead, (AsnWriteFunc) SeqDescrAsnWrite)); } a->num_affected++; } static Boolean AddFileDescriptors (SeqEntryPtr sep, AddDescriptorListActionPtr action, FILE * log_fp) { Boolean rval = FALSE; SeqDescPtr sdp, sdp_next; AsnIoPtr aip; AddFileDescriptorsData a; if (action == NULL || action->descriptor_list == NULL) { return FALSE; } if (action->descriptor_list->in_memory_table != NULL && action->descriptor_list->in_memory_table->data.ptrvalue == NULL) { if (log_fp != NULL) { fprintf (log_fp, "In memory table missing from add file descriptors action.\n"); } return FALSE; } MemSet (&a, 0, sizeof (AddFileDescriptorsData)); a.constraint = action->constraint; if (action->descriptor_list->in_memory_table == NULL) { if (StringHasNoText (action->descriptor_list->filename)) { return FALSE; } aip = AsnIoOpen (action->descriptor_list->filename, "r"); if (aip == NULL) { if (log_fp != NULL) { fprintf (log_fp, "Unable to open %s\n", action->descriptor_list->filename); } return FALSE; } while (sdp = SeqDescAsnRead (aip, NULL)) { ValNodeLink (&(a.sdp_list), sdp); } AsnIoClose (aip); if (a.sdp_list == NULL) { if (log_fp != NULL) { fprintf (log_fp, "Unable to read table from %s\n", action->descriptor_list->filename); } return FALSE; } } else { a.sdp_list = (ValNodePtr) action->descriptor_list->in_memory_table->data.ptrvalue; } VisitBioseqsInSep (sep, &a, AddFileDescriptorsCallback); if (a.num_affected > 0) { rval = TRUE; if (log_fp != NULL) { fprintf (log_fp, "Applied 
descriptors from %s to %d bioseqs\n", action->descriptor_list->filename, a.num_affected); } } if (action->descriptor_list->in_memory_table == NULL) { for (sdp = a.sdp_list; sdp != NULL; sdp = sdp_next) { sdp_next = sdp->next; sdp->next = NULL; sdp = SeqDescFree (sdp); } } return rval; } static void AutoApplyStructuredCommentPrefixesCallback (SeqDescPtr sdp, Pointer data) { UserObjectPtr uop; CharPtr prefix; if (data != NULL && sdp != NULL && sdp->choice == Seq_descr_user && (uop = (UserObjectPtr) sdp->data.ptrvalue) != NULL && uop->type != NULL && StringICmp (uop->type->str, "StructuredComment") == 0 && (prefix = AutoapplyStructuredCommentPrefix (uop)) != NULL) { ValNodeAddPointer ((ValNodePtr PNTR) data, 0, prefix); } } static Boolean AutoApplyStructuredCommentPrefixes (SeqEntryPtr sep, FILE *log_fp) { ValNodePtr added = NULL, vnp; Int4 count = 0; CharPtr curr_prefix = NULL; VisitDescriptorsInSep (sep, &added, AutoApplyStructuredCommentPrefixesCallback); if (added == NULL) { return FALSE; } if (log_fp != NULL) { added = ValNodeSort (added, SortVnpByString); curr_prefix = added->data.ptrvalue; count = 1; for (vnp = added->next; vnp != NULL; vnp = vnp->next) { if (StringCmp (curr_prefix, vnp->data.ptrvalue) == 0) { count++; } else { fprintf (log_fp, "Added %d %s structured comment prefix%s\n", count, curr_prefix, count == 1 ? "" : "es"); curr_prefix = vnp->data.ptrvalue; count = 1; } } fprintf (log_fp, "Added %d %s structured comment prefix%s\n", count, curr_prefix, count == 1 ? 
"" : "es"); } added = ValNodeFree (added); return TRUE; } typedef struct performremovesequencesdata{ RemoveSequencesActionPtr action; Boolean any; FILE *log_fp; } PerformRemoveSequencesData, PNTR PerformRemoveSequencesPtr; static void PerformRemoveSequencesInSeqEntryCallback (BioseqPtr bsp, Pointer userdata) { PerformRemoveSequencesPtr p; Char id_buf[PATH_MAX]; SeqEntryPtr sep; BioseqSetPtr bssp; p = (PerformRemoveSequencesPtr) userdata; if (p == NULL || p->action == NULL || p->action->constraint == NULL) { return; } if (DoesObjectMatchConstraintChoiceSet (OBJ_BIOSEQ, bsp, p->action->constraint)) { if (!ISA_aa (bsp->mol) && (sep = GetBestTopParentForData (bsp->idx.entityID, bsp)) != NULL && IS_Bioseq_set (sep) && (bssp = (BioseqSetPtr) sep->data.ptrvalue) != NULL) { bssp->idx.deleteme = TRUE; } else { bsp->idx.deleteme = TRUE; } p->any = TRUE; if (p->log_fp != NULL) { SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1); fprintf (p->log_fp, "Removed %s\n", id_buf); } } } static Boolean PerformRemoveSequencesInSeqEntry (SeqEntryPtr sep, RemoveSequencesActionPtr action, FILE * log_fp) { PerformRemoveSequencesData prd; if (action == NULL || action->constraint == NULL) { return FALSE; } prd.action = action; prd.any = FALSE; prd.log_fp = log_fp; VisitBioseqsInSep (sep, &prd, PerformRemoveSequencesInSeqEntryCallback); if (prd.any) { DeleteMarkedObjects (SeqMgrGetEntityIDForSeqEntry(sep), 0, NULL); } return prd.any; } typedef struct propagateseqtech { BioseqPtr bsp; UserObjectPtr uop; CharPtr filename; } PropagateSeqTechData, PNTR PropagateSeqTechPtr; static Boolean IsStructuredCommentWithPrefix (UserObjectPtr uop, CharPtr prefix) { UserFieldPtr ufp; if (uop == NULL || uop->type == NULL || StringICmp (uop->type->str, "StructuredComment") != 0) { return FALSE; } for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { if (ufp->label != NULL && StringICmp (ufp->label->str, "StructuredCommentPrefix") == 0) { if (ufp->choice != 1 || 
StringCmp (ufp->data.ptrvalue, prefix) != 0) { return FALSE; } else { return TRUE; } } } return FALSE; } static Boolean TruncateAtLocalId (SeqIdPtr sip_local, CharPtr filename) { Char id_buf[20]; CharPtr cmp; ObjectIdPtr oip; Boolean removed_id = FALSE; Int4 len, f_len; if (filename == NULL || sip_local == NULL || (oip = (ObjectIdPtr) sip_local->data.ptrvalue) == NULL) { return FALSE; } f_len = StringLen (filename); if (oip->id > 0) { sprintf (id_buf, "%d", oip->id); cmp = id_buf; } else { cmp = oip->str; } len = StringLen (cmp); if (f_len > len + 1 && filename[f_len - len - 1] == '/' && StringCmp (filename + (f_len - len), cmp) == 0) { filename[f_len - len - 1] = 0; removed_id = TRUE; } return removed_id; } static void FindSeqTechBsp (BioseqPtr bsp, Pointer data) { SeqIdPtr sip, sip_local = NULL; DbtagPtr dbtag; PropagateSeqTechPtr p; CharPtr cp; SeqDescPtr sdp; SeqMgrDescContext context; if (bsp == NULL || ISA_aa(bsp->mol) || data == NULL) { return; } p = (PropagateSeqTechPtr) MemNew (sizeof (PropagateSeqTechData)); p->bsp = bsp; /* find NCBIFILE id */ for (sip = bsp->id; sip != NULL; sip = sip->next) { if (sip->choice == SEQID_GENERAL && (dbtag = (DbtagPtr) sip->data.ptrvalue) != NULL && (StringICmp (dbtag->db, "NCBIFILE") == 0)) { p->filename = StringSave (dbtag->tag->str); } else if (sip->choice == SEQID_LOCAL) { sip_local = sip; } } if (p->filename == NULL) { p = MemFree (p); return; } if (!TruncateAtLocalId(sip_local, p->filename)) { cp = StringRChr (p->filename, '/'); if (cp != NULL) { *cp = 0; } } for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context); sdp != NULL && p->uop == NULL; sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) { if (IsStructuredCommentWithPrefix(sdp->data.ptrvalue, "##Assembly-Data-START##")) { p->uop = sdp->data.ptrvalue; } } ValNodeAddPointer ((ValNodePtr PNTR) data, 0, p); } static int LIBCALLBACK SortVnpByPropagateSeqTech (VoidPtr ptr1, VoidPtr ptr2) { PropagateSeqTechPtr str1; 
PropagateSeqTechPtr str2; ValNodePtr vnp1; ValNodePtr vnp2; int comp = 0; if (ptr1 != NULL && ptr2 != NULL) { vnp1 = *((ValNodePtr PNTR) ptr1); vnp2 = *((ValNodePtr PNTR) ptr2); if (vnp1 != NULL && vnp2 != NULL) { str1 = (PropagateSeqTechPtr) vnp1->data.ptrvalue; str2 = (PropagateSeqTechPtr) vnp2->data.ptrvalue; if (str1 != NULL && str2 != NULL) { comp = StringCmp (str1->filename, str2->filename); if (comp == 0) { if (str1->uop == NULL) { comp = 1; } else if (str2->uop == NULL) { comp = -1; } } } } } return comp; } static Boolean PerformPropagateSequenceTechnology (SeqEntryPtr sep, Pointer action, FILE * log_fp) { ValNodePtr list = NULL, vnp; PropagateSeqTechPtr p1, p2; Boolean rval = FALSE; SeqDescPtr sdp; Int4 num_added = 0; VisitBioseqsInSep (sep, &list, FindSeqTechBsp); list = ValNodeSort (list, SortVnpByPropagateSeqTech); if (list == NULL) { return FALSE; } p1 = list->data.ptrvalue; for (vnp = list->next; vnp != NULL; vnp = vnp->next) { p2 = vnp->data.ptrvalue; if (p1->uop == NULL || StringCmp (p1->filename, p2->filename) != 0) { p1 = p2; } else if (p2->uop == NULL) { sdp = CreateNewDescriptorOnBioseq (p2->bsp, Seq_descr_user); sdp->data.ptrvalue = AsnIoMemCopy (p1->uop, (AsnReadFunc) UserObjectAsnRead, (AsnWriteFunc) UserObjectAsnWrite); num_added ++; rval = TRUE; } } for (vnp = list; vnp != NULL; vnp = vnp->next) { p1 = vnp->data.ptrvalue; p1->filename = MemFree (p1->filename); vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue); } list = ValNodeFree (list); if (num_added > 0 && log_fp != NULL) { fprintf (log_fp, "Added %d Assembly Data descriptors.\n", num_added); } return rval; } typedef struct ecrepdata { CharPtr before; CharPtr after; } EcRepData, PNTR EcRepPtr; static EcRepPtr EcRepFree (EcRepPtr e) { if (e != NULL) { e->before = MemFree (e->before); e->after = MemFree (e->after); e = MemFree (e); } return e; } static int LIBCALLBACK SortVnpByEcBefore (VoidPtr ptr1, VoidPtr ptr2) { EcRepPtr erp1, erp2; CharPtr str1, str2; ValNodePtr vnp1, vnp2; if (ptr1 
== NULL || ptr2 == NULL) return 0; vnp1 = *((ValNodePtr PNTR) ptr1); vnp2 = *((ValNodePtr PNTR) ptr2); if (vnp1 == NULL || vnp2 == NULL) return 0; erp1 = (EcRepPtr) vnp1->data.ptrvalue; erp2 = (EcRepPtr) vnp2->data.ptrvalue; if (erp1 == NULL || erp2 == NULL) return 0; str1 = erp1->before; str2 = erp2->before; if (str1 == NULL || str2 == NULL) return 0; return StringCmp (str1, str2); } static EcRepPtr PNTR SetupECReplacementTable (CharPtr file, Int4Ptr len) { EcRepPtr erp; FileCache fc; FILE *fp = NULL; Int4 i; ValNodePtr last = NULL; Char line [512]; Char path [PATH_MAX]; CharPtr ptr; ErrSev sev; CharPtr str; ValNodePtr vnp; ValNodePtr ec_rep_list = NULL; EcRepPtr PNTR ec_rep_data = NULL; Int4 ec_rep_len = 0; if (FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) { FileBuildPath (path, NULL, file); sev = ErrSetMessageLevel (SEV_ERROR); fp = FileOpen (path, "r"); ErrSetMessageLevel (sev); if (fp != NULL) { FileCacheSetup (&fc, fp); str = FileCacheReadLine (&fc, line, sizeof (line), NULL); while (str != NULL) { if (StringDoesHaveText (str)) { ptr = StringChr (str, '\t'); if (ptr != NULL) { *ptr = '\0'; ptr++; erp = (EcRepPtr) MemNew (sizeof (EcRepData)); if (erp != NULL) { erp->before = StringSave (str); erp->after = StringSave (ptr); vnp = ValNodeAddPointer (&last, 0, (Pointer) erp); if (ec_rep_list == NULL) { ec_rep_list = vnp; } last = vnp; } } } str = FileCacheReadLine (&fc, line, sizeof (line), NULL); } FileClose (fp); ec_rep_len = ValNodeLen (ec_rep_list); if (ec_rep_len > 0) { ec_rep_list = ValNodeSort (ec_rep_list, SortVnpByEcBefore); ec_rep_data = (EcRepPtr PNTR) MemNew (sizeof (EcRepPtr) * (ec_rep_len + 1)); if (ec_rep_data != NULL) { for (vnp = ec_rep_list, i = 0; vnp != NULL; vnp = vnp->next, i++) { erp = (EcRepPtr) vnp->data.ptrvalue; ec_rep_data [i] = erp; } } } } } ec_rep_list = ValNodeFree (ec_rep_list); *len = ec_rep_len; return ec_rep_data; } static EcRepPtr PNTR FreeECReplacementTable (EcRepPtr PNTR ec_rep_data, Int4 ec_rep_len) { Int4 i; if 
(ec_rep_data == NULL) { return NULL; } for (i = 0; i < ec_rep_len; i++) { ec_rep_data[i] = EcRepFree(ec_rep_data[i]); } ec_rep_data = MemFree (ec_rep_data); return ec_rep_data; } static EcRepPtr GetEcReplacementFromTable (CharPtr str, EcRepPtr PNTR ec_rep_data, Int4 ec_rep_len) { Int4 L, R, mid; EcRepPtr erp = NULL; L = 0; R = ec_rep_len - 1; while (L < R) { mid = (L + R) / 2; erp = ec_rep_data [(int) mid]; if (erp != NULL && StringCmp (erp->before, str) < 0) { L = mid + 1; } else { R = mid; } } erp = ec_rep_data [(int) R]; return erp; } typedef struct replaceupdatedec { FILE *log_fp; UpdateReplacedEcNumbersActionPtr action; EcRepPtr PNTR ec_rep_data; Int4 ec_rep_len; Int4 num_removed; Int4 num_replaced; } ReplaceUpdatedECData, PNTR ReplaceUpdatedEcPtr; static Boolean GetLocusTagFromProtRef (SeqFeatPtr sfp, CharPtr PNTR p_locus_tag) { BioseqPtr bsp; SeqFeatPtr cds; SeqMgrFeatContext fcontext; SeqFeatPtr gene; GeneRefPtr grp; if (sfp == NULL || p_locus_tag == NULL) return FALSE; grp = SeqMgrGetGeneXref (sfp); if (grp != NULL) { if (SeqMgrGeneIsSuppressed (grp)) return FALSE; if (StringDoesHaveText (grp->locus_tag)) { *p_locus_tag = StringSave (grp->locus_tag); return TRUE; } else if (StringDoesHaveText (grp->locus)) { *p_locus_tag = StringSave (grp->locus); return TRUE; } } bsp = BioseqFindFromSeqLoc (sfp->location); if (bsp == NULL) return FALSE; cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext); if (cds == NULL) return FALSE; grp = SeqMgrGetGeneXref (cds); if (grp != NULL) { if (SeqMgrGeneIsSuppressed (grp)) return FALSE; if (StringDoesHaveText (grp->locus_tag)) { *p_locus_tag = StringSave (grp->locus_tag); return TRUE; } else if (StringDoesHaveText (grp->locus)) { *p_locus_tag = StringSave (grp->locus); return TRUE; } } gene = SeqMgrGetOverlappingGene (cds->location, &fcontext); if (gene == NULL || gene->data.choice != SEQFEAT_GENE) return FALSE; grp = (GeneRefPtr) gene->data.value.ptrvalue; if (grp != NULL) { if (SeqMgrGeneIsSuppressed (grp)) return FALSE; if 
(StringDoesHaveText (grp->locus_tag)) { *p_locus_tag = StringSave (grp->locus_tag); return TRUE; } else if (StringDoesHaveText (grp->locus)) { *p_locus_tag = StringSave (grp->locus); return TRUE; } } return FALSE; } static void UpdateECCallback (SeqFeatPtr sfp, Pointer userdata) { ProtRefPtr prp; CharPtr str; ValNodePtr vnp; CharPtr locus_tag = NULL; ReplaceUpdatedEcPtr r; EcRepPtr erp; if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return; prp = (ProtRefPtr) sfp->data.value.ptrvalue; if (prp == NULL || prp->ec == NULL) return; r = (ReplaceUpdatedEcPtr) userdata; if (r == NULL) { return; } GetLocusTagFromProtRef (sfp, &locus_tag); for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) { str = (CharPtr) vnp->data.ptrvalue; if (StringHasNoText (str)) continue; if (ValidateECnumber (str)) { erp = GetEcReplacementFromTable(str, r->ec_rep_data, r->ec_rep_len); if (erp != NULL && StringCmp (erp->before, str) == 0) { if (StringChr (erp->after, '\t') == NULL) { if (r->log_fp != NULL) { fprintf (r->log_fp, "%s:replaced %s with %s\n", locus_tag == NULL ? "No locus tag" : locus_tag, erp->before, erp->after); } vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue); vnp->data.ptrvalue = StringSave (erp->after); r->num_replaced++; } else if (r->action->delete_multiple_replacement) { if (r->log_fp != NULL) { fprintf (r->log_fp, "%s: removed %s\n", locus_tag == NULL ? "No locus tag" : locus_tag, erp->before); } vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue); r->num_removed++; } } str = vnp->data.ptrvalue; if ( str != NULL && r->action->delete_unrecognized && ECnumberNotInList (str)) { if (r->log_fp != NULL) { fprintf (r->log_fp, "%s: deleted %s\n", locus_tag == NULL ? "No locus tag" : locus_tag, str); } vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue); r->num_removed++; } } else { if (r->action->delete_improper_format) { if (r->log_fp != NULL) { fprintf (r->log_fp, "%s: removed %s\n", locus_tag == NULL ? 
"No locus tag" : locus_tag, str); } vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue); r->num_removed++; } } } locus_tag = MemFree (locus_tag); } static Boolean ReplaceUpdatedECNumbers (SeqEntryPtr sep, UpdateReplacedEcNumbersActionPtr action, FILE *log_fp) { ReplaceUpdatedECData r; MemSet (&r, 0, sizeof (ReplaceUpdatedECData)); r.action = action; r.log_fp = log_fp; r.ec_rep_data = SetupECReplacementTable ("ecnum_replaced.txt", &(r.ec_rep_len)); VisitFeaturesInSep (sep, (Pointer) &r, UpdateECCallback); r.ec_rep_data = FreeECReplacementTable(r.ec_rep_data, r.ec_rep_len); if (r.num_removed > 0 || r.num_replaced > 0) { return TRUE; } else { return FALSE; } } typedef struct retranslatecdscallback { Int4 num_retranslated; RetranslateCdsActionPtr action; } RetranslateCDSCallbackData, PNTR RetranslateCDSCallbackPtr; static void PerformRetranslationsCallback (SeqFeatPtr sfp, Pointer data) { RetranslateCDSCallbackPtr r; if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION && (r = (RetranslateCDSCallbackPtr) data) != NULL && r->action != NULL && RetranslateOneCDS (sfp, sfp->idx.entityID, !r->action->obey_stop_codon, r->action->obey_stop_codon)) { r->num_retranslated++; } } static Boolean PerformRetranslations (SeqEntryPtr sep, RetranslateCdsActionPtr action, FILE *log_fp) { RetranslateCDSCallbackData r; MemSet (&r, 0, sizeof (RetranslateCDSCallbackData)); r.action = action; VisitFeaturesInSep (sep, &r, PerformRetranslationsCallback); if (r.num_retranslated > 0) { if (log_fp != NULL) { fprintf (log_fp, "Retranslated %d coding regions\n", r.num_retranslated); } return TRUE; } else { return FALSE; } } typedef struct adjustfeaturesforgapscallback { Int4 num_processed; AdjustFeaturesForGapsActionPtr action; } AdjustFeaturesForGapCallbackData, PNTR AdjustFeaturesForGapCallbackPtr; static void PerformAdjustFeaturesForGapsCallback (SeqFeatPtr sfp, Pointer data) { AdjustFeaturesForGapCallbackPtr r; if (sfp != NULL && (r = (AdjustFeaturesForGapCallbackPtr) data) != NULL && 
r->action != NULL) { AdjustFeatureForGapsCallback (sfp, r->action); r->num_processed++; } } static Boolean PerformAdjustFeaturesForGaps (SeqEntryPtr sep, AdjustFeaturesForGapsActionPtr action, FILE *log_fp) { AdjustFeaturesForGapCallbackData r; MemSet (&r, 0, sizeof (AdjustFeaturesForGapCallbackData)); r.action = action; VisitFeaturesInSep (sep, &r, PerformAdjustFeaturesForGapsCallback); if (r.num_processed > 0) { if (log_fp != NULL) { fprintf (log_fp, "Adjusted %d features for gaps\n", r.num_processed); } return TRUE; } else { return FALSE; } } NLM_EXTERN CharPtr SummarizePerformAutofixAction (AutofixActionPtr action) { DiscrepancyType test_type; CharPtr fmt = "Perform Autofix for %s Discrepancy Report Test"; CharPtr summ; if (action == NULL || (test_type = GetDiscrepancyTypeFromSettingName (action->test_name)) == MAX_DISC_TYPE) { return NULL; } summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (action->test_name))); sprintf (summ, fmt, action->test_name); return summ; } NLM_EXTERN CharPtr SummarizeFixSetsAction (FixSetsActionPtr action) { CharPtr summ = NULL; if (action == NULL) { return NULL; } switch (action->choice) { case FixSetsAction_remove_single_item_set: summ = StringSave ("Remove single-sequence pop, phy, mut, or eco wrapper set without alignment"); break; case FixSetsAction_renormalize_nuc_prot_sets: summ = StringSave ("Renormalize nuc-prot sets"); break; case FixSetsAction_fix_pop_to_phy: summ = StringSave ("Convert pop sets to phy sets when taxnames are inconsistent"); break; } return summ; } NLM_EXTERN CharPtr SummarizeUpdateSequencesAction (UpdateSequencesActionPtr action) { CharPtr summ, fmt = "Update sequences with FASTA from file %s"; CharPtr add_cit_subs = ", add Cit-subs to sequences changed"; Int4 len; if (action == NULL || StringHasNoText (action->filename)) { return NULL; } len = StringLen(fmt) + StringLen (action->filename); if (action->add_cit_subs) { len += StringLen (add_cit_subs); } summ = (CharPtr) MemNew (sizeof 
(Char) * len); sprintf (summ, fmt, action->filename); if (action->add_cit_subs) { StringCat (summ, add_cit_subs); } return summ; } NLM_EXTERN CharPtr SummarizeCreateTSAIDsAction (CreateTSAIdsActionPtr action) { CharPtr summ = NULL; CharPtr suffix_fmt = ", use suffix %s"; CharPtr local_fmt = "Create TSA IDs from local IDs"; CharPtr defline_fmt = "Create TSA IDs from %s in defline"; CharPtr text_portion; Int4 len; if (action == NULL || action->src == NULL) { return NULL; } switch (action->src->choice) { case CreateTSAIdsSrc_local_id: len = StringLen (local_fmt) + 1; if (!StringHasNoText (action->suffix)) { len += StringLen (suffix_fmt) + StringLen (action->suffix); } summ = (CharPtr) MemNew (sizeof (Char) * len); sprintf (summ, "%s", local_fmt); if (!StringHasNoText (action->suffix)) { sprintf (summ + StringLen (summ), suffix_fmt, action->suffix); } break; case CreateTSAIdsSrc_defline: text_portion = SummarizeTextPortion (action->src->data.ptrvalue); if (text_portion == NULL) { text_portion = StringSave ("entire text"); } len = StringLen (defline_fmt) + StringLen (text_portion); if (!StringHasNoText (action->suffix)) { len += StringLen (suffix_fmt) + StringLen (action->suffix); } summ = (CharPtr) MemNew (sizeof (Char) * len); sprintf (summ, defline_fmt, text_portion); text_portion = MemFree (text_portion); if (!StringHasNoText (action->suffix)) { sprintf (summ + StringLen (summ), suffix_fmt, action->suffix); } break; } return summ; } NLM_EXTERN CharPtr SummarizeApplyTableAction (ApplyTableActionPtr action) { CharPtr summ, fmt = "Apply table from file %s"; Int4 len; if (action == NULL || StringHasNoText (action->filename)) { return NULL; } len = StringLen(fmt) + StringLen (action->filename); summ = (CharPtr) MemNew (sizeof (Char) * len); sprintf (summ, fmt, action->filename); return summ; } NLM_EXTERN CharPtr SummarizeAddDescriptorListAction (AddDescriptorListActionPtr action) { CharPtr summ, fmt = "Add descriptors from file %s to nucleotide sequences"; CharPtr 
constraint; Int4 len; if (action == NULL || action->descriptor_list == NULL || StringHasNoText (action->descriptor_list->filename)) { return NULL; } constraint = SummarizeConstraintSet (action->constraint); len = StringLen(fmt) + StringLen (action->descriptor_list->filename) + StringLen (constraint) + 1; summ = (CharPtr) MemNew (sizeof (Char) * len); sprintf (summ, fmt, action->descriptor_list->filename); if (constraint != NULL) { StringCat (summ, " "); StringCat (summ, constraint); constraint = MemFree (constraint); } return summ; } NLM_EXTERN CharPtr SummarizeRemoveSequencesAction (RemoveSequencesActionPtr action) { CharPtr summ = NULL, constraint, fmt = "Remove sequences %s"; if (action == NULL || action->constraint == NULL) { return NULL; } constraint = SummarizeConstraintSet (action->constraint); if (constraint != NULL) { summ = (CharPtr) MemNew (sizeof (CharPtr) * (StringLen (fmt) + StringLen (constraint))); sprintf (summ, fmt, constraint); constraint = MemFree (constraint); } return summ; } NLM_EXTERN CharPtr SummarizePropagateSequenceTechnology (Pointer action) { return StringSave ("Propagate Assembly-Data structured comments to sequences with same filename"); } /* Functions for summarizing macro actions for display */ static CharPtr SummarizeApplyFeatureAction (ApplyFeatureActionPtr a) { CharPtr label = NULL; CharPtr str; CharPtr fmt = "Apply %s"; if (a == NULL) { str = StringSave ("No action"); } else { label = GetFeatureNameFromFeatureType (a->type); str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label))); sprintf (str, fmt, label); } return str; } static CharPtr SummarizeRemoveFeatureAction (RemoveFeatureActionPtr a) { CharPtr label = NULL; CharPtr constraint, str; CharPtr fmt = "Remove %s"; CharPtr constraint_fmt = "Remove %s %s"; if (a == NULL) { str = StringSave ("No action"); } else { label = GetFeatureNameFromFeatureType (a->type); constraint = SummarizeConstraintSet (a->constraint); if (constraint == NULL) { str = (CharPtr) 
MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label))); sprintf (str, fmt, label); } else { str = (CharPtr) MemNew (sizeof (Char) * (StringLen (constraint_fmt) + StringLen (label) + StringLen (constraint))); sprintf (str, constraint_fmt, label, constraint); constraint = MemFree (constraint); } } return str; } static CharPtr SummarizeConvertSourceOptions (ValNodePtr vnp) { ConvertFromCDSOptionsPtr options; CharPtr fmt = "(%sremove overlapping mRNA, %sremove overlapping gene, %sremove transcript ID)"; CharPtr str; if (vnp == NULL || vnp->choice != ConvertFeatureSrcOptions_cds || vnp->data.ptrvalue == NULL) { return NULL; } options = (ConvertFromCDSOptionsPtr) vnp->data.ptrvalue; str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + 21)); sprintf (str, fmt, options->remove_mRNA ? "" : "do not ", options->remove_gene ? "" : "do not ", options->remove_transcript_id ? "" : "do not "); return str; } static CharPtr SummarizeConvertDestOptions (ValNodePtr vnp) { RegionTypePtr r; CharPtr str = NULL; if (vnp == NULL) return NULL; switch (vnp->choice) { case ConvertFeatureDstOptions_bond: str = StringSave (GetMacroBondTypeName(vnp->data.intvalue)); break; case ConvertFeatureDstOptions_site: str = StringSave (GetMacroSiteTypeName(vnp->data.intvalue)); break; case ConvertFeatureDstOptions_region: r = (RegionTypePtr) vnp->data.ptrvalue; if (r != NULL) { if (r->create_nucleotide) { str = StringSave ("on nucleotide sequence"); } else { str = StringSave ("on protein sequence"); } } break; } return str; } static CharPtr SummarizeConvertFeatureAction (ConvertFeatureActionPtr a) { CharPtr str = NULL, from_label, to_label, constraint, src_options, dst_options; CharPtr fmt = "Convert %s to %s"; CharPtr keep_orig = ", keep original feature"; CharPtr remove_orig = ", remove original feature"; Int4 len; if (a == NULL) { str = StringSave ("No action"); } else { from_label = GetFeatureNameFromFeatureType (a->type_from); to_label = GetFeatureNameFromFeatureType (a->type_to); 
src_options = SummarizeConvertSourceOptions (a->src_options); dst_options = SummarizeConvertDestOptions (a->dst_options); constraint = SummarizeConstraintSet (a->src_feat_constraint); len = StringLen (fmt) + StringLen (from_label) + StringLen (to_label); if (src_options != NULL) { len += StringLen (src_options) + 3; } if (dst_options != NULL) { len += StringLen (dst_options) + 1; } if (constraint != NULL) { len += StringLen (constraint) + 1; } if (a->leave_original) { len += StringLen (keep_orig); } else { len += StringLen (remove_orig); } str = (CharPtr) MemNew (sizeof (Char) * len); sprintf (str, fmt, from_label, to_label); if (dst_options != NULL) { StringCat (str, " "); StringCat (str, dst_options); dst_options = MemFree (dst_options); } if (src_options != NULL) { StringCat (str, ", "); StringCat (str, src_options); src_options = MemFree (src_options); } if (constraint != NULL) { StringCat (str, " "); StringCat (str, constraint); constraint = MemFree (constraint); } if (a->leave_original) { StringCat (str, keep_orig); } else { StringCat (str, remove_orig); } } return str; } static CharPtr SummarizeEditLocationStrand (EditLocationStrandPtr strand) { CharPtr from_label = NULL, to_label = NULL; CharPtr fmt = "Convert %s strand to %s"; CharPtr str = NULL; if (strand == NULL) return NULL; switch (strand->strand_from) { case Feature_location_strand_from_any: from_label = "any"; break; case Feature_location_strand_from_plus: from_label = "plus"; break; case Feature_location_strand_from_minus: from_label = "minus"; break; case Feature_location_strand_from_unknown: from_label = "unknown"; break; case Feature_location_strand_from_both: from_label = "both"; break; } switch (strand->strand_to) { case Feature_location_strand_to_plus: to_label = "plus"; break; case Feature_location_strand_to_minus: to_label = "minus"; break; case Feature_location_strand_to_unknown: to_label = "unknown"; break; case Feature_location_strand_to_both: to_label = "both"; break; case 
Feature_location_strand_to_reverse: to_label = "reverse"; break; } if (from_label != NULL && to_label != NULL) { str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (from_label) + StringLen (to_label))); sprintf (str, fmt, from_label, to_label); } return str; } static CharPtr SummarizePartial5SetAction (Partial5SetActionPtr a) { CharPtr str = NULL; CharPtr constraint = NULL, extend = NULL; CharPtr fmt = "Set 5' partial%s%s"; if (a == NULL) return NULL; switch (a->constraint) { case Partial_5_set_constraint_all: constraint = ""; break; case Partial_5_set_constraint_at_end: constraint = " when 5' end of location is at end of sequence"; break; case Partial_5_set_constraint_bad_start: constraint = " when coding region has no start codon"; break; case Partial_5_set_constraint_frame_not_one: constraint = " when coding region frame > 1"; break; } if (a->extend) { extend = ", extend 5' end of feature to end of sequence"; } else { extend = ""; } if (constraint != NULL) { str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (constraint) + StringLen (extend))); sprintf (str, fmt, constraint, extend); } return str; } static CharPtr SummarizePartial5ClearAction (Int4 a) { CharPtr str = NULL; switch (a) { case Partial_5_clear_constraint_all: str = StringSave ("Clear 5' partial"); break; case Partial_5_clear_constraint_not_at_end: str = StringSave ("Clear 5' partial when 5' end of feature is not at end of sequence"); break; case Partial_5_clear_constraint_good_start: str = StringSave ("Clear 5' partial when coding region has start codon"); break; } return str; } static CharPtr SummarizePartial3SetAction (Partial3SetActionPtr a) { CharPtr str = NULL; CharPtr constraint = NULL, extend = NULL; CharPtr fmt = "Set 3' partial%s%s"; if (a == NULL) return NULL; switch (a->constraint) { case Partial_3_set_constraint_all: constraint = ""; break; case Partial_3_set_constraint_at_end: constraint = " when 3' end of location is at end of sequence"; break; case 
Partial_3_set_constraint_bad_end: constraint = " when coding region has no stop codon"; break; } if (a->extend) { extend = ", extend 3' end of feature to end of sequence"; } else { extend = ""; } if (constraint != NULL) { str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (constraint) + StringLen (extend))); sprintf (str, fmt, constraint, extend); } return str; } static CharPtr SummarizePartial3ClearAction (Int4 a) { CharPtr str = NULL; switch (a) { case Partial_3_clear_constraint_all: str = StringSave ("Clear 3' partial"); break; case Partial_3_clear_constraint_not_at_end: str = StringSave ("Clear 3' partial when 3' end of feature is not at end of sequence"); break; case Partial_3_clear_constraint_good_end: str = StringSave ("Clear 3' partial when coding region has stop codon"); break; } return str; } static CharPtr SummarizePartialBothSetAction (PartialBothSetActionPtr a) { CharPtr str = NULL; CharPtr constraint = NULL, extend = NULL; CharPtr fmt = "Set both ends partial%s%s"; if (a == NULL) return NULL; switch (a->constraint) { case Partial_5_set_constraint_all: constraint = ""; break; case Partial_5_set_constraint_at_end: constraint = " when both ends of location are at end of sequence"; break; } if (a->extend) { extend = ", extend both ends of feature to end of sequence"; } else { extend = ""; } if (constraint != NULL) { str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (constraint) + StringLen (extend))); sprintf (str, fmt, constraint, extend); } return str; } static CharPtr SummarizePartialBothClearAction (Int4 a) { CharPtr str = NULL; switch (a) { case Partial_both_clear_constraint_all: str = StringSave ("Clear both ends partial"); break; case Partial_3_clear_constraint_not_at_end: str = StringSave ("Clear both ends partial when both ends of feature are not at end of sequence"); break; } return str; } static CharPtr SummarizeConvertLoc (Int4 a) { CharPtr str = NULL; switch (a) { case Convert_location_type_join: str = 
StringSave ("Convert location to join"); break; case Convert_location_type_order: str = StringSave ("Convert location to order"); break; case Convert_location_type_merge: str = StringSave ("Convert location to single interval"); break; } return str; } static CharPtr SummarizeFeatureDistance (ValNodePtr v, Int4 end) { CharPtr fmt = " %s %d from %d' end of feature"; CharPtr summ = NULL; if (v == NULL || v->choice < 1 || v->choice > k_NumQuantityWords) { return NULL; } summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (s_QuantityWords[v->choice - 1]) + 15)); sprintf (summ, fmt, s_QuantityWords[v->choice - 1], v->data.intvalue, end); return summ; } static CharPtr SummarizeExtendToFeature (ExtendToFeaturePtr efp, Boolean end5) { CharPtr fmt = "Extend %d' end of feature to nearest %s feature%s%s"; CharPtr include_fmt = " (include %s location)"; CharPtr feature, distance, include = NULL; Int4 len; CharPtr summ = NULL; if (efp == NULL) { return NULL; } feature = GetFeatureNameFromFeatureType (efp->type); distance = SummarizeFeatureDistance (efp->distance, end5 ? 5 : 3); len = StringLen (fmt) + StringLen (feature) + StringLen (distance); if (efp->include_feat) { include = (CharPtr) MemNew (sizeof (Char) * (StringLen (include_fmt) + StringLen (feature))); sprintf (include, include_fmt, feature); len += StringLen (include); } summ = (CharPtr) MemNew (sizeof (Char) * (len + 1)); sprintf (summ, fmt, end5 ? 5 : 3, feature, distance == NULL ? "" : distance, include == NULL ? 
"" : include); distance = MemFree (distance); include = MemFree (include); return summ; } static CharPtr SummarizeEditFeatureLocationAction (EditFeatureLocationActionPtr a) { CharPtr str = NULL, action_label = NULL, constraint, feature; CharPtr fmt = "%s for %s features"; CharPtr constraint_fmt = "%s for %s features %s"; CharPtr retranslate_cds = " and retranslated affected coding regions"; CharPtr also_edit_gene = " and adjust overlapping gene"; Int4 len; if (a == NULL || a->action == NULL) { str = StringSave ("No action"); } else { switch (a->action->choice) { case LocationEditType_strand: action_label = SummarizeEditLocationStrand (a->action->data.ptrvalue); break; case LocationEditType_set_5_partial: action_label = SummarizePartial5SetAction (a->action->data.ptrvalue); break; case LocationEditType_clear_5_partial: action_label = SummarizePartial5ClearAction (a->action->data.intvalue); break; case LocationEditType_set_3_partial: action_label = SummarizePartial3SetAction (a->action->data.ptrvalue); break; case LocationEditType_clear_3_partial: action_label = SummarizePartial3ClearAction (a->action->data.intvalue); break; case LocationEditType_set_both_partial: action_label = SummarizePartialBothSetAction (a->action->data.ptrvalue); break; case LocationEditType_clear_both_partial: action_label = SummarizePartialBothClearAction (a->action->data.intvalue); break; case LocationEditType_convert: action_label = SummarizeConvertLoc (a->action->data.intvalue); break; case LocationEditType_extend_5: action_label = StringSave ("Extend 5' end of feature to end of sequence"); break; case LocationEditType_extend_3: action_label = StringSave ("Extend 3' end of feature to end of sequence"); break; case LocationEditType_extend_5_to_feat: action_label = SummarizeExtendToFeature (a->action->data.ptrvalue, TRUE); break; case LocationEditType_extend_3_to_feat: action_label = SummarizeExtendToFeature (a->action->data.ptrvalue, FALSE); break; } if (action_label == NULL) { str = 
StringSave ("Invalid action"); } else { feature = GetFeatureNameFromFeatureType (a->type); constraint = SummarizeConstraintSet (a->constraint); len = 0; if (a->retranslate_cds) { len += StringLen (retranslate_cds); } if (a->also_edit_gene) { len += StringLen (also_edit_gene); } if (constraint == NULL) { len += StringLen (fmt) + StringLen (action_label) + StringLen (feature); str = (CharPtr) MemNew (sizeof (Char) * len); sprintf (str, fmt, action_label, feature); } else { len += StringLen (constraint_fmt) + StringLen (action_label) + StringLen (feature) + StringLen (constraint); str = (CharPtr) MemNew (sizeof (Char) * len); sprintf (str, constraint_fmt, action_label, feature, constraint); constraint = MemFree (constraint); } if (a->retranslate_cds) { StringCat (str, retranslate_cds); } if (a->also_edit_gene) { StringCat (str, also_edit_gene); } } } return str; } static CharPtr s_Suppression[] = { NULL, "suppressing", "non-suppressing" }; static CharPtr s_Necessary[] = { NULL, "necessary", "unnecessary" }; static CharPtr SummarizeRemoveXref (RemoveXrefsActionPtr a) { CharPtr str = NULL, label, constraint; GeneXrefTypePtr g; CharPtr fmt = "Remove %s%s%s%sgene xrefs from %s features"; CharPtr suppression, necessary; Int4 len; if (a == NULL || a->xref_type == NULL) { str = StringSave ("No action"); } else if (a->xref_type->choice != XrefType_gene || (g = (GeneXrefTypePtr) a->xref_type->data.ptrvalue) == NULL) { str = StringSave ("Invalid action"); } else { label = GetFeatureNameFromFeatureType (g->feature); if (g->suppression < sizeof (s_Suppression) / sizeof (CharPtr)) { suppression = s_Suppression[g->suppression]; } else { suppression = NULL; } if (g->necessary < sizeof (s_Necessary) / sizeof (CharPtr)) { necessary = s_Necessary[g->necessary]; } else { necessary = NULL; } constraint = SummarizeConstraintSet (a->constraint); len = StringLen (label) + StringLen (fmt) + StringLen (suppression) + StringLen (necessary) + StringLen (constraint); str = (CharPtr) MemNew 
(sizeof (Char) * len); sprintf (str, fmt, suppression == NULL ? "" : suppression, suppression == NULL ? "" : " ", necessary == NULL ? "" : necessary, necessary == NULL ? "" : " ", label); if (constraint != NULL) { StringCat (str, constraint); constraint = MemFree (constraint); } } return str; } static CharPtr SummarizeMakeGeneXrefs(MakeGeneXrefActionPtr a) { CharPtr constraint, str, label; CharPtr fmt = "Make gene xrefs from overlapping gene features for %s features%s"; if (a == NULL) { str = StringSave ("No action"); } else { label = GetFeatureNameFromFeatureType (a->feature); constraint = SummarizeConstraintSet (a->constraint); str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (constraint) + StringLen (label))); sprintf (str, fmt, label, constraint == NULL ? "" : constraint); constraint = MemFree (constraint); } return str; } NLM_EXTERN CharPtr SummarizeMacroAction (ValNodePtr vnp) { CharPtr str = NULL; if (vnp == NULL) { return StringSave ("No action"); } switch (vnp->choice) { case MacroActionChoice_aecr: str = SummarizeAECRAction (vnp->data.ptrvalue); break; case MacroActionChoice_parse: str = SummarizeParseAction (vnp->data.ptrvalue); break; case MacroActionChoice_add_feature: str = SummarizeApplyFeatureAction (vnp->data.ptrvalue); break; case MacroActionChoice_remove_feature: str = SummarizeRemoveFeatureAction (vnp->data.ptrvalue); break; case MacroActionChoice_edit_location: str = SummarizeEditFeatureLocationAction (vnp->data.ptrvalue); break; case MacroActionChoice_convert_feature: str = SummarizeConvertFeatureAction (vnp->data.ptrvalue); break; case MacroActionChoice_remove_descriptor: str = SummarizeRemoveDescriptorAction (vnp->data.ptrvalue); break; case MacroActionChoice_autodef: str = SummarizeAutodefAction (vnp->data.ptrvalue); break; case MacroActionChoice_removesets: str = StringSave ("Remove duplicate nested sets"); break; case MacroActionChoice_trim_junk_from_primer_seq: str = StringSave ("Trim junk from primer seqs"); break; 
/* SummarizeMacroAction, continued: each case below sets str either to a copy of a fixed description or to the result of the matching Summarize* helper called with vnp->data.ptrvalue. */
case MacroActionChoice_fix_usa_and_states: str = StringSave ("Fix USA and state abbreviations in publications"); break; case MacroActionChoice_trim_stop_from_complete_cds: str = StringSave ("Remove trailing * from complete coding regions"); break; case MacroActionChoice_synchronize_cds_partials: str = StringSave ("Synchronize coding region partials"); break; case MacroActionChoice_adjust_for_consensus_splice: str = StringSave ("Adjust coding regions for consensus splice sites"); break; case MacroActionChoice_fix_pub_caps: str = SummarizeFixPubCapsAction(vnp->data.ptrvalue); break; case MacroActionChoice_remove_seg_gaps: str = StringSave ("Remove seg-gaps"); break; case MacroActionChoice_sort_fields: str = SummarizeSortFieldsAction (vnp->data.ptrvalue); break; case MacroActionChoice_apply_molinfo_block: str = SummarizeMolinfoBlockAction (vnp->data.ptrvalue); break; case MacroActionChoice_fix_caps: str = SummarizeFixCapsAction (vnp->data.ptrvalue); break; case MacroActionChoice_fix_format: str = SummarizeFixFormatAction (vnp->data.ptrvalue); break; case MacroActionChoice_fix_spell: str = StringSave ("Fix spelling"); break; case MacroActionChoice_remove_duplicate_features: str = SummarizeRemoveDuplicateFeaturesAction (vnp->data.ptrvalue); break; case MacroActionChoice_remove_lineage_notes: str = StringSave ("Remove lineage source notes"); break; case MacroActionChoice_remove_xrefs: str = SummarizeRemoveXref(vnp->data.ptrvalue); break; case MacroActionChoice_make_gene_xrefs: str = SummarizeMakeGeneXrefs(vnp->data.ptrvalue); break; case MacroActionChoice_make_bold_xrefs: str = StringSave ("Make Barcode Xrefs"); break; case MacroActionChoice_fix_author: str = SummarizeAuthorFixAction(vnp->data.ptrvalue); break; case MacroActionChoice_update_sequences: str = SummarizeUpdateSequencesAction (vnp->data.ptrvalue); break; case MacroActionChoice_add_trans_splicing: str = StringSave ("Set trans-splicing exception in genes"); break; case
MacroActionChoice_remove_invalid_ecnumbers: str = StringSave ("Remove invalid EC_numbers"); break; case MacroActionChoice_create_tsa_ids: str = SummarizeCreateTSAIDsAction (vnp->data.ptrvalue); break; case MacroActionChoice_perform_autofix: str = SummarizePerformAutofixAction (vnp->data.ptrvalue); break; case MacroActionChoice_fix_sets: str = SummarizeFixSetsAction (vnp->data.ptrvalue); break; case MacroActionChoice_apply_table: str = SummarizeApplyTableAction (vnp->data.ptrvalue); break; case MacroActionChoice_remove_sequences: str = SummarizeRemoveSequencesAction (vnp->data.ptrvalue); break; case MacroActionChoice_propagate_sequence_technology: str = SummarizePropagateSequenceTechnology(vnp->data.ptrvalue); break; case MacroActionChoice_add_file_descriptors: str = SummarizeAddDescriptorListAction(vnp->data.ptrvalue); break; case MacroActionChoice_propagate_missing_old_name: str = StringSave ("Propagate missing old-name qualifier"); break; case MacroActionChoice_autoapply_structured_comments: str = StringSave ("Autoapply structured comment prefixes"); break; case MacroActionChoice_reorder_structured_comments: str = StringSave ("Reorder structured comment fields"); break; case MacroActionChoice_remove_duplicate_structured_comments: str = StringSave ("Remove duplicate structured comments"); break; case MacroActionChoice_lookup_taxonomy: str = StringSave ("Perform taxonomy lookup and correct genetic codes"); break; case MacroActionChoice_lookup_pubs: str = StringSave ("Perform pubs lookup"); break; case MacroActionChoice_trim_terminal_ns: str = StringSave ("Trim terminal Ns from nucleotide bioseqs"); break; case MacroActionChoice_update_replaced_ecnumbers: str = StringSave ("Update Replaced EC_numbers"); break; case MacroActionChoice_instantiate_protein_titles: str = StringSave ("Instantiate Protein Titles"); break; case MacroActionChoice_retranslate_cds: str = StringSave ("Retranslate coding regions"); break; case MacroActionChoice_add_selenocysteine_except: str = 
StringSave ("Replace selenocysteine stops"); break; case MacroActionChoice_join_short_trnas: str = StringSave ("Join short tRNAs"); break; case MacroActionChoice_adjust_features_for_gaps: str = StringSave ("Adjust features for gaps"); break; default: str = StringSave ("Invalid action"); break; } return str; } NLM_EXTERN Boolean ApplyMacroToSeqEntryExEx (SeqEntryPtr sep, ValNodePtr macro, FILE *log_fp, GlobalAlignFunc align_func, Int4Ptr pNumNoOp) { Int4 num_AECR = 0, num_parse = 0, num; Uint2 entityID; Boolean needs_update = FALSE; CharPtr summ; Boolean any_change = FALSE; Boolean created_protein_features = FALSE; ValNodePtr list; LogInfoData lid; entityID = SeqMgrGetEntityIDForSeqEntry(sep); if (pNumNoOp != NULL) { *pNumNoOp = 0; } while (macro != NULL) { needs_update = TRUE; switch (macro->choice) { case MacroActionChoice_aecr: num = ApplyAECRActionToSeqEntry ((AECRActionPtr) macro->data.ptrvalue, sep, &created_protein_features); num_AECR += num; if (num > 0) { if (log_fp != NULL) { summ = SummarizeAECRAction ((AECRActionPtr) macro->data.ptrvalue); fprintf (log_fp, "Changed %d fields during %s\n", num, summ); summ = MemFree (summ); } any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } if (created_protein_features) { if (log_fp != NULL) { fprintf (log_fp, "Created protein features\n"); } any_change = TRUE; } break; case MacroActionChoice_parse: num = ApplyParseActionToSeqEntry ((ParseActionPtr) macro->data.ptrvalue, sep); num_parse += num; if (num > 0) { if (log_fp != NULL) { summ = SummarizeParseAction ((ParseActionPtr) macro->data.ptrvalue); fprintf (log_fp, "Changed %d fields during %s\n", num, summ); summ = MemFree (summ); } any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_add_feature: num = ApplyApplyFeatureActionToSeqEntry ((ApplyFeatureActionPtr) macro->data.ptrvalue, sep); if (num > 0) { if (log_fp != NULL) { fprintf (log_fp, "Added %d features\n", num); } any_change = TRUE; } else if (pNumNoOp 
!= NULL) { (*pNumNoOp)++; } SeqMgrIndexFeatures (entityID, NULL); break; case MacroActionChoice_remove_feature: num = ApplyRemoveFeatureActionToSeqEntry ((RemoveFeatureActionPtr) macro->data.ptrvalue, sep); if (num > 0) { if (log_fp != NULL) { fprintf (log_fp, "Removed %d features\n", num); } any_change = TRUE; ObjMgrSetDirtyFlag (entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); needs_update = FALSE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_edit_location: num = ApplyEditFeatureLocationActionToSeqEntry ((EditFeatureLocationActionPtr) macro->data.ptrvalue, sep, log_fp); if (num > 0) { any_change = TRUE; ObjMgrSetDirtyFlag (entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); needs_update = FALSE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_convert_feature: num = ApplyConvertFeatureActionToSeqEntry ((ConvertFeatureActionPtr) macro->data.ptrvalue, sep, log_fp); if (num > 0) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } ObjMgrSetDirtyFlag (entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); needs_update = FALSE; break; case MacroActionChoice_remove_descriptor: num = ApplyRemoveDescriptorActionToSeqEntry ((RemoveDescriptorActionPtr) macro->data.ptrvalue, sep); if (num > 0) { if (log_fp != NULL) { summ = SummarizeRemoveDescriptorAction ((RemoveDescriptorActionPtr) macro->data.ptrvalue); fprintf (log_fp, "Removed %d descriptors during %s\n", num, summ); summ = MemFree (summ); } any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_autodef: ApplyAutodefActionToSeqEntry ((AutodefActionPtr) macro->data.ptrvalue, sep); if (log_fp != NULL) { summ = SummarizeAutodefAction ((AutodefActionPtr) macro->data.ptrvalue); if (summ != NULL) { fprintf (log_fp, "Performed %s\n", summ); } summ = MemFree (summ); } any_change = TRUE; break; case MacroActionChoice_removesets: if (RemoveDuplicateNestedSetsForEntityID 
(entityID)) { if (log_fp != NULL) { fprintf (log_fp, "Removed duplicate nested sets\n"); } any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_trim_junk_from_primer_seq: if (TrimPrimerSeqJunkInSeqEntry (sep, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_fix_usa_and_states: if (FixUsaAndStateAbbreviations (entityID, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_trim_stop_from_complete_cds: if (TrimStopsFromCompleteCodingRegions(sep, log_fp)) { ObjMgrSetDirtyFlag (entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); needs_update = FALSE; any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_synchronize_cds_partials: if (ResynchCodingRegionPartialsEx(sep, log_fp)) { ObjMgrSetDirtyFlag (entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); needs_update = FALSE; any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_adjust_for_consensus_splice: if (AdjustSeqEntryForConsensusSpliceEx(sep, log_fp, TRUE)) { ObjMgrSetDirtyFlag (entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); needs_update = FALSE; any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_fix_pub_caps: if (ApplyFixPubCapsToSeqEntry (macro->data.ptrvalue, sep, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_remove_seg_gaps: num = RemoveSegGapsInSeqEntry (sep); if (num > 0) { if (log_fp != NULL) { fprintf (log_fp, "Removed gaps in %d alignments\n", num); } any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_sort_fields: num = SortFieldsInSeqEntry (macro->data.ptrvalue, sep); if (num > 0) { if (log_fp != NULL) { summ = SummarizeSortFieldsAction (macro->data.ptrvalue); fprintf (log_fp, 
"Changed order of fields for %d objects during %s\n", num, summ); summ = MemFree (summ); } any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_apply_molinfo_block: if (ApplyMolinfoBlockToSeqEntryEx (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_fix_caps: if (ApplyFixCapsToSeqEntry (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_fix_format: if (ApplyFixFormatToSeqEntry (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_fix_spell: if (SpellFixSeqEntry (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_remove_duplicate_features: if (RemoveDuplicateFeaturesInSeqEntry (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; ObjMgrSetDirtyFlag (entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); needs_update = FALSE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_remove_lineage_notes: if (RemoveLineageNotesInSeqEntry (sep, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_remove_xrefs: if (MacroRemoveXrefs (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_make_gene_xrefs: if (MacroMakeGeneXrefs (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_make_bold_xrefs: if (MacroMakeBoldXrefs (sep, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_fix_author: if (ApplyAuthorFixToSeqEntry (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != 
NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_update_sequences: if (UpdateSequencesInSeqEntry (sep, macro->data.ptrvalue, log_fp, align_func)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_add_trans_splicing: if (AddTransSplicingInSeqEntry (sep, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_remove_invalid_ecnumbers: if (RemoveInvalidECnumbersInSeqEntry (sep, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_create_tsa_ids: if (CreateTsaIDsInSeqEntry (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_perform_autofix: if (PerformAutofixInSeqEntry (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_fix_sets: if (PerformFixSetsInSeqEntry (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_apply_table: if (PerformApplyTableInSeqEntry (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_remove_sequences: if (PerformRemoveSequencesInSeqEntry (sep, macro->data.ptrvalue, log_fp)) { ObjMgrSetDirtyFlag (entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); needs_update = FALSE; any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_propagate_sequence_technology: if (PerformPropagateSequenceTechnology(sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_add_file_descriptors: if (AddFileDescriptors (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case 
MacroActionChoice_propagate_missing_old_name: list = ValNodeNew (NULL); list->data.ptrvalue = sep; if (PropagateMissingOldNames (list)) { any_change = TRUE; if (log_fp != NULL) { fprintf (log_fp, "Propagated missing old name qualifiers\n"); } } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_autoapply_structured_comments: if (AutoApplyStructuredCommentPrefixes (sep, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_reorder_structured_comments: if (ReorderStructuredCommentsInSeqEntry (sep)) { if (log_fp != NULL) { fprintf (log_fp, "Reordered structured comment fields\n"); } any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_remove_duplicate_structured_comments: if (RemoveDuplicateStructuredCommentsInSeqEntry(sep)) { if (log_fp != NULL) { fprintf (log_fp, "Removed duplicate structured comments\n"); } any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_lookup_taxonomy: Taxon3ReplaceOrgInSeqEntry(sep, FALSE); CorrectGenCodes (sep, entityID); if (log_fp != NULL) { fprintf (log_fp, "Performed TaxLookup and corrected genetic codes\n"); } any_change = TRUE; break; case MacroActionChoice_lookup_pubs: MemSet (&lid, 0, sizeof (LogInfoData)); lid.fp = log_fp; num = LookupPubsInSeqEntry (sep, log_fp == NULL ? NULL : &lid); if (num > 0) { any_change = TRUE; if (log_fp != NULL) { fprintf (log_fp, "Replaced %d pubs during Pub Lookup\n", num); } } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_trim_terminal_ns: MemSet (&lid, 0, sizeof (LogInfoData)); lid.fp = log_fp; num = TrimNsFromNucsInSeqEntry (sep, log_fp == NULL ? 
NULL : &lid); if (num > 0) { any_change = TRUE; if (log_fp != NULL) { fprintf (log_fp, "Trimmed terminal Ns from %d sequences\n", num); } } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_update_replaced_ecnumbers: if (ReplaceUpdatedECNumbers(sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_instantiate_protein_titles: InstantiateProteinTitles (entityID, NULL); any_change = TRUE; if (log_fp != NULL) { fprintf (log_fp, "Instantiated protein titles\n", num); } break; case MacroActionChoice_retranslate_cds: if (PerformRetranslations (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_add_selenocysteine_except: if (ReplaceStopsWithSelenocysteineInSeqEntry(sep, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_join_short_trnas: if (JoinShortTrnas(sep, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; case MacroActionChoice_adjust_features_for_gaps: if (PerformAdjustFeaturesForGaps (sep, macro->data.ptrvalue, log_fp)) { any_change = TRUE; } else if (pNumNoOp != NULL) { (*pNumNoOp)++; } break; } macro = macro->next; } if (needs_update) { ObjMgrSetDirtyFlag (entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); } return any_change; } NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, FILE *log_fp, GlobalAlignFunc align_func) { return ApplyMacroToSeqEntryExEx (sep, macro, log_fp, align_func, NULL); } NLM_EXTERN void ApplyMacroToSeqEntry (SeqEntryPtr sep, ValNodePtr macro) { ApplyMacroToSeqEntryEx (sep, macro, NULL, NULL); } static Boolean PreprocessApplyTableMacro (ApplyTableActionPtr apply_table, FILE *log_fp) { Boolean rval = TRUE; FILE *fp; if (apply_table->in_memory_table == NULL) { apply_table->in_memory_table = ValNodeNew (NULL); 
apply_table->in_memory_table->choice = ApplyTableExtraData_table; if (StringHasNoText (apply_table->filename)) { rval = FALSE; if (log_fp != NULL) { fprintf (log_fp, "No filename supplied for apply table action\n"); } } else { fp = FileOpen (apply_table->filename, "r"); if (fp == NULL) { if (log_fp != NULL) { fprintf (log_fp, "Unable to open %s\n", apply_table->filename); } rval = FALSE; } else { apply_table->in_memory_table->data.ptrvalue = ReadTabTableFromFile (fp); FileClose (fp); if (apply_table->in_memory_table->data.ptrvalue == NULL) { if (log_fp != NULL) { fprintf (log_fp, "Unable to read table from %s\n", apply_table->filename); } rval = FALSE; } } } } return rval; } static Boolean PreprocessAddDescriptorListMacro (AddDescriptorListActionPtr action, FILE *log_fp) { Boolean rval = TRUE; AsnIoPtr aip; SeqDescPtr sdp; SeqDescrPtr sdp_list = NULL; if (action->descriptor_list->in_memory_table == NULL) { action->descriptor_list->in_memory_table = ValNodeNew (NULL); action->descriptor_list->in_memory_table->choice = ApplyTableExtraData_table; if (StringHasNoText (action->descriptor_list->filename)) { return FALSE; } aip = AsnIoOpen (action->descriptor_list->filename, "r"); if (aip == NULL) { if (log_fp != NULL) { fprintf (log_fp, "Unable to open %s\n", action->descriptor_list->filename); } return FALSE; } while (sdp = SeqDescAsnRead (aip, NULL)) { ValNodeLink (&sdp_list, sdp); } AsnIoClose (aip); if (sdp_list == NULL) { if (log_fp != NULL) { fprintf (log_fp, "Unable to read table from %s\n", action->descriptor_list->filename); } return FALSE; } action->descriptor_list->in_memory_table->data.ptrvalue = sdp_list; } return rval; } NLM_EXTERN Boolean PreprocessMacroForRepeatedUse (ValNodePtr macro, FILE *log_fp) { ValNodePtr vnp; Boolean rval = TRUE; for (vnp = macro; vnp != NULL; vnp = vnp->next) { if (vnp->choice == MacroActionChoice_apply_table) { rval &= PreprocessApplyTableMacro(vnp->data.ptrvalue, log_fp); } else if (vnp->choice == 
MacroActionChoice_add_file_descriptors) { rval &= PreprocessAddDescriptorListMacro (vnp->data.ptrvalue, log_fp); } } return rval; } NLM_EXTERN void CleanupMacroAfterRepeatedUse (ValNodePtr macro) { ValNodePtr vnp; ApplyTableActionPtr apply_table; AddDescriptorListActionPtr desc_list; SeqDescPtr sdp, sdp_next; for (vnp = macro; vnp != NULL; vnp = vnp->next) { if (vnp->choice == MacroActionChoice_apply_table) { if ((apply_table = (ApplyTableActionPtr) vnp->data.ptrvalue) != NULL && apply_table->in_memory_table != NULL) { apply_table->in_memory_table->data.ptrvalue = FreeTabTable (apply_table->in_memory_table->data.ptrvalue); apply_table->in_memory_table = ValNodeFree (apply_table->in_memory_table); } } else if (vnp->choice == MacroActionChoice_add_file_descriptors) { if ((desc_list = (AddDescriptorListActionPtr) vnp->data.ptrvalue) != NULL && desc_list->descriptor_list != NULL && desc_list->descriptor_list->in_memory_table != NULL) { for (sdp = desc_list->descriptor_list->in_memory_table->data.ptrvalue; sdp != NULL; sdp = sdp_next) { sdp_next = sdp->next; sdp->next = NULL; sdp = SeqDescFree (sdp); } desc_list->descriptor_list->in_memory_table->data.ptrvalue = NULL; } } } } /* for generating text descriptions of macro objects */ NLM_EXTERN CharPtr SummarizeSourceQual (ValNodePtr field) { CharPtr summ = NULL, locname, origname; Int4 genome, origin; CharPtr loc_fmt = "location %s"; CharPtr orig_fmt = "origin %s"; if (field == NULL) return NULL; switch (field->choice) { case SourceQualChoice_textqual: summ = StringSave (GetSourceQualName (field->data.intvalue)); break; case SourceQualChoice_location: genome = GenomeFromSrcLoc (field->data.intvalue); locname = LocNameFromGenome (genome); summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (loc_fmt) + StringLen (locname))); sprintf (summ, loc_fmt, locname); break; case SourceQualChoice_origin: origin = OriginFromSrcOrig (field->data.intvalue); origname = OriginNameFromOrigin (origin); summ = (CharPtr) MemNew (sizeof (Char) 
* (StringLen (orig_fmt) + StringLen (origname))); sprintf (summ, orig_fmt, origname); break; } return summ; } NLM_EXTERN CharPtr FeatureFieldLabel (CharPtr feature_name, ValNodePtr field) { CharPtr cp; CharPtr label = NULL; CharPtr legal_fmt = "%s %s"; CharPtr illegal_fmt = "constrained field on %s"; if (feature_name == NULL) { feature_name = "Unknown feature"; } if (field == NULL) { return StringSave ("missing field"); } else if (field->choice == FeatQualChoice_legal_qual) { cp = GetFeatQualName (field->data.intvalue); if (cp == NULL) cp = "Unknown field type"; label = (CharPtr) MemNew (sizeof (Char) * (StringLen (legal_fmt) + StringLen (feature_name) + StringLen (cp))); sprintf (label, legal_fmt, feature_name, cp); } else if (field->choice == FeatQualChoice_illegal_qual) { label = (CharPtr) MemNew (sizeof (Char) * (StringLen (illegal_fmt) + StringLen (feature_name))); sprintf (label, illegal_fmt, feature_name); } else { label = StringSave ("illegal field value"); } return label; } NLM_EXTERN CharPtr SummarizeFieldType (ValNodePtr vnp) { FeatureFieldPtr ffp; CharPtr str = NULL; CharPtr label = NULL; CharPtr pub_fmt = "publication %s"; if (vnp == NULL) { str = StringSave ("missing field"); } else { switch (vnp->choice) { case FieldType_source_qual: str = SummarizeSourceQual (vnp->data.ptrvalue); break; case FieldType_feature_field: ffp = (FeatureFieldPtr) vnp->data.ptrvalue; if (ffp == NULL || ffp->field == NULL) { str = StringSave ("missing field"); } else { label = GetFeatureNameFromFeatureType (ffp->type); str = FeatureFieldLabel (label, ffp->field); } break; case FieldType_cds_gene_prot: str = StringSaveNoNull (CDSGeneProtNameFromField (vnp->data.intvalue)); if (str == NULL) { str = StringSave ("Invalid CDS-Gene-Prot Field"); } break; case FieldType_molinfo_field: str = GetSequenceQualName (vnp->data.ptrvalue); if (str == NULL) { str = StringSave ("Invalid Sequence Qual Field"); } break; case FieldType_pub: switch (vnp->data.intvalue) { case 
Publication_field_cit:
            str = StringSave ("publication citation");
            break;
          case Publication_field_authors:
            str = StringSave ("publication authors");
            break;
          case Publication_field_journal:
            str = StringSave ("publication journal");
            break;
          case Publication_field_volume:
            str = StringSave ("publication volume");
            break;
          case Publication_field_issue:
            str = StringSave ("publication issue");
            break;
          case Publication_field_pages:
            str = StringSave ("publication pages");
            break;
          case Publication_field_date:
            str = StringSave ("publication date");
            break;
          case Publication_field_serial_number:
            str = StringSave ("publication serial number");
            break;
          case Publication_field_title:
            str = StringSave ("publication title");
            break;
          case Publication_field_pmid:
            str = StringSave ("PMID");
            break;
          default:
            /* any other publication field: use its label, prefixed with "publication " */
            label = GetPubFieldLabel (vnp->data.intvalue);
            if (label == NULL) {
              str = StringSave ("Invalid field type");
            } else {
              /* the "%s" in pub_fmt is replaced by label, which leaves room for the terminator */
              str = MemNew (sizeof (Char) * (StringLen (pub_fmt) + StringLen (label)));
              sprintf (str, pub_fmt, label);
            }
            break;
        }
        break;
      case FieldType_rna_field:
        str = SummarizeRnaQual (vnp->data.ptrvalue);
        break;
      case FieldType_struc_comment_field:
        str = SummarizeStructuredCommentField (vnp->data.ptrvalue);
        break;
      case FieldType_dblink:
        str = StringSave (GetDBLinkNameFromDBLinkFieldType (vnp->data.intvalue));
        break;
      case FieldType_misc:
        /* miscellaneous fields have fixed display names */
        if (vnp->data.intvalue == Misc_field_genome_project_id) {
          str = StringSave ("Genome Project ID");
        } else if (vnp->data.intvalue == Misc_field_comment_descriptor) {
          str = StringSave ("Comment Descriptor");
        } else if (vnp->data.intvalue == Misc_field_defline) {
          str = StringSave ("Definition Line");
        } else if (vnp->data.intvalue == Misc_field_keyword) {
          str = StringSave ("Keyword");
        } else {
          str = StringSave ("Invalid field type");
        }
        break;
      default:
        str = StringSave ("Invalid field type");
        break;
    }
  }
  return str;
}


/* Converts a textual field name into a newly created FieldType node.
 * Returns NULL when str has no text or matches none of the field kinds
 * that the function tries in turn.
 */
NLM_EXTERN FieldTypePtr FieldTypeFromString (CharPtr str)
{
  Int4 qual_type, feat_type = -1;
  FieldTypePtr ft = NULL;
  FeatureFieldPtr ffp;
  ValNodePtr
vnp, molfield;
  CharPtr cpy, cp;
  RnaQualPtr rq;

  if (StringHasNoText (str)) {
    return NULL;
  }

  /* check source quals first */
  qual_type = GetSourceQualTypeByName (str);
  if (qual_type > -1) {
    vnp = ValNodeNew (NULL);
    vnp->choice = SourceQualChoice_textqual;
    vnp->data.intvalue = qual_type;
    ft = ValNodeNew (NULL);
    ft->choice = FieldType_source_qual;
    ft->data.ptrvalue = vnp;
  } else {
    /* try feature fields */
    /* Work on a private copy: the copy is cut at successive spaces so that
     * each prefix ("word", "word word", ...) can be tried as a feature name.
     * A space that did not yield a feature is restored before moving on.
     */
    cpy = StringSave (str);
    cp = StringChr (cpy, ' ');
    while (cp != NULL && feat_type == -1) {
      *cp = 0;
      feat_type = GetFeatureTypeByName (cpy);
      if (feat_type < 0) {
        *cp = ' ';
        cp = StringChr (cp + 1, ' ');
      }
    }
    if (feat_type > -1) {
      /* cp still points at the cut position; the qualifier name follows it */
      qual_type = GetFeatQualByName (cp + 1);
      if (qual_type > -1) {
        ffp = FeatureFieldNew ();
        ffp->type = feat_type;
        ValNodeAddInt (&ffp->field, FeatQualChoice_legal_qual, qual_type);
        ft = ValNodeNew (NULL);
        ft->choice = FieldType_feature_field;
        ft->data.ptrvalue = ffp;
      }
    }
    cpy = MemFree (cpy);
    if (ft == NULL) {
      /* try CDS-gene-prot */
      qual_type = CDSGeneProtFieldFromName (str);
      if (qual_type > -1) {
        ft = ValNodeNew (NULL);
        ft->choice = FieldType_cds_gene_prot;
        ft->data.intvalue = qual_type;
      }
    }
    if (ft == NULL) {
      /* try RNA Quals */
      /* only the first space is considered: text before it is the RNA type,
       * text after it is the RNA field
       */
      cpy = StringSave (str);
      cp = StringChr (cpy, ' ');
      if (cp != NULL) {
        *cp = 0;
        feat_type = GetRnaTypeForName (cpy);
        qual_type = GetRnaFieldForName (cp + 1);
        if (feat_type > -1 && qual_type > -1) {
          rq = RnaQualNew ();
          rq->type = ValNodeNew (NULL);
          rq->type->choice = feat_type;
          rq->type->data.ptrvalue = NULL;
          rq->field = qual_type;
          ft = ValNodeNew (NULL);
          ft->choice = FieldType_rna_field;
          ft->data.ptrvalue = rq;
        }
      }
      cpy = MemFree (cpy);
    }
    if (ft == NULL && Matchnamestring (str, "comment-descriptor")) {
      ft = ValNodeNew (NULL);
      ft->choice = FieldType_misc;
      ft->data.intvalue = Misc_field_comment_descriptor;
    }
    /* try DBLink fields */
    if (ft == NULL) {
      qual_type = GetDBLinkFieldTypeFromDBLinkName (str);
      if (qual_type > -1) {
        ft = ValNodeNew (NULL);
        ft->choice = FieldType_dblink;
        ft->data.intvalue = qual_type;
      }
    }
    /* try
publication fields */ if (ft == NULL) { qual_type = GetPubFieldFromLabel(str); if (qual_type > -1) { ft = ValNodeNew (NULL); ft->choice = FieldType_pub; ft->data.intvalue = qual_type; } } /* molinfo fields */ if (ft == NULL) { if (StringsAreEquivalent(str, "completeness")) { molfield = ValNodeNew (NULL); molfield->choice = MolinfoField_completedness; ft = ValNodeNew (NULL); ft->choice = FieldType_molinfo_field; ft->data.ptrvalue = molfield; } else if (StringsAreEquivalent(str, "topology")) { molfield = ValNodeNew (NULL); molfield->choice = MolinfoField_topology; ft = ValNodeNew (NULL); ft->choice = FieldType_molinfo_field; ft->data.ptrvalue = molfield; } } /* location/genome */ if (ft == NULL && StringsAreEquivalent(str, "location") || StringsAreEquivalent(str, "genome")) { vnp = ValNodeNew (NULL); vnp->choice = SourceQualValChoice_location; ft = ValNodeNew (NULL); ft->choice = FieldType_source_qual; ft->data.ptrvalue = vnp; } } return ft; } NLM_EXTERN Boolean IsFieldTypeNonText (ValNodePtr field_type) { ValNodePtr vnp; Boolean rval = FALSE; if (field_type == NULL) { return FALSE; } switch (field_type->choice) { case FieldType_source_qual : vnp = (ValNodePtr) field_type->data.ptrvalue; if (vnp != NULL) { if (vnp->choice == SourceQualChoice_location || vnp->choice == SourceQualChoice_origin) { rval = TRUE; } else if (vnp->choice == SourceQualChoice_textqual) { if (IsNonTextSourceQual (vnp->data.intvalue)) { rval = TRUE; } } } break; case FieldType_molinfo_field : rval = TRUE; break; } return rval; } NLM_EXTERN CharPtr SummarizeExistingText (Uint2 existing_text) { CharPtr str = NULL; switch (existing_text) { case ExistingTextOption_append_semi : str = "append separated by semicolon"; break; case ExistingTextOption_append_space : str = "append separated by space"; break; case ExistingTextOption_append_colon : str = "append separated by colon"; break; case ExistingTextOption_append_comma: str = "append separated by comma"; break; case ExistingTextOption_append_none : 
str = "append (no separator)"; break; case ExistingTextOption_prefix_semi : str = "prefix separated by semicolon"; break; case ExistingTextOption_prefix_space : str = "prefix separated by space"; break; case ExistingTextOption_prefix_colon : str = "prefix separated by colon"; break; case ExistingTextOption_prefix_comma: str = "prefix separated by comma"; break; case ExistingTextOption_prefix_none : str = "prefix (no separator)"; break; case ExistingTextOption_leave_old : str = "ignore new text when existing text is present"; break; case ExistingTextOption_replace_old : str = "overwrite existing text"; break; case ExistingTextOption_add_qual : str = "add new qual"; break; default: str = "invalid existing_text option"; break; } return str; } static CharPtr SummarizeTextMarker (TextMarkerPtr text_marker) { CharPtr summ = NULL; if (IsTextMarkerEmpty (text_marker)) { return NULL; } else if (text_marker->choice == TextMarker_free_text) { summ = StringSave (text_marker->data.ptrvalue); } else if (text_marker->choice == TextMarker_digits) { summ = StringSave ("numbers"); } else if (text_marker->choice == TextMarker_letters) { summ = StringSave ("letters"); } return summ; } NLM_EXTERN CharPtr SummarizeTextPortion (TextPortionPtr text_portion) { CharPtr summ = NULL; CharPtr left_fmt = NULL, right_fmt = NULL; CharPtr left_text = NULL, right_text = NULL; Int4 len = 6; if (text_portion == NULL || (IsTextMarkerEmpty (text_portion->left_marker) && IsTextMarkerEmpty (text_portion->right_marker))) { summ = StringSave ("entire text"); } else { left_text = SummarizeTextMarker(text_portion->left_marker); right_text = SummarizeTextMarker(text_portion->right_marker); if (text_portion->inside) { if (left_text != NULL) { if (text_portion->include_left) { left_fmt = "starting with "; } else { left_fmt = "just after "; } len += StringLen (left_fmt) + StringLen (left_text) + 3; } if (right_text != NULL) { if (text_portion->include_right) { right_fmt = "up to and including "; } else { 
right_fmt = "up to ";
        }
        /* + 3: the two quote characters put around the text, plus one spare byte */
        len += StringLen (right_fmt) + StringLen (right_text) + 3;
        if (left_fmt != NULL) {
          /* room for the ", " that joins the two halves */
          len += 2;
        }
      }
      if (left_fmt == NULL && right_fmt == NULL) {
        summ = StringSave ("entire text");
      } else {
        /* StringCat is applied directly to the buffer returned by MemNew */
        summ = (CharPtr) MemNew (sizeof (Char) * len);
        StringCat (summ, "text ");
        if (left_fmt != NULL) {
          StringCat (summ, left_fmt);
          StringCat (summ, "'");
          StringCat (summ, left_text);
          StringCat (summ, "'");
          if (right_fmt != NULL) {
            StringCat (summ, ", ");
          }
        }
        if (right_fmt != NULL) {
          StringCat (summ, right_fmt);
          StringCat (summ, "'");
          StringCat (summ, right_text);
          StringCat (summ, "'");
        }
      }
    } else {
      /* portion is outside the markers: the right marker is described first,
       * then the left marker, joined by " and "
       */
      if (right_text != NULL) {
        if (text_portion->include_right) {
          right_fmt = "starting with ";
        } else {
          right_fmt = "after ";
        }
        len += StringLen (right_fmt) + StringLen (right_text) + 3;
      }
      if (left_text != NULL) {
        if (text_portion->include_left) {
          left_fmt = "up to and including ";
        } else {
          left_fmt = "before ";
        }
        len += StringLen (left_fmt) + StringLen (left_text) + 3;
        if (right_fmt != NULL) {
          /* room for the " and " that joins the two halves */
          len += 5;
        }
      }
      if (left_fmt == NULL && right_fmt == NULL) {
        summ = StringSave ("entire text");
      } else {
        summ = (CharPtr) MemNew (sizeof (Char) * len);
        StringCat (summ, "text ");
        if (right_fmt != NULL) {
          StringCat (summ, right_fmt);
          StringCat (summ, "'");
          StringCat (summ, right_text);
          StringCat (summ, "'");
          if (left_fmt != NULL) {
            StringCat (summ, " and ");
          }
        }
        if (left_fmt != NULL) {
          StringCat (summ, left_fmt);
          StringCat (summ, "'");
          StringCat (summ, left_text);
          StringCat (summ, "'");
        }
      }
    }
    /* the marker summaries were only needed while building summ */
    left_text = MemFree (left_text);
    right_text = MemFree (right_text);
  }
  return summ;
}


/* display text used when a parse source is the taxname after the binomial */
const CharPtr kTaxnameAfterBinomialString = "Taxname after binomial";


/* Returns a newly allocated description of the part of a general ID that a
 * parse action reads; "invalid id" when vnp is NULL.
 */
static CharPtr SummarizeParseSrcGeneralId (ValNodePtr vnp)
{
  CharPtr summ = NULL;
  CharPtr fmt = "general ID %s tag";

  if (vnp == NULL) {
    return StringSave ("invalid id");
  }

  switch (vnp->choice) {
    case ParseSrcGeneralId_whole_text:
      summ = StringSave ("entire general ID");
      break;
    case ParseSrcGeneralId_db:
      summ = StringSave ("general ID database");
break; case ParseSrcGeneralId_tag: if (vnp->data.ptrvalue == NULL || StringHasNoText (vnp->data.ptrvalue)) { summ = StringSave ("general ID tag"); } else { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (vnp->data.ptrvalue))); sprintf (summ, fmt, vnp->data.ptrvalue); } break; default: summ = StringSave ("invalid id"); break; } return summ; } NLM_EXTERN CharPtr SummarizeParseSrc (ValNodePtr src) { CharPtr summ = NULL; CharPtr fmt = "structured comment field %s"; ParseSrcOrgPtr src_org; Boolean need_to_save = TRUE; if (src != NULL) { switch (src->choice) { case ParseSrc_defline: summ = "defline"; break; case ParseSrc_flatfile: summ = "flat file"; break; case ParseSrc_local_id: summ = "local ID"; break; case ParseSrc_org: src_org = (ParseSrcOrgPtr) src->data.ptrvalue; if (src_org != NULL) { if (src_org->field != NULL) { if (src_org->field->choice == ParseSrcOrgChoice_taxname_after_binomial) { summ = kTaxnameAfterBinomialString; } else if (src_org->field->choice == ParseSrcOrgChoice_source_qual) { summ = GetSourceQualName (src_org->field->data.intvalue); } } } break; case ParseSrc_comment: summ = "comment"; break; case ParseSrc_bankit_comment: summ = "BankIT comment"; break; case ParseSrc_structured_comment: if (!StringHasNoText (src->data.ptrvalue)) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (src->data.ptrvalue) + StringLen (fmt))); sprintf (summ, fmt, src->data.ptrvalue); need_to_save = FALSE; } break; case ParseSrc_file_id: summ = "file ID"; break; case ParseSrc_general_id: summ = SummarizeParseSrcGeneralId(src->data.ptrvalue); need_to_save = FALSE; break; } } if (summ == NULL) { summ = StringSave ("missing field"); } else if (need_to_save) { summ = StringSave (summ); } return summ; } NLM_EXTERN CharPtr SummarizeParseDst (ValNodePtr dst) { CharPtr summ = NULL; CharPtr fmt = "%s %s"; CharPtr feature, field; ParseDstOrgPtr dst_org; Boolean need_to_save = TRUE; FeatureFieldLegalPtr ffp; if (dst != NULL) { switch (dst->choice) { case 
ParseDest_defline: summ = "defline"; break; case ParseDest_org: dst_org = (ParseDstOrgPtr) dst->data.ptrvalue; if (dst_org != NULL) { if (dst_org->field != NULL) { switch (dst_org->field->choice) { case SourceQualChoice_textqual: summ = GetSourceQualName (dst_org->field->data.intvalue); break; case SourceQualChoice_location: summ = "location"; break; case SourceQualChoice_origin: summ = "origin"; break; } } } break; case ParseDest_featqual: ffp = (FeatureFieldLegalPtr) dst->data.ptrvalue; if (ffp != NULL) { feature = GetFeatureNameFromFeatureType (ffp->type); field = GetFeatQualName (ffp->field); summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (feature) + StringLen (field))); sprintf (summ, fmt, feature, field); need_to_save = FALSE; } break; case ParseDest_dbxref: summ = "dbxref"; break; } } if (summ == NULL) { summ = StringSave ("missing field"); } else if (need_to_save) { summ = StringSave (summ); } return summ; } /* summarizing AECR actions */ static CharPtr SummarizeFieldPairType (ValNodePtr vnp, CharPtr connect_word) { FeatureFieldPairPtr ffp; CDSGeneProtFieldPairPtr cgp; SourceQualPairPtr quals; MolinfoFieldPairPtr m_fields; RnaQualPairPtr rna_quals; CharPtr str = NULL; CharPtr from_label = NULL, to_label = NULL; CharPtr label_fmt = "%s %s %s"; CharPtr type_label_fmt = "%s %s %s %s"; CharPtr label = NULL; if (connect_word == NULL) { connect_word = "to"; } if (vnp == NULL) { str = StringSave ("missing field"); } else { switch (vnp->choice) { case FieldPairType_source_qual: if (vnp->data.ptrvalue != NULL) { quals = (SourceQualPairPtr) vnp->data.ptrvalue; from_label = GetSourceQualName (quals->field_from); to_label = GetSourceQualName (quals->field_to); } if (from_label != NULL && to_label != NULL) { str = (CharPtr) MemNew (sizeof (Char) * (StringLen (from_label) + StringLen (connect_word) + StringLen (to_label) + 3)); sprintf (str, "%s %s %s", from_label, connect_word, to_label); } else { str = StringSave ("missing field"); } break; case 
FieldPairType_feature_field: ffp = (FeatureFieldPairPtr) vnp->data.ptrvalue; if (ffp == NULL || ffp->field_from == NULL || ffp->field_to == NULL) { str = StringSave ("missing field"); } else { label = GetFeatureNameFromFeatureType (ffp->type); from_label = FeatureFieldLabel (label, ffp->field_from); to_label = FeatureFieldLabel (label, ffp->field_to); str = (CharPtr) MemNew (sizeof (Char) * (StringLen (label_fmt) + StringLen (from_label) + StringLen (to_label) + StringLen (connect_word))); sprintf (str, label_fmt, from_label, connect_word, to_label); from_label = MemFree (from_label); to_label = MemFree (to_label); } break; case FieldPairType_cds_gene_prot: cgp = (CDSGeneProtFieldPairPtr) vnp->data.ptrvalue; from_label = CDSGeneProtNameFromField (cgp->field_from); to_label = CDSGeneProtNameFromField (cgp->field_to); str = (CharPtr) MemNew (sizeof (Char) * StringLen (from_label) + StringLen (connect_word) + StringLen (to_label) + 3); sprintf (str, "%s %s %s", from_label, connect_word, to_label); break; case FieldPairType_molinfo_field: m_fields = (MolinfoFieldPairPtr) vnp->data.ptrvalue; from_label = NULL; to_label = NULL; label = NULL; switch (m_fields->choice) { case MolinfoFieldPair_molecule: from_label = BiomolNameFromBiomol (BiomolFromMoleculeType (((MolinfoMoleculePairPtr) m_fields->data.ptrvalue)->from)); to_label = BiomolNameFromBiomol (BiomolFromMoleculeType (((MolinfoMoleculePairPtr) m_fields->data.ptrvalue)->to)); label = "molecule"; break; case MolinfoFieldPair_technique: from_label = TechNameFromTech (TechFromTechniqueType (((MolinfoTechniquePairPtr) m_fields->data.ptrvalue)->from)); to_label = TechNameFromTech (TechFromTechniqueType (((MolinfoTechniquePairPtr) m_fields->data.ptrvalue)->to)); label = "technique"; break; case MolinfoFieldPair_completedness: from_label = CompletenessNameFromCompleteness (CompletenessFromCompletednessType (((MolinfoCompletednessPairPtr) m_fields->data.ptrvalue)->from)); to_label = CompletenessNameFromCompleteness 
(CompletenessFromCompletednessType (((MolinfoCompletednessPairPtr) m_fields->data.ptrvalue)->to));
            label = "completeness";
            break;
          case MolinfoFieldPair_mol_class:
            from_label = MolNameFromMol (MolFromMoleculeClassType (((MolinfoMolClassPairPtr) m_fields->data.ptrvalue)->from));
            to_label = MolNameFromMol (MolFromMoleculeClassType (((MolinfoMolClassPairPtr) m_fields->data.ptrvalue)->to));
            label = "class";
            break;
          case MolinfoFieldPair_topology:
            from_label = TopologyNameFromTopology (TopologyFromTopologyType (((MolinfoTopologyPairPtr) m_fields->data.ptrvalue)->from));
            to_label = TopologyNameFromTopology (TopologyFromTopologyType (((MolinfoTopologyPairPtr) m_fields->data.ptrvalue)->to));
            label = "topology";
            break;
          case MolinfoFieldPair_strand:
            from_label = StrandNameFromStrand (StrandFromStrandType (((MolinfoStrandPairPtr) m_fields->data.ptrvalue)->from));
            to_label = StrandNameFromStrand (StrandFromStrandType (((MolinfoStrandPairPtr) m_fields->data.ptrvalue)->to));
            label = "strand";
            break;
        }
        /* placeholders keep the sprintf arguments below non-NULL */
        if (from_label == NULL) {
          from_label = "Unknown value";
        }
        if (to_label == NULL) {
          to_label = "Unknown value";
        }
        if (label == NULL) {
          label = "Unknown molinfo field";
        }
        str = (CharPtr) MemNew (sizeof (Char) * (StringLen (type_label_fmt) + StringLen (label) + StringLen (from_label) + StringLen (to_label) + StringLen (connect_word)));
        sprintf (str, type_label_fmt, label, from_label, connect_word, to_label);
        break;
      case FieldPairType_rna_field:
        if (vnp->data.ptrvalue != NULL) {
          rna_quals = (RnaQualPairPtr) vnp->data.ptrvalue;
          label = SummarizeRnaType (rna_quals->type);
          from_label = GetNameForRnaField (rna_quals->field_from);
          to_label = GetNameForRnaField (rna_quals->field_to);
        }
        /* all three parts are required; otherwise the pair is reported as missing */
        if (from_label != NULL && to_label != NULL && label != NULL) {
          str = (CharPtr) MemNew (sizeof (Char) * (StringLen (type_label_fmt) + StringLen (label) + StringLen (from_label) + StringLen (connect_word) + StringLen (to_label)));
          sprintf (str, type_label_fmt, label, from_label, connect_word, to_label);
        } else {
str = StringSave ("missing field"); } label = MemFree (label); break; default: str = StringSave ("Invalid field type"); break; } } return str; } static CharPtr SummarizeApplyAction (ApplyActionPtr a) { CharPtr str = NULL; CharPtr fmt = "Apply %s to %s (%s)"; CharPtr nontextqual_fmt = "Apply %s (%s)"; CharPtr field, existing_text; if (a == NULL) { str = StringSave ("No action"); } else if (a->value == NULL || a->field == NULL) { str = StringSave ("Invalid action"); } else { field = SummarizeFieldType (a->field); existing_text = SummarizeExistingText (a->existing_text); if (IsFieldTypeNonText (a->field)) { str = (CharPtr) MemNew (sizeof (Char) * StringLen (nontextqual_fmt) + StringLen (field) + StringLen (existing_text)); sprintf (str, nontextqual_fmt, field, existing_text); } else { str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (a->value) + StringLen (field) + StringLen (existing_text))); sprintf (str, fmt, a->value, field, existing_text); } field = MemFree (field); } return str; } static CharPtr SummarizeEditAction (EditActionPtr a) { CharPtr str = NULL; CharPtr fmt = "Edit %s replace '%s'%s with '%s'"; CharPtr case_insensitive = " (case insensitive)"; CharPtr field; Int4 len; if (a == NULL) { str = StringSave ("No action"); } else if (a->field == NULL || a->field == NULL || a->edit == NULL || a->edit->find_txt == NULL) { str = StringSave ("Invalid action"); } else { field = SummarizeFieldType (a->field); len = StringLen (fmt) + StringLen (field) + StringLen (a->edit->find_txt) + StringLen (a->edit->repl_txt); if (a->edit->case_insensitive) { len += StringLen (case_insensitive); } str = (CharPtr) MemNew (sizeof (Char) * len); sprintf (str, fmt, field, a->edit->find_txt, a->edit->case_insensitive ? case_insensitive : "", a->edit->repl_txt == NULL ? 
"" : a->edit->repl_txt); field = MemFree (field); } return str; } static CharPtr SummarizeRemoveOutsideAction (RemoveOutsideActionPtr a) { CharPtr str = NULL; CharPtr fmt = "Remove %s in %s"; CharPtr case_insensitive = " (case insensitive)"; CharPtr if_not_found = ", remove entire text if search text not found"; CharPtr field, tmp; Int4 len; if (a == NULL) { str = StringSave ("No action"); } else if (a->field == NULL || a->field == NULL || a->portion == NULL) { str = StringSave ("Invalid action"); } else { field = SummarizeFieldType (a->field); tmp = SummarizeTextPortion (a->portion); len = StringLen (fmt) + StringLen (field) + StringLen (tmp); if (a->remove_if_not_found) { len += StringLen (if_not_found); } str = (CharPtr) MemNew (sizeof (Char) * len); sprintf (str, fmt, tmp, field); if (a->remove_if_not_found) { StringCat (str, if_not_found); } field = MemFree (field); tmp = MemFree (tmp); } return str; } static CharPtr SummarizeConvertAction (ConvertActionPtr a) { CharPtr str = NULL; CharPtr fmt = "Convert %s (%s)"; CharPtr fields, existing_text; if (a == NULL) { str = StringSave ("No action"); } else if (a->fields == NULL || a->fields == NULL) { str = StringSave ("Invalid action"); } else { fields = SummarizeFieldPairType (a->fields, "to"); existing_text = SummarizeExistingText (a->existing_text); str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fields) + StringLen (existing_text))); sprintf (str, fmt, fields, existing_text); fields = MemFree (fields); } return str; } static CharPtr SummarizeCopyAction (CopyActionPtr a) { CharPtr str = NULL; CharPtr fmt = "Copy %s (%s)"; CharPtr fields, existing_text; if (a == NULL) { str = StringSave ("No action"); } else if (a->fields == NULL) { str = StringSave ("Invalid action"); } else { fields = SummarizeFieldPairType (a->fields, "to"); existing_text = SummarizeExistingText (a->existing_text); str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fields) + StringLen (existing_text))); 
sprintf (str, fmt, fields, existing_text); fields = MemFree (fields); } return str; } static CharPtr SummarizeSwapAction (SwapActionPtr a) { CharPtr str = NULL; CharPtr fmt = "Swap %s"; CharPtr fields; if (a == NULL) { str = StringSave ("No action"); } else if (a->fields == NULL) { str = StringSave ("Invalid action"); } else { fields = SummarizeFieldPairType (a->fields, "with"); str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fields))); sprintf (str, fmt, fields); fields = MemFree (fields); } return str; } static CharPtr SummarizeCapChange (Uint1 cap_change) { CharPtr rval = NULL; switch (cap_change) { case Cap_change_tolower: rval = StringSave ("change capitalization to lower"); break; case Cap_change_toupper: rval = StringSave ("change capitalization to upper"); break; case Cap_change_firstcap: rval = StringSave ("capitalize first letter, remaining lower case"); break; case Cap_change_firstcaprestnochange: rval = StringSave ("capitalize first letter, do not change other characters"); break; case Cap_change_firstlower_restnochange: rval = StringSave ("lowercase first letter, do not change other characters"); break; case Cap_change_cap_word_space: rval = StringSave ("capitalize first letter and letters after spaces"); break; case Cap_change_cap_word_space_punc: rval = StringSave ("capitalize first letter and letters after spaces or punctuation"); break; } return rval; } NLM_EXTERN CharPtr SummarizeTextTransform (ValNodePtr transform) { FieldEditPtr edit; CharPtr replace_fmt = "replace '%s' with '%s'"; CharPtr remove_fmt = "remove %s"; CharPtr case_insensitive = " (case insensitive)"; CharPtr rval = NULL, tmp; Int4 len = 0; if (transform == NULL) { return NULL; } switch (transform->choice) { case TextTransform_edit: if ((edit = (FieldEditPtr) transform->data.ptrvalue) != NULL) { len = StringLen (replace_fmt) + StringLen (edit->find_txt) + StringLen (edit->repl_txt); if (edit->case_insensitive) { len += StringLen (case_insensitive); } rval = 
(CharPtr) MemNew (sizeof (Char) * len); sprintf (rval, replace_fmt, edit->find_txt == NULL ? "" : edit->find_txt, edit->repl_txt == NULL ? "" : edit->repl_txt); if (edit->case_insensitive) { StringCat (rval, case_insensitive); } } break; case TextTransform_caps: rval = SummarizeCapChange(transform->data.intvalue); break; case TextTransform_remove: tmp = SummarizeTextPortion (transform->data.ptrvalue); rval = (CharPtr) MemNew (sizeof (Char) * (StringLen (remove_fmt) + StringLen (tmp))); sprintf (rval, remove_fmt, tmp); tmp = MemFree (tmp); break; } return rval; } static CharPtr SummarizeTextTransformList (ValNodePtr text_transform) { ValNodePtr str_list = NULL, vnp; Int4 len = 0; CharPtr rval = NULL, tmp; for (vnp = text_transform; vnp != NULL; vnp = vnp->next) { tmp = SummarizeTextTransform (vnp); if (tmp != NULL) { ValNodeAddPointer (&str_list, 0, tmp); len += StringLen (tmp) + 3; } } rval = (CharPtr) MemNew (sizeof (Char) * len); for (vnp = str_list; vnp != NULL; vnp = vnp->next) { StringCat (rval, vnp->data.ptrvalue); if (vnp->next != NULL) { StringCat (rval, ", "); } } str_list = ValNodeFreeData (str_list); return rval; } static CharPtr SummarizeAECRParseAction (AECRParseActionPtr a) { CharPtr str = NULL; CharPtr fmt = "Parse %s%s%s from %s(%s)"; CharPtr fields, existing_text, text_portion, transform; if (a == NULL) { str = StringSave ("No action"); } else if (a->fields == NULL) { str = StringSave ("Invalid action"); } else { fields = SummarizeFieldPairType (a->fields, "to"); existing_text = SummarizeExistingText (a->existing_text); text_portion = SummarizeTextPortion (a->portion); transform = SummarizeTextTransformList(a->transform); str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (text_portion) + StringLen (transform) + StringLen (fields) + StringLen (existing_text))); sprintf (str, fmt, text_portion, transform == NULL ? "" : " ", transform == NULL ? 
"" : transform, fields, existing_text); fields = MemFree (fields); text_portion = MemFree (text_portion); transform = MemFree (transform); } return str; } static CharPtr SummarizeRemoveAction (RemoveActionPtr a) { CharPtr str = NULL; CharPtr fmt = "Remove %s"; CharPtr field; if (a == NULL) { str = StringSave ("No action"); } else if (a->field == NULL || a->field == NULL) { str = StringSave ("Invalid action"); } else { field = SummarizeFieldType (a->field); str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (field))); sprintf (str, fmt, field); field = MemFree (field); } return str; } NLM_EXTERN CharPtr SummarizeAECRAction (AECRActionPtr a) { CharPtr str = NULL, act = NULL, constraint = NULL; if (a == NULL) { str = StringSave ("No action"); } else if (a->action == NULL) { str = StringSave ("Invalid command"); } else { switch (a->action->choice) { case ActionChoice_apply: act = SummarizeApplyAction (a->action->data.ptrvalue); break; case ActionChoice_edit: act = SummarizeEditAction (a->action->data.ptrvalue); break; case ActionChoice_remove_outside: act = SummarizeRemoveOutsideAction (a->action->data.ptrvalue); break; case ActionChoice_convert: act = SummarizeConvertAction (a->action->data.ptrvalue); break; case ActionChoice_copy: act = SummarizeCopyAction (a->action->data.ptrvalue); break; case ActionChoice_swap: act = SummarizeSwapAction (a->action->data.ptrvalue); break; case ActionChoice_remove: act = SummarizeRemoveAction (a->action->data.ptrvalue); break; case ActionChoice_parse: act = SummarizeAECRParseAction (a->action->data.ptrvalue); break; } if (act == NULL) { str = StringSave ("Invalid action"); } else { constraint = SummarizeConstraintSet (a->constraint); if (constraint == NULL) { str = act; } else { str = (CharPtr) MemNew (sizeof (Char) * (StringLen(act) + 2 + StringLen (constraint))); sprintf (str, "%s %s", act, constraint); act = MemFree (act); constraint = MemFree (constraint); } } } return str; } NLM_EXTERN CharPtr 
SummarizeParseAction (ParseActionPtr p) { CharPtr field_from = NULL, field_to = NULL; CharPtr existing_text = NULL, text_portion = NULL, transform; CharPtr summ = NULL; CharPtr fmt = "Parse %s from %s to %s%s%s (%s)"; if (p == NULL) { summ = StringSave ("No action"); } else { field_from = SummarizeParseSrc (p->src); field_to = SummarizeParseDst (p->dest); existing_text = SummarizeExistingText (p->existing_text); text_portion = SummarizeTextPortion (p->portion); transform = SummarizeTextTransformList(p->transform); summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (text_portion) + StringLen (field_from) + StringLen (field_to) + StringLen (transform) + StringLen (existing_text))); sprintf (summ, fmt, text_portion, field_from, field_to, transform == NULL ? "" : " ", transform == NULL ? "" : transform, existing_text); text_portion = MemFree (text_portion); field_from = MemFree (field_from); field_to = MemFree (field_to); } return summ; } static CharPtr SummarizeAutodefClauseListType (Uint2 clause_list_type) { CharPtr str = "complete sequence"; switch (clause_list_type) { case Autodef_list_type_feature_list: str = "list features"; break; case Autodef_list_type_complete_sequence: str = "complete sequence"; break; case Autodef_list_type_complete_genome: str = "complete genome"; break; case Autodef_list_type_sequence: str = "sequence"; break; } return str; } NLM_EXTERN CharPtr SummarizeAutodefAction (AutodefActionPtr autodef) { CharPtr label = NULL, mod_name; CharPtr str = NULL; CharPtr fmt = "Autodef %s"; CharPtr modifiers_fmt = " with modifier"; CharPtr misc_feat_rule = NULL; Int4 len; ValNodePtr mod_names = NULL, vnp; if (autodef == NULL) { str = StringSave ("No action"); } else { label = SummarizeAutodefClauseListType (autodef->clause_list_type); if (autodef->clause_list_type == Autodef_list_type_feature_list) { if (autodef->misc_feat_parse_rule == 1) { misc_feat_rule = ", use misc-feat comment before first semicolon"; } else if 
(autodef->misc_feat_parse_rule == 2) { misc_feat_rule = ", look for non-coding product in misc-feat comment"; } } len = StringLen (fmt) + StringLen (label) + StringLen (misc_feat_rule); if (autodef->modifiers != NULL) { len += StringLen (modifiers_fmt) + 2; for (vnp = autodef->modifiers; vnp != NULL; vnp = vnp->next) { mod_name = GetSourceQualName (vnp->data.intvalue); len += StringLen (mod_name) + 3; ValNodeAddPointer (&mod_names, 0, mod_name); } } str = (CharPtr) MemNew (sizeof (Char) * (len + 1)); sprintf (str, fmt, label); if (autodef->modifiers != NULL) { StringCat (str, modifiers_fmt); if (autodef->modifiers->next != NULL) { StringCat (str, "s"); } for (vnp = mod_names; vnp != NULL; vnp = vnp->next) { StringCat (str, " "); StringCat (str, vnp->data.ptrvalue); if (vnp->next != NULL) { StringCat (str, ","); } } } mod_names = ValNodeFree (mod_names); if (misc_feat_rule != NULL) { StringCat (str, misc_feat_rule); } } return str; } NLM_EXTERN CharPtr SummarizeRemoveDescriptorAction (RemoveDescriptorActionPtr a) { CharPtr label = NULL; CharPtr constraint, str; CharPtr fmt = "Remove %s"; CharPtr constraint_fmt = "Remove %s descriptors %s"; if (a == NULL) { str = StringSave ("No action"); } else { label = GetDescriptorNameFromDescriptorType (a->type); constraint = SummarizeConstraintSet (a->constraint); if (constraint == NULL) { str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label))); sprintf (str, fmt, label); } else { str = (CharPtr) MemNew (sizeof (Char) * (StringLen (constraint_fmt) + StringLen (label) + StringLen (constraint))); sprintf (str, constraint_fmt, label, constraint); constraint = MemFree (constraint); } } return str; } NLM_EXTERN CharPtr SummarizeFixPubCapsAction (FixPubCapsActionPtr a) { CharPtr constraint = NULL; Int4 len = 0; CharPtr descriptions[] = {"affiliation", "title", "authors", "affiliation country"}; CharPtr punct_only = " (punctuation only)"; Boolean present[4]; Int4 i, first = 4, last = 0, num_items = 0; CharPtr 
summ = NULL; if (a == NULL) { return NULL; } present[0] = a->affiliation; present[1] = a->title; present[2] = a->authors; present[3] = a->affil_country; for (i = 0; i < 4; i++) { if (present[i]) { len += 6 + StringLen (descriptions[i]); if (first == 4) { first = i; } last = i; num_items++; } } if (len > 0) { if (a->punct_only) { len += StringLen (punct_only); } constraint = SummarizeConstraintSet (a->constraint); len += StringLen (constraint) + 14; summ = (CharPtr) MemNew (sizeof (Char) * len); sprintf (summ, "Fix pub "); for (i = 0; i < 4; i++) { if (present[i]) { if (i != first) { if (num_items > 2) { StringCat (summ, ", "); } if (i == last) { StringCat (summ, " and "); } } StringCat (summ, descriptions[i]); } } if (a->punct_only) { StringCat (summ, punct_only); } if (constraint != NULL) { StringCat (summ, " where "); StringCat (summ, constraint); } constraint = MemFree (constraint); } return summ; } static CharPtr SummarizeFixAuthorCaps (FixAuthorCapsPtr action) { if (action == NULL) { return StringSave ("Invalid action"); } else if (action->last_name_only) { return StringSave ("Fix capitalization in author last names where last name is all caps"); } else { return StringSave ("Fix capitalization in author name where name is all caps"); } } NLM_EXTERN CharPtr SummarizeFixCapsAction (FixCapsActionPtr action) { CharPtr summ = NULL, tmp; CharPtr fmt = "Fix capitalization in %s source qualifier"; if (action == NULL) { summ = StringSave ("Invalid action"); } else { switch (action->choice) { case FixCapsAction_pub: summ = SummarizeFixPubCapsAction (action->data.ptrvalue); break; case FixCapsAction_src_country: summ = StringSave ("Fix source country qualifier capitalization"); break; case FixCapsAction_mouse_strain: summ = StringSave ("Fix capitalization in common Mus musculus strains"); break; case FixCapsAction_src_qual: tmp = GetSourceQualName (action->data.intvalue); summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (tmp) + StringLen (fmt))); sprintf (summ, fmt, 
tmp); break; case FixCapsAction_author: summ = SummarizeFixAuthorCaps (action->data.ptrvalue); break; default: summ = StringSave ("Invalid action"); break; } } return summ; } NLM_EXTERN CharPtr SummarizeFixFormatAction (FixFormatActionPtr action) { CharPtr summ = NULL; if (action == NULL) { summ = StringSave ("Invalid action"); } else { switch (action->choice) { case FixFormatAction_collection_date: summ = StringSave ("Fix collection-date format"); break; case FixFormatAction_lat_lon: summ = StringSave ("Fix lat-lon format"); break; case FixFormatAction_primers: summ = StringSave ("Fix i in primer sequence"); break; case FixFormatAction_protein_name: summ = StringSave ("Remove organism names from protein names"); break; default: summ = StringSave ("Invalid action"); break; } } return summ; } NLM_EXTERN CharPtr SummarizeRemoveDuplicateFeaturesAction (RemoveDuplicateFeatureActionPtr action) { CharPtr summ = NULL; CharPtr start_fmt = "Remove duplicate%s%s features"; CharPtr feat_type; CharPtr case_sensitive = "(case-sensitive)"; CharPtr ignore_partials = "(ignore partials)"; CharPtr remove_proteins = " and remove protein products"; Int4 len = 0; if (action == NULL) { summ = StringSave ("Invalid action"); } else { len = StringLen (start_fmt); if (action->type == Macro_feature_type_any) { feat_type = ""; } else { feat_type = GetFeatureNameFromFeatureType (action->type); } len += StringLen (feat_type) + 1; if (action->case_sensitive) { len += StringLen (case_sensitive); } if (action->ignore_partials) { len += StringLen (ignore_partials); } if (action->remove_proteins) { len += StringLen (remove_proteins); } summ = (CharPtr) MemNew (sizeof (Char) * len); sprintf (summ, start_fmt, action->type == Macro_feature_type_any ? 
"" : " ", feat_type); if (action->case_sensitive) { StringCat (summ, case_sensitive); } if (action->ignore_partials) { StringCat (summ, ignore_partials); } if (action->remove_proteins) { StringCat (summ, remove_proteins); } } return summ; } NLM_EXTERN CharPtr GetSortOrderName (Uint2 order) { CharPtr rval = NULL; switch (order) { case Sort_order_short_to_long: rval = "by length, short to long"; break; case Sort_order_long_to_short: rval = "by length, long to short"; break; case Sort_order_alphabetical: rval = "alphabetically"; break; default: rval = "unknown order"; break; } return rval; } NLM_EXTERN CharPtr SummarizeSortFieldsAction (SortFieldsActionPtr action) { CharPtr label, order, constraint, summ; CharPtr fmt = "Sort %s fields %s%s%s"; label = SummarizeFieldType (action->field); order = GetSortOrderName(action->order); constraint = SummarizeConstraintSet (action->constraint); summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen(order) + StringLen (constraint))); sprintf (summ, fmt, label, order, constraint == NULL ? "" : " where ", constraint == NULL ? 
"" : constraint); label = MemFree (label); constraint = MemFree (constraint); return summ; } NLM_EXTERN CharPtr SummarizeMolinfoBlockAction (MolinfoBlockPtr mib) { CharPtr field_label, constraint, summ; ValNodePtr field, field_strs = NULL, from_strs = NULL, vnp; Int4 len = 11; Int4 num_from = 0; Int4 num_to = 0; if (mib == NULL) { return NULL; } for (field = mib->to_list; field != NULL; field = field->next) { field_label = GetSequenceQualName (field); ValNodeAddPointer (&field_strs, 0, field_label); len += StringLen (field_label) + 2; num_to++; } for (field = mib->from_list; field != NULL; field = field->next) { field_label = GetSequenceQualName (field); ValNodeAddPointer (&from_strs, 0, field_label); len += StringLen (field_label) + 2; num_from++; } constraint = SummarizeConstraintSet (mib->constraint); len += StringLen (constraint); if (constraint != NULL || num_from > 0) { len += 12; } if (num_to > 1) { len += 5; } if (num_from > 1) { len += 5; } summ = (CharPtr) MemNew (sizeof (Char) * len); sprintf (summ, "Change to "); for (vnp = field_strs; vnp != NULL; vnp = vnp->next) { StringCat (summ, vnp->data.ptrvalue); if (vnp->next != NULL) { if (num_to > 2) { if (vnp->next->next == NULL) { StringCat (summ, ", and"); } else { StringCat (summ, ", "); } } else { StringCat (summ, " and "); } } } if (num_from > 0 || constraint != NULL) { StringCat (summ, " where "); } for (vnp = from_strs; vnp != NULL; vnp = vnp->next) { StringCat (summ, vnp->data.ptrvalue); if (vnp->next != NULL) { if (num_from > 2) { if (vnp->next->next == NULL && constraint == NULL) { StringCat (summ, ", and"); } else { StringCat (summ, ", "); } } else if (constraint == NULL) { StringCat (summ, " and "); } else { StringCat (summ, ", "); } } } if (constraint != NULL && num_from > 0) { StringCat (summ, " and "); } StringCat (summ, constraint); field_strs = ValNodeFreeData (field_strs); from_strs = ValNodeFreeData (from_strs); constraint = MemFree (constraint); return summ; } /* summarizing constraints 
*/ static CharPtr GetStringLocationPhrase (Uint2 match_location, Boolean not_present) { CharPtr location_word = NULL; switch (match_location) { case String_location_contains : if (not_present) { location_word = "does not contain"; } else { location_word = "contains"; } break; case String_location_equals : if (not_present) { location_word = "does not equal"; } else { location_word = "equals"; } break; case String_location_starts : if (not_present) { location_word = "does not start with"; } else { location_word = "starts with"; } break; case String_location_ends : if (not_present) { location_word = "does not end with"; } else { location_word = "ends with"; } break; case String_location_inlist : if (not_present) { location_word = "is not one of"; } else { location_word = "is one of"; } break; } return location_word; } static const CharPtr kCaseSensitive = "case-sensitive"; static const CharPtr kWholeWord = "whole word"; NLM_EXTERN CharPtr SummarizeWordSubstitution (WordSubstitutionPtr word) { CharPtr fmt = "allow '%s' to be replaced by '%s'"; Int4 len = 0; ValNodePtr vnp; CharPtr summ = NULL; if (word == NULL || word->synonyms == NULL) { return NULL; } len = StringLen (fmt) + StringLen (word->word); for (vnp = word->synonyms; vnp != NULL; vnp = vnp->next) { len += StringLen (vnp->data.ptrvalue) + 4; } if (word->case_sensitive) { len += StringLen (kCaseSensitive) + 3; } if (word->whole_word) { len += StringLen (kWholeWord) + 3; } summ = (CharPtr) MemNew (sizeof (Char) * len); sprintf (summ, fmt, word->word == NULL ? "" : word->word, (word->synonyms == NULL || word->synonyms->data.ptrvalue == NULL) ? 
"" : word->synonyms->data.ptrvalue); if (word->synonyms != NULL) { for (vnp = word->synonyms->next; vnp != NULL; vnp = vnp->next) { if (word->synonyms->next->next != NULL) { StringCat (summ, ","); } StringCat (summ, " "); if (vnp->next == NULL) { StringCat (summ, "and "); } StringCat (summ, "'"); if (vnp->data.ptrvalue != NULL) { StringCat (summ, vnp->data.ptrvalue); } StringCat (summ, "'"); } } if (word->case_sensitive) { StringCat (summ, ", "); StringCat (summ, kCaseSensitive); } if (word->whole_word) { StringCat (summ, ", "); StringCat (summ, kWholeWord); } return summ; } NLM_EXTERN CharPtr SummarizeStringConstraintEx (StringConstraintPtr constraint, Boolean short_version) { CharPtr location_word = NULL; CharPtr ignore_space = "ignore spaces"; CharPtr ignore_punct = "ignore punctuation"; CharPtr ignore_weasel = "ignore 'putative' synonyms"; CharPtr str = NULL; Int4 len; CharPtr fmt = "%s '%s'"; Boolean has_extra = FALSE; WordSubstitutionPtr word; ValNodePtr subst_words = NULL, vnp; CharPtr tmp; if (IsStringConstraintEmpty (constraint)) return NULL; if (constraint->match_text != NULL) { location_word = GetStringLocationPhrase (constraint->match_location, constraint->not_present); if (location_word == NULL) return NULL; len = StringLen (location_word) + StringLen (constraint->match_text) + StringLen (fmt); if (!short_version) { if (constraint->case_sensitive) { len += StringLen (kCaseSensitive) + 3; } if (constraint->whole_word) { len += StringLen (kWholeWord) + 3; } if (constraint->ignore_space) { len += StringLen (ignore_space) + 3; } if (constraint->ignore_punct) { len += StringLen (ignore_punct) + 3; } if (constraint->ignore_weasel) { len += StringLen (ignore_weasel) + 3; } /* allocate space for substitution phrases */ for (word = constraint->ignore_words; word != NULL; word = word->next) { tmp = SummarizeWordSubstitution (word); if (tmp != NULL) { ValNodeAddPointer (&subst_words, 0, tmp); len += StringLen (tmp) + 2; } } } str = (CharPtr) MemNew (sizeof (Char) 
* len); sprintf (str, fmt, location_word, constraint->match_text); if (!short_version) { if (constraint->case_sensitive || constraint->whole_word || constraint->ignore_space || constraint->ignore_punct) { StringCat (str, " ("); } if (constraint->case_sensitive) { StringCat (str, kCaseSensitive); has_extra = TRUE; } if (constraint->whole_word) { if (has_extra) { StringCat (str, ", "); } StringCat (str, kWholeWord); has_extra = TRUE; } if (constraint->ignore_space) { if (has_extra) { StringCat (str, ", "); } StringCat (str, ignore_space); has_extra = TRUE; } if (constraint->ignore_punct) { if (has_extra) { StringCat (str, ", "); } StringCat (str, ignore_punct); has_extra = TRUE; } if (constraint->ignore_weasel) { if (has_extra) { StringCat (str, ", "); } StringCat (str, ignore_weasel); has_extra = TRUE; } if (constraint->case_sensitive || constraint->whole_word || constraint->ignore_space || constraint->ignore_punct) { StringCat (str, ")"); } for (vnp = subst_words; vnp != NULL; vnp = vnp->next) { StringCat (str, ", "); StringCat (str, vnp->data.ptrvalue); } subst_words = ValNodeFreeData (subst_words); } } if (constraint->is_all_caps) { SetStringValue(&str, "all letters are uppercase", ExistingTextOption_append_comma); } if (constraint->is_all_lower) { SetStringValue(&str, "all letters are lowercase", ExistingTextOption_append_comma); } if (constraint->is_all_punct) { SetStringValue(&str, "all characters are punctuation", ExistingTextOption_append_comma); } return str; } NLM_EXTERN CharPtr SummarizeStringConstraint (StringConstraintPtr constraint) { return SummarizeStringConstraintEx (constraint, FALSE); } static CharPtr SummarizePartialnessForLocationConstraint (LocationConstraintPtr constraint) { if (constraint == NULL || (constraint->partial5 == Partial_constraint_either && constraint->partial3 == Partial_constraint_either)) { return NULL; } if (constraint->partial5 == Partial_constraint_either) { if (constraint->partial3 == Partial_constraint_partial) { return 
"that are 3' partial"; } else { return "that are 3' complete"; } } else if (constraint->partial3 == Partial_constraint_either) { if (constraint->partial5 == Partial_constraint_partial) { return "that are 5' partial"; } else { return "that are 5' complete"; } } else if (constraint->partial5 == Partial_constraint_partial && constraint->partial3 == Partial_constraint_partial) { return "that are partial on both ends"; } else if (constraint->partial5 == Partial_constraint_complete && constraint->partial3 == Partial_constraint_complete) { return "that are complete on both ends"; } else if (constraint->partial5 == Partial_constraint_complete && constraint->partial3 == Partial_constraint_partial) { return "that are 5' complete and 3' partial"; } else if (constraint->partial5 == Partial_constraint_partial && constraint->partial3 == Partial_constraint_complete) { return "that are 5' partial and 3' complete"; } else { return NULL; } } static CharPtr SummarizeLocationType (LocationConstraintPtr constraint) { if (constraint == NULL || constraint->location_type == Location_type_constraint_any) { return NULL; } else if (constraint->location_type == Location_type_constraint_single_interval) { return "with single interval"; } else if (constraint->location_type == Location_type_constraint_joined) { return "with joined intervals"; } else if (constraint->location_type == Location_type_constraint_ordered) { return "with ordered intervals"; } else { return NULL; } } static CharPtr distance_words[] = { NULL, "exactly", "no more than", "no less than" }; static CharPtr SummarizeEndDistance (ValNodePtr vnp, CharPtr end_name) { CharPtr str = NULL; CharPtr fmt = "with %s %s %d from end of sequence"; if (vnp == NULL || vnp->choice < 1 || vnp->choice > 3) { return NULL; } str = (CharPtr) MemNew (sizeof (Char) * (StringLen (distance_words[vnp->choice]) + StringLen (end_name) + StringLen (fmt) + 15)); sprintf (str, fmt, end_name, distance_words[vnp->choice], vnp->data.intvalue); return str; } 
/* Builds a phrase that starts with "only objects" and then lists, in order,
 * the strand / sequence type, partialness, interval type and 5' / 3' end
 * distance requirements of a location constraint.
 * Returns NULL when IsLocationConstraintEmpty reports an empty constraint.
 * The result is allocated with MemNew.
 */
static CharPtr SummarizeLocationConstraint (LocationConstraintPtr constraint)
{
  CharPtr prefix = "only objects";
  CharPtr partial_phrase;
  CharPtr type_phrase;
  CharPtr end5_phrase = NULL, end3_phrase = NULL;
  CharPtr strand_phrase = NULL, seq_phrase = NULL;
  CharPtr summary = NULL;
  Int4    needed = 0;

  if (IsLocationConstraintEmpty (constraint)) {
    return NULL;
  }

  /* collect the individual pieces */
  partial_phrase = SummarizePartialnessForLocationConstraint (constraint);
  type_phrase = SummarizeLocationType (constraint);
  end5_phrase = SummarizeEndDistance (constraint->end5, "5' end");
  end3_phrase = SummarizeEndDistance (constraint->end3, "3' end");

  if (constraint->seq_type == Seqtype_constraint_nuc) {
    seq_phrase = "nucleotide sequences";
  } else if (constraint->seq_type == Seqtype_constraint_prot) {
    seq_phrase = "protein sequences";
  }

  if (constraint->strand == Strand_constraint_plus) {
    strand_phrase = " on plus strands";
  } else if (constraint->strand == Strand_constraint_minus) {
    strand_phrase = " on minus strands";
  }

  /* size the buffer: prefix, terminator, and each piece with its joiner */
  needed = StringLen (prefix) + 1;
  needed += (strand_phrase != NULL) ? StringLen (strand_phrase) : 0;
  needed += (seq_phrase != NULL) ? StringLen (seq_phrase) + 4 : 0;
  needed += (partial_phrase != NULL) ? StringLen (partial_phrase) + 2 : 0;
  needed += (type_phrase != NULL) ? StringLen (type_phrase) + 2 : 0;
  needed += (end5_phrase != NULL) ? StringLen (end5_phrase) + 1 : 0;
  needed += (end3_phrase != NULL) ? StringLen (end3_phrase) + 1 : 0;

  summary = (CharPtr) MemNew (sizeof (Char) * needed);
  sprintf (summary, "%s", prefix);

  /* the sequence type is introduced with " of " after a strand, " on " otherwise */
  if (strand_phrase != NULL) {
    StringCat (summary, strand_phrase);
    if (seq_phrase != NULL) {
      StringCat (summary, " of ");
      StringCat (summary, seq_phrase);
    }
  } else if (seq_phrase != NULL) {
    StringCat (summary, " on ");
    StringCat (summary, seq_phrase);
  }

  if (partial_phrase != NULL) {
    StringCat (summary, " ");
    StringCat (summary, partial_phrase);
  }

  if (type_phrase != NULL) {
    StringCat (summary, " ");
    StringCat (summary, type_phrase);
  }

  /* the end-distance phrases were allocated and are released once copied */
  if (end5_phrase != NULL) {
    StringCat (summary, " ");
    StringCat (summary, end5_phrase);
    end5_phrase = MemFree (end5_phrase);
  }

  if (end3_phrase != NULL) {
    StringCat (summary, " ");
    StringCat (summary, end3_phrase);
    end3_phrase = MemFree (end3_phrase);
  }

  return summary;
}


static CharPtr
SummarizeSourceConstraint (SourceConstraintPtr constraint) { CharPtr string, intro = NULL, field1, field2; CharPtr match_fmt = "%s %s matches %s"; CharPtr present_fmt = "%s %s is present"; CharPtr text_fmt = "%s text %s"; CharPtr two_match_fmt = "%s %s matches %s and %s %s"; CharPtr one_match_fmt = "%s %s %s"; CharPtr summ = NULL; if (constraint == NULL) return NULL; string = SummarizeStringConstraint (constraint->constraint); field1 = SummarizeSourceQual (constraint->field1); field2 = SummarizeSourceQual (constraint->field2); if (constraint->field1 == NULL && constraint->field2 == NULL && string == NULL) { if (constraint->type_constraint == Object_type_constraint_feature) { summ = StringSave ("where source is a feature"); } else if (constraint->type_constraint == Object_type_constraint_descriptor) { summ = StringSave ("where source is a descriptor"); } } else { if (constraint->type_constraint == Object_type_constraint_any) { intro = "where source"; } else if (constraint->type_constraint == Object_type_constraint_feature) { intro = "where source feature"; } else if (constraint->type_constraint == Object_type_constraint_descriptor) { intro = "where source descriptor"; } else { string = MemFree (string); field1 = MemFree (field1); field2 = MemFree (field2); return NULL; } if (string == NULL) { if (field1 == NULL && field2 == NULL) { if (constraint->type_constraint == Object_type_constraint_feature) { summ = StringSave ("where source is a feature"); } else if (constraint->type_constraint == Object_type_constraint_descriptor) { summ = StringSave ("where source is a descriptor"); } } else if (field1 != NULL && field2 != NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (match_fmt) + StringLen (intro) + StringLen (field1) + StringLen (field2))); sprintf (summ, match_fmt, intro, field1, field2); } else if (field1 != NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (intro) + StringLen (field1))); sprintf (summ, present_fmt, 
intro, field1); } else if (field2 != NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (intro) + StringLen (field2))); sprintf (summ, present_fmt, intro, field2); } } else { if (field1 == NULL && field2 == NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (text_fmt) + StringLen (intro) + StringLen (string))); sprintf (summ, text_fmt, intro, string); } else if (field1 != NULL && field2 != NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (two_match_fmt) + StringLen (intro) + 2 * StringLen (field1) + StringLen (field2) + StringLen (string))); sprintf (summ, two_match_fmt, intro, field1, field2, field1, string); } else if (field1 != NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (intro) + StringLen (field1) + StringLen (string))); sprintf (summ, one_match_fmt, intro, field1, string); } else if (field2 != NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (intro) + StringLen (field2) + StringLen (string))); sprintf (summ, one_match_fmt, intro, field2, string); } } } string = MemFree (string); field1 = MemFree (field1); field2 = MemFree (field2); return summ; } static CharPtr SummarizeCDSGeneProtPseudoConstraint (CDSGeneProtPseudoConstraintPtr constraint) { CharPtr summ = NULL, pseudo_feat; CharPtr is_pseudo_fmt = "where %s is pseudo"; CharPtr not_pseudo_fmt = "where %s is not pseudo"; if (constraint != NULL) { pseudo_feat = CDSGeneProtFeatureNameFromFeatureType (constraint->feature); if (pseudo_feat != NULL) { if (constraint->is_pseudo) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (is_pseudo_fmt) + StringLen (pseudo_feat))); sprintf (summ, is_pseudo_fmt, pseudo_feat); } else { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (not_pseudo_fmt) + StringLen (pseudo_feat))); sprintf (summ, not_pseudo_fmt, pseudo_feat); } } } return summ; } static CharPtr SummarizeCDSGeneProtQualConstraint (CDSGeneProtQualConstraintPtr constraint) 
{ CharPtr string, field1 = NULL, field2 = NULL; CharPtr match_fmt = "where %s matches %s"; CharPtr present_fmt = "where %s is present"; CharPtr text_fmt = "where CDS-gene-prot text %s"; CharPtr two_match_fmt = "where %s matches %s and %s %s"; CharPtr one_match_fmt = "where %s %s"; CharPtr summ = NULL; if (constraint == NULL) return NULL; string = SummarizeStringConstraint (constraint->constraint); if (constraint->field1 != NULL && constraint->field1->choice == CDSGeneProtConstraintField_field) { field1 = CDSGeneProtNameFromField (constraint->field1->data.intvalue); } if (constraint->field2 != NULL && constraint->field2->choice == CDSGeneProtConstraintField_field) { field2 = CDSGeneProtNameFromField (constraint->field2->data.intvalue); } if (string == NULL) { if (field1 != NULL && field2 != NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (match_fmt) + StringLen (field1) + StringLen (field2))); sprintf (summ, match_fmt, field1, field2); } else if (field1 != NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (field1))); sprintf (summ, present_fmt, field1); } else if (field2 != NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (field2))); sprintf (summ, present_fmt, field2); } } else { if (field1 == NULL && field2 == NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (text_fmt) + StringLen (string))); sprintf (summ, text_fmt, string); } else if (field1 != NULL && field2 != NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (two_match_fmt) + 2 * StringLen (field1) + StringLen (field2) + StringLen (string))); sprintf (summ, two_match_fmt, field1, field2, field1, string); } else if (field1 != NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (field1) + StringLen (string))); sprintf (summ, one_match_fmt, field1, string); } else if (field2 != NULL) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (field2) 
+ StringLen (string))); sprintf (summ, one_match_fmt, field2, string); } } string = MemFree (string); /* note - field1 and field2 aren't allocated, so we don't need to free them */ return summ; } const CharPtr s_QuantityWords [] = { "exactly", "more than", "less than" }; const Int4 k_NumQuantityWords = sizeof (s_QuantityWords) / sizeof (CharPtr); static CharPtr SummarizeFeatureQuantity (ValNodePtr v, CharPtr feature_name) { CharPtr fmt = "sequence has %s %d %s%sfeature%s"; Int4 len; CharPtr summ = NULL; if (v == NULL || v->choice < 1 || v->choice > k_NumQuantityWords) { return NULL; } len = StringLen (fmt) + StringLen (s_QuantityWords[v->choice - 1]) + 15; if (!StringHasNoText (feature_name)) { len += StringLen (feature_name); } summ = (CharPtr) MemNew (sizeof (Char) * len); if (StringHasNoText (feature_name)) { sprintf (summ, fmt, s_QuantityWords[v->choice - 1], v->data.intvalue, "", "", v->data.intvalue == 1 ? "" : "s"); } else { sprintf (summ, fmt, s_QuantityWords[v->choice - 1], v->data.intvalue, feature_name, " ", v->data.intvalue == 1 ? 
"" : "s"); } return summ; } static CharPtr SummarizeSequenceLength (ValNodePtr v) { CharPtr fmt = "sequence is %s %d in length"; CharPtr summ = NULL; if (v == NULL || v->choice < 1 || v->choice > k_NumQuantityWords) { return NULL; } summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (s_QuantityWords[v->choice - 1]) + 15)); sprintf (summ, fmt, s_QuantityWords[v->choice - 1], v->data.intvalue); return summ; } static CharPtr s_SequenceConstraintStrandedness[] = { "Any", "sequence contains only minus strand features", "sequence contains only plus strand features", "sequence contains at least one minus strand feature", "sequence contains at least one plus strand feature", "sequence contains no minus strand features", "sequence contains no plus strand features" }; NLM_EXTERN CharPtr SummarizeFeatureStrandedness (Uint2 strandedness) { if (strandedness < sizeof (s_SequenceConstraintStrandedness) / sizeof (CharPtr)) { return s_SequenceConstraintStrandedness[strandedness]; } else { return NULL; } } static CharPtr SummarizeSequenceConstraint (SequenceConstraintPtr constraint) { CharPtr summ = NULL; CharPtr seq_word = NULL, featpresent = NULL, id = NULL; Int4 len = 0; CharPtr seq_word_intro = "where sequence type is "; CharPtr feat_after = " is present"; CharPtr id_intro = "sequence ID "; CharPtr feat_type_quantity = NULL; CharPtr feat_quantity = NULL; CharPtr length_quantity = NULL; CharPtr strandedness = NULL; if (IsSequenceConstraintEmpty (constraint)) { summ = StringSave ("Missing sequence constraint"); } else { if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) { switch (constraint->seqtype->choice) { case SequenceConstraintMolTypeConstraint_nucleotide: seq_word = "nucleotide"; break; case SequenceConstraintMolTypeConstraint_dna: seq_word = "DNA"; break; case SequenceConstraintMolTypeConstraint_rna: if (constraint->seqtype->data.intvalue == Sequence_constraint_rnamol_any) { seq_word = "RNA"; } else 
{ seq_word = GetBiomolNameForRnaType (constraint->seqtype->data.intvalue); } break; case SequenceConstraintMolTypeConstraint_protein: seq_word = "protein"; break; } } if (constraint->feature != Macro_feature_type_any) { featpresent = GetFeatureNameFromFeatureType (constraint->feature); if (constraint->num_type_features != NULL) { feat_type_quantity = SummarizeFeatureQuantity (constraint->num_type_features, featpresent); featpresent = NULL; } } if (!IsStringConstraintEmpty (constraint->id)) { id = SummarizeStringConstraint (constraint->id); } if (seq_word != NULL) { len += StringLen (seq_word) + StringLen (seq_word_intro); } if (featpresent != NULL) { if (len == 0) { len += 6; } else { len += 5; } len += StringLen (featpresent); len += StringLen (feat_after); } if (feat_type_quantity != NULL) { if (len == 0) { len += 6; } else { len += 5; } len += StringLen (feat_type_quantity); } if (id != NULL) { if (len == 0) { len += 6; } else { len += 5; } len += StringLen (id_intro); len += StringLen (id); } feat_quantity = SummarizeFeatureQuantity (constraint->num_features, NULL); if (feat_quantity != NULL) { len += StringLen (feat_quantity) + 6; } length_quantity = SummarizeSequenceLength (constraint->length); if (length_quantity != NULL) { len += StringLen (length_quantity) + 6; } if (constraint->strandedness > Feature_strandedness_constraint_any) { strandedness = SummarizeFeatureStrandedness(constraint->strandedness); len += StringLen (strandedness) + 6; } if (len == 0) { summ = StringSave ("missing sequence constraint"); } else { len++; summ = (CharPtr) MemNew (sizeof (Char) * len); summ[0] = 0; if (seq_word != NULL) { StringCat (summ, seq_word_intro); StringCat (summ, seq_word); } if (featpresent != NULL) { if (seq_word == NULL) { StringCat (summ, "where "); } else { StringCat (summ, " and "); } StringCat (summ, featpresent); StringCat (summ, feat_after); } if (feat_type_quantity != NULL) { if (summ[0] == 0) { StringCat (summ, "where "); } else { StringCat (summ, " and 
"); } StringCat (summ, feat_type_quantity); } if (id != NULL) { if (seq_word == NULL && featpresent == NULL) { StringCat (summ, "where "); } else { StringCat (summ, " and "); } StringCat (summ, id_intro); StringCat (summ, id); } if (feat_quantity != NULL) { if (StringHasNoText (summ)) { StringCat (summ, "where "); } else { StringCat (summ, " and "); } StringCat (summ, feat_quantity); } if (length_quantity != NULL) { if (StringHasNoText (summ)) { StringCat (summ, "where "); } else { StringCat (summ, " and "); } StringCat (summ, length_quantity); } if (strandedness != NULL) { if (StringHasNoText (summ)) { StringCat (summ, "where "); } else { StringCat (summ, " and "); } StringCat (summ, strandedness); } } id = MemFree (id); feat_type_quantity = MemFree (feat_type_quantity); feat_quantity = MemFree (feat_quantity); length_quantity = MemFree (length_quantity); } return summ; } const CharPtr s_SpecialPubFieldWords [] = { "is present", "is not present", "is all caps", "is all lowercase", "is all punctuation" }; const Int4 k_NumSpecialPubFieldWords = sizeof (s_SpecialPubFieldWords) / sizeof (CharPtr); static CharPtr SummarizePubFieldSpecialConstraint (PubFieldSpecialConstraintPtr field) { CharPtr fmt = "%s %s"; CharPtr label, summ = NULL; if (field == NULL || field->constraint == NULL || field->constraint->choice < 1 || field->constraint->choice > k_NumSpecialPubFieldWords) { return NULL; } label = GetPubFieldLabel (field->field); summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen (s_SpecialPubFieldWords[field->constraint->choice - 1]))); sprintf (summ, fmt, label, s_SpecialPubFieldWords[field->constraint->choice - 1]); return summ; } static CharPtr SummarizePubFieldConstraint (PubFieldConstraintPtr field) { CharPtr fmt = "%s %s", summ = NULL; CharPtr string, label; if (field == NULL || field->constraint == NULL) { return NULL; } string = SummarizeStringConstraint (field->constraint); label = GetPubFieldLabel (field->field); summ = 
(CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen (string))); sprintf (summ, fmt, label, string); string = MemFree (string); return summ; } static CharPtr SummarizePublicationConstraint (PublicationConstraintPtr constraint) { CharPtr type = NULL, field = NULL, special = NULL, summ = NULL; Boolean first = TRUE; Int4 len; if (IsPublicationConstraintEmpty (constraint)) return NULL; switch (constraint->type) { case Pub_type_published: type = "pub is published"; break; case Pub_type_unpublished: type = "pub is unpublished"; break; case Pub_type_in_press: type = "pub is in press"; break; case Pub_type_submitter_block: type = "pub is submitter block"; break; } field = SummarizePubFieldConstraint (constraint->field); special = SummarizePubFieldSpecialConstraint (constraint->special_field); if (type == NULL && field == NULL && special == NULL) { return NULL; } len = 17 + StringLen (type) + StringLen (field) + StringLen (special); summ = (CharPtr) MemNew (sizeof (Char) * len); sprintf (summ, "where "); if (type != NULL) { StringCat (summ, type); first = FALSE; } if (field != NULL) { if (!first) { StringCat (summ, " and "); } StringCat (summ, field); first = FALSE; } if (special != NULL) { if (!first) { StringCat (summ, " and "); } StringCat (summ, special); first = FALSE; } field = MemFree (field); special = MemFree (special); return summ; } static CharPtr SummarizeFieldConstraint (FieldConstraintPtr constraint) { CharPtr rval = NULL; CharPtr string = NULL, label = NULL; CharPtr fmt = "where %s %s"; if (IsFieldConstraintEmpty (constraint)) return NULL; string = SummarizeStringConstraint (constraint->string_constraint); label = SummarizeFieldType (constraint->field); if (string != NULL && label != NULL) { rval = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen (string))); sprintf (rval, fmt, label, string); } string = MemFree (string); label = MemFree (label); return rval; } static CharPtr 
SummarizeMissingFieldConstraint (FieldTypePtr field) { CharPtr rval = NULL; CharPtr label = NULL; CharPtr fmt = "where %s is missing"; if (field == NULL) return NULL; label = SummarizeFieldType (field); if (label != NULL) { rval = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label))); sprintf (rval, fmt, label); } label = MemFree (label); return rval; } static CharPtr SummarizeMolinfoFieldConstraint (MolinfoFieldConstraintPtr constraint) { CharPtr label, cp; CharPtr fmt = "where %s is%s %s"; CharPtr rval = NULL; Int4 len, offset; if (IsMolinfoFieldConstraintEmpty(constraint)) { return NULL; } label = GetSequenceQualName (constraint->field); if (label == NULL) { return NULL; } cp = StringChr (label, ' '); if (cp == NULL) { return NULL; } offset = cp - label; len = StringLen (fmt) + StringLen (label); if (constraint->is_not) { len += 4; } rval = (CharPtr) MemNew (sizeof (Char) * len); sprintf (rval, "where %s", label); StringCpy (rval + 7 + offset, constraint->is_not ? "is not " : "is "); StringCat (rval, cp + 1); return rval; } NLM_EXTERN Boolean IsTranslationConstraintEmpty (TranslationConstraintPtr constraint) { if (constraint == NULL) { return TRUE; } else if (constraint->num_mismatches != NULL) { return FALSE; } else if (constraint->internal_stops != Match_type_constraint_dont_care) { return FALSE; } else if (!IsStringConstraintEmpty (constraint->actual_strings)) { return FALSE; } else if (!IsStringConstraintEmpty (constraint->transl_strings)) { return FALSE; } else { return TRUE; } } static CharPtr SummarizeTranslationMismatches (ValNodePtr v) { CharPtr fmt = "there are %s %d mismatches between the actual and translated protein sequences"; CharPtr summ = NULL; if (v == NULL || v->choice < 1 || v->choice > k_NumQuantityWords) { return NULL; } summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (s_QuantityWords[v->choice - 1]) + 15)); sprintf (summ, fmt, s_QuantityWords[v->choice - 1], v->data.intvalue); return summ; } static 
CharPtr SummarizeTranslationConstraint (TranslationConstraintPtr constraint) { CharPtr rval = NULL; CharPtr mismatch = NULL; CharPtr tmp; CharPtr where_actual_sequence = "where actual sequence "; CharPtr where_transl_sequence = "where translated sequence "; CharPtr has_internal_stops = "sequence has internal stops"; CharPtr no_internal_stops = "sequence has no internal stops"; Int4 len = 0; StringConstraintPtr scp; ValNodePtr actual_phrases = NULL, transl_phrases = NULL, vnp; Int4 num_phrases = 0, phrase_num = 1; if (IsTranslationConstraintEmpty(constraint)) { return NULL; } if (constraint->actual_strings != NULL) { len += StringLen (where_actual_sequence); for (scp = constraint->actual_strings; scp != NULL; scp = scp->next) { tmp = SummarizeStringConstraint (scp); if (tmp != NULL) { len += StringLen (tmp) + 2; ValNodeAddPointer (&actual_phrases, 0, tmp); } } len += 5; num_phrases ++; } if (constraint->transl_strings != NULL) { len += StringLen (where_transl_sequence); for (scp = constraint->transl_strings; scp != NULL; scp = scp->next) { tmp = SummarizeStringConstraint (scp); if (tmp != NULL) { len += StringLen (tmp) + 2; ValNodeAddPointer (&transl_phrases, 0, tmp); } } len += 5; num_phrases ++; } if (constraint->num_mismatches != NULL) { mismatch = SummarizeTranslationMismatches(constraint->num_mismatches); len += StringLen (mismatch) + 5; num_phrases ++; } if (constraint->internal_stops == Match_type_constraint_yes) { len += StringLen (has_internal_stops) + 5; num_phrases ++; } else if (constraint->internal_stops == Match_type_constraint_no) { len += StringLen (no_internal_stops) + 5; num_phrases ++; } rval = (CharPtr) MemNew (sizeof (Char) * len); rval[0] = 0; if (actual_phrases != NULL) { StringCat (rval, where_actual_sequence); for (vnp = actual_phrases; vnp != NULL; vnp = vnp->next) { StringCat (rval, vnp->data.ptrvalue); if (vnp->next != NULL) { StringCat (rval, ", "); } } actual_phrases = ValNodeFreeData (actual_phrases); phrase_num++; } if (transl_phrases 
!= NULL) { if (phrase_num > 1) { if (num_phrases > 2) { StringCat (rval, ", "); } if (phrase_num == num_phrases) { StringCat (rval, " and "); } } StringCat (rval, where_transl_sequence); for (vnp = transl_phrases; vnp != NULL; vnp = vnp->next) { StringCat (rval, vnp->data.ptrvalue); if (vnp->next != NULL) { StringCat (rval, ", "); } } transl_phrases = ValNodeFreeData (transl_phrases); phrase_num++; } if (mismatch != NULL) { if (phrase_num > 1) { if (num_phrases > 2) { StringCat (rval, ", "); } if (phrase_num == num_phrases) { StringCat (rval, " and "); } } StringCat (rval, mismatch); mismatch = MemFree (mismatch); phrase_num++; } if (constraint->internal_stops == Match_type_constraint_yes) { if (phrase_num > 1) { if (num_phrases > 2) { StringCat (rval, ", "); } if (phrase_num == num_phrases) { StringCat (rval, " and "); } } StringCat (rval, has_internal_stops); phrase_num++; } else if (constraint->internal_stops == Match_type_constraint_yes) { len += StringLen (no_internal_stops) + 5; if (phrase_num > 1) { if (num_phrases > 2) { StringCat (rval, ", "); } if (phrase_num == num_phrases) { StringCat (rval, " and "); } } StringCat (rval, no_internal_stops); phrase_num++; } return rval; } NLM_EXTERN CharPtr SummarizeConstraint (ValNodePtr constraint) { CharPtr phrase = NULL, tmp; CharPtr fmt = "where object text %s"; if (constraint == NULL) return NULL; switch (constraint->choice) { case ConstraintChoice_string: tmp = SummarizeStringConstraint (constraint->data.ptrvalue); if (tmp != NULL) { phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (tmp) + StringLen (fmt))); sprintf (phrase, fmt, tmp); tmp = MemFree (tmp); } break; case ConstraintChoice_location: phrase = SummarizeLocationConstraint (constraint->data.ptrvalue); break; case ConstraintChoice_source: phrase = SummarizeSourceConstraint (constraint->data.ptrvalue); break; case ConstraintChoice_cdsgeneprot_qual: phrase = SummarizeCDSGeneProtQualConstraint (constraint->data.ptrvalue); break; case 
ConstraintChoice_cdsgeneprot_pseudo: phrase = SummarizeCDSGeneProtPseudoConstraint (constraint->data.ptrvalue); break; case ConstraintChoice_sequence: phrase = SummarizeSequenceConstraint (constraint->data.ptrvalue); break; case ConstraintChoice_pub: phrase = SummarizePublicationConstraint (constraint->data.ptrvalue); break; case ConstraintChoice_field: phrase = SummarizeFieldConstraint (constraint->data.ptrvalue); break; case ConstraintChoice_molinfo: phrase = SummarizeMolinfoFieldConstraint (constraint->data.ptrvalue); break; case ConstraintChoice_field_missing: phrase = SummarizeMissingFieldConstraint (constraint->data.ptrvalue); break; case ConstraintChoice_translation: phrase = SummarizeTranslationConstraint (constraint->data.ptrvalue); break; } return phrase; } NLM_EXTERN CharPtr SummarizeConstraintSet (ValNodePtr constraint_set) { ValNodePtr phrases = NULL, vnp; Int4 len = 0; CharPtr phrase, str = NULL; while (constraint_set != NULL) { phrase = SummarizeConstraint (constraint_set); if (phrase != NULL) { ValNodeAddPointer (&phrases, 0, phrase); if (len > 0) { len += 5; /* for " and " */ } else { len += 1; /* for terminal NULL */ } len += StringLen (phrase); } constraint_set = constraint_set->next; } if (len > 0) { str = (CharPtr) MemNew (sizeof (Char) * len); for (vnp = phrases; vnp != NULL; vnp = vnp->next) { StringCat (str, vnp->data.ptrvalue); if (vnp->next != NULL) { StringCat (str, " and "); } } } return str; } /* for table readers that use the macro language functions */ /* MatchType is used to represent how the column should be matched. 
*/

/* Allocates a MatchType with no data, match_location set to
 * String_location_equals and choice set to eTableMatchNucID.
 */
NLM_EXTERN MatchTypePtr MatchTypeNew ()
{
  MatchTypePtr match_type = MemNew (sizeof (MatchTypeData));
  match_type->data = NULL;
  match_type->match_location = String_location_equals;
  match_type->choice = eTableMatchNucID;
  return match_type;
}


/* Frees a MatchType.  The data member is released with SourceQualChoiceFree
 * only when choice is eTableMatchSourceQual.  Returns the result of MemFree
 * (or the NULL argument unchanged).
 */
NLM_EXTERN MatchTypePtr MatchTypeFree (MatchTypePtr match_type)
{
  if (match_type != NULL) {
    if (match_type->choice == eTableMatchSourceQual) {
      match_type->data = SourceQualChoiceFree (match_type->data);
    }
    match_type = MemFree (match_type);
  }
  return match_type;
}


/* Returns a copy of orig (NULL for NULL).  choice and match_location are
 * copied; data is duplicated via AsnIoMemCopy only for
 * eTableMatchSourceQual, otherwise it stays NULL as set by MatchTypeNew.
 */
static MatchTypePtr MatchTypeCopy (MatchTypePtr orig)
{
  MatchTypePtr match_type = NULL;

  if (orig != NULL) {
    match_type = MatchTypeNew();
    match_type->choice = orig->choice;
    match_type->match_location = orig->match_location;
    if (match_type->choice == eTableMatchSourceQual) {
      match_type->data = AsnIoMemCopy (orig->data, (AsnReadFunc) SourceQualChoiceAsnRead, (AsnWriteFunc) SourceQualChoiceAsnWrite);
    }
  }
  return match_type;
}


/* Returns a copy of the match_type of the first column configuration in
 * the list that has one, or NULL if no column has a match_type.
 */
static MatchTypePtr FindMatchTypeInHeader (ValNodePtr columns)
{
  ValNodePtr col_vnp;
  MatchTypePtr match_type = NULL;
  TabColumnConfigPtr t;

  /* the loop condition stops at the first column that yields a copy */
  for (col_vnp = columns; col_vnp != NULL && match_type == NULL; col_vnp = col_vnp->next) {
    t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
    if (t != NULL && t->match_type != NULL) {
      match_type = MatchTypeCopy (t->match_type);
    }
  }
  return match_type;
}


/* Allocates a column configuration with no match type, field or
 * constraint, existing_text set to ExistingTextOption_replace_old and
 * skip_blank set to TRUE.
 */
NLM_EXTERN TabColumnConfigPtr TabColumnConfigNew (void)
{
  TabColumnConfigPtr t;

  t = (TabColumnConfigPtr) MemNew (sizeof (TabColumnConfigData));
  t->match_type = NULL;
  t->field = NULL;
  t->existing_text = ExistingTextOption_replace_old;
  t->constraint = NULL;
  t->skip_blank = TRUE;
  return t;
}


/* Frees a column configuration together with its field, match_type and
 * constraint members.
 */
NLM_EXTERN TabColumnConfigPtr TabColumnConfigFree (TabColumnConfigPtr t)
{
  if (t != NULL) {
    t->field = FieldTypeFree (t->field);
    t->match_type = MatchTypeFree (t->match_type);
    t->constraint = ConstraintChoiceSetFree (t->constraint);
    t = MemFree (t);
  }
  return t;
}


/* Returns a copy of orig (NULL for NULL); match_type, field and constraint
 * are duplicated, the scalar options are copied by value.
 */
NLM_EXTERN TabColumnConfigPtr TabColumnConfigCopy (TabColumnConfigPtr orig)
{
  TabColumnConfigPtr t = NULL;

  if (orig != NULL) {
    t = TabColumnConfigNew ();
    t->match_type = MatchTypeCopy (orig->match_type);
    t->existing_text = orig->existing_text;
    t->skip_blank = orig->skip_blank;
    t->match_mrna = orig->match_mrna;
    t->field = FieldTypeCopy (orig->field);
    t->constraint = AsnIoMemCopy (orig->constraint, (AsnReadFunc) ConstraintChoiceSetAsnRead, (AsnWriteFunc) ConstraintChoiceSetAsnWrite);
  }
  return t;
}


/* Releases the members owned by t and restores the scalar options
 * (replace_old, skip_blank TRUE, match_mrna FALSE) without freeing t itself.
 */
NLM_EXTERN void TabColumnConfigReset (TabColumnConfigPtr t)
{
  if (t != NULL) {
    t->match_type = MatchTypeFree (t->match_type);
    t->field = FieldTypeFree (t->field);
    t->constraint = ConstraintChoiceSetFree (t->constraint);
    t->existing_text = ExistingTextOption_replace_old;
    t->skip_blank = TRUE;
    t->match_mrna = FALSE;
  }
}


/* Frees a ValNode list of column configurations, node by node.  Always
 * returns NULL.
 */
NLM_EXTERN ValNodePtr TabColumnConfigListFree (ValNodePtr columns)
{
  ValNodePtr vnp_next;

  while (columns != NULL) {
    vnp_next = columns->next;
    columns->data.ptrvalue = TabColumnConfigFree (columns->data.ptrvalue);
    /* detach the node before ValNodeFree so only this node is released */
    columns->next = NULL;
    columns = ValNodeFree (columns);
    columns = vnp_next;
  }
  return columns;
}


/* Returns a new list holding a copy of each column configuration in orig,
 * in the same order.
 */
NLM_EXTERN ValNodePtr TabColumnConfigListCopy (ValNodePtr orig)
{
  ValNodePtr new_list = NULL;
  TabColumnConfigPtr t;

  while (orig != NULL) {
    t = TabColumnConfigCopy (orig->data.ptrvalue);
    ValNodeAddPointer (&new_list, 0, t);
    orig = orig->next;
  }
  return new_list;
}


/* Converts a TableMatch into a newly allocated MatchType (NULL for NULL).
 * When t->match_type is NULL or its choice is not listed below, the result
 * keeps the defaults assigned by MatchTypeNew.  For
 * TableMatchType_src_qual the source qualifier is duplicated into data.
 */
NLM_EXTERN MatchTypePtr MatchTypeFromTableMatchType (TableMatchPtr t)
{
  MatchTypePtr m;

  if (t == NULL) {
    return NULL;
  }
  m = MatchTypeNew ();
  m->match_location = t->match_location;
  if (t->match_type != NULL) {
    switch (t->match_type->choice) {
      case TableMatchType_feature_id:
        m->choice = eTableMatchFeatureID;
        break;
      case TableMatchType_gene_locus_tag:
        m->choice = eTableMatchGeneLocusTag;
        break;
      case TableMatchType_protein_id:
        m->choice = eTableMatchProteinID;
        break;
      case TableMatchType_dbxref:
        m->choice = eTableMatchDbxref;
        break;
      case TableMatchType_nuc_id:
        m->choice = eTableMatchNucID;
        break;
      case TableMatchType_src_qual:
        m->choice = eTableMatchSourceQual;
        m->data = AsnIoMemCopy (t->match_type->data.ptrvalue, (AsnReadFunc)SourceQualChoiceAsnRead, (AsnWriteFunc)SourceQualChoiceAsnWrite);
        break;
      case TableMatchType_protein_name:
        m->choice = eTableMatchProteinName;
        break;
      case TableMatchType_bioproject:
        m->choice = eTableMatchBioProject;
        break;
      case TableMatchType_any:
        m->choice = eTableMatchAny;
        break;
    }
  }
  return m;
}


/* Converts a MatchType into a newly allocated TableMatch (NULL for NULL).
 * eTableMatchBioSource is expressed as a source-qualifier match on
 * Source_qual_taxname.
 */
NLM_EXTERN TableMatchPtr TableMatchTypeFromMatchType (MatchTypePtr m)
{
  TableMatchPtr t;
  ValNodePtr s;

  if (m == NULL) {
    return NULL;
  }
  t = TableMatchNew ();
  t->match_location = m->match_location;
  t->match_type = ValNodeNew (NULL);
  switch (m->choice) {
    case eTableMatchFeatureID:
      t->match_type->choice = TableMatchType_feature_id;
      break;
    case eTableMatchGeneLocusTag:
      t->match_type->choice = TableMatchType_gene_locus_tag;
      break;
    case eTableMatchProteinID:
      t->match_type->choice = TableMatchType_protein_id;
      break;
    case eTableMatchDbxref:
      t->match_type->choice = TableMatchType_dbxref;
      break;
    case eTableMatchNucID:
      t->match_type->choice = TableMatchType_nuc_id;
      break;
    case eTableMatchSourceQual:
      t->match_type->choice = TableMatchType_src_qual;
      t->match_type->data.ptrvalue = AsnIoMemCopy (m->data, (AsnReadFunc)SourceQualChoiceAsnRead, (AsnWriteFunc)SourceQualChoiceAsnWrite);
      break;
    case eTableMatchBioSource:
      t->match_type->choice = TableMatchType_src_qual;
      s = ValNodeNew (NULL);
      s->choice = SourceQualChoice_textqual;
      s->data.intvalue = Source_qual_taxname;
      t->match_type->data.ptrvalue = s;
      break;
    case eTableMatchProteinName:
      t->match_type->choice = TableMatchType_protein_name;
      break;
    case eTableMatchBioProject:
      t->match_type->choice = TableMatchType_bioproject;
      break;
    case eTableMatchAny:
      t->match_type->choice = TableMatchType_any;
      break;
  }
  return t;
}


/* Returns a sorted, de-duplicated list of field summaries (from
 * SummarizeFieldType) for every column whose field also appears in a later
 * column that is not marked ExistingTextOption_add_qual.
 */
NLM_EXTERN ValNodePtr CheckForDuplicateColumns (ValNodePtr columns)
{
  ValNodePtr vnp, vnp2;
  TabColumnConfigPtr t, t2;
  ValNodePtr duplicate_column_list = NULL;
  Boolean    this_dup;

  for (vnp = columns; vnp != NULL; vnp = vnp->next) {
    this_dup = FALSE;
    t = (TabColumnConfigPtr) vnp->data.ptrvalue;
    if (t != NULL && t->field !=
NULL) { for (vnp2 = vnp->next; vnp2 != NULL && !this_dup; vnp2 = vnp2->next) { t2 = (TabColumnConfigPtr) vnp2->data.ptrvalue; if (t2 != NULL && CompareFieldTypes(t->field, t2->field) == 0 && t2->existing_text != ExistingTextOption_add_qual) { ValNodeAddPointer (&duplicate_column_list, 0, SummarizeFieldType (t->field)); this_dup = TRUE; } } } } duplicate_column_list = ValNodeSort (duplicate_column_list, SortVnpByString); ValNodeUnique (&duplicate_column_list, SortVnpByString, ValNodeFreeData); return duplicate_column_list; } NLM_EXTERN void FixDuplicateColumns (ValNodePtr columns) { ValNodePtr vnp, vnp2; TabColumnConfigPtr t, t2; if (columns == NULL || columns->next == NULL) { return; } for (vnp = columns; vnp != NULL; vnp = vnp->next) { t = (TabColumnConfigPtr) vnp->data.ptrvalue; if (t->field != NULL) { for (vnp2 = vnp->next; vnp2 != NULL; vnp2 = vnp2->next) { t2 = (TabColumnConfigPtr) vnp2->data.ptrvalue; if (CompareFieldTypes(t->field, t2->field) == 0 && t2->existing_text != ExistingTextOption_add_qual) { t2->existing_text = ExistingTextOption_add_qual; } } } } } /* This checks the column names and returns a list of the feature fields */ NLM_EXTERN ValNodePtr ValidateFeatureFieldColumnNames (ValNodePtr header_line, ValNodePtr PNTR perr_list) { ValNodePtr header_vnp; ValNodePtr err_list = NULL, col_list = NULL; Boolean rval = TRUE; TabColumnConfigPtr t; FeatureFieldPtr field; Int4 featqual, feat_type; CharPtr first_space; if (header_line == NULL) { return FALSE; } header_vnp = header_line->data.ptrvalue; if (header_vnp == NULL || header_vnp->next == NULL) { return FALSE; } /* skip ID column */ header_vnp = header_vnp->next; while (header_vnp != NULL && rval) { first_space = StringChr (header_vnp->data.ptrvalue, ' '); if (first_space != NULL) { *first_space = 0; feat_type = GetFeatureTypeByName (header_vnp->data.ptrvalue); featqual = GetFeatQualByName (first_space + 1); *first_space = ' '; if (feat_type < 0 || featqual < 0) { /* unable to recognize column name */ 
ValNodeAddPointer (&err_list, 0, StringSave (header_vnp->data.ptrvalue)); /* if we're not able to send back a list of errors, just quit now */ if (perr_list == NULL) { rval = FALSE; } } else if (err_list == NULL) { /* if we've already found errors, don't bother collecting more fields */ field = FeatureFieldNew (); field->type = feat_type; field->field = ValNodeNew (NULL); field->field->choice = FeatQualChoice_legal_qual; field->field->data.intvalue = featqual; t = TabColumnConfigNew (); t->field = ValNodeNew (NULL); t->field->choice = FieldType_feature_field; t->field->data.ptrvalue = field; ValNodeAddPointer (&col_list, 0, t); } } else { featqual = GetFeatQualByName (header_vnp->data.ptrvalue); if (featqual < 0) { /* unable to recognize column name */ ValNodeAddPointer (&err_list, 0, StringSave (header_vnp->data.ptrvalue)); /* if we're not able to send back a list of errors, just quit now */ if (perr_list == NULL) { rval = FALSE; } } else if (err_list == NULL) { /* if we've already found errors, don't bother collecting more fields */ field = FeatureFieldNew (); field->type = Macro_feature_type_any; field->field = ValNodeNew (NULL); field->field->choice = FeatQualChoice_legal_qual; field->field->data.intvalue = featqual; t = TabColumnConfigNew (); t->field = ValNodeNew (NULL); t->field->choice = FieldType_feature_field; t->field->data.ptrvalue = field; ValNodeAddPointer (&col_list, 0, t); } } header_vnp = header_vnp->next; } if (err_list != NULL) { col_list = TabColumnConfigListFree (col_list); if (perr_list != NULL) { *perr_list = err_list; } else { err_list = ValNodeFreeData (err_list); } } return col_list; } typedef struct findgenelocustag { CharPtr locus_tag; ValNodePtr gene_list; } FindGeneLocusTagData, PNTR FindGeneLocusTagPtr; static void FindGeneByLocusTagBioseqCallback (BioseqPtr bsp, Pointer userdata) { FindGeneLocusTagPtr p; SeqFeatPtr gene; SeqMgrFeatContext fcontext; if (bsp == NULL || userdata == NULL || !ISA_na (bsp->mol)) { return; } p = 
(FindGeneLocusTagPtr) userdata;

  gene = SeqMgrGetGeneByLocusTag (bsp, p->locus_tag, &fcontext);
  if (gene != NULL) {
    ValNodeAddPointer (&p->gene_list, OBJ_SEQFEAT, gene);
  }
}


/* Accumulator for the visitor callbacks below: objects that satisfy scp
 * are appended to obj_list.
 */
typedef struct objbymatch {
  ValNodePtr obj_list;
  StringConstraintPtr scp;
} ObjByMatchData, PNTR ObjByMatchPtr;


/* Feature visitor: adds sfp to the list when any of its dbxrefs matches
 * the string constraint.  A tag with id > 0 is compared by its decimal
 * text, otherwise the tag's str is compared.
 */
static void GetFeaturesByDbxrefCallback (SeqFeatPtr sfp, Pointer userdata)
{
  ObjByMatchPtr p;
  ValNodePtr    vnp;
  DbtagPtr      dbt;
  Char          buf[20];
  Boolean       found = FALSE;

  if (sfp == NULL || sfp->dbxref == NULL || userdata == NULL) return;
  p = (ObjByMatchPtr) userdata;

  if (IsStringConstraintEmpty (p->scp)) return;

  for (vnp = sfp->dbxref; vnp != NULL && !found; vnp = vnp->next) {
    dbt = (DbtagPtr) vnp->data.ptrvalue;
    if (dbt != NULL && dbt->tag != NULL) {
      if (dbt->tag->id > 0) {
        sprintf (buf, "%d", dbt->tag->id);
        if (DoesStringMatchConstraint (buf, p->scp)) {
          found = TRUE;
        }
      } else if (DoesStringMatchConstraint (dbt->tag->str, p->scp)) {
        found = TRUE;
      }
    }
  }
  if (found) {
    ValNodeAddPointer (&(p->obj_list), OBJ_SEQFEAT, sfp);
  }
}


/* Returns a list (choice OBJ_SEQFEAT) of the features in sep that have a
 * dbxref matching the text at the given match location.
 */
static ValNodePtr GetFeaturesByDbxref (SeqEntryPtr sep, CharPtr dbxref, Uint1 match_location)
{
  ObjByMatchData d;

  d.scp = StringConstraintNew ();
  d.scp->match_text = StringSave (dbxref);
  d.scp->match_location = match_location;
  d.obj_list = NULL;
  VisitFeaturesInSep (sep, &d, GetFeaturesByDbxrefCallback);
  d.scp = StringConstraintFree (d.scp);
  return d.obj_list;
}


/* Descriptor visitor: collects source descriptors whose org taxname
 * matches the constraint.
 */
static void GetBioSourcesByTaxNameDescriptorCallback (SeqDescrPtr sdp, Pointer userdata)
{
  ObjByMatchPtr p;
  BioSourcePtr  biop;

  if (sdp == NULL || sdp->choice != Seq_descr_source || userdata == NULL) return;
  p = (ObjByMatchPtr) userdata;

  if (IsStringConstraintEmpty (p->scp)) return;

  biop = (BioSourcePtr) sdp->data.ptrvalue;
  if (biop != NULL && biop->org != NULL && DoesStringMatchConstraint (biop->org->taxname, p->scp)) {
    ValNodeAddPointer (&(p->obj_list), OBJ_SEQDESC, sdp);
  }
}


/* Feature visitor: collects BioSource features whose org taxname matches
 * the constraint.
 */
static void GetBioSourcesByTaxNameFeatureCallback (SeqFeatPtr sfp, Pointer userdata)
{
  ObjByMatchPtr p;
  BioSourcePtr  biop;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || userdata == NULL) return;
  p = (ObjByMatchPtr) userdata;

  if (IsStringConstraintEmpty (p->scp)) return;

  biop = (BioSourcePtr) sfp->data.value.ptrvalue;
  if (biop != NULL && biop->org != NULL && DoesStringMatchConstraint (biop->org->taxname, p->scp)) {
    ValNodeAddPointer (&(p->obj_list), OBJ_SEQFEAT, sfp);
  }
}


/* Returns the source descriptors (OBJ_SEQDESC) followed by the source
 * features (OBJ_SEQFEAT) in sep whose taxname matches.
 */
static ValNodePtr GetBioSourcesByTaxName (SeqEntryPtr sep, CharPtr taxname, Uint1 match_location)
{
  ObjByMatchData d;

  d.scp = StringConstraintNew ();
  d.scp->match_text = StringSave (taxname);
  d.scp->match_location = match_location;
  d.obj_list = NULL;
  VisitDescriptorsInSep (sep, &d, GetBioSourcesByTaxNameDescriptorCallback);

  VisitFeaturesInSep (sep, &d, GetBioSourcesByTaxNameFeatureCallback);
  d.scp = StringConstraintFree (d.scp);
  return d.obj_list;
}


/* Accumulator for the source-qualifier visitors: objects for which
 * GetFieldValueForObject returns a value for field under scp are appended
 * to obj_list.
 */
typedef struct objbystrinfld {
  ValNodePtr obj_list;
  FieldTypePtr field;
  StringConstraintPtr scp;
} ObjByStrInFldData, PNTR ObjByStrInFldPtr;


/* Descriptor visitor: collects source descriptors for which the field has
 * a value under the constraint.
 */
static void GetBioSourcesBySourceQualDescriptorCallback (SeqDescrPtr sdp, Pointer userdata)
{
  ObjByStrInFldPtr p;
  CharPtr          tmp;

  if (sdp == NULL || sdp->choice != Seq_descr_source || userdata == NULL) return;
  p = (ObjByStrInFldPtr) userdata;

  if (IsStringConstraintEmpty (p->scp)) return;

  /* only whether a value was returned matters; the value is discarded */
  tmp = GetFieldValueForObject (OBJ_SEQDESC, sdp, p->field, p->scp);
  if (tmp != NULL) {
    ValNodeAddPointer (&(p->obj_list), OBJ_SEQDESC, sdp);
  }
  tmp = MemFree (tmp);
}


/* Feature visitor: collects BioSource features for which the field has a
 * value under the constraint.
 */
static void GetBioSourcesBySourceQualFeatureCallback (SeqFeatPtr sfp, Pointer userdata)
{
  ObjByStrInFldPtr p;
  CharPtr          tmp;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || userdata == NULL) return;
  p = (ObjByStrInFldPtr) userdata;

  if (IsStringConstraintEmpty (p->scp)) return;

  tmp = GetFieldValueForObject (OBJ_SEQFEAT, sfp, p->field, p->scp);
  if (tmp != NULL) {
    ValNodeAddPointer (&(p->obj_list), OBJ_SEQFEAT, sfp);
  }
  tmp = MemFree (tmp);
}


/* Returns the source descriptors and source features in sep whose source
 * qualifier q matches val.  q is only borrowed: the temporary field node
 * is released with ValNodeFree, which leaves q itself untouched.
 */
static ValNodePtr GetBioSourcesBySourceQual (SeqEntryPtr sep, SourceQualChoicePtr q, CharPtr val, Uint1 match_location)
{
  ObjByStrInFldData od;

  od.scp = StringConstraintNew();
  od.scp->match_text = StringSave (val);
  od.scp->match_location = match_location;
  od.obj_list = NULL;
  od.field = ValNodeNew (NULL);
  od.field->choice = FieldType_source_qual;
  od.field->data.ptrvalue = q;
  VisitDescriptorsInSep (sep, &od, GetBioSourcesBySourceQualDescriptorCallback);

  VisitFeaturesInSep (sep, &od, GetBioSourcesBySourceQualFeatureCallback);

  od.field = ValNodeFree (od.field);
  od.scp = StringConstraintFree (od.scp);
  return od.obj_list;
}


/* Bioseq visitor: adds bsp to the list when its ID list meets the string
 * constraint, or when one of these alternate spellings matches:
 *   - an NCBIFILE general ID, in full or truncated at its last '/'
 *   - a local ID starting with "bankit" (case-insensitive), compared
 *     without that 6-character prefix
 */
static void GetBioseqsByIdCallback (BioseqPtr bsp, Pointer data)
{
  ObjByMatchPtr d;
  ObjectIdPtr   oip;
  SeqIdPtr      sip;
  Boolean       found_match = FALSE;
  DbtagPtr      dbtag;
  CharPtr       cp, tmp_id;

  if (bsp == NULL || data == NULL || (d = (ObjByMatchPtr) data) == NULL) {
    return;
  }

  found_match = DoesSeqIDListMeetStringConstraint (bsp->id, d->scp);

  for (sip = bsp->id; sip != NULL && !found_match; sip = sip->next) {
    if (sip->choice == SEQID_GENERAL && sip->data.ptrvalue != NULL) {
      dbtag = (DbtagPtr) sip->data.ptrvalue;
      if (StringCmp (dbtag->db, "NCBIFILE") == 0 && dbtag->tag != NULL) {
        if (DoesStringMatchConstraint (dbtag->tag->str, d->scp)) {
          found_match = TRUE;
        } else if ((cp = StringRChr (dbtag->tag->str, '/')) != NULL) {
          /* retry with the part of the ID before the last '/' */
          tmp_id = (CharPtr) MemNew (sizeof (Char) * (cp - dbtag->tag->str + 1));
          StringNCpy (tmp_id, dbtag->tag->str, cp - dbtag->tag->str);
          tmp_id[cp - dbtag->tag->str] = 0;
          if (DoesStringMatchConstraint (tmp_id, d->scp)) {
            found_match = TRUE;
          }
          tmp_id = MemFree (tmp_id);
        }
      }
    } else if (sip->choice == SEQID_LOCAL && (oip = sip->data.ptrvalue) != NULL
                && StringNICmp (oip->str, "bankit", 6) == 0
                && DoesStringMatchConstraint (oip->str + 6, d->scp)) {
      found_match = TRUE;
    }
  }
  if (found_match) {
    ValNodeAddPointer (&(d->obj_list), OBJ_BIOSEQ, bsp);
  }
}


/* Returns the Bioseqs in sep accepted by GetBioseqsByIdCallback for
 * match_str, or NULL when sep is NULL or match_str has no text.
 */
static ValNodePtr FindBioseqsByMatchType (SeqEntryPtr sep, Uint1 match_location, CharPtr match_str)
{
  ObjByMatchData d;

  if (sep == NULL || StringHasNoText (match_str)) {
    return NULL;
  }
  d.scp = StringConstraintNew ();
  d.scp->match_text = StringSave (match_str);
  d.scp->match_location = match_location;
  d.obj_list = NULL;
  VisitBioseqsInSep (sep, &d, GetBioseqsByIdCallback);
  d.scp = StringConstraintFree (d.scp);
  return d.obj_list;
}


/* One searchable spelling of a Bioseq's ID.  str is freed with the item
 * only when free_str is TRUE.  CompareBioseqSearchItem compares two items
 * by num when both have num > 0 and otherwise falls back to str.
 */
typedef struct bioseqsearchitem {
  BioseqPtr bsp;
  CharPtr str;
  Int4 num;
  Boolean free_str;
} BioseqSearchItemData, PNTR BioseqSearchItemPtr;


/* Creates a string search item; when str consists only of digits its
 * numeric value is stored in num as well.
 */
static BioseqSearchItemPtr BioseqSearchItemNewStr (BioseqPtr bsp, CharPtr str, Boolean need_free)
{
  BioseqSearchItemPtr bsi;

  bsi = (BioseqSearchItemPtr) MemNew (sizeof (BioseqSearchItemData));
  bsi->bsp = bsp;
  bsi->str = str;
  bsi->free_str = need_free;
  if (StringIsAllDigits (bsi->str)) {
    bsi->num = atoi (bsi->str);
  }
  return bsi;
}


/* Creates a search item that stores num and owns no string (free_str is
 * FALSE; str is not assigned here).
 */
static BioseqSearchItemPtr BioseqSearchItemNewInt (BioseqPtr bsp, Int4 num)
{
  BioseqSearchItemPtr bsi;

  bsi = (BioseqSearchItemPtr) MemNew (sizeof (BioseqSearchItemData));
  bsi->bsp = bsp;
  bsi->num = num;
  bsi->free_str = FALSE;
  return bsi;
}


/* Frees a search item and, when it owns it, its string. */
static BioseqSearchItemPtr BioseqSearchItemFree (BioseqSearchItemPtr bsi)
{
  if (bsi != NULL) {
    if (bsi->free_str) {
      bsi->str = MemFree (bsi->str);
    }
    bsi = MemFree (bsi);
  }
  return bsi;
}


/* Frees a ValNode list of search items; always returns NULL. */
static ValNodePtr BioseqSearchItemListFree (ValNodePtr vnp)
{
  ValNodePtr vnp_next;

  while (vnp != NULL) {
    vnp_next = vnp->next;
    vnp->next = NULL;
    vnp->data.ptrvalue = BioseqSearchItemFree (vnp->data.ptrvalue);
    vnp = ValNodeFree (vnp);
    vnp = vnp_next;
  }
  return vnp;
}


/* Ordering used for the search index: NULL items last; items without a
 * positive num come first, ordered case-insensitively by str; items with
 * a positive num follow, in ascending numeric order.
 */
static int CompareBioseqSearchItem (BioseqSearchItemPtr b1, BioseqSearchItemPtr b2)
{
  if (b1 == NULL && b2 == NULL) {
    return 0;
  } else if (b1 == NULL) {
    return 1;
  } else if (b2 == NULL) {
    return -1;
  } else if (b1->num > 0 && b2->num > 0) {
    if (b1->num < b2->num) {
      return -1;
    } else if (b1->num == b2->num) {
      return 0;
    } else {
      return 1;
    }
  } else if (b1->num > 0) {
    return 1;
  } else if (b2->num > 0) {
    return -1;
  } else {
    return StringICmp (b1->str, b2->str);
  }
}


/* ValNodeSort adapter for CompareBioseqSearchItem. */
static int LIBCALLBACK SortVnpByBioseqSearchItem (VoidPtr ptr1, VoidPtr ptr2)
{
  ValNodePtr vnp1;
  ValNodePtr vnp2;

  if (ptr1 == NULL || ptr2 == NULL) return 0;
  vnp1 = *((ValNodePtr PNTR) ptr1);
  vnp2 =
*((ValNodePtr PNTR) ptr2); if (vnp1 == NULL || vnp2 == NULL) return 0; return CompareBioseqSearchItem(vnp1->data.ptrvalue, vnp2->data.ptrvalue); } NLM_EXTERN void InitValNodeBlock (ValNodeBlockPtr vnbp, ValNodePtr list) { vnbp->head = list; vnbp->tail = list; if (vnbp->tail != NULL) { while (vnbp->tail->next != NULL) { vnbp->tail = vnbp->tail->next; } } } NLM_EXTERN void ValNodeAddPointerToEnd (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data) { ValNodePtr vnp_new; vnp_new = ValNodeAddPointer (&(vnbp->tail), choice, data); if (vnbp->head == NULL) { vnbp->head = vnp_new; } vnbp->tail = vnp_new; } NLM_EXTERN void ValNodeAddPointerToFront (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data) { ValNodePtr vnp; vnp = ValNodeNew (NULL); vnp->choice = choice; vnp->data.ptrvalue = data; vnp->next = vnbp->head; vnbp->head = vnp; } NLM_EXTERN void ValNodeLinkToEnd (ValNodeBlockPtr vnbp, ValNodePtr list) { if (list == NULL) { return; } else if (vnbp->head == NULL) { vnbp->head = list; vnbp->tail = list; while (vnbp->tail->next != NULL) { vnbp->tail = vnbp->tail->next; } } else { vnbp->tail->next = list; while (vnbp->tail->next != NULL) { vnbp->tail = vnbp->tail->next; } } } NLM_EXTERN void ValNodeSortBlock (ValNodeBlockPtr vnbp, int (LIBCALLBACK *compar )PROTO ((Nlm_VoidPtr, Nlm_VoidPtr ))) { if (vnbp == NULL || vnbp->head == NULL) { return; } vnbp->head = ValNodeSort(vnbp->head, compar); vnbp->tail = vnbp->head; while (vnbp->tail->next != NULL) { vnbp->tail = vnbp->tail->next; } } static SeqIdPtr FindLocalId (SeqIdPtr list) { while (list != NULL && list->choice != SEQID_LOCAL) { list = list->next; } return list; } static void BuildIdStringsListForIdList (SeqIdPtr sip_list, BioseqPtr bsp, ValNodeBlockPtr block) { SeqIdPtr sip, sip_next, local; CharPtr id, cp, tmp; DbtagPtr dbtag; ObjectIdPtr oid; Int4 len; TextSeqIdPtr tsip; for (sip = sip_list; sip != NULL; sip = sip->next) { sip_next = sip->next; sip->next = NULL; id = SeqIdWholeLabel (sip, PRINTID_FASTA_LONG); tmp = 
SeqIdWholeLabel (sip, PRINTID_REPORT);
    /* keep the REPORT label only when it differs from the FASTA label */
    if (StringCmp (id, tmp) != 0) {
      ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, tmp, TRUE));
      tmp = NULL;
    } else {
      tmp = MemFree (tmp);
    }
    /* re-attach the rest of the ID list that was cut off for labelling */
    sip->next = sip_next;
    if (id != NULL) {
      /* remove terminating pipe character */
      /* NOTE(review): an empty label would index id[-1] here - confirm
       * SeqIdWholeLabel never returns "" */
      if (id[StringLen(id) - 1] == '|') {
        id[StringLen(id) - 1] = 0;
      }
      /* this item owns id; the items created with FALSE below point into
       * strings they do not own */
      ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));

      /* remove leading pipe identifier */
      cp = StringChr (id, '|');
      if (cp != NULL) {
        cp = cp + 1;
        ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, cp, FALSE));
      } else {
        cp = id;
      }
      if ((sip->choice == SEQID_GENBANK || sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ || sip->choice == SEQID_TPG || sip->choice == SEQID_TPE || sip->choice == SEQID_TPD || sip->choice == SEQID_PIR || sip->choice == SEQID_SWISSPROT)\
          && (tsip = (TextSeqIdPtr)sip->data.ptrvalue) != NULL) {
        /* try just accession, if version and/or name and/or release supplied */
        if (!StringHasNoText (tsip->accession) && (tsip->version > 0 || !StringHasNoText (tsip->name) || !StringHasNoText (tsip->release))) {
          ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, tsip->accession, FALSE));
        }
      }
      /* just bankit number */
      if (sip->choice == SEQID_GENERAL && (dbtag = (DbtagPtr) sip->data.ptrvalue) != NULL) {
        if (StringCmp (dbtag->db, "BankIt") == 0) {
          /* NOTE(review): dbtag->tag is dereferenced here without the NULL
           * check that the NCBIFILE and fallback branches perform */
          if (dbtag->tag->id > 0) {
            id = (CharPtr) MemNew (sizeof (Char) * 22);
            sprintf (id, "BankIt%d", dbtag->tag->id);
            ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
            ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewInt (bsp, dbtag->tag->id));
          } else {
            id = (CharPtr) MemNew (sizeof (Char) * (8 + StringLen (dbtag->tag->str)));
            sprintf (id, "BankIt%s", dbtag->tag->str);
            ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
            ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, dbtag->tag->str, FALSE));
          }
          /* also look for BankIt id with forward slash instead of _ */
          if ((cp = StringRChr (id, '_')) != NULL) {
            len = cp - id;
            tmp = StringSave (id);
            tmp[len] = '/';
            ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, tmp, TRUE));
          }
        } else if (StringCmp (dbtag->db, "NCBIFILE") == 0 && dbtag->tag != NULL) {
          ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, dbtag->tag->str, FALSE));
          if ((local = FindLocalId(bsp->id)) != NULL
              && (oid = (ObjectIdPtr) local->data.ptrvalue) != NULL
              && oid->str != NULL
              && (cp = StringSearch (dbtag->tag->str, oid->str)) == dbtag->tag->str + StringLen (dbtag->tag->str) - StringLen (oid->str)) {
            /* file ID already ends with local ID, don't need to add twice, but do add file name */
            /* the copy drops the single character in front of the local ID.
             * NOTE(review): if cp == dbtag->tag->str the allocation size is 0
             * and the copy length is -1 - confirm this cannot happen */
            id = (CharPtr) MemNew (sizeof (Char) * (cp - dbtag->tag->str));
            StringNCpy (id, dbtag->tag->str, cp - dbtag->tag->str - 1);
            ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
          } else if ((cp = StringRChr (dbtag->tag->str, '/')) != NULL) {
            ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, cp + 1, FALSE));
            /* also add string for just file name */
            /* NOTE(review): this copies cp - str - 1 characters, one fewer
             * than precede the '/'; GetBioseqsByIdCallback copies cp - str
             * for the same truncation - confirm which is intended */
            id = (CharPtr) MemNew (sizeof (Char) * (cp - dbtag->tag->str));
            StringNCpy (id, dbtag->tag->str, cp - dbtag->tag->str - 1);
            ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
          }
        } else if (dbtag->tag != NULL) {
          /* for all other types, also use just the string or int part */
          if (dbtag->tag->str != NULL) {
            ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, dbtag->tag->str, FALSE));
          }
        }
      }
    }
  }
}


/* Bioseq visitor: adds the search strings for all IDs of bsp to the
 * ValNodeBlock passed as data.
 */
static void BuildIDStringsListCallback (BioseqPtr bsp, Pointer data)
{
  if (bsp != NULL) {
    BuildIdStringsListForIdList (bsp->id, bsp, (ValNodeBlockPtr) data);
  }
}


/* For items of the form "BankIt<prefix>/<suffix>" (not "BankIt|..."): when
 * a prefix occurs in only one item, adds "BankIt<prefix>" and "<prefix>"
 * as additional search strings for that item's Bioseq.
 */
static void AddBankItSingletons (ValNodeBlockPtr list)
{
  BioseqSearchItemPtr item, item2;
  ValNodePtr vnp, forw;
  CharPtr bankit_str = NULL, cp;
  ValNodePtr other_list = NULL;
  Int4 len1, len2;
  Boolean add_truncated;

  /* collect the candidates; other_list borrows the items from list */
  for (vnp = list->head; vnp != NULL; vnp = vnp->next) {
    item = (BioseqSearchItemPtr) vnp->data.ptrvalue;
    if (item != NULL && StringNICmp (item->str, "BankIt", 6)
== 0 && item->str[6] != '|' && StringChr (item->str, '/') != NULL) {
      ValNodeAddPointer (&other_list, 0, item);
    }
  }

  /* sorting puts items with the same prefix next to each other */
  other_list = ValNodeSort (other_list, SortVnpByBioseqSearchItem);
  vnp = other_list;
  while (vnp != NULL) {
    item = (BioseqSearchItemPtr) vnp->data.ptrvalue;
    add_truncated = TRUE;
    if (vnp->next != NULL) {
      item2 = vnp->next->data.ptrvalue;
      /* every candidate contains '/', so StringRChr cannot return NULL */
      cp = StringRChr (item->str, '/');
      len1 = cp - item->str;
      cp = StringRChr (item2->str, '/');
      len2 = cp - item2->str;
      if (len1 == len2 && StringNICmp (item->str, item2->str, len1) == 0) {
        /* prefix is shared: skip the whole run of items with this prefix */
        add_truncated = FALSE;
        forw = vnp->next->next;
        while (forw != NULL
               && (item2 = (BioseqSearchItemPtr) forw->data.ptrvalue) != NULL
               && (cp = StringRChr (item2->str, '/')) != NULL
               && (len2 = cp - item2->str) == len1
               && StringNICmp (item->str, item2->str, len1) == 0) {
          forw = forw->next;
        }
        vnp = forw;
      }
    }
    if (add_truncated) {
      bankit_str = StringSave (item->str);
      cp = StringRChr (bankit_str, '/');
      if (cp != NULL) {
        *cp = 0;
      }
      ValNodeAddPointerToEnd (list, 0, BioseqSearchItemNewStr (item->bsp, bankit_str, TRUE));
      /* also add string without BankIt */
      /* this second item points into bankit_str, which the first item owns */
      ValNodeAddPointerToEnd (list, 0, BioseqSearchItemNewStr (item->bsp, bankit_str + 6, FALSE));
      vnp = vnp->next;
    }
  }
  /* only the nodes are freed; the items still belong to list */
  other_list = ValNodeFree (other_list);
}


/* For items starting with "lcl|" that contain "__": when the text before
 * the first "__" occurs in only one item, adds that truncated text as an
 * additional search string for the item's Bioseq.
 */
static void AddUnderscoreSingletons (ValNodeBlockPtr list)
{
  BioseqSearchItemPtr item, item2;
  ValNodePtr vnp, forw;
  CharPtr single_str = NULL, cp;
  ValNodePtr other_list = NULL;
  Int4 len1, len2;
  Boolean add_truncated;

  /* collect the candidates; other_list borrows the items from list */
  for (vnp = list->head; vnp != NULL; vnp = vnp->next) {
    item = (BioseqSearchItemPtr) vnp->data.ptrvalue;
    if (item != NULL && StringNICmp (item->str, "lcl|", 4) == 0 && StringSearch (item->str, "__") != NULL) {
      ValNodeAddPointer (&other_list, 0, item);
    }
  }

  /* sorting puts items with the same prefix next to each other */
  other_list = ValNodeSort (other_list, SortVnpByBioseqSearchItem);
  vnp = other_list;
  while (vnp != NULL) {
    item = (BioseqSearchItemPtr) vnp->data.ptrvalue;
    add_truncated = TRUE;
    if (vnp->next != NULL) {
      item2 = vnp->next->data.ptrvalue;
      /* every candidate contains "__", so StringSearch cannot return NULL */
      cp = StringSearch (item->str, "__");
      len1 = cp -
item->str;
      cp = StringSearch (item2->str, "__");
      len2 = cp - item2->str;
      if (len1 == len2 && StringNICmp (item->str, item2->str, len1) == 0) {
        /* prefix is shared: skip the whole run of items with this prefix */
        add_truncated = FALSE;
        forw = vnp->next->next;
        while (forw != NULL
               && (item2 = (BioseqSearchItemPtr) forw->data.ptrvalue) != NULL
               && (cp = StringSearch (item2->str, "__")) != NULL
               && (len2 = cp - item2->str) == len1
               && StringNICmp (item->str, item2->str, len1) == 0) {
          forw = forw->next;
        }
        vnp = forw;
      }
    }
    if (add_truncated) {
      single_str = StringSave (item->str);
      cp = StringSearch (single_str, "__");
      if (cp != NULL) {
        *cp = 0;
      }
      ValNodeAddPointerToEnd (list, 0, BioseqSearchItemNewStr (item->bsp, single_str, TRUE));
      vnp = vnp->next;
    }
  }
  /* only the nodes are freed; the items still belong to list */
  other_list = ValNodeFree (other_list);
}


/* first are str, second are int */
/* items[0 .. num_str-1] are the string items, followed by num_int items
 * with a positive num; num_total is their sum.
 */
typedef struct bioseqsearchindex {
  Int4 num_str;
  Int4 num_int;
  Int4 num_total;
  BioseqSearchItemPtr PNTR items;
} BioseqSearchIndexData, PNTR BioseqSearchIndexPtr;


/* Frees the index, its item array and every item in it. */
static BioseqSearchIndexPtr BioseqSearchIndexFree (BioseqSearchIndexPtr index)
{
  Int4 i;

  if (index != NULL) {
    for (i = 0; i < index->num_total; i++) {
      index->items[i] = BioseqSearchItemFree(index->items[i]);
    }
    index->items = MemFree (index->items);
    index = MemFree (index);
  }
  return index;
}


/* Builds a sorted search index over the ID strings of all Bioseqs in sep.
 * Ownership of the items moves from the temporary list into the index.
 */
static BioseqSearchIndexPtr BuildIDStringsList (SeqEntryPtr sep)
{
  ValNodeBlock vnb;
  ValNodePtr list = NULL, vnp;
  Int4       num_total, i;
  BioseqSearchIndexPtr index;

  vnb.head = NULL;
  vnb.tail = NULL;
  VisitBioseqsInSep (sep, &vnb, BuildIDStringsListCallback);
  AddBankItSingletons(&vnb);
  AddUnderscoreSingletons(&vnb);
  list = vnb.head;
  list = ValNodeSort (list, SortVnpByBioseqSearchItem);
  num_total = ValNodeLen (list);
  index = (BioseqSearchIndexPtr) MemNew (sizeof (BioseqSearchIndexData));
  index->items = (BioseqSearchItemPtr PNTR) MemNew (sizeof (BioseqSearchItemPtr) * num_total);
  for (vnp = list, i = 0; vnp != NULL && i < num_total; vnp = vnp->next, i++) {
    index->items[i] = vnp->data.ptrvalue;
    /* clear the node so that freeing the list cannot touch the item */
    vnp->data.ptrvalue = NULL;
    if (index->items[i]->num > 0) {
      index->num_int++;
    } else {
index->num_str++; } } index->num_total = index->num_int + index->num_str; list = ValNodeFree (list); return index; } static BioseqSearchIndexPtr BuildIDStringsListForBioseqList (ValNodePtr bsp_list) { ValNodeBlock vnb; ValNodePtr list = NULL, vnp; Int4 num_total, i; BioseqSearchIndexPtr index; vnb.head = NULL; vnb.tail = NULL; for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { BuildIDStringsListCallback (vnp->data.ptrvalue, &vnb); } AddBankItSingletons(&vnb); AddUnderscoreSingletons(&vnb); list = vnb.head; list = ValNodeSort (list, SortVnpByBioseqSearchItem); num_total = ValNodeLen (list); index = (BioseqSearchIndexPtr) MemNew (sizeof (BioseqSearchIndexData)); index->items = (BioseqSearchItemPtr PNTR) MemNew (sizeof (BioseqSearchItemPtr) * num_total); for (vnp = list, i = 0; vnp != NULL && i < num_total; vnp = vnp->next, i++) { index->items[i] = vnp->data.ptrvalue; vnp->data.ptrvalue = NULL; if (index->items[i]->num > 0) { index->num_int++; } else { index->num_str++; } } index->num_total = index->num_int + index->num_str; list = ValNodeFree (list); return index; } static BioseqPtr FindStringInIdListIndex (CharPtr str, BioseqSearchIndexPtr index) { CharPtr tmp; Int4 match, imax, imin, i, j; Int4 num = -1; if (index == NULL) { return NULL; } if (StringIsAllDigits (str)) { match = atoi (str); imax = index->num_total - 1; imin = index->num_str; while (imax >= imin) { i = (imax + imin)/2; if (index->items[i]->num > match) imax = i - 1; else if (index->items[i]->num < match) imin = i + 1; else { num = i; break; } } } else { imax = index->num_str - 1; imin = 0; while (imax >= imin) { i = (imax + imin)/2; tmp = index->items[i]->str; if ((j = StringICmp(tmp, str)) > 0) imax = i - 1; else if (j < 0) imin = i + 1; else { num = i; break; } } } if (num > -1) { return index->items[num]->bsp; } else { return NULL; } } static ValNodePtr FindListInIdListIndex (Uint1 match_location, CharPtr match_str, BioseqSearchIndexPtr index) { Int4 i; ValNodePtr list = NULL; 
StringConstraintPtr scp;
  Char buf[5000];   /* scratch buffer for the text form of integer keys */

  if (StringHasNoText (match_str) || index == NULL) {
    return NULL;
  }
  scp = StringConstraintNew ();
  scp->match_text = StringSave (match_str);
  scp->match_location = match_location;
  /* string-keyed entries are tested directly */
  for (i = 0; i < index->num_str; i++) {
    if (DoesStringMatchConstraint (index->items[i]->str, scp)) {
      ValNodeAddPointer (&list, OBJ_BIOSEQ, index->items[i]->bsp);
    }
  }
  /* integer-keyed entries are printed to text first, then tested the same way */
  for (i = index->num_str; i < index->num_str + index->num_int; i++) {
    sprintf (buf, "%u", index->items[i]->num);
    if (DoesStringMatchConstraint (buf, scp)) {
      ValNodeAddPointer (&list, OBJ_BIOSEQ, index->items[i]->bsp);
    }
  }
  scp = StringConstraintFree (scp);
  /* a Bioseq may have been reached through several keys: sort and drop repeats */
  list = ValNodeSort (list, SortVnpByChoiceAndPtrvalue);
  ValNodeUnique (&list, SortVnpByChoiceAndPtrvalue, ValNodeFree);
  return list;
}

/* J. Chen */
/*
 * State shared between FindBioseqByProteinName and its feature callback
 * GetBioseqByProteinName.
 */
typedef struct bioseqbymatch {
  BioseqPtr bsp;        /* candidate Bioseq; set to NULL by the callback to reject it */
  CharPtr   match_str;  /* protein name to compare against */
} BioseqByMatch, PNTR BioseqByMatchPtr;

/* J. Chen */
/*
 * Feature callback; userdata is a BioseqByMatchPtr.
 *
 * The candidate bsp is cleared when sfp is NULL, is not a protein feature,
 * or carries no ProtRef.  Otherwise it is cleared as soon as one entry of
 * the ProtRef name list differs from match_str (strcmp returns non-zero).
 * The callback never sets bsp; it only rejects the candidate.
 *
 * NOTE(review): name->data.ptrvalue is handed to strcmp without a NULL
 * check, and a ProtRef with an empty name list leaves the candidate
 * accepted -- confirm both are intended.
 */
static void GetBioseqByProteinName(SeqFeatPtr sfp, Pointer userdata)
{
  BioseqByMatchPtr bsp_m_p;
  ProtRefPtr       prp;
  ValNodePtr       name;

  bsp_m_p = (BioseqByMatchPtr) userdata;
  if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT
      || (prp = (ProtRefPtr) sfp->data.value.ptrvalue) == NULL) {
    bsp_m_p->bsp = NULL;
    return;
  }
  for (name = prp->name; name != NULL; name = name->next)
    if (strcmp(name->data.ptrvalue, bsp_m_p->match_str))
      bsp_m_p->bsp = NULL;
} /* GetBioseqByProteinName */

/* J.
Chen */
/*
 * Walks sep recursively.  For every Bioseq entry the Bioseq is stored as
 * the candidate in bsp_m, GetBioseqByProteinName is run over the features
 * visited by VisitFeaturesInSep, and the Bioseq is appended to *match_list
 * (as OBJ_BIOSEQ) when the candidate survived.  For a Bioseq-set each
 * member entry is processed in turn.
 */
static void FindBioseqByProteinName(SeqEntryPtr sep, BioseqByMatchPtr bsp_m, ValNodePtr *match_list)
{
  SeqEntryPtr tmp;

  if (IS_Bioseq(sep)) {
    bsp_m->bsp = (BioseqPtr) sep->data.ptrvalue;
    VisitFeaturesInSep (sep, bsp_m, GetBioseqByProteinName);
    if (bsp_m->bsp != NULL)
      ValNodeAddPointer (match_list, OBJ_BIOSEQ, bsp_m->bsp);
  } else if (IS_Bioseq_set(sep)) {
    for (tmp = ((BioseqSetPtr) sep->data.ptrvalue)->seq_set; tmp != NULL; tmp= tmp->next) {
      FindBioseqByProteinName(tmp, bsp_m, match_list);
    }
  }
} /* FindBioseqByProteinName */

/*
 * Bioseq callback; data is a ValNodePtr PNTR.  Appends every non-NULL
 * Bioseq that is not an amino-acid sequence to the list as OBJ_BIOSEQ.
 */
static void GetAllBioseqsCallback (BioseqPtr bsp, Pointer data)
{
  if (bsp != NULL && !ISA_aa (bsp->mol)) {
    ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp);
  }
}

/* A search string together with the list of objects found for it. */
typedef struct stringlist {
  CharPtr    str;   /* text to look for */
  ValNodePtr list;  /* accumulated matches */
} StringListData, PNTR StringListPtr;

/*
 * Bioseq callback; data is a StringListPtr.  Amino-acid Bioseqs are
 * skipped.  The BioProject ID obtained from GetBioProjectIdFromBioseq is
 * compared with s->str using StringICmp, and on equality the Bioseq is
 * appended to s->list.  The ID string is released with MemFree afterwards.
 */
static void GetBioseqsByBioProjectCallback(BioseqPtr bsp, Pointer data)
{
  StringListPtr s;
  CharPtr bioproject;

  if (bsp == NULL || ISA_aa(bsp->mol) || (s = (StringListPtr) data) == NULL) {
    return;
  }
  bioproject = GetBioProjectIdFromBioseq(bsp, NULL);
  if (StringICmp (bioproject, s->str) == 0) {
    ValNodeAddPointer (&(s->list), OBJ_BIOSEQ, bsp);
  }
  bioproject = MemFree (bioproject);
}

/*
 * Returns the list (OBJ_BIOSEQ nodes) of Bioseqs under sep accepted by
 * GetBioseqsByBioProjectCallback for match_str.
 */
static ValNodePtr GetBioseqsByBioProject (SeqEntryPtr sep, CharPtr match_str)
{
  StringListData s;

  MemSet (&s, 0, sizeof (StringListData));
  s.str = match_str;
  VisitBioseqsInSep (sep, &s, GetBioseqsByBioProjectCallback);
  return s.list;
}

/*
 * Finds the objects in sep that a table row's match value refers to,
 * according to match_type->choice.  The kind of object in the returned list
 * (features, Bioseqs, or whatever the helper for that choice returns)
 * depends on the match type.
 *
 * index is optional; when supplied it is used for the protein/nucleotide
 * ID match types instead of a search through sep.
 *
 * Returns NULL when match_type or sep is NULL or nothing was found.
 */
static ValNodePtr
FindMatchForRowEx
(MatchTypePtr match_type,
 CharPtr      match_str,
 Uint2        entityID,
 SeqEntryPtr  sep,
 BioseqSearchIndexPtr index
 )
{
  ValNodePtr match_list = NULL;
  FindGeneLocusTagData fd;
  SeqFeatPtr           sfp;
  SeqMgrFeatContext    fcontext;
  BioseqPtr            bsp;
  BioseqByMatch        bsp_m;

  if (match_type == NULL || sep == NULL) return NULL;

  switch (match_type->choice) {
    case eTableMatchProteinName: /* J.
Chen */
      /* bsp_m.bsp is assigned per Bioseq inside FindBioseqByProteinName */
      bsp_m.match_str = match_str;
      FindBioseqByProteinName(sep, &bsp_m, &match_list);
      break;
    case eTableMatchFeatureID:
      sfp = SeqMgrGetFeatureByFeatID (entityID, NULL, match_str, NULL, &fcontext);
      if (sfp != NULL) {
        ValNodeAddPointer (&match_list, OBJ_SEQFEAT, sfp);
      }
      break;
    case eTableMatchGeneLocusTag:
      fd.locus_tag = match_str;
      fd.gene_list = NULL;
      VisitBioseqsInSep (sep, &fd, FindGeneByLocusTagBioseqCallback);
      ValNodeLink (&match_list, fd.gene_list);
      break;
    case eTableMatchProteinID:
    case eTableMatchNucID:
      if (match_type->match_location == String_location_equals && index != NULL) {
        /* exact match with an index available: single indexed lookup */
        bsp = FindStringInIdListIndex (match_str, index);
        if (bsp != NULL) {
          ValNodeAddPointer (&match_list, OBJ_BIOSEQ, bsp);
        }
      } else if (index != NULL) {
        /* other match locations: test the constraint against every index entry */
        ValNodeLink (&match_list, FindListInIdListIndex (match_type->match_location, match_str, index));
      } else {
        /* no index supplied: search the entry itself */
        ValNodeLink (&match_list, FindBioseqsByMatchType (sep, match_type->match_location, match_str));
      }
      break;
    case eTableMatchDbxref:
      match_list = GetFeaturesByDbxref (sep, match_str, match_type->match_location);
      break;
    case eTableMatchBioSource:
      match_list = GetBioSourcesByTaxName (sep, match_str, match_type->match_location);
      break;
    case eTableMatchSourceQual:
      match_list = GetBioSourcesBySourceQual (sep, match_type->data, match_str, match_type->match_location);
      break;
    case eTableMatchAny:
      /* match_str is not consulted: every non-protein Bioseq is a match */
      VisitBioseqsInSep (sep, &match_list, GetAllBioseqsCallback);
      break;
    case eTableMatchBioProject:
      match_list = GetBioseqsByBioProject(sep, match_str);
      break;
  }
  return match_list;
}

/*
 * Convenience wrapper around FindMatchForRowEx that passes no search index.
 */
static ValNodePtr
FindMatchForRow
(MatchTypePtr match_type,
 CharPtr      match_str,
 Uint2        entityID,
 SeqEntryPtr  sep
 )
{
  return FindMatchForRowEx (match_type, match_str, entityID, sep, NULL);
}

/*
 * Returns the features of type featdef that belong to a protein Bioseq.
 *
 * For featdefs that map to SEQFEAT_PROT the features are taken from the
 * protein Bioseq itself.  For any other featdef the coding region whose
 * product is bsp is looked up, and the CDS itself, its gene, or its mRNA is
 * returned for FEATDEF_CDS, FEATDEF_GENE and FEATDEF_mRNA respectively
 * (nothing for other featdefs).
 *
 * Returns NULL when bsp is NULL or is not an amino-acid sequence.
 */
static ValNodePtr GetFeatureListForProteinBioseq (Uint1 featdef, BioseqPtr bsp)
{
  ValNodePtr feat_list = NULL;
  SeqFeatPtr sfp, cds;
  SeqMgrFeatContext fcontext;
  Int4              seqfeattype;

  if (bsp == NULL || !ISA_aa (bsp->mol))
  {
    return NULL;
  }

  seqfeattype = FindFeatFromFeatDefType (featdef);
  if
(seqfeattype == SEQFEAT_PROT)
  {
    /* protein-type features are indexed on the protein Bioseq itself */
    for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext);
         sfp != NULL;
         sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext))
    {
      ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
    }
  }
  else
  {
    /* other feature types are reached through the CDS that produces bsp */
    cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
    if (cds != NULL)
    {
      sfp = NULL;
      if (featdef == FEATDEF_CDS)
      {
        sfp = cds;
      }
      else if (featdef == FEATDEF_GENE)
      {
        sfp = GetGeneForFeature (cds);
      }
      else if (featdef == FEATDEF_mRNA)
      {
        sfp = GetmRNAforCDS (cds);
      }
      if (sfp != NULL)
      {
        ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
      }
    }
  }
  return feat_list;
}

/*
 * Returns the features of type featdef associated with a nucleotide Bioseq.
 *
 * For featdefs that map to SEQFEAT_PROT, every coding region on bsp is
 * visited and the matching features of its product Bioseq are gathered via
 * GetFeatureListForProteinBioseq.  For any other featdef the features are
 * taken directly from bsp.
 *
 * Returns NULL when bsp is NULL or is an amino-acid sequence.
 */
static ValNodePtr GetFeatureListForNucleotideBioseq (Uint1 featdef, BioseqPtr bsp)
{
  ValNodePtr feat_list = NULL;
  SeqFeatPtr sfp;
  SeqMgrFeatContext fcontext;
  Int4              seqfeattype;
  BioseqPtr         prot_bsp;

  if (bsp == NULL || ISA_aa (bsp->mol))
  {
    return NULL;
  }

  seqfeattype = FindFeatFromFeatDefType (featdef);
  if (seqfeattype == SEQFEAT_PROT)
  {
    for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &fcontext);
         sfp != NULL;
         sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &fcontext))
    {
      /* a CDS whose product cannot be found yields a NULL Bioseq, which the helper rejects */
      prot_bsp = BioseqFindFromSeqLoc (sfp->product);
      ValNodeLink (&feat_list, GetFeatureListForProteinBioseq (featdef, prot_bsp));
    }
  }
  else
  {
    for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext);
         sfp != NULL;
         sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext))
    {
      ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
    }
  }
  return feat_list;
}

/*
 * Returns the features of type featdef, on the Bioseq the gene is located
 * on, for which GetGeneForFeature reports this gene.
 *
 * The scan stops at the first feature whose left end is not below the
 * gene's stop position, and features ending before the gene's start are
 * skipped.  The gene itself is never included.
 */
static ValNodePtr GetFeaturesForGene (SeqFeatPtr gene, Uint1 featdef)
{
  BioseqPtr bsp;
  SeqFeatPtr sfp;
  ValNodePtr feat_list = NULL;
  SeqMgrFeatContext fcontext;
  Int4 start, stop, swap;

  if (gene == NULL) return NULL;

  bsp = BioseqFindFromSeqLoc (gene->location);
  start = SeqLocStart (gene->location);
  stop = SeqLocStop (gene->location);
  /* normalise so that start <= stop */
  if (stop < start)
  {
    swap = start;
    start = stop;
    stop = swap;
  }
  for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext);
       sfp != NULL && fcontext.left < stop;
       sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext))
  {
    if
(sfp != gene && fcontext.right >= start && gene == GetGeneForFeature (sfp)) { ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp); } } return feat_list; } static ValNodePtr GetFeatureListForGene (Uint1 featdef, SeqFeatPtr gene) { ValNodePtr feat_list = NULL, cds_list, vnp; SeqFeatPtr sfp, cds; SeqMgrFeatContext fcontext; BioseqPtr protbsp; if (gene == NULL) { return NULL; } if (featdef == FEATDEF_GENE) { ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, gene); } else if (FindFeatFromFeatDefType (featdef == SEQFEAT_PROT)) { cds_list = GetFeaturesForGene (gene, FEATDEF_CDS); for (vnp = cds_list; vnp != NULL; vnp = vnp->next) { cds = vnp->data.ptrvalue; if (cds != NULL) { protbsp = BioseqFindFromSeqLoc (cds->product); for (sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, featdef, &fcontext); sfp != NULL; sfp = SeqMgrGetNextFeature (protbsp, sfp, 0, featdef, &fcontext)) { ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp); } } } cds_list = ValNodeFree (cds_list); } else { feat_list = GetFeaturesForGene (gene, featdef); } return feat_list; } static ValNodePtr AddFeaturesFromBioseqSet (BioseqSetPtr bssp, Uint1 featdef) { SeqEntryPtr sep; BioseqPtr bsp; Int4 seqfeattype; ValNodePtr item_list = NULL; if (bssp == NULL) return NULL; seqfeattype = FindFeatFromFeatDefType (featdef); for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { if (sep->data.ptrvalue == NULL) continue; if (IS_Bioseq (sep)) { bsp = sep->data.ptrvalue; if (seqfeattype == SEQFEAT_PROT) { if (ISA_aa (bsp->mol)) { ValNodeLink (&item_list, GetFeatureListForProteinBioseq (featdef, bsp)); } } else if (!ISA_aa (bsp->mol)) { ValNodeLink (&item_list, GetFeatureListForNucleotideBioseq (featdef, bsp)); } } else if (IS_Bioseq_set (sep)) { ValNodeLink (&item_list, AddFeaturesFromBioseqSet (sep->data.ptrvalue, featdef)); } } return item_list; } static ValNodePtr GetFeatureListForBioSourceObjects (ValNodePtr item_list, FeatureFieldPtr field) { ValNodePtr vnp; SeqFeatPtr sfp; SeqDescrPtr sdp; BioseqPtr bsp; ObjValNodePtr ovp; 
ValNodePtr feature_list = NULL;

  if (item_list == NULL || field == NULL) return NULL;

  for (vnp = item_list; vnp != NULL; vnp = vnp->next)
  {
    if (vnp->choice == OBJ_SEQFEAT)
    {
      /* source feature: use the Bioseq the feature is located on */
      sfp = vnp->data.ptrvalue;
      if (sfp != NULL)
      {
        bsp = BioseqFindFromSeqLoc (sfp->location);
        ValNodeLink (&feature_list, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), bsp));
      }
    }
    else if (vnp->choice == OBJ_SEQDESC)
    {
      /* source descriptor: only extended descriptors carry the parent link read below */
      sdp = vnp->data.ptrvalue;
      if (sdp != NULL && sdp->extended != 0)
      {
        ovp = (ObjValNodePtr) sdp;
        if (ovp->idx.parenttype == OBJ_BIOSEQSET)
        {
          ValNodeLink (&feature_list, AddFeaturesFromBioseqSet (ovp->idx.parentptr, GetFeatdefFromFeatureType(field->type)));
        }
        else if (ovp->idx.parenttype == OBJ_BIOSEQ)
        {
          bsp = (BioseqPtr) ovp->idx.parentptr;
          ValNodeLink (&feature_list, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), bsp));
        }
      }
    }
  }
  return feature_list;
}

/*
 * Makes a shallow copy of a ValNode list: a new chain of nodes with the
 * same choice and the same data.ptrvalue as the original, in the same
 * order.  The pointed-to data is shared, not duplicated, so the copy must
 * be released without freeing the data.
 *
 * Returns NULL for an empty list.
 */
NLM_EXTERN ValNodePtr ValNodeCopyPtr (ValNodePtr orig)
{
  ValNodePtr new_list = NULL, last_vnp = NULL, vnp;

  while (orig != NULL)
  {
    vnp = ValNodeNew (NULL);
    vnp->choice = orig->choice;
    vnp->data.ptrvalue = orig->data.ptrvalue;
    /* append by hand so the tail does not have to be searched for each time */
    if (last_vnp == NULL)
    {
      new_list = vnp;
    }
    else
    {
      last_vnp->next = vnp;
    }
    last_vnp = vnp;
    orig = orig->next;
  }
  return new_list;
}

/*
 * Converts the objects matched for a table row into the list of features
 * (of the type named by field) that a feature-field column applies to.
 * How a matched object is mapped to features depends on match_type->choice.
 *
 * Returns NULL when any argument is NULL or no feature was found.
 */
static ValNodePtr GetFeatureListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list, FeatureFieldPtr field)
{
  ValNodePtr feature_list = NULL, vnp;

  if (match_list == NULL || field == NULL || match_type == NULL) return NULL;

  switch (match_type->choice)
  {
    case eTableMatchFeatureID:
      /* the matched objects are used as the targets unchanged */
      feature_list = ValNodeCopyPtr (match_list);
      break;
    case eTableMatchGeneLocusTag:
      for (vnp = match_list; vnp != NULL; vnp = vnp->next)
      {
        ValNodeLink (&feature_list, GetFeatureListForGene (GetFeatdefFromFeatureType(field->type), vnp->data.ptrvalue));
      }
      break;
    case eTableMatchProteinName: /* J.
Chen */
    case eTableMatchProteinID:
      for (vnp = match_list; vnp != NULL; vnp = vnp->next)
      {
        ValNodeLink (&feature_list, GetFeatureListForProteinBioseq (GetFeatdefFromFeatureType(field->type), vnp->data.ptrvalue));
      }
      break;
    case eTableMatchDbxref:
      /* the matched objects are used as the targets unchanged */
      feature_list = ValNodeCopyPtr (match_list);
      break;
    case eTableMatchNucID:
    case eTableMatchAny:
    case eTableMatchBioProject:
      for (vnp = match_list; vnp != NULL; vnp = vnp->next)
      {
        ValNodeLink (&feature_list, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), vnp->data.ptrvalue));
      }
      break;
    case eTableMatchBioSource:
    case eTableMatchSourceQual:
      ValNodeLink (&feature_list, GetFeatureListForBioSourceObjects (match_list, field));
      break;
  }
  return feature_list;
}

/*
 * Appends the source descriptors that apply to bsp to *feature_list as
 * OBJ_SEQDESC nodes.
 *
 * Side effect: when no source descriptor is found and bsp is not an
 * amino-acid sequence, a new source descriptor holding an empty BioSource
 * is created on the entry returned by GetBestTopParentForData, and that
 * new descriptor is appended instead.
 *
 * NOTE(review): the result of CreateNewDescriptor is dereferenced without
 * a NULL check.
 */
static void AddBioSourcesForBioseq (BioseqPtr bsp, ValNodePtr PNTR feature_list)
{
  SeqDescrPtr sdp;
  SeqMgrDescContext context;
  Boolean any = FALSE;   /* TRUE once at least one existing source descriptor was seen */
  SeqEntryPtr sep;

  if (bsp == NULL || feature_list == NULL) return;
  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
        sdp != NULL;
        sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &context)) {
    ValNodeAddPointer (feature_list, OBJ_SEQDESC, sdp);
    any = TRUE;
  }
  if (!any && !ISA_aa (bsp->mol)) {
    sep = GetBestTopParentForData (bsp->idx.entityID, bsp);
    sdp = CreateNewDescriptor (sep, Seq_descr_source);
    sdp->data.ptrvalue = BioSourceNew ();
    ValNodeAddPointer (feature_list, OBJ_SEQDESC, sdp);
  }
}

/*
 * Appends the biosource(s) relevant to a feature to *feature_list: the
 * feature itself when it is a biosource feature, otherwise the source
 * descriptors of the Bioseq the feature is located on (see
 * AddBioSourcesForBioseq, including its side effect).
 */
static void AddBioSourcesForFeature (SeqFeatPtr sfp, ValNodePtr PNTR feature_list)
{
  BioseqPtr bsp;

  if (sfp == NULL || feature_list == NULL) return;

  if (sfp->data.choice == SEQFEAT_BIOSRC) {
    ValNodeAddPointer (feature_list, OBJ_SEQFEAT, sfp);
  } else {
    bsp = BioseqFindFromSeqLoc (sfp->location);
    AddBioSourcesForBioseq (bsp, feature_list);
  }
}

/*
 * Converts the objects matched for a table row into the list of biosource
 * objects (source features and/or source descriptors) that a source
 * qualifier column applies to.
 *
 * Returns NULL when any argument is NULL or nothing was found.
 */
static ValNodePtr GetBioSourceListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list, FeatureFieldPtr field)
{
  ValNodePtr feature_list = NULL, vnp;

  if (match_list == NULL || field == NULL || match_type == NULL) return NULL;

  switch
(match_type->choice)
  {
    case eTableMatchFeatureID:
      for (vnp = match_list; vnp != NULL; vnp = vnp->next)
      {
        if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL)
        {
          AddBioSourcesForFeature (vnp->data.ptrvalue, &feature_list);
        }
      }
      break;
    case eTableMatchGeneLocusTag:
      for (vnp = match_list; vnp != NULL; vnp = vnp->next)
      {
        if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL)
        {
          AddBioSourcesForFeature (vnp->data.ptrvalue, &feature_list);
        }
      }
      break;
    case eTableMatchProteinName: /* J. Chen */
    case eTableMatchProteinID:
    case eTableMatchNucID:
    case eTableMatchAny:
    case eTableMatchBioProject:
      for (vnp = match_list; vnp != NULL; vnp = vnp->next)
      {
        if (vnp->choice == OBJ_BIOSEQ)
        {
          AddBioSourcesForBioseq (vnp->data.ptrvalue, &feature_list);
        }
      }
      break;
    case eTableMatchDbxref:
      for (vnp = match_list; vnp != NULL; vnp = vnp->next)
      {
        if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL)
        {
          AddBioSourcesForFeature (vnp->data.ptrvalue, &feature_list);
        }
      }
      break;
    case eTableMatchBioSource:
    case eTableMatchSourceQual:
      /* the matched objects already are the biosources */
      feature_list = ValNodeCopyPtr (match_list);
      break;
  }
  return feature_list;
}

/*
 * Descriptor filter handed to PropagateSomeDescriptors: returns TRUE only
 * for the one descriptor whose address equals extradata.
 */
Boolean PropagateThisDescriptor (SeqDescPtr sdp, Pointer extradata)
{
  if (sdp == (SeqDescPtr) extradata) {
    return TRUE;
  } else {
    return FALSE;
  }
}

/*
 * Pushes publication descriptors that reach bsp from an enclosing
 * Bioseq-set down to the individual sequences before they are edited.
 *
 * Every pub descriptor found for bsp that is an extended descriptor whose
 * parent is a Bioseq-set of a class other than nuc-prot is propagated
 * through PropagateSomeDescriptors, starting at the top entry of bsp's
 * entity.  If anything was propagated, marked objects are deleted, the
 * entity is flagged dirty and an update message is sent.
 */
static void PrePropagatePubs (BioseqPtr bsp)
{
  ValNodePtr pub_list = NULL, vnp;
  SeqEntryPtr sep;
  SeqDescPtr sdp;
  SeqMgrDescContext dcontext;
  ObjValNodePtr ovp;
  BioseqSetPtr  bssp;

  if (bsp == NULL) {
    return;
  }

  /* first collect, then propagate: the descriptor chain is not modified while it is being walked */
  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext);
       sdp != NULL;
       sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_pub, &dcontext)) {
    if (sdp->extended
        && (ovp = (ObjValNodePtr) sdp) != NULL
        && ovp->idx.parenttype == OBJ_BIOSEQSET
        && (bssp = (BioseqSetPtr) ovp->idx.parentptr) != NULL
        && bssp->_class != BioseqseqSet_class_nuc_prot) {
      ValNodeAddPointer (&pub_list, OBJ_SEQDESC, sdp);
    }
  }
  if (pub_list != NULL) {
    sep = GetTopSeqEntryForEntityID(bsp->idx.entityID);
    for (vnp = pub_list; vnp != NULL; vnp =
vnp->next) {
      PropagateSomeDescriptors (sep, PropagateThisDescriptor, vnp->data.ptrvalue);
    }
    DeleteMarkedObjects (bsp->idx.entityID, 0, NULL);
    ObjMgrSetDirtyFlag (bsp->idx.entityID, TRUE);
    ObjMgrSendMsg (OM_MSG_UPDATE, bsp->idx.entityID, 0, 0);
    /* the list only referenced the descriptors; release the nodes alone */
    pub_list = ValNodeFree (pub_list);
  }
}

/*
 * Appends the publications that apply to bsp to *feature_list: first the
 * pub descriptors (OBJ_SEQDESC), then the pub features (OBJ_SEQFEAT).
 *
 * Side effect: PrePropagatePubs is run on bsp first, which may modify the
 * record.
 */
static void AddPubsForBioseq (BioseqPtr bsp, ValNodePtr PNTR feature_list)
{
  SeqDescrPtr sdp;
  SeqMgrDescContext dcontext;
  SeqFeatPtr sfp;
  SeqMgrFeatContext fcontext;

  if (bsp == NULL || feature_list == NULL) return;

  /* pre-propagate publications descriptors */
  PrePropagatePubs(bsp);

  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext);
       sdp != NULL;
       sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_pub, &dcontext)) {
    ValNodeAddPointer (feature_list, OBJ_SEQDESC, sdp);
  }
  for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PUB, 0, &fcontext);
       sfp != NULL;
       sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_PUB, 0, &fcontext)) {
    ValNodeAddPointer (feature_list, OBJ_SEQFEAT, sfp);
  }
}

/*
 * Collects the publications (see AddPubsForBioseq) of every non-amino-acid
 * Bioseq in a Bioseq-set, descending into nested sets.
 */
static ValNodePtr AddPubListFromBioseqSet (BioseqSetPtr bssp)
{
  SeqEntryPtr sep;
  BioseqPtr   bsp;
  ValNodePtr  item_list = NULL;

  if (bssp == NULL) return NULL;

  for (sep = bssp->seq_set; sep != NULL; sep = sep->next)
  {
    if (sep->data.ptrvalue == NULL) continue;
    if (IS_Bioseq (sep))
    {
      bsp = sep->data.ptrvalue;
      if (!ISA_aa (bsp->mol))
      {
        AddPubsForBioseq (bsp, &item_list);
      }
    }
    else if (IS_Bioseq_set (sep))
    {
      ValNodeLink (&item_list, AddPubListFromBioseqSet (sep->data.ptrvalue));
    }
  }
  return item_list;
}

/*
 * Maps a list of biosource objects (source features and/or source
 * descriptors) to the publications of the sequences those biosources
 * apply to.
 */
static ValNodePtr GetPubListForBioSourceObjects (ValNodePtr item_list)
{
  ValNodePtr vnp;
  SeqFeatPtr sfp;
  SeqDescrPtr sdp;
  BioseqPtr   bsp;
  ObjValNodePtr ovp;
  ValNodePtr  feature_list = NULL;

  if (item_list == NULL) return NULL;

  for (vnp = item_list; vnp != NULL; vnp = vnp->next)
  {
    if (vnp->choice == OBJ_SEQFEAT)
    {
      /* source feature: use the Bioseq the feature is located on */
      sfp = vnp->data.ptrvalue;
      if (sfp != NULL)
      {
        bsp = BioseqFindFromSeqLoc (sfp->location);
        AddPubsForBioseq (bsp, &feature_list);
      }
    }
    else if (vnp->choice == OBJ_SEQDESC)
    {
      sdp = vnp->data.ptrvalue;
      if
(sdp != NULL && sdp->extended != 0)
      {
        /* only extended descriptors carry the parent link read below */
        ovp = (ObjValNodePtr) sdp;
        if (ovp->idx.parenttype == OBJ_BIOSEQSET)
        {
          ValNodeLink (&feature_list, AddPubListFromBioseqSet (ovp->idx.parentptr));
        }
        else if (ovp->idx.parenttype == OBJ_BIOSEQ)
        {
          bsp = (BioseqPtr) ovp->idx.parentptr;
          AddPubsForBioseq (bsp, &feature_list);
        }
      }
    }
  }
  return feature_list;
}

/*
 * Converts the objects matched for a table row into the list of
 * publication objects (pub descriptors and pub features) that a
 * publication column applies to.  Matched features are mapped to the
 * Bioseq they are located on, matched Bioseqs are used directly, and
 * matched biosources go through GetPubListForBioSourceObjects.
 *
 * Returns NULL when match_type is NULL or nothing was found.
 */
static ValNodePtr GetPubListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list)
{
  SeqFeatPtr sfp;
  ValNodePtr vnp;
  ValNodePtr feature_list = NULL;

  if (match_type == NULL) return NULL;

  switch (match_type->choice)
  {
    case eTableMatchFeatureID:
      for (vnp = match_list; vnp != NULL; vnp = vnp->next)
      {
        if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL)
        {
          sfp = (SeqFeatPtr) vnp->data.ptrvalue;
          AddPubsForBioseq (BioseqFindFromSeqLoc (sfp->location), &feature_list);
        }
      }
      break;
    case eTableMatchGeneLocusTag:
      for (vnp = match_list; vnp != NULL; vnp = vnp->next)
      {
        if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL)
        {
          sfp = (SeqFeatPtr) vnp->data.ptrvalue;
          AddPubsForBioseq (BioseqFindFromSeqLoc (sfp->location), &feature_list);
        }
      }
      break;
    case eTableMatchProteinName: /* J.
Chen */ case eTableMatchProteinID: case eTableMatchNucID: case eTableMatchAny: case eTableMatchBioProject: for (vnp = match_list; vnp != NULL; vnp = vnp->next) { if (vnp->choice == OBJ_BIOSEQ) { AddPubsForBioseq (vnp->data.ptrvalue, &feature_list); } } break; case eTableMatchDbxref: for (vnp = match_list; vnp != NULL; vnp = vnp->next) { if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; AddPubsForBioseq (BioseqFindFromSeqLoc (sfp->location), &feature_list); } } break; case eTableMatchBioSource: case eTableMatchSourceQual: feature_list = GetPubListForBioSourceObjects (match_list); break; } return feature_list; } static ValNodePtr GetSequenceListForBioSourceObjects (ValNodePtr item_list) { ValNodePtr vnp; SeqFeatPtr sfp; SeqDescrPtr sdp; BioseqPtr bsp; ObjValNodePtr ovp; ValNodePtr seq_list = NULL; SeqEntryPtr sep; if (item_list == NULL) return NULL; for (vnp = item_list; vnp != NULL; vnp = vnp->next) { if (vnp->choice == OBJ_SEQFEAT) { sfp = vnp->data.ptrvalue; if (sfp != NULL) { bsp = BioseqFindFromSeqLoc (sfp->location); if (bsp != NULL) { ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp); } } } else if (vnp->choice == OBJ_SEQDESC) { sdp = vnp->data.ptrvalue; if (sdp != NULL && sdp->extended != 0) { ovp = (ObjValNodePtr) sdp; if (ovp->idx.parenttype == OBJ_BIOSEQSET) { sep = SeqMgrGetSeqEntryForData (ovp->idx.parentptr); /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */ seq_list = CollectNucBioseqs (sep); } else if (ovp->idx.parenttype == OBJ_BIOSEQ) { bsp = (BioseqPtr) ovp->idx.parentptr; if (bsp != NULL) { ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp); } } } } } return seq_list; } static ValNodePtr GetSequenceListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list) { SeqFeatPtr sfp; ValNodePtr vnp; ValNodePtr seq_list = NULL; BioseqPtr bsp; if (match_type == NULL) return NULL; switch (match_type->choice) { case eTableMatchFeatureID: for (vnp = match_list; vnp != NULL; vnp = 
vnp->next)
      {
        if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL)
        {
          sfp = (SeqFeatPtr) vnp->data.ptrvalue;
          bsp = BioseqFindFromSeqLoc (sfp->location);
          if (bsp != NULL)
          {
            ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp);
          }
        }
      }
      break;
    case eTableMatchGeneLocusTag:
      for (vnp = match_list; vnp != NULL; vnp = vnp->next)
      {
        if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL)
        {
          sfp = (SeqFeatPtr) vnp->data.ptrvalue;
          bsp = BioseqFindFromSeqLoc (sfp->location);
          if (bsp != NULL)
          {
            ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp);
          }
        }
      }
      break;
    case eTableMatchProteinName: /* J. Chen */
    case eTableMatchProteinID:
    case eTableMatchNucID:
    case eTableMatchBioProject:
      /* the matches already are Bioseqs: copy the references */
      for (vnp = match_list; vnp != NULL; vnp = vnp->next)
      {
        if (vnp->choice == OBJ_BIOSEQ)
        {
          ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, vnp->data.ptrvalue);
        }
      }
      break;
    case eTableMatchDbxref:
      for (vnp = match_list; vnp != NULL; vnp = vnp->next)
      {
        if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL)
        {
          sfp = (SeqFeatPtr) vnp->data.ptrvalue;
          bsp = BioseqFindFromSeqLoc (sfp->location);
          if (bsp != NULL)
          {
            ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp);
          }
        }
      }
      break;
    case eTableMatchBioSource:
    case eTableMatchSourceQual:
      seq_list = GetSequenceListForBioSourceObjects (match_list);
      break;
    case eTableMatchAny:
      seq_list = ValNodeCopyPtr (match_list);
      break;
  }
  return seq_list;
}

/*
 * Returns the structured-comment descriptors (OBJ_SEQDESC nodes) of the
 * sequences a table row applies to: the sequences are obtained with
 * GetSequenceListForRowAndColumn, and for each of them every user
 * descriptor accepted by IsUserObjectStructuredComment is collected.
 */
static ValNodePtr GetStructuredCommentListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list)
{
  ValNodePtr seq_list, target_list = NULL, vnp;
  SeqDescrPtr sdp;
  SeqMgrDescContext context;

  seq_list = GetSequenceListForRowAndColumn (match_type, match_list);

  for (vnp = seq_list; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice == OBJ_BIOSEQ) {
      for (sdp = SeqMgrGetNextDescriptor (vnp->data.ptrvalue, NULL, Seq_descr_user, &context);
           sdp != NULL;
           sdp = SeqMgrGetNextDescriptor (vnp->data.ptrvalue, sdp, Seq_descr_user, &context)) {
        if (IsUserObjectStructuredComment (sdp->data.ptrvalue)) {
          ValNodeAddPointer (&target_list, OBJ_SEQDESC,
sdp);
        }
      }
    }
  }
  /* the intermediate list only referenced the Bioseqs; release the nodes alone */
  seq_list = ValNodeFree (seq_list);
  return target_list;
}

/*
 * Produces the list of objects a table cell is to be applied to.
 *
 * The objects matched for the row (match_list) are converted into targets
 * appropriate for the column's field type, targets that do not satisfy
 * constraint are removed, and the result is sorted and made unique.
 *
 * Returns NULL when field or match_type is NULL or no target remains.
 */
static ValNodePtr GetTargetListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list, FieldTypePtr field, ValNodePtr constraint)
{
  ValNodePtr target_list = NULL, vnp_prev = NULL, vnp, vnp_next, tmp_list;
  FeatureFieldPtr feature_field;

  if (field == NULL || match_type == NULL) return NULL;
  switch (field->choice) {
    case FieldType_source_qual:
      target_list = GetBioSourceListForRowAndColumn (match_type, match_list, field->data.ptrvalue);
      break;
    case FieldType_feature_field:
      target_list = GetFeatureListForRowAndColumn (match_type, match_list, field->data.ptrvalue);
      break;
    case FieldType_cds_gene_prot:
      /* translate to an equivalent feature field, use it, then release it */
      feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
      target_list = GetFeatureListForRowAndColumn (match_type, match_list, feature_field);
      feature_field = FeatureFieldFree (feature_field);
      break;
    case FieldType_pub:
      target_list = GetPubListForRowAndColumn (match_type, match_list);
      break;
    case FieldType_rna_field:
      /* translate to an equivalent feature field, use it, then release it */
      feature_field = FeatureFieldFromRnaQual (field->data.ptrvalue);
      target_list = GetFeatureListForRowAndColumn (match_type, match_list, feature_field);
      feature_field = FeatureFieldFree (feature_field);
      break;
    case FieldType_struc_comment_field:
      target_list = GetStructuredCommentListForRowAndColumn (match_type, match_list);
      break;
    case FieldType_dblink:
      tmp_list = GetSequenceListForRowAndColumn (match_type, match_list);
      for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) {
        AddDBLinkDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list);
      }
      tmp_list = ValNodeFree (tmp_list);
      break;
    case FieldType_misc:
      if (field->data.intvalue == Misc_field_genome_project_id) {
        /* here the sequences themselves are the targets */
        target_list = GetSequenceListForRowAndColumn (match_type, match_list);
      } else if (field->data.intvalue == Misc_field_comment_descriptor) {
        tmp_list = GetSequenceListForRowAndColumn (match_type, match_list);
        for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) {
          AddCommentDescriptorDestinationsForBioseq
(vnp->data.ptrvalue, &target_list);
        }
        tmp_list = ValNodeFree (tmp_list);
      } else if (field->data.intvalue == Misc_field_defline) {
        tmp_list = GetSequenceListForRowAndColumn (match_type, match_list);
        for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) {
          AddDeflineDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list);
        }
        tmp_list = ValNodeFree (tmp_list);
      } else if (field->data.intvalue == Misc_field_keyword) {
        tmp_list = GetSequenceListForRowAndColumn (match_type, match_list);
        for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) {
          AddGenbankBlockDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list);
        }
        tmp_list = ValNodeFree (tmp_list);
      }
      break;
    case FieldType_molinfo_field:
      target_list = GetSequenceListForRowAndColumn(match_type, match_list);
      break;
  }

  /* remove targets that do not match constraint */
  vnp = target_list;
  while (vnp != NULL) {
    /* remember the successor: vnp may be unlinked and freed below */
    vnp_next = vnp->next;
    if (!DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, constraint)) {
      if (vnp_prev == NULL) {
        target_list = vnp->next;
      } else {
        vnp_prev->next = vnp->next;
      }
      /* detach before freeing so that only this one node is released */
      vnp->next = NULL;
      vnp = ValNodeFree (vnp);
    } else {
      vnp_prev = vnp;
    }
    vnp = vnp_next;
  }

  /* remove targets found twice */
  target_list = ValNodeSort (target_list, SortVnpByChoiceAndPtrvalue);
  ValNodeUnique (&target_list, SortVnpByChoiceAndPtrvalue, ValNodeFree);

  return target_list;
}

/*
 * Appends a newly allocated "nothing to apply this column to" message to
 * err_list.  The wording depends on the field type of the column:
 * biosource, feature (with the feature name), structured comment, or
 * sequence.  Field types not listed in the switch produce no message.
 *
 * Does nothing when err_list, ft or match_val is NULL.
 */
static void ReportMissingTargets (ValNodeBlockPtr err_list, FieldTypePtr ft, CharPtr match_val, Int4 col_num, Int4 line_num)
{
  CharPtr            feat_name;
  FeatureFieldPtr    field;
  CharPtr            no_feat_fmt = "No %s feature for %s (column %d, line %d)";
  CharPtr            no_src_fmt = "No biosource for %s (column %d, line %d)";
  CharPtr            no_seq_fmt = "No sequence for %s (column %d, line %d)";
  CharPtr            no_cmt_fmt = "No structured comment for %s (column %d, line %d)";
  CharPtr            err_msg;
  RnaQualPtr         rq;

  if (err_list == NULL || ft == NULL || match_val == NULL) return;

  /* buffers are sized as format + text arguments + 30 spare bytes for the two numbers */
  switch (ft->choice) {
    case FieldType_source_qual:
      err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen
(no_src_fmt) + StringLen (match_val) + 30));
      sprintf (err_msg, no_src_fmt, match_val, col_num, line_num);
      ValNodeAddPointerToEnd (err_list, 0, err_msg);
      break;
    case FieldType_feature_field:
      field = (FeatureFieldPtr) ft->data.ptrvalue;
      if (field != NULL) {
        feat_name = GetFeatureNameFromFeatureType (field->type);
        err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt) + StringLen (feat_name) + StringLen (match_val) + 30));
        sprintf (err_msg, no_feat_fmt, feat_name, match_val, col_num, line_num);
        ValNodeAddPointerToEnd (err_list, 0, err_msg);
      }
      break;
    case FieldType_cds_gene_prot:
      /* a temporary feature field is built only to obtain the feature name */
      field = FeatureFieldFromCDSGeneProtField (ft->data.intvalue);
      if (field != NULL) {
        feat_name = GetFeatureNameFromFeatureType (field->type);
        err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt) + StringLen (feat_name) + StringLen (match_val) + 30));
        sprintf (err_msg, no_feat_fmt, feat_name, match_val, col_num, line_num);
        ValNodeAddPointerToEnd (err_list, 0, err_msg);
      }
      field = FeatureFieldFree (field);
      break;
    case FieldType_rna_field:
      rq = (RnaQualPtr) ft->data.ptrvalue;
      if (rq != NULL) {
        /* NOTE(review): if SummarizeRnaType returns allocated memory, feat_name is never released -- confirm */
        feat_name = SummarizeRnaType (rq->type);
        err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt) + StringLen (feat_name) + StringLen (match_val) + 30));
        sprintf (err_msg, no_feat_fmt, feat_name, match_val, col_num, line_num);
        ValNodeAddPointerToEnd (err_list, 0, err_msg);
      }
      break;
    case FieldType_struc_comment_field:
      err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_cmt_fmt) + StringLen (match_val) + 30));
      sprintf (err_msg, no_cmt_fmt, match_val, col_num, line_num);
      ValNodeAddPointerToEnd (err_list, 0, err_msg);
      break;
    case FieldType_misc:
      err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_seq_fmt) + StringLen (match_val) + 30));
      sprintf (err_msg, no_seq_fmt, match_val, col_num, line_num);
      ValNodeAddPointerToEnd (err_list, 0, err_msg);
      break;
  }
}

/*
 * Appends a newly allocated "No ID for line N" message for table line
 * line_num to the list vnb.
 */
static void ReportEmptyIDColumn (ValNodeBlockPtr vnb, Int4 line_num)
{
  CharPtr            err_msg;
  CharPtr
missing_id_fmt = "No ID for line %d";

  /* 15 spare bytes leave room for the printed line number and the terminator */
  err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (missing_id_fmt) + 15));
  sprintf (err_msg, missing_id_fmt, line_num);
  ValNodeAddPointerToEnd (vnb, 0, err_msg);
}

/*
 * Walks a table line (val_vnp) and the column configuration list (col_vnp)
 * in parallel and returns the value node that sits in the first column
 * whose configuration has a match type.  Returns NULL when either list
 * ends before such a column is found.
 */
static ValNodePtr FindMatchChoiceInLine (ValNodePtr val_vnp, ValNodePtr col_vnp)
{
  TabColumnConfigPtr t;

  while (val_vnp != NULL && col_vnp != NULL) {
    t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
    if (t != NULL && t->match_type != NULL) {
      return val_vnp;
    }
    val_vnp = val_vnp->next;
    col_vnp = col_vnp->next;
  }
  return NULL;
}

/*
 * Returns the mRNA associated with a feature, as reported by
 * GetmRNAforCDS.  A protein feature is first replaced by the coding region
 * whose product is the protein's Bioseq; any other feature is handed to
 * GetmRNAforCDS as is.
 *
 * Returns NULL when sfp is NULL or no coding region is found for a protein
 * feature.
 */
NLM_EXTERN SeqFeatPtr GetmRNAForFeature (SeqFeatPtr sfp)
{
  BioseqPtr pbsp;

  if (sfp == NULL) return NULL;
  if (sfp->data.choice == SEQFEAT_PROT) {
    pbsp = BioseqFindFromSeqLoc (sfp->location);
    sfp = SeqMgrGetCDSgivenProduct (pbsp, NULL);
    if (sfp == NULL) return NULL;
  }
  return GetmRNAforCDS (sfp);
}

/*
 * Makes the product name of the mRNA that belongs to sfp equal to the
 * first name of the corresponding protein.
 *
 * sfp must be a protein feature or a coding region.  The mRNA is located
 * with GetmRNAForFeature; an RnaRef is created for it if it has none.  The
 * previous ext value is released, and ext is then either cleared (choice 0)
 * when the protein has no name text, or set to a copy of the first protein
 * name (choice 1).
 *
 * Returns TRUE when an mRNA was found and updated, FALSE otherwise.
 */
NLM_EXTERN Boolean AdjustmRNAProductToMatchProteinProduct (SeqFeatPtr sfp)
{
  SeqFeatPtr mrna;
  ProtRefPtr prp;
  RnaRefPtr  rrp;

  if (sfp == NULL) {
    return FALSE;
  }

  if (sfp->data.choice == SEQFEAT_PROT) {
    prp = (ProtRefPtr) sfp->data.value.ptrvalue;
  } else if (sfp->data.choice == SEQFEAT_CDREGION) {
    prp = GetProtRefForFeature(sfp);
  } else {
    return FALSE;
  }

  mrna = GetmRNAForFeature (sfp);

  if (mrna == NULL) return FALSE;

  rrp = (RnaRefPtr) mrna->data.value.ptrvalue;
  if (rrp == NULL)
  {
    rrp = RnaRefNew();
    mrna->data.value.ptrvalue = rrp;
  }

  /* NOTE(review): the old value is released with MemFree whatever ext.choice was -- confirm that is safe for every choice */
  rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
  if (prp == NULL || prp->name == NULL || StringHasNoText (prp->name->data.ptrvalue))
  {
    rrp->ext.choice = 0;
  }
  else
  {
    rrp->ext.choice = 1;
    rrp->ext.value.ptrvalue = StringSave (prp->name->data.ptrvalue);
  }
  return TRUE;
}

/*
 * Returns TRUE when ft designates the product name of a coding region or
 * protein: either a feature field of type CDS or protein with the legal
 * qualifier "product", or the CDS-gene-prot field "protein name".
 */
NLM_EXTERN Boolean IsFieldTypeCDSProduct (FieldTypePtr ft)
{
  FeatureFieldPtr field;
  Boolean         rval = FALSE;

  if (ft == NULL) return FALSE;
  if (ft->choice == FieldType_feature_field) {
    field = (FeatureFieldPtr) ft->data.ptrvalue;
    if (field != NULL && field->field != NULL
        && field->field->choice == FeatQualChoice_legal_qual) {
      if (field->type ==
Macro_feature_type_cds
          && field->field->data.intvalue == Feat_qual_legal_product) {
        rval = TRUE;
      } else if (field->type == Macro_feature_type_prot
          && field->field->data.intvalue == Feat_qual_legal_product) {
        rval = TRUE;
      }
    }
  } else if (ft->choice == FieldType_cds_gene_prot) {
    if (ft->data.intvalue == CDSGeneProt_field_prot_name) {
      rval = TRUE;
    }
  }
  return rval;
}

/*
 * Returns TRUE when ft designates a protein description: either a feature
 * field of type CDS or protein with the legal qualifier "description", or
 * the CDS-gene-prot field "protein description".
 */
static Boolean IsFieldTypeProteinDesc (FieldTypePtr ft)
{
  FeatureFieldPtr field;
  Boolean         rval = FALSE;

  if (ft == NULL) return FALSE;
  if (ft->choice == FieldType_feature_field) {
    field = (FeatureFieldPtr) ft->data.ptrvalue;
    if (field != NULL
        && (field->type == Macro_feature_type_cds || field->type == Macro_feature_type_prot)
        && field->field != NULL
        && field->field->choice == FeatQualChoice_legal_qual
        && field->field->data.intvalue == Feat_qual_legal_description) {
      rval = TRUE;
    }
  } else if (ft->choice == FieldType_cds_gene_prot) {
    if (ft->data.intvalue == CDSGeneProt_field_prot_description) {
      rval = TRUE;
    }
  }
  return rval;
}

/*
 * Returns TRUE when ft designates a gene locus tag, in any of the three
 * ways the field types can express it: a gene feature field with the legal
 * qualifier "locus tag", the CDS-gene-prot field "gene locus tag", or the
 * RNA field "gene locus tag".
 */
static Boolean IsFieldTypeGeneLocusTag (FieldTypePtr ft)
{
  FeatureFieldPtr field;
  RnaQualPtr      rq;
  Boolean         rval = FALSE;

  if (ft == NULL) return FALSE;
  if (ft->choice == FieldType_feature_field) {
    field = (FeatureFieldPtr) ft->data.ptrvalue;
    if (field != NULL
        && field->type == Macro_feature_type_gene
        && field->field != NULL
        && field->field->choice == FeatQualChoice_legal_qual
        && field->field->data.intvalue == Feat_qual_legal_locus_tag) {
      rval = TRUE;
    }
  } else if (ft->choice == FieldType_cds_gene_prot) {
    if (ft->data.intvalue == CDSGeneProt_field_gene_locus_tag) {
      rval = TRUE;
    }
  } else if (ft->choice == FieldType_rna_field) {
    rq = (RnaQualPtr) ft->data.ptrvalue;
    if (rq != NULL && rq->field == Rna_field_gene_locus_tag) {
      rval = TRUE;
    }
  }
  return rval;
}

/*
 * Checks the values of a tab-delimited table against the column
 * configuration before the table is applied.
 *
 * table is a list of lines, each line being a list of string values;
 * columns is the parallel list of TabColumnConfig entries.  Checks made:
 * a match column must exist; an "apply to all" table may have at most two
 * lines; molinfo and source-location values must be recognised; locus tag
 * values are passed to FindBadLocusTagsInList.
 *
 * Returns a list of newly allocated error message strings, or NULL when
 * table or columns is NULL or no problem was found.
 */
NLM_EXTERN ValNodePtr ValidateTabTableValues (ValNodePtr table, ValNodePtr columns)
{
  ValNodePtr err_list = NULL;
  ValNodePtr line_vnp, col_vnp, val_vnp;
  Int4       line_num, col_num;
  TabColumnConfigPtr t;
  ValNodePtr locus_tag_values = NULL,
bad_locus_tags = NULL, vnp, tmp_field, sq; CharPtr bad_format_fmt = "Locus tag %s has incorrect format"; CharPtr dup_fmt = "Locus tag %s appears in the table more than once"; CharPtr inconsistent_fmt = "Locus tag prefix for %s is inconsistent"; CharPtr bad_molinfo_fmt = "'%s' is not a valid value for this field"; CharPtr err_msg; MatchTypePtr match_type; if (table == NULL || columns == NULL) { return NULL; } match_type = FindMatchTypeInHeader (columns); if (match_type == NULL) { ValNodeAddPointer (&err_list, 0, StringSave ("No match type")); return err_list; } if (match_type->choice == eTableMatchAny && table->next != NULL) { if (table->next->next != NULL) { ValNodeAddPointer (&err_list, 0, StringSave ("Too many rows for apply to all")); } else { /* skip header */ table = table->next; } } for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) { for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1; val_vnp != NULL && col_vnp != NULL; val_vnp = val_vnp->next, col_vnp = col_vnp->next, col_num++) { t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; if (t == NULL || t->match_type != NULL || val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)) { continue; } if (IsFieldTypeGeneLocusTag (t->field)) { ValNodeAddPointer (&locus_tag_values, 0, val_vnp->data.ptrvalue); } else if (t->field != NULL && t->field->choice == FieldType_molinfo_field && val_vnp->data.ptrvalue != NULL) { tmp_field = MolinfoFieldFromFieldAndStringValue (t->field->data.ptrvalue, val_vnp->data.ptrvalue); if (tmp_field == NULL) { err_msg =(CharPtr) MemNew (sizeof (Char) * (StringLen(bad_molinfo_fmt) + StringLen (val_vnp->data.ptrvalue))); sprintf (err_msg, bad_molinfo_fmt, val_vnp->data.ptrvalue); ValNodeAddPointer (&err_list, 0, err_msg); } tmp_field = MolinfoFieldFree(tmp_field); } else if (t->field != NULL && t->field->choice == FieldType_source_qual && (sq = (ValNodePtr)(t->field->data.ptrvalue)) != NULL && sq->choice == 
SourceQualValChoice_location) { tmp_field = SrcLocationFieldFromValue(val_vnp->data.ptrvalue); if (tmp_field == NULL) { err_msg =(CharPtr) MemNew (sizeof (Char) * (StringLen(bad_molinfo_fmt) + StringLen (val_vnp->data.ptrvalue))); sprintf (err_msg, bad_molinfo_fmt, val_vnp->data.ptrvalue); ValNodeAddPointer (&err_list, 0, err_msg); } tmp_field = FieldTypeFree (tmp_field); } } } bad_locus_tags = FindBadLocusTagsInList (locus_tag_values); for (vnp = bad_locus_tags; vnp != NULL; vnp = vnp->next) { switch (vnp->choice) { case eLocusTagErrorBadFormat: err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_format_fmt) + StringLen (vnp->data.ptrvalue))); sprintf (err_msg, bad_format_fmt, vnp->data.ptrvalue); ValNodeAddPointer (&err_list, 0, err_msg); break; case eLocusTagErrorDuplicate: err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (dup_fmt) + StringLen (vnp->data.ptrvalue))); sprintf (err_msg, dup_fmt, vnp->data.ptrvalue); ValNodeAddPointer (&err_list, 0, err_msg); break; case eLocusTagErrorInconsistentPrefix: err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (inconsistent_fmt) + StringLen (vnp->data.ptrvalue))); sprintf (err_msg, inconsistent_fmt, vnp->data.ptrvalue); ValNodeAddPointer (&err_list, 0, err_msg); break; } } locus_tag_values = ValNodeFree (locus_tag_values); return err_list; } NLM_EXTERN ValNodePtr GetSequenceListsForMatchTypeInTabTable (SeqEntryPtr sep, ValNodePtr table, Int4 col, MatchTypePtr match_type, ValNodePtr PNTR p_err_list) { ValNodePtr vnp_row, vnp; ValNodePtr sequence_lists = NULL, match_list, target_list; Uint2 entityID; Int4 num, line; CharPtr no_match_fmt = "No match for %s, line %d"; CharPtr no_match_txt_fmt = "No match text for line %d"; CharPtr msg; BioseqSearchIndexPtr index = NULL; if (sep == NULL || table == NULL || match_type == NULL || col < 0) { return NULL; } entityID = SeqMgrGetEntityIDForSeqEntry (sep); index = BuildIDStringsList(sep); if (match_type->choice == eTableMatchAny && table->next != NULL) { /* skip 
first row, must contain header */ table = table->next; } for (vnp_row = table, line = 1; vnp_row != NULL; vnp_row = vnp_row->next, line++) { vnp = vnp_row->data.ptrvalue; num = 0; while (vnp != NULL && num < col) { vnp = vnp->next; num++; } if (vnp == NULL || StringHasNoText (vnp->data.ptrvalue)) { ValNodeAddPointer (&sequence_lists, 0, NULL); if (p_err_list != NULL) { msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_txt_fmt) + 15)); sprintf (msg, no_match_txt_fmt, line); ValNodeAddPointer (p_err_list, 0, msg); } } else { match_list = FindMatchForRowEx (match_type, vnp->data.ptrvalue, entityID, sep, index); target_list = GetSequenceListForRowAndColumn (match_type, match_list); match_list = ValNodeFree (match_list); ValNodeAddPointer (&sequence_lists, 0, target_list); if (target_list == NULL && p_err_list != NULL) { msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (vnp->data.ptrvalue) + 15)); sprintf (msg, no_match_fmt, vnp->data.ptrvalue, line); ValNodeAddPointer (p_err_list, 0, msg); } } } index = BioseqSearchIndexFree (index); return sequence_lists; } NLM_EXTERN ValNodePtr FreeSequenceLists (ValNodePtr lists) { ValNodePtr vnp; for (vnp = lists; vnp != NULL; vnp = vnp->next) { vnp->data.ptrvalue = ValNodeFree (vnp->data.ptrvalue); } lists = ValNodeFree (lists); return lists; } NLM_EXTERN ValNodePtr GetBioseqMatchesForSequenceIDs (ValNodePtr query_list, Uint1 match_location, SeqEntryPtr sep) { ValNodePtr response_list = NULL, vnp, single_list, vnp_t; BioseqSearchIndexPtr index = NULL; BioseqPtr bsp; ValNodeBlock thisid_index; BioseqSearchItemPtr si; Char num_buf[15]; CharPtr match_str; index = BuildIDStringsList(sep); for (vnp = query_list; vnp != NULL; vnp = vnp->next) { InitValNodeBlock (&thisid_index, NULL); BuildIdStringsListForIdList (vnp->data.ptrvalue, NULL, &thisid_index); bsp = NULL; for (vnp_t = thisid_index.head; vnp_t != NULL && bsp == NULL; vnp_t = vnp_t->next) { si = (BioseqSearchItemPtr) vnp_t->data.ptrvalue; 
if (si->num > 0) { sprintf (num_buf, "%d", si->num); match_str = num_buf; } else { match_str = si->str; } if (match_location == String_location_equals) { bsp = FindStringInIdListIndex (match_str, index); } else { single_list = FindListInIdListIndex (match_location, match_str, index); if (single_list != NULL && single_list->next == NULL) { bsp = single_list->data.ptrvalue; } single_list = ValNodeFree (single_list); } } thisid_index.head = BioseqSearchItemListFree(thisid_index.head); ValNodeAddPointer (&response_list, OBJ_BIOSEQ, bsp); } index = BioseqSearchIndexFree (index); return response_list; } static ValNodePtr ReportTableSummaryLine (Int4 err_lines, Int4 total_lines, CharPtr fmt) { CharPtr str; ValNodePtr vnp; str = (CharPtr) MemNew (sizeof (Char) + (StringLen (fmt) + 30)); sprintf (str, fmt, err_lines, total_lines); vnp = ValNodeNew (NULL); vnp->data.ptrvalue = str; return vnp; } NLM_EXTERN ValNodePtr GetObjectTableForTabTable (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr PNTR p_err_list) { ValNodeBlock vnb; ValNodePtr line_vnp, val_vnp, col_vnp, err_vnp; ValNodePtr obj_table = NULL, obj_row, last_obj = NULL, tmp, last = NULL; Int4 line_num = 1, col_num; Uint2 entityID; ValNodePtr match_list, match_choice, target_list; TabColumnConfigPtr t; CharPtr err_msg; CharPtr no_match_fmt = "No match for %s, line %d"; MatchTypePtr match_type; Int4 num_empty = 0, num_missing = 0, num_no_targets = 0; BioseqSearchIndexPtr index = NULL; vnb.head = NULL; vnb.tail = NULL; if (sep == NULL) { ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No SeqEntry")); } if (table == NULL) { ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No table")); } if (columns == NULL) { ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No column information")); } match_type = FindMatchTypeInHeader (columns); if (match_type == NULL) { ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No Match Type")); } else if (match_type->choice == eTableMatchAny && table != NULL && table->next != NULL) { if 
(table->next->next != NULL) { ValNodeAddPointerToEnd (&vnb, 0, StringSave ("Too many rows for apply to all")); } else { /* skip header */ table = table->next; } } if (vnb.head != NULL) { if (p_err_list == NULL) { vnb.head = ValNodeFreeData (vnb.head); } else { *p_err_list = vnb.head; } return NULL; } entityID = SeqMgrGetEntityIDForSeqEntry (sep); index = BuildIDStringsList(sep); last = NULL; for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) { obj_row = NULL; match_choice = FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns); if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) { ReportEmptyIDColumn (&vnb, line_num); num_empty++; } else { match_list = FindMatchForRowEx (match_type, match_choice->data.ptrvalue, entityID, sep, index); if (match_list == NULL) { err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (match_choice->data.ptrvalue) + 15)); sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num); ValNodeAddPointerToEnd (&vnb, 0, err_msg); num_missing ++; } else { for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1; col_vnp != NULL; col_vnp = col_vnp->next, col_num++) { target_list = NULL; t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; if (t == NULL || t->match_type != NULL || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { /* no targets */ } else { target_list = GetTargetListForRowAndColumn (match_type, match_list, t->field, t->constraint); if (target_list == NULL) { ReportMissingTargets (&vnb, t->field, match_choice->data.ptrvalue, col_num, line_num); num_no_targets++; } } ValNodeAddPointer (&obj_row, 0, target_list); if (val_vnp != NULL) { val_vnp = val_vnp->next; } } } match_list = ValNodeFree (match_list); } tmp = ValNodeAddPointer (&last_obj, 0, obj_row); if (obj_table == NULL) { obj_table = last_obj; } last_obj = tmp; } match_type = MatchTypeFree (match_type); index = 
BioseqSearchIndexFree (index); if (vnb.head != NULL) { if (num_empty > 0) { err_vnp = ReportTableSummaryLine (num_empty, line_num - 1, "%d lines out of %d have no ID value"); err_vnp->next = vnb.head; vnb.head = err_vnp; } if (num_no_targets > 0) { err_vnp = ReportTableSummaryLine (num_no_targets, line_num - 1, "%d lines out of %d have no targets"); err_vnp->next = vnb.head; vnb.head = err_vnp; } if (num_missing > 0) { err_vnp = ReportTableSummaryLine (num_missing, line_num - 1, "%d lines out of %d have no match"); err_vnp->next = vnb.head; vnb.head = err_vnp; } if (p_err_list == NULL) { vnb.head = ValNodeFreeData (vnb.head); } else { *p_err_list = vnb.head; } } return obj_table; } NLM_EXTERN ValNodePtr FreeObjectTableForTabTable (ValNodePtr table) { ValNodePtr vnp_next, vnp_row, vnp_row_next; while (table != NULL) { vnp_next = table->next; table->next = NULL; vnp_row = table->data.ptrvalue; while (vnp_row != NULL) { vnp_row_next = vnp_row->next; vnp_row->next = NULL; vnp_row->data.ptrvalue = ValNodeFree (vnp_row->data.ptrvalue); vnp_row = ValNodeFree (vnp_row); vnp_row = vnp_row_next; } table = ValNodeFree (table); table = vnp_next; } return table; } typedef struct countfeat { Uint1 featdef; Int4 num; } CountFeatData, PNTR CountFeatPtr; static void CountFeaturesCallback (SeqFeatPtr sfp, Pointer userdata) { CountFeatPtr p; if (sfp == NULL || userdata == NULL) return; p = (CountFeatPtr) userdata; if (sfp->idx.subtype == p->featdef) { p->num++; } } static void CountBioSourceDescriptorsCallback (SeqDescrPtr sdp, Pointer userdata) { Int4Ptr p; p = (Int4Ptr) userdata; if (sdp != NULL && p != NULL && sdp->choice == Seq_descr_source) { (*p)++; } } static void CountPubDescriptorsCallback (SeqDescrPtr sdp, Pointer userdata) { Int4Ptr p; p = (Int4Ptr) userdata; if (sdp != NULL && p != NULL && sdp->choice == Seq_descr_pub) { (*p)++; } } static ValNodePtr CountObjectsForColumnFields (SeqEntryPtr sep, ValNodePtr columns) { ValNodePtr count_list = NULL, vnp; TabColumnConfigPtr 
t; CountFeatData d; FeatureFieldPtr f; Int4 num; Uint1 featdef = 0; ValNodePtr tmp_list = NULL; d.featdef = 0; d.num = 0; for (vnp = columns; vnp != NULL; vnp = vnp->next) { num = 0; t = (TabColumnConfigPtr) vnp->data.ptrvalue; if (t != NULL && t->match_type == NULL && t->field != NULL) { switch (t->field->choice) { case FieldType_source_qual: if (featdef != FEATDEF_BIOSRC) { d.featdef = FEATDEF_BIOSRC; d.num = 0; VisitFeaturesInSep (sep, &d, CountFeaturesCallback); VisitDescriptorsInSep (sep, &(d.num), CountBioSourceDescriptorsCallback); } num = d.num; break; case FieldType_feature_field: f = (FeatureFieldPtr) t->field->data.ptrvalue; if (f != NULL) { featdef = GetFeatdefFromFeatureType(f->type); if (featdef != d.featdef) { d.featdef = featdef; d.num = 0; VisitFeaturesInSep (sep, &d, CountFeaturesCallback); } num = d.num; } break; case FieldType_cds_gene_prot: f = FeatureFieldFromCDSGeneProtField (t->field->data.intvalue); if (f != NULL) { featdef = GetFeatdefFromFeatureType(f->type); if (featdef != d.featdef) { d.featdef = featdef; d.num = 0; VisitFeaturesInSep (sep, &d, CountFeaturesCallback); } num = d.num; } f = FeatureFieldFree (f); break; case FieldType_rna_field: f = FeatureFieldFromRnaQual (t->field->data.ptrvalue); if (f != NULL) { featdef = GetFeatdefFromFeatureType(f->type); if (featdef != d.featdef) { d.featdef = featdef; d.num = 0; VisitFeaturesInSep (sep, &d, CountFeaturesCallback); } num = d.num; } f = FeatureFieldFree (f); break; case FieldType_pub: d.featdef = FEATDEF_PUB; d.num = 0; VisitFeaturesInSep (sep, &d, CountFeaturesCallback); VisitDescriptorsInSep (sep, &(d.num), CountPubDescriptorsCallback); num = d.num; break; case FieldType_struc_comment_field: VisitDescriptorsInSep (sep, &tmp_list, CollectStructuredCommentsCallback); num = ValNodeLen (tmp_list); tmp_list = ValNodeFree (tmp_list); break; case FieldType_dblink: tmp_list = CollectDBLinkDescriptors (sep); num = ValNodeLen (tmp_list); tmp_list = ValNodeFree (tmp_list); break; case 
FieldType_misc: if (t->field->data.intvalue == Misc_field_genome_project_id) { /* VisitBioseqsInSep (sep, &tmp_list, CollectNucBioseqCallback); */ tmp_list = CollectNucBioseqs (sep); num = ValNodeLen (tmp_list); tmp_list = ValNodeFree (tmp_list); } else if (t->field->data.intvalue == Misc_field_comment_descriptor) { tmp_list = CollectCommentDescriptors (sep); num = ValNodeLen (tmp_list); tmp_list = ValNodeFree (tmp_list); } else if (t->field->data.intvalue == Misc_field_defline) { tmp_list = CollectDeflineDescriptors (sep); num = ValNodeLen (tmp_list); tmp_list = ValNodeFree (tmp_list); } else if (t->field->data.intvalue == Misc_field_keyword) { tmp_list = CollectGenbankBlockDescriptors (sep); num = ValNodeLen (tmp_list); tmp_list = ValNodeFree (tmp_list); } break; case FieldType_molinfo_field: VisitBioseqsInSep (sep, &tmp_list, CollectBioseqCallback); num = ValNodeLen (tmp_list); tmp_list = ValNodeFree (tmp_list); break; } } ValNodeAddInt (&count_list, 0, num); } return count_list; } NLM_EXTERN ValNodePtr ApplyTableValuesToObjectTable (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr obj_table) { ValNodePtr val_line_vnp, obj_line_vnp; ValNodePtr val_vnp, obj_vnp, col_vnp; ValNodePtr target_vnp, tmp_field; TabColumnConfigPtr t; CharPtr val, qual_name; ValNodePtr err_list = NULL, count_list, count_affected_list = NULL, count_vnp, count_tot_vnp, sq; CharPtr err_msg; CharPtr bad_col_val_fmt = "Did not set value for column %d, line %d"; CharPtr num_affected_fmt = "%d fields affected"; CharPtr col_num_affected_fmt = "For %s (column %d), %d items were affected out of %d total"; Int4 num_fields_affected = 0, col_num, line_num, num_this_column; Boolean success; ValNodePtr count_msg = NULL; MatchTypePtr match_type; count_list = CountObjectsForColumnFields (sep, columns); match_type = FindMatchTypeInHeader (columns); if (match_type->choice == eTableMatchAny && table->next != NULL) { /* skip first row, must contain header */ table = table->next; } for 
(val_line_vnp = table, obj_line_vnp = obj_table, line_num = 1; val_line_vnp != NULL && obj_line_vnp != NULL; val_line_vnp = val_line_vnp->next, obj_line_vnp = obj_line_vnp->next, line_num++) { val_vnp = val_line_vnp->data.ptrvalue; obj_vnp = obj_line_vnp->data.ptrvalue; col_vnp = columns; col_num = 1; count_vnp = count_affected_list; while (obj_vnp != NULL && col_vnp != NULL) { num_this_column = 0; if (obj_vnp->data.ptrvalue != NULL) { t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; if (t == NULL || t->match_type != NULL || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { /* ignore column or skip blank value */ } else { if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) { val = ""; } else { val = val_vnp->data.ptrvalue; } for (target_vnp = obj_vnp->data.ptrvalue; target_vnp != NULL; target_vnp = target_vnp->next) { if (val[0] == 0) { success = RemoveFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, t->field, NULL); } else { if (t->field != NULL && t->field->choice == FieldType_molinfo_field) { /* adjust molinfo fields */ success = FALSE; if (target_vnp->choice == OBJ_BIOSEQ) { tmp_field = MolinfoFieldFromFieldAndStringValue (t->field->data.ptrvalue, val_vnp->data.ptrvalue); if (tmp_field != NULL) { success = SetSequenceQualOnBioseq ((BioseqPtr) target_vnp->data.ptrvalue, tmp_field); tmp_field = MolinfoFieldFree(tmp_field); } } } else if (t->field != NULL && t->field->choice == FieldType_source_qual && (sq = (ValNodePtr)(t->field->data.ptrvalue)) != NULL && sq->choice == SourceQualValChoice_location) { /* adjust for source location */ success = FALSE; tmp_field = SrcLocationFieldFromValue(val_vnp->data.ptrvalue); if (tmp_field != NULL) { success = SetFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, tmp_field, NULL, val_vnp->data.ptrvalue, t->existing_text); tmp_field = FieldTypeFree (tmp_field); } } else { success = SetFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, 
t->field, NULL, val_vnp->data.ptrvalue, t->existing_text); } } if (success) { num_fields_affected++; num_this_column++; if (t->match_mrna && IsFieldTypeCDSProduct (t->field) && target_vnp->choice == OBJ_SEQFEAT) { if (AdjustmRNAProductToMatchProteinProduct (target_vnp->data.ptrvalue)) { num_fields_affected++; } } } else { err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_col_val_fmt) + 30)); sprintf (err_msg, bad_col_val_fmt, col_num, line_num); ValNodeAddPointer (&err_list, 0, err_msg); } } } } if (val_vnp != NULL) { val_vnp = val_vnp->next; } if (count_vnp == NULL) { ValNodeAddInt (&count_affected_list, 0, num_this_column); } else { count_vnp->data.intvalue += num_this_column; count_vnp = count_vnp->next; } obj_vnp = obj_vnp->next; col_vnp = col_vnp->next; col_num++; } } /* put message at top of list for number of fields affected */ err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_affected_fmt) + 15)); sprintf (err_msg, num_affected_fmt, num_fields_affected); ValNodeAddPointer (&count_msg, 0, err_msg); /* if any affected, list number of fields per column, and the total in the record */ if (num_fields_affected > 0) { for (count_vnp = count_affected_list, count_tot_vnp = count_list, col_vnp = columns, col_num = 1; count_vnp != NULL && count_tot_vnp != NULL && col_vnp != NULL; count_vnp = count_vnp->next, count_tot_vnp = count_tot_vnp->next, col_vnp = col_vnp->next, col_num++) { t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; if (t != NULL && t->match_type == NULL) { qual_name = SummarizeFieldType (t->field); err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (col_num_affected_fmt) + StringLen (qual_name) + 45)); sprintf (err_msg, col_num_affected_fmt, qual_name, col_num, count_vnp->data.intvalue, count_tot_vnp->data.intvalue); ValNodeAddPointer (&count_msg, 0, err_msg); qual_name = MemFree (qual_name); } } } ValNodeLink (&count_msg, err_list); count_list = ValNodeFree (count_list); count_affected_list = ValNodeFree (count_affected_list); 
return count_msg; } static ValNodePtr FindRowsForObjectInObjectTable (ValNodePtr obj_table, Int4 column, Uint1 choice, Pointer data) { Int4 col_num, row_num; ValNodePtr line_vnp, col_vnp, obj_vnp; ValNodePtr match_rows = NULL; if (obj_table == NULL || column < 0) { return NULL; } for (line_vnp = obj_table, row_num = 0; line_vnp != NULL; line_vnp = line_vnp->next, row_num++) { col_vnp = line_vnp->data.ptrvalue; col_num = 0; while (col_num < column && col_vnp != NULL) { col_vnp = col_vnp->next; col_num++; } if (col_vnp != NULL) { obj_vnp = col_vnp->data.ptrvalue; while (obj_vnp != NULL && (obj_vnp->choice != choice || obj_vnp->data.ptrvalue != data)) { obj_vnp = obj_vnp->next; } if (obj_vnp != NULL) { ValNodeAddInt (&match_rows, 0, row_num); } } } return match_rows; } static CharPtr FormatMultipleDestinationErrorMessage (Int4 col_num, ValNodePtr match_rows) { CharPtr multi_fmt = "Multiple rows apply to the same object for column %d. Matching rows:"; CharPtr err_msg; Char buf[16]; ValNodePtr vnp; err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (multi_fmt) + 30 + 15 * ValNodeLen (match_rows))); sprintf (err_msg, multi_fmt, col_num); for (vnp = match_rows; vnp != NULL; vnp = vnp->next) { sprintf (buf, "%d", vnp->data.intvalue + 1); StringCat (err_msg, buf); if (vnp->next != NULL) { StringCat (err_msg, ","); } } return err_msg; } NLM_EXTERN ValNodePtr CheckObjTableForRowsThatApplyToTheSameDestination (ValNodePtr obj_table) { Int4 col_num; ValNodeBlock vnb, err_list; ValNodePtr line_vnp, col_vnp, obj_vnp, vnp; ValNodePtr col_obj_list; Boolean any_column_values_left; ValNodePtr match_rows; vnb.head = NULL; vnb.tail = NULL; err_list.head = NULL; err_list.tail = NULL; /* now, for each row, get pointer to first column */ for (line_vnp = obj_table; line_vnp != NULL; line_vnp = line_vnp->next) { if (line_vnp->data.ptrvalue != NULL) { ValNodeAddPointerToEnd (&vnb, 0, line_vnp->data.ptrvalue); } } /* now for each column, make a list of all features in the column, then sort 
to see if there are duplicates */ any_column_values_left = TRUE; col_num = 1; while (any_column_values_left) { any_column_values_left = FALSE; col_obj_list = NULL; for (vnp = vnb.head; vnp != NULL; vnp = vnp->next) { col_vnp = vnp->data.ptrvalue; if (col_vnp != NULL) { obj_vnp = col_vnp->data.ptrvalue; ValNodeLink (&col_obj_list, ValNodeCopyPtr (obj_vnp)); vnp->data.ptrvalue = col_vnp->next; any_column_values_left = TRUE; } } if (col_obj_list != NULL) { col_obj_list = ValNodeSort (col_obj_list, SortVnpByChoiceAndPtrvalue); for (vnp = col_obj_list; vnp != NULL && vnp->next != NULL; vnp = vnp->next) { if (vnp->choice == vnp->next->choice && vnp->data.ptrvalue == vnp->next->data.ptrvalue) { match_rows = FindRowsForObjectInObjectTable (obj_table, col_num - 1, vnp->choice, vnp->data.ptrvalue); /* report rows with matches */ ValNodeAddPointerToEnd (&err_list, col_num, FormatMultipleDestinationErrorMessage (col_num, match_rows)); match_rows = ValNodeFree (match_rows); /* skip over the cluster of matches */ while (vnp->next != NULL && vnp->choice == vnp->next->choice) { vnp = vnp->next; } } } col_obj_list = ValNodeFree (col_obj_list); } col_num++; } vnb.head = ValNodeFree (vnb.head); return err_list.head; } static CharPtr GetMatchTextForLine (ValNodePtr values, ValNodePtr columns) { ValNodePtr val_vnp, col_vnp; CharPtr match_txt = NULL; TabColumnConfigPtr t; for (val_vnp = values, col_vnp = columns; val_vnp != NULL && col_vnp != NULL; val_vnp = val_vnp->next, col_vnp = col_vnp->next) { t = col_vnp->data.ptrvalue; if (t != NULL && t->match_type != NULL) { match_txt = val_vnp->data.ptrvalue; break; } } return match_txt; } /* Note - when creating error messages, mark summary messages with choice = 1 */ NLM_EXTERN ValNodePtr CheckObjTableForExistingText (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr obj_table) { ValNodeBlock vnb; ValNodePtr val_line_vnp, obj_line_vnp; ValNodePtr val_vnp, obj_vnp, col_vnp; ValNodePtr col_tot = NULL, col_tot_vnp; Int4 
line_num = 1, col_num, num_existing_text = 0; Uint2 entityID; TabColumnConfigPtr t; CharPtr err_msg, str, qual_name, val; CharPtr already_has_val_fmt = "%s\t%s\t%s\t%d\t%s\t%d"; CharPtr num_existing_text_fmt = "%d fields already have text.\nID\tOld Value\tReplacement\tColumn\tQualifier\tLine"; CharPtr mrna_warn_fmt = "%d coding region features have mRNAs, but %d do not."; CharPtr col_tot_fmt = "For column %d, %d out of %d fields already have text."; ValNodePtr target_list, feat_vnp; Int4 num_with_mrna = 0, num_without_mrna = 0; CharPtr match_txt; CharPtr new_val; MatchTypePtr match_type; vnb.head = NULL; vnb.tail = NULL; if (sep == NULL) { ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No SeqEntry")); } if (table == NULL) { ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No table")); } if (columns == NULL) { ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No column information")); } match_type = FindMatchTypeInHeader (columns); if (match_type == NULL) { ValNodeAddPointerToEnd (&vnb, 1, StringSave ("Must have match type")); } else if (table != NULL && match_type->choice == eTableMatchAny && table->next != NULL) { if (table->next->next != NULL) { ValNodeAddPointerToEnd (&vnb, 1, StringSave ("Table has too many rows for apply to all")); } else { /* skip first row, must contain header */ table = table->next; } } if (vnb.head != NULL) { return vnb.head; } entityID = SeqMgrGetEntityIDForSeqEntry (sep); for (val_line_vnp = table, obj_line_vnp = obj_table, line_num = 1; val_line_vnp != NULL && obj_line_vnp != NULL; val_line_vnp = val_line_vnp->next, obj_line_vnp = obj_line_vnp->next, line_num++) { val_vnp = val_line_vnp->data.ptrvalue; obj_vnp = obj_line_vnp->data.ptrvalue; col_vnp = columns; if (val_vnp == NULL || obj_vnp == NULL) continue; col_num = 1; col_tot_vnp = col_tot; if (col_tot_vnp == NULL) { col_tot_vnp = ValNodeAddInt (&col_tot, 0, 0); } while (obj_vnp != NULL && col_vnp != NULL) { if (obj_vnp->data.ptrvalue != NULL) { t = (TabColumnConfigPtr) 
col_vnp->data.ptrvalue; if (t == NULL || t->match_type != NULL || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { /* ignore column or skip blank value */ } else { target_list = obj_vnp->data.ptrvalue; if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) { val = ""; } else { val = val_vnp->data.ptrvalue; } for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) { /* check for existing text */ str = GetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL); if (!StringHasNoText (str)) { qual_name = SummarizeFieldType (t->field); match_txt = GetMatchTextForLine (val_line_vnp->data.ptrvalue, columns); if (match_txt == NULL) { match_txt = ""; } new_val = StringSave (str); SetStringValue (&new_val, val, t->existing_text); err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (already_has_val_fmt) + StringLen (match_txt) + StringLen (str) + StringLen (new_val) + StringLen (qual_name) + 30)); sprintf (err_msg, already_has_val_fmt, match_txt, str, new_val, col_num, qual_name, line_num); ValNodeAddPointerToEnd (&vnb, 0, err_msg); num_existing_text ++; new_val = MemFree (new_val); col_tot_vnp->data.intvalue ++; } str = MemFree (str); /* check for mrna if changing CDS product */ if (IsFieldTypeCDSProduct (t->field) && feat_vnp->choice == OBJ_SEQFEAT) { if (GetmRNAForFeature (feat_vnp->data.ptrvalue) != NULL) { num_with_mrna++; } else { num_without_mrna++; } } } } } if (val_vnp != NULL) { val_vnp = val_vnp->next; } obj_vnp = obj_vnp->next; col_vnp = col_vnp->next; col_num++; col_tot_vnp = col_tot_vnp->next; if (col_tot_vnp == NULL) { col_tot_vnp = ValNodeAddInt (&col_tot, 0, 0); } } } if (num_existing_text > 0) { for (col_tot_vnp = col_tot, col_num = 1; col_tot_vnp != NULL; col_tot_vnp = col_tot_vnp->next, col_num++) { if (col_tot_vnp->data.intvalue > 0) { err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (col_tot_fmt) + 45)); sprintf (err_msg, col_tot_fmt, col_num, 
col_tot_vnp->data.intvalue, line_num - 1); ValNodeAddPointerToEnd (&vnb, 1, err_msg); } } err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_existing_text_fmt) + 15)); sprintf (err_msg, num_existing_text_fmt, num_existing_text); ValNodeAddPointerToFront (&vnb, 0, err_msg); } col_tot = ValNodeFree (col_tot); if (num_with_mrna > 0 && num_without_mrna > 0) { err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (mrna_warn_fmt) + 30)); sprintf (err_msg, mrna_warn_fmt, num_with_mrna, num_without_mrna); ValNodeAddPointerToFront (&vnb, 1, err_msg); } return vnb.head; } NLM_EXTERN ValNodePtr ApplyTableToFeatures (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns) { ValNodeBlock vnb; ValNodePtr line_vnp, val_vnp, col_vnp; Int4 line_num = 1, col_num; Uint2 entityID; ValNodePtr match_list, match_choice, target_list, feat_vnp; TabColumnConfigPtr t; CharPtr err_msg; CharPtr no_match_fmt = "No match for %s, line %d"; CharPtr bad_col_val_fmt = "Did not set value for column %d, line %d"; CharPtr num_affected_fmt = "%d fields affected"; Int4 num_fields_affected = 0; CharPtr val; Boolean success; MatchTypePtr match_type; vnb.head = NULL; vnb.tail = NULL; if (sep == NULL) { ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No SeqEntry")); } if (table == NULL) { ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No table")); } if (columns == NULL) { ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No column information")); } if (vnb.head != NULL) { return vnb.head; } match_type = FindMatchTypeInHeader (columns); if (match_type->choice == eTableMatchAny && table != NULL && table->next != NULL) { /* skip first row, must contain header */ table = table->next; } entityID = SeqMgrGetEntityIDForSeqEntry (sep); for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) { match_choice = FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns); if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) { ReportEmptyIDColumn (&vnb, line_num); 
} else { match_list = FindMatchForRow (match_type, match_choice->data.ptrvalue, entityID, sep); if (match_list == NULL) { err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (match_choice->data.ptrvalue) + 15)); sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num); ValNodeAddPointerToEnd (&vnb, 0, err_msg); } else { for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1; col_vnp != NULL; col_vnp = col_vnp->next, col_num++) { t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; if (t == NULL || t->match_type != NULL || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { if (val_vnp != NULL) { val_vnp = val_vnp->next; } continue; } target_list = GetTargetListForRowAndColumn (match_type, match_list, t->field, t->constraint); if (target_list == NULL) { ReportMissingTargets (&vnb, t->field, match_choice->data.ptrvalue, col_num, line_num); } else { if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) { val = ""; } else { val = val_vnp->data.ptrvalue; } for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) { if (val[0] == 0) { success = RemoveFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL); } else { success = SetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL, val_vnp->data.ptrvalue, t->existing_text); } if (success) { num_fields_affected++; if (t->match_mrna && IsFieldTypeCDSProduct (t->field) && feat_vnp->choice == OBJ_SEQFEAT) { if (AdjustmRNAProductToMatchProteinProduct (feat_vnp->data.ptrvalue)) { num_fields_affected++; } } } else { err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_col_val_fmt) + 30)); sprintf (err_msg, bad_col_val_fmt, col_num, line_num); ValNodeAddPointerToEnd (&vnb, 0, err_msg); } } } target_list = ValNodeFree (target_list); if (val_vnp != NULL) { val_vnp = val_vnp->next; } } } match_list = ValNodeFree (match_list); } } err_msg = (CharPtr) MemNew (sizeof (Char) * 
(StringLen (num_affected_fmt) + 15)); sprintf (err_msg, num_affected_fmt, num_fields_affected); ValNodeAddPointerToFront (&vnb, 0, err_msg); match_type = MatchTypeFree (match_type); return vnb.head; } NLM_EXTERN ValNodePtr CheckTableForExistingText (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns) { ValNodeBlock vnb; ValNodePtr line_vnp, val_vnp, col_vnp; Int4 line_num = 1, col_num, num_existing_text = 0; Uint2 entityID; TabColumnConfigPtr t; CharPtr err_msg, str, qual_name, val; CharPtr no_match_fmt = "No match for %s, line %d"; CharPtr already_has_val_fmt = "%s already has value '%s' (column %d), line %d. Replacement is '%s'"; CharPtr num_existing_text_fmt = "%d fields already have text."; ValNodePtr match_choice, match_list; ValNodePtr target_list, feat_vnp; MatchTypePtr match_type; vnb.head = NULL; vnb.tail = NULL; if (sep == NULL) { ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No SeqEntry")); } if (table == NULL) { ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No table")); } if (columns == NULL) { ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No column information")); } if (vnb.head != NULL) { return vnb.head; } match_type = FindMatchTypeInHeader (columns); if (match_type == NULL) return NULL; entityID = SeqMgrGetEntityIDForSeqEntry (sep); if (match_type->choice == eTableMatchAny && table != NULL && table->next != NULL) { /* skip first row, must contain header */ table = table->next; } for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) { match_choice = FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns); if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) { ReportEmptyIDColumn (&vnb, line_num); if (vnb.head == NULL) { vnb.head = vnb.tail; } } else { match_list = FindMatchForRow (match_type, match_choice->data.ptrvalue, entityID, sep); if (match_list == NULL) { err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (match_choice->data.ptrvalue) + 15)); 
sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num); ValNodeAddPointerToEnd (&vnb, 0, err_msg); } else { for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1; col_vnp != NULL; col_vnp = col_vnp->next, col_num++) { t = (TabColumnConfigPtr) col_vnp->data.ptrvalue; if (t == NULL || t->match_type != NULL || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) { if (val_vnp != NULL) { val_vnp = val_vnp->next; } continue; } target_list = GetTargetListForRowAndColumn (match_type, match_list, t->field, t->constraint); if (target_list == NULL) { ReportMissingTargets (&vnb, t->field, match_choice->data.ptrvalue, col_num, line_num); } else { if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) { val = ""; } else { val = val_vnp->data.ptrvalue; } for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) { str = GetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL); if (!StringHasNoText (str)) { qual_name = SummarizeFieldType (t->field); err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (already_has_val_fmt) + StringLen (qual_name) + StringLen (str) + StringLen (val) + 30)); sprintf (err_msg, already_has_val_fmt, qual_name, str, col_num, line_num, val); ValNodeAddPointerToEnd (&vnb, col_num, err_msg); num_existing_text ++; } str = MemFree (str); } } target_list = ValNodeFree (target_list); if (val_vnp != NULL) { val_vnp = val_vnp->next; } } } match_list = ValNodeFree (match_list); } } if (num_existing_text > 0) { err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_existing_text_fmt) + 15)); sprintf (err_msg, num_existing_text_fmt, num_existing_text); ValNodeAddPointerToFront (&vnb, 0, err_msg); } return vnb.head; } /* Reporting functions for SMART */ static void GetDescriptorPubTitles (SeqDescrPtr sdp, Pointer userdata) { CharPtr title; if (sdp == NULL || sdp->choice != Seq_descr_pub || userdata == NULL) { return; } title = GetPubFieldFromObject 
(OBJ_SEQDESC, sdp, Publication_field_title, NULL); if (title != NULL) { ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, title); } } static void GetFeaturePubTitles (SeqFeatPtr sfp, Pointer userdata) { CharPtr title; if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB || userdata == NULL) { return; } title = GetPubFieldFromObject (OBJ_SEQFEAT, sfp, Publication_field_title, NULL); if (title != NULL) { ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, title); } } NLM_EXTERN ValNodePtr GetPublicationTitlesInSep (SeqEntryPtr sep) { ValNodePtr title_list = NULL; VisitDescriptorsInSep (sep, &title_list, GetDescriptorPubTitles); VisitFeaturesInSep (sep, &title_list, GetFeaturePubTitles); return title_list; } NLM_EXTERN ValNodePtr GetPublicationTitlesOnSep (SeqEntryPtr sep) { ValNodePtr title_list = NULL; VisitDescriptorsOnSep (sep, &title_list, GetDescriptorPubTitles); VisitFeaturesOnSep (sep, &title_list, GetFeaturePubTitles); return title_list; } static void GetBankitCommentsCallback (SeqDescrPtr sdp, Pointer userdata) { UserObjectPtr uop; ObjectIdPtr oip; UserFieldPtr ufp; if (sdp == NULL || sdp->choice != Seq_descr_user || userdata == NULL) { return; } uop = (UserObjectPtr) sdp->data.ptrvalue; if (uop != NULL && StringCmp (uop->_class, "SMART_V1.0") != 0) { oip = uop->type; if (oip != NULL && StringCmp (oip->str, "Submission") == 0) { for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { oip = ufp->label; if (oip != NULL && StringCmp (oip->str, "AdditionalComment") == 0 && !StringHasNoText (ufp->data.ptrvalue)) { ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, StringSave (ufp->data.ptrvalue)); } } } } } NLM_EXTERN ValNodePtr GetBankitCommentsInSep (SeqEntryPtr sep) { ValNodePtr comment_list = NULL; VisitDescriptorsInSep (sep, &comment_list, GetBankitCommentsCallback); return comment_list; } NLM_EXTERN ValNodePtr GetBankitCommentsOnSep (SeqEntryPtr sep) { ValNodePtr comment_list = NULL; VisitDescriptorsOnSep (sep, &comment_list, GetBankitCommentsCallback); return 
comment_list; } static void SplitPCRPrimersByPositionCallback (BioSourcePtr biop, Pointer data) { PCRReactionPtr ps, ps_next, ps_new; PCRPrimerPtr pp_f, pp_r; if (biop == NULL || biop->pcr_primers == NULL) { return; } for (ps = biop->pcr_primers; ps != NULL; ps = ps_next) { ps_next = ps->next; pp_f = ps->forward; pp_r = ps->reverse; while (pp_f != NULL && pp_r != NULL && pp_f->next != NULL && pp_r->next != NULL) { ps_new = PCRReactionNew (); ps_new->forward = pp_f->next; ps_new->reverse = pp_r->next; pp_f->next = NULL; pp_r->next = NULL; ps->next = ps_new; ps_new->next = ps_next; ps = ps_new; pp_f = ps->forward; pp_r = ps->reverse; } } } NLM_EXTERN void SplitPCRPrimersByPosition (SeqEntryPtr sep) { VisitBioSourcesInSep (sep, NULL, SplitPCRPrimersByPositionCallback); } static void MergePCRPrimersCallback (BioSourcePtr biop, Pointer data) { PCRReactionPtr ps, ps_next; PCRPrimerPtr pp_f_last, pp_r_last; if (biop == NULL || biop->pcr_primers == NULL || biop->pcr_primers->next == NULL) { return; } pp_f_last = biop->pcr_primers->forward; if (pp_f_last != NULL) { while (pp_f_last->next != NULL) { pp_f_last = pp_f_last->next; } } pp_r_last = biop->pcr_primers->reverse; if (pp_r_last != NULL) { while (pp_r_last->next != NULL) { pp_r_last = pp_r_last->next; } } ps = biop->pcr_primers->next; biop->pcr_primers->next = NULL; while (ps != NULL) { ps_next = ps->next; ps->next = NULL; if (ps->forward != NULL) { if (pp_f_last == NULL) { biop->pcr_primers->forward = ps->forward; } else { pp_f_last->next = ps->forward; } if (pp_f_last != NULL) { while (pp_f_last->next != NULL) { pp_f_last = pp_f_last->next; } } ps->forward = NULL; } if (ps->reverse != NULL) { if (pp_r_last == NULL) { biop->pcr_primers->reverse = ps->reverse; } else { pp_r_last->next = ps->reverse; } if (pp_r_last != NULL) { while (pp_r_last->next != NULL) { pp_r_last = pp_r_last->next; } } ps->reverse = NULL; } ps = PCRReactionFree (ps); ps = ps_next; } } NLM_EXTERN void MergePCRPrimers (SeqEntryPtr sep) { 
VisitBioSourcesInSep (sep, NULL, MergePCRPrimersCallback);
}


/* Removes from *pp_list every primer whose name matches scp and returns the
 * removed primers as a new chain, in their original order.  *pp_list is
 * updated when its first element is removed.  Returns NULL when pp_list or
 * *pp_list is NULL, or when nothing matches.
 */
static PCRPrimerPtr ExtractPrimersByConstraint (PCRPrimerPtr PNTR pp_list, StringConstraintPtr scp)
{
  PCRPrimerPtr new_list = NULL, last_new = NULL, prev = NULL, pp, pp_next;

  if (pp_list == NULL || *pp_list == NULL) {
    return NULL;
  }

  pp = *pp_list;
  while (pp != NULL) {
    /* saved up front because pp->next is cleared when pp is extracted */
    pp_next = pp->next;
    if (DoesStringMatchConstraint(pp->name, scp)) {
      /* unlink from the source chain */
      if (prev == NULL) {
        *pp_list = pp->next;
      } else {
        prev->next = pp->next;
      }
      pp->next = NULL;
      /* append to the extracted chain */
      if (last_new == NULL) {
        new_list = pp;
      } else {
        last_new->next = pp;
      }
      last_new = pp;
    } else {
      prev = pp;
    }
    pp = pp_next;
  }
  return new_list;
}


/* Pair of string constraints passed through the visitor's Pointer argument:
 * scp1 is applied to forward primers, scp2 to reverse primers.
 */
typedef struct stringconstraintpair {
  StringConstraintPtr scp1;
  StringConstraintPtr scp2;
} StringConstraintPairData, PNTR StringConstraintPairPtr;


/* BioSource visitor callback: pulls the forward primers matching pair->scp1
 * and the reverse primers matching pair->scp2 out of every existing PCR
 * reaction and gathers them in one new reaction appended after the last
 * existing reaction.  When nothing matches, the new reaction is freed.
 * data must point to a StringConstraintPairData.
 */
static void SplitPCRPrimersByConstraintsCallback (BioSourcePtr biop, Pointer data)
{
  PCRReactionPtr ps, ps_new, last_ps = NULL;
  PCRPrimerPtr pp_match, last_fwd = NULL, last_rev = NULL;
  StringConstraintPairPtr pair;

  if (biop == NULL || biop->pcr_primers == NULL || (pair = (StringConstraintPairPtr) data) == NULL) {
    return;
  }

  ps_new = PCRReactionNew ();
  for (ps = biop->pcr_primers; ps != NULL; ps = ps->next) {
    /* take forward matches */
    pp_match = ExtractPrimersByConstraint (&(ps->forward), pair->scp1);
    if (pp_match != NULL) {
      if (last_fwd == NULL) {
        ps_new->forward = pp_match;
      } else {
        last_fwd->next = pp_match;
      }
      /* advance to the end of the chain that was just attached */
      last_fwd = pp_match;
      while (last_fwd->next != NULL) {
        last_fwd = last_fwd->next;
      }
    }
    /* take reverse matches */
    pp_match = ExtractPrimersByConstraint (&(ps->reverse), pair->scp2);
    if (pp_match != NULL) {
      if (last_rev == NULL) {
        ps_new->reverse = pp_match;
      } else {
        last_rev->next = pp_match;
      }
      last_rev = pp_match;
      while (last_rev->next != NULL) {
        last_rev = last_rev->next;
      }
    }
    /* last_ps ends up on the final existing reaction (list is non-empty here) */
    last_ps = ps;
  }

  if (ps_new->forward != NULL || ps_new->reverse != NULL) {
    last_ps->next = ps_new;
  } else {
    ps_new = PCRReactionFree (ps_new);
  }
}


/* Applies SplitPCRPrimersByConstraintsCallback to every BioSource visited in
 * sep, using scp_fwd for forward primers and scp_rev for reverse primers.
 */
NLM_EXTERN void SplitPCRPrimersByConstraints (SeqEntryPtr sep,
StringConstraintPtr scp_fwd, StringConstraintPtr scp_rev) { StringConstraintPairData pair; pair.scp1 = scp_fwd; pair.scp2 = scp_rev; VisitBioSourcesInSep (sep, &pair, SplitPCRPrimersByConstraintsCallback); } /* product name fixing rules */ NLM_EXTERN Int4 CountSuspectRuleSet (SuspectRuleSetPtr set) { Int4 num = 0; while (set != NULL) { num++; set = set->next; } return num; } /* emptiness */ NLM_EXTERN Boolean IsSearchFuncEmpty (SearchFuncPtr func) { Boolean rval = TRUE; if (func == NULL) { rval = TRUE; } else { switch (func->choice) { case SearchFunc_string_constraint: rval = IsStringConstraintEmpty (func->data.ptrvalue); break; case SearchFunc_prefix_and_numbers: rval = StringHasNoText (func->data.ptrvalue); break; default: rval = FALSE; } } return rval; } NLM_EXTERN Boolean IsSuspectRuleEmpty (SuspectRulePtr rule) { if (rule == NULL) { return TRUE; } else if (IsSearchFuncEmpty(rule->find)) { return TRUE; } else { return FALSE; } } /* summarization */ NLM_EXTERN CharPtr SummarizeSearchFunc (SearchFuncPtr func, Boolean short_version) { CharPtr summ = NULL; CharPtr bracket_fmt = "Contains %d or more brackets or parentheses"; CharPtr prefix_fmt = "Is '%s' followed by numbers"; CharPtr length_fmt = "Is longer than %d characters"; CharPtr term_fmt = "Contains '%s' at start or separated from other letters by numbers, spaces, or punctuation, but does not also contain 'domain'"; CharPtr short_term_fmt = "Contains '%s'"; if (func == NULL) { summ = StringSave ("No search function"); } else { switch (func->choice) { case SearchFunc_string_constraint: summ = SummarizeStringConstraintEx (func->data.ptrvalue, short_version); break; case SearchFunc_contains_plural: summ = StringSave ("May contain plural"); break; case SearchFunc_n_or_more_brackets_or_parentheses: summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (bracket_fmt) + 15)); sprintf (summ, bracket_fmt, func->data.intvalue); break; case SearchFunc_three_numbers: summ = StringSave ("Three or more numbers together"); 
break; case SearchFunc_underscore: summ = StringSave ("Contains underscore"); break; case SearchFunc_prefix_and_numbers: summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (prefix_fmt) + StringLen (func->data.ptrvalue))); sprintf (summ, prefix_fmt, func->data.ptrvalue == NULL ? "" : func->data.ptrvalue); break; case SearchFunc_all_caps: summ = StringSave ("Is all capital letters"); break; case SearchFunc_unbalanced_paren: summ = StringSave ("Contains unbalanced brackets or parentheses"); break; case SearchFunc_too_long: summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (length_fmt) + 15)); sprintf (summ, length_fmt, func->data.intvalue); break; case SearchFunc_has_term: if (short_version) { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (short_term_fmt) + StringLen (func->data.ptrvalue))); sprintf (summ, short_term_fmt, func->data.ptrvalue == NULL ? "" : func->data.ptrvalue); } else { summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (term_fmt) + StringLen (func->data.ptrvalue))); sprintf (summ, term_fmt, func->data.ptrvalue == NULL ? 
"" : func->data.ptrvalue); } break; default: summ = StringSave ("Unknown search function"); break; } } return summ; } NLM_EXTERN CharPtr SummarizeReplaceFunc (ReplaceFuncPtr replace, Boolean short_version) { CharPtr summ = NULL; SimpleReplacePtr simple; CharPtr replace_fmt = "Replace %swith '%s'"; CharPtr whole = "entire name "; CharPtr weasel_to_putative = ", retain and normalize 'putative' synonym"; CharPtr haem_fmt = "Replace '%s' with 'heme' if whole word, 'hem' otherwise"; Int4 len; if (replace == NULL) { return NULL; } switch (replace->choice) { case ReplaceFunc_simple_replace: simple = (SimpleReplacePtr) replace->data.ptrvalue; len = StringLen (replace_fmt) + StringLen (simple->replace) + 1; if (simple->whole_string) { len += StringLen (whole); } if (simple->weasel_to_putative && !short_version) { len += StringLen (weasel_to_putative); } summ = (CharPtr) MemNew (sizeof (Char) * len); sprintf (summ, replace_fmt, simple->whole_string ? whole : "" , simple->replace == NULL ? "" : simple->replace); if (simple->weasel_to_putative && !short_version) { StringCat (summ, weasel_to_putative); } break; case ReplaceFunc_haem_replace: summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (haem_fmt) + StringLen (replace->data.ptrvalue))); sprintf (summ, haem_fmt, replace->data.ptrvalue); break; default: summ = StringSave ("Unknown replacement function"); break; } return summ; } static CharPtr fix_type_names[] = { "None", "Typo", "Putative Typo", "Quick fix", "Organelles not appropriate in prokaryote", "Suspicious phrase; should this be nonfunctional?", "May contain database identifier more appropriate in note; remove from product name", "Remove organism from product name", "Possible parsing error or incorrect formatting; remove inappropriate symbols", "Implies evolutionary relationship; change to -like protein", "Consider adding 'protein' to the end of the product name", "Correct the name or use 'hypothetical protein'", "Use American spelling", "Use short product name 
instead of descriptive phrase", "use protein instead of gene as appropriate" };


/* Returns the display name for fix_type, or "Unknown fix type" when the
 * value is outside fix_type_names.  The returned string is static storage
 * (a table entry or a literal) and must not be freed.
 */
NLM_EXTERN CharPtr SummarizeFixType (Uint2 fix_type)
{
  if (fix_type < sizeof (fix_type_names) / sizeof (CharPtr)) {
    return fix_type_names[fix_type];
  } else {
    return "Unknown fix type";
  }
}


/* Returns a newly allocated summary of a replace rule: the summary of its
 * replace function, followed by ", move original to note" when move_to_note
 * is set.  Returns NULL when replace is NULL.
 */
NLM_EXTERN CharPtr SummarizeReplaceRule (ReplaceRulePtr replace, Boolean short_version)
{
  CharPtr add_note = ", move original to note";
  CharPtr func;
  CharPtr summ = NULL;
  Int4 len;

  if (replace == NULL) {
    return NULL;
  }

  func = SummarizeReplaceFunc (replace->replace_func, short_version);
  /* +1 for the terminator */
  len = StringLen (func) + 1;
  if (replace->move_to_note) {
    len += StringLen (add_note);
  }
  summ = (CharPtr) MemNew (sizeof (Char) * len);
  StringCpy (summ, func);
  if (replace->move_to_note) {
    StringCat (summ, add_note);
  }
  func = MemFree (func);
  return summ;
}


/* Returns a newly allocated description of a suspect rule, or NULL when
 * rule is NULL.
 *
 * With short_version set and a non-blank rule->description, the result is
 * the description (plus "(FATAL)" for fatal rules).  Otherwise the result is
 * assembled from the find function, the "but not" exception, the feature
 * constraint, the replacement, the fix type, the description and the fatal
 * marker, in that order, each only when present.
 */
NLM_EXTERN CharPtr SummarizeSuspectRuleEx (SuspectRulePtr rule, Boolean short_version)
{
  CharPtr find = NULL, replace = NULL, fix_type = NULL, feat_constraint = NULL, except = NULL;
  CharPtr summ = NULL;
  CharPtr tmp = NULL;
  CharPtr butnot = " but not ";
  CharPtr desc = " Description: ";
  CharPtr fatal = "(FATAL)";
  Int4 len;

  if (rule == NULL) {
    return NULL;
  }

  /* fix_type points at static storage (see SummarizeFixType); it is never freed here */
  if (!short_version && rule->rule_type != Fix_type_none) {
    fix_type = SummarizeFixType (rule->rule_type);
  }

  if (short_version && !StringHasNoText (rule->description)) {
    /* fix_type is only assigned when short_version is FALSE, so it is NULL on this path */
    if (fix_type == NULL) {
      summ = StringSave (rule->description);
    } else {
      /* 4 = " (" + ")" + terminator */
      len = StringLen (fix_type) + StringLen (rule->description) + 4;
      summ = (CharPtr) MemNew (sizeof (Char) * len);
      StringCpy (summ, rule->description);
      StringCat (summ, " (");
      StringCat (summ, fix_type);
      StringCat (summ, ")");
    }
    if (rule->fatal) {
      /* rebuild the string with the fatal marker appended */
      len = StringLen (summ) + StringLen (fatal) + 1;
      tmp = (CharPtr) MemNew (sizeof (Char) * len);
      StringCpy (tmp, summ);
      StringCat (tmp, fatal);
      summ = (CharPtr) MemFree (summ);
      summ = tmp;
    }
    return summ;
  }

  find = SummarizeSearchFunc (rule->find, short_version);
  if (!IsSearchFuncEmpty(rule->except)) {
    except = SummarizeSearchFunc (rule->except,
short_version); } if (!short_version) { feat_constraint = SummarizeConstraintSet (rule->feat_constraint); } if (!short_version || rule->rule_type == Fix_type_typo) { replace = SummarizeReplaceRule (rule->replace, short_version); } len = StringLen (find) + StringLen (except) + StringLen (feat_constraint) + StringLen (replace) + 3; if (fix_type != NULL) { len = len + StringLen (fix_type) + 3; } if (!StringHasNoText (rule->description)) { len += StringLen (rule->description) + StringLen (desc); } if (feat_constraint != NULL) { len += 2; } if (except != NULL) { len += StringLen (butnot); } if (rule->fatal) { len += StringLen(fatal); } summ = (CharPtr) MemNew (sizeof (Char) * len); StringCpy (summ, find); if (except != NULL) { StringCat (summ, butnot); StringCat (summ, except); } if (feat_constraint != NULL) { StringCat (summ, ", "); StringCat (summ, feat_constraint); } if (replace != NULL) { StringCat (summ, ", "); StringCat (summ, replace); } if (fix_type != NULL) { StringCat (summ, " ("); StringCat (summ, fix_type); StringCat (summ, ")"); } if (!StringHasNoText (rule->description)) { StringCat (summ, desc); StringCat (summ, rule->description); } if (rule->fatal) { StringCat (summ, fatal); } find = MemFree (find); except = MemFree (except); feat_constraint = MemFree (feat_constraint); replace = MemFree (replace); return summ; } NLM_EXTERN CharPtr SummarizeSuspectRule (SuspectRulePtr rule) { return SummarizeSuspectRuleEx (rule, FALSE); } NLM_EXTERN Boolean StringMayContainPlural (CharPtr search) { CharPtr cp; Char last_letter, second_to_last_letter, next_letter; Int4 word_len = 0; Boolean may_contain_plural = FALSE; CharPtr word_skip = " ,"; if (search == NULL) return FALSE; cp = search; while (*cp != 0 && !may_contain_plural) { word_len = StringCSpn (cp, word_skip); last_letter = *(cp + word_len - 1); if (last_letter == 's') { if (word_len >=5 && StringNCmp (cp + word_len - 5, "trans", 5) == 0) { /* not plural */ cp = cp + word_len; cp += StringSpn (cp, word_skip); } 
else if (word_len > 3 && (second_to_last_letter = *(cp + word_len - 2)) != 's' && second_to_last_letter != 'i' && second_to_last_letter != 'u' && ((next_letter = *(cp + word_len)) == ',' || next_letter == 0)) { may_contain_plural = TRUE; } else { cp = cp + word_len; cp += StringSpn (cp, word_skip); } } else { cp = cp + word_len; cp += StringSpn (cp, word_skip); } } return may_contain_plural; } static CharPtr FindFirstOpen (CharPtr cp) { CharPtr pa, ba; if (cp == NULL) { return NULL; } pa = StringChr (cp, '('); ba = StringChr (cp, '['); if (pa == NULL) { return ba; } else if (ba == NULL || ba > pa) { return pa; } else { return ba; } } static Char GetClose (Char ch) { if (ch == '(') { return ')'; } else if (ch == '[') { return ']'; } else if (ch == '{') { return '}'; } else { return ch; } } static Boolean SkipBracketOrParen (CharPtr bp, CharPtr start, CharPtr PNTR skip_to) { Boolean rval = FALSE; CharPtr ep, ns; if (bp - start > 2 && StringNCmp (bp - 3, "NAD(P)", 6) == 0) { rval = TRUE; *skip_to = bp + 6; } else if (StringNCmp (bp, "(NAD(P)H)", 9) == 0) { rval = TRUE; *skip_to = bp + 9; } else if (StringNCmp (bp, "(NAD(P))", 8) == 0) { rval = TRUE; *skip_to = bp + 8; } else if (StringNCmp (bp, "(I)", 3) == 0) { rval = TRUE; *skip_to = bp + 4; } else if (StringNCmp (bp, "(II)", 4) == 0) { rval = TRUE; *skip_to = bp + 5; } else if (StringNCmp (bp, "(III)", 5) == 0) { rval = TRUE; *skip_to = bp + 6; } else if (StringNCmp (bp, "(NADPH)", 7) == 0) { rval = TRUE; *skip_to = bp + 7; } else if (StringNCmp (bp, "(NAD+)", 6) == 0) { rval = TRUE; *skip_to = bp + 6; } else if (StringNCmp (bp, "(NAPPH/NADH)", 12) == 0) { rval = TRUE; *skip_to = bp + 12; } else if (StringNCmp (bp, "(NADP+)", 7) == 0) { rval = TRUE; *skip_to = bp + 7; } else if (StringNCmp (bp, "[acyl-carrier protein]", 22) == 0) { rval = TRUE; *skip_to = bp + 22; } else if (StringNCmp (bp, "[acyl-carrier-protein]", 22) == 0) { rval = TRUE; *skip_to = bp + 22; } else if (StringNCmp (bp, "(acyl carrier protein)", 
22) == 0) { rval = TRUE; *skip_to = bp + 22; } else { ns = StringChr (bp + 1, *bp); ep = StringChr (bp + 1, GetClose(*bp)); if (ep != NULL && (ns == NULL || ns > ep)) { if (ep - bp < 5) { rval = TRUE; *skip_to = ep + 1; } else if (ep - bp > 3 && StringNCmp (ep - 3, "ing", 3) == 0) { rval = TRUE; *skip_to = ep + 1; } } } return rval; } NLM_EXTERN Boolean ContainsNorMoreSetsOfBracketsOrParentheses (CharPtr search, Int4 n) { CharPtr cp, end; Int4 num_found = 0; if (search == NULL) { return FALSE; } cp = FindFirstOpen(search); while (num_found < n && cp != NULL && *cp != 0) { if (SkipBracketOrParen(cp, search, &cp)) { /* ignore it */ cp = FindFirstOpen (cp); } else if ((end = StringChr (cp, GetClose (*cp))) == NULL) { /* skip, doesn't close the bracket */ cp = FindFirstOpen (cp + 1); } else { cp = FindFirstOpen (end); num_found ++; } } if (num_found >= n) { return TRUE; } else { return FALSE; } } static Boolean FollowedByFamily (CharPtr PNTR str) { Int4 word_len; if (str == NULL || *str == NULL || **str == 0) { return FALSE; } word_len = StringCSpn (*str + 1, " "); if (*(*str + word_len + 1) != 0 && StringNCmp (*str + word_len + 2, "family", 6) == 0) { *str = *str + word_len + 7; return TRUE; } else { return FALSE; } } static Boolean InWordBeforeCytochromeOrCoenzyme (CharPtr cp, CharPtr start) { if (cp == NULL) { return FALSE; } while (cp > start && !isspace (*cp)) { cp--; } if (cp == start) { return FALSE; } while (cp > start && isspace (*cp)) { cp--; } if (cp - start >= 9 && StringNICmp (cp - 9, "cytochrome", 10) == 0) { return TRUE; } else if (cp - start >= 7 && StringNCmp (cp - 7, "coenzyme", 8) == 0) { return TRUE; } else { return FALSE; } } static Boolean PrecededByPrefix (CharPtr search, CharPtr cp, CharPtr prefix) { Int4 len; if (search == NULL || cp == NULL || StringHasNoText (prefix)) { return FALSE; } len = StringLen (prefix); if (cp - search >= len && StringNCmp (cp - len, prefix, len) == 0) { return TRUE; } else { return FALSE; } } static CharPtr 
OkNumberPrefix[] = {"DUF", "UPF", "IS", "TIGR", "UCP", "PUF", "CHP", NULL }; static Boolean PrecededByOkPrefix (CharPtr search, CharPtr p) { Int4 i; Boolean rval = FALSE; for (i = 0; OkNumberPrefix[i] != NULL && !rval; i++) { if (PrecededByPrefix (search, p, OkNumberPrefix[i])) { rval = TRUE; } } return rval; } NLM_EXTERN Boolean ContainsThreeOrMoreNumbersTogether (CharPtr search) { CharPtr p; Int4 num_digits = 0; if (search == NULL) { return FALSE; } p = search; while (*p != 0) { if (isdigit (*p)) { if (PrecededByOkPrefix(search, p)) { p += StrSpn (p, "0123456789") - 1; num_digits = 0; } else if (InWordBeforeCytochromeOrCoenzyme (p, search)) { p += StrSpn (p, "0123456789") - 1; num_digits = 0; } else { num_digits ++; if (num_digits == 3) { if (FollowedByFamily (&p)) { num_digits = 0; } else { return TRUE; } } } } else { num_digits = 0; } p++; } return FALSE; } NLM_EXTERN Boolean StringContainsUnderscore (CharPtr search) { CharPtr cp; if (search == NULL) { return FALSE; } cp = StringChr (search, '_'); while (cp != NULL) { if (FollowedByFamily (&cp)) { /* search again */ cp = StringChr (cp, '_'); } else if (cp - search < 3 || *(cp + 1) == 0) { return TRUE; } else if ((StringNCmp (cp - 3, "MFS", 3) == 0 || StringNCmp (cp - 3, "TPR", 3) == 0 || StringNCmp (cp - 3, "AAA", 3) == 0) && isdigit (*(cp + 1)) && !isdigit (*(cp + 2))) { cp = StringChr (cp + 1, '_'); } else { return TRUE; } } return FALSE; } NLM_EXTERN Boolean ProductContainsTerm (CharPtr pattern, CharPtr search) { CharPtr str; /* don't bother searching for c-term or n-term if product name contains "domain" */ if (StringISearch (search, "domain") != NULL) { return FALSE; } str = StringISearch(search, pattern); /* c-term and n-term must be either first word or separated from other word by space, num, or punct */ if (str != NULL && (str == search || !isalpha (*(str - 1)))) { return TRUE; } else { return FALSE; } } NLM_EXTERN Boolean IsPrefixPlusNumbers (CharPtr prefix, CharPtr search) { Int4 pattern_len, 
digit_len; if (search == NULL) { return FALSE; } pattern_len = StringLen (prefix); if (pattern_len > 0 && StringNCmp (search, prefix, pattern_len) != 0) { return FALSE; } digit_len = StringSpn (search + pattern_len, "1234567890"); if (digit_len > 0 && *(search + pattern_len + digit_len) == 0) { return TRUE; } else { return FALSE; } } NLM_EXTERN Boolean StringContainsUnbalancedParentheses (CharPtr search) { CharPtr buffer, cp_src; Int4 pos = 0; Boolean is_bad = FALSE; if (search == NULL) { return FALSE; } /* note - don't need space for terminating character */ buffer = MemNew (sizeof (Char) * StringLen (search)); cp_src = search; while (*cp_src != 0 && !is_bad) { if (*cp_src == '(' || *cp_src == '[') { buffer[pos++] = *cp_src; } else if (*cp_src == ')') { if (pos < 1) { is_bad = TRUE; } else if (buffer[pos - 1] != '(') { is_bad = TRUE; } else { pos --; } } else if (*cp_src == ']') { if (pos < 1) { is_bad = TRUE; } else if (buffer[pos - 1] != '[') { is_bad = TRUE; } else { pos--; } } ++cp_src; } if (pos > 0) { is_bad = TRUE; } buffer = MemFree (buffer); return is_bad; } static Boolean MatchesSearchFunc (CharPtr str, SearchFuncPtr search) { Boolean rval = FALSE; if (str == NULL) { return FALSE; } else if (search == NULL) { return TRUE; } switch (search->choice) { case SearchFunc_string_constraint: rval = DoesStringMatchConstraint(str, (StringConstraintPtr) search->data.ptrvalue); break; case SearchFunc_contains_plural: rval = StringMayContainPlural (str); break; case SearchFunc_n_or_more_brackets_or_parentheses: rval = ContainsNorMoreSetsOfBracketsOrParentheses (str, search->data.intvalue); break; case SearchFunc_three_numbers: rval = ContainsThreeOrMoreNumbersTogether (str); break; case SearchFunc_underscore: rval = StringContainsUnderscore (str); break; case SearchFunc_prefix_and_numbers: rval = IsPrefixPlusNumbers (search->data.ptrvalue, str); break; case SearchFunc_all_caps: rval = IsAllCaps (str); break; case SearchFunc_unbalanced_paren: rval = 
StringContainsUnbalancedParentheses (str);
      break;
    case SearchFunc_too_long:
      /* names containing "bifunctional" or "multifunctional" are never reported as too long */
      if (StringISearch (str, "bifunctional") == NULL
          && StringISearch (str, "multifunctional") == NULL
          && StringLen (str) > (Uint4) search->data.intvalue) {
        rval = TRUE;
      }
      break;
    case SearchFunc_has_term:
      rval = ProductContainsTerm (search->data.ptrvalue, str);
      break;
  }
  return rval;
}


/* Returns TRUE when str satisfies the rule's find function (if it has a
 * non-empty one) and does not satisfy its except function (if it has a
 * non-empty one).  A NULL str never matches; a NULL rule matches every
 * non-NULL str.
 */
static Boolean MatchesSuspectProductRule (CharPtr str, SuspectRulePtr rule)
{
  if (str == NULL) {
    return FALSE;
  } else if (rule == NULL) {
    return TRUE;
  }

  if (!IsSearchFuncEmpty(rule->find) && !MatchesSearchFunc(str, rule->find)) {
    return FALSE;
  } else if (!IsSearchFuncEmpty(rule->except) && MatchesSearchFunc (str, rule->except)) {
    return FALSE;
  } else {
    return TRUE;
  }
}


/* State shared with SuspectRuleFeatCallback:
 *   rules     - rule list being evaluated
 *   obj_lists - one node per rule, in rule order; each node's data.ptrvalue
 *               is the list of features that matched that rule
 *   featdef   - only features with this subtype are examined
 */
typedef struct suspectrulecallback {
  SuspectRuleSetPtr rules;
  ValNodePtr        obj_lists;
  Uint2             featdef;
} SuspectRuleCallbackData, PNTR SuspectRuleCallbackPtr;


/* Returns TRUE when str matches rule and the associated feature satisfies
 * the rule's feature constraint.  For a protein feature the constraint is
 * checked against the coding region that produces the protein, when one can
 * be found.  With no feature, the rule only matches when it has no feature
 * constraint.  A NULL rule always matches.
 */
NLM_EXTERN Boolean DoesStringMatchSuspectRule (CharPtr str, SeqFeatPtr sfp, SuspectRulePtr rule)
{
  BioseqPtr bsp;
  SeqFeatPtr cds;
  Boolean rval = FALSE;

  if (rule == NULL) {
    return TRUE;
  }

  if (MatchesSuspectProductRule(str, rule)) {
    /* we want to list the coding region, rather than the protein feature, if we can */
    if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT) {
      bsp = BioseqFindFromSeqLoc (sfp->location);
      if (bsp != NULL) {
        cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
        if (cds != NULL) {
          sfp = cds;
        }
      }
    }
    if (sfp == NULL) {
      if (rule->feat_constraint == NULL) {
        rval = TRUE;
      }
    } else if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, rule->feat_constraint)) {
      rval = TRUE;
    }
  }
  return rval;
}


/* Feature visitor callback: checks the product name of sfp against every
 * rule in the SuspectRuleCallbackData passed in data and records matching
 * features in the per-rule object lists.  Only features whose subtype
 * equals the configured featdef are examined.
 */
static void SuspectRuleFeatCallback (SeqFeatPtr sfp, Pointer data)
{
  SuspectRuleCallbackPtr s;
  ProtRefPtr prp;
  SuspectRulePtr rule;
  ValNodePtr vnp;
  SeqFeatPtr cds;
  BioseqPtr bsp;
  ValNodePtr list;
  SeqFeatPtr report_sfp = sfp;
  CharPtr check_val = NULL;

  if (sfp == NULL || (s = (SuspectRuleCallbackPtr) data) == NULL || sfp->idx.subtype != s->featdef) {
    return;
  }

  if (s->featdef == FEATDEF_PROT) {
    prp =
(ProtRefPtr) sfp->data.value.ptrvalue;
    if (prp == NULL || prp->name == NULL) {
      return;
    }
    /* first protein name; borrowed from the feature, not copied */
    check_val = prp->name->data.ptrvalue;
    /* we want to list the coding region, rather than the protein feature, if we can */
    bsp = BioseqFindFromSeqLoc (sfp->location);
    if (bsp != NULL) {
      cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
      if (cds != NULL) {
        report_sfp = cds;
      }
    }
  } else if (s->featdef == FEATDEF_rRNA) {
    /* NOTE(review): check_val is never released on this path; if
     * GetRNAProductString returns caller-owned memory this leaks - confirm
     */
    check_val = GetRNAProductString (sfp, NULL);
  }

  /* rules and object lists are walked in lockstep: node i belongs to rule i */
  for (rule = s->rules, vnp = s->obj_lists; rule != NULL; rule = rule->next, vnp = vnp->next) {
    /* make sure we have space in the object lists */
    if (vnp == NULL) {
      vnp = ValNodeNew (s->obj_lists);
      if (s->obj_lists == NULL) {
        s->obj_lists = vnp;
      }
    }
    if (MatchesSuspectProductRule (check_val, rule)) {
      if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, report_sfp, rule->feat_constraint)) {
        list = vnp->data.ptrvalue;
        ValNodeAddPointer (&list, OBJ_SEQFEAT, report_sfp);
        /* store back: list changes when it was empty before the add */
        vnp->data.ptrvalue = list;
      }
    }
  }
}


/* Visits the features in sep and returns one ValNode per rule (in rule
 * order) whose data.ptrvalue lists the features of subtype featdef that
 * matched that rule.
 */
NLM_EXTERN ValNodePtr GetFeaturesForSuspectRules (SeqEntryPtr sep, SuspectRuleSetPtr rules, Uint2 featdef)
{
  SuspectRuleCallbackData d;

  MemSet (&d, 0, sizeof (SuspectRuleCallbackData));
  d.obj_lists = NULL;
  d.rules = rules;
  d.featdef = featdef;
  VisitFeaturesInSep (sep, &d, SuspectRuleFeatCallback);
  return d.obj_lists;
}


/* Frees a list of object lists: each node's inner list is released with
 * FreeObjectList, then the outer list is freed.  Returns the result of
 * ValNodeFree on the outer list.
 */
NLM_EXTERN ValNodePtr FreeListOfObjectLists (ValNodePtr list)
{
  ValNodePtr vnp;

  for (vnp = list; vnp != NULL; vnp = vnp->next) {
    vnp->data.ptrvalue = FreeObjectList (vnp->data.ptrvalue);
  }
  list = ValNodeFree (list);
  return list;
}


/* Applies the replacement of rule to *str in place.  Returns TRUE when the
 * string was changed (for a simple replacement on a string-constraint find,
 * the value reported by ReplaceStringConstraintPortionInString).  Returns
 * FALSE when str is NULL or the rule has no replacement function.
 */
NLM_EXTERN Boolean ApplySuspectProductNameFixToString (SuspectRulePtr rule, CharPtr PNTR str)
{
  SimpleReplacePtr simple_replace;
  Boolean rval = FALSE;
  Boolean use_putative = FALSE;
  CharPtr orig;

  if (str == NULL || rule == NULL || rule->replace == NULL || rule->replace->replace_func == NULL) {
    return FALSE;
  }

  switch (rule->replace->replace_func->choice) {
    case ReplaceFunc_simple_replace:
      simple_replace = (SimpleReplacePtr) rule->replace->replace_func->data.ptrvalue;
      if (simple_replace !=
NULL) {
        if (simple_replace->weasel_to_putative) {
          /* SkipWeasel returning a different pointer means the name starts with a "weasel" word */
          if (SkipWeasel(*str) != *str) {
            use_putative = TRUE;
          }
        }
        if (rule->find == NULL || rule->find->choice != SearchFunc_string_constraint) {
          /* no string constraint to locate a portion: replace the whole value */
          *str = MemFree (*str);
          *str = StringSave (simple_replace->replace);
          rval = TRUE;
        } else if (simple_replace->whole_string && DoesStringMatchConstraint (*str, rule->find->data.ptrvalue)) {
          *str = MemFree (*str);
          *str = StringSave (simple_replace->replace);
          rval = TRUE;
        } else {
          rval = ReplaceStringConstraintPortionInString (str, simple_replace->replace, rule->find->data.ptrvalue);
        }
        /* put the kPutative prefix back unless the result already starts with it */
        if (use_putative && StringNCmp (*str, kPutative, StringLen (kPutative)) != 0) {
          SetStringValue (str, kPutative, ExistingTextOption_prefix_space);
        }
      }
      break;
    case ReplaceFunc_haem_replace:
      /* keep a copy so a change can be detected after both passes */
      orig = StringSave (*str);
      FindReplaceString (str, rule->replace->replace_func->data.ptrvalue, "heme", FALSE, TRUE);
      FindReplaceString (str, rule->replace->replace_func->data.ptrvalue, "hem", FALSE, FALSE);
      if (StringCmp (orig, *str) != 0) {
        rval = TRUE;
      }
      orig = MemFree (orig);
      break;
  }
  return rval;
}


/* Applies the replacement of rule to the first product name of the protein
 * feature found on the product of coding region cds.  When the name
 * changes, the protein name is replaced, the original is appended to the
 * coding region comment if the rule asks for it, and the mRNA product is
 * adjusted through AdjustmRNAProductToMatchProteinProduct.  When fp is not
 * NULL, one log line describing the change is written to it.
 *
 * Returns TRUE when the name was changed.
 */
NLM_EXTERN Boolean ApplySuspectProductNameFixToFeature (SuspectRulePtr rule, SeqFeatPtr cds, FILE *fp)
{
  BioseqPtr protbsp;
  SeqFeatPtr protfeat;
  SeqMgrFeatContext context;
  ProtRefPtr prp;
  CharPtr new_name, desc;
  Boolean rval = FALSE;
  ValNode vn;

  if (rule == NULL || rule->replace == NULL || cds == NULL || cds->data.choice != SEQFEAT_CDREGION) {
    return FALSE;
  }

  protbsp = BioseqFindFromSeqLoc (cds->product);
  protfeat = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &context);
  if (protfeat == NULL || protfeat->idx.subtype != FEATDEF_PROT
      || (prp = (ProtRefPtr) protfeat->data.value.ptrvalue) == NULL
      || prp->name == NULL) {
    return FALSE;
  }

  /* work on a copy so the original is still available for the log and the note */
  new_name = StringSave (prp->name->data.ptrvalue);
  if (ApplySuspectProductNameFixToString (rule, &new_name)) {
    if (fp != NULL) {
      fprintf (fp, "Changed '%s' to '%s'", prp->name->data.ptrvalue == NULL ?
               "" : (CharPtr) prp->name->data.ptrvalue, new_name);
    }
    if (rule->replace->move_to_note) {
      if (SetStringValue (&(cds->comment), prp->name->data.ptrvalue, ExistingTextOption_append_semi)) {
        if (fp != NULL) {
          fprintf (fp, " and moved original to note");
        }
      }
    }
    /* the feature takes ownership of new_name */
    prp->name->data.ptrvalue = MemFree (prp->name->data.ptrvalue);
    prp->name->data.ptrvalue = new_name;
    if (AdjustmRNAProductToMatchProteinProduct(protfeat)) {
      if (fp != NULL) {
        fprintf (fp, " and adjusted mRNA");
      }
    }
    if (fp != NULL) {
      /* temporary stack node so the coding region can be described */
      MemSet (&vn, 0, sizeof (ValNode));
      vn.choice = OBJ_SEQFEAT;
      vn.data.ptrvalue = cds;
      desc = GetDiscrepancyItemText (&vn);
      if (desc != NULL) {
        fprintf (fp, " for %s", desc);
        desc = MemFree (desc);
      }
      fprintf (fp, "\n");
    }
    rval = TRUE;
  } else {
    new_name = MemFree (new_name);
  }
  return rval;
}


/* Returns the text associated with a search function, used as the sort key
 * by CompareSearchFunc: the match text of a string constraint, "_" for the
 * underscore search, the stored text for prefix/term searches, and NULL for
 * every other kind.  The result is borrowed and must not be freed.
 */
static CharPtr TextFromSearchFunc (ValNodePtr s)
{
  StringConstraintPtr scp;
  CharPtr rval = NULL;

  if (s == NULL) {
    return NULL;
  }

  switch (s->choice) {
    case SearchFunc_string_constraint:
      scp = (StringConstraintPtr) s->data.ptrvalue;
      if (scp != NULL) {
        rval = scp->match_text;
      }
      break;
    case SearchFunc_contains_plural:
    case SearchFunc_n_or_more_brackets_or_parentheses:
    case SearchFunc_three_numbers:
    case SearchFunc_all_caps:
    case SearchFunc_unbalanced_paren:
    case SearchFunc_too_long:
      /* no text */
      break;
    case SearchFunc_underscore:
      rval = "_";
      break;
    case SearchFunc_prefix_and_numbers:
    case SearchFunc_has_term:
      rval = s->data.ptrvalue;
      break;
  }
  return rval;
}


/* Orders two search functions: NULL sorts first, then by associated text
 * (case-insensitive), then by choice value.
 */
static int CompareSearchFunc (ValNodePtr s1, ValNodePtr s2)
{
  CharPtr txt1, txt2;
  int rval;

  if (s1 == NULL && s2 == NULL) {
    rval = 0;
  } else if (s1 == NULL) {
    rval = -1;
  } else if (s2 == NULL) {
    rval = 1;
  } else {
    txt1 = TextFromSearchFunc (s1);
    txt2 = TextFromSearchFunc (s2);
    rval = StringICmp (txt1, txt2);
    if (rval == 0) {
      /* same text: fall back to the kind of search */
      if (s1->choice < s2->choice) {
        rval = -1;
      } else if (s1->choice > s2->choice) {
        rval = 1;
      }
    }
  }
  return rval;
}


/* Orders two rules by their find function; NULL rules sort first. */
static int CompareSuspectRuleByFind (SuspectRulePtr rule1, SuspectRulePtr rule2)
{
  int rval = 0;

  if (rule1 == NULL &&
rule2 == NULL) { rval = 0; } else if (rule1 == NULL) { rval = -1; } else if (rule2 == NULL) { rval = 1; } else if ((rval = CompareSearchFunc (rule1->find, rule2->find)) != 0) { /* no further comparisons */ } return rval; } static int LIBCALLBACK SortVnpBySuspectRuleFind (VoidPtr ptr1, VoidPtr ptr2) { ValNodePtr vnp1; ValNodePtr vnp2; int rval = 0; if (ptr1 != NULL && ptr2 != NULL) { vnp1 = *((ValNodePtr PNTR) ptr1); vnp2 = *((ValNodePtr PNTR) ptr2); if (vnp1 == NULL && vnp2 == NULL) { rval = 0; } else if (vnp1 == NULL) { rval = -1; } else if (vnp2 == NULL) { rval = 1; } else { rval = CompareSuspectRuleByFind (vnp1->data.ptrvalue, vnp2->data.ptrvalue); } } return rval; } static int CompareSuspectRuleByFixTypeThenFind (SuspectRulePtr rule1, SuspectRulePtr rule2) { int rval = 0; if (rule1 == NULL && rule2 == NULL) { rval = 0; } else if (rule1 == NULL) { rval = -1; } else if (rule2 == NULL) { rval = 1; } else if (rule1->rule_type < rule2->rule_type) { rval = -1; } else if (rule1->rule_type > rule2->rule_type) { rval = 1; } else if ((rval = CompareSearchFunc (rule1->find, rule2->find)) != 0) { /* no further comparisons */ } return rval; } static int LIBCALLBACK SortVnpBySuspectRuleFixTypeThenFind (VoidPtr ptr1, VoidPtr ptr2) { ValNodePtr vnp1; ValNodePtr vnp2; int rval = 0; if (ptr1 != NULL && ptr2 != NULL) { vnp1 = *((ValNodePtr PNTR) ptr1); vnp2 = *((ValNodePtr PNTR) ptr2); if (vnp1 == NULL && vnp2 == NULL) { rval = 0; } else if (vnp1 == NULL) { rval = -1; } else if (vnp2 == NULL) { rval = 1; } else { rval = CompareSuspectRuleByFixTypeThenFind (vnp1->data.ptrvalue, vnp2->data.ptrvalue); } } return rval; } static ValNodePtr MakeValNodeListFromSuspectRuleSet (SuspectRuleSetPtr rules) { ValNodeBlock block; SuspectRulePtr one; InitValNodeBlock (&block, NULL); for (one = rules; one != NULL; one = one->next) { ValNodeAddPointerToEnd (&block, 0, one); } return block.head; } static SuspectRuleSetPtr MakeSuspectRuleSetFromValNodeList (ValNodePtr tmp_list) { ValNodePtr vnp; 
SuspectRuleSetPtr first = NULL, last = NULL; for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) { if (last == NULL) { first = vnp->data.ptrvalue; } else { last->next = vnp->data.ptrvalue; } last = vnp->data.ptrvalue; last->next = NULL; } return first; } NLM_EXTERN void SortSuspectRuleSetByFind (SuspectRuleSetPtr PNTR rules) { ValNodePtr tmp_list; if (rules == NULL || *rules == NULL) { return; } tmp_list = MakeValNodeListFromSuspectRuleSet (*rules); tmp_list = ValNodeSort (tmp_list, SortVnpBySuspectRuleFind); *rules = MakeSuspectRuleSetFromValNodeList (tmp_list); tmp_list = ValNodeFree (tmp_list); } NLM_EXTERN void SortSuspectRuleSetByFixTypeThenFind (SuspectRuleSetPtr PNTR rules) { ValNodePtr tmp_list; if (rules == NULL || *rules == NULL) { return; } tmp_list = MakeValNodeListFromSuspectRuleSet (*rules); tmp_list = ValNodeSort (tmp_list, SortVnpBySuspectRuleFixTypeThenFind); *rules = MakeSuspectRuleSetFromValNodeList (tmp_list); tmp_list = ValNodeFree (tmp_list); } NLM_EXTERN void PrintSuspectRuleMatches (SeqEntryPtr sep, SuspectRuleSetPtr rules, FILE *fp) { ValNodePtr vnp_l, vnp_o, obj_lists; SuspectRulePtr rule; CharPtr summ; if (sep == NULL || rules == NULL || fp == NULL) { return; } obj_lists = GetFeaturesForSuspectRules (sep, rules, FEATDEF_PROT); for (vnp_l = obj_lists, rule = rules; vnp_l != NULL && rule != NULL; vnp_l = vnp_l->next, rule = rule->next) { if (ValNodeLen (vnp_l->data.ptrvalue) > 0) { summ = SummarizeSuspectRule (rule); fprintf (fp, "%s:%d\n", summ, ValNodeLen (vnp_l->data.ptrvalue)); summ = MemFree (summ); for (vnp_o = vnp_l->data.ptrvalue; vnp_o != NULL; vnp_o = vnp_o->next) { summ = GetDiscrepancyItemText (vnp_o); fprintf (fp, "\t%s", summ); summ = MemFree (summ); } } } obj_lists = FreeListOfObjectLists (obj_lists); } NLM_EXTERN ValNodePtr GetSuspectRuleDiscrepancies (SeqEntryPtr sep, SuspectRuleSetPtr rules, Uint2 featdef, Uint4 clickable_item_type) { ValNodePtr vnp_l, obj_lists, rval = NULL; SuspectRulePtr rule; CharPtr summ; CharPtr 
rna_fmt = "%%d rRNA product names %s"; CharPtr cds_fmt = "%%d product names %s"; CharPtr template_fmt; CharPtr fmt; if (sep == NULL || rules == NULL) { return NULL; } obj_lists = GetFeaturesForSuspectRules (sep, rules, featdef); if (featdef == FEATDEF_rRNA) { template_fmt = rna_fmt; } else { template_fmt = cds_fmt; } for (vnp_l = obj_lists, rule = rules; vnp_l != NULL && rule != NULL; vnp_l = vnp_l->next, rule = rule->next) { if (ValNodeLen (vnp_l->data.ptrvalue) > 0) { summ = SummarizeSuspectRule (rule); fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (summ) + StringLen (template_fmt))); sprintf (fmt, template_fmt, summ); summ = MemFree (summ); ValNodeAddPointer (&rval, 0, NewClickableItem (clickable_item_type, fmt, vnp_l->data.ptrvalue)); vnp_l->data.ptrvalue = NULL; fmt = MemFree (fmt); } } obj_lists = FreeListOfObjectLists (obj_lists); return rval; } NLM_EXTERN Int4 ApplySuspectRuleFixesToSeqEntry (SeqEntryPtr sep, SuspectRuleSetPtr rules, FILE *fp) { ValNodePtr vnp_l, vnp_o, obj_lists; SuspectRulePtr rule; CharPtr summ; Int4 num_changed = 0, total_num_changed = 0; Uint2 entityID; if (sep == NULL || rules == NULL) { return 0; } obj_lists = GetFeaturesForSuspectRules (sep, rules, FEATDEF_PROT); for (vnp_l = obj_lists, rule = rules; vnp_l != NULL && rule != NULL; vnp_l = vnp_l->next, rule = rule->next) { if (rule->replace == NULL || vnp_l->data.ptrvalue == NULL) { continue; } if (fp != NULL) { summ = SummarizeSuspectRule (rule); fprintf (fp, "%s:%d identified\n", summ, ValNodeLen (vnp_l->data.ptrvalue)); summ = MemFree (summ); } num_changed = 0; for (vnp_o = vnp_l->data.ptrvalue; vnp_o != NULL; vnp_o = vnp_o->next) { if (ApplySuspectProductNameFixToFeature (rule, vnp_o->data.ptrvalue, fp)) { num_changed++; } } if (fp != NULL) { fprintf (fp, "Num fixed: %d\n", num_changed); } total_num_changed += num_changed; } entityID = ObjMgrGetEntityIDForChoice(sep); ObjMgrSetDirtyFlag (entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0); obj_lists = 
FreeListOfObjectLists (obj_lists); return total_num_changed; } typedef struct rulesort { SuspectRulePtr rule; Int4 pos; } RuleSortData, PNTR RuleSortPtr; static RuleSortPtr RuleSortNew (SuspectRulePtr rule, Int4 pos) { RuleSortPtr r; r = (RuleSortPtr) MemNew (sizeof (RuleSortData)); r->rule = AsnIoMemCopy (rule, (AsnReadFunc)SuspectRuleAsnRead, (AsnWriteFunc) SuspectRuleAsnWrite); r->pos = pos; return r; } static RuleSortPtr RuleSortFree (RuleSortPtr r) { if (r != NULL) { r->rule = SuspectRuleFree (r->rule); r = MemFree (r); } return r; } static int LIBCALLBACK SortVnpByRuleSortRule (VoidPtr ptr1, VoidPtr ptr2) { ValNodePtr vnp1; ValNodePtr vnp2; RuleSortPtr r1, r2; int rval = 0; if (ptr1 != NULL && ptr2 != NULL) { vnp1 = *((ValNodePtr PNTR) ptr1); vnp2 = *((ValNodePtr PNTR) ptr2); if (vnp1 == NULL && vnp2 == NULL) { rval = 0; } else if (vnp1 == NULL) { rval = -1; } else if (vnp2 == NULL) { rval = 1; } else { r1 = (RuleSortPtr) vnp1->data.ptrvalue; r2 = (RuleSortPtr) vnp2->data.ptrvalue; rval = CompareSuspectRuleByFixTypeThenFind (r1->rule, r2->rule); } } return rval; } static int LIBCALLBACK SortVnpByRuleSortPos (VoidPtr ptr1, VoidPtr ptr2) { ValNodePtr vnp1; ValNodePtr vnp2; RuleSortPtr r1, r2; int rval = 0; if (ptr1 != NULL && ptr2 != NULL) { vnp1 = *((ValNodePtr PNTR) ptr1); vnp2 = *((ValNodePtr PNTR) ptr2); if (vnp1 == NULL && vnp2 == NULL) { rval = 0; } else if (vnp1 == NULL) { rval = -1; } else if (vnp2 == NULL) { rval = 1; } else { r1 = (RuleSortPtr) vnp1->data.ptrvalue; r2 = (RuleSortPtr) vnp2->data.ptrvalue; if (r1->pos < r2->pos) { rval = -1; } else if (r1->pos > r2->pos) { rval = 1; } else { rval = 0; } } } return rval; } static ValNodePtr SuspectRuleSetToRuleSortList (SuspectRuleSetPtr set) { ValNodeBlock block; SuspectRulePtr rule; Int4 pos; InitValNodeBlock (&block, NULL); for (rule = set, pos = 0; rule != NULL; rule = rule->next, pos++) { ValNodeAddPointerToEnd (&block, 0, RuleSortNew (rule, pos)); } return block.head; } static SuspectRuleSetPtr 
RuleSortListToSuspectRuleSet (ValNodePtr list) { ValNodePtr vnp; SuspectRuleSetPtr set = NULL; SuspectRulePtr last = NULL; RuleSortPtr r; for (vnp = list; vnp != NULL; vnp = vnp->next) { r = (RuleSortPtr) vnp->data.ptrvalue; if (r->rule != NULL) { if (last == NULL) { set = r->rule; } else { last->next = r->rule; } last = r->rule; r->rule = NULL; } } return set; } NLM_EXTERN void FindDiffsBetweenRuleSets (SuspectRuleSetPtr set1, SuspectRuleSetPtr set2, SuspectRuleSetPtr PNTR in1not2, SuspectRuleSetPtr PNTR in2not1) { ValNodePtr list1, list2; ValNodePtr vnp1, vnp2, cmp_start; RuleSortPtr r1, r2; Boolean found_match; /* eliminate duplicates, while maintaining original order */ list1 = SuspectRuleSetToRuleSortList(set1); list1 = ValNodeSort(list1, SortVnpByRuleSortRule); list2 = SuspectRuleSetToRuleSortList(set2); list2 = ValNodeSort(list2, SortVnpByRuleSortRule); cmp_start = list2; for (vnp1 = list1; vnp1 != NULL; vnp1 = vnp1->next) { r1 = (RuleSortPtr) vnp1->data.ptrvalue; for (vnp2 = cmp_start; vnp2 != NULL && (vnp2->choice == 1 || SortVnpByRuleSortRule(&vnp1, &vnp2) > 0); vnp2 = vnp2->next) { cmp_start = vnp2; } found_match = FALSE; while (vnp2 != NULL && (vnp2->choice == 1 || SortVnpByRuleSortRule(&vnp1, &vnp2) == 0) && !found_match) { if (vnp2->data.ptrvalue != NULL) { r2 = (RuleSortPtr) vnp2->data.ptrvalue; if (AsnIoMemComp (r1->rule, r2->rule, (AsnWriteFunc) SuspectRuleAsnWrite)) { found_match = TRUE; } } if (!found_match) { vnp2 = vnp2->next; } } if (found_match) { vnp1->data.ptrvalue = RuleSortFree(vnp1->data.ptrvalue); vnp1->choice = 1; vnp2->data.ptrvalue = RuleSortFree(vnp2->data.ptrvalue); vnp2->choice = 1; } } vnp1 = ValNodeExtractList (&list1, 1); vnp1 = ValNodeFree (vnp1); vnp2 = ValNodeExtractList (&list2, 1); vnp2 = ValNodeFree (vnp2); list1 = ValNodeSort (list1, SortVnpByRuleSortPos); list2 = ValNodeSort (list2, SortVnpByRuleSortPos); *in1not2 = RuleSortListToSuspectRuleSet (list1); *in2not1 = RuleSortListToSuspectRuleSet (list2); list1 = 
ValNodeFreeData (list1); list2 = ValNodeFreeData (list2); } static Boolean ReportRuleSetProblems (CharPtr product_name, SuspectRuleSetPtr rule_list, FILE *output_file, CharPtr prefix) { CharPtr summ; SuspectRulePtr rule; Boolean any_found = FALSE; /* report with rule set */ for (rule = rule_list; rule != NULL; rule = rule->next) { if (MatchesSuspectProductRule (product_name, rule)) { summ = SummarizeSuspectRule(rule); if (output_file == NULL) { if (prefix != NULL) { printf ("%s\t", prefix); } printf ("%s\t%s\n", product_name, summ); } else { if (prefix != NULL) { fprintf (output_file, "%s\t", prefix); } fprintf (output_file, "%s\t%s\n", product_name, summ); } summ = MemFree (summ); any_found = TRUE; } } return any_found; } static Boolean FixRuleSetProblems (CharPtr PNTR product_name, SuspectRuleSetPtr rule_list) { SuspectRulePtr rule; Boolean any_found = FALSE; if (product_name == NULL || *product_name == NULL) { return FALSE; } /* report with rule set */ for (rule = rule_list; rule != NULL; rule = rule->next) { if (rule->replace != NULL && MatchesSuspectProductRule (*product_name, rule)) { any_found |= ApplySuspectProductNameFixToString (rule, product_name); } } return any_found; } NLM_EXTERN Boolean FindSuspectProductNamesInEntrezGene (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file) { EntrezgenePtr egp; AsnIoPtr aip; ValNodePtr vnp; ProtRefPtr prp; GeneRefPtr grp; CharPtr prefix = NULL; Char geneid_buf[20]; aip = AsnIoNew (ASNIO_TEXT_IN, input_file, NULL, NULL, NULL); egp = EntrezgeneAsnRead (aip, NULL); if (egp == NULL) { return FALSE; } /* scan */ if (egp->prot != NULL) { if (egp->track_info != NULL && egp->track_info->geneid > 0) { sprintf (geneid_buf, "%d", egp->track_info->geneid); prefix = geneid_buf; } else if (egp->gene != NULL) { grp = (GeneRefPtr) egp->gene; if (grp->locus_tag == NULL) { prefix = grp->locus; } else { prefix = grp->locus_tag; } } prp = (ProtRefPtr) egp->prot; for (vnp = prp->name; vnp != NULL; vnp = vnp->next) { if 
(rule_list == NULL) { ReportProductNameProblems (vnp->data.ptrvalue, output_file, prefix); } else { ReportRuleSetProblems (vnp->data.ptrvalue, rule_list, output_file, prefix); } } } egp = EntrezgeneFree (egp); return TRUE; } NLM_EXTERN Boolean FixSuspectProductNamesInEntrezGene (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file) { EntrezgenePtr egp; AsnIoPtr aip; ValNodePtr vnp; ProtRefPtr prp; GeneRefPtr grp; CharPtr prefix = NULL; Char geneid_buf[20]; CharPtr product_name; aip = AsnIoNew (ASNIO_TEXT_IN, input_file, NULL, NULL, NULL); egp = EntrezgeneAsnRead (aip, NULL); if (egp == NULL) { return FALSE; } /* scan */ if (egp->prot != NULL) { if (egp->track_info != NULL && egp->track_info->geneid > 0) { sprintf (geneid_buf, "%d", egp->track_info->geneid); prefix = geneid_buf; } else if (egp->gene != NULL) { grp = (GeneRefPtr) egp->gene; if (grp->locus_tag == NULL) { prefix = grp->locus; } else { prefix = grp->locus_tag; } } prp = (ProtRefPtr) egp->prot; for (vnp = prp->name; vnp != NULL; vnp = vnp->next) { product_name = vnp->data.ptrvalue; if (rule_list == NULL) { FixProductNameProblems (&product_name); } else { FixRuleSetProblems (&product_name, rule_list); } fprintf (output_file, "%s\n", product_name); vnp->data.ptrvalue = product_name; } } egp = EntrezgeneFree (egp); return TRUE; } NLM_EXTERN void FindSuspectProductNamesInNameList (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file) { ReadBufferData rbd; CharPtr line; rbd.fp = input_file; rbd.current_data = NULL; line = AbstractReadFunction (&rbd); while (line != NULL) { if (rule_list == NULL) { ReportProductNameProblems (line, output_file, NULL); } else { ReportRuleSetProblems (line, rule_list, output_file, NULL); } line = MemFree (line); line = AbstractReadFunction (&rbd); } } NLM_EXTERN void FixSuspectProductNamesInNameList (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file) { ReadBufferData rbd; CharPtr line; rbd.fp = input_file; rbd.current_data = NULL; line = 
AbstractReadFunction (&rbd); while (line != NULL) { if (rule_list == NULL) { FixProductNameProblems (&line); } else { FixRuleSetProblems (&line, rule_list); } fprintf (output_file, "%s\n", line); line = MemFree (line); line = AbstractReadFunction (&rbd); } } /* code for special product table update */ typedef struct productupdatetableitem { CharPtr product_match; CharPtr new_name; CharPtr note_text; } ProductUpdateTableItemData, PNTR ProductUpdateTableItemPtr; static ProductUpdateTableItemPtr ProductUpdateTableItemNew (CharPtr product_match) { ProductUpdateTableItemPtr item; item = (ProductUpdateTableItemPtr) MemNew (sizeof (ProductUpdateTableItemData)); MemSet (item, 0, sizeof (ProductUpdateTableItemData)); item->product_match = product_match; return item; } static ProductUpdateTableItemPtr ProductUpdateTableItemFree (ProductUpdateTableItemPtr item) { if (item != NULL) { item->product_match = MemFree (item->product_match); item->new_name = MemFree (item->new_name); item->note_text = MemFree (item->note_text); item = MemFree (item); } return item; } static void ProductUpdateTableItemWrite (FILE *fp, ProductUpdateTableItemPtr item) { if (fp == NULL || item == NULL || StringHasNoText (item->product_match)) { return; } fprintf (fp, "%s", item->product_match); if (!StringHasNoText (item->new_name)) { fprintf (fp, "\tX\t%s", StringICmp (item->new_name, "hypothetical protein") == 0 ? "" : item->new_name); if (!StringHasNoText (item->note_text)) { fprintf (fp, "\tX\t%s", StringCmp (item->note_text, item->product_match) == 0 ? 
"" : item->note_text); } } fprintf (fp, "\n"); } NLM_EXTERN ValNodePtr ProductUpdateTableFree (ValNodePtr list) { ValNodePtr list_next; while (list != NULL) { list_next = list->next; list->next = NULL; list->data.ptrvalue = ProductUpdateTableItemFree (list->data.ptrvalue); list = ValNodeFree (list); list = list_next; } return list; } static void TrimBeginningAndEndingQuotes (CharPtr str) { CharPtr src, dst; if (str == NULL) { return; } if (*str == '"') { src = str + 1; dst = src; while (*src != 0) { *dst = *src; dst++; src++; } *dst = 0; } dst = str + StringLen(str) - 1; if (*dst == '"') { *dst = 0; } } static ProductUpdateTableItemPtr ProductUpdateTableItemFromValNodeList (ValNodePtr column_list) { ProductUpdateTableItemPtr item; ValNodePtr vnp; if (column_list == NULL || StringHasNoText (column_list->data.ptrvalue) || column_list->next == NULL || StringICmp (column_list->next->data.ptrvalue, "X") != 0) { return NULL; } item = ProductUpdateTableItemNew(column_list->data.ptrvalue); column_list->data.ptrvalue = NULL; vnp = column_list->next->next; /* get new product name. 
Default to hypothetical protein if not specified */ if (vnp == NULL || StringHasNoText (vnp->data.ptrvalue)) { item->new_name = StringSave ("hypothetical protein"); } else { item->new_name = vnp->data.ptrvalue; vnp->data.ptrvalue = NULL; } if (vnp != NULL) { vnp = vnp->next; } /* find out if note is required */ if (vnp != NULL && StringCmp (vnp->data.ptrvalue, "X") == 0) { if (vnp->next == NULL || StringHasNoText (vnp->next->data.ptrvalue)) { item->note_text = StringSave (item->product_match); } else { item->note_text = vnp->next->data.ptrvalue; vnp->next->data.ptrvalue = NULL; } } return item; } NLM_EXTERN ValNodePtr ReadProductUpdateTable (FILE *fp) { ReadBufferData rbd; CharPtr line; ValNodeBlock line_list; ValNodePtr column_list; ProductUpdateTableItemPtr item; if (fp == NULL) return NULL; rbd.fp = fp; rbd.current_data = NULL; InitValNodeBlock (&line_list, NULL); line = AbstractReadFunction (&rbd); while (line != NULL) { column_list = ReadOneColumnList (line); if (column_list != NULL) { TrimBeginningAndEndingQuotes(column_list->data.ptrvalue); item = ProductUpdateTableItemFromValNodeList(column_list); if (item != NULL) { ValNodeAddPointerToEnd (&line_list, 0, item); } column_list = ValNodeFreeData (column_list); } line = AbstractReadFunction (&rbd); } return line_list.head; } static void WriteProductUpdateTable (FILE *fp, ValNodePtr table) { ValNodePtr vnp; ProductUpdateTableItemPtr item; for (vnp = table; vnp != NULL; vnp = vnp->next) { item = (ProductUpdateTableItemPtr) vnp->data.ptrvalue; if (item != NULL && !StringHasNoText (item->product_match)) { ProductUpdateTableItemWrite(fp, item); } } } static ProductUpdateTableItemPtr GetProductUpdateTableItemForProduct (CharPtr product, ValNodePtr list) { ProductUpdateTableItemPtr item; ValNodePtr vnp; if (StringHasNoText (product) || list == NULL) { return NULL; } for (vnp = list; vnp != NULL; vnp = vnp->next) { if ((item = (ProductUpdateTableItemPtr) vnp->data.ptrvalue) != NULL && StringCmp (product, 
item->product_match) == 0) { return item; } } return NULL; } typedef struct productupdate { ValNodePtr table; FILE *log_fp; Boolean any_change; } ProductUpdateData, PNTR ProductUpdatePtr; static void ApplyProductUpdateCallback (SeqFeatPtr sfp, Pointer data) { ProductUpdatePtr pd; BioseqPtr pbsp; SeqFeatPtr prot; ProtRefPtr prp = NULL; SeqMgrFeatContext context; ProductUpdateTableItemPtr item = NULL; Char buf[255]; ValNodePtr vnp; Boolean adjusted_mrna; if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION || (pd = (ProductUpdatePtr) data) == NULL) { return; } pbsp = BioseqFindFromSeqLoc (sfp->product); prot = SeqMgrGetNextFeature (pbsp, NULL, 0, FEATDEF_PROT, &context); if (prot == NULL || (prp = (ProtRefPtr) prot->data.value.ptrvalue) == NULL) { prp = GetProtRefForFeature(sfp); } if (prp != NULL && prp->name != NULL) { item = GetProductUpdateTableItemForProduct (prp->name->data.ptrvalue, pd->table); if (item != NULL) { prp->name->data.ptrvalue = MemFree (prp->name->data.ptrvalue); prp->name->data.ptrvalue = StringSave (item->new_name); if (item->note_text != NULL) { SetStringValue (&(sfp->comment), item->note_text, ExistingTextOption_append_semi); } /* also need to move ec numbers to note, if any, for hypothetical protein */ if (StringICmp (item->new_name, "hypothetical protein") == 0 && prp->ec != NULL) { SetStringValue (&(sfp->comment), " EC_number=", ExistingTextOption_append_semi); SetStringValue (&(sfp->comment), prp->ec->data.ptrvalue, ExistingTextOption_append_none); for (vnp = prp->ec->next; vnp != NULL; vnp = vnp->next) { SetStringValue (&(sfp->comment), vnp->data.ptrvalue, ExistingTextOption_append_comma); } } adjusted_mrna = AdjustmRNAProductToMatchProteinProduct(prot); pd->any_change = TRUE; if (pd->log_fp != NULL) { SeqIdWrite (SeqIdFindBest (pbsp->id, SEQID_GENBANK), buf, PRINTID_REPORT, sizeof (buf) - 1); fprintf (pd->log_fp, "%s\t%s\t%s\t%s\t%s\n", buf, item->product_match, item->new_name, item->note_text == NULL ? 
"" : item->note_text, adjusted_mrna ? "Adjusted mRNA" : ""); } } } } NLM_EXTERN Boolean ApplyProductUpdateTable (ValNodePtr table, SeqEntryPtr sep, FILE *log_fp) { ProductUpdateData pd; if (table == NULL || sep == NULL) { return FALSE; } MemSet (&pd, 0, sizeof (ProductUpdateData)); pd.table = table; pd.log_fp = log_fp; VisitFeaturesInSep (sep, &pd, ApplyProductUpdateCallback); return pd.any_change; } static void ExportProductUpdateTableCallback (SeqFeatPtr sfp, Pointer data) { ProtRefPtr prp; if (sfp == NULL || data == NULL) { return; } if (sfp->data.choice == SEQFEAT_PROT && (prp = (ProtRefPtr)sfp->data.value.ptrvalue) != NULL && prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) { ValNodeAddPointerToEnd ((ValNodeBlockPtr) data, 0, StringSave (prp->name->data.ptrvalue)); } else if (sfp->data.choice == SEQFEAT_CDREGION && (prp = GetProtRefForFeature(sfp)) != NULL && prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) { ValNodeAddPointerToEnd ((ValNodeBlockPtr) data, 0, StringSave (prp->name->data.ptrvalue)); } } NLM_EXTERN void ExportProductUpdateTable (SeqEntryPtr sep, FILE *fp) { ValNodeBlock block; ValNodePtr vnp; if (sep == NULL || fp == NULL) { return; } InitValNodeBlock (&block, NULL); VisitFeaturesInSep (sep, &block, ExportProductUpdateTableCallback); block.head = ValNodeSort (block.head, SortVnpByString); ValNodeUnique (&(block.head), SortVnpByString, ValNodeFreeData); for (vnp = block.head; vnp != NULL; vnp = vnp->next) { fprintf (fp, "%s\n", (CharPtr) vnp->data.ptrvalue); } block.head = ValNodeFreeData (block.head); } static Boolean ApplySuspectProductNameFixToProductUpdateTableItem (SuspectRulePtr rule, ProductUpdateTableItemPtr item) { CharPtr new_name; Boolean rval = FALSE; if (rule == NULL || rule->replace == NULL || item == NULL || StringHasNoText (item->product_match)) { return FALSE; } if (item->new_name == NULL) { new_name = StringSave (item->product_match); } else { new_name = StringSave (item->new_name); } if 
(ApplySuspectProductNameFixToString (rule, &new_name)) { item->new_name = MemFree (item->new_name); item->note_text = MemFree (item->note_text); item->new_name = new_name; if (rule->replace->move_to_note) { item->note_text = StringSave (item->product_match); } rval = TRUE; } else { new_name = MemFree (new_name); } return rval; } static Boolean ApplySuspectProductNameFixesToProductUpdateTable (SuspectRuleSetPtr rule_set, ValNodePtr table) { SuspectRulePtr rule; ValNodePtr vnp; Boolean rval = FALSE, this_rule_apply, this_rule_match; ProductUpdateTableItemPtr item; if (rule_set == NULL || table == NULL) { return FALSE; } for (vnp = table; vnp != NULL; vnp = vnp->next) { this_rule_apply = FALSE; this_rule_match = FALSE; item = (ProductUpdateTableItemPtr) vnp->data.ptrvalue; for (rule = rule_set; rule != NULL; rule = rule->next) { if (ApplySuspectProductNameFixToProductUpdateTableItem (rule, item)) { this_rule_apply = TRUE; } else if (!this_rule_apply && !this_rule_match) { this_rule_match = MatchesSuspectProductRule (item->product_match, rule); } } if (!this_rule_apply && this_rule_match) { item->new_name = StringSave ("hypothetical protein"); item->note_text = StringSave (item->product_match); } } return rval; } NLM_EXTERN void ExportProductUpdateTableWithPrecomputedSuggestions (FILE *fp, SeqEntryPtr sep, SuspectRuleSetPtr rules) { ValNodeBlock block; ValNodePtr vnp; ProductUpdateTableItemPtr item; if (sep == NULL || fp == NULL) { return; } InitValNodeBlock (&block, NULL); VisitFeaturesInSep (sep, &block, ExportProductUpdateTableCallback); block.head = ValNodeSort (block.head, SortVnpByString); ValNodeUnique (&(block.head), SortVnpByString, ValNodeFreeData); for (vnp = block.head; vnp != NULL; vnp = vnp->next) { item = ProductUpdateTableItemNew(vnp->data.ptrvalue); vnp->data.ptrvalue = item; } ApplySuspectProductNameFixesToProductUpdateTable (rules, block.head); WriteProductUpdateTable (fp, block.head); block.head = ProductUpdateTableFree (block.head); } static Int4 
FindBioseqInValNodeList (BioseqPtr bsp, ValNodePtr list) { Int4 pos = 0; ValNodePtr vnp; vnp = list; while (vnp != NULL && bsp != vnp->data.ptrvalue) { pos++; vnp = vnp->next; } if (vnp == NULL) { return -1; } else { return pos; } } /* for update sequence matching */ /* note - must set scope to original before calling */ NLM_EXTERN ValNodePtr ShuffleUpdateBioseqListWithIndex (ValNodePtr PNTR update_bioseq_list, ValNodePtr orig_bioseq_list) { ValNodePtr unmatched_list = NULL; Int4 update_pos; BioseqPtr orig_bsp, update_bsp; BioseqSearchIndexPtr index; ValNodePtr unmatched_vnp, unmatched_next, unmatched_prev = NULL, update_prev = NULL; SeqIdPtr sip; ObjectIdPtr oip; BioseqPtr PNTR update_vector; Int4 len; if (update_bioseq_list == NULL || *update_bioseq_list == NULL) { return NULL; } else if (orig_bioseq_list == NULL) { unmatched_list = *update_bioseq_list; *update_bioseq_list = NULL; return unmatched_list; } len = ValNodeLen (orig_bioseq_list); update_vector = (BioseqPtr PNTR) MemNew (sizeof (BioseqPtr) * len); MemSet (update_vector, 0, sizeof (BioseqPtr) * len); index = BuildIDStringsListForBioseqList (orig_bioseq_list); /* for each update sequence, identifies original Bioseq and if found, removes from list */ for (unmatched_vnp = *update_bioseq_list; unmatched_vnp != NULL; unmatched_vnp = unmatched_next) { unmatched_next = unmatched_vnp->next; update_bsp = unmatched_vnp->data.ptrvalue; orig_bsp = NULL; if (update_bsp != NULL) { for (sip = update_bsp->id; sip != NULL && orig_bsp == NULL; sip = sip->next) { if (sip->choice == SEQID_LOCAL && (oip = (ObjectIdPtr)sip->data.ptrvalue) != NULL && oip->str != NULL) { orig_bsp = FindStringInIdListIndex (oip->str, index); } else { orig_bsp = BioseqFind (sip); } } } if (orig_bsp != NULL && (update_pos = FindBioseqInValNodeList (orig_bsp, orig_bioseq_list)) > -1) { update_vector[update_pos] = update_bsp; if (unmatched_prev == NULL) { *update_bioseq_list = unmatched_vnp->next; } else { unmatched_prev->next = 
unmatched_vnp->next; } unmatched_vnp->next = NULL; unmatched_vnp = ValNodeFree (unmatched_vnp); } else { unmatched_prev = unmatched_vnp; } } index = BioseqSearchIndexFree (index); unmatched_list = *update_bioseq_list; *update_bioseq_list = NULL; *update_bioseq_list = ValNodeNew (NULL); (*update_bioseq_list)->data.ptrvalue = update_vector[0]; update_prev = (*update_bioseq_list); for (update_pos = 1; update_pos < len; update_pos++) { update_prev = ValNodeNew (update_prev); update_prev->data.ptrvalue = update_vector[update_pos]; update_prev->choice = update_pos; } return unmatched_list; } static int LIBCALLBACK SortVnpByInt (VoidPtr ptr1, VoidPtr ptr2) { ValNodePtr vnp1; ValNodePtr vnp2; if (ptr1 == NULL || ptr2 == NULL) return 0; vnp1 = *((ValNodePtr PNTR) ptr1); vnp2 = *((ValNodePtr PNTR) ptr2); if (vnp1 == NULL || vnp2 == NULL) return 0; if (vnp1->data.intvalue > vnp2->data.intvalue) { return 1; } else if (vnp1->data.intvalue < vnp2->data.intvalue) { return -1; } return 0; } static void AddGeneQualifiersToNote (SeqFeatPtr gene, CharPtr PNTR note) { GeneRefPtr grp; GBQualPtr gbq; if (gene == NULL || note == NULL || gene->data.choice != SEQFEAT_GENE) { return; } grp = (GeneRefPtr) gene->data.value.ptrvalue; if (!StringHasNoText(grp->locus)) { SetStringValue(note, grp->locus, ExistingTextOption_prefix_semi); } if (!StringHasNoText(grp->allele)) { SetStringValue(note, grp->allele, ExistingTextOption_prefix_semi); } if (!StringHasNoText(grp->desc)) { SetStringValue(note, grp->desc, ExistingTextOption_prefix_semi); } if (!StringHasNoText(grp->maploc)) { SetStringValue(note, grp->maploc, ExistingTextOption_prefix_semi); } if (!StringHasNoText(grp->locus_tag)) { SetStringValue(note, grp->locus_tag, ExistingTextOption_prefix_semi); } if (!StringHasNoText(gene->comment)) { SetStringValue(note, gene->comment, ExistingTextOption_prefix_semi); } for (gbq = gene->qual; gbq != NULL; gbq = gbq->next) { if (!StringHasNoText (gbq->val)) { SetStringValue(note, gbq->val, 
ExistingTextOption_prefix_semi); } } } static void LogCDSConversion (LogInfoPtr lip, SeqFeatPtr sfp, SeqFeatPtr gene, ProtRefPtr prp) { GeneRefPtr grp; CharPtr desc = NULL; CharPtr loc; if (lip == NULL || lip->fp == NULL) { return; } if (gene != NULL && (grp = gene->data.value.ptrvalue) != NULL) { if (!StringHasNoText (grp->locus_tag)) { desc = grp->locus_tag; } else if (!StringHasNoText (grp->locus)) { desc = grp->locus; } } if (desc == NULL && prp != NULL) { if (prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) { desc = prp->name->data.ptrvalue; } } if (desc == NULL) { desc = "Unknown"; } loc = SeqLocPrint (sfp->location); fprintf (lip->fp, "%s CDS at %s converted to misc_feature", desc, loc); loc = MemFree (loc); lip->data_in_log = TRUE; } static void LogrRNAConversion (LogInfoPtr lip, SeqFeatPtr sfp, SeqFeatPtr gene) { GeneRefPtr grp; CharPtr desc = NULL, loc; if (lip == NULL || lip->fp == NULL) { return; } if (gene != NULL && (grp = gene->data.value.ptrvalue) != NULL) { if (!StringHasNoText (grp->locus_tag)) { desc = StringSave(grp->locus_tag); } else if (!StringHasNoText (grp->locus)) { desc = StringSave(grp->locus); } } if (desc == NULL) { desc = GetRNAProductString(sfp, NULL); } if (desc == NULL) { desc = StringSave("unknown"); } loc = SeqLocPrint (sfp->location); fprintf (lip->fp, "%s rRNA at %s converted to misc_feature", desc, loc); loc = MemFree (loc); desc = MemFree (desc); lip->data_in_log = TRUE; } /* for cleaning up bad features identified by validator or asndisc */ NLM_EXTERN void ConvertListToMiscFeat (ValNodePtr list, Boolean remove_gene, LogInfoPtr lip) { ValNodePtr vnp, other_list; SeqFeatPtr sfp, gene; ProtRefPtr prp; BioseqPtr pbsp; ImpFeatPtr ifp; CharPtr rna_name; ValNodePtr entityIDList = NULL; SeqEntryPtr sep; Boolean converted; for (vnp = list; vnp != NULL; vnp = vnp->next) { sfp = (SeqFeatPtr) vnp->data.ptrvalue; gene = GetGeneForFeature(sfp); converted = FALSE; if (sfp->data.choice == SEQFEAT_CDREGION) { prp = 
GetProtRefForFeature (sfp); LogCDSConversion(lip, sfp, gene, prp); if (prp != NULL && prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) { SetStringValue(&(sfp->comment), prp->name->data.ptrvalue, ExistingTextOption_prefix_semi); } pbsp = BioseqFindFromSeqLoc (sfp->product); if (pbsp != NULL) { pbsp->idx.deleteme = TRUE; } sfp->data.value.ptrvalue = CdRegionFree (sfp->data.value.ptrvalue); sfp->data.choice = SEQFEAT_IMP; ifp = ImpFeatNew(); ifp->key = StringSave("misc_feature"); sfp->data.value.ptrvalue = ifp; sfp->product = SeqLocFree (sfp->product); sfp->idx.subtype = 0; ValNodeAddInt (&entityIDList, 0, sfp->idx.entityID); converted = TRUE; } else if (sfp->data.choice == SEQFEAT_RNA) { LogrRNAConversion(lip, sfp, gene); rna_name = GetRNAProductString(sfp, NULL); SetStringValue(&(sfp->comment), rna_name, ExistingTextOption_prefix_semi); sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue); sfp->data.choice = SEQFEAT_IMP; ifp = ImpFeatNew(); ifp->key = StringSave("misc_feature"); sfp->data.value.ptrvalue = ifp; sfp->idx.subtype = 0; ValNodeAddInt (&entityIDList, 0, sfp->idx.entityID); converted = TRUE; } if (converted && remove_gene && gene != NULL) { other_list = GetFeaturesForGene(gene, 0); if (ValNodeLen (other_list) < 2) { AddGeneQualifiersToNote(gene, &(sfp->comment)); gene->idx.deleteme = TRUE; if (lip != NULL && lip->fp != NULL) { fprintf (lip->fp, ", gene deleted"); } } other_list = ValNodeFree (other_list); } if (converted && lip != NULL && lip->fp != NULL) { fprintf (lip->fp, "\n"); } } entityIDList = ValNodeSort (entityIDList, SortVnpByInt); ValNodeUnique (&entityIDList, SortVnpByInt, ValNodeFree); for (vnp = entityIDList; vnp != NULL; vnp = vnp->next) { /* remove any protein sequences or genes that were marked for deletion */ DeleteMarkedObjects (vnp->data.intvalue, 0, NULL); sep = GetTopSeqEntryForEntityID(vnp->data.intvalue); RenormalizeNucProtSets (sep, TRUE); SeqMgrIndexFeatures (vnp->data.intvalue, NULL); } entityIDList = 
ValNodeFree (entityIDList); }