summaryrefslogtreecommitdiff
path: root/access/tax3api.c
diff options
context:
space:
mode:
Diffstat (limited to 'access/tax3api.c')
-rw-r--r--access/tax3api.c416
1 files changed, 290 insertions, 126 deletions
diff --git a/access/tax3api.c b/access/tax3api.c
index 8b57085c..89f94c77 100644
--- a/access/tax3api.c
+++ b/access/tax3api.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/8/04
*
-* $Revision: 1.46 $
+* $Revision: 1.59 $
*
* File Description:
*
@@ -64,6 +64,30 @@ static const CharPtr tax3servicename = "TaxService3";
static const CharPtr tax3servicename = "TaxService3Test";
#endif
+/* Post-processes a Taxon3 reply right after it has been read.
+ * For every reply element of kind T3Reply_data that carries an OrgRef with an
+ * OrgName, the OrgName's pgcode is overwritten with the value returned by
+ * GetSpecialPlastidGenCode for that organism's taxname and lineage.
+ * A NULL reply, and elements missing any of those pieces, are left untouched.
+ */
+static void Tax3ReplyFixup (
+  Taxon3ReplyPtr t3ry
+)
+
+{
+  T3ReplyPtr  reply;
+  T3DataPtr   payload;
+  OrgRefPtr   org;
+  OrgNamePtr  name;
+
+  if (t3ry == NULL) return;
+
+  reply = t3ry->reply;
+  while (reply != NULL) {
+    if (reply->choice == T3Reply_data) {
+      payload = (T3DataPtr) reply->data.ptrvalue;
+      org = (payload != NULL) ? (OrgRefPtr) payload->org : NULL;
+      name = (org != NULL) ? org->orgname : NULL;
+      if (name != NULL) {
+        name->pgcode = GetSpecialPlastidGenCode (org->taxname, name->lineage);
+      }
+    }
+    reply = reply->next;
+  }
+}
+
NLM_EXTERN CONN Tax3OpenConnection (
void
)
@@ -126,6 +150,7 @@ NLM_EXTERN Taxon3ReplyPtr Tax3WaitForReply (
if (status == eIO_Success) {
aicp = QUERY_AsnIoConnOpen (text_tax_asn ? "r" : "rb", conn);
t3ry = Taxon3ReplyAsnRead (aicp->aip, NULL);
+ Tax3ReplyFixup (t3ry);
QUERY_AsnIoConnClose (aicp);
}
CONN_Close (conn);
@@ -143,6 +168,7 @@ NLM_EXTERN Taxon3ReplyPtr Tax3SynchronousQuery (
AsnIoConnPtr aicp;
CONN conn;
Taxon3ReplyPtr t3ry;
+ time_t t1, t2, t3;
if (t3rq == NULL) return NULL;
@@ -159,7 +185,10 @@ NLM_EXTERN Taxon3ReplyPtr Tax3SynchronousQuery (
QUERY_SendQuery (conn);
+ t1 = time(NULL);
t3ry = Tax3WaitForReply (conn);
+ t2 = time(NULL);
+ t3 = t2 - t1;
return t3ry;
}
@@ -215,6 +244,7 @@ NLM_EXTERN Taxon3ReplyPtr Tax3ReadReply (
if (conn != NULL && status == eIO_Success) {
aicp = QUERY_AsnIoConnOpen (text_tax_asn ? "r" : "rb", conn);
t3ry = Taxon3ReplyAsnRead (aicp->aip, NULL);
+ Tax3ReplyFixup (t3ry);
QUERY_AsnIoConnClose (aicp);
}
return t3ry;
@@ -246,6 +276,20 @@ NLM_EXTERN Taxon3RequestPtr CreateTaxon3Request (
return t2rp;
}
+
+/* Writes a Taxon3 request to the file at path through an AsnIo stream opened
+ * in "w" mode.  Does nothing when the request is NULL or the stream cannot be
+ * opened.  The only call sites visible in this file are commented out, so
+ * this appears to be kept around for ad-hoc debugging.
+ */
+static void SaveTaxon3Request (Taxon3RequestPtr t3rp, CharPtr path)
+{
+  AsnIoPtr out;
+
+  if (t3rp == NULL) return;
+
+  out = AsnIoOpen (path, "w");
+  if (out == NULL) return;
+
+  Taxon3RequestAsnWrite (t3rp, out, NULL);
+  AsnIoClose (out);
+}
+
NLM_EXTERN Taxon3RequestPtr CreateMultiTaxon3Request (ValNodePtr org_list)
{
ValNodePtr vnp;
@@ -259,20 +303,40 @@ NLM_EXTERN Taxon3RequestPtr CreateMultiTaxon3Request (ValNodePtr org_list)
{
switch (vnp->choice)
{
- case 1:
- ValNodeAddInt (&(t3rp->request), 1, vnp->data.intvalue);
+ case T3Request_taxid:
+ ValNodeAddInt (&(t3rp->request), T3Request_taxid, vnp->data.intvalue);
break;
- case 2:
- ValNodeCopyStr (&(t3rp->request), 2, vnp->data.ptrvalue);
+ case T3Request_name:
+ ValNodeCopyStr (&(t3rp->request), T3Request_name, vnp->data.ptrvalue);
break;
- case 3:
+ case T3Request_org:
orp = AsnIoMemCopy (vnp->data.ptrvalue,
(AsnReadFunc) OrgRefAsnRead,
(AsnWriteFunc) OrgRefAsnWrite);
- ValNodeAddPointer (&(t3rp->request), 3, (Pointer) orp);
+ ValNodeAddPointer (&(t3rp->request), T3Request_org, (Pointer) orp);
break;
}
}
+
+ /* SaveTaxon3Request(t3rp, "request.txt"); */
+ return t3rp;
+}
+
+/* Builds a Taxon3 request holding a single T3Request_join entry.
+ * The data.intvalue of every node in taxon_list is copied, in order, into a
+ * newly built ValNode list, and that list becomes the pointer payload of the
+ * join entry.  Returns NULL when the request itself cannot be allocated.
+ */
+NLM_EXTERN Taxon3RequestPtr CreateJoinRequest (ValNodePtr taxon_list)
+{
+  Taxon3RequestPtr t3rp;
+  ValNodePtr       src;
+  ValNodePtr       ids = NULL;
+
+  t3rp = Taxon3RequestNew ();
+  if (t3rp == NULL) return NULL;
+
+  src = taxon_list;
+  while (src != NULL) {
+    ValNodeAddInt (&ids, T3Request_join, src->data.intvalue);
+    src = src->next;
+  }
+  ValNodeAddPointer (&(t3rp->request), T3Request_join, ids);
+
+/* SaveTaxon3Request(t3rp, "join_request.txt"); */
 return t3rp;
}
@@ -293,6 +357,29 @@ static Boolean HasMisspellingFlag (T3DataPtr t)
}
+/* Converts the status list of a Taxon3 data record into a bit mask of
+ * eReturnedOrgFlag_* values.
+ *   "unpublished_name" sets eReturnedOrgFlag_unpublished,
+ *   "misspelled_name"  sets eReturnedOrgFlag_misspelled;
+ * any other property is ignored.  When neither bit ends up set the result is
+ * eReturnedOrgFlag_normal, so the function never reports "no flags at all".
+ */
+static Uint1 GetStatusFlags (T3DataPtr t)
+{
+  Uint1 flags = 0;
+  T3StatusFlagsPtr status;
+
+  /* A missing record has no status entries; report it the same way as a
+   * record with an empty status list instead of returning the Boolean FALSE
+   * from a function that yields a flag mask.
+   */
+  if (t == NULL) return eReturnedOrgFlag_normal;
+
+  for (status = t->status; status != NULL; status = status->next) {
+    if (StringCmp (status->property, "unpublished_name") == 0) {
+      flags |= eReturnedOrgFlag_unpublished;
+    } else if (StringCmp (status->property, "misspelled_name") == 0) {
+      flags |= eReturnedOrgFlag_misspelled;
+    }
+  }
+  if (flags == 0) {
+    flags = eReturnedOrgFlag_normal;
+  }
+  return flags;
+}
+
+
+
static int LIBCALLBACK SortVnpByOrgRef (VoidPtr ptr1, VoidPtr ptr2)
{
@@ -323,11 +410,15 @@ NLM_EXTERN ValNodePtr Taxon3GetOrgRefList (ValNodePtr org_list)
ValNodePtr PNTR ptr_array;
ValNodePtr vnp, vnp_rq, vnp_rp;
Int4 i, num_orgs;
+ Uint1 choice;
+ TextFsaPtr tags;
if (org_list == NULL) {
return NULL;
}
+ tags = GetOrgModSearch();
+
/* make a copy of the original list - we will prepare the response list by substituting the OrgRef */
org_list = ValNodeCopyPtr (org_list);
@@ -383,11 +474,9 @@ NLM_EXTERN ValNodePtr Taxon3GetOrgRefList (ValNodePtr org_list)
tdp = (T3DataPtr) trp->data.ptrvalue;
if (tdp != NULL) {
t3orp = (OrgRefPtr)(tdp->org);
- if (HasMisspellingFlag (tdp)) {
- ValNodeAddPointer (&response_list, eReturnedOrgFlag_misspelled, (Pointer) t3orp);
- } else {
- ValNodeAddPointer (&response_list, eReturnedOrgFlag_normal, (Pointer) t3orp);
- }
+ choice = GetStatusFlags (tdp);
+ ParseTaxNameToQuals(t3orp, tags);
+ ValNodeAddPointer (&response_list, choice, (Pointer) t3orp);
tdp->org = NULL;
}
break;
@@ -439,6 +528,7 @@ NLM_EXTERN ValNodePtr Taxon3GetOrgRefList (ValNodePtr org_list)
ptr_array[num_orgs - 1]->next = NULL;
org_list = ptr_array[0];
ptr_array = MemFree (ptr_array);
+ tags = TextFsaFree (tags);
return org_list;
}
@@ -697,10 +787,12 @@ static void CheckSuggestedFixes (ValNodePtr tax_fix_list)
}
if (t != NULL) {
t->suggested_fix = MemFree (t->suggested_fix);
- t->suggested_fix = vnp_rp->data.ptrvalue;
- vnp_rp->data.ptrvalue = NULL;
- vnp_rq = vnp_rq->next;
- vnp_rp = vnp_rp->next;
+ if (vnp_rq != NULL) {
+ t->suggested_fix = vnp_rp->data.ptrvalue;
+ vnp_rp->data.ptrvalue = NULL;
+ vnp_rq = vnp_rq->next;
+ vnp_rp = vnp_rp->next;
+ }
}
}
rp_list = ValNodeFreeData (rp_list);
@@ -866,7 +958,7 @@ NLM_EXTERN OrgRefPtr Taxon3GetOrg (OrgRefPtr orp)
OrgRefPtr t3orp = NULL;
T3ReplyPtr trp;
T3ErrorPtr tep;
-
+
if (orp == NULL) return NULL;
t3rq = CreateTaxon3Request (0, NULL, orp);
@@ -903,7 +995,7 @@ static Boolean DoOrgIdsMatch(BioSourcePtr b1, BioSourcePtr b2)
{
DbtagPtr d1 = NULL, d2 = NULL;
ValNodePtr vnp;
-
+
if (b1 == NULL || b2 == NULL)
{
return FALSE;
@@ -923,21 +1015,21 @@ static Boolean DoOrgIdsMatch(BioSourcePtr b1, BioSourcePtr b2)
for (vnp = b2->org->db; vnp; vnp = vnp->next)
{
d2 = (DbtagPtr) vnp->data.ptrvalue;
- if (StringCmp(d2->db, "taxon") == 0)
- {
+ if (StringCmp(d2->db, "taxon") == 0)
+ {
break;
- }
+ }
}
if (d1 && d2)
{
- if (d1->tag->id == d2->tag->id)
- {
+ if (d1->tag->id == d2->tag->id)
+ {
return TRUE;
- }
+ }
}
else if (StringICmp(b1->org->taxname, b2->org->taxname) == 0)
{
- return TRUE;
+ return TRUE;
}
return FALSE;
}
@@ -946,17 +1038,17 @@ static BioSourcePtr Tax3BioSourceMerge(BioSourcePtr host, BioSourcePtr guest)
{
SubSourcePtr ssp, sp, last_ssp;
OrgModPtr omp, homp, last_omp;
- OrgNamePtr onp;
-
+ OrgNamePtr onp;
+
if (host == NULL && guest == NULL)
{
return NULL;
}
if (host == NULL && guest != NULL)
{
- host = AsnIoMemCopy(guest, (AsnReadFunc) BioSourceAsnRead,
- (AsnWriteFunc) BioSourceAsnWrite);
- return host;
+ host = AsnIoMemCopy(guest, (AsnReadFunc) BioSourceAsnRead,
+ (AsnWriteFunc) BioSourceAsnWrite);
+ return host;
}
if (host != NULL && guest == NULL)
{
@@ -973,12 +1065,12 @@ static BioSourcePtr Tax3BioSourceMerge(BioSourcePtr host, BioSourcePtr guest)
last_ssp = host->subtype;
while (last_ssp != NULL && last_ssp->next != NULL)
{
- last_ssp = last_ssp->next;
+ last_ssp = last_ssp->next;
}
for (ssp = guest->subtype; ssp; ssp = ssp->next)
{
sp = AsnIoMemCopy(ssp, (AsnReadFunc) SubSourceAsnRead,
- (AsnWriteFunc) SubSourceAsnWrite);
+ (AsnWriteFunc) SubSourceAsnWrite);
if (last_ssp == NULL)
{
host->subtype = sp;
@@ -991,12 +1083,12 @@ static BioSourcePtr Tax3BioSourceMerge(BioSourcePtr host, BioSourcePtr guest)
}
if (guest->org->orgname)
{
- if ((onp = host->org->orgname) == NULL)
- {
- onp = OrgNameNew();
- host->org->orgname = onp;
- }
- last_omp = onp->mod;
+ if ((onp = host->org->orgname) == NULL)
+ {
+ onp = OrgNameNew();
+ host->org->orgname = onp;
+ }
+ last_omp = onp->mod;
while (last_omp != NULL && last_omp->next != NULL)
{
last_omp = last_omp->next;
@@ -1004,15 +1096,15 @@ static BioSourcePtr Tax3BioSourceMerge(BioSourcePtr host, BioSourcePtr guest)
for (omp = guest->org->orgname->mod; omp; omp = omp->next)
{
homp = AsnIoMemCopy(omp, (AsnReadFunc) OrgModAsnRead,
- (AsnWriteFunc) OrgModAsnWrite);
+ (AsnWriteFunc) OrgModAsnWrite);
if (last_omp == NULL)
{
- onp->mod = homp;
+ onp->mod = homp;
}
else
{
- last_omp->next = homp;
- last_omp = homp;
+ last_omp->next = homp;
+ last_omp = homp;
}
}
}
@@ -1021,101 +1113,101 @@ static BioSourcePtr Tax3BioSourceMerge(BioSourcePtr host, BioSourcePtr guest)
/**************************************************************************
-* Compare BioSources in one bioseq->descr using Taxonomy to find
-* their join parent
-* merge if organisms are the same or create a feature if different
+* Compare BioSources in one bioseq->descr using Taxonomy to find
+* their join parent
+* merge if organisms are the same or create a feature if different
*
**************************************************************************/
NLM_EXTERN void Tax3MergeSourceDescr (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
- BioseqPtr bsp = NULL;
- ValNodePtr vnp, newlist;
- SeqFeatPtr sfp;
- BioSourcePtr first_biop = NULL;
- BioSourcePtr other_biop;
- BioSourcePtr tmp_biop;
- ObjValNodePtr ovp;
-
- if (!IS_Bioseq(sep)) {
- return;
- }
- newlist = (ValNodePtr) data;
- bsp = (BioseqPtr) sep->data.ptrvalue;
- if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)
- && (bsp->repr != Seq_repr_delta))
- return;
-
- if (! ISA_na(bsp->mol))
- return;
-
- /* add the descriptors in newlist to the end of the list in bsp->descr*/
- if (bsp->descr == NULL)
- {
- bsp->descr = newlist;
- }
- else
- {
- for (vnp = bsp->descr; vnp->next != NULL; vnp = vnp->next)
- {
- }
- vnp->next = newlist;
- }
-
- /* now find the first source descriptor in bsp->descr that has an org*/
+ BioseqPtr bsp = NULL;
+ ValNodePtr vnp, newlist;
+ SeqFeatPtr sfp;
+ BioSourcePtr first_biop = NULL;
+ BioSourcePtr other_biop;
+ BioSourcePtr tmp_biop;
+ ObjValNodePtr ovp;
+
+ if (!IS_Bioseq(sep)) {
+ return;
+ }
+ newlist = (ValNodePtr) data;
+ bsp = (BioseqPtr) sep->data.ptrvalue;
+ if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)
+ && (bsp->repr != Seq_repr_delta))
+ return;
+
+ if (! ISA_na(bsp->mol))
+ return;
+
+ /* add the descriptors in newlist to the end of the list in bsp->descr*/
+ if (bsp->descr == NULL)
+ {
+ bsp->descr = newlist;
+ }
+ else
+ {
+ for (vnp = bsp->descr; vnp->next != NULL; vnp = vnp->next)
+ {
+ }
+ vnp->next = newlist;
+ }
+
+ /* now find the first source descriptor in bsp->descr that has an org*/
/* note - we can't use SeqMgrGetNextDescriptor here because we have just
* added to the descriptors, so they are not indexed. */
- for (vnp = bsp->descr; vnp != NULL; vnp = vnp->next)
- {
- if (vnp->choice != Seq_descr_source) continue;
- if (vnp->data.ptrvalue == NULL)
- {
- ErrPostStr(SEV_WARNING, 0, 0, "Source descriptor missing data");
- if (vnp->extended)
- {
- ovp = (ObjValNodePtr) vnp;
- ovp->idx.deleteme = TRUE;
- }
- }
- if (first_biop == NULL)
- {
- first_biop = vnp->data.ptrvalue;
- }
- else
- {
- other_biop = vnp->data.ptrvalue;
- /* detach biosource pointer from descr, so that it will not be freed
- * when the descriptor is deleted.
- */
- vnp->data.ptrvalue = NULL;
+ for (vnp = bsp->descr; vnp != NULL; vnp = vnp->next)
+ {
+ if (vnp->choice != Seq_descr_source) continue;
+ if (vnp->data.ptrvalue == NULL)
+ {
+ ErrPostStr(SEV_WARNING, 0, 0, "Source descriptor missing data");
+ if (vnp->extended)
+ {
+ ovp = (ObjValNodePtr) vnp;
+ ovp->idx.deleteme = TRUE;
+ }
+ }
+ if (first_biop == NULL)
+ {
+ first_biop = vnp->data.ptrvalue;
+ }
+ else
+ {
+ other_biop = vnp->data.ptrvalue;
+ /* detach biosource pointer from descr, so that it will not be freed
+ * when the descriptor is deleted.
+ */
+ vnp->data.ptrvalue = NULL;
if (vnp->extended)
{
ovp = (ObjValNodePtr) vnp;
- ovp->idx.deleteme = TRUE;
+ ovp->idx.deleteme = TRUE;
}
if (DoOrgIdsMatch(first_biop, other_biop))
- {
- /* merge the two sources */
- tmp_biop = Tax3BioSourceMerge(first_biop, other_biop);
- if (tmp_biop == NULL)
- {
- ErrPostStr (SEV_WARNING, 0, 0, "Failed to merge biosources");
- }
- else
- {
- first_biop = tmp_biop;
- }
- other_biop = BioSourceFree (other_biop);
- } else {
- /* create a source feature */
- sfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_BIOSRC, NULL);
- if (sfp != NULL)
- {
+ {
+ /* merge the two sources */
+ tmp_biop = Tax3BioSourceMerge(first_biop, other_biop);
+ if (tmp_biop == NULL)
+ {
+ ErrPostStr (SEV_WARNING, 0, 0, "Failed to merge biosources");
+ }
+ else
+ {
+ first_biop = tmp_biop;
+ }
+ other_biop = BioSourceFree (other_biop);
+ } else {
+ /* create a source feature */
+ sfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_BIOSRC, NULL);
+ if (sfp != NULL)
+ {
sfp->data.value.ptrvalue = other_biop;
- }
+ }
}
- }
- }
- return;
+ }
+ }
+ return;
}
static Int4 GetTaxIdFromOrgRef (OrgRefPtr orp)
@@ -1187,7 +1279,7 @@ static void AddBioSourceToList (BioSourcePtr biop, Pointer userdata)
ValNodeAddPointer (list, 4, (Pointer) biop);
}
-NLM_EXTERN void Taxon3ReplaceOrgInSeqEntry (SeqEntryPtr sep, Boolean keep_syn)
+NLM_EXTERN void Taxon3ReplaceOrgInSeqEntryEx (SeqEntryPtr sep, Boolean keep_syn, Boolean replace_unpub)
{
ValNodePtr biop_list = NULL;
ValNodePtr request_list = NULL;
@@ -1218,7 +1310,8 @@ NLM_EXTERN void Taxon3ReplaceOrgInSeqEntry (SeqEntryPtr sep, Boolean keep_syn)
biop = (BioSourcePtr) biop_vnp->data.ptrvalue;
swap_org = biop->org;
response_org = response_vnp->data.ptrvalue;
- if (response_org != NULL)
+ if (response_org != NULL
+ && (replace_unpub || !(response_vnp->choice & eReturnedOrgFlag_unpublished)))
{
biop->org = response_org;
response_vnp->data.ptrvalue = NULL;
@@ -1235,6 +1328,12 @@ NLM_EXTERN void Taxon3ReplaceOrgInSeqEntry (SeqEntryPtr sep, Boolean keep_syn)
}
+/* Wrapper kept under the original name: forwards sep and keep_syn unchanged
+ * to Taxon3ReplaceOrgInSeqEntryEx with replace_unpub set to TRUE.
+ */
+NLM_EXTERN void Taxon3ReplaceOrgInSeqEntry (SeqEntryPtr sep, Boolean keep_syn)
+{
+ Taxon3ReplaceOrgInSeqEntryEx (sep, keep_syn, TRUE);
+}
+
+
static void GetBioSourceFeaturesForCheck (SeqFeatPtr sfp, Pointer userdata)
{
ValNodePtr PNTR list = (ValNodePtr PNTR) userdata;
@@ -2673,3 +2772,68 @@ NLM_EXTERN ValNodePtr GetOrganismTaxLookupFailuresInSeqEntry (SeqEntryPtr sep)
return failed_list;
}
+
+/* BioSource visitor used with VisitBioSourcesInSep.
+ * data is treated as a ValNodePtr PNTR; for every db entry of the BioSource's
+ * organism whose db name is "taxon" and whose tag id is positive, the id is
+ * appended to that list as an integer ValNode with choice 0.
+ * BioSources without an org, and a NULL data pointer, are ignored.
+ */
+static void CollectTaxIds (BioSourcePtr biop, Pointer data)
+{
+  ValNodePtr vnp;
+  DbtagPtr dbtag;
+
+  if (biop == NULL || biop->org == NULL || data == NULL) {
+    return;
+  }
+  for (vnp = biop->org->db; vnp != NULL; vnp = vnp->next) {
+    dbtag = (DbtagPtr) vnp->data.ptrvalue;
+    /* an entry without a tag has no id to read; skip it rather than
+     * dereference a NULL tag pointer
+     */
+    if (dbtag == NULL || dbtag->tag == NULL) continue;
+    if (StringCmp ("taxon", dbtag->db) == 0 && dbtag->tag->id > 0) {
+      ValNodeAddInt ((ValNodePtr PNTR) data, 0, dbtag->tag->id);
+    }
+  }
+}
+
+
+/* Collects the taxon ids of all BioSources in sep, sends them to the taxonomy
+ * service as one join request, and returns the OrgRef found in the reply.
+ *
+ * Returns NULL when no tax ids are found (an error is posted), when the query
+ * yields no reply, or when the reply holds no data element with an OrgRef.
+ * Error elements in the reply are posted through ErrPostEx.
+ * The returned OrgRef is detached from the reply before the reply is freed,
+ * so it stays valid after this function returns; presumably the caller is
+ * expected to free it - confirm against callers.
+ */
+NLM_EXTERN OrgRefPtr GetCommonOrgRefForSeqEntry (SeqEntryPtr sep)
+{
+  ValNodePtr list = NULL;
+  Taxon3RequestPtr t3rq;
+  T3ReplyPtr trp;
+  Taxon3ReplyPtr t3ry;
+  T3DataPtr tdp;
+  T3ErrorPtr tep;
+  OrgRefPtr org = NULL;
+
+  VisitBioSourcesInSep (sep, &list, CollectTaxIds);
+  if (list == NULL) {
+    ErrPostEx (SEV_ERROR, 0, 0, "No tax IDs found - cannot create PopSet Title");
+    return NULL;
+  }
+  /* NOTE(review): the list is not sorted before this call; if ValNodeUnique
+   * only drops adjacent duplicates, non-adjacent repeats stay in the request
+   * - confirm.
+   */
+  ValNodeUnique (&list, SortByIntvalue, ValNodeFree);
+
+  t3rq = CreateJoinRequest (list);
+  list = ValNodeFree (list);
+
+  t3ry = Tax3SynchronousQuery (t3rq);
+  Taxon3RequestFree (t3rq);
+  if (t3ry != NULL) {
+    for (trp = t3ry->reply; trp != NULL; trp = trp->next) {
+      switch (trp->choice) {
+        case T3Reply_error :
+          tep = (T3ErrorPtr) trp->data.ptrvalue;
+          if (tep != NULL) {
+            /* the message comes from the server; never use it as the format
+             * string itself, a stray '%' would be interpreted as a conversion
+             */
+            ErrPostEx (SEV_ERROR, 0, 0, "%s", tep->message);
+          }
+          break;
+        case T3Reply_data :
+          tdp = (T3DataPtr) trp->data.ptrvalue;
+          if (tdp != NULL) {
+            /* NOTE(review): if a reply ever carries more than one data
+             * element, the previously detached OrgRef is overwritten here
+             * without being freed - confirm a join reply holds only one.
+             */
+            org = (OrgRefPtr)(tdp->org);
+            tdp->org = NULL;
+          }
+          break;
+        default :
+          break;
+      }
+    }
+    Taxon3ReplyFree (t3ry);
+  }
+  return org;
+}
+