diff options
author | Aaron M. Ucko <ucko@debian.org> | 2009-07-27 02:35:37 +0000 |
---|---|---|
committer | Aaron M. Ucko <ucko@debian.org> | 2009-07-27 02:35:37 +0000 |
commit | cb2452a815dd397299bc41d4ca4339883a4cf19e (patch) | |
tree | 913c0be09727fb828caf8e574469c56dc6ffb262 /network | |
parent | e8f3513c621e07a1d0890d3dab064a122ef9533e (diff) |
[svn-upgrade] Integrating new upstream version, ncbi-tools6 (6.1.20090719)
Diffstat (limited to 'network')
-rwxr-xr-x | network/id1arch/Makefile | 66 | ||||
-rw-r--r-- | network/id1arch/idfetch.c | 210 | ||||
-rw-r--r-- | network/medarch/client/medutil.c | 163 |
3 files changed, 247 insertions, 192 deletions
diff --git a/network/id1arch/Makefile b/network/id1arch/Makefile new file mode 100755 index 00000000..4e9fef63 --- /dev/null +++ b/network/id1arch/Makefile @@ -0,0 +1,66 @@ +include $(NCBI)/ncbi.mk + + +INCLUDES = -I . -I$(NCBI_INCDIR) +CFLAGS=$(CCOPT) $(INCLUDES) -g #-DDEBUG_ASN_PRINT + +LDFLAGS = -L. -L$(NCBI_ALTLIB) -lncbiid1 -lnetcli -lncbitool -lblastcompadj -lncbiobj -lncbi -lm + +CC = gcc -g + +BINARIES = idfetch + +## +## some things to make +## + +all : $(BINARIES) + +idfetch : idfetch.o + $(CC) -o $@ idfetch.o $(LDFLAGS) + +idfetch.pure : idfetch.o + purify $(CC) -o $@ idfetch.o $(LDFLAGS) + +test : test.o + $(CC) -o $@ test.o $(LDFLAGS) + +seqidtest : seqidtest.o + $(CC) -o $@ seqidtest.o $(LDFLAGS) +sample : sample.o + $(CC) -o $@ sample.o $(LDFLAGS) +gi2hash : gi2hash.o id_hash.o + $(CC) -o $@ gi2hash.o id_hash.o $(LDFLAGS) +generate : id1gen.c + +library : libncbiid1.a + +libncbiid1.a : id1gen.o id1arch.o accid1.o + rm -f $@ + ar -q $@ id1gen.o id1arch.o accid1.o + + +#id1gen.c : id1.l id1.h all.l id1map.h +# rm -f id1generr id1genout +# $(CODEGEN) -i id1map.h -l id1.l,all.l -o id1gen -d . > id1genout 2>id1generr + +#id1.l : id1.asn +# rm -f id1.l* +# rm -f id1.h id1stat.h +# $(ASNTOOL) -m id1.asn -l id1.h +# rm -f id1.h +# $(ASNTOOL) -m id1.asn -o id1.h +# mv id1.l* id1.l + +#all.l : +# rm -f all.l* +# $(ASNTOOL) -m $(NCBI_ASNALL) -l all.h +# cp all.l* all.l +# +#id1.h : id1.asn +# $(ASNTOOL) -m id1.asn -o id1.h + + + +clean : + - rm -f *.o diff --git a/network/id1arch/idfetch.c b/network/id1arch/idfetch.c index ce598744..3a62da9e 100644 --- a/network/id1arch/idfetch.c +++ b/network/id1arch/idfetch.c @@ -24,172 +24,9 @@ * =========================================================================== * Author Karl Sirotkin * - $Log: idfetch.c,v $ - Revision 1.42 2008/03/08 03:44:04 ucko - Comment out one more extraneous explicit CONN_STATELESS setting. - - Revision 1.41 2008/03/07 22:12:24 lavr - #if 0 and comment out explicit STATENESS requirement on service - - Revision 1.40 2007/03/27 13:30:10 kans - moved sqnutils.h early to avoid collision with grp1 define - - Revision 1.39 2006/08/02 15:16:04 vysokolo - Added features tRNA and microRNA - - Revision 1.38 2005/05/16 23:18:34 vysokolo - Added features 'HPRD' and 'STS' to the key '-F'. - - Revision 1.37 2005/04/13 14:38:12 kans - prototype for TryGetGi, send NORMAL_STYLE to SeqEntryToGnbk again - - Revision 1.36 2004/10/19 21:51:29 vysokolo - Bug fix of -s key - - Revision 1.35 2004/10/12 21:39:28 vysokolo - Added intervals of accessions like: "ABC_000123-ABC_000456" - - Revision 1.34 2004/10/04 19:30:25 vysokolo - The "strcasecmp" replaced by "StringICmp" - - Revision 1.33 2004/09/30 17:59:26 vysokolo - Added key -F to enable features by name. - - Revision 1.32 2004/05/25 18:41:35 kans - removed obsolete STREAM_SEQ_PORT_FIRST flag - - Revision 1.31 2004/02/18 22:18:45 yaschenk - adding recognition of gnl|sat_name|ent seqids - - Revision 1.30 2004/02/03 21:25:16 yaschenk - relaxing ranges for -g and -e - - Revision 1.29 2003/12/17 20:35:38 kans - initialize status, send NORMAL_STYLE to SeqEntrytoGnbk instead of 0 (also fixed in asn2gnbk), pass lookup flags - - Revision 1.28 2003/11/19 16:35:19 yaschenk - relaxing ranges for -g and -c - - Revision 1.27 2003/03/28 18:48:39 yaschenk - tuning ObjMgr, adding STREAM_SEQ_PORT_FIRST to SeqEntryToGnbk - - Revision 1.26 2003/01/29 23:08:19 yaschenk - fixing FASTA on far pointers - - Revision 1.25 2003/01/21 22:27:23 kans - new CstType parameter for flatfile generator - - Revision 1.24 2002/12/30 22:36:53 yaschenk - optimizing.. - - Revision 1.23 2002/11/07 17:21:55 yaschenk - switching ID1 to new displatcher - - Revision 1.22 2002/07/23 19:31:43 butanaev - Filtered out gi -1 - - Revision 1.21 2001/11/02 14:24:44 kans - made Fasta style SeqId args multi-line for Mac window - - Revision 1.20 2001/11/02 12:36:20 kans - now using public Entrez2 server - - Revision 1.19 2001/09/28 15:56:04 kans - look for extra and title fields in Entrez2 docsum - - Revision 1.18 2001/09/10 21:09:36 kans - changed to use new Entrez2DocsumDataPtr - still need to get example of field_name keys - - Revision 1.17 2001/02/12 21:57:11 butanaev - Made 3 retries to EntrezSynchronousQuery() when the NULL is returned. - - Revision 1.16 2001/02/08 16:13:46 yaschenk - fixing wrong check for missing version in _PIR and SP - - Revision 1.15 2000/10/06 22:59:44 yaschenk - strncpy not setting \0 bug - - Revision 1.14 2000/08/10 15:17:38 butanaev - Updated -t 7 mode: strings like 'gi|3|emb|A00003.1|A00003' retreived from - Entrez2DocsumPtr->caption. - - Revision 1.13 2000/08/03 17:01:23 kans - included ni_lib.h for Mac, removed Mac compiler warnings - - Revision 1.12 2000/08/02 16:55:28 yaschenk - increasing buffer size to 1000 - - Revision 1.11 2000/08/02 16:17:00 butanaev - Added: - -t 7 - to retrieve Entrez DocSums - -Q filename - to read Entrez query from the file - - Revision 1.9 2000/07/13 16:46:54 yaschenk - adding ObjMgrFreeCache(0) to avoid hitting the limit in ObrMgr - - Revision 1.2 2000/06/01 18:05:35 butanaev - Fixed numerous bugs with control flow... - - Revision 1.1 2000/06/01 16:48:22 butanaev - New functionality: - -G parameter, which previously accepted the list of gi's, - now accepts gi,accession,accession.version,fasta seqid, - which can be mixed. - - -q parameter generates the list out of Entrez query - when -q is used -d has special meaning: - -d n - run query against Nucleotide database - -d p - run query against Protein database - - -n parameter limits the output to the list of gi's - - Revision 1.6 2000/05/24 17:30:42 yaschenk - make parameter list look better - - Revision 1.5 2000/05/23 15:42:08 yaschenk - adding quality score display - - Revision 1.4 2000/03/31 18:35:58 yaschenk - Adding Jonathan's logic for FF and FASTA - - Revision 1.3 2000/03/30 20:43:51 yaschenk - adding AsnIoReset between Entries - - Revision 1.2 1999/11/02 18:27:43 yaschenk - adding -G parameter to idfetch - - Revision 1.1 1998/12/28 17:56:29 yaschenk - preparing idfetch to go to production - - Revision 1.1 1997/05/29 14:34:07 sirotkin - syncing sampson from mutant for procs. taking source from sampson. this is now current - - * Revision 4.0 1995/07/26 13:55:55 ostell - * force revision to 4.0 - * - * Revision 1.3 1995/06/21 14:14:29 kans - * replaced asn2ff_entrez with SeqEntryToFlat - * - * Revision 1.2 1995/05/17 17:59:15 epstein - * add RCS log revision history - * - * Revision 1.1 94/08/11 13:26:31 ostell - * Initial revision - * - * Revision 1.3 1993/12/02 10:12:41 kans - * Includes <ncbi.h> instead of <sys/types.h> - * - * Revision 1.2 93/11/24 13:25:56 sirotkin - * First working version - * - * Revision 1.1 93/11/23 16:01:51 sirotkin - * Initial revision - * - revised by OStell for public use. - * - * Modified by Eugene Yaschenko for ID1 Server * */ + #include <ncbi.h> #include <objsset.h> #include <sequtil.h> @@ -273,7 +110,7 @@ int Numarg = sizeof(myargs)/sizeof(myargs[0]); static Nlm_Int2 Nlm_WhichArg PROTO(( Nlm_Char which, Nlm_Int2 numargs, Nlm_ArgPtr ap)); static void MyBioseqToFasta(BioseqPtr bsp, Pointer userdata); -static Boolean CreateMaxPlexParam(); +static Boolean CreateMaxPlexParam(void); static Int4 GetIntervalAccession( const Char* pAccession, Char* pResult); Int4 giBuffer[1000]; @@ -292,7 +129,6 @@ Int2 Main() Boolean has_trouble = FALSE; Int4 entity_spec_count = 0; CharPtr outmode; - Int4 ent = 0; Int4 gi = 0; FILE * fp_in = NULL; SeqIdPtr sip; @@ -509,12 +345,11 @@ Int2 Main() "flaTtened SeqId, format: type(name,accession,release,version) or type=accession", */ - static CharPtr name = NULL, release = NULL, version = NULL, number = NULL; + static CharPtr name = NULL, release = NULL, number = NULL; CharPtr p; static CharPtr PNTR fields [] = {&name, &accession, &release, &number}; Boolean found_equals = FALSE, found_left = FALSE, - found_colon = FALSE, flat_seqid_err = FALSE, - dna_type = FALSE, any_type = FALSE; + found_colon = FALSE, dna_type = FALSE, any_type = FALSE; int dex; TextSeqIdPtr tsip; @@ -622,6 +457,7 @@ Int2 Main() case SEQID_SWISSPROT : case SEQID_OTHER : case SEQID_PRF : + case SEQID_GPIPE : tsip = TextSeqIdNew(); sip->data.ptrvalue = tsip; if(accession) @@ -887,7 +723,9 @@ static Boolean IdFetch_func1(CharPtr data, Int2 maxplex) if((gi = TryGetGi(SEQID_GENBANK, acc, NULL, ver)) || (gi = TryGetGi(SEQID_OTHER, acc, NULL, ver)) || (gi = TryGetGi(SEQID_GENBANK, NULL, acc, ver)) || - (gi = TryGetGi(SEQID_OTHER, NULL, acc, ver))) + (gi = TryGetGi(SEQID_OTHER, NULL, acc, ver)) || + (gi = TryGetGi(SEQID_GPIPE, acc, NULL, ver)) || + (gi = TryGetGi(SEQID_GPIPE, NULL, acc, ver))) return IdFetch_func(gi, myargs[dbarg].strvalue, myargs[entarg].intvalue, @@ -1132,11 +970,11 @@ static Boolean IdFetch_func(Int4 gi,CharPtr db, Int4 ent,Int2 maxplex) switch(myargs[infotypearg].intvalue){ case 0: if(bsp){ - MyBioseqToFasta(bsp,(Pointer)fp); + MyBioseqToFasta(bsp,(Pointer)fp); } else { - VisitBioseqsInSep(sep,(Pointer)fp, MyBioseqToFasta); + VisitBioseqsInSep(sep,(Pointer)fp, MyBioseqToFasta); } break; case 2: @@ -1155,7 +993,9 @@ static Boolean IdFetch_func(Int4 gi,CharPtr db, Int4 ent,Int2 maxplex) DONE: if(bsp) { +#if 0 static Uint2 reap_cnt; +#endif BioseqUnlock(bsp); #if 0 reap_cnt++; @@ -1269,10 +1109,17 @@ static Int4 BEGetUidsFromQuery(CharPtr query, Uint4Ptr PNTR uids, #define FASTA_LINE_SIZE 70 #define FASTA_LINES_IN_CHUNK 5000 +#define SEQPORT_2_SEQSTREAM + static void MyBioseqToFasta(BioseqPtr bsp, Pointer userdata) { +#ifdef SEQPORT_2_SEQSTREAM + SeqInt si; + ValNode vn; +#else SeqPortPtr spp=NULL; +#endif Char buf[2048]; Char str[200]; ValNodePtr vnp; @@ -1291,6 +1138,19 @@ MyBioseqToFasta(BioseqPtr bsp, Pointer userdata) while(start < bsp->length){ stop=start+step-1; if(stop >= bsp->length) stop=bsp->length-1; +#ifdef SEQPORT_2_SEQSTREAM + MemSet ((Pointer) &si, 0, sizeof (SeqInt)); + MemSet ((Pointer) &vn, 0, sizeof (ValNode)); + + si.from = start; si.to = stop; si.strand = 0; + si.id = SeqIdFindBest (bsp->id, 0); + + vn.choice = SEQLOC_INT; + vn.data.ptrvalue = (Pointer) &si; + + SeqLocFastaStream (&vn, fp, STREAM_ALLOW_NEG_GIS | + STREAM_EXPAND_GAPS | SUPPRESS_VIRT_SEQ, FASTA_LINE_SIZE, 0, 0); +#else spp = SeqPortNew(bsp,start,stop,0, (ISA_na(bsp->mol))?Seq_code_iupacna:Seq_code_ncbieaa); if(spp==NULL) return; SeqPortSet_do_virtual(spp, TRUE); @@ -1302,8 +1162,12 @@ MyBioseqToFasta(BioseqPtr bsp, Pointer userdata) SeqPortFree(spp); spp=NULL; } +#endif start=stop+1; } + SeqMgrFreeCache(); + ObjMgrReap(ObjMgrGet()); + ObjMgrFreeCache(0); } /* @@ -1328,7 +1192,7 @@ select * from annot_types; 9 "microRNA" efff 13 */ -Boolean CreateMaxPlexParam() +static Boolean CreateMaxPlexParam(void) { Char buf[1024]; Char *ptoken = NULL; diff --git a/network/medarch/client/medutil.c b/network/medarch/client/medutil.c index ff59c9fd..0421f034 100644 --- a/network/medarch/client/medutil.c +++ b/network/medarch/client/medutil.c @@ -28,7 +28,7 @@ * * Version Creation Date: 8/31/93 * -* $Revision: 6.23 $ +* $Revision: 6.24 $ * * File Description: Medline Utilities for MedArch * Assumes user calls MedArchInit and Fini @@ -44,6 +44,9 @@ * * RCS Modification History: * $Log: medutil.c,v $ +* Revision 6.24 2009/06/19 19:27:29 bazhin +* Added support for multiple consortium names. +* * Revision 6.23 2007/12/04 23:29:22 bazhin * MergePubIds() renamed to MergeNonPubmedPubIds(). Merging is * limited to types DOI and OTHER only. @@ -396,11 +399,18 @@ static Boolean ten_authors_compare(CitArtPtr capold, CitArtPtr capnew) Boolean ten_authors(CitArtPtr art, CitArtPtr art_tmp) { NameStdPtr namestd; + ValNodePtr oldcon; + ValNodePtr newcon; + ValNodePtr tvnp; + ValNodePtr vnp; ValNodePtr v; AuthorPtr aup; + CharPtr oldbuf; + CharPtr newbuf; CharPtr mu[10]; - CharPtr oldcon; - CharPtr newcon; + CharPtr p; + Int4 oldlen; + Int4 newlen; Int2 num; Int2 numnew; Int2 numtmp; @@ -427,24 +437,124 @@ Boolean ten_authors(CitArtPtr art, CitArtPtr art_tmp) if(art->authors->choice != 1) return(ten_authors_compare(art, art_tmp)); + oldbuf = NULL; oldcon = NULL; + oldlen = 1; for(num = 0, v = art->authors->names; v != NULL; v = v->next) { aup = v->data.ptrvalue; if(aup->name->choice == 2) num++; else if(aup->name->choice == 5) - oldcon = aup->name->data; + { + p = aup->name->data; + oldlen += (StringLen(p) + 2); + if(oldcon == NULL) + { + oldcon = ValNodeNew(NULL); + oldcon->data.ptrvalue = p; + continue; + } + + for(vnp = oldcon; vnp != NULL; vnp = vnp->next) + { + if(StringICmp(p, vnp->data.ptrvalue) <= 0) + { + if(vnp == oldcon) + { + oldcon = ValNodeNew(NULL); + oldcon->data.ptrvalue = p; + oldcon->next = vnp; + } + else + { + tvnp = ValNodeNew(NULL); + tvnp->data.ptrvalue = vnp->data.ptrvalue; + vnp->data.ptrvalue = p; + tvnp->next = vnp->next; + vnp->next = tvnp; + } + break; + } + if(vnp->next == NULL) + { + vnp->next = ValNodeNew(NULL); + vnp->next->data.ptrvalue = p; + break; + } + } + } + } + if(oldcon != NULL) + { + oldbuf = MemNew(oldlen); + oldbuf[0] = '\0'; + for(vnp = oldcon; vnp != NULL; vnp = vnp->next) + { + if(oldbuf[0] != '\0') + StringCat(oldbuf, "; "); + StringCat(oldbuf, vnp->data.ptrvalue); + } } + newbuf = NULL; newcon = NULL; + newlen = 1; for(numtmp = 0, v = art_tmp->authors->names; v != NULL; v = v->next) { aup = v->data.ptrvalue; if(aup->name->choice == 2) numtmp++; else if(aup->name->choice == 5) - newcon = aup->name->data; + { + p = aup->name->data; + newlen += (StringLen(p) + 2); + if(newcon == NULL) + { + newcon = ValNodeNew(NULL); + newcon->data.ptrvalue = p; + continue; + } + + for(vnp = newcon; vnp != NULL; vnp = vnp->next) + { + if(StringICmp(p, vnp->data.ptrvalue) <= 0) + { + if(vnp == newcon) + { + newcon = ValNodeNew(NULL); + newcon->data.ptrvalue = p; + newcon->next = vnp; + } + else + { + tvnp = ValNodeNew(NULL); + tvnp->data.ptrvalue = vnp->data.ptrvalue; + vnp->data.ptrvalue = p; + tvnp->next = vnp->next; + vnp->next = tvnp; + } + break; + } + if(vnp->next == NULL) + { + vnp->next = ValNodeNew(NULL); + vnp->next->data.ptrvalue = p; + break; + } + } + } + } + if(newcon != NULL) + { + newbuf = MemNew(newlen); + newbuf[0] = '\0'; + for(vnp = newcon; vnp != NULL; vnp = vnp->next) + { + if(newbuf[0] != '\0') + StringCat(newbuf, "; "); + StringCat(newbuf, vnp->data.ptrvalue); + } } if(oldcon != NULL) @@ -453,27 +563,42 @@ Boolean ten_authors(CitArtPtr art, CitArtPtr art_tmp) { ErrPostEx(SEV_WARNING, ERR_REFERENCE_NoConsortAuthors, "Publication as returned by MedArch lacks consortium authors of the original publication: \"%s\".", - oldcon); - aup = AuthorNew(); - aup->name = PersonIdNew(); - aup->name->choice = 5; - aup->name->data = StringSave(oldcon); - v = ValNodeNew(NULL); - v->data.ptrvalue = aup; - v->next = art_tmp->authors->names; - art_tmp->authors->names = v; - newcon = oldcon; + oldbuf); + for(vnp = oldcon;; vnp = vnp->next) + { + aup = AuthorNew(); + aup->name = PersonIdNew(); + aup->name->choice = 5; + aup->name->data = StringSave(vnp->data.ptrvalue); + vnp->data.ptrvalue = aup; + if(vnp->next == NULL) + break; + } + vnp->next = art_tmp->authors->names; + art_tmp->authors->names = oldcon; } - else if(StringICmp(oldcon, newcon) != 0) + else { - ErrPostEx(SEV_WARNING, ERR_REFERENCE_DiffConsortAuthors, - "Consortium author names differ. Original is \"%s\". MedArch's is \"%s\".", - oldcon, newcon); + if(StringICmp(oldbuf, newbuf) != 0) + ErrPostEx(SEV_WARNING, ERR_REFERENCE_DiffConsortAuthors, + "Consortium author names differ. Original is \"%s\". MedArch's is \"%s\".", + oldbuf, newbuf); + MemFree(newbuf); + newbuf = NULL; + ValNodeFree(oldcon); + ValNodeFree(newcon); + newcon = NULL; } + MemFree(oldbuf); if(num == 0) return(TRUE); } + if(newcon != NULL) + ValNodeFree(newcon); + if(newbuf != NULL) + MemFree(newbuf); + numnew = 0; for(v = art_tmp->authors->names; v != NULL && numnew < 10; v = v->next) { |