summary refs log tree commit diff
path: root/network
diff options
context:
space:
mode:
author Aaron M. Ucko <ucko@debian.org> 2009-07-27 02:35:37 +0000
committer Aaron M. Ucko <ucko@debian.org> 2009-07-27 02:35:37 +0000
commit cb2452a815dd397299bc41d4ca4339883a4cf19e (patch)
tree 913c0be09727fb828caf8e574469c56dc6ffb262 /network
parent e8f3513c621e07a1d0890d3dab064a122ef9533e (diff)
[svn-upgrade] Integrating new upstream version, ncbi-tools6 (6.1.20090719)
Diffstat (limited to 'network')
-rwxr-xr-x network/id1arch/Makefile 66
-rw-r--r-- network/id1arch/idfetch.c 210
-rw-r--r-- network/medarch/client/medutil.c 163
3 files changed, 247 insertions, 192 deletions
diff --git a/network/id1arch/Makefile b/network/id1arch/Makefile
new file mode 100755
index 00000000..4e9fef63
--- /dev/null
+++ b/network/id1arch/Makefile
@@ -0,0 +1,66 @@
+include $(NCBI)/ncbi.mk
+
+
+INCLUDES = -I . -I$(NCBI_INCDIR)
+CFLAGS=$(CCOPT) $(INCLUDES) -g #-DDEBUG_ASN_PRINT
+
+LDFLAGS = -L. -L$(NCBI_ALTLIB) -lncbiid1 -lnetcli -lncbitool -lblastcompadj -lncbiobj -lncbi -lm
+
+CC = gcc -g
+
+BINARIES = idfetch
+
+##
+## some things to make
+##
+
+all : $(BINARIES)
+
+idfetch : idfetch.o
+ $(CC) -o $@ idfetch.o $(LDFLAGS)
+
+idfetch.pure : idfetch.o
+ purify $(CC) -o $@ idfetch.o $(LDFLAGS)
+
+test : test.o
+ $(CC) -o $@ test.o $(LDFLAGS)
+
+seqidtest : seqidtest.o
+ $(CC) -o $@ seqidtest.o $(LDFLAGS)
+sample : sample.o
+ $(CC) -o $@ sample.o $(LDFLAGS)
+gi2hash : gi2hash.o id_hash.o
+ $(CC) -o $@ gi2hash.o id_hash.o $(LDFLAGS)
+generate : id1gen.c
+
+library : libncbiid1.a
+
+libncbiid1.a : id1gen.o id1arch.o accid1.o
+ rm -f $@
+ ar -q $@ id1gen.o id1arch.o accid1.o
+
+
+#id1gen.c : id1.l id1.h all.l id1map.h
+# rm -f id1generr id1genout
+# $(CODEGEN) -i id1map.h -l id1.l,all.l -o id1gen -d . > id1genout 2>id1generr
+
+#id1.l : id1.asn
+# rm -f id1.l*
+# rm -f id1.h id1stat.h
+# $(ASNTOOL) -m id1.asn -l id1.h
+# rm -f id1.h
+# $(ASNTOOL) -m id1.asn -o id1.h
+# mv id1.l* id1.l
+
+#all.l :
+# rm -f all.l*
+# $(ASNTOOL) -m $(NCBI_ASNALL) -l all.h
+# cp all.l* all.l
+#
+#id1.h : id1.asn
+# $(ASNTOOL) -m id1.asn -o id1.h
+
+
+
+clean :
+ - rm -f *.o
diff --git a/network/id1arch/idfetch.c b/network/id1arch/idfetch.c
index ce598744..3a62da9e 100644
--- a/network/id1arch/idfetch.c
+++ b/network/id1arch/idfetch.c
@@ -24,172 +24,9 @@
* ===========================================================================
* Author Karl Sirotkin
*
- $Log: idfetch.c,v $
- Revision 1.42 2008/03/08 03:44:04 ucko
- Comment out one more extraneous explicit CONN_STATELESS setting.
-
- Revision 1.41 2008/03/07 22:12:24 lavr
- #if 0 and comment out explicit STATENESS requirement on service
-
- Revision 1.40 2007/03/27 13:30:10 kans
- moved sqnutils.h early to avoid collision with grp1 define
-
- Revision 1.39 2006/08/02 15:16:04 vysokolo
- Added features tRNA and microRNA
-
- Revision 1.38 2005/05/16 23:18:34 vysokolo
- Added features 'HPRD' and 'STS' to the key '-F'.
-
- Revision 1.37 2005/04/13 14:38:12 kans
- prototype for TryGetGi, send NORMAL_STYLE to SeqEntryToGnbk again
-
- Revision 1.36 2004/10/19 21:51:29 vysokolo
- Bug fix of -s key
-
- Revision 1.35 2004/10/12 21:39:28 vysokolo
- Added intervals of accessions like: "ABC_000123-ABC_000456"
-
- Revision 1.34 2004/10/04 19:30:25 vysokolo
- The "strcasecmp" replaced by "StringICmp"
-
- Revision 1.33 2004/09/30 17:59:26 vysokolo
- Added key -F to enable features by name.
-
- Revision 1.32 2004/05/25 18:41:35 kans
- removed obsolete STREAM_SEQ_PORT_FIRST flag
-
- Revision 1.31 2004/02/18 22:18:45 yaschenk
- adding recognition of gnl|sat_name|ent seqids
-
- Revision 1.30 2004/02/03 21:25:16 yaschenk
- relaxing ranges for -g and -e
-
- Revision 1.29 2003/12/17 20:35:38 kans
- initialize status, send NORMAL_STYLE to SeqEntrytoGnbk instead of 0 (also fixed in asn2gnbk), pass lookup flags
-
- Revision 1.28 2003/11/19 16:35:19 yaschenk
- relaxing ranges for -g and -c
-
- Revision 1.27 2003/03/28 18:48:39 yaschenk
- tuning ObjMgr, adding STREAM_SEQ_PORT_FIRST to SeqEntryToGnbk
-
- Revision 1.26 2003/01/29 23:08:19 yaschenk
- fixing FASTA on far pointers
-
- Revision 1.25 2003/01/21 22:27:23 kans
- new CstType parameter for flatfile generator
-
- Revision 1.24 2002/12/30 22:36:53 yaschenk
- optimizing..
-
- Revision 1.23 2002/11/07 17:21:55 yaschenk
- switching ID1 to new displatcher
-
- Revision 1.22 2002/07/23 19:31:43 butanaev
- Filtered out gi -1
-
- Revision 1.21 2001/11/02 14:24:44 kans
- made Fasta style SeqId args multi-line for Mac window
-
- Revision 1.20 2001/11/02 12:36:20 kans
- now using public Entrez2 server
-
- Revision 1.19 2001/09/28 15:56:04 kans
- look for extra and title fields in Entrez2 docsum
-
- Revision 1.18 2001/09/10 21:09:36 kans
- changed to use new Entrez2DocsumDataPtr - still need to get example of field_name keys
-
- Revision 1.17 2001/02/12 21:57:11 butanaev
- Made 3 retries to EntrezSynchronousQuery() when the NULL is returned.
-
- Revision 1.16 2001/02/08 16:13:46 yaschenk
- fixing wrong check for missing version in _PIR and SP
-
- Revision 1.15 2000/10/06 22:59:44 yaschenk
- strncpy not setting \0 bug
-
- Revision 1.14 2000/08/10 15:17:38 butanaev
- Updated -t 7 mode: strings like 'gi|3|emb|A00003.1|A00003' retreived from
- Entrez2DocsumPtr->caption.
-
- Revision 1.13 2000/08/03 17:01:23 kans
- included ni_lib.h for Mac, removed Mac compiler warnings
-
- Revision 1.12 2000/08/02 16:55:28 yaschenk
- increasing buffer size to 1000
-
- Revision 1.11 2000/08/02 16:17:00 butanaev
- Added:
- -t 7 - to retrieve Entrez DocSums
- -Q filename - to read Entrez query from the file
-
- Revision 1.9 2000/07/13 16:46:54 yaschenk
- adding ObjMgrFreeCache(0) to avoid hitting the limit in ObrMgr
-
- Revision 1.2 2000/06/01 18:05:35 butanaev
- Fixed numerous bugs with control flow...
-
- Revision 1.1 2000/06/01 16:48:22 butanaev
- New functionality:
- -G parameter, which previously accepted the list of gi's,
- now accepts gi,accession,accession.version,fasta seqid,
- which can be mixed.
-
- -q parameter generates the list out of Entrez query
- when -q is used -d has special meaning:
- -d n - run query against Nucleotide database
- -d p - run query against Protein database
-
- -n parameter limits the output to the list of gi's
-
- Revision 1.6 2000/05/24 17:30:42 yaschenk
- make parameter list look better
-
- Revision 1.5 2000/05/23 15:42:08 yaschenk
- adding quality score display
-
- Revision 1.4 2000/03/31 18:35:58 yaschenk
- Adding Jonathan's logic for FF and FASTA
-
- Revision 1.3 2000/03/30 20:43:51 yaschenk
- adding AsnIoReset between Entries
-
- Revision 1.2 1999/11/02 18:27:43 yaschenk
- adding -G parameter to idfetch
-
- Revision 1.1 1998/12/28 17:56:29 yaschenk
- preparing idfetch to go to production
-
- Revision 1.1 1997/05/29 14:34:07 sirotkin
- syncing sampson from mutant for procs. taking source from sampson. this is now current
-
- * Revision 4.0 1995/07/26 13:55:55 ostell
- * force revision to 4.0
- *
- * Revision 1.3 1995/06/21 14:14:29 kans
- * replaced asn2ff_entrez with SeqEntryToFlat
- *
- * Revision 1.2 1995/05/17 17:59:15 epstein
- * add RCS log revision history
- *
- * Revision 1.1 94/08/11 13:26:31 ostell
- * Initial revision
- *
- * Revision 1.3 1993/12/02 10:12:41 kans
- * Includes <ncbi.h> instead of <sys/types.h>
- *
- * Revision 1.2 93/11/24 13:25:56 sirotkin
- * First working version
- *
- * Revision 1.1 93/11/23 16:01:51 sirotkin
- * Initial revision
- *
- revised by OStell for public use.
- *
- * Modified by Eugene Yaschenko for ID1 Server
*
*/
+
#include <ncbi.h>
#include <objsset.h>
#include <sequtil.h>
@@ -273,7 +110,7 @@ int Numarg = sizeof(myargs)/sizeof(myargs[0]);
static Nlm_Int2 Nlm_WhichArg PROTO(( Nlm_Char which, Nlm_Int2 numargs, Nlm_ArgPtr ap));
static void MyBioseqToFasta(BioseqPtr bsp, Pointer userdata);
-static Boolean CreateMaxPlexParam();
+static Boolean CreateMaxPlexParam(void);
static Int4 GetIntervalAccession( const Char* pAccession, Char* pResult);
Int4 giBuffer[1000];
@@ -292,7 +129,6 @@ Int2 Main()
Boolean has_trouble = FALSE;
Int4 entity_spec_count = 0;
CharPtr outmode;
- Int4 ent = 0;
Int4 gi = 0;
FILE * fp_in = NULL;
SeqIdPtr sip;
@@ -509,12 +345,11 @@ Int2 Main()
"flaTtened SeqId, format:
type(name,accession,release,version) or type=accession",
*/
- static CharPtr name = NULL, release = NULL, version = NULL, number = NULL;
+ static CharPtr name = NULL, release = NULL, number = NULL;
CharPtr p;
static CharPtr PNTR fields [] = {&name, &accession, &release, &number};
Boolean found_equals = FALSE, found_left = FALSE,
- found_colon = FALSE, flat_seqid_err = FALSE,
- dna_type = FALSE, any_type = FALSE;
+ found_colon = FALSE, dna_type = FALSE, any_type = FALSE;
int dex;
TextSeqIdPtr tsip;
@@ -622,6 +457,7 @@ Int2 Main()
case SEQID_SWISSPROT :
case SEQID_OTHER :
case SEQID_PRF :
+ case SEQID_GPIPE :
tsip = TextSeqIdNew();
sip->data.ptrvalue = tsip;
if(accession)
@@ -887,7 +723,9 @@ static Boolean IdFetch_func1(CharPtr data, Int2 maxplex)
if((gi = TryGetGi(SEQID_GENBANK, acc, NULL, ver)) ||
(gi = TryGetGi(SEQID_OTHER, acc, NULL, ver)) ||
(gi = TryGetGi(SEQID_GENBANK, NULL, acc, ver)) ||
- (gi = TryGetGi(SEQID_OTHER, NULL, acc, ver)))
+ (gi = TryGetGi(SEQID_OTHER, NULL, acc, ver)) ||
+ (gi = TryGetGi(SEQID_GPIPE, acc, NULL, ver)) ||
+ (gi = TryGetGi(SEQID_GPIPE, NULL, acc, ver)))
return IdFetch_func(gi,
myargs[dbarg].strvalue,
myargs[entarg].intvalue,
@@ -1132,11 +970,11 @@ static Boolean IdFetch_func(Int4 gi,CharPtr db, Int4 ent,Int2 maxplex)
switch(myargs[infotypearg].intvalue){
case 0:
if(bsp){
- MyBioseqToFasta(bsp,(Pointer)fp);
+ MyBioseqToFasta(bsp,(Pointer)fp);
}
else
{
- VisitBioseqsInSep(sep,(Pointer)fp, MyBioseqToFasta);
+ VisitBioseqsInSep(sep,(Pointer)fp, MyBioseqToFasta);
}
break;
case 2:
@@ -1155,7 +993,9 @@ static Boolean IdFetch_func(Int4 gi,CharPtr db, Int4 ent,Int2 maxplex)
DONE:
if(bsp)
{
+#if 0
static Uint2 reap_cnt;
+#endif
BioseqUnlock(bsp);
#if 0
reap_cnt++;
@@ -1269,10 +1109,17 @@ static Int4 BEGetUidsFromQuery(CharPtr query, Uint4Ptr PNTR uids,
#define FASTA_LINE_SIZE 70
#define FASTA_LINES_IN_CHUNK 5000
+#define SEQPORT_2_SEQSTREAM
+
static void
MyBioseqToFasta(BioseqPtr bsp, Pointer userdata)
{
+#ifdef SEQPORT_2_SEQSTREAM
+ SeqInt si;
+ ValNode vn;
+#else
SeqPortPtr spp=NULL;
+#endif
Char buf[2048];
Char str[200];
ValNodePtr vnp;
@@ -1291,6 +1138,19 @@ MyBioseqToFasta(BioseqPtr bsp, Pointer userdata)
while(start < bsp->length){
stop=start+step-1;
if(stop >= bsp->length) stop=bsp->length-1;
+#ifdef SEQPORT_2_SEQSTREAM
+ MemSet ((Pointer) &si, 0, sizeof (SeqInt));
+ MemSet ((Pointer) &vn, 0, sizeof (ValNode));
+
+ si.from = start; si.to = stop; si.strand = 0;
+ si.id = SeqIdFindBest (bsp->id, 0);
+
+ vn.choice = SEQLOC_INT;
+ vn.data.ptrvalue = (Pointer) &si;
+
+ SeqLocFastaStream (&vn, fp, STREAM_ALLOW_NEG_GIS |
+ STREAM_EXPAND_GAPS | SUPPRESS_VIRT_SEQ, FASTA_LINE_SIZE, 0, 0);
+#else
spp = SeqPortNew(bsp,start,stop,0, (ISA_na(bsp->mol))?Seq_code_iupacna:Seq_code_ncbieaa);
if(spp==NULL) return;
SeqPortSet_do_virtual(spp, TRUE);
@@ -1302,8 +1162,12 @@ MyBioseqToFasta(BioseqPtr bsp, Pointer userdata)
SeqPortFree(spp);
spp=NULL;
}
+#endif
start=stop+1;
}
+ SeqMgrFreeCache();
+ ObjMgrReap(ObjMgrGet());
+ ObjMgrFreeCache(0);
}
/*
@@ -1328,7 +1192,7 @@ select * from annot_types;
9 "microRNA" efff 13
*/
-Boolean CreateMaxPlexParam()
+static Boolean CreateMaxPlexParam(void)
{
Char buf[1024];
Char *ptoken = NULL;
diff --git a/network/medarch/client/medutil.c b/network/medarch/client/medutil.c
index ff59c9fd..0421f034 100644
--- a/network/medarch/client/medutil.c
+++ b/network/medarch/client/medutil.c
@@ -28,7 +28,7 @@
*
* Version Creation Date: 8/31/93
*
-* $Revision: 6.23 $
+* $Revision: 6.24 $
*
* File Description: Medline Utilities for MedArch
* Assumes user calls MedArchInit and Fini
@@ -44,6 +44,9 @@
*
* RCS Modification History:
* $Log: medutil.c,v $
+* Revision 6.24 2009/06/19 19:27:29 bazhin
+* Added support for multiple consortium names.
+*
* Revision 6.23 2007/12/04 23:29:22 bazhin
* MergePubIds() renamed to MergeNonPubmedPubIds(). Merging is
* limited to types DOI and OTHER only.
@@ -396,11 +399,18 @@ static Boolean ten_authors_compare(CitArtPtr capold, CitArtPtr capnew)
Boolean ten_authors(CitArtPtr art, CitArtPtr art_tmp)
{
NameStdPtr namestd;
+ ValNodePtr oldcon;
+ ValNodePtr newcon;
+ ValNodePtr tvnp;
+ ValNodePtr vnp;
ValNodePtr v;
AuthorPtr aup;
+ CharPtr oldbuf;
+ CharPtr newbuf;
CharPtr mu[10];
- CharPtr oldcon;
- CharPtr newcon;
+ CharPtr p;
+ Int4 oldlen;
+ Int4 newlen;
Int2 num;
Int2 numnew;
Int2 numtmp;
@@ -427,24 +437,124 @@ Boolean ten_authors(CitArtPtr art, CitArtPtr art_tmp)
if(art->authors->choice != 1)
return(ten_authors_compare(art, art_tmp));
+ oldbuf = NULL;
oldcon = NULL;
+ oldlen = 1;
for(num = 0, v = art->authors->names; v != NULL; v = v->next)
{
aup = v->data.ptrvalue;
if(aup->name->choice == 2)
num++;
else if(aup->name->choice == 5)
- oldcon = aup->name->data;
+ {
+ p = aup->name->data;
+ oldlen += (StringLen(p) + 2);
+ if(oldcon == NULL)
+ {
+ oldcon = ValNodeNew(NULL);
+ oldcon->data.ptrvalue = p;
+ continue;
+ }
+
+ for(vnp = oldcon; vnp != NULL; vnp = vnp->next)
+ {
+ if(StringICmp(p, vnp->data.ptrvalue) <= 0)
+ {
+ if(vnp == oldcon)
+ {
+ oldcon = ValNodeNew(NULL);
+ oldcon->data.ptrvalue = p;
+ oldcon->next = vnp;
+ }
+ else
+ {
+ tvnp = ValNodeNew(NULL);
+ tvnp->data.ptrvalue = vnp->data.ptrvalue;
+ vnp->data.ptrvalue = p;
+ tvnp->next = vnp->next;
+ vnp->next = tvnp;
+ }
+ break;
+ }
+ if(vnp->next == NULL)
+ {
+ vnp->next = ValNodeNew(NULL);
+ vnp->next->data.ptrvalue = p;
+ break;
+ }
+ }
+ }
+ }
+ if(oldcon != NULL)
+ {
+ oldbuf = MemNew(oldlen);
+ oldbuf[0] = '\0';
+ for(vnp = oldcon; vnp != NULL; vnp = vnp->next)
+ {
+ if(oldbuf[0] != '\0')
+ StringCat(oldbuf, "; ");
+ StringCat(oldbuf, vnp->data.ptrvalue);
+ }
}
+ newbuf = NULL;
newcon = NULL;
+ newlen = 1;
for(numtmp = 0, v = art_tmp->authors->names; v != NULL; v = v->next)
{
aup = v->data.ptrvalue;
if(aup->name->choice == 2)
numtmp++;
else if(aup->name->choice == 5)
- newcon = aup->name->data;
+ {
+ p = aup->name->data;
+ newlen += (StringLen(p) + 2);
+ if(newcon == NULL)
+ {
+ newcon = ValNodeNew(NULL);
+ newcon->data.ptrvalue = p;
+ continue;
+ }
+
+ for(vnp = newcon; vnp != NULL; vnp = vnp->next)
+ {
+ if(StringICmp(p, vnp->data.ptrvalue) <= 0)
+ {
+ if(vnp == newcon)
+ {
+ newcon = ValNodeNew(NULL);
+ newcon->data.ptrvalue = p;
+ newcon->next = vnp;
+ }
+ else
+ {
+ tvnp = ValNodeNew(NULL);
+ tvnp->data.ptrvalue = vnp->data.ptrvalue;
+ vnp->data.ptrvalue = p;
+ tvnp->next = vnp->next;
+ vnp->next = tvnp;
+ }
+ break;
+ }
+ if(vnp->next == NULL)
+ {
+ vnp->next = ValNodeNew(NULL);
+ vnp->next->data.ptrvalue = p;
+ break;
+ }
+ }
+ }
+ }
+ if(newcon != NULL)
+ {
+ newbuf = MemNew(newlen);
+ newbuf[0] = '\0';
+ for(vnp = newcon; vnp != NULL; vnp = vnp->next)
+ {
+ if(newbuf[0] != '\0')
+ StringCat(newbuf, "; ");
+ StringCat(newbuf, vnp->data.ptrvalue);
+ }
}
if(oldcon != NULL)
@@ -453,27 +563,42 @@ Boolean ten_authors(CitArtPtr art, CitArtPtr art_tmp)
{
ErrPostEx(SEV_WARNING, ERR_REFERENCE_NoConsortAuthors,
"Publication as returned by MedArch lacks consortium authors of the original publication: \"%s\".",
- oldcon);
- aup = AuthorNew();
- aup->name = PersonIdNew();
- aup->name->choice = 5;
- aup->name->data = StringSave(oldcon);
- v = ValNodeNew(NULL);
- v->data.ptrvalue = aup;
- v->next = art_tmp->authors->names;
- art_tmp->authors->names = v;
- newcon = oldcon;
+ oldbuf);
+ for(vnp = oldcon;; vnp = vnp->next)
+ {
+ aup = AuthorNew();
+ aup->name = PersonIdNew();
+ aup->name->choice = 5;
+ aup->name->data = StringSave(vnp->data.ptrvalue);
+ vnp->data.ptrvalue = aup;
+ if(vnp->next == NULL)
+ break;
+ }
+ vnp->next = art_tmp->authors->names;
+ art_tmp->authors->names = oldcon;
}
- else if(StringICmp(oldcon, newcon) != 0)
+ else
{
- ErrPostEx(SEV_WARNING, ERR_REFERENCE_DiffConsortAuthors,
- "Consortium author names differ. Original is \"%s\". MedArch's is \"%s\".",
- oldcon, newcon);
+ if(StringICmp(oldbuf, newbuf) != 0)
+ ErrPostEx(SEV_WARNING, ERR_REFERENCE_DiffConsortAuthors,
+ "Consortium author names differ. Original is \"%s\". MedArch's is \"%s\".",
+ oldbuf, newbuf);
+ MemFree(newbuf);
+ newbuf = NULL;
+ ValNodeFree(oldcon);
+ ValNodeFree(newcon);
+ newcon = NULL;
}
+ MemFree(oldbuf);
if(num == 0)
return(TRUE);
}
+ if(newcon != NULL)
+ ValNodeFree(newcon);
+ if(newbuf != NULL)
+ MemFree(newbuf);
+
numnew = 0;
for(v = art_tmp->authors->names; v != NULL && numnew < 10; v = v->next)
{