summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAaron M. Ucko <ucko@debian.org>2019-02-06 22:39:48 -0500
committerAaron M. Ucko <ucko@debian.org>2019-02-06 22:40:21 -0500
commit71e8f128bfe1d9726c4376661ff3c659bda7f7ba (patch)
tree42c4d998328c6c6eb6425ee7304c1a85f27d4529
parent53ca0250e80ae069a17356799006815dd528f5a0 (diff)
parent1c42e99e91bbfb976d95038ed72113cc6d224482 (diff)
Merge tag 'upstream/10.9.20190205+ds'
Upstream version 10.9.20190205(+ds).
-rw-r--r--debian/changelog4
-rwxr-xr-xnquire149
-rw-r--r--xtract.go29
3 files changed, 149 insertions, 33 deletions
diff --git a/debian/changelog b/debian/changelog
index 31303aa..0e16441 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-ncbi-entrez-direct (10.9.20190131+ds-1) UNRELEASED; urgency=medium
+ncbi-entrez-direct (10.9.20190205+ds-1) UNRELEASED; urgency=medium
* New upstream release. (NOT RELEASED YET.)
* debian/man/{archive-pubmed,download-pubmed,edirect,efilter,
@@ -10,7 +10,7 @@ ncbi-entrez-direct (10.9.20190131+ds-1) UNRELEASED; urgency=medium
* debian/rules: Stop installing retired scripts local-phrase-search and
(implicitly) pm-{clean,current,erase,log,repack,uids,verify}.
- -- Aaron M. Ucko <ucko@debian.org> Wed, 06 Feb 2019 22:38:28 -0500
+ -- Aaron M. Ucko <ucko@debian.org> Wed, 06 Feb 2019 22:39:48 -0500
ncbi-entrez-direct (10.5.20181204+ds-2) unstable; urgency=medium
diff --git a/nquire b/nquire
index a48c3f5..717996d 100755
--- a/nquire
+++ b/nquire
@@ -63,10 +63,12 @@ BEGIN
}
use lib $LibDir;
+use JSON::PP;
use LWP::UserAgent;
use POSIX;
use URI::Escape;
use Net::FTP;
+use XML::Simple;
# definitions
@@ -81,6 +83,7 @@ sub clearflags {
$alias = "";
$debug = false;
$http = "";
+ $j2x = false;
$output = "";
}
@@ -229,6 +232,33 @@ sub do_uri_escape {
return $rslt;
}
+sub convert_bools {  # Replace JSON boolean objects nested anywhere inside $_[0] with plain 1/0, modifying the structure in place.
+ my %unrecognized;  # Tally of reference types seen but not handled; only incremented below, never read in this sub.
+
+ local *_convert_bools = sub {  # Installed under a localized glob name so the anonymous sub can call itself recursively.
+ my $ref_type = ref($_[0]);
+ if (!$ref_type) {
+ # Non-reference value: nothing to convert.
+ }
+ elsif ($ref_type eq 'HASH') {
+ _convert_bools($_) for values(%{ $_[0] });  # $_ aliases each hash value, so the callee's write to $_[0] updates the hash itself.
+ }
+ elsif ($ref_type eq 'ARRAY') {
+ _convert_bools($_) for @{ $_[0] };  # Likewise, $_ aliases each array element.
+ }
+ elsif (
+ $ref_type eq 'JSON::PP::Boolean' || $ref_type eq 'Types::Serialiser::Boolean'
+ ) {
+ $_[0] = $_[0] ? 1 : 0;  # Assigning through the @_ alias overwrites the slot that held the boolean object.
+ }
+ else {
+ ++$unrecognized{$ref_type};  # Any other reference type is left untouched and merely counted.
+ }
+ };
+
+ &_convert_bools;  # Ampersand call without parentheses passes this sub's own @_ along, keeping the caller's argument aliased.
+}
+
# nquire executes an external URL query from command line arguments
my $nquire_help = qq{
@@ -439,6 +469,58 @@ Federated Query
}" |
xtract -pattern result -block binding -element "binding\@name" literal
+BioThings Queries
+
+ nquire -variant variant "chr6:g.26093141G>A" -fields dbsnp.gene |
+ xtract -pattern gene -element \@geneid
+
+ nquire -gene query -q "symbol:OPN1MW" -species 9606 |
+ xtract -pattern hits -element "\@_id"
+
+ nquire -gene query -q "symbol:OPN1MW AND taxid:9606" |
+ xtract -pattern hits -element "\@_id"
+
+ nquire -gene gene 2652 -fields pathway.wikipathways |
+ xtract -pattern pathway -element "\@id"
+
+ nquire -gene query -q "pathway.wikipathways.id:WP455" -size 300 |
+ xtract -pattern hits -element "\@_id"
+
+ nquire -chem query -q "drugbank.targets.uniprot:P05231 AND drugbank.targets.actions:inhibitor" -fields hgvs |
+ xtract -pattern hits -element "\@_id"
+
+EDirect Expansion
+
+ ExtractIDs() {
+ xtract -pattern BIO_THINGS -block Id -tab "\\n" -element "Id"
+ }
+
+ WrapIDs() {
+ xtract -wrp BIO_THINGS -pattern opt -wrp "Type" -lbl "\$1" \\
+ -wrp "Count" -num "\$2" -block "\$2" -wrp "Id" -element "\$3" |
+ xtract -format
+ }
+
+ nquire -gene query -q "symbol:OPN1MW AND taxid:9606" |
+ WrapIDs entrezgene hits "\@entrezgene" |
+
+ ExtractIDs |
+ while read geneid
+ do
+ nquire -gene gene "\$geneid" -fields pathway.wikipathways
+ done |
+ WrapIDs pathway.wikipathways.id pathway "\@id" |
+
+ ExtractIDs |
+ while read pathid
+ do
+ nquire -gene query -q "pathway.wikipathways.id:\$pathid" -size 300
+ done |
+ WrapIDs entrezgene hits "\@entrezgene" |
+
+ ExtractIDs |
+ sort -n
+
};
my @pubchem_properties = qw(
@@ -498,6 +580,14 @@ sub nquire {
@args = @ARGV;
$max = scalar @args;
+ %biothingsHash = (
+ '-gene' => 'http://mygene.info/v3',
+ '-variant' => 'http://myvariant.info/v1',
+ '-chem' => 'http://mychem.info/v1',
+ '-drug' => 'http://c.biothings.io/v1',
+ '-taxon' => 'http://t.biothings.io/v1',
+ );
+
if ( $max < 1 ) {
return;
}
@@ -705,42 +795,20 @@ sub nquire {
}
}
- } elsif ( $pat eq "-mygene" or $pat eq "-mygene.info" ) {
- # shortcut for mygene.info (undocumented)
- $i++;
- if ( $i < $max ) {
- $url = "http://mygene.info/v3";
- if ( $http eq "" ) {
- $http = "get";
- }
- }
- } elsif ( $pat eq "-myvariant" or $pat eq "-myvariant.info" ) {
- # shortcut for myvariant.info (undocumented)
+ } elsif ( defined $biothingsHash{$pat} ) {
+ # shortcuts for biothings services (undocumented)
$i++;
- if ( $i < $max ) {
- $url = "http://myvariant.info/v1";
- if ( $http eq "" ) {
+ $url = $biothingsHash{$pat};
+ if ( $http eq "" ) {
$http = "get";
- }
- }
- } elsif ( $pat eq "-mychem" or $pat eq "-mychem.info" ) {
- # shortcut for mychem.info (undocumented)
- $i++;
- if ( $i < $max ) {
- $url = "http://mychem.info/v1";
- if ( $http eq "" ) {
- $http = "get";
- }
}
+ $j2x = true;
} elsif ( $pat eq "-wikipathways" ) {
# shortcut for webservice.wikipathways.org (undocumented)
$i++;
if ( $i < $max ) {
$url = "http://webservice.wikipathways.org";
- if ( $http eq "" ) {
- $http = "get";
- }
}
} elsif ( $pat eq "-biosample" ) {
@@ -825,6 +893,33 @@ sub nquire {
# perform query
$output = do_post ($url, $arg);
+ if ( $j2x ) {
+ my $jc = JSON::PP->new->ascii->pretty->allow_nonref;
+ my $conv = $jc->decode($output);
+ convert_bools($conv);
+ my $result = XMLout($conv, SuppressEmpty => undef);
+
+ # remove newlines, tabs, space between tokens, compress runs of spaces
+ $result =~ s/\r/ /g;
+ $result =~ s/\n/ /g;
+ $result =~ s/\t//g;
+ $result =~ s/ +/ /g;
+ $result =~ s/> +</></g;
+
+ # remove <opt> flanking object
+ if ( $result =~ /<opt>\s*?</ and $result =~ />\s*?<\/opt>/ ) {
+ $result =~ s/<opt>\s*?</</g;
+ $result =~ s/>\s*?<\/opt>/>/g;
+ }
+
+ $output = "$result";
+
+ # restore newlines between objects
+ $output =~ s/> *?</>\n</g;
+
+ binmode(STDOUT, ":utf8");
+ }
+
print "$output";
}
diff --git a/xtract.go b/xtract.go
index 7813d24..1441dca 100644
--- a/xtract.go
+++ b/xtract.go
@@ -3497,11 +3497,15 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
}
}
- // -nucleic uses direction of range to decide between forward strand or reverse complement
doRevComp := false
- if stat == NUCLEIC && min+1 > max {
- min, max = max-1, min+1
- doRevComp = true
+ doUpCase := false
+ if status == NUCLEIC {
+ // -nucleic uses direction of range to decide between forward strand or reverse complement
+ if min+1 > max {
+ min, max = max-1, min+1
+ doRevComp = true
+ }
+ doUpCase = true
}
// numeric range now calculated, apply slice to string
@@ -3509,6 +3513,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
if doRevComp {
str = reverseComplement(str)
}
+ if doUpCase {
+ str = strings.ToUpper(str)
+ }
acc(str)
} else if max == 0 {
if min > 0 && min < len(str) {
@@ -3517,6 +3524,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
if doRevComp {
str = reverseComplement(str)
}
+ if doUpCase {
+ str = strings.ToUpper(str)
+ }
acc(str)
}
}
@@ -3527,6 +3537,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
if doRevComp {
str = reverseComplement(str)
}
+ if doUpCase {
+ str = strings.ToUpper(str)
+ }
acc(str)
}
}
@@ -3537,6 +3550,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
if doRevComp {
str = reverseComplement(str)
}
+ if doUpCase {
+ str = strings.ToUpper(str)
+ }
acc(str)
}
}
@@ -4541,11 +4557,16 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
case LBL:
lbl := str
accum(tab)
+ accum(plg)
+ accum(pfx)
if plain {
accum(lbl)
} else {
printInColor(lbl)
}
+ accum(sfx)
+ plg = ""
+ lst = elg
tab = col
ret = lin
case PFC: