diff options
author | Aaron M. Ucko <ucko@debian.org> | 2019-02-06 22:39:48 -0500 |
---|---|---|
committer | Aaron M. Ucko <ucko@debian.org> | 2019-02-06 22:40:21 -0500 |
commit | 71e8f128bfe1d9726c4376661ff3c659bda7f7ba (patch) | |
tree | 42c4d998328c6c6eb6425ee7304c1a85f27d4529 | |
parent | 53ca0250e80ae069a17356799006815dd528f5a0 (diff) | |
parent | 1c42e99e91bbfb976d95038ed72113cc6d224482 (diff) |
Merge tag 'upstream/10.9.20190205+ds'
Upstream version 10.9.20190205(+ds).
-rw-r--r-- | debian/changelog | 4 | ||||
-rwxr-xr-x | nquire | 149 | ||||
-rw-r--r-- | xtract.go | 29 |
3 files changed, 149 insertions, 33 deletions
diff --git a/debian/changelog b/debian/changelog index 31303aa..0e16441 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -ncbi-entrez-direct (10.9.20190131+ds-1) UNRELEASED; urgency=medium +ncbi-entrez-direct (10.9.20190205+ds-1) UNRELEASED; urgency=medium * New upstream release. (NOT RELEASED YET.) * debian/man/{archive-pubmed,download-pubmed,edirect,efilter, @@ -10,7 +10,7 @@ ncbi-entrez-direct (10.9.20190131+ds-1) UNRELEASED; urgency=medium * debian/rules: Stop installing retired scripts local-phrase-search and (implicitly) pm-{clean,current,erase,log,repack,uids,verify}. - -- Aaron M. Ucko <ucko@debian.org> Wed, 06 Feb 2019 22:38:28 -0500 + -- Aaron M. Ucko <ucko@debian.org> Wed, 06 Feb 2019 22:39:48 -0500 ncbi-entrez-direct (10.5.20181204+ds-2) unstable; urgency=medium @@ -63,10 +63,12 @@ BEGIN } use lib $LibDir; +use JSON::PP; use LWP::UserAgent; use POSIX; use URI::Escape; use Net::FTP; +use XML::Simple; # definitions @@ -81,6 +83,7 @@ sub clearflags { $alias = ""; $debug = false; $http = ""; + $j2x = false; $output = ""; } @@ -229,6 +232,33 @@ sub do_uri_escape { return $rslt; } +sub convert_bools { + my %unrecognized; + + local *_convert_bools = sub { + my $ref_type = ref($_[0]); + if (!$ref_type) { + # Nothing. + } + elsif ($ref_type eq 'HASH') { + _convert_bools($_) for values(%{ $_[0] }); + } + elsif ($ref_type eq 'ARRAY') { + _convert_bools($_) for @{ $_[0] }; + } + elsif ( + $ref_type eq 'JSON::PP::Boolean' || $ref_type eq 'Types::Serialiser::Boolean' + ) { + $_[0] = $_[0] ? 1 : 0; + } + else { + ++$unrecognized{$ref_type}; + } + }; + + &_convert_bools; +} + # nquire executes an external URL query from command line arguments my $nquire_help = qq{ @@ -439,6 +469,58 @@ Federated Query }" | xtract -pattern result -block binding -element "binding\@name" literal +BioThings Queries + + nquire -variant variant "chr6:g.26093141G>A" -fields dbsnp.gene | + xtract -pattern gene -element \@geneid + + nquire -gene query -q "symbol:OPN1MW" -species 9606 | + xtract -pattern hits -element "\@_id" + + nquire -gene query -q "symbol:OPN1MW AND taxid:9606" | + xtract -pattern hits -element "\@_id" + + nquire -gene gene 2652 -fields pathway.wikipathways | + xtract -pattern pathway -element "\@id" + + nquire -gene query -q "pathway.wikipathways.id:WP455" -size 300 | + xtract -pattern hits -element "\@_id" + + nquire -chem query -q "drugbank.targets.uniprot:P05231 AND drugbank.targets.actions:inhibitor" -fields hgvs | + xtract -pattern hits -element "\@_id" + +EDirect Expansion + + ExtractIDs() { + xtract -pattern BIO_THINGS -block Id -tab "\\n" -element "Id" + } + + WrapIDs() { + xtract -wrp BIO_THINGS -pattern opt -wrp "Type" -lbl "\$1" \\ + -wrp "Count" -num "\$2" -block "\$2" -wrp "Id" -element "\$3" | + xtract -format + } + + nquire -gene query -q "symbol:OPN1MW AND taxid:9606" | + WrapIDs entrezgene hits "\@entrezgene" | + + ExtractIDs | + while read geneid + do + nquire -gene gene "\$geneid" -fields pathway.wikipathways + done | + WrapIDs pathway.wikipathways.id pathway "\@id" | + + ExtractIDs | + while read pathid + do + nquire -gene query -q "pathway.wikipathways.id:\$pathid" -size 300 + done | + WrapIDs entrezgene hits "\@entrezgene" | + + ExtractIDs | + sort -n + }; my @pubchem_properties = qw( @@ -498,6 +580,14 @@ sub nquire { @args = @ARGV; $max = scalar @args; + %biothingsHash = ( + '-gene' => 'http://mygene.info/v3', + '-variant' => 'http://myvariant.info/v1', + '-chem' => 'http://mychem.info/v1', + '-drug' => 'http://c.biothings.io/v1', + '-taxon' => 'http://t.biothings.io/v1', + ); + if ( $max < 1 ) { return; } @@ -705,42 +795,20 @@ sub nquire { } } - } elsif ( $pat eq "-mygene" or $pat eq "-mygene.info" ) { - # shortcut for mygene.info (undocumented) - $i++; - if ( $i < $max ) { - $url = "http://mygene.info/v3"; - if ( $http eq "" ) { - $http = "get"; - } - } - } elsif ( $pat eq "-myvariant" or $pat eq "-myvariant.info" ) { - # shortcut for myvariant.info (undocumented) + } elsif ( defined $biothingsHash{$pat} ) { + # shortcuts for biothings services (undocumented) $i++; - if ( $i < $max ) { - $url = "http://myvariant.info/v1"; - if ( $http eq "" ) { + $url = $biothingsHash{$pat}; + if ( $http eq "" ) { $http = "get"; - } - } - } elsif ( $pat eq "-mychem" or $pat eq "-mychem.info" ) { - # shortcut for mychem.info (undocumented) - $i++; - if ( $i < $max ) { - $url = "http://mychem.info/v1"; - if ( $http eq "" ) { - $http = "get"; - } } + $j2x = true; } elsif ( $pat eq "-wikipathways" ) { # shortcut for webservice.wikipathways.org (undocumented) $i++; if ( $i < $max ) { $url = "http://webservice.wikipathways.org"; - if ( $http eq "" ) { - $http = "get"; - } } } elsif ( $pat eq "-biosample" ) { @@ -825,6 +893,33 @@ sub nquire { # perform query $output = do_post ($url, $arg); + if ( $j2x ) { + my $jc = JSON::PP->new->ascii->pretty->allow_nonref; + my $conv = $jc->decode($output); + convert_bools($conv); + my $result = XMLout($conv, SuppressEmpty => undef); + + # remove newlines, tabs, space between tokens, compress runs of spaces + $result =~ s/\r/ /g; + $result =~ s/\n/ /g; + $result =~ s/\t//g; + $result =~ s/ +/ /g; + $result =~ s/> +</></g; + + # remove <opt> flanking object + if ( $result =~ /<opt>\s*?</ and $result =~ />\s*?<\/opt>/ ) { + $result =~ s/<opt>\s*?</</g; + $result =~ s/>\s*?<\/opt>/>/g; + } + + $output = "$result"; + + # restore newlines between objects + $output =~ s/> *?</>\n</g; + + binmode(STDOUT, ":utf8"); + } + print "$output"; } @@ -3497,11 +3497,15 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d } } - // -nucleic uses direction of range to decide between forward strand or reverse complement doRevComp := false - if stat == NUCLEIC && min+1 > max { - min, max = max-1, min+1 - doRevComp = true + doUpCase := false + if status == NUCLEIC { + // -nucleic uses direction of range to decide between forward strand or reverse complement + if min+1 > max { + min, max = max-1, min+1 + doRevComp = true + } + doUpCase = true } // numeric range now calculated, apply slice to string @@ -3509,6 +3513,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d if doRevComp { str = reverseComplement(str) } + if doUpCase { + str = strings.ToUpper(str) + } acc(str) } else if max == 0 { if min > 0 && min < len(str) { @@ -3517,6 +3524,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d if doRevComp { str = reverseComplement(str) } + if doUpCase { + str = strings.ToUpper(str) + } acc(str) } } @@ -3527,6 +3537,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d if doRevComp { str = reverseComplement(str) } + if doUpCase { + str = strings.ToUpper(str) + } acc(str) } } @@ -3537,6 +3550,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d if doRevComp { str = reverseComplement(str) } + if doUpCase { + str = strings.ToUpper(str) + } acc(str) } } @@ -4541,11 +4557,16 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin case LBL: lbl := str accum(tab) + accum(plg) + accum(pfx) if plain { accum(lbl) } else { printInColor(lbl) } + accum(sfx) + plg = "" + lst = elg tab = col ret = lin case PFC: |