summary refs log tree commit diff
diff options
context:
space:
mode:
author Aaron M. Ucko <ucko@debian.org> 2019-02-26 21:22:47 -0500
committer Aaron M. Ucko <ucko@debian.org> 2019-02-26 21:22:47 -0500
commit 857290e0db7609751c8e208731ea3cab6ded4c28 (patch)
tree 3816eff3ed0ff04c02a4dba8afad0a2c05c45162
parent 1c42e99e91bbfb976d95038ed72113cc6d224482 (diff)
New upstream version 10.9.20190219+ds
-rwxr-xr-x nquire 31
-rw-r--r-- xtract.go 99
2 files changed, 125 insertions, 5 deletions
diff --git a/nquire b/nquire
index 717996d..e41fe4b 100755
--- a/nquire
+++ b/nquire
@@ -84,6 +84,7 @@ sub clearflags {
$debug = false;
$http = "";
$j2x = false;
+ $x2j = false;
$output = "";
}
@@ -492,12 +493,12 @@ BioThings Queries
EDirect Expansion
ExtractIDs() {
- xtract -pattern BIO_THINGS -block Id -tab "\\n" -element "Id"
+ xtract -pattern BIO_THINGS -block Id -tab "\\n" -element Id
}
WrapIDs() {
- xtract -wrp BIO_THINGS -pattern opt -wrp "Type" -lbl "\$1" \\
- -wrp "Count" -num "\$2" -block "\$2" -wrp "Id" -element "\$3" |
+ xtract -wrp BIO_THINGS -pattern opt -wrp Type -lbl "\$1" \\
+ -wrp Count -num "\$2" -block "\$2" -wrp Id -element "\$3" |
xtract -format
}
@@ -584,6 +585,7 @@ sub nquire {
'-gene' => 'http://mygene.info/v3',
'-variant' => 'http://myvariant.info/v1',
'-chem' => 'http://mychem.info/v1',
+ '-disease' => 'http://mydisease.info/v1',
'-drug' => 'http://c.biothings.io/v1',
'-taxon' => 'http://t.biothings.io/v1',
);
@@ -695,6 +697,19 @@ sub nquire {
}
}
+ # if present, -j2x or -x2j must be next argument (undocumented)
+
+ if ( $i < $max ) {
+ $pat = $args[$i];
+ if ( $pat eq "-j2x" ) {
+ $i++;
+ $j2x = true;
+ } elsif ( $pat eq "-x2j" ) {
+ $i++;
+ $x2j = true;
+ }
+ }
+
# read file of keyword shortcuts for URL expansion
if ( $i < $max ) {
@@ -920,6 +935,16 @@ sub nquire {
binmode(STDOUT, ":utf8");
}
+ if ( $x2j ) {
+ my $xc = new XML::Simple(KeepRoot => 1);
+ my $conv = $xc->XMLin($output);
+ convert_bools($conv);
+ my $jc = JSON::PP->new->ascii->pretty->allow_nonref;
+ my $result = $jc->encode($conv);
+
+ $output = "$result";
+ }
+
print "$output";
}
diff --git a/xtract.go b/xtract.go
index 1441dca..82fd193 100644
--- a/xtract.go
+++ b/xtract.go
@@ -112,6 +112,7 @@ String Constraints
-equals String must match exactly
-contains Substring must be present
+ -is-within String must be present
-starts-with Substring must be at beginning
-ends-with Substring must be at end
-is-not String must not match
@@ -1675,6 +1676,7 @@ const (
OR
EQUALS
CONTAINS
+ ISWITHIN
STARTSWITH
ENDSWITH
ISNOT
@@ -1778,6 +1780,7 @@ var argTypeIs = map[string]ArgumentType{
"-or": CONDITIONAL,
"-equals": CONDITIONAL,
"-contains": CONDITIONAL,
+ "-is-within": CONDITIONAL,
"-starts-with": CONDITIONAL,
"-ends-with": CONDITIONAL,
"-is-not": CONDITIONAL,
@@ -1893,6 +1896,7 @@ var opTypeIs = map[string]OpType{
"-or": OR,
"-equals": EQUALS,
"-contains": CONTAINS,
+ "-is-within": ISWITHIN,
"-starts-with": STARTSWITH,
"-ends-with": ENDSWITH,
"-is-not": ISNOT,
@@ -2586,7 +2590,7 @@ func ParseArguments(cmdargs []string, pttrn string) *Block {
cond = append(cond, op)
parseStep(op, elementColonValue)
status = UNSET
- case EQUALS, CONTAINS, STARTSWITH, ENDSWITH, ISNOT:
+ case EQUALS, CONTAINS, ISWITHIN, STARTSWITH, ENDSWITH, ISNOT:
if op != nil {
if len(str) > 1 && str[0] == '\\' {
// first character may be backslash protecting dash (undocumented)
@@ -4729,7 +4733,7 @@ func ConditionsAreSatisfied(conditions []*Operation, curr *Node, mask string, in
stat := constraint.Type
switch stat {
- case EQUALS, CONTAINS, STARTSWITH, ENDSWITH, ISNOT:
+ case EQUALS, CONTAINS, ISWITHIN, STARTSWITH, ENDSWITH, ISNOT:
// substring test on element values
str = strings.ToUpper(str)
val = strings.ToUpper(val)
@@ -4743,6 +4747,10 @@ func ConditionsAreSatisfied(conditions []*Operation, curr *Node, mask string, in
if strings.Contains(str, val) {
return true
}
+ case ISWITHIN:
+ if strings.Contains(val, str) {
+ return true
+ }
case STARTSWITH:
if strings.HasPrefix(str, val) {
return true
@@ -6039,6 +6047,70 @@ func ProcessINSD(args []string, isPipe, addDash, doIndex bool) []string {
return acc
}
+// BIOTHINGS EXTRACTION COMMAND GENERATOR
+
// ProcessBiopath generates extraction commands for BioThings resources (undocumented).
//
// args[0] is a dotted exploration path; any further arguments are ignored.
// For example
//
//	nquire -variant variant "chr6:g.26093141G>A" |
//	xtract -biopath "clinvar.rcv.conditions.identifiers.omim"
//
// expands to
//
//	xtract -pattern opt -division clinvar -group rcv -branch conditions \
//	  -block identifiers -sep "\n" -tab "\n" -element "omim,@omim"
//
// Every node except the last is paired, in order, with the next exploration
// argument (-division, -group, -branch, -block, -section, -subset, -unit), so
// at most seven nodes may precede the final one. The final node is emitted as
// an -element value naming both the node and its @-prefixed form.
//
// When isPipe is true the -sep, -tab and -element values are returned bare,
// ready to be used directly as an argument list. Otherwise each of those
// values is wrapped in double quotes so the result can be printed as a
// command line.
//
// The function writes a message to stderr and exits with status 1 if no path
// is supplied, if any node of the path is empty, or if the path has more than
// eight nodes.
func ProcessBiopath(args []string, isPipe bool) []string {

	if len(args) < 1 || args[0] == "" {
		fmt.Fprintf(os.Stderr, "\nERROR: Insufficient command-line arguments supplied to xtract -biopath\n")
		os.Exit(1)
	}

	// exploration arguments assigned to successive nodes, outermost first
	levels := []string{
		"-division",
		"-group",
		"-branch",
		"-block",
		"-section",
		"-subset",
		"-unit",
	}

	nodes := strings.Split(args[0], ".")

	// strings.Split never returns an empty slice here, so a leading, trailing
	// or doubled dot appears as an empty node and must be rejected explicitly
	for _, node := range nodes {
		if node == "" {
			fmt.Fprintf(os.Stderr, "\nERROR: Empty node in argument supplied to xtract -biopath\n")
			os.Exit(1)
		}
	}

	// index of the final node, which is also the number of exploration levels needed
	last := len(nodes) - 1
	if last > len(levels) {
		fmt.Fprintf(os.Stderr, "\nERROR: Too many nodes in argument supplied to xtract -biopath\n")
		os.Exit(1)
	}

	// two items for the pattern, two per exploration level, six for the element
	acc := make([]string, 0, 2*len(nodes)+6)
	acc = append(acc, "-pattern", "opt")

	for i, node := range nodes[:last] {
		acc = append(acc, levels[i], node)
	}

	leaf := nodes[last]
	sep := "\\n"
	elem := leaf + ",@" + leaf
	if !isPipe {
		// printed instructions need shell quoting around these values
		sep = "\"" + sep + "\""
		elem = "\"" + elem + "\""
	}

	acc = append(acc, "-sep", sep, "-tab", sep, "-element", elem)

	return acc
}
+
// HYDRA CITATION MATCHER COMMAND GENERATOR
// ProcessHydra generates extraction commands for NCBI's in-house citation matcher (undocumented)
@@ -8471,6 +8543,29 @@ func main() {
args = hydra
}
+ // BIOTHINGS EXTRACTION COMMAND GENERATOR
+
+ // -biopath takes a dotted exploration path for BioThings resources (undocumented)
+ if args[0] == "-biopath" {
+
+ args = args[1:]
+
+ biopath := ProcessBiopath(args, isPipe || usingFile)
+
+ if !isPipe && !usingFile {
+ // no piped input, so write output instructions
+ fmt.Printf("xtract")
+ for _, str := range biopath {
+ fmt.Printf(" %s", str)
+ }
+ fmt.Printf("\n")
+ return
+ }
+
+ // data in pipe, so replace arguments, execute dynamically
+ args = biopath
+ }
+
// ENTREZ2INDEX COMMAND GENERATOR
// -e2index shortcut for experimental indexing code (documented in rchive.go)