diff options
author | Aaron M. Ucko <ucko@debian.org> | 2019-02-26 21:22:47 -0500 |
---|---|---|
committer | Aaron M. Ucko <ucko@debian.org> | 2019-02-26 21:22:47 -0500 |
commit | 857290e0db7609751c8e208731ea3cab6ded4c28 (patch) | |
tree | 3816eff3ed0ff04c02a4dba8afad0a2c05c45162 | |
parent | 1c42e99e91bbfb976d95038ed72113cc6d224482 (diff) |
New upstream version 10.9.20190219+ds
-rwxr-xr-x | nquire | 31 | ||||
-rw-r--r-- | xtract.go | 99 |
2 files changed, 125 insertions, 5 deletions
@@ -84,6 +84,7 @@ sub clearflags { $debug = false; $http = ""; $j2x = false; + $x2j = false; $output = ""; } @@ -492,12 +493,12 @@ BioThings Queries EDirect Expansion ExtractIDs() { - xtract -pattern BIO_THINGS -block Id -tab "\\n" -element "Id" + xtract -pattern BIO_THINGS -block Id -tab "\\n" -element Id } WrapIDs() { - xtract -wrp BIO_THINGS -pattern opt -wrp "Type" -lbl "\$1" \\ - -wrp "Count" -num "\$2" -block "\$2" -wrp "Id" -element "\$3" | + xtract -wrp BIO_THINGS -pattern opt -wrp Type -lbl "\$1" \\ + -wrp Count -num "\$2" -block "\$2" -wrp Id -element "\$3" | xtract -format } @@ -584,6 +585,7 @@ sub nquire { '-gene' => 'http://mygene.info/v3', '-variant' => 'http://myvariant.info/v1', '-chem' => 'http://mychem.info/v1', + '-disease' => 'http://mydisease.info/v1', '-drug' => 'http://c.biothings.io/v1', '-taxon' => 'http://t.biothings.io/v1', ); @@ -695,6 +697,19 @@ sub nquire { } } + # if present, -j2x or -x2j must be next argument (undocumented) + + if ( $i < $max ) { + $pat = $args[$i]; + if ( $pat eq "-j2x" ) { + $i++; + $j2x = true; + } elsif ( $pat eq "-x2j" ) { + $i++; + $x2j = true; + } + } + # read file of keyword shortcuts for URL expansion if ( $i < $max ) { @@ -920,6 +935,16 @@ sub nquire { binmode(STDOUT, ":utf8"); } + if ( $x2j ) { + my $xc = new XML::Simple(KeepRoot => 1); + my $conv = $xc->XMLin($output); + convert_bools($conv); + my $jc = JSON::PP->new->ascii->pretty->allow_nonref; + my $result = $jc->encode($conv); + + $output = "$result"; + } + print "$output"; } @@ -112,6 +112,7 @@ String Constraints -equals String must match exactly -contains Substring must be present + -is-within String must be present -starts-with Substring must be at beginning -ends-with Substring must be at end -is-not String must not match @@ -1675,6 +1676,7 @@ const ( OR EQUALS CONTAINS + ISWITHIN STARTSWITH ENDSWITH ISNOT @@ -1778,6 +1780,7 @@ var argTypeIs = map[string]ArgumentType{ "-or": CONDITIONAL, "-equals": CONDITIONAL, "-contains": CONDITIONAL, + "-is-within": CONDITIONAL, "-starts-with": CONDITIONAL, "-ends-with": CONDITIONAL, "-is-not": CONDITIONAL, @@ -1893,6 +1896,7 @@ var opTypeIs = map[string]OpType{ "-or": OR, "-equals": EQUALS, "-contains": CONTAINS, + "-is-within": ISWITHIN, "-starts-with": STARTSWITH, "-ends-with": ENDSWITH, "-is-not": ISNOT, @@ -2586,7 +2590,7 @@ func ParseArguments(cmdargs []string, pttrn string) *Block { cond = append(cond, op) parseStep(op, elementColonValue) status = UNSET - case EQUALS, CONTAINS, STARTSWITH, ENDSWITH, ISNOT: + case EQUALS, CONTAINS, ISWITHIN, STARTSWITH, ENDSWITH, ISNOT: if op != nil { if len(str) > 1 && str[0] == '\\' { // first character may be backslash protecting dash (undocumented) @@ -4729,7 +4733,7 @@ func ConditionsAreSatisfied(conditions []*Operation, curr *Node, mask string, in stat := constraint.Type switch stat { - case EQUALS, CONTAINS, STARTSWITH, ENDSWITH, ISNOT: + case EQUALS, CONTAINS, ISWITHIN, STARTSWITH, ENDSWITH, ISNOT: // substring test on element values str = strings.ToUpper(str) val = strings.ToUpper(val) @@ -4743,6 +4747,10 @@ func ConditionsAreSatisfied(conditions []*Operation, curr *Node, mask string, in if strings.Contains(str, val) { return true } + case ISWITHIN: + if strings.Contains(val, str) { + return true + } case STARTSWITH: if strings.HasPrefix(str, val) { return true @@ -6039,6 +6047,70 @@ func ProcessINSD(args []string, isPipe, addDash, doIndex bool) []string { return acc } +// BIOTHINGS EXTRACTION COMMAND GENERATOR + +// ProcessBiopath generates extraction commands for BioThings resources (undocumented) +func ProcessBiopath(args []string, isPipe bool) []string { + + // nquire -variant variant "chr6:g.26093141G>A" | xtract -biopath "clinvar.rcv.conditions.identifiers.omim" + + // xtract -pattern opt -division clinvar -group rcv -branch conditions -block identifiers -sep "\n" -tab "\n" -element "omim,@omim" + + var acc []string + + max := len(args) + if max < 1 { + fmt.Fprintf(os.Stderr, "\nERROR: Insufficient command-line arguments supplied to xtract -biopath\n") + os.Exit(1) + } + + path := args[0] + + dirs := strings.Split(path, ".") + max = len(dirs) + if max < 1 { + fmt.Fprintf(os.Stderr, "\nERROR: Insufficient command-line arguments supplied to xtract -biopath\n") + os.Exit(1) + } + + expname := []string{ + "-division", + "-group", + "-branch", + "-block", + "-section", + "-subset", + "-unit", + } + + acc = append(acc, "-pattern", "opt") + + max-- + if max > 7 { + fmt.Fprintf(os.Stderr, "\nERROR: Too many nodes in argument supplied to xtract -biopath\n") + os.Exit(1) + } + + for i, str := range dirs { + if i < max { + acc = append(acc, expname[i], str) + } else { + if isPipe { + acc = append(acc, "-sep", "\\n", "-tab", "\\n") + } else { + acc = append(acc, "-sep", "\"\\n\"", "-tab", "\"\\n\"") + } + if isPipe { + acc = append(acc, "-element", str+",@"+str) + } else { + acc = append(acc, "-element", "\""+str+",@"+str+"\"") + } + } + } + + return acc +} + // HYDRA CITATION MATCHER COMMAND GENERATOR // ProcessHydra generates extraction commands for NCBI's in-house citation matcher (undocumented) @@ -8471,6 +8543,29 @@ func main() { args = hydra } + // BIOTHINGS EXTRACTION COMMAND GENERATOR + + // -biopath takes a dotted exploration path for BioThings resources (undocumented) + if args[0] == "-biopath" { + + args = args[1:] + + biopath := ProcessBiopath(args, isPipe || usingFile) + + if !isPipe && !usingFile { + // no piped input, so write output instructions + fmt.Printf("xtract") + for _, str := range biopath { + fmt.Printf(" %s", str) + } + fmt.Printf("\n") + return + } + + // data in pipe, so replace arguments, execute dynamically + args = biopath + } + // ENTREZ2INDEX COMMAND GENERATOR // -e2index shortcut for experimental indexing code (documented in rchive.go) |