summary refs log tree commit diff
diff options
context:
space:
mode:
author Aaron M. Ucko <ucko@debian.org> 2017-10-06 17:43:37 -0400
committer Aaron M. Ucko <ucko@debian.org> 2017-10-06 17:44:16 -0400
commit 0d510bdeee8747dd089294a7210944e8c236cdad (patch)
tree fa4d2c46f995c8f7eaab9a3c6428710e8fc488fb
parent 91f37314681b612fec14016b237f144f67d81a49 (diff)
parent a0990267f337037396f8665df411d0b8bc641a66 (diff)
Merge tag 'upstream/7.40.20170926+ds'
Upstream version 7.40.20170926(+ds).
-rw-r--r-- debian/changelog 4
-rwxr-xr-x edirect.pl 91
-rw-r--r-- xtract.go 294
3 files changed, 254 insertions, 135 deletions
diff --git a/debian/changelog b/debian/changelog
index e5072b2..6bc8fc0 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-ncbi-entrez-direct (7.30.20170918+ds-1) UNRELEASED; urgency=medium
+ncbi-entrez-direct (7.40.20170926+ds-1) UNRELEASED; urgency=medium
* New upstream release. (NOT YET RELEASED.)
* debian/control: Unconditionalize Go build dependencies.
@@ -8,7 +8,7 @@ ncbi-entrez-direct (7.30.20170918+ds-1) UNRELEASED; urgency=medium
* debian/rules: Remove fallback logic to install the old Perl
implementation of xtract, now retired upstream.
- -- Aaron M. Ucko <ucko@debian.org> Fri, 06 Oct 2017 17:43:21 -0400
+ -- Aaron M. Ucko <ucko@debian.org> Fri, 06 Oct 2017 17:43:37 -0400
ncbi-entrez-direct (6.90.20170705+ds-2) unstable; urgency=medium
diff --git a/edirect.pl b/edirect.pl
index a755fd2..5326a3f 100755
--- a/edirect.pl
+++ b/edirect.pl
@@ -43,7 +43,7 @@ use File::Spec;
# EDirect version number
-$version = "7.30";
+$version = "7.40";
BEGIN
{
@@ -197,6 +197,11 @@ sub clearflags {
"shows#significantly#since#so#some#such#than#that#the#their#theirs#them#" .
"then#there#therefore#these#they#this#those#through#thus#to#upon#use#used#" .
"using#various#very#was#we#were#what#when#which#while#with#within#without#would#";
+
+ $os = "$^O";
+
+ $api_key = "";
+ $api_key = $ENV{NCBI_API_KEY} if defined $ENV{NCBI_API_KEY};
}
# gets a live UID for any database
@@ -432,27 +437,6 @@ sub get_email {
return $addr;
}
-# correct misspellings in query
-
-sub spell_check_query {
-
- my $db = shift (@_);
- my $qury = shift (@_);
-
- my $url = $base . $espell;
-
- my $enc = uri_escape($query);
- $arg = "db=$db&term=$enc";
-
- my $data = do_post ($url, $arg, $tool, $email, true);
-
- Encode::_utf8_on($data);
-
- $qury = $1 if ( $data =~ /<CorrectedQuery>(.+)<\/CorrectedQuery>/ );
-
- return $qury;
-}
-
# elink and epost currently need a separate ESearch to get the correct result count
sub get_count {
@@ -471,6 +455,14 @@ sub get_count {
$url .= "&edirect=$version";
+ if ( $os ne "" ) {
+ $url .= "&os=$os";
+ }
+
+ if ( $api_key ne "" ) {
+ $url .= "&api_key=$api_key";
+ }
+
if ( $tulx eq "" ) {
$tulx = "entrez-direct";
}
@@ -545,6 +537,14 @@ sub get_uids {
$url .= "&edirect=$version";
+ if ( $os ne "" ) {
+ $url .= "&os=$os";
+ }
+
+ if ( $api_key ne "" ) {
+ $url .= "&api_key=$api_key";
+ }
+
if ( $tulx eq "" ) {
$tulx = "edirect";
}
@@ -593,6 +593,14 @@ sub do_post_yielding_ref {
my $emlx = shift (@_);
my $intr = shift (@_);
+ if ( $os ne "" ) {
+ $argx .= "&os=$os";
+ }
+
+ if ( $api_key ne "" ) {
+ $argx .= "&api_key=$api_key";
+ }
+
$argx .= "&edirect=$version";
if ( $intr ) {
@@ -1356,6 +1364,27 @@ sub process_extras {
return $xtras;
}
+# correct misspellings in query
+
+sub spell_check_query {
+
+ my $db = shift (@_);
+ my $qury = shift (@_);
+
+ my $url = $base . $espell;
+
+ my $enc = uri_escape($query);
+ $arg = "db=$db&term=$enc";
+
+ my $data = do_post ($url, $arg, $tool, $email, true);
+
+ Encode::_utf8_on($data);
+
+ $qury = $1 if ( $data =~ /<CorrectedQuery>(.+)<\/CorrectedQuery>/ );
+
+ return $qury;
+}
+
sub efilt {
# ... | edirect.pl -filter -query "bacteria [ORGN]" -days 365 | ...
@@ -1382,6 +1411,7 @@ sub efilt {
"source=s" => \$source,
"status=s" => \$status,
"type=s" => \$gtype,
+ "api_key=s" => \$api_key,
"email=s" => \$emaddr,
"tool=s" => \$tuul,
"help" => \$help,
@@ -2122,6 +2152,7 @@ sub eftch {
"extrafeat=i" => \$extrafeat,
"start=i" => \$min,
"stop=i" => \$max,
+ "api_key=s" => \$api_key,
"email=s" => \$emaddr,
"tool=s" => \$tuul,
"pipe" => \$pipe,
@@ -2669,6 +2700,7 @@ sub einfo {
"dbs" => \$dbs,
"fields" => \$fields,
"links" => \$links,
+ "api_key=s" => \$api_key,
"email=s" => \$emaddr,
"tool=s" => \$tuul,
"help" => \$help,
@@ -2726,6 +2758,16 @@ sub einfo {
$prefix = "&";
}
+ if ( $os ne "" ) {
+ $url .= "$prefix" . "os=$os";
+ $prefix = "&";
+ }
+
+ if ( $api_key ne "" ) {
+ $url .= "$prefix" . "api_key=$api_key";
+ $prefix = "&";
+ }
+
$url .= "$prefix" . "edirect=$version";
$prefix = "&";
@@ -3182,6 +3224,7 @@ sub elink {
"batch" => \$batch,
"holding=s" => \$holding,
"label=s" => \$lbl,
+ "api_key=s" => \$api_key,
"email=s" => \$emaddr,
"tool=s" => \$tuul,
"help" => \$help,
@@ -3524,6 +3567,7 @@ sub entfy {
MyGetOptions(
$ntfy_help,
+ "api_key=s" => \$api_key,
"email=s" => \$emaddr,
"tool=s" => \$tuul,
"help" => \$help,
@@ -3695,6 +3739,7 @@ sub epost {
"format=s" => \$field,
"input=s" => \$input,
"label=s" => \$lbl,
+ "api_key=s" => \$api_key,
"email=s" => \$emaddr,
"tool=s" => \$tuul,
"help" => \$help,
@@ -3958,6 +4003,7 @@ sub espel {
$spell_help,
"db=s" => \$db,
"query=s" => \$query,
+ "api_key=s" => \$api_key,
"email=s" => \$emaddr,
"tool=s" => \$tuul,
"help" => \$help,
@@ -4430,6 +4476,7 @@ sub esrch {
"split=s" => \$split,
"merge=s" => \$meadow,
"pairs=s" => \$pair,
+ "api_key=s" => \$api_key,
"email=s" => \$emaddr,
"tool=s" => \$tuul,
"help" => \$help,
diff --git a/xtract.go b/xtract.go
index 10e3210..2825215 100644
--- a/xtract.go
+++ b/xtract.go
@@ -80,7 +80,7 @@ import (
// VERSION AND HELP MESSAGE TEXT
-const xtractVersion = "7.30"
+const xtractVersion = "7.40"
const xtractHelp = `
Overview
@@ -337,6 +337,7 @@ Local Record Indexing
-flag [strict|mixed|none]
-gzip Use compression for local XML files
-hash Print UIDs and checksum values to stdout
+ -skip File of UIDs to skip
Sample File Download
@@ -422,7 +423,7 @@ Reconstruct Release Files
Experimental Postings File Creation
efetch -db pubmed -id 12857958,2981625 -format xml |
- xtract -e2index |
+ xtract -e2index PubmedArticle MedlineCitation/PMID ArticleTitle,AbstractText,Keyword |
xtract -pattern IdxDocument -UID IdxUid \
-block NORM -pfc "\n" -element "&UID",NORM |
LC_ALL='C' sort -k 2f -k 1n |
@@ -431,51 +432,6 @@ Experimental Postings File Creation
DISABLE ANTI-VIRUS FILE SCANNING FOR LOCAL ARCHIVES OR MOVE TO TRUSTED FILES
DISABLE SPOTLIGHT INDEXING FOR EXTERNAL DISKS CONTAINING LOCAL ARCHIVES
-
-APFS Disk Creation
-
- diskutil list
-
- diskutil apfs createContainer /dev/disk1s2
- diskutil apfs addVolume disk1s2 APFS myssd
-
- diskutil mountDisk /dev/disk1
-
- sudo mdutil -i off /Volumes/myssd
- sudo mdutil -E /Volumes/myssd
- sudo rm -rf /Volumes/myssd/.Spotlight*
- sudo rm -rf /Volumes/myssd/.fseventsd
-
- touch /Volumes/myssd/.metadata_never_index
- chmod 444 /Volumes/myssd/.metadata_never_index
- mkdir /Volumes/myssd/.fseventsd
- touch /Volumes/myssd/.fseventsd/no_log
-
- Apple->System Preferences
- Spotlight
- Privacy
- Add: /Volumes/myssd
-
-FAT Disk Creation
-
- diskutil eraseDisk FAT32 BACKUP /dev/disk1
-
-Ramdisk Creation
-
- RAMDISK_SIZE_GB=4
- RAMDISK_SECTORS=$((2097152 * $RAMDISK_SIZE_GB))
- DISK_ID=$(hdiutil attach -nomount ram://$RAMDISK_SECTORS)
- echo "Disk ID is :" $DISK_ID
- diskutil erasevolume HFS+ myssd ${DISK_ID}
-
-Ramdisk Deletion
-
- diskutil list
-
- umount -f ${DISK_ID}
- hdiutil detach ${DISK_ID}
-
- (OR EJECT BY DRAGGING DISK IMAGE TO TRASH)
`
const xtractInternal = `
@@ -2505,11 +2461,6 @@ func TrimPunctuation(str string) string {
max := len(str)
- hasLeftP := strings.Contains(str, "(")
- hasRightP := strings.Contains(str, ")")
- hasLeftB := strings.Contains(str, "[")
- hasRightB := strings.Contains(str, "]")
-
doOneTrim := func() {
if max > 0 {
@@ -2558,6 +2509,9 @@ func TrimPunctuation(str string) string {
max -= 2
}
+ hasLeftP := strings.Contains(str, "(")
+ hasRightP := strings.Contains(str, ")")
+
if max > 1 && str[0] == '(' && str[1] == '(' && !hasRightP {
// trim leading double parentheses
str = str[2:]
@@ -2582,6 +2536,9 @@ func TrimPunctuation(str string) string {
max--
}
+ hasLeftB := strings.Contains(str, "[")
+ hasRightB := strings.Contains(str, "]")
+
if max > 0 && str[0] == '[' && !hasRightB {
// trim isolated left bracket
str = str[1:]
@@ -3272,7 +3229,7 @@ func ParseArguments(args []string, pttrn string) *Block {
// parseCommands does initial parsing of exploration command structure
parseCommands = func(parent *Block, startLevel LevelType) {
- // function to find next highest level exploration argument
+ // find next highest level exploration argument
findNextLevel := func(args []string, level LevelType) (LevelType, string, string) {
if len(args) > 1 {
@@ -3309,7 +3266,7 @@ func ParseArguments(args []string, pttrn string) *Block {
return
}
- // function to group arguments at a given exploration level
+ // group arguments at a given exploration level
subsetCommands := func(args []string) *Block {
max := len(args)
@@ -3404,7 +3361,7 @@ func ParseArguments(args []string, pttrn string) *Block {
status := UNSET
- // function to parse conditional clause into execution step
+ // parse conditional clause into execution step
parseStep := func(op *Operation, elementColonValue bool) {
if op == nil {
@@ -3624,7 +3581,7 @@ func ParseArguments(args []string, pttrn string) *Block {
status := UNSET
- // function to parse next argument
+ // parse next argument
nextStatus := func(str string) OpType {
status = ParseFlag(str)
@@ -3655,7 +3612,7 @@ func ParseArguments(args []string, pttrn string) *Block {
return status
}
- // function to parse extraction clause into individual steps
+ // parse extraction clause into individual steps
parseSteps := func(op *Operation, pttrn string) {
if op == nil {
@@ -4124,7 +4081,7 @@ func PartitionPattern(pat, star string, rdr *XMLReader, proc func(int, int64, st
CharSkip [256]int
}
- // function to initialize <pattern> to </pattern> scanner
+ // initialize <pattern> to </pattern> scanner
newScanner := func(pattern string) *Scanner {
if pattern == "" {
@@ -4151,7 +4108,7 @@ func PartitionPattern(pat, star string, rdr *XMLReader, proc func(int, int64, st
return scr
}
- // function check surroundings of match candidate
+ // check surroundings of match candidate
isAnElement := func(text string, lf, rt, mx int) bool {
if (lf >= 0 && text[lf] == '<') || (lf > 0 && text[lf] == '/' && text[lf-1] == '<') {
@@ -4218,7 +4175,7 @@ func PartitionPattern(pat, star string, rdr *XMLReader, proc func(int, int64, st
STOPPATTERN
)
- // function to find next element with pattern name
+ // find next element with pattern name
nextPattern := func(scr *Scanner, text string, pos int) (PatternType, int, int) {
if scr == nil || text == "" {
@@ -4364,7 +4321,7 @@ func PartitionPattern(pat, star string, rdr *XMLReader, proc func(int, int64, st
return
}
- // function to find next element in XML
+ // find next element in XML
nextElement := func(text string, pos int) string {
txtlen := len(text)
@@ -4525,7 +4482,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
plainText := (!tbls.DeGloss && !tbls.DoMixed)
- // function to get next XML token
+ // get next XML token
nextToken := func(idx int) (TagType, string, string, int, int) {
if Text == "" {
@@ -5587,7 +5544,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
justStartName := ""
justStartIndent := 0
- // function to indent a specified number of spaces
+ // indent a specified number of spaces
doIndent := func(indt int) {
if compRecrd || flushLeft {
return
@@ -5603,7 +5560,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
buffer.WriteString(indentSpaces[i])
}
- // function to handle delayed start tag
+ // handle delayed start tag
doDelayedName := func() {
if needsRightBracket != "" {
buffer.WriteString(">")
@@ -5620,7 +5577,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
closingTag := ""
- // function to print attributes
+ // print attributes
printAttributes := func(attr string) {
attr = strings.TrimSpace(attr)
@@ -6598,29 +6555,56 @@ func ProcessHydra(isPipe bool) []string {
// ENTREZ2INDEX COMMAND GENERATOR
// ProcessE2Index generates extraction commands to create input for Entrez2Index (undocumented)
-func ProcessE2Index(isPipe bool) []string {
+func ProcessE2Index(args []string, isPipe bool) []string {
var acc []string
+ max := len(args)
+ if max < 3 {
+ fmt.Fprintf(os.Stderr, "\nERROR: Insufficient command-line arguments supplied to xtract -e2index\n")
+ os.Exit(1)
+ }
+
+ patrn := args[0]
+ ident := args[1]
+
+ args = args[2:]
+
if isPipe {
acc = append(acc, "-head", "<IdxDocumentSet>", "-tail", "</IdxDocumentSet>")
acc = append(acc, "-hd", " <IdxDocument>\\n", "-tl", " </IdxDocument>")
- acc = append(acc, "-pattern", "PubmedArticle")
+ acc = append(acc, "-pattern")
+ ql := fmt.Sprintf("\"%s\"", patrn)
+ acc = append(acc, ql)
acc = append(acc, "-pfx", " <IdxUid>", "-sfx", "</IdxUid>\\n")
- acc = append(acc, "-element", "MedlineCitation/PMID")
+ acc = append(acc, "-element")
+ ql = fmt.Sprintf("\"%s\"", ident)
+ acc = append(acc, ql)
acc = append(acc, "-clr", "-rst", "-tab", "")
acc = append(acc, "-lbl", " <IdxSearchFields>\\n")
- acc = append(acc, "-indices", "ArticleTitle,AbstractText,Keyword")
+ acc = append(acc, "-indices")
+ for _, str := range args {
+ ql = fmt.Sprintf("\"%s\"", str)
+ acc = append(acc, ql)
+ }
acc = append(acc, "-clr", "-lbl", " </IdxSearchFields>\\n")
} else {
acc = append(acc, "-head", "\"<IdxDocumentSet>\"", "-tail", "\"</IdxDocumentSet>\"")
acc = append(acc, "-hd", "\" <IdxDocument>\\n\"", "-tl", "\" </IdxDocument>\"")
- acc = append(acc, "-pattern", "PubmedArticle")
+ acc = append(acc, "-pattern")
+ ql := fmt.Sprintf("\"%s\"", patrn)
+ acc = append(acc, ql)
acc = append(acc, "-pfx", "\" <IdxUid>\"", "-sfx", "\"</IdxUid>\\n\"")
- acc = append(acc, "-element", "MedlineCitation/PMID")
+ acc = append(acc, "-element")
+ ql = fmt.Sprintf("\"%s\"", ident)
+ acc = append(acc, ql)
acc = append(acc, "-clr", "-rst", "-tab", "\"\"")
acc = append(acc, "-lbl", "\" <IdxSearchFields>\\n\"")
- acc = append(acc, "-indices", "ArticleTitle,AbstractText,Keyword")
+ acc = append(acc, "-indices")
+ for _, str := range args {
+ ql = fmt.Sprintf("\"%s\"", str)
+ acc = append(acc, ql)
+ }
acc = append(acc, "-clr", "-lbl", "\" </IdxSearchFields>\\n\"")
}
@@ -6811,7 +6795,7 @@ func PrintSubtree(node *Node, style IndentType, printAttrs bool, proc func(strin
" ",
}
- // function to indent a specified number of spaces
+ // indent a specified number of spaces
doIndent := func(indt int) {
i := indt
for i > 9 {
@@ -7631,7 +7615,7 @@ func ConditionsAreSatisfied(conditions []*Operation, curr *Node, mask string, in
isMatch := false
isAvoid := false
- // function to test string or numeric constraints
+ // test string or numeric constraints
testConstraint := func(str string, constraint *Step) bool {
if str == "" || constraint == nil {
@@ -8077,7 +8061,7 @@ func ProcessQuery(Text, parent string, index int, cmds *Block, tbls *Tables, act
FarmMax := tbls.FarmSize
FarmItems := make([]Node, FarmMax)
- // function to allocate multiple nodes in a large array for memory management efficiency
+ // allocate multiple nodes in a large array for memory management efficiency
nextNode := func(strt, attr, prnt string) *Node {
// if farm array slots used up, allocate new array
@@ -8108,7 +8092,7 @@ func ProcessQuery(Text, parent string, index int, cmds *Block, tbls *Tables, act
plainText := (!tbls.DeGloss && !tbls.DoMixed)
- // function to get next XML token
+ // get next XML token
nextToken := func(idx int) (TagType, string, string, int) {
// lookup table array pointers
@@ -8575,7 +8559,7 @@ func ProcessQuery(Text, parent string, index int, cmds *Block, tbls *Tables, act
return ""
}
-// FUNCTION TO CONVERT IDENTIFIER TO DIRECTORY PATH FOR LOCAL FILE ARCHIVE
+// CONVERT IDENTIFIER TO DIRECTORY PATH FOR LOCAL FILE ARCHIVE
// MakeArchiveTrie allows a short prefix of letters with an optional underscore, and splits the remainder into character pairs
func MakeArchiveTrie(str string, arry [132]rune) string {
@@ -8643,7 +8627,7 @@ func MakeArchiveTrie(str string, arry [132]rune) string {
return strings.ToUpper(string(arry[:i]))
}
-// FUNCTION TO CONVERT TERM TO DIRECTORY PATH FOR POSTINGS FILE STORAGE
+// CONVERT TERM TO DIRECTORY PATH FOR POSTINGS FILE STORAGE
// MakePostingsTrie splits a string into characters, separated by path delimiting slashes
func MakePostingsTrie(str string, arry [516]rune) string {
@@ -9014,6 +8998,72 @@ func CreateUniquer(tbls *Tables, inp <-chan Extract) <-chan Extract {
return out
}
+func CreateDeleter(tbls *Tables, dltd string, inp <-chan Extract) <-chan Extract {
+
+ if tbls == nil || inp == nil {
+ return nil
+ }
+
+ out := make(chan Extract, tbls.ChanDepth)
+ if out == nil {
+ fmt.Fprintf(os.Stderr, "\nERROR: Unable to create deleter channel\n")
+ os.Exit(1)
+ }
+
+ // map to track UIDs to skip
+ shouldSkip := make(map[string]bool)
+
+ checkMap := false
+
+ if dltd != "" && dltd != "-" {
+ fmt.Fprintf(os.Stderr, "\nEnter CreateDeleter Scanner\n")
+ checkMap = true
+
+ skipFile, err := os.Open(dltd)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "\nERROR: Unable to read skip file\n")
+ os.Exit(1)
+ }
+
+ scanr := bufio.NewScanner(skipFile)
+
+ for scanr.Scan() {
+
+ // read lines of identifiers
+ id := scanr.Text()
+
+ // add to exclusion map
+ shouldSkip[id] = true
+ }
+
+ skipFile.Close()
+ fmt.Fprintf(os.Stderr, "\nLeave CreateDeleter Scanner\n")
+ }
+
+ // xmlDeleter removes records listed as deleted
+ xmlDeleter := func(inp <-chan Extract, out chan<- Extract) {
+
+ // close channel when all records have been processed
+ defer close(out)
+
+ for curr := range inp {
+
+ // check if identifier was deleted
+ if checkMap && shouldSkip[curr.Ident] {
+ continue
+ }
+
+ // send to output channel
+ out <- curr
+ }
+ }
+
+ // launch single deleter goroutine
+ go xmlDeleter(inp, out)
+
+ return out
+}
+
func CreateStashers(tbls *Tables, inp <-chan Extract) <-chan string {
if tbls == nil || inp == nil {
@@ -9611,6 +9661,9 @@ func main() {
// path for local data indexed as trie
stsh := ""
+ // file of UIDs to skip
+ dltd := ""
+
// path for postings files indexed as trie
pstg := ""
@@ -9634,7 +9687,7 @@ func main() {
// repeat the specified extraction 5 times for each -proc from 1 to nCPU
trial := false
- // function to get numeric value
+ // get numeric value
getNumericArg := func(name string, zer, min, max int) int {
if len(args) < 2 {
@@ -9718,6 +9771,15 @@ func main() {
stsh = args[1]
// skip past first of two arguments
args = args[1:]
+ // UIDs to ignore
+ case "-skip":
+ if len(args) < 2 {
+ fmt.Fprintf(os.Stderr, "\nERROR: Skip file is missing\n")
+ os.Exit(1)
+ }
+ dltd = args[1]
+ // skip past first of two arguments
+ args = args[1:]
// local directory path for postings files (undocumented)
case "-posting", "-postings":
if len(args) < 2 {
@@ -10139,7 +10201,9 @@ func main() {
// -e2index shortcut for experimental indexing code (undocumented)
if args[0] == "-e2index" {
- res := ProcessE2Index(isPipe || usingFile)
+ args = args[1:]
+
+ res := ProcessE2Index(args, isPipe || usingFile)
if !isPipe && !usingFile {
// no piped input, so write output instructions
@@ -10188,14 +10252,41 @@ func main() {
defer pprof.StopCPUProfile()
}
+ // SPECIAL FORMATTING COMMANDS
+
+ inSwitch = true
+ action := NOPROCESS
+
+ switch args[0] {
+ case "-format":
+ action = DOFORMAT
+ case "-outline":
+ action = DOOUTLINE
+ case "-synopsis":
+ action = DOSYNOPSIS
+ case "-verify", "-validate":
+ action = DOVERIFY
+ case "-filter":
+ action = DOFILTER
+ default:
+ // if not any of the formatting commands, keep going
+ inSwitch = false
+ }
+
+ if inSwitch {
+ ProcessXMLStream(rdr, tbls, args, action)
+ return
+ }
+
// INITIALIZE PROCESS TIMER AND RECORD COUNT
startTime := time.Now()
recordCount := 0
byteCount := 0
- // function to print processing rate and program duration
+ // print processing rate and program duration
printDuration := func(name string) {
+
stopTime := time.Now()
duration := stopTime.Sub(startTime)
seconds := float64(duration.Nanoseconds()) / 1e9
@@ -10205,6 +10296,7 @@ func main() {
} else {
fmt.Fprintf(os.Stderr, "\nXtract processed %d %s in %.3f seconds", recordCount, name, seconds)
}
+
if seconds >= 0.001 && recordCount > 0 {
rate := int(float64(recordCount) / seconds)
if rate >= 1000000 {
@@ -10224,33 +10316,8 @@ func main() {
}
fmt.Fprintf(os.Stderr, ")")
}
- fmt.Fprintf(os.Stderr, "\n\n")
- }
-
- // SPECIAL FORMATTING COMMANDS
- inSwitch = true
- action := NOPROCESS
-
- switch args[0] {
- case "-format":
- action = DOFORMAT
- case "-outline":
- action = DOOUTLINE
- case "-synopsis":
- action = DOSYNOPSIS
- case "-verify", "-validate":
- action = DOVERIFY
- case "-filter":
- action = DOFILTER
- default:
- // if not any of the formatting commands, keep going
- inSwitch = false
- }
-
- if inSwitch {
- ProcessXMLStream(rdr, tbls, args, action)
- return
+ fmt.Fprintf(os.Stderr, "\n\n")
}
// SPECIFY STRINGS TO GO BEFORE AND AFTER ENTIRE OUTPUT OR EACH RECORD
@@ -10427,7 +10494,7 @@ func main() {
// -archive without -index retrieves XML files in trie-based directory structure
if stsh != "" && indx == "" {
- uidq := CreateUIDReader(in, tbls)
+ uidq := CreateUIDReader(rdr.Reader, tbls)
strq := CreateFetchers(tbls, uidq)
unsq := CreateUnshuffler(tbls, strq)
@@ -10577,7 +10644,7 @@ func main() {
return
}
- // function to print new or updated XML record
+ // print new or updated XML record
printRecord := func(stn string, isNew bool) {
if stn == "" {
@@ -10691,9 +10758,14 @@ func main() {
idnq := CreateExaminers(tbls, parent, xmlq)
unsq := CreateUnshuffler(tbls, idnq)
unqq := CreateUniquer(tbls, unsq)
- stsq := CreateStashers(tbls, unqq)
+ delq := unqq
+ if dltd != "" {
+ // only create deleter if -skip argument is present
+ delq = CreateDeleter(tbls, dltd, unqq)
+ }
+ stsq := CreateStashers(tbls, delq)
- if xmlq == nil || idnq == nil || unsq == nil || unqq == nil || stsq == nil {
+ if xmlq == nil || idnq == nil || unsq == nil || unqq == nil || delq == nil || stsq == nil {
fmt.Fprintf(os.Stderr, "\nERROR: Unable to create stash generator\n")
os.Exit(1)
}