1 files changed, 263 insertions, 116 deletions
diff --git a/xtract.go b/xtract.go
index 46c760e..2d91bf7 100644
--- a/xtract.go
+++ b/xtract.go
@@ -93,8 +93,8 @@ Overview
 
 Processing Flags
 
-  -mixed           Allow PubMed mixed content
   -strict          Remove HTML highlight tags
+  -mixed           Allow PubMed mixed content
 
   -accent          Delete Unicode accents
   -ascii           Unicode to numeric character references
@@ -327,17 +327,15 @@ Examples
 `
 
 const xtractExtras = `
+Processing Flags
+
+  -flags      [strict|mixed|none]
+
 Local Record Indexing
 
   -stash      Base path for individual XML files
   -index      Name of element to use for identifier
-
-Processing Commands
-
-  -prepare    [release|report] Compare daily update to stash
-  -ignore     Ignore contents of object in -prepare comparisons
-  -missing    Print list of missing identifiers
-  -unique     File of UIDs for skipping all but last version
+  -unique     File of UIDs for removing intermediate records
 
 Sample File Download
 
@@ -361,11 +359,11 @@ Human Subset Extraction
 PubMed Download
 
   download-pubmed baseline updatefiles
-  unpack-pubmed
+  unpack-pubmed mixed
 
 PubMed Archive Creation
 
-  stash-pubmed /Volumes/myssd/Pubmed
+  stash-pubmed mixed /Volumes/myssd/Pubmed
 
 PubMed Archive Retrieval
 
@@ -374,6 +372,12 @@ PubMed Archive Retrieval
 `
 
 const xtractAdvanced = `
+Processing Commands
+
+  -prepare    [release|report] Compare daily update to stash
+  -ignore     Ignore contents of object in -prepare comparisons
+  -missing    Print list of missing identifiers
+
 Update Candidate Report
 
   gzcat medline*.xml.gz | xtract -strict -compress -format flush |
@@ -452,14 +456,14 @@ Performance Tuning Script
 
 Processor Titration Results
 
-  1    27748    207
-  2    51011    272
-  3    73487    700
-  4    93032    2559
-  5    92596    1549
-  6    89513    1570
-  7    84872    1145
-  8    83829    952
+  1    27622    31
+  2    51799    312
+  3    74853    593
+  4    95867    1337
+  5    97171    4019
+  6    93460    2458
+  7    87467    1030
+  8    82448    2651
 
 Execution Profiling
 
@@ -618,7 +622,7 @@ Gene Regions
   LOCUS       NC_000076               2142 bp    DNA     linear   CON 09-FEB-2015
   DEFINITION  Mus musculus strain C57BL/6J chromosome 10, GRCm38.p3 C57BL/6J.
   ACCESSION   NC_000076 REGION: complement(75771233..75773374) GPC_000000783
-  VERSION     NC_000076.6  GI:372099100
+  VERSION     NC_000076.6
   ...
   FEATURES             Location/Qualifiers
        source          1..2142
@@ -2276,7 +2280,7 @@ type Tables struct {
 	DeGloss   bool
 	DoMixed   bool
 	DeAccent  bool
-	DoAscii   bool
+	DoASCII   bool
 }
 
 type Node struct {
@@ -2487,26 +2491,22 @@ func TrimPunctuation(str string) string {
 		}
 	}
 
-	if max > 0 {
-		if str[0] == '(' && !strings.Contains(str, ")") {
-			// trim isolated left parentheses
-			str = str[1:]
-			max--
-		}
+	if max > 0 && str[0] == '(' && !strings.Contains(str, ")") {
+		// trim isolated left parentheses
+		str = str[1:]
+		max--
 	}
 
-	if max > 1 {
-		if str[max-1] == ')' && !strings.Contains(str, "(") {
-			// trim isolated right parentheses
-			str = str[:max-1]
-			// max--
-		}
+	if max > 1 && str[max-1] == ')' && !strings.Contains(str, "(") {
+		// trim isolated right parentheses
+		str = str[:max-1]
+		// max--
 	}
 
 	return str
 }
 
-func HtmlAhead(text string, pos int) int {
+func HTMLAhead(text string, pos int) int {
 
 	max := len(text) - pos
 
@@ -2570,7 +2570,7 @@ func HtmlAhead(text string, pos int) int {
 	return 0
 }
 
-func HtmlBehind(bufr []byte, pos int) bool {
+func HTMLBehind(bufr []byte, pos int) bool {
 
 	if pos > 1 && bufr[pos-2] == '<' {
 		ch := bufr[pos-1]
@@ -2781,7 +2781,7 @@ var (
 	rpair *strings.Replacer
 )
 
-func DoHtmlReplace(str string) string {
+func DoHTMLReplace(str string) string {
 
 	// replacer/repairer not reentrant, protected by mutex
 	rlock.Lock()
@@ -2862,7 +2862,7 @@ func DoHtmlReplace(str string) string {
 	return str
 }
 
-func DoHtmlRepair(str string) string {
+func DoHTMLRepair(str string) string {
 
 	// replacer/repairer not reentrant, protected by mutex
 	rlock.Lock()
@@ -2923,7 +2923,7 @@ func DoHtmlRepair(str string) string {
 	return str
 }
 
-func DoTrimFlankingHtml(str string) string {
+func DoTrimFlankingHTML(str string) string {
 
 	badPrefix := [10]string{
 		"<i></i>",
@@ -3050,7 +3050,7 @@ func DoAccentTransform(str string) string {
 	return str
 }
 
-func UnicodeToAscii(str string) string {
+func UnicodeToASCII(str string) string {
 
 	var buffer bytes.Buffer
 
@@ -3874,16 +3874,16 @@ type XMLReader struct {
 	Closed     bool
 	Docompress bool
 	Docleanup  bool
-	Leavehtml  bool
+	LeaveHTML  bool
 }
 
-func NewXMLReader(in io.Reader, doCompress, doCleanup, leaveHtml bool) *XMLReader {
+func NewXMLReader(in io.Reader, doCompress, doCleanup, leaveHTML bool) *XMLReader {
 
 	if in == nil {
 		return nil
 	}
 
-	rdr := &XMLReader{Reader: in, Docompress: doCompress, Docleanup: doCleanup, Leavehtml: leaveHtml}
+	rdr := &XMLReader{Reader: in, Docompress: doCompress, Docleanup: doCleanup, LeaveHTML: leaveHTML}
 
 	// 65536 appears to be the maximum number of characters presented to io.Reader when input is piped from stdin
 	// increasing size of buffer when input is from a file does not improve program performance
@@ -3940,9 +3940,9 @@ func (rdr *XMLReader) NextBlock() string {
 		pos := -1
 		for pos = len(bufr) - 1; pos >= 0; pos-- {
 			if bufr[pos] == '>' {
-				if rdr.Leavehtml {
+				if rdr.LeaveHTML {
 					// optionally skip backwards past embedded i, b, u, sub, and sup HTML open, close, and empty tags
-					if HtmlBehind(bufr, pos) {
+					if HTMLBehind(bufr, pos) {
 						continue
 					}
 				}
@@ -4521,7 +4521,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 
 		start := idx
 
-		if ch == '<' && (plainText || HtmlAhead(text, idx) == 0) {
+		if ch == '<' && (plainText || HTMLAhead(text, idx) == 0) {
 
 			// at start of element
 			idx++
@@ -4723,7 +4723,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 				}
 				if ch == '<' && !plainText {
 					// optionally allow HTML text formatting elements and super/subscripts
-					advance := HtmlAhead(text, idx)
+					advance := HTMLAhead(text, idx)
 					if advance > 0 {
 						idx += advance
 						ch = text[idx]
@@ -4934,7 +4934,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 		startLine := 0
 
 		// warn if HTML tags are not well-formed
-		unbalancedHtml := func(text string) bool {
+		unbalancedHTML := func(text string) bool {
 
 			var arry []string
 
@@ -5037,7 +5037,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 						fmt.Fprintf(os.Stdout, "Contents not expected before </%s>, line %d\n", parent, line)
 					}
 					if tbls.DeGloss || tbls.DoMixed {
-						if unbalancedHtml(name) {
+						if unbalancedHTML(name) {
 							fmt.Fprintf(os.Stdout, "Unbalanced mixed-content tags, line %d\n", line)
 						}
 					}
@@ -5393,7 +5393,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 						str = RemoveUnicodeMarkup(str)
 					}
 					if HasAngleBracket(str) {
-						str = DoHtmlReplace(str)
+						str = DoHTMLReplace(str)
 					}
 				}
 				if tbls.DoMixed {
@@ -5401,18 +5401,18 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 						str = SimulateUnicodeMarkup(str)
 					}
 					if HasAngleBracket(str) {
-						str = DoHtmlRepair(str)
+						str = DoHTMLRepair(str)
 					}
-					str = DoTrimFlankingHtml(str)
+					str = DoTrimFlankingHTML(str)
 				}
 				if tbls.DeAccent {
 					if IsNotASCII(str) {
 						str = DoAccentTransform(str)
 					}
 				}
-				if tbls.DoAscii {
+				if tbls.DoASCII {
 					if IsNotASCII(str) {
-						str = UnicodeToAscii(str)
+						str = UnicodeToASCII(str)
 					}
 				}
 
@@ -5530,9 +5530,9 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 					attr = DoAccentTransform(attr)
 				}
 			}
-			if tbls.DoAscii {
+			if tbls.DoASCII {
 				if IsNotASCII(attr) {
-					attr = UnicodeToAscii(attr)
+					attr = UnicodeToASCII(attr)
 				}
 			}
 
@@ -5773,7 +5773,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 							name = RemoveUnicodeMarkup(name)
 						}
 						if HasAngleBracket(name) {
-							name = DoHtmlReplace(name)
+							name = DoHTMLReplace(name)
 						}
 					}
 					if tbls.DoMixed {
@@ -5781,18 +5781,18 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 							name = SimulateUnicodeMarkup(name)
 						}
 						if HasAngleBracket(name) {
-							name = DoHtmlRepair(name)
+							name = DoHTMLRepair(name)
 						}
-						name = DoTrimFlankingHtml(name)
+						name = DoTrimFlankingHTML(name)
 					}
 					if tbls.DeAccent {
 						if IsNotASCII(name) {
 							name = DoAccentTransform(name)
 						}
 					}
-					if tbls.DoAscii {
+					if tbls.DoASCII {
 						if IsNotASCII(name) {
-							name = UnicodeToAscii(name)
+							name = UnicodeToASCII(name)
 						}
 					}
 					if HasFlankingSpace(name) {
@@ -7168,7 +7168,7 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, sep, def st
 					str = RemoveUnicodeMarkup(str)
 				}
 				if HasAngleBracket(str) {
-					str = DoHtmlReplace(str)
+					str = DoHTMLReplace(str)
 				}
 
 				// break terms at spaces, allowing hyphenated words
@@ -7993,7 +7993,7 @@ func ProcessQuery(Text, parent string, index int, cmds *Block, tbls *Tables, act
 
 		start := idx
 
-		if ch == '<' && (plainText || HtmlAhead(text, idx) == 0) {
+		if ch == '<' && (plainText || HTMLAhead(text, idx) == 0) {
 
 			// at start of element
 			idx++
@@ -8142,7 +8142,7 @@ func ProcessQuery(Text, parent string, index int, cmds *Block, tbls *Tables, act
 				}
 				if ch == '<' && !plainText {
 					// optionally allow HTML text formatting elements and super/subscripts
-					advance := HtmlAhead(text, idx)
+					advance := HTMLAhead(text, idx)
 					if advance > 0 {
 						idx += advance
 						ch = text[idx]
@@ -8218,7 +8218,7 @@ func ProcessQuery(Text, parent string, index int, cmds *Block, tbls *Tables, act
 						name = RemoveUnicodeMarkup(name)
 					}
 					if HasAngleBracket(name) {
-						name = DoHtmlReplace(name)
+						name = DoHTMLReplace(name)
 					}
 				}
 				if tbls.DoMixed {
@@ -8226,18 +8226,18 @@ func ProcessQuery(Text, parent string, index int, cmds *Block, tbls *Tables, act
 						name = SimulateUnicodeMarkup(name)
 					}
 					if HasAngleBracket(name) {
-						name = DoHtmlReplace(name)
+						name = DoHTMLReplace(name)
 					}
-					name = DoTrimFlankingHtml(name)
+					name = DoTrimFlankingHTML(name)
 				}
 				if tbls.DeAccent {
 					if IsNotASCII(name) {
 						name = DoAccentTransform(name)
 					}
 				}
-				if tbls.DoAscii {
+				if tbls.DoASCII {
 					if IsNotASCII(name) {
-						name = UnicodeToAscii(name)
+						name = UnicodeToASCII(name)
 					}
 				}
 				node.Contents = name
@@ -8546,7 +8546,7 @@ func (h *ExtractHeap) Pop() interface{} {
 // process with single goroutine calls defer close(out) so consumer(s) can range over channel
 // process with multiple instances calls defer wg.Done(), separate goroutine uses wg.Wait() to delay close(out)
 
-func CreateProducer(pat, star string, rdr *XMLReader, tbls *Tables) <-chan Extract {
+func CreateProducer(pat, star string, rdr *XMLReader, uidFile string, tbls *Tables) <-chan Extract {
 
 	if rdr == nil || tbls == nil {
 		return nil
@@ -8558,15 +8558,72 @@ func CreateProducer(pat, star string, rdr *XMLReader, tbls *Tables) <-chan Extra
 		os.Exit(1)
 	}
 
+	// create map that counts instances of each UID
+	order := make(map[string]int)
+
+	checkIDs := false
+
+	if uidFile != "" {
+		checkIDs = true
+
+		// read file of identifiers to use for filtering
+		fl, err := os.Open(uidFile)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "\nERROR: Unable to open identifier file '%s'\n", uidFile)
+			os.Exit(1)
+		}
+
+		scanr := bufio.NewScanner(fl)
+
+		// read lines of identifiers
+		for scanr.Scan() {
+
+			id := scanr.Text()
+
+			// increment the occurrence count for this identifier
+			val := order[id]
+			val++
+			order[id] = val
+		}
+
+		fl.Close()
+	}
+
 	// xmlProducer sends partitioned XML strings through channel
 	xmlProducer := func(pat, star string, rdr *XMLReader, out chan<- Extract) {
 
 		// close channel when all records have been processed
 		defer close(out)
 
+		parent := ""
+		if star == "*" {
+			parent = pat
+		}
+
 		// partition all input by pattern and send XML substring to available consumer through channel
 		PartitionPattern(pat, star, rdr,
 			func(rec int, ofs int64, str string) {
+
+				if checkIDs {
+					id := ProcessQuery(str[:], parent, rec, nil, tbls, DOINDEX)
+					if id == "" {
+						return
+					}
+
+					val, ok := order[id]
+					if !ok {
+						// not in identifier list, skip
+						return
+					}
+					// decrement count in map
+					val--
+					order[id] = val
+					if val > 0 {
+						// later occurrences remain, so skip; only the last record with a given identifier is written
+						return
+					}
+				}
+
 				out <- Extract{rec, "", str}
 			})
 	}
@@ -9058,7 +9115,10 @@ func main() {
 	deGloss := false
 	doMixed := false
 	deAccent := false
-	doAscii := false
+	doASCII := false
+
+	// -flags sets -strict or -mixed cleanup flags from argument
+	flgs := ""
 
 	// read data from file instead of stdin
 	fileName := ""
@@ -9076,7 +9136,7 @@ func main() {
 	// element to use as local data index
 	indx := ""
 
-	// file of index values for removing duplicates
+	// file of index values for removing duplicates (read or write, depending upon context)
 	unqe := ""
 
 	// phrase to find anywhere in XML
@@ -9165,10 +9225,10 @@ func main() {
 			fileName = args[1]
 			// skip past first of two arguments
 			args = args[1:]
-		// file with selected indexes for removing duplicates
+		// uid file for removing duplicates
 		case "-unique":
 			if len(args) < 2 {
-				fmt.Fprintf(os.Stderr, "\nERROR: Unique identifier file is missing\n")
+				fmt.Fprintf(os.Stderr, "\nERROR: Unique identifier file name is missing\n")
 				os.Exit(1)
 			}
 			unqe = args[1]
@@ -9217,7 +9277,15 @@ func main() {
 		case "-accent", "-plain":
 			deAccent = true
 		case "-ascii":
-			doAscii = true
+			doASCII = true
+		case "-flags":
+			if len(args) < 2 {
+				fmt.Fprintf(os.Stderr, "\nERROR: Flags argument is missing\n")
+				os.Exit(1)
+			}
+			flgs = args[1]
+			// skip past first of two arguments
+			args = args[1:]
 		// debugging flags
 		case "-prepare":
 			cmpr = true
@@ -9270,6 +9338,20 @@ func main() {
 		}
 	}
 
+	// -flags allows script to set -strict or -mixed from argument
+	switch flgs {
+	case "strict":
+		deGloss = true
+	case "mixed":
+		doMixed = true
+	case "none", "default":
+	default:
+		if flgs != "" {
+			fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized -flags value '%s'\n", flgs)
+			os.Exit(1)
+		}
+	}
+
 	// reality checks on number of processors to use
 	// performance degrades if capacity is above maximum number of partitions per second (context switching?)
 	if numProcs == 0 {
@@ -9442,7 +9524,7 @@ func main() {
 	tbls.DeGloss = deGloss
 	tbls.DoMixed = doMixed
 	tbls.DeAccent = deAccent
-	tbls.DoAscii = doAscii
+	tbls.DoASCII = doASCII
 
 	// FILE NAME CAN BE SUPPLIED WITH -input COMMAND
 
@@ -9879,7 +9961,7 @@ func main() {
 
 	// COMPARE XML UPDATES TO LOCAL DIRECTORY, RETAIN NEW OR SUBSTANTIVELY CHANGED RECORDS
 
-	// -prepare plus -stash plus -index plus -pattern compares XML files against stash (undocumented)
+	// -prepare plus -stash plus -index plus -pattern compares XML files against stash
 	if stsh != "" && indx != "" && cmpr {
 
 		doReport := false
@@ -10022,10 +10104,10 @@ func main() {
 
 	// SAVE XML COMPONENT RECORDS TO LOCAL DIRECTORY INDEXED BY TRIE ON IDENTIFIER
 
-	// -stash plus -index plus -pattern saves XML files in trie-based directory structure
+	// -stash plus -index [plus -unique] plus -pattern saves XML files in trie-based directory structure
 	if stsh != "" && indx != "" {
 
-		xmlq := CreateProducer(topPattern, star, rdr, tbls)
+		xmlq := CreateProducer(topPattern, star, rdr, unqe, tbls)
 		idnq := CreateExaminers(tbls, parent, xmlq)
 		unsq := CreateUnshuffler(tbls, idnq)
 		unqq := CreateUniquer(tbls, unsq)
@@ -10050,40 +10132,83 @@ func main() {
 		return
 	}
 
-	// READ FILE OF IDENTIFIERS AND EXTRACT SELECTED RECORDS FROM XML INPUT FILE
+	// GENERATE UID LIST AND REMOVE LEADING SPACES FROM XML
 
-	// -index plus -unique [plus -head/-tail/-hd/-tl] plus -pattern with no other extraction arguments
-	// takes an XML input file and a file of its UIDs and keeps only the last version of each record
-	if indx != "" && unqe != "" && len(args) == 2 {
+	// -index plus -unique without -stash [plus -head/-tail/-hd/-tl] plus -pattern takes an XML input file and
+	// writes a trimmed version with leading spaces removed, also creating a file of its UIDs
+	if stsh == "" && indx != "" && unqe != "" {
 
-		// read file of identifiers to use for filtering
-		fl, err := os.Open(unqe)
+		fl, err := os.Create(unqe)
 		if err != nil {
-			fmt.Fprintf(os.Stderr, "\nERROR: Unable to open identifier file '%s'\n", unqe)
+			fmt.Fprintf(os.Stderr, "\nERROR: Unable to open uid output file '%s'\n", unqe)
 			os.Exit(1)
 		}
 
-		// create map that counts instances of each UID
-		order := make(map[string]int)
+		if head != "" {
+			os.Stdout.WriteString(head)
+			os.Stdout.WriteString("\n")
+		}
 
-		scanr := bufio.NewScanner(fl)
+		// write output, efficiently skipping leading spaces on each line
+		writeFlush := func(text string) {
 
-		// read lines of identifiers
-		for scanr.Scan() {
+			if text == "" {
+				return
+			}
 
-			id := scanr.Text()
+			var buffer bytes.Buffer
 
-			// map records count for given identifier
-			val := order[id]
-			val++
-			order[id] = val
-		}
+			max := len(text)
+			idx := 0
+			inBlank := &tbls.InBlank
 
-		fl.Close()
+			for idx < max {
 
-		if head != "" {
-			os.Stdout.WriteString(head)
-			os.Stdout.WriteString("\n")
+				// skip past leading blanks and empty lines
+				for idx < max {
+					ch := text[idx]
+					if !inBlank[ch] {
+						break
+					}
+					idx++
+				}
+
+				start := idx
+
+				// skip to next newline
+				for idx < max {
+					if text[idx] == '\n' {
+						break
+					}
+					idx++
+				}
+
+				str := text[start:idx]
+
+				if str == "" {
+					continue
+				}
+
+				// skip processing instruction
+				if strings.HasPrefix(str, "<?") && strings.HasSuffix(str, "?>") {
+					continue
+				}
+
+				// remove all spaces immediately following '>' or immediately preceding '<'
+				for strings.Contains(str, "> ") {
+					str = strings.Replace(str, "> ", ">", 1)
+				}
+				for strings.Contains(str, " <") {
+					str = strings.Replace(str, " <", "<", 1)
+				}
+
+				buffer.WriteString(str[:])
+				buffer.WriteString("\n")
+			}
+
+			rsult := buffer.String()
+
+			os.Stdout.WriteString(rsult)
 		}
 
 		PartitionPattern(topPattern, star, rdr,
@@ -10095,27 +10220,43 @@ func main() {
 					return
 				}
 
-				val, ok := order[id]
-				if !ok {
-					// not in identifier list, skip
-					return
-				}
-				// decrement count in map
-				val--
-				order[id] = val
-				if val > 0 {
-					// only write last record with a given identifier
-					return
-				}
+				fl.WriteString(id)
+				fl.WriteString("\n")
 
 				if hd != "" {
 					os.Stdout.WriteString(hd)
 					os.Stdout.WriteString("\n")
 				}
 
-				// write selected record
-				os.Stdout.WriteString(str[:])
-				os.Stdout.WriteString("\n")
+				if tbls.DeGloss {
+					if HasMarkup(str) {
+						str = RemoveUnicodeMarkup(str)
+					}
+					if HasAngleBracket(str) {
+						str = DoHTMLReplace(str)
+					}
+				}
+				if tbls.DoMixed {
+					if HasMarkup(str) {
+						str = SimulateUnicodeMarkup(str)
+					}
+					if HasAngleBracket(str) {
+						str = DoHTMLRepair(str)
+					}
+					str = DoTrimFlankingHTML(str)
+				}
+				if tbls.DeAccent {
+					if IsNotASCII(str) {
+						str = DoAccentTransform(str)
+					}
+				}
+				if tbls.DoASCII {
+					if IsNotASCII(str) {
+						str = UnicodeToASCII(str)
+					}
+				}
+
+				writeFlush(str[:])
 
 				if tl != "" {
 					os.Stdout.WriteString(tl)
@@ -10128,6 +10269,12 @@ func main() {
 			os.Stdout.WriteString("\n")
 		}
 
+		err = fl.Sync()
+		if err != nil {
+			fmt.Println(err.Error())
+		}
+		fl.Close()
+
 		if timr {
 			printDuration("records")
 		}
@@ -10333,7 +10480,7 @@ func main() {
 					os.Exit(1)
 				}
 
-				xmlq := CreateProducer(topPattern, star, rdr, tbls)
+				xmlq := CreateProducer(topPattern, star, rdr, "", tbls)
 				tblq := CreateConsumers(cmds, tbls, parent, xmlq)
 
 				if xmlq == nil || tblq == nil {
@@ -10445,7 +10592,7 @@ func main() {
 	// LAUNCH PRODUCER, CONSUMER, AND UNSHUFFLER SERVERS
 
 	// launch producer goroutine to partition XML by pattern
-	xmlq := CreateProducer(topPattern, star, rdr, tbls)
+	xmlq := CreateProducer(topPattern, star, rdr, "", tbls)
 
 	// launch consumer goroutines to parse and explore partitioned XML objects
 	tblq := CreateConsumers(cmds, tbls, parent, xmlq)