diff options
59 files changed, 323 insertions, 244 deletions
@@ -926,7 +926,7 @@ Information on how to obtain an API Key is described in this NCBI blogpost: The Public Domain Notice for all NCBI EDirect scripts is located at: - https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE + https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice Questions or comments on EDirect may be sent to info@ncbi.nlm.nih.gov. diff --git a/accn-at-a-time b/accn-at-a-time index adc6994..5f92f94 100755 --- a/accn-at-a-time +++ b/accn-at-a-time @@ -1,7 +1,7 @@ #!/bin/bash -norc # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice sed 's/[^a-zA-Z0-9_.]/ /g; s/^ *//' | tr 'A-Z' 'a-z' | diff --git a/align-columns b/align-columns index 87b31fa..db30200 100755 --- a/align-columns +++ b/align-columns @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice # inspired by Steve Kinzler's align script - see http://kinzler.com/me/align/ diff --git a/amino-acid-composition b/amino-acid-composition index cc84e2c..817e6e5 100755 --- a/amino-acid-composition +++ b/amino-acid-composition @@ -1,7 +1,7 @@ #!/bin/bash -norc # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice abbrev=( Ala Asx Cys Asp Glu Phe Gly His Ile \ Xle Lys Leu Met Asn Pyl Pro Gln Arg \ diff --git a/archive-pubmed b/archive-pubmed index 706f72a..4767b72 100755 --- a/archive-pubmed +++ b/archive-pubmed @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice while [ $# -gt 0 ] do diff --git a/between-two-genes b/between-two-genes index 673f159..f6518a4 100755 --- a/between-two-genes +++ b/between-two-genes @@ -1,6 +1,6 @@ #!/bin/bash -norc # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice awk -F '\t' -v 'OFS=\t' "/^$1\t/{a++}/^$2\t/{a++}a>0{print}a>1{exit}" diff --git a/cmd/build.sh b/cmd/build.sh index 6bbb0e0..a23f216 100755 --- a/cmd/build.sh +++ b/cmd/build.sh @@ -1,13 +1,90 @@ #!/bin/sh -# set crossCompileAll to true to cross-compile for all platforms +# Public domain notice for all NCBI EDirect scripts is located at: +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice + +# determine current platform +platform="" +osname=`uname -s` +cputype=`uname -m` +case "$osname-$cputype" in + Linux-x86_64 ) platform=Linux ;; + Darwin-x86_64 ) platform=Darwin ;; + Darwin-*arm* ) platform=Silicon ;; + CYGWIN_NT-* | MINGW*-* ) platform=CYGWIN_NT ;; + Linux-*arm* ) platform=ARM ;; + * ) platform=UNSUPPORTED ;; +esac + +canBuildSilicon=false + +gv=$( go version ) +case "$gv" in + # only build for Apple Silicon if using Go compiler 1.16 or later + *go1.16* | *go1.17* | *go1.18* ) + canBuildSilicon=true + ;; + * ) + ;; +esac + crossCompileAll=false +install=false +cleanup=true +target="" + +# process optional command-line arguments +while [ "$#" -ne 0 ] +do + case "$1" in + -install | install ) + # install native executables on development machine + install=true + # default executable path + target="$HOME/Misc/scripts/" + shift + ;; + -desktop | desktop ) + # place native executables on desktop + install=true + target="$HOME/Desktop/" + shift + ;; + -silicon | silicon ) + # coerce platform to create Silicon executables + platform=Silicon + install=true + target="$HOME/Desktop/" + # but do not remove existing native binaries on deskop + cleanup=false + shift + ;; + -distrib | -distribute | distrib | distribute ) + # cross-compile all versions for ftp distribution + crossCompileAll=true + install=true + # default distribution path + target="$HOME/goxtract/" + shift + ;; + * ) + if [ -n "$1" ] + then + # allow override of default target path + install=true + target="$1" + fi + # break out of loop + break + ;; + esac +done # create module files if [ ! -f "go.mod" ] then go mod init edirect - # add explicit location of local helper package + # add explicit location to find local helper package echo "replace eutils => ../eutils" >> go.mod fi if [ ! -f "go.sum" ] @@ -15,25 +92,12 @@ then go mod tidy fi -# erase existing executables +# erase any existing executables in current directory for plt in Darwin Silicon Linux CYGWIN_NT ARM do rm -f *.$plt done -# determine current platform -platform="" -osname=`uname -s` -cputype=`uname -m` -case "$osname-$cputype" in - Linux-x86_64 ) platform=Linux ;; - Darwin-x86_64 ) platform=Darwin ;; - Darwin-*arm* ) platform=Silicon ;; - CYGWIN_NT-* | MINGW*-* ) platform=CYGWIN_NT ;; - Linux-*arm* ) platform=ARM ;; - * ) platform=UNSUPPORTED ;; -esac - # platform-specific compiler environment variable values mods="darwin amd64 Darwin \ darwin arm64 Silicon \ @@ -50,9 +114,41 @@ do then continue fi - # echo "$pl" + if [ "$pl" = "Silicon" ] && [ "$canBuildSilicon" = false ] + then + continue + fi for exc in xtract rchive transmute do env GOOS="$os" GOARCH="$ar" go build -o "$exc.$pl" "$exc.go" done done + +if [ "$install" = true ] && [ -n "$target" ] +then + if [ "$cleanup" = true ] + then + # remove old executables from target + for plt in Darwin Silicon Linux CYGWIN_NT ARM + do + rm -f $target/*.$plt + done + fi + # copy new executables to target + for plt in Darwin Silicon Linux CYGWIN_NT ARM + do + for exc in xtract rchive transmute + do + if [ -f "$exc.$plt" ] + then + mv -f "$exc.$plt" "$target" + fi + done + done +fi + +# erase any remaining executables after compiling +for plt in Darwin Silicon Linux CYGWIN_NT ARM +do + rm -f *.$plt +done diff --git a/cmd/rchive.go b/cmd/rchive.go index af3db22..c17385a 100644 --- a/cmd/rchive.go +++ b/cmd/rchive.go @@ -2325,35 +2325,7 @@ func prepareQuery(str string) string { str = "stdin " + str } - // cleanup string - if eutils.IsNotASCII(str) { - str = eutils.DoAccentTransform(str) - if eutils.HasUnicodeMarkup(str) { - str = eutils.RepairUnicodeMarkup(str, eutils.SPACE) - } - } - - if eutils.HasBadSpace(str) { - str = eutils.CleanupBadSpaces(str) - } - if eutils.HasAngleBracket(str) { - str = eutils.RepairEncodedMarkup(str) - str = eutils.RepairScriptMarkup(str, eutils.SPACE) - str = eutils.RepairMathMLMarkup(str, eutils.SPACE) - str = eutils.RemoveEmbeddedMarkup(str) - } - - if eutils.HasAmpOrNotASCII(str) { - str = html.UnescapeString(str) - } - - if eutils.IsNotASCII(str) { - if eutils.HasGreek(str) { - str = eutils.SpellGreek(str) - str = eutils.CompressRunsOfSpaces(str) - } - str = eutils.UnicodeToASCII(str) - } + str = eutils.CleanupQuery(str, false, true) str = strings.Replace(str, "~ ~", "~~", -1) str = strings.Replace(str, "~ ~", "~~", -1) @@ -2475,38 +2447,7 @@ func prepareExact(str string) string { return "" } - str = html.EscapeString(str) - - if eutils.IsNotASCII(str) { - str = eutils.DoAccentTransform(str) - if eutils.HasUnicodeMarkup(str) { - str = eutils.RepairUnicodeMarkup(str, eutils.SPACE) - } - } - - str = strings.ToLower(str) - - if eutils.HasBadSpace(str) { - str = eutils.CleanupBadSpaces(str) - } - if eutils.HasAngleBracket(str) { - str = eutils.RepairEncodedMarkup(str) - str = eutils.RepairScriptMarkup(str, eutils.SPACE) - str = eutils.RepairMathMLMarkup(str, eutils.SPACE) - // RemoveEmbeddedMarkup must be called before UnescapeString, which was suppressed in ExploreElements - str = eutils.RemoveEmbeddedMarkup(str) - } - - if eutils.HasAmpOrNotASCII(str) { - str = html.UnescapeString(str) - } - - if eutils.IsNotASCII(str) { - if eutils.HasGreek(str) { - str = eutils.SpellGreek(str) - str = eutils.CompressRunsOfSpaces(str) - } - } + str = eutils.CleanupQuery(str, true, true) str = strings.Replace(str, "(", " ", -1) str = strings.Replace(str, ")", " ", -1) @@ -5009,26 +4950,7 @@ func createMatchers(phrs string, exclude bool, inp <-chan eutils.XMLRecord) <-ch // split at punctuation, but leave < and > in to delimit content strings cleanupRecord := func(str string) string { - if eutils.IsNotASCII(str) { - str = eutils.DoAccentTransform(str) - if eutils.HasUnicodeMarkup(str) { - str = eutils.RepairUnicodeMarkup(str, eutils.SPACE) - } - } - - if eutils.HasBadSpace(str) { - str = eutils.CleanupBadSpaces(str) - } - - if eutils.HasAmpOrNotASCII(str) { - str = html.UnescapeString(str) - } - - if eutils.IsNotASCII(str) { - if eutils.HasGreek(str) { - str = eutils.SpellGreek(str) - } - } + str = eutils.CleanupQuery(str, false, false) if eutils.HasHyphenOrApostrophe(str) { str = eutils.FixSpecialCases(str) diff --git a/download-ncbi-data b/download-ncbi-data index 992df6d..60912b8 100755 --- a/download-ncbi-data +++ b/download-ncbi-data @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice cmd="$1" shift diff --git a/download-pubmed b/download-pubmed index 22b6f65..ad3dcfc 100755 --- a/download-pubmed +++ b/download-pubmed @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice download() { dir="$1" diff --git a/download-sequence b/download-sequence index 0e7039f..76e7e7c 100755 --- a/download-sequence +++ b/download-sequence @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice filt="" while [ "$#" -gt 0 ] @@ -34,7 +34,7 @@ # export USE_NEW_EDIRECT="true" -version="14.7" +version="14.8" # initialize common flags @@ -530,27 +530,29 @@ ColorSetup() { then red_fg="" blue_fg="" - orig_colors="" + reset_colors="" elif command -v tput >/dev/null then red_fg="$(tput setaf 1)" blue_fg="$(tput setaf 4)" - orig_colors="$(tput op)" + reset_colors="$(tput op)" else # assume ANSI escape="$(printf '\033')" red_fg="$escape[31m" blue_fg="$escape[34m" - orig_colors="$escape[39;49m" + reset_colors="$escape[0m" fi } +ColorSetup + ErrorHead() { wrn="$1" whn="$2" - printf "${red_fg}${wrn}: FAILURE ( $whn )${orig_colors}\n" >&2 + printf "${red_fg}${wrn}: FAILURE ( $whn )${reset_colors}\n" >&2 # display original command in blue letters printf "${blue_fg}" >&2 } @@ -560,14 +562,14 @@ ErrorTail() { msg="$1" whc="$2" - printf "${orig_colors}" >&2 + printf "${reset_colors}" >&2 # display reformatted result in red letters - printf "${red_fg}${msg}${orig_colors}\n" >&2 + printf "${red_fg}${msg}${reset_colors}\n" >&2 if [ "$goOn" = true ] then - printf "${blue_fg}${whc} ATTEMPT${orig_colors}\n" >&2 + printf "${blue_fg}${whc} ATTEMPT${reset_colors}\n" >&2 else - printf "${blue_fg}QUERY FAILURE${orig_colors}\n" >&2 + printf "${blue_fg}QUERY FAILURE${reset_colors}\n" >&2 fi } @@ -580,8 +582,6 @@ RequestWithRetry() { # execute query res=$( "$@" ) - ColorSetup - warn="WARNING" whch="SECOND" while [ "$goOn" = true ] @@ -7294,6 +7294,11 @@ sub etest { # main block dispatches control to appropriate subroutine +$RED="\e[31m"; +$BLK="\e[39;49m"; + +print STDERR "\n${RED}WARNING: Use of -oldmode or USE_NEW_EDIRECT environment variable is DEPRECATED${BLK}\n\n"; + if ( scalar @ARGV > 0 and $ARGV[0] eq "-version" ) { print "$version\n"; } elsif ( $fnc eq "-search" ) { @@ -1112,9 +1112,9 @@ then then transmute -x2j else - transmute -normalize "$dbase" | + transmute -mixed -normalize "$dbase" | sed -e 's/<!DOCTYPE eSummaryResult PUBLIC/<!DOCTYPE DocumentSummarySet PUBLIC/g; s/<eSummaryResult>//g; s/<\/eSummaryResult>//g' | - transmute -compress -format indent -doctype "" + transmute -mixed -compress -format indent -doctype "" fi exit 0 @@ -185,10 +185,14 @@ Error Positive Controls elink -db pubmed -id 123456789 -related + elink -db assembly -id GCF_000178675.1 -target nuccore + efetch -db pubmed -id 123456789 -format docsum efetch -db nuccore -id U1234567890 -format acc + epost -db nuccore -id 304652336 + EOF } @@ -206,7 +206,7 @@ do db="$1" shift else - echo "ERROR: Missing -db argument" >&2 + echo "${red_fg}ERROR: Missing -db argument${reset_colors}" >&2 exit 1 fi ;; @@ -217,7 +217,7 @@ do ids="$1" shift else - echo "ERROR: Missing -id argument" >&2 + echo "${red_fg}ERROR: Missing -id argument${reset_colors}" >&2 exit 1 fi while [ $# -gt 0 ] @@ -244,7 +244,7 @@ do idtype=acc fi else - echo "ERROR: Missing -format argument" >&2 + echo "${red_fg}ERROR: Missing -format argument${reset_colors}" >&2 exit 1 fi ;; @@ -255,7 +255,7 @@ do target="$1" shift else - echo "ERROR: Missing -target argument" >&2 + echo "${red_fg}ERROR: Missing -target argument${reset_colors}" >&2 exit 1 fi ;; @@ -266,7 +266,7 @@ do name="$1" shift else - echo "ERROR: Missing -name argument" >&2 + echo "${red_fg}ERROR: Missing -name argument${reset_colors}" >&2 exit 1 fi ;; @@ -277,7 +277,7 @@ do cmmd="$1" shift else - echo "ERROR: Missing -cmd argument" >&2 + echo "${red_fg}ERROR: Missing -cmd argument${reset_colors}" >&2 exit 1 fi ;; @@ -288,7 +288,7 @@ do mode="$1" shift else - echo "ERROR: Missing -mode argument" >&2 + echo "${red_fg}ERROR: Missing -mode argument${reset_colors}" >&2 exit 1 fi ;; @@ -322,7 +322,7 @@ do then shift "$argsConsumed" else - echo "ERROR: Unrecognized option $1" >&2 + echo "${red_fg}ERROR: Unrecognized option $1${reset_colors}" >&2 exit 1 fi ;; @@ -363,7 +363,7 @@ fi if [ -z "$dbase" ] then - echo "ERROR: Missing -db argument" >&2 + echo "${red_fg}ERROR: Missing -db argument${reset_colors}" >&2 exit 1 fi @@ -400,7 +400,7 @@ case "$cmmd" in * ) if [ -z "$target" ] && [ "$related" = false ] && [ "$cited" = false ] && [ "$cites" = false ] then - echo "ERROR: Must supply -target or -related on command line" >&2 + echo "${red_fg}ERROR: Must supply -target or -related on command line${reset_colors}" >&2 exit 1 fi if [ -z "$target" ] @@ -428,7 +428,7 @@ fi if [ "$dbase" = "nlmcatalog" ] then - echo "ERROR: Entrez Direct does not support links for the nlmcatalog database" >&2 + echo "${red_fg}ERROR: Entrez Direct does not support links for the nlmcatalog database${reset_colors}" >&2 exit 1 fi @@ -438,22 +438,23 @@ if [ "$needHistory" = true ] then if [ -t 0 ] then - echo "ERROR: ENTREZ_DIRECT message not piped from stdin" >&2 + echo "${red_fg}ERROR: ENTREZ_DIRECT message not piped from stdin${reset_colors}" >&2 exit 1 fi if [ -z "$web_env" ] then - echo "ERROR: WebEnv value not found in elink input" >&2 + echo "${red_fg}ERROR: WebEnv value not found in elink input${reset_colors}" >&2 exit 1 fi if [ -z "$qry_key" ] then - echo "ERROR: QueryKey value not found in elink input" >&2 + echo "${red_fg}ERROR: QueryKey value not found in elink input${reset_colors}" >&2 exit 1 fi if [ "$num" -lt 1 ] then - # silently exit if no results to process + # print message with count of 0 if no results to process + WriteEDirect "$target" "$web_env" "$qry_key" "0" "$stp" "$err" exit 0 fi fi @@ -462,7 +463,7 @@ if [ "$cited" = true ] || [ "$cites" = true ] then if [ "$dbase" != "pubmed" ] then - echo "ERROR: -cited or -cites can only be used with -db pubmed" >&2 + echo "${red_fg}ERROR: -cited or -cites can only be used with -db pubmed${reset_colors}" >&2 exit 1 fi fi @@ -603,7 +604,7 @@ LinkInGroups() { if [ -n "$err" ] then - echo "ERROR: elink failed - $err" >&2 + echo "${red_fg}ERROR: elink failed - $err${reset_colors}" >&2 exit 1 fi if [ -z "$web_env" ] @@ -660,8 +661,9 @@ then if [ -n "$num" ] && [ "$num" -lt 1 ] then - res=$( RunWithCommonArgs nquire -url "$base" elink.fcgi -dbfrom "$target" \ - -query_key "$qry_key" -WebEnv "$wbnv" -cmd "acheck" ) + uids=$( GenerateUidList "$dbase" | head -n 500 | join-into-groups-of 500 ) + res=$( RunWithCommonArgs nquire -url "$base" elink.fcgi \ + -dbfrom "$dbase" -id "$uids" -cmd "acheck" ) if [ -n "$res" ] then @@ -672,10 +674,10 @@ then tst=$( echo "$res" | xtract -pattern LinkInfo -if LinkName -equals "$name" -element LinkName ) if [ -n "$tst" ] then - echo "ERROR: acheck test indicates non-zero count expected" >&2 + echo "${red_fg}ERROR: acheck test indicates non-zero count expected${reset_colors}" >&2 fi else - echo "ERROR: acheck test failed - $err" >&2 + echo "${red_fg}ERROR: acheck test failed - $err${reset_colors}" >&2 fi fi fi @@ -687,5 +689,5 @@ fi # warn on error -echo "ERROR: ELink failure" >&2 +echo "${red_fg}ERROR: ELink failure${reset_colors}" >&2 exit 1 @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice do_help() { cat <<EOF @@ -157,10 +157,10 @@ Examples esearch -db nuccore -query "MatK [GENE] AND NC_0:NC_999999999 [PACC]" esearch -db protein -query "amyloid* [PROT]" | - elink -target pubmed | + elink -target pubmed -label prot_cit | esearch -db gene -query "apo* [GENE]" | - elink -target pubmed | - esearch -query "(#3) AND (#6)" | + elink -target pubmed -label gene_cit | + esearch -query "(#prot_cit) AND (#gene_cit)" | efetch -format docsum | xtract -pattern DocumentSummary -element Id Title diff --git a/eutils/format.go b/eutils/format.go index 010cd1a..1aa24fc 100644 --- a/eutils/format.go +++ b/eutils/format.go @@ -368,7 +368,7 @@ func xmlFormatter(rcrd, prnt string, inp <-chan XMLToken, offset int, doXML bool return txt } - cleanToken := func(tkn XMLToken, nxtTag int, nxtName, nxtAttr string) { + cleanToken := func(tkn XMLToken, nxtTag int, nxtName, nxtAttr string, lastContent bool) { if skip > 0 { skip-- @@ -406,7 +406,13 @@ func xmlFormatter(rcrd, prnt string, inp <-chan XMLToken, offset int, doXML bool return } buffer.WriteString(pfx) - doIndent(indent) + if doMixed { + if !lastContent { + doIndent(indent) + } + } else { + doIndent(indent) + } indent++ buffer.WriteString("<") buffer.WriteString(name) @@ -445,7 +451,7 @@ func xmlFormatter(rcrd, prnt string, inp <-chan XMLToken, offset int, doXML bool buffer.WriteString("</") buffer.WriteString(name) buffer.WriteString(">") - if doMixed && nxtTag == CONTENTTAG { + if doMixed && nxtTag == CONTENTTAG && nxtName != "." { buffer.WriteString(" ") } pfx = ret @@ -456,9 +462,9 @@ func xmlFormatter(rcrd, prnt string, inp <-chan XMLToken, offset int, doXML bool case CONTENTTAG: if nxtTag == STARTTAG || nxtTag == SELFTAG { if doStrict { - fmt.Fprintf(os.Stderr, "ERROR: UNRECOGNIZED MIXED CONTENT <%s> in <%s>\n", nxtName, name) + fmt.Fprintf(os.Stderr, "%sERROR: UNRECOGNIZED MIXED CONTENT <%s> in <%s>%s\n", redColor, nxtName, name, clrColor) } else if !doMixed { - fmt.Fprintf(os.Stderr, "ERROR: UNEXPECTED MIXED CONTENT <%s> in <%s>\n", nxtName, name) + fmt.Fprintf(os.Stderr, "%sERROR: UNEXPECTED MIXED CONTENT <%s> in <%s>%s\n", redColor, nxtName, name, clrColor) } } if len(name) > 0 && IsNotJustWhitespace(name) { @@ -472,7 +478,7 @@ func xmlFormatter(rcrd, prnt string, inp <-chan XMLToken, offset int, doXML bool } buffer.WriteString(name) } - if doMixed && nxtTag == STARTTAG { + if (doStrict || doMixed) && nxtTag == STARTTAG { buffer.WriteString(" ") } pfx = "" @@ -525,6 +531,7 @@ func xmlFormatter(rcrd, prnt string, inp <-chan XMLToken, offset int, doXML bool var prev XMLToken primed := false skipDoctype := false + lastContent := false // track adjacent pairs to give look-ahead at next token doPair := func(tkn XMLToken) { @@ -541,9 +548,10 @@ func xmlFormatter(rcrd, prnt string, inp <-chan XMLToken, offset int, doXML bool } if primed { - cleanToken(prev, tkn.Tag, tkn.Name, tkn.Attr) + cleanToken(prev, tkn.Tag, tkn.Name, tkn.Attr, lastContent) } + lastContent = (prev.Tag == CONTENTTAG) prev = XMLToken{tkn.Tag, tkn.Cont, tkn.Name, tkn.Attr, tkn.Index, tkn.Line} primed = true } diff --git a/eutils/parse.go b/eutils/parse.go index 407d0a2..9342053 100644 --- a/eutils/parse.go +++ b/eutils/parse.go @@ -722,9 +722,9 @@ func parseXML(record, parent string, inp <-chan XMLBlock, tokens func(XMLToken), if tag == BADTAG { if countLines { - fmt.Fprintf(os.Stderr, "\nERROR: Unparsable XML element, line %d\n", lineNum) + fmt.Fprintf(os.Stderr, "\n%sERROR: Unparsable XML element, line %d%s\n", redColor, lineNum, clrColor) } else { - fmt.Fprintf(os.Stderr, "\nERROR: Unparsable XML element\n") + fmt.Fprintf(os.Stderr, "\n%sERROR: Unparsable XML element%s\n", redColor, clrColor) } break } @@ -735,7 +735,7 @@ func parseXML(record, parent string, inp <-chan XMLBlock, tokens func(XMLToken), switch tag { case STARTTAG: if status == CHAR { - fmt.Fprintf(os.Stderr, "ERROR: UNEXPECTED MIXED CONTENT <%s> in <%s>\n", name, prnt) + fmt.Fprintf(os.Stderr, "%sERROR: UNEXPECTED MIXED CONTENT <%s> in <%s>%s\n", redColor, name, prnt, clrColor) } // read sub tree obj, ok = parseSpecial(name, attr, node.Name) @@ -812,9 +812,9 @@ func parseXML(record, parent string, inp <-chan XMLBlock, tokens func(XMLToken), if tag == BADTAG { if countLines { - fmt.Fprintf(os.Stderr, "\nERROR: Unparsable XML element, line %d\n", lineNum) + fmt.Fprintf(os.Stderr, "\n%sERROR: Unparsable XML element, line %d%s\n", redColor, lineNum, clrColor) } else { - fmt.Fprintf(os.Stderr, "\nERROR: Unparsable XML element\n") + fmt.Fprintf(os.Stderr, "\n%sERROR: Unparsable XML element%s\n", redColor, clrColor) } break } @@ -826,9 +826,9 @@ func parseXML(record, parent string, inp <-chan XMLBlock, tokens func(XMLToken), case STARTTAG: if status == CHAR { if doStrict { - fmt.Fprintf(os.Stderr, "ERROR: UNRECOGNIZED MIXED CONTENT <%s> in <%s>\n", name, prnt) + fmt.Fprintf(os.Stderr, "%sERROR: UNRECOGNIZED MIXED CONTENT <%s> in <%s>%s\n", redColor, name, prnt, clrColor) } else if !doMixed { - fmt.Fprintf(os.Stderr, "ERROR: UNEXPECTED MIXED CONTENT <%s> in <%s>\n", name, prnt) + fmt.Fprintf(os.Stderr, "%sERROR: UNEXPECTED MIXED CONTENT <%s> in <%s>%s\n", redColor, name, prnt, clrColor) } } // read sub tree @@ -943,9 +943,9 @@ func parseXML(record, parent string, inp <-chan XMLBlock, tokens func(XMLToken), if tag == BADTAG { if countLines { - fmt.Fprintf(os.Stderr, "\nERROR: Unparsable XML element, line %d\n", lineNum) + fmt.Fprintf(os.Stderr, "\n%sERROR: Unparsable XML element, line %d%s\n", redColor, lineNum, clrColor) } else { - fmt.Fprintf(os.Stderr, "\nERROR: Unparsable XML element\n") + fmt.Fprintf(os.Stderr, "\n%sERROR: Unparsable XML element%s\n", redColor, clrColor) } break } @@ -1001,9 +1001,9 @@ func parseXML(record, parent string, inp <-chan XMLBlock, tokens func(XMLToken), if tag == BADTAG { if countLines { - fmt.Fprintf(os.Stderr, "\nERROR: Unparsable XML element, line %d\n", lineNum) + fmt.Fprintf(os.Stderr, "\n%sERROR: Unparsable XML element, line %d%s\n", redColor, lineNum, clrColor) } else { - fmt.Fprintf(os.Stderr, "\nERROR: Unparsable XML element\n") + fmt.Fprintf(os.Stderr, "\n%sERROR: Unparsable XML element%s\n", redColor, clrColor) } break } @@ -1033,9 +1033,9 @@ func parseXML(record, parent string, inp <-chan XMLBlock, tokens func(XMLToken), if tag == BADTAG { if countLines { - fmt.Fprintf(os.Stderr, "\nERROR: Unparsable XML element, line %d\n", lineNum) + fmt.Fprintf(os.Stderr, "\n%sERROR: Unparsable XML element, line %d%s\n", redColor, lineNum, clrColor) } else { - fmt.Fprintf(os.Stderr, "\nERROR: Unparsable XML element\n") + fmt.Fprintf(os.Stderr, "\n%sERROR: Unparsable XML element%s\n", redColor, clrColor) } break } @@ -1062,9 +1062,9 @@ func parseXML(record, parent string, inp <-chan XMLBlock, tokens func(XMLToken), if tag == BADTAG { if countLines { - fmt.Fprintf(os.Stderr, "\nERROR: Unparsable XML element, line %d\n", lineNum) + fmt.Fprintf(os.Stderr, "\n%sERROR: Unparsable XML element, line %d%s\n", redColor, lineNum, clrColor) } else { - fmt.Fprintf(os.Stderr, "\nERROR: Unparsable XML element\n") + fmt.Fprintf(os.Stderr, "\n%sERROR: Unparsable XML element%s\n", redColor, clrColor) } break } @@ -1159,7 +1159,7 @@ func CreateTokenizer(inp <-chan XMLBlock) <-chan XMLToken { out := make(chan XMLToken, chanDepth) if out == nil { - fmt.Fprintf(os.Stderr, "\nERROR: Unable to create XML tokenizer channel\n") + fmt.Fprintf(os.Stderr, "\n%sERROR: Unable to create XML tokenizer channel%s\n", redColor, clrColor) os.Exit(1) } diff --git a/eutils/utils.go b/eutils/utils.go index a2f8cb6..ef8f952 100644 --- a/eutils/utils.go +++ b/eutils/utils.go @@ -42,7 +42,14 @@ import ( ) // EDirectVersion is the current EDirect release number -const EDirectVersion = "14.7" +const EDirectVersion = "14.8" + +// ASCII terminal color constants +const ( + redColor = "\033[31m" + bluColor = "\033[34m" + clrColor = "\033[39;49m" +) // parser character type lookup tables var ( diff --git a/eutils/xml.go b/eutils/xml.go index 6a4be7c..9cd9f77 100644 --- a/eutils/xml.go +++ b/eutils/xml.go @@ -120,7 +120,7 @@ func CreateXMLStreamer(in io.Reader) <-chan XMLBlock { if err != nil { if err != io.EOF { // real error. - fmt.Fprintf(os.Stderr, "\nERROR: %s\n", err.Error()) + fmt.Fprintf(os.Stderr, "\n%sERROR: %s%s\n", redColor, err.Error(), clrColor) // ignore bytes - non-conforming implementations of io.Reader may // return mangled data on non-EOF errors isClosed = true @@ -136,7 +136,7 @@ func CreateXMLStreamer(in io.Reader) <-chan XMLBlock { } if n < 0 { // reality check - non-conforming implementations of io.Reader may return -1 - fmt.Fprintf(os.Stderr, "\nERROR: io.Reader returned negative count %d\n", n) + fmt.Fprintf(os.Stderr, "\n%sERROR: io.Reader returned negative count %d%s\n", redColor, n, clrColor) // treat as n == 0 in order to update file offset and avoid losing previous remainder n = 0 } diff --git a/exclude-uid-lists b/exclude-uid-lists index 0c2792b..4431143 100755 --- a/exclude-uid-lists +++ b/exclude-uid-lists @@ -1,7 +1,7 @@ #!/bin/bash -norc # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice # Usage: exclude-uid-lists FILE1 FILE2 diff --git a/expand-current b/expand-current index 2a7e1fa..7c626ca 100755 --- a/expand-current +++ b/expand-current @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice while [ $# -gt 0 ] do diff --git a/fetch-pubmed b/fetch-pubmed index c4346d2..21191e8 100755 --- a/fetch-pubmed +++ b/fetch-pubmed @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice doall=false dofresh=false diff --git a/filter-stop-words b/filter-stop-words index 82ed462..8e841f6 100755 --- a/filter-stop-words +++ b/filter-stop-words @@ -1,7 +1,7 @@ #!/bin/bash -norc # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice stop_words="#a#about#above#abs#accordingly#across#after#afterwards#again#\ against#all#almost#alone#along#already#also#although#always#am#among#\ diff --git a/filter-table b/filter-table index 8be2240..dc79ef8 100755 --- a/filter-table +++ b/filter-table @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice # MUST be called with single quotes, e.g.: # filter-table '10 <= $2 && $2 <= 30' @@ -1,6 +1,6 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice transmute -g2x "$@" diff --git a/hlp-xtract.txt b/hlp-xtract.txt index df9944d..2de2d74 100644 --- a/hlp-xtract.txt +++ b/hlp-xtract.txt @@ -117,7 +117,7 @@ Citation Lookup esearch -db pubmed -query "Beadle GW [AUTH] AND Tatum EL [AUTH]" | elink -cited | efilter -days 365 | - efetch -format abstract + efetch -format abstract -start 3 -stop 5 Stopwords and Stemming @@ -133,7 +133,8 @@ DOI Extraction efetch -format xml | xtract -pattern PubmedArticle \ -block ArticleId -if @IdType -equals doi \ - -doi ArticleId + -doi ArticleId | + tail -n 25 Combining Independent Queries @@ -143,7 +144,7 @@ Combining Independent Queries elink -target pubmed -label gene_cit | esearch -query "(#prot_cit) AND (#gene_cit)" | efetch -format docsum | - xtract -pattern DocumentSummary -element Id Title | + xtract -mixed -pattern DocumentSummary -element Id Title | cat -v PMC @@ -185,7 +186,7 @@ Peptide Sequences Vitamin Biosynthesis - esearch -db pubmed -query "lycopene cyclase" | + esearch -db pubmed -query "lycopene cyclase" -log | elink -related | elink -target protein | efilter -organism rodents -source refseq | diff --git a/index-extras b/index-extras index 5359443..f21c54a 100755 --- a/index-extras +++ b/index-extras @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice while [ $# -gt 0 ] do diff --git a/index-pubmed b/index-pubmed index 003b33d..c950e57 100755 --- a/index-pubmed +++ b/index-pubmed @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice startat=0 diff --git a/intersect-uid-lists b/intersect-uid-lists index 61281b5..5c1c1d2 100755 --- a/intersect-uid-lists +++ b/intersect-uid-lists @@ -1,7 +1,7 @@ #!/bin/bash -norc # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice # Usage: intersect-uid-lists FILE1 FILE2 diff --git a/join-into-groups-of b/join-into-groups-of index 7832a13..3d5ff37 100755 --- a/join-into-groups-of +++ b/join-into-groups-of @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice xargs -n "$@" echo | sed 's/ /,/g' @@ -73,6 +73,8 @@ Query Commands -get Uses HTTP GET instead of POST -lst Lists contents of FTP site + -dir FTP listing with file sizes + -ftp Retrieves data from FTP site File Downloads @@ -362,7 +364,7 @@ fi if [ $# -gt 0 ] then case "$1" in - -url | -get | -lst | -ftp | -dwn | -asp ) + -url | -get | -lst | -dir | -ftp | -dwn | -asp ) mode="$1" shift ;; @@ -526,33 +528,34 @@ ColorSetup() { then red_fg="" blue_fg="" - orig_colors="" + reset_colors="" elif command -v tput >/dev/null then red_fg="$(tput setaf 1)" blue_fg="$(tput setaf 4)" - orig_colors="$(tput op)" + reset_colors="$(tput op)" else # assume ANSI escape="$(printf '\033')" red_fg="$escape[31m" blue_fg="$escape[34m" - orig_colors="$escape[39;49m" + reset_colors="$escape[0m" fi } +ColorSetup + # common function to execute curl or wget command SendRequest() { - ColorSetup when=$( date ) case "$binary" in */curl ) if [ "$log" = true ] then - echo "${blue_fg}$@${orig_colors}" >&2 + echo "${blue_fg}$@${reset_colors}" >&2 fi temp=$(mktemp /tmp/NQUIRE_HEADER.XXXXXXXXX) @@ -569,8 +572,8 @@ SendRequest() { if [ "$res" -ne 0 ] then # report failure - echo "${red_fg}ERROR: curl command failed ( $when ) with: $res${orig_colors}" >&2 - echo "${blue_fg}$@${orig_colors}" >&2 + echo "${red_fg}ERROR: curl command failed ( $when ) with: $res${reset_colors}" >&2 + echo "${blue_fg}$@${reset_colors}" >&2 # show return code in first line of header head -n 1 "$temp" >&2 fi @@ -580,11 +583,13 @@ SendRequest() { */wget ) if [ "$log" = true ] then - echo "${blue_fg}$@${orig_colors}" >&2 + echo "${blue_fg}$@${reset_colors}" >&2 fi temp=$(mktemp /tmp/NQUIRE_HEADER.XXXXXXXXX) + # wget needs --no-remove-listing for ftp listing? + full_output="" if [ -f "$pth"/cacert.pem ] then @@ -598,8 +603,8 @@ SendRequest() { if [ "$res" -ne 0 ] then # report failure - echo "${red_fg}ERROR: wget command failed ( $when ) with: $res${orig_colors}" >&2 - echo "${blue_fg}$@${orig_colors}" >&2 + echo "${red_fg}ERROR: wget command failed ( $when ) with: $res${reset_colors}" >&2 + echo "${blue_fg}$@${reset_colors}" >&2 # show return code in first line of header head -n 1 "$temp" >&2 fi @@ -708,11 +713,10 @@ DownloadOneFile() { esac if [ ! -f "$fl" ] then - ColorSetup failed=$((failed + 1)) # report failure to download requested file echo "" >&2 - echo "${red_fg}${fl} FAILED${orig_colors}" >&2 + echo "${red_fg}${fl} FAILED${reset_colors}" >&2 fi fi } @@ -755,9 +759,20 @@ case "$mode" in tr -s ' ' | tr ' ' '\t' | cut -f 9 | grep '.' ;; */wget ) - SendRequest "$url" | - sed -e 's/<[^>]*>//g' | tr ' ' '\t' | cut -f 1 | grep '.' + echo "ERROR: -lst not supported for wget" >&2 + exit 1 + ;; + esac + ;; + -dir ) + case "$binary" in + */curl ) + SendRequest "$url/" | + tr -s ' ' | tr ' ' '\t' | cut -f 5,9 | grep '.' ;; + */wget ) + echo "ERROR: -dir not supported for wget" >&2 + exit 1 esac ;; -ftp ) @@ -793,7 +808,7 @@ case "$mode" in sfx="S" fi echo "" >&2 - echo "${red_fg}FAILED TO DOWNLOAD $failed FILE${sfx}${orig_colors}" >&2 + echo "${red_fg}FAILED TO DOWNLOAD $failed FILE${sfx}${reset_colors}" >&2 exit 1 fi ;; @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice if [ "$#" -eq 0 ] then @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice if [ "$#" -eq 0 ] then @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice if [ "$#" -eq 0 ] then @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice if [ "$#" -eq 0 ] then @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice if [ "$#" -eq 0 ] then @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice if [ "$#" -eq 0 ] then @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice if [ "$#" -eq 0 ] then @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice if [ "$#" -eq 0 ] then diff --git a/print-columns b/print-columns index 42aeb6f..4f78642 100755 --- a/print-columns +++ b/print-columns @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice # MUST be called with single quotes, e.g.: # print-columns '$1, $2+1, $3, $4-1, $5' diff --git a/reorder-columns b/reorder-columns index f8fc2dd..2e7f06b 100755 --- a/reorder-columns +++ b/reorder-columns @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice # Usage: reorder-columns COLUMN NUMBERS... diff --git a/run-ncbi-converter b/run-ncbi-converter index 222e8c7..7c39733 100755 --- a/run-ncbi-converter +++ b/run-ncbi-converter @@ -1,7 +1,7 @@ #!/usr/bin/env perl # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice use warnings; use strict; diff --git a/setup-deps.pl b/setup-deps.pl index 081a406..1c645d4 100755 --- a/setup-deps.pl +++ b/setup-deps.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice use warnings; use strict; @@ -1,7 +1,7 @@ #!/bin/bash -norc # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice PERL=perl case "`uname -s`" in diff --git a/skip-if-file-exists b/skip-if-file-exists index 5759fb8..989ccd3 100755 --- a/skip-if-file-exists +++ b/skip-if-file-exists @@ -1,7 +1,7 @@ #!/bin/bash -norc # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice while IFS=$'\t' read fl do @@ -1,6 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice +grep '.' | sort -t "$(printf '\t')" "$@" diff --git a/sort-uniq-count b/sort-uniq-count index 69fc193..70cc1f5 100755 --- a/sort-uniq-count +++ b/sort-uniq-count @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice flags="f" if [ -n "$*" ] diff --git a/sort-uniq-count-rank b/sort-uniq-count-rank index b363daf..285197a 100755 --- a/sort-uniq-count-rank +++ b/sort-uniq-count-rank @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice flags="f" if [ -n "$*" ] diff --git a/stream-pubmed b/stream-pubmed index b3d9e60..19f5244 100755 --- a/stream-pubmed +++ b/stream-pubmed @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice flag="none" diff --git a/test-edirect b/test-edirect index 0af9687..c02581e 100755 --- a/test-edirect +++ b/test-edirect @@ -196,7 +196,7 @@ PrintTimeAndTitle "Citation Lookup" esearch -db pubmed -query "Beadle GW [AUTH] AND Tatum EL [AUTH]" | elink -cited | efilter -days 365 | - efetch -format abstract + efetch -format abstract -start 3 -stop 5 PrintTimeAndTitle "Stopwords and Stemming" @@ -212,17 +212,18 @@ PrintTimeAndTitle "DOI Extraction" efetch -format xml | xtract -pattern PubmedArticle \ -block ArticleId -if @IdType -equals doi \ - -doi ArticleId + -doi ArticleId | + tail -n 25 PrintTimeAndTitle "Combining Independent Queries" esearch -db protein -query "amyloid* [PROT]" | - elink -target pubmed -label prot_cit | + elink -target pubmed -label prot_cit | tee /dev/tty | esearch -db gene -query "apo* [GENE]" | - elink -target pubmed -label gene_cit | + elink -target pubmed -label gene_cit | tee /dev/tty | esearch -query "(#prot_cit) AND (#gene_cit)" | efetch -format docsum | - xtract -pattern DocumentSummary -element Id Title | + xtract -mixed -pattern DocumentSummary -element Id Title | cat -v PrintTimeAndTitle "Formatting Tag Removal" @@ -241,7 +242,7 @@ PrintTimeAndTitle "Peptide Sequences" PrintTimeAndTitle "Vitamin Biosynthesis" - esearch -db pubmed -query "lycopene cyclase" | + esearch -db pubmed -query "lycopene cyclase" -log | elink -related | elink -target protein | efilter -organism rodents -source refseq | @@ -460,6 +461,28 @@ PrintTimeAndTitle "Underscore Protection" efilter -query "refseq has annotation [PROP] NOT anomalous [FILT]" | xtract -pattern ENTREZ_DIRECT -element Count +PrintTimeAndTitle "Mitochondrial Mistranslation" + + efetch -db nuccore -id NC_012920 -format gb | + transmute -g2x | + xtract -insd CDS gene product protein_id translation sub_sequence | + while IFS=$'\t' read acc gene prod prid prot seq + do + mito=$( echo "$seq" | transmute -cds2prot -code 2 -stop ) + norm=$( echo "$seq" | transmute -cds2prot -code 1 -stop ) + if [ "$mito" != "$norm" ] + then + echo ">$acc $gene $prid $prod" + transmute -diff <( echo "$mito" ) <( echo "$norm" ) + echo "" + fi + done + +PrintTimeAndTitle "Variation Extraction" + + echo "NP_000504.1:p.Glu41Lys,NP_000504.1:p.P43Leu,NP_000504.1:p.Trp142Ter" | + transmute -hgvs | transmute -format + PrintTimeAndTitle "Amino Acid Substitutions" esearch -db gene -query "OPN1MW [PREF] AND human [ORGN]" | diff --git a/test-eutils b/test-eutils index f3bd4fa..537f153 100755 --- a/test-eutils +++ b/test-eutils @@ -552,21 +552,12 @@ DoCmd() { echo "" } -export USE_NEW_EDIRECT=false -seconds_start=$(date "+%s") - -ver=$( einfo -help | head -n 1 | sed 's/einfo //g' ) -echo "Old EDirect $ver" -echo "" - -DoCmd - export USE_NEW_EDIRECT=true seconds_start=$(date "+%s") echo "" ver=$( einfo -help | head -n 1 | sed 's/einfo //g' ) -echo "New EDirect $ver" +echo "EDirect $ver" echo "" DoCmd @@ -579,10 +570,14 @@ then echo "" ver=$( einfo -help | head -n 1 | sed 's/einfo //g' ) - echo "Ext EDirect $ver" - echo "" + int=$( einfo -internal -help | grep internal ) + if [ -z "$int" ] + then + echo "Ext EDirect $ver" + echo "" - DoCmd + DoCmd + fi fi if [ "$failed" = true ] diff --git a/test-pubmed-index b/test-pubmed-index index 26675c6..f8fc169 100755 --- a/test-pubmed-index +++ b/test-pubmed-index @@ -1,7 +1,7 @@ #!/bin/bash # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice seconds_start=$(date "+%s") for i in {1..100} diff --git a/theme-aliases b/theme-aliases index f35ca0f..2c5fb1e 100755 --- a/theme-aliases +++ b/theme-aliases @@ -1,7 +1,7 @@ #!/bin/bash # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice alias ChemCodeToName='phrase-search -convert chem Code Name' alias ChemCodeToTerm='phrase-search -convert chem Code Term' diff --git a/word-at-a-time b/word-at-a-time index b72a2b1..40159f4 100755 --- a/word-at-a-time +++ b/word-at-a-time @@ -1,7 +1,7 @@ #!/bin/bash -norc # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice sed 's/[^a-zA-Z0-9]/ /g; s/^ *//' | tr 'A-Z' 'a-z' | @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice xtract -pattern INSDSeq -pfx ">Feature " \ -first INSDSeqid,INSDSeq_accession-version \ @@ -1,7 +1,7 @@ #!/bin/sh # Public domain notice for all NCBI EDirect scripts is located at: -# https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE +# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice # For Mac, please obtain command-line-enabled Plot2x from http://apps.micw.org/apps/plot2/downloads.php # For Unix or PC/Cygwin, please obtain gnuplot from http://gnuplot.sourceforge.net/download.html |