diff options
Diffstat (limited to 'unpack-pubmed')
-rwxr-xr-x | unpack-pubmed | 18 |
1 files changed, 8 insertions, 10 deletions
diff --git a/unpack-pubmed b/unpack-pubmed
index 46bfd39..a671512 100755
--- a/unpack-pubmed
+++ b/unpack-pubmed
@@ -1,22 +1,20 @@
 #!/bin/sh
 
+flags="none"
+
+if [ "$#" -gt 0 ]
+then
+  flags="$1"
+fi
+
 for fl in *.xml.gz
 do
   base=${fl%.xml.gz}
-  if [ -f "$base.snt" ]
-  then
-    continue
-  fi
   if [ -f "$base.xml" ]
   then
     continue
   fi
   echo "$fl"
-  gunzip -c "$fl" | xtract -strict -compress -format flush > "$base.tmp.xml"
-  xtract -input "$base.tmp.xml" -pattern PubmedArticle -element MedlineCitation/PMID > "$base.uid"
-  xtract -input "$base.tmp.xml" -unique "$base.uid" -index MedlineCitation/PMID \
+  gunzip -c "$base.xml.gz" | xtract -flags "$flags" -unique "$base.uid" -index MedlineCitation/PMID \
     -head "<PubmedArticleSet>" -tail "</PubmedArticleSet>" -pattern PubmedArticle > "$base.xml"
-  rm "$base.tmp.xml"
-  rm "$base.uid"
-  touch "$base.snt"
 done