summary refs log tree commit diff
path: root/unpack-pubmed
diff options
context:
space:
mode:
Diffstat (limited to 'unpack-pubmed')
-rwxr-xr-x unpack-pubmed 18
1 files changed, 8 insertions, 10 deletions
diff --git a/unpack-pubmed b/unpack-pubmed
index 46bfd39..a671512 100755
--- a/unpack-pubmed
+++ b/unpack-pubmed
@@ -1,22 +1,20 @@
#!/bin/sh
+flags="none"
+
+if [ "$#" -gt 0 ]
+then
+ flags="$1"
+fi
+
for fl in *.xml.gz
do
base=${fl%.xml.gz}
- if [ -f "$base.snt" ]
- then
- continue
- fi
if [ -f "$base.xml" ]
then
continue
fi
echo "$fl"
- gunzip -c "$fl" | xtract -strict -compress -format flush > "$base.tmp.xml"
- xtract -input "$base.tmp.xml" -pattern PubmedArticle -element MedlineCitation/PMID > "$base.uid"
- xtract -input "$base.tmp.xml" -unique "$base.uid" -index MedlineCitation/PMID \
+ gunzip -c "$base.xml.gz" | xtract -flags "$flags" -unique "$base.uid" -index MedlineCitation/PMID \
-head "<PubmedArticleSet>" -tail "</PubmedArticleSet>" -pattern PubmedArticle > "$base.xml"
- rm "$base.tmp.xml"
- rm "$base.uid"
- touch "$base.snt"
done