summaryrefslogtreecommitdiff
path: root/unpack-pubmed
blob: 46bfd39a879c84910f827a25ebc3c4d1376b00d4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#!/bin/sh
#
# unpack-pubmed: expand the *.xml.gz archives in the current directory.
#
# For each FILE.xml.gz that has neither a FILE.snt marker nor a FILE.xml
# output yet, the script:
#   1. decompresses the archive through
#      `xtract -strict -compress -format flush` into FILE.tmp.xml;
#   2. lists MedlineCitation/PMID for every PubmedArticle into FILE.uid;
#   3. re-reads FILE.tmp.xml with `-unique FILE.uid -index MedlineCitation/PMID`
#      and writes the PubmedArticle records, wrapped in
#      <PubmedArticleSet>...</PubmedArticleSet>, to FILE.xml;
#   4. removes the intermediates and touches FILE.snt.
#
# FILE.xml is first written to FILE.xml.part and renamed only after every
# step succeeded, so an aborted run never leaves a truncated FILE.xml (or a
# FILE.snt marker) that would make later runs skip the archive.
#
# Exit status: 0 when every processed archive succeeded, non-zero otherwise.
#
# NOTE(review): plain POSIX sh has no portable `pipefail`, so a gunzip
# failure is only noticed if xtract also fails on the resulting input.

failed=0
tmp_xml=
uid_file=
part_file=

# Remove the intermediates of the archive currently being processed.
cleanup() {
  if [ -n "$tmp_xml" ]
  then
    rm -f -- "$tmp_xml" "$uid_file" "$part_file"
  fi
}

# Do not leave intermediates behind when the run is interrupted.
trap 'cleanup; exit 129' HUP
trap 'cleanup; exit 130' INT
trap 'cleanup; exit 143' TERM

for fl in *.xml.gz
do
  # With no matching archive the pattern stays literal; skip it instead of
  # creating files named "*.xml" and "*.snt".
  [ -f "$fl" ] || continue

  base=${fl%.xml.gz}

  # Already unpacked (marker or output present): nothing to do.
  if [ -f "$base.snt" ] || [ -f "$base.xml" ]
  then
    continue
  fi

  printf '%s\n' "$fl"

  tmp_xml=$base.tmp.xml
  uid_file=$base.uid
  part_file=$base.xml.part

  if gunzip -c -- "$fl" |
       xtract -strict -compress -format flush > "$tmp_xml" &&
     xtract -input "$tmp_xml" -pattern PubmedArticle \
       -element MedlineCitation/PMID > "$uid_file" &&
     xtract -input "$tmp_xml" -unique "$uid_file" -index MedlineCitation/PMID \
       -head "<PubmedArticleSet>" -tail "</PubmedArticleSet>" \
       -pattern PubmedArticle > "$part_file" &&
     mv -- "$part_file" "$base.xml"
  then
    rm -f -- "$tmp_xml" "$uid_file"
    touch -- "$base.snt"
  else
    # Drop partial results so the archive is retried on the next run, then
    # carry on with the remaining archives.
    printf 'unpack-pubmed: failed to process %s\n' "$fl" >&2
    cleanup
    failed=1
  fi
done

# The status of the last command is the script's exit status: non-zero if
# any archive failed.
[ "$failed" -eq 0 ]