blob: 46bfd39a879c84910f827a25ebc3c4d1376b00d4 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
#!/bin/sh
#
# For every *.xml.gz archive in the current directory, decompress it, pass it
# through xtract and write the result to "<base>.xml", then create an empty
# "<base>.snt" sentinel file. Archives that already have a "<base>.snt" or a
# "<base>.xml" next to them are skipped, so the script can be re-run safely.
#
# Requires `gunzip` and `xtract` on PATH (xtract is presumably the NCBI
# EDirect tool -- confirm against the local installation).
#
# Exit status: 0 when every archive was processed or skipped, 1 when at least
# one archive failed.

# Without pipefail only the last stage of `gunzip | xtract` is checked, so a
# corrupt archive would go unnoticed. Not every /bin/sh supports the option,
# so probe for it in a subshell before enabling it.
(set -o pipefail) 2>/dev/null && set -o pipefail

# process_archive FILE
#   FILE - name of a "<base>.xml.gz" archive.
#   Prints FILE on stdout when work is actually started (progress output).
#   Returns 0 if the archive was converted or skipped, 1 on any failure.
#   On failure neither "<base>.xml" nor "<base>.snt" is created, so the
#   archive is retried on the next run.
#   Variables use a pa_ prefix because POSIX sh has no `local`.
process_archive() {
  pa_src=$1
  pa_base=${pa_src%.xml.gz}

  # Already finished (sentinel present) or output already present: skip.
  if [ -f "$pa_base.snt" ] || [ -f "$pa_base.xml" ]; then
    return 0
  fi

  printf '%s\n' "$pa_src"

  # Step 1: decompress (via stdin, so a leading '-' in the name is harmless)
  #         and pipe through xtract into an intermediate file.
  # Step 2: write the MedlineCitation/PMID element of every PubmedArticle
  #         record to "<base>.uid".
  # Step 3: run xtract again with that list passed to -unique (by the look of
  #         the options this drops duplicate PMIDs -- confirm in the xtract
  #         docs), wrapping the records in a <PubmedArticleSet> element.
  # The output goes to a ".part" file and is renamed only after every step
  # succeeded, so an interrupted or failed run never leaves a truncated
  # "<base>.xml" that later runs would mistake for a finished one.
  if gunzip -c < "$pa_src" |
      xtract -strict -compress -format flush > "$pa_base.tmp.xml" &&
    xtract -input "$pa_base.tmp.xml" -pattern PubmedArticle \
      -element MedlineCitation/PMID > "$pa_base.uid" &&
    xtract -input "$pa_base.tmp.xml" -unique "$pa_base.uid" \
      -index MedlineCitation/PMID \
      -head "<PubmedArticleSet>" -tail "</PubmedArticleSet>" \
      -pattern PubmedArticle > "$pa_base.xml.part" &&
    mv -f -- "$pa_base.xml.part" "$pa_base.xml" &&
    touch -- "$pa_base.snt"
  then
    pa_status=0
  else
    pa_status=1
    printf 'error: failed to process %s\n' "$pa_src" >&2
  fi

  # Intermediate files are removed on success and on failure alike.
  rm -f -- "$pa_base.tmp.xml" "$pa_base.uid" "$pa_base.xml.part"
  return "$pa_status"
}

failed=0
for fl in *.xml.gz
do
  # An unmatched glob is left as the literal string "*.xml.gz"; skip it (and
  # anything that is not a regular file) instead of creating "*.xml" etc.
  [ -f "$fl" ] || continue
  process_archive "$fl" || failed=1
done

if [ "$failed" -ne 0 ]; then
  exit 1
fi
|