1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
|
#!/bin/sh
# Parse the leading command-line options.
#
# Globals written (read by the rest of the script):
#   target        value given with -path / -master (empty if not supplied)
#   mode          operation to perform; defaults to "query"
#   field         optional word following -term / -terms
#   debug         "true" when -debug was given, otherwise "false"
#   opt_consumed  number of leading arguments that were recognized as options
#
# Parsing stops at the first argument that does not start with "-", or at a
# help flag. A function cannot shift its caller's positional parameters, so
# the count of consumed words is reported and the caller shifts them away.
parse_options() {
  target=""
  mode="query"
  field=""
  debug=false
  opt_total=$#

  while [ $# -gt 0 ]
  do
    case "$1" in
      -h | -help | --help )
        mode=help
        break
        ;;
      -debug )
        debug=true
        shift
        ;;
      -path | -master )
        # Without this check a trailing -path would shift past the end of
        # the argument list: silently ignored by some shells, fatal in others.
        if [ $# -lt 2 ]
        then
          echo "$0: Missing argument for $1" >&2
          exit 1
        fi
        target=$2
        shift 2
        ;;
      -count | -counts | -countr | -countp | \
      -filter | -search | -exact | -mock | -mocks | -mockx )
        # For these flags the mode name is the flag without its dash.
        mode=${1#-}
        shift
        ;;
      -query | -phrase )
        mode="query"
        shift
        ;;
      -term | -terms )
        mode="terms"
        shift
        # The field name is optional; take the next word if there is one.
        if [ $# -gt 0 ]
        then
          field=$1
          shift
        fi
        ;;
      -* )
        exec >&2
        echo "$0: Unrecognized option $1"
        exit 1
        ;;
      * )
        break
        ;;
    esac
  done

  opt_consumed=$(( opt_total - $# ))
}

parse_options "$@"
# Drop the parsed options so "$@" holds only the query words.
shift "$opt_consumed"
# Print the usage summary, worked examples and reference tables to stdout.
#
# Only the first line needs expansion (the script name), so it is printed
# separately. Everything else goes through a here-document with a QUOTED
# delimiter: the example code contains parameter expansions and command
# substitutions that must be shown verbatim, not evaluated while printing.
print_help() {
  printf 'USAGE: %s\n' "$0"
  cat <<'EOF'
[-path path_to_pubmed_master]
-count | -counts | -query | -filter | -exact | -terms
query arguments
EXAMPLES:
phrase-search -terms NORM
phrase-search -count "catabolite repress*"
phrase-search -counts "catabolite repress*"
phrase-search -query "(literacy AND numeracy) NOT (adolescent OR child)"
phrase-search -query "selective serotonin reuptake inhibit*"
phrase-search -query "monoamine oxidase inhibitor [STEM]"
phrase-search -query "vitamin c + + common cold"
phrase-search -query "vitamin c ~ ~ common cold"
phrase-search -query "C02.782.417* [TREE] AND 2015:2018 [YEAR]"
phrase-search -exact "Genetic Control of Biochemical Reactions in Neurospora."
AUTOMATION:
ascend_mesh_tree() {
var="${1%\*}"
while :
do
phrase-search -count "$var* [TREE]"
case "$var" in
*.* ) var="${var%????}" ;;
* ) break ;;
esac
done
}
ascend_mesh_tree "C14.907.617.812"
declare -a THEMES
THEMES=( A+ A- Bc Bg C D Ec Ec+ Ec- Eg \
Eg+ G H I Jc Jg K L Md Mp N O Pa \
Pr Q Rg Sa T Te U Ud V+ W X Y Z )
declare -a REMAINS
REMAINS=("${THEMES[@]:1}")
for fst in ${THEMES[@]}
do
num=$(phrase-search -query "$fst [THME]" | wc -l)
echo -e "$fst\t \t$num"
for scd in ${REMAINS[@]}
do
num=$(phrase-search -query "$fst [THME] AND $scd [THME]" | wc -l)
echo -e "$fst\t$scd\t$num"
echo -e "$scd\t$fst\t$num"
done
REMAINS=("${REMAINS[@]:1}")
done | sort | expand -t 7,13
ENTREZ INTEGRATION
esearch -db pubmed -query "complement system proteins [MESH]" -pub clinical |
efetch -format uid |
phrase-search -filter "L [THME] AND D03* [TREE]"
MESH DISEASES
C01 – bacterial infections and mycoses
C02 – virus diseases
C03 – parasitic diseases
C04 – neoplasms
C05 – musculoskeletal diseases
C06 – digestive system diseases
C07 – stomatognathic diseases
C08 – respiratory tract diseases
C09 – otorhinolaryngologic diseases
C10 – nervous system diseases
C11 – eye diseases
C12 – male urogenital diseases
C13 – female urogenital diseases and pregnancy complications
C14 – cardiovascular diseases
C15 – hemic and lymphatic diseases
C16 – congenital, hereditary, and neonatal diseases and abnormalities
C17 – skin and connective tissue diseases
C18 – nutritional and metabolic diseases
C19 – endocrine system diseases
C20 – immune system diseases
C21 – disorders of environmental origin
C22 – animal diseases
C23 – pathological conditions, signs and symptoms
C24 - occupational diseases
C25 - chemically-induced disorders
C26 - wounds and injuries
MESH CHEMICALS AND DRUGS
D01 – inorganic chemicals
D02 – organic chemicals
D03 – heterocyclic compounds
D04 – polycyclic compounds
D05 – macromolecular substances
D06 – hormones, hormone substitutes, and hormone antagonists
D08 – enzymes and coenzymes
D09 – carbohydrates
D10 – lipids
D12 – amino acids, peptides, and proteins
D13 – nucleic acids, nucleotides, and nucleosides
D20 – complex mixtures
D23 – biological factors
D25 – biomedical and dental materials
D26 – pharmaceutical preparations
D27 – chemical actions and uses
THEME CODES:
Chemical-Gene
A+ agonism, activation
A- antagonism, blocking
Bc binding, ligand (especially receptors)
Ec+ increases expression/production
Ec- decreases expression/production
Ec affects expression/production (neutral)
N inhibits
Gene-Chemical
O transport, channels
K metabolism, pharmacokinetics
Z enzyme activity
Chemical-Disease
T treatment/therapy (including investigatory)
C inhibits cell growth (especially cancers)
Sa side effect/adverse event
Pr prevents, suppresses
Pa alleviates, reduces
Jc role in disease pathogenesis
Disease-Chemical
Mp biomarkers (of disease progression)
Gene-Disease
U causal mutations
Ud mutations affecting disease course
D drug targets
Jg role in pathogenesis
Te possible therapeutic effect
Y polymorphisms alter risk
G promotes progression
Disease-Gene
Md biomarkers (diagnostic)
X overexpression in disease
L improper regulation linked to disease
Gene-Gene
Bg binding, ligand (especially receptors)
W enhances response
V+ activates, stimulates
Eg+ increases expression/production
Eg affects expression/production (neutral)
I signaling pathway
H same protein or complex
Rg regulation
Q production by cell population
EOF
}

# Help short-circuits everything else: print it and stop.
if [ "$mode" = "help" ]
then
  print_help
  exit
fi
# Work out the postings directory that is handed to rchive as -path.
#
# Without a -path/-master value the directory comes from the
# EDIRECT_PUBMED_MASTER environment variable with "/Postings" appended.
# With one, the supplied directory is made absolute and "/Postings" is
# appended unless the path already ends in it. On Cygwin/MinGW the result
# is converted with cygpath -w when /bin/cygpath is available.
if [ -z "$target" ]
then
  if [ -z "${EDIRECT_PUBMED_MASTER}" ]
  then
    # Diagnostics go to stderr so they never mix with query output.
    echo "Must supply path to postings files or set EDIRECT_PUBMED_MASTER environment variable" >&2
    exit 1
  fi
  MASTER="${EDIRECT_PUBMED_MASTER}"
  MASTER=${MASTER%/}
  target="$MASTER/Postings"
else
  argument="$target"
  # CDPATH is cleared so cd cannot print a directory name into the captured
  # output. A failed cd is fatal: continuing would leave target empty and
  # quietly turn it into "/Postings".
  if ! target=$(CDPATH= cd -- "$argument" && pwd)
  then
    echo "$0: Unable to access directory '$argument'" >&2
    exit 1
  fi
  target=${target%/}
  case "$target" in
    */Postings )
      ;;
    * )
      target=$target/Postings
      ;;
  esac
fi

# Normalize the platform name so Cygwin and MinGW both read as CYGWIN_NT.
osname=$(uname -s | sed -e 's/_NT-.*$/_NT/; s/^MINGW[0-9]*/CYGWIN/')
if [ "$osname" = "CYGWIN_NT" ] && [ -x /bin/cygpath ]
then
  target=$(cygpath -w "$target")
fi

target=${target%/}
# Debug mode: show the parsed mode, the resolved path and the remaining
# arguments, then stop before any query is run.
case "$debug" in
  true )
    echo "mode: $mode, path: '$target', args: '$*'"
    exit
    ;;
esac
# Run the operation selected by $mode against the postings under $target.
#
# Arguments: the remaining command-line words; they are joined with spaces
#            into a single query string before being passed to rchive.
# Globals:   mode, target, field (read)
# Returns:   the status of the last command run; non-zero if the postings
#            directory cannot be entered in terms mode.
run_mode() {
  case "$mode" in
    count | counts | countr | countp | query | search | exact | mock | mocks | mockx )
      # Each of these mode names is also the rchive flag name.
      rchive -path "$target" "-$mode" "$*"
      ;;
    filter )
      # A filter is sent as a -query whose text starts with "[PIPE] ".
      # The prefix is added unless present; a bare expression with no
      # leading operator is additionally joined with AND.
      case "$*" in
        "AND "* | "OR "* | "NOT "* )
          rchive -path "$target" -query "[PIPE] $*"
          ;;
        "[PIPE] "* )
          rchive -path "$target" -query "$*"
          ;;
        * )
          rchive -path "$target" -query "[PIPE] AND $*"
          ;;
      esac
      ;;
    terms )
      if [ -z "$field" ]
      then
        # No field given: list the subdirectories of the postings directory.
        # Stop if it cannot be entered; otherwise the loop would list
        # whatever directories exist in the current working directory.
        cd "$target" || return 1
        for dr in *
        do
          if [ -d "$dr" ]
          then
            printf '%s\n' "$dr"
          fi
        done
      else
        # Field given: print every *.<field>.trm file found beneath each
        # subdirectory of <target>/<field>, in version-sorted path order.
        for dr in "$target/$field"/*
        do
          if [ -d "$dr" ]
          then
            find "$dr" -name "*.$field.trm" -print0 | sort -Vz | xargs -0 cat
          fi
        done
      fi
      ;;
  esac
}

run_mode "$@"
|