summaryrefslogtreecommitdiff
path: root/phrase-search
blob: f4f2021658ab2fbd939b38d7acd40bb203a0127c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
#!/bin/sh

# Defaults, overridden by the command-line options parsed below.
target=""      # value given with -path / -master; empty means "not supplied"
mode="query"   # operation selected by the mode options
field=""       # optional field name that may follow -term / -terms
debug=false    # switched to true by -debug

# Consume leading options.  Parsing stops at the first argument that does not
# start with '-', leaving the remaining arguments in "$@" for later use.
while [ $# -gt 0 ]
do
  case "$1" in
    -h | -help | --help )
      # Not shifted: the loop is abandoned straight away.
      mode=help
      break
      ;;
    -debug )
      debug=true
      shift
      ;;
    -path | -master )
      # Insist on a value.  Shifting more parameters than exist may abort a
      # non-interactive POSIX shell, while other shells carry on silently with
      # an empty target, so report the problem explicitly instead.
      if [ $# -lt 2 ]
      then
        exec >&2
        echo "$0: Missing argument for option $1"
        exit 1
      fi
      target=$2
      shift 2
      ;;
    -query | -phrase )
      mode="query"
      shift
      ;;
    -count | -counts | -countr | -countp | \
    -filter | -search | -exact | \
    -mock | -mocks | -mockx )
      # For these options the mode name is the option without its dash.
      mode=${1#-}
      shift
      ;;
    -term | -terms )
      mode="terms"
      shift
      # The next argument, when present, is taken as the field name.
      if [ $# -gt 0 ]
      then
        field=$1
        shift
      fi
      ;;
    -* )
      exec >&2
      echo "$0: Unrecognized option $1"
      exit 1
      ;;
    * )
      break
      ;;
  esac
done

# Print the usage text and stop when help mode was selected.
if [ "$mode" = "help" ]
then
  # Only the USAGE line needs an expansion (the script name), so it is
  # printed separately.  Everything else goes through a here-document with a
  # quoted delimiter: the text contains sample shell code, and with an
  # unquoted delimiter its $var, ${...} and $(...) constructs would be
  # expanded, and the command substitutions executed, instead of displayed.
  printf 'USAGE: %s\n' "$0"
  cat <<'EOF'
       [-path path_to_pubmed_master]
       -count | -counts | -query | -filter | -exact | -terms
       query arguments

EXAMPLES:

  phrase-search -terms NORM

  phrase-search -count "catabolite repress*"

  phrase-search -counts "catabolite repress*"

  phrase-search -query "(literacy AND numeracy) NOT (adolescent OR child)"

  phrase-search -query "selective serotonin reuptake inhibit*"

  phrase-search -query "monoamine oxidase inhibitor [STEM]"

  phrase-search -query "vitamin c + + common cold"

  phrase-search -query "vitamin c ~ ~ common cold"

  phrase-search -query "C02.782.417* [TREE] AND 2015:2018 [YEAR]"

  phrase-search -exact "Genetic Control of Biochemical Reactions in Neurospora."

AUTOMATION:

  ascend_mesh_tree() {
    var="${1%\*}"
    while :
    do
      phrase-search -count "$var* [TREE]"
      case "$var" in
        *.* ) var="${var%????}" ;;
        *   ) break             ;;
      esac
    done
  }

  ascend_mesh_tree "C14.907.617.812"

  declare -a THEMES
  THEMES=( A+ A- Bc Bg C D Ec Ec+ Ec- Eg \
           Eg+ G H I Jc Jg K L Md Mp N O Pa \
           Pr Q Rg Sa T Te U Ud V+ W X Y Z )
  declare -a REMAINS
  REMAINS=("${THEMES[@]:1}")

  for fst in ${THEMES[@]}
  do
    num=$(phrase-search -query "$fst [THME]" | wc -l)
    echo -e "$fst\t \t$num"
    for scd in ${REMAINS[@]}
    do
      num=$(phrase-search -query "$fst [THME] AND $scd [THME]" | wc -l)
      echo -e "$fst\t$scd\t$num"
      echo -e "$scd\t$fst\t$num"
    done
    REMAINS=("${REMAINS[@]:1}")
  done | sort | expand -t 7,13

ENTREZ INTEGRATION

  esearch -db pubmed -query "complement system proteins [MESH]" -pub clinical |
  efetch -format uid |
  phrase-search -filter "L [THME] AND D03* [TREE]"

MESH DISEASES

  C01 – bacterial infections and mycoses
  C02 – virus diseases
  C03 – parasitic diseases
  C04 – neoplasms
  C05 – musculoskeletal diseases
  C06 – digestive system diseases
  C07 – stomatognathic diseases
  C08 – respiratory tract diseases
  C09 – otorhinolaryngologic diseases
  C10 – nervous system diseases
  C11 – eye diseases
  C12 – male urogenital diseases
  C13 – female urogenital diseases and pregnancy complications
  C14 – cardiovascular diseases
  C15 – hemic and lymphatic diseases
  C16 – congenital, hereditary, and neonatal diseases and abnormalities
  C17 – skin and connective tissue diseases
  C18 – nutritional and metabolic diseases
  C19 – endocrine system diseases
  C20 – immune system diseases
  C21 – disorders of environmental origin
  C22 – animal diseases
  C23 – pathological conditions, signs and symptoms
  C24 - occupational diseases
  C25 - chemically-induced disorders
  C26 - wounds and injuries

MESH CHEMICALS AND DRUGS

  D01 – inorganic chemicals
  D02 – organic chemicals
  D03 – heterocyclic compounds
  D04 – polycyclic compounds
  D05 – macromolecular substances
  D06 – hormones, hormone substitutes, and hormone antagonists
  D08 – enzymes and coenzymes
  D09 – carbohydrates
  D10 – lipids
  D12 – amino acids, peptides, and proteins
  D13 – nucleic acids, nucleotides, and nucleosides
  D20 – complex mixtures
  D23 – biological factors
  D25 – biomedical and dental materials
  D26 – pharmaceutical preparations
  D27 – chemical actions and uses

THEME CODES:

Chemical-Gene

  A+    agonism, activation
  A-    antagonism, blocking
  Bc    binding, ligand (especially receptors)
  Ec+   increases expression/production
  Ec-   decreases expression/production
  Ec    affects expression/production (neutral)
  N     inhibits

Gene-Chemical

  O     transport, channels
  K     metabolism, pharmacokinetics
  Z     enzyme activity

Chemical-Disease

  T     treatment/therapy (including investigatory)
  C     inhibits cell growth (especially cancers)
  Sa    side effect/adverse event
  Pr    prevents, suppresses
  Pa    alleviates, reduces
  Jc    role in disease pathogenesis

Disease-Chemical

  Mp    biomarkers (of disease progression)

Gene-Disease

  U     causal mutations
  Ud    mutations affecting disease course
  D     drug targets
  Jg    role in pathogenesis
  Te    possible therapeutic effect
  Y     polymorphisms alter risk
  G     promotes progression

Disease-Gene

  Md    biomarkers (diagnostic)
  X     overexpression in disease
  L     improper regulation linked to disease

Gene-Gene

  Bg    binding, ligand (especially receptors)
  W     enhances response
  V+    activates, stimulates
  Eg+   increases expression/production
  Eg    affects expression/production (neutral)
  I     signaling pathway
  H     same protein or complex
  Rg    regulation
  Q     production by cell population

EOF
  exit
fi

# Work out the Postings directory to search.  An explicit -path / -master
# value wins; otherwise the EDIRECT_PUBMED_MASTER environment variable is
# used.  Either way the result ends in "/Postings".
if [ -z "$target" ]
then
  if [ -z "${EDIRECT_PUBMED_MASTER}" ]
  then
    # Diagnostics belong on stderr so they cannot be mistaken for results.
    echo "Must supply path to postings files or set EDIRECT_PUBMED_MASTER environment variable" >&2
    exit 1
  fi
  MASTER="${EDIRECT_PUBMED_MASTER}"
  MASTER=${MASTER%/}
  target="$MASTER/Postings"
else
  argument="$target"
  # Turn the supplied directory into an absolute path.  CDPATH is emptied
  # for the cd so it cannot print an extra line into the substitution, and
  # "--" protects directory names that begin with a dash.  A failed cd must
  # stop the script: otherwise target would be empty and the code below
  # would quietly turn it into "/Postings".
  if ! target=$(CDPATH= cd -- "$argument" && pwd)
  then
    echo "$0: Unable to access directory $argument" >&2
    exit 1
  fi
  target=${target%/}
  # Append the Postings component unless the caller already pointed at it.
  case "$target" in
    */Postings ) ;;
    * ) target=$target/Postings ;;
  esac
fi

# Normalise the platform name so that the Cygwin and MinGW variants of
# "uname -s" both compare equal to CYGWIN_NT.
osname=$(uname -s | sed -e 's/_NT-.*$/_NT/; s/^MINGW[0-9]*/CYGWIN/')
if [ "$osname" = "CYGWIN_NT" ] && [ -x /bin/cygpath ]
then
  # Convert to a Windows-style path with cygpath when it is available.
  target=$(cygpath -w "$target")
fi

# Drop a single trailing slash, if any.
target=${target%/}

# With -debug, show the selected mode, the resolved path and the remaining
# arguments, then stop without performing the operation.
case "$debug" in
  true )
    echo "mode: $mode, path: '$target', args: '$*'"
    exit
    ;;
esac

# Carry out the selected operation.  Except in terms mode, the remaining
# command-line arguments are joined with "$*" and handed to rchive as a
# single argument.
case "$mode" in
  count | counts | countr | countp | query | search | exact | mock | mocks | mockx )
    # Each of these modes is passed to rchive as the flag of the same name.
    rchive -path "$target" "-$mode" "$*"
    ;;
  filter )
    # Filter mode always runs an rchive -query whose text starts with the
    # "[PIPE]" token, joined to the user's expression with AND unless the
    # expression already begins with an operator or with "[PIPE]" itself.
    # NOTE(review): "[PIPE]" presumably stands for the identifiers arriving
    # on standard input - confirm against rchive.
    case "$*" in
      "AND "* | "OR "* | "NOT "* )
        rchive -path "$target" -query "[PIPE] $*"
        ;;
      "[PIPE] "* )
        rchive -path "$target" -query "$*"
        ;;
      * )
        rchive -path "$target" -query "[PIPE] AND $*"
        ;;
    esac
    ;;
  terms )
    if [ -z "$field" ]
    then
      # No field given: list the subdirectories of the target directory.
      # Stop if it cannot be entered; otherwise the loop would list the
      # subdirectories of whatever directory the caller happened to be in.
      cd "$target" || exit 1
      for dr in *
      do
        if [ -d "$dr" ]
        then
          printf '%s\n' "$dr"
        fi
      done
    else
      # Field given: for every subdirectory of that field's directory,
      # concatenate its *.FIELD.trm files in version-sorted name order.
      for dr in "$target/$field"/*
      do
        if [ -d "$dr" ]
        then
          find "$dr" -name "*.$field.trm" -print0 | sort -Vz | xargs -0 cat
        fi
      done
    fi
    ;;
esac