summaryrefslogtreecommitdiff
path: root/amino-acid-composition
blob: 817e6e58698a406cb871b2ff0a5c90ed4847f455 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/bin/bash -norc

# Public domain notice for all NCBI EDirect scripts is located at:
# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice   

# Three-letter amino acid abbreviations ordered by one-letter code:
# index 0 holds the entry for letter a, index 25 the entry for letter z.
abbrev=(
  Ala Asx Cys Asp Glu Phe Gly
  His Ile Xle Lys Leu Met Asn
  Pyl Pro Gln Arg Ser Thr Sec
  Val Trp Xxx Tyr Glx
)

#######################################
# Print a 26-row amino acid composition table, sorted by abbreviation.
# Globals:   abbrev (read) - three-letter abbreviations, index 0 = letter a
# Arguments: $1 - newline-separated tallies, one "<count> <letter>" pair per
#                 line, where <letter> is a lowercase one-letter residue code
# Outputs:   one "<abbrev><TAB><count>" line per letter a..z on stdout;
#            letters that do not appear in $1 are reported with a count of 0
#######################################
AminoAcidComp() {
  local -a count=()
  local num lttr idx ofs i
  while read -r num lttr
  do
    # a leading quote makes printf yield the character code of the letter;
    # -v stores it directly instead of forking a subshell per line
    printf -v idx '%d' "'$lttr'"
    ofs=$((idx-97))
    # skip blank or malformed lines (anything outside a..z) rather than
    # attempting an out-of-range array subscript
    if (( ofs < 0 || ofs > 25 ))
    then
      continue
    fi
    count[$ofs]="$num"
  done <<< "$1"
  for i in {0..25}
  do
    printf '%s\t%s\n' "${abbrev[$i]}" "${count[$i]-0}"
  done |
  sort
}

# Read one sequence per line from stdin and print a composition table for each.
# IFS= and -r keep the line verbatim (a trailing backslash no longer joins it
# with the next line); the extra -n test also processes a final line that is
# not terminated by a newline, which plain `read` would silently drop.
while IFS= read -r seq || [[ -n "$seq" ]]
do
  # Lowercase the line, delete everything that is not a letter, put one letter
  # per line, and tally them with sort-uniq-count (external helper, not
  # defined in this file; AminoAcidComp consumes its output as "<count> <letter>").
  # printf is used because echo would swallow a line that looks like an option.
  comp="$(printf '%s\n' "$seq" | tr A-Z a-z | sed 's/[^a-z]//g' | fold -w 1 | sort-uniq-count)"
  AminoAcidComp "$comp"
done