summaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
authorAaron M. Ucko <ucko@debian.org>2005-03-23 23:49:09 +0000
committerAaron M. Ucko <ucko@debian.org>2005-03-23 23:49:09 +0000
commit5349ec8772bc373e4c2349a04e57d7952c006326 (patch)
treeb733fe1df8c0c2d2418b3ce82ebcbd3d0db12a1f /util
parent0eff2d00595b4adcf6f1c4e6bbbcf0f416c70310 (diff)
Load ncbi (6.1.20031028) into ncbi-tools6/branches/upstream/current.
Diffstat (limited to 'util')
-rw-r--r--util/tables/raw_scoremat.c122
-rw-r--r--util/tables/raw_scoremat.h95
-rw-r--r--util/tables/sm_blosum45.c99
-rw-r--r--util/tables/sm_blosum62.c99
-rw-r--r--util/tables/sm_blosum80.c99
-rw-r--r--util/tables/sm_pam30.c97
-rw-r--r--util/tables/sm_pam70.c97
-rw-r--r--util/tables/tables_export.h57
8 files changed, 765 insertions, 0 deletions
diff --git a/util/tables/raw_scoremat.c b/util/tables/raw_scoremat.c
new file mode 100644
index 00000000..d0c4e47d
--- /dev/null
+++ b/util/tables/raw_scoremat.c
@@ -0,0 +1,122 @@
+/* $Id: raw_scoremat.c,v 1.2 2003/10/02 15:37:34 ivanov Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Aaron Ucko
+ *
+ * File Description:
+ * Protein alignment score matrices; shared between the two toolkits.
+ *
+ */
+
+#include <util/tables/raw_scoremat.h>
+
+#include <ctype.h>
+#include <string.h>
+
+#include "sm_blosum45.c"
+#include "sm_blosum62.c"
+#include "sm_blosum80.c"
+#include "sm_pam30.c"
+#include "sm_pam70.c"
+
+static const char kNCBIstdaa[] = "-ABCDEFGHIKLMNPQRSTVWXYZU*"; /* lookup table:
+ * position = NCBIstdaa residue code, character = the NCBIeaa letter that
+ * NCBISM_GetIndex() and NCBISM_Unpack() substitute for that code.
+ * Note that sizeof(kNCBIstdaa) also counts the terminating NUL. */
+
+
+/* Map a residue to its row/column position within sm->symbols.
+ *
+ * aa may be an NCBIstdaa code (an index into kNCBIstdaa), an NCBIeaa
+ * letter, or a lowercase letter.
+ * Returns the zero-based offset of the residue in sm->symbols, or -1 if
+ * the matrix has no entry for it.
+ */
+int NCBISM_GetIndex(const SNCBIPackedScoreMatrix* sm, int aa)
+{
+    const char *p;
+
+    /* Translate to NCBIeaa */
+    if (aa >= 0 && aa < (int)sizeof(kNCBIstdaa) - 1) {
+        /* The "- 1" keeps the string's terminating NUL out of the table:
+         * translating to '\0' would let strchr() below match the end of
+         * sm->symbols and report strlen(sm->symbols) as a valid index. */
+        aa = kNCBIstdaa[aa];
+    } else if (aa < 0 || aa != (unsigned char)aa) {
+        /* Not representable as a character: the <ctype.h> calls would be
+         * undefined, and strchr() would truncate the value (possibly to
+         * '\0' or to an unrelated letter). */
+        return -1;
+    } else if (islower(aa)) {
+        aa = toupper(aa);
+    }
+
+    p = strchr(sm->symbols, aa);
+    return p ? (int)(p - sm->symbols) : -1;
+}
+
+
+/* Score the residue pair (aa1, aa2) straight from the packed matrix.
+ * If either residue has no position in sm->symbols, sm->defscore is
+ * returned instead.
+ */
+TNCBIScore NCBISM_GetScore(const SNCBIPackedScoreMatrix* sm,
+                           int aa1, int aa2)
+{
+    const int row = NCBISM_GetIndex(sm, aa1);
+    const int col = NCBISM_GetIndex(sm, aa2);
+
+    if (row < 0 || col < 0) {
+        return sm->defscore;
+    }
+    /* scores is a flattened square table, strlen(symbols) cells per row */
+    return sm->scores[row * strlen(sm->symbols) + col];
+}
+
+
+/* Expand a packed matrix into a full NCBI_FSM_DIM x NCBI_FSM_DIM table
+ * that can be indexed directly by residue.
+ *
+ * psm  packed matrix to read.
+ * fsm  table to fill; every cell is overwritten.
+ *
+ * Cells for pairs the packed matrix does not cover hold psm->defscore.
+ * Rows and columns are then duplicated so that NCBIstdaa codes and
+ * lowercase letters index the same scores as the uppercase NCBIeaa letter.
+ */
+void NCBISM_Unpack(const SNCBIPackedScoreMatrix* psm,
+                   SNCBIFullScoreMatrix* fsm)
+{
+    /* number of real NCBIstdaa codes: the terminating NUL is not one */
+    const int   n_stdaa = (int)sizeof(kNCBIstdaa) - 1;
+    const char* sym     = psm->symbols;
+    const int   dim     = (int)strlen(sym);
+    int         i, j, aa1, aa2;
+
+    /* fill with default; memset() stores the value byte by byte, so this
+     * relies on a score occupying exactly one byte */
+    memset(fsm->s, psm->defscore, sizeof(fsm->s));
+    for (i = 0; i < dim; ++i) {
+        aa1 = sym[i];
+        /* get core (NCBIeaa x NCBIeaa) */
+        for (j = 0; j < dim; ++j) {
+            aa2 = sym[j];
+            fsm->s[aa1][aa2] = psm->scores[i * dim + j];
+        }
+        /* extend horizontally: NCBIstdaa codes, then lowercase letters */
+        for (aa2 = 0; aa2 < n_stdaa; ++aa2) {
+            fsm->s[aa1][aa2] = fsm->s[aa1][(int)kNCBIstdaa[aa2]];
+        }
+        for (aa2 = 'a'; aa2 <= 'z'; ++aa2) {
+            fsm->s[aa1][aa2] = fsm->s[aa1][toupper(aa2)];
+        }
+    }
+    /* extend vertically: copy whole rows, which by now are complete */
+    for (aa1 = 0; aa1 < n_stdaa; ++aa1) {
+        memcpy(fsm->s[aa1], fsm->s[(int)kNCBIstdaa[aa1]],
+               sizeof(fsm->s[aa1]));
+    }
+    for (aa1 = 'a'; aa1 <= 'z'; ++aa1) {
+        memcpy(fsm->s[aa1], fsm->s[toupper(aa1)], sizeof(fsm->s[aa1]));
+    }
+}
+
+
+/*
+ * ===========================================================================
+ * $Log: raw_scoremat.c,v $
+ * Revision 1.2 2003/10/02 15:37:34 ivanov
+ * Get rid of compilation warnings
+ *
+ * Revision 1.1 2003/08/21 19:48:20 ucko
+ * Add tables library (shared with C) for raw score matrices, etc.
+ *
+ * ===========================================================================
+ */
diff --git a/util/tables/raw_scoremat.h b/util/tables/raw_scoremat.h
new file mode 100644
index 00000000..434e4a0a
--- /dev/null
+++ b/util/tables/raw_scoremat.h
@@ -0,0 +1,95 @@
+#ifndef UTIL_TABLES___SCOREMAT__H
+#define UTIL_TABLES___SCOREMAT__H
+
+/* $Id: raw_scoremat.h,v 1.1 2003/08/21 19:48:19 ucko Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Aaron Ucko
+ *
+ */
+
+/** @file raw_scoremat.h
+ ** Protein alignment score matrices; shared between the two toolkits.
+ **/
+
+#include <util/tables/tables_export.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** data types
+ **
+ ** TNCBIScore is the type of a single matrix cell.
+ ** SNCBIPackedScoreMatrix describes a square score table stored as one flat
+ ** array: for residues found at positions i and j of symbols,
+ ** NCBISM_GetScore reads scores[i * strlen(symbols) + j].
+ **/
+
+typedef signed char TNCBIScore;
+typedef struct SNCBIPackedScoreMatrix {
+ const char* symbols; /**< order of residues; its length is the matrix dimension */
+ const TNCBIScore* scores; /**< strlen(symbols) x strlen(symbols), stored row by row */
+ TNCBIScore defscore; /**< score for unknown residues */
+} SNCBIPackedScoreMatrix;
+
+/** These two functions aren't very fast, but avoid the memory and time
+ ** overhead of unpacking.
+ ** Residues (aa, aa1, aa2) may be either NCBIstdaa, NCBIeaa, or
+ ** lowercase NCBIeaa, though matrices generally don't cover U.
+ **
+ ** NCBISM_GetIndex returns the position of aa within sm->symbols, or -1
+ ** when the matrix has no such residue.
+ ** NCBISM_GetScore returns the score for the pair (aa1, aa2), or
+ ** sm->defscore when either residue is not in the matrix.
+ **/
+extern NCBI_TABLES_EXPORT
+int NCBISM_GetIndex(const SNCBIPackedScoreMatrix* sm, int aa);
+extern NCBI_TABLES_EXPORT
+TNCBIScore NCBISM_GetScore(const SNCBIPackedScoreMatrix* sm,
+ int aa1, int aa2);
+
+/** Recommended approach: unpack and index directly.
+ ** NCBISM_Unpack fills every cell of fsm from psm. Afterwards
+ ** fsm->s[aa1][aa2] is the score for the residue pair, where each index
+ ** may be an NCBIstdaa code or an upper- or lowercase NCBIeaa letter;
+ ** pairs the packed matrix does not cover hold psm->defscore.
+ **/
+#define NCBI_FSM_DIM 128
+typedef struct SNCBIFullScoreMatrix {
+ TNCBIScore s[NCBI_FSM_DIM][NCBI_FSM_DIM]; /**< indexed [residue 1][residue 2] */
+} SNCBIFullScoreMatrix;
+
+extern NCBI_TABLES_EXPORT
+void NCBISM_Unpack(const SNCBIPackedScoreMatrix* psm,
+ SNCBIFullScoreMatrix* fsm);
+
+/** The standard matrices.
+ ** Each one is defined in its own sm_*.c file; all five list their
+ ** residues in the order "ARNDCQEGHILKMFPSTWYVBZX*".
+ **/
+extern NCBI_TABLES_EXPORT const SNCBIPackedScoreMatrix NCBISM_Blosum45;
+extern NCBI_TABLES_EXPORT const SNCBIPackedScoreMatrix NCBISM_Blosum62;
+extern NCBI_TABLES_EXPORT const SNCBIPackedScoreMatrix NCBISM_Blosum80;
+extern NCBI_TABLES_EXPORT const SNCBIPackedScoreMatrix NCBISM_Pam30;
+extern NCBI_TABLES_EXPORT const SNCBIPackedScoreMatrix NCBISM_Pam70;
+
+#ifdef __cplusplus
+}
+#endif
+
+/*
+* ===========================================================================
+*
+* $Log: raw_scoremat.h,v $
+* Revision 1.1 2003/08/21 19:48:19 ucko
+* Add tables library (shared with C) for raw score matrices, etc.
+*
+*
+* ===========================================================================
+*/
+
+#endif /* UTIL_TABLES___SCOREMAT__H */
diff --git a/util/tables/sm_blosum45.c b/util/tables/sm_blosum45.c
new file mode 100644
index 00000000..bfa42b59
--- /dev/null
+++ b/util/tables/sm_blosum45.c
@@ -0,0 +1,99 @@
+/* $Id: sm_blosum45.c,v 1.1 2003/08/21 19:48:20 ucko Exp $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+* Author: Aaron Ucko (via ./convert_scoremat.pl)
+*
+* File Description:
+* Protein alignment score matrices; shared between the two toolkits.
+*
+* ===========================================================================
+*/
+
+#include <util/tables/raw_scoremat.h>
+
+/* Matrix made by matblas from blosum45.iij */
+/* * column uses minimum score */
+/* BLOSUM Clustered Scoring Matrix in 1/3 Bit Units */
+/* Blocks Database = /data/blocks_5.0/blocks.dat */
+/* Cluster Percentage: >= 45 */
+/* Entropy = 0.3795, Expected = -0.2789 */
+
+static const TNCBIScore s_Blosum45PSM[24][24] = {
+ /* 24 x 24 BLOSUM45 scores. Each row is labelled with its residue; the
+ columns follow the same order, which is also the symbols string of
+ NCBISM_Blosum45:
+ A, R, N, D, C, Q, E, G, H, I, L, K,
+ M, F, P, S, T, W, Y, V, B, Z, X, * */
+ /*A*/ { 5, -2, -1, -2, -1, -1, -1, 0, -2, -1, -1, -1,
+ -1, -2, -1, 1, 0, -2, -2, 0, -1, -1, 0, -5 },
+ /*R*/ { -2, 7, 0, -1, -3, 1, 0, -2, 0, -3, -2, 3,
+ -1, -2, -2, -1, -1, -2, -1, -2, -1, 0, -1, -5 },
+ /*N*/ { -1, 0, 6, 2, -2, 0, 0, 0, 1, -2, -3, 0,
+ -2, -2, -2, 1, 0, -4, -2, -3, 4, 0, -1, -5 },
+ /*D*/ { -2, -1, 2, 7, -3, 0, 2, -1, 0, -4, -3, 0,
+ -3, -4, -1, 0, -1, -4, -2, -3, 5, 1, -1, -5 },
+ /*C*/ { -1, -3, -2, -3, 12, -3, -3, -3, -3, -3, -2, -3,
+ -2, -2, -4, -1, -1, -5, -3, -1, -2, -3, -2, -5 },
+ /*Q*/ { -1, 1, 0, 0, -3, 6, 2, -2, 1, -2, -2, 1,
+ 0, -4, -1, 0, -1, -2, -1, -3, 0, 4, -1, -5 },
+ /*E*/ { -1, 0, 0, 2, -3, 2, 6, -2, 0, -3, -2, 1,
+ -2, -3, 0, 0, -1, -3, -2, -3, 1, 4, -1, -5 },
+ /*G*/ { 0, -2, 0, -1, -3, -2, -2, 7, -2, -4, -3, -2,
+ -2, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -5 },
+ /*H*/ { -2, 0, 1, 0, -3, 1, 0, -2, 10, -3, -2, -1,
+ 0, -2, -2, -1, -2, -3, 2, -3, 0, 0, -1, -5 },
+ /*I*/ { -1, -3, -2, -4, -3, -2, -3, -4, -3, 5, 2, -3,
+ 2, 0, -2, -2, -1, -2, 0, 3, -3, -3, -1, -5 },
+ /*L*/ { -1, -2, -3, -3, -2, -2, -2, -3, -2, 2, 5, -3,
+ 2, 1, -3, -3, -1, -2, 0, 1, -3, -2, -1, -5 },
+ /*K*/ { -1, 3, 0, 0, -3, 1, 1, -2, -1, -3, -3, 5,
+ -1, -3, -1, -1, -1, -2, -1, -2, 0, 1, -1, -5 },
+ /*M*/ { -1, -1, -2, -3, -2, 0, -2, -2, 0, 2, 2, -1,
+ 6, 0, -2, -2, -1, -2, 0, 1, -2, -1, -1, -5 },
+ /*F*/ { -2, -2, -2, -4, -2, -4, -3, -3, -2, 0, 1, -3,
+ 0, 8, -3, -2, -1, 1, 3, 0, -3, -3, -1, -5 },
+ /*P*/ { -1, -2, -2, -1, -4, -1, 0, -2, -2, -2, -3, -1,
+ -2, -3, 9, -1, -1, -3, -3, -3, -2, -1, -1, -5 },
+ /*S*/ { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -3, -1,
+ -2, -2, -1, 4, 2, -4, -2, -1, 0, 0, 0, -5 },
+ /*T*/ { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1,
+ -1, -1, -1, 2, 5, -3, -1, 0, 0, -1, 0, -5 },
+ /*W*/ { -2, -2, -4, -4, -5, -2, -3, -2, -3, -2, -2, -2,
+ -2, 1, -3, -4, -3, 15, 3, -3, -4, -2, -2, -5 },
+ /*Y*/ { -2, -1, -2, -2, -3, -1, -2, -3, 2, 0, 0, -1,
+ 0, 3, -3, -2, -1, 3, 8, -1, -2, -2, -1, -5 },
+ /*V*/ { 0, -2, -3, -3, -1, -3, -3, -3, -3, 3, 1, -2,
+ 1, 0, -3, -1, 0, -3, -1, 5, -3, -3, -1, -5 },
+ /*B*/ { -1, -1, 4, 5, -2, 0, 1, -1, 0, -3, -3, 0,
+ -2, -3, -2, 0, 0, -4, -2, -3, 4, 2, -1, -5 },
+ /*Z*/ { -1, 0, 0, 1, -3, 4, 4, -2, 0, -3, -2, 1,
+ -1, -3, -1, 0, -1, -2, -2, -3, 2, 4, -1, -5 },
+ /*X*/ { 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 0, 0, -2, -1, -1, -1, -1, -1, -5 },
+ /***/ { -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 1 }
+};
+/* Packed descriptor that exposes the BLOSUM45 table to the NCBISM_* API. */
+const SNCBIPackedScoreMatrix NCBISM_Blosum45 = {
+    /* symbols  */ "ARNDCQEGHILKMFPSTWYVBZX*",
+    /* scores   */ &s_Blosum45PSM[0][0],
+    /* defscore */ -5
+};
diff --git a/util/tables/sm_blosum62.c b/util/tables/sm_blosum62.c
new file mode 100644
index 00000000..3f356def
--- /dev/null
+++ b/util/tables/sm_blosum62.c
@@ -0,0 +1,99 @@
+/* $Id: sm_blosum62.c,v 1.1 2003/08/21 19:48:20 ucko Exp $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+* Author: Aaron Ucko (via ./convert_scoremat.pl)
+*
+* File Description:
+* Protein alignment score matrices; shared between the two toolkits.
+*
+* ===========================================================================
+*/
+
+#include <util/tables/raw_scoremat.h>
+
+/* Matrix made by matblas from blosum62.iij */
+/* * column uses minimum score */
+/* BLOSUM Clustered Scoring Matrix in 1/2 Bit Units */
+/* Blocks Database = /data/blocks_5.0/blocks.dat */
+/* Cluster Percentage: >= 62 */
+/* Entropy = 0.6979, Expected = -0.5209 */
+
+static const TNCBIScore s_Blosum62PSM[24][24] = {
+ /* 24 x 24 BLOSUM62 scores. Each row is labelled with its residue; the
+ columns follow the same order, which is also the symbols string of
+ NCBISM_Blosum62:
+ A, R, N, D, C, Q, E, G, H, I, L, K,
+ M, F, P, S, T, W, Y, V, B, Z, X, * */
+ /*A*/ { 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1,
+ -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4 },
+ /*R*/ { -1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2,
+ -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4 },
+ /*N*/ { -2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0,
+ -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4 },
+ /*D*/ { -2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1,
+ -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4 },
+ /*C*/ { 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3,
+ -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4 },
+ /*Q*/ { -1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1,
+ 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4 },
+ /*E*/ { -1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1,
+ -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4 },
+ /*G*/ { 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2,
+ -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4 },
+ /*H*/ { -2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1,
+ -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4 },
+ /*I*/ { -1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3,
+ 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4 },
+ /*L*/ { -1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2,
+ 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4 },
+ /*K*/ { -1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5,
+ -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4 },
+ /*M*/ { -1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1,
+ 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4 },
+ /*F*/ { -2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3,
+ 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4 },
+ /*P*/ { -1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1,
+ -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4 },
+ /*S*/ { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0,
+ -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4 },
+ /*T*/ { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1,
+ -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4 },
+ /*W*/ { -3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3,
+ -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4 },
+ /*Y*/ { -2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2,
+ -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4 },
+ /*V*/ { 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2,
+ 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4 },
+ /*B*/ { -2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0,
+ -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1, -4 },
+ /*Z*/ { -1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1,
+ -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4 },
+ /*X*/ { 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4 },
+ /***/ { -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1 }
+};
+/* Packed descriptor that exposes the BLOSUM62 table to the NCBISM_* API. */
+const SNCBIPackedScoreMatrix NCBISM_Blosum62 = {
+    /* symbols  */ "ARNDCQEGHILKMFPSTWYVBZX*",
+    /* scores   */ &s_Blosum62PSM[0][0],
+    /* defscore */ -4
+};
diff --git a/util/tables/sm_blosum80.c b/util/tables/sm_blosum80.c
new file mode 100644
index 00000000..dad0d652
--- /dev/null
+++ b/util/tables/sm_blosum80.c
@@ -0,0 +1,99 @@
+/* $Id: sm_blosum80.c,v 1.1 2003/08/21 19:48:20 ucko Exp $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+* Author: Aaron Ucko (via ./convert_scoremat.pl)
+*
+* File Description:
+* Protein alignment score matrices; shared between the two toolkits.
+*
+* ===========================================================================
+*/
+
+#include <util/tables/raw_scoremat.h>
+
+/* Matrix made by matblas from blosum80.iij */
+/* * column uses minimum score */
+/* BLOSUM Clustered Scoring Matrix in 1/2 Bit Units */
+/* Blocks Database = /data/blocks_5.0/blocks.dat */
+/* Cluster Percentage: >= 80 */
+/* Entropy = 0.9868, Expected = -0.7442 */
+
+static const TNCBIScore s_Blosum80PSM[24][24] = {
+ /* 24 x 24 BLOSUM80 scores. Each row is labelled with its residue; the
+ columns follow the same order, which is also the symbols string of
+ NCBISM_Blosum80:
+ A, R, N, D, C, Q, E, G, H, I, L, K,
+ M, F, P, S, T, W, Y, V, B, Z, X, * */
+ /*A*/ { 5, -2, -2, -2, -1, -1, -1, 0, -2, -2, -2, -1,
+ -1, -3, -1, 1, 0, -3, -2, 0, -2, -1, -1, -6 },
+ /*R*/ { -2, 6, -1, -2, -4, 1, -1, -3, 0, -3, -3, 2,
+ -2, -4, -2, -1, -1, -4, -3, -3, -2, 0, -1, -6 },
+ /*N*/ { -2, -1, 6, 1, -3, 0, -1, -1, 0, -4, -4, 0,
+ -3, -4, -3, 0, 0, -4, -3, -4, 4, 0, -1, -6 },
+ /*D*/ { -2, -2, 1, 6, -4, -1, 1, -2, -2, -4, -5, -1,
+ -4, -4, -2, -1, -1, -6, -4, -4, 4, 1, -2, -6 },
+ /*C*/ { -1, -4, -3, -4, 9, -4, -5, -4, -4, -2, -2, -4,
+ -2, -3, -4, -2, -1, -3, -3, -1, -4, -4, -3, -6 },
+ /*Q*/ { -1, 1, 0, -1, -4, 6, 2, -2, 1, -3, -3, 1,
+ 0, -4, -2, 0, -1, -3, -2, -3, 0, 3, -1, -6 },
+ /*E*/ { -1, -1, -1, 1, -5, 2, 6, -3, 0, -4, -4, 1,
+ -2, -4, -2, 0, -1, -4, -3, -3, 1, 4, -1, -6 },
+ /*G*/ { 0, -3, -1, -2, -4, -2, -3, 6, -3, -5, -4, -2,
+ -4, -4, -3, -1, -2, -4, -4, -4, -1, -3, -2, -6 },
+ /*H*/ { -2, 0, 0, -2, -4, 1, 0, -3, 8, -4, -3, -1,
+ -2, -2, -3, -1, -2, -3, 2, -4, -1, 0, -2, -6 },
+ /*I*/ { -2, -3, -4, -4, -2, -3, -4, -5, -4, 5, 1, -3,
+ 1, -1, -4, -3, -1, -3, -2, 3, -4, -4, -2, -6 },
+ /*L*/ { -2, -3, -4, -5, -2, -3, -4, -4, -3, 1, 4, -3,
+ 2, 0, -3, -3, -2, -2, -2, 1, -4, -3, -2, -6 },
+ /*K*/ { -1, 2, 0, -1, -4, 1, 1, -2, -1, -3, -3, 5,
+ -2, -4, -1, -1, -1, -4, -3, -3, -1, 1, -1, -6 },
+ /*M*/ { -1, -2, -3, -4, -2, 0, -2, -4, -2, 1, 2, -2,
+ 6, 0, -3, -2, -1, -2, -2, 1, -3, -2, -1, -6 },
+ /*F*/ { -3, -4, -4, -4, -3, -4, -4, -4, -2, -1, 0, -4,
+ 0, 6, -4, -3, -2, 0, 3, -1, -4, -4, -2, -6 },
+ /*P*/ { -1, -2, -3, -2, -4, -2, -2, -3, -3, -4, -3, -1,
+ -3, -4, 8, -1, -2, -5, -4, -3, -2, -2, -2, -6 },
+ /*S*/ { 1, -1, 0, -1, -2, 0, 0, -1, -1, -3, -3, -1,
+ -2, -3, -1, 5, 1, -4, -2, -2, 0, 0, -1, -6 },
+ /*T*/ { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -2, -1,
+ -1, -2, -2, 1, 5, -4, -2, 0, -1, -1, -1, -6 },
+ /*W*/ { -3, -4, -4, -6, -3, -3, -4, -4, -3, -3, -2, -4,
+ -2, 0, -5, -4, -4, 11, 2, -3, -5, -4, -3, -6 },
+ /*Y*/ { -2, -3, -3, -4, -3, -2, -3, -4, 2, -2, -2, -3,
+ -2, 3, -4, -2, -2, 2, 7, -2, -3, -3, -2, -6 },
+ /*V*/ { 0, -3, -4, -4, -1, -3, -3, -4, -4, 3, 1, -3,
+ 1, -1, -3, -2, 0, -3, -2, 4, -4, -3, -1, -6 },
+ /*B*/ { -2, -2, 4, 4, -4, 0, 1, -1, -1, -4, -4, -1,
+ -3, -4, -2, 0, -1, -5, -3, -4, 4, 0, -2, -6 },
+ /*Z*/ { -1, 0, 0, 1, -4, 3, 4, -3, 0, -4, -3, 1,
+ -2, -4, -2, 0, -1, -4, -3, -3, 0, 4, -1, -6 },
+ /*X*/ { -1, -1, -1, -2, -3, -1, -1, -2, -2, -2, -2, -1,
+ -1, -2, -2, -1, -1, -3, -2, -1, -2, -1, -1, -6 },
+ /***/ { -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
+ -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, 1 }
+};
+/* Packed descriptor that exposes the BLOSUM80 table to the NCBISM_* API. */
+const SNCBIPackedScoreMatrix NCBISM_Blosum80 = {
+    /* symbols  */ "ARNDCQEGHILKMFPSTWYVBZX*",
+    /* scores   */ &s_Blosum80PSM[0][0],
+    /* defscore */ -6
+};
diff --git a/util/tables/sm_pam30.c b/util/tables/sm_pam30.c
new file mode 100644
index 00000000..3b62f8ad
--- /dev/null
+++ b/util/tables/sm_pam30.c
@@ -0,0 +1,97 @@
+/* $Id: sm_pam30.c,v 1.1 2003/08/21 19:48:20 ucko Exp $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+* Author: Aaron Ucko (via ./convert_scoremat.pl)
+*
+* File Description:
+* Protein alignment score matrices; shared between the two toolkits.
+*
+* ===========================================================================
+*/
+
+#include <util/tables/raw_scoremat.h>
+
+/* This matrix was produced by "pam" Version 1.0.6 [28-Jul-93] */
+/* PAM 30 substitution matrix, scale = ln(2)/2 = 0.346574 */
+/* Expected score = -5.06, Entropy = 2.57 bits */
+/* Lowest score = -17, Highest score = 13 */
+
+static const TNCBIScore s_Pam30PSM[24][24] = {
+ /* 24 x 24 PAM30 scores. Each row is labelled with its residue; the
+ columns follow the same order, which is also the symbols string of
+ NCBISM_Pam30:
+ A, R, N, D, C, Q, E, G, H, I, L, K,
+ M, F, P, S, T, W, Y, V, B, Z, X, * */
+ /*A*/ { 6, -7, -4, -3, -6, -4, -2, -2, -7, -5, -6, -7,
+ -5, -8, -2, 0, -1,-13, -8, -2, -3, -3, -3,-17 },
+ /*R*/ { -7, 8, -6,-10, -8, -2, -9, -9, -2, -5, -8, 0,
+ -4, -9, -4, -3, -6, -2,-10, -8, -7, -4, -6,-17 },
+ /*N*/ { -4, -6, 8, 2,-11, -3, -2, -3, 0, -5, -7, -1,
+ -9, -9, -6, 0, -2, -8, -4, -8, 6, -3, -3,-17 },
+ /*D*/ { -3,-10, 2, 8,-14, -2, 2, -3, -4, -7,-12, -4,
+ -11,-15, -8, -4, -5,-15,-11, -8, 6, 1, -5,-17 },
+ /*C*/ { -6, -8,-11,-14, 10,-14,-14, -9, -7, -6,-15,-14,
+ -13,-13, -8, -3, -8,-15, -4, -6,-12,-14, -9,-17 },
+ /*Q*/ { -4, -2, -3, -2,-14, 8, 1, -7, 1, -8, -5, -3,
+ -4,-13, -3, -5, -5,-13,-12, -7, -3, 6, -5,-17 },
+ /*E*/ { -2, -9, -2, 2,-14, 1, 8, -4, -5, -5, -9, -4,
+ -7,-14, -5, -4, -6,-17, -8, -6, 1, 6, -5,-17 },
+ /*G*/ { -2, -9, -3, -3, -9, -7, -4, 6, -9,-11,-10, -7,
+ -8, -9, -6, -2, -6,-15,-14, -5, -3, -5, -5,-17 },
+ /*H*/ { -7, -2, 0, -4, -7, 1, -5, -9, 9, -9, -6, -6,
+ -10, -6, -4, -6, -7, -7, -3, -6, -1, -1, -5,-17 },
+ /*I*/ { -5, -5, -5, -7, -6, -8, -5,-11, -9, 8, -1, -6,
+ -1, -2, -8, -7, -2,-14, -6, 2, -6, -6, -5,-17 },
+ /*L*/ { -6, -8, -7,-12,-15, -5, -9,-10, -6, -1, 7, -8,
+ 1, -3, -7, -8, -7, -6, -7, -2, -9, -7, -6,-17 },
+ /*K*/ { -7, 0, -1, -4,-14, -3, -4, -7, -6, -6, -8, 7,
+ -2,-14, -6, -4, -3,-12, -9, -9, -2, -4, -5,-17 },
+ /*M*/ { -5, -4, -9,-11,-13, -4, -7, -8,-10, -1, 1, -2,
+ 11, -4, -8, -5, -4,-13,-11, -1,-10, -5, -5,-17 },
+ /*F*/ { -8, -9, -9,-15,-13,-13,-14, -9, -6, -2, -3,-14,
+ -4, 9,-10, -6, -9, -4, 2, -8,-10,-13, -8,-17 },
+ /*P*/ { -2, -4, -6, -8, -8, -3, -5, -6, -4, -8, -7, -6,
+ -8,-10, 8, -2, -4,-14,-13, -6, -7, -4, -5,-17 },
+ /*S*/ { 0, -3, 0, -4, -3, -5, -4, -2, -6, -7, -8, -4,
+ -5, -6, -2, 6, 0, -5, -7, -6, -1, -5, -3,-17 },
+ /*T*/ { -1, -6, -2, -5, -8, -5, -6, -6, -7, -2, -7, -3,
+ -4, -9, -4, 0, 7,-13, -6, -3, -3, -6, -4,-17 },
+ /*W*/ {-13, -2, -8,-15,-15,-13,-17,-15, -7,-14, -6,-12,
+ -13, -4,-14, -5,-13, 13, -5,-15,-10,-14,-11,-17 },
+ /*Y*/ { -8,-10, -4,-11, -4,-12, -8,-14, -3, -6, -7, -9,
+ -11, 2,-13, -7, -6, -5, 10, -7, -6, -9, -7,-17 },
+ /*V*/ { -2, -8, -8, -8, -6, -7, -6, -5, -6, 2, -2, -9,
+ -1, -8, -6, -6, -3,-15, -7, 7, -8, -6, -5,-17 },
+ /*B*/ { -3, -7, 6, 6,-12, -3, 1, -3, -1, -6, -9, -2,
+ -10,-10, -7, -1, -3,-10, -6, -8, 6, 0, -5,-17 },
+ /*Z*/ { -3, -4, -3, 1,-14, 6, 6, -5, -1, -6, -7, -4,
+ -5,-13, -4, -5, -6,-14, -9, -6, 0, 6, -5,-17 },
+ /*X*/ { -3, -6, -3, -5, -9, -5, -5, -5, -5, -5, -6, -5,
+ -5, -8, -5, -3, -4,-11, -7, -5, -5, -5, -5,-17 },
+ /***/ {-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,
+ -17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17, 1 }
+};
+/* Packed descriptor that exposes the PAM30 table to the NCBISM_* API. */
+const SNCBIPackedScoreMatrix NCBISM_Pam30 = {
+    /* symbols  */ "ARNDCQEGHILKMFPSTWYVBZX*",
+    /* scores   */ &s_Pam30PSM[0][0],
+    /* defscore */ -17
+};
diff --git a/util/tables/sm_pam70.c b/util/tables/sm_pam70.c
new file mode 100644
index 00000000..900398b0
--- /dev/null
+++ b/util/tables/sm_pam70.c
@@ -0,0 +1,97 @@
+/* $Id: sm_pam70.c,v 1.1 2003/08/21 19:48:20 ucko Exp $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+* Author: Aaron Ucko (via ./convert_scoremat.pl)
+*
+* File Description:
+* Protein alignment score matrices; shared between the two toolkits.
+*
+* ===========================================================================
+*/
+
+#include <util/tables/raw_scoremat.h>
+
+/* This matrix was produced by "pam" Version 1.0.6 [28-Jul-93] */
+/* PAM 70 substitution matrix, scale = ln(2)/2 = 0.346574 */
+/* Expected score = -2.77, Entropy = 1.60 bits */
+/* Lowest score = -11, Highest score = 13 */
+
+static const TNCBIScore s_Pam70PSM[24][24] = {
+ /* 24 x 24 PAM70 scores. Each row is labelled with its residue; the
+ columns follow the same order, which is also the symbols string of
+ NCBISM_Pam70:
+ A, R, N, D, C, Q, E, G, H, I, L, K,
+ M, F, P, S, T, W, Y, V, B, Z, X, * */
+ /*A*/ { 5, -4, -2, -1, -4, -2, -1, 0, -4, -2, -4, -4,
+ -3, -6, 0, 1, 1, -9, -5, -1, -1, -1, -2,-11 },
+ /*R*/ { -4, 8, -3, -6, -5, 0, -5, -6, 0, -3, -6, 2,
+ -2, -7, -2, -1, -4, 0, -7, -5, -4, -2, -3,-11 },
+ /*N*/ { -2, -3, 6, 3, -7, -1, 0, -1, 1, -3, -5, 0,
+ -5, -6, -3, 1, 0, -6, -3, -5, 5, -1, -2,-11 },
+ /*D*/ { -1, -6, 3, 6, -9, 0, 3, -1, -1, -5, -8, -2,
+ -7,-10, -4, -1, -2,-10, -7, -5, 5, 2, -3,-11 },
+ /*C*/ { -4, -5, -7, -9, 9, -9, -9, -6, -5, -4,-10, -9,
+ -9, -8, -5, -1, -5,-11, -2, -4, -8, -9, -6,-11 },
+ /*Q*/ { -2, 0, -1, 0, -9, 7, 2, -4, 2, -5, -3, -1,
+ -2, -9, -1, -3, -3, -8, -8, -4, -1, 5, -2,-11 },
+ /*E*/ { -1, -5, 0, 3, -9, 2, 6, -2, -2, -4, -6, -2,
+ -4, -9, -3, -2, -3,-11, -6, -4, 2, 5, -3,-11 },
+ /*G*/ { 0, -6, -1, -1, -6, -4, -2, 6, -6, -6, -7, -5,
+ -6, -7, -3, 0, -3,-10, -9, -3, -1, -3, -3,-11 },
+ /*H*/ { -4, 0, 1, -1, -5, 2, -2, -6, 8, -6, -4, -3,
+ -6, -4, -2, -3, -4, -5, -1, -4, 0, 1, -3,-11 },
+ /*I*/ { -2, -3, -3, -5, -4, -5, -4, -6, -6, 7, 1, -4,
+ 1, 0, -5, -4, -1, -9, -4, 3, -4, -4, -3,-11 },
+ /*L*/ { -4, -6, -5, -8,-10, -3, -6, -7, -4, 1, 6, -5,
+ 2, -1, -5, -6, -4, -4, -4, 0, -6, -4, -4,-11 },
+ /*K*/ { -4, 2, 0, -2, -9, -1, -2, -5, -3, -4, -5, 6,
+ 0, -9, -4, -2, -1, -7, -7, -6, -1, -2, -3,-11 },
+ /*M*/ { -3, -2, -5, -7, -9, -2, -4, -6, -6, 1, 2, 0,
+ 10, -2, -5, -3, -2, -8, -7, 0, -6, -3, -3,-11 },
+ /*F*/ { -6, -7, -6,-10, -8, -9, -9, -7, -4, 0, -1, -9,
+ -2, 8, -7, -4, -6, -2, 4, -5, -7, -9, -5,-11 },
+ /*P*/ { 0, -2, -3, -4, -5, -1, -3, -3, -2, -5, -5, -4,
+ -5, -7, 7, 0, -2, -9, -9, -3, -4, -2, -3,-11 },
+ /*S*/ { 1, -1, 1, -1, -1, -3, -2, 0, -3, -4, -6, -2,
+ -3, -4, 0, 5, 2, -3, -5, -3, 0, -2, -1,-11 },
+ /*T*/ { 1, -4, 0, -2, -5, -3, -3, -3, -4, -1, -4, -1,
+ -2, -6, -2, 2, 6, -8, -4, -1, -1, -3, -2,-11 },
+ /*W*/ { -9, 0, -6,-10,-11, -8,-11,-10, -5, -9, -4, -7,
+ -8, -2, -9, -3, -8, 13, -3,-10, -7,-10, -7,-11 },
+ /*Y*/ { -5, -7, -3, -7, -2, -8, -6, -9, -1, -4, -4, -7,
+ -7, 4, -9, -5, -4, -3, 9, -5, -4, -7, -5,-11 },
+ /*V*/ { -1, -5, -5, -5, -4, -4, -4, -3, -4, 3, 0, -6,
+ 0, -5, -3, -3, -1,-10, -5, 6, -5, -4, -2,-11 },
+ /*B*/ { -1, -4, 5, 5, -8, -1, 2, -1, 0, -4, -6, -1,
+ -6, -7, -4, 0, -1, -7, -4, -5, 5, 1, -2,-11 },
+ /*Z*/ { -1, -2, -1, 2, -9, 5, 5, -3, 1, -4, -4, -2,
+ -3, -9, -2, -2, -3,-10, -7, -4, 1, 5, -3,-11 },
+ /*X*/ { -2, -3, -2, -3, -6, -2, -3, -3, -3, -3, -4, -3,
+ -3, -5, -3, -1, -2, -7, -5, -2, -2, -3, -3,-11 },
+ /***/ {-11,-11,-11,-11,-11,-11,-11,-11,-11,-11,-11,-11,
+ -11,-11,-11,-11,-11,-11,-11,-11,-11,-11,-11, 1 }
+};
+/* Packed descriptor that exposes the PAM70 table to the NCBISM_* API. */
+const SNCBIPackedScoreMatrix NCBISM_Pam70 = {
+    /* symbols  */ "ARNDCQEGHILKMFPSTWYVBZX*",
+    /* scores   */ &s_Pam70PSM[0][0],
+    /* defscore */ -11
+};
diff --git a/util/tables/tables_export.h b/util/tables/tables_export.h
new file mode 100644
index 00000000..6fdf43c4
--- /dev/null
+++ b/util/tables/tables_export.h
@@ -0,0 +1,57 @@
+#ifndef UTIL_TABLES___TABLES_EXPORT__H
+#define UTIL_TABLES___TABLES_EXPORT__H
+
+/* $Id: tables_export.h,v 1.1 2003/08/21 19:50:51 ucko Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Anatoliy Kuznetsov, Mike DiCuccio
+ *
+ * File Description:
+ * Defines to provide correct exporting from TABLES DLL in Windows.
+ * These are necessary to compile DLLs with Visual C++ - exports must be
+ * explicitly labeled as such.
+ */
+
+
+/*
+ * No-op definition: in the C Toolkit the export marker expands to nothing.
+ */
+
+# define NCBI_TABLES_EXPORT
+
+
+
+/*
+ * ==========================================================================
+ *
+ * $Log: tables_export.h,v $
+ * Revision 1.1 2003/08/21 19:50:51 ucko
+ * Add C-Toolkit-specific export setup for tables library
+ *
+ *
+ * ==========================================================================
+ */
+
+#endif /* UTIL_TABLES___TABLES_EXPORT__H */