diff options
author | Aaron M. Ucko <ucko@debian.org> | 2005-03-23 23:49:09 +0000 |
---|---|---|
committer | Aaron M. Ucko <ucko@debian.org> | 2005-03-23 23:49:09 +0000 |
commit | 5349ec8772bc373e4c2349a04e57d7952c006326 (patch) | |
tree | b733fe1df8c0c2d2418b3ce82ebcbd3d0db12a1f /util | |
parent | 0eff2d00595b4adcf6f1c4e6bbbcf0f416c70310 (diff) |
Load ncbi (6.1.20031028) into ncbi-tools6/branches/upstream/current.
Diffstat (limited to 'util')
-rw-r--r-- | util/tables/raw_scoremat.c | 122 | ||||
-rw-r--r-- | util/tables/raw_scoremat.h | 95 | ||||
-rw-r--r-- | util/tables/sm_blosum45.c | 99 | ||||
-rw-r--r-- | util/tables/sm_blosum62.c | 99 | ||||
-rw-r--r-- | util/tables/sm_blosum80.c | 99 | ||||
-rw-r--r-- | util/tables/sm_pam30.c | 97 | ||||
-rw-r--r-- | util/tables/sm_pam70.c | 97 | ||||
-rw-r--r-- | util/tables/tables_export.h | 57 |
8 files changed, 765 insertions, 0 deletions
diff --git a/util/tables/raw_scoremat.c b/util/tables/raw_scoremat.c new file mode 100644 index 00000000..d0c4e47d --- /dev/null +++ b/util/tables/raw_scoremat.c @@ -0,0 +1,122 @@ +/* $Id: raw_scoremat.c,v 1.2 2003/10/02 15:37:34 ivanov Exp $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * Author: Aaron Ucko + * + * File Description: + * Protein alignment score matrices; shared between the two toolkits. 
+ * + */ + +#include <util/tables/raw_scoremat.h> + +#include <ctype.h> +#include <string.h> + +#include "sm_blosum45.c" +#include "sm_blosum62.c" +#include "sm_blosum80.c" +#include "sm_pam30.c" +#include "sm_pam70.c" + +static const char kNCBIstdaa[] = "-ABCDEFGHIKLMNPQRSTVWXYZU*"; + + +int NCBISM_GetIndex(const SNCBIPackedScoreMatrix* sm, int aa) +{ + const char *p; + + /* Translate to NCBIeaa */ + if (aa >= 0 && aa < sizeof(kNCBIstdaa)) { + aa = kNCBIstdaa[aa]; + } else if (islower(aa)) { + aa = toupper(aa); + } + + p = strchr(sm->symbols, aa); + return p ? p - sm->symbols : -1; +} + + +TNCBIScore NCBISM_GetScore(const SNCBIPackedScoreMatrix* sm, + int aa1, int aa2) +{ + int i1, i2; + i1 = NCBISM_GetIndex(sm, aa1); + i2 = NCBISM_GetIndex(sm, aa2); + if (i1 >=0 && i2 >= 0) { + return sm->scores[i1 * strlen(sm->symbols) + i2]; + } else { + return sm->defscore; + } +} + + +void NCBISM_Unpack(const SNCBIPackedScoreMatrix* psm, + SNCBIFullScoreMatrix* fsm) +{ + const char* sym; + int dim, i, j, aa1, aa2; + + sym = psm->symbols; + dim = strlen(sym); + /* fill with default */ + memset(&fsm->s, psm->defscore, NCBI_FSM_DIM * NCBI_FSM_DIM); + for (i = 0; i < dim; ++i) { + aa1 = sym[i]; + /* get core (NCBIeaa x NCBIeaa) */ + for (j = 0; j < dim; ++j) { + aa2 = sym[j]; + fsm->s[aa1][aa2] = psm->scores[i * dim + j]; + } + /* extend horizontally */ + for (aa2 = 0; aa2 < sizeof(kNCBIstdaa); ++aa2) { + fsm->s[aa1][aa2] = fsm->s[aa1][(int)kNCBIstdaa[aa2]]; + } + for (aa2 = 'a'; aa2 <= 'z'; ++aa2) { + fsm->s[aa1][aa2] = fsm->s[aa1][toupper(aa2)]; + } + } + /* extend vertically */ + for (aa1 = 0; aa1 < sizeof(kNCBIstdaa); ++aa1) { + memcpy(fsm->s[aa1], fsm->s[(int)kNCBIstdaa[aa1]], NCBI_FSM_DIM); + } + for (aa1 = 'a'; aa1 <= 'z'; ++aa1) { + memcpy(fsm->s[aa1], fsm->s[toupper(aa1)], NCBI_FSM_DIM); + } +} + + +/* + * =========================================================================== + * $Log: raw_scoremat.c,v $ + * Revision 1.2 2003/10/02 15:37:34 ivanov + * Get rid of 
compilation warnings + * + * Revision 1.1 2003/08/21 19:48:20 ucko + * Add tables library (shared with C) for raw score matrices, etc. + * + * =========================================================================== + */ diff --git a/util/tables/raw_scoremat.h b/util/tables/raw_scoremat.h new file mode 100644 index 00000000..434e4a0a --- /dev/null +++ b/util/tables/raw_scoremat.h @@ -0,0 +1,95 @@ +#ifndef UTIL_TABLES___SCOREMAT__H +#define UTIL_TABLES___SCOREMAT__H + +/* $Id: raw_scoremat.h,v 1.1 2003/08/21 19:48:19 ucko Exp $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * Author: Aaron Ucko + * + */ + +/** @file scoremat.h + ** Protein alignment score matrices; shared between the two toolkits. 
+ **/
+
+#include <util/tables/tables_export.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** data types
+ **
+ ** TNCBIScore is a single alignment score.  SNCBIPackedScoreMatrix is the
+ ** compact form in which the standard matrices declared at the end of this
+ ** header are provided: a string naming the covered residues plus a flat
+ ** table of scores for every ordered pair of them.
+ **/
+
+typedef signed char TNCBIScore;
+typedef struct SNCBIPackedScoreMatrix {
+    const char*       symbols;  /**< order of residues; NUL-terminated, one
+                                 **  character per row/column of scores */
+    const TNCBIScore* scores;   /**< strlen(symbols) x strlen(symbols),
+                                 **  stored row by row */
+    TNCBIScore        defscore; /**< score for unknown residues */
+} SNCBIPackedScoreMatrix;
+
+/** These two functions aren't very fast, but avoid the memory and time
+ ** overhead of unpacking.
+ ** Residues (aa, aa1, aa2) may be either NCBIstdaa, NCBIeaa, or
+ ** lowercase NCBIeaa, though matrices generally don't cover U.
+ **
+ ** NCBISM_GetIndex returns the position of aa within sm->symbols, or -1
+ ** when the matrix does not cover that residue.
+ ** NCBISM_GetScore returns the score of aa1 against aa2, or sm->defscore
+ ** when either residue is not covered.
+ **/
+extern NCBI_TABLES_EXPORT
+int NCBISM_GetIndex(const SNCBIPackedScoreMatrix* sm, int aa);
+extern NCBI_TABLES_EXPORT
+TNCBIScore NCBISM_GetScore(const SNCBIPackedScoreMatrix* sm,
+                           int aa1, int aa2);
+
+/** Recommended approach: unpack and index directly.
+ **
+ ** NCBISM_Unpack fills fsm from psm; afterwards a score is read as
+ ** fsm->s[aa1][aa2].  Cells for residues the packed matrix does not cover
+ ** hold psm->defscore, and scores are replicated so that NCBIstdaa codes,
+ ** NCBIeaa letters and lowercase letters can all be used as indices.
+ ** Indices must be in the range 0 .. NCBI_FSM_DIM - 1.
+ **/
+#define NCBI_FSM_DIM 128
+typedef struct SNCBIFullScoreMatrix {
+    TNCBIScore s[NCBI_FSM_DIM][NCBI_FSM_DIM];
+} SNCBIFullScoreMatrix;
+
+extern NCBI_TABLES_EXPORT
+void NCBISM_Unpack(const SNCBIPackedScoreMatrix* psm,
+                   SNCBIFullScoreMatrix* fsm);
+
+/** The standard matrices (packed form; pass to the functions above). */
+extern NCBI_TABLES_EXPORT const SNCBIPackedScoreMatrix NCBISM_Blosum45;
+extern NCBI_TABLES_EXPORT const SNCBIPackedScoreMatrix NCBISM_Blosum62;
+extern NCBI_TABLES_EXPORT const SNCBIPackedScoreMatrix NCBISM_Blosum80;
+extern NCBI_TABLES_EXPORT const SNCBIPackedScoreMatrix NCBISM_Pam30;
+extern NCBI_TABLES_EXPORT const SNCBIPackedScoreMatrix NCBISM_Pam70;
+
+#ifdef __cplusplus
+}
+#endif
+
+/*
+* ===========================================================================
+*
+* $Log: raw_scoremat.h,v $
+* Revision 1.1  2003/08/21 19:48:19  ucko
+* Add tables library (shared with C) for raw score matrices, etc. 
+* +* +* =========================================================================== +*/ + +#endif /* UTIL_TABLES___SCOREMAT__H */ diff --git a/util/tables/sm_blosum45.c b/util/tables/sm_blosum45.c new file mode 100644 index 00000000..bfa42b59 --- /dev/null +++ b/util/tables/sm_blosum45.c @@ -0,0 +1,99 @@ +/* $Id: sm_blosum45.c,v 1.1 2003/08/21 19:48:20 ucko Exp $ +* =========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. +* +* =========================================================================== +* +* Author: Aaron Ucko (via ./convert_scoremat.pl) +* +* File Description: +* Protein alignment score matrices; shared between the two toolkits. 
+* +* =========================================================================== +*/ + +#include <util/tables/raw_scoremat.h> + +/* Matrix made by matblas from blosum45.iij */ +/* * column uses minimum score */ +/* BLOSUM Clustered Scoring Matrix in 1/3 Bit Units */ +/* Blocks Database = /data/blocks_5.0/blocks.dat */ +/* Cluster Percentage: >= 45 */ +/* Entropy = 0.3795, Expected = -0.2789 */ + +static const TNCBIScore s_Blosum45PSM[24][24] = { + /* A, R, N, D, C, Q, E, G, H, I, L, K, + M, F, P, S, T, W, Y, V, B, Z, X, * */ + /*A*/ { 5, -2, -1, -2, -1, -1, -1, 0, -2, -1, -1, -1, + -1, -2, -1, 1, 0, -2, -2, 0, -1, -1, 0, -5 }, + /*R*/ { -2, 7, 0, -1, -3, 1, 0, -2, 0, -3, -2, 3, + -1, -2, -2, -1, -1, -2, -1, -2, -1, 0, -1, -5 }, + /*N*/ { -1, 0, 6, 2, -2, 0, 0, 0, 1, -2, -3, 0, + -2, -2, -2, 1, 0, -4, -2, -3, 4, 0, -1, -5 }, + /*D*/ { -2, -1, 2, 7, -3, 0, 2, -1, 0, -4, -3, 0, + -3, -4, -1, 0, -1, -4, -2, -3, 5, 1, -1, -5 }, + /*C*/ { -1, -3, -2, -3, 12, -3, -3, -3, -3, -3, -2, -3, + -2, -2, -4, -1, -1, -5, -3, -1, -2, -3, -2, -5 }, + /*Q*/ { -1, 1, 0, 0, -3, 6, 2, -2, 1, -2, -2, 1, + 0, -4, -1, 0, -1, -2, -1, -3, 0, 4, -1, -5 }, + /*E*/ { -1, 0, 0, 2, -3, 2, 6, -2, 0, -3, -2, 1, + -2, -3, 0, 0, -1, -3, -2, -3, 1, 4, -1, -5 }, + /*G*/ { 0, -2, 0, -1, -3, -2, -2, 7, -2, -4, -3, -2, + -2, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -5 }, + /*H*/ { -2, 0, 1, 0, -3, 1, 0, -2, 10, -3, -2, -1, + 0, -2, -2, -1, -2, -3, 2, -3, 0, 0, -1, -5 }, + /*I*/ { -1, -3, -2, -4, -3, -2, -3, -4, -3, 5, 2, -3, + 2, 0, -2, -2, -1, -2, 0, 3, -3, -3, -1, -5 }, + /*L*/ { -1, -2, -3, -3, -2, -2, -2, -3, -2, 2, 5, -3, + 2, 1, -3, -3, -1, -2, 0, 1, -3, -2, -1, -5 }, + /*K*/ { -1, 3, 0, 0, -3, 1, 1, -2, -1, -3, -3, 5, + -1, -3, -1, -1, -1, -2, -1, -2, 0, 1, -1, -5 }, + /*M*/ { -1, -1, -2, -3, -2, 0, -2, -2, 0, 2, 2, -1, + 6, 0, -2, -2, -1, -2, 0, 1, -2, -1, -1, -5 }, + /*F*/ { -2, -2, -2, -4, -2, -4, -3, -3, -2, 0, 1, -3, + 0, 8, -3, -2, -1, 1, 3, 0, -3, -3, -1, -5 }, + /*P*/ { -1, -2, -2, -1, -4, 
-1, 0, -2, -2, -2, -3, -1, + -2, -3, 9, -1, -1, -3, -3, -3, -2, -1, -1, -5 }, + /*S*/ { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -3, -1, + -2, -2, -1, 4, 2, -4, -2, -1, 0, 0, 0, -5 }, + /*T*/ { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, + -1, -1, -1, 2, 5, -3, -1, 0, 0, -1, 0, -5 }, + /*W*/ { -2, -2, -4, -4, -5, -2, -3, -2, -3, -2, -2, -2, + -2, 1, -3, -4, -3, 15, 3, -3, -4, -2, -2, -5 }, + /*Y*/ { -2, -1, -2, -2, -3, -1, -2, -3, 2, 0, 0, -1, + 0, 3, -3, -2, -1, 3, 8, -1, -2, -2, -1, -5 }, + /*V*/ { 0, -2, -3, -3, -1, -3, -3, -3, -3, 3, 1, -2, + 1, 0, -3, -1, 0, -3, -1, 5, -3, -3, -1, -5 }, + /*B*/ { -1, -1, 4, 5, -2, 0, 1, -1, 0, -3, -3, 0, + -2, -3, -2, 0, 0, -4, -2, -3, 4, 2, -1, -5 }, + /*Z*/ { -1, 0, 0, 1, -3, 4, 4, -2, 0, -3, -2, 1, + -1, -3, -1, 0, -1, -2, -2, -3, 2, 4, -1, -5 }, + /*X*/ { 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 0, 0, -2, -1, -1, -1, -1, -1, -5 }, + /***/ { -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, + -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 1 } +}; +const SNCBIPackedScoreMatrix NCBISM_Blosum45 = { + "ARNDCQEGHILKMFPSTWYVBZX*", + s_Blosum45PSM[0], + -5 +}; diff --git a/util/tables/sm_blosum62.c b/util/tables/sm_blosum62.c new file mode 100644 index 00000000..3f356def --- /dev/null +++ b/util/tables/sm_blosum62.c @@ -0,0 +1,99 @@ +/* $Id: sm_blosum62.c,v 1.1 2003/08/21 19:48:20 ucko Exp $ +* =========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. 
+* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. +* +* =========================================================================== +* +* Author: Aaron Ucko (via ./convert_scoremat.pl) +* +* File Description: +* Protein alignment score matrices; shared between the two toolkits. +* +* =========================================================================== +*/ + +#include <util/tables/raw_scoremat.h> + +/* Matrix made by matblas from blosum62.iij */ +/* * column uses minimum score */ +/* BLOSUM Clustered Scoring Matrix in 1/2 Bit Units */ +/* Blocks Database = /data/blocks_5.0/blocks.dat */ +/* Cluster Percentage: >= 62 */ +/* Entropy = 0.6979, Expected = -0.5209 */ + +static const TNCBIScore s_Blosum62PSM[24][24] = { + /* A, R, N, D, C, Q, E, G, H, I, L, K, + M, F, P, S, T, W, Y, V, B, Z, X, * */ + /*A*/ { 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, + -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4 }, + /*R*/ { -1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, + -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4 }, + /*N*/ { -2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, + -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4 }, + /*D*/ { -2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, + -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4 }, + /*C*/ { 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, + -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4 }, + /*Q*/ { -1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, + 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4 }, + /*E*/ { -1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, + -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4 }, + 
/*G*/ { 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, + -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4 }, + /*H*/ { -2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, + -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4 }, + /*I*/ { -1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, + 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4 }, + /*L*/ { -1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, + 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4 }, + /*K*/ { -1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, + -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4 }, + /*M*/ { -1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, + 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4 }, + /*F*/ { -2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, + 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4 }, + /*P*/ { -1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, + -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4 }, + /*S*/ { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, + -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4 }, + /*T*/ { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, + -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4 }, + /*W*/ { -3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, + -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4 }, + /*Y*/ { -2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, + -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4 }, + /*V*/ { 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, + 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4 }, + /*B*/ { -2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, + -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1, -4 }, + /*Z*/ { -1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, + -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4 }, + /*X*/ { 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4 }, + /***/ { -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1 } +}; +const SNCBIPackedScoreMatrix NCBISM_Blosum62 = { + "ARNDCQEGHILKMFPSTWYVBZX*", + s_Blosum62PSM[0], + -4 +}; diff --git a/util/tables/sm_blosum80.c b/util/tables/sm_blosum80.c new file mode 100644 index 00000000..dad0d652 
--- /dev/null +++ b/util/tables/sm_blosum80.c @@ -0,0 +1,99 @@ +/* $Id: sm_blosum80.c,v 1.1 2003/08/21 19:48:20 ucko Exp $ +* =========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. +* +* =========================================================================== +* +* Author: Aaron Ucko (via ./convert_scoremat.pl) +* +* File Description: +* Protein alignment score matrices; shared between the two toolkits. 
+* +* =========================================================================== +*/ + +#include <util/tables/raw_scoremat.h> + +/* Matrix made by matblas from blosum80.iij */ +/* * column uses minimum score */ +/* BLOSUM Clustered Scoring Matrix in 1/2 Bit Units */ +/* Blocks Database = /data/blocks_5.0/blocks.dat */ +/* Cluster Percentage: >= 80 */ +/* Entropy = 0.9868, Expected = -0.7442 */ + +static const TNCBIScore s_Blosum80PSM[24][24] = { + /* A, R, N, D, C, Q, E, G, H, I, L, K, + M, F, P, S, T, W, Y, V, B, Z, X, * */ + /*A*/ { 5, -2, -2, -2, -1, -1, -1, 0, -2, -2, -2, -1, + -1, -3, -1, 1, 0, -3, -2, 0, -2, -1, -1, -6 }, + /*R*/ { -2, 6, -1, -2, -4, 1, -1, -3, 0, -3, -3, 2, + -2, -4, -2, -1, -1, -4, -3, -3, -2, 0, -1, -6 }, + /*N*/ { -2, -1, 6, 1, -3, 0, -1, -1, 0, -4, -4, 0, + -3, -4, -3, 0, 0, -4, -3, -4, 4, 0, -1, -6 }, + /*D*/ { -2, -2, 1, 6, -4, -1, 1, -2, -2, -4, -5, -1, + -4, -4, -2, -1, -1, -6, -4, -4, 4, 1, -2, -6 }, + /*C*/ { -1, -4, -3, -4, 9, -4, -5, -4, -4, -2, -2, -4, + -2, -3, -4, -2, -1, -3, -3, -1, -4, -4, -3, -6 }, + /*Q*/ { -1, 1, 0, -1, -4, 6, 2, -2, 1, -3, -3, 1, + 0, -4, -2, 0, -1, -3, -2, -3, 0, 3, -1, -6 }, + /*E*/ { -1, -1, -1, 1, -5, 2, 6, -3, 0, -4, -4, 1, + -2, -4, -2, 0, -1, -4, -3, -3, 1, 4, -1, -6 }, + /*G*/ { 0, -3, -1, -2, -4, -2, -3, 6, -3, -5, -4, -2, + -4, -4, -3, -1, -2, -4, -4, -4, -1, -3, -2, -6 }, + /*H*/ { -2, 0, 0, -2, -4, 1, 0, -3, 8, -4, -3, -1, + -2, -2, -3, -1, -2, -3, 2, -4, -1, 0, -2, -6 }, + /*I*/ { -2, -3, -4, -4, -2, -3, -4, -5, -4, 5, 1, -3, + 1, -1, -4, -3, -1, -3, -2, 3, -4, -4, -2, -6 }, + /*L*/ { -2, -3, -4, -5, -2, -3, -4, -4, -3, 1, 4, -3, + 2, 0, -3, -3, -2, -2, -2, 1, -4, -3, -2, -6 }, + /*K*/ { -1, 2, 0, -1, -4, 1, 1, -2, -1, -3, -3, 5, + -2, -4, -1, -1, -1, -4, -3, -3, -1, 1, -1, -6 }, + /*M*/ { -1, -2, -3, -4, -2, 0, -2, -4, -2, 1, 2, -2, + 6, 0, -3, -2, -1, -2, -2, 1, -3, -2, -1, -6 }, + /*F*/ { -3, -4, -4, -4, -3, -4, -4, -4, -2, -1, 0, -4, + 0, 6, -4, -3, -2, 0, 3, -1, -4, -4, -2, -6 }, + 
/*P*/ { -1, -2, -3, -2, -4, -2, -2, -3, -3, -4, -3, -1, + -3, -4, 8, -1, -2, -5, -4, -3, -2, -2, -2, -6 }, + /*S*/ { 1, -1, 0, -1, -2, 0, 0, -1, -1, -3, -3, -1, + -2, -3, -1, 5, 1, -4, -2, -2, 0, 0, -1, -6 }, + /*T*/ { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -2, -1, + -1, -2, -2, 1, 5, -4, -2, 0, -1, -1, -1, -6 }, + /*W*/ { -3, -4, -4, -6, -3, -3, -4, -4, -3, -3, -2, -4, + -2, 0, -5, -4, -4, 11, 2, -3, -5, -4, -3, -6 }, + /*Y*/ { -2, -3, -3, -4, -3, -2, -3, -4, 2, -2, -2, -3, + -2, 3, -4, -2, -2, 2, 7, -2, -3, -3, -2, -6 }, + /*V*/ { 0, -3, -4, -4, -1, -3, -3, -4, -4, 3, 1, -3, + 1, -1, -3, -2, 0, -3, -2, 4, -4, -3, -1, -6 }, + /*B*/ { -2, -2, 4, 4, -4, 0, 1, -1, -1, -4, -4, -1, + -3, -4, -2, 0, -1, -5, -3, -4, 4, 0, -2, -6 }, + /*Z*/ { -1, 0, 0, 1, -4, 3, 4, -3, 0, -4, -3, 1, + -2, -4, -2, 0, -1, -4, -3, -3, 0, 4, -1, -6 }, + /*X*/ { -1, -1, -1, -2, -3, -1, -1, -2, -2, -2, -2, -1, + -1, -2, -2, -1, -1, -3, -2, -1, -2, -1, -1, -6 }, + /***/ { -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, + -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, 1 } +}; +const SNCBIPackedScoreMatrix NCBISM_Blosum80 = { + "ARNDCQEGHILKMFPSTWYVBZX*", + s_Blosum80PSM[0], + -6 +}; diff --git a/util/tables/sm_pam30.c b/util/tables/sm_pam30.c new file mode 100644 index 00000000..3b62f8ad --- /dev/null +++ b/util/tables/sm_pam30.c @@ -0,0 +1,97 @@ +/* $Id: sm_pam30.c,v 1.1 2003/08/21 19:48:20 ucko Exp $ +* =========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. 
+* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. +* +* =========================================================================== +* +* Author: Aaron Ucko (via ./convert_scoremat.pl) +* +* File Description: +* Protein alignment score matrices; shared between the two toolkits. +* +* =========================================================================== +*/ + +#include <util/tables/raw_scoremat.h> + +/* This matrix was produced by "pam" Version 1.0.6 [28-Jul-93] */ +/* PAM 30 substitution matrix, scale = ln(2)/2 = 0.346574 */ +/* Expected score = -5.06, Entropy = 2.57 bits */ +/* Lowest score = -17, Highest score = 13 */ + +static const TNCBIScore s_Pam30PSM[24][24] = { + /* A, R, N, D, C, Q, E, G, H, I, L, K, + M, F, P, S, T, W, Y, V, B, Z, X, * */ + /*A*/ { 6, -7, -4, -3, -6, -4, -2, -2, -7, -5, -6, -7, + -5, -8, -2, 0, -1,-13, -8, -2, -3, -3, -3,-17 }, + /*R*/ { -7, 8, -6,-10, -8, -2, -9, -9, -2, -5, -8, 0, + -4, -9, -4, -3, -6, -2,-10, -8, -7, -4, -6,-17 }, + /*N*/ { -4, -6, 8, 2,-11, -3, -2, -3, 0, -5, -7, -1, + -9, -9, -6, 0, -2, -8, -4, -8, 6, -3, -3,-17 }, + /*D*/ { -3,-10, 2, 8,-14, -2, 2, -3, -4, -7,-12, -4, + -11,-15, -8, -4, -5,-15,-11, -8, 6, 1, -5,-17 }, + /*C*/ { -6, -8,-11,-14, 10,-14,-14, -9, -7, -6,-15,-14, + -13,-13, -8, -3, -8,-15, -4, -6,-12,-14, -9,-17 }, + /*Q*/ { -4, -2, -3, -2,-14, 8, 1, -7, 1, -8, -5, -3, + -4,-13, -3, -5, -5,-13,-12, -7, -3, 6, -5,-17 }, + /*E*/ { -2, -9, -2, 2,-14, 1, 8, -4, -5, -5, -9, -4, + -7,-14, -5, -4, -6,-17, -8, -6, 1, 6, -5,-17 }, + /*G*/ { -2, 
-9, -3, -3, -9, -7, -4, 6, -9,-11,-10, -7, + -8, -9, -6, -2, -6,-15,-14, -5, -3, -5, -5,-17 }, + /*H*/ { -7, -2, 0, -4, -7, 1, -5, -9, 9, -9, -6, -6, + -10, -6, -4, -6, -7, -7, -3, -6, -1, -1, -5,-17 }, + /*I*/ { -5, -5, -5, -7, -6, -8, -5,-11, -9, 8, -1, -6, + -1, -2, -8, -7, -2,-14, -6, 2, -6, -6, -5,-17 }, + /*L*/ { -6, -8, -7,-12,-15, -5, -9,-10, -6, -1, 7, -8, + 1, -3, -7, -8, -7, -6, -7, -2, -9, -7, -6,-17 }, + /*K*/ { -7, 0, -1, -4,-14, -3, -4, -7, -6, -6, -8, 7, + -2,-14, -6, -4, -3,-12, -9, -9, -2, -4, -5,-17 }, + /*M*/ { -5, -4, -9,-11,-13, -4, -7, -8,-10, -1, 1, -2, + 11, -4, -8, -5, -4,-13,-11, -1,-10, -5, -5,-17 }, + /*F*/ { -8, -9, -9,-15,-13,-13,-14, -9, -6, -2, -3,-14, + -4, 9,-10, -6, -9, -4, 2, -8,-10,-13, -8,-17 }, + /*P*/ { -2, -4, -6, -8, -8, -3, -5, -6, -4, -8, -7, -6, + -8,-10, 8, -2, -4,-14,-13, -6, -7, -4, -5,-17 }, + /*S*/ { 0, -3, 0, -4, -3, -5, -4, -2, -6, -7, -8, -4, + -5, -6, -2, 6, 0, -5, -7, -6, -1, -5, -3,-17 }, + /*T*/ { -1, -6, -2, -5, -8, -5, -6, -6, -7, -2, -7, -3, + -4, -9, -4, 0, 7,-13, -6, -3, -3, -6, -4,-17 }, + /*W*/ {-13, -2, -8,-15,-15,-13,-17,-15, -7,-14, -6,-12, + -13, -4,-14, -5,-13, 13, -5,-15,-10,-14,-11,-17 }, + /*Y*/ { -8,-10, -4,-11, -4,-12, -8,-14, -3, -6, -7, -9, + -11, 2,-13, -7, -6, -5, 10, -7, -6, -9, -7,-17 }, + /*V*/ { -2, -8, -8, -8, -6, -7, -6, -5, -6, 2, -2, -9, + -1, -8, -6, -6, -3,-15, -7, 7, -8, -6, -5,-17 }, + /*B*/ { -3, -7, 6, 6,-12, -3, 1, -3, -1, -6, -9, -2, + -10,-10, -7, -1, -3,-10, -6, -8, 6, 0, -5,-17 }, + /*Z*/ { -3, -4, -3, 1,-14, 6, 6, -5, -1, -6, -7, -4, + -5,-13, -4, -5, -6,-14, -9, -6, 0, 6, -5,-17 }, + /*X*/ { -3, -6, -3, -5, -9, -5, -5, -5, -5, -5, -6, -5, + -5, -8, -5, -3, -4,-11, -7, -5, -5, -5, -5,-17 }, + /***/ {-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17, + -17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17, 1 } +}; +const SNCBIPackedScoreMatrix NCBISM_Pam30 = { + "ARNDCQEGHILKMFPSTWYVBZX*", + s_Pam30PSM[0], + -17 +}; diff --git a/util/tables/sm_pam70.c b/util/tables/sm_pam70.c new 
file mode 100644 index 00000000..900398b0 --- /dev/null +++ b/util/tables/sm_pam70.c @@ -0,0 +1,97 @@ +/* $Id: sm_pam70.c,v 1.1 2003/08/21 19:48:20 ucko Exp $ +* =========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. +* +* =========================================================================== +* +* Author: Aaron Ucko (via ./convert_scoremat.pl) +* +* File Description: +* Protein alignment score matrices; shared between the two toolkits. 
+* +* =========================================================================== +*/ + +#include <util/tables/raw_scoremat.h> + +/* This matrix was produced by "pam" Version 1.0.6 [28-Jul-93] */ +/* PAM 70 substitution matrix, scale = ln(2)/2 = 0.346574 */ +/* Expected score = -2.77, Entropy = 1.60 bits */ +/* Lowest score = -11, Highest score = 13 */ + +static const TNCBIScore s_Pam70PSM[24][24] = { + /* A, R, N, D, C, Q, E, G, H, I, L, K, + M, F, P, S, T, W, Y, V, B, Z, X, * */ + /*A*/ { 5, -4, -2, -1, -4, -2, -1, 0, -4, -2, -4, -4, + -3, -6, 0, 1, 1, -9, -5, -1, -1, -1, -2,-11 }, + /*R*/ { -4, 8, -3, -6, -5, 0, -5, -6, 0, -3, -6, 2, + -2, -7, -2, -1, -4, 0, -7, -5, -4, -2, -3,-11 }, + /*N*/ { -2, -3, 6, 3, -7, -1, 0, -1, 1, -3, -5, 0, + -5, -6, -3, 1, 0, -6, -3, -5, 5, -1, -2,-11 }, + /*D*/ { -1, -6, 3, 6, -9, 0, 3, -1, -1, -5, -8, -2, + -7,-10, -4, -1, -2,-10, -7, -5, 5, 2, -3,-11 }, + /*C*/ { -4, -5, -7, -9, 9, -9, -9, -6, -5, -4,-10, -9, + -9, -8, -5, -1, -5,-11, -2, -4, -8, -9, -6,-11 }, + /*Q*/ { -2, 0, -1, 0, -9, 7, 2, -4, 2, -5, -3, -1, + -2, -9, -1, -3, -3, -8, -8, -4, -1, 5, -2,-11 }, + /*E*/ { -1, -5, 0, 3, -9, 2, 6, -2, -2, -4, -6, -2, + -4, -9, -3, -2, -3,-11, -6, -4, 2, 5, -3,-11 }, + /*G*/ { 0, -6, -1, -1, -6, -4, -2, 6, -6, -6, -7, -5, + -6, -7, -3, 0, -3,-10, -9, -3, -1, -3, -3,-11 }, + /*H*/ { -4, 0, 1, -1, -5, 2, -2, -6, 8, -6, -4, -3, + -6, -4, -2, -3, -4, -5, -1, -4, 0, 1, -3,-11 }, + /*I*/ { -2, -3, -3, -5, -4, -5, -4, -6, -6, 7, 1, -4, + 1, 0, -5, -4, -1, -9, -4, 3, -4, -4, -3,-11 }, + /*L*/ { -4, -6, -5, -8,-10, -3, -6, -7, -4, 1, 6, -5, + 2, -1, -5, -6, -4, -4, -4, 0, -6, -4, -4,-11 }, + /*K*/ { -4, 2, 0, -2, -9, -1, -2, -5, -3, -4, -5, 6, + 0, -9, -4, -2, -1, -7, -7, -6, -1, -2, -3,-11 }, + /*M*/ { -3, -2, -5, -7, -9, -2, -4, -6, -6, 1, 2, 0, + 10, -2, -5, -3, -2, -8, -7, 0, -6, -3, -3,-11 }, + /*F*/ { -6, -7, -6,-10, -8, -9, -9, -7, -4, 0, -1, -9, + -2, 8, -7, -4, -6, -2, 4, -5, -7, -9, -5,-11 }, + /*P*/ { 0, -2, -3, -4, -5, -1, -3, 
-3, -2, -5, -5, -4, + -5, -7, 7, 0, -2, -9, -9, -3, -4, -2, -3,-11 }, + /*S*/ { 1, -1, 1, -1, -1, -3, -2, 0, -3, -4, -6, -2, + -3, -4, 0, 5, 2, -3, -5, -3, 0, -2, -1,-11 }, + /*T*/ { 1, -4, 0, -2, -5, -3, -3, -3, -4, -1, -4, -1, + -2, -6, -2, 2, 6, -8, -4, -1, -1, -3, -2,-11 }, + /*W*/ { -9, 0, -6,-10,-11, -8,-11,-10, -5, -9, -4, -7, + -8, -2, -9, -3, -8, 13, -3,-10, -7,-10, -7,-11 }, + /*Y*/ { -5, -7, -3, -7, -2, -8, -6, -9, -1, -4, -4, -7, + -7, 4, -9, -5, -4, -3, 9, -5, -4, -7, -5,-11 }, + /*V*/ { -1, -5, -5, -5, -4, -4, -4, -3, -4, 3, 0, -6, + 0, -5, -3, -3, -1,-10, -5, 6, -5, -4, -2,-11 }, + /*B*/ { -1, -4, 5, 5, -8, -1, 2, -1, 0, -4, -6, -1, + -6, -7, -4, 0, -1, -7, -4, -5, 5, 1, -2,-11 }, + /*Z*/ { -1, -2, -1, 2, -9, 5, 5, -3, 1, -4, -4, -2, + -3, -9, -2, -2, -3,-10, -7, -4, 1, 5, -3,-11 }, + /*X*/ { -2, -3, -2, -3, -6, -2, -3, -3, -3, -3, -4, -3, + -3, -5, -3, -1, -2, -7, -5, -2, -2, -3, -3,-11 }, + /***/ {-11,-11,-11,-11,-11,-11,-11,-11,-11,-11,-11,-11, + -11,-11,-11,-11,-11,-11,-11,-11,-11,-11,-11, 1 } +}; +const SNCBIPackedScoreMatrix NCBISM_Pam70 = { + "ARNDCQEGHILKMFPSTWYVBZX*", + s_Pam70PSM[0], + -11 +}; diff --git a/util/tables/tables_export.h b/util/tables/tables_export.h new file mode 100644 index 00000000..6fdf43c4 --- /dev/null +++ b/util/tables/tables_export.h @@ -0,0 +1,57 @@ +#ifndef UTIL_TABLES___TABLES_EXPORT__H +#define UTIL_TABLES___TABLES_EXPORT__H + +/* $Id: tables_export.h,v 1.1 2003/08/21 19:50:51 ucko Exp $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. 
+ * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * Author: Anatoliy Kuznetsov, Mike DiCuccio + * + * File Description: + * Defines to provide correct exporting from TABLES DLL in Windows. + * These are necessary to compile DLLs with Visual C++ - exports must be + * explicitly labeled as such. + */ + + +/* + * NULL operations for other cases (C Toolkit) + */ + +# define NCBI_TABLES_EXPORT + + + +/* + * ========================================================================== + * + * $Log: tables_export.h,v $ + * Revision 1.1 2003/08/21 19:50:51 ucko + * Add C-Toolkit-specific export setup for tables library + * + * + * ========================================================================== + */ + +#endif /* UTIL_TABLES___TABLES_EXPORT__H */ |