summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAaron M. Ucko <ucko@debian.org>2005-03-24 18:32:05 +0000
committerAaron M. Ucko <ucko@debian.org>2005-03-24 18:32:05 +0000
commitf06fc23cbc179836f402001f24176fc9d5725482 (patch)
tree39e97ad8f13a33296b32a3907f3409b056cf851b
parentccba467ae4f393d7acce357a9847bfe1fb77ccc7 (diff)
Load ncbi (6.1.20040616) into ncbi-tools6/branches/upstream/current.
-rw-r--r--VERSION2
-rw-r--r--access/ent2api.c3
-rw-r--r--access/pmfapi.c46
-rw-r--r--algo/blast/api/blast_format.c433
-rw-r--r--algo/blast/api/blast_format.h44
-rw-r--r--algo/blast/api/blast_returns.c405
-rw-r--r--algo/blast/api/blast_returns.h105
-rw-r--r--algo/blast/api/blast_seq.c16
-rw-r--r--algo/blast/api/blast_seqalign.c164
-rw-r--r--algo/blast/api/blast_seqalign.h15
-rw-r--r--algo/blast/api/blast_tabular.c238
-rw-r--r--algo/blast/api/blast_tabular.h101
-rw-r--r--algo/blast/api/hspstream_queue.c182
-rw-r--r--algo/blast/api/hspstream_queue.h64
-rw-r--r--algo/blast/api/multiseq_src.c6
-rw-r--r--algo/blast/api/seqsrc_readdb.c46
-rw-r--r--algo/blast/api/seqsrc_readdb.h9
-rw-r--r--algo/blast/api/twoseq_api.c262
-rw-r--r--algo/blast/api/twoseq_api.h77
-rw-r--r--algo/blast/core/aa_ungapped.c173
-rw-r--r--algo/blast/core/aa_ungapped.h80
-rw-r--r--algo/blast/core/blast_def.h94
-rw-r--r--algo/blast/core/blast_diagnostics.c80
-rw-r--r--algo/blast/core/blast_diagnostics.h99
-rw-r--r--algo/blast/core/blast_dust.c77
-rw-r--r--algo/blast/core/blast_dust.h74
-rw-r--r--algo/blast/core/blast_encoding.c19
-rw-r--r--algo/blast/core/blast_engine.c402
-rw-r--r--algo/blast/core/blast_engine.h127
-rw-r--r--algo/blast/core/blast_extend.c206
-rw-r--r--algo/blast/core/blast_extend.h107
-rw-r--r--algo/blast/core/blast_filter.c74
-rw-r--r--algo/blast/core/blast_filter.h87
-rw-r--r--algo/blast/core/blast_gapalign.c565
-rw-r--r--algo/blast/core/blast_gapalign.h138
-rw-r--r--algo/blast/core/blast_gapalign_pri.h83
-rw-r--r--algo/blast/core/blast_hits.c691
-rw-r--r--algo/blast/core/blast_hits.h191
-rw-r--r--algo/blast/core/blast_hspstream.c228
-rw-r--r--algo/blast/core/blast_hspstream.h206
-rw-r--r--algo/blast/core/blast_inline.h31
-rw-r--r--algo/blast/core/blast_kappa.c2731
-rw-r--r--algo/blast/core/blast_kappa.h116
-rw-r--r--algo/blast/core/blast_lookup.c59
-rw-r--r--algo/blast/core/blast_lookup.h272
-rw-r--r--algo/blast/core/blast_message.c149
-rw-r--r--algo/blast/core/blast_message.h71
-rw-r--r--algo/blast/core/blast_options.c1079
-rw-r--r--algo/blast/core/blast_options.h379
-rw-r--r--algo/blast/core/blast_psi.c325
-rw-r--r--algo/blast/core/blast_psi.h168
-rw-r--r--algo/blast/core/blast_psi_priv.c1498
-rw-r--r--algo/blast/core/blast_psi_priv.h268
-rw-r--r--algo/blast/core/blast_rps.h68
-rw-r--r--algo/blast/core/blast_seg.c187
-rw-r--r--algo/blast/core/blast_seg.h101
-rw-r--r--algo/blast/core/blast_seqsrc.c92
-rw-r--r--algo/blast/core/blast_seqsrc.h74
-rw-r--r--algo/blast/core/blast_setup.c284
-rw-r--r--algo/blast/core/blast_setup.h130
-rw-r--r--algo/blast/core/blast_stat.c1099
-rw-r--r--algo/blast/core/blast_stat.h336
-rw-r--r--algo/blast/core/blast_traceback.c507
-rw-r--r--algo/blast/core/blast_traceback.h116
-rw-r--r--algo/blast/core/blast_util.c113
-rw-r--r--algo/blast/core/blast_util.h91
-rw-r--r--algo/blast/core/gapinfo.c74
-rw-r--r--algo/blast/core/gapinfo.h92
-rw-r--r--algo/blast/core/greedy_align.c129
-rw-r--r--algo/blast/core/greedy_align.h120
-rw-r--r--algo/blast/core/hspstream_collector.c209
-rw-r--r--algo/blast/core/hspstream_collector.h76
-rw-r--r--algo/blast/core/link_hsps.c88
-rw-r--r--algo/blast/core/link_hsps.h69
-rw-r--r--algo/blast/core/lookup_util.c61
-rw-r--r--algo/blast/core/lookup_util.h54
-rw-r--r--algo/blast/core/lookup_wrap.c67
-rw-r--r--algo/blast/core/lookup_wrap.h73
-rw-r--r--algo/blast/core/matrix_freq_ratios.c456
-rw-r--r--algo/blast/core/matrix_freq_ratios.h323
-rw-r--r--algo/blast/core/mb_lookup.c74
-rw-r--r--algo/blast/core/mb_lookup.h344
-rw-r--r--algo/blast/core/ncbi_math.c179
-rw-r--r--algo/blast/core/ncbi_math.h164
-rw-r--r--algo/blast/core/ncbi_std.c36
-rw-r--r--algo/blast/core/ncbi_std.h75
-rw-r--r--algo/blast/core/pattern.c69
-rw-r--r--algo/blast/core/pattern.h84
-rw-r--r--algo/blast/core/phi_extend.c79
-rw-r--r--algo/blast/core/phi_extend.h79
-rw-r--r--algo/blast/core/phi_lookup.c97
-rw-r--r--algo/blast/core/phi_lookup.h70
-rw-r--r--api/alignmgr.c15
-rw-r--r--api/alignmgr2.c17
-rw-r--r--api/asn2ff1.c25
-rw-r--r--api/asn2ff2.c25
-rw-r--r--api/asn2gnb1.c563
-rw-r--r--api/asn2gnb2.c346
-rw-r--r--api/asn2gnb3.c223
-rw-r--r--api/asn2gnb4.c196
-rw-r--r--api/asn2gnb5.c82
-rw-r--r--api/asn2gnb6.c439
-rw-r--r--api/asn2gnbi.h111
-rw-r--r--api/asn2gnbp.h2
-rw-r--r--api/objmgr.c168
-rw-r--r--api/salpacc.c14
-rw-r--r--api/salprop.c1
-rw-r--r--api/salsap.c57
-rw-r--r--api/seqmgr.c11
-rw-r--r--api/seqport.c256
-rw-r--r--api/seqport.h9
-rw-r--r--api/sequtil.c62
-rw-r--r--api/sqnutil1.c105
-rw-r--r--api/sqnutil2.c240
-rw-r--r--api/sqnutil3.c444
-rw-r--r--api/sqnutils.h11
-rw-r--r--api/tofasta.c31
-rw-r--r--api/txalign.c210
-rw-r--r--api/txalign.h12
-rw-r--r--api/valid.c157
-rw-r--r--api/valid.h7
-rw-r--r--api/valid.msg4
-rw-r--r--api/validerr.h1
-rw-r--r--asn/asn.all134
-rw-r--r--asn/insdseq.asn128
-rw-r--r--asn/seq.asn6
-rw-r--r--asnstat/all.h2909
-rw-r--r--asnstat/asninsdseq.h33
-rw-r--r--asnstat/asnseq.h1052
-rw-r--r--biostruc/cdd/cddserver.c53
-rw-r--r--biostruc/cdd/wrpsbcl3.c27
-rw-r--r--biostruc/cdd/wrpsbtool.c13
-rw-r--r--biostruc/mmdbapi1.c26
-rw-r--r--checkout.date2
-rw-r--r--cn3d/README5
-rw-r--r--connect/ncbi_connection.c86
-rw-r--r--connect/ncbi_connection.h36
-rw-r--r--connect/ncbi_service.c52
-rw-r--r--connect/ncbi_service.h7
-rw-r--r--connect/test/test_assert.h10
-rw-r--r--corelib/ncbifile.c239
-rw-r--r--corelib/ncbifile.h36
-rw-r--r--data/sequin.hlp142
-rw-r--r--demo/asn2fsa.c80
-rw-r--r--demo/asn2gb.c54
-rw-r--r--demo/blast_driver.c193
-rw-r--r--demo/blastall.c9
-rw-r--r--demo/fastacmd.c13
-rw-r--r--demo/fmerge.c445
-rw-r--r--demo/makemat.c8
-rw-r--r--demo/megablast.c9
-rw-r--r--demo/tbl2asn.c29
-rw-r--r--desktop/biosrc.c6
-rw-r--r--desktop/cdrgn.c19
-rw-r--r--desktop/dlgutil1.c19
-rw-r--r--desktop/dlgutil2.c10
-rw-r--r--desktop/e2docsum.c26
-rw-r--r--desktop/gbfview.c89
-rw-r--r--desktop/salsa.c166
-rw-r--r--desktop/seqpanel.c357
-rw-r--r--desktop/vsm.c12
-rw-r--r--doc/README.pbl610
-rw-r--r--doc/asn2gb.txt20
-rw-r--r--doc/blast/bl2seq.html57
-rw-r--r--doc/blast/blast.html1517
-rw-r--r--doc/blast/blastall.html99
-rw-r--r--doc/blast/blastclust.html78
-rw-r--r--doc/blast/blastdb.html336
-rw-r--r--doc/blast/blastftp.html526
-rw-r--r--doc/blast/blastpgp.html520
-rw-r--r--doc/blast/fastacmd.html19
-rw-r--r--doc/blast/filter.html74
-rw-r--r--doc/blast/formatdb.html27
-rw-r--r--doc/blast/impala.html73
-rw-r--r--doc/blast/index.html66
-rw-r--r--doc/blast/megablast.html21
-rw-r--r--doc/blast/netblast.html26
-rw-r--r--doc/blast/rpsblast.html143
-rwxr-xr-xdoc/fwd_check.sh12
-rw-r--r--doc/sequin.htm5
-rwxr-xr-xdoc/tbl2asn.txt30
-rw-r--r--link/mswin/insdseqget.rc11
-rw-r--r--link/winmet/ApplicationStationery/GuiAppDefaults.mcp.xml2
-rw-r--r--link/winmet/LibraryStationery/LibraryStationery.mcp.xml4
-rwxr-xr-xmake/makeApps.met13
-rw-r--r--make/makeall.unx34
-rwxr-xr-xmake/makeallchives11
-rw-r--r--make/makedemo.unx11
-rwxr-xr-xmake/makedis.csh5
-rw-r--r--make/makenet.unx8
-rw-r--r--make/msvc_prj/algo/blast/api/blastapi.dsp24
-rw-r--r--make/msvc_prj/algo/blast/core/blast.dsp42
-rw-r--r--make/msvc_prj/connect/connect.dsp258
-rw-r--r--make/msvc_prj/corelib/ncbi/ncbi.dsp172
-rw-r--r--make/msvc_prj/demo/insdseqget/demo_insdseqget.dsp (renamed from make/msvc_prj/demo/fmerge/demo_fmerge.dsp)22
-rw-r--r--make/msvc_prj/ncbi.dsw225
-rw-r--r--network/blast3/client/blastcl3.c656
-rw-r--r--network/id1arch/idfetch.c7
-rw-r--r--network/id2arch/id2.asn12
-rw-r--r--network/id2arch/seqsplit.asn12
-rwxr-xr-xnetwork/wwwblast/Src/test/run.pl74
-rw-r--r--network/wwwblast/Src/wblast2.c143
-rw-r--r--object/objalign.c3
-rw-r--r--object/objalignloc.c3
-rw-r--r--object/objfeat.c3
-rw-r--r--object/objgbseq.c2
-rw-r--r--object/objinsdseq.c34
-rw-r--r--object/objinsdseq.h8
-rw-r--r--object/objloc.c7
-rw-r--r--object/objproj.c1
-rw-r--r--object/objres.c6
-rw-r--r--object/objseq.c6
-rw-r--r--object/objsset.c7
-rw-r--r--object/objsub.c7
-rw-r--r--object/objtseq.c2
-rw-r--r--platform/linux-alpha.ncbi.mk51
-rw-r--r--sequin/sequin.h10
-rw-r--r--sequin/sequin1.c297
-rw-r--r--sequin/sequin10.c446
-rw-r--r--sequin/sequin2.c200
-rw-r--r--sequin/sequin3.c112
-rw-r--r--sequin/sequin4.c98
-rw-r--r--sequin/sequin5.c12
-rw-r--r--sequin/sequin6.c762
-rw-r--r--sequin/sequin7.c202
-rw-r--r--sequin/sequin8.c36
-rw-r--r--sequin/sequin9.c75
-rw-r--r--tools/blast.c146
-rw-r--r--tools/blastkar.c13
-rw-r--r--tools/blastool.c138
-rw-r--r--tools/blastutl.c68
-rw-r--r--tools/kappa.c237
-rw-r--r--tools/mblast.c178
-rw-r--r--tools/posit.c47
-rw-r--r--tools/posit.h11
-rw-r--r--tools/rpsutil.c11
-rw-r--r--tools/salptool.c4
-rw-r--r--tools/toasn3.c10
-rw-r--r--util/creaders/alnread.c637
-rw-r--r--util/creaders/alnread.h7
-rw-r--r--vibrant/vibutils.c15
-rw-r--r--vibrant/vibwndws.c44
-rw-r--r--webdesign/designs/cubby/storedsearch/Templates/EditSearch.html17
-rw-r--r--webdesign/designs/cubby/storedsearch/Templates/Makefile3
-rw-r--r--webdesign/designs/cubby/storedsearch/Templates/MySearches.html165
-rw-r--r--webdesign/designs/cubby/storedsearch/Templates/PubMed.html4
-rw-r--r--webdesign/designs/cubby/storedsearch/Templates/WhatsNewSummary.html79
-rw-r--r--webdesign/designs/cubby/storedsearch/Templates/cubby.css60
248 files changed, 26142 insertions, 14135 deletions
diff --git a/VERSION b/VERSION
index 7d58a5ad..bb79c44f 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-Wed May 5 12:47:30 EDT 2004
+Wed Jun 16 15:20:05 EDT 2004
diff --git a/access/ent2api.c b/access/ent2api.c
index 01f4b92c..0fcb93f7 100644
--- a/access/ent2api.c
+++ b/access/ent2api.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/29/99
*
-* $Revision: 1.64 $
+* $Revision: 1.65 $
*
* File Description:
*
@@ -1285,6 +1285,7 @@ NLM_EXTERN Boolean ValidateEntrez2InfoPtr (
} else if (StringICmp (last, "Rank") == 0 && StringICmp (str, "Ranked standard deviation") == 0) {
} else if (StringICmp (last, "Book") == 0 && StringICmp (str, "Book's Topic") == 0) {
} else if (StringICmp (last, "Gene Name") == 0 && StringICmp (str, "Gene Name or Description") == 0) {
+ } else if (StringICmp (last, "Submitter") == 0 && StringICmp (str, "Submitter Handle") == 0) {
} else {
sprintf (buf, "Menu names %s [%s] and %s [%s] may be unintended variants", last, dbnames [lastvnp->choice], str, dbnames [vnp->choice]);
ValNodeCopyStr (head, 0, buf);
diff --git a/access/pmfapi.c b/access/pmfapi.c
index f288d240..f433518e 100644
--- a/access/pmfapi.c
+++ b/access/pmfapi.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 5/5/00
*
-* $Revision: 1.63 $
+* $Revision: 1.64 $
*
* File Description:
*
@@ -1236,12 +1236,15 @@ static CharPtr pubseqfetchproc = "PubSeqBioseqFetch";
static CharPtr pubseqseqidtogi = "PubSeqSeqIdForGi";
static CharPtr pubseqgitoseqid = "PubSeqGiForSeqId";
+static Boolean fetch_fail_warn = FALSE;
+static Boolean fetch_fail_warn_set = FALSE;
+
static Int2 LIBCALLBACK PubSeqBioseqFetchFunc (Pointer data)
{
BioseqPtr bsp;
Int4 flags = -1;
- Char id [41];
+ Char id [64];
OMUserDataPtr omdp = NULL;
OMProcControlPtr ompcp;
ObjMgrProcPtr ompp;
@@ -1250,6 +1253,12 @@ static Int2 LIBCALLBACK PubSeqBioseqFetchFunc (Pointer data)
SeqIdPtr sid;
SeqIdPtr sip;
Int4 uid = 0;
+#ifdef OS_UNIX
+ BioseqPtr firstbsp;
+ SeqEntryPtr firstsep;
+ ObjMgrPtr omp;
+ CharPtr str;
+#endif
ompcp = (OMProcControlPtr) data;
if (ompcp == NULL) return OM_MSG_RET_ERROR;
@@ -1277,6 +1286,39 @@ static Int2 LIBCALLBACK PubSeqBioseqFetchFunc (Pointer data)
if (sep == NULL) return OM_MSG_RET_OK;
bsp = BioseqFindInSeqEntry (sip, sep);
+
+#ifdef OS_UNIX
+ if (bsp == NULL) {
+
+ if (! fetch_fail_warn_set) {
+ str = (CharPtr) getenv ("PUBSEQ_FETCH_FAIL_WARN");
+ if (StringDoesHaveText (str)) {
+ if (StringICmp (str, "TRUE") == 0) {
+ fetch_fail_warn = TRUE;
+ }
+ }
+ fetch_fail_warn_set = TRUE;
+ }
+
+ if (fetch_fail_warn) {
+ firstsep = FindNthBioseq (sep, 1);
+ if (firstsep != NULL && IS_Bioseq (firstsep)) {
+ firstbsp = (BioseqPtr) firstsep->data.ptrvalue;
+ if (firstbsp != NULL && firstbsp->id != NULL) {
+ SeqIdWrite (firstbsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
+ ErrPostEx (SEV_ERROR, 0, 0, "PubSeqBioseqFetchFunc requested gi %ld, got %s", uid, id);
+ omp = ObjMgrGet ();
+ if (omp != NULL) {
+ ErrPostEx (SEV_ERROR, 0, 0, "ObjMgr highEid %d totobj %d currobj %d maxtemp %d tempcnt %d hold %d",
+ (int) omp->HighestEntityID, (int) omp->totobj, (int) omp->currobj,
+ (int) omp->maxtemp, (int) omp->tempcnt, (int) omp->hold);
+ }
+ }
+ }
+ }
+ }
+#endif
+
ompcp->output_data = (Pointer) bsp;
ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
diff --git a/algo/blast/api/blast_format.c b/algo/blast/api/blast_format.c
index eb63b53a..4591df8d 100644
--- a/algo/blast/api/blast_format.c
+++ b/algo/blast/api/blast_format.c
@@ -1,4 +1,4 @@
-/* $Id: blast_format.c,v 1.48 2004/05/03 15:24:04 dondosha Exp $
+/* $Id: blast_format.c,v 1.51 2004/06/07 18:40:34 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -34,18 +34,19 @@ Contents: Formatting of BLAST results (SeqAlign)
Detailed Contents:
******************************************************************************
- * $Revision: 1.48 $
+ * $Revision: 1.51 $
* */
-static char const rcsid[] = "$Id: blast_format.c,v 1.48 2004/05/03 15:24:04 dondosha Exp $";
+static char const rcsid[] = "$Id: blast_format.c,v 1.51 2004/06/07 18:40:34 dondosha Exp $";
#include <algo/blast/api/blast_format.h>
#include <algo/blast/api/blast_seq.h>
#include <algo/blast/core/blast_filter.h>
#include <algo/blast/core/blast_util.h>
+#include <algo/blast/core/blast_seqsrc.h>
+#include <algo/blast/api/blast_returns.h>
#include <sequtil.h>
#include <txalign.h>
-#include <readdb.h>
extern Uint1 LIBCALL
BlastGetTypes PROTO((char* blast_program, Boolean* query_is_na,
@@ -91,19 +92,23 @@ extern CharPtr LIBCALL BlastGetReference (Boolean html);
extern CharPtr LIBCALL BlastGetVersionNumber PROTO((void));
extern CharPtr LIBCALL BlastGetReleaseDate PROTO((void));
-typedef struct TxDfDbInfo {
- struct TxDfDbInfo* next;
- Boolean is_protein;
- char* name;
- char* definition;
- char* date;
- Int8 total_length;
- Int4 number_seqs;
- Boolean subset; /* Print the subset message. */
-} TxDfDbInfo, *TxDfDbInfoPtr;
+/** This function is defined in distrib/tools/blastool.c; 1st argument is
+ * a pointer to TxDfDbInfo, defined in blastpri.h, which is identical to
+ * BLAST_DbSummary, defined in algo/blast/api/blast_returns.h
+ */
+extern Boolean LIBCALL
+PrintDbReport PROTO((BLAST_DbSummary* dbinfo, Int4 line_length, FILE *outfp));
+
+/** The following 3 functions are from distrib/tools/readdb.h */
+extern Boolean LIBCALL
+ReadDBBioseqFetchEnable PROTO((CharPtr program, CharPtr dbname, Boolean is_na,
+ Boolean now));
+
+extern void LIBCALL ReadDBBioseqFetchDisable PROTO((void));
extern Boolean LIBCALL
-PrintDbReport PROTO((TxDfDbInfo* dbinfo, Int4 line_length, FILE *outfp));
+PrintDbInformation PROTO((CharPtr database, Boolean is_aa, Int4 line_length,
+ FILE *outfp, Boolean html));
Int2 BlastFormattingOptionsNew(Uint1 program_number, char* file_out,
Int4 num_descriptions, Int4 num_alignments, Int4 align_view,
@@ -395,7 +400,7 @@ static MBXml* MBXmlInit(AsnIoPtr aip, CharPtr program, CharPtr database,
Int2 BLAST_FormatResults(SeqAlignPtr head, char* blast_database,
char* blast_program, Int4 num_queries,
SeqLocPtr query_slp, BlastMaskLoc* blast_mask,
- BlastFormattingOptions* format_options, Boolean is_ooframe)
+ const BlastFormattingOptions* format_options, Boolean is_ooframe)
{
SeqAlignPtr seqalign = head, sap, next_seqalign;
SeqLocPtr mask_loc, next_mask_loc = NULL, tmp_loc = NULL, mask_loc_head;
@@ -598,277 +603,20 @@ Int2 BLAST_FormatResults(SeqAlignPtr head, char* blast_database,
return 0;
}
-static TxDfDbInfo* LIBCALL
-TxDfDbInfoDestruct (TxDfDbInfo* dbinfo)
-{
- TxDfDbInfo* next;
-
- if (dbinfo == NULL)
- return NULL;
-
- while (dbinfo)
- {
- sfree(dbinfo->name);
- sfree(dbinfo->definition);
- sfree(dbinfo->date);
- next = dbinfo->next;
- sfree(dbinfo);
- dbinfo = next;
- }
-
- return dbinfo;
-}
-
-static TxDfDbInfo* BLAST_GetDbInfo(ReadDBFILEPtr rdfp)
-{
- TxDfDbInfo* dbinfo,* head = NULL,* dbinfo_var = NULL;
- char* chptr;
-
- while (rdfp) {
- dbinfo = calloc(1, sizeof(TxDfDbInfo));
- dbinfo->name = strdup(readdb_get_filename(rdfp));
-
- if((chptr = readdb_get_title(rdfp)) == NULL)
- chptr = readdb_get_filename(rdfp);
- dbinfo->definition = strdup(chptr);
-
- dbinfo->date = strdup(readdb_get_date(rdfp));
-
- dbinfo->is_protein = readdb_is_prot(rdfp);
-
- if (rdfp->aliaslen)
- dbinfo->total_length = rdfp->aliaslen;
- else
- dbinfo->total_length = readdb_get_dblen(rdfp);
- if (rdfp->aliasnseq)
- dbinfo->number_seqs = rdfp->aliasnseq;
- else
- dbinfo->number_seqs = readdb_get_num_entries(rdfp);
- if (head == NULL) {
- head = dbinfo;
- dbinfo_var = dbinfo;
- } else {
- dbinfo_var->next = dbinfo;
- dbinfo_var = dbinfo_var->next;
- }
- rdfp = rdfp->next;
- }
- return head;
-}
-/*
- adds the new string to the buffer, separating by a tilde.
- Checks the size of the buffer for FormatBlastParameters and
- allocates longer replacement if needed.
-*/
-
-static Boolean
-add_string_to_bufferEx(char* buffer, char* *old, Int2* old_length, Boolean add_tilde)
-
-{
- char* new,* ptr;
- Int2 length, new_length;
-
- length = (StringLen(*old));
-
- if((Int2)(StringLen(buffer)+length+3) > *old_length)
- {
- new_length = *old_length + 255;
- new = calloc(new_length, sizeof(char));
- if (*old_length > 0 && *old != NULL)
- {
- memcpy(new, *old, *old_length);
- sfree(*old);
- }
- *old = new;
- *old_length = new_length;
- }
-
- ptr = *old;
- ptr += length;
- if (add_tilde)
- {
- *ptr = '~';
- ptr++;
- }
-
- while (*buffer != NULLB)
- {
- *ptr = *buffer;
- buffer++; ptr++;
- }
-
- return TRUE;
-}
-
-static Boolean
-add_string_to_buffer(char* buffer, char* *old, Int2* old_length)
-
-{
- return add_string_to_bufferEx(buffer, old, old_length, TRUE);
-}
-
-
-
-/*
- Formats the BLAST parameters for the BLAST report.
- One char* is returned, newlines are indicated by tildes ('~').
-*/
-
-
-static char*
-FormatBlastParameters(Uint1 program_number,
- BlastScoringOptions* score_options,
- BlastScoreBlk* sbp, LookupTableOptions* lookup_options,
- BlastInitialWordOptions* word_options,
- BlastExtensionOptions* ext_options,
- BlastHitSavingOptions* hit_options,
- BlastQueryInfo* query_info, ReadDBFILEPtr rdfp,
- BlastReturnStat* return_stats)
-{
- Int4 cutoff = 0;
- char buffer[128];
- char* ret_buffer;
- Int2 ret_buffer_length;
- Int4 num_entries;
- Int8 total_length;
- Int4 qlen;
- double evalue;
- Boolean single_query = (query_info->last_context <= 1);
- Blast_KarlinBlk* kbp;
-
- ret_buffer = NULL;
- ret_buffer_length = 0;
-
-
- if (score_options->matrix) {
- sprintf(buffer, "Matrix: %s", score_options->matrix);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- }
-
- if (score_options->gapped_calculation) {
- sprintf(buffer, "Gap Penalties: Existence: %ld, Extension: %ld",
- (long) score_options->gap_open,
- (long) score_options->gap_extend);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- }
-
- if (rdfp) {
- readdb_get_totals_ex(rdfp, &total_length, &num_entries, TRUE);
- } else {
- num_entries = 1;
- total_length = 1000;
- }
-
- sprintf(buffer, "Number of Sequences: %ld", (long) num_entries);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- if (return_stats) {
- sprintf(buffer, "Number of Hits to DB: %s",
- Nlm_Int8tostr((Int8) return_stats->db_hits, 1));
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
-
- sprintf(buffer, "Number of extensions: %ld",
- (long) return_stats->init_extends);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- sprintf(buffer, "Number of successful extensions: %ld",
- (long) return_stats->good_init_extends);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
-
- if (hit_options->expect_value > 0.1) {
- sprintf(buffer, "Number of sequences better than %4.1f: %ld",
- hit_options->expect_value,
- (long) return_stats->number_of_seqs_better_E);
- } else {
- sprintf(buffer, "Number of sequences better than %3.1e: %ld",
- hit_options->expect_value,
- (long) return_stats->number_of_seqs_better_E);
- }
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
-
- if (score_options->gapped_calculation) {
- sprintf(buffer,
- "Number of HSP's better than %4.1f without gapping: %ld",
- hit_options->expect_value,
- (long) return_stats->prelim_gap_no_contest);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- sprintf(buffer,
- "Number of HSP's successfully gapped in prelim test: %ld",
- (long) return_stats->prelim_gap_passed);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- }
- }
- /* Total query length does not include the first and last sentinel byte */
- qlen = query_info->context_offsets[query_info->last_context+1] - 1;
- sprintf(buffer, "length of query: %ld", (long)qlen);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- sprintf(buffer, "length of database: %s", Nlm_Int8tostr (total_length, 1));
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
-
- if (single_query) {
- sprintf(buffer, "effective search space used: %s",
- Nlm_Int8tostr(
- query_info->eff_searchsp_array[query_info->first_context], 1));
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- }
- sprintf(buffer, "T: %ld", (long) lookup_options->threshold);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- sprintf(buffer, "A: %ld", (long) word_options->window_size);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
-
- if (!score_options->gapped_calculation)
- kbp = sbp->kbp[query_info->first_context];
- else
- kbp = sbp->kbp_gap[query_info->first_context];
-
- sprintf(buffer, "X1: %ld (%4.1f bits)",
- (long)return_stats->x_drop_ungapped, word_options->x_dropoff);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- if (score_options->gapped_calculation) {
- sprintf(buffer, "X2: %ld (%4.1f bits)",
- (long)return_stats->x_drop_gap, ext_options->gap_x_dropoff);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- sprintf(buffer, "X3: %ld (%4.1f bits)",
- (long)return_stats->x_drop_gap_final,
- ext_options->gap_x_dropoff_final);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
-
- sprintf(buffer, "S1: %ld (%4.1f bits)",
- (long)return_stats->gap_trigger, ext_options->gap_trigger);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- }
-
- cutoff = 0;
- if (single_query) {
- Int4 context = query_info->first_context;
- double searchsp = (double) query_info->eff_searchsp_array[context];
-
- /* For translated RPS blast the search space must be scaled down */
- if (program_number == blast_type_rpstblastn)
- searchsp = searchsp / NUM_FRAMES;
-
- evalue = hit_options->expect_value;
- BLAST_Cutoffs(&cutoff, &evalue, kbp, searchsp, FALSE, 0);
- sprintf(buffer, "S2: %ld (%4.1f bits)", (long) cutoff,
- (((cutoff)*(kbp->Lambda))-(kbp->logK))/NCBIMATH_LN2);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- }
- return ret_buffer;
-}
-
Int2 PrintOutputFooter(Uint1 program_number,
- BlastFormattingOptions* format_options,
- BlastScoringOptions* score_options, BlastScoreBlk* sbp,
- LookupTableOptions* lookup_options,
- BlastInitialWordOptions* word_options,
- BlastExtensionOptions* ext_options,
- BlastHitSavingOptions* hit_options,
- BlastQueryInfo* query_info, char* dbname,
- BlastReturnStat* return_stats,
- Boolean db_is_na)
+ const BlastFormattingOptions* format_options,
+ const BlastScoringOptions* score_options, const BlastScoreBlk* sbp,
+ const LookupTableOptions* lookup_options,
+ const BlastInitialWordOptions* word_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastQueryInfo* query_info, const BlastSeqSrc* seq_src,
+ const BlastDiagnostics* diagnostics)
{
FILE *outfp;
- TxDfDbInfo* dbinfo_head,* dbinfo;
+ BLAST_DbSummary* dbinfo_head,* dbinfo;
char* params_buffer;
- ReadDBFILEPtr rdfp = NULL;
if (!format_options || !format_options->outfp)
return -1;
@@ -882,14 +630,12 @@ Int2 PrintOutputFooter(Uint1 program_number,
fprintf(outfp, "<PRE>\n");
init_buff_ex(85);
- if (dbname && (rdfp = readdb_new(dbname, !db_is_na))) {
- dbinfo_head = BLAST_GetDbInfo(rdfp);
+ dbinfo_head = Blast_GetDbSummary(seq_src);
- for (dbinfo = dbinfo_head; dbinfo; dbinfo = dbinfo->next) {
- PrintDbReport(dbinfo, 70, outfp);
- }
- dbinfo_head = TxDfDbInfoDestruct(dbinfo_head);
+ for (dbinfo = dbinfo_head; dbinfo; dbinfo = dbinfo->next) {
+ PrintDbReport(dbinfo, 70, outfp);
}
+ dbinfo_head = Blast_DbSummaryFree(dbinfo_head);
if (sbp && sbp->kbp) {
Blast_KarlinBlk* ka_params;
@@ -908,20 +654,19 @@ Int2 PrintOutputFooter(Uint1 program_number,
}
params_buffer =
- FormatBlastParameters(program_number, score_options, sbp,
- lookup_options, word_options, ext_options, hit_options, query_info,
- rdfp, return_stats);
+ Blast_GetParametersBuffer(program_number, score_options, sbp,
+ lookup_options, word_options, ext_options,
+ hit_options, eff_len_options, query_info,
+ seq_src, diagnostics);
PrintTildeSepLines(params_buffer, 70, outfp);
sfree(params_buffer);
free_buff();
- readdb_destruct(rdfp);
-
return 0;
}
-Int2 BLAST_PrintOutputHeader(BlastFormattingOptions* format_options,
+Int2 BLAST_PrintOutputHeader(const BlastFormattingOptions* format_options,
Boolean is_megablast, char* dbname, Boolean is_protein)
{
if (format_options->align_view < 7) {
@@ -965,24 +710,15 @@ Int2 BLAST_PrintOutputHeader(BlastFormattingOptions* format_options,
#define BUFFER_LENGTH 256
-static void
-PrintSeqDefline(SeqIdPtr sip, char* descr, char** buffer_ptr,
+void
+Blast_SeqIdGetDefLine(SeqIdPtr sip, char* descr, char** buffer_ptr,
Boolean ncbi_gi, Boolean accession_only, Boolean seqid_only,
Boolean believe_local_id)
{
- char* seqid_buffer = NULL, *title = NULL, *defline_buffer;
+ char* seqid_buffer = NULL, *title = NULL, *defline_buffer = NULL;
Int4 gi;
Boolean numeric_sip_type = FALSE;
- if (!descr) {
- /* Get the title */
- BioseqPtr bsp = BioseqLockById(sip);
- title = strdup(BioseqGetTitle(bsp));
- BioseqUnlock(bsp);
- } else {
- title = descr;
- }
-
if ((believe_local_id || sip->choice != SEQID_LOCAL) &&
(sip->choice != SEQID_GENERAL ||
StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID"))) {
@@ -998,8 +734,20 @@ PrintSeqDefline(SeqIdPtr sip, char* descr, char** buffer_ptr,
GetAccessionFromSeqId(SeqIdFindBestAccession(sip),
&gi, &seqid_buffer);
}
- } else if (seqid_only && title) {
- seqid_buffer = StringTokMT(title, " \t\n\r", &title);
+ } else {
+ if (!descr) {
+ /* Get the title */
+ BioseqPtr bsp = BioseqLockById(sip);
+ if (bsp) {
+ title = strdup(BioseqGetTitle(bsp));
+ BioseqUnlock(bsp);
+ }
+ } else {
+ title = descr;
+ }
+
+ if (seqid_only)
+ seqid_buffer = StringTokMT(title, " \t\n\r", &title);
}
if (numeric_sip_type && !seqid_buffer) {
@@ -1021,35 +769,13 @@ PrintSeqDefline(SeqIdPtr sip, char* descr, char** buffer_ptr,
*buffer_ptr = defline_buffer;
}
-static void ScoreAndEvalueToBuffers(double bit_score, double evalue,
- char* *bit_score_buf, char* *evalue_buf)
-{
- if (evalue < 1.0e-180)
- sprintf(*evalue_buf, "0.0");
- else if (evalue < 1.0e-99)
- sprintf(*evalue_buf, "%2.0e", evalue);
- else if (evalue < 0.0009)
- sprintf(*evalue_buf, "%3.1e", evalue);
- else if (evalue < 1.0)
- sprintf(*evalue_buf, "%4.3f", evalue);
- else
- sprintf(*evalue_buf, "%5.1f", evalue);
-
- if (bit_score > 9999)
- sprintf(*bit_score_buf, "%4.3e", bit_score);
- else if (bit_score > 99.9)
- sprintf(*bit_score_buf, "%4.1f", bit_score);
- else
- sprintf(*bit_score_buf, "%4.2f", bit_score);
-}
-
/* This function might serve as a starting point for a callback function
* that prints results before the traceback stage, e.g. the on-the-fly
* tabular output, a la the -D3 option of the old megablast.
*/
void BLAST_PrintIntermediateResults(BlastHSPResults* results,
BlastQueryInfo* query_info, SeqLocPtr query_slp,
- ReadDBFILEPtr rdfp, SeqIdPtr seqid, BlastScoreBlk* sbp,
+ BlastSeqSrc* seq_src, BlastScoreBlk* sbp,
char* filename)
{
Int4 query_index, hit_index, hsp_index;
@@ -1061,33 +787,24 @@ void BLAST_PrintIntermediateResults(BlastHSPResults* results,
Int4 q_start, q_end, s_start, s_end;
FILE *outfp;
SeqLocPtr slp;
- char* subject_descr;
Blast_KarlinBlk* kbp;
- char* bit_score_buff,* eval_buff;
- double bit_score;
+ char bit_score_buff[10], eval_buff[10];
+ char* eval_buff_ptr = NULL;
BlastHSP* hsp;
+ ListNode* seqid_wrap = NULL;
- if (!results || !query_info || !query_slp || (!rdfp && !seqid) ||
+ if (!results || !query_info || !query_slp || !seq_src ||
!sbp || !filename)
return;
outfp = FileOpen(filename, "w");
- eval_buff = (char *) malloc(10);
- bit_score_buff = (char *) malloc(10);
-
- if (!rdfp) {
- /* Two sequences case */
- subject_id = seqid;
- subject_descr = NULL;
- }
-
for (query_index = 0, slp = query_slp;
query_index < results->num_queries && slp;
++query_index, slp = slp->next) {
hit_list = results->hitlist_array[query_index];
query_id = SeqLocId(slp);
- PrintSeqDefline(query_id, NULL, &query_buffer, TRUE, FALSE, FALSE,
+ Blast_SeqIdGetDefLine(query_id, NULL, &query_buffer, TRUE, FALSE, FALSE,
FALSE);
fprintf(outfp, "#Query = %s\n\n", query_buffer);
sfree(query_buffer);
@@ -1100,11 +817,16 @@ void BLAST_PrintIntermediateResults(BlastHSPResults* results,
for (hit_index = 0; hit_index < hit_list->hsplist_count;
++hit_index) {
hsp_list = hit_list->hsplist_array[hit_index];
- if (rdfp) {
- readdb_get_descriptor(rdfp, hsp_list->oid, &subject_id,
- &subject_descr);
+ BLASTSeqSrcGetSeqId(seq_src, (void*) &hsp_list->oid);
+ if (seqid_wrap->choice == BLAST_SEQSRC_C_SEQID) {
+ subject_id = (SeqId*) seqid_wrap->ptr;
+ ListNodeFree(seqid_wrap);
+ } else {
+ /* Could not retrieve id for this subject sequence. This should
+ never happen, but if it does, skip this HSP. */
+ continue;
}
- PrintSeqDefline(subject_id, subject_descr, &subject_buffer,
+ Blast_SeqIdGetDefLine(subject_id, NULL, &subject_buffer,
TRUE, FALSE, FALSE, TRUE);
fprintf(outfp, ">%s\n\n", subject_buffer);
sfree(subject_buffer);
@@ -1124,19 +846,18 @@ void BLAST_PrintIntermediateResults(BlastHSPResults* results,
}
kbp = sbp->kbp[hsp->context];
- bit_score = (hsp->score*kbp->Lambda - kbp->logK) /
- NCBIMATH_LN2;
- ScoreAndEvalueToBuffers(bit_score, hsp->evalue,
- &bit_score_buff, &eval_buff);
+ eval_buff_ptr = eval_buff;
+
+ ScoreAndEvalueToBuffers(hsp->bit_score, hsp->evalue,
+ bit_score_buff, &eval_buff_ptr, TRUE);
fprintf(outfp, "[%ld %ld] [%ld %ld] %s %s\n",
(long)q_start, (long)q_end, (long)s_start, (long)s_end,
- bit_score_buff, eval_buff);
+ bit_score_buff, eval_buff_ptr);
}
}
}
}
FileClose(outfp);
}
-
diff --git a/algo/blast/api/blast_format.h b/algo/blast/api/blast_format.h
index 28535ff6..24f3bfa9 100644
--- a/algo/blast/api/blast_format.h
+++ b/algo/blast/api/blast_format.h
@@ -1,4 +1,4 @@
-/* $Id: blast_format.h,v 1.20 2004/04/22 22:15:40 dondosha Exp $
+/* $Id: blast_format.h,v 1.22 2004/06/07 18:40:48 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,7 +32,7 @@ Author: Ilya Dondoshansky
Contents: Functions needed for formatting of BLAST results
******************************************************************************
- * $Revision: 1.20 $
+ * $Revision: 1.22 $
* */
#ifndef __BLAST_FORMAT__
#define __BLAST_FORMAT__
@@ -48,10 +48,11 @@ extern "C" {
#include <ncbi.h>
#include <asn.h>
#include <bxmlobj.h>
-#include <readdb.h>
#include <algo/blast/core/blast_options.h>
#include <algo/blast/core/blast_hits.h>
-#include <algo/blast/api/seqsrc_readdb.h>
+#include <algo/blast/core/blast_seqsrc.h>
+#include <algo/blast/core/blast_diagnostics.h>
+#include <algo/blast/api/twoseq_api.h>
/** Options for formatting BLAST results
*/
@@ -117,7 +118,7 @@ typedef struct MBXml {
Int2 BLAST_FormatResults(SeqAlignPtr head, char* blast_database,
char* blast_program, Int4 num_queries,
SeqLocPtr query_slp, BlastMaskLoc* mask_loc,
- BlastFormattingOptions* format_options, Boolean is_ooframe);
+ const BlastFormattingOptions* format_options, Boolean is_ooframe);
/** Print the summary at the end of the BLAST report.
* @param program_number Type of BLAST program [in]
@@ -128,20 +129,23 @@ Int2 BLAST_FormatResults(SeqAlignPtr head, char* blast_database,
* @param word_options Word finding options and parameters [in]
* @param ext_options Extension options and parameters [in]
* @param hit_options Hit saving options [in]
+ * @param eff_len_options Effective lengths options, containing user-specified
+ * values for database length or eff. search space [in]
* @param query_info Query information [in]
- * @param dbname BLAST database name [in]
- * @param return_stats Data about this run [in]
- * @param db_is_na TRUE if a nucleotide database [in]
+ * @param seq_src Source of subject sequences [in]
+ * @param diagnostics Data about this run [in]
*/
Int2 PrintOutputFooter(Uint1 program_number,
- BlastFormattingOptions* format_options,
- BlastScoringOptions* score_options, BlastScoreBlk* sbp,
- LookupTableOptions* lookup_options,
- BlastInitialWordOptions* word_options,
- BlastExtensionOptions* ext_options,
- BlastHitSavingOptions* hit_options,
- BlastQueryInfo* query_info, char* dbname,
- BlastReturnStat* return_stats, Boolean db_is_na);
+ const BlastFormattingOptions* format_options,
+ const BlastScoringOptions* score_options,
+ const BlastScoreBlk* sbp,
+ const LookupTableOptions* lookup_options,
+ const BlastInitialWordOptions* word_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastQueryInfo* query_info, const BlastSeqSrc* seq_src,
+ const BlastDiagnostics* diagnostics);
/** Prints the top part of the traditional BLAST output, including version,
* reference(s) and database information.
@@ -151,13 +155,17 @@ Int2 PrintOutputFooter(Uint1 program_number,
* @param dbname BLAST database name [in]
* @param is_protein Is the database protein or nucleotide? [in]
*/
-Int2 BLAST_PrintOutputHeader(BlastFormattingOptions* format_options,
+Int2 BLAST_PrintOutputHeader(const BlastFormattingOptions* format_options,
Boolean is_megablast, char* dbname, Boolean is_protein);
void BLAST_PrintIntermediateResults(BlastHSPResults* results,
BlastQueryInfo* query_info, SeqLocPtr query_slp,
- ReadDBFILEPtr rdfp, SeqIdPtr seqid, BlastScoreBlk* sbp,
+ BlastSeqSrc* seq_src, BlastScoreBlk* sbp,
char* filename);
+void
+Blast_SeqIdGetDefLine(SeqIdPtr sip, char* descr, char** buffer_ptr,
+ Boolean ncbi_gi, Boolean accession_only,
+ Boolean seqid_only, Boolean believe_local_id);
#ifdef __cplusplus
diff --git a/algo/blast/api/blast_returns.c b/algo/blast/api/blast_returns.c
new file mode 100644
index 00000000..6d385346
--- /dev/null
+++ b/algo/blast/api/blast_returns.c
@@ -0,0 +1,405 @@
+/* $Id: blast_returns.c,v 1.1 2004/05/14 17:19:03 dondosha Exp $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================*/
+
+/*****************************************************************************
+
+File name: blast_returns.c
+
+Author: Ilya Dondoshansky
+
+Contents: Manipulating data returned from BLAST other than Seq-aligns
+
+Detailed Contents:
+
+******************************************************************************
+ * $Revision: 1.1 $
+ * */
+
+static char const rcsid[] = "$Id: blast_returns.c,v 1.1 2004/05/14 17:19:03 dondosha Exp $";
+
+#include <algo/blast/api/blast_returns.h>
+#include <algo/blast/api/blast_seq.h>
+#include <algo/blast/core/blast_filter.h>
+#include <algo/blast/core/blast_util.h>
+#include <algo/blast/core/blast_seqsrc.h>
+
+/** Releases a whole chain of BLAST_DbSummary nodes.
+ * For every node the name, definition and date strings are freed, then the
+ * node itself; the walk follows the 'next' links until the chain ends.
+ * @param dbinfo Head of the chain to release; may be NULL [in]
+ * @return Always NULL.
+ */
+BLAST_DbSummary* LIBCALL
+Blast_DbSummaryFree (BLAST_DbSummary* dbinfo)
+{
+    BLAST_DbSummary* node = dbinfo;
+
+    while (node != NULL) {
+        /* Remember the successor before the current node is released. */
+        BLAST_DbSummary* following = node->next;
+
+        sfree(node->name);
+        sfree(node->definition);
+        sfree(node->date);
+        sfree(node);
+
+        node = following;
+    }
+
+    return NULL;
+}
+
+/** Builds a database summary from a sequence source.
+ * The name, definition, date, protein flag, total length and number of
+ * sequences are queried from the sequence source. When the source reports
+ * no definition the name is used instead, and when it reports a total
+ * length of 0 the maximal sequence length is used instead.
+ * @param seq_src Source of subject sequences [in]
+ * @return Newly allocated summary (release with Blast_DbSummaryFree), or
+ *         NULL if the structure could not be allocated.
+ */
+BLAST_DbSummary* Blast_GetDbSummary(const BlastSeqSrc* seq_src)
+{
+    BLAST_DbSummary* dbinfo = NULL;
+    char* chptr = NULL;
+
+    dbinfo = calloc(1, sizeof(BLAST_DbSummary));
+    if (dbinfo == NULL)
+        return NULL;
+
+    dbinfo->name = BLASTSeqSrcGetName(seq_src);
+
+    /* Fall back to the name when the source has no separate definition. */
+    if((chptr = BLASTSeqSrcGetDefinition(seq_src)) == NULL)
+        chptr = dbinfo->name;
+
+    /* NOTE(review): if BLASTSeqSrcGetDefinition returns memory owned by the
+       caller, the original string is leaked after this copy - confirm. */
+    if (chptr)
+        dbinfo->definition = strdup(chptr);
+
+    dbinfo->date = BLASTSeqSrcGetDate(seq_src);
+
+    dbinfo->is_protein = BLASTSeqSrcGetIsProt(seq_src);
+
+    /* A total length of 0 is replaced by the maximal sequence length. */
+    if ((dbinfo->total_length = BLASTSeqSrcGetTotLen(seq_src)) == 0)
+        dbinfo->total_length = BLASTSeqSrcGetMaxSeqLen(seq_src);
+    dbinfo->number_seqs = BLASTSeqSrcGetNumSeqs(seq_src);
+
+    return dbinfo;
+}
+
+/*
+ Appends a tilde ('~') followed by the text in 'buffer' to the string
+ accumulated in *old (used by Blast_GetParametersBuffer).
+ If the accumulated string does not have enough room, a longer zero-filled
+ replacement is allocated, the existing text is copied into it, the old
+ storage is released and *old / *old_length are updated.
+ Returns FALSE if 'buffer' is NULL or if the replacement could not be
+ allocated (in which case *old and *old_length are left untouched),
+ TRUE otherwise.
+*/
+
+static Boolean
+add_string_to_buffer(char* buffer, char* *old, Int2* old_length)
+
+{
+    char* new,* ptr;
+    Int2 length = 0, new_length, needed;
+
+    if (!buffer)
+        return FALSE;
+
+    if (*old)
+        length = (strlen(*old));
+
+    /* Existing text + tilde + new text + terminating NUL, plus one spare
+       byte as in the original size check. */
+    needed = (Int2)(strlen(buffer)+length+3);
+
+    if (needed > *old_length)
+    {
+        /* Grow in steps of 255, but never by less than what is needed. */
+        new_length = *old_length + 255;
+        if (new_length < needed)
+            new_length = needed;
+
+        new = calloc(new_length, sizeof(char));
+        if (new == NULL)
+            return FALSE;
+
+        if (*old != NULL)
+        {
+            /* Only the text itself has to be carried over; the rest of the
+               replacement is already zero-filled by calloc. */
+            memcpy(new, *old, length);
+            sfree(*old);
+        }
+        *old = new;
+        *old_length = new_length;
+    }
+
+    ptr = *old;
+    ptr += length;
+
+    /* Add a tilde */
+    *ptr = '~';
+    ptr++;
+
+    while (*buffer != NULLB)
+    {
+        *ptr = *buffer;
+        buffer++; ptr++;
+    }
+
+    /* Terminate explicitly instead of relying on zero-filled storage. */
+    *ptr = NULLB;
+
+    return TRUE;
+}
+
+/** Formats the parameters and statistics of a BLAST search as one string.
+ * Every item is appended with add_string_to_buffer, so each one is preceded
+ * by a tilde ('~'). Items that only make sense for a single query (query
+ * length, length adjustment, effective lengths, search space, S2 cutoff)
+ * are written only when the last context index is smaller than the number
+ * of frames for the program. Statistics and raw cutoffs are written only
+ * when 'diagnostics' is non-NULL and carries the corresponding structures.
+ * @param program_number Type of BLAST program [in]
+ * @param score_options Scoring options (matrix, gap penalties) [in]
+ * @param sbp Scoring block with the Karlin-Altschul parameters [in]
+ * @param lookup_options Lookup table options (word threshold) [in]
+ * @param word_options Initial word options (window size) [in]
+ * @param ext_options Extension options (X-dropoffs, gap trigger) [in]
+ * @param hit_options Hit saving options (expect value) [in]
+ * @param eff_len_options User-specified database length / number of
+ *                        sequences; non-zero values override seq_src [in]
+ * @param query_info Query information [in]
+ * @param seq_src Source of subject sequences; may be NULL [in]
+ * @param diagnostics Data about this run; may be NULL [in]
+ * @return The accumulated heap-allocated string; ownership passes to the
+ *         caller.
+ */
+char*
+Blast_GetParametersBuffer(Uint1 program_number,
+        const BlastScoringOptions* score_options,
+        const BlastScoreBlk* sbp, const LookupTableOptions* lookup_options,
+        const BlastInitialWordOptions* word_options,
+        const BlastExtensionOptions* ext_options,
+        const BlastHitSavingOptions* hit_options,
+        const BlastEffectiveLengthsOptions* eff_len_options,
+        const BlastQueryInfo* query_info, const BlastSeqSrc* seq_src,
+        const BlastDiagnostics* diagnostics)
+{
+    Int4 cutoff = 0;
+    char buffer[128];
+    char* ret_buffer;
+    Int2 ret_buffer_length;
+    Int4 num_entries = 0;
+    Int8 total_length = 0;
+    Int4 qlen = 0;
+    double evalue;
+    Int2 num_frames;
+    Boolean single_query;
+    Blast_KarlinBlk* kbp;
+    BlastUngappedStats* ungapped_stats = NULL;
+    BlastGappedStats* gapped_stats = NULL;
+    BlastRawCutoffs* raw_cutoffs = NULL;
+
+    ret_buffer = NULL;
+    ret_buffer_length = 0;
+
+    if (program_number == blast_type_blastx ||
+        program_number == blast_type_tblastx)
+        num_frames = NUM_FRAMES;
+    else if (program_number == blast_type_blastn)
+        num_frames = 2;
+    else
+        num_frames = 1;
+
+    /* All contexts belong to one query if the last context index is still
+       within the first set of frames. */
+    single_query = (query_info->last_context < num_frames);
+
+    if (diagnostics) {
+        ungapped_stats = diagnostics->ungapped_stat;
+        gapped_stats = diagnostics->gapped_stat;
+        raw_cutoffs = diagnostics->cutoffs;
+    }
+
+    if (score_options->matrix) {
+        /* The precision bounds the matrix name so that the formatted line
+           always fits into the fixed-size buffer. */
+        sprintf(buffer, "Matrix: %.100s", score_options->matrix);
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+    }
+
+    if (score_options->gapped_calculation) {
+        sprintf(buffer, "Gap Penalties: Existence: %ld, Extension: %ld",
+                (long) score_options->gap_open,
+                (long) score_options->gap_extend);
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+    }
+
+    /* A user-specified database length takes precedence over the value
+       reported by the sequence source. */
+    if (eff_len_options->db_length)
+        total_length = eff_len_options->db_length;
+    else if (seq_src) {
+        if ((total_length = BLASTSeqSrcGetTotLen(seq_src)) == 0)
+            total_length = BLASTSeqSrcGetMaxSeqLen(seq_src);
+    }
+
+    if (program_number == blast_type_tblastn ||
+        program_number == blast_type_rpstblastn ||
+        program_number == blast_type_tblastx)
+        total_length /= 3;
+
+    if (eff_len_options->dbseq_num)
+        num_entries = eff_len_options->dbseq_num;
+    else if (seq_src)
+        num_entries = BLASTSeqSrcGetNumSeqs(seq_src);
+
+    sprintf(buffer, "Number of Sequences: %ld", (long) num_entries);
+    add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+    if (ungapped_stats) {
+        /* NOTE(review): if Nlm_Int8tostr returns allocated memory, the
+           strings produced here and below are never freed - confirm. */
+        sprintf(buffer, "Number of Hits to DB: %s",
+                Nlm_Int8tostr(ungapped_stats->lookup_hits, 1));
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+        sprintf(buffer, "Number of extensions: %ld",
+                (long) ungapped_stats->init_extends);
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+        sprintf(buffer, "Number of successful extensions: %ld",
+                (long) ungapped_stats->good_init_extends);
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+    }
+
+    if (gapped_stats) {
+        if (hit_options->expect_value > 0.1) {
+            sprintf(buffer, "Number of sequences better than %4.1f: %ld",
+                    hit_options->expect_value,
+                    (long) gapped_stats->num_seqs_passed);
+        } else {
+            sprintf(buffer, "Number of sequences better than %3.1e: %ld",
+                    hit_options->expect_value,
+                    (long) gapped_stats->num_seqs_passed);
+        }
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+        if (score_options->gapped_calculation) {
+            sprintf(buffer,
+                    "Number of HSP's better than %4.1f without gapping: %ld",
+                    hit_options->expect_value,
+                    (long) gapped_stats->seqs_ungapped_passed);
+            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+            sprintf(buffer,
+                    "Number of HSP's gapped: %ld",
+                    (long) gapped_stats->extensions);
+            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+            sprintf(buffer,
+                    "Number of HSP's successfully gapped: %ld",
+                    (long) gapped_stats->good_extensions);
+            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+            sprintf(buffer,
+                    "Number of extra gapped extensions for HSPs above %4.1f: %ld",
+                    hit_options->expect_value,
+                    (long) gapped_stats->extra_extensions);
+            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+        }
+    }
+
+    /* Query length makes sense only for single query sequence. */
+    if (single_query) {
+        qlen = BLAST_GetQueryLength(query_info, query_info->first_context);
+        sprintf(buffer, "Length of query: %ld", (long)qlen);
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+    }
+
+    sprintf(buffer, "Length of database: %s", Nlm_Int8tostr (total_length, 1));
+    add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+    if (single_query) {
+        Int4 length_adjustment =
+            query_info->length_adjustments[query_info->first_context];
+        Int4 eff_qlen;
+        Int8 eff_dblen;
+        sprintf(buffer, "Length adjustment: %ld", (long) length_adjustment);
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+        /** FIXME: Should this be different for RPS BLAST? */
+        eff_qlen = qlen - length_adjustment;
+        sprintf(buffer, "Effective length of query: %ld", (long)eff_qlen);
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+        /* Widen before multiplying: the product of two Int4 values can
+           exceed the Int4 range for large databases. */
+        eff_dblen = total_length - ((Int8) num_entries)*length_adjustment;
+        sprintf(buffer, "Effective length of database: %s",
+                Nlm_Int8tostr (eff_dblen , 1));
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+        sprintf(buffer, "Effective search space: %8.0f",
+                ((double) eff_dblen)*((double) eff_qlen));
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+        sprintf(buffer, "Effective search space used: %8.0f",
+                (double)query_info->eff_searchsp_array[query_info->first_context]);
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+    }
+    sprintf(buffer, "Neighboring words threshold: %ld",
+            (long) lookup_options->threshold);
+    add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+    sprintf(buffer, "Window for multiple hits: %ld",
+            (long) word_options->window_size);
+    add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+    if (raw_cutoffs) {
+        kbp = sbp->kbp[query_info->first_context];
+        /* X1 is converted from a raw score to bits with the ungapped
+           Lambda of the first context. */
+        sprintf(buffer, "X1: %ld (%4.1f bits)",
+                (long)raw_cutoffs->x_drop_ungapped,
+                raw_cutoffs->x_drop_ungapped*kbp->Lambda/NCBIMATH_LN2);
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+        if (score_options->gapped_calculation) {
+            sprintf(buffer, "X2: %ld (%4.1f bits)",
+                    (long)raw_cutoffs->x_drop_gap, ext_options->gap_x_dropoff);
+            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+            sprintf(buffer, "X3: %ld (%4.1f bits)",
+                    (long)raw_cutoffs->x_drop_gap_final,
+                    ext_options->gap_x_dropoff_final);
+            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+            sprintf(buffer, "S1: %ld (%4.1f bits)",
+                    (long)raw_cutoffs->gap_trigger, ext_options->gap_trigger);
+            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+        }
+    }
+    cutoff = 0;
+    if (single_query) {
+        Int4 context = query_info->first_context;
+        double searchsp = (double) query_info->eff_searchsp_array[context];
+
+        /* For translated RPS blast the search space must be scaled down */
+        if (program_number == blast_type_rpstblastn)
+            searchsp = searchsp / NUM_FRAMES;
+
+        evalue = hit_options->expect_value;
+        if (!score_options->gapped_calculation)
+            kbp = sbp->kbp[query_info->first_context];
+        else
+            kbp = sbp->kbp_gap[query_info->first_context];
+
+        BLAST_Cutoffs(&cutoff, &evalue, kbp, searchsp, FALSE, 0);
+        sprintf(buffer, "S2: %ld (%4.1f bits)", (long) cutoff,
+                (((cutoff)*(kbp->Lambda))-(kbp->logK))/NCBIMATH_LN2);
+        add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+    }
+    return ret_buffer;
+}
+
+/** Save the Karlin-Altschul parameters calculated in the BLAST search.
+ * Lambda, K and H are copied from the ungapped and the gapped
+ * Karlin-Altschul blocks of the given context into newly allocated
+ * BLAST_KAParameters structures. A block that is missing for this context,
+ * or a structure that cannot be allocated, is skipped.
+ * @param sbp Internal scoring block structure [in]
+ * @param context Index into the array of structures containing
+ *                Karlin-Altschul parameters [in]
+ * @param sum_returns Returns summary structure [out]
+*/
+static void
+Blast_SummaryFillKAParameters(const BlastScoreBlk* sbp, Int4 context,
+                              BLAST_SummaryReturn* sum_returns)
+{
+    Blast_KarlinBlk* kbp;
+
+    if (!sbp || !sum_returns)
+        return;
+
+    /* Ungapped parameters */
+    if (sbp->kbp && (kbp = sbp->kbp[context]) != NULL) {
+        sum_returns->ka_params =
+            (BLAST_KAParameters*) malloc(sizeof(BLAST_KAParameters));
+        if (sum_returns->ka_params) {
+            sum_returns->ka_params->Lambda = kbp->Lambda;
+            sum_returns->ka_params->K = kbp->K;
+            sum_returns->ka_params->H = kbp->H;
+        }
+    }
+
+    /* Gapped parameters */
+    if (sbp->kbp_gap && (kbp = sbp->kbp_gap[context]) != NULL) {
+        sum_returns->ka_params_gap =
+            (BLAST_KAParameters*) malloc(sizeof(BLAST_KAParameters));
+        if (sum_returns->ka_params_gap) {
+            sum_returns->ka_params_gap->Lambda = kbp->Lambda;
+            sum_returns->ka_params_gap->K = kbp->K;
+            sum_returns->ka_params_gap->H = kbp->H;
+        }
+    }
+}
+
+/** Allocates a summary returns structure and fills it.
+ * The Karlin-Altschul parameters of the first query context and the
+ * formatted parameters buffer (see Blast_GetParametersBuffer) are saved in
+ * the new structure, which is handed back through sum_returns_out.
+ * If query_info is NULL or the structure cannot be allocated,
+ * *sum_returns_out is set to NULL.
+ * @param program_number Type of BLAST program [in]
+ * @param score_options Scoring options [in]
+ * @param sbp Internal scoring block structure [in]
+ * @param lookup_options Lookup table options [in]
+ * @param word_options Word finding options [in]
+ * @param ext_options Extension options [in]
+ * @param hit_options Hit saving options [in]
+ * @param eff_len_options Effective lengths options [in]
+ * @param query_info Query information [in]
+ * @param seq_src Source of subject sequences [in]
+ * @param diagnostics Data about this run [in]
+ * @param sum_returns_out Receives the newly allocated structure [out]
+ */
+void Blast_SummaryReturnFill(Uint1 program_number,
+        const BlastScoringOptions* score_options,
+        const BlastScoreBlk* sbp,
+        const LookupTableOptions* lookup_options,
+        const BlastInitialWordOptions* word_options,
+        const BlastExtensionOptions* ext_options,
+        const BlastHitSavingOptions* hit_options,
+        const BlastEffectiveLengthsOptions* eff_len_options,
+        const BlastQueryInfo* query_info,
+        const BlastSeqSrc* seq_src,
+        const BlastDiagnostics* diagnostics,
+        BLAST_SummaryReturn** sum_returns_out)
+{
+    BLAST_SummaryReturn* sum_returns;
+
+    if (!sum_returns_out)
+        return;
+
+    *sum_returns_out = NULL;
+
+    /* The first query context is needed for every item filled below. */
+    if (!query_info)
+        return;
+
+    sum_returns =
+        (BLAST_SummaryReturn*) calloc(1, sizeof(BLAST_SummaryReturn));
+    if (!sum_returns)
+        return;
+
+    Blast_SummaryFillKAParameters(sbp, query_info->first_context, sum_returns);
+    sum_returns->params_buffer =
+        Blast_GetParametersBuffer(program_number, score_options, sbp,
+                                  lookup_options, word_options, ext_options,
+                                  hit_options, eff_len_options, query_info,
+                                  seq_src, diagnostics);
+    *sum_returns_out = sum_returns;
+}
diff --git a/algo/blast/api/blast_returns.h b/algo/blast/api/blast_returns.h
new file mode 100644
index 00000000..74c78992
--- /dev/null
+++ b/algo/blast/api/blast_returns.h
@@ -0,0 +1,105 @@
+/* $Id: blast_returns.h,v 1.1 2004/05/14 17:19:03 dondosha Exp $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================*/
+
+/*****************************************************************************
+
+File name: blast_returns.h
+
+Author: Ilya Dondoshansky
+
+Contents: Manipulation of data returned from BLAST other than Seq-aligns
+
+******************************************************************************
+ * $Revision: 1.1 $
+ * */
+#ifndef __BLAST_RETURNS__
+#define __BLAST_RETURNS__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef NCBI_C_TOOLKIT
+#define NCBI_C_TOOLKIT
+#endif
+
+#include <algo/blast/core/blast_options.h>
+#include <algo/blast/core/blast_hits.h>
+#include <algo/blast/core/blast_seqsrc.h>
+#include <algo/blast/core/blast_diagnostics.h>
+#include <algo/blast/api/twoseq_api.h>
+
+/** Summary information about a BLAST database (or other sequence source).
+ * Nodes can be chained through 'next'; Blast_DbSummaryFree releases the
+ * whole chain together with the name, definition and date strings.
+ */
+typedef struct BLAST_DbSummary {
+ struct BLAST_DbSummary* next; /* Next summary in the chain, or NULL. */
+ Boolean is_protein; /* Value reported by BLASTSeqSrcGetIsProt. */
+ char* name; /* Name of the source; freed by Blast_DbSummaryFree. */
+ char* definition; /* Definition of the source (a copy of the name when
+                      the source has none); freed by Blast_DbSummaryFree. */
+ char* date; /* Date of the source; freed by Blast_DbSummaryFree. */
+ Int8 total_length; /* Total length of the source, or its maximal sequence
+                       length when the total length is reported as 0. */
+ Int4 number_seqs; /* Number of sequences in the source. */
+ Boolean subset; /* Print the subset message. */
+} BLAST_DbSummary;
+
+/** Frees a chain of BLAST_DbSummary structures, including the name,
+ * definition and date strings of every node.
+ * @param dbinfo Head of the chain to free; may be NULL [in]
+ * @return Always NULL.
+ */
+BLAST_DbSummary* LIBCALL
+Blast_DbSummaryFree (BLAST_DbSummary* dbinfo);
+
+/** Retrieves necessary information from a sequence source and fills the
+ * BLAST_DbSummary structure.
+ * @param seq_src Source of subject sequences [in]
+ * @return Newly allocated summary; free with Blast_DbSummaryFree.
+ */
+BLAST_DbSummary* Blast_GetDbSummary(const BlastSeqSrc* seq_src);
+
+/** Formats the BLAST parameters for the BLAST report.
+ * One char* is returned, newlines are indicated by tildes ('~').
+ * @param program_number Type of BLAST program [in]
+ * @param score_options Scoring options [in]
+ * @param sbp Scoring block with the Karlin-Altschul parameters [in]
+ * @param lookup_options Lookup table options [in]
+ * @param word_options Word finding options [in]
+ * @param ext_options Extension options [in]
+ * @param hit_options Hit saving options [in]
+ * @param eff_len_options Effective lengths options, containing user-specified
+ *                        values for database length or number of sequences [in]
+ * @param query_info Query information [in]
+ * @param seq_src Source of subject sequences [in]
+ * @param diagnostics Data about this run [in]
+ */
+char*
+Blast_GetParametersBuffer(Uint1 program_number,
+ const BlastScoringOptions* score_options,
+ const BlastScoreBlk* sbp, const LookupTableOptions* lookup_options,
+ const BlastInitialWordOptions* word_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastQueryInfo* query_info, const BlastSeqSrc* seq_src,
+ const BlastDiagnostics* diagnostics);
+
+/** Fills the summary returns: allocates a BLAST_SummaryReturn, saves the
+ * Karlin-Altschul parameters of the first query context and the formatted
+ * parameters buffer in it, and passes it back through sum_returns_out.
+ * The input arguments are the same as for Blast_GetParametersBuffer.
+ * @param sum_returns_out Receives the newly allocated structure [out]
+ */
+void Blast_SummaryReturnFill(Uint1 program_number,
+ const BlastScoringOptions* score_options,
+ const BlastScoreBlk* sbp,
+ const LookupTableOptions* lookup_options,
+ const BlastInitialWordOptions* word_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastQueryInfo* query_info,
+ const BlastSeqSrc* seq_src,
+ const BlastDiagnostics* diagnostics,
+ BLAST_SummaryReturn** sum_returns_out);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !__BLAST_RETURNS__ */
+
diff --git a/algo/blast/api/blast_seq.c b/algo/blast/api/blast_seq.c
index c4bb1a2c..2eb93b5d 100644
--- a/algo/blast/api/blast_seq.c
+++ b/algo/blast/api/blast_seq.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: blast_seq.c,v 1.39 2004/04/16 14:44:07 papadopo Exp $";
+static char const rcsid[] = "$Id: blast_seq.c,v 1.40 2004/05/14 17:20:50 dondosha Exp $";
/*
* ===========================================================================
*
@@ -33,7 +33,7 @@ Author: Ilya Dondoshansky
Contents: Functions converting between SeqLocs and structures used in BLAST.
******************************************************************************
- * $Revision: 1.39 $
+ * $Revision: 1.40 $
* */
#include <seqport.h>
@@ -274,7 +274,7 @@ static Int4 BLAST_SetUpQueryInfo(SeqLocPtr slp, Uint1 program,
Uint4 max_length = 0;
if (translate)
- num_frames = 6;
+ num_frames = NUM_FRAMES;
else if (is_na)
num_frames = 2;
else
@@ -302,14 +302,14 @@ static Int4 BLAST_SetUpQueryInfo(SeqLocPtr slp, Uint1 program,
}
if ((context_offsets = (Int4*)
- malloc((total_contexts+1)*sizeof(Int4))) == NULL)
+ calloc((total_contexts+1), sizeof(Int4))) == NULL)
return -1;
if ((query_info->eff_searchsp_array =
- (Int8*) malloc(total_contexts*sizeof(Int8))) == NULL)
+ (Int8*) calloc(total_contexts, sizeof(Int8))) == NULL)
return -1;
if ((query_info->length_adjustments =
- (Int4*) malloc(total_contexts*sizeof(Int4))) == NULL)
+ (Int4*) calloc(total_contexts, sizeof(Int4))) == NULL)
return -1;
context_offsets[0] = 0;
@@ -542,7 +542,7 @@ BLAST_GetSequence(SeqLocPtr slp, BlastQueryInfo* query_info,
}
}
- if (num_frames == 6) {
+ if (num_frames == NUM_FRAMES) {
/* Sequence must be translated in 6 frames. This can only happen
for query - subject sequences are translated later. */
Int4 gc;
@@ -639,7 +639,7 @@ Int2 BLAST_SetUpQuery(Uint1 program_number, SeqLocPtr query_slp,
num_frames = 1;
} else {
encoding = NCBI4NA_ENCODING;
- num_frames = 6;
+ num_frames = NUM_FRAMES;
}
if ((status=BLAST_GetSequence(query_slp, *query_info, query_options,
diff --git a/algo/blast/api/blast_seqalign.c b/algo/blast/api/blast_seqalign.c
index ea6baeb6..9e32cb19 100644
--- a/algo/blast/api/blast_seqalign.c
+++ b/algo/blast/api/blast_seqalign.c
@@ -1,4 +1,4 @@
-/* $Id: blast_seqalign.c,v 1.31 2004/04/19 15:03:20 papadopo Exp $
+/* $Id: blast_seqalign.c,v 1.35 2004/06/08 17:47:24 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,10 +32,10 @@ Author: Ilya Dondoshansky
Contents: Conversion of BLAST results to the SeqAlign form
******************************************************************************
- * $Revision: 1.31 $
+ * $Revision: 1.35 $
* */
-static char const rcsid[] = "$Id: blast_seqalign.c,v 1.31 2004/04/19 15:03:20 papadopo Exp $";
+static char const rcsid[] = "$Id: blast_seqalign.c,v 1.35 2004/06/08 17:47:24 dondosha Exp $";
#include <algo/blast/api/blast_seqalign.h>
@@ -46,14 +46,12 @@ extern ScorePtr MakeBlastScore (ScorePtr PNTR old, CharPtr scoretype,
Nlm_FloatHi prob, Int4 score);
static ScorePtr
-GetScoreSetFromBlastHsp(Uint1 program_number, BlastHSP* hsp,
- BlastScoreBlk* sbp, BlastScoringOptions* score_options)
+GetScoreSetFromBlastHsp(BlastHSP* hsp)
{
ScorePtr score_set=NULL;
double prob;
Int4 score;
char* scoretype;
- Blast_KarlinBlk* kbp;
score = hsp->score;
if (score > 0)
@@ -77,16 +75,9 @@ GetScoreSetFromBlastHsp(Uint1 program_number, BlastHSP* hsp,
MakeBlastScore(&score_set, scoretype, prob, 0);
}
- if (!score_options->gapped_calculation) {
- kbp = sbp->kbp[hsp->context];
- } else {
- kbp = sbp->kbp_gap[hsp->context];
- }
-
/* Calculate bit score from the raw score */
- prob = ((hsp->score*kbp->Lambda) - kbp->logK)/NCBIMATH_LN2;
- if (prob >= 0.)
- MakeBlastScore(&score_set, "bit_score", prob, 0);
+ if (hsp->bit_score >= 0.)
+ MakeBlastScore(&score_set, "bit_score", hsp->bit_score, 0);
if (hsp->num_ident > 0)
MakeBlastScore(&score_set, "num_ident", 0.0, hsp->num_ident);
@@ -121,10 +112,8 @@ static Int2 AddGiListToScoreSet(ScorePtr score_set, ValNodePtr gi_list)
************************************************************************/
static DenseDiagPtr
-BLAST_HSPToDenseDiag(Uint1 program_number, DenseDiagPtr* old,
- BlastHSP* hsp, Boolean reverse,
- Int4 query_length, Int4 subject_length, BlastScoreBlk* sbp,
- BlastScoringOptions* score_options)
+BLAST_HSPToDenseDiag(DenseDiagPtr* old, BlastHSP* hsp, Boolean reverse,
+ Int4 query_length, Int4 subject_length)
{
DenseDiagPtr ddp, new;
@@ -180,8 +169,7 @@ BLAST_HSPToDenseDiag(Uint1 program_number, DenseDiagPtr* old,
new->starts[1] = subject_length - hsp->subject.offset - hsp->subject.length;
}
}
- new->scores =
- GetScoreSetFromBlastHsp(program_number, hsp, sbp, score_options);
+ new->scores = GetScoreSetFromBlastHsp(hsp);
/* Go to the end of the chain, and then attach "new" */
if (*old)
@@ -210,10 +198,8 @@ BLAST_HSPToDenseDiag(Uint1 program_number, DenseDiagPtr* old,
*
************************************************************************/
static StdSeg*
-BLAST_HSPToStdSeg(Uint1 program_number, StdSeg** old,
- BlastHSP* hsp, Int4 query_length, Int4 subject_length, SeqIdPtr sip,
- Boolean reverse, BlastScoreBlk* sbp,
- BlastScoringOptions* score_options)
+BLAST_HSPToStdSeg(StdSeg** old, BlastHSP* hsp, Int4 query_length,
+ Int4 subject_length, SeqIdPtr sip, Boolean reverse)
{
StdSeg* ssp,* new;
SeqIdPtr query_sip, subject_sip;
@@ -288,8 +274,7 @@ BLAST_HSPToStdSeg(Uint1 program_number, StdSeg** old,
}
new->loc = slp;
- new->scores =
- GetScoreSetFromBlastHsp(program_number, hsp, sbp, score_options);
+ new->scores = GetScoreSetFromBlastHsp(hsp);
/* Go to the end of the chain, and then attach "new" */
if (*old)
@@ -327,9 +312,8 @@ BLAST_HSPToStdSeg(Uint1 program_number, StdSeg** old,
static Int2
BLAST_UngappedHSPToSeqAlign(Uint1 program_number,
BlastHSPList* hsp_list, SeqIdPtr query_id,
- SeqIdPtr subject_id, BlastScoreBlk* sbp, Int4 query_length,
- Int4 subject_length, BlastScoringOptions* score_options,
- SeqAlignPtr* seqalign_ptr)
+ SeqIdPtr subject_id, Int4 query_length,
+ Int4 subject_length, SeqAlignPtr* seqalign_ptr)
{
BlastHSP* hsp;
DenseDiagPtr ddp_head=NULL, ddp;
@@ -361,13 +345,12 @@ BLAST_UngappedHSPToSeqAlign(Uint1 program_number,
sip->next = SeqIdDup(subject_id);
if (getdensediag) {
- ddp = BLAST_HSPToDenseDiag(program_number, &ddp_head, hsp, FALSE,
- query_length, subject_length, sbp, score_options);
+ ddp = BLAST_HSPToDenseDiag(&ddp_head, hsp, FALSE, query_length,
+ subject_length);
ddp->id = sip;
} else {
- ssp = BLAST_HSPToStdSeg(program_number, &ssp_head, hsp,
- query_length, subject_length, sip, FALSE, sbp,
- score_options);
+ ssp = BLAST_HSPToStdSeg(&ssp_head, hsp, query_length,
+ subject_length, sip, FALSE);
ssp->ids = sip;
}
sip = NULL; /* This SeqIdPtr is now on the SeqAlign. */
@@ -454,8 +437,8 @@ Boolean GapCollectDataForSeqalign(GapEditBlock* edit_block,
index=0;
for (i = 0, curr=curr_in; curr && i < numseg; curr=curr->next, i++) {
switch(curr->op_type) {
- case GAPALIGN_DECLINE:
- case GAPALIGN_SUB:
+ case eGapAlignDecline:
+ case eGapAlignSub:
if (strand1 != Seq_strand_minus) {
if(translate1 == FALSE)
begin1 = get_current_pos(start1, curr->num);
@@ -494,7 +477,7 @@ Boolean GapCollectDataForSeqalign(GapEditBlock* edit_block,
break;
- case GAPALIGN_DEL:
+ case eGapAlignDel:
begin1 = -1;
if (strand2 != Seq_strand_minus) {
if(translate2 == FALSE)
@@ -528,7 +511,7 @@ Boolean GapCollectDataForSeqalign(GapEditBlock* edit_block,
break;
- case GAPALIGN_INS:
+ case eGapAlignIns:
if (strand1 != Seq_strand_minus) {
if(translate1 == FALSE)
begin1 = get_current_pos(start1, curr->num);
@@ -581,7 +564,7 @@ static void GapCorrectUASequence(GapEditBlock* edit_block)
for (curr=edit_block->esp; curr; curr = curr->next) {
- if(curr->op_type == GAPALIGN_DECLINE && last_indel == TRUE) {
+ if(curr->op_type == eGapAlignDecline && last_indel == TRUE) {
/* This is invalid condition and regions should be
exchanged */
@@ -596,7 +579,7 @@ static void GapCorrectUASequence(GapEditBlock* edit_block)
last_indel = FALSE;
- if(curr->op_type == GAPALIGN_INS || curr->op_type == GAPALIGN_DEL) {
+ if(curr->op_type == eGapAlignIns || curr->op_type == eGapAlignDel) {
last_indel = TRUE;
curr_last2 = curr_last;
}
@@ -769,14 +752,14 @@ GapEditBlockToSeqAlign(GapEditBlock* edit_block, SeqIdPtr subject_id, SeqIdPtr q
for (curr=edit_block->esp; curr; curr = curr->next) {
numseg++;
- if(/*edit_block->discontinuous && */curr->op_type == GAPALIGN_DECLINE)
+ if(/*edit_block->discontinuous && */curr->op_type == eGapAlignDecline)
is_disc_align = TRUE;
}
start1 = edit_block->start1;
start2 = edit_block->start2;
- /* If no GAPALIGN_DECLINE regions exists output seqalign will be
+ /* If no eGapAlignDecline regions exists output seqalign will be
regular Den-Seg or Std-seg */
if(is_disc_align == FALSE) {
/* Please note, that edit_block passed only for data like
@@ -812,11 +795,11 @@ GapEditBlockToSeqAlign(GapEditBlock* edit_block, SeqIdPtr subject_id, SeqIdPtr q
for (numseg = 0, curr = curr_last;
curr; curr = curr->next, numseg++) {
- if(curr->op_type == GAPALIGN_DECLINE) {
+ if(curr->op_type == eGapAlignDecline) {
if(numseg != 0) { /* End of aligned area */
break;
} else {
- while(curr && curr->op_type == GAPALIGN_DECLINE) {
+ while(curr && curr->op_type == eGapAlignDecline) {
numseg++;
curr = curr->next;
}
@@ -935,7 +918,7 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
slp2 = NULL;
switch (curr->op_type) {
- case 0: /* deletion of three nucleotides. */
+ case eGapAlignDel: /* deletion of three nucleotides. */
first_shift = FALSE;
@@ -960,7 +943,7 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
break;
- case 6: /* insertion of three nucleotides. */
+ case eGapAlignIns: /* insertion of three nucleotides. */
/* If gap is followed after frameshift - we have to
add this element for the alignment to be correct */
@@ -1070,7 +1053,7 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
break;
- case 3: /* Substitution. */
+ case eGapAlignSub: /* Substitution. */
first_shift = FALSE;
@@ -1090,14 +1073,8 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
/* Nucleotide scale shifted by op_type */
seq_int2 = SeqIntNew();
- /* Adjusting last segment and new start point in
- nucleotide coordinates */
- /* if(seq_int2_last != NULL) {
- seq_int2_last->to = seq_int2_last->to - (3 - curr->op_type);
- start2 = start2 - (3 - curr->op_type);
- } */
-
- seq_int2->from = get_current_pos(&start2, curr->num*curr->op_type);
+ seq_int2->from =
+ get_current_pos(&start2, curr->num*(Uint1)curr->op_type);
seq_int2->to = start2 - 1;
/* Chop off three bases and one residue at a time.
@@ -1125,10 +1102,10 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
seq_int2_last = seq_int2; /* Will be used to adjust "to" value */
break;
- case 1: /* gap of two nucleotides. */
- case 2: /* Gap of one nucleotide. */
- case 4: /* Insertion of one nucleotide. */
- case 5: /* Insertion of two nucleotides. */
+ case eGapAlignDel2: /* gap of two nucleotides. */
+ case eGapAlignDel1: /* Gap of one nucleotide. */
+ case eGapAlignIns1: /* Insertion of one nucleotide. */
+ case eGapAlignIns2: /* Insertion of two nucleotides. */
if(first_shift == TRUE) { /* Second frameshift in a row */
/* Protein coordinates */
@@ -1147,7 +1124,8 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
/* Nucleotide scale shifted by op_type */
seq_int2 = SeqIntNew();
- seq_int2->from = get_current_pos(&start2, curr->op_type);
+ seq_int2->from =
+ get_current_pos(&start2, (Uint1)curr->op_type);
seq_int2->to = start2 - 1;
if(seq_int2->to >= original_length2) {
@@ -1181,10 +1159,8 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
/* If this substitution is following simple frameshift
we do not need to start new segment, but may continue
old one */
- /* printf("curr_num = %d (%d)\n", curr->num, curr->op_type); */
-
if(seq_int2_last != NULL) {
- get_current_pos(&start2, curr->num*(curr->op_type-3));
+ get_current_pos(&start2, curr->num*((Uint1)curr->op_type-3));
if(strand2 != Seq_strand_minus) {
seq_int2_last->to = start2 - 1;
} else {
@@ -1206,13 +1182,14 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
seq_int1_last++;
}
- } else if (curr->op_type > 3) {
+ } else if ((Uint1)curr->op_type > 3) {
/* Protein piece is empty */
ValNodeAddPointer(&slp1, SEQLOC_EMPTY, SeqIdDup(id1));
/* Simulating insertion of nucleotides */
seq_int2 = SeqIntNew();
- seq_int2->from = get_current_pos(&start2,
- curr->num*(curr->op_type-3));
+ seq_int2->from =
+ get_current_pos(&start2,
+ curr->num*((Uint1)curr->op_type-3));
seq_int2->to = start2 - 1;
if(seq_int2->to >= original_length2) {
@@ -1279,9 +1256,8 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
static Int2
BLAST_GapInfoToSeqAlign(Uint1 program_number, BlastHSPList* hsp_list,
- SeqIdPtr query_id, SeqIdPtr subject_id, BlastScoreBlk* sbp,
- Int4 query_length, BlastScoringOptions* score_options,
- SeqAlignPtr* head_seqalign)
+ SeqIdPtr query_id, SeqIdPtr subject_id, Int4 query_length,
+ Boolean is_ooframe, SeqAlignPtr* head_seqalign)
{
Int2 status = 0;
BlastHSP** hsp_array;
@@ -1294,7 +1270,7 @@ BLAST_GapInfoToSeqAlign(Uint1 program_number, BlastHSPList* hsp_list,
for (index=0; index<hsp_list->hspcnt; index++) {
hsp_array[index]->gap_info->original_length1 = query_length;
- if (score_options->is_ooframe) {
+ if (is_ooframe) {
seqalign = BLAST_OOFEditBlockToSeqAlign(program_number,
hsp_array[index]->gap_info,
query_id, subject_id);
@@ -1310,9 +1286,7 @@ BLAST_GapInfoToSeqAlign(Uint1 program_number, BlastHSPList* hsp_list,
last_seqalign->next = seqalign;
last_seqalign = last_seqalign->next;
}
- seqalign->score =
- GetScoreSetFromBlastHsp(program_number, hsp_array[index], sbp,
- score_options);
+ seqalign->score = GetScoreSetFromBlastHsp(hsp_array[index]);
}
return status;
@@ -1320,9 +1294,8 @@ BLAST_GapInfoToSeqAlign(Uint1 program_number, BlastHSPList* hsp_list,
Int2 BLAST_ResultsToSeqAlign(Uint1 program_number,
BlastHSPResults* results, SeqLocPtr query_slp,
- BlastSeqSrc* bssp, SeqLocPtr subject_slp,
- BlastScoringOptions* score_options, BlastScoreBlk* sbp,
- Boolean is_gapped, SeqAlignPtr* head_seqalign)
+ BlastSeqSrc* seq_src, Boolean is_gapped, Boolean is_ooframe,
+ SeqAlignPtr* head_seqalign)
{
Int4 query_index, subject_index;
SeqLocPtr slp = query_slp;
@@ -1331,13 +1304,16 @@ Int2 BLAST_ResultsToSeqAlign(Uint1 program_number,
BlastHSPList* hsp_list;
SeqAlignPtr seqalign = NULL, last_seqalign = NULL;
Int4 subject_length = 0;
+ ListNode* seqid_wrap = NULL;
+ char* bad_id_str = NULL; /* In case an unknown id is returned from sequence
+ source */
*head_seqalign = NULL;
+ if (!results)
+ return 0;
- if (!bssp) {
- subject_id = SeqLocId(subject_slp);
- subject_length = SeqLocLen(subject_slp);
- }
+ if (!seq_src)
+ return -1;
for (query_index = 0; slp && query_index < results->num_queries;
++query_index, slp = slp->next) {
@@ -1351,22 +1327,30 @@ Int2 BLAST_ResultsToSeqAlign(Uint1 program_number,
hsp_list = hit_list->hsplist_array[subject_index];
if (!hsp_list)
continue;
- if (bssp) {
- char* id_str = BLASTSeqSrcGetSeqIdStr(bssp, (void*) &hsp_list->oid);
- subject_id = SeqIdParse(id_str);
- subject_length = BLASTSeqSrcGetSeqLen(bssp, (void*) &hsp_list->oid);
- sfree(id_str);
+ bad_id_str = NULL;
+ seqid_wrap = BLASTSeqSrcGetSeqId(seq_src, (void*) &hsp_list->oid);
+ if (seqid_wrap->choice == BLAST_SEQSRC_C_SEQID) {
+ subject_id = (SeqId*) seqid_wrap->ptr;
+ ListNodeFree(seqid_wrap);
+ } else {
+ /* Should not happen: wrong type of subject id returned;
+ Create a fake local id. */
+ bad_id_str = strdup("lcl|unknown");
+ subject_id = SeqIdParse(bad_id_str);
}
+ subject_length =
+ BLASTSeqSrcGetSeqLen(seq_src, (void*) &hsp_list->oid);
if (is_gapped) {
BLAST_GapInfoToSeqAlign(program_number, hsp_list, query_id,
- subject_id, sbp, SeqLocLen(slp), score_options, &seqalign);
+ subject_id, SeqLocLen(slp), is_ooframe, &seqalign);
} else {
BLAST_UngappedHSPToSeqAlign(program_number, hsp_list, query_id,
- subject_id, sbp, SeqLocLen(slp), subject_length,
- score_options, &seqalign);
+ subject_id, SeqLocLen(slp), subject_length, &seqalign);
}
- if (bssp)
+ /* If unknown seqid was allocated here, free it, since it has been
+ duplicated in the Seq-align */
+ if (bad_id_str)
SeqIdSetFree(subject_id);
if (seqalign) {
diff --git a/algo/blast/api/blast_seqalign.h b/algo/blast/api/blast_seqalign.h
index d78377dd..c6d009d7 100644
--- a/algo/blast/api/blast_seqalign.h
+++ b/algo/blast/api/blast_seqalign.h
@@ -1,4 +1,4 @@
-/* $Id: blast_seqalign.h,v 1.13 2004/03/12 15:18:53 coulouri Exp $
+/* $Id: blast_seqalign.h,v 1.16 2004/06/08 17:47:24 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,7 +32,7 @@ Author: Ilya Dondoshansky
Contents: Functions to convert BLAST results to the SeqAlign form
******************************************************************************
- * $Revision: 1.13 $
+ * $Revision: 1.16 $
* */
#ifndef __BLAST_SEQALIGN__
#define __BLAST_SEQALIGN__
@@ -55,17 +55,14 @@ extern "C" {
* @param program_number Type of BLAST program [in]
* @param results The BLAST results [in]
* @param query_slp List of query SeqLoc's [in]
- * @param bssp Pointer to the BLAST database wrapper structure [in]
- * @param subject_slp Subject SeqLoc (for two sequences search) [in]
- * @param score_options Scoring options block [in]
- * @param sbp Scoring and statistical information [in]
+ * @param seq_src Pointer to the BLAST database wrapper structure [in]
* @param is_gapped Is this a gapped alignment search? [in]
+ * @param is_ooframe Is this a search with out-of-frame gapping? [in]
* @param head_seqalign List of SeqAlign's [out]
*/
Int2 BLAST_ResultsToSeqAlign(Uint1 program_number, BlastHSPResults* results,
- SeqLocPtr query_slp, BlastSeqSrc* bssp, SeqLocPtr subject_slp,
- BlastScoringOptions* score_options, BlastScoreBlk* sbp,
- Boolean is_gapped, SeqAlignPtr* head_seqalign);
+ SeqLocPtr query_slp, BlastSeqSrc* seq_src,
+ Boolean is_gapped, Boolean is_ooframe, SeqAlignPtr* head_seqalign);
Boolean GapCollectDataForSeqalign(GapEditBlock* edit_block,
GapEditScript* curr_in, Int4 numseg,
diff --git a/algo/blast/api/blast_tabular.c b/algo/blast/api/blast_tabular.c
new file mode 100644
index 00000000..7963dade
--- /dev/null
+++ b/algo/blast/api/blast_tabular.c
@@ -0,0 +1,238 @@
+/* $Id: blast_tabular.c,v 1.3 2004/06/14 20:43:30 dondosha Exp $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================*/
+
+/*****************************************************************************
+
+File name: blast_tabular.c
+
+Author: Ilya Dondoshansky
+
+Contents: On-the-fly tabular formatting of BLAST results
+
+Detailed Contents:
+
+******************************************************************************
+ * $Revision: 1.3 $
+ * */
+
+static char const rcsid[] = "$Id: blast_tabular.c,v 1.3 2004/06/14 20:43:30 dondosha Exp $";
+
+#include <algo/blast/api/blast_tabular.h>
+#include <algo/blast/core/blast_util.h>
+#include <algo/blast/core/blast_setup.h>
+#include <algo/blast/core/blast_engine.h>
+#include <algo/blast/core/blast_traceback.h>
+#include <algo/blast/api/blast_format.h>
+#include <txalign.h>
+
+/** Allocates and fills in the data consumed by Blast_TabularFormatThread.
+ *
+ * The caller's hsp_stream, query, query_slp and outfp pointers are stored
+ * as is. The sequence source is stored as a copy obtained from
+ * BlastSeqSrcCopy and the query information as a duplicate obtained from
+ * BlastQueryInfoDup. When traceback is required (gapped calculation and
+ * traceback extension not skipped), the parameter structures and the gapped
+ * alignment structure are prepared through BLAST_GapAlignSetUp.
+ *
+ * @param program Type of BLAST program [in]
+ * @param hsp_stream Source of the BLAST results [in]
+ * @param seq_src Source of the subject sequences [in]
+ * @param query Query sequence [in]
+ * @param query_info Query information [in]
+ * @param score_options Scoring options; must not be NULL [in]
+ * @param sbp Scoring block, passed on to BLAST_GapAlignSetUp [in]
+ * @param eff_len_options Effective lengths options [in]
+ * @param ext_options Extension options; must not be NULL [in]
+ * @param hit_options Hit saving options [in]
+ * @param db_options Database options; must not be NULL [in]
+ * @param query_slp Source of the query sequence identifiers [in]
+ * @param outfp Output stream [in]
+ * @return Newly allocated structure, or NULL if a required options pointer
+ *         is missing or memory could not be allocated.
+ */
+BlastTabularFormatData*
+Blast_TabularFormatDataInit(Uint1 program, BlastHSPStream* hsp_stream,
+   BlastSeqSrc* seq_src, BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
+   const BlastScoringOptions* score_options, BlastScoreBlk* sbp,
+   const BlastEffectiveLengthsOptions* eff_len_options,
+   const BlastExtensionOptions* ext_options,
+   const BlastHitSavingOptions* hit_options,
+   const BlastDatabaseOptions* db_options,
+   SeqLoc* query_slp, FILE* outfp)
+{
+   BlastTabularFormatData* tf_data = NULL;
+
+   /* These option blocks are dereferenced unconditionally below. */
+   if (!score_options || !ext_options || !db_options)
+      return NULL;
+
+   tf_data =
+      (BlastTabularFormatData*) calloc(1, sizeof(BlastTabularFormatData));
+   if (!tf_data)
+      return NULL;
+
+   tf_data->perform_traceback =
+      (score_options->gapped_calculation &&
+       ext_options->eTbackExt != eSkipTbck);
+
+   tf_data->program = program;
+   tf_data->hsp_stream = hsp_stream;
+   tf_data->query = query;
+   tf_data->gen_code_string = db_options->gen_code_string;
+   tf_data->query_slp = query_slp;
+   tf_data->outfp = outfp;
+   /* Sequence source must be copied, to guarantee multi-thread safety. */
+   tf_data->seq_src = BlastSeqSrcCopy(seq_src);
+   /* Effective lengths must be duplicated in query info structure, because
+      they might be changing in the preliminary search. */
+   tf_data->query_info = BlastQueryInfoDup(query_info);
+
+   /* If traceback will have to be performed before tabular output,
+      do the preparation for it here. */
+   /* NOTE(review): the set-up below receives the caller's seq_src and
+      query_info, not the copies stored in tf_data above - confirm that this
+      is intended. */
+   if (tf_data->perform_traceback) {
+      BLAST_GapAlignSetUp(program, seq_src,
+         score_options, eff_len_options, ext_options, hit_options,
+         query_info, sbp, &tf_data->score_params, &tf_data->ext_params,
+         &tf_data->hit_params, &tf_data->eff_len_params, &tf_data->gap_align);
+   }
+
+   return tf_data;
+}
+
+/** Frees the tabular formatting data structure together with the
+ * substructures that were created for it during initialization. The HSP
+ * stream, query block, query SeqLoc list, genetic code string and output
+ * file are not released here.
+ * @param tf_data Structure to free; a NULL pointer is ignored [in]
+ */
+void BlastTabularFormatDataFree(BlastTabularFormatData* tf_data)
+{
+   /* Nothing to do for a structure that was never allocated. */
+   if (!tf_data)
+      return;
+
+   /* Free only the structures that have been initialized internally */
+   tf_data->query_info = BlastQueryInfoFree(tf_data->query_info);
+   tf_data->score_params = BlastScoringParametersFree(tf_data->score_params);
+   tf_data->ext_params = BlastExtensionParametersFree(tf_data->ext_params);
+   tf_data->hit_params = BlastHitSavingParametersFree(tf_data->hit_params);
+   tf_data->eff_len_params =
+      BlastEffectiveLengthsParametersFree(tf_data->eff_len_params);
+   tf_data->gap_align = BLAST_GapAlignStructFree(tf_data->gap_align);
+   tf_data->seq_src = BlastSeqSrcFree(tf_data->seq_src);
+   sfree(tf_data);
+}
+
+/* This function might serve as a starting point for a callback function
+ * that prints results before the traceback stage, e.g. the on-the-fly
+ * tabular output, a la the -D3 option of the old megablast.
+ *
+ * Reads HSP lists from the HSP stream until end of stream, optionally
+ * performs traceback on each of them, and prints one tab-delimited line per
+ * HSP to the output file. Every HSP list obtained from the stream is freed
+ * here after it has been printed or skipped. The formatting data structure
+ * passed in is freed before a normal return.
+ *
+ * @param data Pointer to a BlastTabularFormatData structure [in]
+ * @return Always NULL.
+ */
+void* Blast_TabularFormatThread(void* data)
+{
+   BlastTabularFormatData* tf_data;
+   Uint1 program;
+   BlastHSPList* hsp_list = NULL;
+   BlastSeqSrc* seq_src;
+   BLAST_SequenceBlk* query = NULL;
+   BlastQueryInfo* query_info = NULL;
+   BlastScoringParameters* score_params = NULL;
+   BlastExtensionParameters* ext_params = NULL;
+   BlastHitSavingParameters* hit_params = NULL;
+   BlastEffectiveLengthsParameters* eff_len_params = NULL;
+   Uint1* gen_code_string = NULL;
+   BlastGapAlignStruct* gap_align = NULL;
+   Int4 query_index, index;
+   char* query_buffer = NULL;
+   char* subject_buffer = NULL;
+   Int4 q_start=0, q_end=0, s_start=0, s_end=0;
+   SeqLocPtr slp;
+   char bit_score_buff[10], eval_buff[10];
+   char* eval_buff_ptr = NULL;
+   BlastHSP* hsp;
+   SeqId** query_id_array = NULL;
+   Int4 align_length = 0;
+   Int4 num_gaps = 0, num_gap_opens = 0, num_mismatches = 0;
+   double perc_ident = 0;
+   GetSeqArg seq_arg;
+   Boolean one_seq_update_params;
+
+   tf_data = (BlastTabularFormatData*) data;
+   if (!tf_data || !tf_data->query_slp || !tf_data->hsp_stream ||
+       !tf_data->seq_src || !tf_data->outfp)
+      return NULL;
+
+   program = tf_data->program;
+   seq_src = tf_data->seq_src;
+
+   if (tf_data->perform_traceback) {
+      query = tf_data->query;
+      query_info = tf_data->query_info;
+      score_params = tf_data->score_params;
+      ext_params = tf_data->ext_params;
+      hit_params = tf_data->hit_params;
+      eff_len_params = tf_data->eff_len_params;
+      gap_align = tf_data->gap_align;
+      gen_code_string = tf_data->gen_code_string;
+
+      memset((void*) &seq_arg, 0, sizeof(seq_arg));
+      seq_arg.encoding = Blast_TracebackGetEncoding(program);
+   }
+
+   /* Table of query ids, indexed by the position of the query in the list.
+      The entries point into the query SeqLocs, so only the array itself is
+      released at the end. */
+   query_id_array =
+      (SeqId**) malloc(ValNodeLen(tf_data->query_slp)*sizeof(SeqId*));
+   if (!query_id_array) {
+      /* Cannot format anything without the query ids. */
+      BlastTabularFormatDataFree(tf_data);
+      return NULL;
+   }
+
+   for (index = 0, slp = tf_data->query_slp; slp; ++index, slp = slp->next) {
+      query_id_array[index] = SeqLocId(slp);
+   }
+
+   one_seq_update_params = (BLASTSeqSrcGetTotLen(seq_src) == 0);
+
+   while (BlastHSPStreamRead(tf_data->hsp_stream, &hsp_list)
+          != kBlastHSPStream_Eof) {
+      if (!hsp_list) {
+         /* This should not happen, but just in case */
+         continue;
+      }
+
+      /* Perform traceback if necessary */
+      if (tf_data->perform_traceback) {
+         BlastSequenceBlkClean(seq_arg.seq);
+         seq_arg.oid = hsp_list->oid;
+         if (BLASTSeqSrcGetSequence(seq_src, (void*) &seq_arg) < 0) {
+            /* The stream handed this list over to us; do not leak it. */
+            hsp_list = Blast_HSPListFree(hsp_list);
+            continue;
+         }
+
+         if (one_seq_update_params) {
+            Int2 status;
+            /* This is not a database search, so effective search spaces
+               need to be recalculated based on this subject sequence length */
+            if ((status = BLAST_OneSubjectUpdateParameters(program,
+                             seq_arg.seq->length,
+                             score_params->options,
+                             query_info, gap_align->sbp,
+                             ext_params, hit_params, NULL,
+                             eff_len_params)) != 0) {
+               /* Give the sequence back, as on the normal path, and drop
+                  the list that will not be printed. */
+               BLASTSeqSrcRetSequence(seq_src, (void*)&seq_arg);
+               hsp_list = Blast_HSPListFree(hsp_list);
+               continue;
+            }
+         }
+
+         Blast_TracebackFromHSPList(program, hsp_list, query,
+            seq_arg.seq, query_info, gap_align, gap_align->sbp, score_params,
+            ext_params->options, hit_params, gen_code_string);
+         BLASTSeqSrcRetSequence(seq_src, (void*)&seq_arg);
+         /* Recalculate the bit scores, since they might have changed. */
+         Blast_HSPListGetBitScores(hsp_list,
+            score_params->options->gapped_calculation, gap_align->sbp);
+      }
+      subject_buffer =
+         BLASTSeqSrcGetSeqIdStr(seq_src, (void*) &hsp_list->oid);
+
+      for (index = 0; index < hsp_list->hspcnt; ++index) {
+         hsp = hsp_list->hsp_array[index];
+         query_index =
+            Blast_GetQueryIndexFromContext(hsp->context, program);
+         /* NOTE(review): if Blast_SeqIdGetDefLine allocates query_buffer on
+            every call, it should be freed after the fprintf below -
+            confirm against its documentation. */
+         Blast_SeqIdGetDefLine(query_id_array[query_index], NULL,
+                               &query_buffer, TRUE, FALSE, TRUE, FALSE);
+
+         eval_buff_ptr = eval_buff;
+         ScoreAndEvalueToBuffers(hsp->bit_score, hsp->evalue,
+                                 bit_score_buff, &eval_buff_ptr, FALSE);
+
+         /* Calculate percentage of identities */
+         Blast_HSPCalcLengthAndGaps(hsp, &align_length, &num_gaps,
+                                    &num_gap_opens);
+         perc_ident = ((double)hsp->num_ident)/align_length * 100;
+         num_mismatches = align_length - hsp->num_ident - num_gaps;
+
+         Blast_HSPGetAdjustedOffsets(hsp, &q_start, &q_end, &s_start, &s_end);
+
+         fprintf(tf_data->outfp,
+                 "%s\t%s\t%.2f\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%s\t%s\n",
+                 query_buffer, subject_buffer, perc_ident,
+                 (long) align_length, (long) num_mismatches,
+                 (long) num_gap_opens, (long) q_start, (long) q_end,
+                 (long) s_start, (long) s_end, eval_buff, bit_score_buff);
+      }
+      fflush(tf_data->outfp);
+
+      /* The id string is owned by the caller of BLASTSeqSrcGetSeqIdStr. */
+      sfree(subject_buffer);
+      /* Reading from the stream transferred ownership of the list to this
+         function; release it now that all its HSPs have been printed. */
+      hsp_list = Blast_HSPListFree(hsp_list);
+   }
+
+   sfree(query_id_array);
+   /* Deallocate the formatting thread data structure */
+   BlastTabularFormatDataFree(tf_data);
+   return NULL;
+}
diff --git a/algo/blast/api/blast_tabular.h b/algo/blast/api/blast_tabular.h
new file mode 100644
index 00000000..7befa54f
--- /dev/null
+++ b/algo/blast/api/blast_tabular.h
@@ -0,0 +1,101 @@
+/* $Id: blast_tabular.h,v 1.2 2004/06/14 20:43:30 dondosha Exp $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's offical duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================*/
+
+/*****************************************************************************
+
+File name: blast_tabular.h
+
+Author: Ilya Dondoshansky
+
+Contents: Functions needed for formatting of BLAST results
+
+******************************************************************************
+ * $Revision: 1.2 $
+ * */
+#ifndef __BLAST_TABULAR__
+#define __BLAST_TABULAR__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef NCBI_C_TOOLKIT
+#define NCBI_C_TOOLKIT
+#endif
+
+#include <ncbi.h>
+#include <asn.h>
+#include <algo/blast/core/blast_hits.h>
+#include <algo/blast/core/lookup_wrap.h>
+#include <algo/blast/core/blast_seqsrc.h>
+#include <algo/blast/core/blast_hspstream.h>
+#include <algo/blast/core/blast_gapalign.h>
+#include <objloc.h>
+
+/** Data structure containing all information necessary for production of the
+ * tabular output.
+ * The parameter structures and the gapped alignment structure are only
+ * set up when perform_traceback is true.
+ */
+typedef struct BlastTabularFormatData {
+ Uint1 program; /**< Type of BLAST program */
+ BlastHSPStream* hsp_stream; /**< Source of the BLAST results */
+ BlastSeqSrc* seq_src; /**< Source of the subject sequences */
+ BLAST_SequenceBlk* query; /**< Query sequence */
+ BlastQueryInfo* query_info; /**< Query information, including context
+ offsets and effective lengths. */
+ BlastScoringParameters* score_params; /**< Scoring parameters passed to the
+ traceback */
+ BlastExtensionParameters* ext_params; /**< Extension parameters; their
+ options are passed to the traceback */
+ BlastHitSavingParameters* hit_params; /**< Hit saving parameters passed to
+ the traceback */
+ BlastEffectiveLengthsParameters* eff_len_params; /**< Effective lengths
+ parameters, used when search spaces are
+ recalculated for a single subject */
+ Uint1* gen_code_string; /**< Taken from the database options and passed to
+ the traceback */
+ BlastGapAlignStruct* gap_align; /**< Gapped alignment structure passed to
+ the traceback; its sbp field supplies
+ the scoring block */
+ SeqLoc* query_slp; /**< Source of query sequences identifiers */
+ FILE* outfp; /**< Output stream */
+ Boolean perform_traceback; /**< Must gapped extension with traceback be
+ performed before formatting? */
+} BlastTabularFormatData;
+
+/** Function initializing the BlastTabularFormatData data structure fields. */
+BlastTabularFormatData*
+Blast_TabularFormatDataInit(Uint1 program, BlastHSPStream* hsp_stream,
+ BlastSeqSrc* seq_src, BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
+ const BlastScoringOptions* scoring_options, BlastScoreBlk* sbp,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ const BlastDatabaseOptions* db_options, SeqLoc* query_slp, FILE* outfp);
+
+/** Free the tabular formatting data structure and all its internally
+ * allocated substructures.
+ */
+void BlastTabularFormatDataFree(BlastTabularFormatData* tf_data);
+
+/** Driver for the thread producing tabular output. */
+void* Blast_TabularFormatThread(void* data);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !__BLAST_TABULAR__ */
+
diff --git a/algo/blast/api/hspstream_queue.c b/algo/blast/api/hspstream_queue.c
new file mode 100644
index 00000000..ea5dac83
--- /dev/null
+++ b/algo/blast/api/hspstream_queue.c
@@ -0,0 +1,182 @@
+/* $Id: hspstream_queue.c,v 1.2 2004/06/08 17:46:35 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file hspstream_queue.c
+ * Implementation of the BlastHSPStream interface for producing BLAST results
+ * on the fly.
+ */
+
+static char const rcsid[] =
+ "$Id: hspstream_queue.c,v 1.2 2004/06/08 17:46:35 dondosha Exp $";
+
+
+#include <algo/blast/core/blast_hits.h>
+#include <algo/blast/api/hspstream_queue.h>
+#include <ncbithr.h>
+
+/** Default hit saving stream methods */
+
+/** Destructor of the queue implementation of the HSP stream: destroys the
+ * semaphore and the mutex, frees every HSP list still left in the queue,
+ * then the queue nodes, the queue data and the stream itself.
+ * @param hsp_stream Stream to destroy [in]
+ * @return Always NULL.
+ */
+static BlastHSPStream*
+BlastHSPListQueueFree(BlastHSPStream* hsp_stream)
+{
+   BlastHSPListQueueData* queue =
+      (BlastHSPListQueueData*) GetData(hsp_stream);
+   ListNode* item = NULL;
+
+   NlmSemaDestroy(queue->m_resultsSema);
+   NlmMutexDestroy(queue->m_resultsMutex);
+
+   /* First release the payload of every node that was never read... */
+   item = queue->m_queueStart;
+   while (item) {
+      item->ptr = (void*) Blast_HSPListFree((BlastHSPList*) item->ptr);
+      item = item->next;
+   }
+   /* ...then the nodes themselves. */
+   queue->m_queueStart = ListNodeFree(queue->m_queueStart);
+
+   sfree(queue);
+   sfree(hsp_stream);
+   return NULL;
+}
+
+/** Read method of the queue implementation of the HSP stream. Waits until
+ * either an HSP list is available or the stream has been closed for
+ * writing, then removes the first list from the queue and hands it to the
+ * caller.
+ * @param hsp_stream Stream to read from [in]
+ * @param hsp_list_out First HSP list in the queue, or NULL at end of
+ *                     stream [out]
+ * @return kBlastHSPStream_Success if a list was returned,
+ *         kBlastHSPStream_Eof if the queue is empty and closed for writing.
+ */
+static int
+BlastHSPListQueueRead(BlastHSPStream* hsp_stream,
+                      BlastHSPList** hsp_list_out)
+{
+   BlastHSPListQueueData* queue =
+      (BlastHSPListQueueData*) GetData(hsp_stream);
+   ListNode* head = NULL;
+   int status;
+
+   /* The queue contents may only be inspected with the mutex held. */
+   NlmMutexLockEx(&queue->m_resultsMutex);
+
+   /* While the queue is empty but still open for writing, release the
+    * mutex, wait on the semaphore until a writer (or the close method)
+    * posts it, then take the mutex again and re-check. */
+   while (!queue->m_writingDone && !queue->m_queueStart) {
+      NlmMutexUnlock(queue->m_resultsMutex);
+      NlmSemaWait(queue->m_resultsSema);
+      NlmMutexLockEx(&queue->m_resultsMutex);
+   }
+
+   head = queue->m_queueStart;
+   if (head) {
+      /* Detach the first node and pass its payload to the caller. */
+      *hsp_list_out = (BlastHSPList*) head->ptr;
+      queue->m_queueStart = head->next;
+      head->next = NULL;
+      ListNodeFree(head);
+      /* The queue has become empty, so it has no last node either. */
+      if (!queue->m_queueStart)
+         queue->m_queueEnd = NULL;
+      status = kBlastHSPStream_Success;
+   } else {
+      /* Nothing in the queue, and no more writing to it is expected. */
+      *hsp_list_out = NULL;
+      status = kBlastHSPStream_Eof;
+   }
+
+   NlmMutexUnlock(queue->m_resultsMutex);
+
+   return status;
+}
+
+/** Write method of the queue implementation of the HSP stream. Appends the
+ * HSP list to the end of the queue, takes over its ownership and posts the
+ * semaphore so that a waiting reader can proceed.
+ * @param hsp_stream Stream to write to [in]
+ * @param hsp_list HSP list to save; set to NULL once it has been queued
+ *                 [in] [out]
+ * @return kBlastHSPStream_Success if the list was queued or was empty,
+ *         kBlastHSPStream_Error if the stream is already closed for writing.
+ */
+static int
+BlastHSPListQueueWrite(BlastHSPStream* hsp_stream,
+                       BlastHSPList** hsp_list)
+{
+   BlastHSPListQueueData* stream_data =
+      (BlastHSPListQueueData*) GetData(hsp_stream);
+
+   /* If input is empty, don't do anything, but return success */
+   if (*hsp_list == NULL)
+      return kBlastHSPStream_Success;
+
+   NlmMutexLockEx(&stream_data->m_resultsMutex);
+
+   /* If stream is closed for writing, return error. The flag is checked
+    * with the mutex held, as it is in the read and close methods, so that a
+    * list cannot be queued after the stream has been closed. */
+   if (stream_data->m_writingDone) {
+      NlmMutexUnlock(stream_data->m_resultsMutex);
+      return kBlastHSPStream_Error;
+   }
+
+   stream_data->m_queueEnd =
+      ListNodeAddPointer(&stream_data->m_queueEnd, 0, (void*)(*hsp_list));
+   if (!stream_data->m_queueStart)
+      stream_data->m_queueStart = stream_data->m_queueEnd;
+   /* Free the caller from this pointer's ownership. */
+   *hsp_list = NULL;
+   /* Increment the semaphore count. */
+   NlmSemaPost(stream_data->m_resultsSema);
+   NlmMutexUnlock(stream_data->m_resultsMutex);
+
+   return kBlastHSPStream_Success;
+}
+
+/** Close method of the queue implementation of the HSP stream: marks the
+ * stream as closed for writing and wakes up a reader that may be waiting.
+ * @param hsp_stream Stream to close [in]
+ */
+static void
+BlastHSPListQueueClose(BlastHSPStream* hsp_stream)
+{
+   BlastHSPListQueueData* queue =
+      (BlastHSPListQueueData*) GetData(hsp_stream);
+
+   NlmMutexLockEx(&queue->m_resultsMutex);
+   queue->m_writingDone = TRUE;
+   /* Post the semaphore, so a reader blocked in the wait can resume and
+    * see that no more writing will happen. */
+   NlmSemaPost(queue->m_resultsSema);
+   NlmMutexUnlock(queue->m_resultsMutex);
+}
+
+/** Constructor callback of the queue implementation of the HSP stream:
+ * installs the destructor, read, write and close methods on the stream and
+ * attaches the queue data to it.
+ * @param hsp_stream Stream being constructed [in] [out]
+ * @param args Queue data to attach to the stream [in]
+ * @return The hsp_stream pointer that was passed in.
+ */
+static BlastHSPStream*
+BlastHSPListQueueNew(BlastHSPStream* hsp_stream, void* args)
+{
+   BlastHSPStreamFunctionPointerTypes method_ptr;
+
+   /* Destructor */
+   method_ptr.dtor = &BlastHSPListQueueFree;
+   SetMethod(hsp_stream, eDestructor, method_ptr);
+
+   /* Reading and writing share the same member of the pointer type */
+   method_ptr.method = &BlastHSPListQueueRead;
+   SetMethod(hsp_stream, eRead, method_ptr);
+   method_ptr.method = &BlastHSPListQueueWrite;
+   SetMethod(hsp_stream, eWrite, method_ptr);
+
+   /* Closing for writing */
+   method_ptr.closeFn = &BlastHSPListQueueClose;
+   SetMethod(hsp_stream, eClose, method_ptr);
+
+   SetData(hsp_stream, args);
+   return hsp_stream;
+}
+
+/** Creates an HSP stream backed by a queue of HSP lists. The queue starts
+ * out empty and open for writing.
+ * @return New HSP stream, or NULL if the queue data could not be allocated.
+ */
+BlastHSPStream* Blast_HSPListQueueInit(void)
+{
+   BlastHSPListQueueData* stream_data =
+      (BlastHSPListQueueData*) calloc(1, sizeof(BlastHSPListQueueData));
+   BlastHSPStreamNewInfo info;
+
+   /* Without the queue data there is nothing to build the stream on. */
+   if (!stream_data)
+      return NULL;
+
+   /* At the start of the search there is nothing in the results queue, so
+    * initialize the semaphore count with 0. */
+   stream_data->m_resultsSema = NlmSemaInit(0);
+   info.constructor = &BlastHSPListQueueNew;
+   info.ctor_argument = (void*)stream_data;
+
+   return BlastHSPStreamNew(&info);
+}
diff --git a/algo/blast/api/hspstream_queue.h b/algo/blast/api/hspstream_queue.h
new file mode 100644
index 00000000..1b00b0e7
--- /dev/null
+++ b/algo/blast/api/hspstream_queue.h
@@ -0,0 +1,64 @@
+/* $Id: hspstream_queue.h,v 1.2 2004/06/08 17:46:35 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file hspstream_queue.h
+ * Implementation of the BlastHSPStream interface for producing results on the
+ * fly.
+ */
+
+#ifndef HSPSTREAM_QUEUE_H
+#define HSPSTREAM_QUEUE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <ncbithr.h>
+#include <algo/blast/core/blast_options.h>
+#include <algo/blast/core/blast_hits.h>
+#include <algo/blast/core/blast_seqsrc.h>
+#include <algo/blast/core/blast_hspstream.h>
+
+/** Data structure for the queue implementation of BlastHSPStream */
+typedef struct BlastHSPListQueueData {
+ ListNode* m_queueStart; /**< First node of the queue; HSP lists are read
+ from here */
+ ListNode* m_queueEnd; /**< Last node of the queue; HSP lists are appended
+ here */
+ Boolean m_writingDone; /**< Set when the stream is closed for writing */
+ TNlmMutex m_resultsMutex; /**< Locked while the queue is read, written or
+ closed */
+ TNlmSemaphore m_resultsSema; /**< Posted on every write and on close;
+ the reader waits on it while the queue
+ is empty */
+} BlastHSPListQueueData;
+
+/** Function to initialize the queue implementation of BlastHSPStream */
+BlastHSPStream* Blast_HSPListQueueInit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HSPSTREAM_QUEUE_H */
diff --git a/algo/blast/api/multiseq_src.c b/algo/blast/api/multiseq_src.c
index 31018649..c8e57990 100644
--- a/algo/blast/api/multiseq_src.c
+++ b/algo/blast/api/multiseq_src.c
@@ -1,4 +1,4 @@
-/* $Id: multiseq_src.c,v 1.5 2004/04/28 19:50:02 dondosha Exp $
+/* $Id: multiseq_src.c,v 1.6 2004/06/08 17:46:35 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -31,7 +31,7 @@
*
*/
-static char const rcsid[] = "$Id: multiseq_src.c,v 1.5 2004/04/28 19:50:02 dondosha Exp $";
+static char const rcsid[] = "$Id: multiseq_src.c,v 1.6 2004/06/08 17:46:35 dondosha Exp $";
#include <algo/blast/api/multiseq_src.h>
#include <algo/blast/core/blast_util.h>
@@ -57,7 +57,7 @@ static MultiSeqInfo* MultiSeqInfoNew(const SeqLoc* seqloc_list, Uint1 program)
index < num_seqs; ++index, seqloc_ptr = seqloc_ptr->next) {
retval->seqloc_array[index] = seqloc_ptr;
BLAST_SetUpSubject(program, seqloc_ptr, &retval->seqblk_array[index]);
- max_length = MAX(max_length, retval->seqblk_array[index]->length);
+ max_length = MAX(max_length, (Uint4)retval->seqblk_array[index]->length);
}
retval->max_length = max_length;
diff --git a/algo/blast/api/seqsrc_readdb.c b/algo/blast/api/seqsrc_readdb.c
index 1688c3cf..935dcb55 100644
--- a/algo/blast/api/seqsrc_readdb.c
+++ b/algo/blast/api/seqsrc_readdb.c
@@ -1,4 +1,4 @@
-/* $Id: seqsrc_readdb.c,v 1.27 2004/04/28 19:39:01 dondosha Exp $
+/* $Id: seqsrc_readdb.c,v 1.29 2004/06/07 17:14:58 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -30,7 +30,7 @@
*
*/
-static char const rcsid[] = "$Id: seqsrc_readdb.c,v 1.27 2004/04/28 19:39:01 dondosha Exp $";
+static char const rcsid[] = "$Id: seqsrc_readdb.c,v 1.29 2004/06/07 17:14:58 dondosha Exp $";
#include <algo/blast/api/seqsrc_readdb.h>
#include <algo/blast/core/blast_def.h>
@@ -197,8 +197,10 @@ static Int2 ReaddbRetSequence(void* readdb_handle, void* args)
ASSERT(readdb_args);
- if (readdb_args->seq->sequence_start_allocated)
+ if (readdb_args->seq->sequence_start_allocated) {
sfree(readdb_args->seq->sequence_start);
+ readdb_args->seq->sequence_start_allocated = FALSE;
+ }
return 0;
}
@@ -215,21 +217,26 @@ static char* ReaddbGetSeqIdStr(void* readdb_handle, void* args)
ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
Int4* oid = (Int4*) args;
SeqIdPtr sip = NULL;
+ char* descr = NULL;
char *seqid_str = NULL;
if (!rdfp || !oid)
return NULL;
- if ( !(seqid_str = (char*) malloc(sizeof(char)*SEQIDLEN_MAX)))
- return NULL;
-
- if (!readdb_get_descriptor(rdfp, *oid, &sip, NULL)) {
+ if (!readdb_get_descriptor(rdfp, *oid, &sip, &descr)) {
sfree(seqid_str);
return NULL;
}
- SeqIdWrite(sip, seqid_str, PRINTID_FASTA_LONG, SEQIDLEN_MAX-1);
-
+ if (sip->choice != SEQID_GENERAL ||
+ strcmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
+ if ( !(seqid_str = (char*) malloc(sizeof(char)*SEQIDLEN_MAX)))
+ return NULL;
+ SeqIdWrite(sip, seqid_str, PRINTID_FASTA_LONG, SEQIDLEN_MAX-1);
+ sfree(descr);
+ } else {
+ seqid_str = strtok(descr, " \t\n\r");
+ }
sip = SeqIdSetFree(sip);
return seqid_str;
@@ -445,6 +452,7 @@ BlastSeqSrc* ReaddbSeqSrcNew(BlastSeqSrc* retval, void* args)
/* Initialize the BlastSeqSrc structure fields with user-defined function
* pointers and rdfp */
SetDeleteFnPtr(retval, &ReaddbSeqSrcFree);
+ SetCopyFnPtr(retval, &ReaddbSeqSrcCopy);
SetDataStructure(retval, (void*) rdfp);
SetGetNumSeqs(retval, &ReaddbGetNumSeqs);
SetGetMaxSeqLen(retval, &ReaddbGetMaxLength);
@@ -478,9 +486,11 @@ BlastSeqSrc* ReaddbSeqSrcNew(BlastSeqSrc* retval, void* args)
while (rdfp && rdfp->stop < rargs->final_db_seq)
rdfp = rdfp->next;
/* Set last sequence for this and all subsequent rdfp's to the one
- in the arguments, making the subsequent rdfp's ranges empty. */
+ in the arguments, making the subsequent rdfp's ranges empty.
+ Note that final_db_seq in arguments is 1 beyond the last sequence
+ number to search. */
for ( ; rdfp; rdfp = rdfp->next)
- rdfp->stop = rargs->final_db_seq;
+ rdfp->stop = rargs->final_db_seq - 1;
}
return retval;
@@ -495,6 +505,20 @@ BlastSeqSrc* ReaddbSeqSrcFree(BlastSeqSrc* bssp)
return NULL;
}
+BlastSeqSrc* ReaddbSeqSrcCopy(BlastSeqSrc* bssp)
+{
+ ReadDBFILE* rdfp = NULL;
+
+ if (!bssp)
+ return NULL;
+
+ rdfp = readdb_attach((ReadDBFILEPtr)GetDataStructure(bssp));
+
+ SetDataStructure(bssp, (void*) rdfp);
+
+ return bssp;
+}
+
BlastSeqSrc*
ReaddbBlastSeqSrcInit(const char* dbname, Boolean is_prot, int first_seq,
int last_seq, void* extra_arg)
diff --git a/algo/blast/api/seqsrc_readdb.h b/algo/blast/api/seqsrc_readdb.h
index 0121a380..482222af 100644
--- a/algo/blast/api/seqsrc_readdb.h
+++ b/algo/blast/api/seqsrc_readdb.h
@@ -1,4 +1,4 @@
-/* $Id: seqsrc_readdb.h,v 1.9 2004/02/18 19:38:20 dondosha Exp $
+/* $Id: seqsrc_readdb.h,v 1.10 2004/06/07 17:15:18 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -69,6 +69,13 @@ BlastSeqSrc* ReaddbSeqSrcNew(BlastSeqSrc* bssp, void* args);
*/
BlastSeqSrc* ReaddbSeqSrcFree(BlastSeqSrc* bssp);
+/** Readdb sequence source copier:
+ * creates a new copy of the ReadDBFILE structure by calling readdb_attach.
+ * @param bssp BlastSeqSrc structure to copy [in]
+ * @return New BlastSeqSrc structure
+ */
+BlastSeqSrc* ReaddbSeqSrcCopy(BlastSeqSrc* bssp);
+
/** Initialize the sequence source structure.
* @param dbname BLAST database name [in]
* @param is_prot Is this a protein or nucleotide database? [in]
diff --git a/algo/blast/api/twoseq_api.c b/algo/blast/api/twoseq_api.c
index 9f3294ab..1a866bae 100644
--- a/algo/blast/api/twoseq_api.c
+++ b/algo/blast/api/twoseq_api.c
@@ -1,4 +1,4 @@
-/* $Id: twoseq_api.c,v 1.6 2004/05/05 15:30:13 dondosha Exp $
+/* $Id: twoseq_api.c,v 1.13 2004/06/08 17:47:24 dondosha Exp $
***************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -40,10 +40,15 @@
#include <algo/blast/core/blast_util.h>
#include <algo/blast/core/blast_engine.h>
#include <algo/blast/core/mb_lookup.h>
+#include <algo/blast/core/blast_filter.h>
+#include <algo/blast/core/hspstream_collector.h>
#include <algo/blast/api/multiseq_src.h>
#include <algo/blast/api/blast_seqalign.h>
#include <algo/blast/api/blast_seq.h>
#include <algo/blast/api/twoseq_api.h>
+#include <algo/blast/api/blast_returns.h>
+/* For the AdjustOffSetsInSeqAlign function */
+#include <sequtil.h>
Int2 BLAST_SummaryOptionsInit(BLAST_SummaryOptions **options)
{
@@ -64,6 +69,9 @@ Int2 BLAST_SummaryOptionsInit(BLAST_SummaryOptions **options)
new_options->gapped_calculation = TRUE;
new_options->nucleotide_match = 1;
new_options->nucleotide_mismatch = -3;
+ new_options->word_threshold = 0;
+ new_options->init_seed_method = eDefaultSeedType;
+
*options = new_options;
return 0;
}
@@ -87,7 +95,7 @@ BLAST_FillOptions(Uint1 program_number,
BlastEffectiveLengthsOptions* eff_len_options,
PSIBlastOptions* psi_options,
BlastDatabaseOptions* db_options,
- BlastSeqSrc* seq_src, Int4 query_length,
+ Int4 query_length,
Int4 subject_length, RPSInfo *rps_info)
{
Boolean do_megablast = FALSE;
@@ -107,11 +115,12 @@ BLAST_FillOptions(Uint1 program_number,
/* If one of the sequences is large enough,
set up a megablast search */
- if (query_length > MEGABLAST_CUTOFF ||
+ if (basic_options->hint == eFast ||
+ query_length > MEGABLAST_CUTOFF ||
subject_length > MEGABLAST_CUTOFF) {
do_megablast = TRUE;
if (basic_options->gapped_calculation)
- greedy_align = TRUE; /* one-pass, no ungapped */
+ greedy_align = 1; /* one-pass, no ungapped */
}
/* For a megablast search or a blastn search with
@@ -122,20 +131,21 @@ BLAST_FillOptions(Uint1 program_number,
/* If megablast was turned on but the input indicates
a sensitive search is desired, switch to discontiguous
- megablast (hardwired to the 12-of-21 optimal template) */
+ megablast (hardwired to the 11-of-21 optimal template) */
- if (word_size == 0 && do_megablast &&
- basic_options->hint == eSensitive) {
- word_size = 12;
+ if (do_megablast && basic_options->hint == eSensitive) {
+ if (word_size == 0)
+ word_size = 11;
do_discontig = TRUE;
do_ag_blast = FALSE;
}
}
+
BLAST_FillLookupTableOptions(lookup_options,
program_number,
do_megablast,
- 0, /* default threshold */
+ basic_options->word_threshold,
word_size,
do_ag_blast,
0, /* no variable wordsize */
@@ -158,24 +168,27 @@ BLAST_FillOptions(Uint1 program_number,
BLAST_FillInitialWordOptions(word_options,
program_number,
- greedy_align,
+ (Boolean)greedy_align,
0, /* default for ungapped extensions */
0, /* no variable wordsize */
do_ag_blast,
do_megablast,
0); /* default ungapped X dropoff */
+ /* If we need to enforce a single-hit method, reset window size to 0.
+ To enforce two-hit method, set window size to a default non-zero
+ value */
+ if (basic_options->init_seed_method == eOneHit)
+ word_options->window_size = 0;
+ else if (basic_options->init_seed_method == eTwoHits)
+ word_options->window_size = BLAST_WINDOW_SIZE_PROT;
+
BLAST_FillExtensionOptions(ext_options,
program_number,
greedy_align,
- 0, /* default gapped X dropoff */
+ basic_options->gap_x_dropoff,
0); /* default final X dropoff */
- if (greedy_align == 1) {
- ext_options->algorithm_type = EXTEND_GREEDY;
- word_options->ungapped_extension = FALSE;
- }
-
if (basic_options->matrix == NULL)
matrix = "BLOSUM62";
else
@@ -183,12 +196,12 @@ BLAST_FillOptions(Uint1 program_number,
BLAST_FillScoringOptions(score_options,
program_number,
- greedy_align,
+ (Boolean)greedy_align,
basic_options->nucleotide_mismatch,
basic_options->nucleotide_match,
matrix,
- 0, /* default gap open penalty */
- 0); /* default gap extend penalty */
+ basic_options->gap_open,
+ basic_options->gap_extend);
score_options->gapped_calculation = basic_options->gapped_calculation;
@@ -197,23 +210,95 @@ BLAST_FillOptions(Uint1 program_number,
0); /* default number of alignments saved */
hit_options->percent_identity = 0; /* no percent identity cutoff */
-
- BLAST_FillEffectiveLengthsOptions(eff_len_options,
- 1, /* one sequence */
- subject_length, /* this long */
- 0); /* default search space */
+
+ eff_len_options->db_length = basic_options->db_length;
+
return 0;
}
Int2
-BLAST_TwoSequencesSearch(const BLAST_SummaryOptions *basic_options,
- BioseqPtr bsp1, BioseqPtr bsp2, SeqAlign **seqalign_out)
+BLAST_TwoSequencesSearch(BLAST_SummaryOptions *basic_options,
+ BioseqPtr bsp1, BioseqPtr bsp2,
+ SeqAlign **seqalign_out)
{
+ Uint1 program_type = blast_type_undefined;
SeqLocPtr query_slp = NULL; /* sequence variables */
SeqLocPtr subject_slp = NULL;
+ Boolean seq1_is_aa, seq2_is_aa;
+ Int2 status = 0;
+
+ /* sanity checks */
+
+ *seqalign_out = NULL;
+ if (bsp1 == NULL || bsp2 == NULL)
+ return 0;
+
+ seq1_is_aa = ISA_aa(bsp1->mol);
+ seq2_is_aa = ISA_aa(bsp2->mol);
+
+ /* Find program type consistent with the sequences. */
+ if (!seq1_is_aa && !seq2_is_aa) {
+ if (basic_options->program == eTblastx)
+ program_type = blast_type_tblastx;
+ else
+ program_type = blast_type_blastn;
+ } else if (seq1_is_aa && seq2_is_aa) {
+ program_type = blast_type_blastp;
+ } else if (!seq1_is_aa && seq2_is_aa) {
+ program_type = blast_type_blastx;
+ } else if (seq1_is_aa && !seq2_is_aa) {
+ program_type = blast_type_tblastn;
+ }
+
+ /* Check if program type in options is consistent with the one determined
+ from sequences. */
+ if (basic_options->program == eChoose)
+ basic_options->program = program_type;
+ else if (basic_options->program != program_type)
+ return -1;
+
+ /* Convert the bioseqs into seqlocs. */
+
+ ValNodeAddPointer(&query_slp, SEQLOC_WHOLE,
+ SeqIdDup(SeqIdFindBest(bsp1->id, SEQID_GI)));
+ if (!query_slp)
+ return -1;
+ ValNodeAddPointer(&subject_slp, SEQLOC_WHOLE,
+ SeqIdDup(SeqIdFindBest(bsp2->id, SEQID_GI)));
+ if (!subject_slp)
+ return -1;
+
+ status = BLAST_TwoSeqLocSets(basic_options, query_slp, subject_slp,
+ seqalign_out, NULL, NULL);
+ SeqLocFree(query_slp);
+ SeqLocFree(subject_slp);
+
+ return status;
+}
+
+static Int4 SeqLocListLen(SeqLoc* seqloc)
+{
+ Int4 length = 0;
+
+ for ( ; seqloc; seqloc = seqloc->next)
+ length += SeqLocLen(seqloc);
+
+ return length;
+}
+
+/** Compares one list of SeqLoc's against another list of SeqLoc's using the
+ * BLAST algorithm.
+ */
+Int2
+BLAST_TwoSeqLocSets(const BLAST_SummaryOptions *basic_options,
+ SeqLoc* query_seqloc, SeqLoc* subject_seqloc,
+ SeqAlign **seqalign_out,
+ SeqLoc** filter_out,
+ BLAST_SummaryReturn* *extra_returns)
+{
+ Uint1 program_type;
BlastSeqSrc *seq_src = NULL;
BLAST_SequenceBlk *query = NULL;
- Uint1 program_type;
BlastQueryInfo* query_info = NULL;
ListNode* lookup_segments = NULL; /* query filtering structures */
@@ -229,39 +314,31 @@ BLAST_TwoSequencesSearch(const BLAST_SummaryOptions *basic_options,
BlastEffectiveLengthsOptions* eff_len_options = NULL;
BlastScoreBlk* sbp = NULL;
BlastHSPResults* results = NULL;
- BlastReturnStat* return_stats = NULL;
+ BlastDiagnostics* diagnostics = NULL;
PSIBlastOptions* psi_options = NULL;
BlastDatabaseOptions* db_options = NULL;
RPSInfo *rps_info = NULL;
-
- Int2 status;
-
- /* sanity checks */
-
- *seqalign_out = NULL;
- if (bsp1 == NULL || bsp2 == NULL)
- return 0;
-
- if (bsp1->mol != bsp2->mol)
- return 0;
-
- /* decide which blast program to execute */
+ Int2 status = 0;
+ BlastHSPStream* hsp_stream;
switch(basic_options->program) {
- case eBlastp:
- program_type = blast_type_blastp;
- break;
case eBlastn:
- program_type = blast_type_blastn;
- break;
- case eChoose:
- if (ISA_aa(bsp1->mol))
- program_type = blast_type_blastp;
- else
- program_type = blast_type_blastn;
- break;
+ program_type = blast_type_blastn;
+ break;
+ case eBlastp:
+ program_type = blast_type_blastp;
+ break;
+ case eBlastx:
+ program_type = blast_type_blastx;
+ break;
+ case eTblastn:
+ program_type = blast_type_tblastn;
+ break;
+ case eTblastx:
+ program_type = blast_type_tblastx;
+ break;
default:
- return -1;
+ return -1;
}
/* fill in the engine-specific options */
@@ -273,36 +350,35 @@ BLAST_TwoSequencesSearch(const BLAST_SummaryOptions *basic_options,
if (status != 0)
goto bail_out;
+ if (program_type == blast_type_tblastn ||
+ program_type == blast_type_tblastx) {
+ if ((status = BLAST_GeneticCodeFind(db_options->genetic_code,
+ &db_options->gen_code_string)))
+ return status;
+ }
+
+ seq_src = MultiSeqSrcInit(subject_seqloc, program_type);
+ if (seq_src == NULL)
+ goto bail_out;
+
status = BLAST_FillOptions(program_type, basic_options,
lookup_options, query_options, word_options,
ext_options, hit_options, score_options,
eff_len_options, psi_options, db_options,
- NULL, bsp1->length, bsp2->length, rps_info);
+ SeqLocListLen(query_seqloc),
+ BLASTSeqSrcGetMaxSeqLen(seq_src),
+ rps_info);
if (status != 0)
goto bail_out;
- /* Convert the bioseqs into seqlocs. */
-
- ValNodeAddPointer(&query_slp, SEQLOC_WHOLE,
- SeqIdDup(SeqIdFindBest(bsp1->id, SEQID_GI)));
- ValNodeAddPointer(&subject_slp, SEQLOC_WHOLE,
- SeqIdDup(SeqIdFindBest(bsp2->id, SEQID_GI)));
- if (query_slp == NULL || subject_slp == NULL) {
- status = -1;
- goto bail_out;
- }
/* convert the seqlocs into SequenceBlks, and fill in query_info */
- status = BLAST_SetUpQuery(program_type, query_slp, query_options,
+ status = BLAST_SetUpQuery(program_type, query_seqloc, query_options,
&query_info, &query);
if (status != 0)
goto bail_out;
- seq_src = MultiSeqSrcInit(subject_slp, program_type);
- if (seq_src == NULL)
- goto bail_out;
-
/* perform final setup */
status = BLAST_ValidateOptions(program_type, ext_options,
@@ -311,37 +387,58 @@ BLAST_TwoSequencesSearch(const BLAST_SummaryOptions *basic_options,
goto bail_out;
status = BLAST_MainSetUp(program_type, query_options, score_options,
- hit_options, query, query_info, &lookup_segments,
+ hit_options, query, query_info, 1.0, &lookup_segments,
&filter_loc, &sbp, NULL);
if (status != 0)
goto bail_out;
- return_stats = (BlastReturnStat*) calloc(1, sizeof(BlastReturnStat));
- if (return_stats == NULL)
- goto bail_out;
+ if (extra_returns) {
+ if ((diagnostics = Blast_DiagnosticsInit()) == NULL)
+ goto bail_out;
+ }
- Blast_HSPResultsInit(query_info->num_queries, &results);
LookupTableWrapInit(query, lookup_options,
lookup_segments, sbp, &lookup_wrap, NULL);
+ /* Initialize the HSPList collector stream. Results should not be sorted
+ before reading from it. */
+ hsp_stream =
+ Blast_HSPListCollectorInit(program_type, hit_options,
+ query_info->num_queries, FALSE);
/* finally, do the search */
status = BLAST_SearchEngine(program_type, query, query_info,
seq_src, sbp, score_options, lookup_wrap, word_options,
ext_options, hit_options, eff_len_options,
- psi_options, db_options, results, return_stats);
+ psi_options, db_options, hsp_stream, diagnostics, &results);
+
+ hsp_stream = BlastHSPStreamFree(hsp_stream);
if (status != 0)
goto bail_out;
/* Convert results to the SeqAlign form */
- status = BLAST_ResultsToSeqAlign(program_type, results, query_slp, NULL,
- subject_slp, score_options, sbp, score_options->gapped_calculation,
- seqalign_out);
+ status = BLAST_ResultsToSeqAlign(program_type, results, query_seqloc,
+ seq_src, score_options->gapped_calculation,
+ score_options->is_ooframe, seqalign_out);
bail_out:
- if (return_stats)
- sfree(return_stats);
+ AdjustOffSetsInSeqAlign(*seqalign_out, query_seqloc, subject_seqloc);
+
+ if (!status && extra_returns) {
+ Blast_SummaryReturnFill(program_type, score_options, sbp,
+ lookup_options, word_options, ext_options,
+ hit_options, eff_len_options, query_info,
+ seq_src, diagnostics, extra_returns);
+ }
+
+ if (filter_out) {
+ *filter_out =
+ BlastMaskLocToSeqLoc(program_type, filter_loc, query_seqloc);
+ }
+
+ Blast_DiagnosticsFree(diagnostics);
+ BlastMaskLocFree(filter_loc);
BlastSeqSrcFree(seq_src);
LookupTableWrapFree(lookup_wrap);
ListNodeFreeData(lookup_segments);
@@ -358,8 +455,9 @@ bail_out:
BlastEffectiveLengthsOptionsFree(eff_len_options);
PSIBlastOptionsFree(psi_options);
BlastDatabaseOptionsFree(db_options);
- SeqLocFree(query_slp);
- SeqLocFree(subject_slp);
return status;
}
+
+
+
diff --git a/algo/blast/api/twoseq_api.h b/algo/blast/api/twoseq_api.h
index f449b699..4ba5f768 100644
--- a/algo/blast/api/twoseq_api.h
+++ b/algo/blast/api/twoseq_api.h
@@ -1,4 +1,4 @@
-/* $Id: twoseq_api.h,v 1.2 2004/03/24 19:14:21 papadopo Exp $
+/* $Id: twoseq_api.h,v 1.3 2004/05/14 17:24:03 dondosha Exp $
***************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -52,9 +52,13 @@
* megablast with word size 12 is used.
*/
enum blast_type {
- eChoose = 0, /**< blastn for nuc. sequences, blastp otherwise */
+ eChoose = 0, /**< Choose type of search by sequences molecule type:
+ n-n=blastn, p-p=blastp, n-p=blastx, p-n=tblastn */
eBlastn = 1, /**< blastn or megablast (determined automatically) */
- eBlastp = 2 /**< blastp search on two protein sequences */
+ eBlastp = 2, /**< blastp search between protein sequences */
+ eBlastx = 3, /**< blastx for nucleotide vs protein sequences */
+ eTblastn = 4, /**< tblastn for protein vs nucleotide sequences */
+ eTblastx = 5 /**< tblastx for translated nucleotide sequences */
};
/**
@@ -66,6 +70,14 @@ enum blast_hint {
eFast = 1 /**< trade off sensitivity for speed */
};
+typedef enum seed_type {
+ eDefaultSeedType = 0, /**< BLAST will decide which method to use based on
+ program and other information. */
+ eOneHit = 1, /**< Require only one initial hit for extension */
+ eTwoHits = 2 /**< Require more than one hit within a window
+ for extension */
+} seed_type;
+
/**
* The main user-visible setup structure for the API. This
* only makes a (small) subset of the complete options available
@@ -99,8 +111,43 @@ typedef struct {
for matching letters (default 1) */
Int4 nucleotide_mismatch; /**< For nucleotide searches, the penalty
for mismatching letters (default -3) */
+ Int4 gap_open; /**< Cost of opening a gap. Default=0, invokes
+ default values: 5 for nucleotide;
+ depends on matrix for protein search.*/
+ Int4 gap_extend; /**< Cost of extending a gap. Default=0,
+ invokes default values: 2 for nucleotide;
+ depends on matrix for protein search.*/
+ Int4 gap_x_dropoff; /**< Dropoff value for the gapped extension.
+ Default=0, invokes default values. */
+ double db_length; /**< Database length to use in statistical
+ calculations.
+ Default=0 means "database length" is set
+ to the subject sequence length for each
+ subject sequence. */
+ Int4 word_threshold; /**< Threshold for finding neighboring words
+ in protein searches. */
+ seed_type init_seed_method; /**< Single-hit or multiple-hit choice of
+ initial seeds for extension. */
} BLAST_SummaryOptions;
+/** Small structure containing just those Karlin-Altschul parameters needed
+ * for the BLAST formatting */
+typedef struct BLAST_KAParameters {
+ double Lambda;
+ double K;
+ double H;
+} BLAST_KAParameters;
+
+/** Structure holding all calculated data returned from a BLAST search other
+ * than the alignment.
+ */
+typedef struct BLAST_SummaryReturn {
+ BLAST_KAParameters* ka_params; /**< Ungapped Karlin-Altschul parameters */
+ BLAST_KAParameters* ka_params_gap;/**< Gapped Karlin-Altschul parameters */
+ char* params_buffer; /**< Buffer holding the bottom of BLAST report. */
+} BLAST_SummaryReturn;
+
+
/**
* Allocate storage for an API setup structure and set the
* default options for it.
@@ -129,9 +176,31 @@ BLAST_SummaryOptions* BLAST_SummaryOptionsFree(BLAST_SummaryOptions *options);
* If search failed or no alignments were found, set to NULL [out]
* @return 0 for a successful search, nonzero if search failed
*/
-Int2 BLAST_TwoSequencesSearch(const BLAST_SummaryOptions *options,
+Int2 BLAST_TwoSequencesSearch(BLAST_SummaryOptions *options,
Bioseq *bsp1,
Bioseq *bsp2,
SeqAlign **seqalign_out);
+/**
+ * Perform a BLAST search of one list of sequences against another and
+ * return the list of alignments the search generates
+ * @param options Structure describing how the search will be configured [in]
+ * @param seqloc1 The first list of sequences (queries) to be compared.
+ * Filtering is applied only to these sequences [in]
+ * @param seqloc2 The second list of sequences (subjects) to be compared [in]
+ * @param seqalign_out The list of alignments generated by the search.
+ * Alignments are sorted by query; then by subject among
+ * same query alignments.
+ * If search failed or no alignments were found,
+ * set to NULL [out]
+ * @param filter_out Masking locations [out]
+ * @param extra_returns Data needed to print the bottom of BLAST report [out]
+ * @return 0 for a successful search, nonzero if search failed
+ */
+Int2 BLAST_TwoSeqLocSets(const BLAST_SummaryOptions *options,
+ SeqLoc* seqloc1, SeqLoc* seqloc2,
+ SeqAlign **seqalign_out,
+ SeqLoc** filter_out,
+ BLAST_SummaryReturn* *extra_returns);
+
#endif /* !_TWOSEQ_API_H_ */
diff --git a/algo/blast/core/aa_ungapped.c b/algo/blast/core/aa_ungapped.c
index a5c131a7..b95d5b39 100644
--- a/algo/blast/core/aa_ungapped.c
+++ b/algo/blast/core/aa_ungapped.c
@@ -1,83 +1,81 @@
-/* $Id: aa_ungapped.c,v 1.29 2004/03/10 17:33:10 papadopo Exp $
-
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-
-*/
-
-static char const rcsid[] = "$Id: aa_ungapped.c,v 1.29 2004/03/10 17:33:10 papadopo Exp $";
+/* $Id: aa_ungapped.c,v 1.33 2004/06/08 17:30:06 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ */
+
+/** @file aa_ungapped.c
+ * @todo FIXME Need description
+ */
+
+static char const rcsid[] =
+ "$Id: aa_ungapped.c,v 1.33 2004/06/08 17:30:06 dondosha Exp $";
#include <algo/blast/core/aa_ungapped.h>
-Int4 BlastAaWordFinder(BLAST_SequenceBlk* subject,
+Int2 BlastAaWordFinder(BLAST_SequenceBlk* subject,
BLAST_SequenceBlk* query,
LookupTableWrap* lut_wrap,
Int4** matrix,
const BlastInitialWordParameters* word_params,
- BLAST_ExtendWord* ewp,
- Uint4* query_offsets,
- Uint4* subject_offsets,
- Int4 offset_array_size,
- BlastInitHitList* init_hitlist)
+ Blast_ExtendWord* ewp,
+ Uint4* query_offsets,
+ Uint4* subject_offsets,
+ Int4 offset_array_size,
+ BlastInitHitList* init_hitlist,
+ BlastUngappedStats* ungapped_stats)
{
- Int4 hits=0;
+ Int2 status=0;
/* find the word hits and do ungapped extensions */
if (ewp->diag_table->multiple_hits)
{
- hits = BlastAaWordFinder_TwoHit(subject,
- query,
- lut_wrap,
- ewp->diag_table,
- matrix,
- word_params->cutoff_score,
- word_params->x_dropoff,
- query_offsets,
- subject_offsets,
- offset_array_size,
- init_hitlist);
+ status = BlastAaWordFinder_TwoHit(subject, query,
+ lut_wrap, ewp->diag_table,
+ matrix,
+ word_params->cutoff_score,
+ word_params->x_dropoff,
+ query_offsets, subject_offsets,
+ offset_array_size,
+ init_hitlist, ungapped_stats);
}
else
{
- hits = BlastAaWordFinder_OneHit(subject,
- query,
- lut_wrap,
- ewp->diag_table,
- matrix,
- word_params->cutoff_score,
- word_params->x_dropoff,
- query_offsets,
- subject_offsets,
- offset_array_size,
- init_hitlist);
+ status = BlastAaWordFinder_OneHit(subject, query,
+ lut_wrap, ewp->diag_table,
+ matrix,
+ word_params->cutoff_score,
+ word_params->x_dropoff,
+ query_offsets, subject_offsets,
+ offset_array_size,
+ init_hitlist, ungapped_stats);
}
- return hits;
+ return status;
}
-Int4 BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk* subject,
+Int2 BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk* subject,
const BLAST_SequenceBlk* query,
const LookupTableWrap* lookup_wrap,
BLAST_DiagTable* diag,
@@ -87,7 +85,8 @@ Int4 BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk* subject,
Uint4 * NCBI_RESTRICT query_offsets,
Uint4 * NCBI_RESTRICT subject_offsets,
Int4 array_size,
- BlastInitHitList* ungapped_hsps)
+ BlastInitHitList* ungapped_hsps,
+ BlastUngappedStats* ungapped_stats)
{
LookupTable* lookup=NULL;
RPSLookupTable* rps_lookup=NULL;
@@ -105,6 +104,7 @@ Int4 BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk* subject,
Int4 diag_offset, diag_coord, diag_mask;
DiagStruct* diag_array;
Boolean right_extend;
+ Int4 hits_extended = 0;
if (diag == NULL)
return -1;
@@ -177,6 +177,8 @@ Int4 BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk* subject,
&hsp_len, use_pssm,
wordsize, &right_extend, &s_last_off);
+ ++hits_extended;
+
/* if the hsp meets the score threshold, report it */
if (score >= cutoff)
BlastSaveInitHsp(ungapped_hsps, hsp_q, hsp_s,
@@ -206,10 +208,12 @@ Int4 BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk* subject,
/* increment the offset in the diagonal array */
DiagUpdate(diag, subject->length + window);
- return totalhits;
+ Blast_UngappedStatsUpdate(ungapped_stats, totalhits, hits_extended,
+ ungapped_hsps->total);
+ return 0;
}
-Int4 BlastAaWordFinder_OneHit(const BLAST_SequenceBlk* subject,
+Int2 BlastAaWordFinder_OneHit(const BLAST_SequenceBlk* subject,
const BLAST_SequenceBlk* query,
const LookupTableWrap* lookup_wrap,
BLAST_DiagTable* diag,
@@ -219,7 +223,8 @@ Int4 BlastAaWordFinder_OneHit(const BLAST_SequenceBlk* subject,
Uint4 * NCBI_RESTRICT query_offsets,
Uint4 * NCBI_RESTRICT subject_offsets,
Int4 array_size,
- BlastInitHitList* ungapped_hsps)
+ BlastInitHitList* ungapped_hsps,
+ BlastUngappedStats* ungapped_stats)
{
LookupTable* lookup=NULL;
RPSLookupTable* rps_lookup=NULL;
@@ -235,6 +240,7 @@ Int4 BlastAaWordFinder_OneHit(const BLAST_SequenceBlk* subject,
Int4 score;
Int4 diag_offset, diag_coord, diag_mask, diff;
DiagStruct* diag_array;
+ Int4 hits_extended = 0;
if (!diag)
return -1;
@@ -276,6 +282,7 @@ Int4 BlastAaWordFinder_OneHit(const BLAST_SequenceBlk* subject,
/* do an extension, but only if we have not already extended
this far */
if (diff > 0) {
+ ++hits_extended;
score=BlastAaExtendOneHit(matrix, subject, query,
subject_offsets[i], query_offsets[i], dropoff,
&hsp_q, &hsp_s, &hsp_len, use_pssm, &s_last_off);
@@ -294,7 +301,9 @@ Int4 BlastAaWordFinder_OneHit(const BLAST_SequenceBlk* subject,
/* increment the offset in the diagonal array (no windows used) */
DiagUpdate(diag, subject->length);
- return totalhits;
+ Blast_UngappedStatsUpdate(ungapped_stats, totalhits, hits_extended,
+ ungapped_hsps->total);
+ return 0;
}
Int4 BlastAaExtendRight(Int4 ** matrix,
@@ -404,7 +413,6 @@ Int4 BlastPSSMExtendRight(Int4 ** matrix,
Int4 BlastPSSMExtendLeft(Int4 ** matrix,
const BLAST_SequenceBlk* subject,
- Int4 query_size,
Int4 s_off,
Int4 q_off,
Int4 dropoff,
@@ -450,7 +458,7 @@ Int4 BlastAaExtendOneHit(Int4 ** matrix,
Int4 left_disp, right_disp;
if (use_pssm) {
- left_score = BlastPSSMExtendLeft(matrix, subject, query->length,
+ left_score = BlastPSSMExtendLeft(matrix, subject,
s_off, q_off, dropoff, &left_disp);
right_score = BlastPSSMExtendRight(matrix, subject, query->length,
s_off+1, q_off+1, dropoff, &right_disp,
@@ -471,20 +479,21 @@ Int4 BlastAaExtendOneHit(Int4 ** matrix,
return right_score;
}
-Int4 BlastAaExtendTwoHit(Int4 ** matrix,
- const BLAST_SequenceBlk* subject,
- const BLAST_SequenceBlk* query,
- Int4 s_left_off,
- Int4 s_right_off,
- Int4 q_right_off,
- Int4 dropoff,
- Int4* hsp_q,
- Int4* hsp_s,
- Int4* hsp_len,
- Boolean use_pssm,
- Int4 word_size,
- Boolean *right_extend,
- Int4 *s_last_off)
+Int4
+BlastAaExtendTwoHit(Int4 ** matrix,
+ const BLAST_SequenceBlk* subject,
+ const BLAST_SequenceBlk* query,
+ Int4 s_left_off,
+ Int4 s_right_off,
+ Int4 q_right_off,
+ Int4 dropoff,
+ Int4* hsp_q,
+ Int4* hsp_s,
+ Int4* hsp_len,
+ Boolean use_pssm,
+ Int4 word_size,
+ Boolean *right_extend,
+ Int4 *s_last_off)
{
Int4 left_d = 0, right_d = 0; /* left and right displacements */
Int4 left_score = 0, right_score = 0; /* left and right scores */
@@ -514,7 +523,7 @@ Int4 BlastAaExtendTwoHit(Int4 ** matrix,
/* first, try to extend left, from the second hit to the first hit. */
if (use_pssm)
- left_score = BlastPSSMExtendLeft(matrix, subject, query->length,
+ left_score = BlastPSSMExtendLeft(matrix, subject,
s_right_off, q_right_off, dropoff, &left_d);
else
left_score = BlastAaExtendLeft(matrix, subject, query,
diff --git a/algo/blast/core/aa_ungapped.h b/algo/blast/core/aa_ungapped.h
index f8f861fc..80aa902a 100644
--- a/algo/blast/core/aa_ungapped.h
+++ b/algo/blast/core/aa_ungapped.h
@@ -1,30 +1,33 @@
-/* $Id: aa_ungapped.h,v 1.15 2004/03/11 18:31:06 papadopo Exp $
-
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
+/* $Id: aa_ungapped.h,v 1.19 2004/06/08 17:29:57 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ */
-*/
+/** @file aa_ungapped.h
+ * @todo FIXME: Need file description (protein wordfinding & ungapped
+ * extension code?)
+ */
#ifndef AA_UNGAPPED__H
#define AA_UNGAPPED__H
@@ -49,18 +52,19 @@ extern "C" {
* @param subject_offsets array for storing subject offsets [out]
* @param offset_array_size the number of elements in each offset array [in]
* @param init_hitlist hsps resulting from the ungapped extension [out]
- * @return the number of hits found
+ * @param ungapped_stats Various hit counts. Not filled if NULL [out]
*/
-Int4 BlastAaWordFinder(BLAST_SequenceBlk* subject,
+Int2 BlastAaWordFinder(BLAST_SequenceBlk* subject,
BLAST_SequenceBlk* query,
LookupTableWrap* lookup,
Int4** matrix,
const BlastInitialWordParameters* word_params,
- BLAST_ExtendWord* ewp,
+ Blast_ExtendWord* ewp,
Uint4* NCBI_RESTRICT query_offsets,
Uint4* NCBI_RESTRICT subject_offsets,
Int4 offset_array_size,
- BlastInitHitList* init_hitlist);
+ BlastInitHitList* init_hitlist,
+ BlastUngappedStats* ungapped_stats);
/** Scan a subject sequence for word hits and trigger two-hit extensions.
*
@@ -75,10 +79,10 @@ Int4 BlastAaWordFinder(BLAST_SequenceBlk* subject,
* @param subject_offsets array for storing subject offsets [out]
* @param array_size the number of elements in each offset array [in]
* @param ungapped_hsps hsps resulting from the ungapped extension [out]
- * @return the number of hits found
+ * @param ungapped_stats Various hit counts. Not filled if NULL [out]
*/
-Int4 BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk* subject,
+Int2 BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk* subject,
const BLAST_SequenceBlk* query,
const LookupTableWrap* lookup_wrap,
BLAST_DiagTable* diag,
@@ -88,7 +92,8 @@ Int4 BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk* subject,
Uint4 * NCBI_RESTRICT query_offsets,
Uint4 * NCBI_RESTRICT subject_offsets,
Int4 array_size,
- BlastInitHitList* ungapped_hsps);
+ BlastInitHitList* ungapped_hsps,
+ BlastUngappedStats* ungapped_stats);
/** Scan a subject sequence for word hits and trigger one-hit extensions.
*
@@ -103,10 +108,9 @@ Int4 BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk* subject,
* @param subject_offsets array for storing subject offsets
* @param array_size the number of elements in each offset array
* @param ungapped_hsps hsps resulting from the ungapped extensions [out]
- * @return the number of hits found
+ * @param ungapped_stats Various hit counts. Not filled if NULL [out]
*/
-
-Int4 BlastAaWordFinder_OneHit(const BLAST_SequenceBlk* subject,
+Int2 BlastAaWordFinder_OneHit(const BLAST_SequenceBlk* subject,
const BLAST_SequenceBlk* query,
const LookupTableWrap* lookup_wrap,
BLAST_DiagTable* diag,
@@ -116,7 +120,8 @@ Int4 BlastAaWordFinder_OneHit(const BLAST_SequenceBlk* subject,
Uint4 * NCBI_RESTRICT query_offsets,
Uint4 * NCBI_RESTRICT subject_offsets,
Int4 array_size,
- BlastInitHitList* ungapped_hsps);
+ BlastInitHitList* ungapped_hsps,
+ BlastUngappedStats* ungapped_stats);
/**
* Beginning at s_off and q_off in the subject and query, respectively,
@@ -181,7 +186,6 @@ Int4 BlastAaExtendLeft(Int4 ** matrix,
Int4 BlastPSSMExtendLeft(Int4 ** matrix,
const BLAST_SequenceBlk* subject,
- Int4 query_size,
Int4 s_off,
Int4 q_off,
Int4 dropoff,
diff --git a/algo/blast/core/blast_def.h b/algo/blast/core/blast_def.h
index ca5fcdc5..41dcd301 100644
--- a/algo/blast/core/blast_def.h
+++ b/algo/blast/core/blast_def.h
@@ -1,41 +1,36 @@
-/* $Id: blast_def.h,v 1.40 2004/04/16 14:12:33 papadopo Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_def.h
-
-Author: Ilya Dondoshansky
-
-Contents: Definitions of major structures used throughout BLAST
-
-Detailed Contents:
-
-******************************************************************************
- * $Revision: 1.40 $
- * */
+/* $Id: blast_def.h,v 1.42 2004/05/19 14:52:01 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_def.h
+ * Definitions of major structures used throughout BLAST
+ */
+
#ifndef __BLAST_DEF__
#define __BLAST_DEF__
@@ -77,6 +72,9 @@ void __sfree(void** x); /* implemented in lib/util.c */
#ifndef NUM_FRAMES
#define NUM_FRAMES 6
#endif
+#ifndef NUM_STRANDS
+#define NUM_STRANDS 2
+#endif
/********************* Structure definitions ********************************/
@@ -141,26 +139,6 @@ typedef struct SSeqRange {
*/
#define BlastSeqLoc ListNode
-/** Return statistics from the BLAST search */
-typedef struct BlastReturnStat {
- Int8 db_hits; /**< Number of successful lookup table hits */
- Int4 init_extends; /**< Number of initial words found and extended */
- Int4 good_init_extends; /**< Number of successful initial extensions */
- Int4 prelim_gap_no_contest; /**< Number of HSPs better than e-value
- threshold before gapped extension */
- Int4 prelim_gap_passed; /**< Number of HSPs better than e-value threshold
- after preliminary gapped extension */
- Int4 number_of_seqs_better_E; /**< Number of sequences with best HSP passing
- the e-value threshold */
- Int4 x_drop_ungapped; /**< Raw value of the x-dropoff for ungapped
- extensions */
- Int4 x_drop_gap; /**< Raw value of the x-dropoff for preliminary gapped
- extensions */
- Int4 x_drop_gap_final; /**< Raw value of the x-dropoff for gapped
- extensions with traceback */
- Int4 gap_trigger; /**< Minimal raw score for starting gapped extension */
-} BlastReturnStat;
-
#ifdef __cplusplus
}
#endif
diff --git a/algo/blast/core/blast_diagnostics.c b/algo/blast/core/blast_diagnostics.c
new file mode 100644
index 00000000..cee9c8a4
--- /dev/null
+++ b/algo/blast/core/blast_diagnostics.c
@@ -0,0 +1,80 @@
+/* $Id: blast_diagnostics.c,v 1.2 2004/05/19 14:52:02 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_diagnostics.c
+ * Manipulating diagnostics data returned from BLAST
+ */
+
+
+static char const rcsid[] =
+ "$Id: blast_diagnostics.c,v 1.2 2004/05/19 14:52:02 camacho Exp $";
+
+#include <algo/blast/core/blast_diagnostics.h>
+#include <algo/blast/core/blast_def.h>
+
+/* Releases the three statistics substructures and then the container
+ * itself. A NULL argument is accepted and ignored. Always returns NULL so
+ * the result can be assigned back to the caller's pointer. */
+BlastDiagnostics* Blast_DiagnosticsFree(BlastDiagnostics* diagnostics)
+{
+    /* Nothing to release. */
+    if (diagnostics == NULL)
+        return NULL;
+
+    /* Members first, container last. */
+    sfree(diagnostics->cutoffs);
+    sfree(diagnostics->gapped_stat);
+    sfree(diagnostics->ungapped_stat);
+    sfree(diagnostics);
+
+    return NULL;
+}
+
+/* Allocates a zero-filled BlastDiagnostics structure together with its
+ * ungapped, gapped and raw-cutoff substructures.
+ *
+ * Returns the new structure, or NULL if any of the allocations fails; in
+ * that case everything obtained so far is released before returning. The
+ * result is meant to be released with Blast_DiagnosticsFree. */
+BlastDiagnostics* Blast_DiagnosticsInit(void)
+{
+    BlastDiagnostics* diagnostics =
+        (BlastDiagnostics*) calloc(1, sizeof(BlastDiagnostics));
+
+    /* Out of memory: there is nothing to fill in and nothing to clean up. */
+    if (!diagnostics)
+        return NULL;
+
+    diagnostics->ungapped_stat =
+        (BlastUngappedStats*) calloc(1, sizeof(BlastUngappedStats));
+    diagnostics->gapped_stat =
+        (BlastGappedStats*) calloc(1, sizeof(BlastGappedStats));
+    diagnostics->cutoffs =
+        (BlastRawCutoffs*) calloc(1, sizeof(BlastRawCutoffs));
+
+    /* Never hand back a half-built structure. free(NULL) is a no-op, so
+       the members can be released without checking which one failed. */
+    if (!diagnostics->ungapped_stat || !diagnostics->gapped_stat ||
+        !diagnostics->cutoffs) {
+        free(diagnostics->ungapped_stat);
+        free(diagnostics->gapped_stat);
+        free(diagnostics->cutoffs);
+        free(diagnostics);
+        return NULL;
+    }
+
+    return diagnostics;
+}
+
+/* Adds the supplied hit counts to the running totals in ungapped_stats.
+ * The call is a no-op when ungapped_stats is NULL or total_hits is zero.
+ * Otherwise num_seqs_lookup_hits grows by one per call, and
+ * num_seqs_passed grows by one when at least one hit was saved. */
+void Blast_UngappedStatsUpdate(BlastUngappedStats* ungapped_stats,
+                               Int4 total_hits, Int4 extended_hits,
+                               Int4 saved_hits)
+{
+    if (ungapped_stats != NULL && total_hits != 0) {
+        /* Raw totals. */
+        ungapped_stats->lookup_hits += total_hits;
+        ungapped_stats->init_extends += extended_hits;
+        ungapped_stats->good_init_extends += saved_hits;
+
+        /* Per-call counters. */
+        ungapped_stats->num_seqs_lookup_hits += 1;
+        if (saved_hits > 0)
+            ungapped_stats->num_seqs_passed += 1;
+    }
+}
diff --git a/algo/blast/core/blast_diagnostics.h b/algo/blast/core/blast_diagnostics.h
new file mode 100644
index 00000000..49a79eee
--- /dev/null
+++ b/algo/blast/core/blast_diagnostics.h
@@ -0,0 +1,99 @@
+/* $Id: blast_diagnostics.h,v 1.3 2004/06/08 17:29:57 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_diagnostics.h
+ * Various diagnostics (hit counts, etc.) returned from the BLAST engine
+ */
+
+#ifndef __BLAST_DIAGNOSTICS__
+#define __BLAST_DIAGNOSTICS__
+
+#include <algo/blast/core/ncbi_std.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** Raw x-dropoff values and the gap trigger score; stored as the
+ * `cutoffs` member of BlastDiagnostics.
+ */
+typedef struct BlastRawCutoffs {
+ Int4 x_drop_ungapped; /**< Raw value of the x-dropoff for ungapped
+ extensions */
+ Int4 x_drop_gap; /**< Raw value of the x-dropoff for preliminary gapped
+ extensions */
+ Int4 x_drop_gap_final; /**< Raw value of the x-dropoff for gapped
+ extensions with traceback */
+ Int4 gap_trigger; /**< Minimal raw score for starting gapped extension */
+} BlastRawCutoffs;
+/** Hit counts for the lookup table and ungapped extension stage; stored
+ * as the `ungapped_stat` member of BlastDiagnostics and accumulated by
+ * Blast_UngappedStatsUpdate.
+ */
+typedef struct BlastUngappedStats {
+ Int8 lookup_hits; /**< Number of successful lookup table hits */
+ Int4 num_seqs_lookup_hits; /**< Number of sequences which had at least one
+ lookup table hit. */
+ Int4 init_extends; /**< Number of initial words found and extended */
+ Int4 good_init_extends; /**< Number of successful initial extensions,
+ i.e. number of HSPs saved after ungapped stage.*/
+ Int4 num_seqs_passed; /**< Number of sequences with at least one HSP saved
+ after ungapped stage. */
+} BlastUngappedStats;
+/** Hit counts for the gapped extension stage; stored as the
+ * `gapped_stat` member of BlastDiagnostics.
+ */
+typedef struct BlastGappedStats {
+ Int4 seqs_ungapped_passed; /**< Number of sequences with top HSP
+ after ungapped extension passing the
+ e-value threshold. */
+ Int4 extra_extensions; /**< Number of extra gapped extensions performed for
+ ungapped HSPs above the e-value threshold. */
+ Int4 extensions; /**< Total number of gapped extensions performed. */
+ Int4 good_extensions; /**< Number of HSPs below the e-value threshold after
+ gapped extension */
+ Int4 num_seqs_passed; /**< Number of sequences with top HSP passing the
+ e-value threshold. */
+} BlastGappedStats;
+
+/** Return statistics from the BLAST search.
+ * Allocated, together with its three substructures, by
+ * Blast_DiagnosticsInit and released by Blast_DiagnosticsFree.
+ */
+typedef struct BlastDiagnostics {
+ BlastUngappedStats* ungapped_stat; /**< Ungapped extension counts */
+ BlastGappedStats* gapped_stat; /**< Gapped extension counts */
+ BlastRawCutoffs* cutoffs; /**< Various raw values for the cutoffs */
+} BlastDiagnostics;
+
+/** Free the BlastDiagnostics structure and all substructures.
+ * @param diagnostics Structure to free; a NULL pointer is ignored [in]
+ * @return Always NULL
+ */
+BlastDiagnostics* Blast_DiagnosticsFree(BlastDiagnostics* diagnostics);
+
+/** Initialize the BlastDiagnostics structure and all its substructures.
+ * The structure and its substructures are allocated with calloc, so all
+ * counts and cutoffs start out as zero.
+ * @return Pointer to the newly allocated structure
+ */
+BlastDiagnostics* Blast_DiagnosticsInit(void);
+
+/** Fill data in the ungapped hits diagnostics structure.
+ * @param ungapped_stats Structure to update; nothing is done if
+ *                       NULL [in] [out]
+ * @param total_hits Number of lookup table hits, added to lookup_hits;
+ *                   nothing is done if this is zero [in]
+ * @param extended_hits Count added to init_extends [in]
+ * @param saved_hits Count added to good_init_extends; if positive,
+ *                   num_seqs_passed is incremented as well [in]
+ */
+void Blast_UngappedStatsUpdate(BlastUngappedStats* ungapped_stats,
+ Int4 total_hits, Int4 extended_hits,
+ Int4 saved_hits);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !__BLAST_DIAGNOSTICS__ */
diff --git a/algo/blast/core/blast_dust.c b/algo/blast/core/blast_dust.c
index 662031d3..e3faf404 100644
--- a/algo/blast/core/blast_dust.c
+++ b/algo/blast/core/blast_dust.c
@@ -1,43 +1,42 @@
-static char const rcsid[] = "$Id: blast_dust.c,v 1.23 2004/04/07 03:06:16 camacho Exp $";
-/*
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-*
-* File Name: blast_dust.c
-*
-* Author(s): Richa Agarwala (based upon versions variously worked upon by Roma Tatusov,
-* John Kuzio, and Ilya Dondoshansky).
-*
-* Version Creation Date: 02/09/2004
-*
-* $Revision: 1.23 $
-*
-* File Description: A utility to find low complexity NA regions.
-* This parallels functionality of dust.c from the C toolkit,
-* but without using the structures generated from ASN.1 spec.
-* ==========================================================================
-*/
+/* $Id: blast_dust.c,v 1.24 2004/05/19 14:52:02 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ==========================================================================
+ *
+ * Authors: Richa Agarwala (based upon versions variously worked upon by Roma
+ * Tatusov, John Kuzio, and Ilya Dondoshansky).
+ *
+ * ==========================================================================
+ */
+
+/** @file blast_dust.c
+ * A utility to find low complexity NA regions. This parallels functionality
+ * of dust.c from the C toolkit, but without using the structures generated
+ * from ASN.1 spec.
+ */
+static char const rcsid[] =
+ "$Id: blast_dust.c,v 1.24 2004/05/19 14:52:02 camacho Exp $";
#include <algo/blast/core/blast_dust.h>
#include <algo/blast/core/blast_util.h>
diff --git a/algo/blast/core/blast_dust.h b/algo/blast/core/blast_dust.h
index 0027093e..bfed7a33 100644
--- a/algo/blast/core/blast_dust.h
+++ b/algo/blast/core/blast_dust.h
@@ -1,50 +1,46 @@
-/* $Id: blast_dust.h,v 1.9 2004/04/07 03:06:15 camacho Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
+/* $Id: blast_dust.h,v 1.11 2004/06/16 14:53:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_dust.h
+ * DUST filtering functions. (shouldn't this be merged with blast_filter?)
+ * @todo FIXME: include reference?
+ */
-/*****************************************************************************
-
-File name: blast_filter.h
-
-Author: Ilya Dondoshansky
-
-Contents: DUST filtering functions.
-
-Detailed Contents:
-
-******************************************************************************
- * $Revision: 1.9 $
- * */
#ifndef __BLAST_DUST__
#define __BLAST_DUST__
+#include <algo/blast/core/blast_def.h>
+
#ifdef __cplusplus
extern "C" {
#endif
-#include <algo/blast/core/blast_def.h>
-
Int2 SeqBufferDust (Uint1* sequence, Int4 length, Int4 offset,
Int2 level, Int2 window, Int2 minwin, Int2 linker,
BlastSeqLoc** dust_loc);
diff --git a/algo/blast/core/blast_encoding.c b/algo/blast/core/blast_encoding.c
index 125ae954..07adfa90 100644
--- a/algo/blast/core/blast_encoding.c
+++ b/algo/blast/core/blast_encoding.c
@@ -1,6 +1,5 @@
-static char const rcsid[] =
- "$Id: blast_encoding.c,v 1.1 2004/04/07 03:10:56 camacho Exp $";
-/* ===========================================================================
+/* $Id: blast_encoding.c,v 1.2 2004/05/19 14:52:02 camacho Exp $
+ * ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
@@ -28,6 +27,14 @@ static char const rcsid[] =
*
*/
+/** @file blast_encoding.c
+ * Definitions of static arrays defined in blast_encoding.h.
+ * @sa blast_encoding.h
+ */
+
+static char const rcsid[] =
+ "$Id: blast_encoding.c,v 1.2 2004/05/19 14:52:02 camacho Exp $";
+
#include <algo/blast/core/blast_encoding.h>
const Uint1 NCBI4NA_TO_BLASTNA[BLASTNA_SIZE] = {
@@ -101,6 +108,12 @@ const Uint1 AMINOACID_TO_NCBISTDAA[128] = {
/*
* ===========================================================================
* $Log: blast_encoding.c,v $
+ * Revision 1.2 2004/05/19 14:52:02 camacho
+ * 1. Added doxygen tags to enable doxygen processing of algo/blast/core
+ * 2. Standardized copyright, CVS $Id string, $Log and rcsid formatting and
+ * location
+ * 3. Added use of @todo doxygen keyword
+ *
* Revision 1.1 2004/04/07 03:10:56 camacho
* Initial revision
*
diff --git a/algo/blast/core/blast_engine.c b/algo/blast/core/blast_engine.c
index 6491cbd3..c4985cb0 100644
--- a/algo/blast/core/blast_engine.c
+++ b/algo/blast/core/blast_engine.c
@@ -1,39 +1,38 @@
-/* $Id: blast_engine.c,v 1.126 2004/05/05 15:27:44 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_engine.c
-
-Author: Ilya Dondoshansky
-
-Contents: High level BLAST functions
-
-******************************************************************************/
-
-static char const rcsid[] = "$Id: blast_engine.c,v 1.126 2004/05/05 15:27:44 dondosha Exp $";
+/* $Id: blast_engine.c,v 1.143 2004/06/14 15:37:37 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_engine.c
+ * High level BLAST functions
+ */
+
+static char const rcsid[] =
+ "$Id: blast_engine.c,v 1.143 2004/06/14 15:37:37 dondosha Exp $";
#include <algo/blast/core/blast_engine.h>
#include <algo/blast/core/lookup_wrap.h>
@@ -45,6 +44,30 @@ static char const rcsid[] = "$Id: blast_engine.c,v 1.126 2004/05/05 15:27:44 don
#include <algo/blast/core/phi_extend.h>
#include <algo/blast/core/link_hsps.h>
+/** Structure to be passed to BLAST_SearchEngineCore, containing pointers
+ to various preallocated structures and arrays.
+ Deallocated by BlastCoreAuxStructFree. */
+typedef struct BlastCoreAuxStruct {
+
+ Blast_ExtendWord* ewp; /**< Structure for keeping track of diagonal
+ information for initial word matches */
+ BlastWordFinderType WordFinder; /**< Word finder function pointer */
+ BlastGetGappedScoreType GetGappedScore; /**< Gapped extension function
+ pointer */
+ BlastInitHitList* init_hitlist; /**< Placeholder for HSPs after
+ ungapped extension */
+ BlastHSPList* hsp_list; /**< Placeholder for HSPs after gapped
+ extension */
+ Uint4* query_offsets; /**< Placeholder for initial word match query
+ offsets */
+ Uint4* subject_offsets; /**< Placeholder for initial word match
+ subject offsets */
+ Uint1* translation_buffer; /**< Placeholder for translated subject
+ sequences */
+ Uint1* translation_table; /**< Translation table for forward strand */
+ Uint1* translation_table_rc; /**< Translation table for reverse
+ strand */
+} BlastCoreAuxStruct;
+
/** Deallocates all memory in BlastCoreAuxStruct */
static BlastCoreAuxStruct*
BlastCoreAuxStructFree(BlastCoreAuxStruct* aux_struct)
@@ -117,44 +140,59 @@ static void TranslateHSPsToDNAPCoord(Uint1 program,
* query against one subject sequence. Translation of the subject sequence
* into 6 frames is done inside, if necessary. If subject sequence is
* too long, it can be split into several chunks.
+ * @param program_number BLAST program type [in]
+ * @param query Query sequence structure [in]
+ * @param query_info Query information [in]
+ * @param subject Subject sequence structure [in]
+ * @param lookup Lookup table [in]
+ * @param gap_align Structure for gapped alignment information [in]
+ * @param score_params Scoring parameters [in]
+ * @param word_params Initial word finding and ungapped extension
+ * parameters [in]
+ * @param ext_params Gapped extension parameters [in]
+ * @param hit_params Hit saving parameters [in]
+ * @param db_options Database options [in]
+ * @param diagnostics Hit counts and other diagnostics [in] [out]
+ * @param aux_struct Structure containing different auxiliary data and memory
+ * for the preliminary stage of the BLAST search [in]
+ * @param hsp_list_out List of HSPs found for a given subject sequence [out]
*/
static Int2
BLAST_SearchEngineCore(Uint1 program_number, BLAST_SequenceBlk* query,
BlastQueryInfo* query_info, BLAST_SequenceBlk* subject,
LookupTableWrap* lookup, BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ BlastScoringParameters* score_params,
BlastInitialWordParameters* word_params,
BlastExtensionParameters* ext_params,
BlastHitSavingParameters* hit_params,
- const PSIBlastOptions* psi_options,
const BlastDatabaseOptions* db_options,
- BlastReturnStat* return_stats,
+ BlastDiagnostics* diagnostics,
BlastCoreAuxStruct* aux_struct,
BlastHSPList** hsp_list_out)
{
BlastInitHitList* init_hitlist = aux_struct->init_hitlist;
BlastHSPList* hsp_list = aux_struct->hsp_list;
- BLAST_ExtendWord* ewp = aux_struct->ewp;
- Uint4* query_offsets = aux_struct->query_offsets;
- Uint4* subject_offsets = aux_struct->subject_offsets;
Uint1* translation_buffer = NULL;
Int4* frame_offsets = NULL;
- Int4 num_chunks, chunk, total_subject_length, offset;
BlastHitSavingOptions* hit_options = hit_params->options;
+ BlastScoringOptions* score_options = score_params->options;
BlastHSPList* combined_hsp_list = NULL;
Int2 status = 0;
- Boolean translated_subject;
Int4 context, first_context, last_context;
- Int4 orig_length = subject->length, prot_length = 0;
+ Int4 orig_length = subject->length;
Uint1* orig_sequence = subject->sequence;
Int4 **matrix;
Int4 hsp_num_max;
+ BlastUngappedStats* ungapped_stats = NULL;
+ BlastGappedStats* gapped_stats = NULL;
+ Boolean prelim_traceback =
+ (ext_params->options->ePrelimGapExt == eGreedyWithTracebackExt);
- translated_subject = (program_number == blast_type_tblastn
+ const Boolean k_translated_subject = (program_number == blast_type_tblastn
|| program_number == blast_type_tblastx
|| program_number == blast_type_rpstblastn);
- if (translated_subject) {
+ if (k_translated_subject) {
first_context = 0;
last_context = 5;
if (score_options->is_ooframe) {
@@ -184,9 +222,19 @@ BLAST_SearchEngineCore(Uint1 program_number, BLAST_SequenceBlk* query,
hsp_num_max = (hit_options->hsp_num_max ? hit_options->hsp_num_max : INT4_MAX);
+ if (diagnostics) {
+ ungapped_stats = diagnostics->ungapped_stat;
+ gapped_stats = diagnostics->gapped_stat;
+ }
+
/* Loop over frames of the subject sequence */
for (context=first_context; context<=last_context; context++) {
- if (translated_subject) {
+ Int4 chunk; /* loop variable below. */
+ Int4 num_chunks; /* loop variable below. */
+ Int4 offset = 0; /* Used as offset into subject sequence (if chunked) */
+ Int4 total_subject_length; /* Length of subject sequence used when split. */
+
+ if (k_translated_subject) {
subject->frame = BLAST_ContextToFrame(blast_type_blastx, context);
subject->sequence =
translation_buffer + frame_offsets[context] + 1;
@@ -199,7 +247,6 @@ BLAST_SearchEngineCore(Uint1 program_number, BLAST_SequenceBlk* query,
/* Split subject sequence into chunks if it is too long */
num_chunks = (subject->length - DBSEQ_CHUNK_OVERLAP) /
(MAX_DBSEQ_LEN - DBSEQ_CHUNK_OVERLAP) + 1;
- offset = 0;
total_subject_length = subject->length;
for (chunk = 0; chunk < num_chunks; ++chunk) {
@@ -217,23 +264,22 @@ BLAST_SearchEngineCore(Uint1 program_number, BLAST_SequenceBlk* query,
BlastInitHitListReset(init_hitlist);
- return_stats->db_hits +=
- aux_struct->WordFinder(subject, query, lookup,
- matrix, word_params, ewp, query_offsets,
- subject_offsets, GetOffsetArraySize(lookup), init_hitlist);
+ aux_struct->WordFinder(subject, query, lookup,
+ matrix, word_params, aux_struct->ewp, aux_struct->query_offsets,
+ aux_struct->subject_offsets, GetOffsetArraySize(lookup),
+ init_hitlist, ungapped_stats);
if (init_hitlist->total == 0)
continue;
- return_stats->init_extends += init_hitlist->total;
-
if (score_options->gapped_calculation) {
+ Int4 prot_length = 0;
if (score_options->is_ooframe) {
/* Convert query offsets in all HSPs into the mixed-frame
coordinates */
TranslateHSPsToDNAPCoord(program_number, init_hitlist,
query_info, subject->frame, orig_length, offset);
- if (translated_subject) {
+ if (k_translated_subject) {
prot_length = subject->length;
subject->length = 2*orig_length + 1;
}
@@ -245,9 +291,9 @@ BLAST_SearchEngineCore(Uint1 program_number, BLAST_SequenceBlk* query,
* are saved.
*/
aux_struct->GetGappedScore(program_number, query, query_info,
- subject, gap_align, score_options, ext_params, hit_params,
- init_hitlist, &hsp_list);
- if (score_options->is_ooframe && translated_subject)
+ subject, gap_align, score_params, ext_params, hit_params,
+ init_hitlist, &hsp_list, gapped_stats);
+ if (score_options->is_ooframe && k_translated_subject)
subject->length = prot_length;
} else {
BLAST_GetUngappedHSPList(init_hitlist, query_info, subject,
@@ -257,8 +303,6 @@ BLAST_SearchEngineCore(Uint1 program_number, BLAST_SequenceBlk* query,
if (hsp_list->hspcnt == 0)
continue;
- return_stats->good_init_extends += hsp_list->hspcnt;
-
/* The subject ordinal id is not yet filled in this HSP list */
hsp_list->oid = subject->oid;
@@ -270,14 +314,19 @@ BLAST_SearchEngineCore(Uint1 program_number, BLAST_SequenceBlk* query,
/* Allow merging of HSPs either if traceback is already
available, or if it is an ungapped search */
Blast_HSPListsMerge(hsp_list, &combined_hsp_list, hsp_num_max, offset,
- (hsp_list->traceback_done || !score_options->gapped_calculation));
+ (Boolean)(prelim_traceback || !score_options->gapped_calculation));
} /* End loop on chunks of subject sequence */
Blast_HSPListAppend(combined_hsp_list, hsp_list_out, hsp_num_max);
combined_hsp_list = Blast_HSPListFree(combined_hsp_list);
} /* End loop on frames */
+ /* Restore the original contents of the subject block */
+ subject->length = orig_length;
+ subject->sequence = orig_sequence;
+
hsp_list = *hsp_list_out;
+
if (hit_params->do_sum_stats == TRUE) {
status = BLAST_LinkHsps(program_number, hsp_list, query_info,
subject, gap_align->sbp, hit_params,
@@ -293,40 +342,47 @@ BLAST_SearchEngineCore(Uint1 program_number, BLAST_SequenceBlk* query,
requires precomputation that has not been done yet */
if (program_number != blast_type_rpsblast &&
program_number != blast_type_rpstblastn)
- status = Blast_HSPListGetEvalues(program_number, query_info,
- hsp_list, score_options->gapped_calculation,
- gap_align->sbp);
+ status = Blast_HSPListGetEvalues(query_info, hsp_list,
+ score_options->gapped_calculation, gap_align->sbp);
}
/* Discard HSPs that don't pass the e-value test */
status = Blast_HSPListReapByEvalue(hsp_list, hit_options);
-
+
+ if (gapped_stats && hsp_list && hsp_list->hspcnt > 0) {
+ ++gapped_stats->num_seqs_passed;
+ gapped_stats->good_extensions += hsp_list->hspcnt;
+ }
+
if (translation_buffer) {
sfree(translation_buffer);
}
if (frame_offsets) {
sfree(frame_offsets);
}
-
- /* Restore the original contents of the subject block */
- subject->length = orig_length;
- subject->sequence = orig_sequence;
return status;
}
static Int2
-FillReturnXDropoffsInfo(BlastReturnStat* return_stats,
+FillReturnCutoffsInfo(BlastRawCutoffs* return_cutoffs,
+ BlastScoringParameters* score_params,
BlastInitialWordParameters* word_params,
BlastExtensionParameters* ext_params)
{
- if (!return_stats)
+ /* since the cutoff score here will be used for display
+ purposes, strip out any internal scaling of the scores */
+
+ Int4 scale_factor = (Int4)score_params->scale_factor;
+
+ if (!return_cutoffs)
return -1;
- return_stats->x_drop_ungapped = word_params->x_dropoff;
- return_stats->x_drop_gap = ext_params->gap_x_dropoff;
- return_stats->x_drop_gap_final = ext_params->gap_x_dropoff_final;
- return_stats->gap_trigger = ext_params->gap_trigger;
+ return_cutoffs->x_drop_ungapped = word_params->x_dropoff / scale_factor;
+ return_cutoffs->x_drop_gap = ext_params->gap_x_dropoff / scale_factor;
+ return_cutoffs->x_drop_gap_final = ext_params->gap_x_dropoff_final /
+ scale_factor;
+ return_cutoffs->gap_trigger = ext_params->gap_trigger / scale_factor;
return 0;
}
@@ -346,6 +402,7 @@ FillReturnXDropoffsInfo(BlastReturnStat* return_stats,
* @param query_info The query information block [in]
* @param sbp Contains scoring information. [in]
* @param gap_align Gapped alignment information and allocated memory [out]
+ * @param score_params Parameters for scoring [out]
* @param word_params Parameters for initial word processing [out]
* @param ext_params Parameters for gapped extension [out]
* @param hit_params Parameters for saving hits [out]
@@ -365,6 +422,7 @@ BLAST_SetUpAuxStructures(Uint1 program_number,
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
BlastScoreBlk* sbp,
BlastGapAlignStruct** gap_align,
+ BlastScoringParameters** score_params,
BlastInitialWordParameters** word_params,
BlastExtensionParameters** ext_params,
BlastHitSavingParameters** hit_params,
@@ -395,7 +453,7 @@ BLAST_SetUpAuxStructures(Uint1 program_number,
if ((status = BLAST_GapAlignSetUp(program_number, seq_src,
scoring_options, eff_len_options, ext_options,
- hit_options, query_info, sbp,
+ hit_options, query_info, sbp, score_params,
ext_params, hit_params, eff_len_params, gap_align)) != 0)
return status;
@@ -424,7 +482,7 @@ BLAST_SetUpAuxStructures(Uint1 program_number,
/* Pick which gapped alignment algorithm to use. */
if (phi_lookup)
aux_struct->GetGappedScore = PHIGetGappedScore;
- else if (ext_options->algorithm_type == EXTEND_DYN_PROG)
+ else if (ext_options->ePrelimGapExt == eDynProgExt)
aux_struct->GetGappedScore = BLAST_GetGappedScore;
else
aux_struct->GetGappedScore = BLAST_MbGetGappedScore;
@@ -433,8 +491,6 @@ BLAST_SetUpAuxStructures(Uint1 program_number,
return status;
}
-#define BLAST_DB_CHUNK_SIZE 1024
-
Int4
BLAST_RPSSearchEngine(Uint1 program_number,
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
@@ -447,84 +503,46 @@ BLAST_RPSSearchEngine(Uint1 program_number,
const BlastEffectiveLengthsOptions* eff_len_options,
const PSIBlastOptions* psi_options,
const BlastDatabaseOptions* db_options,
- BlastHSPResults* results, BlastReturnStat* return_stats)
+ BlastHSPStream* hsp_stream, BlastDiagnostics* diagnostics,
+ BlastHSPResults** results)
{
BlastCoreAuxStruct* aux_struct = NULL;
- BlastHSPList* hsp_list;
- BlastInitialWordParameters* word_params;
- BlastExtensionParameters* ext_params;
- BlastHitSavingParameters* hit_params;
+ BlastHSPList* hsp_list = NULL;
+ BlastScoringParameters* score_params = NULL;
+ BlastInitialWordParameters* word_params = NULL;
+ BlastExtensionParameters* ext_params = NULL;
+ BlastHitSavingParameters* hit_params = NULL;
BlastEffectiveLengthsParameters* eff_len_params = NULL;
BlastGapAlignStruct* gap_align;
Int2 status = 0;
-
- BlastHitSavingOptions *internal_hit_options =
- (BlastHitSavingOptions *)hit_options;
- BlastScoringOptions *internal_score_options =
- (BlastScoringOptions *)score_options;
- PSIBlastOptions *internal_psi_options =
- (PSIBlastOptions *)psi_options;
-
Int8 dbsize;
Int4 num_db_seqs;
Uint4 avg_subj_length = 0;
RPSLookupTable *lookup = (RPSLookupTable *)lookup_wrap->lut;
- double scale_factor;
BlastQueryInfo concat_db_info;
BLAST_SequenceBlk concat_db;
RPSAuxInfo *rps_info;
- BlastHSPResults prelim_results;
Uint1 *orig_query_seq = NULL;
+ BlastRawCutoffs* raw_cutoffs = NULL;
if (program_number != blast_type_rpsblast &&
program_number != blast_type_rpstblastn)
return -1;
- if (results->num_queries != 1)
- return -2;
if ((status =
BLAST_SetUpAuxStructures(program_number, seq_src,
- internal_score_options, eff_len_options, lookup_wrap, word_options,
- ext_options, internal_hit_options, query, query_info, sbp,
- &gap_align, &word_params, &ext_params,
+ score_options, eff_len_options, lookup_wrap, word_options,
+ ext_options, hit_options, query, query_info, sbp,
+ &gap_align, &score_params, &word_params, &ext_params,
&hit_params, &eff_len_params, &aux_struct)) != 0)
return status;
- FillReturnXDropoffsInfo(return_stats, word_params, ext_params);
-
/* modify scoring and gap alignment structures for
- use with RPS blast.
-
- FIXME these should not all be done here, but scattered
- among the relevant initialization functions */
+ use with RPS blast. */
rps_info = lookup->rps_aux_info;
- scale_factor = rps_info->scale_factor;
- internal_psi_options->scalingFactor = scale_factor;
gap_align->positionBased = TRUE;
gap_align->sbp->posMatrix = lookup->rps_pssm;
- word_params->cutoff_score = (Int4)(scale_factor *
- (double)word_params->cutoff_score);
- word_params->x_dropoff = (Int4)(scale_factor *
- (double)word_params->x_dropoff);
- internal_hit_options->cutoff_score = (Int4)(scale_factor *
- (double)internal_hit_options->cutoff_score);
- internal_score_options->gap_open = (Int4)(scale_factor *
- (double)rps_info->gap_open_penalty);
- internal_score_options->gap_extend = (Int4)(scale_factor *
- (double)rps_info->gap_extend_penalty);
- hit_params->cutoff_score = (Int4)(scale_factor *
- (double)hit_params->cutoff_score);
- hit_params->cutoff_small_gap = (Int4)(scale_factor *
- (double)hit_params->cutoff_small_gap);
- hit_params->cutoff_big_gap = (Int4)(scale_factor *
- (double)hit_params->cutoff_big_gap);
- gap_align->gap_x_dropoff = (Int4)(scale_factor *
- (double)gap_align->gap_x_dropoff);
- ext_params->gap_x_dropoff = (Int4)(scale_factor *
- (double)ext_params->gap_x_dropoff);
- ext_params->gap_x_dropoff_final = (Int4)(scale_factor *
- (double)ext_params->gap_x_dropoff_final);
/* determine the total number of residues in the db.
This figure must also include one trailing NULL for
@@ -561,10 +579,6 @@ BLAST_RPSSearchEngine(Uint1 program_number,
concat_db_info.last_context = num_db_seqs - 1;
concat_db_info.context_offsets = lookup->rps_seq_offsets;
- prelim_results.num_queries = num_db_seqs;
- prelim_results.hitlist_array = (BlastHitList **)calloc(num_db_seqs,
- sizeof(BlastHitList *));
-
/* Change the table of diagonals that will be used for the
search; we need a diag table that can fit the entire
concatenated DB */
@@ -584,25 +598,23 @@ BLAST_RPSSearchEngine(Uint1 program_number,
E-values cannot be calculated after hits are found. */
BLAST_SearchEngineCore(program_number, &concat_db, &concat_db_info,
- query, lookup_wrap, gap_align, internal_score_options,
- word_params, ext_params, hit_params, internal_psi_options,
- db_options, return_stats, aux_struct, &hsp_list);
+ query, lookup_wrap, gap_align, score_params,
+ word_params, ext_params, hit_params, db_options,
+ diagnostics, aux_struct, &hsp_list);
- /* save the resulting list of HSPs. 'query' and 'subject' are
- still reversed */
+ /* Fill the cutoff values in the diagnostics structure */
+ if (diagnostics->cutoffs)
+ raw_cutoffs = diagnostics->cutoffs;
+
+ FillReturnCutoffsInfo(raw_cutoffs, score_params, word_params, ext_params);
+ /* Save the resulting list of HSPs. 'query' and 'subject' are
+ still reversed */
if (hsp_list && hsp_list->hspcnt > 0) {
- return_stats->prelim_gap_passed += hsp_list->hspcnt;
/* Save the HSPs into a hit list */
- Blast_HSPResultsSaveHitList(program_number, &prelim_results, hsp_list, hit_params);
+ BlastHSPStreamWrite(hsp_stream, &hsp_list);
}
- /* Change the results from a single hsplist with many
- contexts to many hsplists each with a single context.
- 'query' and 'subject' offsets are still reversed. */
-
- Blast_HSPResultsRPSUpdate(results, &prelim_results);
-
/* for a translated search, throw away the packed version
of the query and replace with the original (excluding the
starting sentinel) */
@@ -615,15 +627,10 @@ BLAST_RPSSearchEngine(Uint1 program_number,
/* Do the traceback. After this call, query and
subject have reverted to their traditional meanings. */
- BLAST_RPSTraceback(program_number, results, &concat_db,
+ BLAST_RPSTraceback(program_number, hsp_stream, &concat_db,
&concat_db_info, query, query_info, gap_align,
- internal_score_options, ext_params, hit_params, db_options,
- internal_psi_options, rps_info->karlin_k);
-
- /* The traceback calculated the E values, so it's safe
- to sort the results now */
-
- Blast_HSPResultsSortByEvalue(results);
+ score_params, ext_params, hit_params, db_options,
+ rps_info->karlin_k, results);
/* free the internal structures used */
/* Do not destruct score block here */
@@ -632,15 +639,16 @@ BLAST_RPSSearchEngine(Uint1 program_number,
query->sequence_start = orig_query_seq;
query->sequence = orig_query_seq + 1;
}
- sfree(prelim_results.hitlist_array);
gap_align->sbp->posMatrix = NULL;
gap_align->positionBased = FALSE;
gap_align->sbp = NULL;
BLAST_GapAlignStructFree(gap_align);
BlastCoreAuxStructFree(aux_struct);
+ sfree(score_params);
sfree(hit_params);
sfree(ext_params);
sfree(word_params);
+ sfree(eff_len_params);
return status;
}
@@ -656,10 +664,12 @@ BLAST_SearchEngine(Uint1 program_number,
const BlastEffectiveLengthsOptions* eff_len_options,
const PSIBlastOptions* psi_options,
const BlastDatabaseOptions* db_options,
- BlastHSPResults* results, BlastReturnStat* return_stats)
+ BlastHSPStream* hsp_stream, BlastDiagnostics* diagnostics,
+ BlastHSPResults** results)
{
BlastCoreAuxStruct* aux_struct = NULL;
- BlastHSPList* hsp_list;
+ BlastHSPList* hsp_list = NULL;
+ BlastScoringParameters* score_params = NULL;
BlastInitialWordParameters* word_params = NULL;
BlastExtensionParameters* ext_params = NULL;
BlastHitSavingParameters* hit_params = NULL;
@@ -669,16 +679,19 @@ BLAST_SearchEngine(Uint1 program_number,
Int2 status = 0;
BlastSeqSrcIterator* itr = NULL;
Int8 db_length = 0;
+ BlastRawCutoffs* raw_cutoffs = NULL;
+ Boolean prelim_traceback;
if ((status =
BLAST_SetUpAuxStructures(program_number, seq_src,
score_options, eff_len_options, lookup_wrap, word_options,
ext_options, hit_options, query, query_info, sbp,
- &gap_align, &word_params, &ext_params,
+ &gap_align, &score_params, &word_params, &ext_params,
&hit_params, &eff_len_params, &aux_struct)) != 0)
return status;
- FillReturnXDropoffsInfo(return_stats, word_params, ext_params);
+ prelim_traceback = (ext_options->ePrelimGapExt == eGreedyWithTracebackExt);
+
memset((void*) &seq_arg, 0, sizeof(seq_arg));
/* Encoding is set so there are no sentinel bytes, and protein/nucleotide
@@ -712,58 +725,75 @@ BLAST_SearchEngine(Uint1 program_number,
return status;
}
- /* Calculate cutoff scores for linking HSPs */
- if (hit_params->do_sum_stats) {
+ /* Calculate cutoff scores for linking HSPs. Do this only for ungapped
+ translated searches. */
+ if (hit_params->do_sum_stats && program_number != blast_type_blastn &&
+ !score_options->gapped_calculation) {
CalculateLinkHSPCutoffs(program_number, query_info, sbp,
- hit_params, db_length, seq_arg.seq->length,
- psi_options);
+ hit_params, ext_params, db_length, seq_arg.seq->length);
}
BLAST_SearchEngineCore(program_number, query, query_info,
- seq_arg.seq, lookup_wrap, gap_align, score_options, word_params,
- ext_params, hit_params, psi_options, db_options,
- return_stats, aux_struct, &hsp_list);
+ seq_arg.seq, lookup_wrap, gap_align, score_params, word_params,
+ ext_params, hit_params, db_options, diagnostics, aux_struct,
+ &hsp_list);
if (hsp_list && hsp_list->hspcnt > 0) {
- return_stats->prelim_gap_passed += hsp_list->hspcnt;
if (program_number == blast_type_blastn) {
- status =
- Blast_HSPListReevaluateWithAmbiguities(hsp_list, query,
- seq_arg.seq, hit_options, query_info, sbp, score_options,
- seq_src);
+ if (prelim_traceback || !score_options->gapped_calculation) {
+ status =
+ Blast_HSPListReevaluateWithAmbiguities(hsp_list, query,
+ seq_arg.seq, hit_options, query_info, sbp, score_params,
+ seq_src);
+ }
+ /* Check for HSP inclusion */
+ status = Blast_HSPListUniqSort(hsp_list);
+ }
+ /* Calculate and fill the bit scores, but only if final scores are
+ already available, i.e. either traceback has already been done,
+ or this is an ungapped search. */
+ if (prelim_traceback || !score_options->gapped_calculation) {
+ Blast_HSPListGetBitScores(hsp_list,
+ score_options->gapped_calculation, sbp);
}
- /* Save the HSPs into a hit list */
- Blast_HSPResultsSaveHitList(program_number, results, hsp_list, hit_params);
+ /* Save the results. */
+ BlastHSPStreamWrite(hsp_stream, &hsp_list);
}
- /*BlastSequenceBlkClean(subject);*/
}
itr = BlastSeqSrcIteratorFree(itr);
BlastSequenceBlkFree(seq_arg.seq);
+ /* Fill the cutoff values in the diagnostics structure */
+ if (diagnostics->cutoffs)
+ raw_cutoffs = diagnostics->cutoffs;
+
+ FillReturnCutoffsInfo(raw_cutoffs, score_params, word_params, ext_params);
+
+
if (hit_options->phi_align) {
/* Save the product of effective occurrencies of pattern in query and
occurrencies of pattern in database */
- gap_align->sbp->effective_search_sp *= return_stats->db_hits;
+ Int8 db_hits = 1;
+ if (diagnostics && diagnostics->ungapped_stat)
+ db_hits = diagnostics->ungapped_stat->lookup_hits;
+ gap_align->sbp->effective_search_sp *= db_hits;
}
- /* Now sort the hit lists for all queries, but only if this is a database
- search. */
- if (db_length > 0)
- Blast_HSPResultsSortByEvalue(results);
+ /* Prohibit any subsequent writing to the HSP stream. */
+ BlastHSPStreamClose(hsp_stream);
- if (!ext_options->skip_traceback && score_options->gapped_calculation) {
- status =
- BLAST_ComputeTraceback(program_number, results, query, query_info,
- seq_src, gap_align, score_options, ext_params, hit_params,
- eff_len_params, db_options, psi_options);
- }
+ status =
+ BLAST_ComputeTraceback(program_number, hsp_stream, query, query_info,
+ seq_src, gap_align, score_params, ext_params, hit_params,
+ eff_len_params, db_options, psi_options, results);
/* Do not destruct score block here */
gap_align->sbp = NULL;
BLAST_GapAlignStructFree(gap_align);
BlastCoreAuxStructFree(aux_struct);
+ sfree(score_params);
sfree(hit_params);
sfree(ext_params);
sfree(word_params);
diff --git a/algo/blast/core/blast_engine.h b/algo/blast/core/blast_engine.h
index 3058bc65..1ff008a5 100644
--- a/algo/blast/core/blast_engine.h
+++ b/algo/blast/core/blast_engine.h
@@ -1,51 +1,53 @@
-/* $Id: blast_engine.h,v 1.32 2004/03/15 19:53:18 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_engine.h
-
-Author: Ilya Dondoshansky
+/* $Id: blast_engine.h,v 1.41 2004/06/16 14:53:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
-Contents: High level BLAST functions
+/** @file blast_engine.h
+ * High level BLAST functions
+ */
-******************************************************************************
- * $Revision: 1.32 $
- * */
#ifndef __BLAST_ENGINE__
#define __BLAST_ENGINE__
-#ifdef __cplusplus
-extern "C" {
-#endif
-
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_extend.h>
#include <algo/blast/core/blast_gapalign.h>
#include <algo/blast/core/blast_hits.h>
#include <algo/blast/core/blast_seqsrc.h>
+#include <algo/blast/core/blast_diagnostics.h>
+#include <algo/blast/core/blast_hspstream.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** How many subject sequences to process in one database chunk. */
+#define BLAST_DB_CHUNK_SIZE 1024
/** The high level function performing the BLAST search against a BLAST
* database after all the setup has been done.
@@ -63,8 +65,9 @@ extern "C" {
* @param psi_options Options specific to PSI-BLAST [in]
* @param db_options Options for handling BLAST database [in]
* @param results Structure holding all saved results [in] [out]
- * @param return_stats Return statistics containing numbers of hits on
- * different stages of the search [out]
+ * @param diagnostics Return statistics containing numbers of hits on
+ * different stages of the search [out]
+ * @param results Results of the BLAST search [out]
*/
Int4
BLAST_SearchEngine(Uint1 program_number,
@@ -78,7 +81,8 @@ BLAST_SearchEngine(Uint1 program_number,
const BlastEffectiveLengthsOptions* eff_len_options,
const PSIBlastOptions* psi_options,
const BlastDatabaseOptions* db_options,
- BlastHSPResults* results, BlastReturnStat* return_stats);
+ BlastHSPStream* hsp_stream, BlastDiagnostics* diagnostics,
+ BlastHSPResults** results);
/** The high level function performing an RPS BLAST search
* @param program_number Type of BLAST program [in]
@@ -94,9 +98,10 @@ BLAST_SearchEngine(Uint1 program_number,
* @param eff_len_options Options for setting effective lengths [in]
* @param psi_options Options specific to PSI-BLAST [in]
* @param db_options Options for handling BLAST database [in]
+ * @param hsp_stream Placeholder for saving results [in]
+ * @param diagnostics Return statistics containing numbers of hits on
+ * different stages of the search [out]
* @param results Structure holding all saved results [in] [out]
- * @param return_stats Return statistics containing numbers of hits on
- * different stages of the search [out]
*/
Int4
BLAST_RPSSearchEngine(Uint1 program_number,
@@ -110,44 +115,22 @@ BLAST_RPSSearchEngine(Uint1 program_number,
const BlastEffectiveLengthsOptions* eff_len_options,
const PSIBlastOptions* psi_options,
const BlastDatabaseOptions* db_options,
- BlastHSPResults* results, BlastReturnStat* return_stats);
+ BlastHSPStream* hsp_stream, BlastDiagnostics* diagnostics,
+ BlastHSPResults** results);
/** Gapped extension function pointer type */
typedef Int2 (*BlastGetGappedScoreType)
(Uint1, BLAST_SequenceBlk*, BlastQueryInfo* query_info,
- BLAST_SequenceBlk*, BlastGapAlignStruct*, const BlastScoringOptions*,
+ BLAST_SequenceBlk*, BlastGapAlignStruct*, const BlastScoringParameters*,
const BlastExtensionParameters*, const BlastHitSavingParameters*,
- BlastInitHitList*, BlastHSPList**);
+ BlastInitHitList*, BlastHSPList**, BlastGappedStats*);
/** Word finder function pointer type */
-typedef Int4 (*BlastWordFinderType)
+typedef Int2 (*BlastWordFinderType)
(BLAST_SequenceBlk*, BLAST_SequenceBlk*,
LookupTableWrap*, Int4**, const BlastInitialWordParameters*,
- BLAST_ExtendWord*, Uint4*, Uint4*, Int4, BlastInitHitList*);
-
-/** Structure to be passed to BLAST_SearchEngineCore, containing pointers
- to various preallocated structures and arrays. */
-typedef struct BlastCoreAuxStruct {
-
- BLAST_ExtendWord* ewp; /**< Structure for keeping track of diagonal
- information for initial word matches */
- BlastWordFinderType WordFinder; /**< Word finder function pointer */
- BlastGetGappedScoreType GetGappedScore; /**< Gapped extension function
- pointer */
- BlastInitHitList* init_hitlist; /**< Placeholder for HSPs after
- ungapped extension */
- BlastHSPList* hsp_list; /**< Placeholder for HSPs after gapped
- extension */
- Uint4* query_offsets; /**< Placeholder for initial word match query
- offsets */
- Uint4* subject_offsets; /**< Placeholder for initial word match
- subject offsets */
- Uint1* translation_buffer; /**< Placeholder for translated subject
- sequences */
- Uint1* translation_table; /**< Translation table for forward strand */
- Uint1* translation_table_rc; /**< Translation table for reverse
- strand */
-} BlastCoreAuxStruct;
+ Blast_ExtendWord*, Uint4*, Uint4*, Int4, BlastInitHitList*,
+ BlastUngappedStats*);
#ifdef __cplusplus
}
diff --git a/algo/blast/core/blast_extend.c b/algo/blast/core/blast_extend.c
index 5d5cb180..4ed0f6ea 100644
--- a/algo/blast/core/blast_extend.c
+++ b/algo/blast/core/blast_extend.c
@@ -1,41 +1,38 @@
-/* $Id: blast_extend.c,v 1.60 2004/04/27 15:56:53 coulouri Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_extend.c
-
-Author: Ilya Dondoshansky
-
-Contents: Functions to initialize structures used for BLAST extension
-
-******************************************************************************
- * $Revision: 1.60 $
- * */
-
-static char const rcsid[] = "$Id: blast_extend.c,v 1.60 2004/04/27 15:56:53 coulouri Exp $";
+/* $Id: blast_extend.c,v 1.64 2004/05/24 13:26:27 madden Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_extend.c
+ * Functions to initialize structures used for BLAST extension
+ */
+
+static char const rcsid[] =
+ "$Id: blast_extend.c,v 1.64 2004/05/24 13:26:27 madden Exp $";
#include <algo/blast/core/blast_extend.h>
#include <algo/blast/core/blast_options.h>
@@ -116,12 +113,12 @@ BLAST_DiagTableNew (Int4 qlen, Boolean multiple_hits, Int4 window_size)
/* Description in blast_extend.h */
Int2 BlastExtendWordNew(Uint4 query_length,
const BlastInitialWordOptions* word_options,
- Uint4 subject_length, BLAST_ExtendWord** ewp_ptr)
+ Uint4 subject_length, Blast_ExtendWord** ewp_ptr)
{
- BLAST_ExtendWord* ewp;
+ Blast_ExtendWord* ewp;
Int4 index, i;
- *ewp_ptr = ewp = (BLAST_ExtendWord*) calloc(1, sizeof(BLAST_ExtendWord));
+ *ewp_ptr = ewp = (Blast_ExtendWord*) calloc(1, sizeof(Blast_ExtendWord));
if (!ewp) {
return -1;
@@ -142,10 +139,10 @@ Int2 BlastExtendWordNew(Uint4 query_length,
stack_table->stack_index = (Int4*) calloc(num_stacks, sizeof(Int4));
stack_table->stack_size = (Int4*) malloc(num_stacks*sizeof(Int4));
stack_table->estack =
- (MbStack**) malloc(num_stacks*sizeof(MbStack*));
+ (MB_Stack**) malloc(num_stacks*sizeof(MB_Stack*));
for (index=0; index<num_stacks; index++) {
stack_table->estack[index] =
- (MbStack*) malloc(stack_size*sizeof(MbStack));
+ (MB_Stack*) malloc(stack_size*sizeof(MB_Stack));
stack_table->stack_size[index] = stack_size;
}
stack_table->num_stacks = num_stacks;
@@ -222,20 +219,21 @@ Boolean BLAST_SaveInitialHit(BlastInitHitList* init_hitlist,
* @param s_off The offset in the subject sequence [in]
* @param init_hitlist The structure containing information about all
* initial hits [in] [out]
+ * @return Has this hit been extended?
*/
-static Int2
+static Boolean
MB_ExtendInitialHit(BLAST_SequenceBlk* query,
BLAST_SequenceBlk* subject, LookupTableWrap* lookup,
const BlastInitialWordParameters* word_params,
- Int4** matrix, BLAST_ExtendWord* ewp, Int4 q_off, Int4 s_off,
+ Int4** matrix, Blast_ExtendWord* ewp, Int4 q_off, Int4 s_off,
BlastInitHitList* init_hitlist)
{
Int4 index, index1, step;
MBLookupTable* mb_lt = (MBLookupTable*) lookup->lut;
- MbStack* estack;
+ MB_Stack* estack;
Int4 diag, stack_top;
Int4 window, word_extra_length, scan_step;
- Boolean new_hit, hit_ready, two_hits, do_ungapped_extension;
+ Boolean new_hit, hit_ready = FALSE, two_hits, do_ungapped_extension;
BLAST_DiagTable* diag_table = ewp->diag_table;
MB_StackTable* stack_table = ewp->stack_table;
BlastUngappedData* ungapped_data = NULL;
@@ -264,9 +262,8 @@ MB_ExtendInitialHit(BLAST_SequenceBlk* query,
diag_array_elem = &diag_array[diag];
step = s_pos - diag_array_elem->last_hit;
if (step <= 0)
- return 0;
+ return FALSE;
- hit_ready = FALSE;
if (!two_hits) {
/* Single hit version */
new_hit = (step > scan_step);
@@ -333,9 +330,8 @@ MB_ExtendInitialHit(BLAST_SequenceBlk* query,
if (estack[index].diag == s_off - q_off) {
if (step <= 0) {
stack_table->stack_index[index1] = stack_top + 1;
- return 0;
+ return FALSE;
}
- hit_ready = FALSE;
if (!two_hits) {
/* Single hit version */
new_hit = (step > scan_step);
@@ -390,7 +386,7 @@ MB_ExtendInitialHit(BLAST_SequenceBlk* query,
/* In case the size of this stack changed */
stack_table->stack_index[index1] = stack_top + 1;
- return 0;
+ return hit_ready;
} else if (step <= scan_step || (step <= window &&
estack[index].length >= word_extra_length)) {
/* Hit from a different diagonal, and it can be continued */
@@ -406,10 +402,10 @@ MB_ExtendInitialHit(BLAST_SequenceBlk* query,
/* Need an extra slot on the stack for this hit */
if (++stack_top >= stack_table->stack_size[index1]) {
/* Stack about to overflow - reallocate memory */
- MbStack* ptr;
- if (!(ptr = (MbStack*)realloc(estack,
- 2*stack_table->stack_size[index1]*sizeof(MbStack)))) {
- return 1;
+ MB_Stack* ptr;
+ if (!(ptr = (MB_Stack*)realloc(estack,
+ 2*stack_table->stack_size[index1]*sizeof(MB_Stack)))) {
+ return FALSE;
} else {
stack_table->stack_size[index1] *= 2;
estack = stack_table->estack[index1] = ptr;
@@ -422,6 +418,7 @@ MB_ExtendInitialHit(BLAST_SequenceBlk* query,
stack_table->stack_index[index1] = stack_top + 1;
/* Save the hit if it already qualifies */
if (!two_hits && (word_extra_length == 0)) {
+ hit_ready = TRUE;
if (do_ungapped_extension) {
/* Perform ungapped extension */
BlastnWordUngappedExtend(query, subject, matrix, q_off, s_off,
@@ -446,7 +443,7 @@ MB_ExtendInitialHit(BLAST_SequenceBlk* query,
}
}
- return 0;
+ return hit_ready;
}
/** Update the word extension structure after scanning of each subject sequence
@@ -454,7 +451,7 @@ MB_ExtendInitialHit(BLAST_SequenceBlk* query,
* @param subject_length The length of the subject sequence that has just been
* processed [in]
*/
-static Int2 BlastNaExtendWordExit(BLAST_ExtendWord* ewp, Int4 subject_length)
+static Int2 BlastNaExtendWordExit(Blast_ExtendWord* ewp, Int4 subject_length)
{
BLAST_DiagTable* diag_table;
Int4 diag_array_length, i;
@@ -487,7 +484,7 @@ static Int2 BlastNaExtendWordExit(BLAST_ExtendWord* ewp, Int4 subject_length)
* @param subject_length The length of the subject sequence that has just been
* processed [in]
*/
-static Int2 MB_ExtendWordExit(BLAST_ExtendWord* ewp, Int4 subject_length)
+static Int2 MB_ExtendWordExit(Blast_ExtendWord* ewp, Int4 subject_length)
{
if (!ewp)
return -1;
@@ -619,12 +616,13 @@ BlastnWordUngappedExtend(BLAST_SequenceBlk* query,
* @param s_off The offset in the subject sequence [in]
* @param init_hitlist The structure containing information about all
* initial hits [in] [out]
+ * @return Has this hit been extended?
*/
-static Int2
+static Boolean
BlastnExtendInitialHit(BLAST_SequenceBlk* query,
BLAST_SequenceBlk* subject, Uint4 min_step,
const BlastInitialWordParameters* word_params,
- Int4** matrix, BLAST_ExtendWord* ewp, Int4 q_off, Int4 s_end,
+ Int4** matrix, Blast_ExtendWord* ewp, Int4 q_off, Int4 s_end,
Int4 s_off, BlastInitHitList* init_hitlist)
{
Int4 diag, real_diag;
@@ -697,20 +695,22 @@ BlastnExtendInitialHit(BLAST_SequenceBlk* query,
if (new_hit)
diag_array_elem->diag_level = 0;
}
- return 0;
+
+ return hit_ready;
}
/* Description in blast_extend.h */
-Int4 BlastNaWordFinder(BLAST_SequenceBlk* subject,
+Int2 BlastNaWordFinder(BLAST_SequenceBlk* subject,
BLAST_SequenceBlk* query,
LookupTableWrap* lookup_wrap,
Int4** matrix,
const BlastInitialWordParameters* word_params,
- BLAST_ExtendWord* ewp,
+ Blast_ExtendWord* ewp,
Uint4* q_offsets,
Uint4* s_offsets,
Int4 max_hits,
- BlastInitHitList* init_hitlist)
+ BlastInitHitList* init_hitlist,
+ BlastUngappedStats* ungapped_stats)
{
LookupTable* lookup = (LookupTable*) lookup_wrap->lut;
Uint1* s_start = subject->sequence;
@@ -719,6 +719,7 @@ Int4 BlastNaWordFinder(BLAST_SequenceBlk* subject,
Uint1* s;
Uint1* q_start = query->sequence;
Int4 hitsfound, total_hits = 0;
+ Int4 hits_extended = 0;
Uint4 word_size, compressed_wordsize, reduced_word_length;
Uint4 extra_bytes_needed;
Uint2 extra_bases, left, right;
@@ -775,17 +776,22 @@ Int4 BlastNaWordFinder(BLAST_SequenceBlk* subject,
if (left + right >= extra_bases) {
/* Check if this diagonal has already been explored. */
- BlastnExtendInitialHit(query, subject, 0,
- word_params, matrix, ewp, q_offsets[i],
- s_offsets[i] + word_size + right,
- s_offsets[i], init_hitlist);
+ if (BlastnExtendInitialHit(query, subject, 0,
+ word_params, matrix, ewp, q_offsets[i],
+ s_offsets[i] + word_size + right,
+ s_offsets[i], init_hitlist))
+ ++hits_extended;
}
}
start_offset = next_start;
}
BlastNaExtendWordExit(ewp, subject->length);
- return total_hits;
+
+ Blast_UngappedStatsUpdate(ungapped_stats, total_hits, hits_extended,
+ init_hitlist->total);
+
+ return 0;
}
/** Extend an exact match in both directions up to the provided
@@ -876,17 +882,17 @@ BlastNaExactMatchExtend(Uint1* q_start, Uint1* s_start,
* extent of already processed hits on each diagonal [in]
* @param init_hitlist Structure to keep the extended hits.
* Must be allocated outside of this function [in] [out]
+ * @return Number of hits extended.
*/
-static void
+static Int4
BlastNaExtendRightAndLeft(Uint4* q_offsets, Uint4* s_offsets, Int4 num_hits,
const BlastInitialWordParameters* word_params,
LookupTableWrap* lookup_wrap,
BLAST_SequenceBlk* query, BLAST_SequenceBlk* subject,
- Int4** matrix, BLAST_ExtendWord* ewp,
+ Int4** matrix, Blast_ExtendWord* ewp,
BlastInitHitList* init_hitlist)
{
Int4 index;
-
Uint4 query_length = query->length;
Uint4 subject_length = subject->length;
Uint1* q_start = query->sequence;
@@ -901,6 +907,7 @@ BlastNaExtendRightAndLeft(Uint4* q_offsets, Uint4* s_offsets, Int4 num_hits,
Boolean do_ungapped_extension = word_params->options->ungapped_extension;
Boolean variable_wordsize =
(Boolean) word_params->options->variable_wordsize;
+ Int4 hits_extended = 0;
if (lookup_wrap->lut_type == MB_LOOKUP_TABLE) {
MBLookupTable* lut = (MBLookupTable*)lookup_wrap->lut;
@@ -929,38 +936,42 @@ BlastNaExtendRightAndLeft(Uint4* q_offsets, Uint4* s_offsets, Int4 num_hits,
if (BlastNaExactMatchExtend(q, s, max_bases_left,
max_bases_right, word_length,
- !variable_wordsize, &extended_right))
+ (Boolean) !variable_wordsize, &extended_right))
{
/* Check if this diagonal has already been explored and save
the hit if needed. */
- BlastnExtendInitialHit(query, subject, min_step,
+ if (BlastnExtendInitialHit(query, subject, min_step,
word_params, matrix, ewp, q_offsets[index],
s_off + extended_right, s_offsets[index],
- init_hitlist);
+ init_hitlist))
+ ++hits_extended;
}
}
+ return hits_extended;
}
/* Description in blast_extend.h */
-Int4 MB_WordFinder(BLAST_SequenceBlk* subject,
+Int2 MB_WordFinder(BLAST_SequenceBlk* subject,
BLAST_SequenceBlk* query,
LookupTableWrap* lookup_wrap,
Int4** matrix,
const BlastInitialWordParameters* word_params,
- BLAST_ExtendWord* ewp,
+ Blast_ExtendWord* ewp,
Uint4* q_offsets,
Uint4* s_offsets,
Int4 max_hits,
- BlastInitHitList* init_hitlist)
+ BlastInitHitList* init_hitlist,
+ BlastUngappedStats* ungapped_stats)
{
const BlastInitialWordOptions* word_options = word_params->options;
/* Pointer to the beginning of the first word of the subject sequence */
MBLookupTable* mb_lt = (MBLookupTable*) lookup_wrap->lut;
Int4 hitsfound=0;
- Int4 hit_counter=0, index;
+ Int4 total_hits=0, index;
Int4 start_offset, next_start, last_start, last_end;
Int4 subject_length = subject->length;
Boolean ag_blast;
+ Int4 hits_extended = 0;
ag_blast = (Boolean) (word_options->extension_method == eRightAndLeft);
@@ -994,41 +1005,49 @@ Int4 MB_WordFinder(BLAST_SequenceBlk* subject,
q_offsets, s_offsets, max_hits, &next_start);
}
if (ag_blast) {
- BlastNaExtendRightAndLeft(q_offsets, s_offsets, hitsfound,
+ hits_extended +=
+ BlastNaExtendRightAndLeft(q_offsets, s_offsets, hitsfound,
word_params, lookup_wrap, query, subject,
matrix, ewp, init_hitlist);
} else {
for (index = 0; index < hitsfound; ++index) {
- MB_ExtendInitialHit(query, subject, lookup_wrap, word_params,
- matrix, ewp, q_offsets[index], s_offsets[index], init_hitlist);
+ if (MB_ExtendInitialHit(query, subject, lookup_wrap, word_params,
+ matrix, ewp, q_offsets[index],
+ s_offsets[index], init_hitlist))
+ ++hits_extended;
}
}
/* next_start returned from the ScanSubject points to the beginning
of the word */
start_offset = next_start;
- hit_counter += hitsfound;
+ total_hits += hitsfound;
}
MB_ExtendWordExit(ewp, subject_length);
- return hit_counter;
+ Blast_UngappedStatsUpdate(ungapped_stats, total_hits, hits_extended,
+ init_hitlist->total);
+
+ return 0;
}
/* Description in blast_extend.h */
-Int4 BlastNaWordFinder_AG(BLAST_SequenceBlk* subject,
+Int2 BlastNaWordFinder_AG(BLAST_SequenceBlk* subject,
BLAST_SequenceBlk* query,
LookupTableWrap* lookup_wrap,
Int4** matrix,
const BlastInitialWordParameters* word_params,
- BLAST_ExtendWord* ewp,
+ Blast_ExtendWord* ewp,
Uint4* q_offsets,
Uint4* s_offsets,
Int4 max_hits,
- BlastInitHitList* init_hitlist)
+ BlastInitHitList* init_hitlist,
+ BlastUngappedStats* ungapped_stats)
{
Int4 hitsfound, total_hits = 0;
Int4 start_offset, end_offset, next_start;
LookupTable* lookup = (LookupTable*) lookup_wrap->lut;
+ Int4 hits_extended = 0;
start_offset = 0;
end_offset = subject->length - COMPRESSION_RATIO*lookup->reduced_wordsize;
@@ -1041,7 +1060,8 @@ Int4 BlastNaWordFinder_AG(BLAST_SequenceBlk* subject,
total_hits += hitsfound;
- BlastNaExtendRightAndLeft(q_offsets, s_offsets, hitsfound,
+ hits_extended +=
+ BlastNaExtendRightAndLeft(q_offsets, s_offsets, hitsfound,
word_params, lookup_wrap, query, subject,
matrix, ewp, init_hitlist);
@@ -1049,7 +1069,11 @@ Int4 BlastNaWordFinder_AG(BLAST_SequenceBlk* subject,
}
BlastNaExtendWordExit(ewp, subject->length);
- return total_hits;
+
+ Blast_UngappedStatsUpdate(ungapped_stats, total_hits, hits_extended,
+ init_hitlist->total);
+
+ return 0;
}
/** Deallocate memory for the diagonal table structure */
@@ -1079,7 +1103,7 @@ static MB_StackTable* MBStackTableFree(MB_StackTable* stack_table)
return NULL;
}
-BLAST_ExtendWord* BlastExtendWordFree(BLAST_ExtendWord* ewp)
+Blast_ExtendWord* BlastExtendWordFree(Blast_ExtendWord* ewp)
{
BlastDiagTableFree(ewp->diag_table);
MBStackTableFree(ewp->stack_table);
diff --git a/algo/blast/core/blast_extend.h b/algo/blast/core/blast_extend.h
index 74d0e6eb..72da23e8 100644
--- a/algo/blast/core/blast_extend.h
+++ b/algo/blast/core/blast_extend.h
@@ -1,39 +1,36 @@
-/* $Id: blast_extend.h,v 1.23 2004/03/24 19:09:46 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_extend.h
-
-Author: Ilya Dondoshansky
-
-Contents: Structures used for BLAST extension
+/* $Id: blast_extend.h,v 1.26 2004/05/19 14:52:01 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
-******************************************************************************
- * $Revision: 1.23 $
- * */
+/** @file blast_extend.h
+ * Structures used for BLAST extension @todo FIXME: elaborate description
+ * rename to nt_ungapped.h?
+ */
#ifndef __BLAST_EXTEND__
#define __BLAST_EXTEND__
@@ -41,6 +38,7 @@ Contents: Structures used for BLAST extension
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_options.h>
#include <algo/blast/core/lookup_wrap.h>
+#include <algo/blast/core/blast_diagnostics.h>
#ifdef __cplusplus
extern "C" {
@@ -79,11 +77,11 @@ typedef struct DiagStruct {
} DiagStruct;
/** Structure for keeping last hit information for a diagonal on a stack */
-typedef struct MbStack {
+typedef struct MB_Stack {
Int4 diag; /**< This hit's actual diagonal */
Int4 level; /**< This hit's offset in the subject sequence */
Int4 length; /**< To what length has this hit been extended so far? */
-} MbStack;
+} MB_Stack;
/** Structure containing parameters needed for initial word extension.
* Only one copy of this structure is needed, regardless of how many
@@ -112,14 +110,14 @@ typedef struct MB_StackTable {
by MegaBLAST */
Int4* stack_index; /**< Current number of elements in each stack */
Int4* stack_size; /**< Available memory for each stack */
- MbStack** estack; /**< Array of stacks for most recent hits */
+ MB_Stack** estack; /**< Array of stacks for most recent hits */
} MB_StackTable;
/** Structure for keeping initial word extension information */
-typedef struct BLAST_ExtendWord {
+typedef struct Blast_ExtendWord {
BLAST_DiagTable* diag_table; /**< Diagonal array and related parameters */
MB_StackTable* stack_table; /**< Stacks and related parameters */
-} BLAST_ExtendWord;
+} Blast_ExtendWord;
/** Initializes the word extension structure
* @param query_length Length of the query sequence [in]
@@ -130,7 +128,7 @@ typedef struct BLAST_ExtendWord {
*/
Int2 BlastExtendWordNew(Uint4 query_length,
const BlastInitialWordOptions* word_options,
- Uint4 subject_length, BLAST_ExtendWord** ewp_ptr);
+ Uint4 subject_length, Blast_ExtendWord** ewp_ptr);
/** Allocate memory for the BlastInitHitList structure */
BlastInitHitList* BLAST_InitHitListNew(void);
@@ -157,17 +155,19 @@ BlastInitHitList* BLAST_InitHitListFree(BlastInitHitList* init_hitlist);
* @param max_hits size of offset arrays [in]
* @param init_hitlist Structure to hold all hits information. Has to be
* allocated up front [out]
+ * @param ungapped_stats Various hit counts. Not filled if NULL [out]
*/
-Int4 MB_WordFinder(BLAST_SequenceBlk* subject,
+Int2 MB_WordFinder(BLAST_SequenceBlk* subject,
BLAST_SequenceBlk* query,
LookupTableWrap* lookup,
Int4** matrix,
const BlastInitialWordParameters* word_params,
- BLAST_ExtendWord* ewp,
+ Blast_ExtendWord* ewp,
Uint4* q_offsets,
Uint4* s_offsets,
Int4 max_hits,
- BlastInitHitList* init_hitlist);
+ BlastInitHitList* init_hitlist,
+ BlastUngappedStats* ungapped_stats);
/** Perform ungapped extension of a word hit
* @param query The query sequence [in]
@@ -202,17 +202,19 @@ BlastnWordUngappedExtend(BLAST_SequenceBlk* query,
* @param max_hits size of offset arrays [in]
* @param init_hitlist Structure to hold all hits information. Has to be
* allocated up front [out]
+ * @param ungapped_stats Various hit counts. Not filled if NULL [out]
*/
-Int4 BlastNaWordFinder(BLAST_SequenceBlk* subject,
+Int2 BlastNaWordFinder(BLAST_SequenceBlk* subject,
BLAST_SequenceBlk* query,
LookupTableWrap* lookup_wrap,
Int4** matrix,
const BlastInitialWordParameters* word_params,
- BLAST_ExtendWord* ewp,
+ Blast_ExtendWord* ewp,
Uint4* q_offsets,
Uint4* s_offsets,
Int4 max_hits,
- BlastInitHitList* init_hitlist);
+ BlastInitHitList* init_hitlist,
+ BlastUngappedStats* ungapped_stats);
/** Finds all words for a given subject sequence, satisfying the wordsize and
* discontiguous template conditions, and performs initial (exact match)
@@ -230,17 +232,19 @@ Int4 BlastNaWordFinder(BLAST_SequenceBlk* subject,
* @param max_hits size of offset arrays [in]
* @param init_hitlist Structure to hold all hits information. Has to be
* allocated up front [out]
+ * @param ungapped_stats Various hit counts. Not filled if NULL [out]
*/
-Int4 BlastNaWordFinder_AG(BLAST_SequenceBlk* subject,
+Int2 BlastNaWordFinder_AG(BLAST_SequenceBlk* subject,
BLAST_SequenceBlk* query,
LookupTableWrap* lookup_wrap,
Int4** matrix,
const BlastInitialWordParameters* word_params,
- BLAST_ExtendWord* ewp,
+ Blast_ExtendWord* ewp,
Uint4* q_offsets,
Uint4* s_offsets,
Int4 max_hits,
- BlastInitHitList* init_hitlist);
+ BlastInitHitList* init_hitlist,
+ BlastUngappedStats* ungapped_stats);
/** Save the initial hit data into the initial hit list structure.
* @param init_hitlist the structure holding all the initial hits
@@ -254,13 +258,12 @@ Boolean BLAST_SaveInitialHit(BlastInitHitList* init_hitlist,
Int4 q_off, Int4 s_off, BlastUngappedData* ungapped_data);
/** Deallocate memory for the word extension structure */
-BLAST_ExtendWord* BlastExtendWordFree(BLAST_ExtendWord* ewp);
+Blast_ExtendWord* BlastExtendWordFree(Blast_ExtendWord* ewp);
void
BlastSaveInitHsp(BlastInitHitList* ungapped_hsps, Int4 q_start, Int4 s_start,
Int4 q_off, Int4 s_off, Int4 len, Int4 score);
-
#ifdef __cplusplus
}
#endif
diff --git a/algo/blast/core/blast_filter.c b/algo/blast/core/blast_filter.c
index ec15a0ce..0a642e0c 100644
--- a/algo/blast/core/blast_filter.c
+++ b/algo/blast/core/blast_filter.c
@@ -1,40 +1,38 @@
-static char const rcsid[] = "$Id: blast_filter.c,v 1.43 2004/04/29 15:09:11 madden Exp $";
-/*
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_filter.c
-
-Author: Ilya Dondoshansky
-
-Contents: All code related to query sequence masking/filtering for BLAST
-
-******************************************************************************
- * $Revision: 1.43 $
- * */
+/* $Id: blast_filter.c,v 1.45 2004/05/24 13:26:27 madden Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_filter.c
+ * All code related to query sequence masking/filtering for BLAST
+ */
+
+static char const rcsid[] =
+ "$Id: blast_filter.c,v 1.45 2004/05/24 13:26:27 madden Exp $";
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_util.h>
@@ -904,7 +902,7 @@ BlastSetUp_GetFilteringLocations(BLAST_SequenceBlk* query_blk, BlastQueryInfo* q
{
Int2 status = 0;
- Int4 context = 0; /* loop variable. */
+ Int2 context = 0; /* loop variable. */
const Boolean k_is_na = (program_number == blast_type_blastn);
BlastMaskLoc *last_maskloc = NULL;
BlastMaskLoc *filter_maskloc = NULL; /* Local variable for mask locs. */
diff --git a/algo/blast/core/blast_filter.h b/algo/blast/core/blast_filter.h
index 8625b0fa..60b69bb4 100644
--- a/algo/blast/core/blast_filter.h
+++ b/algo/blast/core/blast_filter.h
@@ -1,51 +1,47 @@
-/* $Id: blast_filter.h,v 1.17 2004/04/29 15:09:27 madden Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_filter.h
-
-Author: Ilya Dondoshansky
-
-Contents: BLAST filtering functions.
-
-Detailed Contents:
-
-******************************************************************************
- * $Revision: 1.17 $
- * */
+/* $Id: blast_filter.h,v 1.19 2004/06/16 14:53:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_filter.h
+ * BLAST filtering functions. @todo FIXME: contains more than filtering
+ * functions, combine with blast_dust.h?
+ */
+
#ifndef __BLAST_FILTER__
#define __BLAST_FILTER__
+#include <algo/blast/core/blast_def.h>
+#include <algo/blast/core/blast_message.h>
+
#ifdef __cplusplus
extern "C" {
#endif
-#include <algo/blast/core/blast_def.h>
-#include <algo/blast/core/blast_message.h>
-
/** Create and initialize a new sequence interval.
* @param from Start of the interval [in]
* @param to End of the interval [in]
@@ -108,8 +104,13 @@ BLAST_ComplementMaskLocations(Uint1 program_number,
* @param seqloc_retval Resulting locations for filtered region. [out]
*/
Int2
-BlastSetUp_Filter(Uint1 program_number, Uint1* sequence, Int4 length,
- Int4 offset, const char* instructions, Boolean *mask_at_hash, BlastSeqLoc* *seqloc_retval);
+BlastSetUp_Filter(Uint1 program_number,
+ Uint1* sequence,
+ Int4 length,
+ Int4 offset,
+ const char* instructions,
+ Boolean *mask_at_hash,
+ BlastSeqLoc* *seqloc_retval);
/** Does preparation for filtering and then calls BlastSetUp_Filter
diff --git a/algo/blast/core/blast_gapalign.c b/algo/blast/core/blast_gapalign.c
index 0a2c458f..e9aa00fb 100644
--- a/algo/blast/core/blast_gapalign.c
+++ b/algo/blast/core/blast_gapalign.c
@@ -1,41 +1,38 @@
-/* $Id: blast_gapalign.c,v 1.91 2004/04/23 20:55:34 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_gapalign.c
-
-Author: Ilya Dondoshansky
-
-Contents: Functions to perform gapped alignment
-
-******************************************************************************
- * $Revision: 1.91 $
- * */
-
-static char const rcsid[] = "$Id: blast_gapalign.c,v 1.91 2004/04/23 20:55:34 dondosha Exp $";
+/* $Id: blast_gapalign.c,v 1.107 2004/06/15 20:01:28 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_gapalign.c
+ * Functions to perform gapped alignment
+ */
+
+static char const rcsid[] =
+ "$Id: blast_gapalign.c,v 1.107 2004/06/15 20:01:28 dondosha Exp $";
#include <algo/blast/core/blast_options.h>
#include <algo/blast/core/blast_def.h>
@@ -43,18 +40,26 @@ static char const rcsid[] = "$Id: blast_gapalign.c,v 1.91 2004/04/23 20:55:34 do
#include <algo/blast/core/blast_util.h> /* for NCBI2NA_UNPACK_BASE macros */
#include <algo/blast/core/blast_setup.h>
#include <algo/blast/core/greedy_align.h>
+#include "blast_gapalign_pri.h"
static Int2 BLAST_DynProgNtGappedAlignment(BLAST_SequenceBlk* query_blk,
BLAST_SequenceBlk* subject_blk, BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options, BlastInitHSP* init_hsp);
+ const BlastScoringParameters* score_params, BlastInitHSP* init_hsp);
static Int4 BLAST_AlignPackedNucl(Uint1* B, Uint1* A, Int4 N, Int4 M,
Int4* pej, Int4* pei, BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options, Boolean reverse_sequence);
+ const BlastScoringParameters* score_params, Boolean reverse_sequence);
static Int2 BLAST_ProtGappedAlignment(Uint1 program,
BLAST_SequenceBlk* query_in, BLAST_SequenceBlk* subject_in,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options, BlastInitHSP* init_hsp);
+ const BlastScoringParameters* score_params, BlastInitHSP* init_hsp);
+
+/** Auxiliary structure for dynamic programming gapped extension */
+typedef struct BlastGapDP {
+ Int4 best;
+ Int4 best_gap;
+ Int4 best_decline;
+} BlastGapDP;
typedef struct GapData {
BlastGapDP* CD;
@@ -385,7 +390,7 @@ static Int4 gdb3(Int4* a, Int4* b, Int4* c)
}
/** Deallocate the memory for greedy gapped alignment */
-static GreedyAlignMem* BLAST_GreedyAlignsfree(GreedyAlignMem* gamp)
+static SGreedyAlignMem* BLAST_GreedyAlignsfree(SGreedyAlignMem* gamp)
{
if (gamp->flast_d) {
sfree(gamp->flast_d[0]);
@@ -405,41 +410,45 @@ static GreedyAlignMem* BLAST_GreedyAlignsfree(GreedyAlignMem* gamp)
}
/** Allocate memory for the greedy gapped alignment algorithm
- * @param score_options Options related to scoring [in]
+ * @param score_params Parameters related to scoring [in]
* @param ext_params Options and parameters related to the extension [in]
* @param max_dbseq_length The length of the longest sequence in the
* database [in]
- * @return The allocated GreedyAlignMem structure
+ * @return The allocated SGreedyAlignMem structure
*/
-static GreedyAlignMem*
-BLAST_GreedyAlignMemAlloc(const BlastScoringOptions* score_options,
+static SGreedyAlignMem*
+BLAST_GreedyAlignMemAlloc(const BlastScoringParameters* score_params,
const BlastExtensionParameters* ext_params,
Int4 max_dbseq_length)
{
- GreedyAlignMem* gamp;
+#define ERROR_FRACTION 2 /* N.B.: This value should match the value of
+ ERROR_FRACTION in the anonymous enum in
+ greedy_align.c */
+#define ICEIL(x,y) ((((x)-1)/(y))+1) /* FIXME: duplicated from greedy_align.c */
+ SGreedyAlignMem* gamp;
Int4 max_d, max_d_1, Xdrop, d_diff, max_cost, gd, i;
Int4 reward, penalty, gap_open, gap_extend;
Int4 Mis_cost, GE_cost;
Boolean do_traceback;
- if (score_options == NULL || ext_params == NULL)
+ if (score_params == NULL || ext_params == NULL)
return NULL;
do_traceback =
- (ext_params->options->algorithm_type != EXTEND_GREEDY_NO_TRACEBACK);
+ (ext_params->options->ePrelimGapExt != eGreedyExt);
- if (score_options->reward % 2 == 1) {
- reward = 2*score_options->reward;
- penalty = -2*score_options->penalty;
+ if (score_params->reward % 2 == 1) {
+ reward = 2*score_params->reward;
+ penalty = -2*score_params->penalty;
Xdrop = 2*ext_params->gap_x_dropoff;
- gap_open = 2*score_options->gap_open;
- gap_extend = 2*score_options->gap_extend;
+ gap_open = 2*score_params->gap_open;
+ gap_extend = 2*score_params->gap_extend;
} else {
- reward = score_options->reward;
- penalty = -score_options->penalty;
+ reward = score_params->reward;
+ penalty = -score_params->penalty;
Xdrop = ext_params->gap_x_dropoff;
- gap_open = score_options->gap_open;
- gap_extend = score_options->gap_extend;
+ gap_open = score_params->gap_open;
+ gap_extend = score_params->gap_extend;
}
if (gap_open == 0 && gap_extend == 0)
@@ -447,9 +456,9 @@ BLAST_GreedyAlignMemAlloc(const BlastScoringOptions* score_options,
max_d = (Int4) (max_dbseq_length / ERROR_FRACTION + 1);
- gamp = (GreedyAlignMem*) calloc(1, sizeof(GreedyAlignMem));
+ gamp = (SGreedyAlignMem*) calloc(1, sizeof(SGreedyAlignMem));
- if (score_options->gap_open==0 && score_options->gap_extend==0) {
+ if (score_params->gap_open==0 && score_params->gap_extend==0) {
d_diff = ICEIL(Xdrop+reward/2, penalty+reward);
gamp->flast_d = (Int4**) malloc((max_d + 2) * sizeof(Int4*));
@@ -482,14 +491,14 @@ BLAST_GreedyAlignMemAlloc(const BlastScoringOptions* score_options,
gd = gdb3(&Mis_cost, &gap_open, &GE_cost);
d_diff = ICEIL(Xdrop+reward/2, gd);
gamp->uplow_free = (Int4*) calloc(2*(max_d+1+max_cost), sizeof(Int4));
- gamp->flast_d_affine = (ThreeVal**)
- malloc((MAX(max_d, max_cost) + 2) * sizeof(ThreeVal*));
+ gamp->flast_d_affine = (SThreeVal**)
+ malloc((MAX(max_d, max_cost) + 2) * sizeof(SThreeVal*));
if (!gamp->uplow_free || !gamp->flast_d_affine) {
BLAST_GreedyAlignsfree(gamp);
return NULL;
}
- gamp->flast_d_affine[0] = (ThreeVal*)
- calloc((2*max_d_1 + 6) , sizeof(ThreeVal) * (max_cost+1));
+ gamp->flast_d_affine[0] = (SThreeVal*)
+ calloc((2*max_d_1 + 6) , sizeof(SThreeVal) * (max_cost+1));
for (i = 1; i <= max_cost; i++)
gamp->flast_d_affine[i] =
gamp->flast_d_affine[i-1] + 2*max_d_1 + 6;
@@ -511,6 +520,9 @@ BLAST_GreedyAlignMemAlloc(const BlastScoringOptions* score_options,
BlastGapAlignStruct*
BLAST_GapAlignStructFree(BlastGapAlignStruct* gap_align)
{
+ if (!gap_align)
+ return NULL;
+
GapEditBlockDelete(gap_align->edit_block);
if (gap_align->greedy_align_mem)
BLAST_GreedyAlignsfree(gap_align->greedy_align_mem);
@@ -522,9 +534,9 @@ BLAST_GapAlignStructFree(BlastGapAlignStruct* gap_align)
/* Documented in blast_gapalign.h */
Int2
-BLAST_GapAlignStructNew(const BlastScoringOptions* score_options,
+BLAST_GapAlignStructNew(const BlastScoringParameters* score_params,
const BlastExtensionParameters* ext_params,
- Uint4 max_subject_length, Int4 query_length,
+ Uint4 max_subject_length,
BlastScoreBlk* sbp, BlastGapAlignStruct** gap_align_ptr)
{
Int2 status = 0;
@@ -534,7 +546,7 @@ BLAST_GapAlignStructNew(const BlastScoringOptions* score_options,
if (!gap_align_ptr)
return 0;
- if (!gap_align_ptr || !sbp || !score_options || !ext_params)
+ if (!gap_align_ptr || !sbp || !score_params || !ext_params)
return -1;
gap_align = (BlastGapAlignStruct*) calloc(1, sizeof(BlastGapAlignStruct));
@@ -545,10 +557,10 @@ BLAST_GapAlignStructNew(const BlastScoringOptions* score_options,
gap_align->gap_x_dropoff = ext_params->gap_x_dropoff;
- if (ext_params->options->algorithm_type != EXTEND_DYN_PROG) {
+ if (ext_params->options->ePrelimGapExt != eDynProgExt) {
max_subject_length = MIN(max_subject_length, MAX_DBSEQ_LEN);
gap_align->greedy_align_mem =
- BLAST_GreedyAlignMemAlloc(score_options, ext_params,
+ BLAST_GreedyAlignMemAlloc(score_params, ext_params,
max_subject_length);
if (!gap_align->greedy_align_mem)
gap_align = BLAST_GapAlignStructFree(gap_align);
@@ -574,7 +586,7 @@ BLAST_GapAlignStructNew(const BlastScoringOptions* score_options,
* @param sapp The traceback information [out]
* @param gap_align Structure holding various information and allocated
* memory for the gapped alignment [in]
- * @param score_options Options related to scoring [in]
+ * @param score_params Parameters related to scoring [in]
* @param query_offset The starting offset in query [in]
* @param reversed Has the sequence been reversed? Used for psi-blast [in]
* @param reverse_sequence Do reverse the sequence [in]
@@ -594,10 +606,10 @@ BLAST_GapAlignStructNew(const BlastScoringOptions* score_options,
#define SCRIPT_INS_COL 0x40
#define SCRIPT_DEL_COL 0x80
-static Int4
+Int4
ALIGN_EX(Uint1* A, Uint1* B, Int4 M, Int4 N, Int4* S, Int4* a_offset,
Int4* b_offset, Int4** sapp, BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options, Int4 query_offset,
+ const BlastScoringParameters* score_params, Int4 query_offset,
Boolean reversed, Boolean reverse_sequence)
{
@@ -639,10 +651,10 @@ ALIGN_EX(Uint1* A, Uint1* B, Int4 M, Int4 N, Int4* S, Int4* a_offset,
matrix = gap_align->sbp->matrix;
*a_offset = 0;
*b_offset = 0;
- gap_open = score_options->gap_open;
- gap_extend = score_options->gap_extend;
+ gap_open = score_params->gap_open;
+ gap_extend = score_params->gap_extend;
gap_open_extend = gap_open + gap_extend;
- decline_penalty = score_options->decline_align;
+ decline_penalty = score_params->decline_align;
x_dropoff = gap_align->gap_x_dropoff;
if (x_dropoff < gap_open_extend)
@@ -945,7 +957,7 @@ ALIGN_EX(Uint1* A, Uint1* B, Int4 M, Int4 N, Int4* S, Int4* a_offset,
* @param sapp The traceback information [out]
* @param gap_align Structure holding various information and allocated
* memory for the gapped alignment [in]
- * @param score_options Options related to scoring [in]
+ * @param score_params Parameters related to scoring [in]
* @param query_offset The starting offset in query [in]
* @param reversed Has the sequence been reversed? Used for psi-blast [in]
* @param reverse_sequence Do reverse the sequence [in]
@@ -953,7 +965,7 @@ ALIGN_EX(Uint1* A, Uint1* B, Int4 M, Int4 N, Int4* S, Int4* a_offset,
*/
static Int4 SEMI_G_ALIGN_EX(Uint1* A, Uint1* B, Int4 M, Int4 N,
Int4* S, Int4* a_offset, Int4* b_offset, Boolean score_only, Int4** sapp,
- BlastGapAlignStruct* gap_align, const BlastScoringOptions* score_options,
+ BlastGapAlignStruct* gap_align, const BlastScoringParameters* score_params,
Int4 query_offset, Boolean reversed, Boolean reverse_sequence)
{
BlastGapDP* score_array; /* sequence pointers and indices */
@@ -981,7 +993,7 @@ static Int4 SEMI_G_ALIGN_EX(Uint1* A, Uint1* B, Int4 M, Int4 N,
if (!score_only) {
return ALIGN_EX(A, B, M, N, S, a_offset, b_offset, sapp, gap_align,
- score_options, query_offset, reversed, reverse_sequence);
+ score_params, query_offset, reversed, reverse_sequence);
}
/* do initialization and sanity-checking */
@@ -989,10 +1001,10 @@ static Int4 SEMI_G_ALIGN_EX(Uint1* A, Uint1* B, Int4 M, Int4 N,
matrix = gap_align->sbp->matrix;
*a_offset = 0;
*b_offset = 0;
- gap_open = score_options->gap_open;
- gap_extend = score_options->gap_extend;
+ gap_open = score_params->gap_open;
+ gap_extend = score_params->gap_extend;
gap_open_extend = gap_open + gap_extend;
- decline_penalty = score_options->decline_align;
+ decline_penalty = score_params->decline_align;
x_dropoff = gap_align->gap_x_dropoff;
if (x_dropoff < gap_open_extend)
@@ -1187,7 +1199,7 @@ static Int4 SEMI_G_ALIGN_EX(Uint1* A, Uint1* B, Int4 M, Int4 N,
* @param sapp The traceback information [out]
* @param gap_align Structure holding various information and allocated
* memory for the gapped alignment [in]
- * @param score_options Options related to scoring [in]
+ * @param score_params Parameters related to scoring [in]
* @param query_offset The starting offset in query [in]
* @param reversed Has the sequence been reversed? Used for psi-blast [in]
* @return The best alignment score found.
@@ -1208,7 +1220,7 @@ static Int4 SEMI_G_ALIGN_EX(Uint1* A, Uint1* B, Int4 M, Int4 N,
static Int4 OOF_ALIGN(Uint1* A, Uint1* B, Int4 M, Int4 N,
Int4* S, Int4* a_offset, Int4* b_offset, Int4** sapp,
- BlastGapAlignStruct* gap_align, const BlastScoringOptions* score_options,
+ BlastGapAlignStruct* gap_align, const BlastScoringParameters* score_params,
Int4 query_offset, Boolean reversed)
{
BlastGapDP* score_array; /* sequence pointers and indices */
@@ -1250,10 +1262,10 @@ static Int4 OOF_ALIGN(Uint1* A, Uint1* B, Int4 M, Int4 N,
matrix = gap_align->sbp->matrix;
*a_offset = 0;
*b_offset = -2;
- gap_open = score_options->gap_open;
- gap_extend = score_options->gap_extend;
+ gap_open = score_params->gap_open;
+ gap_extend = score_params->gap_extend;
gap_open_extend = gap_open + gap_extend;
- shift_penalty = score_options->shift_pen;
+ shift_penalty = score_params->shift_pen;
x_dropoff = gap_align->gap_x_dropoff;
if (x_dropoff < gap_open_extend)
@@ -1749,14 +1761,14 @@ static Int4 OOF_ALIGN(Uint1* A, Uint1* B, Int4 M, Int4 N,
* @param sapp the traceback information [out]
* @param gap_align Structure holding various information and allocated
* memory for the gapped alignment [in]
- * @param score_options Options related to scoring [in]
+ * @param score_params Parameters related to scoring [in]
* @param query_offset The starting offset in query [in]
* @param reversed Has the sequence been reversed? Used for psi-blast [in]
* @return The best alignment score found.
*/
static Int4 OOF_SEMI_G_ALIGN(Uint1* A, Uint1* B, Int4 M, Int4 N,
Int4* S, Int4* a_offset, Int4* b_offset, Boolean score_only, Int4** sapp,
- BlastGapAlignStruct* gap_align, const BlastScoringOptions* score_options,
+ BlastGapAlignStruct* gap_align, const BlastScoringParameters* score_params,
Int4 query_offset, Boolean reversed)
{
BlastGapDP* score_array; /* sequence pointers and indices */
@@ -1787,7 +1799,7 @@ static Int4 OOF_SEMI_G_ALIGN(Uint1* A, Uint1* B, Int4 M, Int4 N,
if (!score_only) {
return OOF_ALIGN(A, B, M, N, S, a_offset, b_offset, sapp, gap_align,
- score_options, query_offset, reversed);
+ score_params, query_offset, reversed);
}
/* do initialization and sanity-checking */
@@ -1795,10 +1807,10 @@ static Int4 OOF_SEMI_G_ALIGN(Uint1* A, Uint1* B, Int4 M, Int4 N,
matrix = gap_align->sbp->matrix;
*a_offset = 0;
*b_offset = -2;
- gap_open = score_options->gap_open;
- gap_extend = score_options->gap_extend;
+ gap_open = score_params->gap_open;
+ gap_extend = score_params->gap_extend;
gap_open_extend = gap_open + gap_extend;
- shift_penalty = score_options->shift_pen;
+ shift_penalty = score_params->shift_pen;
x_dropoff = gap_align->gap_x_dropoff;
if (x_dropoff < gap_open_extend)
@@ -2233,11 +2245,11 @@ Int2 BLAST_MbGetGappedScore(Uint1 program_number,
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
BLAST_SequenceBlk* subject,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
const BlastExtensionParameters* ext_params,
const BlastHitSavingParameters* hit_params,
BlastInitHitList* init_hitlist,
- BlastHSPList** hsp_list_ptr)
+ BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats)
{
const BlastExtensionOptions* ext_options = ext_params->options;
Int4 index, i;
@@ -2293,21 +2305,24 @@ Int2 BLAST_MbGetGappedScore(Uint1 program_number,
}
if (!delete_hsp) {
Boolean good_hit = TRUE;
+ Int4 hsp_length;
+
+ if (gapped_stats)
+ ++gapped_stats->extensions;
+
BLAST_GreedyGappedAlignment(query_tmp.sequence,
subject->sequence, query_tmp.length, subject->length, gap_align,
- score_options, init_hsp->q_off, init_hsp->s_off,
- (Boolean) TRUE, (ext_options->algorithm_type == EXTEND_GREEDY));
- /* For neighboring we have a stricter criterion to keep an HSP */
- if (hit_options->is_neighboring) {
- Int4 hsp_length;
-
- hsp_length =
- MIN(gap_align->query_stop-gap_align->query_start,
- gap_align->subject_stop-gap_align->subject_start) + 1;
- if (hsp_length < MIN_NEIGHBOR_HSP_LENGTH ||
- gap_align->percent_identity < MIN_NEIGHBOR_PERC_IDENTITY)
- good_hit = FALSE;
- }
+ score_params, init_hsp->q_off, init_hsp->s_off, (Boolean) TRUE,
+ (Boolean) (ext_options->ePrelimGapExt == eGreedyWithTracebackExt));
+
+ /* Take advantage of an opportunity to easily check whether this
+ hit passes the percent identity and minimal length criteria. */
+ hsp_length =
+ MIN(gap_align->query_stop-gap_align->query_start,
+ gap_align->subject_stop-gap_align->subject_start) + 1;
+ if (hsp_length < hit_options->min_hit_length ||
+ gap_align->percent_identity < hit_options->percent_identity)
+ good_hit = FALSE;
if (good_hit && gap_align->score >= hit_options->cutoff_score) {
/* gap_align contains alignment endpoints; init_hsp contains
@@ -2324,10 +2339,6 @@ Int2 BLAST_MbGetGappedScore(Uint1 program_number,
}
}
- if (ext_options->algorithm_type != EXTEND_GREEDY_NO_TRACEBACK)
- /* Set the flag that traceback is already done for this HSP list */
- hsp_list->traceback_done = TRUE;
-
sfree(init_hsp_array);
return 0;
@@ -2339,6 +2350,10 @@ Int2 BLAST_MbGetGappedScore(Uint1 program_number,
static GapEditScript*
MBToGapEditScript (MBGapEditScript* ed_script)
{
+ /* Moved from greedy_align.h because it's only needed in this function */
+#define EDIT_VAL(op) (op >> 2)
+#define EDIT_OPC(op) (op & 0x3) /* EDIT_OP_MASK == 0x3 */
+
GapEditScript* esp_start = NULL,* esp,* esp_prev = NULL;
Uint4 i;
@@ -2348,9 +2363,16 @@ MBToGapEditScript (MBGapEditScript* ed_script)
for (i=0; i<ed_script->num; i++) {
esp = (GapEditScript*) calloc(1, sizeof(GapEditScript));
esp->num = EDIT_VAL(ed_script->op[i]);
- esp->op_type = 3 - EDIT_OPC(ed_script->op[i]);
- if (esp->op_type == 3)
- fprintf(stderr, "op_type = 3\n");
+ switch (EDIT_OPC(ed_script->op[i])) {
+ case 1:
+ esp->op_type = eGapAlignDel; break;
+ case 2:
+ esp->op_type = eGapAlignIns; break;
+ case 3:
+ esp->op_type = eGapAlignSub; break;
+ default:
+ fprintf(stderr, "op_type = 3\n"); break;
+ }
if (i==0)
esp_start = esp_prev = esp;
else {
@@ -2392,6 +2414,8 @@ BLAST_GapAlignStructFill(BlastGapAlignStruct* gap_align, Int4 q_start,
gap_align->edit_block->start2 = s_start;
gap_align->edit_block->length1 = query_length;
gap_align->edit_block->length2 = subject_length;
+ gap_align->edit_block->original_length1 = query_length;
+ gap_align->edit_block->original_length2 = subject_length;
gap_align->edit_block->frame1 = gap_align->edit_block->frame2 = 1;
gap_align->edit_block->reverse = 0;
gap_align->edit_block->esp = esp;
@@ -2402,7 +2426,7 @@ BLAST_GapAlignStructFill(BlastGapAlignStruct* gap_align, Int4 q_start,
Int2
BLAST_GreedyGappedAlignment(Uint1* query, Uint1* subject,
Int4 query_length, Int4 subject_length, BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
Int4 q_off, Int4 s_off, Boolean compressed_subject, Boolean do_traceback)
{
Uint1* q;
@@ -2441,8 +2465,8 @@ BLAST_GreedyGappedAlignment(Uint1* query, Uint1* subject,
/* extend to the right */
score = BLAST_AffineGreedyAlign(s, s_avail, q, q_avail, FALSE, X,
- score_options->reward, -score_options->penalty,
- score_options->gap_open, score_options->gap_extend,
+ score_params->reward, -score_params->penalty,
+ score_params->gap_open, score_params->gap_extend,
&s_ext_r, &q_ext_r, gap_align->greedy_align_mem,
ed_script_fwd, rem);
@@ -2452,22 +2476,22 @@ BLAST_GreedyGappedAlignment(Uint1* query, Uint1* subject,
/* extend to the left */
score += BLAST_AffineGreedyAlign(subject, s_off,
query, q_off, TRUE, X,
- score_options->reward, -score_options->penalty,
- score_options->gap_open, score_options->gap_extend,
+ score_params->reward, -score_params->penalty,
+ score_params->gap_open, score_params->gap_extend,
&s_ext_l, &q_ext_l, gap_align->greedy_align_mem,
ed_script_rev, rem);
/* In basic case the greedy algorithm returns number of
differences, hence we need to convert it to score */
- if (score_options->gap_open==0 && score_options->gap_extend==0) {
+ if (score_params->gap_open==0 && score_params->gap_extend==0) {
/* Take advantage of an opportunity to easily calculate percent
identity, to avoid parsing the traceback later */
gap_align->percent_identity =
100*(1 - ((double)score) / MIN(q_ext_l+q_ext_r, s_ext_l+s_ext_r));
score =
- (q_ext_r + s_ext_r + q_ext_l + s_ext_l)*score_options->reward/2 -
- score*(score_options->reward - score_options->penalty);
- } else if (score_options->reward % 2 == 1) {
+ (q_ext_r + s_ext_r + q_ext_l + s_ext_l)*score_params->reward/2 -
+ score*(score_params->reward - score_params->penalty);
+ } else if (score_params->reward % 2 == 1) {
score /= 2;
}
@@ -2491,12 +2515,12 @@ BLAST_GreedyGappedAlignment(Uint1* query, Uint1* subject,
* @param query_blk The query sequence [in]
* @param subject_blk The subject sequence [in]
* @param gap_align The auxiliary structure for gapped alignment [in]
- * @param score_options Options related to scoring [in]
+ * @param score_params Parameters related to scoring [in]
* @param init_hsp The initial HSP that needs to be extended [in]
*/
static Int2 BLAST_DynProgNtGappedAlignment(BLAST_SequenceBlk* query_blk,
BLAST_SequenceBlk* subject_blk, BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options, BlastInitHSP* init_hsp)
+ const BlastScoringParameters* score_params, BlastInitHSP* init_hsp)
{
Boolean found_start, found_end;
Int4 q_length=0, s_length=0, score_right, score_left,
@@ -2521,7 +2545,7 @@ static Int2 BLAST_DynProgNtGappedAlignment(BLAST_SequenceBlk* query_blk,
found_start = TRUE;
score_left = BLAST_AlignPackedNucl(query, subject, q_length, s_length,
&private_q_start, &private_s_start, gap_align,
- score_options, TRUE);
+ score_params, TRUE);
if (score_left < 0)
return -1;
gap_align->query_start = q_length - private_q_start;
@@ -2537,7 +2561,7 @@ static Int2 BLAST_DynProgNtGappedAlignment(BLAST_SequenceBlk* query_blk,
subject+(s_length+3)/COMPRESSION_RATIO - 1,
query_blk->length-q_length,
subject_blk->length-s_length, &(gap_align->query_stop),
- &(gap_align->subject_stop), gap_align, score_options, FALSE);
+ &(gap_align->subject_stop), gap_align, score_params, FALSE);
if (score_right < 0)
return -1;
gap_align->query_stop += q_length;
@@ -2570,14 +2594,14 @@ static Int2 BLAST_DynProgNtGappedAlignment(BLAST_SequenceBlk* query_blk,
* @param b_offset Resulting starting offset in query [out]
* @param a_offset Resulting starting offset in subject [out]
* @param gap_align The auxiliary structure for gapped alignment [in]
- * @param score_options Options related to scoring [in]
+ * @param score_params Parameters related to scoring [in]
* @param reverse_sequence Reverse the sequence.
* @return The best alignment score found.
*/
static Int4 BLAST_AlignPackedNucl(Uint1* B, Uint1* A, Int4 N, Int4 M,
Int4* b_offset, Int4* a_offset,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
Boolean reverse_sequence)
{
BlastGapDP* score_array; /* sequence pointers and indices */
@@ -2608,10 +2632,10 @@ static Int4 BLAST_AlignPackedNucl(Uint1* B, Uint1* A, Int4 N, Int4 M,
matrix = gap_align->sbp->matrix;
*a_offset = 0;
*b_offset = 0;
- gap_open = score_options->gap_open;
- gap_extend = score_options->gap_extend;
+ gap_open = score_params->gap_open;
+ gap_extend = score_params->gap_extend;
gap_open_extend = gap_open + gap_extend;
- decline_penalty = score_options->decline_align;
+ decline_penalty = score_params->decline_align;
x_dropoff = gap_align->gap_x_dropoff;
if (x_dropoff < gap_open_extend)
@@ -2880,64 +2904,37 @@ BlastGetStartForGappedAlignment (Uint1* query, Uint1* subject,
return max_offset;
}
-Int2 BLAST_GetGappedScore (Uint1 program_number,
+static Boolean
+Blast_GappedScorePrelimTest(Uint1 program_number,
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
BLAST_SequenceBlk* subject,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
const BlastExtensionParameters* ext_params,
const BlastHitSavingParameters* hit_params,
- BlastInitHitList* init_hitlist,
- BlastHSPList** hsp_list_ptr)
-
+ BlastInitHSP** init_hsp_array, Int4 init_hsp_count,
+ BlastGappedStats* gapped_stats)
{
- SSeqRange* helper = NULL;
- Boolean hsp_start_is_contained, hsp_end_is_contained;
- Int4 index, index1, next_offset;
BlastInitHSP* init_hsp = NULL;
- BlastHSP* hsp1 = NULL;
- Int4 q_start, s_start, q_end, s_end;
- Boolean is_prot;
- Int4 max_offset;
- Int2 status = 0;
- Int2 frame = 0; /* CHANGE!!!!!!!!!!!!!!!!! */
- BlastInitHSP** init_hsp_array = NULL;
- BlastHSPList* hsp_list = NULL;
- Int4 gap_trigger;
- double cutoff_score;
- Boolean further_process = FALSE;
- const BlastHitSavingOptions* hit_options = hit_params->options;
- BLAST_SequenceBlk query_tmp;
BlastInitHSP init_hsp_tmp;
+ Int4 index;
+ BLAST_SequenceBlk query_tmp;
Int4 context;
Int4 **orig_pssm;
-
- if (!query || !subject || !gap_align || !score_options || !ext_params ||
- !hit_params || !init_hitlist || !hsp_list_ptr)
- return 1;
-
- if (init_hitlist->total == 0)
- return 0;
+ Boolean further_process = FALSE;
+ Int4 gap_trigger;
+ Int4 cutoff_score;
+ Boolean is_prot;
+ Int4 max_offset;
+ Int2 status = 0;
gap_trigger = ext_params->gap_trigger;
cutoff_score = hit_params->cutoff_score;
-
is_prot = (program_number != blast_type_blastn);
orig_pssm = gap_align->sbp->posMatrix;
- if (*hsp_list_ptr == NULL)
- *hsp_list_ptr = hsp_list = Blast_HSPListNew(hit_options->hsp_num_max);
- else
- hsp_list = *hsp_list_ptr;
-
- init_hsp_array = (BlastInitHSP**)
- malloc(init_hitlist->total*sizeof(BlastInitHSP*));
-
- for (index = 0; index < init_hitlist->total; ++index)
- init_hsp_array[index] = &init_hitlist->init_hsp_array[index];
-
- qsort(init_hsp_array, init_hitlist->total,
- sizeof(BlastInitHSP*), score_compare_match);
+ qsort(init_hsp_array, init_hsp_count,
+ sizeof(BlastInitHSP*), score_compare_match);
/* If no initial HSP passes the e-value threshold so far, check if any
would do after gapped alignment, and exit if none are found.
@@ -2946,12 +2943,18 @@ Int2 BLAST_GetGappedScore (Uint1 program_number,
if (init_hsp_array[0]->ungapped_data &&
init_hsp_array[0]->ungapped_data->score < cutoff_score) {
init_hsp_tmp.ungapped_data = NULL;
- for (index=0; index<init_hitlist->total; index++) {
+ for (index=0; index<init_hsp_count; index++) {
init_hsp = init_hsp_array[index];
if (init_hsp->ungapped_data &&
init_hsp->ungapped_data->score < gap_trigger)
break;
+
+ if (gapped_stats) {
+ ++gapped_stats->extra_extensions;
+ ++gapped_stats->extensions;
+ }
+
/* Don't modify initial HSP's coordinates here, because it will be
done again if further processing is required */
GetRelativeCoordinates(query, query_info, init_hsp, &query_tmp,
@@ -2959,14 +2962,31 @@ Int2 BLAST_GetGappedScore (Uint1 program_number,
if (orig_pssm)
gap_align->sbp->posMatrix = orig_pssm +
query_info->context_offsets[context];
+
+ if(is_prot && !score_params->options->is_ooframe) {
+ max_offset =
+ BlastGetStartForGappedAlignment(query_tmp.sequence,
+ subject->sequence, gap_align->sbp,
+ init_hsp_tmp.ungapped_data->q_start,
+ init_hsp_tmp.ungapped_data->length,
+ init_hsp_tmp.ungapped_data->s_start,
+ init_hsp_tmp.ungapped_data->length);
+ init_hsp_tmp.s_off += max_offset - init_hsp_tmp.q_off;
+ init_hsp_tmp.q_off = max_offset;
+ }
+
if (is_prot) {
status =
BLAST_ProtGappedAlignment(program_number, &query_tmp,
- subject, gap_align, score_options, &init_hsp_tmp);
+ subject, gap_align, score_params, &init_hsp_tmp);
} else {
status =
BLAST_DynProgNtGappedAlignment(&query_tmp, subject,
- gap_align, score_options, &init_hsp_tmp);
+ gap_align, score_params, &init_hsp_tmp);
+ }
+ if (status) {
+ further_process = FALSE;
+ break;
}
if (gap_align->score >= cutoff_score) {
further_process = TRUE;
@@ -2977,24 +2997,80 @@ Int2 BLAST_GetGappedScore (Uint1 program_number,
} else {
index = 0;
further_process = TRUE;
+ if (gapped_stats)
+ ++gapped_stats->seqs_ungapped_passed;
}
if (!further_process) {
/* Free the ungapped data */
- for (index = 0; index < init_hitlist->total; ++index) {
+ for (index = 0; index < init_hsp_count; ++index) {
sfree(init_hsp_array[index]->ungapped_data);
}
sfree(init_hsp_array);
gap_align->sbp->posMatrix = orig_pssm;
- return 0;
- }
-
- /* Sort again, if necessary */
- if (index > 0) {
- qsort(init_hsp_array, init_hitlist->total,
+ } else if (index > 0) { /* Sort again, if necessary */
+ qsort(init_hsp_array, init_hsp_count,
sizeof(BlastInitHSP*), score_compare_match);
}
+ return further_process;
+}
+
+Int2 BLAST_GetGappedScore (Uint1 program_number,
+ BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
+ BLAST_SequenceBlk* subject,
+ BlastGapAlignStruct* gap_align,
+ const BlastScoringParameters* score_params,
+ const BlastExtensionParameters* ext_params,
+ const BlastHitSavingParameters* hit_params,
+ BlastInitHitList* init_hitlist,
+ BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats)
+
+{
+ SSeqRange* helper = NULL;
+ Boolean hsp_start_is_contained, hsp_end_is_contained;
+ Int4 index, index1, next_offset;
+ BlastInitHSP* init_hsp = NULL;
+ BlastHSP* hsp1 = NULL;
+ Int4 q_start, s_start, q_end, s_end;
+ Boolean is_prot;
+ Int4 max_offset;
+ Int2 status = 0;
+ Int2 frame = 0; /* CHANGE!!!!!!!!!!!!!!!!! */
+ BlastInitHSP** init_hsp_array = NULL;
+ BlastHSPList* hsp_list = NULL;
+ const BlastHitSavingOptions* hit_options = hit_params->options;
+ BLAST_SequenceBlk query_tmp;
+ Int4 context;
+ Int4 **orig_pssm;
+
+ if (!query || !subject || !gap_align || !score_params || !ext_params ||
+ !hit_params || !init_hitlist || !hsp_list_ptr)
+ return 1;
+
+ if (init_hitlist->total == 0)
+ return 0;
+
+ is_prot = (program_number != blast_type_blastn);
+ orig_pssm = gap_align->sbp->posMatrix;
+
+ if (*hsp_list_ptr == NULL)
+ *hsp_list_ptr = hsp_list = Blast_HSPListNew(hit_options->hsp_num_max);
+ else
+ hsp_list = *hsp_list_ptr;
+
+ init_hsp_array = (BlastInitHSP**)
+ malloc(init_hitlist->total*sizeof(BlastInitHSP*));
+
+ for (index = 0; index < init_hitlist->total; ++index)
+ init_hsp_array[index] = &init_hitlist->init_hsp_array[index];
+
+
+ if (!Blast_GappedScorePrelimTest(program_number, query, query_info,
+ subject, gap_align, score_params, ext_params, hit_params,
+ init_hsp_array, init_hitlist->total, gapped_stats))
+ return 0;
+
/* helper contains most frequently used information to speed up access. */
helper = (SSeqRange*) malloc((init_hitlist->total)*sizeof(SSeqRange));
@@ -3067,11 +3143,12 @@ Int2 BLAST_GetGappedScore (Uint1 program_number,
(hsp1 && init_hsp->ungapped_data &&
init_hsp->ungapped_data->score > hsp1->score)) {
BlastHSP* new_hsp;
-#ifdef NEWBLAST_COLLECT_STATS
- real_gap_number_of_hsps++;
-#endif
+
+ if (gapped_stats) {
+ ++gapped_stats->extensions;
+ }
- if(is_prot && !score_options->is_ooframe) {
+ if(is_prot && !score_params->options->is_ooframe) {
max_offset =
BlastGetStartForGappedAlignment(query_tmp.sequence,
subject->sequence, gap_align->sbp,
@@ -3085,10 +3162,10 @@ Int2 BLAST_GetGappedScore (Uint1 program_number,
if (is_prot) {
status = BLAST_ProtGappedAlignment(program_number, &query_tmp,
- subject, gap_align, score_options, init_hsp);
+ subject, gap_align, score_params, init_hsp);
} else {
status = BLAST_DynProgNtGappedAlignment(&query_tmp, subject,
- gap_align, score_options, init_hsp);
+ gap_align, score_params, init_hsp);
}
if (status) {
@@ -3142,7 +3219,7 @@ Int2 BLAST_GetGappedScore (Uint1 program_number,
* @param sapp End of the traceback buffer [out]
* @param gap_align Gapped alignment information and preallocated
* memory [in] [out]
- * @param score_options Scoring options [in]
+ * @param score_params Scoring parameters [in]
* @param psi_offset Starting position in PSI-BLAST matrix [in]
* @param reversed Direction of the extension [in]
* @param switch_seq Sequences need to be switched for blastx,
@@ -3152,17 +3229,17 @@ static Int4
OOF_SEMI_G_ALIGN_EX(Uint1* query, Uint1* subject, Int4 q_off,
Int4 s_off, Int4* S, Int4* private_q_start, Int4* private_s_start,
Boolean score_only, Int4** sapp, BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options, Int4 psi_offset,
+ const BlastScoringParameters* score_params, Int4 psi_offset,
Boolean reversed, Boolean switch_seq)
{
if (switch_seq) {
return OOF_SEMI_G_ALIGN(subject, query, s_off, q_off, S,
private_s_start, private_q_start, score_only, sapp,
- gap_align, score_options, psi_offset, reversed);
+ gap_align, score_params, psi_offset, reversed);
} else {
return OOF_SEMI_G_ALIGN(query, subject, q_off, s_off, S,
private_q_start, private_s_start, score_only, sapp,
- gap_align, score_options, psi_offset, reversed);
+ gap_align, score_params, psi_offset, reversed);
}
}
@@ -3196,9 +3273,8 @@ AdjustSubjectRange(Int4* subject_offset_ptr, Int4* subject_length_ptr,
*subject_offset_ptr = max_extension_left;
}
- if (subject_length - s_offset > max_extension_right) {
- *subject_length_ptr = s_offset + max_extension_right - *start_shift;
- }
+ *subject_length_ptr =
+ MIN(subject_length, s_offset + max_extension_right) - *start_shift;
}
/** Performs gapped extension for protein sequences, given two
@@ -3208,13 +3284,13 @@ AdjustSubjectRange(Int4* subject_offset_ptr, Int4* subject_length_ptr,
* @param query_blk The query sequence block [in]
* @param subject_blk The subject sequence block [in]
* @param gap_align The auxiliary structure for gapped alignment [in]
- * @param score_options Options related to scoring [in]
+ * @param score_params Parameters related to scoring [in]
* @param init_hsp The initial HSP information [in]
*/
static Int2 BLAST_ProtGappedAlignment(Uint1 program,
BLAST_SequenceBlk* query_blk, BLAST_SequenceBlk* subject_blk,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options, BlastInitHSP* init_hsp)
+ const BlastScoringParameters* score_params, BlastInitHSP* init_hsp)
{
Boolean found_start, found_end;
Int4 q_length=0, s_length=0, score_right, score_left;
@@ -3224,6 +3300,7 @@ static Int2 BLAST_ProtGappedAlignment(Uint1 program,
Int4 query_length = query_blk->length;
Int4 subject_length = subject_blk->length;
Int4 subject_shift = 0;
+ BlastScoringOptions *score_options = score_params->options;
if (gap_align == NULL)
return FALSE;
@@ -3263,12 +3340,12 @@ static Int2 BLAST_ProtGappedAlignment(Uint1 program,
if(score_options->is_ooframe) {
score_left = OOF_SEMI_G_ALIGN_EX(query, subject, q_length, s_length,
NULL, &private_q_start, &private_s_start, TRUE, NULL,
- gap_align, score_options, q_length, TRUE, switch_seq);
+ gap_align, score_params, q_length, TRUE, switch_seq);
} else {
score_left = SEMI_G_ALIGN_EX(query, subject+subject_shift, q_length,
s_length, NULL,
&private_q_start, &private_s_start, TRUE, NULL, gap_align,
- score_options, init_hsp->q_off, FALSE, TRUE);
+ score_params, init_hsp->q_off, FALSE, TRUE);
}
gap_align->query_start = q_length - private_q_start;
@@ -3284,7 +3361,7 @@ static Int2 BLAST_ProtGappedAlignment(Uint1 program,
query_length-q_length+1, subject_length-s_length+1,
NULL, &(gap_align->query_stop), &(gap_align->subject_stop),
TRUE, NULL, gap_align,
- score_options, q_length, FALSE, switch_seq);
+ score_params, q_length, FALSE, switch_seq);
gap_align->query_stop += q_length;
gap_align->subject_stop += s_length + subject_shift;
} else {
@@ -3292,7 +3369,7 @@ static Int2 BLAST_ProtGappedAlignment(Uint1 program,
subject+init_hsp->s_off, query_length-q_length,
subject_length-s_length, NULL, &(gap_align->query_stop),
&(gap_align->subject_stop), TRUE, NULL, gap_align,
- score_options, init_hsp->q_off, FALSE, FALSE);
+ score_params, init_hsp->q_off, FALSE, FALSE);
/* Make end offsets point to the byte after the end of the
alignment */
gap_align->query_stop += init_hsp->q_off + 1;
@@ -3320,18 +3397,9 @@ static Int2 BLAST_ProtGappedAlignment(Uint1 program,
return 0;
}
-/** Copy of the TracebackToGapXEditBlock function from gapxdrop.c, only
- * without 2 unused arguments
- * @param S The traceback obtained from ALIGN [in]
- * @param M Length of alignment in query [in]
- * @param N Length of alignment in subject [in]
- * @param start1 Starting query offset [in]
- * @param start2 Starting subject offset [in]
- * @param edit_block The constructed edit block [out]
- */
Int2
BLAST_TracebackToGapEditBlock(Int4* S, Int4 M, Int4 N, Int4 start1,
- Int4 start2, GapEditBlock** edit_block)
+ Int4 start2, GapEditBlock** edit_block)
{
Int4 i, j, op, number_of_subs, number_of_decline;
@@ -3350,14 +3418,14 @@ BLAST_TracebackToGapEditBlock(Int4* S, Int4 M, Int4 N, Int4 start1,
op = *S;
if (op != MININT && number_of_decline > 0)
{
- e_script->op_type = GAPALIGN_DECLINE;
+ e_script->op_type = eGapAlignDecline;
e_script->num = number_of_decline;
e_script = GapEditScriptNew(e_script);
number_of_decline = 0;
}
if (op != 0 && number_of_subs > 0)
{
- e_script->op_type = GAPALIGN_SUB;
+ e_script->op_type = eGapAlignSub;
e_script->num = number_of_subs;
e_script = GapEditScriptNew(e_script);
number_of_subs = 0;
@@ -3372,7 +3440,7 @@ BLAST_TracebackToGapEditBlock(Int4* S, Int4 M, Int4 N, Int4 start1,
{
if(op > 0)
{
- e_script->op_type = GAPALIGN_DEL;
+ e_script->op_type = eGapAlignDel;
e_script->num = op;
j += op;
if (i < M || j < N)
@@ -3380,7 +3448,7 @@ BLAST_TracebackToGapEditBlock(Int4* S, Int4 M, Int4 N, Int4 start1,
}
else
{
- e_script->op_type = GAPALIGN_INS;
+ e_script->op_type = eGapAlignIns;
e_script->num = ABS(op);
i += ABS(op);
if (i < M || j < N)
@@ -3392,10 +3460,10 @@ BLAST_TracebackToGapEditBlock(Int4* S, Int4 M, Int4 N, Int4 start1,
if (number_of_subs > 0)
{
- e_script->op_type = GAPALIGN_SUB;
+ e_script->op_type = eGapAlignSub;
e_script->num = number_of_subs;
} else if (number_of_decline > 0) {
- e_script->op_type = GAPALIGN_DECLINE;
+ e_script->op_type = eGapAlignDecline;
e_script->num = number_of_decline;
}
@@ -3459,7 +3527,7 @@ BLAST_OOFTracebackToGapEditBlock(Int4* S, Int4 q_length,
e_script = GapEditScriptNew(e_script);
/* if(last_val%3 != 0 && current_val%3 == 0) */
- if(last_val%3 != 0 && current_val == 3)
+ if(last_val%3 != 0 && current_val == eGapAlignSub)
/* 1, 2, 4, 5 vs. 0, 3, 6*/
number = 1;
else
@@ -3471,7 +3539,7 @@ BLAST_OOFTracebackToGapEditBlock(Int4* S, Int4 q_length,
/* for out_of_frame == TRUE - we have op_type == S parameter */
e_script->op_type = current_val;
- if(current_val != 6) {
+ if(current_val != eGapAlignIns) {
index1++;
index2 += current_val;
} else {
@@ -3486,7 +3554,7 @@ BLAST_OOFTracebackToGapEditBlock(Int4* S, Int4 q_length,
Int2 BLAST_GappedAlignmentWithTraceback(Uint1 program, Uint1* query,
Uint1* subject, BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length)
{
Boolean found_start, found_end;
@@ -3494,7 +3562,7 @@ Int2 BLAST_GappedAlignmentWithTraceback(Uint1 program, Uint1* query,
Int4 q_length, s_length;
Int4 prev;
Int4* tback,* tback1,* p = NULL,* q;
- Boolean is_ooframe = score_options->is_ooframe;
+ Boolean is_ooframe = score_params->options->is_ooframe;
Int2 status = 0;
Boolean switch_seq = FALSE;
@@ -3530,7 +3598,7 @@ Int2 BLAST_GappedAlignmentWithTraceback(Uint1 program, Uint1* query,
score_left =
OOF_SEMI_G_ALIGN_EX(query+q_start, subject+s_start, q_start,
s_start, tback, &private_q_length, &private_s_length, FALSE,
- &tback1, gap_align, score_options, q_start, TRUE, switch_seq);
+ &tback1, gap_align, score_params, q_start, TRUE, switch_seq);
gap_align->query_start = q_start - private_q_length;
gap_align->subject_start = s_start - private_s_length;
} else {
@@ -3539,7 +3607,7 @@ Int2 BLAST_GappedAlignmentWithTraceback(Uint1 program, Uint1* query,
score_left =
SEMI_G_ALIGN_EX(query, subject, q_start+1, s_start+1, tback,
&private_q_length, &private_s_length, FALSE, &tback1,
- gap_align, score_options, q_start, FALSE, TRUE);
+ gap_align, score_params, q_start, FALSE, TRUE);
gap_align->query_start = q_start - private_q_length + 1;
gap_align->subject_start = s_start - private_s_length + 1;
}
@@ -3566,7 +3634,7 @@ Int2 BLAST_GappedAlignmentWithTraceback(Uint1 program, Uint1* query,
OOF_SEMI_G_ALIGN_EX(query+q_start-1, subject+s_start-1,
q_length-q_start, s_length-s_start,
tback1, &private_q_length, &private_s_length, FALSE,
- &tback1, gap_align, score_options, q_start, FALSE, switch_seq);
+ &tback1, gap_align, score_params, q_start, FALSE, switch_seq);
if (prev != 3 && p) {
while (*p == 0 || *p == 6) p++;
*p = prev+*p-3;
@@ -3576,7 +3644,7 @@ Int2 BLAST_GappedAlignmentWithTraceback(Uint1 program, Uint1* query,
SEMI_G_ALIGN_EX(query+q_start, subject+s_start,
q_length-q_start-1, s_length-s_start-1,
tback1, &private_q_length, &private_s_length, FALSE,
- &tback1, gap_align, score_options, q_start, FALSE, FALSE);
+ &tback1, gap_align, score_params, q_start, FALSE, FALSE);
}
gap_align->query_stop = q_start + private_q_length + 1;
@@ -3630,10 +3698,9 @@ GetPatternLengthFromGapAlignStruct(BlastGapAlignStruct* gap_align)
return gap_align->query_stop;
}
-Int2 PHIGappedAlignmentWithTraceback(Uint1 program,
- Uint1* query, Uint1* subject,
+Int2 PHIGappedAlignmentWithTraceback(Uint1* query, Uint1* subject,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length)
{
Boolean found_end;
@@ -3655,7 +3722,7 @@ Int2 PHIGappedAlignmentWithTraceback(Uint1 program,
score_left =
SEMI_G_ALIGN_EX(query, subject, q_start, s_start, tback,
&private_q_length, &private_s_length, FALSE, &tback1,
- gap_align, score_options, q_start, FALSE, TRUE);
+ gap_align, score_params, q_start, FALSE, TRUE);
gap_align->query_start = q_start - private_q_length;
gap_align->subject_start = s_start - private_s_length;
@@ -3680,7 +3747,7 @@ Int2 PHIGappedAlignmentWithTraceback(Uint1 program,
SEMI_G_ALIGN_EX(query+q_start, subject+s_start,
query_length-q_start-1, subject_length-s_start-1,
tback1, &private_q_length, &private_s_length, FALSE,
- &tback1, gap_align, score_options, q_start, FALSE, FALSE);
+ &tback1, gap_align, score_params, q_start, FALSE, FALSE);
gap_align->query_stop = q_start + private_q_length + 1;
gap_align->subject_stop = s_start + private_s_length + 1;
@@ -3759,17 +3826,18 @@ Int2 BLAST_GetUngappedHSPList(BlastInitHitList* init_hitlist,
/** Performs gapped extension for PHI BLAST, given two
* sequence blocks, scoring and extension options, and an initial HSP
* with information from the previously performed ungapped extension
- * @param program BLAST program [in]
* @param query_blk The query sequence block [in]
* @param subject_blk The subject sequence block [in]
* @param gap_align The auxiliary structure for gapped alignment [in]
- * @param score_options Options related to scoring [in]
+ * @param score_params Parameters related to scoring [in]
* @param init_hsp The initial HSP information [in]
*/
-static Int2 PHIGappedAlignment(Uint1 program,
- BLAST_SequenceBlk* query_blk, BLAST_SequenceBlk* subject_blk,
- BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options, BlastInitHSP* init_hsp)
+static Int2
+PHIGappedAlignment(BLAST_SequenceBlk* query_blk,
+ BLAST_SequenceBlk* subject_blk,
+ BlastGapAlignStruct* gap_align,
+ const BlastScoringParameters* score_params,
+ BlastInitHSP* init_hsp)
{
Boolean found_start, found_end;
Int4 q_length=0, s_length=0, score_right, score_left;
@@ -3794,7 +3862,7 @@ static Int2 PHIGappedAlignment(Uint1 program,
score_left =
SEMI_G_ALIGN_EX(query, subject, q_length, s_length, NULL,
&private_q_start, &private_s_start, TRUE, NULL, gap_align,
- score_options, init_hsp->q_off, FALSE, TRUE);
+ score_params, init_hsp->q_off, FALSE, TRUE);
gap_align->query_start = q_length - private_q_start + 1;
gap_align->subject_start = s_length - private_s_start + 1;
@@ -3816,7 +3884,7 @@ static Int2 PHIGappedAlignment(Uint1 program,
subject+s_length, query_blk->length-q_length,
subject_blk->length-s_length, NULL, &(gap_align->query_stop),
&(gap_align->subject_stop), TRUE, NULL, gap_align,
- score_options, q_length, FALSE, FALSE);
+ score_params, q_length, FALSE, FALSE);
gap_align->query_stop += q_length;
gap_align->subject_stop += s_length;
}
@@ -3840,11 +3908,11 @@ Int2 PHIGetGappedScore (Uint1 program_number,
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
BLAST_SequenceBlk* subject,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
const BlastExtensionParameters* ext_params,
const BlastHitSavingParameters* hit_params,
BlastInitHitList* init_hitlist,
- BlastHSPList** hsp_list_ptr)
+ BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats)
{
BlastHSPList* hsp_list;
@@ -3852,18 +3920,18 @@ Int2 PHIGetGappedScore (Uint1 program_number,
BlastInitHSP* init_hsp;
Int4 index;
Int2 status = 0;
- BlastHitSavingOptions* hit_options = hit_params->options;
+ BlastHitSavingOptions* hit_options;
BLAST_SequenceBlk query_tmp;
Int4 context;
- if (!query || !subject || !gap_align || !score_options || !ext_params ||
+ if (!query || !subject || !gap_align || !score_params || !ext_params ||
!hit_params || !init_hitlist || !hsp_list_ptr)
return 1;
if (init_hitlist->total == 0)
return 0;
-
+ hit_options = hit_params->options;
if (*hsp_list_ptr == NULL)
*hsp_list_ptr = hsp_list = Blast_HSPListNew(hit_options->hsp_num_max);
else
@@ -3880,12 +3948,15 @@ Int2 PHIGetGappedScore (Uint1 program_number,
BlastHSP* new_hsp;
init_hsp = init_hsp_array[index];
+ if (gapped_stats)
+ ++gapped_stats->extensions;
+
/* Adjust the initial HSP's coordinates to ones relative to an
individual query sequence */
GetRelativeCoordinates(query, query_info, init_hsp, &query_tmp,
NULL, &context);
- status = PHIGappedAlignment(program_number, &query_tmp,
- subject, gap_align, score_options, init_hsp);
+ status = PHIGappedAlignment(&query_tmp, subject, gap_align,
+ score_params, init_hsp);
if (status) {
sfree(init_hsp_array);
diff --git a/algo/blast/core/blast_gapalign.h b/algo/blast/core/blast_gapalign.h
index 422f1e33..08f0037b 100644
--- a/algo/blast/core/blast_gapalign.h
+++ b/algo/blast/core/blast_gapalign.h
@@ -1,39 +1,36 @@
-/* $Id: blast_gapalign.h,v 1.40 2004/04/16 14:56:58 papadopo Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_gapalign.h
-
-Author: Ilya Dondoshansky
-
-Contents: Structures and functions prototypes used for BLAST gapped extension
+/* $Id: blast_gapalign.h,v 1.52 2004/06/08 17:54:02 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
-******************************************************************************
- * $Revision: 1.40 $
- * */
+/** @file blast_gapalign.h
+ * Structures and function prototypes used for BLAST gapped extension
+ * @todo FIXME: elaborate on contents.
+ */
#ifndef __BLAST_GAPALIGN__
#define __BLAST_GAPALIGN__
@@ -49,14 +46,6 @@ Contents: Structures and functions prototypes used for BLAST gapped extension
extern "C" {
#endif
-/** Defines extension algorithm types */
-typedef enum {
- EXTEND_DYN_PROG = 1,
- EXTEND_GREEDY,
- EXTEND_GREEDY_NO_TRACEBACK,
- EXTEND_ALGO_MAX
-} ExtensionAlgorithmType;
-
/** Diagonal distance cutoff when looking for HSP inclusion in Mega BLAST */
#define MB_DIAG_NEAR 30
@@ -64,22 +53,9 @@ typedef enum {
the other in Mega BLAST */
#define MB_DIAG_CLOSE 6
-/** Minimal HSP length allowed for neighboring */
-#define MIN_NEIGHBOR_HSP_LENGTH 100
-
-/** Minimal percent of identities allowed for neighboring */
-#define MIN_NEIGHBOR_PERC_IDENTITY 96
-
/** Split subject sequences if longer than this */
#define MAX_DBSEQ_LEN 5000000
-/** Auxiliary structure for dynamic programming gapped extension */
-typedef struct BlastGapDP {
- Int4 best;
- Int4 best_gap;
- Int4 best_decline;
-} BlastGapDP;
-
/** Structure supporting the gapped alignment */
typedef struct BlastGapAlignStruct {
Boolean positionBased; /**< Is this PSI-BLAST? */
@@ -87,7 +63,7 @@ typedef struct BlastGapAlignStruct {
GapStateArrayStruct* state_struct; /**< Structure to keep extension
state information */
GapEditBlock* edit_block; /**< The traceback (gap) information */
- GreedyAlignMem* greedy_align_mem;/**< Preallocated memory for the greedy
+ SGreedyAlignMem* greedy_align_mem;/**< Preallocated memory for the greedy
gapped extension */
BlastScoreBlk* sbp; /**< Pointer to the scoring information block */
Int4 gap_x_dropoff; /**< X-dropoff parameter to use */
@@ -99,19 +75,18 @@ typedef struct BlastGapAlignStruct {
} BlastGapAlignStruct;
/** Initializes the BlastGapAlignStruct structure
- * @param score_options Options related to scoring alignments [in]
- * @param ext_params Options and parameters related to gapped extension [in]
+ * @param score_params Parameters related to scoring alignments [in]
+ * @param ext_params Parameters related to gapped extension [in]
* @param max_subject_length Maximum length of any subject sequence (needed
* for greedy extension allocation only) [in]
- * @param query_length The length of the query sequence [in]
* @param sbp The scoring information block [in]
* @param gap_align_ptr The BlastGapAlignStruct structure [out]
*/
Int2
-BLAST_GapAlignStructNew(const BlastScoringOptions* score_options,
+BLAST_GapAlignStructNew(const BlastScoringParameters* score_params,
const BlastExtensionParameters* ext_params,
- Uint4 max_subject_length, Int4 query_length,
- BlastScoreBlk* sbp, BlastGapAlignStruct** gap_align_ptr);
+ Uint4 max_subject_length, BlastScoreBlk* sbp,
+ BlastGapAlignStruct** gap_align_ptr);
/** Deallocates memory in the BlastGapAlignStruct structure */
BlastGapAlignStruct*
@@ -132,21 +107,22 @@ BLAST_GapAlignStructFree(BlastGapAlignStruct* gap_align);
* @param subject The subject sequence [in]
* @param gap_align A placeholder for gapped alignment information and
* score block. [in] [out]
- * @param score_options Options related to scoring alignments [in]
+ * @param score_params Parameters related to scoring alignments [in]
* @param ext_params Options related to alignment extension [in]
* @param hit_params Options related to saving HSPs [in]
* @param init_hitlist Contains all the initial hits [in]
* @param hsp_list_ptr List of HSPs with full extension information [out]
+ * @param gapped_stats Return statistics (not filled if NULL) [out]
*/
Int2 BLAST_MbGetGappedScore(Uint1 program_number,
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
BLAST_SequenceBlk* subject,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
const BlastExtensionParameters* ext_params,
const BlastHitSavingParameters* hit_params,
BlastInitHitList* init_hitlist,
- BlastHSPList** hsp_list_ptr);
+ BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats);
@@ -162,29 +138,30 @@ Int2 BLAST_MbGetGappedScore(Uint1 program_number,
* the concatenated sequence [in]
* @param subject The subject sequence block [in]
* @param gap_align The auxiliary structure for gapped alignment [in]
- * @param score_options Options related to scoring [in]
+ * @param score_params Parameters related to scoring [in]
* @param ext_params Options and parameters related to extensions [in]
* @param hit_params Options related to saving hits [in]
* @param init_hitlist List of initial HSPs (offset pairs with additional
* information from the ungapped alignment performed earlier) [in]
* @param hsp_list_ptr Structure containing all saved HSPs [out]
+ * @param gapped_stats Return statistics (not filled if NULL) [out]
*/
Int2 BLAST_GetGappedScore (Uint1 program_number,
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
BLAST_SequenceBlk* subject,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_parameters,
const BlastExtensionParameters* ext_params,
const BlastHitSavingParameters* hit_params,
BlastInitHitList* init_hitlist,
- BlastHSPList** hsp_list_ptr);
+ BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats);
/** Perform a gapped alignment with traceback
* @param program Type of BLAST program [in]
* @param query The query sequence [in]
* @param subject The subject sequence [in]
* @param gap_align The gapped alignment structure [in] [out]
- * @param score_options Scoring parameters [in]
+ * @param score_params Scoring parameters [in]
* @param q_start Offset in query where to start alignment [in]
* @param s_start Offset in subject where to start alignment [in]
* @param query_length Maximal allowed extension in query [in]
@@ -193,7 +170,7 @@ Int2 BLAST_GetGappedScore (Uint1 program_number,
Int2 BLAST_GappedAlignmentWithTraceback(Uint1 program,
Uint1* query, Uint1* subject,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length);
/** Greedy gapped alignment, with or without traceback.
@@ -206,7 +183,7 @@ Int2 BLAST_GappedAlignmentWithTraceback(Uint1 program,
* @param subject_length The subject sequence length [in]
* @param gap_align The structure holding various information and memory
* needed for gapped alignment [in] [out]
- * @param score_options Options related to scoring alignments [in]
+ * @param score_params Parameters related to scoring alignments [in]
* @param q_off Starting offset in query [in]
* @param s_off Starting offset in subject [in]
* @param compressed_subject Is subject sequence compressed? [in]
@@ -215,24 +192,22 @@ Int2 BLAST_GappedAlignmentWithTraceback(Uint1 program,
Int2
BLAST_GreedyGappedAlignment(Uint1* query, Uint1* subject,
Int4 query_length, Int4 subject_length, BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
Int4 q_off, Int4 s_off, Boolean compressed_subject, Boolean do_traceback);
/** Perform a gapped alignment with traceback for PHI BLAST
- * @param program Type of BLAST program [in]
* @param query The query sequence [in]
* @param subject The subject sequence [in]
* @param gap_align The gapped alignment structure [in] [out]
- * @param score_options Scoring parameters [in]
+ * @param score_params Scoring parameters [in]
* @param q_start Offset in query where to start alignment [in]
* @param s_start Offset in subject where to start alignment [in]
* @param query_length Maximal allowed extension in query [in]
* @param subject_length Maximal allowed extension in subject [in]
*/
-Int2 PHIGappedAlignmentWithTraceback(Uint1 program,
- Uint1* query, Uint1* subject,
+Int2 PHIGappedAlignmentWithTraceback(Uint1* query, Uint1* subject,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length);
/** Convert initial HSP list to an HSP list: to be used in ungapped search.
@@ -258,22 +233,23 @@ Int2 BLAST_GetUngappedHSPList(BlastInitHitList* init_hitlist,
* the concatenated sequence [in]
* @param subject The subject sequence block [in]
* @param gap_align The auxiliary structure for gapped alignment [in]
- * @param score_options Options related to scoring [in]
+ * @param score_params Parameters related to scoring [in]
* @param ext_params Options and parameters related to extensions [in]
* @param hit_params Options related to saving hits [in]
* @param init_hitlist List of initial HSPs, including offset pairs and
* pattern match lengths [in]
* @param hsp_list_ptr Structure containing all saved HSPs [out]
+ * @param gapped_stats Return statistics (not filled if NULL) [out]
*/
Int2 PHIGetGappedScore (Uint1 program_number,
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
BLAST_SequenceBlk* subject,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
const BlastExtensionParameters* ext_params,
const BlastHitSavingParameters* hit_params,
BlastInitHitList* init_hitlist,
- BlastHSPList** hsp_list_ptr);
+ BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats);
void
AdjustSubjectRange(Int4* subject_offset_ptr, Int4* subject_length_ptr,
diff --git a/algo/blast/core/blast_gapalign_pri.h b/algo/blast/core/blast_gapalign_pri.h
new file mode 100644
index 00000000..856b4d97
--- /dev/null
+++ b/algo/blast/core/blast_gapalign_pri.h
@@ -0,0 +1,83 @@
+#ifndef ALGO_BLAST_CORE___BLAST_GAPALIGN_PRI__H
+#define ALGO_BLAST_CORE___BLAST_GAPALIGN_PRI__H
+
+/* $Id: blast_gapalign_pri.h,v 1.1 2004/05/18 13:23:26 madden Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Tom Madden
+ *
+ */
+
+/** @file blast_gapalign_pri.h
+ * Private interface for blast_gapalign.c
+ */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+Int4
+ALIGN_EX(Uint1* A, Uint1* B, Int4 M, Int4 N, Int4* S, Int4* a_offset,
+ Int4* b_offset, Int4** sapp, BlastGapAlignStruct* gap_align,
+ const BlastScoringParameters* scoringParams, Int4 query_offset,
+ Boolean reversed, Boolean reverse_sequence);
+
+
+/** Converts a traceback produced by the ALIGN or ALIGN_EX
+ * routine to a GapEditBlock, which is normally then further
+ * processed to a SeqAlignPtr. Note: the old routine had two
+ * unused parameters that are not present here.
+ * @param S The traceback obtained from ALIGN [in]
+ * @param M Length of alignment in query [in]
+ * @param N Length of alignment in subject [in]
+ * @param start1 Starting query offset [in]
+ * @param start2 Starting subject offset [in]
+ * @param edit_block The constructed edit block [out]
+ */
+Int2
+BLAST_TracebackToGapEditBlock(Int4* S, Int4 M, Int4 N, Int4 start1,
+ Int4 start2, GapEditBlock** edit_block);
+
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/*
+ * ===========================================================================
+ *
+ * $Log: blast_gapalign_pri.h,v $
+ * Revision 1.1 2004/05/18 13:23:26 madden
+ * Private declarations for blast_gapalign.c
+ *
+ *
+ * ===========================================================================
+ */
+
+#endif /* !ALGO_BLAST_CORE___BLAST_GAPALIGN_PRI__H */
diff --git a/algo/blast/core/blast_hits.c b/algo/blast/core/blast_hits.c
index 217e8b4a..a555d071 100644
--- a/algo/blast/core/blast_hits.c
+++ b/algo/blast/core/blast_hits.c
@@ -1,46 +1,38 @@
-/* $Id: blast_hits.c,v 1.94 2004/05/05 15:27:44 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_hits.c
-
-Author: Ilya Dondoshansky
-
-Contents: BLAST functions
-
-Detailed Contents:
-
- - BLAST functions for saving hits after the (preliminary) gapped
- alignment
-
-******************************************************************************
- * $Revision: 1.94 $
- * */
-
-static char const rcsid[] = "$Id: blast_hits.c,v 1.94 2004/05/05 15:27:44 dondosha Exp $";
+/* $Id: blast_hits.c,v 1.109 2004/06/15 14:50:55 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_hits.c
+ * BLAST functions for saving hits after the (preliminary) gapped alignment
+ */
+
+static char const rcsid[] =
+ "$Id: blast_hits.c,v 1.109 2004/06/15 14:50:55 dondosha Exp $";
#include <algo/blast/core/blast_options.h>
#include <algo/blast/core/blast_extend.h>
@@ -151,9 +143,10 @@ void Blast_HSPPHIGetEvalue(BlastHSP* hsp, BlastScoreBlk* sbp)
Boolean Blast_HSPReevaluateWithAmbiguities(BlastHSP* hsp,
Uint1* query_start, Uint1* subject_start,
const BlastHitSavingOptions* hit_options,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
BlastQueryInfo* query_info, BlastScoreBlk* sbp)
{
+ BlastScoringOptions *score_options = score_params->options;
Int4 sum, score, gap_open, gap_extend;
Int4** matrix;
Uint1* query,* subject;
@@ -163,7 +156,7 @@ Boolean Blast_HSPReevaluateWithAmbiguities(BlastHSP* hsp,
Uint1 mask = 0x0f;
GapEditScript* esp,* last_esp = NULL,* prev_esp,* first_esp = NULL;
Boolean delete_hsp;
- double searchsp_eff;
+ Int8 searchsp_eff;
Int4 last_esp_num = 0;
Int4 align_length;
Blast_KarlinBlk* kbp;
@@ -172,17 +165,17 @@ Boolean Blast_HSPReevaluateWithAmbiguities(BlastHSP* hsp,
/* NB: this function is called only for BLASTn, so we know where the
Karlin block is */
kbp = sbp->kbp_std[hsp->context];
- searchsp_eff = (double)query_info->eff_searchsp_array[hsp->context];
+ searchsp_eff = query_info->eff_searchsp_array[hsp->context];
- if (score_options->gap_open == 0 && score_options->gap_extend == 0) {
- if (score_options->reward % 2 == 1)
+ if (score_params->gap_open == 0 && score_params->gap_extend == 0) {
+ if (score_params->reward % 2 == 1)
factor = 2;
gap_open = 0;
gap_extend =
- (score_options->reward - 2*score_options->penalty) * factor / 2;
+ (score_params->reward - 2*score_params->penalty) * factor / 2;
} else {
- gap_open = score_options->gap_open;
- gap_extend = score_options->gap_extend;
+ gap_open = score_params->gap_open;
+ gap_extend = score_params->gap_extend;
}
matrix = sbp->matrix;
@@ -225,16 +218,16 @@ Boolean Blast_HSPReevaluateWithAmbiguities(BlastHSP* hsp,
last_esp_num = 0;
while (esp) {
- if (esp->op_type == GAPALIGN_SUB) {
+ if (esp->op_type == eGapAlignSub) {
sum += factor*matrix[*query & mask][*subject];
query++;
subject++;
index++;
- } else if (esp->op_type == GAPALIGN_DEL) {
+ } else if (esp->op_type == eGapAlignDel) {
sum -= gap_open + gap_extend * esp->num;
subject += esp->num;
index += esp->num;
- } else if (esp->op_type == GAPALIGN_INS) {
+ } else if (esp->op_type == eGapAlignIns) {
sum -= gap_open + gap_extend * esp->num;
query += esp->num;
index += esp->num;
@@ -363,16 +356,16 @@ Blast_HSPGetNumIdentities(Uint1* query, Uint1* subject,
for (esp = gap_info->esp; esp; esp = esp->next) {
align_length += esp->num;
switch (esp->op_type) {
- case GAPALIGN_SUB:
+ case eGapAlignSub:
for (i=0; i<esp->num; i++) {
if (*q++ == *s++)
num_ident++;
}
break;
- case GAPALIGN_DEL:
+ case eGapAlignDel:
s += esp->num;
break;
- case GAPALIGN_INS:
+ case eGapAlignIns:
q += esp->num;
break;
default:
@@ -414,7 +407,7 @@ Blast_HSPGetOOFNumIdentities(Uint1* query, Uint1* subject,
for (esp = hsp->gap_info->esp; esp; esp = esp->next) {
switch (esp->op_type) {
- case 3: /* Substitution */
+ case eGapAlignSub: /* Substitution */
align_length += esp->num;
for (i=0; i<esp->num; i++) {
if (*q == *s)
@@ -423,24 +416,24 @@ Blast_HSPGetOOFNumIdentities(Uint1* query, Uint1* subject,
s += CODON_LENGTH;
}
break;
- case 6: /* Insertion */
+ case eGapAlignIns: /* Insertion */
align_length += esp->num;
s += esp->num * CODON_LENGTH;
break;
- case 0: /* Deletion */
+ case eGapAlignDel: /* Deletion */
align_length += esp->num;
q += esp->num;
break;
- case 1: /* Gap of two nucleotides. */
+ case eGapAlignDel2: /* Gap of two nucleotides. */
s -= 2;
break;
- case 2: /* Gap of one nucleotide. */
+ case eGapAlignDel1: /* Gap of one nucleotide. */
s -= 1;
break;
- case 4: /* Insertion of one nucleotide. */
+ case eGapAlignIns1: /* Insertion of one nucleotide. */
s += 1;
break;
- case 5: /* Insertion of two nucleotides. */
+ case eGapAlignIns2: /* Insertion of two nucleotides. */
s += 2;
break;
default:
@@ -452,9 +445,104 @@ Blast_HSPGetOOFNumIdentities(Uint1* query, Uint1* subject,
*align_length_ptr = align_length;
*num_ident_ptr = num_ident;
+
return 0;
+}
+
+/* Compute the alignment length, the total number of gap positions and the
+ * number of gap openings for an HSP.
+ * When the HSP carries an edit script, the length starts from the query
+ * length and grows by the size of every eGapAlignDel operation; both
+ * eGapAlignDel and eGapAlignIns operations count as one gap opening and
+ * contribute their size to the gap total. Without an edit script the length
+ * is the larger of the query and subject lengths and both gap counts are 0.
+ */
+void
+Blast_HSPCalcLengthAndGaps(BlastHSP* hsp, Int4* length_out,
+                           Int4* gaps_out, Int4* gap_opens_out)
+{
+   Int4 align_len = hsp->query.length;
+   Int4 num_gap_opens = 0;
+   Int4 num_gap_positions = 0;
+   GapEditScript* op;
+
+   if (!hsp->gap_info) {
+      /* No edit script: take the longer of the two segment lengths. */
+      if (hsp->subject.length > align_len)
+         align_len = hsp->subject.length;
+   } else {
+      for (op = hsp->gap_info->esp; op != NULL; op = op->next) {
+         /* Only the two gap operations affect any of the counters. */
+         if (op->op_type != eGapAlignDel && op->op_type != eGapAlignIns)
+            continue;
+
+         ++num_gap_opens;
+         num_gap_positions += op->num;
+         /* Only eGapAlignDel operations lengthen the alignment beyond the
+            query length. */
+         if (op->op_type == eGapAlignDel)
+            align_len += op->num;
+      }
+   }
+
+   *length_out = align_len;
+   *gap_opens_out = num_gap_opens;
+   *gaps_out = num_gap_positions;
+}
+
+/** Convert the extent of one HSP segment into start and end coordinates on
+ * the underlying sequence, taking the segment's frame into account.
+ * For a non-zero frame the segment offset and length are scaled by
+ * CODON_LENGTH; a negative frame additionally measures from seq_length.
+ * NOTE(review): for non-zero frames the formulas appear to produce 0-based
+ * coordinates (e.g. frame 1, offset 0 gives start 0), while the frame 0
+ * branch is 1-based - confirm which base the callers expect.
+ * @param segment BlastSeg structure (part of BlastHSP) [in]
+ * @param seq_length Length of the full sequence [in]
+ * @param start Start of the alignment in this segment [out]
+ * @param end End of the alignment in this segment [out]
+ */
+static void
+Blast_SegGetTranslatedOffsets(BlastSeg* segment, Int4 seq_length,
+                              Int4* start, Int4* end)
+{
+   const Int4 frame = segment->frame;
+   const Int4 seg_begin = segment->offset;
+   const Int4 seg_past_end = segment->offset + segment->length;
+
+   if (frame == 0) {
+      /* Untranslated segment: shift the start by one, end is inclusive. */
+      *start = seg_begin + 1;
+      *end = seg_past_end;
+      return;
+   }
+
+   if (frame > 0) {
+      *start = CODON_LENGTH*seg_begin + frame - 1;
+      *end = CODON_LENGTH*seg_past_end + frame - 2;
+   } else {
+      /* Negative frame: positions are counted back from seq_length. */
+      *start = seq_length - CODON_LENGTH*seg_begin + frame;
+      *end = seq_length - CODON_LENGTH*seg_past_end + frame + 1;
+   }
+}
+/* Compute the start and end coordinates of an HSP in query and subject.
+ * - No gap information: 1-based inclusive coordinates taken directly from
+ *   the segment offsets and lengths.
+ * - Neither sequence translated: as above when the query and subject frames
+ *   agree; otherwise the query coordinates are flipped relative to the
+ *   original query length and the subject start/end are swapped.
+ * - Otherwise: each segment is converted by Blast_SegGetTranslatedOffsets.
+ * All four output pointers are written in every case.
+ */
+void
+Blast_HSPGetAdjustedOffsets(BlastHSP* hsp, Int4* q_start, Int4* q_end,
+                            Int4* s_start, Int4* s_end)
+{
+   Int4 query_length, subject_length;
+
+   if (!hsp->gap_info) {
+      *q_start = hsp->query.offset + 1;
+      *q_end = hsp->query.offset + hsp->query.length;
+      *s_start = hsp->subject.offset + 1;
+      *s_end = hsp->subject.offset + hsp->subject.length;
+      return;
+   }
+
+   /* Non-translated lengths are stored in the GapEditBlock */
+   query_length = hsp->gap_info->original_length1;
+   subject_length = hsp->gap_info->original_length2;
+
+   if (!hsp->gap_info->translate1 && !hsp->gap_info->translate2) {
+      if (hsp->query.frame != hsp->subject.frame) {
+         /* Blastn: if different strands, flip offsets in query; leave
+            offsets in subject as they are, but change order for correct
+            correspondence. */
+         *q_end = query_length - hsp->query.offset;
+         *q_start = *q_end - hsp->query.length + 1;
+         *s_end = hsp->subject.offset + 1;
+         *s_start = hsp->subject.offset + hsp->subject.length;
+      } else {
+         *q_start = hsp->query.offset + 1;
+         *q_end = hsp->query.offset + hsp->query.length;
+         *s_start = hsp->subject.offset + 1;
+         *s_end = hsp->subject.offset + hsp->subject.length;
+      }
+   } else {
+      Blast_SegGetTranslatedOffsets(&hsp->query, query_length, q_start, q_end);
+      /* The subject segment must fill s_start/s_end. Passing q_start here
+         would overwrite the query start and leave *s_start unset. */
+      Blast_SegGetTranslatedOffsets(&hsp->subject, subject_length,
+                                    s_start, s_end);
+   }
}
/** TRUE if c is between a and b; f between d and f. Determines if the
@@ -480,7 +568,7 @@ static Boolean Blast_HSPContained(BlastHSP* hsp1, BlastHSP* hsp2)
return (hsp_start_is_contained && hsp_end_is_contained);
}
-/** Comparison callback function for sorting HSPs by e-value */
+/** Comparison callback function for sorting HSPs by score */
static int
score_compare_hsps(const void* v1, const void* v2)
{
@@ -519,62 +607,36 @@ score_compare_hsps(const void* v1, const void* v2)
return 0;
}
-/** Comparison callback function for sorting HSPs by diagonal and flagging
- * the HSPs contained in or identical to other HSPs for future deletion.
-*/
-static int
-diag_uniq_compare_hsps(const void* v1, const void* v2)
+#define FUZZY_EVALUE_COMPARE_FACTOR 1e-6
+/** Compares 2 real numbers up to a fixed precision */
+static int fuzzy_evalue_comp(double evalue1, double evalue2)
{
- BlastHSP* h1,* h2;
- BlastHSP** hp1,** hp2;
-
- hp1 = (BlastHSP**) v1;
- hp2 = (BlastHSP**) v2;
- h1 = *hp1;
- h2 = *hp2;
-
- if (h1==NULL && h2==NULL) return 0;
- else if (h1==NULL) return 1;
- else if (h2==NULL) return -1;
-
- /* Separate different queries and/or strands */
- if (h1->context < h2->context)
+ if (evalue1 < (1-FUZZY_EVALUE_COMPARE_FACTOR)*evalue2)
return -1;
- else if (h1->context > h2->context)
+ else if (evalue1 > (1+FUZZY_EVALUE_COMPARE_FACTOR)*evalue2)
return 1;
-
- /* Check if one HSP is contained in the other, if so,
- leave only the longer one, given it has lower evalue */
- if (h1->query.offset >= h2->query.offset &&
- h1->query.end <= h2->query.end &&
- h1->subject.offset >= h2->subject.offset &&
- h1->subject.end <= h2->subject.end &&
- h1->score <= h2->score) {
- (*hp1)->score = 0;
- } else if (h1->query.offset <= h2->query.offset &&
- h1->query.end >= h2->query.end &&
- h1->subject.offset <= h2->subject.offset &&
- h1->subject.end >= h2->subject.end &&
- h1->score >= h2->score) {
- (*hp2)->score = 0;
- }
-
- return (h1->query.offset - h1->subject.offset) -
- (h2->query.offset - h2->subject.offset);
+ else
+ return 0;
}
+/** Comparison callback function for sorting HSPs by e-value and score, before
+ saving BlastHSPList in a BlastHitList. E-value has priority over score,
+ because lower scoring HSPs might have lower e-values, if they are linked
+ with sum statistics.
+ E-values are compared only up to a certain precision. */
static int
-null_compare_hsps(const void* v1, const void* v2)
+evalue_compare_hsps(const void* v1, const void* v2)
{
BlastHSP* h1,* h2;
-
+ int retval = 0;
+
h1 = *((BlastHSP**) v1);
h2 = *((BlastHSP**) v2);
- if ((h1 && h2) || (!h1 && !h2))
- return 0;
- else if (!h1) return 1;
- else return -1;
+ if ((retval = fuzzy_evalue_comp(h1->evalue, h2->evalue)) != 0)
+ return retval;
+
+ return score_compare_hsps(v1, v2);
}
/** Comparison callback for sorting HSPs by diagonal. Do not compare
@@ -597,6 +659,13 @@ diag_compare_hsps(const void* v1, const void* v2)
(h2->query.offset - h2->subject.offset);
}
+/** An auxiliary structure used for merging HSPs.
+ * Each node describes one segment of an alignment; nodes are chained
+ * through the next pointer to form a singly linked list.
+ */
+typedef struct BlastHSPSegment {
+ Int4 q_start, q_end; /**< First and last query position of the segment */
+ Int4 s_start, s_end; /**< First and last subject position of the segment */
+ struct BlastHSPSegment* next; /**< Next segment in the list, or NULL */
+} BlastHSPSegment;
+
#define OVERLAP_DIAG_CLOSE 10
/** Merge the two HSPs if they intersect.
* @param hsp1 The first HSP; also contains the result of merge. [in] [out]
@@ -607,14 +676,14 @@ diag_compare_hsps(const void* v1, const void* v2)
static Boolean
Blast_HSPsMerge(BlastHSP* hsp1, BlastHSP* hsp2, Int4 start)
{
- BLASTHSPSegment* segments1,* segments2,* new_segment1,* new_segment2;
+ BlastHSPSegment* segments1,* segments2,* new_segment1,* new_segment2;
GapEditScript* esp1,* esp2,* esp;
Int4 end = start + DBSEQ_CHUNK_OVERLAP - 1;
Int4 min_diag, max_diag, num1, num2, dist = 0, next_dist = 0;
Int4 diag1_start, diag1_end, diag2_start, diag2_end;
Int4 index;
Uint1 intersection_found;
- Uint1 op_type = 0;
+ EGapAlignOpType op_type = eGapAlignSub;
if (!hsp1->gap_info || !hsp2->gap_info) {
/* Assume that this is an ungapped alignment, hence simply compare
@@ -630,7 +699,7 @@ Blast_HSPsMerge(BlastHSP* hsp1, BlastHSP* hsp2, Int4 start)
return FALSE;
}
/* Find whether these HSPs have an intersection point */
- segments1 = (BLASTHSPSegment*) calloc(1, sizeof(BLASTHSPSegment));
+ segments1 = (BlastHSPSegment*) calloc(1, sizeof(BlastHSPSegment));
esp1 = hsp1->gap_info->esp;
esp2 = hsp2->gap_info->esp;
@@ -638,13 +707,13 @@ Blast_HSPsMerge(BlastHSP* hsp1, BlastHSP* hsp2, Int4 start)
segments1->q_start = hsp1->query.offset;
segments1->s_start = hsp1->subject.offset;
while (segments1->s_start < start) {
- if (esp1->op_type == GAPALIGN_INS)
+ if (esp1->op_type == eGapAlignIns)
segments1->q_start += esp1->num;
else if (segments1->s_start + esp1->num < start) {
- if (esp1->op_type == GAPALIGN_SUB) {
+ if (esp1->op_type == eGapAlignSub) {
segments1->s_start += esp1->num;
segments1->q_start += esp1->num;
- } else if (esp1->op_type == GAPALIGN_DEL)
+ } else if (esp1->op_type == eGapAlignDel)
segments1->s_start += esp1->num;
} else
break;
@@ -652,7 +721,7 @@ Blast_HSPsMerge(BlastHSP* hsp1, BlastHSP* hsp2, Int4 start)
}
/* Current esp is the first segment within the overlap region */
segments1->s_end = segments1->s_start + esp1->num - 1;
- if (esp1->op_type == GAPALIGN_SUB)
+ if (esp1->op_type == eGapAlignSub)
segments1->q_end = segments1->q_start + esp1->num - 1;
else
segments1->q_end = segments1->q_start;
@@ -660,15 +729,15 @@ Blast_HSPsMerge(BlastHSP* hsp1, BlastHSP* hsp2, Int4 start)
new_segment1 = segments1;
for (esp = esp1->next; esp; esp = esp->next) {
- new_segment1->next = (BLASTHSPSegment*)
- calloc(1, sizeof(BLASTHSPSegment));
+ new_segment1->next = (BlastHSPSegment*)
+ calloc(1, sizeof(BlastHSPSegment));
new_segment1->next->q_start = new_segment1->q_end + 1;
new_segment1->next->s_start = new_segment1->s_end + 1;
new_segment1 = new_segment1->next;
- if (esp->op_type == GAPALIGN_SUB) {
+ if (esp->op_type == eGapAlignSub) {
new_segment1->q_end += esp->num - 1;
new_segment1->s_end += esp->num - 1;
- } else if (esp->op_type == GAPALIGN_INS) {
+ } else if (esp->op_type == eGapAlignIns) {
new_segment1->q_end += esp->num - 1;
new_segment1->s_end = new_segment1->s_start;
} else {
@@ -679,7 +748,7 @@ Blast_HSPsMerge(BlastHSP* hsp1, BlastHSP* hsp2, Int4 start)
/* Now create the second segments list */
- segments2 = (BLASTHSPSegment*) calloc(1, sizeof(BLASTHSPSegment));
+ segments2 = (BlastHSPSegment*) calloc(1, sizeof(BlastHSPSegment));
segments2->q_start = hsp2->query.offset;
segments2->s_start = hsp2->subject.offset;
segments2->q_end = segments2->q_start + esp2->num - 1;
@@ -689,18 +758,18 @@ Blast_HSPsMerge(BlastHSP* hsp1, BlastHSP* hsp2, Int4 start)
for (esp = esp2->next; esp && new_segment2->s_end < end;
esp = esp->next) {
- new_segment2->next = (BLASTHSPSegment*)
- calloc(1, sizeof(BLASTHSPSegment));
+ new_segment2->next = (BlastHSPSegment*)
+ calloc(1, sizeof(BlastHSPSegment));
new_segment2->next->q_start = new_segment2->q_end + 1;
new_segment2->next->s_start = new_segment2->s_end + 1;
new_segment2 = new_segment2->next;
- if (esp->op_type == GAPALIGN_INS) {
+ if (esp->op_type == eGapAlignIns) {
new_segment2->s_end = new_segment2->s_start;
new_segment2->q_end = new_segment2->q_start + esp->num - 1;
- } else if (esp->op_type == GAPALIGN_DEL) {
+ } else if (esp->op_type == eGapAlignDel) {
new_segment2->s_end = new_segment2->s_start + esp->num - 1;
new_segment2->q_end = new_segment2->q_start;
- } else if (esp->op_type == GAPALIGN_SUB) {
+ } else if (esp->op_type == eGapAlignSub) {
new_segment2->s_end = new_segment2->s_start + esp->num - 1;
new_segment2->q_end = new_segment2->q_start + esp->num - 1;
}
@@ -738,12 +807,12 @@ Blast_HSPsMerge(BlastHSP* hsp1, BlastHSP* hsp2, Int4 start)
/* Both segments gaps - must intersect */
intersection_found = 3;
+ op_type = eGapAlignIns;
dist = new_segment2->s_end - new_segment1->s_start + 1;
- op_type = GAPALIGN_INS;
next_dist = new_segment2->q_end - new_segment1->q_start - dist + 1;
if (new_segment2->q_end - new_segment1->q_start < dist) {
dist = new_segment2->q_end - new_segment1->q_start + 1;
- op_type = GAPALIGN_DEL;
+ op_type = eGapAlignDel;
next_dist = new_segment2->s_end - new_segment1->s_start - dist + 1;
}
break;
@@ -1019,7 +1088,7 @@ Blast_HSPListSetFrames(Uint1 program_number, BlastHSPList* hsp_list,
}
}
-Int2 Blast_HSPListGetEvalues(Uint1 program, BlastQueryInfo* query_info,
+Int2 Blast_HSPListGetEvalues(BlastQueryInfo* query_info,
BlastHSPList* hsp_list, Boolean gapped_calculation,
BlastScoreBlk* sbp)
{
@@ -1048,17 +1117,43 @@ Int2 Blast_HSPListGetEvalues(Uint1 program, BlastQueryInfo* query_info,
query information block, in order of preference */
if (sbp->effective_search_sp) {
hsp->evalue = BLAST_KarlinStoE_simple(hsp->score, kbp[hsp->context],
- (double)sbp->effective_search_sp);
+ sbp->effective_search_sp);
} else {
hsp->evalue =
BLAST_KarlinStoE_simple(hsp->score, kbp[hsp->context],
- (double)query_info->eff_searchsp_array[hsp->context]);
+ query_info->eff_searchsp_array[hsp->context]);
}
}
return 0;
}
+/* Fill in the bit score of every HSP in a list from its raw score:
+ * bit_score = (score * Lambda - logK) / ln(2), with Lambda and logK taken
+ * from the Karlin block of the HSP's context. The gapped Karlin blocks are
+ * used when gapped_calculation is set, the ungapped ones otherwise.
+ * Returns 1 when hsp_list is NULL, 0 otherwise.
+ */
+Int2 Blast_HSPListGetBitScores(BlastHSPList* hsp_list,
+        Boolean gapped_calculation, BlastScoreBlk* sbp)
+{
+   Blast_KarlinBlk** karlin_blocks;
+   Int4 hsp_idx;
+
+   if (!hsp_list)
+      return 1;
+
+   karlin_blocks = gapped_calculation ? sbp->kbp_gap_std : sbp->kbp_std;
+
+   for (hsp_idx = 0; hsp_idx < hsp_list->hspcnt; ++hsp_idx) {
+      BlastHSP* hsp = hsp_list->hsp_array[hsp_idx];
+      Blast_KarlinBlk* karlin;
+
+      ASSERT(hsp != NULL);
+      /* Statistical parameters depend on the context the HSP came from. */
+      karlin = karlin_blocks[hsp->context];
+      hsp->bit_score =
+         (hsp->score*karlin->Lambda - karlin->logK) / NCBIMATH_LN2;
+   }
+
+   return 0;
+}
+
void Blast_HSPListPHIGetEvalues(BlastHSPList* hsp_list, BlastScoreBlk* sbp)
{
Int4 index;
@@ -1140,60 +1235,87 @@ Blast_HSPListPurgeNullHSPs(BlastHSPList* hsp_list)
return 0;
}
+/** Possible outcomes of comparing two HSPs for inclusion of one in the
+ * other; this is the return type of Blast_HSPInclusionTest.
+ */
+typedef enum EHSPInclusionStatus {
+ eEqual = 0, /**< Identical */
+ eFirstInSecond, /**< First included in rectangle formed by second */
+ eSecondInFirst, /**< Second included in rectangle formed by first */
+ eDiagNear, /**< Diagonals are near, but neither HSP is included in
+ the other. */
+ eDiagDistant /**< Diagonals are far apart, or different contexts */
+} EHSPInclusionStatus;
+
/** Are the two HSPs within a given diagonal distance of each other? */
#define MB_HSP_CLOSE(q1, q2, s1, s2, c) (ABS(((q1)-(s1)) - ((q2)-(s2))) < (c))
#define MIN_DIAG_DIST 60
+/** Megablast HSP inclusion test. Two HSPs are only compared when they share
+ * a context and their starting diagonals differ by less than MIN_DIAG_DIST;
+ * otherwise eDiagDistant is returned. For comparable HSPs the result says
+ * whether one lies inside the rectangle spanned by the other's endpoints.
+ * The score conditions are asymmetric: the first HSP is reported as included
+ * only when its score is strictly lower, the second when its score is lower
+ * or equal. HSPs with identical endpoints and score are eEqual.
+ * @param hsp1 First HSP [in]
+ * @param hsp2 Second HSP [in]
+ * @return Inclusion relationship between the two HSPs.
+ */
+static EHSPInclusionStatus
+Blast_HSPInclusionTest(BlastHSP* hsp1, BlastHSP* hsp2)
+{
+   Boolean same_rectangle, first_inside_second, second_inside_first;
+
+   if (hsp1->context != hsp2->context)
+      return eDiagDistant;
+
+   if (!MB_HSP_CLOSE(hsp1->query.offset, hsp2->query.offset,
+                     hsp1->subject.offset, hsp2->subject.offset,
+                     MIN_DIAG_DIST))
+      return eDiagDistant;
+
+   same_rectangle = (Boolean)
+      (hsp1->query.offset == hsp2->query.offset &&
+       hsp1->query.end == hsp2->query.end &&
+       hsp1->subject.offset == hsp2->subject.offset &&
+       hsp1->subject.end == hsp2->subject.end);
+   if (same_rectangle && hsp1->score == hsp2->score)
+      return eEqual;
+
+   first_inside_second = (Boolean)
+      (hsp1->query.offset >= hsp2->query.offset &&
+       hsp1->query.end <= hsp2->query.end &&
+       hsp1->subject.offset >= hsp2->subject.offset &&
+       hsp1->subject.end <= hsp2->subject.end);
+   if (first_inside_second && hsp1->score < hsp2->score)
+      return eFirstInSecond;
+
+   second_inside_first = (Boolean)
+      (hsp1->query.offset <= hsp2->query.offset &&
+       hsp1->query.end >= hsp2->query.end &&
+       hsp1->subject.offset <= hsp2->subject.offset &&
+       hsp1->subject.end >= hsp2->subject.end);
+   if (second_inside_first && hsp1->score >= hsp2->score)
+      return eSecondInFirst;
+
+   return eDiagNear;
+}
+
+/** How many HSPs to check for inclusion for each new HSP? */
+#define MAX_NUM_CHECK_INCLUSION 20
+
/** Sort the HSPs in an HSP list by diagonal and remove redundant HSPs */
-static Int2
+Int2
Blast_HSPListUniqSort(BlastHSPList* hsp_list)
{
- Int4 index, new_hspcnt, index1, q_off, s_off, q_end, s_end, index2;
+ Int4 index, new_hspcnt, index1, index2;
BlastHSP** hsp_array = hsp_list->hsp_array;
Boolean shift_needed = FALSE;
- Int4 context;
- double evalue;
+ EHSPInclusionStatus inclusion_status = eDiagNear;
+
+ if (hsp_list->hspcnt <= 1)
+ return 0;
qsort(hsp_array, hsp_list->hspcnt, sizeof(BlastHSP*),
- diag_uniq_compare_hsps);
- /* Delete all HSPs that were flagged in qsort */
- for (index = 0; index < hsp_list->hspcnt; ++index) {
- if (hsp_array[index]->score == 0) {
- hsp_array[index] = Blast_HSPFree(hsp_array[index]);
- }
- }
- /* Move all nulled out HSPs to the end */
- qsort(hsp_array, hsp_list->hspcnt, sizeof(BlastHSP*),
- null_compare_hsps);
+ diag_compare_hsps);
for (index=1, new_hspcnt=0; index<hsp_list->hspcnt; index++) {
if (!hsp_array[index])
break;
-
- q_off = hsp_array[index]->query.offset;
- s_off = hsp_array[index]->subject.offset;
- q_end = hsp_array[index]->query.end;
- s_end = hsp_array[index]->subject.end;
- evalue = hsp_array[index]->evalue;
- context = hsp_array[index]->context;
- for (index1 = new_hspcnt; index1 >= 0 &&
- hsp_array[index1]->context == context && new_hspcnt-index1 < 10 &&
- MB_HSP_CLOSE(q_off, hsp_array[index1]->query.offset,
- s_off, hsp_array[index1]->subject.offset,
- MIN_DIAG_DIST);
+ inclusion_status = eDiagNear;
+ for (index1 = new_hspcnt; inclusion_status != eDiagDistant &&
+ index1 >= 0 && new_hspcnt-index1 < MAX_NUM_CHECK_INCLUSION;
index1--) {
- if (q_off >= hsp_array[index1]->query.offset &&
- s_off >= hsp_array[index1]->subject.offset &&
- q_end <= hsp_array[index1]->query.end &&
- s_end <= hsp_array[index1]->subject.end &&
- evalue >= hsp_array[index1]->evalue) {
+ inclusion_status =
+ Blast_HSPInclusionTest(hsp_array[index], hsp_array[index1]);
+ if (inclusion_status == eFirstInSecond ||
+ inclusion_status == eEqual) {
+ /* Free the new HSP and break out of the inclusion test loop */
hsp_array[index] = Blast_HSPFree(hsp_array[index]);
break;
- } else if (q_off <= hsp_array[index1]->query.offset &&
- s_off <= hsp_array[index1]->subject.offset &&
- q_end >= hsp_array[index1]->query.end &&
- s_end >= hsp_array[index1]->subject.end &&
- evalue <= hsp_array[index1]->evalue) {
+ } else if (inclusion_status == eSecondInFirst) {
hsp_array[index1] = Blast_HSPFree(hsp_array[index1]);
shift_needed = TRUE;
}
@@ -1228,7 +1350,7 @@ Int2
Blast_HSPListReevaluateWithAmbiguities(BlastHSPList* hsp_list,
BLAST_SequenceBlk* query_blk, BLAST_SequenceBlk* subject_blk,
const BlastHitSavingOptions* hit_options, BlastQueryInfo* query_info,
- BlastScoreBlk* sbp, const BlastScoringOptions* score_options,
+ BlastScoreBlk* sbp, const BlastScoringParameters* score_params,
const BlastSeqSrc* seq_src)
{
BlastHSP** hsp_array,* hsp;
@@ -1237,7 +1359,6 @@ Blast_HSPListReevaluateWithAmbiguities(BlastHSPList* hsp_list,
Boolean purge, delete_hsp;
Int2 status = 0;
GetSeqArg seq_arg;
- Boolean gapped_calculation = score_options->gapped_calculation;
if (!hsp_list)
return status;
@@ -1246,13 +1367,6 @@ Blast_HSPListReevaluateWithAmbiguities(BlastHSPList* hsp_list,
hsp_array = hsp_list->hsp_array;
memset((void*) &seq_arg, 0, sizeof(seq_arg));
- /* In case of no traceback, return without doing anything */
- if (!hsp_list->traceback_done && gapped_calculation) {
- if (hsp_list->hspcnt > 1)
- status = Blast_HSPListUniqSort(hsp_list);
- return status;
- }
-
if (hsp_list->hspcnt == 0)
/* All HSPs have been deleted */
return status;
@@ -1285,7 +1399,7 @@ Blast_HSPListReevaluateWithAmbiguities(BlastHSPList* hsp_list,
delete_hsp =
Blast_HSPReevaluateWithAmbiguities(hsp, query_start, subject_start,
- hit_options, score_options, query_info, sbp);
+ hit_options, score_params, query_info, sbp);
if (delete_hsp) { /* This HSP is now below the cutoff */
hsp_array[index] = Blast_HSPFree(hsp_array[index]);
@@ -1296,12 +1410,8 @@ Blast_HSPListReevaluateWithAmbiguities(BlastHSPList* hsp_list,
if (purge) {
Blast_HSPListPurgeNullHSPs(hsp_list);
}
-
- /* Check for HSP inclusion once more */
- if (hsp_list->hspcnt > 1)
- status = Blast_HSPListUniqSort(hsp_list);
- BlastSequenceBlkFree(seq_arg.seq);
+ BLASTSeqSrcRetSequence(seq_src, (void*)&seq_arg);
subject_blk->sequence = NULL;
return status;
@@ -1487,8 +1597,7 @@ Int2 Blast_HSPListsMerge(BlastHSPList* hsp_list,
} else { /* No gap information available */
if (Blast_HSPContained(hspp1[index], hspp2[index1])) {
sfree(hspp1[index]);
- /* Point the first HSP to the new HSP;
- free the second HSP. */
+ /* Point the first HSP to the new HSP; free the old HSP. */
hspp1[index] = hspp2[index1];
hspp2[index1] = NULL;
/* This HSP has been removed, so break out of the inner
@@ -1508,7 +1617,7 @@ Int2 Blast_HSPListsMerge(BlastHSPList* hsp_list,
}
/* Purge the nulled out HSPs from the new HSP list */
- Blast_HSPListPurgeNullHSPs(hsp_list);
+ Blast_HSPListPurgeNullHSPs(hsp_list);
/* The new number of HSPs is now the sum of the remaining counts in the
two lists, but if there is a restriction on the number of HSPs to keep,
@@ -1556,6 +1665,8 @@ void Blast_HSPListAdjustOffsets(BlastHSPList* hsp_list, Int4 offset)
}
/** Callback for sorting hsp lists by their best evalue/score;
+ * Evalues are compared only up to a relative error of
+ * FUZZY_EVALUE_COMPARE_FACTOR.
* It is assumed that the HSP arrays in each hit list are already sorted by
* e-value/score.
*/
@@ -1563,6 +1674,7 @@ static int
evalue_compare_hsp_lists(const void* v1, const void* v2)
{
BlastHSPList* h1,* h2;
+ int retval = 0;
h1 = *(BlastHSPList**) v1;
h2 = *(BlastHSPList**) v2;
@@ -1576,11 +1688,10 @@ evalue_compare_hsp_lists(const void* v1, const void* v2)
else if (h2->hspcnt == 0)
return -1;
- if (h1->hsp_array[0]->evalue < h2->hsp_array[0]->evalue)
- return -1;
- if (h1->hsp_array[0]->evalue > h2->hsp_array[0]->evalue)
- return 1;
-
+ if ((retval = fuzzy_evalue_comp(h1->hsp_array[0]->evalue,
+ h2->hsp_array[0]->evalue)) != 0)
+ return retval;
+
if (h1->hsp_array[0]->score > h2->hsp_array[0]->score)
return -1;
if (h1->hsp_array[0]->score < h2->hsp_array[0]->score)
@@ -1596,6 +1707,15 @@ evalue_compare_hsp_lists(const void* v1, const void* v2)
return 0;
}
+/** Callback for sorting hsp lists by their best e-value/score, in
+ * reverse order - from higher e-value to lower (lower score to higher).
+ * @param v1 Pointer to the first array element being compared [in]
+ * @param v2 Pointer to the second array element being compared [in]
+ * @return Result of evalue_compare_hsp_lists with its arguments swapped.
+*/
+static int
+evalue_compare_hsp_lists_rev(const void* v1, const void* v2)
+{
+ /* Swapping the arguments inverts the order of the forward comparison. */
+ return evalue_compare_hsp_lists(v2, v1);
+}
+
/********************************************************************************
Functions manipulating BlastHitList's
********************************************************************************/
@@ -1740,7 +1860,7 @@ Blast_HitListInsertHSPListInHeap(BlastHitList* hit_list,
}
Int2 Blast_HitListUpdate(BlastHitList* hit_list,
- BlastHSPList* hsp_list)
+ BlastHSPList* hsp_list)
{
if (hit_list->hsplist_count < hit_list->hsplist_max) {
/* If the array of HSP lists for this query is not yet allocated,
@@ -1754,21 +1874,57 @@ Int2 Blast_HitListUpdate(BlastHitList* hit_list,
MAX(hsp_list->hsp_array[0]->evalue, hit_list->worst_evalue);
hit_list->low_score =
MIN(hsp_list->hsp_array[0]->score, hit_list->low_score);
- } else if ((hsp_list->hsp_array[0]->evalue > hit_list->worst_evalue) ||
- ((hsp_list->hsp_array[0]->evalue == hit_list->worst_evalue) &&
- (hsp_list->hsp_array[0]->score <= hit_list->low_score))) {
- /* This hit list is less significant than any of those already saved;
- discard it */
- Blast_HSPListFree(hsp_list);
} else {
- if (!hit_list->heapified) {
- CreateHeap(hit_list->hsplist_array, hit_list->hsplist_count,
- sizeof(BlastHSPList*), evalue_compare_hsp_lists);
- hit_list->heapified = TRUE;
+ /* Compare e-values only with a certain precision */
+ int evalue_order = fuzzy_evalue_comp(hsp_list->hsp_array[0]->evalue,
+ hit_list->worst_evalue);
+ if (evalue_order > 0 ||
+ (evalue_order == 0 &&
+ (hsp_list->hsp_array[0]->score < hit_list->low_score))) {
+ /* This hit list is less significant than any of those already saved;
+ discard it. Note that newer hits with score and e-value both equal
+ to the current worst will be saved, at the expense of some older
+ hit.
+ */
+ Blast_HSPListFree(hsp_list);
+ } else {
+ if (!hit_list->heapified) {
+ CreateHeap(hit_list->hsplist_array, hit_list->hsplist_count,
+ sizeof(BlastHSPList*), evalue_compare_hsp_lists);
+ hit_list->heapified = TRUE;
+ }
+ Blast_HitListInsertHSPListInHeap(hit_list, hsp_list);
+ }
+ }
+ return 0;
+}
+
+/** Compact the HSP list array of a hit list: non-NULL entries are moved to
+ * the front in their original order, the vacated tail is set to NULL, and
+ * hsplist_count is updated to the number of entries kept.
+ * Does nothing when hit_list is NULL or its hsplist_count is 0.
+ * @param hit_list Hit list whose hsplist_array is compacted [in] [out]
+ * @return Always 0.
+ */
+static Int2 Blast_HitListPurgeNullHSPLists(BlastHitList* hit_list)
+{
+ Int4 index, index1; /* loop indices. */
+ Int4 hsplist_count; /* total number of HSPList's to iterate over. */
+ BlastHSPList** hsplist_array; /* hsplist_array to purge. */
+
+ if (hit_list == NULL || hit_list->hsplist_count == 0)
+ return 0;
+
+ hsplist_array = hit_list->hsplist_array;
+ hsplist_count = hit_list->hsplist_count;
+
+ index1 = 0;
+ for (index=0; index<hsplist_count; index++) {
+ if (hsplist_array[index]) {
+ hsplist_array[index1] = hsplist_array[index];
+ index1++;
}
- Blast_HitListInsertHSPListInHeap(hit_list, hsp_list);
}
+ for (index=index1; index<hsplist_count; index++) {
+ hsplist_array[index] = NULL;
+ }
+
+ hit_list->hsplist_count = index1;
+
return 0;
}
@@ -1821,47 +1977,123 @@ Int2 Blast_HSPResultsSortByEvalue(BlastHSPResults* results)
return 0;
}
-Int2 Blast_HSPResultsSaveHitList(Uint1 program, BlastHSPResults* results,
- BlastHSPList* hsp_list, BlastHitSavingParameters* hit_parameters)
+/* For every query, sort the HSP lists of its hit list with
+ * evalue_compare_hsp_lists_rev (the reverse of the usual e-value/score
+ * order) and then call Blast_HitListPurge on the hit list.
+ * Hit lists that are NULL or hold fewer than two HSP lists are not sorted,
+ * but Blast_HitListPurge is still called for them.
+ * Always returns 0.
+ */
+Int2 Blast_HSPResultsReverseSort(BlastHSPResults* results)
+{
+   Int4 query_idx;
+
+   for (query_idx = 0; query_idx < results->num_queries; ++query_idx) {
+      BlastHitList* hits = results->hitlist_array[query_idx];
+      Boolean needs_sort = (Boolean) (hits != NULL && hits->hsplist_count > 1);
+
+      if (needs_sort) {
+         qsort(hits->hsplist_array, hits->hsplist_count,
+               sizeof(BlastHSPList*), evalue_compare_hsp_lists_rev);
+      }
+      /* Called unconditionally, including for a NULL hit list. */
+      Blast_HitListPurge(hits);
+   }
+   return 0;
+}
+
+/* Move the HSPs of an RPS BLAST HSP list into the results structure.
+ * Every HSP is appended to the HSP list of the database sequence obtained
+ * from its context via Blast_GetQueryIndexFromContext; the per-sequence
+ * lists are then compacted, sorted with evalue_compare_hsp_lists and
+ * truncated to hit_options->prelim_hitlist_size entries.
+ * On success the input list is consumed: its HSPs now belong to the results
+ * and the list itself is freed.
+ * Returns 0 on success (including a NULL input list) and -1 if the array of
+ * HSP lists could not be allocated; in that case the input list is left
+ * untouched.
+ */
+Int2 Blast_HSPResultsSaveRPSHSPList(Uint1 program, BlastHSPResults* results,
+        BlastHSPList* hsplist_in, const BlastHitSavingOptions* hit_options)
+{
+   Int4 index, oid;
+   BlastHitList* hit_list;
+   BlastHSPList* hsp_list;
+   BlastHSP* hsp;
+
+   if (!hsplist_in)
+      return 0;
+
+   /* There is only one query allowed in RPS BLAST. Its hit list must have
+      already been allocated with size equal to the number of sequences in
+      RPS BLAST database. */
+   hit_list = results->hitlist_array[0];
+   ASSERT(hit_list);
+
+   /* Initialize the HSPList array, if necessary. */
+   if (hsplist_in->hspcnt > 0 && !hit_list->hsplist_array) {
+      hit_list->hsplist_array = (BlastHSPList**)
+         calloc(hit_list->hsplist_max, sizeof(BlastHSPList*));
+      /* The array is indexed right below; bail out instead of
+         dereferencing a NULL pointer. */
+      if (!hit_list->hsplist_array)
+         return -1;
+   }
+
+   /* Save all HSPs into HSPList's corresponding to correct database
+      sequences. */
+   for (index = 0; index < hsplist_in->hspcnt; ++index) {
+      hsp = hsplist_in->hsp_array[index];
+      oid = Blast_GetQueryIndexFromContext(hsp->context, program);
+      /* HSP context is no longer needed; set it to 0. */
+      hsp->context = 0;
+      ASSERT(oid < hit_list->hsplist_max);
+      hsp_list = hit_list->hsplist_array[oid];
+      if (!hsp_list) {
+         hsp_list = hit_list->hsplist_array[oid] =
+            Blast_HSPListNew(hit_options->hsp_num_max);
+         hsp_list->oid = oid;
+      }
+      /* NOTE(review): the status of Blast_HSPListSaveHSP was stored but
+         never returned or inspected; it is still ignored here. */
+      (void) Blast_HSPListSaveHSP(hsp_list, hsp);
+   }
+
+   /* With an empty input list and no array allocated yet there is nothing
+      to purge, sort or truncate; doing so would walk a NULL array. */
+   if (hit_list->hsplist_array) {
+      /* Purge the NULL HSPLists from the resulting HitList. */
+      hit_list->hsplist_count = hit_list->hsplist_max;
+      Blast_HitListPurgeNullHSPLists(hit_list);
+      /* Sort the HSPList's by e-value. */
+      qsort(hit_list->hsplist_array, hit_list->hsplist_count,
+            sizeof(BlastHSPList*), evalue_compare_hsp_lists);
+      /* Leave only the number of HSPList's allowed by the hitlist size
+         option. */
+      for (index = hit_options->prelim_hitlist_size;
+           index < hit_list->hsplist_count; ++index) {
+         hit_list->hsplist_array[index] =
+            Blast_HSPListFree(hit_list->hsplist_array[index]);
+      }
+      hit_list->hsplist_count =
+         MIN(hit_list->hsplist_count, hit_options->prelim_hitlist_size);
+   }
+
+   /* All HSPs from the input HSP list have been moved to the results
+      structure, so make sure there is no attempt to free them now. */
+   sfree(hsplist_in->hsp_array);
+   hsplist_in->hspcnt = 0;
+   hsplist_in = Blast_HSPListFree(hsplist_in);
+
+   return 0;
+}
+
+Int2 Blast_HSPResultsSaveHSPList(Uint1 program, BlastHSPResults* results,
+ BlastHSPList* hsp_list, const BlastHitSavingOptions* hit_options)
{
Int2 status = 0;
BlastHSPList** hsp_list_array;
BlastHSP* hsp;
Int4 index;
- BlastHitSavingOptions* hit_options = hit_parameters->options;
- Uint1 context_factor;
if (!hsp_list)
return 0;
- if (program == blast_type_blastn) {
- context_factor = 2;
- } else if (program == blast_type_blastx ||
- program == blast_type_tblastx) {
- context_factor = 6;
- } else {
- context_factor = 1;
- }
+ if (!results || !hit_options)
+ return -1;
- /* Sort the HSPs by score */
+ /* Sort the HSPs by e-value/score. E-value has a priority here, because
+ lower scoring HSPs in linked sets might have lower e-values, and must
+ be moved higher in the list. */
if (hsp_list->hspcnt > 1) {
qsort(hsp_list->hsp_array, hsp_list->hspcnt, sizeof(BlastHSP*),
- score_compare_hsps);
+ evalue_compare_hsps);
}
/* Rearrange HSPs into multiple hit lists if more than one query */
if (results->num_queries > 1) {
BlastHSPList* tmp_hsp_list;
+ Int4 query_index;
hsp_list_array = calloc(results->num_queries, sizeof(BlastHSPList*));
for (index = 0; index < hsp_list->hspcnt; index++) {
hsp = hsp_list->hsp_array[index];
- tmp_hsp_list = hsp_list_array[hsp->context/context_factor];
+ query_index = Blast_GetQueryIndexFromContext(hsp->context, program);
+ tmp_hsp_list = hsp_list_array[query_index];
if (!tmp_hsp_list) {
- hsp_list_array[hsp->context/context_factor] = tmp_hsp_list =
+ hsp_list_array[query_index] = tmp_hsp_list =
Blast_HSPListNew(hit_options->hsp_num_max);
tmp_hsp_list->oid = hsp_list->oid;
- tmp_hsp_list->traceback_done = hsp_list->traceback_done;
}
if (!tmp_hsp_list || tmp_hsp_list->do_not_reallocate) {
@@ -1915,13 +2147,28 @@ Int2 Blast_HSPResultsSaveHitList(Uint1 program, BlastHSPResults* results,
results->hitlist_array[0] =
Blast_HitListNew(hit_options->prelim_hitlist_size);
}
- Blast_HitListUpdate(results->hitlist_array[0],
- hsp_list);
+ Blast_HitListUpdate(results->hitlist_array[0], hsp_list);
}
return status;
}
+/* Insert an HSP list into the hit list of the query named by
+ * hsp_list->query_index, creating that hit list with Blast_HitListNew
+ * (sized by hitlist_size) if it does not exist yet. The insertion itself is
+ * delegated to Blast_HitListUpdate.
+ * A NULL or empty HSP list is ignored. Always returns 0.
+ */
+Int2 Blast_HSPResultsInsertHSPList(BlastHSPResults* results,
+        BlastHSPList* hsp_list, Int4 hitlist_size)
+{
+   BlastHitList** hit_list_slot;
+
+   if (hsp_list == NULL || hsp_list->hspcnt == 0)
+      return 0;
+
+   ASSERT(hsp_list->query_index < results->num_queries);
+
+   /* Slot in the results array that belongs to this HSP list's query. */
+   hit_list_slot = &results->hitlist_array[hsp_list->query_index];
+   if (*hit_list_slot == NULL)
+      *hit_list_slot = Blast_HitListNew(hitlist_size);
+
+   Blast_HitListUpdate(*hit_list_slot, hsp_list);
+   return 0;
}
void Blast_HSPResultsRPSUpdate(BlastHSPResults *final_result,
BlastHSPResults *init_result)
diff --git a/algo/blast/core/blast_hits.h b/algo/blast/core/blast_hits.h
index 955fd372..be0b1141 100644
--- a/algo/blast/core/blast_hits.h
+++ b/algo/blast/core/blast_hits.h
@@ -1,50 +1,47 @@
-/* $Id: blast_hits.h,v 1.38 2004/05/05 15:26:45 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_hits.h
-
-Author: Ilya Dondoshansky
-
-Contents: Structures and API used for saving BLAST hits
-
-******************************************************************************
- * $Revision: 1.38 $
- * */
+/* $Id: blast_hits.h,v 1.47 2004/06/16 14:29:43 ivanov Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_hits.h
+ * Structures and API used for saving BLAST hits
+ */
+
#ifndef __BLAST_HITS__
#define __BLAST_HITS__
-#ifdef __cplusplus
-extern "C" {
-#endif
-
#include <algo/blast/core/blast_options.h>
#include <algo/blast/core/gapinfo.h>
#include <algo/blast/core/blast_seqsrc.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/** One sequence segment within an HSP */
typedef struct BlastSeg {
Int2 frame; /**< Translation frame */
@@ -56,17 +53,18 @@ typedef struct BlastSeg {
/** Structure holding all information about an HSP */
typedef struct BlastHSP {
- Int4 score; /**< This HSP's raw score */
- Int4 num_ident; /**< Number of identical base pairs in this HSP */
+ Int4 score; /**< This HSP's raw score */
+ Int4 num_ident; /**< Number of identical base pairs in this HSP */
+ double bit_score; /**< Bit score, calculated from score */
double evalue; /**< This HSP's e-value */
- BlastSeg query; /**< Query sequence info. */
- BlastSeg subject; /**< Subject sequence info. */
- Int4 context; /**< Context number of query */
- GapEditBlock* gap_info; /**< ALL gapped alignment is here */
+ BlastSeg query; /**< Query sequence info. */
+ BlastSeg subject; /**< Subject sequence info. */
+ Int4 context; /**< Context number of query */
+ GapEditBlock* gap_info;/**< ALL gapped alignment is here */
Int2 splice_junction; /**< Splice junction count in a linked set of
HSPs. Each present splice junction counts as +1,
absent as -1. */
- Int4 num; /**< How many HSP's make up this (sum) segment? */
+ Int4 num; /**< How many HSP's make up this (sum) segment? */
Uint4 pattern_length; /**< Length of pattern occurrence in this HSP, in
PHI BLAST */
} BlastHSP;
@@ -76,13 +74,13 @@ typedef struct BlastHSP {
*/
typedef struct BlastHSPList {
Int4 oid;/**< The ordinal id of the subject sequence this HSP list is for */
+ Int4 query_index; /**< Index of the query which this HSPList corresponds to.
+ Set to 0 if not applicable */
BlastHSP** hsp_array; /**< Array of pointers to individual HSPs */
Int4 hspcnt; /**< Number of HSPs saved */
Int4 allocated; /**< The allocated size of the hsp_array */
Int4 hsp_max; /**< The maximal number of HSPs allowed to be saved */
Boolean do_not_reallocate; /**< Is reallocation of the hsp_array allowed? */
- Boolean traceback_done; /**< Has the traceback already been done on HSPs in
- this list? */
} BlastHSPList;
/** The structure to contain all BLAST results for one query sequence */
@@ -105,13 +103,6 @@ typedef struct BlastHSPResults {
} BlastHSPResults;
-/** An auxiliary structure used for merging HSPs */
-typedef struct BLASTHSPSegment {
- Int4 q_start, q_end;
- Int4 s_start, s_end;
- struct BLASTHSPSegment* next;
-} BLASTHSPSegment;
-
/* By how much should the chunks of a subject sequence overlap if it is
too long and has to be split */
#define DBSEQ_CHUNK_OVERLAP 100
@@ -198,7 +189,7 @@ void Blast_HSPPHIGetEvalue(BlastHSP* hsp, BlastScoreBlk* sbp);
* @param query_start Pointer to the start of the query sequence [in]
* @param subject_start Pointer to the start of the subject sequence [in]
* @param hit_options Hit saving options with e-value cut-off [in]
- * @param score_options Scoring options [in]
+ * @param score_params Scoring parameters [in]
* @param query_info Query information structure, containing effective search
* space(s) [in]
* @param sbp Score block with Karlin-Altschul parameters [in]
@@ -207,7 +198,7 @@ void Blast_HSPPHIGetEvalue(BlastHSP* hsp, BlastScoreBlk* sbp);
Boolean Blast_HSPReevaluateWithAmbiguities(BlastHSP* hsp,
Uint1* query_start, Uint1* subject_start,
const BlastHitSavingOptions* hit_options,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
BlastQueryInfo* query_info, BlastScoreBlk* sbp);
/** Calculate number of identities in an HSP.
@@ -236,6 +227,29 @@ Blast_HSPGetOOFNumIdentities(Uint1* query, Uint1* subject, BlastHSP* hsp,
Uint1 program, Int4* num_ident_ptr,
Int4* align_length_ptr);
+/** Calculate length of an HSP as length in query plus length of gaps in
+ * query. If gap information is unavailable, return maximum between length in
+ * query and in subject.
+ * @param hsp An HSP structure [in]
+ * @param length_out Length of this HSP [out]
+ * @param gaps_out Total number of gaps in this HSP [out]
+ * @param gap_opens_out Number of gap openings in this HSP [out]
+ */
+void Blast_HSPCalcLengthAndGaps(BlastHSP* hsp, Int4* length_out,
+ Int4* gaps_out, Int4* gap_opens_out);
+
+/** Adjust HSP endpoint offsets according to strand/frame; return values in
+ * 1-offset coordinates instead of internal 0-offset.
+ * @param hsp An HSP structure [in]
+ * @param q_start Start of alignment in query [out]
+ * @param q_end End of alignment in query [out]
+ * @param s_start Start of alignment in subject [out]
+ * @param s_end End of alignment in subject [out]
+ */
+void
+Blast_HSPGetAdjustedOffsets(BlastHSP* hsp, Int4* q_start, Int4* q_end,
+ Int4* s_start, Int4* s_end);
+
/********************************************************************************
HSPList API
********************************************************************************/
@@ -273,14 +287,13 @@ Blast_HSPListSetFrames(Uint1 program_number, BlastHSPList* hsp_list,
* the sum statistics. In case of multiple queries, the offsets are assumed
* to be already adjusted to individual query coordinates, and the contexts
* are set for each HSP.
- * @param program The integer BLAST program index [in]
* @param query_info Auxiliary query information - needed only for effective
* search space calculation if it is not provided [in]
* @param hsp_list List of HSPs for one subject sequence [in] [out]
* @param gapped_calculation Is this for a gapped or ungapped search? [in]
* @param sbp Structure containing statistical information [in]
*/
-Int2 Blast_HSPListGetEvalues(Uint1 program, BlastQueryInfo* query_info,
+Int2 Blast_HSPListGetEvalues(BlastQueryInfo* query_info,
BlastHSPList* hsp_list, Boolean gapped_calculation,
BlastScoreBlk* sbp);
@@ -290,6 +303,14 @@ Int2 Blast_HSPListGetEvalues(Uint1 program, BlastQueryInfo* query_info,
*/
void Blast_HSPListPHIGetEvalues(BlastHSPList* hsp_list, BlastScoreBlk* sbp);
+/** Calculate bit scores from raw scores in an HSP list.
+ * @param hsp_list List of HSPs [in] [out]
+ * @param gapped_calculation Is this a gapped search? [in]
+ * @param sbp Scoring block with statistical parameters [in]
+ */
+Int2 Blast_HSPListGetBitScores(BlastHSPList* hsp_list,
+ Boolean gapped_calculation, BlastScoreBlk* sbp);
+
/** Discard the HSPs above the e-value threshold from the HSP list
* @param hsp_list List of HSPs for one subject sequence [in] [out]
* @param hit_options Options block containing the e-value cut-off [in]
@@ -316,7 +337,7 @@ Blast_HSPListPurgeNullHSPs(BlastHSPList* hsp_list);
* @param hit_options The options related to saving hits [in]
* @param query_info Auxiliary query information [in]
* @param sbp The statistical information [in]
- * @param score_options The scoring options [in]
+ * @param score_params Parameters related to scoring [in]
* @param seq_src The BLAST database structure (for retrieving uncompressed
* sequence) [in]
*/
@@ -324,7 +345,7 @@ Int2
Blast_HSPListReevaluateWithAmbiguities(BlastHSPList* hsp_list,
BLAST_SequenceBlk* query_blk, BLAST_SequenceBlk* subject_blk,
const BlastHitSavingOptions* hit_options, BlastQueryInfo* query_info,
- BlastScoreBlk* sbp, const BlastScoringOptions* score_options,
+ BlastScoreBlk* sbp, const BlastScoringParameters* score_params,
const BlastSeqSrc* seq_src);
/** Append one HSP list to the other. Discard lower scoring HSPs if there is
@@ -360,6 +381,10 @@ Int2 Blast_HSPListsMerge(BlastHSPList* hsp_list,
*/
void Blast_HSPListAdjustOffsets(BlastHSPList* hsp_list, Int4 offset);
+/** Sort the HSPs in an HSP list by diagonal and remove redundant HSPs. */
+Int2
+Blast_HSPListUniqSort(BlastHSPList* hsp_list);
+
/********************************************************************************
HitList API.
********************************************************************************/
@@ -388,7 +413,6 @@ Int2 Blast_HitListHSPListsFree(BlastHitList* hitlist);
*/
Int2 Blast_HitListUpdate(BlastHitList* hit_list, BlastHSPList* hsp_list);
-
/********************************************************************************
HSPResults API.
********************************************************************************/
@@ -405,18 +429,49 @@ BlastHSPResults* Blast_HSPResultsFree(BlastHSPResults* results);
/** Sort each hit list in the BLAST results by best e-value */
Int2 Blast_HSPResultsSortByEvalue(BlastHSPResults* results);
+/** Sort each hit list in the BLAST results by best e-value, in reverse
+ order. */
+Int2 Blast_HSPResultsReverseSort(BlastHSPResults* results);
+
+/** Blast_HSPResultsSaveRPSHSPList
+ * Save the HSPs from an HSPList obtained on the preliminary stage of
+ * RPS BLAST to appropriate places in the results structure. Input HSPList
+ * contains HSPs from a single query, but from all RPS BLAST database
+ * sequences.
+ * @param program The type of BLAST search [in]
+ * @param results The structure holding results for all queries [in] [out]
+ * @param hsp_list The results for the current subject sequence; in case of
+ * multiple queries, offsets are still in the concatenated
+ * sequence coordinates [in]
+ * @param hit_options The options related to saving hits [in]
+ */
+Int2 Blast_HSPResultsSaveRPSHSPList(Uint1 program, BlastHSPResults* results,
+ BlastHSPList* hsp_list, const BlastHitSavingOptions* hit_options);
-/** Blast_HSPResultsSaveHitList
- * Save the current hit list to appropriate places in the results structure
+/** Blast_HSPResultsSaveHSPList
+ * Save the current HSP list to appropriate places in the results structure.
+ * The input HSPList contains HSPs from a single BLAST database sequence, but
+ * possibly from multiple queries.
* @param program The type of BLAST search [in]
* @param results The structure holding results for all queries [in] [out]
* @param hsp_list The results for the current subject sequence; in case of
* multiple queries, offsets are still in the concatenated
* sequence coordinates [in]
- * @param hit_parameters The options/parameters related to saving hits [in]
+ * @param hit_options The options related to saving hits [in]
+ */
+Int2 Blast_HSPResultsSaveHSPList(Uint1 program, BlastHSPResults* results,
+ BlastHSPList* hsp_list, const BlastHitSavingOptions* hit_options);
+
+/** Blast_HSPResultsInsertHSPList
+ * Insert an HSP list to the appropriate place in the results structure.
+ * All HSPs in this list must be from the same query, and the query index
+ * must be set in the BlastHSPList input structure.
+ * @param results The structure holding results for all queries [in] [out]
+ * @param hsp_list The results for one query-subject sequence pair. [in]
+ * @param hitlist_size Maximal allowed hit list size. [in]
*/
-Int2 Blast_HSPResultsSaveHitList(Uint1 program, BlastHSPResults* results,
- BlastHSPList* hsp_list, BlastHitSavingParameters* hit_parameters);
+Int2 Blast_HSPResultsInsertHSPList(BlastHSPResults* results,
+ BlastHSPList* hsp_list, Int4 hitlist_size);
/** Convert a prelimiary list of HSPs, that are the result of
* an RPS blast search, to a format compatible with the rest
diff --git a/algo/blast/core/blast_hspstream.c b/algo/blast/core/blast_hspstream.c
new file mode 100644
index 00000000..0466e490
--- /dev/null
+++ b/algo/blast/core/blast_hspstream.c
@@ -0,0 +1,228 @@
+/* $Id: blast_hspstream.c,v 1.3 2004/06/07 17:06:49 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Christiam Camacho
+ *
+ */
+
+/** @file blast_hspstream.c
+ * Definition of ADT to save and retrieve lists of HSPs in the BLAST engine.
+ */
+
+static char const rcsid[] =
+ "$Id: blast_hspstream.c,v 1.3 2004/06/07 17:06:49 dondosha Exp $";
+
+#include <algo/blast/core/blast_hspstream.h>
+#include <algo/blast/core/blast_def.h> /* needed for sfree */
+
+/** Complete type definition of Blast Hsp Stream ADT */
+struct BlastHSPStream {
+ BlastHSPStreamConstructor NewFnPtr; /**< Constructor */
+ BlastHSPStreamDestructor DeleteFnPtr; /**< Destructor */
+
+ /* The operational interface */
+
+ BlastHSPStreamMethod WriteFnPtr; /**< Write to BlastHSPStream */
+ BlastHSPStreamMethod ReadFnPtr; /**< Read from BlastHSPStream */
+ BlastHSPStreamCloseFnType CloseFnPtr; /**< Close BlastHSPStream for
+ writing */
+ void* DataStructure; /**< ADT holding HSPStream */
+};
+
+BlastHSPStream* BlastHSPStreamNew(const BlastHSPStreamNewInfo* bhsn_info)
+{
+ BlastHSPStream* retval = NULL;
+ BlastHSPStreamFunctionPointerTypes fnptr;
+
+ if ( bhsn_info == NULL ) {
+ return NULL;
+ }
+
+ if ( !(retval = (BlastHSPStream*) calloc(1, sizeof(BlastHSPStream)))) {
+ return NULL;
+ }
+
+ /* Save the constructor and invoke it */
+ fnptr.ctor = bhsn_info->constructor;
+ SetMethod(retval, eConstructor, fnptr);
+ if (retval->NewFnPtr) {
+ retval = (*retval->NewFnPtr)(retval, bhsn_info->ctor_argument);
+ } else {
+ sfree(retval);
+ }
+
+ ASSERT(retval->DeleteFnPtr);
+ ASSERT(retval->WriteFnPtr);
+ ASSERT(retval->ReadFnPtr);
+
+ return retval;
+}
+
+BlastHSPStream* BlastHSPStreamFree(BlastHSPStream* hsp_stream)
+{
+ BlastHSPStreamDestructor destructor_fnptr = NULL;
+
+ if (!hsp_stream) {
+ return (BlastHSPStream*) NULL;
+ }
+
+ if ( !(destructor_fnptr = (*hsp_stream->DeleteFnPtr))) {
+ sfree(hsp_stream);
+ return NULL;
+ }
+
+ return (BlastHSPStream*) (*destructor_fnptr)(hsp_stream);
+}
+
+void BlastHSPStreamClose(BlastHSPStream* hsp_stream)
+{
+ BlastHSPStreamCloseFnType close_fnptr = NULL;
+
+ if (!hsp_stream)
+ return;
+
+ /** Close functionality is optional. If closing function is not provided,
+ just do nothing. */
+ if ( !(close_fnptr = (*hsp_stream->CloseFnPtr))) {
+ return;
+ }
+
+ (*close_fnptr)(hsp_stream);
+}
+
+const int kBlastHSPStream_Error = -1;
+const int kBlastHSPStream_Success = 0;
+const int kBlastHSPStream_Eof = 1;
+
+/** This method is akin to a vtable dispatcher, invoking the method registered
+ * upon creation of the implementation of the BlastHSPStream interface
+ * @param hsp_stream The BlastHSPStream object [in]
+ * @param name Name of the method to invoke on hsp_stream [in]
+ * @param hsp_list List of HSPs passed through to the invoked method [in] [out]
+ * @return kBlastHSPStream_Error on NULL hsp_stream or NULL method pointer
+ * (i.e.: unimplemented or uninitialized method on the BlastHSPStream
+ * interface) or return value of the implementation.
+ */
+static int
+_MethodDispatcher(BlastHSPStream* hsp_stream, EMethodName name,
+ BlastHSPList** hsp_list)
+{
+ BlastHSPStreamMethod method_fnptr = NULL;
+
+ if (!hsp_stream) {
+ return kBlastHSPStream_Error;
+ }
+
+ ASSERT(name < eMethodBoundary);
+
+ switch (name) {
+ case eRead:
+ method_fnptr = (*hsp_stream->ReadFnPtr);
+ break;
+
+ case eWrite:
+ method_fnptr = (*hsp_stream->WriteFnPtr);
+ break;
+
+ default:
+ abort(); /* should never happen */
+ }
+
+ if (!method_fnptr) {
+ return kBlastHSPStream_Error;
+ }
+
+ return (*method_fnptr)(hsp_stream, hsp_list);
+}
+
+int BlastHSPStreamRead(BlastHSPStream* hsp_stream, BlastHSPList** hsp_list)
+{
+ return _MethodDispatcher(hsp_stream, eRead, hsp_list);
+}
+
+int BlastHSPStreamWrite(BlastHSPStream* hsp_stream, BlastHSPList** hsp_list)
+{
+ return _MethodDispatcher(hsp_stream, eWrite, hsp_list);
+}
+
+/*****************************************************************************/
+
+void* GetData(BlastHSPStream* hsp_stream)
+{
+ if ( !hsp_stream ) {
+ return NULL;
+ }
+
+ return hsp_stream->DataStructure;
+}
+
+int SetData(BlastHSPStream* hsp_stream, void* data)
+{
+ if ( !hsp_stream ) {
+ return kBlastHSPStream_Error;
+ }
+
+ hsp_stream->DataStructure = data;
+
+ return kBlastHSPStream_Success;
+}
+
+int SetMethod(BlastHSPStream* hsp_stream,
+ EMethodName name,
+ BlastHSPStreamFunctionPointerTypes fnptr_type)
+{
+ if ( !hsp_stream ) {
+ return kBlastHSPStream_Error;
+ }
+
+ ASSERT(name < eMethodBoundary);
+
+ switch (name) {
+ case eConstructor:
+ hsp_stream->NewFnPtr = fnptr_type.ctor;
+ break;
+
+ case eDestructor:
+ hsp_stream->DeleteFnPtr = fnptr_type.dtor;
+ break;
+
+ case eRead:
+ hsp_stream->ReadFnPtr = fnptr_type.method;
+ break;
+
+ case eWrite:
+ hsp_stream->WriteFnPtr = fnptr_type.method;
+ break;
+
+ case eClose:
+ hsp_stream->CloseFnPtr = fnptr_type.closeFn;
+ break;
+
+ default:
+ abort(); /* should never happen */
+ }
+
+ return kBlastHSPStream_Success;
+}
diff --git a/algo/blast/core/blast_hspstream.h b/algo/blast/core/blast_hspstream.h
new file mode 100644
index 00000000..357f1fd3
--- /dev/null
+++ b/algo/blast/core/blast_hspstream.h
@@ -0,0 +1,206 @@
+#ifndef ALGO_BLAST_CORE__BLAST_HSPSTREAM_H
+#define ALGO_BLAST_CORE__BLAST_HSPSTREAM_H
+
+/* $Id: blast_hspstream.h,v 1.2 2004/06/07 17:06:33 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Christiam Camacho
+ *
+ */
+
+/** @file blast_hspstream.h
+ * Declaration of ADT to save and retrieve lists of HSPs in the BLAST engine.
+ */
+
+#include <algo/blast/core/blast_hits.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** The BlastHSPStream ADT is an opaque data type that defines a thread-safe
+ * interface which is used by the core BLAST code to save lists of HSPs.
+ * The interface currently provides the following services:
+ * - Management of the ADT (construction, destruction)
+ * - Writing lists of HSPs to the ADT
+ * - Reading lists of HSPs from the ADT
+ * .
+ * The default implementation simply buffers HSPs from one stage of the
+ * algorithm to the next @sa FIXME
+ * Implementations of this interface should provide functions for all
+ * the services listed above.
+ */
+typedef struct BlastHSPStream BlastHSPStream;
+
+/** Function pointer typedef to create a new BlastHSPStream structure.
+ * First argument is a pointer to the structure to be populated (allocated for
+ * client implementations), second argument should be typecast'd to the
+ * correct type by user-defined constructor function */
+typedef BlastHSPStream* (*BlastHSPStreamConstructor) (BlastHSPStream*, void*);
+
+/** Function pointer typedef to deallocate a BlastHSPStream structure.
+ * Argument is the BlastHSPStream structure to free, always returns NULL. */
+typedef BlastHSPStream* (*BlastHSPStreamDestructor) (BlastHSPStream*);
+
+/** Function pointer typedef to implement the read/write functionality of the
+ * BlastHSPStream. The first argument is the BlastHSPStream structure used,
+ * second argument is the list of HSPs to be saved/read (reading assumes
+ * ownership, writing releases ownership) */
+typedef int (*BlastHSPStreamMethod) (BlastHSPStream*, BlastHSPList**);
+
+/** Function pointer typedef to implement the close functionality of the
+ * BlastHSPStream. Argument is a pointer to the structure to close for
+ * writing.
+ */
+typedef void (*BlastHSPStreamCloseFnType) (BlastHSPStream*);
+
+/*****************************************************************************/
+
+/** Structure that contains the information needed for BlastHSPStreamNew to
+ * fully populate the BlastHSPStream structure it returns */
+typedef struct BlastHSPStreamNewInfo {
+ BlastHSPStreamConstructor constructor; /**< User-defined function to
+ initialize a BlastHSPStream
+ structure */
+ void* ctor_argument; /**< Argument to the above function */
+} BlastHSPStreamNewInfo;
+
+/** Allocates memory for a BlastHSPStream structure and then invokes the
+ * constructor function defined in its first argument, passing the
+ * ctor_argument member of that same structure. If the constructor function
+ * pointer is not set, NULL is returned.
+ * @param bhsn_info Structure defining constructor and its argument to be
+ * invoked from this function [in]
+ */
+BlastHSPStream* BlastHSPStreamNew(const BlastHSPStreamNewInfo* bhsn_info);
+
+/** Frees the BlastHSPStream structure by invoking the destructor function set
+ * by the user-defined constructor function when the structure is initialized
+ * (indirectly, by BlastHSPStreamNew). If the destructor function pointer is not
+ * set, a memory leak could occur.
+ * @param hsp_stream BlastHSPStream to free [in]
+ * @return NULL
+ */
+BlastHSPStream* BlastHSPStreamFree(BlastHSPStream* hsp_stream);
+
+/** Closes the BlastHSPStream structure for writing. Any subsequent attempt
+ * to write to the stream will return error.
+ * @param hsp_stream The stream to close [in] [out]
+ */
+void BlastHSPStreamClose(BlastHSPStream* hsp_stream);
+
+/** Standard error return value for BlastHSPStream methods */
+extern const int kBlastHSPStream_Error;
+
+/** Standard success return value for BlastHSPStream methods */
+extern const int kBlastHSPStream_Success;
+
+/** Return value when the end of the stream is reached (applicable to read
+ * method only) */
+extern const int kBlastHSPStream_Eof;
+
+/** Invokes the user-specified write function for this BlastHSPStream
+ * implementation.
+ * @param hsp_stream The BlastHSPStream object [in]
+ * @param hsp_list List of HSPs for the HSPStream to keep track of. The caller
+ * releases ownership of the hsp_list [in]
+ * @return kBlastHSPStream_Success on success, otherwise kBlastHSPStream_Error
+ */
+int BlastHSPStreamWrite(BlastHSPStream* hsp_stream, BlastHSPList** hsp_list);
+
+/** Invokes the user-specified read function for this BlastHSPStream
+ * implementation.
+ * @param hsp_stream The BlastHSPStream object [in]
+ * @param hsp_list List of HSPs for the HSPStream to return. The caller
+ * acquires ownership of the hsp_list [out]
+ * @return kBlastHSPStream_Success on success, kBlastHSPStream_Error, or
+ * kBlastHSPStream_Eof on end of stream
+ */
+int BlastHSPStreamRead(BlastHSPStream* hsp_stream, BlastHSPList** hsp_list);
+
+/*****************************************************************************/
+/* The following enumeration and function are only of interest to implementors
+ * of this interface */
+
+/** Defines the methods supported by the BlastHSPStream ADT */
+typedef enum EMethodName {
+ eConstructor, /**< Constructor for a BlastHSPStream implementation */
+ eDestructor, /**< Destructor for a BlastHSPStream implementation */
+ eRead, /**< Read from the BlastHSPStream */
+ eWrite, /**< Write to the BlastHSPStream */
+ eClose, /**< Close the BlastHSPStream for writing */
+ eMethodBoundary /**< Limit to facilitate error checking */
+} EMethodName;
+
+/** Union to encapsulate the supported methods on the BlastHSPStream interface
+ */
+typedef union BlastHSPStreamFunctionPointerTypes {
+ /** Used for read/write function pointers */
+ BlastHSPStreamMethod method;
+
+ /** Used for constructor function pointer */
+ BlastHSPStreamConstructor ctor;
+
+ /** Used for destructor function pointer */
+ BlastHSPStreamDestructor dtor;
+ /** Used for close function pointer */
+ BlastHSPStreamCloseFnType closeFn;
+} BlastHSPStreamFunctionPointerTypes;
+
+/** Sets implementation specific data structure
+ * @param hsp_stream structure to initialize [in]
+ * @param data structure to assign to the hsp_stream [in]
+ * @return kBlastHSPStream_Error if hsp_stream is NULL, otherwise
+ * kBlastHSPStream_Success
+ */
+int SetData(BlastHSPStream* hsp_stream, void* data);
+
+/** Gets implementation specific data structure
+ * @param hsp_stream structure from which to obtain the internal data. It is
+ * expected that the caller (implementation of BlastHSPStream) knows what type
+ * to cast the return value to. [in]
+ * @return pointer to internal data structure of the implementation of the
+ * BlastHSPStream, or NULL if hsp_stream is NULL
+ */
+void* GetData(BlastHSPStream* hsp_stream);
+
+/** Use this function to set the pointers to functions implementing the various
+ * methods supported in the BlastHSPStream interface
+ * @param hsp_stream structure to initialize [in]
+ * @param name method for which a function pointer is being provided [in]
+ * @param fnptr_type union containing the pointer to the function specified by
+ * name [in]
+ * @return kBlastHSPStream_Error if hsp_stream is NULL, otherwise
+ * kBlastHSPStream_Success
+ */
+int SetMethod(BlastHSPStream* hsp_stream,
+ EMethodName name,
+ BlastHSPStreamFunctionPointerTypes fnptr_type);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ALGO_BLAST_CORE__BLAST_HSPSTREAM_H */
diff --git a/algo/blast/core/blast_inline.h b/algo/blast/core/blast_inline.h
index c4568f2b..d328d44e 100644
--- a/algo/blast/core/blast_inline.h
+++ b/algo/blast/core/blast_inline.h
@@ -1,3 +1,34 @@
+/* $Id: blast_inline.h,v 1.2 2004/05/19 14:52:02 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ */
+
+/** @file blast_inline.h
+ * @todo FIXME needs file description
+ */
+
#include <algo/blast/core/mb_lookup.h>
#include <algo/blast/core/blast_util.h>
diff --git a/algo/blast/core/blast_kappa.c b/algo/blast/core/blast_kappa.c
new file mode 100644
index 00000000..2f04aa14
--- /dev/null
+++ b/algo/blast/core/blast_kappa.c
@@ -0,0 +1,2731 @@
+/* $Id: blast_kappa.c,v 1.13 2004/06/10 13:25:02 madden Exp $
+ * ==========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Alejandro Schaffer, Mike Gertz (ported to algo/blast by Tom Madden)
+ *
+ */
+
+/** @file blast_kappa.c
+ * Utilities for doing Smith-Waterman alignments and adjusting the scoring
+ * system for each match in blastpgp
+ */
+
+static char const rcsid[] =
+ "$Id: blast_kappa.c,v 1.13 2004/06/10 13:25:02 madden Exp $";
+
+#include <algo/blast/core/blast_def.h>
+#include <algo/blast/core/blast_hits.h>
+#include <algo/blast/core/blast_stat.h>
+#include <algo/blast/core/blast_kappa.h>
+#include <algo/blast/core/blast_util.h>
+#include <algo/blast/core/blast_gapalign.h>
+#include <algo/blast/core/blast_traceback.h>
+#include <algo/blast/core/blast_filter.h>
+#include "blast_psi_priv.h"
+#include "matrix_freq_ratios.h"
+#include "blast_gapalign_pri.h"
+
+
+
+#define EVALUE_STRETCH 5 /*by what factor might initially reported E-value
+ exceed true Evalue*/
+
+#define PRO_TRUE_ALPHABET_SIZE 20
+#define scoreRange 10000
+
+
+
+
+/* Positions of the true characters in the protein alphabet.  The array is
+ * sized with PRO_TRUE_ALPHABET_SIZE rather than a literal so that its
+ * declared length is tied to the named constant. */
+Int4 trueCharPositions[PRO_TRUE_ALPHABET_SIZE] =
+   {1,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,22};
+
+
+/** Structure used for full Smith-Waterman results.
+ * Each instance describes one alignment between the query and a match
+ * sequence and is a node of a singly linked list (see the next field).
+*/
+typedef struct SWResults {
+ struct SWResults *next; /**< next object in list */
+ Uint1* seq; /**< match sequence. */
+ Int4 seqStart; /**< start of alignment on match */
+ Int4 seqEnd; /**< end of alignment on match */
+ Int4 queryStart; /**< start of alignment on query */
+ Int4 queryEnd; /**< end of alignment on query */
+ Int4 score; /**< score of alignment */
+ double eValue; /**< best expect value for this match record */
+ double eValueThisAlign; /**< expect value of this alignment. */
+ double Lambda; /**< Karlin-Altschul parameter. */
+ double logK; /**< log of Karlin-Altschul parameter */
+ Boolean isFirstAlignment; /**< TRUE if first alignment for this sequence */
+ Int4 subject_index; /**< ordinal ID of match sequence, needed to break
+ ties on rare occasions */
+ BlastHSP* hsp; /**< Saves alignment information for conversion to SeqAlign. */
+} SWResults;
+
+/**
+ * Release every node of a linked list of SWResults.  Only the list nodes
+ * themselves are freed; the BlastHSP attached to each node is left alone
+ * because it is saved elsewhere.
+ * @param sw_results first node of the list to release (may be NULL) [in]
+ * @return always NULL
+ */
+static SWResults* SWResultsFree(SWResults* sw_results)
+{
+   SWResults *node = sw_results;   /* node about to be released */
+
+   while (node != NULL) {
+      /* Remember the successor before the node itself goes away. */
+      SWResults *following = node->next;
+
+      sfree(node);
+      node = following;
+   }
+
+   return NULL;
+}
+
+/**
+ * Allocate a zero-filled SWResults node and fill in the fields that are
+ * common to every kind of search.  Each argument is stored in the field of
+ * the same meaning; fields not listed here (coordinates, hsp) stay zero.
+ * @param sequence match sequence [in]
+ * @param score score of match [in]
+ * @param newEvalue expect value of this alignment [in]
+ * @param bestEvalue lowest expect value of this match sequence [in]
+ * @param isFirstAlignment TRUE if first alignment for this sequence [in]
+ * @param lambda Karlin-Altschul parameter [in]
+ * @param logK log of Karlin-Altschul parameter [in]
+ * @param subject_index ordinal ID of match sequence [in]
+ * @return the new node, or NULL if the allocation failed
+ */
+static SWResults *
+SWResultsNew(Uint1* sequence,
+             Int4 score,
+             double newEvalue,
+             double bestEvalue,
+             Boolean isFirstAlignment,
+             double lambda,
+             double logK,
+             Int4 subject_index)
+{
+   SWResults *result = (SWResults *) calloc(1, sizeof(SWResults));
+
+   if(NULL == result) {
+      return NULL;
+   }
+
+   /* Identity of the match. */
+   result->seq = sequence;
+   result->subject_index = subject_index;
+   result->isFirstAlignment = isFirstAlignment;
+
+   /* Score and significance. */
+   result->score = score;
+   result->eValue = bestEvalue;
+   result->eValueThisAlign = newEvalue;
+
+   /* Statistical parameters. */
+   result->Lambda = lambda;
+   result->logK = logK;
+
+   /* The node starts out unlinked. */
+   result->next = NULL;
+
+   return result;
+}
+
+
+/**
+ * An instance of struct Kappa_MatchRecord represents all alignments
+ * of a query sequence to a matching subject sequence.
+ *
+ * For a given query-subject pair, a Kappa_MatchRecord is created once it
+ * is known that the eValue of the best alignment is small enough to be
+ * significant. Then alignments of the two sequences are added to the
+ * Kappa_MatchRecord one at a time, using one of the following two routines
+ *
+ * - Kappa_MatchRecordInsertHSP inserts the alignment represented
+ * by a single HSP into the match record.
+ * - Kappa_MatchRecordInsertSwAlign inserts an alignment computed by
+ * the Smith-Waterman algorithm into the match record.
+ *
+ * Alignments should be specified in order of smallest (best) e-value to
+ * largest (worst) e-value.
+ *
+ * The Kappa_MatchRecord::alignments field stores the alignments in
+ * the reverse order, i.e. from largest (worst) e-value to smallest
+ * (best) e-value, because each insertion routine prepends the new
+ * alignment to the list. The reason the alignments are stored in reverse
+ * order is that this order is consistent with the order that matches
+ * are returned by a SWheap (see below), i.e. worst to best.
+ */
+
+struct Kappa_MatchRecord {
+ double eValue; /**< best evalue of all alignments in the record */
+ Int4 score; /**< best score of all alignments in the record */
+ Uint1* sequence; /**< the subject sequence */
+ Int4 subject_index; /**< the index number of the subject sequence */
+ SWResults *alignments; /**< a list of query-subject alignments */
+};
+typedef struct Kappa_MatchRecord Kappa_MatchRecord;
+
+
+/** Set up a Kappa_MatchRecord for a new query-subject pair.  Every
+ * argument is copied into the field of the same name and the list of
+ * alignments starts out empty.
+ * @param self the record to be filled in [in][out]
+ * @param eValue expect value of this alignment [in]
+ * @param score score of match [in]
+ * @param sequence match sequence [in]
+ * @param subject_index ordinal ID of sequence in database [in]
+ */
+static void
+Kappa_MatchRecordInitialize(Kappa_MatchRecord * self,
+                            double eValue,
+                            Int4 score,
+                            Uint1* sequence,
+                            Int4 subject_index)
+{
+   /* No alignment has been recorded yet. */
+   self->alignments = NULL;
+
+   /* Which subject sequence this record describes. */
+   self->sequence = sequence;
+   self->subject_index = subject_index;
+
+   /* How good the match is. */
+   self->score = score;
+   self->eValue = eValue;
+}
+
+
+/** Compute the number of identities in the alignment of query_seq to the
+ * matching sequence stored in SWAlign, and store the count in
+ * SWAlign->hsp->num_ident.  The counting itself is delegated to
+ * Blast_HSPGetNumIdentities, which works from the HSP attached to SWAlign.
+ * @param SWAlign input structure holding HSP to be modified [in][out]
+ * @param query_seq Query sequence used for calculation [in]
+ * @return always 0
+ */
+static Int2 SWAlignGetNumIdentical(SWResults *SWAlign, Uint1* query_seq)
+{
+   /* Both outputs start at zero so that a well-defined value is stored
+    * even if the helper returns without writing them. */
+   Int4 num_ident = 0;       /* number of identities */
+   Int4 align_length = 0;    /* aligned length, calculated but discarded */
+
+   Blast_HSPGetNumIdentities(query_seq, SWAlign->seq,
+                             SWAlign->hsp, TRUE, &num_ident, &align_length);
+
+   SWAlign->hsp->num_ident = num_ident;
+   return 0;
+}
+
+/**
+ * Insert an alignment represented by a single HSP into the match
+ * record.  The new alignment is prepended to self->alignments.
+ *
+ * On success ownership of the HSP moves to the match record and *hsp is
+ * set to NULL.  If the list node cannot be allocated the record is left
+ * unchanged and *hsp is left untouched, so the caller still owns the HSP.
+ * @param self the match record to be modified [in][out]
+ * @param hsp contains alignment and scoring information,
+ * will be NULLed out on success [in][out]
+ * @param lambda a statistical parameter used to evaluate the significance of the
+ * match [in]
+ * @param logK a statistical parameter used to evaluate the significance of the
+ * match [in]
+ * @param localScalingFactor the factor by which the scoring system has been
+ * scaled in order to obtain greater precision [in]
+ * @param query_seq Used to calculate percent identity [in]
+ */
+static void
+Kappa_MatchRecordInsertHSP(
+   Kappa_MatchRecord * self,
+   BlastHSP* *hsp,
+   double lambda,
+   double logK,
+   double localScalingFactor,
+   Uint1* query_seq
+) {
+   SWResults *newSW;            /* A new SWResults object that
+                                   represents the alignment to be
+                                   inserted */
+
+   newSW =
+      SWResultsNew(self->sequence, self->score,
+                   (*hsp)->evalue, self->eValue,
+                   (Boolean) (NULL == self->alignments),
+                   localScalingFactor * lambda, logK,
+                   self->subject_index);
+   if(NULL == newSW) {
+      /* Allocation failed: nothing was linked in and the HSP still
+       * belongs to the caller. */
+      return;
+   }
+
+   newSW->queryStart = (*hsp)->gap_info->start1;
+   newSW->seqStart = (*hsp)->gap_info->start2;
+   newSW->hsp = *hsp;
+   *hsp = NULL; /* Information stored on SWResults now. */
+   SWAlignGetNumIdentical(newSW, query_seq); /* Calculate num identities, attach to HSP. */
+   newSW->next = self->alignments;
+
+   self->alignments = newSW;
+}
+
+
+/**
+ * Insert an alignment computed by the Smith-Waterman algorithm into
+ * the match record.  The new alignment is prepended to self->alignments;
+ * when it is the first alignment of the record, newScore also becomes the
+ * score of the record.
+ *
+ * If the list node or its HSP cannot be created, the match record is left
+ * exactly as it was on entry.
+ * @param self the match record to be modified [in][out]
+ * @param newScore the score of the alignment [in]
+ * @param newEvalue the expect value of the alignment [in]
+ * @param lambda a statistical parameter used to evaluate the significance of the
+ * match [in]
+ * @param logK a statistical parameter used to evaluate the significance of the
+ * match [in]
+ * @param localScalingFactor the factor by which the scoring system has been
+ * scaled in order to obtain greater precision [in]
+ * @param matchStart start of the alignment in the subject [in]
+ * @param matchAlignmentExtent length of the alignment in the subject [in]
+ * @param queryStart start of the alignment in the query [in]
+ * @param queryAlignmentExtent length of the alignment in the query [in]
+ * @param reverseAlignScript Alignment information (script) returned by
+ * the X-drop alignment algorithm [in]
+ * @param query_seq Used to calculate percent identity [in]
+ */
+static void
+Kappa_MatchRecordInsertSwAlign(
+   Kappa_MatchRecord * self,
+   Int4 newScore,
+   double newEvalue,
+   double lambda,
+   double logK,
+   double localScalingFactor,
+   Int4 matchStart,
+   Int4 matchAlignmentExtent,
+   Int4 queryStart,
+   Int4 queryAlignmentExtent,
+   Int4 * reverseAlignScript,
+   Uint1* query_seq
+) {
+   SWResults *newSW;              /* A new SWResults object that
+                                     represents the alignment to be
+                                     inserted */
+   GapEditBlock* editBlock=NULL;  /* Contains representation of traceback. */
+   Boolean isFirst;               /* TRUE if the record has no alignment yet */
+   Int4 recordScore;              /* score the record has once this
+                                     alignment is inserted */
+
+   isFirst = (Boolean) (NULL == self->alignments);
+   /* For the first alignment recorded for this match the x-drop score,
+    * "newScore", is used as the score for the sequence.  The record is
+    * only updated once the insertion is known to succeed. */
+   recordScore = isFirst ? newScore : self->score;
+
+   newSW =
+      SWResultsNew(self->sequence, recordScore, newEvalue,
+                   self->eValue, isFirst,
+                   lambda * localScalingFactor, logK, self->subject_index);
+   if(NULL == newSW) {
+      return;                     /* out of memory; record unchanged */
+   }
+
+   newSW->seqStart = matchStart;
+   newSW->seqEnd = matchStart + matchAlignmentExtent;
+   newSW->queryStart = queryStart;
+   newSW->queryEnd = queryStart + queryAlignmentExtent;
+   newSW->next = self->alignments;
+
+   BLAST_TracebackToGapEditBlock(reverseAlignScript, queryAlignmentExtent,
+                                 matchAlignmentExtent,
+                                 queryStart, matchStart, &editBlock);
+
+   Blast_HSPInit(queryStart, queryStart + queryAlignmentExtent,
+                 matchStart, matchStart + matchAlignmentExtent,
+                 0, 0, 0, 0, newScore, &editBlock, &(newSW->hsp));
+   if(NULL == newSW->hsp) {
+      /* No HSP was produced, so there is nothing to annotate or keep.
+       * NOTE(review): if Blast_HSPInit can fail without consuming
+       * editBlock, the edit block should be released here as well. */
+      sfree(newSW);
+      return;
+   }
+   newSW->hsp->evalue = newEvalue;
+
+   SWAlignGetNumIdentical(newSW, query_seq); /* Calculate num identities, attach to HSP. */
+
+   self->score = recordScore;
+   self->alignments = newSW;
+}
+
+
+/**
+ * The struct SWheapRecord data type is used below to define the
+ * internal structure of a SWheap (see below). A SWheapRecord
+ * represents all alignments of a query sequence to a particular
+ * matching sequence.
+ *
+ * The SWheapRecord::theseAlignments field is a linked list of alignments
+ * of the query-subject pair. The list is ordered by evalue in
+ * descending order. Thus the first element has biggest (worst) evalue
+ * and the last element has smallest (best) evalue.
+ */
+typedef struct SWheapRecord {
+ double bestEvalue; /* best (smallest) evalue of all alignments
+ * in the record */
+ SWResults *theseAlignments; /* a list of alignments */
+} SWheapRecord;
+
+
+/** Decide whether one heap record ranks above another.  Records are
+ * ranked by bestEvalue; records with equal bestEvalue are ranked by the
+ * subject_index of their first alignment.
+ * @param place1 the first record to be compared [in]
+ * @param place2 the other record to be compared [in]
+ * @return TRUE if place1 has the larger bestEvalue, or the same bestEvalue
+ *         and the larger subject_index; FALSE otherwise
+ */
+static Boolean
+SWheapRecordCompare(SWheapRecord * place1,
+                    SWheapRecord * place2)
+{
+   if(place1->bestEvalue > place2->bestEvalue) {
+      return TRUE;
+   }
+   if(place1->bestEvalue != place2->bestEvalue) {
+      return FALSE;
+   }
+   /* Equal evalues: fall back on the ordinal ID of the subject. */
+   return (Boolean) (place1->theseAlignments->subject_index >
+                     place2->theseAlignments->subject_index);
+}
+
+
+/** Exchange the contents of two records of the heap array.
+ * @param heapArray holds the records to be swapped [in][out]
+ * @param i index of the first record to be swapped [in]
+ * @param j index of the other record to be swapped [in]
+ */
+static void
+SWheapRecordSwap(SWheapRecord * heapArray,
+                 Int4 i,
+                 Int4 j)
+{
+   /* A whole-record copy carries both bestEvalue and theseAlignments. */
+   SWheapRecord held = heapArray[i];
+
+   heapArray[i] = heapArray[j];
+   heapArray[j] = held;
+}
+
+
+#ifdef KAPPA_INTENSE_DEBUG
+
+/**
+ * Verifies that the part of the heap rooted at heapArray[i], within the
+ * array heapArray[1] .. heapArray[n], is in valid heap order: no child
+ * ranks above its parent according to SWheapRecordCompare.  Both children
+ * of every node are examined, so this routine visits every element below
+ * node i and is very time consuming.  It is for debugging purposes only.
+ * @param heapArray holds the heap; indices start at 1 [in]
+ * @param i index of the root of the subtree to check, must be >= 1 [in]
+ * @param n index of the last element in the heap [in]
+ * @return TRUE if the subtree is a valid heap, FALSE otherwise
+ */
+static Boolean
+SWheapIsValid(SWheapRecord * heapArray,
+              Int4 i,
+              Int4 n)
+{
+   Int4 child;    /* index of a child of node i: 2 * i, then 2 * i + 1 */
+
+   for(child = 2 * i; child <= 2 * i + 1 && child <= n; child++) {
+      if(SWheapRecordCompare(&(heapArray[child]), &(heapArray[i])) ||
+         !SWheapIsValid(heapArray, child, n)) {
+         return FALSE;
+      }
+   }
+   return TRUE;
+}
+
+/* Debug-only assertion.  With KAPPA_INTENSE_DEBUG defined, a false expr
+ * prints the line number and the text of the expression to stderr and
+ * terminates the program; otherwise the macro expands to a no-op.  Both
+ * arms of the conditional have type void, as ISO C requires. */
+#define KAPPA_ASSERT(expr) ((expr) ? (void)0 : \
+((void)fprintf( stderr, "KAPPA_ASSERT failed line %d: %s", __LINE__, #expr ), \
+exit(1)))
+#else
+#define KAPPA_ASSERT(expr) (void)(0)
+#endif
+
+
+/** Sift the record at index i down the heap.  On entry, all but the first
+ * element of the array heapArray[i] .. heapArray[n] are in valid heap
+ * order; on exit they all are.  At each step the record is exchanged with
+ * whichever of its children (2 * i and 2 * i + 1) ranks highest, until
+ * neither child ranks above it.
+ * @param heapArray holds the heap; indices start at 1 [in][out]
+ * @param i index of the record that may be out of place [in]
+ * @param n index of the last element in the heap [in]
+ */
+static void
+SWheapifyDown(SWheapRecord * heapArray,
+              Int4 i,
+              Int4 n)
+{
+   for(;;) {
+      Int4 left = 2 * i;        /* index of the left child of node i */
+      Int4 right = left + 1;    /* index of the right child of node i */
+      Int4 top = i;             /* highest ranked of node i and its children */
+
+      if(left <= n &&
+         SWheapRecordCompare(&(heapArray[left]), &(heapArray[top]))) {
+         top = left;
+      }
+      if(right <= n &&
+         SWheapRecordCompare(&(heapArray[right]), &(heapArray[top]))) {
+         top = right;
+      }
+      if(top == i) {
+         break;                 /* node i already outranks its children */
+      }
+      SWheapRecordSwap(heapArray, i, top);
+      i = top;                  /* continue one level further down */
+   }
+   KAPPA_ASSERT(SWheapIsValid(heapArray, i, n));
+}
+
+
+/** Sift the record at index i up the heap.  On entry, all but the last
+ * element of the array heapArray[1] .. heapArray[i] are in valid heap
+ * order; on exit they all are.  The record is exchanged with its parent
+ * (index i / 2) for as long as it ranks above that parent.
+ * @param heapArray holds the heap; indices start at 1 [in][out]
+ * @param i index of the record that may be out of place [in]
+ * @param n index of the last element in the heap; only used by the
+ *          debugging check [in]
+ */
+static void
+SWheapifyUp(SWheapRecord * heapArray,
+            Int4 i,
+            Int4 n)
+{
+   Int4 parent;    /* index of the node that is the parent of node i */
+
+   for(parent = i / 2;
+       parent >= 1 &&
+          SWheapRecordCompare(&(heapArray[i]), &(heapArray[parent]));
+       parent = i / 2) {
+      SWheapRecordSwap(heapArray, i, parent);
+      i = parent;
+   }
+   KAPPA_ASSERT(SWheapIsValid(heapArray, 1, n));
+}
+
+/** A SWheap represents a collection of alignments between one query
+ * sequence and several matching subject sequences.
+ *
+ * Each matching sequence is allocated one record in a SWheap. The
+ * eValue of a query-subject pair is the best (smallest positive)
+ * evalue of all alignments between the two sequences.
+ *
+ * A match will be inserted in the SWheap if:
+ * - there are fewer than SWheap::heapThreshold elements in the SWheap;
+ * - the eValue of the match is <= SWheap::ecutoff; or
+ * - the eValue of the match is less than the largest (worst) eValue
+ * already in the SWheap.
+ *
+ * If there are >= SWheap::heapThreshold matches already in the SWheap
+ * when a new match is to be inserted, then the match with the largest
+ * (worst) eValue is removed, unless the largest eValue <=
+ * SWheap::ecutoff. Matches with eValue <= SWheap::ecutoff are never
+ * removed by the insertion routine. As a consequence, the SWheap can
+ * hold an arbitrarily large number of matches, although it is
+ * atypical for the number of matches to be greater than
+ * SWheap::heapThreshold.
+ *
+ * Once all matches have been collected, the SWheapToFlatList routine
+ * may be invoked to return a list of all alignments. (see below).
+ *
+ * While the number of elements in a heap < SWheap::heapThreshold, the
+ * SWheap is implemented as an unordered array, rather than a
+ * heap-ordered array. The SWheap is converted to a heap-ordered
+ * array as soon as it becomes necessary to order the matches by
+ * evalue. The routines that operate on a SWheap should behave
+ * properly whichever state the SWheap is in.
+ *
+ * In either representation the records occupy indices 1 .. n of the
+ * array; index 0 is not used.
+ */
+struct SWheap {
+ Int4 n; /**< The current number of elements */
+ Int4 capacity; /**< The maximum number of elements that may be
+ inserted before the SWheap must be resized */
+ Int4 heapThreshold; /**< see above */
+ double ecutoff; /**< matches with evalue at or below ecutoff may
+ always be inserted in the SWheap */
+ double worstEvalue; /**< the worst (biggest) evalue currently in
+ the heap */
+
+ SWheapRecord *array; /**< the SWheapRecord array if the SWheap is
+ being represented as an unordered array */
+ SWheapRecord *heapArray; /**< the SWheapRecord array if the SWheap is
+ being represented as a heap-ordered
+ array. At least one of (array, heapArray)
+ is NULL */
+
+};
+typedef struct SWheap SWheap;
+
+
+/** Switch a SWheap from the unordered-array representation to the
+ * heap-ordered representation.  The record storage is handed over from
+ * self->array to self->heapArray and then put in heap order from the
+ * last parent node back to the root.  Nothing is changed if the SWheap
+ * is already a heap.
+ * @param self record to be modified [in][out]
+ */
+static void
+ConvertToHeap(SWheap * self)
+{
+   if(self->array != NULL) {
+      /* We aren't already a heap */
+      Int4 count = self->n;      /* number of elements in the heap */
+      Int4 node = count / 2;     /* heap node index, starting at the last
+                                    node that has a child */
+
+      self->heapArray = self->array;
+      self->array = NULL;
+
+      while(node >= 1) {
+         SWheapifyDown(self->heapArray, node, count);
+         node--;
+      }
+   }
+   KAPPA_ASSERT(SWheapIsValid(self->heapArray, 1, self->n));
+}
+
+/*When the heap is about to exceed its capacity, its capacity is raised
+ *to the larger of capacity * SWHEAP_RESIZE_FACTOR and
+ *capacity + SWHEAP_MIN_RESIZE, i.e. it always grows by at least
+ *SWHEAP_MIN_RESIZE elements. The heap never decreases in size */
+#define SWHEAP_RESIZE_FACTOR 1.5
+#define SWHEAP_MIN_RESIZE 100
+
+/** Tell whether a match with the given eValue would be kept by self.
+ * A match is kept when the SWheap holds fewer than heapThreshold
+ * elements, when its eValue is at most ecutoff, or when its eValue is
+ * better than the worst one currently stored.  The SWheap is only
+ * inspected, not changed.
+ * @param self the SWheap to be queried [in]
+ * @param eValue specified expect value [in]
+ * @return TRUE if such a match would be inserted, FALSE otherwise
+ */
+static Boolean
+SWheapWouldInsert(SWheap * self,
+                  double eValue)
+{
+   if(self->n < self->heapThreshold) {
+      return TRUE;     /* still room below the threshold */
+   }
+   if(eValue <= self->ecutoff) {
+      return TRUE;     /* significant matches are always accepted */
+   }
+   return (Boolean) (eValue < self->worstEvalue);
+}
+
+
+/** Make room for one more record in a SWheapRecord array whose records
+ * occupy indices 1 .. n.  When n has reached *capacity the array is
+ * enlarged to the larger of *capacity + SWHEAP_MIN_RESIZE and
+ * *capacity * SWHEAP_RESIZE_FACTOR.
+ * @param records the array to enlarge; updated if it moves [in][out]
+ * @param n number of records currently stored [in]
+ * @param capacity number of records the array can hold; updated when the
+ *                 array grows [in][out]
+ * @return TRUE if index n + 1 may be written; FALSE if memory ran out, in
+ *         which case the array and *capacity are unchanged
+ */
+static Boolean
+s_SWheapReserveSlot(SWheapRecord ** records,
+                    Int4 n,
+                    Int4 * capacity)
+{
+   if(n >= *capacity) {
+      Int4 newCapacity;        /* capacity after the array is resized */
+      SWheapRecord *grown;     /* the resized array */
+
+      newCapacity = MAX(SWHEAP_MIN_RESIZE + *capacity,
+                        (Int4) (SWHEAP_RESIZE_FACTOR * *capacity));
+      /* Keep the old pointer until realloc is known to have succeeded. */
+      grown = (SWheapRecord *)
+         realloc(*records, (newCapacity + 1) * sizeof(SWheapRecord));
+      if(NULL == grown) {
+         return FALSE;
+      }
+      *records = grown;
+      *capacity = newCapacity;
+   }
+   return TRUE;
+}
+
+
+/** Try to insert matchRecord into the SWheap. The alignments stored in
+ * matchRecord are used directly, i.e. they are not copied, but are
+ * rather stored in the SWheap or deleted.  In every case
+ * matchRecord->alignments is NULL on return.
+ *
+ * If the record array has to grow and memory cannot be obtained, the
+ * SWheap keeps its previous contents and the new alignments are deleted.
+ * @param self record to be modified [in][out]
+ * @param matchRecord record to be inserted [in][out]
+ */
+static void
+SWheapInsert(SWheap * self,
+             Kappa_MatchRecord * matchRecord)
+{
+   SWResults *discardedAlignments = NULL;  /* alignments that are dropped
+                                            * by this call and must be
+                                            * freed */
+
+   if(self->array && self->n >= self->heapThreshold) {
+      ConvertToHeap(self);
+   }
+   if(self->array != NULL) {
+      /* "self" is currently a list. Add the new alignments to the end */
+      if(s_SWheapReserveSlot(&self->array, self->n, &self->capacity)) {
+         SWheapRecord *heapRecord;  /* destination for the new alignments */
+         heapRecord = &self->array[++self->n];
+         heapRecord->bestEvalue = matchRecord->eValue;
+         heapRecord->theseAlignments = matchRecord->alignments;
+         if( self->worstEvalue < matchRecord->eValue ) {
+            self->worstEvalue = matchRecord->eValue;
+         }
+      } else {
+         discardedAlignments = matchRecord->alignments;
+      }
+   } else {                     /* "self" is currently a heap */
+      if(self->n < self->heapThreshold ||
+         (matchRecord->eValue <= self->ecutoff &&
+          self->worstEvalue <= self->ecutoff)) {
+         /* The new alignments must be inserted into the heap, and all old
+          * alignments retained */
+         if(s_SWheapReserveSlot(&self->heapArray, self->n,
+                                &self->capacity)) {
+            SWheapRecord *heapRecord; /* Destination for the new alignments */
+            heapRecord = &self->heapArray[++self->n];
+            heapRecord->bestEvalue = matchRecord->eValue;
+            heapRecord->theseAlignments = matchRecord->alignments;
+
+            SWheapifyUp(self->heapArray, self->n, self->n);
+         } else {
+            discardedAlignments = matchRecord->alignments;
+         }
+      } else if(matchRecord->eValue >= self->worstEvalue) {
+         /* the new alignments must be discarded */
+         discardedAlignments = matchRecord->alignments;
+      } else {
+         /* the largest element in the heap must be discarded */
+         SWheapRecord *heapRecord;  /* destination for the new alignments */
+         discardedAlignments = self->heapArray[1].theseAlignments;
+
+         heapRecord = &self->heapArray[1];
+         heapRecord->bestEvalue = matchRecord->eValue;
+         heapRecord->theseAlignments = matchRecord->alignments;
+
+         SWheapifyDown(self->heapArray, 1, self->n);
+      }
+
+      self->worstEvalue = self->heapArray[1].bestEvalue;
+      KAPPA_ASSERT(SWheapIsValid(self->heapArray, 1, self->n));
+   }
+   /* end else "self" is currently a heap. */
+
+   /* Free whatever was dropped (a no-op when nothing was). */
+   SWResultsFree(discardedAlignments);
+
+   /* The matchRecord->alignments pointer is no longer valid */
+   matchRecord->alignments = NULL;
+}
+
+
+/** Tell whether the SWheap has reached the point where only matches
+ * with evalue <= self->ecutoff may still be inserted: it holds at least
+ * heapThreshold elements and even its worst evalue is at most ecutoff.
+ * @param self heap containing data [in]
+ * @return TRUE if only matches at or below the cutoff will be accepted
+ */
+static Boolean
+SWheapWillAcceptOnlyBelowCutoff(SWheap * self)
+{
+   if(self->n < self->heapThreshold) {
+      return FALSE;    /* below the threshold anything is accepted */
+   }
+   return (Boolean) (self->worstEvalue <= self->ecutoff);
+}
+
+
+/** Initialize a new SWheap; parameters to this function correspond
+ * directly to fields in the SWheap.  The SWheap begins life as an
+ * unordered list with room for capacity records at indices 1 .. capacity.
+ *
+ * If the record array cannot be allocated, the SWheap is left empty with
+ * a capacity of 0 and no storage, so that the first insertion allocates
+ * the array instead of writing through a NULL pointer.
+ * @param self the object to be filled [in|out]
+ * @param capacity size of heap [in]
+ * @param heapThreshold items always inserted if fewer than this number in heap [in]
+ * @param ecutoff items with a expect value less than this will always be inserted into heap [in]
+ */
+static void
+SWheapInitialize(SWheap * self,
+                 Int4 capacity,
+                 Int4 heapThreshold,
+                 double ecutoff)
+{
+   self->n = 0;
+   self->heapThreshold = heapThreshold;
+   self->ecutoff = ecutoff;
+   self->heapArray = NULL;
+   self->worstEvalue = 0;
+   /* Begin life as a list.  One extra record is allocated because index 0
+    * is not used; passing the count and the size separately lets calloc
+    * detect an overflowing request. */
+   self->array =
+      (SWheapRecord *) calloc(capacity + 1, sizeof(SWheapRecord));
+   /* Only advertise the capacity that was really obtained. */
+   self->capacity = (self->array != NULL) ? capacity : 0;
+}
+
+
+/** Release the storage associated with the fields of a SWheap. Don't
+ * delete the SWheap structure itself.  Only the record array is freed;
+ * the alignment lists referenced by the records are not touched.  Both
+ * array pointers are cleared afterwards so that the SWheap holds no
+ * dangling pointer and releasing it a second time is harmless.
+ * @param self record to be cleared [in][out]
+ */
+static void
+SWheapRelease(SWheap * self)
+{
+   if(self->heapArray) free(self->heapArray);
+   if(self->array) free(self->array);
+
+   self->n = self->capacity = self->heapThreshold = 0;
+   self->heapArray = NULL;
+   self->array = NULL;
+}
+
+
+/** Remove and return the element in the SWheap with largest (worst)
+ * evalue.  The SWheap is first converted to a heap if it is still a list;
+ * the last record then takes the place of the root and is sifted down.
+ * @param self heap that contains record to be removed [in][out]
+ * @return the list of alignments of the removed record, or NULL if the
+ *         SWheap is empty
+ */
+static SWResults *
+SWheapPop(SWheap * self)
+{
+   SWResults *popped = NULL;    /* alignments of the removed record */
+
+   ConvertToHeap(self);
+   if(self->n > 0) {            /* The heap is not empty */
+      SWheapRecord *heap = self->heapArray;
+
+      popped = heap[1].theseAlignments;
+
+      /* Move the last record to the root, shrink the heap by one and
+       * restore heap order. */
+      heap[1] = heap[self->n];
+      self->n -= 1;
+      SWheapifyDown(heap, 1, self->n);
+   }
+
+   KAPPA_ASSERT(SWheapIsValid(self->heapArray, 1, self->n));
+
+   return popped;
+}
+
+
+/** Drain a SWheap into one flat list of SWResults. Note that there
+ * may be more than one alignment per match. Records are popped from
+ * worst to best and every alignment is pushed onto the front of the
+ * result, so the list of all alignments ends up sorted by the
+ * following keys:
+ * - First by the evalue the best alignment between the query and a
+ * particular matching sequence;
+ * - Second by the subject_index of the matching sequence; and
+ * - Third by the evalue of each individual alignment.
+ * The SWheap is empty afterwards.
+ * @param self heap to be "flattened" [in][out]
+ * @return "flattened" version of the input
+ */
+static SWResults *
+SWheapToFlatList(SWheap * self)
+{
+   SWResults *list = NULL;      /* the new list of SWResults */
+   SWResults *popped;           /* alignments of the record just removed
+                                   from the heap */
+
+   for(popped = SWheapPop(self); popped != NULL; popped = SWheapPop(self)) {
+      SWResults *node = popped; /* alignment being moved to "list" */
+
+      while(node != NULL) {
+         SWResults *rest = node->next;  /* alignments still to be moved */
+
+         node->next = list;
+         list = node;
+         node = rest;
+      }
+   }
+
+   return list;
+}
+
+/** One entry of the score vector used by the Smith-Waterman routines;
+ * an array of these, one per position of the match sequence, keeps one
+ * row of the Smith-Waterman matrix
+ */
+typedef struct SWpairs {
+ Int4 noGap; /* score stored for this position on the previous row */
+ Int4 gapExists; /* score for this position when a gap has been started
+ or extended (initialized to -gapOpen) */
+} SWpairs;
+
+
+/** computes Smith-Waterman local alignment score and returns the
+ * evalue
+ *
+ * Only one row of the dynamic-programming matrix is kept in memory.  If
+ * that row cannot be allocated, the routine reports an empty alignment
+ * (score 0 ending at position 0 of both sequences) and returns the expect
+ * value that corresponds to a score of 0.
+ *
+ * @param matchSeq is a database sequence matched by this query [in]
+ * @param matchSeqLength is the length of matchSeq in amino acids [in]
+ * @param query is the input query sequence [in]
+ * @param queryLength is the length of query [in]
+ * @param matrix is the position-specific matrix associated with query [in]
+ * @param gapOpen is the cost of opening a gap [in]
+ * @param gapExtend is the cost of extending an existing gap by 1 position [in]
+ * @param matchSeqEnd returns the final position in the matchSeq of an optimal
+ * local alignment [out]
+ * @param queryEnd returns the final position in query of an optimal
+ * local alignment [out]
+ * matchSeqEnd and queryEnd can be used to run the local alignment in reverse
+ * to find optimal starting positions
+ * @param score is used to pass back the optimal score [out]
+ * @param kbp holds the Karlin-Altschul parameters [in]
+ * @param effSearchSpace effective search space for calculation of expect value [in]
+ * @param positionSpecific determines whether matrix is position specific or not [in]
+ * @return the expect value of the alignment
+*/
+
+static double BLbasicSmithWatermanScoreOnly(Uint1 * matchSeq,
+   Int4 matchSeqLength, Uint1 *query, Int4 queryLength, Int4 **matrix,
+   Int4 gapOpen, Int4 gapExtend, Int4 *matchSeqEnd, Int4 *queryEnd, Int4 *score,
+   Blast_KarlinBlk* kbp, Int8 effSearchSpace, Boolean positionSpecific)
+{
+
+   Int4 bestScore; /*best score seen so far*/
+   Int4 newScore;  /* score of next entry*/
+   Int4 bestMatchSeqPos, bestQueryPos; /*position ending best score in
+                           matchSeq and query sequences*/
+   SWpairs *scoreVector; /*keeps one row of the Smith-Waterman matrix
+                           overwrite old row with new row*/
+   Int4 *matrixRow; /*one row of score matrix*/
+   Int4 newGapCost; /*cost to have a gap of one character*/
+   Int4 prevScoreNoGapMatchSeq; /*score one row and column up
+                               with no gaps*/
+   Int4 prevScoreGapMatchSeq;   /*score if a gap already started in matchSeq*/
+   Int4 continueGapScore; /*score for continuing a gap in matchSeq*/
+   Int4 matchSeqPos, queryPos; /*positions in matchSeq and query*/
+   double returnEvalue; /*e-value to return*/
+
+
+   /* Count and element size are passed separately so that calloc itself
+    * rejects a request too large to represent. */
+   scoreVector = (SWpairs *) calloc(matchSeqLength, sizeof(SWpairs));
+   if (NULL == scoreVector) {
+      /* No working row is available (out of memory, or nothing to
+       * allocate for an empty match sequence): report an empty alignment
+       * instead of writing through a NULL pointer. */
+      *matchSeqEnd = 0;
+      *queryEnd = 0;
+      *score = 0;
+      return BLAST_KarlinStoE_simple(0, kbp, effSearchSpace);
+   }
+   bestMatchSeqPos = 0;
+   bestQueryPos = 0;
+   bestScore = 0;
+   newGapCost = gapOpen + gapExtend;
+   for (matchSeqPos = 0; matchSeqPos < matchSeqLength; matchSeqPos++) {
+      scoreVector[matchSeqPos].noGap = 0;
+      scoreVector[matchSeqPos].gapExists = -(gapOpen);
+   }
+   for(queryPos = 0; queryPos < queryLength; queryPos++) {
+      if (positionSpecific)
+         matrixRow = matrix[queryPos];
+      else
+         matrixRow = matrix[query[queryPos]];
+      newScore = 0;
+      prevScoreNoGapMatchSeq = 0;
+      prevScoreGapMatchSeq = -(gapOpen);
+      for(matchSeqPos = 0; matchSeqPos < matchSeqLength; matchSeqPos++) {
+         /*testing scores with a gap in matchSeq, either starting a new
+           gap or extending an existing gap*/
+         if ((newScore = newScore - newGapCost) >
+             (prevScoreGapMatchSeq = prevScoreGapMatchSeq - gapExtend))
+            prevScoreGapMatchSeq = newScore;
+         /*testing scores with a gap in query, either starting a new
+           gap or extending an existing gap*/
+         if ((newScore = scoreVector[matchSeqPos].noGap - newGapCost) >
+             (continueGapScore = scoreVector[matchSeqPos].gapExists - gapExtend))
+            continueGapScore = newScore;
+         /*compute new score extending one position in matchSeq and query*/
+         newScore = prevScoreNoGapMatchSeq + matrixRow[matchSeq[matchSeqPos]];
+         if (newScore < 0)
+            newScore = 0; /*Smith-Waterman locality condition*/
+         /*test two alternatives*/
+         if (newScore < prevScoreGapMatchSeq)
+            newScore = prevScoreGapMatchSeq;
+         if (newScore < continueGapScore)
+            newScore = continueGapScore;
+         prevScoreNoGapMatchSeq = scoreVector[matchSeqPos].noGap;
+         scoreVector[matchSeqPos].noGap = newScore;
+         scoreVector[matchSeqPos].gapExists = continueGapScore;
+         if (newScore > bestScore) {
+            bestScore = newScore;
+            bestQueryPos = queryPos;
+            bestMatchSeqPos = matchSeqPos;
+         }
+      }
+   }
+   sfree(scoreVector);
+   if (bestScore < 0)
+      bestScore = 0;
+   *matchSeqEnd = bestMatchSeqPos;
+   *queryEnd = bestQueryPos;
+   *score = bestScore;
+   returnEvalue = BLAST_KarlinStoE_simple(bestScore,kbp, effSearchSpace);
+   return(returnEvalue);
+}
+
+/** Finds where an optimal Smith-Waterman local alignment starts, given
+ * the positions where it ends and its score.
+ * The recurrence is run in reverse, from (queryEnd, matchSeqEnd) down to
+ * position 0 of both sequences, and the scan stops as soon as the best
+ * score seen reaches the score passed in. The position of the best cell
+ * seen is passed back in matchSeqStart and queryStart.
+ * @param matchSeq is a database sequence matched by this query [in]
+ * @param matchSeqLength is the length of matchSeq in amino acids [in]
+ * @param query is the input query sequence [in]
+ * @param matrix is the position-specific matrix associated with query
+ * or the standard matrix [in]
+ * @param gapOpen is the cost of opening a gap [in]
+ * @param gapExtend is the cost of extending an existing gap by 1 position [in]
+ * @param matchSeqEnd is the final position in the matchSeq of an optimal
+ * local alignment [in]
+ * @param queryEnd is the final position in query of an optimal
+ * local alignment [in]
+ * @param score optimal score to be obtained [in]
+ * @param matchSeqStart starting point of optimal alignment [out]
+ * @param queryStart starting point of optimal alignment [out]
+ * @param positionSpecific determines whether matrix is position specific or not [in]
+ * @return the best score reached during the reverse scan
+*/
+
+static Int4 BLSmithWatermanFindStart(Uint1 * matchSeq,
+ Int4 matchSeqLength, Uint1 *query, Int4 **matrix,
+ Int4 gapOpen, Int4 gapExtend, Int4 matchSeqEnd, Int4 queryEnd, Int4 score,
+ Int4 *matchSeqStart, Int4 *queryStart, Boolean positionSpecific)
+{
+
+ Int4 bestScore; /*best score seen so far*/
+ Int4 newScore; /* score of next entry*/
+ Int4 bestMatchSeqPos, bestQueryPos; /*position starting best score in
+ matchSeq and query*/
+ SWpairs *scoreVector; /*keeps one row of the Smith-Waterman matrix
+ overwrite old row with new row*/
+ Int4 *matrixRow; /*one row of score matrix*/
+ Int4 newGapCost; /*cost to have a gap of one character*/
+ Int4 prevScoreNoGapMatchSeq; /*score one row and column up
+ with no gaps*/
+ Int4 prevScoreGapMatchSeq; /*score if a gap already started in matchSeq*/
+ Int4 continueGapScore; /*score for continuing a gap in query*/
+ Int4 matchSeqPos, queryPos; /*positions in matchSeq and query*/
+
+ /* NOTE(review): the calloc result is used below without a NULL check */
+ scoreVector = (SWpairs *) calloc(1, matchSeqLength * sizeof(SWpairs));
+ bestMatchSeqPos = 0;
+ bestQueryPos = 0;
+ bestScore = 0;
+ newGapCost = gapOpen + gapExtend;
+ /* every column starts with score 0 and a gap state that has paid gapOpen */
+ for (matchSeqPos = 0; matchSeqPos < matchSeqLength; matchSeqPos++) {
+ scoreVector[matchSeqPos].noGap = 0;
+ scoreVector[matchSeqPos].gapExists = -(gapOpen);
+ }
+ /* walk the query backwards from the known end of the alignment */
+ for(queryPos = queryEnd; queryPos >= 0; queryPos--) {
+ /* a position-specific matrix is indexed by query position, a standard
+ matrix by the query residue at that position */
+ if (positionSpecific)
+ matrixRow = matrix[queryPos];
+ else
+ matrixRow = matrix[query[queryPos]];
+ newScore = 0;
+ prevScoreNoGapMatchSeq = 0;
+ prevScoreGapMatchSeq = -(gapOpen);
+ for(matchSeqPos = matchSeqEnd; matchSeqPos >= 0; matchSeqPos--) {
+ /*testing scores with a gap in matchSeq, either starting a new
+ gap or extending an existing gap*/
+ if ((newScore = newScore - newGapCost) >
+ (prevScoreGapMatchSeq = prevScoreGapMatchSeq - gapExtend))
+ prevScoreGapMatchSeq = newScore;
+ /*testing scores with a gap in query, either starting a new
+ gap or extending an existing gap*/
+ if ((newScore = scoreVector[matchSeqPos].noGap - newGapCost) >
+ (continueGapScore = scoreVector[matchSeqPos].gapExists - gapExtend))
+ continueGapScore = newScore;
+ /*compute new score extending one position in matchSeq and query*/
+ newScore = prevScoreNoGapMatchSeq + matrixRow[matchSeq[matchSeqPos]];
+ if (newScore < 0)
+ newScore = 0; /*Smith-Waterman locality condition*/
+ /*test two alternatives*/
+ if (newScore < prevScoreGapMatchSeq)
+ newScore = prevScoreGapMatchSeq;
+ if (newScore < continueGapScore)
+ newScore = continueGapScore;
+ /* save the previous row's value for this column before overwriting it;
+ it is the diagonal predecessor of the next column processed */
+ prevScoreNoGapMatchSeq = scoreVector[matchSeqPos].noGap;
+ scoreVector[matchSeqPos].noGap = newScore;
+ scoreVector[matchSeqPos].gapExists = continueGapScore;
+ if (newScore > bestScore) {
+ bestScore = newScore;
+ bestQueryPos = queryPos;
+ bestMatchSeqPos = matchSeqPos;
+ }
+ /* stop as soon as the target score has been reached */
+ if (bestScore >= score)
+ break;
+ }
+ /* the same test again, to leave the outer loop as well */
+ if (bestScore >= score)
+ break;
+ }
+ sfree(scoreVector);
+ /* bestScore starts at 0 and is only ever raised, so this is a safeguard */
+ if (bestScore < 0)
+ bestScore = 0;
+ *matchSeqStart = bestMatchSeqPos;
+ *queryStart = bestQueryPos;
+ return(bestScore);
+}
+
+
+/** Computes the Smith-Waterman local alignment score and returns the
+ * evalue, assuming some positions are forbidden.
+ * The ending positions passed back in matchSeqEnd and queryEnd can be used
+ * to run the local alignment in reverse to find optimal starting positions.
+ * @param matchSeq is the matchSeq sequence [in]
+ * @param matchSeqLength is the length of matchSeq in amino acids [in]
+ * @param query is the input query sequence [in]
+ * @param queryLength is the length of query [in]
+ * @param matrix is either the position-specific matrix associated with query
+ * or the standard matrix [in]
+ * @param gapOpen is the cost of opening a gap [in]
+ * @param gapExtend is the cost of extending an existing gap by 1 position [in]
+ * @param matchSeqEnd returns the final position in the matchSeq of an optimal
+ * local alignment [out]
+ * @param queryEnd returns the final position in query of an optimal
+ * local alignment [out]
+ * @param score is used to pass back the optimal score [out]
+ * @param kbp holds the Karlin-Altschul parameters [in]
+ * @param effSearchSpace effective search space [in]
+ * @param numForbidden number of forbidden ranges, one count per query
+ * position [in]
+ * @param forbiddenRanges lists areas that should not be aligned; for each
+ * query position, entries 2*f and 2*f+1 are the inclusive first and last
+ * matchSeq positions of forbidden range f [in]
+ * @param positionSpecific determines whether matrix is position specific or not [in]
+ * @return the evalue computed by BLAST_KarlinStoE_simple for the best score
+*/
+
+
+static double BLspecialSmithWatermanScoreOnly(Uint1 * matchSeq,
+ Int4 matchSeqLength, Uint1 *query, Int4 queryLength, Int4 **matrix,
+ Int4 gapOpen, Int4 gapExtend, Int4 *matchSeqEnd, Int4 *queryEnd, Int4 *score,
+ Blast_KarlinBlk* kbp, Int8 effSearchSpace,
+ Int4 *numForbidden, Int4 ** forbiddenRanges, Boolean positionSpecific)
+{
+
+ Int4 bestScore; /*best score seen so far*/
+ Int4 newScore; /* score of next entry*/
+ Int4 bestMatchSeqPos, bestQueryPos; /*position ending best score in
+ matchSeq and query*/
+ SWpairs *scoreVector; /*keeps one row of the Smith-Waterman matrix
+ overwrite old row with new row*/
+ Int4 *matrixRow; /*one row of score matrix*/
+ Int4 newGapCost; /*cost to have a gap of one character*/
+ Int4 prevScoreNoGapMatchSeq; /*score one row and column up
+ with no gaps*/
+ Int4 prevScoreGapMatchSeq; /*score if a gap already started in matchSeq*/
+ Int4 continueGapScore; /*score for continuing a gap in query*/
+ Int4 matchSeqPos, queryPos; /*positions in matchSeq and query*/
+ double returnEvalue; /*e-value to return*/
+ Boolean forbidden; /*is this position forbidden?*/
+ Int4 f; /*index over forbidden positions*/
+
+
+ /* NOTE(review): the calloc result is used below without a NULL check */
+ scoreVector = (SWpairs *) calloc(1, matchSeqLength * sizeof(SWpairs));
+ bestMatchSeqPos = 0;
+ bestQueryPos = 0;
+ bestScore = 0;
+ newGapCost = gapOpen + gapExtend;
+ /* every column starts with score 0 and a gap state that has paid gapOpen */
+ for (matchSeqPos = 0; matchSeqPos < matchSeqLength; matchSeqPos++) {
+ scoreVector[matchSeqPos].noGap = 0;
+ scoreVector[matchSeqPos].gapExists = -(gapOpen);
+ }
+ for(queryPos = 0; queryPos < queryLength; queryPos++) {
+ /* a position-specific matrix is indexed by query position, a standard
+ matrix by the query residue at that position */
+ if (positionSpecific)
+ matrixRow = matrix[queryPos];
+ else
+ matrixRow = matrix[query[queryPos]];
+ newScore = 0;
+ prevScoreNoGapMatchSeq = 0;
+ prevScoreGapMatchSeq = -(gapOpen);
+ for(matchSeqPos = 0; matchSeqPos < matchSeqLength; matchSeqPos++) {
+ /*testing scores with a gap in matchSeq, either starting a new
+ gap or extending an existing gap*/
+ if ((newScore = newScore - newGapCost) >
+ (prevScoreGapMatchSeq = prevScoreGapMatchSeq - gapExtend))
+ prevScoreGapMatchSeq = newScore;
+ /*testing scores with a gap in query, either starting a new
+ gap or extending an existing gap*/
+ if ((newScore = scoreVector[matchSeqPos].noGap - newGapCost) >
+ (continueGapScore = scoreVector[matchSeqPos].gapExists - gapExtend))
+ continueGapScore = newScore;
+ /*compute new score extending one position in matchSeq and query*/
+ /* linear scan of this query position's forbidden ranges */
+ forbidden = FALSE;
+ for(f = 0; f < numForbidden[queryPos]; f++) {
+ if ((matchSeqPos >= forbiddenRanges[queryPos][2 * f]) &&
+ (matchSeqPos <= forbiddenRanges[queryPos][2*f + 1])) {
+ forbidden = TRUE;
+ break;
+ }
+ }
+ /* a forbidden cell gets BLAST_SCORE_MIN as its match score; presumably
+ that constant is negative, so the locality test below resets it to 0
+ (TODO confirm against its definition) */
+ if (forbidden)
+ newScore = BLAST_SCORE_MIN;
+ else
+ newScore = prevScoreNoGapMatchSeq + matrixRow[matchSeq[matchSeqPos]];
+ if (newScore < 0)
+ newScore = 0; /*Smith-Waterman locality condition*/
+ /*test two alternatives*/
+ if (newScore < prevScoreGapMatchSeq)
+ newScore = prevScoreGapMatchSeq;
+ if (newScore < continueGapScore)
+ newScore = continueGapScore;
+ /* save the previous row's value for this column before overwriting it;
+ it is the diagonal predecessor of the next column processed */
+ prevScoreNoGapMatchSeq = scoreVector[matchSeqPos].noGap;
+ scoreVector[matchSeqPos].noGap = newScore;
+ scoreVector[matchSeqPos].gapExists = continueGapScore;
+ if (newScore > bestScore) {
+ bestScore = newScore;
+ bestQueryPos = queryPos;
+ bestMatchSeqPos = matchSeqPos;
+
+ }
+ }
+ }
+ sfree(scoreVector);
+ /* bestScore starts at 0 and is only ever raised, so this is a safeguard */
+ if (bestScore < 0)
+ bestScore = 0;
+ *matchSeqEnd = bestMatchSeqPos;
+ *queryEnd = bestQueryPos;
+ *score = bestScore;
+ returnEvalue = BLAST_KarlinStoE_simple(bestScore,kbp, effSearchSpace);
+ return(returnEvalue);
+}
+
+/** Finds where an optimal Smith-Waterman local alignment starts, given
+ * the positions where it ends and its score, while honoring forbidden
+ * ranges.
+ * The recurrence is run in reverse from (queryEnd, matchSeqEnd) and the
+ * scan stops as soon as the best score seen reaches the score passed in.
+ * The starting positions are passed back in matchSeqStart and queryStart;
+ * the score found is also returned.
+ * @param matchSeq is the matchSeq sequence [in]
+ * @param matchSeqLength is the length of matchSeq in amino acids [in]
+ * @param query is the sequence corresponding to some matrix profile [in]
+ * @param matrix is the position-specific matrix associated with query,
+ * or the standard matrix when positionSpecific is false [in]
+ * @param gapOpen is the cost of opening a gap [in]
+ * @param gapExtend is the cost of extending an existing gap by 1 position [in]
+ * @param matchSeqEnd is the final position in the matchSeq of an optimal
+ * local alignment [in]
+ * @param queryEnd is the final position in query of an optimal
+ * local alignment [in]
+ * @param score optimal score is passed in to check when it has
+ * been reached going backwards [in]
+ * @param matchSeqStart optimal starting point in matchSeq [out]
+ * @param queryStart optimal starting point in query [out]
+ * @param numForbidden number of forbidden ranges, one count per query
+ * position [in]
+ * @param forbiddenRanges regions not to be aligned; for each query
+ * position, entries 2*f and 2*f+1 are the inclusive first and last
+ * matchSeq positions of forbidden range f [in]
+ * @param positionSpecific determines whether matrix is position specific or not [in]
+ * @return the score found
+*/
+
+static Int4 BLspecialSmithWatermanFindStart(Uint1 * matchSeq,
+ Int4 matchSeqLength, Uint1 *query, Int4 **matrix,
+ Int4 gapOpen, Int4 gapExtend, Int4 matchSeqEnd, Int4 queryEnd, Int4 score,
+ Int4 *matchSeqStart, Int4 *queryStart, Int4 *numForbidden,
+ Int4 ** forbiddenRanges, Boolean positionSpecific)
+{
+
+ Int4 bestScore; /*best score seen so far*/
+ Int4 newScore; /* score of next entry*/
+ Int4 bestMatchSeqPos, bestQueryPos; /*position starting best score in
+ matchSeq and query*/
+ SWpairs *scoreVector; /*keeps one row of the Smith-Waterman matrix
+ overwrite old row with new row*/
+ Int4 *matrixRow; /*one row of score matrix*/
+ Int4 newGapCost; /*cost to have a gap of one character*/
+ Int4 prevScoreNoGapMatchSeq; /*score one row and column up
+ with no gaps*/
+ Int4 prevScoreGapMatchSeq; /*score if a gap already started in matchSeq*/
+ Int4 continueGapScore; /*score for continuing a gap in query*/
+ Int4 matchSeqPos, queryPos; /*positions in matchSeq and query*/
+ Boolean forbidden; /*is this position forbidden?*/
+ Int4 f; /*index over forbidden positions*/
+
+ /* NOTE(review): the calloc result is used below without a NULL check */
+ scoreVector = (SWpairs *) calloc(1, matchSeqLength * sizeof(SWpairs));
+ bestMatchSeqPos = 0;
+ bestQueryPos = 0;
+ bestScore = 0;
+ newGapCost = gapOpen + gapExtend;
+ /* every column starts with score 0 and a gap state that has paid gapOpen */
+ for (matchSeqPos = 0; matchSeqPos < matchSeqLength; matchSeqPos++) {
+ scoreVector[matchSeqPos].noGap = 0;
+ scoreVector[matchSeqPos].gapExists = -(gapOpen);
+ }
+ /* walk the query backwards from the known end of the alignment */
+ for(queryPos = queryEnd; queryPos >= 0; queryPos--) {
+ /* a position-specific matrix is indexed by query position, a standard
+ matrix by the query residue at that position */
+ if (positionSpecific)
+ matrixRow = matrix[queryPos];
+ else
+ matrixRow = matrix[query[queryPos]];
+ newScore = 0;
+ prevScoreNoGapMatchSeq = 0;
+ prevScoreGapMatchSeq = -(gapOpen);
+ for(matchSeqPos = matchSeqEnd; matchSeqPos >= 0; matchSeqPos--) {
+ /*testing scores with a gap in matchSeq, either starting a new
+ gap or extending an existing gap*/
+ if ((newScore = newScore - newGapCost) >
+ (prevScoreGapMatchSeq = prevScoreGapMatchSeq - gapExtend))
+ prevScoreGapMatchSeq = newScore;
+ /*testing scores with a gap in query, either starting a new
+ gap or extending an existing gap*/
+ if ((newScore = scoreVector[matchSeqPos].noGap - newGapCost) >
+ (continueGapScore = scoreVector[matchSeqPos].gapExists - gapExtend))
+ continueGapScore = newScore;
+ /*compute new score extending one position in matchSeq and query*/
+ /* linear scan of this query position's forbidden ranges */
+ forbidden = FALSE;
+ for(f = 0; f < numForbidden[queryPos]; f++) {
+ if ((matchSeqPos >= forbiddenRanges[queryPos][2 * f]) &&
+ (matchSeqPos <= forbiddenRanges[queryPos][2*f + 1])) {
+ forbidden = TRUE;
+ break;
+ }
+ }
+ /* a forbidden cell gets BLAST_SCORE_MIN as its match score; presumably
+ that constant is negative, so the locality test below resets it to 0
+ (TODO confirm against its definition) */
+ if (forbidden)
+ newScore = BLAST_SCORE_MIN;
+ else
+ newScore = prevScoreNoGapMatchSeq + matrixRow[matchSeq[matchSeqPos]];
+ if (newScore < 0)
+ newScore = 0; /*Smith-Waterman locality condition*/
+ /*test two alternatives*/
+ if (newScore < prevScoreGapMatchSeq)
+ newScore = prevScoreGapMatchSeq;
+ if (newScore < continueGapScore)
+ newScore = continueGapScore;
+ /* save the previous row's value for this column before overwriting it;
+ it is the diagonal predecessor of the next column processed */
+ prevScoreNoGapMatchSeq = scoreVector[matchSeqPos].noGap;
+ scoreVector[matchSeqPos].noGap = newScore;
+ scoreVector[matchSeqPos].gapExists = continueGapScore;
+ if (newScore > bestScore) {
+ bestScore = newScore;
+ bestQueryPos = queryPos;
+ bestMatchSeqPos = matchSeqPos;
+ }
+ /* stop as soon as the target score has been reached */
+ if (bestScore >= score)
+ break;
+ }
+ /* the same test again, to leave the outer loop as well */
+ if (bestScore >= score)
+ break;
+ }
+ sfree(scoreVector);
+ /* bestScore starts at 0 and is only ever raised, so this is a safeguard */
+ if (bestScore < 0)
+ bestScore = 0;
+ *matchSeqStart = bestMatchSeqPos;
+ *queryStart = bestQueryPos;
+ return(bestScore);
+}
+
+
+/** Moves the HSPs held by a list of Smith-Waterman alignments into a hit
+ * list, producing one HSP list per run of consecutive alignments that
+ * share the same subject_index.
+ * Each alignment's hsp pointer is set to NULL once it has been saved,
+ * since the HSP is from then on owned by the HSP list.
+ * @param SWAligns List of Smith-Waterman alignments; may be NULL [in|out]
+ * @param hitList BlastHitList that is filled in [in|out]
+ * @return always 0
+ */
+static Int2 newConvertSWalignsUpdateHitList(SWResults * SWAligns, BlastHitList* hitList)
+{
+    BlastHSPList* pending = NULL;   /* HSP list being built for the current subject */
+    SWResults* node;                /* alignment currently being converted */
+    SWResults* following;           /* alignment after node, if any */
+
+    /* an empty input list simply skips the loop */
+    for (node = SWAligns; node != NULL; node = node->next) {
+        if (pending == NULL) {
+            /* first alignment of a new subject: open a fresh HSP list */
+            pending = Blast_HSPListNew(0);
+            pending->oid = node->subject_index;
+        }
+
+        Blast_HSPListSaveHSP(pending, node->hsp);
+        node->hsp = NULL; /* now owned by the HSP list, deleted there */
+
+        following = node->next;
+        if (following != NULL &&
+            following->subject_index == node->subject_index)
+            continue; /* same subject: keep filling the same HSP list */
+
+        /* end of input or change of subject: hand the list over */
+        Blast_HitListUpdate(hitList, pending);
+        pending = NULL;
+    }
+
+    return 0;
+}
+
+
+/** Allocates a score matrix with numPositions+1 rows of BLASTAA_SIZE
+ * entries and fills the extra last row (index numPositions) with
+ * BLAST_SCORE_MIN.
+ * @param numPositions length of matrix (or query) [in]
+ * @return matrix (Int4**), or NULL if the allocation failed
+ */
+static Int4 **allocateScaledMatrix(Int4 numPositions)
+{
+    Int4 **returnMatrix; /*allocated matrix to return*/
+    Int4 c;              /*loop index over characters*/
+
+    returnMatrix = (Int4**) _PSIAllocateMatrix(numPositions+1, BLASTAA_SIZE, sizeof(Int4));
+    /* do not touch the sentinel row when nothing was allocated; presumably
+       _PSIAllocateMatrix reports failure with NULL -- TODO confirm */
+    if (returnMatrix == NULL)
+        return NULL;
+
+    for(c = 0; c < BLASTAA_SIZE; c++)
+        returnMatrix[numPositions][c] = BLAST_SCORE_MIN;
+    return(returnMatrix);
+}
+
+/** Allocates a frequency ratio matrix with numPositions+1 rows of
+ * BLASTAA_SIZE entries and fills the extra last row (index numPositions)
+ * with BLAST_SCORE_MIN converted to double.
+ * @param numPositions the length of matrix or query [in]
+ * @return frequency matrix (double**), or NULL if the allocation failed
+ */
+static double **allocateStartFreqs(Int4 numPositions)
+{
+    double **returnMatrix; /*allocated matrix to return*/
+    Int4 c;                /*loop index over characters*/
+
+    returnMatrix = (double**) _PSIAllocateMatrix(numPositions+1, BLASTAA_SIZE, sizeof(double));
+    /* do not touch the sentinel row when nothing was allocated; presumably
+       _PSIAllocateMatrix reports failure with NULL -- TODO confirm */
+    if (returnMatrix == NULL)
+        return NULL;
+
+    for(c = 0; c < BLASTAA_SIZE; c++)
+        returnMatrix[numPositions][c] = BLAST_SCORE_MIN;
+    return(returnMatrix);
+}
+
+#if 0
+FIXME delete if not needed
+/*deallocate a frequency ratio matrix: frees rows 0..numPositions
+ inclusive (numPositions+1 rows) and then the array of row pointers.
+ This function sits inside a disabled preprocessor region and is not
+ compiled.*/
+static void freeStartFreqs(double **matrix, Int4 numPositions)
+{
+ int row; /*loop index*/
+
+ /* "<=" because the matrix carries one extra row past numPositions */
+ for(row = 0; row <= numPositions; row++)
+ sfree(matrix[row]);
+ sfree(matrix);
+}
+#endif
+
+/*Fills in score frequencies for a matrix that is not position-specific.
+ matrix is a score matrix whose first BLASTAA_SIZE rows are scanned
+ subjectProbArray is an array containing the probability of occurrence
+ of each residue in the matching (subject) sequence
+ queryProbArray is an array containing the probability of occurrence
+ of each residue in the query
+ scoreArray is an array of probabilities for each score that is
+ to be used as a field in return_sfp
+ return_sfp is the structure to be filled in and returned
+ range is the size of scoreArray and is an upper bound on the
+ difference between maximum score and minimum score in the matrix
+ The routine computes the probability of each score, weighting each
+ matrix entry by the product of the query residue probability and the
+ subject residue probability, accumulates those probabilities into
+ scoreArray, and stores scoreArray in return_sfp.
+ For indexing convenience the field storing scoreArray points to the
+ entry for score 0, so that referring to the -k index corresponds to
+ score -k */
+static Blast_ScoreFreq* notposfillSfp(Int4 **matrix, double *subjectProbArray, double *queryProbArray, double *scoreArray, Blast_ScoreFreq* return_sfp, Int4 range)
+{
+    Int4 lowest = 0;   /*observed minimum score, never above 0*/
+    Int4 highest = 0;  /*observed maximum score, never below 0*/
+    Int4 row;          /*index over matrix rows*/
+    Int4 idx;          /*index into trueCharPositions, also reused for scoreArray*/
+    Int4 col;          /*matrix column of a true residue*/
+    Int4 s;            /*a score taken from the matrix*/
+
+    /* first pass: observed extremes, ignoring BLAST_SCORE_MIN for the minimum */
+    for (row = 0; row < BLASTAA_SIZE; row++) {
+        for (idx = 0; idx < PRO_TRUE_ALPHABET_SIZE; idx++) {
+            s = matrix[row][trueCharPositions[idx]];
+            if (s != BLAST_SCORE_MIN && s < lowest)
+                lowest = s;
+            if (s > highest)
+                highest = s;
+        }
+    }
+    return_sfp->obs_min = lowest;
+    return_sfp->obs_max = highest;
+
+    for (idx = 0; idx < range; idx++)
+        scoreArray[idx] = 0.0;
+    return_sfp->sprob = scoreArray - lowest; /*center around 0*/
+
+    /* second pass: accumulate the probability mass of every usable score */
+    for (row = 0; row < BLASTAA_SIZE; row++) {
+        for (idx = 0; idx < PRO_TRUE_ALPHABET_SIZE; idx++) {
+            col = trueCharPositions[idx];
+            s = matrix[row][col];
+            if (s < lowest)
+                continue; /* below the observed minimum, e.g. BLAST_SCORE_MIN */
+            return_sfp->sprob[s] += (queryProbArray[row] * subjectProbArray[col]);
+        }
+    }
+
+    return_sfp->score_avg = 0;
+    for (s = lowest; s <= highest; s++)
+        return_sfp->score_avg += s * return_sfp->sprob[s];
+    return(return_sfp);
+}
+
+/*Fills in score frequencies for a position-specific matrix.
+ matrix is a position-specific score matrix with matrixLength positions
+ subjectProbArray is an array containing the probability of occurrence
+ of each residue in the matching sequence often called the subject
+ scoreArray is an array of probabilities for each score that is
+ to be used as a field in return_sfp
+ return_sfp is the structure to be filled in and returned
+ range is the size of scoreArray and is an upper bound on the
+ difference between maximum score and minimum score in the matrix
+ The routine computes the probability of each score, giving every
+ matrix position the same weight 1/matrixLength and weighting each
+ column by the subject residue probability, accumulates those
+ probabilities into scoreArray, and stores scoreArray in return_sfp.
+ For indexing convenience the field storing scoreArray points to the
+ entry for score 0, so that referring to the -k index corresponds to
+ score -k */
+static Blast_ScoreFreq* posfillSfp(Int4 **matrix, Int4 matrixLength, double *subjectProbArray, double *scoreArray, Blast_ScoreFreq* return_sfp, Int4 range)
+{
+    Int4 lowest = 0;           /*observed minimum score, never above 0*/
+    Int4 highest = 0;          /*observed maximum score, never below 0*/
+    Int4 pos;                  /*index over matrix positions*/
+    Int4 idx;                  /*index into trueCharPositions, also reused for scoreArray*/
+    Int4 col;                  /*matrix column of a true residue*/
+    Int4 s;                    /*a score taken from the matrix*/
+    double perPositionWeight;  /*1/matrix length as a double*/
+
+    /* first pass: observed extremes, ignoring BLAST_SCORE_MIN for the minimum */
+    for (pos = 0; pos < matrixLength; pos++) {
+        for (idx = 0; idx < PRO_TRUE_ALPHABET_SIZE; idx++) {
+            s = matrix[pos][trueCharPositions[idx]];
+            if (s != BLAST_SCORE_MIN && s < lowest)
+                lowest = s;
+            if (s > highest)
+                highest = s;
+        }
+    }
+    return_sfp->obs_min = lowest;
+    return_sfp->obs_max = highest;
+
+    for (idx = 0; idx < range; idx++)
+        scoreArray[idx] = 0.0;
+    return_sfp->sprob = scoreArray - lowest; /*center around 0*/
+
+    perPositionWeight = 1.0/ ((double) matrixLength);
+
+    /* second pass: accumulate the probability mass of every usable score */
+    for (pos = 0; pos < matrixLength; pos++) {
+        for (idx = 0; idx < PRO_TRUE_ALPHABET_SIZE; idx++) {
+            col = trueCharPositions[idx];
+            s = matrix[pos][col];
+            if (s < lowest)
+                continue; /* below the observed minimum, e.g. BLAST_SCORE_MIN */
+            return_sfp->sprob[s] += (perPositionWeight * subjectProbArray[col]);
+        }
+    }
+
+    return_sfp->score_avg = 0;
+    for (s = lowest; s <= highest; s++)
+        return_sfp->score_avg += s * return_sfp->sprob[s];
+    return(return_sfp);
+}
+
+
+
+/** Return a matrix of the frequency ratios that underlie the
+ * score matrix being used on this pass. The returned matrix
+ * is position-specific: row i is first filled from the standard
+ * frequency ratios of matrixName for residue query[i], and then every
+ * entry whose startNumerator value and standard probabilities exceed a
+ * small epsilon (and whose column is not 'X' or '*') is replaced by
+ * startNumerator[i][j] divided by the standard probability of residue j.
+ * The result has numPositions+1 rows; see allocateStartFreqs for the
+ * contents of the extra last row.
+ * @param sbp statistical information for blast [in]
+ * @param query the query sequence [in]
+ * @param matrixName name of the underlying matrix [in]
+ * @param startNumerator matrix of frequency ratios as stored
+ * in posit.h. It needs to be divided by the frequency of the
+ * second character to get the intended ratio [in]
+ * @param numPositions length of the query [in]
+ * @return the newly allocated ratio matrix, or NULL if the frequency
+ * ratios for matrixName or the result matrix could not be obtained
+ */
+static double **getStartFreqRatios(BlastScoreBlk* sbp,
+                                   Uint1* query,
+                                   const char *matrixName,
+                                   double **startNumerator,
+                                   Int4 numPositions)
+{
+    double** returnRatios;  /*frequency ratios to start investigating each pair*/
+    double *standardProb;   /*probabilities of each letter*/
+    Int4 i,j;               /* Loop indices. */
+    SFreqRatios* freqRatios=NULL; /* frequency ratio container for given matrix */
+    const double KposEpsilon = 0.0001;
+
+    /* Look the matrix up before allocating the result, so that the early
+       return below cannot leak the result matrix. */
+    freqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);
+    ASSERT(freqRatios);
+    if (freqRatios == NULL)
+        return NULL;
+
+    returnRatios = allocateStartFreqs(numPositions);
+    if (returnRatios == NULL) {
+        /* nothing to fill in; release the container acquired above */
+        freqRatios = _PSIMatrixFrequencyRatiosFree(freqRatios);
+        return NULL;
+    }
+
+    /* start from the standard ratios of the residue at each query position */
+    for(i = 0; i < numPositions; i++) {
+        for(j = 0; j < BLASTAA_SIZE; j++) {
+            returnRatios[i][j] = freqRatios->data[query[i]][j];
+        }
+    }
+
+    freqRatios = _PSIMatrixFrequencyRatiosFree(freqRatios);
+
+    /* NOTE(review): the result of _PSIGetStandardProbabilities is used
+       without a NULL check -- confirm whether it can fail */
+    standardProb = _PSIGetStandardProbabilities(sbp);
+
+    /*reverse multiplication done in posit.c*/
+    for(i = 0; i < numPositions; i++)
+        for(j = 0; j < BLASTAA_SIZE; j++)
+            if ((standardProb[query[i]] > KposEpsilon) && (standardProb[j] > KposEpsilon) &&
+                (j != AMINOACID_TO_NCBISTDAA['X']) && (j != AMINOACID_TO_NCBISTDAA['*'])
+                && (startNumerator[i][j] > KposEpsilon))
+                returnRatios[i][j] = startNumerator[i][j]/standardProb[j];
+
+    sfree(standardProb);
+
+    return(returnRatios);
+}
+
+/** Rescales a position-specific score matrix in place.
+ * Every entry of matrix that currently equals BLAST_SCORE_MIN is replaced
+ * by the corresponding entry of startMatrix. Every other entry is
+ * overwritten with the log of the corresponding entry of startFreqRatios,
+ * divided by Lambda, multiplied by LambdaRatio and rounded to the nearest
+ * integer.
+ * startMatrix and matrix have dimensions numPositions X BLASTAA_SIZE
+ * @param matrix preallocated matrix to be filled in; its current
+ * contents select which rule applies to each entry [in|out]
+ * @param startMatrix matrix to be scaled up [in]
+ * @param startFreqRatios frequency ratios of starting matrix [in]
+ * @param numPositions length of query [in]
+ * @param Lambda A Karlin-Altschul parameter. [in]
+ * @param LambdaRatio ratio of correct Lambda to its original value [in]
+*/
+static void scaleMatrix(Int4 **matrix, Int4 **startMatrix,
+                        double **startFreqRatios, Int4 numPositions,
+                        double Lambda, double LambdaRatio)
+{
+    Int4 pos; /*index over positions*/
+
+    for (pos = 0; pos < numPositions; pos++) {
+        Int4 *outRow = matrix[pos]; /*row being rewritten*/
+        Int4 residue;               /*index over characters*/
+
+        for (residue = 0; residue < BLASTAA_SIZE; residue++) {
+            double scaled; /*intermediate term in computation*/
+
+            if (outRow[residue] == BLAST_SCORE_MIN) {
+                /* marked entries fall back to the starting matrix */
+                outRow[residue] = startMatrix[pos][residue];
+                continue;
+            }
+            /* kept as three separate steps, exactly as the scaling is defined */
+            scaled = log(startFreqRatios[pos][residue]);
+            scaled = scaled/Lambda;
+            scaled = scaled * LambdaRatio;
+            outRow[residue] = BLAST_Nint(scaled);
+        }
+    }
+}
+
+/*SCALING_FACTOR is a multiplicative factor used to get more bits of
+ * precision in the integer matrix scores. It cannot be arbitrarily
+ * large because we do not want total alignment scores to exceed
+ * -(BLAST_SCORE_MIN) */
+#define SCALING_FACTOR 32
+/** Fill matrix with integer scores derived from the frequency ratios of
+ * the standard matrix called matrixName: each entry is log(ratio)/Lambda
+ * rounded to the nearest integer, except that a ratio of exactly 0.0
+ * yields BLAST_SCORE_MIN. The amount of scaling is therefore determined
+ * by the Lambda supplied by the caller.
+ * If the frequency ratios for matrixName cannot be obtained, matrix is
+ * left untouched.
+ * @param matrix preallocated BLASTAA_SIZE x BLASTAA_SIZE matrix [in][out]
+ * @param matrixName name of matrix (e.g., BLOSUM62, PAM30). [in]
+ * @param Lambda A Karlin-Altschul parameter. [in]
+*/
+static void computeScaledStandardMatrix(Int4 **matrix, char *matrixName, double Lambda)
+{
+    int row, col; /*loop indices*/
+    SFreqRatios* ratios = _PSIMatrixFrequencyRatiosNew(matrixName); /* frequency ratio container for given matrix */
+
+    ASSERT(ratios);
+    if (ratios == NULL)
+        return;
+
+    for (row = 0; row < BLASTAA_SIZE; row++) {
+        for (col = 0; col < BLASTAA_SIZE; col++) {
+            if (0.0 != ratios->data[row][col]) {
+                double scaled = log(ratios->data[row][col])/Lambda; /*intermediate term in computation*/
+                matrix[row][col] = BLAST_Nint(scaled);
+            } else {
+                /* the log of a zero ratio is undefined: use the minimum score */
+                matrix[row][col] = BLAST_SCORE_MIN;
+            }
+        }
+    }
+
+    ratios = _PSIMatrixFrequencyRatiosFree(ratios);
+}
+
+
+#if 0 /* FIXME */
+/************************************************************
+produce a scaled-up version of the position-specific matrix starting from
+posFreqs
+fillPosMatrix is the matrix to be filled
+nonposMatrix is the underlying position-independent matrix, used to
+fill positions where frequencies are irrelevant
+matrixName selects which table of standard frequency ratios is copied
+query and queryLength describe the query sequence
+sbp stores various parameters of the search
+This whole function sits inside a disabled preprocessor region and is
+not compiled.
+*****************************************************************/
+void scalePosMatrix(Int4 **fillPosMatrix, Int4 **nonposMatrix, char *matrixName, double **posFreqs, Uint1 *query, Int4 queryLength, BLAST_ScoreBlk* sbp)
+{
+
+ posSearchItems *posSearch; /*used to pass data into scaling routines*/
+ compactSearchItems *compactSearch; /*used to pass data into scaling routines*/
+ Int4 i,j ; /*loop indices*/
+ BLAST_ResFreq* stdrfp; /* gets standard frequencies in prob field */
+ Int4 a; /*index over characters*/
+ double **standardFreqRatios; /*frequency ratios for standard score matrix*/
+ Int4 multiplier; /*bit scale factor for scores*/
+
+
+ /* build the two temporary containers handed to the scaling routines;
+ NOTE(review): none of the allocations below is checked */
+ posSearch = (posSearchItems *) calloc (1, sizeof(posSearchItems));
+ compactSearch = (compactSearchItems *) calloc (1, sizeof(compactSearchItems));
+ posSearch->posMatrix = (Int4 **) calloc((queryLength + 1), sizeof(Int4 *));
+ posSearch->posPrivateMatrix = fillPosMatrix;
+ posSearch->posFreqs = posFreqs;
+ for(i = 0; i <= queryLength; i++)
+ posSearch->posMatrix[i] = (Int4 *) calloc(BLASTAA_SIZE, sizeof(Int4));
+
+ compactSearch->query = (Uint1*) query;
+ compactSearch->qlength = queryLength;
+ compactSearch->alphabetSize = BLASTAA_SIZE;
+ compactSearch->gapped_calculation = TRUE;
+ compactSearch->matrix = nonposMatrix;
+ compactSearch->lambda = sbp->kbp_gap_std[0]->Lambda;
+ compactSearch->kbp_std = sbp->kbp_std;
+ compactSearch->kbp_psi = sbp->kbp_psi;
+ compactSearch->kbp_gap_psi = sbp->kbp_gap_psi;
+ compactSearch->kbp_gap_std = sbp->kbp_gap_std;
+ compactSearch->lambda_ideal = sbp->kbp_ideal->Lambda;
+ compactSearch->K_ideal = sbp->kbp_ideal->K;
+
+ /* copy the standard residue probabilities into compactSearch */
+ stdrfp = BlastResFreqNew(sbp);
+ BlastResFreqStdComp(sbp,stdrfp);
+ compactSearch->standardProb = calloc(compactSearch->alphabetSize, sizeof(double));
+ for(a = 0; a < compactSearch->alphabetSize; a++)
+ compactSearch->standardProb[a] = stdrfp->prob[a];
+ stdrfp = BlastResFreqDestruct(stdrfp);
+
+ standardFreqRatios = (double **) calloc(BLASTAA_SIZE, sizeof(double *));
+ for (i = 0; i < BLASTAA_SIZE; i++)
+ standardFreqRatios[i] = (double *) calloc(BLASTAA_SIZE, sizeof(double));
+
+ /* pick the frequency-ratio table and bit multiplier by matrix name.
+ NOTE(review): multiplier is only assigned inside these branches, so a
+ name matching none of them leaves it unset and standardFreqRatios all
+ zero. The BLOSUM90 and PAM branches loop to PROTEIN_ALPHABET while
+ the others loop to BLASTAA_SIZE -- confirm the two constants agree */
+ if ((0 == strcmp(matrixName,"BLOSUM62")) ||
+ (0 == strcmp(matrixName,"BLOSUM62_20"))) {
+ multiplier = 2;
+ for(i = 0; i < BLASTAA_SIZE; i++)
+ for(j = 0; j < BLASTAA_SIZE; j++)
+ standardFreqRatios[i][j] = BLOSUM62_FREQRATIOS[i][j];
+ }
+ if (0 == strcmp(matrixName,"BLOSUM62_20A")) {
+ multiplier = 2;
+ for(i = 0; i < BLASTAA_SIZE; i++)
+ for(j = 0; j < BLASTAA_SIZE; j++)
+ standardFreqRatios[i][j] = 0.9666 * BLOSUM62_FREQRATIOS[i][j];
+ }
+ if (0 == strcmp(matrixName,"BLOSUM62_20B")) {
+ multiplier = 2;
+ for(i = 0; i < BLASTAA_SIZE; i++)
+ for(j = 0; j < BLASTAA_SIZE; j++)
+ standardFreqRatios[i][j] = 0.9344 * BLOSUM62_FREQRATIOS[i][j];
+ }
+ if (0 == strcmp(matrixName,"BLOSUM45")) {
+ multiplier = 3;
+ for(i = 0; i < BLASTAA_SIZE; i++)
+ for(j = 0; j < BLASTAA_SIZE; j++)
+ standardFreqRatios[i][j] = BLOSUM45_FREQRATIOS[i][j];
+ }
+ if (0 == strcmp(matrixName,"BLOSUM80")) {
+ multiplier = 2;
+ for(i = 0; i < BLASTAA_SIZE; i++)
+ for(j = 0; j < BLASTAA_SIZE; j++)
+ standardFreqRatios[i][j] = BLOSUM80_FREQRATIOS[i][j];
+ }
+ if (0 == strcmp(matrixName,"BLOSUM50")) {
+ multiplier = 2;
+ for(i = 0; i < BLASTAA_SIZE; i++)
+ for(j = 0; j < BLASTAA_SIZE; j++)
+ standardFreqRatios[i][j] = BLOSUM50_FREQRATIOS[i][j];
+ }
+ if (0 == strcmp(matrixName,"BLOSUM90")) {
+ multiplier = 2;
+ for(i = 0; i < PROTEIN_ALPHABET; i++)
+ for(j = 0; j < PROTEIN_ALPHABET; j++)
+ standardFreqRatios[i][j] = BLOSUM90_FREQRATIOS[i][j];
+ }
+ if (0 == strcmp(matrixName,"PAM250")) {
+ multiplier = 2;
+ for(i = 0; i < PROTEIN_ALPHABET; i++)
+ for(j = 0; j < PROTEIN_ALPHABET; j++)
+ standardFreqRatios[i][j] = PAM250_FREQRATIOS[i][j];
+ }
+ if (0 == strcmp(matrixName,"PAM30")) {
+ multiplier = 2;
+ for(i = 0; i < PROTEIN_ALPHABET; i++)
+ for(j = 0; j < PROTEIN_ALPHABET; j++)
+ standardFreqRatios[i][j] = PAM30_FREQRATIOS[i][j];
+ }
+ if (0 == strcmp(matrixName,"PAM70")) {
+ multiplier = 2;
+ for(i = 0; i < PROTEIN_ALPHABET; i++)
+ for(j = 0; j < PROTEIN_ALPHABET; j++)
+ standardFreqRatios[i][j] = PAM70_FREQRATIOS[i][j];
+ }
+
+ posFreqsToMatrix(posSearch,compactSearch, standardFreqRatios, multiplier);
+ impalaScaling(posSearch, compactSearch, ((double) SCALING_FACTOR), FALSE);
+
+ /* release the temporaries; fillPosMatrix itself belongs to the caller.
+ NOTE(review): standardFreqRatios was allocated with BLASTAA_SIZE rows
+ but is freed with PROTEIN_ALPHABET rows */
+ for(i = 0; i <= queryLength; i++)
+ sfree(posSearch->posMatrix[i]);
+ for(i = 0; i < PROTEIN_ALPHABET; i++)
+ sfree(standardFreqRatios[i]);
+
+ sfree(standardFreqRatios);
+ sfree(compactSearch->standardProb);
+ sfree(posSearch->posMatrix);
+ sfree(posSearch);
+ sfree(compactSearch);
+}
+#endif
+
+/**
+ * A Kappa_MatchingSequence represents a subject sequence to be aligned
+ * with the query. This abstract sequence is used to hide the
+ * complexity associated with actually obtaining and releasing the
+ * data for a matching sequence, e.g. reading the sequence from a DB
+ * or translating it from a nucleotide sequence.
+ * Instances are filled in by Kappa_MatchingSequenceInitialize and
+ * emptied by Kappa_MatchingSequenceRelease.
+ */
+struct Kappa_MatchingSequence {
+ Int4 length; /**< length of the sequence */
+ Uint1* sequence; /**< the sequence data */
+ Uint1* filteredSequence; /**< a copy of the sequence data that has
+ been filtered; points one byte past
+ filteredSequenceStart */
+ Uint1* filteredSequenceStart; /**< the address of the chunk of
+ memory that has been
+ allocated to hold
+ "filteredSequence"; this is the
+ pointer that gets freed. */
+ BLAST_SequenceBlk* seq_blk; /**< sequence blk for "database" sequence. */
+};
+typedef struct Kappa_MatchingSequence Kappa_MatchingSequence;
+
+
+#define BLASTP_MASK_INSTRUCTIONS "S 10 1.8 2.1"
+
+/** Initialize a new matching sequence, obtaining the data from an
+ * appropriate location.
+ * On success self holds a private copy of the subject sequence, a second
+ * copy that is masked unless KAPPA_NO_SEG_SEQUENCE is defined, and a
+ * sequence block built over the second copy.
+ * If the sequence cannot be fetched or memory cannot be allocated, self
+ * is left in an empty state (length 0, all pointers NULL) rather than
+ * uninitialized.
+ * @param self the Kappa_MatchingSequence to be filled in [in|out]
+ * @param seqSrc Used to access match sequences [in]
+ * @param subject_id ordinal ID of matching sequence [in]
+ */
+static void
+Kappa_MatchingSequenceInitialize(Kappa_MatchingSequence * self,
+                                 const BlastSeqSrc* seqSrc,
+                                 Int4 subject_id)
+{
+    GetSeqArg seq_arg;
+
+    /* Give every field a defined value first, so that an early return
+       never leaves stale or garbage pointers in *self. */
+    self->length = 0;
+    self->sequence = NULL;
+    self->filteredSequence = NULL;
+    self->filteredSequenceStart = NULL;
+    self->seq_blk = NULL;
+
+    memset((void*) &seq_arg, 0, sizeof(seq_arg));
+    seq_arg.oid = subject_id;
+    seq_arg.encoding = BLASTP_ENCODING;
+
+    /* NOTE(review): seq_arg.seq is still NULL here because of the memset
+       above -- confirm BlastSequenceBlkClean accepts NULL */
+    BlastSequenceBlkClean(seq_arg.seq);
+
+    if (BLASTSeqSrcGetSequence(seqSrc, (void*) &seq_arg) < 0)
+        return;
+
+    self->length = BLASTSeqSrcGetSeqLen(seqSrc, &seq_arg);
+
+    /* NOTE(review): seq_arg.seq is not released in this function --
+       verify who owns the block handed back by the sequence source */
+    self->sequence = BlastMemDup(seq_arg.seq->sequence, (1+self->length)*sizeof(Uint1));
+
+    /* two extra bytes: the filtered copy starts one byte into the buffer,
+       leaving a zero byte on each side of it */
+    self->filteredSequenceStart = calloc((self->length + 2), sizeof(Uint1));
+
+    if (self->sequence == NULL || self->filteredSequenceStart == NULL) {
+        /* out of memory: drop whatever was obtained and stay empty */
+        if (self->sequence != NULL) {
+            sfree(self->sequence);
+        }
+        if (self->filteredSequenceStart != NULL) {
+            sfree(self->filteredSequenceStart);
+        }
+        self->sequence = NULL;
+        self->filteredSequenceStart = NULL;
+        self->length = 0;
+        return;
+    }
+
+    self->filteredSequence = self->filteredSequenceStart + 1;
+    memcpy(self->filteredSequence, self->sequence, self->length);
+
+#ifndef KAPPA_NO_SEG_SEQUENCE
+/*take as input an amino acid string and its length; compute a filtered
+  amino acid string and return the filtered string*/
+    {{
+        BlastSeqLoc* mask_seqloc = NULL; /* stays NULL if the filter sets nothing */
+        const Uint1 k_program_name = blast_type_blastp;
+
+        BlastSetUp_Filter(k_program_name, self->sequence, self->length,
+                          0, BLASTP_MASK_INSTRUCTIONS, NULL, &mask_seqloc);
+
+        Blast_MaskTheResidues(self->filteredSequence, self->length, FALSE, mask_seqloc, FALSE, 0);
+
+        mask_seqloc = BlastSeqLocFree(mask_seqloc);
+    }}
+#endif
+    self->seq_blk = NULL;
+    BlastSetUp_SeqBlkNew(self->filteredSequence, self->length, 0, &(self->seq_blk), FALSE);
+    return;
+
+}
+
+
+/** Release the data associated with a matching sequence: the buffer
+ * behind the filtered copy (only when the filtered copy is distinct from
+ * the plain sequence), the plain sequence, and the sequence block.
+ * @param self the Kappa_MatchingSequence whose data will be freed [in|out]
+ */
+static void
+Kappa_MatchingSequenceRelease(Kappa_MatchingSequence * self)
+{
+    /* the filtered copy lives in its own buffer unless it aliases sequence */
+    if (self->filteredSequence == self->sequence) {
+        /* shared storage: released once, together with sequence below */
+    } else {
+        sfree(self->filteredSequenceStart);
+    }
+
+    sfree(self->sequence);
+    self->seq_blk = BlastSequenceBlkFree(self->seq_blk);
+}
+
+
+/** An instance of Kappa_ForbiddenRanges is used by the Smith-Waterman
+ * algorithm to represent ranges in the database that are not to be
+ * aligned. Both arrays hold queryLength entries and are indexed by
+ * query position (see Kappa_ForbiddenRangesInitialize and
+ * Kappa_ForbiddenRangesPush).
+ */
+
+struct Kappa_ForbiddenRanges {
+ Int4 *numForbidden; /**< how many forbidden ranges are recorded for
+ each query position */
+ Int4 **ranges; /**< for each query position, the forbidden ranges
+ stored as consecutive pairs: the start in the
+ subject, then that start plus the alignment
+ extent in the subject */
+ Int4 queryLength; /**< length of query. */
+};
+typedef struct Kappa_ForbiddenRanges Kappa_ForbiddenRanges;
+
+
+/** Set up an empty Kappa_ForbiddenRanges: one counter and one
+ * two-element range buffer are allocated for every query position.
+ * @param self object to be initialized [in|out]
+ * @param queryLength length of the query [in]
+ */
+static void
+Kappa_ForbiddenRangesInitialize(
+  Kappa_ForbiddenRanges * self,
+  Int4 queryLength
+) {
+  Int4 pos = 0;
+
+  self->queryLength = queryLength;
+  self->numForbidden = (Int4 *) calloc(queryLength, sizeof(Int4));
+  self->ranges = (Int4 **) calloc(queryLength, sizeof(Int4 *));
+
+  while (pos < queryLength) {
+    Int4 *firstPair;
+
+    self->numForbidden[pos] = 0;
+    /* room for a single (start, end) pair; Push grows it on demand */
+    firstPair = (Int4 *) calloc(2, sizeof(Int4));
+    self->ranges[pos] = firstPair;
+    firstPair[0] = 0;
+    firstPair[1] = 0;
+    pos++;
+  }
+}
+
+
+/** Make self empty again by zeroing the per-position range counters.
+ * The range buffers themselves are kept for reuse.
+ * @param self the object to be reset [in|out]
+ */
+static void
+Kappa_ForbiddenRangesClear(Kappa_ForbiddenRanges * self)
+{
+  Int4 *counter = self->numForbidden;
+  Int4 remaining = self->queryLength;
+
+  while (remaining-- > 0) {
+    *counter++ = 0;
+  }
+}
+
+
+/** Add some ranges to self.
+ *
+ * For every query position covered by the alignment, the subject range
+ * [matchStart, matchStart + matchAlignmentExtent) is appended to that
+ * position's list. Query positions outside [0, queryLength) are ignored,
+ * because the per-position arrays only have queryLength entries. If a
+ * list cannot be grown, the range is dropped for that position and the
+ * ranges already recorded there are kept.
+ *
+ * @param self object to be "pushed" [in|out]
+ * @param queryStart start of the alignment in the query sequence [in]
+ * @param queryAlignmentExtent length of the alignment in the query sequence [in]
+ * @param matchStart start of the alignment in the subject sequence [in]
+ * @param matchAlignmentExtent length of the alignment in the subject sequence [in]
+ */
+static void
+Kappa_ForbiddenRangesPush(
+  Kappa_ForbiddenRanges * self,
+  Int4 queryStart,
+  Int4 queryAlignmentExtent,
+  Int4 matchStart,
+  Int4 matchAlignmentExtent)
+{
+  Int4 f;
+  Int4 first = queryStart;
+  Int4 stop = queryStart + queryAlignmentExtent;
+
+  /* Never index outside the arrays allocated by Initialize. */
+  if (first < 0) first = 0;
+  if (stop > self->queryLength) stop = self->queryLength;
+
+  for (f = first; f < stop; f++) {
+    Int4 last = 2 * self->numForbidden[f];
+
+    if (0 != last) {
+      /* The initial buffer holds exactly one pair, so every further pair
+       * needs a larger buffer. Assign the result only on success, so a
+       * failed realloc does not lose the existing block. */
+      Int4 *grown =
+        (Int4 *) realloc(self->ranges[f], (last + 2) * sizeof(Int4));
+      if (grown == NULL) {
+        continue;
+      }
+      self->ranges[f] = grown;
+    } else if (self->ranges[f] == NULL) {
+      /* the initial allocation failed; nothing can be stored here */
+      continue;
+    }
+    self->ranges[f][last] = matchStart;
+    self->ranges[f][last + 1] = matchStart + matchAlignmentExtent;
+
+    self->numForbidden[f]++;
+  }
+}
+
+
+/** Free everything owned by self (each per-position range buffer, then
+ * the two top-level arrays) and reset the array pointers to NULL. The
+ * object itself is not deleted.
+ * @param self the object whose storage will be released [in|out]
+ */
+static void
+Kappa_ForbiddenRangesRelease(Kappa_ForbiddenRanges * self)
+{
+  Int4 pos = 0;
+
+  while (pos < self->queryLength) {
+    sfree(self->ranges[pos]);
+    pos++;
+  }
+
+  sfree(self->ranges);
+  self->ranges = NULL;
+
+  sfree(self->numForbidden);
+  self->numForbidden = NULL;
+}
+
+
+/** Redo a Smith-Waterman alignment with the x-drop aligner (ALIGN_EX),
+ * trying to reproduce the end points found by Smith-Waterman. The
+ * result will usually be the same as the S-W alignment. ALIGN_EX is
+ * used so that this code keeps working if the data structures for
+ * storing BLAST alignments are changed.
+ *
+ * The alignment is attempted at most three times. After every attempt
+ * gap_align->gap_x_dropoff is doubled (also after the last one), and a
+ * script whose score fell short of the S-W score is freed before the
+ * next attempt.
+ *
+ * @param query the query sequence [in]
+ * @param queryLength length of the query sequence [in]
+ * @param queryStart start of the alignment in the query sequence [in]
+ * @param queryEnd end of the alignment in the query sequence,
+ *        as computed by the Smith-Waterman algorithm [in]
+ * @param match the subject (database) sequence [in]
+ * @param matchLength length of the subject sequence [in]
+ * @param matchStart start of the alignment in the subject sequence [in]
+ * @param matchEnd end of the alignment in the subject sequence,
+ *        as computed by the Smith-Waterman algorithm [in]
+ * @param gap_align parameters for a gapped alignment; its gap_x_dropoff
+ *        is modified [in|out]
+ * @param scoringParams settings for gapped alignment [in]
+ * @param score score computed by the Smith-Waterman algorithm [in]
+ * @param localScalingFactor the factor by which the scoring system has
+ *        been scaled in order to obtain greater precision [in]
+ * @param queryAlignmentExtent length of the alignment in the query
+ *        sequence, as computed by the x-drop algorithm [out]
+ * @param matchAlignmentExtent length of the alignment in the subject
+ *        sequence, as computed by the x-drop algorithm [out]
+ * @param reverseAlignScript alignment script produced by the x-drop
+ *        algorithm; allocated here, to be freed by the caller [out]
+ * @param newScore x-drop alignment score, divided by
+ *        localScalingFactor and rounded [out]
+ */
+static void
+Kappa_SWFindFinalEndsUsingXdrop(
+  Uint1* query,
+  Int4 queryLength,
+  Int4 queryStart,
+  Int4 queryEnd,
+  Uint1* match,
+  Int4 matchLength,
+  Int4 matchStart,
+  Int4 matchEnd,
+  BlastGapAlignStruct* gap_align,
+  const BlastScoringParameters* scoringParams,
+  Int4 score,
+  double localScalingFactor,
+  Int4 * queryAlignmentExtent,
+  Int4 * matchAlignmentExtent,
+  Int4 ** reverseAlignScript,
+  Int4 * newScore
+) {
+  Int4 xdropScore;   /* score obtained with the x-drop method */
+  Int4 attempts = 0; /* number of alignments tried so far */
+
+  for (;;) {
+    Int4 *scriptCursor; /* write position handed to ALIGN_EX */
+
+    scriptCursor =
+      (Int4 *) calloc(matchLength, (queryLength + 3) * sizeof(Int4));
+    *reverseAlignScript = scriptCursor;
+
+    xdropScore =
+      ALIGN_EX(query + queryStart - 1, match + matchStart - 1,
+               queryEnd - queryStart + 1, matchEnd - matchStart + 1,
+               *reverseAlignScript, queryAlignmentExtent,
+               matchAlignmentExtent, &scriptCursor,
+               gap_align, scoringParams, queryStart - 1, FALSE, FALSE);
+
+    gap_align->gap_x_dropoff *= 2;
+    attempts++;
+
+    if (xdropScore >= score || attempts >= 3) {
+      break; /* keep this script: good enough, or out of retries */
+    }
+    sfree(*reverseAlignScript);
+  }
+
+  *newScore = BLAST_Nint(((double) xdropScore) / localScalingFactor);
+}
+
+
+/** A Kappa_SearchParameters represents the data needed by
+ * RedoAlignmentCore to adjust the parameters of a search, including
+ * the original values of these parameters, which Kappa_RestoreSearch
+ * writes back.
+ */
+struct Kappa_SearchParameters {
+ Int4 gapOpen; /**< a penalty for the existence of a gap */
+ Int4 gapExtend; /**< a penalty for each residue (or nucleotide)
+ * in the gap */
+ Int4 gapDecline; /**< a penalty for declining to align a pair of
+ * residues */
+ Int4 mRows, nCols; /**< the number of rows and columns in a scoring
+ * matrix */
+ double scaledUngappedLambda; /**< The value of Karlin-Altschul
+ * parameter lambda, rescaled
+ * to allow scores to have
+ * greater precision */
+ Int4 **startMatrix, **origMatrix; /**< startMatrix: scaled matrix filled in by Kappa_RescaleSearch and used as the input of every adjustment; origMatrix: copy of the caller's matrix taken by Kappa_RecordInitialSearch */
+ SFreqRatios* sFreqRatios; /**< Stores the frequency ratios along
+ * with their bit scale factor */
+ double **startFreqRatios; /**< frequency ratios to start
+ * investigating each pair; in the
+ * non-position-based case this points
+ * at sFreqRatios->data */
+ double *scoreArray; /**< array of score probabilities */
+ double *resProb; /**< array of probabilities for each residue in
+ * a matching sequence */
+ double *queryProb; /**< array of probabilities for each residue in
+ * the query */
+ Boolean adjustParameters; /**< if true, use composition-based statistics */
+
+ Blast_ScoreFreq* return_sfp; /**< score frequency pointers to
+ * compute lambda */
+ Blast_KarlinBlk *kbp_gap_orig, **orig_kbp_gap_array; /* FIXME, AS only had one * on orig_kbp_gap_array, check with him about this. */
+ double scale_factor; /**< The original scale factor (to be restored). */
+};
+typedef struct Kappa_SearchParameters Kappa_SearchParameters;
+
+
+/** Release the data associated with a Kappa_SearchParameters and
+ * delete the object. Calling this with a NULL argument, or with a
+ * pointer to a NULL object, is a no-op.
+ * @param searchParams the object to be deleted; *searchParams is set to
+ *        NULL on return [in][out]
+*/
+static void
+Kappa_SearchParametersFree(Kappa_SearchParameters ** searchParams)
+{
+  Kappa_SearchParameters *sp;
+
+  if (searchParams == NULL || *searchParams == NULL)
+    return;
+
+  /* for convenience, remove one level of indirection from searchParams */
+  sp = *searchParams;
+
+  if(sp->kbp_gap_orig) Blast_KarlinBlkDestruct(sp->kbp_gap_orig);
+
+  /* An extra row is added at end during allocation. */
+  if(sp->startMatrix) _PSIDeallocateMatrix((void**) sp->startMatrix, 1+sp->mRows);
+  if(sp->origMatrix) _PSIDeallocateMatrix((void**) sp->origMatrix, 1+sp->mRows);
+  if(sp->sFreqRatios) _PSIMatrixFrequencyRatiosFree(sp->sFreqRatios);
+
+  /* startFreqRatios is deliberately not freed: in the non-position-based
+   * case it points at sFreqRatios->data, which was released just above.
+   * NOTE(review): a position-based startFreqRatios would not be covered
+   * by this; the former call was
+   *   freeStartFreqs(sp->startFreqRatios, sp->mRows);
+   */
+
+  if(sp->return_sfp) sfree(sp->return_sfp);
+  if(sp->scoreArray) sfree(sp->scoreArray);
+  if(sp->resProb) sfree(sp->resProb);
+  if(sp->queryProb) sfree(sp->queryProb);
+
+  sfree(*searchParams);
+  *searchParams = NULL;
+}
+
+
+/** Allocate a Kappa_SearchParameters. All pointer members start out
+ * NULL; the work buffers are allocated only when composition-based
+ * statistics are requested. The gap penalties, scaled lambda and scale
+ * factor are not set here.
+ * @param rows number of rows in the scoring matrix; used only for a
+ *        position-based search, otherwise BLASTAA_SIZE rows are used [in]
+ * @param adjustParameters if true, use composition-based statistics [in]
+ * @param positionBased if true, the search is position-based [in]
+ * @return the newly allocated object
+*/
+static Kappa_SearchParameters *
+Kappa_SearchParametersNew(
+  Int4 rows,
+  Boolean adjustParameters,
+  Boolean positionBased
+) {
+  Kappa_SearchParameters *params = malloc(sizeof(Kappa_SearchParameters));
+
+  params->adjustParameters = adjustParameters;
+
+  /* matrix dimensions */
+  if (positionBased) {
+    params->mRows = rows;
+  } else {
+    params->mRows = BLASTAA_SIZE;
+  }
+  params->nCols = BLASTAA_SIZE;
+
+  /* nothing is owned yet */
+  params->orig_kbp_gap_array = NULL;
+  params->kbp_gap_orig = NULL;
+  params->startMatrix = NULL;
+  params->origMatrix = NULL;
+  params->sFreqRatios = NULL;
+  params->startFreqRatios = NULL;
+  params->return_sfp = NULL;
+  params->scoreArray = NULL;
+  params->resProb = NULL;
+  params->queryProb = NULL;
+
+  if (!adjustParameters) {
+    return params;
+  }
+
+  /* buffers needed only for composition-based statistics */
+  params->kbp_gap_orig = Blast_KarlinBlkCreate();
+  params->startMatrix = allocateScaledMatrix(params->mRows);
+  params->origMatrix = allocateScaledMatrix(params->mRows);
+
+  params->resProb = (double *) calloc(BLASTAA_SIZE, sizeof(double));
+  params->scoreArray = (double *) calloc(scoreRange, sizeof(double));
+  params->return_sfp =
+    (Blast_ScoreFreq*) calloc(1, sizeof(Blast_ScoreFreq));
+
+  if (!positionBased) {
+    /* query residue frequencies are only used with a standard matrix */
+    params->queryProb = (double *) calloc(BLASTAA_SIZE, sizeof(double));
+  }
+
+  return params;
+}
+
+
+/** Save the current values of everything the redo step is going to
+ * modify: gap penalties, scale factor, the gapped Karlin block and the
+ * scoring matrix. For a non-position-based search the residue
+ * probabilities of the query are computed as well. Does nothing unless
+ * searchParams->adjustParameters is set.
+ * @param searchParams the object to be filled in [in|out]
+ * @param queryBlk query sequence [in]
+ * @param queryInfo query sequence information [in]
+ * @param sbp Scoring Blk (contains Karlin-Altschul parameters) [in]
+ * @param scoring gap-open/extend/decline_align information [in]
+ */
+static void
+Kappa_RecordInitialSearch(Kappa_SearchParameters * searchParams,
+                          BLAST_SequenceBlk * queryBlk,
+                          BlastQueryInfo* queryInfo,
+                          BlastScoreBlk* sbp,
+                          const BlastScoringParameters* scoring)
+{
+  /* offset in buffer of start of query */
+  const Int4 k_context_offset = queryInfo->context_offsets[0];
+  Uint1* query = &queryBlk->sequence[k_context_offset];
+  Int4 queryLength = BLAST_GetQueryLength(queryInfo, 0);
+  Blast_KarlinBlk* currentKbp;   /* Karlin block whose values are saved */
+  Blast_KarlinBlk* savedKbp;     /* where they are saved */
+  Int4 **currentMatrix;          /* matrix whose entries are saved */
+  Int4 row;
+  Boolean positionBased = FALSE; /* FIXME, how is this set in scoring options? */
+
+  if (!searchParams->adjustParameters) {
+    return;
+  }
+
+  if (positionBased) {
+    currentKbp = sbp->kbp_gap_psi[0];
+    currentMatrix = sbp->posMatrix;
+  } else {
+    currentKbp = sbp->kbp_gap_std[0];
+    currentMatrix = sbp->matrix;
+    Blast_FillResidueProbability(query, queryLength,
+                                 searchParams->queryProb);
+  }
+
+  /* gap penalties and scale factor */
+  searchParams->gapOpen = scoring->gap_open;
+  searchParams->gapExtend = scoring->gap_extend;
+  searchParams->gapDecline = scoring->decline_align;
+  searchParams->scale_factor = scoring->scale_factor;
+
+  searchParams->orig_kbp_gap_array = sbp->kbp_gap;
+
+  /* gapped Karlin-Altschul parameters */
+  savedKbp = searchParams->kbp_gap_orig;
+  savedKbp->Lambda = currentKbp->Lambda;
+  savedKbp->K = currentKbp->K;
+  savedKbp->logK = currentKbp->logK;
+  savedKbp->H = currentKbp->H;
+
+  /* scoring matrix, entry by entry */
+  for (row = 0; row < searchParams->mRows; row++) {
+    Int4 col;
+    for (col = 0; col < BLASTAA_SIZE; col++) {
+      searchParams->origMatrix[row][col] = currentMatrix[row][col];
+    }
+  }
+}
+
+/** Rescale the search parameters in the search object and options object
+ * to obtain more precision. When sp->adjustParameters is not set nothing
+ * is changed and 1.0 is returned. Otherwise the gap penalties in
+ * scoringParams are multiplied by the scaling factor, the gapped Lambda
+ * in sbp is divided by it, and sp receives the frequency ratios, the
+ * scaled ungapped lambda and (non-position-based only) the scaled start
+ * matrix.
+ * @param sp record of parameters used and frequencies [in|out]
+ * @param queryBlk query sequence [in]
+ * @param queryInfo query sequence information [in]
+ * @param sbp Scoring Blk (contains Karlin-Altschul parameters) [in|out]
+ * @param scoringParams gap-open/extend/decline_align information [in|out]
+ * @return scaling-factor to be used.
+ */
+static double
+Kappa_RescaleSearch(Kappa_SearchParameters * sp,
+                    BLAST_SequenceBlk* queryBlk,
+                    BlastQueryInfo* queryInfo,
+                    BlastScoreBlk* sbp,
+                    BlastScoringParameters* scoringParams)
+{
+  double factor;             /* the scaling factor being applied */
+  double unscaledLambda;     /* ungapped lambda before scaling */
+  Blast_KarlinBlk* gappedKbp; /* gapped parameters to be rescaled */
+  Uint1* query;              /* the query sequence */
+  Int4 queryLength;          /* the length of the query sequence */
+  Boolean positionBased = FALSE; /* FIXME, how is this set with options?? */
+
+  if (!sp->adjustParameters) {
+    return 1.0;
+  }
+
+  if (0 != strcmp(scoringParams->options->matrix, "BLOSUM62_20")) {
+    factor = SCALING_FACTOR;
+  } else {
+    factor = SCALING_FACTOR / 10;
+  }
+
+  /* scale the gap penalties; a decline penalty of INT2_MAX is left alone */
+  scoringParams->scale_factor = factor;
+  scoringParams->gap_open = BLAST_Nint(sp->gapOpen * factor);
+  scoringParams->gap_extend = BLAST_Nint(sp->gapExtend * factor);
+  if (sp->gapDecline != INT2_MAX) {
+    scoringParams->decline_align = BLAST_Nint(sp->gapDecline * factor);
+  }
+
+  query = &queryBlk->sequence[0];
+  queryLength = BLAST_GetQueryLength(queryInfo, 0);
+
+  if (positionBased) {
+    sp->startFreqRatios =
+      getStartFreqRatios(sbp, query, scoringParams->options->matrix,
+                         sbp->posFreqs, queryLength);
+    /* FIXME: the start matrix is not scaled for this case yet; the
+     * disabled call was
+     *   scalePosMatrix(sp->startMatrix, sbp->matrix,
+     *                  scoringParams->options->matrix, sbp->posFreqs,
+     *                  query, queryLength, sbp);
+     */
+    unscaledLambda = sbp->kbp_psi[0]->Lambda;
+    sp->scaledUngappedLambda = unscaledLambda / factor;
+    gappedKbp = sbp->kbp_gap_psi[0];
+  } else {
+    /* startFreqRatios borrows the data owned by sFreqRatios */
+    sp->sFreqRatios =
+      _PSIMatrixFrequencyRatiosNew(scoringParams->options->matrix);
+    sp->startFreqRatios = sp->sFreqRatios->data;
+    unscaledLambda = sbp->kbp_ideal->Lambda;
+    sp->scaledUngappedLambda = unscaledLambda / factor;
+    computeScaledStandardMatrix(sp->startMatrix,
+                                scoringParams->options->matrix,
+                                sp->scaledUngappedLambda);
+    gappedKbp = sbp->kbp_gap_std[0];
+  }
+
+  gappedKbp->Lambda /= factor;
+  gappedKbp->logK = log(gappedKbp->K);
+
+  return factor;
+}
+
+
+/* LambdaRatioLowerBound is the smallest value Kappa_AdjustSearch lets the
+ * ratio of corrected to original lambda take. It is used when the
+ * expected score is too large, causing the lambda estimate (computed with
+ * Blast_KarlinLambdaNR; formerly named impalaKarlinLambdaNR in this note)
+ * to be too small, or to fail entirely returning -1 */
+#define LambdaRatioLowerBound 0.5
+
+/** Adjust the scoring matrix to the composition of one subject sequence.
+ * When sp->adjustParameters is not set, nothing is done.
+ * @param sp a record of the initial search parameters [in|out]
+ * @param queryLength length of query sequence [in]
+ * @param filteredSequence a filtered subject sequence [in]
+ * @param length length of the filtered sequence [in]
+ * @param matrix a scoring matrix to be adjusted [out]
+ * @return 0 on success, 1 if the lambda ratio is not positive
+ */
+static Int4
+Kappa_AdjustSearch(
+  Kappa_SearchParameters * sp,
+  Int4 queryLength,
+  Uint1* filteredSequence,
+  Int4 length,
+  Int4 ** matrix
+) {
+  Blast_ScoreFreq* scoreFreqs;   /* score frequencies for this subject */
+  double correctedLambda;        /* new value of ungapped lambda */
+  double ratio;                  /* corrected lambda / original lambda */
+  Boolean positionBased = FALSE; /* FIXME */
+
+  if (!sp->adjustParameters) {
+    return 0;
+  }
+
+  /* residue composition of the subject drives the score frequencies */
+  Blast_FillResidueProbability(filteredSequence, length, sp->resProb);
+
+  if (positionBased) {
+    scoreFreqs = posfillSfp(sp->startMatrix, queryLength, sp->resProb,
+                            sp->scoreArray, sp->return_sfp, scoreRange);
+  } else {
+    scoreFreqs = notposfillSfp(sp->startMatrix, sp->resProb, sp->queryProb,
+                               sp->scoreArray, sp->return_sfp, scoreRange);
+  }
+
+  correctedLambda =
+    Blast_KarlinLambdaNR(scoreFreqs, sp->scaledUngappedLambda);
+
+  /* Clamp the ratio to [LambdaRatioLowerBound, 1]. Because of the lower
+   * clamp the ratio should always be positive here, even if the lambda
+   * computation reported an error; the check below is kept in case the
+   * clamp is ever removed. */
+  ratio = correctedLambda / sp->scaledUngappedLambda;
+  ratio = MIN(1, ratio);
+  ratio = MAX(ratio, LambdaRatioLowerBound);
+
+  if (!(ratio > 0)) {
+    return 1;
+  }
+
+  scaleMatrix(matrix, sp->startMatrix, sp->startFreqRatios, sp->mRows,
+              sp->scaledUngappedLambda, ratio);
+  return 0;
+}
+
+
+/** Write the values saved by Kappa_RecordInitialSearch back: gap
+ * penalties, scale factor, the gapped Karlin block and every entry of
+ * the scoring matrix. Does nothing unless
+ * searchParams->adjustParameters is set.
+ * @param searchParams a record of the original values [in]
+ * @param sbp Karlin-Altschul parameters to be restored. [out]
+ * @param matrix the scoring matrix to be restored [out]
+ * @param scoring the scoring parameters to be restored [out]
+*/
+static void
+Kappa_RestoreSearch(
+  Kappa_SearchParameters * searchParams,
+  BlastScoreBlk* sbp,
+  Int4 ** matrix,
+  BlastScoringParameters* scoring
+) {
+  Blast_KarlinBlk* target;       /* Karlin block being restored */
+  const Blast_KarlinBlk* saved;  /* the recorded original values */
+  Int4 row;
+  Boolean positionBased = FALSE; /* FIXME. */
+
+  if (!searchParams->adjustParameters) {
+    return;
+  }
+
+  scoring->gap_open = searchParams->gapOpen;
+  scoring->gap_extend = searchParams->gapExtend;
+  scoring->decline_align = searchParams->gapDecline;
+  scoring->scale_factor = searchParams->scale_factor;
+
+  sbp->kbp_gap = searchParams->orig_kbp_gap_array;
+
+  target = positionBased ? sbp->kbp_gap_psi[0] : sbp->kbp_gap_std[0];
+  saved = searchParams->kbp_gap_orig;
+  target->Lambda = saved->Lambda;
+  target->K = saved->K;
+  target->logK = saved->logK;
+  target->H = saved->H;
+
+  for (row = 0; row < searchParams->mRows; row++) {
+    Int4 col;
+    for (col = 0; col < BLASTAA_SIZE; col++) {
+      matrix[row][col] = searchParams->origMatrix[row][col];
+    }
+  }
+}
+
+/** Find the best expect value among the HSPs of a list.
+ *
+ * @param hsplist the list to be examined [in]
+ * @return the best (lowest) expect value found; (double) INT4_MAX when
+ *         the list is NULL or holds no HSPs
+ */
+static double
+BlastHitsGetBestEvalue(BlastHSPList* hsplist)
+{
+  double best = (double) INT4_MAX; /* lowest e-value seen so far */
+
+  if (hsplist != NULL) {
+    Int4 i;
+    for (i = 0; i < hsplist->hspcnt; i++) {
+      best = MIN(best, hsplist->hsp_array[i]->evalue);
+    }
+  }
+
+  return best;
+}
+
+/** Save the results for one query, and clean the internal structure.
+ * A negative query_index means "no query yet": nothing is saved and the
+ * heap is not touched.
+ * @param significantMatches heap of alignments collected for the query;
+ *        released on return [in|out]
+ * @param query_index index of the query in results->hitlist_array [in]
+ * @param hitlist_size size passed to Blast_HitListNew [in]
+ * @param results receives a new hit list for this query [in|out]
+ * @return always 0
+ */
+static Int2
+Blast_HSPResultsUpdateFromSWheap(SWheap* significantMatches,
+                                 Int4 query_index, Int4 hitlist_size,
+                                 BlastHSPResults* results)
+{
+  if (query_index >= 0) {
+    /* all new alignments, concatenated into a single, flat list */
+    SWResults *flatList = SWheapToFlatList(significantMatches);
+
+    results->hitlist_array[query_index] = Blast_HitListNew(hitlist_size);
+
+    if (flatList != NULL) {
+      newConvertSWalignsUpdateHitList(flatList,
+                                      results->hitlist_array[query_index]);
+    }
+    flatList = SWResultsFree(flatList);
+
+    /* Clean up */
+    SWheapRelease(significantMatches);
+  }
+  return 0;
+}
+
+/** Recompute the alignments of every HSP list read from hsp_stream,
+ * optionally with composition-based statistics and/or Smith-Waterman,
+ * and store the significant ones in results.
+ *
+ * The scoring parameters, the gapped Karlin block in sbp and the scoring
+ * matrix are rescaled while this routine runs. They are restored, and all
+ * temporary storage is released, on the normal path as well as when the
+ * traceback of an HSP list fails.
+ *
+ * @param queryBlk query sequence [in]
+ * @param queryInfo query sequence information [in]
+ * @param sbp scoring block; kbp_ideal is recomputed, other values are
+ *        modified temporarily [in|out]
+ * @param hsp_stream source of the HSP lists to be redone [in]
+ * @param seqSrc used to fetch the matching sequences [in]
+ * @param scoringParams scoring parameters, modified temporarily [in|out]
+ * @param extendParams extension parameters; select Smith-Waterman and
+ *        composition-based statistics [in]
+ * @param hitsavingParams hit saving parameters [in]
+ * @param psiOptions supplies inclusion_ethresh [in]
+ * @param results receives one hit list per query [in|out]
+ * @return 0 on success (also when nothing is done because matrix
+ *         BLOSUM62_20 was requested without composition-based
+ *         statistics); -1 if the extension or hit saving parameters or
+ *         their options are missing; otherwise the non-zero status of
+ *         BLAST_GapAlignStructNew or Blast_TracebackFromHSPList
+ */
+Int2
+Kappa_RedoAlignmentCore(BLAST_SequenceBlk * queryBlk,
+                  BlastQueryInfo* queryInfo,
+                  BlastScoreBlk* sbp,
+                  BlastHSPStream* hsp_stream,
+                  const BlastSeqSrc* seqSrc,
+                  BlastScoringParameters* scoringParams,
+                  const BlastExtensionParameters* extendParams,
+                  const BlastHitSavingParameters* hitsavingParams,
+                  const PSIBlastOptions* psiOptions,
+                  BlastHSPResults* results)
+{
+  const Uint1 k_program_name = blast_type_blastp;
+  Boolean adjustParameters = FALSE; /* If true take match composition into
+                                       account and seg match sequence. */
+  Boolean SmithWaterman = FALSE;    /* Use Smith-Waterman to get scores. */
+  Boolean positionBased = FALSE;    /* FIXME, how is this determined? */
+  Int2 status = 0;                  /* Return value. */
+  Uint1* query;                     /* the query sequence */
+  Int4 queryLength;                 /* the length of the query sequence */
+  double localScalingFactor;        /* the factor by which to scale the
+                                     * scoring system in order to obtain
+                                     * greater precision */
+  Int4 **matrix;                    /* score matrix */
+  Blast_KarlinBlk* kbp;             /* stores Karlin-Altschul parameters */
+  BlastGapAlignStruct* gapAlign;    /* keeps track of gapped alignment
+                                     * params */
+  Kappa_SearchParameters *searchParams; /* the values of the search
+                                         * parameters that are recorded,
+                                         * altered in this routine, and
+                                         * restored before it exits */
+  Kappa_ForbiddenRanges forbidden;  /* forbidden ranges (used in
+                                     * Smith-Waterman alignments) */
+  SWheap significantMatches;        /* a collection of alignments of the
+                                     * query sequence with sequences from
+                                     * the database; only initialized once
+                                     * current_query_index is >= 0 */
+  BlastExtensionOptions* extendOptions = NULL;   /* Options for extension. */
+  BlastHitSavingOptions* hitsavingOptions = NULL; /* Options for saving hits. */
+  BlastHSPList* thisMatch = NULL;
+  Int4 current_query_index;
+
+  /* Get pointer to options for extensions and hitsaving. */
+  if (extendParams == NULL || (extendOptions=extendParams->options) == NULL)
+    return -1;
+
+  if (hitsavingParams == NULL || (hitsavingOptions=hitsavingParams->options) == NULL)
+    return -1;
+
+  if (extendParams->options->eTbackExt == eSmithWatermanTbck)
+    SmithWaterman = TRUE;
+
+  adjustParameters = extendParams->options->compositionBasedStats;
+
+  sbp->kbp_ideal = Blast_KarlinBlkIdealCalc(sbp);
+
+  /**** Validate parameters *************/
+  if(0 == strcmp(scoringParams->options->matrix, "BLOSUM62_20") && !adjustParameters) {
+    return 0;                   /* BLOSUM62_20 only makes sense if
+                                 * adjustParameters is on */
+  }
+  /*****************/
+  query = &queryBlk->sequence[0];
+  queryLength = BLAST_GetQueryLength(queryInfo, 0);
+
+  if(SmithWaterman) {
+    Kappa_ForbiddenRangesInitialize(&forbidden, queryLength);
+  }
+
+  if ((status=BLAST_GapAlignStructNew(scoringParams, extendParams,
+                    BLASTSeqSrcGetMaxSeqLen(seqSrc), sbp, &gapAlign)) != 0) {
+    /* Nothing has been rescaled yet; only the forbidden ranges exist. */
+    if(SmithWaterman)
+      Kappa_ForbiddenRangesRelease(&forbidden);
+    return status;
+  }
+
+  if(positionBased) {
+    kbp = sbp->kbp_gap_psi[0];
+    matrix = sbp->posMatrix;
+    if(sbp->posFreqs == NULL) {
+      sbp->posFreqs = (double**) _PSIAllocateMatrix(queryLength, BLASTAA_SIZE, sizeof(double));
+    }
+  } else {
+    kbp = sbp->kbp_gap_std[0];
+    matrix = sbp->matrix;
+  }
+
+  /* Initialize searchParams */
+  searchParams =
+    Kappa_SearchParametersNew(queryLength, adjustParameters,
+                              positionBased);
+  Kappa_RecordInitialSearch(searchParams, queryBlk, queryInfo, sbp, scoringParams);
+
+  localScalingFactor = Kappa_RescaleSearch(searchParams, queryBlk, queryInfo, sbp, scoringParams);
+
+  /* Initialize current query index to -1, so index 0 would indicate a new
+     query. */
+  current_query_index = -1;
+
+  while (BlastHSPStreamRead(hsp_stream, &thisMatch) != kBlastHSPStream_Eof) {
+    /* for all matching sequences */
+    Kappa_MatchingSequence matchingSeq; /* the data for a matching
+                                         * database sequence */
+
+    if(thisMatch->hsp_array == NULL) {
+      continue;
+    }
+
+    if (thisMatch->query_index != current_query_index) {
+      /* This HSP list is for a new query sequence. Save results for
+         the previous query. */
+      Blast_HSPResultsUpdateFromSWheap(&significantMatches,
+         current_query_index, hitsavingOptions->hitlist_size, results);
+      SWheapInitialize(&significantMatches, hitsavingOptions->hitlist_size,
+                       hitsavingOptions->hitlist_size,
+                       psiOptions->inclusion_ethresh);
+      current_query_index = thisMatch->query_index;
+    }
+
+    if(SWheapWillAcceptOnlyBelowCutoff(&significantMatches)) {
+      /* Only matches with evalue <= psiOptions->inclusion_ethresh will be saved */
+
+      /* e-value for a sequence is the smallest e-value among the hsps
+       * matching a region of the sequence to the query */
+      double minEvalue = BlastHitsGetBestEvalue(thisMatch); /* FIXME, do we have this on new structures? */
+      if(minEvalue > (EVALUE_STRETCH * psiOptions->inclusion_ethresh)) {
+        /* This match is likely to have an evalue > options->inclusion_ethresh
+         * and therefore, we assume that all other matches with higher
+         * input evalues are also unlikely to get sufficient
+         * improvement in a redone alignment */
+        break;
+      }
+    }
+    /* Get the sequence for this match */
+    Kappa_MatchingSequenceInitialize(&matchingSeq, seqSrc,
+                                     thisMatch->oid);
+
+    if(0 == Kappa_AdjustSearch(searchParams, queryLength,
+                               matchingSeq.filteredSequence,
+                               matchingSeq.length, matrix)) {
+      /* Kappa_AdjustSearch ran without error. Compute the new alignments. */
+      if(SmithWaterman) {
+        /* We are performing a Smith-Waterman alignment */
+        double newSwEvalue;          /* the evalue computed by the SW algorithm */
+        Int4 aSwScore;               /* a score computed by the SW algorithm */
+        Int4 matchStart, queryStart; /* Start positions of a local
+                                      * S-W alignment */
+        Int4 queryEnd, matchEnd;     /* End positions of a local
+                                      * S-W alignment */
+
+        Kappa_ForbiddenRangesClear(&forbidden);
+
+        newSwEvalue =
+          BLbasicSmithWatermanScoreOnly(matchingSeq.filteredSequence,
+                                        matchingSeq.length, query,
+                                        queryLength, matrix,
+                                        scoringParams->gap_open,
+                                        scoringParams->gap_extend, &matchEnd,
+                                        &queryEnd, &aSwScore, kbp,
+                                        queryInfo->eff_searchsp_array[0],
+                                        positionBased);
+
+        if(newSwEvalue <= hitsavingOptions->expect_value &&
+           SWheapWouldInsert(&significantMatches, newSwEvalue ) ) {
+          /* The initial local alignment is significant. Continue the
+           * computation */
+          Kappa_MatchRecord aSwMatch; /* the newly computed alignments of
+                                       * the query to the current database
+                                       * sequence */
+
+          Kappa_MatchRecordInitialize(&aSwMatch, newSwEvalue, aSwScore,
+                                      matchingSeq.sequence,
+                                      thisMatch->oid);
+
+          BLSmithWatermanFindStart(matchingSeq.filteredSequence,
+                                   matchingSeq.length, query, matrix,
+                                   scoringParams->gap_open,
+                                   scoringParams->gap_extend, matchEnd, queryEnd,
+                                   aSwScore, &matchStart, &queryStart,
+                                   positionBased);
+
+          do {
+            /* score computed by an x-drop alignment (usually the same
+             * as aSwScore) */
+            Int4 newXdropScore;
+            /* Lengths of the alignment as recomputed by an x-drop alignment,
+               in the query and the match */
+            Int4 queryAlignmentExtent, matchAlignmentExtent;
+            /* Alignment information (script) returned by a x-drop
+             * alignment algorithm */
+            Int4 *reverseAlignScript=NULL;
+
+            /* reset every time: the x-drop routine doubles this value */
+            gapAlign->gap_x_dropoff =
+              (Int4) (extendParams->gap_x_dropoff_final * localScalingFactor);
+
+            Kappa_SWFindFinalEndsUsingXdrop(query, queryLength, queryStart,
+                                            queryEnd,
+                                            matchingSeq.filteredSequence,
+                                            matchingSeq.length, matchStart,
+                                            matchEnd, gapAlign, scoringParams,
+                                            aSwScore, localScalingFactor,
+                                            &queryAlignmentExtent, &matchAlignmentExtent,
+                                            &reverseAlignScript,
+                                            &newXdropScore);
+
+            Kappa_MatchRecordInsertSwAlign(&aSwMatch, newXdropScore,
+                                           newSwEvalue, kbp->Lambda,
+                                           kbp->logK, localScalingFactor,
+                                           matchStart, matchAlignmentExtent,
+                                           queryStart, queryAlignmentExtent,
+                                           reverseAlignScript, query);
+            sfree(reverseAlignScript);
+
+            Kappa_ForbiddenRangesPush(&forbidden, queryStart, queryAlignmentExtent,
+                                      matchStart, matchAlignmentExtent);
+            if(thisMatch->hspcnt > 1) {
+              /* There are more HSPs */
+              newSwEvalue =
+                BLspecialSmithWatermanScoreOnly(matchingSeq.filteredSequence,
+                                                matchingSeq.length, query,
+                                                queryLength, matrix,
+                                                scoringParams->gap_open,
+                                                scoringParams->gap_extend,
+                                                &matchEnd, &queryEnd,
+                                                &aSwScore, kbp,
+                                                queryInfo->eff_searchsp_array[0],
+                                                forbidden.numForbidden,
+                                                forbidden.ranges,
+                                                positionBased);
+
+              if(newSwEvalue <= hitsavingOptions->expect_value) {
+                /* The next local alignment is significant */
+                BLspecialSmithWatermanFindStart(matchingSeq.filteredSequence,
+                                                matchingSeq.length, query,
+                                                matrix,
+                                                scoringParams->gap_open,
+                                                scoringParams->gap_extend,
+                                                matchEnd, queryEnd, aSwScore,
+                                                &matchStart, &queryStart,
+                                                forbidden.numForbidden,
+                                                forbidden.ranges,
+                                                positionBased);
+              }
+              /* end if the next local alignment is significant */
+            }
+            /* end if there are more HSPs */
+          } while(thisMatch->hspcnt > 1 &&
+                  newSwEvalue <= hitsavingOptions->expect_value);
+          /* end do..while there are more HSPs and the next local alignment
+           * is significant */
+
+          SWheapInsert(&significantMatches, &aSwMatch);
+        }
+        /* end if the initial local alignment is significant */
+      } else {
+        /* We are not doing a Smith-Waterman alignment */
+        gapAlign->gap_x_dropoff =
+          (Int4) (extendParams->gap_x_dropoff_final * localScalingFactor);
+        /* thisMatch is updated in place by the following call, so that
+         * afterwards it stores the newly computed alignments between the
+         * query and this matching sequence */
+        status = Blast_TracebackFromHSPList(k_program_name, thisMatch, queryBlk,
+                    matchingSeq.seq_blk, queryInfo, gapAlign, sbp, scoringParams,
+                    extendOptions, hitsavingParams, NULL);
+        if (status != 0) {
+          /* Leave the loop instead of returning, so that the shared
+           * clean-up below restores the rescaled search parameters. */
+          Kappa_MatchingSequenceRelease(&matchingSeq);
+          break;
+        }
+
+        if(thisMatch->hspcnt) {
+          /* There are alignments of the query to this matching sequence */
+          double bestEvalue = BlastHitsGetBestEvalue(thisMatch);
+
+          if(bestEvalue <= hitsavingOptions->expect_value &&
+             SWheapWouldInsert(&significantMatches, bestEvalue ) ) {
+            /* The best alignment is significant */
+            Int4 alignIndex;            /* Iteration index */
+            Int4 numNewAlignments;      /* the number of alignments
+                                         * just computed */
+            Kappa_MatchRecord matchRecord; /* the newly computed alignments
+                                            * of the query to the current
+                                            * database sequence */
+            Int4 bestScore;             /* the score of the highest
+                                         * scoring alignment */
+            numNewAlignments = thisMatch->hspcnt;
+            bestScore =
+              (Int4) BLAST_Nint(((double) thisMatch->hsp_array[0]->score) /
+                                localScalingFactor);
+
+            Kappa_MatchRecordInitialize(&matchRecord, bestEvalue, bestScore,
+                                        matchingSeq.sequence,
+                                        thisMatch->oid);
+
+            for(alignIndex = 0; alignIndex < numNewAlignments; alignIndex++) {
+              Kappa_MatchRecordInsertHSP(&matchRecord,
+                                         &(thisMatch->hsp_array[alignIndex]),
+                                         kbp->Lambda, kbp->logK,
+                                         localScalingFactor, query);
+            }
+            /* end for all alignments of this matching sequence */
+            SWheapInsert(&significantMatches, &matchRecord);
+          }
+          /* end if the best alignment is significant */
+        }
+        /* end there are alignments of the query to this matching sequence */
+      }
+      /* end else we are not doing a Smith-Waterman alignment */
+    }
+    /* end if Kappa_AdjustSearch ran without error */
+    Kappa_MatchingSequenceRelease(&matchingSeq);
+
+  }
+  /* end for all matching sequences */
+
+  if (status == 0) {
+    /* Save results for the last query, which were not saved inside the
+     * loop. */
+    Blast_HSPResultsUpdateFromSWheap(&significantMatches,
+       current_query_index, hitsavingOptions->hitlist_size, results);
+  } else if (current_query_index >= 0) {
+    /* Traceback failed: drop the alignments collected for the current
+     * query, but still release the heap. */
+    SWheapRelease(&significantMatches);
+  }
+
+  if(SmithWaterman)
+    Kappa_ForbiddenRangesRelease(&forbidden);
+  Kappa_RestoreSearch(searchParams, sbp, matrix, scoringParams);
+
+  Kappa_SearchParametersFree(&searchParams);
+
+  gapAlign = BLAST_GapAlignStructFree(gapAlign);
+
+  return status;
+}
diff --git a/algo/blast/core/blast_kappa.h b/algo/blast/core/blast_kappa.h
new file mode 100644
index 00000000..4d6e3e90
--- /dev/null
+++ b/algo/blast/core/blast_kappa.h
@@ -0,0 +1,116 @@
+/* $Id: blast_kappa.h,v 1.7 2004/06/16 14:53:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Alejandro Schaffer
+ *
+ */
+
+/** @file blast_kappa.h
+ * Header file for composition-based statistics
+ * @todo FIXME needs doxygen comments
+ */
+
+#ifndef __BLAST_KAPPA__
+#define __BLAST_KAPPA__
+
+#include <algo/blast/core/blast_stat.h>
+#include <algo/blast/core/blast_hits.h>
+#include <algo/blast/core/blast_hspstream.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Top level routine to recompute alignments for each
+ * match found by the gapped BLAST algorithm
+ * The recomputed alignments are returned in param results; the alignments
+ * are sorted according to the lowest E-value of the best alignment for each
+ * matching sequence; alignments for the same matching sequence
+ * are in the list consecutively regardless of the E-value of the
+ * secondary alignments. Ties in sorted order are much rarer than
+ * for the standard BLAST method, but are broken deterministically
+ * based on the index of the matching sequences in the database.
+ * @param queryBlk query sequence [in]
+ * @param query_info query information [in]
+ * @param sbp (Karlin-Altschul) information for search [in]
+ * @param hsp_stream HSP stream (BlastHSPStream interface); presumably the
+ * source of the hits to be recomputed - TODO confirm against blast_kappa.c [in]
+ * @param seq_src used to fetch database/match sequences [in]
+ * @param scoringParams parameters used for scoring (matrix, gap costs etc.) [in]
+ * @param extendParams parameters used for extension [in]
+ * @param hitsavingParams parameters used for saving hits [in]
+ * @param psi_options options related to psi-blast [in]
+ * @param results All HSP results from previous stages of the search [in] [out]
+ * @return 0 on success, otherwise failure.
+*/
+
+Int2
+Kappa_RedoAlignmentCore(BLAST_SequenceBlk * queryBlk,
+ BlastQueryInfo* query_info,
+ BlastScoreBlk* sbp,
+ BlastHSPStream* hsp_stream,
+ const BlastSeqSrc* seq_src,
+ BlastScoringParameters* scoringParams,
+ const BlastExtensionParameters* extendParams,
+ const BlastHitSavingParameters* hitsavingParams,
+ const PSIBlastOptions* psi_options,
+ BlastHSPResults* results);
+
+#ifdef __cplusplus
+
+}
+#endif
+
+/*
+ * ===========================================================================
+ *
+ * $Log: blast_kappa.h,v $
+ * Revision 1.7 2004/06/16 14:53:03 dondosha
+ * Moved extern "C" after the #includes
+ *
+ * Revision 1.6 2004/06/08 15:09:33 dondosha
+ * Use BlastHSPStream interface in the engine instead of saving hits directly
+ *
+ * Revision 1.5 2004/05/24 17:27:37 madden
+ * Doxygen fix
+ *
+ * Revision 1.4 2004/05/20 15:20:57 madden
+ * Doxygen compliance fixes
+ *
+ * Revision 1.3 2004/05/19 17:03:20 madden
+ * Remove (to blast_kappa.c) #defines for Xchar and StarChar
+ *
+ * Revision 1.2 2004/05/19 14:52:01 camacho
+ * 1. Added doxygen tags to enable doxygen processing of algo/blast/core
+ * 2. Standardized copyright, CVS $Id string, $Log and rcsid formatting and i
+ * location
+ * 3. Added use of @todo doxygen keyword
+ *
+ * Revision 1.1 2004/05/18 13:22:33 madden
+ * SmithWaterman and composition-based stats code
+ *
+ * ===========================================================================
+ */
+
+#endif /* __BLAST_KAPPA__ */
+
diff --git a/algo/blast/core/blast_lookup.c b/algo/blast/core/blast_lookup.c
index 24b33543..4dc0862c 100644
--- a/algo/blast/core/blast_lookup.c
+++ b/algo/blast/core/blast_lookup.c
@@ -1,30 +1,32 @@
-/* $Id: blast_lookup.c,v 1.24 2004/04/27 15:56:53 coulouri Exp $
-
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-
-*/
+/* $Id: blast_lookup.c,v 1.25 2004/05/19 14:52:02 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ */
+
+/** @file blast_lookup.c
+ * @todo FIXME needs file description
+ */
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_options.h>
@@ -34,7 +36,8 @@
#include <algo/blast/core/blast_encoding.h>
#include "blast_inline.h"
-static char const rcsid[] = "$Id: blast_lookup.c,v 1.24 2004/04/27 15:56:53 coulouri Exp $";
+static char const rcsid[] =
+ "$Id: blast_lookup.c,v 1.25 2004/05/19 14:52:02 camacho Exp $";
static void AddWordHits( LookupTable *lookup,
Int4** matrix,
diff --git a/algo/blast/core/blast_lookup.h b/algo/blast/core/blast_lookup.h
index 408f6f65..3153b838 100644
--- a/algo/blast/core/blast_lookup.h
+++ b/algo/blast/core/blast_lookup.h
@@ -1,30 +1,33 @@
-/* $Id: blast_lookup.h,v 1.7 2004/04/06 16:34:37 papadopo Exp $
-
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
+/* $Id: blast_lookup.h,v 1.10 2004/06/04 15:16:01 papadopo Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ */
-*/
+/** @file blast_lookup.h
+ * Contains definitions and prototypes for the lookup table
+ * construction and scanning phase of blastn, blastp, RPS blast
+ */
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_options.h>
@@ -41,69 +44,96 @@ extern "C" {
/* some defines for the pv_array, as this changes from 32-bit to 64-bit systems. */
#if defined(LONG_BIT) && LONG_BIT==64
-#define PV_ARRAY_TYPE Uint8 /* The pv_array 'native' type. */
-#define PV_ARRAY_BYTES 8 /* number of BYTES in 'native' type. */
-#define PV_ARRAY_BTS 6 /* bits-to-shift from lookup_index to pv_array index. */
-#define PV_ARRAY_MASK 63 /* amount to mask off. */
+#define PV_ARRAY_TYPE Uint8 /**< The pv_array 'native' type. */
+#define PV_ARRAY_BYTES 8 /**< number of BYTES in 'native' type. */
+#define PV_ARRAY_BTS 6 /**< bits-to-shift from lookup_index to pv_array index. */
+#define PV_ARRAY_MASK 63 /**< amount to mask off. */
#else
-#define PV_ARRAY_TYPE Uint4 /* The pv_array 'native' type. */
-#define PV_ARRAY_BYTES 4 /* number of BYTES in 'native' type. */
-#define PV_ARRAY_BTS 5 /* bits-to-shift from lookup_index to pv_array index. */
-#define PV_ARRAY_MASK 31 /* amount to mask off. */
+#define PV_ARRAY_TYPE Uint4 /**< The pv_array 'native' type. */
+#define PV_ARRAY_BYTES 4 /**< number of BYTES in 'native' type. */
+#define PV_ARRAY_BTS 5 /**< bits-to-shift from lookup_index to pv_array index. */
+#define PV_ARRAY_MASK 31 /**< amount to mask off. */
#endif
+/** Set the bit at position 'index' in the PV
+ * array bitfield within 'lookup'
+ */
/* The 1 being shifted is widened to PV_ARRAY_TYPE (as NA_PV_TEST does) so that
 * bit positions up to PV_ARRAY_MASK remain valid when PV_ARRAY_TYPE is wider
 * than int; shifting a plain int by 32 or more is undefined behaviour. */
#define PV_SET(lookup, index) ( (lookup)->pv[(index)>>PV_ARRAY_BTS] |= ((PV_ARRAY_TYPE) 1) << ((index) & PV_ARRAY_MASK) )
-#define PV_TEST(lookup, index) ( (lookup)->pv[(index)>>PV_ARRAY_BTS] & 1 << ((index) & PV_ARRAY_MASK) )
-
-/* Number of bits to shift in lookup index calculation when scanning compressed
- * nucleotide sequence
+/** Test the bit at position 'index' in the PV
+ * array bitfield within 'lookup'
*/
-#define FULL_BYTE_SHIFT 8
+/* The mask bit is built from a PV_ARRAY_TYPE-wide 1 (as NA_PV_TEST does) so
+ * that bit positions up to PV_ARRAY_MASK can be tested when PV_ARRAY_TYPE is
+ * wider than int; the result is nonzero iff the bit is set. */
+#define PV_TEST(lookup, index) ( (lookup)->pv[(index)>>PV_ARRAY_BTS] & ((PV_ARRAY_TYPE) 1) << ((index) & PV_ARRAY_MASK) )
- /* structure defining one cell of the compacted lookup table */
- /* stores the number of hits and
- up to three hits if the total number of hits is <= 3
- or
- a pointer to more hits if the total number of hits is > 3
- */
+#define FULL_BYTE_SHIFT 8 /**< Number of bits to shift in lookup
+ index calculation when scanning
+ compressed nucleotide sequence */
-#define HITS_ON_BACKBONE 3
+#define HITS_ON_BACKBONE 3 /**< maximum number of hits in one lookup
+ table cell */
- typedef struct LookupBackboneCell {
- Int4 num_used; /* num valid positions */
+/** structure defining one cell of the compacted lookup table */
+typedef struct LookupBackboneCell {
+ Int4 num_used; /**< number of hits stored for this cell */
union {
- Int4 overflow_cursor;
- Int4 entries[HITS_ON_BACKBONE];
+ Int4 overflow_cursor; /**< integer offset into the overflow array
+ where the list of hits for this cell begins */
+ Int4 entries[HITS_ON_BACKBONE]; /**< if the number of hits for this
+ cell is HITS_ON_BACKBONE or less,
+ the hits are all stored directly in
+ the cell */
} payload;
- } LookupBackboneCell;
+} LookupBackboneCell;
- typedef struct LookupTable {
- Int4 threshold; /* the score threshold for neighboring words */
- Int4 neighbor_matches; /* the number of neighboring words found while indexing the queries, used for informational/debugging purposes */
- Int4 exact_matches; /* the number of exact matches found while indexing the queries, used for informational/debugging purposes */
- Int4 mask; /* part of index to mask off, that is, top (wordsize*charsize) bits should be discarded. */
- Int4 word_length; /* Length in bases of the full word match required to
- trigger extension */
- Int4 wordsize; /* number of full bytes in a full word */
- Int4 reduced_wordsize; /* number of bytes in a word stored in the LT */
- Int4 charsize; /* number of bits for a base/residue */
- Int4 scan_step; /* number of bases between successive words */
- Int4 alphabet_size; /* number of letters in the alphabet */
- Int4 backbone_size; /* number of cells in the backbone */
- Int4 longest_chain; /* length of the longest chain on the backbone */
- Int4 ** thin_backbone; /* the "thin" backbone. for each index cell, maintain a pointer to a dynamically-allocated chain of hits. */
- LookupBackboneCell * thick_backbone; /* the "thick" backbone. after queries are indexed, compact the backbone to put at most HITS_ON_BACKBONE hits on the backbone, otherwise point to some overflow storage */
- Int4 * overflow; /* the overflow array for the compacted lookup table */
- Int4 overflow_size; /* Number of elements in the overflow array (above). */
- PV_ARRAY_TYPE *pv; /* presence vector. a bit vector indicating which cells are occupied */
- Uint1* neighbors; /* neighboring word array */
- Int4 neighbors_length; /* length of neighboring word array */
- Boolean use_pssm; /* if True use PSSM rather than (protein) sequence to construct lookup table. */
+/** The basic lookup table structure for blastn
+ * and blastp searches
+ */
+typedef struct LookupTable {
+ Int4 threshold; /**< the score threshold for neighboring words */
+ Int4 neighbor_matches; /**< the number of neighboring words found while
+ indexing the queries, used for informational/
+ debugging purposes */
+ Int4 exact_matches; /**< the number of exact matches found while
+ indexing the queries, used for informational/
+ debugging purposes */
+ Int4 mask; /**< part of index to mask off, that is, top
+ (wordsize*charsize) bits should be discarded. */
+ Int4 word_length; /**< Length in bases of the full word match
+ required to trigger extension */
+ Int4 wordsize; /**< number of full bytes in a full word */
+ Int4 reduced_wordsize; /**< number of bytes in a word stored in the LUT */
+ Int4 charsize; /**< number of bits for a base/residue */
+ Int4 scan_step; /**< number of bases between successive words */
+ Int4 alphabet_size; /**< number of letters in the alphabet */
+ Int4 backbone_size; /**< number of cells in the backbone */
+ Int4 longest_chain; /**< length of the longest chain on the backbone */
+ Int4 ** thin_backbone; /**< the "thin" backbone. for each index cell,
+ maintain a pointer to a dynamically-allocated
+ chain of hits. */
+ LookupBackboneCell * thick_backbone; /**< the "thick" backbone. after
+ queries are indexed, compact the
+ backbone to put at most
+ HITS_ON_BACKBONE hits on the
+ backbone, otherwise point to
+ some overflow storage */
+ Int4 * overflow; /**< the overflow array for the compacted
+ lookup table */
+ Int4 overflow_size; /**< Number of elements in the overflow array */
+ PV_ARRAY_TYPE *pv; /**< Presence vector bitfield; bit positions that
+ are set indicate that the corresponding thick
+ backbone cell contains hits */
+ Uint1* neighbors; /**< neighboring word array, used during lookup
+ table construction to hold the complete set
+ of subject words that can occur during the
+ search*/
+ Int4 neighbors_length; /**< length of neighboring word array */
+ Boolean use_pssm; /**< if TRUE, lookup table construction will assume
+ that the underlying score matrix is position-
+ specific */
} LookupTable;
/** Create a mapping from word w to the supplied query offset
@@ -140,11 +170,11 @@ Int4 _BlastAaLookupFinalize(LookupTable* lookup);
* @param array_size length of the offset arrays [in]
* @return The number of hits found.
*/
-Int4 BlastAaScanSubject(const LookupTableWrap* lookup_wrap, /* in: the LUT */
+Int4 BlastAaScanSubject(const LookupTableWrap* lookup_wrap,
const BLAST_SequenceBlk *subject,
Int4* offset,
- Uint4 * NCBI_RESTRICT query_offsets, /* out: pointer to the array to which hits will be copied */
- Uint4 * NCBI_RESTRICT subject_offsets, /* out : pointer to the array where offsets will be stored */
+ Uint4 * NCBI_RESTRICT query_offsets,
+ Uint4 * NCBI_RESTRICT subject_offsets,
Int4 array_size);
/**
@@ -162,16 +192,17 @@ Int4 BlastAaScanSubject(const LookupTableWrap* lookup_wrap, /* in: the LUT */
* @param array_size length of the offset arrays [in]
* @return The number of hits found.
*/
-Int4 BlastRPSScanSubject(const LookupTableWrap* lookup_wrap, /* in: the LUT */
+Int4 BlastRPSScanSubject(const LookupTableWrap* lookup_wrap,
const BLAST_SequenceBlk *sequence,
Int4* offset,
- Uint4 * table_offsets, /* out : pointer to the array where offsets will be stored */
- Uint4 * sequence_offsets, /* out: pointer to the array to which hits will be copied */
+ Uint4 * table_offsets,
+ Uint4 * sequence_offsets,
Int4 array_size);
/** Create a new protein lookup table.
* @param opt pointer to lookup table options structure [in]
* @param lut handle to lookup table structure [in/modified]
+ * @return 0 if successful, nonzero on failure
*/
Int4 BlastAaLookupNew(const LookupTableOptions* opt, LookupTable* * lut);
@@ -181,12 +212,16 @@ Int4 BlastAaLookupNew(const LookupTableOptions* opt, LookupTable* * lut);
* @param opt pointer to lookup table options structure [in]
* @param lut handle to lookup table [in/modified]
* @param is_protein boolean indicating protein or nucleotide [in]
+ * @return 0 if successful, nonzero on failure
*/
Int4 LookupTableNew(const LookupTableOptions* opt, LookupTable* * lut,
Boolean is_protein);
-/** Free the lookup table. */
+/** Free the lookup table.
+ * @param lookup The lookup table structure to be freed
+ * @return NULL
+ */
LookupTable* LookupTableDestruct(LookupTable* lookup);
/** Index an array of queries.
@@ -253,52 +288,70 @@ Int4 AddNeighboringWords(LookupTable* lookup,
Int4 offset,
Int4 query_bias);
-#define SET_HIGH_BIT(x) (x |= 0x80000000)
-#define CLEAR_HIGH_BIT(x) (x &= 0x7FFFFFFF)
-#define TEST_HIGH_BIT(x) ( ((x) >> 31) & 1 )
-
/* RPS blast structures and functions */
-#define RPS_HITS_PER_CELL 3
+#define RPS_HITS_PER_CELL 3 /**< maximum number of hits in an RPS backbone
+ cell; this may be redundant (have the same
+ value as HITS_ON_BACKBONE) but must be
+ separate to guarantee binary compatibility
+ with existing RPS blast databases */
+/** structure defining one cell of the RPS lookup table */
typedef struct RPSBackboneCell {
- Int4 num_used;
- Int4 entries[RPS_HITS_PER_CELL];
+ Int4 num_used; /**< number of hits in this cell */
+ Int4 entries[RPS_HITS_PER_CELL]; /**< if the number of hits in this cell
+ is RPS_HITS_PER_CELL or less, all
+ hits go into this array. Otherwise,
+ the first hit in the list goes into
+ element 0 of the array, and element 1
+ contains the byte offset into the
+ overflow array where the list of the
+ remaining hits begins */
} RPSBackboneCell;
+/**
+ * The basic lookup table structure for RPS blast searches
+ */
typedef struct RPSLookupTable {
- Int4 wordsize; /* number of full bytes in a full word */
- Int4 longest_chain; /* length of the longest chain on the backbone */
- Int4 mask; /* part of index to mask off, that is, top (wordsize*charsize) bits should be discarded. */
- Int4 alphabet_size; /* number of letters in the alphabet */
- Int4 charsize; /* number of bits for a base/residue */
- Int4 backbone_size; /* number of cells in the backbone */
- RPSBackboneCell * rps_backbone; /* the lookup table used for RPS blast */
- Int4 ** rps_pssm; /* Pointer to memory-mapped RPS Blast profile file */
- Int4 * rps_seq_offsets; /* array of start offsets for each RPS DB seq. */
- RPSAuxInfo* rps_aux_info; /* RPS Blast auxiliary information */
- Int4 * overflow; /* the overflow array for the compacted lookup table */
- Int4 overflow_size; /* Number of elements in the overflow array (above). */
- PV_ARRAY_TYPE *pv; /* presence vector. a bit vector indicating which cells are occupied */
+ Int4 wordsize; /**< number of full bytes in a full word */
+ Int4 longest_chain; /**< length of the longest chain on the backbone */
+ Int4 mask; /**< part of index to mask off, that is,
+ top (wordsize*charsize) bits should be
+ discarded. */
+ Int4 alphabet_size; /**< number of letters in the alphabet */
+ Int4 charsize; /**< number of bits for a base/residue */
+ Int4 backbone_size; /**< number of cells in the backbone */
+ RPSBackboneCell * rps_backbone; /**< the lookup table used for RPS blast */
+ Int4 ** rps_pssm; /**< Pointer to memory-mapped RPS Blast profile file */
+ Int4 * rps_seq_offsets; /**< array of start offsets for each RPS DB seq. */
+ RPSAuxInfo* rps_aux_info; /**< RPS Blast auxiliary information */
+ Int4 * overflow; /**< the overflow array for the compacted
+ lookup table */
+ Int4 overflow_size;/**< Number of elements in the overflow array */
+ PV_ARRAY_TYPE *pv; /**< Presence vector bitfield; bit positions that
+ are set indicate that the corresponding thick
+ backbone cell contains hits */
} RPSLookupTable;
/** Create a new RPS blast lookup table.
* @param rps_info pointer to structure with RPS setup information [in]
* @param lut handle to lookup table [in/modified]
+ * @return 0 if successful, nonzero on failure
*/
Int4 RPSLookupTableNew(const RPSInfo *rps_info, RPSLookupTable* * lut);
-/** Free the lookup table. */
+/** Free the lookup table.
+ * @param lookup The lookup table structure to free; note that
+ * the rps_backbone and rps_seq_offsets fields are not freed
+ * by this call, since they may refer to memory-mapped arrays
+ * @return NULL
+ */
RPSLookupTable* RPSLookupTableDestruct(RPSLookupTable* lookup);
-/*********************************
- *
- * Nucleotide functions
- *
- *********************************/
+/********************* Nucleotide functions *******************/
-/* Macro to test the presence vector array value for a lookup table index */
+/** Macro to test the presence vector array value for a lookup table index */
#define NA_PV_TEST(pv_array, index, pv_array_bts) (pv_array[(index)>>pv_array_bts]&(((PV_ARRAY_TYPE) 1)<<((index)&PV_ARRAY_MASK)))
/** Scan the compressed subject sequence, returning all word hits, using the
@@ -332,6 +385,7 @@ Int4 BlastNaScanSubject(const LookupTableWrap* lookup_wrap,
* @param max_hits The allocated size of the above arrays - how many offsets
* can be returned [in]
* @param end_offset Where the scanning should stop [in], has stopped [out]
+ * @return The number of hits found from the lookup table
*/
Int4 BlastNaScanSubject_AG(const LookupTableWrap* lookup_wrap,
const BLAST_SequenceBlk* subject,
@@ -345,8 +399,10 @@ Int4 BlastNaScanSubject_AG(const LookupTableWrap* lookup_wrap,
* @param lookup Pointer to the lookup table structure [in] [out]
* @param query The query sequence [in]
* @param location What locations on the query sequence to index? [in]
+ * @return Always 0
*/
-Int4 BlastNaLookupIndexQuery(LookupTable* lookup, BLAST_SequenceBlk* query,
+Int4 BlastNaLookupIndexQuery(LookupTable* lookup,
+ BLAST_SequenceBlk* query,
ListNode* location);
#ifdef __cplusplus
diff --git a/algo/blast/core/blast_message.c b/algo/blast/core/blast_message.c
index f562a163..9048f759 100644
--- a/algo/blast/core/blast_message.c
+++ b/algo/blast/core/blast_message.c
@@ -1,77 +1,35 @@
-/*
-**************************************************************************
-* *
-* COPYRIGHT NOTICE *
-* *
-* This software/database is categorized as "United States Government *
-* Work" under the terms of the United States Copyright Act. It was *
-* produced as part of the author's official duties as a Government *
-* employee and thus can not be copyrighted. This software/database is *
-* freely available to the public for use without a copyright notice. *
-* Restrictions can not be placed on its present or future use. *
-* *
-* Although all reasonable efforts have been taken to ensure the accuracy *
-* and reliability of the software and data, the National Library of *
-* Medicine (NLM) and the U.S. Government do not and can not warrant the *
-* performance or results that may be obtained by using this software, *
-* data, or derivative works thereof. The NLM and the U.S. Government *
-* disclaim any and all warranties, expressed or implied, as to the *
-* performance, merchantability or fitness for any particular purpose or *
-* use. *
-* *
-* In any work or product derived from this material, proper attribution *
-* of the author(s) as the source of the software or data would be *
-* appreciated. *
-* *
-**************************************************************************
+/* $Id: blast_message.c,v 1.12 2004/05/19 14:52:02 camacho Exp $
+ * ===========================================================================
*
- * $Log: blast_message.c,v $
- * Revision 1.11 2004/02/19 21:16:00 dondosha
- * Use enum type for severity argument in Blast_MessageWrite
- *
- * Revision 1.10 2003/08/11 15:01:59 dondosha
- * Added algo/blast/core to all #included headers
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
*
- * Revision 1.9 2003/07/31 14:31:41 camacho
- * Replaced Char for char
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
*
- * Revision 1.8 2003/07/31 00:32:37 camacho
- * Eliminated Ptr notation
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
*
- * Revision 1.7 2003/07/30 16:32:02 madden
- * Use ansi functions when possible
+ * Please cite the author in any work or product based on this material.
*
- * Revision 1.6 2003/07/29 14:42:31 coulouri
- * use strdup() instead of StringSave()
- *
- * Revision 1.5 2003/07/25 21:12:28 coulouri
- * remove constructions of the form "return sfree();" and "a=sfree(a);"
- *
- * Revision 1.4 2003/07/25 19:11:16 camacho
- * Change VoidPtr to const void* in compare functions
- *
- * Revision 1.3 2003/07/25 17:25:43 coulouri
- * in progres:
- * * use malloc/calloc/realloc instead of Malloc/Calloc/Realloc
- * * add sfree() macro and __sfree() helper function to util.[ch]
- * * use sfree() instead of MemFree()
- *
- * Revision 1.2 2003/05/15 22:01:22 coulouri
- * add rcsid string to sources
- *
- * Revision 1.1 2003/03/31 18:22:30 camacho
- * Moved from parent directory
- *
- * Revision 1.2 2003/03/04 14:09:14 madden
- * Fix prototype problem
- *
- * Revision 1.1 2003/02/13 21:38:54 madden
- * Files for messaging warnings etc.
- *
- *
-*/
+ * ===========================================================================
+ */
+
+/** @file blast_message.c
+ * @todo FIXME needs file description & doxygen comments
+ */
-static char const rcsid[] = "$Id: blast_message.c,v 1.11 2004/02/19 21:16:00 dondosha Exp $";
+static char const rcsid[] =
+ "$Id: blast_message.c,v 1.12 2004/05/19 14:52:02 camacho Exp $";
#include <algo/blast/core/blast_message.h>
@@ -127,3 +85,58 @@ Blast_MessagePost(Blast_Message* blast_msg)
return 0;
}
+
+/*
+ * ===========================================================================
+ *
+ * $Log: blast_message.c,v $
+ * Revision 1.12 2004/05/19 14:52:02 camacho
+ * 1. Added doxygen tags to enable doxygen processing of algo/blast/core
+ * 2. Standardized copyright, CVS $Id string, $Log and rcsid formatting and i
+ * location
+ * 3. Added use of @todo doxygen keyword
+ *
+ * Revision 1.11 2004/02/19 21:16:00 dondosha
+ * Use enum type for severity argument in Blast_MessageWrite
+ *
+ * Revision 1.10 2003/08/11 15:01:59 dondosha
+ * Added algo/blast/core to all #included headers
+ *
+ * Revision 1.9 2003/07/31 14:31:41 camacho
+ * Replaced Char for char
+ *
+ * Revision 1.8 2003/07/31 00:32:37 camacho
+ * Eliminated Ptr notation
+ *
+ * Revision 1.7 2003/07/30 16:32:02 madden
+ * Use ansi functions when possible
+ *
+ * Revision 1.6 2003/07/29 14:42:31 coulouri
+ * use strdup() instead of StringSave()
+ *
+ * Revision 1.5 2003/07/25 21:12:28 coulouri
+ * remove constructions of the form "return sfree();" and "a=sfree(a);"
+ *
+ * Revision 1.4 2003/07/25 19:11:16 camacho
+ * Change VoidPtr to const void* in compare functions
+ *
+ * Revision 1.3 2003/07/25 17:25:43 coulouri
+ * in progres:
+ * * use malloc/calloc/realloc instead of Malloc/Calloc/Realloc
+ * * add sfree() macro and __sfree() helper function to util.[ch]
+ * * use sfree() instead of MemFree()
+ *
+ * Revision 1.2 2003/05/15 22:01:22 coulouri
+ * add rcsid string to sources
+ *
+ * Revision 1.1 2003/03/31 18:22:30 camacho
+ * Moved from parent directory
+ *
+ * Revision 1.2 2003/03/04 14:09:14 madden
+ * Fix prototype problem
+ *
+ * Revision 1.1 2003/02/13 21:38:54 madden
+ * Files for messaging warnings etc.
+ *
+ * ===========================================================================
+ */
diff --git a/algo/blast/core/blast_message.h b/algo/blast/core/blast_message.h
index 8275996c..bfa315af 100644
--- a/algo/blast/core/blast_message.h
+++ b/algo/blast/core/blast_message.h
@@ -1,42 +1,35 @@
-/*
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_message.h
-
-Author: Tom Madden
-
-Contents: Structures for BLAST messages
-
-Detailed Contents:
-
-
-******************************************************************************
- * $Revision: 1.6 $
- * */
+/* $Id: blast_message.h,v 1.7 2004/05/19 14:52:01 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Tom Madden
+ *
+ */
+
+/** @file blast_message.h
+ * Structures for BLAST messages
+ */
#ifndef __BLASTMESSAGES__
#define __BLASTMESSAGES__
diff --git a/algo/blast/core/blast_options.c b/algo/blast/core/blast_options.c
index 56c696a2..8e2ce2fc 100644
--- a/algo/blast/core/blast_options.c
+++ b/algo/blast/core/blast_options.c
@@ -1,436 +1,37 @@
-/*
-**************************************************************************
-* *
-* COPYRIGHT NOTICE *
-* *
-* This software/database is categorized as "United States Government *
-* Work" under the terms of the United States Copyright Act. It was *
-* produced as part of the author's official duties as a Government *
-* employee and thus can not be copyrighted. This software/database is *
-* freely available to the public for use without a copyright notice. *
-* Restrictions can not be placed on its present or future use. *
-* *
-* Although all reasonable efforts have been taken to ensure the accuracy *
-* and reliability of the software and data, the National Library of *
-* Medicine (NLM) and the U.S. Government do not and can not warrant the *
-* performance or results that may be obtained by using this software, *
-* data, or derivative works thereof. The NLM and the U.S. Government *
-* disclaim any and all warranties, expressed or implied, as to the *
-* performance, merchantability or fitness for any particular purpose or *
-* use. *
-* *
-* In any work or product derived from this material, proper attribution *
-* of the author(s) as the source of the software or data would be *
-* appreciated. *
-* *
-**************************************************************************
- *
- * $Log: blast_options.c,v $
- * Revision 1.101 2004/04/29 17:41:05 papadopo
- * Scale down the search space when calculating the S2 cutoff score for a translated RPS search
- *
- * Revision 1.100 2004/04/29 15:08:43 madden
- * Add BlastScoringOptionsDup
- *
- * Revision 1.99 2004/04/23 14:02:25 papadopo
- * ignore validation of LookupTableOptions if performing an RPS search
- *
- * Revision 1.98 2004/04/22 22:18:03 dondosha
- * Set lookup table type correctly in BLAST_FillLookupTableOptions - needed for C driver only
- *
- * Revision 1.97 2004/04/21 17:00:59 madden
- * Removed set but not read variable
- *
- * Revision 1.96 2004/04/19 12:58:44 madden
- * Changed BLAST_KarlinBlk to Blast_KarlinBlk to avoid conflict with blastkar.h structure, renamed some functions to start with Blast_Karlin, made Blast_KarlinBlkDestruct public
- *
- * Revision 1.95 2004/04/16 14:17:06 papadopo
- * add use of RPS-specific defines, remove RPS argument to FillLookupTableOptions
- *
- * Revision 1.94 2004/04/07 03:06:16 camacho
- * Added blast_encoding.[hc], refactoring blast_stat.[hc]
- *
- * Revision 1.93 2004/03/26 20:46:00 dondosha
- * Made gap_trigger parameter an integer, as in the old code
- *
- * Revision 1.92 2004/03/22 20:11:37 dondosha
- * Do not allow small gaps cutoff to be less than gap trigger
- *
- * Revision 1.91 2004/03/17 15:19:10 camacho
- * Add missing casts
- *
- * Revision 1.90 2004/03/11 23:58:10 dondosha
- * Set cutoff_score to 0 before calling BLAST_Cutoffs, so it knows what to calculate
- *
- * Revision 1.89 2004/03/11 20:41:49 camacho
- * Remove dead code
- *
- * Revision 1.88 2004/03/10 17:33:10 papadopo
- * Make a separate lookup table type for RPS blast
- *
- * Revision 1.87 2004/03/09 22:37:26 dondosha
- * Added const qualifiers to parameter arguments wherever relevant
- *
- * Revision 1.86 2004/03/09 18:46:24 dondosha
- * Corrected how cutoffs are calculated
- *
- * Revision 1.85 2004/03/04 21:07:48 papadopo
- * add RPS BLAST functionality
- *
- * Revision 1.84 2004/02/27 15:56:33 papadopo
- * Mike Gertz' modifications to unify handling of gapped Karlin blocks for protein and nucleotide searches. Also modified BLAST_MainSetUp to allocate gapped Karlin blocks last
- *
- * Revision 1.83 2004/02/24 17:57:14 dondosha
- * Added function to combine all options validation functions for the C engine
- *
- * Revision 1.82 2004/02/19 21:16:48 dondosha
- * Use enum type for severity argument in Blast_MessageWrite
- *
- * Revision 1.81 2004/02/17 22:10:30 dondosha
- * Set preliminary hitlist size in options initialization
- *
- * Revision 1.80 2004/02/07 15:48:30 ucko
- * PSIBlastOptionsNew: rearrange slightly so that declarations come first.
- *
- * Revision 1.79 2004/02/06 22:49:30 dondosha
- * Check for NULL pointer in PSIBlastOptionsNew
- *
- * Revision 1.78 2004/02/03 18:33:39 dondosha
- * Correction to previous change: word size can be 11 if discontiguous words
- *
- * Revision 1.77 2004/02/03 16:17:33 dondosha
- * Require word size to be >= 12 with megablast lookup table
- *
- * Revision 1.76 2004/02/02 18:49:32 dondosha
- * Fixes for minor compiler warnings
- *
- * Revision 1.75 2003/12/31 20:04:47 dondosha
- * Round best stride to a number divisible by 4 for all values except 6 and 7
- *
- * Revision 1.74 2003/12/31 16:04:37 coulouri
- * use -1 to disable protein neighboring words
- *
- * Revision 1.73 2003/12/08 16:03:05 coulouri
- * Propagate protein neighboring threshold even if it is zero
- *
- * Revision 1.72 2003/11/24 23:18:32 dondosha
- * Added gap_decay_rate argument to BLAST_Cutoffs; removed BLAST_Cutoffs_simple
- *
- * Revision 1.71 2003/11/12 18:17:46 dondosha
- * Correction in calculating scanning stride
- *
- * Revision 1.70 2003/11/04 23:22:47 dondosha
- * Do not calculate hit saving cutoff score for PHI BLAST
- *
- * Revision 1.69 2003/10/30 19:34:01 dondosha
- * Removed gapped_calculation from BlastHitSavingOptions structure
- *
- * Revision 1.68 2003/10/24 20:55:10 camacho
- * Rename GetDefaultStride
- *
- * Revision 1.67 2003/10/22 16:44:33 dondosha
- * Added function to calculate default stride value for AG method
- *
- * Revision 1.66 2003/10/21 22:15:34 camacho
- * Rearranging of C options structures, fix seed extension method
- *
- * Revision 1.65 2003/10/17 18:20:20 dondosha
- * Use separate variables for different initial word extension options
- *
- * Revision 1.64 2003/10/15 16:59:43 coulouri
- * type correctness fixes
- *
- * Revision 1.63 2003/10/07 17:26:11 dondosha
- * Lower case mask moved from options to the sequence block
- *
- * Revision 1.62 2003/10/02 22:08:34 dondosha
- * Corrections for one-strand translated searches
- *
- * Revision 1.61 2003/10/01 22:36:52 dondosha
- * Correction of setting of e2 in revision 1.57 was wrong
- *
- * Revision 1.60 2003/09/24 19:28:20 dondosha
- * Correction in setting extend word method: unset options that are set by default but overridden
- *
- * Revision 1.59 2003/09/12 17:26:01 dondosha
- * Added check that gap extension option cannot be 0 when gap open is not 0
- *
- * Revision 1.58 2003/09/10 19:48:08 dondosha
- * Removed dependency on mb_lookup.h
- *
- * Revision 1.57 2003/09/09 22:12:02 dondosha
- * Minor correction for ungapped cutoff calculation; added freeing of PHI pattern
- *
- * Revision 1.56 2003/09/08 12:55:57 madden
- * Allow use of PSSM to construct lookup table
- *
- * Revision 1.55 2003/08/27 15:05:37 camacho
- * Use symbolic name for alphabet sizes
- *
- * Revision 1.54 2003/08/26 21:53:33 madden
- * Protein alphabet is 26 chars, not 25
- *
- * Revision 1.53 2003/08/11 15:01:59 dondosha
- * Added algo/blast/core to all #included headers
- *
- * Revision 1.52 2003/08/01 17:26:19 dondosha
- * Use renamed versions of functions from local blastkar.h
- *
- * Revision 1.51 2003/07/31 17:45:17 dondosha
- * Made use of const qualifier consistent throughout the library
- *
- * Revision 1.50 2003/07/31 14:31:41 camacho
- * Replaced Char for char
- *
- * Revision 1.49 2003/07/31 14:19:28 camacho
- * Replaced FloatHi for double
- *
- * Revision 1.48 2003/07/31 00:32:37 camacho
- * Eliminated Ptr notation
- *
- * Revision 1.47 2003/07/30 22:06:25 dondosha
- * Convert matrix name to upper case when filling scoring options
- *
- * Revision 1.46 2003/07/30 19:39:14 camacho
- * Remove PNTRs
- *
- * Revision 1.45 2003/07/30 18:58:10 dondosha
- * Removed unused member matrixname from lookup table options
- *
- * Revision 1.44 2003/07/30 17:15:00 dondosha
- * Minor fixes for very strict compiler warnings
- *
- * Revision 1.43 2003/07/30 16:32:02 madden
- * Use ansi functions when possible
- *
- * Revision 1.42 2003/07/29 14:42:31 coulouri
- * use strdup() instead of StringSave()
- *
- * Revision 1.41 2003/07/28 19:04:15 camacho
- * Replaced all MemNews for calloc
- *
- * Revision 1.40 2003/07/25 21:12:28 coulouri
- * remove constructions of the form "return sfree();" and "a=sfree(a);"
- *
- * Revision 1.39 2003/07/25 17:25:43 coulouri
- * in progres:
- * * use malloc/calloc/realloc instead of Malloc/Calloc/Realloc
- * * add sfree() macro and __sfree() helper function to util.[ch]
- * * use sfree() instead of MemFree()
- *
- * Revision 1.38 2003/07/23 17:31:10 camacho
- * BlastDatabaseParameters struct is deprecated
- *
- * Revision 1.37 2003/07/23 16:42:01 dondosha
- * Formatting options moved from blast_options.c to blast_format.c
- *
- * Revision 1.36 2003/07/22 20:26:16 dondosha
- * Initialize BlastDatabaseParameters structure outside engine
- *
- * Revision 1.35 2003/07/22 15:32:55 dondosha
- * Removed dependence on readdb API
- *
- * Revision 1.34 2003/07/21 20:31:47 dondosha
- * Added BlastDatabaseParameters structure with genetic code string
- *
- * Revision 1.33 2003/06/26 21:38:05 dondosha
- * Program number is removed from options structures, and passed explicitly as a parameter to functions that need it
- *
- * Revision 1.32 2003/06/26 20:24:06 camacho
- * Do not free options structure in BlastExtensionParametersFree
- *
- * Revision 1.31 2003/06/23 21:49:11 dondosha
- * Possibility of linking HSPs for tblastn activated
- *
- * Revision 1.30 2003/06/20 21:40:21 dondosha
- * Added parameters for linking HSPs
- *
- * Revision 1.29 2003/06/20 15:20:21 dondosha
- * Memory leak fixes
- *
- * Revision 1.28 2003/06/18 12:21:01 camacho
- * Added proper return value
- *
- * Revision 1.27 2003/06/17 20:42:43 camacho
- * Moved comments to header file, fixed includes
- *
- * Revision 1.26 2003/06/11 16:14:53 dondosha
- * Added initialization of PSI-BLAST and database options
- *
- * Revision 1.25 2003/06/09 20:13:17 dondosha
- * Minor type casting compiler warnings fixes
- *
- * Revision 1.24 2003/06/06 17:02:30 dondosha
- * Typo fix
- *
- * Revision 1.23 2003/06/04 20:16:51 coulouri
- * make prototypes and definitions agree
- *
- * Revision 1.22 2003/06/03 15:50:39 coulouri
- * correct function pointer argument
- *
- * Revision 1.21 2003/05/30 15:52:11 coulouri
- * various lint-induced cleanups
- *
- * Revision 1.20 2003/05/21 22:31:53 dondosha
- * Added forcing of ungapped search for tblastx to option validation
- *
- * Revision 1.19 2003/05/18 21:57:37 camacho
- * Use Uint1 for program name whenever possible
- *
- * Revision 1.18 2003/05/15 22:01:22 coulouri
- * add rcsid string to sources
- *
- * Revision 1.17 2003/05/13 20:41:48 dondosha
- * Correction in assigning of number of db sequences for 2 sequence case
- *
- * Revision 1.16 2003/05/13 15:11:34 dondosha
- * Changed some char * arguments to const char *
- *
- * Revision 1.15 2003/05/07 17:44:31 dondosha
- * Assign ungapped xdropoff default correctly for protein programs
- *
- * Revision 1.14 2003/05/06 20:29:57 dondosha
- * Fix in filling effective length options
- *
- * Revision 1.13 2003/05/06 14:34:51 dondosha
- * Fix in comment
- *
- * Revision 1.12 2003/05/01 16:56:30 dondosha
- * Fixed strict compiler warnings
- *
- * Revision 1.11 2003/05/01 15:33:39 dondosha
- * Reorganized the setup of BLAST search
- *
- * Revision 1.10 2003/04/24 14:27:35 dondosha
- * Correction for latest changes
- *
- * Revision 1.9 2003/04/23 20:04:49 dondosha
- * Added a function BLAST_InitAllDefaultOptions to initialize all various options structures with only default values
- *
- * Revision 1.8 2003/04/17 21:14:41 dondosha
- * Added cutoff score hit parameters that is calculated from e-value
- *
- * Revision 1.7 2003/04/16 22:25:37 dondosha
- * Correction to previous change
- *
- * Revision 1.6 2003/04/16 22:20:24 dondosha
- * Correction in calculation of cutoff score for ungapped extensions
- *
- * Revision 1.5 2003/04/11 22:35:48 dondosha
- * Minor corrections for blastn
- *
- * Revision 1.4 2003/04/03 22:57:50 dondosha
- * Uninitialized variable fix
- *
- * Revision 1.3 2003/04/02 17:20:41 dondosha
- * Added calculation of ungapped cutoff score in correct place
- *
- * Revision 1.2 2003/04/01 17:42:33 dondosha
- * Added arguments to BlastExtensionParametersNew
- *
- * Revision 1.1 2003/03/31 18:22:30 camacho
- * Moved from parent directory
- *
- * Revision 1.30 2003/03/28 23:12:34 dondosha
- * Added program argument to BlastFormattingOptionsNew
- *
- * Revision 1.29 2003/03/27 20:54:19 dondosha
- * Moved ungapped cutoff from hit options to word options
- *
- * Revision 1.28 2003/03/25 16:30:25 dondosha
- * Strict compiler warning fixes
- *
- * Revision 1.27 2003/03/24 20:39:17 dondosha
- * Added BlastExtensionParameters structure to hold raw gapped X-dropoff values
- *
- * Revision 1.26 2003/03/19 19:52:42 dondosha
- * 1. Added strand option argument to BlastQuerySetUpOptionsNew
- * 2. Added check of discontiguous template parameters in LookupTableOptionsValidate
- *
- * Revision 1.25 2003/03/14 19:08:53 dondosha
- * Added arguments to various OptionsNew functions, so all initialization can be done inside
- *
- * Revision 1.24 2003/03/12 17:03:41 dondosha
- * Set believe_query in formatting options to FALSE by default
- *
- * Revision 1.23 2003/03/11 20:40:32 dondosha
- * Correction in assigning gap_x_dropoff_final
- *
- * Revision 1.22 2003/03/10 16:44:42 dondosha
- * Added functions for initialization and freeing of formatting options structure
- *
- * Revision 1.21 2003/03/07 20:41:08 dondosha
- * Small corrections in option initialization functions
- *
- * Revision 1.20 2003/03/06 19:25:52 madden
- * Include blast_util.h
- *
- * Revision 1.19 2003/03/05 21:19:09 coulouri
- * set NA_LOOKUP_TABLE flag
- *
- * Revision 1.18 2003/03/05 20:58:50 dondosha
- * Corrections for handling effective search space for multiple queries
- *
- * Revision 1.17 2003/03/05 15:36:34 madden
- * Moved BlastNumber2Program and BlastProgram2Number from blast_options to blast_util
- *
- * Revision 1.16 2003/03/03 14:43:21 madden
- * Use BlastKarlinkGapBlkFill, PrintMatrixMessage, and PrintAllowedValuesMessage
- *
- * Revision 1.15 2003/02/26 15:42:50 madden
- * const charPtr becomes const char *, add BlastExtensionOptionsValidate
- *
- * Revision 1.14 2003/02/14 16:30:19 dondosha
- * Get rid of a compiler warning for type mismatch
- *
- * Revision 1.13 2003/02/13 21:42:25 madden
- * Added validation functions
- *
- * Revision 1.12 2003/02/04 13:14:36 dondosha
- * Changed the macro definitions for
- *
- * Revision 1.11 2003/01/31 17:00:32 dondosha
- * Do not set the scan step in LookupTableOptionsNew
- *
- * Revision 1.10 2003/01/28 15:13:25 madden
- * Added functions and structures for parameters
- *
- * Revision 1.9 2003/01/22 20:49:31 dondosha
- * Set decline_align for blastn too
- *
- * Revision 1.8 2003/01/22 15:09:55 dondosha
- * Correction for default penalty assignment
- *
- * Revision 1.7 2003/01/17 22:10:45 madden
- * Added functions for BlastExtensionOptions, BlastInitialWordOptions as well as defines for default values
- *
- * Revision 1.6 2003/01/10 18:36:40 madden
- * Change call to BlastEffectiveLengthsOptionsNew
- *
- * Revision 1.5 2003/01/02 17:09:35 dondosha
- * Fill alphabet size when creating lookup table options structure
- *
- * Revision 1.4 2002/12/24 14:49:00 madden
- * Set defaults for LookupTableOptions for protein-protein searches
- *
- * Revision 1.3 2002/12/04 13:38:21 madden
- * Add function LookupTableOptionsNew
- *
- * Revision 1.2 2002/10/17 15:45:17 madden
- * Make BLOSUM62 default
- *
- * Revision 1.1 2002/10/07 21:05:12 madden
- * Sets default option values
- *
- *
- *
-*/
+/* $Id: blast_options.c,v 1.116 2004/06/09 22:44:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ */
-static char const rcsid[] = "$Id: blast_options.c,v 1.101 2004/04/29 17:41:05 papadopo Exp $";
+/** @file blast_options.c
+ * @todo FIXME needs file description & doxygen comments
+ */
+
+static char const rcsid[] =
+ "$Id: blast_options.c,v 1.116 2004/06/09 22:44:03 dondosha Exp $";
#include <algo/blast/core/blast_options.h>
-#include <algo/blast/core/blast_gapalign.h>
#include <algo/blast/core/blast_filter.h>
#include <algo/blast/core/blast_encoding.h>
@@ -565,6 +166,10 @@ BlastInitialWordParametersFree(BlastInitialWordParameters* parameters)
}
+/** Compute the default cutoff expect value for ungapped extensions
+ * @param program The blast program type
+ * @return The default per-program expect value
+ */
static double GetUngappedCutoff(Uint1 program)
{
switch(program) {
@@ -611,7 +216,7 @@ BlastInitialWordParametersNew(Uint1 program_number,
ASSERT(sbp->kbp_std[query_info->first_context]);
(*parameters)->x_dropoff = (Int4)
- ceil(word_options->x_dropoff*NCBIMATH_LN2/
+ ceil(sbp->scale_factor * word_options->x_dropoff * NCBIMATH_LN2/
sbp->kbp_std[query_info->first_context]->Lambda);
status = BlastInitialWordParametersUpdate(program_number,
@@ -653,16 +258,24 @@ BlastInitialWordParametersUpdate(Uint1 program_number,
} else {
Int4 s2 = 0;
double e2;
- double qlen;
+ Int4 qlen;
/* Calculate score cutoff corresponding to a fixed e-value (0.05);
If it is smaller, then use this one */
qlen = query_info->context_offsets[query_info->last_context+1] - 1;
e2 = GetUngappedCutoff(program_number);
- BLAST_Cutoffs(&s2, &e2, kbp, MIN(subj_length, qlen)*subj_length, TRUE,
+ BLAST_Cutoffs(&s2, &e2, kbp, MIN(subj_length, (Uint4) qlen)*subj_length, TRUE,
hit_params->gap_decay_rate);
+ s2 *= (Int4)sbp->scale_factor;
parameters->cutoff_score = MIN(hit_params->cutoff_score, s2);
+
+ if (parameters->x_dropoff != 0 && parameters->cutoff_score != 0) {
+ parameters->x_dropoff =
+ MIN(parameters->x_dropoff, parameters->cutoff_score);
+ } else if (parameters->cutoff_score != 0) {
+ parameters->x_dropoff = parameters->cutoff_score;
+ }
}
return 0;
@@ -696,7 +309,8 @@ BlastExtensionOptionsNew(Uint1 program, BlastExtensionOptions* *options)
(*options)->gap_x_dropoff_final =
BLAST_GAP_X_DROPOFF_FINAL_PROT;
(*options)->gap_trigger = BLAST_GAP_TRIGGER_PROT;
- (*options)->algorithm_type = EXTEND_DYN_PROG;
+ (*options)->ePrelimGapExt = eDynProgExt;
+ (*options)->eTbackExt = eDynProgTbck;
}
else
{
@@ -708,26 +322,42 @@ BlastExtensionOptionsNew(Uint1 program, BlastExtensionOptions* *options)
Int2
BLAST_FillExtensionOptions(BlastExtensionOptions* options,
- Uint1 program, Boolean greedy, double x_dropoff, double x_dropoff_final)
+ Uint1 program, Int4 greedy, double x_dropoff, double x_dropoff_final)
{
if (!options)
return 1;
if (program == blast_type_blastn) {
- if (greedy) {
+ switch (greedy) {
+ case 1:
options->gap_x_dropoff = BLAST_GAP_X_DROPOFF_GREEDY;
- options->algorithm_type = EXTEND_GREEDY;
- } else {
+ options->ePrelimGapExt = eGreedyWithTracebackExt;
+ options->eTbackExt = eSkipTbck;
+ break;
+ case 2:
+ options->gap_x_dropoff = BLAST_GAP_X_DROPOFF_GREEDY;
+ options->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
+ options->ePrelimGapExt = eGreedyExt;
+ options->eTbackExt = eGreedyTbck;
+ break;
+ default: /* Non-greedy */
options->gap_x_dropoff = BLAST_GAP_X_DROPOFF_NUCL;
options->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
- options->algorithm_type = EXTEND_DYN_PROG;
+ options->ePrelimGapExt = eDynProgExt;
+ options->eTbackExt = eDynProgTbck;
+ break;
}
}
if (x_dropoff)
options->gap_x_dropoff = x_dropoff;
- if (x_dropoff_final)
+ if (x_dropoff_final) {
options->gap_x_dropoff_final = x_dropoff_final;
+ } else {
+ /* Final X-dropoff can't be smaller than preliminary X-dropoff */
+ options->gap_x_dropoff_final =
+ MAX(options->gap_x_dropoff_final, x_dropoff);
+ }
return 0;
@@ -743,8 +373,9 @@ BlastExtensionOptionsValidate(Uint1 program_number,
if (program_number != blast_type_blastn)
{
- if (options->algorithm_type == EXTEND_GREEDY ||
- options->algorithm_type == EXTEND_GREEDY_NO_TRACEBACK)
+ if (options->ePrelimGapExt == eGreedyWithTracebackExt ||
+ options->ePrelimGapExt == eGreedyExt ||
+ options->eTbackExt == eGreedyTbck)
{
Int4 code=2;
Int4 subcode=1;
@@ -795,6 +426,11 @@ Int2 BlastExtensionParametersNew(Uint1 program_number,
params->gap_trigger = (Int4)
((options->gap_trigger*NCBIMATH_LN2 + kbp->logK) / kbp->Lambda);
+ if (sbp->scale_factor > 1.0) {
+ params->gap_trigger *= (Int4)sbp->scale_factor;
+ params->gap_x_dropoff *= (Int4)sbp->scale_factor;
+ params->gap_x_dropoff_final *= (Int4)sbp->scale_factor;
+ }
return 0;
}
@@ -993,6 +629,42 @@ BlastScoringOptionsDup(BlastScoringOptions* *new_opt, const BlastScoringOptions*
return 0;
}
+BlastScoringParameters*
+BlastScoringParametersFree(BlastScoringParameters* parameters)
+{
+ sfree(parameters);
+ return NULL;
+}
+
+
+Int2
+BlastScoringParametersNew(const BlastScoringOptions* score_options,
+ BlastScoreBlk* sbp,
+ BlastScoringParameters* *parameters)
+{
+ BlastScoringParameters *params;
+ double scale_factor;
+
+ if (score_options == NULL)
+ return 1;
+
+ *parameters = params = (BlastScoringParameters*)
+ calloc(1, sizeof(BlastScoringParameters));
+ if (params == NULL)
+ return 2;
+
+ params->options = (BlastScoringOptions *)score_options;
+ scale_factor = sbp->scale_factor;
+ params->scale_factor = scale_factor;
+ params->reward = score_options->reward;
+ params->penalty = score_options->penalty;
+ params->gap_open = score_options->gap_open * (Int4)scale_factor;
+ params->gap_extend = score_options->gap_extend * (Int4)scale_factor;
+ params->decline_align = score_options->decline_align * (Int4)scale_factor;
+ params->shift_pen = score_options->shift_pen * (Int4)scale_factor;
+ return 0;
+}
+
BlastEffectiveLengthsOptions*
BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions* options)
@@ -1399,15 +1071,19 @@ BlastHitSavingParametersNew(Uint1 program_number,
/* If sum statistics use is forced by the options,
set it in the paramters */
- params->do_sum_stats = options->do_sum_stats;
- /* Sum statistics is used anyway for all ungapped searches and all
- translated gapped searches (except RPS translated searches) */
- if (!gapped_calculation ||
- (program_number != blast_type_blastn &&
- program_number != blast_type_blastp &&
- program_number != blast_type_rpsblast &&
- program_number != blast_type_rpstblastn))
+ if (options->do_sum_stats == eSumStatsTrue) {
params->do_sum_stats = TRUE;
+ } else if (options->do_sum_stats == eSumStatsNotSet) {
+ /* By default, sum statistics is used for all translated searches
+ * (except RPS BLAST), and for all ungapped searches.
+ */
+ if (!gapped_calculation ||
+ (program_number == blast_type_blastx) ||
+ (program_number == blast_type_tblastn) ||
+ (program_number == blast_type_tblastx))
+ params->do_sum_stats = TRUE;
+ }
+
if (program_number == blast_type_blastn || !gapped_calculation) {
params->gap_prob = BLAST_GAP_PROB;
params->gap_decay_rate = BLAST_GAP_DECAY_RATE;
@@ -1432,6 +1108,8 @@ BlastHitSavingParametersUpdate(Uint1 program_number,
BlastHitSavingOptions* options;
Blast_KarlinBlk* kbp;
double evalue;
+ double scale_factor = sbp->scale_factor;
+ Boolean gapped_calculation;
ASSERT(params);
ASSERT(query_info);
@@ -1443,7 +1121,8 @@ BlastHitSavingParametersUpdate(Uint1 program_number,
/* Scoring options are not available here, but we can determine whether
this is a gapped or ungapped search by checking whether gapped
Karlin blocks have been set. */
- if (sbp->kbp_gap) {
+ gapped_calculation = (sbp->kbp_gap != NULL);
+ if (gapped_calculation) {
kbp = sbp->kbp_gap[query_info->first_context];
} else {
kbp = sbp->kbp[query_info->first_context];
@@ -1451,10 +1130,10 @@ BlastHitSavingParametersUpdate(Uint1 program_number,
/* Calculate cutoffs based on the current effective lengths information */
if (options->cutoff_score > 0) {
- params->cutoff_score = options->cutoff_score;
+ params->cutoff_score = options->cutoff_score * (Int4) sbp->scale_factor;
} else if (!options->phi_align) {
Int4 context = query_info->first_context;
- double searchsp = (double)query_info->eff_searchsp_array[context];
+ Int8 searchsp = query_info->eff_searchsp_array[context];
/* translated RPS searches must scale the search space down */
if (program_number == blast_type_rpstblastn)
@@ -1462,20 +1141,28 @@ BlastHitSavingParametersUpdate(Uint1 program_number,
params->cutoff_score = 0;
BLAST_Cutoffs(&params->cutoff_score, &evalue, kbp, searchsp, FALSE, 0);
+ params->cutoff_score *= (Int4) scale_factor;
+
/* When sum statistics is used, all HSPs above the gap trigger
cutoff are saved until the sum statistics is applied to potentially
link them with other HSPs and improve their e-values.
However this does not apply to the ungapped search! */
- if (params->do_sum_stats) {
+ if (params->do_sum_stats && gapped_calculation) {
params->cutoff_score =
MIN(params->cutoff_score, ext_params->gap_trigger);
}
} else {
params->cutoff_score = 0;
}
-
- params->cutoff_small_gap =
- MIN(params->cutoff_score, ext_params->gap_trigger);
+
+ if (params->do_sum_stats) {
+ if (gapped_calculation) {
+ params->cutoff_small_gap =
+ MIN(params->cutoff_score, ext_params->gap_trigger);
+ } else {
+ params->cutoff_small_gap = params->cutoff_score;
+ }
+ }
return 0;
}
@@ -1487,10 +1174,9 @@ Int2 PSIBlastOptionsNew(PSIBlastOptions** psi_options)
return 0;
options = (PSIBlastOptions*)calloc(1, sizeof(PSIBlastOptions));
*psi_options = options;
- options->ethresh = PSI_ETHRESH;
- options->maxNumPasses = PSI_MAX_NUM_PASSES;
- options->pseudoCountConst = PSI_PSEUDO_COUNT_CONST;
- options->scalingFactor = PSI_SCALING_FACTOR;
+ options->inclusion_ethresh = PSI_INCLUSION_ETHRESH;
+ options->pseudo_count = PSI_PSEUDO_COUNT_CONST;
+ options->use_best_alignment = TRUE;
return 0;
}
@@ -1588,18 +1274,20 @@ Int2 BLAST_ValidateOptions(Uint1 program_number,
return status;
}
+/** small constant added to gap_prob in CalculateLinkHSPCutoffs so the divisor is never zero */
#define MY_EPS 1.0e-9
+
void
CalculateLinkHSPCutoffs(Uint1 program, BlastQueryInfo* query_info,
BlastScoreBlk* sbp, BlastHitSavingParameters* hit_params,
- Int8 db_length, Int4 subject_length,
- const PSIBlastOptions* psi_options)
+ BlastExtensionParameters* ext_params,
+ Int8 db_length, Int4 subject_length)
{
double gap_prob, gap_decay_rate, x_variable, y_variable;
Blast_KarlinBlk* kbp;
Int4 expected_length, gap_size, query_length;
Int8 search_sp;
- Boolean translated_subject = (program == blast_type_tblastn ||
+ Boolean translated_subject = (program == blast_type_tblastn ||
program == blast_type_rpstblastn ||
program == blast_type_tblastx);
@@ -1651,7 +1339,7 @@ CalculateLinkHSPCutoffs(Uint1 program, BlastQueryInfo* query_info,
x_variable = y_variable*(gap_size*gap_size);
x_variable /= (gap_prob + MY_EPS);
hit_params->cutoff_small_gap =
- MAX(hit_params->cutoff_small_gap,
+ MAX(ext_params->gap_trigger,
(Int4) floor((log(x_variable)/kbp->Lambda)) + 1);
hit_params->ignore_small_gaps = FALSE;
@@ -1662,8 +1350,461 @@ CalculateLinkHSPCutoffs(Uint1 program, BlastQueryInfo* query_info,
hit_params->ignore_small_gaps = TRUE;
}
- if (psi_options) {
- hit_params->cutoff_big_gap *= (Int4) psi_options->scalingFactor;
- hit_params->cutoff_small_gap *= (Int4) psi_options->scalingFactor;
- }
+ hit_params->cutoff_big_gap *= (Int4)sbp->scale_factor;
+ hit_params->cutoff_small_gap *= (Int4)sbp->scale_factor;
}
+
+
+/*
+ * ===========================================================================
+ *
+ * $Log: blast_options.c,v $
+ * Revision 1.116 2004/06/09 22:44:03 dondosha
+ * Set sum statistics parameter to TRUE by default for ungapped blastp
+ *
+ * Revision 1.115 2004/06/09 22:27:44 dondosha
+ * Do not reduce score cutoffs to gap_trigger value for ungapped blastn
+ *
+ * Revision 1.114 2004/06/09 14:11:34 camacho
+ * Set default for use_best_alignment
+ *
+ * Revision 1.113 2004/06/08 15:12:51 dondosha
+ * Removed skip_traceback option; added eSkipTbck type to traceback extension types enum
+ *
+ * Revision 1.112 2004/06/07 15:44:47 dondosha
+ * do_sum_stats option is now an enum; set do_sum_stats parameter only if option is not set;
+ *
+ * Revision 1.111 2004/05/26 16:04:54 papadopo
+ * fix doxygen errors
+ *
+ * Revision 1.110 2004/05/24 17:26:21 camacho
+ * Fix PC warning
+ *
+ * Revision 1.109 2004/05/20 16:29:30 madden
+ * Make searchsp an Int8 consistent with rest of blast
+ *
+ * Revision 1.108 2004/05/19 14:52:02 camacho
+ * 1. Added doxygen tags to enable doxygen processing of algo/blast/core
+ * 2. Standardized copyright, CVS $Id string, $Log and rcsid formatting and i
+ * location
+ * 3. Added use of @todo doxygen keyword
+ *
+ * Revision 1.107 2004/05/17 15:30:20 madden
+ * Int algorithm_type replaced with enum EBlastPrelimGapExt, removed include for blast_gapalign.h
+ *
+ * Revision 1.106 2004/05/14 17:11:03 dondosha
+ * Minor correction in setting X-dropoffs
+ *
+ * Revision 1.105 2004/05/14 13:14:15 camacho
+ * Use correct definition for inclusion threshold
+ *
+ * Revision 1.104 2004/05/12 12:18:06 madden
+ * Clean out PSIBlast options, add fields to ExtensionOptions to support smith-waterman and composition-based stats
+ *
+ * Revision 1.103 2004/05/10 14:27:23 madden
+ * Correction to CalculateLinkHSPCutoffs to use gap_trigger in calculation of small cutoff
+ *
+ * Revision 1.102 2004/05/07 15:22:15 papadopo
+ * 1. add functions to allocate and free BlastScoringParameters structures
+ * 2. apply a scaling factor to all cutoffs generated in HitSavingParameters
+ * or ExtentionParameters structures
+ *
+ * Revision 1.101 2004/04/29 17:41:05 papadopo
+ * Scale down the search space when calculating the S2 cutoff score for a translated RPS search
+ *
+ * Revision 1.100 2004/04/29 15:08:43 madden
+ * Add BlastScoringOptionsDup
+ *
+ * Revision 1.99 2004/04/23 14:02:25 papadopo
+ * ignore validation of LookupTableOptions if performing an RPS search
+ *
+ * Revision 1.98 2004/04/22 22:18:03 dondosha
+ * Set lookup table type correctly in BLAST_FillLookupTableOptions - needed for C driver only
+ *
+ * Revision 1.97 2004/04/21 17:00:59 madden
+ * Removed set but not read variable
+ *
+ * Revision 1.96 2004/04/19 12:58:44 madden
+ * Changed BLAST_KarlinBlk to Blast_KarlinBlk to avoid conflict with blastkar.h structure, renamed some functions to start with Blast_Karlin, made Blast_KarlinBlkDestruct public
+ *
+ * Revision 1.95 2004/04/16 14:17:06 papadopo
+ * add use of RPS-specific defines, remove RPS argument to FillLookupTableOptions
+ *
+ * Revision 1.94 2004/04/07 03:06:16 camacho
+ * Added blast_encoding.[hc], refactoring blast_stat.[hc]
+ *
+ * Revision 1.93 2004/03/26 20:46:00 dondosha
+ * Made gap_trigger parameter an integer, as in the old code
+ *
+ * Revision 1.92 2004/03/22 20:11:37 dondosha
+ * Do not allow small gaps cutoff to be less than gap trigger
+ *
+ * Revision 1.91 2004/03/17 15:19:10 camacho
+ * Add missing casts
+ *
+ * Revision 1.90 2004/03/11 23:58:10 dondosha
+ * Set cutoff_score to 0 before calling BLAST_Cutoffs, so it knows what to calculate
+ *
+ * Revision 1.89 2004/03/11 20:41:49 camacho
+ * Remove dead code
+ *
+ * Revision 1.88 2004/03/10 17:33:10 papadopo
+ * Make a separate lookup table type for RPS blast
+ *
+ * Revision 1.87 2004/03/09 22:37:26 dondosha
+ * Added const qualifiers to parameter arguments wherever relevant
+ *
+ * Revision 1.86 2004/03/09 18:46:24 dondosha
+ * Corrected how cutoffs are calculated
+ *
+ * Revision 1.85 2004/03/04 21:07:48 papadopo
+ * add RPS BLAST functionality
+ *
+ * Revision 1.84 2004/02/27 15:56:33 papadopo
+ * Mike Gertz' modifications to unify handling of gapped Karlin blocks for protein and nucleotide searches. Also modified BLAST_MainSetUp to allocate gapped Karlin blocks last
+ *
+ * Revision 1.83 2004/02/24 17:57:14 dondosha
+ * Added function to combine all options validation functions for the C engine
+ *
+ * Revision 1.82 2004/02/19 21:16:48 dondosha
+ * Use enum type for severity argument in Blast_MessageWrite
+ *
+ * Revision 1.81 2004/02/17 22:10:30 dondosha
+ * Set preliminary hitlist size in options initialization
+ *
+ * Revision 1.80 2004/02/07 15:48:30 ucko
+ * PSIBlastOptionsNew: rearrange slightly so that declarations come first.
+ *
+ * Revision 1.79 2004/02/06 22:49:30 dondosha
+ * Check for NULL pointer in PSIBlastOptionsNew
+ *
+ * Revision 1.78 2004/02/03 18:33:39 dondosha
+ * Correction to previous change: word size can be 11 if discontiguous words
+ *
+ * Revision 1.77 2004/02/03 16:17:33 dondosha
+ * Require word size to be >= 12 with megablast lookup table
+ *
+ * Revision 1.76 2004/02/02 18:49:32 dondosha
+ * Fixes for minor compiler warnings
+ *
+ * Revision 1.75 2003/12/31 20:04:47 dondosha
+ * Round best stride to a number divisible by 4 for all values except 6 and 7
+ *
+ * Revision 1.74 2003/12/31 16:04:37 coulouri
+ * use -1 to disable protein neighboring words
+ *
+ * Revision 1.73 2003/12/08 16:03:05 coulouri
+ * Propagate protein neighboring threshold even if it is zero
+ *
+ * Revision 1.72 2003/11/24 23:18:32 dondosha
+ * Added gap_decay_rate argument to BLAST_Cutoffs; removed BLAST_Cutoffs_simple
+ *
+ * Revision 1.71 2003/11/12 18:17:46 dondosha
+ * Correction in calculating scanning stride
+ *
+ * Revision 1.70 2003/11/04 23:22:47 dondosha
+ * Do not calculate hit saving cutoff score for PHI BLAST
+ *
+ * Revision 1.69 2003/10/30 19:34:01 dondosha
+ * Removed gapped_calculation from BlastHitSavingOptions structure
+ *
+ * Revision 1.68 2003/10/24 20:55:10 camacho
+ * Rename GetDefaultStride
+ *
+ * Revision 1.67 2003/10/22 16:44:33 dondosha
+ * Added function to calculate default stride value for AG method
+ *
+ * Revision 1.66 2003/10/21 22:15:34 camacho
+ * Rearranging of C options structures, fix seed extension method
+ *
+ * Revision 1.65 2003/10/17 18:20:20 dondosha
+ * Use separate variables for different initial word extension options
+ *
+ * Revision 1.64 2003/10/15 16:59:43 coulouri
+ * type correctness fixes
+ *
+ * Revision 1.63 2003/10/07 17:26:11 dondosha
+ * Lower case mask moved from options to the sequence block
+ *
+ * Revision 1.62 2003/10/02 22:08:34 dondosha
+ * Corrections for one-strand translated searches
+ *
+ * Revision 1.61 2003/10/01 22:36:52 dondosha
+ * Correction of setting of e2 in revision 1.57 was wrong
+ *
+ * Revision 1.60 2003/09/24 19:28:20 dondosha
+ * Correction in setting extend word method: unset options that are set by default but overridden
+ *
+ * Revision 1.59 2003/09/12 17:26:01 dondosha
+ * Added check that gap extension option cannot be 0 when gap open is not 0
+ *
+ * Revision 1.58 2003/09/10 19:48:08 dondosha
+ * Removed dependency on mb_lookup.h
+ *
+ * Revision 1.57 2003/09/09 22:12:02 dondosha
+ * Minor correction for ungapped cutoff calculation; added freeing of PHI pattern
+ *
+ * Revision 1.56 2003/09/08 12:55:57 madden
+ * Allow use of PSSM to construct lookup table
+ *
+ * Revision 1.55 2003/08/27 15:05:37 camacho
+ * Use symbolic name for alphabet sizes
+ *
+ * Revision 1.54 2003/08/26 21:53:33 madden
+ * Protein alphabet is 26 chars, not 25
+ *
+ * Revision 1.53 2003/08/11 15:01:59 dondosha
+ * Added algo/blast/core to all #included headers
+ *
+ * Revision 1.52 2003/08/01 17:26:19 dondosha
+ * Use renamed versions of functions from local blastkar.h
+ *
+ * Revision 1.51 2003/07/31 17:45:17 dondosha
+ * Made use of const qualifier consistent throughout the library
+ *
+ * Revision 1.50 2003/07/31 14:31:41 camacho
+ * Replaced Char for char
+ *
+ * Revision 1.49 2003/07/31 14:19:28 camacho
+ * Replaced FloatHi for double
+ *
+ * Revision 1.48 2003/07/31 00:32:37 camacho
+ * Eliminated Ptr notation
+ *
+ * Revision 1.47 2003/07/30 22:06:25 dondosha
+ * Convert matrix name to upper case when filling scoring options
+ *
+ * Revision 1.46 2003/07/30 19:39:14 camacho
+ * Remove PNTRs
+ *
+ * Revision 1.45 2003/07/30 18:58:10 dondosha
+ * Removed unused member matrixname from lookup table options
+ *
+ * Revision 1.44 2003/07/30 17:15:00 dondosha
+ * Minor fixes for very strict compiler warnings
+ *
+ * Revision 1.43 2003/07/30 16:32:02 madden
+ * Use ansi functions when possible
+ *
+ * Revision 1.42 2003/07/29 14:42:31 coulouri
+ * use strdup() instead of StringSave()
+ *
+ * Revision 1.41 2003/07/28 19:04:15 camacho
+ * Replaced all MemNews for calloc
+ *
+ * Revision 1.40 2003/07/25 21:12:28 coulouri
+ * remove constructions of the form "return sfree();" and "a=sfree(a);"
+ *
+ * Revision 1.39 2003/07/25 17:25:43 coulouri
+ * in progres:
+ * * use malloc/calloc/realloc instead of Malloc/Calloc/Realloc
+ * * add sfree() macro and __sfree() helper function to util.[ch]
+ * * use sfree() instead of MemFree()
+ *
+ * Revision 1.38 2003/07/23 17:31:10 camacho
+ * BlastDatabaseParameters struct is deprecated
+ *
+ * Revision 1.37 2003/07/23 16:42:01 dondosha
+ * Formatting options moved from blast_options.c to blast_format.c
+ *
+ * Revision 1.36 2003/07/22 20:26:16 dondosha
+ * Initialize BlastDatabaseParameters structure outside engine
+ *
+ * Revision 1.35 2003/07/22 15:32:55 dondosha
+ * Removed dependence on readdb API
+ *
+ * Revision 1.34 2003/07/21 20:31:47 dondosha
+ * Added BlastDatabaseParameters structure with genetic code string
+ *
+ * Revision 1.33 2003/06/26 21:38:05 dondosha
+ * Program number is removed from options structures, and passed explicitly as a parameter to functions that need it
+ *
+ * Revision 1.32 2003/06/26 20:24:06 camacho
+ * Do not free options structure in BlastExtensionParametersFree
+ *
+ * Revision 1.31 2003/06/23 21:49:11 dondosha
+ * Possibility of linking HSPs for tblastn activated
+ *
+ * Revision 1.30 2003/06/20 21:40:21 dondosha
+ * Added parameters for linking HSPs
+ *
+ * Revision 1.29 2003/06/20 15:20:21 dondosha
+ * Memory leak fixes
+ *
+ * Revision 1.28 2003/06/18 12:21:01 camacho
+ * Added proper return value
+ *
+ * Revision 1.27 2003/06/17 20:42:43 camacho
+ * Moved comments to header file, fixed includes
+ *
+ * Revision 1.26 2003/06/11 16:14:53 dondosha
+ * Added initialization of PSI-BLAST and database options
+ *
+ * Revision 1.25 2003/06/09 20:13:17 dondosha
+ * Minor type casting compiler warnings fixes
+ *
+ * Revision 1.24 2003/06/06 17:02:30 dondosha
+ * Typo fix
+ *
+ * Revision 1.23 2003/06/04 20:16:51 coulouri
+ * make prototypes and definitions agree
+ *
+ * Revision 1.22 2003/06/03 15:50:39 coulouri
+ * correct function pointer argument
+ *
+ * Revision 1.21 2003/05/30 15:52:11 coulouri
+ * various lint-induced cleanups
+ *
+ * Revision 1.20 2003/05/21 22:31:53 dondosha
+ * Added forcing of ungapped search for tblastx to option validation
+ *
+ * Revision 1.19 2003/05/18 21:57:37 camacho
+ * Use Uint1 for program name whenever possible
+ *
+ * Revision 1.18 2003/05/15 22:01:22 coulouri
+ * add rcsid string to sources
+ *
+ * Revision 1.17 2003/05/13 20:41:48 dondosha
+ * Correction in assigning of number of db sequences for 2 sequence case
+ *
+ * Revision 1.16 2003/05/13 15:11:34 dondosha
+ * Changed some char * arguments to const char *
+ *
+ * Revision 1.15 2003/05/07 17:44:31 dondosha
+ * Assign ungapped xdropoff default correctly for protein programs
+ *
+ * Revision 1.14 2003/05/06 20:29:57 dondosha
+ * Fix in filling effective length options
+ *
+ * Revision 1.13 2003/05/06 14:34:51 dondosha
+ * Fix in comment
+ *
+ * Revision 1.12 2003/05/01 16:56:30 dondosha
+ * Fixed strict compiler warnings
+ *
+ * Revision 1.11 2003/05/01 15:33:39 dondosha
+ * Reorganized the setup of BLAST search
+ *
+ * Revision 1.10 2003/04/24 14:27:35 dondosha
+ * Correction for latest changes
+ *
+ * Revision 1.9 2003/04/23 20:04:49 dondosha
+ * Added a function BLAST_InitAllDefaultOptions to initialize all various options structures with only default values
+ *
+ * Revision 1.8 2003/04/17 21:14:41 dondosha
+ * Added cutoff score hit parameters that is calculated from e-value
+ *
+ * Revision 1.7 2003/04/16 22:25:37 dondosha
+ * Correction to previous change
+ *
+ * Revision 1.6 2003/04/16 22:20:24 dondosha
+ * Correction in calculation of cutoff score for ungapped extensions
+ *
+ * Revision 1.5 2003/04/11 22:35:48 dondosha
+ * Minor corrections for blastn
+ *
+ * Revision 1.4 2003/04/03 22:57:50 dondosha
+ * Uninitialized variable fix
+ *
+ * Revision 1.3 2003/04/02 17:20:41 dondosha
+ * Added calculation of ungapped cutoff score in correct place
+ *
+ * Revision 1.2 2003/04/01 17:42:33 dondosha
+ * Added arguments to BlastExtensionParametersNew
+ *
+ * Revision 1.1 2003/03/31 18:22:30 camacho
+ * Moved from parent directory
+ *
+ * Revision 1.30 2003/03/28 23:12:34 dondosha
+ * Added program argument to BlastFormattingOptionsNew
+ *
+ * Revision 1.29 2003/03/27 20:54:19 dondosha
+ * Moved ungapped cutoff from hit options to word options
+ *
+ * Revision 1.28 2003/03/25 16:30:25 dondosha
+ * Strict compiler warning fixes
+ *
+ * Revision 1.27 2003/03/24 20:39:17 dondosha
+ * Added BlastExtensionParameters structure to hold raw gapped X-dropoff values
+ *
+ * Revision 1.26 2003/03/19 19:52:42 dondosha
+ * 1. Added strand option argument to BlastQuerySetUpOptionsNew
+ * 2. Added check of discontiguous template parameters in LookupTableOptionsValidate
+ *
+ * Revision 1.25 2003/03/14 19:08:53 dondosha
+ * Added arguments to various OptionsNew functions, so all initialization can be done inside
+ *
+ * Revision 1.24 2003/03/12 17:03:41 dondosha
+ * Set believe_query in formatting options to FALSE by default
+ *
+ * Revision 1.23 2003/03/11 20:40:32 dondosha
+ * Correction in assigning gap_x_dropoff_final
+ *
+ * Revision 1.22 2003/03/10 16:44:42 dondosha
+ * Added functions for initialization and freeing of formatting options structure
+ *
+ * Revision 1.21 2003/03/07 20:41:08 dondosha
+ * Small corrections in option initialization functions
+ *
+ * Revision 1.20 2003/03/06 19:25:52 madden
+ * Include blast_util.h
+ *
+ * Revision 1.19 2003/03/05 21:19:09 coulouri
+ * set NA_LOOKUP_TABLE flag
+ *
+ * Revision 1.18 2003/03/05 20:58:50 dondosha
+ * Corrections for handling effective search space for multiple queries
+ *
+ * Revision 1.17 2003/03/05 15:36:34 madden
+ * Moved BlastNumber2Program and BlastProgram2Number from blast_options to blast_util
+ *
+ * Revision 1.16 2003/03/03 14:43:21 madden
+ * Use BlastKarlinkGapBlkFill, PrintMatrixMessage, and PrintAllowedValuesMessage
+ *
+ * Revision 1.15 2003/02/26 15:42:50 madden
+ * const charPtr becomes const char *, add BlastExtensionOptionsValidate
+ *
+ * Revision 1.14 2003/02/14 16:30:19 dondosha
+ * Get rid of a compiler warning for type mismatch
+ *
+ * Revision 1.13 2003/02/13 21:42:25 madden
+ * Added validation functions
+ *
+ * Revision 1.12 2003/02/04 13:14:36 dondosha
+ * Changed the macro definitions for
+ *
+ * Revision 1.11 2003/01/31 17:00:32 dondosha
+ * Do not set the scan step in LookupTableOptionsNew
+ *
+ * Revision 1.10 2003/01/28 15:13:25 madden
+ * Added functions and structures for parameters
+ *
+ * Revision 1.9 2003/01/22 20:49:31 dondosha
+ * Set decline_align for blastn too
+ *
+ * Revision 1.8 2003/01/22 15:09:55 dondosha
+ * Correction for default penalty assignment
+ *
+ * Revision 1.7 2003/01/17 22:10:45 madden
+ * Added functions for BlastExtensionOptions, BlastInitialWordOptions as well as defines for default values
+ *
+ * Revision 1.6 2003/01/10 18:36:40 madden
+ * Change call to BlastEffectiveLengthsOptionsNew
+ *
+ * Revision 1.5 2003/01/02 17:09:35 dondosha
+ * Fill alphabet size when creating lookup table options structure
+ *
+ * Revision 1.4 2002/12/24 14:49:00 madden
+ * Set defaults for LookupTableOptions for protein-protein searches
+ *
+ * Revision 1.3 2002/12/04 13:38:21 madden
+ * Add function LookupTableOptionsNew
+ *
+ * Revision 1.2 2002/10/17 15:45:17 madden
+ * Make BLOSUM62 default
+ *
+ * Revision 1.1 2002/10/07 21:05:12 madden
+ * Sets default option values
+ *
+ * ===========================================================================
+ */
diff --git a/algo/blast/core/blast_options.h b/algo/blast/core/blast_options.h
index 1f8adc1a..2ffb4923 100644
--- a/algo/blast/core/blast_options.h
+++ b/algo/blast/core/blast_options.h
@@ -1,43 +1,35 @@
-/* $Id: blast_options.h,v 1.67 2004/04/29 15:09:49 madden Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_options.h
-
-Author: Tom Madden
-
-Contents: BLAST options
-
-Detailed Contents:
-
- - Options to be used for different tasks of the BLAST search
-
-******************************************************************************
- * $Revision: 1.67 $
- * */
+/* $Id: blast_options.h,v 1.80 2004/06/09 14:10:54 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Tom Madden
+ *
+ */
+
+/** @file blast_options.h
+ * Options to be used for different stages of the BLAST search.
+ */
#ifndef __BLASTOPTIONS__
#define __BLASTOPTIONS__
@@ -57,82 +49,110 @@ extern "C" {
*/
/** "window" between hits to trigger an extension. */
-#define BLAST_WINDOW_SIZE_PROT 40
-#define BLAST_WINDOW_SIZE_NUCL 0
-#define BLAST_WINDOW_SIZE_MEGABLAST 0
-#define BLAST_WINDOW_SIZE_DISC 40
+#define BLAST_WINDOW_SIZE_PROT 40 /**< default window (all protein searches) */
+#define BLAST_WINDOW_SIZE_NUCL 0 /**< default window size (blastn) */
+#define BLAST_WINDOW_SIZE_MEGABLAST 0 /**< default window size
+ (contiguous megablast) */
+#define BLAST_WINDOW_SIZE_DISC 40 /**< default window size
+ (discontiguous megablast) */
/** length of word to trigger an extension. */
-#define BLAST_WORDSIZE_PROT 3
-#define BLAST_WORDSIZE_NUCL 11
-#define BLAST_WORDSIZE_MEGABLAST 28
-#define BLAST_VARWORD_NUCL 0
-#define BLAST_VARWORD_MEGABLAST 1
+#define BLAST_WORDSIZE_PROT 3 /**< default word size (all protein searches) */
+#define BLAST_WORDSIZE_NUCL 11 /**< default word size (blastn) */
+#define BLAST_WORDSIZE_MEGABLAST 28 /**< default word size (contiguous
+ megablast; for discontig megablast
+ the word size is explicitly
+ overridden) */
+#define BLAST_VARWORD_NUCL 0 /**< blastn with variable wordsize */
+#define BLAST_VARWORD_MEGABLAST 1 /**< megablast with variable wordsize */
/** Protein gap costs are the defaults for the BLOSUM62 scoring matrix.
* More gap costs are listed in BLASTOptionSetGapParams
*/
/** cost for the existence of a gap.*/
-#define BLAST_GAP_OPEN_PROT 11
-#define BLAST_GAP_OPEN_NUCL 5
-#define BLAST_GAP_OPEN_MEGABLAST 0
+#define BLAST_GAP_OPEN_PROT 11 /**< default gap open penalty (all
+ protein searches) */
+#define BLAST_GAP_OPEN_NUCL 5 /**< default gap open penalty (blastn) */
+#define BLAST_GAP_OPEN_MEGABLAST 0 /**< default gap open penalty (megablast
+ with greedy gapped alignment) */
/** cost to extend a gap. */
-#define BLAST_GAP_EXTN_PROT 1
-#define BLAST_GAP_EXTN_NUCL 2
-#define BLAST_GAP_EXTN_MEGABLAST 0
-
-/** neighboring word score thresholds */
-#define BLAST_WORD_THRESHOLD_BLASTP 11
-#define BLAST_WORD_THRESHOLD_BLASTN 0
-#define BLAST_WORD_THRESHOLD_BLASTX 12
-#define BLAST_WORD_THRESHOLD_TBLASTN 13
-#define BLAST_WORD_THRESHOLD_TBLASTX 13
-#define BLAST_WORD_THRESHOLD_MEGABLAST 0
-
-/** dropoff for ungapped extension */
-#define BLAST_UNGAPPED_X_DROPOFF_PROT 7
-#define BLAST_UNGAPPED_X_DROPOFF_NUCL 20
-
-/** dropoff for gapped extension */
-#define BLAST_GAP_X_DROPOFF_PROT 15
-#define BLAST_GAP_X_DROPOFF_NUCL 30
-#define BLAST_GAP_X_DROPOFF_GREEDY 30
-#define BLAST_GAP_X_DROPOFF_TBLASTX 0
-
-/** minimal score for triggering gapped extension */
-#define BLAST_GAP_TRIGGER_PROT 22.0
-#define BLAST_GAP_TRIGGER_NUCL 25.0
-
-/** dropoff for the final gapped extension with traceback */
-#define BLAST_GAP_X_DROPOFF_FINAL_PROT 25
-#define BLAST_GAP_X_DROPOFF_FINAL_NUCL 50
-#define BLAST_GAP_X_DROPOFF_FINAL_TBLASTX 0
-
-/** reward and penalty only apply to blastn/megablast */
-#define BLAST_PENALTY -3
-#define BLAST_REWARD 1
-
-/** expect value cutoff */
-#define BLAST_EXPECT_VALUE 10.0
+#define BLAST_GAP_EXTN_PROT 1 /**< default gap extension penalty (all
+ protein searches) */
+#define BLAST_GAP_EXTN_NUCL 2 /**< default gap extension penalty (blastn) */
+#define BLAST_GAP_EXTN_MEGABLAST 0 /**< default gap extension penalty (megablast
+ with greedy gapped alignment) */
+
+/** neighboring word score thresholds; a threshold of zero
+ * means that only query and subject words that match exactly
+ * will go into the BLAST lookup table when it is generated
+ */
+#define BLAST_WORD_THRESHOLD_BLASTP 11 /**< default neighboring threshold
+ (blastp/rpsblast) */
+#define BLAST_WORD_THRESHOLD_BLASTN 0 /**< default threshold (blastn) */
+#define BLAST_WORD_THRESHOLD_BLASTX 12 /**< default threshold (blastx) */
+#define BLAST_WORD_THRESHOLD_TBLASTN 13 /**< default neighboring threshold
+ (tblastn/rpstblastn) */
+#define BLAST_WORD_THRESHOLD_TBLASTX 13 /**< default threshold (tblastx) */
+#define BLAST_WORD_THRESHOLD_MEGABLAST 0 /**< default threshold (megablast) */
+
+/** default dropoff for ungapped extension; ungapped extensions
+ * will stop when the score for the extension has dropped from
+ * the current best score by at least this much
+ */
+#define BLAST_UNGAPPED_X_DROPOFF_PROT 7 /**< ungapped dropoff score for all
+ searches except blastn */
+#define BLAST_UNGAPPED_X_DROPOFF_NUCL 20 /**< ungapped dropoff score for
+ blastn (and megablast) */
+
+/** default dropoff for preliminary gapped extensions */
+#define BLAST_GAP_X_DROPOFF_PROT 15 /**< default dropoff (all protein-
+ based gapped extensions) */
+#define BLAST_GAP_X_DROPOFF_NUCL 30 /**< default dropoff for non-greedy
+ nucleotide gapped extensions */
+#define BLAST_GAP_X_DROPOFF_GREEDY 30 /**< default dropoff for greedy
+ nucleotide gapped extensions */
+#define BLAST_GAP_X_DROPOFF_TBLASTX 0 /**< default dropoff for tblastx */
+
+/** default bit score that will trigger gapped extension */
+#define BLAST_GAP_TRIGGER_PROT 22.0 /**< default bit score that will trigger
+ a gapped extension for all protein-
+ based searches */
+#define BLAST_GAP_TRIGGER_NUCL 25.0 /**< default bit score that will trigger
+ a gapped extension for blastn */
+
+/** default dropoff for the final gapped extension with traceback */
+#define BLAST_GAP_X_DROPOFF_FINAL_PROT 25 /**< default dropoff (all protein-
+ based gapped extensions) */
+#define BLAST_GAP_X_DROPOFF_FINAL_NUCL 50 /**< default dropoff for nucleotide
+ gapped extensions */
+#define BLAST_GAP_X_DROPOFF_FINAL_TBLASTX 0 /**< default dropoff for tblastx */
+
+/** default reward and penalty (only applies to blastn/megablast) */
+#define BLAST_PENALTY -3 /**< default nucleotide mismatch score */
+#define BLAST_REWARD 1 /**< default nucleotide match score */
+
+#define BLAST_EXPECT_VALUE 10.0 /**< by default, alignments whose expect
+ value exceeds this number are discarded */
/** Types of the lookup table */
-#define MB_LOOKUP_TABLE 1
-#define NA_LOOKUP_TABLE 2
-#define AA_LOOKUP_TABLE 3
-#define PHI_AA_LOOKUP 4
-#define PHI_NA_LOOKUP 5
-#define RPS_LOOKUP_TABLE 6
+#define MB_LOOKUP_TABLE 1 /**< megablast lookup table (includes both
+ contiguous and discontiguous megablast) */
+#define NA_LOOKUP_TABLE 2 /**< blastn lookup table */
+#define AA_LOOKUP_TABLE 3 /**< standard protein (blastp) lookup table */
+#define PHI_AA_LOOKUP 4 /**< protein lookup table specialized for phi-blast */
+#define PHI_NA_LOOKUP 5 /**< nucleotide lookup table for phi-blast */
+#define RPS_LOOKUP_TABLE 6 /**< RPS lookup table (rpsblast and rpstblastn) */
/** Defaults for PSI-BLAST options */
-#define PSI_ETHRESH 0.005
-#define PSI_MAX_NUM_PASSES 1
+#define PSI_INCLUSION_ETHRESH 0.002
#define PSI_PSEUDO_COUNT_CONST 9
-#define PSI_SCALING_FACTOR 32
/** Default genetic code for query and/or database */
-#define BLAST_GENETIC_CODE 1
+#define BLAST_GENETIC_CODE 1 /**< Use the standard genetic code for converting
+ groups of three nucleotide bases to protein
+ letters */
/** Default parameters for linking HSPs */
#define BLAST_GAP_PROB 0.5
@@ -171,16 +191,23 @@ typedef struct QuerySetUpOptions {
[t]blastx only */
} QuerySetUpOptions;
+/** specifies the data structures used for bookkeeping
+ * during computation of ungapped extensions
+ */
typedef enum SeedContainerType {
- eDiagArray, /**< FIXME: EXTEND_WORD_DIAG_ARRAY */
- eMbStacks, /**< FIXME: EXTEND_WORD_MB_STACKS */
- eMaxContainerType
+ eDiagArray, /**< use diagonal structures */
+ eMbStacks, /**< use stacks (megablast only) */
+ eMaxContainerType /**< maximum value for this enumeration */
} SeedContainerType;
+/** when performing mini-extensions on hits from the
+ * blastn or megablast lookup table, this determines
+ * the direction in which the mini-extension is attempted
+ */
typedef enum SeedExtensionMethod {
- eRight, /**< FIXME: EXTEND_WORD_BLASTN */
- eRightAndLeft, /**< FIXME: EXTEND_WORD_AG */
- eMaxSeedExtensionMethod
+ eRight, /**< extend only to the right */
+ eRightAndLeft, /**< extend to left and right (used with AG method) */
+ eMaxSeedExtensionMethod /**< maximum value for this enumeration */
} SeedExtensionMethod;
/** Options needed for initial word finding and processing */
@@ -199,11 +226,14 @@ typedef struct BlastInitialWordOptions {
extension */
} BlastInitialWordOptions;
-#define UNGAPPED_CUTOFF_E_BLASTN 0.05
-#define UNGAPPED_CUTOFF_E_BLASTP 1e-300
-#define UNGAPPED_CUTOFF_E_BLASTX 1.0
-#define UNGAPPED_CUTOFF_E_TBLASTN 1.0
-#define UNGAPPED_CUTOFF_E_TBLASTX 1e-300
+/** Expect values corresponding to the default cutoff
+ * scores for ungapped alignments
+ */
+#define UNGAPPED_CUTOFF_E_BLASTN 0.05 /**< default ungapped evalue (blastn) */
+#define UNGAPPED_CUTOFF_E_BLASTP 1e-300 /**< default ungapped evalue (blastp) */
+#define UNGAPPED_CUTOFF_E_BLASTX 1.0 /**< default ungapped evalue (blastx) */
+#define UNGAPPED_CUTOFF_E_TBLASTN 1.0 /**< default ungapped evalue (tblastn) */
+#define UNGAPPED_CUTOFF_E_TBLASTX 1e-300 /**< default ungapped evalue (tblastx) */
/** Parameter block that contains a pointer to BlastInitialWordOptions
* and parsed values for those options that require it
@@ -215,6 +245,27 @@ typedef struct BlastInitialWordParameters {
Int4 cutoff_score; /**< Cutoff score for saving ungapped hits. */
} BlastInitialWordParameters;
+/** The algorithm to be used for preliminary
+ * gapped extensions
+ */
+typedef enum EBlastPrelimGapExt {
+ eDynProgExt, /**< standard affine gapping */
+ eGreedyExt, /**< Greedy extension (megaBlast) */
+ eGreedyWithTracebackExt /**< Greedy extension with Traceback
+ calculated. */
+} EBlastPrelimGapExt;
+
+/** The algorithm to be used for final gapped
+ * extensions with traceback
+ */
+typedef enum EBlastTbackExt {
+ eDynProgTbck, /**< standard affine gapping */
+ eGreedyTbck, /**< Greedy extension (megaBlast) */
+ eSmithWatermanTbck, /**< Smith-Waterman finds optimal scores, then
+ ALIGN_EX to find alignment. */
+ eSkipTbck /**< Traceback information is not needed */
+} EBlastTbackExt;
+
/** Options used for gapped extension
* These include:
* a. Penalties for various types of gapping;
@@ -226,20 +277,32 @@ typedef struct BlastExtensionOptions {
double gap_x_dropoff; /**< X-dropoff value for gapped extension (in bits) */
double gap_x_dropoff_final;/**< X-dropoff value for the final gapped
extension (in bits) */
- double gap_trigger;/**< Score in bits for starting gapped extension */
- Int4 algorithm_type; /**< E.g. for blastn: dynamic programming;
- greedy without traceback; greedy with traceback */
- Boolean skip_traceback; /**< Is traceback information needed in results? */
+ double gap_trigger; /**< Score in bits for starting gapped extension */
+ EBlastPrelimGapExt ePrelimGapExt; /**< type of preliminary gapped extension
+ (normally used for calculating the score). */
+ EBlastTbackExt eTbackExt; /**< type of traceback extension. */
+ Boolean compositionBasedStats; /**< if TRUE use composition-based stats. */
} BlastExtensionOptions;
+/** Computed values used as parameters for gapped alignments */
typedef struct BlastExtensionParameters {
- BlastExtensionOptions* options;
+ BlastExtensionOptions* options; /**< The original (unparsed) options. */
Int4 gap_x_dropoff; /**< X-dropoff value for gapped extension (raw) */
Int4 gap_x_dropoff_final;/**< X-dropoff value for the final gapped
extension (raw) */
Int4 gap_trigger; /**< Minimal raw score for starting gapped extension */
} BlastExtensionParameters;
+/** Should sum statistics be performed? If not set, the engine decides this
+ * question based on the program and gapped calculation option.
+ */
+typedef enum ESumStatsMode {
+ eSumStatsNotSet = 0, /**< Let the engine decide, based on the program and
+ gapped calculation option. */
+ eSumStatsFalse, /**< Do not use sum statistics. */
+ eSumStatsTrue /**< Use sum statistics. */
+} ESumStatsMode;
+
/** Options used when evaluating and saving hits
* These include:
* a. Restrictions on the number of hits to be saved;
@@ -274,17 +337,15 @@ typedef struct BlastHitSavingOptions {
/********************************************************************/
/* Merge all these in a structure for clarity? */
/* applicable to all, except blastn */
- Boolean do_sum_stats; /**< Force sum statistics to be used to combine
- HSPs */
+ ESumStatsMode do_sum_stats; /**< Force sum statistics to be used to combine
+ HSPs */
/* tblastn w/ sum statistics */
Int4 longest_intron; /**< The longest distance between HSPs allowed for
combining via sum statistics with uneven gaps */
/********************************************************************/
- Int4 min_hit_length;
- Boolean is_neighboring; /**< FIXME: neighboring is specified by a percent
- identity and a minimum hit length */
-
+ Int4 min_hit_length; /**< optional minimum alignment length; alignments
+ not at least this long are discarded */
Boolean phi_align; /**< Is this a PHI BLAST search? */
} BlastHitSavingOptions;
@@ -330,6 +391,22 @@ typedef struct BlastScoringOptions {
gapping */
} BlastScoringOptions;
+/** Scoring parameters block
+ * Contains scoring-related information that is actually used
+ * for the blast search. Instances are produced by BlastScoringParametersNew
+ * and released with BlastScoringParametersFree.
+ * NOTE(review): whether BlastScoringParametersFree also releases 'options'
+ * is not visible from this header - confirm before sharing an options
+ * structure between several parameter blocks.
+ */
+typedef struct BlastScoringParameters {
+ BlastScoringOptions *options; /**< User-provided values for these params */
+ Int2 reward; /**< Reward for a match */
+ Int2 penalty; /**< Penalty for a mismatch */
+ Int4 gap_open; /**< Extra penalty for starting a gap (scaled version) */
+ Int4 gap_extend; /**< Penalty for each gap residue (scaled version) */
+ Int4 decline_align; /**< Cost for declining alignment (scaled version) */
+ Int4 shift_pen; /**< Penalty for shifting a frame in out-of-frame
+ gapping (scaled version) */
+ double scale_factor; /**< multiplier for all cutoff scores */
+} BlastScoringParameters;
+
/** Options for setting up effective lengths and search spaces.
* The values are those the user has specified to override the real sizes.
*/
@@ -361,17 +438,22 @@ typedef struct BlastEffectiveLengthsParameters {
* Some of these possibly should be transfered elsewhere
*/
typedef struct PSIBlastOptions {
- double ethresh; /**< PSI-BLAST */
- Int4 maxNumPasses; /**< PSI-BLAST */
- Int4 pseudoCountConst; /**< PSI-BLAST */
- Boolean composition_based_stat;/**< PSI-BLAST */
- double scalingFactor; /**< Scaling factor used when constructing PSSM for
- RPS-BLAST */
- Boolean use_best_align; /**< Use only alignments chosen by user for PSSM
- computation: WWW PSI-BLAST only */
- Boolean smith_waterman; /**< PSI-BLAST */
- Boolean discontinuous; /**< PSI-BLAST */
- Boolean is_rps_blast; /**< RPS-BLAST */
+ /** Pseudocount constant. Needed for computing the PSSM residue
+ * frequencies */
+ Int4 pseudo_count;
+
+ /*** The following options are used at the API layer to specify how the
+ * multiple sequence alignment is built from pairwise alignments. These
+ * could go in their own structure in the future. */
+
+ /** Minimum evalue for inclusion in PSSM calculation. Needed for the first
+ * stage of the PSSM calculation algorithm */
+ double inclusion_ethresh;
+
+ /** If set to TRUE, use the best alignment when multiple HSPs are found
+ * in a query-subject alignment (i.e.: HSP with the lowest e-value), else
+ * use all HSPs in a query-subject alignment. */
+ Boolean use_best_alignment;
} PSIBlastOptions;
/** Options used to create the ReadDBFILE structure
@@ -430,7 +512,7 @@ BlastInitialWordOptionsNew(Uint1 program,
/** Fill non-default values in the BlastInitialWordOptions structure.
* @param options The options structure [in] [out]
* @param program Program number (blastn, blastp, etc.) [in]
- * @param greedy Settings should assume greedy alignments [in]
+ * @param greedy Settings should assume greedy alignments. [in]
* @param window_size Size of a largest window between 2 words for the two-hit
* version [in]
* @param variable_wordsize Will only full bytes of the compressed sequence be
@@ -510,7 +592,8 @@ BlastExtensionOptionsNew(Uint1 program, BlastExtensionOptions* *options);
/** Fill non-default values in the BlastExtensionOptions structure.
* @param options The options structure [in] [out]
* @param program Program number (blastn, blastp, etc.) [in]
- * @param greedy Settings should assume greedy alignments [in]
+ * @param greedy In how many stages of the search greedy alignment is
+ * used (values 0, 1, 2)? [in]
* @param x_dropoff X-dropoff parameter value for preliminary gapped
* extensions [in]
* @param x_dropoff_final X-dropoff parameter value for final gapped
@@ -518,7 +601,7 @@ BlastExtensionOptionsNew(Uint1 program, BlastExtensionOptions* *options);
*/
Int2
BLAST_FillExtensionOptions(BlastExtensionOptions* options,
- Uint1 program, Boolean greedy, double x_dropoff,
+ Uint1 program, Int4 greedy, double x_dropoff,
double x_dropoff_final);
@@ -576,7 +659,6 @@ BLAST_FillScoringOptions(BlastScoringOptions* options, Uint1 program,
Boolean greedy_extension, Int4 penalty, Int4 reward, const char *matrix,
Int4 gap_open, Int4 gap_extend);
-
/** Validate contents of BlastScoringOptions.
* @param program_number Type of BLAST program [in]
* @param options Options to be validated [in]
@@ -587,11 +669,26 @@ BlastScoringOptionsValidate(Uint1 program_number,
const BlastScoringOptions* options, Blast_Message* *blast_msg);
/** Produces copy of "old" options, with new memory allocated.
- * @param contains copied BlastScoringOptions upon return [out]
- * @param BlastScoringOptions to be copied [in]
+ * @param new_opt Contains copied BlastScoringOptions upon return [out]
+ * @param old_opt BlastScoringOptions to be copied [in]
*/
Int2 BlastScoringOptionsDup(BlastScoringOptions* *new_opt, const BlastScoringOptions* old_opt);
+/** Deallocate memory for BlastScoringParameters.
+ * @param parameters Structure to free [in]
+ */
+BlastScoringParameters* BlastScoringParametersFree(
+ BlastScoringParameters* parameters);
+
+/** Calculate scaled cutoff scores and gap penalties
+ * @param options Already allocated scoring options [in]
+ * @param sbp Structure containing scale factor [in]
+ * @param parameters Scoring parameters [out]
+ */
+Int2 BlastScoringParametersNew(const BlastScoringOptions *options,
+ BlastScoreBlk* sbp,
+ BlastScoringParameters* *parameters);
+
/** Deallocate memory for BlastEffectiveLengthsOptions*.
* @param options Structure to free [in]
*/
@@ -813,16 +910,16 @@ Int2 BLAST_ValidateOptions(Uint1 program_number,
* @param sbp Scoring statistical parameters [in]
* @param hit_params Hit saving parameters, including all cutoff
* scores [in] [out]
+ * @param ext_params Extension parameters (gap_trigger used) [in]
* @param db_length Total length of database (non-database search if 0) [in]
* @param subject_length Length of the subject sequence. [in]
- * @param psi_options PSI BLAST options, containing scaling factor [in]
*
*/
void
CalculateLinkHSPCutoffs(Uint1 program, BlastQueryInfo* query_info,
BlastScoreBlk* sbp, BlastHitSavingParameters* hit_params,
- Int8 db_length, Int4 subject_length,
- const PSIBlastOptions* psi_options);
+ BlastExtensionParameters* ext_params,
+ Int8 db_length, Int4 subject_length);
#ifdef __cplusplus
}
diff --git a/algo/blast/core/blast_psi.c b/algo/blast/core/blast_psi.c
new file mode 100644
index 00000000..29130e7e
--- /dev/null
+++ b/algo/blast/core/blast_psi.c
@@ -0,0 +1,325 @@
+static char const rcsid[] =
+ "$Id: blast_psi.c,v 1.3 2004/06/16 15:18:16 camacho Exp $";
+/* ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Christiam Camacho
+ *
+ */
+
+/** @file blast_psi.c
+ * Implementation of the high level PSI-BLAST API
+ */
+
+#include <algo/blast/core/blast_psi.h>
+#include <algo/blast/core/blast_stat.h>
+#include <algo/blast/core/blast_encoding.h>
+#include "blast_psi_priv.h"
+
+/* FIXME: document all local variables */
+
+/****************************************************************************/
+/* Use the following #define's to enable/disable functionality */
+
+/* Taking gaps into account when constructing a PSSM was introduced in the
+ * 2001 paper "Improving the accuracy of PSI-BLAST protein database searches
+ * with composition based-statistics and other refinements". This feature
+ * can be disabled by defining the PSI_IGNORE_GAPS_IN_COLUMNS symbol below */
+/* #define PSI_IGNORE_GAPS_IN_COLUMNS */
+/****************************************************************************/
+
+/** Top level entry point: builds a PSSM from multiple alignment data.
+ * The stages run in this order: purge biased segments, compute alignment
+ * blocks, compute sequence weights, compute residue frequencies, convert the
+ * frequencies to a PSSM, and scale the matrix.
+ * @param alignment Multiple alignment data; it is modified by the purge
+ *        stage [in]
+ * @param options PSI-BLAST options, forwarded to the residue frequency
+ *        stage [in]
+ * @param sbp Score block; supplies the alphabet size and is passed to the
+ *        later stages [in]
+ * @param diagnostics If non-NULL, diagnostics are collected (see the note in
+ *        the body) [out]
+ * @return Newly allocated PSSM, or NULL if a required argument is NULL or
+ *         memory could not be allocated. Release with PSIMatrixFree.
+ */
+PsiMatrix*
+PSICreatePSSM(PsiAlignmentData* alignment, /* [in] */
+              const PSIBlastOptions* options, /* [in] */
+              BlastScoreBlk* sbp, /* [in] */
+              PsiDiagnostics* diagnostics) /* [out] */
+{
+    PsiMatrix* retval = NULL;
+
+    PsiAlignedBlock* aligned_block = NULL;
+    PsiSequenceWeights* seq_weights = NULL;
+
+    /* Everything dereferenced below must be present */
+    if ( !alignment || !alignment->dimensions || !sbp ) {
+        return NULL;
+    }
+
+    aligned_block = _PSIAlignedBlockNew(alignment->dimensions->query_sz);
+    seq_weights = _PSISequenceWeightsNew(alignment->dimensions, sbp);
+    retval = PSIMatrixNew(alignment->dimensions->query_sz, sbp->alphabet_size);
+
+    /* If any allocation failed, release whatever was obtained (each Free
+     * function accepts NULL) rather than handing NULL to the stages below */
+    if ( !aligned_block || !seq_weights || !retval ) {
+        _PSIAlignedBlockFree(aligned_block);
+        _PSISequenceWeightsFree(seq_weights);
+        return PSIMatrixFree(retval);
+    }
+
+    PSIPurgeBiasedSegments(alignment);
+    PSIComputeAlignmentBlocks(alignment, aligned_block);
+    PSIComputeSequenceWeights(alignment, aligned_block, seq_weights);
+    PSIComputeResidueFrequencies(alignment, seq_weights, sbp, aligned_block,
+                                 options, retval);
+    PSIConvertResidueFreqsToPSSM(retval, alignment->query, sbp,
+                                 seq_weights->std_prob);
+    PSIScaleMatrix(alignment->query, alignment->dimensions->query_sz,
+                   seq_weights->std_prob, NULL, retval, sbp);
+
+    if (diagnostics) {
+        /* NOTE(review): this assignment only changes the local copy of the
+         * pointer, so the caller never sees the value returned here, and
+         * aligned_block/seq_weights are not released on this path. Confirm
+         * the intended ownership against _PSISaveDiagnostics. */
+        diagnostics = _PSISaveDiagnostics(alignment, aligned_block,
+                                          seq_weights);
+    } else {
+
+        /* FIXME: Deallocate structures selectively as some of these will be
+         * copied into the diagnostics structure */
+        _PSIAlignedBlockFree(aligned_block);
+        _PSISequenceWeightsFree(seq_weights);
+    }
+
+    return retval;
+}
+
+/****************************************************************************/
+
+/** Initializes the alignment data structure with the query sequence
+ * information.
+ */
+static void
+PSIExtractQuerySequenceInfo(PsiAlignmentData* alignment);
+
+/** Allocates the structure that holds multiple alignment data and seeds it
+ * with the query sequence.
+ * Every cell of desc_matrix starts as unused, and every sequence starts as a
+ * candidate for PSSM construction.
+ * @param query Query sequence; info->query_sz residues are copied [in]
+ * @param info Dimensions of the alignment (query size, number of aligned
+ *        sequences); copied into the new structure [in]
+ * @return Newly allocated structure, or NULL if an argument is NULL or
+ *         memory could not be allocated. Release with PSIAlignmentDataFree.
+ */
+PsiAlignmentData*
+PSIAlignmentDataNew(const Uint1* query, const PsiInfo* info)
+{
+    PsiAlignmentData* retval = NULL;    /* the return value */
+    Uint4 s = 0;                        /* index in sequences */
+    Uint4 p = 0;                        /* index on positions */
+
+    if ( !query || !info ) {
+        return NULL;
+    }
+
+    retval = (PsiAlignmentData*) calloc(1, sizeof(PsiAlignmentData));
+    if ( !retval ) {
+        return NULL;
+    }
+
+    /* The dimensions are stored before anything else: PSIAlignmentDataFree
+     * reads them to release the matrices allocated below, so they have to be
+     * valid on every error path of this function */
+    retval->dimensions = (PsiInfo*) calloc(1, sizeof(PsiInfo));
+    if ( !retval->dimensions ) {
+        return PSIAlignmentDataFree(retval);
+    }
+    memcpy((void*) retval->dimensions, (const void*) info, sizeof(*info));
+
+    /* This doesn't need to be query_sz + 1 (posC) */
+    retval->res_counts = (Uint4**) _PSIAllocateMatrix(info->query_sz,
+                                                      PSI_ALPHABET_SIZE,
+                                                      sizeof(Uint4));
+    if ( !(retval->res_counts) ) {
+        return PSIAlignmentDataFree(retval);
+    }
+
+    retval->match_seqs = (Uint4*) calloc(info->query_sz, sizeof(Uint4));
+    if ( !(retval->match_seqs)) {
+        return PSIAlignmentDataFree(retval);
+    }
+
+    /* One row per sequence (row kQueryIndex is the query), one cell per
+     * query position */
+    retval->desc_matrix = (PsiDesc**) _PSIAllocateMatrix(info->num_seqs + 1,
+                                                         info->query_sz,
+                                                         sizeof(PsiDesc));
+    if (!retval->desc_matrix) {
+        return PSIAlignmentDataFree(retval);
+    }
+    for (s = 0; s < info->num_seqs + 1; s++) {
+        for (p = 0; p < info->query_sz; p++) {
+            retval->desc_matrix[s][p].letter = (unsigned char) -1;
+            retval->desc_matrix[s][p].used = FALSE;
+            retval->desc_matrix[s][p].e_value = kDefaultEvalueForPosition;
+            retval->desc_matrix[s][p].extents.left = (unsigned int) -1;
+            retval->desc_matrix[s][p].extents.right = info->query_sz;
+        }
+    }
+
+    retval->use_sequences = (Boolean*) calloc(info->num_seqs + 1,
+                                              sizeof(Boolean));
+    if (!retval->use_sequences) {
+        return PSIAlignmentDataFree(retval);
+    }
+    /* All sequences are valid candidates for taking part in
+       PSSM construction */
+    for (s = 0; s < info->num_seqs + 1; s++) {
+        retval->use_sequences[s] = TRUE;
+    }
+
+    retval->query = (Uint1*) malloc(info->query_sz * sizeof(Uint1));
+    if ( !retval->query ) {
+        return PSIAlignmentDataFree(retval);
+    }
+    memcpy((void*) retval->query, (const void*) query, info->query_sz);
+
+    PSIExtractQuerySequenceInfo(retval);
+
+    return retval;
+}
+
+/** Releases a structure created by PSIAlignmentDataNew, including a
+ * partially constructed one.
+ * @param alignment Structure to deallocate; may be NULL [in]
+ * @return NULL
+ */
+PsiAlignmentData*
+PSIAlignmentDataFree(PsiAlignmentData* alignment)
+{
+    if ( !alignment ) {
+        return NULL;
+    }
+
+    /* The matrices can only be released when their extents are known.
+     * Without the dimensions they are left alone rather than dereferencing a
+     * NULL pointer. */
+    if (alignment->res_counts && alignment->dimensions) {
+        _PSIDeallocateMatrix((void**) alignment->res_counts,
+                             alignment->dimensions->query_sz);
+        alignment->res_counts = NULL;
+    }
+
+    if (alignment->match_seqs) {
+        sfree(alignment->match_seqs);
+    }
+
+    if (alignment->desc_matrix && alignment->dimensions) {
+        _PSIDeallocateMatrix((void**) alignment->desc_matrix,
+                             alignment->dimensions->num_seqs + 1);
+        alignment->desc_matrix = NULL;
+    }
+
+    if (alignment->use_sequences) {
+        sfree(alignment->use_sequences);
+    }
+
+    /* Released only after the matrices above, which need it */
+    if (alignment->dimensions) {
+        sfree(alignment->dimensions);
+    }
+
+    if (alignment->query) {
+        sfree(alignment->query);
+    }
+
+    sfree(alignment);
+    return NULL;
+}
+
+/** Creates a PsiMatrix whose three matrices (pssm, scaled_pssm, res_freqs)
+ * each have query_sz + 1 columns of alphabet_size zero-initialized cells.
+ * @param query_sz Size of the query [in]
+ * @param alphabet_size Number of cells in each column [in]
+ * @return Newly allocated structure, or NULL if memory ran out
+ */
+PsiMatrix*
+PSIMatrixNew(Uint4 query_sz, Uint4 alphabet_size)
+{
+    const Uint4 num_columns = query_sz + 1;
+    PsiMatrix* matrix = (PsiMatrix*) calloc(1, sizeof(PsiMatrix));
+
+    if (matrix == NULL) {
+        return NULL;
+    }
+    /* Recorded up front so that PSIMatrixFree can release a partial result */
+    matrix->ncols = num_columns;
+
+    matrix->pssm =
+        (int**) _PSIAllocateMatrix(num_columns, alphabet_size, sizeof(int));
+    if (matrix->pssm == NULL) {
+        return PSIMatrixFree(matrix);
+    }
+
+    matrix->scaled_pssm =
+        (int**) _PSIAllocateMatrix(num_columns, alphabet_size, sizeof(int));
+    if (matrix->scaled_pssm == NULL) {
+        return PSIMatrixFree(matrix);
+    }
+
+    matrix->res_freqs =
+        (double**) _PSIAllocateMatrix(num_columns, alphabet_size,
+                                      sizeof(double));
+    if (matrix->res_freqs == NULL) {
+        return PSIMatrixFree(matrix);
+    }
+
+    return matrix;
+}
+
+/** Releases a PsiMatrix together with whichever of its matrices were
+ * allocated.
+ * @param matrix Structure to deallocate; may be NULL [in]
+ * @return NULL
+ */
+PsiMatrix*
+PSIMatrixFree(PsiMatrix* matrix)
+{
+    if (matrix != NULL) {
+        /* All three matrices share the same column count */
+        if (matrix->pssm != NULL) {
+            _PSIDeallocateMatrix((void**) matrix->pssm, matrix->ncols);
+        }
+        if (matrix->scaled_pssm != NULL) {
+            _PSIDeallocateMatrix((void**) matrix->scaled_pssm, matrix->ncols);
+        }
+        if (matrix->res_freqs != NULL) {
+            _PSIDeallocateMatrix((void**) matrix->res_freqs, matrix->ncols);
+        }
+        sfree(matrix);
+    }
+
+    return NULL;
+}
+
+/** Creates a diagnostics structure whose info_content matrix has query_sz
+ * columns of alphabet_size zero-initialized cells.
+ * @param query_sz Size of the query [in]
+ * @param alphabet_size Number of cells in each column [in]
+ * @return Newly allocated structure, or NULL if memory ran out
+ */
+PsiDiagnostics*
+PSIDiagnosticsNew(Uint4 query_sz, Uint4 alphabet_size)
+{
+    PsiDiagnostics* diags = (PsiDiagnostics*) calloc(1, sizeof(PsiDiagnostics));
+
+    if (diags == NULL) {
+        return NULL;
+    }
+
+    diags->info_content =
+        (double**) _PSIAllocateMatrix(query_sz, alphabet_size, sizeof(double));
+    if (diags->info_content == NULL) {
+        return PSIDiagnosticsFree(diags);
+    }
+    /* The column count is recorded only once the matrix exists */
+    diags->ncols = query_sz;
+
+    return diags;
+}
+
+/** Releases a diagnostics structure and its info_content matrix.
+ * @param diags Structure to deallocate; may be NULL [in]
+ * @return NULL
+ */
+PsiDiagnostics*
+PSIDiagnosticsFree(PsiDiagnostics* diags)
+{
+    if (diags != NULL) {
+        if (diags->info_content != NULL) {
+            _PSIDeallocateMatrix((void**) diags->info_content, diags->ncols);
+        }
+        sfree(diags);
+    }
+
+    return NULL;
+}
+
+/****************************************************************************/
+/* Auxiliary functions to populate PsiAlignmentData structure */
+/** Fills row kQueryIndex of desc_matrix from the query sequence and counts
+ * the query in the per-position tallies (res_counts, match_seqs).
+ * @param alignment Alignment data with query, dimensions, desc_matrix,
+ *        res_counts and match_seqs already allocated [in|out]
+ */
+static void
+PSIExtractQuerySequenceInfo(PsiAlignmentData* alignment)
+{
+    Uint4 i = 0;
+
+    ASSERT(alignment);
+
+    for (i = 0; i < alignment->dimensions->query_sz; i++) {
+        const Uint1 residue = alignment->query[i];
+        PsiDesc* cell = &alignment->desc_matrix[kQueryIndex][i];
+
+        cell->letter = residue;
+        cell->used = TRUE;
+        cell->e_value = PSI_INCLUSION_ETHRESH / 2;
+        cell->extents.left = 0;
+        cell->extents.right = alignment->dimensions->query_sz;
+
+        /* res_counts holds PSI_ALPHABET_SIZE cells per position; a residue
+         * code outside that range must not be used as an index */
+        if (residue < PSI_ALPHABET_SIZE) {
+            alignment->res_counts[i][residue]++;
+        }
+        /* the query always counts as one sequence matching this position */
+        alignment->match_seqs[i]++;
+    }
+}
diff --git a/algo/blast/core/blast_psi.h b/algo/blast/core/blast_psi.h
new file mode 100644
index 00000000..80535244
--- /dev/null
+++ b/algo/blast/core/blast_psi.h
@@ -0,0 +1,168 @@
+#ifndef ALGO_BLAST_CORE___BLAST_PSI__H
+#define ALGO_BLAST_CORE___BLAST_PSI__H
+
+/* $Id: blast_psi.h,v 1.3 2004/06/09 14:10:15 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Christiam Camacho
+ *
+ */
+
+/** @file blast_psi.h
+ * High level definitions and declarations for the PSI-BLAST API.
+ */
+
+#include <algo/blast/core/ncbi_std.h>
+#include <algo/blast/core/blast_options.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Defaults for PSI-BLAST options - these are application level options */
+#define PSI_MAX_NUM_PASSES 1
+#define PSI_SCALING_FACTOR 32
+
+/** Percent identity threshold for discarding near-identical matches */
+#define PSI_NEAR_IDENTICAL 0.94
+#define PSI_IDENTICAL 1.0
+
+/** Structure to describe the characteristics of a position in the model.
+ * One instance exists per (sequence, query position) cell of
+ * PsiAlignmentData::desc_matrix. PSIAlignmentDataNew initializes every cell to
+ * letter = (unsigned char) -1, used = FALSE,
+ * e_value = kDefaultEvalueForPosition, extents.left = (unsigned int) -1 and
+ * extents.right = query size; the cells of the query row are then filled in
+ * from the query sequence. */
+typedef struct PsiDesc {
+ Uint1 letter; /**< Preferred letter at this position */
+ Boolean used; /**< Is this letter being used? */
+ double e_value; /**< E-value of the highest HSP including this
+ position */
+ SSeqRange extents; /**< Extent of this aligned position */
+} PsiDesc;
+
+/** Dimensions of the multiple sequence alignment */
+typedef struct PsiInfo {
+ Uint4 query_sz; /**< Size of the query */
+ Uint4 num_seqs; /**< Number of aligned sequences, not counting the
+ query */
+} PsiInfo;
+
+/** This structure is to be populated at the API level from the Seq-aligns.
+ * Allocate with PSIAlignmentDataNew and release with PSIAlignmentDataFree. */
+typedef struct PsiAlignmentData {
+ Uint4** res_counts;/**< matrix which keeps track of the number of
+ residues aligned with the query at each query
+ position (columns of multiple alignment). It is
+ indexed [position][residue]; its dimensions are
+ query_sz by PSI_ALPHABET_SIZE */
+ Uint4* match_seqs;/**< Count of how many sequences match the query
+ at each query position; it is 1 after
+ construction because the query itself is
+ counted. This dynamically allocated array has
+ a length of query_sz */
+ PsiDesc** desc_matrix; /**< Matrix of PsiDesc structures, each cell
+ represents an aligned character with the
+ query. It is indexed [sequence][position], with
+ the query in row 0; its dimensions are
+ num_seqs + 1 by query_sz. */
+ Boolean* use_sequences; /**< Determines if sequences must be used or not.
+ This dynamically allocated array has a length
+ of num_seqs + 1; every entry is TRUE after
+ construction. */
+
+ PsiInfo* dimensions; /**< Dimensions of the multiple sequence alignment
+ (query size, and number of aligned sequences
+ not counting the query); a private copy of the
+ values given to PSIAlignmentDataNew */
+ Uint1* query; /**< Query sequence (aka master, consensus); a
+ private copy of query_sz residues */
+} PsiAlignmentData;
+
+/** The functions to create internal structure to store intermediate data
+ * while creating a PSSM */
+PsiAlignmentData*
+PSIAlignmentDataNew(const Uint1* query, const PsiInfo* info);
+
+PsiAlignmentData*
+PSIAlignmentDataFree(PsiAlignmentData* align_data);
+
+/** This is the return value from all the processing performed in this library.
+ * At the API level this information should be copied into a Score-mat ASN.1
+ * object. Allocate with PSIMatrixNew and release with PSIMatrixFree. */
+typedef struct PsiMatrix {
+ int** pssm; /**< The PSSM, its dimensions are query_sz+1 by
+ the alphabet_size given to PSIMatrixNew */
+ int** scaled_pssm; /**< not to be used by the public ? Dimensions are
+ the same as above. Needed in the last 2 stages
+ of PSI-BLAST */
+ double** res_freqs; /**< The residue frequencies. Dimensions are the
+ same as above (FIXME?) */
+ Uint4 ncols; /**< Number of columns in the matrices above
+ (query size+1) */
+} PsiMatrix;
+
+/** Allocates a new PsiMatrix structure */
+PsiMatrix*
+PSIMatrixNew(Uint4 query_sz, Uint4 alphabet_size);
+
+/** Deallocates the PsiMatrix structure passed in.
+ * @param matrix structure to deallocate [in]
+ * @return NULL
+ */
+PsiMatrix*
+PSIMatrixFree(PsiMatrix* matrix);
+
+
+/** This structure returns detailed information collected during the process of
+ * creating a PSSM. Allocate with PSIDiagnosticsNew and release with
+ * PSIDiagnosticsFree.
+ */
+typedef struct PsiDiagnostics {
+ double** info_content; /**< Matrix allocated by PSIDiagnosticsNew with
+ query_sz columns of alphabet_size cells */
+ Uint4 ncols; /**< Number of columns in the matrix above;
+ PSIDiagnosticsNew sets it to query_sz */
+ /* FIXME: add sequence weights */
+} PsiDiagnostics;
+
+/** Allocates a new PSI-BLAST diagnostics structure
+ */
+PsiDiagnostics*
+PSIDiagnosticsNew(Uint4 query_sz, Uint4 alphabet_size);
+
+/** Deallocates the PsiDiagnostics structure passed in.
+ * @param diags structure to deallocate [in]
+ * @return NULL
+ */
+PsiDiagnostics*
+PSIDiagnosticsFree(PsiDiagnostics* diags);
+
+/****************************************************************************/
+
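+/** Top level function: creates a PSSM from multiple alignment data.
+ * FIXME: alignment should contain data from multiple sequence alignment
+ * @param alignment Multiple alignment data; modified during processing [in]
+ * @param options PSI-BLAST options [in]
+ * @param sbp Score block [in]
+ * @param diagnostics If non-NULL this structure will be populated and it is
+ * the caller's responsibility to deallocate this structure. [out]
+ * @return The PSSM along with residue frequencies and statistical information
+ * (the latter is returned in the sbp)
+ */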
+PsiMatrix*
+PSICreatePSSM(PsiAlignmentData* alignment, /* [in] but modified */
+ const PSIBlastOptions* options, /* [in] */
+ BlastScoreBlk* sbp, /* [in] */
+ PsiDiagnostics* diagnostics); /* [out] */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !ALGO_BLAST_CORE__BLAST_PSI__H */
+
diff --git a/algo/blast/core/blast_psi_priv.c b/algo/blast/core/blast_psi_priv.c
index 40160c32..fd8824f2 100644
--- a/algo/blast/core/blast_psi_priv.c
+++ b/algo/blast/core/blast_psi_priv.c
@@ -1,5 +1,5 @@
static char const rcsid[] =
- "$Id: blast_psi_priv.c,v 1.2 2004/04/07 22:08:37 kans Exp $";
+ "$Id: blast_psi_priv.c,v 1.10 2004/06/16 15:22:47 camacho Exp $";
/* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -24,25 +24,42 @@ static char const rcsid[] =
*
* ===========================================================================
*
- * Author: Christiam Camacho
+ * Author: Alejandro Schaffer, ported by Christiam Camacho
*
*/
+/** @file blast_psi_priv.c
+ * Definitions for functions in private interface for Position Iterated BLAST
+ * API.
+ */
+
#include "blast_psi_priv.h"
-#include <algo/blast/core/blast_def.h>
+#include "matrix_freq_ratios.h"
+
+/****************************************************************************/
+/* Constants */
+const unsigned int kQueryIndex = 0;
+const double kEpsilon = 0.0001;
+const double kDefaultEvalueForPosition = 1.0;
+const int kPsiScaleFactor = 200;
+
+/****************************************************************************/
void**
_PSIAllocateMatrix(unsigned int ncols, unsigned int nrows,
- unsigned int data_type_sz)
+ unsigned int data_type_sz)
{
void** retval = NULL;
unsigned int i = 0;
- if ( !(retval = (void**) malloc(sizeof(void*) * ncols)))
+ retval = (void**) malloc(sizeof(void*) * ncols);
+ if ( !retval ) {
return NULL;
+ }
for (i = 0; i < ncols; i++) {
- if ( !(retval[i] = (void*) calloc(nrows, data_type_sz))) {
+        retval[i] = (void*) calloc(nrows, data_type_sz);
+        /* Test the row that was just allocated: the outer array was already
+         * validated above, so testing 'retval' here could never detect a
+         * failed row allocation */
+        if ( !retval[i] ) {
retval = _PSIDeallocateMatrix(retval, i);
break;
}
@@ -65,16 +82,1481 @@ _PSIDeallocateMatrix(void** matrix, unsigned int ncols)
return NULL;
}
+/** Copies the first nrows cells of each of the first ncols columns of src
+ * into the corresponding cells of dest.
+ * @param dest Destination matrix, at least ncols by nrows [out]
+ * @param src Source matrix, at least ncols by nrows [in]
+ * @param ncols Number of columns (first index) to copy [in]
+ * @param nrows Number of cells per column (second index) to copy [in]
+ */
+void
+_PSICopyMatrix(double** dest, const double** src,
+               unsigned int ncols, unsigned int nrows)
+{
+    unsigned int col;
+    unsigned int row;
+
+    for (col = 0; col != ncols; ++col) {
+        for (row = 0; row != nrows; ++row) {
+            dest[col][row] = src[col][row];
+        }
+    }
+}
+
+/****************************************************************************/
+
+/** Creates an aligned-block structure for num_positions query positions.
+ * Each position extent starts with left = -1 and right = num_positions, and
+ * each size starts at zero.
+ * @param num_positions Number of query positions [in]
+ * @return Newly allocated structure, or NULL if memory ran out
+ */
+PsiAlignedBlock*
+_PSIAlignedBlockNew(Uint4 num_positions)
+{
+    Uint4 pos;
+    PsiAlignedBlock* block =
+        (PsiAlignedBlock*) calloc(1, sizeof(PsiAlignedBlock));
+
+    if (block == NULL) {
+        return NULL;
+    }
+
+    block->pos_extnt = (SSeqRange*) calloc(num_positions, sizeof(SSeqRange));
+    if (block->pos_extnt == NULL) {
+        return _PSIAlignedBlockFree(block);
+    }
+
+    block->size = (Uint4*) calloc(num_positions, sizeof(Uint4));
+    if (block->size == NULL) {
+        return _PSIAlignedBlockFree(block);
+    }
+
+    for (pos = 0; pos != num_positions; ++pos) {
+        SSeqRange* extent = &block->pos_extnt[pos];
+
+        extent->left = -1;
+        extent->right = num_positions;
+    }
+    return block;
+}
+
+/** Releases an aligned-block structure and the arrays it owns.
+ * @param aligned_blocks Structure to deallocate; may be NULL [in]
+ * @return NULL
+ */
+PsiAlignedBlock*
+_PSIAlignedBlockFree(PsiAlignedBlock* aligned_blocks)
+{
+    if (aligned_blocks != NULL) {
+        if (aligned_blocks->pos_extnt != NULL) {
+            sfree(aligned_blocks->pos_extnt);
+        }
+        if (aligned_blocks->size != NULL) {
+            sfree(aligned_blocks->size);
+        }
+        sfree(aligned_blocks);
+    }
+    return NULL;
+}
+
+/** Creates the structure that holds sequence weight data.
+ * row_sigma, norm_seq_weights and sigma get num_seqs + 1 entries,
+ * match_weights is (query_sz + 1) by PSI_ALPHABET_SIZE, info_content gets
+ * query_sz entries, and std_prob comes from _PSIGetStandardProbabilities.
+ * NOTE(review): sigma is sized by the number of sequences like row_sigma;
+ * confirm against its users that it is not indexed by query position.
+ * @param info Dimensions of the multiple alignment [in]
+ * @param sbp Score block used to obtain the standard probabilities [in]
+ * @return Newly allocated structure, or NULL if any allocation failed
+ */
+PsiSequenceWeights*
+_PSISequenceWeightsNew(const PsiInfo* info, const BlastScoreBlk* sbp)
+{
+    PsiSequenceWeights* weights = NULL;
+
+    ASSERT(info);
+
+    weights = (PsiSequenceWeights*) calloc(1, sizeof(PsiSequenceWeights));
+    if (weights == NULL) {
+        return NULL;
+    }
+
+    weights->row_sigma = (double*) calloc(info->num_seqs + 1, sizeof(double));
+    if (weights->row_sigma == NULL) {
+        return _PSISequenceWeightsFree(weights);
+    }
+
+    weights->norm_seq_weights =
+        (double*) calloc(info->num_seqs + 1, sizeof(double));
+    if (weights->norm_seq_weights == NULL) {
+        return _PSISequenceWeightsFree(weights);
+    }
+
+    weights->sigma = (double*) calloc(info->num_seqs + 1, sizeof(double));
+    if (weights->sigma == NULL) {
+        return _PSISequenceWeightsFree(weights);
+    }
+
+    weights->match_weights =
+        (double**) _PSIAllocateMatrix(info->query_sz + 1, PSI_ALPHABET_SIZE,
+                                      sizeof(double));
+    /* The size is stored whether or not the allocation succeeded */
+    weights->match_weights_size = info->query_sz + 1;
+    if (weights->match_weights == NULL) {
+        return _PSISequenceWeightsFree(weights);
+    }
+
+    weights->std_prob = _PSIGetStandardProbabilities(sbp);
+    if (weights->std_prob == NULL) {
+        return _PSISequenceWeightsFree(weights);
+    }
+
+    weights->info_content = (double*) calloc(info->query_sz, sizeof(double));
+    if (weights->info_content == NULL) {
+        return _PSISequenceWeightsFree(weights);
+    }
+
+    return weights;
+}
+
+/** Releases a sequence weights structure and every array it owns.
+ * @param seq_weights Structure to deallocate; may be NULL [in]
+ * @return NULL
+ */
+PsiSequenceWeights*
+_PSISequenceWeightsFree(PsiSequenceWeights* seq_weights)
+{
+    if (seq_weights != NULL) {
+        if (seq_weights->row_sigma != NULL) {
+            sfree(seq_weights->row_sigma);
+        }
+        if (seq_weights->norm_seq_weights != NULL) {
+            sfree(seq_weights->norm_seq_weights);
+        }
+        if (seq_weights->sigma != NULL) {
+            sfree(seq_weights->sigma);
+        }
+        if (seq_weights->match_weights != NULL) {
+            _PSIDeallocateMatrix((void**) seq_weights->match_weights,
+                                 seq_weights->match_weights_size);
+        }
+        if (seq_weights->std_prob != NULL) {
+            sfree(seq_weights->std_prob);
+        }
+        if (seq_weights->info_content != NULL) {
+            sfree(seq_weights->info_content);
+        }
+        sfree(seq_weights);
+    }
+
+    return NULL;
+}
+
+/****************************************************************************/
+/* Function prototypes */
+
+/* Purges any aligned segments which are identical to the query sequence */
+static void
+_PSIPurgeIdenticalAlignments(PsiAlignmentData* alignment);
+
+/* Keeps only one copy of any aligned sequences whose fraction of identical
+ * residues is at least PSI_NEAR_IDENTICAL */
+static void
+_PSIPurgeNearIdenticalAlignments(PsiAlignmentData* alignment);
+static void
+_PSIUpdatePositionCounts(PsiAlignmentData* alignment);
+
+/* FIXME: needs more descriptive name */
+static void
+_PSIPurgeSimilarAlignments(PsiAlignmentData* alignment,
+ Uint4 seq_index1,
+ Uint4 seq_index2,
+ double max_percent_identity);
+/****************************************************************************/
+
+/**************** PurgeMatches stage of PSSM creation ***********************/
+/** Runs the purge stage: drops alignments identical to the query, then
+ * near-identical ones, then refreshes the per-position counts.
+ * @param alignment Multiple alignment data [in|out]
+ * @return PSIERR_BADPARAM if alignment is NULL, PSI_SUCCESS otherwise
+ */
+int
+PSIPurgeBiasedSegments(PsiAlignmentData* alignment)
+{
+    int status = PSIERR_BADPARAM;
+
+    if (alignment != NULL) {
+        _PSIPurgeIdenticalAlignments(alignment);
+        _PSIPurgeNearIdenticalAlignments(alignment);
+        _PSIUpdatePositionCounts(alignment);
+        status = PSI_SUCCESS;
+    }
+
+    return status;
+}
+
+/** Compares every sequence against the query with a threshold of
+ * PSI_IDENTICAL, so that aligned regions identical to the query are purged.
+ * The query's own row is visited too, but _PSIPurgeSimilarAlignments does
+ * nothing when both indices are equal.
+ * FIXME: Rename to _PSIPurgeSelfHits() ?
+ */
+static void
+_PSIPurgeIdenticalAlignments(PsiAlignmentData* alignment)
+{
+    Uint4 seq_idx = 0;      /* row being compared against the query */
+
+    ASSERT(alignment);
+
+    while (seq_idx < alignment->dimensions->num_seqs + 1) {
+        _PSIPurgeSimilarAlignments(alignment, kQueryIndex, seq_idx,
+                                   PSI_IDENTICAL);
+        seq_idx++;
+    }
+}
+
+/** Compares every pair of aligned sequences (the query row is excluded) with
+ * a threshold of PSI_NEAR_IDENTICAL; for each pair the purge is applied to
+ * the sequence with the higher index.
+ */
+static void
+_PSIPurgeNearIdenticalAlignments(PsiAlignmentData* alignment)
+{
+    Uint4 first = 0;        /* lower index of the pair */
+    Uint4 second = 0;       /* higher index of the pair */
+
+    ASSERT(alignment);
+
+    for (first = 1; first < alignment->dimensions->num_seqs + 1; first++) {
+        for (second = first + 1;
+             second < alignment->dimensions->num_seqs + 1;
+             second++) {
+            _PSIPurgeSimilarAlignments(alignment, first, second,
+                                       PSI_NEAR_IDENTICAL);
+        }
+    }
+}
+
+/** Tallies, for every query position, how many selected sequences align to
+ * it (match_seqs) and which residues they contribute (res_counts).
+ * The query row is skipped, as are sequences that are not selected, cells
+ * that are not used, and residues outside the alphabet.
+ * Meant to run after biased sequences have been purged from the alignment.
+ */
+static void
+_PSIUpdatePositionCounts(PsiAlignmentData* alignment)
+{
+    Uint4 seq_idx = 0;      /* index on aligned sequences */
+
+    ASSERT(alignment);
+
+    for (seq_idx = kQueryIndex + 1;
+         seq_idx < alignment->dimensions->num_seqs + 1;
+         seq_idx++) {
+
+        const PsiDesc* row = NULL;
+        Uint4 pos = 0;      /* index on positions */
+
+        if ( !alignment->use_sequences[seq_idx] ) {
+            continue;
+        }
+        row = alignment->desc_matrix[seq_idx];
+
+        for (pos = 0; pos < alignment->dimensions->query_sz; pos++) {
+            if (row[pos].used && row[pos].letter < PSI_ALPHABET_SIZE) {
+                alignment->res_counts[pos][row[pos].letter]++;
+                alignment->match_seqs[pos]++;
+            }
+        }
+    }
+}
+
+/** This function compares the sequences in the alignment->desc_matrix
+ * structure indexed by seq_index1 and seq_index2. The rows are scanned for
+ * aligned regions, i.e. maximal runs of consecutive query positions that are
+ * marked as used in both sequences. For each region the fraction of positions
+ * holding the same letter is computed; if it is greater than or equal to
+ * max_percent_identity, _PSIPurgeAlignedRegion is called for that region of
+ * the sequence identified by seq_index2.
+ * Nothing is done if both indices are equal or if either sequence is not
+ * selected in alignment->use_sequences.
+ * @param alignment Multiple alignment data [in|out]
+ * @param seq_index1 Row of the reference sequence [in]
+ * @param seq_index2 Row of the sequence that may be purged [in]
+ * @param max_percent_identity Threshold, compared against a fraction
+ *        (identical positions / region length), not a percentage [in]
+ */
+static void
+_PSIPurgeSimilarAlignments(PsiAlignmentData* alignment,
+ Uint4 seq_index1,
+ Uint4 seq_index2,
+ double max_percent_identity)
+{
+ Uint4 i = 0;
+
+ /* Nothing to do if sequences are the same or not selected for further
+ processing */
+ if ( seq_index1 == seq_index2 ||
+ !alignment->use_sequences[seq_index1] ||
+ !alignment->use_sequences[seq_index2] ) {
+ return;
+ }
+
+ /* i is advanced both by this loop and by the inner while loop below */
+ for (i = 0; i < alignment->dimensions->query_sz; i++) {
+
+ const PsiDesc* seq1 = alignment->desc_matrix[seq_index1];
+ const PsiDesc* seq2 = alignment->desc_matrix[seq_index2];
+
+
+ /* starting index of the aligned region */
+ Uint4 align_start = i;
+ /* length of the aligned region */
+ Uint4 align_length = 0;
+ /* # of identical residues in aligned region */
+ Uint4 nidentical = 0;
+
+ /* both positions in the sequences must be used */
+ if ( !(seq1[i].used && seq2[i].used) ) {
+ continue;
+ }
+
+ /* examine the aligned region (FIXME: should we care about Xs?) */
+ while ( (i < alignment->dimensions->query_sz) &&
+ (seq1[i].used && seq2[i].used)) {
+ if (seq1[i].letter == seq2[i].letter)
+ nidentical++;
+ align_length++;
+ i++;
+ }
+ /* Here i is one past the region: either the end of the query or a
+ position not used in both sequences, so the increment of the
+ enclosing for loop does not skip the start of another region */
+ ASSERT(align_length != 0);
+
+ /* fraction of identical positions in an aligned region between seq1
+ and seq2 */
+ {
+ double percent_identity = (double) nidentical / align_length;
+
+ if (percent_identity >= max_percent_identity) {
+ /* purge the range from align_start to align_start+align_length in
+ seq_index2 only; seq_index1 is left untouched.
+ NOTE(review): rv is read only by the ASSERT below; if ASSERT
+ compiles to nothing in release builds the result is ignored -
+ confirm */
+ int rv = _PSIPurgeAlignedRegion(alignment, seq_index2,
+ align_start,
+ align_start+align_length);
+ ASSERT(rv == PSI_SUCCESS);
+ }
+ }
+ }
+}
+
+/****************************************************************************/
+/* Function prototypes */
+/* Forward declarations of the static helpers used by
+ * PSIComputeAlignmentBlocks; their definitions follow that function. */
+
+/* Fills in the extents.left field of the used positions of row seq_index */
+static void
+_PSIGetLeftExtents(const PsiAlignmentData* alignment, Uint4 seq_index);
+/* Fills in the extents.right field of the used positions of row seq_index */
+static void
+_PSIGetRightExtents(const PsiAlignmentData* alignment, Uint4 seq_index);
+
+/* Folds the extents of row seq_index into aligned_blocks->pos_extnt */
+static void
+_PSIComputePositionExtents(const PsiAlignmentData* alignment,
+ Uint4 seq_index,
+ PsiAlignedBlock* aligned_blocks);
+/* Derives aligned_blocks->size from aligned_blocks->pos_extnt */
+static void
+_PSIComputeAlignedRegionLengths(const PsiAlignmentData* alignment,
+ PsiAlignedBlock* aligned_blocks);
+
+/****************************************************************************/
+/******* Compute alignment extents stage of PSSM creation *******************/
+/* posComputeExtents in posit.c */
+/** Computes the per-position extents and aligned region lengths.
+ * @param alignment multiple alignment data [in]
+ * @param aligned_blocks structure whose pos_extnt and size fields are
+ * updated [out]
+ * @return PSIERR_BADPARAM if either argument is NULL, PSI_SUCCESS otherwise
+ */
+int
+PSIComputeAlignmentBlocks(const PsiAlignmentData* alignment,   /* [in] */
+                          PsiAlignedBlock* aligned_blocks)     /* [out] */
+{
+    Uint4 row = 0;      /* row (aligned sequence) being processed */
+
+    if (alignment == NULL || aligned_blocks == NULL) {
+        return PSIERR_BADPARAM;
+    }
+
+    /* Rows after the query: the query itself needs no extents */
+    for (row = kQueryIndex + 1;
+         row < alignment->dimensions->num_seqs + 1; row++) {
+
+        if (alignment->use_sequences[row]) {
+            _PSIGetLeftExtents(alignment, row);
+            _PSIGetRightExtents(alignment, row);
+            _PSIComputePositionExtents(alignment, row, aligned_blocks);
+        }
+    }
+
+    _PSIComputeAlignedRegionLengths(alignment, aligned_blocks);
+
+    return PSI_SUCCESS;
+}
+
+/* Sets extents.left for the used positions of row seq_index. Scanning left
+ * to right, a used position inherits the left extent of its left neighbour
+ * when that neighbour is used, and otherwise starts a new run at its own
+ * index. */
+static void
+_PSIGetLeftExtents(const PsiAlignmentData* alignment, Uint4 seq_index)
+{
+    const Uint1 GAP = AMINOACID_TO_NCBISTDAA['-'];
+    PsiDesc* row = NULL;    /* positions of the requested sequence */
+    Uint4 col = 0;          /* column being assigned */
+
+    ASSERT(alignment);
+    ASSERT(seq_index < alignment->dimensions->num_seqs + 1);
+    ASSERT(alignment->use_sequences[seq_index]);
+
+    row = alignment->desc_matrix[seq_index];
+
+    /* The first column has no left neighbour; it is only seeded when it is
+     * used and is not a gap */
+    if (row[0].used && row[0].letter != GAP) {
+        row[0].extents.left = 0;
+    }
+
+    for (col = 1; col < alignment->dimensions->query_sz; col++) {
+
+        if (row[col].used) {
+            if (row[col - 1].used) {
+                row[col].extents.left = row[col - 1].extents.left;
+            } else {
+                row[col].extents.left = col;
+            }
+        }
+    }
+}
+
+/* Sets extents.right for the used positions of row seq_index. Scanning right
+ * to left, a used position inherits the right extent of its right neighbour
+ * when that neighbour is used, and otherwise ends a run at its own index. */
+static void
+_PSIGetRightExtents(const PsiAlignmentData* alignment, Uint4 seq_index)
+{
+    const Uint1 GAP = AMINOACID_TO_NCBISTDAA['-'];
+    PsiDesc* row = NULL;    /* positions of the requested sequence */
+    Uint4 final_col = 0;    /* index of the last column */
+    Int4 col = 0;           /* column being assigned (signed: counts down) */
+
+    ASSERT(alignment);
+    ASSERT(seq_index < alignment->dimensions->num_seqs + 1);
+    ASSERT(alignment->use_sequences[seq_index]);
+
+    row = alignment->desc_matrix[seq_index];
+    final_col = alignment->dimensions->query_sz - 1;
+
+    /* The last column has no right neighbour; it is only seeded when it is
+     * used and is not a gap */
+    if (row[final_col].used && row[final_col].letter != GAP) {
+        row[final_col].extents.right = final_col;
+    }
+
+    for (col = final_col - 1; col >= 0; col--) {
+
+        if (row[col].used) {
+            if (row[col + 1].used) {
+                row[col].extents.right = row[col + 1].extents.right;
+            } else {
+                row[col].extents.right = col;
+            }
+        }
+    }
+}
+
+/* Narrows aligned_blocks->pos_extnt with the extents of row seq_index: for
+ * each participating column the block's left bound becomes the larger of the
+ * two left extents and its right bound the smaller of the two right
+ * extents. When PSI_IGNORE_GAPS_IN_COLUMNS is defined, gap positions do not
+ * participate. */
+static void
+_PSIComputePositionExtents(const PsiAlignmentData* alignment,
+                           Uint4 seq_index,
+                           PsiAlignedBlock* aligned_blocks)
+{
+#ifdef PSI_IGNORE_GAPS_IN_COLUMNS
+    const Uint1 GAP = AMINOACID_TO_NCBISTDAA['-'];
+#endif
+    PsiDesc* row = NULL;    /* positions of the requested sequence */
+    Uint4 col = 0;          /* column index */
+
+    ASSERT(aligned_blocks);
+    ASSERT(alignment);
+    ASSERT(seq_index < alignment->dimensions->num_seqs + 1);
+    ASSERT(alignment->use_sequences[seq_index]);
+
+    row = alignment->desc_matrix[seq_index];
+
+    for (col = 0; col < alignment->dimensions->query_sz; col++) {
+
+        /* Skip columns in which this row does not participate */
+#ifdef PSI_IGNORE_GAPS_IN_COLUMNS
+        if ( !row[col].used || row[col].letter == GAP ) {
+            continue;
+        }
+#else
+        if ( !row[col].used ) {
+            continue;
+        }
+#endif
+
+        aligned_blocks->pos_extnt[col].left =
+            MAX(aligned_blocks->pos_extnt[col].left,
+                row[col].extents.left);
+        aligned_blocks->pos_extnt[col].right =
+            MIN(aligned_blocks->pos_extnt[col].right,
+                row[col].extents.right);
+    }
+}
+
+/* Computes aligned_blocks->size[i], the number of columns in the aligned
+ * block of each query position, and then discounts the query's X residues
+ * from the blocks that contain them.
+ *
+ * The block of position i covers columns pos_extnt[i].left through
+ * pos_extnt[i].right inclusive (the weighting code in this file iterates
+ * "i <= right" and normalizes by "right - left + 1"), so its length is
+ * right - left + 1. The previous "right - left" under-counted every block by
+ * one column.
+ * NOTE(review): this assumes pos_extnt holds inclusive bounds for every
+ * position, including ones no sequence aligns to - confirm against the
+ * initialization of PsiAlignedBlock. */
+static void
+_PSIComputeAlignedRegionLengths(const PsiAlignmentData* alignment,
+                                PsiAlignedBlock* aligned_blocks)
+{
+    const Uint1 X = AMINOACID_TO_NCBISTDAA['X'];
+    PsiDesc* query_seq = NULL;
+    Uint4 i = 0;
+
+    ASSERT(alignment);
+    ASSERT(aligned_blocks);
+
+    for (i = 0; i < alignment->dimensions->query_sz; i++) {
+        aligned_blocks->size[i] = aligned_blocks->pos_extnt[i].right -
+                                  aligned_blocks->pos_extnt[i].left + 1;
+    }
+
+    query_seq = alignment->desc_matrix[kQueryIndex];
+
+    /* Do not include X's in aligned region lengths */
+    for (i = 0; i < alignment->dimensions->query_sz; i++) {
+
+        if (query_seq[i].letter == X) {
+
+            Uint4 idx = 0;
+
+            /* Blocks of positions to the left of i that reach column i */
+            for (idx = 0; idx < i; idx++) {
+                if ((Uint4)aligned_blocks->pos_extnt[idx].right >= i &&
+                    query_seq[idx].letter != X) {
+                    aligned_blocks->size[idx]--;
+                }
+            }
+            /* Blocks of positions to the right of i that reach column i */
+            for (idx = alignment->dimensions->query_sz - 1; idx > i; idx--) {
+                if ((Uint4)aligned_blocks->pos_extnt[idx].left <= i &&
+                    query_seq[idx].letter != X) {
+                    aligned_blocks->size[idx]--;
+                }
+            }
+        }
+
+    }
+}
+
+/****************************************************************************/
+/* Forward declarations of the static helpers used by
+ * PSIComputeSequenceWeights; their definitions follow that function. */
+
+/* Collects the indices of the sequences aligned at a position and returns
+ * how many there are */
+static Uint4
+_PSIGetAlignedSequencesForPosition(
+ const PsiAlignmentData* alignment,
+ Uint4 position,
+ Uint4* aligned_sequences);
+
+/* Accumulates seq_weights->row_sigma and the two sigma totals over the
+ * aligned block of a position */
+static void
+_PSICalculatePositionWeightsAndIntervalSigmas(
+ const PsiAlignmentData* alignment, /* [in] */
+ const PsiAlignedBlock* aligned_blocks, /* [in] */
+ Uint4 position, /* [in] */
+ const Uint4* aligned_seqs, /* [in] */
+ Uint4 num_aligned_seqs, /* [in] */
+ PsiSequenceWeights* seq_weights, /* [out] */
+ double* sigma, /* [out] */
+ double* interval_sigma); /* [out] */
+
+/* Populates seq_weights->norm_seq_weights for the sequences aligned at a
+ * position */
+static void
+_PSICalculateNormalizedSequenceWeights(
+ const PsiAlignedBlock* aligned_blocks, /* [in] */
+ Uint4 position, /* [in] */
+ const Uint4* aligned_seqs, /* [in] */
+ Uint4 num_aligned_seqs, /* [in] */
+ double sigma, /* [in] */
+ PsiSequenceWeights* seq_weights); /* [out] */
+
+/* Adds the normalized sequence weights into seq_weights->match_weights for a
+ * position */
+static void
+_PSICalculateMatchWeights(
+ const PsiAlignmentData* alignment, /* [in] */
+ Uint4 position, /* [in] */
+ const Uint4* aligned_seqs, /* [in] */
+ Uint4 num_aligned_seqs, /* [in] */
+ PsiSequenceWeights* seq_weights); /* [out] */
+
+/* Checks the per-column totals of seq_weights->match_weights; it also
+ * rewrites the gap entry of match_weights, so seq_weights is modified */
+static int
+_PSICheckSequenceWeights(
+ const PsiAlignmentData* alignment, /* [in] */
+ PsiSequenceWeights* seq_weights); /* [in|out] */
+
+/****************************************************************************/
+/******* Calculate sequence weights stage of PSSM creation ******************/
+/* Needs the PsiAlignedBlock structure calculated in previous stage as well
+ * as PsiAlignmentData structure */
+
+/** Computes the position-specific sequence weights.
+ * For every query position of interest this gathers the sequences aligned
+ * there, computes their row sigmas, normalizes them into
+ * seq_weights->norm_seq_weights and accumulates seq_weights->match_weights;
+ * seq_weights->sigma[pos] receives the interval sigma of the position.
+ * @param alignment multiple alignment data [in]
+ * @param aligned_blocks extents/sizes from the previous stage [in]
+ * @param seq_weights structure receiving the computed weights [out]
+ * @return PSIERR_OUTOFMEM if the scratch array cannot be allocated;
+ * otherwise the result of _PSICheckSequenceWeights (PSI_SUCCESS or
+ * PSIERR_BADSEQWEIGHTS). The result of that check used to be discarded, so a
+ * failed check was reported to the caller as success.
+ */
+int
+PSIComputeSequenceWeights(const PsiAlignmentData* alignment,        /* [in] */
+                          const PsiAlignedBlock* aligned_blocks,    /* [in] */
+                          PsiSequenceWeights* seq_weights)          /* [out] */
+{
+    Uint4* aligned_seqs = NULL;     /* list of indices of sequences
+                                       which participate in an
+                                       aligned position */
+    Uint4 pos = 0;                  /* position index */
+
+    ASSERT(alignment);
+    ASSERT(aligned_blocks);
+    ASSERT(seq_weights);
+
+    aligned_seqs = (Uint4*) calloc(alignment->dimensions->num_seqs + 1,
+                                   sizeof(Uint4));
+    if ( !aligned_seqs ) {
+        return PSIERR_OUTOFMEM;
+    }
+
+    for (pos = 0; pos < alignment->dimensions->query_sz; pos++) {
+
+        Uint4 num_aligned_seqs = 0;
+        double sigma = 0.0; /**< number of different characters
+                              occurring in matches within a multiple
+                              alignment block, excluding identical
+                              columns */
+        double interval_sigma = 0.0; /**< same as sigma but includes identical
+                                       columns */
+
+        /* ignore positions of no interest */
+        if (aligned_blocks->size[pos] == 0 || alignment->match_seqs[pos] <= 1) {
+            continue;
+        }
+
+        memset((void*)aligned_seqs, 0,
+               sizeof(Uint4) * (alignment->dimensions->num_seqs + 1));
+
+        num_aligned_seqs = _PSIGetAlignedSequencesForPosition(alignment, pos,
+                                                              aligned_seqs);
+        if (num_aligned_seqs <= 1) {
+            continue;
+        }
+
+        /* Skipping optimization about redundant sets */
+
+        /* if (newSequenceSet) in posit.c */
+        /* The per-sequence scratch arrays are reset for every position */
+        memset((void*)seq_weights->norm_seq_weights, 0,
+               sizeof(double)*(alignment->dimensions->num_seqs+1));
+        memset((void*)seq_weights->row_sigma, 0,
+               sizeof(double)*(alignment->dimensions->num_seqs+1));
+        _PSICalculatePositionWeightsAndIntervalSigmas(alignment,
+                                     aligned_blocks, pos, aligned_seqs,
+                                     num_aligned_seqs, seq_weights,
+                                     &sigma, &interval_sigma);
+
+        seq_weights->sigma[pos] = interval_sigma;
+
+        /* Populates norm_seq_weights */
+        _PSICalculateNormalizedSequenceWeights(aligned_blocks, pos,
+                                     aligned_seqs, num_aligned_seqs, sigma,
+                                     seq_weights);
+
+
+        /* Uses seq_weights->norm_seq_weights to populate match_weights */
+        _PSICalculateMatchWeights(alignment, pos, aligned_seqs,
+                                  num_aligned_seqs, seq_weights);
+    }
+
+    sfree(aligned_seqs);
+
+    /* Check that the sequence weights add up to 1 in each column and report
+     * the outcome to the caller.
+     * Returns with seq_weights->match_weights populated, should free others?
+     * FIXME: need to keep sequence weights for diagnostics for structure
+     * group */
+    return _PSICheckSequenceWeights(alignment, seq_weights);
+}
+
+/* Calculates the position based weights using a modified version of the
+ * Henikoff's algorithm presented in "Position-based sequence weights".
+ * More documentation pending
+ *
+ * For every column of the aligned block of `position` (pos_extnt left..right,
+ * inclusive) the residues of the aligned sequences are counted; the running
+ * number of distinct residues is added to *interval_sigma (and to *sigma when
+ * it exceeds 1), and each aligned sequence's row_sigma is increased by
+ * 1 / (count of its residue * number of distinct residues).
+ *
+ * NOTE(review): residue_counts and num_distinct_residues are initialized
+ * once and are not reset between columns, so they accumulate across the whole
+ * block - confirm this is intended (see the FIXME below).
+ * NOTE(review): residue_counts is indexed with the raw letter without a
+ * bounds check; this presumes every letter is < PSI_ALPHABET_SIZE - confirm.
+ */
+static void
+_PSICalculatePositionWeightsAndIntervalSigmas(
+    const PsiAlignmentData* alignment,      /* [in] */
+    const PsiAlignedBlock* aligned_blocks,  /* [in] */
+    Uint4 position,                         /* [in] */
+    const Uint4* aligned_seqs,              /* [in] */
+    Uint4 num_aligned_seqs,                 /* [in] */
+    PsiSequenceWeights* seq_weights,        /* [out] */
+    double* sigma,                          /* [out] */
+    double* interval_sigma)                 /* [out] */
+{
+    /** keeps track of how many occurrences of each residue was found at a
+     * given position */
+    Uint4 residue_counts[PSI_ALPHABET_SIZE];
+    Uint4 num_distinct_residues = 0; /**< number of distinct
+                                       residues found at a given
+                                       position */
+
+    Uint4 i = 0; /**< index into aligned block for requested
+                   position */
+
+    ASSERT(seq_weights);
+    ASSERT(sigma);
+    ASSERT(interval_sigma);
+
+    *sigma = 0.0;
+    *interval_sigma = 0.0;
+
+    /* Zero the whole array. The previous element-wise loop was bounded by
+     * sizeof(residue_counts), which is the size in bytes rather than the
+     * number of elements, and therefore wrote past the end of the array. */
+    memset((void*)residue_counts, 0, sizeof(residue_counts));
+
+    for (i = (Uint4) aligned_blocks->pos_extnt[position].left;
+         i <= (Uint4) aligned_blocks->pos_extnt[position].right; i++) {
+
+        Uint4 asi = 0; /**< index into array of aligned sequences */
+
+        /**** Count number of occurring residues at a position ****/
+        for (asi = 0; asi < num_aligned_seqs; asi++) {
+            const Uint4 seq_idx = aligned_seqs[asi];
+            const Uint1 residue =
+                alignment->desc_matrix[seq_idx][i].letter;
+
+            if (residue_counts[residue] == 0) {
+                num_distinct_residues++;
+            }
+            residue_counts[residue]++;
+        }
+        /**** END: Count number of occurring residues at a position ****/
+
+        /* FIXME: see Alejandro's email about this */
+        (*interval_sigma) += num_distinct_residues;
+        if (num_distinct_residues > 1) { /* if this is not 1 */
+            (*sigma) += num_distinct_residues;
+        }
+
+        /* Calculate row_sigma */
+        for (asi = 0; asi < num_aligned_seqs; asi++) {
+            const Uint4 seq_idx = aligned_seqs[asi];
+            const Uint1 residue =
+                alignment->desc_matrix[seq_idx][i].letter;
+
+            seq_weights->row_sigma[seq_idx] +=
+                (1.0 / (double)
+                 (residue_counts[residue] * num_distinct_residues) );
+        }
+    }
+
+    return;
+}
+
+/** Fills seq_weights->norm_seq_weights for the sequences aligned at the
+ * requested position. With a positive sigma each weight is the sequence's
+ * row_sigma divided by the width of the position's aligned block, then
+ * divided by the sum of those quotients; otherwise every aligned sequence
+ * gets the same weight, 1/num_aligned_seqs.
+ * NOTE(review): the sum is only accumulated when PSI_IGNORE_GAPS_IN_COLUMNS
+ * is NOT defined; with that macro defined the divisor stays 0.0 - confirm
+ * whether that configuration is supported.
+ */
+static void
+_PSICalculateNormalizedSequenceWeights(
+    const PsiAlignedBlock* aligned_blocks,  /* [in] */
+    Uint4 position,                         /* [in] */
+    const Uint4* aligned_seqs,              /* [in] */
+    Uint4 num_aligned_seqs,                 /* [in] */
+    double sigma,                           /* [in] */
+    PsiSequenceWeights* seq_weights)        /* [out] */
+{
+    Uint4 k = 0;    /* index into aligned_seqs */
+
+    if ( !(sigma > 0) ) {
+        /* No variation information: weigh all aligned sequences equally */
+        for (k = 0; k < num_aligned_seqs; k++) {
+            seq_weights->norm_seq_weights[aligned_seqs[k]] =
+                (1.0/(double) num_aligned_seqs);
+        }
+        return;
+    }
+
+    {
+        double total = 0.0;     /* sum of the unnormalized weights */
+
+        /* First pass: per-sequence average over the aligned block */
+        for (k = 0; k < num_aligned_seqs; k++) {
+            const Uint4 row = aligned_seqs[k];
+
+            seq_weights->norm_seq_weights[row] =
+                seq_weights->row_sigma[row] /
+                (aligned_blocks->pos_extnt[position].right -
+                 aligned_blocks->pos_extnt[position].left + 1);
+#ifndef PSI_IGNORE_GAPS_IN_COLUMNS
+            total += seq_weights->norm_seq_weights[row];
+#endif
+        }
+
+        /* Second pass: scale by the accumulated total */
+        for (k = 0; k < num_aligned_seqs; k++) {
+            seq_weights->norm_seq_weights[aligned_seqs[k]] /= total;
+        }
+    }
+}
+
+/* Adds, for every sequence aligned at `position`, its normalized sequence
+ * weight to the match_weights entry of the residue that sequence has at
+ * that position. */
+static void
+_PSICalculateMatchWeights(
+    const PsiAlignmentData* alignment,  /* [in] */
+    Uint4 position,                     /* [in] */
+    const Uint4* aligned_seqs,          /* [in] */
+    Uint4 num_aligned_seqs,             /* [in] */
+    PsiSequenceWeights* seq_weights)    /* [out] */
+{
+    Uint4 k = 0;    /* index into aligned_seqs */
+
+    for (k = 0; k < num_aligned_seqs; k++) {
+        const Uint4 row = aligned_seqs[k];
+        const Uint1 letter = alignment->desc_matrix[row][position].letter;
+
+        seq_weights->match_weights[position][letter] +=
+            seq_weights->norm_seq_weights[row];
+
+        /* FIXME: gapless_column_weights is populated but never used; the
+         * original special case for gap residues was an empty statement:
+         *   seq_weights->gapless_column_weights[position] +=
+         *       seq_weights->a[seq_idx];
+         */
+    }
+}
+
+/** Collects the sequences that are aligned at a given position.
+ * A sequence qualifies when it is still selected (use_sequences) and its
+ * cell at that position is marked as used; when PSI_IGNORE_GAPS_IN_COLUMNS
+ * is defined the cell must additionally not be a gap.
+ * @param alignment Multiple-alignment data [in]
+ * @param position position of interest [in]
+ * @param aligned_sequences array which will contain the indices of the
+ * sequences aligned at the requested position. This array must have size
+ * greater than or equal to the number of sequences + 1 in multiple alignment
+ * data structure (alignment->dimensions->num_seqs + 1) [out]
+ * @return number of sequences aligned at the requested position
+ */
+static Uint4
+_PSIGetAlignedSequencesForPosition(const PsiAlignmentData* alignment,
+                                   Uint4 position,
+                                   Uint4* aligned_sequences)
+{
+#ifdef PSI_IGNORE_GAPS_IN_COLUMNS
+    const Uint1 GAP = AMINOACID_TO_NCBISTDAA['-'];
+#endif
+    Uint4 count = 0;    /* number of indices stored so far */
+    Uint4 row = 0;      /* row (aligned sequence) being examined */
+
+    ASSERT(alignment);
+    ASSERT(position < alignment->dimensions->query_sz);
+    ASSERT(aligned_sequences);
+
+    for (row = 0; row < alignment->dimensions->num_seqs + 1; row++) {
+
+        if (alignment->use_sequences[row]) {
+            const PsiDesc* cell = &alignment->desc_matrix[row][position];
+
+#ifdef PSI_IGNORE_GAPS_IN_COLUMNS
+            if (cell->used && cell->letter != GAP)
+#else
+            if (cell->used)
+#endif
+            {
+                aligned_sequences[count] = row;
+                count++;
+            }
+        }
+    }
+
+    return count;
+}
+
+/* Verifies that the match weights of every relevant column add up to 1
+ * (within 1%). Columns with at most one matching sequence, and columns where
+ * the query has an X, are skipped.
+ * Unless PSI_IGNORE_GAPS_IN_COLUMNS is defined, the weight assigned to the
+ * gap residue is first spread over the residues whose standard probability
+ * exceeds kEpsilon, proportionally to that probability; the gap entry is
+ * then zeroed in either configuration.
+ * The second parameter is not really const, it's updated!
+ * Returns PSIERR_BADSEQWEIGHTS for the first column whose final total falls
+ * outside [0.99, 1.01], PSI_SUCCESS otherwise. */
+static int
+_PSICheckSequenceWeights(const PsiAlignmentData* alignment,
+                         PsiSequenceWeights* seq_weights)
+{
+    const Uint1 GAP = AMINOACID_TO_NCBISTDAA['-'];
+    const Uint1 X = AMINOACID_TO_NCBISTDAA['X'];
+    Uint4 pos = 0;  /* residue position (ie: column number) */
+    Uint4 res = 0;  /* residue */
+
+    ASSERT(alignment);
+    ASSERT(seq_weights);
+
+    for (pos = 0; pos < alignment->dimensions->query_sz; pos++) {
+
+        double running_total = 0.0;
+
+        if (alignment->match_seqs[pos] <= 1 ||
+            alignment->desc_matrix[kQueryIndex][pos].letter == X) {
+            continue;
+        }
+
+        for (res = 0; res < PSI_ALPHABET_SIZE; res++) {
+            running_total += seq_weights->match_weights[pos][res];
+        }
+        /* Before the gap weight is redistributed the column is expected to
+         * add up to 1. The previous assertion had the condition inverted
+         * and fired precisely on well-formed columns. */
+        ASSERT(running_total > 0.99 && running_total < 1.01);
+
+#ifndef PSI_IGNORE_GAPS_IN_COLUMNS
+        /* Disperse method of spreading the gap weights */
+        for (res = 0; res < PSI_ALPHABET_SIZE; res++) {
+            if (seq_weights->std_prob[res] > kEpsilon) {
+                seq_weights->match_weights[pos][res] +=
+                    (seq_weights->match_weights[pos][GAP] *
+                     seq_weights->std_prob[res]);
+            }
+        }
+#endif
+        seq_weights->match_weights[pos][GAP] = 0.0;
+
+        /* Re-check the column after the gap weight has been removed */
+        running_total = 0.0;
+        for (res = 0; res < PSI_ALPHABET_SIZE; res++) {
+            running_total += seq_weights->match_weights[pos][res];
+        }
+
+        if (running_total < 0.99 || running_total > 1.01) {
+            return PSIERR_BADSEQWEIGHTS;
+        }
+    }
+
+    return PSI_SUCCESS;
+}
+
+/****************************************************************************/
+/******* Compute residue frequencies stage of PSSM creation *****************/
+/* port of posComputePseudoFreqs */
+/** Computes score_matrix->res_freqs for every query position and residue,
+ * combining the observed match weights with pseudo-counts derived from the
+ * matrix-specific frequency ratios. Columns where the query has an X, and
+ * residues whose standard probability does not exceed kEpsilon, get a
+ * frequency of 0. As a side product seq_weights->info_content[i] is filled
+ * in for every position.
+ * @return PSIERR_BADPARAM if any argument is NULL, PSIERR_OUTOFMEM if the
+ * frequency ratios could not be obtained, PSI_SUCCESS otherwise
+ */
+int
+PSIComputeResidueFrequencies(const PsiAlignmentData* alignment,     /* [in] */
+                             const PsiSequenceWeights* seq_weights, /* [in] */
+                             const BlastScoreBlk* sbp,              /* [in] */
+                             const PsiAlignedBlock* aligned_blocks, /* [in] */
+                             const PSIBlastOptions* opts,           /* [in] */
+                             PsiMatrix* score_matrix)               /* [out] */
+{
+    const Uint1 X = AMINOACID_TO_NCBISTDAA['X'];
+    Uint4 i = 0;                /* loop index into query positions */
+    SFreqRatios* freq_ratios;   /* matrix-specific frequency ratios */
+
+    if ( !alignment || !seq_weights || !sbp ||
+         !aligned_blocks || !opts || !score_matrix ) {
+        return PSIERR_BADPARAM;
+    }
+
+    freq_ratios = _PSIMatrixFrequencyRatiosNew(sbp->name);
+    /* freq_ratios->data is dereferenced below; presumably the constructor
+     * returns NULL on failure - do not proceed without it */
+    if ( !freq_ratios ) {
+        return PSIERR_OUTOFMEM;
+    }
+
+    for (i = 0; i < alignment->dimensions->query_sz; i++) {
+
+        Uint4 j = 0;            /* loop index into alphabet */
+        double info_sum = 0.0;  /* for information content - FIXME calculate
+                                   separately */
+
+        /* If there is an 'X' in the query sequence at position i... */
+        if (alignment->desc_matrix[kQueryIndex][i].letter == X) {
+
+            for (j = 0; j < (Uint4) sbp->alphabet_size; j++) {
+                score_matrix->res_freqs[i][j] = 0.0;
+            }
+
+        } else {
+
+            for (j = 0; j < (Uint4) sbp->alphabet_size; j++) {
+
+                if (seq_weights->std_prob[j] > kEpsilon) {
+                    Uint4 interval_size = 0; /* length of a block */
+                    Uint4 idx = 0;           /* loop index */
+                    double sigma = 0.0;      /* number of chars in an interval */
+
+                    double pseudo = 0.0;            /* intermediate term */
+                    double numerator = 0.0;         /* intermediate term */
+                    double denominator = 0.0;       /* intermediate term */
+                    double qOverPEstimate = 0.0;    /* intermediate term */
+
+                    /* changed to matrix specific ratios here May 2000 */
+                    for (idx = 0; idx < (Uint4) sbp->alphabet_size; idx++) {
+                        if (sbp->matrix[j][idx] != BLAST_SCORE_MIN) {
+                            pseudo += (seq_weights->match_weights[i][idx] *
+                                       freq_ratios->data[j][idx]);
+                        }
+                    }
+                    pseudo *= opts->pseudo_count;
+
+                    /* FIXME: document where this formula is coming from
+                     * (probably 2001 paper, p 2996) */
+                    sigma = seq_weights->sigma[i];
+                    interval_size = aligned_blocks->size[i];
+
+                    /* Note the precedence: (sigma/interval_size) - 1 */
+                    numerator = pseudo +
+                        ((sigma/interval_size-1) *
+                         seq_weights->match_weights[i][j] /
+                         seq_weights->std_prob[j]);
+
+                    denominator = (sigma/interval_size-1) +
+                        opts->pseudo_count;
+
+                    qOverPEstimate = numerator/denominator;
+
+                    /* Note artificial multiplication by standard probability
+                     * to normalize */
+                    score_matrix->res_freqs[i][j] = qOverPEstimate *
+                        seq_weights->std_prob[j];
+
+                    /* std_prob[j] > kEpsilon already holds in this branch */
+                    if (qOverPEstimate != 0.0) {
+                        info_sum += qOverPEstimate * seq_weights->std_prob[j] *
+                            log(qOverPEstimate)/NCBIMATH_LN2;
+                    }
+
+                } else {
+                    score_matrix->res_freqs[i][j] = 0.0;
+                } /* END: if (seq_weights->std_prob[j] > kEpsilon) { */
+            } /* END: for (j = 0; j < sbp->alphabet_size; j++) */
+
+        }
+        /* FIXME: Should move out the calculation of information content to its
+         * own function (see posFreqsToInformation)! */
+        seq_weights->info_content[i] = info_sum;
+    }
+
+    freq_ratios = _PSIMatrixFrequencyRatiosFree(freq_ratios);
+
+    return PSI_SUCCESS;
+}
+
+/****************************************************************************/
+/**************** Convert residue frequencies to PSSM stage *****************/
+
+/* FIXME: Answer questions
+   FIXME: need ideal_lambda, regular scoring matrix, length of query
+*/
+/** Converts score_matrix->res_freqs into scaled scores
+ * (score_matrix->scaled_pssm). Columns in which every frequency is zero are
+ * filled from the underlying scoring matrix instead (both pssm and
+ * scaled_pssm). The last column of scaled_pssm is finally overwritten with
+ * BLAST_SCORE_MIN.
+ * @param score_matrix residue frequencies in, scaled PSSM out [in|out]
+ * @param query query sequence, indexed for every matrix column [in]
+ * @param sbp score block providing the scoring matrix and ideal lambda [in]
+ * @param std_probs standard residue probabilities [in]
+ * @return PSIERR_BADPARAM if any argument is NULL (query is now validated
+ * too, as it is dereferenced for every column), PSIERR_OUTOFMEM if the
+ * frequency ratios could not be obtained, PSI_SUCCESS otherwise
+ */
+int
+PSIConvertResidueFreqsToPSSM(PsiMatrix* score_matrix,       /* [in|out] */
+                             const Uint1* query,            /* [in] */
+                             const BlastScoreBlk* sbp,      /* [in] */
+                             const double* std_probs)       /* [in] */
+{
+    const Uint4 X = AMINOACID_TO_NCBISTDAA['X'];
+    const Uint4 Star = AMINOACID_TO_NCBISTDAA['*'];
+    Uint4 i = 0;
+    Uint4 j = 0;
+    SFreqRatios* std_freq_ratios = NULL;    /* only needed when there are not
+                                               residue frequencies for a given
+                                               column */
+    double ideal_lambda;
+
+    if ( !score_matrix || !query || !sbp || !std_probs )
+        return PSIERR_BADPARAM;
+
+    std_freq_ratios = _PSIMatrixFrequencyRatiosNew(sbp->name);
+    /* std_freq_ratios is dereferenced for unaligned columns; presumably the
+     * constructor returns NULL on failure */
+    if ( !std_freq_ratios )
+        return PSIERR_OUTOFMEM;
+    ideal_lambda = sbp->kbp_ideal->Lambda;
+
+    /* Each column is a position in the query */
+    for (i = 0; i < score_matrix->ncols; i++) {
+
+        /* True if all frequencies in column i are zero */
+        Boolean is_unaligned_column = TRUE;
+        const Uint4 query_res = query[i];
+
+        for (j = 0; j < (Uint4) sbp->alphabet_size; j++) {
+
+            double qOverPEstimate = 0.0;
+
+            /* Division compensates for multiplication in previous function */
+            if (std_probs[j] > kEpsilon) {
+                qOverPEstimate =
+                    score_matrix->res_freqs[i][j] / std_probs[j];
+            }
+
+            if (is_unaligned_column && qOverPEstimate != 0.0) {
+                is_unaligned_column = FALSE;
+            }
+
+            /* Populate scaled matrix */
+            if (qOverPEstimate == 0.0 || std_probs[j] < kEpsilon) {
+                score_matrix->scaled_pssm[i][j] = BLAST_SCORE_MIN;
+            } else {
+                double tmp = log(qOverPEstimate)/ideal_lambda;
+                score_matrix->scaled_pssm[i][j] = (int)
+                    BLAST_Nint(kPsiScaleFactor * tmp);
+            }
+
+            /* X and * take their (scaled) score from the underlying matrix,
+             * provided the query residue can be scored against X */
+            if ( (j == X || j == Star) &&
+                 (sbp->matrix[query_res][X] != BLAST_SCORE_MIN) ) {
+                score_matrix->scaled_pssm[i][j] =
+                    sbp->matrix[query_res][j] * kPsiScaleFactor;
+            }
+        }
+
+        if (is_unaligned_column) {
+            for (j = 0; j < (Uint4) sbp->alphabet_size; j++) {
+
+                score_matrix->pssm[i][j] = sbp->matrix[query_res][j];
+
+                if (sbp->matrix[query_res][j] != BLAST_SCORE_MIN) {
+                    double tmp =
+                        kPsiScaleFactor * std_freq_ratios->bit_scale_factor *
+                        log(std_freq_ratios->data[query_res][j])/NCBIMATH_LN2;
+
+                    score_matrix->scaled_pssm[i][j] = BLAST_Nint(tmp);
+                } else {
+                    score_matrix->scaled_pssm[i][j] = BLAST_SCORE_MIN;
+                }
+            }
+        }
+    }
+
+    std_freq_ratios = _PSIMatrixFrequencyRatiosFree(std_freq_ratios);
+
+    /* Set the last column of the matrix to BLAST_SCORE_MIN (why?).
+     * Guarded so that an empty matrix does not index column ncols-1. */
+    if (score_matrix->ncols > 0) {
+        for (j = 0; j < (Uint4) sbp->alphabet_size; j++) {
+            score_matrix->scaled_pssm[score_matrix->ncols-1][j] =
+                BLAST_SCORE_MIN;
+        }
+    }
+
+    return PSI_SUCCESS;
+}
+
+/****************************************************************************/
+/************************* Scaling of PSSM stage ****************************/
+
+/**
+ * Forward declaration; the definition follows PSIScaleMatrix, which calls
+ * this function. It derives score frequencies from the PSSM and updates the
+ * PSI Karlin-Altschul parameters stored in sbp.
+ * @param pssm PSSM whose scores are analyzed [in]
+ * @param query query sequence [in]
+ * @param query_length length of the query sequence [in]
+ * @param std_probs standard residue probabilities [in]
+ * @param initial_lambda_guess value to be used when calculating lambda if this
+ * is not null [in]
+ * @param sbp Score block structure where the calculated lambda and K will be
+ * returned
+ */
+void
+_PSIUpdateLambdaK(const int** pssm, /* [in] */
+ const Uint1* query, /* [in] */
+ Uint4 query_length, /* [in] */
+ const double* std_probs, /* [in] */
+ double* initial_lambda_guess, /* [in] */
+ BlastScoreBlk* sbp); /* [in|out] */
+
+/* Rewrites score_matrix->pssm as factor * scaled_pssm / kPsiScaleFactor
+ * (entries equal to BLAST_SCORE_MIN are copied unchanged) and then
+ * recomputes the PSI Karlin-Altschul parameters in sbp for the rescaled
+ * matrix. When scaling_factor is not NULL, the current PSI lambda divided by
+ * *scaling_factor is used as the initial guess for the new lambda. */
+static void
+_PSIRescaleAndUpdateLambda(PsiMatrix* score_matrix,     /* [in|out] */
+                           double factor,               /* [in] */
+                           const Uint1* query,          /* [in] */
+                           Uint4 query_length,          /* [in] */
+                           const double* std_probs,     /* [in] */
+                           double* scaling_factor,      /* [in] */
+                           BlastScoreBlk* sbp)          /* [in|out] */
+{
+    int** scaled_pssm = score_matrix->scaled_pssm;
+    int** pssm = score_matrix->pssm;
+    Uint4 i = 0;
+    Uint4 j = 0;
+
+    for (i = 0; i < score_matrix->ncols; i++) {
+        for (j = 0; j < (Uint4) sbp->alphabet_size; j++) {
+            if (scaled_pssm[i][j] != BLAST_SCORE_MIN) {
+                pssm[i][j] =
+                    BLAST_Nint(factor*scaled_pssm[i][j]/kPsiScaleFactor);
+            } else {
+                pssm[i][j] = BLAST_SCORE_MIN;
+            }
+        }
+    }
+
+    if (scaling_factor) {
+        double init_lambda_guess =
+            sbp->kbp_psi[0]->Lambda / *scaling_factor;
+        _PSIUpdateLambdaK((const int**)pssm, query, query_length,
+                          std_probs, &init_lambda_guess, sbp);
+    } else {
+        _PSIUpdateLambdaK((const int**)pssm, query, query_length,
+                          std_probs, NULL, sbp);
+    }
+}
+
+/* FIXME: change so that only lambda is calculated inside the loop that scales
+   the matrix and kappa is calculated before returning from this function.
+   Scaling factor should be optional argument to accomodate kappa.c's needs?
+*/
+/** Scales score_matrix->pssm so that the lambda computed for it approaches
+ * the ideal lambda of the underlying scoring matrix.
+ * A first loop brackets the multiplicative factor: starting from 1.0 the
+ * factor is pushed up (lambda above ideal) or down (lambda positive but not
+ * above ideal) until the comparison flips. A fixed number of bisection steps
+ * between the two bounds follows. Finally the last column of pssm is set to
+ * BLAST_SCORE_MIN.
+ * @return PSIERR_BADPARAM if score_matrix, sbp, query or std_probs is NULL,
+ * PSIERR_POSITIVEAVGSCORE if a non-positive lambda is obtained while
+ * bracketing, PSI_SUCCESS otherwise
+ */
+int
+PSIScaleMatrix(const Uint1* query,              /* [in] */
+               Uint4 query_length,              /* [in] */
+               const double* std_probs,         /* [in] */
+               double* scaling_factor,          /* [in] */
+               PsiMatrix* score_matrix,         /* [in|out] */
+               BlastScoreBlk* sbp)              /* [in|out] */
+{
+    Boolean first_time = TRUE;
+    Uint4 index = 0;        /* loop index */
+    int** pssm = NULL;
+    double factor;
+    double factor_low = 0.0;
+    double factor_high = 0.0;
+    double new_lambda;      /* Karlin-Altschul parameter */
+
+    const double kPositPercent = 0.05;
+    const Uint4 kPositNumIterations = 10;
+    Boolean too_high = TRUE;
+    double ideal_lambda;
+
+    if ( !score_matrix || !sbp || !query || !std_probs )
+        return PSIERR_BADPARAM;
+
+    ASSERT(sbp->kbp_psi[0]);
+
+    pssm = score_matrix->pssm;
+    ideal_lambda = sbp->kbp_ideal->Lambda;
+
+    /* FIXME: need to take scaling_factor into account */
+
+    /* Bracket the factor between factor_low and factor_high */
+    factor = 1.0;
+    for ( ; ; ) {
+
+        _PSIRescaleAndUpdateLambda(score_matrix, factor, query, query_length,
+                                   std_probs, scaling_factor, sbp);
+
+        new_lambda = sbp->kbp_psi[0]->Lambda;
+
+        if (new_lambda > ideal_lambda) {
+            if (first_time) {
+                factor_high = 1.0 + kPositPercent;
+                factor = factor_high;
+                too_high = TRUE;
+                first_time = FALSE;
+            } else {
+                if (too_high == FALSE) {
+                    break;
+                }
+                factor_high += (factor_high - 1.0);
+                factor = factor_high;
+            }
+        } else if (new_lambda > 0) {
+            if (first_time) {
+                factor_high = 1.0;
+                factor_low = 1.0 - kPositPercent;
+                factor = factor_low;
+                too_high = FALSE;
+                first_time = FALSE;
+            } else {
+                if (too_high == TRUE) {
+                    break;
+                }
+                factor_low += (factor_low - 1.0);
+                factor = factor_low;
+            }
+        } else {
+            return PSIERR_POSITIVEAVGSCORE;
+        }
+    }
+
+    /* Binary search for kPositNumIterations times */
+    for (index = 0; index < kPositNumIterations; index++) {
+
+        factor = (factor_high + factor_low)/2;
+
+        _PSIRescaleAndUpdateLambda(score_matrix, factor, query, query_length,
+                                   std_probs, scaling_factor, sbp);
+
+        new_lambda = sbp->kbp_psi[0]->Lambda;
+
+        if (new_lambda > ideal_lambda) {
+            factor_low = factor;
+        } else {
+            factor_high = factor;
+        }
+    }
+
+    /* FIXME: Why is this needed? */
+    for (index = 0; index < (Uint4) sbp->alphabet_size; index++) {
+        pssm[score_matrix->ncols-1][index] = BLAST_SCORE_MIN;
+    }
+
+    return PSI_SUCCESS;
+}
+
+/* Returns the number of residues in seq[0..length-1] that are not X */
+Uint4
+_PSISequenceLengthWithoutX(const Uint1* seq, Uint4 length)
+{
+    const Uint1 X = AMINOACID_TO_NCBISTDAA['X'];
+    Uint4 non_x = 0;    /* residues counted so far */
+    Uint4 pos = 0;      /* position in the sequence */
+
+    ASSERT(seq);
+
+    for (pos = 0; pos < length; pos++) {
+        non_x += (seq[pos] != X) ? 1 : 0;
+    }
+
+    return non_x;
+}
+
+/* Builds the score frequency distribution of a PSSM.
+ * The observed score range is determined over all positions and residues of
+ * the standard alphabet (scores at or beyond BLAST_SCORE_MIN/BLAST_SCORE_MAX
+ * are ignored). Then, for every position where the query is not X, each
+ * score's probability is increased by the standard probability of the
+ * residue divided by the query length without X's. Finally the average score
+ * is accumulated over the whole observed range.
+ * Returns NULL if the standard alphabet cannot be retrieved or the score
+ * frequency structure cannot be allocated. */
+Blast_ScoreFreq*
+_PSIComputeScoreProbabilities(const int** pssm,             /* [in] */
+                              const Uint1* query,           /* [in] */
+                              Uint4 query_length,           /* [in] */
+                              const double* std_probs,      /* [in] */
+                              const BlastScoreBlk* sbp)     /* [in] */
+{
+    const Uint1 X = AMINOACID_TO_NCBISTDAA['X'];
+    Uint1 aa_alphabet[BLASTAA_SIZE];        /* ncbistdaa alphabet */
+    Uint4 effective_length = 0;             /* length of query w/o Xs */
+    Uint4 p = 0;                            /* index on positions */
+    Uint4 c = 0;                            /* index on characters */
+    int s = 0;                              /* index on scores */
+    int min_score = 0;                      /* minimum score in pssm */
+    int max_score = 0;                      /* maximum score in pssm */
+    short rv = 0;                           /* temporary return value */
+    Blast_ScoreFreq* score_freqs = NULL;    /* score frequencies */
+
+    ASSERT(pssm);
+    ASSERT(query);
+    ASSERT(std_probs);
+    ASSERT(sbp);
+    ASSERT(sbp->alphabet_code == BLASTAA_SEQ_CODE);
+
+    rv = Blast_GetStdAlphabet(sbp->alphabet_code, aa_alphabet, BLASTAA_SIZE);
+    if (rv <= 0) {
+        return NULL;
+    }
+    ASSERT(rv == sbp->alphabet_size);
+
+    effective_length = _PSISequenceLengthWithoutX(query, query_length);
+
+    /* Get the minimum and maximum scores */
+    for (p = 0; p < query_length; p++) {
+        for (c = 0; c < (Uint4) sbp->alphabet_size; c++) {
+            const int kScore = pssm[p][aa_alphabet[c]];
+
+            if (kScore <= BLAST_SCORE_MIN || kScore >= BLAST_SCORE_MAX) {
+                continue;
+            }
+            max_score = MAX(kScore, max_score);
+            min_score = MIN(kScore, min_score);
+        }
+    }
+
+    score_freqs = Blast_ScoreFreqNew(min_score, max_score);
+    if ( !score_freqs ) {
+        return NULL;
+    }
+
+    score_freqs->obs_min = min_score;
+    score_freqs->obs_max = max_score;
+    for (p = 0; p < query_length; p++) {
+        /* Positions with X in the query do not contribute; this is also why
+         * effective_length cannot be zero when the division below runs */
+        if (query[p] == X) {
+            continue;
+        }
+
+        for (c = 0; c < (Uint4) sbp->alphabet_size; c++) {
+            const int kScore = pssm[p][aa_alphabet[c]];
+
+            if (kScore <= BLAST_SCORE_MIN || kScore >= BLAST_SCORE_MAX) {
+                continue;
+            }
+
+            /* Increment the weight for the score in position p, residue c */
+            score_freqs->sprob[kScore] +=
+                (std_probs[aa_alphabet[c]]/effective_length);
+        }
+    }
+
+    /* The observed range is inclusive: max_score must contribute to the
+     * average as well (the loop previously stopped one score short) */
+    for (s = min_score; s <= max_score; s++) {
+        score_freqs->score_avg += (s * score_freqs->sprob[s]);
+    }
+
+    return score_freqs;
+}
+
+/* Recomputes the PSI Karlin-Altschul parameters in sbp from the score
+ * frequencies of pssm. Without an initial guess, lambda and K are calculated
+ * by Blast_KarlinBlkCalc and the gapped PSI K (and its logarithm) is derived
+ * from them; with an initial guess only lambda is recomputed, using
+ * Blast_KarlinLambdaNR. */
+void
+_PSIUpdateLambdaK(const int** pssm,                 /* [in] */
+                  const Uint1* query,               /* [in] */
+                  Uint4 query_length,               /* [in] */
+                  const double* std_probs,          /* [in] */
+                  double* initial_lambda_guess,     /* [in] */
+                  BlastScoreBlk* sbp)               /* [in|out] */
+{
+    Blast_ScoreFreq* freqs = NULL;  /* score frequencies of the PSSM */
+
+    freqs = _PSIComputeScoreProbabilities(pssm, query, query_length,
+                                          std_probs, sbp);
+
+    if (initial_lambda_guess == NULL) {
+        /* Calculate lambda and K */
+        Blast_KarlinBlkCalc(sbp->kbp_psi[0], freqs);
+
+        /* Shouldn't this be in a function? */
+        sbp->kbp_gap_psi[0]->K =
+            sbp->kbp_psi[0]->K * sbp->kbp_gap_std[0]->K / sbp->kbp_ideal->K;
+        sbp->kbp_gap_psi[0]->logK = log(sbp->kbp_gap_psi[0]->K);
+    } else {
+        sbp->kbp_psi[0]->Lambda =
+            Blast_KarlinLambdaNR(freqs, *initial_lambda_guess);
+    }
+
+    freqs = Blast_ScoreFreqDestruct(freqs);
+}
+
+
+/****************************************************************************/
+/* Function definitions for auxiliary functions for the stages above */
+/* Marks positions [start, stop) of sequence seq_index as unused and then
+ * discards the sequence altogether if no used position remains.
+ * Returns PSIERR_BADPARAM when alignment is NULL, when seq_index designates
+ * the query or is not a valid row, or when stop exceeds the query length;
+ * PSI_SUCCESS otherwise.
+ *
+ * @todo This function is the successor to posit.c's posCancel and it
+ * has been implemented to be consistent with it. However, its choice
+ * of sentinel values to flag positions as unused is inconsistent with
+ * the state of newly allocated positions (which would be preferred).
+ * This behavior should be fixed once the algo/blast implementation the
+ * PSSM engine replaces posit.c
+ *   sequence_position[i].letter = (unsigned char) -1;
+ *   sequence_position[i].e_value = kDefaultEvalueForPosition;
+ *   sequence_position[i].extents.left = (unsigned int) -1;
+ */
+int
+_PSIPurgeAlignedRegion(PsiAlignmentData* alignment,
+                       unsigned int seq_index,
+                       unsigned int start,
+                       unsigned int stop)
+{
+    PsiDesc* sequence_position = NULL;
+    unsigned int i = 0;
+
+    if (!alignment)
+        return PSIERR_BADPARAM;
+
+    /* Cannot remove the query sequence from multiple alignment data or
+       bad index. The alignment rows are indexed 0..num_seqs (the rest of
+       this file iterates "s < num_seqs + 1"), so num_seqs + 1 itself is
+       already out of range; the previous ">" comparison let it through. */
+    if (seq_index == kQueryIndex ||
+        seq_index >= alignment->dimensions->num_seqs + 1 ||
+        stop > alignment->dimensions->query_sz)
+        return PSIERR_BADPARAM;
+
+
+    sequence_position = alignment->desc_matrix[seq_index];
+    for (i = start; i < stop; i++) {
+        /* posCancel initializes positions differently than when they are
+         * allocated, why?*/
+        sequence_position[i].letter = 0;
+        sequence_position[i].used = FALSE;
+        sequence_position[i].e_value = PSI_INCLUSION_ETHRESH;
+        sequence_position[i].extents.left = 0;
+        sequence_position[i].extents.right = alignment->dimensions->query_sz;
+    }
+
+    _PSIDiscardIfUnused(alignment, seq_index);
+
+    return PSI_SUCCESS;
+}
+
+/* Deselects sequence seq_index (clears its use_sequences flag) when none of
+ * its positions is marked as used any more */
+void
+_PSIDiscardIfUnused(PsiAlignmentData* alignment, unsigned int seq_index)
+{
+    unsigned int col = 0;   /* column being inspected */
+
+    for (col = 0; col < alignment->dimensions->query_sz; col++) {
+        if (alignment->desc_matrix[seq_index][col].used) {
+            /* At least one aligned position remains: keep the sequence */
+            return;
+        }
+    }
+
+    alignment->use_sequences[seq_index] = FALSE;
+}
+
+/****************************************************************************/
+/* Returns a newly allocated array of sbp->alphabet_size doubles holding the
+ * standard residue probabilities for the alphabet described by sbp, or NULL
+ * if memory cannot be obtained. The array comes from malloc and it is up to
+ * the caller to release it. */
+double*
+_PSIGetStandardProbabilities(const BlastScoreBlk* sbp)
+{
+    Blast_ResFreq* standard_probabilities = NULL;
+    Uint4 i = 0;
+    double* retval = NULL;
+
+    retval = (double*) malloc(sbp->alphabet_size * sizeof(double));
+    if ( !retval ) {
+        return NULL;
+    }
+
+    standard_probabilities = Blast_ResFreqNew(sbp);
+    /* standard_probabilities->prob is read below; presumably the constructor
+     * returns NULL on failure, in which case the result array is released
+     * rather than leaked */
+    if ( !standard_probabilities ) {
+        sfree(retval);
+        return NULL;
+    }
+    Blast_ResFreqStdComp(sbp, standard_probabilities);
+
+    for (i = 0; i < (Uint4) sbp->alphabet_size; i++) {
+        retval[i] = standard_probabilities->prob[i];
+    }
+
+    Blast_ResFreqDestruct(standard_probabilities);
+    return retval;
+}
+
+/* Placeholder: saving diagnostics is not implemented. Calling this function
+ * terminates the program through abort(); none of the arguments is used. */
+PsiDiagnostics*
+_PSISaveDiagnostics(const PsiAlignmentData* alignment,
+ const PsiAlignedBlock* aligned_block,
+ const PsiSequenceWeights* seq_weights)
+{
+ /* _PSICalculateInformationContent(seq_weights); */
+ abort();
+ /* Not reached: abort() is called on the line above */
+ return NULL;
+}
+
+
/*
* ===========================================================================
- *
* $Log: blast_psi_priv.c,v $
+ * Revision 1.10 2004/06/16 15:22:47 camacho
+ * Fixes to add new unit tests
+ *
+ * Revision 1.9 2004/06/09 14:21:03 camacho
+ * Removed msvc compiler warnings
+ *
+ * Revision 1.8 2004/06/08 17:30:06 dondosha
+ * Compiler warnings fixes
+ *
+ * Revision 1.7 2004/06/07 14:18:24 dondosha
+ * Added some variables initialization, to remove compiler warnings
+ *
+ * Revision 1.6 2004/05/28 17:35:03 camacho
+ * Fix msvc6 warnings
+ *
+ * Revision 1.5 2004/05/28 16:00:09 camacho
+ * + first port of PSSM generation engine
+ *
+ * Revision 1.4 2004/05/19 14:52:02 camacho
+ * 1. Added doxygen tags to enable doxygen processing of algo/blast/core
+ * 2. Standardized copyright, CVS $Id string, $Log and rcsid formatting and i
+ * location
+ * 3. Added use of @todo doxygen keyword
+ *
+ * Revision 1.3 2004/05/06 14:01:40 camacho
+ * + _PSICopyMatrix
+ *
* Revision 1.2 2004/04/07 22:08:37 kans
* needed to include blast_def.h for sfree prototype
*
* Revision 1.1 2004/04/07 19:11:17 camacho
* Initial revision
*
- *
* ===========================================================================
*/
diff --git a/algo/blast/core/blast_psi_priv.h b/algo/blast/core/blast_psi_priv.h
index 02600909..7d4efa16 100644
--- a/algo/blast/core/blast_psi_priv.h
+++ b/algo/blast/core/blast_psi_priv.h
@@ -1,7 +1,7 @@
#ifndef ALGO_BLAST_CORE___BLAST_PSI_PRIV__H
#define ALGO_BLAST_CORE___BLAST_PSI_PRIV__H
-/* $Id: blast_psi_priv.h,v 1.2 2004/04/07 21:43:47 camacho Exp $
+/* $Id: blast_psi_priv.h,v 1.5 2004/06/09 14:20:30 camacho Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -26,29 +26,64 @@
*
* ===========================================================================
*
- * Author: Christiam Camacho
+ * Author: Alejandro Schaffer, ported by Christiam Camacho
*
*/
/** @file blast_psi_priv.h
- * Private interface for Position Iterated BLAST API.
+ * Private interface for Position Iterated BLAST API, contains the
+ * PSSM generation engine.
+ *
+ * <pre>
+ * Calculating PSSMs from Seq-aligns is a multi-stage process. These stages
+ * include:
+ * 1) Processing the Seq-align
+ * Examine alignment and extract information about aligned characters,
+ * performed at the API level
+ * 2) Purge biased sequences: construct M multiple sequence alignment as
+ * described in page 3395[1] - performed at the core level; custom
+ * selection of sequences should be performed at the API level.
+ * 3) Compute extents of the alignment: M sub C as described in page 3395[1]
+ * 4) Compute sequence weights
+ * 5) Compute residue frequencies
+ * 6) Convert residue frequencies to PSSM
+ * 7) Scale the resulting PSSM
+ * </pre>
*/
-/*#include <algo/blast/core/blast_psi.h>*/
+#include <algo/blast/core/blast_psi.h>
+#include "matrix_freq_ratios.h"
#ifdef __cplusplus
extern "C" {
#endif
-#define MASTER_INDEX 0
+/****************************************************************************/
+/* Extern declarations for constants (defined in blast_psi_priv.c) */
+
+/** Index into PsiAlignmentData structure for the query sequence */
+extern const unsigned int kQueryIndex;
+
+/** Small constant to test against 0 */
+extern const double kEpsilon;
+
+/** FIXME: Should this value be replaced by BLAST_EXPECT_VALUE? */
+extern const double kDefaultEvalueForPosition;
+
+/** Successor to POSIT_SCALE_FACTOR */
+extern const int kPsiScaleFactor;
+
+
+/****************************************************************************/
+/* Matrix utility functions */
/** Generic 2 dimensional matrix allocator.
* Allocates a ncols by nrows matrix with cells of size data_type_sz. Must be
* freed using x_DeallocateMatrix
- * @param ncols number of columns in matrix
- * @param nrows number of rows in matrix
- * @param data_type_sz size of the data type (in bytes) to allocate for each
- * element in the matrix
+ * @param ncols number of columns in matrix [in]
+ * @param nrows number of rows in matrix [in]
+ * @param data_type_sz size of the data type (in bytes) to allocate for each
+ * element in the matrix [in]
* @return pointer to allocated memory or NULL in case of failure
*/
void**
@@ -57,13 +92,215 @@ _PSIAllocateMatrix(unsigned int ncols, unsigned int nrows,
/** Generic 2 dimensional matrix deallocator.
* Deallocates the memory allocated by x_AllocateMatrix
- * @param matrix matrix to deallocate
- * @param ncols number of columns in the matrix
+ * @param matrix matrix to deallocate [in]
+ * @param ncols number of columns in the matrix [in]
* @return NULL
*/
void**
_PSIDeallocateMatrix(void** matrix, unsigned int ncols);
+/** Copies src matrix into dest matrix, both of which must be ncols by nrows
+ * matrices
+ * @param dest Destination matrix [out]
+ * @param src Source matrix [in]
+ * @param ncols Number of columns to copy [in]
+ * @param nrows Number of rows to copy [in]
+ */
+void
+_PSICopyMatrix(double** dest, const double** src,
+ unsigned int ncols, unsigned int nrows);
+
+/****************************************************************************/
+/* Structure declarations */
+
+/* FIXME: Should be renamed to extents? - this is what posExtents was in old
+ code, only using a simpler structure */
+
+/** This structure keeps track of the regions aligned between the query
+ * sequence and those that were not purged. It is used when calculating the
+ * sequence weights */
+typedef struct PsiAlignedBlock {
+ SSeqRange* pos_extnt; /**< Dynamically allocated array of size query_sz
+ to keep track of the extents of each aligned
+ position */
+
+ Uint4* size; /**< Dynamically allocated array of size query_sz
+ that contains the size of the intervals in the
+ array above */
+} PsiAlignedBlock;
+
+PsiAlignedBlock*
+_PSIAlignedBlockNew(Uint4 num_positions);
+
+PsiAlignedBlock*
+_PSIAlignedBlockFree(PsiAlignedBlock* aligned_blocks);
+
+/** FIXME: Where are the formulas for these? Need better names */
+typedef struct PsiSequenceWeights {
+ double** match_weights; /* observed residue frequencies (fi in paper)
+ dimensions are query_sz+1 by PSI_ALPHABET_SIZE
+ */
+ Uint4 match_weights_size; /* kept for deallocation purposes */
+
+ double* norm_seq_weights; /**< Stores the normalized sequence weights
+ (size num_seqs + 1) */
+ double* row_sigma; /**< array of num_seqs + 1 */
+ /* Sigma: number of different characters occurring in matches within a
+ * multi-alignment block - why is it a double? */
+ double* sigma; /**< array of num_seqs+1 (query_sz) length */
+
+ double* std_prob; /**< standard amino acid probabilities */
+
+ /* These fields are required for important diagnostic output */
+ double* gapless_column_weights; /**< FIXME */
+ double* info_content; /**< position information content (query_sz)*/
+
+} PsiSequenceWeights;
+
+PsiSequenceWeights*
+_PSISequenceWeightsNew(const PsiInfo* info, const BlastScoreBlk* sbp);
+
+PsiSequenceWeights*
+_PSISequenceWeightsFree(PsiSequenceWeights* seq_weights);
+
+/* Return values for internal PSI-BLAST functions */
+
+#define PSI_SUCCESS (0)
+/** Bad parameter used in function */
+#define PSIERR_BADPARAM (-1)
+/** Out of memory */
+#define PSIERR_OUTOFMEM (-2)
+/** Sequence weights do not add to 1 */
+#define PSIERR_BADSEQWEIGHTS (-3)
+/** No frequency ratios were found for the given scoring matrix */
+#define PSIERR_NOFREQRATIOS (-4)
+/** Positive average score found when scaling matrix */
+#define PSIERR_POSITIVEAVGSCORE (-5)
+
+/****************************************************************************/
+/* Function prototypes for the various stages of the PSSM generation engine */
+
+/** Main function for keeping only those selected sequences for PSSM
+ * construction (stage 2)
+ * FIXME: add boolean flag for custom selection of sequences?
+ * @retval PSIERR_BADPARAM if alignment is NULL
+ * PSI_SUCCESS otherwise
+ */
+int
+PSIPurgeBiasedSegments(PsiAlignmentData* alignment);
+
+/** Main function to compute aligned blocks for each position within multiple
+ * alignment (stage 3) */
+int
+PSIComputeAlignmentBlocks(const PsiAlignmentData* alignment, /* [in] */
+ PsiAlignedBlock* aligned_block); /* [out] */
+
+/** Main function to calculate the sequence weights. Should be called with the
+ * return value of PSIComputeAlignmentBlocks (stage 4) */
+int
+PSIComputeSequenceWeights(const PsiAlignmentData* alignment, /* [in] */
+ const PsiAlignedBlock* aligned_blocks, /* [in] */
+ PsiSequenceWeights* seq_weights); /* [out] */
+
+/** Main function to compute the residue frequencies for the PSSM (stage 5) */
+int
+PSIComputeResidueFrequencies(const PsiAlignmentData* alignment, /* [in] */
+ const PsiSequenceWeights* seq_weights, /* [in] */
+ const BlastScoreBlk* sbp, /* [in] */
+ const PsiAlignedBlock* aligned_blocks, /* [in] */
+ const PSIBlastOptions* opts, /* [in] */
+ PsiMatrix* score_matrix); /* [out] */
+
+/** Converts the residue frequencies obtained in the previous stage to a PSSM
+ * (stage 6) */
+int
+PSIConvertResidueFreqsToPSSM(PsiMatrix* score_matrix, /* [in|out] */
+ const Uint1* query, /* [in] */
+ const BlastScoreBlk* sbp, /* [in] */
+ const double* std_probs); /* [in] */
+
+/** Scales the PSSM (stage 7)
+ * @param scaling_factor if not null, use this value to further scale the
+ * matrix (default is kPsiScaleFactor). Useful for composition based statistics
+ * [in] optional
+ */
+int
+PSIScaleMatrix(const Uint1* query, /* [in] */
+ Uint4 query_length, /* [in] */
+ const double* std_probs, /* [in] */
+ double* scaling_factor, /* [in - optional] */
+ PsiMatrix* score_matrix, /* [in|out] */
+ BlastScoreBlk* sbp); /* [in|out] */
+
+/****************************************************************************/
+/* Function prototypes for auxiliary functions for the stages above */
+
+/** Marks the (start, stop] region corresponding to sequence seq_index in
+ * alignment so that it is not further considered for PSSM calculation.
+ * This function is not applicable to the query sequence in the alignment
+ * (seq_index == 0)
+ * @param alignment Alignment data [in|out]
+ * @param seq_index index of the sequence of interest in alignment [in]
+ * @param start start of the region to remove [in]
+ * @param stop stop of the region to remove [in]
+ * @return PSIERR_BADPARAM if no alignment is given, or if seq_index or stop
+ * are invalid,
+ * PSI_SUCCESS otherwise
+ */
+int
+_PSIPurgeAlignedRegion(PsiAlignmentData* alignment,
+ unsigned int seq_index,
+ unsigned int start,
+ unsigned int stop);
+
+/** Checks for any positions in sequence seq_index still considered for PSSM
+ * construction. If none is found, the entire sequence is marked as unused.
+ * @param alignment Alignment data
+ * @param seq_index index of the sequence of interest
+ */
+void
+_PSIDiscardIfUnused(PsiAlignmentData* alignment, unsigned int seq_index);
+
+/** Gets the standard residue frequencies for a scoring system specified in the
+ * BlastScoreBlk structure. This is a wrapper for Blast_ResFreqStdComp() from
+ * blast_stat.c with a more intention-revealing name :) .
+ * used in kappa.c?
+ * Caller is responsible for deallocating return value via sfree().
+ * @param sbp Score block structure [in]
+ * @retval NULL if there is not enough memory otherwise an array of length
+ * sbp->alphabet_size with the standard background probabilities for
+ * the scoring system requested.
+ */
+double*
+_PSIGetStandardProbabilities(const BlastScoreBlk* sbp);
+
+/** Calculates the length of the sequence without including any 'X' residues.
+ * used in kappa.c
+ * @param seq sequence to examine [in]
+ * @param length length of the sequence above [in]
+ * @retval number of non-X residues in the sequence
+ */
+Uint4
+_PSISequenceLengthWithoutX(const Uint1* seq, Uint4 length);
+
+/** Computes the probabilities for each score in the PSSM.
+ * This is only valid for protein sequences.
+ * Should this go in blast_stat.[hc]?
+ * used in kappa.c in notposfillSfp()
+ */
+Blast_ScoreFreq*
+_PSIComputeScoreProbabilities(const int** pssm, /* [in] */
+ const Uint1* query, /* [in] */
+ Uint4 query_length, /* [in] */
+ const double* std_probs, /* [in] */
+ const BlastScoreBlk* sbp); /* [in] */
+
+/** Collects "diagnostic" information from the process of creating the PSSM */
+PsiDiagnostics*
+_PSISaveDiagnostics(const PsiAlignmentData* alignment,
+ const PsiAlignedBlock* aligned_block,
+ const PsiSequenceWeights* seq_weights);
+
#ifdef __cplusplus
}
#endif
@@ -73,6 +310,15 @@ _PSIDeallocateMatrix(void** matrix, unsigned int ncols);
* ===========================================================================
*
* $Log: blast_psi_priv.h,v $
+ * Revision 1.5 2004/06/09 14:20:30 camacho
+ * Updated comments
+ *
+ * Revision 1.4 2004/05/28 16:00:10 camacho
+ * + first port of PSSM generation engine
+ *
+ * Revision 1.3 2004/05/06 14:01:40 camacho
+ * + _PSICopyMatrix
+ *
* Revision 1.2 2004/04/07 21:43:47 camacho
* Removed unneeded #include directive
*
diff --git a/algo/blast/core/blast_rps.h b/algo/blast/core/blast_rps.h
index 76c47282..7f2426af 100644
--- a/algo/blast/core/blast_rps.h
+++ b/algo/blast/core/blast_rps.h
@@ -1,37 +1,35 @@
-/* $Id: blast_rps.h,v 1.4 2004/04/09 14:23:35 papadopo Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_rps.h
-
-Author: Jason Papadopoulos
-
-Contents: RPS BLAST structure definitions
+/* $Id: blast_rps.h,v 1.6 2004/05/27 14:48:58 papadopo Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Jason Papadopoulos
+ *
+ */
-*****************************************************************************/
+/** @file blast_rps.h
+ * RPS BLAST structure definitions.
+ */
#ifndef BLAST_RPS__H
#define BLAST_RPS__H
@@ -42,8 +40,8 @@ Contents: RPS BLAST structure definitions
extern "C" {
#endif
-#define RPS_MAGIC_NUM 0x1e16 /* RPS data files contain this number */
-#define NUM_EXPANSION_WORDS 3
+#define RPS_MAGIC_NUM 0x1e16 /**< RPS data files contain this number */
+#define NUM_EXPANSION_WORDS 3 /**< Intentionally unused words in .loo file */
/** header of RPS blast '.loo' file */
diff --git a/algo/blast/core/blast_seg.c b/algo/blast/core/blast_seg.c
index cb0d2207..896d915e 100644
--- a/algo/blast/core/blast_seg.c
+++ b/algo/blast/core/blast_seg.c
@@ -1,51 +1,45 @@
-static char const rcsid[] = "$Id: blast_seg.c,v 1.21 2004/04/05 16:09:27 camacho Exp $";
-/*
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-*
-* File Name: blast_seg.c
-*
-* Author(s): Ilya Dondoshansky
-*
-* Version Creation Date: 05/28/2003
-*
-* $Revision: 1.21 $
-*
-* File Description: A utility to find low complexity AA regions.
-* This parallels functionality of seg.c from the C toolkit,
-* but without using the structures generated from ASN.1 spec.
-* ==========================================================================
-*/
+/* $Id: blast_seg.c,v 1.23 2004/05/24 15:51:34 madden Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_seg.c
+ * A utility to find low complexity AA regions. This parallels functionality
+ * of seg.c from the C toolkit, but without using the structures generated
+ * from ASN.1 spec.
+ * @todo FIXME needs comments
+ */
+
+static char const rcsid[] =
+ "$Id: blast_seg.c,v 1.23 2004/05/24 15:51:34 madden Exp $";
#include <algo/blast/core/blast_seg.h>
-#ifdef WIN16
-float
-#else
-double
-#endif
- lnfact[] =
+double lnfact[] =
{
0.000000, 0.000000, 0.693147, 1.791759, 3.178054, 4.787492, 6.579251, 8.525161,
10.604603, 12.801827, 15.104413, 17.502308, 19.987214, 22.552164, 25.191221, 27.899271,
@@ -1300,20 +1294,47 @@ double
82108.927837
};
+#define AA20 2
+
+#define LN20 2.9957322735539909
+
+#define CHAR_SET 128
+
+typedef struct SSequence
+ {
+ struct SSequence* parent;
+ char* seq;
+ Alpha* palpha;
+ Int4 start;
+ Int4 length;
+ Int4 bogus;
+ Boolean punctuation;
+ Int4* composition;
+ Int4* state;
+ double entropy;
+} SSequence;
+
+typedef struct SSeg
+ {
+ int begin;
+ int end;
+ struct SSeg *next;
+ } SSeg;
+
/*---------------------------------------------------------------(SeqNew)---*/
-static Sequence* SeqNew(void)
+static SSequence* SeqNew(void)
{
- Sequence* seq;
+ SSequence* seq;
- seq = (Sequence*) calloc(1, sizeof(Sequence));
+ seq = (SSequence*) calloc(1, sizeof(SSequence));
if (seq==NULL)
{
/* raise error flag and etc. */
return(seq);
}
- seq->parent = (Sequence*) NULL;
+ seq->parent = (SSequence*) NULL;
seq->seq = (char*) NULL;
seq->palpha = (Alpha*) NULL;
seq->start = seq->length = 0;
@@ -1341,7 +1362,7 @@ static void AlphaFree (Alpha* palpha)
/*--------------------------------------------------------------(SeqFree)---*/
-static void SeqFree(Sequence* seq)
+static void SeqFree(SSequence* seq)
{
if (seq==NULL) return;
@@ -1353,11 +1374,11 @@ static void SeqFree(Sequence* seq)
return;
}
-/*--------------------------------------------------------------(SegFree)---*/
+/*--------------------------------------------------------------(SegFree)---*/
-static void SegFree(Seg* seg)
+static void SegFree(SSeg* seg)
{
- Seg* nextseg;
+ SSeg* nextseg;
while (seg)
{
@@ -1371,7 +1392,7 @@ static void SegFree(Seg* seg)
/*--------------------------------------------------------------(hasdash)---*/
-static Boolean hasdash(Sequence* win)
+static Boolean hasdash(SSequence* win)
{
register char *seq, *seqmax;
@@ -1399,7 +1420,7 @@ static int state_cmp(const void* s1, const void* s2)
/*---------------------------------------------------------------(compon)---*/
-static void compon(Sequence* win)
+static void compon(SSequence* win)
{
Int4* comp;
@@ -1430,7 +1451,7 @@ static void compon(Sequence* win)
/*--------------------------------------------------------------(stateon)---*/
-static void stateon(Sequence* win)
+static void stateon(SSequence* win)
{
Int4 letter, nel, c;
@@ -1458,16 +1479,16 @@ static void stateon(Sequence* win)
/*--------------------------------------------------------------(openwin)---*/
-static Sequence* openwin(Sequence* parent, Int4 start, Int4 length)
+static SSequence* openwin(SSequence* parent, Int4 start, Int4 length)
{
- Sequence* win;
+ SSequence* win;
if (start<0 || length<0 || start+length>parent->length)
{
- return((Sequence*) NULL);
+ return((SSequence*) NULL);
}
- win = (Sequence*) calloc(1, sizeof(Sequence));
+ win = (SSequence*) calloc(1, sizeof(SSequence));
/*--- ---[set links, up and down]---*/
@@ -1553,7 +1574,7 @@ static void incrementsv(Int4* sv, Int4 class)
/*------------------------------------------------------------(shiftwin1)---*/
-static Int4 shiftwin1(Sequence* win)
+static Int4 shiftwin1(SSequence* win)
{
Int4 j, length;
Int4* comp;
@@ -1589,7 +1610,7 @@ static Int4 shiftwin1(Sequence* win)
/*-------------------------------------------------------------(closewin)---*/
-static void closewin(Sequence* win)
+static void closewin(SSequence* win)
{
if (win==NULL) return;
@@ -1602,7 +1623,7 @@ static void closewin(Sequence* win)
/*----------------------------------------------------------------(enton)---*/
-static void enton(Sequence* win)
+static void enton(SSequence* win)
{
if (win->state==NULL) {stateon(win);}
@@ -1613,9 +1634,9 @@ static void enton(Sequence* win)
}
/*---------------------------------------------------------------(seqent)---*/
-static double* seqent(Sequence* seq, Int4 window, Int4 maxbogus)
+static double* seqent(SSequence* seq, Int4 window, Int4 maxbogus)
{
- Sequence* win;
+ SSequence* win;
double* H;
Int4 i, first, last, downset, upset;
@@ -1665,9 +1686,9 @@ static double* seqent(Sequence* seq, Int4 window, Int4 maxbogus)
/*------------------------------------------------------------(appendseg)---*/
static void
-appendseg(Seg* segs, Seg* seg)
+appendseg(SSeg* segs, SSeg* seg)
- {Seg* temp;
+ {SSeg* temp;
temp = segs;
while (TRUE)
@@ -1816,11 +1837,11 @@ fprintf(stderr, "%lf %lf %lf\n", ans, ans1, ans2);
/*-----------------------------------------------------------------(trim)---*/
-static void trim(Sequence* seq, Int4* leftend, Int4* rightend,
+static void trim(SSequence* seq, Int4* leftend, Int4* rightend,
SegParameters* sparamsp)
{
- Sequence* win;
+ SSequence* win;
double prob, minprob;
Int4 shift, len, i;
Int4 lend, rend;
@@ -1875,11 +1896,11 @@ static void trim(Sequence* seq, Int4* leftend, Int4* rightend,
/*---------------------------------------------------------------(SegSeq)---*/
-static void SegSeq(Sequence* seq, SegParameters* sparamsp, Seg* *segs,
+static void SegSeq(SSequence* seq, SegParameters* sparamsp, SSeg* *segs,
Int4 offset)
{
- Seg* seg,* leftsegs;
- Sequence* leftseq;
+ SSeg* seg,* leftsegs;
+ SSequence* leftseq;
Int4 window;
double locut, hicut;
Int4 maxbogus;
@@ -1926,7 +1947,7 @@ static void SegSeq(Sequence* seq, SegParameters* sparamsp, Seg* *segs,
rend = leftend - 1;
leftseq = openwin(seq, lend, rend-lend+1);
- leftsegs = (Seg*) NULL;
+ leftsegs = (SSeg*) NULL;
SegSeq(leftseq, sparamsp, &leftsegs, offset+lend);
if (leftsegs!=NULL)
{
@@ -1936,18 +1957,18 @@ static void SegSeq(Sequence* seq, SegParameters* sparamsp, Seg* *segs,
closewin(leftseq);
/* trim(openwin(seq, lend, rend-lend+1), &lend, &rend);
- seg = (Seg*) calloc(1, sizeof(Seg));
+ seg = (SSeg*) calloc(1, sizeof(SSeg));
seg->begin = lend;
seg->end = rend;
- seg->next = (Seg*) NULL;
+ seg->next = (SSeg*) NULL;
if (segs==NULL) segs = seg;
else appendseg(segs, seg); */
}
- seg = (Seg*) calloc(1, sizeof(Seg));
+ seg = (SSeg*) calloc(1, sizeof(SSeg));
seg->begin = leftend + offset;
seg->end = rightend + offset;
- seg->next = (Seg*) NULL;
+ seg->next = (SSeg*) NULL;
if (*segs==NULL) *segs = seg;
else appendseg(*segs, seg);
@@ -1966,9 +1987,9 @@ static void SegSeq(Sequence* seq, SegParameters* sparamsp, Seg* *segs,
hilenmin also does something, but we need to ask Scott Federhen what?
*/
-static void mergesegs(Sequence* seq, Seg* segs, Boolean overlaps)
+static void mergesegs(SSequence* seq, SSeg* segs, Boolean overlaps)
{
- Seg* seg,* nextseg;
+ SSeg* seg,* nextseg;
Int4 hilenmin; /* hilenmin yet unset */
Int4 len;
@@ -2020,7 +2041,7 @@ static void mergesegs(Sequence* seq, Seg* segs, Boolean overlaps)
return;
}
-static Int2 SegsToBlastSeqLoc(Seg* segs, Int4 offset, BlastSeqLoc** seg_locs)
+static Int2 SegsToBlastSeqLoc(SSeg* segs, Int4 offset, BlastSeqLoc** seg_locs)
{
SSeqRange* dip;
BlastSeqLoc* last_slp = NULL,* head_slp = NULL;
@@ -2176,8 +2197,8 @@ static Alpha* AlphaCopy (Alpha* palpha)
Int2 SeqBufferSeg (Uint1* sequence, Int4 length, Int4 offset,
SegParameters* sparamsp, BlastSeqLoc** seg_locs)
{
- Sequence* seqwin;
- Seg* segs;
+ SSequence* seqwin;
+ SSeg* segs;
Boolean params_allocated = FALSE;
SegParametersCheck (sparamsp);
@@ -2206,7 +2227,7 @@ Int2 SeqBufferSeg (Uint1* sequence, Int4 length, Int4 offset,
/* seg the sequence */
- segs = (Seg*) NULL;
+ segs = (SSeg*) NULL;
SegSeq (seqwin, sparamsp, &segs, 0);
/* merge the segment if desired. */
diff --git a/algo/blast/core/blast_seg.h b/algo/blast/core/blast_seg.h
index 19d2e4cf..ed0d0857 100644
--- a/algo/blast/core/blast_seg.h
+++ b/algo/blast/core/blast_seg.h
@@ -1,65 +1,48 @@
-/* $Id: blast_seg.h,v 1.9 2003/08/11 14:57:16 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
+/* $Id: blast_seg.h,v 1.12 2004/06/16 14:53:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_seg.h
+ * SEG filtering functions. @todo FIXME: should this be combined with
+ * blast_filter/dust? Needs doxygen documentation and comments
+ */
-/*****************************************************************************
-
-File name: blast_filter.h
-
-Author: Ilya Dondoshansky
-
-Contents: SEG filtering functions.
-
-Detailed Contents:
-
-******************************************************************************
- * $Revision: 1.9 $
- * */
#ifndef __BLAST_SEG__
#define __BLAST_SEG__
+#include <algo/blast/core/blast_def.h>
+
#ifdef __cplusplus
extern "C" {
#endif
-#include <algo/blast/core/blast_def.h>
-
-#define AA20 2
-
-#define LN20 2.9957322735539909
-
-#define CHAR_SET 128
-
/*--------------------------------------------------------------(structs)---*/
-typedef struct Seg
- {
- int begin;
- int end;
- struct Seg *next;
- } Seg;
-
typedef struct Alpha
{
Int4 alphabet;
@@ -83,20 +66,6 @@ typedef struct SegParameters
Alpha* palpha;
} SegParameters;
-typedef struct Sequence
- {
- struct Sequence* parent;
- char* seq;
- Alpha* palpha;
- Int4 start;
- Int4 length;
- Int4 bogus;
- Boolean punctuation;
- Int4* composition;
- Int4* state;
- double entropy;
- } Sequence;
-
SegParameters* SegParametersNewAa (void);
void SegParametersFree(SegParameters* sparamsp);
diff --git a/algo/blast/core/blast_seqsrc.c b/algo/blast/core/blast_seqsrc.c
index 7b153416..6721e92a 100644
--- a/algo/blast/core/blast_seqsrc.c
+++ b/algo/blast/core/blast_seqsrc.c
@@ -1,36 +1,39 @@
-/* $Id: blast_seqsrc.c,v 1.18 2004/04/28 19:37:16 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-*
-* Author: Christiam Camacho
-*
-* File Description:
-* Definition of ADT to retrieve sequences for the BLAST engine
-*
-*/
-
-static char const rcsid[] = "$Id: blast_seqsrc.c,v 1.18 2004/04/28 19:37:16 dondosha Exp $";
+/* $Id: blast_seqsrc.c,v 1.20 2004/06/07 17:12:06 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Christiam Camacho
+ *
+ *
+ */
+
+/** @file blast_seqsrc.c
+ * Definition of ADT to retrieve sequences for the BLAST engine
+ */
+
+static char const rcsid[] =
+ "$Id: blast_seqsrc.c,v 1.20 2004/06/07 17:12:06 dondosha Exp $";
#include <algo/blast/core/blast_seqsrc.h>
@@ -39,6 +42,7 @@ struct BlastSeqSrc {
BlastSeqSrcConstructor NewFnPtr; /**< Constructor */
BlastSeqSrcDestructor DeleteFnPtr; /**< Destructor */
+ BlastSeqSrcCopier CopyFnPtr; /**< Copier */
/* Functions to get information about database as a whole */
GetInt4FnPtr GetNumSeqs; /**< Get number of sequences in set */
@@ -111,6 +115,27 @@ BlastSeqSrc* BlastSeqSrcFree(BlastSeqSrc* bssp)
return (BlastSeqSrc*) (*destructor_fnptr)(bssp);
}
+/** Create a copy of a BlastSeqSrc and let the implementation adjust it.
+ * The structure is first duplicated with BlastMemDup (presumably a shallow,
+ * byte-wise duplicate -- confirm against BlastMemDup). If the source carries
+ * a CopyFnPtr, that function is then invoked on the duplicate and its return
+ * value is handed back to the caller.
+ * @param bssp Structure to copy; may be NULL [in]
+ * @return NULL if bssp is NULL or the duplication fails; otherwise the
+ *         duplicate, or whatever CopyFnPtr returns when given the duplicate.
+ */
+BlastSeqSrc* BlastSeqSrcCopy(const BlastSeqSrc* bssp)
+{
+    BlastSeqSrcCopier copy_fnptr = NULL;
+    BlastSeqSrc* retval = NULL;
+
+    if (!bssp) {
+        return NULL;
+    }
+
+    retval = (BlastSeqSrc*) BlastMemDup(bssp, sizeof(BlastSeqSrc));
+    if (!retval) {
+        return NULL;
+    }
+
+    /* Read the pointer value itself instead of dereferencing it: applying
+     * unary * to a NULL function pointer is undefined, and NULL is exactly
+     * the case being tested for below. */
+    copy_fnptr = bssp->CopyFnPtr;
+
+    /* If copy function is not provided, just return a copy of the structure */
+    if (!copy_fnptr) {
+        return retval;
+    }
+
+    return (BlastSeqSrc*) (*copy_fnptr)(retval);
+}
+
/******************** BlastSeqSrcIterator API *******************************/
BlastSeqSrcIterator* BlastSeqSrcIteratorNew(unsigned int chunk_sz)
@@ -179,6 +204,7 @@ void Set##member(data_structure_type var, member_type arg) \
/* Note there's no ; after these macros! */
DEFINE_MEMBER_FUNCTIONS(BlastSeqSrcConstructor, NewFnPtr, BlastSeqSrc*)
DEFINE_MEMBER_FUNCTIONS(BlastSeqSrcDestructor, DeleteFnPtr, BlastSeqSrc*)
+DEFINE_MEMBER_FUNCTIONS(BlastSeqSrcCopier, CopyFnPtr, BlastSeqSrc*)
DEFINE_MEMBER_FUNCTIONS(void*, DataStructure, BlastSeqSrc*)
DEFINE_MEMBER_FUNCTIONS(GetInt4FnPtr, GetNumSeqs, BlastSeqSrc*)
diff --git a/algo/blast/core/blast_seqsrc.h b/algo/blast/core/blast_seqsrc.h
index 9df239c9..f20c540f 100644
--- a/algo/blast/core/blast_seqsrc.h
+++ b/algo/blast/core/blast_seqsrc.h
@@ -1,36 +1,39 @@
-/* $Id: blast_seqsrc.h,v 1.18 2004/04/28 19:36:57 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-*
-* Author: Christiam Camacho
-*
-* Contents: Declaration of ADT to retrieve sequences for the BLAST engine.
-*
-*/
+/* $Id: blast_seqsrc.h,v 1.20 2004/06/07 17:11:34 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Christiam Camacho
+ *
+ */
+
+/** @file blast_seqsrc.h
+ * Declaration of ADT to retrieve sequences for the BLAST engine.
+ */
#ifndef BLAST_SEQSRC_H
#define BLAST_SEQSRC_H
+
#include <algo/blast/core/blast_def.h>
#ifdef __cplusplus
@@ -65,6 +68,12 @@ typedef BlastSeqSrc* (*BlastSeqSrcConstructor) (BlastSeqSrc*, void*);
* Argument is the BlastSeqSrc structure to free, always returns NULL. */
typedef BlastSeqSrc* (*BlastSeqSrcDestructor) (BlastSeqSrc*);
+/** Function pointer typedef to modify whatever is necessary in a copy of a
+ * BlastSeqSrc structure to achieve multi-thread safety.
+ * Argument is the already copied BlastSeqSrc structure;
+ * returns the modified structure. */
+typedef BlastSeqSrc* (*BlastSeqSrcCopier) (BlastSeqSrc*);
+
/** Function pointer typedef to return a 4-byte integer.
* First argument is the BlastSeqSrc structure used, second argument is
* passed to user-defined implementation */
@@ -220,6 +229,10 @@ BlastSeqSrc* BlastSeqSrcNew(const BlastSeqSrcNewInfo* bssn_info);
*/
BlastSeqSrc* BlastSeqSrcFree(BlastSeqSrc* bssp);
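+/** Copy function: needed to guarantee thread safety.
+ * Duplicates the structure and, if a copier (CopyFnPtr) has been set on it,
+ * passes the duplicate to that copier.
+ * @param seq_src Structure to copy [in]
+ * @return The copy (as returned by the copier, when one is set), or NULL if
+ *         seq_src is NULL or the duplication fails.
+ */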
+BlastSeqSrc* BlastSeqSrcCopy(const BlastSeqSrc* seq_src);
+
/** Convenience macros call function pointers (TODO: needs to be more robust)
* Currently, this defines the API */
#define BLASTSeqSrcGetNumSeqs(bssp) \
@@ -268,6 +281,7 @@ void Set##member(data_structure_type var, member_type arg) \
DECLARE_MEMBER_FUNCTIONS(BlastSeqSrcConstructor, NewFnPtr, BlastSeqSrc*);
DECLARE_MEMBER_FUNCTIONS(BlastSeqSrcDestructor, DeleteFnPtr, BlastSeqSrc*);
+DECLARE_MEMBER_FUNCTIONS(BlastSeqSrcCopier, CopyFnPtr, BlastSeqSrc*);
DECLARE_MEMBER_FUNCTIONS(void*, DataStructure, BlastSeqSrc*);
DECLARE_MEMBER_FUNCTIONS(GetInt4FnPtr, GetNumSeqs, BlastSeqSrc*);
diff --git a/algo/blast/core/blast_setup.c b/algo/blast/core/blast_setup.c
index b4a2b633..b0a9e6ab 100644
--- a/algo/blast/core/blast_setup.c
+++ b/algo/blast/core/blast_setup.c
@@ -1,40 +1,39 @@
+/* $Id: blast_setup.c,v 1.89 2004/06/15 14:51:51 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Tom Madden
+ *
+ */
+
+/** @file blast_setup.c
+ * Utilities to initialize and set up BLAST.
+ */
+
+
static char const rcsid[] =
- "$Id: blast_setup.c,v 1.83 2004/04/29 19:53:39 papadopo Exp $";
-/* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_setup.c
-
-Author: Tom Madden
-
-Contents: Utilities initialize/setup BLAST.
-
-$Revision: 1.83 $
-
-******************************************************************************/
+ "$Id: blast_setup.c,v 1.89 2004/06/15 14:51:51 dondosha Exp $";
#include <algo/blast/core/blast_setup.h>
#include <algo/blast/core/blast_util.h>
@@ -333,8 +332,6 @@ BlastScoreBlkMatrixInit(Uint1 program_number,
const BlastScoringOptions* scoring_options,
BlastScoreBlk* sbp)
{
- Int2 status = 0;
-
if (!sbp || !scoring_options)
return 1;
@@ -357,7 +354,6 @@ BlastScoreBlkMatrixInit(Uint1 program_number,
(long) sbp->reward, (long) sbp->penalty);
sbp->name = strdup(buffer);
}
- status = BLAST_ScoreBlkMatFill(sbp, scoring_options->matrix_path);
} else {
char* p = NULL;
@@ -368,19 +364,26 @@ BlastScoreBlkMatrixInit(Uint1 program_number,
/* protein matrices are in all caps by convention */
for (p = sbp->name; *p != NULLB; p++)
*p = toupper(*p);
- status = BLAST_ScoreBlkMatFill(sbp, scoring_options->matrix_path);
}
- return status;
+ return BLAST_ScoreBlkMatFill(sbp, scoring_options->matrix_path);
}
Int2
-BlastSetup_GetScoreBlock(BLAST_SequenceBlk* query_blk, BlastQueryInfo* query_info, const BlastScoringOptions* scoring_options, Uint1 program_number, Boolean phi_align, BlastScoreBlk* *sbpp, Blast_Message* *blast_message)
+BlastSetup_GetScoreBlock(BLAST_SequenceBlk* query_blk,
+ BlastQueryInfo* query_info,
+ const BlastScoringOptions* scoring_options,
+ Uint1 program_number,
+ Boolean phi_align,
+ BlastScoreBlk* *sbpp,
+ double scale_factor,
+ Blast_Message* *blast_message)
{
BlastScoreBlk* sbp;
Int2 status=0; /* return value. */
Int4 context; /* loop variable. */
+ Boolean query_valid = FALSE;
if (sbpp == NULL)
return 1;
@@ -394,48 +397,53 @@ BlastSetup_GetScoreBlock(BLAST_SequenceBlk* query_blk, BlastQueryInfo* query_inf
return 1;
*sbpp = sbp;
+ sbp->scale_factor = scale_factor;
status = BlastScoreBlkMatrixInit(program_number, scoring_options, sbp);
if (status != 0)
return status;
- for (context = query_info->first_context;
- context <= query_info->last_context; ++context) {
-
- Int4 context_offset;
- Int4 query_length;
- Uint1 *buffer; /* holds sequence */
-
- /* For each query, check if forward strand is present */
- if ((query_length = BLAST_GetQueryLength(query_info, context)) < 0)
- continue;
-
- context_offset = query_info->context_offsets[context];
- buffer = &query_blk->sequence[context_offset];
-
- if (!phi_align &&
- (status = BLAST_ScoreBlkFill(sbp, (char *) buffer,
- query_length, context))) {
- Blast_MessageWrite(blast_message, BLAST_SEV_ERROR, 2, 1,
- "Query completely filtered; nothing left to search");
- return status;
- }
- }
-
-
/* Fills in block for gapped blast. */
if (phi_align) {
PHIScoreBlkFill(sbp, scoring_options, blast_message);
- } else if (scoring_options->gapped_calculation) {
- status = BlastScoreBlkGappedFill(sbp, scoring_options,
- program_number, query_info);
- if (status) {
+ } else {
+ for (context = query_info->first_context;
+ context <= query_info->last_context; ++context) {
+
+ Int4 context_offset;
+ Int4 query_length;
+ Uint1 *buffer; /* holds sequence */
+
+ /* For each query, check if forward strand is present */
+ if ((query_length = BLAST_GetQueryLength(query_info, context)) < 0)
+ continue;
+
+ context_offset = query_info->context_offsets[context];
+ buffer = &query_blk->sequence[context_offset];
+
+ if ((status = BLAST_ScoreBlkFill(sbp, (char *) buffer,
+ query_length, context)) == 0) {
+ query_valid = TRUE;
+ }
+ }
+
+ if (!query_valid) {
Blast_MessageWrite(blast_message, BLAST_SEV_ERROR, 2, 1,
- "Unable to initialize scoring block");
+ "Query completely filtered; nothing left to search");
return status;
}
+
+ if (scoring_options->gapped_calculation) {
+ status = BlastScoreBlkGappedFill(sbp, scoring_options,
+ program_number, query_info);
+ if (status) {
+ Blast_MessageWrite(blast_message, BLAST_SEV_ERROR, 2, 1,
+ "Unable to initialize scoring block");
+ return status;
+ }
+ }
}
-
+
/* Get "ideal" values if the calculated Karlin-Altschul params bad. */
if (program_number == blast_type_blastx ||
program_number == blast_type_tblastx ||
@@ -465,29 +473,40 @@ BlastSetup_GetScoreBlock(BLAST_SequenceBlk* query_blk, BlastQueryInfo* query_inf
}
Int2 BLAST_MainSetUp(Uint1 program_number,
- const QuerySetUpOptions * qsup_options,
- const BlastScoringOptions * scoring_options,
- const BlastHitSavingOptions * hit_options,
- BLAST_SequenceBlk * query_blk,
- BlastQueryInfo * query_info,
- BlastSeqLoc ** lookup_segments, BlastMaskLoc * *filter_out,
- BlastScoreBlk * *sbpp, Blast_Message * *blast_message)
+ const QuerySetUpOptions *qsup_options,
+ const BlastScoringOptions *scoring_options,
+ const BlastHitSavingOptions *hit_options,
+ BLAST_SequenceBlk *query_blk,
+ BlastQueryInfo *query_info,
+ double scale_factor,
+ BlastSeqLoc **lookup_segments,
+ BlastMaskLoc **filter_out,
+ BlastScoreBlk **sbpp,
+ Blast_Message **blast_message)
{
Boolean mask_at_hash = FALSE; /* mask only for making lookup table? */
Int2 status = 0; /* return value */
BlastMaskLoc *filter_maskloc = NULL; /* Local variable for mask locs. */
- if ((status=BlastSetUp_GetFilteringLocations(query_blk, query_info, program_number, qsup_options->filter_string,
- &filter_maskloc, &mask_at_hash, blast_message)))
- {
+ status = BlastSetUp_GetFilteringLocations(query_blk,
+ query_info,
+ program_number,
+ qsup_options->filter_string,
+ &filter_maskloc,
+ &mask_at_hash,
+ blast_message);
+ if (status) {
return status;
}
if (!mask_at_hash)
{
- if ((status=BlastSetUp_MaskQuery(query_blk, query_info, filter_maskloc, program_number)) != 0)
+ status = BlastSetUp_MaskQuery(query_blk, query_info, filter_maskloc,
+ program_number);
+ if (status != 0) {
return status;
+ }
}
/* If there was a lower case mask, its contents have now been moved to
@@ -504,12 +523,16 @@ Int2 BLAST_MainSetUp(Uint1 program_number,
BLAST_InitDNAPSequence(query_blk, query_info);
}
- BLAST_ComplementMaskLocations(program_number, query_info, filter_maskloc, lookup_segments);
+ BLAST_ComplementMaskLocations(program_number, query_info, filter_maskloc,
+ lookup_segments);
- if ((status=BlastSetup_GetScoreBlock(query_blk, query_info, scoring_options, program_number,
- hit_options->phi_align, sbpp, blast_message)) > 0)
+ status = BlastSetup_GetScoreBlock(query_blk, query_info, scoring_options,
+ program_number, hit_options->phi_align,
+ sbpp, scale_factor, blast_message);
+ if (status > 0) {
return status;
+ }
return 0;
}
@@ -574,42 +597,40 @@ Int2 BLAST_CalcEffLengths (Uint1 program_number,
current context */
kbp = kbp_ptr[index];
- if (eff_len_options->searchsp_eff) {
- effective_search_space = eff_len_options->searchsp_eff;
- } else {
- if ( (query_length = BLAST_GetQueryLength(query_info, index)) <= 0) {
- continue;
- }
- /* Use the correct Karlin block. For blastn, two identical Karlin
- blocks are allocated for each sequence (one per strand), but we
- only need one of them.
- */
- if (program_number != blast_type_blastn &&
- scoring_options->gapped_calculation) {
+ if ( (query_length = BLAST_GetQueryLength(query_info, index)) > 0) {
+ /* Use the correct Karlin block. For blastn, two identical Karlin
+ blocks are allocated for each sequence (one per strand), but we
+ only need one of them.
+ */
+ if (program_number != blast_type_blastn &&
+ scoring_options->gapped_calculation) {
BLAST_ComputeLengthAdjustment(kbp->K, kbp->logK,
alpha/kbp->Lambda, beta,
query_length, db_length,
db_num_seqs, &length_adjustment);
- } else {
+ } else {
BLAST_ComputeLengthAdjustment(kbp->K, kbp->logK, 1/kbp->H, 0,
query_length, db_length,
db_num_seqs, &length_adjustment);
- }
-
- effective_search_space =
- (query_length - length_adjustment) *
- (db_length - db_num_seqs*length_adjustment);
-
- /* For translated RPS blast, the DB size is left unchanged
- and the query size is divided by 3 (for conversion to
- a protein sequence) and multiplied by 6 (for 6 frames) */
-
- if (program_number == blast_type_rpstblastn)
- effective_search_space *= (Int8)(NUM_FRAMES / CODON_LENGTH);
- }
- query_info->eff_searchsp_array[index] = effective_search_space;
- query_info->length_adjustments[index] = length_adjustment;
-
+ }
+ /* If overriding search space value is provided in options,
+ do not calculate it. */
+ if (eff_len_options->searchsp_eff) {
+ effective_search_space = eff_len_options->searchsp_eff;
+ } else {
+ effective_search_space =
+ (query_length - length_adjustment) *
+ (db_length - db_num_seqs*length_adjustment);
+
+ /* For translated RPS blast, the DB size is left unchanged
+ and the query size is divided by 3 (for conversion to
+ a protein sequence) and multiplied by 6 (for 6 frames) */
+ if (program_number == blast_type_rpstblastn)
+ effective_search_space *= (Int8)(NUM_FRAMES / CODON_LENGTH);
+ }
+ }
+ query_info->eff_searchsp_array[index] = effective_search_space;
+ query_info->length_adjustments[index] = length_adjustment;
}
return 0;
@@ -617,17 +638,18 @@ Int2 BLAST_CalcEffLengths (Uint1 program_number,
Int2
BLAST_GapAlignSetUp(Uint1 program_number,
- const BlastSeqSrc* seq_src,
- const BlastScoringOptions* scoring_options,
- const BlastEffectiveLengthsOptions* eff_len_options,
- const BlastExtensionOptions* ext_options,
- const BlastHitSavingOptions* hit_options,
- BlastQueryInfo* query_info,
- BlastScoreBlk* sbp,
- BlastExtensionParameters** ext_params,
- BlastHitSavingParameters** hit_params,
- BlastEffectiveLengthsParameters** eff_len_params,
- BlastGapAlignStruct** gap_align)
+ const BlastSeqSrc* seq_src,
+ const BlastScoringOptions* scoring_options,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ BlastQueryInfo* query_info,
+ BlastScoreBlk* sbp,
+ BlastScoringParameters** score_params,
+ BlastExtensionParameters** ext_params,
+ BlastHitSavingParameters** hit_params,
+ BlastEffectiveLengthsParameters** eff_len_params,
+ BlastGapAlignStruct** gap_align)
{
Int2 status = 0;
Uint4 max_subject_length;
@@ -652,6 +674,8 @@ BLAST_GapAlignSetUp(Uint1 program_number,
*eff_len_params, sbp, query_info)) != 0)
return status;
+ BlastScoringParametersNew(scoring_options, sbp, score_params);
+
BlastExtensionParametersNew(program_number, ext_options, sbp,
query_info, ext_params);
@@ -662,9 +686,8 @@ BLAST_GapAlignSetUp(Uint1 program_number,
maximal subject sequence length */
max_subject_length = BLASTSeqSrcGetMaxSeqLen(seq_src);
- if ((status = BLAST_GapAlignStructNew(scoring_options, *ext_params,
- max_subject_length, query_info->max_length, sbp,
- gap_align)) != 0) {
+ if ((status = BLAST_GapAlignStructNew(*score_params, *ext_params,
+ max_subject_length, sbp, gap_align)) != 0) {
return status;
}
@@ -688,7 +711,8 @@ Int2 BLAST_OneSubjectUpdateParameters(Uint1 program_number,
return status;
/* Update cutoff scores in hit saving parameters */
BlastHitSavingParametersUpdate(program_number, ext_params,
- sbp, query_info, hit_params);
+ sbp, query_info,
+ hit_params);
if (word_params) {
/* Update cutoff scores in initial word parameters */
diff --git a/algo/blast/core/blast_setup.h b/algo/blast/core/blast_setup.h
index 02ca0229..fa406de1 100644
--- a/algo/blast/core/blast_setup.h
+++ b/algo/blast/core/blast_setup.h
@@ -1,60 +1,59 @@
-/* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_setup.h
-
-Author: Tom Madden
-
-Contents: Utilities initialize/setup BLAST.
-
-$Revision: 1.36 $
+/* $Id: blast_setup.h,v 1.40 2004/06/16 14:53:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Tom Madden
+ *
+ */
-******************************************************************************/
+/** @file blast_setup.h
+ * Utilities to initialize and set up BLAST.
+ */
#ifndef __BLAST_SETUP__
#define __BLAST_SETUP__
-#ifdef __cplusplus
-extern "C" {
-#endif
-
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_options.h>
#include <algo/blast/core/blast_stat.h>
#include <algo/blast/core/blast_extend.h>
#include <algo/blast/core/blast_gapalign.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/** "Main" setup routine for BLAST. Calculates all information for BLAST search
* that is dependent on the ASN.1 structures.
+ * @todo FIXME: this function only filters query and sets up score block structure
* @param program_number Type of BLAST program (0=blastn, ...). [in]
* @param qsup_options options for query setup. [in]
* @param scoring_options options for scoring. [in]
* @param hit_options options for saving hits. [in]
* @param query_blk BLAST_SequenceBlk* for the query. [in]
* @param query_info The query information block [in]
+ * @param scale_factor Multiplier for cutoff and dropoff scores [in]
* @param lookup_segments Start/stop locations for non-masked query
* segments [out]
* @param filter_slp_out Filtering/masking locations. [out]
@@ -66,9 +65,12 @@ Int2 BLAST_MainSetUp(Uint1 program_number,
const BlastScoringOptions* scoring_options,
const BlastHitSavingOptions* hit_options,
BLAST_SequenceBlk* query_blk,
- BlastQueryInfo* query_info, BlastSeqLoc* *lookup_segments,
+ BlastQueryInfo* query_info,
+ double scale_factor,
+ BlastSeqLoc* *lookup_segments,
BlastMaskLoc* *filter_slp_out,
- BlastScoreBlk* *sbpp, Blast_Message* *blast_message);
+ BlastScoreBlk* *sbpp,
+ Blast_Message* *blast_message);
/** BlastScoreBlkGappedFill, fills the ScoreBlkPtr for a gapped search.
* Should be moved to blast_stat.c in the future.
@@ -108,6 +110,7 @@ Int2 BLAST_CalcEffLengths (Uint1 program_number,
* @param hit_options options for saving hits. [in]
* @param query_info The query information block [in]
* @param sbp Contains scoring information. [in]
+ * @param score_params Parameters for scoring [out]
* @param ext_params Parameters for gapped extension [out]
* @param hit_params Parameters for saving hits [out]
* @param eff_len_params Parameters for search space calculations [out]
@@ -122,6 +125,7 @@ BLAST_GapAlignSetUp(Uint1 program_number,
const BlastHitSavingOptions* hit_options,
BlastQueryInfo* query_info,
BlastScoreBlk* sbp,
+ BlastScoringParameters** score_params,
BlastExtensionParameters** ext_params,
BlastHitSavingParameters** hit_params,
BlastEffectiveLengthsParameters** eff_len_params,
@@ -144,14 +148,14 @@ BLAST_GapAlignSetUp(Uint1 program_number,
* with the current sequence data [in] [out]
*/
Int2 BLAST_OneSubjectUpdateParameters(Uint1 program_number,
- Uint4 subject_length,
- const BlastScoringOptions* scoring_options,
- BlastQueryInfo* query_info,
- BlastScoreBlk* sbp,
- const BlastExtensionParameters* ext_params,
- BlastHitSavingParameters* hit_params,
- BlastInitialWordParameters* word_params,
- BlastEffectiveLengthsParameters* eff_len_params);
+ Uint4 subject_length,
+ const BlastScoringOptions* scoring_options,
+ BlastQueryInfo* query_info,
+ BlastScoreBlk* sbp,
+ const BlastExtensionParameters* ext_params,
+ BlastHitSavingParameters* hit_params,
+ BlastInitialWordParameters* word_params,
+ BlastEffectiveLengthsParameters* eff_len_params);
/** BlastScoreBlkMatrixInit, fills score matrix parameters in the ScoreBlkPtr
* Should be moved to blast_stat.c in the future.
@@ -163,14 +167,19 @@ Int2 BLAST_OneSubjectUpdateParameters(Uint1 program_number,
Int2
BlastScoreBlkMatrixInit(Uint1 program_number,
- const BlastScoringOptions* scoring_options,
- BlastScoreBlk* sbp);
+ const BlastScoringOptions* scoring_options,
+ BlastScoreBlk* sbp);
Int2
-BlastSetup_GetScoreBlock(BLAST_SequenceBlk* query_blk, BlastQueryInfo* query_info,
- const BlastScoringOptions* scoring_options, Uint1 program_number, Boolean phi_align,
- BlastScoreBlk* *sbpp, Blast_Message* *blast_message);
+BlastSetup_GetScoreBlock(BLAST_SequenceBlk* query_blk,
+ BlastQueryInfo* query_info,
+ const BlastScoringOptions* scoring_options,
+ Uint1 program_number,
+ Boolean phi_align,
+ BlastScoreBlk* *sbpp,
+ double scale_factor,
+ Blast_Message* *blast_message);
#ifdef __cplusplus
}
@@ -180,6 +189,21 @@ BlastSetup_GetScoreBlock(BLAST_SequenceBlk* query_blk, BlastQueryInfo* query_inf
/*
*
* $Log: blast_setup.h,v $
+* Revision 1.40 2004/06/16 14:53:03 dondosha
+* Moved extern "C" after the #includes
+*
+* Revision 1.39 2004/05/19 14:52:01 camacho
+* 1. Added doxygen tags to enable doxygen processing of algo/blast/core
+* 2. Standardized copyright, CVS $Id string, $Log and rcsid formatting and
+* location
+* 3. Added use of @todo doxygen keyword
+*
+* Revision 1.38 2004/05/17 16:38:08 camacho
+* Make function declarations more readable
+*
+* Revision 1.37 2004/05/07 15:36:40 papadopo
+* add scale factor as input argument to BlastMainSetup and GetScoreBlk
+*
* Revision 1.36 2004/03/30 15:49:07 madden
* Add prototype for BlastSetup_GetScoreBlock
*
diff --git a/algo/blast/core/blast_stat.c b/algo/blast/core/blast_stat.c
index 3ece0545..f0f564cf 100644
--- a/algo/blast/core/blast_stat.c
+++ b/algo/blast/core/blast_stat.c
@@ -1,286 +1,57 @@
-static char const rcsid[] = "$Id: blast_stat.c,v 1.66 2004/05/04 13:00:02 madden Exp $";
-
-/* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-/*****************************************************************************
-
-File name: blast_stat.c
-
-Author: Tom Madden
-
-Contents: Functions to calculate BLAST probabilities etc.
-
-Detailed Contents:
-
- - allocate and deallocate structures used by BLAST to calculate
- probabilities etc.
-
- - calculate residue frequencies for query and "average" database.
-
- - read in matrix.
-
- - calculate sum-p from a collection of HSP's, for both the case
- of a "small" gap and a "large" gap, when give a total score and the
- number of HSP's.
-
- - calculate expect values for p-values.
-
- - calculate pseuod-scores from p-values.
-
-******************************************************************************
- * $Revision: 1.66 $
- * $Log: blast_stat.c,v $
- * Revision 1.66 2004/05/04 13:00:02 madden
- * Change BlastKarlinBlkStandardCalcEx to more descriptive Blast_KarlinBlkIdealCalc, make public
- *
- * Revision 1.65 2004/04/30 14:39:44 papadopo
- * 1. Remove unneeded #defines
- * 2. use BLAST_SCORE_RANGE_MAX during RPS PSSM creation instead of
- * (possibly incompatible) RPS_SCORE_MAX
- * 3. return NULL instead of FALSE on an error
- *
- * Revision 1.64 2004/04/30 12:58:49 camacho
- * Replace RPSKarlinLambdaNR by Blast_KarlinLambdaNR
- *
- * Revision 1.63 2004/04/29 20:32:38 papadopo
- * remove RPS_SCORE_MIN, since it turned out to be a workaround for a bug that has since been fixed
- *
- * Revision 1.62 2004/04/29 19:58:03 camacho
- * Use generic matrix allocator/deallocator from blast_psi_priv.h
- *
- * Revision 1.61 2004/04/28 14:40:23 madden
- * Changes from Mike Gertz:
- * - I created the new routine BLAST_GapDecayDivisor that computes a
- * divisor used to weight the evalue of a collection of distinct
- * alignments.
- * - I removed BLAST_GapDecay and BLAST_GapDecayInverse which had become
- * redundant.
- * - I modified the BLAST_Cutoffs routine so that it uses the value
- * returned by BLAST_GapDecayDivisor to weight evalues.
- * - I modified BLAST_SmallGapSumE, BLAST_LargeGapSumE and
- * BLAST_UnevenGapSumE no longer refer to the gap_prob parameter.
- * Replaced the gap_decay_rate parameter of each of these routines with
- * a weight_divisor parameter. Added documentation.
- *
- * Revision 1.60 2004/04/23 19:06:33 camacho
- * Do NOT use lowercase names for #defines
- *
- * Revision 1.59 2004/04/23 13:49:20 madden
- * Cleaned up ifndef in BlastKarlinLHtoK
- *
- * Revision 1.58 2004/04/23 13:21:25 madden
- * Rewrote BlastKarlinLHtoK to do the following and more:
- * 1. fix a bug whereby the wrong formula was used when high score == 1
- * and low score == -1;
- * 2. fix a methodological error of truncating the first sum
- * and trying to make it converge quickly by adding terms
- * of a geometric progression, even though the geometric progression
- * estimate is not correct in all cases;
- * the old adjustment code is left in for historical purposes but
- * #ifdef'd out
- * 3. Eliminate the Boolean bi_modal_score variable. The old test that
- * set the value of bi_modal_score would frequently fail to choose the
- * correct value due to rounding error.
- * 4. changed numerous local variable names to make them more meaningful;
- * 5. added substantial comments to explain what the procedure
- * is doing and what each variable represents
- *
- * Revision 1.57 2004/04/19 12:58:18 madden
- * Changed BLAST_KarlinBlk to Blast_KarlinBlk to avoid conflict with blastkar.h structure, renamed some functions to start with Blast_Karlin, made Blast_KarlinBlkDestruct public
- *
- * Revision 1.56 2004/04/12 18:57:31 madden
- * Rename BLAST_ResFreq to Blast_ResFreq, make Blast_ResFreqNew, Blast_ResFreqDestruct, and Blast_ResFreqStdComp non-static
- *
- * Revision 1.55 2004/04/08 13:53:10 papadopo
- * fix doxygen warning
- *
- * Revision 1.54 2004/04/07 03:06:16 camacho
- * Added blast_encoding.[hc], refactoring blast_stat.[hc]
- *
-v * Revision 1.53 2004/04/05 18:53:35 madden
- * Set dimensions if matrix from memory
- *
- * Revision 1.52 2004/04/01 14:14:02 lavr
- * Spell "occurred", "occurrence", and "occurring"
- *
- * Revision 1.51 2004/03/31 17:50:09 papadopo
- * Mike Gertz' changes for length adjustment calculations
- *
- * Revision 1.50 2004/03/11 18:52:41 camacho
- * Remove THREADS_IMPLEMENTED
- *
- * Revision 1.49 2004/03/10 18:00:06 camacho
- * Remove outdated references to blastkar
- *
- * Revision 1.48 2004/03/05 17:52:33 papadopo
- * Allow 32-bit context numbers for queries
- *
- * Revision 1.47 2004/03/04 21:07:51 papadopo
- * add RPS BLAST functionality
- *
- * Revision 1.46 2004/02/19 21:16:48 dondosha
- * Use enum type for severity argument in Blast_MessageWrite
- *
- * Revision 1.45 2003/12/05 16:03:57 camacho
- * Remove compiler warnings
- *
- * Revision 1.44 2003/11/28 22:39:11 camacho
- * + static keyword to BlastKarlinLtoH
- *
- * Revision 1.43 2003/11/28 15:03:48 camacho
- * Added static keyword to BlastKarlinLtoH
- *
- * Revision 1.42 2003/11/26 19:12:13 madden
- * code to simplify some routines and use NlmKarlinLambdaNR in place of BlastKarlinLambdaBis (following Mike Gertzs changes to blastkar.c )
- *
- * Revision 1.41 2003/11/24 23:18:32 dondosha
- * Added gap_decay_rate argument to BLAST_Cutoffs; removed BLAST_Cutoffs_simple
- *
- * Revision 1.40 2003/11/19 15:17:42 dondosha
- * Removed unused members from Karlin block structure
- *
- * Revision 1.39 2003/10/16 15:55:22 coulouri
- * fix uninitialized variables
- *
- * Revision 1.38 2003/10/16 15:52:08 coulouri
- * fix uninitialized variables
- *
- * Revision 1.37 2003/10/15 16:59:43 coulouri
- * type correctness fixes
- *
- * Revision 1.36 2003/10/02 22:08:34 dondosha
- * Corrections for one-strand translated searches
- *
- * Revision 1.35 2003/09/26 19:01:59 madden
- * Prefix ncbimath functions with BLAST_
- *
- * Revision 1.34 2003/09/09 14:21:39 coulouri
- * change blastkar.h to blast_stat.h
- *
- * Revision 1.33 2003/09/02 21:12:07 camacho
- * Fix small memory leak
- *
- * Revision 1.32 2003/08/26 15:23:51 dondosha
- * Rolled back previous change as it is not necessary any more
- *
- * Revision 1.31 2003/08/25 22:29:07 dondosha
- * Default matrix loading is defined only in C++ toolkit
- *
- * Revision 1.30 2003/08/25 18:05:41 dondosha
- * Moved assert statement after variables declarations
- *
- * Revision 1.29 2003/08/25 16:23:33 camacho
- * +Loading protein scoring matrices from utils/tables
- *
- * Revision 1.28 2003/08/11 15:01:59 dondosha
- * Added algo/blast/core to all #included headers
- *
- * Revision 1.27 2003/08/01 17:27:04 dondosha
- * Renamed external functions to avoid collisions with ncbitool library; made other functions static
- *
- * Revision 1.26 2003/07/31 18:48:49 dondosha
- * Use Int4 instead of BLAST_Score
- *
- * Revision 1.25 2003/07/31 17:48:06 madden
- * Remove call to FileLength
- *
- * Revision 1.24 2003/07/31 14:31:41 camacho
- * Replaced Char for char
- *
- * Revision 1.23 2003/07/31 14:19:28 camacho
- * Replaced FloatHi for double
- *
- * Revision 1.22 2003/07/31 00:32:37 camacho
- * Eliminated Ptr notation
- *
- * Revision 1.21 2003/07/30 22:08:09 dondosha
- * Process of finding path to the matrix is moved out of the blast library
- *
- * Revision 1.20 2003/07/30 21:52:41 camacho
- * Follow conventional structure definition
- *
- * Revision 1.19 2003/07/30 19:39:14 camacho
- * Remove PNTRs
- *
- * Revision 1.18 2003/07/30 17:58:25 dondosha
- * Changed ValNode to ListNode
- *
- * Revision 1.17 2003/07/30 17:15:00 dondosha
- * Minor fixes for very strict compiler warnings
+/* $Id: blast_stat.c,v 1.82 2004/06/10 13:21:24 madden Exp $
+ * ===========================================================================
*
- * Revision 1.16 2003/07/30 17:06:40 camacho
- * Removed old cvs log
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
*
- * Revision 1.15 2003/07/30 16:32:02 madden
- * Use ansi functions when possible
- *
- * Revision 1.14 2003/07/30 15:29:37 madden
- * Removed MemSets
- *
- * Revision 1.13 2003/07/29 14:42:31 coulouri
- * use strdup() instead of StringSave()
- *
- * Revision 1.12 2003/07/28 19:04:15 camacho
- * Replaced all MemNews for calloc
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
*
- * Revision 1.11 2003/07/28 03:41:49 camacho
- * Use f{open,close,gets} instead of File{Open,Close,Gets}
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
*
- * Revision 1.10 2003/07/25 21:12:28 coulouri
- * remove constructions of the form "return sfree();" and "a=sfree(a);"
+ * Please cite the author in any work or product based on this material.
*
- * Revision 1.9 2003/07/25 18:58:43 camacho
- * Avoid using StrUpper and StringHasNoText
+ * ===========================================================================
*
- * Revision 1.8 2003/07/25 17:25:43 coulouri
- * in progres:
- * * use malloc/calloc/realloc instead of Malloc/Calloc/Realloc
- * * add sfree() macro and __sfree() helper function to util.[ch]
- * * use sfree() instead of MemFree()
+ * Author: Tom Madden
*
- * Revision 1.7 2003/07/24 22:37:33 dondosha
- * Removed some unused function parameters
+ */
+
+/** @file blast_stat.c
+ * Functions to calculate BLAST probabilities etc.
+ * Detailed Contents:
*
- * Revision 1.6 2003/07/24 22:01:44 camacho
- * Removed unused variables
+ * - allocate and deallocate structures used by BLAST to calculate
+ * probabilities etc.
*
- * Revision 1.5 2003/07/24 21:31:06 dondosha
- * Changed to calls to BlastConstructErrorMessage to API from blast_message.h
+ * - calculate residue frequencies for query and "average" database.
*
- * Revision 1.4 2003/07/24 20:38:30 dondosha
- * Removed LIBCALL etc. macros
+ * - read in matrix or load it from memory.
*
- * Revision 1.3 2003/07/24 17:37:46 dondosha
- * Removed MakeBlastScore function that is dependent on objalign.h
+ * - calculate sum-p from a collection of HSP's, for both the case
+ * of a "small" gap and a "large" gap, when given a total score and the
+ * number of HSP's.
*
- * Revision 1.2 2003/07/24 15:50:49 dondosha
- * Commented out mutex operations
+ * - calculate expect values for p-values.
*
- * Revision 1.1 2003/07/24 15:18:09 dondosha
- * Copy of blastkar.h from ncbitools library, stripped of dependency on ncbiobj
+ * - calculate pseudo-scores from p-values.
*
- * */
+ * @todo FIXME needs doxygen comments
+ */
+
+static char const rcsid[] =
+ "$Id: blast_stat.c,v 1.82 2004/06/10 13:21:24 madden Exp $";
+
#include <algo/blast/core/blast_stat.h>
#include <algo/blast/core/blast_util.h>
#include <util/tables/raw_scoremat.h>
@@ -288,39 +59,53 @@ v * Revision 1.53 2004/04/05 18:53:35 madden
#include "blast_psi_priv.h"
/* OSF1 apparently doesn't like this. */
-#if defined(HUGE_VAL) && !defined(OS_UNIX_OSF1)
-#define BLASTKAR_HUGE_VAL HUGE_VAL
+#if defined(HUGE_VAL) && !defined(OS_UNIX_OSF1)
+#define BLASTKAR_HUGE_VAL HUGE_VAL /**< Rename HUGE_VAL for OSF1. */
#else
-#define BLASTKAR_HUGE_VAL 1.e30
+#define BLASTKAR_HUGE_VAL 1.e30 /**< Redefine HUGE_VAL for OSF1. */
#endif
-/* Allocates and Deallocates the two-dimensional matrix. */
-static BLASTMatrixStructure* BlastMatrixAllocate (Int2 alphabet_size);
+/** Allocates the two-dimensional matrix.
+ * @param alphabet_size the number of letters in the alphabet
+ * @return the allocated matrix
+ */
+static SBLASTMatrixStructure* BlastMatrixAllocate (Int2 alphabet_size);
/* performs sump calculation, used by BlastSumPStd */
static double BlastSumPCalc (int r, double s);
-#define COMMENT_CHR '#'
-#define TOKSTR " \t\n\r"
#define BLAST_MAX_ALPHABET 40 /* ncbistdaa is only 26, this should be enough */
-/*
- How many of the first bases are not ambiguous
- (it's four, of course).
-*/
-#define NUMBER_NON_AMBIG_BP 4
+#define BLAST_SCORE_RANGE_MAX (BLAST_SCORE_MAX - BLAST_SCORE_MIN)
+
+/****************************************************************************
+For more accuracy in the calculation of K, set K_SUMLIMIT to 0.00001.
+For high speed in the calculation of K, use a K_SUMLIMIT of 0.001
+Note: statistical significance is often not greatly affected by the value
+of K, so high accuracy is generally unwarranted.
+*****************************************************************************/
+#define BLAST_KARLIN_K_SUMLIMIT_DEFAULT 0.0001 /**< K_SUMLIMIT_DEFAULT == sumlimit used in BlastKarlinLHtoK() */
+
+#define BLAST_KARLIN_LAMBDA_ACCURACY_DEFAULT (1.e-5) /**< LAMBDA_ACCURACY_DEFAULT == accuracy to which Lambda should be calc'd */
+
+#define BLAST_KARLIN_LAMBDA_ITER_DEFAULT 17 /**< LAMBDA_ITER_DEFAULT == no. of iterations in LambdaBis = ln(accuracy)/ln(2)*/
+
+#define BLAST_KARLIN_LAMBDA0_DEFAULT 0.5 /**< Initial guess for the value of Lambda in BlastKarlinLambdaNR */
-/* Used in BlastKarlinBlkGappedCalc */
-typedef double array_of_8[8];
+#define BLAST_KARLIN_K_ITER_MAX 100 /**< upper limit on iterations for BlastKarlinLHtoK */
+#define BLAST_SUMP_EPSILON_DEFAULT 0.002 /**< accuracy for SumP calculations */
-/* Used to temporarily store matrix values for retrieval. */
+
+typedef double array_of_8[8]; /**< Holds values (gap-opening, extension, etc.) for a matrix. */
+
+/** Used to temporarily store matrix values for retrieval. */
typedef struct MatrixInfo {
- char* name; /* name of matrix (e.g., BLOSUM90). */
- array_of_8 *values; /* The values (below). */
- Int4 *prefs; /* Preferences for display. */
- Int4 max_number_values; /* number of values (e.g., BLOSUM90_VALUES_MAX). */
+ char* name; /**< name of matrix (e.g., BLOSUM90). */
+ array_of_8 *values; /**< The values (gap-opening, extension etc.). */
+ Int4 *prefs; /**< Preferences for display. */
+ Int4 max_number_values; /**< number of values (e.g., BLOSUM90_VALUES_MAX). */
} MatrixInfo;
@@ -398,7 +183,7 @@ add two lines before the return at the end of the function:
-#define BLOSUM45_VALUES_MAX 14
+#define BLOSUM45_VALUES_MAX 14 /**< Number of different combinations supported for BLOSUM45. */
static double blosum45_values[BLOSUM45_VALUES_MAX][8] = {
{(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.2291, 0.0924, 0.2514, 0.9113, -5.7},
{13, 3, (double) INT2_MAX, 0.207, 0.049, 0.14, 1.5, -22},
@@ -414,7 +199,7 @@ static double blosum45_values[BLOSUM45_VALUES_MAX][8] = {
{18, 1, (double) INT2_MAX, 0.198, 0.032, 0.10, 2.0, -43},
{17, 1, (double) INT2_MAX, 0.189, 0.024, 0.079, 2.4, -57},
{16, 1, (double) INT2_MAX, 0.176, 0.016, 0.063, 2.8, -67},
-};
+}; /**< Supported values (gap-existence, extension, etc.) for BLOSUM45. */
static Int4 blosum45_prefs[BLOSUM45_VALUES_MAX] = {
BLAST_MATRIX_NOMINAL,
@@ -434,7 +219,7 @@ BLAST_MATRIX_NOMINAL
};
-#define BLOSUM50_VALUES_MAX 16
+#define BLOSUM50_VALUES_MAX 16 /**< Number of different combinations supported for BLOSUM50. */
static double blosum50_values[BLOSUM50_VALUES_MAX][8] = {
{(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.2318, 0.112, 0.3362, 0.6895, -4.0},
{13, 3, (double) INT2_MAX, 0.212, 0.063, 0.19, 1.1, -16},
@@ -452,7 +237,7 @@ static double blosum50_values[BLOSUM50_VALUES_MAX][8] = {
{17, 1, (double) INT2_MAX, 0.198, 0.037, 0.12, 1.6, -33},
{16, 1, (double) INT2_MAX, 0.186, 0.025, 0.10, 1.9, -42},
{15, 1, (double) INT2_MAX, 0.171, 0.015, 0.063, 2.7, -76},
-};
+}; /**< Supported values (gap-existence, extension, etc.) for BLOSUM50. */
static Int4 blosum50_prefs[BLOSUM50_VALUES_MAX] = {
BLAST_MATRIX_NOMINAL,
@@ -473,7 +258,7 @@ BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL
};
-#define BLOSUM62_VALUES_MAX 12
+#define BLOSUM62_VALUES_MAX 12 /**< Number of different combinations supported for BLOSUM62. */
static double blosum62_values[BLOSUM62_VALUES_MAX][8] = {
{(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3176, 0.134, 0.4012, 0.7916, -3.2},
{11, 2, (double) INT2_MAX, 0.297, 0.082, 0.27, 1.1, -10},
@@ -487,7 +272,7 @@ static double blosum62_values[BLOSUM62_VALUES_MAX][8] = {
{11, 1, (double) INT2_MAX, 0.267, 0.041, 0.14, 1.9, -30},
{10, 1, (double) INT2_MAX, 0.243, 0.024, 0.10, 2.5, -44},
{9, 1, (double) INT2_MAX, 0.206, 0.010, 0.052, 4.0, -87},
-};
+}; /**< Supported values (gap-existence, extension, etc.) for BLOSUM62. */
static Int4 blosum62_prefs[BLOSUM62_VALUES_MAX] = {
BLAST_MATRIX_NOMINAL,
@@ -505,7 +290,7 @@ static Int4 blosum62_prefs[BLOSUM62_VALUES_MAX] = {
};
-#define BLOSUM80_VALUES_MAX 10
+#define BLOSUM80_VALUES_MAX 10 /**< Number of different combinations supported for BLOSUM80. */
static double blosum80_values[BLOSUM80_VALUES_MAX][8] = {
{(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3430, 0.177, 0.6568, 0.5222, -1.6},
{25, 2, (double) INT2_MAX, 0.342, 0.17, 0.66, 0.52, -1.6},
@@ -517,7 +302,7 @@ static double blosum80_values[BLOSUM80_VALUES_MAX][8] = {
{11, 1, (double) INT2_MAX, 0.314, 0.095, 0.35, 0.90, -9},
{10, 1, (double) INT2_MAX, 0.299, 0.071, 0.27, 1.1, -14},
{9, 1, (double) INT2_MAX, 0.279, 0.048, 0.20, 1.4, -19},
-};
+}; /**< Supported values (gap-existence, extension, etc.) for BLOSUM80. */
static Int4 blosum80_prefs[BLOSUM80_VALUES_MAX] = {
BLAST_MATRIX_NOMINAL,
@@ -531,7 +316,7 @@ static Int4 blosum80_prefs[BLOSUM80_VALUES_MAX] = {
BLAST_MATRIX_NOMINAL
};
-#define BLOSUM90_VALUES_MAX 8
+#define BLOSUM90_VALUES_MAX 8 /**< Number of different combinations supported for BLOSUM90. */
static double blosum90_values[BLOSUM90_VALUES_MAX][8] = {
{(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3346, 0.190, 0.7547, 0.4434, -1.4},
{9, 2, (double) INT2_MAX, 0.310, 0.12, 0.46, 0.67, -6},
@@ -541,7 +326,7 @@ static double blosum90_values[BLOSUM90_VALUES_MAX][8] = {
{11, 1, (double) INT2_MAX, 0.302, 0.093, 0.39, 0.78, -8},
{10, 1, (double) INT2_MAX, 0.290, 0.075, 0.28, 1.04, -15},
{9, 1, (double) INT2_MAX, 0.265, 0.044, 0.20, 1.3, -19},
-};
+}; /**< Supported values (gap-existence, extension, etc.) for BLOSUM90. */
static Int4 blosum90_prefs[BLOSUM90_VALUES_MAX] = {
BLAST_MATRIX_NOMINAL,
@@ -554,7 +339,7 @@ static Int4 blosum90_prefs[BLOSUM90_VALUES_MAX] = {
BLAST_MATRIX_NOMINAL
};
-#define PAM250_VALUES_MAX 16
+#define PAM250_VALUES_MAX 16 /**< Number of different combinations supported for PAM250. */
static double pam250_values[PAM250_VALUES_MAX][8] = {
{(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.2252, 0.0868, 0.2223, 0.98, -5.0},
{15, 3, (double) INT2_MAX, 0.205, 0.049, 0.13, 1.6, -23},
@@ -572,7 +357,7 @@ static double pam250_values[PAM250_VALUES_MAX][8] = {
{19, 1, (double) INT2_MAX, 0.192, 0.029, 0.083, 2.3, -52},
{18, 1, (double) INT2_MAX, 0.183, 0.021, 0.070, 2.6, -60},
{17, 1, (double) INT2_MAX, 0.171, 0.014, 0.052, 3.3, -86},
-};
+}; /**< Supported values (gap-existence, extension, etc.) for PAM250. */
static Int4 pam250_prefs[PAM250_VALUES_MAX] = {
BLAST_MATRIX_NOMINAL,
@@ -593,7 +378,7 @@ BLAST_MATRIX_NOMINAL,
BLAST_MATRIX_NOMINAL
};
-#define PAM30_VALUES_MAX 7
+#define PAM30_VALUES_MAX 7 /**< Number of different combinations supported for PAM30. */
static double pam30_values[PAM30_VALUES_MAX][8] = {
{(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3400, 0.283, 1.754, 0.1938, -0.3},
{7, 2, (double) INT2_MAX, 0.305, 0.15, 0.87, 0.35, -3},
@@ -602,7 +387,7 @@ static double pam30_values[PAM30_VALUES_MAX][8] = {
{10, 1, (double) INT2_MAX, 0.309, 0.15, 0.88, 0.35, -3},
{9, 1, (double) INT2_MAX, 0.294, 0.11, 0.61, 0.48, -6},
{8, 1, (double) INT2_MAX, 0.270, 0.072, 0.40, 0.68, -10},
-};
+}; /**< Supported values (gap-existence, extension, etc.) for PAM30. */
static Int4 pam30_prefs[PAM30_VALUES_MAX] = {
BLAST_MATRIX_NOMINAL,
@@ -615,7 +400,7 @@ BLAST_MATRIX_NOMINAL,
};
-#define PAM70_VALUES_MAX 7
+#define PAM70_VALUES_MAX 7 /**< Number of different combinations supported for PAM70. */
static double pam70_values[PAM70_VALUES_MAX][8] = {
{(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.3345, 0.229, 1.029, 0.3250, -0.7},
{8, 2, (double) INT2_MAX, 0.301, 0.12, 0.54, 0.56, -5},
@@ -624,7 +409,7 @@ static double pam70_values[PAM70_VALUES_MAX][8] = {
{11, 1, (double) INT2_MAX, 0.305, 0.12, 0.52, 0.59, -6},
{10, 1, (double) INT2_MAX, 0.291, 0.091, 0.41, 0.71, -9},
{9, 1, (double) INT2_MAX, 0.270, 0.060, 0.28, 0.97, -14},
-};
+}; /**< Supported values (gap-existence, extension, etc.) for PAM70. */
static Int4 pam70_prefs[PAM70_VALUES_MAX] = {
BLAST_MATRIX_NOMINAL,
@@ -638,7 +423,7 @@ BLAST_MATRIX_NOMINAL
-#define BLOSUM62_20_VALUES_MAX 65
+#define BLOSUM62_20_VALUES_MAX 65 /**< Number of different combinations supported for BLOSUM62 with 1/20 bit scaling. */
static double blosum62_20_values[BLOSUM62_20_VALUES_MAX][8] = {
{(double) INT2_MAX, (double) INT2_MAX, (double) INT2_MAX, 0.03391, 0.125, 0.4544, 0.07462, -3.2},
{100, 12, (double) INT2_MAX, 0.0300, 0.056, 0.21, 0.14, -15},
@@ -705,7 +490,7 @@ static double blosum62_20_values[BLOSUM62_20_VALUES_MAX][8] = {
{110,13,3, 0.0279, 0.034, 0.10, 0.27, -50},
{115,12,3, 0.0282, 0.035, 0.12, 0.24, -42},
{120,11,3, 0.0286, 0.037, 0.12, 0.24, -44},
-};
+}; /**< Supported values (gap-existence, extension, etc.) for BLOSUM62_20. */
static Int4 blosum62_20_prefs[BLOSUM62_20_VALUES_MAX] = {
BLAST_MATRIX_NOMINAL,
@@ -815,9 +600,10 @@ BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
}
sbp->matrix = sbp->matrix_struct->matrix;
sbp->maxscore = (Int4 *) calloc(BLAST_MATRIX_SIZE, sizeof(Int4));
+ sbp->scale_factor = 1.0;
sbp->number_of_contexts = number_of_contexts;
- sbp->sfp = (BLAST_ScoreFreq**)
- calloc(sbp->number_of_contexts, sizeof(BLAST_ScoreFreq*));
+ sbp->sfp = (Blast_ScoreFreq**)
+ calloc(sbp->number_of_contexts, sizeof(Blast_ScoreFreq*));
sbp->kbp_std = (Blast_KarlinBlk**)
calloc(sbp->number_of_contexts, sizeof(Blast_KarlinBlk*));
sbp->kbp_gap_std = (Blast_KarlinBlk**)
@@ -831,8 +617,8 @@ BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
return sbp;
}
-static BLAST_ScoreFreq*
-BlastScoreFreqDestruct(BLAST_ScoreFreq* sfp)
+Blast_ScoreFreq*
+Blast_ScoreFreqDestruct(Blast_ScoreFreq* sfp)
{
if (sfp == NULL)
return NULL;
@@ -855,8 +641,12 @@ Blast_KarlinBlkDestruct(Blast_KarlinBlk* kbp)
return kbp;
}
-static BLASTMatrixStructure*
-BlastMatrixDestruct(BLASTMatrixStructure* matrix_struct)
+/** Deallocates the SBLASTMatrix structure.
+ * @param matrix_struct the object to be deallocated [in]
+ * @return NULL;
+ */
+static SBLASTMatrixStructure*
+BlastMatrixDestruct(SBLASTMatrixStructure* matrix_struct)
{
@@ -878,7 +668,7 @@ BlastScoreBlkFree(BlastScoreBlk* sbp)
for (index=0; index<sbp->number_of_contexts; index++) {
if (sbp->sfp)
- sbp->sfp[index] = BlastScoreFreqDestruct(sbp->sfp[index]);
+ sbp->sfp[index] = Blast_ScoreFreqDestruct(sbp->sfp[index]);
if (sbp->kbp_std)
sbp->kbp_std[index] = Blast_KarlinBlkDestruct(sbp->kbp_std[index]);
if (sbp->kbp_gap_std)
@@ -967,29 +757,37 @@ BLAST_ScoreSetAmbigRes(BlastScoreBlk* sbp, char ambiguous_res)
return 0;
}
-/*
- Fill in the matrix for blastn using the penaly and rewards
-
- The query sequence alphabet is blastna, the subject sequence
- is ncbi2na. The alphabet blastna is defined in blastkar.h
- and the first four elements of blastna are identical to ncbi2na.
-
- The query is in the first index, the subject is the second.
- if matrix==NULL, it is allocated and returned.
+/** Fill in the matrix for blastn using the penalty and reward
+ * The query sequence alphabet is blastna, the subject sequence
+ * is ncbi2na. The alphabet blastna is defined in blast_stat.h
+ * and the first four elements of blastna are identical to ncbi2na.
+ * if sbp->matrix==NULL, it is allocated.
+ * @param sbp the BlastScoreBlk on which reward, penalty, and matrix will be set [in|out]
+ * @return zero on success.
*/
-static Int4 **BlastScoreBlkMatCreateEx(Int4 **matrix,Int4 penalty,
- Int4 reward)
+static Int2 BlastScoreBlkMatCreate(BlastScoreBlk* sbp)
{
Int2 index1, index2, degen;
Int2 degeneracy[BLASTNA_SIZE+1];
+ Int4 reward; /* reward for match of bases. */
+ Int4 penalty; /* cost for mismatch of bases. */
+ Int4** matrix; /* matrix to be populated. */
+ const int k_number_non_ambig_bp = 4; /* How many of the first bases are not ambiguous (four, of course). */
+
+ ASSERT(sbp);
+
+ reward = sbp->reward;
+ penalty = sbp->penalty;
+ matrix = sbp->matrix;
if(!matrix) {
- BLASTMatrixStructure* matrix_struct;
+ SBLASTMatrixStructure* matrix_struct;
matrix_struct =BlastMatrixAllocate((Int2) BLASTNA_SIZE);
- matrix = matrix_struct->matrix;
+ matrix = sbp->matrix = matrix_struct->matrix;
}
+
for (index1 = 0; index1<BLASTNA_SIZE; index1++) /* blastna */
for (index2 = 0; index2<BLASTNA_SIZE; index2++) /* blastna */
matrix[index1][index2] = 0;
@@ -998,13 +796,13 @@ static Int4 **BlastScoreBlkMatCreateEx(Int4 **matrix,Int4 penalty,
/* ncbi4na gives them the value 1, 2, 4, and 8. */
/* Set the first four bases to degen. one */
- for (index1=0; index1<NUMBER_NON_AMBIG_BP; index1++)
+ for (index1=0; index1<k_number_non_ambig_bp; index1++)
degeneracy[index1] = 1;
- for (index1=NUMBER_NON_AMBIG_BP; index1<BLASTNA_SIZE; index1++) /* blastna */
+ for (index1=k_number_non_ambig_bp; index1<BLASTNA_SIZE; index1++) /* blastna */
{
degen=0;
- for (index2=0; index2<NUMBER_NON_AMBIG_BP; index2++) /* ncbi2na */
+ for (index2=0; index2<k_number_non_ambig_bp; index2++) /* ncbi2na */
{
if (BLASTNA_TO_NCBI4NA[index1] & BLASTNA_TO_NCBI4NA[index2])
degen++;
@@ -1040,33 +838,16 @@ static Int4 **BlastScoreBlkMatCreateEx(Int4 **matrix,Int4 penalty,
for (index1=0; index1<BLASTNA_SIZE; index1++) /* blastna */
matrix[index1][BLASTNA_SIZE-1] = INT4_MIN / 2;
- return matrix;
-}
-/*
- Fill in the matrix for blastn using the penaly and rewards on
- the BlastScoreBlk*.
-
- The query sequence alphabet is blastna, the subject sequence
- is ncbi2na. The alphabet blastna is defined in blastkar.h
- and the first four elements of blastna are identical to ncbi2na.
-
- The query is in the first index, the subject is the second.
-*/
-static Int2 BlastScoreBlkMatCreate(BlastScoreBlk* sbp)
-{
- sbp->matrix = BlastScoreBlkMatCreateEx(sbp->matrix,sbp->penalty, sbp->reward);
sbp->mat_dim1 = BLASTNA_SIZE;
sbp->mat_dim2 = BLASTNA_SIZE;
return 0;
}
-/*
- Read in the matrix from the FILE *fp.
-
- This function ASSUMES that the matrices are in the ncbistdaa
- format. BLAST should be able to use any alphabet, though it
- is expected that ncbistdaa will be used.
+/** Read in the matrix from the FILE *fp.
+ * This function ASSUMES that the matrices are in the ncbistdaa format.
+ * @param sbp the BlastScoreBlk with the matrix to be populated [in|out]
+ * @return zero on success
*/
static Int2
@@ -1085,6 +866,8 @@ BlastScoreBlkMatRead(BlastScoreBlk* sbp, FILE *fp)
double xscore;
register int index1, index2;
Int2 status;
+ const char k_comment_chr = '#';
+ const char* k_token_str = " \t\n\r";
matrix = sbp->matrix;
@@ -1108,15 +891,15 @@ BlastScoreBlkMatRead(BlastScoreBlk* sbp, FILE *fp)
return 2;
}
- if (buf[0] == COMMENT_CHR) {
+ if (buf[0] == k_comment_chr) {
/* save the comment line in a linked list */
*strchr(buf, '\n') = NULLB;
ListNodeCopyStr(&sbp->comments, 0, buf+1);
continue;
}
- if ((cp = strchr(buf, COMMENT_CHR)) != NULL)
+ if ((cp = strchr(buf, k_comment_chr)) != NULL)
*cp = NULLB;
- lp = (char*)strtok(buf, TOKSTR);
+ lp = (char*)strtok(buf, k_token_str);
if (lp == NULL) /* skip blank lines */
continue;
while (lp != NULL) {
@@ -1128,7 +911,7 @@ BlastScoreBlkMatRead(BlastScoreBlk* sbp, FILE *fp)
ch = *lp;
}
a2chars[a2cnt++] = ch;
- lp = (char*)strtok(NULL, TOKSTR);
+ lp = (char*)strtok(NULL, k_token_str);
}
break; /* Exit loop after reading one line. */
@@ -1139,20 +922,20 @@ BlastScoreBlkMatRead(BlastScoreBlk* sbp, FILE *fp)
}
if (sbp->alphabet_code != BLASTNA_SEQ_CODE) {
- sbp->mat_dim2 = a2cnt;
+ sbp->mat_dim2 = sbp->alphabet_size;
}
while (fgets(buf, sizeof(buf), fp) != NULL) {
++lineno;
if ((cp = strchr(buf, '\n')) == NULL) {
return 2;
}
- if ((cp = strchr(buf, COMMENT_CHR)) != NULL)
+ if ((cp = strchr(buf, k_comment_chr)) != NULL)
*cp = NULLB;
- if ((lp = (char*)strtok(buf, TOKSTR)) == NULL)
+ if ((lp = (char*)strtok(buf, k_token_str)) == NULL)
continue;
ch = *lp;
cp = (char*) lp;
- if ((cp = strtok(NULL, TOKSTR)) == NULL) {
+ if ((cp = strtok(NULL, k_token_str)) == NULL) {
return 2;
}
if (a1cnt >= DIM(a1chars)) {
@@ -1176,13 +959,13 @@ BlastScoreBlkMatRead(BlastScoreBlk* sbp, FILE *fp)
strcpy(temp, cp);
if (strcasecmp(temp, "na") == 0) {
- score = BLAST_SCORE_1MIN;
+ score = BLAST_SCORE_MIN;
} else {
if (sscanf(temp, "%lg", &xscore) != 1) {
return 2;
}
/*xscore = MAX(xscore, BLAST_SCORE_1MIN);*/
- if (xscore > BLAST_SCORE_1MAX || xscore < BLAST_SCORE_1MIN) {
+ if (xscore > BLAST_SCORE_MAX || xscore < BLAST_SCORE_MIN) {
return 2;
}
xscore += (xscore >= 0. ? 0.5 : -0.5);
@@ -1191,7 +974,7 @@ BlastScoreBlkMatRead(BlastScoreBlk* sbp, FILE *fp)
m[(int)a2chars[index2++]] = score;
- cp = strtok(NULL, TOKSTR);
+ cp = strtok(NULL, k_token_str);
}
}
@@ -1200,7 +983,7 @@ BlastScoreBlkMatRead(BlastScoreBlk* sbp, FILE *fp)
}
if (sbp->alphabet_code != BLASTNA_SEQ_CODE) {
- sbp->mat_dim1 = a1cnt;
+ sbp->mat_dim1 = sbp->alphabet_size;
}
return 0;
@@ -1213,8 +996,8 @@ BlastScoreBlkMaxScoreSet(BlastScoreBlk* sbp)
Int4 ** matrix;
Int2 index1, index2;
- sbp->loscore = BLAST_SCORE_1MAX;
- sbp->hiscore = BLAST_SCORE_1MIN;
+ sbp->loscore = BLAST_SCORE_MAX;
+ sbp->hiscore = BLAST_SCORE_MIN;
matrix = sbp->matrix;
for (index1=0; index1<sbp->alphabet_size; index1++)
{
@@ -1238,10 +1021,10 @@ BlastScoreBlkMaxScoreSet(BlastScoreBlk* sbp)
/* If the lo/hi-scores are BLAST_SCORE_MIN/BLAST_SCORE_MAX, (i.e., for
gaps), then use other scores. */
- if (sbp->loscore < BLAST_SCORE_1MIN)
- sbp->loscore = BLAST_SCORE_1MIN;
- if (sbp->hiscore > BLAST_SCORE_1MAX)
- sbp->hiscore = BLAST_SCORE_1MAX;
+ if (sbp->loscore < BLAST_SCORE_MIN)
+ sbp->loscore = BLAST_SCORE_MIN;
+ if (sbp->hiscore > BLAST_SCORE_MAX)
+ sbp->hiscore = BLAST_SCORE_MAX;
return 0;
}
@@ -1302,7 +1085,6 @@ BLAST_ScoreBlkMatFill(BlastScoreBlk* sbp, char* matrix_path)
Int2 status = 0;
if (sbp->read_in_matrix) {
-
if (matrix_path && *matrix_path != NULLB) {
FILE *fp = NULL;
@@ -1391,9 +1173,11 @@ Blast_ResFreqNew(const BlastScoreBlk* sbp)
return rfp;
}
+/** Records probability of letter appearing in sequence.
+*/
typedef struct BLAST_LetterProb {
- char ch;
- double p;
+ char ch; /**< residue */
+ double p; /**< probability of residue. */
} BLAST_LetterProb;
#if 0
@@ -1513,17 +1297,8 @@ Blast_ResFreqNormalize(const BlastScoreBlk* sbp, Blast_ResFreq* rfp, double norm
return 0;
}
-/*
- Fills a buffer with the 'standard' alphabet (given by
- STD_AMINO_ACID_FREQS[index].ch).
-
- Return value is the number of residues in alphabet.
- Negative returns upon error.
-*/
-
-static Int2
-BlastGetStdAlphabet (Uint1 alphabet_code, Uint1* residues, Uint4 residues_size)
-
+Int2
+Blast_GetStdAlphabet(Uint1 alphabet_code, Uint1* residues, Uint4 residues_size)
{
Int2 index;
@@ -1556,7 +1331,7 @@ Blast_ResFreqStdComp(const BlastScoreBlk* sbp, Blast_ResFreq* rfp)
if (sbp->protein_alphabet == TRUE)
{
residues = (Uint1*) calloc(DIM(STD_AMINO_ACID_FREQS), sizeof(Uint1));
- retval = BlastGetStdAlphabet(sbp->alphabet_code, residues, DIM(STD_AMINO_ACID_FREQS));
+ retval = Blast_GetStdAlphabet(sbp->alphabet_code, residues, DIM(STD_AMINO_ACID_FREQS));
if (retval < 1)
return retval;
@@ -1580,8 +1355,18 @@ Blast_ResFreqStdComp(const BlastScoreBlk* sbp, Blast_ResFreq* rfp)
return 0;
}
-static BLAST_ResComp*
-BlastResCompDestruct(BLAST_ResComp* rcp)
+/**
+Intermediate structure to store the composition of a sequence
+*/
+
+typedef struct Blast_ResComp {
+ Uint1 alphabet_code; /**< indicates alphabet. */
+ Int4* comp; /**< store composition of a string. */
+ Int4* comp0; /**< Same array as above, starts at zero. */
+} Blast_ResComp;
+
+static Blast_ResComp*
+BlastResCompDestruct(Blast_ResComp* rcp)
{
if (rcp == NULL)
return NULL;
@@ -1594,15 +1379,15 @@ BlastResCompDestruct(BLAST_ResComp* rcp)
}
/*
- Allocated the BLAST_ResComp* for a given alphabet. Only the
+ Allocates the Blast_ResComp* for a given alphabet. Only the
alphabets ncbistdaa and ncbi4na should be used by BLAST.
*/
-static BLAST_ResComp*
+static Blast_ResComp*
BlastResCompNew(BlastScoreBlk* sbp)
{
- BLAST_ResComp* rcp;
+ Blast_ResComp* rcp;
- rcp = (BLAST_ResComp*) calloc(1, sizeof(BLAST_ResComp));
+ rcp = (Blast_ResComp*) calloc(1, sizeof(Blast_ResComp));
if (rcp == NULL)
return NULL;
@@ -1626,7 +1411,7 @@ array is allocated. */
Store the composition of a (query) string.
*/
static Int2
-BlastResCompStr(BlastScoreBlk* sbp, BLAST_ResComp* rcp, char* str, Int4 length)
+BlastResCompStr(BlastScoreBlk* sbp, Blast_ResComp* rcp, char* str, Int4 length)
{
char* lp,* lpmax;
Int2 index;
@@ -1676,7 +1461,8 @@ Blast_ResFreqClr(const BlastScoreBlk* sbp, Blast_ResFreq* rfp)
Calculate the residue frequencies associated with the provided ResComp
*/
static Int2
-Blast_ResFreqResComp(BlastScoreBlk* sbp, Blast_ResFreq* rfp, BLAST_ResComp* rcp)
+Blast_ResFreqResComp(BlastScoreBlk* sbp, Blast_ResFreq* rfp,
+ Blast_ResComp* rcp)
{
Int2 alphabet_max, index;
double sum = 0.;
@@ -1705,7 +1491,7 @@ Blast_ResFreqResComp(BlastScoreBlk* sbp, Blast_ResFreq* rfp, BLAST_ResComp* rcp)
static Int2
Blast_ResFreqString(BlastScoreBlk* sbp, Blast_ResFreq* rfp, char* string, Int4 length)
{
- BLAST_ResComp* rcp;
+ Blast_ResComp* rcp;
rcp = BlastResCompNew(sbp);
@@ -1722,7 +1508,7 @@ static Int2
BlastScoreChk(Int4 lo, Int4 hi)
{
if (lo >= 0 || hi <= 0 ||
- lo < BLAST_SCORE_1MIN || hi > BLAST_SCORE_1MAX)
+ lo < BLAST_SCORE_MIN || hi > BLAST_SCORE_MAX)
return 1;
if (hi - lo > BLAST_SCORE_RANGE_MAX)
@@ -1731,16 +1517,16 @@ BlastScoreChk(Int4 lo, Int4 hi)
return 0;
}
-static BLAST_ScoreFreq*
-BlastScoreFreqNew(Int4 score_min, Int4 score_max)
+Blast_ScoreFreq*
+Blast_ScoreFreqNew(Int4 score_min, Int4 score_max)
{
- BLAST_ScoreFreq* sfp;
+ Blast_ScoreFreq* sfp;
Int4 range;
if (BlastScoreChk(score_min, score_max) != 0)
return NULL;
- sfp = (BLAST_ScoreFreq*) calloc(1, sizeof(BLAST_ScoreFreq));
+ sfp = (Blast_ScoreFreq*) calloc(1, sizeof(Blast_ScoreFreq));
if (sfp == NULL)
return NULL;
@@ -1748,12 +1534,12 @@ BlastScoreFreqNew(Int4 score_min, Int4 score_max)
sfp->sprob = (double*) calloc(range, sizeof(double));
if (sfp->sprob == NULL)
{
- BlastScoreFreqDestruct(sfp);
+ Blast_ScoreFreqDestruct(sfp);
return NULL;
}
sfp->sprob0 = sfp->sprob;
- sfp->sprob -= score_min;
+ sfp->sprob -= score_min; /* center around 0 */
sfp->score_min = score_min;
sfp->score_max = score_max;
sfp->obs_min = sfp->obs_max = 0;
@@ -1761,8 +1547,16 @@ BlastScoreFreqNew(Int4 score_min, Int4 score_max)
return sfp;
}
+/** Calculates the score frequencies.
+ *
+ * @param sbp object with scoring information [in]
+ * @param sfp object to hold frequency information [in|out]
+ * @param rfp1 letter frequencies for first sequence (query) [in]
+ * @param rfp2 letter frequencies for second sequence (database) [in]
+ * @return zero on success
+ */
static Int2
-BlastScoreFreqCalc(BlastScoreBlk* sbp, BLAST_ScoreFreq* sfp, Blast_ResFreq* rfp1, Blast_ResFreq* rfp2)
+BlastScoreFreqCalc(BlastScoreBlk* sbp, Blast_ScoreFreq* sfp, Blast_ResFreq* rfp1, Blast_ResFreq* rfp2)
{
Int4 ** matrix;
Int4 score, obs_min, obs_max;
@@ -1824,14 +1618,11 @@ BlastScoreFreqCalc(BlastScoreBlk* sbp, BLAST_ScoreFreq* sfp, Blast_ResFreq* rfp1
}
-#define DIMOFP0 (iterlimit*range + 1)
-#define DIMOFP0_MAX (BLAST_KARLIN_K_ITER_MAX*BLAST_SCORE_RANGE_MAX+1)
-
#define SMALL_LAMBDA_THRESHOLD 20 /*defines special case in K computation*/
/*threshold is on exp(-Lambda)*/
-/*The following procedure computes K. The input includes Lambda, H,
+/** The following procedure computes K. The input includes Lambda, H,
* and an array of probabilities for each score.
* There are distinct closed form for three cases:
* 1. high score is 1 low score is -1
@@ -1856,10 +1647,15 @@ BlastScoreFreqCalc(BlastScoreBlk* sbp, BLAST_ScoreFreq* sfp, Blast_ResFreq* rfp1
* and tried to replace the tail of the computation of outerSum
* by a geometric series, but the base of the geometric series
* was not accurately estimated in some cases.
+ *
+ * @param sfp object holding scoring frequency information [in]
+ * @param lambda a Karlin-Altschul parameter [in]
+ * @param H a Karlin-Altschul parameter [in]
+ * @return K, another Karlin-Altschul parameter
*/
static double
-BlastKarlinLHtoK(BLAST_ScoreFreq* sfp, double lambda, double H)
+BlastKarlinLHtoK(Blast_ScoreFreq* sfp, double lambda, double H)
{
/*The next array stores the probabilities of getting each possible
score in an alignment of fixed length; the array is shifted
@@ -1918,7 +1714,7 @@ BlastKarlinLHtoK(BLAST_ScoreFreq* sfp, double lambda, double H)
/* Look for the greatest common divisor ("delta" in Appendix of PNAS 87 of
Karlin&Altschul (1990) */
for (i = 1, divisor = -low; i <= range && divisor > 1; ++i) {
- if (probArrayStartLow[i])
+ if (probArrayStartLow[i] != 0.0)
divisor = BLAST_Gcd(divisor, i);
}
@@ -1951,11 +1747,8 @@ BlastKarlinLHtoK(BLAST_ScoreFreq* sfp, double lambda, double H)
sumlimit = BLAST_KARLIN_K_SUMLIMIT_DEFAULT;
iterlimit = BLAST_KARLIN_K_ITER_MAX;
- if (DIMOFP0 > DIMOFP0_MAX) {
- return -1.;
- }
alignmentScoreProbabilities =
- (double *)calloc(DIMOFP0, sizeof(*alignmentScoreProbabilities));
+ (double *)calloc((iterlimit*range + 1), sizeof(*alignmentScoreProbabilities));
if (alignmentScoreProbabilities == NULL)
return -1.;
@@ -2157,7 +1950,7 @@ NlmKarlinLambdaNR( double* probs, Int4 d, Int4 low, Int4 high, double lambda0, d
double
-Blast_KarlinLambdaNR(BLAST_ScoreFreq* sfp, double initialLambdaGuess)
+Blast_KarlinLambdaNR(Blast_ScoreFreq* sfp, double initialLambdaGuess)
{
Int4 low; /* Lowest score (must be negative) */
Int4 high; /* Highest score (must be positive) */
@@ -2176,7 +1969,7 @@ Blast_KarlinLambdaNR(BLAST_ScoreFreq* sfp, double initialLambdaGuess)
sprob = sfp->sprob;
/* Find greatest common divisor of all scores */
for (i = 1, d = -low; i <= high-low && d > 1; ++i) {
- if (sprob[i+low] != 0) {
+ if (sprob[i+low] != 0.0) {
d = BLAST_Gcd(d, i);
}
}
@@ -2190,13 +1983,14 @@ Blast_KarlinLambdaNR(BLAST_ScoreFreq* sfp, double initialLambdaGuess)
return returnValue;
}
-/*
- BlastKarlinLtoH
-
- Calculate H, the relative entropy of the p's and q's
-*/
+/** Calculate H, the relative entropy of the p's and q's
+ *
+ * @param sfp object containing scoring frequency information [in]
+ * @param lambda a Karlin-Altschul parameter [in]
+ * @return H, a Karlin-Altschul parameter
+ */
static double
-BlastKarlinLtoH(BLAST_ScoreFreq* sfp, double lambda)
+BlastKarlinLtoH(Blast_ScoreFreq* sfp, double lambda)
{
Int4 score;
double H, etonlam, sum, scale;
@@ -2216,7 +2010,7 @@ BlastKarlinLtoH(BLAST_ScoreFreq* sfp, double lambda)
}
scale = BLAST_Powi( etonlam, high );
- if( scale > 0 ) {
+ if( scale > 0.0 ) {
H = lambda * sum/scale;
} else { /* Underflow of exp( -lambda * high ) */
H = lambda * exp( lambda * high + log(sum) );
@@ -2287,8 +2081,8 @@ See: Karlin, S. & Altschul, S.F. "Methods for Assessing the Statistical
as the previous formula.
*******************************************************************************/
-static Int2
-BlastKarlinBlkCalc(Blast_KarlinBlk* kbp, BLAST_ScoreFreq* sfp)
+Int2
+Blast_KarlinBlkCalc(Blast_KarlinBlk* kbp, Blast_ScoreFreq* sfp)
{
@@ -2321,11 +2115,7 @@ BlastKarlinBlkCalc(Blast_KarlinBlk* kbp, BLAST_ScoreFreq* sfp)
ErrExit:
kbp->Lambda = kbp->H = kbp->K = -1.;
-#ifdef BLASTKAR_HUGE_VAL
kbp->logK = BLASTKAR_HUGE_VAL;
-#else
- kbp->logK = 1.e30;
-#endif
return 1;
}
@@ -2347,10 +2137,10 @@ BLAST_ScoreBlkFill(BlastScoreBlk* sbp, char* query, Int4 query_length, Int4 cont
stdrfp = Blast_ResFreqNew(sbp);
Blast_ResFreqStdComp(sbp, stdrfp);
Blast_ResFreqString(sbp, rfp, query, query_length);
- sbp->sfp[context_number] = BlastScoreFreqNew(sbp->loscore, sbp->hiscore);
+ sbp->sfp[context_number] = Blast_ScoreFreqNew(sbp->loscore, sbp->hiscore);
BlastScoreFreqCalc(sbp, sbp->sfp[context_number], rfp, stdrfp);
sbp->kbp_std[context_number] = Blast_KarlinBlkCreate();
- retval = BlastKarlinBlkCalc(sbp->kbp_std[context_number], sbp->sfp[context_number]);
+ retval = Blast_KarlinBlkCalc(sbp->kbp_std[context_number], sbp->sfp[context_number]);
if (retval)
{
rfp = Blast_ResFreqDestruct(rfp);
@@ -2358,7 +2148,7 @@ BLAST_ScoreBlkFill(BlastScoreBlk* sbp, char* query, Int4 query_length, Int4 cont
return retval;
}
sbp->kbp_psi[context_number] = Blast_KarlinBlkCreate();
- retval = BlastKarlinBlkCalc(sbp->kbp_psi[context_number], sbp->sfp[context_number]);
+ retval = Blast_KarlinBlkCalc(sbp->kbp_psi[context_number], sbp->sfp[context_number]);
rfp = Blast_ResFreqDestruct(rfp);
stdrfp = Blast_ResFreqDestruct(stdrfp);
@@ -2377,17 +2167,17 @@ Blast_KarlinBlkIdealCalc(BlastScoreBlk* sbp)
{
Blast_KarlinBlk* kbp_ideal;
Blast_ResFreq* stdrfp;
- BLAST_ScoreFreq* sfp;
+ Blast_ScoreFreq* sfp;
stdrfp = Blast_ResFreqNew(sbp);
Blast_ResFreqStdComp(sbp, stdrfp);
- sfp = BlastScoreFreqNew(sbp->loscore, sbp->hiscore);
+ sfp = Blast_ScoreFreqNew(sbp->loscore, sbp->hiscore);
BlastScoreFreqCalc(sbp, sfp, stdrfp, stdrfp);
kbp_ideal = Blast_KarlinBlkCreate();
- BlastKarlinBlkCalc(kbp_ideal, sfp);
+ Blast_KarlinBlkCalc(kbp_ideal, sfp);
stdrfp = Blast_ResFreqDestruct(stdrfp);
- sfp = BlastScoreFreqDestruct(sfp);
+ sfp = Blast_ScoreFreqDestruct(sfp);
return kbp_ideal;
}
@@ -2434,17 +2224,17 @@ Blast_KarlinBlkCreate(void)
return kbp;
}
-static BLASTMatrixStructure*
+static SBLASTMatrixStructure*
BlastMatrixAllocate(Int2 alphabet_size)
{
- BLASTMatrixStructure* matrix_struct;
+ SBLASTMatrixStructure* matrix_struct;
Int2 index;
if (alphabet_size <= 0 || alphabet_size >= BLAST_MATRIX_SIZE)
return NULL;
- matrix_struct = (BLASTMatrixStructure*) calloc(1, sizeof(BLASTMatrixStructure));
+ matrix_struct = (SBLASTMatrixStructure*) calloc(1, sizeof(SBLASTMatrixStructure));
if (matrix_struct == NULL)
return NULL;
@@ -2457,9 +2247,10 @@ BlastMatrixAllocate(Int2 alphabet_size)
return matrix_struct;
}
-/*
- Deallocates MatrixInfo*
-*/
+/** Deallocates MatrixInfo as well as name string.
+ * @param matrix_info the object to be deallocated [in]
+ * @return NULL pointer
+ */
static MatrixInfo*
MatrixInfoDestruct(MatrixInfo* matrix_info)
@@ -2473,12 +2264,16 @@ MatrixInfoDestruct(MatrixInfo* matrix_info)
return NULL;
}
-/*
- Makes New MatrixInfo*
-*/
+/** Allocates New MatrixInfo*
+ * @param name name of matrix [in]
+ * @param values contains information on a matrix [in]
+ * @param prefs contains information on which values are preferred [in]
+ * @param max_number size of those arrays [in]
+ * @return pointer to the allocated MatrixInfo
+ */
static MatrixInfo*
-MatrixInfoNew(char* name, array_of_8 *values, Int4* prefs, Int4 max_number)
+MatrixInfoNew(const char* name, array_of_8 *values, Int4* prefs, Int4 max_number)
{
MatrixInfo* matrix_info;
@@ -2492,6 +2287,10 @@ MatrixInfoNew(char* name, array_of_8 *values, Int4* prefs, Int4 max_number)
return matrix_info;
}
+/** Free linked list of MatrixValues and all associated data
+ * @param vnp linked list of MatrixValues [in]
+ * @return NULL pointer
+ */
static ListNode*
BlastMatrixValuesDestruct(ListNode* vnp)
@@ -2510,12 +2309,10 @@ BlastMatrixValuesDestruct(ListNode* vnp)
return head;
}
-/*
- ListNode* BlastLoadMatrixValues (void)
-
- Loads all the matrix values, returns a ListNode* chain that contains
- MatrixInfo*'s.
-
+/** Loads all the matrix values, returns a ListNode* chain that contains
+ * MatrixInfo*'s.
+ * @return list of MatrixInfos.
+ *
*/
static ListNode*
BlastLoadMatrixValues (void)
@@ -2951,7 +2748,7 @@ below.
static Int4
BlastKarlinEtoS_simple(double E, /* Expect value */
Blast_KarlinBlk* kbp,
- double searchsp) /* size of search space */
+ Int8 searchsp) /* size of search space */
{
double Lambda, K, H; /* parameters for Karlin statistics */
@@ -2994,13 +2791,12 @@ BLAST_GapDecayDivisor(double decayrate, unsigned nsegs )
/*
BlastCutoffs
Calculate the cutoff score, S, and the highest expected score.
- WRG (later modified by TLM).
*/
Int2
BLAST_Cutoffs(Int4 *S, /* cutoff score */
double* E, /* expected no. of HSPs scoring at or above S */
Blast_KarlinBlk* kbp,
- double searchsp, /* size of search space. */
+ Int8 searchsp, /* size of search space. */
Boolean dodecay, /* TRUE ==> use gapdecay feature */
double gap_decay_rate)
{
@@ -3066,7 +2862,7 @@ BLAST_Cutoffs(Int4 *S, /* cutoff score */
double
BLAST_KarlinStoE_simple(Int4 S,
Blast_KarlinBlk* kbp,
- double searchsp) /* size of search space. */
+ Int8 searchsp) /* size of search space. */
{
double Lambda, K, H; /* parameters for Karlin statistics */
@@ -3077,7 +2873,7 @@ BLAST_KarlinStoE_simple(Int4 S,
return -1.;
}
- return searchsp * exp((double)(-Lambda * S) + kbp->logK);
+ return (double) searchsp * exp((double)(-Lambda * S) + kbp->logK);
}
/*
@@ -3164,18 +2960,21 @@ BlastSumP(Int4 r, double s)
return BlastSumPCalc(r, s);
}
-/*
- BlastSumPCalc
-
- Evaluate the following double integral, where r = number of segments
- and s = the adjusted score in nats:
-
- (r-2) oo oo
- Prob(r,s) = r - - (r-2)
- ------------- | exp(-y) | x exp(-exp(x - y/r)) dx dy
- (r-1)! (r-2)! U U
- s 0
-*/
+/**
+ *
+ * Evaluate the following double integral, where r = number of segments
+ *
+ * and s = the adjusted score in nats:
+ *
+ *                 (r-2)         oo           oo
+ *  Prob(r,s) =   r              -            -   (r-2)
+ *              -------------   |   exp(-y)  |   x   exp(-exp(x - y/r)) dx dy
+ *              (r-1)! (r-2)!  U            U
+ *                             s            0
+ * @param r number of segments
+ * @param s adjusted score in nats
+ * @return P value
+ */
static double
BlastSumPCalc(int r, double s)
{
@@ -3260,10 +3059,8 @@ BlastSumPCalc(int r, double s)
do {
d = BLAST_RombergIntegrate(g, args, s, t, epsilon, 0, itmin);
-#ifdef BLASTKAR_HUGE_VAL
if (d == BLASTKAR_HUGE_VAL)
return d;
-#endif
} while (s < mean && d < 0.4 && itmin++ < 4);
return (d < 1. ? d : 1.);
@@ -3288,10 +3085,8 @@ f(double x, void* vp)
register double y;
y = exp(x - ARG_SDIVR);
-#ifdef BLASTKAR_HUGE_VAL
if (y == BLASTKAR_HUGE_VAL)
return 0.;
-#endif
if (ARG_R2 == 0.)
return exp(ARG_ADJ2 - y);
if (x == 0.)
@@ -3437,6 +3232,40 @@ BLAST_LargeGapSumE(
return sum_e;
}
+/** Given a sequence of 'length' amino acid residues, compute the
+ * fraction of each residue, not counting X residues, and store it in resProb.
+ *
+ * @param sequence the sequence to be computed upon [in]
+ * @param length the length of the sequence, including X's [in]
+ * @param resProb array of PSI_ALPHABET_SIZE entries to be filled in [in|out]
+ */
+
+void
+Blast_FillResidueProbability(const Uint1* sequence, Int4 length, double * resProb)
+{
+ Int4 frequency[PSI_ALPHABET_SIZE]; /*frequency of each letter*/
+ Int4 i; /*index*/
+ Int4 denominator; /*length not including X's*/
+
+ denominator = length;
+ for(i = 0; i < PSI_ALPHABET_SIZE; i++)
+ frequency[i] = 0;
+
+ for(i = 0; i < length; i++) {
+ if (sequence[i] != AMINOACID_TO_NCBISTDAA['X'])
+ frequency[sequence[i]]++;
+ else
+ denominator--;
+ }
+
+ for(i = 0; i < PSI_ALPHABET_SIZE; i++) {
+ if (frequency[i] == 0)
+ resProb[i] = 0.0;
+ else
+ resProb[i] = ((double) frequency[i]) /((double) denominator);
+ }
+}
+
/*------------------- RPS BLAST functions --------------------*/
static double
@@ -3476,12 +3305,13 @@ RPSfindUngappedLambda(Char *matrixName)
for indexing convenience the field storing scoreArray points to the
entry for score 0, so that referring to the -k index corresponds to
score -k
+ FIXME: This can be replaced by _PSIComputeScoreProbabilities??
*/
static void
RPSFillScores(Int4 **matrix, Int4 matrixLength,
double *queryProbArray, double *scoreArray,
- BLAST_ScoreFreq* return_sfp, Int4 range)
+ Blast_ScoreFreq* return_sfp, Int4 range)
{
Int4 minScore, maxScore; /*observed minimum and maximum scores */
Int4 i,j; /* indices */
@@ -3523,35 +3353,6 @@ RPSFillScores(Int4 **matrix, Int4 matrixLength,
return_sfp->score_avg += i * return_sfp->sprob[i];
}
-/* Given a sequence of 'length' amino acid residues, compute the
- probability of each residue and put that in the array resProb*/
-
-static void
-RPSFillResidueProbability(Uint1 * sequence, Int4 length, double * resProb)
-{
- Int4 frequency[PSI_ALPHABET_SIZE]; /*frequency of each letter*/
- Int4 i; /*index*/
- Int4 denominator; /*length not including X's*/
-
- denominator = length;
- for(i = 0; i < PSI_ALPHABET_SIZE; i++)
- frequency[i] = 0;
-
- for(i = 0; i < length; i++) {
- if (sequence[i] != AMINOACID_TO_NCBISTDAA['X'])
- frequency[sequence[i]]++;
- else
- denominator--;
- }
-
- for(i = 0; i < PSI_ALPHABET_SIZE; i++) {
- if (frequency[i] == 0)
- resProb[i] = 0.0;
- else
- resProb[i] = ((double) frequency[i]) /((double) denominator);
- }
-}
-
/* Calculate a new PSSM, using composition-based statistics, for use
with RPS BLAST. This function produces a PSSM for a single RPS DB
sequence (of size db_seq_length) and incorporates information from
@@ -3567,7 +3368,7 @@ RPSCalculatePSSM(double scalingFactor, Int4 rps_query_length,
{
double *scoreArray; /*array of score probabilities*/
double *resProb; /*array of probabilities for each residue*/
- BLAST_ScoreFreq * return_sfp;/*score frequency pointers to compute lambda*/
+ Blast_ScoreFreq * return_sfp;/*score frequency pointers to compute lambda*/
Int4* * returnMatrix; /*the PSSM to return */
double initialUngappedLambda;
double scaledInitialUngappedLambda;
@@ -3578,9 +3379,9 @@ RPSCalculatePSSM(double scalingFactor, Int4 rps_query_length,
resProb = (double *)malloc(PSI_ALPHABET_SIZE * sizeof(double));
scoreArray = (double *)malloc(BLAST_SCORE_RANGE_MAX * sizeof(double));
- return_sfp = (BLAST_ScoreFreq *)malloc(sizeof(BLAST_ScoreFreq));
+ return_sfp = (Blast_ScoreFreq *)malloc(sizeof(Blast_ScoreFreq));
- RPSFillResidueProbability(rps_query_seq, rps_query_length, resProb);
+ Blast_FillResidueProbability(rps_query_seq, rps_query_length, resProb);
RPSFillScores(posMatrix, db_seq_length, resProb, scoreArray,
return_sfp, BLAST_SCORE_RANGE_MAX);
@@ -3669,7 +3470,9 @@ BLAST_ComputeLengthAdjustment(double K,
{
Int4 i; /* iteration index */
const Int4 maxits = 20; /* maximum allowed iterations */
- double m = query_length, n = db_length, N = db_num_seqs;
+ double m = (double) query_length;
+ double n = (double) db_length;
+ double N = (double) db_num_seqs;
double ell; /* A float value of the length adjustment */
double ss; /* effective size of the search space */
@@ -3742,3 +3545,293 @@ BLAST_ComputeLengthAdjustment(double K,
return converged ? 0 : 1;
}
+
+/*
+ * ===========================================================================
+ *
+ * $Log: blast_stat.c,v $
+ * Revision 1.82 2004/06/10 13:21:24 madden
+ * Rename RPSFillResidueProbability to Blast_FillResidueProbability, made public.
+ * Removed usage of BLAST_SCORE_1MIN/MAX, simply use BLAST_SCORE_MIN/MAX instead
+ * Removed useless defines DIMOFP0 and DIMOFP0_MAX
+ * Moved over some defines from blast_stat.h
+ *
+ * Revision 1.81 2004/06/08 15:05:05 madden
+ * Doxygen fixes
+ *
+ * Revision 1.80 2004/06/07 20:03:34 coulouri
+ * use floating point constants for comparisons with floating point variables
+ *
+ * Revision 1.79 2004/06/07 14:44:01 madden
+ * Doxygen fixes
+ *
+ * Revision 1.78 2004/06/07 14:20:41 dondosha
+ * Set matrix dimensions to 26 when matrix is read from a file, to make it the same as when matrix is loaded from a library
+ *
+ * Revision 1.77 2004/05/24 15:09:40 camacho
+ * Fixed conflict
+ *
+ * Revision 1.76 2004/05/24 13:26:27 madden
+ * Fix PC compiler warnings
+ *
+ * Revision 1.75 2004/05/20 16:29:30 madden
+ * Make searchsp an Int8 consistent with rest of blast
+ *
+ * Revision 1.74 2004/05/19 15:34:38 dondosha
+ * Moved Blast_ResComp definition from header file
+ *
+ * Revision 1.73 2004/05/19 14:52:03 camacho
+ * 1. Added doxygen tags to enable doxygen processing of algo/blast/core
+ * 2. Standardized copyright, CVS $Id string, $Log and rcsid formatting and i
+ * location
+ * 3. Added use of @todo doxygen keyword
+ *
+ * Revision 1.72 2004/05/17 10:37:38 camacho
+ * Rename BLAST_ScoreFreq, BLASTMatrixStructure and BLAST_ResComp to avoid conflicts with C toolkit
+ *
+ * Revision 1.71 2004/05/07 15:23:47 papadopo
+ * add initialization of scale factor to ScoreBlkNew
+ *
+ * Revision 1.70 2004/05/06 15:59:29 camacho
+ * Made Blast_KarlinBlkCalc non-static
+ *
+ * Revision 1.69 2004/05/06 15:05:13 camacho
+ * Fix to previous commit
+ *
+ * Revision 1.68 2004/05/06 14:44:27 camacho
+ * Made Blast_ScoreFreqDestruct non-static
+ *
+ * Revision 1.67 2004/05/05 21:16:24 camacho
+ * Make Blast_GetStdAlphabet and Blast_ScoreFreqNew non-static
+ *
+ * Revision 1.66 2004/05/04 13:00:02 madden
+ * Change BlastKarlinBlkStandardCalcEx to more descriptive Blast_KarlinBlkIdealCalc, make public
+ *
+ * Revision 1.65 2004/04/30 14:39:44 papadopo
+ * 1. Remove unneeded #defines
+ * 2. use BLAST_SCORE_RANGE_MAX during RPS PSSM creation instead of
+ * (possibly incompatible) RPS_SCORE_MAX
+ * 3. return NULL instead of FALSE on an error
+ *
+ * Revision 1.64 2004/04/30 12:58:49 camacho
+ * Replace RPSKarlinLambdaNR by Blast_KarlinLambdaNR
+ *
+ * Revision 1.63 2004/04/29 20:32:38 papadopo
+ * remove RPS_SCORE_MIN, since it turned out to be a workaround for a bug that has since been fixed
+ *
+ * Revision 1.62 2004/04/29 19:58:03 camacho
+ * Use generic matrix allocator/deallocator from blast_psi_priv.h
+ *
+ * Revision 1.61 2004/04/28 14:40:23 madden
+ * Changes from Mike Gertz:
+ * - I created the new routine BLAST_GapDecayDivisor that computes a
+ * divisor used to weight the evalue of a collection of distinct
+ * alignments.
+ * - I removed BLAST_GapDecay and BLAST_GapDecayInverse which had become
+ * redundant.
+ * - I modified the BLAST_Cutoffs routine so that it uses the value
+ * returned by BLAST_GapDecayDivisor to weight evalues.
+ * - I modified BLAST_SmallGapSumE, BLAST_LargeGapSumE and
+ * BLAST_UnevenGapSumE no longer refer to the gap_prob parameter.
+ * Replaced the gap_decay_rate parameter of each of these routines with
+ * a weight_divisor parameter. Added documentation.
+ *
+ * Revision 1.60 2004/04/23 19:06:33 camacho
+ * Do NOT use lowercase names for #defines
+ *
+ * Revision 1.59 2004/04/23 13:49:20 madden
+ * Cleaned up ifndef in BlastKarlinLHtoK
+ *
+ * Revision 1.58 2004/04/23 13:21:25 madden
+ * Rewrote BlastKarlinLHtoK to do the following and more:
+ * 1. fix a bug whereby the wrong formula was used when high score == 1
+ * and low score == -1;
+ * 2. fix a methodological error of truncating the first sum
+ * and trying to make it converge quickly by adding terms
+ * of a geometric progression, even though the geometric progression
+ * estimate is not correct in all cases;
+ * the old adjustment code is left in for historical purposes but
+ * #ifdef'd out
+ * 3. Eliminate the Boolean bi_modal_score variable. The old test that
+ * set the value of bi_modal_score would frequently fail to choose the
+ * correct value due to rounding error.
+ * 4. changed numerous local variable names to make them more meaningful;
+ * 5. added substantial comments to explain what the procedure
+ * is doing and what each variable represents
+ *
+ * Revision 1.57 2004/04/19 12:58:18 madden
+ * Changed BLAST_KarlinBlk to Blast_KarlinBlk to avoid conflict with blastkar.h structure, renamed some functions to start with Blast_Karlin, made Blast_KarlinBlkDestruct public
+ *
+ * Revision 1.56 2004/04/12 18:57:31 madden
+ * Rename BLAST_ResFreq to Blast_ResFreq, make Blast_ResFreqNew, Blast_ResFreqDestruct, and Blast_ResFreqStdComp non-static
+ *
+ * Revision 1.55 2004/04/08 13:53:10 papadopo
+ * fix doxygen warning
+ *
+ * Revision 1.54 2004/04/07 03:06:16 camacho
+ * Added blast_encoding.[hc], refactoring blast_stat.[hc]
+ *
+ * Revision 1.53 2004/04/05 18:53:35 madden
+ * Set dimensions if matrix from memory
+ *
+ * Revision 1.52 2004/04/01 14:14:02 lavr
+ * Spell "occurred", "occurrence", and "occurring"
+ *
+ * Revision 1.51 2004/03/31 17:50:09 papadopo
+ * Mike Gertz' changes for length adjustment calculations
+ *
+ * Revision 1.50 2004/03/11 18:52:41 camacho
+ * Remove THREADS_IMPLEMENTED
+ *
+ * Revision 1.49 2004/03/10 18:00:06 camacho
+ * Remove outdated references to blastkar
+ *
+ * Revision 1.48 2004/03/05 17:52:33 papadopo
+ * Allow 32-bit context numbers for queries
+ *
+ * Revision 1.47 2004/03/04 21:07:51 papadopo
+ * add RPS BLAST functionality
+ *
+ * Revision 1.46 2004/02/19 21:16:48 dondosha
+ * Use enum type for severity argument in Blast_MessageWrite
+ *
+ * Revision 1.45 2003/12/05 16:03:57 camacho
+ * Remove compiler warnings
+ *
+ * Revision 1.44 2003/11/28 22:39:11 camacho
+ * + static keyword to BlastKarlinLtoH
+ *
+ * Revision 1.43 2003/11/28 15:03:48 camacho
+ * Added static keyword to BlastKarlinLtoH
+ *
+ * Revision 1.42 2003/11/26 19:12:13 madden
+ * code to simplify some routines and use NlmKarlinLambdaNR in place of BlastKarlinLambdaBis (following Mike Gertz's changes to blastkar.c)
+ *
+ * Revision 1.41 2003/11/24 23:18:32 dondosha
+ * Added gap_decay_rate argument to BLAST_Cutoffs; removed BLAST_Cutoffs_simple
+ *
+ * Revision 1.40 2003/11/19 15:17:42 dondosha
+ * Removed unused members from Karlin block structure
+ *
+ * Revision 1.39 2003/10/16 15:55:22 coulouri
+ * fix uninitialized variables
+ *
+ * Revision 1.38 2003/10/16 15:52:08 coulouri
+ * fix uninitialized variables
+ *
+ * Revision 1.37 2003/10/15 16:59:43 coulouri
+ * type correctness fixes
+ *
+ * Revision 1.36 2003/10/02 22:08:34 dondosha
+ * Corrections for one-strand translated searches
+ *
+ * Revision 1.35 2003/09/26 19:01:59 madden
+ * Prefix ncbimath functions with BLAST_
+ *
+ * Revision 1.34 2003/09/09 14:21:39 coulouri
+ * change blastkar.h to blast_stat.h
+ *
+ * Revision 1.33 2003/09/02 21:12:07 camacho
+ * Fix small memory leak
+ *
+ * Revision 1.32 2003/08/26 15:23:51 dondosha
+ * Rolled back previous change as it is not necessary any more
+ *
+ * Revision 1.31 2003/08/25 22:29:07 dondosha
+ * Default matrix loading is defined only in C++ toolkit
+ *
+ * Revision 1.30 2003/08/25 18:05:41 dondosha
+ * Moved assert statement after variables declarations
+ *
+ * Revision 1.29 2003/08/25 16:23:33 camacho
+ * +Loading protein scoring matrices from utils/tables
+ *
+ * Revision 1.28 2003/08/11 15:01:59 dondosha
+ * Added algo/blast/core to all #included headers
+ *
+ * Revision 1.27 2003/08/01 17:27:04 dondosha
+ * Renamed external functions to avoid collisions with ncbitool library; made other functions static
+ *
+ * Revision 1.26 2003/07/31 18:48:49 dondosha
+ * Use Int4 instead of BLAST_Score
+ *
+ * Revision 1.25 2003/07/31 17:48:06 madden
+ * Remove call to FileLength
+ *
+ * Revision 1.24 2003/07/31 14:31:41 camacho
+ * Replaced Char for char
+ *
+ * Revision 1.23 2003/07/31 14:19:28 camacho
+ * Replaced FloatHi for double
+ *
+ * Revision 1.22 2003/07/31 00:32:37 camacho
+ * Eliminated Ptr notation
+ *
+ * Revision 1.21 2003/07/30 22:08:09 dondosha
+ * Process of finding path to the matrix is moved out of the blast library
+ *
+ * Revision 1.20 2003/07/30 21:52:41 camacho
+ * Follow conventional structure definition
+ *
+ * Revision 1.19 2003/07/30 19:39:14 camacho
+ * Remove PNTRs
+ *
+ * Revision 1.18 2003/07/30 17:58:25 dondosha
+ * Changed ValNode to ListNode
+ *
+ * Revision 1.17 2003/07/30 17:15:00 dondosha
+ * Minor fixes for very strict compiler warnings
+ *
+ * Revision 1.16 2003/07/30 17:06:40 camacho
+ * Removed old cvs log
+ *
+ * Revision 1.15 2003/07/30 16:32:02 madden
+ * Use ansi functions when possible
+ *
+ * Revision 1.14 2003/07/30 15:29:37 madden
+ * Removed MemSets
+ *
+ * Revision 1.13 2003/07/29 14:42:31 coulouri
+ * use strdup() instead of StringSave()
+ *
+ * Revision 1.12 2003/07/28 19:04:15 camacho
+ * Replaced all MemNews for calloc
+ *
+ * Revision 1.11 2003/07/28 03:41:49 camacho
+ * Use f{open,close,gets} instead of File{Open,Close,Gets}
+ *
+ * Revision 1.10 2003/07/25 21:12:28 coulouri
+ * remove constructions of the form "return sfree();" and "a=sfree(a);"
+ *
+ * Revision 1.9 2003/07/25 18:58:43 camacho
+ * Avoid using StrUpper and StringHasNoText
+ *
+ * Revision 1.8 2003/07/25 17:25:43 coulouri
+ * in progress:
+ * * use malloc/calloc/realloc instead of Malloc/Calloc/Realloc
+ * * add sfree() macro and __sfree() helper function to util.[ch]
+ * * use sfree() instead of MemFree()
+ *
+ * Revision 1.7 2003/07/24 22:37:33 dondosha
+ * Removed some unused function parameters
+ *
+ * Revision 1.6 2003/07/24 22:01:44 camacho
+ * Removed unused variables
+ *
+ * Revision 1.5 2003/07/24 21:31:06 dondosha
+ * Changed to calls to BlastConstructErrorMessage to API from blast_message.h
+ *
+ * Revision 1.4 2003/07/24 20:38:30 dondosha
+ * Removed LIBCALL etc. macros
+ *
+ * Revision 1.3 2003/07/24 17:37:46 dondosha
+ * Removed MakeBlastScore function that is dependent on objalign.h
+ *
+ * Revision 1.2 2003/07/24 15:50:49 dondosha
+ * Commented out mutex operations
+ *
+ * Revision 1.1 2003/07/24 15:18:09 dondosha
+ * Copy of blastkar.h from ncbitools library, stripped of dependency on ncbiobj
+ *
+ * ===========================================================================
+ */
diff --git a/algo/blast/core/blast_stat.h b/algo/blast/core/blast_stat.h
index e07bd18a..b1e030e8 100644
--- a/algo/blast/core/blast_stat.h
+++ b/algo/blast/core/blast_stat.h
@@ -1,39 +1,37 @@
-/* $Id: blast_stat.h,v 1.34 2004/05/04 12:58:53 madden Exp $
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-/*****************************************************************************
-
-File name: blast_stat.h
-
-Author: Tom Madden
-
-Contents: definitions and prototypes used by blast_stat.c to calculate BLAST
- statistics.
-
-******************************************************************************/
-
-/* $Revision: 1.34 $
- * */
+/* $Id: blast_stat.h,v 1.47 2004/06/10 13:22:33 madden Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Tom Madden
+ *
+ */
+
+/** @file blast_stat.h
+ * Definitions and prototypes used by blast_stat.c to calculate BLAST
+ * statistics. @todo FIXME: needs doxygen comments
+ */
+
#ifndef __BLAST_STAT__
#define __BLAST_STAT__
@@ -52,44 +50,20 @@ extern "C" {
#define BLAST_MATRIX_BEST 2
-/****************************************************************************
-For more accuracy in the calculation of K, set K_SUMLIMIT to 0.00001.
-For high speed in the calculation of K, use a K_SUMLIMIT of 0.001
-Note: statistical significance is often not greatly affected by the value
-of K, so high accuracy is generally unwarranted.
-*****************************************************************************/
-/* K_SUMLIMIT_DEFAULT == sumlimit used in BlastKarlinLHtoK() */
-#define BLAST_KARLIN_K_SUMLIMIT_DEFAULT 0.0001
-
-/* LAMBDA_ACCURACY_DEFAULT == accuracy to which Lambda should be calc'd */
-#define BLAST_KARLIN_LAMBDA_ACCURACY_DEFAULT (1.e-5)
-
-/* LAMBDA_ITER_DEFAULT == no. of iterations in LambdaBis = ln(accuracy)/ln(2)*/
-#define BLAST_KARLIN_LAMBDA_ITER_DEFAULT 17
-
-/* Initial guess for the value of Lambda in BlastKarlinLambdaNR */
-#define BLAST_KARLIN_LAMBDA0_DEFAULT 0.5
-
-#define BLAST_KARLIN_K_ITER_MAX 100
-#define BLAST_SUMP_EPSILON_DEFAULT 0.002 /* accuracy for SumP calculations */
-
/*
Where are the BLAST matrices located?
*/
#define BLASTMAT_DIR "/usr/ncbi/blast/matrix"
-/*************************************************************************
- Structure to the Karlin-Blk parameters.
-
- This structure was (more or less) copied from the old
- karlin.h.
-**************************************************************************/
+/**
+ Structure to hold the Karlin-Altschul parameters.
+*/
typedef struct Blast_KarlinBlk {
- double Lambda; /* Lambda value used in statistics */
- double K, logK; /* K value used in statistics */
- double H; /* H value used in statistics */
- double paramC; /* for use in seed. */
+ double Lambda; /**< Lambda value used in statistics */
+ double K, logK; /**< K value used in statistics */
+ double H; /**< H value used in statistics */
+ double paramC; /**< for use in seed. */
} Blast_KarlinBlk;
@@ -112,102 +86,109 @@ For this reason, SCORE_MIN is not simply defined to be LONG_MIN/2.
#define BLAST_SCORE_MAX INT2_MAX
-#if defined(OS_DOS) || defined(OS_MAC)
-#define BLAST_SCORE_1MIN (-100)
-#define BLAST_SCORE_1MAX ( 100)
-#else
-#define BLAST_SCORE_1MIN (-10000)
-#define BLAST_SCORE_1MAX ( 10000)
-#endif
-#define BLAST_SCORE_RANGE_MAX (BLAST_SCORE_1MAX - BLAST_SCORE_1MIN)
-
-typedef struct BLAST_ScoreFreq {
- Int4 score_min, score_max;
- Int4 obs_min, obs_max;
- double score_avg;
- double* sprob0,* sprob;
-} BLAST_ScoreFreq;
+/** Holds score frequencies used in calculation
+of Karlin-Altschul parameters for an ungapped search.
+*/
+typedef struct Blast_ScoreFreq {
+ Int4 score_min, score_max; /**< lowest and highest allowed scores */
+ Int4 obs_min, obs_max; /**< lowest and highest observed (actual) scores */
+ double score_avg; /**< average score, must be negative for local alignment. */
+ double* sprob0,* sprob; /**< arrays for frequency of given score, sprob is shifted down by score_min. */
+} Blast_ScoreFreq;
#define BLAST_MATRIX_SIZE 32
-typedef struct BLASTMatrixStructure {
+/* Remove me */
+typedef struct SBLASTMatrixStructure {
Int4 *matrix[BLAST_MATRIX_SIZE];
- Int4 long_matrix[BLAST_MATRIX_SIZE*BLAST_MATRIX_SIZE];
-} BLASTMatrixStructure;
+ Int4 long_matrix[BLAST_MATRIX_SIZE*BLAST_MATRIX_SIZE]; /* not used */
+} SBLASTMatrixStructure;
+/** Structure used for scoring calculations.
+*/
typedef struct BlastScoreBlk {
- Boolean protein_alphabet; /* TRUE if alphabet_code is for a
+ Boolean protein_alphabet; /**< TRUE if alphabet_code is for a
protein alphabet (e.g., ncbistdaa etc.), FALSE for nt. alphabets. */
- Uint1 alphabet_code; /* NCBI alphabet code. */
- Int2 alphabet_size; /* size of alphabet. */
- Int2 alphabet_start; /* numerical value of 1st letter. */
- BLASTMatrixStructure* matrix_struct; /* Holds info about matrix. */
- Int4 **matrix; /* Substitution matrix */
- Int4 **posMatrix; /* Sub matrix for position depend BLAST. */
- double karlinK; /* Karlin-Altschul parameter associated with posMatrix */
- Int2 mat_dim1, mat_dim2; /* dimensions of matrix. */
- Int4 *maxscore; /* Max. score for each letter */
- Int4 loscore, hiscore; /* Min. & max. substitution scores */
- Int4 penalty, reward; /* penalty and reward for blastn. */
- Boolean read_in_matrix; /* If TRUE, matrix is read in, otherwise
+ Uint1 alphabet_code; /**< NCBI alphabet code. */
+ Int2 alphabet_size; /**< size of alphabet. */
+ Int2 alphabet_start; /**< numerical value of 1st letter. */
+ SBLASTMatrixStructure* matrix_struct; /**< Holds info about matrix. */
+ Int4 **matrix; /**< Substitution matrix */
+ Int4 **posMatrix; /**< Substitution matrix for position-dependent BLAST. */
+ double karlinK; /**< Karlin-Altschul parameter associated with posMatrix */
+ Int2 mat_dim1, mat_dim2; /**< dimensions of matrix. */
+ Int4 *maxscore; /**< Max. score for each letter */
+ Int4 loscore, hiscore; /**< Min. & max. substitution scores */
+ Int4 penalty, reward; /**< penalty and reward for blastn. */
+ double scale_factor; /**< multiplier for all cutoff and dropoff scores */
+ Boolean read_in_matrix; /**< If TRUE, matrix is read in, otherwise
produce one from penalty and reward above. */
- BLAST_ScoreFreq** sfp; /* score frequencies. */
- double **posFreqs; /*matrix of position specific frequencies*/
+ Blast_ScoreFreq** sfp; /**< score frequencies. */
+ double **posFreqs; /**<matrix of position specific frequencies*/
/* kbp & kbp_gap are ptrs that should be set to kbp_std, kbp_psi, etc. */
- Blast_KarlinBlk** kbp; /* Karlin-Altschul parameters. */
- Blast_KarlinBlk** kbp_gap; /* K-A parameters for gapped alignments. */
+ Blast_KarlinBlk** kbp; /**< Karlin-Altschul parameters. */
+ Blast_KarlinBlk** kbp_gap; /**< K-A parameters for gapped alignments. */
/* Below are the Karlin-Altschul parameters for non-position based ('std')
and position based ('psi') searches. */
Blast_KarlinBlk **kbp_std,
**kbp_psi,
**kbp_gap_std,
**kbp_gap_psi;
- Blast_KarlinBlk* kbp_ideal; /* Ideal values (for query with average database composition). */
- Int4 number_of_contexts; /* Used by sfp and kbp, how large are these*/
- char* name; /* name of matrix. */
- Uint1* ambiguous_res; /* Array of ambiguous res. (e.g, 'X', 'N')*/
- Int2 ambig_size, /* size of array above. */
- ambig_occupy; /* How many occupied? */
- ListNode* comments; /* Comments about matrix. */
- Int4 query_length; /* the length of the query. */
- Int8 effective_search_sp; /* product of above two */
+ Blast_KarlinBlk* kbp_ideal; /**< Ideal values (for query with average database composition). */
+ Int4 number_of_contexts; /**< Used by sfp and kbp, how large are these*/
+ char* name; /**< name of matrix. */
+ Uint1* ambiguous_res; /**< Array of ambiguous res. (e.g, 'X', 'N')*/
+ Int2 ambig_size, /**< size of array above. FIXME: not needed here? */
+ ambig_occupy; /**< How many occupied? */
+ ListNode* comments; /**< Comments about matrix. */
+ Int4 query_length; /**< the length of the query. */
+ Int8 effective_search_sp; /**< product of above two */
} BlastScoreBlk;
-/* Used for communicating between BLAST and other applications. */
-typedef struct BLAST_Matrix {
- Boolean is_prot; /* Matrix is for proteins */
- char* name; /* Name of Matrix (i.e., BLOSUM62). */
- /* Position-specific BLAST rows and columns are different, otherwise they are the
- alphabet length. */
- Int4 rows, /* query length + 1 for PSSM. */
- columns; /* alphabet size in all cases (26). */
- Int4** matrix;
- double ** posFreqs;
- double karlinK;
- Int4** original_matrix;
-} BLAST_Matrix;
-
-typedef struct BLAST_ResComp {
- Uint1 alphabet_code;
- Int4* comp; /* composition of alphabet, array starts at beginning of alphabet. */
- Int4* comp0; /* Same array as above, starts at zero. */
-} BLAST_ResComp;
-
+/**
+Stores the letter frequency of a sequence or database.
+*/
typedef struct Blast_ResFreq {
- Uint1 alphabet_code;
- double* prob; /* probs, (possible) non-zero offset. */
- double* prob0; /* probs, zero offset. */
+ Uint1 alphabet_code; /**< indicates alphabet. */
+ double* prob; /**< letter probs, (possible) non-zero offset. */
+ double* prob0; /**< probs, zero offset. */
} Blast_ResFreq;
+/**
+ * Allocates and initializes BlastScoreBlk
+ * @param alphabet either BLASTAA_SEQ_CODE or BLASTNA_SEQ_CODE [in]
+ * @param number_of_contexts how many strands or sequences [in]
+ * @return BlastScoreBlk*
+*/
BlastScoreBlk* BlastScoreBlkNew (Uint1 alphabet, Int4 number_of_contexts);
-Int2 BlastScoreBlkMatrixLoad(BlastScoreBlk* sbp);
-
+/** Deallocates BlastScoreBlk as well as all associated structures.
+ * @param sbp BlastScoreBlk to be deallocated [in]
+ * @return NULL pointer.
+ */
BlastScoreBlk* BlastScoreBlkFree (BlastScoreBlk* sbp);
+/* FIXME make private? */
+Int2 BlastScoreBlkMatrixLoad(BlastScoreBlk* sbp);
+
+/** Set the ambiguous residue (e.g., 'N', 'X') in the BlastScoreBlk*.
+ * Convert from ncbieaa to sbp->alphabet_code (i.e., ncbistdaa) first.
+ *
+ * @param sbp the object to be modified [in|out]
+ * @param ambiguous_res the residue to be set on the BlastScoreBlk [in]
+ * @return zero on success, others on error
+ */
Int2 BLAST_ScoreSetAmbigRes (BlastScoreBlk* sbp, char ambiguous_res);
+/** Calculate the Karlin parameters. This function should be called once
+ * for each context, or frame translated.
+ * @param sbp the object to be modified [in|out]
+ * @param string the query sequence [in]
+ * @param length length of above sequence [in]
+ * @param context_number which element in various arrays [in]
+ * @return zero on success.
+ */
Int2 BLAST_ScoreBlkFill (BlastScoreBlk* sbp, char* string, Int4 length, Int4 context_number);
/** This function fills in the BlastScoreBlk structure.
@@ -219,14 +200,14 @@ Int2 BLAST_ScoreBlkFill (BlastScoreBlk* sbp, char* string, Int4 length, Int4 con
*/
Int2 BLAST_ScoreBlkMatFill (BlastScoreBlk* sbp, char* matrix);
-/*
- Functions taken from the OLD karlin.c
+/** Callocs a Blast_KarlinBlk
+ * @return pointer to the Blast_KarlinBlk
*/
-
Blast_KarlinBlk* Blast_KarlinBlkCreate (void);
/** Deallocates the KarlinBlk
* @param kbp KarlinBlk to be deallocated [in]
+ * @return NULL
*/
Blast_KarlinBlk* Blast_KarlinBlkDestruct(Blast_KarlinBlk* kbp);
@@ -244,6 +225,16 @@ Int2 Blast_KarlinBlkGappedCalc (Blast_KarlinBlk* kbp, Int4 gap_open,
Blast_KarlinBlk* Blast_KarlinBlkIdealCalc(BlastScoreBlk* sbp);
+/** Fills KarlinBlk pointers in BlastScoreBlk with "ideal" values if the
+ * ideal Lambda is less than the actual Lambda. This happens if
+ * the query is translated and the calculated (real) Karlin
+ * parameters are bad, as they're calculated for non-coding regions.
+ *
+ * @param sbp the object to be modified [in|out]
+ * @param context_start first context to start with [in]
+ * @param context_end last context to work on [in]
+ * @return zero on success
+ */
Int2 Blast_KarlinBlkStandardCalc(BlastScoreBlk* sbp, Int4 context_start,
Int4 context_end);
@@ -257,18 +248,35 @@ Int2 Blast_KarlinBlkStandardCalc(BlastScoreBlk* sbp, Int4 context_start,
*/
Int2 Blast_KarlinkGapBlkFill(Blast_KarlinBlk* kbp, Int4 gap_open, Int4 gap_extend, Int4 decline_align, char* matrix_name);
-/* Prints a messages about the allowed matrices, BlastKarlinkGapBlkFill should return 1 before this is called. */
+/** Prints a message about the allowed matrices; Blast_KarlinkGapBlkFill should return 1 before this is called.
+ * @param matrix the matrix to print a message about [in]
+ * @return the message
+ */
char* BLAST_PrintMatrixMessage(const char *matrix);
-/* Prints a messages about the allowed open etc values for the given matrix,
-BlastKarlinkGapBlkFill should return 2 before this is called. */
+/** Prints a message about the allowed open etc. values for the given matrix;
+ * Blast_KarlinkGapBlkFill should return 2 before this is called.
+ * @param matrix name of the matrix [in]
+ * @param gap_open gap existence cost [in]
+ * @param gap_extend cost to extend a gap by one [in]
+ * @param decline_align cost of declining to align [in]
+ * @return message
+ */
char* BLAST_PrintAllowedValues(const char *matrix, Int4 gap_open, Int4 gap_extend, Int4 decline_align);
/** Calculates the parameter Lambda given an initial guess for its value */
double
-Blast_KarlinLambdaNR(BLAST_ScoreFreq* sfp, double initialLambdaGuess);
+Blast_KarlinLambdaNR(Blast_ScoreFreq* sfp, double initialLambdaGuess);
+
+/** Calculates the Expect value based upon the search space and some Karlin-Altschul
+ * parameters. It is "simple" as it does not use sum-statistics.
+ * @param S the score of the alignment. [in]
+ * @param kbp the Karlin-Altschul parameters. [in]
+ * @param searchsp total search space to be used [in]
+ * @return the expect value
+ */
-double BLAST_KarlinStoE_simple (Int4 S, Blast_KarlinBlk* kbp, double searchsp);
+double BLAST_KarlinStoE_simple (Int4 S, Blast_KarlinBlk* kbp, Int8 searchsp);
double BLAST_GapDecayDivisor(double decayrate, unsigned nsegs );
/** Calculate the cutoff score from the expected number of HSPs or vice versa.
@@ -280,7 +288,7 @@ double BLAST_GapDecayDivisor(double decayrate, unsigned nsegs );
* @param gap_decay_rate Gap decay rate to use, if dodecay is set [in]
*/
Int2 BLAST_Cutoffs (Int4 *S, double* E, Blast_KarlinBlk* kbp,
- double searchsp, Boolean dodecay, double gap_decay_rate);
+ Int8 searchsp, Boolean dodecay, double gap_decay_rate);
/* Functions to calculate SumE (for large and small gaps). */
double BLAST_SmallGapSumE (Blast_KarlinBlk* kbp, Int4 gap, Int2 num, double xsum, Int4 query_length, Int4 subject_length, double weight_divisor);
@@ -335,6 +343,44 @@ Blast_ResFreq* Blast_ResFreqDestruct(Blast_ResFreq* rfp);
*/
Int2 Blast_ResFreqStdComp(const BlastScoreBlk* sbp, Blast_ResFreq* rfp);
+/** Creates a new structure to keep track of score frequencies for a scoring
+ * system.
+ * @param score_min Minimum score [in]
+ * @param score_max Maximum score [in]
+ */
+Blast_ScoreFreq*
+Blast_ScoreFreqNew(Int4 score_min, Int4 score_max);
+
+/** Deallocates the score frequencies structure
+ * @param sfp the structure to deallocate [in]
+ * @return NULL
+ */
+Blast_ScoreFreq*
+Blast_ScoreFreqDestruct(Blast_ScoreFreq* sfp);
+
+/** Fills a buffer with the 'standard' alphabet
+ * (given by STD_AMINO_ACID_FREQS[index].ch).
+ *
+ * @return Number of residues in alphabet or negative returns upon error.
+ */
+Int2
+Blast_GetStdAlphabet(Uint1 alphabet_code, Uint1* residues,
+ Uint4 residues_size);
+
+/* Please see the comment in blast_stat.c */
+Int2
+Blast_KarlinBlkCalc(Blast_KarlinBlk* kbp, Blast_ScoreFreq* sfp);
+
+/** Given a sequence of 'length' amino acid residues, compute the
+ * probability of each residue and put that in the array resProb
+ *
+ * @param sequence the sequence to be computed upon [in]
+ * @param length the length of the sequence [in]
+ * @param resProb the object to be filled in [in|out]
+ */
+void
+Blast_FillResidueProbability(const Uint1* sequence, Int4 length, double * resProb);
+
#ifdef __cplusplus
}
#endif
diff --git a/algo/blast/core/blast_traceback.c b/algo/blast/core/blast_traceback.c
index 3e640f7d..968215c7 100644
--- a/algo/blast/core/blast_traceback.c
+++ b/algo/blast/core/blast_traceback.c
@@ -1,50 +1,46 @@
-/* $Id: blast_traceback.c,v 1.101 2004/05/05 15:27:44 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_traceback.c
-
-Author: Ilya Dondoshansky
-
-Contents: Traceback functions
-
-Detailed Contents:
-
- - Functions responsible for the traceback stage of the BLAST algorithm
-
-******************************************************************************/
+/* $Id: blast_traceback.c,v 1.116 2004/06/14 15:38:12 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_traceback.c
+ * Functions responsible for the traceback stage of the BLAST algorithm
+ */
+
+static char const rcsid[] =
+ "$Id: blast_traceback.c,v 1.116 2004/06/14 15:38:12 dondosha Exp $";
#include <algo/blast/core/blast_traceback.h>
#include <algo/blast/core/blast_util.h>
#include <algo/blast/core/link_hsps.h>
#include <algo/blast/core/blast_setup.h>
+#include <algo/blast/core/blast_kappa.h>
#include "blast_psi_priv.h"
-static char const rcsid[] = "$Id: blast_traceback.c,v 1.101 2004/05/05 15:27:44 dondosha Exp $";
-
/* Comparison function for sorting HSPs by score.
* Ties are broken based on subject sequence offsets.
*/
@@ -400,89 +396,101 @@ HSPContainedInHSPCheck(BlastHSP** hsp_array, BlastHSP* hsp, Int4 max_index, Bool
* @param subject database sequence as a raw string [in]
* @param program_number which program [in]
* @param sbp the scoring information [in]
- * @param psi_options parameters for PSI blast [in]
- * @param scoring_options instructions on how to score matches. [in]
- * @param hit_options determines which scores to save. [in]
+ * @param score_params Parameters for how to score matches. [in]
+ * @param hit_params Determines which scores to save, and whether to calculate
+ * e-values. [in]
*/
static Boolean
HSPSetScores(BlastQueryInfo* query_info, Uint1* query,
Uint1* subject, BlastHSP* hsp,
Uint1 program_number, BlastScoreBlk* sbp,
- const PSIBlastOptions* psi_options,
- const BlastScoringOptions* scoring_options,
- const BlastHitSavingOptions* hit_options)
+ const BlastScoringParameters* score_params,
+ const BlastHitSavingParameters* hit_params)
{
- Boolean keep = TRUE;
- Int4 align_length = 0;
- double scalingFactor;
-
- if (psi_options == NULL)
- scalingFactor = 1.0;
- else
- scalingFactor = psi_options->scalingFactor;
-
- /* Calculate alignment length and number of identical letters */
- if (scoring_options->is_ooframe) {
- Blast_HSPGetOOFNumIdentities(query, subject, hsp, program_number,
- &hsp->num_ident, &align_length);
- }
- else {
- /* Do not get the number of identities for PSI blast,
- because the query may not be available */
- if (psi_options == NULL)
- Blast_HSPGetNumIdentities(query, subject, hsp,
- scoring_options->gapped_calculation, &hsp->num_ident,
- &align_length);
- }
+ Boolean keep = TRUE;
+ Int4 align_length = 0;
+ double scale_factor = 1.0;
+ BlastScoringOptions *score_options = score_params->options;
+ BlastHitSavingOptions *hit_options = hit_params->options;
+
+ /* For RPS BLAST only, we'll need to divide Lambda by the scaling factor
+ for the e-value calculations, because scores are scaled; for PSI-BLAST
+ Lambda is already divided by scaling factor, so there is no need to do
+ it again. In all other programs, scaling factor is 1 anyway. */
+ if (program_number == blast_type_rpsblast ||
+ program_number == blast_type_rpstblastn)
+ scale_factor = score_params->scale_factor;
+
+ /* Calculate alignment length and number of identical letters.
+ Do not get the number of identities if the query is not available */
+ if (query != NULL) {
+ if (score_options->is_ooframe) {
+ Blast_HSPGetOOFNumIdentities(query, subject, hsp, program_number,
+ &hsp->num_ident, &align_length);
+ } else {
+ Blast_HSPGetNumIdentities(query, subject, hsp,
+ score_options->gapped_calculation,
+ &hsp->num_ident, &align_length);
+ }
+ }
- if (hsp->num_ident * 100 <
- align_length * hit_options->percent_identity) {
- keep = FALSE;
- }
+ /* Check whether this HSP passes the percent identity and minimal hit
+ length criteria. */
+ if ((hsp->num_ident * 100 <
+ align_length * hit_options->percent_identity) ||
+ align_length < hit_options->min_hit_length) {
+ keep = FALSE;
+ }
- if (keep == TRUE)
- {
- if (program_number == blast_type_blastp ||
- program_number == blast_type_rpsblast ||
- program_number == blast_type_blastn) {
-
- Blast_KarlinBlk** kbp;
- if (scoring_options->gapped_calculation)
- kbp = sbp->kbp_gap;
- else
- kbp = sbp->kbp;
-
- if (hit_options->phi_align) {
- Blast_HSPPHIGetEvalue(hsp, sbp);
- } else {
- hsp->evalue = BLAST_KarlinStoE_simple(hsp->score, kbp[hsp->context],
- (double)query_info->eff_searchsp_array[hsp->context]);
- }
- if (hsp->evalue > hit_options->expect_value)
- /* put in for comp. based stats. */
- keep = FALSE;
- }
+ if (keep == TRUE) {
+ /* If sum statistics is not used, calculate e-values here. */
+ if (!hit_params->do_sum_stats) {
+
+ Blast_KarlinBlk** kbp;
+ if (score_options->gapped_calculation)
+ kbp = sbp->kbp_gap;
+ else
+ kbp = sbp->kbp;
+
+ if (hit_options->phi_align) {
+ Blast_HSPPHIGetEvalue(hsp, sbp);
+ } else {
+ /* Divide lambda by the scaling factor, so e-value is
+ calculated correctly from a scaled score. Since score
+ is an integer, adjusting score before the e-value
+ calculation would have led to loss of precision.*/
+ kbp[hsp->context]->Lambda /= scale_factor;
+ hsp->evalue =
+ BLAST_KarlinStoE_simple(hsp->score, kbp[hsp->context],
+ query_info->eff_searchsp_array[hsp->context]);
+ kbp[hsp->context]->Lambda *= scale_factor;
+ }
+ if (hsp->evalue > hit_options->expect_value) {
+ /* put in for comp. based stats. */
+ keep = FALSE;
+ }
+ }
- if (scalingFactor != 0.0 && scalingFactor != 1.0) {
- /* Scale down score for blastp and tblastn. */
- hsp->score = (Int4) ((hsp->score+(0.5*scalingFactor))/scalingFactor);
- }
+ /* only one alignment considered for blast[np]. */
+ /* This may be changed by LinkHsps for blastx or tblastn. */
+ hsp->num = 1;
+ if ((program_number == blast_type_tblastn ||
+ program_number == blast_type_rpstblastn) &&
+ hit_options->longest_intron > 0) {
+ /* For uneven version of LinkHsps, the individual e-values
+ need to be calculated for each HSP. */
+ hsp->evalue =
+ BLAST_KarlinStoE_simple(hsp->score, sbp->kbp_gap[hsp->context],
+ query_info->eff_searchsp_array[hsp->context]);
+ }
- /* only one alignment considered for blast[np]. */
- /* This may be changed by LinkHsps for blastx or tblastn. */
- hsp->num = 1;
- if ((program_number == blast_type_tblastn ||
- program_number == blast_type_rpstblastn) &&
- hit_options->longest_intron > 0) {
- hsp->evalue =
- BLAST_KarlinStoE_simple(hsp->score,
- sbp->kbp_gap[hsp->context],
- (double) query_info->eff_searchsp_array[hsp->context]);
- }
- }
+ /* remove any scaling of the calculated score */
+ hsp->score = (Int4) ((hsp->score+(0.5*score_params->scale_factor)) /
+ score_params->scale_factor);
+ }
- return keep;
+ return keep;
}
/** Adjusts offset if out-of-frame and negative frame, or if partial sequence used for extension.
@@ -566,11 +574,10 @@ Blast_TracebackFromHSPList(Uint1 program_number, BlastHSPList* hsp_list,
BLAST_SequenceBlk* query_blk, BLAST_SequenceBlk* subject_blk,
BlastQueryInfo* query_info,
BlastGapAlignStruct* gap_align, BlastScoreBlk* sbp,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
const BlastExtensionOptions* ext_options,
const BlastHitSavingParameters* hit_params,
- const Uint1* gen_code_string,
- const PSIBlastOptions* psi_options)
+ const Uint1* gen_code_string)
{
Int4 index;
BlastHSP* hsp;
@@ -580,12 +587,13 @@ Blast_TracebackFromHSPList(Uint1 program_number, BlastHSPList* hsp_list,
BlastHSP** hsp_array;
Int4 q_start, s_start;
BlastHitSavingOptions* hit_options = hit_params->options;
+ BlastScoringOptions* score_options = score_params->options;
Int4 context_offset;
Uint1* translation_buffer = NULL;
Int4* frame_offsets = NULL;
Boolean partial_translation = FALSE;
const Boolean k_is_ooframe = score_options->is_ooframe;
- const Boolean kGreedyTraceback = (ext_options->algorithm_type == EXTEND_GREEDY_NO_TRACEBACK);
+ const Boolean kGreedyTraceback = (ext_options->eTbackExt == eGreedyTbck);
const Boolean kTranslateSubject =
(program_number == blast_type_tblastn ||
program_number == blast_type_rpstblastn);
@@ -702,9 +710,8 @@ Blast_TracebackFromHSPList(Uint1 program_number, BlastHSPList* hsp_list,
if (hit_options->phi_align) {
Int4 pat_length = GetPatternLengthFromBlastHSP(hsp);
SavePatternLengthInGapAlignStruct(pat_length, gap_align);
- PHIGappedAlignmentWithTraceback(program_number, query, subject,
- gap_align, score_options, q_start, s_start, query_length,
- subject_length);
+ PHIGappedAlignmentWithTraceback(query, subject, gap_align,
+ score_params, q_start, s_start, query_length, subject_length);
} else {
if (!kTranslateSubject) {
AdjustSubjectRange(&s_start, &adjusted_s_length, q_start,
@@ -714,10 +721,10 @@ Blast_TracebackFromHSPList(Uint1 program_number, BlastHSPList* hsp_list,
if (kGreedyTraceback) {
BLAST_GreedyGappedAlignment(query, adjusted_subject,
query_length, adjusted_s_length, gap_align,
- score_options, q_start, s_start, FALSE, TRUE);
+ score_params, q_start, s_start, FALSE, TRUE);
} else {
BLAST_GappedAlignmentWithTraceback(program_number, query,
- adjusted_subject, gap_align, score_options, q_start, s_start,
+ adjusted_subject, gap_align, score_params, q_start, s_start,
query_length, adjusted_s_length);
}
}
@@ -745,12 +752,11 @@ Blast_TracebackFromHSPList(Uint1 program_number, BlastHSPList* hsp_list,
/* Low level greedy algorithm ignores ambiguities, so the score
needs to be reevaluated. */
Blast_HSPReevaluateWithAmbiguities(hsp, query, adjusted_subject,
- hit_options, score_options, query_info, sbp);
+ hit_options, score_params, query_info, sbp);
}
keep = HSPSetScores(query_info, query, adjusted_subject, hsp,
- program_number, sbp, psi_options,
- score_options, hit_options);
+ program_number, sbp, score_params, hit_params);
HSPAdjustSubjectOffset(hsp, subject_blk, k_is_ooframe,
start_shift);
@@ -783,22 +789,10 @@ Blast_TracebackFromHSPList(Uint1 program_number, BlastHSPList* hsp_list,
Blast_HSPListPurgeNullHSPs(hsp_list);
/* Relink and rereap the HSP list, if needed. */
-
- if (program_number == blast_type_blastx ||
- program_number == blast_type_tblastn ||
- program_number == blast_type_rpstblastn) {
-
- if (hit_params->do_sum_stats == TRUE) {
- BLAST_LinkHsps(program_number, hsp_list, query_info, subject_blk,
- sbp, hit_params, score_options->gapped_calculation);
- } else if (hit_options->phi_align) {
- Blast_HSPListPHIGetEvalues(hsp_list, sbp);
- } else {
- Blast_HSPListGetEvalues(program_number, query_info, hsp_list,
- score_options->gapped_calculation, sbp);
- }
-
- Blast_HSPListReapByEvalue(hsp_list, hit_options);
+ if (hit_params->do_sum_stats) {
+ BLAST_LinkHsps(program_number, hsp_list, query_info, subject_blk,
+ sbp, hit_params, score_options->gapped_calculation);
+ Blast_HSPListReapByEvalue(hsp_list, hit_options);
}
qsort(hsp_array, hsp_list->hspcnt, sizeof(BlastHSP*), score_compare_hsps);
@@ -816,7 +810,7 @@ Blast_TracebackFromHSPList(Uint1 program_number, BlastHSPList* hsp_list,
return 0;
}
-static Uint1 GetTracebackEncoding(Uint1 program_number)
+Uint1 Blast_TracebackGetEncoding(Uint1 program_number)
{
Uint1 encoding;
@@ -869,25 +863,25 @@ BlastPruneExtraHits(BlastHSPResults* results, Int4 hitlist_size)
}
}
-Int2 BLAST_ComputeTraceback(Uint1 program_number, BlastHSPResults* results,
+Int2 BLAST_ComputeTraceback(Uint1 program_number, BlastHSPStream* hsp_stream,
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
const BlastSeqSrc* seq_src, BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ BlastScoringParameters* score_params,
const BlastExtensionParameters* ext_params,
BlastHitSavingParameters* hit_params,
BlastEffectiveLengthsParameters* eff_len_params,
const BlastDatabaseOptions* db_options,
- const PSIBlastOptions* psi_options)
+ const PSIBlastOptions* psi_options,
+ BlastHSPResults** results_out)
{
Int2 status = 0;
- Int4 query_index, subject_index;
- BlastHitList* hit_list;
- BlastHSPList* hsp_list;
+ BlastHSPResults* results = NULL;
+ BlastHSPList* hsp_list = NULL;
BlastScoreBlk* sbp;
Uint1 encoding;
GetSeqArg seq_arg;
- if (!results || !query_info || !seq_src) {
+ if (!query_info || !seq_src || !hsp_stream || !results_out) {
return 0;
}
@@ -897,44 +891,55 @@ Int2 BLAST_ComputeTraceback(Uint1 program_number, BlastHSPResults* results,
sbp = gap_align->sbp;
- encoding = GetTracebackEncoding(program_number);
+ encoding = Blast_TracebackGetEncoding(program_number);
memset((void*) &seq_arg, 0, sizeof(seq_arg));
- for (query_index = 0; query_index < results->num_queries; ++query_index) {
- hit_list = results->hitlist_array[query_index];
-
- if (!hit_list)
- continue;
- for (subject_index = 0; subject_index < hit_list->hsplist_count;
- ++subject_index) {
- hsp_list = hit_list->hsplist_array[subject_index];
- if (!hsp_list)
- continue;
+ Blast_HSPResultsInit(query_info->num_queries, &results);
- if (!hsp_list->traceback_done) {
+ if (program_number == blast_type_blastp &&
+ (ext_params->options->compositionBasedStats == TRUE ||
+ ext_params->options->eTbackExt == eSmithWatermanTbck)) {
+ Kappa_RedoAlignmentCore(query, query_info, sbp, hsp_stream, seq_src,
+ score_params, ext_params, hit_params, psi_options, results);
+ } else {
+ Boolean perform_traceback =
+ (score_params->options->gapped_calculation &&
+ (ext_params->options->ePrelimGapExt != eGreedyWithTracebackExt) &&
+ (ext_params->options->eTbackExt != eSkipTbck));
+
+ while (BlastHSPStreamRead(hsp_stream, &hsp_list)
+ != kBlastHSPStream_Eof) {
+ /* Perform traceback here, if necessary. */
+ if (perform_traceback) {
seq_arg.oid = hsp_list->oid;
seq_arg.encoding = encoding;
BlastSequenceBlkClean(seq_arg.seq);
if (BLASTSeqSrcGetSequence(seq_src, (void*) &seq_arg) < 0)
- continue;
-
+ continue;
+
if (BLASTSeqSrcGetTotLen(seq_src) == 0) {
/* This is not a database search, so effective search spaces
need to be recalculated based on this subject sequence
length */
if ((status = BLAST_OneSubjectUpdateParameters(program_number,
- seq_arg.seq->length, score_options,
+ seq_arg.seq->length, score_params->options,
query_info, sbp, ext_params, hit_params,
NULL, eff_len_params)) != 0)
return status;
}
Blast_TracebackFromHSPList(program_number, hsp_list, query,
- seq_arg.seq, query_info, gap_align, sbp, score_options,
- ext_params->options, hit_params, db_options->gen_code_string,
- psi_options);
+ seq_arg.seq, query_info, gap_align, sbp, score_params,
+ ext_params->options, hit_params, db_options->gen_code_string);
BLASTSeqSrcRetSequence(seq_src, (void*)&seq_arg);
}
+
+ /* Recalculate the bit scores, as they might have changed. */
+ Blast_HSPListGetBitScores(hsp_list,
+ score_params->options->gapped_calculation, sbp);
+
+ Blast_HSPResultsInsertHSPList(results, hsp_list,
+ hit_params->options->hitlist_size);
}
}
@@ -951,13 +956,15 @@ Int2 BLAST_ComputeTraceback(Uint1 program_number, BlastHSPResults* results,
BlastSequenceBlkFree(seq_arg.seq);
+ *results_out = results;
+
return status;
}
#define SWAP(a, b) {tmp = (a); (a) = (b); (b) = tmp; }
static void
-RPSUpdateTraceback(BlastHSP *hsp)
+Blast_HSPRPSUpdate(BlastHSP *hsp)
{
Int4 tmp;
GapEditBlock *gap_info = hsp->gap_info;
@@ -974,17 +981,17 @@ RPSUpdateTraceback(BlastHSP *hsp)
esp = gap_info->esp;
while (esp != NULL) {
- if (esp->op_type == GAPALIGN_INS)
- esp->op_type = GAPALIGN_DEL;
- else if (esp->op_type == GAPALIGN_DEL)
- esp->op_type = GAPALIGN_INS;
+ if (esp->op_type == eGapAlignIns)
+ esp->op_type = eGapAlignDel;
+ else if (esp->op_type == eGapAlignDel)
+ esp->op_type = eGapAlignIns;
esp = esp->next;
}
}
static void
-RPSUpdateHSPList(BlastHSPList *hsplist)
+Blast_HSPListRPSUpdate(BlastHSPList *hsplist)
{
Int4 i;
BlastHSP **hsp;
@@ -1003,32 +1010,35 @@ RPSUpdateHSPList(BlastHSPList *hsplist)
/* Change the traceback information to reflect the
query and subject sequences getting switched */
- RPSUpdateTraceback(hsp[i]);
+ Blast_HSPRPSUpdate(hsp[i]);
}
}
#define RPS_K_MULT 1.2
Int2 BLAST_RPSTraceback(Uint1 program_number,
- BlastHSPResults* results,
+ BlastHSPStream* hsp_stream,
BLAST_SequenceBlk* concat_db, BlastQueryInfo* concat_db_info,
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
const BlastExtensionParameters* ext_params,
BlastHitSavingParameters* hit_params,
const BlastDatabaseOptions* db_options,
- const PSIBlastOptions* psi_options,
- const double* karlin_k)
+ const double* karlin_k,
+ BlastHSPResults** results_out)
{
Int2 status = 0;
- Int4 i;
- BlastHitList* hit_list;
BlastHSPList* hsp_list;
BlastScoreBlk* sbp;
Int4 **orig_pssm;
+ Int4 offsets[2];
+ BLAST_SequenceBlk one_db_seq;
+ BlastQueryInfo one_db_seq_info;
+ Int4 *db_seq_start;
+ BlastHSPResults* results = NULL;
- if (!results || !concat_db_info || !concat_db) {
+ if (!hsp_stream || !concat_db_info || !concat_db || !results_out) {
return 0;
}
@@ -1039,94 +1049,83 @@ Int2 BLAST_RPSTraceback(Uint1 program_number,
sbp = gap_align->sbp;
orig_pssm = gap_align->sbp->posMatrix;
- hit_list = results->hitlist_array[0];
- if (!hit_list)
- return 0;
-
- /* for translated searches, the traceback code calculates
- E values *after* the scaling factor has been removed from
- the alignment scores. Thus, lambda must not be pre-scaled
- for a translated search */
-
- if (program_number != blast_type_rpstblastn)
- sbp->kbp_gap[0]->Lambda /= psi_options->scalingFactor;
+ Blast_HSPResultsInit(query_info->num_queries, &results);
- for (i = 0; i < hit_list->hsplist_count; i++) {
- hsp_list = hit_list->hsplist_array[i];
+ while (BlastHSPStreamRead(hsp_stream, &hsp_list)
+ != kBlastHSPStream_Eof) {
if (!hsp_list)
continue;
- if (!hsp_list->traceback_done) {
-
- Int4 offsets[2];
- BLAST_SequenceBlk one_db_seq;
- BlastQueryInfo one_db_seq_info;
- Int4 *db_seq_start;
-
- /* pick out one of the sequences from the concatenated
- DB (given by the OID of this HSPList). The sequence
- size does not include the trailing NULL */
-
- db_seq_start = &concat_db_info->context_offsets[hsp_list->oid];
- memset(&one_db_seq, 0, sizeof(one_db_seq));
- one_db_seq.sequence = NULL;
- one_db_seq.length = db_seq_start[1] - db_seq_start[0] - 1;
-
- /* Set up the QueryInfo structure for this sequence. The
- trailing NULL must be added back */
-
- offsets[0] = 0;
- offsets[1] = one_db_seq.length + 1;
-
- memset(&one_db_seq_info, 0, sizeof(one_db_seq_info));
- one_db_seq_info.first_context = 0;
- one_db_seq_info.last_context = 0;
- one_db_seq_info.num_queries = 1;
- one_db_seq_info.context_offsets = &offsets[0];
- one_db_seq_info.eff_searchsp_array = query_info->eff_searchsp_array;
-
- /* Update the statistics for this database sequence
- (if not a translated search) */
-
- if (program_number == blast_type_rpstblastn) {
- sbp->posMatrix = orig_pssm + db_seq_start[0];
- }
- else {
- /* replace the PSSM and the Karlin values
- for this DB sequence. */
-
- sbp->posMatrix = RPSCalculatePSSM(psi_options->scalingFactor,
- query->length, query->sequence, one_db_seq.length,
- orig_pssm + db_seq_start[0]);
- if (sbp->posMatrix == NULL)
- return -1;
-
- sbp->kbp_gap[0]->K = RPS_K_MULT * karlin_k[hsp_list->oid];
- sbp->kbp_gap[0]->logK = log(RPS_K_MULT * karlin_k[hsp_list->oid]);
- }
-
- /* compute the traceback information and calculate E values
- for all HSPs in the list */
+ /* pick out one of the sequences from the concatenated
+ DB (given by the OID of this HSPList). The sequence
+ size does not include the trailing NULL */
+
+ db_seq_start = &concat_db_info->context_offsets[hsp_list->oid];
+ memset(&one_db_seq, 0, sizeof(one_db_seq));
+ one_db_seq.sequence = NULL;
+ one_db_seq.length = db_seq_start[1] - db_seq_start[0] - 1;
+
+ /* Set up the QueryInfo structure for this sequence. The
+ trailing NULL must be added back */
+
+ offsets[0] = 0;
+ offsets[1] = one_db_seq.length + 1;
+
+ memset(&one_db_seq_info, 0, sizeof(one_db_seq_info));
+ one_db_seq_info.first_context = 0;
+ one_db_seq_info.last_context = 0;
+ one_db_seq_info.num_queries = 1;
+ one_db_seq_info.context_offsets = &offsets[0];
+ one_db_seq_info.eff_searchsp_array = query_info->eff_searchsp_array;
+
+ /* Update the statistics for this database sequence
+ (if not a translated search) */
+
+ if (program_number == blast_type_rpstblastn) {
+ sbp->posMatrix = orig_pssm + db_seq_start[0];
+ } else {
+ /* replace the PSSM and the Karlin values for this DB sequence. */
+ sbp->posMatrix =
+ RPSCalculatePSSM(score_params->scale_factor,
+ query->length, query->sequence, one_db_seq.length,
+ orig_pssm + db_seq_start[0]);
+ if (sbp->posMatrix == NULL)
+ return -1;
+
+ sbp->kbp_gap[0]->K = RPS_K_MULT * karlin_k[hsp_list->oid];
+ sbp->kbp_gap[0]->logK = log(RPS_K_MULT * karlin_k[hsp_list->oid]);
+ }
- Blast_TracebackFromHSPList(program_number, hsp_list, &one_db_seq,
- query, &one_db_seq_info, gap_align, sbp, score_options,
- ext_params->options, hit_params, db_options->gen_code_string,
- psi_options);
+ /* compute the traceback information and calculate E values
+ for all HSPs in the list */
+
+ Blast_TracebackFromHSPList(program_number, hsp_list, &one_db_seq,
+ query, &one_db_seq_info, gap_align, sbp, score_params,
+ ext_params->options, hit_params, db_options->gen_code_string);
- if (program_number != blast_type_rpstblastn)
- _PSIDeallocateMatrix((void**)sbp->posMatrix, one_db_seq.length);
- }
+ if (program_number != blast_type_rpstblastn)
+ _PSIDeallocateMatrix((void**)sbp->posMatrix, one_db_seq.length+1);
/* Revert query and subject to their traditional meanings.
This involves switching the offsets around and reversing
any traceback information */
+ Blast_HSPListRPSUpdate(hsp_list);
- RPSUpdateHSPList(hsp_list);
+ /* Calculate and fill the bit scores. This is the only time when
+ they are calculated. */
+ Blast_HSPListGetBitScores(hsp_list,
+ score_params->options->gapped_calculation, sbp);
+
+ /* Save this HSP list in the results structure. */
+ Blast_HSPResultsInsertHSPList(results, hsp_list,
+ hit_params->options->hitlist_size);
}
- /* restore input data */
- if (program_number != blast_type_rpstblastn)
- sbp->kbp_gap[0]->Lambda *= psi_options->scalingFactor;
+ /* The traceback calculated the E values, so it's safe
+ to sort the results now */
+ Blast_HSPResultsSortByEvalue(results);
+
+ *results_out = results;
gap_align->sbp->posMatrix = orig_pssm;
return status;
diff --git a/algo/blast/core/blast_traceback.h b/algo/blast/core/blast_traceback.h
index 8ecfe130..7ca77785 100644
--- a/algo/blast/core/blast_traceback.h
+++ b/algo/blast/core/blast_traceback.h
@@ -1,49 +1,47 @@
-/* $Id: blast_traceback.h,v 1.26 2004/05/05 15:26:55 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_traceback.h
-
-Author: Ilya Dondoshansky
+/* $Id: blast_traceback.h,v 1.33 2004/06/16 14:53:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
-Contents: Functions to do gapped alignment with traceback
+/** @file blast_traceback.h
+ * Functions to do gapped alignment with traceback
+ */
-******************************************************************************
- * $Revision: 1.26 $
- * */
#ifndef __BLAST_TRACEBACK__
#define __BLAST_TRACEBACK__
+#include <algo/blast/core/blast_seqsrc.h>
+#include <algo/blast/core/blast_gapalign.h>
+#include <algo/blast/core/blast_hspstream.h>
+
#ifdef __cplusplus
extern "C" {
#endif
-#include <algo/blast/core/blast_seqsrc.h>
-#include <algo/blast/core/blast_gapalign.h>
-
/** Compute gapped alignment with traceback for all HSPs from a single
* query/subject sequence pair.
* Final e-values are calculated here, except when sum statistics is used,
@@ -57,50 +55,49 @@ extern "C" {
* start of this query within the concatenated sequence [in]
* @param gap_align Auxiliary structure used for gapped alignment [in]
* @param sbp Statistical parameters [in]
- * @param score_options Scoring parameters [in]
+ * @param score_params Scoring parameters (esp. scale factor) [in]
* @param ext_options Gapped extension options [in]
* @param hit_params Hit saving parameters [in]
* @param gen_code_string specifies genetic code [in]
- * @param psi_options Options specific to PSI BLAST [in]
*/
Int2
Blast_TracebackFromHSPList(Uint1 program_number, BlastHSPList* hsp_list,
BLAST_SequenceBlk* query_blk, BLAST_SequenceBlk* subject_blk,
BlastQueryInfo* query_info,
BlastGapAlignStruct* gap_align, BlastScoreBlk* sbp,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
const BlastExtensionOptions* ext_options,
const BlastHitSavingParameters* hit_params,
- const Uint1* gen_code_string,
- const PSIBlastOptions* psi_options);
+ const Uint1* gen_code_string);
/** Given the preliminary alignment results from a database search, redo
* the gapped alignment with traceback, if it has not yet been done.
* @param program_number Type of the BLAST program [in]
- * @param results Results of this BLAST search [in] [out]
+ * @param hsp_stream A stream for reading HSP lists [in]
* @param query The query sequence [in]
* @param query_info Information about the query [in]
* @param bssp BLAST database structure [in]
* @param gap_align The auxiliary structure for gapped alignment [in]
- * @param score_options The scoring related options [in]
+ * @param score_params Scoring parameters (esp. scale factor) [in]
* @param ext_params Gapped extension parameters [in]
* @param hit_params Parameters for saving hits. Can change if not a
database search [in]
* @param eff_len_params Parameters for recalculating effective search
* space. Can change if not a database search. [in]
* @param db_options Options containing database genetic code string [in]
- * @param psi_options Options specific to PSI BLAST [in]
+ * @param psi_options Options for iterative searches [in]
+ * @param results All results from the BLAST search [out]
* @return nonzero indicates failure, otherwise zero
*/
-Int2 BLAST_ComputeTraceback(Uint1 program_number, BlastHSPResults* results,
+Int2 BLAST_ComputeTraceback(Uint1 program_number, BlastHSPStream* hsp_stream,
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
const BlastSeqSrc* bssp, BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ BlastScoringParameters* score_params,
const BlastExtensionParameters* ext_params,
BlastHitSavingParameters* hit_params,
BlastEffectiveLengthsParameters* eff_len_params,
const BlastDatabaseOptions* db_options,
- const PSIBlastOptions* psi_options);
+ const PSIBlastOptions* psi_options, BlastHSPResults** results);
/** Compute traceback information for alignments found by an
* RPS blast search. This function performs two major tasks:
@@ -114,9 +111,7 @@ Int2 BLAST_ComputeTraceback(Uint1 program_number, BlastHSPResults* results,
* exists to compute E-values for alignments that are found.
*
* @param program_number Type of the BLAST program [in]
- * @param results Structure containing the single HSPList
- * that is the result of a call to Blast_HSPResultsRPSUpdate.
- * Traceback information is added to HSPs in list [in] [out]
+ * @param hsp_stream A stream for reading HSP lists [in]
* @param concat_db The concatentation of all RPS DB sequences.
* The sequence data itself is not needed,
* only its size [in]
@@ -126,30 +121,35 @@ Int2 BLAST_ComputeTraceback(Uint1 program_number, BlastHSPResults* results,
* @param query_info Information associated with the original query.
* Only used for the search space [in]
* @param gap_align The auxiliary structure for gapped alignment [in]
- * @param score_options The scoring related options [in]
+ * @param score_params Scoring parameters (esp. scale factor) [in]
* @param ext_params Gapped extension parameters [in]
* @param hit_params Parameters for saving hits. Can change if not a
database search [in]
* @param db_options Options containing database genetic code string [in]
- * @param psi_options Options specific to PSI BLAST. Only used for
- * the scaling factor at present [in]
* @param karlin_k Array of Karlin values, one for each database
* sequence. Used for E-value calculation [in]
+ * @param results Results structure containing all HSPs, with added
+ * traceback information. [out]
* @return nonzero indicates failure, otherwise zero
*/
Int2 BLAST_RPSTraceback(Uint1 program_number,
- BlastHSPResults* results,
+ BlastHSPStream* hsp_stream,
BLAST_SequenceBlk* concat_db,
BlastQueryInfo* concat_db_info,
BLAST_SequenceBlk* query,
BlastQueryInfo* query_info,
BlastGapAlignStruct* gap_align,
- const BlastScoringOptions* score_options,
+ const BlastScoringParameters* score_params,
const BlastExtensionParameters* ext_params,
BlastHitSavingParameters* hit_params,
const BlastDatabaseOptions* db_options,
- const PSIBlastOptions* psi_options,
- const double* karlin_k);
+ const double* karlin_k,
+ BlastHSPResults** results);
+
+/** Get the subject sequence encoding type for the traceback,
+ * given a program number.
+ */
+Uint1 Blast_TracebackGetEncoding(Uint1 program_number);
#ifdef __cplusplus
}
diff --git a/algo/blast/core/blast_util.c b/algo/blast/core/blast_util.c
index 44d35a59..23a54324 100644
--- a/algo/blast/core/blast_util.c
+++ b/algo/blast/core/blast_util.c
@@ -1,46 +1,45 @@
-/* $Id: blast_util.c,v 1.66 2004/04/19 18:34:19 madden Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_util.c
-
-Author: Ilya Dondoshansky
-
-Contents: Various BLAST utilities
-
-******************************************************************************
- * $Revision: 1.66 $
- * */
+/* $Id: blast_util.c,v 1.71 2004/06/07 14:23:04 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_util.c
+ * Various BLAST utilities
+ */
+
+
+static char const rcsid[] =
+ "$Id: blast_util.c,v 1.71 2004/06/07 14:23:04 dondosha Exp $";
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_util.h>
#include <algo/blast/core/blast_encoding.h>
#include <algo/blast/core/blast_filter.h>
-static char const rcsid[] = "$Id: blast_util.c,v 1.66 2004/04/19 18:34:19 madden Exp $";
Int2
BlastSetUp_SeqBlkNew (const Uint1* buffer, Int4 length, Int4 context,
@@ -672,7 +671,25 @@ Int2 BLAST_ContextToFrame(Uint1 prog_number, Int4 context_number)
return frame;
}
-Int4 BLAST_GetQueryLength(BlastQueryInfo* query_info, Int4 context)
+Int4
+Blast_GetQueryIndexFromContext(Int4 context, Uint1 program)
+{
+ Int4 index = 0;
+ switch (program) {
+ case blast_type_blastn:
+ index = context/NUM_STRANDS; break;
+ case blast_type_blastp: case blast_type_tblastn:
+ case blast_type_rpsblast: case blast_type_rpstblastn:
+ index = context; break;
+ case blast_type_blastx: case blast_type_tblastx:
+ index = context/NUM_FRAMES; break;
+ default:
+ break;
+ }
+ return index;
+}
+
+Int4 BLAST_GetQueryLength(const BlastQueryInfo* query_info, Int4 context)
{
return query_info->context_offsets[context+1] -
query_info->context_offsets[context] - 1;
@@ -687,6 +704,20 @@ BlastQueryInfo* BlastQueryInfoFree(BlastQueryInfo* query_info)
return NULL;
}
+BlastQueryInfo* BlastQueryInfoDup(BlastQueryInfo* query_info)
+{
+ BlastQueryInfo* retval = BlastMemDup(query_info, sizeof(BlastQueryInfo));
+ Int4 num_contexts = query_info->last_context + 1;
+
+ retval->context_offsets =
+ BlastMemDup(query_info->context_offsets, (num_contexts+1)*sizeof(Int4));
+ retval->length_adjustments =
+ BlastMemDup(query_info->length_adjustments, num_contexts*sizeof(Int4));
+ retval->eff_searchsp_array =
+ BlastMemDup(query_info->eff_searchsp_array, num_contexts*sizeof(Int8));
+ return retval;
+}
+
/** Convert a sequence in ncbi4na or blastna encoding into a packed sequence
* in ncbi2na encoding. Needed for 2 sequences BLASTn comparison.
*/
@@ -915,8 +946,8 @@ Int2 BLAST_GetAllTranslations(const Uint1* nucl_seq, Uint1 encoding,
seq = mixed_seq;
for (index = 0; index < NUM_FRAMES; index += CODON_LENGTH) {
for (i = 0; i <= nucl_length; ++i) {
- context = i % 3;
- offset = i / 3;
+ context = i % CODON_LENGTH;
+ offset = i / CODON_LENGTH;
*seq++ = translation_buffer[frame_offsets[index+context]+offset];
}
}
@@ -970,7 +1001,7 @@ int GetPartialTranslation(const Uint1* nucl_seq,
for (index = 1; index <= 3; ++index) {
length =
BLAST_GetTranslation(nucl_seq, nucl_seq_rev,
- nucl_length, frame_sign*index, translation_buffer+offset,
+ nucl_length, (short)(frame_sign*index), translation_buffer+offset,
genetic_code);
frame_offsets[index-1] = offset;
offset += length + 1;
diff --git a/algo/blast/core/blast_util.h b/algo/blast/core/blast_util.h
index 67b9ed07..77a00d7c 100644
--- a/algo/blast/core/blast_util.h
+++ b/algo/blast/core/blast_util.h
@@ -1,48 +1,39 @@
-/* $Id: blast_util.h,v 1.47 2004/04/21 18:34:55 gorelenk Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: blast_util.h
-
-Author: Ilya Dondoshansky
-
-Contents: Various auxiliary BLAST utility functions
-
-Detailed Contents:
-
-******************************************************************************
- * $Revision: 1.47 $
- * */
+/* $Id: blast_util.h,v 1.51 2004/06/16 14:53:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file blast_util.h
+ * Various auxiliary BLAST utility functions
+ */
+
#ifndef __BLAST_UTIL__
#define __BLAST_UTIL__
-#ifdef __cplusplus
-extern "C" {
-#endif
-
#include <algo/blast/core/blast_def.h>
#ifdef NCBI_DLL_BUILD
@@ -52,6 +43,10 @@ extern "C" {
#define NCBI_XBLAST_EXPORT
#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/** Different types of sequence encodings for sequence retrieval from the
* BLAST database
*/
@@ -215,6 +210,13 @@ Int2 GetReverseNuclSequence(const Uint1* sequence, Int4 length,
*/
Int2 BLAST_ContextToFrame(Uint1 prog_number, Int4 context_number);
+/** Given a context from BLAST engine core, return the query index.
+ * @param context Context saved in a BlastHSP structure [in]
+ * @param program Type of BLAST program [in]
+ * @return Query index in a set of queries.
+ */
+Int4 Blast_GetQueryIndexFromContext(Int4 context, Uint1 program);
+
/** Find the length of an individual query within a concatenated set of
* queries.
* @param query_info Queries information structure containing offsets into
@@ -223,11 +225,14 @@ Int2 BLAST_ContextToFrame(Uint1 prog_number, Int4 context_number);
* set [in]
* @return Length of the individual sequence/strand/frame.
*/
-Int4 BLAST_GetQueryLength(BlastQueryInfo* query_info, Int4 context);
+Int4 BLAST_GetQueryLength(const BlastQueryInfo* query_info, Int4 context);
/** Deallocate memory for query information structure */
BlastQueryInfo* BlastQueryInfoFree(BlastQueryInfo* query_info);
+/** Duplicates the query information structure */
+BlastQueryInfo* BlastQueryInfoDup(BlastQueryInfo* query_info);
+
Int2 BLAST_PackDNA(Uint1* buffer, Int4 length, Uint1 encoding,
Uint1** packed_seq);
diff --git a/algo/blast/core/gapinfo.c b/algo/blast/core/gapinfo.c
index 4397a56d..dda7e5f6 100644
--- a/algo/blast/core/gapinfo.c
+++ b/algo/blast/core/gapinfo.c
@@ -1,41 +1,39 @@
-/* $Id: gapinfo.c,v 1.7 2003/08/11 15:02:00 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: gapinfo.c
-
-Author: Ilya Dondoshansky
-
-Contents: Initialization and freeing of structures for gapped alignment
-
-******************************************************************************
- * $Revision: 1.7 $
- * */
-
-static char const rcsid[] = "$Id: gapinfo.c,v 1.7 2003/08/11 15:02:00 dondosha Exp $";
+/* $Id: gapinfo.c,v 1.8 2004/05/19 14:52:03 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file gapinfo.c
+ * Initialization and freeing of structures for gapped alignment
+ */
+
+
+static char const rcsid[] =
+ "$Id: gapinfo.c,v 1.8 2004/05/19 14:52:03 camacho Exp $";
#include <algo/blast/core/gapinfo.h>
diff --git a/algo/blast/core/gapinfo.h b/algo/blast/core/gapinfo.h
index 3e241ea8..f985c0e2 100644
--- a/algo/blast/core/gapinfo.h
+++ b/algo/blast/core/gapinfo.h
@@ -1,57 +1,63 @@
-/* $Id: gapinfo.h,v 1.5 2003/08/11 14:57:16 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
+/* $Id: gapinfo.h,v 1.12 2004/06/16 14:53:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
-/*****************************************************************************
-
-File name: gapinfo.h
-
-Author: Ilya Dondoshansky
-
-Contents: Structures definitions from gapxdrop.h in ncbitools
-
-******************************************************************************
- * $Revision: 1.5 $
- * */
+/** @file gapinfo.h
+ * Structures definitions from gapxdrop.h in ncbitools
+ * @todo FIXME: doxygen comments
+ */
#ifndef __GAPINFO__
#define __GAPINFO__
+#include <algo/blast/core/blast_def.h>
+
#ifdef __cplusplus
extern "C" {
#endif
-#include <algo/blast/core/blast_def.h>
-#define GAPALIGN_SUB ((Uint1)0) /*op types within the edit script*/
-#define GAPALIGN_INS ((Uint1)1)
-#define GAPALIGN_DEL ((Uint1)2)
-#define GAPALIGN_DECLINE ((Uint1)3)
+/** Operation types within the edit script.
+ * Every enumerator is given an explicit value in the range 0..7. These values
+ * are not the same as those of the GAPALIGN_* macros that this enumeration
+ * replaces (substitution, for instance, was 0 and is now 3).
+ */
+typedef enum EGapAlignOpType {
+ eGapAlignDel = 0, /**< Deletion: a gap in query */
+ eGapAlignDel2 = 1,/**< Frame shift deletion of two nucleotides */
+ eGapAlignDel1 = 2,/**< Frame shift deletion of one nucleotide */
+ eGapAlignSub = 3, /**< Substitution */
+ eGapAlignIns1 = 4,/**< Frame shift insertion of one nucleotide */
+ eGapAlignIns2 = 5,/**< Frame shift insertion of two nucleotides */
+ eGapAlignIns = 6, /**< Insertion: a gap in subject */
+ eGapAlignDecline = 7 /**< Non-aligned region */
+} EGapAlignOpType;
+/** Edit script: linked list of correspondences between two sequences */
typedef struct GapEditScript {
- Uint1 op_type; /* GAPALIGN_SUB, GAPALIGN_INS, or GAPALIGN_DEL */
- Int4 num; /* Number of operations */
- struct GapEditScript* next;
+ EGapAlignOpType op_type; /**< Type of operation */
+ Int4 num; /**< Number of operations */
+ struct GapEditScript* next; /**< Pointer to the next link in the list */
} GapEditScript;
typedef struct GapEditBlock {
diff --git a/algo/blast/core/greedy_align.c b/algo/blast/core/greedy_align.c
index b6a09a0a..f12683cb 100644
--- a/algo/blast/core/greedy_align.c
+++ b/algo/blast/core/greedy_align.c
@@ -1,45 +1,59 @@
-/* $Id: greedy_align.c,v 1.16 2004/03/29 20:57:57 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-*
-* File Name: $RCSfile: greedy_align.c,v $
-*
-* Author: Webb Miller and Co.
-* Adopted for NCBI standard libraries by Sergey Shavirin
-*
-* Initial Creation Date: 10/27/1999
-*
-* $Revision: 1.16 $
-*
-* File Description: Greedy gapped alignment functions
-*/
-
-static char const rcsid[] = "$Id: greedy_align.c,v 1.16 2004/03/29 20:57:57 dondosha Exp $";
+/* $Id: greedy_align.c,v 1.19 2004/05/19 14:52:03 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Webb Miller and Co. Adopted for NCBI libraries by Sergey Shavirin
+ *
+ * Initial Creation Date: 10/27/1999
+ *
+ */
+
+/** @file greedy_align.c
+ * Greedy gapped alignment functions
+ */
+
+static char const rcsid[] =
+ "$Id: greedy_align.c,v 1.19 2004/05/19 14:52:03 camacho Exp $";
#include <algo/blast/core/greedy_align.h>
#include <algo/blast/core/blast_util.h> /* for NCBI2NA_UNPACK_BASE macros */
+/* Codes of the edit operations. EDIT_OP_MASK (0x3) selects the two low-order
+   bits, which is enough to hold any of the four codes below.
+   NOTE(review): the EDIT_OPC/EDIT_VAL macros that used to live in
+   greedy_align.h extracted the code as (op & EDIT_OP_MASK) and the value as
+   (op >> 2); confirm that the edit_* helpers in this file use that layout. */
+enum {
+ EDIT_OP_MASK = 0x3,
+ EDIT_OP_ERR = 0x0,
+ EDIT_OP_INS = 0x1,
+ EDIT_OP_DEL = 0x2,
+ EDIT_OP_REP = 0x3
+};
+
+/* Constants used by the greedy alignment functions in this file:
+   - ERROR_FRACTION: BLAST_GreedyAlign computes its distance limit as
+     len1/ERROR_FRACTION + 1.
+   - MAX_SPACE: number of SThreeVal elements MBSpaceNew allocates for the
+     space_array of one SMBSpace.
+   - sI, sD: passed by get_lastI and get_lastD as the IorD argument of
+     get_last_ID. NOTE(review): sC presumably names the third such state -
+     confirm against get_lastC.
+   NOTE(review): the inherited "half of the (fixed) match score" remark below
+   does not obviously describe any of these enumerators. */
+enum { /* half of the (fixed) match score */
+ ERROR_FRACTION=2, /* 1/this */
+ MAX_SPACE=1000000,
+ sC = 0, sI = 1, sD = 2, LARGE=100000000
+};
+
+
/* -------- From original file edit.c ------------- */
static Uint4 edit_val_get(edit_op_t op)
@@ -205,14 +219,14 @@ static MBGapEditScript *edit_script_reverse_inplace(MBGapEditScript *es)
return es;
}
-MBSpace* MBSpaceNew()
+SMBSpace* MBSpaceNew()
{
- MBSpace* p;
+ SMBSpace* p;
Int4 amount;
- p = (MBSpace*) malloc(sizeof(MBSpace));
+ p = (SMBSpace*) malloc(sizeof(SMBSpace));
amount = MAX_SPACE;
- p->space_array = (ThreeVal*) malloc(sizeof(ThreeVal)*amount);
+ p->space_array = (SThreeVal*) malloc(sizeof(SThreeVal)*amount);
if (p->space_array == NULL) {
sfree(p);
return NULL;
@@ -224,7 +238,7 @@ MBSpace* MBSpaceNew()
return p;
}
-static void refresh_mb_space(MBSpace* sp)
+static void refresh_mb_space(SMBSpace* sp)
{
while (sp) {
sp->used = 0;
@@ -232,9 +246,9 @@ static void refresh_mb_space(MBSpace* sp)
}
}
-void MBSpaceFree(MBSpace* sp)
+void MBSpaceFree(SMBSpace* sp)
{
- MBSpace* next_sp;
+ SMBSpace* next_sp;
while (sp) {
next_sp = sp->next;
@@ -244,9 +258,9 @@ void MBSpaceFree(MBSpace* sp)
}
}
-static ThreeVal* get_mb_space(MBSpace* S, Int4 amount)
+static SThreeVal* get_mb_space(SMBSpace* S, Int4 amount)
{
- ThreeVal* s;
+ SThreeVal* s;
if (amount < 0)
return NULL;
@@ -296,7 +310,7 @@ static Int4 gdb3(Int4* a, Int4* b, Int4* c)
return g;
}
-static Int4 get_lastC(ThreeVal** flast_d, Int4* lower, Int4* upper,
+static Int4 get_lastC(SThreeVal** flast_d, Int4* lower, Int4* upper,
Int4* d, Int4 diag, Int4 Mis_cost, Int4* row1)
{
Int4 row;
@@ -318,7 +332,7 @@ static Int4 get_lastC(ThreeVal** flast_d, Int4* lower, Int4* upper,
}
}
-static Int4 get_last_ID(ThreeVal** flast_d, Int4* lower, Int4* upper,
+static Int4 get_last_ID(SThreeVal** flast_d, Int4* lower, Int4* upper,
Int4* d, Int4 diag, Int4 GO_cost,
Int4 GE_cost, Int4 IorD)
{
@@ -337,14 +351,14 @@ static Int4 get_last_ID(ThreeVal** flast_d, Int4* lower, Int4* upper,
return IorD;
}
-static Int4 get_lastI(ThreeVal** flast_d, Int4* lower, Int4* upper,
+static Int4 get_lastI(SThreeVal** flast_d, Int4* lower, Int4* upper,
Int4* d, Int4 diag, Int4 GO_cost, Int4 GE_cost)
{
return get_last_ID(flast_d, lower, upper, d, diag, GO_cost, GE_cost, sI);
}
-static int get_lastD(ThreeVal** flast_d, Int4* lower, Int4* upper,
+static int get_lastD(SThreeVal** flast_d, Int4* lower, Int4* upper,
Int4* d, Int4 diag, Int4 GO_cost, Int4 GE_cost)
{
return get_last_ID(flast_d, lower, upper, d, diag, GO_cost, GE_cost, sD);
@@ -389,9 +403,10 @@ Int4 BLAST_GreedyAlign(const Uint1* s1, Int4 len1,
Boolean reverse, Int4 xdrop_threshold,
Int4 match_cost, Int4 mismatch_cost,
Int4* e1, Int4* e2,
- GreedyAlignMem* gamp, MBGapEditScript *S,
+ SGreedyAlignMem* gamp, MBGapEditScript *S,
Uint1 rem)
{
+#define ICEIL(x,y) ((((x)-1)/(y))+1)
Int4 col, /* column number */
d, /* current distance */
k, /* current diagonal */
@@ -411,7 +426,7 @@ Int4 BLAST_GreedyAlign(const Uint1* s1, Int4 len1,
Int4 x, cur_max, b_diag = 0, best_diag = INT4_MAX/2;
Int4* max_row_free = gamp->max_row_free;
char nlower = 0, nupper = 0;
- MBSpace* space = gamp->space;
+ SMBSpace* space = gamp->space;
Int4 max_len = len2;
MAX_D = (Int4) (len1/ERROR_FRACTION + 1);
@@ -558,7 +573,7 @@ Int4 BLAST_GreedyAlign(const Uint1* s1, Int4 len1,
if (S==NULL)
flast_d[d] = flast_d[d - 2];
else {
- /* space array consists of ThreeVal structures which are
+ /* space array consists of SThreeVal structures which are
3 times larger than Int4, so divide requested amount by 3
*/
flast_d[d] = (Int4*) get_mb_space(space, (fupper-flower+7)/3);
@@ -601,7 +616,7 @@ Int4 BLAST_AffineGreedyAlign (const Uint1* s1, Int4 len1,
Int4 match_score, Int4 mismatch_score,
Int4 gap_open, Int4 gap_extend,
Int4* e1, Int4* e2,
- GreedyAlignMem* gamp, MBGapEditScript *S,
+ SGreedyAlignMem* gamp, MBGapEditScript *S,
Uint1 rem)
{
Int4 col, /* column number */
@@ -612,7 +627,7 @@ Int4 BLAST_AffineGreedyAlign (const Uint1* s1, Int4 len1,
MAX_D, /* maximum cost */
ORIGIN,
return_val = 0;
- ThreeVal** flast_d; /* rows containing the last d */
+ SThreeVal** flast_d; /* rows containing the last d */
Int4* max_row_free = gamp->max_row_free;
Int4* max_row; /* reached for cost d=0, ... len1. */
Int4 Mis_cost, GO_cost, GE_cost;
@@ -623,7 +638,7 @@ Int4 BLAST_AffineGreedyAlign (const Uint1* s1, Int4 len1,
Int4 x, cur_max, b_diag = 0, best_diag = INT4_MAX/2;
char nlower = 0, nupper = 0;
- MBSpace* space = gamp->space;
+ SMBSpace* space = gamp->space;
Int4 stop_condition;
Int4 max_d;
Int4* uplow_free;
diff --git a/algo/blast/core/greedy_align.h b/algo/blast/core/greedy_align.h
index aa690ebb..f6dd2a8b 100644
--- a/algo/blast/core/greedy_align.h
+++ b/algo/blast/core/greedy_align.h
@@ -1,39 +1,37 @@
-/* $Id: greedy_align.h,v 1.8 2003/08/11 14:57:16 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
+/* $Id: greedy_align.h,v 1.11 2004/05/19 14:52:01 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file greedy_align.h
+ * Copy of mbalign.h from ncbitools library
+ * @todo FIXME need better file description
+ */
-/*****************************************************************************
-
-File name: greedy_align.h
-
-Author: Ilya Dondoshansky
-
-Contents: Copy of mbalign.h from ncbitools library
-
-******************************************************************************
- * $Revision: 1.8 $
- * */
#ifndef _GREEDY_H_
#define _GREEDY_H_
@@ -54,54 +52,38 @@ MBGapEditScript *MBGapEditScriptFree(MBGapEditScript *es);
MBGapEditScript *MBGapEditScriptNew(void);
MBGapEditScript *MBGapEditScriptAppend(MBGapEditScript *es, MBGapEditScript *et);
-enum {
- EDIT_OP_MASK = 0x3,
- EDIT_OP_ERR = 0x0,
- EDIT_OP_INS = 0x1,
- EDIT_OP_DEL = 0x2,
- EDIT_OP_REP = 0x3
-};
-
-enum { /* half of the (fixed) match score */
- ERROR_FRACTION=2, /* 1/this */
- MAX_SPACE=1000000,
- sC = 0, sI = 1, sD = 2, LARGE=100000000
-};
-
-#define ICEIL(x,y) ((((x)-1)/(y))+1)
-
/* ----- pool allocator ----- */
-typedef struct ThreeVal {
+
+/** Triple of Int4 values; this is the element type of the SMBSpace pool
+ * (SMBSpace::space_array) and of SGreedyAlignMem::flast_d_affine.
+ * NOTE(review): I, C and D presumably pair with the sI, sC and sD constants
+ * defined in greedy_align.c - confirm.
+ * @todo FIXME Need to determine what the members of this structure mean.
+ * Can these be combined with the BlastGapDP structure? @sa BlastGapDP
+ */
+typedef struct SThreeVal {
Int4 I, C, D;
-} ThreeVal;
+} SThreeVal;
-typedef struct MBSpace {
- ThreeVal* space_array;
+/** One link of the pool allocator: an array of SThreeVal elements plus a
+ * pointer to the next link. MBSpaceNew allocates space_array, MBSpaceFree
+ * walks the chain through next, and refresh_mb_space resets used to 0.
+ * NOTE(review): used and size are presumably the number of elements handed
+ * out so far and the capacity of space_array - confirm in get_mb_space.
+ */
+typedef struct SMBSpace {
+ SThreeVal* space_array;
Int4 used, size;
- struct MBSpace *next;
-} MBSpace;
-
-#define EDIT_VAL(op) (op >> 2)
-
-#define EDIT_OPC(op) (op & EDIT_OP_MASK)
+ struct SMBSpace *next;
+} SMBSpace;
-MBSpace* MBSpaceNew(void);
-void MBSpaceFree(MBSpace* sp);
+SMBSpace* MBSpaceNew(void);
+void MBSpaceFree(SMBSpace* sp);
-typedef struct GreedyAlignMem {
+/** Working memory handed to BLAST_GreedyAlign and BLAST_AffineGreedyAlign.
+ * Both functions read max_row_free and space from it.
+ * NOTE(review): flast_d (rows of Int4) is presumably used by the non-affine
+ * algorithm and flast_d_affine (rows of SThreeVal) by the affine one -
+ * confirm in greedy_align.c.
+ */
+typedef struct SGreedyAlignMem {
Int4** flast_d;
Int4* max_row_free;
- ThreeVal** flast_d_affine;
+ SThreeVal** flast_d_affine;
Int4* uplow_free;
- MBSpace* space;
-} GreedyAlignMem;
+ SMBSpace* space;
+} SGreedyAlignMem;
Int4
BLAST_GreedyAlign (const Uint1* s1, Int4 len1,
const Uint1* s2, Int4 len2,
Boolean reverse, Int4 xdrop_threshold,
Int4 match_cost, Int4 mismatch_cost,
- Int4* e1, Int4* e2, GreedyAlignMem* abmp,
+ Int4* e1, Int4* e2, SGreedyAlignMem* abmp,
MBGapEditScript *S, Uint1 rem);
Int4
BLAST_AffineGreedyAlign (const Uint1* s1, Int4 len1,
@@ -110,7 +92,7 @@ BLAST_AffineGreedyAlign (const Uint1* s1, Int4 len1,
Int4 match_cost, Int4 mismatch_cost,
Int4 gap_open, Int4 gap_extend,
Int4* e1, Int4* e2,
- GreedyAlignMem* abmp,
+ SGreedyAlignMem* abmp,
MBGapEditScript *S, Uint1 rem);
#ifdef __cplusplus
diff --git a/algo/blast/core/hspstream_collector.c b/algo/blast/core/hspstream_collector.c
new file mode 100644
index 00000000..bb7dc32b
--- /dev/null
+++ b/algo/blast/core/hspstream_collector.c
@@ -0,0 +1,209 @@
+/* $Id: hspstream_collector.c,v 1.2 2004/06/08 17:30:07 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file hspstream_collector.c
+ * Default implementation of the BlastHSPStream interface to save hits from
+ * a BLAST search, and subsequently return them in sorted order.
+ */
+
+static char const rcsid[] =
+ "$Id: hspstream_collector.c,v 1.2 2004/06/08 17:30:07 dondosha Exp $";
+
+
+#include <algo/blast/core/blast_hits.h>
+#include <algo/blast/core/hspstream_collector.h>
+
+/** Default hit saving stream methods */
+
+/** Destructor of the collector stream: releases the saved results, the
+ * collector's private data and finally the stream object itself.
+ * @param hsp_stream Stream to deallocate [in]
+ * @return Always NULL.
+ */
+static BlastHSPStream*
+BlastHSPListCollectorFree(BlastHSPStream* hsp_stream)
+{
+   BlastHSPListCollectorData* collector =
+      (BlastHSPListCollectorData*) GetData(hsp_stream);
+
+   Blast_HSPResultsFree(collector->results);
+   sfree(collector);
+   sfree(hsp_stream);
+   return NULL;
+}
+
+/** Closes the collector stream for writing. Nothing happens if there are no
+ * results or the stream has been closed before. Otherwise the results are
+ * reverse-sorted when sort_on_read is set, and the stream is marked as
+ * sorted in either case.
+ * @param hsp_stream Stream to close [in] [out]
+ */
+static void
+BlastHSPListCollectorClose(BlastHSPStream* hsp_stream)
+{
+   BlastHSPListCollectorData* collector =
+      (BlastHSPListCollectorData*) GetData(hsp_stream);
+
+   if (collector->results != NULL && !collector->results_sorted) {
+      if (collector->sort_on_read)
+         Blast_HSPResultsReverseSort(collector->results);
+      /* From now on writes are refused, whether or not a sort took place. */
+      collector->results_sorted = TRUE;
+   }
+}
+
+/** Hands out the next HSP list held by the collector. The list is taken from
+ * the end of the hit list of the first query that still has HSP lists.
+ * @param hsp_stream Stream to read from [in] [out]
+ * @param hsp_list_out Receives the HSP list, with its query_index filled in;
+ *                     set to NULL when nothing is returned [out]
+ * @return kBlastHSPStream_Success if a list was returned,
+ *         kBlastHSPStream_Eof if there are no results (left).
+ */
+static int
+BlastHSPListCollectorRead(BlastHSPStream* hsp_stream,
+                          BlastHSPList** hsp_list_out)
+{
+   BlastHSPListCollectorData* collector =
+      (BlastHSPListCollectorData*) GetData(hsp_stream);
+   BlastHSPResults* results = collector->results;
+   BlastHitList* hits = NULL;
+   Int4 query;
+
+   *hsp_list_out = NULL;
+   if (results == NULL)
+      return kBlastHSPStream_Eof;
+
+   /* The first read implicitly closes the stream for writing, which is also
+      where the results get sorted.
+      NB: to allow writing after the first read, drop the next statement and
+      close the stream for writing outside of the read function. */
+   if (!collector->results_sorted)
+      BlastHSPListCollectorClose(hsp_stream);
+
+   /* Skip over queries that have no HSP lists (left). */
+   query = collector->first_query_index;
+   while (query < results->num_queries &&
+          !(results->hitlist_array[query] &&
+            results->hitlist_array[query]->hsplist_count > 0)) {
+      ++query;
+   }
+   if (query >= results->num_queries)
+      return kBlastHSPStream_Eof;
+
+   collector->first_query_index = query;
+   hits = results->hitlist_array[query];
+
+   /* Dequeue the last HSP list: lowering the count removes it from the hit
+      list, and the new count is exactly the index of the removed element. */
+   --hits->hsplist_count;
+   *hsp_list_out = hits->hsplist_array[hits->hsplist_count];
+   /* Tell the caller which query this HSP list belongs to. */
+   (*hsp_list_out)->query_index = query;
+
+   /* Once this query is exhausted, move on; whether the next query has any
+      results is only checked on the next call. */
+   if (hits->hsplist_count == 0)
+      ++collector->first_query_index;
+
+   return kBlastHSPStream_Success;
+}
+
+/** Saves an HSP list in the collector's results structure.
+ * @param hsp_stream Stream to write to [in] [out]
+ * @param hsp_list HSP list to save; on success the pointer is reset to NULL,
+ *                 because the caller no longer owns the list [in] [out]
+ * @return kBlastHSPStream_Success if the list was handed to the results
+ *         structure, kBlastHSPStream_Error if the stream has already been
+ *         closed for writing.
+ */
+static int
+BlastHSPListCollectorWrite(BlastHSPStream* hsp_stream,
+                           BlastHSPList** hsp_list)
+{
+   BlastHSPListCollectorData* collector =
+      (BlastHSPListCollectorData*) GetData(hsp_stream);
+
+   /** @todo Lock the mutex here */
+
+   /* Writing is refused once reading has started. Nothing inherently
+      prevents mixing reads and writes, but then every read that follows a
+      write would have to sort again, so this is prohibited for now. */
+   if (collector->results_sorted)
+      return kBlastHSPStream_Error;
+
+   if (collector->program != blast_type_rpsblast &&
+       collector->program != blast_type_rpstblastn) {
+      Blast_HSPResultsSaveHSPList(collector->program, collector->results,
+                                  *hsp_list, collector->hit_options);
+   } else {
+      /* RPS BLAST is saved differently, because one HSP list bundles HSPs
+         from different subjects. */
+      Blast_HSPResultsSaveRPSHSPList(collector->program, collector->results,
+                                     *hsp_list, collector->hit_options);
+   }
+
+   /* The results are not sorted any more, even if they were before. This
+      only matters if the prohibition on writing after the first read is
+      ever lifted. */
+   collector->results_sorted = FALSE;
+
+   /* The list now belongs to the results structure, not to the caller. */
+   *hsp_list = NULL;
+
+   /** @todo Unlock the mutex here */
+
+   return kBlastHSPStream_Success;
+}
+
+/** Constructor callback of the collector stream: installs the collector's
+ * destructor, read, write and close functions on the stream and attaches the
+ * collector data to it.
+ * @param hsp_stream Stream to set up [in] [out]
+ * @param args Data to attach to the stream; Blast_HSPListCollectorInit passes
+ *             its BlastHSPListCollectorData here [in]
+ * @return The hsp_stream argument.
+ */
+static BlastHSPStream*
+BlastHSPListCollectorNew(BlastHSPStream* hsp_stream, void* args)
+{
+   BlastHSPStreamFunctionPointerTypes callback;
+
+   /* The same variable is reused for each method: the matching member is
+      filled in and then passed on right away. */
+   callback.dtor = &BlastHSPListCollectorFree;
+   SetMethod(hsp_stream, eDestructor, callback);
+
+   callback.method = &BlastHSPListCollectorRead;
+   SetMethod(hsp_stream, eRead, callback);
+
+   callback.method = &BlastHSPListCollectorWrite;
+   SetMethod(hsp_stream, eWrite, callback);
+
+   callback.closeFn = &BlastHSPListCollectorClose;
+   SetMethod(hsp_stream, eClose, callback);
+
+   SetData(hsp_stream, args);
+   return hsp_stream;
+}
+
+/** Creates an HSP stream that uses the default collector implementation.
+ * @param program Type of BLAST program [in]
+ * @param hit_options Hit saving options; the pointer is stored in the
+ *                    collector data, not copied [in]
+ * @param num_queries Number of query sequences; for RPS BLAST this is in
+ *                    fact the number of database sequences [in]
+ * @param sort_on_read Should results be sorted on the first read call? [in]
+ * @return The stream returned by BlastHSPStreamNew, or NULL if the collector
+ *         data could not be allocated.
+ */
+BlastHSPStream*
+Blast_HSPListCollectorInit(Uint1 program, BlastHitSavingOptions* hit_options,
+                           Int4 num_queries, Boolean sort_on_read)
+{
+   BlastHSPListCollectorData* stream_data =
+      (BlastHSPListCollectorData*) malloc(sizeof(BlastHSPListCollectorData));
+   BlastHSPStreamNewInfo info;
+
+   /* malloc may fail; never dereference its result unchecked. */
+   if (stream_data == NULL)
+      return NULL;
+
+   stream_data->program = program;
+   stream_data->hit_options = hit_options;
+   /* malloc does not clear the memory: give the pointer a known value, so
+      that it is not left indeterminate if Blast_HSPResultsInit fails to
+      assign it. */
+   stream_data->results = NULL;
+   if (program == blast_type_rpsblast || program == blast_type_rpstblastn) {
+      /* For RPS BLAST, there is only one query, and num_queries variable
+       * is in fact the number of database sequences. */
+      Blast_HSPResultsInit(1, &stream_data->results);
+      /* Only touch the hit list array if the results were really created. */
+      if (stream_data->results != NULL) {
+         stream_data->results->hitlist_array[0] =
+            Blast_HitListNew(num_queries);
+      }
+   } else {
+      Blast_HSPResultsInit(num_queries, &stream_data->results);
+   }
+   stream_data->results_sorted = FALSE;
+   stream_data->sort_on_read = sort_on_read;
+   stream_data->first_query_index = 0;
+
+   info.constructor = &BlastHSPListCollectorNew;
+   info.ctor_argument = (void*)stream_data;
+
+   return BlastHSPStreamNew(&info);
+}
diff --git a/algo/blast/core/hspstream_collector.h b/algo/blast/core/hspstream_collector.h
new file mode 100644
index 00000000..46b026b2
--- /dev/null
+++ b/algo/blast/core/hspstream_collector.h
@@ -0,0 +1,76 @@
+/* $Id: hspstream_collector.h,v 1.2 2004/06/16 14:53:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file hspstream_collector.h
+ * Default implementation of the BlastHSPStream interface to save hits from
+ * a BLAST search, and subsequently return them in sorted order.
+ */
+
+#ifndef HSPSTREAM_COLLECTOR_H
+#define HSPSTREAM_COLLECTOR_H
+
+#include <algo/blast/core/blast_options.h>
+#include <algo/blast/core/blast_hits.h>
+#include <algo/blast/core/blast_hspstream.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Private data of the default implementation of BlastHSPStream (the HSP
+ * list collector). Allocated by Blast_HSPListCollectorInit and attached to
+ * the stream as its data.
+ */
+typedef struct BlastHSPListCollectorData {
+ Uint1 program; /**< BLAST program type */
+ BlastHitSavingOptions* hit_options; /**< Hit saving options; the pointer
+ is stored as passed in, not copied */
+ BlastHSPResults* results;/**< Structure for saving HSP lists */
+ Boolean results_sorted; /**< Has the stream been closed for writing?
+ Set to true when the stream is closed, which
+ the first read call does implicitly. The
+ results are only actually sorted at that point
+ if sort_on_read is true. Writes are refused
+ while this is true. */
+ Boolean sort_on_read; /**< Should the results be sorted on the first
+ read call? */
+ Int4 first_query_index; /**< Index of the first query to try getting
+ results from. */
+ /* TNlmMutex results_mutex; */ /**< Mutex for writing and reading results.
+ @todo FIXME: not implemented yet. */
+} BlastHSPListCollectorData;
+
+/** Initialize the internal data structure.
+ * @param program Type of BLAST program [in]
+ * @param hit_options Hit saving options containing limits on numbers of
+ * results to save [in]
+ * @param num_queries Number of query sequences in this BLAST search [in]
+ * @param sort_on_read Should results be sorted on the first read call? [in]
+ */
+BlastHSPStream*
+Blast_HSPListCollectorInit(Uint1 program, BlastHitSavingOptions* hit_options,
+ Int4 num_queries, Boolean sort_on_read);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HSPSTREAM_COLLECTOR_H */
diff --git a/algo/blast/core/link_hsps.c b/algo/blast/core/link_hsps.c
index c7d5f19e..fb5a1c05 100644
--- a/algo/blast/core/link_hsps.c
+++ b/algo/blast/core/link_hsps.c
@@ -1,44 +1,42 @@
-/* $Id: link_hsps.c,v 1.31 2004/05/05 15:27:45 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: link_hsps.c
-
-Author: Ilya Dondoshansky
-
-Contents: Functions to link with use of sum statistics
-
-Detailed Contents:
-
-******************************************************************************/
+/* $Id: link_hsps.c,v 1.36 2004/06/08 17:30:07 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file link_hsps.c
+ * Functions to link HSPs using sum statistics
+ */
+
+static char const rcsid[] =
+ "$Id: link_hsps.c,v 1.36 2004/06/08 17:30:07 dondosha Exp $";
#include <algo/blast/core/link_hsps.h>
#include <algo/blast/core/blast_util.h>
-static char const rcsid[] = "$Id: link_hsps.c,v 1.31 2004/05/05 15:27:45 dondosha Exp $";
/** Methods used to "order" the HSP's. */
#define BLAST_NUMBER_OF_ORDERING_METHODS 2
@@ -551,7 +549,7 @@ link_hsps(Uint1 program_number, BlastHSPList* hsp_list,
program_number == blast_type_tblastx);
if (program_number == blast_type_tblastn ||
program_number == blast_type_tblastx)
- num_subject_frames = 2;
+ num_subject_frames = NUM_STRANDS;
else
num_subject_frames = 1;
@@ -576,7 +574,7 @@ link_hsps(Uint1 program_number, BlastHSPList* hsp_list,
ignore_small_gaps = hit_params->ignore_small_gaps;
if (translated_query)
- num_query_frames = 2*query_info->num_queries;
+ num_query_frames = NUM_STRANDS*query_info->num_queries;
else
num_query_frames = query_info->num_queries;
@@ -636,12 +634,16 @@ link_hsps(Uint1 program_number, BlastHSPList* hsp_list,
length_adjustment = query_info->length_adjustments[query_context];
query_length = BLAST_GetQueryLength(query_info, query_context);
query_length = MAX(query_length - length_adjustment, 1);
+ subject_length = subject->length; /* in nucleotides even for tblast[nx] */
/* If subject is translated, length adjustment is given in nucleotide
scale. */
if (program_number == blast_type_tblastn ||
program_number == blast_type_tblastx)
+ {
length_adjustment /= CODON_LENGTH;
- subject_length = MAX(subject->length - length_adjustment, 1);
+ subject_length /= CODON_LENGTH;
+ }
+ subject_length = MAX(subject_length - length_adjustment, 1);
lh_helper[0].ptr = hp_start;
lh_helper[0].q_off_trim = 0;
@@ -996,7 +998,7 @@ link_hsps(Uint1 program_number, BlastHSPList* hsp_list,
best[1]->hsp_link.xsum[1],
query_length, subject_length,
BLAST_GapDecayDivisor(gap_decay_rate,
- best[1]->hsp_link.num[0]));
+ best[1]->hsp_link.num[1]));
if( best[1]->hsp_link.num[1] > 1 ) {
if( 1 - gap_prob == 0 || (prob[1] /= 1 - gap_prob) > INT4_MAX ) {
@@ -1399,8 +1401,8 @@ BLAST_LinkHsps(Uint1 program_number, BlastHSPList* hsp_list,
} else {
/* Calculate individual HSP e-values first - they'll be needed to
compare with sum e-values. */
- Blast_HSPListGetEvalues(program_number, query_info,
- hsp_list, gapped_calculation, sbp);
+ Blast_HSPListGetEvalues(query_info, hsp_list,
+ gapped_calculation, sbp);
new_link_hsps(program_number, hsp_list, query_info, subject, sbp,
hit_params);
diff --git a/algo/blast/core/link_hsps.h b/algo/blast/core/link_hsps.h
index f3f1f999..dc689f13 100644
--- a/algo/blast/core/link_hsps.h
+++ b/algo/blast/core/link_hsps.h
@@ -1,48 +1,45 @@
-/* $Id: link_hsps.h,v 1.9 2004/03/24 19:07:57 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: link_hsps.h
-
-Author: Ilya Dondoshansky
+/* $Id: link_hsps.h,v 1.11 2004/06/16 14:53:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
-Contents: Functions to link HSPs using sum statistics
+/** @file link_hsps.h
+ * Functions to link HSPs using sum statistics
+ */
-******************************************************************************
- * $Revision: 1.9 $
- * */
#ifndef __LINK_HSPS__
#define __LINK_HSPS__
+#include <algo/blast/core/blast_hits.h>
+
#ifdef __cplusplus
extern "C" {
#endif
-#include <algo/blast/core/blast_hits.h>
-
/** Link HSPs using sum statistics.
* @param program_number BLAST program [in]
* @param hsp_list List of HSPs [in]
diff --git a/algo/blast/core/lookup_util.c b/algo/blast/core/lookup_util.c
index 9facc84c..6226e01a 100644
--- a/algo/blast/core/lookup_util.c
+++ b/algo/blast/core/lookup_util.c
@@ -1,32 +1,35 @@
-static char const rcsid[] = "$Id: lookup_util.c,v 1.7 2003/11/20 15:29:12 coulouri Exp $";
-
-/*
-
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-
-*/
+/* $Id: lookup_util.c,v 1.8 2004/05/19 14:52:03 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ */
+
+/** @file lookup_util.c
+ * @todo FIXME needs file description
+ */
+
+static char const rcsid[] =
+ "$Id: lookup_util.c,v 1.8 2004/05/19 14:52:03 camacho Exp $";
#include <algo/blast/core/lookup_util.h>
diff --git a/algo/blast/core/lookup_util.h b/algo/blast/core/lookup_util.h
index c443f681..d3438c24 100644
--- a/algo/blast/core/lookup_util.h
+++ b/algo/blast/core/lookup_util.h
@@ -1,30 +1,32 @@
-/* $Id: lookup_util.h,v 1.7 2003/08/11 14:57:16 dondosha Exp $
-
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
+/* $Id: lookup_util.h,v 1.8 2004/05/19 14:52:01 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ */
-*/
+/** @file lookup_util.h
+ * @todo FIXME: need description
+ */
#include <algo/blast/core/blast_def.h>
#ifndef UTIL__H
diff --git a/algo/blast/core/lookup_wrap.c b/algo/blast/core/lookup_wrap.c
index 53332c94..5e5160a1 100644
--- a/algo/blast/core/lookup_wrap.c
+++ b/algo/blast/core/lookup_wrap.c
@@ -1,39 +1,38 @@
-/* $Id: lookup_wrap.c,v 1.5 2004/03/19 15:42:54 papadopo Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
+/* $Id: lookup_wrap.c,v 1.6 2004/05/19 14:52:03 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
-/*****************************************************************************
+/** @file lookup_wrap.c
+ * @todo FIXME file had copy-and-paste description!
+ */
-File name: lookup_wrap.c
-
-Author: Ilya Dondoshansky
-
-Contents: High level BLAST functions
-
-******************************************************************************/
-
-static char const rcsid[] = "$Id: lookup_wrap.c,v 1.5 2004/03/19 15:42:54 papadopo Exp $";
+static char const rcsid[] =
+ "$Id: lookup_wrap.c,v 1.6 2004/05/19 14:52:03 camacho Exp $";
#include <algo/blast/core/lookup_wrap.h>
#include <algo/blast/core/blast_lookup.h>
diff --git a/algo/blast/core/lookup_wrap.h b/algo/blast/core/lookup_wrap.h
index 2010b142..150496de 100644
--- a/algo/blast/core/lookup_wrap.h
+++ b/algo/blast/core/lookup_wrap.h
@@ -1,51 +1,48 @@
-/* $Id: lookup_wrap.h,v 1.3 2004/03/11 18:31:06 papadopo Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: lookup_wrap.h
-
-Author: Ilya Dondoshansky
+/* $Id: lookup_wrap.h,v 1.5 2004/06/16 14:53:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
-Contents: Wrapper for all lookup tables used in BLAST
+/** @file lookup_wrap.h
+ * Wrapper for all lookup tables used in BLAST
+ */
-******************************************************************************
- * $Revision: 1.3 $
- * */
#ifndef __LOOKUP_WRAP__
#define __LOOKUP_WRAP__
-#ifdef __cplusplus
-extern "C" {
-#endif
-
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_options.h>
#include <algo/blast/core/blast_rps.h>
#include <algo/blast/core/blast_stat.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/** Wrapper structure for different types of BLAST lookup tables */
typedef struct LookupTableWrap {
Uint1 lut_type; /**< What kind of a lookup table it is? */
diff --git a/algo/blast/core/matrix_freq_ratios.c b/algo/blast/core/matrix_freq_ratios.c
new file mode 100644
index 00000000..19c3ac7e
--- /dev/null
+++ b/algo/blast/core/matrix_freq_ratios.c
@@ -0,0 +1,456 @@
+static char const rcsid[] =
+ "$Id: matrix_freq_ratios.c,v 1.3 2004/06/09 14:21:03 camacho Exp $";
+/* ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Christiam Camacho
+ *
+ */
+
+/** @file matrix_freq_ratios.c
+ * Definitions for various scoring matrices' frequency ratios.
+ */
+
+#include <algo/blast/core/blast_def.h>
+#include "matrix_freq_ratios.h"
+#include "blast_psi_priv.h"
+
+/* Underlying frequency ratios for BLOSUM62 as determined by Stephen Altschul;
+ Stephen and Jorja Henikoff used different numbers for B, Z, X */
+static const double BLOSUM62_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
+ {0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
+ 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
+ 0.000, 0.000, 0.000, 0.250},
+ {0.000, 3.903, 0.565, 0.868, 0.545, 0.741, 0.465, 1.057, 0.569, 0.632, 0.775,
+ 0.602, 0.723, 0.588, 0.754, 0.757, 0.613, 1.472, 0.984, 0.936, 0.416, 0.750,
+ 0.543, 0.747, 0.000, 0.250},
+ {0.000, 0.565, 4.438, 0.345, 4.743, 1.335, 0.324, 0.739, 0.925, 0.334, 0.855,
+ 0.297, 0.405, 4.071, 0.554, 0.944, 0.703, 1.058, 0.826, 0.351, 0.253, 0.750,
+ 0.409, 1.184, 0.000, 0.250},
+ {0.000, 0.868, 0.345, 19.577, 0.301, 0.286, 0.439, 0.420, 0.355, 0.653, 0.349,
+ 0.642, 0.611, 0.398, 0.380, 0.366, 0.309, 0.738, 0.741, 0.756, 0.450, 0.750,
+ 0.434, 0.317, 0.000, 0.250},
+ {0.000, 0.545, 4.743, 0.301, 7.398, 1.688, 0.299, 0.634, 0.679, 0.339, 0.784,
+ 0.287, 0.346, 1.554, 0.599, 0.897, 0.573, 0.913, 0.695, 0.337, 0.232, 0.750,
+ 0.346, 1.382, 0.000, 0.250},
+ {0.000, 0.741, 1.335, 0.286, 1.688, 5.470, 0.331, 0.481, 0.960, 0.331, 1.308,
+ 0.373, 0.500, 0.911, 0.679, 1.902, 0.961, 0.950, 0.741, 0.429, 0.374, 0.750,
+ 0.496, 4.090, 0.000, 0.250},
+ {0.000, 0.465, 0.324, 0.439, 0.299, 0.331, 8.129, 0.341, 0.652, 0.946, 0.344,
+ 1.155, 1.004, 0.354, 0.287, 0.334, 0.381, 0.440, 0.482, 0.745, 1.374, 0.750,
+ 2.769, 0.332, 0.000, 0.250},
+ {0.000, 1.057, 0.739, 0.420, 0.634, 0.481, 0.341, 6.876, 0.493, 0.275, 0.589,
+ 0.284, 0.396, 0.864, 0.477, 0.539, 0.450, 0.904, 0.579, 0.337, 0.422, 0.750,
+ 0.349, 0.503, 0.000, 0.250},
+ {0.000, 0.569, 0.925, 0.355, 0.679, 0.960, 0.652, 0.493, 13.506, 0.326, 0.779,
+ 0.381, 0.584, 1.222, 0.473, 1.168, 0.917, 0.737, 0.557, 0.339, 0.444, 0.750,
+ 1.798, 1.040, 0.000, 0.250},
+ {0.000, 0.632, 0.334, 0.653, 0.339, 0.331, 0.946, 0.275, 0.326, 3.998, 0.396,
+ 1.694, 1.478, 0.328, 0.385, 0.383, 0.355, 0.443, 0.780, 2.417, 0.409, 0.750,
+ 0.630, 0.351, 0.000, 0.250},
+ {0.000, 0.775, 0.855, 0.349, 0.784, 1.308, 0.344, 0.589, 0.779, 0.396, 4.764,
+ 0.428, 0.625, 0.940, 0.704, 1.554, 2.077, 0.932, 0.793, 0.457, 0.359, 0.750,
+ 0.532, 1.403, 0.000, 0.250},
+ {0.000, 0.602, 0.297, 0.642, 0.287, 0.373, 1.155, 0.284, 0.381, 1.694, 0.428,
+ 3.797, 1.994, 0.310, 0.371, 0.477, 0.474, 0.429, 0.660, 1.314, 0.568, 0.750,
+ 0.692, 0.413, 0.000, 0.250},
+ {0.000, 0.723, 0.405, 0.611, 0.346, 0.500, 1.004, 0.396, 0.584, 1.478, 0.625,
+ 1.994, 6.481, 0.474, 0.424, 0.864, 0.623, 0.599, 0.794, 1.269, 0.610, 0.750,
+ 0.708, 0.641, 0.000, 0.250},
+ {0.000, 0.588, 4.071, 0.398, 1.554, 0.911, 0.354, 0.864, 1.222, 0.328, 0.940,
+ 0.310, 0.474, 7.094, 0.500, 1.001, 0.859, 1.232, 0.984, 0.369, 0.278, 0.750,
+ 0.486, 0.946, 0.000, 0.250},
+ {0.000, 0.754, 0.554, 0.380, 0.599, 0.679, 0.287, 0.477, 0.473, 0.385, 0.704,
+ 0.371, 0.424, 0.500, 12.838, 0.641, 0.481, 0.755, 0.689, 0.443, 0.282, 0.750,
+ 0.363, 0.664, 0.000, 0.250},
+ {0.000, 0.757, 0.944, 0.366, 0.897, 1.902, 0.334, 0.539, 1.168, 0.383, 1.554,
+ 0.477, 0.864, 1.001, 0.641, 6.244, 1.406, 0.966, 0.791, 0.467, 0.509, 0.750,
+ 0.611, 3.582, 0.000, 0.250},
+ {0.000, 0.613, 0.703, 0.309, 0.573, 0.961, 0.381, 0.450, 0.917, 0.355, 2.077,
+ 0.474, 0.623, 0.859, 0.481, 1.406, 6.666, 0.767, 0.678, 0.420, 0.395, 0.750,
+ 0.556, 1.133, 0.000, 0.250},
+ {0.000, 1.472, 1.058, 0.738, 0.913, 0.950, 0.440, 0.904, 0.737, 0.443, 0.932,
+ 0.429, 0.599, 1.232, 0.755, 0.966, 0.767, 3.843, 1.614, 0.565, 0.385, 0.750,
+ 0.557, 0.956, 0.000, 0.250},
+ {0.000, 0.984, 0.826, 0.741, 0.695, 0.741, 0.482, 0.579, 0.557, 0.780, 0.793,
+ 0.660, 0.794, 0.984, 0.689, 0.791, 0.678, 1.614, 4.832, 0.981, 0.431, 0.750,
+ 0.573, 0.761, 0.000, 0.250},
+ {0.000, 0.936, 0.351, 0.756, 0.337, 0.429, 0.745, 0.337, 0.339, 2.417, 0.457,
+ 1.314, 1.269, 0.369, 0.443, 0.467, 0.420, 0.565, 0.981, 3.692, 0.374, 0.750,
+ 0.658, 0.444, 0.000, 0.250},
+ {0.000, 0.416, 0.253, 0.450, 0.232, 0.374, 1.374, 0.422, 0.444, 0.409, 0.359,
+ 0.568, 0.610, 0.278, 0.282, 0.509, 0.395, 0.385, 0.431, 0.374, 38.108, 0.750,
+ 2.110, 0.426, 0.000, 0.250},
+ {0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750,
+ 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750,
+ 0.750, 0.750, 0.000, 0.250},
+ {0.000, 0.543, 0.409, 0.434, 0.346, 0.496, 2.769, 0.349, 1.798, 0.630, 0.532,
+ 0.692, 0.708, 0.486, 0.363, 0.611, 0.556, 0.557, 0.573, 0.658, 2.110, 0.750,
+ 9.832, 0.541, 0.000, 0.250},
+ {0.000, 0.747, 1.184, 0.317, 1.382, 4.090, 0.332, 0.503, 1.040, 0.351, 1.403,
+ 0.413, 0.641, 0.946, 0.664, 3.582, 1.133, 0.956, 0.761, 0.444, 0.426, 0.750,
+ 0.541, 3.893, 0.000, 0.250},
+ {0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
+ 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
+ 0.000, 0.000, 0.000, 0.250},
+ {0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250,
+ 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250,
+ 0.250, 0.250, 0.250, 1.333},
+};
+
+static const double PAM30_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.003},
+{0.000, 7.789, 0.302, 0.108, 0.317, 0.453, 0.057, 0.576, 0.083, 0.199, 0.095, 0.115, 0.189, 0.285, 0.593, 0.235, 0.091, 0.875, 0.827, 0.477, 0.010, 0.750, 0.070, 0.358, 0.000, 0.003},
+{0.000, 0.302, 8.118, 0.015, 8.456, 1.472, 0.027, 0.338, 0.664, 0.126, 0.441, 0.049, 0.034, 7.726, 0.102, 0.368, 0.082, 0.604, 0.353, 0.067, 0.032, 0.750, 0.114, 0.991, 0.000, 0.003},
+{0.000, 0.108, 0.015, 27.591, 0.008, 0.008, 0.012, 0.042, 0.078, 0.119, 0.008, 0.006, 0.009, 0.024, 0.063, 0.008, 0.068, 0.379, 0.068, 0.132, 0.005, 0.750, 0.258, 0.008, 0.000, 0.003},
+{0.000, 0.317, 8.456, 0.008, 14.236, 2.342, 0.006, 0.326, 0.271, 0.080, 0.220, 0.014, 0.023, 1.756, 0.068, 0.427, 0.029, 0.286, 0.200, 0.065, 0.005, 0.750, 0.020, 1.508, 0.000, 0.003},
+{0.000, 0.453, 1.472, 0.008, 2.343, 13.663, 0.008, 0.232, 0.181, 0.150, 0.226, 0.043, 0.092, 0.464, 0.153, 1.507, 0.043, 0.226, 0.130, 0.110, 0.003, 0.750, 0.057, 8.365, 0.000, 0.003},
+{0.000, 0.057, 0.027, 0.012, 0.006, 0.008, 21.408, 0.044, 0.132, 0.473, 0.009, 0.416, 0.250, 0.051, 0.036, 0.011, 0.042, 0.114, 0.051, 0.067, 0.213, 0.750, 1.786, 0.009, 0.000, 0.003},
+{0.000, 0.576, 0.338, 0.042, 0.326, 0.232, 0.044, 9.314, 0.048, 0.024, 0.086, 0.028, 0.057, 0.351, 0.122, 0.094, 0.040, 0.553, 0.135, 0.149, 0.006, 0.750, 0.009, 0.172, 0.000, 0.003},
+{0.000, 0.083, 0.664, 0.078, 0.271, 0.181, 0.132, 0.048, 22.927, 0.044, 0.120, 0.123, 0.028, 1.119, 0.248, 1.359, 0.587, 0.132, 0.086, 0.115, 0.081, 0.750, 0.324, 0.695, 0.000, 0.003},
+{0.000, 0.199, 0.126, 0.119, 0.080, 0.150, 0.473, 0.024, 0.044, 18.632, 0.127, 0.643, 0.793, 0.179, 0.053, 0.070, 0.159, 0.097, 0.444, 1.938, 0.009, 0.750, 0.119, 0.115, 0.000, 0.003},
+{0.000, 0.095, 0.441, 0.008, 0.220, 0.226, 0.009, 0.086, 0.120, 0.127, 9.988, 0.061, 0.563, 0.697, 0.106, 0.384, 1.111, 0.260, 0.336, 0.046, 0.018, 0.750, 0.042, 0.295, 0.000, 0.003},
+{0.000, 0.115, 0.049, 0.006, 0.014, 0.043, 0.416, 0.028, 0.123, 0.643, 0.061, 10.019, 1.242, 0.090, 0.090, 0.185, 0.053, 0.058, 0.103, 0.463, 0.127, 0.750, 0.095, 0.105, 0.000, 0.003},
+{0.000, 0.189, 0.034, 0.009, 0.023, 0.092, 0.250, 0.057, 0.028, 0.792, 0.563, 1.242, 46.604, 0.047, 0.065, 0.272, 0.243, 0.161, 0.263, 0.631, 0.013, 0.750, 0.021, 0.171, 0.000, 0.003},
+{0.000, 0.285, 7.726, 0.024, 1.756, 0.464, 0.051, 0.351, 1.119, 0.179, 0.697, 0.090, 0.047, 14.647, 0.142, 0.299, 0.144, 0.972, 0.531, 0.070, 0.064, 0.750, 0.223, 0.392, 0.000, 0.003},
+{0.000, 0.593, 0.102, 0.063, 0.068, 0.153, 0.036, 0.122, 0.248, 0.053, 0.106, 0.090, 0.065, 0.142, 15.809, 0.376, 0.270, 0.571, 0.240, 0.140, 0.009, 0.750, 0.010, 0.251, 0.000, 0.003},
+{0.000, 0.235, 0.368, 0.008, 0.427, 1.507, 0.011, 0.094, 1.360, 0.070, 0.384, 0.185, 0.272, 0.299, 0.376, 18.136, 0.585, 0.168, 0.150, 0.103, 0.012, 0.750, 0.017, 8.754, 0.000, 0.003},
+{0.000, 0.091, 0.082, 0.068, 0.029, 0.043, 0.042, 0.040, 0.587, 0.159, 1.111, 0.053, 0.243, 0.144, 0.270, 0.585, 18.926, 0.355, 0.106, 0.074, 0.517, 0.750, 0.030, 0.279, 0.000, 0.003},
+{0.000, 0.875, 0.604, 0.379, 0.286, 0.226, 0.114, 0.553, 0.132, 0.097, 0.260, 0.058, 0.161, 0.972, 0.571, 0.168, 0.355, 9.028, 1.145, 0.118, 0.179, 0.750, 0.097, 0.200, 0.000, 0.003},
+{0.000, 0.827, 0.353, 0.068, 0.200, 0.130, 0.051, 0.135, 0.086, 0.444, 0.336, 0.103, 0.263, 0.531, 0.240, 0.150, 0.106, 1.145, 11.695, 0.391, 0.013, 0.750, 0.112, 0.139, 0.000, 0.003},
+{0.000, 0.477, 0.067, 0.132, 0.065, 0.110, 0.067, 0.149, 0.115, 1.938, 0.046, 0.463, 0.631, 0.070, 0.141, 0.103, 0.074, 0.118, 0.391, 11.609, 0.005, 0.750, 0.081, 0.107, 0.000, 0.003},
+{0.000, 0.010, 0.032, 0.005, 0.005, 0.003, 0.213, 0.006, 0.081, 0.009, 0.018, 0.127, 0.013, 0.064, 0.009, 0.012, 0.517, 0.179, 0.013, 0.005, 88.722, 0.750, 0.173, 0.007, 0.000, 0.003},
+{0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.000, 0.003},
+{0.000, 0.070, 0.114, 0.258, 0.020, 0.057, 1.786, 0.009, 0.324, 0.119, 0.042, 0.095, 0.021, 0.223, 0.010, 0.017, 0.030, 0.097, 0.112, 0.081, 0.173, 0.750, 28.442, 0.039, 0.000, 0.003},
+{0.000, 0.358, 0.991, 0.008, 1.508, 8.365, 0.009, 0.172, 0.695, 0.115, 0.295, 0.105, 0.171, 0.392, 0.251, 8.754, 0.279, 0.200, 0.139, 0.107, 0.007, 0.750, 0.039, 8.535, 0.000, 0.003},
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.003},
+{0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 1.333},
+};
+
+static const double PAM70_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.002},
+{0.000, 4.900, 0.605, 0.242, 0.619, 0.771, 0.135, 1.015, 0.221, 0.434, 0.249, 0.250, 0.376, 0.589, 1.030, 0.466, 0.227, 1.350, 1.327, 0.809, 0.043, 0.750, 0.154, 0.638, 0.000, 0.002},
+{0.000, 0.605, 5.422, 0.066, 5.890, 2.249, 0.079, 0.649, 1.099, 0.253, 0.778, 0.120, 0.140, 4.879, 0.274, 0.790, 0.253, 0.955, 0.654, 0.185, 0.078, 0.750, 0.223, 1.613, 0.000, 0.002},
+{0.000, 0.242, 0.066, 24.836, 0.040, 0.039, 0.055, 0.116, 0.165, 0.246, 0.038, 0.030, 0.044, 0.097, 0.159, 0.039, 0.151, 0.665, 0.186, 0.274, 0.022, 0.750, 0.523, 0.039, 0.000, 0.002},
+{0.000, 0.618, 5.890, 0.040, 8.900, 3.357, 0.034, 0.639, 0.600, 0.193, 0.484, 0.064, 0.103, 2.400, 0.213, 0.922, 0.139, 0.600, 0.442, 0.173, 0.026, 0.750, 0.080, 2.296, 0.000, 0.002},
+{0.000, 0.771, 2.249, 0.039, 3.357, 8.654, 0.038, 0.492, 0.480, 0.284, 0.466, 0.117, 0.212, 0.965, 0.347, 2.258, 0.179, 0.479, 0.338, 0.247, 0.019, 0.750, 0.119, 5.867, 0.000, 0.002},
+{0.000, 0.135, 0.079, 0.055, 0.034, 0.038, 17.455, 0.100, 0.277, 0.846, 0.041, 0.824, 0.508, 0.131, 0.089, 0.052, 0.099, 0.216, 0.138, 0.208, 0.452, 0.750, 3.378, 0.044, 0.000, 0.002},
+{0.000, 1.015, 0.649, 0.116, 0.639, 0.492, 0.100, 7.309, 0.145, 0.107, 0.210, 0.081, 0.144, 0.660, 0.308, 0.237, 0.122, 0.964, 0.370, 0.313, 0.027, 0.750, 0.044, 0.381, 0.000, 0.002},
+{0.000, 0.221, 1.099, 0.165, 0.600, 0.480, 0.277, 0.145, 16.422, 0.140, 0.335, 0.259, 0.122, 1.677, 0.493, 2.188, 1.058, 0.327, 0.230, 0.233, 0.186, 0.750, 0.611, 1.224, 0.000, 0.002},
+{0.000, 0.434, 0.253, 0.246, 0.193, 0.284, 0.846, 0.107, 0.140, 11.749, 0.266, 1.192, 1.369, 0.322, 0.160, 0.189, 0.301, 0.254, 0.761, 3.004, 0.044, 0.750, 0.281, 0.242, 0.000, 0.002},
+{0.000, 0.249, 0.778, 0.038, 0.484, 0.466, 0.041, 0.210, 0.335, 0.266, 7.610, 0.153, 0.948, 1.119, 0.256, 0.722, 1.937, 0.520, 0.620, 0.139, 0.079, 0.750, 0.101, 0.578, 0.000, 0.002},
+{0.000, 0.250, 0.120, 0.030, 0.064, 0.117, 0.824, 0.081, 0.259, 1.192, 0.153, 8.228, 2.130, 0.185, 0.199, 0.351, 0.137, 0.149, 0.240, 0.908, 0.267, 0.750, 0.239, 0.219, 0.000, 0.002},
+{0.000, 0.376, 0.140, 0.044, 0.103, 0.212, 0.508, 0.144, 0.122, 1.369, 0.948, 2.129, 28.592, 0.182, 0.176, 0.486, 0.488, 0.319, 0.498, 1.133, 0.060, 0.750, 0.097, 0.332, 0.000, 0.002},
+{0.000, 0.589, 4.879, 0.097, 2.400, 0.965, 0.131, 0.660, 1.677, 0.322, 1.119, 0.185, 0.182, 7.754, 0.344, 0.637, 0.387, 1.366, 0.899, 0.199, 0.138, 0.750, 0.390, 0.822, 0.000, 0.002},
+{0.000, 1.030, 0.274, 0.159, 0.213, 0.347, 0.089, 0.308, 0.493, 0.160, 0.256, 0.199, 0.176, 0.344, 11.871, 0.684, 0.527, 0.979, 0.531, 0.307, 0.042, 0.750, 0.047, 0.494, 0.000, 0.002},
+{0.000, 0.466, 0.790, 0.039, 0.923, 2.258, 0.052, 0.237, 2.188, 0.189, 0.722, 0.351, 0.486, 0.637, 0.684, 11.469, 1.024, 0.373, 0.329, 0.232, 0.055, 0.750, 0.074, 6.272, 0.000, 0.002},
+{0.000, 0.227, 0.253, 0.151, 0.139, 0.179, 0.099, 0.122, 1.058, 0.301, 1.937, 0.137, 0.488, 0.387, 0.527, 1.024, 13.660, 0.610, 0.284, 0.176, 0.991, 0.750, 0.086, 0.547, 0.000, 0.002},
+{0.000, 1.350, 0.955, 0.665, 0.600, 0.479, 0.216, 0.964, 0.327, 0.254, 0.520, 0.149, 0.319, 1.366, 0.979, 0.373, 0.610, 5.204, 1.695, 0.303, 0.324, 0.750, 0.208, 0.433, 0.000, 0.002},
+{0.000, 1.327, 0.654, 0.186, 0.442, 0.338, 0.138, 0.370, 0.230, 0.761, 0.620, 0.240, 0.498, 0.899, 0.531, 0.329, 0.284, 1.695, 7.337, 0.719, 0.055, 0.750, 0.228, 0.334, 0.000, 0.002},
+{0.000, 0.809, 0.185, 0.275, 0.173, 0.247, 0.208, 0.313, 0.233, 3.003, 0.139, 0.908, 1.133, 0.199, 0.307, 0.232, 0.176, 0.303, 0.719, 8.211, 0.027, 0.750, 0.181, 0.240, 0.000, 0.002},
+{0.000, 0.043, 0.078, 0.022, 0.026, 0.019, 0.452, 0.027, 0.186, 0.044, 0.079, 0.267, 0.060, 0.138, 0.042, 0.055, 0.991, 0.324, 0.054, 0.027, 80.645, 0.750, 0.377, 0.035, 0.000, 0.002},
+{0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.000, 0.002},
+{0.000, 0.154, 0.223, 0.523, 0.080, 0.119, 3.379, 0.044, 0.611, 0.281, 0.101, 0.239, 0.097, 0.390, 0.047, 0.074, 0.086, 0.208, 0.228, 0.181, 0.377, 0.750, 23.141, 0.099, 0.000, 0.002},
+{0.000, 0.638, 1.613, 0.039, 2.296, 5.867, 0.044, 0.381, 1.224, 0.242, 0.578, 0.219, 0.332, 0.822, 0.494, 6.272, 0.547, 0.433, 0.334, 0.240, 0.035, 0.750, 0.099, 6.043, 0.000, 0.002},
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.002},
+{0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 1.333},
+};
+
+static const double BLOSUM45_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.290},
+{0.000, 2.950, 0.735, 0.800, 0.689, 0.825, 0.587, 1.080, 0.654, 0.747, 0.786, 0.712, 0.821, 0.789, 0.709, 0.867, 0.700, 1.300, 1.001, 1.010, 0.565, 0.750, 0.639, 0.841, 0.000, 0.290},
+{0.000, 0.735, 3.260, 0.600, 3.594, 1.300, 0.496, 0.886, 1.088, 0.497, 1.023, 0.472, 0.580, 2.862, 0.686, 0.992, 0.829, 1.053, 0.985, 0.520, 0.381, 0.750, 0.627, 1.182, 0.000, 0.290},
+{0.000, 0.800, 0.600, 17.090, 0.533, 0.545, 0.602, 0.557, 0.491, 0.543, 0.547, 0.673, 0.604, 0.680, 0.411, 0.486, 0.472, 0.797, 0.822, 0.715, 0.334, 0.750, 0.489, 0.523, 0.000, 0.290},
+{0.000, 0.689, 3.594, 0.533, 5.356, 1.643, 0.431, 0.740, 0.976, 0.440, 0.942, 0.463, 0.494, 1.502, 0.724, 0.958, 0.771, 0.929, 0.876, 0.494, 0.373, 0.750, 0.645, 1.381, 0.000, 0.290},
+{0.000, 0.825, 1.300, 0.545, 1.643, 3.873, 0.498, 0.576, 0.962, 0.485, 1.277, 0.571, 0.615, 0.893, 0.911, 1.531, 1.011, 0.912, 0.833, 0.555, 0.519, 0.750, 0.617, 2.978, 0.000, 0.290},
+{0.000, 0.587, 0.496, 0.602, 0.431, 0.498, 5.748, 0.480, 0.679, 1.064, 0.529, 1.303, 1.063, 0.572, 0.451, 0.444, 0.590, 0.610, 0.716, 0.953, 1.355, 0.750, 2.185, 0.477, 0.000, 0.290},
+{0.000, 1.080, 0.886, 0.557, 0.740, 0.576, 0.480, 5.071, 0.662, 0.416, 0.678, 0.450, 0.585, 1.059, 0.702, 0.687, 0.570, 1.058, 0.693, 0.479, 0.591, 0.750, 0.549, 0.619, 0.000, 0.290},
+{0.000, 0.654, 1.088, 0.491, 0.976, 0.962, 0.679, 0.662, 9.512, 0.453, 0.890, 0.670, 0.918, 1.220, 0.661, 1.151, 0.973, 0.854, 0.706, 0.457, 0.452, 0.750, 1.472, 1.034, 0.000, 0.290},
+{0.000, 0.747, 0.497, 0.543, 0.440, 0.485, 1.064, 0.416, 0.453, 3.233, 0.532, 1.596, 1.455, 0.564, 0.610, 0.578, 0.488, 0.618, 0.848, 2.176, 0.565, 0.750, 0.906, 0.521, 0.000, 0.290},
+{0.000, 0.786, 1.023, 0.547, 0.942, 1.277, 0.529, 0.678, 0.890, 0.532, 3.327, 0.554, 0.738, 1.119, 0.781, 1.330, 1.943, 0.890, 0.885, 0.592, 0.562, 0.750, 0.737, 1.297, 0.000, 0.290},
+{0.000, 0.712, 0.472, 0.673, 0.463, 0.571, 1.303, 0.450, 0.670, 1.596, 0.554, 2.997, 1.731, 0.484, 0.478, 0.642, 0.601, 0.556, 0.781, 1.334, 0.671, 0.750, 0.965, 0.598, 0.000, 0.290},
+{0.000, 0.821, 0.580, 0.604, 0.494, 0.615, 1.063, 0.585, 0.918, 1.455, 0.738, 1.731, 4.114, 0.682, 0.644, 0.941, 0.776, 0.660, 0.860, 1.236, 0.634, 0.750, 1.023, 0.739, 0.000, 0.290},
+{0.000, 0.789, 2.862, 0.680, 1.502, 0.893, 0.572, 1.059, 1.220, 0.564, 1.119, 0.484, 0.682, 4.478, 0.640, 1.032, 0.898, 1.200, 1.115, 0.552, 0.390, 0.750, 0.606, 0.946, 0.000, 0.290},
+{0.000, 0.709, 0.686, 0.411, 0.724, 0.911, 0.451, 0.702, 0.661, 0.610, 0.781, 0.478, 0.644, 0.640, 8.819, 0.716, 0.582, 0.750, 0.856, 0.540, 0.525, 0.750, 0.479, 0.836, 0.000, 0.290},
+{0.000, 0.867, 0.992, 0.486, 0.958, 1.531, 0.444, 0.687, 1.151, 0.578, 1.330, 0.642, 0.941, 1.032, 0.716, 4.407, 1.329, 1.092, 0.781, 0.547, 0.645, 0.750, 0.829, 2.630, 0.000, 0.290},
+{0.000, 0.700, 0.829, 0.472, 0.771, 1.011, 0.590, 0.570, 0.973, 0.488, 1.943, 0.601, 0.776, 0.898, 0.582, 1.329, 4.747, 0.799, 0.715, 0.578, 0.580, 0.750, 0.807, 1.132, 0.000, 0.290},
+{0.000, 1.300, 1.053, 0.797, 0.929, 0.912, 0.610, 1.058, 0.854, 0.618, 0.890, 0.556, 0.660, 1.200, 0.750, 1.092, 0.799, 2.782, 1.472, 0.728, 0.428, 0.750, 0.706, 0.981, 0.000, 0.290},
+{0.000, 1.001, 0.985, 0.822, 0.876, 0.833, 0.716, 0.693, 0.706, 0.848, 0.885, 0.781, 0.860, 1.115, 0.856, 0.781, 0.715, 1.472, 3.139, 1.040, 0.454, 0.750, 0.744, 0.813, 0.000, 0.290},
+{0.000, 1.010, 0.520, 0.715, 0.494, 0.555, 0.953, 0.479, 0.457, 2.176, 0.592, 1.334, 1.236, 0.552, 0.540, 0.547, 0.578, 0.728, 1.040, 2.871, 0.473, 0.750, 0.809, 0.552, 0.000, 0.290},
+{0.000, 0.565, 0.381, 0.334, 0.373, 0.519, 1.355, 0.591, 0.452, 0.565, 0.562, 0.671, 0.634, 0.390, 0.525, 0.645, 0.580, 0.428, 0.454, 0.473, 29.702, 0.750, 1.801, 0.567, 0.000, 0.290},
+{0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.000, 0.290},
+{0.000, 0.639, 0.627, 0.489, 0.645, 0.617, 2.185, 0.549, 1.472, 0.906, 0.737, 0.965, 1.023, 0.606, 0.479, 0.829, 0.807, 0.706, 0.744, 0.809, 1.801, 0.750, 5.753, 0.698, 0.000, 0.290},
+{0.000, 0.841, 1.182, 0.523, 1.381, 2.978, 0.477, 0.619, 1.034, 0.521, 1.297, 0.598, 0.739, 0.946, 0.836, 2.630, 1.132, 0.981, 0.813, 0.552, 0.567, 0.750, 0.698, 2.845, 0.000, 0.290},
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.290},
+{0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 1.333},
+};
+
+static const double BLOSUM80_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.140},
+{0.000, 4.773, 0.477, 0.732, 0.451, 0.703, 0.397, 0.957, 0.514, 0.543, 0.723, 0.505, 0.625, 0.510, 0.771, 0.696, 0.555, 1.535, 0.980, 0.866, 0.309, 0.750, 0.436, 0.700, 0.000, 0.140},
+{0.000, 0.477, 5.362, 0.252, 5.759, 1.269, 0.252, 0.639, 0.830, 0.234, 0.793, 0.221, 0.310, 4.868, 0.428, 0.850, 0.609, 0.948, 0.743, 0.268, 0.179, 0.750, 0.307, 1.109, 0.000, 0.140},
+{0.000, 0.732, 0.252, 20.702, 0.214, 0.180, 0.395, 0.272, 0.221, 0.581, 0.241, 0.493, 0.499, 0.300, 0.269, 0.295, 0.233, 0.576, 0.602, 0.634, 0.302, 0.750, 0.308, 0.224, 0.000, 0.140},
+{0.000, 0.451, 5.759, 0.214, 9.106, 1.635, 0.234, 0.541, 0.594, 0.214, 0.677, 0.197, 0.252, 1.584, 0.452, 0.763, 0.477, 0.774, 0.611, 0.245, 0.145, 0.750, 0.245, 1.303, 0.000, 0.140},
+{0.000, 0.703, 1.269, 0.180, 1.635, 6.995, 0.249, 0.399, 0.901, 0.264, 1.195, 0.276, 0.429, 0.811, 0.581, 1.906, 0.832, 0.845, 0.685, 0.369, 0.241, 0.750, 0.333, 5.054, 0.000, 0.140},
+{0.000, 0.397, 0.252, 0.395, 0.234, 0.249, 9.486, 0.249, 0.572, 0.841, 0.283, 1.114, 0.893, 0.273, 0.237, 0.285, 0.287, 0.369, 0.445, 0.649, 1.089, 0.750, 2.780, 0.263, 0.000, 0.140},
+{0.000, 0.957, 0.639, 0.272, 0.541, 0.399, 0.249, 7.882, 0.387, 0.184, 0.483, 0.210, 0.286, 0.761, 0.347, 0.425, 0.377, 0.784, 0.492, 0.251, 0.264, 0.750, 0.230, 0.409, 0.000, 0.140},
+{0.000, 0.514, 0.830, 0.221, 0.594, 0.901, 0.572, 0.387, 16.070, 0.258, 0.740, 0.314, 0.432, 1.124, 0.420, 1.316, 0.925, 0.661, 0.540, 0.289, 0.390, 0.750, 1.819, 1.059, 0.000, 0.140},
+{0.000, 0.543, 0.234, 0.581, 0.214, 0.264, 0.841, 0.184, 0.258, 4.868, 0.313, 1.665, 1.512, 0.258, 0.286, 0.309, 0.299, 0.379, 0.701, 2.496, 0.343, 0.750, 0.539, 0.281, 0.000, 0.140},
+{0.000, 0.723, 0.793, 0.241, 0.677, 1.195, 0.283, 0.483, 0.740, 0.313, 6.326, 0.357, 0.534, 0.938, 0.597, 1.524, 2.192, 0.820, 0.736, 0.370, 0.241, 0.750, 0.408, 1.320, 0.000, 0.140},
+{0.000, 0.505, 0.221, 0.493, 0.197, 0.276, 1.114, 0.210, 0.314, 1.665, 0.357, 4.463, 2.123, 0.250, 0.303, 0.407, 0.363, 0.368, 0.561, 1.220, 0.439, 0.750, 0.581, 0.326, 0.000, 0.140},
+{0.000, 0.625, 0.310, 0.499, 0.252, 0.429, 0.893, 0.286, 0.432, 1.512, 0.534, 2.123, 8.883, 0.382, 0.362, 0.887, 0.506, 0.498, 0.758, 1.224, 0.561, 0.750, 0.550, 0.603, 0.000, 0.140},
+{0.000, 0.510, 4.868, 0.300, 1.584, 0.811, 0.273, 0.761, 1.124, 0.258, 0.938, 0.250, 0.382, 8.963, 0.398, 0.958, 0.773, 1.165, 0.908, 0.297, 0.221, 0.750, 0.385, 0.867, 0.000, 0.140},
+{0.000, 0.771, 0.428, 0.269, 0.452, 0.581, 0.237, 0.347, 0.420, 0.286, 0.597, 0.303, 0.362, 0.398, 15.155, 0.538, 0.446, 0.652, 0.560, 0.370, 0.178, 0.750, 0.258, 0.565, 0.000, 0.140},
+{0.000, 0.696, 0.850, 0.295, 0.763, 1.906, 0.285, 0.425, 1.316, 0.309, 1.524, 0.407, 0.887, 0.958, 0.538, 8.340, 1.394, 0.859, 0.724, 0.411, 0.408, 0.750, 0.462, 4.360, 0.000, 0.140},
+{0.000, 0.555, 0.609, 0.233, 0.477, 0.832, 0.287, 0.377, 0.925, 0.299, 2.192, 0.363, 0.506, 0.773, 0.446, 1.394, 8.245, 0.695, 0.598, 0.354, 0.294, 0.750, 0.418, 1.046, 0.000, 0.140},
+{0.000, 1.535, 0.948, 0.576, 0.774, 0.845, 0.369, 0.784, 0.661, 0.379, 0.820, 0.368, 0.498, 1.165, 0.652, 0.859, 0.695, 5.106, 1.663, 0.494, 0.271, 0.750, 0.462, 0.850, 0.000, 0.140},
+{0.000, 0.980, 0.743, 0.602, 0.611, 0.685, 0.445, 0.492, 0.540, 0.701, 0.736, 0.561, 0.758, 0.908, 0.560, 0.724, 0.598, 1.663, 6.205, 0.891, 0.285, 0.750, 0.474, 0.700, 0.000, 0.140},
+{0.000, 0.866, 0.268, 0.634, 0.245, 0.369, 0.649, 0.251, 0.289, 2.496, 0.370, 1.220, 1.224, 0.297, 0.370, 0.411, 0.354, 0.494, 0.891, 4.584, 0.342, 0.750, 0.489, 0.385, 0.000, 0.140},
+{0.000, 0.309, 0.179, 0.302, 0.145, 0.241, 1.089, 0.264, 0.390, 0.343, 0.241, 0.439, 0.561, 0.221, 0.178, 0.408, 0.294, 0.271, 0.285, 0.342, 41.552, 0.750, 2.036, 0.304, 0.000, 0.140},
+{0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.000, 0.140},
+{0.000, 0.436, 0.307, 0.308, 0.245, 0.333, 2.780, 0.230, 1.819, 0.539, 0.408, 0.581, 0.550, 0.385, 0.258, 0.462, 0.418, 0.462, 0.474, 0.489, 2.036, 0.750, 12.194, 0.382, 0.000, 0.140},
+{0.000, 0.700, 1.109, 0.224, 1.303, 5.054, 0.263, 0.409, 1.059, 0.281, 1.320, 0.326, 0.603, 0.867, 0.565, 4.360, 1.046, 0.850, 0.700, 0.385, 0.304, 0.750, 0.382, 4.789, 0.000, 0.140},
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.140},
+{0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 1.333},
+};
+
+static const double BLOSUM50_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.290},
+{0.000, 3.273, 0.687, 0.888, 0.660, 0.797, 0.546, 1.101, 0.641, 0.715, 0.748, 0.657, 0.854, 0.720, 0.715, 0.820, 0.668, 1.364, 0.967, 0.982, 0.464, 0.750, 0.596, 0.806, 0.000, 0.290},
+{0.000, 0.687, 3.676, 0.507, 4.021, 1.319, 0.384, 0.848, 1.118, 0.407, 0.968, 0.391, 0.516, 3.269, 0.661, 0.998, 0.759, 1.061, 0.955, 0.446, 0.339, 0.750, 0.556, 1.196, 0.000, 0.290},
+{0.000, 0.888, 0.507, 18.231, 0.428, 0.456, 0.565, 0.524, 0.517, 0.587, 0.460, 0.650, 0.681, 0.601, 0.403, 0.481, 0.428, 0.830, 0.818, 0.809, 0.312, 0.750, 0.539, 0.466, 0.000, 0.290},
+{0.000, 0.660, 4.021, 0.428, 6.112, 1.658, 0.337, 0.745, 0.884, 0.370, 0.890, 0.373, 0.435, 1.558, 0.710, 0.967, 0.678, 0.921, 0.825, 0.432, 0.307, 0.750, 0.504, 1.394, 0.000, 0.290},
+{0.000, 0.797, 1.319, 0.456, 1.658, 4.437, 0.456, 0.540, 0.911, 0.416, 1.331, 0.478, 0.605, 0.920, 0.839, 1.671, 0.976, 0.882, 0.819, 0.479, 0.548, 0.750, 0.652, 3.380, 0.000, 0.290},
+{0.000, 0.546, 0.384, 0.565, 0.337, 0.456, 6.636, 0.414, 0.755, 0.990, 0.445, 1.262, 1.052, 0.439, 0.376, 0.417, 0.462, 0.553, 0.570, 0.860, 1.348, 0.750, 2.424, 0.441, 0.000, 0.290},
+{0.000, 1.101, 0.848, 0.524, 0.745, 0.540, 0.414, 5.792, 0.601, 0.370, 0.655, 0.377, 0.519, 0.969, 0.620, 0.641, 0.517, 0.999, 0.636, 0.403, 0.505, 0.750, 0.467, 0.579, 0.000, 0.290},
+{0.000, 0.641, 1.118, 0.517, 0.884, 0.911, 0.755, 0.601, 10.449, 0.411, 0.946, 0.547, 0.760, 1.394, 0.582, 1.209, 0.982, 0.823, 0.653, 0.414, 0.475, 0.750, 1.570, 1.025, 0.000, 0.290},
+{0.000, 0.715, 0.407, 0.587, 0.370, 0.416, 0.990, 0.370, 0.411, 3.411, 0.468, 1.697, 1.438, 0.451, 0.511, 0.503, 0.435, 0.546, 0.861, 2.313, 0.522, 0.750, 0.826, 0.449, 0.000, 0.290},
+{0.000, 0.748, 0.968, 0.460, 0.890, 1.331, 0.445, 0.655, 0.946, 0.468, 3.881, 0.479, 0.685, 1.060, 0.764, 1.419, 2.065, 0.893, 0.845, 0.523, 0.462, 0.750, 0.668, 1.365, 0.000, 0.290},
+{0.000, 0.657, 0.391, 0.650, 0.373, 0.478, 1.262, 0.377, 0.547, 1.697, 0.479, 3.328, 1.790, 0.413, 0.444, 0.563, 0.554, 0.511, 0.759, 1.324, 0.602, 0.750, 0.858, 0.511, 0.000, 0.290},
+{0.000, 0.854, 0.516, 0.681, 0.435, 0.605, 1.052, 0.519, 0.760, 1.438, 0.685, 1.790, 4.816, 0.612, 0.545, 0.960, 0.678, 0.682, 0.882, 1.213, 0.761, 0.750, 0.918, 0.741, 0.000, 0.290},
+{0.000, 0.720, 3.269, 0.601, 1.558, 0.920, 0.439, 0.969, 1.394, 0.451, 1.060, 0.413, 0.612, 5.285, 0.604, 1.035, 0.854, 1.224, 1.109, 0.463, 0.376, 0.750, 0.617, 0.964, 0.000, 0.290},
+{0.000, 0.715, 0.661, 0.403, 0.710, 0.839, 0.376, 0.620, 0.582, 0.511, 0.764, 0.444, 0.545, 0.604, 10.204, 0.750, 0.519, 0.759, 0.749, 0.524, 0.420, 0.750, 0.468, 0.805, 0.000, 0.290},
+{0.000, 0.820, 0.998, 0.481, 0.967, 1.671, 0.417, 0.641, 1.209, 0.503, 1.419, 0.563, 0.960, 1.035, 0.750, 4.697, 1.357, 1.069, 0.810, 0.557, 0.715, 0.750, 0.742, 2.828, 0.000, 0.290},
+{0.000, 0.668, 0.759, 0.428, 0.678, 0.976, 0.462, 0.517, 0.982, 0.435, 2.065, 0.554, 0.678, 0.854, 0.519, 1.357, 5.378, 0.804, 0.737, 0.512, 0.529, 0.750, 0.727, 1.122, 0.000, 0.290},
+{0.000, 1.364, 1.061, 0.830, 0.921, 0.882, 0.553, 0.999, 0.823, 0.546, 0.893, 0.511, 0.682, 1.224, 0.759, 1.069, 0.804, 3.143, 1.497, 0.679, 0.392, 0.750, 0.651, 0.953, 0.000, 0.290},
+{0.000, 0.967, 0.955, 0.818, 0.825, 0.819, 0.570, 0.636, 0.653, 0.861, 0.845, 0.759, 0.882, 1.109, 0.749, 0.810, 0.737, 1.497, 3.553, 1.060, 0.503, 0.750, 0.691, 0.816, 0.000, 0.290},
+{0.000, 0.982, 0.446, 0.809, 0.432, 0.479, 0.860, 0.403, 0.414, 2.313, 0.523, 1.324, 1.213, 0.463, 0.524, 0.557, 0.512, 0.679, 1.060, 3.118, 0.485, 0.750, 0.727, 0.509, 0.000, 0.290},
+{0.000, 0.464, 0.339, 0.312, 0.307, 0.548, 1.348, 0.505, 0.475, 0.522, 0.462, 0.602, 0.761, 0.376, 0.420, 0.715, 0.529, 0.392, 0.503, 0.485, 31.361, 0.750, 1.765, 0.612, 0.000, 0.290},
+{0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.000, 0.290},
+{0.000, 0.596, 0.556, 0.539, 0.504, 0.652, 2.424, 0.467, 1.570, 0.826, 0.668, 0.858, 0.918, 0.617, 0.468, 0.742, 0.727, 0.651, 0.691, 0.727, 1.765, 0.750, 6.893, 0.686, 0.000, 0.290},
+{0.000, 0.806, 1.196, 0.466, 1.394, 3.380, 0.441, 0.579, 1.025, 0.449, 1.365, 0.511, 0.741, 0.964, 0.805, 2.828, 1.122, 0.953, 0.816, 0.509, 0.612, 0.750, 0.686, 3.169, 0.000, 0.290},
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.290},
+{0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 1.333},
+};
+/** Frequency ratios for the "BLOSUM90" matrix, stored as a
+ * BLASTAA_SIZE x BLASTAA_SIZE table.  _PSIMatrixFrequencyRatiosNew() copies
+ * this table unscaled into SFreqRatios::data and sets bit_scale_factor to 2
+ * when that name is requested.
+ * NOTE(review): rows 1 and 2 are identical to the first two data rows of
+ * BLOSUM80_FREQRATIOS in the previous matrix_freq_ratios.h revision except
+ * in column 21 (0.707 here, 0.750 there) and column 25 (0.120 here, 0.140
+ * there) -- confirm that these values are genuine BLOSUM90 ratios and not a
+ * copy of the BLOSUM80 table.
+ * NOTE(review): the row/column order presumably follows the residue
+ * encoding that BLASTAA_SIZE belongs to -- confirm against its definition.
+ */
+static const double BLOSUM90_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.120},
+{0.000, 4.773, 0.477, 0.732, 0.451, 0.703, 0.397, 0.957, 0.514, 0.543, 0.723, 0.505, 0.625, 0.510, 0.771, 0.696, 0.555, 1.535, 0.980, 0.866, 0.309, 0.707, 0.436, 0.700, 0.000, 0.120},
+{0.000, 0.477, 5.362, 0.252, 5.759, 1.269, 0.252, 0.639, 0.830, 0.234, 0.793, 0.221, 0.310, 4.868, 0.428, 0.850, 0.609, 0.948, 0.743, 0.268, 0.179, 0.707, 0.307, 1.109, 0.000, 0.120},
+{0.000, 0.732, 0.252, 20.702, 0.214, 0.180, 0.395, 0.272, 0.221, 0.581, 0.241, 0.493, 0.499, 0.300, 0.269, 0.295, 0.233, 0.576, 0.602, 0.634, 0.302, 0.707, 0.308, 0.224, 0.000, 0.120},
+{0.000, 0.451, 5.759, 0.214, 9.106, 1.635, 0.234, 0.541, 0.594, 0.214, 0.677, 0.197, 0.252, 1.584, 0.452, 0.763, 0.477, 0.774, 0.611, 0.245, 0.145, 0.707, 0.245, 1.303, 0.000, 0.120},
+{0.000, 0.703, 1.269, 0.180, 1.635, 6.995, 0.249, 0.399, 0.901, 0.264, 1.195, 0.276, 0.429, 0.811, 0.581, 1.906, 0.832, 0.845, 0.685, 0.369, 0.241, 0.707, 0.333, 5.054, 0.000, 0.120},
+{0.000, 0.397, 0.252, 0.395, 0.234, 0.249, 9.486, 0.249, 0.572, 0.841, 0.283, 1.114, 0.893, 0.273, 0.237, 0.285, 0.287, 0.369, 0.445, 0.649, 1.089, 0.707, 2.780, 0.263, 0.000, 0.120},
+{0.000, 0.957, 0.639, 0.272, 0.541, 0.399, 0.249, 7.882, 0.387, 0.184, 0.483, 0.210, 0.286, 0.761, 0.347, 0.425, 0.377, 0.784, 0.492, 0.251, 0.264, 0.707, 0.230, 0.409, 0.000, 0.120},
+{0.000, 0.514, 0.830, 0.221, 0.594, 0.901, 0.572, 0.387, 16.070, 0.258, 0.740, 0.314, 0.432, 1.124, 0.420, 1.316, 0.925, 0.661, 0.540, 0.289, 0.390, 0.707, 1.819, 1.059, 0.000, 0.120},
+{0.000, 0.543, 0.234, 0.581, 0.214, 0.264, 0.841, 0.184, 0.258, 4.868, 0.313, 1.665, 1.512, 0.258, 0.286, 0.309, 0.299, 0.379, 0.701, 2.496, 0.343, 0.707, 0.539, 0.281, 0.000, 0.120},
+{0.000, 0.723, 0.793, 0.241, 0.677, 1.195, 0.283, 0.483, 0.740, 0.313, 6.326, 0.357, 0.534, 0.938, 0.597, 1.524, 2.192, 0.820, 0.736, 0.370, 0.241, 0.707, 0.408, 1.320, 0.000, 0.120},
+{0.000, 0.505, 0.221, 0.493, 0.197, 0.276, 1.114, 0.210, 0.314, 1.665, 0.357, 4.463, 2.123, 0.250, 0.303, 0.407, 0.363, 0.368, 0.561, 1.220, 0.439, 0.707, 0.581, 0.326, 0.000, 0.120},
+{0.000, 0.625, 0.310, 0.499, 0.252, 0.429, 0.893, 0.286, 0.432, 1.512, 0.534, 2.123, 8.883, 0.382, 0.362, 0.887, 0.506, 0.498, 0.758, 1.224, 0.561, 0.707, 0.550, 0.603, 0.000, 0.120},
+{0.000, 0.510, 4.868, 0.300, 1.584, 0.811, 0.273, 0.761, 1.124, 0.258, 0.938, 0.250, 0.382, 8.963, 0.398, 0.958, 0.773, 1.165, 0.908, 0.297, 0.221, 0.707, 0.385, 0.867, 0.000, 0.120},
+{0.000, 0.771, 0.428, 0.269, 0.452, 0.581, 0.237, 0.347, 0.420, 0.286, 0.597, 0.303, 0.362, 0.398, 15.155, 0.538, 0.446, 0.652, 0.560, 0.370, 0.178, 0.707, 0.258, 0.565, 0.000, 0.120},
+{0.000, 0.696, 0.850, 0.295, 0.763, 1.906, 0.285, 0.425, 1.316, 0.309, 1.524, 0.407, 0.887, 0.958, 0.538, 8.340, 1.394, 0.859, 0.724, 0.411, 0.408, 0.707, 0.462, 4.360, 0.000, 0.120},
+{0.000, 0.555, 0.609, 0.233, 0.477, 0.832, 0.287, 0.377, 0.925, 0.299, 2.192, 0.363, 0.506, 0.773, 0.446, 1.394, 8.245, 0.695, 0.598, 0.354, 0.294, 0.707, 0.418, 1.046, 0.000, 0.120},
+{0.000, 1.535, 0.948, 0.576, 0.774, 0.845, 0.369, 0.784, 0.661, 0.379, 0.820, 0.368, 0.498, 1.165, 0.652, 0.859, 0.695, 5.106, 1.663, 0.494, 0.271, 0.707, 0.462, 0.850, 0.000, 0.120},
+{0.000, 0.980, 0.743, 0.602, 0.611, 0.685, 0.445, 0.492, 0.540, 0.701, 0.736, 0.561, 0.758, 0.908, 0.560, 0.724, 0.598, 1.663, 6.205, 0.891, 0.285, 0.707, 0.474, 0.700, 0.000, 0.120},
+{0.000, 0.866, 0.268, 0.634, 0.245, 0.369, 0.649, 0.251, 0.289, 2.496, 0.370, 1.220, 1.224, 0.297, 0.370, 0.411, 0.354, 0.494, 0.891, 4.584, 0.342, 0.707, 0.489, 0.385, 0.000, 0.120},
+{0.000, 0.309, 0.179, 0.302, 0.145, 0.241, 1.089, 0.264, 0.390, 0.343, 0.241, 0.439, 0.561, 0.221, 0.178, 0.408, 0.294, 0.271, 0.285, 0.342, 41.552, 0.707, 2.036, 0.304, 0.000, 0.120},
+{0.000, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.000, 0.120},
+{0.000, 0.436, 0.307, 0.308, 0.245, 0.333, 2.780, 0.230, 1.819, 0.539, 0.408, 0.581, 0.550, 0.385, 0.258, 0.462, 0.418, 0.462, 0.474, 0.489, 2.036, 0.707, 12.194, 0.382, 0.000, 0.120},
+{0.000, 0.700, 1.109, 0.224, 1.303, 5.054, 0.263, 0.409, 1.059, 0.281, 1.320, 0.326, 0.603, 0.867, 0.565, 4.360, 1.046, 0.850, 0.700, 0.385, 0.304, 0.707, 0.382, 4.789, 0.000, 0.120},
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.120},
+{0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 1.333},
+};
+/** Frequency ratios for the "PAM250" matrix, stored as a
+ * BLASTAA_SIZE x BLASTAA_SIZE table.  _PSIMatrixFrequencyRatiosNew() copies
+ * this table unscaled into SFreqRatios::data and sets bit_scale_factor to 2
+ * when that name is requested.
+ * The table is not exactly symmetric: some mirrored entries differ in the
+ * last digit (e.g. [6][8] = 0.659 but [8][6] = 0.660, and [6][22] = 4.975
+ * but [22][6] = 4.976), so do not assume table[i][j] == table[j][i].
+ * NOTE(review): presumably a rounding artifact of how the table was
+ * generated -- confirm before changing either side.
+ */
+static const double PAM250_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.170},
+{0.000, 1.516, 1.056, 0.627, 1.070, 1.075, 0.446, 1.339, 0.732, 0.889, 0.766, 0.646, 0.770, 1.040, 1.294, 0.903, 0.701, 1.290, 1.317, 1.045, 0.264, 0.750, 0.450, 1.000, 0.000, 0.170},
+{0.000, 1.056, 1.843, 0.365, 2.052, 1.825, 0.354, 1.119, 1.297, 0.617, 1.130, 0.453, 0.605, 1.600, 0.846, 1.339, 0.862, 1.117, 1.032, 0.638, 0.297, 0.750, 0.486, 1.613, 0.000, 0.170},
+{0.000, 0.627, 0.365, 15.630, 0.307, 0.295, 0.371, 0.459, 0.452, 0.588, 0.286, 0.248, 0.299, 0.433, 0.527, 0.289, 0.430, 0.990, 0.601, 0.639, 0.169, 0.750, 1.078, 0.293, 0.000, 0.170},
+{0.000, 1.070, 2.052, 0.307, 2.433, 2.197, 0.274, 1.147, 1.173, 0.580, 1.020, 0.398, 0.548, 1.610, 0.806, 1.461, 0.743, 1.069, 0.968, 0.612, 0.213, 0.750, 0.369, 1.877, 0.000, 0.170},
+{0.000, 1.075, 1.825, 0.295, 2.197, 2.414, 0.287, 1.044, 1.165, 0.626, 0.988, 0.464, 0.614, 1.393, 0.880, 1.771, 0.782, 0.999, 0.914, 0.660, 0.201, 0.750, 0.372, 2.134, 0.000, 0.170},
+{0.000, 0.446, 0.354, 0.371, 0.274, 0.287, 8.028, 0.332, 0.659, 1.262, 0.298, 1.518, 1.043, 0.447, 0.351, 0.343, 0.359, 0.478, 0.488, 0.766, 1.077, 0.750, 4.975, 0.311, 0.000, 0.170},
+{0.000, 1.339, 1.119, 0.459, 1.147, 1.044, 0.332, 2.991, 0.616, 0.557, 0.678, 0.395, 0.525, 1.087, 0.894, 0.757, 0.555, 1.278, 0.999, 0.735, 0.200, 0.750, 0.300, 0.919, 0.000, 0.170},
+{0.000, 0.732, 1.297, 0.452, 1.173, 1.165, 0.660, 0.616, 4.475, 0.571, 0.990, 0.620, 0.610, 1.440, 0.947, 1.962, 1.431, 0.830, 0.741, 0.597, 0.544, 0.750, 0.979, 1.512, 0.000, 0.170},
+{0.000, 0.889, 0.617, 0.588, 0.580, 0.626, 1.262, 0.557, 0.571, 2.830, 0.641, 1.749, 1.650, 0.660, 0.627, 0.625, 0.629, 0.723, 1.017, 2.338, 0.303, 0.750, 0.800, 0.626, 0.000, 0.170},
+{0.000, 0.766, 1.130, 0.286, 1.020, 0.988, 0.298, 0.678, 0.990, 0.641, 2.926, 0.519, 1.100, 1.258, 0.770, 1.184, 2.182, 0.962, 0.996, 0.570, 0.451, 0.750, 0.360, 1.073, 0.000, 0.170},
+{0.000, 0.646, 0.453, 0.248, 0.398, 0.464, 1.518, 0.395, 0.620, 1.749, 0.519, 3.930, 2.338, 0.516, 0.556, 0.665, 0.501, 0.524, 0.677, 1.532, 0.647, 0.750, 0.815, 0.551, 0.000, 0.170},
+{0.000, 0.770, 0.605, 0.299, 0.548, 0.614, 1.043, 0.525, 0.609, 1.650, 1.100, 2.338, 4.404, 0.670, 0.621, 0.796, 0.905, 0.699, 0.873, 1.510, 0.375, 0.750, 0.568, 0.693, 0.000, 0.170},
+{0.000, 1.040, 1.600, 0.433, 1.610, 1.393, 0.447, 1.087, 1.440, 0.660, 1.258, 0.516, 0.671, 1.588, 0.893, 1.197, 1.000, 1.173, 1.107, 0.668, 0.394, 0.750, 0.621, 1.307, 0.000, 0.170},
+{0.000, 1.294, 0.846, 0.527, 0.806, 0.880, 0.351, 0.894, 0.946, 0.627, 0.770, 0.556, 0.621, 0.893, 3.841, 1.055, 0.960, 1.243, 1.076, 0.758, 0.275, 0.750, 0.321, 0.956, 0.000, 0.170},
+{0.000, 0.903, 1.339, 0.290, 1.462, 1.772, 0.343, 0.757, 1.962, 0.625, 1.184, 0.665, 0.796, 1.197, 1.056, 2.541, 1.335, 0.890, 0.833, 0.649, 0.335, 0.750, 0.395, 2.107, 0.000, 0.170},
+{0.000, 0.701, 0.862, 0.430, 0.743, 0.782, 0.359, 0.555, 1.430, 0.629, 2.182, 0.501, 0.905, 1.000, 0.960, 1.335, 4.078, 0.928, 0.820, 0.561, 1.652, 0.750, 0.379, 1.023, 0.000, 0.170},
+{0.000, 1.290, 1.117, 0.990, 1.069, 0.999, 0.478, 1.278, 0.830, 0.722, 0.962, 0.524, 0.699, 1.173, 1.244, 0.890, 0.928, 1.441, 1.362, 0.799, 0.566, 0.750, 0.520, 0.951, 0.000, 0.170},
+{0.000, 1.317, 1.032, 0.601, 0.968, 0.914, 0.488, 0.999, 0.741, 1.016, 0.996, 0.677, 0.873, 1.107, 1.075, 0.833, 0.820, 1.362, 1.806, 1.068, 0.306, 0.750, 0.530, 0.879, 0.000, 0.170},
+{0.000, 1.045, 0.638, 0.639, 0.612, 0.660, 0.766, 0.735, 0.597, 2.338, 0.570, 1.532, 1.511, 0.668, 0.758, 0.649, 0.561, 0.799, 1.068, 2.698, 0.237, 0.750, 0.566, 0.656, 0.000, 0.170},
+{0.000, 0.264, 0.297, 0.169, 0.213, 0.201, 1.077, 0.200, 0.544, 0.303, 0.450, 0.647, 0.375, 0.394, 0.275, 0.335, 1.651, 0.566, 0.306, 0.237, 52.679, 0.750, 0.970, 0.259, 0.000, 0.170},
+{0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.000, 0.170},
+{0.000, 0.450, 0.486, 1.078, 0.369, 0.372, 4.976, 0.300, 0.979, 0.800, 0.360, 0.815, 0.568, 0.621, 0.321, 0.395, 0.379, 0.520, 0.530, 0.566, 0.970, 0.750, 10.338, 0.382, 0.000, 0.170},
+{0.000, 1.000, 1.613, 0.293, 1.877, 2.134, 0.311, 0.919, 1.512, 0.626, 1.073, 0.551, 0.693, 1.307, 0.956, 2.107, 1.023, 0.951, 0.879, 0.656, 0.259, 0.750, 0.382, 2.122, 0.000, 0.170},
+{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.170},
+{0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 1.333},
+};
+
+#define BLOSUM62_20A_SCALE_MULTIPLIER 0.9666 /* applied to every BLOSUM62_FREQRATIOS entry for "BLOSUM62_20A" */
+#define BLOSUM62_20B_SCALE_MULTIPLIER 0.9344 /* applied to every BLOSUM62_FREQRATIOS entry for "BLOSUM62_20B" */
+
+/** Describes one built-in frequency ratio table that
+ * _PSIMatrixFrequencyRatiosNew() can hand out. */
+typedef struct SFreqRatiosSource {
+    const char* name;                     /**< matrix name, matched exactly */
+    const double (*ratios)[BLASTAA_SIZE]; /**< static table to copy from */
+    double multiplier;                    /**< factor applied to each entry */
+    int bit_scale_factor;                 /**< value stored in the result */
+} SFreqRatiosSource;
+
+/** All matrix names understood by _PSIMatrixFrequencyRatiosNew().
+ * A multiplier of 1.0 leaves the table values unchanged (multiplying a
+ * double by 1.0 is exact), so unscaled matrices produce the same data as a
+ * plain copy. */
+static const SFreqRatiosSource s_FreqRatiosSources[] = {
+    { "BLOSUM62",     BLOSUM62_FREQRATIOS, 1.0,                           2 },
+    { "BLOSUM62_20",  BLOSUM62_FREQRATIOS, 1.0,                           2 },
+    { "BLOSUM62_20A", BLOSUM62_FREQRATIOS, BLOSUM62_20A_SCALE_MULTIPLIER, 2 },
+    { "BLOSUM62_20B", BLOSUM62_FREQRATIOS, BLOSUM62_20B_SCALE_MULTIPLIER, 2 },
+    { "BLOSUM45",     BLOSUM45_FREQRATIOS, 1.0,                           3 },
+    { "BLOSUM80",     BLOSUM80_FREQRATIOS, 1.0,                           2 },
+    { "BLOSUM50",     BLOSUM50_FREQRATIOS, 1.0,                           2 },
+    { "BLOSUM90",     BLOSUM90_FREQRATIOS, 1.0,                           2 },
+    { "PAM30",        PAM30_FREQRATIOS,    1.0,                           2 },
+    { "PAM70",        PAM70_FREQRATIOS,    1.0,                           2 },
+    { "PAM250",       PAM250_FREQRATIOS,   1.0,                           2 }
+};
+
+/** Allocates an SFreqRatios structure and fills it with the frequency
+ * ratios of the named scoring matrix.
+ * The name is compared case-sensitively against the built-in names
+ * (BLOSUM62, BLOSUM62_20, BLOSUM62_20A, BLOSUM62_20B, BLOSUM45, BLOSUM80,
+ * BLOSUM50, BLOSUM90, PAM30, PAM70, PAM250).
+ * @param matrix_name name of the scoring matrix; must not be NULL [in]
+ * @return newly allocated structure to be released with
+ * _PSIMatrixFrequencyRatiosFree(), or NULL if matrix_name is NULL, the name
+ * is not recognized, or memory could not be allocated
+ */
+SFreqRatios*
+_PSIMatrixFrequencyRatiosNew(const char* matrix_name)
+{
+    const unsigned int kNumSources = (unsigned int)
+        (sizeof(s_FreqRatiosSources) / sizeof(s_FreqRatiosSources[0]));
+    const SFreqRatiosSource* source = NULL; /* entry matching matrix_name */
+    SFreqRatios* retval = NULL;             /* the return value */
+    unsigned int i, j;                      /* loop indices */
+
+    ASSERT(matrix_name);
+    /* ASSERT is the only guard in debug builds; when it is compiled out a
+     * NULL name must not reach strcmp. */
+    if ( !matrix_name ) {
+        return NULL;
+    }
+
+    /* Resolve the name before allocating anything so that an unknown name
+     * costs no allocation and needs no cleanup. */
+    for (i = 0; i < kNumSources; i++) {
+        if ( !strcmp(matrix_name, s_FreqRatiosSources[i].name) ) {
+            source = &s_FreqRatiosSources[i];
+            break;
+        }
+    }
+    if ( !source ) {
+        return NULL;
+    }
+
+    retval = (SFreqRatios*) malloc(sizeof(SFreqRatios));
+    if ( !retval ) {
+        return NULL;
+    }
+
+    retval->data = (double**) _PSIAllocateMatrix(BLASTAA_SIZE, BLASTAA_SIZE,
+                                                 sizeof(double));
+    if ( !retval->data ) {
+        return _PSIMatrixFrequencyRatiosFree(retval);
+    }
+
+    for (i = 0; i < BLASTAA_SIZE; i++) {
+        for (j = 0; j < BLASTAA_SIZE; j++) {
+            retval->data[i][j] = source->multiplier * source->ratios[i][j];
+        }
+    }
+    retval->bit_scale_factor = source->bit_scale_factor;
+
+    return retval;
+}
+
+/** Deallocates a frequency ratios structure.
+ * Releases the matrix held in the data field (when one is present) with
+ * _PSIDeallocateMatrix() and then the structure itself with sfree().
+ * @param freq_ratios structure to release; NULL is accepted and ignored [in]
+ * @return always NULL, so the result can be assigned back to the pointer
+ * that was passed in
+ */
+SFreqRatios*
+_PSIMatrixFrequencyRatiosFree(SFreqRatios* freq_ratios)
+{
+    if (freq_ratios != NULL) {
+        if (freq_ratios->data != NULL) {
+            _PSIDeallocateMatrix((void**) freq_ratios->data, BLASTAA_SIZE);
+        }
+        sfree(freq_ratios);
+    }
+
+    return NULL;
+}
+
+/*
+ * ===========================================================================
+ *
+ * $Log: matrix_freq_ratios.c,v $
+ * Revision 1.3 2004/06/09 14:21:03 camacho
+ * Removed msvc compiler warnings
+ *
+ * Revision 1.2 2004/05/13 14:56:05 camacho
+ * Fix for retrieving frequency ratios matrices
+ *
+ * Revision 1.1 2004/05/06 15:26:54 camacho
+ * Initial revision
+ *
+ *
+ * ===========================================================================
+ */
diff --git a/algo/blast/core/matrix_freq_ratios.h b/algo/blast/core/matrix_freq_ratios.h
index 24a869de..74399ec9 100644
--- a/algo/blast/core/matrix_freq_ratios.h
+++ b/algo/blast/core/matrix_freq_ratios.h
@@ -1,7 +1,7 @@
#ifndef ALGO_BLAST_CORE___MATRIX_FREQ_RATIOS__H
#define ALGO_BLAST_CORE___MATRIX_FREQ_RATIOS__H
-/* $Id: matrix_freq_ratios.h,v 1.1 2004/04/19 15:07:58 camacho Exp $
+/* $Id: matrix_freq_ratios.h,v 1.2 2004/05/06 15:28:11 camacho Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -31,7 +31,7 @@
*/
/** @file matrix_freq_ratios.h
- * Definitions for various scoring matrices' frequency ratios.
+ * Interface to retrieve the frequency ratios for various scoring matrices.
*
* See explanation in p 2996 of Nucleic Acids Research, 2001, Vol 29, No 14.
*/
@@ -42,295 +42,54 @@
extern "C" {
#endif
-/*underlying frequency ratios for BLOSUM62 as determined by Stephen Altschul;
- Stephen and Jorja Henikoff used different number for B,Z,X*/
-static const double BLOSUM62_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
- {0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
- 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
- 0.000, 0.000, 0.000, 0.250},
- {0.000, 3.903, 0.565, 0.868, 0.545, 0.741, 0.465, 1.057, 0.569, 0.632, 0.775,
- 0.602, 0.723, 0.588, 0.754, 0.757, 0.613, 1.472, 0.984, 0.936, 0.416, 0.750,
- 0.543, 0.747, 0.000, 0.250},
- {0.000, 0.565, 4.438, 0.345, 4.743, 1.335, 0.324, 0.739, 0.925, 0.334, 0.855,
- 0.297, 0.405, 4.071, 0.554, 0.944, 0.703, 1.058, 0.826, 0.351, 0.253, 0.750,
- 0.409, 1.184, 0.000, 0.250},
- {0.000, 0.868, 0.345, 19.577, 0.301, 0.286, 0.439, 0.420, 0.355, 0.653, 0.349,
- 0.642, 0.611, 0.398, 0.380, 0.366, 0.309, 0.738, 0.741, 0.756, 0.450, 0.750,
- 0.434, 0.317, 0.000, 0.250},
- {0.000, 0.545, 4.743, 0.301, 7.398, 1.688, 0.299, 0.634, 0.679, 0.339, 0.784,
- 0.287, 0.346, 1.554, 0.599, 0.897, 0.573, 0.913, 0.695, 0.337, 0.232, 0.750,
- 0.346, 1.382, 0.000, 0.250},
- {0.000, 0.741, 1.335, 0.286, 1.688, 5.470, 0.331, 0.481, 0.960, 0.331, 1.308,
- 0.373, 0.500, 0.911, 0.679, 1.902, 0.961, 0.950, 0.741, 0.429, 0.374, 0.750,
- 0.496, 4.090, 0.000, 0.250},
- {0.000, 0.465, 0.324, 0.439, 0.299, 0.331, 8.129, 0.341, 0.652, 0.946, 0.344,
- 1.155, 1.004, 0.354, 0.287, 0.334, 0.381, 0.440, 0.482, 0.745, 1.374, 0.750,
- 2.769, 0.332, 0.000, 0.250},
- {0.000, 1.057, 0.739, 0.420, 0.634, 0.481, 0.341, 6.876, 0.493, 0.275, 0.589,
- 0.284, 0.396, 0.864, 0.477, 0.539, 0.450, 0.904, 0.579, 0.337, 0.422, 0.750,
- 0.349, 0.503, 0.000, 0.250},
- {0.000, 0.569, 0.925, 0.355, 0.679, 0.960, 0.652, 0.493, 13.506, 0.326, 0.779,
- 0.381, 0.584, 1.222, 0.473, 1.168, 0.917, 0.737, 0.557, 0.339, 0.444, 0.750,
- 1.798, 1.040, 0.000, 0.250},
- {0.000, 0.632, 0.334, 0.653, 0.339, 0.331, 0.946, 0.275, 0.326, 3.998, 0.396,
- 1.694, 1.478, 0.328, 0.385, 0.383, 0.355, 0.443, 0.780, 2.417, 0.409, 0.750,
- 0.630, 0.351, 0.000, 0.250},
- {0.000, 0.775, 0.855, 0.349, 0.784, 1.308, 0.344, 0.589, 0.779, 0.396, 4.764,
- 0.428, 0.625, 0.940, 0.704, 1.554, 2.077, 0.932, 0.793, 0.457, 0.359, 0.750,
- 0.532, 1.403, 0.000, 0.250},
- {0.000, 0.602, 0.297, 0.642, 0.287, 0.373, 1.155, 0.284, 0.381, 1.694, 0.428,
- 3.797, 1.994, 0.310, 0.371, 0.477, 0.474, 0.429, 0.660, 1.314, 0.568, 0.750,
- 0.692, 0.413, 0.000, 0.250},
- {0.000, 0.723, 0.405, 0.611, 0.346, 0.500, 1.004, 0.396, 0.584, 1.478, 0.625,
- 1.994, 6.481, 0.474, 0.424, 0.864, 0.623, 0.599, 0.794, 1.269, 0.610, 0.750,
- 0.708, 0.641, 0.000, 0.250},
- {0.000, 0.588, 4.071, 0.398, 1.554, 0.911, 0.354, 0.864, 1.222, 0.328, 0.940,
- 0.310, 0.474, 7.094, 0.500, 1.001, 0.859, 1.232, 0.984, 0.369, 0.278, 0.750,
- 0.486, 0.946, 0.000, 0.250},
- {0.000, 0.754, 0.554, 0.380, 0.599, 0.679, 0.287, 0.477, 0.473, 0.385, 0.704,
- 0.371, 0.424, 0.500, 12.838, 0.641, 0.481, 0.755, 0.689, 0.443, 0.282, 0.750,
- 0.363, 0.664, 0.000, 0.250},
- {0.000, 0.757, 0.944, 0.366, 0.897, 1.902, 0.334, 0.539, 1.168, 0.383, 1.554,
- 0.477, 0.864, 1.001, 0.641, 6.244, 1.406, 0.966, 0.791, 0.467, 0.509, 0.750,
- 0.611, 3.582, 0.000, 0.250},
- {0.000, 0.613, 0.703, 0.309, 0.573, 0.961, 0.381, 0.450, 0.917, 0.355, 2.077,
- 0.474, 0.623, 0.859, 0.481, 1.406, 6.666, 0.767, 0.678, 0.420, 0.395, 0.750,
- 0.556, 1.133, 0.000, 0.250},
- {0.000, 1.472, 1.058, 0.738, 0.913, 0.950, 0.440, 0.904, 0.737, 0.443, 0.932,
- 0.429, 0.599, 1.232, 0.755, 0.966, 0.767, 3.843, 1.614, 0.565, 0.385, 0.750,
- 0.557, 0.956, 0.000, 0.250},
- {0.000, 0.984, 0.826, 0.741, 0.695, 0.741, 0.482, 0.579, 0.557, 0.780, 0.793,
- 0.660, 0.794, 0.984, 0.689, 0.791, 0.678, 1.614, 4.832, 0.981, 0.431, 0.750,
- 0.573, 0.761, 0.000, 0.250},
- {0.000, 0.936, 0.351, 0.756, 0.337, 0.429, 0.745, 0.337, 0.339, 2.417, 0.457,
- 1.314, 1.269, 0.369, 0.443, 0.467, 0.420, 0.565, 0.981, 3.692, 0.374, 0.750,
- 0.658, 0.444, 0.000, 0.250},
- {0.000, 0.416, 0.253, 0.450, 0.232, 0.374, 1.374, 0.422, 0.444, 0.409, 0.359,
- 0.568, 0.610, 0.278, 0.282, 0.509, 0.395, 0.385, 0.431, 0.374, 38.108, 0.750,
- 2.110, 0.426, 0.000, 0.250},
- {0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750,
- 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750,
- 0.750, 0.750, 0.000, 0.250},
- {0.000, 0.543, 0.409, 0.434, 0.346, 0.496, 2.769, 0.349, 1.798, 0.630, 0.532,
- 0.692, 0.708, 0.486, 0.363, 0.611, 0.556, 0.557, 0.573, 0.658, 2.110, 0.750,
- 9.832, 0.541, 0.000, 0.250},
- {0.000, 0.747, 1.184, 0.317, 1.382, 4.090, 0.332, 0.503, 1.040, 0.351, 1.403,
- 0.413, 0.641, 0.946, 0.664, 3.582, 1.133, 0.956, 0.761, 0.444, 0.426, 0.750,
- 0.541, 3.893, 0.000, 0.250},
- {0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
- 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
- 0.000, 0.000, 0.000, 0.250},
- {0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250,
- 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250,
- 0.250, 0.250, 0.250, 1.333},
-};
+/** Stores the frequency ratios along with their bit scale factor */
+typedef struct SFreqRatios {
-static const double PAM30_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.003},
-{0.000, 7.789, 0.302, 0.108, 0.317, 0.453, 0.057, 0.576, 0.083, 0.199, 0.095, 0.115, 0.189, 0.285, 0.593, 0.235, 0.091, 0.875, 0.827, 0.477, 0.010, 0.750, 0.070, 0.358, 0.000, 0.003},
-{0.000, 0.302, 8.118, 0.015, 8.456, 1.472, 0.027, 0.338, 0.664, 0.126, 0.441, 0.049, 0.034, 7.726, 0.102, 0.368, 0.082, 0.604, 0.353, 0.067, 0.032, 0.750, 0.114, 0.991, 0.000, 0.003},
-{0.000, 0.108, 0.015, 27.591, 0.008, 0.008, 0.012, 0.042, 0.078, 0.119, 0.008, 0.006, 0.009, 0.024, 0.063, 0.008, 0.068, 0.379, 0.068, 0.132, 0.005, 0.750, 0.258, 0.008, 0.000, 0.003},
-{0.000, 0.317, 8.456, 0.008, 14.236, 2.342, 0.006, 0.326, 0.271, 0.080, 0.220, 0.014, 0.023, 1.756, 0.068, 0.427, 0.029, 0.286, 0.200, 0.065, 0.005, 0.750, 0.020, 1.508, 0.000, 0.003},
-{0.000, 0.453, 1.472, 0.008, 2.343, 13.663, 0.008, 0.232, 0.181, 0.150, 0.226, 0.043, 0.092, 0.464, 0.153, 1.507, 0.043, 0.226, 0.130, 0.110, 0.003, 0.750, 0.057, 8.365, 0.000, 0.003},
-{0.000, 0.057, 0.027, 0.012, 0.006, 0.008, 21.408, 0.044, 0.132, 0.473, 0.009, 0.416, 0.250, 0.051, 0.036, 0.011, 0.042, 0.114, 0.051, 0.067, 0.213, 0.750, 1.786, 0.009, 0.000, 0.003},
-{0.000, 0.576, 0.338, 0.042, 0.326, 0.232, 0.044, 9.314, 0.048, 0.024, 0.086, 0.028, 0.057, 0.351, 0.122, 0.094, 0.040, 0.553, 0.135, 0.149, 0.006, 0.750, 0.009, 0.172, 0.000, 0.003},
-{0.000, 0.083, 0.664, 0.078, 0.271, 0.181, 0.132, 0.048, 22.927, 0.044, 0.120, 0.123, 0.028, 1.119, 0.248, 1.359, 0.587, 0.132, 0.086, 0.115, 0.081, 0.750, 0.324, 0.695, 0.000, 0.003},
-{0.000, 0.199, 0.126, 0.119, 0.080, 0.150, 0.473, 0.024, 0.044, 18.632, 0.127, 0.643, 0.793, 0.179, 0.053, 0.070, 0.159, 0.097, 0.444, 1.938, 0.009, 0.750, 0.119, 0.115, 0.000, 0.003},
-{0.000, 0.095, 0.441, 0.008, 0.220, 0.226, 0.009, 0.086, 0.120, 0.127, 9.988, 0.061, 0.563, 0.697, 0.106, 0.384, 1.111, 0.260, 0.336, 0.046, 0.018, 0.750, 0.042, 0.295, 0.000, 0.003},
-{0.000, 0.115, 0.049, 0.006, 0.014, 0.043, 0.416, 0.028, 0.123, 0.643, 0.061, 10.019, 1.242, 0.090, 0.090, 0.185, 0.053, 0.058, 0.103, 0.463, 0.127, 0.750, 0.095, 0.105, 0.000, 0.003},
-{0.000, 0.189, 0.034, 0.009, 0.023, 0.092, 0.250, 0.057, 0.028, 0.792, 0.563, 1.242, 46.604, 0.047, 0.065, 0.272, 0.243, 0.161, 0.263, 0.631, 0.013, 0.750, 0.021, 0.171, 0.000, 0.003},
-{0.000, 0.285, 7.726, 0.024, 1.756, 0.464, 0.051, 0.351, 1.119, 0.179, 0.697, 0.090, 0.047, 14.647, 0.142, 0.299, 0.144, 0.972, 0.531, 0.070, 0.064, 0.750, 0.223, 0.392, 0.000, 0.003},
-{0.000, 0.593, 0.102, 0.063, 0.068, 0.153, 0.036, 0.122, 0.248, 0.053, 0.106, 0.090, 0.065, 0.142, 15.809, 0.376, 0.270, 0.571, 0.240, 0.140, 0.009, 0.750, 0.010, 0.251, 0.000, 0.003},
-{0.000, 0.235, 0.368, 0.008, 0.427, 1.507, 0.011, 0.094, 1.360, 0.070, 0.384, 0.185, 0.272, 0.299, 0.376, 18.136, 0.585, 0.168, 0.150, 0.103, 0.012, 0.750, 0.017, 8.754, 0.000, 0.003},
-{0.000, 0.091, 0.082, 0.068, 0.029, 0.043, 0.042, 0.040, 0.587, 0.159, 1.111, 0.053, 0.243, 0.144, 0.270, 0.585, 18.926, 0.355, 0.106, 0.074, 0.517, 0.750, 0.030, 0.279, 0.000, 0.003},
-{0.000, 0.875, 0.604, 0.379, 0.286, 0.226, 0.114, 0.553, 0.132, 0.097, 0.260, 0.058, 0.161, 0.972, 0.571, 0.168, 0.355, 9.028, 1.145, 0.118, 0.179, 0.750, 0.097, 0.200, 0.000, 0.003},
-{0.000, 0.827, 0.353, 0.068, 0.200, 0.130, 0.051, 0.135, 0.086, 0.444, 0.336, 0.103, 0.263, 0.531, 0.240, 0.150, 0.106, 1.145, 11.695, 0.391, 0.013, 0.750, 0.112, 0.139, 0.000, 0.003},
-{0.000, 0.477, 0.067, 0.132, 0.065, 0.110, 0.067, 0.149, 0.115, 1.938, 0.046, 0.463, 0.631, 0.070, 0.141, 0.103, 0.074, 0.118, 0.391, 11.609, 0.005, 0.750, 0.081, 0.107, 0.000, 0.003},
-{0.000, 0.010, 0.032, 0.005, 0.005, 0.003, 0.213, 0.006, 0.081, 0.009, 0.018, 0.127, 0.013, 0.064, 0.009, 0.012, 0.517, 0.179, 0.013, 0.005, 88.722, 0.750, 0.173, 0.007, 0.000, 0.003},
-{0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.000, 0.003},
-{0.000, 0.070, 0.114, 0.258, 0.020, 0.057, 1.786, 0.009, 0.324, 0.119, 0.042, 0.095, 0.021, 0.223, 0.010, 0.017, 0.030, 0.097, 0.112, 0.081, 0.173, 0.750, 28.442, 0.039, 0.000, 0.003},
-{0.000, 0.358, 0.991, 0.008, 1.508, 8.365, 0.009, 0.172, 0.695, 0.115, 0.295, 0.105, 0.171, 0.392, 0.251, 8.754, 0.279, 0.200, 0.139, 0.107, 0.007, 0.750, 0.039, 8.535, 0.000, 0.003},
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.003},
-{0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 1.333},
-};
+ /** The actual frequency ratios */
+ double** data;
-static const double PAM70_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.002},
-{0.000, 4.900, 0.605, 0.242, 0.619, 0.771, 0.135, 1.015, 0.221, 0.434, 0.249, 0.250, 0.376, 0.589, 1.030, 0.466, 0.227, 1.350, 1.327, 0.809, 0.043, 0.750, 0.154, 0.638, 0.000, 0.002},
-{0.000, 0.605, 5.422, 0.066, 5.890, 2.249, 0.079, 0.649, 1.099, 0.253, 0.778, 0.120, 0.140, 4.879, 0.274, 0.790, 0.253, 0.955, 0.654, 0.185, 0.078, 0.750, 0.223, 1.613, 0.000, 0.002},
-{0.000, 0.242, 0.066, 24.836, 0.040, 0.039, 0.055, 0.116, 0.165, 0.246, 0.038, 0.030, 0.044, 0.097, 0.159, 0.039, 0.151, 0.665, 0.186, 0.274, 0.022, 0.750, 0.523, 0.039, 0.000, 0.002},
-{0.000, 0.618, 5.890, 0.040, 8.900, 3.357, 0.034, 0.639, 0.600, 0.193, 0.484, 0.064, 0.103, 2.400, 0.213, 0.922, 0.139, 0.600, 0.442, 0.173, 0.026, 0.750, 0.080, 2.296, 0.000, 0.002},
-{0.000, 0.771, 2.249, 0.039, 3.357, 8.654, 0.038, 0.492, 0.480, 0.284, 0.466, 0.117, 0.212, 0.965, 0.347, 2.258, 0.179, 0.479, 0.338, 0.247, 0.019, 0.750, 0.119, 5.867, 0.000, 0.002},
-{0.000, 0.135, 0.079, 0.055, 0.034, 0.038, 17.455, 0.100, 0.277, 0.846, 0.041, 0.824, 0.508, 0.131, 0.089, 0.052, 0.099, 0.216, 0.138, 0.208, 0.452, 0.750, 3.378, 0.044, 0.000, 0.002},
-{0.000, 1.015, 0.649, 0.116, 0.639, 0.492, 0.100, 7.309, 0.145, 0.107, 0.210, 0.081, 0.144, 0.660, 0.308, 0.237, 0.122, 0.964, 0.370, 0.313, 0.027, 0.750, 0.044, 0.381, 0.000, 0.002},
-{0.000, 0.221, 1.099, 0.165, 0.600, 0.480, 0.277, 0.145, 16.422, 0.140, 0.335, 0.259, 0.122, 1.677, 0.493, 2.188, 1.058, 0.327, 0.230, 0.233, 0.186, 0.750, 0.611, 1.224, 0.000, 0.002},
-{0.000, 0.434, 0.253, 0.246, 0.193, 0.284, 0.846, 0.107, 0.140, 11.749, 0.266, 1.192, 1.369, 0.322, 0.160, 0.189, 0.301, 0.254, 0.761, 3.004, 0.044, 0.750, 0.281, 0.242, 0.000, 0.002},
-{0.000, 0.249, 0.778, 0.038, 0.484, 0.466, 0.041, 0.210, 0.335, 0.266, 7.610, 0.153, 0.948, 1.119, 0.256, 0.722, 1.937, 0.520, 0.620, 0.139, 0.079, 0.750, 0.101, 0.578, 0.000, 0.002},
-{0.000, 0.250, 0.120, 0.030, 0.064, 0.117, 0.824, 0.081, 0.259, 1.192, 0.153, 8.228, 2.130, 0.185, 0.199, 0.351, 0.137, 0.149, 0.240, 0.908, 0.267, 0.750, 0.239, 0.219, 0.000, 0.002},
-{0.000, 0.376, 0.140, 0.044, 0.103, 0.212, 0.508, 0.144, 0.122, 1.369, 0.948, 2.129, 28.592, 0.182, 0.176, 0.486, 0.488, 0.319, 0.498, 1.133, 0.060, 0.750, 0.097, 0.332, 0.000, 0.002},
-{0.000, 0.589, 4.879, 0.097, 2.400, 0.965, 0.131, 0.660, 1.677, 0.322, 1.119, 0.185, 0.182, 7.754, 0.344, 0.637, 0.387, 1.366, 0.899, 0.199, 0.138, 0.750, 0.390, 0.822, 0.000, 0.002},
-{0.000, 1.030, 0.274, 0.159, 0.213, 0.347, 0.089, 0.308, 0.493, 0.160, 0.256, 0.199, 0.176, 0.344, 11.871, 0.684, 0.527, 0.979, 0.531, 0.307, 0.042, 0.750, 0.047, 0.494, 0.000, 0.002},
-{0.000, 0.466, 0.790, 0.039, 0.923, 2.258, 0.052, 0.237, 2.188, 0.189, 0.722, 0.351, 0.486, 0.637, 0.684, 11.469, 1.024, 0.373, 0.329, 0.232, 0.055, 0.750, 0.074, 6.272, 0.000, 0.002},
-{0.000, 0.227, 0.253, 0.151, 0.139, 0.179, 0.099, 0.122, 1.058, 0.301, 1.937, 0.137, 0.488, 0.387, 0.527, 1.024, 13.660, 0.610, 0.284, 0.176, 0.991, 0.750, 0.086, 0.547, 0.000, 0.002},
-{0.000, 1.350, 0.955, 0.665, 0.600, 0.479, 0.216, 0.964, 0.327, 0.254, 0.520, 0.149, 0.319, 1.366, 0.979, 0.373, 0.610, 5.204, 1.695, 0.303, 0.324, 0.750, 0.208, 0.433, 0.000, 0.002},
-{0.000, 1.327, 0.654, 0.186, 0.442, 0.338, 0.138, 0.370, 0.230, 0.761, 0.620, 0.240, 0.498, 0.899, 0.531, 0.329, 0.284, 1.695, 7.337, 0.719, 0.055, 0.750, 0.228, 0.334, 0.000, 0.002},
-{0.000, 0.809, 0.185, 0.275, 0.173, 0.247, 0.208, 0.313, 0.233, 3.003, 0.139, 0.908, 1.133, 0.199, 0.307, 0.232, 0.176, 0.303, 0.719, 8.211, 0.027, 0.750, 0.181, 0.240, 0.000, 0.002},
-{0.000, 0.043, 0.078, 0.022, 0.026, 0.019, 0.452, 0.027, 0.186, 0.044, 0.079, 0.267, 0.060, 0.138, 0.042, 0.055, 0.991, 0.324, 0.054, 0.027, 80.645, 0.750, 0.377, 0.035, 0.000, 0.002},
-{0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.000, 0.002},
-{0.000, 0.154, 0.223, 0.523, 0.080, 0.119, 3.379, 0.044, 0.611, 0.281, 0.101, 0.239, 0.097, 0.390, 0.047, 0.074, 0.086, 0.208, 0.228, 0.181, 0.377, 0.750, 23.141, 0.099, 0.000, 0.002},
-{0.000, 0.638, 1.613, 0.039, 2.296, 5.867, 0.044, 0.381, 1.224, 0.242, 0.578, 0.219, 0.332, 0.822, 0.494, 6.272, 0.547, 0.433, 0.334, 0.240, 0.035, 0.750, 0.099, 6.043, 0.000, 0.002},
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.002},
-{0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 1.333},
-};
+ /** Used to multiply the values in the above matrix to obtain scores in bit
+ * units */
+ int bit_scale_factor;
-static const double BLOSUM45_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.290},
-{0.000, 2.950, 0.735, 0.800, 0.689, 0.825, 0.587, 1.080, 0.654, 0.747, 0.786, 0.712, 0.821, 0.789, 0.709, 0.867, 0.700, 1.300, 1.001, 1.010, 0.565, 0.750, 0.639, 0.841, 0.000, 0.290},
-{0.000, 0.735, 3.260, 0.600, 3.594, 1.300, 0.496, 0.886, 1.088, 0.497, 1.023, 0.472, 0.580, 2.862, 0.686, 0.992, 0.829, 1.053, 0.985, 0.520, 0.381, 0.750, 0.627, 1.182, 0.000, 0.290},
-{0.000, 0.800, 0.600, 17.090, 0.533, 0.545, 0.602, 0.557, 0.491, 0.543, 0.547, 0.673, 0.604, 0.680, 0.411, 0.486, 0.472, 0.797, 0.822, 0.715, 0.334, 0.750, 0.489, 0.523, 0.000, 0.290},
-{0.000, 0.689, 3.594, 0.533, 5.356, 1.643, 0.431, 0.740, 0.976, 0.440, 0.942, 0.463, 0.494, 1.502, 0.724, 0.958, 0.771, 0.929, 0.876, 0.494, 0.373, 0.750, 0.645, 1.381, 0.000, 0.290},
-{0.000, 0.825, 1.300, 0.545, 1.643, 3.873, 0.498, 0.576, 0.962, 0.485, 1.277, 0.571, 0.615, 0.893, 0.911, 1.531, 1.011, 0.912, 0.833, 0.555, 0.519, 0.750, 0.617, 2.978, 0.000, 0.290},
-{0.000, 0.587, 0.496, 0.602, 0.431, 0.498, 5.748, 0.480, 0.679, 1.064, 0.529, 1.303, 1.063, 0.572, 0.451, 0.444, 0.590, 0.610, 0.716, 0.953, 1.355, 0.750, 2.185, 0.477, 0.000, 0.290},
-{0.000, 1.080, 0.886, 0.557, 0.740, 0.576, 0.480, 5.071, 0.662, 0.416, 0.678, 0.450, 0.585, 1.059, 0.702, 0.687, 0.570, 1.058, 0.693, 0.479, 0.591, 0.750, 0.549, 0.619, 0.000, 0.290},
-{0.000, 0.654, 1.088, 0.491, 0.976, 0.962, 0.679, 0.662, 9.512, 0.453, 0.890, 0.670, 0.918, 1.220, 0.661, 1.151, 0.973, 0.854, 0.706, 0.457, 0.452, 0.750, 1.472, 1.034, 0.000, 0.290},
-{0.000, 0.747, 0.497, 0.543, 0.440, 0.485, 1.064, 0.416, 0.453, 3.233, 0.532, 1.596, 1.455, 0.564, 0.610, 0.578, 0.488, 0.618, 0.848, 2.176, 0.565, 0.750, 0.906, 0.521, 0.000, 0.290},
-{0.000, 0.786, 1.023, 0.547, 0.942, 1.277, 0.529, 0.678, 0.890, 0.532, 3.327, 0.554, 0.738, 1.119, 0.781, 1.330, 1.943, 0.890, 0.885, 0.592, 0.562, 0.750, 0.737, 1.297, 0.000, 0.290},
-{0.000, 0.712, 0.472, 0.673, 0.463, 0.571, 1.303, 0.450, 0.670, 1.596, 0.554, 2.997, 1.731, 0.484, 0.478, 0.642, 0.601, 0.556, 0.781, 1.334, 0.671, 0.750, 0.965, 0.598, 0.000, 0.290},
-{0.000, 0.821, 0.580, 0.604, 0.494, 0.615, 1.063, 0.585, 0.918, 1.455, 0.738, 1.731, 4.114, 0.682, 0.644, 0.941, 0.776, 0.660, 0.860, 1.236, 0.634, 0.750, 1.023, 0.739, 0.000, 0.290},
-{0.000, 0.789, 2.862, 0.680, 1.502, 0.893, 0.572, 1.059, 1.220, 0.564, 1.119, 0.484, 0.682, 4.478, 0.640, 1.032, 0.898, 1.200, 1.115, 0.552, 0.390, 0.750, 0.606, 0.946, 0.000, 0.290},
-{0.000, 0.709, 0.686, 0.411, 0.724, 0.911, 0.451, 0.702, 0.661, 0.610, 0.781, 0.478, 0.644, 0.640, 8.819, 0.716, 0.582, 0.750, 0.856, 0.540, 0.525, 0.750, 0.479, 0.836, 0.000, 0.290},
-{0.000, 0.867, 0.992, 0.486, 0.958, 1.531, 0.444, 0.687, 1.151, 0.578, 1.330, 0.642, 0.941, 1.032, 0.716, 4.407, 1.329, 1.092, 0.781, 0.547, 0.645, 0.750, 0.829, 2.630, 0.000, 0.290},
-{0.000, 0.700, 0.829, 0.472, 0.771, 1.011, 0.590, 0.570, 0.973, 0.488, 1.943, 0.601, 0.776, 0.898, 0.582, 1.329, 4.747, 0.799, 0.715, 0.578, 0.580, 0.750, 0.807, 1.132, 0.000, 0.290},
-{0.000, 1.300, 1.053, 0.797, 0.929, 0.912, 0.610, 1.058, 0.854, 0.618, 0.890, 0.556, 0.660, 1.200, 0.750, 1.092, 0.799, 2.782, 1.472, 0.728, 0.428, 0.750, 0.706, 0.981, 0.000, 0.290},
-{0.000, 1.001, 0.985, 0.822, 0.876, 0.833, 0.716, 0.693, 0.706, 0.848, 0.885, 0.781, 0.860, 1.115, 0.856, 0.781, 0.715, 1.472, 3.139, 1.040, 0.454, 0.750, 0.744, 0.813, 0.000, 0.290},
-{0.000, 1.010, 0.520, 0.715, 0.494, 0.555, 0.953, 0.479, 0.457, 2.176, 0.592, 1.334, 1.236, 0.552, 0.540, 0.547, 0.578, 0.728, 1.040, 2.871, 0.473, 0.750, 0.809, 0.552, 0.000, 0.290},
-{0.000, 0.565, 0.381, 0.334, 0.373, 0.519, 1.355, 0.591, 0.452, 0.565, 0.562, 0.671, 0.634, 0.390, 0.525, 0.645, 0.580, 0.428, 0.454, 0.473, 29.702, 0.750, 1.801, 0.567, 0.000, 0.290},
-{0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.000, 0.290},
-{0.000, 0.639, 0.627, 0.489, 0.645, 0.617, 2.185, 0.549, 1.472, 0.906, 0.737, 0.965, 1.023, 0.606, 0.479, 0.829, 0.807, 0.706, 0.744, 0.809, 1.801, 0.750, 5.753, 0.698, 0.000, 0.290},
-{0.000, 0.841, 1.182, 0.523, 1.381, 2.978, 0.477, 0.619, 1.034, 0.521, 1.297, 0.598, 0.739, 0.946, 0.836, 2.630, 1.132, 0.981, 0.813, 0.552, 0.567, 0.750, 0.698, 2.845, 0.000, 0.290},
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.290},
-{0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 1.333},
-};
+} SFreqRatios;
-static const double BLOSUM80_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.140},
-{0.000, 4.773, 0.477, 0.732, 0.451, 0.703, 0.397, 0.957, 0.514, 0.543, 0.723, 0.505, 0.625, 0.510, 0.771, 0.696, 0.555, 1.535, 0.980, 0.866, 0.309, 0.750, 0.436, 0.700, 0.000, 0.140},
-{0.000, 0.477, 5.362, 0.252, 5.759, 1.269, 0.252, 0.639, 0.830, 0.234, 0.793, 0.221, 0.310, 4.868, 0.428, 0.850, 0.609, 0.948, 0.743, 0.268, 0.179, 0.750, 0.307, 1.109, 0.000, 0.140},
-{0.000, 0.732, 0.252, 20.702, 0.214, 0.180, 0.395, 0.272, 0.221, 0.581, 0.241, 0.493, 0.499, 0.300, 0.269, 0.295, 0.233, 0.576, 0.602, 0.634, 0.302, 0.750, 0.308, 0.224, 0.000, 0.140},
-{0.000, 0.451, 5.759, 0.214, 9.106, 1.635, 0.234, 0.541, 0.594, 0.214, 0.677, 0.197, 0.252, 1.584, 0.452, 0.763, 0.477, 0.774, 0.611, 0.245, 0.145, 0.750, 0.245, 1.303, 0.000, 0.140},
-{0.000, 0.703, 1.269, 0.180, 1.635, 6.995, 0.249, 0.399, 0.901, 0.264, 1.195, 0.276, 0.429, 0.811, 0.581, 1.906, 0.832, 0.845, 0.685, 0.369, 0.241, 0.750, 0.333, 5.054, 0.000, 0.140},
-{0.000, 0.397, 0.252, 0.395, 0.234, 0.249, 9.486, 0.249, 0.572, 0.841, 0.283, 1.114, 0.893, 0.273, 0.237, 0.285, 0.287, 0.369, 0.445, 0.649, 1.089, 0.750, 2.780, 0.263, 0.000, 0.140},
-{0.000, 0.957, 0.639, 0.272, 0.541, 0.399, 0.249, 7.882, 0.387, 0.184, 0.483, 0.210, 0.286, 0.761, 0.347, 0.425, 0.377, 0.784, 0.492, 0.251, 0.264, 0.750, 0.230, 0.409, 0.000, 0.140},
-{0.000, 0.514, 0.830, 0.221, 0.594, 0.901, 0.572, 0.387, 16.070, 0.258, 0.740, 0.314, 0.432, 1.124, 0.420, 1.316, 0.925, 0.661, 0.540, 0.289, 0.390, 0.750, 1.819, 1.059, 0.000, 0.140},
-{0.000, 0.543, 0.234, 0.581, 0.214, 0.264, 0.841, 0.184, 0.258, 4.868, 0.313, 1.665, 1.512, 0.258, 0.286, 0.309, 0.299, 0.379, 0.701, 2.496, 0.343, 0.750, 0.539, 0.281, 0.000, 0.140},
-{0.000, 0.723, 0.793, 0.241, 0.677, 1.195, 0.283, 0.483, 0.740, 0.313, 6.326, 0.357, 0.534, 0.938, 0.597, 1.524, 2.192, 0.820, 0.736, 0.370, 0.241, 0.750, 0.408, 1.320, 0.000, 0.140},
-{0.000, 0.505, 0.221, 0.493, 0.197, 0.276, 1.114, 0.210, 0.314, 1.665, 0.357, 4.463, 2.123, 0.250, 0.303, 0.407, 0.363, 0.368, 0.561, 1.220, 0.439, 0.750, 0.581, 0.326, 0.000, 0.140},
-{0.000, 0.625, 0.310, 0.499, 0.252, 0.429, 0.893, 0.286, 0.432, 1.512, 0.534, 2.123, 8.883, 0.382, 0.362, 0.887, 0.506, 0.498, 0.758, 1.224, 0.561, 0.750, 0.550, 0.603, 0.000, 0.140},
-{0.000, 0.510, 4.868, 0.300, 1.584, 0.811, 0.273, 0.761, 1.124, 0.258, 0.938, 0.250, 0.382, 8.963, 0.398, 0.958, 0.773, 1.165, 0.908, 0.297, 0.221, 0.750, 0.385, 0.867, 0.000, 0.140},
-{0.000, 0.771, 0.428, 0.269, 0.452, 0.581, 0.237, 0.347, 0.420, 0.286, 0.597, 0.303, 0.362, 0.398, 15.155, 0.538, 0.446, 0.652, 0.560, 0.370, 0.178, 0.750, 0.258, 0.565, 0.000, 0.140},
-{0.000, 0.696, 0.850, 0.295, 0.763, 1.906, 0.285, 0.425, 1.316, 0.309, 1.524, 0.407, 0.887, 0.958, 0.538, 8.340, 1.394, 0.859, 0.724, 0.411, 0.408, 0.750, 0.462, 4.360, 0.000, 0.140},
-{0.000, 0.555, 0.609, 0.233, 0.477, 0.832, 0.287, 0.377, 0.925, 0.299, 2.192, 0.363, 0.506, 0.773, 0.446, 1.394, 8.245, 0.695, 0.598, 0.354, 0.294, 0.750, 0.418, 1.046, 0.000, 0.140},
-{0.000, 1.535, 0.948, 0.576, 0.774, 0.845, 0.369, 0.784, 0.661, 0.379, 0.820, 0.368, 0.498, 1.165, 0.652, 0.859, 0.695, 5.106, 1.663, 0.494, 0.271, 0.750, 0.462, 0.850, 0.000, 0.140},
-{0.000, 0.980, 0.743, 0.602, 0.611, 0.685, 0.445, 0.492, 0.540, 0.701, 0.736, 0.561, 0.758, 0.908, 0.560, 0.724, 0.598, 1.663, 6.205, 0.891, 0.285, 0.750, 0.474, 0.700, 0.000, 0.140},
-{0.000, 0.866, 0.268, 0.634, 0.245, 0.369, 0.649, 0.251, 0.289, 2.496, 0.370, 1.220, 1.224, 0.297, 0.370, 0.411, 0.354, 0.494, 0.891, 4.584, 0.342, 0.750, 0.489, 0.385, 0.000, 0.140},
-{0.000, 0.309, 0.179, 0.302, 0.145, 0.241, 1.089, 0.264, 0.390, 0.343, 0.241, 0.439, 0.561, 0.221, 0.178, 0.408, 0.294, 0.271, 0.285, 0.342, 41.552, 0.750, 2.036, 0.304, 0.000, 0.140},
-{0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.000, 0.140},
-{0.000, 0.436, 0.307, 0.308, 0.245, 0.333, 2.780, 0.230, 1.819, 0.539, 0.408, 0.581, 0.550, 0.385, 0.258, 0.462, 0.418, 0.462, 0.474, 0.489, 2.036, 0.750, 12.194, 0.382, 0.000, 0.140},
-{0.000, 0.700, 1.109, 0.224, 1.303, 5.054, 0.263, 0.409, 1.059, 0.281, 1.320, 0.326, 0.603, 0.867, 0.565, 4.360, 1.046, 0.850, 0.700, 0.385, 0.304, 0.750, 0.382, 4.789, 0.000, 0.140},
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.140},
-{0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 0.140, 1.333},
-};
-
-static const double BLOSUM50_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.290},
-{0.000, 3.273, 0.687, 0.888, 0.660, 0.797, 0.546, 1.101, 0.641, 0.715, 0.748, 0.657, 0.854, 0.720, 0.715, 0.820, 0.668, 1.364, 0.967, 0.982, 0.464, 0.750, 0.596, 0.806, 0.000, 0.290},
-{0.000, 0.687, 3.676, 0.507, 4.021, 1.319, 0.384, 0.848, 1.118, 0.407, 0.968, 0.391, 0.516, 3.269, 0.661, 0.998, 0.759, 1.061, 0.955, 0.446, 0.339, 0.750, 0.556, 1.196, 0.000, 0.290},
-{0.000, 0.888, 0.507, 18.231, 0.428, 0.456, 0.565, 0.524, 0.517, 0.587, 0.460, 0.650, 0.681, 0.601, 0.403, 0.481, 0.428, 0.830, 0.818, 0.809, 0.312, 0.750, 0.539, 0.466, 0.000, 0.290},
-{0.000, 0.660, 4.021, 0.428, 6.112, 1.658, 0.337, 0.745, 0.884, 0.370, 0.890, 0.373, 0.435, 1.558, 0.710, 0.967, 0.678, 0.921, 0.825, 0.432, 0.307, 0.750, 0.504, 1.394, 0.000, 0.290},
-{0.000, 0.797, 1.319, 0.456, 1.658, 4.437, 0.456, 0.540, 0.911, 0.416, 1.331, 0.478, 0.605, 0.920, 0.839, 1.671, 0.976, 0.882, 0.819, 0.479, 0.548, 0.750, 0.652, 3.380, 0.000, 0.290},
-{0.000, 0.546, 0.384, 0.565, 0.337, 0.456, 6.636, 0.414, 0.755, 0.990, 0.445, 1.262, 1.052, 0.439, 0.376, 0.417, 0.462, 0.553, 0.570, 0.860, 1.348, 0.750, 2.424, 0.441, 0.000, 0.290},
-{0.000, 1.101, 0.848, 0.524, 0.745, 0.540, 0.414, 5.792, 0.601, 0.370, 0.655, 0.377, 0.519, 0.969, 0.620, 0.641, 0.517, 0.999, 0.636, 0.403, 0.505, 0.750, 0.467, 0.579, 0.000, 0.290},
-{0.000, 0.641, 1.118, 0.517, 0.884, 0.911, 0.755, 0.601, 10.449, 0.411, 0.946, 0.547, 0.760, 1.394, 0.582, 1.209, 0.982, 0.823, 0.653, 0.414, 0.475, 0.750, 1.570, 1.025, 0.000, 0.290},
-{0.000, 0.715, 0.407, 0.587, 0.370, 0.416, 0.990, 0.370, 0.411, 3.411, 0.468, 1.697, 1.438, 0.451, 0.511, 0.503, 0.435, 0.546, 0.861, 2.313, 0.522, 0.750, 0.826, 0.449, 0.000, 0.290},
-{0.000, 0.748, 0.968, 0.460, 0.890, 1.331, 0.445, 0.655, 0.946, 0.468, 3.881, 0.479, 0.685, 1.060, 0.764, 1.419, 2.065, 0.893, 0.845, 0.523, 0.462, 0.750, 0.668, 1.365, 0.000, 0.290},
-{0.000, 0.657, 0.391, 0.650, 0.373, 0.478, 1.262, 0.377, 0.547, 1.697, 0.479, 3.328, 1.790, 0.413, 0.444, 0.563, 0.554, 0.511, 0.759, 1.324, 0.602, 0.750, 0.858, 0.511, 0.000, 0.290},
-{0.000, 0.854, 0.516, 0.681, 0.435, 0.605, 1.052, 0.519, 0.760, 1.438, 0.685, 1.790, 4.816, 0.612, 0.545, 0.960, 0.678, 0.682, 0.882, 1.213, 0.761, 0.750, 0.918, 0.741, 0.000, 0.290},
-{0.000, 0.720, 3.269, 0.601, 1.558, 0.920, 0.439, 0.969, 1.394, 0.451, 1.060, 0.413, 0.612, 5.285, 0.604, 1.035, 0.854, 1.224, 1.109, 0.463, 0.376, 0.750, 0.617, 0.964, 0.000, 0.290},
-{0.000, 0.715, 0.661, 0.403, 0.710, 0.839, 0.376, 0.620, 0.582, 0.511, 0.764, 0.444, 0.545, 0.604, 10.204, 0.750, 0.519, 0.759, 0.749, 0.524, 0.420, 0.750, 0.468, 0.805, 0.000, 0.290},
-{0.000, 0.820, 0.998, 0.481, 0.967, 1.671, 0.417, 0.641, 1.209, 0.503, 1.419, 0.563, 0.960, 1.035, 0.750, 4.697, 1.357, 1.069, 0.810, 0.557, 0.715, 0.750, 0.742, 2.828, 0.000, 0.290},
-{0.000, 0.668, 0.759, 0.428, 0.678, 0.976, 0.462, 0.517, 0.982, 0.435, 2.065, 0.554, 0.678, 0.854, 0.519, 1.357, 5.378, 0.804, 0.737, 0.512, 0.529, 0.750, 0.727, 1.122, 0.000, 0.290},
-{0.000, 1.364, 1.061, 0.830, 0.921, 0.882, 0.553, 0.999, 0.823, 0.546, 0.893, 0.511, 0.682, 1.224, 0.759, 1.069, 0.804, 3.143, 1.497, 0.679, 0.392, 0.750, 0.651, 0.953, 0.000, 0.290},
-{0.000, 0.967, 0.955, 0.818, 0.825, 0.819, 0.570, 0.636, 0.653, 0.861, 0.845, 0.759, 0.882, 1.109, 0.749, 0.810, 0.737, 1.497, 3.553, 1.060, 0.503, 0.750, 0.691, 0.816, 0.000, 0.290},
-{0.000, 0.982, 0.446, 0.809, 0.432, 0.479, 0.860, 0.403, 0.414, 2.313, 0.523, 1.324, 1.213, 0.463, 0.524, 0.557, 0.512, 0.679, 1.060, 3.118, 0.485, 0.750, 0.727, 0.509, 0.000, 0.290},
-{0.000, 0.464, 0.339, 0.312, 0.307, 0.548, 1.348, 0.505, 0.475, 0.522, 0.462, 0.602, 0.761, 0.376, 0.420, 0.715, 0.529, 0.392, 0.503, 0.485, 31.361, 0.750, 1.765, 0.612, 0.000, 0.290},
-{0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.000, 0.290},
-{0.000, 0.596, 0.556, 0.539, 0.504, 0.652, 2.424, 0.467, 1.570, 0.826, 0.668, 0.858, 0.918, 0.617, 0.468, 0.742, 0.727, 0.651, 0.691, 0.727, 1.765, 0.750, 6.893, 0.686, 0.000, 0.290},
-{0.000, 0.806, 1.196, 0.466, 1.394, 3.380, 0.441, 0.579, 1.025, 0.449, 1.365, 0.511, 0.741, 0.964, 0.805, 2.828, 1.122, 0.953, 0.816, 0.509, 0.612, 0.750, 0.686, 3.169, 0.000, 0.290},
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.290},
-{0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 0.290, 1.333},
-};
-
-static const double BLOSUM90_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.120},
-{0.000, 4.773, 0.477, 0.732, 0.451, 0.703, 0.397, 0.957, 0.514, 0.543, 0.723, 0.505, 0.625, 0.510, 0.771, 0.696, 0.555, 1.535, 0.980, 0.866, 0.309, 0.707, 0.436, 0.700, 0.000, 0.120},
-{0.000, 0.477, 5.362, 0.252, 5.759, 1.269, 0.252, 0.639, 0.830, 0.234, 0.793, 0.221, 0.310, 4.868, 0.428, 0.850, 0.609, 0.948, 0.743, 0.268, 0.179, 0.707, 0.307, 1.109, 0.000, 0.120},
-{0.000, 0.732, 0.252, 20.702, 0.214, 0.180, 0.395, 0.272, 0.221, 0.581, 0.241, 0.493, 0.499, 0.300, 0.269, 0.295, 0.233, 0.576, 0.602, 0.634, 0.302, 0.707, 0.308, 0.224, 0.000, 0.120},
-{0.000, 0.451, 5.759, 0.214, 9.106, 1.635, 0.234, 0.541, 0.594, 0.214, 0.677, 0.197, 0.252, 1.584, 0.452, 0.763, 0.477, 0.774, 0.611, 0.245, 0.145, 0.707, 0.245, 1.303, 0.000, 0.120},
-{0.000, 0.703, 1.269, 0.180, 1.635, 6.995, 0.249, 0.399, 0.901, 0.264, 1.195, 0.276, 0.429, 0.811, 0.581, 1.906, 0.832, 0.845, 0.685, 0.369, 0.241, 0.707, 0.333, 5.054, 0.000, 0.120},
-{0.000, 0.397, 0.252, 0.395, 0.234, 0.249, 9.486, 0.249, 0.572, 0.841, 0.283, 1.114, 0.893, 0.273, 0.237, 0.285, 0.287, 0.369, 0.445, 0.649, 1.089, 0.707, 2.780, 0.263, 0.000, 0.120},
-{0.000, 0.957, 0.639, 0.272, 0.541, 0.399, 0.249, 7.882, 0.387, 0.184, 0.483, 0.210, 0.286, 0.761, 0.347, 0.425, 0.377, 0.784, 0.492, 0.251, 0.264, 0.707, 0.230, 0.409, 0.000, 0.120},
-{0.000, 0.514, 0.830, 0.221, 0.594, 0.901, 0.572, 0.387, 16.070, 0.258, 0.740, 0.314, 0.432, 1.124, 0.420, 1.316, 0.925, 0.661, 0.540, 0.289, 0.390, 0.707, 1.819, 1.059, 0.000, 0.120},
-{0.000, 0.543, 0.234, 0.581, 0.214, 0.264, 0.841, 0.184, 0.258, 4.868, 0.313, 1.665, 1.512, 0.258, 0.286, 0.309, 0.299, 0.379, 0.701, 2.496, 0.343, 0.707, 0.539, 0.281, 0.000, 0.120},
-{0.000, 0.723, 0.793, 0.241, 0.677, 1.195, 0.283, 0.483, 0.740, 0.313, 6.326, 0.357, 0.534, 0.938, 0.597, 1.524, 2.192, 0.820, 0.736, 0.370, 0.241, 0.707, 0.408, 1.320, 0.000, 0.120},
-{0.000, 0.505, 0.221, 0.493, 0.197, 0.276, 1.114, 0.210, 0.314, 1.665, 0.357, 4.463, 2.123, 0.250, 0.303, 0.407, 0.363, 0.368, 0.561, 1.220, 0.439, 0.707, 0.581, 0.326, 0.000, 0.120},
-{0.000, 0.625, 0.310, 0.499, 0.252, 0.429, 0.893, 0.286, 0.432, 1.512, 0.534, 2.123, 8.883, 0.382, 0.362, 0.887, 0.506, 0.498, 0.758, 1.224, 0.561, 0.707, 0.550, 0.603, 0.000, 0.120},
-{0.000, 0.510, 4.868, 0.300, 1.584, 0.811, 0.273, 0.761, 1.124, 0.258, 0.938, 0.250, 0.382, 8.963, 0.398, 0.958, 0.773, 1.165, 0.908, 0.297, 0.221, 0.707, 0.385, 0.867, 0.000, 0.120},
-{0.000, 0.771, 0.428, 0.269, 0.452, 0.581, 0.237, 0.347, 0.420, 0.286, 0.597, 0.303, 0.362, 0.398, 15.155, 0.538, 0.446, 0.652, 0.560, 0.370, 0.178, 0.707, 0.258, 0.565, 0.000, 0.120},
-{0.000, 0.696, 0.850, 0.295, 0.763, 1.906, 0.285, 0.425, 1.316, 0.309, 1.524, 0.407, 0.887, 0.958, 0.538, 8.340, 1.394, 0.859, 0.724, 0.411, 0.408, 0.707, 0.462, 4.360, 0.000, 0.120},
-{0.000, 0.555, 0.609, 0.233, 0.477, 0.832, 0.287, 0.377, 0.925, 0.299, 2.192, 0.363, 0.506, 0.773, 0.446, 1.394, 8.245, 0.695, 0.598, 0.354, 0.294, 0.707, 0.418, 1.046, 0.000, 0.120},
-{0.000, 1.535, 0.948, 0.576, 0.774, 0.845, 0.369, 0.784, 0.661, 0.379, 0.820, 0.368, 0.498, 1.165, 0.652, 0.859, 0.695, 5.106, 1.663, 0.494, 0.271, 0.707, 0.462, 0.850, 0.000, 0.120},
-{0.000, 0.980, 0.743, 0.602, 0.611, 0.685, 0.445, 0.492, 0.540, 0.701, 0.736, 0.561, 0.758, 0.908, 0.560, 0.724, 0.598, 1.663, 6.205, 0.891, 0.285, 0.707, 0.474, 0.700, 0.000, 0.120},
-{0.000, 0.866, 0.268, 0.634, 0.245, 0.369, 0.649, 0.251, 0.289, 2.496, 0.370, 1.220, 1.224, 0.297, 0.370, 0.411, 0.354, 0.494, 0.891, 4.584, 0.342, 0.707, 0.489, 0.385, 0.000, 0.120},
-{0.000, 0.309, 0.179, 0.302, 0.145, 0.241, 1.089, 0.264, 0.390, 0.343, 0.241, 0.439, 0.561, 0.221, 0.178, 0.408, 0.294, 0.271, 0.285, 0.342, 41.552, 0.707, 2.036, 0.304, 0.000, 0.120},
-{0.000, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.707, 0.000, 0.120},
-{0.000, 0.436, 0.307, 0.308, 0.245, 0.333, 2.780, 0.230, 1.819, 0.539, 0.408, 0.581, 0.550, 0.385, 0.258, 0.462, 0.418, 0.462, 0.474, 0.489, 2.036, 0.707, 12.194, 0.382, 0.000, 0.120},
-{0.000, 0.700, 1.109, 0.224, 1.303, 5.054, 0.263, 0.409, 1.059, 0.281, 1.320, 0.326, 0.603, 0.867, 0.565, 4.360, 1.046, 0.850, 0.700, 0.385, 0.304, 0.707, 0.382, 4.789, 0.000, 0.120},
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.120},
-{0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 0.120, 1.333},
-};
+/** Retrieve the matrix's frequency ratios.
+ * @param matrix_name Available options include:
+ * BLOSUM62
+ * BLOSUM62_20
+ * BLOSUM62_20A
+ * BLOSUM62_20B
+ * BLOSUM45
+ * BLOSUM80
+ * BLOSUM50
+ * BLOSUM90
+ * PAM30
+ * PAM70
+ * PAM250
+ * @return NULL on error
+ */
+SFreqRatios*
+_PSIMatrixFrequencyRatiosNew(const char* matrix_name);
-static const double PAM250_FREQRATIOS[BLASTAA_SIZE][BLASTAA_SIZE] = {
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.170},
-{0.000, 1.516, 1.056, 0.627, 1.070, 1.075, 0.446, 1.339, 0.732, 0.889, 0.766, 0.646, 0.770, 1.040, 1.294, 0.903, 0.701, 1.290, 1.317, 1.045, 0.264, 0.750, 0.450, 1.000, 0.000, 0.170},
-{0.000, 1.056, 1.843, 0.365, 2.052, 1.825, 0.354, 1.119, 1.297, 0.617, 1.130, 0.453, 0.605, 1.600, 0.846, 1.339, 0.862, 1.117, 1.032, 0.638, 0.297, 0.750, 0.486, 1.613, 0.000, 0.170},
-{0.000, 0.627, 0.365, 15.630, 0.307, 0.295, 0.371, 0.459, 0.452, 0.588, 0.286, 0.248, 0.299, 0.433, 0.527, 0.289, 0.430, 0.990, 0.601, 0.639, 0.169, 0.750, 1.078, 0.293, 0.000, 0.170},
-{0.000, 1.070, 2.052, 0.307, 2.433, 2.197, 0.274, 1.147, 1.173, 0.580, 1.020, 0.398, 0.548, 1.610, 0.806, 1.461, 0.743, 1.069, 0.968, 0.612, 0.213, 0.750, 0.369, 1.877, 0.000, 0.170},
-{0.000, 1.075, 1.825, 0.295, 2.197, 2.414, 0.287, 1.044, 1.165, 0.626, 0.988, 0.464, 0.614, 1.393, 0.880, 1.771, 0.782, 0.999, 0.914, 0.660, 0.201, 0.750, 0.372, 2.134, 0.000, 0.170},
-{0.000, 0.446, 0.354, 0.371, 0.274, 0.287, 8.028, 0.332, 0.659, 1.262, 0.298, 1.518, 1.043, 0.447, 0.351, 0.343, 0.359, 0.478, 0.488, 0.766, 1.077, 0.750, 4.975, 0.311, 0.000, 0.170},
-{0.000, 1.339, 1.119, 0.459, 1.147, 1.044, 0.332, 2.991, 0.616, 0.557, 0.678, 0.395, 0.525, 1.087, 0.894, 0.757, 0.555, 1.278, 0.999, 0.735, 0.200, 0.750, 0.300, 0.919, 0.000, 0.170},
-{0.000, 0.732, 1.297, 0.452, 1.173, 1.165, 0.660, 0.616, 4.475, 0.571, 0.990, 0.620, 0.610, 1.440, 0.947, 1.962, 1.431, 0.830, 0.741, 0.597, 0.544, 0.750, 0.979, 1.512, 0.000, 0.170},
-{0.000, 0.889, 0.617, 0.588, 0.580, 0.626, 1.262, 0.557, 0.571, 2.830, 0.641, 1.749, 1.650, 0.660, 0.627, 0.625, 0.629, 0.723, 1.017, 2.338, 0.303, 0.750, 0.800, 0.626, 0.000, 0.170},
-{0.000, 0.766, 1.130, 0.286, 1.020, 0.988, 0.298, 0.678, 0.990, 0.641, 2.926, 0.519, 1.100, 1.258, 0.770, 1.184, 2.182, 0.962, 0.996, 0.570, 0.451, 0.750, 0.360, 1.073, 0.000, 0.170},
-{0.000, 0.646, 0.453, 0.248, 0.398, 0.464, 1.518, 0.395, 0.620, 1.749, 0.519, 3.930, 2.338, 0.516, 0.556, 0.665, 0.501, 0.524, 0.677, 1.532, 0.647, 0.750, 0.815, 0.551, 0.000, 0.170},
-{0.000, 0.770, 0.605, 0.299, 0.548, 0.614, 1.043, 0.525, 0.609, 1.650, 1.100, 2.338, 4.404, 0.670, 0.621, 0.796, 0.905, 0.699, 0.873, 1.510, 0.375, 0.750, 0.568, 0.693, 0.000, 0.170},
-{0.000, 1.040, 1.600, 0.433, 1.610, 1.393, 0.447, 1.087, 1.440, 0.660, 1.258, 0.516, 0.671, 1.588, 0.893, 1.197, 1.000, 1.173, 1.107, 0.668, 0.394, 0.750, 0.621, 1.307, 0.000, 0.170},
-{0.000, 1.294, 0.846, 0.527, 0.806, 0.880, 0.351, 0.894, 0.946, 0.627, 0.770, 0.556, 0.621, 0.893, 3.841, 1.055, 0.960, 1.243, 1.076, 0.758, 0.275, 0.750, 0.321, 0.956, 0.000, 0.170},
-{0.000, 0.903, 1.339, 0.290, 1.462, 1.772, 0.343, 0.757, 1.962, 0.625, 1.184, 0.665, 0.796, 1.197, 1.056, 2.541, 1.335, 0.890, 0.833, 0.649, 0.335, 0.750, 0.395, 2.107, 0.000, 0.170},
-{0.000, 0.701, 0.862, 0.430, 0.743, 0.782, 0.359, 0.555, 1.430, 0.629, 2.182, 0.501, 0.905, 1.000, 0.960, 1.335, 4.078, 0.928, 0.820, 0.561, 1.652, 0.750, 0.379, 1.023, 0.000, 0.170},
-{0.000, 1.290, 1.117, 0.990, 1.069, 0.999, 0.478, 1.278, 0.830, 0.722, 0.962, 0.524, 0.699, 1.173, 1.244, 0.890, 0.928, 1.441, 1.362, 0.799, 0.566, 0.750, 0.520, 0.951, 0.000, 0.170},
-{0.000, 1.317, 1.032, 0.601, 0.968, 0.914, 0.488, 0.999, 0.741, 1.016, 0.996, 0.677, 0.873, 1.107, 1.075, 0.833, 0.820, 1.362, 1.806, 1.068, 0.306, 0.750, 0.530, 0.879, 0.000, 0.170},
-{0.000, 1.045, 0.638, 0.639, 0.612, 0.660, 0.766, 0.735, 0.597, 2.338, 0.570, 1.532, 1.511, 0.668, 0.758, 0.649, 0.561, 0.799, 1.068, 2.698, 0.237, 0.750, 0.566, 0.656, 0.000, 0.170},
-{0.000, 0.264, 0.297, 0.169, 0.213, 0.201, 1.077, 0.200, 0.544, 0.303, 0.450, 0.647, 0.375, 0.394, 0.275, 0.335, 1.651, 0.566, 0.306, 0.237, 52.679, 0.750, 0.970, 0.259, 0.000, 0.170},
-{0.000, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.750, 0.000, 0.170},
-{0.000, 0.450, 0.486, 1.078, 0.369, 0.372, 4.976, 0.300, 0.979, 0.800, 0.360, 0.815, 0.568, 0.621, 0.321, 0.395, 0.379, 0.520, 0.530, 0.566, 0.970, 0.750, 10.338, 0.382, 0.000, 0.170},
-{0.000, 1.000, 1.613, 0.293, 1.877, 2.134, 0.311, 0.919, 1.512, 0.626, 1.073, 0.551, 0.693, 1.307, 0.956, 2.107, 1.023, 0.951, 0.879, 0.656, 0.259, 0.750, 0.382, 2.122, 0.000, 0.170},
-{0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.170},
-{0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 0.170, 1.333},
-};
+/** Deallocate the frequency ratios structure */
+SFreqRatios*
+_PSIMatrixFrequencyRatiosFree(SFreqRatios* freq_ratios);
#ifdef __cplusplus
}
#endif
+/*
+ * ===========================================================================
+ *
+ * $Log: matrix_freq_ratios.h,v $
+ * Revision 1.2 2004/05/06 15:28:11 camacho
+ * Provide an interface to retrive the matrices' frequency ratios
+ *
+ *
+ * ===========================================================================
+ */
+
#endif /* !ALGO_BLAST_CORE__MATRIX_FREQ_RATIOS__H */
diff --git a/algo/blast/core/mb_lookup.c b/algo/blast/core/mb_lookup.c
index 22a1c01b..26c09d27 100644
--- a/algo/blast/core/mb_lookup.c
+++ b/algo/blast/core/mb_lookup.c
@@ -1,47 +1,45 @@
-/* $Id: mb_lookup.c,v 1.32 2004/04/27 15:56:53 coulouri Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: mb_lookup.c
-
-Author: Ilya Dondoshansky
-
-Contents: Functions responsible for the creation of a lookup table
-
-Detailed Contents:
-
-******************************************************************************
- * $Revision: 1.32 $
+/* $Id: mb_lookup.c,v 1.33 2004/05/19 14:52:03 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
*/
+
+/** @file mb_lookup.c
+ * Functions responsible for the creation of a lookup table
+ * @todo FIXME @sa mb_lookup.h
+ */
+
+static char const rcsid[] =
+ "$Id: mb_lookup.c,v 1.33 2004/05/19 14:52:03 camacho Exp $";
+
#include <algo/blast/core/blast_options.h>
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/mb_lookup.h>
#include "blast_inline.h"
-static char const rcsid[] = "$Id: mb_lookup.c,v 1.32 2004/04/27 15:56:53 coulouri Exp $";
MBLookupTable* MBLookupTableDestruct(MBLookupTable* mb_lt)
{
diff --git a/algo/blast/core/mb_lookup.h b/algo/blast/core/mb_lookup.h
index 57ff2673..ceecdc60 100644
--- a/algo/blast/core/mb_lookup.h
+++ b/algo/blast/core/mb_lookup.h
@@ -1,180 +1,294 @@
-/* $Id: mb_lookup.h,v 1.11 2004/02/23 19:52:22 madden Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: mb_lookup.h
-
-Author: Ilya Dondoshansky
-
-Contents: Functions responsible for the creation of a lookup table
-
-Detailed Contents:
-
-******************************************************************************
- * $Revision: 1.11 $
+/* $Id: mb_lookup.h,v 1.15 2004/06/16 14:53:03 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
*/
+/** @file mb_lookup.h
+ * Functions responsible for the creation of a lookup table
+ * @todo FIXME: shouldn't file description read megablast lookup table? ; use
+ * doxygen comments
+ */
+
+
#ifndef MBLOOKUP__H
#define MBLOOKUP__H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_options.h>
#include <algo/blast/core/blast_lookup.h>
-/* The fraction of sites that must have at least one hit to not use
- PV_ARRAY. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** The fraction of sites that must have at least one hit to not use
+ * PV_ARRAY.
+ */
#define PV_ARRAY_FACTOR 0.5
-/* Mask to determine whether a residue is an ambiguity */
+/** Mask to determine whether a residue is an ambiguity. */
#define NUC_MASK 0xfc
-/* Bit that is set in all indices corresponding to the second discontiguous
- word template with weight 11, to distinguish them from the first template
- indices
-*/
-#define SECOND_TEMPLATE_BIT 0x00800000
-
-/* Pack a nucleotide value into an integer index */
+/** Pack a nucleotide value into an integer index. */
#define PACK_EXTRA_CODE(ecode,val,mask) {ecode = ((ecode<<2) & mask) | val;}
-/* Get the 2 bit base starting from the n-th bit in a sequence byte; advance
- the base; advance sequence when last base in a byte is retrieved */
+/** Get the 2 bit base starting from the n-th bit in a sequence byte; advance
+ * the base; advance sequence when last base in a byte is retrieved.
+ */
#define GET_NEXT_PACKED_NUCL(s,n,val) { val = ((*s)>>(n)) & 0x00000003; n = (n-2)&0x07; s = s + ((n>>1)&(n>>2)&0x01); }
-/* OPTIMAL templates */
-
-/* 1,110,110,110,110,111 - 12 of 16 */
-/* 1,110,010,110,110,111 - 11 of 16 */
+/** Optimal word templates:
+ * Number of 1's in a template is word size (weight);
+ * total number of 1's and 0's - template length.
+ * 1,110,110,110,110,111 - 12 of 16
+ * 1,110,010,110,110,111 - 11 of 16
+ * 111,010,110,010,110,111 - 12 of 18
+ * 111,010,010,110,010,111 - 11 of 18
+ * 111,010,010,110,010,010,111 - 12 of 21
+ * 111,010,010,100,010,010,111 - 11 of 21
+ * Coding word templates:
+ * 111,110,110,110,110,1 - 12 of 16
+ * 110,110,110,110,110,1 - 11 of 16
+ * 10,110,110,110,110,110,1 - 12 of 18
+ * 10,110,110,010,110,110,1 - 11 of 18
+ * 10,010,110,110,110,010,110,1 - 12 of 21
+ * 10,010,110,010,110,010,110,1 - 11 of 21
+ *
+ * Lookup table index for each word under a template is calculated by first
+ * calculating the partial index, corresponding to the first 16 bases of a
+ * sequence, which form a 4 byte integer, then adding an extra code,
+ * corresponding to the remaining bases: none for length 16, 2 for length 18,
+ * 5 for length 21.
+ * Index values are calculated by masking the respective pieces of sequence so
+ * only bits corresponding to a contiguous string of 1's in a template are
+ * left, then shifting the masked value to a correct position in the final
+ * 24-bit lookup table index, which is the sum of such shifts.
+ */
+
+/** Masks for index calculation for different word templates. */
+/** Optimal, length 16 */
+/** First mask, same for optimal templates 11 and 12 of 16 */
#define MASK1_OPT 0x0000003f
+/** Second mask, same for optimal templates 11 and 12 of 16 */
#define MASK2_OPT 0x00000f00
+/** Third mask, same for optimal templates 11 and 12 of 16 */
#define MASK3_OPT 0x0003c000
+/** Fourth mask, specific to optimal template 12 of 16 */
#define MASK4_12_OPT 0x00f00000
+/** Fourth mask, specific to optimal template 11 of 16 */
#define MASK4_11_OPT 0x00300000
+/** Fifth mask, same for optimal templates 11 and 12 of 16 */
#define MASK5_OPT 0xfc000000
-/* 12 of 16 */
-#define GET_WORD_INDEX_12_16_OPT(n) (((n)&MASK1_OPT) | (((n)&MASK2_OPT)>>2) | (((n)&MASK3_OPT)>>4) | (((n)&MASK4_12_OPT)>>6) | (((n)&MASK5_OPT)>>8))
-/* 11 of 16 */
-#define GET_WORD_INDEX_11_16_OPT(n) (((n)&MASK1_OPT) | (((n)&MASK2_OPT)>>2) | (((n)&MASK3_OPT)>>4) | (((n)&MASK4_11_OPT)>>6) | (((n)&MASK5_OPT)>>10))
-
-/* 111,010,110,010,110,111 - 12 of 18 */
-/* 111,010,010,110,010,111 - 11 of 18 */
+
+/** Optimal, length 18 */
+/** First mask, same for optimal templates 11 and 12 of 18 */
#define MASK1_18_OPT 0x00000003
+/** Second mask, specific to optimal template 12 of 18 */
#define MASK2_12_18_OPT 0x000000f0
+/** Second mask, specific to optimal template 11 of 18 */
#define MASK2_11_18_OPT 0x00000030
-#define MASK3_11_18_OPT 0x00003c00
+/** Third mask, specific to optimal template 12 of 18 */
#define MASK3_12_18_OPT 0x00000c00
-#define MASK4_11_18_OPT 0x00030000
+/** Third mask, specific to optimal template 11 of 18 */
+#define MASK3_11_18_OPT 0x00003c00
+/** Fourth mask, specific to optimal template 12 of 18 */
#define MASK4_12_18_OPT 0x000f0000
+/** Fourth mask, specific to optimal template 11 of 18 */
+#define MASK4_11_18_OPT 0x00030000
+/** Fifth mask, same for optimal templates 11 and 12 of 18 */
#define MASK5_18_OPT 0x00c00000
+/** Sixth mask, same for optimal templates 11 and 12 of 18 */
#define MASK6_18_OPT 0xfc000000
-/* 12 of 18 */
-#define GET_WORD_INDEX_12_18_OPT(n) ((((n)&MASK1_18_OPT)<<4) | (((n)&MASK2_12_18_OPT)<<2) | ((n)&MASK3_12_18_OPT) | (((n)&MASK4_12_18_OPT)>>4) | (((n)&MASK5_18_OPT)>>6) | (((n)&MASK6_18_OPT)>>8))
-/* 11 of 18 */
-#define GET_WORD_INDEX_11_18_OPT(n) ((((n)&MASK1_18_OPT)<<4) | (((n)&MASK2_11_18_OPT)<<2) | (((n)&MASK3_11_18_OPT)>>2) | (((n)&MASK4_11_18_OPT)>>4) | (((n)&MASK5_18_OPT)>>8) | (((n)&MASK6_18_OPT)>>10))
-#define MASK_EXTRA_OPT 0x0000000f
-#define GET_EXTRA_CODE_18_OPT(s) (((*(s+1))<<2) | (*(s+2))) & MASK_EXTRA_OPT
-
-#define GET_EXTRA_CODE_PACKED_4_18_OPT(s) ((*(s))>>4)
-#define GET_EXTRA_CODE_PACKED_18_OPT(s,b,val,ecode) {GET_NEXT_PACKED_NUCL(s,b,ecode); GET_NEXT_PACKED_NUCL(s,b,val); PACK_EXTRA_CODE(ecode, val,MASK_EXTRA_OPT);}
-
-#define GET_AMBIG_CONDITION_18_OPT(s) (((*(s+1))&NUC_MASK) | (((*(s+2))&NUC_MASK)))
-/* 111,010,010,110,010,010,111 - 12 of 21 */
-/* 111,010,010,100,010,010,111 - 11 of 21 */
+/** Optimal, length 21 */
+/** First mask, same for optimal templates 11 and 12 of 21 */
#define MASK1_21_OPT 0x00000030
+/** Second mask specific to optimal template 12 of 21 */
#define MASK2_12_21_OPT 0x00003c00
+/** Second mask specific to optimal template 11 of 21 */
#define MASK2_11_21_OPT 0x00003000
+/** Third mask, same for optimal templates 11 and 12 of 21 */
#define MASK3_21_OPT 0x00030000
+/** Fourth mask, same for optimal templates 11 and 12 of 21 */
#define MASK4_21_OPT 0x00c00000
+/** Fifth mask, same for optimal templates 11 and 12 of 21 */
#define MASK5_21_OPT 0xfc000000
-#define GET_WORD_INDEX_12_21_OPT(n) ((((n)&MASK1_21_OPT)<<4) | ((n)&MASK2_12_21_OPT) | (((n)&MASK3_21_OPT)>>2) | (((n)&MASK4_21_OPT)>>6) | (((n)&MASK5_21_OPT)>>8))
-#define GET_WORD_INDEX_11_21_OPT(n) ((((n)&MASK1_21_OPT)<<4) | (((n)&MASK2_11_21_OPT)>>2) | (((n)&MASK3_21_OPT)>>4) | (((n)&MASK4_21_OPT)>>8) | (((n)&MASK5_21_OPT)>>10))
-#define MASK_EXTRA_21_OPT 0x000000ff
-#define GET_EXTRA_CODE_21_OPT(s) ((((*(s+1))<<6) | ((*(s+3))<<4) | ((*(s+4))<<2) | (*(s+5))) & MASK_EXTRA_21_OPT)
-#define GET_AMBIG_CONDITION_21_OPT(s) (((*(s+1))&NUC_MASK) | ((*(s+3))&NUC_MASK) | ((*(s+4))&NUC_MASK) | ((*(s+5))&NUC_MASK))
-#define GET_EXTRA_CODE_PACKED_4_21_OPT(s) ((((*(s))&0x0f)<<2)|((*(s))&0xc0)|((*(s+1))>>6))
-#define GET_EXTRA_CODE_PACKED_21_OPT(s,b,val,ecode) {GET_NEXT_PACKED_NUCL(s,b,ecode); GET_NEXT_PACKED_NUCL(s,b,val); GET_NEXT_PACKED_NUCL(s,b,val); PACK_EXTRA_CODE(ecode,val,MASK_EXTRA_21_OPT); GET_NEXT_PACKED_NUCL(s,b,val); PACK_EXTRA_CODE(ecode,val,MASK_EXTRA_21_OPT); GET_NEXT_PACKED_NUCL(s,b,val); PACK_EXTRA_CODE(ecode,val,MASK_EXTRA_21_OPT);}
-/* CODING TEMPLATES */
-
-/* 111,110,110,110,110,1 - 12 of 16 */
-/* 110,110,110,110,110,1 - 11 of 16 */
+/** Coding, length 16. */
+/** First mask, same for coding templates 11 and 12 of 16 */
#define MASK1 0x00000003
+/** Second mask, same for coding templates 11 and 12 of 16 */
#define MASK2 0x000000f0
+/** Third mask, same for coding templates 11 and 12 of 16 */
#define MASK3 0x00003c00
+/** Fourth mask, same for coding templates 11 and 12 of 16 */
#define MASK4 0x000f0000
+/** Fifth mask specific to coding template 12 of 16 */
#define MASK5_12 0xffc00000
+/** Fifth mask specific to coding template 11 of 16 */
#define MASK5_11 0x03c00000
+/** Sixth mask, same for coding templates 11 and 12 of 16 */
#define MASK6 0xf0000000
-/* 12 of 16 */
-#define GET_WORD_INDEX_12_16(n) (((n)&MASK1) | (((n)&MASK2)>>2) | (((n)&MASK3)>>4) | (((n)&MASK4)>>6) | (((n)&MASK5_12)>>8))
-/* 11 of 16 */
-#define GET_WORD_INDEX_11_16(n) (((n)&MASK1) | (((n)&MASK2)>>2) | (((n)&MASK3)>>4) | (((n)&MASK4)>>6) | (((n)&MASK5_11)>>8) | (((n)&MASK6)>>10))
-/* 10,110,110,110,110,110,1 - 12 of 18 */
-/* 10,110,110,010,110,110,1 - 11 of 18 */
+/** Coding, length 18. */
+/** First mask, same for coding templates 11 and 12 of 18 */
#define MASK1_18 0x0000000f
+/** Second mask, same for coding templates 11 and 12 of 18 */
#define MASK2_18 0x000003c0
-#define MASK3_11_18 0x00003000
+/** Third mask specific to coding template 12 of 18 */
#define MASK3_12_18 0x0000f000
+/** Third mask specific to coding template 11 of 18 */
+#define MASK3_11_18 0x00003000
+/** Fourth mask, same for coding templates 11 and 12 of 18 */
#define MASK4_18 0x003c0000
+/** Fifth mask, same for coding templates 11 and 12 of 18 */
#define MASK5_18 0x0f000000
+/** Sixth mask, same for coding templates 11 and 12 of 18 */
#define MASK6_18 0xc0000000
-/* 12 of 18 */
-#define GET_WORD_INDEX_12_18(n) ((((n)&MASK1_18)<<2) | ((n)&MASK2_18) | (((n)&MASK3_12_18)>>2) | (((n)&MASK4_18)>>4) | (((n)&MASK5_18)>>6) | (((n)&MASK6_18)>>8))
-/* 11 of 18 */
-#define GET_WORD_INDEX_11_18(n) ((((n)&MASK1_18)<<2) | ((n)&MASK2_18) | (((n)&MASK3_11_18)>>2) | (((n)&MASK4_18)>>6) | (((n)&MASK5_18)>>8) | (((n)&MASK6_18)>>10))
-
-#define MASK_EXTRA_18 0x00000003
-#define GET_EXTRA_CODE_18(s) ((*(s+2)) & MASK_EXTRA_18)
-#define GET_EXTRA_CODE_PACKED_4_18(s) (((*(s))>>4) & MASK_EXTRA_18)
-#define GET_EXTRA_CODE_PACKED_18(s,b,val,ecode) {GET_NEXT_PACKED_NUCL(s,b,val); GET_NEXT_PACKED_NUCL(s,b,ecode);}
-#define GET_AMBIG_CONDITION_18(s) ((*(s+2))&NUC_MASK)
-
-/* 10,010,110,110,110,010,110,1 - 12 of 21 */
-/* 10,010,110,010,110,010,110,1 - 11 of 21 */
+/** Coding, length 21 */
+/** First mask, same for coding templates 11 and 12 of 21 */
#define MASK1_21 0x00000003
+/** Second mask, same for coding templates 11 and 12 of 21 */
#define MASK2_21 0x000003c0
+/** Third mask specific to coding template 12 of 21 */
#define MASK3_12_21 0x0000f000
+/** Third mask specific to coding template 11 of 21 */
#define MASK3_11_21 0x00003000
+/** Fourth mask, same for coding templates 11 and 12 of 21 */
#define MASK4_21 0x003c0000
+/** Fifth mask, same for coding templates 11 and 12 of 21 */
#define MASK5_21 0x03000000
+/** Sixth mask, same for coding templates 11 and 12 of 21 */
#define MASK6_21 0xc0000000
+
+/** Mask for extra code calculation for optimal templates of length 18 */
+#define MASK_EXTRA_OPT 0x0000000f
+/** Mask for extra code calculation for optimal templates of length 21 */
+#define MASK_EXTRA_21_OPT 0x000000ff
+/** Mask for extra code calculation for coding templates of length 18 */
+#define MASK_EXTRA_18 0x00000003
+/** Mask for extra code calculation for coding templates of length 21 */
+#define MASK_EXTRA_21 0x0000003f
+
+
+/** Word index calculation for optimal template 12 of 16 */
+#define GET_WORD_INDEX_12_16_OPT(n) (((n)&MASK1_OPT) | (((n)&MASK2_OPT)>>2) | (((n)&MASK3_OPT)>>4) | (((n)&MASK4_12_OPT)>>6) | (((n)&MASK5_OPT)>>8))
+/** Word index calculation for optimal template 11 of 16 */
+#define GET_WORD_INDEX_11_16_OPT(n) (((n)&MASK1_OPT) | (((n)&MASK2_OPT)>>2) | (((n)&MASK3_OPT)>>4) | (((n)&MASK4_11_OPT)>>6) | (((n)&MASK5_OPT)>>10))
+
+/** Word index calculation for optimal template 12 of 18 */
+#define GET_WORD_INDEX_12_18_OPT(n) ((((n)&MASK1_18_OPT)<<4) | (((n)&MASK2_12_18_OPT)<<2) | ((n)&MASK3_12_18_OPT) | (((n)&MASK4_12_18_OPT)>>4) | (((n)&MASK5_18_OPT)>>6) | (((n)&MASK6_18_OPT)>>8))
+/** Word index calculation for optimal template 11 of 18 */
+#define GET_WORD_INDEX_11_18_OPT(n) ((((n)&MASK1_18_OPT)<<4) | (((n)&MASK2_11_18_OPT)<<2) | (((n)&MASK3_11_18_OPT)>>2) | (((n)&MASK4_11_18_OPT)>>4) | (((n)&MASK5_18_OPT)>>8) | (((n)&MASK6_18_OPT)>>10))
+
+/** Word index calculation for optimal template 12 of 21 */
+#define GET_WORD_INDEX_12_21_OPT(n) ((((n)&MASK1_21_OPT)<<4) | ((n)&MASK2_12_21_OPT) | (((n)&MASK3_21_OPT)>>2) | (((n)&MASK4_21_OPT)>>6) | (((n)&MASK5_21_OPT)>>8))
+/** Word index calculation for optimal template 11 of 21 */
+#define GET_WORD_INDEX_11_21_OPT(n) ((((n)&MASK1_21_OPT)<<4) | (((n)&MASK2_11_21_OPT)>>2) | (((n)&MASK3_21_OPT)>>4) | (((n)&MASK4_21_OPT)>>8) | (((n)&MASK5_21_OPT)>>10))
+
+/** Word index calculation for coding template 12 of 16 */
+#define GET_WORD_INDEX_12_16(n) (((n)&MASK1) | (((n)&MASK2)>>2) | (((n)&MASK3)>>4) | (((n)&MASK4)>>6) | (((n)&MASK5_12)>>8))
+/** Word index calculation for coding template 11 of 16 */
+#define GET_WORD_INDEX_11_16(n) (((n)&MASK1) | (((n)&MASK2)>>2) | (((n)&MASK3)>>4) | (((n)&MASK4)>>6) | (((n)&MASK5_11)>>8) | (((n)&MASK6)>>10))
+
+/** Word index calculation for coding template 12 of 18 */
+#define GET_WORD_INDEX_12_18(n) ((((n)&MASK1_18)<<2) | ((n)&MASK2_18) | (((n)&MASK3_12_18)>>2) | (((n)&MASK4_18)>>4) | (((n)&MASK5_18)>>6) | (((n)&MASK6_18)>>8))
+/** Word index calculation for coding template 11 of 18 */
+#define GET_WORD_INDEX_11_18(n) ((((n)&MASK1_18)<<2) | ((n)&MASK2_18) | (((n)&MASK3_11_18)>>2) | (((n)&MASK4_18)>>6) | (((n)&MASK5_18)>>8) | (((n)&MASK6_18)>>10))
+
+/** Word index calculation for coding template 12 of 21 */
#define GET_WORD_INDEX_12_21(n) ((((n)&MASK1_21)<<6) | (((n)&MASK2_21)<<2) | ((n)&MASK3_12_21) | (((n)&MASK4_21)>>2) | (((n)&MASK5_21)>>4) | (((n)&MASK6_21)>>8))
+/** Word index calculation for coding template 11 of 21 */
#define GET_WORD_INDEX_11_21(n) ((((n)&MASK1_21)<<6) | (((n)&MASK2_21)<<2) | ((n)&MASK3_11_21) | (((n)&MASK4_21)>>4) | (((n)&MASK5_21)>>6) | (((n)&MASK6_21)>>10))
-#define MASK_EXTRA_21 0x0000003f
+
+/** Extra code calculation for optimal templates of length 18 for an unpacked
+ * sequence.
+ */
+#define GET_EXTRA_CODE_18_OPT(s) (((*(s+1))<<2) | (*(s+2))) & MASK_EXTRA_OPT
+/** Extra code calculation for optimal templates of length 18 for a packed
+ * sequence, when sequence is advanced by 4 bases.
+ */
+#define GET_EXTRA_CODE_PACKED_4_18_OPT(s) ((*(s))>>4)
+/** Extra code calculation for optimal templates of length 18 for a packed
+ * sequence, when sequence is advanced by 1 base.
+ */
+#define GET_EXTRA_CODE_PACKED_18_OPT(s,b,val,ecode) {GET_NEXT_PACKED_NUCL(s,b,ecode); GET_NEXT_PACKED_NUCL(s,b,val); PACK_EXTRA_CODE(ecode, val,MASK_EXTRA_OPT);}
+/** Checks whether extra piece of the sequence under an optimal template of
+ * length 18 contains an ambiguity. */
+#define GET_AMBIG_CONDITION_18_OPT(s) (((*(s+1))&NUC_MASK) | (((*(s+2))&NUC_MASK)))
+
+/** Extra code calculation for optimal templates of length 21 for an unpacked
+ * sequence.
+ */
+#define GET_EXTRA_CODE_21_OPT(s) ((((*(s+1))<<6) | ((*(s+3))<<4) | ((*(s+4))<<2) | (*(s+5))) & MASK_EXTRA_21_OPT)
+/** Extra code calculation for optimal templates of length 21 for a packed
+ * sequence, when sequence is advanced by 4 bases.
+ */
+#define GET_EXTRA_CODE_PACKED_4_21_OPT(s) ((((*(s))&0x0f)<<2)|((*(s))&0xc0)|((*(s+1))>>6))
+/** Extra code calculation for optimal templates of length 21 for a packed
+ * sequence, when sequence is advanced by 1 base.
+ */
+#define GET_EXTRA_CODE_PACKED_21_OPT(s,b,val,ecode) {GET_NEXT_PACKED_NUCL(s,b,ecode); GET_NEXT_PACKED_NUCL(s,b,val); GET_NEXT_PACKED_NUCL(s,b,val); PACK_EXTRA_CODE(ecode,val,MASK_EXTRA_21_OPT); GET_NEXT_PACKED_NUCL(s,b,val); PACK_EXTRA_CODE(ecode,val,MASK_EXTRA_21_OPT); GET_NEXT_PACKED_NUCL(s,b,val); PACK_EXTRA_CODE(ecode,val,MASK_EXTRA_21_OPT);}
+/** Checks whether extra piece of the sequence under an optimal template of
+ * length 21 contains an ambiguity. */
+#define GET_AMBIG_CONDITION_21_OPT(s) (((*(s+1))&NUC_MASK) | ((*(s+3))&NUC_MASK) | ((*(s+4))&NUC_MASK) | ((*(s+5))&NUC_MASK))
+
+/** Extra code calculation for coding templates of length 18 for an unpacked
+ * sequence.
+ */
+#define GET_EXTRA_CODE_18(s) ((*(s+2)) & MASK_EXTRA_18)
+/** Extra code calculation for coding templates of length 18 for a packed
+ * sequence, when sequence is advanced by 4 bases.
+ */
+#define GET_EXTRA_CODE_PACKED_4_18(s) (((*(s))>>4) & MASK_EXTRA_18)
+/** Extra code calculation for coding templates of length 18 for a packed
+ * sequence, when sequence is advanced by 1 base.
+ */
+#define GET_EXTRA_CODE_PACKED_18(s,b,val,ecode) {GET_NEXT_PACKED_NUCL(s,b,val); GET_NEXT_PACKED_NUCL(s,b,ecode);}
+/** Checks whether extra piece of the sequence under a coding template of
+ * length 18 contains an ambiguity. */
+#define GET_AMBIG_CONDITION_18(s) ((*(s+2))&NUC_MASK)
+
+/** Extra code calculation for coding templates of length 21, for an unpacked
+ * sequence. */
#define GET_EXTRA_CODE_21(s) ((((*(s+2))<<4) | ((*(s+3))<<2) | (*(s+5))) & MASK_EXTRA_21)
-#define GET_AMBIG_CONDITION_21(s) (((*(s+2))&NUC_MASK) | ((*(s+3))&NUC_MASK) | ((*(s+5))&NUC_MASK))
+/** Extra code calculation for coding templates of length 21 for a packed
+ * sequence, when sequence is advanced by 4 bases.
+ */
#define GET_EXTRA_CODE_PACKED_4_21(s) (((*(s))&0x3c)|((*(s+1))>>6))
+/** Extra code calculation for coding templates of length 21 for a packed
+ * sequence, when sequence is advanced by 1 base.
+ */
#define GET_EXTRA_CODE_PACKED_21(s,b,val,ecode) {GET_NEXT_PACKED_NUCL(s,b,val); GET_NEXT_PACKED_NUCL(s,b,ecode); GET_NEXT_PACKED_NUCL(s,b,val); PACK_EXTRA_CODE(ecode,val,MASK_EXTRA_21); GET_NEXT_PACKED_NUCL(s,b,val); GET_NEXT_PACKED_NUCL(s,b,val); PACK_EXTRA_CODE(ecode,val,MASK_EXTRA_21);}
+/** Checks whether extra piece of the sequence under a coding template of
+ * length 21 contains an ambiguity. */
+#define GET_AMBIG_CONDITION_21(s) (((*(s+2))&NUC_MASK) | ((*(s+3))&NUC_MASK) | ((*(s+5))&NUC_MASK))
+
/** The lookup table structure used for Mega BLAST, generally with width 12 */
typedef struct MBLookupTable {
@@ -223,15 +337,15 @@ Int2 MB_LookupTableNew(BLAST_SequenceBlk* query, ListNode* location,
*/
MBLookupTable* MBLookupTableDestruct(MBLookupTable* mb_lt);
-/* General types of discontiguous word templates */
+/** General types of discontiguous word templates */
typedef enum {
MB_WORD_CODING = 0,
MB_WORD_OPTIMAL = 1,
MB_TWO_TEMPLATES = 2
} DiscWordType;
-/* Enumeration of all discontiguous word templates; the enumerated values
- * encode the weight, template length and type information
+/** Enumeration of all discontiguous word templates; the enumerated values
+ * encode the weight, template length and type information
*/
typedef enum {
TEMPL_CONTIGUOUS = 0,
diff --git a/algo/blast/core/ncbi_math.c b/algo/blast/core/ncbi_math.c
index a0c05a02..2093b295 100644
--- a/algo/blast/core/ncbi_math.c
+++ b/algo/blast/core/ncbi_math.c
@@ -1,112 +1,42 @@
-/* ncbi_math.c
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-*
-* File Name: ncbi_math.c
-*
-* Author: Gish, Kans, Ostell, Schuler
-*
-* Version Creation Date: 10/23/91
-*
-* $Revision: 1.7 $
-*
-* File Description:
-* portable math functions
-*
-* Modifications:
-* --------------------------------------------------------------------------
-* Date Name Description of modification
-* ------- ---------- -----------------------------------------------------
-* 04-15-93 Schuler Changed _cdecl to LIBCALL
-* 12-22-93 Schuler Converted ERRPOST((...)) to ErrPostEx(...)
-*
-* $Log: ncbi_math.c,v $
-* Revision 1.7 2003/12/05 16:03:57 camacho
-* Remove compiler warnings
-*
-* Revision 1.6 2003/09/26 20:39:32 dondosha
-* Rearranged code so it compiles
-*
-* Revision 1.5 2003/09/26 19:01:59 madden
-* Prefix ncbimath functions with BLAST_
-*
-* Revision 1.4 2003/09/10 21:36:29 dondosha
-* Removed Nlm_ prefix from math functions definitions
-*
-* Revision 1.3 2003/08/25 22:32:51 dondosha
-* Added #ifndef for definition of DBL_EPSILON
-*
-* Revision 1.2 2003/08/11 15:02:00 dondosha
-* Added algo/blast/core to all #included headers
-*
-* Revision 1.1 2003/08/02 16:31:48 camacho
-* Moved ncbimath.c -> ncbi_math.c
-*
-* Revision 1.1 2003/08/01 21:03:46 madden
-* Cleaned up version of file for C++ toolkit
-*
-* Revision 6.3 1999/11/24 17:29:16 sicotte
-* Added LnFactorial function
-*
-* Revision 6.2 1997/11/26 21:26:18 vakatov
-* Fixed errors and warnings issued by C and C++ (GNU and Sun) compilers
-*
-* Revision 6.1 1997/10/31 16:22:49 madden
-* Limited the loop in Log1p to 500 iterations
-*
-* Revision 6.0 1997/08/25 18:16:35 madden
-* Revision changed to 6.0
-*
-* Revision 5.4 1997/01/31 22:21:40 kans
-* had to remove <fp.h> and define HUGE_VAL inline, because of a conflict
-* with <math.h> in 68K CodeWarrior 11
-*
- * Revision 5.3 1997/01/28 22:57:57 kans
- * include <fp.h> for CodeWarrior to get HUGE_VAL
+/* $Id: ncbi_math.c,v 1.9 2004/06/08 17:30:07 dondosha Exp $
+ * ===========================================================================
*
- * Revision 5.2 1996/12/03 21:48:33 vakatov
- * Adopted for 32-bit MS-Windows DLLs
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
*
- * Revision 5.1 1996/06/20 14:08:00 madden
- * Changed int to Int4, double to FloatHi
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
*
- * Revision 5.0 1996/05/28 13:18:57 ostell
- * Set to revision 5.0
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
*
- * Revision 4.1 1996/03/06 19:47:15 epstein
- * fix problem observed by Epstein & fixed by Spouge in log calculation
+ * Please cite the author in any work or product based on this material.
*
- * Revision 4.0 1995/07/26 13:46:50 ostell
- * force revision to 4.0
+ * ===========================================================================
*
- * Revision 2.11 1995/05/15 18:45:58 ostell
- * added Log line
+ * Authors: Gish, Kans, Ostell, Schuler
*
-*
-*
-* ==========================================================================
-*/
+ * Version Creation Date: 10/23/91
+ *
+ * ==========================================================================
+ */
+
+/** @file ncbi_math.c
+ * Definitions for portable math library (ported from C Toolkit)
+ * @todo FIXME doxygen comments and formatting
+ */
+
+static char const rcsid[] =
+ "$Id: ncbi_math.c,v 1.9 2004/06/08 17:30:07 dondosha Exp $";
#define THIS_MODULE g_corelib
#define THIS_FILE _this_file
@@ -405,7 +335,7 @@ extern double BLAST_Factorial(Int4 n)
if (n >= 0) {
if (n <= nlim)
return precomputed[n];
- if (n < DIM(precomputed)) {
+ if ((Uint4)n < DIM(precomputed)) {
for (x = precomputed[m = nlim]; m < n; ) {
++m;
precomputed[m] = (x *= m);
@@ -428,7 +358,7 @@ extern double BLAST_LnGammaInt(Int4 n)
if (n >= 0) {
if (n <= nlim)
return precomputed[n];
- if (n < DIM(precomputed)) {
+ if ((Uint4)n < DIM(precomputed)) {
for (m = nlim; m < n; ++m) {
precomputed[m+1] = log(BLAST_Factorial(m));
}
@@ -605,3 +535,44 @@ extern double BLAST_LnFactorial (double x) {
return LnGamma(x+1.0);
}
+
+/*
+ * ===========================================================================
+ *
+ * $Log: ncbi_math.c,v $
+ * Revision 1.9 2004/06/08 17:30:07 dondosha
+ * Compiler warnings fixes
+ *
+ * Revision 1.8 2004/05/19 14:52:03 camacho
+ * 1. Added doxygen tags to enable doxygen processing of algo/blast/core
+ * 2. Standardized copyright, CVS $Id string, $Log and rcsid formatting and i
+ * location
+ * 3. Added use of @todo doxygen keyword
+ *
+ * Revision 1.7 2003/12/05 16:03:57 camacho
+ * Remove compiler warnings
+ *
+ * Revision 1.6 2003/09/26 20:39:32 dondosha
+ * Rearranged code so it compiles
+ *
+ * Revision 1.5 2003/09/26 19:01:59 madden
+ * Prefix ncbimath functions with BLAST_
+ *
+ * Revision 1.4 2003/09/10 21:36:29 dondosha
+ * Removed Nlm_ prefix from math functions definitions
+ *
+ * Revision 1.3 2003/08/25 22:32:51 dondosha
+ * Added #ifndef for definition of DBL_EPSILON
+ *
+ * Revision 1.2 2003/08/11 15:02:00 dondosha
+ * Added algo/blast/core to all #included headers
+ *
+ * Revision 1.1 2003/08/02 16:31:48 camacho
+ * Moved ncbimath.c -> ncbi_math.c
+ *
+ * Revision 1.1 2003/08/01 21:03:46 madden
+ * Cleaned up version of file for C++ toolkit
+ *
+ * ===========================================================================
+ */
+
diff --git a/algo/blast/core/ncbi_math.h b/algo/blast/core/ncbi_math.h
index 1c5bfcc8..220b0fb1 100644
--- a/algo/blast/core/ncbi_math.h
+++ b/algo/blast/core/ncbi_math.h
@@ -1,94 +1,39 @@
-/* ncbimath.h
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-*
-* File Name: ncbimath.h
-*
-* Author: Gish, Kans, Ostell, Schuler
-*
-* Version Creation Date: 10/23/91
-*
-* $Revision: 1.6 $
-*
-* File Description:
-* prototypes for portable math library
-*
-* Modifications:
-* --------------------------------------------------------------------------
-* Date Name Description of modification
-* ------- ---------- -----------------------------------------------------
-*
-* $Log: ncbi_math.h,v $
-* Revision 1.6 2003/09/26 20:38:12 dondosha
-* Returned prototype for the factorial function (BLAST_Factorial)
-*
-* Revision 1.5 2003/09/26 19:02:31 madden
-* Prefix ncbimath functions with BLAST_
-*
-* Revision 1.4 2003/09/10 21:35:20 dondosha
-* Removed Nlm_ prefix from math functions
-*
-* Revision 1.3 2003/08/25 22:30:24 dondosha
-* Added LnGammaInt definition and Factorial prototype
-*
-* Revision 1.2 2003/08/11 14:57:16 dondosha
-* Added algo/blast/core path to all #included headers
-*
-* Revision 1.1 2003/08/02 16:32:11 camacho
-* Moved ncbimath.h -> ncbi_math.h
-*
-* Revision 1.2 2003/08/01 21:18:48 dondosha
-* Correction of a #include
-*
-* Revision 1.1 2003/08/01 21:03:40 madden
-* Cleaned up version of file for C++ toolkit
-*
-* Revision 6.1 1999/11/24 17:29:16 sicotte
-* Added LnFactorial function
-*
-* Revision 6.0 1997/08/25 18:16:37 madden
-* Revision changed to 6.0
-*
-* Revision 5.2 1996/12/03 21:48:33 vakatov
-* Adopted for 32-bit MS-Windows DLLs
-*
- * Revision 5.1 1996/06/20 14:08:00 madden
- * Changed int to Int4, double to FloatHi
- *
- * Revision 5.0 1996/05/28 13:18:57 ostell
- * Set to revision 5.0
- *
- * Revision 4.0 1995/07/26 13:46:50 ostell
- * force revision to 4.0
- *
- * Revision 2.4 1995/05/15 18:45:58 ostell
- * added Log line
- *
-*
-*
-* ==========================================================================
-*/
+/* $Id: ncbi_math.h,v 1.7 2004/05/19 14:52:01 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Gish, Kans, Ostell, Schuler
+ *
+ * Version Creation Date: 10/23/91
+ *
+ * ==========================================================================
+ */
+
+/** @file ncbi_math.h
+ * Prototypes for portable math library (ported from C Toolkit)
+ * @todo FIXME doxygen comments
+ */
#include <algo/blast/core/ncbi_std.h>
@@ -159,6 +104,43 @@ extern double BLAST_Powi (double x, Int4 n);
}
#endif
+/*
+ * ===========================================================================
+ *
+ * $Log: ncbi_math.h,v $
+ * Revision 1.7 2004/05/19 14:52:01 camacho
+ * 1. Added doxygen tags to enable doxygen processing of algo/blast/core
+ * 2. Standardized copyright, CVS $Id string, $Log and rcsid formatting and i
+ * location
+ * 3. Added use of @todo doxygen keyword
+ *
+ * Revision 1.6 2003/09/26 20:38:12 dondosha
+ * Returned prototype for the factorial function (BLAST_Factorial)
+ *
+ * Revision 1.5 2003/09/26 19:02:31 madden
+ * Prefix ncbimath functions with BLAST_
+ *
+ * Revision 1.4 2003/09/10 21:35:20 dondosha
+ * Removed Nlm_ prefix from math functions
+ *
+ * Revision 1.3 2003/08/25 22:30:24 dondosha
+ * Added LnGammaInt definition and Factorial prototype
+ *
+ * Revision 1.2 2003/08/11 14:57:16 dondosha
+ * Added algo/blast/core path to all #included headers
+ *
+ * Revision 1.1 2003/08/02 16:32:11 camacho
+ * Moved ncbimath.h -> ncbi_math.h
+ *
+ * Revision 1.2 2003/08/01 21:18:48 dondosha
+ * Correction of a #include
+ *
+ * Revision 1.1 2003/08/01 21:03:40 madden
+ * Cleaned up version of file for C++ toolkit
+ *
+ * ===========================================================================
+ */
+
#endif /* !_NCBIMATH_ */
diff --git a/algo/blast/core/ncbi_std.c b/algo/blast/core/ncbi_std.c
index 7935e05a..2524da31 100644
--- a/algo/blast/core/ncbi_std.c
+++ b/algo/blast/core/ncbi_std.c
@@ -1,3 +1,37 @@
+/* $Id: ncbi_std.c,v 1.13 2004/05/19 14:52:03 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ */
+
+/** @file ncbi_std.c
+ * Function definitions for toolkit independent utility functions?
+ */
+
+static char const rcsid[] =
+ "$Id: ncbi_std.c,v 1.13 2004/05/19 14:52:03 camacho Exp $";
+
#include <algo/blast/core/blast_def.h> /* for sfree() macro */
#include <algo/blast/core/ncbi_std.h>
@@ -12,7 +46,7 @@ void * BlastMemDup (const void *orig, size_t size)
return NULL;
memcpy(copy, orig, size);
- return copy;
+ return copy;
}
/*****************************************************************************
diff --git a/algo/blast/core/ncbi_std.h b/algo/blast/core/ncbi_std.h
index ba1e7b17..4af33fca 100644
--- a/algo/blast/core/ncbi_std.h
+++ b/algo/blast/core/ncbi_std.h
@@ -1,42 +1,39 @@
-/* $Id: ncbi_std.h,v 1.27 2004/04/09 13:41:53 coulouri Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: ncbi_std.h
-
-Author: Ilya Dondoshansky
-
-Contents: Type and macro definitions from C toolkit that are not defined in
- C++ toolkit.
-
-Detailed Contents:
-
-******************************************************************************
- * $Revision: 1.27 $
- * */
+/* $Id: ncbi_std.h,v 1.28 2004/05/19 14:52:01 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file ncbi_std.h
+ * Type and macro definitions from C toolkit that are not defined in C++
+ * toolkit.
+ */
+
+
+
#ifndef __NCBI_STD__
#define __NCBI_STD__
diff --git a/algo/blast/core/pattern.c b/algo/blast/core/pattern.c
index b569520b..ea0ca786 100644
--- a/algo/blast/core/pattern.c
+++ b/algo/blast/core/pattern.c
@@ -1,44 +1,43 @@
-/* $Id: pattern.c,v 1.8 2003/12/04 16:27:13 camacho Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
+/* $Id: pattern.c,v 1.9 2004/05/19 14:52:03 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
-/*****************************************************************************
+/** @file pattern.c
+ * Functions for finding pattern matches in sequence.
+ * @todo FIXME needs doxygen comments and lines shorter than 80 characters
+ */
-File name: pattern.c
-
-Author: Ilya Dondoshansky
-
-Contents: Functions for finding pattern matches in sequence.
-
-******************************************************************************
- * $Revision: 1.8 $
- * */
+static char const rcsid[] =
+ "$Id: pattern.c,v 1.9 2004/05/19 14:52:03 camacho Exp $";
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/pattern.h>
-static char const rcsid[] = "$Id: pattern.c,v 1.8 2003/12/04 16:27:13 camacho Exp $";
/*Looks for 1 bits in the same position of s and mask
Let rightOne be the rightmost position where s and mask both have
diff --git a/algo/blast/core/pattern.h b/algo/blast/core/pattern.h
index 48b23448..256f4fbd 100644
--- a/algo/blast/core/pattern.h
+++ b/algo/blast/core/pattern.h
@@ -1,41 +1,35 @@
-/* $Id: pattern.h,v 1.2 2003/12/03 16:22:55 dondosha Exp $
+/* $Id: pattern.h,v 1.4 2004/05/19 15:06:10 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-*****************************************************************************
-
-File name: pattern.h
-
-Author: Ilya Dondoshansky
-
-Contents: Functions for finding pattern matches in sequence.
-
-Detailed Contents:
-
-******************************************************************************
- * $Revision: 1.2 $
- * */
+/** @file pattern.h
+ * Functions for finding pattern matches in sequence (PHI-BLAST).
+ */
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_options.h>
@@ -47,6 +41,8 @@ Detailed Contents:
extern "C" {
#endif
+/** @todo FIXME: document the #defines below */
+
#define BUF_SIZE 100
#define ASCII_SIZE 256
#define BITS_PACKED_PER_WORD 30
@@ -106,20 +102,6 @@ typedef struct patternSearchItems {
Int4 wildcardProduct; /**< Product of wildcard lengths*/
} patternSearchItems;
-typedef struct seedSearchItems {
- double charMultiple[ALPHABET_SIZE];
- double paramC; /*used in e-value computation*/
- double paramLambda; /*used in e-value computation*/
- double paramK; /*used in the bit score computation*/
- Int4 cutoffScore; /*lower bound for what is a hit*/
- double standardProb[ALPHABET_SIZE]; /*probability of each letter*/
- char order[ASCII_SIZE];
- char pchars[ALPHABET_SIZE+1];
- char name_space[BUF_SIZE]; /*name of a pattern*/
- char pat_space[PATTERN_SPACE_SIZE]; /*string description
- of pattern*/
-} seedSearchItems;
-
/** Find the places where the pattern matches seq;
* 3 different methods are used depending on the length of the pattern.
* @param hitArray Stores the results as pairs of positions in consecutive
diff --git a/algo/blast/core/phi_extend.c b/algo/blast/core/phi_extend.c
index a0101a1e..8ea80c09 100644
--- a/algo/blast/core/phi_extend.c
+++ b/algo/blast/core/phi_extend.c
@@ -1,51 +1,50 @@
-/* $Id: phi_extend.c,v 1.4 2004/03/09 22:37:26 dondosha Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
+/* $Id: phi_extend.c,v 1.7 2004/05/19 14:52:03 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
-/*****************************************************************************
+/** @file phi_extend.c
+ * Word finder functions for PHI-BLAST
+ */
-File name: phi_extend.c
-
-Author: Ilya Dondoshansky
-
-Contents: Word finder functions for PHI-BLAST
-
-******************************************************************************
- * $Revision: 1.4 $
- * */
+static char const rcsid[] =
+ "$Id: phi_extend.c,v 1.7 2004/05/19 14:52:03 camacho Exp $";
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/phi_lookup.h>
#include <algo/blast/core/phi_extend.h>
-static char const rcsid[] = "$Id: phi_extend.c,v 1.4 2004/03/09 22:37:26 dondosha Exp $";
-Int4 PHIBlastWordFinder(BLAST_SequenceBlk* subject,
+Int2 PHIBlastWordFinder(BLAST_SequenceBlk* subject,
BLAST_SequenceBlk* query, LookupTableWrap* lookup_wrap,
Int4** matrix, const BlastInitialWordParameters* word_params,
- BLAST_ExtendWord* ewp, Uint4* query_offsets, Uint4* subject_offsets,
- Int4 max_hits, BlastInitHitList* init_hitlist)
+ Blast_ExtendWord* ewp, Uint4* query_offsets, Uint4* subject_offsets,
+ Int4 max_hits, BlastInitHitList* init_hitlist,
+ BlastUngappedStats* ungapped_stats)
{
PHILookupTable* lookup = (PHILookupTable*) lookup_wrap->lut;
Int4 hits=0;
@@ -71,5 +70,7 @@ Int4 PHIBlastWordFinder(BLAST_SequenceBlk* subject,
lookup->lengths[query_offsets[i]], 0);
} /* end for */
} /* end while */
- return totalhits;
+
+ Blast_UngappedStatsUpdate(ungapped_stats, totalhits, totalhits, totalhits);
+ return 0;
}
diff --git a/algo/blast/core/phi_extend.h b/algo/blast/core/phi_extend.h
index 1312d17e..5878b731 100644
--- a/algo/blast/core/phi_extend.h
+++ b/algo/blast/core/phi_extend.h
@@ -1,41 +1,35 @@
-/* $Id: phi_extend.h,v 1.2 2004/03/09 22:37:11 dondosha Exp $
-
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-*****************************************************************************
-
-File name: phi_extend.h
-
-Author: Ilya Dondoshansky
-
-Contents: Word finder for PHI-BLAST
-
-Detailed Contents:
-
-******************************************************************************
- * $Revision: 1.2 $
- * */
+/* $Id: phi_extend.h,v 1.5 2004/05/19 14:52:01 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's offical duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file phi_extend.h
+ * Word finder for PHI-BLAST
+ */
#include <algo/blast/core/blast_extend.h>
#include <algo/blast/core/blast_util.h>
@@ -47,14 +41,15 @@ Detailed Contents:
extern "C" {
#endif
-Int4 PHIBlastWordFinder(BLAST_SequenceBlk* subject,
+Int2 PHIBlastWordFinder(BLAST_SequenceBlk* subject,
BLAST_SequenceBlk* query, LookupTableWrap* lookup_wrap,
Int4** matrix, const BlastInitialWordParameters* word_params,
- BLAST_ExtendWord* ewp, Uint4* q_offsets, Uint4* s_offsets,
- Int4 max_hits, BlastInitHitList* init_hitlist);
+ Blast_ExtendWord* ewp, Uint4* q_offsets, Uint4* s_offsets,
+ Int4 max_hits, BlastInitHitList* init_hitlist,
+ BlastUngappedStats* ungapped_stats);
#ifdef __cplusplus
}
#endif
-#endif /* PHI_LOOKUP__H */
+#endif /* PHI_EXTEND__H */
diff --git a/algo/blast/core/phi_lookup.c b/algo/blast/core/phi_lookup.c
index 1f9d7ea7..d2b4cb32 100644
--- a/algo/blast/core/phi_lookup.c
+++ b/algo/blast/core/phi_lookup.c
@@ -1,39 +1,39 @@
-/* $Id: phi_lookup.c,v 1.12 2004/04/05 16:09:27 camacho Exp $
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================*/
-
-/*****************************************************************************
-
-File name: phi_lookup.c
-
-Author: Ilya Dondoshansky
-
-Contents: Functions for accessing the lookup table for PHI-BLAST
-
-******************************************************************************
- * $Revision: 1.12 $
- * */
+/* $Id: phi_lookup.c,v 1.15 2004/06/08 17:30:07 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file phi_lookup.c
+ * Functions for accessing the lookup table for PHI-BLAST
+ * @todo FIXME needs doxygen comments and lines shorter than 80 characters
+ */
+
+static char const rcsid[] =
+ "$Id: phi_lookup.c,v 1.15 2004/06/08 17:30:07 dondosha Exp $";
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_util.h>
@@ -41,11 +41,24 @@ Contents: Functions for accessing the lookup table for PHI-BLAST
#include <algo/blast/core/phi_lookup.h>
#include <algo/blast/core/blast_message.h>
-static char const rcsid[] = "$Id: phi_lookup.c,v 1.12 2004/04/05 16:09:27 camacho Exp $";
#define seedepsilon 0.00001
#define allone ((1 << ALPHABET_SIZE) - 1)
+typedef struct seedSearchItems { /* seed-search parameters: scoring statistics plus pattern name/description buffers */
+ double charMultiple[ALPHABET_SIZE]; /**< NOTE(review): one value per alphabet letter; meaning not documented here -- confirm */
+ double paramC; /**< used in e-value computation */
+ double paramLambda; /**< used in e-value computation */
+ double paramK; /**< used in the bit score computation */
+ Int4 cutoffScore; /**< lower bound for what is a hit */
+ double standardProb[ALPHABET_SIZE]; /**< probability of each letter */
+ char order[ASCII_SIZE]; /**< NOTE(review): presumably maps an ASCII code to a letter's position in the alphabet -- confirm */
+ char pchars[ALPHABET_SIZE+1]; /**< NOTE(review): presumably the alphabet letters plus a terminating NUL -- confirm */
+ char name_space[BUF_SIZE]; /**< name of a pattern */
+ char pat_space[PATTERN_SPACE_SIZE]; /**< string description
+ of pattern */
+} seedSearchItems;
+
/*Initialize the order of letters in the alphabet, the score matrix,
and the row sums of the score matrix. matrixToFill is the
score matrix, program_flag says which variant of the program is
@@ -402,8 +415,8 @@ init_pattern(Uint1 *pattern, Boolean is_dna, BlastScoreBlk* sbp,
patternSearchItems* *pattern_info,
Blast_Message* *error_msg)
{
- Uint4 i; /*index over string describing the pattern*/
- Uint4 j; /*index for position in pattern*/
+ Int4 i; /*index over string describing the pattern*/
+ Int4 j; /*index for position in pattern*/
Int4 charIndex; /*index over characters in alphabet*/
Int4 secondIndex; /*second index into pattern*/
Int4 numIdentical; /*number of consec. positions with identical specification*/
@@ -413,7 +426,7 @@ init_pattern(Uint1 *pattern, Boolean is_dna, BlastScoreBlk* sbp,
set of characters*/
Int4 minWildcard, maxWildcard; /*used for variable number of wildcard
positions*/
- Uint4 tj=0; /*temporary copy of j*/
+ Int4 tj=0; /*temporary copy of j*/
Int4 tempInputPatternMasked[MaxP]; /*local copy of parts
of inputPatternMasked*/
Uint1 c; /*character occurring in pattern*/
@@ -447,7 +460,7 @@ init_pattern(Uint1 *pattern, Boolean is_dna, BlastScoreBlk* sbp,
patternSearch->inputPatternMasked[i] = 0;
localPattern[i] = 0;
}
- for (i = 0, j = 0; i < strlen((Char *) pattern); i++) {
+ for (i = 0, j = 0; i < (Int4)strlen((Char *) pattern); i++) {
if ((c=pattern[i]) == '-' || c == '\n' || c == '.' || c =='>' || c ==' '
|| c == '<') /*spacers that mean nothing*/
continue;
@@ -608,7 +621,7 @@ init_pattern(Uint1 *pattern, Boolean is_dna, BlastScoreBlk* sbp,
that character can occur in*/
for (charIndex = 0; charIndex < ALPHABET_SIZE; charIndex++) {
thisMask = 0;
- for (charSetMask = 0; charSetMask < j; charSetMask++) {
+ for (charSetMask = 0; charSetMask < (Uint4)j; charSetMask++) {
if ((1<< charIndex) & patternSearch->inputPatternMasked[charSetMask])
thisMask |= (1 << charSetMask);
}
diff --git a/algo/blast/core/phi_lookup.h b/algo/blast/core/phi_lookup.h
index f2cecc53..b5099552 100644
--- a/algo/blast/core/phi_lookup.h
+++ b/algo/blast/core/phi_lookup.h
@@ -1,42 +1,36 @@
-/* $Id: phi_lookup.h,v 1.3 2004/03/11 18:31:06 papadopo Exp $
-
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's offical duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-*****************************************************************************
-
-File name: phi_lookup.h
-
-Author: Ilya Dondoshansky
-
-Contents: Pseudo lookup table structure and database scanning functions used
- in PHI-BLAST
-
-Detailed Contents:
+/* $Id: phi_lookup.h,v 1.4 2004/05/19 14:52:01 camacho Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
-******************************************************************************
- * $Revision: 1.3 $
- * */
+/** @file phi_lookup.h
+ * Pseudo lookup table structure and database scanning functions used in
+ * PHI-BLAST
+ */
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_options.h>
diff --git a/api/alignmgr.c b/api/alignmgr.c
index 6f42f214..9d28cb5c 100644
--- a/api/alignmgr.c
+++ b/api/alignmgr.c
@@ -28,13 +28,16 @@
*
* Version Creation Date: 7/99
*
-* $Revision: 6.178 $
+* $Revision: 6.179 $
*
* File Description: SeqAlign indexing and messaging functions
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: alignmgr.c,v $
+* Revision 6.179 2004/05/20 19:44:28 bollin
+* removed unused variables
+*
* Revision 6.178 2001/11/09 17:22:34 wheelan
* fixed bug in TruncateSeqAlign
*
@@ -8866,7 +8869,6 @@ NLM_EXTERN SeqAlignPtr AlnMgrGetSubAlign(SeqAlignPtr sap, SeqIdPtr which_master,
SeqAlignPtr salp;
SeqAlignPtr salp_head;
SeqAlignPtr salp_prev;
- SeqAlignPtr sap_parent;
SeqIdPtr sip;
SeqIdPtr sip_curr,sip_next;
SeqIdPtr sip_prev;
@@ -10750,7 +10752,6 @@ NLM_EXTERN Int4 AlnMgrIsEditable(SeqAlignPtr sap)
DenseDiagPtr ddp;
DenseSegPtr dsp;
Boolean gapped;
- Int4 ibm;
SeqIdPtr id_prev;
SeqAlignPtr salp;
@@ -10808,7 +10809,7 @@ NLM_EXTERN Int4 AlnMgrIsEditable(SeqAlignPtr sap)
NLM_EXTERN Int4 AlnMgrMapBioseqToBioseq(SeqAlignPtr salp,Int4 pos,Int4 source_row,Int4 target_row,Boolean GetNextNonGap,Int4Ptr PostGap) {
Int4 aln_coord,pos_target;
AlnMsgPtr amp1;
- Boolean gap,status;
+ Boolean status;
if(!salp)
return -1;
aln_coord = AlnMgrMapBioseqToSeqAlign(salp, pos, source_row, NULL);
@@ -10899,7 +10900,7 @@ NLM_EXTERN DenseSegPtr DenseDiagToGlobalDenseSeg(DenseDiagPtr ddp_head) {
NLM_EXTERN DenseDiagPtr AlnMgrSeqAlignToDDP(SeqAlignPtr sap,Int4 aln_cut_from,Int4 aln_cut_to,Int4Ptr numseg_ptr) {
AlnMsgPtr amp1;
- Int4 numseg=0,curr_m,from_q,from_s,to_q,to_s,len,i;
+ Int4 numseg=0,curr_m,from_q,to_q,len,i;
DenseDiagPtr ddp,ddp_head=NULL,ddp_last=NULL;
SeqAlignPtr salp;
Boolean gap,status;
@@ -11085,13 +11086,11 @@ NLM_EXTERN DenseDiagPtr AlnMgrSeqAlignToDDP(SeqAlignPtr sap,Int4 aln_cut_from,In
*/
NLM_EXTERN SeqAlignPtr AlnMgrMerge3OverlappingSeqAligns(SeqAlignPtr salp1,SeqAlignPtr salp_merging,SeqAlignPtr salp2,Int4 master_cut_pos1, Int4 master_cut_pos2) {
- Int4 diff;
Int4 i,aln_cut_pos1,aln_cut_pos2;
Int4 numseg=0,this_numseg;
- SeqIdPtr sip_q,sip_s;
DenseDiagPtr ddp,ddp_head=NULL,ddp_last,ddp_next;
DenseSegPtr dsp;
- Boolean gap,sstrand_plus,qstrand_plus;
+ Boolean sstrand_plus,qstrand_plus;
SeqAlignPtr sap=NULL;
diff --git a/api/alignmgr2.c b/api/alignmgr2.c
index 51461430..72563e80 100644
--- a/api/alignmgr2.c
+++ b/api/alignmgr2.c
@@ -28,13 +28,19 @@
*
* Version Creation Date: 10/01
*
-* $Revision: 6.53 $
+* $Revision: 6.55 $
*
* File Description: SeqAlign indexing, access, and manipulation functions
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: alignmgr2.c,v $
+* Revision 6.55 2004/05/20 19:46:25 bollin
+* removed unused variables
+*
+* Revision 6.54 2004/05/11 13:19:49 bollin
+* update the dimension of the shared alignment after adding a sequence.
+*
* Revision 6.53 2004/04/13 14:43:07 kskatz
* Final resolution of revisions 6.51 and 6.52: reverted 6.52; then cleaned up readability of AlnMgr2SeqPortRead() and ensured that it will never call SeqPortRead for a length > AM_SEQPORTSIZE
*
@@ -1184,6 +1190,7 @@ NLM_EXTERN void AlnMgr2IndexSeqAlignEx(SeqAlignPtr sap, Boolean replace_gi)
if (replace_gi) {
SAM_ReplaceGI(sap);
}
+
AlnMgr2IndexLite(sap);
AlnMgr2DecomposeToPairwise(sap);
amaip = (AMAlignIndex2Ptr)(sap->saip);
@@ -1670,7 +1677,6 @@ static void AlnMgr2HidePairwiseConflicts(SeqAlignPtr sap)
SeqIdPtr sip12;
SeqIdPtr sip21;
SeqIdPtr sip22;
- Boolean start;
Int4 start11;
Int4 start12;
Int4 start21;
@@ -3036,7 +3042,7 @@ NLM_EXTERN void AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent, SeqAlignPtr sap)
Int4 Pos, POS, max_POS;
Int4 A_end, B_beg;
Int4 anchor, Anchor;
- Int4 max_len, row;
+ Int4 row;
SeqIdPtr sip, extra_sip;
AMSeqPieceSetPtr a_set, A_set, b_set, B_set_head, B_set;
AMSeqPiecePtr a, A, b, B;
@@ -3481,6 +3487,9 @@ NLM_EXTERN void AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent, SeqAlignPtr sap)
AMSeqPieceSetFree(b_set);
amaip->sharedaln->segs = DSP;
+ /* update the dim for the shared_aln to match the new DensegPtr */
+ amaip->sharedaln->dim = DSP->dim;
+
DenseSegFree(Dsp);
}
@@ -5619,9 +5628,7 @@ NLM_EXTERN Boolean AlnMgr2GetNextAlnBit(SeqAlignPtr sap, AlnMsg2Ptr amp) /* NEXT
Int4 endoffset;
Boolean found;
Int4 i;
- Int4 ilen;
Int4 index;
- Int4 insert;
Int4 intfrom;
Int4 intto;
Int4 j;
diff --git a/api/asn2ff1.c b/api/asn2ff1.c
index 2c05ab1c..6190cf68 100644
--- a/api/asn2ff1.c
+++ b/api/asn2ff1.c
@@ -29,8 +29,8 @@
*
* Version Creation Date: 7/15/95
*
-* $Revision: 6.116 $
-* $Revision: 6.116 $
+* $Revision: 6.117 $
+* $Revision: 6.117 $
*
* File Description: files that go with "asn2ff"
*
@@ -396,7 +396,6 @@ Asn2ffJobPtr Asn2ffJobCreate(SeqEntryPtr sep, SeqSubmitPtr ssp, SeqLocPtr slp, F
/**********************************************************/
NLM_EXTERN LinkStrPtr asn2ff_print_to_mem(Asn2ffJobPtr ajp, LinkStrPtr lsp)
{
- AsnIoPtr aip;
CharPtr string;
FFPrintArrayPtr pap = NULL;
Int4 index, pap_size;
@@ -496,7 +495,6 @@ NLM_EXTERN Boolean SeqEntryToEntrez (SeqEntryPtr sep, FILE *fp, SeqIdPtr seqid,
BioseqPtr bsp;
BioseqSetPtr bssp;
SeqLocPtr slp = NULL;
- DeltaSeqPtr dsp;
SeqLoc sl;
SeqInt seqint;
StdPrintOptionsPtr Spop = NULL;
@@ -844,9 +842,6 @@ NLM_EXTERN LinkStrPtr SeqEntryToStrArrayEx(SeqEntryPtr sep, Uint1 format,
StdPrintOptionsPtr Spop = NULL;
LinkStrPtr lsp;
LinkStrPtr tlsp;
- CharPtr PNTR res;
- CharPtr PNTR tres;
- Int4 num;
Asn2ffJobPtr ajp;
ValNodePtr v;
BioseqPtr bsp;
@@ -931,9 +926,6 @@ NLM_EXTERN LinkStrPtr SeqEntryToStrArrayQEx(SeqEntryPtr sep, Uint1 format, Int4
StdPrintOptionsPtr Spop = NULL;
LinkStrPtr lsp;
LinkStrPtr tlsp;
- CharPtr PNTR res;
- CharPtr PNTR tres;
- Int4 num;
Asn2ffJobPtr ajp;
ValNodePtr v;
BioseqPtr bsp;
@@ -1068,8 +1060,6 @@ NLM_EXTERN ByteStorePtr AjpToByteStore(Asn2ffJobPtr ajp)
StdPrintOptionsPtr Spop = NULL;
ByteStorePtr bs;
BioseqSetPtr bssp;
- SeqSubmitPtr ssp;
- SeqEntryPtr sep;
SeqEntryPtr tsep;
if(ajp == NULL)
@@ -1583,8 +1573,6 @@ NLM_EXTERN Int4 asn2ff_setup (Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp)
{
Int4 pap_size = -1;
Asn2ffWEPtr awp;
- SeqIdPtr sip;
- Uint2 itemID;
GatherScope gs;
Uint1 focus;
BioseqPtr bsp;
@@ -2241,7 +2229,6 @@ Int4 asn2gb_setup(Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp)
SeqIdPtr sip;
TextSeqIdPtr tsip;
BioseqPtr bsp;
- BioseqSetPtr bssp;
GetLocusPartsAwp(ajp);
if ((gbp=ajp->asn2ffwep->gbp) != NULL) {
@@ -2495,9 +2482,8 @@ Int4 asn2gb_setup(Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp)
Int4 asn2gr_setup(Asn2ffJobPtr ajp, FFPrintArrayPtr PNTR papp)
{
FFPrintArrayPtr pap;
- Int4 index, total, pub_num, seqblks_num;
+ Int4 index, total, pub_num;
GBEntryPtr gbp;
- SeqIdPtr sip;
GetLocusPartsAwp(ajp);
total=0;
@@ -4327,7 +4313,6 @@ static void print_taxinfo(Asn2ffJobPtr ajp, GBEntryPtr gbp, OrgRefPtr orp, CharP
DbtagPtr dbp;
Int4 id = -1, gcode=1;
CharPtr organelle, taxonomy=NULL;
- OrgNamePtr onp;
static Char tmp[3];
if (orp) {
@@ -5426,8 +5411,8 @@ static void PrintSeqRegion (Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
SeqPortPtr spp;
Uint1 residue;
- Char buffer[MAX_BTP_BUF], num_buffer[10];
- CharPtr ptr = &(buffer[0]), num_ptr;
+ Char buffer[MAX_BTP_BUF];
+ CharPtr ptr = &(buffer[0]);
Int4 total;
BioseqPtr bsp;
diff --git a/api/asn2ff2.c b/api/asn2ff2.c
index 04548097..4c293e5d 100644
--- a/api/asn2ff2.c
+++ b/api/asn2ff2.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/15/95
*
- * $Revision: 6.36 $
+ * $Revision: 6.37 $
*
* File Description:
*
@@ -39,6 +39,9 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: asn2ff2.c,v $
+ * Revision 6.37 2004/06/04 18:39:14 bollin
+ * removed unused variables, fixed compiler warning
+ *
* Revision 6.36 2001/12/24 16:21:32 kans
* initialize urf in GetStrForUserObject
*
@@ -454,7 +457,9 @@ NLM_EXTERN void PrintCommentByNumber(Asn2ffJobPtr ajp, GBEntryPtr gbp)
if (gbp == NULL) {
return;
}
- for (s = gbp->comm, i=0; s && i < index; s = s->next, i++);
+ for (s = gbp->comm, i=0; s && i < index; s = s->next, i++)
+ {
+ }
newstring = CheckEndPunctuation(s->string, '.');
www_PrintComment(newstring, FALSE, (Uint1)ajp->format);
newstring = MemFree(newstring);
@@ -464,7 +469,6 @@ NLM_EXTERN void PrintFirstComment(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
CharPtr newstring;
ComStructPtr s;
- Int4 i, index = ajp->pap_index;
if (gbp == NULL) {
return;
@@ -501,10 +505,9 @@ static ComStructPtr tie_next_comm(ComStructPtr head, ComStructPtr next)
static CharPtr GetStrForBankit(UserObjectPtr uop)
{
ObjectIdPtr oip;
- UserFieldPtr ufp, tmp, u;
- CharPtr ptr=NULL, ptr1 = NULL, str;
- Int2 i=0, acclen, ptrlen = 0;
- CharPtr p;
+ UserFieldPtr ufp;
+ CharPtr ptr=NULL, ptr1 = NULL;
+ Int2 i=0, ptrlen = 0;
if ((oip = uop->type) == NULL) return NULL;
if (StringCmp(oip->str, "Submission") != 0) return NULL;
@@ -669,9 +672,7 @@ static CharPtr GetStrForMap(DbtagPtr dbtag)
static CharPtr GetEvidence(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
CharPtr retval=NULL;
- SeqDescrPtr descr;
UserObjectPtr uop=NULL;
- ObjectIdPtr oip;
ValNodePtr tvnp, ds_vnp, vnp;
DescrStructPtr dsp;
@@ -681,7 +682,7 @@ static CharPtr GetEvidence(Asn2ffJobPtr ajp, GBEntryPtr gbp)
tvnp = GatherDescrListByChoice(ajp, gbp, Seq_descr_user);
for (ds_vnp= tvnp; ds_vnp;) {
dsp = (DescrStructPtr) ds_vnp->data.ptrvalue;
- if(vnp = dsp->vnp){
+ if((vnp = dsp->vnp) != NULL){
if(!retval){
uop = (UserObjectPtr) vnp->data.ptrvalue;
retval=mRNAEvidenceComment(uop, FALSE);
@@ -703,10 +704,9 @@ static CharPtr genanreftext4 = "~Also see:~ Documentation of NCBI's Annotatio
static CharPtr GetAnnotationComment(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
- SeqDescrPtr descr;
UserObjectPtr uop=NULL;
ObjectIdPtr oip;
- UserFieldPtr ufp, tmp, u, urf;
+ UserFieldPtr ufp;
CharPtr retval = NULL, name = NULL, method = NULL;
ValNodePtr tvnp, ds_vnp, vnp;
@@ -1151,7 +1151,6 @@ NLM_EXTERN void GBDescrComFeat(Asn2ffJobPtr ajp, GBEntryPtr gbp)
NLM_EXTERN Int2 GP_GetSeqDescrComms(Asn2ffJobPtr ajp, GBEntryPtr gbp)
{
- DescrStructPtr dsp;
ComStructPtr csp;
Boolean got_comment=FALSE;
CharPtr string;
diff --git a/api/asn2gnb1.c b/api/asn2gnb1.c
index c41e695c..f1d54a05 100644
--- a/api/asn2gnb1.c
+++ b/api/asn2gnb1.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.9 $
+* $Revision: 1.23 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -1743,9 +1743,9 @@ NLM_EXTERN void DoOneSection (
{
size_t acclen;
+ Asn2gbFormatPtr afp;
IntAsn2gbJobPtr ajp;
Asn2gbSectPtr asp;
- CharPtr bases = NULL;
SeqMgrBioseqContext bcontext;
BaseBlockPtr PNTR blockArray;
SeqMgrDescContext dcontext;
@@ -1788,6 +1788,11 @@ NLM_EXTERN void DoOneSection (
asp = Asn2gbAddSection (awp);
if (asp == NULL) return;
+ afp = awp->afp;
+ if (afp != NULL) {
+ afp->asp = asp;
+ }
+
numsegs = awp->partcount;
if (numsegs == 0 && SeqMgrGetBioseqContext (parent, &bcontext)) {
numsegs = bcontext.numsegs;
@@ -1827,7 +1832,6 @@ NLM_EXTERN void DoOneSection (
asp->to = to;
iasp = (IntAsn2gbSectPtr) asp;
- iasp->spp = NULL;
asp->blockArray = NULL;
asp->numBlocks = 0;
@@ -1975,23 +1979,20 @@ NLM_EXTERN void DoOneSection (
if (awp->showconfeats) {
AddFeatureBlock (awp);
+ } else if (awp->smartconfeats && bsp->length <= 1000000) {
+ AddFeatureBlock (awp);
}
AddContigBlock (awp);
if (awp->showContigAndSeq) {
- if (awp->stream) {
- bases = DoSeqPortStream (bsp);
- }
-
if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
if (awp->showBaseCount) {
- AddBasecountBlock (awp, bases);
+ AddBasecountBlock (awp);
}
}
AddOriginBlock (awp);
- AddSequenceBlock (awp, bases);
- MemFree (bases);
+ AddSequenceBlock (awp);
}
} else {
@@ -2006,19 +2007,14 @@ NLM_EXTERN void DoOneSection (
}
}
- if (awp->stream) {
- bases = DoSeqPortStream (bsp);
- }
-
if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
if (awp->showBaseCount) {
- AddBasecountBlock (awp, bases);
+ AddBasecountBlock (awp );
}
}
AddOriginBlock (awp);
- AddSequenceBlock (awp, bases);
- MemFree (bases);
+ AddSequenceBlock (awp);
}
AddSlashBlock (awp);
@@ -2485,7 +2481,7 @@ static Boolean IsSepRefseq (
}
typedef struct modeflags {
- Boolean flags [25];
+ Boolean flags [26];
} ModeFlags, PNTR ModeFlagsPtr;
static ModeFlags flagTable [] = {
@@ -2495,28 +2491,32 @@ static ModeFlags flagTable [] = {
TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE,
- TRUE, TRUE, TRUE, TRUE, TRUE},
+ TRUE, TRUE, TRUE, TRUE, TRUE,
+ TRUE},
/* ENTREZ_MODE */
{FALSE, TRUE, TRUE, TRUE, TRUE,
FALSE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, FALSE, TRUE, TRUE,
TRUE, TRUE, FALSE, FALSE, TRUE,
- TRUE, TRUE, TRUE, TRUE, FALSE},
+ TRUE, TRUE, TRUE, TRUE, TRUE,
+ FALSE},
/* SEQUIN_MODE */
{FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE},
+ FALSE, FALSE, FALSE, FALSE, TRUE,
+ FALSE},
/* DUMP_MODE */
{FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE}
+ FALSE, FALSE, FALSE, FALSE, FALSE,
+ FALSE}
};
static void SetFlagsFromMode (
@@ -2564,6 +2564,8 @@ static void SetFlagsFromMode (
ajp->flags.goQualsToNote = *(bp++);
ajp->flags.geneSynsToNote = *(bp++);
ajp->flags.selenocysteineToNote = *(bp++);
+ ajp->flags.extraProductsToNote = *(bp++);
+
ajp->flags.forGbRelease = *(bp++);
/* unapproved qualifiers suppressed for flatfile, okay for GBSeq XML */
@@ -2590,7 +2592,7 @@ static void SetFlagsFromMode (
/* collaboration unapproved Gene Ontology quals on their own line only for RefSeq */
- ajp->flags.goQualsToNote = TRUE;
+ /* ajp->flags.goQualsToNote = TRUE; */
ajp->flags.geneSynsToNote = TRUE;
}
}
@@ -2637,7 +2639,6 @@ typedef struct lookforids {
Boolean isNTorNW;
Boolean isNC;
Boolean isTPA;
- Boolean isAEorCH;
Boolean isNuc;
Boolean isProt;
} LookForIDs, PNTR LookForIDsPtr;
@@ -2662,14 +2663,6 @@ static void LookForSeqIDs (BioseqPtr bsp, Pointer userdata)
case SEQID_EMBL :
case SEQID_DDBJ :
lfip->isGED = TRUE;
- tsip = (TextSeqIdPtr) sip->data.ptrvalue;
- if (tsip != NULL) {
- if (StringNCmp (tsip->accession, "AE", 2) == 0) {
- lfip->isAEorCH = TRUE;
- } else if (StringNCmp (tsip->accession, "CH", 2) == 0) {
- lfip->isAEorCH = TRUE;
- }
- }
break;
case SEQID_TPG :
case SEQID_TPE :
@@ -2700,7 +2693,6 @@ static void LookForGEDetc (
BoolPtr isNTorNW,
BoolPtr isNC,
BoolPtr isTPA,
- BoolPtr isAEorCH,
BoolPtr isNuc,
BoolPtr isProt
)
@@ -2714,16 +2706,29 @@ static void LookForGEDetc (
*isNTorNW = lfi.isNTorNW;
*isNC = lfi.isNC;
*isTPA = lfi.isTPA;
- *isAEorCH = lfi.isAEorCH;
*isNuc = lfi.isNuc;
*isProt = lfi.isProt;
}
+static CharPtr defHead = "\
+Content-type: text/html\n\n\
+<HTML>\n\
+<HEAD><TITLE>GenBank entry</TITLE></HEAD>\n\
+<BODY>\n\
+<hr>\n\
+<pre>"; /* default head text: substituted for a NULL ffhead when an HTML flat file is requested */
+
+static CharPtr defTail = "\
+</pre>\n\
+<hr>\n\
+</BODY>\n\
+</HTML>\n"; /* default tail text: substituted for a NULL fftail when an HTML flat file is requested */
+
#define FEAT_FETCH_MASK (ONLY_NEAR_FEATURES | FAR_FEATURES_SUPPRESS | NEAR_FEATURES_SUPPRESS)
#define HTML_XML_ASN_MASK (CREATE_HTML_FLATFILE | CREATE_XML_GBSEQ_FILE | CREATE_ASN_GBSEQ_FILE)
#define GENE_RIF_MASK (HIDE_GENE_RIFS | ONLY_GENE_RIFS | LATEST_GENE_RIFS)
-NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
+static Asn2gbJobPtr asn2gnbk_setup_ex (
BioseqPtr bsp,
BioseqSetPtr bssp,
SeqLocPtr slp,
@@ -2733,20 +2738,27 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
FlgType flags,
LckType locks,
CstType custom,
- XtraPtr extra
+ XtraPtr extra,
+ Boolean stream,
+ FILE *fp,
+ AsnIoPtr aip,
+ AsnTypePtr atp
)
{
+ Asn2gbFormat af;
IntAsn2gbJobPtr ajp = NULL;
Asn2gbSectPtr asp;
Asn2gbWork aw;
BaseBlockPtr bbp;
BaseBlockPtr PNTR blockArray;
Uint2 entityID = 0;
+ CharPtr ffhead = NULL;
+ CharPtr fftail = NULL;
+ Asn2gbWriteFunc ffwrite = NULL;
GBSeqPtr gbseq = NULL;
Int4 i;
IndxPtr index = NULL;
- Boolean isAEorCH;
Boolean isGED;
Boolean isNTorNW;
Boolean isNC;
@@ -2778,6 +2790,7 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
Boolean skipProts = FALSE;
SeqSubmitPtr ssp;
BioseqSetPtr topbssp;
+ Pointer userdata = NULL;
ValNodePtr vnp;
Boolean is_html = FALSE;
@@ -2792,8 +2805,12 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
}
if (extra != NULL) {
+ ffwrite = extra->ffwrite;
+ ffhead = extra->ffhead;
+ fftail = extra->fftail;
index = extra->index;
gbseq = extra->gbseq;
+ userdata = extra->userdata;
}
if (slp != NULL) {
@@ -2876,11 +2893,11 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
ajp->index = index;
ajp->gbseq = gbseq; /* gbseq output can relax srcQualsToNote or goQualsToNote strictness */
+ ajp->aip = aip;
+ ajp->atp = atp;
SetFlagsFromMode (ajp, mode);
- ajp->transientSeqPort = (Boolean) ((locks & FREE_SEQPORT_EACH_TIME) != 0);
-
lockFarComp = (Boolean) ((locks & LOCK_FAR_COMPONENTS) != 0);
lockFarLocs = (Boolean) ((locks & LOCK_FAR_LOCATIONS) != 0);
lockFarProd = (Boolean) ((locks & LOCK_FAR_PRODUCTS) != 0);
@@ -2941,44 +2958,67 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
aw.mode = mode;
aw.style = style;
+ /* internal format pointer if writing at time of creation */
+
+ if (stream) {
+ MemSet ((Pointer) &af, 0, sizeof (Asn2gbFormat));
+ af.ajp = ajp;
+ af.asp = NULL;
+ af.qvp = NULL;
+ af.format = format;
+ af.ffwrite = ffwrite;
+ af.userdata = userdata;
+ af.fp = fp;
+ af.aip = aip;
+ af.atp = atp;
+
+ aw.afp = &af;
+ }
+
sep = GetTopSeqEntryForEntityID (entityID);
- /* special types of records override feature fetching parameters */
+ /* special types of records override feature fetching and contig display parameters */
+
+ if (mode == ENTREZ_MODE) {
+ if (! aw.showconfeats) {
+ aw.smartconfeats = TRUE; /* features suppressed if CONTIG style and length > 1 MB */
+ aw.showconfeats = FALSE;
+ aw.showconsource = FALSE;
+ }
+ }
aw.onlyNearFeats = FALSE;
aw.farFeatsSuppress = FALSE;
aw.nearFeatsSuppress = FALSE;
- LookForGEDetc (sep, &isGED, &isNTorNW, &isNC, &isTPA, &isAEorCH, &isNuc, &isProt);
- if (ajp->ajp.slp != NULL) {
- /* specified location obeys fetching parameters, for now */
- aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES);
- aw.farFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == FAR_FEATURES_SUPPRESS);
- aw.nearFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == NEAR_FEATURES_SUPPRESS);
- } else if (mode == ENTREZ_MODE) {
- /* entrez_mode overrides settings to avoid far fetches */
- aw.onlyNearFeats = TRUE;
- aw.showconfeats = TRUE;
- } else if (isNTorNW || isTPA) {
- aw.onlyNearFeats = TRUE;
- } else if (isAEorCH) {
- /* AE or CH are special cases in CON division */
- if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
- aw.onlyNearFeats = TRUE;
- } else {
- aw.nearFeatsSuppress = TRUE;
- }
- } else if (isNC) {
+
+ LookForGEDetc (sep, &isGED, &isNTorNW, &isNC, &isTPA, &isNuc, &isProt);
+
+ if (isNC) {
+
if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
aw.onlyNearFeats = TRUE;
} else {
aw.nearFeatsSuppress = TRUE;
}
+
+ } else if (isNTorNW || isTPA) {
+
+ aw.onlyNearFeats = TRUE;
+
+ } else if (isGED) {
+
+ aw.nearFeatsSuppress = TRUE;
+ ajp->showFarTransl = TRUE;
+
} else {
+
aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES);
aw.farFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == FAR_FEATURES_SUPPRESS);
aw.nearFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == NEAR_FEATURES_SUPPRESS);
}
+ /* continue setting flags */
+
aw.hideImpFeats = (Boolean) ((custom & HIDE_IMP_FEATS) != 0);
aw.hideRemImpFeats = (Boolean) ((custom & HIDE_REM_IMP_FEATS) != 0);
@@ -3033,8 +3073,6 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
aw.hup = FALSE;
aw.ssp = NULL;
- aw.stream = (Boolean) ((locks & STREAM_SEQ_PORT_FIRST) != 0);
-
aw.failed = FALSE;
omdp = ObjMgrGetData (entityID);
@@ -3051,6 +3089,23 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
oldscope = SeqEntrySetScope (sep);
+ if (stream) {
+ /* send optional head string; HTML output falls back to defHead when the caller supplied none */
+ /* the text is caller-controlled, so it is printed through "%s" and never used as the format itself */
+ is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
+ if (ffhead == NULL && is_html) {
+ ffhead = defHead;
+ }
+ if (ffhead != NULL) {
+ if (fp != NULL) {
+ fprintf (fp, "%s", ffhead);
+ }
+ }
+ if (ffwrite != NULL) {
+ ffwrite (ffhead, userdata, HEAD_BLOCK); /* NOTE(review): still invoked when ffhead is NULL - confirm callbacks accept that */
+ }
+ }
+
if (bssp != NULL) {
/* handle all components of a pop/phy/mut/eco set */
@@ -3065,6 +3120,22 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
DoOneBioseq (bsp, &aw);
}
+ if (stream) {
+ /* send optional tail string; HTML output falls back to defTail when the caller supplied none */
+ /* the text is caller-controlled, so it is printed through "%s" and never used as the format itself */
+ if (fftail == NULL && is_html) {
+ fftail = defTail;
+ }
+ if (fftail != NULL) {
+ if (fp != NULL) {
+ fprintf (fp, "%s", fftail);
+ }
+ }
+ if (ffwrite != NULL) {
+ ffwrite (fftail, userdata, TAIL_BLOCK); /* NOTE(review): still invoked when fftail is NULL - confirm callbacks accept that */
+ }
+ }
+
SeqEntrySetScope (oldscope);
/* check for failure to populate anything */
@@ -3139,6 +3210,25 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
return (Asn2gbJobPtr) ajp;
}
+NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
+ BioseqPtr bsp,
+ BioseqSetPtr bssp,
+ SeqLocPtr slp,
+ FmtType format,
+ ModType mode,
+ StlType style,
+ FlgType flags,
+ LckType locks,
+ CstType custom,
+ XtraPtr extra
+)
+/* exported setup entry point: forwards all ten arguments unchanged to asn2gnbk_setup_ex */
+{
+ return asn2gnbk_setup_ex (bsp, bssp, slp, format, mode, style,
+ flags, locks, custom, extra,
+ FALSE, NULL, NULL, NULL); /* stream = FALSE, fp/aip/atp = NULL: the stream-only head/tail output is skipped */
+}
+
/* ********************************************************************** */
/* format functions allocate printable string for given paragraph */
@@ -3164,14 +3254,14 @@ static FormatProc asn2gnbk_fmt_functions [27] = {
DefaultFormatBlock, DefaultFormatBlock, DefaultFormatBlock,
DefaultFormatBlock, DefaultFormatBlock, DefaultFormatBlock,
FormatSourceBlock, FormatOrganismBlock, FormatReferenceBlock,
- DefaultFormatBlock, FormatCommentBlock, DefaultFormatBlock,
+ DefaultFormatBlock, FormatCommentBlock, FormatFeatHeaderBlock,
FormatSourceFeatBlock, FormatFeatureBlock, FormatBasecountBlock,
DefaultFormatBlock, FormatSequenceBlock, FormatContigBlock,
DefaultFormatBlock, DefaultFormatBlock, FormatSlashBlock,
NULL
};
-static void PrintFtableIntervals (
+NLM_EXTERN void PrintFtableIntervals (
ValNodePtr PNTR head,
BioseqPtr target,
SeqLocPtr location,
@@ -3705,7 +3795,7 @@ static void PrintBioSourceFtableEntry (
}
}
-static void PrintFtableLocAndQuals (
+NLM_EXTERN void PrintFtableLocAndQuals (
IntAsn2gbJobPtr ajp,
ValNodePtr PNTR head,
BioseqPtr target,
@@ -4343,10 +4433,11 @@ static BioseqPtr BioseqLockAndIndexByEntity (Uint2 entityID)
return bsp;
}
-static CharPtr FormatFtableSourceFeatBlock (
+NLM_EXTERN CharPtr FormatFtableSourceFeatBlock (
BaseBlockPtr bbp,
BioseqPtr target
)
+
{
SeqFeatPtr sfp;
SeqDescPtr sdp;
@@ -4383,34 +4474,83 @@ static CharPtr FormatFtableSourceFeatBlock (
return str;
}
+NLM_EXTERN void DoImmediateFormat (
+ Asn2gbFormatPtr afp,
+ BaseBlockPtr bbp
+)
+/* Formats the single block bbp right away and sends the text to afp->fp and/or afp->ffwrite. */
+{
+ BlockType blocktype;
+ BioseqPtr bsp;
+ FormatProc fmt;
+ size_t max;
+ SeqEntryPtr oldscope;
+ QualValPtr qv = NULL;
+ SeqEntryPtr sep;
+ CharPtr str = NULL;
+
+ if (afp == NULL || bbp == NULL) return;
+ /* pick the formatter for this block type; types outside LOCUS_BLOCK..SLASH_BLOCK produce no output */
+ blocktype = bbp->blocktype;
+ if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return;
+ fmt = asn2gnbk_fmt_functions [(int) blocktype];
+ if (fmt == NULL) return;
+ /* scratch QualVal array: MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR) entries plus 5 spare */
+ max = (size_t) (MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR));
+ qv = MemNew (sizeof (QualVal) * (max + 5));
+ if (qv == NULL) return;
+
+ sep = GetTopSeqEntryForEntityID (bbp->entityID);
+ /* BioseqLockAndIndexByEntity/BioseqUnlock and the scope change bracket the formatter call */
+ bsp = BioseqLockAndIndexByEntity (bbp->entityID);
+ oldscope = SeqEntrySetScope (sep);
+ /* the scratch array is handed over through afp->qvp, which is cleared again right after the call */
+ afp->qvp = qv;
+ str = fmt (afp, bbp);
+ afp->qvp = NULL;
+
+ SeqEntrySetScope (oldscope);
+ BioseqUnlock (bsp);
+ /* a NULL result is reported as "?\n" so each configured sink still receives output for this block */
+ if (str != NULL) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "%s", str);
+ }
+ if (afp->ffwrite != NULL) {
+ afp->ffwrite (str, afp->userdata, blocktype);
+ }
+ } else {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "?\n");
+ }
+ if (afp->ffwrite != NULL) {
+ afp->ffwrite ("?\n", afp->userdata, blocktype);
+ }
+ }
+ /* the formatted text and the scratch array are both released before returning */
+ MemFree (str);
+ MemFree (qv);
+}
+
NLM_EXTERN CharPtr asn2gnbk_format (
Asn2gbJobPtr ajp,
Int4 paragraph
)
{
- Asn2gbFormat af;
- Asn2gbSectPtr asp;
- BaseBlockPtr bbp;
- BlockType blocktype;
- BioseqPtr bsp;
- SeqMgrFeatContext fcontext;
- FormatProc fmt;
- ValNodePtr head;
- IntAsn2gbJobPtr iajp;
- Char id [42];
- IntRefBlockPtr irp;
- size_t max;
- SeqEntryPtr oldscope;
- QualValPtr qv;
- Int4 section;
- SeqEntryPtr sep;
- SeqFeatPtr sfp;
- SeqIdPtr sip;
- SeqIdPtr sip2;
- CharPtr str = NULL;
- BioseqPtr target;
- Char tmp [53];
+ Asn2gbFormat af;
+ Asn2gbSectPtr asp;
+ BaseBlockPtr bbp;
+ BlockType blocktype;
+ BioseqPtr bsp;
+ FormatProc fmt;
+ IntAsn2gbJobPtr iajp;
+ size_t max;
+ SeqEntryPtr oldscope;
+ QualValPtr qv;
+ Int4 section;
+ SeqEntryPtr sep;
+ CharPtr str = NULL;
/* qv must hold MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR) */
@@ -4434,83 +4574,26 @@ NLM_EXTERN CharPtr asn2gnbk_format (
qv = MemNew (sizeof (QualVal) * (max + 5));
if (qv == NULL) return NULL;
+ MemSet ((Pointer) &af, 0, sizeof (Asn2gbFormat));
af.ajp = (IntAsn2gbJobPtr) ajp;
af.asp = asp;
af.qvp = qv;
af.format = iajp->format;
+ af.aip = iajp->aip;
+ af.atp = iajp->atp;
sep = GetTopSeqEntryForEntityID (bbp->entityID);
- if (iajp->format != FTABLE_FMT) {
- fmt = asn2gnbk_fmt_functions [(int) blocktype];
- if (fmt == NULL) return NULL;
-
- bsp = BioseqLockAndIndexByEntity (bbp->entityID);
- oldscope = SeqEntrySetScope (sep);
-
- str = fmt (&af, bbp);
-
- SeqEntrySetScope (oldscope);
- BioseqUnlock (bsp);
-
- } else {
-
- target = asp->target;
- if (target != NULL) {
-
- bsp = BioseqLockAndIndexByEntity (bbp->entityID);
- oldscope = SeqEntrySetScope (sep);
-
- if (blocktype == FEATHEADER_BLOCK) {
- sip = SeqIdFindBest (target->id, 0);
- if (sip != NULL && sip->choice == SEQID_GI) {
- sip2 = GetSeqIdForGI (sip->data.intvalue);
- if (sip2 != NULL) {
- sip = sip2;
- }
- }
- SeqIdWrite (sip, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
- if (! StringHasNoText (id)) {
- sprintf (tmp, ">Feature %s\n", id);
- str = StringSave (tmp);
- }
+ fmt = asn2gnbk_fmt_functions [(int) blocktype];
+ if (fmt == NULL) return NULL;
- } else if (blocktype == REFERENCE_BLOCK) {
-
- irp = (IntRefBlockPtr) bbp;
- if (irp->loc != NULL) {
- if (irp->rb.pmid != 0 || irp->rb.muid != 0) {
- head = NULL;
- PrintFtableIntervals (&head, target, irp->loc, "REFERENCE");
- if (irp->rb.pmid != 0) {
- sprintf (tmp, "\t\t\tpmid\t%ld\n", (long) irp->rb.pmid);
- ValNodeCopyStr (&head, 0, tmp);
- } else if (irp->rb.muid != 0) {
- sprintf (tmp, "\t\t\tmuid\t%ld\n", (long) irp->rb.muid);
- ValNodeCopyStr (&head, 0, tmp);
- }
- str = MergeFFValNodeStrs (head);
- ValNodeFreeData (head);
- }
- }
+ bsp = BioseqLockAndIndexByEntity (bbp->entityID);
+ oldscope = SeqEntrySetScope (sep);
- } else if (blocktype == FEATURE_BLOCK) {
+ str = fmt (&af, bbp);
- sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
- if (sfp != NULL) {
- head = NULL;
- PrintFtableLocAndQuals (af.ajp, &head, target, sfp, &fcontext);
- str = MergeFFValNodeStrs (head);
- ValNodeFreeData (head);
- }
- } else if (blocktype == SOURCEFEAT_BLOCK) {
- str = FormatFtableSourceFeatBlock (bbp, target);
- }
-
- SeqEntrySetScope (oldscope);
- BioseqUnlock (bsp);
- }
- }
+ SeqEntrySetScope (oldscope);
+ BioseqUnlock (bsp);
if (str == NULL) {
str = StringSave ("???\n");
@@ -4605,7 +4688,6 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_cleanup (
}
MemFree (asp->blockArray);
MemFree (asp->referenceArray);
- SeqPortFree (iasp->spp);
MemFree (asp);
}
}
@@ -4634,22 +4716,6 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_cleanup (
return NULL;
}
-static CharPtr defHead = "\
-Content-type: text/html\n\n\
-<HTML>\n\
-<HEAD><TITLE>GenBank entry</TITLE></HEAD>\n\
-<BODY>\n\
-<hr>\n\
-<pre>";
-
-static CharPtr defTail = "\
-</pre>\n\
-<hr>\n\
-</BODY>\n\
-</HTML>\n";
-
-NLM_EXTERN void AsnPrintNewLine PROTO((AsnIoPtr aip));
-
NLM_EXTERN Boolean SeqEntryToGnbk (
SeqEntryPtr sep,
SeqLocPtr slp,
@@ -4665,31 +4731,32 @@ NLM_EXTERN Boolean SeqEntryToGnbk (
{
AsnIoPtr aip = NULL;
+ AsnIoPtr aipfree = NULL;
Asn2gbJobPtr ajp;
AsnTypePtr atp = NULL;
- BaseBlockPtr bbp;
- BlockType block;
BioseqPtr bsp = NULL;
BioseqSetPtr bssp = NULL;
Boolean do_gbseq_asn = FALSE;
Boolean do_gbseq_xml = FALSE;
- CharPtr ffhead = NULL;
- CharPtr fftail = NULL;
Asn2gbWriteFunc ffwrite = NULL;
GBSeqPtr gbseq = NULL;
GBSeq gbsq;
- GBSeqPtr gbtmp;
- Int4 i;
IntAsn2gbJobPtr iajp;
- IndxPtr index = NULL;
- Boolean is_html;
- Int4 numParagraphs;
- BaseBlockPtr PNTR paragraphArray;
Boolean rsult = FALSE;
- CharPtr str;
Int1 type = ASNIO_TEXT_OUT;
Pointer userdata = NULL;
XtraBlock xtra;
+ /*
+ BaseBlockPtr bbp;
+ BlockType block;
+ CharPtr ffhead = NULL;
+ CharPtr fftail = NULL;
+ Int4 i;
+ Boolean is_html;
+ Int4 numParagraphs;
+ BaseBlockPtr PNTR paragraphArray;
+ CharPtr str;
+ */
#ifdef WIN_MAC
#if __profile__
ValNodePtr bsplist = NULL;
@@ -4706,9 +4773,10 @@ NLM_EXTERN Boolean SeqEntryToGnbk (
if (extra != NULL) {
ffwrite = extra->ffwrite;
+ /*
ffhead = extra->ffhead;
fftail = extra->fftail;
- index = extra->index;
+ */
gbseq = extra->gbseq;
aip = extra->aip;
atp = extra->atp;
@@ -4771,6 +4839,7 @@ NLM_EXTERN Boolean SeqEntryToGnbk (
type |= ASNIO_XML;
}
aip = AsnIoNew (type, fp, NULL, NULL, NULL);
+ aipfree = aip;
fp = NULL;
}
if (extra == NULL) {
@@ -4784,79 +4853,84 @@ NLM_EXTERN Boolean SeqEntryToGnbk (
}
}
- ajp = asn2gnbk_setup (bsp, bssp, slp, format, mode, style, flags, locks, custom, extra);
+ /* pass TRUE for stream to do immediate write at time of creation for speed */
+ ajp = asn2gnbk_setup_ex (bsp, bssp, slp, format, mode, style,
+ flags, locks, custom, extra,
+ TRUE, fp, aip, atp);
if (ajp != NULL) {
rsult = TRUE;
iajp = (IntAsn2gbJobPtr) ajp;
- /* send optional head string */
- is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
- if (ffhead == NULL && is_html) {
- ffhead = defHead;
- }
- if (ffhead != NULL) {
- if (fp != NULL) {
- fprintf (fp, ffhead);
- }
- }
- if (ffwrite != NULL) {
- ffwrite (ffhead, userdata, HEAD_BLOCK);
- }
+#if 0
+ /* if streaming, all output was written in setup function, otherwise output here */
- /* send each paragraph */
+ if (! stream) {
- numParagraphs = ajp->numParagraphs;
- paragraphArray = ajp->paragraphArray;
+ /* send optional head string */
- for (i = 0; i < numParagraphs; i++) {
- str = asn2gnbk_format (ajp, i);
- block = (BlockType) 0;
- if (paragraphArray != NULL) {
- bbp = paragraphArray [i];
- if (bbp != NULL) {
- block = bbp->blocktype;
- }
+ is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
+ if (ffhead == NULL && is_html) {
+ ffhead = defHead;
}
- if (str != NULL) {
- if (fp != NULL) {
- fprintf (fp, "%s", str);
- }
- if (ffwrite != NULL) {
- ffwrite (str, userdata, block);
- }
- } else {
+ if (ffhead != NULL) {
if (fp != NULL) {
- fprintf (fp, "?\n");
- }
- if (ffwrite != NULL) {
- ffwrite ("?\n", userdata, block);
+ fprintf (fp, ffhead);
}
}
+ if (ffwrite != NULL) {
+ ffwrite (ffhead, userdata, HEAD_BLOCK);
+ }
- /* if generating GBSeq XML/ASN, write at each slash block */
+ /* send each paragraph */
- if (block == SLASH_BLOCK && gbseq != NULL && aip != NULL) {
- if (iajp->produceInsdSeq) {
- INSDSeqAsnWrite ((INSDSeqPtr) gbseq, aip, atp);
- } else {
- GBSeqAsnWrite (gbseq, aip, atp);
+ numParagraphs = ajp->numParagraphs;
+ paragraphArray = ajp->paragraphArray;
+
+ for (i = 0; i < numParagraphs; i++) {
+ str = asn2gnbk_format (ajp, i);
+ block = (BlockType) 0;
+ if (paragraphArray != NULL) {
+ bbp = paragraphArray [i];
+ if (bbp != NULL) {
+ block = bbp->blocktype;
+ }
}
- if (atp == NULL) {
- AsnPrintNewLine (aip);
+ if (str != NULL) {
+ if (fp != NULL) {
+ fprintf (fp, "%s", str);
+ }
+ if (ffwrite != NULL) {
+ ffwrite (str, userdata, block);
+ }
+ } else {
+ if (fp != NULL) {
+ fprintf (fp, "?\n");
+ }
+ if (ffwrite != NULL) {
+ ffwrite ("?\n", userdata, block);
+ }
}
- AsnIoFlush (aip);
- /* clean up gbseq fields */
+ MemFree (str);
+ }
+
+ /* send optional tail string */
- gbtmp = GBSeqNew ();
- MemCopy (gbtmp, gbseq, sizeof (GBSeq));
- MemSet (gbseq, 0, sizeof (GBSeq));
- GBSeqFree (gbtmp);
+ if (fftail == NULL && is_html) {
+ fftail = defTail;
+ }
+ if (fftail != NULL) {
+ if (fp != NULL) {
+ fprintf (fp, fftail);
+ }
+ }
+ if (ffwrite != NULL) {
+ ffwrite (fftail, userdata, TAIL_BLOCK);
}
- MemFree (str);
}
+#endif
/* if RELEASE_MODE, warn if unresolved gi numbers, missing translation, etc. */
@@ -4864,23 +4938,12 @@ NLM_EXTERN Boolean SeqEntryToGnbk (
rsult = FALSE;
}
- /* send optional tail string */
-
- if (fftail == NULL && is_html) {
- fftail = defTail;
- }
- if (fftail != NULL) {
- if (fp != NULL) {
- fprintf (fp, fftail);
- }
- }
- if (ffwrite != NULL) {
- ffwrite (fftail, userdata, TAIL_BLOCK);
- }
-
asn2gnbk_cleanup (ajp);
}
+ if (aipfree != NULL) {
+ AsnIoFree (aipfree, FALSE);
+ }
#ifdef WIN_MAC
#if __profile__
diff --git a/api/asn2gnb2.c b/api/asn2gnb2.c
index 93db4c01..eef7bdfb 100644
--- a/api/asn2gnb2.c
+++ b/api/asn2gnb2.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.4 $
+* $Revision: 1.11 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -413,14 +413,18 @@ NLM_EXTERN void AddLocusBlock (
CharPtr ebmol;
EMBLBlockPtr ebp;
SeqMgrFeatContext fcontext;
+ Boolean first = TRUE;
GBBlockPtr gbp;
Char gene [32];
Boolean genome_view;
GBSeqPtr gbseq;
+ Int4 gi = 0;
+ Char gi_buf [16];
Char id [41];
Int2 imol = 0;
IndxPtr index;
Int2 istrand;
+ Boolean is_aa;
Boolean is_nm = FALSE;
Boolean is_np = FALSE;
Boolean is_nz = FALSE;
@@ -436,10 +440,13 @@ NLM_EXTERN void AddLocusBlock (
Uint1 origin;
OrgRefPtr orp;
BioseqPtr parent;
+ CharPtr prefix = NULL;
SeqDescrPtr sdp;
SeqFeatPtr sfp;
SeqIdPtr sip;
SubSourcePtr ssp;
+ CharPtr str;
+ CharPtr suffix = NULL;
Uint1 tech;
Uint1 topology;
TextSeqIdPtr tsip;
@@ -896,6 +903,7 @@ NLM_EXTERN void AddLocusBlock (
/* Print the "LOCUS_NEW" line, if requested */
if (awp->newLocusLine) {
+
FFStartPrint (ffstring, awp->format, 0, 0, "LOCUS", 12, 0, 0, NULL, FALSE);
parent = awp->parent;
@@ -926,6 +934,7 @@ NLM_EXTERN void AddLocusBlock (
/* Else print the "LOCUS" line */
else {
+
FFStartPrint (ffstring, awp->format, 0, 0, "LOCUS", 12, 0, 0, NULL, FALSE);
if (parent->repr == Seq_repr_seg)
@@ -943,7 +952,9 @@ NLM_EXTERN void AddLocusBlock (
}
} else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
+
FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 0, "ID", FALSE);
+
FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
loclen = StringLen(locus);
if (14 - 5 - loclen > 0) {
@@ -1030,8 +1041,60 @@ NLM_EXTERN void AddLocusBlock (
gbseq->update_date = StringSave (date);
}
- bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 0, 5, 0, "ID");
+ suffix = FFEndPrint(ajp, ffstring, awp->format, 12, 0, 5, 0, "ID");
FFRecycleString(ajp, ffstring);
+
+ if (awp->contig && (! awp->showconfeats) && awp->smartconfeats && GetWWW (ajp) &&
+ (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
+ is_aa = ISA_aa (bsp->mol);
+ gi = 0;
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice == SEQID_GI) {
+ gi = (Int4) sip->data.intvalue;
+ }
+ }
+ if (gi > 0) {
+ ffstring = FFGetString(ajp);
+
+ sprintf(gi_buf, "%ld", (long) gi);
+ FFAddOneString(ffstring, "<a href=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, link_feat, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "val=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, gi_buf, FALSE, FALSE, TILDE_IGNORE);
+ if ( is_aa ) {
+ FFAddOneString(ffstring, "&view=gpwithparts>", FALSE, FALSE, TILDE_IGNORE);
+ } else {
+ FFAddOneString(ffstring, "&view=gbwithparts>", FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (bsp->length > 1000000) {
+ FFAddOneString(ffstring, "Click here to see all features and the sequence of this contig record.", FALSE, FALSE, TILDE_IGNORE);
+ } else {
+ FFAddOneString(ffstring, "Click here to see the sequence of this contig record.", FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+
+ prefix = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "??");
+
+ FFRecycleString(ajp, ffstring);
+ }
+ }
+
+ if (StringDoesHaveText (prefix)) {
+ loclen = StringLen (prefix) + StringLen (suffix);
+ str = (CharPtr) MemNew (loclen + 10);
+ if (str != NULL) {
+ StringCpy (str, prefix);
+ StringCat (str, "\n\n");
+ StringCat (str, suffix);
+ }
+ bbp->string = str;
+ } else {
+ bbp->string = suffix;
+ }
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
NLM_EXTERN void AddDeflineBlock (
@@ -1112,6 +1175,10 @@ NLM_EXTERN void AddDeflineBlock (
}
FFRecycleString(ajp, ffstring);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
static void FF_www_accession (
@@ -1385,6 +1452,10 @@ NLM_EXTERN void AddAccessionBlock (
bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "AC");
FFRecycleString(ajp, ffstring);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
NLM_EXTERN void AddVersionBlock (
@@ -1551,6 +1622,10 @@ NLM_EXTERN void AddVersionBlock (
}
bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "SV");
FFRecycleString(ajp, ffstring);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
/* only displaying PID in GenPept format */
@@ -2361,115 +2436,6 @@ static Boolean FF_www_dbsource(
return TRUE;
}
-static CharPtr CleanQualValue (
- CharPtr str
-)
-
-{
- Char ch;
- CharPtr dst;
- CharPtr ptr;
-
- if (str == NULL || str [0] == '\0') return NULL;
-
- dst = str;
- ptr = str;
- ch = *ptr;
- while (ch != '\0') {
- if (ch == '\n' || ch == '\r' || ch == '\t' || ch == '"') {
- *dst = ' ';
- dst++;
- } else {
- *dst = ch;
- dst++;
- }
- ptr++;
- ch = *ptr;
- }
- *dst = '\0';
-
- return str;
-}
-
-static CharPtr Asn2gnbkCompressSpaces (CharPtr str)
-
-{
- Char ch;
- CharPtr dst;
- Char last;
- CharPtr ptr;
-
- if (str != NULL && str [0] != '\0') {
- dst = str;
- ptr = str;
- ch = *ptr;
- while (ch != '\0' && ch <= ' ') {
- ptr++;
- ch = *ptr;
- }
- while (ch != '\0') {
- *dst = ch;
- dst++;
- ptr++;
- last = ch;
- ch = *ptr;
- if (ch != '\0' && ch < ' ') {
- *ptr = ' ';
- ch = *ptr;
- }
- while (ch != '\0' && last <= ' ' && ch <= ' ') {
- ptr++;
- ch = *ptr;
- }
- }
- *dst = '\0';
- dst = NULL;
- ptr = str;
- ch = *ptr;
- while (ch != '\0') {
- if (ch != ' ') {
- dst = NULL;
- } else if (dst == NULL) {
- dst = ptr;
- }
- ptr++;
- ch = *ptr;
- }
- if (dst != NULL) {
- *dst = '\0';
- }
- }
- return str;
-}
-
-static CharPtr StripAllSpaces (
- CharPtr str
-)
-
-{
- Char ch;
- CharPtr dst;
- CharPtr ptr;
-
- if (str == NULL || str [0] == '\0') return NULL;
-
- dst = str;
- ptr = str;
- ch = *ptr;
- while (ch != '\0') {
- if (ch == ' ' || ch == '\t') {
- } else {
- *dst = ch;
- dst++;
- }
- ptr++;
- ch = *ptr;
- }
- *dst = '\0';
-
- return str;
-}
-
NLM_EXTERN void AddDbsourceBlock (
Asn2gbWorkPtr awp
)
@@ -2638,6 +2604,10 @@ NLM_EXTERN void AddDbsourceBlock (
bbp->string = str;
FFRecycleString(ajp, ffstring);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
NLM_EXTERN void AddDateBlock (
@@ -2704,6 +2674,10 @@ NLM_EXTERN void AddDateBlock (
bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 0, 5, 5, "DT");
FFRecycleString(ajp, ffstring);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
@@ -3055,6 +3029,10 @@ NLM_EXTERN void AddKeywordsBlock (
bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "KW");
FFRecycleString(ajp, ffstring);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
NLM_EXTERN void AddSegmentBlock (
@@ -3119,6 +3097,10 @@ NLM_EXTERN void AddSegmentBlock (
bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
FFRecycleString(ajp, ffstring);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
NLM_EXTERN void AddSourceBlock (
@@ -3193,6 +3175,10 @@ NLM_EXTERN void AddSourceBlock (
}
}
}
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
NLM_EXTERN void AddOrganismBlock (
@@ -3252,6 +3238,10 @@ NLM_EXTERN void AddOrganismBlock (
}
}
}
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
static RefBlockPtr AddPub (
@@ -4360,6 +4350,14 @@ NLM_EXTERN Boolean AddReferenceBlock (
awp->blockList = vnp;
}
+ if (awp->afp != NULL) {
+ for (vnp = head; vnp != NULL; vnp = vnp->next) {
+ rbp = (RefBlockPtr) vnp->data.ptrvalue;
+ if (rbp == NULL) continue;
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) rbp);
+ }
+ }
+
return TRUE;
}
@@ -4461,6 +4459,10 @@ NLM_EXTERN void AddWGSBlock (
bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
FFRecycleString(ajp, ffstring);
}
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
}
}
@@ -4544,26 +4546,21 @@ NLM_EXTERN void AddGenomeBlock (
bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
FFRecycleString(ajp, ffstring);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
NLM_EXTERN void AddBasecountBlock (
- Asn2gbWorkPtr awp,
- CharPtr bases
+ Asn2gbWorkPtr awp
)
{
IntAsn2gbJobPtr ajp;
- Int4 base_count [5];
BaseBlockPtr bbp;
BioseqPtr bsp;
- Char buf [80];
- Char ch;
- Int2 i;
- Int4 len;
- StringItemPtr ffstring;
- CharPtr ptr;
- CharPtr str;
-
+
if (awp == NULL) return;
ajp = awp->ajp;
if (ajp == NULL) return;
@@ -4572,71 +4569,9 @@ NLM_EXTERN void AddBasecountBlock (
bbp = Asn2gbAddBlock (awp, BASECOUNT_BLOCK, sizeof (BaseBlock));
- if (bases == NULL || ajp->ajp.slp != NULL) return;
- len = bsp->length;
- for (i = 0; i < 5; i++) {
- base_count [i] = 0;
- }
-
- ptr = bases;
- ch = *ptr;
- while (ch != '\0') {
- ch = TO_UPPER (ch);
- switch (ch) {
- case 'A' :
- (base_count [0])++;
- break;
- case 'C' :
- (base_count [1])++;
- break;
- case 'G' :
- (base_count [2])++;
- break;
- case 'T' :
- (base_count [3])++;
- break;
- default :
- (base_count [4])++;
- break;
- }
- ptr++;
- ch = *ptr;
- }
-
- if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
-
- if (base_count [4] == 0) {
- sprintf (buf, "%7ld a%7ld c%7ld g%7ld t",
- (long) base_count [0], (long) base_count [1],
- (long) base_count [2], (long) base_count [3]);
- } else {
- sprintf (buf, "%7ld a%7ld c%7ld g%7ld t%7ld others",
- (long) base_count [0], (long) base_count [1],
- (long) base_count [2], (long) base_count [3],
- (long) base_count [4]);
- }
-
- } else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
-
- sprintf (buf, "Sequence %ld BP; %ld A; %ld C; %ld G; %ld T; %ld other;",
- (long) len,
- (long) base_count [0], (long) base_count [1],
- (long) base_count [2], (long) base_count [3],
- (long) base_count [4]);
- }
-
- ffstring = FFGetString (ajp);
- if ( ffstring == NULL ) return;
-
- if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
- FFAddOneString(ffstring, "XX\n", FALSE, FALSE, TILDE_IGNORE);
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
}
- FFStartPrint (ffstring, awp->format, 0, 0, "BASE COUNT", 12, 5, 5, "SQ", FALSE);
- FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
- str = FFEndPrint(ajp, ffstring, awp->format, 12, 0, 5, 5, "SQ");
- FFRecycleString(ajp, ffstring);
-
- bbp->string = StringSave (str);
}
NLM_EXTERN void AddOriginBlock (
@@ -4691,26 +4626,25 @@ NLM_EXTERN void AddOriginBlock (
bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 12, 0, 0, NULL);
FFRecycleString(ajp, ffstring);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
#define BASES_PER_BLOCK 1200
NLM_EXTERN void AddSequenceBlock (
- Asn2gbWorkPtr awp,
- CharPtr bases
+ Asn2gbWorkPtr awp
)
{
IntAsn2gbJobPtr ajp;
BioseqPtr bsp;
- Int4 i;
- Int4 j;
Int4 len;
- CharPtr ptr;
SeqBlockPtr sbp;
Int4 start;
Int4 stop;
- CharPtr str;
if (awp == NULL) return;
ajp = awp->ajp;
@@ -4736,13 +4670,16 @@ NLM_EXTERN void AddSequenceBlock (
sbp->start = 0;
sbp->stop = len;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) sbp);
+ }
+
return;
}
/* otherwise populate individual sequence blocks for given range */
- ptr = bases;
-
for (start = 0; start < len; start += BASES_PER_BLOCK) {
sbp = (SeqBlockPtr) Asn2gbAddBlock (awp, SEQUENCE_BLOCK, sizeof (SeqBlock));
if (sbp == NULL) continue;
@@ -4759,17 +4696,8 @@ NLM_EXTERN void AddSequenceBlock (
sbp->start = start;
sbp->stop = stop;
- if (ptr != NULL) {
- str = MemNew (sizeof (Char) * (BASES_PER_BLOCK + 2));
- if (str != NULL) {
- sbp->bases = str;
- j = stop - start;
- for (i = 0; i < j; i++) {
- *str = *ptr;
- ptr++;
- str++;
- }
- }
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) sbp);
}
}
}
@@ -4784,6 +4712,10 @@ NLM_EXTERN void AddContigBlock (
if (awp == NULL) return;
bbp = Asn2gbAddBlock (awp, CONTIG_BLOCK, sizeof (BaseBlock));
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
NLM_EXTERN void AddSlashBlock (
@@ -4803,5 +4735,9 @@ NLM_EXTERN void AddSlashBlock (
StringNCpy(str, "//\n", 4);
bbp->string = str;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
diff --git a/api/asn2gnb3.c b/api/asn2gnb3.c
index 370b0b4f..65131214 100644
--- a/api/asn2gnb3.c
+++ b/api/asn2gnb3.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.6 $
+* $Revision: 1.13 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -1269,6 +1269,10 @@ NLM_EXTERN void AddPrimaryBlock (
if (gbseq != NULL) {
gbseq->primary = StringSave (str);
}
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) bbp);
+ }
}
MemFree (str);
}
@@ -1382,7 +1386,7 @@ NLM_EXTERN void AddCommentBlock (
IntAsn2gbJobPtr ajp;
BioseqPtr bsp;
Char buf [128];
- CommentBlockPtr cbp = NULL;
+ CommentBlockPtr cbp;
Char ch;
Boolean didGenome = FALSE;
Boolean didRefTrack = FALSE;
@@ -1398,7 +1402,6 @@ NLM_EXTERN void AddCommentBlock (
CharPtr genomeBuildNumber = NULL;
CharPtr genomeVersionNumber = NULL;
Int4 gi = 0;
- CommentBlockPtr gsdbcbp = NULL;
Int4 gsdbid = 0;
Boolean has_gaps = FALSE;
Boolean hasRefTrackStatus = FALSE;
@@ -1518,6 +1521,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
@@ -1559,6 +1566,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
} else if (! hasRefTrackStatus) {
@@ -1627,6 +1638,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
@@ -1729,6 +1744,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
} else {
@@ -1779,14 +1798,33 @@ NLM_EXTERN void AddCommentBlock (
/* show GSDB sequence identifier */
if (dbt != NULL && StringCmp (dbt->db, "GSDB") == 0 && dbt->tag != NULL) {
- gsdbcbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
- if (gsdbcbp != NULL) {
- gsdbcbp->first = first;
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
+ cbp->first = first;
+ first = FALSE;
/* string will be created after we know if there are additional comments */
gsdbid = dbt->tag->id;
- first = FALSE;
+ sprintf (buf, "GSDB:S:%ld.", (long) gsdbid);
+
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
+
+ /* CheckEndPunctuation, ConvertDoubleQuotes, and ExpandTildes already taken into account */
+
+ FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+
+ cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString(ajp, ffstring);
+ ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
@@ -1829,6 +1867,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12,5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
}
@@ -1866,6 +1908,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12,5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
MemFree (str);
didTPA = TRUE;
@@ -1896,6 +1942,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12,5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
MemFree (str);
}
@@ -1927,6 +1977,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12,5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
/* do not free static str from GetStatusForRefTrack */
didRefTrack = TRUE;
@@ -1957,6 +2011,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
MemFree (str);
didGenome = TRUE;
@@ -2000,6 +2058,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
}
@@ -2038,6 +2100,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
}
@@ -2071,6 +2137,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
}
@@ -2090,6 +2160,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
first = FALSE;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_comment, &dcontext);
@@ -2129,6 +2203,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
}
@@ -2160,6 +2238,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
}
@@ -2175,6 +2257,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
first = FALSE;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_comment, &dcontext);
@@ -2189,6 +2275,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
first = FALSE;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_maploc, &dcontext);
}
@@ -2203,6 +2293,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
first = FALSE;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_region, &dcontext);
@@ -2240,6 +2334,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
@@ -2270,6 +2368,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
} else {
@@ -2298,6 +2400,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
}
@@ -2319,6 +2425,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemtype = OBJ_SEQFEAT;
cbp->first = first;
first = FALSE;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
sfp = SeqMgrGetNextFeature (parent, sfp, SEQFEAT_COMMENT, 0, &fcontext);
@@ -2348,34 +2458,13 @@ NLM_EXTERN void AddCommentBlock (
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
- }
- }
- ValNodeFreeData (head);
-
- if (gsdbcbp != NULL) {
-
- /* if there were no subsequent comments, do not add period after GSDB id */
- if (cbp == NULL) {
- sprintf (buf, "GSDB:S:%ld", (long) gsdbid);
- } else {
- sprintf (buf, "GSDB:S:%ld.", (long) gsdbid);
- }
-
- if (gsdbcbp->first) {
- FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
- } else {
- FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
-
- /* CheckEndPunctuation, ConvertDoubleQuotes, and ExpandTildes already taken into account */
-
- FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
-
- gsdbcbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
- FFRecycleString(ajp, ffstring);
- ffstring = FFGetString(ajp);
}
+ ValNodeFreeData (head);
FFRecycleString(ajp, ffstring);
}
@@ -2396,27 +2485,31 @@ NLM_EXTERN void AddFeatHeaderBlock (
bbp = Asn2gbAddBlock (awp, FEATHEADER_BLOCK, sizeof (BaseBlock));
if (bbp == NULL) return;
- if (awp->format == FTABLE_FMT) return;
+ if (awp->format != FTABLE_FMT) {
+ ffstring = FFGetString(ajp);
+ if ( ffstring == NULL ) return;
- ffstring = FFGetString(ajp);
- if ( ffstring == NULL ) return;
+ FFStartPrint (ffstring, awp->format, 0, 12, "FEATURES", 21, 5, 0, "FH", TRUE);
- FFStartPrint (ffstring, awp->format, 0, 12, "FEATURES", 21, 5, 0, "FH", TRUE);
+ if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
+ FFAddOneString (ffstring, "Key", FALSE, FALSE, TILDE_IGNORE);
+ FFAddNChar(ffstring, ' ', 13 , FALSE);
+ }
- if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
- FFAddOneString (ffstring, "Key", FALSE, FALSE, TILDE_IGNORE);
- FFAddNChar(ffstring, ' ', 13 , FALSE);
- }
+ FFAddOneString (ffstring, "Location/Qualifiers", FALSE, FALSE, TILDE_TO_SPACES);
- FFAddOneString (ffstring, "Location/Qualifiers", FALSE, FALSE, TILDE_TO_SPACES);
+ if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
+ FFAddNewLine(ffstring);
+ FFAddNewLine(ffstring);
+ }
- if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
- FFAddNewLine(ffstring);
- FFAddNewLine(ffstring);
+ bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 21, 5, 0, "FH");
+ FFRecycleString(ajp, ffstring);
}
- bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 21, 5, 0, "FH");
- FFRecycleString(ajp, ffstring);
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
}
static Uint2 ComputeSourceHash (
@@ -3269,6 +3362,10 @@ NLM_EXTERN void AddSourceFeatBlock (
}
FFRecycleString(ajp, ffstring);
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) bbp);
+ }
+
/* optionally populate gbseq for XML-ized GenBank format */
if (gbseq != NULL) {
@@ -3402,6 +3499,15 @@ NLM_EXTERN void AddSourceFeatBlock (
awp->blockList = vnp;
}
FFRecycleString(ajp, ffstring);
+
+ if (awp->afp != NULL) {
+ for (vnp = head; vnp != NULL; vnp = vnp->next) {
+ isp = (IntSrcBlockPtr) vnp->data.ptrvalue;
+ if (isp == NULL) continue;
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) isp);
+ }
+ }
+
}
static Boolean IsCDD (
@@ -3535,6 +3641,10 @@ static void GetFeatsOnCdsProduct (
ifp->mapToPep = FALSE;
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
+ }
}
}
@@ -3992,6 +4102,11 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
awp->firstfeat = FALSE;
awp->featseen = TRUE;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
+ }
+
/* optionally map CDS from cDNA onto genomic */
if (awp->isGPS && ISA_na (bsp->mol) && awp->copyGpsCdsUp &&
@@ -4016,6 +4131,10 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
ifp->mapToPep = FALSE;
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
+ }
}
}
}
@@ -4218,6 +4337,10 @@ NLM_EXTERN void AddFeatureBlock (
ifp->isCDS = TRUE;
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
+ }
}
}
}
@@ -4268,6 +4391,10 @@ NLM_EXTERN void AddFeatureBlock (
ifp->isCDS = TRUE;
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
+ }
}
}
prot = SeqMgrGetPROTgivenProduct (bsp, &fcontext);
@@ -4298,6 +4425,10 @@ NLM_EXTERN void AddFeatureBlock (
ifp->mapToPep = TRUE;
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
+ }
}
}
}
diff --git a/api/asn2gnb4.c b/api/asn2gnb4.c
index 7788e925..6b60cc6a 100644
--- a/api/asn2gnb4.c
+++ b/api/asn2gnb4.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.11 $
+* $Revision: 1.18 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -135,6 +135,7 @@ static FtQualType feat_qual_order [] = {
FTQUAL_organism,
FTQUAL_label,
FTQUAL_cds_product,
+ FTQUAL_extra_products,
FTQUAL_protein_id,
FTQUAL_transcript_id,
FTQUAL_db_xref,
@@ -218,6 +219,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
{ "evidence", Qual_class_evidence },
{ "exception", Qual_class_string },
{ "exception_note", Qual_class_string },
+ { "product", Qual_class_valnode },
{ "figure", Qual_class_string },
{ "frequency", Qual_class_quote },
{ "function", Qual_class_quote },
@@ -1347,13 +1349,9 @@ static CharPtr validRefSeqExceptionString [] = {
"RNA editing",
"reasons given in citation",
"ribosomal slippage",
- "ribosome slippage",
- "trans splicing",
"trans-splicing",
"alternative processing",
- "alternate processing",
"artificial frameshift",
- "non-consensus splice site",
"nonconsensus splice site",
"rearrangement required for product",
"modified codon recognition",
@@ -1431,7 +1429,7 @@ NLM_EXTERN Int2 ValidateAccn (
if (numAlpha == 3 && numDigits == 5) return 0;
if (numAlpha == 4 && numDigits == 8) return 0;
} else if (numUndersc == 1) {
- if (numAlpha != 2 || (numDigits != 6 && numDigits != 8)) return -2;
+ if (numAlpha != 2 || (numDigits != 6 && numDigits != 8 && numDigits != 9)) return -2;
if (accession [0] == 'N' || accession [0] == 'X' || accession [0] == 'Z') {
if (accession [1] == 'M' ||
accession [1] == 'C' ||
@@ -1575,20 +1573,27 @@ NLM_EXTERN CharPtr goFieldType [] = {
};
static CharPtr GetGOtext (
- UserFieldPtr topufp
+ UserFieldPtr topufp,
+ IntAsn2gbJobPtr ajp
)
{
- CharPtr evidence = NULL;
- Char gid [32];
- CharPtr goid = NULL;
- Int2 j;
- ObjectIdPtr oip;
- Int4 pmid = 0;
- CharPtr str;
- CharPtr textstr = NULL;
- Char tmp [32];
- UserFieldPtr ufp;
+ CharPtr evidence = NULL;
+ StringItemPtr ffstring;
+ Char gid [32];
+ CharPtr goid = NULL;
+ Boolean is_www;
+ Int2 j;
+ ObjectIdPtr oip;
+ Int4 pmid = 0;
+ CharPtr ptr;
+ CharPtr str;
+ CharPtr textstr = NULL;
+ Char tmp [32];
+ UserFieldPtr ufp;
+
+ if (topufp == NULL || ajp == NULL) return NULL;
+ is_www = GetWWW (ajp);
for (ufp = topufp; ufp != NULL; ufp = ufp->next) {
oip = ufp->label;
@@ -1627,13 +1632,31 @@ static CharPtr GetGOtext (
}
/* if (StringHasNoText (textstr)) return NULL; */
- str = (CharPtr) MemNew (StringLen (textstr) + StringLen (goid) + StringLen (evidence) + 50);
+ str = (CharPtr) MemNew (StringLen (textstr) + StringLen (goid) + StringLen (evidence) + StringLen (link_go) + 80);
if (str == NULL) return NULL;
StringCpy (str, textstr);
if (! StringHasNoText (goid)) {
StringCat (str, " [goid ");
- StringCat (str, goid);
+ if (is_www) {
+ ffstring = FFGetString (ajp);
+ if (ffstring != NULL) {
+ FFAddOneString(ffstring, "<a href=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, link_go, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, goid, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneChar(ffstring, '>', FALSE);
+ FFAddOneString(ffstring, goid, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ ptr = FFToCharPtr (ffstring);
+ FFRecycleString (ajp, ffstring);
+ StringCat (str, ptr);
+ MemFree (ptr);
+ } else {
+ StringCat (str, goid);
+ }
+ } else {
+ StringCat (str, goid);
+ }
StringCat (str, "]");
}
if (! StringHasNoText (evidence)) {
@@ -1751,6 +1774,26 @@ static void LIBCALLBACK SaveGBSeqSequence (
*tmpp = tmp;
}
+/*
+ * SortVnpByInt - comparison callback handed to ValNodeSort.
+ *
+ * Each argument points at a ValNodePtr; nodes are ordered by ascending
+ * data.intvalue.  A NULL argument or a NULL node compares as equal (0).
+ */
+static int LIBCALLBACK SortVnpByInt (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+  ValNodePtr  lhs;
+  ValNodePtr  rhs;
+
+  if (ptr1 == NULL || ptr2 == NULL) return 0;
+
+  lhs = *((ValNodePtr PNTR) ptr1);
+  rhs = *((ValNodePtr PNTR) ptr2);
+  if (lhs == NULL || rhs == NULL) return 0;
+
+  if (lhs->data.intvalue < rhs->data.intvalue) return -1;
+  if (lhs->data.intvalue > rhs->data.intvalue) return 1;
+  return 0;
+}
+
static void FormatFeatureBlockQuals (
StringItemPtr ffstring,
IntAsn2gbJobPtr ajp,
@@ -1784,9 +1827,7 @@ static void FormatFeatureBlockQuals (
CodeBreakPtr cbp;
Char ch;
Uint1 choice;
- /*
- Uint1 code = Seq_code_ncbieaa;
- */
+ ValNodePtr citlist;
Int4 gi;
Boolean hadProtDesc = FALSE;
DbtagPtr dbt;
@@ -1823,9 +1864,6 @@ static void FormatFeatureBlockQuals (
SeqIdPtr sip;
SeqLocPtr slp;
Boolean split;
- /*
- SeqPortPtr spp;
- */
CharPtr start;
CharPtr str;
Boolean suppress_period;
@@ -2525,15 +2563,24 @@ static void FormatFeatureBlockQuals (
case Qual_class_pubset :
vnp = qvp [idx].vnp;
if (vnp != NULL && asp != NULL && asp->referenceArray != NULL) {
+ citlist = NULL;
for (ppr = vnp->data.ptrvalue; ppr != NULL; ppr = ppr->next) {
j = MatchRef (ppr, asp->referenceArray, asp->numReferences);
if (j > 0) {
+ ValNodeAddInt (&citlist, 0, (Int4) j);
+ }
+ }
+ citlist = ValNodeSort (citlist, SortVnpByInt);
+ for (vnp = citlist; vnp != NULL; vnp = vnp->next) {
+ j = (Int2) vnp->data.intvalue;
+ if (j > 0) {
sprintf (numbuf, "%d", (int) j);
FFAddTextToString(ffstring, "/citation=[", numbuf, "]",
FALSE, TRUE, TILDE_TO_SPACES);
FFAddOneChar(ffstring, '\n', FALSE);
}
}
+ citlist = ValNodeFree (citlist);
}
break;
@@ -2677,7 +2724,7 @@ static void FormatFeatureBlockQuals (
ajp->relModeError = TRUE;
}
} else {
- sip = GetSeqIdForGI(gi);
+ sip = GetSeqIdForGI (gi);
if (sip != NULL && SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
@@ -2777,29 +2824,6 @@ static void FormatFeatureBlockQuals (
FFAddOneChar(ffstring, '\n', FALSE);
}
MemFree (str);
- /*
- spp = SeqPortNewByLoc (sfp->product, code);
- if (spp != NULL) {
- SeqPortSet_do_virtual (spp, TRUE);
- while ((residue = SeqPortGetResidue (spp)) != SEQPORT_EOF) {
- if (! (IS_residue (residue))) continue;
- if (residue == INVALID_RESIDUE) {
- residue = (Uint1) 'X';
- }
- *protein_seq = residue;
- protein_seq++;
- }
- if (! StringHasNoText (str)) {
- FFAddTextToString(ffstring, "/translation=\"", str, "\"",
- FALSE, TRUE, TILDE_TO_SPACES);
- FFAddOneChar(ffstring, '\n', FALSE);
- }
- MemFree (str);
- } else {
- ajp->relModeError = TRUE;
- }
- SeqPortFree (spp);
- */
} else {
ajp->relModeError = TRUE;
}
@@ -2876,7 +2900,7 @@ static void FormatFeatureBlockQuals (
for (entry = qvp [jdx].ufp; entry != NULL; entry = entry->next) {
if (entry == NULL || entry->choice != 11) break;
ufp = (UserFieldPtr) entry->data.ptrvalue;
- str = GetGOtext (ufp);
+ str = GetGOtext (ufp, ajp);
if (! StringHasNoText (str)) {
FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[jdx].name, "=",
FALSE, TRUE, TILDE_IGNORE);
@@ -2997,7 +3021,7 @@ static void FormatFeatureBlockQuals (
for (entry = qvp [jdx].ufp; entry != NULL; entry = entry->next) {
if (entry == NULL || entry->choice != 11) break;
ufp = (UserFieldPtr) entry->data.ptrvalue;
- str = GetGOtext (ufp);
+ str = GetGOtext (ufp, ajp);
if (! StringHasNoText (str)) {
if (StringCmp (prefix, "; ") == 0) {
prefix = ";\n";
@@ -3241,7 +3265,7 @@ static void FormatFeatureBlockQuals (
FALSE, FALSE, TILDE_IGNORE);
}
} else {
- sip = GetSeqIdForGI(gi);
+ sip = GetSeqIdForGI (gi);
if (sip != NULL && SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
@@ -4264,7 +4288,11 @@ static CharPtr FormatFeatureBlockEx (
if (vnp != NULL && (! StringHasNoText ((CharPtr) vnp->data.ptrvalue))) {
qvp [FTQUAL_cds_product].str = (CharPtr) vnp->data.ptrvalue;
vnp = vnp->next;
- qvp [FTQUAL_prot_names].vnp = vnp;
+ if (ajp->flags.extraProductsToNote) {
+ qvp [FTQUAL_prot_names].vnp = vnp;
+ } else {
+ qvp [FTQUAL_extra_products].vnp = vnp;
+ }
}
qvp [FTQUAL_prot_desc].str = prp->desc;
qvp [FTQUAL_prot_activity].vnp = prp->activity;
@@ -4998,8 +5026,10 @@ NLM_EXTERN CharPtr FormatFeatureBlock (
BioseqPtr bsp;
SeqMgrFeatContext fcontext;
FmtType format;
+ ValNodePtr head;
QualValPtr qvp;
SeqFeatPtr sfp;
+ CharPtr str;
BioseqPtr target;
if (afp == NULL || bbp == NULL) return NULL;
@@ -5019,10 +5049,70 @@ NLM_EXTERN CharPtr FormatFeatureBlock (
sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
if (sfp == NULL) return NULL;
+ /* five-column feature table uses special code for formatting */
+
+ if (ajp->format == FTABLE_FMT) {
+ head = NULL;
+ PrintFtableLocAndQuals (ajp, &head, target, sfp, &fcontext);
+ str = MergeFFValNodeStrs (head);
+ ValNodeFreeData (head);
+ return str;
+ }
+
+ /* otherwise do regular flatfile formatting */
+
return FormatFeatureBlockEx (ajp, asp, bsp, target, sfp, &fcontext, qvp,
format, (IntFeatBlockPtr) bbp, ISA_aa (bsp->mol), TRUE);
}
+/* FormatFeatHeaderBlock builds the text for a feature-header paragraph.
+   For the five-column feature table format it returns a ">Feature <id>"
+   line, where <id> is the FASTA-long form of the best Seq-id on the
+   section's target Bioseq (a GI is replaced by the id GetSeqIdForGI
+   returns, when it returns one).  For every other format it returns
+   StringSaveNoNull of the string already stored on the block.  Returns
+   NULL if afp, bbp, the job, the section, the target or the Bioseq is
+   missing, or (feature table only) if no id text was written. */
+
+NLM_EXTERN CharPtr FormatFeatHeaderBlock (
+  Asn2gbFormatPtr afp,
+  BaseBlockPtr bbp
+)
+
+{
+  IntAsn2gbJobPtr  ajp;
+  Asn2gbSectPtr    asp;
+  BioseqPtr        bsp;
+  Char             id [64];
+  SeqIdPtr         sip;
+  SeqIdPtr         sip2;
+  CharPtr          str = NULL;
+  BioseqPtr        target;
+  /* must hold ">Feature " (9) + at most 63 id characters + '\n' + NUL,
+     i.e. 74 bytes; the previous size of 53 could be overrun by sprintf */
+  Char             tmp [80];
+
+  if (afp == NULL || bbp == NULL) return NULL;
+  ajp = afp->ajp;
+  if (ajp == NULL) return NULL;
+  asp = afp->asp;
+  if (asp == NULL) return NULL;
+  target = asp->target;
+  bsp = asp->bsp;
+  if (target == NULL || bsp == NULL) return NULL;
+
+  /* five-column feature table uses special code for formatting */
+
+  if (ajp->format == FTABLE_FMT) {
+    sip = SeqIdFindBest (target->id, 0);
+    if (sip != NULL && sip->choice == SEQID_GI) {
+      /* prefer the id looked up for the GI over the bare GI number */
+      sip2 = GetSeqIdForGI (sip->data.intvalue);
+      if (sip2 != NULL) {
+        sip = sip2;
+      }
+    }
+    /* start from an empty string so the test below never reads
+       uninitialized stack data if SeqIdWrite writes nothing */
+    id [0] = '\0';
+    SeqIdWrite (sip, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
+    if (! StringHasNoText (id)) {
+      sprintf (tmp, ">Feature %s\n", id);
+      str = StringSave (tmp);
+    }
+    return str;
+  }
+
+  /* otherwise do regular flatfile formatting */
+
+  return StringSaveNoNull (bbp->string);
+}
+
/* stand alone function to produce qualifiers in genbank style */
diff --git a/api/asn2gnb5.c b/api/asn2gnb5.c
index 1f4fcab0..bcc04ce7 100644
--- a/api/asn2gnb5.c
+++ b/api/asn2gnb5.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.11 $
+* $Revision: 1.16 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -168,7 +168,7 @@ static Char link_niaest [MAX_WWWBUF];
#define DEF_LINK_NIAEST "http://lgsun.grc.nia.nih.gov/cgi-bin/pro3?sname1="
static Char link_worm_base [MAX_WWWBUF];
-#define DEF_LINK_WORM_BASE "http://www.wormbase.org/db/get?class=Sequence;name="
+#define DEF_LINK_WORM_BASE "http://www.wormbase.org/db/gene/gene?class=CDS;name="
static Char link_worfdb [MAX_WWWBUF];
#define DEF_LINK_WORFDB "http://worfdb.dfci.harvard.edu/search.pl?form=1&search="
@@ -212,12 +212,28 @@ static Char link_rebase [MAX_WWWBUF];
NLM_EXTERN Char link_encode [MAX_WWWBUF];
#define DEF_LINK_ENCODE "http://www.nhgri.nih.gov/10005107"
-NLM_EXTERN Char link_pgn [MAX_WWWBUF];
+static Char link_pgn [MAX_WWWBUF];
#define DEF_LINK_PGN "http://pgn.cornell.edu/cgi-bin/search/seq_search_result.pl?identifier="
-NLM_EXTERN Char link_subtilist [MAX_WWWBUF];
+static Char link_subtilist [MAX_WWWBUF];
#define DEF_LINK_SUBTILIST "http://genolist.pasteur.fr/SubtiList/genome.cgi?external_query+"
+NLM_EXTERN Char link_go [MAX_WWWBUF];
+#define DEF_LINK_GO "http://db.yeastgenome.org/cgi-bin/SGD/GO/go.pl?goid="
+
+static Char link_hinvdb [MAX_WWWBUF];
+#define DEF_LINK_HINVDB "http://www.h-invitational.jp"
+
+static Char link_hinvdbhit [MAX_WWWBUF];
+#define DEF_LINK_HINVDBHIT "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=30&KEN_STR="
+
+static Char link_hinvdbhix [MAX_WWWBUF];
+#define DEF_LINK_HINVDBHIX "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=31&KEN_STR="
+
+static Char link_asap [MAX_WWWBUF];
+#define DEF_LINK_ASAP "https://asap.ahabs.wisc.edu/annotation/php/feature_info.php?FeatureID="
+
+
/* www utility functions */
NLM_EXTERN Boolean GetWWW (IntAsn2gbJobPtr ajp) {
@@ -293,6 +309,11 @@ NLM_EXTERN void InitWWW (IntAsn2gbJobPtr ajp)
GetAppParam ("NCBI", "WWWENTREZ", "LINK_ENCODE", DEF_LINK_ENCODE, link_encode, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_PGN", DEF_LINK_PGN, link_pgn, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_SUBTILIST", DEF_LINK_SUBTILIST, link_subtilist, MAX_WWWBUF);
+ GetAppParam ("NCBI", "WWWENTREZ", "LINK_GO", DEF_LINK_GO, link_go, MAX_WWWBUF);
+ GetAppParam ("NCBI", "WWWENTREZ", "LINK_HINVDB", DEF_LINK_HINVDB, link_hinvdb, MAX_WWWBUF);
+ GetAppParam ("NCBI", "WWWENTREZ", "LINK_HINVDBHIT", DEF_LINK_HINVDBHIT, link_hinvdbhit, MAX_WWWBUF);
+ GetAppParam ("NCBI", "WWWENTREZ", "LINK_HINVDBHIX", DEF_LINK_HINVDBHIX, link_hinvdbhix, MAX_WWWBUF);
+ GetAppParam ("NCBI", "WWWENTREZ", "LINK_ASAP", DEF_LINK_ASAP, link_asap, MAX_WWWBUF);
}
@@ -498,6 +519,24 @@ static void FF_www_db_xref_rebase (
FFAddTextToString(ffstring, ".html>", identifier, "</a>", FALSE, FALSE, TILDE_IGNORE);
}
+/* Emits an H-InvDB db_xref through FF_www_db_xref_std, picking the link
+   prefix from the identifier text: identifiers containing "HIX" use
+   link_hinvdbhix, otherwise those containing "HIT" use link_hinvdbhit,
+   and anything else falls back to link_hinvdb. */
+
+static void FF_www_db_xref_hinvdb (
+  StringItemPtr ffstring,
+  CharPtr db,
+  CharPtr identifier
+)
+{
+  CharPtr  url;
+
+  /* "HIX" is tested first because it wins when both substrings occur */
+  if (StringStr (identifier, "HIX") != NULL) {
+    url = link_hinvdbhix;
+  } else if (StringStr (identifier, "HIT") != NULL) {
+    url = link_hinvdbhit;
+  } else {
+    url = link_hinvdb;
+  }
+
+  FF_www_db_xref_std (ffstring, db, identifier, url);
+}
+
static void Do_www_db_xref(
IntAsn2gbJobPtr ajp,
@@ -587,6 +626,12 @@ static void Do_www_db_xref(
FF_www_db_xref_std(ffstring, db, identifier, link_pgn);
} else if ( StringCmp(db , "SubtiList") == 0) {
FF_www_db_xref_std(ffstring, db, identifier, link_subtilist);
+ } else if ( StringCmp(db , "GO") == 0) {
+ FF_www_db_xref_std(ffstring, db, identifier, link_go);
+ } else if ( StringCmp(db , "H-InvDB") == 0) {
+ FF_www_db_xref_hinvdb(ffstring, db, identifier);
+ } else if ( StringCmp(db , "ASAP") == 0) {
+ FF_www_db_xref_std(ffstring, db, identifier, link_asap);
} else {
/* default: no link just the text */
@@ -3001,6 +3046,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
Int4 gibbsq;
GBReferencePtr gbref = NULL;
GBSeqPtr gbseq;
+ ValNodePtr head;
Int2 i;
ImprintPtr imp;
IndxPtr index;
@@ -3030,6 +3076,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
CharPtr str;
Boolean strict_isojta;
CharPtr suffix = NULL;
+ BioseqPtr target;
CharPtr tmp;
Boolean trailingPeriod = TRUE;
ValNodePtr vnp;
@@ -3041,8 +3088,33 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
if (ajp == NULL) return NULL;
asp = afp->asp;
if (asp == NULL) return NULL;
+ target = asp->target;
bsp = asp->bsp;
- if (bsp == NULL) return NULL;
+ if (target == NULL || bsp == NULL) return NULL;
+
+ /* five-column feature table uses special code for formatting */
+
+ if (ajp->format == FTABLE_FMT) {
+ /* str is declared without an initializer, so clear it here: when the */
+ /* block has no location, or neither a pmid nor a muid, nothing below */
+ /* assigns it before the return */
+ str = NULL;
+ irp = (IntRefBlockPtr) bbp;
+ if (irp->loc != NULL) {
+ if (irp->rb.pmid != 0 || irp->rb.muid != 0) {
+ head = NULL;
+ PrintFtableIntervals (&head, target, irp->loc, "REFERENCE");
+ /* a pmid takes precedence; the muid is written only without one */
+ if (irp->rb.pmid != 0) {
+ sprintf (buf, "\t\t\tpmid\t%ld\n", (long) irp->rb.pmid);
+ ValNodeCopyStr (&head, 0, buf);
+ } else if (irp->rb.muid != 0) {
+ sprintf (buf, "\t\t\tmuid\t%ld\n", (long) irp->rb.muid);
+ ValNodeCopyStr (&head, 0, buf);
+ }
+ str = MergeFFValNodeStrs (head);
+ ValNodeFreeData (head);
+ }
+ }
+ return str;
+ }
+
+ /* otherwise do regular flatfile formatting */
ffstring = FFGetString(ajp);
if ( ffstring == NULL ) return NULL;
diff --git a/api/asn2gnb6.c b/api/asn2gnb6.c
index 462a94e0..a3ebfde5 100644
--- a/api/asn2gnb6.c
+++ b/api/asn2gnb6.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.9 $
+* $Revision: 1.23 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -350,6 +350,7 @@ NLM_EXTERN SourceType subSourceToSourceIdx [30] = {
NLM_EXTERN CharPtr legalDbXrefs [] = {
"PIDe", "PIDd", "PIDg", "PID",
"AceView/WormGenes",
+ "ASAP",
"ATCC",
"ATCC(in host)",
"ATCC(dna)",
@@ -364,6 +365,7 @@ NLM_EXTERN CharPtr legalDbXrefs [] = {
"ENSEMBL",
"ESTLIB",
"FANTOM_DB",
+ "FlyBase",
"FLYBASE",
"GABI",
"GDB",
@@ -372,6 +374,7 @@ NLM_EXTERN CharPtr legalDbXrefs [] = {
"GI",
"GO",
"GOA",
+ "H-InvDB",
"IFO",
"IMGT/LIGM",
"IMGT/HLA",
@@ -1837,10 +1840,10 @@ NLM_EXTERN CharPtr FFFlatLoc (
CheckSeqLocForPartial (location, &noLeft, &noRight);
hasNulls = LocationHasNullsBetween (location);
- loc = SeqLocMerge (bsp, location, NULL, FALSE, TRUE, hasNulls);
+ loc = SeqLocMergeEx (bsp, location, NULL, FALSE, TRUE, FALSE, hasNulls);
if (loc == NULL) {
tmp = TrimLocInSegment (bsp, location, &noLeft, &noRight);
- loc = SeqLocMerge (bsp, tmp, NULL, FALSE, TRUE, hasNulls);
+ loc = SeqLocMergeEx (bsp, tmp, NULL, FALSE, TRUE, FALSE, hasNulls);
SeqLocFree (tmp);
}
if (loc == NULL) {
@@ -2397,6 +2400,7 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
SeqFeatPtr sfp = NULL;
SubSourcePtr ssp;
CharPtr str;
+ BioseqPtr target;
CharPtr taxname = NULL;
ValNodePtr vnp;
StringItemPtr ffstring, unique;
@@ -2406,8 +2410,9 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
if (ajp == NULL) return NULL;
asp = afp->asp;
if (asp == NULL) return NULL;
+ target = asp->target;
bsp = asp->bsp;
- if (bsp == NULL) return NULL;
+ if (target == NULL || bsp == NULL) return NULL;
qvp = afp->qvp;
if (qvp == NULL) return NULL;
@@ -2417,6 +2422,15 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
gbseq = NULL;
}
+ /* five-column feature table uses special code for formatting */
+
+ if (ajp->format == FTABLE_FMT) {
+ str = FormatFtableSourceFeatBlock (bbp, target);
+ return str;
+ }
+
+ /* otherwise do regular flatfile formatting */
+
if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
isp = (IntSrcBlockPtr) bbp;
@@ -2925,90 +2939,6 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
return str;
}
-static void CountBases (
-)
-
-{
-
-}
-
-
-static Boolean CountBasesByRead (
- IntAsn2gbJobPtr ajp,
- BioseqPtr bsp,
- Int4Ptr base_count
-)
-
-{
- Byte bases [400];
- Uint1 code = Seq_code_iupacna;
- Int2 ctr;
- Int2 i;
- Int4 len;
- Uint1 residue;
- SeqPortPtr spp = NULL;
- Int4 total = 0;
-
- if (ISA_aa (bsp->mol)) {
- code = Seq_code_ncbieaa;
- }
-
- if (ajp->ajp.slp != NULL) {
- spp = SeqPortNewByLoc (ajp->ajp.slp, code);
- len = SeqLocLen (ajp->ajp.slp);
- } else {
- spp = SeqPortNew (bsp, 0, -1, 0, code);
- len = bsp->length;
- }
- if (spp == NULL) return FALSE;
- if (bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_virtual) {
- SeqPortSet_do_virtual (spp, TRUE);
- }
-
- /* use SeqPortRead rather than SeqPortGetResidue for faster performance */
-
- ctr = SeqPortRead (spp, bases, sizeof (bases));
- i = 0;
- residue = (Uint1) bases [i];
- while (residue != SEQPORT_EOF) {
- if (IS_residue (residue)) {
- total++;
- switch (residue) {
- case 'A' :
- (base_count [0])++;
- break;
- case 'C' :
- (base_count [1])++;
- break;
- case 'G' :
- (base_count [2])++;
- break;
- case 'T' :
- (base_count [3])++;
- break;
- default :
- (base_count [4])++;
- break;
- }
- }
- i++;
- if (i >= ctr) {
- i = 0;
- ctr = SeqPortRead (spp, bases, sizeof (bases));
- if (ctr < 0) {
- bases [0] = -ctr;
- } else if (ctr < 1) {
- bases [0] = SEQPORT_EOF;
- }
- }
- residue = (Uint1) bases [i];
- }
-
- SeqPortFree (spp);
-
- return TRUE;
-}
-
static void LIBCALLBACK CountBasesByStream (
CharPtr sequence,
Pointer userdata
@@ -3082,7 +3012,7 @@ NLM_EXTERN CharPtr FormatBasecountBlock (
if (ajp->ajp.slp != NULL) {
len = SeqLocLen (ajp->ajp.slp);
- if (! CountBasesByRead (ajp, bsp, base_count)) return NULL;
+ SeqPortStreamLoc (ajp->ajp.slp, STREAM_EXPAND_GAPS, (Pointer) base_count, CountBasesByStream);
} else {
len = bsp->length;
SeqPortStream (bsp, STREAM_EXPAND_GAPS, (Pointer) base_count, CountBasesByStream);
@@ -3393,13 +3323,16 @@ NLM_EXTERN CharPtr FormatContigBlock (
Asn2gbSectPtr asp;
BioseqPtr bsp;
DeltaSeqPtr dsp;
+ IntFuzzPtr fuzz;
GBSeqPtr gbseq;
SeqLitPtr litp;
CharPtr prefix = NULL;
Boolean segWithParts = FALSE;
SeqLocPtr slp_head = NULL;
CharPtr str;
- Char val [20];
+ Char tmp [16];
+ Boolean unknown;
+ Char val [32];
StringItemPtr ffstring;
/* CharPtr label;*/
@@ -3457,10 +3390,21 @@ NLM_EXTERN CharPtr FormatContigBlock (
/* don't know what to do here */
}
} else {
+ unknown = FALSE;
+ fuzz = litp->fuzz;
+ if (fuzz != NULL && fuzz->choice == 4 && fuzz->a == 0) {
+ if (! ajp->flags.forGbRelease)
+ unknown = TRUE;
+ }
+ if (unknown && litp->length > 0) {
+ sprintf (tmp, "unk%ld", (long) litp->length);
+ } else {
+ sprintf (tmp, "%ld", (long) litp->length);
+ }
if (prefix != NULL) {
- sprintf (val, "%sgap(%ld)", prefix, (long) litp->length);
+ sprintf (val, "%sgap(%s)", prefix, tmp);
} else {
- sprintf (val, "gap(%ld)", (long) litp->length);
+ sprintf (val, "gap(%s)", tmp);
}
FFAddOneString(ffstring, val, FALSE, FALSE, TILDE_IGNORE);
}
@@ -3524,25 +3468,20 @@ NLM_EXTERN CharPtr FormatSequenceBlock (
{
IntAsn2gbJobPtr ajp;
Asn2gbSectPtr asp;
- Byte bases [400];
Int2 blk;
BioseqPtr bsp;
+ Bioseq bsq;
Char buf [80];
Char ch;
- Int2 cnt = 0;
- Uint1 code = Seq_code_iupacna;
Int2 count;
- Int2 ctr;
GBSeqPtr gbseq;
- Int2 i;
IntAsn2gbSectPtr iasp;
- Boolean is_na;
Int2 lin;
- Int4 pos;
+ SeqLocPtr loc;
CharPtr ptr;
- Uint1 residue;
SeqBlockPtr sbp;
- SeqPortPtr spp;
+ SeqLoc sl;
+ SeqLocPtr slp;
Int4 start;
Int4 stop;
CharPtr str;
@@ -3589,212 +3528,109 @@ NLM_EXTERN CharPtr FormatSequenceBlock (
/* replace SeqPort with improved SeqPortStream */
- if (ajp->ajp.slp == NULL && sbp->bases == NULL) {
+ if (sbp->bases == NULL) {
start = sbp->start;
stop = sbp->stop;
if (stop > start) {
str = MemNew (sizeof (Char) * (stop - start + 3));
if (str != NULL) {
- tmp = str;
- SeqPortStreamInt (bsp, start, stop - 1, Seq_strand_plus, STREAM_EXPAND_GAPS, (Pointer) &tmp, SaveGBSeqSequence);
+ if (ajp->ajp.slp != NULL) {
+ slp = ajp->ajp.slp;
+ MemSet ((Pointer) &bsq, 0, sizeof (Bioseq));
+ MemSet ((Pointer) &sl, 0, sizeof (SeqLoc));
+ bsq.repr = Seq_repr_seg;
+ bsq.mol = bsp->mol;
+ bsq.seq_ext_type = 1;
+ bsq.length = SeqLocLen (slp);
+ bsq.seq_ext = &sl;
+ if (slp->choice == SEQLOC_MIX) {
+ loc = (SeqLocPtr) slp->data.ptrvalue;
+ if (loc != NULL) {
+ sl.choice = loc->choice;
+ sl.data.ptrvalue = (Pointer) loc->data.ptrvalue;
+ sl.next = loc->next;
+ }
+ } else {
+ sl.choice = slp->choice;
+ sl.data.ptrvalue = (Pointer) slp->data.ptrvalue;
+ sl.next = NULL;
+ }
+ SeqPortStreamInt (&bsq, start, stop - 1, Seq_strand_plus, STREAM_EXPAND_GAPS, (Pointer) str, NULL);
+ } else {
+ SeqPortStreamInt (bsp, start, stop - 1, Seq_strand_plus, STREAM_EXPAND_GAPS, (Pointer) str, NULL);
+ }
sbp->bases = str;
}
}
}
- /* if subsequence cached with SeqPortStream, use it */
+ if (sbp->bases == NULL) return NULL;
- if (sbp->bases != NULL) {
- ffstring = FFGetString (ajp);
-
- start = sbp->start;
- stop = sbp->stop;
-
- count = 0;
- blk = 0;
- lin = 0;
-
- ptr = sbp->bases;
- ch = *ptr;
-
- while (ch != '\0') {
- buf [count] = (Char) (TO_LOWER (ch));
- count++;
- ptr++;
- ch = *ptr;
-
- blk++;
- lin++;
- if (lin >= 60) {
-
- buf [count] = '\0';
- PrintSeqLine (ffstring, afp->format, buf, start, start + cnt);
- count = 0;
- cnt = 0;
- blk = 0;
- lin = 0;
- start += 60;
-
- } else if (blk >= 10) {
-
- buf [count] = ' ';
- count++;
- blk = 0;
-
- }
- }
-
- buf [count] = '\0';
- if (count > 0) {
- PrintSeqLine (ffstring, afp->format, buf, start, start + cnt);
- }
+ /* format subsequence cached with SeqPortStream */
- str = FFToCharPtr(ffstring);
-
- FFRecycleString (ajp, ffstring);
- return str;
- }
-
- /* otherwise split into smaller blocks */
-
- spp = iasp->spp;
- if (spp == NULL) {
-
- /* if first time, create SeqPort for this section */
-
- if (ISA_aa (bsp->mol)) {
- if (ajp->flags.iupacaaOnly) {
- code = Seq_code_iupacaa;
- } else {
- code = Seq_code_ncbieaa;
- }
- }
-
- if (ajp->ajp.slp != NULL) {
- spp = SeqPortNewByLoc (ajp->ajp.slp, code);
- } else {
- spp = SeqPortNew (bsp, 0, -1, 0, code);
- }
- if (spp == NULL) return NULL;
- if (bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_virtual) {
- SeqPortSet_do_virtual (spp, TRUE);
- }
-
- iasp->spp = spp;
- }
+ ffstring = FFGetString (ajp);
start = sbp->start;
stop = sbp->stop;
- if (start != spp->curpos) {
- SeqPortSeek (spp, start, SEEK_SET);
- }
-
- pos = start;
-
count = 0;
- cnt = 0;
blk = 0;
lin = 0;
- is_na = ISA_na (bsp->mol);
-
- ctr = (Int2) MIN ((Int4) (stop - pos), (Int4) sizeof (bases));
- ctr = SeqPortRead (spp, bases, ctr);
-
- i = 0;
-
- if (ctr < 0) {
- residue = -ctr;
- } else if (ctr < 1) {
- residue = SEQPORT_EOF;
- } else {
- residue = (Uint1) bases [i];
- }
+ ptr = sbp->bases;
+ ch = *ptr;
+ while (ch != '\0') {
+ buf [count] = (Char) (TO_LOWER (ch));
+ count++;
+ ptr++;
+ ch = *ptr;
- ffstring = FFGetString(ajp);
- while (pos < stop && residue != SEQPORT_EOF) {
+ blk++;
+ lin++;
+ if (lin >= 60) {
- if (residue == INVALID_RESIDUE) {
- if (is_na) {
- residue = 'N';
- } else {
- residue = 'X';
- }
- }
+ buf [count] = '\0';
+ PrintSeqLine (ffstring, afp->format, buf, start, start);
+ count = 0;
+ blk = 0;
+ lin = 0;
+ start += 60;
- if (IS_residue (residue)) {
+ } else if (blk >= 10) {
- buf [count] = (Char) (TO_LOWER (residue));
+ buf [count] = ' ';
count++;
- cnt++;
- pos++;
-
- blk++;
- lin++;
- if (lin >= 60) {
+ blk = 0;
- buf [count] = '\0';
- PrintSeqLine (ffstring, afp->format, buf, start, start + cnt);
- count = 0;
- cnt = 0;
- blk = 0;
- lin = 0;
- start += 60;
-
- } else if (blk >= 10) {
-
- buf [count] = ' ';
- count++;
- blk = 0;
-
- }
}
-
- i++;
- if (i >= ctr) {
- i = 0;
- ctr = (Int2) MIN ((Int4) (stop - pos), (Int4) sizeof (bases));
- ctr = SeqPortRead (spp, bases, ctr);
- if (ctr < 0) {
- bases [0] = -ctr;
- } else if (ctr < 1) {
- bases [0] = SEQPORT_EOF;
- }
- }
- residue = (Uint1) bases [i];
}
buf [count] = '\0';
if (count > 0) {
- PrintSeqLine (ffstring, afp->format, buf, start, start + cnt);
- }
-
- if (ajp->transientSeqPort) {
- iasp->spp = SeqPortFree (iasp->spp);
+ PrintSeqLine (ffstring, afp->format, buf, start, start);
}
str = FFToCharPtr(ffstring);
- /* optionally populate gbseq for XML-ized GenBank format */
+ FFRecycleString (ajp, ffstring);
+ return str;
+}
- /*
- if (ajp->gbseq) {
- gbseq = &asp->gbseq;
- } else {
- gbseq = NULL;
- }
+static CharPtr insd_strd [4] = {
+ NULL, "single", "double", "mixed"
+};
- if (gbseq != NULL) {
- CatenateSequenceInGbseq (gbseq, str);
- }
- */
+static CharPtr insd_mol [10] = {
+ "?", "DNA", "RNA", "tRNA", "rRNA", "mRNA", "uRNA", "snRNA", "snoRNA", "AA"
+};
- FFRecycleString(ajp, ffstring);
- return str;
-}
+static CharPtr insd_top [3] = {
+ NULL, "linear", "circular"
+};
+
+NLM_EXTERN void AsnPrintNewLine PROTO((AsnIoPtr aip));
NLM_EXTERN CharPtr FormatSlashBlock (
Asn2gbFormatPtr afp,
@@ -3806,8 +3642,10 @@ NLM_EXTERN CharPtr FormatSlashBlock (
Asn2gbSectPtr asp;
GBFeaturePtr currf, headf, nextf;
GBReferencePtr currr, headr, nextr;
- GBSeqPtr gbseq;
+ GBSeqPtr gbseq, gbtmp;
IndxPtr index;
+ INSDSeq is;
+ Int2 moltype, strandedness, topology;
if (afp == NULL || bbp == NULL) return NULL;
ajp = afp->ajp;
@@ -3870,6 +3708,71 @@ NLM_EXTERN CharPtr FormatSlashBlock (
gbseq->feature_table = headf;
}
+ /* if generating GBSeq XML/ASN, write at each slash block */
+
+ if (gbseq != NULL && afp->aip != NULL) {
+ if (ajp->produceInsdSeq) {
+ MemSet ((Pointer) &is, 0, sizeof (INSDSeq));
+ is.next = (INSDSeqPtr) gbseq->next;
+ is.OBbits__ = gbseq->OBbits__;
+ is.locus = gbseq->locus;
+ is.length = gbseq->length;
+ strandedness = (Int2) gbseq->strandedness;
+ if (strandedness < 0 || strandedness > 3) {
+ strandedness = 0;
+ }
+ is.strandedness = insd_strd [strandedness];
+ moltype = (Int2) gbseq->moltype;
+ if (moltype < 0 || moltype > 9) {
+ moltype = 0;
+ }
+ is.moltype = insd_mol [moltype];
+ topology = (Int2) gbseq->topology;
+ if (topology < 0 || topology > 2) {
+ topology = 0;
+ }
+ is.topology = insd_top [topology];
+ is.division = gbseq->division;
+ is.update_date = gbseq->update_date;
+ is.create_date = gbseq->create_date;
+ is.update_release = gbseq->update_release;
+ is.create_release = gbseq->create_release;
+ is.definition = gbseq->definition;
+ is.primary_accession = gbseq->primary_accession;
+ is.entry_version = gbseq->entry_version;
+ is.accession_version = gbseq->accession_version;
+ is.other_seqids = gbseq->other_seqids;
+ is.secondary_accessions = gbseq->secondary_accessions;
+ is.keywords = gbseq->keywords;
+ is.segment = gbseq->segment;
+ is.source = gbseq->source;
+ is.organism = gbseq->organism;
+ is.taxonomy = gbseq->taxonomy;
+ is.references = (INSDReferencePtr) gbseq->references;
+ is.comment = gbseq->comment;
+ is.primary = gbseq->primary;
+ is.source_db = gbseq->source_db;
+ is.database_reference = gbseq->database_reference;
+ is.feature_table = (INSDFeaturePtr) gbseq->feature_table;
+ is.sequence = gbseq->sequence;
+ is.contig = gbseq->contig;
+ INSDSeqAsnWrite (&is, afp->aip, afp->atp);
+ } else {
+ GBSeqAsnWrite (gbseq, afp->aip, afp->atp);
+ }
+ if (afp->atp == NULL) {
+ AsnPrintNewLine (afp->aip);
+ }
+ AsnIoFlush (afp->aip);
+
+ /* clean up gbseq fields */
+
+ gbtmp = GBSeqNew ();
+ MemCopy (gbtmp, gbseq, sizeof (GBSeq));
+ MemSet (gbseq, 0, sizeof (GBSeq));
+ GBSeqFree (gbtmp);
+ }
+
/* slash always has string pre-allocated by add slash block function */
return StringSaveNoNull (bbp->string);
diff --git a/api/asn2gnbi.h b/api/asn2gnbi.h
index 7f4be1ed..0079103a 100644
--- a/api/asn2gnbi.h
+++ b/api/asn2gnbi.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 12/30/03
*
-* $Revision: 1.4 $
+* $Revision: 1.12 $
*
* File Description: New GenBank flatfile generator, internal header
*
@@ -42,6 +42,7 @@
#define _ASN2NGNBI_
#include <asn2gnbp.h>
+#include <explore.h>
#undef NLM_EXTERN
#ifdef NLM_IMPORT
@@ -93,6 +94,7 @@ typedef struct asn2gbflags {
Boolean goQualsToNote;
Boolean geneSynsToNote;
Boolean selenocysteineToNote;
+ Boolean extraProductsToNote;
Boolean forGbRelease;
} Asn2gbFlags, PNTR Asn2gbFlagsPtr;
@@ -100,7 +102,6 @@ typedef struct asn2gbflags {
typedef struct int_Asn2gbSect {
Asn2gbSect asp;
- SeqPortPtr spp;
} IntAsn2gbSect, PNTR IntAsn2gbSectPtr;
/* string structure */
@@ -127,7 +128,6 @@ typedef struct int_asn2gb_job {
Boolean alwaysTranslCds;
Boolean showTranscript;
Boolean showPeptide;
- Boolean transientSeqPort;
Boolean masterStyle;
Boolean newSourceOrg;
Boolean produceInsdSeq;
@@ -137,10 +137,45 @@ typedef struct int_asn2gb_job {
Boolean skipMrnas;
IndxPtr index;
GBSeqPtr gbseq;
+ AsnIoPtr aip;
+ AsnTypePtr atp;
StringItemPtr pool;
Boolean www;
} IntAsn2gbJob, PNTR IntAsn2gbJobPtr;
+/* array for assigning biosource and feature data fields to qualifiers */
+/* should be allocated to MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR) */
+
+typedef union qualval {
+ CharPtr str;
+ Boolean ble;
+ Int4 num;
+ ValNodePtr vnp;
+ GBQualPtr gbq;
+ OrgModPtr omp;
+ SubSourcePtr ssp;
+ CodeBreakPtr cbp;
+ SeqLocPtr slp;
+ SeqIdPtr sip;
+ tRNAPtr trp;
+ UserObjectPtr uop;
+ UserFieldPtr ufp;
+} QualVal, PNTR QualValPtr;
+
+/* structure passed to individual paragraph format functions */
+
+typedef struct asn2gbformat {
+ IntAsn2gbJobPtr ajp;
+ Asn2gbSectPtr asp;
+ QualValPtr qvp;
+ FmtType format;
+ Asn2gbWriteFunc ffwrite;
+ Pointer userdata;
+ FILE *fp;
+ AsnIoPtr aip;
+ AsnTypePtr atp;
+} Asn2gbFormat, PNTR Asn2gbFormatPtr;
+
/* structure for storing working parameters while building asn2gb_job structure */
typedef struct asn2gbwork {
@@ -166,6 +201,10 @@ typedef struct asn2gbwork {
Int4 currsection;
+ /* set if doing immediate write at time of creation for web speed */
+
+ Asn2gbFormatPtr afp;
+
/* section fields needed for populating blocks */
Asn2gbSectPtr asp;
@@ -185,6 +224,7 @@ typedef struct asn2gbwork {
Boolean contig;
Boolean showconfeats;
Boolean showconsource;
+ Boolean smartconfeats;
Boolean onlyNearFeats;
Boolean farFeatsSuppress;
@@ -229,40 +269,9 @@ typedef struct asn2gbwork {
SeqSubmitPtr ssp;
Boolean hup;
- Boolean stream;
-
Boolean failed;
} Asn2gbWork, PNTR Asn2gbWorkPtr;
-/* array for assigning biosource and feature data fields to qualifiers */
-/* should be allocated to MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR) */
-
-typedef union qualval {
- CharPtr str;
- Boolean ble;
- Int4 num;
- ValNodePtr vnp;
- GBQualPtr gbq;
- OrgModPtr omp;
- SubSourcePtr ssp;
- CodeBreakPtr cbp;
- SeqLocPtr slp;
- SeqIdPtr sip;
- tRNAPtr trp;
- UserObjectPtr uop;
- UserFieldPtr ufp;
-} QualVal, PNTR QualValPtr;
-
-/* structure passed to individual paragraph format functions */
-
-typedef struct asn2gbformat {
- IntAsn2gbJobPtr ajp;
- Asn2gbSectPtr asp;
- QualValPtr qvp;
-
- FmtType format;
-} Asn2gbFormat, PNTR Asn2gbFormatPtr;
-
/* Seq-hist replacedBy is preformatted into string field, */
/* then comment descriptors, Map location:, and Region:, */
@@ -502,6 +511,7 @@ typedef enum {
FTQUAL_evidence,
FTQUAL_exception,
FTQUAL_exception_note,
+ FTQUAL_extra_products,
FTQUAL_figure,
FTQUAL_frequency,
FTQUAL_function,
@@ -590,6 +600,7 @@ NLM_EXTERN Char link_tax [MAX_WWWBUF];
NLM_EXTERN Char link_muid [MAX_WWWBUF];
NLM_EXTERN Char link_code [MAX_WWWBUF];
NLM_EXTERN Char link_encode [MAX_WWWBUF];
+NLM_EXTERN Char link_go [MAX_WWWBUF];
NLM_EXTERN void FF_www_db_xref(
IntAsn2gbJobPtr ajp,
@@ -933,15 +944,13 @@ NLM_EXTERN void AddContigBlock (
Asn2gbWorkPtr awp
);
NLM_EXTERN void AddBasecountBlock (
- Asn2gbWorkPtr awp,
- CharPtr bases
+ Asn2gbWorkPtr awp
);
NLM_EXTERN void AddOriginBlock (
Asn2gbWorkPtr awp
);
NLM_EXTERN void AddSequenceBlock (
- Asn2gbWorkPtr awp,
- CharPtr bases
+ Asn2gbWorkPtr awp
);
NLM_EXTERN void AddSlashBlock (
Asn2gbWorkPtr awp
@@ -967,6 +976,10 @@ NLM_EXTERN CharPtr FormatCommentBlock (
Asn2gbFormatPtr afp,
BaseBlockPtr bbp
);
+NLM_EXTERN CharPtr FormatFeatHeaderBlock (
+ Asn2gbFormatPtr afp,
+ BaseBlockPtr bbp
+);
NLM_EXTERN CharPtr FormatSourceFeatBlock (
Asn2gbFormatPtr afp,
BaseBlockPtr bbp
@@ -992,6 +1005,28 @@ NLM_EXTERN CharPtr FormatSlashBlock (
BaseBlockPtr bbp
);
+NLM_EXTERN void PrintFtableIntervals (
+ ValNodePtr PNTR head,
+ BioseqPtr target,
+ SeqLocPtr location,
+ CharPtr label
+);
+NLM_EXTERN void PrintFtableLocAndQuals (
+ IntAsn2gbJobPtr ajp,
+ ValNodePtr PNTR head,
+ BioseqPtr target,
+ SeqFeatPtr sfp,
+ SeqMgrFeatContextPtr context
+);
+NLM_EXTERN CharPtr FormatFtableSourceFeatBlock (
+ BaseBlockPtr bbp,
+ BioseqPtr target
+);
+
+NLM_EXTERN void DoImmediateFormat (
+ Asn2gbFormatPtr afp,
+ BaseBlockPtr bbp
+);
#ifdef __cplusplus
diff --git a/api/asn2gnbp.h b/api/asn2gnbp.h
index 97e0e8bd..bac55a71 100644
--- a/api/asn2gnbp.h
+++ b/api/asn2gnbp.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 6.29 $
+* $Revision: 6.31 $
*
* File Description: New GenBank flatfile generator, private header
*
diff --git a/api/objmgr.c b/api/objmgr.c
index eaba72d1..ff607cbd 100644
--- a/api/objmgr.c
+++ b/api/objmgr.c
@@ -29,13 +29,22 @@
*
* Version Creation Date: 9/94
*
-* $Revision: 6.52 $
+* $Revision: 6.55 $
*
* File Description: Manager for Bioseqs and BioseqSets
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: objmgr.c,v $
+* Revision 6.55 2004/06/09 01:56:43 kans
+* initialize assigned id array from all functions that use it
+*
+* Revision 6.54 2004/06/08 20:48:09 kans
+* changed entityID recycling from small array of integers to bit array of all possible values
+*
+* Revision 6.53 2004/06/08 18:19:01 kans
+* ObjMgrFreeCacheFunc frees all TL_CACHED, and frees TL_LOADED if type == 0
+*
* Revision 6.52 2004/04/21 19:40:51 kans
* ObjMgrReap calculate tempcnt based on temp loaded records, but excluded locked ones - more work still to do in other functions to completely avoid unnecessary thrashing
*
@@ -545,65 +554,135 @@ NLM_EXTERN ObjMgrDataPtr LIBCALL ObjMgrFindByData (ObjMgrPtr omp, Pointer ptr)
return NULL;
}
-#define ENTITY_ID_STACK_SIZE 100
+static Uint4 assignedIDsArray [2050];
+static Int2 assignedIDStackPt = 0;
+static Boolean assignedIDsInited = FALSE;
-static Uint2 recycledEntityIDs [ENTITY_ID_STACK_SIZE];
-static Int4 recycledIDStackPt = 0;
+static Uint4 assignedIDsBitIdx [32];
-extern void ObjMgrRemoveEntityIDFromRecycle (Uint2 entityID, ObjMgrPtr omp);
-extern void ObjMgrRemoveEntityIDFromRecycle (Uint2 entityID, ObjMgrPtr omp)
+/* One-time setup of the entityID bit array: clears assignedIDsArray, */
+/* fills assignedIDsBitIdx with the 32 single-bit masks, marks entityID 0 */
+/* as taken, and resets the search start.  Later calls do nothing. */
+/* Always returns 0; the callers visible in this file ignore the result. */
+
+static Uint2 ObjMgrInitAssignedIDArray (void)
{
- Int4 i;
+ Uint4 bit;
+ Int2 jdx;
+
+ if (! assignedIDsInited) {
+ MemSet ((Pointer) &assignedIDsArray, 0, sizeof assignedIDsArray);
+ MemSet ((Pointer) &assignedIDsBitIdx, 0, sizeof (assignedIDsBitIdx));
+
+ /* initialize bit index array */
+
+ bit = 1;
+ for (jdx = 0; jdx < 32; jdx++) {
+ assignedIDsBitIdx [jdx] = bit;
+ bit = bit << 1;
+ }
+
+ /* entityID 0 is not available for use */
- if (entityID < 1) return;
- if (omp != NULL) {
- for (i = 0; i < recycledIDStackPt; i++) {
- if (entityID == recycledEntityIDs [i]) {
- recycledEntityIDs [i] = 0; /* remove from recycle list */
- if (recycledIDStackPt > i + 1) {
- recycledIDStackPt--;
- recycledEntityIDs [i] = recycledEntityIDs [recycledIDStackPt];
- } else {
- recycledIDStackPt--;
- }
- }
- }
- }
+ assignedIDsArray [0] = assignedIDsBitIdx [0];
+
+ assignedIDStackPt = 0;
+ assignedIDsInited = TRUE;
+ }
+
+ /* declared to return Uint2, so do not fall off the end without a value */
+
+ return 0;
}
static Uint2 ObjMgrNextAvailEntityID (ObjMgrPtr omp)
{
- Uint2 entityID = 0;
- if (omp != NULL) {
- if (recycledIDStackPt > 0) {
- recycledIDStackPt--;
- entityID = recycledEntityIDs [recycledIDStackPt];
- } else {
- entityID = ++(omp->HighestEntityID);
- }
- }
- return entityID;
+ Uint2 entityID;
+ Int2 idx, jdx;
+ Uint4 val;
+
+ if (! assignedIDsInited) {
+ ObjMgrInitAssignedIDArray ();
+ }
+
+ /* find first 32 bit word with an available entityID */
+
+ idx = assignedIDStackPt;
+ while (idx < 2048 && assignedIDsArray [idx] == 0xFFFFFFFF) {
+ idx++;
+ }
+ if (idx >= 2048) return 0;
+
+ /* reset starting point, everything below should be in use */
+
+ assignedIDStackPt = idx;
+
+ /* find first empty bit in array element */
+
+ val = assignedIDsArray [idx];
+ jdx = 0;
+ while (jdx < 32 && (val & assignedIDsBitIdx [jdx]) != 0) {
+ jdx++;
+ }
+ if (jdx >= 32) return 0;
+
+ /* set bit to mark new entityID as in use */
+
+ assignedIDsArray [idx] |= assignedIDsBitIdx [jdx];
+
+ /* calculate entityID */
+
+ entityID = (Uint2) ((Int4) idx) * 32L + (Int4) jdx;
+
+ if (omp != NULL && omp->HighestEntityID < entityID) {
+ omp->HighestEntityID = entityID;
+ }
+
+ return entityID;
}
static void ObjMgrRecycleEntityID (Uint2 entityID, ObjMgrPtr omp)
{
+ Int2 idx, jdx;
- Int4 i;
+ if (! assignedIDsInited) {
+ ObjMgrInitAssignedIDArray ();
+ }
- if (entityID < 1) return;
- if (omp != NULL) {
- /* check to see if entity is already on stack (e.g., entity 1), abort if so */
- for (i = 0; i < recycledIDStackPt; i++) {
- if (entityID == recycledEntityIDs [i]) return;
- }
- if (recycledIDStackPt < ENTITY_ID_STACK_SIZE) {
- recycledEntityIDs [recycledIDStackPt] = entityID;
- recycledIDStackPt++;
- }
- }
+ if (entityID < 1) return;
+
+ idx = (Int2) (entityID / 32);
+ jdx = (Int2) (entityID % 32);
+
+ if (idx >= 2048 || idx < 0) return;
+ if (jdx >= 32 || jdx < 0) return;
+
+ /* clear bit to mark old entityID as available; mask it off rather than */
+ /* toggling, so recycling an ID that is already free leaves it free */
+
+ assignedIDsArray [idx] &= ~(assignedIDsBitIdx [jdx]);
+
+ /* reset starting point, everything below should be in use */
+
+ if (idx < assignedIDStackPt) {
+ assignedIDStackPt = idx;
+ }
+}
+
+extern void ObjMgrRemoveEntityIDFromRecycle (Uint2 entityID, ObjMgrPtr omp);
+extern void ObjMgrRemoveEntityIDFromRecycle (Uint2 entityID, ObjMgrPtr omp)
+
+{
+ Int2 idx, jdx;
+
+ if (! assignedIDsInited) {
+ ObjMgrInitAssignedIDArray ();
+ }
+
+ if (entityID < 1) return;
+
+ idx = (Int2) (entityID / 32);
+ jdx = (Int2) (entityID % 32);
+
+ if (idx >= 2048 || idx < 0) return;
+ if (jdx >= 32 || jdx < 0) return;
+
+ /* set bit to restore old entityID status to in use */
+
+ assignedIDsArray [idx] |= assignedIDsBitIdx [jdx];
}
NLM_EXTERN Uint2 LIBCALL ObjMgrAddEntityID (ObjMgrPtr omp, ObjMgrDataPtr omdp)
@@ -2268,7 +2347,8 @@ static Boolean NEAR ObjMgrFreeCacheFunc (ObjMgrPtr omp, Uint2 type, Uint2Ptr ret
{
omdp = omdpp[i];
if ((omdp->parentptr == NULL) && /* top level */
- (omdp->tempload == TL_CACHED)) /* cached */
+ (omdp->tempload == TL_CACHED || /* cached or */
+ (type == 0 && omdp->tempload == TL_LOADED))) /* unlocked but not cached out */
{
if ((! type) ||
(ObjMgrMatch(type, omdp->datatype)) ||
diff --git a/api/salpacc.c b/api/salpacc.c
index 66e12944..cf97a31f 100644
--- a/api/salpacc.c
+++ b/api/salpacc.c
@@ -21,7 +21,7 @@
* Please cite the author in any work or product based on this material.
*
* ===========================================================================
- * $Id: salpacc.c,v 6.28 2004/04/07 17:36:48 bollin Exp $
+ * $Id: salpacc.c,v 6.29 2004/05/20 20:21:24 bollin Exp $
Collection of SeqAlign Accession utilities.
Maintainer: Hugues Sicotte
Authors of the original routines: Hugues Sicotte, Colombe Chappey, Tom Madden, Jinghui Zhang
@@ -567,24 +567,30 @@ NLM_EXTERN Uint1 LIBCALL SeqAlignMolType (SeqAlignPtr salp)
Int2 dim;
Uint1 moltype = 0;
Boolean molb;
+ SeqAlignPtr tmpsalp;
if (salp==NULL)
return FALSE;
- if (salp->segtype == 1) {
+ if (salp->segtype == SAS_DENDIAG) {
ddp = (DenseDiagPtr) salp->segs;
sip = ddp->id;
dim = ddp->dim;
}
- else if (salp->segtype == 2) {
+ else if (salp->segtype == SAS_DENSEG) {
dsp = (DenseSegPtr) salp->segs;
sip = dsp->ids;
dim = dsp->dim;
}
- else if (salp->segtype == 3) {
+ else if (salp->segtype == SAS_STD) {
ssp = (StdSegPtr) salp->segs;
sip = ssp->ids;
dim = ssp->dim;
}
+ else if (salp->segtype == SAS_DISC)
+ {
+ tmpsalp = (SeqAlignPtr) salp->segs;
+ return SeqAlignMolType (tmpsalp);
+ }
if (sip!=NULL) {
for (k = 0; k < dim && sip!=NULL; k++, sip = sip->next)
{
diff --git a/api/salprop.c b/api/salprop.c
index 0280d7bb..71359dac 100644
--- a/api/salprop.c
+++ b/api/salprop.c
@@ -120,7 +120,6 @@ static Boolean is_id_unique (Uint2 entityID, SeqIdPtr sip)
{
SeqEntryPtr sep;
CcId ci;
- Boolean ret;
sep = GetTopSeqEntryForEntityID (entityID);
ci.sip = SeqIdDup (sip);
diff --git a/api/salsap.c b/api/salsap.c
index f693e46e..f162517c 100644
--- a/api/salsap.c
+++ b/api/salsap.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/27/96
*
-* $Revision: 6.9 $
+* $Revision: 6.10 $
*
* File Description:
*
@@ -3172,6 +3172,59 @@ static void FindSeqAlignCallback (SeqEntryPtr sep, Pointer mydata,
}
}
+/*
+ * IsIdInAlignment
+ *
+ * Returns TRUE if sip is found among the sequence IDs of alignment sap.
+ *
+ *   - SAS_DISC:   sap->segs is treated as a chain of sub-alignments, each of
+ *                 which is searched recursively until a match is found.
+ *   - SAS_DENSEG: the ID list of the dense-seg is searched; a positive result
+ *                 from SeqIdOrderInBioseqIdList is treated as a match.
+ *   - any other segtype is not examined and yields FALSE.
+ *
+ * A NULL sip or sap, or a dense-seg alignment with no segs, yields FALSE.
+ */
+static Boolean IsIdInAlignment (SeqIdPtr sip, SeqAlignPtr sap)
+{
+  SeqAlignPtr  sap_tmp;
+  Boolean      found = FALSE;
+  DenseSegPtr  dsp;
+
+  if (sip == NULL || sap == NULL) return FALSE;
+
+  if (sap->segtype == SAS_DISC)
+  {
+    sap_tmp = (SeqAlignPtr) sap->segs;
+    while (!found && sap_tmp != NULL)
+    {
+      found = IsIdInAlignment (sip, sap_tmp);
+      sap_tmp = sap_tmp->next;
+    }
+  }
+  else if (sap->segtype == SAS_DENSEG)
+  {
+    dsp = (DenseSegPtr) sap->segs;
+    /* guard against an alignment whose segs pointer was never filled in */
+    if (dsp != NULL && SeqIdOrderInBioseqIdList (sip, dsp->ids) > 0)
+    {
+      found = TRUE;
+    }
+  }
+  return found;
+}
+
+/*
+ * FindSeqAlignVisitCallback
+ *
+ * Annotation visitor used by FindSeqAlignInSeqEntry. userdata is a CcId2Ptr
+ * whose sap field receives the result; once it is non-NULL every later call
+ * returns immediately, so only the first match is recorded.
+ *
+ * Only annotations with type 2 are examined (presumably alignment
+ * annotations - confirm against the SeqAnnot type codes). An alignment
+ * matches when cip->sip is NULL or IsIdInAlignment finds cip->sip in it.
+ * On a match the result is the SeqAlign itself when cip->choice is
+ * OBJ_SEQALIGN, otherwise the enclosing SeqAnnot.
+ */
+static void FindSeqAlignVisitCallback (SeqAnnotPtr sap, Pointer userdata)
+{
+  CcId2Ptr     cip;
+  SeqAlignPtr  salp;
+
+  if (sap == NULL || sap->type != 2) return;
+
+  cip = (CcId2Ptr) userdata;
+  if (cip == NULL || cip->sap != NULL) return;
+
+  /* the loop ends as soon as cip->sap is set by a match */
+  for (salp = sap->data; salp != NULL && cip->sap == NULL; salp = salp->next)
+  {
+    if (cip->sip == NULL || IsIdInAlignment (cip->sip, salp))
+    {
+      if (cip->choice == OBJ_SEQALIGN)
+      {
+        cip->sap = (Pointer) salp;
+      }
+      else
+      {
+        cip->sap = sap;
+      }
+    }
+  }
+}
+
+
NLM_EXTERN Pointer LIBCALL FindSeqAlignInSeqEntry (SeqEntryPtr sep, Uint1 choice)
{
SeqEntryPtr sep_head;
@@ -3193,7 +3246,7 @@ NLM_EXTERN Pointer LIBCALL FindSeqAlignInSeqEntry (SeqEntryPtr sep, Uint1 choice
}
entityID = ObjMgrGetEntityIDForChoice (sep);
sep_head = GetTopSeqEntryForEntityID (entityID);
- SeqEntryExplore (sep_head, (Pointer)&ci, FindSeqAlignCallback);
+ VisitAnnotsInSep (sep_head, (Pointer)&ci, FindSeqAlignVisitCallback);
if (ci.sip != NULL)
SeqIdFree (ci.sip);
return ci.sap;
diff --git a/api/seqmgr.c b/api/seqmgr.c
index 0046a2d4..917c5e40 100644
--- a/api/seqmgr.c
+++ b/api/seqmgr.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/94
*
-* $Revision: 6.214 $
+* $Revision: 6.215 $
*
* File Description: Manager for Bioseqs and BioseqSets
*
@@ -39,6 +39,9 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: seqmgr.c,v $
+* Revision 6.215 2004/05/13 19:38:08 kans
+* SeqLocMergeExEx takes ignore_mixed so gene by overlap can ignore trans splicing confusion
+*
* Revision 6.214 2004/05/04 17:34:23 bollin
* initialize variables
*
@@ -5544,7 +5547,7 @@ static void RecordOneFeature (BioseqExtraPtr bspextra, ObjMgrDataPtr omdp,
single_interval = (Boolean) (item->subtype == FEATDEF_GENE ||
item->subtype == FEATDEF_PUB);
*/
- loc = SeqLocMergeEx (bsp, sfp->location, NULL, FALSE, FALSE, FALSE, FALSE);
+ loc = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, FALSE, FALSE, FALSE, TRUE);
if (exindx->flip) {
sip = SeqIdFindBest (bsp->id, 0);
@@ -6890,7 +6893,7 @@ static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats)
/* map to segmented bioseq coordinates if necessary */
- segloc = SeqLocMergeEx (nuc, dnaloc, NULL, FALSE, TRUE, FALSE, FALSE);
+ segloc = SeqLocMergeExEx (nuc, dnaloc, NULL, FALSE, TRUE, FALSE, FALSE, TRUE);
SeqLocFree (dnaloc);
if (segloc != NULL) {
@@ -8037,7 +8040,7 @@ static SeqFeatPtr SeqMgrGetBestOverlappingFeat (SeqLocPtr slp, Uint2 subtype,
hier = feat->overlap;
}
- loc = SeqLocMergeEx (bsp, slp, NULL, FALSE, /* TRUE */ FALSE, FALSE, FALSE);
+ loc = SeqLocMergeExEx (bsp, slp, NULL, FALSE, /* TRUE */ FALSE, FALSE, FALSE, TRUE);
strand = SeqLocStrand (loc);
if (overlapType == CHECK_INTERVALS) {
tmp = NULL;
diff --git a/api/seqport.c b/api/seqport.c
index 6bd8b494..26a9521f 100644
--- a/api/seqport.c
+++ b/api/seqport.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/13/91
*
-* $Revision: 6.108 $
+* $Revision: 6.124 $
*
* File Description: Ports onto Bioseqs
*
@@ -39,6 +39,54 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: seqport.c,v $
+* Revision 6.124 2004/06/09 14:03:48 bollin
+* unknown length gaps should always be length 100 after converting
+*
+* Revision 6.123 2004/06/09 13:36:31 bollin
+* changed ConvertNsToGaps to create gaps of unknown length.
+*
+* Revision 6.122 2004/06/08 18:16:43 kans
+* removed SEQPORT_STREAM_FREE_COMPONENT and SEQPORT_STREAM_HAMMER environment variable support
+*
+* Revision 6.121 2004/06/07 21:17:13 kans
+* SeqPortStreamSeqLoc checks SEQPORT_STREAM_FREE_COMPONENT environment variable, frees after fetching to recycle entityIDs, keep from rolling over
+*
+* Revision 6.120 2004/06/03 17:21:49 kans
+* make SortByIntvalue extern
+*
+* Revision 6.119 2004/06/03 15:09:50 kans
+* SeqPortStreamSeqLoc is passed parent seqid for more informative error messages
+*
+* Revision 6.118 2004/06/02 21:26:07 kans
+* SeqPortStreamSeqLoc checks SEQPORT_STREAM_HAMMER environment variable, retries sequence fetch until success
+*
+* Revision 6.117 2004/06/01 14:09:33 kans
+* SeqPortStreamSeqLoc now checks environment variable for sleep time between fetch retries
+*
+* Revision 6.116 2004/05/27 20:54:53 kans
+* fixed single interval fake segmented sequence in FormatSequenceBlock and StreamCacheGetResidue
+*
+* Revision 6.115 2004/05/27 19:47:03 kans
+* report buffer overflow problem if q > protlen, not q >= protlen
+*
+* Revision 6.114 2004/05/27 19:37:09 kans
+* TransTableTranslateCommon protects against protein buffer overflow, then warns
+*
+* Revision 6.113 2004/05/24 19:21:58 kans
+* FormatSequenceBlock and StreamCacheGetResidue use Seq_repr_seg header for location, special case SEQLOC_MIX
+*
+* Revision 6.112 2004/05/13 21:33:59 kans
+* SeqPortStreamSeqLoc reports number of BioseqLockById attempts if first failed
+*
+* Revision 6.111 2004/05/13 17:58:00 kans
+* SeqPortStreamSeqLoc uses SEQPORT_STREAM_FETCH_ATTEMPTS environment variable for multiple fetch attempts
+*
+* Revision 6.110 2004/05/12 18:55:33 kans
+* StreamCache takes SeqLocPtr as well as BioseqPtr optional arguments, slp version is equivalent of SeqPortNewByLoc
+*
+* Revision 6.109 2004/05/05 17:32:09 kans
+* SaveCdsBases callback for ReadCodingRegionBases a little more efficient if frame > 0, does not call StringLen on sequence fragment
+*
* Revision 6.108 2004/05/03 20:58:33 kans
* SaveCdsBases protects against rare cases where the frame is >= the number of sequence bases passed from the first or second segment
*
@@ -2697,19 +2745,32 @@ static Int4 SeqPortStreamLit (
return count;
}
+/* retry limit for failed Bioseq fetches in SeqPortStreamSeqLoc; read once from the SEQPORT_STREAM_FETCH_ATTEMPTS environment variable */
+static Int2 stream_retry_attempts = 0;
+static Boolean stream_retry_count_set = FALSE;
+
+/* value passed to sleep () between fetch retries; read once from the SEQPORT_STREAM_RETRY_SLEEP environment variable */
+static Int2 stream_retry_sleep = 0;
+/* NOTE(review): "retryp" looks like a typo for "retry"; the name is used as-is in SeqPortStreamSeqLoc */
+static Boolean stream_retryp_sleep_set = FALSE;
+
static Int4 SeqPortStreamSeqLoc (
SeqLocPtr slp,
Int4 start,
Int4 stop,
Uint1 strand,
- StreamDataPtr sdp
+ StreamDataPtr sdp,
+ SeqIdPtr parentID
)
{
- BioseqPtr bsp;
- Char buf [64];
- Int4 count = 0;
- SeqIdPtr sip;
+ BioseqPtr bsp;
+ Char buf [64];
+ Int4 count = 0;
+ Char pid [64];
+ SeqIdPtr sip;
+#ifdef OS_UNIX
+ Int2 attempts;
+ CharPtr str;
+ int val = 0;
+#endif
if (slp == NULL || sdp == NULL) return 0;
@@ -2717,9 +2778,69 @@ static Int4 SeqPortStreamSeqLoc (
if (sip == NULL) return 0;
bsp = BioseqLockById (sip);
+
+#ifdef OS_UNIX
+ if (bsp == NULL) {
+
+ /* number of retries and sleep between retries now configured by environment variable */
+
+ if (! stream_retry_count_set) {
+ str = (CharPtr) getenv ("SEQPORT_STREAM_FETCH_ATTEMPTS");
+ if (StringDoesHaveText (str)) {
+ if (sscanf (str, "%d", &val) == 1) {
+ stream_retry_attempts = (Uint2) val;
+ }
+ }
+ stream_retry_count_set = TRUE;
+ }
+
+ if (! stream_retryp_sleep_set) {
+ str = (CharPtr) getenv ("SEQPORT_STREAM_RETRY_SLEEP");
+ if (StringDoesHaveText (str)) {
+ if (sscanf (str, "%d", &val) == 1) {
+ stream_retry_sleep = (Uint2) val;
+ }
+ }
+ stream_retryp_sleep_set = TRUE;
+ }
+
+ /* retry failed fetch attempt up to specified limit */
+
+ if (stream_retry_attempts > 1) {
+ attempts = 1;
+ while (bsp == NULL && attempts < stream_retry_attempts) {
+ if (stream_retry_sleep > 0) {
+ sleep (stream_retry_sleep);
+ }
+
+ bsp = BioseqLockById (sip);
+ attempts++;
+ }
+ if (bsp != NULL) {
+ SeqIdWrite (sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
+ if (parentID != NULL) {
+ SeqIdWrite (parentID, pid, PRINTID_FASTA_LONG, sizeof (pid) - 1);
+ ErrPostEx (SEV_WARNING, 0, 0,
+ "SeqPortStream loaded Bioseq %s component of %s after %d attempts",
+ buf, pid, (int) attempts);
+ } else {
+ ErrPostEx (SEV_WARNING, 0, 0,
+ "SeqPortStream loaded Bioseq %s after %d attempts",
+ buf, (int) attempts);
+ }
+ }
+ }
+ }
+#endif
+
if (bsp == NULL) {
SeqIdWrite (sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
- ErrPostEx (SEV_ERROR, 0, 0, "SeqPortStream failed to load Bioseq %s", buf);
+ if (parentID != NULL) {
+ SeqIdWrite (parentID, pid, PRINTID_FASTA_LONG, sizeof (pid) - 1);
+ ErrPostEx (SEV_ERROR, 0, 0, "SeqPortStream failed to load Bioseq %s component of %s", buf, pid);
+ } else {
+ ErrPostEx (SEV_ERROR, 0, 0, "SeqPortStream failed to load Bioseq %s", buf);
+ }
return 0;
}
@@ -2930,7 +3051,7 @@ static Int4 SeqPortStreamDelta (
if (sop->slp != NULL) {
- count += SeqPortStreamSeqLoc (sop->slp, sop->from, sop->to, sop->strand, sdp);
+ count += SeqPortStreamSeqLoc (sop->slp, sop->from, sop->to, sop->strand, sdp, bsp->id);
} else if (sop->slitp != NULL) {
@@ -3048,7 +3169,7 @@ static Int4 SeqPortStreamSeg (
if (sop->slp != NULL) {
- count += SeqPortStreamSeqLoc (sop->slp, sop->from, sop->to, sop->strand, sdp);
+ count += SeqPortStreamSeqLoc (sop->slp, sop->from, sop->to, sop->strand, sdp, bsp->id);
}
}
@@ -3209,7 +3330,7 @@ static Int4 SeqPortStreamSetup (
to = SeqLocStop (slp);
strand = SeqLocStrand (slp);
- count += SeqPortStreamSeqLoc (slp, from, to, strand, &sd);
+ count += SeqPortStreamSeqLoc (slp, from, to, strand, &sd, NULL);
slp = SeqLocFindNext (loc, slp);
}
@@ -3260,7 +3381,7 @@ NLM_EXTERN void SeqPortStreamLoc (
/*******************************************************************************
*
-* StreamCacheSetup (bsp, flags, scp)
+* StreamCacheSetup (bsp, slp, flags, scp)
* StreamCacheGetResidue (scp)
* StreamCacheSetPosition (scp, pos)
* SeqPort functional replacement implemented on top of SeqPortStreams
@@ -3269,17 +3390,24 @@ NLM_EXTERN void SeqPortStreamLoc (
NLM_EXTERN Boolean StreamCacheSetup (
BioseqPtr bsp,
+ SeqLocPtr slp,
StreamFlgType flags,
StreamCache PNTR scp
)
{
- if (bsp == NULL || scp == NULL) return FALSE;
+ if (bsp == NULL && slp == NULL) return FALSE;
+ if (scp == NULL) return FALSE;
MemSet ((Pointer) scp, 0, sizeof (StreamCache));
- scp->bsp = bsp;
- scp->length = bsp->length;
+ if (bsp != NULL) {
+ scp->bsp = bsp;
+ scp->length = bsp->length;
+ } else {
+ scp->slp = slp;
+ scp->length = SeqLocLen (slp);
+ }
scp->flags = flags;
return TRUE;
@@ -3290,8 +3418,12 @@ NLM_EXTERN Uint1 StreamCacheGetResidue (
)
{
- Uint1 residue = '\0';
- Int4 stop;
+ Bioseq bsq;
+ SeqLocPtr loc;
+ Uint1 residue = '\0';
+ SeqLoc sl;
+ SeqLocPtr slp;
+ Int4 stop;
if (scp == NULL) return residue;
@@ -3303,8 +3435,37 @@ NLM_EXTERN Uint1 StreamCacheGetResidue (
if (scp->offset >= scp->length) return residue;
stop = MIN (scp->offset + 4000L, scp->length);
- SeqPortStreamInt (scp->bsp, scp->offset, stop - 1, Seq_strand_plus,
- scp->flags, (Pointer) &(scp->buf), NULL);
+
+ if (scp->bsp != NULL) {
+
+ SeqPortStreamInt (scp->bsp, scp->offset, stop - 1, Seq_strand_plus,
+ scp->flags, (Pointer) &(scp->buf), NULL);
+
+ } else if (scp->slp != NULL) {
+
+ slp = scp->slp;
+ MemSet ((Pointer) &bsq, 0, sizeof (Bioseq));
+ MemSet ((Pointer) &sl, 0, sizeof (SeqLoc));
+ bsq.repr = Seq_repr_seg;
+ bsq.mol = Seq_mol_na;
+ bsq.seq_ext_type = 1;
+ bsq.length = SeqLocLen (slp);
+ bsq.seq_ext = &sl;
+ if (slp->choice == SEQLOC_MIX) {
+ loc = (SeqLocPtr) slp->data.ptrvalue;
+ if (loc != NULL) {
+ sl.choice = loc->choice;
+ sl.data.ptrvalue = (Pointer) loc->data.ptrvalue;
+ sl.next = loc->next;
+ }
+ } else {
+ sl.choice = slp->choice;
+ sl.data.ptrvalue = (Pointer) slp->data.ptrvalue;
+ sl.next = NULL;
+ }
+ SeqPortStreamInt (&bsq, scp->offset, stop - 1, Seq_strand_plus,
+ scp->flags, (Pointer) &(scp->buf), NULL);
+ }
scp->total = StringLen (scp->buf);
}
@@ -5807,13 +5968,20 @@ static void LIBCALLBACK SaveCdsBases (
)
{
- size_t len;
+ Char ch;
+ int len;
ReadCdsPtr rcp;
rcp = (ReadCdsPtr) userdata;
if (rcp->frame > 0) {
- len = StringLen (sequence);
+ len = 0;
+ ch = sequence [len];
+ while (ch != '\0' && len <= rcp->frame) {
+ len++;
+ ch = sequence [len];
+ }
+ /* len = StringLen (sequence); */
if (rcp->frame >= len) {
/* unusual locations can have fewer bases in the first segments than the frame, so just decrement */
@@ -5937,28 +6105,6 @@ NLM_EXTERN CharPtr ReadCodingRegionBases (SeqLocPtr location, Int4 len, Uint1 fr
return bases;
}
-static int LIBCALLBACK SortByIntvalue (VoidPtr ptr1, VoidPtr ptr2)
-
-{
- Int4 val1;
- Int4 val2;
- ValNodePtr vnp1;
- ValNodePtr vnp2;
-
- if (ptr1 == NULL || ptr2 == NULL) return 0;
- vnp1 = *((ValNodePtr PNTR) ptr1);
- vnp2 = *((ValNodePtr PNTR) ptr2);
- if (vnp1 == NULL || vnp2 == NULL) return 0;
- val1 = (Int4) vnp1->data.intvalue;
- val2 = (Int4) vnp2->data.intvalue;
- if (val1 > val2) {
- return 1;
- } else if (val1 < val2) {
- return -1;
- }
- return 0;
-}
-
static ValNodePtr MakeCodeBreakList (SeqLocPtr cdslocation, Int4 len, CodeBreakPtr cbp, Uint1 frame)
{
@@ -6187,7 +6333,9 @@ static ByteStorePtr TransTableTranslateCommon (
} else {
- protseq [q] = aa;
+ if (q < protlen) { /* protect against accidental buffer overflow */
+ protseq [q] = aa;
+ }
q++;
/*
BSPutByte (bs, (Int2) aa);
@@ -6199,6 +6347,10 @@ static ByteStorePtr TransTableTranslateCommon (
p++;
}
+ if (q > protlen) {
+ ErrPostEx (SEV_ERROR, 0, 0, "TransTableTranslate - %ld characters written, %ld characters expected", (long) q, (long) protlen);
+ }
+
if (k > total) {
incompleteLastCodon = TRUE;
}
@@ -7459,8 +7611,16 @@ NLM_EXTERN void ConvertNsToGaps (
Int4 len;
ValNodePtr seq_ext;
SeqLitPtr slp;
+ Boolean use_unknown = FALSE;
+ Int4 unknown_gap_size;
+ IntFuzzPtr ifp;
if (bsp == NULL || bsp->repr != Seq_repr_raw || ISA_aa (bsp->mol)) return;
+ if (userdata != NULL)
+ {
+ use_unknown = TRUE;
+ unknown_gap_size = *((Int4Ptr) userdata);
+ }
bases = GetSequenceByBsp (bsp);
if (bases == NULL) return;
@@ -7512,6 +7672,16 @@ NLM_EXTERN void ConvertNsToGaps (
slp->length = StringLen (str);
ValNodeAddPointer ((ValNodePtr PNTR) &(seq_ext), (Int2) 2, (Pointer) slp);
len += slp->length;
+ if (use_unknown && (unknown_gap_size == -1 || slp->length == unknown_gap_size))
+ {
+ ifp = IntFuzzNew ();
+ ifp->choice = 4;
+ slp->fuzz = ifp;
+ if (unknown_gap_size == -1)
+ {
+ slp->length = 100;
+ }
+ }
}
}
*txt = ch;
diff --git a/api/seqport.h b/api/seqport.h
index eafce65c..85bfca10 100644
--- a/api/seqport.h
+++ b/api/seqport.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/13/91
*
-* $Revision: 6.42 $
+* $Revision: 6.43 $
*
* File Description: Ports onto Bioseqs
*
@@ -40,6 +40,9 @@
*
*
* $Log: seqport.h,v $
+* Revision 6.43 2004/05/12 18:55:33 kans
+* StreamCache takes SeqLocPtr as well as BioseqPtr optional arguments, slp version is equivalent of SeqPortNewByLoc
+*
* Revision 6.42 2004/04/27 20:09:26 kans
* StreamCacheGetResidue returns Uint1 because Char might be signed, preventing IS_residue from working
*
@@ -394,7 +397,7 @@ NLM_EXTERN void SeqPortStreamLoc (
/*******************************************************************************
*
-* StreamCacheSetup (bsp, flags, scp)
+* StreamCacheSetup (bsp, slp, flags, scp)
* StreamCacheGetResidue (scp)
* StreamCacheSetPosition (scp, pos)
* SeqPort functional replacement implemented on top of SeqPortStreams
@@ -403,6 +406,7 @@ NLM_EXTERN void SeqPortStreamLoc (
typedef struct streamcache {
BioseqPtr bsp;
+ SeqLocPtr slp;
Char buf [4004];
Int2 ctr;
Int2 total;
@@ -413,6 +417,7 @@ typedef struct streamcache {
NLM_EXTERN Boolean StreamCacheSetup (
BioseqPtr bsp,
+ SeqLocPtr slp,
StreamFlgType flags,
StreamCache PNTR scp
);
diff --git a/api/sequtil.c b/api/sequtil.c
index 5d10033d..135953ad 100644
--- a/api/sequtil.c
+++ b/api/sequtil.c
@@ -29,13 +29,22 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.147 $
+* $Revision: 6.150 $
*
* File Description: Sequence Utilities for objseq and objsset
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: sequtil.c,v $
+* Revision 6.150 2004/06/04 17:31:34 kans
+* added CN and CO accession prefixes
+*
+* Revision 6.149 2004/05/27 15:37:31 kans
+* fixed typo in WHICH_db_accession - new 12-character RefSeq test used wrong comparison for underscore test
+*
+* Revision 6.148 2004/05/25 20:46:18 kans
+* WHICH_db_accession handles 12-character RefSeq accessions
+*
* Revision 6.147 2004/03/30 20:29:33 kans
* in static std_order array within SeqIdBestRank, demoted gibbsq, gibbmt, and patent
*
@@ -8794,7 +8803,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"CD") == 0) ||
(StringICmp(temp,"CF") == 0) ||
(StringICmp(temp,"CK") == 0) ||
- (StringICmp(temp,"CL") == 0) ) { /* NCBI EST */
+ (StringICmp(temp,"CN") == 0) ||
+ (StringICmp(temp,"CO") == 0) ) { /* NCBI EST */
retcode = ACCN_NCBI_EST;
} else if ((StringICmp(temp,"BV") == 0)) { /* NCBI STS */
retcode = ACCN_NCBI_STS;
@@ -8946,28 +8956,50 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
s++;
}
break;
- case 12: /* whole genome shotgun 12-character accession, four letters + 8 digits */
- if(!IS_ALPHA(*s) || !IS_ALPHA(*(s+1)) || !IS_ALPHA(*(s+2)) || !IS_ALPHA(*(s+3)))
- break;
- temp[0] = *s; s++;
- temp[1] = *s; s++;
- temp[2] = *s; s++;
- temp[3] = *s; s++;
- temp[4] = '\0';
- if ((StringNICmp(temp,"A", 1) == 0)) {
+ case 12:
+ if(IS_ALPHA(*s) && IS_ALPHA(*(s+1)) && IS_ALPHA(*(s+2)) && IS_ALPHA(*(s+3))) {
+ /* whole genome shotgun 12-character accession, four letters + 8 digits */
+ temp[0] = *s; s++;
+ temp[1] = *s; s++;
+ temp[2] = *s; s++;
+ temp[3] = *s; s++;
+ temp[4] = '\0';
+ if ((StringNICmp(temp,"A", 1) == 0)) {
retcode = ACCN_NCBI_WGS;
- } else if ((StringNICmp(temp,"B", 1) == 0)) {
+ } else if ((StringNICmp(temp,"B", 1) == 0)) {
retcode = ACCN_DDBJ_WGS;
- } else if ((StringNICmp(temp,"C", 1) == 0)) {
+ } else if ((StringNICmp(temp,"C", 1) == 0)) {
retcode = ACCN_EMBL_WGS;
- } else
+ } else
retval = FALSE;
- while (*s) {
+ while (*s) {
+ if (! IS_DIGIT(*s)) {
+ retval = FALSE;
+ break;
+ }
+ s++;
+ }
+ } else if(IS_ALPHA(*s) && IS_ALPHA(*(s+1)) && (*(s+2)=='_')) {
+ /* New 12-character accession, two letters +"_"+ 9 digits */
+ temp[0] = *s; s++;
+ temp[1] = *s; s++;
+ temp[2] = NULLB; s++;
+
+ if ((StringICmp(temp,"NP") == 0)) {
+ retcode = ACCN_REFSEQ_PROT;
+ } else if ((StringICmp(temp,"NM") == 0)) {
+ retcode = ACCN_REFSEQ_mRNA;
+ } else if (IS_ALPHA(*temp) && IS_ALPHA(*(temp+1))) {
+ retcode =ACCN_REFSEQ | ACCN_AMBIGOUS_MOL;
+ } else
+ retval = FALSE;
+ while (*s) {
if (! IS_DIGIT(*s)) {
retval = FALSE;
break;
}
s++;
+ }
}
break;
default:
diff --git a/api/sqnutil1.c b/api/sqnutil1.c
index eb6a3979..ef3b9a05 100644
--- a/api/sqnutil1.c
+++ b/api/sqnutil1.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.296 $
+* $Revision: 6.300 $
*
* File Description:
*
@@ -1402,6 +1402,28 @@ NLM_EXTERN int LIBCALLBACK SortVnpByString (VoidPtr ptr1, VoidPtr ptr2)
return 0;
}
+/*
+ * SortByIntvalue
+ *
+ * Comparison callback: ptr1 and ptr2 each point at a ValNodePtr, and the
+ * nodes are ordered by ascending data.intvalue. Returns 1, -1 or 0.
+ * A NULL argument or a NULL node compares as equal (0).
+ */
+NLM_EXTERN int LIBCALLBACK SortByIntvalue (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+  ValNodePtr  lhs, rhs;
+  Int4        a, b;
+
+  if (ptr1 == NULL || ptr2 == NULL) return 0;
+
+  lhs = *((ValNodePtr PNTR) ptr1);
+  rhs = *((ValNodePtr PNTR) ptr2);
+  if (lhs == NULL || rhs == NULL) return 0;
+
+  a = (Int4) lhs->data.intvalue;
+  b = (Int4) rhs->data.intvalue;
+
+  if (a == b) return 0;
+  return (a > b) ? 1 : -1;
+}
+}
+
NLM_EXTERN ValNodePtr UniqueValNode (ValNodePtr list)
{
@@ -1769,7 +1791,7 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B
GetSeqEntryParent (target, &parentptr, &parenttype);
sfp = first;
while (sfp != NULL) {
- if (sfp->data.choice == SEQFEAT_RNA && sfp->product == NULL) {
+ if (sfp->data.choice == SEQFEAT_RNA && sfp->product == NULL && (! sfp->pseudo)) {
gbq = sfp->qual;
prevqual = (GBQualPtr PNTR) &(sfp->qual);
id [0] = '\0';
@@ -1910,7 +1932,7 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B
next = xref->next;
if (xref->data.choice == SEQFEAT_PROT &&
sfp->data.choice == SEQFEAT_CDREGION &&
- sfp->product == NULL) {
+ sfp->product == NULL && (! sfp->pseudo)) {
prp = (ProtRefPtr) xref->data.value.ptrvalue;
xref->data.value.ptrvalue = NULL;
if (prp != NULL) {
@@ -2867,8 +2889,13 @@ static void CleanupTrna (SeqFeatPtr sfp, tRNAPtr trp)
if (trp == NULL) return;
if (sfp != NULL && sfp->comment != NULL && trp->codon [0] == 255) {
+ codon [0] = '\0';
if (StringNICmp (sfp->comment, "codon recognized: ", 18) == 0) {
StringNCpy_0 (codon, sfp->comment + 18, sizeof (codon));
+ } else if (StringNICmp (sfp->comment, "codons recognized: ", 19) == 0) {
+ StringNCpy_0 (codon, sfp->comment + 19, sizeof (codon));
+ }
+ if (StringDoesHaveText (codon)) {
if (StringLen (codon) > 3 && codon [3] == ';') {
codon [3] = '\0';
okayToFree = FALSE;
@@ -5871,6 +5898,75 @@ static Boolean InformativeString (CharPtr str)
return TRUE;
}
+/*
+ * CleanUpExceptText
+ *
+ * Rewrites legacy phrases in sfp->except_text to their current spelling:
+ *
+ *   "ribosome slippage"         -> "ribosomal slippage"
+ *   "trans splicing"            -> "trans-splicing"
+ *   "alternate processing"      -> "alternative processing"
+ *   "non-consensus splice site" -> "nonconsensus splice site"
+ *
+ * The text is split on commas, each token is trimmed and compared exactly
+ * against the legacy phrases, and the tokens are rejoined with ", ".
+ * Nothing is done when sfp or its except_text is NULL, or when none of the
+ * legacy phrases occurs anywhere in the text.
+ *
+ * The original string is split in place before the replacement buffer is
+ * allocated, so if that allocation fails except_text is left truncated at
+ * its first comma.
+ */
+static void CleanUpExceptText (SeqFeatPtr sfp)
+
+{
+  ValNodePtr  head, vnp;
+  size_t      len;
+  CharPtr     prefix, ptr, str, tmp;
+
+  if (sfp == NULL || sfp->except_text == NULL) return;
+
+  /* cheap substring test first, so unaffected text is never touched */
+  if (StringStr (sfp->except_text, "ribosome slippage") == NULL &&
+      StringStr (sfp->except_text, "trans splicing") == NULL &&
+      StringStr (sfp->except_text, "alternate processing") == NULL &&
+      StringStr (sfp->except_text, "non-consensus splice site") == NULL) return;
+
+  /* split on commas in place, saving a trimmed copy of every token */
+  head = NULL;
+  str = sfp->except_text;
+  tmp = str;
+  while (! StringHasNoText (tmp)) {
+    ptr = StringChr (tmp, ',');
+    if (ptr != NULL) {
+      *ptr = '\0';
+      ptr++;
+    }
+    TrimSpacesAroundString (tmp);
+    ValNodeCopyStr (&head, 0, tmp);
+    tmp = ptr;
+  }
+
+  /* replace tokens that exactly match a legacy phrase */
+  for (vnp = head; vnp != NULL; vnp = vnp->next) {
+    tmp = (CharPtr) vnp->data.ptrvalue;
+    if (StringHasNoText (tmp)) continue;
+    if (StringCmp (tmp, "ribosome slippage") == 0) {
+      vnp->data.ptrvalue = MemFree (tmp);
+      vnp->data.ptrvalue = StringSave ("ribosomal slippage");
+    } else if (StringCmp (tmp, "trans splicing") == 0) {
+      vnp->data.ptrvalue = MemFree (tmp);
+      vnp->data.ptrvalue = StringSave ("trans-splicing");
+    } else if (StringCmp (tmp, "alternate processing") == 0) {
+      vnp->data.ptrvalue = MemFree (tmp);
+      vnp->data.ptrvalue = StringSave ("alternative processing");
+    } else if (StringCmp (tmp, "non-consensus splice site") == 0) {
+      vnp->data.ptrvalue = MemFree (tmp);
+      vnp->data.ptrvalue = StringSave ("nonconsensus splice site");
+    }
+  }
+
+  /* each surviving token needs its own length plus a ", " separator */
+  len = 0;
+  for (vnp = head; vnp != NULL; vnp = vnp->next) {
+    tmp = (CharPtr) vnp->data.ptrvalue;
+    if (StringHasNoText (tmp)) continue;
+    len += StringLen (tmp) + 2;
+  }
+
+  str = (CharPtr) MemNew (len + 2);
+  if (str == NULL) {
+    /* do not leak the token list when the allocation fails */
+    ValNodeFreeData (head);
+    return;
+  }
+
+  /* rejoin, skipping empty tokens */
+  prefix = "";
+  for (vnp = head; vnp != NULL; vnp = vnp->next) {
+    tmp = (CharPtr) vnp->data.ptrvalue;
+    if (StringHasNoText (tmp)) continue;
+    StringCat (str, prefix);
+    StringCat (str, tmp);
+    prefix = ", ";
+  }
+
+  sfp->except_text = MemFree (sfp->except_text);
+  sfp->except_text = str;
+
+  ValNodeFreeData (head);
+}
+
static void CleanupFeatureStrings (SeqFeatPtr sfp, Boolean stripSerial, ValNodePtr PNTR publist)
{
@@ -5904,6 +6000,9 @@ static void CleanupFeatureStrings (SeqFeatPtr sfp, Boolean stripSerial, ValNodeP
CleanVisString (&(sfp->comment));
CleanVisString (&(sfp->title));
CleanVisString (&(sfp->except_text));
+ if (StringDoesHaveText (sfp->except_text)) {
+ CleanUpExceptText (sfp);
+ }
CleanDoubleQuote (sfp->comment);
if (StringCmp (sfp->comment, ".") == 0) {
sfp->comment = MemFree (sfp->comment);
diff --git a/api/sqnutil2.c b/api/sqnutil2.c
index 2c2a142a..3c77010c 100644
--- a/api/sqnutil2.c
+++ b/api/sqnutil2.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.168 $
+* $Revision: 6.178 $
*
* File Description:
*
@@ -737,14 +737,16 @@ static SeqLocPtr SeqLocFromRange (SeqLocRangePtr head, BioseqPtr target,
return slp;
}
-NLM_EXTERN SeqLocPtr SeqLocMergeEx (BioseqPtr target, SeqLocPtr to, SeqLocPtr from,
+NLM_EXTERN SeqLocPtr SeqLocMergeExEx (BioseqPtr target, SeqLocPtr to, SeqLocPtr from,
Boolean single_interval, Boolean fuse_joints,
- Boolean merge_overlaps, Boolean add_null)
+ Boolean merge_overlaps, Boolean add_null, Boolean ignore_mixed)
{
+ SeqLocRangePtr curr;
SeqLocRangePtr slrp;
SeqLocRangePtr head;
SeqLocRangePtr last;
+ Boolean mixed;
Boolean partial5;
Boolean partial3;
SeqLocPtr slp;
@@ -767,26 +769,52 @@ NLM_EXTERN SeqLocPtr SeqLocMergeEx (BioseqPtr target, SeqLocPtr to, SeqLocPtr fr
last->next = CollectRanges (target, from);
}
if (head != NULL) {
+
+ /* test for mixed strands */
+ mixed = FALSE;
strand = head->strand;
- head = SortRanges (head, FALSE);
- head = MergeOverlaps (head, fuse_joints, merge_overlaps);
- if (single_interval) {
- last = head;
- while (last->next != NULL) {
- last = last->next;
+ curr = head->next;
+ while (curr != NULL) {
+ if (curr->strand == Seq_strand_minus) {
+ if (strand == Seq_strand_plus || strand == Seq_strand_unknown) {
+ mixed = TRUE;
+ }
+ } else {
+ if (strand == Seq_strand_minus) {
+ mixed = TRUE;
+ }
}
- head->left = MIN (head->left, last->left);
- head->right = MAX (head->right, last->right);
- head->next = SeqLocRangeFree (head->next);
+ curr = curr->next;
}
- last = head;
- while (last != NULL) {
- last->strand = strand;
- last = last->next;
+
+ /* but can override mixed strands behavior */
+ if (ignore_mixed) {
+ mixed = FALSE;
}
- if (strand == Seq_strand_minus) {
- head = SortRanges (head, TRUE);
+
+ if (! mixed) {
+ strand = head->strand;
+ head = SortRanges (head, FALSE);
+ head = MergeOverlaps (head, fuse_joints, merge_overlaps);
+ if (single_interval) {
+ last = head;
+ while (last->next != NULL) {
+ last = last->next;
+ }
+ head->left = MIN (head->left, last->left);
+ head->right = MAX (head->right, last->right);
+ head->next = SeqLocRangeFree (head->next);
+ }
+ last = head;
+ while (last != NULL) {
+ last->strand = strand;
+ last = last->next;
+ }
+ if (strand == Seq_strand_minus) {
+ head = SortRanges (head, TRUE);
+ }
}
+
for (slrp = head; slrp != NULL; slrp = slrp->next) {
if (slrp->left < 0) {
slrp->left += target->length;
@@ -801,12 +829,20 @@ NLM_EXTERN SeqLocPtr SeqLocMergeEx (BioseqPtr target, SeqLocPtr to, SeqLocPtr fr
return slp;
}
+/*
+ * SeqLocMergeEx
+ *
+ * Forwards all arguments unchanged to SeqLocMergeExEx with ignore_mixed set
+ * to FALSE, so SeqLocMergeExEx's mixed-strand test stays in effect: when it
+ * detects mixed strands it skips the sort/merge step.
+ */
+NLM_EXTERN SeqLocPtr SeqLocMergeEx (BioseqPtr target, SeqLocPtr to, SeqLocPtr from,
+ Boolean single_interval, Boolean fuse_joints,
+ Boolean merge_overlaps, Boolean add_null)
+
+{
+ return SeqLocMergeExEx (target, to, from, single_interval, fuse_joints, merge_overlaps, add_null, FALSE);
+}
+
NLM_EXTERN SeqLocPtr SeqLocMerge (BioseqPtr target, SeqLocPtr to, SeqLocPtr from,
Boolean single_interval, Boolean fuse_joints,
Boolean add_null)
{
- return SeqLocMergeEx (target, to, from, single_interval, fuse_joints, TRUE, add_null);
+ return SeqLocMergeExEx (target, to, from, single_interval, fuse_joints, TRUE, add_null, FALSE);
}
NLM_EXTERN Boolean SeqLocBadSortOrder (BioseqPtr bsp, SeqLocPtr slp)
@@ -1049,7 +1085,6 @@ HasPubSrcComDescriptors
BioseqPtr bsp;
BioseqSetPtr bssp;
ValNodePtr list = NULL;
- AsnExpOptPtr aeop;
Boolean rval = FALSE;
if (sep == NULL || sep->data.ptrvalue == NULL) return FALSE;
@@ -1866,6 +1901,98 @@ NLM_EXTERN ProtRefPtr ParseTitleIntoProtRef (
return prp;
}
+/*
+ * ParseAccessionRange
+ *
+ * Parses an accession range of the form <letters><digits>-<letters><digits>.
+ *
+ *   accn     input string; MODIFIED IN PLACE - the '-' is overwritten with
+ *            '\0' and is not restored, even when FALSE is returned
+ *   prefix   receives the leading letters of the first accession, NUL
+ *            terminated; must hold at least 16 bytes
+ *   startp   receives the number from the first accession
+ *   stopp    receives the number from the second accession
+ *   digitsp  receives the count of digit characters in the first number,
+ *            for use as a zero-padded field width
+ *
+ * Returns FALSE (leaving startp, stopp and digitsp untouched) when an
+ * argument is NULL, accn has no text, there is no '-', either number is
+ * missing or below 1, the prefix is longer than 15 letters, or the first
+ * number has more than 31 digits. The two limits match the 16-byte prefix
+ * and 32-byte number buffers of the callers in this file; the function is
+ * given no buffer sizes, so it cannot check them any other way.
+ *
+ * The letters of the second accession are skipped without being compared
+ * to prefix, and stop is not checked against start.
+ */
+static Boolean ParseAccessionRange (
+  CharPtr accn,
+  CharPtr prefix,
+  Int4Ptr startp,
+  Int4Ptr stopp,
+  Int2Ptr digitsp
+)
+
+{
+  Char      ch;
+  Int2      digits;
+  CharPtr   num;
+  Int2      plen;
+  CharPtr   ptr, tmp;
+  Int4      start, stop;
+  long int  val;
+
+  if (StringHasNoText (accn)) return FALSE;
+  if (prefix == NULL || startp == NULL || stopp == NULL || digitsp == NULL) return FALSE;
+
+  /* copy the leading letters, refusing a prefix that would overrun the caller's buffer */
+  plen = 0;
+  ptr = accn;
+  ch = *ptr;
+  while (IS_ALPHA (ch)) {
+    if (plen >= 15) {
+      prefix [plen] = '\0';
+      return FALSE;
+    }
+    prefix [plen] = ch;
+    plen++;
+    ptr++;
+    ch = *ptr;
+  }
+  prefix [plen] = '\0';
+
+  /* split the string at the dash; tmp then points at the second accession */
+  tmp = StringChr (ptr, '-');
+  if (tmp == NULL) return FALSE;
+  *tmp = '\0';
+  tmp++;
+
+  /* count the digits first so an over-long number is rejected before conversion */
+  num = ptr;
+  digits = 0;
+  while (IS_DIGIT (ch)) {
+    digits++;
+    if (digits > 31) return FALSE;
+    ptr++;
+    ch = *ptr;
+  }
+
+  if (sscanf (num, "%ld", &val) != 1 || val < 1) return FALSE;
+  start = (Int4) val;
+
+  /* skip the letters of the second accession */
+  ptr = tmp;
+  ch = *ptr;
+  while (IS_ALPHA (ch)) {
+    ptr++;
+    ch = *ptr;
+  }
+
+  if (sscanf (ptr, "%ld", &val) != 1 || val < 1) return FALSE;
+  stop = (Int4) val;
+
+  *startp = start;
+  *stopp = stop;
+  *digitsp = digits;
+
+  return TRUE;
+}
+
+/*
+ * DoAddToSecAccn
+ *
+ * Appends accn to gbp->extra_accessions. When accn contains a '-', it is
+ * parsed as an accession range by ParseAccessionRange and every accession
+ * from start to stop inclusive is appended, each built from the parsed
+ * prefix plus the number zero-padded to the digit width of the first
+ * accession. A range that fails to parse, or whose digit width would not
+ * fit the local number buffer, adds nothing.
+ *
+ * Note that ParseAccessionRange overwrites the '-' in accn.
+ */
+static void DoAddToSecAccn (
+  GBBlockPtr gbp,
+  CharPtr accn
+)
+
+{
+  Int2  digits;
+  Int4  idx;
+  Char  numbers [32];
+  Char  prefix [16];
+  Int4  start, stop;
+  Char  tmp [64];
+
+  /* single accession: store as given */
+  if (StringChr (accn, '-') == NULL) {
+    ValNodeCopyStr (&(gbp->extra_accessions), 0, accn);
+    return;
+  }
+
+  if (! ParseAccessionRange (accn, prefix, &start, &stop, &digits)) return;
+
+  /* the field width comes from the input, so it must fit numbers [] */
+  if (digits < 0 || digits >= (Int2) sizeof (numbers)) return;
+
+  for (idx = start; idx <= stop; idx++) {
+    /* the 0 flag pads with leading zeros up to the requested width */
+    sprintf (numbers, "%0*ld", (int) digits, (long) idx);
+    StringCpy (tmp, prefix);
+    StringCat (tmp, numbers);
+    ValNodeCopyStr (&(gbp->extra_accessions), 0, tmp);
+  }
+}
+
NLM_EXTERN GBBlockPtr ParseTitleIntoGenBank (
SqnTagPtr stp,
GBBlockPtr gbp
@@ -1899,7 +2026,7 @@ NLM_EXTERN GBBlockPtr ParseTitleIntoGenBank (
*ptr = '\0';
if (! StringHasNoText (last)) {
TrimSpacesAroundString (last);
- ValNodeCopyStr (&(gbp->extra_accessions), 0, last);
+ DoAddToSecAccn (gbp, last);
}
ptr++;
last = ptr;
@@ -1911,7 +2038,7 @@ NLM_EXTERN GBBlockPtr ParseTitleIntoGenBank (
}
if (! StringHasNoText (last)) {
TrimSpacesAroundString (last);
- ValNodeCopyStr (&(gbp->extra_accessions), 0, last);
+ DoAddToSecAccn (gbp, last);
}
MemFree (tmp);
}
@@ -1947,6 +2074,38 @@ static void AddStringToSeqHist (
tsip->accession = StringSave (str);
}
+/*
+ * DoAddToSeqHist
+ *
+ * Passes accn to AddStringToSeqHist for shp. When accn contains a '-', it
+ * is parsed as an accession range by ParseAccessionRange and every
+ * accession from start to stop inclusive is passed on, each built from the
+ * parsed prefix plus the number zero-padded to the digit width of the first
+ * accession. A range that fails to parse, or whose digit width would not
+ * fit the local number buffer, adds nothing.
+ *
+ * Note that ParseAccessionRange overwrites the '-' in accn.
+ */
+static void DoAddToSeqHist (
+  SeqHistPtr shp,
+  CharPtr accn
+)
+
+{
+  Int2  digits;
+  Int4  idx;
+  Char  numbers [32];
+  Char  prefix [16];
+  Int4  start, stop;
+  Char  tmp [64];
+
+  /* single accession: pass through as given */
+  if (StringChr (accn, '-') == NULL) {
+    AddStringToSeqHist (shp, accn);
+    return;
+  }
+
+  if (! ParseAccessionRange (accn, prefix, &start, &stop, &digits)) return;
+
+  /* the field width comes from the input, so it must fit numbers [] */
+  if (digits < 0 || digits >= (Int2) sizeof (numbers)) return;
+
+  for (idx = start; idx <= stop; idx++) {
+    /* the 0 flag pads with leading zeros up to the requested width */
+    sprintf (numbers, "%0*ld", (int) digits, (long) idx);
+    StringCpy (tmp, prefix);
+    StringCat (tmp, numbers);
+    AddStringToSeqHist (shp, tmp);
+  }
+}
+
NLM_EXTERN SeqHistPtr ParseStringIntoSeqHist (
SeqHistPtr shp,
CharPtr str
@@ -1973,7 +2132,7 @@ NLM_EXTERN SeqHistPtr ParseStringIntoSeqHist (
*ptr = '\0';
if (! StringHasNoText (last)) {
TrimSpacesAroundString (last);
- AddStringToSeqHist (shp, last);
+ DoAddToSeqHist (shp, last);
}
ptr++;
last = ptr;
@@ -1985,7 +2144,7 @@ NLM_EXTERN SeqHistPtr ParseStringIntoSeqHist (
}
if (! StringHasNoText (last)) {
TrimSpacesAroundString (last);
- AddStringToSeqHist (shp, last);
+ DoAddToSeqHist (shp, last);
}
MemFree (tmp);
}
@@ -2026,7 +2185,7 @@ NLM_EXTERN SeqHistPtr ParseTitleIntoSeqHist (
*ptr = '\0';
if (! StringHasNoText (last)) {
TrimSpacesAroundString (last);
- AddStringToSeqHist (shp, last);
+ DoAddToSeqHist (shp, last);
}
ptr++;
last = ptr;
@@ -2038,7 +2197,7 @@ NLM_EXTERN SeqHistPtr ParseTitleIntoSeqHist (
}
if (! StringHasNoText (last)) {
TrimSpacesAroundString (last);
- AddStringToSeqHist (shp, last);
+ DoAddToSeqHist (shp, last);
}
MemFree (tmp);
}
@@ -2837,7 +2996,7 @@ static void AddNucToContig (CharPtr accnString, Int4 from, Int4 to,
sip->choice = (Uint1) SEQID_GI;
sip->data.intvalue = val;
} else {
- sip = SeqIdFromAccession (accnString, 0, NULL);
+ sip = SeqIdFromAccessionDotVersion (accnString);
if (sip == NULL) {
sip = ValNodeNew (NULL);
tsip = TextSeqIdNew ();
@@ -4446,6 +4605,23 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
}
} else if (sfp->data.choice == SEQFEAT_CDREGION && StringCmp (qual, "secondary_accession") == 0) {
bail = FALSE;
+ } else if (sfp->data.choice == SEQFEAT_RNA &&
+ (StringCmp (qual, "codon_recognized") == 0 || StringCmp (qual, "codons_recognized") == 0)) {
+ rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+ if (rrp != NULL && rrp->type == 3) {
+ StringNCpy_0 ((CharPtr) codon, val, sizeof (codon));
+ if (StringLen ((CharPtr) codon) == 3) {
+ for (j = 0; j < 3; j++) {
+ if (codon [j] == 'U') {
+ codon [j] = 'T';
+ }
+ }
+ trna = (tRNAPtr) rrp->ext.value.ptrvalue;
+ if (trna != NULL) {
+ ParseDegenerateCodon (trna, (Uint1Ptr) codon);
+ }
+ }
+ }
} else if (ifp != NULL && StringICmp (ifp->key, "variation") == 0 && ParseQualIntoSnpUserObject (sfp, qual, val)) {
} else if (ifp != NULL && StringICmp (ifp->key, "STS") == 0 && ParseQualIntoStsUserObject (sfp, qual, val)) {
} else if (ifp != NULL && StringICmp (ifp->key, "misc_feature") == 0 && ParseQualIntoCloneUserObject (sfp, qual, val)) {
@@ -4624,8 +4800,6 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
return;
} else if (qnum == GBQUAL_transl_except) {
if (ParseCodeBreak (sfp, val, offset)) return;
- } else if (qnum == GBQUAL_anticodon) {
- if (ParseAnticodon (sfp, val, offset)) return;
} else if (qnum == GBQUAL_codon_start) {
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
if (sscanf (val, "%d", &num) == 1 && crp != NULL) {
@@ -4633,6 +4807,8 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
crp->frame = (Uint1) num;
}
}
+ } else if (qnum == GBQUAL_pseudo) {
+ sfp->pseudo = TRUE;
}
} else if (sfp->data.choice == SEQFEAT_PROT) {
if (qnum == GBQUAL_function || qnum == GBQUAL_EC_number || qnum == GBQUAL_product) {
@@ -4706,6 +4882,10 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
rrp->ext.value.ptrvalue = StringSave (val);
}
return;
+ } else if (qnum == GBQUAL_anticodon) {
+ if (ParseAnticodon (sfp, val, offset)) return;
+ } else if (qnum == GBQUAL_pseudo) {
+ sfp->pseudo = TRUE;
}
} else if (sfp->data.choice == SEQFEAT_BIOSRC) {
if (ParseQualIntoBioSource (sfp, qual, val)) return;
@@ -7242,7 +7422,7 @@ NLM_EXTERN void PrintQualityScoresForContig (
} else if (dsp->choice == 2) {
slitp = (SeqLitPtr) dsp->data.ptrvalue;
- if (slitp == NULL || slitp->seq_data != NULL) continue;
+ if (slitp == NULL /* || slitp->seq_data != NULL */) continue;
for (i = 0; i < slitp->length; i++) {
PrintAScore (fp, gap, &linepos);
}
diff --git a/api/sqnutil3.c b/api/sqnutil3.c
index faefadc4..b2c89ddb 100644
--- a/api/sqnutil3.c
+++ b/api/sqnutil3.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/7/00
*
-* $Revision: 6.31 $
+* $Revision: 6.32 $
*
* File Description:
*
@@ -1817,10 +1817,9 @@ extern void AbstractReportError (
}
-static void AddDefLinesToAlignmentSequences (
- TAlignmentFilePtr afp,
- SeqEntryPtr sep_head
-)
+static void AddDefLinesToAlignmentSequences
+(TAlignmentFilePtr afp,
+ SeqEntryPtr sep_head)
{
BioseqSetPtr bssp;
SeqEntryPtr sep;
@@ -1828,7 +1827,12 @@ static void AddDefLinesToAlignmentSequences (
ValNodePtr sdp;
CharPtr new_title;
Int4 new_title_len;
-
+ Int4 curr_seg;
+ Int4 num_sets = 1;
+ Boolean one_defline_per_sequence = TRUE;
+ Boolean all_extra_empty;
+
+
if (afp == NULL || sep_head == NULL || ! IS_Bioseq_set (sep_head))
{
return;
@@ -1838,32 +1842,96 @@ static void AddDefLinesToAlignmentSequences (
{
return;
}
-
bssp = sep_head->data.ptrvalue;
+
+ /* find out if all of our deflines are real */
+ if (afp->num_segments > 1 && afp->num_deflines == afp->num_sequences)
+ {
+ one_defline_per_sequence = FALSE;
+ num_sets = afp->num_sequences / afp->num_segments;
+ all_extra_empty = TRUE;
+ for (curr_seg = num_sets; curr_seg < afp->num_deflines && all_extra_empty; curr_seg ++)
+ {
+ if (afp->deflines [curr_seg] != NULL)
+ {
+ all_extra_empty = FALSE;
+ }
+ }
+ if (all_extra_empty)
+ {
+ one_defline_per_sequence = TRUE;
+ }
+ }
+
for (sep = bssp->seq_set, index = 0;
sep != NULL && (index < afp->num_deflines || index < afp->num_organisms);
sep = sep->next, index++)
{
new_title_len = 0;
- if (index < afp->num_organisms) {
- new_title_len += StringLen (afp->organisms [index]) + 1;
+ /* get lengths for organisms for this sequence */
+
+ if (afp->num_segments > 1 && afp->num_organisms == afp->num_sequences)
+ {
+ /* have one organism per segment, in which case use only the first one */
+ curr_seg = index * afp->num_segments;
}
- if (index < afp->num_deflines && afp->deflines [index] != NULL) {
- new_title_len += StringLen (afp->deflines [index]) + 1;
+ else
+ { /* otherwise one organism per sequence */
+ curr_seg = index;
}
+ if (curr_seg < afp->num_organisms)
+ {
+ new_title_len += StringLen (afp->organisms [curr_seg]) + 1;
+ }
+
+ /* get lengths for deflines for this sequence */
+ if (! one_defline_per_sequence)
+ { /* have one defline per segment, in which use only the first one */
+ curr_seg = index * afp->num_segments;
+ }
+ else
+ { /* otherwise one defline per sequence */
+ curr_seg = index;
+ }
+ if (curr_seg < afp->num_deflines)
+ {
+ new_title_len += StringLen (afp->deflines [curr_seg]) + 1;
+ }
+
if (new_title_len > 0) {
new_title = (CharPtr) MemNew (new_title_len);
if (new_title == NULL) return;
new_title [0] = 0;
- if (index < afp->num_organisms) {
- StringCat (new_title, afp->organisms [index]);
+
+ /* list organisms at beginning of new defline */
+ if (afp->num_segments > 1 && afp->num_organisms == afp->num_sequences)
+ { /* have one organism per segment, in which case use only first one */
+ curr_seg = index * afp->num_segments;
+ }
+ else
+ { /* otherwise one organism per sequence */
+ curr_seg = index;
+ }
+
+ if (curr_seg < afp->num_organisms) {
+ StringCat (new_title, afp->organisms [curr_seg]);
if (new_title_len > StringLen (new_title) + 1)
{
StringCat (new_title, " ");
}
}
- if (index < afp->num_deflines && afp->deflines [index] != NULL) {
- StringCat (new_title, afp->deflines [index]);
+
+ if (!one_defline_per_sequence)
+ { /* have one defline per segment, in which case all go to same sequence */
+ curr_seg = index * afp->num_segments;
+ }
+ else
+ {
+ curr_seg = index;
+ }
+ if (curr_seg < afp->num_deflines)
+ {
+ StringCat (new_title, afp->deflines [curr_seg]);
}
sdp = CreateNewDescriptor (sep, Seq_descr_title);
@@ -1875,32 +1943,283 @@ static void AddDefLinesToAlignmentSequences (
}
}
}
+
+
+/* MakeDeltaSetFromAlignment
+ *
+ * Walks sep_list in groups of afp->num_segments consecutive entries and
+ * replaces each group with one new delta Bioseq (repr Seq_repr_delta,
+ * seq_ext_type 4, mol = moltype) wrapped in its own SeqEntry:
+ *   - a raw member becomes a Seq-literal carrying its IUPACna residues;
+ *   - a virtual member becomes a Seq-literal without data; when its length
+ *     is below 1 the literal gets length 0 and an IntFuzz with choice 4;
+ *   - a Seq-literal of gap_length and no data is inserted after every
+ *     member except the last one of the group.
+ * The new Bioseq receives a duplicate of the first member's Seq-id and the
+ * title descriptor extracted from the first member.  Each source entry is
+ * released with SeqEntryFree once it has been converted.
+ *
+ * Returns the head of the linked list of new entries, or NULL if afp is
+ * NULL, sep_list is empty, a group is shorter than afp->num_segments, the
+ * first member of a group has no Bioseq, or an allocation of the new
+ * Bioseq/SeqEntry fails.
+ *
+ * NOTE(review): on the early "return NULL" paths inside the outer loop the
+ * delta entries built so far, and the id/title taken from the current
+ * group, are not released -- same as before this revision; confirm whether
+ * callers can tolerate that.
+ */
+static SeqEntryPtr
+MakeDeltaSetFromAlignment
+(SeqEntryPtr sep_list,
+ TAlignmentFilePtr afp,
+ Uint1 moltype,
+ Int4 gap_length
+ )
+{
+  BioseqPtr    bsp, deltabsp;
+  SeqEntryPtr  this_list, last_sep, next_list, sep, nextsep;
+  SeqEntryPtr  topsep, last_delta_sep;
+  SeqIdPtr     sip;
+  Int4         curr_seg;
+  CharPtr      seqbuf;
+  ValNodePtr   vnp;
+  SeqLitPtr    slp;
+  IntFuzzPtr   ifp;
+  SeqEntryPtr  delta_list = NULL;
+
+  /* afp->num_segments is read on every iteration below */
+  if (afp == NULL) return NULL;
+
+  last_delta_sep = NULL;
+  this_list = sep_list;
+  while (this_list != NULL)
+  {
+    /* locate the last member of this group and cut the group loose */
+    last_sep = this_list;
+    curr_seg = 0;
+    while (last_sep != NULL && curr_seg < afp->num_segments - 1)
+    {
+      last_sep = last_sep->next;
+      curr_seg++;
+    }
+    if (last_sep == NULL) return NULL;
+    next_list = last_sep->next;
+    last_sep->next = NULL;
+
+    bsp = (BioseqPtr)this_list->data.ptrvalue;
+    if (bsp == NULL) return NULL;
+
+    /* the delta sequence inherits id and title from the first member */
+    sip = SeqIdDup (bsp->id);
+    vnp = ValNodeExtract (&(bsp->descr), Seq_descr_title);
+
+    deltabsp = BioseqNew ();
+    if (deltabsp == NULL) return NULL;
+    deltabsp->repr = Seq_repr_delta;
+    deltabsp->seq_ext_type = 4;
+    deltabsp->mol = moltype;
+    deltabsp->length = 0;
+
+    topsep = SeqEntryNew ();
+    if (topsep == NULL) return NULL;
+    topsep->choice = 1;
+    topsep->data.ptrvalue = (Pointer) deltabsp;
+
+    for (sep = this_list; sep != NULL; sep = nextsep) {
+      /* detach the member so it can be freed on its own */
+      nextsep = sep->next;
+      sep->next = NULL;
+
+      bsp = (BioseqPtr) sep->data.ptrvalue;
+      if (bsp == NULL) continue;
+
+      if (bsp->repr == Seq_repr_raw) {
+        BioseqRawConvert (bsp, Seq_code_iupacna);
+        seqbuf = BSMerge ((ByteStorePtr) bsp->seq_data, NULL);
+        slp = (SeqLitPtr) MemNew (sizeof (SeqLit));
+        if (slp == NULL) {
+          /* do not leak the merged residues or the detached entry */
+          MemFree (seqbuf);
+          SeqEntryFree (sep);
+          continue;
+        }
+
+        slp->length = bsp->length;
+        ValNodeAddPointer ((ValNodePtr PNTR) &(deltabsp->seq_ext), (Int2) 2, (Pointer) slp);
+        slp->seq_data = BSNew (slp->length);
+        slp->seq_data_type = Seq_code_iupacna;
+        AddBasesToByteStore (slp->seq_data, seqbuf);
+        MemFree(seqbuf);
+
+        deltabsp->length += slp->length;
+
+      } else if (bsp->repr == Seq_repr_virtual) {
+        slp = (SeqLitPtr) MemNew (sizeof (SeqLit));
+        if (slp == NULL) {
+          /* do not leak the detached entry */
+          SeqEntryFree (sep);
+          continue;
+        }
+
+        slp->length = bsp->length;
+        ValNodeAddPointer ((ValNodePtr PNTR) &(deltabsp->seq_ext), (Int2) 2, (Pointer) slp);
+        if (slp->length < 1) {
+          slp->length = 0;
+          ifp = IntFuzzNew ();
+          if (ifp != NULL) {
+            ifp->choice = 4;
+            slp->fuzz = ifp;
+          }
+        }
+
+        deltabsp->length += slp->length;
+      }
+      SeqEntryFree (sep);
+
+      if (nextsep != NULL)
+      {
+        /* add gap */
+        slp = (SeqLitPtr) MemNew (sizeof (SeqLit));
+        if (slp == NULL) continue;
+        slp->length = gap_length;
+        ValNodeAddPointer ((ValNodePtr PNTR) &(deltabsp->seq_ext), (Int2) 2, (Pointer) slp);
+        deltabsp->length += slp->length;
+      }
+    }
+
+    ValNodeLink (&(deltabsp->descr), vnp);
+    deltabsp->id = sip;
+
+    /* append the finished delta entry to the result list */
+    if (last_delta_sep == NULL)
+    {
+      delta_list = topsep;
+    }
+    else
+    {
+      last_delta_sep->next = topsep;
+    }
+    last_delta_sep = topsep;
+
+    this_list = next_list;
+  }
+  return delta_list;
+}
+
+/* RenameSegSet gives the Bioseq found directly in a segmented set a new
+   Seq-id: the FASTA-short id of the first Bioseq in the set's parts set,
+   with "_master" appended.  Does nothing unless sep is a Bioseq-set of
+   class segset in which both Bioseqs can be found. */
+static void RenameSegSet (SeqEntryPtr sep)
+{
+  BioseqPtr     first_part = NULL;
+  Char          id_buf [255];
+  BioseqPtr     master = NULL;
+  SeqEntryPtr   member, part;
+  BioseqSetPtr  parts, segset;
+
+  if (sep == NULL) return;
+  if (! IS_Bioseq_set (sep)) return;
+  segset = sep->data.ptrvalue;
+  if (segset == NULL) return;
+  if (segset->_class != BioseqseqSet_class_segset) return;
+
+  /* scan the members until both the master and a part have been seen */
+  for (member = segset->seq_set;
+       member != NULL && (first_part == NULL || master == NULL);
+       member = member->next)
+  {
+    if (IS_Bioseq (member))
+    {
+      master = (BioseqPtr) member->data.ptrvalue;
+      continue;
+    }
+    if (! IS_Bioseq_set (member)) continue;
+
+    parts = (BioseqSetPtr) member->data.ptrvalue;
+    if (parts == NULL || parts->_class != BioseqseqSet_class_parts) continue;
+
+    /* take the first Bioseq inside the parts set */
+    for (part = parts->seq_set; part != NULL && first_part == NULL; part = part->next)
+    {
+      if (IS_Bioseq (part))
+      {
+        first_part = part->data.ptrvalue;
+      }
+    }
+  }
+
+  if (master == NULL || first_part == NULL) return;
+
+  /* the length passed to SeqIdWrite holds back 7 bytes for "_master" */
+  SeqIdWrite (first_part->id, id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 7);
+  StringCat (id_buf, "_master");
+  SeqIdFree (master->id);
+  master->id = MakeSeqID (id_buf);
+}
+
+/* MakeSegmentedSetFromAlignment
+ *
+ * Splits sep_list into groups of afp->num_segments consecutive entries.
+ * Within each group every entry after the first is added to the first one
+ * with AddSeqEntryToSeqEntry, the result is passed to RenameSegSet, and the
+ * group heads are chained together.  Returns the head of that chain, or
+ * NULL if the list is empty, a group is too short, or one of the entries
+ * checked while measuring a group is not a Bioseq.  moltype is not used.
+ */
+static SeqEntryPtr
+MakeSegmentedSetFromAlignment
+(SeqEntryPtr sep_list,
+ TAlignmentFilePtr afp,
+ Uint1 moltype)
+{
+  SeqEntryPtr  cursor, following, group, rest;
+  SeqEntryPtr  result = NULL;
+  Int4         steps;
+  SeqEntryPtr  tail = NULL;
+
+  for (group = sep_list; group != NULL; group = rest)
+  {
+    /* walk to the last member of this group */
+    cursor = group;
+    for (steps = 0; cursor != NULL && steps < afp->num_segments - 1; steps++)
+    {
+      if (!IS_Bioseq (cursor)) return NULL;
+      cursor = cursor->next;
+    }
+    if (cursor == NULL) return NULL;
+
+    /* cut the group off from whatever follows it */
+    rest = cursor->next;
+    cursor->next = NULL;
+
+    /* fold members 2..n into the first member, one at a time */
+    cursor = group->next;
+    group->next = NULL;
+    while (cursor != NULL)
+    {
+      following = cursor->next;
+      cursor->next = NULL;
+      AddSeqEntryToSeqEntry (group, cursor, FALSE);
+      cursor = following;
+    }
+
+    /* fix IDs for seg sets */
+    RenameSegSet (group);
+
+    if (result == NULL)
+    {
+      result = group;
+    }
+    else
+    {
+      tail->next = group;
+    }
+    tail = group;
+  }
+  return result;
+}
extern SeqEntryPtr MakeSequinDataFromAlignment (TAlignmentFilePtr afp, Uint1 moltype)
{
- SeqAnnotPtr sap;
- SeqEntryPtr sep_list, sep, sep_prev;
- SeqIdPtr sip_list, sip, sip_prev;
- ValNodePtr seqvnp, vnp;
- Int4 index, len;
+ SeqIdPtr PNTR sip_list;
+ SeqIdPtr PNTR sip_prev;
+ SeqAnnotPtr sap = NULL;
+ SeqAlignPtr salp, salp_list, salp_last;
+ ValNodePtr PNTR seqvnp;
+ SeqEntryPtr sep_list;
+ SeqEntryPtr sep, sep_prev;
+ SeqIdPtr sip;
+ ValNodePtr vnp;
+ Int4 index, len, curr_seg, num_sets;
if (afp == NULL) return NULL;
if (afp->num_sequences == 0) return NULL;
+ if (afp->num_segments < 1) return NULL;
+
+ sip_list = (SeqIdPtr PNTR) MemNew (afp->num_segments * sizeof (SeqIdPtr));
+ sip_prev = (SeqIdPtr PNTR) MemNew (afp->num_segments * sizeof (SeqIdPtr));
+ seqvnp = (ValNodePtr PNTR) MemNew (afp->num_segments * sizeof (ValNodePtr));
+ if (sip_list == NULL || sip_prev == NULL || seqvnp == NULL)
+ {
+ MemFree (sip_list);
+ MemFree (sip_prev);
+ MemFree (seqvnp);
+ return NULL;
+ }
+
+ for (curr_seg = 0; curr_seg < afp->num_segments; curr_seg ++)
+ {
+ sip_list [curr_seg] = NULL;
+ sip_prev [curr_seg] = NULL;
+ seqvnp [curr_seg] = NULL;
+ }
- seqvnp = NULL;
- sip_list = NULL;
- sip_prev = NULL;
sep_list = NULL;
sep_prev = NULL;
+ curr_seg = 0;
for (index = 0; index < afp->num_sequences; index++) {
sip = MakeSeqID (afp->ids [index]);
- if (sip_prev == NULL) {
- sip_list = sip;
+ if (sip_prev[curr_seg] == NULL) {
+ sip_list[curr_seg] = sip;
} else {
- sip_prev->next = sip;
+ sip_prev[curr_seg]->next = sip;
}
- sip_prev = sip;
+ sip_prev[curr_seg] = sip;
len = (Int4) StringLen (afp->sequences [index]);
sep = StringToSeqEntry (afp->sequences [index], sip, len, moltype);
if (sep != NULL) {
@@ -1910,17 +2229,74 @@ extern SeqEntryPtr MakeSequinDataFromAlignment (TAlignmentFilePtr afp, Uint1 mol
sep_prev->next = sep;
}
sep_prev = sep;
- vnp = ValNodeNew (seqvnp);
- if (seqvnp == NULL) seqvnp = vnp;
+ vnp = ValNodeNew (seqvnp[curr_seg]);
+ if (seqvnp[curr_seg] == NULL) seqvnp[curr_seg] = vnp;
vnp->data.ptrvalue = afp->sequences [index];
}
+ curr_seg ++;
+ if (curr_seg >= afp->num_segments)
+ {
+ curr_seg = 0;
+ }
+ }
+
+ if (afp->num_segments == 1)
+ {
+ sap = LocalAlignToSeqAnnotDimn (seqvnp[0], sip_list[0], NULL, afp->num_sequences,
+ 0, NULL, FALSE);
+ sep_list = make_seqentry_for_seqentry (sep_list);
+ SeqAlignAddInSeqEntry (sep_list, sap);
+ }
+ else
+ {
+ sep_list = MakeSegmentedSetFromAlignment (sep_list, afp, moltype);
+ sep_list = make_seqentry_for_seqentry (sep_list);
+ num_sets = afp->num_sequences / afp->num_segments;
+ salp_list = NULL;
+ salp_last = NULL;
+
+ for (curr_seg = 0; curr_seg < afp->num_segments; curr_seg++)
+ {
+ sap = LocalAlignToSeqAnnotDimn (seqvnp[curr_seg], sip_list[curr_seg], NULL, num_sets,
+ 0, NULL, FALSE);
+ if (sap != NULL)
+ {
+ salp = (SeqAlignPtr) sap->data;
+ if (salp != NULL)
+ {
+ if (salp_last == NULL)
+ {
+ salp_list = salp;
+ }
+ else
+ {
+ salp_last->next = salp;
+ }
+ salp_last = salp;
+ }
+ sap->data = NULL;
+ SeqAnnotFree (sap);
+ }
+ }
+ if (salp_list != NULL)
+ {
+ sap = SeqAnnotNew ();
+ sap->type = 2;
+ sap->data = (Pointer) salp_list;
+ SeqAlignAddInSeqEntry (sep_list, sap);
+ }
}
- sap = LocalAlignToSeqAnnotDimn (seqvnp, sip_list, NULL, afp->num_sequences,
- 0, NULL, FALSE);
- sep_list = make_seqentry_for_seqentry (sep_list);
- SeqAlignAddInSeqEntry (sep_list, sap);
- ValNodeFree (seqvnp);
+
+ for (curr_seg = 0; curr_seg < afp->num_segments; curr_seg ++)
+ {
+ ValNodeFree (seqvnp [curr_seg]);
+ }
+ MemFree (seqvnp);
+ MemFree (sip_list);
+ MemFree (sip_prev);
+
AddDefLinesToAlignmentSequences (afp, sep_list);
+
return sep_list;
}
diff --git a/api/sqnutils.h b/api/sqnutils.h
index 68453586..edaef08c 100644
--- a/api/sqnutils.h
+++ b/api/sqnutils.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.96 $
+* $Revision: 6.98 $
*
* File Description:
*
@@ -164,6 +164,11 @@ NLM_EXTERN SeqLocPtr SeqLocMergeEx (BioseqPtr target, SeqLocPtr to, SeqLocPtr fr
Boolean single_interval, Boolean fuse_joints,
Boolean merge_overlaps, Boolean add_null);
+NLM_EXTERN SeqLocPtr SeqLocMergeExEx (BioseqPtr target, SeqLocPtr to, SeqLocPtr from,
+ Boolean single_interval, Boolean fuse_joints,
+ Boolean merge_overlaps, Boolean add_null,
+ Boolean ignore_mixed);
+
NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr);
NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean partial3);
NLM_EXTERN void FreeAllFuzz (SeqLocPtr location);
@@ -308,6 +313,10 @@ NLM_EXTERN Boolean SerialNumberInString (CharPtr str);
NLM_EXTERN int LIBCALLBACK SortVnpByString (VoidPtr ptr1, VoidPtr ptr2);
NLM_EXTERN ValNodePtr UniqueValNode (ValNodePtr list);
+/* for sorting valnode list by data.intvalue */
+
+NLM_EXTERN int LIBCALLBACK SortByIntvalue (VoidPtr ptr1, VoidPtr ptr2);
+
/* keytag sorts/uniques and then owns valnode character list */
typedef struct keytag {
diff --git a/api/tofasta.c b/api/tofasta.c
index 3baa4031..b454be94 100644
--- a/api/tofasta.c
+++ b/api/tofasta.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/12/91
*
-* $Revision: 6.131 $
+* $Revision: 6.133 $
*
* File Description: various sequence objects to fasta output
*
@@ -39,6 +39,12 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: tofasta.c,v $
+* Revision 6.133 2004/06/02 20:11:14 kans
+* In SeqEntryToFasta, if defline is >?unk100, make special local ID, recognized by tbl2asn
+*
+* Revision 6.132 2004/05/07 20:55:54 kans
+* MakeCompleteChromTitle handles organelles with multiple chromosomes, e.g., Guillardia theta nucleomorph
+*
* Revision 6.131 2004/05/03 15:32:07 kans
* BioseqFastaStream uses larger buffer for seqid because some RefSeqs use more than 41 characters for FASTA_LONG
*
@@ -2551,12 +2557,19 @@ static SeqEntryPtr FastaToSeqEntryInternalExEx
}
} else {
/* Unknown Seq-id */
- bsp->id = MakeSeqID ("lcl|gap");
- bsp->repr = Seq_repr_virtual;
ptr = defline + 1;
while (IS_WHITESP(*ptr))
ptr++;
+
+ if (StringNCmp (ptr, "unk100", 6) == 0) {
+ bsp->id = MakeSeqID ("lcl|unk100");
+ ptr += 3;
+ } else {
+ bsp->id = MakeSeqID ("lcl|gap");
+ }
+ bsp->repr = Seq_repr_virtual;
+
if(*ptr != '\0' && sscanf(ptr, "%ld", &len) == 1 && len > 0) {
bsp->length = (Int4) len;
} else {
@@ -3839,7 +3852,7 @@ static CharPtr MakeCompleteChromTitle (BioseqPtr bsp, Uint1 biomol, Uint1 comple
SubSourcePtr ssp;
CharPtr name = NULL, chr = NULL, orgnl = NULL,
seg = NULL, pls = NULL, def = NULL;
- Int2 deflen = 70; /* starts with space for all fixed text */
+ Int2 deflen = 80; /* starts with space for all fixed text */
Char ch;
Boolean plasmid;
Uint1 genome;
@@ -3957,6 +3970,16 @@ static CharPtr MakeCompleteChromTitle (BioseqPtr bsp, Uint1 biomol, Uint1 comple
StringCat (def, name);
}
if (orgnl != NULL) {
+ if (chr != NULL) {
+ StringCat (def, " ");
+ StringCat (def, orgnl);
+ StringCat (def, " chromosome ");
+ StringCat(def, chr);
+ StringCat (def, completeseq);
+ ch = *def;
+ *def = TO_UPPER (ch);
+ return def;
+ }
StringCat (def, " ");
StringCat (def, orgnl);
StringCat (def, completegen);
diff --git a/api/txalign.c b/api/txalign.c
index 21bfec8b..05dea89b 100644
--- a/api/txalign.c
+++ b/api/txalign.c
@@ -1,4 +1,4 @@
-/* $Id: txalign.c,v 6.81 2003/11/25 16:24:03 dondosha Exp $
+/* $Id: txalign.c,v 6.83 2004/05/14 16:31:03 kans Exp $
***************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -27,13 +27,19 @@
*
* File Name: txalign.c
*
-* $Revision: 6.81 $
+* $Revision: 6.83 $
*
* File Description: Formating of text alignment for the BLAST output
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: txalign.c,v $
+* Revision 6.83 2004/05/14 16:31:03 kans
+* ScoreAndEvalueToBuffers had a typo in OS_MAC specific code
+*
+* Revision 6.82 2004/05/14 15:38:09 dondosha
+* Made function ScoreAndEvalueToBuffers public
+*
* Revision 6.81 2003/11/25 16:24:03 dondosha
* Use query number for synchronizeCheck; do not show structure link if RID not available
*
@@ -4587,6 +4593,62 @@ Tx_PrintDefLine(BlastDefLinePtr bdsp, CharPtr buffer, Int4 length)
return TRUE;
}
+/* ScoreAndEvalueToBuffers
+ *
+ * Formats an e-value and a bit score as text with sprintf.
+ *
+ *   bit_score         value written to bit_score_buf
+ *   evalue            value written to the buffer *evalue_buf points at
+ *   bit_score_buf     destination for the bit score text
+ *   evalue_buf        address of the caller's pointer to the e-value buffer;
+ *                     the pointer itself may be advanced (see below)
+ *   knock_off_allowed if TRUE, *evalue_buf is advanced by one character when
+ *                     evalue is below 1.0e-99 but not below 1.0e-180, so the
+ *                     caller's pointer skips the first character written
+ *
+ * No buffer sizes are passed in; the caller must supply buffers large
+ * enough for the formatted text.
+ *
+ * E-value format by magnitude:
+ *   < 1.0e-180  literal "0.0"
+ *   < 1.0e-99   exponent notation, precision 0
+ *   < 0.0009    exponent notation, precision 0
+ *   < 0.1       fixed, 3 decimals
+ *   < 1.0       fixed, 2 decimals
+ *   < 10.0      fixed, 1 decimal
+ *   otherwise   fixed, no decimals
+ *
+ * Bit score format: exponent notation above 9999, integer above 99.9,
+ * otherwise one decimal -- except under OS_MAC, where the last case is
+ * also printed as an integer.
+ */
+NLM_EXTERN void LIBCALL
+ScoreAndEvalueToBuffers(FloatHi bit_score, FloatHi evalue,
+ CharPtr bit_score_buf, CharPtr PNTR evalue_buf,
+ Boolean knock_off_allowed)
+{
+#ifdef OS_MAC
+ /* Mac build: same thresholds as below, with 'L' length modifiers */
+ if (evalue < 1.0e-180) {
+ sprintf(*evalue_buf, "0.0");
+ } else if (evalue < 1.0e-99) {
+ sprintf(*evalue_buf, "%2.0Le", evalue);
+ if (knock_off_allowed)
+ (*evalue_buf)++; /* Knock off digit. */
+ } else if (evalue < 0.0009) {
+ sprintf(*evalue_buf, "%3.0Le", evalue);
+ } else if (evalue < 0.1) {
+ sprintf(*evalue_buf, "%4.3Lf", evalue);
+ } else if (evalue < 1.0) {
+ sprintf(*evalue_buf, "%3.2Lf", evalue);
+ } else if (evalue < 10.0) {
+ sprintf(*evalue_buf, "%2.1Lf", evalue);
+ } else {
+ sprintf(*evalue_buf, "%5.0Lf", evalue);
+ }
+ if (bit_score > 9999)
+ sprintf(bit_score_buf, "%4.3Le", bit_score);
+ else if (bit_score > 99.9)
+ sprintf(bit_score_buf, "%4.0ld", (long)bit_score);
+ else /* %4.1Lf is bad on 68K Mac, so cast to long */
+ sprintf(bit_score_buf, "%4.0ld", (long)bit_score);
+#else
+ /* all other platforms */
+ if (evalue < 1.0e-180) {
+ sprintf(*evalue_buf, "0.0");
+ } else if (evalue < 1.0e-99) {
+ sprintf(*evalue_buf, "%2.0le", evalue);
+ if (knock_off_allowed)
+ (*evalue_buf)++; /* Knock off digit. */
+ } else if (evalue < 0.0009) {
+ sprintf(*evalue_buf, "%3.0le", evalue);
+ } else if (evalue < 0.1) {
+ sprintf(*evalue_buf, "%4.3lf", evalue);
+ } else if (evalue < 1.0) {
+ sprintf(*evalue_buf, "%3.2lf", evalue);
+ } else if (evalue < 10.0) {
+ sprintf(*evalue_buf, "%2.1lf", evalue);
+ } else {
+ sprintf(*evalue_buf, "%5.0lf", evalue);
+ }
+ if (bit_score > 9999)
+ sprintf(bit_score_buf, "%4.3le", bit_score);
+ else if (bit_score > 99.9)
+ sprintf(bit_score_buf, "%4.0ld", (long)bit_score);
+ else
+ sprintf(bit_score_buf, "%4.1lf", bit_score);
+#endif
+}
+
NLM_EXTERN Boolean LIBCALL
PrintDefLinesFromSeqAlignWithPath(SeqAlignPtr seqalign, Int4 line_length, FILE *outfp, Uint4 options,
Int4 mode, Int2Ptr marks, Int4 number_of_descriptions,
@@ -5032,91 +5094,24 @@ PrintDefLinesFromSeqAlignWithPath(SeqAlignPtr seqalign, Int4 line_length, FILE *
*(ptr + pos) = NULLB;
}
-#ifdef OS_MAC
if (txsp->found_score) {
evalue = txsp->evalue;
- eval_buff_ptr = eval_buff;
- if (evalue < 1.0e-180) {
- sprintf(eval_buff, "0.0");
- } else if (evalue < 1.0e-99) {
- sprintf(eval_buff, "%2.0Le", evalue);
- eval_buff_ptr++; /* Knock off digit. */
- } else if (evalue < 0.0009) {
- sprintf(eval_buff, "%3.0Le", evalue);
- } else if (evalue < 0.1) {
- sprintf(eval_buff, "%4.3Lf", evalue);
- } else if (evalue < 1.0) {
- sprintf(eval_buff, "%3.2Lf", evalue);
- } else if (evalue < 10.0) {
- sprintf(eval_buff, "%2.1Lf", evalue);
- } else {
- sprintf(eval_buff, "%5.0Lf", evalue);
- }
-
bit_score = txsp->bit_score;
- if (bit_score > 9999) {
- sprintf(bit_score_buff, "%4.3Le", bit_score);
- } else if (bit_score > 99.9) {
- sprintf(bit_score_buff, "%4.0ld", (long) bit_score);
- } else {
- sprintf(bit_score_buff, "%4.0ld", (long) bit_score); /* %4.0Lf is bad on 68K Mac, so cast to long */
- }
-
- if (options & TXALIGN_HTML) {
- if (gi != 0)
- sprintf(id_buffer, "%ld", (long) gi);
- else
- sprintf(id_buffer, "%s", txsp->buffer_id);
- bit_score_buff_ptr = bit_score_buff;
- if (*bit_score_buff_ptr == ' ') {
- bit_score_buff_ptr++;
- sprintf(buffer1, " <a href = #%s>%s</a>", id_buffer, bit_score_buff_ptr);
- } else {
- sprintf(buffer1, "<a href = #%s>%s</a>", id_buffer, bit_score_buff_ptr);
- }
- } else {
- sprintf(buffer1, "%s", bit_score_buff);
- }
-
-
-#else
- if (txsp->found_score) {
- evalue = txsp->evalue;
+
eval_buff_ptr = eval_buff;
- if (evalue < 1.0e-180) {
- sprintf(eval_buff, "0.0");
- } else if (evalue < 1.0e-99) {
- sprintf(eval_buff, "%2.0le", evalue);
- eval_buff_ptr++; /* Knock off digit. */
- } else if (evalue < 0.0009) {
- sprintf(eval_buff, "%3.0le", evalue);
- } else if (evalue < 0.1) {
- sprintf(eval_buff, "%4.3lf", evalue);
- } else if (evalue < 1.0) {
- sprintf(eval_buff, "%3.2lf", evalue);
- } else if (evalue < 10.0) {
- sprintf(eval_buff, "%2.1lf", evalue);
- } else {
- sprintf(eval_buff, "%2.0lf", evalue);
- }
-
- bit_score = txsp->bit_score;
- if (bit_score > 9999) {
- sprintf(bit_score_buff, "%4.3le", bit_score);
- } else if (bit_score > 99.9) {
- sprintf(bit_score_buff, "%4.0ld", (long) bit_score);
- } else {
- sprintf(bit_score_buff, "%4.0lf", bit_score);
- }
-
+ ScoreAndEvalueToBuffers(bit_score, evalue, bit_score_buff,
+ &eval_buff_ptr, TRUE);
+
if (options & TXALIGN_HTML) {
if (gi != 0)
sprintf(id_buffer, "%ld", (long) gi);
else {
- /*
- sprintf(id_buffer, "%s", txsp->buffer_id);
- */
- MuskSeqIdWrite(txsp->id, id_buffer, BUFFER_LENGTH, PRINTID_TEXTID_ACCESSION, FALSE, FALSE);
+#ifdef OS_MAC
+ sprintf(id_buffer, "%s", txsp->buffer_id);
+#else
+ MuskSeqIdWrite(txsp->id, id_buffer, BUFFER_LENGTH,
+ PRINTID_TEXTID_ACCESSION, FALSE, FALSE);
+#endif
}
bit_score_buff_ptr = bit_score_buff;
if (*bit_score_buff_ptr == ' ') {
@@ -5128,8 +5123,7 @@ PrintDefLinesFromSeqAlignWithPath(SeqAlignPtr seqalign, Int4 line_length, FILE *
} else {
sprintf(buffer1, "%s", bit_score_buff);
}
-
-#endif
+
/*adjust N position*/
strLen=StringLen(eval_buff_ptr);
extraSpace=strLen<maxEvalWidth?(maxEvalWidth-strLen):0;
@@ -5808,58 +5802,10 @@ NLM_EXTERN int LIBCALLBACK FormatScoreFunc(AlignStatOptionPtr asop)
}
ff_EndPrint();
eval_buff_ptr = eval_buff;
-#ifdef OS_MAC
- if (evalue < 1.0e-180) {
- sprintf(eval_buff, "0.0");
- } else if (evalue < 1.0e-99) {
- sprintf(eval_buff, "%2.0Le", evalue);
- eval_buff_ptr++; /* Knock off digit. */
- } else if (evalue < 0.0009) {
- sprintf(eval_buff, "%3.0Le", evalue);
- } else if (evalue < 0.1) {
- sprintf(eval_buff, "%4.3Lf", evalue);
- } else if (evalue < 1.0) {
- sprintf(eval_buff, "%3.2Lf", evalue);
- } else if (evalue < 10.0) {
- sprintf(eval_buff, "%2.1Lf", evalue);
- } else {
- sprintf(eval_buff, "%5.0Lf", evalue);
- }
-
- if (bit_score > 9999) {
- sprintf(bit_score_buff, "%4.3Le", bit_score);
- } else if (bit_score > 99.9) {
- sprintf(bit_score_buff, "%4.0ld", (long) bit_score);
- } else {
- sprintf(bit_score_buff, "%4.0ld", (long) bit_score); /* %4.1Lf is bad on 68K Mac, so cast to long */
- }
-#else
- if (evalue < 1.0e-180) {
- sprintf(eval_buff, "0.0");
- } else if (evalue < 1.0e-99) {
- sprintf(eval_buff, "%2.0le", evalue);
- eval_buff_ptr++; /* Knock off digit. */
- } else if (evalue < 0.0009) {
- sprintf(eval_buff, "%3.0le", evalue);
- } else if (evalue < 0.1) {
- sprintf(eval_buff, "%4.3lf", evalue);
- } else if (evalue < 1.0) {
- sprintf(eval_buff, "%3.2lf", evalue);
- } else if (evalue < 10.0) {
- sprintf(eval_buff, "%2.1lf", evalue);
- } else {
- sprintf(eval_buff, "%5.0lf", evalue);
- }
-
- if (bit_score > 9999) {
- sprintf(bit_score_buff, "%4.3le", bit_score);
- } else if (bit_score > 99.9) {
- sprintf(bit_score_buff, "%4.0ld", (long) bit_score);
- } else {
- sprintf(bit_score_buff, "%4.1lf", bit_score);
- }
-#endif
+ ScoreAndEvalueToBuffers(bit_score, evalue, bit_score_buff,
+ &eval_buff_ptr, TRUE);
+
if(asop->html_hot_link == TRUE && *id_buffer != NULLB) {
Int4 m_from, m_to, t_from, t_to;
diff --git a/api/txalign.h b/api/txalign.h
index 949ba262..8e9ef1c7 100644
--- a/api/txalign.h
+++ b/api/txalign.h
@@ -1,4 +1,4 @@
-/* $Id: txalign.h,v 6.16 2003/11/20 22:09:26 dondosha Exp $
+/* $Id: txalign.h,v 6.17 2004/05/14 15:38:09 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -29,7 +29,7 @@
*
* Initial Version Creation Date: 03/13/94
*
-* $Revision: 6.16 $
+* $Revision: 6.17 $
*
* File Description:
* External include file for various alignments
@@ -38,6 +38,9 @@
*
*
* $Log: txalign.h,v $
+* Revision 6.17 2004/05/14 15:38:09 dondosha
+* Made function ScoreAndEvalueToBuffers public
+*
* Revision 6.16 2003/11/20 22:09:26 dondosha
* Added a PrindDefLinesFromSeqAlignWithPath function with an argument to provide root path for image links
*
@@ -585,6 +588,11 @@ NLM_EXTERN Boolean LIBCALL PrintDefLinesFromSeqAlignWithPath PROTO((
CharPtr www_root_path
));
+NLM_EXTERN void LIBCALL
+ScoreAndEvalueToBuffers PROTO((FloatHi bit_score, FloatHi evalue,
+ CharPtr bit_score_buf, CharPtr PNTR evalue_buf,
+ Boolean knock_off_allowed));
+
/*
Fills in the slots with score, bit_score, etc. from the SeqAlign.
*/
diff --git a/api/valid.c b/api/valid.c
index 4ae8a54d..c6e978d9 100644
--- a/api/valid.c
+++ b/api/valid.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/1/94
*
-* $Revision: 6.451 $
+* $Revision: 6.460 $
*
* File Description: Sequence editing utilities
*
@@ -39,6 +39,33 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: valid.c,v $
+* Revision 6.460 2004/06/14 18:49:52 kans
+* TGA can be used for Selenocysteine without needing modified codon recognition exception
+*
+* Revision 6.459 2004/06/10 19:04:29 kans
+* ERR_SEQ_INST_GiWithoutAccession drops to WARNING if validator run in tbl2asnf
+*
+* Revision 6.458 2004/06/03 18:00:18 kans
+* added LookForMultiplePubs, ERR_SEQ_DESCR_CollidingPublications
+*
+* Revision 6.457 2004/05/28 19:56:50 kans
+* ifdef out section checking for length >350000
+*
+* Revision 6.456 2004/05/27 21:28:18 kans
+* FlyBase joins FLYBASE as legal capitalization for dbxref
+*
+* Revision 6.455 2004/05/24 20:17:24 kans
+* removed non-preferred variants ribosome slippage, trans splicing, alternate processing, and non-consensus splice site
+*
+* Revision 6.454 2004/05/24 17:28:26 kans
+* ERR_SEQ_INST_BadSeqIdFormat allows 2 letters + underscore + 9 digits, ValidateAccn does the same
+*
+* Revision 6.453 2004/05/12 18:55:33 kans
+* StreamCache takes SeqLocPtr as well as BioseqPtr optional arguments, slp version is equivalent of SeqPortNewByLoc
+*
+* Revision 6.452 2004/05/06 19:42:22 kans
+* new function GetValidCountryList for access to country code list, which is now NULL terminated
+*
* Revision 6.451 2004/05/03 12:20:23 kans
* use StreamCache in ValidateBioseqInst, CdTransCheck, latter also uses BSMerge into a buffer instead of many calls to BSGetByte
*
@@ -1602,6 +1629,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean check
static void CdsProductIdCheck (ValidStructPtr vsp, SeqFeatPtr sfp);
static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSourcePtr biop);
static void ValidatePubdesc (ValidStructPtr vsp, GatherContextPtr gcp, PubdescPtr pdp);
+static void LookForMultiplePubs (ValidStructPtr vsp, GatherContextPtr gcp, SeqDescrPtr sdp);
static void ValidateSfpCit (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr sfp);
/* alignment validator */
@@ -2127,6 +2155,7 @@ static Boolean Valid1GatherProc (GatherContextPtr gcp)
if (sdp->choice == Seq_descr_pub) {
pdp = (PubdescPtr) sdp->data.ptrvalue;
ValidatePubdesc (vsp, gcp, pdp);
+ LookForMultiplePubs (vsp, gcp, sdp);
}
if (sdp->choice == Seq_descr_mol_type) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_InvalidForType, "MolType descriptor is obsolete");
@@ -3608,7 +3637,6 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
SeqLitPtr slitp;
SeqCodeTablePtr sctp;
MolInfoPtr mip;
- Boolean litHasData;
SeqMgrDescContext context;
SeqFeatPtr cds;
GBBlockPtr gbp;
@@ -3627,7 +3655,6 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
Boolean hasGi = FALSE;
SeqHistPtr hist;
Boolean isActiveFin = FALSE;
- Boolean isGenBankEMBLorDDBJ;
Boolean isPatent = FALSE;
Boolean isPDB = FALSE;
Boolean isNC = FALSE;
@@ -3824,6 +3851,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
} else if (isNZ && numletters == 4 && numdigits == 8 && numunderscores == 0) {
} else if (numletters == 2 && numdigits == 6 && numunderscores == 1) {
} else if (numletters == 2 && numdigits == 8 && numunderscores == 1) {
+ } else if (numletters == 2 && numdigits == 9 && numunderscores == 1) {
} else {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSeqIdFormat, "Bad accession %s", tsip->accession);
}
@@ -3878,7 +3906,12 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
if (gi_count > 0 && accn_count == 0 && (! isPDB) && bsp->repr != Seq_repr_virtual) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_GiWithoutAccession, "No accession on sequence with gi number");
+ if (vsp->seqSubmitParent) {
+ sev = SEV_WARNING;
+ } else {
+ sev = SEV_ERROR;
+ }
+ ValidErr (vsp, sev, ERR_SEQ_INST_GiWithoutAccession, "No accession on sequence with gi number");
}
if (gi_count > 0 && accn_count > 1) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_MultipleAccessions, "Multiple accessions on sequence with gi number");
@@ -4123,7 +4156,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
termination = '\0';
break;
}
- if (! StreamCacheSetup (bsp, STREAM_EXPAND_GAPS, &sc)) {
+ if (! StreamCacheSetup (bsp, NULL, STREAM_EXPAND_GAPS, &sc)) {
ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_SeqPortFail, "Can't open StreamCache");
return;
}
@@ -4537,7 +4570,10 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
}
+#if 0
if (bsp->length > 350000 && (! isNTorNC)) {
+ Boolean isGenBankEMBLorDDBJ;
+ Boolean litHasData;
if (bsp->repr == Seq_repr_delta) {
isGenBankEMBLorDDBJ = FALSE;
/* suppress this for data from genome annotation project */
@@ -4602,6 +4638,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
/* No-op for now? Or generate a warning? */
}
}
+#endif
if (bsp->repr == Seq_repr_seg) {
CheckSegBspAgainstParts (vsp, gcp, bsp);
@@ -4828,6 +4865,63 @@ static void ValidateCitSub (ValidStructPtr vsp, CitSubPtr csp)
}
}
+static void LookForMultiplePubs (ValidStructPtr vsp, GatherContextPtr gcp, SeqDescrPtr sdp)
+/* Warn if a pub descriptor reached by iterating from sdp repeats sdp's MUID or PMID. */
+{
+ Bioseq bs; /* zero-filled below; only used as the first argument to GetNextDescriptorUnindexed */
+ Boolean collision, otherpub;
+ Int4 muid, pmid;
+ SeqDescrPtr nextpub;
+ PubdescPtr pdp;
+ ValNodePtr vnp;
+/* Runs only for pub descriptors with a nonzero 'extended' field; gcp is NULL-checked */
+/* but not otherwise used here. NOTE(review): iteration order of the helper - confirm. */
+ if (sdp != NULL && sdp->choice == Seq_descr_pub && sdp->extended != 0 && vsp != NULL && gcp != NULL) {
+ MemSet ((Pointer) &bs, 0, sizeof (Bioseq));
+ pdp = (PubdescPtr) sdp->data.ptrvalue;
+ if (pdp != NULL) {
+ otherpub = FALSE;
+ muid = 0;
+ pmid = 0;
+ for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) { /* collect this descriptor's identifiers */
+ if (vnp->choice == PUB_Muid) {
+ muid = vnp->data.intvalue;
+ } else if (vnp->choice == PUB_PMid) {
+ pmid = vnp->data.intvalue;
+ } else {
+ otherpub = TRUE; /* descriptor carries something besides a bare MUID/PMID */
+ }
+ }
+ if (otherpub) { /* descriptors holding only MUID/PMID entries are not checked */
+ if (muid > 0 || pmid > 0) {
+ collision = FALSE;
+ nextpub = GetNextDescriptorUnindexed (&bs, Seq_descr_pub, sdp);
+ while (nextpub != NULL) { /* scan every further pub descriptor the helper returns */
+ pdp = (PubdescPtr) nextpub->data.ptrvalue;
+ if (pdp != NULL) {
+ for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
+ if (vnp->choice == PUB_Muid) {
+ if (muid > 0 && muid == vnp->data.intvalue) {
+ collision = TRUE;
+ }
+ } else if (vnp->choice == PUB_PMid) {
+ if (pmid > 0 && pmid == vnp->data.intvalue) {
+ collision = TRUE;
+ }
+ }
+ }
+ }
+ nextpub = GetNextDescriptorUnindexed (&bs, Seq_descr_pub, nextpub);
+ }
+ if (collision) { /* reported once per sdp, however many descriptors matched */
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_CollidingPublications, "Multiple publications with same identifier");
+ }
+ }
+ }
+ }
+ }
+}
+
static void ValidatePubdesc (ValidStructPtr vsp, GatherContextPtr gcp, PubdescPtr pdp)
{
AuthListPtr alp;
@@ -5199,8 +5293,7 @@ static Boolean ValidateSeqFeatCommon (SeqFeatPtr sfp, BioseqValidStrPtr bvsp, Va
case SEQFEAT_CDREGION:
if (numivals > 1) {
if ((! sfp->excpt) ||
- (StringISearch (sfp->except_text, "ribosomal slippage") == NULL &&
- StringISearch (sfp->except_text, "ribosome slippage") == NULL)) {
+ (StringISearch (sfp->except_text, "ribosomal slippage") == NULL)) {
sev = SEV_ERROR;
if (is_refseq) {
sev = SEV_WARNING;
@@ -5368,7 +5461,7 @@ static void ValidateSeqFeatContext (GatherContextPtr gcp)
*
*****************************************************************************/
-static CharPtr countrycodes[] = {
+static CharPtr Nlm_valid_country_codes [] = {
"Afghanistan",
"Albania",
"Algeria",
@@ -5630,9 +5723,16 @@ static CharPtr countrycodes[] = {
"Yemen",
"Yugoslavia",
"Zambia",
- "Zimbabwe"
+ "Zimbabwe",
+ NULL
};
+NLM_EXTERN CharPtr PNTR GetValidCountryList (void)
+/* Returns the static Nlm_valid_country_codes table; its last entry is NULL. */
+{
+ return (CharPtr PNTR) Nlm_valid_country_codes;
+}
+
static Boolean CountryIsValid (CharPtr name)
{
Int2 L, R, mid;
@@ -5648,18 +5748,18 @@ static Boolean CountryIsValid (CharPtr name)
}
L = 0;
- R = sizeof (countrycodes) / sizeof (countrycodes[0]);
+ R = sizeof (Nlm_valid_country_codes) / sizeof (Nlm_valid_country_codes[0]) - 1; /* -1 because now NULL terminated */
while (L < R) {
mid = (L + R) / 2;
- if (StringICmp (countrycodes[mid], str) < 0) {
+ if (StringICmp (Nlm_valid_country_codes[mid], str) < 0) {
L = mid + 1;
} else {
R = mid;
}
}
- if (StringICmp (countrycodes[R], str) == 0) {
+ if (StringICmp (Nlm_valid_country_codes[R], str) == 0) {
return TRUE;
}
@@ -6429,8 +6529,10 @@ static Boolean FlybaseDbxrefs (ValNodePtr vnp)
while (vnp != NULL) {
dbt = (DbtagPtr) vnp->data.ptrvalue;
- if (dbt != NULL && StringICmp (dbt->db, "FLYBASE") == 0) {
- return TRUE;
+ if (dbt != NULL) {
+ if (StringICmp (dbt->db, "FLYBASE") == 0 || StringICmp (dbt->db, "FlyBase") == 0) {
+ return TRUE;
+ }
}
vnp = vnp->next;
}
@@ -6549,13 +6651,11 @@ static Boolean IsNCorNT (SeqEntryPtr sep, SeqLocPtr location)
static Boolean NotPeptideException (SeqFeatPtr sfp, SeqFeatPtr last)
{
if (sfp != NULL && sfp->excpt) {
- if (StringISearch (sfp->except_text, "alternative processing") != NULL ||
- StringISearch (sfp->except_text, "alternate processing") != NULL)
+ if (StringISearch (sfp->except_text, "alternative processing") != NULL)
return FALSE;
}
if (last != NULL && last->excpt) {
- if (StringISearch (last->except_text, "alternative processing") != NULL ||
- StringISearch (last->except_text, "alternate processing") != NULL)
+ if (StringISearch (last->except_text, "alternative processing") != NULL)
return FALSE;
}
return TRUE;
@@ -7887,7 +7987,11 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
if (aa == 'U') {
sev = SEV_WARNING;
}
- if (StringISearch (sfp->except_text, "modified codon recognition") == NULL) {
+ if (aa == 'U' && taa == '*' && trp->codon [j] == 14) {
+ /* selenocysteine normally uses TGA (14), so ignore without requiring exception in record */
+ /* TAG (11) is used for pyrrolysine in archaebacteria */
+ /* TAA (10) is not yet known to be used for an exceptional amino acid */
+ } else if (StringISearch (sfp->except_text, "modified codon recognition") == NULL) {
ValidErr (vsp, sev, ERR_SEQ_FEAT_TrnaCodonWrong, "tRNA codon does not match genetic code");
}
}
@@ -8253,8 +8357,7 @@ static void CheckForBadMRNAOverlap (ValidStructPtr vsp, SeqFeatPtr sfp)
}
mrna = SeqMgrGetOverlappingFeature (sfp->location, FEATDEF_mRNA, NULL, 0, NULL, LOCATION_SUBSET, &fcontext);
if (mrna != NULL) {
- if (StringISearch (sfp->except_text, "ribosomal slippage") == NULL &&
- StringISearch (sfp->except_text, "ribosome slippage") == NULL) {
+ if (StringISearch (sfp->except_text, "ribosomal slippage") == NULL) {
ValidErr (vsp, sev, ERR_SEQ_FEAT_CDSmRNArange, "mRNA contains CDS but internal intron-exon boundaries do not match");
}
} else {
@@ -8381,13 +8484,9 @@ static CharPtr legal_exception_strings [] = {
"RNA editing",
"reasons given in citation",
"ribosomal slippage",
- "ribosome slippage",
- "trans splicing",
"trans-splicing",
"alternative processing",
- "alternate processing",
"artificial frameshift",
- "non-consensus splice site",
"nonconsensus splice site",
"rearrangement required for product",
"modified codon recognition",
@@ -9789,7 +9888,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
len--;
}
- if (! StreamCacheSetup (prot1seq, STREAM_EXPAND_GAPS, &sc)) {
+ if (! StreamCacheSetup (prot1seq, NULL, STREAM_EXPAND_GAPS, &sc)) {
goto erret;
}
/*
@@ -9972,10 +10071,8 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
/* specific biological exceptions suppress check */
if (sfp->excpt) {
- if (StringISearch (sfp->except_text, "ribosomal slippage") != NULL ||
- StringISearch (sfp->except_text, "ribosome slippage") != NULL ||
+ if (StringISearch (sfp->except_text, "ribosomal slippage") != NULL||
StringISearch (sfp->except_text, "artificial frameshift") != NULL ||
- StringISearch (sfp->except_text, "non-consensus splice site") != NULL ||
StringISearch (sfp->except_text, "nonconsensus splice site") != NULL) return;
}
@@ -10404,7 +10501,7 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi
if (exception) {
/* trans splicing exception turns off both mixed_strand and out_of_order messages */
- if (StringISearch (sfp->except_text, "trans splicing") != NULL || StringISearch (sfp->except_text, "trans-splicing") != NULL) {
+ if (StringISearch (sfp->except_text, "trans-splicing") != NULL) {
return;
}
}
diff --git a/api/valid.h b/api/valid.h
index b15a5828..bf698fa6 100644
--- a/api/valid.h
+++ b/api/valid.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/1/94
*
-* $Revision: 6.12 $
+* $Revision: 6.13 $
*
* File Description: Sequence editing utilities
*
@@ -39,6 +39,9 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: valid.h,v $
+* Revision 6.13 2004/05/06 19:42:22 kans
+* new function GetValidCountryList for access to country code list, which is now NULL terminated
+*
* Revision 6.12 2003/12/02 15:37:37 kans
* added vsp->seqSubmitParent for use by tbl2asn, which usually has a Seq-submit wrapper that is added on-the-fly and not indexed
*
@@ -187,6 +190,8 @@ NLM_EXTERN void ReportNonAscii PROTO((ValidStructPtr vsp, SeqEntryPtr sep));
NLM_EXTERN void SpellCallBack (char * str);
NLM_EXTERN Boolean IsNuclAcc (CharPtr name);
+NLM_EXTERN CharPtr PNTR GetValidCountryList (void);
+
#ifdef __cplusplus
}
#endif
diff --git a/api/valid.msg b/api/valid.msg
index 09281997..9ce8f22f 100644
--- a/api/valid.msg
+++ b/api/valid.msg
@@ -273,6 +273,10 @@ $^ UnwantedCompleteFlag, 26
The Mol-info.completeness flag should not be set on a genomic sequence unless
the title also says it is a complete sequence or complete genome.
+$^ CollidingPublications, 27
+Multiple publication descriptors with the same PMID or MUID apply to a Bioseq.
+The lower-level ones are redundant, and should be removed.
+
$$ GENERIC, 3
$^ NonAsciiAsn, 1
diff --git a/api/validerr.h b/api/validerr.h
index 4350f8ff..6f1e73a5 100644
--- a/api/validerr.h
+++ b/api/validerr.h
@@ -74,6 +74,7 @@
#define ERR_SEQ_DESCR_UnnecessaryBioSourceFocus 2,24
#define ERR_SEQ_DESCR_RefGeneTrackingWithoutStatus 2,25
#define ERR_SEQ_DESCR_UnwantedCompleteFlag 2,26
+#define ERR_SEQ_DESCR_CollidingPublications 2,27
#define ERR_GENERIC 3,0
#define ERR_GENERIC_NonAsciiAsn 3,1
#define ERR_GENERIC_Spell 3,2
diff --git a/asn/asn.all b/asn/asn.all
index ba9e64a4..14bb5ffe 100644
--- a/asn/asn.all
+++ b/asn/asn.all
@@ -738,7 +738,7 @@ Seq-loc-equiv ::= SET OF Seq-loc -- for a set of equivalent locations
END
---$Revision: 6.7 $
+--$Revision: 6.8 $
--**********************************************************************
--
-- NCBI Sequence elements
@@ -750,8 +750,8 @@ END
NCBI-Sequence DEFINITIONS ::=
BEGIN
-EXPORTS Bioseq, Seq-annot, Pubdesc, Seq-descr, Seqdesc, Numbering, Heterogen,
- Seq-hist, GIBB-mol, Seq-literal;
+EXPORTS Annotdesc, Bioseq, GIBB-mol, Heterogen, Numbering, Pubdesc,
+ Seq-annot, Seq-descr, Seq-hist, Seq-literal, Seqdesc;
IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
Seq-align FROM NCBI-Seqalign
@@ -3046,124 +3046,102 @@ GBSeq ::= SEQUENCE {
END
---$Revision: 1.4 $
+--$Revision: 1.5 $
--************************************************************************
--
-- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record
-- The International Nucleotide Sequence Database (INSD) collaboration
--- Version 1.1, 9 Apr 2004
+-- Version 1.3, 1 June 2004
--
--************************************************************************
INSD-INSDSeq DEFINITIONS ::=
BEGIN
---********
-- INSDSeq provides the elements of a sequence as presented in the
-- GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of
--- additional structure, plus support for protein flatfile format
--- (ie, GenPept) as displayed in the Entrez application.
+-- additional structure.
-- Although this single perspective of the three flatfile formats
-- provides a useful simplification, it hides to some extent the
-- details of the actual data underlying those formats. Nevertheless,
--- the ASN.1 and XML versions of INSD-Seq are being provided with
+-- the XML version of INSD-Seq is being provided with
-- the hopes that it will prove useful to those who bulk-process
--- sequence data at the flatfile-format level of detail. Because
--- INSD-Seq is a compromise, a number of pragmatic decisions have
+-- sequence data at the flatfile-format level of detail. Further
+-- documentation regarding the content and conventions of those formats
+-- can be found at:
+--
+-- URLs for the DDBJ, EMBL, and GenBank Feature Table Document:
+-- http://www.ddbj.nig.ac.jp/FT/full_index.html
+-- http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
+-- http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html
+--
+-- URLs for DDBJ, EMBL, and GenBank Release Notes :
+-- http://www.ddbj.nig.ac.jp/ddbjnew/ddbj_relnote.html
+-- http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html
+-- ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
+--
+-- Because INSDSeq is a compromise, a number of pragmatic decisions have
-- been made:
--
-- In pursuit of simplicity and familiarity a number of fields do not
-- have full substructure defined here where there is already a
-- standard flatfile format string. For example:
--
--- Date DD-Mon-YYYY
--- Authors LastName, Intials (with periods)
--- Journal JounalName Volume (issue), page-range (year)
--- FeatureLocations as per flatfile feature table, but FeatureIntervals
--- may also be provided as a convenience
--- FeatureQualifiers as per flatfile feature table
--- Primary has a string that represents a table to construct
+-- Dates: DD-MON-YYYY (eg 10-JUN-2003)
+--
+-- Author: LastName, Initials (eg Smith, J.N.)
+-- or Lastname Initials (eg Smith J.N.)
+--
+-- Journal: JournalName Volume (issue), page-range (year)
+-- or JournalName Volume(issue):page-range(year)
+-- eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
+-- Appl. Environ. Microbiol. 61(4):1646-1648(1995).
+--
+-- FeatureLocations are represented as in the flatfile feature table,
+-- but FeatureIntervals may also be provided as a convenience
+--
+-- FeatureQualifiers are represented as in the flatfile feature table.
+--
+-- Primary has a string that represents a table to construct
-- a third party (TPA) sequence.
--- other-seqids can have strings with the "vertical bar format" sequence
--- identifiers used in BLAST for example, when they are non-genbank types.
--- Currently in flatfile format you only see GI, but there are others, like
--- patents, submitter clone names, etc which will appear here, as they
--- always have in the ASN.1 format, and full XML format.
--- source-db is a formatted text block for peptides in GenPept format that
--- carries information from the source protein database.
+--
+-- other-seqids can have strings with the "vertical bar format" sequence
+-- identifiers used in BLAST for example, when they are non-INSD types.
+--
+-- Currently in flatfile format you only see Accession numbers, but there
+-- are others, like patents, submitter clone names, etc which will
+-- appear here
--
-- There are also a number of elements that could have been more exactly
-- specified, but in the interest of simplicity have been simply left as
-- optional. For example:
--
--- accession and accession.version are always required for publicly
--- accessible sequence records. However, they are optional in INSDSeq
--- so that this format can also be used for non-public sequence data,
--- prior to the assignment of accessions and version numbers. In such
--- cases, records will have only "other-seqids".
+-- All publicly accessible sequence records in INSDSeq format will
+-- include accession and accession.version. However, these elements are
+-- optional in INSDSeq so that this format can also be used
+-- for non-public sequence data, prior to the assignment of accessions and
+-- version numbers. In such cases, records will have only "other-seqids".
--
-- sequences will normally all have "sequence" filled in. But contig records
-- will have a "join" statement in the "contig" slot, and no "sequence".
-- We also may consider a retrieval option with no sequence of any kind
-- and no feature table to quickly check minimal values.
--
--- a reference may have an author list, or be from a consortium, or both.
---
--- some fields, such as taxonomy, do appear as separate elements in flatfile
--- format but without a specific linetype (in GenBank format this comes
--- under ORGANISM). Another example is the separation of primary accession
--- from the list of secondary accessions. In flatfile format primary
--- accession is just the first one on the list that includes all secondaries
--- after it.
---
--- create-date deserves special comment. The date you see on the right hand
--- side of the LOCUS line in GenBank format is actually the last date the
--- the record was modified (or the update-date). The date the record was
--- first submitted to the databases appears in the first submission citation
--- in the reference section. Internally in the databases and ASN.1 NCBI keeps
--- the first date the record was released into the sequence database at
--- NCBI as create-date. For records from EMBL, which supports create-date,
--- it is the date provided by EMBL. For DDBJ records, which do not supply
--- a create-date (same as GenBank format) the create-date is the first date
--- NCBI saw the record from DDBJ. For older GenBank records, before NCBI
--- took responsibility for GenBank, it is just the first date NCBI saw the
--- record. Create-date can be very useful, so we expose it here, but users
--- must understand it is only an approximation and comes from many sources,
--- and with many exceptions and caveats. It does NOT tell you the first
--- date the public might have seen this record and thus is NOT an accurate
--- measure for legal issues of precedence.
---
--- Four elements are specific to records originating at the EMBL
+-- Four (optional) elements are specific to records represented via the EMBL
-- sequence database: INSDSeq_update-release, INSDSeq_create-release,
-- INSDSeq_entry-version, and INSDSeq_database-reference.
--
--- Two elements are specific to records originating at the GenBank
--- and DDBJ sequence databases: INSDSeq_keywords, and INSDSeq_segment.
+-- One (optional) element is specific to records originating at the GenBank
+-- and DDBJ sequence databases: INSDSeq_segment.
--
--********
INSDSeq ::= SEQUENCE {
locus VisibleString ,
length INTEGER ,
- strandedness INTEGER {
- not-set (0) ,
- single-stranded (1) ,
- double-stranded (2) ,
- mixed-stranded (3) } DEFAULT not-set ,
- moltype INTEGER {
- nucleic-acid (0) ,
- dna (1) ,
- rna (2) ,
- trna (3) ,
- rrna (4) ,
- mrna (5) ,
- urna (6) ,
- snrna (7) ,
- snorna (8) ,
- peptide (9) } DEFAULT nucleic-acid ,
- topology INTEGER {
- linear (1) ,
- circular (2) } DEFAULT linear ,
+ strandedness VisibleString OPTIONAL ,
+ moltype VisibleString ,
+ topology VisibleString OPTIONAL ,
division VisibleString ,
update-date VisibleString ,
create-date VisibleString ,
diff --git a/asn/insdseq.asn b/asn/insdseq.asn
index be97634b..e06968be 100644
--- a/asn/insdseq.asn
+++ b/asn/insdseq.asn
@@ -1,121 +1,99 @@
---$Revision: 1.4 $
+--$Revision: 1.5 $
--************************************************************************
--
-- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record
-- The International Nucleotide Sequence Database (INSD) collaboration
--- Version 1.1, 9 Apr 2004
+-- Version 1.3, 1 June 2004
--
--************************************************************************
INSD-INSDSeq DEFINITIONS ::=
BEGIN
---********
-- INSDSeq provides the elements of a sequence as presented in the
-- GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of
--- additional structure, plus support for protein flatfile format
--- (ie, GenPept) as displayed in the Entrez application.
+-- additional structure.
-- Although this single perspective of the three flatfile formats
-- provides a useful simplification, it hides to some extent the
-- details of the actual data underlying those formats. Nevertheless,
--- the ASN.1 and XML versions of INSD-Seq are being provided with
+-- the XML version of INSD-Seq is being provided with
-- the hopes that it will prove useful to those who bulk-process
--- sequence data at the flatfile-format level of detail. Because
--- INSD-Seq is a compromise, a number of pragmatic decisions have
+-- sequence data at the flatfile-format level of detail. Further
+-- documentation regarding the content and conventions of those formats
+-- can be found at:
+--
+-- URLs for the DDBJ, EMBL, and GenBank Feature Table Document:
+-- http://www.ddbj.nig.ac.jp/FT/full_index.html
+-- http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
+-- http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html
+--
+-- URLs for DDBJ, EMBL, and GenBank Release Notes :
+-- http://www.ddbj.nig.ac.jp/ddbjnew/ddbj_relnote.html
+-- http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html
+-- ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
+--
+-- Because INSDSeq is a compromise, a number of pragmatic decisions have
-- been made:
--
-- In pursuit of simplicity and familiarity a number of fields do not
-- have full substructure defined here where there is already a
-- standard flatfile format string. For example:
--
--- Date DD-Mon-YYYY
--- Authors LastName, Intials (with periods)
--- Journal JounalName Volume (issue), page-range (year)
--- FeatureLocations as per flatfile feature table, but FeatureIntervals
--- may also be provided as a convenience
--- FeatureQualifiers as per flatfile feature table
--- Primary has a string that represents a table to construct
+-- Dates: DD-MON-YYYY (eg 10-JUN-2003)
+--
+-- Author: LastName, Initials (eg Smith, J.N.)
+-- or Lastname Initials (eg Smith J.N.)
+--
+-- Journal: JournalName Volume (issue), page-range (year)
+-- or JournalName Volume(issue):page-range(year)
+-- eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
+-- Appl. Environ. Microbiol. 61(4):1646-1648(1995).
+--
+-- FeatureLocations are represented as in the flatfile feature table,
+-- but FeatureIntervals may also be provided as a convenience
+--
+-- FeatureQualifiers are represented as in the flatfile feature table.
+--
+-- Primary has a string that represents a table to construct
-- a third party (TPA) sequence.
--- other-seqids can have strings with the "vertical bar format" sequence
--- identifiers used in BLAST for example, when they are non-genbank types.
--- Currently in flatfile format you only see GI, but there are others, like
--- patents, submitter clone names, etc which will appear here, as they
--- always have in the ASN.1 format, and full XML format.
--- source-db is a formatted text block for peptides in GenPept format that
--- carries information from the source protein database.
+--
+-- other-seqids can have strings with the "vertical bar format" sequence
+-- identifiers used in BLAST for example, when they are non-INSD types.
+--
+-- Currently in flatfile format you only see Accession numbers, but there
+-- are others, like patents, submitter clone names, etc which will
+-- appear here
--
-- There are also a number of elements that could have been more exactly
-- specified, but in the interest of simplicity have been simply left as
-- optional. For example:
--
--- accession and accession.version are always required for publicly
--- accessible sequence records. However, they are optional in INSDSeq
--- so that this format can also be used for non-public sequence data,
--- prior to the assignment of accessions and version numbers. In such
--- cases, records will have only "other-seqids".
+-- All publicly accessible sequence records in INSDSeq format will
+-- include accession and accession.version. However, these elements are
+-- optional in INSDSeq so that this format can also be used
+-- for non-public sequence data, prior to the assignment of accessions and
+-- version numbers. In such cases, records will have only "other-seqids".
--
-- sequences will normally all have "sequence" filled in. But contig records
-- will have a "join" statement in the "contig" slot, and no "sequence".
-- We also may consider a retrieval option with no sequence of any kind
-- and no feature table to quickly check minimal values.
--
--- a reference may have an author list, or be from a consortium, or both.
---
--- some fields, such as taxonomy, do appear as separate elements in flatfile
--- format but without a specific linetype (in GenBank format this comes
--- under ORGANISM). Another example is the separation of primary accession
--- from the list of secondary accessions. In flatfile format primary
--- accession is just the first one on the list that includes all secondaries
--- after it.
---
--- create-date deserves special comment. The date you see on the right hand
--- side of the LOCUS line in GenBank format is actually the last date the
--- the record was modified (or the update-date). The date the record was
--- first submitted to the databases appears in the first submission citation
--- in the reference section. Internally in the databases and ASN.1 NCBI keeps
--- the first date the record was released into the sequence database at
--- NCBI as create-date. For records from EMBL, which supports create-date,
--- it is the date provided by EMBL. For DDBJ records, which do not supply
--- a create-date (same as GenBank format) the create-date is the first date
--- NCBI saw the record from DDBJ. For older GenBank records, before NCBI
--- took responsibility for GenBank, it is just the first date NCBI saw the
--- record. Create-date can be very useful, so we expose it here, but users
--- must understand it is only an approximation and comes from many sources,
--- and with many exceptions and caveats. It does NOT tell you the first
--- date the public might have seen this record and thus is NOT an accurate
--- measure for legal issues of precedence.
---
--- Four elements are specific to records originating at the EMBL
+-- Four (optional) elements are specific to records represented via the EMBL
-- sequence database: INSDSeq_update-release, INSDSeq_create-release,
-- INSDSeq_entry-version, and INSDSeq_database-reference.
--
--- Two elements are specific to records originating at the GenBank
--- and DDBJ sequence databases: INSDSeq_keywords, and INSDSeq_segment.
+-- One (optional) element is specific to records originating at the GenBank
+-- and DDBJ sequence databases: INSDSeq_segment.
--
--********
INSDSeq ::= SEQUENCE {
locus VisibleString ,
length INTEGER ,
- strandedness INTEGER {
- not-set (0) ,
- single-stranded (1) ,
- double-stranded (2) ,
- mixed-stranded (3) } DEFAULT not-set ,
- moltype INTEGER {
- nucleic-acid (0) ,
- dna (1) ,
- rna (2) ,
- trna (3) ,
- rrna (4) ,
- mrna (5) ,
- urna (6) ,
- snrna (7) ,
- snorna (8) ,
- peptide (9) } DEFAULT nucleic-acid ,
- topology INTEGER {
- linear (1) ,
- circular (2) } DEFAULT linear ,
+ strandedness VisibleString OPTIONAL ,
+ moltype VisibleString ,
+ topology VisibleString OPTIONAL ,
division VisibleString ,
update-date VisibleString ,
create-date VisibleString ,
diff --git a/asn/seq.asn b/asn/seq.asn
index 8e0eed68..2c2aa6df 100644
--- a/asn/seq.asn
+++ b/asn/seq.asn
@@ -1,4 +1,4 @@
---$Revision: 6.7 $
+--$Revision: 6.8 $
--**********************************************************************
--
-- NCBI Sequence elements
@@ -10,8 +10,8 @@
NCBI-Sequence DEFINITIONS ::=
BEGIN
-EXPORTS Bioseq, Seq-annot, Pubdesc, Seq-descr, Seqdesc, Numbering, Heterogen,
- Seq-hist, GIBB-mol, Seq-literal;
+EXPORTS Annotdesc, Bioseq, GIBB-mol, Heterogen, Numbering, Pubdesc,
+ Seq-annot, Seq-descr, Seq-hist, Seq-literal, Seqdesc;
IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
Seq-align FROM NCBI-Seqalign
diff --git a/asnstat/all.h b/asnstat/all.h
index 7cbc7cb9..2d7f6d39 100644
--- a/asnstat/all.h
+++ b/asnstat/all.h
@@ -10,7 +10,7 @@
#endif
static char * asnfilename = "all.h60";
-static AsnValxNode avnx[607] = {
+static AsnValxNode avnx[588] = {
{20,"unk" ,0,0.0,&avnx[1] } ,
{20,"gt" ,1,0.0,&avnx[2] } ,
{20,"lt" ,2,0.0,&avnx[3] } ,
@@ -89,398 +89,398 @@ static AsnValxNode avnx[607] = {
{20,"both" ,3,0.0,&avnx[76] } ,
{20,"both-rev" ,4,0.0,&avnx[77] } ,
{20,"other" ,255,0.0,NULL } ,
- {20,"unknown" ,0,0.0,&avnx[79] } ,
- {20,"genomic" ,1,0.0,&avnx[80] } ,
- {20,"pre-mRNA" ,2,0.0,&avnx[81] } ,
- {20,"mRNA" ,3,0.0,&avnx[82] } ,
- {20,"rRNA" ,4,0.0,&avnx[83] } ,
- {20,"tRNA" ,5,0.0,&avnx[84] } ,
- {20,"snRNA" ,6,0.0,&avnx[85] } ,
- {20,"scRNA" ,7,0.0,&avnx[86] } ,
- {20,"peptide" ,8,0.0,&avnx[87] } ,
- {20,"other-genetic" ,9,0.0,&avnx[88] } ,
- {20,"genomic-mRNA" ,10,0.0,&avnx[89] } ,
- {20,"other" ,255,0.0,NULL } ,
- {20,"dna" ,0,0.0,&avnx[91] } ,
- {20,"rna" ,1,0.0,&avnx[92] } ,
- {20,"extrachrom" ,2,0.0,&avnx[93] } ,
- {20,"plasmid" ,3,0.0,&avnx[94] } ,
- {20,"mitochondrial" ,4,0.0,&avnx[95] } ,
- {20,"chloroplast" ,5,0.0,&avnx[96] } ,
- {20,"kinetoplast" ,6,0.0,&avnx[97] } ,
- {20,"cyanelle" ,7,0.0,&avnx[98] } ,
- {20,"synthetic" ,8,0.0,&avnx[99] } ,
- {20,"recombinant" ,9,0.0,&avnx[100] } ,
- {20,"partial" ,10,0.0,&avnx[101] } ,
- {20,"complete" ,11,0.0,&avnx[102] } ,
- {20,"mutagen" ,12,0.0,&avnx[103] } ,
- {20,"natmut" ,13,0.0,&avnx[104] } ,
- {20,"transposon" ,14,0.0,&avnx[105] } ,
- {20,"insertion-seq" ,15,0.0,&avnx[106] } ,
- {20,"no-left" ,16,0.0,&avnx[107] } ,
- {20,"no-right" ,17,0.0,&avnx[108] } ,
- {20,"macronuclear" ,18,0.0,&avnx[109] } ,
- {20,"proviral" ,19,0.0,&avnx[110] } ,
- {20,"est" ,20,0.0,&avnx[111] } ,
- {20,"sts" ,21,0.0,&avnx[112] } ,
- {20,"survey" ,22,0.0,&avnx[113] } ,
- {20,"chromoplast" ,23,0.0,&avnx[114] } ,
- {20,"genemap" ,24,0.0,&avnx[115] } ,
- {20,"restmap" ,25,0.0,&avnx[116] } ,
- {20,"physmap" ,26,0.0,&avnx[117] } ,
- {20,"other" ,255,0.0,NULL } ,
- {20,"concept-trans" ,1,0.0,&avnx[119] } ,
- {20,"seq-pept" ,2,0.0,&avnx[120] } ,
- {20,"both" ,3,0.0,&avnx[121] } ,
- {20,"seq-pept-overlap" ,4,0.0,&avnx[122] } ,
- {20,"seq-pept-homol" ,5,0.0,&avnx[123] } ,
- {20,"concept-trans-a" ,6,0.0,&avnx[124] } ,
- {20,"other" ,255,0.0,NULL } ,
- {20,"other" ,0,0.0,&avnx[126] } ,
- {20,"family" ,1,0.0,&avnx[127] } ,
- {20,"order" ,2,0.0,&avnx[128] } ,
- {20,"class" ,3,0.0,NULL } ,
- {20,"strain" ,2,0.0,&avnx[130] } ,
- {20,"substrain" ,3,0.0,&avnx[131] } ,
- {20,"type" ,4,0.0,&avnx[132] } ,
- {20,"subtype" ,5,0.0,&avnx[133] } ,
- {20,"variety" ,6,0.0,&avnx[134] } ,
- {20,"serotype" ,7,0.0,&avnx[135] } ,
- {20,"serogroup" ,8,0.0,&avnx[136] } ,
- {20,"serovar" ,9,0.0,&avnx[137] } ,
- {20,"cultivar" ,10,0.0,&avnx[138] } ,
- {20,"pathovar" ,11,0.0,&avnx[139] } ,
- {20,"chemovar" ,12,0.0,&avnx[140] } ,
- {20,"biovar" ,13,0.0,&avnx[141] } ,
- {20,"biotype" ,14,0.0,&avnx[142] } ,
- {20,"group" ,15,0.0,&avnx[143] } ,
- {20,"subgroup" ,16,0.0,&avnx[144] } ,
- {20,"isolate" ,17,0.0,&avnx[145] } ,
- {20,"common" ,18,0.0,&avnx[146] } ,
- {20,"acronym" ,19,0.0,&avnx[147] } ,
- {20,"dosage" ,20,0.0,&avnx[148] } ,
- {20,"nat-host" ,21,0.0,&avnx[149] } ,
- {20,"sub-species" ,22,0.0,&avnx[150] } ,
- {20,"specimen-voucher" ,23,0.0,&avnx[151] } ,
- {20,"authority" ,24,0.0,&avnx[152] } ,
- {20,"forma" ,25,0.0,&avnx[153] } ,
- {20,"forma-specialis" ,26,0.0,&avnx[154] } ,
- {20,"ecotype" ,27,0.0,&avnx[155] } ,
- {20,"synonym" ,28,0.0,&avnx[156] } ,
- {20,"anamorph" ,29,0.0,&avnx[157] } ,
- {20,"teleomorph" ,30,0.0,&avnx[158] } ,
- {20,"breed" ,31,0.0,&avnx[159] } ,
- {20,"gb-acronym" ,32,0.0,&avnx[160] } ,
- {20,"gb-anamorph" ,33,0.0,&avnx[161] } ,
- {20,"gb-synonym" ,34,0.0,&avnx[162] } ,
- {20,"old-lineage" ,253,0.0,&avnx[163] } ,
- {20,"old-name" ,254,0.0,&avnx[164] } ,
- {20,"other" ,255,0.0,NULL } ,
{3,NULL,1,0.0,NULL } ,
{2,NULL,0,0.0,NULL } ,
{2,NULL,1,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[169] } ,
- {20,"sources" ,1,0.0,&avnx[170] } ,
+ {20,"not-set" ,0,0.0,&avnx[82] } ,
+ {20,"sources" ,1,0.0,&avnx[83] } ,
{20,"aligns" ,2,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[172] } ,
- {20,"global" ,1,0.0,&avnx[173] } ,
- {20,"diags" ,2,0.0,&avnx[174] } ,
- {20,"partial" ,3,0.0,&avnx[175] } ,
- {20,"disc" ,4,0.0,&avnx[176] } ,
+ {20,"not-set" ,0,0.0,&avnx[85] } ,
+ {20,"global" ,1,0.0,&avnx[86] } ,
+ {20,"diags" ,2,0.0,&avnx[87] } ,
+ {20,"partial" ,3,0.0,&avnx[88] } ,
+ {20,"disc" ,4,0.0,&avnx[89] } ,
{20,"other" ,255,0.0,NULL } ,
{3,NULL,2,0.0,NULL } ,
{3,NULL,2,0.0,NULL } ,
{3,NULL,2,0.0,NULL } ,
{3,NULL,2,0.0,NULL } ,
- {20,"seq" ,0,0.0,&avnx[182] } ,
- {20,"sites" ,1,0.0,&avnx[183] } ,
- {20,"feats" ,2,0.0,&avnx[184] } ,
+ {20,"seq" ,0,0.0,&avnx[95] } ,
+ {20,"sites" ,1,0.0,&avnx[96] } ,
+ {20,"feats" ,2,0.0,&avnx[97] } ,
{20,"no-target" ,3,0.0,NULL } ,
{3,NULL,0,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[187] } ,
- {20,"standard" ,1,0.0,&avnx[188] } ,
- {20,"prelim" ,2,0.0,&avnx[189] } ,
+ {20,"ref" ,1,0.0,&avnx[100] } ,
+ {20,"alt" ,2,0.0,&avnx[101] } ,
+ {20,"blocks" ,3,0.0,&avnx[102] } ,
+ {20,"other" ,255,0.0,NULL } ,
+ {20,"unknown" ,0,0.0,&avnx[104] } ,
+ {20,"genomic" ,1,0.0,&avnx[105] } ,
+ {20,"pre-mRNA" ,2,0.0,&avnx[106] } ,
+ {20,"mRNA" ,3,0.0,&avnx[107] } ,
+ {20,"rRNA" ,4,0.0,&avnx[108] } ,
+ {20,"tRNA" ,5,0.0,&avnx[109] } ,
+ {20,"snRNA" ,6,0.0,&avnx[110] } ,
+ {20,"scRNA" ,7,0.0,&avnx[111] } ,
+ {20,"peptide" ,8,0.0,&avnx[112] } ,
+ {20,"other-genetic" ,9,0.0,&avnx[113] } ,
+ {20,"genomic-mRNA" ,10,0.0,&avnx[114] } ,
+ {20,"other" ,255,0.0,NULL } ,
+ {20,"dna" ,0,0.0,&avnx[116] } ,
+ {20,"rna" ,1,0.0,&avnx[117] } ,
+ {20,"extrachrom" ,2,0.0,&avnx[118] } ,
+ {20,"plasmid" ,3,0.0,&avnx[119] } ,
+ {20,"mitochondrial" ,4,0.0,&avnx[120] } ,
+ {20,"chloroplast" ,5,0.0,&avnx[121] } ,
+ {20,"kinetoplast" ,6,0.0,&avnx[122] } ,
+ {20,"cyanelle" ,7,0.0,&avnx[123] } ,
+ {20,"synthetic" ,8,0.0,&avnx[124] } ,
+ {20,"recombinant" ,9,0.0,&avnx[125] } ,
+ {20,"partial" ,10,0.0,&avnx[126] } ,
+ {20,"complete" ,11,0.0,&avnx[127] } ,
+ {20,"mutagen" ,12,0.0,&avnx[128] } ,
+ {20,"natmut" ,13,0.0,&avnx[129] } ,
+ {20,"transposon" ,14,0.0,&avnx[130] } ,
+ {20,"insertion-seq" ,15,0.0,&avnx[131] } ,
+ {20,"no-left" ,16,0.0,&avnx[132] } ,
+ {20,"no-right" ,17,0.0,&avnx[133] } ,
+ {20,"macronuclear" ,18,0.0,&avnx[134] } ,
+ {20,"proviral" ,19,0.0,&avnx[135] } ,
+ {20,"est" ,20,0.0,&avnx[136] } ,
+ {20,"sts" ,21,0.0,&avnx[137] } ,
+ {20,"survey" ,22,0.0,&avnx[138] } ,
+ {20,"chromoplast" ,23,0.0,&avnx[139] } ,
+ {20,"genemap" ,24,0.0,&avnx[140] } ,
+ {20,"restmap" ,25,0.0,&avnx[141] } ,
+ {20,"physmap" ,26,0.0,&avnx[142] } ,
+ {20,"other" ,255,0.0,NULL } ,
+ {20,"concept-trans" ,1,0.0,&avnx[144] } ,
+ {20,"seq-pept" ,2,0.0,&avnx[145] } ,
+ {20,"both" ,3,0.0,&avnx[146] } ,
+ {20,"seq-pept-overlap" ,4,0.0,&avnx[147] } ,
+ {20,"seq-pept-homol" ,5,0.0,&avnx[148] } ,
+ {20,"concept-trans-a" ,6,0.0,&avnx[149] } ,
+ {20,"other" ,255,0.0,NULL } ,
+ {20,"other" ,0,0.0,&avnx[151] } ,
+ {20,"family" ,1,0.0,&avnx[152] } ,
+ {20,"order" ,2,0.0,&avnx[153] } ,
+ {20,"class" ,3,0.0,NULL } ,
+ {20,"strain" ,2,0.0,&avnx[155] } ,
+ {20,"substrain" ,3,0.0,&avnx[156] } ,
+ {20,"type" ,4,0.0,&avnx[157] } ,
+ {20,"subtype" ,5,0.0,&avnx[158] } ,
+ {20,"variety" ,6,0.0,&avnx[159] } ,
+ {20,"serotype" ,7,0.0,&avnx[160] } ,
+ {20,"serogroup" ,8,0.0,&avnx[161] } ,
+ {20,"serovar" ,9,0.0,&avnx[162] } ,
+ {20,"cultivar" ,10,0.0,&avnx[163] } ,
+ {20,"pathovar" ,11,0.0,&avnx[164] } ,
+ {20,"chemovar" ,12,0.0,&avnx[165] } ,
+ {20,"biovar" ,13,0.0,&avnx[166] } ,
+ {20,"biotype" ,14,0.0,&avnx[167] } ,
+ {20,"group" ,15,0.0,&avnx[168] } ,
+ {20,"subgroup" ,16,0.0,&avnx[169] } ,
+ {20,"isolate" ,17,0.0,&avnx[170] } ,
+ {20,"common" ,18,0.0,&avnx[171] } ,
+ {20,"acronym" ,19,0.0,&avnx[172] } ,
+ {20,"dosage" ,20,0.0,&avnx[173] } ,
+ {20,"nat-host" ,21,0.0,&avnx[174] } ,
+ {20,"sub-species" ,22,0.0,&avnx[175] } ,
+ {20,"specimen-voucher" ,23,0.0,&avnx[176] } ,
+ {20,"authority" ,24,0.0,&avnx[177] } ,
+ {20,"forma" ,25,0.0,&avnx[178] } ,
+ {20,"forma-specialis" ,26,0.0,&avnx[179] } ,
+ {20,"ecotype" ,27,0.0,&avnx[180] } ,
+ {20,"synonym" ,28,0.0,&avnx[181] } ,
+ {20,"anamorph" ,29,0.0,&avnx[182] } ,
+ {20,"teleomorph" ,30,0.0,&avnx[183] } ,
+ {20,"breed" ,31,0.0,&avnx[184] } ,
+ {20,"gb-acronym" ,32,0.0,&avnx[185] } ,
+ {20,"gb-anamorph" ,33,0.0,&avnx[186] } ,
+ {20,"gb-synonym" ,34,0.0,&avnx[187] } ,
+ {20,"old-lineage" ,253,0.0,&avnx[188] } ,
+ {20,"old-name" ,254,0.0,&avnx[189] } ,
+ {20,"other" ,255,0.0,NULL } ,
+ {20,"not-set" ,0,0.0,&avnx[191] } ,
+ {20,"standard" ,1,0.0,&avnx[192] } ,
+ {20,"prelim" ,2,0.0,&avnx[193] } ,
{20,"other" ,255,0.0,NULL } ,
{2,NULL,0,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[192] } ,
- {20,"standard" ,1,0.0,&avnx[193] } ,
- {20,"unannotated" ,2,0.0,&avnx[194] } ,
+ {20,"not-set" ,0,0.0,&avnx[196] } ,
+ {20,"standard" ,1,0.0,&avnx[197] } ,
+ {20,"unannotated" ,2,0.0,&avnx[198] } ,
{20,"other" ,255,0.0,NULL } ,
{3,NULL,1,0.0,NULL } ,
- {20,"fun" ,0,0.0,&avnx[197] } ,
- {20,"inv" ,1,0.0,&avnx[198] } ,
- {20,"mam" ,2,0.0,&avnx[199] } ,
- {20,"org" ,3,0.0,&avnx[200] } ,
- {20,"phg" ,4,0.0,&avnx[201] } ,
- {20,"pln" ,5,0.0,&avnx[202] } ,
- {20,"pri" ,6,0.0,&avnx[203] } ,
- {20,"pro" ,7,0.0,&avnx[204] } ,
- {20,"rod" ,8,0.0,&avnx[205] } ,
- {20,"syn" ,9,0.0,&avnx[206] } ,
- {20,"una" ,10,0.0,&avnx[207] } ,
- {20,"vrl" ,11,0.0,&avnx[208] } ,
- {20,"vrt" ,12,0.0,&avnx[209] } ,
- {20,"pat" ,13,0.0,&avnx[210] } ,
- {20,"est" ,14,0.0,&avnx[211] } ,
- {20,"sts" ,15,0.0,&avnx[212] } ,
+ {20,"fun" ,0,0.0,&avnx[201] } ,
+ {20,"inv" ,1,0.0,&avnx[202] } ,
+ {20,"mam" ,2,0.0,&avnx[203] } ,
+ {20,"org" ,3,0.0,&avnx[204] } ,
+ {20,"phg" ,4,0.0,&avnx[205] } ,
+ {20,"pln" ,5,0.0,&avnx[206] } ,
+ {20,"pri" ,6,0.0,&avnx[207] } ,
+ {20,"pro" ,7,0.0,&avnx[208] } ,
+ {20,"rod" ,8,0.0,&avnx[209] } ,
+ {20,"syn" ,9,0.0,&avnx[210] } ,
+ {20,"una" ,10,0.0,&avnx[211] } ,
+ {20,"vrl" ,11,0.0,&avnx[212] } ,
+ {20,"vrt" ,12,0.0,&avnx[213] } ,
+ {20,"pat" ,13,0.0,&avnx[214] } ,
+ {20,"est" ,14,0.0,&avnx[215] } ,
+ {20,"sts" ,15,0.0,&avnx[216] } ,
{20,"other" ,255,0.0,NULL } ,
- {20,"embl" ,0,0.0,&avnx[214] } ,
- {20,"genbank" ,1,0.0,&avnx[215] } ,
- {20,"ddbj" ,2,0.0,&avnx[216] } ,
- {20,"geninfo" ,3,0.0,&avnx[217] } ,
- {20,"medline" ,4,0.0,&avnx[218] } ,
- {20,"swissprot" ,5,0.0,&avnx[219] } ,
- {20,"pir" ,6,0.0,&avnx[220] } ,
- {20,"pdb" ,7,0.0,&avnx[221] } ,
- {20,"epd" ,8,0.0,&avnx[222] } ,
- {20,"ecd" ,9,0.0,&avnx[223] } ,
- {20,"tfd" ,10,0.0,&avnx[224] } ,
- {20,"flybase" ,11,0.0,&avnx[225] } ,
- {20,"prosite" ,12,0.0,&avnx[226] } ,
- {20,"enzyme" ,13,0.0,&avnx[227] } ,
- {20,"mim" ,14,0.0,&avnx[228] } ,
- {20,"ecoseq" ,15,0.0,&avnx[229] } ,
- {20,"hiv" ,16,0.0,&avnx[230] } ,
+ {20,"embl" ,0,0.0,&avnx[218] } ,
+ {20,"genbank" ,1,0.0,&avnx[219] } ,
+ {20,"ddbj" ,2,0.0,&avnx[220] } ,
+ {20,"geninfo" ,3,0.0,&avnx[221] } ,
+ {20,"medline" ,4,0.0,&avnx[222] } ,
+ {20,"swissprot" ,5,0.0,&avnx[223] } ,
+ {20,"pir" ,6,0.0,&avnx[224] } ,
+ {20,"pdb" ,7,0.0,&avnx[225] } ,
+ {20,"epd" ,8,0.0,&avnx[226] } ,
+ {20,"ecd" ,9,0.0,&avnx[227] } ,
+ {20,"tfd" ,10,0.0,&avnx[228] } ,
+ {20,"flybase" ,11,0.0,&avnx[229] } ,
+ {20,"prosite" ,12,0.0,&avnx[230] } ,
+ {20,"enzyme" ,13,0.0,&avnx[231] } ,
+ {20,"mim" ,14,0.0,&avnx[232] } ,
+ {20,"ecoseq" ,15,0.0,&avnx[233] } ,
+ {20,"hiv" ,16,0.0,&avnx[234] } ,
{20,"other" ,255,0.0,NULL } ,
- {20,"unknown" ,0,0.0,&avnx[232] } ,
- {20,"genomic" ,1,0.0,&avnx[233] } ,
- {20,"chloroplast" ,2,0.0,&avnx[234] } ,
- {20,"chromoplast" ,3,0.0,&avnx[235] } ,
- {20,"kinetoplast" ,4,0.0,&avnx[236] } ,
- {20,"mitochondrion" ,5,0.0,&avnx[237] } ,
- {20,"plastid" ,6,0.0,&avnx[238] } ,
- {20,"macronuclear" ,7,0.0,&avnx[239] } ,
- {20,"extrachrom" ,8,0.0,&avnx[240] } ,
- {20,"plasmid" ,9,0.0,&avnx[241] } ,
- {20,"transposon" ,10,0.0,&avnx[242] } ,
- {20,"insertion-seq" ,11,0.0,&avnx[243] } ,
- {20,"cyanelle" ,12,0.0,&avnx[244] } ,
- {20,"proviral" ,13,0.0,&avnx[245] } ,
- {20,"virion" ,14,0.0,&avnx[246] } ,
- {20,"nucleomorph" ,15,0.0,&avnx[247] } ,
- {20,"apicoplast" ,16,0.0,&avnx[248] } ,
- {20,"leucoplast" ,17,0.0,&avnx[249] } ,
- {20,"proplastid" ,18,0.0,&avnx[250] } ,
+ {20,"unknown" ,0,0.0,&avnx[236] } ,
+ {20,"genomic" ,1,0.0,&avnx[237] } ,
+ {20,"chloroplast" ,2,0.0,&avnx[238] } ,
+ {20,"chromoplast" ,3,0.0,&avnx[239] } ,
+ {20,"kinetoplast" ,4,0.0,&avnx[240] } ,
+ {20,"mitochondrion" ,5,0.0,&avnx[241] } ,
+ {20,"plastid" ,6,0.0,&avnx[242] } ,
+ {20,"macronuclear" ,7,0.0,&avnx[243] } ,
+ {20,"extrachrom" ,8,0.0,&avnx[244] } ,
+ {20,"plasmid" ,9,0.0,&avnx[245] } ,
+ {20,"transposon" ,10,0.0,&avnx[246] } ,
+ {20,"insertion-seq" ,11,0.0,&avnx[247] } ,
+ {20,"cyanelle" ,12,0.0,&avnx[248] } ,
+ {20,"proviral" ,13,0.0,&avnx[249] } ,
+ {20,"virion" ,14,0.0,&avnx[250] } ,
+ {20,"nucleomorph" ,15,0.0,&avnx[251] } ,
+ {20,"apicoplast" ,16,0.0,&avnx[252] } ,
+ {20,"leucoplast" ,17,0.0,&avnx[253] } ,
+ {20,"proplastid" ,18,0.0,&avnx[254] } ,
{20,"endogenous-virus" ,19,0.0,NULL } ,
{3,NULL,0,0.0,NULL } ,
- {20,"unknown" ,0,0.0,&avnx[253] } ,
- {20,"natural" ,1,0.0,&avnx[254] } ,
- {20,"natmut" ,2,0.0,&avnx[255] } ,
- {20,"mut" ,3,0.0,&avnx[256] } ,
- {20,"artificial" ,4,0.0,&avnx[257] } ,
- {20,"synthetic" ,5,0.0,&avnx[258] } ,
+ {20,"unknown" ,0,0.0,&avnx[257] } ,
+ {20,"natural" ,1,0.0,&avnx[258] } ,
+ {20,"natmut" ,2,0.0,&avnx[259] } ,
+ {20,"mut" ,3,0.0,&avnx[260] } ,
+ {20,"artificial" ,4,0.0,&avnx[261] } ,
+ {20,"synthetic" ,5,0.0,&avnx[262] } ,
{20,"other" ,255,0.0,NULL } ,
{3,NULL,0,0.0,NULL } ,
- {20,"chromosome" ,1,0.0,&avnx[261] } ,
- {20,"map" ,2,0.0,&avnx[262] } ,
- {20,"clone" ,3,0.0,&avnx[263] } ,
- {20,"subclone" ,4,0.0,&avnx[264] } ,
- {20,"haplotype" ,5,0.0,&avnx[265] } ,
- {20,"genotype" ,6,0.0,&avnx[266] } ,
- {20,"sex" ,7,0.0,&avnx[267] } ,
- {20,"cell-line" ,8,0.0,&avnx[268] } ,
- {20,"cell-type" ,9,0.0,&avnx[269] } ,
- {20,"tissue-type" ,10,0.0,&avnx[270] } ,
- {20,"clone-lib" ,11,0.0,&avnx[271] } ,
- {20,"dev-stage" ,12,0.0,&avnx[272] } ,
- {20,"frequency" ,13,0.0,&avnx[273] } ,
- {20,"germline" ,14,0.0,&avnx[274] } ,
- {20,"rearranged" ,15,0.0,&avnx[275] } ,
- {20,"lab-host" ,16,0.0,&avnx[276] } ,
- {20,"pop-variant" ,17,0.0,&avnx[277] } ,
- {20,"tissue-lib" ,18,0.0,&avnx[278] } ,
- {20,"plasmid-name" ,19,0.0,&avnx[279] } ,
- {20,"transposon-name" ,20,0.0,&avnx[280] } ,
- {20,"insertion-seq-name" ,21,0.0,&avnx[281] } ,
- {20,"plastid-name" ,22,0.0,&avnx[282] } ,
- {20,"country" ,23,0.0,&avnx[283] } ,
- {20,"segment" ,24,0.0,&avnx[284] } ,
- {20,"endogenous-virus-name" ,25,0.0,&avnx[285] } ,
- {20,"transgenic" ,26,0.0,&avnx[286] } ,
- {20,"environmental-sample" ,27,0.0,&avnx[287] } ,
- {20,"isolation-source" ,28,0.0,&avnx[288] } ,
+ {20,"chromosome" ,1,0.0,&avnx[265] } ,
+ {20,"map" ,2,0.0,&avnx[266] } ,
+ {20,"clone" ,3,0.0,&avnx[267] } ,
+ {20,"subclone" ,4,0.0,&avnx[268] } ,
+ {20,"haplotype" ,5,0.0,&avnx[269] } ,
+ {20,"genotype" ,6,0.0,&avnx[270] } ,
+ {20,"sex" ,7,0.0,&avnx[271] } ,
+ {20,"cell-line" ,8,0.0,&avnx[272] } ,
+ {20,"cell-type" ,9,0.0,&avnx[273] } ,
+ {20,"tissue-type" ,10,0.0,&avnx[274] } ,
+ {20,"clone-lib" ,11,0.0,&avnx[275] } ,
+ {20,"dev-stage" ,12,0.0,&avnx[276] } ,
+ {20,"frequency" ,13,0.0,&avnx[277] } ,
+ {20,"germline" ,14,0.0,&avnx[278] } ,
+ {20,"rearranged" ,15,0.0,&avnx[279] } ,
+ {20,"lab-host" ,16,0.0,&avnx[280] } ,
+ {20,"pop-variant" ,17,0.0,&avnx[281] } ,
+ {20,"tissue-lib" ,18,0.0,&avnx[282] } ,
+ {20,"plasmid-name" ,19,0.0,&avnx[283] } ,
+ {20,"transposon-name" ,20,0.0,&avnx[284] } ,
+ {20,"insertion-seq-name" ,21,0.0,&avnx[285] } ,
+ {20,"plastid-name" ,22,0.0,&avnx[286] } ,
+ {20,"country" ,23,0.0,&avnx[287] } ,
+ {20,"segment" ,24,0.0,&avnx[288] } ,
+ {20,"endogenous-virus-name" ,25,0.0,&avnx[289] } ,
+ {20,"transgenic" ,26,0.0,&avnx[290] } ,
+ {20,"environmental-sample" ,27,0.0,&avnx[291] } ,
+ {20,"isolation-source" ,28,0.0,&avnx[292] } ,
{20,"other" ,255,0.0,NULL } ,
- {20,"unknown" ,0,0.0,&avnx[290] } ,
- {20,"genomic" ,1,0.0,&avnx[291] } ,
- {20,"pre-RNA" ,2,0.0,&avnx[292] } ,
- {20,"mRNA" ,3,0.0,&avnx[293] } ,
- {20,"rRNA" ,4,0.0,&avnx[294] } ,
- {20,"tRNA" ,5,0.0,&avnx[295] } ,
- {20,"snRNA" ,6,0.0,&avnx[296] } ,
- {20,"scRNA" ,7,0.0,&avnx[297] } ,
- {20,"peptide" ,8,0.0,&avnx[298] } ,
- {20,"other-genetic" ,9,0.0,&avnx[299] } ,
- {20,"genomic-mRNA" ,10,0.0,&avnx[300] } ,
- {20,"cRNA" ,11,0.0,&avnx[301] } ,
- {20,"snoRNA" ,12,0.0,&avnx[302] } ,
- {20,"transcribed-RNA" ,13,0.0,&avnx[303] } ,
+ {20,"unknown" ,0,0.0,&avnx[294] } ,
+ {20,"genomic" ,1,0.0,&avnx[295] } ,
+ {20,"pre-RNA" ,2,0.0,&avnx[296] } ,
+ {20,"mRNA" ,3,0.0,&avnx[297] } ,
+ {20,"rRNA" ,4,0.0,&avnx[298] } ,
+ {20,"tRNA" ,5,0.0,&avnx[299] } ,
+ {20,"snRNA" ,6,0.0,&avnx[300] } ,
+ {20,"scRNA" ,7,0.0,&avnx[301] } ,
+ {20,"peptide" ,8,0.0,&avnx[302] } ,
+ {20,"other-genetic" ,9,0.0,&avnx[303] } ,
+ {20,"genomic-mRNA" ,10,0.0,&avnx[304] } ,
+ {20,"cRNA" ,11,0.0,&avnx[305] } ,
+ {20,"snoRNA" ,12,0.0,&avnx[306] } ,
+ {20,"transcribed-RNA" ,13,0.0,&avnx[307] } ,
{20,"other" ,255,0.0,NULL } ,
{3,NULL,0,0.0,NULL } ,
- {20,"unknown" ,0,0.0,&avnx[306] } ,
- {20,"standard" ,1,0.0,&avnx[307] } ,
- {20,"est" ,2,0.0,&avnx[308] } ,
- {20,"sts" ,3,0.0,&avnx[309] } ,
- {20,"survey" ,4,0.0,&avnx[310] } ,
- {20,"genemap" ,5,0.0,&avnx[311] } ,
- {20,"physmap" ,6,0.0,&avnx[312] } ,
- {20,"derived" ,7,0.0,&avnx[313] } ,
- {20,"concept-trans" ,8,0.0,&avnx[314] } ,
- {20,"seq-pept" ,9,0.0,&avnx[315] } ,
- {20,"both" ,10,0.0,&avnx[316] } ,
- {20,"seq-pept-overlap" ,11,0.0,&avnx[317] } ,
- {20,"seq-pept-homol" ,12,0.0,&avnx[318] } ,
- {20,"concept-trans-a" ,13,0.0,&avnx[319] } ,
- {20,"htgs-1" ,14,0.0,&avnx[320] } ,
- {20,"htgs-2" ,15,0.0,&avnx[321] } ,
- {20,"htgs-3" ,16,0.0,&avnx[322] } ,
- {20,"fli-cdna" ,17,0.0,&avnx[323] } ,
- {20,"htgs-0" ,18,0.0,&avnx[324] } ,
- {20,"htc" ,19,0.0,&avnx[325] } ,
- {20,"wgs" ,20,0.0,&avnx[326] } ,
+ {20,"unknown" ,0,0.0,&avnx[310] } ,
+ {20,"standard" ,1,0.0,&avnx[311] } ,
+ {20,"est" ,2,0.0,&avnx[312] } ,
+ {20,"sts" ,3,0.0,&avnx[313] } ,
+ {20,"survey" ,4,0.0,&avnx[314] } ,
+ {20,"genemap" ,5,0.0,&avnx[315] } ,
+ {20,"physmap" ,6,0.0,&avnx[316] } ,
+ {20,"derived" ,7,0.0,&avnx[317] } ,
+ {20,"concept-trans" ,8,0.0,&avnx[318] } ,
+ {20,"seq-pept" ,9,0.0,&avnx[319] } ,
+ {20,"both" ,10,0.0,&avnx[320] } ,
+ {20,"seq-pept-overlap" ,11,0.0,&avnx[321] } ,
+ {20,"seq-pept-homol" ,12,0.0,&avnx[322] } ,
+ {20,"concept-trans-a" ,13,0.0,&avnx[323] } ,
+ {20,"htgs-1" ,14,0.0,&avnx[324] } ,
+ {20,"htgs-2" ,15,0.0,&avnx[325] } ,
+ {20,"htgs-3" ,16,0.0,&avnx[326] } ,
+ {20,"fli-cdna" ,17,0.0,&avnx[327] } ,
+ {20,"htgs-0" ,18,0.0,&avnx[328] } ,
+ {20,"htc" ,19,0.0,&avnx[329] } ,
+ {20,"wgs" ,20,0.0,&avnx[330] } ,
{20,"other" ,255,0.0,NULL } ,
{3,NULL,0,0.0,NULL } ,
- {20,"unknown" ,0,0.0,&avnx[329] } ,
- {20,"complete" ,1,0.0,&avnx[330] } ,
- {20,"partial" ,2,0.0,&avnx[331] } ,
- {20,"no-left" ,3,0.0,&avnx[332] } ,
- {20,"no-right" ,4,0.0,&avnx[333] } ,
- {20,"no-ends" ,5,0.0,&avnx[334] } ,
- {20,"has-left" ,6,0.0,&avnx[335] } ,
- {20,"has-right" ,7,0.0,&avnx[336] } ,
+ {20,"unknown" ,0,0.0,&avnx[333] } ,
+ {20,"complete" ,1,0.0,&avnx[334] } ,
+ {20,"partial" ,2,0.0,&avnx[335] } ,
+ {20,"no-left" ,3,0.0,&avnx[336] } ,
+ {20,"no-right" ,4,0.0,&avnx[337] } ,
+ {20,"no-ends" ,5,0.0,&avnx[338] } ,
+ {20,"has-left" ,6,0.0,&avnx[339] } ,
+ {20,"has-right" ,7,0.0,&avnx[340] } ,
{20,"other" ,255,0.0,NULL } ,
{3,NULL,0,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[339] } ,
- {20,"virtual" ,1,0.0,&avnx[340] } ,
- {20,"raw" ,2,0.0,&avnx[341] } ,
- {20,"seg" ,3,0.0,&avnx[342] } ,
- {20,"const" ,4,0.0,&avnx[343] } ,
- {20,"ref" ,5,0.0,&avnx[344] } ,
- {20,"consen" ,6,0.0,&avnx[345] } ,
- {20,"map" ,7,0.0,&avnx[346] } ,
- {20,"delta" ,8,0.0,&avnx[347] } ,
+ {20,"not-set" ,0,0.0,&avnx[343] } ,
+ {20,"virtual" ,1,0.0,&avnx[344] } ,
+ {20,"raw" ,2,0.0,&avnx[345] } ,
+ {20,"seg" ,3,0.0,&avnx[346] } ,
+ {20,"const" ,4,0.0,&avnx[347] } ,
+ {20,"ref" ,5,0.0,&avnx[348] } ,
+ {20,"consen" ,6,0.0,&avnx[349] } ,
+ {20,"map" ,7,0.0,&avnx[350] } ,
+ {20,"delta" ,8,0.0,&avnx[351] } ,
{20,"other" ,255,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[349] } ,
- {20,"dna" ,1,0.0,&avnx[350] } ,
- {20,"rna" ,2,0.0,&avnx[351] } ,
- {20,"aa" ,3,0.0,&avnx[352] } ,
- {20,"na" ,4,0.0,&avnx[353] } ,
+ {20,"not-set" ,0,0.0,&avnx[353] } ,
+ {20,"dna" ,1,0.0,&avnx[354] } ,
+ {20,"rna" ,2,0.0,&avnx[355] } ,
+ {20,"aa" ,3,0.0,&avnx[356] } ,
+ {20,"na" ,4,0.0,&avnx[357] } ,
{20,"other" ,255,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[355] } ,
- {20,"linear" ,1,0.0,&avnx[356] } ,
- {20,"circular" ,2,0.0,&avnx[357] } ,
- {20,"tandem" ,3,0.0,&avnx[358] } ,
+ {20,"not-set" ,0,0.0,&avnx[359] } ,
+ {20,"linear" ,1,0.0,&avnx[360] } ,
+ {20,"circular" ,2,0.0,&avnx[361] } ,
+ {20,"tandem" ,3,0.0,&avnx[362] } ,
{20,"other" ,255,0.0,NULL } ,
{3,NULL,1,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[361] } ,
- {20,"ss" ,1,0.0,&avnx[362] } ,
- {20,"ds" ,2,0.0,&avnx[363] } ,
- {20,"mixed" ,3,0.0,&avnx[364] } ,
+ {20,"not-set" ,0,0.0,&avnx[365] } ,
+ {20,"ss" ,1,0.0,&avnx[366] } ,
+ {20,"ds" ,2,0.0,&avnx[367] } ,
+ {20,"mixed" ,3,0.0,&avnx[368] } ,
{20,"other" ,255,0.0,NULL } ,
{2,NULL,0,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[367] } ,
- {20,"one" ,1,0.0,&avnx[368] } ,
- {20,"two" ,2,0.0,&avnx[369] } ,
+ {20,"not-set" ,0,0.0,&avnx[371] } ,
+ {20,"one" ,1,0.0,&avnx[372] } ,
+ {20,"two" ,2,0.0,&avnx[373] } ,
{20,"three" ,3,0.0,NULL } ,
{3,NULL,0,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[372] } ,
- {20,"preprotein" ,1,0.0,&avnx[373] } ,
- {20,"mature" ,2,0.0,&avnx[374] } ,
- {20,"signal-peptide" ,3,0.0,&avnx[375] } ,
+ {20,"not-set" ,0,0.0,&avnx[376] } ,
+ {20,"preprotein" ,1,0.0,&avnx[377] } ,
+ {20,"mature" ,2,0.0,&avnx[378] } ,
+ {20,"signal-peptide" ,3,0.0,&avnx[379] } ,
{20,"transit-peptide" ,4,0.0,NULL } ,
{3,NULL,0,0.0,NULL } ,
- {20,"unknown" ,0,0.0,&avnx[378] } ,
- {20,"premsg" ,1,0.0,&avnx[379] } ,
- {20,"mRNA" ,2,0.0,&avnx[380] } ,
- {20,"tRNA" ,3,0.0,&avnx[381] } ,
- {20,"rRNA" ,4,0.0,&avnx[382] } ,
- {20,"snRNA" ,5,0.0,&avnx[383] } ,
- {20,"scRNA" ,6,0.0,&avnx[384] } ,
- {20,"snoRNA" ,7,0.0,&avnx[385] } ,
+ {20,"unknown" ,0,0.0,&avnx[382] } ,
+ {20,"premsg" ,1,0.0,&avnx[383] } ,
+ {20,"mRNA" ,2,0.0,&avnx[384] } ,
+ {20,"tRNA" ,3,0.0,&avnx[385] } ,
+ {20,"rRNA" ,4,0.0,&avnx[386] } ,
+ {20,"snRNA" ,5,0.0,&avnx[387] } ,
+ {20,"scRNA" ,6,0.0,&avnx[388] } ,
+ {20,"snoRNA" ,7,0.0,&avnx[389] } ,
{20,"other" ,255,0.0,NULL } ,
- {20,"disulfide" ,1,0.0,&avnx[387] } ,
- {20,"thiolester" ,2,0.0,&avnx[388] } ,
- {20,"xlink" ,3,0.0,&avnx[389] } ,
- {20,"thioether" ,4,0.0,&avnx[390] } ,
+ {20,"disulfide" ,1,0.0,&avnx[391] } ,
+ {20,"thiolester" ,2,0.0,&avnx[392] } ,
+ {20,"xlink" ,3,0.0,&avnx[393] } ,
+ {20,"thioether" ,4,0.0,&avnx[394] } ,
{20,"other" ,255,0.0,NULL } ,
- {20,"active" ,1,0.0,&avnx[392] } ,
- {20,"binding" ,2,0.0,&avnx[393] } ,
- {20,"cleavage" ,3,0.0,&avnx[394] } ,
- {20,"inhibit" ,4,0.0,&avnx[395] } ,
- {20,"modified" ,5,0.0,&avnx[396] } ,
- {20,"glycosylation" ,6,0.0,&avnx[397] } ,
- {20,"myristoylation" ,7,0.0,&avnx[398] } ,
- {20,"mutagenized" ,8,0.0,&avnx[399] } ,
- {20,"metal-binding" ,9,0.0,&avnx[400] } ,
- {20,"phosphorylation" ,10,0.0,&avnx[401] } ,
- {20,"acetylation" ,11,0.0,&avnx[402] } ,
- {20,"amidation" ,12,0.0,&avnx[403] } ,
- {20,"methylation" ,13,0.0,&avnx[404] } ,
- {20,"hydroxylation" ,14,0.0,&avnx[405] } ,
- {20,"sulfatation" ,15,0.0,&avnx[406] } ,
- {20,"oxidative-deamination" ,16,0.0,&avnx[407] } ,
- {20,"pyrrolidone-carboxylic-acid" ,17,0.0,&avnx[408] } ,
- {20,"gamma-carboxyglutamic-acid" ,18,0.0,&avnx[409] } ,
- {20,"blocked" ,19,0.0,&avnx[410] } ,
- {20,"lipid-binding" ,20,0.0,&avnx[411] } ,
- {20,"np-binding" ,21,0.0,&avnx[412] } ,
- {20,"dna-binding" ,22,0.0,&avnx[413] } ,
- {20,"signal-peptide" ,23,0.0,&avnx[414] } ,
- {20,"transit-peptide" ,24,0.0,&avnx[415] } ,
- {20,"transmembrane-region" ,25,0.0,&avnx[416] } ,
+ {20,"active" ,1,0.0,&avnx[396] } ,
+ {20,"binding" ,2,0.0,&avnx[397] } ,
+ {20,"cleavage" ,3,0.0,&avnx[398] } ,
+ {20,"inhibit" ,4,0.0,&avnx[399] } ,
+ {20,"modified" ,5,0.0,&avnx[400] } ,
+ {20,"glycosylation" ,6,0.0,&avnx[401] } ,
+ {20,"myristoylation" ,7,0.0,&avnx[402] } ,
+ {20,"mutagenized" ,8,0.0,&avnx[403] } ,
+ {20,"metal-binding" ,9,0.0,&avnx[404] } ,
+ {20,"phosphorylation" ,10,0.0,&avnx[405] } ,
+ {20,"acetylation" ,11,0.0,&avnx[406] } ,
+ {20,"amidation" ,12,0.0,&avnx[407] } ,
+ {20,"methylation" ,13,0.0,&avnx[408] } ,
+ {20,"hydroxylation" ,14,0.0,&avnx[409] } ,
+ {20,"sulfatation" ,15,0.0,&avnx[410] } ,
+ {20,"oxidative-deamination" ,16,0.0,&avnx[411] } ,
+ {20,"pyrrolidone-carboxylic-acid" ,17,0.0,&avnx[412] } ,
+ {20,"gamma-carboxyglutamic-acid" ,18,0.0,&avnx[413] } ,
+ {20,"blocked" ,19,0.0,&avnx[414] } ,
+ {20,"lipid-binding" ,20,0.0,&avnx[415] } ,
+ {20,"np-binding" ,21,0.0,&avnx[416] } ,
+ {20,"dna-binding" ,22,0.0,&avnx[417] } ,
+ {20,"signal-peptide" ,23,0.0,&avnx[418] } ,
+ {20,"transit-peptide" ,24,0.0,&avnx[419] } ,
+ {20,"transmembrane-region" ,25,0.0,&avnx[420] } ,
{20,"other" ,255,0.0,NULL } ,
- {20,"unknown" ,0,0.0,&avnx[418] } ,
- {20,"pol1" ,1,0.0,&avnx[419] } ,
- {20,"pol2" ,2,0.0,&avnx[420] } ,
- {20,"pol3" ,3,0.0,&avnx[421] } ,
- {20,"bacterial" ,4,0.0,&avnx[422] } ,
- {20,"viral" ,5,0.0,&avnx[423] } ,
- {20,"rna" ,6,0.0,&avnx[424] } ,
- {20,"organelle" ,7,0.0,&avnx[425] } ,
+ {20,"unknown" ,0,0.0,&avnx[422] } ,
+ {20,"pol1" ,1,0.0,&avnx[423] } ,
+ {20,"pol2" ,2,0.0,&avnx[424] } ,
+ {20,"pol3" ,3,0.0,&avnx[425] } ,
+ {20,"bacterial" ,4,0.0,&avnx[426] } ,
+ {20,"viral" ,5,0.0,&avnx[427] } ,
+ {20,"rna" ,6,0.0,&avnx[428] } ,
+ {20,"organelle" ,7,0.0,&avnx[429] } ,
{20,"other" ,255,0.0,NULL } ,
{2,NULL,0,0.0,NULL } ,
{2,NULL,0,0.0,NULL } ,
- {20,"unknown" ,0,0.0,&avnx[429] } ,
- {20,"single" ,1,0.0,&avnx[430] } ,
- {20,"multiple" ,2,0.0,&avnx[431] } ,
- {20,"region" ,3,0.0,NULL } ,
{20,"unknown" ,0,0.0,&avnx[433] } ,
- {20,"rna-seq" ,1,0.0,&avnx[434] } ,
- {20,"rna-size" ,2,0.0,&avnx[435] } ,
- {20,"np-map" ,3,0.0,&avnx[436] } ,
- {20,"np-size" ,4,0.0,&avnx[437] } ,
- {20,"pe-seq" ,5,0.0,&avnx[438] } ,
- {20,"cDNA-seq" ,6,0.0,&avnx[439] } ,
- {20,"pe-map" ,7,0.0,&avnx[440] } ,
- {20,"pe-size" ,8,0.0,&avnx[441] } ,
- {20,"pseudo-seq" ,9,0.0,&avnx[442] } ,
- {20,"rev-pe-map" ,10,0.0,&avnx[443] } ,
+ {20,"single" ,1,0.0,&avnx[434] } ,
+ {20,"multiple" ,2,0.0,&avnx[435] } ,
+ {20,"region" ,3,0.0,NULL } ,
+ {20,"unknown" ,0,0.0,&avnx[437] } ,
+ {20,"rna-seq" ,1,0.0,&avnx[438] } ,
+ {20,"rna-size" ,2,0.0,&avnx[439] } ,
+ {20,"np-map" ,3,0.0,&avnx[440] } ,
+ {20,"np-size" ,4,0.0,&avnx[441] } ,
+ {20,"pe-seq" ,5,0.0,&avnx[442] } ,
+ {20,"cDNA-seq" ,6,0.0,&avnx[443] } ,
+ {20,"pe-map" ,7,0.0,&avnx[444] } ,
+ {20,"pe-size" ,8,0.0,&avnx[445] } ,
+ {20,"pseudo-seq" ,9,0.0,&avnx[446] } ,
+ {20,"rev-pe-map" ,10,0.0,&avnx[447] } ,
{20,"other" ,255,0.0,NULL } ,
- {20,"unknown" ,0,0.0,&avnx[445] } ,
- {20,"physiological" ,1,0.0,&avnx[446] } ,
- {20,"in-vitro" ,2,0.0,&avnx[447] } ,
- {20,"oocyte" ,3,0.0,&avnx[448] } ,
- {20,"transfection" ,4,0.0,&avnx[449] } ,
- {20,"transgenic" ,5,0.0,&avnx[450] } ,
+ {20,"unknown" ,0,0.0,&avnx[449] } ,
+ {20,"physiological" ,1,0.0,&avnx[450] } ,
+ {20,"in-vitro" ,2,0.0,&avnx[451] } ,
+ {20,"oocyte" ,3,0.0,&avnx[452] } ,
+ {20,"transfection" ,4,0.0,&avnx[453] } ,
+ {20,"transgenic" ,5,0.0,&avnx[454] } ,
{20,"other" ,255,0.0,NULL } ,
{3,NULL,1,0.0,NULL } ,
{2,NULL,0,0.0,NULL } ,
{2,NULL,0,0.0,NULL } ,
- {20,"helix" ,1,0.0,&avnx[455] } ,
- {20,"sheet" ,2,0.0,&avnx[456] } ,
+ {20,"helix" ,1,0.0,&avnx[459] } ,
+ {20,"sheet" ,2,0.0,&avnx[460] } ,
{20,"turn" ,3,0.0,NULL } ,
- {20,"experimental" ,1,0.0,&avnx[458] } ,
+ {20,"experimental" ,1,0.0,&avnx[462] } ,
{20,"not-experimental" ,2,0.0,NULL } ,
- {20,"genbank" ,1,0.0,&avnx[460] } ,
- {20,"embl" ,2,0.0,&avnx[461] } ,
- {20,"ddbj" ,3,0.0,&avnx[462] } ,
- {20,"pir" ,4,0.0,&avnx[463] } ,
- {20,"sp" ,5,0.0,&avnx[464] } ,
- {20,"bbone" ,6,0.0,&avnx[465] } ,
- {20,"pdb" ,7,0.0,&avnx[466] } ,
- {20,"other" ,255,0.0,NULL } ,
- {20,"ref" ,1,0.0,&avnx[468] } ,
- {20,"alt" ,2,0.0,&avnx[469] } ,
- {20,"blocks" ,3,0.0,&avnx[470] } ,
+ {20,"genbank" ,1,0.0,&avnx[464] } ,
+ {20,"embl" ,2,0.0,&avnx[465] } ,
+ {20,"ddbj" ,3,0.0,&avnx[466] } ,
+ {20,"pir" ,4,0.0,&avnx[467] } ,
+ {20,"sp" ,5,0.0,&avnx[468] } ,
+ {20,"bbone" ,6,0.0,&avnx[469] } ,
+ {20,"pdb" ,7,0.0,&avnx[470] } ,
{20,"other" ,255,0.0,NULL } ,
{20,"not-set" ,0,0.0,&avnx[472] } ,
{20,"nuc-prot" ,1,0.0,&avnx[473] } ,
@@ -597,26 +597,7 @@ static AsnValxNode avnx[607] = {
{20,"linear" ,1,0.0,&avnx[584] } ,
{20,"circular" ,2,0.0,NULL } ,
{3,NULL,1,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[587] } ,
- {20,"single-stranded" ,1,0.0,&avnx[588] } ,
- {20,"double-stranded" ,2,0.0,&avnx[589] } ,
- {20,"mixed-stranded" ,3,0.0,NULL } ,
- {3,NULL,0,0.0,NULL } ,
- {20,"nucleic-acid" ,0,0.0,&avnx[592] } ,
- {20,"dna" ,1,0.0,&avnx[593] } ,
- {20,"rna" ,2,0.0,&avnx[594] } ,
- {20,"trna" ,3,0.0,&avnx[595] } ,
- {20,"rrna" ,4,0.0,&avnx[596] } ,
- {20,"mrna" ,5,0.0,&avnx[597] } ,
- {20,"urna" ,6,0.0,&avnx[598] } ,
- {20,"snrna" ,7,0.0,&avnx[599] } ,
- {20,"snorna" ,8,0.0,&avnx[600] } ,
- {20,"peptide" ,9,0.0,NULL } ,
- {3,NULL,0,0.0,NULL } ,
- {20,"linear" ,1,0.0,&avnx[603] } ,
- {20,"circular" ,2,0.0,NULL } ,
- {3,NULL,1,0.0,NULL } ,
- {20,"nucleotide" ,1,0.0,&avnx[606] } ,
+ {20,"nucleotide" ,1,0.0,&avnx[587] } ,
{20,"protein" ,2,0.0,NULL } };
static AsnType atx[1592] = {
@@ -1047,584 +1028,584 @@ static AsnType atx[1592] = {
{0, "b" ,128,1,0,1,0,0,0,0,NULL,&atx[403],NULL,0,NULL} ,
{0, "feat" ,128,10,0,0,0,0,0,0,NULL,&atx[426],NULL,0,NULL} ,
{414, "Feat-id" ,1,0,0,0,0,0,1,0,NULL,&atx[427],NULL,0,&atx[355]} ,
- {402, "Feat-id" ,1,0,0,0,0,1,0,0,NULL,&atx[14],&atx[428],0,&atx[811]} ,
+ {402, "Feat-id" ,1,0,0,0,0,1,0,0,NULL,&atx[14],&atx[428],0,&atx[826]} ,
{0, "gibb" ,128,0,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[429]} ,
{0, "giim" ,128,1,0,0,0,0,0,0,NULL,&atx[430],NULL,0,&atx[431]} ,
- {410, "Giimport-id" ,1,0,0,0,0,0,1,0,NULL,&atx[350],NULL,0,&atx[864]} ,
+ {410, "Giimport-id" ,1,0,0,0,0,0,1,0,NULL,&atx[350],NULL,0,&atx[879]} ,
{0, "local" ,128,2,0,0,0,0,0,0,NULL,&atx[432],NULL,0,&atx[433]} ,
{417, "Object-id" ,1,0,0,0,0,0,1,0,NULL,&atx[20],NULL,0,&atx[434]} ,
{0, "general" ,128,3,0,0,0,0,0,0,NULL,&atx[434],NULL,0,NULL} ,
- {418, "Dbtag" ,1,0,0,0,0,0,1,0,NULL,&atx[17],NULL,0,&atx[882]} ,
- {401, "Bioseq" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[436],0,&atx[969]} ,
- {0, "id" ,128,0,0,0,0,0,0,0,NULL,&atx[45],&atx[437],0,&atx[439]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[438],NULL,0,NULL} ,
- {422, "Seq-id" ,1,0,0,0,0,0,1,0,NULL,&atx[344],NULL,0,&atx[776]} ,
- {0, "descr" ,128,1,0,1,0,0,0,0,NULL,&atx[440],NULL,0,&atx[740]} ,
- {404, "Seq-descr" ,1,0,0,0,0,1,0,0,NULL,&atx[45],&atx[441],0,&atx[442]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[442],NULL,0,NULL} ,
- {405, "Seqdesc" ,1,0,0,0,0,1,0,0,NULL,&atx[14],&atx[443],0,&atx[497]} ,
- {0, "mol-type" ,128,0,0,0,0,0,0,0,NULL,&atx[444],NULL,0,&atx[445]} ,
- {409, "GIBB-mol" ,1,0,0,0,0,1,0,0,NULL,&atx[42],&avnx[78],0,&atx[950]} ,
- {0, "modif" ,128,1,0,0,0,0,0,0,NULL,&atx[45],&atx[446],0,&atx[448]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[447],NULL,0,NULL} ,
- {431, "GIBB-mod" ,1,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[90],0,&atx[449]} ,
- {0, "method" ,128,2,0,0,0,0,0,0,NULL,&atx[449],NULL,0,&atx[450]} ,
- {432, "GIBB-method" ,1,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[118],0,&atx[735]} ,
- {0, "name" ,128,3,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[451]} ,
- {0, "title" ,128,4,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[452]} ,
- {0, "org" ,128,5,0,0,0,0,0,0,NULL,&atx[453],NULL,0,&atx[495]} ,
- {420, "Org-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[454],NULL,0,&atx[721]} ,
- {401, "Org-ref" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[455],0,&atx[461]} ,
- {0, "taxname" ,128,0,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[456]} ,
- {0, "common" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[457]} ,
- {0, "mod" ,128,2,0,1,0,0,0,0,NULL,&atx[45],&atx[458],0,&atx[459]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "db" ,128,3,0,1,0,0,0,0,NULL,&atx[45],&atx[460],0,&atx[462]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[461],NULL,0,NULL} ,
- {402, "Dbtag" ,1,0,0,0,0,0,1,0,NULL,&atx[17],NULL,0,&atx[465]} ,
- {0, "syn" ,128,4,0,1,0,0,0,0,NULL,&atx[45],&atx[463],0,&atx[464]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "orgname" ,128,5,0,1,0,0,0,0,NULL,&atx[465],NULL,0,NULL} ,
- {403, "OrgName" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[466],0,&atx[468]} ,
- {0, "name" ,128,0,0,1,0,0,0,0,NULL,&atx[14],&atx[467],0,&atx[484]} ,
- {0, "binomial" ,128,0,0,0,0,0,0,0,NULL,&atx[468],NULL,0,&atx[472]} ,
- {404, "BinomialOrgName" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[469],0,&atx[474]} ,
- {0, "genus" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[470]} ,
- {0, "species" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[471]} ,
- {0, "subspecies" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "virus" ,128,1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[473]} ,
- {0, "hybrid" ,128,2,0,0,0,0,0,0,NULL,&atx[474],NULL,0,&atx[476]} ,
- {405, "MultiOrgName" ,1,0,0,0,0,0,0,0,NULL,&atx[66],&atx[475],0,&atx[478]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[465],NULL,0,NULL} ,
- {0, "namedhybrid" ,128,3,0,0,0,0,0,0,NULL,&atx[468],NULL,0,&atx[477]} ,
- {0, "partial" ,128,4,0,0,0,0,0,0,NULL,&atx[478],NULL,0,NULL} ,
- {406, "PartialOrgName" ,1,0,0,0,0,0,0,0,NULL,&atx[66],&atx[479],0,&atx[487]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[480],NULL,0,NULL} ,
- {408, "TaxElement" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[481],0,NULL} ,
- {0, "fixed-level" ,128,0,0,0,0,0,0,0,NULL,&atx[6],&avnx[125],0,&atx[482]} ,
- {0, "level" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[483]} ,
- {0, "name" ,128,2,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "attrib" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[485]} ,
- {0, "mod" ,128,2,0,1,0,0,0,0,NULL,&atx[66],&atx[486],0,&atx[491]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[487],NULL,0,NULL} ,
- {407, "OrgMod" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[488],0,&atx[480]} ,
- {0, "subtype" ,128,0,0,0,0,0,0,0,NULL,&atx[6],&avnx[129],0,&atx[489]} ,
- {0, "subname" ,128,1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[490]} ,
- {0, "attrib" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "lineage" ,128,3,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[492]} ,
- {0, "gcode" ,128,4,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[493]} ,
- {0, "mgcode" ,128,5,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[494]} ,
- {0, "div" ,128,6,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "comment" ,128,6,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[496]} ,
- {0, "num" ,128,7,0,0,0,0,0,0,NULL,&atx[497],NULL,0,&atx[590]} ,
- {406, "Numbering" ,1,0,0,0,0,1,0,0,NULL,&atx[14],&atx[498],0,&atx[719]} ,
- {0, "cont" ,128,0,0,0,0,0,0,0,NULL,&atx[499],NULL,0,&atx[503]} ,
- {434, "Num-cont" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[500],0,&atx[504]} ,
- {0, "refnum" ,128,0,0,0,1,0,0,0,&avnx[165],&atx[6],NULL,0,&atx[501]} ,
- {0, "has-zero" ,128,1,0,0,1,0,0,0,&avnx[166],&atx[60],NULL,0,&atx[502]} ,
- {0, "ascending" ,128,2,0,0,1,0,0,0,&avnx[167],&atx[60],NULL,0,NULL} ,
- {0, "enum" ,128,1,0,0,0,0,0,0,NULL,&atx[504],NULL,0,&atx[508]} ,
- {435, "Num-enum" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[505],0,&atx[509]} ,
- {0, "num" ,128,0,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[506]} ,
- {0, "names" ,128,1,0,0,0,0,0,0,NULL,&atx[66],&atx[507],0,NULL} ,
+ {418, "Dbtag" ,1,0,0,0,0,0,1,0,NULL,&atx[17],NULL,0,&atx[897]} ,
+ {401, "Annotdesc" ,1,0,0,0,0,1,0,0,NULL,&atx[14],&atx[436],0,&atx[561]} ,
+ {0, "name" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[437]} ,
+ {0, "title" ,128,1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[438]} ,
+ {0, "comment" ,128,2,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[439]} ,
+ {0, "pub" ,128,3,0,0,0,0,0,0,NULL,&atx[440],NULL,0,&atx[547]} ,
+ {406, "Pubdesc" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[441],0,&atx[984]} ,
+ {0, "pub" ,128,0,0,0,0,0,0,0,NULL,&atx[442],NULL,0,&atx[443]} ,
+ {420, "Pub-equiv" ,1,0,0,0,0,0,1,0,NULL,&atx[325],NULL,0,&atx[578]} ,
+ {0, "name" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[444]} ,
+ {0, "fig" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[445]} ,
+ {0, "num" ,128,3,0,1,0,0,0,0,NULL,&atx[446],NULL,0,&atx[539]} ,
+ {405, "Numbering" ,1,0,0,0,0,1,0,0,NULL,&atx[14],&atx[447],0,&atx[440]} ,
+ {0, "cont" ,128,0,0,0,0,0,0,0,NULL,&atx[448],NULL,0,&atx[452]} ,
+ {435, "Num-cont" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[449],0,&atx[453]} ,
+ {0, "refnum" ,128,0,0,0,1,0,0,0,&avnx[78],&atx[6],NULL,0,&atx[450]} ,
+ {0, "has-zero" ,128,1,0,0,1,0,0,0,&avnx[79],&atx[60],NULL,0,&atx[451]} ,
+ {0, "ascending" ,128,2,0,0,1,0,0,0,&avnx[80],&atx[60],NULL,0,NULL} ,
+ {0, "enum" ,128,1,0,0,0,0,0,0,NULL,&atx[453],NULL,0,&atx[457]} ,
+ {436, "Num-enum" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[454],0,&atx[458]} ,
+ {0, "num" ,128,0,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[455]} ,
+ {0, "names" ,128,1,0,0,0,0,0,0,NULL,&atx[66],&atx[456],0,NULL} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "ref" ,128,2,0,0,0,0,0,0,NULL,&atx[509],NULL,0,&atx[585]} ,
- {436, "Num-ref" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[510],0,&atx[586]} ,
- {0, "type" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[168],0,&atx[511]} ,
- {0, "aligns" ,128,1,0,1,0,0,0,0,NULL,&atx[512],NULL,0,NULL} ,
- {416, "Seq-align" ,1,0,0,0,0,0,1,0,NULL,&atx[513],NULL,0,&atx[782]} ,
- {401, "Seq-align" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[514],0,&atx[518]} ,
- {0, "type" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[171],0,&atx[515]} ,
- {0, "dim" ,128,1,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[516]} ,
- {0, "score" ,128,2,0,1,0,0,0,0,NULL,&atx[45],&atx[517],0,&atx[524]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[518],NULL,0,NULL} ,
- {402, "Score" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[519],0,&atx[1088]} ,
- {0, "id" ,128,0,0,1,0,0,0,0,NULL,&atx[520],NULL,0,&atx[521]} ,
- {408, "Object-id" ,1,0,0,0,0,0,1,0,NULL,&atx[20],NULL,0,&atx[527]} ,
- {0, "value" ,128,1,0,0,0,0,0,0,NULL,&atx[14],&atx[522],0,NULL} ,
- {0, "real" ,128,0,0,0,0,0,0,0,NULL,&atx[58],NULL,0,&atx[523]} ,
+ {0, "ref" ,128,2,0,0,0,0,0,0,NULL,&atx[458],NULL,0,&atx[534]} ,
+ {437, "Num-ref" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[459],0,&atx[535]} ,
+ {0, "type" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[81],0,&atx[460]} ,
+ {0, "aligns" ,128,1,0,1,0,0,0,0,NULL,&atx[461],NULL,0,NULL} ,
+ {417, "Seq-align" ,1,0,0,0,0,0,1,0,NULL,&atx[462],NULL,0,&atx[797]} ,
+ {401, "Seq-align" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[463],0,&atx[467]} ,
+ {0, "type" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[84],0,&atx[464]} ,
+ {0, "dim" ,128,1,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[465]} ,
+ {0, "score" ,128,2,0,1,0,0,0,0,NULL,&atx[45],&atx[466],0,&atx[473]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[467],NULL,0,NULL} ,
+ {402, "Score" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[468],0,&atx[1088]} ,
+ {0, "id" ,128,0,0,1,0,0,0,0,NULL,&atx[469],NULL,0,&atx[470]} ,
+ {408, "Object-id" ,1,0,0,0,0,0,1,0,NULL,&atx[20],NULL,0,&atx[476]} ,
+ {0, "value" ,128,1,0,0,0,0,0,0,NULL,&atx[14],&atx[471],0,NULL} ,
+ {0, "real" ,128,0,0,0,0,0,0,0,NULL,&atx[58],NULL,0,&atx[472]} ,
{0, "int" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,NULL} ,
- {0, "segs" ,128,3,0,0,0,0,0,0,NULL,&atx[14],&atx[525],0,&atx[583]} ,
- {0, "dendiag" ,128,0,0,0,0,0,0,0,NULL,&atx[66],&atx[526],0,&atx[540]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[527],NULL,0,NULL} ,
- {409, "Dense-diag" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[528],0,&atx[541]} ,
- {0, "dim" ,128,0,0,0,1,0,0,0,&avnx[177],&atx[6],NULL,0,&atx[529]} ,
- {0, "ids" ,128,1,0,0,0,0,0,0,NULL,&atx[66],&atx[530],0,&atx[532]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[531],NULL,0,NULL} ,
- {405, "Seq-id" ,1,0,0,0,0,0,1,0,NULL,&atx[344],NULL,0,&atx[562]} ,
- {0, "starts" ,128,2,0,0,0,0,0,0,NULL,&atx[66],&atx[533],0,&atx[534]} ,
+ {0, "segs" ,128,3,0,0,0,0,0,0,NULL,&atx[14],&atx[474],0,&atx[532]} ,
+ {0, "dendiag" ,128,0,0,0,0,0,0,0,NULL,&atx[66],&atx[475],0,&atx[489]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[476],NULL,0,NULL} ,
+ {409, "Dense-diag" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[477],0,&atx[490]} ,
+ {0, "dim" ,128,0,0,0,1,0,0,0,&avnx[90],&atx[6],NULL,0,&atx[478]} ,
+ {0, "ids" ,128,1,0,0,0,0,0,0,NULL,&atx[66],&atx[479],0,&atx[481]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[480],NULL,0,NULL} ,
+ {405, "Seq-id" ,1,0,0,0,0,0,1,0,NULL,&atx[344],NULL,0,&atx[511]} ,
+ {0, "starts" ,128,2,0,0,0,0,0,0,NULL,&atx[66],&atx[482],0,&atx[483]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,NULL} ,
- {0, "len" ,128,3,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[535]} ,
- {0, "strands" ,128,4,0,1,0,0,0,0,NULL,&atx[66],&atx[536],0,&atx[538]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[537],NULL,0,NULL} ,
- {407, "Na-strand" ,1,0,0,0,0,0,1,0,NULL,&atx[394],NULL,0,&atx[520]} ,
- {0, "scores" ,128,5,0,1,0,0,0,0,NULL,&atx[45],&atx[539],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[518],NULL,0,NULL} ,
- {0, "denseg" ,128,1,0,0,0,0,0,0,NULL,&atx[541],NULL,0,&atx[554]} ,
- {410, "Dense-seg" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[542],0,&atx[556]} ,
- {0, "dim" ,128,0,0,0,1,0,0,0,&avnx[178],&atx[6],NULL,0,&atx[543]} ,
- {0, "numseg" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[544]} ,
- {0, "ids" ,128,2,0,0,0,0,0,0,NULL,&atx[66],&atx[545],0,&atx[546]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[531],NULL,0,NULL} ,
- {0, "starts" ,128,3,0,0,0,0,0,0,NULL,&atx[66],&atx[547],0,&atx[548]} ,
+ {0, "len" ,128,3,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[484]} ,
+ {0, "strands" ,128,4,0,1,0,0,0,0,NULL,&atx[66],&atx[485],0,&atx[487]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[486],NULL,0,NULL} ,
+ {407, "Na-strand" ,1,0,0,0,0,0,1,0,NULL,&atx[394],NULL,0,&atx[469]} ,
+ {0, "scores" ,128,5,0,1,0,0,0,0,NULL,&atx[45],&atx[488],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[467],NULL,0,NULL} ,
+ {0, "denseg" ,128,1,0,0,0,0,0,0,NULL,&atx[490],NULL,0,&atx[503]} ,
+ {410, "Dense-seg" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[491],0,&atx[505]} ,
+ {0, "dim" ,128,0,0,0,1,0,0,0,&avnx[91],&atx[6],NULL,0,&atx[492]} ,
+ {0, "numseg" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[493]} ,
+ {0, "ids" ,128,2,0,0,0,0,0,0,NULL,&atx[66],&atx[494],0,&atx[495]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[480],NULL,0,NULL} ,
+ {0, "starts" ,128,3,0,0,0,0,0,0,NULL,&atx[66],&atx[496],0,&atx[497]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,NULL} ,
- {0, "lens" ,128,4,0,0,0,0,0,0,NULL,&atx[66],&atx[549],0,&atx[550]} ,
+ {0, "lens" ,128,4,0,0,0,0,0,0,NULL,&atx[66],&atx[498],0,&atx[499]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,NULL} ,
- {0, "strands" ,128,5,0,1,0,0,0,0,NULL,&atx[66],&atx[551],0,&atx[552]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[537],NULL,0,NULL} ,
- {0, "scores" ,128,6,0,1,0,0,0,0,NULL,&atx[66],&atx[553],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[518],NULL,0,NULL} ,
- {0, "std" ,128,2,0,0,0,0,0,0,NULL,&atx[66],&atx[555],0,&atx[565]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[556],NULL,0,NULL} ,
- {411, "Std-seg" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[557],0,&atx[566]} ,
- {0, "dim" ,128,0,0,0,1,0,0,0,&avnx[179],&atx[6],NULL,0,&atx[558]} ,
- {0, "ids" ,128,1,0,1,0,0,0,0,NULL,&atx[66],&atx[559],0,&atx[560]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[531],NULL,0,NULL} ,
- {0, "loc" ,128,2,0,0,0,0,0,0,NULL,&atx[66],&atx[561],0,&atx[563]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[562],NULL,0,NULL} ,
- {406, "Seq-loc" ,1,0,0,0,0,0,1,0,NULL,&atx[384],NULL,0,&atx[537]} ,
- {0, "scores" ,128,3,0,1,0,0,0,0,NULL,&atx[45],&atx[564],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[518],NULL,0,NULL} ,
- {0, "packed" ,128,3,0,0,0,0,0,0,NULL,&atx[566],NULL,0,&atx[580]} ,
- {412, "Packed-seg" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[567],0,NULL} ,
- {0, "dim" ,128,0,0,0,1,0,0,0,&avnx[180],&atx[6],NULL,0,&atx[568]} ,
- {0, "numseg" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[569]} ,
- {0, "ids" ,128,2,0,0,0,0,0,0,NULL,&atx[66],&atx[570],0,&atx[571]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[531],NULL,0,NULL} ,
- {0, "starts" ,128,3,0,0,0,0,0,0,NULL,&atx[66],&atx[572],0,&atx[573]} ,
+ {0, "strands" ,128,5,0,1,0,0,0,0,NULL,&atx[66],&atx[500],0,&atx[501]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[486],NULL,0,NULL} ,
+ {0, "scores" ,128,6,0,1,0,0,0,0,NULL,&atx[66],&atx[502],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[467],NULL,0,NULL} ,
+ {0, "std" ,128,2,0,0,0,0,0,0,NULL,&atx[66],&atx[504],0,&atx[514]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[505],NULL,0,NULL} ,
+ {411, "Std-seg" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[506],0,&atx[515]} ,
+ {0, "dim" ,128,0,0,0,1,0,0,0,&avnx[92],&atx[6],NULL,0,&atx[507]} ,
+ {0, "ids" ,128,1,0,1,0,0,0,0,NULL,&atx[66],&atx[508],0,&atx[509]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[480],NULL,0,NULL} ,
+ {0, "loc" ,128,2,0,0,0,0,0,0,NULL,&atx[66],&atx[510],0,&atx[512]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[511],NULL,0,NULL} ,
+ {406, "Seq-loc" ,1,0,0,0,0,0,1,0,NULL,&atx[384],NULL,0,&atx[486]} ,
+ {0, "scores" ,128,3,0,1,0,0,0,0,NULL,&atx[45],&atx[513],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[467],NULL,0,NULL} ,
+ {0, "packed" ,128,3,0,0,0,0,0,0,NULL,&atx[515],NULL,0,&atx[529]} ,
+ {412, "Packed-seg" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[516],0,NULL} ,
+ {0, "dim" ,128,0,0,0,1,0,0,0,&avnx[93],&atx[6],NULL,0,&atx[517]} ,
+ {0, "numseg" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[518]} ,
+ {0, "ids" ,128,2,0,0,0,0,0,0,NULL,&atx[66],&atx[519],0,&atx[520]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[480],NULL,0,NULL} ,
+ {0, "starts" ,128,3,0,0,0,0,0,0,NULL,&atx[66],&atx[521],0,&atx[522]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,NULL} ,
- {0, "present" ,128,4,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[574]} ,
- {0, "lens" ,128,5,0,0,0,0,0,0,NULL,&atx[66],&atx[575],0,&atx[576]} ,
+ {0, "present" ,128,4,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[523]} ,
+ {0, "lens" ,128,5,0,0,0,0,0,0,NULL,&atx[66],&atx[524],0,&atx[525]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,NULL} ,
- {0, "strands" ,128,6,0,1,0,0,0,0,NULL,&atx[66],&atx[577],0,&atx[578]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[537],NULL,0,NULL} ,
- {0, "scores" ,128,7,0,1,0,0,0,0,NULL,&atx[66],&atx[579],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[518],NULL,0,NULL} ,
- {0, "disc" ,128,4,0,0,0,0,0,0,NULL,&atx[581],NULL,0,NULL} ,
- {404, "Seq-align-set" ,1,0,0,0,0,1,0,0,NULL,&atx[45],&atx[582],0,&atx[531]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[513],NULL,0,NULL} ,
- {0, "bounds" ,128,4,0,1,0,0,0,0,NULL,&atx[45],&atx[584],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[562],NULL,0,NULL} ,
- {0, "real" ,128,3,0,0,0,0,0,0,NULL,&atx[586],NULL,0,NULL} ,
- {437, "Num-real" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[587],0,&atx[750]} ,
- {0, "a" ,128,0,0,0,0,0,0,0,NULL,&atx[58],NULL,0,&atx[588]} ,
- {0, "b" ,128,1,0,0,0,0,0,0,NULL,&atx[58],NULL,0,&atx[589]} ,
+ {0, "strands" ,128,6,0,1,0,0,0,0,NULL,&atx[66],&atx[526],0,&atx[527]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[486],NULL,0,NULL} ,
+ {0, "scores" ,128,7,0,1,0,0,0,0,NULL,&atx[66],&atx[528],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[467],NULL,0,NULL} ,
+ {0, "disc" ,128,4,0,0,0,0,0,0,NULL,&atx[530],NULL,0,NULL} ,
+ {404, "Seq-align-set" ,1,0,0,0,0,1,0,0,NULL,&atx[45],&atx[531],0,&atx[480]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[462],NULL,0,NULL} ,
+ {0, "bounds" ,128,4,0,1,0,0,0,0,NULL,&atx[45],&atx[533],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[511],NULL,0,NULL} ,
+ {0, "real" ,128,3,0,0,0,0,0,0,NULL,&atx[535],NULL,0,NULL} ,
+ {438, "Num-real" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[536],0,&atx[766]} ,
+ {0, "a" ,128,0,0,0,0,0,0,0,NULL,&atx[58],NULL,0,&atx[537]} ,
+ {0, "b" ,128,1,0,0,0,0,0,0,NULL,&atx[58],NULL,0,&atx[538]} ,
{0, "units" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "maploc" ,128,8,0,0,0,0,0,0,NULL,&atx[591],NULL,0,&atx[592]} ,
- {413, "Dbtag" ,1,0,0,0,0,0,1,0,NULL,&atx[17],NULL,0,&atx[974]} ,
- {0, "pir" ,128,9,0,0,0,0,0,0,NULL,&atx[593],NULL,0,&atx[611]} ,
- {425, "PIR-block" ,1,0,0,0,0,0,1,0,NULL,&atx[594],NULL,0,&atx[666]} ,
- {401, "PIR-block" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[595],0,&atx[610]} ,
- {0, "had-punct" ,128,0,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[596]} ,
- {0, "host" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[597]} ,
- {0, "source" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[598]} ,
- {0, "summary" ,128,3,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[599]} ,
- {0, "genetic" ,128,4,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[600]} ,
- {0, "includes" ,128,5,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[601]} ,
- {0, "placement" ,128,6,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[602]} ,
- {0, "superfamily" ,128,7,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[603]} ,
- {0, "keywords" ,128,8,0,1,0,0,0,0,NULL,&atx[66],&atx[604],0,&atx[605]} ,
+ {0, "numexc" ,128,4,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[540]} ,
+ {0, "poly-a" ,128,5,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[541]} ,
+ {0, "maploc" ,128,6,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[542]} ,
+ {0, "seq-raw" ,128,7,0,1,0,0,0,0,NULL,&atx[543],NULL,0,&atx[544]} ,
+ {351, "StringStore" ,64,1,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
+ {0, "align-group" ,128,8,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[545]} ,
+ {0, "comment" ,128,9,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[546]} ,
+ {0, "reftype" ,128,10,0,0,1,0,0,0,&avnx[98],&atx[6],&avnx[94],0,NULL} ,
+ {0, "user" ,128,4,0,0,0,0,0,0,NULL,&atx[548],NULL,0,&atx[549]} ,
+ {416, "User-object" ,1,0,0,0,0,0,1,0,NULL,&atx[46],NULL,0,&atx[461]} ,
+ {0, "create-date" ,128,5,0,0,0,0,0,0,NULL,&atx[550],NULL,0,&atx[551]} ,
+ {412, "Date" ,1,0,0,0,0,0,1,0,NULL,&atx[0],NULL,0,&atx[762]} ,
+ {0, "update-date" ,128,6,0,0,0,0,0,0,NULL,&atx[550],NULL,0,&atx[552]} ,
+ {0, "src" ,128,7,0,0,0,0,0,0,NULL,&atx[553],NULL,0,&atx[554]} ,
+ {423, "Seq-id" ,1,0,0,0,0,0,1,0,NULL,&atx[344],NULL,0,&atx[560]} ,
+ {0, "align" ,128,8,0,0,0,0,0,0,NULL,&atx[555],NULL,0,&atx[559]} ,
+ {459, "Align-def" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[556],0,NULL} ,
+ {0, "align-type" ,128,0,0,0,0,0,0,0,NULL,&atx[6],&avnx[99],0,&atx[557]} ,
+ {0, "ids" ,128,1,0,1,0,0,0,0,NULL,&atx[45],&atx[558],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[553],NULL,0,NULL} ,
+ {0, "region" ,128,9,0,0,0,0,0,0,NULL,&atx[560],NULL,0,NULL} ,
+ {424, "Seq-loc" ,1,0,0,0,0,0,1,0,NULL,&atx[384],NULL,0,&atx[644]} ,
+ {402, "Bioseq" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[562],0,&atx[569]} ,
+ {0, "id" ,128,0,0,0,0,0,0,0,NULL,&atx[45],&atx[563],0,&atx[564]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[553],NULL,0,NULL} ,
+ {0, "descr" ,128,1,0,1,0,0,0,0,NULL,&atx[565],NULL,0,&atx[756]} ,
+ {408, "Seq-descr" ,1,0,0,0,0,1,0,0,NULL,&atx[45],&atx[566],0,&atx[970]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[567],NULL,0,NULL} ,
+ {411, "Seqdesc" ,1,0,0,0,0,1,0,0,NULL,&atx[14],&atx[568],0,&atx[550]} ,
+ {0, "mol-type" ,128,0,0,0,0,0,0,0,NULL,&atx[569],NULL,0,&atx[570]} ,
+ {403, "GIBB-mol" ,1,0,0,0,0,1,0,0,NULL,&atx[42],&avnx[103],0,&atx[735]} ,
+ {0, "modif" ,128,1,0,0,0,0,0,0,NULL,&atx[45],&atx[571],0,&atx[573]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[572],NULL,0,NULL} ,
+ {432, "GIBB-mod" ,1,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[115],0,&atx[574]} ,
+ {0, "method" ,128,2,0,0,0,0,0,0,NULL,&atx[574],NULL,0,&atx[575]} ,
+ {433, "GIBB-method" ,1,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[143],0,&atx[751]} ,
+ {0, "name" ,128,3,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[576]} ,
+ {0, "title" ,128,4,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[577]} ,
+ {0, "org" ,128,5,0,0,0,0,0,0,NULL,&atx[578],NULL,0,&atx[620]} ,
+ {421, "Org-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[579],NULL,0,&atx[737]} ,
+ {401, "Org-ref" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[580],0,&atx[586]} ,
+ {0, "taxname" ,128,0,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[581]} ,
+ {0, "common" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[582]} ,
+ {0, "mod" ,128,2,0,1,0,0,0,0,NULL,&atx[45],&atx[583],0,&atx[584]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
+ {0, "db" ,128,3,0,1,0,0,0,0,NULL,&atx[45],&atx[585],0,&atx[587]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[586],NULL,0,NULL} ,
+ {402, "Dbtag" ,1,0,0,0,0,0,1,0,NULL,&atx[17],NULL,0,&atx[590]} ,
+ {0, "syn" ,128,4,0,1,0,0,0,0,NULL,&atx[45],&atx[588],0,&atx[589]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
+ {0, "orgname" ,128,5,0,1,0,0,0,0,NULL,&atx[590],NULL,0,NULL} ,
+ {403, "OrgName" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[591],0,&atx[593]} ,
+ {0, "name" ,128,0,0,1,0,0,0,0,NULL,&atx[14],&atx[592],0,&atx[609]} ,
+ {0, "binomial" ,128,0,0,0,0,0,0,0,NULL,&atx[593],NULL,0,&atx[597]} ,
+ {404, "BinomialOrgName" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[594],0,&atx[599]} ,
+ {0, "genus" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[595]} ,
+ {0, "species" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[596]} ,
+ {0, "subspecies" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
+ {0, "virus" ,128,1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[598]} ,
+ {0, "hybrid" ,128,2,0,0,0,0,0,0,NULL,&atx[599],NULL,0,&atx[601]} ,
+ {405, "MultiOrgName" ,1,0,0,0,0,0,0,0,NULL,&atx[66],&atx[600],0,&atx[603]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[590],NULL,0,NULL} ,
+ {0, "namedhybrid" ,128,3,0,0,0,0,0,0,NULL,&atx[593],NULL,0,&atx[602]} ,
+ {0, "partial" ,128,4,0,0,0,0,0,0,NULL,&atx[603],NULL,0,NULL} ,
+ {406, "PartialOrgName" ,1,0,0,0,0,0,0,0,NULL,&atx[66],&atx[604],0,&atx[612]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[605],NULL,0,NULL} ,
+ {408, "TaxElement" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[606],0,NULL} ,
+ {0, "fixed-level" ,128,0,0,0,0,0,0,0,NULL,&atx[6],&avnx[150],0,&atx[607]} ,
+ {0, "level" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[608]} ,
+ {0, "name" ,128,2,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
+ {0, "attrib" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[610]} ,
+ {0, "mod" ,128,2,0,1,0,0,0,0,NULL,&atx[66],&atx[611],0,&atx[616]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[612],NULL,0,NULL} ,
+ {407, "OrgMod" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[613],0,&atx[605]} ,
+ {0, "subtype" ,128,0,0,0,0,0,0,0,NULL,&atx[6],&avnx[154],0,&atx[614]} ,
+ {0, "subname" ,128,1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[615]} ,
+ {0, "attrib" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
+ {0, "lineage" ,128,3,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[617]} ,
+ {0, "gcode" ,128,4,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[618]} ,
+ {0, "mgcode" ,128,5,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[619]} ,
+ {0, "div" ,128,6,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
+ {0, "comment" ,128,6,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[621]} ,
+ {0, "num" ,128,7,0,0,0,0,0,0,NULL,&atx[446],NULL,0,&atx[622]} ,
+ {0, "maploc" ,128,8,0,0,0,0,0,0,NULL,&atx[623],NULL,0,&atx[624]} ,
+ {414, "Dbtag" ,1,0,0,0,0,0,1,0,NULL,&atx[17],NULL,0,&atx[989]} ,
+ {0, "pir" ,128,9,0,0,0,0,0,0,NULL,&atx[625],NULL,0,&atx[643]} ,
+ {426, "PIR-block" ,1,0,0,0,0,0,1,0,NULL,&atx[626],NULL,0,&atx[683]} ,
+ {401, "PIR-block" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[627],0,&atx[642]} ,
+ {0, "had-punct" ,128,0,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[628]} ,
+ {0, "host" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[629]} ,
+ {0, "source" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[630]} ,
+ {0, "summary" ,128,3,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[631]} ,
+ {0, "genetic" ,128,4,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[632]} ,
+ {0, "includes" ,128,5,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[633]} ,
+ {0, "placement" ,128,6,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[634]} ,
+ {0, "superfamily" ,128,7,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[635]} ,
+ {0, "keywords" ,128,8,0,1,0,0,0,0,NULL,&atx[66],&atx[636],0,&atx[637]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "cross-reference" ,128,9,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[606]} ,
- {0, "date" ,128,10,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[607]} ,
- {0, "seq-raw" ,128,11,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[608]} ,
- {0, "seqref" ,128,12,0,1,0,0,0,0,NULL,&atx[45],&atx[609],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[610],NULL,0,NULL} ,
+ {0, "cross-reference" ,128,9,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[638]} ,
+ {0, "date" ,128,10,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[639]} ,
+ {0, "seq-raw" ,128,11,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[640]} ,
+ {0, "seqref" ,128,12,0,1,0,0,0,0,NULL,&atx[45],&atx[641],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[642],NULL,0,NULL} ,
{402, "Seq-id" ,1,0,0,0,0,0,1,0,NULL,&atx[344],NULL,0,NULL} ,
- {0, "genbank" ,128,10,0,0,0,0,0,0,NULL,&atx[612],NULL,0,&atx[625]} ,
- {424, "GB-block" ,1,0,0,0,0,0,1,0,NULL,&atx[613],NULL,0,&atx[593]} ,
- {401, "GB-block" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[614],0,&atx[622]} ,
- {0, "extra-accessions" ,128,0,0,1,0,0,0,0,NULL,&atx[66],&atx[615],0,&atx[616]} ,
+ {0, "genbank" ,128,10,0,0,0,0,0,0,NULL,&atx[644],NULL,0,&atx[657]} ,
+ {425, "GB-block" ,1,0,0,0,0,0,1,0,NULL,&atx[645],NULL,0,&atx[625]} ,
+ {401, "GB-block" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[646],0,&atx[654]} ,
+ {0, "extra-accessions" ,128,0,0,1,0,0,0,0,NULL,&atx[66],&atx[647],0,&atx[648]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "source" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[617]} ,
- {0, "keywords" ,128,2,0,1,0,0,0,0,NULL,&atx[66],&atx[618],0,&atx[619]} ,
+ {0, "source" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[649]} ,
+ {0, "keywords" ,128,2,0,1,0,0,0,0,NULL,&atx[66],&atx[650],0,&atx[651]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "origin" ,128,3,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[620]} ,
- {0, "date" ,128,4,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[621]} ,
- {0, "entry-date" ,128,5,0,1,0,0,0,0,NULL,&atx[622],NULL,0,&atx[623]} ,
+ {0, "origin" ,128,3,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[652]} ,
+ {0, "date" ,128,4,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[653]} ,
+ {0, "entry-date" ,128,5,0,1,0,0,0,0,NULL,&atx[654],NULL,0,&atx[655]} ,
{402, "Date" ,1,0,0,0,0,0,1,0,NULL,&atx[0],NULL,0,NULL} ,
- {0, "div" ,128,6,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[624]} ,
+ {0, "div" ,128,6,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[656]} ,
{0, "taxonomy" ,128,7,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "pub" ,128,11,0,0,0,0,0,0,NULL,&atx[626],NULL,0,&atx[640]} ,
- {403, "Pubdesc" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[627],0,&atx[440]} ,
- {0, "pub" ,128,0,0,0,0,0,0,0,NULL,&atx[628],NULL,0,&atx[629]} ,
- {419, "Pub-equiv" ,1,0,0,0,0,0,1,0,NULL,&atx[325],NULL,0,&atx[453]} ,
- {0, "name" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[630]} ,
- {0, "fig" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[631]} ,
- {0, "num" ,128,3,0,1,0,0,0,0,NULL,&atx[497],NULL,0,&atx[632]} ,
- {0, "numexc" ,128,4,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[633]} ,
- {0, "poly-a" ,128,5,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[634]} ,
- {0, "maploc" ,128,6,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[635]} ,
- {0, "seq-raw" ,128,7,0,1,0,0,0,0,NULL,&atx[636],NULL,0,&atx[637]} ,
- {351, "StringStore" ,64,1,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
- {0, "align-group" ,128,8,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[638]} ,
- {0, "comment" ,128,9,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[639]} ,
- {0, "reftype" ,128,10,0,0,1,0,0,0,&avnx[185],&atx[6],&avnx[181],0,NULL} ,
- {0, "region" ,128,12,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[641]} ,
- {0, "user" ,128,13,0,0,0,0,0,0,NULL,&atx[642],NULL,0,&atx[643]} ,
- {415, "User-object" ,1,0,0,0,0,0,1,0,NULL,&atx[46],NULL,0,&atx[512]} ,
- {0, "sp" ,128,14,0,0,0,0,0,0,NULL,&atx[644],NULL,0,&atx[664]} ,
- {427, "SP-block" ,1,0,0,0,0,0,1,0,NULL,&atx[645],NULL,0,&atx[691]} ,
- {401, "SP-block" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[646],0,&atx[661]} ,
- {0, "class" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[186],0,&atx[647]} ,
- {0, "extra-acc" ,128,1,0,1,0,0,0,0,NULL,&atx[45],&atx[648],0,&atx[649]} ,
+ {0, "pub" ,128,11,0,0,0,0,0,0,NULL,&atx[440],NULL,0,&atx[658]} ,
+ {0, "region" ,128,12,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[659]} ,
+ {0, "user" ,128,13,0,0,0,0,0,0,NULL,&atx[548],NULL,0,&atx[660]} ,
+ {0, "sp" ,128,14,0,0,0,0,0,0,NULL,&atx[661],NULL,0,&atx[681]} ,
+ {428, "SP-block" ,1,0,0,0,0,0,1,0,NULL,&atx[662],NULL,0,&atx[707]} ,
+ {401, "SP-block" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[663],0,&atx[678]} ,
+ {0, "class" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[190],0,&atx[664]} ,
+ {0, "extra-acc" ,128,1,0,1,0,0,0,0,NULL,&atx[45],&atx[665],0,&atx[666]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "imeth" ,128,2,0,0,1,0,0,0,&avnx[190],&atx[60],NULL,0,&atx[650]} ,
- {0, "plasnm" ,128,3,0,1,0,0,0,0,NULL,&atx[45],&atx[651],0,&atx[652]} ,
+ {0, "imeth" ,128,2,0,0,1,0,0,0,&avnx[194],&atx[60],NULL,0,&atx[667]} ,
+ {0, "plasnm" ,128,3,0,1,0,0,0,0,NULL,&atx[45],&atx[668],0,&atx[669]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "seqref" ,128,4,0,1,0,0,0,0,NULL,&atx[45],&atx[653],0,&atx[655]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[654],NULL,0,NULL} ,
+ {0, "seqref" ,128,4,0,1,0,0,0,0,NULL,&atx[45],&atx[670],0,&atx[672]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[671],NULL,0,NULL} ,
{404, "Seq-id" ,1,0,0,0,0,0,1,0,NULL,&atx[344],NULL,0,NULL} ,
- {0, "dbref" ,128,5,0,1,0,0,0,0,NULL,&atx[45],&atx[656],0,&atx[658]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[657],NULL,0,NULL} ,
- {403, "Dbtag" ,1,0,0,0,0,0,1,0,NULL,&atx[17],NULL,0,&atx[654]} ,
- {0, "keywords" ,128,6,0,1,0,0,0,0,NULL,&atx[45],&atx[659],0,&atx[660]} ,
+ {0, "dbref" ,128,5,0,1,0,0,0,0,NULL,&atx[45],&atx[673],0,&atx[675]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[674],NULL,0,NULL} ,
+ {403, "Dbtag" ,1,0,0,0,0,0,1,0,NULL,&atx[17],NULL,0,&atx[671]} ,
+ {0, "keywords" ,128,6,0,1,0,0,0,0,NULL,&atx[45],&atx[676],0,&atx[677]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "created" ,128,7,0,1,0,0,0,0,NULL,&atx[661],NULL,0,&atx[662]} ,
- {402, "Date" ,1,0,0,0,0,0,1,0,NULL,&atx[0],NULL,0,&atx[657]} ,
- {0, "sequpd" ,128,8,0,1,0,0,0,0,NULL,&atx[661],NULL,0,&atx[663]} ,
- {0, "annotupd" ,128,9,0,1,0,0,0,0,NULL,&atx[661],NULL,0,NULL} ,
- {0, "dbxref" ,128,15,0,0,0,0,0,0,NULL,&atx[591],NULL,0,&atx[665]} ,
- {0, "embl" ,128,16,0,0,0,0,0,0,NULL,&atx[666],NULL,0,&atx[687]} ,
- {426, "EMBL-block" ,1,0,0,0,0,0,1,0,NULL,&atx[667],NULL,0,&atx[644]} ,
- {403, "EMBL-block" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[668],0,&atx[671]} ,
- {0, "class" ,128,0,0,0,1,0,0,0,&avnx[195],&atx[42],&avnx[191],0,&atx[669]} ,
- {0, "div" ,128,1,0,1,0,0,0,0,NULL,&atx[42],&avnx[196],0,&atx[670]} ,
- {0, "creation-date" ,128,2,0,0,0,0,0,0,NULL,&atx[671],NULL,0,&atx[672]} ,
- {404, "Date" ,1,0,0,0,0,0,1,0,NULL,&atx[0],NULL,0,&atx[686]} ,
- {0, "update-date" ,128,3,0,0,0,0,0,0,NULL,&atx[671],NULL,0,&atx[673]} ,
- {0, "extra-acc" ,128,4,0,1,0,0,0,0,NULL,&atx[66],&atx[674],0,&atx[675]} ,
+ {0, "created" ,128,7,0,1,0,0,0,0,NULL,&atx[678],NULL,0,&atx[679]} ,
+ {402, "Date" ,1,0,0,0,0,0,1,0,NULL,&atx[0],NULL,0,&atx[674]} ,
+ {0, "sequpd" ,128,8,0,1,0,0,0,0,NULL,&atx[678],NULL,0,&atx[680]} ,
+ {0, "annotupd" ,128,9,0,1,0,0,0,0,NULL,&atx[678],NULL,0,NULL} ,
+ {0, "dbxref" ,128,15,0,0,0,0,0,0,NULL,&atx[623],NULL,0,&atx[682]} ,
+ {0, "embl" ,128,16,0,0,0,0,0,0,NULL,&atx[683],NULL,0,&atx[704]} ,
+ {427, "EMBL-block" ,1,0,0,0,0,0,1,0,NULL,&atx[684],NULL,0,&atx[661]} ,
+ {403, "EMBL-block" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[685],0,&atx[688]} ,
+ {0, "class" ,128,0,0,0,1,0,0,0,&avnx[199],&atx[42],&avnx[195],0,&atx[686]} ,
+ {0, "div" ,128,1,0,1,0,0,0,0,NULL,&atx[42],&avnx[200],0,&atx[687]} ,
+ {0, "creation-date" ,128,2,0,0,0,0,0,0,NULL,&atx[688],NULL,0,&atx[689]} ,
+ {404, "Date" ,1,0,0,0,0,0,1,0,NULL,&atx[0],NULL,0,&atx[703]} ,
+ {0, "update-date" ,128,3,0,0,0,0,0,0,NULL,&atx[688],NULL,0,&atx[690]} ,
+ {0, "extra-acc" ,128,4,0,1,0,0,0,0,NULL,&atx[66],&atx[691],0,&atx[692]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "keywords" ,128,5,0,1,0,0,0,0,NULL,&atx[66],&atx[676],0,&atx[677]} ,
+ {0, "keywords" ,128,5,0,1,0,0,0,0,NULL,&atx[66],&atx[693],0,&atx[694]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "xref" ,128,6,0,1,0,0,0,0,NULL,&atx[66],&atx[678],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[679],NULL,0,NULL} ,
- {402, "EMBL-xref" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[680],0,&atx[667]} ,
- {0, "dbname" ,128,0,0,0,0,0,0,0,NULL,&atx[681],NULL,0,&atx[684]} ,
- {401, "EMBL-dbname" ,1,0,0,0,0,1,0,0,NULL,&atx[14],&atx[682],0,&atx[679]} ,
- {0, "code" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[213],0,&atx[683]} ,
+ {0, "xref" ,128,6,0,1,0,0,0,0,NULL,&atx[66],&atx[695],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[696],NULL,0,NULL} ,
+ {402, "EMBL-xref" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[697],0,&atx[684]} ,
+ {0, "dbname" ,128,0,0,0,0,0,0,0,NULL,&atx[698],NULL,0,&atx[701]} ,
+ {401, "EMBL-dbname" ,1,0,0,0,0,1,0,0,NULL,&atx[14],&atx[699],0,&atx[696]} ,
+ {0, "code" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[217],0,&atx[700]} ,
{0, "name" ,128,1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "id" ,128,1,0,0,0,0,0,0,NULL,&atx[66],&atx[685],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[686],NULL,0,NULL} ,
+ {0, "id" ,128,1,0,0,0,0,0,0,NULL,&atx[66],&atx[702],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[703],NULL,0,NULL} ,
{405, "Object-id" ,1,0,0,0,0,0,1,0,NULL,&atx[20],NULL,0,NULL} ,
- {0, "create-date" ,128,17,0,0,0,0,0,0,NULL,&atx[688],NULL,0,&atx[689]} ,
- {411, "Date" ,1,0,0,0,0,0,1,0,NULL,&atx[0],NULL,0,&atx[746]} ,
- {0, "update-date" ,128,18,0,0,0,0,0,0,NULL,&atx[688],NULL,0,&atx[690]} ,
- {0, "prf" ,128,19,0,0,0,0,0,0,NULL,&atx[691],NULL,0,&atx[702]} ,
- {428, "PRF-block" ,1,0,0,0,0,0,1,0,NULL,&atx[692],NULL,0,&atx[703]} ,
- {401, "PRF-block" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[693],0,&atx[694]} ,
- {0, "extra-src" ,128,0,0,1,0,0,0,0,NULL,&atx[694],NULL,0,&atx[700]} ,
- {402, "PRF-ExtraSrc" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[695],0,NULL} ,
- {0, "host" ,128,0,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[696]} ,
- {0, "part" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[697]} ,
- {0, "state" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[698]} ,
- {0, "strain" ,128,3,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[699]} ,
+ {0, "create-date" ,128,17,0,0,0,0,0,0,NULL,&atx[550],NULL,0,&atx[705]} ,
+ {0, "update-date" ,128,18,0,0,0,0,0,0,NULL,&atx[550],NULL,0,&atx[706]} ,
+ {0, "prf" ,128,19,0,0,0,0,0,0,NULL,&atx[707],NULL,0,&atx[718]} ,
+ {429, "PRF-block" ,1,0,0,0,0,0,1,0,NULL,&atx[708],NULL,0,&atx[719]} ,
+ {401, "PRF-block" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[709],0,&atx[710]} ,
+ {0, "extra-src" ,128,0,0,1,0,0,0,0,NULL,&atx[710],NULL,0,&atx[716]} ,
+ {402, "PRF-ExtraSrc" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[711],0,NULL} ,
+ {0, "host" ,128,0,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[712]} ,
+ {0, "part" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[713]} ,
+ {0, "state" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[714]} ,
+ {0, "strain" ,128,3,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[715]} ,
{0, "taxon" ,128,4,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "keywords" ,128,1,0,1,0,0,0,0,NULL,&atx[66],&atx[701],0,NULL} ,
+ {0, "keywords" ,128,1,0,1,0,0,0,0,NULL,&atx[66],&atx[717],0,NULL} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "pdb" ,128,20,0,0,0,0,0,0,NULL,&atx[703],NULL,0,&atx[718]} ,
- {429, "PDB-block" ,1,0,0,0,0,0,1,0,NULL,&atx[704],NULL,0,&atx[741]} ,
- {401, "PDB-block" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[705],0,&atx[706]} ,
- {0, "deposition" ,128,0,0,0,0,0,0,0,NULL,&atx[706],NULL,0,&atx[707]} ,
- {402, "Date" ,1,0,0,0,0,0,1,0,NULL,&atx[0],NULL,0,&atx[714]} ,
- {0, "class" ,128,1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[708]} ,
- {0, "compound" ,128,2,0,0,0,0,0,0,NULL,&atx[66],&atx[709],0,&atx[710]} ,
+ {0, "pdb" ,128,20,0,0,0,0,0,0,NULL,&atx[719],NULL,0,&atx[734]} ,
+ {430, "PDB-block" ,1,0,0,0,0,0,1,0,NULL,&atx[720],NULL,0,&atx[757]} ,
+ {401, "PDB-block" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[721],0,&atx[722]} ,
+ {0, "deposition" ,128,0,0,0,0,0,0,0,NULL,&atx[722],NULL,0,&atx[723]} ,
+ {402, "Date" ,1,0,0,0,0,0,1,0,NULL,&atx[0],NULL,0,&atx[730]} ,
+ {0, "class" ,128,1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[724]} ,
+ {0, "compound" ,128,2,0,0,0,0,0,0,NULL,&atx[66],&atx[725],0,&atx[726]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "source" ,128,3,0,0,0,0,0,0,NULL,&atx[66],&atx[711],0,&atx[712]} ,
+ {0, "source" ,128,3,0,0,0,0,0,0,NULL,&atx[66],&atx[727],0,&atx[728]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "exp-method" ,128,4,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[713]} ,
- {0, "replace" ,128,5,0,1,0,0,0,0,NULL,&atx[714],NULL,0,NULL} ,
- {403, "PDB-replace" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[715],0,NULL} ,
- {0, "date" ,128,0,0,0,0,0,0,0,NULL,&atx[706],NULL,0,&atx[716]} ,
- {0, "ids" ,128,1,0,0,0,0,0,0,NULL,&atx[66],&atx[717],0,NULL} ,
+ {0, "exp-method" ,128,4,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[729]} ,
+ {0, "replace" ,128,5,0,1,0,0,0,0,NULL,&atx[730],NULL,0,NULL} ,
+ {403, "PDB-replace" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[731],0,NULL} ,
+ {0, "date" ,128,0,0,0,0,0,0,0,NULL,&atx[722],NULL,0,&atx[732]} ,
+ {0, "ids" ,128,1,0,0,0,0,0,0,NULL,&atx[66],&atx[733],0,NULL} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "het" ,128,21,0,0,0,0,0,0,NULL,&atx[719],NULL,0,&atx[720]} ,
- {407, "Heterogen" ,1,0,0,0,0,1,0,0,NULL,&atx[2],NULL,0,&atx[955]} ,
- {0, "source" ,128,22,0,0,0,0,0,0,NULL,&atx[721],NULL,0,&atx[734]} ,
- {421, "BioSource" ,1,0,0,0,0,0,1,0,NULL,&atx[722],NULL,0,&atx[438]} ,
- {401, "BioSource" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[723],0,&atx[726]} ,
- {0, "genome" ,128,0,0,0,1,0,0,0,&avnx[251],&atx[6],&avnx[231],0,&atx[724]} ,
- {0, "origin" ,128,1,0,0,1,0,0,0,&avnx[259],&atx[6],&avnx[252],0,&atx[725]} ,
- {0, "org" ,128,2,0,0,0,0,0,0,NULL,&atx[726],NULL,0,&atx[727]} ,
- {402, "Org-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[454],NULL,0,&atx[729]} ,
- {0, "subtype" ,128,3,0,1,0,0,0,0,NULL,&atx[66],&atx[728],0,&atx[733]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[729],NULL,0,NULL} ,
- {403, "SubSource" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[730],0,NULL} ,
- {0, "subtype" ,128,0,0,0,0,0,0,0,NULL,&atx[6],&avnx[260],0,&atx[731]} ,
- {0, "name" ,128,1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[732]} ,
+ {0, "het" ,128,21,0,0,0,0,0,0,NULL,&atx[735],NULL,0,&atx[736]} ,
+ {404, "Heterogen" ,1,0,0,0,0,1,0,0,NULL,&atx[2],NULL,0,&atx[446]} ,
+ {0, "source" ,128,22,0,0,0,0,0,0,NULL,&atx[737],NULL,0,&atx[750]} ,
+ {422, "BioSource" ,1,0,0,0,0,0,1,0,NULL,&atx[738],NULL,0,&atx[553]} ,
+ {401, "BioSource" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[739],0,&atx[742]} ,
+ {0, "genome" ,128,0,0,0,1,0,0,0,&avnx[255],&atx[6],&avnx[235],0,&atx[740]} ,
+ {0, "origin" ,128,1,0,0,1,0,0,0,&avnx[263],&atx[6],&avnx[256],0,&atx[741]} ,
+ {0, "org" ,128,2,0,0,0,0,0,0,NULL,&atx[742],NULL,0,&atx[743]} ,
+ {402, "Org-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[579],NULL,0,&atx[745]} ,
+ {0, "subtype" ,128,3,0,1,0,0,0,0,NULL,&atx[66],&atx[744],0,&atx[749]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[745],NULL,0,NULL} ,
+ {403, "SubSource" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[746],0,NULL} ,
+ {0, "subtype" ,128,0,0,0,0,0,0,0,NULL,&atx[6],&avnx[264],0,&atx[747]} ,
+ {0, "name" ,128,1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[748]} ,
{0, "attrib" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
{0, "is-focus" ,128,4,0,1,0,0,0,0,NULL,&atx[386],NULL,0,NULL} ,
- {0, "molinfo" ,128,23,0,0,0,0,0,0,NULL,&atx[735],NULL,0,NULL} ,
- {433, "MolInfo" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[736],0,&atx[499]} ,
- {0, "biomol" ,128,0,0,0,1,0,0,0,&avnx[304],&atx[6],&avnx[289],0,&atx[737]} ,
- {0, "tech" ,128,1,0,0,1,0,0,0,&avnx[327],&atx[6],&avnx[305],0,&atx[738]} ,
- {0, "techexp" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[739]} ,
- {0, "completeness" ,128,3,0,0,1,0,0,0,&avnx[337],&atx[6],&avnx[328],0,NULL} ,
- {0, "inst" ,128,2,0,0,0,0,0,0,NULL,&atx[741],NULL,0,&atx[967]} ,
- {430, "Seq-inst" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[742],0,&atx[447]} ,
- {0, "repr" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[338],0,&atx[743]} ,
- {0, "mol" ,128,1,0,0,0,0,0,0,NULL,&atx[42],&avnx[348],0,&atx[744]} ,
- {0, "length" ,128,2,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[745]} ,
- {0, "fuzz" ,128,3,0,1,0,0,0,0,NULL,&atx[746],NULL,0,&atx[747]} ,
- {412, "Int-fuzz" ,1,0,0,0,0,0,1,0,NULL,&atx[35],NULL,0,&atx[591]} ,
- {0, "topology" ,128,4,0,0,1,0,0,0,&avnx[359],&atx[42],&avnx[354],0,&atx[748]} ,
- {0, "strand" ,128,5,0,1,0,0,0,0,NULL,&atx[42],&avnx[360],0,&atx[749]} ,
- {0, "seq-data" ,128,6,0,1,0,0,0,0,NULL,&atx[750],NULL,0,&atx[771]} ,
- {438, "Seq-data" ,1,0,0,0,0,0,0,0,NULL,&atx[14],&atx[751],0,&atx[772]} ,
- {0, "iupacna" ,128,0,0,0,0,0,0,0,NULL,&atx[752],NULL,0,&atx[753]} ,
- {446, "IUPACna" ,1,0,0,0,0,0,0,0,NULL,&atx[636],NULL,0,&atx[754]} ,
- {0, "iupacaa" ,128,1,0,0,0,0,0,0,NULL,&atx[754],NULL,0,&atx[755]} ,
- {447, "IUPACaa" ,1,0,0,0,0,0,0,0,NULL,&atx[636],NULL,0,&atx[756]} ,
- {0, "ncbi2na" ,128,2,0,0,0,0,0,0,NULL,&atx[756],NULL,0,&atx[757]} ,
- {448, "NCBI2na" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[758]} ,
- {0, "ncbi4na" ,128,3,0,0,0,0,0,0,NULL,&atx[758],NULL,0,&atx[759]} ,
- {449, "NCBI4na" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[760]} ,
- {0, "ncbi8na" ,128,4,0,0,0,0,0,0,NULL,&atx[760],NULL,0,&atx[761]} ,
- {450, "NCBI8na" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[762]} ,
- {0, "ncbipna" ,128,5,0,0,0,0,0,0,NULL,&atx[762],NULL,0,&atx[763]} ,
- {451, "NCBIpna" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[764]} ,
- {0, "ncbi8aa" ,128,6,0,0,0,0,0,0,NULL,&atx[764],NULL,0,&atx[765]} ,
- {452, "NCBI8aa" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[766]} ,
- {0, "ncbieaa" ,128,7,0,0,0,0,0,0,NULL,&atx[766],NULL,0,&atx[767]} ,
- {453, "NCBIeaa" ,1,0,0,0,0,0,0,0,NULL,&atx[636],NULL,0,&atx[768]} ,
- {0, "ncbipaa" ,128,8,0,0,0,0,0,0,NULL,&atx[768],NULL,0,&atx[769]} ,
- {454, "NCBIpaa" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[770]} ,
- {0, "ncbistdaa" ,128,9,0,0,0,0,0,0,NULL,&atx[770],NULL,0,NULL} ,
- {455, "NCBIstdaa" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[972]} ,
- {0, "ext" ,128,7,0,1,0,0,0,0,NULL,&atx[772],NULL,0,&atx[954]} ,
- {439, "Seq-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[14],&atx[773],0,&atx[774]} ,
- {0, "seg" ,128,0,0,0,0,0,0,0,NULL,&atx[774],NULL,0,&atx[777]} ,
- {440, "Seg-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[66],&atx[775],0,&atx[778]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[776],NULL,0,NULL} ,
- {423, "Seq-loc" ,1,0,0,0,0,0,1,0,NULL,&atx[384],NULL,0,&atx[612]} ,
- {0, "ref" ,128,1,0,0,0,0,0,0,NULL,&atx[778],NULL,0,&atx[779]} ,
- {441, "Ref-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[776],NULL,0,&atx[780]} ,
- {0, "map" ,128,2,0,0,0,0,0,0,NULL,&atx[780],NULL,0,&atx[944]} ,
- {442, "Map-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[66],&atx[781],0,&atx[945]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[782],NULL,0,NULL} ,
- {417, "Seq-feat" ,1,0,0,0,0,0,1,0,NULL,&atx[783],NULL,0,&atx[1004]} ,
- {401, "Seq-feat" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[784],0,&atx[427]} ,
- {0, "id" ,128,0,0,1,0,0,0,0,NULL,&atx[427],NULL,0,&atx[785]} ,
- {0, "data" ,128,1,0,0,0,0,0,0,NULL,&atx[786],NULL,0,&atx[920]} ,
- {420, "SeqFeatData" ,1,0,0,0,0,0,0,0,NULL,&atx[14],&atx[787],0,&atx[927]} ,
- {0, "gene" ,128,0,0,0,0,0,0,0,NULL,&atx[788],NULL,0,&atx[801]} ,
- {404, "Gene-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[789],NULL,0,&atx[832]} ,
- {401, "Gene-ref" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[790],0,&atx[797]} ,
- {0, "locus" ,128,0,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[791]} ,
- {0, "allele" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[792]} ,
- {0, "desc" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[793]} ,
- {0, "maploc" ,128,3,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[794]} ,
- {0, "pseudo" ,128,4,0,0,1,0,0,0,&avnx[365],&atx[60],NULL,0,&atx[795]} ,
- {0, "db" ,128,5,0,1,0,0,0,0,NULL,&atx[45],&atx[796],0,&atx[798]} ,
+ {0, "molinfo" ,128,23,0,0,0,0,0,0,NULL,&atx[751],NULL,0,NULL} ,
+ {434, "MolInfo" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[752],0,&atx[448]} ,
+ {0, "biomol" ,128,0,0,0,1,0,0,0,&avnx[308],&atx[6],&avnx[293],0,&atx[753]} ,
+ {0, "tech" ,128,1,0,0,1,0,0,0,&avnx[331],&atx[6],&avnx[309],0,&atx[754]} ,
+ {0, "techexp" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[755]} ,
+ {0, "completeness" ,128,3,0,0,1,0,0,0,&avnx[341],&atx[6],&avnx[332],0,NULL} ,
+ {0, "inst" ,128,2,0,0,0,0,0,0,NULL,&atx[757],NULL,0,&atx[982]} ,
+ {431, "Seq-inst" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[758],0,&atx[572]} ,
+ {0, "repr" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[342],0,&atx[759]} ,
+ {0, "mol" ,128,1,0,0,0,0,0,0,NULL,&atx[42],&avnx[352],0,&atx[760]} ,
+ {0, "length" ,128,2,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[761]} ,
+ {0, "fuzz" ,128,3,0,1,0,0,0,0,NULL,&atx[762],NULL,0,&atx[763]} ,
+ {413, "Int-fuzz" ,1,0,0,0,0,0,1,0,NULL,&atx[35],NULL,0,&atx[623]} ,
+ {0, "topology" ,128,4,0,0,1,0,0,0,&avnx[363],&atx[42],&avnx[358],0,&atx[764]} ,
+ {0, "strand" ,128,5,0,1,0,0,0,0,NULL,&atx[42],&avnx[364],0,&atx[765]} ,
+ {0, "seq-data" ,128,6,0,1,0,0,0,0,NULL,&atx[766],NULL,0,&atx[787]} ,
+ {439, "Seq-data" ,1,0,0,0,0,0,0,0,NULL,&atx[14],&atx[767],0,&atx[788]} ,
+ {0, "iupacna" ,128,0,0,0,0,0,0,0,NULL,&atx[768],NULL,0,&atx[769]} ,
+ {447, "IUPACna" ,1,0,0,0,0,0,0,0,NULL,&atx[543],NULL,0,&atx[770]} ,
+ {0, "iupacaa" ,128,1,0,0,0,0,0,0,NULL,&atx[770],NULL,0,&atx[771]} ,
+ {448, "IUPACaa" ,1,0,0,0,0,0,0,0,NULL,&atx[543],NULL,0,&atx[772]} ,
+ {0, "ncbi2na" ,128,2,0,0,0,0,0,0,NULL,&atx[772],NULL,0,&atx[773]} ,
+ {449, "NCBI2na" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[774]} ,
+ {0, "ncbi4na" ,128,3,0,0,0,0,0,0,NULL,&atx[774],NULL,0,&atx[775]} ,
+ {450, "NCBI4na" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[776]} ,
+ {0, "ncbi8na" ,128,4,0,0,0,0,0,0,NULL,&atx[776],NULL,0,&atx[777]} ,
+ {451, "NCBI8na" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[778]} ,
+ {0, "ncbipna" ,128,5,0,0,0,0,0,0,NULL,&atx[778],NULL,0,&atx[779]} ,
+ {452, "NCBIpna" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[780]} ,
+ {0, "ncbi8aa" ,128,6,0,0,0,0,0,0,NULL,&atx[780],NULL,0,&atx[781]} ,
+ {453, "NCBI8aa" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[782]} ,
+ {0, "ncbieaa" ,128,7,0,0,0,0,0,0,NULL,&atx[782],NULL,0,&atx[783]} ,
+ {454, "NCBIeaa" ,1,0,0,0,0,0,0,0,NULL,&atx[543],NULL,0,&atx[784]} ,
+ {0, "ncbipaa" ,128,8,0,0,0,0,0,0,NULL,&atx[784],NULL,0,&atx[785]} ,
+ {455, "NCBIpaa" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[786]} ,
+ {0, "ncbistdaa" ,128,9,0,0,0,0,0,0,NULL,&atx[786],NULL,0,NULL} ,
+ {456, "NCBIstdaa" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[987]} ,
+ {0, "ext" ,128,7,0,1,0,0,0,0,NULL,&atx[788],NULL,0,&atx[969]} ,
+ {440, "Seq-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[14],&atx[789],0,&atx[790]} ,
+ {0, "seg" ,128,0,0,0,0,0,0,0,NULL,&atx[790],NULL,0,&atx[792]} ,
+ {441, "Seg-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[66],&atx[791],0,&atx[793]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[560],NULL,0,NULL} ,
+ {0, "ref" ,128,1,0,0,0,0,0,0,NULL,&atx[793],NULL,0,&atx[794]} ,
+ {442, "Ref-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[560],NULL,0,&atx[795]} ,
+ {0, "map" ,128,2,0,0,0,0,0,0,NULL,&atx[795],NULL,0,&atx[959]} ,
+ {443, "Map-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[66],&atx[796],0,&atx[960]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[797],NULL,0,NULL} ,
+ {418, "Seq-feat" ,1,0,0,0,0,0,1,0,NULL,&atx[798],NULL,0,&atx[1004]} ,
+ {401, "Seq-feat" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[799],0,&atx[427]} ,
+ {0, "id" ,128,0,0,1,0,0,0,0,NULL,&atx[427],NULL,0,&atx[800]} ,
+ {0, "data" ,128,1,0,0,0,0,0,0,NULL,&atx[801],NULL,0,&atx[935]} ,
+ {420, "SeqFeatData" ,1,0,0,0,0,0,0,0,NULL,&atx[14],&atx[802],0,&atx[942]} ,
+ {0, "gene" ,128,0,0,0,0,0,0,0,NULL,&atx[803],NULL,0,&atx[816]} ,
+ {404, "Gene-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[804],NULL,0,&atx[847]} ,
+ {401, "Gene-ref" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[805],0,&atx[812]} ,
+ {0, "locus" ,128,0,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[806]} ,
+ {0, "allele" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[807]} ,
+ {0, "desc" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[808]} ,
+ {0, "maploc" ,128,3,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[809]} ,
+ {0, "pseudo" ,128,4,0,0,1,0,0,0,&avnx[369],&atx[60],NULL,0,&atx[810]} ,
+ {0, "db" ,128,5,0,1,0,0,0,0,NULL,&atx[45],&atx[811],0,&atx[813]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[812],NULL,0,NULL} ,
{402, "Dbtag" ,1,0,0,0,0,0,1,0,NULL,&atx[17],NULL,0,NULL} ,
- {0, "syn" ,128,6,0,1,0,0,0,0,NULL,&atx[45],&atx[799],0,&atx[800]} ,
+ {0, "syn" ,128,6,0,1,0,0,0,0,NULL,&atx[45],&atx[814],0,&atx[815]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
{0, "locus-tag" ,128,7,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "org" ,128,1,0,0,0,0,0,0,NULL,&atx[802],NULL,0,&atx[803]} ,
- {406, "Org-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[454],NULL,0,&atx[919]} ,
- {0, "cdregion" ,128,2,0,0,0,0,0,0,NULL,&atx[804],NULL,0,&atx[831]} ,
- {423, "Cdregion" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[805],0,&atx[867]} ,
- {0, "orf" ,128,0,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[806]} ,
- {0, "frame" ,128,1,0,0,1,0,0,0,&avnx[370],&atx[42],&avnx[366],0,&atx[807]} ,
- {0, "conflict" ,128,2,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[808]} ,
- {0, "gaps" ,128,3,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[809]} ,
- {0, "mismatch" ,128,4,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[810]} ,
- {0, "code" ,128,5,0,1,0,0,0,0,NULL,&atx[811],NULL,0,&atx[821]} ,
- {403, "Genetic-code" ,1,0,0,0,0,1,0,0,NULL,&atx[45],&atx[812],0,&atx[788]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[14],&atx[813],0,NULL} ,
- {0, "name" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[814]} ,
- {0, "id" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[815]} ,
- {0, "ncbieaa" ,128,2,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[816]} ,
- {0, "ncbi8aa" ,128,3,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[817]} ,
- {0, "ncbistdaa" ,128,4,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[818]} ,
- {0, "sncbieaa" ,128,5,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[819]} ,
- {0, "sncbi8aa" ,128,6,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[820]} ,
+ {0, "org" ,128,1,0,0,0,0,0,0,NULL,&atx[817],NULL,0,&atx[818]} ,
+ {406, "Org-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[579],NULL,0,&atx[934]} ,
+ {0, "cdregion" ,128,2,0,0,0,0,0,0,NULL,&atx[819],NULL,0,&atx[846]} ,
+ {423, "Cdregion" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[820],0,&atx[882]} ,
+ {0, "orf" ,128,0,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[821]} ,
+ {0, "frame" ,128,1,0,0,1,0,0,0,&avnx[374],&atx[42],&avnx[370],0,&atx[822]} ,
+ {0, "conflict" ,128,2,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[823]} ,
+ {0, "gaps" ,128,3,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[824]} ,
+ {0, "mismatch" ,128,4,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[825]} ,
+ {0, "code" ,128,5,0,1,0,0,0,0,NULL,&atx[826],NULL,0,&atx[836]} ,
+ {403, "Genetic-code" ,1,0,0,0,0,1,0,0,NULL,&atx[45],&atx[827],0,&atx[803]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[14],&atx[828],0,NULL} ,
+ {0, "name" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[829]} ,
+ {0, "id" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[830]} ,
+ {0, "ncbieaa" ,128,2,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[831]} ,
+ {0, "ncbi8aa" ,128,3,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[832]} ,
+ {0, "ncbistdaa" ,128,4,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[833]} ,
+ {0, "sncbieaa" ,128,5,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[834]} ,
+ {0, "sncbi8aa" ,128,6,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[835]} ,
{0, "sncbistdaa" ,128,7,0,0,0,0,0,0,NULL,&atx[62],NULL,0,NULL} ,
- {0, "code-break" ,128,6,0,1,0,0,0,0,NULL,&atx[66],&atx[822],0,&atx[830]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[823],NULL,0,NULL} ,
- {425, "Code-break" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[824],0,&atx[1086]} ,
- {0, "loc" ,128,0,0,0,0,0,0,0,NULL,&atx[825],NULL,0,&atx[826]} ,
+ {0, "code-break" ,128,6,0,1,0,0,0,0,NULL,&atx[66],&atx[837],0,&atx[845]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[838],NULL,0,NULL} ,
+ {425, "Code-break" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[839],0,&atx[1086]} ,
+ {0, "loc" ,128,0,0,0,0,0,0,0,NULL,&atx[840],NULL,0,&atx[841]} ,
{409, "Seq-loc" ,1,0,0,0,0,0,1,0,NULL,&atx[384],NULL,0,&atx[430]} ,
- {0, "aa" ,128,1,0,0,0,0,0,0,NULL,&atx[14],&atx[827],0,NULL} ,
- {0, "ncbieaa" ,128,0,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[828]} ,
- {0, "ncbi8aa" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[829]} ,
+ {0, "aa" ,128,1,0,0,0,0,0,0,NULL,&atx[14],&atx[842],0,NULL} ,
+ {0, "ncbieaa" ,128,0,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[843]} ,
+ {0, "ncbi8aa" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[844]} ,
{0, "ncbistdaa" ,128,2,0,0,0,0,0,0,NULL,&atx[6],NULL,0,NULL} ,
{0, "stops" ,128,7,0,1,0,0,0,0,NULL,&atx[6],NULL,0,NULL} ,
- {0, "prot" ,128,3,0,0,0,0,0,0,NULL,&atx[832],NULL,0,&atx[845]} ,
- {405, "Prot-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[833],NULL,0,&atx[802]} ,
- {401, "Prot-ref" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[834],0,&atx[843]} ,
- {0, "name" ,128,0,0,1,0,0,0,0,NULL,&atx[45],&atx[835],0,&atx[836]} ,
+ {0, "prot" ,128,3,0,0,0,0,0,0,NULL,&atx[847],NULL,0,&atx[860]} ,
+ {405, "Prot-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[848],NULL,0,&atx[817]} ,
+ {401, "Prot-ref" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[849],0,&atx[858]} ,
+ {0, "name" ,128,0,0,1,0,0,0,0,NULL,&atx[45],&atx[850],0,&atx[851]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "desc" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[837]} ,
- {0, "ec" ,128,2,0,1,0,0,0,0,NULL,&atx[45],&atx[838],0,&atx[839]} ,
+ {0, "desc" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[852]} ,
+ {0, "ec" ,128,2,0,1,0,0,0,0,NULL,&atx[45],&atx[853],0,&atx[854]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "activity" ,128,3,0,1,0,0,0,0,NULL,&atx[45],&atx[840],0,&atx[841]} ,
+ {0, "activity" ,128,3,0,1,0,0,0,0,NULL,&atx[45],&atx[855],0,&atx[856]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "db" ,128,4,0,1,0,0,0,0,NULL,&atx[45],&atx[842],0,&atx[844]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[843],NULL,0,NULL} ,
+ {0, "db" ,128,4,0,1,0,0,0,0,NULL,&atx[45],&atx[857],0,&atx[859]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[858],NULL,0,NULL} ,
{402, "Dbtag" ,1,0,0,0,0,0,1,0,NULL,&atx[17],NULL,0,NULL} ,
- {0, "processed" ,128,5,0,0,1,0,0,0,&avnx[376],&atx[42],&avnx[371],0,NULL} ,
- {0, "rna" ,128,4,0,0,0,0,0,0,NULL,&atx[846],NULL,0,&atx[863]} ,
- {408, "RNA-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[847],NULL,0,&atx[825]} ,
- {401, "RNA-ref" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[848],0,&atx[853]} ,
- {0, "type" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[377],0,&atx[849]} ,
- {0, "pseudo" ,128,1,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[850]} ,
- {0, "ext" ,128,2,0,1,0,0,0,0,NULL,&atx[14],&atx[851],0,NULL} ,
- {0, "name" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[852]} ,
- {0, "tRNA" ,128,1,0,0,0,0,0,0,NULL,&atx[853],NULL,0,NULL} ,
- {402, "Trna-ext" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[854],0,&atx[862]} ,
- {0, "aa" ,128,0,0,1,0,0,0,0,NULL,&atx[14],&atx[855],0,&atx[859]} ,
- {0, "iupacaa" ,128,0,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[856]} ,
- {0, "ncbieaa" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[857]} ,
- {0, "ncbi8aa" ,128,2,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[858]} ,
+ {0, "processed" ,128,5,0,0,1,0,0,0,&avnx[380],&atx[42],&avnx[375],0,NULL} ,
+ {0, "rna" ,128,4,0,0,0,0,0,0,NULL,&atx[861],NULL,0,&atx[878]} ,
+ {408, "RNA-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[862],NULL,0,&atx[840]} ,
+ {401, "RNA-ref" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[863],0,&atx[868]} ,
+ {0, "type" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[381],0,&atx[864]} ,
+ {0, "pseudo" ,128,1,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[865]} ,
+ {0, "ext" ,128,2,0,1,0,0,0,0,NULL,&atx[14],&atx[866],0,NULL} ,
+ {0, "name" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[867]} ,
+ {0, "tRNA" ,128,1,0,0,0,0,0,0,NULL,&atx[868],NULL,0,NULL} ,
+ {402, "Trna-ext" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[869],0,&atx[877]} ,
+ {0, "aa" ,128,0,0,1,0,0,0,0,NULL,&atx[14],&atx[870],0,&atx[874]} ,
+ {0, "iupacaa" ,128,0,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[871]} ,
+ {0, "ncbieaa" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[872]} ,
+ {0, "ncbi8aa" ,128,2,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[873]} ,
{0, "ncbistdaa" ,128,3,0,0,0,0,0,0,NULL,&atx[6],NULL,0,NULL} ,
- {0, "codon" ,128,1,0,1,0,0,0,0,NULL,&atx[45],&atx[860],0,&atx[861]} ,
+ {0, "codon" ,128,1,0,1,0,0,0,0,NULL,&atx[45],&atx[875],0,&atx[876]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,NULL} ,
- {0, "anticodon" ,128,2,0,1,0,0,0,0,NULL,&atx[862],NULL,0,NULL} ,
+ {0, "anticodon" ,128,2,0,1,0,0,0,0,NULL,&atx[877],NULL,0,NULL} ,
{403, "Seq-loc" ,1,0,0,0,0,0,1,0,NULL,&atx[384],NULL,0,NULL} ,
- {0, "pub" ,128,5,0,0,0,0,0,0,NULL,&atx[864],NULL,0,&atx[865]} ,
- {411, "Pubdesc" ,1,0,0,0,0,0,1,0,NULL,&atx[626],NULL,0,&atx[913]} ,
- {0, "seq" ,128,6,0,0,0,0,0,0,NULL,&atx[825],NULL,0,&atx[866]} ,
- {0, "imp" ,128,7,0,0,0,0,0,0,NULL,&atx[867],NULL,0,&atx[871]} ,
- {424, "Imp-feat" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[868],0,&atx[823]} ,
- {0, "key" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[869]} ,
- {0, "loc" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[870]} ,
+ {0, "pub" ,128,5,0,0,0,0,0,0,NULL,&atx[879],NULL,0,&atx[880]} ,
+ {411, "Pubdesc" ,1,0,0,0,0,0,1,0,NULL,&atx[440],NULL,0,&atx[928]} ,
+ {0, "seq" ,128,6,0,0,0,0,0,0,NULL,&atx[840],NULL,0,&atx[881]} ,
+ {0, "imp" ,128,7,0,0,0,0,0,0,NULL,&atx[882],NULL,0,&atx[886]} ,
+ {424, "Imp-feat" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[883],0,&atx[838]} ,
+ {0, "key" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[884]} ,
+ {0, "loc" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[885]} ,
{0, "descr" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "region" ,128,8,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[872]} ,
- {0, "comment" ,128,9,0,0,0,0,0,0,NULL,&atx[386],NULL,0,&atx[873]} ,
- {0, "bond" ,128,10,0,0,0,0,0,0,NULL,&atx[42],&avnx[386],0,&atx[874]} ,
- {0, "site" ,128,11,0,0,0,0,0,0,NULL,&atx[42],&avnx[391],0,&atx[875]} ,
- {0, "rsite" ,128,12,0,0,0,0,0,0,NULL,&atx[876],NULL,0,&atx[881]} ,
- {414, "Rsite-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[877],NULL,0,&atx[884]} ,
- {401, "Rsite-ref" ,1,0,0,0,0,1,0,0,NULL,&atx[14],&atx[878],0,&atx[880]} ,
- {0, "str" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[879]} ,
- {0, "db" ,128,1,0,0,0,0,0,0,NULL,&atx[880],NULL,0,NULL} ,
+ {0, "region" ,128,8,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[887]} ,
+ {0, "comment" ,128,9,0,0,0,0,0,0,NULL,&atx[386],NULL,0,&atx[888]} ,
+ {0, "bond" ,128,10,0,0,0,0,0,0,NULL,&atx[42],&avnx[390],0,&atx[889]} ,
+ {0, "site" ,128,11,0,0,0,0,0,0,NULL,&atx[42],&avnx[395],0,&atx[890]} ,
+ {0, "rsite" ,128,12,0,0,0,0,0,0,NULL,&atx[891],NULL,0,&atx[896]} ,
+ {414, "Rsite-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[892],NULL,0,&atx[899]} ,
+ {401, "Rsite-ref" ,1,0,0,0,0,1,0,0,NULL,&atx[14],&atx[893],0,&atx[895]} ,
+ {0, "str" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[894]} ,
+ {0, "db" ,128,1,0,0,0,0,0,0,NULL,&atx[895],NULL,0,NULL} ,
{402, "Dbtag" ,1,0,0,0,0,0,1,0,NULL,&atx[17],NULL,0,NULL} ,
- {0, "user" ,128,13,0,0,0,0,0,0,NULL,&atx[882],NULL,0,&atx[883]} ,
- {419, "User-object" ,1,0,0,0,0,0,1,0,NULL,&atx[46],NULL,0,&atx[786]} ,
- {0, "txinit" ,128,14,0,0,0,0,0,0,NULL,&atx[884],NULL,0,&atx[912]} ,
- {415, "Txinit" ,1,0,0,0,0,0,1,0,NULL,&atx[885],NULL,0,&atx[933]} ,
- {401, "Txinit" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[886],0,&atx[891]} ,
- {0, "name" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[887]} ,
- {0, "syn" ,128,1,0,1,0,0,0,0,NULL,&atx[66],&atx[888],0,&atx[889]} ,
+ {0, "user" ,128,13,0,0,0,0,0,0,NULL,&atx[897],NULL,0,&atx[898]} ,
+ {419, "User-object" ,1,0,0,0,0,0,1,0,NULL,&atx[46],NULL,0,&atx[801]} ,
+ {0, "txinit" ,128,14,0,0,0,0,0,0,NULL,&atx[899],NULL,0,&atx[927]} ,
+ {415, "Txinit" ,1,0,0,0,0,0,1,0,NULL,&atx[900],NULL,0,&atx[948]} ,
+ {401, "Txinit" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[901],0,&atx[906]} ,
+ {0, "name" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[902]} ,
+ {0, "syn" ,128,1,0,1,0,0,0,0,NULL,&atx[66],&atx[903],0,&atx[904]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "gene" ,128,2,0,1,0,0,0,0,NULL,&atx[66],&atx[890],0,&atx[892]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[891],NULL,0,NULL} ,
- {402, "Gene-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[789],NULL,0,&atx[894]} ,
- {0, "protein" ,128,3,0,1,0,0,0,0,NULL,&atx[66],&atx[893],0,&atx[895]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[894],NULL,0,NULL} ,
- {403, "Prot-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[833],NULL,0,&atx[901]} ,
- {0, "rna" ,128,4,0,1,0,0,0,0,NULL,&atx[66],&atx[896],0,&atx[897]} ,
+ {0, "gene" ,128,2,0,1,0,0,0,0,NULL,&atx[66],&atx[905],0,&atx[907]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[906],NULL,0,NULL} ,
+ {402, "Gene-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[804],NULL,0,&atx[909]} ,
+ {0, "protein" ,128,3,0,1,0,0,0,0,NULL,&atx[66],&atx[908],0,&atx[910]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[909],NULL,0,NULL} ,
+ {403, "Prot-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[848],NULL,0,&atx[916]} ,
+ {0, "rna" ,128,4,0,1,0,0,0,0,NULL,&atx[66],&atx[911],0,&atx[912]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "expression" ,128,5,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[898]} ,
- {0, "txsystem" ,128,6,0,0,0,0,0,0,NULL,&atx[42],&avnx[417],0,&atx[899]} ,
- {0, "txdescr" ,128,7,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[900]} ,
- {0, "txorg" ,128,8,0,1,0,0,0,0,NULL,&atx[901],NULL,0,&atx[902]} ,
- {404, "Org-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[454],NULL,0,&atx[907]} ,
- {0, "mapping-precise" ,128,9,0,0,1,0,0,0,&avnx[426],&atx[60],NULL,0,&atx[903]} ,
- {0, "location-accurate" ,128,10,0,0,1,0,0,0,&avnx[427],&atx[60],NULL,0,&atx[904]} ,
- {0, "inittype" ,128,11,0,1,0,0,0,0,NULL,&atx[42],&avnx[428],0,&atx[905]} ,
- {0, "evidence" ,128,12,0,1,0,0,0,0,NULL,&atx[45],&atx[906],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[907],NULL,0,NULL} ,
- {405, "Tx-evidence" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[908],0,NULL} ,
- {0, "exp-code" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[432],0,&atx[909]} ,
- {0, "expression-system" ,128,1,0,0,1,0,0,0,&avnx[451],&atx[42],&avnx[444],0,&atx[910]} ,
- {0, "low-prec-data" ,128,2,0,0,1,0,0,0,&avnx[452],&atx[60],NULL,0,&atx[911]} ,
- {0, "from-homolog" ,128,3,0,0,1,0,0,0,&avnx[453],&atx[60],NULL,0,NULL} ,
- {0, "num" ,128,15,0,0,0,0,0,0,NULL,&atx[913],NULL,0,&atx[914]} ,
- {412, "Numbering" ,1,0,0,0,0,0,1,0,NULL,&atx[497],NULL,0,&atx[917]} ,
- {0, "psec-str" ,128,16,0,0,0,0,0,0,NULL,&atx[42],&avnx[454],0,&atx[915]} ,
- {0, "non-std-residue" ,128,17,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[916]} ,
- {0, "het" ,128,18,0,0,0,0,0,0,NULL,&atx[917],NULL,0,&atx[918]} ,
- {413, "Heterogen" ,1,0,0,0,0,0,1,0,NULL,&atx[719],NULL,0,&atx[876]} ,
- {0, "biosrc" ,128,19,0,0,0,0,0,0,NULL,&atx[919],NULL,0,NULL} ,
- {407, "BioSource" ,1,0,0,0,0,0,1,0,NULL,&atx[722],NULL,0,&atx[846]} ,
- {0, "partial" ,128,2,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[921]} ,
- {0, "except" ,128,3,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[922]} ,
- {0, "comment" ,128,4,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[923]} ,
- {0, "product" ,128,5,0,1,0,0,0,0,NULL,&atx[825],NULL,0,&atx[924]} ,
- {0, "location" ,128,6,0,0,0,0,0,0,NULL,&atx[825],NULL,0,&atx[925]} ,
- {0, "qual" ,128,7,0,1,0,0,0,0,NULL,&atx[66],&atx[926],0,&atx[930]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[927],NULL,0,NULL} ,
- {421, "Gb-qual" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[928],0,&atx[937]} ,
- {0, "qual" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[929]} ,
+ {0, "expression" ,128,5,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[913]} ,
+ {0, "txsystem" ,128,6,0,0,0,0,0,0,NULL,&atx[42],&avnx[421],0,&atx[914]} ,
+ {0, "txdescr" ,128,7,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[915]} ,
+ {0, "txorg" ,128,8,0,1,0,0,0,0,NULL,&atx[916],NULL,0,&atx[917]} ,
+ {404, "Org-ref" ,1,0,0,0,0,0,1,0,NULL,&atx[579],NULL,0,&atx[922]} ,
+ {0, "mapping-precise" ,128,9,0,0,1,0,0,0,&avnx[430],&atx[60],NULL,0,&atx[918]} ,
+ {0, "location-accurate" ,128,10,0,0,1,0,0,0,&avnx[431],&atx[60],NULL,0,&atx[919]} ,
+ {0, "inittype" ,128,11,0,1,0,0,0,0,NULL,&atx[42],&avnx[432],0,&atx[920]} ,
+ {0, "evidence" ,128,12,0,1,0,0,0,0,NULL,&atx[45],&atx[921],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[922],NULL,0,NULL} ,
+ {405, "Tx-evidence" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[923],0,NULL} ,
+ {0, "exp-code" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[436],0,&atx[924]} ,
+ {0, "expression-system" ,128,1,0,0,1,0,0,0,&avnx[455],&atx[42],&avnx[448],0,&atx[925]} ,
+ {0, "low-prec-data" ,128,2,0,0,1,0,0,0,&avnx[456],&atx[60],NULL,0,&atx[926]} ,
+ {0, "from-homolog" ,128,3,0,0,1,0,0,0,&avnx[457],&atx[60],NULL,0,NULL} ,
+ {0, "num" ,128,15,0,0,0,0,0,0,NULL,&atx[928],NULL,0,&atx[929]} ,
+ {412, "Numbering" ,1,0,0,0,0,0,1,0,NULL,&atx[446],NULL,0,&atx[932]} ,
+ {0, "psec-str" ,128,16,0,0,0,0,0,0,NULL,&atx[42],&avnx[458],0,&atx[930]} ,
+ {0, "non-std-residue" ,128,17,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[931]} ,
+ {0, "het" ,128,18,0,0,0,0,0,0,NULL,&atx[932],NULL,0,&atx[933]} ,
+ {413, "Heterogen" ,1,0,0,0,0,0,1,0,NULL,&atx[735],NULL,0,&atx[891]} ,
+ {0, "biosrc" ,128,19,0,0,0,0,0,0,NULL,&atx[934],NULL,0,NULL} ,
+ {407, "BioSource" ,1,0,0,0,0,0,1,0,NULL,&atx[738],NULL,0,&atx[861]} ,
+ {0, "partial" ,128,2,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[936]} ,
+ {0, "except" ,128,3,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[937]} ,
+ {0, "comment" ,128,4,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[938]} ,
+ {0, "product" ,128,5,0,1,0,0,0,0,NULL,&atx[840],NULL,0,&atx[939]} ,
+ {0, "location" ,128,6,0,0,0,0,0,0,NULL,&atx[840],NULL,0,&atx[940]} ,
+ {0, "qual" ,128,7,0,1,0,0,0,0,NULL,&atx[66],&atx[941],0,&atx[945]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[942],NULL,0,NULL} ,
+ {421, "Gb-qual" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[943],0,&atx[952]} ,
+ {0, "qual" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[944]} ,
{0, "val" ,128,1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "title" ,128,8,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[931]} ,
- {0, "ext" ,128,9,0,1,0,0,0,0,NULL,&atx[882],NULL,0,&atx[932]} ,
- {0, "cit" ,128,10,0,1,0,0,0,0,NULL,&atx[933],NULL,0,&atx[934]} ,
+ {0, "title" ,128,8,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[946]} ,
+ {0, "ext" ,128,9,0,1,0,0,0,0,NULL,&atx[897],NULL,0,&atx[947]} ,
+ {0, "cit" ,128,10,0,1,0,0,0,0,NULL,&atx[948],NULL,0,&atx[949]} ,
{416, "Pub-set" ,1,0,0,0,0,0,1,0,NULL,&atx[329],NULL,0,&atx[432]} ,
- {0, "exp-ev" ,128,11,0,1,0,0,0,0,NULL,&atx[42],&avnx[457],0,&atx[935]} ,
- {0, "xref" ,128,12,0,1,0,0,0,0,NULL,&atx[45],&atx[936],0,&atx[940]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[937],NULL,0,NULL} ,
- {422, "SeqFeatXref" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[938],0,&atx[804]} ,
- {0, "id" ,128,0,0,1,0,0,0,0,NULL,&atx[427],NULL,0,&atx[939]} ,
- {0, "data" ,128,1,0,1,0,0,0,0,NULL,&atx[786],NULL,0,NULL} ,
- {0, "dbxref" ,128,13,0,1,0,0,0,0,NULL,&atx[45],&atx[941],0,&atx[942]} ,
+ {0, "exp-ev" ,128,11,0,1,0,0,0,0,NULL,&atx[42],&avnx[461],0,&atx[950]} ,
+ {0, "xref" ,128,12,0,1,0,0,0,0,NULL,&atx[45],&atx[951],0,&atx[955]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[952],NULL,0,NULL} ,
+ {422, "SeqFeatXref" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[953],0,&atx[819]} ,
+ {0, "id" ,128,0,0,1,0,0,0,0,NULL,&atx[427],NULL,0,&atx[954]} ,
+ {0, "data" ,128,1,0,1,0,0,0,0,NULL,&atx[801],NULL,0,NULL} ,
+ {0, "dbxref" ,128,13,0,1,0,0,0,0,NULL,&atx[45],&atx[956],0,&atx[957]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[434],NULL,0,NULL} ,
- {0, "pseudo" ,128,14,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[943]} ,
+ {0, "pseudo" ,128,14,0,1,0,0,0,0,NULL,&atx[60],NULL,0,&atx[958]} ,
{0, "except-text" ,128,15,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
- {0, "delta" ,128,3,0,0,0,0,0,0,NULL,&atx[945],NULL,0,NULL} ,
- {443, "Delta-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[66],&atx[946],0,&atx[947]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[947],NULL,0,NULL} ,
- {444, "Delta-seq" ,1,0,0,0,0,0,0,0,NULL,&atx[14],&atx[948],0,&atx[959]} ,
- {0, "loc" ,128,0,0,0,0,0,0,0,NULL,&atx[776],NULL,0,&atx[949]} ,
- {0, "literal" ,128,1,0,0,0,0,0,0,NULL,&atx[950],NULL,0,NULL} ,
- {410, "Seq-literal" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[951],0,&atx[688]} ,
- {0, "length" ,128,0,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[952]} ,
- {0, "fuzz" ,128,1,0,1,0,0,0,0,NULL,&atx[746],NULL,0,&atx[953]} ,
- {0, "seq-data" ,128,2,0,1,0,0,0,0,NULL,&atx[750],NULL,0,NULL} ,
- {0, "hist" ,128,8,0,1,0,0,0,0,NULL,&atx[955],NULL,0,NULL} ,
- {408, "Seq-hist" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[956],0,&atx[444]} ,
- {0, "assembly" ,128,0,0,1,0,0,0,0,NULL,&atx[45],&atx[957],0,&atx[958]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[512],NULL,0,NULL} ,
- {0, "replaces" ,128,1,0,1,0,0,0,0,NULL,&atx[959],NULL,0,&atx[963]} ,
- {445, "Seq-hist-rec" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[960],0,&atx[752]} ,
- {0, "date" ,128,0,0,1,0,0,0,0,NULL,&atx[688],NULL,0,&atx[961]} ,
- {0, "ids" ,128,1,0,0,0,0,0,0,NULL,&atx[45],&atx[962],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[438],NULL,0,NULL} ,
- {0, "replaced-by" ,128,2,0,1,0,0,0,0,NULL,&atx[959],NULL,0,&atx[964]} ,
- {0, "deleted" ,128,3,0,1,0,0,0,0,NULL,&atx[14],&atx[965],0,NULL} ,
- {0, "bool" ,128,0,0,0,0,0,0,0,NULL,&atx[60],NULL,0,&atx[966]} ,
- {0, "date" ,128,1,0,0,0,0,0,0,NULL,&atx[688],NULL,0,NULL} ,
- {0, "annot" ,128,3,0,1,0,0,0,0,NULL,&atx[45],&atx[968],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[969],NULL,0,NULL} ,
- {402, "Seq-annot" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[970],0,&atx[626]} ,
- {0, "id" ,128,0,0,1,0,0,0,0,NULL,&atx[45],&atx[971],0,&atx[977]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[972],NULL,0,NULL} ,
- {456, "Annot-id" ,1,0,0,0,0,0,0,0,NULL,&atx[14],&atx[973],0,&atx[980]} ,
- {0, "local" ,128,0,0,0,0,0,0,0,NULL,&atx[974],NULL,0,&atx[975]} ,
- {414, "Object-id" ,1,0,0,0,0,0,1,0,NULL,&atx[20],NULL,0,&atx[642]} ,
- {0, "ncbi" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[976]} ,
- {0, "general" ,128,2,0,0,0,0,0,0,NULL,&atx[591],NULL,0,NULL} ,
- {0, "db" ,128,1,0,1,0,0,0,0,NULL,&atx[6],&avnx[459],0,&atx[978]} ,
- {0, "name" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[979]} ,
- {0, "desc" ,128,3,0,1,0,0,0,0,NULL,&atx[980],NULL,0,&atx[997]} ,
- {457, "Annot-descr" ,1,0,0,0,0,0,0,0,NULL,&atx[45],&atx[981],0,&atx[982]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[982],NULL,0,NULL} ,
- {458, "Annotdesc" ,1,0,0,0,0,0,0,0,NULL,&atx[14],&atx[983],0,&atx[992]} ,
- {0, "name" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[984]} ,
- {0, "title" ,128,1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[985]} ,
- {0, "comment" ,128,2,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[986]} ,
- {0, "pub" ,128,3,0,0,0,0,0,0,NULL,&atx[626],NULL,0,&atx[987]} ,
- {0, "user" ,128,4,0,0,0,0,0,0,NULL,&atx[642],NULL,0,&atx[988]} ,
- {0, "create-date" ,128,5,0,0,0,0,0,0,NULL,&atx[688],NULL,0,&atx[989]} ,
- {0, "update-date" ,128,6,0,0,0,0,0,0,NULL,&atx[688],NULL,0,&atx[990]} ,
- {0, "src" ,128,7,0,0,0,0,0,0,NULL,&atx[438],NULL,0,&atx[991]} ,
- {0, "align" ,128,8,0,0,0,0,0,0,NULL,&atx[992],NULL,0,&atx[996]} ,
- {459, "Align-def" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[993],0,NULL} ,
- {0, "align-type" ,128,0,0,0,0,0,0,0,NULL,&atx[6],&avnx[467],0,&atx[994]} ,
- {0, "ids" ,128,1,0,1,0,0,0,0,NULL,&atx[45],&atx[995],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[438],NULL,0,NULL} ,
- {0, "region" ,128,9,0,0,0,0,0,0,NULL,&atx[776],NULL,0,NULL} ,
+ {0, "delta" ,128,3,0,0,0,0,0,0,NULL,&atx[960],NULL,0,NULL} ,
+ {444, "Delta-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[66],&atx[961],0,&atx[962]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[962],NULL,0,NULL} ,
+ {445, "Delta-seq" ,1,0,0,0,0,0,0,0,NULL,&atx[14],&atx[963],0,&atx[974]} ,
+ {0, "loc" ,128,0,0,0,0,0,0,0,NULL,&atx[560],NULL,0,&atx[964]} ,
+ {0, "literal" ,128,1,0,0,0,0,0,0,NULL,&atx[965],NULL,0,NULL} ,
+ {410, "Seq-literal" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[966],0,&atx[567]} ,
+ {0, "length" ,128,0,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[967]} ,
+ {0, "fuzz" ,128,1,0,1,0,0,0,0,NULL,&atx[762],NULL,0,&atx[968]} ,
+ {0, "seq-data" ,128,2,0,1,0,0,0,0,NULL,&atx[766],NULL,0,NULL} ,
+ {0, "hist" ,128,8,0,1,0,0,0,0,NULL,&atx[970],NULL,0,NULL} ,
+ {409, "Seq-hist" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[971],0,&atx[965]} ,
+ {0, "assembly" ,128,0,0,1,0,0,0,0,NULL,&atx[45],&atx[972],0,&atx[973]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[461],NULL,0,NULL} ,
+ {0, "replaces" ,128,1,0,1,0,0,0,0,NULL,&atx[974],NULL,0,&atx[978]} ,
+ {446, "Seq-hist-rec" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[975],0,&atx[768]} ,
+ {0, "date" ,128,0,0,1,0,0,0,0,NULL,&atx[550],NULL,0,&atx[976]} ,
+ {0, "ids" ,128,1,0,0,0,0,0,0,NULL,&atx[45],&atx[977],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[553],NULL,0,NULL} ,
+ {0, "replaced-by" ,128,2,0,1,0,0,0,0,NULL,&atx[974],NULL,0,&atx[979]} ,
+ {0, "deleted" ,128,3,0,1,0,0,0,0,NULL,&atx[14],&atx[980],0,NULL} ,
+ {0, "bool" ,128,0,0,0,0,0,0,0,NULL,&atx[60],NULL,0,&atx[981]} ,
+ {0, "date" ,128,1,0,0,0,0,0,0,NULL,&atx[550],NULL,0,NULL} ,
+ {0, "annot" ,128,3,0,1,0,0,0,0,NULL,&atx[45],&atx[983],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[984],NULL,0,NULL} ,
+ {407, "Seq-annot" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[985],0,&atx[565]} ,
+ {0, "id" ,128,0,0,1,0,0,0,0,NULL,&atx[45],&atx[986],0,&atx[992]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[987],NULL,0,NULL} ,
+ {457, "Annot-id" ,1,0,0,0,0,0,0,0,NULL,&atx[14],&atx[988],0,&atx[995]} ,
+ {0, "local" ,128,0,0,0,0,0,0,0,NULL,&atx[989],NULL,0,&atx[990]} ,
+ {415, "Object-id" ,1,0,0,0,0,0,1,0,NULL,&atx[20],NULL,0,&atx[548]} ,
+ {0, "ncbi" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[991]} ,
+ {0, "general" ,128,2,0,0,0,0,0,0,NULL,&atx[623],NULL,0,NULL} ,
+ {0, "db" ,128,1,0,1,0,0,0,0,NULL,&atx[6],&avnx[463],0,&atx[993]} ,
+ {0, "name" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[994]} ,
+ {0, "desc" ,128,3,0,1,0,0,0,0,NULL,&atx[995],NULL,0,&atx[997]} ,
+ {458, "Annot-descr" ,1,0,0,0,0,0,0,0,NULL,&atx[45],&atx[996],0,&atx[555]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[435],NULL,0,NULL} ,
{0, "data" ,128,4,0,0,0,0,0,0,NULL,&atx[14],&atx[998],0,NULL} ,
{0, "ftable" ,128,0,0,0,0,0,0,0,NULL,&atx[45],&atx[999],0,&atx[1000]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[782],NULL,0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[797],NULL,0,NULL} ,
{0, "align" ,128,1,0,0,0,0,0,0,NULL,&atx[45],&atx[1001],0,&atx[1002]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[512],NULL,0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[461],NULL,0,NULL} ,
{0, "graph" ,128,2,0,0,0,0,0,0,NULL,&atx[45],&atx[1003],0,&atx[1037]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[1004],NULL,0,NULL} ,
- {418, "Seq-graph" ,1,0,0,0,0,0,1,0,NULL,&atx[1005],NULL,0,&atx[628]} ,
+ {419, "Seq-graph" ,1,0,0,0,0,0,1,0,NULL,&atx[1005],NULL,0,&atx[442]} ,
{401, "Seq-graph" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[1006],0,&atx[1009]} ,
{0, "title" ,128,0,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[1007]} ,
{0, "comment" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[1008]} ,
@@ -1658,9 +1639,9 @@ static AsnType atx[1592] = {
{0, "axis" ,128,2,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[1036]} ,
{0, "values" ,128,3,0,0,0,0,0,0,NULL,&atx[62],NULL,0,NULL} ,
{0, "ids" ,128,3,0,0,0,0,0,0,NULL,&atx[45],&atx[1038],0,&atx[1039]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[438],NULL,0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[553],NULL,0,NULL} ,
{0, "locs" ,128,4,0,0,0,0,0,0,NULL,&atx[45],&atx[1040],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[776],NULL,0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[560],NULL,0,NULL} ,
{401, "Bioseq-set" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[1042],0,&atx[1055]} ,
{0, "id" ,128,0,0,1,0,0,0,0,NULL,&atx[1043],NULL,0,&atx[1044]} ,
{406, "Object-id" ,1,0,0,0,0,0,1,0,NULL,&atx[20],NULL,0,&atx[1045]} ,
@@ -1672,16 +1653,16 @@ static AsnType atx[1592] = {
{0, "date" ,128,5,0,1,0,0,0,0,NULL,&atx[1050],NULL,0,&atx[1051]} ,
{408, "Date" ,1,0,0,0,0,0,1,0,NULL,&atx[0],NULL,0,NULL} ,
{0, "descr" ,128,6,0,1,0,0,0,0,NULL,&atx[1052],NULL,0,&atx[1053]} ,
- {405, "Seq-descr" ,1,0,0,0,0,0,1,0,NULL,&atx[440],NULL,0,&atx[1043]} ,
+ {405, "Seq-descr" ,1,0,0,0,0,0,1,0,NULL,&atx[565],NULL,0,&atx[1043]} ,
{0, "seq-set" ,128,7,0,0,0,0,0,0,NULL,&atx[66],&atx[1054],0,&atx[1059]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[1055],NULL,0,NULL} ,
{402, "Seq-entry" ,1,0,0,0,0,1,0,0,NULL,&atx[14],&atx[1056],0,&atx[1057]} ,
{0, "seq" ,128,0,0,0,0,0,0,0,NULL,&atx[1057],NULL,0,&atx[1058]} ,
- {403, "Bioseq" ,1,0,0,0,0,0,1,0,NULL,&atx[435],NULL,0,&atx[1061]} ,
+ {403, "Bioseq" ,1,0,0,0,0,0,1,0,NULL,&atx[561],NULL,0,&atx[1061]} ,
{0, "set" ,128,1,0,0,0,0,0,0,NULL,&atx[1041],NULL,0,NULL} ,
{0, "annot" ,128,8,0,1,0,0,0,0,NULL,&atx[45],&atx[1060],0,NULL} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[1061],NULL,0,NULL} ,
- {404, "Seq-annot" ,1,0,0,0,0,0,1,0,NULL,&atx[969],NULL,0,&atx[1052]} ,
+ {404, "Seq-annot" ,1,0,0,0,0,0,1,0,NULL,&atx[984],NULL,0,&atx[1052]} ,
{401, "Seq-code-table" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[1063],0,&atx[1074]} ,
{0, "code" ,128,0,0,0,0,0,0,0,NULL,&atx[1064],NULL,0,&atx[1065]} ,
{404, "Seq-code-type" ,1,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[492],0,NULL} ,
@@ -1707,9 +1688,9 @@ static AsnType atx[1592] = {
{0, "maps" ,128,1,0,1,0,0,0,0,NULL,&atx[45],&atx[1085],0,NULL} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[1074],NULL,0,NULL} ,
{426, "Genetic-code-table" ,1,0,0,0,0,0,0,0,NULL,&atx[45],&atx[1087],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[811],NULL,0,NULL} ,
- {403, "Score-set" ,1,0,0,0,0,1,0,0,NULL,&atx[45],&atx[1089],0,&atx[581]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[518],NULL,0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[826],NULL,0,NULL} ,
+ {403, "Score-set" ,1,0,0,0,0,1,0,0,NULL,&atx[45],&atx[1089],0,&atx[530]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[467],NULL,0,NULL} ,
{401, "Seq-submit" ,1,0,0,0,0,1,0,0,NULL,&atx[13],&atx[1091],0,&atx[1094]} ,
{0, "sub" ,128,0,0,0,0,0,0,0,NULL,&atx[1092],NULL,0,&atx[1119]} ,
{410, "Submit-block" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[1093],0,NULL} ,
@@ -1745,7 +1726,7 @@ static AsnType atx[1592] = {
{409, "Seq-entry" ,1,0,0,0,0,0,1,0,NULL,&atx[1055],NULL,0,&atx[1092]} ,
{0, "annots" ,128,1,0,0,0,0,0,0,NULL,&atx[45],&atx[1124],0,&atx[1126]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[1125],NULL,0,NULL} ,
- {407, "Seq-annot" ,1,0,0,0,0,0,1,0,NULL,&atx[969],NULL,0,&atx[1128]} ,
+ {407, "Seq-annot" ,1,0,0,0,0,0,1,0,NULL,&atx[984],NULL,0,&atx[1128]} ,
{0, "delete" ,128,2,0,0,0,0,0,0,NULL,&atx[45],&atx[1127],0,NULL} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[1128],NULL,0,NULL} ,
{408, "Seq-id" ,1,0,0,0,0,0,1,0,NULL,&atx[344],NULL,0,&atx[1122]} ,
@@ -1877,7 +1858,7 @@ static AsnType atx[1592] = {
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[1255],NULL,0,NULL} ,
{413, "Projdesc" ,1,0,0,0,0,0,0,0,NULL,&atx[14],&atx[1256],0,NULL} ,
{0, "pub" ,128,0,0,0,0,0,0,0,NULL,&atx[1257],NULL,0,&atx[1258]} ,
- {408, "Pubdesc" ,1,0,0,0,0,0,1,0,NULL,&atx[626],NULL,0,&atx[1294]} ,
+ {408, "Pubdesc" ,1,0,0,0,0,0,1,0,NULL,&atx[440],NULL,0,&atx[1294]} ,
{0, "date" ,128,1,0,0,0,0,0,0,NULL,&atx[1259],NULL,0,&atx[1260]} ,
{403, "Date" ,1,0,0,0,0,0,1,0,NULL,&atx[0],NULL,0,&atx[1278]} ,
{0, "comment" ,128,2,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[1261]} ,
@@ -1924,7 +1905,7 @@ static AsnType atx[1592] = {
{0, "structent" ,128,17,0,0,0,0,0,0,NULL,&atx[386],NULL,0,&atx[1302]} ,
{0, "seqannot" ,128,18,0,0,0,0,0,0,NULL,&atx[45],&atx[1303],0,&atx[1305]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[1304],NULL,0,NULL} ,
- {407, "Seq-annot" ,1,0,0,0,0,0,1,0,NULL,&atx[969],NULL,0,&atx[1257]} ,
+ {407, "Seq-annot" ,1,0,0,0,0,0,1,0,NULL,&atx[984],NULL,0,&atx[1257]} ,
{0, "loc" ,128,19,0,0,0,0,0,0,NULL,&atx[45],&atx[1306],0,&atx[1308]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[1307],NULL,0,NULL} ,
{406, "Seq-loc" ,1,0,0,0,0,0,1,0,NULL,&atx[384],NULL,0,&atx[1304]} ,
@@ -1983,7 +1964,7 @@ static AsnType atx[1592] = {
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[1343],NULL,0,NULL} ,
{0, "seqalign" ,128,4,0,0,0,0,0,0,NULL,&atx[45],&atx[1362],0,&atx[1364]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[1363],NULL,0,NULL} ,
- {406, "Seq-annot" ,1,0,0,0,0,0,1,0,NULL,&atx[969],NULL,0,&atx[1341]} ,
+ {406, "Seq-annot" ,1,0,0,0,0,0,1,0,NULL,&atx[984],NULL,0,&atx[1341]} ,
{0, "style-dictionary" ,128,5,0,1,0,0,0,0,NULL,&atx[1365],NULL,0,&atx[1366]} ,
{408, "Cn3d-style-dictionary" ,1,0,0,0,0,0,1,0,NULL,&atx[1129],NULL,0,&atx[1367]} ,
{0, "user-annotations" ,128,6,0,1,0,0,0,0,NULL,&atx[1367],NULL,0,NULL} ,
@@ -2136,9 +2117,9 @@ static AsnType atx[1592] = {
{401, "INSDSeq" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[1514],0,&atx[1530]} ,
{0, "locus" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[1515]} ,
{0, "length" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[1516]} ,
- {0, "strandedness" ,128,2,0,0,1,0,0,0,&avnx[590],&atx[6],&avnx[586],0,&atx[1517]} ,
- {0, "moltype" ,128,3,0,0,1,0,0,0,&avnx[601],&atx[6],&avnx[591],0,&atx[1518]} ,
- {0, "topology" ,128,4,0,0,1,0,0,0,&avnx[604],&atx[6],&avnx[602],0,&atx[1519]} ,
+ {0, "strandedness" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[1517]} ,
+ {0, "moltype" ,128,3,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[1518]} ,
+ {0, "topology" ,128,4,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[1519]} ,
{0, "division" ,128,5,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[1520]} ,
{0, "update-date" ,128,6,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[1521]} ,
{0, "create-date" ,128,7,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[1522]} ,
@@ -2200,7 +2181,7 @@ static AsnType atx[1592] = {
{410, "INSDSet" ,1,0,0,0,0,0,0,0,NULL,&atx[66],&atx[1578],0,NULL} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[1513],NULL,0,NULL} ,
{401, "TSeq" ,1,0,0,0,0,0,0,0,NULL,&atx[13],&atx[1580],0,&atx[1590]} ,
- {0, "seqtype" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[605],0,&atx[1581]} ,
+ {0, "seqtype" ,128,0,0,0,0,0,0,0,NULL,&atx[42],&avnx[586],0,&atx[1581]} ,
{0, "gi" ,128,1,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[1582]} ,
{0, "accver" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[1583]} ,
{0, "sid" ,128,3,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[1584]} ,
@@ -2224,21 +2205,21 @@ static AsnModule ampx[36] = {
{ "NCBI-Sequence" , NULL,&atx[435],NULL,&ampx[8],0,0} ,
{ "NCBI-Seqset" , NULL,&atx[1041],NULL,&ampx[9],0,0} ,
{ "NCBI-SeqCode" , NULL,&atx[1062],NULL,&ampx[10],0,0} ,
- { "EMBL-General" , NULL,&atx[681],NULL,&ampx[11],0,0} ,
- { "SP-General" , NULL,&atx[645],NULL,&ampx[12],0,0} ,
- { "PIR-General" , NULL,&atx[594],NULL,&ampx[13],0,0} ,
- { "GenBank-General" , NULL,&atx[613],NULL,&ampx[14],0,0} ,
- { "PRF-General" , NULL,&atx[692],NULL,&ampx[15],0,0} ,
- { "PDB-General" , NULL,&atx[704],NULL,&ampx[16],0,0} ,
- { "NCBI-Seqfeat" , NULL,&atx[783],NULL,&ampx[17],0,0} ,
- { "NCBI-Rsite" , NULL,&atx[877],NULL,&ampx[18],0,0} ,
- { "NCBI-RNA" , NULL,&atx[847],NULL,&ampx[19],0,0} ,
- { "NCBI-Gene" , NULL,&atx[789],NULL,&ampx[20],0,0} ,
- { "NCBI-Organism" , NULL,&atx[454],NULL,&ampx[21],0,0} ,
- { "NCBI-BioSource" , NULL,&atx[722],NULL,&ampx[22],0,0} ,
- { "NCBI-Protein" , NULL,&atx[833],NULL,&ampx[23],0,0} ,
- { "NCBI-TxInit" , NULL,&atx[885],NULL,&ampx[24],0,0} ,
- { "NCBI-Seqalign" , NULL,&atx[513],NULL,&ampx[25],0,0} ,
+ { "EMBL-General" , NULL,&atx[698],NULL,&ampx[11],0,0} ,
+ { "SP-General" , NULL,&atx[662],NULL,&ampx[12],0,0} ,
+ { "PIR-General" , NULL,&atx[626],NULL,&ampx[13],0,0} ,
+ { "GenBank-General" , NULL,&atx[645],NULL,&ampx[14],0,0} ,
+ { "PRF-General" , NULL,&atx[708],NULL,&ampx[15],0,0} ,
+ { "PDB-General" , NULL,&atx[720],NULL,&ampx[16],0,0} ,
+ { "NCBI-Seqfeat" , NULL,&atx[798],NULL,&ampx[17],0,0} ,
+ { "NCBI-Rsite" , NULL,&atx[892],NULL,&ampx[18],0,0} ,
+ { "NCBI-RNA" , NULL,&atx[862],NULL,&ampx[19],0,0} ,
+ { "NCBI-Gene" , NULL,&atx[804],NULL,&ampx[20],0,0} ,
+ { "NCBI-Organism" , NULL,&atx[579],NULL,&ampx[21],0,0} ,
+ { "NCBI-BioSource" , NULL,&atx[738],NULL,&ampx[22],0,0} ,
+ { "NCBI-Protein" , NULL,&atx[848],NULL,&ampx[23],0,0} ,
+ { "NCBI-TxInit" , NULL,&atx[900],NULL,&ampx[24],0,0} ,
+ { "NCBI-Seqalign" , NULL,&atx[462],NULL,&ampx[25],0,0} ,
{ "NCBI-Seqres" , NULL,&atx[1005],NULL,&ampx[26],0,0} ,
{ "NCBI-Submit" , NULL,&atx[1090],NULL,&ampx[27],0,0} ,
{ "NCBI-Cn3d" , NULL,&atx[1129],NULL,&ampx[28],0,0} ,
@@ -2769,20 +2750,55 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define BIOSEQ &at[435]
-#define BIOSEQ_id &at[436]
-#define BIOSEQ_id_E &at[437]
-#define BIOSEQ_descr &at[439]
-#define BIOSEQ_inst &at[740]
-#define BIOSEQ_annot &at[967]
-#define BIOSEQ_annot_E &at[968]
-
-#define SEQ_ANNOT &at[969]
-#define SEQ_ANNOT_id &at[970]
-#define SEQ_ANNOT_id_E &at[971]
-#define SEQ_ANNOT_db &at[977]
-#define SEQ_ANNOT_name &at[978]
-#define SEQ_ANNOT_desc &at[979]
+#define ANNOTDESC &at[435]
+#define ANNOTDESC_name &at[436]
+#define ANNOTDESC_title &at[437]
+#define ANNOTDESC_comment &at[438]
+#define ANNOTDESC_pub &at[439]
+#define ANNOTDESC_user &at[547]
+#define ANNOTDESC_create_date &at[549]
+#define ANNOTDESC_update_date &at[551]
+#define ANNOTDESC_src &at[552]
+#define ANNOTDESC_align &at[554]
+#define ANNOTDESC_region &at[559]
+
+#define BIOSEQ &at[561]
+#define BIOSEQ_id &at[562]
+#define BIOSEQ_id_E &at[563]
+#define BIOSEQ_descr &at[564]
+#define BIOSEQ_inst &at[756]
+#define BIOSEQ_annot &at[982]
+#define BIOSEQ_annot_E &at[983]
+
+#define GIBB_MOL &at[569]
+
+#define HETEROGEN &at[735]
+
+#define NUMBERING &at[446]
+#define NUMBERING_cont &at[447]
+#define NUMBERING_enum &at[452]
+#define NUMBERING_ref &at[457]
+#define NUMBERING_real &at[534]
+
+#define PUBDESC &at[440]
+#define PUBDESC_pub &at[441]
+#define PUBDESC_name &at[443]
+#define PUBDESC_fig &at[444]
+#define PUBDESC_num &at[445]
+#define PUBDESC_numexc &at[539]
+#define PUBDESC_poly_a &at[540]
+#define PUBDESC_maploc &at[541]
+#define PUBDESC_seq_raw &at[542]
+#define PUBDESC_align_group &at[544]
+#define PUBDESC_comment &at[545]
+#define PUBDESC_reftype &at[546]
+
+#define SEQ_ANNOT &at[984]
+#define SEQ_ANNOT_id &at[985]
+#define SEQ_ANNOT_id_E &at[986]
+#define SEQ_ANNOT_db &at[992]
+#define SEQ_ANNOT_name &at[993]
+#define SEQ_ANNOT_desc &at[994]
#define SEQ_ANNOT_data &at[997]
#define SEQ_ANNOT_data_ftable &at[998]
#define SEQ_ANNOT_data_ftable_E &at[999]
@@ -2795,195 +2811,160 @@ static AsnModulePtr amp = ampx;
#define SEQ_ANNOT_data_locs &at[1039]
#define SEQ_ANNOT_data_locs_E &at[1040]
-#define PUBDESC &at[626]
-#define PUBDESC_pub &at[627]
-#define PUBDESC_name &at[629]
-#define PUBDESC_fig &at[630]
-#define PUBDESC_num &at[631]
-#define PUBDESC_numexc &at[632]
-#define PUBDESC_poly_a &at[633]
-#define PUBDESC_maploc &at[634]
-#define PUBDESC_seq_raw &at[635]
-#define PUBDESC_align_group &at[637]
-#define PUBDESC_comment &at[638]
-#define PUBDESC_reftype &at[639]
-
-#define SEQ_DESCR &at[440]
-#define SEQ_DESCR_E &at[441]
-
-#define SEQDESC &at[442]
-#define SEQDESC_mol_type &at[443]
-#define SEQDESC_modif &at[445]
-#define SEQDESC_modif_E &at[446]
-#define SEQDESC_method &at[448]
-#define SEQDESC_name &at[450]
-#define SEQDESC_title &at[451]
-#define SEQDESC_org &at[452]
-#define SEQDESC_comment &at[495]
-#define SEQDESC_num &at[496]
-#define SEQDESC_maploc &at[590]
-#define SEQDESC_pir &at[592]
-#define SEQDESC_genbank &at[611]
-#define SEQDESC_pub &at[625]
-#define SEQDESC_region &at[640]
-#define SEQDESC_user &at[641]
-#define SEQDESC_sp &at[643]
-#define SEQDESC_dbxref &at[664]
-#define SEQDESC_embl &at[665]
-#define SEQDESC_create_date &at[687]
-#define SEQDESC_update_date &at[689]
-#define SEQDESC_prf &at[690]
-#define SEQDESC_pdb &at[702]
-#define SEQDESC_het &at[718]
-#define SEQDESC_source &at[720]
-#define SEQDESC_molinfo &at[734]
-
-#define NUMBERING &at[497]
-#define NUMBERING_cont &at[498]
-#define NUMBERING_enum &at[503]
-#define NUMBERING_ref &at[508]
-#define NUMBERING_real &at[585]
-
-#define HETEROGEN &at[719]
-
-#define SEQ_HIST &at[955]
-#define SEQ_HIST_assembly &at[956]
-#define SEQ_HIST_assembly_E &at[957]
-#define SEQ_HIST_replaces &at[958]
-#define SEQ_HIST_replaced_by &at[963]
-#define SEQ_HIST_deleted &at[964]
-#define SEQ_HIST_deleted_bool &at[965]
-#define SEQ_HIST_deleted_date &at[966]
-
-#define GIBB_MOL &at[444]
-
-#define SEQ_LITERAL &at[950]
-#define SEQ_LITERAL_length &at[951]
-#define SEQ_LITERAL_fuzz &at[952]
-#define SEQ_LITERAL_seq_data &at[953]
-
-#define SEQ_INST &at[741]
-#define SEQ_INST_repr &at[742]
-#define SEQ_INST_mol &at[743]
-#define SEQ_INST_length &at[744]
-#define SEQ_INST_fuzz &at[745]
-#define SEQ_INST_topology &at[747]
-#define SEQ_INST_strand &at[748]
-#define SEQ_INST_seq_data &at[749]
-#define SEQ_INST_ext &at[771]
-#define SEQ_INST_hist &at[954]
-
-#define GIBB_MOD &at[447]
-
-#define GIBB_METHOD &at[449]
-
-#define MOLINFO &at[735]
-#define MOLINFO_biomol &at[736]
-#define MOLINFO_tech &at[737]
-#define MOLINFO_techexp &at[738]
-#define MOLINFO_completeness &at[739]
-
-#define NUM_CONT &at[499]
-#define NUM_CONT_refnum &at[500]
-#define NUM_CONT_has_zero &at[501]
-#define NUM_CONT_ascending &at[502]
-
-#define NUM_ENUM &at[504]
-#define NUM_ENUM_num &at[505]
-#define NUM_ENUM_names &at[506]
-#define NUM_ENUM_names_E &at[507]
-
-#define NUM_REF &at[509]
-#define NUM_REF_type &at[510]
-#define NUM_REF_aligns &at[511]
-
-#define NUM_REAL &at[586]
-#define NUM_REAL_a &at[587]
-#define NUM_REAL_b &at[588]
-#define NUM_REAL_units &at[589]
-
-#define SEQ_DATA &at[750]
-#define SEQ_DATA_iupacna &at[751]
-#define SEQ_DATA_iupacaa &at[753]
-#define SEQ_DATA_ncbi2na &at[755]
-#define SEQ_DATA_ncbi4na &at[757]
-#define SEQ_DATA_ncbi8na &at[759]
-#define SEQ_DATA_ncbipna &at[761]
-#define SEQ_DATA_ncbi8aa &at[763]
-#define SEQ_DATA_ncbieaa &at[765]
-#define SEQ_DATA_ncbipaa &at[767]
-#define SEQ_DATA_ncbistdaa &at[769]
-
-#define SEQ_EXT &at[772]
-#define SEQ_EXT_seg &at[773]
-#define SEQ_EXT_ref &at[777]
-#define SEQ_EXT_map &at[779]
-#define SEQ_EXT_delta &at[944]
-
-#define SEG_EXT &at[774]
-#define SEG_EXT_E &at[775]
-
-#define REF_EXT &at[778]
-
-#define MAP_EXT &at[780]
-#define MAP_EXT_E &at[781]
-
-#define DELTA_EXT &at[945]
-#define DELTA_EXT_E &at[946]
-
-#define DELTA_SEQ &at[947]
-#define DELTA_SEQ_loc &at[948]
-#define DELTA_SEQ_literal &at[949]
-
-#define SEQ_HIST_REC &at[959]
-#define SEQ_HIST_REC_date &at[960]
-#define SEQ_HIST_REC_ids &at[961]
-#define SEQ_HIST_REC_ids_E &at[962]
-
-#define IUPACNA &at[752]
-
-#define IUPACAA &at[754]
-
-#define NCBI2NA &at[756]
-
-#define NCBI4NA &at[758]
-
-#define NCBI8NA &at[760]
+#define SEQ_DESCR &at[565]
+#define SEQ_DESCR_E &at[566]
+
+#define SEQ_HIST &at[970]
+#define SEQ_HIST_assembly &at[971]
+#define SEQ_HIST_assembly_E &at[972]
+#define SEQ_HIST_replaces &at[973]
+#define SEQ_HIST_replaced_by &at[978]
+#define SEQ_HIST_deleted &at[979]
+#define SEQ_HIST_deleted_bool &at[980]
+#define SEQ_HIST_deleted_date &at[981]
+
+#define SEQ_LITERAL &at[965]
+#define SEQ_LITERAL_length &at[966]
+#define SEQ_LITERAL_fuzz &at[967]
+#define SEQ_LITERAL_seq_data &at[968]
+
+#define SEQDESC &at[567]
+#define SEQDESC_mol_type &at[568]
+#define SEQDESC_modif &at[570]
+#define SEQDESC_modif_E &at[571]
+#define SEQDESC_method &at[573]
+#define SEQDESC_name &at[575]
+#define SEQDESC_title &at[576]
+#define SEQDESC_org &at[577]
+#define SEQDESC_comment &at[620]
+#define SEQDESC_num &at[621]
+#define SEQDESC_maploc &at[622]
+#define SEQDESC_pir &at[624]
+#define SEQDESC_genbank &at[643]
+#define SEQDESC_pub &at[657]
+#define SEQDESC_region &at[658]
+#define SEQDESC_user &at[659]
+#define SEQDESC_sp &at[660]
+#define SEQDESC_dbxref &at[681]
+#define SEQDESC_embl &at[682]
+#define SEQDESC_create_date &at[704]
+#define SEQDESC_update_date &at[705]
+#define SEQDESC_prf &at[706]
+#define SEQDESC_pdb &at[718]
+#define SEQDESC_het &at[734]
+#define SEQDESC_source &at[736]
+#define SEQDESC_molinfo &at[750]
+
+#define SEQ_INST &at[757]
+#define SEQ_INST_repr &at[758]
+#define SEQ_INST_mol &at[759]
+#define SEQ_INST_length &at[760]
+#define SEQ_INST_fuzz &at[761]
+#define SEQ_INST_topology &at[763]
+#define SEQ_INST_strand &at[764]
+#define SEQ_INST_seq_data &at[765]
+#define SEQ_INST_ext &at[787]
+#define SEQ_INST_hist &at[969]
+
+#define GIBB_MOD &at[572]
+
+#define GIBB_METHOD &at[574]
+
+#define MOLINFO &at[751]
+#define MOLINFO_biomol &at[752]
+#define MOLINFO_tech &at[753]
+#define MOLINFO_techexp &at[754]
+#define MOLINFO_completeness &at[755]
+
+#define NUM_CONT &at[448]
+#define NUM_CONT_refnum &at[449]
+#define NUM_CONT_has_zero &at[450]
+#define NUM_CONT_ascending &at[451]
+
+#define NUM_ENUM &at[453]
+#define NUM_ENUM_num &at[454]
+#define NUM_ENUM_names &at[455]
+#define NUM_ENUM_names_E &at[456]
+
+#define NUM_REF &at[458]
+#define NUM_REF_type &at[459]
+#define NUM_REF_aligns &at[460]
+
+#define NUM_REAL &at[535]
+#define NUM_REAL_a &at[536]
+#define NUM_REAL_b &at[537]
+#define NUM_REAL_units &at[538]
+
+#define SEQ_DATA &at[766]
+#define SEQ_DATA_iupacna &at[767]
+#define SEQ_DATA_iupacaa &at[769]
+#define SEQ_DATA_ncbi2na &at[771]
+#define SEQ_DATA_ncbi4na &at[773]
+#define SEQ_DATA_ncbi8na &at[775]
+#define SEQ_DATA_ncbipna &at[777]
+#define SEQ_DATA_ncbi8aa &at[779]
+#define SEQ_DATA_ncbieaa &at[781]
+#define SEQ_DATA_ncbipaa &at[783]
+#define SEQ_DATA_ncbistdaa &at[785]
+
+#define SEQ_EXT &at[788]
+#define SEQ_EXT_seg &at[789]
+#define SEQ_EXT_ref &at[792]
+#define SEQ_EXT_map &at[794]
+#define SEQ_EXT_delta &at[959]
+
+#define SEG_EXT &at[790]
+#define SEG_EXT_E &at[791]
+
+#define REF_EXT &at[793]
+
+#define MAP_EXT &at[795]
+#define MAP_EXT_E &at[796]
+
+#define DELTA_EXT &at[960]
+#define DELTA_EXT_E &at[961]
+
+#define DELTA_SEQ &at[962]
+#define DELTA_SEQ_loc &at[963]
+#define DELTA_SEQ_literal &at[964]
+
+#define SEQ_HIST_REC &at[974]
+#define SEQ_HIST_REC_date &at[975]
+#define SEQ_HIST_REC_ids &at[976]
+#define SEQ_HIST_REC_ids_E &at[977]
+
+#define IUPACNA &at[768]
+
+#define IUPACAA &at[770]
+
+#define NCBI2NA &at[772]
+
+#define NCBI4NA &at[774]
-#define NCBIPNA &at[762]
+#define NCBI8NA &at[776]
-#define NCBI8AA &at[764]
+#define NCBIPNA &at[778]
-#define NCBIEAA &at[766]
+#define NCBI8AA &at[780]
-#define NCBIPAA &at[768]
+#define NCBIEAA &at[782]
-#define NCBISTDAA &at[770]
+#define NCBIPAA &at[784]
-#define ANNOT_ID &at[972]
-#define ANNOT_ID_local &at[973]
-#define ANNOT_ID_ncbi &at[975]
-#define ANNOT_ID_general &at[976]
+#define NCBISTDAA &at[786]
-#define ANNOT_DESCR &at[980]
-#define ANNOT_DESCR_E &at[981]
+#define ANNOT_ID &at[987]
+#define ANNOT_ID_local &at[988]
+#define ANNOT_ID_ncbi &at[990]
+#define ANNOT_ID_general &at[991]
-#define ANNOTDESC &at[982]
-#define ANNOTDESC_name &at[983]
-#define ANNOTDESC_title &at[984]
-#define ANNOTDESC_comment &at[985]
-#define ANNOTDESC_pub &at[986]
-#define ANNOTDESC_user &at[987]
-#define ANNOTDESC_create_date &at[988]
-#define ANNOTDESC_update_date &at[989]
-#define ANNOTDESC_src &at[990]
-#define ANNOTDESC_align &at[991]
-#define ANNOTDESC_region &at[996]
+#define ANNOT_DESCR &at[995]
+#define ANNOT_DESCR_E &at[996]
-#define ALIGN_DEF &at[992]
-#define ALIGN_DEF_align_type &at[993]
-#define ALIGN_DEF_ids &at[994]
-#define ALIGN_DEF_ids_E &at[995]
+#define ALIGN_DEF &at[555]
+#define ALIGN_DEF_align_type &at[556]
+#define ALIGN_DEF_ids &at[557]
+#define ALIGN_DEF_ids_E &at[558]
/**************************************************
@@ -3051,26 +3032,26 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define EMBL_DBNAME &at[681]
-#define EMBL_DBNAME_code &at[682]
-#define EMBL_DBNAME_name &at[683]
+#define EMBL_DBNAME &at[698]
+#define EMBL_DBNAME_code &at[699]
+#define EMBL_DBNAME_name &at[700]
-#define EMBL_XREF &at[679]
-#define EMBL_XREF_dbname &at[680]
-#define EMBL_XREF_id &at[684]
-#define EMBL_XREF_id_E &at[685]
+#define EMBL_XREF &at[696]
+#define EMBL_XREF_dbname &at[697]
+#define EMBL_XREF_id &at[701]
+#define EMBL_XREF_id_E &at[702]
-#define EMBL_BLOCK &at[667]
-#define EMBL_BLOCK_class &at[668]
-#define EMBL_BLOCK_div &at[669]
-#define EMBL_BLOCK_creation_date &at[670]
-#define EMBL_BLOCK_update_date &at[672]
-#define EMBL_BLOCK_extra_acc &at[673]
-#define EMBL_BLOCK_extra_acc_E &at[674]
-#define EMBL_BLOCK_keywords &at[675]
-#define EMBL_BLOCK_keywords_E &at[676]
-#define EMBL_BLOCK_xref &at[677]
-#define EMBL_BLOCK_xref_E &at[678]
+#define EMBL_BLOCK &at[684]
+#define EMBL_BLOCK_class &at[685]
+#define EMBL_BLOCK_div &at[686]
+#define EMBL_BLOCK_creation_date &at[687]
+#define EMBL_BLOCK_update_date &at[689]
+#define EMBL_BLOCK_extra_acc &at[690]
+#define EMBL_BLOCK_extra_acc_E &at[691]
+#define EMBL_BLOCK_keywords &at[692]
+#define EMBL_BLOCK_keywords_E &at[693]
+#define EMBL_BLOCK_xref &at[694]
+#define EMBL_BLOCK_xref_E &at[695]
/**************************************************
@@ -3079,22 +3060,22 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define SP_BLOCK &at[645]
-#define SP_BLOCK_class &at[646]
-#define SP_BLOCK_extra_acc &at[647]
-#define SP_BLOCK_extra_acc_E &at[648]
-#define SP_BLOCK_imeth &at[649]
-#define SP_BLOCK_plasnm &at[650]
-#define SP_BLOCK_plasnm_E &at[651]
-#define SP_BLOCK_seqref &at[652]
-#define SP_BLOCK_seqref_E &at[653]
-#define SP_BLOCK_dbref &at[655]
-#define SP_BLOCK_dbref_E &at[656]
-#define SP_BLOCK_keywords &at[658]
-#define SP_BLOCK_keywords_E &at[659]
-#define SP_BLOCK_created &at[660]
-#define SP_BLOCK_sequpd &at[662]
-#define SP_BLOCK_annotupd &at[663]
+#define SP_BLOCK &at[662]
+#define SP_BLOCK_class &at[663]
+#define SP_BLOCK_extra_acc &at[664]
+#define SP_BLOCK_extra_acc_E &at[665]
+#define SP_BLOCK_imeth &at[666]
+#define SP_BLOCK_plasnm &at[667]
+#define SP_BLOCK_plasnm_E &at[668]
+#define SP_BLOCK_seqref &at[669]
+#define SP_BLOCK_seqref_E &at[670]
+#define SP_BLOCK_dbref &at[672]
+#define SP_BLOCK_dbref_E &at[673]
+#define SP_BLOCK_keywords &at[675]
+#define SP_BLOCK_keywords_E &at[676]
+#define SP_BLOCK_created &at[677]
+#define SP_BLOCK_sequpd &at[679]
+#define SP_BLOCK_annotupd &at[680]
/**************************************************
@@ -3103,22 +3084,22 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define PIR_BLOCK &at[594]
-#define PIR_BLOCK_had_punct &at[595]
-#define PIR_BLOCK_host &at[596]
-#define PIR_BLOCK_source &at[597]
-#define PIR_BLOCK_summary &at[598]
-#define PIR_BLOCK_genetic &at[599]
-#define PIR_BLOCK_includes &at[600]
-#define PIR_BLOCK_placement &at[601]
-#define PIR_BLOCK_superfamily &at[602]
-#define PIR_BLOCK_keywords &at[603]
-#define PIR_BLOCK_keywords_E &at[604]
-#define PIR_BLOCK_cross_reference &at[605]
-#define PIR_BLOCK_date &at[606]
-#define PIR_BLOCK_seq_raw &at[607]
-#define PIR_BLOCK_seqref &at[608]
-#define PIR_BLOCK_seqref_E &at[609]
+#define PIR_BLOCK &at[626]
+#define PIR_BLOCK_had_punct &at[627]
+#define PIR_BLOCK_host &at[628]
+#define PIR_BLOCK_source &at[629]
+#define PIR_BLOCK_summary &at[630]
+#define PIR_BLOCK_genetic &at[631]
+#define PIR_BLOCK_includes &at[632]
+#define PIR_BLOCK_placement &at[633]
+#define PIR_BLOCK_superfamily &at[634]
+#define PIR_BLOCK_keywords &at[635]
+#define PIR_BLOCK_keywords_E &at[636]
+#define PIR_BLOCK_cross_reference &at[637]
+#define PIR_BLOCK_date &at[638]
+#define PIR_BLOCK_seq_raw &at[639]
+#define PIR_BLOCK_seqref &at[640]
+#define PIR_BLOCK_seqref_E &at[641]
/**************************************************
@@ -3127,17 +3108,17 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define GB_BLOCK &at[613]
-#define GB_BLOCK_extra_accessions &at[614]
-#define GB_BLOCK_extra_accessions_E &at[615]
-#define GB_BLOCK_source &at[616]
-#define GB_BLOCK_keywords &at[617]
-#define GB_BLOCK_keywords_E &at[618]
-#define GB_BLOCK_origin &at[619]
-#define GB_BLOCK_date &at[620]
-#define GB_BLOCK_entry_date &at[621]
-#define GB_BLOCK_div &at[623]
-#define GB_BLOCK_taxonomy &at[624]
+#define GB_BLOCK &at[645]
+#define GB_BLOCK_extra_accessions &at[646]
+#define GB_BLOCK_extra_accessions_E &at[647]
+#define GB_BLOCK_source &at[648]
+#define GB_BLOCK_keywords &at[649]
+#define GB_BLOCK_keywords_E &at[650]
+#define GB_BLOCK_origin &at[651]
+#define GB_BLOCK_date &at[652]
+#define GB_BLOCK_entry_date &at[653]
+#define GB_BLOCK_div &at[655]
+#define GB_BLOCK_taxonomy &at[656]
/**************************************************
@@ -3146,17 +3127,17 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define PRF_BLOCK &at[692]
-#define PRF_BLOCK_extra_src &at[693]
-#define PRF_BLOCK_keywords &at[700]
-#define PRF_BLOCK_keywords_E &at[701]
+#define PRF_BLOCK &at[708]
+#define PRF_BLOCK_extra_src &at[709]
+#define PRF_BLOCK_keywords &at[716]
+#define PRF_BLOCK_keywords_E &at[717]
-#define PRF_EXTRASRC &at[694]
-#define PRF_EXTRASRC_host &at[695]
-#define PRF_EXTRASRC_part &at[696]
-#define PRF_EXTRASRC_state &at[697]
-#define PRF_EXTRASRC_strain &at[698]
-#define PRF_EXTRASRC_taxon &at[699]
+#define PRF_EXTRASRC &at[710]
+#define PRF_EXTRASRC_host &at[711]
+#define PRF_EXTRASRC_part &at[712]
+#define PRF_EXTRASRC_state &at[713]
+#define PRF_EXTRASRC_strain &at[714]
+#define PRF_EXTRASRC_taxon &at[715]
/**************************************************
@@ -3165,20 +3146,20 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define PDB_BLOCK &at[704]
-#define PDB_BLOCK_deposition &at[705]
-#define PDB_BLOCK_class &at[707]
-#define PDB_BLOCK_compound &at[708]
-#define PDB_BLOCK_compound_E &at[709]
-#define PDB_BLOCK_source &at[710]
-#define PDB_BLOCK_source_E &at[711]
-#define PDB_BLOCK_exp_method &at[712]
-#define PDB_BLOCK_replace &at[713]
+#define PDB_BLOCK &at[720]
+#define PDB_BLOCK_deposition &at[721]
+#define PDB_BLOCK_class &at[723]
+#define PDB_BLOCK_compound &at[724]
+#define PDB_BLOCK_compound_E &at[725]
+#define PDB_BLOCK_source &at[726]
+#define PDB_BLOCK_source_E &at[727]
+#define PDB_BLOCK_exp_method &at[728]
+#define PDB_BLOCK_replace &at[729]
-#define PDB_REPLACE &at[714]
-#define PDB_REPLACE_date &at[715]
-#define PDB_REPLACE_ids &at[716]
-#define PDB_REPLACE_ids_E &at[717]
+#define PDB_REPLACE &at[730]
+#define PDB_REPLACE_date &at[731]
+#define PDB_REPLACE_ids &at[732]
+#define PDB_REPLACE_ids_E &at[733]
/**************************************************
@@ -3187,26 +3168,26 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define SEQ_FEAT &at[783]
-#define SEQ_FEAT_id &at[784]
-#define SEQ_FEAT_data &at[785]
-#define SEQ_FEAT_partial &at[920]
-#define SEQ_FEAT_except &at[921]
-#define SEQ_FEAT_comment &at[922]
-#define SEQ_FEAT_product &at[923]
-#define SEQ_FEAT_location &at[924]
-#define SEQ_FEAT_qual &at[925]
-#define SEQ_FEAT_qual_E &at[926]
-#define SEQ_FEAT_title &at[930]
-#define SEQ_FEAT_ext &at[931]
-#define SEQ_FEAT_cit &at[932]
-#define SEQ_FEAT_exp_ev &at[934]
-#define SEQ_FEAT_xref &at[935]
-#define SEQ_FEAT_xref_E &at[936]
-#define SEQ_FEAT_dbxref &at[940]
-#define SEQ_FEAT_dbxref_E &at[941]
-#define SEQ_FEAT_pseudo &at[942]
-#define SEQ_FEAT_except_text &at[943]
+#define SEQ_FEAT &at[798]
+#define SEQ_FEAT_id &at[799]
+#define SEQ_FEAT_data &at[800]
+#define SEQ_FEAT_partial &at[935]
+#define SEQ_FEAT_except &at[936]
+#define SEQ_FEAT_comment &at[937]
+#define SEQ_FEAT_product &at[938]
+#define SEQ_FEAT_location &at[939]
+#define SEQ_FEAT_qual &at[940]
+#define SEQ_FEAT_qual_E &at[941]
+#define SEQ_FEAT_title &at[945]
+#define SEQ_FEAT_ext &at[946]
+#define SEQ_FEAT_cit &at[947]
+#define SEQ_FEAT_exp_ev &at[949]
+#define SEQ_FEAT_xref &at[950]
+#define SEQ_FEAT_xref_E &at[951]
+#define SEQ_FEAT_dbxref &at[955]
+#define SEQ_FEAT_dbxref_E &at[956]
+#define SEQ_FEAT_pseudo &at[957]
+#define SEQ_FEAT_except_text &at[958]
#define FEAT_ID &at[427]
#define FEAT_ID_gibb &at[428]
@@ -3214,69 +3195,69 @@ static AsnModulePtr amp = ampx;
#define FEAT_ID_local &at[431]
#define FEAT_ID_general &at[433]
-#define GENETIC_CODE &at[811]
-#define GENETIC_CODE_E &at[812]
-#define GENETIC_CODE_E_name &at[813]
-#define GENETIC_CODE_E_id &at[814]
-#define GENETIC_CODE_E_ncbieaa &at[815]
-#define GENETIC_CODE_E_ncbi8aa &at[816]
-#define GENETIC_CODE_E_ncbistdaa &at[817]
-#define GENETIC_CODE_E_sncbieaa &at[818]
-#define GENETIC_CODE_E_sncbi8aa &at[819]
-#define GENETIC_CODE_E_sncbistdaa &at[820]
-
-#define SEQFEATDATA &at[786]
-#define SEQFEATDATA_gene &at[787]
-#define SEQFEATDATA_org &at[801]
-#define SEQFEATDATA_cdregion &at[803]
-#define SEQFEATDATA_prot &at[831]
-#define SEQFEATDATA_rna &at[845]
-#define SEQFEATDATA_pub &at[863]
-#define SEQFEATDATA_seq &at[865]
-#define SEQFEATDATA_imp &at[866]
-#define SEQFEATDATA_region &at[871]
-#define SEQFEATDATA_comment &at[872]
-#define SEQFEATDATA_bond &at[873]
-#define SEQFEATDATA_site &at[874]
-#define SEQFEATDATA_rsite &at[875]
-#define SEQFEATDATA_user &at[881]
-#define SEQFEATDATA_txinit &at[883]
-#define SEQFEATDATA_num &at[912]
-#define SEQFEATDATA_psec_str &at[914]
-#define SEQFEATDATA_non_std_residue &at[915]
-#define SEQFEATDATA_het &at[916]
-#define SEQFEATDATA_biosrc &at[918]
-
-#define GB_QUAL &at[927]
-#define GB_QUAL_qual &at[928]
-#define GB_QUAL_val &at[929]
-
-#define SEQFEATXREF &at[937]
-#define SEQFEATXREF_id &at[938]
-#define SEQFEATXREF_data &at[939]
-
-#define CDREGION &at[804]
-#define CDREGION_orf &at[805]
-#define CDREGION_frame &at[806]
-#define CDREGION_conflict &at[807]
-#define CDREGION_gaps &at[808]
-#define CDREGION_mismatch &at[809]
-#define CDREGION_code &at[810]
-#define CDREGION_code_break &at[821]
-#define CDREGION_code_break_E &at[822]
-#define CDREGION_stops &at[830]
-
-#define IMP_FEAT &at[867]
-#define IMP_FEAT_key &at[868]
-#define IMP_FEAT_loc &at[869]
-#define IMP_FEAT_descr &at[870]
-
-#define CODE_BREAK &at[823]
-#define CODE_BREAK_loc &at[824]
-#define CODE_BREAK_aa &at[826]
-#define CODE_BREAK_aa_ncbieaa &at[827]
-#define CODE_BREAK_aa_ncbi8aa &at[828]
-#define CODE_BREAK_aa_ncbistdaa &at[829]
+#define GENETIC_CODE &at[826]
+#define GENETIC_CODE_E &at[827]
+#define GENETIC_CODE_E_name &at[828]
+#define GENETIC_CODE_E_id &at[829]
+#define GENETIC_CODE_E_ncbieaa &at[830]
+#define GENETIC_CODE_E_ncbi8aa &at[831]
+#define GENETIC_CODE_E_ncbistdaa &at[832]
+#define GENETIC_CODE_E_sncbieaa &at[833]
+#define GENETIC_CODE_E_sncbi8aa &at[834]
+#define GENETIC_CODE_E_sncbistdaa &at[835]
+
+#define SEQFEATDATA &at[801]
+#define SEQFEATDATA_gene &at[802]
+#define SEQFEATDATA_org &at[816]
+#define SEQFEATDATA_cdregion &at[818]
+#define SEQFEATDATA_prot &at[846]
+#define SEQFEATDATA_rna &at[860]
+#define SEQFEATDATA_pub &at[878]
+#define SEQFEATDATA_seq &at[880]
+#define SEQFEATDATA_imp &at[881]
+#define SEQFEATDATA_region &at[886]
+#define SEQFEATDATA_comment &at[887]
+#define SEQFEATDATA_bond &at[888]
+#define SEQFEATDATA_site &at[889]
+#define SEQFEATDATA_rsite &at[890]
+#define SEQFEATDATA_user &at[896]
+#define SEQFEATDATA_txinit &at[898]
+#define SEQFEATDATA_num &at[927]
+#define SEQFEATDATA_psec_str &at[929]
+#define SEQFEATDATA_non_std_residue &at[930]
+#define SEQFEATDATA_het &at[931]
+#define SEQFEATDATA_biosrc &at[933]
+
+#define GB_QUAL &at[942]
+#define GB_QUAL_qual &at[943]
+#define GB_QUAL_val &at[944]
+
+#define SEQFEATXREF &at[952]
+#define SEQFEATXREF_id &at[953]
+#define SEQFEATXREF_data &at[954]
+
+#define CDREGION &at[819]
+#define CDREGION_orf &at[820]
+#define CDREGION_frame &at[821]
+#define CDREGION_conflict &at[822]
+#define CDREGION_gaps &at[823]
+#define CDREGION_mismatch &at[824]
+#define CDREGION_code &at[825]
+#define CDREGION_code_break &at[836]
+#define CDREGION_code_break_E &at[837]
+#define CDREGION_stops &at[845]
+
+#define IMP_FEAT &at[882]
+#define IMP_FEAT_key &at[883]
+#define IMP_FEAT_loc &at[884]
+#define IMP_FEAT_descr &at[885]
+
+#define CODE_BREAK &at[838]
+#define CODE_BREAK_loc &at[839]
+#define CODE_BREAK_aa &at[841]
+#define CODE_BREAK_aa_ncbieaa &at[842]
+#define CODE_BREAK_aa_ncbi8aa &at[843]
+#define CODE_BREAK_aa_ncbistdaa &at[844]
#define GENETIC_CODE_TABLE &at[1086]
#define GENETIC_CODE_TABLE_E &at[1087]
@@ -3288,9 +3269,9 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define RSITE_REF &at[877]
-#define RSITE_REF_str &at[878]
-#define RSITE_REF_db &at[879]
+#define RSITE_REF &at[892]
+#define RSITE_REF_str &at[893]
+#define RSITE_REF_db &at[894]
/**************************************************
@@ -3299,22 +3280,22 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define RNA_REF &at[847]
-#define RNA_REF_type &at[848]
-#define RNA_REF_pseudo &at[849]
-#define RNA_REF_ext &at[850]
-#define RNA_REF_ext_name &at[851]
-#define RNA_REF_ext_tRNA &at[852]
+#define RNA_REF &at[862]
+#define RNA_REF_type &at[863]
+#define RNA_REF_pseudo &at[864]
+#define RNA_REF_ext &at[865]
+#define RNA_REF_ext_name &at[866]
+#define RNA_REF_ext_tRNA &at[867]
-#define TRNA_EXT &at[853]
-#define TRNA_EXT_aa &at[854]
-#define TRNA_EXT_aa_iupacaa &at[855]
-#define TRNA_EXT_aa_ncbieaa &at[856]
-#define TRNA_EXT_aa_ncbi8aa &at[857]
-#define TRNA_EXT_aa_ncbistdaa &at[858]
-#define TRNA_EXT_codon &at[859]
-#define TRNA_EXT_codon_E &at[860]
-#define TRNA_EXT_anticodon &at[861]
+#define TRNA_EXT &at[868]
+#define TRNA_EXT_aa &at[869]
+#define TRNA_EXT_aa_iupacaa &at[870]
+#define TRNA_EXT_aa_ncbieaa &at[871]
+#define TRNA_EXT_aa_ncbi8aa &at[872]
+#define TRNA_EXT_aa_ncbistdaa &at[873]
+#define TRNA_EXT_codon &at[874]
+#define TRNA_EXT_codon_E &at[875]
+#define TRNA_EXT_anticodon &at[876]
/**************************************************
@@ -3323,17 +3304,17 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define GENE_REF &at[789]
-#define GENE_REF_locus &at[790]
-#define GENE_REF_allele &at[791]
-#define GENE_REF_desc &at[792]
-#define GENE_REF_maploc &at[793]
-#define GENE_REF_pseudo &at[794]
-#define GENE_REF_db &at[795]
-#define GENE_REF_db_E &at[796]
-#define GENE_REF_syn &at[798]
-#define GENE_REF_syn_E &at[799]
-#define GENE_REF_locus_tag &at[800]
+#define GENE_REF &at[804]
+#define GENE_REF_locus &at[805]
+#define GENE_REF_allele &at[806]
+#define GENE_REF_desc &at[807]
+#define GENE_REF_maploc &at[808]
+#define GENE_REF_pseudo &at[809]
+#define GENE_REF_db &at[810]
+#define GENE_REF_db_E &at[811]
+#define GENE_REF_syn &at[813]
+#define GENE_REF_syn_E &at[814]
+#define GENE_REF_locus_tag &at[815]
/**************************************************
@@ -3342,52 +3323,52 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define ORG_REF &at[454]
-#define ORG_REF_taxname &at[455]
-#define ORG_REF_common &at[456]
-#define ORG_REF_mod &at[457]
-#define ORG_REF_mod_E &at[458]
-#define ORG_REF_db &at[459]
-#define ORG_REF_db_E &at[460]
-#define ORG_REF_syn &at[462]
-#define ORG_REF_syn_E &at[463]
-#define ORG_REF_orgname &at[464]
-
-#define ORGNAME &at[465]
-#define ORGNAME_name &at[466]
-#define ORGNAME_name_binomial &at[467]
-#define ORGNAME_name_virus &at[472]
-#define ORGNAME_name_hybrid &at[473]
-#define ORGNAME_name_namedhybrid &at[476]
-#define ORGNAME_name_partial &at[477]
-#define ORGNAME_attrib &at[484]
-#define ORGNAME_mod &at[485]
-#define ORGNAME_mod_E &at[486]
-#define ORGNAME_lineage &at[491]
-#define ORGNAME_gcode &at[492]
-#define ORGNAME_mgcode &at[493]
-#define ORGNAME_div &at[494]
-
-#define BINOMIALORGNAME &at[468]
-#define BINOMIALORGNAME_genus &at[469]
-#define BINOMIALORGNAME_species &at[470]
-#define BINOMIALORGNAME_subspecies &at[471]
-
-#define MULTIORGNAME &at[474]
-#define MULTIORGNAME_E &at[475]
-
-#define PARTIALORGNAME &at[478]
-#define PARTIALORGNAME_E &at[479]
-
-#define ORGMOD &at[487]
-#define ORGMOD_subtype &at[488]
-#define ORGMOD_subname &at[489]
-#define ORGMOD_attrib &at[490]
-
-#define TAXELEMENT &at[480]
-#define TAXELEMENT_fixed_level &at[481]
-#define TAXELEMENT_level &at[482]
-#define TAXELEMENT_name &at[483]
+#define ORG_REF &at[579]
+#define ORG_REF_taxname &at[580]
+#define ORG_REF_common &at[581]
+#define ORG_REF_mod &at[582]
+#define ORG_REF_mod_E &at[583]
+#define ORG_REF_db &at[584]
+#define ORG_REF_db_E &at[585]
+#define ORG_REF_syn &at[587]
+#define ORG_REF_syn_E &at[588]
+#define ORG_REF_orgname &at[589]
+
+#define ORGNAME &at[590]
+#define ORGNAME_name &at[591]
+#define ORGNAME_name_binomial &at[592]
+#define ORGNAME_name_virus &at[597]
+#define ORGNAME_name_hybrid &at[598]
+#define ORGNAME_name_namedhybrid &at[601]
+#define ORGNAME_name_partial &at[602]
+#define ORGNAME_attrib &at[609]
+#define ORGNAME_mod &at[610]
+#define ORGNAME_mod_E &at[611]
+#define ORGNAME_lineage &at[616]
+#define ORGNAME_gcode &at[617]
+#define ORGNAME_mgcode &at[618]
+#define ORGNAME_div &at[619]
+
+#define BINOMIALORGNAME &at[593]
+#define BINOMIALORGNAME_genus &at[594]
+#define BINOMIALORGNAME_species &at[595]
+#define BINOMIALORGNAME_subspecies &at[596]
+
+#define MULTIORGNAME &at[599]
+#define MULTIORGNAME_E &at[600]
+
+#define PARTIALORGNAME &at[603]
+#define PARTIALORGNAME_E &at[604]
+
+#define ORGMOD &at[612]
+#define ORGMOD_subtype &at[613]
+#define ORGMOD_subname &at[614]
+#define ORGMOD_attrib &at[615]
+
+#define TAXELEMENT &at[605]
+#define TAXELEMENT_fixed_level &at[606]
+#define TAXELEMENT_level &at[607]
+#define TAXELEMENT_name &at[608]
/**************************************************
@@ -3396,18 +3377,18 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define BIOSOURCE &at[722]
-#define BIOSOURCE_genome &at[723]
-#define BIOSOURCE_origin &at[724]
-#define BIOSOURCE_org &at[725]
-#define BIOSOURCE_subtype &at[727]
-#define BIOSOURCE_subtype_E &at[728]
-#define BIOSOURCE_is_focus &at[733]
+#define BIOSOURCE &at[738]
+#define BIOSOURCE_genome &at[739]
+#define BIOSOURCE_origin &at[740]
+#define BIOSOURCE_org &at[741]
+#define BIOSOURCE_subtype &at[743]
+#define BIOSOURCE_subtype_E &at[744]
+#define BIOSOURCE_is_focus &at[749]
-#define SUBSOURCE &at[729]
-#define SUBSOURCE_subtype &at[730]
-#define SUBSOURCE_name &at[731]
-#define SUBSOURCE_attrib &at[732]
+#define SUBSOURCE &at[745]
+#define SUBSOURCE_subtype &at[746]
+#define SUBSOURCE_name &at[747]
+#define SUBSOURCE_attrib &at[748]
/**************************************************
@@ -3416,17 +3397,17 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define PROT_REF &at[833]
-#define PROT_REF_name &at[834]
-#define PROT_REF_name_E &at[835]
-#define PROT_REF_desc &at[836]
-#define PROT_REF_ec &at[837]
-#define PROT_REF_ec_E &at[838]
-#define PROT_REF_activity &at[839]
-#define PROT_REF_activity_E &at[840]
-#define PROT_REF_db &at[841]
-#define PROT_REF_db_E &at[842]
-#define PROT_REF_processed &at[844]
+#define PROT_REF &at[848]
+#define PROT_REF_name &at[849]
+#define PROT_REF_name_E &at[850]
+#define PROT_REF_desc &at[851]
+#define PROT_REF_ec &at[852]
+#define PROT_REF_ec_E &at[853]
+#define PROT_REF_activity &at[854]
+#define PROT_REF_activity_E &at[855]
+#define PROT_REF_db &at[856]
+#define PROT_REF_db_E &at[857]
+#define PROT_REF_processed &at[859]
/**************************************************
@@ -3435,31 +3416,31 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define TXINIT &at[885]
-#define TXINIT_name &at[886]
-#define TXINIT_syn &at[887]
-#define TXINIT_syn_E &at[888]
-#define TXINIT_gene &at[889]
-#define TXINIT_gene_E &at[890]
-#define TXINIT_protein &at[892]
-#define TXINIT_protein_E &at[893]
-#define TXINIT_rna &at[895]
-#define TXINIT_rna_E &at[896]
-#define TXINIT_expression &at[897]
-#define TXINIT_txsystem &at[898]
-#define TXINIT_txdescr &at[899]
-#define TXINIT_txorg &at[900]
-#define TXINIT_mapping_precise &at[902]
-#define TXINIT_location_accurate &at[903]
-#define TXINIT_inittype &at[904]
-#define TXINIT_evidence &at[905]
-#define TXINIT_evidence_E &at[906]
-
-#define TX_EVIDENCE &at[907]
-#define TX_EVIDENCE_exp_code &at[908]
-#define TX_EVIDENCE_expression_system &at[909]
-#define TX_EVIDENCE_low_prec_data &at[910]
-#define TX_EVIDENCE_from_homolog &at[911]
+#define TXINIT &at[900]
+#define TXINIT_name &at[901]
+#define TXINIT_syn &at[902]
+#define TXINIT_syn_E &at[903]
+#define TXINIT_gene &at[904]
+#define TXINIT_gene_E &at[905]
+#define TXINIT_protein &at[907]
+#define TXINIT_protein_E &at[908]
+#define TXINIT_rna &at[910]
+#define TXINIT_rna_E &at[911]
+#define TXINIT_expression &at[912]
+#define TXINIT_txsystem &at[913]
+#define TXINIT_txdescr &at[914]
+#define TXINIT_txorg &at[915]
+#define TXINIT_mapping_precise &at[917]
+#define TXINIT_location_accurate &at[918]
+#define TXINIT_inittype &at[919]
+#define TXINIT_evidence &at[920]
+#define TXINIT_evidence_E &at[921]
+
+#define TX_EVIDENCE &at[922]
+#define TX_EVIDENCE_exp_code &at[923]
+#define TX_EVIDENCE_expression_system &at[924]
+#define TX_EVIDENCE_low_prec_data &at[925]
+#define TX_EVIDENCE_from_homolog &at[926]
/**************************************************
@@ -3468,83 +3449,83 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define SEQ_ALIGN &at[513]
-#define SEQ_ALIGN_type &at[514]
-#define SEQ_ALIGN_dim &at[515]
-#define SEQ_ALIGN_score &at[516]
-#define SEQ_ALIGN_score_E &at[517]
-#define SEQ_ALIGN_segs &at[524]
-#define SEQ_ALIGN_segs_dendiag &at[525]
-#define SEQ_ALIGN_segs_dendiag_E &at[526]
-#define SEQ_ALIGN_segs_denseg &at[540]
-#define SEQ_ALIGN_segs_std &at[554]
-#define SEQ_ALIGN_segs_std_E &at[555]
-#define SEQ_ALIGN_segs_packed &at[565]
-#define SEQ_ALIGN_segs_disc &at[580]
-#define SEQ_ALIGN_bounds &at[583]
-#define SEQ_ALIGN_bounds_E &at[584]
-
-#define SCORE &at[518]
-#define SCORE_id &at[519]
-#define SCORE_value &at[521]
-#define SCORE_value_real &at[522]
-#define SCORE_value_int &at[523]
+#define SEQ_ALIGN &at[462]
+#define SEQ_ALIGN_type &at[463]
+#define SEQ_ALIGN_dim &at[464]
+#define SEQ_ALIGN_score &at[465]
+#define SEQ_ALIGN_score_E &at[466]
+#define SEQ_ALIGN_segs &at[473]
+#define SEQ_ALIGN_segs_dendiag &at[474]
+#define SEQ_ALIGN_segs_dendiag_E &at[475]
+#define SEQ_ALIGN_segs_denseg &at[489]
+#define SEQ_ALIGN_segs_std &at[503]
+#define SEQ_ALIGN_segs_std_E &at[504]
+#define SEQ_ALIGN_segs_packed &at[514]
+#define SEQ_ALIGN_segs_disc &at[529]
+#define SEQ_ALIGN_bounds &at[532]
+#define SEQ_ALIGN_bounds_E &at[533]
+
+#define SCORE &at[467]
+#define SCORE_id &at[468]
+#define SCORE_value &at[470]
+#define SCORE_value_real &at[471]
+#define SCORE_value_int &at[472]
#define SCORE_SET &at[1088]
#define SCORE_SET_E &at[1089]
-#define SEQ_ALIGN_SET &at[581]
-#define SEQ_ALIGN_SET_E &at[582]
-
-#define DENSE_DIAG &at[527]
-#define DENSE_DIAG_dim &at[528]
-#define DENSE_DIAG_ids &at[529]
-#define DENSE_DIAG_ids_E &at[530]
-#define DENSE_DIAG_starts &at[532]
-#define DENSE_DIAG_starts_E &at[533]
-#define DENSE_DIAG_len &at[534]
-#define DENSE_DIAG_strands &at[535]
-#define DENSE_DIAG_strands_E &at[536]
-#define DENSE_DIAG_scores &at[538]
-#define DENSE_DIAG_scores_E &at[539]
-
-#define DENSE_SEG &at[541]
-#define DENSE_SEG_dim &at[542]
-#define DENSE_SEG_numseg &at[543]
-#define DENSE_SEG_ids &at[544]
-#define DENSE_SEG_ids_E &at[545]
-#define DENSE_SEG_starts &at[546]
-#define DENSE_SEG_starts_E &at[547]
-#define DENSE_SEG_lens &at[548]
-#define DENSE_SEG_lens_E &at[549]
-#define DENSE_SEG_strands &at[550]
-#define DENSE_SEG_strands_E &at[551]
-#define DENSE_SEG_scores &at[552]
-#define DENSE_SEG_scores_E &at[553]
-
-#define STD_SEG &at[556]
-#define STD_SEG_dim &at[557]
-#define STD_SEG_ids &at[558]
-#define STD_SEG_ids_E &at[559]
-#define STD_SEG_loc &at[560]
-#define STD_SEG_loc_E &at[561]
-#define STD_SEG_scores &at[563]
-#define STD_SEG_scores_E &at[564]
-
-#define PACKED_SEG &at[566]
-#define PACKED_SEG_dim &at[567]
-#define PACKED_SEG_numseg &at[568]
-#define PACKED_SEG_ids &at[569]
-#define PACKED_SEG_ids_E &at[570]
-#define PACKED_SEG_starts &at[571]
-#define PACKED_SEG_starts_E &at[572]
-#define PACKED_SEG_present &at[573]
-#define PACKED_SEG_lens &at[574]
-#define PACKED_SEG_lens_E &at[575]
-#define PACKED_SEG_strands &at[576]
-#define PACKED_SEG_strands_E &at[577]
-#define PACKED_SEG_scores &at[578]
-#define PACKED_SEG_scores_E &at[579]
+#define SEQ_ALIGN_SET &at[530]
+#define SEQ_ALIGN_SET_E &at[531]
+
+#define DENSE_DIAG &at[476]
+#define DENSE_DIAG_dim &at[477]
+#define DENSE_DIAG_ids &at[478]
+#define DENSE_DIAG_ids_E &at[479]
+#define DENSE_DIAG_starts &at[481]
+#define DENSE_DIAG_starts_E &at[482]
+#define DENSE_DIAG_len &at[483]
+#define DENSE_DIAG_strands &at[484]
+#define DENSE_DIAG_strands_E &at[485]
+#define DENSE_DIAG_scores &at[487]
+#define DENSE_DIAG_scores_E &at[488]
+
+#define DENSE_SEG &at[490]
+#define DENSE_SEG_dim &at[491]
+#define DENSE_SEG_numseg &at[492]
+#define DENSE_SEG_ids &at[493]
+#define DENSE_SEG_ids_E &at[494]
+#define DENSE_SEG_starts &at[495]
+#define DENSE_SEG_starts_E &at[496]
+#define DENSE_SEG_lens &at[497]
+#define DENSE_SEG_lens_E &at[498]
+#define DENSE_SEG_strands &at[499]
+#define DENSE_SEG_strands_E &at[500]
+#define DENSE_SEG_scores &at[501]
+#define DENSE_SEG_scores_E &at[502]
+
+#define STD_SEG &at[505]
+#define STD_SEG_dim &at[506]
+#define STD_SEG_ids &at[507]
+#define STD_SEG_ids_E &at[508]
+#define STD_SEG_loc &at[509]
+#define STD_SEG_loc_E &at[510]
+#define STD_SEG_scores &at[512]
+#define STD_SEG_scores_E &at[513]
+
+#define PACKED_SEG &at[515]
+#define PACKED_SEG_dim &at[516]
+#define PACKED_SEG_numseg &at[517]
+#define PACKED_SEG_ids &at[518]
+#define PACKED_SEG_ids_E &at[519]
+#define PACKED_SEG_starts &at[520]
+#define PACKED_SEG_starts_E &at[521]
+#define PACKED_SEG_present &at[522]
+#define PACKED_SEG_lens &at[523]
+#define PACKED_SEG_lens_E &at[524]
+#define PACKED_SEG_strands &at[525]
+#define PACKED_SEG_strands_E &at[526]
+#define PACKED_SEG_scores &at[527]
+#define PACKED_SEG_scores_E &at[528]
/**************************************************
diff --git a/asnstat/asninsdseq.h b/asnstat/asninsdseq.h
index bd6b4e2b..92cbed69 100644
--- a/asnstat/asninsdseq.h
+++ b/asnstat/asninsdseq.h
@@ -9,37 +9,16 @@
#include <asn.h>
#endif
-static char * asnfilename = "asninsdseq.h14";
-static AsnValxNode avnx[19] = {
- {20,"not-set" ,0,0.0,&avnx[1] } ,
- {20,"single-stranded" ,1,0.0,&avnx[2] } ,
- {20,"double-stranded" ,2,0.0,&avnx[3] } ,
- {20,"mixed-stranded" ,3,0.0,NULL } ,
- {3,NULL,0,0.0,NULL } ,
- {20,"nucleic-acid" ,0,0.0,&avnx[6] } ,
- {20,"dna" ,1,0.0,&avnx[7] } ,
- {20,"rna" ,2,0.0,&avnx[8] } ,
- {20,"trna" ,3,0.0,&avnx[9] } ,
- {20,"rrna" ,4,0.0,&avnx[10] } ,
- {20,"mrna" ,5,0.0,&avnx[11] } ,
- {20,"urna" ,6,0.0,&avnx[12] } ,
- {20,"snrna" ,7,0.0,&avnx[13] } ,
- {20,"snorna" ,8,0.0,&avnx[14] } ,
- {20,"peptide" ,9,0.0,NULL } ,
- {3,NULL,0,0.0,NULL } ,
- {20,"linear" ,1,0.0,&avnx[17] } ,
- {20,"circular" ,2,0.0,NULL } ,
- {3,NULL,1,0.0,NULL } };
-
+static char * asnfilename = "asninsdseq.h15";
static AsnType atx[70] = {
{401, "INSDSeq" ,1,0,0,0,0,0,0,0,NULL,&atx[44],&atx[1],0,&atx[19]} ,
{0, "locus" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[3]} ,
{323, "VisibleString" ,0,26,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
{0, "length" ,128,1,0,0,0,0,0,0,NULL,&atx[4],NULL,0,&atx[5]} ,
{302, "INTEGER" ,0,2,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
- {0, "strandedness" ,128,2,0,0,1,0,0,0,&avnx[4],&atx[4],&avnx[0],0,&atx[6]} ,
- {0, "moltype" ,128,3,0,0,1,0,0,0,&avnx[15],&atx[4],&avnx[5],0,&atx[7]} ,
- {0, "topology" ,128,4,0,0,1,0,0,0,&avnx[18],&atx[4],&avnx[16],0,&atx[8]} ,
+ {0, "strandedness" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[6]} ,
+ {0, "moltype" ,128,3,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[7]} ,
+ {0, "topology" ,128,4,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[8]} ,
{0, "division" ,128,5,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[9]} ,
{0, "update-date" ,128,6,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[10]} ,
{0, "create-date" ,128,7,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[11]} ,
@@ -104,9 +83,9 @@ static AsnType atx[70] = {
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[0],NULL,0,NULL} };
static AsnModule ampx[1] = {
- { "INSD-INSDSeq" , "asninsdseq.h14",&atx[0],NULL,NULL,0,0} };
+ { "INSD-INSDSeq" , "asninsdseq.h15",&atx[0],NULL,NULL,0,0} };
-static AsnValxNodePtr avn = avnx;
+static AsnValxNodePtr avn = NULL;
static AsnTypePtr at = atx;
static AsnModulePtr amp = ampx;
diff --git a/asnstat/asnseq.h b/asnstat/asnseq.h
index d18eb47e..c6d8dd75 100644
--- a/asnstat/asnseq.h
+++ b/asnstat/asnseq.h
@@ -9,365 +9,365 @@
#include <asn.h>
#endif
-static char * asnfilename = "asnseq.h67";
+static char * asnfilename = "asnseq.h68";
static AsnValxNode avnx[146] = {
- {20,"unknown" ,0,0.0,&avnx[1] } ,
- {20,"genomic" ,1,0.0,&avnx[2] } ,
- {20,"pre-mRNA" ,2,0.0,&avnx[3] } ,
- {20,"mRNA" ,3,0.0,&avnx[4] } ,
- {20,"rRNA" ,4,0.0,&avnx[5] } ,
- {20,"tRNA" ,5,0.0,&avnx[6] } ,
- {20,"snRNA" ,6,0.0,&avnx[7] } ,
- {20,"scRNA" ,7,0.0,&avnx[8] } ,
- {20,"peptide" ,8,0.0,&avnx[9] } ,
- {20,"other-genetic" ,9,0.0,&avnx[10] } ,
- {20,"genomic-mRNA" ,10,0.0,&avnx[11] } ,
- {20,"other" ,255,0.0,NULL } ,
- {20,"dna" ,0,0.0,&avnx[13] } ,
- {20,"rna" ,1,0.0,&avnx[14] } ,
- {20,"extrachrom" ,2,0.0,&avnx[15] } ,
- {20,"plasmid" ,3,0.0,&avnx[16] } ,
- {20,"mitochondrial" ,4,0.0,&avnx[17] } ,
- {20,"chloroplast" ,5,0.0,&avnx[18] } ,
- {20,"kinetoplast" ,6,0.0,&avnx[19] } ,
- {20,"cyanelle" ,7,0.0,&avnx[20] } ,
- {20,"synthetic" ,8,0.0,&avnx[21] } ,
- {20,"recombinant" ,9,0.0,&avnx[22] } ,
- {20,"partial" ,10,0.0,&avnx[23] } ,
- {20,"complete" ,11,0.0,&avnx[24] } ,
- {20,"mutagen" ,12,0.0,&avnx[25] } ,
- {20,"natmut" ,13,0.0,&avnx[26] } ,
- {20,"transposon" ,14,0.0,&avnx[27] } ,
- {20,"insertion-seq" ,15,0.0,&avnx[28] } ,
- {20,"no-left" ,16,0.0,&avnx[29] } ,
- {20,"no-right" ,17,0.0,&avnx[30] } ,
- {20,"macronuclear" ,18,0.0,&avnx[31] } ,
- {20,"proviral" ,19,0.0,&avnx[32] } ,
- {20,"est" ,20,0.0,&avnx[33] } ,
- {20,"sts" ,21,0.0,&avnx[34] } ,
- {20,"survey" ,22,0.0,&avnx[35] } ,
- {20,"chromoplast" ,23,0.0,&avnx[36] } ,
- {20,"genemap" ,24,0.0,&avnx[37] } ,
- {20,"restmap" ,25,0.0,&avnx[38] } ,
- {20,"physmap" ,26,0.0,&avnx[39] } ,
- {20,"other" ,255,0.0,NULL } ,
- {20,"concept-trans" ,1,0.0,&avnx[41] } ,
- {20,"seq-pept" ,2,0.0,&avnx[42] } ,
- {20,"both" ,3,0.0,&avnx[43] } ,
- {20,"seq-pept-overlap" ,4,0.0,&avnx[44] } ,
- {20,"seq-pept-homol" ,5,0.0,&avnx[45] } ,
- {20,"concept-trans-a" ,6,0.0,&avnx[46] } ,
- {20,"other" ,255,0.0,NULL } ,
{3,NULL,1,0.0,NULL } ,
{2,NULL,0,0.0,NULL } ,
{2,NULL,1,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[51] } ,
- {20,"sources" ,1,0.0,&avnx[52] } ,
+ {20,"not-set" ,0,0.0,&avnx[4] } ,
+ {20,"sources" ,1,0.0,&avnx[5] } ,
{20,"aligns" ,2,0.0,NULL } ,
- {20,"seq" ,0,0.0,&avnx[54] } ,
- {20,"sites" ,1,0.0,&avnx[55] } ,
- {20,"feats" ,2,0.0,&avnx[56] } ,
+ {20,"seq" ,0,0.0,&avnx[7] } ,
+ {20,"sites" ,1,0.0,&avnx[8] } ,
+ {20,"feats" ,2,0.0,&avnx[9] } ,
{20,"no-target" ,3,0.0,NULL } ,
{3,NULL,0,0.0,NULL } ,
- {20,"unknown" ,0,0.0,&avnx[59] } ,
- {20,"genomic" ,1,0.0,&avnx[60] } ,
- {20,"pre-RNA" ,2,0.0,&avnx[61] } ,
- {20,"mRNA" ,3,0.0,&avnx[62] } ,
- {20,"rRNA" ,4,0.0,&avnx[63] } ,
- {20,"tRNA" ,5,0.0,&avnx[64] } ,
- {20,"snRNA" ,6,0.0,&avnx[65] } ,
- {20,"scRNA" ,7,0.0,&avnx[66] } ,
- {20,"peptide" ,8,0.0,&avnx[67] } ,
- {20,"other-genetic" ,9,0.0,&avnx[68] } ,
- {20,"genomic-mRNA" ,10,0.0,&avnx[69] } ,
- {20,"cRNA" ,11,0.0,&avnx[70] } ,
- {20,"snoRNA" ,12,0.0,&avnx[71] } ,
- {20,"transcribed-RNA" ,13,0.0,&avnx[72] } ,
+ {20,"ref" ,1,0.0,&avnx[12] } ,
+ {20,"alt" ,2,0.0,&avnx[13] } ,
+ {20,"blocks" ,3,0.0,&avnx[14] } ,
+ {20,"other" ,255,0.0,NULL } ,
+ {20,"unknown" ,0,0.0,&avnx[16] } ,
+ {20,"genomic" ,1,0.0,&avnx[17] } ,
+ {20,"pre-mRNA" ,2,0.0,&avnx[18] } ,
+ {20,"mRNA" ,3,0.0,&avnx[19] } ,
+ {20,"rRNA" ,4,0.0,&avnx[20] } ,
+ {20,"tRNA" ,5,0.0,&avnx[21] } ,
+ {20,"snRNA" ,6,0.0,&avnx[22] } ,
+ {20,"scRNA" ,7,0.0,&avnx[23] } ,
+ {20,"peptide" ,8,0.0,&avnx[24] } ,
+ {20,"other-genetic" ,9,0.0,&avnx[25] } ,
+ {20,"genomic-mRNA" ,10,0.0,&avnx[26] } ,
+ {20,"other" ,255,0.0,NULL } ,
+ {20,"dna" ,0,0.0,&avnx[28] } ,
+ {20,"rna" ,1,0.0,&avnx[29] } ,
+ {20,"extrachrom" ,2,0.0,&avnx[30] } ,
+ {20,"plasmid" ,3,0.0,&avnx[31] } ,
+ {20,"mitochondrial" ,4,0.0,&avnx[32] } ,
+ {20,"chloroplast" ,5,0.0,&avnx[33] } ,
+ {20,"kinetoplast" ,6,0.0,&avnx[34] } ,
+ {20,"cyanelle" ,7,0.0,&avnx[35] } ,
+ {20,"synthetic" ,8,0.0,&avnx[36] } ,
+ {20,"recombinant" ,9,0.0,&avnx[37] } ,
+ {20,"partial" ,10,0.0,&avnx[38] } ,
+ {20,"complete" ,11,0.0,&avnx[39] } ,
+ {20,"mutagen" ,12,0.0,&avnx[40] } ,
+ {20,"natmut" ,13,0.0,&avnx[41] } ,
+ {20,"transposon" ,14,0.0,&avnx[42] } ,
+ {20,"insertion-seq" ,15,0.0,&avnx[43] } ,
+ {20,"no-left" ,16,0.0,&avnx[44] } ,
+ {20,"no-right" ,17,0.0,&avnx[45] } ,
+ {20,"macronuclear" ,18,0.0,&avnx[46] } ,
+ {20,"proviral" ,19,0.0,&avnx[47] } ,
+ {20,"est" ,20,0.0,&avnx[48] } ,
+ {20,"sts" ,21,0.0,&avnx[49] } ,
+ {20,"survey" ,22,0.0,&avnx[50] } ,
+ {20,"chromoplast" ,23,0.0,&avnx[51] } ,
+ {20,"genemap" ,24,0.0,&avnx[52] } ,
+ {20,"restmap" ,25,0.0,&avnx[53] } ,
+ {20,"physmap" ,26,0.0,&avnx[54] } ,
+ {20,"other" ,255,0.0,NULL } ,
+ {20,"concept-trans" ,1,0.0,&avnx[56] } ,
+ {20,"seq-pept" ,2,0.0,&avnx[57] } ,
+ {20,"both" ,3,0.0,&avnx[58] } ,
+ {20,"seq-pept-overlap" ,4,0.0,&avnx[59] } ,
+ {20,"seq-pept-homol" ,5,0.0,&avnx[60] } ,
+ {20,"concept-trans-a" ,6,0.0,&avnx[61] } ,
+ {20,"other" ,255,0.0,NULL } ,
+ {20,"unknown" ,0,0.0,&avnx[63] } ,
+ {20,"genomic" ,1,0.0,&avnx[64] } ,
+ {20,"pre-RNA" ,2,0.0,&avnx[65] } ,
+ {20,"mRNA" ,3,0.0,&avnx[66] } ,
+ {20,"rRNA" ,4,0.0,&avnx[67] } ,
+ {20,"tRNA" ,5,0.0,&avnx[68] } ,
+ {20,"snRNA" ,6,0.0,&avnx[69] } ,
+ {20,"scRNA" ,7,0.0,&avnx[70] } ,
+ {20,"peptide" ,8,0.0,&avnx[71] } ,
+ {20,"other-genetic" ,9,0.0,&avnx[72] } ,
+ {20,"genomic-mRNA" ,10,0.0,&avnx[73] } ,
+ {20,"cRNA" ,11,0.0,&avnx[74] } ,
+ {20,"snoRNA" ,12,0.0,&avnx[75] } ,
+ {20,"transcribed-RNA" ,13,0.0,&avnx[76] } ,
{20,"other" ,255,0.0,NULL } ,
{3,NULL,0,0.0,NULL } ,
- {20,"unknown" ,0,0.0,&avnx[75] } ,
- {20,"standard" ,1,0.0,&avnx[76] } ,
- {20,"est" ,2,0.0,&avnx[77] } ,
- {20,"sts" ,3,0.0,&avnx[78] } ,
- {20,"survey" ,4,0.0,&avnx[79] } ,
- {20,"genemap" ,5,0.0,&avnx[80] } ,
- {20,"physmap" ,6,0.0,&avnx[81] } ,
- {20,"derived" ,7,0.0,&avnx[82] } ,
- {20,"concept-trans" ,8,0.0,&avnx[83] } ,
- {20,"seq-pept" ,9,0.0,&avnx[84] } ,
- {20,"both" ,10,0.0,&avnx[85] } ,
- {20,"seq-pept-overlap" ,11,0.0,&avnx[86] } ,
- {20,"seq-pept-homol" ,12,0.0,&avnx[87] } ,
- {20,"concept-trans-a" ,13,0.0,&avnx[88] } ,
- {20,"htgs-1" ,14,0.0,&avnx[89] } ,
- {20,"htgs-2" ,15,0.0,&avnx[90] } ,
- {20,"htgs-3" ,16,0.0,&avnx[91] } ,
- {20,"fli-cdna" ,17,0.0,&avnx[92] } ,
- {20,"htgs-0" ,18,0.0,&avnx[93] } ,
- {20,"htc" ,19,0.0,&avnx[94] } ,
- {20,"wgs" ,20,0.0,&avnx[95] } ,
+ {20,"unknown" ,0,0.0,&avnx[79] } ,
+ {20,"standard" ,1,0.0,&avnx[80] } ,
+ {20,"est" ,2,0.0,&avnx[81] } ,
+ {20,"sts" ,3,0.0,&avnx[82] } ,
+ {20,"survey" ,4,0.0,&avnx[83] } ,
+ {20,"genemap" ,5,0.0,&avnx[84] } ,
+ {20,"physmap" ,6,0.0,&avnx[85] } ,
+ {20,"derived" ,7,0.0,&avnx[86] } ,
+ {20,"concept-trans" ,8,0.0,&avnx[87] } ,
+ {20,"seq-pept" ,9,0.0,&avnx[88] } ,
+ {20,"both" ,10,0.0,&avnx[89] } ,
+ {20,"seq-pept-overlap" ,11,0.0,&avnx[90] } ,
+ {20,"seq-pept-homol" ,12,0.0,&avnx[91] } ,
+ {20,"concept-trans-a" ,13,0.0,&avnx[92] } ,
+ {20,"htgs-1" ,14,0.0,&avnx[93] } ,
+ {20,"htgs-2" ,15,0.0,&avnx[94] } ,
+ {20,"htgs-3" ,16,0.0,&avnx[95] } ,
+ {20,"fli-cdna" ,17,0.0,&avnx[96] } ,
+ {20,"htgs-0" ,18,0.0,&avnx[97] } ,
+ {20,"htc" ,19,0.0,&avnx[98] } ,
+ {20,"wgs" ,20,0.0,&avnx[99] } ,
{20,"other" ,255,0.0,NULL } ,
{3,NULL,0,0.0,NULL } ,
- {20,"unknown" ,0,0.0,&avnx[98] } ,
- {20,"complete" ,1,0.0,&avnx[99] } ,
- {20,"partial" ,2,0.0,&avnx[100] } ,
- {20,"no-left" ,3,0.0,&avnx[101] } ,
- {20,"no-right" ,4,0.0,&avnx[102] } ,
- {20,"no-ends" ,5,0.0,&avnx[103] } ,
- {20,"has-left" ,6,0.0,&avnx[104] } ,
- {20,"has-right" ,7,0.0,&avnx[105] } ,
+ {20,"unknown" ,0,0.0,&avnx[102] } ,
+ {20,"complete" ,1,0.0,&avnx[103] } ,
+ {20,"partial" ,2,0.0,&avnx[104] } ,
+ {20,"no-left" ,3,0.0,&avnx[105] } ,
+ {20,"no-right" ,4,0.0,&avnx[106] } ,
+ {20,"no-ends" ,5,0.0,&avnx[107] } ,
+ {20,"has-left" ,6,0.0,&avnx[108] } ,
+ {20,"has-right" ,7,0.0,&avnx[109] } ,
{20,"other" ,255,0.0,NULL } ,
{3,NULL,0,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[108] } ,
- {20,"virtual" ,1,0.0,&avnx[109] } ,
- {20,"raw" ,2,0.0,&avnx[110] } ,
- {20,"seg" ,3,0.0,&avnx[111] } ,
- {20,"const" ,4,0.0,&avnx[112] } ,
- {20,"ref" ,5,0.0,&avnx[113] } ,
- {20,"consen" ,6,0.0,&avnx[114] } ,
- {20,"map" ,7,0.0,&avnx[115] } ,
- {20,"delta" ,8,0.0,&avnx[116] } ,
+ {20,"not-set" ,0,0.0,&avnx[112] } ,
+ {20,"virtual" ,1,0.0,&avnx[113] } ,
+ {20,"raw" ,2,0.0,&avnx[114] } ,
+ {20,"seg" ,3,0.0,&avnx[115] } ,
+ {20,"const" ,4,0.0,&avnx[116] } ,
+ {20,"ref" ,5,0.0,&avnx[117] } ,
+ {20,"consen" ,6,0.0,&avnx[118] } ,
+ {20,"map" ,7,0.0,&avnx[119] } ,
+ {20,"delta" ,8,0.0,&avnx[120] } ,
{20,"other" ,255,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[118] } ,
- {20,"dna" ,1,0.0,&avnx[119] } ,
- {20,"rna" ,2,0.0,&avnx[120] } ,
- {20,"aa" ,3,0.0,&avnx[121] } ,
- {20,"na" ,4,0.0,&avnx[122] } ,
+ {20,"not-set" ,0,0.0,&avnx[122] } ,
+ {20,"dna" ,1,0.0,&avnx[123] } ,
+ {20,"rna" ,2,0.0,&avnx[124] } ,
+ {20,"aa" ,3,0.0,&avnx[125] } ,
+ {20,"na" ,4,0.0,&avnx[126] } ,
{20,"other" ,255,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[124] } ,
- {20,"linear" ,1,0.0,&avnx[125] } ,
- {20,"circular" ,2,0.0,&avnx[126] } ,
- {20,"tandem" ,3,0.0,&avnx[127] } ,
+ {20,"not-set" ,0,0.0,&avnx[128] } ,
+ {20,"linear" ,1,0.0,&avnx[129] } ,
+ {20,"circular" ,2,0.0,&avnx[130] } ,
+ {20,"tandem" ,3,0.0,&avnx[131] } ,
{20,"other" ,255,0.0,NULL } ,
{3,NULL,1,0.0,NULL } ,
- {20,"not-set" ,0,0.0,&avnx[130] } ,
- {20,"ss" ,1,0.0,&avnx[131] } ,
- {20,"ds" ,2,0.0,&avnx[132] } ,
- {20,"mixed" ,3,0.0,&avnx[133] } ,
- {20,"other" ,255,0.0,NULL } ,
- {20,"genbank" ,1,0.0,&avnx[135] } ,
- {20,"embl" ,2,0.0,&avnx[136] } ,
- {20,"ddbj" ,3,0.0,&avnx[137] } ,
- {20,"pir" ,4,0.0,&avnx[138] } ,
- {20,"sp" ,5,0.0,&avnx[139] } ,
- {20,"bbone" ,6,0.0,&avnx[140] } ,
- {20,"pdb" ,7,0.0,&avnx[141] } ,
+ {20,"not-set" ,0,0.0,&avnx[134] } ,
+ {20,"ss" ,1,0.0,&avnx[135] } ,
+ {20,"ds" ,2,0.0,&avnx[136] } ,
+ {20,"mixed" ,3,0.0,&avnx[137] } ,
{20,"other" ,255,0.0,NULL } ,
- {20,"ref" ,1,0.0,&avnx[143] } ,
- {20,"alt" ,2,0.0,&avnx[144] } ,
- {20,"blocks" ,3,0.0,&avnx[145] } ,
+ {20,"genbank" ,1,0.0,&avnx[139] } ,
+ {20,"embl" ,2,0.0,&avnx[140] } ,
+ {20,"ddbj" ,3,0.0,&avnx[141] } ,
+ {20,"pir" ,4,0.0,&avnx[142] } ,
+ {20,"sp" ,5,0.0,&avnx[143] } ,
+ {20,"bbone" ,6,0.0,&avnx[144] } ,
+ {20,"pdb" ,7,0.0,&avnx[145] } ,
{20,"other" ,255,0.0,NULL } };
static AsnType atx[206] = {
- {401, "Bioseq" ,1,0,0,0,0,1,0,0,NULL,&atx[32],&atx[1],0,&atx[166]} ,
- {0, "id" ,128,0,0,0,0,0,0,0,NULL,&atx[4],&atx[2],0,&atx[5]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[3],NULL,0,NULL} ,
- {422, "Seq-id" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[134]} ,
- {314, "SET OF" ,0,17,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
- {0, "descr" ,128,1,0,1,0,0,0,0,NULL,&atx[6],NULL,0,&atx[97]} ,
- {404, "Seq-descr" ,1,0,0,0,0,1,0,0,NULL,&atx[4],&atx[7],0,&atx[8]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[8],NULL,0,NULL} ,
- {405, "Seqdesc" ,1,0,0,0,0,1,0,0,NULL,&atx[50],&atx[9],0,&atx[24]} ,
- {0, "mol-type" ,128,0,0,0,0,0,0,0,NULL,&atx[10],NULL,0,&atx[12]} ,
- {409, "GIBB-mol" ,1,0,0,0,0,1,0,0,NULL,&atx[11],&avnx[0],0,&atx[147]} ,
- {310, "ENUMERATED" ,0,10,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
- {0, "modif" ,128,1,0,0,0,0,0,0,NULL,&atx[4],&atx[13],0,&atx[15]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[14],NULL,0,NULL} ,
- {431, "GIBB-mod" ,1,0,0,0,0,0,0,0,NULL,&atx[11],&avnx[12],0,&atx[16]} ,
- {0, "method" ,128,2,0,0,0,0,0,0,NULL,&atx[16],NULL,0,&atx[17]} ,
- {432, "GIBB-method" ,1,0,0,0,0,0,0,0,NULL,&atx[11],&avnx[40],0,&atx[92]} ,
- {0, "name" ,128,3,0,0,0,0,0,0,NULL,&atx[18],NULL,0,&atx[19]} ,
+ {401, "Annotdesc" ,1,0,0,0,0,1,0,0,NULL,&atx[39],&atx[1],0,&atx[63]} ,
+ {0, "name" ,128,0,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[3]} ,
{323, "VisibleString" ,0,26,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
- {0, "title" ,128,4,0,0,0,0,0,0,NULL,&atx[18],NULL,0,&atx[20]} ,
- {0, "org" ,128,5,0,0,0,0,0,0,NULL,&atx[21],NULL,0,&atx[22]} ,
- {420, "Org-ref" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[90]} ,
- {0, "comment" ,128,6,0,0,0,0,0,0,NULL,&atx[18],NULL,0,&atx[23]} ,
- {0, "num" ,128,7,0,0,0,0,0,0,NULL,&atx[24],NULL,0,&atx[51]} ,
- {406, "Numbering" ,1,0,0,0,0,1,0,0,NULL,&atx[50],&atx[25],0,&atx[88]} ,
- {0, "cont" ,128,0,0,0,0,0,0,0,NULL,&atx[26],NULL,0,&atx[33]} ,
- {434, "Num-cont" ,1,0,0,0,0,0,0,0,NULL,&atx[32],&atx[27],0,&atx[34]} ,
- {0, "refnum" ,128,0,0,0,1,0,0,0,&avnx[47],&atx[28],NULL,0,&atx[29]} ,
+ {0, "title" ,128,1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[4]} ,
+ {0, "comment" ,128,2,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[5]} ,
+ {0, "pub" ,128,3,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[48]} ,
+ {406, "Pubdesc" ,1,0,0,0,0,1,0,0,NULL,&atx[20],&atx[7],0,&atx[181]} ,
+ {0, "pub" ,128,0,0,0,0,0,0,0,NULL,&atx[8],NULL,0,&atx[9]} ,
+ {420, "Pub-equiv" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[80]} ,
+ {0, "name" ,128,1,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[10]} ,
+ {0, "fig" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[11]} ,
+ {0, "num" ,128,3,0,1,0,0,0,0,NULL,&atx[12],NULL,0,&atx[40]} ,
+ {405, "Numbering" ,1,0,0,0,0,1,0,0,NULL,&atx[39],&atx[13],0,&atx[6]} ,
+ {0, "cont" ,128,0,0,0,0,0,0,0,NULL,&atx[14],NULL,0,&atx[21]} ,
+ {435, "Num-cont" ,1,0,0,0,0,0,0,0,NULL,&atx[20],&atx[15],0,&atx[22]} ,
+ {0, "refnum" ,128,0,0,0,1,0,0,0,&avnx[0],&atx[16],NULL,0,&atx[17]} ,
{302, "INTEGER" ,0,2,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
- {0, "has-zero" ,128,1,0,0,1,0,0,0,&avnx[48],&atx[30],NULL,0,&atx[31]} ,
+ {0, "has-zero" ,128,1,0,0,1,0,0,0,&avnx[1],&atx[18],NULL,0,&atx[19]} ,
{301, "BOOLEAN" ,0,1,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
- {0, "ascending" ,128,2,0,0,1,0,0,0,&avnx[49],&atx[30],NULL,0,NULL} ,
+ {0, "ascending" ,128,2,0,0,1,0,0,0,&avnx[2],&atx[18],NULL,0,NULL} ,
{311, "SEQUENCE" ,0,16,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
- {0, "enum" ,128,1,0,0,0,0,0,0,NULL,&atx[34],NULL,0,&atx[39]} ,
- {435, "Num-enum" ,1,0,0,0,0,0,0,0,NULL,&atx[32],&atx[35],0,&atx[40]} ,
- {0, "num" ,128,0,0,0,0,0,0,0,NULL,&atx[28],NULL,0,&atx[36]} ,
- {0, "names" ,128,1,0,0,0,0,0,0,NULL,&atx[38],&atx[37],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[18],NULL,0,NULL} ,
+ {0, "enum" ,128,1,0,0,0,0,0,0,NULL,&atx[22],NULL,0,&atx[27]} ,
+ {436, "Num-enum" ,1,0,0,0,0,0,0,0,NULL,&atx[20],&atx[23],0,&atx[28]} ,
+ {0, "num" ,128,0,0,0,0,0,0,0,NULL,&atx[16],NULL,0,&atx[24]} ,
+ {0, "names" ,128,1,0,0,0,0,0,0,NULL,&atx[26],&atx[25],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
{312, "SEQUENCE OF" ,0,16,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
- {0, "ref" ,128,2,0,0,0,0,0,0,NULL,&atx[40],NULL,0,&atx[44]} ,
- {436, "Num-ref" ,1,0,0,0,0,0,0,0,NULL,&atx[32],&atx[41],0,&atx[45]} ,
- {0, "type" ,128,0,0,0,0,0,0,0,NULL,&atx[11],&avnx[50],0,&atx[42]} ,
- {0, "aligns" ,128,1,0,1,0,0,0,0,NULL,&atx[43],NULL,0,NULL} ,
- {416, "Seq-align" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[140]} ,
- {0, "real" ,128,3,0,0,0,0,0,0,NULL,&atx[45],NULL,0,NULL} ,
- {437, "Num-real" ,1,0,0,0,0,0,0,0,NULL,&atx[32],&atx[46],0,&atx[107]} ,
- {0, "a" ,128,0,0,0,0,0,0,0,NULL,&atx[47],NULL,0,&atx[48]} ,
+ {0, "ref" ,128,2,0,0,0,0,0,0,NULL,&atx[28],NULL,0,&atx[33]} ,
+ {437, "Num-ref" ,1,0,0,0,0,0,0,0,NULL,&atx[20],&atx[29],0,&atx[34]} ,
+ {0, "type" ,128,0,0,0,0,0,0,0,NULL,&atx[30],&avnx[3],0,&atx[31]} ,
+ {310, "ENUMERATED" ,0,10,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
+ {0, "aligns" ,128,1,0,1,0,0,0,0,NULL,&atx[32],NULL,0,NULL} ,
+ {417, "Seq-align" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[155]} ,
+ {0, "real" ,128,3,0,0,0,0,0,0,NULL,&atx[34],NULL,0,NULL} ,
+ {438, "Num-real" ,1,0,0,0,0,0,0,0,NULL,&atx[20],&atx[35],0,&atx[123]} ,
+ {0, "a" ,128,0,0,0,0,0,0,0,NULL,&atx[36],NULL,0,&atx[37]} ,
{309, "REAL" ,0,9,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
- {0, "b" ,128,1,0,0,0,0,0,0,NULL,&atx[47],NULL,0,&atx[49]} ,
- {0, "units" ,128,2,0,1,0,0,0,0,NULL,&atx[18],NULL,0,NULL} ,
+ {0, "b" ,128,1,0,0,0,0,0,0,NULL,&atx[36],NULL,0,&atx[38]} ,
+ {0, "units" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
{315, "CHOICE" ,0,-1,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
- {0, "maploc" ,128,8,0,0,0,0,0,0,NULL,&atx[52],NULL,0,&atx[53]} ,
- {413, "Dbtag" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[171]} ,
- {0, "pir" ,128,9,0,0,0,0,0,0,NULL,&atx[54],NULL,0,&atx[55]} ,
- {425, "PIR-block" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[79]} ,
- {0, "genbank" ,128,10,0,0,0,0,0,0,NULL,&atx[56],NULL,0,&atx[57]} ,
- {424, "GB-block" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[54]} ,
- {0, "pub" ,128,11,0,0,0,0,0,0,NULL,&atx[58],NULL,0,&atx[72]} ,
- {403, "Pubdesc" ,1,0,0,0,0,1,0,0,NULL,&atx[32],&atx[59],0,&atx[6]} ,
- {0, "pub" ,128,0,0,0,0,0,0,0,NULL,&atx[60],NULL,0,&atx[61]} ,
- {419, "Pub-equiv" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[21]} ,
- {0, "name" ,128,1,0,1,0,0,0,0,NULL,&atx[18],NULL,0,&atx[62]} ,
- {0, "fig" ,128,2,0,1,0,0,0,0,NULL,&atx[18],NULL,0,&atx[63]} ,
- {0, "num" ,128,3,0,1,0,0,0,0,NULL,&atx[24],NULL,0,&atx[64]} ,
- {0, "numexc" ,128,4,0,1,0,0,0,0,NULL,&atx[30],NULL,0,&atx[65]} ,
- {0, "poly-a" ,128,5,0,1,0,0,0,0,NULL,&atx[30],NULL,0,&atx[66]} ,
- {0, "maploc" ,128,6,0,1,0,0,0,0,NULL,&atx[18],NULL,0,&atx[67]} ,
- {0, "seq-raw" ,128,7,0,1,0,0,0,0,NULL,&atx[68],NULL,0,&atx[69]} ,
+ {0, "numexc" ,128,4,0,1,0,0,0,0,NULL,&atx[18],NULL,0,&atx[41]} ,
+ {0, "poly-a" ,128,5,0,1,0,0,0,0,NULL,&atx[18],NULL,0,&atx[42]} ,
+ {0, "maploc" ,128,6,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[43]} ,
+ {0, "seq-raw" ,128,7,0,1,0,0,0,0,NULL,&atx[44],NULL,0,&atx[45]} ,
{351, "StringStore" ,64,1,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
- {0, "align-group" ,128,8,0,1,0,0,0,0,NULL,&atx[28],NULL,0,&atx[70]} ,
- {0, "comment" ,128,9,0,1,0,0,0,0,NULL,&atx[18],NULL,0,&atx[71]} ,
- {0, "reftype" ,128,10,0,0,1,0,0,0,&avnx[57],&atx[28],&avnx[53],0,NULL} ,
- {0, "region" ,128,12,0,0,0,0,0,0,NULL,&atx[18],NULL,0,&atx[73]} ,
- {0, "user" ,128,13,0,0,0,0,0,0,NULL,&atx[74],NULL,0,&atx[75]} ,
- {415, "User-object" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[43]} ,
- {0, "sp" ,128,14,0,0,0,0,0,0,NULL,&atx[76],NULL,0,&atx[77]} ,
- {427, "SP-block" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[84]} ,
- {0, "dbxref" ,128,15,0,0,0,0,0,0,NULL,&atx[52],NULL,0,&atx[78]} ,
- {0, "embl" ,128,16,0,0,0,0,0,0,NULL,&atx[79],NULL,0,&atx[80]} ,
- {426, "EMBL-block" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[76]} ,
- {0, "create-date" ,128,17,0,0,0,0,0,0,NULL,&atx[81],NULL,0,&atx[82]} ,
- {411, "Date" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[103]} ,
- {0, "update-date" ,128,18,0,0,0,0,0,0,NULL,&atx[81],NULL,0,&atx[83]} ,
- {0, "prf" ,128,19,0,0,0,0,0,0,NULL,&atx[84],NULL,0,&atx[85]} ,
- {428, "PRF-block" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[86]} ,
- {0, "pdb" ,128,20,0,0,0,0,0,0,NULL,&atx[86],NULL,0,&atx[87]} ,
- {429, "PDB-block" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[98]} ,
- {0, "het" ,128,21,0,0,0,0,0,0,NULL,&atx[88],NULL,0,&atx[89]} ,
- {407, "Heterogen" ,1,0,0,0,0,1,0,0,NULL,&atx[18],NULL,0,&atx[152]} ,
- {0, "source" ,128,22,0,0,0,0,0,0,NULL,&atx[90],NULL,0,&atx[91]} ,
- {421, "BioSource" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[3]} ,
- {0, "molinfo" ,128,23,0,0,0,0,0,0,NULL,&atx[92],NULL,0,NULL} ,
- {433, "MolInfo" ,1,0,0,0,0,0,0,0,NULL,&atx[32],&atx[93],0,&atx[26]} ,
- {0, "biomol" ,128,0,0,0,1,0,0,0,&avnx[73],&atx[28],&avnx[58],0,&atx[94]} ,
- {0, "tech" ,128,1,0,0,1,0,0,0,&avnx[96],&atx[28],&avnx[74],0,&atx[95]} ,
- {0, "techexp" ,128,2,0,1,0,0,0,0,NULL,&atx[18],NULL,0,&atx[96]} ,
- {0, "completeness" ,128,3,0,0,1,0,0,0,&avnx[106],&atx[28],&avnx[97],0,NULL} ,
- {0, "inst" ,128,2,0,0,0,0,0,0,NULL,&atx[98],NULL,0,&atx[164]} ,
- {430, "Seq-inst" ,1,0,0,0,0,0,0,0,NULL,&atx[32],&atx[99],0,&atx[14]} ,
- {0, "repr" ,128,0,0,0,0,0,0,0,NULL,&atx[11],&avnx[107],0,&atx[100]} ,
- {0, "mol" ,128,1,0,0,0,0,0,0,NULL,&atx[11],&avnx[117],0,&atx[101]} ,
- {0, "length" ,128,2,0,1,0,0,0,0,NULL,&atx[28],NULL,0,&atx[102]} ,
- {0, "fuzz" ,128,3,0,1,0,0,0,0,NULL,&atx[103],NULL,0,&atx[104]} ,
- {412, "Int-fuzz" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[52]} ,
- {0, "topology" ,128,4,0,0,1,0,0,0,&avnx[128],&atx[11],&avnx[123],0,&atx[105]} ,
- {0, "strand" ,128,5,0,1,0,0,0,0,NULL,&atx[11],&avnx[129],0,&atx[106]} ,
- {0, "seq-data" ,128,6,0,1,0,0,0,0,NULL,&atx[107],NULL,0,&atx[129]} ,
- {438, "Seq-data" ,1,0,0,0,0,0,0,0,NULL,&atx[50],&atx[108],0,&atx[130]} ,
- {0, "iupacna" ,128,0,0,0,0,0,0,0,NULL,&atx[109],NULL,0,&atx[110]} ,
- {446, "IUPACna" ,1,0,0,0,0,0,0,0,NULL,&atx[68],NULL,0,&atx[111]} ,
- {0, "iupacaa" ,128,1,0,0,0,0,0,0,NULL,&atx[111],NULL,0,&atx[112]} ,
- {447, "IUPACaa" ,1,0,0,0,0,0,0,0,NULL,&atx[68],NULL,0,&atx[113]} ,
- {0, "ncbi2na" ,128,2,0,0,0,0,0,0,NULL,&atx[113],NULL,0,&atx[115]} ,
- {448, "NCBI2na" ,1,0,0,0,0,0,0,0,NULL,&atx[114],NULL,0,&atx[116]} ,
+ {0, "align-group" ,128,8,0,1,0,0,0,0,NULL,&atx[16],NULL,0,&atx[46]} ,
+ {0, "comment" ,128,9,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[47]} ,
+ {0, "reftype" ,128,10,0,0,1,0,0,0,&avnx[10],&atx[16],&avnx[6],0,NULL} ,
+ {0, "user" ,128,4,0,0,0,0,0,0,NULL,&atx[49],NULL,0,&atx[50]} ,
+ {416, "User-object" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[32]} ,
+ {0, "create-date" ,128,5,0,0,0,0,0,0,NULL,&atx[51],NULL,0,&atx[52]} ,
+ {412, "Date" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[119]} ,
+ {0, "update-date" ,128,6,0,0,0,0,0,0,NULL,&atx[51],NULL,0,&atx[53]} ,
+ {0, "src" ,128,7,0,0,0,0,0,0,NULL,&atx[54],NULL,0,&atx[55]} ,
+ {423, "Seq-id" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[62]} ,
+ {0, "align" ,128,8,0,0,0,0,0,0,NULL,&atx[56],NULL,0,&atx[61]} ,
+ {459, "Align-def" ,1,0,0,0,0,0,0,0,NULL,&atx[20],&atx[57],0,NULL} ,
+ {0, "align-type" ,128,0,0,0,0,0,0,0,NULL,&atx[16],&avnx[11],0,&atx[58]} ,
+ {0, "ids" ,128,1,0,1,0,0,0,0,NULL,&atx[60],&atx[59],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[54],NULL,0,NULL} ,
+ {314, "SET OF" ,0,17,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
+ {0, "region" ,128,9,0,0,0,0,0,0,NULL,&atx[62],NULL,0,NULL} ,
+ {424, "Seq-loc" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[88]} ,
+ {402, "Bioseq" ,1,0,0,0,0,1,0,0,NULL,&atx[20],&atx[64],0,&atx[71]} ,
+ {0, "id" ,128,0,0,0,0,0,0,0,NULL,&atx[60],&atx[65],0,&atx[66]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[54],NULL,0,NULL} ,
+ {0, "descr" ,128,1,0,1,0,0,0,0,NULL,&atx[67],NULL,0,&atx[113]} ,
+ {408, "Seq-descr" ,1,0,0,0,0,1,0,0,NULL,&atx[60],&atx[68],0,&atx[167]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[69],NULL,0,NULL} ,
+ {411, "Seqdesc" ,1,0,0,0,0,1,0,0,NULL,&atx[39],&atx[70],0,&atx[51]} ,
+ {0, "mol-type" ,128,0,0,0,0,0,0,0,NULL,&atx[71],NULL,0,&atx[72]} ,
+ {403, "GIBB-mol" ,1,0,0,0,0,1,0,0,NULL,&atx[30],&avnx[15],0,&atx[104]} ,
+ {0, "modif" ,128,1,0,0,0,0,0,0,NULL,&atx[60],&atx[73],0,&atx[75]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[74],NULL,0,NULL} ,
+ {432, "GIBB-mod" ,1,0,0,0,0,0,0,0,NULL,&atx[30],&avnx[27],0,&atx[76]} ,
+ {0, "method" ,128,2,0,0,0,0,0,0,NULL,&atx[76],NULL,0,&atx[77]} ,
+ {433, "GIBB-method" ,1,0,0,0,0,0,0,0,NULL,&atx[30],&avnx[55],0,&atx[108]} ,
+ {0, "name" ,128,3,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[78]} ,
+ {0, "title" ,128,4,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[79]} ,
+ {0, "org" ,128,5,0,0,0,0,0,0,NULL,&atx[80],NULL,0,&atx[81]} ,
+ {421, "Org-ref" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[106]} ,
+ {0, "comment" ,128,6,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[82]} ,
+ {0, "num" ,128,7,0,0,0,0,0,0,NULL,&atx[12],NULL,0,&atx[83]} ,
+ {0, "maploc" ,128,8,0,0,0,0,0,0,NULL,&atx[84],NULL,0,&atx[85]} ,
+ {414, "Dbtag" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[186]} ,
+ {0, "pir" ,128,9,0,0,0,0,0,0,NULL,&atx[86],NULL,0,&atx[87]} ,
+ {426, "PIR-block" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[96]} ,
+ {0, "genbank" ,128,10,0,0,0,0,0,0,NULL,&atx[88],NULL,0,&atx[89]} ,
+ {425, "GB-block" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[86]} ,
+ {0, "pub" ,128,11,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[90]} ,
+ {0, "region" ,128,12,0,0,0,0,0,0,NULL,&atx[2],NULL,0,&atx[91]} ,
+ {0, "user" ,128,13,0,0,0,0,0,0,NULL,&atx[49],NULL,0,&atx[92]} ,
+ {0, "sp" ,128,14,0,0,0,0,0,0,NULL,&atx[93],NULL,0,&atx[94]} ,
+ {428, "SP-block" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[100]} ,
+ {0, "dbxref" ,128,15,0,0,0,0,0,0,NULL,&atx[84],NULL,0,&atx[95]} ,
+ {0, "embl" ,128,16,0,0,0,0,0,0,NULL,&atx[96],NULL,0,&atx[97]} ,
+ {427, "EMBL-block" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[93]} ,
+ {0, "create-date" ,128,17,0,0,0,0,0,0,NULL,&atx[51],NULL,0,&atx[98]} ,
+ {0, "update-date" ,128,18,0,0,0,0,0,0,NULL,&atx[51],NULL,0,&atx[99]} ,
+ {0, "prf" ,128,19,0,0,0,0,0,0,NULL,&atx[100],NULL,0,&atx[101]} ,
+ {429, "PRF-block" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[102]} ,
+ {0, "pdb" ,128,20,0,0,0,0,0,0,NULL,&atx[102],NULL,0,&atx[103]} ,
+ {430, "PDB-block" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[114]} ,
+ {0, "het" ,128,21,0,0,0,0,0,0,NULL,&atx[104],NULL,0,&atx[105]} ,
+ {404, "Heterogen" ,1,0,0,0,0,1,0,0,NULL,&atx[2],NULL,0,&atx[12]} ,
+ {0, "source" ,128,22,0,0,0,0,0,0,NULL,&atx[106],NULL,0,&atx[107]} ,
+ {422, "BioSource" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[54]} ,
+ {0, "molinfo" ,128,23,0,0,0,0,0,0,NULL,&atx[108],NULL,0,NULL} ,
+ {434, "MolInfo" ,1,0,0,0,0,0,0,0,NULL,&atx[20],&atx[109],0,&atx[14]} ,
+ {0, "biomol" ,128,0,0,0,1,0,0,0,&avnx[77],&atx[16],&avnx[62],0,&atx[110]} ,
+ {0, "tech" ,128,1,0,0,1,0,0,0,&avnx[100],&atx[16],&avnx[78],0,&atx[111]} ,
+ {0, "techexp" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[112]} ,
+ {0, "completeness" ,128,3,0,0,1,0,0,0,&avnx[110],&atx[16],&avnx[101],0,NULL} ,
+ {0, "inst" ,128,2,0,0,0,0,0,0,NULL,&atx[114],NULL,0,&atx[179]} ,
+ {431, "Seq-inst" ,1,0,0,0,0,0,0,0,NULL,&atx[20],&atx[115],0,&atx[74]} ,
+ {0, "repr" ,128,0,0,0,0,0,0,0,NULL,&atx[30],&avnx[111],0,&atx[116]} ,
+ {0, "mol" ,128,1,0,0,0,0,0,0,NULL,&atx[30],&avnx[121],0,&atx[117]} ,
+ {0, "length" ,128,2,0,1,0,0,0,0,NULL,&atx[16],NULL,0,&atx[118]} ,
+ {0, "fuzz" ,128,3,0,1,0,0,0,0,NULL,&atx[119],NULL,0,&atx[120]} ,
+ {413, "Int-fuzz" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[84]} ,
+ {0, "topology" ,128,4,0,0,1,0,0,0,&avnx[132],&atx[30],&avnx[127],0,&atx[121]} ,
+ {0, "strand" ,128,5,0,1,0,0,0,0,NULL,&atx[30],&avnx[133],0,&atx[122]} ,
+ {0, "seq-data" ,128,6,0,1,0,0,0,0,NULL,&atx[123],NULL,0,&atx[145]} ,
+ {439, "Seq-data" ,1,0,0,0,0,0,0,0,NULL,&atx[39],&atx[124],0,&atx[146]} ,
+ {0, "iupacna" ,128,0,0,0,0,0,0,0,NULL,&atx[125],NULL,0,&atx[126]} ,
+ {447, "IUPACna" ,1,0,0,0,0,0,0,0,NULL,&atx[44],NULL,0,&atx[127]} ,
+ {0, "iupacaa" ,128,1,0,0,0,0,0,0,NULL,&atx[127],NULL,0,&atx[128]} ,
+ {448, "IUPACaa" ,1,0,0,0,0,0,0,0,NULL,&atx[44],NULL,0,&atx[129]} ,
+ {0, "ncbi2na" ,128,2,0,0,0,0,0,0,NULL,&atx[129],NULL,0,&atx[131]} ,
+ {449, "NCBI2na" ,1,0,0,0,0,0,0,0,NULL,&atx[130],NULL,0,&atx[132]} ,
{304, "OCTET STRING" ,0,4,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
- {0, "ncbi4na" ,128,3,0,0,0,0,0,0,NULL,&atx[116],NULL,0,&atx[117]} ,
- {449, "NCBI4na" ,1,0,0,0,0,0,0,0,NULL,&atx[114],NULL,0,&atx[118]} ,
- {0, "ncbi8na" ,128,4,0,0,0,0,0,0,NULL,&atx[118],NULL,0,&atx[119]} ,
- {450, "NCBI8na" ,1,0,0,0,0,0,0,0,NULL,&atx[114],NULL,0,&atx[120]} ,
- {0, "ncbipna" ,128,5,0,0,0,0,0,0,NULL,&atx[120],NULL,0,&atx[121]} ,
- {451, "NCBIpna" ,1,0,0,0,0,0,0,0,NULL,&atx[114],NULL,0,&atx[122]} ,
- {0, "ncbi8aa" ,128,6,0,0,0,0,0,0,NULL,&atx[122],NULL,0,&atx[123]} ,
- {452, "NCBI8aa" ,1,0,0,0,0,0,0,0,NULL,&atx[114],NULL,0,&atx[124]} ,
- {0, "ncbieaa" ,128,7,0,0,0,0,0,0,NULL,&atx[124],NULL,0,&atx[125]} ,
- {453, "NCBIeaa" ,1,0,0,0,0,0,0,0,NULL,&atx[68],NULL,0,&atx[126]} ,
- {0, "ncbipaa" ,128,8,0,0,0,0,0,0,NULL,&atx[126],NULL,0,&atx[127]} ,
- {454, "NCBIpaa" ,1,0,0,0,0,0,0,0,NULL,&atx[114],NULL,0,&atx[128]} ,
- {0, "ncbistdaa" ,128,9,0,0,0,0,0,0,NULL,&atx[128],NULL,0,NULL} ,
- {455, "NCBIstdaa" ,1,0,0,0,0,0,0,0,NULL,&atx[114],NULL,0,&atx[169]} ,
- {0, "ext" ,128,7,0,1,0,0,0,0,NULL,&atx[130],NULL,0,&atx[151]} ,
- {439, "Seq-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[50],&atx[131],0,&atx[132]} ,
- {0, "seg" ,128,0,0,0,0,0,0,0,NULL,&atx[132],NULL,0,&atx[135]} ,
- {440, "Seg-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[38],&atx[133],0,&atx[136]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[134],NULL,0,NULL} ,
- {423, "Seq-loc" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[56]} ,
- {0, "ref" ,128,1,0,0,0,0,0,0,NULL,&atx[136],NULL,0,&atx[137]} ,
- {441, "Ref-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[134],NULL,0,&atx[138]} ,
- {0, "map" ,128,2,0,0,0,0,0,0,NULL,&atx[138],NULL,0,&atx[141]} ,
- {442, "Map-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[38],&atx[139],0,&atx[142]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[140],NULL,0,NULL} ,
- {417, "Seq-feat" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[201]} ,
- {0, "delta" ,128,3,0,0,0,0,0,0,NULL,&atx[142],NULL,0,NULL} ,
- {443, "Delta-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[38],&atx[143],0,&atx[144]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[144],NULL,0,NULL} ,
- {444, "Delta-seq" ,1,0,0,0,0,0,0,0,NULL,&atx[50],&atx[145],0,&atx[156]} ,
- {0, "loc" ,128,0,0,0,0,0,0,0,NULL,&atx[134],NULL,0,&atx[146]} ,
- {0, "literal" ,128,1,0,0,0,0,0,0,NULL,&atx[147],NULL,0,NULL} ,
- {410, "Seq-literal" ,1,0,0,0,0,1,0,0,NULL,&atx[32],&atx[148],0,&atx[81]} ,
- {0, "length" ,128,0,0,0,0,0,0,0,NULL,&atx[28],NULL,0,&atx[149]} ,
- {0, "fuzz" ,128,1,0,1,0,0,0,0,NULL,&atx[103],NULL,0,&atx[150]} ,
- {0, "seq-data" ,128,2,0,1,0,0,0,0,NULL,&atx[107],NULL,0,NULL} ,
- {0, "hist" ,128,8,0,1,0,0,0,0,NULL,&atx[152],NULL,0,NULL} ,
- {408, "Seq-hist" ,1,0,0,0,0,1,0,0,NULL,&atx[32],&atx[153],0,&atx[10]} ,
- {0, "assembly" ,128,0,0,1,0,0,0,0,NULL,&atx[4],&atx[154],0,&atx[155]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[43],NULL,0,NULL} ,
- {0, "replaces" ,128,1,0,1,0,0,0,0,NULL,&atx[156],NULL,0,&atx[160]} ,
- {445, "Seq-hist-rec" ,1,0,0,0,0,0,0,0,NULL,&atx[32],&atx[157],0,&atx[109]} ,
- {0, "date" ,128,0,0,1,0,0,0,0,NULL,&atx[81],NULL,0,&atx[158]} ,
- {0, "ids" ,128,1,0,0,0,0,0,0,NULL,&atx[4],&atx[159],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[3],NULL,0,NULL} ,
- {0, "replaced-by" ,128,2,0,1,0,0,0,0,NULL,&atx[156],NULL,0,&atx[161]} ,
- {0, "deleted" ,128,3,0,1,0,0,0,0,NULL,&atx[50],&atx[162],0,NULL} ,
- {0, "bool" ,128,0,0,0,0,0,0,0,NULL,&atx[30],NULL,0,&atx[163]} ,
- {0, "date" ,128,1,0,0,0,0,0,0,NULL,&atx[81],NULL,0,NULL} ,
- {0, "annot" ,128,3,0,1,0,0,0,0,NULL,&atx[4],&atx[165],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[166],NULL,0,NULL} ,
- {402, "Seq-annot" ,1,0,0,0,0,1,0,0,NULL,&atx[32],&atx[167],0,&atx[58]} ,
- {0, "id" ,128,0,0,1,0,0,0,0,NULL,&atx[4],&atx[168],0,&atx[174]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[169],NULL,0,NULL} ,
- {456, "Annot-id" ,1,0,0,0,0,0,0,0,NULL,&atx[50],&atx[170],0,&atx[177]} ,
- {0, "local" ,128,0,0,0,0,0,0,0,NULL,&atx[171],NULL,0,&atx[172]} ,
- {414, "Object-id" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[74]} ,
- {0, "ncbi" ,128,1,0,0,0,0,0,0,NULL,&atx[28],NULL,0,&atx[173]} ,
- {0, "general" ,128,2,0,0,0,0,0,0,NULL,&atx[52],NULL,0,NULL} ,
- {0, "db" ,128,1,0,1,0,0,0,0,NULL,&atx[28],&avnx[134],0,&atx[175]} ,
- {0, "name" ,128,2,0,1,0,0,0,0,NULL,&atx[18],NULL,0,&atx[176]} ,
- {0, "desc" ,128,3,0,1,0,0,0,0,NULL,&atx[177],NULL,0,&atx[194]} ,
- {457, "Annot-descr" ,1,0,0,0,0,0,0,0,NULL,&atx[4],&atx[178],0,&atx[179]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[179],NULL,0,NULL} ,
- {458, "Annotdesc" ,1,0,0,0,0,0,0,0,NULL,&atx[50],&atx[180],0,&atx[189]} ,
- {0, "name" ,128,0,0,0,0,0,0,0,NULL,&atx[18],NULL,0,&atx[181]} ,
- {0, "title" ,128,1,0,0,0,0,0,0,NULL,&atx[18],NULL,0,&atx[182]} ,
- {0, "comment" ,128,2,0,0,0,0,0,0,NULL,&atx[18],NULL,0,&atx[183]} ,
- {0, "pub" ,128,3,0,0,0,0,0,0,NULL,&atx[58],NULL,0,&atx[184]} ,
- {0, "user" ,128,4,0,0,0,0,0,0,NULL,&atx[74],NULL,0,&atx[185]} ,
- {0, "create-date" ,128,5,0,0,0,0,0,0,NULL,&atx[81],NULL,0,&atx[186]} ,
- {0, "update-date" ,128,6,0,0,0,0,0,0,NULL,&atx[81],NULL,0,&atx[187]} ,
- {0, "src" ,128,7,0,0,0,0,0,0,NULL,&atx[3],NULL,0,&atx[188]} ,
- {0, "align" ,128,8,0,0,0,0,0,0,NULL,&atx[189],NULL,0,&atx[193]} ,
- {459, "Align-def" ,1,0,0,0,0,0,0,0,NULL,&atx[32],&atx[190],0,NULL} ,
- {0, "align-type" ,128,0,0,0,0,0,0,0,NULL,&atx[28],&avnx[142],0,&atx[191]} ,
- {0, "ids" ,128,1,0,1,0,0,0,0,NULL,&atx[4],&atx[192],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[3],NULL,0,NULL} ,
- {0, "region" ,128,9,0,0,0,0,0,0,NULL,&atx[134],NULL,0,NULL} ,
- {0, "data" ,128,4,0,0,0,0,0,0,NULL,&atx[50],&atx[195],0,NULL} ,
- {0, "ftable" ,128,0,0,0,0,0,0,0,NULL,&atx[4],&atx[196],0,&atx[197]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[140],NULL,0,NULL} ,
- {0, "align" ,128,1,0,0,0,0,0,0,NULL,&atx[4],&atx[198],0,&atx[199]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[43],NULL,0,NULL} ,
- {0, "graph" ,128,2,0,0,0,0,0,0,NULL,&atx[4],&atx[200],0,&atx[202]} ,
+ {0, "ncbi4na" ,128,3,0,0,0,0,0,0,NULL,&atx[132],NULL,0,&atx[133]} ,
+ {450, "NCBI4na" ,1,0,0,0,0,0,0,0,NULL,&atx[130],NULL,0,&atx[134]} ,
+ {0, "ncbi8na" ,128,4,0,0,0,0,0,0,NULL,&atx[134],NULL,0,&atx[135]} ,
+ {451, "NCBI8na" ,1,0,0,0,0,0,0,0,NULL,&atx[130],NULL,0,&atx[136]} ,
+ {0, "ncbipna" ,128,5,0,0,0,0,0,0,NULL,&atx[136],NULL,0,&atx[137]} ,
+ {452, "NCBIpna" ,1,0,0,0,0,0,0,0,NULL,&atx[130],NULL,0,&atx[138]} ,
+ {0, "ncbi8aa" ,128,6,0,0,0,0,0,0,NULL,&atx[138],NULL,0,&atx[139]} ,
+ {453, "NCBI8aa" ,1,0,0,0,0,0,0,0,NULL,&atx[130],NULL,0,&atx[140]} ,
+ {0, "ncbieaa" ,128,7,0,0,0,0,0,0,NULL,&atx[140],NULL,0,&atx[141]} ,
+ {454, "NCBIeaa" ,1,0,0,0,0,0,0,0,NULL,&atx[44],NULL,0,&atx[142]} ,
+ {0, "ncbipaa" ,128,8,0,0,0,0,0,0,NULL,&atx[142],NULL,0,&atx[143]} ,
+ {455, "NCBIpaa" ,1,0,0,0,0,0,0,0,NULL,&atx[130],NULL,0,&atx[144]} ,
+ {0, "ncbistdaa" ,128,9,0,0,0,0,0,0,NULL,&atx[144],NULL,0,NULL} ,
+ {456, "NCBIstdaa" ,1,0,0,0,0,0,0,0,NULL,&atx[130],NULL,0,&atx[184]} ,
+ {0, "ext" ,128,7,0,1,0,0,0,0,NULL,&atx[146],NULL,0,&atx[166]} ,
+ {440, "Seq-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[39],&atx[147],0,&atx[148]} ,
+ {0, "seg" ,128,0,0,0,0,0,0,0,NULL,&atx[148],NULL,0,&atx[150]} ,
+ {441, "Seg-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[26],&atx[149],0,&atx[151]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[62],NULL,0,NULL} ,
+ {0, "ref" ,128,1,0,0,0,0,0,0,NULL,&atx[151],NULL,0,&atx[152]} ,
+ {442, "Ref-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[153]} ,
+ {0, "map" ,128,2,0,0,0,0,0,0,NULL,&atx[153],NULL,0,&atx[156]} ,
+ {443, "Map-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[26],&atx[154],0,&atx[157]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[155],NULL,0,NULL} ,
+ {418, "Seq-feat" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[201]} ,
+ {0, "delta" ,128,3,0,0,0,0,0,0,NULL,&atx[157],NULL,0,NULL} ,
+ {444, "Delta-ext" ,1,0,0,0,0,0,0,0,NULL,&atx[26],&atx[158],0,&atx[159]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[159],NULL,0,NULL} ,
+ {445, "Delta-seq" ,1,0,0,0,0,0,0,0,NULL,&atx[39],&atx[160],0,&atx[171]} ,
+ {0, "loc" ,128,0,0,0,0,0,0,0,NULL,&atx[62],NULL,0,&atx[161]} ,
+ {0, "literal" ,128,1,0,0,0,0,0,0,NULL,&atx[162],NULL,0,NULL} ,
+ {410, "Seq-literal" ,1,0,0,0,0,1,0,0,NULL,&atx[20],&atx[163],0,&atx[69]} ,
+ {0, "length" ,128,0,0,0,0,0,0,0,NULL,&atx[16],NULL,0,&atx[164]} ,
+ {0, "fuzz" ,128,1,0,1,0,0,0,0,NULL,&atx[119],NULL,0,&atx[165]} ,
+ {0, "seq-data" ,128,2,0,1,0,0,0,0,NULL,&atx[123],NULL,0,NULL} ,
+ {0, "hist" ,128,8,0,1,0,0,0,0,NULL,&atx[167],NULL,0,NULL} ,
+ {409, "Seq-hist" ,1,0,0,0,0,1,0,0,NULL,&atx[20],&atx[168],0,&atx[162]} ,
+ {0, "assembly" ,128,0,0,1,0,0,0,0,NULL,&atx[60],&atx[169],0,&atx[170]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[32],NULL,0,NULL} ,
+ {0, "replaces" ,128,1,0,1,0,0,0,0,NULL,&atx[171],NULL,0,&atx[175]} ,
+ {446, "Seq-hist-rec" ,1,0,0,0,0,0,0,0,NULL,&atx[20],&atx[172],0,&atx[125]} ,
+ {0, "date" ,128,0,0,1,0,0,0,0,NULL,&atx[51],NULL,0,&atx[173]} ,
+ {0, "ids" ,128,1,0,0,0,0,0,0,NULL,&atx[60],&atx[174],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[54],NULL,0,NULL} ,
+ {0, "replaced-by" ,128,2,0,1,0,0,0,0,NULL,&atx[171],NULL,0,&atx[176]} ,
+ {0, "deleted" ,128,3,0,1,0,0,0,0,NULL,&atx[39],&atx[177],0,NULL} ,
+ {0, "bool" ,128,0,0,0,0,0,0,0,NULL,&atx[18],NULL,0,&atx[178]} ,
+ {0, "date" ,128,1,0,0,0,0,0,0,NULL,&atx[51],NULL,0,NULL} ,
+ {0, "annot" ,128,3,0,1,0,0,0,0,NULL,&atx[60],&atx[180],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[181],NULL,0,NULL} ,
+ {407, "Seq-annot" ,1,0,0,0,0,1,0,0,NULL,&atx[20],&atx[182],0,&atx[67]} ,
+ {0, "id" ,128,0,0,1,0,0,0,0,NULL,&atx[60],&atx[183],0,&atx[189]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[184],NULL,0,NULL} ,
+ {457, "Annot-id" ,1,0,0,0,0,0,0,0,NULL,&atx[39],&atx[185],0,&atx[192]} ,
+ {0, "local" ,128,0,0,0,0,0,0,0,NULL,&atx[186],NULL,0,&atx[187]} ,
+ {415, "Object-id" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[49]} ,
+ {0, "ncbi" ,128,1,0,0,0,0,0,0,NULL,&atx[16],NULL,0,&atx[188]} ,
+ {0, "general" ,128,2,0,0,0,0,0,0,NULL,&atx[84],NULL,0,NULL} ,
+ {0, "db" ,128,1,0,1,0,0,0,0,NULL,&atx[16],&avnx[138],0,&atx[190]} ,
+ {0, "name" ,128,2,0,1,0,0,0,0,NULL,&atx[2],NULL,0,&atx[191]} ,
+ {0, "desc" ,128,3,0,1,0,0,0,0,NULL,&atx[192],NULL,0,&atx[194]} ,
+ {458, "Annot-descr" ,1,0,0,0,0,0,0,0,NULL,&atx[60],&atx[193],0,&atx[56]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[0],NULL,0,NULL} ,
+ {0, "data" ,128,4,0,0,0,0,0,0,NULL,&atx[39],&atx[195],0,NULL} ,
+ {0, "ftable" ,128,0,0,0,0,0,0,0,NULL,&atx[60],&atx[196],0,&atx[197]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[155],NULL,0,NULL} ,
+ {0, "align" ,128,1,0,0,0,0,0,0,NULL,&atx[60],&atx[198],0,&atx[199]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[32],NULL,0,NULL} ,
+ {0, "graph" ,128,2,0,0,0,0,0,0,NULL,&atx[60],&atx[200],0,&atx[202]} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[201],NULL,0,NULL} ,
- {418, "Seq-graph" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[60]} ,
- {0, "ids" ,128,3,0,0,0,0,0,0,NULL,&atx[4],&atx[203],0,&atx[204]} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[3],NULL,0,NULL} ,
- {0, "locs" ,128,4,0,0,0,0,0,0,NULL,&atx[4],&atx[205],0,NULL} ,
- {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[134],NULL,0,NULL} };
+ {419, "Seq-graph" ,1,0,0,0,0,0,1,0,NULL,NULL,NULL,0,&atx[8]} ,
+ {0, "ids" ,128,3,0,0,0,0,0,0,NULL,&atx[60],&atx[203],0,&atx[204]} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[54],NULL,0,NULL} ,
+ {0, "locs" ,128,4,0,0,0,0,0,0,NULL,&atx[60],&atx[205],0,NULL} ,
+ {0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[62],NULL,0,NULL} };
static AsnModule ampx[1] = {
- { "NCBI-Sequence" , "asnseq.h67",&atx[0],NULL,NULL,0,0} };
+ { "NCBI-Sequence" , "asnseq.h68",&atx[0],NULL,NULL,0,0} };
static AsnValxNodePtr avn = avnx;
static AsnTypePtr at = atx;
@@ -381,20 +381,55 @@ static AsnModulePtr amp = ampx;
*
**************************************************/
-#define BIOSEQ &at[0]
-#define BIOSEQ_id &at[1]
-#define BIOSEQ_id_E &at[2]
-#define BIOSEQ_descr &at[5]
-#define BIOSEQ_inst &at[97]
-#define BIOSEQ_annot &at[164]
-#define BIOSEQ_annot_E &at[165]
-
-#define SEQ_ANNOT &at[166]
-#define SEQ_ANNOT_id &at[167]
-#define SEQ_ANNOT_id_E &at[168]
-#define SEQ_ANNOT_db &at[174]
-#define SEQ_ANNOT_name &at[175]
-#define SEQ_ANNOT_desc &at[176]
+#define ANNOTDESC &at[0]
+#define ANNOTDESC_name &at[1]
+#define ANNOTDESC_title &at[3]
+#define ANNOTDESC_comment &at[4]
+#define ANNOTDESC_pub &at[5]
+#define ANNOTDESC_user &at[48]
+#define ANNOTDESC_create_date &at[50]
+#define ANNOTDESC_update_date &at[52]
+#define ANNOTDESC_src &at[53]
+#define ANNOTDESC_align &at[55]
+#define ANNOTDESC_region &at[61]
+
+#define BIOSEQ &at[63]
+#define BIOSEQ_id &at[64]
+#define BIOSEQ_id_E &at[65]
+#define BIOSEQ_descr &at[66]
+#define BIOSEQ_inst &at[113]
+#define BIOSEQ_annot &at[179]
+#define BIOSEQ_annot_E &at[180]
+
+#define GIBB_MOL &at[71]
+
+#define HETEROGEN &at[104]
+
+#define NUMBERING &at[12]
+#define NUMBERING_cont &at[13]
+#define NUMBERING_enum &at[21]
+#define NUMBERING_ref &at[27]
+#define NUMBERING_real &at[33]
+
+#define PUBDESC &at[6]
+#define PUBDESC_pub &at[7]
+#define PUBDESC_name &at[9]
+#define PUBDESC_fig &at[10]
+#define PUBDESC_num &at[11]
+#define PUBDESC_numexc &at[40]
+#define PUBDESC_poly_a &at[41]
+#define PUBDESC_maploc &at[42]
+#define PUBDESC_seq_raw &at[43]
+#define PUBDESC_align_group &at[45]
+#define PUBDESC_comment &at[46]
+#define PUBDESC_reftype &at[47]
+
+#define SEQ_ANNOT &at[181]
+#define SEQ_ANNOT_id &at[182]
+#define SEQ_ANNOT_id_E &at[183]
+#define SEQ_ANNOT_db &at[189]
+#define SEQ_ANNOT_name &at[190]
+#define SEQ_ANNOT_desc &at[191]
#define SEQ_ANNOT_data &at[194]
#define SEQ_ANNOT_data_ftable &at[195]
#define SEQ_ANNOT_data_ftable_E &at[196]
@@ -407,192 +442,157 @@ static AsnModulePtr amp = ampx;
#define SEQ_ANNOT_data_locs &at[204]
#define SEQ_ANNOT_data_locs_E &at[205]
-#define PUBDESC &at[58]
-#define PUBDESC_pub &at[59]
-#define PUBDESC_name &at[61]
-#define PUBDESC_fig &at[62]
-#define PUBDESC_num &at[63]
-#define PUBDESC_numexc &at[64]
-#define PUBDESC_poly_a &at[65]
-#define PUBDESC_maploc &at[66]
-#define PUBDESC_seq_raw &at[67]
-#define PUBDESC_align_group &at[69]
-#define PUBDESC_comment &at[70]
-#define PUBDESC_reftype &at[71]
-
-#define SEQ_DESCR &at[6]
-#define SEQ_DESCR_E &at[7]
-
-#define SEQDESC &at[8]
-#define SEQDESC_mol_type &at[9]
-#define SEQDESC_modif &at[12]
-#define SEQDESC_modif_E &at[13]
-#define SEQDESC_method &at[15]
-#define SEQDESC_name &at[17]
-#define SEQDESC_title &at[19]
-#define SEQDESC_org &at[20]
-#define SEQDESC_comment &at[22]
-#define SEQDESC_num &at[23]
-#define SEQDESC_maploc &at[51]
-#define SEQDESC_pir &at[53]
-#define SEQDESC_genbank &at[55]
-#define SEQDESC_pub &at[57]
-#define SEQDESC_region &at[72]
-#define SEQDESC_user &at[73]
-#define SEQDESC_sp &at[75]
-#define SEQDESC_dbxref &at[77]
-#define SEQDESC_embl &at[78]
-#define SEQDESC_create_date &at[80]
-#define SEQDESC_update_date &at[82]
-#define SEQDESC_prf &at[83]
-#define SEQDESC_pdb &at[85]
-#define SEQDESC_het &at[87]
-#define SEQDESC_source &at[89]
-#define SEQDESC_molinfo &at[91]
-
-#define NUMBERING &at[24]
-#define NUMBERING_cont &at[25]
-#define NUMBERING_enum &at[33]
-#define NUMBERING_ref &at[39]
-#define NUMBERING_real &at[44]
-
-#define HETEROGEN &at[88]
-
-#define SEQ_HIST &at[152]
-#define SEQ_HIST_assembly &at[153]
-#define SEQ_HIST_assembly_E &at[154]
-#define SEQ_HIST_replaces &at[155]
-#define SEQ_HIST_replaced_by &at[160]
-#define SEQ_HIST_deleted &at[161]
-#define SEQ_HIST_deleted_bool &at[162]
-#define SEQ_HIST_deleted_date &at[163]
-
-#define GIBB_MOL &at[10]
-
-#define SEQ_LITERAL &at[147]
-#define SEQ_LITERAL_length &at[148]
-#define SEQ_LITERAL_fuzz &at[149]
-#define SEQ_LITERAL_seq_data &at[150]
-
-#define SEQ_INST &at[98]
-#define SEQ_INST_repr &at[99]
-#define SEQ_INST_mol &at[100]
-#define SEQ_INST_length &at[101]
-#define SEQ_INST_fuzz &at[102]
-#define SEQ_INST_topology &at[104]
-#define SEQ_INST_strand &at[105]
-#define SEQ_INST_seq_data &at[106]
-#define SEQ_INST_ext &at[129]
-#define SEQ_INST_hist &at[151]
-
-#define GIBB_MOD &at[14]
-
-#define GIBB_METHOD &at[16]
-
-#define MOLINFO &at[92]
-#define MOLINFO_biomol &at[93]
-#define MOLINFO_tech &at[94]
-#define MOLINFO_techexp &at[95]
-#define MOLINFO_completeness &at[96]
-
-#define NUM_CONT &at[26]
-#define NUM_CONT_refnum &at[27]
-#define NUM_CONT_has_zero &at[29]
-#define NUM_CONT_ascending &at[31]
-
-#define NUM_ENUM &at[34]
-#define NUM_ENUM_num &at[35]
-#define NUM_ENUM_names &at[36]
-#define NUM_ENUM_names_E &at[37]
-
-#define NUM_REF &at[40]
-#define NUM_REF_type &at[41]
-#define NUM_REF_aligns &at[42]
-
-#define NUM_REAL &at[45]
-#define NUM_REAL_a &at[46]
-#define NUM_REAL_b &at[48]
-#define NUM_REAL_units &at[49]
-
-#define SEQ_DATA &at[107]
-#define SEQ_DATA_iupacna &at[108]
-#define SEQ_DATA_iupacaa &at[110]
-#define SEQ_DATA_ncbi2na &at[112]
-#define SEQ_DATA_ncbi4na &at[115]
-#define SEQ_DATA_ncbi8na &at[117]
-#define SEQ_DATA_ncbipna &at[119]
-#define SEQ_DATA_ncbi8aa &at[121]
-#define SEQ_DATA_ncbieaa &at[123]
-#define SEQ_DATA_ncbipaa &at[125]
-#define SEQ_DATA_ncbistdaa &at[127]
-
-#define SEQ_EXT &at[130]
-#define SEQ_EXT_seg &at[131]
-#define SEQ_EXT_ref &at[135]
-#define SEQ_EXT_map &at[137]
-#define SEQ_EXT_delta &at[141]
-
-#define SEG_EXT &at[132]
-#define SEG_EXT_E &at[133]
-
-#define REF_EXT &at[136]
-
-#define MAP_EXT &at[138]
-#define MAP_EXT_E &at[139]
-
-#define DELTA_EXT &at[142]
-#define DELTA_EXT_E &at[143]
-
-#define DELTA_SEQ &at[144]
-#define DELTA_SEQ_loc &at[145]
-#define DELTA_SEQ_literal &at[146]
-
-#define SEQ_HIST_REC &at[156]
-#define SEQ_HIST_REC_date &at[157]
-#define SEQ_HIST_REC_ids &at[158]
-#define SEQ_HIST_REC_ids_E &at[159]
-
-#define IUPACNA &at[109]
-
-#define IUPACAA &at[111]
-
-#define NCBI2NA &at[113]
-
-#define NCBI4NA &at[116]
-
-#define NCBI8NA &at[118]
-
-#define NCBIPNA &at[120]
-
-#define NCBI8AA &at[122]
+#define SEQ_DESCR &at[67]
+#define SEQ_DESCR_E &at[68]
+
+#define SEQ_HIST &at[167]
+#define SEQ_HIST_assembly &at[168]
+#define SEQ_HIST_assembly_E &at[169]
+#define SEQ_HIST_replaces &at[170]
+#define SEQ_HIST_replaced_by &at[175]
+#define SEQ_HIST_deleted &at[176]
+#define SEQ_HIST_deleted_bool &at[177]
+#define SEQ_HIST_deleted_date &at[178]
+
+#define SEQ_LITERAL &at[162]
+#define SEQ_LITERAL_length &at[163]
+#define SEQ_LITERAL_fuzz &at[164]
+#define SEQ_LITERAL_seq_data &at[165]
+
+#define SEQDESC &at[69]
+#define SEQDESC_mol_type &at[70]
+#define SEQDESC_modif &at[72]
+#define SEQDESC_modif_E &at[73]
+#define SEQDESC_method &at[75]
+#define SEQDESC_name &at[77]
+#define SEQDESC_title &at[78]
+#define SEQDESC_org &at[79]
+#define SEQDESC_comment &at[81]
+#define SEQDESC_num &at[82]
+#define SEQDESC_maploc &at[83]
+#define SEQDESC_pir &at[85]
+#define SEQDESC_genbank &at[87]
+#define SEQDESC_pub &at[89]
+#define SEQDESC_region &at[90]
+#define SEQDESC_user &at[91]
+#define SEQDESC_sp &at[92]
+#define SEQDESC_dbxref &at[94]
+#define SEQDESC_embl &at[95]
+#define SEQDESC_create_date &at[97]
+#define SEQDESC_update_date &at[98]
+#define SEQDESC_prf &at[99]
+#define SEQDESC_pdb &at[101]
+#define SEQDESC_het &at[103]
+#define SEQDESC_source &at[105]
+#define SEQDESC_molinfo &at[107]
+
+#define SEQ_INST &at[114]
+#define SEQ_INST_repr &at[115]
+#define SEQ_INST_mol &at[116]
+#define SEQ_INST_length &at[117]
+#define SEQ_INST_fuzz &at[118]
+#define SEQ_INST_topology &at[120]
+#define SEQ_INST_strand &at[121]
+#define SEQ_INST_seq_data &at[122]
+#define SEQ_INST_ext &at[145]
+#define SEQ_INST_hist &at[166]
+
+#define GIBB_MOD &at[74]
+
+#define GIBB_METHOD &at[76]
+
+#define MOLINFO &at[108]
+#define MOLINFO_biomol &at[109]
+#define MOLINFO_tech &at[110]
+#define MOLINFO_techexp &at[111]
+#define MOLINFO_completeness &at[112]
+
+#define NUM_CONT &at[14]
+#define NUM_CONT_refnum &at[15]
+#define NUM_CONT_has_zero &at[17]
+#define NUM_CONT_ascending &at[19]
+
+#define NUM_ENUM &at[22]
+#define NUM_ENUM_num &at[23]
+#define NUM_ENUM_names &at[24]
+#define NUM_ENUM_names_E &at[25]
+
+#define NUM_REF &at[28]
+#define NUM_REF_type &at[29]
+#define NUM_REF_aligns &at[31]
+
+#define NUM_REAL &at[34]
+#define NUM_REAL_a &at[35]
+#define NUM_REAL_b &at[37]
+#define NUM_REAL_units &at[38]
+
+#define SEQ_DATA &at[123]
+#define SEQ_DATA_iupacna &at[124]
+#define SEQ_DATA_iupacaa &at[126]
+#define SEQ_DATA_ncbi2na &at[128]
+#define SEQ_DATA_ncbi4na &at[131]
+#define SEQ_DATA_ncbi8na &at[133]
+#define SEQ_DATA_ncbipna &at[135]
+#define SEQ_DATA_ncbi8aa &at[137]
+#define SEQ_DATA_ncbieaa &at[139]
+#define SEQ_DATA_ncbipaa &at[141]
+#define SEQ_DATA_ncbistdaa &at[143]
+
+#define SEQ_EXT &at[146]
+#define SEQ_EXT_seg &at[147]
+#define SEQ_EXT_ref &at[150]
+#define SEQ_EXT_map &at[152]
+#define SEQ_EXT_delta &at[156]
+
+#define SEG_EXT &at[148]
+#define SEG_EXT_E &at[149]
+
+#define REF_EXT &at[151]
+
+#define MAP_EXT &at[153]
+#define MAP_EXT_E &at[154]
+
+#define DELTA_EXT &at[157]
+#define DELTA_EXT_E &at[158]
+
+#define DELTA_SEQ &at[159]
+#define DELTA_SEQ_loc &at[160]
+#define DELTA_SEQ_literal &at[161]
+
+#define SEQ_HIST_REC &at[171]
+#define SEQ_HIST_REC_date &at[172]
+#define SEQ_HIST_REC_ids &at[173]
+#define SEQ_HIST_REC_ids_E &at[174]
+
+#define IUPACNA &at[125]
+
+#define IUPACAA &at[127]
+
+#define NCBI2NA &at[129]
+
+#define NCBI4NA &at[132]
+
+#define NCBI8NA &at[134]
+
+#define NCBIPNA &at[136]
+
+#define NCBI8AA &at[138]
+
+#define NCBIEAA &at[140]
+
+#define NCBIPAA &at[142]
+
+#define NCBISTDAA &at[144]
-#define NCBIEAA &at[124]
-
-#define NCBIPAA &at[126]
-
-#define NCBISTDAA &at[128]
-
-#define ANNOT_ID &at[169]
-#define ANNOT_ID_local &at[170]
-#define ANNOT_ID_ncbi &at[172]
-#define ANNOT_ID_general &at[173]
-
-#define ANNOT_DESCR &at[177]
-#define ANNOT_DESCR_E &at[178]
+#define ANNOT_ID &at[184]
+#define ANNOT_ID_local &at[185]
+#define ANNOT_ID_ncbi &at[187]
+#define ANNOT_ID_general &at[188]
-#define ANNOTDESC &at[179]
-#define ANNOTDESC_name &at[180]
-#define ANNOTDESC_title &at[181]
-#define ANNOTDESC_comment &at[182]
-#define ANNOTDESC_pub &at[183]
-#define ANNOTDESC_user &at[184]
-#define ANNOTDESC_create_date &at[185]
-#define ANNOTDESC_update_date &at[186]
-#define ANNOTDESC_src &at[187]
-#define ANNOTDESC_align &at[188]
-#define ANNOTDESC_region &at[193]
+#define ANNOT_DESCR &at[192]
+#define ANNOT_DESCR_E &at[193]
-#define ALIGN_DEF &at[189]
-#define ALIGN_DEF_align_type &at[190]
-#define ALIGN_DEF_ids &at[191]
-#define ALIGN_DEF_ids_E &at[192]
+#define ALIGN_DEF &at[56]
+#define ALIGN_DEF_align_type &at[57]
+#define ALIGN_DEF_ids &at[58]
+#define ALIGN_DEF_ids_E &at[59]
diff --git a/biostruc/cdd/cddserver.c b/biostruc/cdd/cddserver.c
index 2766e6de..aab6d293 100644
--- a/biostruc/cdd/cddserver.c
+++ b/biostruc/cdd/cddserver.c
@@ -1,4 +1,4 @@
-/* $Id: cddserver.c,v 1.44 2004/04/01 13:43:05 lavr Exp $
+/* $Id: cddserver.c,v 1.45 2004/05/10 20:55:57 bauer Exp $
*===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -29,7 +29,7 @@
*
* Initial Version Creation Date: 2/10/2000
*
-* $Revision: 1.44 $
+* $Revision: 1.45 $
*
* File Description:
* CD WWW-Server, Cd summary pages and alignments directly from the
@@ -38,6 +38,9 @@
* Modifications:
* --------------------------------------------------------------------------
* $Log: cddserver.c,v $
+* Revision 1.45 2004/05/10 20:55:57 bauer
+* fixed problem with CAV
+*
* Revision 1.44 2004/04/01 13:43:05 lavr
* Spell "occurred", "occurrence", and "occurring"
*
@@ -1688,6 +1691,7 @@ static void CDDSrvInfoBlk(CddPtr pcdd, FILE *table, CharPtr dbversion,
fprintf(table,"</td>\n");
dtp = CddGetCreateDate(pcdd);
utp = CddGetUpdateDate(pcdd);
+ if (NULL == dtp && NULL!= utp) dtp = utp;
fprintf(table," <td align=\"RIGHT\" class=\"medium1\" NOWRAP><strong>Created:</strong></td>\n");
if (utp) {
fprintf(table," <td align=\"LEFT\" class=\"medium1\" NOWRAP>%2d-%3s-%4d</td>\n",(int)dtp->data[3],NCBI_months[dtp->data[2]-1],(int)dtp->data[1]+1900);
@@ -2122,19 +2126,22 @@ static Boolean CddUseThisMMDBid(ValNodePtr location, CddSumPtr pcds)
/* use Paul's function to output HTML or Text-formatted alignments */
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
-Boolean CddInvokeAlignView(NcbiMimeAsn1Ptr pvnNcbi, CharPtr CDDalign, Int2 iPDB,
+Boolean CddInvokeAlignView(NcbiMimeAsn1Ptr pvnNcbi, Int2 iPDB,
CharPtr QuerySeq, CharPtr QueryAlign, CharPtr dbversion,
CddPtr pcdd, Boolean bHasPdb, FloatHi tbit, Uint2 pwidth,
Int4 iQueryGi, CharPtr QueryName, Int4 iFeatNum, CddSumPtr pcds,
Int4 alen, Int4 nTaxIds, Int4Ptr iTaxids, ValNodePtr txids,
CdTreeNodePtr pcdtree)
{
- Uint4 size = 2 * FileLength(CDDalign);
+ Uint4 size;
Uint4 uCAVoptions = 0;
BytePtr buf;
+ Nlm_ByteStorePtr bsp = NULL;
+ AsnIoBSPtr aibp = NULL;
BiostrucAnnotSetPtr basp;
BiostrucFeatureSetPtr bfsp;
AsnIoMemPtr aimp;
+ AsnIoPtr aip; /* debugging */
CddDescrPtr pCddesc;
CharPtr cCurrDesc;
Char source[PATH_MAX];
@@ -2160,12 +2167,6 @@ Boolean CddInvokeAlignView(NcbiMimeAsn1Ptr pvnNcbi, CharPtr CDDalign, Int2 iPDB,
piSize = (Int4Ptr) GetAlignmentSize(salp);
size = (Uint4) (piSize[0] * (8000 + piSize[1] * 400));
}
- buf = MemNew(size);
- aimp = (AsnIoMemPtr) AsnIoMemOpen("wb",buf,size);
- if (NULL == aimp) return(FALSE);
- if (!NcbiMimeAsn1AsnWrite(pvnNcbi,aimp->aip,NULL)) return(FALSE);
- AsnIoFlush(aimp->aip);
- AsnIoMemClose(aimp);
if (CddHasAnnotation(pcdd)) {
aap = pcdd->alignannot;
@@ -2216,6 +2217,32 @@ Boolean CddInvokeAlignView(NcbiMimeAsn1Ptr pvnNcbi, CharPtr CDDalign, Int2 iPDB,
printf(" <td width=\"100%%\"> \n");
printf("<PRE>\n");
}
+ bsp = Nlm_BSNew(1024);
+ aibp = AsnIoBSOpen((char *)"wb",bsp);
+ NcbiMimeAsn1AsnWrite(pvnNcbi,aibp->aip,NULL);
+ AsnIoFlush(aibp->aip);
+ AsnIoBSClose(aibp); aibp = NULL;
+ size = Nlm_BSLen(bsp);
+
+ buf = (BytePtr)MemNew(size);
+ Nlm_BSSeek(bsp,0,0);
+ if (Nlm_BSRead(bsp,buf,size) != size) {
+ CddHtmlError("Error creating buffer for Alignment viewer!");
+ }
+ Nlm_BSFree(bsp); bsp = NULL;
+
+/* old code, with 'static' buf size
+ aimp = (AsnIoMemPtr) AsnIoMemOpen("wb",buf,size);
+ if (NULL == aimp) return(FALSE);
+ if (!NcbiMimeAsn1AsnWrite(pvnNcbi,aimp->aip,NULL)) return(FALSE);
+ AsnIoFlush(aimp->aip);
+ AsnIoMemClose(aimp);
+*/
+/* debugging code
+ aip = AsnIoOpen("CAV_debug_data","w");
+ NcbiMimeAsn1AsnWrite(pvnNcbi,aip,NULL);
+ AsnIoClose(aip);
+*/
CAV_DisplayMultiple(buf, uCAVoptions, pwidth, tbit, NULL, nFeatures, pafeat);
MemFree(buf);
if (iPDB != 2 && iPDB != 8) printf("</PRE>\n");
@@ -2746,7 +2773,7 @@ Int2 Main()
SeqAlignPtr salpCopy, salpFlat;
SeqAlignPtr salpQuery = NULL;
SeqAnnotPtr psaCAlignHead = NULL;
- SeqAnnotPtr sap;
+ SeqAnnotPtr sap, sapTemp = NULL;
SeqEntryPtr sep, sequences = NULL;
SeqEntryPtr sepQuery = NULL;
SeqIdPtr sip, sipNew, sipQuery, sipRet;
@@ -3051,7 +3078,7 @@ Int2 Main()
CddHtmlError("Error setting environment variable BLASTDB");
}
www_arg = WWWGetValueByIndex(www_info,indx);
- Qstatus = (Int2) QBlastGetResults(www_arg,&salpTemp,&bspQuery,&blast_program,&blast_database,&other_returns, &error_returns);
+ Qstatus = (Int2) QBlastGetResults(www_arg,&sapTemp,&bspQuery,&blast_program,&blast_database,&other_returns, &error_returns);
if (Qstatus != 0) CddHtmlError("Could not retrieve query sequence from BLAST queue!");
sipQuery = bspQuery->id;
oidp = ObjectIdNew();
@@ -3984,7 +4011,7 @@ Int2 Main()
/*---------------------------------------------------------------------------*/
if (iSeqStrMode == CDDSEQUONLY) {
CddFixSequenceFormat(pbsaSeq->sequences);
- if (!CddInvokeAlignView(pvnNcbi,CDDalign,iPDB,QuerySeq,QueryAlign,dbversion,
+ if (!CddInvokeAlignView(pvnNcbi,iPDB,QuerySeq,QueryAlign,dbversion,
pcdd,bHasPdb,tbit,pwidth,iQueryGi,QueryName,iFeatNum,
pcds,alen,nTaxIds,iTaxids,txids,pcdtree))
CddHtmlError("Could not display alignment");
diff --git a/biostruc/cdd/wrpsbcl3.c b/biostruc/cdd/wrpsbcl3.c
index c18f2036..34ab8a2b 100644
--- a/biostruc/cdd/wrpsbcl3.c
+++ b/biostruc/cdd/wrpsbcl3.c
@@ -1,4 +1,4 @@
-/* $Id: wrpsbcl3.c,v 1.38 2004/03/10 14:36:02 bauer Exp $
+/* $Id: wrpsbcl3.c,v 1.41 2004/06/09 13:37:47 bauer Exp $
*===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -29,7 +29,7 @@
*
* Initial Version Creation Date: 4/19/2000
*
-* $Revision: 1.38 $
+* $Revision: 1.41 $
*
* File Description:
* WWW-RPS BLAST client
@@ -37,6 +37,15 @@
* Modifications:
* --------------------------------------------------------------------------
* $Log: wrpsbcl3.c,v $
+* Revision 1.41 2004/06/09 13:37:47 bauer
+* fixed typo in citation
+*
+* Revision 1.40 2004/05/24 17:33:51 bauer
+* fixed typo in citation
+*
+* Revision 1.39 2004/05/10 18:58:37 bauer
+* alignment sorting turned on for CDD v2.00
+*
* Revision 1.38 2004/03/10 14:36:02 bauer
* cosmetic change to no-hits page
*
@@ -628,17 +637,17 @@ static Boolean WRPSBDrawSearchPage()
databases[1] = CDDSearch2;
databases[2] = CDDSearch3;
databases[3] = CDDSearch4;
+ databases[4] = CDDSearch5;
datab_nam[0] = CDDSname1;
datab_nam[1] = CDDSname2;
datab_nam[2] = CDDSname3;
datab_nam[3] = CDDSname4;
+ datab_nam[4] = CDDSname5;
if (Nlm_StrCmp(CDDlocat,"inhouse")==0) {
- databases[4] = CDDSearch5;
databases[5] = CDDSearch6;
databases[6] = CDDSearch7;
- datab_nam[6] = CDDSname7;
- datab_nam[4] = CDDSname5;
datab_nam[5] = CDDSname6;
+ datab_nam[6] = CDDSname7;
}
@@ -733,7 +742,7 @@ static Boolean WRPSBDrawSearchPage()
printf("<b><a href=\"http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12520028&dopt=Abstract\">Citing CD-Search</a>:</b>\n");
printf(" Marchler-Bauer A, Anderson JB, DeWeese-Scott C, Fedorova ND, Geer LY, He S, Hurwitz DI, Jackson JD, Jacobs AR,\n");
printf(" Lanczycki CJ, Liebert CA, Liu C, Madej T, Marchler GH, Mazumder R, Nikolskaya AN, Panchenko AR, Rao BS, Shoemaker BA,\n");
- printf(" Simonyan V, Song JS, Thiessen RA, Vasudevan S, Wang Y, Yamashita RA, Yin JJ, and Bryant SH (2003), \n");
+ printf(" Simonyan V, Song JS, Thiessen PA, Vasudevan S, Wang Y, Yamashita RA, Yin JJ, and Bryant SH (2003), \n");
printf("\"<i>CDD: a curated Entrez database of conserved domain alignments</i>\",\n");
printf(" <b>Nucleic Acids Res. 31</b>:383-387.\n");
@@ -1358,7 +1367,7 @@ static void WRPSBCl3ViewSeqAlign(SeqAlignPtr seqalign, BioseqPtr query_bsp,
printf("<b><a href=\"http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12520028&dopt=Abstract\">Citing CD-Search</a>:</b>\n");
printf(" Marchler-Bauer A, Anderson JB, DeWeese-Scott C, Fedorova ND, Geer LY, He S, Hurwitz DI, Jackson JD, Jacobs AR,\n");
printf(" Lanczycki CJ, Liebert CA, Liu C, Madej T, Marchler GH, Mazumder R, Nikolskaya AN, Panchenko AR, Rao BS, Shoemaker BA,\n");
- printf(" Simonyan V, Song JS, Thiessen RA, Vasudevan S, Wang Y, Yamashita RA, Yin JJ, and Bryant SH (2003), \n");
+ printf(" Simonyan V, Song JS, Thiessen PA, Vasudevan S, Wang Y, Yamashita RA, Yin JJ, and Bryant SH (2003), \n");
printf("\"<i>CDD: a curated Entrez database of conserved domain alignments</i>\",\n");
printf(" <b>Nucleic Acids Res. 31</b>:383-387.\n");
}
@@ -1374,7 +1383,7 @@ void QRPSBWait(CharPtr rid, Int4 iGraphMode, Int4 iPairMode, Int4 HowLong, Nlm_F
{
CharPtr cTitle;
- cTitle = MemNew(sizeof(char) * 50);
+ cTitle = MemNew(sizeof(char) * 256);
sprintf(cTitle,"CD-Search request %s",rid);
WRPSBSearchHead(cTitle,"Pending Conserved Domain Search Request",FALSE, FALSE);
printf("<BR>\n");
@@ -1384,7 +1393,7 @@ void QRPSBWait(CharPtr rid, Int4 iGraphMode, Int4 iPairMode, Int4 HowLong, Nlm_F
HowLong,URLcgi,QRPSBNAME,rid,iGraphMode,iPairMode,expect,nhits,HowLong);
}
printf("<br>\n");
- printf("<h2>Waiting for BLAST-queue to finish</H2>\n",rid);
+ printf("<h2>Waiting for BLAST-queue to finish</H2>\n");
printf("<FORM ACTION=\"%s\" METHOD=POST" ">\n", QRPSBNAME);
printf("<INPUT TYPE=\"HIDDEN\" name=\"GRAPH\" value=\"%d\">\n",iGraphMode);
printf("<INPUT TYPE=\"HIDDEN\" name=\"PAIR\" value=\"%d\">\n",iPairMode);
diff --git a/biostruc/cdd/wrpsbtool.c b/biostruc/cdd/wrpsbtool.c
index afc2939a..2f92d1af 100644
--- a/biostruc/cdd/wrpsbtool.c
+++ b/biostruc/cdd/wrpsbtool.c
@@ -1,4 +1,4 @@
-/* $Id: wrpsbtool.c,v 1.23 2004/04/13 19:42:09 bauer Exp $
+/* $Id: wrpsbtool.c,v 1.25 2004/05/24 17:36:34 bauer Exp $
*===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -29,7 +29,7 @@
*
* Initial Version Creation Date: 4/19/2000
*
-* $Revision: 1.23 $
+* $Revision: 1.25 $
*
* File Description:
* tools for WWW-RPS BLAST
@@ -37,6 +37,12 @@
* Modifications:
* --------------------------------------------------------------------------
* $Log: wrpsbtool.c,v $
+* Revision 1.25 2004/05/24 17:36:34 bauer
+* call WRPSBCl3SortAlignment
+*
+* Revision 1.24 2004/05/10 18:58:37 bauer
+* alignment sorting turned on for CDD v2.00
+*
* Revision 1.23 2004/04/13 19:42:09 bauer
* fix URL for Cn3D launching via cddsrv.cgi
*
@@ -1889,8 +1895,7 @@ AlignmentAbstractPtr WRPSBCl3AbstractAlignment(BlastPruneSapStructPtr prune,
}
/* if (Nlm_StrCmp(myargs[1].strvalue,"cdd_prop")==0) bDbIsOasis = FALSE; */
- sap = prune->sap;
-/* sap = WRPSBCl3SortAlignment(prune->sap); */
+ sap = WRPSBCl3SortAlignment(prune->sap);
while (sap) {
iCount++;
aapThis = (AlignmentAbstractPtr)MemNew(sizeof(AlignmentAbstract));
diff --git a/biostruc/mmdbapi1.c b/biostruc/mmdbapi1.c
index bd60d93d..3925ba0a 100644
--- a/biostruc/mmdbapi1.c
+++ b/biostruc/mmdbapi1.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 03/14/95
*
-* $Revision: 6.44 $
+* $Revision: 6.45 $
*
* File Description:
*
@@ -44,6 +44,9 @@
* 95/08/30 C. Hogue Minor changes.
*
* $Log: mmdbapi1.c,v $
+* Revision 6.45 2004/05/06 19:31:09 chenj
+* fixed the bug in fnPBSFtoPSA() to use 2 chars for domain id
+*
* Revision 6.44 2003/12/03 02:11:28 kans
* added defines missing from Mac OS 10.3 headers
*
@@ -3204,6 +3207,7 @@ SeqAnnotPtr LIBCALL fnPBSFtoPSA (BiostrucFeaturePtr pbsfSelected)
MemFree (pcPDB);
/* get the embedded PDB code of the hit */
+ if (iDomain < 10) {
pcPDB = StringSave (PDBNAME_DEFAULT);
iDomain = 0;
cChain = '-';
@@ -3214,6 +3218,20 @@ SeqAnnotPtr LIBCALL fnPBSFtoPSA (BiostrucFeaturePtr pbsfSelected)
pcPDB[3] = pbsfSelected->name[10];
cChain = pbsfSelected->name[11];
iDomain = atoi ((char *) &pbsfSelected->name[12]);
+ }
+ else { /* have at least 10 domains in 1 str., added by J. Chen */
+ pcPDB = StringSave (PDBNAME_DEFAULT);
+ iDomain = 0;
+ cChain = '-';
+
+ pcPDB[0] = pbsfSelected->name[8];
+ pcPDB[1] = pbsfSelected->name[9];
+ pcPDB[2] = pbsfSelected->name[10];
+ pcPDB[3] = pbsfSelected->name[11];
+ cChain = pbsfSelected->name[12];
+ iDomain = atoi ((char *) &pbsfSelected->name[13]);
+ }
+
/*slavesip = MakePDBSeqId2 (pcPDB, cChain, iDomain, TRUE); */
slavesip = MakePDBSeqId2 (pcPDB, cChain, iDomain, FALSE);
@@ -3340,7 +3358,7 @@ SeqAnnotPtr LIBCALL fnPBSFtoPSA (BiostrucFeaturePtr pbsfSelected)
count++;
if(salp->segs == NULL) salp->segs = (Pointer) ddp;
else {
- ddp_tmp = salp->segs;
+ ddp_tmp = (DenseDiagPtr) salp->segs;
while(ddp_tmp->next) ddp_tmp = ddp_tmp->next;
ddp_tmp->next = ddp;
}
@@ -3401,7 +3419,7 @@ SeqAnnotPtr LIBCALL BiostrToSeqAnnotSet (BiostrucAnnotSetPtr set,
while(pbsfs)
{
feature=pbsfs->features;
- strcpy(pcMaster, pbsfs->descr->data.ptrvalue);
+ strcpy(pcMaster, (char *)pbsfs->descr->data.ptrvalue);
/*if master name matched*/
if(strcmp(pcMaster, pdbname_master) == 0) /* I had this commented out */
{
@@ -4177,7 +4195,7 @@ printf("in BiostrucAddFeature \n");
/*psfsThis->pvnDescr = (ValNodePtr) pbsfsThis->descr; */ /* link stub for descr */
/*pbsfsThis->descr = NULL; */ /* detach and save from free-ing descr */
/* yanli comment the above two line out, instead do the following */
- psfsThis->pvnDescr = AsnIoMemCopy((ValNodePtr) pbsfsThis->descr, (AsnReadFunc)BiostrucFeatureSetDescrAsnRead, (AsnWriteFunc)BiostrucFeatureSetDescrAsnWrite);
+ psfsThis->pvnDescr = (ValNodePtr)AsnIoMemCopy((ValNodePtr) pbsfsThis->descr, (AsnReadFunc)BiostrucFeatureSetDescrAsnRead, (AsnWriteFunc)BiostrucFeatureSetDescrAsnWrite);
pvnThis = NULL;
pvnThis = ValNodeFindNext(psfsThis->pvnDescr, NULL, BiostrucFeatureSetDescr_name);
diff --git a/checkout.date b/checkout.date
index 474c7ae2..06df8f23 100644
--- a/checkout.date
+++ b/checkout.date
@@ -1 +1 @@
-Wed May 5 12:45:49 EDT 2004
+Wed Jun 16 15:18:34 EDT 2004
diff --git a/cn3d/README b/cn3d/README
index 9f74ab29..347e978a 100644
--- a/cn3d/README
+++ b/cn3d/README
@@ -1,3 +1,2 @@
-The code in this directory (cn3d) has been moved to the c++ build. Please do
-not check in any code into this directory -- any changes should be checked into
-the c++ tree. Questions to lewisg.
+The version of Cn3D in this directory is no longer maintained and should not be used. Cn3D has been completely rewritten in C++ using the NCBI C++ toolkit. For more information, see the documentation for the C++ toolkit and
+http://www.ncbi.nlm.nih.gov/Structure/CN3D/cn3dfaq.shtml#Cn3dsource
diff --git a/connect/ncbi_connection.c b/connect/ncbi_connection.c
index 629bf2c5..5ef03d7c 100644
--- a/connect/ncbi_connection.c
+++ b/connect/ncbi_connection.c
@@ -1,4 +1,4 @@
-/* $Id: ncbi_connection.c,v 6.39 2004/03/23 02:27:37 lavr Exp $
+/* $Id: ncbi_connection.c,v 6.42 2004/05/26 16:00:06 lavr Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -305,9 +305,9 @@ extern EIO_Status CONN_SetTimeout
}
break;
default:
+ status = eIO_InvalidArg;
CONN_LOG(eLOG_Error,
"[CONN_SetTimeout] Unknown event to set timeout for");
- status = eIO_InvalidArg;
assert(0);
break;
}
@@ -454,7 +454,8 @@ extern EIO_Status CONN_Write
if (!n_written)
return eIO_InvalidArg;
*n_written = 0;
-
+ if (size && !buf)
+ return eIO_InvalidArg;
CONN_NOT_NULL(Write);
if (conn->state == eCONN_Unusable)
@@ -614,6 +615,8 @@ extern EIO_Status CONN_Read
if (!n_read)
return eIO_InvalidArg;
*n_read = 0;
+ if (size && !buf)
+ return eIO_InvalidArg;
CONN_NOT_NULL(Read);
@@ -647,6 +650,74 @@ extern EIO_Status CONN_Read
}
+/* Read one '\n'-terminated line from "conn" into "line", storing at most
+ * "size" bytes.  The '\n' is replaced with '\0' and is not counted in
+ * "*n_read".  If "size" bytes get stored before a '\n' is seen, no '\0' is
+ * written and "*n_read" equals "size".  Bytes that were read past the end
+ * of the line are pushed back into conn->buf.
+ *
+ * Returns eIO_InvalidArg for a NULL "n_read", or for a NULL "line" with a
+ * non-zero "size"; the s_Open() status if the connection could not be
+ * opened; eIO_Unknown if the push-back failed; otherwise the status of the
+ * last s_CONN_Read() call.
+ */
+extern EIO_Status CONN_ReadLine
+(CONN    conn,
+ char*   line,
+ size_t  size,
+ size_t* n_read
+ )
+{
+    EIO_Status  status = eIO_Success;
+    int/*bool*/ done   = 0;  /* set once the terminating '\n' was consumed */
+    char        w[1024];     /* staging area used when "line" is nearly full */
+    size_t      len;
+
+    if (!n_read)
+        return eIO_InvalidArg;
+    *n_read = 0;
+    if (size && !line)
+        return eIO_InvalidArg;
+
+    CONN_NOT_NULL(ReadLine);
+
+    /* perform open, if not opened yet */
+    if (conn->state != eCONN_Open && (status = s_Open(conn)) != eIO_Success)
+        return status;
+    assert(conn->state == eCONN_Open && conn->meta.list != 0);
+
+    /* flush the unwritten output data (if any) */
+    if ( conn->meta.flush ) {
+        conn->meta.flush(conn->meta.c_flush,
+                         conn->r_timeout == kDefaultTimeout ?
+                         conn->meta.default_timeout : conn->r_timeout);
+    }
+
+    len = 0;
+    while (len < size) {
+        size_t i;
+        size_t x_read = 0;
+        size_t x_size = BUF_Size(conn->buf);
+        /* read straight into "line" only while at least sizeof(w) bytes of
+         * it are still free; otherwise stage the chunk in "w" so that a
+         * full-sized read can never overrun "line" */
+        char*  buf    = size - len < sizeof(w) ? w : &line[len];
+        if (x_size == 0 || x_size > sizeof(w))
+            x_size = sizeof(w);
+        status = s_CONN_Read(conn, buf, x_size, &x_read, 0);
+        for (i = 0; i < x_read; i++) {
+            if (buf == w)
+                line[len] = buf[i];
+            if (buf[i] == '\n') {
+                line[len] = '\0';
+                done = 1;
+                i++;
+                break;
+            } else if (++len >= size) {
+                i++;
+                break;
+            }
+        }
+        if (i < x_read) {
+            /* keep whatever follows the consumed part for the next read */
+            if (!BUF_PushBack(&conn->buf, &buf[i], x_read - i))
+                status = eIO_Unknown;
+            break;
+        } else if (done) {
+            /* '\n' was the very last byte of this chunk: the line is
+             * complete, so do not go on reading into the next line */
+            break;
+        } else if (status != eIO_Success) {
+            if (len < size)
+                line[len] = '\0';
+            break;
+        }
+    }
+
+    *n_read = len;
+    return status;
+}
+
+
extern EIO_Status CONN_Status(CONN conn, EIO_Event dir)
{
CONN_NOT_NULL(Status);
@@ -783,6 +854,15 @@ extern EIO_Status CONN_WaitAsync
/*
* --------------------------------------------------------------------------
* $Log: ncbi_connection.c,v $
+ * Revision 6.42 2004/05/26 16:00:06 lavr
+ * Minor status fixes in CONN_SetTimeout() and CONN_ReadLine()
+ *
+ * Revision 6.41 2004/05/24 20:19:19 lavr
+ * Fix eIO_InvalidArg conditions (size and no buffer)
+ *
+ * Revision 6.40 2004/05/24 19:54:59 lavr
+ * +CONN_ReadLine()
+ *
* Revision 6.39 2004/03/23 02:27:37 lavr
* Code formatting
*
diff --git a/connect/ncbi_connection.h b/connect/ncbi_connection.h
index a3219d16..20a6ca94 100644
--- a/connect/ncbi_connection.h
+++ b/connect/ncbi_connection.h
@@ -1,7 +1,7 @@
#ifndef CONNECT___NCBI_CONNECTION__H
#define CONNECT___NCBI_CONNECTION__H
-/* $Id: ncbi_connection.h,v 6.17 2004/02/23 15:23:36 lavr Exp $
+/* $Id: ncbi_connection.h,v 6.20 2004/06/09 14:03:06 jcherry Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -198,7 +198,7 @@ extern NCBI_XCONNECT_EXPORT EIO_Status CONN_Flush
);
-/* Read up to "size" bytes from the connection to mem.buffer pointed by "buf".
+/* Read up to "size" bytes from a connection to the buffer pointed to by "buf".
* In "*n_read", return the number of successfully read bytes.
* If there is absolutely no data available to read and the timeout (see
* CONN_SetTimeout()) is expired then return eIO_Timeout (and "*n_read" := 0).
@@ -218,6 +218,27 @@ extern NCBI_XCONNECT_EXPORT EIO_Status CONN_Read
);
+/* Read up to "size" bytes from a connection into the string buffer pointed
+ * to by "line". Stop reading if either '\n' or an error is encountered.
+ * Replace '\n' with '\0'. Upon return "*n_read" contains the number
+ * of characters written to "line", not including the terminating '\0'.
+ * If not enough space provided in "line" to accommodate the '\0'-terminated
+ * line, then all "size" bytes are used and "*n_read" equals "size" on return.
+ * This is the only case when "line" will not be '\0'-terminated.
+ * Return code advises the caller whether another line read can be attempted:
+ * eIO_Success -- read completed successfully, keep reading;
+ * other code -- an error occurred, and further attempt may fail.
+ *
+ * This call utilizes eIO_Read timeout as set by CONN_SetTimeout().
+ */
+extern NCBI_XCONNECT_EXPORT EIO_Status CONN_ReadLine
+(CONN conn,
+ char* line,
+ size_t size,
+ size_t* n_read
+ );
+
+
/* Obtain status of the last IO operation. This is NOT a completion
* code of the last CONN-call, but rather a status from the lower level
* connector's layer.
@@ -247,8 +268,8 @@ extern NCBI_XCONNECT_EXPORT EIO_Status CONN_Close
*/
typedef enum {
eCONN_OnClose = 0
-#define CONN_N_CALLBACKS 1
} ECONN_Callback;
+#define CONN_N_CALLBACKS 1
typedef void (*FConnCallback)(CONN conn, ECONN_Callback type, void* data);
@@ -305,6 +326,15 @@ extern EIO_Status CONN_WaitAsync
/*
* ---------------------------------------------------------------------------
* $Log: ncbi_connection.h,v $
+ * Revision 6.20 2004/06/09 14:03:06 jcherry
+ * Moved #define out of enum body (SWIG was choking on this)
+ *
+ * Revision 6.19 2004/05/24 19:58:29 lavr
+ * +NCBI_XCONNECT_EXPORT for CONN_ReadLine()
+ *
+ * Revision 6.18 2004/05/24 19:53:30 lavr
+ * +CONN_ReadLine()
+ *
* Revision 6.17 2004/02/23 15:23:36 lavr
* New (last) parameter "how" added in CONN_Write() API call
*
diff --git a/connect/ncbi_service.c b/connect/ncbi_service.c
index bc37c15f..0c079732 100644
--- a/connect/ncbi_service.c
+++ b/connect/ncbi_service.c
@@ -1,4 +1,4 @@
-/* $Id: ncbi_service.c,v 6.51 2004/03/23 02:28:21 lavr Exp $
+/* $Id: ncbi_service.c,v 6.53 2004/06/14 16:37:09 lavr Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -92,6 +92,23 @@ SERV_ITER SERV_OpenSimple(const char* service)
static int/*bool*/ s_AddSkipInfo(SERV_ITER iter, SSERV_Info* info)
{
+ if (info->type == fSERV_Firewall) {
+ size_t n;
+ for (n = 0; n < iter->n_skip; n++) {
+ SSERV_Info* temp = iter->skip[n];
+ if (temp->type == fSERV_Firewall &&
+ temp->u.firewall.type == info->u.firewall.type) {
+ if (n < --iter->n_skip) {
+ memmove(iter->skip + n, iter->skip + n + 1,
+ sizeof(*iter->skip)*(iter->n_skip - n));
+ }
+ if (iter->last == temp)
+ iter->last = 0;
+ free(temp);
+ break;
+ }
+ }
+ }
if (iter->n_skip == iter->n_max_skip) {
SSERV_Info** temp;
size_t n = iter->n_max_skip + 10;
@@ -106,7 +123,6 @@ static int/*bool*/ s_AddSkipInfo(SERV_ITER iter, SSERV_Info* info)
iter->skip = temp;
iter->n_max_skip = n;
}
-
iter->skip[iter->n_skip++] = info;
return 1;
}
@@ -139,7 +155,6 @@ static SERV_ITER s_Open(const char* service, TSERV_Type type,
iter->external = external;
if (n_skip) {
- TNCBI_Time t = (TNCBI_Time) time(0);
size_t i;
for (i = 0; i < n_skip; i++) {
size_t skipinfolen = SERV_SizeOfInfo(skip[i]);
@@ -149,7 +164,7 @@ static SERV_ITER s_Open(const char* service, TSERV_Type type,
return 0;
}
memcpy(skipinfo, skip[i], skipinfolen);
- skipinfo->time = t + 3600/*hour*/*24/*day*/*365/*year :-) */;
+ skipinfo->time = (TNCBI_Time)(-1);
if (!s_AddSkipInfo(iter, skipinfo)) {
free(skipinfo);
SERV_Close(iter);
@@ -242,19 +257,20 @@ static void s_SkipSkip(SERV_ITER iter)
{
if (iter->n_skip) {
TNCBI_Time t = (TNCBI_Time) time(0);
- size_t i = 0;
-
- while (i < iter->n_skip) {
- SSERV_Info* info = iter->skip[i];
- if (info->time < t) {
- if (i < --iter->n_skip)
- memmove(iter->skip + i, iter->skip + i + 1,
- sizeof(*iter->skip)*(iter->n_skip - i));
- if (info == iter->last)
+ size_t n = 0;
+
+ while (n < iter->n_skip) {
+ SSERV_Info* temp = iter->skip[n];
+ if (temp->time < t) {
+ if (n < --iter->n_skip) {
+ memmove(iter->skip + n, iter->skip + n + 1,
+ sizeof(*iter->skip)*(iter->n_skip - n));
+ }
+ if (iter->last == temp)
iter->last = 0;
- free(info);
+ free(temp);
} else
- i++;
+ n++;
}
}
}
@@ -474,6 +490,12 @@ double SERV_Preference(double pref, double gap, unsigned int n)
/*
* --------------------------------------------------------------------------
* $Log: ncbi_service.c,v $
+ * Revision 6.53 2004/06/14 16:37:09 lavr
+ * Allow no more than one firewall server info in the skip list
+ *
+ * Revision 6.52 2004/05/17 18:19:43 lavr
+ * Mark skip infos with maximal time instead of calculating 1 year from now
+ *
* Revision 6.51 2004/03/23 02:28:21 lavr
* Limit service name resolution recursion by 8
*
diff --git a/connect/ncbi_service.h b/connect/ncbi_service.h
index 04c4fe36..c78f6a0e 100644
--- a/connect/ncbi_service.h
+++ b/connect/ncbi_service.h
@@ -1,7 +1,7 @@
#ifndef CONNECT___NCBI_SERVICE__H
#define CONNECT___NCBI_SERVICE__H
-/* $Id: ncbi_service.h,v 6.32 2004/01/30 14:37:33 lavr Exp $
+/* $Id: ncbi_service.h,v 6.33 2004/06/14 16:36:13 lavr Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -41,7 +41,7 @@
/* Revision 6.100 */
#define SERV_CLIENT_REVISION_MAJOR 6
-#define SERV_CLIENT_REVISION_MINOR 100
+#define SERV_CLIENT_REVISION_MINOR 101
/** @addtogroup ServiceSupport
@@ -199,6 +199,9 @@ extern NCBI_XCONNECT_EXPORT void DISP_SetMessageHook(FDISP_MessageHook);
/*
* --------------------------------------------------------------------------
* $Log: ncbi_service.h,v $
+ * Revision 6.33 2004/06/14 16:36:13 lavr
+ * Client minor version number incremented
+ *
* Revision 6.32 2004/01/30 14:37:33 lavr
* Client revision made independent of CVS revisions
*
diff --git a/connect/test/test_assert.h b/connect/test/test_assert.h
index 5d3314f3..926f0a7c 100644
--- a/connect/test/test_assert.h
+++ b/connect/test/test_assert.h
@@ -1,7 +1,7 @@
#ifndef TEST_ASSERT__H
#define TEST_ASSERT__H
-/* $Id: test_assert.h,v 6.21 2003/03/12 21:25:19 lavr Exp $
+/* $Id: test_assert.h,v 6.22 2004/06/10 19:20:27 ivanov Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -57,7 +57,7 @@
* and in debug libraries, as well as all General Protection Fault messages.
* Environment variable DIAG_SILENT_ABORT must be set to "Y" or "y".
*/
-static void _SuppressDiagPopupMessages(void)
+static int _SuppressDiagPopupMessages(void)
{
/* Check environment variable for silent abort app at error */
const char* value = getenv("DIAG_SILENT_ABORT");
@@ -76,6 +76,7 @@ static void _SuppressDiagPopupMessages(void)
_CrtSetReportFile(_CRT_ASSERT, stderr);
_CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE);
}
+ return 0;
}
/* Put this function at startup init level 'V', far enough not to mess up with
@@ -83,7 +84,7 @@ static void _SuppressDiagPopupMessages(void)
*/
# pragma data_seg(".CRT$XIV")
-static void (*_SDPM)(void) = _SuppressDiagPopupMessages;
+static int (*_SDPM)(void) = _SuppressDiagPopupMessages;
# pragma data_seg()
@@ -129,6 +130,9 @@ static void (*_SDPM)(void) = _SuppressDiagPopupMessages;
/*
* --------------------------------------------------------------------------
* $Log: test_assert.h,v $
+ * Revision 6.22 2004/06/10 19:20:27 ivanov
+ * _SuppressDiagPopupMessages() returns 'int' to avoid runtime errors on MSVC7
+ *
* Revision 6.21 2003/03/12 21:25:19 lavr
* More elaborate conditional branch for Mac's Codewarrior
*
diff --git a/corelib/ncbifile.c b/corelib/ncbifile.c
index 45cac220..1b881b88 100644
--- a/corelib/ncbifile.c
+++ b/corelib/ncbifile.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 3/4/91
*
-* $Revision: 6.34 $
+* $Revision: 6.35 $
*
* File Description:
* portable file routines
@@ -43,6 +43,9 @@
* 11-27-94 Ostell moved includes to ncbiwin.h to avoid conflict MSC
*
* $Log: ncbifile.c,v $
+* Revision 6.35 2004/05/07 15:57:14 kans
+* added FileCache functions for buffered read, graceful handing of Unix, Mac, and DOS line endings
+*
* Revision 6.34 2004/01/23 20:07:16 kans
* fix to FileGets under Darwin, was losing last character if buffer was shorter than line being read
*
@@ -1362,3 +1365,237 @@ NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_TmpNam (Nlm_CharPtr s)
#endif
}
+/* FileCache provides buffered text read for handling Unix, Mac, and DOS line endings gracefully */
+
+/* Nlm_FileCacheSetup: bind an already opened stream (text read mode
+ * expected) to a caller-supplied cache object (usually on the stack).
+ * The cache is zeroed and its offset is set to the stream's current ftell()
+ * position.  Returns FALSE if either argument is NULL, TRUE otherwise.
+ */
+
+NLM_EXTERN Nlm_Boolean Nlm_FileCacheSetup (
+  Nlm_FileCache PNTR fcp,
+  FILE *fp
+)
+
+{
+  Nlm_Boolean ok = FALSE;
+
+  if (fcp != NULL && fp != NULL) {
+    MemSet ((Nlm_VoidPtr) fcp, 0, sizeof (*fcp));
+
+    fcp->offset = ftell (fp);
+    fcp->fp = fp;
+
+    ok = TRUE;
+  }
+
+  return ok;
+}
+
+/* Refill the cache buffer with the next block (up to 512 characters) from
+ * the attached stream.  Does nothing while unread characters remain.
+ * The cached offset is advanced by the size of the block just consumed.
+ */
+static void Nlm_FileCacheReadBlock (
+  Nlm_FileCache PNTR fcp
+)
+
+{
+  int nread;
+
+  if (fcp == NULL) return;
+  if (fcp->fp == NULL) return;
+
+  /* unread characters are still available - nothing to do */
+
+  if (fcp->ctr < fcp->total) return;
+
+  fcp->offset += (Nlm_Int4) fcp->total;
+  fcp->total = 0;
+  fcp->ctr = 0;
+
+  fcp->buf [0] = '\0';
+  nread = (int) Nlm_FileRead ((Nlm_VoidPtr) fcp->buf, sizeof (Nlm_Char), (size_t) 512, fcp->fp);
+  if (nread < 0 || nread > 512) {
+    nread = 512;
+  }
+
+  /* block length is measured as a string length after termination */
+  /* NOTE(review): presumably an embedded NUL in the data would cut the block short - confirm */
+
+  fcp->buf [nread] = '\0';
+  fcp->total = Nlm_StringLen (fcp->buf);
+}
+
+/* equivalent of getc: returns the next character from the cache, refilling
+ * the buffer when it is exhausted.  Any of "\n", "\r", "\n\r" or "\r\n" is
+ * consumed as a unit and returned as a single '\n'.  Returns '\0' when no
+ * character is available or when fcp / fcp->fp is NULL.
+ */
+
+static Nlm_Char Nlm_FileCacheGetChar (
+  Nlm_FileCache PNTR fcp
+)
+
+{
+  Nlm_Char cur, peek;
+
+  if (fcp == NULL || fcp->fp == NULL) return '\0';
+
+  /* read a fresh block if buffer is empty */
+
+  if (fcp->ctr >= fcp->total) {
+    Nlm_FileCacheReadBlock (fcp);
+  }
+
+  /* still nothing to hand out */
+
+  if (fcp->ctr >= fcp->total) return '\0';
+
+  /* get next character in buffer */
+
+  cur = fcp->buf [(int) fcp->ctr];
+  (fcp->ctr)++;
+
+  if (cur != '\n' && cur != '\r') return cur;
+
+  /* the partner of a two-character line ending may sit in the next block */
+
+  if (fcp->ctr >= fcp->total) {
+    Nlm_FileCacheReadBlock (fcp);
+  }
+
+  if (fcp->ctr < fcp->total) {
+
+    /* look for carriage return / linefeed pair - DOS file read on Mac or Unix platform */
+
+    peek = fcp->buf [(int) fcp->ctr];
+
+    /* advance past second character in cr/lf pair */
+
+    if ((cur == '\n' && peek == '\r') || (cur == '\r' && peek == '\n')) {
+      (fcp->ctr)++;
+    }
+  }
+
+  /* cr or lf returned as newline */
+
+  return '\n';
+}
+
+/* equivalent of fgets, leaves '\n' at end of string
+ *
+ * Stores at most size - 1 characters of the next line in str, followed by
+ * a terminating '\0'.  A line ending is stored as a single '\n'.
+ * Returns str if at least one character was stored, NULL otherwise (NULL
+ * arguments, size < 1, no room for any character, or nothing left to read).
+ */
+
+NLM_EXTERN Nlm_CharPtr Nlm_FileCacheGetString (
+  Nlm_FileCache PNTR fcp,
+  Nlm_CharPtr str,
+  size_t size
+)
+
+{
+  Nlm_Char     ch;
+  size_t       count;  /* size_t so lines longer than 32767 characters do not wrap the counter */
+  Nlm_CharPtr  ptr;
+
+  if (fcp == NULL || fcp->fp == NULL || str == NULL || size < 1) return NULL;
+
+  /* a 1-byte buffer only has room for the terminator: store it and report */
+  /* that nothing was read, without consuming any input */
+
+  if (size < 2) {
+    *str = '\0';
+    return NULL;
+  }
+
+  ch = Nlm_FileCacheGetChar (fcp);
+  count = 0;
+  ptr = str;
+
+  /* "count + 2 < size" instead of "count < size - 2" avoids unsigned wrap-around */
+  /* and leaves room for the final character plus the terminating '\0' */
+
+  while (ch != '\0' && ch != '\n' && ch != '\r' && count + 2 < size) {
+    *ptr = ch;
+    ptr++;
+    count++;
+    ch = Nlm_FileCacheGetChar (fcp);
+  }
+
+  if (ch == '\n' || ch == '\r') {
+    *ptr = '\n';
+    ptr++;
+    count++;
+  } else if (ch != '\0') {
+    *ptr = ch;
+    ptr++;
+    count++;
+  }
+  *ptr = '\0';
+
+  if (count < 1) return NULL;
+
+  return str;
+}
+
+/* smarter fgets: reads the next line with Nlm_FileCacheGetString and cuts
+ * the string at the first '\n' or '\r'.  If nonewline is not NULL it is set
+ * to TRUE when no line ending was found in the string, FALSE otherwise.
+ * Returns what Nlm_FileCacheGetString returned (NULL when nothing was read).
+ */
+
+NLM_EXTERN Nlm_CharPtr Nlm_FileCacheReadLine (
+  Nlm_FileCache PNTR fcp,
+  Nlm_CharPtr str,
+  size_t size,
+  Nlm_BoolPtr nonewline
+)
+
+{
+  Nlm_CharPtr cursor;
+  Nlm_Char    last;
+  Nlm_CharPtr result;
+
+  if (fcp == NULL || fcp->fp == NULL || str == NULL || size < 1) return NULL;
+
+  *str = '\0';
+  result = Nlm_FileCacheGetString (fcp, str, size);
+  if (result == NULL) return NULL;
+
+  /* find the end of the text proper */
+
+  for (cursor = str; *cursor != '\0' && *cursor != '\n' && *cursor != '\r'; cursor++) {
+    continue;
+  }
+  last = *cursor;
+  *cursor = '\0';
+
+  if (nonewline != NULL) {
+    *nonewline = (last == '\n' || last == '\r') ? FALSE : TRUE;
+  }
+
+  return result;
+}
+
+/* Move the read position of the cache to file position pos.  If pos lies
+ * within the block currently held in the buffer only the buffer index is
+ * changed; otherwise the buffer is discarded and the stream is repositioned
+ * with fseek.
+ */
+NLM_EXTERN void Nlm_FileCacheSeek (
+  Nlm_FileCache PNTR fcp,
+  Nlm_Int4 pos
+)
+
+{
+  Nlm_Int4 first, last;
+
+  if (fcp == NULL || fcp->fp == NULL) return;
+
+  first = fcp->offset;
+  last = fcp->offset + (Nlm_Int4) fcp->total;
+
+  if (pos >= first && pos <= last) {
+    fcp->ctr = (Nlm_Int2) (pos - fcp->offset);
+  } else {
+    fcp->offset = pos;
+    fcp->total = 0;
+    fcp->ctr = 0;
+
+    fseek (fcp->fp, pos, SEEK_SET);
+  }
+}
+
+/* Report the logical read position of the cache: the stream's ftell()
+ * position minus the characters still unread in the buffer.
+ * Returns 0 if fcp or its stream is NULL.
+ */
+NLM_EXTERN Nlm_Int4 Nlm_FileCacheTell (
+  Nlm_FileCache PNTR fcp
+)
+
+{
+  Nlm_Int4 filepos;
+  Nlm_Int4 unread;
+
+  if (fcp == NULL || fcp->fp == NULL) return 0L;
+
+  unread = (Nlm_Int4) (fcp->total - fcp->ctr);
+  filepos = ftell (fcp->fp);
+
+  return filepos - unread;
+}
+
+/* Detach the cache from its stream and zero the cache object.  When
+ * restoreFilePos is set, the stream is first repositioned to the logical
+ * read position reported by Nlm_FileCacheTell.
+ * Returns FALSE if fcp or its stream is NULL, TRUE otherwise.
+ */
+NLM_EXTERN Nlm_Boolean Nlm_FileCacheFree (
+  Nlm_FileCache PNTR fcp,
+  Nlm_Boolean restoreFilePos
+)
+
+{
+  if (fcp == NULL || fcp->fp == NULL) return FALSE;
+
+  /* correct position of file pointer */
+
+  if (restoreFilePos) {
+    fseek (fcp->fp, Nlm_FileCacheTell (fcp), SEEK_SET);
+  }
+
+  MemSet ((Nlm_VoidPtr) fcp, 0, sizeof (*fcp));
+
+  return TRUE;
+}
+
diff --git a/corelib/ncbifile.h b/corelib/ncbifile.h
index e46c0a74..853e3e64 100644
--- a/corelib/ncbifile.h
+++ b/corelib/ncbifile.h
@@ -32,7 +32,7 @@
*
* Version Creation Date: 1/1/91
*
-* $Revision: 6.5 $
+* $Revision: 6.6 $
*
* File Description:
* prototypes for portable file routines
@@ -40,6 +40,9 @@
* Modifications:
* --------------------------------------------------------------------------
* $Log: ncbifile.h,v $
+* Revision 6.6 2004/05/07 15:57:14 kans
+* added FileCache functions for buffered read, graceful handing of Unix, Mac, and DOS line endings
+*
* Revision 6.5 2001/04/05 21:36:05 juran
* EjectCd and MountCd #defined to FALSE.
*
@@ -98,6 +101,25 @@ NLM_EXTERN ValNodePtr LIBCALL Nlm_DirCatalog (Nlm_CharPtr pathname);
NLM_EXTERN Nlm_CharPtr LIBCALL Nlm_TmpNam(Nlm_CharPtr s);
NLM_EXTERN void LIBCALL Nlm_SetFileOpenHook(Nlm_FileOpenHook hook);
+/* FileCache provides buffered text read for handling Unix, Mac, and DOS line endings gracefully */
+
+/* State of one buffered text reader; initialized by Nlm_FileCacheSetup and
+ * cleared by Nlm_FileCacheFree. */
+typedef struct nlm_filecachedata {
+ /* stream being read */
+ FILE *fp;
+ /* current block; ncbifile.c reads up to 512 characters into it and appends a '\0' */
+ Nlm_Char buf [516];
+ /* index in buf of the next character to hand out */
+ Nlm_Int2 ctr;
+ /* number of characters currently held in buf */
+ Nlm_Int2 total;
+ /* file position corresponding to buf [0], as tracked by the cache */
+ Nlm_Int4 offset;
+} Nlm_FileCache, PNTR Nlm_FileCachePtr;
+
+/* attach an open stream to a cache object; FALSE if either argument is NULL */
+NLM_EXTERN Nlm_Boolean Nlm_FileCacheSetup (Nlm_FileCache PNTR fcp, FILE *fp);
+/* fgets equivalent: line ending is stored as a single '\n'; NULL when nothing was read */
+NLM_EXTERN Nlm_CharPtr Nlm_FileCacheGetString (Nlm_FileCache PNTR fcp, Nlm_CharPtr str, size_t size);
+/* like Nlm_FileCacheGetString but strips the line ending; *nonewline reports whether one was missing */
+NLM_EXTERN Nlm_CharPtr Nlm_FileCacheReadLine (Nlm_FileCache PNTR fcp, Nlm_CharPtr str, size_t size, Nlm_BoolPtr nonewline);
+/* move the read position to file position pos */
+NLM_EXTERN void Nlm_FileCacheSeek (Nlm_FileCache PNTR fcp, Nlm_Int4 pos);
+/* current read position (stream position minus characters still buffered) */
+NLM_EXTERN Nlm_Int4 Nlm_FileCacheTell (Nlm_FileCache PNTR fcp);
+/* zero the cache object, optionally repositioning the stream to the read position first */
+NLM_EXTERN Nlm_Boolean Nlm_FileCacheFree (Nlm_FileCache PNTR fcp, Nlm_Boolean restoreFilePos);
+
+
+
#define FileOpen Nlm_FileOpen
#define FileClose Nlm_FileClose
#define FileRead Nlm_FileRead
@@ -115,6 +137,18 @@ NLM_EXTERN void LIBCALL Nlm_SetFileOpenHook(Nlm_FileOpenHook hook);
#define CreateDir Nlm_CreateDir
#define DirCatalog Nlm_DirCatalog
#define TmpNam Nlm_TmpNam
+
+/* unprefixed aliases for the FileCache types */
+#define FileCache Nlm_FileCache
+#define FileCachePtr Nlm_FileCachePtr
+
+/* unprefixed aliases for the FileCache functions */
+#define FileCacheSetup Nlm_FileCacheSetup
+#define FileCacheGetString Nlm_FileCacheGetString
+#define FileCacheReadLine Nlm_FileCacheReadLine
+#define FileCacheSeek Nlm_FileCacheSeek
+#define FileCacheTell Nlm_FileCacheTell
+#define FileCacheFree Nlm_FileCacheFree
+
#define EjectCd(sVolume, deviceName, rawDeviceName, mountPoint, mountCmd) FALSE
#define MountCd(sVolume, deviceName, mountPoint, mountCmd) FALSE
diff --git a/data/sequin.hlp b/data/sequin.hlp
index 3f5be114..a1522b91 100644
--- a/data/sequin.hlp
+++ b/data/sequin.hlp
@@ -4,8 +4,9 @@
<!-- if you use the following meta tags, uncomment them.
- <META NAME="keywords" CONTENT="Sequin">
- <META NAME="description" CONTENT="Sequin is a stand-alone software tool developed by the NCBI for submitting and updating entries to the GenBank, EMBL, or DDBJ sequence databases. "> -->
+ <meta name="author" content="sequindoc">
+ <META NAME="keywords" CONTENT="national center for biotechnology information, ncbi, national library of medicine, nlm, national institutes of health, nih, database, archive, bookshelf, pubmed, pubmed central, bioinformatics, biomedicine, sequence submission, sequin, bankit, submitting sequences">
+ <META NAME="description" CONTENT="Sequin is a stand-alone software tool developed by the National Center for Biotechnology Information (NCBI) for submitting and updating entries to the GenBank, EMBL, or DDBJ sequence databases. "> -->
<link rel="stylesheet" href="ncbi_sequin.css">
@@ -371,9 +372,14 @@ Annotation Database
.
#In order to be released into the TPA database, the sequence must appear in a
-peer-reviewed publication in a biological journal. You will be asked later in
-the submission process to provide the GenBank Accession number(s) of the
-primary sequence(s) from which your TPA submission was derived.
+peer-reviewed publication in a biological journal. If you select this
+option, a pop-up box will appear upon the completion of the Sequence Format
+form. You must provide some description of the biological experiments used
+as evidence for the annotation of your TPA submission in this box.
+
+#You will be asked later in the submission process to provide the GenBank
+Accession number(s) of the primary sequence(s) from which your TPA
+submission was derived.
>Organism and Sequences Form
@@ -546,12 +552,31 @@ Source Modifiers form
</A>
which follows the Organism and Sequences Form.
-#If you are submitting a set of aligned sequences and one of those
-sequences is already present in the GenBank/EMBL/DDBJ database, you must
-mark that sequence so that it does not receive a new Accession number.
-Instead of supplying that sequence with a new Sequence Identifier, give
-it the identifier accU12345, where U12345 is the Accession number of the
-sequence.
+#If you are submitting a set of aligned sequences, you can specify sequence
+characters used in your alignment on this page. Sequin requires that you
+define any non-IUPAC nucleotide characters in your alignment file. The
+five types of variable characters are listed under Sequence Characters.
+
+#Every sequence within an alignment file must contain the same number of
+characters (nucleotides + gaps). Gap characters are used to represent the
+spaces between contiguous nucleotides in an alignment. Gaps that appear at
+the beginning or end of a sequence are treated differently than gaps that
+appear between nucleotides and each must be defined. GenBank prefers to
+use a hyphen (-) to represent gaps. If you use a different character to
+represent a gap, you will need to add this character to the list in the
+Beginning Gap, Middle Gap, or End Gap boxes.
+
+#Ambiguous characters represent nucleotides that are known to exist, but
+whose identity has not been experimentally validated. GenBank prefers to
+use 'n' to represent any ambiguous nucleotides. If you are using a
+different character to represent an ambiguous base, you will need to add
+this character to the list in the Ambiguous/Unknown box. Sequin will
+convert these characters to 'n's when your file is imported.
+
+#Match characters denote nucleotides that are identical in every member of
+an alignment. GenBank prefers the use of a colon (:) to represent match
+characters. If you are using a different character to represent a match
+character, you will need to add this character to the list in the Match box.
**Molecule
@@ -787,15 +812,18 @@ ovarian cancer susceptibility protein (BRCA1) mRNA, complete cds.
**FASTA+GAP Format for Aligned Nucleotide Sequences
#A number of programs output sets of aligned sequences in FASTA format.
-Frequently, to align these sequences, gaps must be inserted. In
-FASTA+GAP format, gaps can be indicated by a "-". Do not use the ? character
-to represent ambiguous bases within sequences in the alignment because Sequin
-removes non-IUPAC characters when it imports sequences. Each sequence,
-including gaps, must be the same length. The gaps will only show up in the
-alignment, not in the individual sequence in the database.
+Frequently, to align these sequences, gaps must be inserted. Specify
+relevant gap and ambiguous characters in the appropriate box on the
+
+<A HREF="#NucleotidePage">
+Nucleotide Page
+</A>
+
+form. Each sequence, including gaps, must be the same length. The gaps
+will only show up in the alignment, not in the individual sequence in the
+database.
-#Sequences in FASTA+GAP format resemble FASTA sequences. The previous section
-on
+#Sequences in FASTA+GAP format resemble FASTA sequences. The previous section on
<A HREF="#FASTAFormatforNucleotideSequences">
FASTA Format for Nucleotide Sequences
@@ -854,13 +882,13 @@ Sequence IDs, followed by the sequences. Specifically, the sequence
identifier for the first sequence is A-0V-1-A. Note that subsequent
blocks of sequence do not contain the Sequence ID.
-#Do not use the ? character to represent
-ambiguous bases within sequences in the alignment because Sequin
-removes non-IUPAC characters when it imports sequences. Ambiguous bases should
-be indicated as IUPAC characters such as N. PHYLIP files should contain - rather
-than ? to indicate "missing" at the 5' and 3' ends of sequences.
+#Specify relevant gap and ambiguous characters in the appropriate box on the
+<A HREF="#NucleotidePage">
+Nucleotide Page
+</A>
+form.
-#You can modify this format so that Sequin can
+#You can modify the PHYLIP format so that Sequin can
determine the correct organism and any other modifiers for each
sequence. An example of such modifications are below in the section on
<A HREF="#SourceModifiersforPHYLIPandNEXUS">
@@ -912,12 +940,17 @@ the sequence alignment. The following five lines contain the Sequence IDs,
followed by the sequences. Specifically, the sequence identifier for the first
sequence is A-0V-1-A. Note that subsequent blocks of sequence also contain the
Sequence ID. Also, Sequin will replace the "?" characters in the sequences
-with "N"s since they are defined as "missing" data in the header. However, if
-the 'missing' parameter is not included, or wrongly defined in the header,
-then the "?" characters are stripped from the data. This is a common cause of
-data corruption since the stripping of these characters effectively results in
-the loss of data.
+with "N"s since they are defined as "missing" data in the header. You
+should specify relevant gap and ambiguous characters in the appropriate box
+on the
+
+<A HREF="#NucleotidePage">
+Nucleotide Page
+
+</A>
+
+form.
#The following is an example of NEXUS Contiguous format.
!#NEXUS
@@ -2868,17 +2901,6 @@ sequence will appear.
#-Submission
-**Scope
-
-Please select one option using the radio buttons:
-
-#-Refers to the entire sequence
-
-#-Refers to part of the sequence
-
-#-Cites a feature on the sequence: Please do not select this option without
-providing the nucleotide spans of the feature to which the publication refers.
-
#After you have filled out the Citation on Entry form, click on
"Proceed" to see the next form.
@@ -3138,6 +3160,32 @@ features should be updated. In cases where the new and old records contain
duplicate features, you may chose to retain the new and/or old feature or
merge the duplicated features into one.
+#The check boxes at the bottom of the form allow you to specify actions to
+be taken regarding coding regions and references when updating the
+sequence. Add Cit-subs for Updated Sequences is used by the database staff
+to append reference information regarding the updating of publicly
+available sequences. Please do not use this function. By default, Update
+Proteins for Updated Sequences is selected. Sequin will attempt to
+clean up conceptual translations of annotated coding regions based on the
+updated nucleotide sequence. You can also select options which will
+truncate retranslated proteins at stops, extend retranslated proteins
+without stops or extend retranslated proteins without starts. The Correct
+CDS genes function adjusts the corresponding gene span based on the new
+coding region span. In any case, all annotated coding regions should be
+manually reviewed following a sequence update.
+
+*Extend Sequence
+
+#This selection functions similarly to the
+
+<A HREF="#UpdateSequence">
+Update Sequence
+</A>
+
+function. However, you can extend the existing sequence in either the 5'
+or 3' direction in cases where there is no overlap between the existing and
+new sequences.
+
*Feature Propagate
#This selection allows you to propagate any annotated feature from
@@ -3161,7 +3209,10 @@ translation after the stop codon on the source entry by chosing to
translate the CDS after partial 3' boundary. If the CDS that you
are propagating to other records is partial on either end, you should
select the 'Cleanup CDS partials after propagation' check box. This
-will retain the partial nature of the CDS features on all records.
+will retain the partial nature of the CDS features on all records. The
+fuse adjacent propagated intervals function will create one feature from
+two of the same type that contain abutting nucleotide intervals due to the
+nature of the alignment used for propagation.
*Add Sequence
@@ -3184,7 +3235,8 @@ cannot edit the sequence in this way.
*Find FlatFile
#Under this command, you can find strings of letters in
-all fields of your submission.
+all fields of your submission. You can use the Find First and Find Next
+buttons to identify the specified text sequentially through the flatfile.
*Find by Gene
@@ -3476,7 +3528,7 @@ entitled
Descriptors,
</A>
above.
-#The Generate Defintion Line option will generate a title for your
+#The Generate Definition Line option will generate a title for your
sequence based on the information provided in the record. This option will
work
for single sequences as well as sets of sequences, and can handle complex
@@ -3966,7 +4018,7 @@ ALT="Table of Contents" ALIGN=top BORDER=2></A>
<P CLASS=medium1><B>Questions or Comments?</B>
<BR>Write to the <A HREF="mailto:info@ncbi.nlm.nih.gov">NCBI Service
Desk</A></P>
-<P CLASS=medium1>Revised January 30, 2004
+<P CLASS=medium1>Revised June 15, 2004
</CENTER>
diff --git a/demo/asn2fsa.c b/demo/asn2fsa.c
index a385ffb3..2cc68fdd 100644
--- a/demo/asn2fsa.c
+++ b/demo/asn2fsa.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 3/4/04
*
-* $Revision: 1.13 $
+* $Revision: 1.16 $
*
* File Description:
*
@@ -61,6 +61,10 @@
#include <accpubseq.h>
#endif
+#define ASN2FSA_APP_VER "1.0"
+
+CharPtr ASN2FSA_APPLICATION = ASN2FSA_APP_VER;
+
static ValNodePtr requested_uid_list = NULL;
static TNlmMutex requested_uid_mutex = NULL;
@@ -265,6 +269,7 @@ typedef struct fastaflags {
FILE *aa;
FILE *ql;
FILE *fr;
+ FILE *logfp;
} FastaFlagData, PNTR FastaFlagPtr;
static VoidPtr DoAsyncLookup (
@@ -405,6 +410,9 @@ static ValNodePtr DoLockFarComponents (
if (NlmThreadsAvailable () && ffp->useThreads) {
rsult = AsyncLockFarComponents (sep, ffp);
+ } else if (ffp->useThreads) {
+ Message (MSG_POST, "Threads not available in this executable");
+ rsult = LockFarComponents (sep);
} else {
rsult = LockFarComponents (sep);
}
@@ -673,13 +681,16 @@ static void ProcessMultipleRecord (
AsnIoPtr aip;
AsnModulePtr amp;
AsnTypePtr atp, atp_bss, atp_desc, atp_se;
+ BioseqPtr bsp;
ValNodePtr bsplist;
- Char cmmd [256], file [FILENAME_MAX], path [PATH_MAX];
+ Char buf [64], cmmd [256], file [FILENAME_MAX], path [PATH_MAX], longest [64];
Char path1 [PATH_MAX], path2 [PATH_MAX], path3 [PATH_MAX];
StreamFlgType flags = 0;
FILE *fp, *tfp;
+ Int4 numrecords = 0;
+ SeqEntryPtr fsep, sep;
ObjMgrPtr omp;
- SeqEntryPtr sep;
+ time_t starttime, stoptime, worsttime;
#ifdef OS_UNIX
CharPtr gzcatprog;
int ret;
@@ -796,10 +807,28 @@ static void ProcessMultipleRecord (
flags = STREAM_EXPAND_GAPS;
}
+ longest [0] = '\0';
+ worsttime = 0;
+
while ((atp = AsnReadId (aip, amp, atp)) != NULL) {
if (atp == atp_se) {
sep = SeqEntryAsnRead (aip, atp);
+ starttime = GetSecs ();
+ buf [0] = '\0';
+
+ if (ffp->logfp != NULL) {
+ fsep = FindNthBioseq (sep, 1);
+ if (fsep != NULL && fsep->choice == 1) {
+ bsp = (BioseqPtr) fsep->data.ptrvalue;
+ if (bsp != NULL) {
+ SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
+ fprintf (ffp->logfp, "%s\n", buf);
+ fflush (ffp->logfp);
+ }
+ }
+ }
+
bsplist = NULL;
if (ffp->lock) {
bsplist = DoLockFarComponents (sep, ffp);
@@ -820,6 +849,13 @@ static void ProcessMultipleRecord (
bsplist = UnlockFarComponents (bsplist);
+ stoptime = GetSecs ();
+ if (stoptime - starttime > worsttime && StringDoesHaveText (buf)) {
+ worsttime = stoptime - starttime;
+ StringCpy (longest, buf);
+ }
+ numrecords++;
+
SeqEntryFree (sep);
omp = ObjMgrGet ();
ObjMgrReapOne (omp);
@@ -841,6 +877,13 @@ static void ProcessMultipleRecord (
FileClose (fp);
#endif
+ if (ffp->logfp != NULL && (! StringHasNoText (longest))) {
+ fprintf (ffp->logfp, "Longest processing time %ld seconds on %s\n",
+ (long) worsttime, longest);
+ fprintf (ffp->logfp, "Total number of records %ld\n", (long) numrecords);
+ fflush (ffp->logfp);
+ }
+
sprintf (cmmd, "rm %s; rm %s; rm %s", path1, path2, path3);
system (cmmd);
}
@@ -939,6 +982,7 @@ static void FileRecurse (
#define h_argFarOutFile 20
#define e_argLineLength 21
#define T_argThreads 22
+#define L_argLogFile 23
Args myargs [] = {
{"Path to ASN.1 Files", NULL, NULL, NULL,
@@ -987,21 +1031,22 @@ Args myargs [] = {
TRUE, 'e', ARG_INT, 0.0, 0, NULL},
{"Use Threads", "F", NULL, NULL,
TRUE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},
+ {"Log File", NULL, NULL, NULL,
+ TRUE, 'L', ARG_FILE_OUT, 0.0, 0, NULL},
};
Int2 Main (void)
{
+ Char app [64], sfx [32];
CharPtr base, blastdb, directory, fastaidx, ntout,
- aaout, qlout, frout, ptr, str, suffix;
+ aaout, qlout, frout, logfile, ptr, str, suffix;
Boolean batch, binary, blast, compressed, dorecurse,
expandgaps, fargenomicqual, fasta, local, lock,
masterstyle, qualgapzero, remote, usethreads;
FastaFlagData ffd;
- FILE *fp = NULL;
Int2 linelen, type = 0;
time_t run_time, start_time, stop_time;
- Char sfx [32];
/* standard setup */
@@ -1034,7 +1079,8 @@ Int2 Main (void)
/* process command line arguments */
- if (! GetArgs ("asn2fsa", sizeof (myargs) / sizeof (Args), myargs)) {
+ sprintf (app, "asn2fsa %s", ASN2FSA_APPLICATION);
+ if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
return 0;
}
@@ -1054,7 +1100,7 @@ Int2 Main (void)
local = (Boolean) myargs [k_argLocalFetch].intvalue;
lock = (Boolean) myargs [l_argLockFar].intvalue;
linelen = (Int2) myargs [e_argLineLength].intvalue;
- usethreads = (Int2) myargs [T_argThreads].intvalue;
+ usethreads = (Boolean) myargs [T_argThreads].intvalue;
expandgaps = (Boolean) myargs [g_argExpandGaps].intvalue;
masterstyle = (Boolean) myargs [m_argMaster].intvalue;
@@ -1091,6 +1137,8 @@ Int2 Main (void)
qlout = (CharPtr) myargs [q_argQlOutFile].strvalue;
frout = (CharPtr) myargs [h_argFarOutFile].strvalue;
+ logfile = (CharPtr) myargs [L_argLogFile].strvalue;
+
/* default to stdout for nucleotide output if nothing specified */
if (StringHasNoText (ntout) &&
@@ -1118,6 +1166,7 @@ Int2 Main (void)
ffd.aa = NULL;
ffd.ql = NULL;
ffd.fr = NULL;
+ ffd.logfp = NULL;
if (! StringHasNoText (ntout)) {
ffd.nt = FileOpen (ntout, "w");
@@ -1152,6 +1201,14 @@ Int2 Main (void)
ffd.lock = TRUE;
}
+ if (! StringHasNoText (logfile)) {
+ ffd.logfp = FileOpen (logfile, "w");
+ if (ffd.logfp == NULL) {
+ Message (MSG_FATAL, "Unable to open log file");
+ return 1;
+ }
+ }
+
/* register fetch functions */
if (remote) {
@@ -1161,6 +1218,7 @@ Int2 Main (void)
return 1;
}
ffd.usePUBSEQ = TRUE;
+ ffd.useThreads = FALSE;
#else
PubSeqFetchEnable ();
#endif
@@ -1227,6 +1285,12 @@ Int2 Main (void)
stop_time = GetSecs ();
run_time = stop_time - start_time;
+
+ if (ffd.logfp != NULL) {
+ fprintf (ffd.logfp, "Finished in %ld seconds\n", (long) run_time);
+ FileClose (ffd.logfp);
+ }
+
Message (MSG_POST, "Ran in %ld seconds", (long) run_time);
/* close fetch functions */
diff --git a/demo/asn2gb.c b/demo/asn2gb.c
index 0598a4bd..14411743 100644
--- a/demo/asn2gb.c
+++ b/demo/asn2gb.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 6.61 $
+* $Revision: 6.64 $
*
* File Description: New GenBank flatfile generator application
*
@@ -50,6 +50,10 @@
#include <explore.h>
#include <asn2gnbp.h>
+#define ASN2GB_APP_VER "2.0"
+
+CharPtr ASN2GB_APPLICATION = ASN2GB_APP_VER;
+
static void SaveSeqEntry (
SeqEntryPtr sep,
CharPtr filename
@@ -145,6 +149,7 @@ static Int2 HandleSingleRecord (
CharPtr inputFile,
CharPtr outputFile,
FmtType format,
+ FmtType altformat,
ModType mode,
StlType style,
FlgType flags,
@@ -323,6 +328,9 @@ static Int2 HandleSingleRecord (
AsnIoFree (aip, FALSE);
} else {
SeqEntryToGnbk (sep, slp, format, mode, style, flags, locks, custom, extra, ofp);
+ if (altformat != 0) {
+ SeqEntryToGnbk (sep, slp, altformat, mode, style, flags, locks, custom, extra, ofp);
+ }
}
if (ofp != NULL) {
FileClose (ofp);
@@ -469,6 +477,7 @@ static void CompareFlatFiles (
SeqEntryPtr sep,
FILE* fp,
FmtType format,
+ FmtType altformat,
ModType mode,
StlType style,
FlgType flags,
@@ -496,6 +505,9 @@ static void CompareFlatFiles (
if (batch == 1) {
SeqEntryToGnbk (sep, NULL, format, mode, style, flags, locks, custom, extra, fp);
+ if (altformat != 0) {
+ SeqEntryToGnbk (sep, NULL, altformat, mode, style, flags, locks, custom, extra, fp);
+ }
return; /* just make report, nothing to diff */
} else if (batch == 2) {
@@ -591,6 +603,9 @@ static void CompareFlatFiles (
#else
SeqEntryToGnbk (sep, NULL, format, mode, style, flags, locks, custom, extra, fp);
+ if (altformat != 0) {
+ SeqEntryToGnbk (sep, NULL, altformat, mode, style, flags, locks, custom, extra, fp);
+ }
#endif
}
@@ -648,6 +663,7 @@ static Int2 HandleMultipleRecords (
CharPtr inputFile,
CharPtr outputFile,
FmtType format,
+ FmtType altformat,
ModType mode,
StlType style,
FlgType flags,
@@ -883,8 +899,8 @@ static Int2 HandleMultipleRecords (
starttime = GetSecs ();
useGbdjoin = (Boolean) (format == GENBANK_FMT && (! hasRefSeq));
CompareFlatFiles (path1, path2, path3, sep, ofp,
- format, mode, style, flags, locks, custom,
- extra, batch, gbdjoin, useGbdjoin);
+ format, altformat, mode, style, flags, locks,
+ custom, extra, batch, gbdjoin, useGbdjoin);
stoptime = GetSecs ();
if (stoptime - starttime > worsttime) {
worsttime = stoptime - starttime;
@@ -968,7 +984,7 @@ Args myargs [] = {
FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
{"Output File Name", "stdout", NULL, NULL,
FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
- {"Format (b GenBank, e EMBL, p GenPept, t Feature Table, x GBSet)", "b", NULL, NULL,
+ {"Format (b GenBank, e EMBL, p GenPept, t Feature Table, x INSDSet)", "b", NULL, NULL,
FALSE, 'f', ARG_STRING, 0.0, 0, NULL},
{"Mode (r Release, e Entrez, s Sequin, d Dump)", "s", NULL, NULL,
FALSE, 'm', ARG_STRING, 0.0, 0, NULL},
@@ -1033,6 +1049,8 @@ Int2 Main (
{
CharPtr accn = NULL;
AsnIoPtr aip = NULL;
+ FmtType altformat = (FmtType) 0;
+ Char app [64];
AsnTypePtr atp = NULL;
Int2 batch = 0;
Boolean binary = FALSE;
@@ -1054,8 +1072,6 @@ Int2 Main (
CharPtr logfile = NULL;
FILE *logfp = NULL;
ModType mode = SEQUIN_MODE;
- Char path [PATH_MAX];
- CharPtr progname;
Boolean propOK = FALSE;
Int2 rsult = 0;
time_t runtime, starttime, stoptime;
@@ -1096,17 +1112,10 @@ Int2 Main (
return 1;
}
- ProgramPath (path, sizeof (path));
- progname = StringRChr (path, DIRDELIMCHR);
- if (progname != NULL) {
- progname++;
- } else {
- progname = "asn2gb";
- }
-
/* process command line arguments */
- if (! GetArgs (progname, sizeof (myargs) / sizeof (Args), myargs)) {
+ sprintf (app, "asn2gb %s", ASN2GB_APPLICATION);
+ if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
return 0;
}
@@ -1131,7 +1140,11 @@ Int2 Main (
}
str = myargs [f_argFormat].strvalue;
- if (StringICmp (str, "b") == 0) {
+ if (StringICmp (str, "bp") == 0 || StringICmp (str, "pb") == 0) {
+ format = GENBANK_FMT;
+ altformat = GENPEPT_FMT;
+
+ } else if (StringICmp (str, "b") == 0) {
format = GENBANK_FMT;
} else if (StringICmp (str, "e") == 0) {
format = EMBL_FMT;
@@ -1304,13 +1317,13 @@ Int2 Main (
if (batch != 0 || accn != NULL) {
rsult = HandleMultipleRecords (myargs [i_argInputFile].strvalue,
myargs [o_argOutputFile].strvalue,
- format, mode, style, flags, locks,
+ format, altformat, mode, style, flags, locks,
custom, extra, batch, binary, compressed,
propOK, gbdjoin, accn, logfp);
} else {
rsult = HandleSingleRecord (myargs [i_argInputFile].strvalue,
myargs [o_argOutputFile].strvalue,
- format, mode, style, flags, locks,
+ format, altformat, mode, style, flags, locks,
custom, extra, type, binary, compressed,
from, to, strand, itemID, do_tiny_seq, do_fasta_stream);
}
@@ -1322,12 +1335,11 @@ Int2 Main (
}
stoptime = GetSecs ();
+ runtime = stoptime - starttime;
if (logfp != NULL) {
- fprintf (logfp, "Finished in %ld seconds\n",
- (long) (stoptime - starttime));
+ fprintf (logfp, "Finished in %ld seconds\n", (long) runtime);
FileClose (logfp);
}
- runtime = stoptime - starttime;
Message (MSG_POST, "Ran in %ld seconds", (long) runtime);
if (myargs [r_argRemote].intvalue) {
diff --git a/demo/blast_driver.c b/demo/blast_driver.c
index f568f993..16a4bb01 100644
--- a/demo/blast_driver.c
+++ b/demo/blast_driver.c
@@ -1,4 +1,4 @@
-/* $Id: blast_driver.c,v 1.40 2004/05/05 15:30:33 dondosha Exp $
+/* $Id: blast_driver.c,v 1.46 2004/06/08 17:47:43 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,10 +32,10 @@ Author: Ilya Dondoshansky
Contents: Main function for running BLAST
******************************************************************************
- * $Revision: 1.40 $
+ * $Revision: 1.46 $
* */
-static char const rcsid[] = "$Id: blast_driver.c,v 1.40 2004/05/05 15:30:33 dondosha Exp $";
+static char const rcsid[] = "$Id: blast_driver.c,v 1.46 2004/06/08 17:47:43 dondosha Exp $";
#include <ncbi.h>
#include <sqnutils.h>
@@ -46,6 +46,8 @@ static char const rcsid[] = "$Id: blast_driver.c,v 1.40 2004/05/05 15:30:33 dond
#include <algo/blast/core/blast_filter.h>
#include <algo/blast/core/blast_util.h>
#include <algo/blast/core/blast_engine.h>
+#include <algo/blast/core/hspstream_collector.h>
+#include <algo/blast/api/hspstream_queue.h>
#include <algo/blast/api/blast_seq.h>
#include <algo/blast/api/blast_input.h>
#include <algo/blast/api/blast_format.h>
@@ -53,6 +55,7 @@ static char const rcsid[] = "$Id: blast_driver.c,v 1.40 2004/05/05 15:30:33 dond
#include <algo/blast/api/blast_format.h>
#include <algo/blast/api/seqsrc_readdb.h>
#include <algo/blast/api/multiseq_src.h>
+#include <algo/blast/api/blast_tabular.h>
#define NUMARG (sizeof(myargs)/sizeof(myargs[0]))
@@ -97,7 +100,8 @@ typedef enum {
ARG_FORMAT,
ARG_HTML,
ARG_ASNOUT,
- ARG_OIDRANGE
+ ARG_OIDRANGE,
+ ARG_TABULAR
} BlastArguments;
static Args myargs[] = {
@@ -199,7 +203,9 @@ static Args myargs[] = {
{ "Range of ordinal ids in the BLAST database to search.\n"
"Format: \"oid1 oid2\"; ',', ':' or ';' can also be used as delimiters\n"
"Full database is searched if range not provided.", /* ARG_OIDRANGE */
- NULL, NULL, NULL, TRUE, 'R', ARG_STRING, 0.0, 0, NULL}
+ NULL, NULL, NULL, TRUE, 'R', ARG_STRING, 0.0, 0, NULL},
+ { "Produce on-the-fly tabular output",
+ "0", NULL, NULL, FALSE, 'B', ARG_INT, 0.0, 0, NULL} /* ARG_TABULAR */
};
static Int2 BLAST_FillRPSInfo( RPSInfo **ppinfo, Nlm_MemMap **rps_mmap,
@@ -314,10 +320,9 @@ BLAST_FillOptions(LookupTableOptions* lookup_options,
{
char* blast_program;
Boolean ag_blast = TRUE, variable_wordsize = FALSE, mb_lookup = FALSE;
- Boolean greedy_extension = FALSE;
+ Int4 greedy_extension = 0;
+ Boolean greedy_with_ungapped = FALSE;
Boolean is_gapped = FALSE;
- Int8 totlen = 0;
- Int4 numseqs = 0;
Uint1 program_number;
Int2 status;
Boolean use_pssm = FALSE;
@@ -340,7 +345,8 @@ BLAST_FillOptions(LookupTableOptions* lookup_options,
mb_lookup = TRUE;
variable_wordsize = FALSE;
}
- greedy_extension = (Boolean) myargs[ARG_GREEDY].intvalue;
+ greedy_extension = MIN(myargs[ARG_GREEDY].intvalue, 2);
+ greedy_with_ungapped = (myargs[ARG_GREEDY].intvalue == 3);
}
BLAST_FillLookupTableOptions(lookup_options, program_number, mb_lookup,
@@ -372,42 +378,26 @@ BLAST_FillOptions(LookupTableOptions* lookup_options,
query_setup_options->genetic_code = myargs[ARG_GENCODE].intvalue;
BLAST_FillInitialWordOptions(word_options, program_number,
- greedy_extension, myargs[ARG_WINDOW].intvalue, variable_wordsize,
- ag_blast, mb_lookup, myargs[ARG_XDROP_UNGAPPED].intvalue);
+ (greedy_extension && !greedy_with_ungapped),
+ myargs[ARG_WINDOW].intvalue, variable_wordsize, ag_blast, mb_lookup,
+ myargs[ARG_XDROP_UNGAPPED].intvalue);
BLAST_FillExtensionOptions(ext_options, program_number, greedy_extension,
myargs[ARG_XDROP].intvalue, myargs[ARG_XDROP_FINAL].intvalue);
- if (greedy_extension) {
- switch (myargs[ARG_GREEDY].intvalue) {
- case 1:
- ext_options->algorithm_type = EXTEND_GREEDY;
- word_options->ungapped_extension = FALSE;
- break;
- case 2:
- ext_options->algorithm_type = EXTEND_GREEDY_NO_TRACEBACK;
- word_options->ungapped_extension = FALSE;
- break;
- case 3:
- ext_options->algorithm_type = EXTEND_GREEDY_NO_TRACEBACK;
- word_options->ungapped_extension = TRUE;
- break;
- default:
- break;
- }
- }
-
if (program_number == blast_type_rpsblast ||
- program_number == blast_type_rpstblastn)
- BLAST_FillScoringOptions(score_options, program_number, greedy_extension,
+ program_number == blast_type_rpstblastn) {
+ BLAST_FillScoringOptions(score_options, program_number, FALSE,
myargs[ARG_MISMATCH].intvalue, myargs[ARG_MATCH].intvalue,
"BLOSUM62", rps_info->aux_info.gap_open_penalty,
rps_info->aux_info.gap_extend_penalty);
- else
- BLAST_FillScoringOptions(score_options, program_number, greedy_extension,
+ } else {
+ BLAST_FillScoringOptions(score_options, program_number,
+ (Boolean)greedy_extension,
myargs[ARG_MISMATCH].intvalue, myargs[ARG_MATCH].intvalue,
myargs[ARG_MATRIX].strvalue, myargs[ARG_GAPOPEN].intvalue,
myargs[ARG_GAPEXT].intvalue);
+ }
if (program_number != blast_type_tblastx)
is_gapped = !myargs[ARG_UNGAPPED].intvalue;
@@ -426,8 +416,9 @@ BLAST_FillOptions(LookupTableOptions* lookup_options,
hit_options->percent_identity = myargs[ARG_PERC_IDENT].floatvalue;
hit_options->longest_intron = myargs[ARG_INTRON].intvalue;
- BLAST_FillEffectiveLengthsOptions(eff_len_options,
- numseqs, totlen, (Int8) myargs[ARG_SEARCHSP].floatvalue);
+ if (myargs[ARG_SEARCHSP].floatvalue != 0) {
+ eff_len_options->searchsp_eff = (Int8) myargs[ARG_SEARCHSP].floatvalue;
+ }
if (db_options && (program_number == blast_type_tblastn ||
program_number == blast_type_rpstblastn ||
@@ -458,7 +449,7 @@ Int2 Nlm_Main(void)
BlastHitSavingOptions* hit_options;
char* dbname = NULL;
LookupTableWrap* lookup_wrap;
- Int2 status;
+ Int2 status = 0;
QuerySetUpOptions* query_options=NULL;
BlastEffectiveLengthsOptions* eff_len_options=NULL;
BlastMaskLoc* lcase_mask = NULL;
@@ -472,7 +463,7 @@ Int2 Nlm_Main(void)
SeqAlign* seqalign;
BlastFormattingOptions* format_options;
Boolean done;
- BlastReturnStat* return_stats;
+ BlastDiagnostics* diagnostics;
Int4 ctr = 0;
PSIBlastOptions* psi_options = NULL;
BlastDatabaseOptions* db_options = NULL;
@@ -485,6 +476,11 @@ Int2 Nlm_Main(void)
Nlm_MemMapPtr rps_mmap = NULL;
Nlm_MemMapPtr rps_pssm_mmap = NULL;
RPSInfo *rps_info = NULL;
+ double scale_factor;
+ BlastHSPStream* hsp_stream = NULL;
+ Boolean tabular_output;
+ TNlmThread format_thread;
+ BlastTabularFormatData* tf_data = NULL;
if (! GetArgs (buf, NUMARG, myargs))
return (1);
@@ -496,11 +492,13 @@ Int2 Nlm_Main(void)
ErrSetMessageLevel(SEV_WARNING);
- if ((outfp = fopen(myargs[ARG_OUT].strvalue, "w")) == NULL) {
+ if ((outfp = FileOpen(myargs[ARG_OUT].strvalue, "w")) == NULL) {
ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n",
myargs[ARG_OUT].strvalue);
return (1);
}
+
+ tabular_output = (Boolean)myargs[ARG_TABULAR].intvalue;
blast_program = strdup(myargs[ARG_PROGRAM].strvalue);
BlastProgram2Number(myargs[ARG_PROGRAM].strvalue, &program_number);
@@ -525,7 +523,7 @@ Int2 Nlm_Main(void)
if (!myargs[ARG_DB].strvalue) {
FILE *infp2;
char *subject_file = strdup(myargs[ARG_SUBJECT].strvalue);
- if ((infp2 = fopen(subject_file, "r")) == NULL) {
+ if ((infp2 = FileOpen(subject_file, "r")) == NULL) {
ErrPostEx(SEV_FATAL, 1, 0,
"blast: Unable to open second input file %s\n",
subject_file);
@@ -535,7 +533,7 @@ Int2 Nlm_Main(void)
BLAST_GetQuerySeqLoc(infp2, db_is_na, 0, 0, 0, NULL, &subject_slp,
0, NULL);
- fclose(infp2);
+ FileClose(infp2);
seq_src = MultiSeqSrcInit(subject_slp, program_number);
@@ -558,34 +556,39 @@ Int2 Nlm_Main(void)
if (BLAST_FillRPSInfo(&rps_info, &rps_mmap,
&rps_pssm_mmap, myargs[ARG_DB].strvalue) != 0)
ErrPostEx(SEV_FATAL, 1, 0, "RPS Blast setup failed");
+ scale_factor = rps_info->aux_info.scale_factor;
+ }
+ else {
+ scale_factor = 1.0;
}
BLAST_FillOptions(lookup_options, query_options, word_options,
ext_options, hit_options, score_options, eff_len_options,
psi_options, db_options, seq_src, rps_info);
+ if (!tabular_output) {
+ if ((status = BlastFormattingOptionsNew(program_number,
+ myargs[ARG_OUT].strvalue,
+ myargs[ARG_DESCRIPTIONS].intvalue,
+ myargs[ARG_ALIGNMENTS].intvalue,
+ myargs[ARG_FORMAT].intvalue, &format_options)) != 0)
+ return status;
+ format_options->html = (Boolean) myargs[ARG_HTML].intvalue;
- if ((status = BlastFormattingOptionsNew(program_number,
- myargs[ARG_OUT].strvalue,
- myargs[ARG_DESCRIPTIONS].intvalue,
- myargs[ARG_ALIGNMENTS].intvalue,
- myargs[ARG_FORMAT].intvalue, &format_options)) != 0)
- return status;
- format_options->html = (Boolean) myargs[ARG_HTML].intvalue;
-
- if (seq_src) {
- dbname = BLASTSeqSrcGetName(seq_src);
+ if (seq_src) {
+ dbname = BLASTSeqSrcGetName(seq_src);
- BLAST_PrintOutputHeader(format_options,
- myargs[ARG_GREEDY].intvalue, dbname, !db_is_na);
+ BLAST_PrintOutputHeader(format_options,
+ myargs[ARG_GREEDY].intvalue, dbname, !db_is_na);
+ }
}
- if ((infp = fopen(myargs[ARG_QUERY].strvalue, "r")) == NULL) {
+ if ((infp = FileOpen(myargs[ARG_QUERY].strvalue, "r")) == NULL) {
ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n",
myargs[ARG_QUERY].strvalue);
return (1);
}
- return_stats = (BlastReturnStat*) calloc(1, sizeof(BlastReturnStat));
+ diagnostics = Blast_DiagnosticsInit();
translated_query = (program_number == blast_type_blastx ||
program_number == blast_type_tblastx);
@@ -622,7 +625,7 @@ Int2 Nlm_Main(void)
status =
BLAST_MainSetUp(program_number, query_options, score_options,
- hit_options, query, query_info, &lookup_segments,
+ hit_options, query, query_info, scale_factor, &lookup_segments,
&filter_loc, &sbp, &blast_message);
if (translated_query) {
@@ -638,42 +641,75 @@ Int2 Nlm_Main(void)
return status;
}
- Blast_HSPResultsInit(query_info->num_queries, &results);
LookupTableWrapInit(query, lookup_options,
lookup_segments, sbp, &lookup_wrap, rps_info);
- if (rps_blast)
+ if (!tabular_output) {
+ Int4 num_results = (rps_blast ? BLASTSeqSrcGetNumSeqs(seq_src) :
+ query_info->num_queries);
+ /* Results in the collector stream should be sorted only for a
+ database search. The latter is true if and only if the sequence
+ source has non-zero database length. */
+ Boolean sort_on_read = (BLASTSeqSrcGetTotLen(seq_src) != 0);
+ hsp_stream =
+ Blast_HSPListCollectorInit(program_number, hit_options,
+ num_results, sort_on_read);
+ } else {
+ hsp_stream = Blast_HSPListQueueInit();
+ tf_data = Blast_TabularFormatDataInit(program_number, hsp_stream,
+ seq_src, query, query_info, score_options, sbp,
+ eff_len_options, ext_options, hit_options, db_options,
+ query_slp, outfp);
+ /* Start the formatting thread */
+ if((format_thread =
+ NlmThreadCreate(Blast_TabularFormatThread, (void*) tf_data))
+ == NULL_thread) {
+ fprintf(stderr,
+ "Cannot create thread for formatting tabular output\n");
+ return 1;
+ }
+ }
+
+ if (rps_blast) {
BLAST_RPSSearchEngine(program_number, query, query_info,
seq_src, sbp, score_options, lookup_wrap,
word_options, ext_options, hit_options, eff_len_options,
- psi_options, db_options, results, return_stats);
- else
+ psi_options, db_options, hsp_stream, diagnostics,
+ (tabular_output ? NULL : &results));
+ } else {
BLAST_SearchEngine(program_number, query, query_info,
seq_src, sbp, score_options, lookup_wrap,
word_options, ext_options, hit_options, eff_len_options,
- psi_options, db_options, results, return_stats);
+ psi_options, db_options, hsp_stream, diagnostics,
+ (tabular_output ? NULL : &results));
+ }
+ if (tabular_output) {
+ void* join_status = NULL;
+ NlmThreadJoin(format_thread, &join_status);
+ }
+
+ hsp_stream = BlastHSPStreamFree(hsp_stream);
lookup_wrap = LookupTableWrapFree(lookup_wrap);
if (rps_blast) {
- Nlm_MemMapFini(rps_mmap);
- Nlm_MemMapFini(rps_pssm_mmap);
+ Nlm_MemMapFini(rps_mmap);
+ Nlm_MemMapFini(rps_pssm_mmap);
sfree(rps_info->aux_info.karlin_k);
sfree(rps_info->aux_info.orig_score_matrix);
- sfree(rps_info);
+ sfree(rps_info);
}
/* The following works because the ListNodes' data point to simple
double-integer structures */
lookup_segments = ListNodeFreeData(lookup_segments);
-
+ if (!tabular_output) {
/* Convert results to the SeqAlign form */
BLAST_ResultsToSeqAlign(program_number, results, query_slp, seq_src,
- subject_slp, score_options, sbp, score_options->gapped_calculation,
+ score_options->gapped_calculation, score_options->is_ooframe,
&seqalign);
results = Blast_HSPResultsFree(results);
- seq_src = BlastSeqSrcFree(seq_src);
if (myargs[ARG_ASNOUT].strvalue) {
AsnIoPtr asnout = AsnIoOpen(myargs[ARG_ASNOUT].strvalue, (char*)"w");
@@ -686,21 +722,21 @@ Int2 Nlm_Main(void)
status = BLAST_FormatResults(seqalign, dbname,
blast_program, query_info->num_queries, query_slp,
filter_loc, format_options, score_options->is_ooframe);
-
- BlastMaskLocFree(filter_loc);
-
PrintOutputFooter(program_number, format_options, score_options, sbp,
- lookup_options, word_options, ext_options, hit_options, query_info,
- dbname, return_stats, db_is_na);
-
+ lookup_options, word_options, ext_options,
+ hit_options, eff_len_options, query_info,
+ seq_src, diagnostics);
+ } /* if not tabular output */
query = BlastSequenceBlkFree(query);
+ BlastMaskLocFree(filter_loc);
query_info = BlastQueryInfoFree(query_info);
BlastScoreBlkFree(sbp);
query_slp = SeqLocSetFree(query_slp);
} /* End loop on sets of queries */
+ seq_src = BlastSeqSrcFree(seq_src);
subject_slp = SeqLocSetFree(subject_slp);
- sfree(return_stats);
+ Blast_DiagnosticsFree(diagnostics);
LookupTableOptionsFree(lookup_options);
BlastQuerySetUpOptionsFree(query_options);
BlastExtensionOptionsFree(ext_options);
@@ -710,11 +746,14 @@ Int2 Nlm_Main(void)
BlastEffectiveLengthsOptionsFree(eff_len_options);
PSIBlastOptionsFree(psi_options);
BlastDatabaseOptionsFree(db_options);
-
- BlastFormattingOptionsFree(format_options);
+ if (!tabular_output) {
+ BlastFormattingOptionsFree(format_options);
+ } else {
+ FileClose(outfp);
+ }
if (infp)
- fclose(infp);
+ FileClose(infp);
sfree(dbname);
sfree(blast_program);
diff --git a/demo/blastall.c b/demo/blastall.c
index 0b712792..0ead6d9d 100644
--- a/demo/blastall.c
+++ b/demo/blastall.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: blastall.c,v 6.142 2004/04/29 19:56:00 dondosha Exp $";
+static char const rcsid[] = "$Id: blastall.c,v 6.143 2004/05/13 18:42:44 coulouri Exp $";
-/* $Id: blastall.c,v 6.142 2004/04/29 19:56:00 dondosha Exp $
+/* $Id: blastall.c,v 6.143 2004/05/13 18:42:44 coulouri Exp $
**************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -28,6 +28,9 @@ static char const rcsid[] = "$Id: blastall.c,v 6.142 2004/04/29 19:56:00 dondosh
**************************************************************************
*
* $Log: blastall.c,v $
+ * Revision 6.143 2004/05/13 18:42:44 coulouri
+ * disable -B for blastcl3
+ *
* Revision 6.142 2004/04/29 19:56:00 dondosha
* Mask filtered locations in query sequence lines in XML output
*
@@ -762,8 +765,10 @@ static Args myargs[] = {
sizeof(myargs) itself
made optional=TRUE but this may change?
*/
+#ifndef BLAST_CS_API
{ "Number of concatenated queries, for blastn and tblastn", /* 40 */
"0", NULL, NULL, TRUE, 'B', ARG_INT, 0.0, 0, NULL}
+#endif
};
#ifdef BLAST_CS_API
diff --git a/demo/fastacmd.c b/demo/fastacmd.c
index 010bae9d..ff0521cc 100644
--- a/demo/fastacmd.c
+++ b/demo/fastacmd.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: fastacmd.c,v 6.29 2003/05/30 17:31:09 coulouri Exp $";
+static char const rcsid[] = "$Id: fastacmd.c,v 6.30 2004/05/13 20:54:45 coulouri Exp $";
-/* $Id: fastacmd.c,v 6.29 2003/05/30 17:31:09 coulouri Exp $
+/* $Id: fastacmd.c,v 6.30 2004/05/13 20:54:45 coulouri Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -31,12 +31,15 @@ static char const rcsid[] = "$Id: fastacmd.c,v 6.29 2003/05/30 17:31:09 coulouri
*
* Initial Version Creation Date: 05/20/1997
*
-* $Revision: 6.29 $
+* $Revision: 6.30 $
*
* File Description:
* FASTA retrievel system using ISAM indexes
*
* $Log: fastacmd.c,v $
+* Revision 6.30 2004/05/13 20:54:45 coulouri
+* spell 'loci' correctly
+*
* Revision 6.29 2003/05/30 17:31:09 coulouri
* add rcsid
*
@@ -157,10 +160,10 @@ static Args myargs [] = {
" T - protein \n"
" F - nucleotide",
"G", NULL,NULL,TRUE,'p',ARG_STRING,0.0,0,NULL},
- { "Search string: GIs, accessions and locuses may be used delimited\n"
+ { "Search string: GIs, accessions and loci may be used delimited\n"
" by comma.", /* 2 */
NULL, NULL, NULL, TRUE, 's', ARG_STRING, 0.0, 0, NULL},
- { "Input file wilth GIs/accessions/locuses for batch\n"
+ { "Input file wilth GIs/accessions/loci for batch\n"
" retrieval",/* 3 */
NULL, NULL, NULL, TRUE, 'i', ARG_STRING, 0.0, 0, NULL},
{ "Retrieve duplicate accessions", /* 4 */
diff --git a/demo/fmerge.c b/demo/fmerge.c
deleted file mode 100644
index 1aaae442..00000000
--- a/demo/fmerge.c
+++ /dev/null
@@ -1,445 +0,0 @@
-/*****************************************************************************
-
- Name: fmerge.c
-
- Description: Program for merging FASTA databases.
-
- Author: Sergei Shavirin
-
- ***************************************************************************
-
- PUBLIC DOMAIN NOTICE
- National Center for Biotechnology Information
-
- This software/database is a "United States Government Work" under the
- terms of the United States Copyright Act. It was written as part of
- the author's official duties as a United States Government employee
- and thus cannot be copyrighted. This software/database is freely
- available to the public for use. The National Library of Medicine and
- the U.S. Government have not placed any restriction on its use or
- reproduction.
-
- Although all reasonable efforts have been taken to ensure the accuracy
- and reliability of the software and data, the NLM and the U.S.
- Government do not and cannot warrant the performance or results that
- may be obtained by using this software or data. The NLM and the U.S.
- Government disclaim all warranties, express or implied, including
- warranties of performance, merchantability or fitness for any
- particular purpose.
-
- Please cite the author in any work or product based on this material.
-
- ***************************************************************************
-
- Modification History:
- 20 August of 1996 - Shavirin - originaly written
-
- Bugs and restriction on use:
-
- Notes:
-
-*****************************************************************************/
-#include <ncbi.h>
-
-#define INCLENGTH 256
-#define STRLENGTH 128
-
-typedef struct MDeflineID {
- Uint4 gi;
- CharPtr defline;
- struct MDeflineID *next;
-} MDeflineID, PNTR MDeflineIDPtr;
-
-typedef struct FMergeID {
- MDeflineIDPtr id;
- CharPtr sequence;
-} FMergeID, PNTR FMergeIDPtr;
-
-Uint4Ptr GetIndexFromFasta(FILE *fd, Uint4Ptr MaxIndexCount);
-Uint4Ptr GetIndexFromIndex(FILE *fd, Uint4Ptr MaxIndexCount);
-
-static FMergeIDPtr NextFastaFromFasta(FILE *fd);
-Boolean IfGiInIndex(Uint4Ptr index, Uint4 MaxIndexCount, Uint4 gi);
-Int4 MergeFMergeID(FMergeIDPtr fasta,
- FILE *fd, FILE *fd_ind,
- Uint4Ptr index,
- Uint4 MaxIndexCount);
-void FMergeIDFree(FMergeIDPtr fasta);
-
-#define NUMARG 5
-
-Args dump_args[NUMARG] = {
- {"Job mode (this value must be set):\n"
- " 1 - create index only\n"
- " 2 - update database from existing index\n"
- " 3 - create index and update database\n"
- " ",
-
- NULL, NULL,NULL,FALSE,'t',ARG_STRING, 0.0,0,NULL},
- {"Logfile name:","fmerge.log",
- NULL,NULL,TRUE,'l',ARG_FILE_OUT, 0.0,0,NULL},
- {"Input file for indexing\n"
- " Name of FASTA file, that you are going to update\n"
- " ",
- "nr", NULL,NULL,TRUE,'n',ARG_FILE_IN,0.0,0,NULL},
-
- {"Input file for new gis:\n"
- " Name of FASTA file with new sequences\n"
- " ",
- "month.aa", NULL,NULL,TRUE,'m',ARG_FILE_IN,0.0,0,NULL},
- {"Index file name\n"
- " Name of file for index storage\n"
- " ",
- "index.nr", NULL,NULL,TRUE,'i',ARG_FILE_IN,0.0,0,NULL},
-};
-
-#define JobModeStr (const char *) dump_args[0].strvalue
-#define LogFileName (const char *) dump_args[1].strvalue
-#define NRFile (const char *) dump_args[2].strvalue
-#define MonthFile (const char *) dump_args[3].strvalue
-#define IndexFile (const char *) dump_args[4].strvalue
-
-#define INDEX_MODE 1
-#define UPDATE_MODE 2
-#define FULL_MODE 3
-
-/* ------------------------------------------------------------------
- This is handler for HeapSort function
- ------------------------------------------------------------------*/
-static int LIBCALLBACK intcompare(VoidPtr i, VoidPtr j)
-{
- if (*(int *)i > *(int *)j)
- return (1);
- if (*(int *)i < *(int *)j)
- return (-1);
- return (0);
-}
-
-Int2 Main(void)
-{
- Uint4Ptr index = NULL;
- FILE *fd, *fd_ind, *fdout;
- Uint4 i, MaxIndexCount;
- FMergeIDPtr fasta;
- Int4 Added =0, TotalAdded =0, SequenceAdded =0;
- Int4 JobMode;
-
- if ( !GetArgs ("fmerge",NUMARG,dump_args) ) {
- return 1;
- }
- if ( !ErrSetLog (LogFileName) ) {
- ErrShow();
- } else {
- ErrSetOpts (ERR_CONTINUE, ERR_LOG_ON);
- }
-
- JobMode = atoi(JobModeStr);
-
- switch(JobMode) {
- case INDEX_MODE:
- ErrLogPrintf("fmerge started in \"Create index\" mode\n");
- break;
- case UPDATE_MODE:
- ErrLogPrintf("fmerge started in \"Update database\" mode\n");
- break;
- case FULL_MODE:
- ErrLogPrintf("fmerge started in \"Full mode\" mode\n");
- break;
- default:
- printf("fmerge mode \"%s\" invalid, exiting...\n",
- JobModeStr);
- exit(1);
- }
-
- /* First creating index if necessary */
-
- if(JobMode == INDEX_MODE || JobMode == FULL_MODE) {
-
- if((fd = FileOpen(NRFile, "r")) == NULL) {
- ErrLogPrintf("Cannot open input file. Indexing failed...\n");
- exit(1);
- }
- ErrLogPrintf("Using file \"%s\" to create index\n", NRFile);
-
- if((index = GetIndexFromFasta(fd, &MaxIndexCount)) == NULL) {
- ErrLogPrintf("Error in creating gi index from FASTA file\n");
- }
- FileClose(fd);
- ErrLogPrintf("Created index of %d gis\n", MaxIndexCount);
-
- fd_ind = FileOpen(IndexFile, "w");
- for(i = 0; i < MaxIndexCount; i++)
- fprintf(fd_ind, "%ld\n", index[i]);
- FileClose(fd_ind);
-
- if(JobMode == INDEX_MODE) {
- MemFree(index);
- exit(0);
- }
- }
-
- if(JobMode == UPDATE_MODE) {
-
- if((fd_ind = FileOpen(IndexFile, "r")) == NULL) {
- ErrLogPrintf("Cannot open file with index. Exiting...\n");
- exit(1);
- }
- if((index = GetIndexFromIndex(fd_ind, &MaxIndexCount)) == NULL) {
- ErrLogPrintf("Error in creating gi index from FASTA file\n");
- }
- FileClose(fd_ind);
- ErrLogPrintf("Created index of %d gis\n", MaxIndexCount);
- }
-
- /* Now searching FASTA file with new gis*/
-
- if((fd = FileOpen(MonthFile, "r")) == NULL) {
- ErrLogPrintf("Cannot open input file. Indexing failed...\n");
- exit(1);
- }
- ErrLogPrintf("Using file \"%s\" as FASTA database with new gis\n",
- MonthFile);
-
- if((fdout = FileOpen(NRFile, "a+")) == NULL) {
- ErrLogPrintf("Error opening %s file\n", NRFile);
- exit(1);
- }
-
- if((fd_ind = FileOpen(IndexFile, "a+")) == NULL) {
- ErrLogPrintf("Error opening %s file\n", IndexFile);
- exit(1);
- }
-
- while((fasta = NextFastaFromFasta(fd)) != NULL) {
- if((Added = MergeFMergeID(fasta, fdout, fd_ind,
- index, MaxIndexCount)) != 0) {
- TotalAdded += Added;
- SequenceAdded++;
- }
- }
- MemFree(index);
- FileClose(fd);
- FileClose(fd_ind);
- ErrLogPrintf("Total gis added: %d\n", TotalAdded);
- ErrLogPrintf("Total sequences added: %d\n", SequenceAdded);
- return 0;
-}
-Uint4Ptr GetIndexFromIndex(FILE *fd, Uint4Ptr MaxIndexCount)
-{
- Uint4Ptr index;
- Uint4 IndexSize = INCLENGTH;
- Uint4 i =0, gi;
-
- index = MemNew(IndexSize*4);
-
- while (fscanf(fd, "%ld", &gi) > 0) {
-
- if(i == IndexSize) {
- IndexSize += INCLENGTH;
- index = Realloc(index, IndexSize*4);
- }
- index[i] = gi;
- i++;
- }
- *MaxIndexCount = i;
- HeapSort(index, *MaxIndexCount, sizeof(Uint4), intcompare);
- return index;
-}
-
-Uint4Ptr GetIndexFromFasta(FILE *fd, Uint4Ptr MaxIndexCount)
-{
- Char str[20000];
- CharPtr pos;
- CharPtr start;
- Uint4Ptr index;
- Uint4 i =0, gi;
- Uint4 IndexSize = INCLENGTH;
-
- index = MemNew(IndexSize*4);
-
- while(fgets (str, sizeof (str), fd) != NULL) {
- start = str;
- while((pos = StringStr(start, "\1gi|")) != NULL ||
- (pos = StringStr(start, ">gi|")) != NULL) {
- if(!sscanf(pos+4, "%ld", &gi)) {
- ErrLogPrintf("Error parsing gi number. Indexing failed...\n");
- exit(1);
- }
- /* Here we got next gi nnumber and will add it to the index */
-
- if(i == IndexSize) {
- IndexSize += INCLENGTH;
- index = Realloc(index, IndexSize*4);
- }
- index[i] = gi;
- i++;
- start = pos+4;
- }
- }
- *MaxIndexCount = i;
- HeapSort(index, *MaxIndexCount, sizeof(Uint4), intcompare);
- return index;
-}
-
-Boolean IfGiInIndex(Uint4Ptr index, Uint4 MaxIndexCount, Uint4 gi)
-{
- Uint4 high_index, low_index, new_index;
-
- if(MaxIndexCount == 0 || index == NULL || gi == 0)
- return FALSE;
-
- low_index = 0;
- high_index = MaxIndexCount;
- new_index = (low_index+high_index)/2;
-
- while(new_index != low_index && new_index != high_index){
- if (gi > index[new_index]) {
- low_index = new_index;
- } else if (gi < index[new_index]) {
- high_index = new_index;
- } else { /* scores are equal. */
- return TRUE;
- }
- new_index = (low_index+high_index)/2;
- }
- if (gi == index[new_index])
- return TRUE;
- else
- return FALSE;
-}
-
-static FMergeIDPtr NextFastaFromFasta(FILE *fd)
-{
- CharPtr str, start;
- Int4 i=0,j=0;
- Char ch;
- Int4 gi;
- CharPtr pos;
- Int4 f_pos;
- Char tmpbuff[2048];
- FMergeIDPtr fasta;
- MDeflineIDPtr id, last_id;
- CharPtr TmpSequence;
- Int4 SeqSize = STRLENGTH;
-
- if((fasta = MemNew(sizeof(FMergeID))) == NULL) {
- ErrLogPrintf("Cannot allocate memory for Fasta ID\n");
- exit(1);
- }
- fasta->id = NULL;
-
- str = MemNew(SeqSize + 5);
- str[0] = NULLB;
- for(i=0; (FileRead(&ch, sizeof(ch), 1, fd) == 1); i++) {
- if((str[i] = ch) == '\n' || ch == '\r')
- break;
- if (i > SeqSize) {
- SeqSize = i + STRLENGTH;
- str = Realloc(str, SeqSize + 5);
- }
- }
- str[i+1] = NULLB;
- start = str;
- while((pos = StringStr(str, "\1gi|")) != NULL ||
- (pos = StringStr(str, ">gi|")) != NULL) {
- if(!sscanf(pos+4, "%ld", &gi)) {
- ErrLogPrintf("Error parsing gi number. Indexing failed...\n");
- exit(1);
- }
- pos++;
- for(j = 0;pos[j] != '\1' &&
- pos[j] != '\n' &&
- pos[j] != '\r'; j++) {
- tmpbuff[j] = pos[j];
- }
- tmpbuff[j] = '\0';
-
- id = MemNew(sizeof(MDeflineID));
- id->defline = StringSave(tmpbuff);
- id->gi = gi;
- id->next = NULL;
-
- if(fasta->id == NULL) {
- fasta->id = id;
- last_id = fasta->id;
- } else {
- last_id->next = id;
- last_id = last_id->next;
- }
- str = pos+4;
- }
-
- MemFree(start);
-
- if(fasta->id == NULL) {
- MemFree(fasta);
- return NULL;
- }
-
- TmpSequence = MemNew(SeqSize + 5);
- TmpSequence[0] = NULLB;
-
-
- while(TRUE) {
- f_pos = ftell(fd);
- if(fgets(tmpbuff, sizeof (tmpbuff), fd) == NULL)
- break;
- if(StringStr(tmpbuff, ">gi|") != NULL) { /* new fasta entry started */
- fseek(fd, (long) f_pos, SEEK_SET);
- break;
- }
- StringCat(TmpSequence, tmpbuff);
- SeqSize += STRLENGTH + 5;
- TmpSequence = Realloc(TmpSequence, SeqSize);
- }
- fasta->sequence = TmpSequence;
- return fasta;
-}
-
-Int4 MergeFMergeID(FMergeIDPtr fasta, FILE *fd, FILE *fd_ind,
- Uint4Ptr index, Uint4 MaxIndexCount)
-{
- Boolean SomeNew = FALSE;
- MDeflineIDPtr id;
- Int4 TotalNot =0;
-
- id = fasta->id;
- while(id != NULL) {
- if(IfGiInIndex(index, MaxIndexCount, id->gi)) {
- id = id->next;
- continue;
- }
- TotalNot++;
-
- fprintf(fd_ind, "%ld\n", id->gi);
-
- if(SomeNew)
- fprintf(fd, "\1%s", id->defline);
- else {
- fprintf(fd, ">%s", id->defline);
- SomeNew = TRUE;
- }
- id = id->next;
- }
-
- if(SomeNew)
- fprintf(fd, "\n%s", fasta->sequence);
-
- FMergeIDFree(fasta);
- return TotalNot;
-}
-void FMergeIDFree(FMergeIDPtr fasta)
-{
- MDeflineIDPtr id, id1;
-
- MemFree(fasta->sequence);
- id = fasta->id;
- while(id != NULL) {
- MemFree(id->defline);
- id1 = id;
- id = id->next;
- MemFree(id1);
- }
- MemFree(fasta);
-}
-
diff --git a/demo/makemat.c b/demo/makemat.c
index dba93c25..49939238 100644
--- a/demo/makemat.c
+++ b/demo/makemat.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: makemat.c,v 6.13 2003/05/30 17:31:09 coulouri Exp $";
+static char const rcsid[] = "$Id: makemat.c,v 6.14 2004/05/14 12:10:06 camacho Exp $";
/*
* ===========================================================================
@@ -180,6 +180,10 @@ static Boolean takeMatrixCheckpoint(compactSearchItems * compactSearch,
localChar = getRes(compactSearch->query[i]);
fprintf(checkFile,"%c",localChar);
+
+ /* The following 2 lines are needed to preserve compatibility with the
+ * checkpoint file libraries distributed with IMPALA (from personal
+ * communication with IMPALA's author) */
posSearch->posMatrix[i][Xchar] = Xscore;
posSearch->posPrivateMatrix[i][Xchar] = Xscore * scalingFactor;
}
@@ -322,6 +326,8 @@ Nlm_FloatHi scalingFactor, Char *directoryPrefix)
BlastScoreBlkMatFill(sbp, underlyingMatrixName);
compactSearch->matrix = sbp->matrix;
compactSearch->gapped_calculation = TRUE;
+ /* Note that these two assignments are not really needed for
+ * makemat's operation and thus their values are irrelevant */
compactSearch->pseudoCountConst = 10;
compactSearch->ethresh = 0.001;
BlastScoreBlkFill(sbp, (CharPtr) query, queryLength, 0);
diff --git a/demo/megablast.c b/demo/megablast.c
index 06778dcc..d141a337 100644
--- a/demo/megablast.c
+++ b/demo/megablast.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: megablast.c,v 6.113 2004/04/29 19:56:00 dondosha Exp $";
+static char const rcsid[] = "$Id: megablast.c,v 6.114 2004/05/27 17:37:30 dondosha Exp $";
-/* $Id: megablast.c,v 6.113 2004/04/29 19:56:00 dondosha Exp $
+/* $Id: megablast.c,v 6.114 2004/05/27 17:37:30 dondosha Exp $
**************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -28,6 +28,9 @@ static char const rcsid[] = "$Id: megablast.c,v 6.113 2004/04/29 19:56:00 dondos
**************************************************************************
* $Revision 6.13$ *
* $Log: megablast.c,v $
+ * Revision 6.114 2004/05/27 17:37:30 dondosha
+ * Do not call GapXEditBlockDelete in formatting callback - this is now done when HSPs are freed
+ *
* Revision 6.113 2004/04/29 19:56:00 dondosha
* Mask filtered locations in query sequence lines in XML output
*
@@ -653,7 +656,6 @@ MegaBlastPrintSegments(VoidPtr ptr)
hsp = search->current_hitlist->hsp_array[hsp_index];
if (hsp==NULL || (search->pbp->cutoff_e > 0 &&
hsp->evalue > search->pbp->cutoff_e)) {
- GapXEditBlockDelete(hsp->gap_info); /* Don't need it anymore */
continue;
}
context = hsp->context;
@@ -748,7 +750,6 @@ MegaBlastPrintSegments(VoidPtr ptr)
GXECollectDataForSeqalign(hsp->gap_info, hsp->gap_info->esp, numseg,
&start, &length, &strands,
&q_off, &hsp->subject.offset);
- GapXEditBlockDelete(hsp->gap_info); /* Don't need it anymore */
if (start[0] < 0) {
length[0] += start[0];
diff --git a/demo/tbl2asn.c b/demo/tbl2asn.c
index b95dc25c..3ea959d5 100644
--- a/demo/tbl2asn.c
+++ b/demo/tbl2asn.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 5/5/00
*
-* $Revision: 6.85 $
+* $Revision: 6.88 $
*
* File Description:
*
@@ -1428,12 +1428,14 @@ static Uint2 ProcessDeltaSet (
Uint2 entityID;
SeqEntryPtr firstsep, lastsep, nextsep, sep, topsep;
IntFuzzPtr ifp;
+ Boolean is_unk100;
+ ObjectIdPtr oip;
ObjMgrDataPtr omdptop;
ObjMgrData omdata;
Uint2 parenttype;
Pointer parentptr;
CharPtr seqbuf;
- SeqIdPtr sip;
+ SeqIdPtr sip, virtid;
SeqLitPtr slp;
ValNodePtr vnp;
@@ -1491,7 +1493,7 @@ static Uint2 ProcessDeltaSet (
bsp = FindNucBioseq (firstsep);
if (bsp == NULL) return 0;
- sip = SeqIdDup (bsp->id);
+ sip = SeqIdSetDup (bsp->id);
vnp = ValNodeExtract (&(bsp->descr), Seq_descr_title);
deltabsp = BioseqNew ();
@@ -1554,8 +1556,21 @@ static Uint2 ProcessDeltaSet (
slp->length = bsp->length;
ValNodeAddPointer ((ValNodePtr PNTR) &(deltabsp->seq_ext), (Int2) 2, (Pointer) slp);
- if (slp->length < 1) {
- slp->length = 0;
+
+ is_unk100 = FALSE;
+ virtid = bsp->id;
+ if (virtid != NULL && virtid->choice == SEQID_LOCAL) {
+ oip = (ObjectIdPtr) virtid->data.ptrvalue;
+ if (oip != NULL) {
+ if (StringCmp (oip->str, "unk100") == 0) {
+ is_unk100 = TRUE;
+ }
+ }
+ }
+ if (slp->length < 1 || is_unk100) {
+ if (slp->length < 1) {
+ slp->length = 0;
+ }
ifp = IntFuzzNew ();
ifp->choice = 4;
slp->fuzz = ifp;
@@ -3126,6 +3141,7 @@ Int2 Main (void)
{
AsnIoPtr aip = NULL;
+ Char app [64];
CharPtr base;
AsnTypePtr bssp_atp = NULL;
CitSubPtr csp;
@@ -3185,7 +3201,8 @@ Int2 Main (void)
/* process command line arguments */
- if (! GetArgs ("tbl2asn", sizeof (myargs) / sizeof (Args), myargs)) {
+ sprintf (app, "tbl2asn %s", TBL2ASN_APPLICATION);
+ if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
return 0;
}
diff --git a/desktop/biosrc.c b/desktop/biosrc.c
index e745fdcd..9f6802e1 100644
--- a/desktop/biosrc.c
+++ b/desktop/biosrc.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/22/95
*
-* $Revision: 6.48 $
+* $Revision: 6.49 $
*
* File Description:
*
@@ -1907,11 +1907,11 @@ EnumFieldAssocPtr subsource_alists [] = {
};
Uint2 orgmod_widths [] = {
- 0, 15
+ 0, 25
};
Uint2 subsource_widths [] = {
- 0, 15
+ 0, 25
};
Uint2 orgmod_types [] = {
diff --git a/desktop/cdrgn.c b/desktop/cdrgn.c
index 587ced29..59282919 100644
--- a/desktop/cdrgn.c
+++ b/desktop/cdrgn.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/22/95
*
-* $Revision: 6.57 $
+* $Revision: 6.60 $
*
* File Description:
*
@@ -2272,9 +2272,9 @@ static DialoG CreateCdRgnDialog (GrouP h, CharPtr title, Int2 genCode,
x = HiddenGroup (cfp->protTextGrp, 2, 0, NULL);
SetGroupSpacing (x, 3, 5);
StaticPrompt (x, "Name", 0, dialogTextHeight, programFont, 'l');
- cfp->protNameText = DialogText (x, "", 15, NULL);
+ cfp->protNameText = DialogText (x, "", 25, NULL);
StaticPrompt (x, "Description", 0, dialogTextHeight, programFont, 'l');
- cfp->protDescText = DialogText (x, "", 15, NULL);
+ cfp->protDescText = DialogText (x, "", 25, NULL);
cfp->protPromptGrp = HiddenGroup (f, -1, 0, NULL);
StaticPrompt (cfp->protPromptGrp,
"Press Edit Protein Feature to change protein name",
@@ -2841,7 +2841,7 @@ extern ForM CreateCdRgnForm (Int2 left, Int2 top, CharPtr title,
StdFeatIntEdPartialCallback);
cfp->pages [LOCATION_PAGE] = s;
Hide (cfp->pages [LOCATION_PAGE]);
- cfp->locvisited = FALSE;
+ cfp->locvisited = TRUE;
AlignObjects (ALIGN_CENTER, (HANDLE) cfp->pages [CODING_REGION_PAGE],
(HANDLE) cfp->pages [COMMON_PAGE],
@@ -4210,11 +4210,11 @@ static DialoG CreateProtDialog (GrouP h, CharPtr title, ProtRefPtr prp, SeqFeatP
ppp->protGrp [0] = HiddenGroup (k, -1, 0, NULL);
g = HiddenGroup (ppp->protGrp [0], 0, 10, NULL);
StaticPrompt (g, "Protein Names", 0, 0, programFont, 'c');
- ppp->name = CreateVisibleStringDialog (g, 3, -1, 15);
+ ppp->name = CreateVisibleStringDialog (g, 3, -1, 25);
f = HiddenGroup (ppp->protGrp [0], 0, 4, NULL);
StaticPrompt (f, "Description", 0, dialogTextHeight, programFont, 'c');
- ppp->desc = DialogText (f, "", 20, NULL);
+ ppp->desc = DialogText (f, "", 25, NULL);
r = HiddenGroup (ppp->protGrp [0], 2, 0, NULL);
StaticPrompt (r, "Processing", 0, dialogTextHeight, programFont, 'l');
@@ -4242,7 +4242,7 @@ static DialoG CreateProtDialog (GrouP h, CharPtr title, ProtRefPtr prp, SeqFeatP
ppp->protGrp [2] = HiddenGroup (k, 0, 10, NULL);
StaticPrompt (ppp->protGrp [2], "Activity", 0, 0, programFont, 'c');
- ppp->activity = CreateVisibleStringDialog (ppp->protGrp [2], 3, -1, 15);
+ ppp->activity = CreateVisibleStringDialog (ppp->protGrp [2], 3, -1, 25);
Hide (ppp->protGrp [2]);
ppp->protGrp [3] = HiddenGroup (k, -1, 0, NULL);
@@ -5219,11 +5219,6 @@ static void PopulateAAPopup (PopuP AAitem)
sprintf (item, "%c %s", ch, str);
PopupItem (AAitem, item);
}
- i = '*';
- ch = GetSymbolForResidue (sctp, i);
- str = (CharPtr) GetNameForResidue (sctp, i);
- sprintf (item, "%c %s", ch, str);
- PopupItem (AAitem, item);
SetValue (AAitem, 1);
}
diff --git a/desktop/dlgutil1.c b/desktop/dlgutil1.c
index dbb332dd..86167dd2 100644
--- a/desktop/dlgutil1.c
+++ b/desktop/dlgutil1.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/22/95
*
-* $Revision: 6.43 $
+* $Revision: 6.44 $
*
* File Description:
*
@@ -50,6 +50,7 @@
#include <gbfeat.h>
#include <gbftdef.h>
#include <edutil.h>
+#include <explore.h>
#define NUMBER_OF_SUFFIXES 8
@@ -1008,9 +1009,12 @@ extern Boolean FeatFormReplaceWithoutUpdateProc (ForM f)
Char ch;
Char desc [128];
Int2 expev;
+ SeqMgrFeatContext fcontext;
FeatureFormPtr ffp;
+ SeqFeatPtr gene;
GeneGatherList ggl;
GeneRefPtr grp;
+ GeneRefPtr grpfeat;
GatherScope gs;
SeqLocPtr gslp;
Boolean hasNulls;
@@ -1029,6 +1033,7 @@ extern Boolean FeatFormReplaceWithoutUpdateProc (ForM f)
SeqEntryPtr sep;
SeqFeatPtr sfp;
SeqLocPtr slp;
+ CharPtr str;
Char symbol [128];
Int2 usexref;
Int2 val;
@@ -1152,7 +1157,17 @@ extern Boolean FeatFormReplaceWithoutUpdateProc (ForM f)
}
if (vnp != NULL) {
if (vnp->choice == 1) {
- grp = CreateNewGeneRef ((CharPtr) vnp->data.ptrvalue, NULL, NULL, FALSE);
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringDoesHaveText (str)) {
+ grp = CreateNewGeneRef (str, NULL, NULL, FALSE);
+ gene = SeqMgrGetFeatureByLabel (bsp, str, SEQFEAT_GENE, 0, &fcontext);
+ if (gene != NULL && gene->data.choice == SEQFEAT_GENE) {
+ grpfeat = (GeneRefPtr) gene->data.value.ptrvalue;
+ if (grpfeat != NULL) {
+ grp->locus_tag = StringSaveNoNull (grpfeat->locus_tag);
+ }
+ }
+ }
} else if (vnp->choice == 3) {
grp = GeneRefNew ();
if (grp != NULL) {
diff --git a/desktop/dlgutil2.c b/desktop/dlgutil2.c
index a2a38bd8..30548432 100644
--- a/desktop/dlgutil2.c
+++ b/desktop/dlgutil2.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/22/95
*
-* $Revision: 6.48 $
+* $Revision: 6.49 $
*
* File Description:
*
@@ -1264,7 +1264,7 @@ static void ChangeCannedMessage (PopuP p)
SetStatus (ffp->exception, TRUE);
break;
case 5 :
- SetTitle (ffp->exceptText, "trans splicing");
+ SetTitle (ffp->exceptText, "trans-splicing");
SetStatus (ffp->exception, TRUE);
break;
case 6 :
@@ -1419,7 +1419,7 @@ extern GrouP CreateCommonFeatureGroupEx (GrouP h, FeatureFormPtr ffp,
PopupItem (canned, "RNA editing");
PopupItem (canned, "reasons given in citation");
PopupItem (canned, "ribosomal slippage");
- PopupItem (canned, "trans splicing");
+ PopupItem (canned, "trans-splicing");
PopupItem (canned, "artificial frameshift");
PopupItem (canned, "nonconsensus splice site");
PopupItem (canned, "rearrangement required");
@@ -1433,8 +1433,8 @@ extern GrouP CreateCommonFeatureGroupEx (GrouP h, FeatureFormPtr ffp,
} else if (StringICmp (sfp->except_text, "ribosomal slippage") == 0 ||
StringICmp (sfp->except_text, "ribosome slippage") == 0) {
SetValue (canned, 4);
- } else if (StringICmp (sfp->except_text, "trans splicing") == 0 ||
- StringICmp (sfp->except_text, "trans-splicing") == 0) {
+ } else if (StringICmp (sfp->except_text, "trans-splicing") == 0 ||
+ StringICmp (sfp->except_text, "trans splicing") == 0) {
SetValue (canned, 5);
} else if (StringICmp (sfp->except_text, "artificial frameshift") == 0) {
SetValue (canned, 6);
diff --git a/desktop/e2docsum.c b/desktop/e2docsum.c
index 2875ff75..659011df 100644
--- a/desktop/e2docsum.c
+++ b/desktop/e2docsum.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/30/01
*
-* $Revision: 6.48 $
+* $Revision: 6.49 $
*
* File Description:
*
@@ -1295,6 +1295,10 @@ static CharPtr Query_FetchFields (DoC d, Int2 item, Pointer ptr)
/* */
/*==================================================================*/
+static CharPtr file_is_too_long_mssg =
+"The record is too large to display in this format in the document summary window.\n\
+Please double click here to launch a separate viewer that can display this record.";
+
static CharPtr FileToString (CharPtr path)
{
@@ -1307,16 +1311,20 @@ static CharPtr FileToString (CharPtr path)
ptr = NULL;
len = FileLength (path);
if (len > 0 && len < MAXALLOC) {
- fp = FileOpen (path, "r");
- if (fp != NULL) {
- ptr = MemNew (sizeof (Char) * (size_t) (len + 4));
- if (ptr != NULL) {
- actual = FileRead (ptr, 1, (size_t) len, fp);
- if (actual > 0 && actual <= len) {
- ptr [actual] = '\0';
+ if (len > 65000) {
+ ptr = StringSave (file_is_too_long_mssg);
+ } else {
+ fp = FileOpen (path, "r");
+ if (fp != NULL) {
+ ptr = MemNew (sizeof (Char) * (size_t) (len + 4));
+ if (ptr != NULL) {
+ actual = FileRead (ptr, 1, (size_t) len, fp);
+ if (actual > 0 && actual <= len) {
+ ptr [actual] = '\0';
+ }
}
- }
FileClose (fp);
+ }
}
}
return ptr;
diff --git a/desktop/gbfview.c b/desktop/gbfview.c
index f6b8aaec..2671d9ce 100644
--- a/desktop/gbfview.c
+++ b/desktop/gbfview.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/5/97
*
-* $Revision: 6.73 $
+* $Revision: 6.75 $
*
* File Description:
*
@@ -896,37 +896,40 @@ static void LookForTpa (
static void PopulateFlatFile (BioseqViewPtr bvp, FmtType format, FlgType flags)
{
- BioseqPtr bsp;
- CstType custom = 0;
- DoC doc;
- Boolean doLockFarComponents = FALSE;
- Uint2 entityID;
- FonT fnt;
- FILE *fp;
- Boolean hastpaaligns;
- Int2 into;
- Boolean isAEorCH;
- Boolean isGED;
- Boolean isNTorNW;
- Boolean isNC;
- Boolean isTPA;
- Int2 item;
- ErrSev level;
- Boolean lockFar = FALSE;
- Boolean lookupFar = FALSE;
- ModType mode = SEQUIN_MODE;
- SeqEntryPtr oldsep;
- Char path [PATH_MAX];
- BaR sb = NULL;
- SeqEntryPtr sep;
- Int4 startsAt;
- CharPtr str;
- StlType style = NORMAL_STYLE;
- SeqViewProcsPtr svpp;
- SeqEntryPtr topsep;
- TexT txt;
- SeqEntryPtr usethetop = NULL;
- Int2 val;
+ BioseqPtr bsp;
+ SeqMgrFeatContext context;
+ CstType custom = 0;
+ DoC doc;
+ Boolean doLockFarComponents = FALSE;
+ Uint2 entityID;
+ Int4 feats_with_product_count;
+ FonT fnt;
+ FILE *fp;
+ Boolean hastpaaligns;
+ Int2 into;
+ Boolean isAEorCH;
+ Boolean isGED;
+ Boolean isNTorNW;
+ Boolean isNC;
+ Boolean isTPA;
+ Int2 item;
+ ErrSev level;
+ Boolean lockFar = FALSE;
+ Boolean lookupFar = FALSE;
+ ModType mode = SEQUIN_MODE;
+ SeqEntryPtr oldsep;
+ Char path [PATH_MAX];
+ BaR sb = NULL;
+ SeqEntryPtr sep;
+ SeqFeatPtr sfp;
+ Int4 startsAt;
+ CharPtr str;
+ StlType style = NORMAL_STYLE;
+ SeqViewProcsPtr svpp;
+ SeqEntryPtr topsep;
+ TexT txt;
+ SeqEntryPtr usethetop = NULL;
+ Int2 val;
if (bvp == NULL) return;
if (bvp->hasTargetControl && bvp->ffModeCtrl != NULL) {
@@ -1033,6 +1036,14 @@ static void PopulateFlatFile (BioseqViewPtr bvp, FmtType format, FlgType flags)
doLockFarComponents = TRUE;
}
}
+
+ if (mode == ENTREZ_MODE) {
+ doLockFarComponents = FALSE;
+ lockFar = FALSE;
+ lookupFar = FALSE;
+ flags = flags ^ (SHOW_CONTIG_FEATURES | SHOW_CONTIG_SOURCES | SHOW_FAR_TRANSLATION);
+ }
+
if (doLockFarComponents) {
entityID = ObjMgrGetEntityIDForPointer (bsp);
sep = GetTopSeqEntryForEntityID (entityID);
@@ -1040,6 +1051,20 @@ static void PopulateFlatFile (BioseqViewPtr bvp, FmtType format, FlgType flags)
bvp->bsplist = LockFarComponentsEx (sep, TRUE, FALSE, FALSE, NULL);
}
if (lookupFar) {
+ feats_with_product_count = 0;
+ sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
+ while (sfp != NULL) {
+ if (sfp->product != NULL) {
+ feats_with_product_count++;
+ }
+ sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context);
+ }
+ if (feats_with_product_count > 500) {
+ /* too many to lookup with current caching implementation */
+ lookupFar = FALSE;
+ }
+ }
+ if (lookupFar) {
hastpaaligns = FALSE;
VisitDescriptorsInSep (sep, (Pointer) &hastpaaligns, LookForTpa);
LookupFarSeqIDs (sep, TRUE, TRUE, TRUE, FALSE, hastpaaligns);
diff --git a/desktop/salsa.c b/desktop/salsa.c
index 9540fed9..90a3fa7f 100644
--- a/desktop/salsa.c
+++ b/desktop/salsa.c
@@ -28,7 +28,7 @@
*
* Version Creation Date: 1/27/96
*
-* $Revision: 6.159 $
+* $Revision: 6.160 $
*
* File Description:
*
@@ -3418,6 +3418,7 @@ static Boolean SetupAlignDataSap (EditAlignDataPtr adp, SeqAlignPtr salp_origina
if (adp == NULL || salp_original == NULL)
return FALSE;
+
/*************************************/
/** check if all ->type are NOT 0
*** if all 0 -> all cached from CN3D viewer
@@ -3428,6 +3429,12 @@ static Boolean SetupAlignDataSap (EditAlignDataPtr adp, SeqAlignPtr salp_origina
if (newsalp== NULL)
return FALSE;
/**************************************/
+ if (salp_original->segtype == SAS_DISC)
+ {
+ salp_original = (SeqAlignPtr) salp_original->segs;
+ }
+
+
if ( salp_original->segtype == 1 )
{
adp->blocks = create_list_alignedsegs (salp_original);
@@ -4110,6 +4117,7 @@ static ForM CreateSeqAlignEditForm (Int2 left, Int2 top, CharPtr windowname, Seq
if (salp==NULL)
return NULL;
+
moltype = SeqAlignMolType(salp);
if (moltype == 0)
return NULL;
@@ -5164,6 +5172,85 @@ extern Int2 LIBCALLBACK AlgEditFunc (Pointer data)
return OM_MSG_RET_ERROR;
}
+/* opens window for editing alignment */
+static Int2
+FinishOpeningEditAlignmentWindow
+(SeqAlignPtr salp,
+ OMProcControlPtr ompcp)
+{
+ SelStruct ss;
+ Char str [64];
+ WindoW w;
+ SeqEditViewFormPtr wdp = NULL;
+ OMUserDataPtr omudp;
+ SeqAnnotPtr sap;
+
+ ss.entityID = ompcp->input_entityID;
+ ss.itemID = ompcp->input_itemID;
+ ss.itemtype = ompcp->input_itemtype;
+ ss.regiontype =0;
+ ss.region = NULL;
+
+ SeqIdWrite (SeqAlignId(salp, 0), str, PRINTID_REPORT, 64);
+ w = (WindoW) CreateSeqAlignEditForm (-40, -90, str, salp, &ss);
+ if (w != NULL)
+ {
+ wdp = (SeqEditViewFormPtr) GetObjectExtra (w);
+ if (wdp != NULL)
+ {
+ wdp->input_entityID = ompcp->input_entityID;
+ wdp->input_itemID = ompcp->input_itemID;
+ wdp->input_itemtype = ompcp->input_itemtype;
+ wdp->this_itemtype = OBJ_SEQALIGN;
+ wdp->this_subtype = 0;
+ wdp->procid = ompcp->proc->procid;
+ wdp->proctype = ompcp->proc->proctype;
+ wdp->userkey = OMGetNextUserKey ();
+ omudp = ObjMgrAddUserData (ompcp->input_entityID, ompcp->proc->procid, OMPROC_EDIT, wdp->userkey);
+ if (omudp != NULL)
+ {
+ omudp->userdata.ptrvalue = (Pointer) wdp;
+ omudp->messagefunc = BioseqEditMsgFunc;
+ }
+ checkEntityIDForMsg (wdp);
+ }
+ Show (w);
+ Update ();
+ CaptureSlateFocus ((SlatE) wdp->pnl);
+ Select (w);
+
+ if (salp->segtype == 1) {
+ SeqAlignPtr tmp, newsalp;
+ Boolean ok;
+ EditAlignDataPtr adp;
+
+ adp = GetAlignDataPanel(wdp->pnl);
+ for ( tmp=salp; tmp!=NULL; tmp=tmp->next)
+ {
+ if (tmp->type<1)
+ {
+ sap=adp->sap_original;
+ if (sap)
+ {
+ newsalp = (SeqAlignPtr)sap->data;
+ ok = SeqAlignIDCache (newsalp, SeqAlignId (tmp, 1));
+ if (ok)
+ {
+/*
+ if (adp->sap1_original)
+ SeqAlignIDCache ((SeqAlignPtr)adp->sap1_original->data, SeqIdFindBest (bsp->id, 0));
+*/
+ repopulate_panel (w, adp, newsalp);
+ }
+ }
+ }
+ }
+ }
+ return OM_MSG_RET_DONE;
+ }
+ return OM_MSG_RET_ERROR;
+}
+
/************************************************
***
*** AnnotAlgEditFunc : to launch SEQANNOT editor
@@ -5171,14 +5258,10 @@ extern Int2 LIBCALLBACK AlgEditFunc (Pointer data)
************************************************/
extern Int2 LIBCALLBACK AnnotAlgEditFunc (Pointer data)
{
- WindoW w;
SeqAnnotPtr sap;
SeqAlignPtr salp = NULL;
- SeqEditViewFormPtr wdp = NULL;
OMProcControlPtr ompcp;
- OMUserDataPtr omudp;
- Char str [64];
- SelStruct ss;
+ Int2 rval = OM_MSG_RET_ERROR;
ompcp = (OMProcControlPtr) data;
if (ompcp == NULL || ompcp->proc == NULL) {
@@ -5217,70 +5300,19 @@ extern Int2 LIBCALLBACK AnnotAlgEditFunc (Pointer data)
if (salp == NULL)
return OM_MSG_RET_ERROR;
- ss.entityID = ompcp->input_entityID;
- ss.itemID = ompcp->input_itemID;
- ss.itemtype = ompcp->input_itemtype;
- ss.regiontype =0;
- ss.region = NULL;
-
- SeqIdWrite (SeqAlignId(salp, 0), str, PRINTID_REPORT, 64);
- w = (WindoW) CreateSeqAlignEditForm (-40, -90, str, salp, &ss);
- if (w != NULL)
- {
- wdp = (SeqEditViewFormPtr) GetObjectExtra (w);
- if (wdp != NULL)
- {
- wdp->input_entityID = ompcp->input_entityID;
- wdp->input_itemID = ompcp->input_itemID;
- wdp->input_itemtype = ompcp->input_itemtype;
- wdp->this_itemtype = OBJ_SEQALIGN;
- wdp->this_subtype = 0;
- wdp->procid = ompcp->proc->procid;
- wdp->proctype = ompcp->proc->proctype;
- wdp->userkey = OMGetNextUserKey ();
- omudp = ObjMgrAddUserData (ompcp->input_entityID, ompcp->proc->procid, OMPROC_EDIT, wdp->userkey);
- if (omudp != NULL)
- {
- omudp->userdata.ptrvalue = (Pointer) wdp;
- omudp->messagefunc = BioseqEditMsgFunc;
- }
- checkEntityIDForMsg (wdp);
- }
- Show (w);
- Update ();
- CaptureSlateFocus ((SlatE) wdp->pnl);
- Select (w);
-
-if (salp->segtype == 1) {
-SeqAlignPtr tmp, newsalp;
-Boolean ok;
-EditAlignDataPtr adp;
-
- adp = GetAlignDataPanel(wdp->pnl);
- for ( tmp=salp; tmp!=NULL; tmp=tmp->next)
+ if (salp->dim == 2)
{
- if (tmp->type<1)
- {
- sap=adp->sap_original;
- if (sap)
- {
- newsalp = (SeqAlignPtr)sap->data;
- ok = SeqAlignIDCache (newsalp, SeqAlignId (tmp, 1));
- if (ok)
- {
-/*
- if (adp->sap1_original)
- SeqAlignIDCache ((SeqAlignPtr)adp->sap1_original->data, SeqIdFindBest (bsp->id, 0));
-*/
- repopulate_panel (w, adp, newsalp);
- }
- }
- }
+ rval = FinishOpeningEditAlignmentWindow (salp, ompcp);
}
-}
- return OM_MSG_RET_DONE;
+ else
+ {
+ while (salp != NULL)
+ {
+ rval = FinishOpeningEditAlignmentWindow (salp, ompcp);
+ salp = salp->next;
+ }
}
- return OM_MSG_RET_ERROR;
+ return rval;
}
extern Int2 LIBCALLBACK AlgViewFunc (Pointer data)
diff --git a/desktop/seqpanel.c b/desktop/seqpanel.c
index 0d7797b2..3727bd00 100644
--- a/desktop/seqpanel.c
+++ b/desktop/seqpanel.c
@@ -1,4 +1,4 @@
-/* $Id: seqpanel.c,v 6.34 2004/04/13 16:52:26 bollin Exp $
+/* $Id: seqpanel.c,v 6.35 2004/05/20 20:10:20 bollin Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -42,7 +42,7 @@
enum ESeqNum { eNumNone=1, eNumSide=2, eNumTop=3 };
enum ELineType { eTypeTopSeqNumbers, eTypeTopScaleMarks, eTypeSequence,
- eTypeAlignSequence, eTypeFeature };
+ eTypeAlignSequence, eTypeFeature, eTypeAlignDivider };
enum EDrawGrid { eDrawGridOn=1, eDrawGridOff=2 };
enum EShowFeatures { eShowFeaturesOn=1, eShowFeaturesOff=2, eShowFeaturesAll=3 };
@@ -214,22 +214,57 @@ static SeqPanLinePtr PNTR CreateSeqPanelLines(Int2 lineLength, BioseqViewPtr bvp
Int4 lCount = 0, i, j;
Int4 pCount; /* Total number of paragraphs */
Int4 alnRows = -1;
- Int4 alnValidRows = 0;
+ Int4Ptr alnValidRows = NULL;
Int4 featRows = 1;
+ SeqAlignPtr tmp_salp;
+ Int4Ptr lines_per_alignment = NULL;
+ Int4 num_alignments;
+ Int4 aln_idx;
+ SeqParaGPtr PNTR ref_offset;
if (bvp->seqAlignMode) {
- fLines = GetValue(bvp->newNumControl) == eNumTop ? 2 : 0;
alnRows = AlnMgr2GetNumRows(bvp->salp); /* size of the alignment */
- if (bvp->viewWholeEntity || GetValue(bvp->newFeatControl) == eShowFeaturesAll) featRows = alnRows; /* show features for all rows */
- pCount = floor((AlnMgr2GetAlnLength(bvp->salp, FALSE)-1) / lineLength) + 1; /* alignment length */
- for (j = 1; j != alnRows + 1; j++) { /* AlnMgr counts from 1, not 0 */
- SeqIdPtr tmp_sip = AlnMgr2GetNthSeqIdPtr(bvp->salp, j);
- BioseqPtr tmp_bsp = BioseqLockById(tmp_sip);
- if (tmp_bsp != NULL) {
- alnValidRows++; /* count avaliable alignments */
- BioseqUnlock (tmp_bsp);
+ fLines = GetValue(bvp->newNumControl) == eNumTop ? 2 : 0;
+ pCount = 0;
+ num_alignments = 0;
+ for (tmp_salp = bvp->salp; tmp_salp != NULL; tmp_salp = tmp_salp->next)
+ {
+ num_alignments ++;
+ }
+ lines_per_alignment = (Int4Ptr) MemNew (sizeof (Int4) * num_alignments);
+ if (lines_per_alignment == NULL) return;
+ alnValidRows = (Int4Ptr) MemNew (sizeof (Int4) * num_alignments);
+ if (alnValidRows == NULL)
+ {
+ MemFree (lines_per_alignment);
+ return;
+ }
+ for (i=0; i < num_alignments; i++)
+ {
+ lines_per_alignment [i] = 0;
+ alnValidRows [i] = 0;
+ }
+ for (tmp_salp = bvp->salp, aln_idx = 0; tmp_salp != NULL; tmp_salp = tmp_salp->next, aln_idx++)
+ {
+ lines_per_alignment [aln_idx] = floor((AlnMgr2GetAlnLength(tmp_salp, FALSE)-1) / lineLength) + 1; /* alignment length */
+ pCount += lines_per_alignment [aln_idx];
+ if (bvp->viewWholeEntity || GetValue(bvp->newFeatControl) == eShowFeaturesAll)
+ {
+ if (featRows == 1)
+ featRows = alnRows;
+ else
+ featRows += alnRows;
+ } /* show features for all rows */
+
+ for (j = 1; j != alnRows + 1; j++) { /* AlnMgr counts from 1, not 0 */
+ SeqIdPtr tmp_sip = AlnMgr2GetNthSeqIdPtr(tmp_salp, j);
+ BioseqPtr tmp_bsp = BioseqLockById(tmp_sip);
+ if (tmp_bsp != NULL) {
+ alnValidRows [aln_idx]++; /* count avaliable alignments */
+ BioseqUnlock (tmp_bsp);
+ }
+ SeqIdFree (tmp_sip);
}
- SeqIdFree (tmp_sip);
}
} else {
fLines = GetValue(bvp->newNumControl) == eNumTop ? 3 : 1;
@@ -241,68 +276,112 @@ static SeqPanLinePtr PNTR CreateSeqPanelLines(Int2 lineLength, BioseqViewPtr bvp
ref[i] = (SeqParaGPtr) MemNew(sizeof(SeqParaG));
ref[i]->pFeatList = (ValNodePtr*) MemNew( (size_t)(sizeof(ValNodePtr)*featRows) );
}
+ ref_offset = ref;
if (GetValue(bvp->newFeatControl) != eShowFeaturesOff) {
if (bvp->seqAlignMode) { /* in alignment mode */
- for (i = 1; i != alnRows + 1; i++) {
- SeqIdPtr sip_tmp = AlnMgr2GetNthSeqIdPtr(bvp->salp, i);
- BioseqPtr bsp_tmp = BioseqLockById(sip_tmp);
-
- if (bsp_tmp != NULL) {
- if (bvp->viewWholeEntity || GetValue(bvp->newFeatControl) == eShowFeaturesAll) {
- FillFeatureInfo(bvp, bsp_tmp, lineLength, pCount, i, i - 1, ref); /* show features for each seq in alignment */
- } else if (i == bvp->TargetRow) {
- FillFeatureInfo(bvp, bsp_tmp, lineLength, pCount, i, 0, ref); /* show features for target seq in alignment */
- }
- BioseqUnlock (bsp_tmp);
- } /* bsp_tmp != NULL */
- SeqIdFree (sip_tmp);
- } /* for */
+ for (tmp_salp = bvp->salp, aln_idx = 0; tmp_salp != NULL; tmp_salp = tmp_salp->next, aln_idx ++)
+ {
+ for (i = 1; i != alnRows + 1; i++) {
+ SeqIdPtr sip_tmp = AlnMgr2GetNthSeqIdPtr(tmp_salp, i);
+ BioseqPtr bsp_tmp = BioseqLockById(sip_tmp);
+
+ if (bsp_tmp != NULL) {
+ /* TODO: Need to calculate better values for passing to FillFeatureInfo */
+ if (bvp->viewWholeEntity || GetValue(bvp->newFeatControl) == eShowFeaturesAll) {
+ FillFeatureInfo(bvp, bsp_tmp, lineLength, lines_per_alignment [aln_idx], i, i - 1, ref_offset); /* show features for each seq in alignment */
+ } else if (i == bvp->TargetRow) {
+ FillFeatureInfo(bvp, bsp_tmp, lineLength, lines_per_alignment [aln_idx], i, 0, ref_offset); /* show features for target seq in alignment */
+ }
+ BioseqUnlock (bsp_tmp);
+ } /* bsp_tmp != NULL */
+ SeqIdFree (sip_tmp);
+ } /* for */
+ ref_offset += lines_per_alignment [aln_idx];
+ }
} else {
FillFeatureInfo(bvp, bsp, lineLength, pCount, 0, 0, ref);
}
} /* done with features */
-
bvp->TotalLines = 0; /* go through all pararaphs and count total */
- for (i = 0; i < pCount; i++) {
- Int4 sub_total = 0;
- for (j = 0; j < featRows; j++) sub_total += ValNodeLen(ref[i]->pFeatList[j]);
- bvp->TotalLines += fLines + sub_total + (alnRows == -1 ? 0 : alnValidRows); /* reserve space for alignment */
+ if (bvp->seqAlignMode)
+ {
+ for (tmp_salp = bvp->salp, aln_idx = 0; tmp_salp != NULL; tmp_salp = tmp_salp->next, aln_idx ++)
+ {
+ for (i = 0; i < lines_per_alignment [aln_idx]; i++) {
+ Int4 sub_total = 0;
+ for (j = 0; j < featRows; j++) sub_total += ValNodeLen(ref[i]->pFeatList[j]);
+ bvp->TotalLines += fLines + sub_total + (alnRows == -1 ? 0 : alnValidRows[aln_idx]); /* reserve space for alignment */
+ }
+ /* add one more for divider */
+ if (tmp_salp->next != NULL) bvp->TotalLines ++;
+ }
+
+ }
+ else
+ {
+ for (i = 0; i < pCount; i++) {
+ Int4 sub_total = 0;
+ for (j = 0; j < featRows; j++) sub_total += ValNodeLen(ref[i]->pFeatList[j]);
+ bvp->TotalLines += fLines + sub_total;
+ }
}
+
splp = (SeqPanLinePtr*) MemNew( (size_t)(sizeof(SeqPanLinePtr)*bvp->TotalLines) );
- for (i = 0; i < pCount; i++) {
- if (fLines > 1) { /* Numbers on top */
- splp[lCount++] = MakeSeqPanLine(eTypeTopSeqNumbers, i);
- splp[lCount++] = MakeSeqPanLine(eTypeTopScaleMarks, i);
- }
+ if (bvp->seqAlignMode)
+ {
+ ref_offset = ref;
+ for (tmp_salp = bvp->salp, aln_idx = 0; tmp_salp != NULL; tmp_salp = tmp_salp->next, aln_idx ++)
+ {
+ for (i = 0; i < lines_per_alignment [aln_idx]; i++)
+ {
+ if (fLines > 1) { /* Numbers on top */
+ splp[lCount++] = MakeSeqPanLine(eTypeTopSeqNumbers, i);
+ splp[lCount++] = MakeSeqPanLine(eTypeTopScaleMarks, i);
+ }
- if (bvp->seqAlignMode) {
- for (j = 1; j != alnRows + 1; j++) { /* AlnMgr counts from 1, not 0 */
- SeqIdPtr tmp_sip = AlnMgr2GetNthSeqIdPtr(bvp->salp, j);
- BioseqPtr tmp_bsp = BioseqLockById(tmp_sip);
+ for (j = 1; j != alnRows + 1; j++) { /* AlnMgr counts from 1, not 0 */
+ SeqIdPtr tmp_sip = AlnMgr2GetNthSeqIdPtr(tmp_salp, j);
+ BioseqPtr tmp_bsp = BioseqLockById(tmp_sip);
- if (tmp_bsp != NULL) {
- SeqPanLinePtr plp = MakeSeqPanLine(eTypeAlignSequence, i); /* Add align sequence line*/
- plp->row = j; /* Index position in the alignment (row) */
- splp[lCount++] = plp;
- BioseqUnlock (tmp_bsp);
+ if (tmp_bsp != NULL) {
+ SeqPanLinePtr plp = MakeSeqPanLine(eTypeAlignSequence, i); /* Add align sequence line*/
+ plp->row = j; /* Index position in the alignment (row) */
+ splp[lCount++] = plp;
+ BioseqUnlock (tmp_bsp);
- if (bvp->viewWholeEntity || GetValue(bvp->newFeatControl) == eShowFeaturesAll) {
- MakeFeatureLine(&lCount, i, j, j - 1, ref, splp); /* Add feature line */
- } else if (j == bvp->TargetRow) {
- MakeFeatureLine(&lCount, i, j, 0, ref, splp);
+ if (bvp->viewWholeEntity || GetValue(bvp->newFeatControl) == eShowFeaturesAll) {
+ MakeFeatureLine(&lCount, i, j, j - 1, ref_offset, splp); /* Add feature line */
+ } else if (j == bvp->TargetRow) {
+ MakeFeatureLine(&lCount, i, j, 0, ref_offset, splp);
+ }
}
- }
- SeqIdFree(tmp_sip);
+ SeqIdFree(tmp_sip);
+ }
+ }
+ /* add divider line between alignments */
+ if (tmp_salp->next != NULL)
+ {
+ splp[lCount++] = MakeSeqPanLine (eTypeAlignDivider, i);
+ }
+ ref_offset += lines_per_alignment [aln_idx];
+ }
+
+ }
+ else
+ {
+ for (i = 0; i < pCount; i++) {
+ if (fLines > 1) { /* Numbers on top */
+ splp[lCount++] = MakeSeqPanLine(eTypeTopSeqNumbers, i);
+ splp[lCount++] = MakeSeqPanLine(eTypeTopScaleMarks, i);
}
- } else { /* Add usual sequence and features if not in alignment mode */
splp[lCount++] = MakeSeqPanLine(eTypeSequence, i); /* Add sequence line */
- MakeFeatureLine(&lCount, i, 0, 0, ref, splp); /* Add feature line */
- } /* bvp->seqAlignMode */
- } /* for */
+ MakeFeatureLine(&lCount, i, 0, 0, ref, splp); /* Add feature line */
+ }
+ }/* bvp->seqAlignMode */
for (i = 0; i < pCount; i++) {
for (j = 0; j < featRows; j++) ValNodeFree (ref[i]->pFeatList[j]);
@@ -310,6 +389,8 @@ static SeqPanLinePtr PNTR CreateSeqPanelLines(Int2 lineLength, BioseqViewPtr bvp
MemFree (ref[i]);
}
MemFree (ref);
+ MemFree (lines_per_alignment);
+ MemFree (alnValidRows);
return splp;
}
@@ -488,30 +569,111 @@ static void PopulateSeqView (BioseqViewPtr bvp)
PopulateSeqAlnView(bvp);
}
-
static void PopulateAlnView (BioseqViewPtr bvp)
{
SeqIdPtr sip = bvp->bsp->id;
+ SeqIdPtr tmpsip;
+ SeqLocPtr slp;
+ SeqEntryPtr sep;
+ SeqAlignPtr tmp_salp, next_salp;
+
bvp->seqAlignMode = TRUE;
bvp->SeqStartPosX = 150;
+
+ /* if we're switching between segments we might need to load a
+ * different alignment */
+ if (bvp->salp != NULL)
+ {
+ if ((bvp->bsp->repr != Seq_repr_seg && bvp->salp->next != NULL)
+ || (bvp->TargetRow = AlnMgr2GetFirstNForSip (bvp->salp, sip)) == -1)
+ {
+ bvp->salp = SeqAlignFree (bvp->salp);
+ }
+ else
+ {
+ PopulateSeqAlnView(bvp);
+ return;
+ }
+ }
+
if (bvp->salp == NULL && bvp->seqAlignMode) { /* Try to find an alignment */
- bvp->salp = SeqAlignListDup((SeqAlignPtr) FindSeqAlignInSeqEntry(bvp->bsp->seqentry, OBJ_SEQALIGN));
+ /* need conglomerate view if we're looking at a segmented sequence */
+ if (bvp->bsp->repr == Seq_repr_seg)
+ {
+ slp = (SeqLocPtr) bvp->bsp->seq_ext;
+ while (slp != NULL && bvp->salp == NULL)
+ {
+ tmpsip = SeqLocId (slp);
+ sep = SeqEntryFind (tmpsip);
+ bvp->salp = SeqAlignListDup((SeqAlignPtr) FindSeqAlignInSeqEntry(sep, OBJ_SEQALIGN));
+ slp = slp->next;
+ }
+ }
+ else
+ {
+ bvp->salp = SeqAlignListDup((SeqAlignPtr) FindSeqAlignInSeqEntry(bvp->bsp->seqentry, OBJ_SEQALIGN));
+ }
+
if (bvp->salp == NULL) { /* No alignment found or bug in AlignMgr (which is more likely). Switch to sequence mode */
PopulateSeqView(bvp);
return;
}
- if (bvp->salp->segtype == SAS_DENSEG && bvp->salp->next == NULL) {
- AlnMgr2IndexSingleChildSeqAlign(bvp->salp);
- } else {
- AlnMgr2IndexSeqAlign(bvp->salp);
+
+ if (bvp->bsp->repr == Seq_repr_seg)
+ {
+ /* if segmented set, index each segment individually */
+ tmp_salp = bvp->salp;
+ while (tmp_salp != NULL)
+ {
+ next_salp = tmp_salp->next;
+ tmp_salp->next = NULL;
+ if (tmp_salp->segtype == SAS_DENSEG) {
+ AlnMgr2IndexSingleChildSeqAlign(tmp_salp);
+ } else {
+ AlnMgr2IndexSeqAlign(tmp_salp);
+ }
+ tmp_salp->next = next_salp;
+ tmp_salp = next_salp;
+ }
+ }
+ else
+ {
+ /* do not incorporate other segments in segmented alignment */
+ if (!is_dim2seqalign (bvp->salp) && bvp->salp->next != NULL)
+ {
+ bvp->salp->next = SeqAlignFree (bvp->salp->next);
+ }
+
+ if (bvp->salp->segtype == SAS_DENSEG && bvp->salp->next == NULL) {
+ AlnMgr2IndexSingleChildSeqAlign(bvp->salp);
+ } else {
+ AlnMgr2IndexSeqAlign(bvp->salp);
+ }
}
}
bvp->TargetRow = ROW_UNDEFINED;
- while (sip && bvp->TargetRow == ROW_UNDEFINED) {
- bvp->TargetRow = AlnMgr2GetFirstNForSip(bvp->salp, sip);
- sip = sip->next;
+ if (bvp->bsp->repr == Seq_repr_seg)
+ {
+ slp = (SeqLocPtr) bvp->bsp->seq_ext;
+ while (slp != NULL && bvp->TargetRow == ROW_UNDEFINED)
+ {
+ tmpsip = SeqLocId (slp);
+ while (tmpsip && bvp->TargetRow == ROW_UNDEFINED)
+ {
+ bvp->TargetRow = AlnMgr2GetFirstNForSip(bvp->salp, tmpsip);
+ tmpsip = tmpsip->next;
+ }
+ slp = slp->next;
+ }
}
-
+ else
+ {
+ while (sip && bvp->TargetRow == ROW_UNDEFINED) {
+ bvp->TargetRow = AlnMgr2GetFirstNForSip(bvp->salp, sip);
+ sip = sip->next;
+ }
+ }
+
PopulateSeqAlnView(bvp);
}
@@ -557,6 +719,17 @@ static void DrawTopSeqNums(Int2 x, Int2 y, Int4 line, BioseqViewPtr bvp)
}
}
+/* Draw the divider shown between consecutive alignments in the sequence
+ * panel: one run of ten '~' characters per sequence block, in magenta.
+ * x, y - origin of the line being drawn; the text is positioned so that it
+ *        ends at the right edge of each block.
+ * bvp  - view state; BlocksAtLine, SeqStartPosX and CharWidth are read. */
+static void DrawAlignmentDivider (Int2 x, Int2 y, BioseqViewPtr bvp)
+{
+ Int2 block;
+ char buf[20];
+
+ Magenta ();
+ /* the divider text is the same for every block, so build it once */
+ sprintf(buf, "~~~~~~~~~~");
+ /* '<' rather than '!=' so a negative BlocksAtLine cannot run the loop away */
+ for (block = 0; block < bvp->BlocksAtLine; block++) {
+ PaintStringEx (buf, x+SEQ_X_OFFSET+bvp->SeqStartPosX+(block+1)*SEQ_GROUP_SIZE*bvp->CharWidth+block*bvp->CharWidth - bvp->CharWidth*StrLen(buf), y);
+ }
+}
static void DrawSeqSideLineNumbers(Int2 x, Int2 y, Int4 line, BioseqViewPtr bvp)
{
@@ -718,6 +891,7 @@ AlignmentIntervalToString
MemSet(alnbuf, '-', alnbuf_len); /* assume all gaps and fill the sequence later */
MemSet(seqbuf, 0, alnbuf_len);
+ if (target_row < 0) return;
if (stop > aln_len) {
MemSet (alnbuf + aln_len - start, 0, stop - aln_len);
@@ -847,20 +1021,28 @@ AlignmentIntervalToString
}
}
-static void DrawAlignment(Int2 x, Int2 y, Int4 line, Int4 row, Uint1Ptr buf, Uint1Ptr seqbuf, Uint1Ptr alnbuf, BioseqViewPtr bvp)
+static void DrawAlignment
+(Int2 x, Int2 y, Int4 line, Int4 row, Uint1Ptr buf, Uint1Ptr seqbuf,
+ Uint1Ptr alnbuf, BioseqViewPtr bvp, Int4 aln_idx)
{
- Int2 block;
- Char alnlabel[13];
- SeqIdPtr sip = AlnMgr2GetNthSeqIdPtr(bvp->salp, row);
- BioseqPtr bsp = BioseqLockById(sip);
- Int4 start = line * bvp->CharsAtLine;
- Int4 stop = start + bvp->BlocksAtLine * SEQ_GROUP_SIZE;
- Int4 alnbuf_len;
- SeqIdPtr best_id;
+ Int2 block;
+ Char alnlabel[13];
+ SeqIdPtr sip = AlnMgr2GetNthSeqIdPtr(bvp->salp, row);
+ BioseqPtr bsp = BioseqLockById(sip);
+ Int4 start = line * bvp->CharsAtLine;
+ Int4 stop = start + bvp->BlocksAtLine * SEQ_GROUP_SIZE;
+ Int4 alnbuf_len;
+ SeqIdPtr best_id;
+ SeqAlignPtr tmp_salp;
+ Int4 i;
+
+ for (i=0, tmp_salp = bvp->salp; i < aln_idx && tmp_salp != NULL; i++, tmp_salp = tmp_salp->next)
+ {
+ }
+ if (tmp_salp == NULL) return;
- AlignmentIntervalToString (bvp->salp, row, start, stop, bvp->TargetRow,
+ AlignmentIntervalToString (tmp_salp, row, start, stop, bvp->TargetRow,
bvp->viewWholeEntity, seqbuf, alnbuf, &alnbuf_len);
-
/* finally draw everything */
best_id = SeqIdFindBestAccession (bsp->id);
@@ -1148,6 +1330,8 @@ static void onDrawSeqPanel (PaneL p)
RecT r;
Int4 line;
Int2 x, y;
+ Int4 aln_idx;
+ Int4 start;
bvp = GetBioseqViewPtr (p);
bsp = bvp->bsp;
@@ -1165,7 +1349,18 @@ static void onDrawSeqPanel (PaneL p)
y = r.top + bvp->CharHeight + SEQ_Y_OFFSET;
SelectFont ((FonT)(bvp->displayFont));
- for (line = GetBarValue(sb); line < bvp->TotalLines && y <= r.bottom-2*SEQ_Y_OFFSET; line++) {
+ aln_idx = 0;
+ start = GetBarValue (sb);
+ for (line = 0; line < start && line < bvp->TotalLines; line++)
+ {
+ splp = bvp->SeqPanLines[line];
+ if (splp->lineType == eTypeAlignDivider)
+ {
+ aln_idx++;
+ }
+ }
+
+ for (line = start; line < bvp->TotalLines && y <= r.bottom-2*SEQ_Y_OFFSET; line++) {
if (IsInRange(y, updateRect.top,updateRect.bottom) ||
IsInRange(y+bvp->LineHeight,updateRect.top,updateRect.bottom))
{
@@ -1187,9 +1382,13 @@ static void onDrawSeqPanel (PaneL p)
break;
case eTypeAlignSequence:
DrawAlignSideLineNumbers(x, y, splp->bioSeqLine, splp->row, bvp);
- DrawAlignment(x, y, splp->bioSeqLine, splp->row, buf, seqbuf, alnbuf, bvp); /* Draw the alignment */
+ DrawAlignment(x, y, splp->bioSeqLine, splp->row, buf, seqbuf, alnbuf, bvp, aln_idx); /* Draw the alignment */
if (bvp->DrawGrid) DrawLtGrid(x, y+bvp->LineSpace/2, r.right, y+bvp->LineSpace/2);
break;
+ case eTypeAlignDivider:
+ DrawAlignmentDivider (x, y, bvp);
+ aln_idx++;
+ break;
case eTypeFeature:
if (bvp->DrawGrid) DrawLtGrid(x, y+bvp->LineSpace/2, r.right, y+bvp->LineSpace/2);
DrawFeature(x, y, splp->bioSeqLine, splp->row, splp->idx, splp->protProduct, bsp, bvp); /* Draw Features */
diff --git a/desktop/vsm.c b/desktop/vsm.c
index 01adaf06..a30aa559 100644
--- a/desktop/vsm.c
+++ b/desktop/vsm.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11-29-94
*
-* $Revision: 6.15 $
+* $Revision: 6.16 $
*
* File Description:
*
@@ -1062,12 +1062,22 @@ static void VSMDragAndDrop(VSMWinPtr vsmwp, Uint2 entityID, Uint2 itemID, Uint2
ompc.input_itemtype = vsmwp->itemtype1;
ompc.do_not_reload_from_cache = TRUE;
+
if (! DetachDataForProc(&ompc, FALSE))
{
ErrShow();
return;
}
+ if (ompc.input_choicetype)
+ {
+ WarnIfAlignment (ompc.input_choicetype, ompc.input_choice, ompc.input_entityID);
+ }
+ else
+ {
+ WarnIfAlignment (ompc.input_itemtype, ompc.input_data, ompc.input_entityID);
+ }
+
if (! ompc.whole_entity) /* just a part gone, so need an update */
ObjMgrSendMsg(OM_MSG_DEL, ompc.input_entityID, ompc.input_itemID, ompc.input_itemtype);
diff --git a/doc/README.pbl b/doc/README.pbl
deleted file mode 100644
index 59bcb889..00000000
--- a/doc/README.pbl
+++ /dev/null
@@ -1,610 +0,0 @@
- PowerBLAST: RELEASE 2 (1/6/97)
- (README last modified 2/23/99)
-
-PowerBLAST can now be run with either the command line
-interface ('pblcmd') or a REAL graphical user interface
-('powblast').
-
-You still need connection to both network BLAST server and
-Entrez network service to run the program.
-
-Please read the manual carefully. If you have any questions
-about the display of the TEXT alignment, please check out
-the examples in this file.
-
-For questions and bug report, please send email to:
-blast-help@ncbi.nlm.nih.gov
-
-############################################################
- New Features
-############################################################
-1)Graphical User Interface.
-2)Search with Multiple BLAST Programs.
-3)Save the Settings for Both PowerBLAST Specific Options and
-BLAST Search Parameters.
-4)Options for Dumping out a HTML Page for TEXT Alignment
-5)Option for Monitoring the Search Process
-6)Better Manual
-
-############################################################
- Content of README
-############################################################
-The README includes
-1)Manual for Graphical User Interface (powblast)
-2)Manual for Command Line Interface (pblcmd)
-3)Examples for the TEXT output format
-4)Some Excerpt from the draft of the PowerBLAST paper that
-explains the system and algorithm of PowerBLAST
-
-
-
-############################################################
- Manual for Graphical User Interface
-############################################################
-PowerBLAST starts up with a window for setting up the
-PowerBLAST special options as well as the BLAST search
-parameters.
-
-
-***********************
-Set up BLAST Parameters
-***********************
-Push the Button "Blast Program" on the top right of the
-window and you will have a new window titled "Parameter and
-DataBase for Blast Search". This window allows you to set
-parameters for multiple database searches with multiple
-BLAST programs. It has two sections. The top is to set
-parameters for searching against nucleotide databases, and
-the bottom for searching against protein databases.
-
-a)Select databases
-On the top section, you can select multiple databases by
-checking the boxes such as "nr", "sts", and "est". On the
-bottom, you can check the boxes such as "nr", "swissprot",
-and "pdb". In addition to the check boxes, you can type the
-name of a BLAST search database in the dialog box "other".
-That gives the flexibility of including a new search
-database if it is not covered by the check boxes.
-
-b)BLAST programs
-You can check "BLASTN" and "TBLASTN" for searching against
-nucleotide databases. You can check "BLASTX" and "BLASTP"
-for searching against protein databases. If none of the
-BLAST programs is selected, PowerBLAST will run BLASTP for a
-protein query sequence and BLASTN for a nucleotide query
-sequence. PowerBLAST also checks the consistency between
-query and the selected BLAST program. For example, if the
-query sequence is a protein and if one of the selected
-programs is BLASTN, it will skip the BLASTN option.
-
-c)Parameters for BLAST search
-Set the parameters for BLAST search in the dialog box
-following the program name. !!!!NOTE!!!! The behavior of an
-empty dialog box is different in this version than the
-previous one. If no parameter is set, PowerBLAST will use
-the default setting in the regular BLAST search. In the old
-version, the default for BLASTN and BLASTX were set with
-high cutoff score (M=1 N=-3 S=40 S2=40 for BLASTN and S=90
-S2=90 -filter=seg for BLASTX). If you are processing a large
-genomic sequence, those two sets of parameters work quite
-well, and you may consider keeping them as your default
-search parameters. For searching against protein databases,
-it will be good to set -filter=seg all the time to filter
-the low complexity regions in a protein sequence. If you
-have questions for setting up the BLAST search parameter,
-push the button "Help" at the bottom of the window to obtain
-the email address for help message of BLAST search.
-
-d)Action Buttons
-If you push the "Cancel" button, the option will be reverted
-to the previous setting. If you Push "Accept", the new
-setting will be effective. If you push "Help", you can get
-the email address for help message of BLAST search.
-
-^^^^
-TIPS
-^^^^
-If it is the FIRST time for you to run PowerBLAST, it is
-strongly recommended that you set up BLAST options and push
-"Accept". In the main window, push "Save Setting". The
-current setting will become the default setting when you run
-PowerBLAST next time. All you need to do is just load the
-query sequence.
-
-***********************
-Input Query sequences
-***********************
-You can load the query sequences either a) from an input
-file or b) by "pasting" to the Window. The input can be a
-FASTA formatted sequence file (contain one or multiple
-sequences), or a list of accession numbers, or a list of
-gis. If the input is from a FILE, it can also be a list of
-file names. If Accessions or gis are supplied, the query
-sequences will be fetched from the Entrez server directly.
-
-a) Type in the file name or Push the Button "Read Input
-File" to load the input file.
-
-b) Click at the empty panel underneath "Or Paste Query
-Formatted As:". This is important!!! If you don't place the
-cursor properly, you can paste the buffer in the wrong place
-!!! After that, go to the Pulldown Menu "Edit" and select
-"Paste" to paste the buffer to the panel. Only three formats
-are supported for this option: FASTA, GI or Accessions. The
-default is set to FASTA. You can specify the format by
-selecting from the pulldown list. If you Push the button
-"Clear Window", the panel will become empty again so you can
-correct the error.
-
-********
-Monitor
-********
-You can monitor the PowerBLAST search process by selecting
-"Use Monitor" or disable it by de-selecting the option. On
-Unix machines, sometimes, the monitor can be quite annoying.
-And you have to be prepared to Click OK when a query has no
-hit in the database.
-
-***********************
-Mask the Repeat Region
-***********************
-Type in the file name of a FASTA formatted repeat sequence
-library or load it by pushing the button "Mask Repeats". A
-sample file for human repeats, humrep.fsa, is supplied in the
-data directory.
-
-*****************
-Gapped Alignment
-*****************
-Select one of the Radio Buttons, "None", "SIM",
-"SIM2","SIM3", for Gapped Alignment Algorithm. SIM works for
-DNA-DNA and protein-protein alignment. SIM2 and SIM3 work
-for DNA-DNA alignment only. Note!! There is NO gapped
-alignment algorithm available for DNA-protein (BLASTX) and
-protein-DNA (TBLASTN) alignment.
-
-*********************
-Other Filter Options
-*********************
-Select "Low Complexity" region to filter the low complexity
-region in a DNA sequence with dust.
-Select "Self Hit" if your query is a GenBank sequence and
-you don't want to see any hits to itself.
-
-*************************
-Organism Specific Search
-*************************
-You can restrict the BLAST search results to include or
-exclude a specific group of organisms. Use the radio buttons
-"None", "Include", "Exclude" to make the choice and put the
-organism name (either a taxname, such as Homo sapiens, or a common
-name, such as mammal, will do) in the dialog box.
-
-*******************
-Output File Format
-*******************
-The output from PowerBLAST will be saved in one or more
-selected file formats: "TEXT" (file extension .ali), "HTML"
-(file extension .html), "Seq-align" (file extension .sat),
-and "Seq-entry" (file extension .ent). Both Seq-align(*.sat)
-and Seq-entry (*.ent) can only be viewed by Chromoscope. The
-TEXT file can be viewed directly and the HTML file can be
-opened with a WWW browser, such as netscape. It has hotlinks
-to the sequences in the public databases.
-
-***************
-Action Buttons
-***************
-The "Search" Button is disabled when there is no query
-sequence. It will be activated when there is an input file
-or the panel for pasting the results contains data.
-
-The "Save Setting" Button is very useful!!! It will save all
-the parameters that you have set (which include the BLAST
-search parameters as well as the other powerBLAST specific
-options) in the powblast configuration file and the next
-time you run the program, all the options will come up as
-the default setting. All you need to do is to load the query
-sequence. If you do NOT "Save Setting", next time, it will
-start with the same parameters as the current start-up.
-
-The "Quit" Button is used to quit the program.
-
-
-**********************
-Some Advanced Options
-**********************
-a)TEXT alignment
-The default formatting will display the annotated features
-together with alignments. You can disable this function by
-going to the Pulldown menu "Option" and deselect "Show
-Feature".
-b) Error Log file
-If you are processing a large amount of queries at one time,
-you may find the monitor annoying and turn it off.
-However, you may still want to know the records that found
-no hit. Go to the pulldown menu File and select "Save Error
-Log" to open an error log file for recording the sequences
-with no hit.
-
-
-############################################################
- Manual for Command Line Interface
-############################################################
-The Name of the Program is called "pblcmd". The argument
-list gets much more complicated compared with the previous
-version because new options are available.
-To review the parameter list, type pblcmd -
-and you will get:
-
-power blast arguments:
-
- -i The file name for power blast job [String]
- -c Reset the options 0=No 1=Reset 2=Reset+Save 3=Modify
-4=Modify+Save
- [Integer] Optional
- default = 0
- range from 0 to 4
- -l The repeat FASTA library file for filtering [String]
-Optional
- -d dust the sequence before blast [T/F] Optional
- default = TRUE
- -f filter the blast output with the organism? 0=NO 1=Keep
-2=Filter
- [Integer] Optional
- default = 0
- range from 0 to 2
- -o the name for organism for filtering [String] Optional
- -s compute gapped alignment 0=No 1=sim1 2=sim2 3=sim3
-[Integer] Optional
- default = 0
- range from 0 to 3
- -a export the results as 1=text(*.ali) 8=HTML(*.html)
- 2=Seq-align(*.sat) 4=Seq-entry(*.ent)
- [Integer] Optional
- default = 0
- range from 0 to 15
- -b type of blast 0=default 1=blastn 2=blastp 4=blastx
-8=tblastn
- [Integer] Optional
- default = 0
- range from 0 to 15
- -N Search Nucleotide databases: 1=nr 2=est 4=sts 8=month
-16=htgs 32=vector
- 64=mito 128=kabat 512=pDB epd=1024 yeast=2048 gss=4096
-alu=8192
- [String] Optional
- default = 1
- -A Search Protein databases: 1=nr 8=month 128=kabat
-256=swissprot 512=pdb
- yeast=2048 alu=8192
- [String] Optional
- default = 1
- -n Parameters for BLASTN, use quote [String] Optional
- -x Parameters for BLASTX, use quote [String] Optional
- -p Parameters for BLASTP, use quote [String] Optional
- -t Parameters for TBLASTN, use quote [String] Optional
- -q filter out the GenBank query itself [T/F] Optional
- default = FALSE
- -m Enable the Monitor [T/F] Optional
- default = TRUE
-
-************
-OPTIONS
-************
--i: the file name for the query sequence(s), which can
- be FASTA formatted file with multiple sequences, a
- list of accessions/locus/gis, or a list of file
- names.
-
--c: options for saving the settings
- -c0 take the default settings from the config
- file.
- -c1 reset all the parameters by taking the values
- from the command line
- -c2 same as c1 and save the settings to the
- config file.
- -c3 take the default settings from the config
- file, modify the values with the user setting in
- the command line
- -c4 same as c3 and save the settings to the
- config file
-
- If it is the FIRST time for you to run pblcmd, it
- is strongly recommended that you use -c2 to set up
- your options in most of the search fields. The
-settings will be saved into the config file and
-next time you run the program, if you choose -c0,
-it will automatically set up the previous options
-as the default.
-This option is a little bit awkward. I tried to
-mimic the GUI interface for saving the settings
-and being able to modify some but not all of the
-values.
-
--l a FASTA formatted repeat library file for human
- repeats, humrep, is included in this package. If
- you want to filter human repeats, just do
-lhumrep.
-
--d -dT mask the low complexity region in DNA query
- sequences by the dust program. -dF no dusting
-
--f -f0 No organism filtering
- -f1 Include organism
- -f2 Exclude organism
-
--o Name of the organism. Use quotes. -o"human"
--s -s0 Do not run gapped alignment
- -s1 Run SIM
- -s2 Run SIM2
- -s3 Run SIM3
-
--a The format for output files. You can select to
- save multiple formats by adding the numbers
- together. If you select -a5, it will produce both
- the TEXT alignment (*.ali file) and the Seq-entry
- ASN.1 file (*.ent file)
- -a1 TEXT alignment with the extension .ali
- -a8 HTML page with the extension .html
--a2 ASN.1 Seq-align file with the extension .sat.
-You can view it in Chromoscope.
--a4 Seq-entry ASN.1 file with the extension .ent.
-You can view in Chromoscope.
-
--b BLAST programs. You can select multiple BLAST
- programs by adding the numbers together
- -b0 default. Use BLASTN for a DNA query and
- BLASTP for a Protein query.
- -b1 BLASTN
- -b2 BLASTP
- -b4 BLASTX
- -b8 TBLASTN
-PowerBLAST also checks the consistency between
-query and the selected BLAST program. For example,
-if the query sequence is a protein and if one of
-the selected program is BLASTN, it will skip the
-BLASTN option.
-
--N the Nucleotide Databases for BLAST Search. You can
- run searches against multiple databases by adding
- the numbers together.
-
--A the Protein Databases for BLAST Search. The
- settings are similar to -N.
-
--n Parameters for BLASTN search.
--x Parameters for BLASTX search.
--p Parameters for BLASTP search.
--t Parameters for TBLASTN search.
-
-!!!!NOTE!!!! The behavior of the unspecified
-choice for setting BLAST parameter is different in
-this version than the previous one. If no
-parameter is set, PowerBLAST will use the default
-setting in the regular BLAST search. In the old
-version, the default for BLASTN and BLASTX were
-set with high cutoff score (M=1 N=-3 S=40 S2=40
-for BLASTN and S=90 S2=90 -filter=seg for BLASTX).
-If you are processing a large genomic sequence,
-those two sets of parameters work quite well, and
-you may consider to keep them as your default
-search parameter. For searching against protein
-databases, it will be good to set -filter=seg all
-the time to filter the low complexity regions in a
-protein sequence.
-
--q -qT if the query is a GenBank sequence, filter the
- hits to itself
- -qF keep the hits
-
--m -mT monitor the process
- -mF turn off the monitor
-
-
-*******************
-EXAMPLE for pblcmd
-*******************
-pblcmd -iH_214K23.seq -c2 -lhumrep -dT -f1 -o"human" -s2 -a5
--b5 -N3 -A257 -n"M=1 N=-3 S=40 S2=40" -x"S=90 S2=90
--filter=seg" -mT
-
-For this setting, it will reset the parameters and save them
-into the configuration file. It takes the input sequence
-file H_214K23.seq, run against the human repeat library to
-find the repeats, mask the low complexity regions in the
-query with dust. Keep only the human hits from BLAST search.
-Run SIM2 to produce gapped alignment. Save the results in
-both the TEXT and the ASN.1 file. Search both nr and est
-database for BLASTN and the parameter for BLASTN is "M=1 N=-
-3 S=40 S2=40". For BLASTX, the parameter is "S=90 S2=90 -
-filter=seg". It will run with a monitor.
-
-It is a long parameter list. But once it is set with -c2,
-the next time, all you need is to run the search with pblcmd
--iinput file to get the same results.
-
-
-############################################################
- Examples for TEXT output of the Alignment
-###########################################################
-
-a) a simple DNA-DNA alignment
- 10 20 30 40
- | | | | | | | |
-12> 297 aattaaactgtatattctggataaataaaattatttcgac
-L24443> 1347 ........................................
-D31734> 1344 ........................................
-3'UTR > 1344 ****************************************
-polyA_sign > 1367 ******
-U25274> 1262 ...................a.....at...
-
-******************
-Sequence Identity
-******************
-In this output format, 12 is the query sequence. L24443,
-D31734, and U25274 are the BLAST hits. All the residues of
-the query sequence are displayed, while in the hit
-sequences, only the mismatched residues are displayed (the a
-and at in U25274). The identical residues are displayed as
-dots ".". The ">" symbol shows the orientation of the
-alignment. ">" for the plus strand and "<" for the minus
-strand. The number following ">" indicates the position in
-the sequence.
-
-********************
-Feature Information
-********************
-For sequence D31734, there are two annotated features at the
-region where there is high similarity to the query. Both are
-marked as "*" underneath the DNA sequence. one is 3'UTR.
-3'UTR > 1344 ****************************************
-The other is the polyA signal
-polyA_sign > 1367 ******
-
-
-b) the combined view of BLASTN and BLASTX
-
-
- 10 20 30 40
- | | | | | | | |
-214K> 8837 ttgggtttctagactaaatacagtgtgggaatacacaata
-X03557> 192 ...aa..c......a.......................--
-56-KDa> 43 I E F L D K Y S V G I H N
- \
- |
- cc
-56-KDa> 48 T
-G05877> 24 ...an..c......a.......................--
- \
- |
- cc
-______________________________________________________
-frame=+1> I G F L D * I Q C G N T Q Y
-P09914 43 . E . . .
-307041 43 . E . . .
-A25407 43 . E . . .
-_______________________________________________________
-frame=+3 > W V S R L N T V W E Y T I
-P09914 42 Q I E F . D . K Y S V G .
-307041 42 Q I E F . D . K Y S V G .
-A25407 42 Q I E F . D . K Y S V G .
-
-
-The results from BLASTN and BLASTX are separated by the line
-____ into three panels. The top shows the results from
-BLASTN, the middle and the bottom show the results from
-BLASTX with frame = +1 and frame = +3, respectively.
-
-*******
-BLASTN
-*******
-The query sequence 214K has two BLASTN hits: X03557 and
-G05877 in this region. The gapped alignments were computed
-by SIM2, and the alignments were displayed as multiple pair-
-wise alignment. A gap on the master sequence, i.e. the query
-sequence, is displayed as an insertion in the matching
-sequence. At position 8852 of the query sequence, both of
-the hit sequences contain 2-bp insertions represented by \.
- |
- cc
-At the end of line, both have 2-bp gaps represented by
-dashes (--). In the aligned region, the mRNA sequence X03557
-has a coding region feature, which is presented by labeling
-each amino-acid in the middle of the 3-base codon. As a
-result, the protein sequence displayed in this panel is
-derived from the annotation on the DNA sequence.
-
-The BLASTX display, the conceptual translation with the
-specified reading frame is displayed underneath the
-separation line. The conceptual translation is compared with
-matching sequences from the protein database. Identical
-residues are labeled by dots. In this view, there are 3
-protein sequences, P09914, 307041, A25407, all of which
-align to the query sequence in both frame +1 and frame +3.
-The alignments for frame 1 translation stop at position 8852
-on the query sequence, which corresponds to the 2-bp gap in the
-query sequence (displayed as 2-bp insertions on the matching
-sequences).
-
-
-############################################################
- Algorithms: Excerpt from the draft of PowerBLAST paper
-############################################################
-METHODS
-Figure 1 illustrates the data processes in PowerBlast. Prior to a BLAST search, SIM2
-computes repeat regions in the query sequence and the results are automatically annotated
-as repeat features in the query sequence. Those, together with the low complexity regions
-in a DNA sequence identified by dust (Kuzio, unpublished), are masked in a copy of the
-query sequence which will be sent to the BLAST server for database search. Four types of
-BLAST search may be conducted with PowerBLAST: BLASTN compares a nucleotide
-query to a nucleotide database; BLASTP compares a protein query to a protein database;
-BLASTX compares a translated nucleotide query to a protein database; TBLASTN
-compares a protein query to a translated nucleotide database. Large sequences are split
-into overlapping pieces and the results are merged at the end. An interface was developed
-to enable searches against multiple databases with multiple BLAST programs (Figure 2).
-Organism specific results can be obtained at any level of taxonomy index by filtering the
-HSP alignments inclusively or exclusively with Entrez Taxonomy Server. A suite of SIM
-algorithms (SIM, SIM2, SIM3) may be selected to compute more refined gapped
-alignments. The details of repeat filtering, process of large sequences, organism filtering
-and gapped alignments are described below.
-
-Filter Repeat Region
-To identify repeat regions in the query sequence, PowerBLAST uses the SIM2 algorithm
-to compute the top n non-intersecting gapped alignments between the query sequence and
-repeat sequences in a user supplied FASTA library file. A sample file for human repeat
-sequences, humrep (Makalowski, unpublished), is included in the package. In order to
-reduce false positive and false negative results, various parameters were tested in an
-experiment that compares the ALU repeats identified by SIM2 with the annotations in the
-public records ( Makalowski and Zhang, unpublished) and the optimal choice is the
-combination of scores>=20 and sequence identity > 65%. The end points of the
-alignments are taken as repeat regions, and if there are tandem repeats of the same repeat
-element, the leftmost and rightmost positions will be recorded as the end points of a
-single repeat region. The repeat regions will also be annotated automatically as features
-on the query sequence. Since repeat features are derived from the gapped alignments, the
-query sequence will be broken into overlapping pieces if its length exceeds 10,000bp
-because it is faster to compute alignments multiple times than to process the whole
-sequence at one time.
-
-Processing Large Genomic Sequence
-Because the memory and CPU-time requirements vary with the type of BLAST program as well
-as the composition and length of the query, PowerBlast uses an empirically derived
-maximum search size for each BLAST program. For BLASTN, the maximum size is
-8000bp; for BLASTP, it is 4000aa; for BLASTX, it is 3000bp; and for TBLASTN, it is
-2000aa. If the query sequence exceeds the threshold, it is broken into overlapping pieces
-and each piece is submitted as a separate query to the Network BLAST server. When the
-entire sequence is processed, the HSPs from the same match sequence are sorted by
-locations. If two neighboring HSPs overlap and cover the same diagonal, they will be
-merged into a larger HSP. The statistics from the HSP that has a higher score is assigned
-to the new HSP as an approximation of the real statistical value.
-
-Organism Filtering
-PowerBLAST employs two strategies for organism filtering to achieve the most efficient
-network communication with Entrez Taxonomy Server. If the selected organism has less
-than 1000 records in the public databases, all the Ids are loaded in memory. The BLAST
-hits will be compared locally with the list of the Ids. Otherwise, the Ids of the matching
-sequences will be sent over the network to Entrez server for evaluation. The user may
-choose either to include or exclude a certain taxonomy class.
-
-Gapped Alignment
-Three algorithms, SIM, SIM2, SIM3, can be selected to compute gapped alignments
-between the query sequence and the database matches. The original unmasked query
-sequence is used as the input to the SIM programs to ensure that the repeat regions are
-included in the alignments. SIM is a space efficient algorithm that generates the top n
-non-intersecting Smith-Waterman alignments between DNA-DNA or protein-protein
-sequences. However, it may be too slow for long sequences. SIM2 and SIM3 are much
-faster than SIM, but they only compute DNA-DNA alignments. SIM2 improves the speed
-by first constructing the n best non-intersecting chains of "fragments". It then applies the
-traditional dynamic programming algorithm to compute an optimal gapped alignment in a
-region delimited by the chain. SIM3 computes global alignments for sequences that have
-high similarity; it can be only used when a high cutoff score is set for the BLAST search.
-HSPs from a BLAST search supply the orientation and approximate range as input to the
-SIM programs so that the computation is much more efficient than aligning the entire
-sequences. They are sorted by location, and the gaps between the neighboring HSPs are
-analyzed to determine if more than one alignment needs to be computed because a large
-gap may impose a heavy penalty that terminates the alignment. The threshold is set to be
-200 with the default setting of the SIM programs. The ends of the HSPs are extended
-(1000 bp for DNA sequences, 100 aa for protein sequences) so that the SIM programs
-will be able to compute more accurate end points.
-
-
diff --git a/doc/asn2gb.txt b/doc/asn2gb.txt
index e5c09f6c..d4366d1e 100644
--- a/doc/asn2gb.txt
+++ b/doc/asn2gb.txt
@@ -113,15 +113,16 @@ Still others are expected to be rarely used, or are for testing new features.
#define DDBJ_VARIANT_FORMAT 4096
#define USE_OLD_SOURCE_ORG 8192
+GBSeq XML has been replaced by INSDSeq XML. The CREATE_XML_GBSEQ_FILE flag
+will actually produce INSDSeq. The original GBSeq can be generated during
+the transition period by adding the following flag.
-LOCKS are bits for controlling program performance, and are also ORed together.
+#define PRODUCE_OLD_GBSEQ 16384
-Sequin hangs onto a job pointer and does paragraph formatting in random order,
-so it should free the segment's SeqPort after each sequence block is printed.
-#define FREE_SEQPORT_EACH_TIME 1
+LOCKS are bits for controlling program performance, and are also ORed together.
-Another set is for locking far segmented or delta components, far feature
+One flag set is for locking far segmented or delta components, far feature
location Bioseqs, or far feature product Bioseqs in advance. This prevents
the object manager from uncaching components at an inopportune time, causing
unnecessary thrashing. Far component Bioseqs are needed for displaying the
@@ -131,7 +132,7 @@ sequence.
#define LOCK_FAR_LOCATIONS 4
#define LOCK_FAR_PRODUCTS 8
-A third set attempts to do bulk accession to gi lookups in advance, which is
+Another set attempts to do bulk accession to gi lookups in advance, which is
possible if PubSeqFetchEnable was called by the application. Remote fetching
in asn2gb uses this new access mechanism. Far component IDs are needed for
the CONTIG line, far location IDs for feature location joins, and far product
@@ -142,11 +143,6 @@ IDs for the /protein_id and /transcript_id accessions.
#define LOOKUP_FAR_PRODUCTS 64
#define LOOKUP_FAR_HISTORY 128
-A final flag causes the sequence to be read by streaming at setup time, to
-speed up calculation of the base count and formatting of the sequence blocks.
-
-#define STREAM_SEQ_PORT_FIRST 256
-
To use PubSeqFetchEnable, the application should #include <pmfapi.h>.
@@ -202,7 +198,7 @@ with the Sequin archive. The most commonly used arguments are shown below.
-i Input File Name
-o Output File Name
- -f Format (b GenBank, e EMBL, p GenPept, t Feature Table, x GBSet)
+ -f Format (b GenBank, e EMBL, p GenPept, t Feature Table, x INSDSet)
-m Mode (r Release, e Entrez, s Sequin, d Dump)
-s Style (n Normal, s Segment, m Master, c Contig)
-g Bit Flags (1 HTML, 2 XML, 4 ContigFeats, 8 ContigSrcs, 16 FarTransl)
diff --git a/doc/blast/bl2seq.html b/doc/blast/bl2seq.html
new file mode 100644
index 00000000..4861c704
--- /dev/null
+++ b/doc/blast/bl2seq.html
@@ -0,0 +1,57 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>Bl2seq</title>
+ </head>
+
+ <body>
+<pre>
+Bl2seq
+------
+
+Bl2seq performs a comparison between two sequences using either the blastn or
+blastp algorithm. Both sequences must be either nucleotides or proteins.
+The options may be obtained by executing 'bl2seq -'.
+
+ -i First sequence [File In]
+ -j Second sequence [File In]
+ -p Program name: blastp, blastn, blastx. For blastx 1st argument should be nucleotide [String]
+ default = blastp
+ -g Gapped [T/F]
+ default = T
+ -o alignment output file [File Out]
+ default = stdout
+ -d theor. db size (zero is real size) [Integer]
+ default = 0
+ -a SeqAnnot output file [File Out] Optional
+ -G Cost to open a gap (zero invokes default behavior) [Integer]
+ default = 0
+ -E Cost to extend a gap (zero invokes default behavior) [Integer]
+ default = 0
+ -X X dropoff value for gapped alignment (in bits) (zero invokes default behavior) [Integer]
+ default = 0
+ -W Wordsize (zero invokes default behavior) [Integer]
+ default = 0
+ -M Matrix [String]
+ default = BLOSUM62
+ -q Penalty for a nucleotide mismatch (blastn only) [Integer]
+ default = -3
+ -r Reward for a nucleotide match (blastn only) [Integer]
+ default = 1
+ -F Filter query sequence (DUST with blastn, SEG with others) [String]
+ default = T
+ -e Expectation value (E) [Real]
+ default = 10.0
+ -S Query strands to search against database (blastn only). 3 is both, 1 is top, 2 is bottom [Integer]
+ default = 3
+ -T Produce HTML output [T/F]
+ default = F
+</pre>
+ </body>
+</html>
+
diff --git a/doc/blast/blast.html b/doc/blast/blast.html
index 8726dcea..b5520e02 100644
--- a/doc/blast/blast.html
+++ b/doc/blast/blast.html
@@ -1,14 +1,39 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>README for stand-alone BLAST</title>
+ </head>
+
+ <body>
+<pre>
README for stand-alone BLAST
-$Date: 2004/04/12 15:06:00 $
+$Date: 2004/05/17 15:07:45 $
+
+Table of contents
+-----------------
+Introduction
+1. Available platforms
+2. Getting the BLAST software
+3a. Configuration for UNIX-like systems
+3b. Configuration for Windows
+4. Downloading databases
+
+Introduction
+------------
-This document provides information on stand-alone BLAST. Topics covered are
-setting up stand-alone BLAST, command-line options for stand-alone BLAST,
-and a release history of the different versions.
+
+1. Available platforms
+----------------------
NCBI provides binaries for the following platforms:
-Apple MacOS X (ppc)
+Apple MacOS X (ppc32)
FreeBSD 4.5 (ia32)
HP HPUX 11 (ia64)
HP Tru64 5.1 (alpha)
@@ -21,13 +46,22 @@ Sun Solaris 8 (sparc64)
We will attempt to produce binaries for other platforms upon request.
-Stand-alone binaries are available from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/
+2. Getting the BLAST software
+-----------------------------
-Please remember to FTP in binary mode.
+Binaries are available from:
+ftp://ftp.ncbi.nlm.nih.gov/blast/executables/LATEST-BLAST/
-Setting up Standalone BLAST for UNIX:
--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+Filenames are of the following form:
+
+program-version-architecture-os.extension
+
+
+Please remember to FTP in binary mode.
+
+3a. Configuration for UNIX-like systems
+---------------------------------------
Basically, there are three steps needed to setup the Standalone BLAST
executable for the UNIX platform.
@@ -97,7 +131,7 @@ may be easier to 'cheat' here and just extract a portion of a
nucleotide sequence you know is in the downloaded ecoli.nt database.
Make a text file called test.txt with the following sequence:
->Test
+&gt;Test
AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC
TTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAA
TATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACC
@@ -126,14 +160,13 @@ If you have any questions please send them to the
blast-help@ncbi.nlm.nih.gov e-mail address.
-Setting up Standalone BLAST for Windows
--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+3b. Configuration for Windows
+-----------------------------
-There are three steps needed to setup the Standalone BLAST
-executable.
+There are three steps needed to set up the Standalone BLAST executable.
-1) Download and compress the Standalone BLAST Windows binary
-blastcz.exe. We suggest doing this in it's own directory, perhaps called
+1) Download and decompress the Standalone BLAST Windows binary.
+ We suggest doing this in its own directory, perhaps called
blast. This is a 'self-extracting' archive and all you need to do is run
this either through a Command Prompt (DOS Prompt) or by selecting "Run"
from the Windows "Start button" and browsing the blastcz.exe file.
@@ -207,7 +240,7 @@ may be easier to 'cheat' here and just extract a portion of a
nucleotide sequence you know is in the downloaded ecoli.nt database.
So make a text file called test.txt with the following sequence:
->Test
+&gt;Test
AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC
TTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAA
TATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACC
@@ -250,7 +283,7 @@ System recommendations:
BLAST uses memory-mapped files (on UNIX and NT systems), so it runs best if
it can read the entire BLAST database into memory, then keep on using it
there. Resources consumed reading a database into memory can easily
-outweight the cost of a BLAST search, so that the memory of a machine is
+outweigh the cost of a BLAST search, so that the memory of a machine is
normally more important than the CPU speed. This means that one should have
sufficient memory for the largest BLAST database one will use, then run all
the searches against this database in serial, then run queries against
@@ -279,1449 +312,7 @@ limit datasize unlimited
Note that this change only applies to the current session, so it is advisable to place
this command in some file sourced at startup, such as .login or .cshrc.
+</pre>
+ </body>
+</html>
-
-BLAST OPTIONS
--------------
-
-Formatdb
---------
-
-There is now a separate document describing formatdb (README.formatdb). Please
-refer to it for information on formatting FASTA files for BLAST searches.
-
-
-Blastall
---------
-
-Blastall may be used to perform all five flavors of blast comparison. One
-may obtain the blastall options by executing 'blastall -' (note the dash). A
-typical use of blastall, performing a blastn search (nucl. vs. nucl.)
-of a file called QUERY, would be:
-
-blastall -p blastn -d nr -i QUERY -o out.QUERY
-
-The output is placed into the output file out.QUERY and the search is performed
-against the 'nr' database. If a protein vs. protein search is desired,
-then 'blastn' should be replaced with 'blastp' etc.
-
-Some of the most commonly used blastall options are:
-
-blastall arguments:
-
- -p Program Name [String]
-
- Input should be one of "blastp", "blastn", "blastx", "tblastn", or "tblastx".
-
- -d Database [String]
- default = nr
-
- The database specified must first be formatted with formatdb.
- Multiple database names (bracketed by quotations) will be accepted.
- An example would be
-
- -d "nr est"
-
- which will search both the nr and est databases, presenting the results as if one
- 'virtual' database consisting of all the entries from both were searched. The
- statistics are based on the 'virtual' database of nr and est.
-
- -i Query File [File In]
- default = stdin
-
- The query should be in FASTA format. If multiple FASTA entries are in the input
- file, all queries will be searched.
-
- -e Expectation value (E) [Real]
- default = 10.0
-
- -o BLAST report Output File [File Out] Optional
- default = stdout
-
- -F Filter query sequence (DUST with blastn, SEG with others) [String]
- default = T
-
- BLAST 2.0 and 2.1 use the dust low-complexity filter for blastn and seg for the
- other programs. Both 'dust' and 'seg' are integral parts of the NCBI toolkit
- and are accessed automatically.
-
- If one uses "-F T" then normal filtering by seg or dust (for blastn)
- occurs (likewise "-F F" means no filtering whatsoever).
-
- This option also takes a string as an argument. One may use such a
- string to change the specific parameters of seg or invoke other filters.
- Please see the "Filtering Strings" section (below) for details.
-
- -S Query strands to search against database (for blast[nx], and tblastx). 3 is both, 1 is top, 2 is bottom [Integer]
- default = 3
-
- -T Produce HTML output [T/F]
- default = F
-
- -l Restrict search of database to list of GI's [String] Optional
-
- This option specifies that only a subset of the database should be
- searched, determined by the list of gi's (i.e., NCBI identifiers) in a
- file. One can obtain a list of gi's for a given Entrez query from
- http://www.ncbi.nlm.nih.gov/Entrez/batch.html. This file should
- be in the same directory as the database, or in the directory that
- BLAST is called from.
-
- -U Use lower case filtering of FASTA sequence [T/F] Optional
- default = F
-
- This option specifies that any lower-case letters in the input FASTA file
- should be masked.
-
-
- Documentation for PSI-TBLASTN
-
-PSI-TBLASTN is a variant of blastall that searches a protein query
-sequence against a nucleotide sequence database using a position
-specific matrix created by PSI-BLAST. The nucleotide sequence database
-is dynamically translated in all reading frames during PSI-TBLASTN
-search. Using a position specific matrix may enable finding more
-distantly related sequences.
-
-Programs:
-blastpgp [takes a protein query and performs a PSI-BLAST search to
- create a position specific matrix using a protein
- database]
-
-blastall [reads position specific matrix and performs PSI-TBLASTN
- search]
-
-Usage:
-A user would typically run blastpgp to create and save a position
-specific matrix, followed by a run of blastall for PSI-TBLASTN search.
-
-blastpgp must be executed with -C option followed by a file name to
-save position specific score matrix.
-
-blastall with "-p psitblastn" option executes PSI-TBLASTN search, and
--R option followed by a file name specifying the file that contains
-position specific score matrix. All other options that apply when
-using "blastall -p tblastn ..." also apply when using "blastall -p
-psitblastn ...", but there are some restrictions to parameters: 1) The
-query must be the same as the one used in blastpgp for creating a
-position specific matrix. 2) By default, blastpgp has filtering off
-(-F F) and blastall has filtering on (-F T). To ensure consistent
-usage of the blastpgp/psitblastn combination, the -F option should be
-explicitly set in one or the other run.
-
-
-Example:
-One may run PSI-BLAST to create and save a position specific score matrix
-as follows:
-
- blastpgp -d nr -i ff.chd -j 2 -C ff.chd.ckp
-
-Position specific score matrix is saved in ff.chd.ckp. Then, using
-this matrix, one may run PSI-TBLASTN search:
-
- blastall -i ff.chd -d yeast -p psitblastn -R ff.chd.ckp
-
-Note that this allows the score matrix to be constructed using one
-database (nr in the example) and then used to search a second database
-(yeast in the example). Even if the two database names are the same,
-blastpgp uses the protein version while "blastall -p psitblastn" uses
-the DNA version.
-
-
-
-Blastpgp
---------
-
-Blastpgp performs gapped blastp searches and can be used to perform
-iterative searches in psi-blast and phi-blast mode. See the PSI-Blast and
-PHI-BLAST sections (below) for a description of this binary. The options may be
-obtained by executing 'blastpgp -'.
-
- -T Produce HTML output [T/F]
- default = F
-
- -Q Output File for PSI-BLAST Matrix in ASCII [File Out] Optional
-
-Bl2seq
-------
-
-Bl2seq performs a comparison between two sequences using either the blastn or
-blastp algorithm. Both sequences must be either nucleotides or proteins.
-The options may be obtained by executing 'bl2seq -'.
-
- -i First sequence [File In]
- -j Second sequence [File In]
- -p Program name: blastp, blastn, blastx. For blastx 1st argument should be nucleotide [String]
- default = blastp
- -g Gapped [T/F]
- default = T
- -o alignment output file [File Out]
- default = stdout
- -d theor. db size (zero is real size) [Integer]
- default = 0
- -a SeqAnnot output file [File Out] Optional
- -G Cost to open a gap (zero invokes default behavior) [Integer]
- default = 0
- -E Cost to extend a gap (zero invokes default behavior) [Integer]
- default = 0
- -X X dropoff value for gapped alignment (in bits) (zero invokes default behavior) [Integer]
- default = 0
- -W Wordsize (zero invokes default behavior) [Integer]
- default = 0
- -M Matrix [String]
- default = BLOSUM62
- -q Penalty for a nucleotide mismatch (blastn only) [Integer]
- default = -3
- -r Reward for a nucleotide match (blastn only) [Integer]
- default = 1
- -F Filter query sequence (DUST with blastn, SEG with others) [String]
- default = T
- -e Expectation value (E) [Real]
- default = 10.0
- -S Query strands to search against database (blastn only). 3 is both, 1 is top, 2 is bottom [Integer]
- default = 3
- -T Produce HTML output [T/F]
- default = F
-
-
-Fastacmd
---------
-
-There is now a separate document describing fastacmd (README.fastacmd). Please
-refer to it for information on using this tool.
-
-
-Filtering Strings
------------------
-
- The -F argument can take a string as input specifying that seg should be
- run with certain values or that other non-standard filters should be used.
- This section describes this syntax.
-
- The seg options can be changed by using:
-
- -F "S 10 1.0 1.5"
-
- which specifies a window of 10, locut of 1.0 and hicut of 1.5.
-
- A coiled-coiled filter, based on the work of Lupas et al. (Science, vol 252, pp. 1162-4 (1991))
- and written by John Kuzio (Wilson et al., J Gen Virol, vol. 76, pp. 2923-32 (1995)), may be invoked
- by specifying:
-
- -F "C"
-
- There are three parameters for this: window, cutoff (prob of a coil-coil), and
- linker (distance between two coiled-coiled regions that should be linked
- together). These are now set to
-
- window: 22
- cutoff: 40.0
- linker: 32
-
- One may also change the coiled-coiled parameters in a manner analogous to
- that of seg:
-
- -F "C 28 40.0 32" will change the window to 28.
-
- One may also run both seg and coiled-coiled together by using a ";":
-
- -F "C;S"
-
- Filtering by dust may also be specified by:
-
- -F "D"
-
- It is possible to specify that the masking should only be done during
- the process of building the initial words by starting the filtering
- command with 'm', e.g.:
-
- -F "m S"
-
- which specifies that seg (with default arguments) should be used for masking,
- but that the masking should only be done when the words are being built.
- This masking option is available with all filters.
-
- If the -U option (to mask any lower-case sequence in the input FASTA file) is used and
- one does not wish any other filtering, but does wish to mask when building the lookup tables
- then one should specify:
-
- -F "m"
-
- This is the only case where "m" should be specified alone.
-
-
-PSI-Blast
----------
-
-The blastpgp program can do an iterative search in which
-sequences found in one round of searching are used to build
-a score model for the next round of searching. In this usage,
-the program is called Position-Specific Iterated BLAST, or PSI-BLAST.
-As explained in the accompanying paper, the BLAST algorithm is
-not tied to a specific score matrix. Traditionally, it has been
-implemented using an AxA substitution matrix where A is the alphabet size.
-PSI-BLAST instead uses a QxA matrix, where Q is the length of the query
-sequence; at each position the cost of a letter depends on the position
-w.r.t. the query and the letter in the subject sequence.
-
-The position-specific matrix for round i+1 is built from a constrained
-multiple alignment among the query and the sequences found with
-sufficiently low e-value in round i. The top part of the output for
-each round distinguishes the sequences into: sequences found
-previously and used in the score model, and sequences not used in the
-score model. The output currently includes lots of diagnostics
-requested by users at NCBI. To skip quickly from the output of
-one round to the next, search for the string "producing", which is
-part of the header for each round and likely does not appear elsewhere
-in the output. PSI-BLAST "converges" and stops if all sequences
-found at round i+1 below the e-value threshold were already in
-the model at the beginning of the round.
-
-There are several blastpgp parameters specifically for PSI-BLAST:
--j is the maximum number of rounds (default 1; i.e., regular BLAST)
--h is the e-value threshold for including sequences in the
- score matrix model (default 0.001)
--c is the "constant" used in the pseudocount formula specified in the
- paper (default 10)
-
-The -C and -R flags provide a "checkpointing" facility whereby
-a score model can be stored and later reused.
- -C stores the query and frequency count ratio matrix in a
- file
- -R restarts from a file stored previously.
-When using -R, it is required that the query specified on the command line
-match exactly the query in the restart file.
-The checkpoint files are stored in a byte-encoded (not human readable)
-format, so as to prevent roundoff error between writing and reading
-the checkpoint.
-Users who also develop their own sequence analysis software may wish
-to develop their own scoring systems. For this purpose the code
-in posit.c that writes out the checkpoint can be easily adapted to
-write out scoring systems derived by other algorithms in such
-a way that PSI-BLAST can read the files in later.
-The checkpoint structure is general in the sense that it can handle
-any position-specific matrix that fits in the Karlin-Altschul
-statistical framework for BLAST scoring.
-
-The -B flag provides a way to jump start PSI-BLAST from a master-slave
-multiple alignment computed outside PSI-BLAST. The multiple alignment
-must include the query sequence as one of the sequences, but it need
-not be the first sequence. The multiple alignment must be specified
-in a format that is derived from Clustal, but without some headers and
-trailers. See example below. The rules are also described by the
-following words. Suppose the multiple alignment has N sequences. It
-may be presented in 1 or more blocks, where each block presents a
-range of columns from the multiple alignment. E.g., the first block
-might have columns 1-60, the second block might have columns 61-95,
-the third block might have columns 96-128. Each block should have N
-rows, 1 row per sequence. The sequences should be in the same order
-in every block. Blocks are separated by 1 or more blank lines.
-Within a block there are no blank lines, and each line consists of 1
-sequence identifier followed by some white space followed by
-characters (and gaps) for that sequence in the multiple alignment. In
-each column, all letters must be in upper case, or all letters must be
-in lower case. Upper case means that this column is to be given
-position-specific scores. Lower-case means to use the underlying
-matrix (specified by -M) for this column; e.g., if the query sequence
-has an 'l' residue in the column, then the standard scores for
-matching an L are used in the column.
-
-A sample usage would be:
-
- blastpgp -i seq1 -B align1 -j 2 -d nr
-
-where seq1 is the query
- align1 is the alignment file
- -j 2 indicates to do 2 rounds
- -d nr indicates to use the nr database
-
-The example files
- seq1
- align1
-copied below were kindly supplied by L. Aravind from a paper
-he and Chris Ponting published in Protein Science:
-
-Aravind L, Ponting CP, Homologues of 26S proteasome subunits
-are regulators of transcription and translation, Protein Science
-7(1998) 1250-1254.
-
-L. Aravind (aravind@ncbi.nlm.nih.gov) was the first user
-and helped define how -B should work. Y. Wolf (wolf@ncbi.nlm.nih.gov)
-helped design a more flexible input format for the alignments.
-If you like how -B works, let them know.
-If you do not like how -B works, complain to
-A. Schaffer(schaffer@helix.nih.gov) who did the implementation.
-
-seq1
-----
-> 26SPS9_Hs
-IHAAEEKDWKTAYSYFYEAFEGYDSIDSPKAITSLKYMLLCKIMLNTPEDVQALVSGKLALRYAGRQTEA
-LKCVAQASKNRSLADFEKALTDYRAELRDDPIISTHLAKLYDNLLEQNLIRVIEPFSRVQIEHISSLIKL
-SKADVERKLSQMILDKKFHGILDQGEGVLIIFDEPP
-
-
-align1
-------
-26SPS9_Hs IHAAEEKDWKTAYSYFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgklalryagrqtealkcvaqasknr
-F57B9_Ce LHAADEKDFKTAFSYFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsaklalkyngsdldamkaiaaaaqk
-YDL097c_Sc ILHCEDKDYKTAFSYFFESFESYhnltthnsyekacqvlkymllskimlnliddvknilnakytketyqsrgidamkavae
-YMJ5_Ce LYSAEERDYKTSFSYFYEAFEGFasigdkinatsalkymilckimlneteqlagllaakeivayqkspriiairsmadafr
-FUS6_ARATH KNYIRTRDYCTTTKHIIHMCMNAilvsiemgqfthvtsyvnkaeqnpetlepmvnaklrcasglahlelkkyklaarkfld
-COS41.8_Ci SLDYKLKTYLTIARLYLEDEDPVqaemyinrasllqnetadeqlqihykvcyarvldyrrkfleaaqrynelsyksaihet
-644879 KCYSRARDYCTSAKHVINMCLNVikvsvylqnwshvlsyvskaestpeiaeqrgerdsqtqailtklkcaaglaelaarky
-YPR108w_Sc IHCLAVRNFKEAAKLLVDSLATFtsieltsyesiatyasvtglftlertdlkskvidspellslisttaalqsissltisl
-eif-3p110_Hs SKAMKMGDWKTCHSFIINEKMNGkvw-------------------------------------------------------
-T23D8.4_Ce SKAMLNGDWKKCQDYIVNDKMNQkvw-------------------------------------------------------
-YD95_Sp IYLMSIRNFSGAADLLLDCMSTFsstellpyydvvryavisgaisldrvdvktkivdspevlavlpqnesmssleacinsl
-KIAA0107_Hs LYCVAIRDFKQAAELFLDTVSTFtsyelmdyktfvtytvyvsmialerpdlrekvikgaeilevlhslpavrqylfslyec
-F49C12.8_Hs LYRMSVRDFAGAADLFLEAVPTFgsyelmtyenlilytvitttfaldrpdlrtkvircnevqeqltggglngtlipvreyl
-Int-6_Mm KFQYECGNYSGAAEYLYFFRVLVpatdrnalsslwgklaseilmqnwdaamedltrlketidnnsvssplqslqqrtwlih
-
-26SPS9_Hs sladfekaltdy-----------------------------------------------------------------------------------
-F57B9_Ce rslkdfqvafgsf----------------------------------------------------------------------------------
-YDL097c_Sc aynnrslldfntalkqy------------------------------------------------------------------------------
-YMJ5_Ce krslkdfvkalaeh---------------------------------------------------------------------------------
-FUS6_ARATH vnpelgnsyneviapqdiatygglcalasfdrselkqkvidninfrnflelvpdvrelindfyssryascleylasl------------------
-COS41.8_Ci eqtkalekalncailapagqqrsrmlatlfkdercqllpsfgilekmfldriiksdemeefar--------------------------------
-644879 kqaakclllasfdhcdfpellspsnvaiygglcalatfdrqelqrnvissssfklflelepqvrdiifkfyeskyasclkmldem----------
-YPR108w_Sc yasdyasyfpyllety-------------------------------------------------------------------------------
-eif-3p110_Hs -----------------------------------------------------------------------------------------------
-T23D8.4_Ce -----------------------------------------------------------------------------------------------
-YD95_Sp ylcdysgffrtladve-------------------------------------------------------------------------------
-KIAA0107_Hs rysvffqslavv-----------------------------------------------------------------------------------
-F49C12.8_Hs esyydchydrffiqlaale----------------------------------------------------------------------------
-Int-6_Mm wslfvffnhpkgrdniidlflyqpqylnaiqtmcphilrylttavitnkdvrkrrqvlkdlvkviqqesytykdpitefveclyvnfdfdgaqkk
-
-26SPS9_Hs ----RAELRDDPIISTHLAKLYDNLLEQNLIRVIEPFSRVQIEHISSLIKLSKADVERKLSQMILDKKFHGILDQGEGVLIIFDEPP
-F57B9_Ce ----PQELQMDPVVRKHFHSLSERMLEKDLCRIIEPYSFVQIEHVAQQIGIDRSKVEKKLSQMILDQKLSGSLDQGEGMLIVFEIAV
-YDL097c_Sc ----EKELMGDELTRSHFNALYDTLLESNLCKIIEPFECVEISHISKIIGLDTQQVEGKLSQMILDKIFYGVLDQGNGWLYVYETPN
-YMJ5_Ce ----KIELVEDKVVAVHSQNLERNMLEKEISRVIEPYSEIELSYIARVIGMTVPPVERAIARMILDKKLMGSIDQHGDTVVVYPKAD
-FUS6_ARATH ----KSNLLLDIHLHDHVDTLYDQIRKKALIQYTLPFVSVDLSRMADAFKTSVSGLEKELEALITDNQIQARIDSHNKILYARHADQ
-COS41.8_Ci ----QLMPHQKAITADGSNILHRAVTEHNLLSASKLYNNIRFTELGALLEIPHQMAEKVASQMICESRMKGHIDQIDGIVFFERRET
-644879 ----KDNLLLDMYLAPHVRTLYTQIRNRALIQYFSPYVSADMHRMAAAFNTTVAALEDELTQLILEGLISARVDSHSKILYARDVDQ
-YPR108w_Sc ----ANVLIPCKYLNRHADFFVREMRRKVYAQLLESYKTLSLKSMASAFGVSVAFLDNDLGKFIPNKQLNCVIDRVNGIVETNRPDN
-eif-3p110_Hs ----DLFPEADKVRTMLVRKIQEESLRTYLFTYSSVYDSISMETLSDMFELDLPTVHSIISKMIINEELMASLDQPTQTVVMHRTEP
-T23D8.4_Ce ----NLFHNAETVKGMVVRRIQEESLRTYLLTYSTVYATVSLKKLADLFELSKKDVHSIISKMIIQEELSATLDEPTDCLIMHRVEP
-YD95_Sp ----VNHLKCDQFLVAHYRYYVREMRRRAYAQLLESYRALSIDSMAASFGVSVDYIDRDLASFIPDNKLNCVIDRVNGVVFTNRPDE
-KIAA0107_Hs ----EQEMKKDWLFAPHYRYYVREMRIHAYSQLLESYRSLTLGYMAEAFGVGVEFIDQELSRFIAAGRLHCKIDKVNEIVETNRPDS
-F49C12.8_Hs ----SERFKFDRYLSPHFNYYSRGMRHRAYEQFLTPYKTVRIDMMAKDFGVSRAFIDRELHRLIATGQLQCRIDAVNGVIEVNHRDS
-Int-6_Mm lrecESVLVNDFFLVACLEDFIENARLFIFETFCRIHQCISINMLADKLNMTPEEAERWIVNLIRNARLDAKIDSKLGHVVMGNNAV
-
-
-
-
-
-PHI-Blast
----------
-
-PHI-BLAST (Pattern-Hit Initiated BLAST) is a search
-program that combines matching of regular expressions
-with local alignments surrounding the match.
-The most important features of the program have been
-incorporated into the BLAST software framework
-partly for user convenience and partly so that
-PHI-BLAST may be combined seamlessly with PSI-BLAST.
-Other features that do not fit into the BLAST framework
-will be released later as a separate program and/or
-separate Web page query options.
-
-One very restrictive way to identify protein motifs
-is by regular expressions that must contain each instance
-of the motif. The PROSITE database is a compilation of
-restricted regular expressions that describe protein motifs.
-Given a protein sequence S and a regular expression pattern P
-occurring in S, PHI-BLAST helps answer the question:
-What other protein sequences both contain an occurrence of P
-and are homologous to S in the vicinity of the pattern occurrences?
-PHI-BLAST may be preferable to just searching for pattern occurrences
-because it filters out those cases where the pattern occurrence is
-probably random and not indicative of homology.
-PHI-BLAST may be preferable to other flavors of BLAST because
-it is faster and because it allows the user to express
-a rigid pattern occurrence requirement.
-
-The pattern search methods in PHI-BLAST are based on the
-algorithms in:
-
-R. Baeza-Yates and G. Gonnet, Communications of the ACM 35(1992), pp. 74-82.
-S. Wu and U. Manber, Communications of the ACM 35(1992), pp. 83-91.
-
-The calculation of local alignments is done using a method
-very similar to (and much of the same code as) gapped BLAST.
-However, the method of evaluating statistical significance is different, and
-is described below.
-
-In the stand-alone mode the typical PHI-BLAST usage looks like:
- blastpgp -i <query_file> -k <pattern_file> -p patseedp
-
- where -i is followed by the file containing the query in FASTA format
- where -k is followed by the file containing the pattern in a syntax given below
- and "patseedp" indicates the mode of usage, not representing any file.
-
-The syntax for the query sequence is FASTA format as for all other
-BLAST queries. The syntax for patterns follows the rules of
-PROSITE and is documented in detail below.
-The specified pattern is not required to be in the PROSITE list.
-Most of the other BLAST flags can be used with PHI-BLAST.
-One important exception is that PHI-BLAST requires gapped
-alignments (i.e. forbids -g F in the flags) because ungapped
-alignments do not make sense for almost all patterns in PROSITE.
-
-There is a second mode of PHI-BLAST usage that is important when
-the specified pattern occurs more than 1 time in the query.
-In this case, the user may be interested in restricting the
-search for local alignments to a subset of the pattern occurrences.
-This can be done with a search that looks like:
- blastpgp -i <query_file> -k <pattern_file> -p seedp
-
-in which case the use of the "seedp" option requires the user to
-specify the location(s) of the interesting pattern occurrence(s)
-in the pattern file. The syntax for how to specify pattern
-occurrences is below. When there are multiple pattern occurrences in the
-query it may be important to decide how many are of interest because
-the E-value for matches is effectively multiplied by the number
-of interesting pattern occurrences.
-
-The PHI-BLAST Web page supports only the "patseedp" option.
-
-PHI-BLAST is integrated with PSI-BLAST. In the command-line
-mode, PSI-BLAST can be invoked by using the -j option, as usual.
-When this is done as:
- blastpgp -i <query_file> -k <pattern_file> -p patseedp -j <number_of_rounds>
-
-then the first round of searching uses PHI-BLAST and all subsequent
-rounds use PSI-BLAST.
-In the Web page setting, the user must explicitly invoke one round
-at a time, and the PHI-BLAST Web page provides the option to
-initiate a PSI-BLAST round with the PHI-BLAST results.
-To describe a combined usage, use the term "PHI-PSI-BLAST"
-(Pattern-Hit Initiated, Position-Specific Iterated BLAST).
-
-Determining statistical significance.
-
-When a query sequence Q matches a database sequence D in PHI-BLAST,
-it is useful to subdivide Q and D into 3 disjoint pieces
- Qleft Qpattern Qright
- Dleft Dpattern Dright
-
-The substrings Qpattern and Dpattern contain the pattern specified
-in the pattern file. The pieces Qpattern and Dpattern are aligned
-and that alignment is displayed as part of the PHI-BLAST output,
-but the score for that alignment is mostly ignored.
-The "reduced" score r of an alignment is the sum of the scores obtained
-by aligning Qleft with Dleft and by aligning Qright with Dright.
-
-The expected number of alignments with a reduced score >= x
-is given by:
- CN(Lambda*x + 1)e^(-Lambda *x)
-where:
-
-C and Lambda are "constants" depending on the score matrix and the
-gap costs.
-N is (number of occurrences of pattern in database) * (number of
- occurrences of pattern in Q)
-e is the base of the natural logarithm.
-
-It is important to understand that this method of computing
-the statistical significance of a PHI-BLAST alignment is mathematically
-different from the method used for BLAST and PSI-BLAST alignments.
-However, both methods provide E-values, so they the E_values are
-displayed with a similar output syntax.
-
-Rules for pattern syntax for PHI-BLAST.
-
-The syntax for patterns in PHI-BLAST follows the conventions
-of PROSITE. When using the stand-alone program, it
-is permissible to have multiple patterns in a file separated
-by a blank line between patterns. When using the Web-page
-only one pattern is allowed per query.
-
-Valid protein characters for PHI-BLAST patterns:
- ABCDEFGHIKLMNPQRSTVWXYZU
-
-Valid DNA characters for PHI-BLAST patterns:
- ACGT
-
-Other useful delimiters:
- [ ] means any one of the characters enclosed in the brackets
- e.g., [LFYT] means one occurrence of L or F or Y or T
- - means nothing (this is a spacer character used by PROSITE)
- x with nothing following means any residue
- x(5) means 5 positions in which any residue is allowed (and similarly for any other
- single number in parentheses after x)
- x(2,4) means 2 to 4 positions where any residue is allowed,
- and similarly for any other two numbers separated by a comma;
- the first number should be < the second number.
- > can occur only at the end of a pattern and means nothing
- it may occur before a period
- (another spacer used by PROSITE)
-
- . may be used at the end of the pattern and means nothing
-
-When using the stand-alone program, the pattern should
-be in a file, with the first line starting:
- ID
-followed by 2 spaces and a text string giving the pattern a name.
-
-There should also be a line starting
- PA
-followed by 2 spaces followed by the pattern description.
-
-All other PROSITE codes in the first two columns are allowed,
-but only the HI code, described below is relevant to PHI-BLAST.
-
-Here is an example from PROSITE.
-
-ID CNMP_BINDING_2; PATTERN.
-AC PS00889;
-DT OCT-1993 (CREATED); OCT-1993 (DATA UPDATE); NOV-1995 (INFO UPDATE).
-DE Cyclic nucleotide-binding domain signature 2.
-PA [LIVMF]-G-E-x-[GAS]-[LIVM]-x(5,11)-R-[STAQ]-A-x-[LIVMA]-x-[STACV].
-NR /RELEASE=32,49340;
-NR /TOTAL=57(36); /POSITIVE=57(36); /UNKNOWN=0(0); /FALSE_POS=0(0);
-NR /FALSE_NEG=1; /PARTIAL=1;
-CC /TAXO-RANGE=??EP?; /MAX-REPEAT=2;
-
-The line starting
- ID
-gives the pattern a name.
-The lines starting
- AC, DT, DE, NR, NR, CC
-are relevant to PROSITE users, but irrelevant to PHI-BLAST.
-These lines are tolerated, but ignored by PHI-BLAST.
-
-The line starting
- PA
-describes the pattern as:
- one of LIVMF
-followed by
- G
-followed by
- E
-followed by
- any single character
-followed by
- one of GAS
-followed by
- one of LIVM
-followed by
- any 5 to 11 characters
-followed by
- R
-followed by
- one of STAQ
-followed by
- A
-followed by
- any single character
-followed by
- one of LIVMA
-followed by
- any single character
-followed by
- one of STACV
-
-In this case the pattern ends with a period.
-It can end with nothing after the last specifying symbol
-or any number of > signs or periods or combination thereof.
-
-Here is another example, illustrating the use of an HI line.
-
-ID ER_TARGET; PATTERN.
-PA [KRHQSA]-[DENQ]-E-L>.
-HI (19 22)
-HI (201 204)
-
-In this example, the HI lines specify that the pattern
-occurs twice, once from positions 19 through 22 in the
-sequence and once from positions 201 through 204 in the
-sequence.
-These specifications are relevant when stand-alone PHI-BLAST is
-used with the
- seedp
-option, in which the interesting occurrences of the pattern
-in the sequence are specified. In this case the
-HI lines specify which occurrence(s) of the pattern
-should be used to find good alignments.
-
-In general, the seedp option is more useful than the
-standard patternp option ONLY when the
-pattern occurs K > 1 times in the sequence AND
-the user is interested in matching to J < K of those
-occurrences.
-Then using the HI lines enables the user to specify which
-occurrences are of interest.
-
-Additional functionality related to PHI-BLAST.
-
-PHI-BLAST takes as input both a sequence and a query containing
-that sequence and searches a sequence database for
-other sequences containing the same pattern and having a good alignment.
-One may be interested in asking two related, simpler questions:
-
-1. Given a sequence and a database of patterns, which patterns occur
-in the sequence and where?
-
-2. Given a pattern and a sequence database, which sequences contain the
-pattern and where?
-
-These queries can be answered wih software closely related to PHI-BLAST,
-but they do not fit into the output framework of BLAST because the
-answers are simple lists without alignments and with no notion of
-statistical significance.
-
-The NCBI toolbox includes another program, currently called
- seedtop
-to answer the two queries above.
-
-Query 1 can be asked with:
- seedtop -i -k -p patmatchp
-
-Query 2 can be asked with:
- seedtop -d -k -p patternp
-
-The -k argument is used similarly in all queries and the file
-format is always the same. The standard pattern database is
-PROSITE, but others (or a subset) can be used.
-There are plans afoot to offer the patmatchp query (number 1) on
-the PHI-BLAST web page or in its vicinity, but this would
-be restricted to having PROSITE as the pattern database.
-
-References
-
- Zhang, Zheng, Alejandro A. Schäffer, Webb Miller, Thomas L. Madden,
- David J. Lipman, Eugene V. Koonin, and Stephen F. Altschul (1998),
- "Protein sequence similarity searches using patterns as seeds", Nucleic
- Acids Res. 26:3986-3990.
-
- Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
- Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
- "Gapped BLAST and PSI-BLAST: a new generation of protein database
- search programs", Nucleic Acids Res. 25:3389-3402.
-
- Karlin, Samuel and Stephen F. Altschul (1990). Methods for
- assessing the statistical significance of molecular sequence
- features by using general scoring schemes. Proc. Natl. Acad.
- Sci. USA 87:2264-68.
-
- Karlin, Samuel and Stephen F. Altschul (1993). Applications
- and statistics for multiple high-scoring segments in molecu-
- lar sequences. Proc. Natl. Acad. Sci. USA 90:5873-7.
-
- Schaffer, Alejandro A., L. Aravind, Thomas L. Madden, Sergei Shavirin
- John L. Spouge, Yuri I. Wolf, Eugene V. Koonin, and Stephen F. Altschul (2001),
- Improving PSI-BLAST Protein Database Search Sensitivity with Composition-Based
- Statistics and Other Refinements. Nucleic Acids Res. 29:2994-3005.
-
-Release History
----------------
-Notes for the 2.2.8 release:
-
-* Correction to tblastx alignment computation
-
-Notes for 2.2.7 release (2/2/04):
-
-* Standalone BLAST is now available for amd64-linux.
-
-* formatdb now restricts volume sizes to 1G on 32-bit platforms
- for performance reasons.
-
-* The -A option has been removed from formatdb, that is, all databases
- will be created with ASN.1 deflines.
-
-* tblastn query concatenation now works correctly on 64-bit platforms.
-
-* The wwwblast source code has been merged into the C toolkit tree and
- is no longer distributed with the binaries.
-
-Notes for 2.2.6 release (4/9/03):
-
-Enhancements:
-
-1.) A -B option now exists for blastall that specifies the concatenation of queries
-for blastn and tblastn. This option is still experimental and subject to change.
-It is not supported with XML, ASN.1, or tabular output.
-
-2.) Text and binary SeqAligns can now be produced in place of the standard BLAST report by
-using (respectively) "-m 10" or "-m 11".
-
-Bug fixes:
-
-1.) A problem with an integer "rollover" in formatdb has been fixed. This happened when the
-volume size was selected with the -v option and the specified number of bases became negative and
-the option was ignored.
-
-2.) A problem in the statistics of the BLAST output footer was fixed. This was a double-counting of
-the number of extensions performed.
-
-3.) A problem that caused the target and query sequences to be reversed in tblastx XML output has been fixed.
-
-4.) A memory corruption problem in the formatting of the tabular output has been fixed.
-
-5.) An unstable sorting problem in the results for tblastx searches has been fixed.
-
-6.) A spurious error message about a file called "taxdb.bti" has been suppressed.
-
-7.) A problem with the number of hits returned in XML mode being double what they should be has been fixed.
-
-8.) The fastacmd return values have been corrected, it is 0 on success and 1 for an error.
-
-
-Notes for 2.2.5 release (11/15/02):
-
-Enhancements:
-
-1.) Fastacmd now prints the length of the longest sequence
-when used with the -I option.
-
-2.) It is now possible to specify a range restriction on the query for
-an rpsblast search, use the -L option.
-
-
-Bug fixes:
-
-1.) A problem that caused bl2seq to not show the ID of the query has
-been fixed.
-
-2.) A problem that caused formatdb to not properly work on the nr protein FASTA file
-has been fixed.
-
-3.) A problem that caused blastall or blastpgp to print too many alignments when
-used with XML mode has been fixed.
-
-4.) The -P option was accidentally deleted from the blast binaries in the 2.2.4 release
-and has been restored. The -P option can be used to specify whether one or
-two hits should be found before an extension, the default is two hits.
-
-
-Notes for 2.2.4 release (08/26/02):
-
-Enhancements:
-
-1.) Discontiguous word matching is now available for megablast.
-See http://www.ncbi.nlm.nih.gov/blast/discontiguous.html for details.
-
-2.) An out-of-frame gapping option (meaning that one or two bases can be
-inserted or deleted from an alignment) is now available in blastall for
-blastx and tblastn. NOTE that the expect values have been calculated
-assuming in-frame gapping (three bases inserted/deleted) and should only
-be used for guidance.
-
-3.) Fastacmd can now dump out partial sequences (using the -L option)
-and print taxonomic information for a sequence.
-
-Bug fixes:
-
-1.) A problem that caused blastall to core-dump when the -U option
-(mask the sequence that is lower-case in input file) has been fixed.
-
-2.) A problem that caused bl2seq to not work properly for protein-protein
-searches with BLOSUM62 (on some platforms) has been fixed.
-
-3.) A problem that caused seedtop to core dump if there were a lot
-of hits has been fixed.
-
-4.) Using -n with blastall (megablast mode) now returns the same results
-as default megablast.
-
-5.) XML output for megablast has been fixed.
-
-6.) A problem with translating rpsblast that caused it to crash on OSF/1
-and report incorrect values on other platforms has been fixed.
-
-7.) A memory leak in formatdb was fixed.
-
-8.) A problem that caused blastpgp to core-dump when running in PHI-BLAST mode
-(if many hits were found) was fixed. Memory leaks were also fixed.
-
-9.) the double closing of a file that caused phi-blast to crash occassionally
-under LINUX has been fixed.
-
-
-Notes for 2.2.3 release (04/24/02):
-
-Enhancements:
-
-1.) Version 4 of the BLAST databases is now the default for formatdb.
-This can be overridden for older binaries by use of "-A F" on the
-command-line.
-
-
-Bug fixes:
-
-1.) A problem has been fixed that caused tblastn searches to miss some protein matches,
-if the database sequence was longer than 15 million bases.
-
-2.) Selenocysteine residues (U) in the query are now replaced by X's as these
-are not supported in the currently available matrices (e.g., BLOSUM62), so that
-their presence occasionally caused data corruption.
-
-3.) A problem with combining the "-m 7" and "-n T" options in blastall has
-been fixed.
-
-4.) XML output had a <Hit_def> field that could (incorrectly) have an empty value,
-this has been fixed.
-
-5.) A problem with reading databases with more than one volume and an oidlist has
-been fixed.
-
-6.) A problem with ungapped XML output that caused all HSP's to be number zero
-has been resolved, they are now numbered with one-offset.
-
-7.) A bug that prevented use of some matrices for ungapped searches has
-been fixed.
-
-8.) Effective query and database lengths were calculated incorrectly for
-rpsblast, leading to a minor change in expect values in some cases. This
-has been corrected.
-
-9.) A for loop that could overrun the end of a buffer during formatting was
-fixed. Many thanks to Haruna Cofer of SGI for pointing this.
-
-10.) The effective database length command-line argument (-z) has been fixed
-for blastall and megablast. The parser was reading digits only until
-there were no non-digits (e.g., 1.6e8 was interpreted as "1"), leading
-to wildly incorrect effective database lengths. This has been fixed so
-that 160000000 and 1.6e8 are interpeted the same way.
-
-
-Notes for 2.2.2 release:
-
-Enhancements:
-
-1.) Version 4 of the BLAST databases is now fully supported. This version
-has some enhancements described in README.formatdb and fixes some problems
-described below. Use the "-A" option on formatdb to produce the new database
-version. The BLAST binaries for release 2.2.2 are entirely compatiable with
-both the current and the new version of the BLAST databases. Old BLAST binaries
-are not necessarily compatiable with the new database format.
-
-2.) Fastacmd will dump out an entire BLAST database in FASTA format if the
-new -D option is used.
-
-3.) Fastacmd will separate definition lines from different GI's that have
-been merged together in nr (as they all have the same sequence) by control-A's.
-if the new -c option is used.
-
-
-Bug fixes:
-
-1.) A problem has been fixed that caused tblastn searches to miss some protein matches,
-if the database sequence was longer than 15 million bases.
-
-2.) The old (current) version of the BLAST databases has a "rollover" problem if
-the total number of bases in a single volume is greater than 4294967295. The new
-database verison (#4) allows eight bytes for this.
-
-3.) The old (current) version of the BLAST database format does not handle ambiguity
-characters in a nucleotide database sequence if it is over 16 million characters long.
-The new version of the the BLAST database does.
-
-4.) A performance problem that caused a mutexes to be acquired too often for
-multi-threaded runs with four or more CPU's has been fixed. Thanks to Haruna
-Cofer of SGI for help in finding the cause.
-
-5.) A problem that caused ungapped blastp/blastx/tblastn/tblastx to crash on
-certain matrices (e.g., pam10) has been fixed.
-
-6.) Some blastpgp problems with using the -B (for reading a master-slave alignment) and
-reading checkpoint files (-C) have been resolved.
-
-
-Notes for 2.2.1 release:
-
-Enhancements:
-
-1.) BLAST and PSI-BLAST improvements as described in
-Schaffer et al., Nucleic Acids Research 2001 Jul 15;29(14):2994-3005.
-These include improvements the use of composition-based statistics
-and improvements to the edge-correction effects. Composition-based
-statistics were initially implemented in release 2.1.1, but the
-implementation is improved in release 2.2.1.
-
-2.) Formatdb automatically produces database volumes for input
-consisting of more than 4 billion letters.
-
-3.) Formatdb can produce an alias file for a given database and GI list
-as well as convert a GI list to the more efficient binary format. See
-details in README.formatdb.
-
-4.) RPSBLAST now works properly with 'scaled' databases. The scaling factor must
-be set when executing the program 'makemat' (which takes PSI-BLAST checkpoints
-as input). Scaling-up the matrix improves the precision of the (integer) calculations.
-
-5.) Tabular output has now been added to blastpgp and rpsblast, use the "-m 8" option.
-
-6.) Blastpgp will now process multiple queries.
-
-Bug fixes:
-
-1.) A problem with the -K option (for culling) that caused BLAST to crash has been fixed.
-
-2.) A problem with the "gnl" identifier and multi-volume databases has been fixed.
-
-3.) A problem that caused BLASTN to very rarely find suboptimal alignments has been fixed.
-
-4.) A problem that could cause makemat to crash has been fixed.
-
-4.) Some multi-threading problem pointed out by Henry Gabb of KAI were fixed.
-
-5.) Some PC-lint errors and warnings pointed out by Russ Williams of United Devices
-were fixed.
-
-
-Notes for 2.1.3 release:
-
-Enhancements:
-
-1.) Addition of PSI-TBLASTN ability to blastall, see description in
-README.bls.
-
-2.) Database sequences over 5 million bases in length are now broken
-into chunks to keep memory usage reasonable.
-
-3.) Blastall now allows one to enter a location if it is desired
-to search a subsequence of the query.
-
-4.) Formatdb can produce a new BLAST database format using the -A option.
-The BLAST programs can read this format as well as the current format (the
-program automatically identifies which version it should work with). This
-new format stores the sequence definition lines in a structured manner
-(as ASN.1), this will allow future versions of BLAST to better present
-taxonomic information as well as information about other resources (e.g.,
-UniGene, LocusLink) for a database sequence.
-
-5.) Blastall can now produce tab-delimited, use "-m 8" to specify this.
-
-6.) Improved Karlin-Altschul parameters are now being used, they were
-calculated using the "island" method
-
-7.) A "gapped" check was added to BLASTN to ensure that if a hit is low-scoring
-after an ungapped extension, but high-scoring after a gapped extension, it will
-not be missed.
-
-8.) The formatdb error messages have been improved for the case of illegal
-characters in the sequence.
-
-9.) The number of HSP's saved in an ungapped search has been increased to 400 from 200.
-
-Bug fixes:
-
-1.) A problem with XML output was fixed.
-
-2.) A problem with the seg filtering under LINUX was
-fixed (many thanks to Eric Cabot at GCG for pointing this out).
-
-3.) A problem with format of BLAST reports if the "-o" flag
-was not used when the database was produced was fixed
-(thanks again to Eric Cabot).
-
-4.) A problem with reading the BLAST database caused by a 4-byte signed integer
-than should have been unsigned was fixed (thanks to Haruna Cofer at SGI
-for pointing this out).
-
-5.) A problem with copymat under NT and IRIX was fixed.
-
-
-Notes for 2.1.2 release:
-
-Enhancements:
-
-1.) Release of rpsblast. Rpsblast performs a search against a database
-of profiles. See README.rps for full details.
-
-2.) Release of blastclust. BLASTCLUST automatically and systematically clusters protein sequences
-based on pairwise matches found using the BLAST algorithm. See README.bcl for
-full details.
-
-3.) Release of megablast. Megablast uses the greeedy algorithm of Webb Miller et al.
-for nucleotide sequence alignment search and concatenates many queries to save
-time spent scanning the database. See README.mbl for full details.
-
-4.) XML output can now be produced. Use the '-m 7' option for this.
-The XML output is still experimental.
-
-5.) the default behavior the culling (-K) option has been changed. Previously
-this option was set to 100, meaning that if more than 100 HSP's had a
-hit to a region lower scoring ones would be dropped. The option is now
-zero, which turns off this behavior. In a few cases this change will
-result in more database sequences being reported. The previous behavior can
-be recovered by using '-K 100' on the command-line.
-
-Bug fixes:
-
-1.) A bug that caused only the last SeqAnnot to be written (if the -O option
-was used) when multiple sequences were searched has been fixed. All
-SeqAnnots are printed out.
-
-2.) A bug that caused the search space (set on the command line with the -Y option)
-to be ignored for some blastx and tblastn calculations has been fixed.
-
-3.) A failure to close a file if a gilst was used (using the -l option) was
-fixed. Many thanks to David Mathog at CalTech for spotting this problem
-and suggesting a fix.
-
-4.) A bug that caused all the database names listed in an alias file to be
-printed, rather than the "TITLE" field has been fixed.
-
-
-
-Notes for 2.1.1:
-
-Enhancements:
-
-1.) Addition of compostion-based statistics:
-
-BLAST and PSI-BLAST now permit calculated E-values to take into account the amino acid composition of the individual database sequences involved in reported
-alignments. This improves E-value accuracy, thereby reducing the number of false positive results.
-
-The improved statistics are achieved with a scaling procedure [1,2] which in effect employs a slightly different scoring system for each database sequence. As a result,
-raw BLAST alignment scores in general will not correspond precisely to those implied by any standard substitution matrix. Furthermore, identical alignments can receive
-different scores, based upon the compositions of the sequences they involve. The improved statistics are now used by default for all rounds of searching on the
-PSI-BLAST page, but not on the BLAST page. Therefore, if one uses default settings, the results of the first round of searching will be different on the BLAST and
-PSI-BLAST pages.
-
-In addition adjustments have been made to two PSI-BLAST parameters: the pseudocount constant default has been changed from 10 to 7, and the E-value threshold for
-including matches in the PSI-BLAST model has been changed from 0.001 to 0.002.
-
-1. Altschul, S.F. et al. (1997) Nucl. Acids Res. 25:3389-3402.
-2. Schäffer, A.A. et al. (1999) Bioinformatics 15:1000-1011.
-
-
-Notes for 2.0.14 release:
-
-
-Bug fixes:
-
-1.) extra line returns between sequences in the a FASTA file
-causes formatdb to produce corrupted databases.
-
-2.) ";" at the beginning of a line was not being treated as a comment.
-
-3.) a problem with the formatter causes blast to core-dump if
-the FASTA definition line only contains an identifier and
-no description.
-
-4.) a problem in the ungapped extension for protein sequences
-causes a rare problem.
-
-5.) the '-U' option that causes lower-case sequence to be masked
-does not work correctly for blastx.
-
-
-Notes for 2.0.13 release:
-
-Enhancements:
-
-1.) The output format for pairwise alignments was changed to
-put each new gi (if the sequence has redundant gi's) on a
-new line. If HTML output is specified then each gi is hyperlinked.
-
-Bug fixes:
-
-1.) An NCBI toolkit problem parsing the new RefSeq format in FASTA files
-(two bars instead of three) was fixed. This fix applies to all
-BLAST binaries (formatdb, blastall, blastpgp, etc.).
-
-2.) A problem that caused BLAST version 2.0.12 under NT to freeze in
-multithreaded mode has been fixed.
-
-Notes for 2.0.12 release:
-
-Enhancements:
-
-1.) Bl2seq can now perform nucleotide-protein (blastx style) comparisons.
-This necessitated changing the '-p' option from a Boolean to a
-string. Valid arguments are "blastn", "blastp", or "blastx".
-
-Bug fixes:
-
-1.) A problem in the NCBI threads library that caused BLAST to sometimes
-stick was corrected. Many thanks to Haruna Cofer and colleauges at SGI
-for providing a fix.
-
-2.) A problem that caused BLAST to core-dump (especially on long queries)
-has been fixed. Many thanks to Gary Williams for providing examples.
-
-3.) A problem that prevented the search of multiple multivolume databases
-has been fixed.
-
-
-
-Notes for 2.0.11 release:
-
-Enhancements:
-
-1.) Optimizations were contributed by Chris Joerg of COMPAQ. These changes
-reduce the number of cache misses, unroll loops, and make some instructions
-unnecessary. These improvements can speed up BLAST for long sequences
-several-fold.
-
-2.) A database is now only memory-mapped while being searched. If multiple databases
-are searched and the total exceeds the allowed memory-map limit this allows
-all databases to be searched as memory-mapped files. If a database cannot
-be memory-mapped it is read as an ordinary file, rather than causing an error.
-
-Bug fixes:
-
-1.) Formatdb was fixed to correct a problem with FASTA string identifiers under NT.
-
-2.) Blastpgp was fixed to prevent a core-dump under LINUX
-
-3.) BLASTN was found to miss some hits near the expect value cutoff. This has been
-corrected.
-
-
-
-Notes for 2.0.10 release:
-
-Enhancements:
-
-1.) Bl2seq, a utility to compare two sequences using the blastn or blastp approach,
-is included in the archive. See the full description in the README.bls for details.
-
-2.) A 'sparse' option ('-s') has been added to formatdb. This option limits the indices
-for the string identifiers (used by formatdb) to accessions (i.e., no locus names).
-This is especially useful for sequences sets like the EST's where the accession and locus
-names are identical. Formatdb runs faster and produces smaller temporary files if this
-option is used. It is strongly recommended for EST's, STS's, GSS's, and HTGS's.
-
-3.) A volume option ('-v') has been added to formatdb. This option breaks up large
-FASTA files into 'volumes' (each with a maximum size of 2 billion letters).
-As part of the creation of a volume formatdb writes a new type of BLAST database file,
-called an alias file, with the extension 'nal' or 'pal', is written. This option
-should be used if one wishes to formatdb large databases (e.g., over 2 billion
-base pairs).
-
-4.) It is is now possible to jump start the command line version of PSI-BLAST (blastpgp)
-from a multiple alignment that includes the query sequence using the -B option. Details
-are in README.bls.
-
-5.) The maximum wordsize limit for BLASTN has been removed.
-
-Bug fixes:
-
-1.) A problem if the database length, set by the '-z' option was greater than
-2 billion, was fixed.
-
-2.) A core-dump that resulted from the use of the coil-coil masking
-('-F C') was fixed by including a file needed for the data directory.
-
-3.) A bug was fixed that caused some very short alignments to be assigned incorrect
-expect values.
-
-4.) A bug was fixed that caused formatdb to produce incorrect BLAST databases if
-the input was ASN.1.
-
-5.) A serious performance problem with BLASTN and longer words (greater than 16)
-was fixed.
-
-Notes for 2.0.9 release:
-
-Enhancements:
-
-1.) two new options have been added to blastall: to produce output in HTML and
-to search a subset of the database based upon a list of GI's. Please see
-the options section for full information.
-
-2.) two new options have been added to blastpgp: to produce HTML output and to
-produce an ASCII version of the PSI-BLAST Matrix. Please see the options section
-for more information.
-
-3.) formatdb has a new option to allow specification of a 'base' name. see the options
-section for full details.
-
-4.) it is possible to mask only during the phase when the lookup table is being built,
-but not during the extensions. See the options section for full details.
-
-Bug fixes:
-
-1.) a problem that occurred when too many HSP's aligned to the same part
-of the query from one database sequence has been fixed.
-
-2.) a problem that caused seedtop to not perform pattern-matching for DNA
-sequences has been fixed.
-
-3.) the number of HSP's saved for ungapped BLAST and tblastx is now limited to
-200 to prevent problems with memory and speed.
-
-4.) a missing thread join that caused problems under DEC Alpha has been added.
-
-5.) a formatting problem with the database summary at the beginning of the
-BLAST output (if multiple databases totaling over 2 Gig) has been fixed.
-
-6.) a bug in formatdb that caused a core-dump if the total number of sequences was an
-exact multiple of 100000 was fixed.
-
-
-Notes for 2.0.8 release:
-
-Enhancements:
-
-1.) Frame and strand information was added to the output. Examples of the
-new output format may be found at http://www.ncbi.nlm.nih.gov/BLAST/example.html.
-
-2.) An option that specifes the query strand to be searched (for blastn, blastx, and tblastx)
-has been added. The option is '-S'.
-
-Bug fixes:
-
-1.) The problem with the 'too-wide' parameter input screen under NT was fixed.
-
-2.) BLAST no longer core-dump's when the query is NULL.
-
-3.) BLAST no longer core-dump's when the query contains an '@' and blastx or tblastx is selected.
-
-Notes for 2.0.7 release:
-
-Bug fixes:
-
-1.) BLAST now multi-threads properly under LINUX.
-
-2.) A problem with very redundant databases and psi-blast was fixed.
-
-3.) A problem with the formatting of the number of identities and positives
-was fixed. This affected results on the minus strand only and did not
-affect the expect value or scores.
-
-4.) A problem that caused tblastn to core-dump very occassionally was corrected.
-
-5.) A problem with multiple patterns in PHI-BLAST was fixed.
-
-6.) A limit on the number of HSP's that were saved (100) was removed.
-
-Notes for 2.0.6 release:
-
-Enhancements:
-
-1.) PHI-BLAST is included in this release. Please see notes on PHI-BLAST for
-details.
-
-2.) SEG has become an integral part of the NCBI toolkit and it is no longer necessary
-to install it separately. It is also now supported under non-UNIX platforms.
-
-3.) Access to filtering options.
-
-If one uses "-F T" then normal filtering by seg or dust (for blastn)
-occurs (likewise "-F F" means no filtering whatsoever). The seg options
-can be changed by using:
-
--F "S 10 1.0 1.5"
-
-which specifies a window of 10, locut of 1.0 and hicut of 1.5. One may
-also specify coiled-coiled filtering by specifying:
-
--F "C"
-
-There are three parameters for this: window, cutoff (prob of a coil-coil), and
-linker (distance between two coiled-coiled regions that should be linked
-together). These are now set to
-
-window: 22
-cutoff: 40.0
-linker: 32
-
-One may also change the coiled-coiled parameters in a manner analogous to
-that of seg:
-
--F "C 28 40.0 32" will change the window to 28.
-
-One may also run both seg and coiled-coiled together by using a ";":
-
--F "C;S"
-
-4.) BLAST has been changed to reduce the number of redundant hits that a user
-may see. This is acheived by keeping track of the number of hits completely
-contained in a certain region and eliminating those lower scoring hits that
-are redundant with others. This behavior may be controlled with the -K and -L
-options:
-
- -K Number of best hits from a region to keep [Integer]
- default = 50
- -L Length of region used to judge hits [Integer]
- default = 20
-
-Setting -K to zero turns off this feature. This is the default only on blastall.
-
-Bug fixes:
-
-1.) There was a problem with the procedure that called the external utility seg.
-The need to fix this was obviated by the integration of seg into the toolkit.
-This showed up under LINUX.
-
-2.) There was a memory problem with formatdb that has been fixed. This showed up
-mostly under NT and LINUX.
-
-3.) A problem with running in multi-processing mode under IRIX6.5 (as a non-root user)
-was fixed.
-
-Notes for 2.0.5 release:
-
-Enhancements:
-
-1.) The BLAST version is printed by formatdb in it's log file.
-
-2.) Multi-database searches no longer require that the -o option be used when
-preparing the databases (i.e., with formatdb).
-
-Bugs fixed:
-
-1.) A serious bug with multi-database iterative searches was fixed (thanks to
-Steve Brenner for providing an example).
-
-2.) 'lcl' is not formatted in the BLAST report when the sequence identifier
-is a local identifier or does not contain a bar ("|").
-
-3.) A large memory leak in formatdb was fixed.
-
-4.) An unnecessary cast that caused formatdb to fail on Solaris 2.5 machines
-if the binary was made under 2.6 was fixed.
-
-5.) Better error checking was added to protect against core-dumps.
-
-6.) Some problems with the sum statistics treatment of the blastx and tblastn
-programs reported by D. Rozenbaum were fixed. The number of alignments
-involved in a sum group was misrepresented. Also the incorrect length for
-the database sequence was used, sometimes casuing a slight change in the
-value reported.
-
-7.) A problem with blastpgp was fixed that reported incorrect values for
-matrices other than BLOSUM62 during iterative searches.
-
-Notes for 2.0.4 release:
-
-Enhancements:
-
-1.) multiple database searches:
-
-Version 2.0.4 will accept multiple database names (bracketed by quotations).
-An example would be
-
- -d "nr est"
-
-which will search both the nr and est databases, presenting the results as if one
-'virtual' database consisting of all the entries from both were searched. The
-statistics are based on the 'virtual' database.
-
-2.) new options:
-
- -W Word size, default if zero [Integer]
- default = 0
- -z Effective length of the database (use zero for the real size) [Integer]
- default = 0
-
-3.) The number of identities, positives, and gaps are now printed out before the
-alignments for gapped blastx, tblastn, and tblastx. Additionally this feature is
-now also enabled for ungapped BLAST.
-
-4.) Formatdb now accepts ASN.1, as well as FASTA, as input.
-
-Bugs fixed:
-
-1.) In blastx, tblastn, and tblastx a codon was incorrectly formatted as a start codon in
-some cases.
-
-2.) The last alignment of the last sequence being presented was incorrectly dropped
-in some cases. This change could affect the statistical significance of the last database
-sequence if the dropped alignment had a lower e-value than any other alignments from the
-same database sequence.
diff --git a/doc/blast/blastall.html b/doc/blast/blastall.html
index c296fff4..264662ce 100644
--- a/doc/blast/blastall.html
+++ b/doc/blast/blastall.html
@@ -1,3 +1,97 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>Blastall</title>
+ </head>
+
+ <body>
+<pre>
+Blastall
+--------
+
+Blastall may be used to perform all five flavors of blast comparison. One
+may obtain the blastall options by executing 'blastall -' (note the dash). A
+typical use of blastall, to perform a blastn search (nucl. vs. nucl.)
+of a file called QUERY, would be:
+
+blastall -p blastn -d nr -i QUERY -o out.QUERY
+
+The output is placed into the output file out.QUERY and the search is performed
+against the 'nr' database. If a protein vs. protein search is desired,
+then 'blastn' should be replaced with 'blastp' etc.
+
+Some of the most commonly used blastall options are:
+
+blastall arguments:
+
+ -p Program Name [String]
+
+ Input should be one of "blastp", "blastn", "blastx", "tblastn", or "tblastx".
+
+ -d Database [String]
+ default = nr
+
+ The database specified must first be formatted with formatdb.
+ Multiple database names (bracketed by quotations) will be accepted.
+ An example would be
+
+ -d "nr est"
+
+ which will search both the nr and est databases, presenting the results as if one
+ 'virtual' database consisting of all the entries from both were searched. The
+ statistics are based on the 'virtual' database of nr and est.
+
+ -i Query File [File In]
+ default = stdin
+
+ The query should be in FASTA format. If multiple FASTA entries are in the input
+ file, all queries will be searched.
+
+ -e Expectation value (E) [Real]
+ default = 10.0
+
+ -o BLAST report Output File [File Out] Optional
+ default = stdout
+
+ -F Filter query sequence (DUST with blastn, SEG with others) [String]
+ default = T
+
+ BLAST 2.0 and 2.1 use the dust low-complexity filter for blastn and seg for the
+ other programs. Both 'dust' and 'seg' are integral parts of the NCBI toolkit
+ and are accessed automatically.
+
+ If one uses "-F T" then normal filtering by seg or dust (for blastn)
+ occurs (likewise "-F F" means no filtering whatsoever).
+
+ This option also takes a string as an argument. One may use such a
+ string to change the specific parameters of seg or invoke other filters.
+ Please see the "Filtering Strings" section (below) for details.
+
+ -S Query strands to search against database (for blast[nx], and tblastx). 3 is both, 1 is top, 2 is bottom [Integer]
+ default = 3
+
+ -T Produce HTML output [T/F]
+ default = F
+
+ -l Restrict search of database to list of GI's [String] Optional
+
+ This option specifies that only a subset of the database should be
+ searched, determined by the list of gi's (i.e., NCBI identifiers) in a
+ file. One can obtain a list of gi's for a given Entrez query from
+ http://www.ncbi.nlm.nih.gov/Entrez/batch.html. This file should
+ be in the same directory as the database, or in the directory that
+ BLAST is called from.
+
+ -U Use lower case filtering of FASTA sequence [T/F] Optional
+ default = F
+
+ This option specifies that any lower-case letters in the input FASTA file
+ should be masked.
Enhancements:
@@ -28,4 +122,7 @@ When the -B option is used, the summary statistics at the bottom
of the output are for the combined set of queries; at present,
the summary statistics are not tabulated for the individual
queries in a multiple-query input.
-
+</pre>
+ </body>
+</html>
+
diff --git a/doc/blast/blastclust.html b/doc/blast/blastclust.html
index baf663e5..8dc9c4d3 100644
--- a/doc/blast/blastclust.html
+++ b/doc/blast/blastclust.html
@@ -1,3 +1,16 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>BLASTCLUST - BLAST score-based single-linkage clustering</title>
+ </head>
+
+ <body>
+<pre>
BLASTCLUST - BLAST score-based single-linkage clustering.
1. Clustering procedure.
@@ -94,46 +107,46 @@ variable.
Input:
- -i <file> sequence file in the FASTA format (default = stdin)
- -d <file> sequence database name
- -r <file> name of a hit-list file saved by BLASTCLUST
+ -i &lt;file&gt; sequence file in the FASTA format (default = stdin)
+ -d &lt;file&gt; sequence database name
+ -r &lt;file&gt; name of a hit-list file saved by BLASTCLUST
These three options are mutually exclusive.
- -l <file> a file with a list of IDs to restrict the clustering
+ -l &lt;file&gt; a file with a list of IDs to restrict the clustering
Thresholds:
- -S <threshold> similarity threshold
- if <3 then the threshold is set as a BLAST score density
+ -S &lt;threshold&gt; similarity threshold
+ if &lt;3 then the threshold is set as a BLAST score density
(0.0 to 3.0; default = 1.75)
- if >=3 then the threshold is set as a percent of identical
+ if &gt;=3 then the threshold is set as a percent of identical
residues (3 to 100)
- -L <threshold> minimum length coverage (0.0 to 1.0; default = 0.9)
- -b <T|F> require coverage as specified by -L and -S on both (T) or
+ -L &lt;threshold&gt; minimum length coverage (0.0 to 1.0; default = 0.9)
+ -b &lt;T|F&gt; require coverage as specified by -L and -S on both (T) or
only one (F) sequence of a pair (default = TRUE)
Output:
- -o <file> file to save cluster list (default = stdout)
- -s <file> file to save hit-list (this file may be not portable across
+ -o &lt;file&gt; file to save cluster list (default = stdout)
+ -s &lt;file&gt; file to save hit-list (this file may be not portable across
platforms)
- -p <T|F> protein (T) or nucleotide (F) sequences in the input
+ -p &lt;T|F&gt; protein (T) or nucleotide (F) sequences in the input
(default = TRUE)
Misc:
- -C <T|F> continue unfinished clustering (crash recovery mode).
+ -C &lt;T|F&gt; continue unfinished clustering (crash recovery mode).
(default = FALSE)
- -a <number> Number of CPU's to use in a multi-thread mode
+ -a &lt;number&gt; Number of CPU's to use in a multi-thread mode
(default = 1).
- -v <logfile> Progress report destination (printed every 1000 sequences).
+ -v &lt;logfile&gt; Progress report destination (printed every 1000 sequences).
Set to F to suppress report messages (default = stderr).
- -e <T|F> Enable sequence id parsing in database formatting. Set to F if
+ -e &lt;T|F&gt; Enable sequence id parsing in database formatting. Set to F if
multiple sequences have identical ids (default = TRUE).
-W Word size to use for initial matches (default = 0, translates to 3 for
proteins and 32 for nucleotides).
- -c <config file> Configuration file with advanced options, containing any
+ -c &lt;config file&gt; Configuration file with advanced options, containing any
of the following options with their values, separated by whitespace:
-r, -q, -G, -E - match, mismatch, gap open and gap extension scores
respectively,
@@ -169,21 +182,21 @@ are true for most UNIX platforms.
A.1. Header.
- 1-byte boolean IDtype 1 if numeric IDs; 0 if string IDs
- 4-byte integer ListSz size of the ID list; if IDs are numeric this
- is the number of SeqID records, otherwise this
- is the length of the ID list (in bytes)
+ 1-byte boolean IDtype 1 if numeric IDs; 0 if string IDs
+ 4-byte integer ListSz size of the ID list; if IDs are numeric this
+ is the number of SeqID records, otherwise this
+ is the length of the ID list (in bytes)
A.2. Sequence ID list.
If IDtype is 1 (numeric IDs) then the list is ListSz records of
- 4-byte integer SeqID sequence ID (numeric)
+ 4-byte integer SeqID sequence ID (numeric)
If IDtype is 0 (string IDs) then the list is a list of records of
- var-length char SeqID sequence ID (string)
- space (' ') separator
+ var-length char SeqID sequence ID (string)
+ space (' ') separator
(total length is ListSz bytes; the number of sequences is equal to the number
of spaces).
@@ -192,16 +205,19 @@ A.3. Sequence length list.
This is a list of
- 4-byte integer SeqLen sequence length
+ 4-byte integer SeqLen sequence length
A.4. Hit list.
The list consists of the following records going to the end of file:
- 4-byte integer N1 ordinal number of the 1st sequence
- 4-byte integer N2 ordinal number of the 2nd sequence
- 4-byte integer HSPL1 HSP length on the 1st sequence
- 4-byte integer HSPL2 HSP length on the 2nd sequence
- 8-byte float Score BLAST score
- 8-byte float PercId Percent of identical residues
+ 4-byte integer N1 ordinal number of the 1st sequence
+ 4-byte integer N2 ordinal number of the 2nd sequence
+ 4-byte integer HSPL1 HSP length on the 1st sequence
+ 4-byte integer HSPL2 HSP length on the 2nd sequence
+ 8-byte float Score BLAST score
+ 8-byte float PercId Percent of identical residues
+</pre>
+ </body>
+</html>
diff --git a/doc/blast/blastdb.html b/doc/blast/blastdb.html
new file mode 100644
index 00000000..c45679c5
--- /dev/null
+++ b/doc/blast/blastdb.html
@@ -0,0 +1,336 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>The BLAST Databases</title>
+ </head>
+
+ <body>
+<pre>
+ The BLAST Databases
+ Last updated on July 21, 2003
+
+This document describes the "BLAST" databases available on the NCBI
+FTP site under the "blast/db" subdirectory. The direct URL to this
+subdirectory is:
+ ftp://ftp.ncbi.nih.gov/blast/db
+
+1. General Introduction
+
+NCBI BLAST home pages (http://www.ncbi.nih.gov/BLAST/) use a standard
+set of BLAST databases for Nucleotide, Protein, and Translated BLAST
+searches. These databases are made available in the db directory as
+compressed archives (ftp://ftp.ncbi.nih.gov/blast/db/) in preformatted
+format. The FASTA databases now reside under the blast/FASTA
+subdirectory.
+
+The preformatted databases offer the following advantages:
+
+ * The preformatted databases are smaller in size and are
+ faster to download;
+ * Preformatting removes the need to run formatdb;
+ * Taxonomy information is available for each database entry.
+
+Preformatted databases must be downloaded in binary mode and inflated
+with gzip or other decompress tools. The BLAST database files can then
+be extracted out of the resulting tar file using the &ldquo;tar&rdquo; program on Unix/Linux
+or WinZip and StuffIt Expander on Windows and Macintosh platforms,
+respectively.
+
+Large databases are formatted in multiple 1-gigabyte volumes, which
+are named using the &ldquo;database.##.tar.gz&rdquo; convention. All relevant volumes
+are required. An alias file is provided so that the database can be called
+using the alias name without the extension (.nal or .pal). For example,
+to call the est database, simply use the &ldquo;-d est&rdquo; option in the commandline
+(without the quotes).
+
+Certain databases are subsets of a larger parental database. For those
+databases, mask files, rather than actual databases, are provided. The
+mask file needs the parent database to function properly. The parent
+databases should be generated on the same day as the mask file. For
+example, to use swissprot preformatted database, swissprot.tar.gz, one
+will need to get the nr.tar.gz with the same date stamp.
+
+Additional BLAST databases that are not provided in preformatted
+formats are available in the FASTA subdirectory. For genomic BLAST
+databases, please check the genomes ftp directory at:
+ ftp://ftp.ncbi.nih.gov/genomes/
+
+
+2. Contents of the /blast/db/ directory
+
+The formatted databases are archived in this directory. The name of
+these databases and their contents are listed below.
++--------------------+-----------------------------------------------+
+|File Name | Content Description |
++--------------------+-----------------------------------------------+
+/FASTA subdirectory for FASTA formatted sequences
+
+README README for this subdirectory (this file)
+
+est.00.tar.gz | three volumes of the formatted est database
+est.01.tar.gz | from the EST division of GenBank, EMBL,
+est.02.tar.gz | and DDBJ
+
+est_human.tar.gz | mask file for human subset of the est
+est_mouse.tar.gz | mask file for mouse subset of the est
+est_others.tar.gz | mask file for non-human and non-mouse subset
+ | of the est database
+ | These three mask files need all volumes of
+ | est to function properly.
+
+gss.00.tar.gz | two volumes of the formatted gss database
+gss.01.tar.gz | from the GSS division of GenBank, EMBL, and
+ | DDBJ
+
+htgs.00.tar.gz | three volumes of htgs database with entries
+htgs.01.tar.gz | from HTG division of GenBank, EMBL, and DDBJ
+htgs.02.tar.gz |
+
+human_genomic.tar.gz human RefSeq (NC_######) chromosome records
+ with gap adjusted concatenated NT_ contigs
+
+nr.tar.gz non-redundant protein sequence database with
+ entries from GenPept, Swissprot, PIR, PRF, PDB,
+ and NCBI RefSeq
+
+nt.00.tar.gz | nucleotide sequence database, with entries
+nt.01.tar.gz | from all traditional divisions of GenBank,
+nt.02.tar.gz | EMBL, and DDBJ excluding bulk divisions (gss,
+ | sts, pat, est, and htg divisions). wgs entries
+ | are also excluded. Not non-redundant.
+
+other_genomic.tar.gz RefSeq chromosome records (NC_######) for
+ organisms other than human
+
+pataa.tar.gz | patent protein sequence database
+patnt.tar.gz | patent nucleotide sequence database
+ | The above two databases are directly from
+ | USPTO or from EU/Japan Patent Agencies via
+ | EMBL/DDBJ
+
+pdbaa.tar.gz protein sequences from pdb protein structures
+pdbnt.tar.gz nucleotide sequences from pdb nucleic acid
+ structures. They are NOT the protein coding
+ sequences for the corresponding pdbaa entries.
+
+sts.tar.gz Sequences from the STS division of GenBank, EMBL,
+ and DDBJ
+
+swissprot.tar.gz swiss-prot sequence databases (last major update)
+
+taxdb.tar.gz Taxonomy information for the formatted database
+
+wgs.00.tar.gz | Whole genome shotgun sequence assemblies for
+wgs.01.tar.gz | different organisms, broken up into 1 GB
+wgs.02.tar.gz | volumes.
+wgs.03.tar.gz
+wgs.04.tar.gz
+wgs.05.tar.gz
++--------------------+-----------------------------------------------+
+
+
+3. Content of the /db/FASTA Subdirectory
+
+This subdirectory contains FASTA formatted sequence files, formerly
+available under /db directory. The file names and database contents
+are listed below. These files are now archived in .gz format and must
+be processed through formatdb before they can be used by the BLAST
+programs.
+
++--------------------+-----------------------------------------------+
+|File Name | Content Description |
++--------------------+-----------------------------------------------+
+alu.a.gz translation of alu.n repeats
+alu.n.gz alu repeat elements
+
+drosoph.aa.gz CDS translations from drosophila.nt
+drosoph.nt.gz genomic sequences for drosophila
+
+ecoli.aa.gz CDS translations from ecoli.nt
+ecoli.nt.gz Escherichia coli K-12 genomic sequences
+
+est_human.gz* | human subset of the est database (see Note 1)
+est_mouse.gz* | mouse subset of the est database
+est_others.gz* | non-human and non-mouse subset of the est
+ database
+
+gss.gz* sequences from the GSS division of GenBank,
+ EMBL, and DDBJ
+
+htg.gz* htgs database with high throughput genomic
+ entries from the htg division of GenBank,
+ EMBL, and DDBJ
+
+human_genomic.gz* human RefSeq (NC_######) chromosome records
+ with gap adjusted concatenated NT_ contigs
+
+igSeqNt.gz human and mouse immunoglobulin nucleotide
+ sequences
+igSeqProt.gz human and mouse immunoglobulin protein
+ sequences
+
+mito.aa.gz CDS translations of complete mitochondrial
+ genomes
+mito.nt.gz complete mitochondrial genomes
+
+month.aa.gz | newly released/updated protein sequences
+ (See Note 2)
+month.est_human.gz | newly released/updated human est sequences
+month.est_mouse.gz | newly released/updated mouse est sequences
+month.est_others.gz | newly released/updated est other than
+ | human/mouse
+month.gss.gz | newly released/updated gss sequences
+month.htgs.gz | newly released/updated htgs sequences
+month.nt.gz | newly released/updated sequences for the nt
+ database
+
+nr.gz* non-redundant protein sequence database with
+ entries from GenPept, Swissprot, PIR, PRF,
+ PDB, and RefSeq
+
+nt.gz* nucleotide sequence database, with entries
+ from all traditional divisions of GenBank,
+ EMBL, and DDBJ excluding bulk divisions
+ (gss, sts, pat, est, htg divisions) and wgs
+ entries. Not non-redundant.
+
+other_genomic.gz* RefSeq chromosome records (NC_######) for
+ organisms other than human
+
+pataa.gz* | patent protein sequence database
+patnt.gz* | patent nucleotide sequence database
+ | The above two dbs are directly from USPTO
+ | or from EU/Japan Patent Agency via EMBL/DDBJ
+
+pdbaa.gz* protein sequences from pdb protein structures
+pdbnt.gz* nucleotide sequences from pdb nucleic acid
+ structures. They are NOT the protein coding
+ sequences for the corresponding pdbaa entries.
+
+sts.gz* database for sequence tag site entries
+
+swissprot.gz* swiss-prot database (last major release)
+
+vector.gz vector sequence database (See Note 3)
+
+wgs.gz* whole genome shotgun genome assemblies
+
+yeast.aa.gz protein translations from yeast genome
+yeast.nt.gz yeast genomes.
++--------------------+-----------------------------------------------+
+NOTE:
+(1) We do not provide the complete est database in FASTA format. One
+ needs to get all three subsets (est_human, est_mouse, and est_others)
+ and concatenate them into the complete est fasta database.
+(2) month.### databases are the sequences newly released or updated
+ within the last 30 days for that database.
+(3) For vector contamination screening, use the UniVec database from:
+ ftp://ftp.ncbi.nih.gov/pub/UniVec/
+ * marked files have preformatted counterparts.
+
+
+4. Database updates
+
+The BLAST databases are updated daily. Update of existing databases
+by merging of new records from the month database using fmerge is no
+longer supported. We do not have an established incremental update
+scheme at this time. We recommend downloading the databases regularly
+to keep their content current.
+
+5. Non-redundant defline syntax
+
+The only non-redundant database is the protein nr. In it, identical
+sequences are merged into one entry. To be merged two sequences must
+have identical lengths and every residue at every position must be the
+same. The FASTA deflines for the different entries that belong to one
+nr record are separated by control-A characters invisible to most
+programs. In the example below both entries gi|1469284 and gi|1477453
+have the same sequence, in every respect:
+
+&gt;gi|3023276|sp|Q57293|AFUC_ACTPL Ferric transport ATP-binding protein afuC
+^Agi|1469284|gb|AAB05030.1| afuC gene product ^Agi|1477453|gb|AAB17216.1|
+afuC [Actinobacillus pleuropneumoniae]
+MNNDFLVLKNITKSFGKATVIDNLDLVIKRGTMVTLLGPSGCGKTTVLRLVAGLENPTSGQIFIDGEDVT
+KSSIQNRDICIVFQSYALFPHMSIGDNVGYGLRMQGVSNEERKQRVKEALELVDLAGFADRFVDQISGGQ
+QQRVALARALVLKPKVLILDEPLSNLDANLRRSMREKIRELQQRLGITSLYVTHDQTEAFAVSDEVIVMN
+KGTIMQKARQKIFIYDRILYSLRNFMGESTICDGNLNQGTVSIGDYRFPLHNAADFSVADGACLVGVRPE
+AIRLTATGETSQRCQIKSAVYMGNHWEIVANWNGKDVLINANPDQFDPDATKAFIHFTEQGIFLLNKE
+
+The syntax of sequence header lines used by the NCBI BLAST server
+depends on the database from which each sequence was obtained. The table
+below lists the identifiers for the databases from which the sequences
+were derived.
+
+ Database Name Identifier Syntax
+ ============================ ========================
+ GenBank gb|accession|locus
+ EMBL Data Library emb|accession|locus
+ DDBJ, DNA Database of Japan dbj|accession|locus
+ NBRF PIR pir||entry
+ Protein Research Foundation prf||name
+ SWISS-PROT sp|accession|entry name
+ Brookhaven Protein Data Bank pdb|entry|chain
+ Patents pat|country|number
+ GenInfo Backbone Id bbs|number
+ General database identifier gnl|database|identifier
+ NCBI Reference Sequence ref|accession|locus
+ Local Sequence identifier lcl|identifier
+
+"gi" identifiers are being assigned by NCBI for all sequences contained
+within NCBI's sequence databases. The "gi" identifier provides a uniform
+and stable naming convention whereby a specific sequence is assigned its
+unique gi identifier. If a nucleotide or protein sequence changes,
+however, a new gi identifier is assigned, even if the accession number
+of the record remains unchanged. Thus gi identifiers provide a mechanism
+for identifying the exact sequence that was used or retrieved in a given
+search.
+
+We recommend that "gi display option" be activated in local blast search
+by setting the -I option to T, which was set to false by default:
+
+ -I Show GI's in deflines [T/F]
+ default = F
+
+For databases whose entries are not from official NCBI sequence databases,
+such as Trace database, the gnl| convention is used. For custom database,
+this convention should be followed and the id for each sequence must be
+unique, if one would like to take the advantage of indexed database,
+which enables specific sequence retrieval using fastacmd program included
+in the blast executable package. One should refer to documents
+distributed in the standalone BLAST package for more details.
+
+
+6. Formatting the FASTA database
+
+FASTA database files need to be formatted with formatdb before they can be
+used in local blast search. For those from NCBI, the following formatdb
+commands are recommended:
+ formatdb -i input_db -p F -o T for nucleotide
+ formatdb -i input_db -p T -o T for protein
+
+The -A option introduced in 2.2.3 is now built into the formatdb program
+and thus removed from the list of configurable options since 2.2.8. This
+enables formatdb to properly handle large sequence files (longer than 16
+million bases). Please refer to formatdb.txt under the /blast/documents
+directory for more information. Databases prepared using 2.2.8 formatdb
+will not be backward compatible with blast programs older than version 2.2.3.
+
+
+7. Technical Support
+
+Questions and comments on this document and NCBI BLAST related questions
+should be sent to blast-help group at:
+ blast-help@ncbi.nlm.nih.gov
+
+For information about other NCBI resources/services, please send email to
+NCBI User Service at:
+ info@ncbi.nlm.nih.gov
+</pre>
+ </body>
+</html>
+
diff --git a/doc/blast/blastftp.html b/doc/blast/blastftp.html
new file mode 100644
index 00000000..98986fe1
--- /dev/null
+++ b/doc/blast/blastftp.html
@@ -0,0 +1,526 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>Readme for NCBI blast ftp site</title>
+ </head>
+
+ <body>
+<pre>
+ Readme for NCBI blast ftp site
+ Last updated on February 15, 2004
+
+This file lists the subdirectories and files found on the NCBI BLAST
+ftp site (ftp://ftp.ncbi.nlm.nih.gov/blast/). It provides the basic
+information on file content, and on how the files should be used.
+
+
+1. Introduction
+
+NCBI BLAST ftp site provides standalone blast, client server blast,
+and wwwblast packages for different platforms. It also provides
+commonly used blast databases in preformatted as well as FASTA format.
+Some documents on the blast executables and other related subjects are
+also provided.
+
+
+2. File list and content
+
+Descriptions of the files are listed in the tables below, one table
+for each directory or subdirectory.
+
+2.1 ftp://ftp.ncbi.nlm.nih.gov/blast/ directory content
+
+The blast ftp directory contains several subdirectories each for a
+specific set of files.
+
++------------------+-------------------------------------------------+
+|Name |Content |
++------------------+-------------------------------------------------+
+blastftp.txt this file
+
+db subdirectory with database, in preformatted or
+ FASTA form
+
+demo demonstration programs and documents from blast
+ developers
+
+documents documents for programs in standalone blast,
+ netblast, and wwwblast programs
+
+executables archives for binary distribution of blast programs
+
+matrices protein and nucleotide score matrices, only a
+ subset are supported by blast
+
+temp temporary directory for miscellaneous files
++------------------+-------------------------------------------------+
+
+
+2.2 File content for ftp://ftp.ncbi.nlm.nih.gov/blast/db/ subdirectory
+
+Databases larger than two gigabytes (2 GB) are formatted in multiple
+volumes, which are named using the &ldquo;database.##.tar.gz&rdquo; convention.
+All relevant volumes are required. An alias file is provided so that
+the database can be called using the alias name without the extension
+(.nal or .pal). For example, to call the est database, simply use the &ldquo;-d est&rdquo;
+option in the commandline (without the quotes).
+
+Certain databases are subsets of a larger parental database. For those
+databases, mask files, rather than actual databases, are provided. The
+mask file needs the parent database to function properly. The parent
+databases should be generated on the same day as the mask file. For
+example, to use swissprot preformatted database, swissprot.tar.gz, one
+will need to get the nr.tar.gz with the same date stamp.
+
+To use the preformatted blast database file, first inflate the file
+using gzip (unix, linux), WinZip (Windows), or StuffIt Expander (Mac),
+then extract the component files out from the resulting tar file using
+tar (unix, linux), WinZip (Windows), or StuffIt Expander (Mac). The
+resulting files are ready for BLAST.
+
++---------------------+----------------------------------------------+
+|Name |Content |
++---------------------+----------------------------------------------+
+FASTA subdirectory with databases in FASTA format
+
+blastdb.txt content list of the blast database
+
+est.00.tar.gz first volume of the est database
+est.01.tar.gz second volume of the est database
+est.02.tar.gz third volume of the est database
+ all volumes are needed to reconstitute
+ complete est database
+
+est_human.tar.gz human est database, a mask file requires both
+ volumes of est to work
+
+est_mouse.tar.gz mouse est database, a mask file needs both
+ volumes of est to work
+
+est_others.tar.gz est database without human/mouse entries, a
+ mask file requires both volumes of est
+
+gss.tar.gz genomic survey sequence database
+
+htgs.00.tar.gz first volume of the htgs database
+htgs.01.tar.gz second volume of the htgs database
+htgs.02.tar.gz all volumes are needed to reconstitute
+htgs.03.tar.gz complete htgs database
+
+human_genomic.tar.gz human chromosome database containing
+ concatenated contigs with adjusted gaps
+ represented by N's
+
+nr.tar.gz non-redundant protein database
+
+nt.00.tar.gz first volume of the nucleotide nr database
+nt.01.tar.gz second volume of the nucleotide nr database
+nt.02.tar.gz all volumes are needed to reconstitute
+ complete nt database
+
+other_genomic.tar.gz chromosome database for organisms other than
+ human
+
+pataa.tar.gz patent protein database
+
+patnt.tar.gz patent nucleotide database
+
+pdbaa.tar.gz protein sequence database for pdb entries. It
+ is a mask file and requires nr.tar.gz
+
+pdbnt.tar.gz nucleotide sequence database for pdb entries.
+ They are not coding sequences for the
+ corresponding protein structure entries!
+
+sts.tar.gz sequence tag site database
+
+swissprot.tar.gz swissprot sequence database, last major
+ release. It is a mask file and requires
+ nr.tar.gz to work properly
+
+taxdb.tar.gz taxonomy id database for use with new version
+ of blast database (not fully implemented yet)
+
+wgs.00.tar.gz first volume of wgs assembly database
+wgs.01.tar.gz second volume of the wgs assembly database.
+wgs.02.tar.gz third volume of the wgs assembly database.
+wgs.03.tar.gz fourth volume of the wgs assembly database.
+wgs.04.tar.gz fifth volume of the wgs assembly database.
+wgs.05.tar.gz sixth volume of the wgs assembly database.
+ all volumes are needed.
++--------------------+-----------------------------------------------+
+
+
+2.2.1 File content for ftp://ftp.ncbi.nlm.nih.gov/blast/db/FASTA
+subdirectory
+
+The FASTA database files are now stored in this subdirectory; it does
+contain some additional databases that are not available via the NCBI
+BLAST pages. Due to file size issues, the full est database is not
+provided. One needs to get the three subsets and concatenate them
+together to get the complete est database.
+
+These databases will need to be formatted using formatdb program found
+in the standalone blast executable package. The recommended
+commandlines to use are:
+
+ formatdb -i input_db -p F -o T for nucleotide
+
+ formatdb -i input_db -p T -o T for protein
+
+For additional information on formatdb, please see the formatdb.txt
+document under /blast/documents/ directory.
+
++------------------+--------------------------------------------------+
+|Name |Content |
++------------------+--------------------------------------------------+
+ alu.a.gz proteins translated from alu.n
+
+ alu.n.gz alu repeat sequences
+
+ drosoph.aa.gz Drosophila protein from genome annotation
+
+ drosoph.nt.gz Drosophila genome
+
+ ecoli.aa.gz E.coli K-12 proteins from genome annotation
+
+ ecoli.nt.gz E.coli K-12 genomic contigs
+
+ est_human.gz human subset of the est database
+
+ est_mouse.gz mouse subset of the est database
+
+ est_others.gz subset of est other than human or mouse entries
+
+ gss.gz Genomic Survey Sequences (mostly BAC ends)
+
+ htgs.gz High Throughput Genomic Sequences
+
+ human_genomic.gz Human chromosomes formed by concatenating genomic
+ contig assemblies (NT_######) and adjusting the
+ gaps with N&rsquo;s
+
+ igSeqNt.gz Immunoglobulin nucleotide sequences
+
+ igSeqProt.gz Immunoglobulin protein sequences
+
+ mito.aa.gz protein from the annotated mitochondrial genomes
+
+ mito.nt.gz mitochondrial genomes
+
+ month.aa.gz protein sequences released or updated in the past
+ 30 days
+
+ month.est_human.gz human subset of EST released/updated in the past
+ 30 days
+
+ month.est_mouse.gz mouse subset of EST released/updated in the past
+ 30 days
+
+ month.est_others.gz EST, without entries from human or mouse, released
+ or updated in the past 30 days
+
+ month.gss.gz gss entries released/updated in the past 30 days
+
+ month.htgs.gz htgs entries released/updated in the past 30 days
+
+ month.nt.gz subset of nt released/updated in the past 30 days
+
+ nr.gz non-redundant protein sequence database
+
+ nt.gz nucleotide database from GenBank excluding the
+ batch division htgs, est, gss, sts, pat divisions,
+ and wgs entries. Not non-redundant.
+
+ other_genomic.gz Chromosome entries other than human
+
+ pataa.gz Patent protein sequence database
+
+ patnt.gz Patent nucleotide sequence database
+
+ pdbaa.gz protein sequences for pdb entries
+
+ pdbnt.gz nucleotide entries for pdb entries. They are NOT
+ the coding sequence for the corresponding
+ protein entries
+
+ sts.gz Sequence Tag Sites database
+
+ swissprot.gz swissprot database, last major release
+
+ vector.gz vector sequences from synthetic (syn) division
+ of GenBank
+
+ wgs.gz Whole Genome Shotgun sequence assembly
+
+ yeast.aa.gz protein translations from yeast genome annotation
+
+ yeast.nt.gz yeast genomic sequence
++------------------+----------------------------------------------------+
+
+
+2.3 File content for ftp://ftp.ncbi.nlm.nih.gov/blast/demo/ directory
+
+This directory contains some technical presentations from the BLAST
+developers along with some demo tools or documentation relevant to BLAST.
+
++------------------------+-----------------------------------------------+
+|Name |Content |
++------------------------+-----------------------------------------------+
+ README.blast_demo readme for blast_demo package
+
+ README.first readme for this directory
+
+ README.parse_blast_xml readme for parse_blast_xml package
+
+ blast_demo.tar.gz blast_demo package on blast db, blast object,
+ and reformatting blast alignment from
+ blastobj file
+
+ blast_exercises.doc blast exercise questions answers
+
+ blast_programming.ppt PowerPoint presentation on BLAST programming
+
+ blast_talk.ppt PowerPoint presentation (O'Reilly conference)
+
+ ieee_blast.final.ppt PowerPoint presentation (IEEE conference)
+
+ ieee_talk.pdf Above IEEE presentation in PDF format
+
+ parse_blast_xml.tar.gz demo package on parsing xml styled blast output
+
+ splitd.ppt PowerPoint presentation on NCBI BLAST server&rsquo;s
+ splitd implementation
+
+ test_suite.tar.gz test package
++------------------------+-----------------------------------------------+
+
+
+2.4 File content for ftp://ftp.ncbi.nlm.nih.gov/blast/documents/ directory
+
+This directory contains copies of the documentation on different BLAST
+programs distributed from this ftp site under the /blast/executables/
+directory. blast.txt also contains detailed release history.
+
++------------------------+-----------------------------------------------+
+|Name |Content |
++------------------------+-----------------------------------------------+
+ blast.txt readme for blastall and blastpgp
+
+ blastclust.txt readme for blastclust
+
+ developer subdirectory with additional documentation
+
+ blast_seqalign.txt describing seqalign function
+
+ readdb.txt describing readdb function
+
+ urlapi.txt a short introduction on BLAST URL API which
+ supersedes the blasturl
+
+ formatdb.txt readme for formatdb program
+
+ impala.txt readme for impala
+
+ megablast.txt readme for megablast
+
+ netblast.txt readme for netblast (blastcl3)
+
+ rpsblast.txt readme for rpsblast
+
+ xml subdirectory with .dtd and .mod field
+ description files for blast xml output
+
+ xml/NCBI_BlastOutput.dtd dtd file for blast xml output
+ xml/NCBI_BlastOutput.mod mod file for blast xml output
+ xml/NCBI_Entity.mod mod file for NCBI xml file
+ xml/README.blxml readme on blast xml output
++------------------------+-----------------------------------------------+
+
+
+2.5 File content for ftp://ftp.ncbi.nlm.nih.gov/blast/executables/
+directory
+
+This directory contains several subdirectories, each for a specific
+subset of executable BLAST programs:
+
+/LATEST-BLAST subdirectory contains the standalone blast binaries from
+ the latest major versioned release.
+
+/LATEST-NETBLAST subdirectory contains the netblast binaries from the
+ latest major versioned release.
+
+/LATEST-WWWBLAST subdirectory contains the wwwblast binaries from the
+ latest major versioned release.
+
+/release different releases, with the last one linked to LATEST
+ directories
+
+/snapshot subdirectory contains patches or intermediate updates put up in
+ between major releases. For previous releases, go to release
+ subdirectory, where the old major releases are archived back to
+ version 2.0.10.
+
+
+
+2.5.1 File content for ftp://ftp.ncbi.nlm.nih.gov/blast/executables/LATEST-BLAST,
+ /LATEST-NETBLAST, and /LATEST-WWWBLAST subdirectories
+
+All three of these subdirectories link to the latest release directory,
+which contains the standalone BLAST executables package (archives named
+blast-*), blastcl3 client (archives named netblast-*), and server blast
+(archives named wwwblast-*).
+
+The standalone archive is needed to set up BLAST locally on user's own
+machine. It also provides the tools necessary to prepare custom databases
+and retrieve sequences from these prepared databases. Different archives
+for commonly used platforms are available.
+
+The blast client archive contains the blastcl3 program which functions by
+formulating BLAST search locally first and forwarding the search to NCBI
+blast server for processing. The search results returned by NCBI BLAST server
+are saved to a user-specified file on local computer disk.
+
+The server blast archive contains the web pages with embedded blast search
+forms similar to that of NCBI that can process the BLAST search request against
+local set of databases and return the result to a browser window. wwwblast
+is now in sync with the NCBI toolkit and the above two packages.
+
+
++------------------------------------+-------------------------------+
+|Name |Content |
++------------------------------------+-------------------------------+
+ MD5SUM.txt
+
+ blast-2.2.8-alpha-osf1.tar.gz Standalone for COMPAQ/HP alpha
+ machine (OSF 5.1 and above)
+
+ blast-2.2.8-amd64-linux.tar.gz Standalone for AMD 64-bits PC
+ running linux
+
+ blast-2.2.8-ia32-freebsd.tar.gz Standalone for intel Pentium PC
+ running freeBSD
+
+ blast-2.2.8-ia32-linux.tar.gz Standalone for intel Pentium PC
+ running Linux
+
+ blast-2.2.8-ia32-win32.exe Standalone for intel Pentium PC
+ running Windows
+
+ blast-2.2.8-ia64-linux.tar.gz Standalone for intel Itanium PC
+ running Linux
+
+ blast-2.2.8-mips-irix-32-bit.tar.gz Standalone for 32-bits SGI
+
+ blast-2.2.8-mips-irix.tar.gz Standalone for 64-bits SGI
+
+ blast-2.2.8-powerpc-macosx.tar.gz Standalone for MacOSX (terminal)
+
+ blast-2.2.8-sparc-solaris.tar.gz Standalone for Sun Sparc station
+ running Solaris
+
+ netblast-2.2.8-alpha-osf1.tar.gz netblast for COMPAQ/HP alpha
+ machine (OSF 5.1 and above)
+
+ netblast-2.2.8-amd64-linux.tar.gz netblast for AMD 64-bits PC
+ running Linux
+
+ netblast-2.2.8-ia32-freebsd.tar.gz netblast for intel Pentium PC
+ running freeBSD
+
+ netblast-2.2.8-ia32-linux.tar.gz netblast for intel Pentium PC
+ running Linux
+
+ netblast-2.2.8-ia32-win32.exe netblast for intel Pentium
+ PC running Windows
+
+ netblast-2.2.8-ia64-linux.tar.gz netblast for intel Itanium PC
+ running Linux
+
+ netblast-2.2.8-mips-irix.tar.gz netblast for SGI 32-bits system
+
+ netblast-2.2.8-powerpc-macosx.tar.gz netblast for MacOSX
+
+ netblast-2.2.8-sparc-solaris.tar.gz netblast for Sun Sparc station
+ running Solaris
+
+ wwwblast-2.2.8-alpha-osf1.tar.gz wwwblast for COMPAQ/HP alpha
+ machine (OSF 5.1 and above)
+
+ wwwblast-2.2.8-amd64-linux.tar.gz wwwblast for AMD 64-bits PC
+ running Linux
+
+ wwwblast-2.2.8-ia32-freebsd.tar.gz wwwblast for Intel Pentium PC
+ running FreeBSD
+
+ wwwblast-2.2.8-ia32-linux.tar.gz wwwblast for Intel Pentium PC
+ running Linux
+
+ wwwblast-2.2.8-ia64-linux.tar.gz wwwblast for Intel Itanium PC
+ running Linux
+
+ wwwblast-2.2.8-mips-irix.tar.gz wwwblast for SGI 32-bits system
+
+ wwwblast-2.2.8-powerpc-macosx.tar.gz wwwblast for MacOSX
+
+ wwwblast-2.2.8-sparc-solaris.tar.gz wwwblast for Sun Sparc station
+ running Solaris
++------------------------------------+-------------------------------+
+
+
+2.5.2 File content for ftp://ftp.ncbi.nlm.nih.gov/blast/executables/release
+subdirectory
+
+This directory contains past major releases of BLAST, as far back as
+version 2.0.10. Each release is in its own subdirectory.
+
+
+2.5.3 File content for ftp.ncbi.nlm.nih.gov/blast/executables/snapshot
+subdirectory
+
+This subdirectory contains intermediate enhanced or patched archives
+released after the last major release. They are organized according
+to the date and only contain the binaries for the affected platforms.
+
+
+2.5.4 File content for ftp.ncbi.nlm.nih.gov/blast/executables/special
+subdirectory
+
+From time to time, we make binaries for some rare platforms under
+special circumstances. Those files are archived here.
+
+
+2.6 File content for ftp://ftp.ncbi.nlm.nih.gov/blast/matrices directory
+
+This directory contains the scoring matrices, which are files that can
+be used by BLAST alignment assessment. The files are text files with
+special format that can be viewed directly by a browser.
+
+For valid statistical analysis, blastn uses only identity matrix and
+blastp only supports a limited subset of the BLOSUM and PAM matrices:
+BLOSUM 45, 62, 80, plus PAM30 and 70.
+
+
+2.7 File content of the ftp://ftp.ncbi.nlm.nih.gov/blast/temp
+subdirectory
+
+A left-over subdirectory of miscellaneous files or tools.
+
+
+3. Technical Support
+
+Additional questions/comments on this ftp site should be directed to
+NCBI blast-help group at:
+ blast-help@ncbi.nlm.nih.gov
+
+Other questions on general NCBI resources should be directed to:
+ info@ncbi.nlm.nih.gov
+</pre>
+ </body>
+</html>
+
diff --git a/doc/blast/blastpgp.html b/doc/blast/blastpgp.html
new file mode 100644
index 00000000..0440c990
--- /dev/null
+++ b/doc/blast/blastpgp.html
@@ -0,0 +1,520 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>Blastpgp</title>
+ </head>
+
+ <body>
+<pre>
+Blastpgp
+--------
+
+Blastpgp performs gapped blastp searches and can be used to perform
+iterative searches in psi-blast and phi-blast mode. See the PSI-Blast and
+PHI-BLAST sections (below) for a description of this binary. The options may be
+obtained by executing 'blastpgp -'.
+
+ -T Produce HTML output [T/F]
+ default = F
+
+ -Q Output File for PSI-BLAST Matrix in ASCII [File Out] Optional
+
+
+PSI-Blast
+---------
+
+The blastpgp program can do an iterative search in which
+sequences found in one round of searching are used to build
+a score model for the next round of searching. In this usage,
+the program is called Position-Specific Iterated BLAST, or PSI-BLAST.
+As explained in the accompanying paper, the BLAST algorithm is
+not tied to a specific score matrix. Traditionally, it has been
+implemented using an AxA substitution matrix where A is the alphabet size.
+PSI-BLAST instead uses a QxA matrix, where Q is the length of the query
+sequence; at each position the cost of a letter depends on the position
+w.r.t. the query and the letter in the subject sequence.
+
+The position-specific matrix for round i+1 is built from a constrained
+multiple alignment among the query and the sequences found with
+sufficiently low e-value in round i. The top part of the output for
+each round distinguishes the sequences into: sequences found
+previously and used in the score model, and sequences not used in the
+score model. The output currently includes lots of diagnostics
+requested by users at NCBI. To skip quickly from the output of
+one round to the next, search for the string "producing", which is
+part of the header for each round and likely does not appear elsewhere
+in the output. PSI-BLAST "converges" and stops if all sequences
+found at round i+1 below the e-value threshold were already in
+the model at the beginning of the round.
+
+There are several blastpgp parameters specifically for PSI-BLAST:
+-j is the maximum number of rounds (default 1; i.e., regular BLAST)
+-h is the e-value threshold for including sequences in the
+ score matrix model (default 0.001)
+-c is the "constant" used in the pseudocount formula specified in the
+ paper (default 10)
+
+The -C and -R flags provide a "checkpointing" facility whereby
+a score model can be stored and later reused.
+ -C stores the query and frequency count ratio matrix in a
+ file
+ -R restarts from a file stored previously.
+When using -R, it is required that the query specified on the command line
+match exactly the query in the restart file.
+The checkpoint files are stored in a byte-encoded (not human readable)
+format, so as to prevent roundoff error between writing and reading
+the checkpoint.
+Users who also develop their own sequence analysis software may wish
+to develop their own scoring systems. For this purpose the code
+in posit.c that writes out the checkpoint can be easily adapted to
+write out scoring systems derived by other algorithms in such
+a way that PSI-BLAST can read the files in later.
+The checkpoint structure is general in the sense that it can handle
+any position-specific matrix that fits in the Karlin-Altschul
+statistical framework for BLAST scoring.
+
+The -B flag provides a way to jump start PSI-BLAST from a master-slave
+multiple alignment computed outside PSI-BLAST. The multiple alignment
+must include the query sequence as one of the sequences, but it need
+not be the first sequence. The multiple alignment must be specified
+in a format that is derived from Clustal, but without some headers and
+trailers. See example below. The rules are also described by the
+following words. Suppose the multiple alignment has N sequences. It
+may be presented in 1 or more blocks, where each block presents a
+range of columns from the multiple alignment. E.g., the first block
+might have columns 1-60, the second block might have columns 61-95,
+the third block might have columns 96-128. Each block should have N
+rows, 1 row per sequence. The sequences should be in the same order
+in every block. Blocks are separated by 1 or more blank lines.
+Within a block there are no blank lines, and each line consists of 1
+sequence identifier followed by some white space followed by
+characters (and gaps) for that sequence in the multiple alignment. In
+each column, all letters must be in upper case, or all letters must be
+in lower case. Upper case means that this column is to be given
+position-specific scores. Lower-case means to use the underlying
+matrix (specified by -M) for this column; e.g., if the query sequence
+has an 'l' residue in the column, then the standard scores for
+matching an L are used in the column.
+
+A sample usage would be:
+
+ blastpgp -i seq1 -B align1 -j 2 -d nr
+
+where seq1 is the query
+ align1 is the alignment file
+ -j 2 indicates to do 2 rounds
+ -d nr indicates to use the nr database
+
+The example files
+ seq1
+ align1
+copied below were kindly supplied by L. Aravind from a paper
+he and Chris Ponting published in Protein Science:
+
+Aravind L, Ponting CP, Homologues of 26S proteasome subunits
+are regulators of transcription and translation, Protein Science
+7(1998) 1250-1254.
+
+L. Aravind (aravind@ncbi.nlm.nih.gov) was the first user
+and helped define how -B should work. Y. Wolf (wolf@ncbi.nlm.nih.gov)
+helped design a more flexible input format for the alignments.
+If you like how -B works, let them know.
+If you do not like how -B works, complain to
+A. Schaffer(schaffer@helix.nih.gov) who did the implementation.
+
+seq1
+----
+&gt; 26SPS9_Hs
+IHAAEEKDWKTAYSYFYEAFEGYDSIDSPKAITSLKYMLLCKIMLNTPEDVQALVSGKLALRYAGRQTEA
+LKCVAQASKNRSLADFEKALTDYRAELRDDPIISTHLAKLYDNLLEQNLIRVIEPFSRVQIEHISSLIKL
+SKADVERKLSQMILDKKFHGILDQGEGVLIIFDEPP
+
+
+align1
+------
+26SPS9_Hs IHAAEEKDWKTAYSYFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgklalryagrqtealkcvaqasknr
+F57B9_Ce LHAADEKDFKTAFSYFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsaklalkyngsdldamkaiaaaaqk
+YDL097c_Sc ILHCEDKDYKTAFSYFFESFESYhnltthnsyekacqvlkymllskimlnliddvknilnakytketyqsrgidamkavae
+YMJ5_Ce LYSAEERDYKTSFSYFYEAFEGFasigdkinatsalkymilckimlneteqlagllaakeivayqkspriiairsmadafr
+FUS6_ARATH KNYIRTRDYCTTTKHIIHMCMNAilvsiemgqfthvtsyvnkaeqnpetlepmvnaklrcasglahlelkkyklaarkfld
+COS41.8_Ci SLDYKLKTYLTIARLYLEDEDPVqaemyinrasllqnetadeqlqihykvcyarvldyrrkfleaaqrynelsyksaihet
+644879 KCYSRARDYCTSAKHVINMCLNVikvsvylqnwshvlsyvskaestpeiaeqrgerdsqtqailtklkcaaglaelaarky
+YPR108w_Sc IHCLAVRNFKEAAKLLVDSLATFtsieltsyesiatyasvtglftlertdlkskvidspellslisttaalqsissltisl
+eif-3p110_Hs SKAMKMGDWKTCHSFIINEKMNGkvw-------------------------------------------------------
+T23D8.4_Ce SKAMLNGDWKKCQDYIVNDKMNQkvw-------------------------------------------------------
+YD95_Sp IYLMSIRNFSGAADLLLDCMSTFsstellpyydvvryavisgaisldrvdvktkivdspevlavlpqnesmssleacinsl
+KIAA0107_Hs LYCVAIRDFKQAAELFLDTVSTFtsyelmdyktfvtytvyvsmialerpdlrekvikgaeilevlhslpavrqylfslyec
+F49C12.8_Hs LYRMSVRDFAGAADLFLEAVPTFgsyelmtyenlilytvitttfaldrpdlrtkvircnevqeqltggglngtlipvreyl
+Int-6_Mm KFQYECGNYSGAAEYLYFFRVLVpatdrnalsslwgklaseilmqnwdaamedltrlketidnnsvssplqslqqrtwlih
+
+26SPS9_Hs sladfekaltdy-----------------------------------------------------------------------------------
+F57B9_Ce rslkdfqvafgsf----------------------------------------------------------------------------------
+YDL097c_Sc aynnrslldfntalkqy------------------------------------------------------------------------------
+YMJ5_Ce krslkdfvkalaeh---------------------------------------------------------------------------------
+FUS6_ARATH vnpelgnsyneviapqdiatygglcalasfdrselkqkvidninfrnflelvpdvrelindfyssryascleylasl------------------
+COS41.8_Ci eqtkalekalncailapagqqrsrmlatlfkdercqllpsfgilekmfldriiksdemeefar--------------------------------
+644879 kqaakclllasfdhcdfpellspsnvaiygglcalatfdrqelqrnvissssfklflelepqvrdiifkfyeskyasclkmldem----------
+YPR108w_Sc yasdyasyfpyllety-------------------------------------------------------------------------------
+eif-3p110_Hs -----------------------------------------------------------------------------------------------
+T23D8.4_Ce -----------------------------------------------------------------------------------------------
+YD95_Sp ylcdysgffrtladve-------------------------------------------------------------------------------
+KIAA0107_Hs rysvffqslavv-----------------------------------------------------------------------------------
+F49C12.8_Hs esyydchydrffiqlaale----------------------------------------------------------------------------
+Int-6_Mm wslfvffnhpkgrdniidlflyqpqylnaiqtmcphilrylttavitnkdvrkrrqvlkdlvkviqqesytykdpitefveclyvnfdfdgaqkk
+
+26SPS9_Hs ----RAELRDDPIISTHLAKLYDNLLEQNLIRVIEPFSRVQIEHISSLIKLSKADVERKLSQMILDKKFHGILDQGEGVLIIFDEPP
+F57B9_Ce ----PQELQMDPVVRKHFHSLSERMLEKDLCRIIEPYSFVQIEHVAQQIGIDRSKVEKKLSQMILDQKLSGSLDQGEGMLIVFEIAV
+YDL097c_Sc ----EKELMGDELTRSHFNALYDTLLESNLCKIIEPFECVEISHISKIIGLDTQQVEGKLSQMILDKIFYGVLDQGNGWLYVYETPN
+YMJ5_Ce ----KIELVEDKVVAVHSQNLERNMLEKEISRVIEPYSEIELSYIARVIGMTVPPVERAIARMILDKKLMGSIDQHGDTVVVYPKAD
+FUS6_ARATH ----KSNLLLDIHLHDHVDTLYDQIRKKALIQYTLPFVSVDLSRMADAFKTSVSGLEKELEALITDNQIQARIDSHNKILYARHADQ
+COS41.8_Ci ----QLMPHQKAITADGSNILHRAVTEHNLLSASKLYNNIRFTELGALLEIPHQMAEKVASQMICESRMKGHIDQIDGIVFFERRET
+644879 ----KDNLLLDMYLAPHVRTLYTQIRNRALIQYFSPYVSADMHRMAAAFNTTVAALEDELTQLILEGLISARVDSHSKILYARDVDQ
+YPR108w_Sc ----ANVLIPCKYLNRHADFFVREMRRKVYAQLLESYKTLSLKSMASAFGVSVAFLDNDLGKFIPNKQLNCVIDRVNGIVETNRPDN
+eif-3p110_Hs ----DLFPEADKVRTMLVRKIQEESLRTYLFTYSSVYDSISMETLSDMFELDLPTVHSIISKMIINEELMASLDQPTQTVVMHRTEP
+T23D8.4_Ce ----NLFHNAETVKGMVVRRIQEESLRTYLLTYSTVYATVSLKKLADLFELSKKDVHSIISKMIIQEELSATLDEPTDCLIMHRVEP
+YD95_Sp ----VNHLKCDQFLVAHYRYYVREMRRRAYAQLLESYRALSIDSMAASFGVSVDYIDRDLASFIPDNKLNCVIDRVNGVVFTNRPDE
+KIAA0107_Hs ----EQEMKKDWLFAPHYRYYVREMRIHAYSQLLESYRSLTLGYMAEAFGVGVEFIDQELSRFIAAGRLHCKIDKVNEIVETNRPDS
+F49C12.8_Hs ----SERFKFDRYLSPHFNYYSRGMRHRAYEQFLTPYKTVRIDMMAKDFGVSRAFIDRELHRLIATGQLQCRIDAVNGVIEVNHRDS
+Int-6_Mm lrecESVLVNDFFLVACLEDFIENARLFIFETFCRIHQCISINMLADKLNMTPEEAERWIVNLIRNARLDAKIDSKLGHVVMGNNAV
+
+
+
+
+
+PHI-Blast
+---------
+
+PHI-BLAST (Pattern-Hit Initiated BLAST) is a search
+program that combines matching of regular expressions
+with local alignments surrounding the match.
+The most important features of the program have been
+incorporated into the BLAST software framework
+partly for user convenience and partly so that
+PHI-BLAST may be combined seamlessly with PSI-BLAST.
+Other features that do not fit into the BLAST framework
+will be released later as a separate program and/or
+separate Web page query options.
+
+One very restrictive way to identify protein motifs
+is by regular expressions that must contain each instance
+of the motif. The PROSITE database is a compilation of
+restricted regular expressions that describe protein motifs.
+Given a protein sequence S and a regular expression pattern P
+occurring in S, PHI-BLAST helps answer the question:
+What other protein sequences both contain an occurrence of P
+and are homologous to S in the vicinity of the pattern occurrences?
+PHI-BLAST may be preferable to just searching for pattern occurrences
+because it filters out those cases where the pattern occurrence is
+probably random and not indicative of homology.
+PHI-BLAST may be preferable to other flavors of BLAST because
+it is faster and because it allows the user to express
+a rigid pattern occurrence requirement.
+
+The pattern search methods in PHI-BLAST are based on the
+algorithms in:
+
+R. Baeza-Yates and G. Gonnet, Communications of the ACM 35(1992), pp. 74-82.
+S. Wu and U. Manber, Communications of the ACM 35(1992), pp. 83-91.
+
+The calculation of local alignments is done using a method
+very similar to (and much of the same code as) gapped BLAST.
+However, the method of evaluating statistical significance is different, and
+is described below.
+
+In the stand-alone mode the typical PHI-BLAST usage looks like:
+ blastpgp -i -k -p patseedp
+
+ where -i is followed by the file containing the query in FASTA format
+ where -k is followed by the file containing the pattern in a syntax given below
+ and "patseedp" indicates the mode of usage, not representing any file.
+
+The syntax for the query sequence is FASTA format as for all other
+BLAST queries. The syntax for patterns follows the rules of
+PROSITE and is documented in detail below.
+The specified pattern is not required to be in the PROSITE list.
+Most of the other BLAST flags can be used with PHI-BLAST.
+One important exception is that PHI-BLAST requires gapped
+alignments (i.e. forbids -g F in the flags) because ungapped
+alignments do not make sense for almost all patterns in PROSITE.
+
+There is a second mode of PHI-BLAST usage that is important when
+the specified pattern occurs more than 1 time in the query.
+In this case, the user may be interested in restricting the
+search for local alignments to a subset of the pattern occurrences.
+This can be done with a search that looks like:
+ blastpgp -i -k -p seedp
+
+in which case the use of the "seedp" option requires the user to
+specify the location(s) of the interesting pattern occurrence(s)
+in the pattern file. The syntax for how to specify pattern
+occurrences is below. When there are multiple pattern occurrences in the
+query it may be important to decide how many are of interest because
+the E-value for matches is effectively multiplied by the number
+of interesting pattern occurrences.
+
+The PHI-BLAST Web page supports only the "patseedp" option.
+
+PHI-BLAST is integrated with PSI-BLAST. In the command-line
+mode, PSI-BLAST can be invoked by using the -j option, as usual.
+When this is done as:
+ blastpgp -i -k -p patseedp -j
+
+then the first round of searching uses PHI-BLAST and all subsequent
+rounds use PSI-BLAST.
+In the Web page setting, the user must explicitly invoke one round
+at a time, and the PHI-BLAST Web page provides the option to
+initiate a PSI-BLAST round with the PHI-BLAST results.
+To describe a combined usage, use the term "PHI-PSI-BLAST"
+(Pattern-Hit Initiated, Position-Specific Iterated BLAST).
+
+Determining statistical significance.
+
+When a query sequence Q matches a database sequence D in PHI-BLAST,
+it is useful to subdivide Q and D into 3 disjoint pieces
+ Qleft Qpattern Qright
+ Dleft Dpattern Dright
+
+The substrings Qpattern and Dpattern contain the pattern specified
+in the pattern file. The pieces Qpattern and Dpattern are aligned
+and that alignment is displayed as part of the PHI-BLAST output,
+but the score for that alignment is mostly ignored.
+The "reduced" score r of an alignment is the sum of the scores obtained
+by aligning Qleft with Dleft and by aligning Qright with Dright.
+
+The expected number of alignments with a reduced score &gt;= x
+is given by:
+ CN(Lambda*x + 1)e^(-Lambda *x)
+where:
+
+C and Lambda are "constants" depending on the score matrix and the
+gap costs.
+N is (number of occurrences of pattern in database) * (number of
+ occurrences of pattern in Q)
+e is the base of the natural logarithm.
+
+It is important to understand that this method of computing
+the statistical significance of a PHI-BLAST alignment is mathematically
+different from the method used for BLAST and PSI-BLAST alignments.
+However, both methods provide E-values, so the E-values are
+displayed with a similar output syntax.
+
+Rules for pattern syntax for PHI-BLAST.
+
+The syntax for patterns in PHI-BLAST follows the conventions
+of PROSITE. When using the stand-alone program, it
+is permissible to have multiple patterns in a file separated
+by a blank line between patterns. When using the Web-page
+only one pattern is allowed per query.
+
+Valid protein characters for PHI-BLAST patterns:
+ ABCDEFGHIKLMNPQRSTVWXYZU
+
+Valid DNA characters for PHI-BLAST patterns:
+ ACGT
+
+Other useful delimiters:
+ [ ] means any one of the characters enclosed in the brackets
+ e.g., [LFYT] means one occurrence of L or F or Y or T
+ - means nothing (this is a spacer character used by PROSITE)
+ x with nothing following means any residue
+ x(5) means 5 positions in which any residue is allowed (and similarly for any other
+ single number in parentheses after x)
+ x(2,4) means 2 to 4 positions where any residue is allowed,
+ and similarly for any other two numbers separated by a comma;
+ the first number should be &lt; the second number.
+ &gt; can occur only at the end of a pattern and means nothing
+ it may occur before a period
+ (another spacer used by PROSITE)
+
+ . may be used at the end of the pattern and means nothing
+
+When using the stand-alone program, the pattern should
+be in a file, with the first line starting:
+ ID
+followed by 2 spaces and a text string giving the pattern a name.
+
+There should also be a line starting
+ PA
+followed by 2 spaces followed by the pattern description.
+
+All other PROSITE codes in the first two columns are allowed,
+but only the HI code, described below, is relevant to PHI-BLAST.
+
+Here is an example from PROSITE.
+
+ID CNMP_BINDING_2; PATTERN.
+AC PS00889;
+DT OCT-1993 (CREATED); OCT-1993 (DATA UPDATE); NOV-1995 (INFO UPDATE).
+DE Cyclic nucleotide-binding domain signature 2.
+PA [LIVMF]-G-E-x-[GAS]-[LIVM]-x(5,11)-R-[STAQ]-A-x-[LIVMA]-x-[STACV].
+NR /RELEASE=32,49340;
+NR /TOTAL=57(36); /POSITIVE=57(36); /UNKNOWN=0(0); /FALSE_POS=0(0);
+NR /FALSE_NEG=1; /PARTIAL=1;
+CC /TAXO-RANGE=??EP?; /MAX-REPEAT=2;
+
+The line starting
+ ID
+gives the pattern a name.
+The lines starting
+ AC, DT, DE, NR, NR, CC
+are relevant to PROSITE users, but irrelevant to PHI-BLAST.
+These lines are tolerated, but ignored by PHI-BLAST.
+
+The line starting
+ PA
+describes the pattern as:
+ one of LIVMF
+followed by
+ G
+followed by
+ E
+followed by
+ any single character
+followed by
+ one of GAS
+followed by
+ one of LIVM
+followed by
+ any 5 to 11 characters
+followed by
+ R
+followed by
+ one of STAQ
+followed by
+ A
+followed by
+ any single character
+followed by
+ one of LIVMA
+followed by
+ any single character
+followed by
+ one of STACV
+
+In this case the pattern ends with a period.
+It can end with nothing after the last specifying symbol
+or any number of &gt; signs or periods or combination thereof.
+
+Here is another example, illustrating the use of an HI line.
+
+ID ER_TARGET; PATTERN.
+PA [KRHQSA]-[DENQ]-E-L&gt;.
+HI (19 22)
+HI (201 204)
+
+In this example, the HI lines specify that the pattern
+occurs twice, once from positions 19 through 22 in the
+sequence and once from positions 201 through 204 in the
+sequence.
+These specifications are relevant when stand-alone PHI-BLAST is
+used with the
+ seedp
+option, in which the interesting occurrences of the pattern
+in the sequence are specified. In this case the
+HI lines specify which occurrence(s) of the pattern
+should be used to find good alignments.
+
+In general, the seedp option is more useful than the
+standard patseedp option ONLY when the
+pattern occurs K &gt; 1 times in the sequence AND
+the user is interested in matching to J &lt; K of those
+occurrences.
+Then using the HI lines enables the user to specify which
+occurrences are of interest.
+
+Additional functionality related to PHI-BLAST.
+
+PHI-BLAST takes as input both a sequence and a pattern occurring in
+that sequence and searches a sequence database for
+other sequences containing the same pattern and having a good alignment.
+One may be interested in asking two related, simpler questions:
+
+1. Given a sequence and a database of patterns, which patterns occur
+in the sequence and where?
+
+2. Given a pattern and a sequence database, which sequences contain the
+pattern and where?
+
+These queries can be answered with software closely related to PHI-BLAST,
+but they do not fit into the output framework of BLAST because the
+answers are simple lists without alignments and with no notion of
+statistical significance.
+
+The NCBI toolbox includes another program, currently called
+ seedtop
+to answer the two queries above.
+
+Query 1 can be asked with:
+ seedtop -i -k -p patmatchp
+
+Query 2 can be asked with:
+ seedtop -d -k -p patternp
+
+The -k argument is used similarly in all queries and the file
+format is always the same. The standard pattern database is
+PROSITE, but others (or a subset) can be used.
+There are plans afoot to offer the patmatchp query (number 1) on
+the PHI-BLAST web page or in its vicinity, but this would
+be restricted to having PROSITE as the pattern database.
+
+ Documentation for PSI-TBLASTN
+
+PSI-TBLASTN is a variant of blastall that searches a protein query
+sequence against a nucleotide sequence database using a position
+specific matrix created by PSI-BLAST. The nucleotide sequence database
+is dynamically translated in all reading frames during PSI-TBLASTN
+search. Using a position specific matrix may enable finding more
+distantly related sequences.
+
+Programs:
+blastpgp [takes a protein query and performs PSI-BLAST search to
+ create a position specific matrix using a protein
+ database]
+
+blastall [reads position specific matrix and performs PSI-TBLASTN
+ search]
+
+Usage:
+A user would typically run blastpgp to create and save a position
+specific matrix, followed by a run of blastall for PSI-TBLASTN search.
+
+blastpgp must be executed with -C option followed by a file name to
+save position specific score matrix.
+
+blastall with "-p psitblastn" option executes PSI-TBLASTN search, and
+-R option followed by a file name specifying the file that contains
+position specific score matrix. All other options that apply when
+using "blastall -p tblastn ..." also apply when using "blastall -p
+psitblastn ...", but there are some restrictions to parameters: 1) The
+query must be the same as the one used in blastpgp for creating a
+position specific matrix. 2) By default, blastpgp has filtering off
+(-F F) and blastall has filtering on (-F T). To ensure consistent
+usage of the blastpgp/psitblastn combination, the -F option should be
+explicitly set in one or the other run.
+
+
+Example:
+One may run PSI-BLAST to create and save a position specific score matrix
+as follows:
+
+ blastpgp -d nr -i ff.chd -j 2 -C ff.chd.ckp
+
+Position specific score matrix is saved in ff.chd.ckp. Then, using
+this matrix, one may run PSI-TBLASTN search:
+
+ blastall -i ff.chd -d yeast -p psitblastn -R ff.chd.ckp
+
+Note that this allows the score matrix to be constructed using one
+database (nr in the example) and then used to search a second database
+(yeast in the example). Even if the two database names are the same,
+blastpgp uses the protein version while "blastall -p psitblastn" uses
+the DNA version.
+</pre>
+ </body>
+</html>
+
diff --git a/doc/blast/fastacmd.html b/doc/blast/fastacmd.html
index c99edd8e..8746074d 100644
--- a/doc/blast/fastacmd.html
+++ b/doc/blast/fastacmd.html
@@ -1,3 +1,16 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>fastacmd README</title>
+ </head>
+
+ <body>
+<pre>
fastacmd README
===============
Last updated: 04/09/2003
@@ -73,7 +86,7 @@ Usage
1.) Retrieving a sequence by gi:
fastacmd -d nt -s 555
->gi|555|emb|X65215.1|BTMISATN B.taurus microsatellite DNA (624bp)
+&gt;gi|555|emb|X65215.1|BTMISATN B.taurus microsatellite DNA (624bp)
ACCTCCACTAGCTTTGTTTGTAGTGATGCTCTGTAGCACCACTGGGAAGCCCTTTAATGAATGTGCCTTTCCGCAAATCA
CACACACACAAATACACTTATAGAAACAAGGTGATTTTCTTGAAATAATAAAACAAAATTTGGAAGAAGATTTTTACTGT
CTTAGGAAAAGTAAGGCATTGGAAGGTGGCTAGGTATGACATATGAAGTTGCATTTTAAAACTGGAATTGGACAACTGAT
@@ -159,3 +172,7 @@ fastacmd -d patnt -s 412262 -T
ftp://ftp.ncbi.nih.gov/blast/db/taxdb.tar.gz
Download the required files and install them as described above.
+</pre>
+ </body>
+</html>
+
diff --git a/doc/blast/filter.html b/doc/blast/filter.html
new file mode 100644
index 00000000..bbfd4e08
--- /dev/null
+++ b/doc/blast/filter.html
@@ -0,0 +1,74 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>Filtering Strings</title>
+ </head>
+
+ <body>
+<pre>
+Filtering Strings
+-----------------
+
+ The -F argument can take a string as input specifying that seg should be
+ run with certain values or that other non-standard filters should be used.
+ This section describes this syntax.
+
+ The seg options can be changed by using:
+
+ -F "S 10 1.0 1.5"
+
+ which specifies a window of 10, locut of 1.0 and hicut of 1.5.
+
+ A coiled-coil filter, based on the work of Lupas et al. (Science, vol 252, pp. 1162-4 (1991))
+ and written by John Kuzio (Wilson et al., J Gen Virol, vol. 76, pp. 2923-32 (1995)), may be invoked
+ by specifying:
+
+ -F "C"
+
+ There are three parameters for this: window, cutoff (probability of a coiled coil), and
+ linker (distance between two coiled-coil regions that should be linked
+ together). These are now set to
+
+ window: 22
+ cutoff: 40.0
+ linker: 32
+
+ One may also change the coiled-coil parameters in a manner analogous to
+ that of seg:
+
+ -F "C 28 40.0 32" will change the window to 28.
+
+ One may also run both the seg and coiled-coil filters together by using a ";":
+
+ -F "C;S"
+
+ Filtering by dust may also be specified by:
+
+ -F "D"
+
+ It is possible to specify that the masking should only be done during
+ the process of building the initial words by starting the filtering
+ command with 'm', e.g.:
+
+ -F "m S"
+
+ which specifies that seg (with default arguments) should be used for masking,
+ but that the masking should only be done when the words are being built.
+ This masking option is available with all filters.
+
+ If the -U option (to mask any lower-case sequence in the input FASTA file) is used and
+ one does not wish any other filtering, but does wish to mask when building the lookup tables
+ then one should specify:
+
+ -F "m"
+
+ This is the only case where "m" should be specified alone.
+</pre>
+ </body>
+</html>
+
diff --git a/doc/blast/formatdb.html b/doc/blast/formatdb.html
index 75beccdb..28f0d4ee 100644
--- a/doc/blast/formatdb.html
+++ b/doc/blast/formatdb.html
@@ -1,3 +1,16 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>Formatdb README</title>
+ </head>
+
+ <body>
+<pre>
Formatdb README
------------------
@@ -117,7 +130,7 @@ formatdb 2.2.5 arguments:
-v Database volume size in millions of letters [Integer] Optional
default = 0
- range from 0 to <NULL>
+ range from 0 to &lt;NULL&gt;
This option breaks up large FASTA files into 'volumes' (each
with a maximum size of 2 billion letters). As part of the
@@ -207,7 +220,7 @@ TotalNum = 4 ; total number of bits used so far
; These are the paths to the files containing the
; gi's whose links will be modified. The format is
-; <link_type> = <file_path>
+; &lt;link_type&gt; = &lt;file_path&gt;
; where link_type is one of the types of links defined in the LinkBitNumbers
; section.
[LinkFiles]
@@ -229,7 +242,7 @@ are in this parseable format, formatdb produces additional indices
allowing retrieval from the databases by identifier. The databases on
the NCBI FTP site contain parseable identifiers. It is sufficient if
the first word on the FASTA definition line is a unique identifier
-(e.g., ">3091 Alcoho de..."). It is necessary to use parseable
+(e.g., "&gt;3091 Alcoho de..."). It is necessary to use parseable
identifiers for the following cases:
1.) ASN.1 is to be produced from blastall or blastpgp, then "-o" must
@@ -710,7 +723,7 @@ The formatdb index files involving deflines are small relative to the
source database due to entries such as the one below in which the
defline is much shorter than the sequence.
->gi|5819095|ref|NC_001321.1| Balaenoptera physalus mitochondrion, complete genome
+&gt;gi|5819095|ref|NC_001321.1| Balaenoptera physalus mitochondrion, complete genome
GTTAATTACTAATCAGCCCATGATCATAACATAACTGAGGTTTCATACATTTGGTATTTTTTTATTTTTTTTGGGGGGCT
TGCACGGACTCCCCTATGACCCTAAAGGGTCTCGTCGCAGTCAGATAAATTGTAGCTGGGCCTGGATGTATTTGTTATTT
GACTAGCACAACCAACATGTGCAGTTAAATTAATGGTTACAGGACATAGTACTCCACTATTCCCCCCGGGCTCAAAAAAC
@@ -747,7 +760,7 @@ The formatdb index files involving deflines are large relative to the
source database due to entries such as the one below in which the
defline is much longer than the sequence.
->gi|229659|pdb|1AAP|A Chain A, Protease Inhibitor Domain Of Alzheimer's
+&gt;gi|229659|pdb|1AAP|A Chain A, Protease Inhibitor Domain Of Alzheimer's
Amyloid Beta-Protein Precursor (APPI)gi|229660|pdb|1AAP|B Chain B,
Protease Inhibitor Domain Of Alzheimer's Amyloid Beta-Protein Precursor
(APPI)
@@ -761,3 +774,7 @@ of the the NCBI toolkit (ftp://ftp.ncbi.nih.gov/toolbox/ncbi_tools/),
readdb.h contains a list of supported function calls.
Last updated January 30 2004
+</pre>
+ </body>
+</html>
+
diff --git a/doc/blast/impala.html b/doc/blast/impala.html
index 1d3d5cde..9fb04b41 100644
--- a/doc/blast/impala.html
+++ b/doc/blast/impala.html
@@ -1,3 +1,16 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>IMPALA: Integrating Matrix Profiles And Local Alignments</title>
+ </head>
+
+ <body>
+<pre>
IMPALA: Integrating Matrix Profiles And Local Alignments
1. Files in Distribution
@@ -23,15 +36,15 @@ make impala
This will result in three binary executable files:
-makemat : primary profile preprocessor
+makemat : primary profile preprocessor
(converts a collection of binary profiles, created by the -C option
of PSI-BLAST, into portable ASCII form);
-copymat : secondary profile preprocessor
+copymat : secondary profile preprocessor
(converts ASCII matrices, produced by the primary preprocessor,
into database that can be read into memory quickly);
-impala : search program (searches a database of score
+impala : search program (searches a database of score
matrices, prepared by copymat, producing BLAST-like output).
3. Conversion of profiles into searchable database
@@ -40,34 +53,34 @@ impala : search program (searches a database of score
Prepare the following files:
-i. a collection of PSI-BLAST-generated profiles with arbitrary
+i. a collection of PSI-BLAST-generated profiles with arbitrary
names and suffix .chk;
-ii. a collection of "profile master sequences", associated with
+ii. a collection of "profile master sequences", associated with
the profiles, each in a separate file with arbitrary name and a 3 character
suffix starting with c;
the sequences can have deflines; they need not be sequences in nr or
in any other sequence database; if the sequences have deflines, then
the deflines must be unique.
-iii. a list of profile file names, one per line, named
- <database_name>.pn;
+iii. a list of profile file names, one per line, named
+ &lt;database_name&gt;.pn;
-iv. a list of master sequence file names, one per line, in the same
+iv. a list of master sequence file names, one per line, in the same
order as a list of profile names, named
- <database_name>.sn;
+ &lt;database_name&gt;.sn;
The following files will be created:
-i. a collection of ASCII files, corresponding to each of the
+i. a collection of ASCII files, corresponding to each of the
original profiles, named
- <profile_name>.mtx;
+ &lt;profile_name&gt;.mtx;
-ii. a list of ASCII matrix files, named
- <database_name>.mn;
+ii. a list of ASCII matrix files, named
+ &lt;database_name&gt;.mn;
-iii. ASCII file with auxiliary information, named
- <database_name>.aux;
+iii. ASCII file with auxiliary information, named
+ &lt;database_name&gt;.aux;
Arguments to makemat:
@@ -100,25 +113,25 @@ in to makemat are propagated to copymat and impala.
Prepare the following files:
-i. a collection of ASCII files, corresponding to each of the
+i. a collection of ASCII files, corresponding to each of the
original profiles, named
- <profile_name>.mtx
+ &lt;profile_name&gt;.mtx
(created by makemat);
-ii. a collection of "profile master sequences", associated with
+ii. a collection of "profile master sequences", associated with
the profiles, each in a separate file with arbitrary name and a 3 character
suffix starting with c.
-iii. a list of ASCII_matrix files, named
- <database_name>.mn
+iii. a list of ASCII_matrix files, named
+ &lt;database_name&gt;.mn
(created by makemat);
-iv. a list of master sequence file names, one per
+iv. a list of master sequence file names, one per
line, in the same order as a list of matrix names, named
- <database_name>.sn;
+ &lt;database_name&gt;.sn;
-v. ASCII file with auxiliary information, named
- <database_name>.aux
+v. ASCII file with auxiliary information, named
+ &lt;database_name&gt;.aux
(created by makemat);
The files input to copymatices are in ASCII format and thus portable
@@ -126,8 +139,8 @@ between machines with different encodings for machine-readable files
The following files will be created:
-i. a huge binary file, containing all profile matrices, named
- <database_name>.mat;
+i. a huge binary file, containing all profile matrices, named
+ &lt;database_name&gt;.mat;
Arguments to copymat
@@ -204,9 +217,7 @@ Against a Collection of PSI-BLAST-Constructed Position-Specific
Score Matrices, Bioninformatics, to appear.
Please cite the above paper if you publish any results computed by IMPALA.
-
-
-
-
-
+</pre>
+ </body>
+</html>
diff --git a/doc/blast/index.html b/doc/blast/index.html
new file mode 100644
index 00000000..3a2af281
--- /dev/null
+++ b/doc/blast/index.html
@@ -0,0 +1,66 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+</head>
+<body>
+<span style="text-decoration: underline;">Downloads</span><br>
+<ul>
+ <li><a
+ href="ftp://ftp.ncbi.nlm.nih.gov/blast/executables/LATEST-BLAST/">Standalone
+utilities</a></li>
+ <li><a href="ftp://ftp.ncbi.nlm.nih.gov/blast/db/">BLAST databases</a></li>
+ <li><a href="ftp://ftp.ncbi.nlm.nih.gov/pub/mmdb/cdd/">RPSBLAST
+databases</a></li>
+</ul>
+<span style="text-decoration: underline;">Search utilities</span><br>
+<ul>
+ <li>&nbsp;&nbsp;&nbsp; <a href="bl2seq.html">bl2seq</a> - given two
+sequences in FASTA format, find regions of local similarity</li>
+ <li>&nbsp;&nbsp;&nbsp; <a href="blastall.html">blastall</a> - given
+sequences in FASTA format, find similar sequences in a BLAST database</li>
+ <li>&nbsp;&nbsp;&nbsp; <a href="megablast.html">megablast</a> -
+compare highly similar nucleotide sequences</li>
+ <li>&nbsp;&nbsp;&nbsp; <a href="blastpgp.html">blastpgp</a> - create
+protein profiles with iterative and regular expression BLAST</li>
+</ul>
+<span style="text-decoration: underline;">Sequence manipulation
+utilities</span><br>
+<ul>
+ <li>&nbsp;&nbsp;&nbsp; <a href="fastacmd.html">fastacmd</a> -
+retrieve FASTA sequences from BLAST databases</li>
+ <li>&nbsp;&nbsp;&nbsp; <a href="formatdb.html">formatdb</a> - create
+BLAST databases from FASTA sequences</li>
+</ul>
+<span style="text-decoration: underline;">Advanced utilities</span><br>
+<ul>
+ <li>&nbsp;&nbsp;&nbsp; <a href="blastclust.html">blastclust</a> -
+automatic sequence clustering</li>
+ <li>&nbsp;&nbsp;&nbsp; <a href="makemat.html">makemat</a> / <a
+ href="copymat.html">copymat</a> - convert a set of profiles into an
+RPSBLAST database</li>
+ <li>&nbsp;&nbsp;&nbsp; <a href="rpsblast.html">rpsblast</a> - search
+an RPSBLAST database using the BLAST algorithm</li>
+ <li>&nbsp;&nbsp;&nbsp; <a href="impala.html">impala</a> - search an
+RPSBLAST database using the Smith-Waterman algorithm (deprecated)</li>
+</ul>
+<br>
+<span style="text-decoration: underline;">Glossary</span><br>
+<ul>
+ <li>&nbsp;&nbsp;&nbsp; profile - A position-specific scoring matrix
+(PSSM)
+created by position-specific iterated BLAST (psiblast).</li>
+ <li>&nbsp;&nbsp;&nbsp; FASTA - Originally a <a
+ href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&amp;cmd=Retrieve&amp;list_uids=2983426&amp;dopt=Abstract">protein
+alignment algorithm</a>, in this context FASTA refers to an
+uncompressed text representation of nucleotide or protein sequence data.</li>
+ <li>&nbsp;&nbsp;&nbsp; BLAST database - A set of protein or
+nucleotide
+sequences in a format suitable for efficient searching.</li>
+ <li>&nbsp;&nbsp;&nbsp; RPSBLAST database - A set of profiles in a
+format
+suitable for efficient searching.</li>
+</ul>
+<br>
+<br>
+</body>
+</html>
diff --git a/doc/blast/megablast.html b/doc/blast/megablast.html
index 0b61ca77..90a26287 100644
--- a/doc/blast/megablast.html
+++ b/doc/blast/megablast.html
@@ -1,3 +1,16 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>README for standalone MEGABLAST</title>
+ </head>
+
+ <body>
+<pre>
README for standalone MEGABLAST
(last updated 10/20/2000)
@@ -169,10 +182,10 @@ either specific to Mega BLAST or having different meaning:
Seq-annot ::= {
All hits for first query
- }
+ }
Seq-annot ::= {
All hits for second query
- }
+ }
etc.
-----------------------------
@@ -262,3 +275,7 @@ either specific to Mega BLAST or having different meaning:
-H Maximal number of HSPs to save per database sequence.
-----------------------------
+</pre>
+ </body>
+</html>
+
diff --git a/doc/blast/netblast.html b/doc/blast/netblast.html
index 6c237e6f..96797646 100644
--- a/doc/blast/netblast.html
+++ b/doc/blast/netblast.html
@@ -1,3 +1,16 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>README for netblast (network-client BLAST)</title>
+ </head>
+
+ <body>
+<pre>
README for netblast (network-client BLAST)
Last updated 2/23/04
@@ -208,8 +221,8 @@ firewall configuration:
IP address Port
------------- ----
130.14.29.112 5861
-130.14.29.112 5862
-130.14.29.112 5863
+130.14.29.112 5862
+130.14.29.112 5863
This ensures that the appropriate path is open in the firewall through
which blastcl3 can make needed connections to NCBI server.
@@ -241,11 +254,11 @@ characters per line. NOTE the file should be in text format. If WORD
or other word processing program is used, make sure the file is saved
as text.
->query_sequence_1
+&gt;query_sequence_1
MNTIRNSICLTIITMVLCGFLFPLAITLIGQIFFYQQANGSLITYDNRIVGSKLIGQHWTETRYFHGRPS
AVDYNMNPEKLYKNGVSSGGSNESNGNTELIARMKHHVKFGNSNVTIDAATSSGSGLDPHITVENALKQA
PRIADARHISTSRVADLIQHRKQRGVLTNDYVNVLELNIALDKMKD
->query_sequence_2
+&gt;query_sequence_2
MAQPGPAPQPDVSLQQRVAELEKINAEFLRAQQQLEQEFNQKRAKFKELYLAKEEDLKRQNAVLQAAQDD
LGHLRTQLWEAQAEMENIKAIATVSENTKQEAIDEVKRQWREEVASLQAIMKETVRDYEHQFHLRLEQER
AQWAQYRESAEREIADLRRRLSEGQEEENLENEMKKAQEDAEKLRSVVMPMEKEIAALKDKLTEAEDKIK
@@ -1253,6 +1266,7 @@ addressed to:
Quesiton and comments on NCBI resources other than BLAST, should be
addressed to:
info@ncbi.nlm.nih.gov
-
-
+</pre>
+ </body>
+</html>
diff --git a/doc/blast/rpsblast.html b/doc/blast/rpsblast.html
index 9483b54f..e1af0249 100644
--- a/doc/blast/rpsblast.html
+++ b/doc/blast/rpsblast.html
@@ -1,3 +1,16 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta name="generator"
+ content="HTML Tidy for Linux/x86 (vers 1st October 2002), see www.w3.org" />
+
+ <title>RPS Blast: Reversed Position Specific Blast</title>
+ </head>
+
+ <body>
+<pre>
RPS Blast: Reversed Position Specific Blast
@@ -39,11 +52,11 @@ and all of the IMPALA code for evaluating the statistical significance of a matc
The following binary files are used to setup and run RPS Blast:
-makemat : primary profile preprocessor
+makemat : primary profile preprocessor
(converts a collection of binary profiles, created by the -C option
of PSI-BLAST, into portable ASCII form);
-copymat : secondary profile preprocessor
+copymat : secondary profile preprocessor
(converts ASCII matrices, produced by the primary preprocessor,
into database that can be read into memory quickly);
@@ -61,34 +74,34 @@ another source you should skip the steps listed in 2.1.
Prepare the following files:
-i. a collection of PSI-BLAST-generated profiles with arbitrary
+i. a collection of PSI-BLAST-generated profiles with arbitrary
names and suffix .chk;
-ii. a collection of "profile master sequences", associated with
+ii. a collection of "profile master sequences", associated with
the profiles, each in a separate file with arbitrary name and a 3 character
suffix starting with c;
the sequences can have deflines; they need not be sequences in nr or
in any other sequence database; if the sequences have deflines, then
the deflines must be unique.
-iii. a list of profile file names, one per line, named
- <database_name>.pn;
+iii. a list of profile file names, one per line, named
+ &lt;database_name&gt;.pn;
-iv. a list of master sequence file names, one per line, in the same
+iv. a list of master sequence file names, one per line, in the same
order as a list of profile names, named
- <database_name>.sn;
+ &lt;database_name&gt;.sn;
The following files will be created:
-a. a collection of ASCII files, corresponding to each of the
+a. a collection of ASCII files, corresponding to each of the
original profiles, named
- <profile_name>.mtx;
+ &lt;profile_name&gt;.mtx;
-b. a list of ASCII matrix files, named
- <database_name>.mn;
+b. a list of ASCII matrix files, named
+ &lt;database_name&gt;.mn;
-c. ASCII file with auxiliary information, named
- <database_name>.aux;
+c. ASCII file with auxiliary information, named
+ &lt;database_name&gt;.aux;
Arguments to makemat:
@@ -118,31 +131,31 @@ were actually used in making the checkpoints. However, the values fed
in to makemat are propagated to copymat and rpsblast.
ATTENTION: It is strongly recommended to use -S 1 - the scaling factor
- should be set to 1 for rpsblast at this point in time.
+ should be set to 1 for rpsblast at this point in time.
2.2. Secondary preprocessing
Prepare the following files:
-i. a collection of ASCII files, corresponding to each of the
+i. a collection of ASCII files, corresponding to each of the
original profiles, named
- <profile_name>.mtx
+ &lt;profile_name&gt;.mtx
(created by makemat);
-ii. a collection of "profile master sequences", associated with
+ii. a collection of "profile master sequences", associated with
the profiles, each in a separate file with arbitrary name and a 3 character
suffix starting with c.
-iii. a list of ASCII_matrix files, named
- <database_name>.mn
+iii. a list of ASCII_matrix files, named
+ &lt;database_name&gt;.mn
(created by makemat);
-iv. a list of master sequence file names, one per
+iv. a list of master sequence file names, one per
line, in the same order as a list of matrix names, named
- <database_name>.sn;
+ &lt;database_name&gt;.sn;
-v. ASCII file with auxiliary information, named
- <database_name>.aux
+v. ASCII file with auxiliary information, named
+ &lt;database_name&gt;.aux
(created by makemat);
The files input to copymatices are in ASCII format and thus portable
@@ -150,12 +163,12 @@ between machines with different encodings for machine-readable files
The following files will be created:
-a. a huge binary file, containing all profile matrices, named
- <database_name>.rps;
+a. a huge binary file, containing all profile matrices, named
+ &lt;database_name&gt;.rps;
b. a huge binary file, containing lookup table for the Blast search
- corresponding to matrixes named <database_name>.loo
+ corresponding to matrixes named &lt;database_name&gt;.loo
c. File containing concatenation of all FASTA "profile master sequences".
- named <database_name> (without extention)
+ named &lt;database_name&gt; (without extension)
Arguments to copymat
@@ -171,13 +184,13 @@ the the lookup table in memory before writing it to disk. Users have
found that they require a machine with at least 500 Meg of memory for this
task.
-2.3 Creating of BLAST database from <database_name> file containing
+2.3 Creating of BLAST database from &lt;database_name&gt; file containing
all "profile master sequences".
"formatdb" program should be run to create regular BLAST database of all
"profile master sequences":
- formatdb -i <database_name> -o T
+ formatdb -i &lt;database_name&gt; -o T
3. Search
@@ -205,16 +218,6 @@ APPENDIX:
A. Documentation of the .mtx file format
-I have been unable to find any formal documentation. I asked around
-in the BLAST group and discovered that Jason Papadopoulos was also
-unable to find any, but had reverse engineered the format from code
-that reads these files. Based on his descriptions, I have produced
-the following writeup. Comments and corrections are welcome.
-
-Kevin Bealer
-
------
-
Format of the .mtx file:
L = Length of SEQ
@@ -236,46 +239,36 @@ length.
Using the symbols mentioned above, it looks something like this:
-<L>
-<SEQ>
-<ka1-1>
-<ka1-2>
-<ka1-3>
-<ka1-4>
-<ka2-1>
-<ka2-2>
-<ka2-3>
-<ka2-4>
-<ka3-1>
-<ka3-2>
-<ka3-3>
-<ka3-4>
-<p1-1> <p1-2> <p1-3> ... <p1-26>
-<p2-1> <p2-2> <p2-3> ... <p2-26>
+&lt;L&gt;
+&lt;SEQ&gt;
+&lt;ka1-1&gt;
+&lt;ka1-2&gt;
+&lt;ka1-3&gt;
+&lt;ka1-4&gt;
+&lt;ka2-1&gt;
+&lt;ka2-2&gt;
+&lt;ka2-3&gt;
+&lt;ka2-4&gt;
+&lt;ka3-1&gt;
+&lt;ka3-2&gt;
+&lt;ka3-3&gt;
+&lt;ka3-4&gt;
+&lt;p1-1&gt; &lt;p1-2&gt; &lt;p1-3&gt; ... &lt;p1-26&gt;
+&lt;p2-1&gt; &lt;p2-2&gt; &lt;p2-3&gt; ... &lt;p2-26&gt;
...
-<pL-1> <pL-2> <pL-3> ... <pL-26>
-
+&lt;pL-1&gt; &lt;pL-2&gt; &lt;pL-3&gt; ... &lt;pL-26&gt;
-This description is based on RPS blast code, so I don't have a full
-understanding of the information. I am not sure of the exact use of
-the KA blocks, but Aron Marchler-Bauer had this to say:
-[You can find the explanation for the three blocks of KA-parameters in
+One can find the explanation for the three blocks of KA-parameters in
makemat's source code, lines 188-190:
- putMatrixKbp(checkFile, compactSearch->kbp_gap_std[0], scaleScores, 1/scalingFactor);
- putMatrixKbp(checkFile, compactSearch->kbp_gap_psi[0], scaleScores, 1/scalingFactor);
- putMatrixKbp(checkFile, sbp->kbp_ideal, scaleScores, 1/scalingFactor);
-
-- Aron]
+ putMatrixKbp(checkFile, compactSearch-&gt;kbp_gap_std[0], scaleScores, 1/scalingFactor);
+ putMatrixKbp(checkFile, compactSearch-&gt;kbp_gap_psi[0], scaleScores, 1/scalingFactor);
+ putMatrixKbp(checkFile, sbp-&gt;kbp_ideal, scaleScores, 1/scalingFactor);
Thus, the first KA block is the standard score, the second is for
-PSI-Blast, and the third is the ideal score. I'm not sure exactly
-what is connoted by "standard" vs. "ideal".
-
-Aron also suggested that this format may be on its way out, but no
-definite time frame was mentioned.
-
--- March 3, 2004
--- Kevin Bealer
+PSI-Blast, and the third is the ideal score.
+</pre>
+ </body>
+</html>
diff --git a/doc/fwd_check.sh b/doc/fwd_check.sh
index 8dc7480e..0ec2dbc2 100755
--- a/doc/fwd_check.sh
+++ b/doc/fwd_check.sh
@@ -1,5 +1,5 @@
#! /bin/sh
-# $Id: fwd_check.sh,v 1.15 2004/01/16 19:03:45 lavr Exp $
+# $Id: fwd_check.sh,v 1.19 2004/06/07 19:36:33 lavr Exp $
# Author: Denis Vakatov (vakatov@ncbi,nlm.nih.gov)
# Modified: Anton Lavrentiev (lavr@ncbi.nlm.nih.gov)
#
@@ -22,13 +22,13 @@ cat <<EOF
;130.14.22.30 5810 RETIRED
130.14.22.31 5812 RETIRED
130.14.22.32 5811 RETIRED
-130.14.22.12 5845 INTERNAL
-130.14.29.112 5860 RESERVED
+;130.14.22.12 5845 RETIRED
+130.14.25.13 5860 INTERNAL
130.14.29.112 5861 OK
130.14.29.112 5862 OK
130.14.29.112 5863 OK
-130.14.29.112 5864 RESERVED
-130.14.29.112 5865 RESERVED
+130.14.29.112 5864 OK
+130.14.29.112 5865 OK
130.14.29.112 5866 RESERVED
130.14.29.112 5867 RESERVED
130.14.29.112 5868 RESERVED
@@ -49,7 +49,7 @@ while read x_host x_port x_status ; do
guard=$!
wait $pid >/dev/null 2>&1
kill $guard >/dev/null 2>&1
- grep -s 'NCBI Firewall Daemon: Invalid ticket\. Connection closed\.' /tmp/$$ >/dev/null 2>&1
+ grep -s 'NCBI Firewall Daemon: Invalid ticket\. *Connection closed\.' /tmp/$$ >/dev/null 2>&1
if test $? -eq 0 ; then
echo "${x_host}:${x_port} ${x_status}"
else
diff --git a/doc/sequin.htm b/doc/sequin.htm
index 41267466..41bbdd1c 100644
--- a/doc/sequin.htm
+++ b/doc/sequin.htm
@@ -4,8 +4,9 @@
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<!-- if you use the following meta tags, uncomment them.
- <META NAME="keywords" CONTENT="Sequin">
- <META NAME="description" CONTENT="Sequin is a stand-alone software tool developed by the NCBI for submitting and updating entries to the GenBank, EMBL, or DDBJ sequence databases. ">
+ <meta name="author" content="sequindoc">
+ <META NAME="keywords" CONTENT="national center for biotechnology information, ncbi, national library of medicine, nlm, national institutes of health, nih, database, archive, bookshelf, pubmed, pubmed central, bioinformatics, biomedicine, sequence submission, sequin, bankit, submitting sequences, quick guide, format">
+ <META NAME="description" CONTENT="Sequin is a stand-alone software tool developed by the National Center for Biotechnology Information (NCBI) for submitting and updating entries to the GenBank, EMBL, or DDBJ sequence databases. ">
-->
<link rel="stylesheet" href="images/ncbi_sequin.css">
diff --git a/doc/tbl2asn.txt b/doc/tbl2asn.txt
index cf11792e..b6f71422 100755
--- a/doc/tbl2asn.txt
+++ b/doc/tbl2asn.txt
@@ -203,6 +203,11 @@ GenBank flatfile.
transl_except (pos:591..593,aa:Sec)
+The codon recognized and anticodon position of tRNAs can also be given.
+
+ codon_recognized TGG
+ anticodon (pos:7591..7593,aa:Trp)
+
In addition to the standard qualifiers seen in GenBank format, several other
tokens are used to direct values to specific fields in the ASN.1 data.
These include gene_syn, gene_desc, locus_tag, prot_desc, prot_note,
@@ -215,6 +220,31 @@ features for packaging.
protein_id lcl|sde3p
transcript_id lcl|sde3m
+Exceptional biological situations can be annotated by use of the exception
+qualifier. For example
+
+ exception ribosomal slippage
+
+The following are legal exception qualifier values
+
+ RNA editing
+ reasons given in citation
+ ribosomal slippage
+ trans-splicing
+ alternative processing
+ artificial frameshift
+ nonconsensus splice site
+ rearrangement required for product
+ modified codon recognition
+ alternative start codon
+
+Since the International Nucleotide Sequence Database collaboration only
+allows "RNA editing" and "reasons given in citation" to appear in release
+mode, other exceptions are mapped to the /note qualifier in the flatfile.
+However, each exception text string turns off specific validator tests that
+would otherwise produce warning messages, so they should be entered as
+exception qualifiers.
+
Gene Ontology (GO) terms can be indicated with the following qualifiers
go_component endoplasmic reticulum|0005783
diff --git a/link/mswin/insdseqget.rc b/link/mswin/insdseqget.rc
new file mode 100644
index 00000000..38fcfd11
--- /dev/null
+++ b/link/mswin/insdseqget.rc
@@ -0,0 +1,11 @@
+#include <windows.h>
+#include "ncbirc.h"
+
+STRINGTABLE
+ BEGIN
+ STR_PROGRAM, "insdseqget"
+ END
+
+ICO_PROGRAM ICON ncbilogo.ico
+ICO_NCBILOGO ICON ncbilogo.ico
+
diff --git a/link/winmet/ApplicationStationery/GuiAppDefaults.mcp.xml b/link/winmet/ApplicationStationery/GuiAppDefaults.mcp.xml
index f4689e3c..591415cd 100644
--- a/link/winmet/ApplicationStationery/GuiAppDefaults.mcp.xml
+++ b/link/winmet/ApplicationStationery/GuiAppDefaults.mcp.xml
@@ -398,7 +398,7 @@
</SETTING>
<!-- Settings for "Build Extras" panel -->
- <SETTING><NAME>CacheModDates</NAME><VALUE>true</VALUE></SETTING>
+ <SETTING><NAME>CacheModDates</NAME><VALUE>false</VALUE></SETTING>
<SETTING><NAME>DumpBrowserInfo</NAME><VALUE>false</VALUE></SETTING>
<SETTING><NAME>CacheSubprojects</NAME><VALUE>true</VALUE></SETTING>
<SETTING><NAME>UseThirdPartyDebugger</NAME><VALUE>false</VALUE></SETTING>
diff --git a/link/winmet/LibraryStationery/LibraryStationery.mcp.xml b/link/winmet/LibraryStationery/LibraryStationery.mcp.xml
index 59ec564f..c16c4130 100644
--- a/link/winmet/LibraryStationery/LibraryStationery.mcp.xml
+++ b/link/winmet/LibraryStationery/LibraryStationery.mcp.xml
@@ -398,9 +398,9 @@
</SETTING>
<!-- Settings for "Build Extras" panel -->
- <SETTING><NAME>CacheModDates</NAME><VALUE>true</VALUE></SETTING>
+ <SETTING><NAME>CacheModDates</NAME><VALUE>false</VALUE></SETTING>
<SETTING><NAME>DumpBrowserInfo</NAME><VALUE>false</VALUE></SETTING>
- <SETTING><NAME>CacheSubprojects</NAME><VALUE>true</VALUE></SETTING>
+ <SETTING><NAME>CacheSubprojects</NAME><VALUE>false</VALUE></SETTING>
<SETTING><NAME>UseThirdPartyDebugger</NAME><VALUE>false</VALUE></SETTING>
<SETTING><NAME>BrowserGenerator</NAME><VALUE>2</VALUE></SETTING>
<SETTING><NAME>DebuggerAppPath</NAME>
diff --git a/make/makeApps.met b/make/makeApps.met
index cd6b2db0..131c1935 100755
--- a/make/makeApps.met
+++ b/make/makeApps.met
@@ -141,6 +141,18 @@ on SetProjectData()
set impalaData to my SimpleProjectData("impala", {})
set item 1 of fileList of item 1 of fileData of impalaData to "profiles.c"
+ -- asn2fsa
+ set myName to "asn2fsa"
+ set myFeatures to {"sockets"}
+ set mySettings to {prefixFile:"", ppcProject:{}}
+ set myRsrcs to {"ncbilogo.r", "Info.plc"}
+ set myLibs to {"ncbi", "ncbiconn", "ncbiobj", "vibrant", "ncbitool", "netcli", "ncbiid1"}
+ set myFiles to Â
+ {{projPath:"demo:", fileList:{"asn2fsa.c"}}}
+ set asn2fsaData to Â
+ {name:myName, features:myFeatures, settings:mySettings, rsrcs:myRsrcs, projLibs:Â
+ myLibs, fileData:myFiles}
+
-- Spidey
set myName to "spidey"
set myFeatures to {"sockets"}
@@ -160,6 +172,7 @@ on SetProjectData()
AddProject(entrez2Data)
AddSimpleProject("tbl2asn", {})
AddSimpleProject("asn2gb", {"sockets"})
+ AddProject(asn2fsaData)
AddProject(spideyData)
-- AddProject(entrezData)
diff --git a/make/makeall.unx b/make/makeall.unx
index 267232fd..fc28edf6 100644
--- a/make/makeall.unx
+++ b/make/makeall.unx
@@ -1,6 +1,6 @@
# makefile for asntool and ncbi core routines,
#
-# $Id: makeall.unx,v 6.232 2004/04/30 18:15:22 camacho Exp $
+# $Id: makeall.unx,v 6.240 2004/06/08 15:44:54 dondosha Exp $
#
# cdromlib data access functions, vibrant, and entrez
# SunOS with unbundled ANSI compiler [ make LCL=acc RAN=ranlib CC=acc ]
@@ -280,16 +280,18 @@ SRC39 = ideochrow.c humchrom_dat.c mschrom_dat.c ideochrom.c ideoorgs.c \
SRC50 = chartables.c get.c maketables.c pcre.c pcreposix.c study.c
-SRC60 = aa_ungapped.c blast_dust.c blast_engine.c blast_extend.c \
- blast_filter.c blast_gapalign.c blast_hits.c blast_lookup.c \
+SRC60 = aa_ungapped.c blast_diagnostics.c blast_dust.c blast_engine.c \
+ blast_extend.c blast_filter.c blast_gapalign.c blast_hits.c blast_lookup.c \
blast_message.c blast_options.c blast_seg.c blast_seqsrc.c \
blast_setup.c blast_stat.c blast_traceback.c blast_util.c \
- gapinfo.c greedy_align.c link_hsps.c lookup_wrap.c mb_lookup.c \
- ncbi_math.c ncbi_std.c pattern.c phi_extend.c phi_lookup.c \
- lookup_util.c blast_encoding.c blast_psi_priv.c
+ gapinfo.c greedy_align.c hspstream_collector.c link_hsps.c lookup_wrap.c \
+ mb_lookup.c ncbi_math.c ncbi_std.c pattern.c phi_extend.c phi_lookup.c \
+ lookup_util.c blast_encoding.c blast_psi.c blast_psi_priv.c blast_kappa.c \
+ matrix_freq_ratios.c blast_hspstream.c
-SRC61 = blast_input.c blast_seq.c blast_seqalign.c seqsrc_readdb.c \
- blast_format.c multiseq_src.c twoseq_api.c
+SRC61 = blast_format.c blast_input.c blast_returns.c blast_seq.c \
+ blast_seqalign.c blast_tabular.c hspstream_queue.c multiseq_src.c \
+ seqsrc_readdb.c twoseq_api.c
# objects needed for versions of asntool and entrez
@@ -427,16 +429,18 @@ OBJ39 = ideochrow.o humchrom_dat.o mschrom_dat.o ideochrom.o ideoorgs.o \
OBJ50 = chartables.o get.o maketables.o pcre.o pcreposix.o study.o
-OBJ60 = aa_ungapped.o blast_dust.o blast_engine.o blast_extend.o \
+OBJ60 = aa_ungapped.o blast_diagnostics.o blast_dust.o blast_engine.o blast_extend.o \
blast_filter.o blast_gapalign.o blast_hits.o blast_lookup.o \
blast_message.o blast_options.o blast_seg.o blast_seqsrc.o \
blast_setup.o blast_stat.o blast_traceback.o blast_util.o \
- gapinfo.o greedy_align.o link_hsps.o lookup_wrap.o mb_lookup.o \
- ncbi_math.o ncbi_std.o pattern.o phi_extend.o phi_lookup.o \
- lookup_util.o blast_encoding.o blast_psi_priv.o
-
-OBJ61 = blast_input.o blast_seq.o blast_seqalign.o seqsrc_readdb.o \
- blast_format.o multiseq_src.o twoseq_api.o
+ gapinfo.o greedy_align.o hspstream_collector.o link_hsps.o lookup_wrap.o \
+ mb_lookup.o ncbi_math.o ncbi_std.o pattern.o phi_extend.o phi_lookup.o \
+ lookup_util.o blast_encoding.o blast_psi.o blast_psi_priv.o blast_kappa.o \
+ matrix_freq_ratios.o blast_hspstream.o
+
+OBJ61 = blast_input.o blast_format.o blast_returns.o blast_seq.o \
+ blast_seqalign.o blast_tabular.o hspstream_queue.o multiseq_src.o \
+ seqsrc_readdb.o twoseq_api.o
# NOTE: if you enter an object file to an OBJxx greater than 30, you have to explicitly
diff --git a/make/makeallchives b/make/makeallchives
index ead6960f..dd09d4fa 100755
--- a/make/makeallchives
+++ b/make/makeallchives
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# $Id: makeallchives,v 1.73 2004/02/24 14:49:49 beloslyu Exp $
+# $Id: makeallchives,v 1.74 2004/06/16 19:17:52 beloslyu Exp $
#
#
# Creates all archives from a directory above 'ncbi'
@@ -68,7 +68,7 @@ If you have not done so already, please refer to the
file at ftp://ftp.ncbi.nih.gov/entrez/README
EOF
f="entrez entrez2 blastcl3 ../doc/firewall.html"
- (cd ncbi/build; test -f Nentrez && ln Nentrez entrez)
+ (cd ncbi/build; test -f Nentrez && ln -f Nentrez entrez)
;;
sequin)
f="sequin tbl2asn asn2gb"
@@ -91,14 +91,11 @@ EOF
f="fa2htgs"
;;
blast)
- f="blastall blastpgp seedtop formatdb fastacmd copymat makemat \
-impala megablast blastclust rpsblast bl2seq ../doc/impala.txt \
-../doc/blast.txt ../doc/megablast.txt ../doc/blastclust.txt ../doc/rpsblast.txt \
-../doc/formatdb.txt ../doc/fastacmd.txt ../doc/README-qm"
+ f="blastall blastpgp seedtop formatdb fastacmd copymat makemat impala megablast blastclust rpsblast bl2seq ../doc/blast/bl2seq.html ../doc/blast/blast.html ../doc/blast/blastall.html ../doc/blast/blastclust.html ../doc/blast/blastdb.html ../doc/blast/blastftp.html ../doc/blast/blastpgp.html ../doc/blast/fastacmd.html ../doc/blast/filter.html ../doc/blast/formatdb.html ../doc/blast/impala.html ../doc/blast/index.html ../doc/blast/megablast.html ../doc/blast/netblast.html ../doc/blast/rpsblast.html"
rm -f archive/blast/data/sequin.hlp
;;
netblast)
- f="blastcl3 ../doc/firewall.html ../doc/netblast.txt"
+ f="blastcl3 ../doc/firewall.html ../doc/blast/netblast.html"
;;
*)
;;
diff --git a/make/makedemo.unx b/make/makedemo.unx
index 5d256fea..be25b44b 100644
--- a/make/makedemo.unx
+++ b/make/makedemo.unx
@@ -1,6 +1,6 @@
# makefile for demo programs
#
-# $Id: makedemo.unx,v 6.68 2004/01/02 22:05:16 coulouri Exp $
+# $Id: makedemo.unx,v 6.69 2004/05/13 18:23:15 coulouri Exp $
#
# Sun with unbundled ANSI compiler [ make CC=acc ]
# Sun with Gnu C [ make CC=gcc ]
@@ -97,7 +97,7 @@ EXE1 = testcore makeset \
dosimple asn2ff getseq getfeat checksub asndhuff \
entrcmd errhdr cdscan findspl \
ncbisort fa2htgs fastacmd formatdb blast blastall blastpgp \
- testval seedtop fmerge makemat copymat impala \
+ testval seedtop makemat copymat impala \
megablast vecscreen gil2bin tbl2asn blastclust rpsblast \
asn2xml debruijn \
test_regexp demo_regexp demo_regexp_grep
@@ -107,7 +107,7 @@ SRC1 = testcore.c makeset.c \
getseq.c getfeat.c checksub.c asndhuff.c \
entrcmd.c errhdr.c cdscan.c findspl.c \
ncbisort.c fa2htgs.c fastacmd.c formatdb.c blast_driver.c blastall.c \
- blastpgp.c testval.c seedtop.c fmerge.c makemat.c copymat.c profiles.c \
+ blastpgp.c testval.c seedtop.c makemat.c copymat.c profiles.c \
megablast.c vecscreen.c gil2bin.c tbl2asn.c blastclust.c rpsblast.c \
asn2xml.c debruijn.c \
pcretest.c pcredemo.c pcregrep.c
@@ -271,11 +271,6 @@ fastacmd : fastacmd.c
formatdb : formatdb.c
$(CC) -o formatdb $(LDFLAGS) formatdb.c $(LIB23) $(LIB2) $(LIB1) $(OTHERLIBS)
-# fmerge
-
-fmerge : fmerge.c
- $(CC) -o fmerge $(LDFLAGS) fmerge.c $(LIB23) $(LIB2) $(LIB1) $(OTHERLIBS)
-
# blast (reincarnation of blastall from algo/blast sources)
blast : blast_driver.c
diff --git a/make/makedis.csh b/make/makedis.csh
index da03c620..98f5648e 100755
--- a/make/makedis.csh
+++ b/make/makedis.csh
@@ -1,6 +1,6 @@
#!/bin/csh -f
#
-# $Id: makedis.csh,v 1.99 2004/05/03 18:12:03 beloslyu Exp $
+# $Id: makedis.csh,v 1.100 2004/05/13 17:13:54 ucko Exp $
#
## PUBLIC DOMAIN NOTICE
# National Center for Biotechnology Information
@@ -164,6 +164,9 @@ case Linux:
case "i?86":
set platform=linux-x86
breaksw
+ case "alpha":
+ set platform=linux-alpha
+ breaksw
default:
set platform=linux
breaksw
diff --git a/make/makenet.unx b/make/makenet.unx
index 32fde9e2..5528c1a0 100644
--- a/make/makenet.unx
+++ b/make/makenet.unx
@@ -1,6 +1,6 @@
# makefile for network demo programs and network entrez
#
-# $Id: makenet.unx,v 6.172 2004/05/05 02:01:08 kans Exp $
+# $Id: makenet.unx,v 6.175 2004/05/17 17:56:58 dondosha Exp $
# test, ignore
#
# Sun with unbundled ANSI compiler [ make CC=acc RAN=ranlib ]
@@ -1484,11 +1484,13 @@ nph-viewgif.cgi : viewgif.c
wblast2.REAL : wblast2.c
$(CC) -o wblast2.REAL $(LDFLAGS) -DBL2SEQ_STANDALONE wblast2.c \
- $(LIB23) $(LIB2) $(LIB1) $(OTHERLIBS)
+ $(LIB61) $(LIB23) $(LIB2) $(LIB60) $(LIB1) $(OTHERLIBS)
+
wblast2_cs.REAL : wblast2.c
$(CC) -o wblast2_cs.REAL $(LDFLAGS) -DNCBI_ENTREZ_CLIENT -DBL2SEQ_STANDALONE \
- wblast2.c $(LIB41) $(LIB6) $(LIB23) $(LIB2) $(LIB1) $(OTHERLIBS)
+ wblast2.c $(LIB41) $(LIB6) $(LIB61) $(LIB23) $(LIB2) $(LIB60) $(LIB1) \
+ $(OTHERLIBS)
bl2bag.cgi : bl2bag.c
$(CC) -o bl2bag.cgi $(LDFLAGS) bl2bag.c $(LIB1) $(OTHERLIBS)
diff --git a/make/msvc_prj/algo/blast/api/blastapi.dsp b/make/msvc_prj/algo/blast/api/blastapi.dsp
index 97cf877a..7bb197fe 100644
--- a/make/msvc_prj/algo/blast/api/blastapi.dsp
+++ b/make/msvc_prj/algo/blast/api/blastapi.dsp
@@ -62,6 +62,14 @@ SOURCE=..\..\..\..\..\algo\blast\api\blast_input.c
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\api\blast_returns.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\..\..\algo\blast\api\blast_tabular.c
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\api\blast_seq.c
# End Source File
# Begin Source File
@@ -70,6 +78,10 @@ SOURCE=..\..\..\..\..\algo\blast\api\blast_seqalign.c
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\api\hspstream_queue.c
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\api\multiseq_src.c
# End Source File
# Begin Source File
@@ -94,6 +106,14 @@ SOURCE=..\..\..\..\..\algo\blast\api\blast_input.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\api\blast_returns.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\..\..\algo\blast\api\blast_tabular.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\api\blast_seq.h
# End Source File
# Begin Source File
@@ -102,6 +122,10 @@ SOURCE=..\..\..\..\..\algo\blast\api\blast_seqalign.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\api\hspstream_queue.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\api\multiseq_src.h
# End Source File
# Begin Source File
diff --git a/make/msvc_prj/algo/blast/core/blast.dsp b/make/msvc_prj/algo/blast/core/blast.dsp
index 3351085a..623289ed 100644
--- a/make/msvc_prj/algo/blast/core/blast.dsp
+++ b/make/msvc_prj/algo/blast/core/blast.dsp
@@ -58,6 +58,10 @@ SOURCE=..\..\..\..\..\algo\blast\core\aa_ungapped.c
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\core\blast_diagnostics.c
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\core\blast_dust.c
# End Source File
# Begin Source File
@@ -86,10 +90,18 @@ SOURCE=..\..\..\..\..\algo\blast\core\blast_hits.c
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\core\blast_hspstream.c
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\core\blast_inline.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\core\blast_kappa.c
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\core\blast_lookup.c
# End Source File
# Begin Source File
@@ -102,6 +114,10 @@ SOURCE=..\..\..\..\..\algo\blast\core\blast_options.c
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\core\blast_psi.c
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\core\blast_psi_priv.c
# End Source File
# Begin Source File
@@ -142,6 +158,10 @@ SOURCE=..\..\..\..\..\algo\blast\core\greedy_align.c
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\core\hspstream_collector.c
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\core\link_hsps.c
# End Source File
# Begin Source File
@@ -154,7 +174,7 @@ SOURCE=..\..\..\..\..\algo\blast\core\lookup_wrap.c
# End Source File
# Begin Source File
-SOURCE=..\..\..\..\..\algo\blast\core\matrix_freq_ratios.h
+SOURCE=..\..\..\..\..\algo\blast\core\matrix_freq_ratios.c
# End Source File
# Begin Source File
@@ -194,6 +214,10 @@ SOURCE=..\..\..\..\..\algo\blast\core\blast_def.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\core\blast_diagnostics.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\core\blast_dust.h
# End Source File
# Begin Source File
@@ -222,6 +246,10 @@ SOURCE=..\..\..\..\..\algo\blast\core\blast_hits.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\core\blast_hspstream.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\core\blast_lookup.h
# End Source File
# Begin Source File
@@ -234,6 +262,10 @@ SOURCE=..\..\..\..\..\algo\blast\core\blast_options.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\core\blast_psi.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\core\blast_rps.h
# End Source File
# Begin Source File
@@ -274,6 +306,10 @@ SOURCE=..\..\..\..\..\algo\blast\core\greedy_align.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\core\hspstream_collector.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\core\link_hsps.h
# End Source File
# Begin Source File
@@ -286,6 +322,10 @@ SOURCE=..\..\..\..\..\algo\blast\core\lookup_wrap.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\..\algo\blast\core\matrix_freq_ratios.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\..\algo\blast\core\mb_lookup.h
# End Source File
# Begin Source File
diff --git a/make/msvc_prj/connect/connect.dsp b/make/msvc_prj/connect/connect.dsp
new file mode 100644
index 00000000..d1c1d577
--- /dev/null
+++ b/make/msvc_prj/connect/connect.dsp
@@ -0,0 +1,258 @@
+# Microsoft Developer Studio Project File - Name="connect" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Static Library" 0x0104
+
+CFG=connect - Win32 DebugDLL
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE
+!MESSAGE NMAKE /f "connect.mak".
+!MESSAGE
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE
+!MESSAGE NMAKE /f "connect.mak" CFG="connect - Win32 DebugDLL"
+!MESSAGE
+!MESSAGE Possible choices for configuration are:
+!MESSAGE
+!MESSAGE "connect - Win32 DebugDLL" (based on "Win32 (x86) Static Library")
+!MESSAGE
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "DebugDLL"
+# PROP BASE Intermediate_Dir "DebugDLL"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "DebugDLL"
+# PROP Intermediate_Dir "DebugDLL"
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /MDd /W3 /GX /Z7 /Od /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
+# SUBTRACT BASE CPP /Fr
+# ADD CPP /nologo /MDd /W3 /GR /Z7 /Od /I "..\..\.." /I "..\..\..\corelib" /I "..\..\..\asnstat" /I "..\..\..\asnlib" /I "..\..\..\vibrant" /I "..\..\..\object" /I "..\..\..\api" /I "..\..\..\desktop" /I "..\..\..\cdromlib" /I "..\..\..\tools" /I "..\..\..\biostruc" /I "..\..\..\access" /I "..\..\..\connect" /I "..\..\..\cn3d" /I "..\..\..\gif" /I "..\..\..\network\blast3\client" /I "..\..\..\network\nsclilib" /D "WIN32" /D "_DEBUG" /YX /FD /GZ /c
+# ADD BASE RSC /l 0x409 /d "_DEBUG"
+# ADD RSC /l 0x409 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LIB32=link.exe -lib
+# ADD BASE LIB32 /nologo
+# ADD LIB32 /nologo
+# Begin Target
+
+# Name "connect - Win32 DebugDLL"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_ansi_ext.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_buffer.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_connection.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_connector.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_connutil.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_core.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_core_c.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_dispd.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_file_connector.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_heapmgr.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_host_info.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_host_infop.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_http_connector.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_lbsmd_stub.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_memory_connector.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_priv.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_priv.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_sendmail.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_server_info.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_server_infop.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_service.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_service_connector.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_servicep.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_socket.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_socket_connector.c
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_util.c
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_ansi_ext.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_buffer.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_comm.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_config.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_connection.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_connector.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_connutil.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_core.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_core_c.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_dispd.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_file_connector.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_heapmgr.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_host_info.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_http_connector.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_memory_connector.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_sendmail.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_server_info.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_service.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_service_connector.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_socket.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_socket_connector.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_types.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\connect\ncbi_util.h
+# End Source File
+# End Group
+# End Target
+# End Project
diff --git a/make/msvc_prj/corelib/ncbi/ncbi.dsp b/make/msvc_prj/corelib/ncbi/ncbi.dsp
index 1fe8de38..4d48e32e 100644
--- a/make/msvc_prj/corelib/ncbi/ncbi.dsp
+++ b/make/msvc_prj/corelib/ncbi/ncbi.dsp
@@ -26,9 +26,6 @@ CFG=ncbi - Win32 DebugDLL
# PROP Scc_LocalPath ""
CPP=cl.exe
RSC=rc.exe
-
-!IF "$(CFG)" == "ncbi - Win32 DebugDLL"
-
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "DebugDLL"
@@ -39,9 +36,9 @@ RSC=rc.exe
# PROP Output_Dir "DebugDLL"
# PROP Intermediate_Dir "DebugDLL"
# PROP Target_Dir ""
-# ADD BASE CPP /nologo /MDd /W3 /Gm /GX /Z7 /Od /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
+# ADD BASE CPP /nologo /MDd /W3 /GX /Z7 /Od /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
# SUBTRACT BASE CPP /Fr
-# ADD CPP /nologo /MDd /W3 /Gm /GR /Z7 /Od /I "..\..\..\.." /I "..\..\..\..\corelib" /I "..\..\..\..\connect" /I "..\..\..\..\asnlib" /D "WIN32" /D "_DEBUG" /D "_MBCS" /YX /FD /GZ /c
+# ADD CPP /nologo /MDd /W3 /GR /Z7 /Od /I "..\..\..\.." /I "..\..\..\..\corelib" /I "..\..\..\..\connect" /I "..\..\..\..\asnlib" /D "WIN32" /D "_DEBUG" /D "_MBCS" /YX /FD /GZ /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
@@ -50,9 +47,6 @@ BSC32=bscmake.exe
LIB32=link.exe -lib
# ADD BASE LIB32 /nologo
# ADD LIB32 /nologo
-
-!ENDIF
-
# Begin Target
# Name "ncbi - Win32 DebugDLL"
@@ -121,90 +115,6 @@ SOURCE=..\..\..\..\corelib\matrix.c
# End Source File
# Begin Source File
-SOURCE=..\..\..\..\connect\ncbi_ansi_ext.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_buffer.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_connection.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_connector.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_connutil.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_core.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_core_c.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_file_connector.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_host_info.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_http_connector.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_memory_connector.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_priv.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_sendmail.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_server_info.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_service.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_service_connector.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_dispd.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_lbsmd_stub.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_socket.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_socket_connector.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_util.c
-# End Source File
-# Begin Source File
-
SOURCE=..\..\..\..\corelib\ncbiargs.c
# End Source File
# Begin Source File
@@ -363,82 +273,6 @@ SOURCE=..\..\..\..\corelib\ncbi.h
# End Source File
# Begin Source File
-SOURCE=..\..\..\..\connect\ncbi_ansi_ext.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_buffer.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_config.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_connection.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_connector.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_connutil.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_core.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_core_c.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_file_connector.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_host_info.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_http_connector.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_memory_connector.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_priv.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_sendmail.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_server_info.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_service.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_service_connector.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_socket.h
-# End Source File
-# Begin Source File
-
-SOURCE=..\..\..\..\connect\ncbi_socket_connector.h
-# End Source File
-# Begin Source File
-
SOURCE=..\..\..\..\connect\ncbi_util.h
# End Source File
# Begin Source File
@@ -473,7 +307,7 @@ SOURCE=..\..\..\..\corelib\ncbilcl.h
SOURCE=..\..\..\..\corelib\ncbilcl.msw
# Begin Custom Build
-InputDir=..\..\..\..\corelib
+InputDir=\ncbi\corelib
InputPath=..\..\..\..\corelib\ncbilcl.msw
"$(InputDir)\ncbilcl.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
diff --git a/make/msvc_prj/demo/fmerge/demo_fmerge.dsp b/make/msvc_prj/demo/insdseqget/demo_insdseqget.dsp
index 7847e1ab..2a8dac24 100644
--- a/make/msvc_prj/demo/fmerge/demo_fmerge.dsp
+++ b/make/msvc_prj/demo/insdseqget/demo_insdseqget.dsp
@@ -1,23 +1,23 @@
-# Microsoft Developer Studio Project File - Name="demo_fmerge" - Package Owner=<4>
+# Microsoft Developer Studio Project File - Name="demo_insdseqget" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **
# TARGTYPE "Win32 (x86) Application" 0x0101
-CFG=demo_fmerge - Win32 DebugDLL
+CFG=demo_insdseqget - Win32 DebugDLL
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE
-!MESSAGE NMAKE /f "demo_fmerge.mak".
+!MESSAGE NMAKE /f "demo_insdseqget.mak".
!MESSAGE
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE
-!MESSAGE NMAKE /f "demo_fmerge.mak" CFG="demo_fmerge - Win32 DebugDLL"
+!MESSAGE NMAKE /f "demo_insdseqget.mak" CFG="demo_insdseqget - Win32 DebugDLL"
!MESSAGE
!MESSAGE Possible choices for configuration are:
!MESSAGE
-!MESSAGE "demo_fmerge - Win32 DebugDLL" (based on "Win32 (x86) Application")
+!MESSAGE "demo_insdseqget - Win32 DebugDLL" (based on "Win32 (x86) Application")
!MESSAGE
# Begin Project
@@ -28,7 +28,7 @@ CPP=cl.exe
MTL=midl.exe
RSC=rc.exe
-!IF "$(CFG)" == "demo_fmerge - Win32 DebugDLL"
+!IF "$(CFG)" == "demo_insdseqget - Win32 DebugDLL"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
@@ -43,7 +43,7 @@ RSC=rc.exe
# PROP Target_Dir ""
LIB32=link.exe -lib
# ADD BASE CPP /nologo /MDd /W3 /Gm /GX /Z7 /Od /D "WIN32" /D "_DEBUG" /D "_NETENT_" /D "_WINDOWS" /D "_MBCS" /YX /FD /GZ /c
-# ADD CPP /nologo /MDd /W3 /Gm /GR /Z7 /Od /I "..\..\..\.." /I "..\..\..\..\corelib" /I "..\..\..\..\api" /I "..\..\..\..\asnstat" /I "..\..\..\..\asnlib" /I "..\..\..\..\object" /I "..\..\..\..\cdromlib" /I "..\..\..\..\biostruc" /I "..\..\..\..\tools" /D "_CONSOLE" /D "WIN32" /D "_DEBUG" /YX /FD /GZ /c
+# ADD CPP /nologo /MDd /W3 /Gm /GR /Z7 /Od /I "..\..\..\.." /I "..\..\..\..\corelib" /I "..\..\..\..\connect" /I "..\..\..\..\api" /I "..\..\..\..\asnstat" /I "..\..\..\..\asnlib" /I "..\..\..\..\access" /I "..\..\..\..\object" /I "..\..\..\..\cdromlib" /I "..\..\..\..\biostruc" /I "..\..\..\..\tools" /D "_CONSOLE" /D "WIN32" /D "_DEBUG" /YX /FD /GZ /c
# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32
# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
# ADD BASE RSC /l 0x409 /d "_DEBUG"
@@ -53,24 +53,24 @@ BSC32=bscmake.exe
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /incremental:yes /subsystem:windows /pdb:none /debug /machine:I386 /pdbtype:sept
-# ADD LINK32 oldnames.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib /nologo /incremental:yes /subsystem:console /pdb:none /debug /machine:I386 /out:"DebugDLL/fmerge.exe" /pdbtype:sept
+# ADD LINK32 oldnames.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib wsock32.lib /nologo /incremental:yes /subsystem:console /debug /machine:I386 /out:"DebugDLL/insdseqget.exe" /pdbtype:sept
# SUBTRACT LINK32 /pdb:none
!ENDIF
# Begin Target
-# Name "demo_fmerge - Win32 DebugDLL"
+# Name "demo_insdseqget - Win32 DebugDLL"
# Begin Group "Source Files"
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
# Begin Source File
-SOURCE=..\..\..\..\demo\fmerge.c
+SOURCE=..\..\..\..\demo\insdseqget.c
# End Source File
# Begin Source File
-SOURCE=..\..\..\..\link\mswin\fmerge.rc
+SOURCE=..\..\..\..\link\mswin\insdseqget.rc
# End Source File
# End Group
# Begin Group "Header Files"
diff --git a/make/msvc_prj/ncbi.dsw b/make/msvc_prj/ncbi.dsw
index 2a224a30..c4e3ad5a 100644
--- a/make/msvc_prj/ncbi.dsw
+++ b/make/msvc_prj/ncbi.dsw
@@ -102,9 +102,6 @@ Package=<4>
Project_Dep_Name demo_fastacmd
End Project Dependency
Begin Project Dependency
- Project_Dep_Name demo_fmerge
- End Project Dependency
- Begin Project Dependency
Project_Dep_Name demo_formatdb
End Project Dependency
Begin Project Dependency
@@ -272,6 +269,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name demo_flint
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name demo_insdseqget
+ End Project Dependency
}}}
###############################################################################
@@ -407,6 +407,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name blastapi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -459,6 +462,18 @@ Package=<4>
###############################################################################
+Project: "connect"=.\connect\connect.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
Project: "ddvlib"=.\ddv\ddvlib\ddvlib.dsp - Package Owner=<4>
Package=<5>
@@ -491,6 +506,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -578,6 +596,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -800,6 +821,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -836,6 +860,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name netcli
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -860,6 +887,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -938,6 +968,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1013,6 +1046,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1067,6 +1103,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1142,6 +1181,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1172,6 +1214,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1199,6 +1244,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1274,6 +1322,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1362,30 +1413,6 @@ Package=<4>
###############################################################################
-Project: "demo_fmerge"=.\demo\fmerge\demo_fmerge.dsp - Package Owner=<4>
-
-Package=<5>
-{{{
-}}}
-
-Package=<4>
-{{{
- Begin Project Dependency
- Project_Dep_Name ncbitool
- End Project Dependency
- Begin Project Dependency
- Project_Dep_Name ncbiobj
- End Project Dependency
- Begin Project Dependency
- Project_Dep_Name ncbi
- End Project Dependency
- Begin Project Dependency
- Project_Dep_Name ncbimain
- End Project Dependency
-}}}
-
-###############################################################################
-
Project: "demo_formatdb"=.\demo\formatdb\demo_formatdb.dsp - Package Owner=<4>
Package=<5>
@@ -1478,6 +1505,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name netcli
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1496,6 +1526,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1514,6 +1547,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1550,6 +1586,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name netentr
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1622,6 +1661,42 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
+}}}
+
+###############################################################################
+
+Project: "demo_insdseqget"=.\demo\insdseqget\demo_insdseqget.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+ Begin Project Dependency
+ Project_Dep_Name ncbiid1
+ End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name netcli
+ End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name ncbitool
+ End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name ncbinacc
+ End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name ncbiobj
+ End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name ncbi
+ End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name ncbimain
+ End Project Dependency
}}}
###############################################################################
@@ -1733,6 +1808,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1793,6 +1871,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbiacc
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1826,6 +1907,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1922,6 +2006,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -1973,6 +2060,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2048,6 +2138,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2201,6 +2294,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2225,6 +2321,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2300,6 +2399,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2348,6 +2450,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2372,6 +2477,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2507,6 +2615,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2555,6 +2666,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2648,6 +2762,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name blastapi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2714,6 +2831,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name blastapi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2729,6 +2849,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2837,6 +2960,9 @@ Package=<5>
Package=<4>
{{{
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2984,6 +3110,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -2999,6 +3128,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -3014,6 +3146,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -3029,6 +3164,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -3044,6 +3182,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -3059,6 +3200,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -3074,6 +3218,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -3089,6 +3236,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -3107,6 +3257,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -3122,6 +3275,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -3140,6 +3296,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -3155,6 +3314,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -3173,6 +3335,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -3188,6 +3353,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
@@ -3206,6 +3374,9 @@ Package=<4>
Begin Project Dependency
Project_Dep_Name ncbi
End Project Dependency
+ Begin Project Dependency
+ Project_Dep_Name connect
+ End Project Dependency
}}}
###############################################################################
diff --git a/network/blast3/client/blastcl3.c b/network/blast3/client/blastcl3.c
deleted file mode 100644
index 21f75a1d..00000000
--- a/network/blast3/client/blastcl3.c
+++ /dev/null
@@ -1,656 +0,0 @@
-/*
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-*
-* File Name: blastcl3.c
-*
-* Author: Tom Madden
-*
-* Version Creation Date: 05/16/95
-*
-* $Revision: 1.25 $
-*
-* File Description:
-* Simulates "traditional" BLAST output
-*
-* Modifications:
-* --------------------------------------------------------------------------
-* Date Name Description of modification
-* ------- ---------- -----------------------------------------------------
-*
-* ==========================================================================
-*
-*
-* RCS Modification History:
-* $Log: blastcl3.c,v $
-* Revision 1.25 2001/04/13 23:01:22 juran
-* Removed unused variable.
-*
-* Revision 1.24 2001/01/19 21:30:09 dondosha
-* Call readdb...._ex functions to possibly save some time when finding db length
-*
-* Revision 1.23 2000/10/25 16:41:26 madden
-* Add BioSource to query_bsp
-*
-* Revision 1.22 2000/09/28 16:36:59 dondosha
-* Set parameters differently for megablast
-*
-* Revision 1.21 2000/08/28 15:17:45 dondosha
-* Added functionality for megablast search
-*
-* Revision 1.20 2000/05/04 18:57:00 shavirin
-* Added option to restrict search to results of Entrez2 lookup.
-*
-* Revision 1.19 2000/05/04 16:55:05 shavirin
-* Removed message of the day for the RPS Blast.
-*
-* Revision 1.18 2000/05/03 17:33:40 shavirin
-* Changed vesion info for RPS-BLAST.
-*
-* Revision 1.17 2000/05/02 18:07:38 shavirin
-* Added option to use RPS Blast search - and corresponding changes to
-* do this search over the network.
-*
-* Revision 1.16 1999/10/07 18:17:18 madden
-* Remove FindProt, FindNuc and SeqAlignToFasta
-*
-* Revision 1.15 1999/08/20 16:37:15 shavirin
-* Added protection against invalid program type.
-*
-* Revision 1.14 1999/04/20 14:51:59 madden
-* Error message if TraditionalReport returns FALSE
-*
-* Revision 1.13 1999/04/13 14:59:31 madden
-* Add more options (searchsp, culling, strand)
-*
-* Revision 1.12 1999/04/02 16:25:36 madden
-* FileClose on error, check NULL pointer
-*
-* Revision 1.11 1999/03/26 15:59:51 madden
-* Add option to use filter string, gifile, and get HTML output
-*
-* Revision 1.10 1999/03/05 15:43:41 madden
-* added matrix option
-*
-* Revision 1.9 1999/01/03 22:33:20 kans
-* now calls UseLocalAsnloadDataAndErrMsg
-*
-* Revision 1.8 1998/12/09 15:27:04 madden
-* Add wordsize
-*
-* Revision 1.7 1998/11/05 17:55:25 madden
-* Removed unused global_fp
-*
-* Revision 1.6 1998/05/02 20:39:38 kans
-* global_fp is extern, removed unused callback function, removed unused variables, added newlines in long prompts
-*
-* Revision 1.5 1998/04/23 14:18:43 egorov
-* Add number_of_hits parameter to TraditionalBlastReportLoc
-*
-* Revision 1.4 1998/04/22 19:58:06 egorov
-* Fix minor bug after previous commit
-*
-* Revision 1.3 1998/04/22 18:10:06 egorov
-* Add support for SeqLoc to blastcl3
-*
-* Revision 1.2 1998/04/16 19:35:30 madden
-* Added Int4Ptr arg to TraditionalBlastReport specifying the numbers of hits
-*
-* Revision 1.1 1997/10/08 19:24:56 madden
-* Main (command-line) client file
-*
- *
-*/
-#define BLASTCLI_BUF_SIZE 255
-#include <sequtil.h>
-#include <prtutil.h>
-#include <tofasta.h>
-#include <objblst3.h>
-#include <netblap3.h>
-#include <blastpri.h>
-#include <dust.h>
-#include <txalign.h>
-#include <accentr.h>
-#include <sqnutils.h>
-
-
-/*
-    Monitor hook that echoes monitor text to stderr for UNIX clients.
-
-    When OS_UNIX is defined, the monitor title is printed on
-    MonCode_Create and the current string value on MonCode_StrValue;
-    every other code (and every code on non-UNIX builds) is ignored.
-    Always returns 0.
-*/
-
-static int LIBCALLBACK UNIXMontiorHook(Nlm_MonitorPtr mon, MonCode code)
-
-{
-#ifdef OS_UNIX
-    if (code == MonCode_Create) {
-        fprintf(stderr, "%s\n", (Nlm_CharPtr) mon->strTitle);
-    } else if (code == MonCode_StrValue) {
-        fprintf(stderr, "%s\n", (Nlm_CharPtr) mon->strValue);
-    }
-#endif
-    return 0;
-
-}
-
-
-/*
-    Print a message-of-the-day string to fp.
-
-    string      - text to print; '~' characters act as line separators.
-                  Nothing is printed when string is NULL.
-    fp          - output stream (must be open for writing).
-    html_format - when TRUE the text is wrapped in <PRE> ... </PRE>.
-
-    Every '~' is emitted as a newline and a final newline is always
-    appended, so a trailing '~' produces one empty line at the end.
-    Characters are streamed straight to fp; the previous implementation
-    staged each line in a fixed 100-byte buffer without a bounds check
-    and overflowed it on any line of 100 characters or more.
-*/
-static void PrintMotd(CharPtr string, FILE *fp, Boolean html_format)
-
-{
-    CharPtr ptr;
-
-    if (string == NULL)
-        return;
-
-    if (html_format) {
-        fprintf(fp, "<PRE>\n");
-    }
-
-    for (ptr = string; *ptr != NULLB; ptr++) {
-        /* '~' is the line separator used in the message text */
-        fputc((*ptr == '~') ? '\n' : *ptr, fp);
-    }
-    /* terminate the last (possibly empty) line */
-    fputc('\n', fp);
-
-    if (html_format) {
-        fprintf(fp, "</PRE>\n");
-    }
-
-    fflush(fp);
-}
-
-
-/* Number of entries in the myargs command-line table below. */
-#define NUMARGS (sizeof(myargs)/sizeof(myargs[0]))
-
-/*
-    Command-line argument table handed to GetArgs() by Main().
-
-    Main() reads the parsed values by position (myargs[0] .. myargs[34]),
-    so the numeric comment after each prompt is that entry's index and
-    must stay in sync with the order of the table: inserting, removing
-    or reordering entries requires updating every myargs[n] reference.
-
-    Each initializer starts with the prompt text, followed by the default
-    value string; the single-character constant is the command-line switch
-    and the ARG_* constant the value type.
-    NOTE(review): the meaning of the remaining positional fields comes
-    from the Args structure declared elsewhere - confirm there.
-*/
-static Args myargs [] = {
- { "Program Name", /* 0 */
- NULL, NULL, NULL, FALSE, 'p', ARG_STRING, 0.0, 0, NULL},
- { "Database", /* 1 */
- "nr", NULL, NULL, FALSE, 'd', ARG_STRING, 0.0, 0, NULL},
- { "Query File", /* 2 */
- "stdin", NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
- { "Expectation value (E)", /* 3 */
- "10.0", NULL, NULL, FALSE, 'e', ARG_FLOAT, 0.0, 0, NULL},
- { "alignment view options:\n0 = pairwise,\n1 = master-slave showing identities,\n2 = master-slave no identities,\n3 = flat master-slave, show identities,\n4 = flat master-slave, no identities,\n5 = master-slave no identities and blunt ends,\n6 = flat master-slave, no identities and blunt ends", /* 4 */
- "0", NULL, NULL, FALSE, 'm', ARG_INT, 0.0, 0, NULL},
- { "BLAST report Output File", /* 5 */
- "stdout", NULL, NULL, TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
- { "Filter query sequence (DUST with blastn, SEG with others)", /* 6 */
- "T", NULL, NULL, FALSE, 'F', ARG_STRING, 0.0, 0, NULL},
- { "Cost to open a gap (zero invokes default behavior)", /* 7 */
- "0", NULL, NULL, FALSE, 'G', ARG_INT, 0.0, 0, NULL},
- { "Cost to extend a gap (zero invokes default behavior)", /* 8 */
- "0", NULL, NULL, FALSE, 'E', ARG_INT, 0.0, 0, NULL},
- { "X dropoff value for gapped alignment (in bits)\n(zero invokes default behavior)", /* 9 */
- "0", NULL, NULL, FALSE, 'X', ARG_INT, 0.0, 0, NULL},
- { "Show GI's in deflines", /* 10 */
- "F", NULL, NULL, FALSE, 'I', ARG_BOOLEAN, 0.0, 0, NULL},
- { "Penalty for a nucleotide mismatch (blastn only)", /* 11 */
- "-3", NULL, NULL, FALSE, 'q', ARG_INT, 0.0, 0, NULL},
- { "Reward for a nucleotide match (blastn only)", /* 12 */
- "1", NULL, NULL, FALSE, 'r', ARG_INT, 0.0, 0, NULL},
- { "Number of one-line descriptions (V)", /* 13 */
- "500", NULL, NULL, FALSE, 'v', ARG_INT, 0.0, 0, NULL},
- { "Number of alignments to show (B)", /* 14 */
- "250", NULL, NULL, FALSE, 'b', ARG_INT, 0.0, 0, NULL},
- { "Threshold for extending hits, default if zero", /* 15 */
- "0", NULL, NULL, FALSE, 'f', ARG_INT, 0.0, 0, NULL},
- { "Perfom gapped alignment (not available with tblastx)", /* 16 */
- "T", NULL, NULL, FALSE, 'g', ARG_BOOLEAN, 0.0, 0, NULL},
- { "Query Genetic code to use", /* 17 */
- "1", NULL, NULL, FALSE, 'Q', ARG_INT, 0.0, 0, NULL},
- { "DB Genetic code (for tblast[nx] only)", /* 18 */
- "1", NULL, NULL, FALSE, 'D', ARG_INT, 0.0, 0, NULL},
- { "Number of processors to use", /* 19 */
- "1", NULL, NULL, FALSE, 'a', ARG_INT, 0.0, 0, NULL},
- { "SeqAlign file", /* 20 */
- NULL, NULL, NULL, TRUE, 'O', ARG_FILE_OUT, 0.0, 0, NULL},
- { "Believe the query defline", /* 21 */
- "F", NULL, NULL, FALSE, 'J', ARG_BOOLEAN, 0.0, 0, NULL},
- { "Word size, default if zero", /* 22 */
- "0", NULL, NULL, FALSE, 'W', ARG_INT, 0.0, 0, NULL},
- { "Start of the sequence", /* 23 */
- "1", NULL, NULL, FALSE, 'A', ARG_INT, 0.0, 0, NULL},
- { "End of the sequence (-1 is entire sequence)", /* 24 */
- "-1", NULL, NULL, FALSE, 'B', ARG_INT, 0.0, 0, NULL},
- { "Matrix", /* 25 */
- "BLOSUM62", NULL, NULL, FALSE, 'M', ARG_STRING, 0.0, 0, NULL},
- { "Produce HTML output", /* 26 */
- "F", NULL, NULL, FALSE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},
- { "Restrict search of database to list of GI's", /* 27 */
- NULL, NULL, NULL, TRUE, 'l', ARG_STRING, 0.0, 0, NULL},
- { "Number of best hits from a region to keep", /* 28 */
- "100", NULL, NULL, FALSE, 'K', ARG_INT, 0.0, 0, NULL},
- { "Length of region used to judge hits", /* 29 */
- "20", NULL, NULL, FALSE, 'L', ARG_INT, 0.0, 0, NULL},
- { "Effective length of the search space (use zero for the real size)", /* 30 */
- "0", NULL, NULL, FALSE, 'Y', ARG_FLOAT, 0.0, 0, NULL},
- { "Query strands to search against database (for blast[nx], and tblastx). 3 is both, 1 is top, 2 is bottom", /* 31 */
- "3", NULL, NULL, FALSE, 'S', ARG_INT, 0.0, 0, NULL},
- { "RPS Blast search", /* 32 */
- "F", NULL, NULL, FALSE, 'R', ARG_BOOLEAN, 0.0, 0, NULL},
- { "Restrict search of database to results of Entrez2 lookup", /* 33 */
- NULL, NULL, NULL, TRUE, 'u', ARG_STRING, 0.0, 0, NULL},
- { "MegaBlast search", /* 34 */
- "F", NULL, NULL, FALSE, 'N', ARG_BOOLEAN, 0.0, 0, NULL}
-};
-
-#define MAX_NUM_QUERIES 16383 /* == 1/2 INT2_MAX */
-#define MAX_TOTAL_LENGTH 10000000
-/*********************************************************************
-*   "main" function to call blast for the client.
-*
-*   Checks the command-line arguments, opens the connection to the
-*   server, processes all the entries in the FASTA input file
-*   (read with FastaToSeqEntryEx, or FastaToSeqEntryForDb for
-*   megablast, where queries are submitted in batches bounded by
-*   MAX_NUM_QUERIES and MAX_TOTAL_LENGTH), and closes the connection.
-*
-*   Returns:
-*       0  on success
-*       1  bad arguments, unreadable input/output file, unknown program
-*          type, or failure to initialize/connect to the BLAST service
-*       2  a Bioseq could not be obtained from a FASTA entry
-*       3  the BLAST options block could not be allocated
-*********************************************************************/
-Int2 Main (void)
-
-{
-    BLAST_OptionsBlkPtr options;
-    BlastResponsePtr response = NULL;
-    BioseqPtr query_bsp;
-    BioSourcePtr source;
-    BlastNet3Hptr bl3hp;
-    BlastVersionPtr blast_version;
-    Boolean db_is_na, query_is_na, show_gi, believe_query=FALSE;
-    Boolean html=FALSE, status;
-    CharPtr date, motd, version;
-    Int2 retval;
-    Int4 number_of_descriptions, number_of_alignments;
-    SeqEntryPtr sep;
-    Uint1 align_type, align_view;
-    Uint4 align_options, print_options;
-    Int4 startloc, endloc;
-    SeqLocPtr slp;
-
-    CharPtr blast_database, blast_inputfile, blast_outputfile;
-    Char blast_program[32];
-
-    FILE *infp, *outfp;
-
-    if (! GetArgs ("blastcl3", NUMARGS, myargs)) {
-        return (1);
-    }
-
-    UseLocalAsnloadDataAndErrMsg ();
-
-    ErrSetFatalLevel (SEV_MAX); /* never die from ErrPostEx */
-
-    if (! SeqEntryLoad())
-        return 1;
-
-    /* NOTE(review): blast_program is 32 bytes and the -p value is copied
-       without a length check - confirm the argument parser bounds it. */
-    StringCpy(blast_program, myargs [0].strvalue);
-
-    /* For RPS Blast - anything not "blastp" - is "blastx" */
-
-    if(myargs[32].intvalue) {
-        if(StringICmp(blast_program, "blastp")) {
-            StringCpy(blast_program, "blastx");
-        }
-    }
-
-    blast_database = myargs [1].strvalue;
-    blast_inputfile = myargs [2].strvalue;
-    blast_outputfile = myargs [5].strvalue;
-    if (myargs[26].intvalue)
-        html = TRUE;
-
-    if ((infp = FileOpen(blast_inputfile, "r")) == NULL) {
-        ErrPostEx(SEV_FATAL, 0, 0, "blast: Unable to open input file %s\n", blast_inputfile);
-        return (1);
-    }
-
-    outfp = NULL;
-    if (blast_outputfile != NULL) {
-        if ((outfp = FileOpen(blast_outputfile, "w")) == NULL) {
-            ErrPostEx(SEV_FATAL, 0, 0, "blast: Unable to open output file %s\n", blast_outputfile);
-            return (1);
-        }
-    }
-
-    align_view = (Int1) myargs[4].intvalue;
-
-    align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na);
-
-    if(align_type == blast_type_undefined)
-        return 1;
-
-    /* Master-slave views are only produced for untranslated queries. */
-    if (StringICmp("blastx", blast_program) == 0) {
-        if (align_view != 0) {
-            ErrPostEx(SEV_FATAL, 0, 0, "This option is not available with blastx");
-            return 1;
-        }
-    } else if (StringICmp("tblastx", blast_program) == 0) {
-        if (align_view != 0) {
-            ErrPostEx(SEV_FATAL, 0, 0, "This option is not available with tblastx");
-            return 1;
-        }
-    }
-
-    believe_query = FALSE;
-    if (myargs[21].intvalue != 0)
-        believe_query = TRUE;
-
-    /* NOTE(review): the error is posted but execution continues, because
-       the fatal level was raised to SEV_MAX above - confirm intended. */
-    if (believe_query == FALSE && myargs[20].strvalue) {
-        ErrPostEx(SEV_FATAL, 0, 0, "-J option must be TRUE to produce a SeqAlign file");
-    }
-
-    options = BLASTOptionNew(blast_program, (Boolean) myargs [16].intvalue);
-    /* Check the allocation before the first dereference of options. */
-    if (options == NULL)
-        return 3;
-
-    options->is_megablast_search = (Boolean) myargs[34].intvalue;
-    if (options->is_megablast_search)
-        options->wordsize = 0;
-
-    /* If option RPS Blast set - option "program" is ignored by
-       the engine */
-
-    if(myargs[32].intvalue)
-        options->is_rps_blast = TRUE;
-
-    BLASTOptionSetGapParams(options, myargs[25].strvalue, 0, 0);
-    options->expect_value = (Nlm_FloatHi) myargs [3].floatvalue;
-    number_of_descriptions = myargs[13].intvalue;
-    number_of_alignments = myargs[14].intvalue;
-    options->hitlist_size = MAX(number_of_descriptions, number_of_alignments);
-    if (myargs[7].intvalue != 0)
-        options->gap_open = myargs[7].intvalue;
-    if (myargs[8].intvalue != 0)
-        options->gap_extend = myargs[8].intvalue;
-    if (myargs[9].intvalue != 0)
-        options->gap_x_dropoff = myargs[9].intvalue;
-
-    /* "T" selects the default filter for the program: DUST ("D") for
-       blastn, SEG ("S") otherwise; any other value is passed through. */
-    if (StringICmp(myargs[6].strvalue, "T") == 0) {
-        if (StringICmp("blastn", blast_program) == 0)
-            options->filter_string = StringSave("D");
-        else
-            options->filter_string = StringSave("S");
-    } else {
-        options->filter_string = StringSave(myargs[6].strvalue);
-    }
-
-    show_gi = (Boolean) myargs[10].intvalue;
-    if (StringICmp("blastn", blast_program) == 0) {
-        options->penalty = myargs[11].intvalue;
-        options->reward = myargs[12].intvalue;
-    } else {
-        if (myargs[15].intvalue != 0) {
-            options->threshold_first = myargs[15].intvalue;
-            options->threshold_second = myargs[15].intvalue;
-        }
-    }
-
-    options->genetic_code = myargs[17].intvalue;
-    options->db_genetic_code = myargs[18].intvalue;
-    options->number_of_cpus = myargs[19].intvalue;
-    if (myargs[22].intvalue != 0) {
-        options->wordsize = myargs[22].intvalue;
-        if (options->is_megablast_search)
-            options->wordsize += 4;
-    }
-
-    print_options = 0;
-    align_options = 0;
-    align_options += TXALIGN_COMPRESS;
-    align_options += TXALIGN_END_NUM;
-    if (StringICmp("blastx", blast_program) == 0) {
-        align_options += TXALIGN_BLASTX_SPECIAL;
-    }
-    if (show_gi) {
-        align_options += TXALIGN_SHOW_GI;
-        print_options += TXALIGN_SHOW_GI;
-    }
-    if (myargs[16].intvalue == 0)
-        print_options += TXALIGN_SHOW_NO_OF_SEGS;
-
-    if (align_view) {
-        align_options += TXALIGN_MASTER;
-        if (align_view == 1 || align_view == 3)
-            align_options += TXALIGN_MISMATCH;
-        if (align_view == 3 || align_view == 4 || align_view == 6)
-            align_options += TXALIGN_FLAT_INS;
-        if (align_view == 5 || align_view == 6)
-            align_options += TXALIGN_BLUNT_END;
-    } else {
-        align_options += TXALIGN_MATRIX_VAL;
-        align_options += TXALIGN_SHOW_QS;
-    }
-
-    if (html) {
-        align_options += TXALIGN_HTML;
-        print_options += TXALIGN_HTML;
-    }
-
-    if (myargs[27].strvalue) {
-        options->gifile = StringSave(myargs[27].strvalue);
-    }
-
-    options->hsp_range_max = myargs[28].intvalue;
-    if (options->hsp_range_max != 0 && !options->is_megablast_search)
-        options->perform_culling = TRUE;
-    if (!options->is_megablast_search)
-        options->block_width = myargs[29].intvalue;
-    else /* In megablast this has different meaning, will be default only */
-        options->block_width = 0;
-
-    /* Entry 30 (-Y) is the effective search space; the guard previously
-       tested entry 29, an integer argument, so -Y was never honoured. */
-    if (myargs[30].floatvalue)
-        options->searchsp_eff = (Nlm_FloatHi) myargs[30].floatvalue;
-
-    options->strand_option = myargs[31].intvalue;
-
-    if(myargs[33].strvalue)
-        options->entrez_query = StringSave(myargs[33].strvalue);
-
-    if (options->is_megablast_search) {
-        if (options->wordsize == 0)
-            options->wordsize = 32;
-        options->cutoff_s = options->wordsize;
-        options->cutoff_s2 = options->wordsize - 4;
-    }
-
-
-    if (! BlastInit("blastcl3", &bl3hp, &response)) {
-        ErrPostEx(SEV_FATAL, 0, 0, "Unable to initialize BLAST service");
-        FileClose(infp);
-        FileClose(outfp);
-        return (1);
-    }
-    if (response && response->choice == BlastResponse_init) {
-        blast_version = response->data.ptrvalue;
-        /* copy out of the response, which is freed right below */
-        version = StringSave(blast_version->version);
-        date = StringSave(blast_version->date);
-    } else {
-        ErrPostEx(SEV_FATAL, 0, 0, "Unable to connect to service");
-        FileClose(infp);
-        FileClose(outfp);
-        return 1;
-    }
-
-    BlastResponseFree(response);
-
-    BlastNetBioseqFetchEnable(bl3hp, blast_database, db_is_na, TRUE);
-
-    /* The message of the day is not shown for RPS Blast. */
-    if(!myargs[32].intvalue) {
-        motd = Blast3GetMotd(bl3hp);
-        PrintMotd(motd, outfp, html);
-        motd = MemFree(motd);
-    }
-
-    if (html)
-        fprintf(outfp, "<PRE>\n");
-
-    if(options->is_rps_blast == TRUE)
-        BlastPrintVersionInfoEx("RPS-BLAST", html, version, date, outfp);
-    else {
-        init_buff_ex(90);
-        BlastPrintVersionInfoEx(blast_program, html, version, date, outfp);
-        fprintf(outfp, "\n");
-        BlastPrintReference(html, 80, outfp);
-        free_buff();
-    }
-
-    MemFree(version);
-    MemFree(date);
-
-    fprintf(outfp, "\n");
-    retval=0;
-
-    if (options->is_megablast_search) {
-        Int4 total_length, num_bsps;
-        Boolean done;
-        SeqLocPtr last_mask, mask_slp;
-        Int2 ctr = 1;
-        Char prefix[2];
-
-        StrCpy(prefix, "");
-        done = FALSE;
-        slp = NULL;
-        /* Each pass of the outer loop submits one batch of queries. */
-        while (!done) {
-            num_bsps = 0;
-            total_length = 0;
-            done = TRUE;
-            SeqMgrHoldIndexing(TRUE);
-            mask_slp = last_mask = NULL;
-            while ((sep=FastaToSeqEntryForDb(infp, query_is_na, NULL,
-                                             believe_query, prefix, &ctr,
-                                             &mask_slp)) != NULL) {
-
-                /* Append this query's lower-case mask to the chain. */
-                if (mask_slp) {
-                    if (!last_mask)
-                        options->query_lcase_mask = last_mask = mask_slp;
-                    else {
-                        last_mask->next = mask_slp;
-                        last_mask = last_mask->next;
-                    }
-                    mask_slp = NULL;
-                }
-                query_bsp = NULL;
-                if (query_is_na)
-                    SeqEntryExplore(sep, &query_bsp, FindNuc);
-                else
-                    SeqEntryExplore(sep, &query_bsp, FindProt);
-
-                if (query_bsp == NULL) {
-                    ErrPostEx(SEV_FATAL, 0, 0, "Unable to obtain bioseq\n");
-                    return 2;
-                }
-
-                ValNodeAddPointer(&slp, SEQLOC_WHOLE,
-                                  SeqIdDup(SeqIdFindBest(query_bsp->id,
-                                                         SEQID_GI)));
-                num_bsps++;
-                total_length += query_bsp->length;
-
-                /* Release the wrapper before the batch-limit test so it
-                   is not leaked when the loop is left through break. */
-                sep = MemFree(sep); /* Do not free the underlying Bioseq */
-
-                if (total_length > MAX_TOTAL_LENGTH ||
-                    num_bsps >= MAX_NUM_QUERIES) {
-                    done = FALSE;
-                    break;
-                }
-            }
-            SeqMgrHoldIndexing(FALSE);
-            /* status is only meaningful when a search was actually run */
-            if (num_bsps > 0) {
-                status = TraditionalBlastReportLoc(slp, options, bl3hp,
-                                                   blast_program, blast_database,
-                                                   FALSE, outfp, TRUE,
-                                                   print_options, align_options,
-                                                   number_of_descriptions,
-                                                   number_of_alignments, NULL);
-                if (status == FALSE)
-                    ErrPostEx(SEV_ERROR, 0, 0,
-                              "An error has occurred on the server\n");
-            }
-            slp = SeqLocSetFree(slp);
-        }
-    } else {
-        while ((sep = FastaToSeqEntryEx(infp, query_is_na, NULL, believe_query)) != NULL) {
-            query_bsp = NULL;
-            SeqEntryExplore(sep, &query_bsp, query_is_na? FindNuc : FindProt);
-
-            /* Must be checked before query_bsp->length is read below. */
-            if (query_bsp == NULL) {
-                ErrPostEx(SEV_FATAL, 0, 0, "Unable to obtain bioseq\n");
-                retval = 2;
-                break;
-            }
-
-            /* Read boundaries of location */
-            startloc = myargs[23].intvalue - 1;
-            if (myargs[24].intvalue == -1)
-                endloc = query_bsp->length - 1;
-            else
-                endloc = myargs[24].intvalue - 1;
-
-            /* Attach a BioSource carrying the query genetic code. */
-            source = BioSourceNew();
-            source->org = OrgRefNew();
-            source->org->orgname = OrgNameNew();
-            source->org->orgname->gcode = options->genetic_code;
-            ValNodeAddPointer(&(query_bsp->descr), Seq_descr_source, source);
-
-            init_buff_ex(85);
-            AcknowledgeBlastQuery(query_bsp, 70, outfp, FALSE, html);
-            free_buff();
-
-            if (startloc || endloc != query_bsp->length - 1) {
-                /* Create the SeqLoc */
-                slp = SeqLocIntNew(startloc, endloc, Seq_strand_both, query_bsp->id);
-
-                status = TraditionalBlastReportLoc(slp, options, bl3hp,
-                                                   blast_program, blast_database,
-                                                   FALSE, outfp, TRUE,
-                                                   print_options, align_options,
-                                                   number_of_descriptions,
-                                                   number_of_alignments, NULL);
-                slp = SeqLocSetFree(slp);
-            } else {
-                status = TraditionalBlastReport(query_bsp, options, bl3hp,
-                                                blast_program, blast_database,
-                                                FALSE, outfp, TRUE, print_options,
-                                                align_options,
-                                                number_of_descriptions,
-                                                number_of_alignments, NULL);
-            }
-            if (status == FALSE)
-                ErrPostEx(SEV_ERROR, 0, 0, "An error has occurred on the server\n");
-            sep = SeqEntryFree(sep);
-        }
-    }
-    options = BLASTOptionDelete(options);
-    FileClose(infp);
-    FileClose (outfp);
-    BlastFini(bl3hp);
-    return retval;
-}
diff --git a/network/id1arch/idfetch.c b/network/id1arch/idfetch.c
index 09660d5d..416f03e6 100644
--- a/network/id1arch/idfetch.c
+++ b/network/id1arch/idfetch.c
@@ -25,6 +25,9 @@
* Author Karl Sirotkin
*
$Log: idfetch.c,v $
+ Revision 1.32 2004/05/25 18:41:35 kans
+ removed obsolete STREAM_SEQ_PORT_FIRST flag
+
Revision 1.31 2004/02/18 22:18:45 yaschenk
adding recognition of gnl|sat_name|ent seqids
@@ -1016,7 +1019,7 @@ static Boolean IdFetch_func(Int4 gi,CharPtr db, Int4 ent,Int2 maxplex)
case 3:
AssignIDsInEntity(0,OBJ_SEQENTRY,sep);
if(!SeqEntryToGnbk(sep,NULL,GENBANK_FMT,ENTREZ_MODE,0,SHOW_CONTIG_FEATURES|ONLY_NEAR_FEATURES,
- LOOKUP_FAR_COMPONENTS|LOOKUP_FAR_LOCATIONS|LOOKUP_FAR_PRODUCTS|LOOKUP_FAR_HISTORY|STREAM_SEQ_PORT_FIRST,0,NULL,fp)){
+ LOOKUP_FAR_COMPONENTS|LOOKUP_FAR_LOCATIONS|LOOKUP_FAR_PRODUCTS|LOOKUP_FAR_HISTORY,0,NULL,fp)){
ErrPostEx(SEV_WARNING,0,0,
"GenBank Format does not exist for this sequence ");
retval=FALSE;
@@ -1026,7 +1029,7 @@ static Boolean IdFetch_func(Int4 gi,CharPtr db, Int4 ent,Int2 maxplex)
case 4:
AssignIDsInEntity(0,OBJ_SEQENTRY,sep);
if(!SeqEntryToGnbk(sep,NULL,GENPEPT_FMT,ENTREZ_MODE,0,SHOW_CONTIG_FEATURES|ONLY_NEAR_FEATURES,
- LOOKUP_FAR_COMPONENTS|LOOKUP_FAR_LOCATIONS|LOOKUP_FAR_PRODUCTS|LOOKUP_FAR_HISTORY|STREAM_SEQ_PORT_FIRST,0,NULL,fp))
+ LOOKUP_FAR_COMPONENTS|LOOKUP_FAR_LOCATIONS|LOOKUP_FAR_PRODUCTS|LOOKUP_FAR_HISTORY,0,NULL,fp))
{
ErrPostEx(SEV_WARNING,0,0,
"GenPept Format does not exist for this sequence");
diff --git a/network/id2arch/id2.asn b/network/id2arch/id2.asn
index 739be22f..d44755f7 100644
--- a/network/id2arch/id2.asn
+++ b/network/id2arch/id2.asn
@@ -1,4 +1,4 @@
---$Revision: 1.11 $
+--$Revision: 1.12 $
--********************************************************************
--
-- Network Id server network access
@@ -279,7 +279,10 @@ ID2-Reply-Get-Seq-id ::= SEQUENCE {
-- resolved Seq-id
-- not set if error occurred
- seq-id SEQUENCE OF Seq-id OPTIONAL
+ seq-id SEQUENCE OF Seq-id OPTIONAL,
+
+ -- this Seq-id is the last one in the request
+ end-of-reply NULL OPTIONAL
}
@@ -297,7 +300,10 @@ ID2-Reply-Get-Blob-Id ::= SEQUENCE {
-- annotation types in this blob
-- annotation are unknown if this field is omitted
- annot-info SEQUENCE OF ID2S-Seq-annot-Info OPTIONAL
+ annot-info SEQUENCE OF ID2S-Seq-annot-Info OPTIONAL,
+
+ -- this Blob-id is the last one in the request
+ end-of-reply NULL OPTIONAL
}
diff --git a/network/id2arch/seqsplit.asn b/network/id2arch/seqsplit.asn
index 46932fe8..0b047a7e 100644
--- a/network/id2arch/seqsplit.asn
+++ b/network/id2arch/seqsplit.asn
@@ -1,4 +1,4 @@
---$Revision: 1.3 $
+--$Revision: 1.4 $
--********************************************************************
--
-- Network Id server network access
@@ -124,8 +124,14 @@ ID2S-Chunk-Data ::= SEQUENCE {
descrs SET OF Seq-descr OPTIONAL,
annots SET OF Seq-annot OPTIONAL,
assembly SET OF Seq-align OPTIONAL,
- seq-map SEQUENCE OF Seq-literal OPTIONAL,
- seq-data SEQUENCE OF Seq-literal OPTIONAL
+ seq-map SEQUENCE OF ID2S-Sequence-Piece OPTIONAL,
+ seq-data SEQUENCE OF ID2S-Sequence-Piece OPTIONAL
+}
+
+
+ID2S-Sequence-Piece ::= SEQUENCE {
+ start INTEGER, -- start position on sequence
+ data SEQUENCE OF Seq-literal
}
diff --git a/network/wwwblast/Src/test/run.pl b/network/wwwblast/Src/test/run.pl
new file mode 100755
index 00000000..ac2cf252
--- /dev/null
+++ b/network/wwwblast/Src/test/run.pl
@@ -0,0 +1,74 @@
+#! /usr/local/bin/perl
+# Runs each test query through the new and old binaries, timing and diffing them.
+if (not -e "Log") {
+ `mkdir Log`;
+}
+# The binary under test is named by the first command-line argument.
+my $app = $ARGV[0];
+my $diff = 'diff -w';
+my $time = &GetTimeCmd();
+
+#my $oldbin = "/net/blast012/export/home/web/public/htdocs/BLAST/bl2seq/$app";
+my $oldbin = "../wblast2.old.REAL";
+chomp(my $basedir = `pwd`);
+my $newbin = "$basedir/$app";
+
+my $out = "out";
+
+if (not -e "$out") {
+ `mkdir $out`;
+}
+# Test name => double-quoted query string handed to the binary as its argument.
+my %Tests;
+
+if ($app eq "wblast2.REAL") {
+ $Tests{'blastp'} = "\"ONE=129295&TWO=XP_222492.2&FILTER=1&PROGRAM=blastp\"";
+
+ $Tests{'blastn'} = "\"ONE=555&TWO=101&FILTER=1&PROGRAM=blastn\"";
+
+ $Tests{'megablast'} = "\"ONE=555&TWO=AC091728&FILTER=1&PROGRAM=blastn&MEGABLAST=yes&WORD=20\"";
+
+ $Tests{'tblastn'} = "\"ONE=9930103&TWO=9930102&FILTER=1&PROGRAM=tblastn\"";
+
+ $Tests{'blastx'} = "\"ONE=3090&TWO=3091&FILTER=1&PROGRAM=blastx\"";
+
+ $Tests{'tblastx'} = "\"ONE=555&TWO=101&PROGRAM=tblastx&FILTER=1&WORD=3\"";
+
+ $Tests{'blastn-minus'} = "\"ONE=NT_004487.15&TWO=AA441981.1&FROM=7685545&TO=7686027&FFROM=10&TTO=480&STRAND=2&FILTER=1&PROGRAM=blastn\"";
+
+ $Tests{'blastn-plus'} = "\"ONE=NT_004487.15&TWO=AA441981.1&FROM=7685545&TO=7686027&FFROM=10&TTO=480&STRAND=1&FILTER=1&PROGRAM=blastn\"";
+
+ $Tests{'fully-masked'} = "\"ONE=U09816&TWO=BX641126.1&FROM=1280&TO=1324&FFROM=2052&TTO=2082&STRAND=2&PROGRAM=blastn\"";
+} else {
+ if ($app eq "blast_cs.REAL") {
+ # No tests are defined for blast_cs.REAL yet.
+
+ }
+}
+foreach $test (keys %Tests) {
+ print "\nTest ", $test, "";
+ print "\n----------------\n";
+ print "Parameters: $Tests{$test}\n\n";
+
+ foreach $binary_type (qw(New Old)) {
+
+ print "\t\"$binary_type\". Time: ";
+ # Pick the binary for this pass; its stdout goes to .out and stderr to .err.
+ if ($binary_type eq "New") {
+ $binary = $newbin;
+ } else {
+ $binary = $oldbin;
+ }
+ $rv = system("$time sh -c '$binary $Tests{$test}' > $out/$test.$binary_type.out 2> $out/$test.$binary_type.err");
+ $time_str = `tail -3 $out/$test.$binary_type.err | tr -s "\n" " "`;
+ chomp($time_str);
+ print $time_str, "\n";
+ }
+ `$diff $out/$test.Old.out $out/$test.New.out > $out/$test.diff`;
+}
+
+sub GetTimeCmd() {
+    my $on_linux = (`uname` =~ /linux/i);   # uname output decides the variant
+    return $on_linux ? "/usr/bin/time -p" : "/usr/bin/time";
+}
+
diff --git a/network/wwwblast/Src/wblast2.c b/network/wwwblast/Src/wblast2.c
index 95fbbf20..61da2167 100644
--- a/network/wwwblast/Src/wblast2.c
+++ b/network/wwwblast/Src/wblast2.c
@@ -1,4 +1,4 @@
-/* $Id: wblast2.c,v 1.8 2003/12/19 18:12:37 coulouri Exp $
+/* $Id: wblast2.c,v 1.9 2004/05/14 17:25:07 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -27,12 +27,15 @@
*
* Initial Creation Date: 10/23/2000
*
-* $Revision: 1.8 $
+* $Revision: 1.9 $
*
* File Description:
* BLAST 2 Sequences CGI program
*
* $Log: wblast2.c,v $
+* Revision 1.9 2004/05/14 17:25:07 dondosha
+* Allow use of new BLAST engine
+*
* Revision 1.8 2003/12/19 18:12:37 coulouri
* fix name collision in aix
*
@@ -192,10 +195,17 @@
#include <blastpat.h>
#ifndef BL2SEQ_STANDALONE
-#include <objBlobj.h>
#include <qblastnet.h>
#endif
+#if 1
+#define USE_NEW_BLAST
+#endif
+
+#ifdef USE_NEW_BLAST
+#include <algo/blast/api/twoseq_api.h>
+#endif
+
#define MY_BLOSUM62 0
#define MY_PAM30 1
#define MY_PAM70 2
@@ -1153,8 +1163,8 @@ static void PrintOutScore(SeqAlignPtr sap, Boolean is_aa, Int4Ptr PNTR matrix, V
{
Int4 number, score;
Nlm_FloatHi bit_score, evalue;
- CharPtr eval_buff_ptr;
- static Char eval_buff[10], bit_score_buff[10];
+ CharPtr eval_buff_ptr;
+ Char eval_buff[10], bit_score_buff[10];
AlignSumPtr asp;
Int2 percent_identical, percent_positive;
ValNodePtr bs_list;
@@ -1166,32 +1176,12 @@ static void PrintOutScore(SeqAlignPtr sap, Boolean is_aa, Int4Ptr PNTR matrix, V
printf("<pre>\n");
GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
-
+
+ /* Evalue buffer for printing may be shifted if a digit is knocked off
+ before e. */
eval_buff_ptr = eval_buff;
- if (evalue < 1.0e-180) {
- sprintf(eval_buff, "0.0");
- } else if (evalue < 1.0e-99) {
- sprintf(eval_buff, "%2.0le", evalue);
- eval_buff_ptr++; /* Knock off digit. */
- } else if (evalue < 0.0009) {
- sprintf(eval_buff, "%3.0le", evalue);
- } else if (evalue < 0.1) {
- sprintf(eval_buff, "%4.3lf", evalue);
- } else if (evalue < 1.0) {
- sprintf(eval_buff, "%3.2lf", evalue);
- } else if (evalue < 10.0) {
- sprintf(eval_buff, "%2.1lf", evalue);
- } else {
- sprintf(eval_buff, "%5.0lf", evalue);
- }
-
- if (bit_score > 9999) {
- sprintf(bit_score_buff, "%4.3le", bit_score);
- } else if (bit_score > 99.9) {
- sprintf(bit_score_buff, "%4.0ld", (long) bit_score);
- } else {
- sprintf(bit_score_buff, "%4.1lf", bit_score);
- }
+ ScoreAndEvalueToBuffers(bit_score, evalue, bit_score_buff,
+ &eval_buff_ptr, TRUE);
if (number == 1) {
printf("Score = %s bits (%ld), Expect = %s<BR>",
@@ -1997,6 +1987,57 @@ static void* ConnectionThreadRun(void *p)
}
#endif
+#ifdef USE_NEW_BLAST
+/* Fill s_options (new BLAST engine) from the legacy options block; progname is
+   matched case-insensitively, like Main's tblastx check, else eChoose. */
+static void BLASTOptions2SummaryOptions(BLAST_OptionsBlk* options,
+                                        Char* progname,
+                                        BLAST_SummaryOptions* s_options)
+{
+   if (!options || !s_options)
+      return;
+
+   if (options->is_megablast_search)
+      s_options->hint = eFast;
+
+   if (!StringICmp(progname, "blastn"))
+      s_options->program = eBlastn;
+   else if (!StringICmp(progname, "blastp"))
+      s_options->program = eBlastp;
+   else if (!StringICmp(progname, "blastx"))
+      s_options->program = eBlastx;
+   else if (!StringICmp(progname, "tblastn"))
+      s_options->program = eTblastn;
+   else if (!StringICmp(progname, "tblastx"))
+      s_options->program = eTblastx;
+   else
+      s_options->program = eChoose;
+
+   s_options->strand = options->strand_option;
+   s_options->cutoff_evalue = options->expect_value;
+   if (options->matrix)
+      s_options->matrix = strdup(options->matrix);
+
+   if (options->filter_string)
+      s_options->filter_string = strdup(options->filter_string);
+   else /* If filtering option not set, assume no filtering! */
+      s_options->filter_string = strdup("F");
+
+   s_options->word_size = options->wordsize;
+   s_options->gapped_calculation = options->gapped_calculation;
+   s_options->nucleotide_match = options->reward;
+   s_options->nucleotide_mismatch = options->penalty;
+   s_options->gap_open = options->gap_open;
+   s_options->gap_extend = options->gap_extend;
+   s_options->gap_x_dropoff = options->gap_x_dropoff;
+   s_options->db_length = options->db_length;
+   s_options->word_threshold = options->threshold_second;
+   /* A window size of 0 enforces the single hit method for initial words. */
+   if (options->window_size == 0)
+      s_options->init_seed_method = eOneHit;
+}
+#endif
+
#define BL2SEQ_CPU_LIMIT 240
Int2 Main(void)
@@ -2009,7 +2050,7 @@ Int2 Main(void)
Int4 from=0, to=0, ffrom=0, tto=0;
Int2 wordsize, filter=0;
Int2 mtrx = 0, color=1;
- SeqAlignPtr seqalign, sap, sapnext;
+ SeqAlignPtr seqalign = NULL, sap, sapnext;
SeqAnnotPtr hsat= NULL, sat, satnext;
FloatHi expect;
Boolean is_prot=FALSE, is_aa1=FALSE, is_aa2=FALSE, is_na1=TRUE, is_na2=TRUE;
@@ -2019,8 +2060,8 @@ Int2 Main(void)
BLAST_OptionsBlkPtr options = NULL;
Int4 ll, len1, len2, txoption;
SeqIdPtr sip;
- Int4Ptr PNTR txmatrix;
- BLAST_MatrixPtr blast_matrix;
+ Int4Ptr PNTR txmatrix = NULL;
+ BLAST_MatrixPtr blast_matrix = NULL;
SeqPortPtr spp;
Uint1 code1, code2;
ValNodePtr vnp, error_return=NULL;
@@ -2037,7 +2078,7 @@ Int2 Main(void)
Uint1 align_type;
ValNodePtr other_returns, mask, mask_head;
- CharPtr buffer;
+ CharPtr buffer = NULL;
BLAST_KarlinBlkPtr ka_params=NULL, ka_gap_params=NULL;
TxDfDbInfoPtr dbinfo = NULL;
BlastTimeKeeper time_keeper;
@@ -2059,6 +2100,11 @@ Int2 Main(void)
int pid, time_start, results_size = 0;
#endif
+#ifdef USE_NEW_BLAST
+ BLAST_SummaryOptions* s_options = NULL;
+ BLAST_SummaryReturn* s_return = NULL;
+#endif
+
#ifdef RLIMIT_CPU
struct rlimit rl;
@@ -2134,6 +2180,12 @@ Int2 Main(void)
progname = theInfo->program;
+ /* For tblastx, we still do a protein search, even though both sequences
+ are nucleotide. */
+ if (!(StringICmp(progname, "tblastx")))
+ is_prot = TRUE;
+
+
if ((chptr = WWWGetValueByName(theInfo->info, "MEGABLAST")) != NULL)
is_megablast = TRUE;
options = BLASTOptionNewEx(progname, TRUE, is_megablast);
@@ -2545,9 +2597,14 @@ Int2 Main(void)
logmsg(0);
#endif
+#ifndef USE_NEW_BLAST
seqalign = BlastTwoSequencesByLocEx(slp1, slp2, progname,
options, &other_returns, NULL);
-
+#else
+ BLAST_SummaryOptionsInit(&s_options);
+ BLASTOptions2SummaryOptions(options, progname, s_options);
+ BLAST_TwoSeqLocSets(s_options, slp1, slp2, &seqalign, &mask, &s_return);
+#endif
run_status = WBLAST2_FORMAT;
/* seqalign = BlastTwoSequencesEx(query_bsp, subject_bsp, progname,
options, &other_returns, NULL);*/
@@ -2576,6 +2633,7 @@ Int2 Main(void)
return 0;
}
+#ifndef USE_NEW_BLAST
mask = NULL;
for (vnp=other_returns; vnp; vnp = vnp->next) {
switch (vnp->choice) {
@@ -2611,6 +2669,7 @@ Int2 Main(void)
}
ValNodeFree(other_returns);
+#endif
to = (to >0) ? to : fake_bsp->length;
tto = (tto >0) ? tto : subject_bsp->length;
@@ -2964,6 +3023,7 @@ Int2 Main(void)
if (txmatrix)
txmatrix = TxMatrixDestruct(txmatrix);
dbinfo = TxDfDbInfoDestruct(dbinfo);
+#ifndef USE_NEW_BLAST
if (ka_params) {
PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H,
70, stdout, FALSE);
@@ -2983,6 +3043,19 @@ Int2 Main(void)
mask = mask->next;
}
ValNodeFree(mask_head);
+#else
+ if (s_return->ka_params) {
+ PrintKAParameters(s_return->ka_params->Lambda, s_return->ka_params->K,
+ s_return->ka_params->H, 70, stdout, FALSE);
+ }
+ if (s_return->ka_params_gap) {
+ PrintKAParameters(s_return->ka_params_gap->Lambda,
+ s_return->ka_params_gap->K,
+ s_return->ka_params_gap->H, 70, stdout, TRUE);
+ }
+ PrintTildeSepLines(s_return->params_buffer, 70, stdout);
+
+#endif
CreateTailHTML();
run_status = WBLAST2_DONE;
options = BLASTOptionDelete(options);
diff --git a/object/objalign.c b/object/objalign.c
index ecbb9ede..a1554092 100644
--- a/object/objalign.c
+++ b/object/objalign.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.10 $
+* $Revision: 6.11 $
*
* File Description: Object manager for module NCBI-Seqalign
*
@@ -628,6 +628,7 @@ ret:
AsnUnlinkType(orig); /* unlink local tree */
return sap;
erret:
+ aip->io_failure = TRUE;
sap = SeqAlignFree(sap);
goto ret;
}
diff --git a/object/objalignloc.c b/object/objalignloc.c
index 6bb43f81..768a312b 100644
--- a/object/objalignloc.c
+++ b/object/objalignloc.c
@@ -64,7 +64,6 @@ NLM_EXTERN
AlignLocSetPtr LIBCALL
AlignLocSetAsnRead(AsnIoPtr aip, AsnTypePtr orig)
{
- DataVal av;
AsnTypePtr atp;
Boolean isError = FALSE;
AsnReadFunc func;
@@ -120,7 +119,6 @@ erret:
NLM_EXTERN Boolean LIBCALL
AlignLocSetAsnWrite(AlignLocSetPtr ptr, AsnIoPtr aip, AsnTypePtr orig)
{
- DataVal av;
AsnTypePtr atp;
Boolean retval = FALSE;
@@ -399,7 +397,6 @@ erret:
NLM_EXTERN Boolean LIBCALL
AlignLocAsnWrite(AlignLocPtr ptr, AsnIoPtr aip, AsnTypePtr orig)
{
- DataVal av;
AsnTypePtr atp;
Boolean retval = FALSE;
diff --git a/object/objfeat.c b/object/objfeat.c
index d31808c7..e1177b59 100644
--- a/object/objfeat.c
+++ b/object/objfeat.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.22 $
+* $Revision: 6.23 $
*
* File Description: Object manager for module NCBI-SeqFeat
*
@@ -512,6 +512,7 @@ ret:
AsnUnlinkType(orig); /* unlink local tree */
return sfp;
erret:
+ aip->io_failure = TRUE;
sfp = SeqFeatFree(sfp);
goto ret;
}
diff --git a/object/objgbseq.c b/object/objgbseq.c
index 49d70365..a663ddfa 100644
--- a/object/objgbseq.c
+++ b/object/objgbseq.c
@@ -1304,7 +1304,6 @@ NLM_EXTERN
GBSetPtr LIBCALL
GBSetAsnRead(AsnIoPtr aip, AsnTypePtr orig)
{
- DataVal av;
AsnTypePtr atp;
Boolean isError = FALSE;
AsnReadFunc func;
@@ -1360,7 +1359,6 @@ erret:
NLM_EXTERN Boolean LIBCALL
GBSetAsnWrite(GBSetPtr ptr, AsnIoPtr aip, AsnTypePtr orig)
{
- DataVal av;
AsnTypePtr atp;
Boolean retval = FALSE;
diff --git a/object/objinsdseq.c b/object/objinsdseq.c
index c441c26e..0c6d1113 100644
--- a/object/objinsdseq.c
+++ b/object/objinsdseq.c
@@ -31,7 +31,7 @@ objinsdseqAsnLoad(void)
/**************************************************
* Generated object loaders for Module INSD-INSDSeq
-* Generated using ASNCODE Revision: 6.14 at Apr 28, 2004 5:15 PM
+* Generated using ASNCODE Revision: 6.14 at Jun 1, 2004 11:01 AM
*
**************************************************/
@@ -47,9 +47,6 @@ INSDSeqNew(void)
{
INSDSeqPtr ptr = MemNew((size_t) sizeof(INSDSeq));
- ptr -> strandedness = 0;
- ptr -> moltype = 0;
- ptr -> topology = 1;
return ptr;
}
@@ -69,6 +66,9 @@ INSDSeqFree(INSDSeqPtr ptr)
return NULL;
}
MemFree(ptr -> locus);
+ MemFree(ptr -> strandedness);
+ MemFree(ptr -> moltype);
+ MemFree(ptr -> topology);
MemFree(ptr -> division);
MemFree(ptr -> update_date);
MemFree(ptr -> create_date);
@@ -162,21 +162,21 @@ INSDSeqAsnRead(AsnIoPtr aip, AsnTypePtr orig)
if ( AsnReadVal(aip, atp, &av) <= 0) {
goto erret;
}
- ptr -> strandedness = av.intvalue;
+ ptr -> strandedness = av.ptrvalue;
atp = AsnReadId(aip,amp, atp);
}
if (atp == INSDSEQ_moltype) {
if ( AsnReadVal(aip, atp, &av) <= 0) {
goto erret;
}
- ptr -> moltype = av.intvalue;
+ ptr -> moltype = av.ptrvalue;
atp = AsnReadId(aip,amp, atp);
}
if (atp == INSDSEQ_topology) {
if ( AsnReadVal(aip, atp, &av) <= 0) {
goto erret;
}
- ptr -> topology = av.intvalue;
+ ptr -> topology = av.ptrvalue;
atp = AsnReadId(aip,amp, atp);
}
if (atp == INSDSEQ_division) {
@@ -404,12 +404,18 @@ INSDSeqAsnWrite(INSDSeqPtr ptr, AsnIoPtr aip, AsnTypePtr orig)
}
av.intvalue = ptr -> length;
retval = AsnWrite(aip, INSDSEQ_length, &av);
- av.intvalue = ptr -> strandedness;
- retval = AsnWrite(aip, INSDSEQ_strandedness, &av);
- av.intvalue = ptr -> moltype;
- retval = AsnWrite(aip, INSDSEQ_moltype, &av);
- av.intvalue = ptr -> topology;
- retval = AsnWrite(aip, INSDSEQ_topology, &av);
+ if (ptr -> strandedness != NULL) {
+ av.ptrvalue = ptr -> strandedness;
+ retval = AsnWrite(aip, INSDSEQ_strandedness, &av);
+ }
+ if (ptr -> moltype != NULL) {
+ av.ptrvalue = ptr -> moltype;
+ retval = AsnWrite(aip, INSDSEQ_moltype, &av);
+ }
+ if (ptr -> topology != NULL) {
+ av.ptrvalue = ptr -> topology;
+ retval = AsnWrite(aip, INSDSEQ_topology, &av);
+ }
if (ptr -> division != NULL) {
av.ptrvalue = ptr -> division;
retval = AsnWrite(aip, INSDSEQ_division, &av);
@@ -1304,7 +1310,6 @@ NLM_EXTERN
INSDSetPtr LIBCALL
INSDSetAsnRead(AsnIoPtr aip, AsnTypePtr orig)
{
- DataVal av;
AsnTypePtr atp;
Boolean isError = FALSE;
AsnReadFunc func;
@@ -1360,7 +1365,6 @@ erret:
NLM_EXTERN Boolean LIBCALL
INSDSetAsnWrite(INSDSetPtr ptr, AsnIoPtr aip, AsnTypePtr orig)
{
- DataVal av;
AsnTypePtr atp;
Boolean retval = FALSE;
diff --git a/object/objinsdseq.h b/object/objinsdseq.h
index c11a714f..ccecdb3e 100644
--- a/object/objinsdseq.h
+++ b/object/objinsdseq.h
@@ -17,7 +17,7 @@ extern "C" { /* } */
/**************************************************
*
* Generated objects for Module INSD-INSDSeq
-* Generated using ASNCODE Revision: 6.14 at Apr 28, 2004 5:15 PM
+* Generated using ASNCODE Revision: 6.14 at Jun 1, 2004 11:01 AM
*
**************************************************/
@@ -35,9 +35,9 @@ typedef struct struct_INSDSeq {
Uint4 OBbits__;
CharPtr locus;
Int4 length;
- Int4 strandedness;
- Int4 moltype;
- Int4 topology;
+ CharPtr strandedness;
+ CharPtr moltype;
+ CharPtr topology;
CharPtr division;
CharPtr update_date;
CharPtr create_date;
diff --git a/object/objloc.c b/object/objloc.c
index 8ccf564d..6fbe3297 100644
--- a/object/objloc.c
+++ b/object/objloc.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.7 $
+* $Revision: 6.8 $
*
* File Description: Object manager for module NCBI-Seqloc
*
@@ -41,6 +41,9 @@
*
*
* $Log: objloc.c,v $
+* Revision 6.8 2004/05/12 20:41:56 kans
+* set aip->io_failure in several erret blocks for compatibility of old object loaders with new ones
+*
* Revision 6.7 2004/04/01 13:43:08 lavr
* Spell "occurred", "occurrence", and "occurring"
*
@@ -802,6 +805,7 @@ ret:
AsnUnlinkType(elementtype);
return first;
erret:
+ aip->io_failure = TRUE;
first = SeqIdSetFree(first);
goto ret;
}
@@ -1953,6 +1957,7 @@ ret:
AsnUnlinkType(element);
return first;
erret:
+ aip->io_failure = TRUE;
first = SeqLocSetFree(first);
goto ret;
}
diff --git a/object/objproj.c b/object/objproj.c
index b1da988f..077444d6 100644
--- a/object/objproj.c
+++ b/object/objproj.c
@@ -188,7 +188,6 @@ erret:
NLM_EXTERN Boolean LIBCALL
ProjectAsnWrite(ProjectPtr ptr, AsnIoPtr aip, AsnTypePtr orig)
{
- DataVal av;
AsnTypePtr atp;
Boolean retval = FALSE;
diff --git a/object/objres.c b/object/objres.c
index 872351e1..cbe36704 100644
--- a/object/objres.c
+++ b/object/objres.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.5 $
+* $Revision: 6.6 $
*
* File Description: Object manager for module NCBI-Seqres
*
@@ -41,6 +41,9 @@
*
*
* $Log: objres.c,v $
+* Revision 6.6 2004/05/12 20:41:56 kans
+* set aip->io_failure in several erret blocks for compatibility of old object loaders with new ones
+*
* Revision 6.5 2004/04/01 13:43:08 lavr
* Spell "occurred", "occurrence", and "occurring"
*
@@ -498,6 +501,7 @@ ret:
AsnUnlinkType(orig); /* unlink local tree */
return sgp;
erret:
+ aip->io_failure = TRUE;
sgp = SeqGraphFree(sgp);
goto ret;
}
diff --git a/object/objseq.c b/object/objseq.c
index b819fe41..da51a728 100644
--- a/object/objseq.c
+++ b/object/objseq.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.18 $
+* $Revision: 6.19 $
*
* File Description: Object manager for module NCBI-Seq
*
@@ -888,6 +888,7 @@ ret:
AsnUnlinkType(orig); /* unlink local tree */
return bsp;
erret:
+ aip->io_failure = TRUE;
bsp = BioseqFree(bsp);
goto ret;
}
@@ -1667,6 +1668,7 @@ ret:
AsnUnlinkType(orig); /* unlink local tree */
return first;
erret:
+ aip->io_failure = TRUE;
first = SeqDescrFree(first);
goto ret;
}
@@ -1852,6 +1854,7 @@ ret:
AsnUnlinkType(orig); /* unlink local tree */
return anp;
erret:
+ aip->io_failure = TRUE;
anp = SeqDescFree(anp);
goto ret;
}
@@ -2980,6 +2983,7 @@ ret:
AsnUnlinkType(orig); /* unlink local tree */
return sap;
erret:
+ aip->io_failure = TRUE;
sap = SeqAnnotFree(sap);
goto ret;
}
diff --git a/object/objsset.c b/object/objsset.c
index b541cef2..4e62711a 100644
--- a/object/objsset.c
+++ b/object/objsset.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.7 $
+* $Revision: 6.8 $
*
* File Description: Object manager for module NCBI-Seqset
*
@@ -41,6 +41,9 @@
*
*
* $Log: objsset.c,v $
+* Revision 6.8 2004/05/12 20:41:57 kans
+* set aip->io_failure in several erret blocks for compatibility of old object loaders with new ones
+*
* Revision 6.7 2004/04/01 13:43:08 lavr
* Spell "occurred", "occurrence", and "occurring"
*
@@ -848,6 +851,7 @@ ret:
AsnUnlinkType(orig); /* unlink local tree */
return bsp;
erret:
+ aip->io_failure = TRUE;
bsp = BioseqSetFree(bsp);
goto ret;
}
@@ -1008,6 +1012,7 @@ ret:
AsnUnlinkType(orig); /* unlink local tree */
return sep;
erret:
+ aip->io_failure = TRUE;
sep = SeqEntryFree(sep);
goto ret;
}
diff --git a/object/objsub.c b/object/objsub.c
index 3472fb81..43411198 100644
--- a/object/objsub.c
+++ b/object/objsub.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/1/91
*
-* $Revision: 6.2 $
+* $Revision: 6.3 $
*
* File Description: Object manager for module NCBI-Submit
*
@@ -41,6 +41,9 @@
*
*
* $Log: objsub.c,v $
+* Revision 6.3 2004/05/12 20:41:57 kans
+* set aip->io_failure in several erret blocks for compatibility of old object loaders with new ones
+*
* Revision 6.2 2004/04/01 13:43:08 lavr
* Spell "occurred", "occurrence", and "occurring"
*
@@ -606,6 +609,7 @@ ret:
AsnUnlinkType(orig); /* unlink local tree */
return ssp;
erret:
+ aip->io_failure = TRUE;
ssp = SeqSubmitFree(ssp);
goto ret;
}
@@ -801,6 +805,7 @@ ret:
AsnUnlinkType(orig); /* unlink local tree */
return sbp;
erret:
+ aip->io_failure = TRUE;
sbp = SubmitBlockFree(sbp);
goto ret;
}
diff --git a/object/objtseq.c b/object/objtseq.c
index 5eaa5c32..e5f8f4b2 100644
--- a/object/objtseq.c
+++ b/object/objtseq.c
@@ -318,7 +318,6 @@ NLM_EXTERN
TSeqSetPtr LIBCALL
TSeqSetAsnRead(AsnIoPtr aip, AsnTypePtr orig)
{
- DataVal av;
AsnTypePtr atp;
Boolean isError = FALSE;
AsnReadFunc func;
@@ -374,7 +373,6 @@ erret:
NLM_EXTERN Boolean LIBCALL
TSeqSetAsnWrite(TSeqSetPtr ptr, AsnIoPtr aip, AsnTypePtr orig)
{
- DataVal av;
AsnTypePtr atp;
Boolean retval = FALSE;
diff --git a/platform/linux-alpha.ncbi.mk b/platform/linux-alpha.ncbi.mk
new file mode 100644
index 00000000..c695d340
--- /dev/null
+++ b/platform/linux-alpha.ncbi.mk
@@ -0,0 +1,51 @@
+#
+# $Id: linux-alpha.ncbi.mk,v 1.1 2004/05/13 17:10:26 ucko Exp $
+#
+NCBI_DEFAULT_LCL = lnx
+NCBI_MAKE_SHELL = /bin/sh
+#warning, the flags -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 will allow
+#you to work with large (>4Gb) files only if you have glibc version >= 2.1
+#NCBI_CC = gcc -pipe -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64
+#it appears the flags above do not work anymore with newer libc,
+#the new flags should work. Dima. 08/23/01
+NCBI_AR=ar
+NCBI_CC = gcc -pipe -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE
+NCBI_CFLAGS1 = -c
+NCBI_LDFLAGS1 = -O3 -mieee
+NCBI_OPTFLAG = -O3 -mieee
+NCBI_BIN_MASTER = /home/coremake/ncbi/bin
+NCBI_BIN_COPY = /home/coremake/ncbi/bin
+NCBI_INCDIR = /home/coremake/ncbi/include
+NCBI_LIBDIR = /home/coremake/ncbi/lib
+NCBI_ALTLIB = /home/coremake/ncbi/altlib
+#will work only when you have Motif installed!
+NCBI_VIBFLAG = -I/usr/X11R6/include -L/usr/X11R6/lib -DWIN_MOTIF
+NCBI_VIBLIBS = -lXmu -lXm -lXt -lSM -lICE -lXext -lXp -lX11 -ldl
+#warning! If you have only dynamic version of Motif or Lesstif, make sure
+#no -Wl,-Bstatic is present on the next line (none is at the moment):
+NCBI_DISTVIBLIBS = -L/usr/X11R6/lib -lXmu -lXm -lXt -lSM -lICE -lXext -lXp -lX11 -ldl
+NCBI_OTHERLIBS = -lm
+NCBI_RANLIB = ranlib
+# Used by makedis.csh
+NCBI_MT_OTHERLIBS = -lpthread
+NCBI_OTHERLIBS_MT = $(NCBI_MT_OTHERLIBS) -lm
+NCBI_THREAD_OBJ = ncbithr.o
+NETENTREZVERSION = 2.02c2ASN1SPEC6
+
+# uncomment OPENGL_TARGETS to build OpenGL apps; do not change
+# OPENGL_NCBI_LIBS! However, may need to set
+# OPENGL_INCLUDE and OPENGL_LIBS to suit local environment
+# OPENGL_TARGETS = Cn3D
+OPENGL_NCBI_LIBS = LIB400=libvibrantOGL.a LIB3000=libncbicn3dOGL.a
+OPENGL_INCLUDE = -I/usr/X11R6/include
+OPENGL_LIBS = -L/usr/X11R6/lib -lGL -lGLU
+NCBI_OGLLIBS = -L/usr/X11R6/lib -lGL -lGLU
+
+# uncomment (and change appropriately) these lines to build PNG
+# output support into Cn3D (OpenGL version only)
+#LIBPNG_DIR = /home/paul/Programs/libpng
+#ZLIB_DIR = /home/paul/Programs/zlib
+
+NCBI_LBSM_SRC = ncbi_lbsmd_stub.c
+NCBI_LBSM_OBJ = ncbi_lbsmd_stub.o
+
diff --git a/sequin/sequin.h b/sequin/sequin.h
index 5458109f..bed4357a 100644
--- a/sequin/sequin.h
+++ b/sequin/sequin.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/22/95
*
-* $Revision: 6.134 $
+* $Revision: 6.138 $
*
* File Description:
*
@@ -228,6 +228,8 @@ extern void ConsolidateOrganismNotes (IteM i);
extern void ConsolidateLikeModifiersWithSemicolons (IteM i);
extern void ConsolidateLikeModifiersWithoutSemicolons (IteM i);
+extern void CountryLookup (IteM i);
+
extern void ExtendPartialFeatures (IteM i);
extern void TrimOrganismName (IteM i);
@@ -485,6 +487,8 @@ extern void SetupEditSecondary (MenU m, BaseFormPtr bfp);
extern void EditLocusProc (IteM i);
extern void ConvertToLocalProc (IteM i);
+extern ValNodePtr BuildDescriptorValNodeList (void);
+
extern void RemoveFeature (IteM i);
extern void RemoveDescriptor (IteM i);
@@ -499,6 +503,8 @@ extern void MakeExonsFromMRNAIntervals (IteM i);
extern Int2 LIBCALLBACK CreateDeleteByTextWindow (Pointer data);
extern Int2 LIBCALLBACK CreateSegregateByTextWindow (Pointer data);
+extern Int2 LIBCALLBACK CreateSegregateByFeatureWindow (Pointer data);
+extern Int2 LIBCALLBACK CreateSegregateByDescriptorWindow (Pointer data);
extern Int2 LIBCALLBACK RemoveExtraneousSets (Pointer data);
extern void RemoveOrphanProteins (Uint2 entityID, SeqEntryPtr sep);
extern void ParseAsnOrFlatfileToAnywhere (IteM i);
@@ -552,6 +558,7 @@ extern void DownloadAndExtendProc (ButtoN b);
extern void UpdateSeqAfterDownload (BaseFormPtr bfp, BioseqPtr oldbsp, BioseqPtr newbsp);
extern void ExtendSeqAfterDownload (BaseFormPtr bfp, BioseqPtr oldbsp, BioseqPtr newbsp);
extern void NewUpdateSequence (IteM i);
+extern void NewUpdateSequenceNewBlast (IteM i);
extern void NewExtendSequence (IteM i);
extern void FastaNucDirectToSeqEdProc (IteM i);
@@ -562,6 +569,7 @@ extern void ParseAntiCodonsFromtRNAComment (IteM i);
extern void RemoveAlignment (IteM i);
extern void RemoveGraph (IteM i);
extern void RemoveProteins (IteM i);
+extern void RemoveProteinsAndRenormalize (IteM i);
extern void GlobalAddTranslExcept (IteM i);
diff --git a/sequin/sequin1.c b/sequin/sequin1.c
index d5f65dd5..a768978d 100644
--- a/sequin/sequin1.c
+++ b/sequin/sequin1.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/22/95
*
-* $Revision: 6.450 $
+* $Revision: 6.458 $
*
* File Description:
*
@@ -129,7 +129,7 @@ static char *time_of_compilation = "now";
#include <Gestalt.h>
#endif
-#define SEQ_APP_VER "5.22"
+#define SEQ_APP_VER "5.25"
#ifndef CODECENTER
static char* sequin_version_binary = "Sequin Indexer Services Version " SEQ_APP_VER " " __DATE__ " " __TIME__;
@@ -1099,7 +1099,7 @@ static void SmartResetProc (IteM i)
status = sm_usr_data->header->status;
sm_usr_data->header->status = SMStatClosed;
SMSendMsgToClient(sm_usr_data);
- sm_usr_data->header->status = status;
+ sm_usr_data->header->status = (SMStatusCode)status;
return;
}
}
@@ -3084,7 +3084,7 @@ static void PrintExtraErrorInstructions (FILE *fp, CharPtr message)
"\nMiddle gap characters are used to maintain the spacing "
"inside an alignment. These are not nucleotides and will "
"not appear as part of your sequence file.\n"
-"Missing characters are used to represent indeterminate/ambiguous "
+"Ambiguous/unknown characters are used to represent indeterminate/ambiguous "
"nucleotides. These will appear in your sequence file as 'n'.\n"
"Match characters are used to indicate positions where "
"sequences are identical to the first sequence. These will be "
@@ -3131,9 +3131,25 @@ static void WalkErrorList (TErrorInfoPtr list, FILE *fp)
}
+/* Count the characters of a NUL-terminated sequence string that are not the
+   '-' character; a NULL sequence yields zero. */
+static Int4 CountNucleotides (CharPtr sequence)
+{
+  Int4 total = 0;
+
+  if (sequence == NULL) {
+    return total;
+  }
+  while (*sequence != 0) {
+    total += (*sequence != '-') ? 1 : 0;
+    sequence++;
+  }
+  return total;
+}
+
static void PrintAlignmentSummary (TAlignmentFilePtr afp, FILE *fp)
{
- Int4 index;
+ Int4 index;
if (fp == NULL) return;
@@ -3142,18 +3158,35 @@ static void PrintAlignmentSummary (TAlignmentFilePtr afp, FILE *fp)
} else {
fprintf (fp, "Found %d sequences\n", afp->num_sequences);
fprintf (fp, "Found %d organisms\n", afp->num_organisms);
- for (index = 0; index < afp->num_sequences; index++)
+ if (afp->num_sequences == afp->num_segments * afp->num_organisms)
{
- fprintf (fp, "\t%s\t", afp->ids [index]);
- if (index < afp->num_organisms) {
- fprintf (fp, "%s\n", afp->organisms [index]);
- } else {
- fprintf (fp, "No organism information\n");
- }
+ for (index = 0; index < afp->num_sequences; index++)
+ {
+ fprintf (fp, "\t%s\t%d nucleotides\t", afp->ids [index],
+ CountNucleotides (afp->sequences[index]));
+ if (index / afp->num_segments < afp->num_organisms) {
+ fprintf (fp, "%s\n", afp->organisms [index / afp->num_segments]);
+ } else {
+ fprintf (fp, "No organism information\n");
+ }
+ }
}
- while (index < afp->num_organisms) {
- fprintf (fp, "Unclaimed organism: %s\n", afp->organisms [index]);
- index++;
+ else
+ {
+ for (index = 0; index < afp->num_sequences; index++)
+ {
+ fprintf (fp, "\t%s\t%d nucleotides\t", afp->ids [index],
+ CountNucleotides (afp->sequences[index]));
+ if (index < afp->num_organisms) {
+ fprintf (fp, "%s\n", afp->organisms [index]);
+ } else {
+ fprintf (fp, "No organism information\n");
+ }
+ }
+ while (index < afp->num_organisms) {
+ fprintf (fp, "Unclaimed organism: %s\n", afp->organisms [index]);
+ index++;
+ }
}
}
}
@@ -3194,19 +3227,33 @@ typedef struct alphabetformdata {
static Boolean DoSequenceLengthsMatch (TAlignmentFilePtr afp)
{
- int seq_index;
- Int4 seq_len;
+ int seq_index;
+ int curr_seg;
+ Int4Ptr seq_len;
+ Boolean rval;
if (afp == NULL || afp->sequences == NULL || afp->num_sequences == 0) {
return TRUE;
}
- seq_len = StringLen (afp->sequences[0]);
- for (seq_index = 1; seq_index < afp->num_sequences; seq_index++) {
- if (StringLen (afp->sequences[seq_index]) != seq_len) {
- return FALSE;
+
+ seq_len = (Int4Ptr) MemNew (sizeof (Int4) * afp->num_segments);
+ if (seq_len == NULL) return FALSE;
+ for (seq_index = 0; seq_index < afp->num_segments; seq_index ++)
+ {
+ seq_len [seq_index] = StringLen (afp->sequences[seq_index]);
+ }
+
+ curr_seg = 0;
+ rval = TRUE;
+ for (seq_index = afp->num_segments; seq_index < afp->num_sequences && rval; seq_index++) {
+ if (StringLen (afp->sequences[seq_index]) != seq_len[curr_seg]) {
+ rval = FALSE;
}
+ curr_seg ++;
+ if (curr_seg >= afp->num_segments) curr_seg = 0;
}
- return TRUE;
+ MemFree (seq_len);
+ return rval;
}
extern SeqEntryPtr
@@ -3260,7 +3307,7 @@ SeqEntryFromAlignmentFile
if (afp != NULL) {
if (afp->num_organisms == 0 && no_org_err_msg != NULL) {
Message (MSG_ERROR, no_org_err_msg);
- } else if (afp->num_organisms != 0 && afp->num_organisms != afp->num_sequences) {
+ } else if (afp->num_organisms != 0 && afp->num_organisms != afp->num_sequences && afp->num_organisms * afp->num_segments != afp->num_sequences) {
Message (MSG_ERROR, "Number of organisms must match number of sequences!");
} else {
ans = ANS_YES;
@@ -3405,16 +3452,16 @@ static void BuildGetAlphabetDialog (IteM i)
SetGroupSpacing (h, 10, 10);
g = HiddenGroup (h, 2, 4, NULL);
- StaticPrompt (g, "Missing", 0, dialogTextHeight, programFont, 'c');
- afp->missing = DialogText (g, "?", 5, NULL);
+ StaticPrompt (g, "Ambiguous/Unknown", 0, dialogTextHeight, programFont, 'c');
+ afp->missing = DialogText (g, "?Nn", 5, NULL);
StaticPrompt (g, "Match", 0, dialogTextHeight, programFont, 'c');
afp->match = DialogText (g, ".", 5, NULL);
StaticPrompt (g, "Beginning Gap", 0, dialogTextHeight, programFont, 'c');
- afp->beginning_gap = DialogText (g, "-.nN", 5, NULL);
+ afp->beginning_gap = DialogText (g, "-.?nN", 5, NULL);
StaticPrompt (g, "Middle Gap", 0, dialogTextHeight, programFont, 'c');
- afp->middle_gap = DialogText (g, "-nN", 5, NULL);
+ afp->middle_gap = DialogText (g, "-", 5, NULL);
StaticPrompt (g, "End Gap", 0, dialogTextHeight, programFont, 'c');
- afp->end_gap = DialogText (g, "-?", 5, NULL);
+ afp->end_gap = DialogText (g, "-.?nN", 5, NULL);
StaticPrompt (g, "Sequence Type", 0, dialogTextHeight, programFont, 'c');
afp->sequence_type = PopupList (g, TRUE, NULL);
PopupItem (afp->sequence_type, "Nucleotide");
@@ -4356,8 +4403,6 @@ static void CloseProc (BaseFormPtr bfp)
OMUserDataPtr omudp;
ObjMgrDataPtr tmp;
#ifdef USE_SMARTNET
- ObjMgrDataPtr PNTR omdp;
- int fd;
SMUserDataPtr sm_usr_data = NULL;
#endif
@@ -9801,15 +9846,20 @@ static void s_GetTpaInfo (SequencesFormPtr sqfp)
Update ();
}
+static CharPtr tpaString = NULL; /* text saved by TpaNext; FinishPuttingTogether moves it into the submit block comment and resets this pointer to NULL */
+
static void FinishPuttingTogether (ForM f)
{
- BaseFormPtr bfp;
- BioseqSetPtr bssp;
- Uint2 entityID = 0;
- Int2 handled;
- SeqEntryPtr sep = NULL;
+ BaseFormPtr bfp;
+ BioseqSetPtr bssp;
+ Uint2 entityID = 0;
+ Int2 handled;
+ ObjMgrDataPtr omdp;
+ SubmitBlockPtr sbp;
+ SeqEntryPtr sep = NULL;
SequencesFormPtr sqfp;
+ SeqSubmitPtr ssp;
bfp = (BaseFormPtr) GetObjectExtra (f);
if (bfp != NULL) {
@@ -9821,6 +9871,22 @@ static void FinishPuttingTogether (ForM f)
}
/*#endif*/
entityID = PackageFormResults (globalsbp, sep, TRUE);
+ sqfp = (SequencesFormPtr) bfp;
+ if (SEQ_TPA_SUBMISSION == sqfp->submType && entityID > 0) {
+ omdp = ObjMgrGetData (entityID);
+ if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
+ ssp = (SeqSubmitPtr) omdp->dataptr;
+ if (ssp != NULL && ssp->datatype == 1) {
+ sbp = ssp->sub;
+ if (sbp != NULL) {
+ if (sbp->comment == NULL && StringDoesHaveText (tpaString)) {
+ sbp->comment = tpaString;
+ tpaString = NULL;
+ }
+ }
+ }
+ }
+ }
globalsbp = NULL;
WatchCursor ();
seqviewprocs.forceSeparateViewer = TRUE;
@@ -9931,24 +9997,13 @@ static void BackToFormat (ButtoN b)
}
}
-static void GetOrgAndSeq (ButtoN b)
+static void FinishOrgAndSeq (void)
{
- FormatBlockPtr fbp;
- MonitorPtr mon;
- ForM w;
+ MonitorPtr mon;
+ ForM w;
WatchCursor ();
- Hide (formatForm);
- fbp = (FormatBlockPtr) FormToPointer (formatForm);
- if (fbp != NULL) {
- globalFormatBlock.seqPackage = fbp->seqPackage;
- globalFormatBlock.seqFormat = fbp->seqFormat;
- globalFormatBlock.numSeqs = fbp->numSeqs;
- globalFormatBlock.submType = fbp->submType;
- }
- MemFree (fbp);
- WatchCursor ();
mon = MonitorStrNewEx ("Sequin New Submission", 30, FALSE);
MonitorStrValue (mon, "Creating Sequences Form");
Update ();
@@ -9970,6 +10025,27 @@ static void GetOrgAndSeq (ButtoN b)
Update ();
}
+static void BackToSubmitter (ButtoN b)
+ /* After the user confirms, hides the format form, shows the submitter form again and resets globalFormatBlock. b is not used. */
+{
+ MsgAnswer ans;
+
+ ans = Message (MSG_OKC, "Are you sure? Format information will be lost.");
+ if (ans == ANS_CANCEL) return; /* user cancelled: nothing is changed */
+ Hide (formatForm);
+ Update ();
+ PointerToForm (initSubmitForm, globalsbp); /* presumably reloads the submitter form from globalsbp - confirm */
+ globalsbp = SequinBlockFree (globalsbp); /* the global block is freed once the form has been given it */
+ Show (initSubmitForm);
+ Select (initSubmitForm);
+ SendHelpScrollMessage (helpForm, "Submitting Authors Form", NULL);
+ Update ();
+ globalFormatBlock.seqPackage = SEQ_PKG_SINGLE; /* from here on: reset the global format choices */
+ globalFormatBlock.seqFormat = SEQ_FMT_FASTA;
+ globalFormatBlock.numSeqs = 0;
+ globalFormatBlock.submType = SEQ_ORIG_SUBMISSION;
+}
+
static void GetFormat (ButtoN b)
{
@@ -9995,25 +10071,118 @@ static void GetFormat (ButtoN b)
Update ();
}
-static void BackToSubmitter (ButtoN b)
+static WindoW tpaWindow = NULL; /* created by DoTpaForm the first time it runs, then reused */
+static TexT tpaText = NULL; /* text field created on tpaWindow by DoTpaForm */
+static ButtoN tpaNext = NULL; /* "Next Form >>" button; enabled and disabled by TpaText */
+/* tpaString defined above FinishPuttingTogether */
+
+static void TpaPrev (ButtoN b) /* "<< Prev Form" handler: hides the TPA window, discards its text and shows the format form again. b is not used. */
{
- MsgAnswer ans;
+ Hide (tpaWindow);
+ tpaString = MemFree (tpaString); /* discard any saved text */
+ SetTitle (tpaText, ""); /* empty the text field */
+ Show (formatForm);
+ Select (formatForm);
+ SendHelpScrollMessage (helpForm, "Sequence Format Form", NULL);
+ Update ();
+}
+
+static void TpaNext (ButtoN b)
+ /* "Next Form >>" handler: saves the text of tpaText in tpaString and continues with FinishOrgAndSeq; when the text is empty it shows a message and does not continue. b is not used. */
+{
+ tpaString = MemFree (tpaString); /* release the previous copy before taking a new one */
+ tpaString = SaveStringFromText (tpaText);
+ if (StringHasNoText (tpaString)) {
+ Message (MSG_OK, "The requested information is required in order for you to be able to proceed with a TPA submission");
+ return; /* tpaWindow is not hidden on this path */
+ }
+ Hide (tpaWindow);
+ WatchCursor ();
+ FinishOrgAndSeq ();
+}
+
+static void TpaText (TexT t)
+ /* Disables tpaNext when t has no text and enables it otherwise. */
+{
+ if (TextHasNoText (t)) {
+ SafeDisable (tpaNext);
+ } else {
+ SafeEnable (tpaNext);
+ }
+}
+
+static CharPtr tpaMssg = "\
+Third party annotation records require a publication describing the biological \
+experiments used as evidence for the annotation. Please provide information \
+regarding the nature of these experiments."; /* prompt text that DoTpaForm passes to MultiLinePrompt */
+
+static void DoTpaForm (void)
+ /* Shows the "TPA Evidence" window, building it the first time it is needed; every call clears tpaString and the text field and disables tpaNext before showing the window. */
+{
+ GrouP c, h, p; /* NOTE(review): p receives the result of MultiLinePrompt - confirm GrouP matches that return type */
+
+ if (tpaWindow == NULL) { /* build the window only once; later calls reuse it */
+ tpaWindow = FixedWindow (-50, -33, -10, -10, "TPA Evidence", NULL);
+ h = HiddenGroup (tpaWindow, -1, 0, NULL);
+ SetGroupSpacing (h, 10, 10);
+
+ p = MultiLinePrompt (h, tpaMssg, 30 * stdCharWidth, programFont);
+
+ tpaText = ScrollText (h, 30, 5, programFont, TRUE, TpaText); /* TpaText is the final argument, presumably the text-change callback - confirm */
+
+ c = HiddenGroup (h, 2, 0, NULL);
+ PushButton (c, "<< Prev Form", TpaPrev);
+ tpaNext = PushButton (c, "Next Form >>", TpaNext);
+
+ AlignObjects (ALIGN_CENTER, (HANDLE) p, (HANDLE) tpaText, (HANDLE) c, NULL);
+
+ RealizeWindow (tpaWindow);
+ }
+ tpaString = MemFree (tpaString); /* drop text saved by an earlier visit */
+ SafeSetTitle (tpaText, "");
+ SafeDisable (tpaNext); /* TpaText re-enables it when the field has text */
+ Show (tpaWindow);
+ Select (tpaWindow);
+}
+
+static void GetOrgAndSeq (ButtoN b)
+ /* Hides the format form, copies its choices into globalFormatBlock, then either opens the TPA evidence window (TPA submissions) or calls FinishOrgAndSeq directly. b is not used. */
+{
+ /*
+ MsgAnswer ans;
+ */
+ FormatBlockPtr fbp;
+ Boolean is_tpa = FALSE; /* stays FALSE when the form yields no format block */
- ans = Message (MSG_OKC, "Are you sure? Format information will be lost.");
- if (ans == ANS_CANCEL) return;
Hide (formatForm);
- Update ();
- PointerToForm (initSubmitForm, globalsbp);
- globalsbp = SequinBlockFree (globalsbp);
- Show (initSubmitForm);
- Select (initSubmitForm);
- SendHelpScrollMessage (helpForm, "Submitting Authors Form", NULL);
- Update ();
- globalFormatBlock.seqPackage = SEQ_PKG_SINGLE;
- globalFormatBlock.seqFormat = SEQ_FMT_FASTA;
- globalFormatBlock.numSeqs = 0;
- globalFormatBlock.submType = SEQ_ORIG_SUBMISSION;
+ fbp = (FormatBlockPtr) FormToPointer (formatForm);
+ if (fbp != NULL) {
+ globalFormatBlock.seqPackage = fbp->seqPackage;
+ globalFormatBlock.seqFormat = fbp->seqFormat;
+ globalFormatBlock.numSeqs = fbp->numSeqs;
+ globalFormatBlock.submType = fbp->submType;
+ is_tpa = (Boolean) (globalFormatBlock.submType == SEQ_TPA_SUBMISSION);
+ }
+ MemFree (fbp); /* the four fields have been copied; the block itself is no longer needed */
+ if (is_tpa) {
+ DoTpaForm (); /* FinishOrgAndSeq is then reached from TpaNext */
+ /*
+ ans = Message (MSG_YN, "%s", tpaMssg);
+ if (ans == ANS_YES) {
+ WatchCursor ();
+ FinishOrgAndSeq ();
+ } else {
+ Show (formatForm);
+ Select (formatForm);
+ SendHelpScrollMessage (helpForm, "Sequence Format Form", NULL);
+ Update ();
+ }
+ */
+ } else {
+ WatchCursor ();
+ FinishOrgAndSeq ();
+ }
}
static void BackToStartup (ButtoN b)
diff --git a/sequin/sequin10.c b/sequin/sequin10.c
index a27aa03d..36bc94d2 100644
--- a/sequin/sequin10.c
+++ b/sequin/sequin10.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/3/2003
*
-* $Revision: 1.213 $
+* $Revision: 1.217 $
*
* File Description:
*
@@ -69,16 +69,16 @@ static void ListClauses (
Boolean suppress_final_and
);
-static void LabelClauses (
- ValNodePtr clause_list,
- Uint1 biomol,
- BioseqPtr bsp
-);
+static void LabelClauses
+( ValNodePtr clause_list,
+ Uint1 biomol,
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag);
-static CharPtr GetProductName (
- SeqFeatPtr cds,
- BioseqPtr bsp
-);
+static CharPtr GetProductName
+( SeqFeatPtr cds,
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag);
#define DEFLINE_FEATLIST 1
#define DEFLINE_CLAUSEPLUS 2
@@ -112,8 +112,8 @@ typedef struct featureclause {
FeatureClausePtr NewFeatureClause (
SeqFeatPtr sfp,
- BioseqPtr bsp
-);
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag);
static void PluralizeConsolidatedClauseDescription (
FeatureClausePtr fcp
@@ -126,7 +126,8 @@ typedef Boolean (LIBCALLBACK *ShouldRemoveFunction) (
BioseqPtr bsp,
Boolean isLonely,
Boolean isRequested,
- Boolean isSegment
+ Boolean isSegment,
+ Boolean suppress_locus_tag
);
/* This section of the code contains some functions for dealing with
@@ -344,6 +345,8 @@ static ModifierItemGlobalData DefLineModifiers[] = {
{ "Isolation-source" , FALSE, SUBSRC_isolation_source , FALSE },
{ "Lab-host" , FALSE, SUBSRC_lab_host , FALSE },
{ "Map" , FALSE, SUBSRC_map , FALSE },
+ { "Note-OrgMod" , TRUE, ORGMOD_other , FALSE },
+ { "Note-SubSrc" , FALSE, SUBSRC_other , FALSE },
{ "Old-lineage" , TRUE , ORGMOD_old_lineage , FALSE },
{ "Old-name" , TRUE , ORGMOD_old_name , FALSE },
{ "Pathovar" , TRUE , ORGMOD_pathovar , FALSE },
@@ -407,6 +410,8 @@ typedef enum {
DEFLINE_POS_Isolation_source,
DEFLINE_POS_Lab_host,
DEFLINE_POS_Map,
+ DEFLINE_POS_Note_orgmod,
+ DEFLINE_POS_Note_subsrc,
DEFLINE_POS_Old_lineage,
DEFLINE_POS_Old_name,
DEFLINE_POS_Pathovar,
@@ -2316,16 +2321,15 @@ static Boolean LIBCALLBACK IsGene (
return TRUE;
}
-static CharPtr GetGeneName (
-GeneRefPtr grp
-)
+static CharPtr GetGeneName (GeneRefPtr grp, Boolean suppress_locus_tag)
{
ValNodePtr syn;
if (grp == NULL) return NULL;
if (SeqMgrGeneIsSuppressed (grp)) return NULL;
if (StringDoesHaveText (grp->locus)) return grp->locus;
- if (StringDoesHaveText (grp->locus_tag)) return grp->locus_tag;
+ if (! suppress_locus_tag && StringDoesHaveText (grp->locus_tag))
+ return grp->locus_tag;
if (StringDoesHaveText (grp->desc)) return grp->desc;
for (syn = grp->syn; syn != NULL; syn = syn->next)
{
@@ -2335,9 +2339,7 @@ GeneRefPtr grp
return NULL;
}
-static CharPtr GetAlleleName (
-GeneRefPtr grp
-)
+static CharPtr GetAlleleName (GeneRefPtr grp, Boolean suppress_locus_tag)
{
size_t lenallele;
size_t lengenename;
@@ -2346,7 +2348,7 @@ GeneRefPtr grp
if (grp == NULL) return NULL;
if (StringHasNoText (grp->allele)) return NULL;
- gene_name = GetGeneName (grp);
+ gene_name = GetGeneName (grp, suppress_locus_tag);
if (StringHasNoText (gene_name)) return NULL;
lenallele = StringLen (grp->allele);
lengenename = StringLen (gene_name);
@@ -2378,20 +2380,20 @@ GeneRefPtr grp
/* This function compares the gene names and allele names of the gene
* to see if they match.
*/
-static Boolean DoGenesMatch (
- GeneRefPtr grp1,
- GeneRefPtr grp2
-)
+static Boolean DoGenesMatch
+(GeneRefPtr grp1,
+ GeneRefPtr grp2,
+ Boolean suppress_locus_tag)
{
CharPtr name1;
CharPtr name2;
- name1 = GetGeneName (grp1);
- name2 = GetGeneName (grp2);
+ name1 = GetGeneName (grp1, suppress_locus_tag);
+ name2 = GetGeneName (grp2, suppress_locus_tag);
if (StringCmp (name1, name2) != 0) return FALSE;
- name1 = GetAlleleName (grp1);
- name2 = GetAlleleName (grp2);
+ name1 = GetAlleleName (grp1, suppress_locus_tag);
+ name2 = GetAlleleName (grp2, suppress_locus_tag);
if ((name1 == NULL && name2 != NULL)
|| (name1 != NULL && name2 == NULL))
{
@@ -3710,8 +3712,8 @@ static void GroupAltSplicedExons (
*/
static void ExpandAltSplicedExons (
ValNodePtr clause_list,
- BioseqPtr bsp
-)
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag)
{
ValNodePtr clause, rest_of_list, featlist, new_clause;
FeatureClausePtr fcp, new_fcp;
@@ -3744,7 +3746,7 @@ static void ExpandAltSplicedExons (
{
new_clause = ValNodeNew (clause);
if (new_clause == NULL) return;
- new_fcp = NewFeatureClause (featlist->data.ptrvalue, bsp);
+ new_fcp = NewFeatureClause (featlist->data.ptrvalue, bsp, suppress_locus_tag);
if (new_fcp == NULL) return;
new_fcp->grp = fcp->grp;
new_fcp->is_alt_spliced = fcp->is_alt_spliced;
@@ -3765,7 +3767,7 @@ static void ExpandAltSplicedExons (
}
else
{
- ExpandAltSplicedExons (fcp->featlist, bsp);
+ ExpandAltSplicedExons (fcp->featlist, bsp, suppress_locus_tag);
}
}
}
@@ -3777,11 +3779,11 @@ static void ExpandAltSplicedExons (
* than one clause, while other features should really only belong to
* one clause.
*/
-static Boolean AddGeneToClauses (
- SeqFeatPtr gene,
+static Boolean AddGeneToClauses
+( SeqFeatPtr gene,
CharPtr gene_productname,
- ValNodePtr clause_list
-)
+ ValNodePtr clause_list,
+ Boolean suppress_locus_tag)
{
ValNodePtr clause;
FeatureClausePtr fcp;
@@ -3817,7 +3819,7 @@ static Boolean AddGeneToClauses (
}
}
- if (fcp->grp != NULL && DoGenesMatch (fcp->grp, grp))
+ if (fcp->grp != NULL && DoGenesMatch (fcp->grp, grp, suppress_locus_tag))
{
used_gene = TRUE;
if (gene_productname != NULL
@@ -3849,9 +3851,7 @@ static Boolean AddGeneToClauses (
/* This function iterates through the list of features and calls
* AddGeneToClauses for each gene feature it finds.
*/
-static void GroupGenes (
- ValNodePtr PNTR clause_list
-)
+static void GroupGenes (ValNodePtr PNTR clause_list, Boolean suppress_locus_tag)
{
ValNodePtr vnp;
ValNodePtr featlist;
@@ -3870,7 +3870,7 @@ static void GroupGenes (
{
AddGeneToClauses (featlist->data.ptrvalue,
fcp->feature_label_data.productname,
- vnp->next);
+ vnp->next, suppress_locus_tag);
}
}
}
@@ -3883,11 +3883,11 @@ static void GroupGenes (
* mRNA can apply to more than one clause, while other features should
* really only belong to one clause.
*/
-static Boolean AddmRNAToClauses (
- SeqFeatPtr mRNA,
+static Boolean AddmRNAToClauses
+( SeqFeatPtr mRNA,
ValNodePtr clause_list,
- BioseqPtr bsp
-)
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag)
{
ValNodePtr clause;
FeatureClausePtr fcp;
@@ -3900,7 +3900,7 @@ static Boolean AddmRNAToClauses (
if (clause_list == NULL) return FALSE;
used_mRNA = FALSE;
- productname = GetProductName (mRNA, bsp);
+ productname = GetProductName (mRNA, bsp, suppress_locus_tag);
if (productname == NULL) return TRUE;
for (clause = clause_list; clause != NULL; clause = clause->next)
@@ -3953,7 +3953,8 @@ static Boolean AddmRNAToClauses (
*/
static void GroupmRNAs (
ValNodePtr PNTR clause_list,
- BioseqPtr bsp
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag
)
{
ValNodePtr vnp;
@@ -3971,7 +3972,7 @@ static void GroupmRNAs (
&& featlist->choice == DEFLINE_FEATLIST
&& IsmRNA (featlist->data.ptrvalue))
{
- if (AddmRNAToClauses (featlist->data.ptrvalue, *clause_list, bsp))
+ if (AddmRNAToClauses (featlist->data.ptrvalue, *clause_list, bsp, suppress_locus_tag))
{
fcp->delete_me = TRUE;
}
@@ -4130,10 +4131,10 @@ static CharPtr GetFeatureTypeWord (
* If none of the above conditions apply, the sequence indexing context label
* will be used to obtain the product name for the feature.
*/
-static CharPtr GetProductName (
- SeqFeatPtr cds,
- BioseqPtr bsp
-)
+static CharPtr GetProductName
+( SeqFeatPtr cds,
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag)
{
CharPtr protein_name;
CharPtr semicolon;
@@ -4172,7 +4173,7 @@ static CharPtr GetProductName (
{
grp = (GeneRefPtr) cds->data.value.ptrvalue;
if (grp == NULL) return NULL;
- gene_name = GetGeneName (grp);
+ gene_name = GetGeneName (grp, suppress_locus_tag);
if (grp->desc != NULL
&& StringCmp (grp->desc, gene_name) != 0)
{
@@ -4222,8 +4223,8 @@ static CharPtr GetProductName (
static FeatureClausePtr FindProductInFeatureList (
FeatureClausePtr fcp,
ValNodePtr clause_list,
- matchFunction itemmatch
-)
+ matchFunction itemmatch,
+ Boolean suppress_locus_tag)
{
ValNodePtr vnp;
FeatureClausePtr vnp_fcp;
@@ -4233,7 +4234,7 @@ static FeatureClausePtr FindProductInFeatureList (
if (vnp->choice == DEFLINE_CLAUSEPLUS && vnp->data.ptrvalue != NULL)
{
vnp_fcp = vnp->data.ptrvalue;
- if (DoGenesMatch (vnp_fcp->grp, fcp->grp)
+ if (DoGenesMatch (vnp_fcp->grp, fcp->grp, suppress_locus_tag)
&& vnp_fcp->featlist != NULL
&& vnp_fcp->featlist->choice == DEFLINE_FEATLIST
&& itemmatch (vnp_fcp->featlist->data.ptrvalue))
@@ -4242,7 +4243,8 @@ static FeatureClausePtr FindProductInFeatureList (
}
else
{
- vnp_fcp = FindProductInFeatureList (fcp, vnp_fcp->featlist, itemmatch);
+ vnp_fcp = FindProductInFeatureList (fcp, vnp_fcp->featlist,
+ itemmatch, suppress_locus_tag);
if (vnp_fcp != NULL) return vnp_fcp;
}
}
@@ -4258,10 +4260,10 @@ static FeatureClausePtr FindProductInFeatureList (
* If there is a gene and a product, the description will be the name of
* the product followed by the name of the gene in parentheses.
*/
-static CharPtr GetGeneProtDescription (
- FeatureClausePtr fcp,
- BioseqPtr bsp
-)
+static CharPtr GetGeneProtDescription
+( FeatureClausePtr fcp,
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag)
{
SeqFeatPtr sfp;
CharPtr protein_name;
@@ -4285,7 +4287,7 @@ static CharPtr GetGeneProtDescription (
}
else
{
- protein_name = GetProductName (sfp, bsp);
+ protein_name = GetProductName (sfp, bsp, suppress_locus_tag);
if (protein_name == NULL && IsGene (sfp))
{
@@ -4296,7 +4298,7 @@ static CharPtr GetGeneProtDescription (
description_length += StringLen (protein_name);
}
- gene_name = GetGeneName (fcp->grp);
+ gene_name = GetGeneName (fcp->grp, suppress_locus_tag);
if (gene_name != NULL)
{
description_length += StringLen (gene_name);
@@ -4339,10 +4341,10 @@ static matchFunction productfeatures[] = {
/* This function finds gene features without products and looks for
* features that might provide products for them.
*/
-static void FindGeneProducts (
- ValNodePtr clause_list,
- BioseqPtr bsp
-)
+static void FindGeneProducts
+( ValNodePtr clause_list,
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag)
{
ValNodePtr vnp;
FeatureClausePtr fcp, productfcp;
@@ -4364,7 +4366,8 @@ static void FindGeneProducts (
for (i=0; i < NumProductFeatureTypes && productfcp == NULL; i++)
{
productfcp = FindProductInFeatureList (fcp, clause_list,
- productfeatures[i]);
+ productfeatures[i],
+ suppress_locus_tag);
}
if (productfcp != NULL)
{
@@ -4377,7 +4380,8 @@ static void FindGeneProducts (
else
{
fcp->feature_label_data.productname
- = GetProductName (productfcp->featlist->data.ptrvalue, bsp);
+ = GetProductName (productfcp->featlist->data.ptrvalue,
+ bsp, suppress_locus_tag);
}
if (fcp->feature_label_data.description != NULL)
{
@@ -4385,12 +4389,12 @@ static void FindGeneProducts (
fcp->feature_label_data.description = NULL;
}
fcp->feature_label_data.description =
- GetGeneProtDescription (fcp, bsp);
+ GetGeneProtDescription (fcp, bsp, suppress_locus_tag);
}
}
else
{
- FindGeneProducts (fcp->featlist, bsp);
+ FindGeneProducts (fcp->featlist, bsp, suppress_locus_tag);
}
}
}
@@ -4434,10 +4438,10 @@ static CharPtr GetExonDescription (
return label;
}
-static CharPtr GetFeatureDescription (
- FeatureClausePtr fcp,
- BioseqPtr bsp
-)
+static CharPtr GetFeatureDescription
+( FeatureClausePtr fcp,
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag)
{
SeqFeatPtr sfp;
@@ -4488,7 +4492,7 @@ static CharPtr GetFeatureDescription (
}
else
{
- return GetGeneProtDescription (fcp, bsp);
+ return GetGeneProtDescription (fcp, bsp, suppress_locus_tag);
}
}
@@ -4548,11 +4552,11 @@ static void LIBCALLBACK GetPromoterFeatureLabel (
* subfeatures of the clause, or the interval could be a combination of the
* last two items if the feature is a CDS.
*/
-static CharPtr GetGenericInterval (
- FeatureClausePtr fcp,
- Uint1 biomol,
- BioseqPtr bsp
-)
+static CharPtr GetGenericInterval
+( FeatureClausePtr fcp,
+ Uint1 biomol,
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag)
{
CharPtr interval;
Boolean partial5, partial3;
@@ -4589,7 +4593,7 @@ static CharPtr GetGenericInterval (
{
suppress_final_and = TRUE;
}
- LabelClauses (featlist, biomol, bsp);
+ LabelClauses (featlist, biomol, bsp, suppress_locus_tag);
ListClauses (featlist, &strings, FALSE, suppress_final_and);
subfeatlist = MergeValNodeStrings (strings, FALSE);
ValNodeFreeData (strings);
@@ -4647,12 +4651,12 @@ static CharPtr GetGenericInterval (
* for more of the specific feature types, to reduce the number of times
* that the feature must be identified as being a certain type.
*/
-static void LIBCALLBACK GetGenericFeatureLabel (
- FeatureClausePtr fcp,
- BioseqPtr bsp,
- Uint1 biomol,
- FeatureLabelPtr flp
-)
+static void LIBCALLBACK GetGenericFeatureLabel
+( FeatureClausePtr fcp,
+ BioseqPtr bsp,
+ Uint1 biomol,
+ FeatureLabelPtr flp,
+ Boolean suppress_locus_tag)
{
SeqFeatPtr main_feat;
@@ -4673,13 +4677,13 @@ static void LIBCALLBACK GetGenericFeatureLabel (
}
if (flp->productname == NULL)
{
- flp->productname = GetProductName (main_feat, bsp);
+ flp->productname = GetProductName (main_feat, bsp, suppress_locus_tag);
}
if (flp->description == NULL
&& (! IsMiscRNA (main_feat)
|| StringStr (flp->productname, "spacer") == NULL ))
{
- flp->description = GetFeatureDescription (fcp, bsp);
+ flp->description = GetFeatureDescription (fcp, bsp, suppress_locus_tag);
}
}
@@ -4711,11 +4715,11 @@ typedef enum {
NumDefLineFeatLabels
} DefLineFeatLabel;
-static void LabelFeature (
- BioseqPtr bsp,
- Uint1 biomol,
- FeatureClausePtr new_clauseplus
-)
+static void LabelFeature
+( BioseqPtr bsp,
+ Uint1 biomol,
+ FeatureClausePtr new_clauseplus,
+ Boolean suppress_locus_tag)
{
Int4 i;
SeqFeatPtr main_feat;
@@ -4726,11 +4730,12 @@ static void LabelFeature (
{
main_feat = (SeqFeatPtr) new_clauseplus->featlist->data.ptrvalue;
- new_clauseplus->allelename = GetAlleleName (new_clauseplus->grp);
+ new_clauseplus->allelename = GetAlleleName (new_clauseplus->grp,
+ suppress_locus_tag);
if (new_clauseplus->interval == NULL)
{
new_clauseplus->interval =
- GetGenericInterval (new_clauseplus, biomol, bsp);
+ GetGenericInterval (new_clauseplus, biomol, bsp, suppress_locus_tag);
}
for (i=0; i < NumDefLineFeatLabels; i++)
@@ -4745,7 +4750,7 @@ static void LabelFeature (
}
GetGenericFeatureLabel ( new_clauseplus, bsp, biomol,
- &new_clauseplus->feature_label_data);
+ &new_clauseplus->feature_label_data, suppress_locus_tag);
return;
}
}
@@ -4919,11 +4924,11 @@ static void TrimUnwantedWordsFromAltSpliceProductName (
* must have the same gene, must share a complete interval, and must have
* similarly named products.
*/
-static CharPtr MeetAltSpliceRules (
- FeatureClausePtr cdsfcp1,
+static CharPtr MeetAltSpliceRules
+( FeatureClausePtr cdsfcp1,
FeatureClausePtr cdsfcp2,
- BioseqPtr bsp
-)
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag)
{
SeqFeatPtr cds1, cds2;
CharPtr match_string;
@@ -4937,7 +4942,7 @@ static CharPtr MeetAltSpliceRules (
cds1 = cdsfcp1->featlist->data.ptrvalue;
cds2 = cdsfcp2->featlist->data.ptrvalue;
- if (! DoGenesMatch (cdsfcp1->grp, cdsfcp2->grp))
+ if (! DoGenesMatch (cdsfcp1->grp, cdsfcp2->grp, suppress_locus_tag))
return NULL;
if ( (res = TestFeatOverlap (cds1, cds2, COMMON_INTERVAL)) != -1)
@@ -5048,10 +5053,10 @@ static void MoveSubclauses (
/* a comment and a data.choice value that indicates alt splicing */
/* we remove the second alternatively spliced CDS feature from the list */
-static void FindAltSplices (
- ValNodePtr clause_list,
- BioseqPtr bsp
-)
+static void FindAltSplices
+( ValNodePtr clause_list,
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag)
{
FeatureClausePtr fcp1, fcp2;
ValNodePtr cdsclause1, cdsclause2;
@@ -5069,7 +5074,7 @@ static void FindAltSplices (
if (fcp1->feature_label_data.productname == NULL)
{
fcp1->feature_label_data.productname =
- GetProductName (fcp1->featlist->data.ptrvalue, bsp);
+ GetProductName (fcp1->featlist->data.ptrvalue, bsp, suppress_locus_tag);
}
searchclause = cdsclause1->next;
cdsclause2 = FindNextCDSClause (searchclause);
@@ -5079,9 +5084,9 @@ static void FindAltSplices (
if (fcp2->feature_label_data.productname == NULL)
{
fcp2->feature_label_data.productname =
- GetProductName (fcp2->featlist->data.ptrvalue, bsp);
+ GetProductName (fcp2->featlist->data.ptrvalue, bsp, suppress_locus_tag);
}
- combined_protein_name = MeetAltSpliceRules (fcp1, fcp2, bsp);
+ combined_protein_name = MeetAltSpliceRules (fcp1, fcp2, bsp, suppress_locus_tag);
if (combined_protein_name != NULL)
{
/* get rid of variant, splice variant, splice product, isoform, etc.*/
@@ -5134,18 +5139,18 @@ static void FindAltSplices (
DeleteFeatureClauses (&clause_list);
}
-static void LabelClauses (
- ValNodePtr clause_list,
- Uint1 biomol,
- BioseqPtr bsp
-)
+static void LabelClauses /* Calls LabelFeature on the data pointer of every node in clause_list, in list order. */
+( ValNodePtr clause_list,
+ Uint1 biomol,
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag) /* passed unchanged to LabelFeature */
{
ValNodePtr clause;
clause = clause_list;
while (clause != NULL)
{
- LabelFeature ( bsp, biomol, clause->data.ptrvalue);
+ LabelFeature ( bsp, biomol, clause->data.ptrvalue, suppress_locus_tag);
clause = clause->next;
}
}
@@ -5176,10 +5181,10 @@ static CharPtr separators [] = {
#define num_separators 3
-static ValNodePtr GetMiscRNAelements (
- SeqFeatPtr misc_rna,
- BioseqPtr bsp
-)
+static ValNodePtr GetMiscRNAelements
+( SeqFeatPtr misc_rna,
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag)
{
CharPtr buffer;
Int4 i, best_i;
@@ -5198,7 +5203,7 @@ static ValNodePtr GetMiscRNAelements (
to_free = NULL;
if (misc_rna == NULL) return NULL;
- buffer = GetProductName (misc_rna, bsp);
+ buffer = GetProductName (misc_rna, bsp, suppress_locus_tag);
to_free = buffer;
if (buffer == NULL)
{
@@ -5265,7 +5270,7 @@ static ValNodePtr GetMiscRNAelements (
word_i++) {}
if (word_i < NUM_MISC_RNA_WORDS)
{
- fcp = NewFeatureClause ( misc_rna, bsp);
+ fcp = NewFeatureClause ( misc_rna, bsp, suppress_locus_tag);
if (fcp == NULL) return NULL;
if (word_i == MISC_RNA_WORD_INTERNAL_SPACER
|| word_i == MISC_RNA_WORD_EXTERNAL_SPACER
@@ -5345,8 +5350,8 @@ static ValNodePtr GetMiscRNAelements (
*/
static void ReplaceRNAClauses (
ValNodePtr PNTR clause_list,
- BioseqPtr bsp
-)
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag)
{
FeatureClausePtr fcp;
SeqFeatPtr main_feat;
@@ -5368,7 +5373,7 @@ static void ReplaceRNAClauses (
if (IsrRNA (main_feat) || IsMiscRNA (main_feat))
{
- replacement_clauses = GetMiscRNAelements ( main_feat, bsp );
+ replacement_clauses = GetMiscRNAelements ( main_feat, bsp, suppress_locus_tag );
if (replacement_clauses != NULL)
{
for (vnp = replacement_clauses; vnp->next != NULL; vnp = vnp->next) {}
@@ -6309,7 +6314,7 @@ static void ListClauses (
"pseudogene mRNA")==0)
&& clause_len > StringLen ("precursor")
&& StringCmp ( thisclause->feature_label_data.description
- + clause_len - StringLen ("precursor"),
+ + clause_len - StringLen ("precursor") - 1,
"precursor") == 0)
{
print_comma_between_description_and_typeword = TRUE;
@@ -6483,7 +6488,8 @@ static Boolean LIBCALLBACK ShouldRemoveExon (
BioseqPtr bsp,
Boolean isLonely,
Boolean isRequested,
- Boolean isSegment
+ Boolean isSegment,
+ Boolean suppress_locus_tag
)
{
Boolean partial3, partial5;
@@ -6513,13 +6519,13 @@ static Boolean LIBCALLBACK ShouldRemoveCDS (
BioseqPtr bsp,
Boolean isLonely,
Boolean isRequested,
- Boolean isSegment
-)
+ Boolean isSegment,
+ Boolean suppress_locus_tag)
{
CharPtr description;
Boolean retval = FALSE;
- description = GetGeneProtDescription (this_fcp, bsp);
+ description = GetGeneProtDescription (this_fcp, bsp, suppress_locus_tag);
if (StringHasNoText (description))
{
retval = TRUE;
@@ -6534,7 +6540,8 @@ static Boolean LIBCALLBACK ShouldRemoveNoncodingProductFeat (
FeatureClausePtr this_fcp,
BioseqPtr bsp, Boolean isLonely,
Boolean isRequested,
- Boolean isSegment
+ Boolean isSegment,
+ Boolean suppress_locus_tag
)
{
if (isRequested) return FALSE;
@@ -6547,7 +6554,8 @@ static Boolean LIBCALLBACK ShouldRemovePromoter (
FeatureClausePtr this_fcp,
BioseqPtr bsp, Boolean isLonely,
Boolean isRequested,
- Boolean isSegment
+ Boolean isSegment,
+ Boolean suppress_locus_tag
)
{
if (isLonely || isRequested) return FALSE;
@@ -6561,7 +6569,8 @@ static Boolean LIBCALLBACK ShouldRemoveLTR (
BioseqPtr bsp,
Boolean isLonely,
Boolean isRequested,
- Boolean isSegment
+ Boolean isSegment,
+ Boolean suppress_locus_tag
)
{
if (isLonely || isRequested)
@@ -6577,7 +6586,8 @@ static Boolean LIBCALLBACK ShouldRemove3UTR (
BioseqPtr bsp,
Boolean isLonely,
Boolean isRequested,
- Boolean isSegment
+ Boolean isSegment,
+ Boolean suppress_locus_tag
)
{
if (isLonely || isRequested)
@@ -6593,7 +6603,8 @@ static Boolean LIBCALLBACK ShouldRemove5UTR (
BioseqPtr bsp,
Boolean isLonely,
Boolean isRequested,
- Boolean isSegment
+ Boolean isSegment,
+ Boolean suppress_locus_tag
)
{
if (isLonely || isRequested)
@@ -6608,28 +6619,29 @@ static Boolean LIBCALLBACK ShouldRemoveIntron (
FeatureClausePtr this_fcp,
BioseqPtr bsp, Boolean isLonely,
Boolean isRequested,
- Boolean isSegment
+ Boolean isSegment,
+ Boolean suppress_locus_tag
)
{
if (isLonely || isRequested) return FALSE;
else return TRUE;
}
-static Boolean LIBCALLBACK ShouldRemoveGeneric (
- SeqFeatPtr sfp,
+static Boolean LIBCALLBACK ShouldRemoveGeneric
+( SeqFeatPtr sfp,
FeatureClausePtr parent_fcp,
FeatureClausePtr this_fcp,
BioseqPtr bsp,
Boolean isLonely,
Boolean isRequested,
- Boolean isSegment
-)
+ Boolean isSegment,
+ Boolean suppress_locus_tag)
{
CharPtr productname;
Boolean rval;
rval = FALSE;
- if (IsMiscRNA (sfp) && ( productname = GetProductName (sfp, bsp)) != NULL)
+ if (IsMiscRNA (sfp) && ( productname = GetProductName (sfp, bsp, suppress_locus_tag)) != NULL)
{
if (StringStr (productname, "trans-spliced leader") != NULL)
{
@@ -6688,6 +6700,7 @@ typedef struct deflinefeaturerequestlist {
Boolean remove_subfeatures;
DefLineType feature_list_type;
Int4 misc_feat_parse_rule;
+ Boolean suppress_locus_tags;
} DeflineFeatureRequestList, PNTR DeflineFeatureRequestListPtr;
static void InitFeatureRequests (
@@ -6704,6 +6717,7 @@ static void InitFeatureRequests (
feature_requests->remove_subfeatures = FALSE;
feature_requests->feature_list_type = DEFLINE_USE_FEATURES;
feature_requests->misc_feat_parse_rule = 2;
+ feature_requests->suppress_locus_tags = FALSE;
}
static Boolean RemoveCondition (
@@ -6722,17 +6736,19 @@ static Boolean RemoveCondition (
{
if (remove_items[i].itemmatch (sfp))
return remove_items[i].ShouldRemove (sfp, parent_fcp, this_fcp, bsp,
- isLonely, feature_requests->items[i].keep, isSegment);
+ isLonely, feature_requests->items[i].keep,
+ isSegment,
+ feature_requests->suppress_locus_tags);
}
return ShouldRemoveGeneric(sfp, parent_fcp, this_fcp, bsp, isLonely, FALSE,
- isSegment);
+ isSegment, feature_requests->suppress_locus_tags);
}
-static Boolean FindOtherGeneClause (
- ValNodePtr feature_list,
+static Boolean FindOtherGeneClause
+( ValNodePtr feature_list,
ValNodePtr me,
- GeneRefPtr grp
-)
+ GeneRefPtr grp,
+ Boolean suppress_locus_tag)
{
ValNodePtr vnp;
FeatureClausePtr fcp;
@@ -6747,11 +6763,11 @@ static Boolean FindOtherGeneClause (
fcp = vnp->data.ptrvalue;
if (fcp->delete_me) continue;
if ( fcp->grp == grp
- || (fcp->grp != NULL && DoGenesMatch (fcp->grp, grp)))
+ || (fcp->grp != NULL && DoGenesMatch (fcp->grp, grp, suppress_locus_tag)))
{
return TRUE;
}
- if ( FindOtherGeneClause (fcp->featlist, me, grp))
+ if ( FindOtherGeneClause (fcp->featlist, me, grp, suppress_locus_tag))
{
return TRUE;
}
@@ -6760,11 +6776,11 @@ static Boolean FindOtherGeneClause (
return FALSE;
}
-static void RemoveGenesMentionedElsewhere (
- ValNodePtr PNTR feature_list,
- ValNodePtr search_list,
- Boolean delete_now
-)
+static void RemoveGenesMentionedElsewhere
+( ValNodePtr PNTR feature_list,
+ ValNodePtr search_list,
+ Boolean delete_now,
+ Boolean suppress_locus_tag)
{
ValNodePtr vnp;
FeatureClausePtr fcp;
@@ -6780,13 +6796,13 @@ static void RemoveGenesMentionedElsewhere (
}
if ( IsGene (fcp->featlist->data.ptrvalue)
&& fcp->featlist->next == NULL
- && FindOtherGeneClause ( search_list, vnp, fcp->grp))
+ && FindOtherGeneClause ( search_list, vnp, fcp->grp, suppress_locus_tag))
{
fcp->delete_me = TRUE;
}
else
{
- RemoveGenesMentionedElsewhere ( &(fcp->featlist), search_list, FALSE);
+ RemoveGenesMentionedElsewhere ( &(fcp->featlist), search_list, FALSE, suppress_locus_tag);
}
}
}
@@ -7295,8 +7311,8 @@ static void ConsolidateClauses (
ValNodePtr PNTR list,
BioseqPtr bsp,
Uint1 biomol,
- Boolean delete_now
-)
+ Boolean delete_now,
+ Boolean suppress_locus_tag)
{
ValNodePtr vnp;
FeatureClausePtr fcp;
@@ -7317,14 +7333,14 @@ static void ConsolidateClauses (
continue;
}
- ConsolidateClauses (&(fcp->featlist), bsp, biomol, FALSE);
+ ConsolidateClauses (&(fcp->featlist), bsp, biomol, FALSE, suppress_locus_tag);
if (last_cds_fcp == NULL)
{
last_cds_fcp = fcp;
if (fcp->feature_label_data.description == NULL)
{
- last_desc = GetGeneProtDescription (fcp, bsp);
+ last_desc = GetGeneProtDescription (fcp, bsp, suppress_locus_tag);
}
else
{
@@ -7344,7 +7360,7 @@ static void ConsolidateClauses (
{
if (fcp->feature_label_data.description == NULL)
{
- new_desc = GetGeneProtDescription (fcp, bsp);
+ new_desc = GetGeneProtDescription (fcp, bsp, suppress_locus_tag);
}
else
{
@@ -7385,7 +7401,7 @@ static void ConsolidateClauses (
MemFree (last_cds_fcp->interval);
}
last_cds_fcp->interval =
- GetGenericInterval (last_cds_fcp, biomol, bsp);
+ GetGenericInterval (last_cds_fcp, biomol, bsp, suppress_locus_tag);
MemFree (new_desc);
}
else
@@ -7405,8 +7421,8 @@ static void ConsolidateClauses (
static void CountUnknownGenes (
ValNodePtr PNTR clause_list,
- BioseqPtr bsp
-)
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag)
{
FeatureClausePtr fcp, new_fcp;
ValNodePtr vnp, new_vnp;
@@ -7423,8 +7439,8 @@ static void CountUnknownGenes (
&& (fcp = vnp->data.ptrvalue) != NULL
&& ! fcp->is_unknown)
{
- CountUnknownGenes (&(fcp->featlist), bsp);
- gene_name = GetGeneProtDescription (fcp, bsp);
+ CountUnknownGenes (&(fcp->featlist), bsp, suppress_locus_tag);
+ gene_name = GetGeneProtDescription (fcp, bsp, suppress_locus_tag);
if (StringCmp (gene_name, "unknown") == 0
&& fcp->featlist != NULL
&& fcp->featlist->choice == DEFLINE_FEATLIST)
@@ -7433,7 +7449,8 @@ static void CountUnknownGenes (
{
new_vnp = ValNodeNew (*clause_list);
if (new_vnp == NULL) return;
- new_fcp = NewFeatureClause (fcp->featlist->data.ptrvalue, bsp);
+ new_fcp = NewFeatureClause (fcp->featlist->data.ptrvalue,
+ bsp, suppress_locus_tag);
new_fcp->is_unknown = TRUE;
new_vnp->choice = DEFLINE_CLAUSEPLUS;
new_vnp->data.ptrvalue = new_fcp;
@@ -7481,10 +7498,10 @@ static void ReplaceDefinitionLine (
MemFree (defline);
}
-FeatureClausePtr NewFeatureClause (
- SeqFeatPtr sfp,
- BioseqPtr bsp
-)
+FeatureClausePtr NewFeatureClause
+( SeqFeatPtr sfp,
+ BioseqPtr bsp,
+ Boolean suppress_locus_tag)
{
FeatureClausePtr fcp;
Boolean partial5, partial3;
@@ -7530,7 +7547,7 @@ FeatureClausePtr NewFeatureClause (
}
if (IsCDS (sfp))
{
- fcp->feature_label_data.productname = GetProductName (sfp, bsp);
+ fcp->feature_label_data.productname = GetProductName (sfp, bsp, suppress_locus_tag);
}
fcp->featlist = ValNodeNew (NULL);
if (fcp->featlist == NULL)
@@ -7545,9 +7562,7 @@ FeatureClausePtr NewFeatureClause (
return fcp;
}
-static ValNodePtr GetFeatureList (
- BioseqPtr bsp
-)
+static ValNodePtr GetFeatureList (BioseqPtr bsp, Boolean suppress_locus_tag)
{
ValNodePtr head, vnp;
SeqFeatPtr sfp;
@@ -7563,7 +7578,7 @@ static ValNodePtr GetFeatureList (
{
if (IsRecognizedFeature (sfp))
{
- fcp = NewFeatureClause (sfp, bsp);
+ fcp = NewFeatureClause (sfp, bsp, suppress_locus_tag);
if (fcp == NULL) return NULL;
fcp->numivals = fcontext.numivals;
fcp->ivals = fcontext.ivals;
@@ -7834,7 +7849,8 @@ static Boolean IntervalIntersectsIvals
static ValNodePtr GrabTraversingGenes
(ValNodePtr parent_feature_list,
SeqMgrSegmentContextPtr context,
- BioseqPtr parent_bsp)
+ BioseqPtr parent_bsp,
+ Boolean suppress_locus_tag)
{
FeatureClausePtr fcp, new_fcp;
ValNodePtr clause;
@@ -7855,7 +7871,8 @@ static ValNodePtr GrabTraversingGenes
&& fcp->ivals != NULL && fcp->numivals > 0)
{
if (IntervalIntersectsIvals (fcp->numivals, fcp->ivals, context)) {
- new_fcp = NewFeatureClause (fcp->featlist->data.ptrvalue, parent_bsp);
+ new_fcp = NewFeatureClause (fcp->featlist->data.ptrvalue, parent_bsp,
+ suppress_locus_tag);
if (new_fcp == NULL) return FALSE;
vnp = ValNodeNew (segment_feature_list);
if (vnp == NULL) return FALSE;
@@ -7883,8 +7900,7 @@ static CharPtr BuildFeatureClauses (
static Boolean LIBCALLBACK GetFeatureClauseForSeg (
SeqLocPtr slp,
- SeqMgrSegmentContextPtr context
-)
+ SeqMgrSegmentContextPtr context)
{
SegmentDefLineFeatureClausePtr sdlp;
ValNodePtr clause, tmp_parent_list;
@@ -7932,7 +7948,8 @@ static Boolean LIBCALLBACK GetFeatureClauseForSeg (
&& stop >= context->cumOffset)
{
new_fcp = NewFeatureClause (fcp->featlist->data.ptrvalue,
- sdlp->parent_bsp);
+ sdlp->parent_bsp,
+ sdlp->feature_requests->suppress_locus_tags);
if (new_fcp == NULL) return FALSE;
vnp = ValNodeNew (segment_feature_list);
if (vnp == NULL) return FALSE;
@@ -7945,7 +7962,8 @@ static Boolean LIBCALLBACK GetFeatureClauseForSeg (
if (segment_feature_list == NULL) {
segment_feature_list = GrabTraversingGenes (sdlp->parent_feature_list,
- context, sdlp->parent_bsp);
+ context, sdlp->parent_bsp,
+ sdlp->feature_requests->suppress_locus_tags);
}
entityID = ObjMgrGetEntityIDForPointer (bsp);
@@ -7996,15 +8014,15 @@ static CharPtr BuildFeatureClauses (
if (feature_requests->feature_list_type == DEFLINE_USE_FEATURES
&& ( ! isSegment || (seg_feature_list != NULL && *seg_feature_list != NULL)))
{
- GroupmRNAs (feature_list, bsp);
+ GroupmRNAs (feature_list, bsp, feature_requests->suppress_locus_tags);
/* genes are added to other clauses */
- GroupGenes (feature_list);
+ GroupGenes (feature_list, feature_requests->suppress_locus_tags);
if (! feature_requests->suppress_alt_splice_phrase)
{
/* find alt-spliced CDSs */
- FindAltSplices (*feature_list, bsp);
+ FindAltSplices (*feature_list, bsp, feature_requests->suppress_locus_tags);
}
GroupAltSplicedExons (feature_list, bsp, TRUE);
@@ -8012,9 +8030,9 @@ static CharPtr BuildFeatureClauses (
/* now group clauses */
GroupAllClauses ( feature_list, bsp );
- ExpandAltSplicedExons (*feature_list, bsp);
+ ExpandAltSplicedExons (*feature_list, bsp, feature_requests->suppress_locus_tags);
- FindGeneProducts (*feature_list, bsp);
+ FindGeneProducts (*feature_list, bsp, feature_requests->suppress_locus_tags);
if (seg_feature_list != NULL && *seg_feature_list != NULL)
{
@@ -8027,7 +8045,8 @@ static CharPtr BuildFeatureClauses (
/* remove exons and other unwanted features */
RemoveUnwantedFeatures (feature_list, bsp, isSegment, feature_requests);
- RemoveGenesMentionedElsewhere (feature_list, *feature_list, TRUE);
+ RemoveGenesMentionedElsewhere (feature_list, *feature_list, TRUE,
+ feature_requests->suppress_locus_tags);
if (feature_requests->remove_subfeatures)
{
@@ -8036,7 +8055,7 @@ static CharPtr BuildFeatureClauses (
DeleteOperonSubfeatures (feature_list, TRUE);
- CountUnknownGenes (feature_list, bsp);
+ CountUnknownGenes (feature_list, bsp, feature_requests->suppress_locus_tags);
if (feature_requests->misc_feat_parse_rule == 1)
{
@@ -8047,7 +8066,7 @@ static CharPtr BuildFeatureClauses (
RemoveUnwantedMiscFeats (feature_list, TRUE);
}
- ReplaceRNAClauses (feature_list, bsp);
+ ReplaceRNAClauses (feature_list, bsp, feature_requests->suppress_locus_tags);
/* take any exons on the minus strand */
/* and reverse their order within the clause */
@@ -8055,9 +8074,11 @@ static CharPtr BuildFeatureClauses (
RenameExonSequences ( feature_list, bsp, TRUE);
- LabelClauses (*feature_list, molecule_type, bsp);
+ LabelClauses (*feature_list, molecule_type, bsp,
+ feature_requests->suppress_locus_tags);
- ConsolidateClauses (feature_list, bsp, molecule_type, TRUE);
+ ConsolidateClauses (feature_list, bsp, molecule_type, TRUE,
+ feature_requests->suppress_locus_tags);
/* this allows genes to be listed together even if they are from */
/* separate sequences */
@@ -8096,17 +8117,26 @@ static Int2 GetProductFlagFromCDSProductNames (BioseqPtr bsp)
SeqFeatPtr cds = NULL;
Int2 product_flag;
Int2 i;
+ CharPtr found;
+ Char ch;
product_flag = 0;
- for (cds = SeqMgrGetNextFeature (bsp, cds, SEQFEAT_CDREGION, 0, &context); cds != NULL && product_flag == 0; cds = cds->next)
+ for (cds = SeqMgrGetNextFeature (bsp, cds, SEQFEAT_CDREGION, 0, &context);
+ cds != NULL && product_flag == 0;
+ cds = cds->next)
{
- for (i = 1; organelleByPopup[i] != NULL && product_flag == 0; i++)
- {
- if (StringStr (context.label, organelleByPopup[i]))
- {
- product_flag = i;
- }
- }
+ for (i = 1; organelleByPopup[i] != NULL && product_flag == 0; i++)
+ {
+ found = StringStr (context.label, organelleByPopup[i]);
+ if (found != NULL)
+ {
+ ch = *(found + StringLen (organelleByPopup[i]));
+ if (ch == 0 || ch == ' ')
+ {
+ product_flag = i;
+ }
+ }
+ }
}
return product_flag;
@@ -8162,7 +8192,8 @@ static void BuildDefLineFeatClauseList (
sdld.parent_bsp = bsp;
sdld.molecule_type = GetMoleculeType (bsp, entityID);
- sdld.parent_feature_list = GetFeatureList (bsp);
+ sdld.parent_feature_list = GetFeatureList (bsp,
+ feature_requests->suppress_locus_tags);
sdld.feature_requests = feature_requests;
sdld.product_flag = product_flag;
@@ -8201,7 +8232,7 @@ static void BuildDefLineFeatClauseList (
if (bsp == NULL) return;
if ( SpecialHandlingForSpecialTechniques (bsp)) return;
molecule_type = GetMoleculeType (bsp, entityID);
- head = GetFeatureList (bsp);
+ head = GetFeatureList (bsp, feature_requests->suppress_locus_tags);
/* get default product flag if necessary */
if (product_flag == -1 || product_flag == DEFAULT_ORGANELLE_CLAUSE) {
@@ -8280,6 +8311,7 @@ typedef struct deflineformdata {
GrouP featureOptsGrp;
PopuP misc_feat_parse_rule;
ButtoN alternate_splice_flag;
+ ButtoN suppress_locus_tags;
} DefLineFormData, PNTR DefLineFormPtr;
static void DefLineFormMessageProc (ForM f, Int2 mssg)
@@ -8435,6 +8467,9 @@ static void DoAutoDefLine (ButtoN b)
dlfp->feature_requests.remove_subfeatures =
GetStatus (dlfp->remove_subfeatures);
+ dlfp->feature_requests.suppress_locus_tags =
+ GetStatus (dlfp->suppress_locus_tags);
+
dlfp->feature_requests.misc_feat_parse_rule =
GetValue (dlfp->misc_feat_parse_rule);
@@ -8796,6 +8831,10 @@ static GrouP CreateDefLineFormFeatureOptionsGroup (
"Suppress transposon and insertion sequence subfeatures", NULL);
SetStatus (dlfp->remove_subfeatures, FALSE);
+ dlfp->suppress_locus_tags = CheckBox (dlfp->featureOptsGrp,
+ "Suppress locus tags", NULL);
+ SetStatus (dlfp->suppress_locus_tags, FALSE);
+
g = NormalGroup (dlfp->featureOptsGrp, 3, 0,
"Optional Features", programFont, NULL);
@@ -8824,6 +8863,7 @@ static GrouP CreateDefLineFormFeatureOptionsGroup (
(HANDLE) dlfp->alternate_splice_flag,
(HANDLE) dlfp->suppress_alt_splice_phrase,
(HANDLE) dlfp->remove_subfeatures,
+ (HANDLE) dlfp->suppress_locus_tags,
(HANDLE) g,
(HANDLE) r,
NULL);
diff --git a/sequin/sequin2.c b/sequin/sequin2.c
index c7302c3b..16d97958 100644
--- a/sequin/sequin2.c
+++ b/sequin/sequin2.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/22/95
*
-* $Revision: 6.168 $
+* $Revision: 6.172 $
*
* File Description:
*
@@ -1095,7 +1095,7 @@ End Gap: When some of the sequences in an alignment are shorter \
or longer than others, end gap characters are added to the end \
of the sequence to maintain the correct spacing. These will \
not appear in your sequence file.\n\
-Missing: These characters are used to represent \
+Ambiguous/Unknown: These characters are used to represent \
indeterminate/ambiguous nucleotides. These will appear in your \
sequence file as 'n'.\n\
Match: These characters are used to indicate positions where \
@@ -1187,6 +1187,7 @@ static Boolean ImportPhylipDialog (DialoG d, CharPtr filename)
CountTitlesWithoutOrganisms (sep);
} else {
+ SendHelpScrollMessage (helpForm, "Organism and Sequences Form", "Nucleotide Page");
SetPhylipDocInstructions (ppp);
}
} else {
@@ -1259,7 +1260,7 @@ static DialoG CreatePhylipDialog (GrouP h, CharPtr title, CharPtr text,
a = NormalGroup (m, 4, 0, "Sequence Characters", systemFont, NULL);
StaticPrompt (a, "Beginning Gap", 0, dialogTextHeight, systemFont, 'c');
ppp->beginning_gap = DialogText (a, "-.Nn?", 5, NULL);
- StaticPrompt (a, "Missing", 0, dialogTextHeight, systemFont, 'c');
+ StaticPrompt (a, "Ambiguous/Unknown", 0, dialogTextHeight, systemFont, 'c');
ppp->missing = DialogText (a, "?Nn", 5, NULL);
StaticPrompt (a, "Middle Gap", 0, dialogTextHeight, systemFont, 'c');
ppp->middle_gap = DialogText (a, "-.", 5, NULL);
@@ -7848,6 +7849,8 @@ extern void SqnNewAlign (BioseqPtr bsp1, BioseqPtr bsp2, SeqAlignPtr PNTR salp)
}
+/* This section of code is for the Remove Sequences From Alignments function. */
+
typedef struct alignmentsequencelist {
SeqIdPtr sip;
Char descr[255];
@@ -7860,12 +7863,70 @@ typedef struct removeseqfromaligndata {
SeqEntryPtr sep;
} RemoveSeqFromAlignData, PNTR RemoveSeqFromAlignPtr;
-static void RemoveOneSequenceFromAlignment (SeqIdPtr sip, SeqAlignPtr salp)
+/* Removes the sequence identified by sip from the alignment list salphead and
+ * returns the (possibly changed, possibly NULL) list head.  Pairwise and DenDiag
+ * subalignments containing sip are unlinked and freed; others go to SeqAlignBioseqDeleteById.
+ */
+static SeqAlignPtr RemoveOneSequenceFromAlignment (SeqIdPtr sip, SeqAlignPtr salphead)
{
- if (FindSeqIdinSeqAlign (salp, sip)) {
- SeqAlignIDCache (salp, sip);
+ Uint4 seqid_order;
+ SeqIdPtr tmpsip;
+ SeqAlignPtr salp, salp_next, prev_salp, remove_salp, last_remove;
+
+ if (!FindSeqIdinSeqAlign (salphead, sip)) return salphead; /* sip absent: hand the list back unchanged (caller stores the result) */
+
+ salp = salphead;
+ prev_salp = NULL;
+ remove_salp = NULL;
+ last_remove = NULL;
+ while (salp != NULL)
+ {
+ salp_next = salp->next; /* saved first: salp->next is cleared below when salp is unlinked */
+ tmpsip = SeqIdPtrFromSeqAlign (salp);
+ seqid_order = SeqIdOrderInBioseqIdList(sip, tmpsip);
+ if (seqid_order == 0)
+ {
+ /* do nothing for this subalignment */
+ prev_salp = salp;
+ }
+ else if (salp->dim == 2 || salphead->segtype ==1) /* NOTE(review): segtype is read from the current head, not from salp - confirm intended */
+ {
+ /* This is for a pairwise alignment or a DENDIAG alignment */
+ if (prev_salp == NULL)
+ {
+ salphead = salp->next; /* removing the head: the next subalignment becomes the new head */
+ }
+ else
+ {
+ prev_salp->next = salp->next;
+ }
+ /* save the alignments that we want to free in a list and get rid of them
+ * at the end - freeing them beforehand causes problems with listing the
+ * IDs in the alignment.
+ */
+ salp->next = NULL;
+ if (remove_salp == NULL)
+ {
+ remove_salp = salp;
+ }
+ else
+ {
+ last_remove->next = salp;
+ }
+ last_remove = salp;
+ }
+ else
+ {
+ SeqAlignBioseqDeleteById (salphead, sip);
+ prev_salp = salp;
+ }
+ salp = salp_next;
}
+ /* Now we can free the alignment */
+ SeqAlignFree (remove_salp); /* NOTE(review): remove_salp is a chain linked via ->next; confirm SeqAlignFree releases every node */
+ return salphead;
}
+
static void RemoveSequenceFromAlignmentsCallback (SeqAnnotPtr sap, Pointer userdata)
{
SeqAlignPtr salp;
@@ -7875,19 +7936,94 @@ static void RemoveSequenceFromAlignmentsCallback (SeqAnnotPtr sap, Pointer userd
salp = (SeqAlignPtr) sap->data;
if (salp == NULL) return;
sip = (SeqIdPtr) userdata;
- RemoveOneSequenceFromAlignment (sip, salp);
+ sap->data = RemoveOneSequenceFromAlignment (sip, salp);
+ /* if we've deleted all of the alignments, get rid of the annotation as well */
+ if (sap->data == NULL)
+ {
+ sap->idx.deleteme = TRUE;
+ }
+}
+
+typedef struct checkforremovesequencefromalignments
+{
+ Boolean found_problem; /* set TRUE by the checking callback when sip is row 1 of a dim == 2 subalignment */
+ SeqIdPtr sip; /* ID of the sequence the user asked to remove */
+} CheckForRemoveSequenceFromAlignmentsData, PNTR CheckForRemoveSequenceFromAlignmentsPtr;
+
+/* This is the callback function for looking for pairwise alignments.
+ * If we delete the first sequence in a pairwise alignment, we end up deleting
+ * the entire alignment because that sequence is paired with every other sequence.
+ */
+static void CheckForRemoveSequenceFromAlignmentsProblemsCallback (SeqAnnotPtr sap, Pointer userdata)
+{
+ CheckForRemoveSequenceFromAlignmentsPtr p;
+ SeqAlignPtr salphead, salp;
+ Uint4 seqid_order;
+ SeqIdPtr tmpsip;
+
+ if (sap == NULL || sap->type != 2 /* only type-2 annotations are examined; their data is cast to SeqAlignPtr below */
+ || (p = (CheckForRemoveSequenceFromAlignmentsPtr)userdata) == NULL
+ || p->found_problem) /* a problem was already recorded by an earlier annotation */
+ {
+ return;
+ }
+ salphead = (SeqAlignPtr) sap->data;
+ if (salphead == NULL) return;
+
+ if (!FindSeqIdinSeqAlign (salphead, p->sip))
+ {
+ return;
+ }
+ for (salp = salphead; salp != NULL; salp = salp->next)
+ {
+ tmpsip = SeqIdPtrFromSeqAlign (salp);
+ seqid_order = SeqIdOrderInBioseqIdList(p->sip, tmpsip);
+ if (seqid_order == 0) /* presumably 0 means p->sip is not in this subalignment - TODO confirm */
+ {
+ continue;
+ }
+ else if (seqid_order == 1 && salp->dim == 2) /* p->sip is the first row of a two-row subalignment */
+ {
+ p->found_problem = TRUE; /* the loop keeps scanning; the flag is never cleared here */
+ }
+ }
}
static void DoRemoveSequencesFromAlignment (ButtoN b)
{
RemoveSeqFromAlignPtr rp;
+ WindoW w;
ValNodePtr vnp;
Int2 val;
AlignmentSequenceListPtr aslp;
-
+ CheckForRemoveSequenceFromAlignmentsData data;
+
if (b == NULL) return;
rp = (RemoveSeqFromAlignPtr) GetObjectExtra (b);
if (rp == NULL) return;
+
+ w = (WindoW) rp->form;
+ Hide (w);
+ /* first, check for pairwise alignments */
+ val = 1;
+ for (vnp = rp->sequence_list; vnp != NULL; vnp = vnp->next) {
+ aslp = vnp->data.ptrvalue;
+ if (aslp == NULL) continue;
+ if (GetItemStatus (rp->sequence_list_ctrl, val)) {
+ data.sip = aslp->sip;
+ data.found_problem = FALSE;
+ VisitAnnotsInSep (rp->sep, (Pointer) &data, CheckForRemoveSequenceFromAlignmentsProblemsCallback);
+ if (data.found_problem)
+ {
+ Message (MSG_ERROR, "One of the selected sequences is the first in a pairwise alignment."
+ " You must convert the alignment to a multiple alignment before trying to remove this sequence.");
+ Remove (rp->form);
+ return;
+ }
+ }
+ val++;
+ }
+
val = 1;
for (vnp = rp->sequence_list; vnp != NULL; vnp = vnp->next) {
aslp = vnp->data.ptrvalue;
@@ -7897,11 +8033,39 @@ static void DoRemoveSequencesFromAlignment (ButtoN b)
}
val++;
}
+
+ ValNodeFree (rp->sequence_list);
+ rp->sequence_list = NULL;
+ DeleteMarkedObjects (rp->input_entityID, 0, NULL);
ObjMgrSetDirtyFlag (rp->input_entityID, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, rp->input_entityID, 0, 0);
Remove (rp->form);
}
+/* This function is used so that a sequence ID will only appear once in the list,
+ * even if it appears in more than one alignment or subalignment.
+ * Returns TRUE when some entry's sip compares SIC_YES with sip; FALSE for a NULL sip or no match. */
+static Boolean IsIDAlreadyInList (SeqIdPtr sip, ValNodePtr list)
+{
+ ValNodePtr vnp;
+ AlignmentSequenceListPtr aslp;
+
+ if (sip == NULL) return FALSE;
+
+ for (vnp = list; vnp != NULL; vnp = vnp->next)
+ {
+ aslp = (AlignmentSequenceListPtr) vnp->data.ptrvalue; /* each node's payload is read as an AlignmentSequenceListPtr */
+ if (aslp != NULL && SeqIdComp (aslp->sip, sip) == SIC_YES)
+ {
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+/* This function creates the list of sequence IDs and descriptions to use in
+ * the Remove Sequences From Alignments dialog.
+ */
static void ListSequencesInAlignmentsCallback (SeqAnnotPtr sap, Pointer userdata)
{
SeqAlignPtr salp;
@@ -7914,11 +8078,13 @@ static void ListSequencesInAlignmentsCallback (SeqAnnotPtr sap, Pointer userdata
if (sap == NULL || sap->type != 2 || userdata == NULL) return;
salp = (SeqAlignPtr) sap->data;
- if (salp == NULL) return;
- list = (ValNodePtr PNTR)userdata;
- sip_list = SeqAlignIDList (salp);
- if (sip_list == NULL) return;
- for (sip = sip_list; sip != NULL; sip = sip->next) {
+ while (salp != NULL)
+ {
+ list = (ValNodePtr PNTR)userdata;
+ sip_list = SeqAlignIDList (salp);
+ if (sip_list == NULL) return;
+ for (sip = sip_list; sip != NULL; sip = sip->next) {
+ if (IsIDAlreadyInList (sip, *list)) continue;
aslp = (AlignmentSequenceListPtr) MemNew (sizeof (AlignmentSequenceListData));
if (aslp == NULL) return;
aslp->sip = sip;
@@ -7933,7 +8099,7 @@ static void ListSequencesInAlignmentsCallback (SeqAnnotPtr sap, Pointer userdata
offset ++;
}
SeqIdWrite (bsp_sip, aslp->descr + offset, PRINTID_TEXTID_ACCESSION, 254 - offset);
- offset += StringLen (aslp->descr);
+ offset = StringLen (aslp->descr);
}
} else {
SeqIdWrite (sip, aslp->descr, PRINTID_TEXTID_ACCESSION, 254);
@@ -7942,7 +8108,9 @@ static void ListSequencesInAlignmentsCallback (SeqAnnotPtr sap, Pointer userdata
vnp->data.ptrvalue = aslp;
if (*list == NULL) {
*list = vnp;
- }
+ }
+ }
+ salp = salp->next;
}
}
@@ -7992,6 +8160,7 @@ extern void RemoveSequencesFromAlignment (IteM i)
rp = (RemoveSeqFromAlignPtr) MemNew (sizeof (RemoveSeqFromAlignData));
if (rp == NULL) return;
+ rp->input_entityID = bfp->input_entityID;
rp->sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
if (rp->sep == NULL) {
MemFree (rp);
@@ -8035,4 +8204,5 @@ extern void RemoveSequencesFromAlignment (IteM i)
Update ();
}
+/* End of Remove Sequences From Alignments function code. */
diff --git a/sequin/sequin3.c b/sequin/sequin3.c
index c574dd52..912199c2 100644
--- a/sequin/sequin3.c
+++ b/sequin/sequin3.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/22/95
*
-* $Revision: 6.377 $
+* $Revision: 6.384 $
*
* File Description:
*
@@ -1074,6 +1074,57 @@ static void RemoveAllGeneXrefs (IteM i)
ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
}
+static void DoRefreshGeneXrefs (SeqFeatPtr sfp, Pointer userdata)
+/* Feature visitor: replaces each gene xref on sfp with a duplicate of the GeneRef of the gene feature found by the xref's locus; userdata is unused. */
+{
+ SeqFeatXrefPtr curr;
+ GeneRefPtr grp, grpfeat;
+ SeqFeatPtr gene;
+ SeqMgrFeatContext fcontext;
+ BioseqPtr bsp;
+
+ if (sfp == NULL) return;
+
+ for (curr = sfp->xref; curr != NULL; curr = curr->next)
+ {
+ if (curr->data.choice == SEQFEAT_GENE) {
+ grp = (GeneRefPtr) curr->data.value.ptrvalue;
+ if (grp != NULL)
+ {
+ bsp = BioseqFindFromSeqLoc (sfp->location); /* recomputed for every xref; NOTE(review): not NULL-checked before the lookup below */
+ gene = SeqMgrGetFeatureByLabel (bsp, grp->locus, SEQFEAT_GENE, 0, &fcontext); /* look the gene feature up by the xref's locus */
+ if (gene != NULL && gene->data.choice == SEQFEAT_GENE) {
+ grpfeat = (GeneRefPtr) gene->data.value.ptrvalue;
+ if (grpfeat != NULL) {
+ GeneRefFree (grp); /* drop the stale copy held by the xref */
+ grp = GeneRefDup (grpfeat);
+ curr->data.value.ptrvalue = grp; /* NOTE(review): if GeneRefDup can return NULL the xref is left empty - TODO confirm */
+ }
+ }
+ }
+ }
+ }
+}
+
+static void RefreshGeneXRefs (IteM i)
+/* Menu handler: runs DoRefreshGeneXrefs over every feature of the form's top SeqEntry, then marks the entity dirty and sends an update message. */
+{
+ BaseFormPtr bfp;
+ SeqEntryPtr sep;
+
+#ifdef WIN_MAC
+ bfp = currentFormDataPtr;
+#else
+ bfp = GetObjectExtra (i);
+#endif
+ if (bfp == NULL) return;
+ sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
+ if (sep == NULL) return;
+ VisitFeaturesInSep (sep, NULL, DoRefreshGeneXrefs); /* NULL userdata: the visitor does not use it */
+ ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE); /* set unconditionally, whether or not any xref was replaced */
+ ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
+}
+
static ValNodePtr RemoveDbxrefList (ValNodePtr vnp)
{
@@ -1634,7 +1685,47 @@ static void RawSeqToDeltaSeq (IteM i)
if (bfp == NULL) return;
sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
if (sep == NULL) return;
- VisitBioseqsInSep (sep, (Pointer) bfp, ConvertNsToGaps);
+ VisitBioseqsInSep (sep, NULL, ConvertNsToGaps);
+ ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
+ ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
+}
+
+static void RawSeqToDeltaSeqUnknownLengthGaps (IteM i)
+/* Menu handler (labelled "Unknown Length Gaps for 100 Ns" in SetupSpecialMenu): runs ConvertNsToGaps over every Bioseq with a gap-size argument of 100. */
+{
+ BaseFormPtr bfp;
+ SeqEntryPtr sep;
+ Int4 unknown_gap_size = 100; /* handed to ConvertNsToGaps by address as userdata; its exact meaning is defined there (not shown here) */
+
+#ifdef WIN_MAC
+ bfp = currentFormDataPtr;
+#else
+ bfp = GetObjectExtra (i);
+#endif
+ if (bfp == NULL) return;
+ sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
+ if (sep == NULL) return;
+ VisitBioseqsInSep (sep, &unknown_gap_size, ConvertNsToGaps); /* assumes the visit completes before this local goes out of scope - TODO confirm */
+ ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
+ ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
+}
+
+static void RawSeqToDeltaSeqUnknown100LengthGaps (IteM i)
+/* Menu handler (labelled "Unknown Length 100 Gaps for All Ns" in SetupSpecialMenu): runs ConvertNsToGaps over every Bioseq with a gap-size argument of -1. */
+{
+ BaseFormPtr bfp;
+ SeqEntryPtr sep;
+ Int4 unknown_gap_size = -1; /* presumably a sentinel meaning "every run of Ns" - interpretation lives in ConvertNsToGaps, TODO confirm */
+
+#ifdef WIN_MAC
+ bfp = currentFormDataPtr;
+#else
+ bfp = GetObjectExtra (i);
+#endif
+ if (bfp == NULL) return;
+ sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
+ if (sep == NULL) return;
+ VisitBioseqsInSep (sep, &unknown_gap_size, ConvertNsToGaps); /* assumes the visit completes before this local goes out of scope - TODO confirm */
ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
}
@@ -10314,6 +10405,9 @@ extern void SetupSpecialMenu (MenU m, BaseFormPtr bfp)
i = CommandItem (s, "Genus-Species Fixup", GenSpecTaxonFixup);
SetObjectExtra (i, bfp, NULL);
SeparatorItem (s);
+ i = CommandItem (s, "Country Fixup", CountryLookup);
+ SetObjectExtra (i, bfp, NULL);
+ SeparatorItem (s);
i = CommandItem (s, "Set Source Focus", SetSourceFocus);
SetObjectExtra (i, bfp, NULL);
i = CommandItem (s, "Clear Source Focus", ClearSourceFocus);
@@ -10371,6 +10465,8 @@ extern void SetupSpecialMenu (MenU m, BaseFormPtr bfp)
SetObjectExtra (i, bfp, NULL);
i = CommandItem (s, "Remove Proteins", RemoveProteins);
SetObjectExtra (i, bfp, NULL);
+ i = CommandItem (s, "Remove Proteins and Renormalize Nuc-Prot Sets", RemoveProteinsAndRenormalize);
+ SetObjectExtra (i, bfp, NULL);
SeparatorItem (s);
i = CommandItem (s, "Remove Source Qual", RemoveSource);
SetObjectExtra (i, bfp, NULL);
@@ -10566,6 +10662,9 @@ extern void SetupSpecialMenu (MenU m, BaseFormPtr bfp)
i = CommandItem (s, "Resolve Colliding Local IDs", ResolveExistingLocalIDs);
SetObjectExtra (i, bfp, NULL);
}
+ SeparatorItem (s);
+ i = CommandItem (s, "Refresh Gene Xrefs", RefreshGeneXRefs);
+ SetObjectExtra (i, bfp, NULL);
s = SubMenu (m, "Edit/ E");
i = CommandItem (s, "Edit Qualifiers", EditQualifier);
@@ -10624,7 +10723,7 @@ extern void SetupSpecialMenu (MenU m, BaseFormPtr bfp)
s = SubMenu (m, "Transform/ T");
i = CommandItem (s, "Correct CDS Genetic Codes", CorrectCDSGenCodes);
SetObjectExtra (i, bfp, NULL);
- i = CommandItem (s, "Correct CDS Propagate Crud", FixCdsAfterPropagate);
+ i = CommandItem (s, "Cleanup CDS partials after propagation", FixCdsAfterPropagate);
SetObjectExtra (i, bfp, NULL);
SeparatorItem (s);
i = CommandItem (s, "Trim Ns from Bioseqs", TrimNsFromNucs);
@@ -10799,7 +10898,12 @@ extern void SetupSpecialMenu (MenU m, BaseFormPtr bfp)
SetObjectExtra (i, bfp, NULL);
SeparatorItem (s);
}
- i = CommandItem (s, "Raw Sequence with Ns to Delta Sequence", RawSeqToDeltaSeq);
+ x = SubMenu (s, "Raw Sequence with Ns to Delta Sequence");
+ i = CommandItem (x, "All Known Length Gaps", RawSeqToDeltaSeq);
+ SetObjectExtra (i, bfp, NULL);
+ i = CommandItem (x, "Unknown Length Gaps for 100 Ns", RawSeqToDeltaSeqUnknownLengthGaps);
+ SetObjectExtra (i, bfp, NULL);
+ i = CommandItem (x, "Unknown Length 100 Gaps for All Ns", RawSeqToDeltaSeqUnknown100LengthGaps);
SetObjectExtra (i, bfp, NULL);
s = SubMenu (m, "Misc/ M");
diff --git a/sequin/sequin4.c b/sequin/sequin4.c
index d21e8e4c..457c5add 100644
--- a/sequin/sequin4.c
+++ b/sequin/sequin4.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 6/28/96
*
-* $Revision: 6.210 $
+* $Revision: 6.214 $
*
* File Description:
*
@@ -74,6 +74,7 @@
#include <aliparse.h>
#include <spidey.h>
#include <ent2api.h>
+#include <valid.h>
#define REGISTER_UPDATESEGSET ObjMgrProcLoadEx (OMPROC_FILTER,"Update Segmented Set","UpdateSegSet",0,0,0,0,NULL,UpdateSegSet,PROC_PRIORITY_DEFAULT, "Indexer")
@@ -97,6 +98,10 @@
#define REGISTER_SEGREGATE_BY_TEXT ObjMgrProcLoadEx (OMPROC_FILTER, "Segregate By Text","SegregateByText",0,0,0,0,NULL,CreateSegregateByTextWindow,PROC_PRIORITY_DEFAULT, "Indexer")
+#define REGISTER_SEGREGATE_BY_FEATURE ObjMgrProcLoadEx (OMPROC_FILTER, "Segregate By Feature","SegregateByFeature",0,0,0,0,NULL,CreateSegregateByFeatureWindow,PROC_PRIORITY_DEFAULT, "Indexer")
+
+#define REGISTER_SEGREGATE_BY_DESCRIPTOR ObjMgrProcLoadEx (OMPROC_FILTER, "Segregate By Descriptor","SegregateByDescriptor",0,0,0,0,NULL,CreateSegregateByDescriptorWindow,PROC_PRIORITY_DEFAULT, "Indexer")
+
#define REGISTER_CONVERTSEQALIGN ObjMgrProcLoadEx (OMPROC_FILTER,"Convert SeqAlign","ConvertSeqAlign",0,0,0,0,NULL,ConvertToTrueMultipleAlignment,PROC_PRIORITY_DEFAULT, "Alignment")
#define REGISTER_MAKESEQALIGN ObjMgrProcLoadEx (OMPROC_FILTER,"Make SeqAlign","CreateSeqAlign",0,0,0,0,NULL,GenerateSeqAlignFromSeqEntry,PROC_PRIORITY_DEFAULT, "Alignment")
@@ -6509,6 +6514,8 @@ extern void SetupSequinFilters (void)
if (indexerVersion) {
REGISTER_DELETE_BY_TEXT;
+ REGISTER_SEGREGATE_BY_FEATURE;
+ REGISTER_SEGREGATE_BY_DESCRIPTOR;
REGISTER_SEGREGATE_BY_TEXT;
REGISTER_FIND_NON_ACGT;
REGISTER_BSP_INDEX;
@@ -8148,3 +8155,92 @@ extern void ConsolidateOrganismNotes (IteM i)
ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
}
+static void CountryLookupProc (BioSourcePtr biop, Pointer userdata)
+{
+ CharPtr PNTR list;
+ CharPtr PNTR ptr;
+ SubSourcePtr ssp;
+ CharPtr cp, before, newname;
+ Int4 len_cntry, len_qual, len_name;
+/* BioSource visitor: rewrites each country qualifier toward "Country: rest", using the NULL-terminated country-name array passed in userdata. */
+ if (biop == NULL || (list = (CharPtr PNTR)userdata) == NULL)
+ {
+ return;
+ }
+
+ for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next)
+ {
+ if (ssp->subtype != SUBSRC_country || ssp->name == NULL) continue;
+ for (ptr = list; ptr != NULL && *ptr != NULL; ptr++)
+ {
+ len_cntry = StringLen (*ptr);
+ cp = StringStr (ssp->name, *ptr);
+ if (cp != NULL && !isalpha ((unsigned char) cp [len_cntry])) /* cast: <ctype.h> calls are undefined for negative char values */
+ {
+ len_qual = StringLen (ssp->name);
+ if (cp == ssp->name)
+ {
+ if (len_cntry == len_qual || ssp->name [len_cntry] == ':')
+ {
+ /* exact match, don't need to do anything */
+ return; /* NOTE(review): leaves the whole function, so later country qualifiers on this BioSource are not visited */
+ }
+ ssp->name [len_cntry] = ':'; /* overwrite the character that follows the leading country name */
+ return;
+ }
+ else
+ {
+ if (isalpha ((unsigned char) *(cp - 1)))
+ {
+ /* not really a match, part of another word */
+ continue;
+ }
+ else
+ {
+ if ((newname = (CharPtr) MemNew (len_qual + 3)) == NULL) return; /* bail out before ssp->name is modified */
+ *(cp - 1) = 0; /* cut the old name just before the match so the leading text can be copied */
+ before = StringSave (ssp->name);
+ StringNCpy (newname, *ptr, len_cntry);
+ newname [len_cntry] = ':';
+ newname [len_cntry + 1] = ' ';
+ StringNCpy (newname + len_cntry + 2, before, StringLen (before));
+ StringCpy (newname + len_cntry + 2 + StringLen (before), cp + len_cntry);
+ len_name = StringLen (newname);
+ while (len_name > 0 && (isspace ((unsigned char) newname[len_name - 1]) || ispunct ((unsigned char) newname [len_name - 1])))
+ {
+ newname [len_name - 1] = 0;
+ len_name --;
+ }
+ before = MemFree (before);
+ MemFree (ssp->name);
+ ssp->name = newname; /* NOTE(review): the scan continues, so a later country name may match and rewrite this value again */
+ }
+ }
+ }
+ }
+ }
+}
+
+extern void CountryLookup (IteM i)
+{
+ BaseFormPtr bfp;
+ SeqEntryPtr sep;
+ CharPtr PNTR list;
+/* Menu handler: applies CountryLookupProc to every BioSource of the form's top SeqEntry, then marks the entity dirty and sends an update message. */
+
+#ifdef WIN_MAC
+ bfp = currentFormDataPtr;
+#else
+ bfp = GetObjectExtra (i);
+#endif
+ if (bfp == NULL) return;
+ sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
+ if (sep == NULL) return;
+
+ list = GetValidCountryList (); /* NOTE(review): ownership of the returned list is not visible here; this function never frees it */
+ if (list == NULL) return;
+ VisitBioSourcesInSep (sep, list, CountryLookupProc);
+ ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE); /* set unconditionally, whether or not any qualifier changed */
+ ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
+}
+
diff --git a/sequin/sequin5.c b/sequin/sequin5.c
index db716219..2e45fc3c 100644
--- a/sequin/sequin5.c
+++ b/sequin/sequin5.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 8/26/97
*
-* $Revision: 6.169 $
+* $Revision: 6.171 $
*
* File Description:
*
@@ -3266,7 +3266,7 @@ static void BlastCDD (BioseqPtr bsp, Pointer userdata)
/* do blast search */
- salp = BlastBioseqNet (bl3hp, bsp, "blastp", "oasis_sap", options,
+ salp = BlastBioseqNet (bl3hp, bsp, "blastp", "cdd", options,
NULL, &error_returns, NULL);
/* BlastErrorPrintExtra (error_returns, TRUE, stdout); */
@@ -3314,7 +3314,7 @@ extern void SimpleCDDBlastProc (IteM i)
/* blast fetch enable needed to retrieve by general SeqID */
- BlastNetBioseqFetchEnable (bl3hp, "oasis_sap", FALSE, TRUE);
+ BlastNetBioseqFetchEnable (bl3hp, "cdd", FALSE, TRUE);
bf.bl3hp = bl3hp;
bf.options = options;
@@ -3330,7 +3330,7 @@ extern void SimpleCDDBlastProc (IteM i)
BlastFini (bl3hp);
options = BLASTOptionDelete (options);
- BlastNetBioseqFetchDisable (bl3hp, "oasis_sap", FALSE);
+ BlastNetBioseqFetchDisable (bl3hp, "cdd", FALSE);
ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
@@ -9301,14 +9301,14 @@ static void ProcessBioSourceFunc (BioSourcePtr biop, SourceFormPtr sfp, Boolean
if (ssp != NULL) {
foundit = StringISearch (ssp->name, sfp->findStr);
while (foundit != NULL) {
- offset = foundit - ssp->name;
+ offset = foundit - ssp->name + 1;
EditSourceString (&(ssp->name), sfp, foundit);
foundit = StringISearch (ssp->name + offset, sfp->findStr);
}
} else if (mod != NULL) {
foundit = StringISearch (mod->subname, sfp->findStr);
while (foundit != NULL) {
- offset = foundit - mod->subname;
+ offset = foundit - mod->subname + 1;
EditSourceString (&(mod->subname), sfp, foundit);
foundit = StringISearch (mod->subname + offset, sfp->findStr);
}
diff --git a/sequin/sequin6.c b/sequin/sequin6.c
index 4a0f4b4e..33c8c607 100644
--- a/sequin/sequin6.c
+++ b/sequin/sequin6.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11/12/97
*
-* $Revision: 6.162 $
+* $Revision: 6.164 $
*
* File Description:
*
@@ -75,6 +75,7 @@ END_ENUM_ALIST
#define IMPORT_FEAT_TYPE 7
#define DEFLINE_TYPE 8
#define FEATURE_NOTE_TYPE 9
+#define PUBLICATION_TYPE 10
#define NUMBER_OF_TYPES 7
#define NUMBER_OF_TYPES_WITH_DEFLINE 8
@@ -91,7 +92,9 @@ static ENUM_ALIST(target_field_alist)
{"DefLine", DEFLINE_TYPE},
END_ENUM_ALIST
-#define NUMBER_OF_SEGREGATE_TYPES 9
+#define NUMBER_OF_SEGREGATE_TYPES 10
+#define NUMBER_OF_PARSE_TYPES 9
+
static ENUM_ALIST(segregate_target_field_alist)
{" ", 0},
{"Gene", GENE_TYPE},
@@ -103,8 +106,34 @@ static ENUM_ALIST(segregate_target_field_alist)
{"Import Feature", IMPORT_FEAT_TYPE},
{"DefLine", DEFLINE_TYPE},
{"Feature Note", FEATURE_NOTE_TYPE},
+ {"Publication", PUBLICATION_TYPE},
+END_ENUM_ALIST
+
+/* Targets offered by the parse-to-anywhere dialog: the same entries as the
+ * segregate target list minus "Publication".  NUMBER_OF_PARSE_TYPES is
+ * already defined above, next to NUMBER_OF_SEGREGATE_TYPES, and equals the
+ * highest type value listed here, so it is not defined a second time. */
+static ENUM_ALIST(parse_target_field_alist)
+  {" ", 0},
+  {"Gene", GENE_TYPE},
+  {"CDS", CDS_TYPE},
+  {"Prot", PROT_TYPE},
+  {"RNA", RNA_TYPE},
+  {"BioSource", BIOSOURCE_TYPE},
+  {"OrgMod and SubSource", ORGMOD_SUBSOURCE_TYPE},
+  {"Import Feature", IMPORT_FEAT_TYPE},
+  {"DefLine", DEFLINE_TYPE},
+  {"Feature Note", FEATURE_NOTE_TYPE},
END_ENUM_ALIST
+/* Choices for the publication-status sub-target popup.  The selected value
+ * is stored in the convert form's subtype and compared against the
+ * publication's status in DoesPubStatusMatch; 0 (blank) matches any status. */
+#define PUBLICATION_PUBLISHED_FIELD 1
+#define PUBLICATION_INPRESS_FIELD 2
+#define PUBLICATION_UNPUB_FIELD 3
+
+static ENUM_ALIST (publication_field_alist)
+ {" ", 0},
+ {"Published", PUBLICATION_PUBLISHED_FIELD},
+ {"In Press", PUBLICATION_INPRESS_FIELD},
+ {"Unpublished", PUBLICATION_UNPUB_FIELD},
+END_ENUM_ALIST
#define EXT_NUMBER_OF_TYPES 7
@@ -350,7 +379,7 @@ static ENUM_ALIST (subsource_subtype_and_note_alist)
END_ENUM_ALIST
-#define NUM_SUBTARGET_POPUPS 10
+#define NUM_SUBTARGET_POPUPS 11
static GbFeatName ParseQualifierList[] = {
{"allele", Class_text}, {"anticodon", Class_pos_aa},
@@ -1618,6 +1647,246 @@ static Boolean DoFeaturesContainText_Callback
return found;
}
+/* Search state shared between ObjectHasSubstring and the ASN.1 write
+ * callback that inspects every string value of an object. */
+typedef struct objstringdata
+{
+ CharPtr match; /* text to look for (case-sensitive StringSearch) */
+ Boolean found; /* set to TRUE by the callback once match is seen */
+} ObjStringData, PNTR ObjStringPtr;
+
+/* ASN.1 export callback: flags the ObjStringData passed as callback data
+ * when the value being written is a string that contains the search text. */
+static void LIBCALLBACK AsnWriteRemoveForDCallBack (AsnExpOptStructPtr pAEOS)
+
+{
+  ObjStringPtr  search;
+  CharPtr       text;
+
+  search = (ObjStringPtr) pAEOS->data;
+
+  /* only string-typed values can contain the search text */
+  if (! (ISA_STRINGTYPE (AsnFindBaseIsa (pAEOS->atp)))) return;
+
+  text = (CharPtr) pAEOS->dvp->ptrvalue;
+  if (StringSearch (text, search->match) != NULL) {
+    search->found = TRUE;
+  }
+}
+
+/* Writes the object `ptr` to `aip` using the object type's ASN.1 writer and
+ * returns whether `osp->found` was set during that write.  The flag is reset
+ * first, so the result reflects this object only.  The flag is only ever set
+ * by a callback that the caller must have registered on `aip` with `osp` as
+ * its data; this function itself never sets it to TRUE. */
+static Boolean ObjectHasSubstring (ObjMgrTypePtr omtp, AsnIoPtr aip, Pointer ptr, ObjStringPtr osp)
+
+{
+ osp->found = FALSE;
+ (omtp->asnwrite) (ptr, aip, NULL);
+ return osp->found;
+}
+
+/* Determines the publication status of a Pubdesc.
+ *
+ * Walks the pub list until an imprint is found and returns that imprint's
+ * prepub value.  A Cit-gen whose citation text is "Unpublished" (compared
+ * case-insensitively) short-circuits to PUB_STATUS_UNPUBLISHED.
+ *
+ * Returns 255 when pdp is NULL or no status can be determined; callers in
+ * this file only compare the result against the PUB_STATUS_* values. */
+static Uint1 GetPubStatus (PubdescPtr pdp)
+{
+  ValNodePtr       vnp;
+  CitGenPtr        cgp;
+  CitArtPtr        cap;
+  CitJourPtr       cjp;
+  CitBookPtr       cbp;
+  CitSubPtr        csp;
+  MedlineEntryPtr  mlp;
+  ImprintPtr       ip = NULL;
+  Uint1            status = 255; /* 255 is currently not a valid status */
+
+  if (pdp == NULL) return status;
+
+  for (vnp = pdp->pub; vnp != NULL && ip == NULL; vnp = vnp->next)
+  {
+    switch (vnp->choice)
+    {
+      case PUB_Gen:
+        cgp = (CitGenPtr) vnp->data.ptrvalue;
+        /* StringICmp returns 0 on a match; the comparison must be explicit,
+         * otherwise every Cit-gen that is NOT "Unpublished" would be
+         * reported as unpublished. */
+        if (cgp != NULL && StringICmp (cgp->cit, "Unpublished") == 0)
+        {
+          return PUB_STATUS_UNPUBLISHED;
+        }
+        break;
+      case PUB_Article:
+      case PUB_Medline:
+        if (vnp->choice == PUB_Article)
+        {
+          cap = (CitArtPtr) vnp->data.ptrvalue;
+        }
+        else
+        {
+          /* a Medline entry wraps the article citation */
+          cap = NULL;
+          mlp = (MedlineEntryPtr) vnp->data.ptrvalue;
+          if (mlp != NULL)
+          {
+            cap = mlp->cit;
+          }
+        }
+        /* only articles with from == 1 are treated as carrying a journal
+         * citation (and therefore an imprint) in fromptr */
+        if (cap != NULL && cap->from == 1)
+        {
+          cjp = (CitJourPtr) cap->fromptr;
+          if (cjp != NULL)
+          {
+            ip = cjp->imp;
+          }
+        }
+        break;
+      case PUB_Man:
+      case PUB_Book:
+        cbp = (CitBookPtr) vnp->data.ptrvalue;
+        if (cbp != NULL)
+        {
+          ip = cbp->imp;
+        }
+        break;
+      case PUB_Sub:
+        csp = (CitSubPtr) vnp->data.ptrvalue;
+        if (csp != NULL)
+        {
+          ip = csp->imp;
+        }
+        break;
+    }
+  }
+  if (ip != NULL)
+  {
+    status = ip->prepub;
+  }
+  return status;
+}
+
+/* Returns TRUE when the publication's status agrees with the status choice
+ * stored in cfp->subtype.  A subtype of 0 (blank choice) accepts any
+ * publication; an unrecognized subtype accepts none. */
+static Boolean DoesPubStatusMatch (PubdescPtr pdp, ConvertFormPtr cfp)
+{
+  Uint1  actual;
+
+  if (pdp == NULL || cfp == NULL) return FALSE;
+  if (cfp->subtype == 0) return TRUE;
+
+  actual = GetPubStatus (pdp);
+
+  switch (cfp->subtype)
+  {
+    case PUBLICATION_PUBLISHED_FIELD:
+      if (actual == PUB_STATUS_PUBLISHED) return TRUE;
+      break;
+    case PUBLICATION_INPRESS_FIELD:
+      if (actual == PUB_STATUS_IN_PRESS) return TRUE;
+      break;
+    case PUBLICATION_UNPUB_FIELD:
+      if (actual == PUB_STATUS_UNPUBLISHED) return TRUE;
+      break;
+    default:
+      break;
+  }
+  return FALSE;
+}
+
+/* Returns TRUE when the Bioseq has a publication descriptor or publication
+ * feature that contains cfp->deleteStr in any of its string values and
+ * whose status matches the form's publication-status choice.
+ *
+ * Each candidate is written to a null ASN.1 stream whose export callback
+ * inspects every string value (see ObjectHasSubstring). */
+static Boolean DoesSequenceHavePubWithText (BioseqPtr bsp, ConvertFormPtr cfp)
+{
+  AsnExpOptPtr       aeop;
+  AsnIoPtr           aip;
+  ObjStringData      osd;
+  SeqMgrDescContext  dcontext;
+  SeqDescrPtr        sdp;
+  SeqMgrFeatContext  fcontext;
+  SeqFeatPtr         sfp;
+  Boolean            rval = FALSE;
+  ObjMgrPtr          omp;
+  ObjMgrTypePtr      omtp;
+  PubdescPtr         pdp;
+
+  if (bsp == NULL || cfp == NULL) return FALSE;
+  omp = ObjMgrGet ();
+  if (omp == NULL) return FALSE;
+  omtp = ObjMgrTypeFind (omp, OBJ_SEQDESC, NULL, NULL);
+  if (omtp == NULL) return FALSE;
+
+  /* set up the search state before it is handed to the callback */
+  osd.match = cfp->deleteStr;
+  osd.found = FALSE;
+
+  aip = AsnIoNullOpen ();
+  if (aip == NULL) return FALSE; /* nothing can be scanned without a stream */
+  aeop = AsnExpOptNew (aip, NULL, NULL, AsnWriteRemoveForDCallBack);
+  if (aeop != NULL) {
+    aeop->user_data = (Pointer) &osd;
+  }
+
+  /* look for publication descriptors */
+  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext);
+  while (sdp != NULL && !rval) {
+    if (ObjectHasSubstring (omtp, aip, (Pointer) sdp, &osd)) {
+      pdp = (PubdescPtr) sdp->data.ptrvalue;
+      if (DoesPubStatusMatch (pdp, cfp))
+      {
+        rval = TRUE;
+      }
+    }
+    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_pub, &dcontext);
+  }
+
+  if (!rval)
+  {
+    omtp = ObjMgrTypeFind (omp, OBJ_SEQFEAT, NULL, NULL);
+    if (omtp != NULL)
+    {
+      /* look for publication features */
+      sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_PUB, &fcontext);
+      while (sfp != NULL && !rval)
+      {
+        if (ObjectHasSubstring (omtp, aip, (Pointer) sfp, &osd))
+        {
+          pdp = (PubdescPtr) sfp->data.value.ptrvalue;
+          if (DoesPubStatusMatch (pdp, cfp))
+          {
+            rval = TRUE;
+          }
+        }
+        sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_PUB, &fcontext);
+      }
+    }
+  }
+
+  AsnIoClose (aip);
+  return rval;
+}
+
+/* Returns TRUE when one of the descriptors attached directly to the set is a
+ * publication that contains cfp->deleteStr in any of its string values and
+ * whose status matches the form's publication-status choice.  Only the
+ * set's own descriptor chain is examined here, not its member sequences. */
+static Boolean DoesNucProtSetHavePubWithText (BioseqSetPtr bssp, ConvertFormPtr cfp)
+{
+  AsnExpOptPtr   aeop;
+  AsnIoPtr       aip;
+  ObjStringData  osd;
+  SeqDescrPtr    sdp;
+  Boolean        rval = FALSE;
+  ObjMgrPtr      omp;
+  ObjMgrTypePtr  omtp;
+  PubdescPtr     pdp;
+
+  if (bssp == NULL || cfp == NULL) return FALSE;
+  omp = ObjMgrGet ();
+  if (omp == NULL) return FALSE;
+  omtp = ObjMgrTypeFind (omp, OBJ_SEQDESC, NULL, NULL);
+  if (omtp == NULL) return FALSE;
+
+  /* set up the search state before it is handed to the callback */
+  osd.match = cfp->deleteStr;
+  osd.found = FALSE;
+
+  aip = AsnIoNullOpen ();
+  if (aip == NULL) return FALSE; /* nothing can be scanned without a stream */
+  aeop = AsnExpOptNew (aip, NULL, NULL, AsnWriteRemoveForDCallBack);
+  if (aeop != NULL) {
+    aeop->user_data = (Pointer) &osd;
+  }
+
+  /* look for publication descriptors */
+  sdp = bssp->descr;
+  while (sdp != NULL && !rval) {
+    if (sdp->choice == Seq_descr_pub && ObjectHasSubstring (omtp, aip, (Pointer) sdp, &osd)) {
+      pdp = (PubdescPtr) sdp->data.ptrvalue;
+      if (DoesPubStatusMatch (pdp, cfp))
+      {
+        rval = TRUE;
+      }
+    }
+    sdp = sdp->next;
+  }
+
+  AsnIoClose (aip);
+  return rval;
+}
static Boolean DoesSequenceContainText (BioseqPtr bsp, ConvertFormPtr cfp)
{
@@ -1659,6 +1928,9 @@ static Boolean DoesSequenceContainText (BioseqPtr bsp, ConvertFormPtr cfp)
found = TRUE;
}
break;
+ case PUBLICATION_TYPE :
+ found = DoesSequenceHavePubWithText (bsp, cfp);
+ break;
default:
break;
}
@@ -1671,6 +1943,10 @@ static Boolean DoesNucProtSetContainText (BioseqSetPtr bssp, ConvertFormPtr cfp)
BioseqPtr bsp;
if (bssp == NULL) return FALSE;
+ if (cfp->type == PUBLICATION_TYPE && DoesNucProtSetHavePubWithText (bssp, cfp))
+ {
+ return TRUE;
+ }
for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
if (IS_Bioseq (sep)) {
bsp = (BioseqPtr) sep->data.ptrvalue;
@@ -1879,8 +2155,13 @@ static void SegregateByText_Callback (ButtoN b)
}
else
cfp->type = (Int2) val;
-
- if (cfp->type != DEFLINE_TYPE && cfp->type != FEATURE_NOTE_TYPE) {
+
+ if (cfp->type == PUBLICATION_TYPE)
+ {
+ GetEnumPopup (cfp->subtarget [cfp->type], cfp->alists [cfp->type], &val);
+ cfp->subtype = (Int2) val;
+ }
+ else if (cfp->type != DEFLINE_TYPE && cfp->type != FEATURE_NOTE_TYPE) {
GetEnumPopup (cfp->subtarget [cfp->type], cfp->alists [cfp->type], &val);
if (0 == val) {
Remove (cfp->form);
@@ -1957,7 +2238,7 @@ static void SetSegregateAcceptButton (Handle a)
SafeDisable (cfp->accept);
return;
}
- } else if (val != DEFLINE_TYPE) {
+ } else if (val != DEFLINE_TYPE && val != PUBLICATION_TYPE) {
cfp->type = (Int2) val;
if (!GetEnumPopup (cfp->subtarget [cfp->type],
@@ -2125,6 +2406,7 @@ extern Int2 LIBCALLBACK CreateSegregateByTextWindow (Pointer data)
cfp->alists [BIOSOURCE_TYPE] = orgref_field_alist;
cfp->alists [ORGMOD_SUBSOURCE_TYPE] = subsource_and_orgmod_note_subtype_alist;
cfp->alists [IMPORT_FEAT_TYPE] = impfeat_field_alist;
+ cfp->alists [PUBLICATION_TYPE] = publication_field_alist;
cfp->feature_list = BuildFeatureValNodeList (TRUE, "All", 255, TRUE, FALSE);
x = HiddenGroup (p, 0, 0, NULL);
@@ -2177,6 +2459,468 @@ extern Int2 LIBCALLBACK CreateSegregateByTextWindow (Pointer data)
return OM_MSG_RET_OK;
}
+
+/* Form data for the "Segregate By Feature" / "Segregate By Descriptor"
+ * dialogs. */
+typedef struct segregatefeatdata {
+ FEATURE_FORM_BLOCK
+
+ PopuP type_popup; /* popup listing the selectable types */
+ ValNodePtr type_list; /* one node per popup item; choice holds the type value */
+ ButtoN accept;
+
+ BioseqSetPtr target_set; /* the set whose contents are being split */
+ Uint2 segregate_type; /* type chosen from type_list when Accept is pressed */
+ Boolean is_feat; /* TRUE: match features; FALSE: match descriptors */
+} SegregateFeatData, PNTR SegregateFeatPtr;
+
+/* Cleanup callback for the segregate-by-feature/descriptor form.
+ *
+ * The form data must still be valid when it is handed to
+ * StdCleanupFormProc, which reads it and releases it; freeing it here
+ * first would be a use-after-free followed by a double free. */
+static void CleanupSegregateFeatPage (GraphiC g, VoidPtr data)
+
+{
+  /* NOTE(review): sfp->type_list is not released anywhere visible here;
+   * confirm who owns it (the descriptor list is built with StringSave'd
+   * labels) and free it at this point if the form does. */
+  StdCleanupFormProc (g, data);
+}
+
+/* Returns TRUE when any feature on the Bioseq has the subtype stored in
+ * sfp->segregate_type. */
+static Boolean DoesSequenceContainFeatureType (BioseqPtr bsp, SegregateFeatPtr sfp)
+{
+  SeqMgrFeatContext  fcontext;
+  SeqFeatPtr         cur;
+
+  for (cur = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
+       cur != NULL;
+       cur = SeqMgrGetNextFeature (bsp, cur, 0, 0, &fcontext))
+  {
+    if (cur->idx.subtype == sfp->segregate_type) return TRUE;
+  }
+  return FALSE;
+}
+
+/* Returns TRUE when any Bioseq that is a direct member of the set carries a
+ * feature of the requested type.  Nested sets are not descended into. */
+static Boolean DoesNucProtSetContainFeatureType (BioseqSetPtr bssp, SegregateFeatPtr sfp)
+{
+  SeqEntryPtr  member;
+
+  if (bssp == NULL) return FALSE;
+
+  member = bssp->seq_set;
+  while (member != NULL) {
+    if (IS_Bioseq (member)
+        && DoesSequenceContainFeatureType ((BioseqPtr) member->data.ptrvalue, sfp)) {
+      return TRUE;
+    }
+    member = member->next;
+  }
+  return FALSE;
+}
+
+/* Returns TRUE when the sequence manager finds at least one descriptor of
+ * type sfp->segregate_type for the Bioseq. */
+static Boolean DoesSequenceContainDescriptorType (BioseqPtr bsp, SegregateFeatPtr sfp)
+{
+  SeqMgrDescContext  dcontext;
+  SeqDescPtr         first;
+
+  first = SeqMgrGetNextDescriptor (bsp, NULL, sfp->segregate_type, &dcontext);
+  if (first == NULL) return FALSE;
+  return TRUE;
+}
+
+/* User data for DoesSetContainDescriptorType_Callback. */
+typedef struct checkdescdata {
+ Uint2 segregate_type; /* descriptor choice value being looked for */
+ Boolean found; /* set once a descriptor with that choice is visited */
+} CheckDescData, PNTR CheckDescPtr;
+
+/* Descriptor visitor: records in the CheckDescData passed as userdata
+ * whether a descriptor of the wanted type has been seen. */
+static void DoesSetContainDescriptorType_Callback (SeqDescPtr sdp, Pointer userdata)
+{
+  CheckDescPtr  state;
+
+  if (sdp == NULL || userdata == NULL) return;
+
+  state = (CheckDescPtr) userdata;
+  if (! state->found && sdp->choice == state->segregate_type) {
+    state->found = TRUE;
+  }
+}
+
+/* Returns TRUE when VisitDescriptorsInSet reaches a descriptor of type
+ * sfp->segregate_type for this set. */
+static Boolean DoesNucProtSetContainDescriptorType (BioseqSetPtr bssp, SegregateFeatPtr sfp)
+{
+  CheckDescData  search;
+
+  if (bssp == NULL) return FALSE;
+
+  search.segregate_type = sfp->segregate_type;
+  search.found = FALSE;
+  VisitDescriptorsInSet (bssp, &search, DoesSetContainDescriptorType_Callback);
+
+  return search.found;
+}
+
+/* Decides whether a single Bioseq belongs in the segregated set, testing
+ * for a feature or a descriptor depending on sfp->is_feat. */
+static Boolean WantToSegregateSequence (BioseqPtr bsp, SegregateFeatPtr sfp)
+{
+  if (bsp == NULL || sfp == NULL) return FALSE;
+
+  if (! sfp->is_feat)
+  {
+    return DoesSequenceContainDescriptorType (bsp, sfp);
+  }
+  return DoesSequenceContainFeatureType (bsp, sfp);
+}
+
+/* Decides whether a nuc-prot set belongs in the segregated set, testing
+ * for a feature or a descriptor depending on sfp->is_feat. */
+static Boolean WantToSegregateNucProtSet (BioseqSetPtr bssp, SegregateFeatPtr sfp)
+{
+  if (bssp == NULL || sfp == NULL) return FALSE;
+
+  if (! sfp->is_feat)
+  {
+    return DoesNucProtSetContainDescriptorType (bssp, sfp);
+  }
+  return DoesNucProtSetContainFeatureType (bssp, sfp);
+}
+
+/*=========================================================================*/
+/*                                                                         */
+/* SegregateItemsByFeatureOrDescriptor () - Distributes the entries of     */
+/*          seqlist over two sets: entries with the selected feature or    */
+/*          descriptor type are appended to set2, all others to set1.      */
+/*          Nuc-prot sets are moved as a unit; any other nested set is     */
+/*          flattened by distributing its members recursively.             */
+/*                                                                         */
+/*=========================================================================*/
+
+static void SegregateItemsByFeatureOrDescriptor
+(SeqEntryPtr seqlist,
+ SegregateFeatPtr sfp,
+ BioseqSetPtr set1,
+ BioseqSetPtr set2)
+{
+
+  BioseqPtr     bsp;
+  BioseqSetPtr  this_bssp;
+  SeqEntryPtr   this_list;
+  SeqEntryPtr   sep, next_sep;
+  SeqEntryPtr   set1last, set2last;
+  Boolean       movable;
+  Boolean       segregate;
+
+
+  if (sfp == NULL || set1 == NULL || set2 == NULL || seqlist == NULL)
+    return;
+
+  /* find the current tails so new entries can be appended in order */
+  set1last = set1->seq_set;
+  while (set1last != NULL && set1last->next != NULL) {
+    set1last = set1last->next;
+  }
+  set2last = set2->seq_set;
+  while (set2last != NULL && set2last->next != NULL) {
+    set2last = set2last->next;
+  }
+
+  sep = seqlist;
+  while (sep != NULL) {
+    /* remember the successor: sep->next is cleared when sep is moved */
+    next_sep = sep->next;
+    movable = FALSE;
+    segregate = FALSE;
+
+    if (IS_Bioseq_set (sep)) {
+      this_bssp = (BioseqSetPtr) sep->data.ptrvalue;
+      if (this_bssp->_class == BioseqseqSet_class_nuc_prot) {
+        movable = TRUE;
+        segregate = WantToSegregateNucProtSet (this_bssp, sfp);
+      } else {
+        /* NOTE(review): after flattening, the now-empty wrapper entry sep
+         * is neither relinked nor freed in this function. */
+        this_list = this_bssp->seq_set;
+        this_bssp->seq_set = NULL;
+        SegregateItemsByFeatureOrDescriptor (this_list, sfp, set1, set2);
+
+        /* The recursive call appended to both sets, so the tails cached
+         * above are stale.  Appending through them would overwrite (and
+         * lose) everything the recursion added; locate the tails again. */
+        set1last = set1->seq_set;
+        while (set1last != NULL && set1last->next != NULL) {
+          set1last = set1last->next;
+        }
+        set2last = set2->seq_set;
+        while (set2last != NULL && set2last->next != NULL) {
+          set2last = set2last->next;
+        }
+      }
+    } else if (IS_Bioseq (sep)) {
+      bsp = (BioseqPtr) sep->data.ptrvalue;
+      movable = TRUE;
+      segregate = WantToSegregateSequence (bsp, sfp);
+    }
+
+    if (movable) {
+      if (segregate) {
+        if (set2last == NULL) {
+          set2->seq_set = sep;
+        } else {
+          set2last->next = sep;
+        }
+        set2last = sep;
+      } else {
+        if (set1last == NULL) {
+          set1->seq_set = sep;
+        } else {
+          set1last->next = sep;
+        }
+        set1last = sep;
+      }
+      sep->next = NULL;
+    }
+    sep = next_sep;
+  }
+}
+
+
+/*=========================================================================*/
+/*                                                                         */
+/* SegregateByFeatureOrDescriptor_Callback () - Accept-button handler:     */
+/*          splits the target set into two sets, moving sequences that     */
+/*          contain the selected feature or descriptor type into the       */
+/*          second one.                                                    */
+/*                                                                         */
+/*=========================================================================*/
+
+static void SegregateByFeatureOrDescriptor_Callback (ButtoN b)
+{
+  SegregateFeatPtr  sfp;
+  SeqEntryPtr       sep;
+  SeqEntryPtr       tmp1, tmp2;
+  UIEnum            val;
+  BioseqSetPtr      bssp;
+  BioseqSetPtr      parent_set;
+  SeqEntryPtr       seqlist;
+  BioseqSetPtr      newset1, newset2;
+  ObjMgrDataPtr     omdptop;
+  ObjMgrData        omdata;
+  Uint2             parenttype;
+  Pointer           parentptr;
+  SeqEntryPtr       last_sep;
+  ValNodePtr        vnp;
+
+  /* Check the initial conditions and get the sequence */
+  sfp = (SegregateFeatPtr) GetObjectExtra (b);
+  if (sfp == NULL) {
+    /* no form data: there is no form pointer to remove either */
+    return;
+  }
+  if (sfp->input_entityID == 0 || sfp->target_set == NULL) {
+    Remove (sfp->form);
+    return;
+  }
+
+  sep = GetTopSeqEntryForEntityID (sfp->input_entityID);
+  if (sep == NULL) {
+    Remove (sfp->form);
+    return;
+  }
+
+  /* Get the feature or descriptor type to look for.  The popup value is    */
+  /* the 1-based position in type_list.  This is resolved before the set    */
+  /* is touched, so that an invalid selection cannot leave the record with  */
+  /* its sequences detached.                                                */
+  val = GetValue (sfp->type_popup);
+  for (vnp = sfp->type_list; vnp != NULL && val > 1; vnp = vnp->next, val--)
+  {
+  }
+  if (vnp == NULL || val != 1)
+  {
+    Remove (sfp->form);
+    return;
+  }
+  sfp->segregate_type = vnp->choice;
+
+  SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
+  GetSeqEntryParent (sep, &parentptr, &parenttype);
+
+  bssp = sfp->target_set;
+
+  parent_set = (BioseqSetPtr)(bssp->idx.parentptr);
+  /* detach the members; they are redistributed below */
+  seqlist = bssp->seq_set;
+  bssp->seq_set = NULL;
+
+  if (parent_set == NULL || parent_set->seq_set == NULL) {
+    /* No usable parent: turn the target into a genbank set that holds two */
+    /* new child sets of the target's original class.                      */
+    newset1 = BioseqSetNew ();
+    if (newset1 == NULL) return;
+    newset2 = BioseqSetNew ();
+    if (newset2 == NULL) return;
+    newset1->_class = bssp->_class;
+    newset2->_class = bssp->_class;
+    tmp1 = SeqEntryNew ();
+    if (tmp1 == NULL) return;
+    tmp1->choice = 2;
+    tmp1->data.ptrvalue = (Pointer) newset1;
+    tmp2 = SeqEntryNew ();
+    if (tmp2 == NULL) return;
+    tmp2->choice = 2;
+    tmp2->data.ptrvalue = (Pointer) newset2;
+    bssp->seq_set = tmp1;
+    tmp1->next = tmp2;
+    bssp->_class = BioseqseqSet_class_genbank;
+    /* Propagate descriptors down */
+    ValNodeLink (&(newset1->descr),
+                 AsnIoMemCopy ((Pointer) bssp->descr,
+                               (AsnReadFunc) SeqDescrAsnRead,
+                               (AsnWriteFunc) SeqDescrAsnWrite));
+    ValNodeLink (&(newset2->descr),
+                 AsnIoMemCopy ((Pointer) bssp->descr,
+                               (AsnReadFunc) SeqDescrAsnRead,
+                               (AsnWriteFunc) SeqDescrAsnWrite));
+    bssp->descr = SeqDescrFree (bssp->descr);
+  } else {
+    /* The target keeps the non-matching sequences; a new sibling set is   */
+    /* appended to the parent for the matching ones.                       */
+    last_sep = parent_set->seq_set;
+    newset1 = bssp;
+    newset2 = BioseqSetNew ();
+    if (newset2 == NULL) return;
+    newset2->_class = newset1->_class;
+    tmp1 = SeqEntryNew ();
+    if (tmp1 == NULL) return;
+    tmp1->choice = 2;
+    tmp1->data.ptrvalue = (Pointer) newset2;
+    while (last_sep != NULL && last_sep->next != NULL) {
+      last_sep = last_sep->next;
+    }
+    if (last_sep == NULL) return;
+    last_sep->next = tmp1;
+    /* copy descriptors horizontally */
+    ValNodeLink (&(newset2->descr),
+                 AsnIoMemCopy ((Pointer) bssp->descr,
+                               (AsnReadFunc) SeqDescrAsnRead,
+                               (AsnWriteFunc) SeqDescrAsnWrite));
+  }
+
+  /* Display the 'working' cursor */
+
+  WatchCursor ();
+  Update ();
+
+  /* Do the search and move sequences */
+  SegregateItemsByFeatureOrDescriptor (seqlist, sfp, newset1, newset2);
+
+  /* Remove the window and update things */
+  SeqMgrLinkSeqEntry (sep, parenttype, parentptr);
+  RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
+  ObjMgrSetDirtyFlag (sfp->input_entityID, TRUE);
+  ObjMgrSendMsg (OM_MSG_UPDATE, sfp->input_entityID, 0, 0);
+
+  ArrowCursor ();
+  Update ();
+  Remove (sfp->form);
+
+  /* Return successfully */
+  return;
+}
+
+
+/*=========================================================================*/
+/*                                                                         */
+/* CreateSegregateByFeatureOrDescriptorWindow () - Creates and displays    */
+/*          the dialog that asks which feature type (is_feat TRUE) or      */
+/*          descriptor type (is_feat FALSE) to segregate on.  The selected */
+/*          object must be a Bioseq-set.                                   */
+/*                                                                         */
+/*=========================================================================*/
+
+static Int2 LIBCALLBACK CreateSegregateByFeatureOrDescriptorWindow (Pointer data, Boolean is_feat)
+{
+  GrouP              c;
+  SegregateFeatPtr   sfp;
+  GrouP              g;
+  GrouP              h;
+  OMProcControlPtr   ompcp;
+  StdEditorProcsPtr  sepp;
+  WindoW             w;
+  ValNodePtr         vnp;
+
+  /* Check parameters and get a pointer to the current data */
+
+  ompcp = (OMProcControlPtr) data;
+  if (ompcp == NULL)
+    return OM_MSG_RET_ERROR;
+
+  if (ompcp->input_itemtype != OBJ_BIOSEQSET || ompcp->input_data == NULL) {
+    Message (MSG_ERROR, "Must select Bioseq Set!");
+    return OM_MSG_RET_ERROR;
+  }
+
+  /* Create a new window, and a struct */
+  /* to pass around the data in.       */
+
+  sfp = (SegregateFeatPtr) MemNew (sizeof (SegregateFeatData));
+  if (sfp == NULL)
+    return OM_MSG_RET_ERROR;
+  sfp->is_feat = is_feat;
+
+  if (sfp->is_feat)
+  {
+    w = FixedWindow (-50, -33, -10, -10, "Segregate By Feature",
+                     StdCloseWindowProc);
+  }
+  else
+  {
+    w = FixedWindow (-50, -33, -10, -10, "Segregate By Descriptor",
+                     StdCloseWindowProc);
+  }
+
+  /* the window owns sfp from here on; it is released by the cleanup proc */
+  SetObjectExtra (w, sfp, CleanupSegregateFeatPage);
+  sfp->form = (ForM) w;
+
+  /* hook the form into the standard editor activation/message handling */
+  /* (done once; the original repeated this block verbatim)             */
+  sepp = (StdEditorProcsPtr) GetAppProperty ("StdEditorForm");
+  if (sepp != NULL) {
+    SetActivate (w, sepp->activateForm);
+    sfp->appmessage = sepp->handleMessages;
+  }
+
+  sfp->input_entityID = ompcp->input_entityID;
+  sfp->input_itemID = ompcp->input_itemID;
+  sfp->input_itemtype = ompcp->input_itemtype;
+  sfp->target_set = (BioseqSetPtr)ompcp->input_data;
+
+  /* Add the popup lists */
+
+  h = HiddenGroup (w, -1, 0, NULL);
+  SetGroupSpacing (h, 10, 10);
+
+  g = HiddenGroup (h, 3, 0, NULL);
+
+  if (sfp->is_feat)
+  {
+    StaticPrompt (g, "Segregate sequences with the feature", 0, dialogTextHeight,
+                  programFont, 'l');
+    sfp->type_list = BuildFeatureValNodeList (TRUE, NULL, 0, TRUE, FALSE);
+  }
+  else
+  {
+    StaticPrompt (g, "Segregate sequences with the descriptor", 0, dialogTextHeight,
+                  programFont, 'l');
+    sfp->type_list = BuildDescriptorValNodeList ();
+  }
+
+  /* Popup items are added in type_list order; the accept callback relies */
+  /* on the popup value being the 1-based position in that list.          */
+  sfp->type_popup = PopupList (g, TRUE, NULL);
+  SetObjectExtra (sfp->type_popup, sfp, NULL);
+  for (vnp = sfp->type_list; vnp != NULL; vnp = vnp->next)
+  {
+    PopupItem (sfp->type_popup, (CharPtr) vnp->data.ptrvalue);
+  }
+  SetValue (sfp->type_popup, 1);
+
+  /* Add Accept and Cancel buttons */
+
+  c = HiddenGroup (h, 4, 0, NULL);
+  sfp->accept = DefaultButton (c, "Accept", SegregateByFeatureOrDescriptor_Callback);
+  SetObjectExtra (sfp->accept, sfp, NULL);
+  PushButton (c, "Cancel", StdCancelButtonProc);
+
+  /* Line things up nicely */
+
+  AlignObjects (ALIGN_LEFT, (HANDLE) g, (HANDLE) c, (HANDLE) h, NULL);
+
+  /* Display the window now */
+
+  RealizeWindow (w);
+  Show (w);
+  Select (w);
+  Select (sfp->accept);
+  Update ();
+  return OM_MSG_RET_OK;
+}
+
+/* Opens the segregate dialog in feature mode (is_feat = TRUE). */
+extern Int2 LIBCALLBACK CreateSegregateByFeatureWindow (Pointer data)
+{
+ return CreateSegregateByFeatureOrDescriptorWindow (data, TRUE);
+}
+
+/* Opens the segregate dialog in descriptor mode (is_feat = FALSE). */
+extern Int2 LIBCALLBACK CreateSegregateByDescriptorWindow (Pointer data)
+{
+ return CreateSegregateByFeatureOrDescriptorWindow (data, FALSE);
+}
+
static CharPtr SaveOrReplaceStringCopy (ConvertFormPtr cfp, CharPtr str, CharPtr current)
{
@@ -2919,7 +3663,7 @@ static void ConvertFromFlatFile (Uint2 entityID, SeqEntryPtr sep, ConvertFormPtr
bssp = (BioseqSetPtr) sep->data.ptrvalue;
} else return;
- ajp = asn2gnbk_setup (bsp, bssp, NULL, format, SEQUIN_MODE, NORMAL_STYLE, 0, 0, 0, NULL);
+ ajp = asn2gnbk_setup (bsp, bssp, NULL, (FmtType)format, SEQUIN_MODE, NORMAL_STYLE, 0, 0, 0, NULL);
if (ajp != NULL) {
goOn = TRUE;
for (index = 0; index < ajp->numParagraphs && goOn; index++) {
@@ -3618,7 +4362,7 @@ static void BuildParseToAnywhereDialog (IteM i, Int4 parsetype)
SetObjectExtra (w, cfp, CleanupParseForm);
cfp->form = (ForM) w;
cfp->formmessage = ConvertMessageProc;
- cfp->target_alist = segregate_target_field_alist;
+ cfp->target_alist = parse_target_field_alist;
cfp->set_accept_proc = (PupActnProc) SetSegregateAcceptButton;
sepp = (StdEditorProcsPtr) GetAppProperty ("StdEditorForm");
@@ -3688,7 +4432,7 @@ static void BuildParseToAnywhereDialog (IteM i, Int4 parsetype)
x = HiddenGroup (p, 0, 0, NULL);
- for (j = 1; j <= NUMBER_OF_SEGREGATE_TYPES; j++) {
+ for (j = 1; j <= NUMBER_OF_PARSE_TYPES; j++) {
if (j == ORGMOD_SUBSOURCE_TYPE) {
cfp->subtarget [j] = (PopuP) SingleList (x, 10, 8, (LstActnProc) cfp->set_accept_proc);
SetObjectExtra (cfp->subtarget [j], cfp, NULL);
diff --git a/sequin/sequin7.c b/sequin/sequin7.c
index 7b8c7e92..63218505 100644
--- a/sequin/sequin7.c
+++ b/sequin/sequin7.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/3/98
*
-* $Revision: 6.144 $
+* $Revision: 6.148 $
*
* File Description:
*
@@ -7195,9 +7195,11 @@ static Boolean CDSMeetsStringConstraint (SeqFeatPtr sfp,
extern Boolean MeetsStringConstraint (SeqFeatPtr sfp,
CharPtr findThisStr)
{
- GBQualPtr gbqp;
- GeneRefPtr grp;
- RnaRefPtr rrp;
+ GBQualPtr gbqp;
+ GeneRefPtr grp;
+ RnaRefPtr rrp;
+ SeqMgrFeatContext context;
+ Boolean have_context = FALSE;
/* If no string constraint, then everyone matches */
@@ -7226,6 +7228,15 @@ extern Boolean MeetsStringConstraint (SeqFeatPtr sfp,
gbqp = gbqp->next;
}
+ if (SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, 0, 0, sfp, &context) != NULL)
+ {
+ if (StringISearch (context.label, findThisStr))
+ {
+ return TRUE;
+ }
+ have_context = TRUE;
+ }
+
if (sfp->data.choice == SEQFEAT_GENE)
{
grp = sfp->data.value.ptrvalue;
@@ -7250,6 +7261,15 @@ extern Boolean MeetsStringConstraint (SeqFeatPtr sfp,
if (StringISearch ((CharPtr) rrp->ext.value.ptrvalue, findThisStr))
return TRUE;
}
+ else if (rrp->type == 3 && rrp->ext.choice == 2 && have_context)
+ {
+ /* look for the label as it appears to the user */
+ if (StringNCmp(findThisStr, "tRNA-", 5) == 0
+ && StringISearch (context.label, findThisStr + 5))
+ {
+ return TRUE;
+ }
+ }
}
/* If we got to here, then the string constraint was not found */
@@ -8123,7 +8143,7 @@ static void MarkProteinCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, In
}
}
-extern void RemoveProteins (IteM i)
+extern void RemoveProteinsAndOptionallyRenormalize (IteM i, Boolean renormalize)
{
BaseFormPtr bfp;
@@ -8167,12 +8187,26 @@ extern void RemoveProteins (IteM i)
ValNodeFree (vnp);
SeqMgrLinkSeqEntry (sep, parenttype, parentptr);
RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
+ if (renormalize)
+ {
+ RenormalizeNucProtSets (sep, TRUE);
+ }
ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
ObjMgrDeSelect (0, 0, 0, 0, NULL);
Update ();
}
+/* Menu entry point: removes proteins without renormalizing the nuc-prot
+ * sets afterwards (renormalize = FALSE). */
+extern void RemoveProteins (IteM i)
+{
+ RemoveProteinsAndOptionallyRenormalize (i, FALSE);
+}
+
+/* Menu entry point: removes proteins and then has the shared worker call
+ * RenormalizeNucProtSets on the top entry (renormalize = TRUE). */
+extern void RemoveProteinsAndRenormalize (IteM i)
+{
+ RemoveProteinsAndOptionallyRenormalize (i, TRUE);
+}
+
#define EDIT_FIVE_PRIME 1
#define EDIT_THREE_PRIME 2
@@ -9009,7 +9043,7 @@ WriteAlignmentInterleaveToFile
MemSet (printed_line, ' ', printed_line_len - 2);
label_pos = alnlabels + (row - 1) * (label_len + 1) * sizeof (Char);
MemCpy (printed_line, label_pos, StringLen (label_pos));
- AlignmentIntervalToString (salp, row, start, stop, 1, FALSE,
+ AlignmentIntervalToString (salp, row, start, stop, 1, TRUE,
seqbuf, alnbuf, &alnbuf_len);
MemCpy (printed_line + label_len + 1, alnbuf, alnbuf_len);
fprintf (fp, printed_line);
@@ -9028,8 +9062,95 @@ WriteAlignmentInterleaveToFile
}
}
+/* Writes an alignment (or a chain of alignment segments linked through
+ * ->next) to fp in contiguous form: for every row, each segment's label
+ * line (">label") followed by that row's aligned text in chunks of
+ * seq_chars_per_row characters.  When there is more than one segment, the
+ * output for each row is bracketed by "[" and "]" lines.
+ *
+ * The number of rows is taken from the first segment's dim. */
+static void WriteAlignmentContiguousToFile
+(SeqAlignPtr salp,
+ FILE *fp)
+{
+  Int4          num_segments;
+  SeqAlignPtr   tmp_salp;
+  Int4          idx;
+  CharPtr PNTR  alnlabels = NULL;
+  Int4Ptr       label_len = NULL;
+  Int4Ptr       aln_len = NULL;
+  Uint1Ptr      alnbuf = NULL;
+  Uint1Ptr      seqbuf = NULL;
+  CharPtr       printed_line = NULL;
+  Int4          alnbuf_len;
+  Int4          printed_line_len;
+  CharPtr       label_pos;
+  Int4          row, start, stop;
+  Int4          seq_chars_per_row = 80;
+
+  if (salp == NULL || fp == NULL) return;
+
+  num_segments = 0;
+  for (tmp_salp = salp; tmp_salp != NULL; tmp_salp = tmp_salp->next)
+  {
+    num_segments++;
+  }
+
+
+  /* get labels and lengths for all segments */
+  alnlabels = (CharPtr PNTR) MemNew (sizeof (CharPtr) * num_segments);
+  label_len = (Int4Ptr) MemNew (sizeof (Int4) * num_segments);
+  aln_len = (Int4Ptr) MemNew (sizeof (Int4) * num_segments);
+  if (alnlabels != NULL && label_len != NULL && aln_len != NULL)
+  {
+    /* both conditions must hold; a comma here would discard the NULL test */
+    for (tmp_salp = salp, idx = 0;
+         tmp_salp != NULL && idx < num_segments;
+         tmp_salp = tmp_salp->next, idx++)
+    {
+      alnlabels [idx] = GetSeqAlignLabels (tmp_salp, &label_len[idx]);
+      aln_len [idx]= AlnMgr2GetAlnLength(tmp_salp, FALSE);
+
+    }
+
+    /* get buffers */
+    alnbuf = (Uint1Ptr) MemNew (seq_chars_per_row * sizeof (Uint1));
+    seqbuf = (Uint1Ptr) MemNew (seq_chars_per_row * sizeof (Uint1));
+    /* room for one chunk plus a pad character, '\n' and the terminator */
+    printed_line_len = seq_chars_per_row + 3;
+    printed_line = (CharPtr) MemNew (printed_line_len * sizeof (Char));
+    if (alnbuf != NULL && seqbuf != NULL && printed_line != NULL) {
+      printed_line [ printed_line_len - 1] = 0;
+      printed_line [ printed_line_len - 2] = '\n';
+
+      for (row = 1; row <= salp->dim; row++) {
+        if (salp->next != NULL)
+        {
+          fprintf (fp, "[\n");
+        }
+        for (tmp_salp = salp, idx = 0;
+             tmp_salp != NULL && idx < num_segments;
+             tmp_salp = tmp_salp->next, idx++)
+        {
+          /* labels for all rows sit in one buffer, label_len + 1 apart */
+          label_pos = alnlabels [idx] + (row - 1) * (label_len[idx] + 1) * sizeof (Char);
+          fprintf (fp, ">%s\n", label_pos);
+          start = 0;
+          stop = seq_chars_per_row - 1;
+          while (start < aln_len [idx]) {
+            /* blank the line so a short final chunk is space padded */
+            MemSet (printed_line, ' ', printed_line_len - 2);
+            AlignmentIntervalToString (tmp_salp, row, start, stop, 1, TRUE,
+                                       seqbuf, alnbuf, &alnbuf_len);
+            MemCpy (printed_line, alnbuf, alnbuf_len);
+            /* the line is data, not a format string */
+            fputs (printed_line, fp);
+            start = stop + 1;
+            stop += seq_chars_per_row;
+          }
+          fprintf (fp, "\n");
+        }
+        if (salp->next != NULL)
+        {
+          fprintf (fp, "]\n");
+        }
+      }
+    }
+    MemFree (alnbuf);
+    MemFree (seqbuf);
+    MemFree (printed_line);
+
+    /* Release the per-segment label buffers, not just the array that holds */
+    /* them.  NOTE(review): assumes GetSeqAlignLabels returns a buffer the  */
+    /* caller owns - confirm against its definition.                        */
+    for (idx = 0; idx < num_segments; idx++)
+    {
+      MemFree (alnlabels [idx]);
+    }
+  }
+  MemFree (label_len);
+  MemFree (alnlabels);
+  MemFree (aln_len);
+}
+
static void
-WriteAlignmentContiguousToFile
+OldWriteAlignmentContiguousToFile
(SeqAlignPtr salp,
FILE *fp)
{
@@ -9085,26 +9206,83 @@ WriteAlignmentContiguousToFile
}
}
+/* Fills in salp->dim from the alignment's index data when it has not been
+ * set yet: from the shared alignment for a parent index, or from the
+ * dense-seg for a child index.  Does nothing when dim is already positive
+ * or the alignment is not indexed. */
+static void SetAlignmentDim (SeqAlignPtr salp)
+{
+  AMAlignIndex2Ptr  amaip;
+  DenseSegPtr       dsp;
+
+  if (salp == NULL || salp->dim > 0 || salp->saip == NULL) return;
+
+  if (salp->saip->indextype == INDEX_PARENT)
+  {
+    amaip = (AMAlignIndex2Ptr)(salp->saip);
+    if (amaip->sharedaln != NULL)
+    {
+      salp->dim = amaip->sharedaln->dim;
+    }
+  }
+  else if (salp->saip->indextype == INDEX_CHILD)
+  {
+    dsp = (DenseSegPtr)(salp->segs);
+    if (dsp != NULL)
+    {
+      salp->dim = dsp->dim;
+    }
+  }
+}
+
+/* Indexes an alignment that has no index yet.  A chain of alignments whose
+ * first member has dim > 2 is indexed one member at a time, each
+ * temporarily unlinked from the chain; anything else is indexed as a
+ * whole.  SetAlignmentDim is applied to every alignment indexed here. */
+static void IndexAlignmentSet (SeqAlignPtr salp)
+{
+  SeqAlignPtr  cur, following;
+
+  if (salp == NULL || salp->saip != NULL) return;
+
+  if (salp->next == NULL || salp->dim <= 2)
+  {
+    if (salp->segtype == SAS_DENSEG && salp->next == NULL) {
+      AlnMgr2IndexSingleChildSeqAlign(salp);
+    } else {
+      AlnMgr2IndexSeqAlign(salp);
+    }
+    SetAlignmentDim (salp);
+    return;
+  }
+
+  cur = salp;
+  while (cur != NULL)
+  {
+    /* index this member in isolation, then restore the link */
+    following = cur->next;
+    cur->next = NULL;
+    if (cur->segtype == SAS_DENSEG) {
+      AlnMgr2IndexSingleChildSeqAlign(cur);
+    } else {
+      AlnMgr2IndexSeqAlign(cur);
+    }
+    SetAlignmentDim (cur);
+    cur->next = following;
+    cur = following;
+  }
+}
+
static void WriteSeqEntryAlignmentToFile (SeqEntryPtr sep, FILE *fp, Boolean Interleave)
{
BioseqSetPtr bssp;
SeqAnnotPtr sap;
- SeqAlignPtr salp;
+ SeqAlignPtr salp = NULL;
if (sep == NULL || ! IS_Bioseq_set (sep)) return;
bssp = (BioseqSetPtr) sep->data.ptrvalue;
if (bssp == NULL) return;
for (sap = bssp->annot; sap != NULL; sap = sap->next) {
if (sap->type == 2) {
- salp = (SeqAlignPtr) sap->data;
- if (salp->saip == NULL) {
- AlnMgr2IndexSingleChildSeqAlign (salp);
- }
+ salp = SeqAlignListDup((SeqAlignPtr) sap->data);
+ IndexAlignmentSet (salp);
+
if (Interleave) {
+ if (salp->next != NULL)
+ {
+ Message (MSG_ERROR, "Unable to write segmented alignments as interleave");
+ return;
+ }
WriteAlignmentInterleaveToFile (salp, fp);
} else {
WriteAlignmentContiguousToFile (salp, fp);
}
+ SeqAlignFree (salp);
+ salp = NULL;
}
}
diff --git a/sequin/sequin8.c b/sequin/sequin8.c
index 4030fd05..fd9ede6d 100644
--- a/sequin/sequin8.c
+++ b/sequin/sequin8.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/3/98
*
-* $Revision: 6.255 $
+* $Revision: 6.256 $
*
* File Description:
*
@@ -3343,6 +3343,27 @@ static int LIBCALLBACK SortMostUsedDescriptorsFirst (VoidPtr ptr1, VoidPtr ptr2)
}
}
+/* Builds a list with one node per non-blank entry of descNames (starting at
+ * index 1): choice holds the index and data.ptrvalue a saved copy of the
+ * name.  The list is returned sorted with SortMostUsedDescriptorsFirst. */
+extern ValNodePtr BuildDescriptorValNodeList (void)
+{
+  ValNodePtr  list = NULL;
+  ValNodePtr  node;
+  Int4        k;
+
+  k = 1;
+  while (descNames [k] != NULL) {
+    if (! StringHasNoText (descNames [k])) {
+      node = ValNodeNew (list);
+      if (list == NULL) {
+        list = node;
+      }
+      if (node != NULL) {
+        node->choice = k;
+        node->data.ptrvalue = StringSave (descNames [k]);
+      }
+    }
+    k++;
+  }
+
+  list = SortValNode (list, SortMostUsedDescriptorsFirst);
+  return list;
+}
+
static void RemoveAsnObject (IteM i, Boolean feature)
{
@@ -3413,18 +3434,7 @@ static void RemoveAsnObject (IteM i, Boolean feature)
if (feature) {
head = BuildFeatureValNodeList (TRUE, "All", ALL_FEATURES, TRUE, FALSE);
} else {
- for (j = 1; descNames [j] != NULL; j++) {
- if (StringHasNoText (descNames [j])) continue;
- vnp = ValNodeNew (head);
- if (head == NULL) {
- head = vnp;
- }
- if (vnp != NULL) {
- vnp->choice = j;
- vnp->data.ptrvalue = StringSave (descNames [j]);
- }
- }
- head = SortValNode (head, SortMostUsedDescriptorsFirst);
+ head = BuildDescriptorValNodeList();
}
if (head != NULL) {
diff --git a/sequin/sequin9.c b/sequin/sequin9.c
index 0f8a4845..802fb0c2 100644
--- a/sequin/sequin9.c
+++ b/sequin/sequin9.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/20/99
*
-* $Revision: 6.254 $
+* $Revision: 6.260 $
*
* File Description:
*
@@ -4445,7 +4445,8 @@ static TransTablePtr GetTranslationTable (CdRegionPtr crp, Boolean PNTR table_is
static CharPtr ExtendProtein5
(SeqFeatPtr sfp,
- Uint2 input_entityID)
+ Uint2 input_entityID,
+ Boolean force_partial)
{
CdRegionPtr crp;
TransTablePtr tbl = NULL;
@@ -4486,8 +4487,14 @@ static CharPtr ExtendProtein5
strand = SeqLocStrand (sfp->location);
sip = SeqLocId (sfp->location);
offset = -1;
+
+ start = GetOffsetInBioseq (test_slp, nucBsp, SEQLOC_START);
+ if (start == 0)
+ {
+ stop_looking = TRUE;
+ }
- while (! found_start && ! found_stop && ! stop_looking) {
+ while (((! found_start && ! found_stop) || force_partial) && ! stop_looking) {
start = GetOffsetInBioseq (test_slp, nucBsp, SEQLOC_START);
stop = GetOffsetInBioseq (test_slp, nucBsp, SEQLOC_STOP);
if (strand == Seq_strand_minus) {
@@ -4537,7 +4544,7 @@ static CharPtr ExtendProtein5
MemFree (bases);
}
}
-
+
SeqLocFree (test_slp);
if (! found_stop) {
start = GetOffsetInBioseq (sfp->location, nucBsp, SEQLOC_START);
@@ -4552,12 +4559,16 @@ static CharPtr ExtendProtein5
CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
SetSeqLocPartial (sfp->location, TRUE, partial3);
sfp->partial = TRUE;
+ if (crp->frame == 0)
+ {
+ crp->frame = 1;
+ }
if (strand == Seq_strand_minus) {
sfp->location = ExpandSeqLoc (stop, nucBsp->length - 1, strand, nucBsp, sfp->location);
- crp->frame = (nucBsp->length - 1 - start) % 3 + 1;
+ crp->frame = (nucBsp->length - 1 - start + crp->frame - 1) % 3 + 1;
} else {
sfp->location = ExpandSeqLoc (0, stop, strand, nucBsp, sfp->location);
- crp->frame = start % 3 + 1;
+ crp->frame = (start + crp->frame - 1) % 3 + 1;
}
}
}
@@ -4573,7 +4584,8 @@ static CharPtr ExtendProtein5
static CharPtr ExtendProtein3
(SeqFeatPtr sfp,
- Uint2 input_entityID)
+ Uint2 input_entityID,
+ Boolean force_partial)
{
BioseqPtr nucBsp;
Int4 max_stop, min_start, start, stop;
@@ -4609,7 +4621,14 @@ static CharPtr ExtendProtein3
contains_stop = FALSE;
contains_start = FALSE;
newprot = NULL;
- while (! contains_stop &&
+ /* need to initialize newprot in case we're already at the edge */
+ if ((strand != Seq_strand_minus && stop == max_stop)
+ || (strand == Seq_strand_minus && stop == min_start))
+ {
+ newprot = FixProteinString (sfp, strand, FALSE, &truncated,
+ &contains_start, &contains_stop);
+ }
+ while ((! contains_stop || force_partial) &&
( (strand == Seq_strand_minus && stop > min_start)
|| (strand != Seq_strand_minus && stop < max_stop)))
{
@@ -4634,7 +4653,7 @@ static CharPtr ExtendProtein3
&contains_start, &contains_stop);
}
- if (! contains_stop) {
+ if (! contains_stop || force_partial) {
start = GetOffsetInBioseq (sfp->location, nucBsp, SEQLOC_START);
stop = GetOffsetInBioseq (sfp->location, nucBsp, SEQLOC_STOP);
if (strand == Seq_strand_minus) {
@@ -4673,6 +4692,8 @@ PrepareUpdatePtrForProtein
Uint1 strand;
SeqLocPtr newloc;
BioseqPtr nucBsp;
+ Boolean partial5, partial3;
+
if (sfp == NULL
|| sfp->idx.subtype != FEATDEF_CDS
@@ -4683,7 +4704,9 @@ PrepareUpdatePtrForProtein
{
return NULL;
}
-
+
+ CheckSeqLocForPartial (sfp->location, &partial3, &partial5);
+
nucBsp = GetBioseqGivenSeqLoc (sfp->location, input_entityID);
if (nucBsp == NULL) return NULL;
newloc = SeqLocMerge (nucBsp, sfp->location, NULL, FALSE, FALSE, FALSE);
@@ -4701,17 +4724,18 @@ PrepareUpdatePtrForProtein
&contains_start, &contains_stop);
/* Must do 3' end first, otherwise may truncate at stops introduced by expanding 5' end for partiality */
- if (! contains_stop && extend_proteins3 && transl_except_len == 0) {
+ if ((! contains_stop && extend_proteins3 && transl_except_len == 0)
+ || ((extend_proteins3 || partial3) && !truncate_proteins)) {
MemFree (newprot);
- newprot = ExtendProtein3 (sfp, input_entityID);
+ newprot = ExtendProtein3 (sfp, input_entityID, partial3 && !truncate_proteins);
if (newprot == NULL) return NULL;
*extended3 = TRUE;
} else {
*extended3 = FALSE;
}
- if (! contains_start && extend_proteins5) {
+ if (! contains_start && (extend_proteins5 || partial5)) {
MemFree (newprot);
- newprot = ExtendProtein5 (sfp, input_entityID);
+ newprot = ExtendProtein5 (sfp, input_entityID, partial5);
if (newprot == NULL) return NULL;
*extended5 = TRUE;
} else {
@@ -6818,7 +6842,7 @@ static void DetermineButtonState (UpsDataPtr udp,
/* Replace */
- else if (udp->new5 >= udp->old5 && udp->new3 >= udp->old3) {
+ else {
SetValue (udp->rmc, 1);
Disable (*extend5ButtonPtr);
Disable (*extend3ButtonPtr);
@@ -6827,7 +6851,7 @@ static void DetermineButtonState (UpsDataPtr udp,
}
/* Patch */
-
+/* This section removed - do not set patch as a default
else if (udp->new5 <= udp->old5 && udp->new3 <= udp->old3) {
SetValue (udp->rmc, 4);
Disable (*extend5ButtonPtr);
@@ -6836,14 +6860,13 @@ static void DetermineButtonState (UpsDataPtr udp,
udp->recomb2 = udp->aln_length;
/* If patch sequence matches, must be feature propagation only */
-
- if (StringNICmp (udp->seq1 + udp->old5 - udp->new5,
+/* if (StringNICmp (udp->seq1 + udp->old5 - udp->new5,
udp->seq2,
StringLen (udp->seq2)) == 0) {
SetValue (udp->sfb, 2);
Disable (udp->sfb);
}
- }
+ } */
/* If no features, must be sequence update only */
@@ -7561,7 +7584,7 @@ static Int2 UpdateNextBioseqInFastaSet (UpsDataPtr udp)
/* */
/*=====================================================================*/
-extern void UpdateFastaSetEx (IteM i, Boolean use_new_blast)
+static void UpdateFastaSetEx (IteM i, Boolean use_new_blast)
{
BaseFormPtr bfp;
FILE *fp;
@@ -7788,13 +7811,13 @@ extern void NewExtendSequence (IteM i)
NewUpdateOrExtendSequence (i, FALSE, FALSE);
}
-extern void NewExtendSequenceNewBlast (IteM i)
+static void NewExtendSequenceNewBlast (IteM i)
{
NewUpdateOrExtendSequence (i, FALSE, TRUE);
}
-extern void UpdateSeqAfterDownloadEx
+static void UpdateSeqAfterDownloadEx
(BaseFormPtr bfp,
BioseqPtr oldbsp,
BioseqPtr newbsp,
@@ -7870,7 +7893,7 @@ extern void UpdateSeqAfterDownload
UpdateSeqAfterDownloadEx (bfp, oldbsp, newbsp, FALSE);
}
-extern void ExtendSeqAfterDownloadEx
+static void ExtendSeqAfterDownloadEx
(BaseFormPtr bfp,
BioseqPtr oldbsp,
BioseqPtr newbsp,
@@ -9018,6 +9041,7 @@ static ForM FeaturePropagateForm (
fdp->transPast = CheckBox (g, "Translate CDS after partial 3' boundary", NULL);
fdp->fixCDS = CheckBox (g, "Cleanup CDS partials after propagation", NULL);
+ SetStatus (fdp->fixCDS, TRUE);
fdp->fuseJoints = CheckBox (g, "Fuse adjacent propagated intervals", NULL);
SetStatus (fdp->fuseJoints, TRUE);
@@ -10106,11 +10130,6 @@ NLM_EXTERN SeqAlignPtr Sqn_LocalAlign2SeqEx (BioseqPtr bsp1, BioseqPtr bsp2, Boo
return sap_final;
}
-NLM_EXTERN SeqAlignPtr Sqn_LocalAlign2Seq (BioseqPtr bsp1, BioseqPtr bsp2, BoolPtr revcomp)
-{
- return Sqn_LocalAlign2SeqEx (bsp1, bsp2, revcomp, FALSE);
-}
-
/* End of implementation of the new BLAST library .
*/
diff --git a/tools/blast.c b/tools/blast.c
index 991f1d63..2957847c 100644
--- a/tools/blast.c
+++ b/tools/blast.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: blast.c,v 6.405 2004/04/28 14:37:06 madden Exp $";
+static char const rcsid[] = "$Id: blast.c,v 6.406 2004/05/21 13:53:37 dondosha Exp $";
-/* $Id: blast.c,v 6.405 2004/04/28 14:37:06 madden Exp $
+/* $Id: blast.c,v 6.406 2004/05/21 13:53:37 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -49,9 +49,12 @@ Detailed Contents:
further manipulation.
******************************************************************************
- * $Revision: 6.405 $
+ * $Revision: 6.406 $
*
* $Log: blast.c,v $
+ * Revision 6.406 2004/05/21 13:53:37 dondosha
+ * Fix in BLASTMergeHitLists
+ *
* Revision 6.405 2004/04/28 14:37:06 madden
* Changes from Mike Gertz
* - modified the link_hsps routine to apply the gap_prob parameter to
@@ -2709,7 +2712,7 @@ CheckForRequiredRegion (BlastSearchBlkPtr search, Boolean strict)
if (hsp->query.offset > search->required_start ||
hsp->query.end < search->required_end)
{
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
}
else
{
@@ -2727,7 +2730,7 @@ CheckForRequiredRegion (BlastSearchBlkPtr search, Boolean strict)
}
else
{
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
}
@@ -2980,7 +2983,7 @@ BlastReevaluateWithAmbiguities (BlastSearchBlkPtr search, Int4 sequence_number)
}
else
{ /* Delete if this is now below the cutoff score. */
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
}
if (StringCmp(search->prog_name, "blastn") != 0)
@@ -6108,11 +6111,10 @@ static BLAST_HitListPtr
BLASTMergeHitLists(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist1,
BLAST_HitListPtr hitlist2, Int4 start, Boolean merge_hsps)
{
- BLAST_HSPPtr hsp, PNTR hspp1, PNTR hspp2;
+ BLAST_HSPPtr hsp, hsp_var, PNTR hspp1, PNTR hspp2;
Int4 index, index1, index2;
Int4 hspcnt1, hspcnt2, new_hspcnt = 0;
BLAST_HSPPtr PNTR new_hsp_array;
- Int4Ptr index_array1, index_array2;
if (hitlist1 == NULL) {
hitlist1 = (BLAST_HitListPtr)
@@ -6129,27 +6131,31 @@ BLASTMergeHitLists(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist1,
}
hspcnt1 = hspcnt2 = 0;
- hspp1 = (BLAST_HSPPtr PNTR) MemNew(hitlist1->hspcnt*sizeof(BLAST_HSPPtr));
- hspp2 = (BLAST_HSPPtr PNTR) MemNew(hitlist2->hspcnt*sizeof(BLAST_HSPPtr));
- index_array1 = (Int4Ptr) MemNew(hitlist1->hspcnt*sizeof(Int4));
- index_array2 = (Int4Ptr) MemNew(hitlist2->hspcnt*sizeof(Int4));
- for (index=0; index<hitlist1->hspcnt; index++) {
+ /* Put all HSPs that intersect the overlap region at the front of the
+ respective HSP arrays. */
+ for (index = 0; index < hitlist1->hspcnt; index++) {
hsp = hitlist1->hsp_array[index];
if (hsp->subject.end > start) {
- index_array1[hspcnt1] = index;
- hspp1[hspcnt1++] = hsp;
- } else
- new_hspcnt++;
+ /* At least part of this HSP lies in the overlap strip. */
+ hsp_var = hitlist1->hsp_array[hspcnt1];
+ hitlist1->hsp_array[hspcnt1] = hsp;
+ hitlist1->hsp_array[index] = hsp_var;
+ ++hspcnt1;
+ }
}
- for (index=0; index<hitlist2->hspcnt; index++) {
+ for (index = 0; index < hitlist2->hspcnt; index++) {
hsp = hitlist2->hsp_array[index];
if (hsp->subject.offset < start + DBSEQ_CHUNK_OVERLAP) {
- index_array2[hspcnt2] = index;
- hspp2[hspcnt2++] = hsp;
- } else
- new_hspcnt++;
+ /* At least part of this HSP lies in the overlap strip. */
+ hsp_var = hitlist2->hsp_array[hspcnt2];
+ hitlist2->hsp_array[hspcnt2] = hsp;
+ hitlist2->hsp_array[index] = hsp_var;
+ ++hspcnt2;
+ }
}
+ hspp1 = hitlist1->hsp_array;
+ hspp2 = hitlist2->hsp_array;
HeapSort(hspp1, hspcnt1, sizeof(BLAST_HSPPtr), diag_compare_hsps);
HeapSort(hspp2, hspcnt2, sizeof(BLAST_HSPPtr), diag_compare_hsps);
@@ -6164,75 +6170,75 @@ BLASTMergeHitLists(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist1,
if (merge_hsps) {
if (BLASTMergeHsps(search, hspp1[index], hspp2[index1],
start)) {
- /* Point the corresponding element of the full first HSP
- array to the new HSP */
- hitlist1->hsp_array[index_array1[index]] = hspp1[index];
- /* Free the corresponding element of the full second
- HSP array. */
- hitlist2->hsp_array[index_array2[index1]] =
- hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
- break;
+ /* Free the second HSP. */
+ hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
}
} else { /* No gap information available */
if (BLASTHspContained(hspp1[index], hspp2[index1])) {
- hspp1[index] = MemFree(hspp1[index]);
- /* Point the corresponding element of the full first HSP
- array to the new HSP; free the element of the second
- array. */
- hitlist1->hsp_array[index_array1[index]] = hspp2[index1];
- hitlist2->hsp_array[index_array2[index1]] =
- hspp2[index1] = NULL;
+ /* Free the first HSP and put the second HSP in its place. */
+ hspp1[index] = BLAST_HSPFree(hspp1[index]);
+ hspp1[index] = hspp2[index1];
+ hspp2[index1] = NULL;
+ /* This HSP has been removed, so break out of the inner
+ loop */
+ break;
} else if (BLASTHspContained(hspp2[index1], hspp1[index])) {
- /* Just free the corresponding element of the second
- HSP array */
- hitlist2->hsp_array[index_array2[index1]] =
- hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
+ hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
}
}
+ } else {
+ /* This and remaining HSPs are too far from the one being
+ checked */
+ break;
}
}
}
- new_hspcnt += hspcnt1;
- for (index=0; index<hspcnt2; index++) {
- if (hspp2[index] != NULL)
- new_hspcnt++;
- }
-
- hspp1 = MemFree(hspp1);
- hspp2 = MemFree(hspp2);
- index_array1 = MemFree(index_array1);
- index_array2 = MemFree(index_array2);
+ HspArrayPurge(hitlist2->hsp_array, hitlist2->hspcnt, FALSE);
+ /* The new number of HSPs is now the sum of the remaining counts in the
+ two lists, but if there is a restriction on the number of HSPs to keep,
+ it might have to be reduced. */
+ new_hspcnt = hitlist2->hspcnt + hitlist1->hspcnt;
+ if (search->pbp->hsp_num_max)
+ new_hspcnt = MIN(new_hspcnt, search->pbp->hsp_num_max);
+
if (new_hspcnt >= hitlist1->hspmax-1 && hitlist1->do_not_reallocate == FALSE) {
- new_hsp_array = (BLAST_HSPPtr PNTR) Realloc(hitlist1->hsp_array, new_hspcnt*2*sizeof(BLAST_HSPPtr));
+ Int4 new_allocated = 2*new_hspcnt;
+ if (search->pbp->hsp_num_max)
+ new_allocated = MIN(new_allocated, search->pbp->hsp_num_max);
+ new_hsp_array = (BLAST_HSPPtr PNTR)
+ Realloc(hitlist1->hsp_array, new_allocated*sizeof(BLAST_HSPPtr));
if (new_hsp_array == NULL) {
ErrPostEx(SEV_WARNING, 0, 0, "UNABLE to reallocate in BlastSaveCurrentHsp for ordinal id %ld, continuing with fixed array of %ld HSP's", (long) search->subject_id, (long) hitlist1->hspmax);
hitlist1->do_not_reallocate = TRUE;
} else {
hitlist1->hsp_array = new_hsp_array;
- hitlist1->hspmax = 2*new_hspcnt;
+ hitlist1->hspmax = new_allocated;
}
+ new_hspcnt = MIN(new_hspcnt, hitlist1->hspmax);
}
- if (new_hspcnt <= hitlist1->hspmax) {
- /* Capacity is enough to save all HSPs from both arrays */
+ if (new_hspcnt >= hitlist2->hspcnt + hitlist1->hspcnt) {
+ /* All HSPs from both arrays are saved */
for (index=hitlist1->hspcnt, index1=0;
index1<hitlist2->hspcnt; index1++) {
if (hitlist2->hsp_array[index1] != NULL)
hitlist1->hsp_array[index++] = hitlist2->hsp_array[index1];
}
} else {
- /* All HSPs cannot be saved; sort both arrays by score and save only
- hspmax best ones */
- new_hsp_array = (BLAST_HSPPtr PNTR)
- Malloc(hitlist1->hspmax*sizeof(BLAST_HSPPtr));
- HeapSort(hitlist1->hsp_array, hitlist1->hspcnt, sizeof(BLAST_HSPPtr),
- score_compare_hsps);
- HeapSort(hitlist2->hsp_array, hitlist2->hspcnt, sizeof(BLAST_HSPPtr),
+ /* Not all HSPs can be saved; sort both arrays by score and save only
+ the new_hspcnt best ones.
+ For the merged set of HSPs, allocate an array of the same size as
+ the old HSP list's array. */
+ new_hsp_array = (BLAST_HSP**)
+ malloc(hitlist1->hspmax*sizeof(BLAST_HSP*));
+ HeapSort(hitlist1->hsp_array, hitlist1->hspcnt,
+ sizeof(BLAST_HSP*), score_compare_hsps);
+ HeapSort(hitlist2->hsp_array, hitlist2->hspcnt, sizeof(BLAST_HSP*),
score_compare_hsps);
index1 = index2 = 0;
- for (index = 0; index < hitlist1->hspmax; ++index) {
+ for (index = 0; index < new_hspcnt; ++index) {
if (index1 < hitlist1->hspcnt &&
(index2 >= hitlist2->hspcnt ||
(hitlist1->hsp_array[index1]->score >=
@@ -6254,11 +6260,12 @@ BLASTMergeHitLists(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist1,
BLAST_HSPFree(hitlist2->hsp_array[index2]);
}
/* Point hitlist1's HSP array to the new one */
+ hitlist1->hsp_array = (BLAST_HSP**) MemFree(hitlist1->hsp_array);
hitlist1->hsp_array = new_hsp_array;
}
hitlist1->hspcnt = index;
- /* Second hitlist now does not own any HSPs at all */
+ /* Second HSP list now does not own any HSPs */
hitlist2->hspcnt = 0;
return hitlist1;
@@ -9553,6 +9560,17 @@ int LIBCALLBACK RPSResultHspScoreCmp(VoidPtr v1, VoidPtr v2)
return 1;
else if (h1->score > h2->score)
return -1;
+
+ if( h1->subject_offset < h2->subject_offset )
+ return 1;
+ if( h1->subject_offset > h2->subject_offset )
+ return -1;
+
+ if( h1->subject_length < h2->subject_length )
+ return 1;
+ if( h1->subject_length > h2->subject_length )
+ return -1;
+
else return 0;
}
/*
diff --git a/tools/blastkar.c b/tools/blastkar.c
index 8132fd91..a98472aa 100644
--- a/tools/blastkar.c
+++ b/tools/blastkar.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: blastkar.c,v 6.100 2004/04/28 14:36:00 madden Exp $";
+static char const rcsid[] = "$Id: blastkar.c,v 6.101 2004/06/07 20:03:23 coulouri Exp $";
/* ===========================================================================
*
@@ -49,8 +49,11 @@ Detailed Contents:
- calculate pseuod-scores from p-values.
******************************************************************************
- * $Revision: 6.100 $
+ * $Revision: 6.101 $
* $Log: blastkar.c,v $
+ * Revision 6.101 2004/06/07 20:03:23 coulouri
+ * use floating point constants for comparisons with floating point variables
+ *
* Revision 6.100 2004/04/28 14:36:00 madden
* Changes from Mike Gertz:
* - I created the new routine BlastGapDecayDivisor that computes a
@@ -3209,7 +3212,7 @@ BlastKarlinLtoH(BLAST_ScoreFreqPtr sfp, Nlm_FloatHi lambda)
}
scale = Nlm_Powi( etonlam, high );
- if( scale > 0 ) {
+ if( scale > 0.0 ) {
H = lambda * sum/scale;
} else { /* Underflow of exp( -lambda * high ) */
H = lambda * exp( lambda * high + log(sum) );
@@ -3421,7 +3424,7 @@ BlastKarlinLHtoK(BLAST_ScoreFreqPtr sfp, Nlm_FloatHi lambda, Nlm_FloatHi H)
/* Look for the greatest common divisor ("delta" in Appendix of PNAS 87 of
Karlin&Altschul (1990) */
for (i = 1, divisor = -low; i <= range && divisor > 1; ++i) {
- if (probArrayStartLow[i])
+ if (probArrayStartLow[i] != 0.0)
divisor = Nlm_Gcd(divisor, i);
}
@@ -3691,7 +3694,7 @@ BlastKarlinLambdaNR(BLAST_ScoreFreqPtr sfp)
sprob = sfp->sprob;
/* Find greatest common divisor of all scores */
for (i = 1, d = -low; i <= high-low && d > 1; ++i) {
- if (sprob[i+low] != 0) {
+ if (sprob[i+low] != 0.0) {
d = Nlm_Gcd(d, i);
}
}
diff --git a/tools/blastool.c b/tools/blastool.c
index 2571f366..f5350854 100644
--- a/tools/blastool.c
+++ b/tools/blastool.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: blastool.c,v 6.264 2004/04/30 15:25:20 dondosha Exp $";
+static char const rcsid[] = "$Id: blastool.c,v 6.268 2004/05/21 13:53:04 dondosha Exp $";
/* ===========================================================================
*
@@ -34,8 +34,20 @@ Contents: Utilities for BLAST
******************************************************************************/
/*
-* $Revision: 6.264 $
+* $Revision: 6.268 $
* $Log: blastool.c,v $
+* Revision 6.268 2004/05/21 13:53:04 dondosha
+* Use BLAST_HSPFree to free BLAST_HSP structures, hence no need to call GapXEditBlockDelete in multiple places
+*
+* Revision 6.267 2004/05/14 15:38:11 dondosha
+* Use newly public function ScoreAndEvalueToBuffers from txalign.h instead of a static function
+*
+* Revision 6.266 2004/05/14 14:41:03 bealer
+* - Er. I mean .002, as per blastpgp.
+*
+* Revision 6.265 2004/05/14 14:39:45 bealer
+* - Adjust ethresh to .001 for PSI blast.
+*
* Revision 6.264 2004/04/30 15:25:20 dondosha
* Added argument in call to BXMLGetHspFromSeqAlign
*
@@ -4933,28 +4945,6 @@ BlastGetNumIdentical(Uint1Ptr query, Uint1Ptr subject, Int4 q_start,
return ident;
}
-static void ScoreAndEvalueToBuffers(FloatHi bit_score, FloatHi evalue,
- CharPtr *bit_score_buf, CharPtr *evalue_buf)
-{
- if (evalue < 1.0e-180)
- sprintf(*evalue_buf, "0.0");
- else if (evalue < 1.0e-99)
- sprintf(*evalue_buf, "%2.0le", evalue);
- else if (evalue < 0.0009)
- sprintf(*evalue_buf, "%3.1le", evalue);
- else if (evalue < 1.0)
- sprintf(*evalue_buf, "%4.3lf", evalue);
- else
- sprintf(*evalue_buf, "%5.1lf", evalue);
-
- if (bit_score > 9999)
- sprintf(*bit_score_buf, "%4.3le", bit_score);
- else if (bit_score > 99.9)
- sprintf(*bit_score_buf, "%4.1lf", bit_score);
- else
- sprintf(*bit_score_buf, "%4.2lf", bit_score);
-}
-
/*
Function to print results in tab-delimited format, given a SeqAlign list.
q_shift and s_shift are the offsets in query and subject in case of a
@@ -5006,7 +4996,8 @@ void BlastPrintTabularResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
Int4 numseg, num_gap_opens, num_mismatches, num_ident, score;
Int4 number, align_length, index, i;
Int4 q_start, q_end, s_start, s_end;
- CharPtr eval_buff, bit_score_buff;
+ Char bit_score_buff[10];
+ CharPtr eval_buff;
Boolean is_translated;
SeqIdPtr query_id, old_query_id = NULL, subject_id, old_subject_id = NULL;
BioseqPtr subject_bsp=NULL;
@@ -5022,8 +5013,6 @@ void BlastPrintTabularResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
is_translated = (StringCmp(blast_program, "blastn") &&
StringCmp(blast_program, "blastp"));
- eval_buff = Malloc(10);
- bit_score_buff = Malloc(10);
if (is_translated) {
asp = MemNew(sizeof(AlignSum));
asp->matrix = load_default_matrix();
@@ -5031,7 +5020,6 @@ void BlastPrintTabularResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
asp->ooframe = is_ooframe;
}
-
if (is_ungapped)
sap_tmp = SeqAlignNew();
@@ -5039,6 +5027,10 @@ void BlastPrintTabularResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
if (query_bsp)
query_id = query_bsp->id;
+ /* Evalue buffer is dynamically allocated to avoid compiler warnings
+ in calls to ScoreAndEvalueToBuffers. */
+ eval_buff = Malloc(10);
+
for (sap = seqalign; sap; sap = sap->next) {
if (query_slp)
query_id = TxGetQueryIdFromSeqAlign(sap);
@@ -5113,8 +5105,10 @@ void BlastPrintTabularResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
+ /* Do not allow knocking off digit in evalue buffer, so parsers are
+ not confused. */
ScoreAndEvalueToBuffers(bit_score, evalue,
- &bit_score_buff, &eval_buff);
+ bit_score_buff, &eval_buff, FALSE);
/* Loop on segments within this seqalign (in ungapped case) */
while (TRUE) {
@@ -5158,7 +5152,7 @@ void BlastPrintTabularResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
sap_tmp->segs = ssp;
GetScoreAndEvalue(sap_tmp, &score, &bit_score, &evalue, &number);
ScoreAndEvalueToBuffers(bit_score, evalue,
- &bit_score_buff, &eval_buff);
+ bit_score_buff, &eval_buff, FALSE);
find_score_in_align(sap_tmp, 1, asp);
} else
find_score_in_align(sap, 1, asp);
@@ -5200,7 +5194,7 @@ void BlastPrintTabularResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
sap_tmp->segs = ddp;
GetScoreAndEvalue(sap_tmp, &score, &bit_score, &evalue, &number);
ScoreAndEvalueToBuffers(bit_score, evalue,
- &bit_score_buff, &eval_buff);
+ bit_score_buff, &eval_buff, FALSE);
align_length = ddp->len;
if (ddp->strands[0] == Seq_strand_minus) {
@@ -5254,6 +5248,8 @@ void BlastPrintTabularResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
}
}
+ eval_buff = MemFree(eval_buff);
+
if (is_ungapped)
sap_tmp = MemFree(sap_tmp);
@@ -5261,8 +5257,7 @@ void BlastPrintTabularResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
free_default_matrix(asp->matrix);
MemFree(asp);
}
- MemFree(eval_buff);
- MemFree(bit_score_buff);
+
BioseqUnlock(subject_bsp);
if (query_slp)
BioseqUnlock(query_bsp);
@@ -5280,7 +5275,8 @@ int LIBCALLBACK BlastPrintAlignInfo(VoidPtr srch)
Int4 num_mismatches, num_gap_opens, align_length, num_ident;
Uint1Ptr query_seq, subject_start=NULL, subject_seq, rev_subject=NULL;
FloatHi perc_ident, bit_score, evalue;
- Char eval_buff[10], bit_score_buff[10];
+ Char bit_score_buff[10];
+ CharPtr eval_buff = NULL;
Int4 length=0, query_length, subject_length=0, rev_subject_length=0;
Int4 q_start, q_end, s_start, s_end, q_shift=0, s_shift=0;
CharPtr subject_descr = NULL;
@@ -5510,12 +5506,6 @@ int LIBCALLBACK BlastPrintAlignInfo(VoidPtr srch)
subject_id = SeqIdSetFree(subject_id);
- for (index=0; index<hspcnt; index++) {
- if (search->current_hitlist->hsp_array[index] != NULL)
- search->current_hitlist->hsp_array[index]->gap_info =
- GapXEditBlockDelete(search->current_hitlist->hsp_array[index]->gap_info);
- }
-
if (is_translated || !search->pbp->gapped_calculation) {
asp = MemNew(sizeof(AlignSum));
asp->matrix = NULL;
@@ -5525,6 +5515,10 @@ int LIBCALLBACK BlastPrintAlignInfo(VoidPtr srch)
AdjustOffSetsInSeqAlign(seqalign, search->query_slp, subject_slp);
}
+ /* Evalue buffer is dynamically allocated to avoid compiler warnings
+ in calls to ScoreAndEvalueToBuffers. */
+ eval_buff = Malloc(10);
+
/* Now print the tab-delimited fields, using seqalign */
for (sap = seqalign; sap; sap = sap->next) {
perc_ident = 0;
@@ -5534,23 +5528,10 @@ int LIBCALLBACK BlastPrintAlignInfo(VoidPtr srch)
GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
- if (evalue < 1.0e-180)
- sprintf(eval_buff, "0.0");
- else if (evalue < 1.0e-99)
- sprintf(eval_buff, "%2.0le", evalue);
- else if (evalue < 0.0009)
- sprintf(eval_buff, "%3.1le", evalue);
- else if (evalue < 1.0)
- sprintf(eval_buff, "%4.3lf", evalue);
- else
- sprintf(eval_buff, "%5.1lf", evalue);
-
- if (bit_score > 9999)
- sprintf(bit_score_buff, "%4.3le", bit_score);
- else if (bit_score > 99.9)
- sprintf(bit_score_buff, "%4.1lf", bit_score);
- else
- sprintf(bit_score_buff, "%4.2lf", bit_score);
+ /* Do not allow knocking off digit in evalue buffer, so parsers are
+ not confused. */
+ ScoreAndEvalueToBuffers(bit_score, evalue,
+ bit_score_buff, &eval_buff, FALSE);
query_seq = search->context[search->first_context].query->sequence;
@@ -5647,6 +5628,8 @@ int LIBCALLBACK BlastPrintAlignInfo(VoidPtr srch)
q_end, s_start, s_end, eval_buff, bit_score_buff);
}
+ eval_buff = MemFree(eval_buff);
+
if (is_translated) {
free_default_matrix(asp->matrix);
MemFree(asp);
@@ -5705,7 +5688,8 @@ MegaBlastPrintAlignInfo(VoidPtr ptr)
BLAST_KarlinBlkPtr kbp;
Uint1Ptr query_seq, subject_seq = NULL;
FloatHi perc_ident, bit_score;
- Char eval_buff[10], bit_score_buff[10];
+ Char bit_score_buff[10];
+ CharPtr eval_buff = NULL;
GapXEditScriptPtr esp;
Int4 q_start, q_end, s_start, s_end, query_length, numseg;
Int4 q_off, q_shift = 0, s_off, s_shift = 0;
@@ -5771,12 +5755,14 @@ MegaBlastPrintAlignInfo(VoidPtr ptr)
/* Get offset shift if query is a subsequence */
q_shift = SeqLocStart(search->query_slp);
+ /* Evalue buffer is dynamically allocated to avoid compiler warnings
+ in calls to ScoreAndEvalueToBuffers. */
+ eval_buff = Malloc(10);
+
for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
hsp = search->current_hitlist->hsp_array[hsp_index];
if (hsp==NULL || (search->pbp->cutoff_e > 0 &&
hsp->evalue > search->pbp->cutoff_e)) {
- hsp->gap_info =
- GapXEditBlockDelete(hsp->gap_info); /* Don't need it anymore */
continue;
}
context = hsp->context;
@@ -5847,8 +5833,6 @@ MegaBlastPrintAlignInfo(VoidPtr ptr)
hsp->evalue =
BlastKarlinStoE_simple(hsp->score, kbp, searchsp_eff);
if (hsp->evalue > search->pbp->cutoff_e) {
- hsp->gap_info =
- GapXEditBlockDelete(hsp->gap_info); /* Don't need it anymore */
continue;
}
}
@@ -5892,8 +5876,6 @@ MegaBlastPrintAlignInfo(VoidPtr ptr)
GXECollectDataForSeqalign(hsp->gap_info, hsp->gap_info->esp, numseg,
&start, &length, &strands,
&q_off, &s_off);
- hsp->gap_info =
- GapXEditBlockDelete(hsp->gap_info); /* Don't need it anymore */
if (start[0] < 0) {
length[0] += start[0];
@@ -5977,24 +5959,10 @@ MegaBlastPrintAlignInfo(VoidPtr ptr)
s_start += s_shift;
s_end += s_shift;
- if (hsp->evalue < 1.0e-180)
- sprintf(eval_buff, "0.0");
- else if (hsp->evalue < 1.0e-99)
- sprintf(eval_buff, "%2.0le", hsp->evalue);
- else if (hsp->evalue < 0.0009)
- sprintf(eval_buff, "%3.1le", hsp->evalue);
- else if (hsp->evalue < 1.0)
- sprintf(eval_buff, "%4.3lf", hsp->evalue);
- else
- sprintf(eval_buff, "%5.1lf", hsp->evalue);
-
- if (bit_score > 9999)
- sprintf(bit_score_buff, "%4.3le", bit_score);
- else if (bit_score > 99.9)
- sprintf(bit_score_buff, "%4.1lf", bit_score);
- else
- sprintf(bit_score_buff, "%4.2lf", bit_score);
-
+ /* Do not allow knocking off digit in evalue buffer, so parsers are
+ not confused. */
+ ScoreAndEvalueToBuffers(bit_score, hsp->evalue,
+ bit_score_buff, &eval_buff, FALSE);
if (print_sequences) {
if (numeric_sip_type) {
@@ -6043,6 +6011,8 @@ MegaBlastPrintAlignInfo(VoidPtr ptr)
MemFree(subject_buffer);
MemFree(subject_descr);
MemFree(buffer);
+ MemFree(eval_buff);
+
sip = SeqIdSetFree(sip);
fflush(fp);
return 0;
@@ -7029,7 +6999,7 @@ BLAST_Wizard(
/* set some defaults for backward compat. with blastcgicmd.cpp */
if(!StringCmp(service, "psi"))
- out->ethresh = 0.005;
+ out->ethresh = 0.002;
else if (!StringCmp(service, "rpsblast"))
out->is_rps_blast = TRUE;
@@ -7159,7 +7129,7 @@ BLAST_Wizard(
if(!strcmp(service, "psi")) {
out->ethresh = mask->ethresh ?
options->ethresh :
- 0.005;
+ 0.002;
out->tweak_parameters = mask->tweak_parameters ?
options->tweak_parameters :
TRUE;
diff --git a/tools/blastutl.c b/tools/blastutl.c
index 69225e4c..ba9bb737 100644
--- a/tools/blastutl.c
+++ b/tools/blastutl.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: blastutl.c,v 6.434 2004/04/22 16:40:32 dondosha Exp $";
+static char const rcsid[] = "$Id: blastutl.c,v 6.438 2004/06/01 20:34:06 dondosha Exp $";
/* ===========================================================================
*
@@ -32,12 +32,24 @@ Author: Tom Madden
Contents: Utilities for BLAST
-$Revision: 6.434 $
+$Revision: 6.438 $
******************************************************************************/
/*
*
* $Log: blastutl.c,v $
+* Revision 6.438 2004/06/01 20:34:06 dondosha
+* Fix in previous change; memory leak fix
+*
+* Revision 6.437 2004/05/27 17:36:24 dondosha
+* Minor fix for previous 2 changes
+*
+* Revision 6.436 2004/05/25 21:42:47 dondosha
+* Fix in previous change: in some cases edit block should not be freed when BLAST_HSP is freed
+*
+* Revision 6.435 2004/05/21 13:53:04 dondosha
+* Use BLAST_HSPFree to free BLAST_HSP structures, hence no need to call GapXEditBlockDelete in multiple places
+*
* Revision 6.434 2004/04/22 16:40:32 dondosha
* Set search->subject_id to correct ordinal id, needed for finding splice junctions in HSP links at traceback stage
*
@@ -4598,7 +4610,7 @@ BioseqBlastEngineCore(BlastSearchBlkPtr search, BLAST_OptionsBlkPtr options,
}
for (index=0; index<hspcnt; index++)
- MemFree(hspp[index]);
+ hspp[index] = MemFree(hspp[index]);
hspp = MemFree(hspp);
}
#endif /* Clustering hits */
@@ -6176,7 +6188,7 @@ BlastHitListDestruct(BLAST_HitListPtr hitlist)
for (index=0; index<hspcnt_max; index++)
{
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
}
hitlist->hsp_array = MemFree(hsp_array);
@@ -7717,7 +7729,6 @@ CopyHSPToResultHsp(BLAST_KarlinBlkPtr kbp, BLAST_HSPPtr hsp, BLASTResultHspPtr r
result_hsp->subject_gapped_start = hsp->subject.gapped_start;
result_hsp->context = hsp->context;
result_hsp->gap_info = hsp->gap_info;
-
/* Not set in the other type of HSP? */
result_hsp->hspset_cnt = 0;
@@ -8111,12 +8122,14 @@ CheckGappedAlignmentsForOverlap(BlastSearchBlkPtr search, BLAST_HSPPtr *hsp_arra
{
if (hsp_array[index]->score > hsp_array[index+increment]->score)
{
- hsp_array[index+increment] = MemFree(hsp_array[index+increment]);
+ hsp_array[index+increment] =
+ BLAST_HSPFree(hsp_array[index+increment]);
increment++;
}
else
{
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] =
+ BLAST_HSPFree(hsp_array[index]);
index++;
increment = 1;
}
@@ -8149,12 +8162,14 @@ CheckGappedAlignmentsForOverlap(BlastSearchBlkPtr search, BLAST_HSPPtr *hsp_arra
{
if (hsp_array[index]->score > hsp_array[index+increment]->score)
{
- hsp_array[index+increment] = MemFree(hsp_array[index+increment]);
+ hsp_array[index+increment] =
+ BLAST_HSPFree(hsp_array[index+increment]);
increment++;
}
else
{
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] =
+ BLAST_HSPFree(hsp_array[index]);
index++;
increment = 1;
}
@@ -8464,7 +8479,7 @@ BlastGappedScoreInternal(BlastSearchBlkPtr search, Uint1Ptr subject, Int4 subjec
}
else
{ /* Contained within another HSP, delete. */
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
}
}
helper = MemFree(helper);
@@ -8626,7 +8641,7 @@ BlastNtGappedScoreInternal(BlastSearchBlkPtr search, Uint1Ptr subject, Int4 subj
}
else
{ /* Contained within another HSP, delete. */
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
}
}
helper = MemFree(helper);
@@ -9274,7 +9289,9 @@ RealBlastGetGappedAlignmentTraceback(BlastSearchBlkPtr search, Uint1Ptr subject,
} else {
query_id = search->query_id;
}
- CopyHSPToResultHsp(search->sbp->kbp_gap[search->first_context], hsp, &result_hsp);
+ CopyHSPToResultHsp(search->sbp->kbp_gap[search->first_context],
+ hsp, &result_hsp);
+
if (new_subject_seqid) {
if (search->pbp->explode_seqids)
seqid_tmp = gi_list;
@@ -9485,6 +9502,8 @@ SumBlastGetGappedAlignmentEx (BlastSearchBlkPtr search, Int4 hit_number, Boolean
high_score = hsp_array[index]->score;
CopyHSPToResultHsp(search->sbp->kbp_gap[search->first_context], hsp_array[index], &(result_hsp_array[index1]));
index1++;
+ /* Do not free edit block, just the
+ BLAST_HSP structure. */
hsp_array[index] = MemFree(hsp_array[index]);
}
}
@@ -9569,7 +9588,8 @@ BlastGetGapAlgnTbck (BlastSearchBlkPtr search, Int4 hit_number, Boolean reverse,
for (index=0; index<hspcnt; index++)
{
hsp_array[index] = MemNew(sizeof(BLAST_HSP));
- CopyResultHspToHSP(&(result_hitlist->hsp_array[index]), hsp_array[index]);
+ CopyResultHspToHSP(&(result_hitlist->hsp_array[index]),
+ hsp_array[index]);
}
HeapSort(hsp_array,hspcnt,sizeof(BLAST_HSPPtr), score_compare_hsps);
@@ -9601,16 +9621,24 @@ BlastGetGapAlgnTbck (BlastSearchBlkPtr search, Int4 hit_number, Boolean reverse,
current_evalue = hsp_array[index]->evalue;
if (high_score < hsp_array[index]->score)
high_score = hsp_array[index]->score;
+
CopyHSPToResultHsp(search->sbp->kbp_gap[search->first_context], hsp_array[index], &(result_hsp_array[index1]));
index1++;
+ /* Do not free edit block, just the BLAST_HSP
+ structure */
hsp_array[index] = MemFree(hsp_array[index]);
}
}
hsp_array = MemFree(hsp_array);
+ if (result_hitlist->hsp_array) {
+ /* Delete any edit blocks from a previous traceback. */
+ for (index=0; index< result_hitlist->hspcnt; ++index)
+ GapXEditBlockDelete(result_hitlist->hsp_array[index].gap_info);
+
+ MemFree(result_hitlist->hsp_array);
+ }
result_hitlist->hspcnt = index1;
- if (result_hitlist->hsp_array)
- MemFree(result_hitlist->hsp_array);
result_hitlist->hsp_array = result_hsp_array;
result_hitlist->best_evalue = current_evalue;
result_hitlist->high_score = high_score;
@@ -10185,7 +10213,7 @@ BlastHitListPurge(BLAST_HitListPtr hitlist)
hspcnt_max = hitlist->hspcnt_max;
for (index=0; index<hspcnt_max; index++) {
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
}
hitlist->hspcnt = 0;
@@ -10298,7 +10326,7 @@ CheckHspOverlap (BLAST_HSPPtr PNTR hsp_array, BLAST_HSPPtr hsp2, Int4 hspcnt, Bo
}
else
{
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
*hsp_deleted = TRUE;
}
}
@@ -10472,13 +10500,13 @@ bove.*/
{
if (new_index >= hspcnt)
{ /* this HSP is less significant than others on a full list.*/
- new_hsp = MemFree(new_hsp);
+ new_hsp = BLAST_HSPFree(new_hsp);
return;
}
else
{ /* Delete the last HPS on the list. */
hspcnt = --current_hitlist->hspcnt;
- hsp_array[hspcnt] = MemFree(hsp_array[hspcnt]);
+ hsp_array[hspcnt] = BLAST_HSPFree(hsp_array[hspcnt]);
}
}
current_hitlist->hspcnt++;
@@ -10781,7 +10809,7 @@ bove.*/
else
{ /* Delete the last HPS on the list. */
hspcnt = --current_hitlist->hspcnt;
- hsp_array[hspcnt] = MemFree(hsp_array[hspcnt]);
+ hsp_array[hspcnt] = BLAST_HSPFree(hsp_array[hspcnt]);
}
}
current_hitlist->hspcnt++;
diff --git a/tools/kappa.c b/tools/kappa.c
index 7a2e8398..e50c5bc8 100644
--- a/tools/kappa.c
+++ b/tools/kappa.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: kappa.c,v 6.39 2004/03/31 18:12:13 papadopo Exp $";
+static char const rcsid[] = "$Id: kappa.c,v 6.41 2004/06/14 21:11:05 papadopo Exp $";
-/* $Id: kappa.c,v 6.39 2004/03/31 18:12:13 papadopo Exp $
+/* $Id: kappa.c,v 6.41 2004/06/14 21:11:05 papadopo Exp $
* ==========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -34,9 +34,24 @@ Authors: Alejandro Schaffer, Mike Gertz
Contents: Utilities for doing Smith-Waterman alignments and adjusting
the scoring system for each match in blastpgp
- $Revision: 6.39 $
+ $Revision: 6.41 $
$Log: kappa.c,v $
+ Revision 6.41 2004/06/14 21:11:05 papadopo
+ From Michael Gertz:
+ - Added several casts where casts occur in blast_kappa.c. These casts
+ should have no real effect; the log of blast_kappa.c indicates that
+ they suppress compiler warnings.
+ - Changed the type of one variable that holds a score from
+ Nlm_FloatHi to Int4.
+ - moved the definition Kappa_ForbiddenRanges and relevant
+ routines earlier in the file.
+ - fixed some comments.
+ - made a few (~5) changes in whitespace.
+
+ Revision 6.40 2004/06/03 16:10:50 dondosha
+ Fix in Kappa_SearchParametersNew: allocate correct number of rows for matrices
+
Revision 6.39 2004/03/31 18:12:13 papadopo
Mike Gertz' refactoring of RedoAlignmentCore
@@ -326,7 +341,7 @@ Kappa_MatchRecordInsertSeqAlign(
newSW =
SWResultsNew(self->sequence, newScore, self->score, newEvalue,
- self->eValue, (NULL == self->alignments),
+ self->eValue, (Boolean) (NULL == self->alignments),
localScalingFactor * lambda, logK,
self->subject_index, self->id);
@@ -631,7 +646,7 @@ SWheapInsert(SWheap * self,
}
if(self->array != NULL) {
/* "self" is currently a list. Add the new alignments to the end */
- SWheapRecord *heapRecord; /* destination for the new alignments */
+ SWheapRecord *heapRecord; /* destination for the new alignments */
heapRecord = &self->array[++self->n];
heapRecord->bestEvalue = matchRecord->eValue;
heapRecord->theseAlignments = matchRecord->alignments;
@@ -650,7 +665,7 @@ SWheapInsert(SWheap * self,
Int4 newCapacity; /* capacity the heap will have after
* it is resized */
newCapacity = MAX(SWHEAP_MIN_RESIZE + self->capacity,
- SWHEAP_RESIZE_FACTOR * self->capacity);
+ (Int4) (SWHEAP_RESIZE_FACTOR * self->capacity));
self->heapArray = (SWheapRecord *)
MemMore(self->heapArray, (newCapacity + 1) * sizeof(SWheapRecord));
self->capacity = newCapacity;
@@ -673,7 +688,7 @@ SWheapInsert(SWheap * self,
/* the new alignments must be discarded */
discardedAlignments = matchRecord->alignments;
} else {
- /* the largest element in the heap must be discarded */
+ /* The largest element in the heap must be discarded. */
SWheapRecord *heapRecord; /* destination for the new alignments */
discardedAlignments = self->heapArray[1].theseAlignments;
@@ -685,7 +700,7 @@ SWheapInsert(SWheap * self,
}
/* end else the largest element in the heap must be discarded */
while(discardedAlignments != NULL) {
- /* There are discarded alignments that have not been freed */
+ /* There are discarded alignments that have not been freed. */
SWResults *thisAlignment; /* the head of the list of
* discarded alignments */
thisAlignment = discardedAlignments;
@@ -696,9 +711,9 @@ SWheapInsert(SWheap * self,
MemFree(thisAlignment);
}
/* end while there are discarded alignments that have not been freed */
- }
+ }
/* end else some set of alignments must be discarded */
-
+
self->worstEvalue = self->heapArray[1].bestEvalue;
KAPPA_ASSERT(SWheapIsValid(self->heapArray, 1, self->n));
}
@@ -765,13 +780,13 @@ SWheapPop(SWheap * self)
last = &self->heapArray[self->n];
results = first->theseAlignments;
-
+
first->theseAlignments = last->theseAlignments;
first->bestEvalue = last->bestEvalue;
SWheapifyDown(self->heapArray, 1, --self->n);
}
-
+
KAPPA_ASSERT(SWheapIsValid(self->heapArray, 1, self->n));
return results;
@@ -1325,6 +1340,96 @@ static Int4 BLspecialSmithWatermanFindStart(Uint1 * matchSeq,
return(bestScore);
}
+
+/**
+ * An instance of Kappa_ForbiddenRanges is used by the Smith-Waterman
+ * algorithm to represent ranges in the database that are not to be
+ * aligned.
+ *
+ * Both arrays below are allocated with queryLength entries by
+ * Kappa_ForbiddenRangesInitialize and are indexed by query position;
+ * the values stored in ranges[f] are subject (match) coordinates.
+ */
+struct Kappa_ForbiddenRanges {
+ Int4 *numForbidden; /* numForbidden[f] is the number of forbidden
+ * ranges recorded for query position f */
+ Int4 **ranges; /* ranges[f] holds numForbidden[f] consecutive
+ * (start, end) pairs: entry 2i is the start
+ * and entry 2i + 1 the end of the i-th range */
+ Int4 queryLength; /* length of the query sequence; also the
+ * number of entries in both arrays */
+};
+typedef struct Kappa_ForbiddenRanges Kappa_ForbiddenRanges;
+
+
+/* Initialize a new, empty Kappa_ForbiddenRanges.
+ *
+ * Allocates the per-position count and range arrays (queryLength
+ * entries each) and gives every position room for exactly one
+ * (start, end) pair, so the first range pushed at a position needs
+ * no resize.  The storage is freed by Kappa_ForbiddenRangesRelease.
+ * NOTE(review): the MemNew results are used without a NULL check. */
+static void
+Kappa_ForbiddenRangesInitialize(
+ Kappa_ForbiddenRanges * self, /* object to be initialized */
+ Int4 queryLength /* the length of the query */
+) {
+ Int4 f; /* query position being set up */
+ self->queryLength = queryLength;
+ self->numForbidden = (Int4 *) MemNew(queryLength * sizeof(Int4));
+ self->ranges = (Int4 **) MemNew(queryLength * sizeof(Int4 *));
+
+ for(f = 0; f < queryLength; f++) {
+ self->numForbidden[f] = 0; /* no ranges recorded yet */
+ self->ranges[f] = (Int4 *) MemNew(2 * sizeof(Int4)); /* one pair */
+ self->ranges[f][0] = 0;
+ self->ranges[f][1] = 0;
+ }
+}
+
+
+/* Reset self to be empty.
+ *
+ * Only the per-position counts are zeroed; the range arrays stay
+ * allocated and keep their old contents, which later pushes
+ * overwrite starting again from the first pair. */
+static void
+Kappa_ForbiddenRangesClear(Kappa_ForbiddenRanges * self)
+{
+ Int4 f; /* query position being reset */
+ for(f = 0; f < self->queryLength; f++) {
+ self->numForbidden[f] = 0;
+ }
+}
+
+
+/* Add some ranges to self.
+ *
+ * For every query position f in
+ * [queryStart, queryStart + queryAlignmentExtent) the pair
+ * (matchStart, matchStart + matchAlignmentExtent) is appended to
+ * ranges[f] and numForbidden[f] is incremented.
+ * NOTE(review): f is not checked against self->queryLength and the
+ * MemMore result is not checked for NULL; presumably callers pass an
+ * interval that lies inside the query -- confirm. */
+static void
+Kappa_ForbiddenRangesPush(
+ Kappa_ForbiddenRanges * self, /* object receiving the new ranges */
+ Int4 queryStart, /* start of the alignment in the query
+ sequence */
+ Int4 queryAlignmentExtent, /* length of the alignment in the query
+ sequence */
+ Int4 matchStart, /* start of the alignment in the
+ subject sequence */
+ Int4 matchAlignmentExtent) /* length of the alignment in the
+ subject sequence */
+{
+ Int4 f; /* current query position */
+ for(f = queryStart; f < (queryStart + queryAlignmentExtent); f++) {
+ Int4 last = 2 * self->numForbidden[f]; /* index of the next free slot */
+ if(0 != last) { /* we must resize the array; only the first pair
+ was pre-allocated by the initializer */
+ self->ranges[f] =
+ (Int4 *) MemMore(self->ranges[f], (last + 2) * sizeof(Int4));
+ }
+ self->ranges[f][last] = matchStart;
+ self->ranges[f][last + 1] = matchStart + matchAlignmentExtent;
+
+ self->numForbidden[f]++;
+ }
+}
+
+
+/**
+ * Release the storage associated with the fields of self, but do not
+ * delete self.
+ *
+ * Frees each per-position range array, then the two top-level arrays,
+ * and sets both array pointers to NULL.  queryLength is left
+ * unchanged.
+ * NOTE(review): because queryLength is not reset, calling Release,
+ * Clear or Push again without re-initializing would index the NULL
+ * arrays.
+ */
+static void
+Kappa_ForbiddenRangesRelease(Kappa_ForbiddenRanges * self)
+{
+ Int4 f; /* query position whose range array is freed */
+ for(f = 0; f < self->queryLength; f++) MemFree(self->ranges[f]);
+
+ MemFree(self->ranges); self->ranges = NULL;
+ MemFree(self->numForbidden); self->numForbidden = NULL;
+}
+
+
/*The following procedure computes the number of identities in an
* alignment of query_seq to the matching sequence stored in
* SWAlign. The alignment is encoded in gap_info*/
@@ -2119,88 +2224,6 @@ Kappa_MatchingSequenceRelease(Kappa_MatchingSequence * self)
}
-/* An instance of Kappa_ForbiddenRanges is used by the Smith-Waterman
- * algorithm to represent ranges in the database that are not to be
- * aligned.
- */
-
-struct Kappa_ForbiddenRanges { Int4 *numForbidden; /* how many
- forbidden ranges at each db * position */
- Int4 **ranges; /* forbidden ranges for each database
- * position */
- Int4 queryLength;
-};
-typedef struct Kappa_ForbiddenRanges Kappa_ForbiddenRanges;
-
-
-/* Initialize a new, empty Kappa_ForbiddenRanges */
-static void
-Kappa_ForbiddenRangesInitialize(
- Kappa_ForbiddenRanges * self, /* object to be initialized */
- Int4 queryLength /* the length of the query */
-) {
- Int4 f;
- self->queryLength = queryLength;
- self->numForbidden = (Int4 *) MemNew(queryLength * sizeof(Int4));
- self->ranges = (Int4 **) MemNew(queryLength * sizeof(Int4 *));
-
- for(f = 0; f < queryLength; f++) {
- self->numForbidden[f] = 0;
- self->ranges[f] = (Int4 *) MemNew(2 * sizeof(Int4));
- self->ranges[f][0] = 0;
- self->ranges[f][1] = 0;
- }
-}
-
-
-/* Reset self to be empty */
-static void
-Kappa_ForbiddenRangesClear(Kappa_ForbiddenRanges * self)
-{
- Int4 f;
- for(f = 0; f < self->queryLength; f++) {
- self->numForbidden[f] = 0;
- }
-}
-
-
-/* Add some ranges to self */
-static void
-Kappa_ForbiddenRangesPush(
- Kappa_ForbiddenRanges * self,
- Int4 queryStart, /* start of the alignment in the query sequence */
- Int4 queryAlignmentExtent, /* length of the alignment in the query sequence */
- Int4 matchStart, /* start of the alignment in the subject sequence */
- Int4 matchAlignmentExtent) /* length of the alignment in the subject sequence */
-{
- Int4 f;
- for(f = queryStart; f < (queryStart + queryAlignmentExtent); f++) {
- Int4 last = 2 * self->numForbidden[f];
- if(0 != last) { /* we must resize the array */
- self->ranges[f] =
- (Int4 *) MemMore(self->ranges[f], (last + 2) * sizeof(Int4));
- }
- self->ranges[f][last] = matchStart;
- self->ranges[f][last + 1] = matchStart + matchAlignmentExtent;
-
- self->numForbidden[f]++;
- }
-}
-
-
-/* Release the storage associated with the fields of self, but do not
- * delete self */
-static void
-Kappa_ForbiddenRangesRelease(Kappa_ForbiddenRanges * self)
-{
- Int4 f;
- for(f = 0; f < self->queryLength; f++) MemFree(self->ranges[f]);
-
- MemFree(self->ranges); self->ranges = NULL;
- MemFree(self->numForbidden); self->numForbidden = NULL;
-}
-
-
/* Redo a S-W alignment using an x-drop alignment. The result will
* usually be the same as the S-W alignment. The call to ALIGN
* attempts to force the endpoints of the alignment to match the
@@ -2225,10 +2248,12 @@ Kappa_SWFindFinalEndsUsingXdrop(
* scoring system has been
* scaled in order to obtain
* greater precision */
- Int4 * queryAlignmentExtent, /* length of the alignment in the query sequence,
- as computed by the x-drop algorithm */
- Int4 * matchAlignmentExtent, /* length of the alignment in the subject sequence,
- as computed by the x-drop algorithm */
+ Int4 * queryAlignmentExtent, /* length of the alignment in the query
+ sequence, as computed by the x-drop
+ algorithm */
+ Int4 * matchAlignmentExtent, /* length of the alignment in the
+ subject sequence, as computed by the
+ x-drop algorithm */
Int4 ** reverseAlignScript, /* alignment information (script)
* returned by a x-drop alignment algorithm */
BLAST_Score * newScore /* alignment score computed by the
@@ -2242,7 +2267,7 @@ Kappa_SWFindFinalEndsUsingXdrop(
Int4 *alignScript; /* the alignment script that will be
generated below by the ALIGN
routine. */
-
+
*reverseAlignScript = alignScript =
(Int4 *) MemNew((matchLength + queryLength + 3) * sizeof(Int4));
@@ -2346,8 +2371,8 @@ Kappa_SearchParametersNew(
if(adjustParameters) {
sp->kbp_gap_orig = BlastKarlinBlkCreate();
- sp->startMatrix = allocateScaledMatrix(rows);
- sp->origMatrix = allocateScaledMatrix(rows);
+ sp->startMatrix = allocateScaledMatrix(sp->mRows);
+ sp->origMatrix = allocateScaledMatrix(sp->mRows);
sp->resProb =
(Nlm_FloatHi *) MemNew(PROTEIN_ALPHABET * sizeof(Nlm_FloatHi));
@@ -2796,7 +2821,7 @@ RedoAlignmentCore(BlastSearchBlkPtr search,
Int4 *reverseAlignScript;
gap_align->x_parameter =
- options->gap_x_dropoff_final * NCBIMATH_LN2 / kbp->Lambda;
+ (Int4) (options->gap_x_dropoff_final * NCBIMATH_LN2/kbp->Lambda);
Kappa_SWFindFinalEndsUsingXdrop(query, queryLength, queryStart,
queryEnd,
@@ -2862,7 +2887,7 @@ RedoAlignmentCore(BlastSearchBlkPtr search,
* non-SW case */
search->pbp->gap_x_dropoff_final =
- options->gap_x_dropoff_final * NCBIMATH_LN2 / kbp->Lambda;
+ (Int4) (options->gap_x_dropoff_final * NCBIMATH_LN2 / kbp->Lambda);
/* recall that index is the counter corresponding to
* thisMatch; by aliasing, thisMatch will get updated during
* the following call to BlastGetGapAlgnTbck, so that
@@ -2887,12 +2912,12 @@ RedoAlignmentCore(BlastSearchBlkPtr search,
* query to the
* current database
* sequence */
- Nlm_FloatHi bestScore; /* the score of the highest
+ Int4 bestScore; /* the score of the highest
* scoring alignment */
numNewAlignments = thisMatch->hspcnt;
bestScore =
- Nlm_Nint(((Nlm_FloatHi) thisMatch->hsp_array[0].score) /
- localScalingFactor);
+ (Int4) Nlm_Nint(((Nlm_FloatHi) thisMatch->hsp_array[0].score) /
+ localScalingFactor);
Kappa_MatchRecordInitialize(&matchRecord, bestEvalue, bestScore,
matchingSeq.sequence,
diff --git a/tools/mblast.c b/tools/mblast.c
index 954b9c23..de6879c6 100644
--- a/tools/mblast.c
+++ b/tools/mblast.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: mblast.c,v 6.202 2004/03/31 17:58:51 papadopo Exp $";
+static char const rcsid[] = "$Id: mblast.c,v 6.204 2004/05/27 17:35:56 dondosha Exp $";
/* ===========================================================================
*
@@ -40,9 +40,15 @@ Detailed Contents:
- Functions specific to Mega BLAST
******************************************************************************
- * $Revision: 6.202 $
+ * $Revision: 6.204 $
*
* $Log: mblast.c,v $
+ * Revision 6.204 2004/05/27 17:35:56 dondosha
+ * Do not flag HSPs for deletion in sorting before doing inclusion tests
+ *
+ * Revision 6.203 2004/05/21 13:53:04 dondosha
+ * Use BLAST_HSPFree to free BLAST_HSP structures, hence no need to call GapXEditBlockDelete in multiple places
+ *
* Revision 6.202 2004/03/31 17:58:51 papadopo
* Mike Gertz' changes for length adjustment calculations
*
@@ -3292,99 +3298,113 @@ static int LIBCALLBACK
diag_compare_hsps(VoidPtr v1, VoidPtr v2)
{
- BLAST_HSPPtr h1, h2;
- BLAST_HSPPtr PNTR hp1, PNTR hp2;
-
- hp1 = (BLAST_HSPPtr PNTR) v1;
- hp2 = (BLAST_HSPPtr PNTR) v2;
- h1 = *hp1;
- h2 = *hp2;
-
- if (h1==NULL && h2==NULL) return 0;
- else if (h1==NULL) return 1;
- else if (h2==NULL) return -1;
-
- /* Separate different queries and/or strands */
- if (h1->context < h2->context)
- return -1;
- else if (h1->context > h2->context)
- return 1;
-
- /* If the two HSP's have same coordinates, they are equal */
- if (h1->query.offset == h2->query.offset &&
- h1->query.end == h2->query.end &&
- h1->subject.offset == h2->subject.offset &&
- h1->subject.end == h2->subject.end)
- return 0;
-
- /* Check if one HSP is contained in the other, if so,
- leave only the longer one, given it has lower evalue */
- if (h1->query.offset >= h2->query.offset &&
- h1->query.end <= h2->query.end &&
- h1->subject.offset >= h2->subject.offset &&
- h1->subject.end <= h2->subject.end &&
- h1->evalue >= h2->evalue) {
- *hp1 = BLAST_HSPFree(h1);
- return 1;
- } else if (h1->query.offset <= h2->query.offset &&
- h1->query.end >= h2->query.end &&
- h1->subject.offset <= h2->subject.offset &&
- h1->subject.end >= h2->subject.end &&
- h1->evalue <= h2->evalue) {
- *hp2 = BLAST_HSPFree(h2);
- return -1;
- }
+ BLAST_HSPPtr h1, h2;
+ BLAST_HSPPtr PNTR hp1, PNTR hp2;
+
+ hp1 = (BLAST_HSPPtr PNTR) v1;
+ hp2 = (BLAST_HSPPtr PNTR) v2;
+ h1 = *hp1;
+ h2 = *hp2;
+
+ if (h1==NULL && h2==NULL) return 0;
+ else if (h1==NULL) return 1;
+ else if (h2==NULL) return -1;
+
+ /* Separate different queries and/or strands */
+ if (h1->context < h2->context)
+ return -1;
+ else if (h1->context > h2->context)
+ return 1;
+
+ return (h1->query.offset - h1->subject.offset) -
+ (h2->query.offset - h2->subject.offset);
+}
- return (h1->query.offset - h1->subject.offset) -
- (h2->query.offset - h2->subject.offset);
+typedef enum E_HSPInclusionStatus { /* result of BLAST_HSPInclusionTest */
+ e_Equal = 0, /**< Identical endpoints and identical score */
+ e_FirstInSecond, /**< First included in rectangle formed by second,
+ and first has the strictly lower score */
+ e_SecondInFirst, /**< Second included in rectangle formed by first,
+ and first's score is not lower than second's */
+ e_DiagNear, /**< Diagonals are near, but neither HSP is included in
+ the other (or the score condition is not met). */
+ e_DiagDistant /**< Diagonals are far apart, or different contexts */
+} E_HSPInclusionStatus;
+
+/** HSP inclusion criterion for megablast: one HSP must be included in a
+ * diagonal strip of a certain width around the other, and also in a rectangle
+ * formed by the other HSP's endpoints.
+ *
+ * Both pointers are dereferenced unconditionally, so neither may be NULL.
+ * @param hsp1 First HSP to compare [in]
+ * @param hsp2 Second HSP to compare [in]
+ * @return e_DiagDistant when the contexts differ or the MB_HSP_CLOSE test
+ *         on the query/subject offsets with width 2*MB_DIAG_NEAR fails;
+ *         e_Equal when all four endpoints and the scores match;
+ *         e_FirstInSecond when hsp1 lies within hsp2's endpoints and has
+ *         the strictly lower score; e_SecondInFirst when hsp2 lies within
+ *         hsp1's endpoints and hsp1's score is at least hsp2's;
+ *         e_DiagNear otherwise.
+ */
+static E_HSPInclusionStatus
+BLAST_HSPInclusionTest(BLAST_HSP* hsp1, BLAST_HSP* hsp2)
+{
+ if (hsp1->context != hsp2->context ||
+ !MB_HSP_CLOSE(hsp1->query.offset, hsp2->query.offset,
+ hsp1->subject.offset, hsp2->subject.offset,
+ 2*MB_DIAG_NEAR))
+ return e_DiagDistant; /* no inclusion test is attempted */
+
+ if (hsp1->query.offset == hsp2->query.offset &&
+ hsp1->query.end == hsp2->query.end &&
+ hsp1->subject.offset == hsp2->subject.offset &&
+ hsp1->subject.end == hsp2->subject.end &&
+ hsp1->score == hsp2->score) {
+ return e_Equal;
+ } else if (hsp1->query.offset >= hsp2->query.offset &&
+ hsp1->query.end <= hsp2->query.end &&
+ hsp1->subject.offset >= hsp2->subject.offset &&
+ hsp1->subject.end <= hsp2->subject.end &&
+ hsp1->score < hsp2->score) {
+ return e_FirstInSecond; /* hsp1 inside hsp2 and lower scoring */
+ } else if (hsp1->query.offset <= hsp2->query.offset &&
+ hsp1->query.end >= hsp2->query.end &&
+ hsp1->subject.offset <= hsp2->subject.offset &&
+ hsp1->subject.end >= hsp2->subject.end &&
+ hsp1->score >= hsp2->score) {
+ return e_SecondInFirst; /* hsp2 inside hsp1; ties favor hsp1 */
+ }
+ return e_DiagNear; /* close diagonals, but no qualifying inclusion */
}
+/** How many HSPs to check for inclusion for each new HSP? */
+#define MAX_NUM_CHECK_INCLUSION 20
+
static void
BlastSortUniqHspArray(BLAST_HitListPtr hitlist)
{
- Int4 index, new_hspcnt, index1, q_off, s_off, q_end, s_end, index2;
+ Int4 index, new_hspcnt, index1, index2;
BLAST_HSPPtr PNTR hsp_array = hitlist->hsp_array;
Boolean shift_needed = FALSE;
- Int2 context;
- FloatHi evalue;
+ E_HSPInclusionStatus inclusion_status = e_DiagNear;
HeapSort(hitlist->hsp_array, hitlist->hspcnt, sizeof(BLAST_HSPPtr),
diag_compare_hsps);
+
for (index=1, new_hspcnt=0; index<hitlist->hspcnt; index++) {
if (hsp_array[index]==NULL)
continue;
- q_off = hsp_array[index]->query.offset;
- s_off = hsp_array[index]->subject.offset;
- q_end = hsp_array[index]->query.end;
- s_end = hsp_array[index]->subject.end;
- evalue = hsp_array[index]->evalue;
- context = hsp_array[index]->context;
- for (index1 = new_hspcnt; index1 >= 0 &&
- hsp_array[index1]->context == context && new_hspcnt-index1 < 10 &&
- MB_HSP_CLOSE(q_off, hsp_array[index1]->query.offset,
- s_off, hsp_array[index1]->subject.offset,
- 2*MB_DIAG_NEAR);
+ inclusion_status = e_DiagNear;
+ for (index1 = new_hspcnt; inclusion_status != e_DiagDistant &&
+ index1 >= 0 && new_hspcnt-index1 < MAX_NUM_CHECK_INCLUSION;
index1--) {
- if (q_off >= hsp_array[index1]->query.offset &&
- s_off >= hsp_array[index1]->subject.offset &&
- q_end <= hsp_array[index1]->query.end &&
- s_end <= hsp_array[index1]->subject.end &&
- evalue >= hsp_array[index1]->evalue) {
+ inclusion_status =
+ BLAST_HSPInclusionTest(hsp_array[index], hsp_array[index1]);
+ if (inclusion_status == e_FirstInSecond ||
+ inclusion_status == e_Equal) {
+ /* Free the new HSP and break out of the inclusion test loop */
hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
break;
- } else if (q_off <= hsp_array[index1]->query.offset &&
- s_off <= hsp_array[index1]->subject.offset &&
- q_end >= hsp_array[index1]->query.end &&
- s_end >= hsp_array[index1]->subject.end &&
- evalue <= hsp_array[index1]->evalue) {
+ } else if (inclusion_status == e_SecondInFirst) {
hsp_array[index1] = BLAST_HSPFree(hsp_array[index1]);
shift_needed = TRUE;
}
}
+ /* If some lower indexed HSPs have been removed, shift the subsequent
+ HSPs */
if (shift_needed) {
+ /* Find the first non-NULL HSP, going backwards */
while (index1 >= 0 && !hsp_array[index1])
index1--;
+ /* Go forward, and shift any non-NULL HSPs */
for (index2 = ++index1; index1 <= new_hspcnt; index1++) {
if (hsp_array[index1])
hsp_array[index2++] = hsp_array[index1];
@@ -3683,8 +3703,7 @@ Boolean ReevaluateScoreWithAmbiguities(BlastSearchBlkPtr search,
hsp->gap_info->esp = first_esp;
}
if (last_esp->next != NULL) {
- GapXEditScriptDelete(last_esp->next);
- last_esp->next = NULL;
+ last_esp->next = GapXEditScriptDelete(last_esp->next);
}
last_esp->num = last_esp_num;
BlastHSPGetNumIdentical(search, hsp, NULL, &hsp->num_ident,
@@ -3694,12 +3713,6 @@ Boolean ReevaluateScoreWithAmbiguities(BlastSearchBlkPtr search,
delete_hsp = TRUE;
}
- if (delete_hsp) { /* This HSP is now below the cutoff */
- if (first_esp != NULL && first_esp != hsp->gap_info->esp)
- GapXEditScriptDelete(first_esp);
- hsp->gap_info = GapXEditBlockDelete(hsp->gap_info);
- }
-
return delete_hsp;
}
@@ -3820,7 +3833,7 @@ MegaBlastReevaluateWithAmbiguities(BlastSearchBlkPtr search)
ReevaluateScoreWithAmbiguities(search, subject_start, hsp);
if (delete_hsp) { /* This HSP is now below the cutoff */
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] = BLAST_HSPFree(hsp);
purge = TRUE;
}
}
@@ -3836,7 +3849,6 @@ MegaBlastReevaluateWithAmbiguities(BlastSearchBlkPtr search)
if (current_hitlist->hspcnt > 1)
BlastSortUniqHspArray(current_hitlist);
-
if (search->pbp->hsp_num_max &&
search->pbp->hsp_num_max < search->current_hitlist->hspcnt &&
@@ -4019,7 +4031,6 @@ MegaBlastSaveCurrentHitlist(BlastSearchBlkPtr search)
if (search->pbp->mb_params->perc_identity > 0) {
if (MegaBlastGetHspPercentIdentity(search, hsp) <
search->pbp->mb_params->perc_identity) {
- hsp->gap_info = GapXEditBlockDelete(hsp->gap_info);
index1++;
if (index1 >= hspmax)
break;
@@ -4052,7 +4063,6 @@ MegaBlastSaveCurrentHitlist(BlastSearchBlkPtr search)
been completed */
new_size = search->pbp->hsp_num_max;
if (new_size <= hsp_array_sizes[query_index]) {
- hsp->gap_info = GapXEditBlockDelete(hsp->gap_info);
do_not_reallocate[query_index] = TRUE;
continue;
}
@@ -4070,7 +4080,6 @@ MegaBlastSaveCurrentHitlist(BlastSearchBlkPtr search)
}
} else {
/* hsp_array is already full and reallocation not allowed */
- hsp->gap_info = GapXEditBlockDelete(hsp->gap_info);
continue;
}
} else
@@ -4116,6 +4125,9 @@ MegaBlastSaveCurrentHitlist(BlastSearchBlkPtr search)
hsp->subject.gapped_start;
}
hsp_array[hsp_index].gap_info = hsp->gap_info;
+ /* Edit block pointer has been copied; remove it from hsp to avoid
+ double freeing */
+ hsp->gap_info = NULL;
hsp_array[hsp_index].context = hsp->context;
hsp_array[hsp_index].query_offset = hsp->query.offset;
hsp_array[hsp_index].query_length = hsp->query.length;
diff --git a/tools/posit.c b/tools/posit.c
index 4f213d9a..79f01d17 100644
--- a/tools/posit.c
+++ b/tools/posit.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: posit.c,v 6.61 2003/08/04 20:43:55 dondosha Exp $";
+static char const rcsid[] = "$Id: posit.c,v 6.63 2004/06/08 14:03:48 camacho Exp $";
-/* $Id: posit.c,v 6.61 2003/08/04 20:43:55 dondosha Exp $
+/* $Id: posit.c,v 6.63 2004/06/08 14:03:48 camacho Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,10 +32,16 @@ static char const rcsid[] = "$Id: posit.c,v 6.61 2003/08/04 20:43:55 dondosha Ex
Contents: utilities for position-based BLAST.
- $Revision: 6.61 $
+ $Revision: 6.63 $
*****************************************************************************
* $Log: posit.c,v $
+ * Revision 6.63 2004/06/08 14:03:48 camacho
+ * Alejandro Schaffer's fix to spread out gap costs in posDemographics.
+ *
+ * Revision 6.62 2004/05/14 12:13:09 camacho
+ * Made posDemographics non-static for testing purposes.
+ *
* Revision 6.61 2003/08/04 20:43:55 dondosha
* Test for selenocysteines when comparing checkpoint sequences with query
*
@@ -904,7 +910,9 @@ void LIBCALL posPurgeMatches(posSearchItems *posSearch, compactSearchItems * com
/*Compute general information about the sequences that matched on the
i-th pass such as how many matched at each query position and what letter
matched*/
-static void posDemographics(posSearchItems *posSearch, compactSearchItems * compactSearch, SeqAlignPtr listOfSeqAligns)
+void LIBCALL posDemographics(posSearchItems *posSearch,
+ compactSearchItems * compactSearch,
+ SeqAlignPtr listOfSeqAligns)
{
Uint1Ptr q; /*pointers into query */
Uint1Ptr s; /*pointer into a matching string */
@@ -990,24 +998,29 @@ static void posDemographics(posSearchItems *posSearch, compactSearchItems * comp
if ((GAP_HERE) == subjectOffset) { /*XX*/
for(c = 0, qplace = queryOffset;
c < matchLength; c++, qplace++) {
- posSearch->posDescMatrix[seqIndex + 1][qplace].used = TRUE;
- posSearch->posDescMatrix[seqIndex + 1][qplace].letter = GAP_CHAR;
- posSearch->posDescMatrix[seqIndex + 1][qplace].e_value = 1.0;
+ /*Keep the following test if spreading out gap costs,
+ so that in that case a lower E-value non-gap trumps
+ a higher E-value gap; if not spreading out gap costs
+ then comment out the test, so that a higher E-value
+ gap trumps a lower E-value letter*/
+ if (!posSearch->posDescMatrix[seqIndex+1][qplace].used)
+ {
+ posSearch->posDescMatrix[seqIndex + 1][qplace].used = TRUE;
+ posSearch->posDescMatrix[seqIndex + 1][qplace].letter = GAP_CHAR;
+ posSearch->posDescMatrix[seqIndex + 1][qplace].e_value = 1.0;
+ }
}
}
else { /*no gap*/
for(c = 0, qplace = queryOffset, splace = subjectOffset;
c < matchLength; c++, qplace++, splace++) {
- if ((!posSearch->posDescMatrix[seqIndex+1][qplace].used) ||
- (thisEvalue
- < posSearch->posDescMatrix[seqIndex+1][qplace].e_value))
- if (!posSearch->posDescMatrix[seqIndex+1][qplace].used)
- {
- posSearch->posDescMatrix[seqIndex+1][qplace].letter = (Int1) s[splace];
- posSearch->posDescMatrix[seqIndex+1][qplace].used = TRUE;
- posSearch->posDescMatrix[seqIndex+1][qplace].e_value =
- thisEvalue;
- }
+ if (!posSearch->posDescMatrix[seqIndex+1][qplace].used)
+ {
+ posSearch->posDescMatrix[seqIndex+1][qplace].letter = (Int1) s[splace];
+ posSearch->posDescMatrix[seqIndex+1][qplace].used = TRUE;
+ posSearch->posDescMatrix[seqIndex+1][qplace].e_value =
+ thisEvalue;
+ }
}
}
startQ += 2;
diff --git a/tools/posit.h b/tools/posit.h
index c764ecd4..dbb89b29 100644
--- a/tools/posit.h
+++ b/tools/posit.h
@@ -1,4 +1,4 @@
-/* $Id: posit.h,v 6.23 2001/08/29 19:05:03 madden Exp $
+/* $Id: posit.h,v 6.24 2004/05/14 12:13:09 camacho Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,11 +32,14 @@ Author: Alejandro Schaffer
Contents: header file for position-based BLAST.
-$Revision: 6.23 $
+$Revision: 6.24 $
*****************************************************************************/
/*
* $Log: posit.h,v $
+* Revision 6.24 2004/05/14 12:13:09 camacho
+* Made posDemographics non-static for testing purposes.
+*
* Revision 6.23 2001/08/29 19:05:03 madden
* added parameter posComputationCalled in outputPosComputation
*
@@ -258,6 +261,10 @@ void LIBCALL posCancel(posSearchItems *posSearch, compactSearchItems * compactSe
void LIBCALL posPurgeMatches(posSearchItems *posSearch, compactSearchItems * compactSearch);
+void LIBCALL posDemographics(posSearchItems *posSearch,
+ compactSearchItems * compactSearch,
+ SeqAlignPtr listOfSeqAligns);
+
/*Cleanup position-specific data structures after one pass*/
void LIBCALL posCleanup PROTO((posSearchItems *posSearch, compactSearchItems * compactSearch));
diff --git a/tools/rpsutil.c b/tools/rpsutil.c
index b33f2e86..b8ea3806 100644
--- a/tools/rpsutil.c
+++ b/tools/rpsutil.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: rpsutil.c,v 6.69 2004/03/18 15:09:22 papadopo Exp $";
+static char const rcsid[] = "$Id: rpsutil.c,v 6.70 2004/05/13 16:58:28 kans Exp $";
-/* $Id: rpsutil.c,v 6.69 2004/03/18 15:09:22 papadopo Exp $
+/* $Id: rpsutil.c,v 6.70 2004/05/13 16:58:28 kans Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -31,12 +31,15 @@ static char const rcsid[] = "$Id: rpsutil.c,v 6.69 2004/03/18 15:09:22 papadopo
*
* Initial Version Creation Date: 12/14/1999
*
-* $Revision: 6.69 $
+* $Revision: 6.70 $
*
* File Description:
* Reversed PSI BLAST utilities file
*
* $Log: rpsutil.c,v $
+* Revision 6.70 2004/05/13 16:58:28 kans
+* in AnnotateRegionsFromCDD, do not put cdd->ShortName into comment if same as cdd->Definition
+*
* Revision 6.69 2004/03/18 15:09:22 papadopo
* use the score as a tiebreaker during final sort of seqaligns in RPS blast
*
@@ -2985,7 +2988,7 @@ NLM_EXTERN void AnnotateRegionsFromCDD (
AddFieldToCddUserObject (uop, "evalue", NULL, 0, cdd->evalue);
AddFieldToCddUserObject (uop, "bit_score", NULL, 0, cdd->bit_score);
}
- if (cdd->ShortName != NULL) {
+ if (cdd->ShortName != NULL && StringICmp (cdd->ShortName, cdd->Definition) != 0) {
len = StringLen (cdd->ShortName) + 10;
str = MemNew (len);
if (str != NULL) {
diff --git a/tools/salptool.c b/tools/salptool.c
index 410c2f53..b76f4d0f 100644
--- a/tools/salptool.c
+++ b/tools/salptool.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: salptool.c,v 6.35 2004/04/28 20:32:52 bollin Exp $";
+static char const rcsid[] = "$Id: salptool.c,v 6.36 2004/06/10 18:59:36 bollin Exp $";
#include <sequtil.h> /* SeqIdDupList */
#include <salpedit.h>
@@ -2939,7 +2939,9 @@ static Boolean check_dbid_seqalign (SeqAlignPtr salp)
while (!found && sip != NULL)
{
next = sip->next;
+ sip->next = NULL;
SeqIdWrite (sip, str, PRINTID_FASTA_LONG, 50);
+ sip->next = next;
tmp = StringStr (str, "acc");
if (tmp!=NULL) {
tmp++; tmp++; tmp++;
diff --git a/tools/toasn3.c b/tools/toasn3.c
index 09283402..48273305 100644
--- a/tools/toasn3.c
+++ b/tools/toasn3.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: toasn3.c,v 6.82 2003/06/18 21:52:21 kans Exp $";
+static char const rcsid[] = "$Id: toasn3.c,v 6.83 2004/05/14 16:10:34 kans Exp $";
/*****************************************************************************
*
@@ -4900,9 +4900,11 @@ static void GetCdRegionsWithPeptides (SeqEntryPtr sep, Pointer data, Int4 index,
}
for (sfp = sap->data; sfp != NULL; sfp = sfp->next) {
if (sfp->data.choice == SEQFEAT_CDREGION) {
- tmp = ValNodeNew(NULL);
- tmp->data.ptrvalue = sfp;
- sfap->cds = tie_next(sfap->cds, tmp);
+ if (! sfp->pseudo) {
+ tmp = ValNodeNew(NULL);
+ tmp->data.ptrvalue = sfp;
+ sfap->cds = tie_next(sfap->cds, tmp);
+ }
}
if (sfp->data.choice == SEQFEAT_IMP) {
ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
diff --git a/util/creaders/alnread.c b/util/creaders/alnread.c
index 17099b14..699a0525 100644
--- a/util/creaders/alnread.c
+++ b/util/creaders/alnread.c
@@ -1,5 +1,5 @@
/*
- * $Id: alnread.c,v 1.9 2004/03/16 21:05:15 bollin Exp $
+ * $Id: alnread.c,v 1.10 2004/05/20 19:40:24 bollin Exp $
*
* ===========================================================================
*
@@ -89,6 +89,12 @@ typedef struct SCommentLoc {
struct SCommentLoc * next;
} SCommentLoc, * TCommentLocPtr;
+typedef struct SBracketedCommentList
+{
+ TLineInfoPtr comment_lines;
+ struct SBracketedCommentList * next;
+} SBracketedCommentList, * TBracketedCommentListPtr;
+
typedef struct SAlignRawSeq {
char * id;
TLineInfoPtr sequence_data;
@@ -111,6 +117,7 @@ typedef struct SAlignFileRaw {
char * alphabet;
int expected_num_sequence;
int expected_sequence_len;
+ int num_segments;
} SAlignRawFileData, * SAlignRawFilePtr;
/* These functions are used for storing and transmitting information
@@ -681,6 +688,136 @@ s_ReportASN1Error
}
+/* This function reports that some sequences are inside brackets (indicating a segmented set)
+ * and that some sequences are outside the brackets.
+ */
+static void
+s_ReportSegmentedAlignmentError
+(TIntLinkPtr offset_list,
+ FReportErrorFunction errfunc,
+ void * errdata)
+{
+ TErrorInfoPtr eip;
+ const char * msg = "This file contains sequences in brackets (indicating "
+ "a segmented alignment) as well as sequences not in brackets at lines "
+ "%s. Please either add or remove brackets to correct this problem.";
+ int num_lines = 0;
+ int msg_len = 0;
+ TIntLinkPtr t;
+ char * line_text_list;
+ char * line_text_list_offset;
+
+ if (errfunc == NULL || offset_list == NULL) {
+ return;
+ }
+
+ for (t = offset_list; t != NULL; t = t->next)
+ {
+ num_lines ++;
+ }
+ msg_len = num_lines * (kMaxPrintedIntLen + 2);
+ if (num_lines > 1)
+ {
+ msg_len += 4;
+ }
+ line_text_list = (char *) malloc (msg_len);
+ if (line_text_list == NULL) return;
+ line_text_list_offset = line_text_list;
+ for (t = offset_list; t != NULL; t = t->next)
+ {
+ if (t->next == NULL)
+ {
+ sprintf (line_text_list_offset, "%d", t->ival);
+ }
+ else if (num_lines == 2)
+ {
+ sprintf (line_text_list_offset, "%d and ", t->ival);
+ }
+ else if (t->next->next == NULL)
+ {
+ sprintf (line_text_list_offset, "%d, and ", t->ival);
+ }
+ else
+ {
+ sprintf (line_text_list_offset, "%d, ", t->ival);
+ }
+ line_text_list_offset += strlen (line_text_list_offset);
+ }
+
+ msg_len += strlen (msg) + 1;
+
+ eip = ErrorInfoNew (NULL);
+ if (eip != NULL) {
+ eip->category = eAlnErr_BadData;
+ eip->message = (char *) malloc (msg_len);
+ if (eip->message != NULL) {
+ sprintf (eip->message, msg, line_text_list);
+ }
+ errfunc (eip, errdata);
+ }
+ free (line_text_list);
+}
+
+
+/* This function reports an error if a line looks like it might contain an organism comment
+ * but is somehow improperly formatted
+ */
+static void s_ReportOrgCommentError
+(char * linestring,
+ FReportErrorFunction errfunc,
+ void * errdata)
+{
+ TErrorInfoPtr eip;
+ const char * msg = "This line may contain an improperly formatted organism description.\n"
+ "Organism descriptions should be of the form [org=tax name] or [organism=tax name].\n";
+
+ if (errfunc == NULL || linestring == NULL) {
+ return;
+ }
+
+ eip = ErrorInfoNew (NULL);
+ if (eip != NULL) {
+ eip->category = eAlnErr_BadData;
+ eip->message = (char *) malloc (strlen (msg) + strlen (linestring) + 1);
+ if (eip->message != NULL) {
+ strcpy (eip->message, msg);
+ strcat (eip->message, linestring);
+ }
+ errfunc (eip, errdata);
+ }
+}
+
+
+/* This function reports that the number of segments in an alignment of
+ * segmented sets is inconsistent.
+ */
+static void s_ReportBadNumSegError
+(int line_num,
+ int num_seg,
+ int num_seg_exp,
+ FReportErrorFunction errfunc,
+ void * errdata)
+{
+ TErrorInfoPtr eip;
+ const char * msg = "This segmented set contains a different number of segments (%d) than expected (%d).\n";
+
+ if (errfunc == NULL) {
+ return;
+ }
+
+ eip = ErrorInfoNew (NULL);
+ if (eip != NULL) {
+ eip->line_num = line_num;
+ eip->category = eAlnErr_BadData;
+ eip->message = (char *) malloc (strlen (msg) + 2 * kMaxPrintedIntLen + 1);
+ if (eip->message != NULL) {
+ sprintf (eip->message, msg, num_seg, num_seg_exp);
+ }
+ errfunc (eip, errdata);
+ }
+}
+
+
/* This function allocates memory for a SSequenceInfo structure and
* initializes the member variables. It returns a pointer to the newly
* allocated memory.
@@ -1281,6 +1418,187 @@ s_AddLineInfo
return list;
}
+/* This function creates a new bracketed comment */
+static TBracketedCommentListPtr s_BracketedCommentListNew
+(TBracketedCommentListPtr list,
+ char * string,
+ int line_num,
+ int line_offset)
+{
+ TBracketedCommentListPtr comment;
+
+ comment = (TBracketedCommentListPtr) malloc (sizeof (SBracketedCommentList));
+ if (comment == NULL) {
+ return NULL;
+ }
+ comment->comment_lines = s_LineInfoNew (string, line_num, line_offset);
+ comment->next = NULL;
+
+ if (list != NULL) {
+ while (list->next != NULL) {
+ list = list->next;
+ }
+ list->next = comment;
+ }
+
+ return comment;
+}
+
+/* This function frees a bracketed comment list. */
+static void s_BracketedCommentListFree (TBracketedCommentListPtr list)
+{
+ if (list == NULL) {
+ return;
+ }
+ s_BracketedCommentListFree (list->next);
+ list->next = NULL;
+ s_LineInfoFree (list->comment_lines);
+}
+
+/* This function adds a line to a bracketed comment. */
+static void s_BracketedCommentListAddLine
+(TBracketedCommentListPtr comment,
+ char * string,
+ int line_num,
+ int line_offset)
+{
+ if (comment == NULL) {
+ return;
+ }
+
+ comment->comment_lines = s_AddLineInfo (comment->comment_lines, string, line_num, line_offset);
+}
+
+/* This function counts the sequences found in a bracketed comment. */
+static int s_CountSequencesInBracketedComment (TBracketedCommentListPtr comment)
+{
+ TLineInfoPtr lip;
+ int num_segments = 0;
+ EBool skipped_line_since_last_defline = eTrue;
+
+ if (comment == NULL || comment->comment_lines == NULL) {
+ return 0;
+ }
+
+ lip = comment->comment_lines;
+ /* First line must be left bracket on a line by itself */
+ if (lip->data[0] != '[' || strspn (lip->data + 1, " \t\r\n") != strlen (lip->data + 1))
+ {
+ return 0;
+ }
+ lip = lip->next;
+ while (lip != NULL && lip->next != NULL)
+ {
+ if (lip->data[0] == '>')
+ {
+ if (!skipped_line_since_last_defline)
+ {
+ return 0;
+ }
+ else
+ {
+ num_segments ++;
+ skipped_line_since_last_defline = eFalse;
+ }
+ }
+ else
+ {
+ skipped_line_since_last_defline = eTrue;
+ }
+ lip = lip->next;
+ }
+ /* Last line must be right bracket on a line by itself */
+ /* (lip may be NULL here if the comment never got past its opening line) */
+ if (lip->data[0] != ']' || strspn (lip->data + 1, " \t\r\n") != strlen (lip->data + 1))
+ {
+ return 0;
+ }
+
+ return num_segments;
+}
+
+/* This function counts the number of sequences that appear in
+ * bracketed comments. If the number of sequences is inconsistent,
+ * the function will issue error messages and return a 1, otherwise
+ * the function will return the number of sequences that appear in
+ * each bracketed comment.
+ */
+static int s_GetNumSegmentsInAlignment
+(TBracketedCommentListPtr comment_list,
+ FReportErrorFunction errfunc,
+ void * errdata)
+{
+ TBracketedCommentListPtr comment;
+ TSizeInfoPtr segcount_list = NULL;
+ int num_segments = 1;
+ int num_segments_this_bracket;
+ int num_segments_expected;
+ TSizeInfoPtr best;
+
+ if (comment_list == NULL)
+ {
+ return num_segments;
+ }
+
+ for (comment = comment_list; comment != NULL; comment = comment->next)
+ {
+ num_segments_this_bracket = s_CountSequencesInBracketedComment (comment);
+ segcount_list = s_AddSizeInfoAppearances (segcount_list,
+ num_segments_this_bracket,
+ 1);
+ if (comment != comment_list && segcount_list->next != NULL)
+ {
+ best = s_GetMostPopularSizeInfo (segcount_list);
+ num_segments_expected = best->size_value;
+
+ if (num_segments_expected != num_segments_this_bracket)
+ {
+ s_ReportBadNumSegError (comment->comment_lines->line_num,
+ num_segments_this_bracket, num_segments_expected,
+ errfunc, errdata);
+ }
+ }
+ }
+ if (segcount_list != NULL && segcount_list->next == NULL && segcount_list->size_value > 0)
+ {
+ num_segments = segcount_list->size_value;
+ }
+ s_SizeInfoFree (segcount_list);
+ return num_segments;
+}
+
+/* This function gets a list of the offsets of the
+ * sequences in bracketed comments.
+ */
+static TIntLinkPtr GetSegmentOffsetList (TBracketedCommentListPtr comment_list)
+{
+ TIntLinkPtr new_offset, offset_list = NULL;
+ TBracketedCommentListPtr comment;
+ TLineInfoPtr lip;
+
+ if (comment_list == NULL)
+ {
+ return NULL;
+ }
+
+ for (comment = comment_list; comment != NULL; comment = comment->next)
+ {
+ if (s_CountSequencesInBracketedComment (comment) == 0)
+ {
+ continue;
+ }
+ for (lip = comment->comment_lines; lip != NULL; lip = lip->next)
+ {
+ if (lip->data != NULL && lip->data[0] == '>')
+ {
+ new_offset = s_IntLinkNew (lip->line_num + 1, offset_list);
+ if (offset_list == NULL) offset_list = new_offset;
+ }
+ }
+ }
+ return offset_list;
+}
+
static char * s_TokenizeString (char * str, char *delimiter, char **last)
{
int skip;
@@ -2459,6 +2777,10 @@ static void s_ReadOrgNamesFromText
}
clp = s_FindOrganismComment (string);
+ if (clp == NULL && (strstr (string, "org=") != NULL || strstr (string, "organism=") != NULL))
+ {
+ s_ReportOrgCommentError (string, afrp->report_error, afrp->report_error_userdata);
+ }
while (clp != NULL) {
org_name = s_CreateOrderedOrgName (clp);
afrp->organisms = s_AddLineInfo (afrp->organisms, org_name, line_num,
@@ -2712,6 +3034,7 @@ static SAlignRawFilePtr s_AlignFileRawNew (void)
afrp->alphabet = NULL;
afrp->expected_num_sequence = 0;
afrp->expected_sequence_len = 0;
+ afrp->num_segments = 1;
return afrp;
}
@@ -2986,6 +3309,20 @@ s_FindInterleavedBlocks
}
+static void s_TrimEndSpace (char *linestring)
+{
+ int len;
+ char *cp;
+
+ if (linestring == NULL) return;
+ len = strlen (linestring);
+ cp = linestring + len - 1;
+ while (cp > linestring && (*cp == ' ' || *cp == '\t' || *cp == '\r' || *cp == '\n'))
+ {
+ *cp = 0;
+ cp--;
+ }
+}
static SAlignRawFilePtr
s_ReadAlignFileRaw
@@ -2995,21 +3332,24 @@ s_ReadAlignFileRaw
FReportErrorFunction errfunc,
void * errdata)
{
- char * linestring;
- SAlignRawFilePtr afrp;
- char * tmp;
- EBool found_stop;
- int overall_line_count;
- EBool found_expected_ntax = eFalse;
- EBool found_expected_nchar = eFalse;
- EBool found_char_comment = eFalse;
- SLengthListPtr pattern_list = NULL;
- SLengthListPtr this_pattern;
- char * cp;
- int len;
- TIntLinkPtr new_offset;
- EBool in_taxa_comment;
- EBool in_bracketed_comment = eFalse;
+ char * linestring;
+ SAlignRawFilePtr afrp;
+ char * tmp;
+ EBool found_stop;
+ int overall_line_count;
+ EBool found_expected_ntax = eFalse;
+ EBool found_expected_nchar = eFalse;
+ EBool found_char_comment = eFalse;
+ SLengthListPtr pattern_list = NULL;
+ SLengthListPtr this_pattern;
+ char * cp;
+ int len;
+ TIntLinkPtr new_offset;
+ EBool in_taxa_comment;
+ EBool in_bracketed_comment = eFalse;
+ TBracketedCommentListPtr comment_list = NULL, last_comment = NULL;
+
+
if (readfunc == NULL || sequence_info == NULL) {
return NULL;
@@ -3035,6 +3375,7 @@ s_ReadAlignFileRaw
}
while (linestring != NULL && linestring [0] != EOF) {
+ s_TrimEndSpace (linestring);
s_ReadOrgNamesFromText (linestring, overall_line_count, afrp);
/* we want to remove the comment from the line for the purpose
* of looking for blank lines and skipping,
@@ -3077,17 +3418,34 @@ s_ReadAlignFileRaw
in_taxa_comment = eTrue;
}
+ /* remove complete single-line bracketed comments from line
+ *before checking for multiline bracketed comments */
+ s_RemoveCommentFromLine (tmp);
+
if (in_bracketed_comment) {
+ len = strspn (linestring, " \t\r\n");
+ if (last_comment != NULL)
+ {
+ s_BracketedCommentListAddLine (last_comment, linestring + len,
+ overall_line_count, len);
+ }
if (strchr (tmp, ']') != NULL) {
in_bracketed_comment = eFalse;
}
tmp [0] = 0;
} else if (tmp [0] == '[' && strchr (tmp, ']') == NULL) {
in_bracketed_comment = eTrue;
+ len = strspn (linestring, " \t\r\n");
+ last_comment = s_BracketedCommentListNew (comment_list,
+ linestring + len,
+ overall_line_count, len);
+ if (comment_list == NULL)
+ {
+ comment_list = last_comment;
+ }
tmp [0] = 0;
}
- s_RemoveCommentFromLine (tmp);
if (s_SkippableString (tmp)) {
tmp [0] = 0;
}
@@ -3132,10 +3490,29 @@ s_ReadAlignFileRaw
linestring = readfunc (userdata);
overall_line_count ++;
}
+ afrp->num_segments = s_GetNumSegmentsInAlignment (comment_list, errfunc, errdata);
+ if (afrp->num_segments > 1)
+ {
+ if (afrp->offset_list != NULL)
+ {
+ s_ReportSegmentedAlignmentError (afrp->offset_list,
+ errfunc, errdata);
+ s_AlignFileRawFree (afrp);
+ s_LengthListFree (pattern_list);
+ s_BracketedCommentListFree (comment_list);
+ return NULL;
+ }
+ else
+ {
+ afrp->offset_list = GetSegmentOffsetList (comment_list);
+ afrp->marked_ids = eTrue;
+ }
+ }
if (! afrp->marked_ids) {
s_FindInterleavedBlocks (pattern_list, afrp);
}
s_LengthListFree (pattern_list);
+ s_BracketedCommentListFree (comment_list);
return afrp;
}
@@ -3420,36 +3797,35 @@ static void
s_CreateSequencesBasedOnTokenPatterns
(TLineInfoPtr token_list,
TIntLinkPtr offset_list,
- SLengthListPtr anchorpattern,
+ SLengthListPtr * anchorpattern,
SAlignRawFilePtr afrp)
{
TLineInfoPtr lip;
int line_counter;
TIntLinkPtr offset_ptr, next_offset_ptr;
char * curr_id;
- int num_pattern_lines;
- int num_pattern_chars;
TSizeInfoPtr sip;
int pattern_line_counter;
+ int curr_seg;
if (token_list == NULL || offset_list == NULL
|| anchorpattern == NULL
- || anchorpattern->lengthrepeats == NULL
|| afrp == NULL)
{
return;
}
-
- num_pattern_lines = 0;
- num_pattern_chars = 0;
- for (sip = anchorpattern->lengthrepeats; sip != NULL; sip = sip->next) {
- num_pattern_lines += sip->num_appearances;
- num_pattern_chars += (sip->size_value * sip->num_appearances);
+ for (curr_seg = 0; curr_seg < afrp->num_segments; curr_seg ++)
+ {
+ if (anchorpattern [curr_seg] == NULL || anchorpattern [curr_seg]->lengthrepeats == NULL)
+ {
+ return;
+ }
}
-
+
line_counter = 0;
lip = token_list;
offset_ptr = offset_list;
+ curr_seg = 0;
for (offset_ptr = offset_list;
offset_ptr != NULL && lip != NULL;
@@ -3464,7 +3840,7 @@ s_CreateSequencesBasedOnTokenPatterns
curr_id = lip->data;
lip = lip->next;
line_counter ++;
- for (sip = anchorpattern->lengthrepeats;
+ for (sip = anchorpattern[curr_seg]->lengthrepeats;
sip != NULL
&& lip != NULL
&& (next_offset_ptr == NULL
@@ -3501,6 +3877,11 @@ s_CreateSequencesBasedOnTokenPatterns
afrp->report_error_userdata);
}
}
+ curr_seg ++;
+ if (curr_seg >= afrp->num_segments)
+ {
+ curr_seg = 0;
+ }
}
}
@@ -3519,33 +3900,69 @@ s_CreateSequencesBasedOnTokenPatterns
* most appearances and returns that pattern as the anchor pattern to use
* when checking sequence data blocks for consistency with one another.
*/
-static SLengthListPtr
+static SLengthListPtr *
s_CreateAnchorPatternForMarkedIDs
(SAlignRawFilePtr afrp)
{
- SLengthListPtr list, this_pattern, best;
+ SLengthListPtr * list;
+ SLengthListPtr * best;
+ SLengthListPtr this_pattern;
char * cp;
TLineInfoPtr lip;
+ int curr_seg;
if (afrp == NULL) {
return NULL;
}
- list = NULL;
+ /* initialize length lists */
+ list = (SLengthListPtr *) malloc (afrp->num_segments * sizeof (SLengthListPtr));
+ if (list == NULL)
+ {
+ return NULL;
+ }
+ for (curr_seg = 0; curr_seg < afrp->num_segments; curr_seg ++)
+ {
+ list[curr_seg] = NULL;
+ }
+ /* initialize best ptrs */
+ /* list is one element longer, to hold null terminator */
+ best = (SLengthListPtr *) malloc ((afrp->num_segments + 1) * sizeof (SLengthListPtr));
+ if (best == NULL)
+ {
+ return NULL;
+ }
+ for (curr_seg = 0; curr_seg < afrp->num_segments + 1; curr_seg ++)
+ {
+ best[curr_seg] = NULL;
+ }
+
+ /* initialize pattern */
this_pattern = NULL;
+ curr_seg = 0;
for (lip = afrp->line_list;
lip != NULL && ! s_FoundStopLine (lip->data);
lip = lip->next)
{
if (lip->data == NULL) continue;
+ if (lip->data [0] == ']' || lip->data [0] == '[') continue;
if (lip->data [0] == '>') {
if (this_pattern != NULL) {
- list = s_AddLengthList (list, this_pattern);
+ list [curr_seg] = s_AddLengthList (list [curr_seg], this_pattern);
+ curr_seg ++;
+ if (curr_seg >= afrp->num_segments)
+ {
+ curr_seg = 0;
+ }
}
this_pattern = s_LengthListNew (NULL);
if (this_pattern == NULL) {
- s_LengthListFree (list);
+ for (curr_seg = 0; curr_seg < afrp->num_segments; curr_seg ++)
+ {
+ s_LengthListFree (list [curr_seg]);
+ }
+ free (list);
return NULL;
}
this_pattern->num_appearances = 1;
@@ -3559,40 +3976,55 @@ s_CreateAnchorPatternForMarkedIDs
}
}
if (this_pattern != NULL) {
- list = s_AddLengthList (list, this_pattern);
+ list[curr_seg] = s_AddLengthList (list [curr_seg], this_pattern);
}
- /* Now find the pattern with the most appearances */
- best = NULL;
- for (this_pattern = list;
- this_pattern != NULL;
- this_pattern = this_pattern->next)
+ /* Now find the pattern with the most appearances for each segment*/
+ for (curr_seg = 0; curr_seg < afrp->num_segments; curr_seg++)
{
- if (this_pattern->num_appearances == 0) continue;
- if (best == NULL
- || this_pattern->num_appearances > best->num_appearances)
+ for (this_pattern = list [curr_seg];
+ this_pattern != NULL;
+ this_pattern = this_pattern->next)
{
- best = this_pattern;
+ if (this_pattern->num_appearances == 0) continue;
+ if (best [curr_seg] == NULL
+ || this_pattern->num_appearances > best[curr_seg]->num_appearances)
+ {
+ best[curr_seg] = this_pattern;
+ }
+
}
- }
- /* free all patterns before and after anchor pattern */
- if (best != NULL) {
- s_LengthListFree (best->next);
- best->next = NULL;
- }
-
- if (best != list) {
- this_pattern = list;
- while ( this_pattern != NULL && this_pattern->next != best ) {
- this_pattern = this_pattern->next;
+ /* free all patterns before and after anchor pattern */
+ if (best [curr_seg] != NULL) {
+ s_LengthListFree (best [curr_seg]->next);
+ best [curr_seg]->next = NULL;
}
- if (this_pattern != NULL) {
- this_pattern->next = NULL;
- s_LengthListFree (list);
+
+ if (best [curr_seg] != list [curr_seg]) {
+ this_pattern = list [curr_seg];
+ while ( this_pattern != NULL && this_pattern->next != best[curr_seg] ) {
+ this_pattern = this_pattern->next;
+ }
+ if (this_pattern != NULL) {
+ this_pattern->next = NULL;
+ s_LengthListFree (list [curr_seg]);
+ }
}
}
+ for (curr_seg = 0; curr_seg < afrp->num_segments; curr_seg ++)
+ {
+ if (best[curr_seg] == NULL)
+ {
+ for (curr_seg = 0; curr_seg < afrp->num_segments; curr_seg ++)
+ {
+ s_LengthListFree (best [curr_seg]);
+ }
+ return NULL;
+ }
+ }
+
return best;
}
@@ -3649,7 +4081,7 @@ static void s_RemoveBasePairCountCommentsFromData (SAlignRawFilePtr afrp)
*/
static void s_ProcessAlignFileRawForMarkedIDs (SAlignRawFilePtr afrp)
{
- SLengthListPtr anchorpattern;
+ SLengthListPtr * anchorpattern;
if (afrp == NULL) {
return;
@@ -4500,7 +4932,7 @@ static void s_ProcessAlignFileRawByLengthPattern (SAlignRawFilePtr afrp)
TLineInfoPtr token_list;
SLengthListPtr list;
TLineInfoPtr lip;
- SLengthListPtr anchorpattern;
+ SLengthListPtr anchorpattern[2];
TIntLinkPtr offset_list;
int best_length;
int best_num_chars;
@@ -4525,18 +4957,19 @@ static void s_ProcessAlignFileRawByLengthPattern (SAlignRawFilePtr afrp)
}
}
- anchorpattern = s_FindMostPopularPattern (list->lengthrepeats);
- if (anchorpattern == NULL || anchorpattern->lengthrepeats == NULL) {
+ anchorpattern [0] = s_FindMostPopularPattern (list->lengthrepeats);
+ anchorpattern [1] = NULL;
+ if (anchorpattern [0] == NULL || anchorpattern[0]->lengthrepeats == NULL) {
return;
}
/* find anchor patterns in original list,
* find distances between anchor patterns
*/
- offset_list = s_CreateOffsetList (list->lengthrepeats, anchorpattern);
+ offset_list = s_CreateOffsetList (list->lengthrepeats, anchorpattern[0]);
offset_list = s_AugmentOffsetList (offset_list,
list->lengthrepeats,
- anchorpattern);
+ anchorpattern[0]);
/* resolve unusual distances between anchor patterns */
best_length = s_GetMostPopularPatternLength (offset_list);
@@ -4554,7 +4987,7 @@ static void s_ProcessAlignFileRawByLengthPattern (SAlignRawFilePtr afrp)
s_CreateSequencesBasedOnTokenPatterns (token_list, offset_list,
anchorpattern, afrp);
- s_LengthListFree (anchorpattern);
+ s_LengthListFree (anchorpattern[0]);
s_LengthListFree (list);
s_LineInfoFree (token_list);
}
@@ -4583,6 +5016,7 @@ extern TAlignmentFilePtr AlignmentFileNew (void)
afp->num_sequences = 0;
afp->num_organisms = 0;
afp->num_deflines = 0;
+ afp->num_segments = 0;
afp->ids = NULL;
afp->sequences = NULL;
afp->organisms = NULL;
@@ -5066,11 +5500,12 @@ s_ConvertDataToOutput
TSequenceInfoPtr sip)
{
TAlignRawSeqPtr arsp;
- int index;
- TSizeInfoPtr lengths;
- int best_length;
+ int index;
+ TSizeInfoPtr * lengths;
+ int * best_length;
TAlignmentFilePtr afp;
TLineInfoPtr lip;
+ int curr_seg;
if (afrp == NULL || sip == NULL || afrp->sequences == NULL) {
return NULL;
@@ -5082,7 +5517,7 @@ s_ConvertDataToOutput
afp->num_organisms = afrp->num_organisms;
afp->num_deflines = afrp->num_deflines;
-
+ afp->num_segments = afrp->num_segments;
afp->num_sequences = 0;
lengths = NULL;
@@ -5090,7 +5525,8 @@ s_ConvertDataToOutput
afp->num_sequences++;
}
- if (afp->num_sequences != afrp->num_organisms) {
+ if (afp->num_sequences != afrp->num_organisms
+ && afp->num_sequences / afp->num_segments != afrp->num_organisms) {
s_ReportMissingOrganismInfo (afrp->report_error,
afrp->report_error_userdata);
} else {
@@ -5146,7 +5582,25 @@ s_ConvertDataToOutput
afp->organisms [index] = strdup (lip->data);
}
+ /* we need to store length information about different segments separately */
+ lengths = (TSizeInfoPtr *) malloc (sizeof (TSizeInfoPtr) * afrp->num_segments);
+ if (lengths == NULL) {
+ AlignmentFileFree (afp);
+ return NULL;
+ }
+ best_length = (int *) malloc (sizeof (int) * afrp->num_segments);
+ if (best_length == NULL) {
+ free (lengths);
+ AlignmentFileFree (afp);
+ return NULL;
+ }
+ for (curr_seg = 0; curr_seg < afrp->num_segments; curr_seg ++) {
+ lengths [curr_seg] = NULL;
+ best_length [curr_seg] = 0;
+ }
+
/* copy in sequence data */
+ curr_seg = 0;
for (arsp = afrp->sequences, index = 0;
arsp != NULL && index < afp->num_sequences;
arsp = arsp->next, index++) {
@@ -5154,26 +5608,38 @@ s_ConvertDataToOutput
s_LineInfoMergeAndStripSpaces (arsp->sequence_data);
if (afp->sequences [index] != NULL) {
- lengths = s_AddSizeInfo (lengths, strlen (afp->sequences [index]));
+ lengths [curr_seg] = s_AddSizeInfo (lengths [curr_seg], strlen (afp->sequences [index]));
}
afp->ids [index] = strdup (arsp->id);
+ curr_seg ++;
+ if (curr_seg >= afrp->num_segments) {
+ curr_seg = 0;
+ }
+ }
+ for (curr_seg = 0; curr_seg < afrp->num_segments; curr_seg ++)
+ {
+ best_length [curr_seg] = s_GetMostPopularSize (lengths [curr_seg]);
+ if (best_length [curr_seg] == 0 && lengths [curr_seg] != NULL) {
+ best_length [curr_seg] = lengths [curr_seg]->size_value;
+ }
}
- best_length = s_GetMostPopularSize (lengths);
- if (best_length == 0 && lengths != NULL) {
- best_length = lengths->size_value;
- }
+ curr_seg = 0;
for (index = 0; index < afp->num_sequences; index++) {
if (afp->sequences [index] == NULL) {
s_ReportMissingSequenceData (afp->ids [index],
afrp->report_error,
afrp->report_error_userdata);
- } else if ((int) strlen (afp->sequences [index]) != best_length) {
- s_ReportBadSequenceLength (afp->ids [index], best_length,
+ } else if ((int) strlen (afp->sequences [index]) != best_length [curr_seg]) {
+ s_ReportBadSequenceLength (afp->ids [index], best_length [curr_seg],
strlen (afp->sequences [index]),
afrp->report_error,
afrp->report_error_userdata);
}
+ curr_seg ++;
+ if (curr_seg >= afrp->num_segments) {
+ curr_seg = 0;
+ }
}
if (afrp->expected_num_sequence > 0
@@ -5185,14 +5651,20 @@ s_ConvertDataToOutput
afrp->report_error_userdata);
}
if (afrp->expected_sequence_len > 0
- && afrp->expected_sequence_len != best_length)
+ && afrp->expected_sequence_len != best_length [0])
{
s_ReportIncorrectSequenceLength (afrp->expected_sequence_len,
- best_length,
+ best_length [0],
afrp->report_error,
afrp->report_error_userdata);
}
- s_SizeInfoFree (lengths);
+
+ free (best_length);
+ for (curr_seg = 0; curr_seg < afrp->num_segments; curr_seg ++)
+ {
+ s_SizeInfoFree (lengths [curr_seg]);
+ }
+ free (lengths);
return afp;
}
@@ -5260,6 +5732,11 @@ ReadAlignmentFile
/*
* ===========================================================================
* $Log: alnread.c,v $
+ * Revision 1.10 2004/05/20 19:40:24 bollin
+ * Made changes to allow reading of alignments of segmented sets.
+ * Also added warnings for when organism lines may be present but improperly
+ * formatted.
+ *
* Revision 1.9 2004/03/16 21:05:15 bollin
* Added some improvements to the portion of the alignment reader that deals
* with contiguous alignments that do not have a '>' at the beginning of each
diff --git a/util/creaders/alnread.h b/util/creaders/alnread.h
index 13958a22..f6055947 100644
--- a/util/creaders/alnread.h
+++ b/util/creaders/alnread.h
@@ -2,7 +2,7 @@
#define UTIL_CREADERS___ALNREAD__H
/*
- * $Id: alnread.h,v 1.2 2004/02/05 15:43:32 bollin Exp $
+ * $Id: alnread.h,v 1.3 2004/05/20 19:39:40 bollin Exp $
*
* ===========================================================================
*
@@ -107,6 +107,7 @@ typedef struct SAlignmentFile {
int num_sequences;
int num_organisms;
int num_deflines;
+ int num_segments;
char ** ids;
char ** sequences;
char ** organisms;
@@ -140,6 +141,10 @@ extern NCBI_CREADERS_EXPORT TAlignmentFilePtr ReadAlignmentFile (
* ==========================================================================
*
* $Log: alnread.h,v $
+ * Revision 1.3 2004/05/20 19:39:40 bollin
+ * added num_segments member to SAlignmentFile structure to allow reading of
+ * alignments of segmented sets.
+ *
* Revision 1.2 2004/02/05 15:43:32 bollin
* fixed portability issue for windows function pointers
*
diff --git a/vibrant/vibutils.c b/vibrant/vibutils.c
index 7b675590..96b4cc7a 100644
--- a/vibrant/vibutils.c
+++ b/vibrant/vibutils.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/1/91
*
-* $Revision: 6.62 $
+* $Revision: 6.63 $
*
* File Description:
* Vibrant miscellaneous functions
@@ -37,6 +37,13 @@
* Modifications:
* --------------------------------------------------------------------------
* $Log: vibutils.c,v $
+* Revision 6.63 2004/05/28 20:10:32 sinyakov
+* WIN_MSWIN: by Yoon Choi:
+* Modified Nlm_GetOutputFileName to use Nlm_FileLengthEx instead
+* of Nlm_FileOpen/Nlm_FileClose to determine whether file exists or
+* not. The old way popped up an info box when it did not find an
+* existing file.
+*
* Revision 6.62 2004/05/04 16:34:23 shomrat
* Remove file name restrictions for MSWIN
*
@@ -5334,12 +5341,10 @@ extern Nlm_Boolean Nlm_GetOutputFileName (Nlm_CharPtr fileName, size_t maxsize,
if (GetSaveFileName (&ofn) && fileName != NULL) {
Nlm_StringNCpy_0(fileName, ofn.lpstrFile, maxsize);
AnsiToOemBuff (fileName, fileName, maxsize);
- f = Nlm_FileOpen (fileName, "r");
- if (f != NULL) {
- Nlm_FileClose (f);
+ if (Nlm_FileLengthEx(fileName) != -1) {
if (Nlm_Message (MSG_YN, "Replace existing file?") == ANS_NO) {
return FALSE;
- }
+ }
}
return TRUE;
} else {
diff --git a/vibrant/vibwndws.c b/vibrant/vibwndws.c
index 10eaffc1..5165e413 100644
--- a/vibrant/vibwndws.c
+++ b/vibrant/vibwndws.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/1/91
*
-* $Revision: 6.62 $
+* $Revision: 6.64 $
*
* File Description:
* Vibrant main, event loop, and window functions
@@ -37,6 +37,12 @@
* Modifications:
* --------------------------------------------------------------------------
* $Log: vibwndws.c,v $
+* Revision 6.64 2004/06/02 15:53:17 bollin
+* fixed Nlm_ProcessKeyPress for MOTIF to handle arrow keys
+*
+* Revision 6.63 2004/06/02 14:54:33 bollin
+* fixed Nlm_ProcessKeyPress to also handle arrow keys
+*
* Revision 6.62 2004/04/14 19:14:06 sinyakov
* WIN_MSWIN: support X-Windows-like -bg color command line option
*
@@ -7230,12 +7236,21 @@ extern void Nlm_KeyboardView (Nlm_KeyProc key)
static void Nlm_ProcessKeyPress (LPMSG lpMsg)
{
- Nlm_Char ch;
+ Nlm_Char ch, sp_ch;
+
+ if (keyAction == NULL) return;
- if (lpMsg->message == WM_CHAR) {
- ch = (Nlm_Char) lpMsg->wParam;
- if (keyAction != NULL) {
- keyAction (ch);
+ ch = (Nlm_Char) lpMsg->wParam;
+ if (lpMsg->message == WM_CHAR)
+ {
+ keyAction (ch);
+ }
+ else if (lpMsg->message == WM_KEYDOWN)
+ {
+ sp_ch = Nlm_KeydownToChar (ch);
+ if (sp_ch != 0)
+ {
+ keyAction (sp_ch);
}
}
}
@@ -7245,15 +7260,16 @@ static void Nlm_ProcessKeyPress (LPMSG lpMsg)
static void Nlm_ProcessKeyPress (XEvent *event)
{
- Nlm_Char buffer[2];
+ Nlm_Char ch;
- if (event->type == KeyPress && keyAction != NULL &&
- XLookupString(&event->xkey, buffer, sizeof(buffer), NULL, NULL) == 1) {
- Nlm_ctrlKey = ((event->xkey.state & ControlMask) != 0);
- Nlm_shftKey = ((event->xkey.state & ShiftMask ) != 0);
- Nlm_cmmdKey = FALSE;
- Nlm_optKey = FALSE;
- keyAction( *buffer );
+ if (event->type == KeyPress && keyAction != NULL)
+ {
+ ch = Nlm_GetInputChar (&event->xkey);
+ Nlm_ctrlKey = ((event->xkey.state & ControlMask) != 0);
+ Nlm_shftKey = ((event->xkey.state & ShiftMask ) != 0);
+ Nlm_cmmdKey = FALSE;
+ Nlm_optKey = FALSE;
+ keyAction( ch );
}
}
#endif
diff --git a/webdesign/designs/cubby/storedsearch/Templates/EditSearch.html b/webdesign/designs/cubby/storedsearch/Templates/EditSearch.html
index 06a36fed..b5416a42 100644
--- a/webdesign/designs/cubby/storedsearch/Templates/EditSearch.html
+++ b/webdesign/designs/cubby/storedsearch/Templates/EditSearch.html
@@ -1,5 +1,5 @@
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head><title>Edit Search</title>
+<head><title>Search Details</title>
<link rel="stylesheet" href="cubby.css" type="text/css" media="all" />
<body>
@@ -8,7 +8,7 @@
<div id="formdiv" class="mainview">
- <div id="formtitle"><span class="stitle">Edit Search</span></div>
+ <div id="formtitle"><span class="stitle">Search Details</span></div>
<div class="searchtable">
<table cellspacing="0">
@@ -82,15 +82,7 @@
</table>
</div>
- <div id="formbuttondiv">
-
- <input type="button" class="button" value="Cancel" alt="Cancel" title="Click to cancel this save">
- <input type="button" class="button" value="Run" alt="Run" title="Click to run this search">
- <input type="button" class="button" value="OK" alt="OK" title="Click to save this search"/>
- </div> <!-- End formbuttondiv -->
-
<div id="details">
- <h1>Details</h1>
<dl>
<dt>Your search</dt>
<dd>mouse AND adenylate cyclase</dd>
@@ -99,6 +91,11 @@
</dl>
</div>
+ <div id="formbuttondiv">
+ <input type="button" class="button" value="Cancel" alt="Cancel" title="Click to cancel this save">
+ <input type="button" class="button" value="OK" alt="OK" title="Click to save this search"/>
+ </div> <!-- End formbuttondiv -->
+
</div> <!-- End formdiv -->
</form>
diff --git a/webdesign/designs/cubby/storedsearch/Templates/Makefile b/webdesign/designs/cubby/storedsearch/Templates/Makefile
new file mode 100644
index 00000000..c96e02f0
--- /dev/null
+++ b/webdesign/designs/cubby/storedsearch/Templates/Makefile
@@ -0,0 +1,3 @@
+
+dist:
+ scp *.html *.css *.jpg Blueprint/* mjohnson@graceland:html/DesignReviews/CubbyEmail/v1.3
diff --git a/webdesign/designs/cubby/storedsearch/Templates/MySearches.html b/webdesign/designs/cubby/storedsearch/Templates/MySearches.html
index 4b5f303c..cc56bcd6 100644
--- a/webdesign/designs/cubby/storedsearch/Templates/MySearches.html
+++ b/webdesign/designs/cubby/storedsearch/Templates/MySearches.html
@@ -4,92 +4,95 @@
<body>
-<form method="post" action="saveSearch">
-
- <div id="formdiv" class="mainview">
-
- <div id="myformheader">
- <div id="mycolheaders">
-
- <div id="mytitle">
- <span class="selectall">
- <input type="checkbox" name="selectall" title="Select/Deselect all"/>
- </span>
- <span class="titlepos">
- My Saved Searches
- </span>
- </div>
- </div>
- </div>
-
- <div class="searchtable">
- <table cellspacing="0">
+<div id="my_searches">
- <tr class="database">
- <td colspan="4">PubMed</td>
- </tr>
+ <form method="post" action="saveSearch">
- <tr class="oddrow">
- <td class="buttons">
- <input type="checkbox" name="select" value="1" title="Select this search"/>
- </td>
- <td class="query">
- <span title="PubMed: mouse[ALL FIELDS] AND adenylate cyclase">
- <a href="EditSearch.html">mouse AND adenylate cyclase</a></span>
- </td>
- <td class="age">3 days ago</td>
- <td class="schedule">
- <a href="EditSearch.html"
- title="Updates sent every Monday. Click to change or disable schedule.">Weekly</a>
- </td>
- </tr>
+ <div id="formdiv" class="mainview">
- <tr class="evenrow">
- <td class="buttons">
- <input type="checkbox" name="select" value="1" title="Select this search"/>
- </td>
- <td class="query">
- <span title="PubMed: SARS[All Fields] AND complete[All Fields] AND (&quot;genome&quot;[MeSH Terms] OR genome[Text Word])">
- <a href="EditSearch.html">SARS complete genome</a></span>
- </td>
- <td class="age">32 days ago</td>
- <td class="schedule">
- <a href="EditSearch.html"
- title="Updates sent daily. Click to change or disable schedule.">Daily</a>
- </td>
- </tr>
-
- <tr class="database">
- <td colspan="4">OMIM</td>
- </tr>
-
- <tr class="oddrow">
- <td class="buttons">
- <input type="checkbox" name="select" value="1" title="Select this search"/>
- </td>
- <td class="query">
- <span title="OMIM: (&quot;tonsillectomy&quot;[MeSH Terms] OR tonsillectomy[Text Word]) AND (&quot;obesity&quot;[MeSH Terms] OR obesity[Text Word])">
- <a href="EditSearch.html">tonsillectomy AND obesity</a></span>
- </td>
- <td class="age">3 days ago</td>
- <td class="schedule">
- <a href="EditSearch.html"
- title="Click to schedule email updates.">No Schedule</a>
- </td>
- </td>
- </tr>
- </table>
+ <div id="myformheader">
+ <div id="mycolheaders">
+ <div id="mytitle">
+ <span class="selectall">
+ <input type="checkbox" name="selectall" title="Select/Deselect all"/>
+ </span>
+ <span class="titlepos">
+ My Saved Searches
+ </span>
+ </div>
</div>
+ </div> <!-- End myformheader -->
- <div id="formbuttondiv">
+ <div class="searchtable">
+ <table cellspacing="0">
+
+ <tr class="database">
+ <th colspan="2" class="dbname">Search PubMed</th>
+ <th>Last Viewed</th>
+ <th>Details</th>
+ </tr>
+
+ <tr class="oddrow">
+ <td class="buttons">
+ <input type="checkbox" name="select" value="1" title="Select this search"/>
+ </td>
+ <td class="query">
+ <span title="Run PubMed: mouse[ALL FIELDS] AND adenylate cyclase">
+ <a href="PubMed.html">mouse AND adenylate cyclase</a></span>
+ </td>
+ <td class="age">3 days ago</td>
+ <td class="schedule">
+ <a href="EditSearch.html"
+ title="Updates sent every Monday. Click to change or disable schedule.">Weekly</a>
+ </td>
+ </tr>
+
+ <tr class="evenrow">
+ <td class="buttons">
+ <input type="checkbox" name="select" value="1" title="Select this search"/>
+ </td>
+ <td class="query">
+ <span title="Run PubMed: SARS[All Fields] AND complete[All Fields] AND (&quot;genome&quot;[MeSH Terms] OR genome[Text Word])">
+ <a href="PubMed.html">SARS complete genome</a></span>
+ </td>
+ <td class="age">32 days ago</td>
+ <td class="schedule">
+ <a href="EditSearch.html"
+ title="Updates sent daily. Click to change or disable schedule.">Daily</a>
+ </td>
+ </tr>
+
+ <tr class="database">
+ <th colspan="2" class="dbname">Search OMIM</th>
+ <th colspan="2"></th>
+ </tr>
+
+ <tr class="oddrow">
+ <td class="buttons">
+ <input type="checkbox" name="select" value="1" title="Select this search"/>
+ </td>
+ <td class="query">
+ <span title="Run OMIM: (&quot;tonsillectomy&quot;[MeSH Terms] OR tonsillectomy[Text Word]) AND (&quot;obesity&quot;[MeSH Terms] OR obesity[Text Word])">
+ <a href="PubMed.html">tonsillectomy AND obesity</a></span>
+ </td>
+ <td class="age">3 days ago</td>
+ <td class="schedule">
+ <a href="EditSearch.html"
+ title="Click to schedule email updates.">No Schedule</a>
+ </td>
+ </tr>
+
+ </table>
+ </div>
+
+ <div id="formbuttondiv">
+
+ <input type="button" class="button" value="Delete Selected" alt="Delete selected" src="DeleteSelected.gif" title="Click to delete all selected searches"/>
+ <input type="button" class="button" value="What's New For Selected" alt="What's New For Selected" src="WhatsNewForSelected.gif" title="Click to show updates to selected searches"/>
+ </div> <!-- End formbuttons -->
+ </div> <!-- End formdiv -->
- <input type="button" class="button" value="Delete Selected" alt="Delete selected" src="DeleteSelected.gif" title="Click to delete all selected searches"/>
- <input type="button" class="button" value="What's New For Selected" alt="What's New For Selected" src="WhatsNewForSelected.gif" title="Click to show updates to selected searches"/>
- <input type="button" class="button" value="Done" alt="Done" src="done.gif" title="Click to return to previous view"/>
- </div> <!-- End formbuttons -->
- </div> <!-- End formdiv -->
-
-</form>
-
+ </form>
+</div>
</body>
</html>
diff --git a/webdesign/designs/cubby/storedsearch/Templates/PubMed.html b/webdesign/designs/cubby/storedsearch/Templates/PubMed.html
new file mode 100644
index 00000000..e632090f
--- /dev/null
+++ b/webdesign/designs/cubby/storedsearch/Templates/PubMed.html
@@ -0,0 +1,4 @@
+<map name="GraffleExport">
+</map>
+<img src="PubMed.jpg" usemap="#GraffleExport"
+ alt="PubMed search results mockup" />
diff --git a/webdesign/designs/cubby/storedsearch/Templates/WhatsNewSummary.html b/webdesign/designs/cubby/storedsearch/Templates/WhatsNewSummary.html
new file mode 100644
index 00000000..da5b7899
--- /dev/null
+++ b/webdesign/designs/cubby/storedsearch/Templates/WhatsNewSummary.html
@@ -0,0 +1,79 @@
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head><title>What's New</title>
+<link rel="stylesheet" href="cubby.css" type="text/css" media="all" />
+</head>
+<body>
+
+ <div id="whats_new_summary">
+ <form method="post" action="saveSearch">
+
+ <div id="formdiv" class="mainview">
+
+ <div id="myformheader">
+ <div id="mycolheaders">
+
+ <div id="mytitle">
+ <span class="titlepos">
+ What's New
+ </span>
+ </div>
+ </div>
+ </div>
+
+ <div class="searchtable">
+ <table cellspacing="0">
+
+ <tr class="database">
+ <td colspan="3">PubMed</td>
+ </tr>
+
+ <tr class="oddrow">
+ <td class="query">
+ <span class="querytext" title="PubMed: mouse[ALL FIELDS] AND adenylate cyclase">
+ mouse AND adenylate cyclase</span>
+ </td>
+ <td class="age">3 days ago</td>
+ <td class="schedule">
+ <a href="Canvas%203.html"
+ title="Click to view new updates.">12</a>
+ </td>
+ </tr>
+
+ <tr class="evenrow">
+ <td class="query">
+ <span class="querytext" title="PubMed: SARS[All Fields] AND complete[All Fields] AND (&quot;genome&quot;[MeSH Terms] OR genome[Text Word])">
+ SARS complete genome</span>
+ </td>
+ <td class="age">32 days ago</td>
+ <td class="schedule">
+ <a href="Canvas%203.html"
+ title="Click to view new updates.">3</a>
+ </td>
+ </tr>
+
+ <tr class="database">
+ <td colspan="3">OMIM</td>
+ </tr>
+
+ <tr class="oddrow">
+ <td class="query">
+ <span class="querytext" title="OMIM: (&quot;tonsillectomy&quot;[MeSH Terms] OR tonsillectomy[Text Word]) AND (&quot;obesity&quot;[MeSH Terms] OR obesity[Text Word])">
+ tonsillectomy AND obesity</span>
+ </td>
+ <td class="age">3 days ago</td>
+ <td class="schedule">
+ <span title="No new data since last read.">0</span>
+ </td>
+ </tr>
+
+ </table>
+ </div>
+
+ <div id="formbuttondiv">
+ <input type="button" class="button" value="Done" alt="Done" src="done.gif" title="Click to return to previous view"/>
+ </div> <!-- End formbuttons -->
+ </div> <!-- End formdiv -->
+ </form>
+ </div>
+</body>
+</html>
diff --git a/webdesign/designs/cubby/storedsearch/Templates/cubby.css b/webdesign/designs/cubby/storedsearch/Templates/cubby.css
index 580006a8..905e677c 100644
--- a/webdesign/designs/cubby/storedsearch/Templates/cubby.css
+++ b/webdesign/designs/cubby/storedsearch/Templates/cubby.css
@@ -56,14 +56,6 @@ span.selectall {
background-color: #8B9BAB;
}
-/*
-.searchtable {
- padding: 0px 5px 0px 5px;
- border-color: #808080;
- padding: 0;
-}
-*/
-
div.searchtable table {
width: 100%;
border-bottom: 2px solid #06c;
@@ -71,6 +63,30 @@ div.searchtable table {
border-right: 2px solid #06c;
}
+tr.database {
+ margin: 0px;
+ padding: 0px;
+ text-align: left;
+ vertical-align: bottom;
+ font-size: .7em;
+ font-weight: bold;
+ color: #06c;
+ background-color: #CAE1FF;
+}
+
+tr.database th {
+ vertical-align: middle;
+ padding-left: 4px;
+}
+
+div.searchtable table tr td {
+}
+
+tr.database th.dbname {
+ text-align: center;
+ padding: 8px 4px;
+}
+
.mysearchhead {
border-color: #808080;
padding: 0;
@@ -127,19 +143,6 @@ div.searchtable table {
padding: 0px 4px 0px 0px;
}
-tr.database {
- margin: 0px;
- padding: 0px;
- vertical-align: bottom;
- font-size: 10pt;
- font-weight: bold;
- background-color: #CAE1FF;
-}
-
-tr.database td {
-border-top: 2px solid #80a1ff;
-}
-
.query, .age, .schedule {
font-size: 10pt;
}
@@ -206,23 +209,18 @@ span.querytext {
}
div#details {
- margin: 2em 0em 0em 0em;
- border: 1px solid #06c;
+ margin: 0em;
+ border-style: solid;
+ border-color: #06c;
+ border-width: 0px 2px 2px 2px;
padding: 0px;
}
-div#details h1 {
- font-size: 1em;
- padding: 6px;
- color: #CAE1FF;
- background-color: #06c;
- margin: 0;
-}
-
div#details dl {
margin: 0;
padding: 1em;
background-color: #eeeee0;
+ background-color: #cae1ff;
}
div#details dl dt {