summaryrefslogtreecommitdiff
path: root/algo/blast/api
diff options
context:
space:
mode:
authorAaron M. Ucko <ucko@debian.org>2007-09-05 15:33:43 +0000
committerAaron M. Ucko <ucko@debian.org>2007-09-05 15:33:43 +0000
commit7647e504b18f91edcedba85e7a6ef772b2a0f48b (patch)
tree6152f91efe8f30174ce9d51525a458b85335f12b /algo/blast/api
parentb0629a94e882461a9d6cab18807c5f96501cf38f (diff)
[svn-upgrade] Integrating new upstream version, ncbi-tools6 (6.1.20070822)
Diffstat (limited to 'algo/blast/api')
-rw-r--r--algo/blast/api/blast_api.c151
-rw-r--r--algo/blast/api/blast_api.h26
-rw-r--r--algo/blast/api/blast_format.c16
-rw-r--r--algo/blast/api/blast_input.c517
-rw-r--r--algo/blast/api/blast_input.h62
-rw-r--r--algo/blast/api/blast_options_api.c28
-rw-r--r--algo/blast/api/blast_options_api.h4
-rw-r--r--algo/blast/api/blast_returns.c26
-rw-r--r--algo/blast/api/blast_returns.h4
-rw-r--r--algo/blast/api/blast_seq.c23
-rw-r--r--algo/blast/api/blast_seq.h3
-rw-r--r--algo/blast/api/blast_seqalign.c14
-rw-r--r--algo/blast/api/blast_tabular.c19
-rw-r--r--algo/blast/api/dust_filter.c87
-rw-r--r--algo/blast/api/hspstream_queue.c8
-rw-r--r--algo/blast/api/repeats_filter.c10
-rw-r--r--algo/blast/api/seqsrc_multiseq.c36
-rw-r--r--algo/blast/api/seqsrc_multiseq.h4
-rw-r--r--algo/blast/api/seqsrc_readdb.c52
-rw-r--r--algo/blast/api/twoseq_api.c18
20 files changed, 975 insertions, 133 deletions
diff --git a/algo/blast/api/blast_api.c b/algo/blast/api/blast_api.c
index 67d64e4d..a088090e 100644
--- a/algo/blast/api/blast_api.c
+++ b/algo/blast/api/blast_api.c
@@ -1,4 +1,4 @@
-/* $Id: blast_api.c,v 1.41 2006/09/15 13:12:43 madden Exp $
+/* $Id: blast_api.c,v 1.50 2007/07/27 18:02:26 papadopo Exp $
***************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -44,6 +44,7 @@
#include <algo/blast/core/blast_traceback.h>
#include <algo/blast/core/hspstream_collector.h>
#include <algo/blast/core/phi_lookup.h>
+#include <algo/blast/core/blast_psi.h>
#include <algo/blast/api/hspstream_queue.h>
#include <algo/blast/api/blast_mtlock.h>
#include <algo/blast/api/blast_prelim.h>
@@ -53,6 +54,7 @@
#include <algo/blast/api/blast_seqalign.h>
#include <algo/blast/api/dust_filter.h>
#include <algo/blast/api/blast_message_api.h>
+#include <algo/blast/core/gencode_singleton.h>
/** @addtogroup CToolkitAlgoBlast
*
@@ -273,10 +275,6 @@ s_BlastHSPStreamSetUp(BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
if (!tf_data) {
const Int4 kNumResults = query_info->num_queries;
- /* Results in the collector stream should be sorted only for a
- database search. The latter is true if and only if the sequence
- source has non-zero database length. */
- const Boolean kSortOnRead = (BlastSeqSrcGetTotLen(seq_src) != 0);
SBlastHitsParameters* blasthit_params=NULL;
MT_LOCK lock = NULL;
if (options->num_cpus > 1)
@@ -286,7 +284,7 @@ s_BlastHSPStreamSetUp(BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
options->score_options, &blasthit_params);
*hsp_stream =
Blast_HSPListCollectorInitMT(options->program, blasthit_params,
- kNumResults, kSortOnRead, lock);
+ kNumResults, lock);
} else {
/* Initialize the queue HSP stream for tabular formatting. */
*hsp_stream = Blast_HSPListQueueInit();
@@ -335,6 +333,8 @@ s_BlastThreadManager(BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
ASSERT(query && query_info && seq_src && lookup_wrap && sbp &&
hsp_stream && extra_returns);
+ BlastSeqSrcResetChunkIterator((BlastSeqSrc*) seq_src);
+
/* Start the formatting thread */
if(tf_data && NlmThreadsAvailable() &&
(format_thread =
@@ -374,7 +374,8 @@ s_BlastThreadManager(BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
SPHIPatternSearchBlk* pattern_blk = NULL;
if (Blast_ProgramIsPhiBlast(kProgram)) {
pattern_blk = (SPHIPatternSearchBlk*) lookup_wrap->lut;
- pattern_blk->num_patterns_db = diagnostics->ungapped_stat->lookup_hits;
+ pattern_blk->num_patterns_db =
+ (Int4)diagnostics->ungapped_stat->lookup_hits;
}
if ((status = Blast_RunTracebackSearch(kProgram, query,
@@ -500,15 +501,79 @@ s_BlastFindMatrixPath(const char* matrix_name, Boolean is_prot)
return NULL;
}
-Int2
-Blast_RunSearch(SeqLoc* query_seqloc,
- const BlastSeqSrc* seq_src,
- SeqLoc* masking_locs,
- const SBlastOptions* options,
- BlastTabularFormatData* tf_data,
- BlastHSPResults **results,
- SeqLoc** filter_out,
- Blast_SummaryReturn* extra_returns)
+
+/**
+ * Read a checkpoint file and set the necessary structures in a
+ * BlastScoreBlk: the psi_matrix, kbp_psi[0], and kbp_gap_psi[0].
+ *
+ * @param sbp a BlastScoreBlk to receive a PSSM [in/out]
+ * @param query query sequence data
+ * @param psi_matrix_file checkpoint file to read
+ * @pcore_msg a pointer to receive error and warning messages
+ */
+static int
+s_SetupScoreBlkPssmFromChkpt(BlastScoreBlk * sbp,
+ BLAST_SequenceBlk * query,
+ Blast_PsiCheckpointLoc * psi_checkpoint,
+ Blast_Message* *pcore_msg)
+{
+ int status = 0;
+ /* An intermediate representation of the PSSM data that is used
+ in PSIBlast routines */
+ PSIMatrix * pssm = NULL;
+ /* The actual PSSM that is saved in the BlastScoreBlk */
+ SPsiBlastScoreMatrix * psi_matrix = NULL;
+ size_t i, j;
+
+ psi_matrix = SPsiBlastScoreMatrixNew(query->length);
+ if (!psi_matrix) {
+ ErrPostEx(SEV_FATAL, 1, 0,
+ "Out-of-memory: cannot allocate a PSSM of length %d.\n",
+ query->length);
+ status = -1;
+ goto error_return;
+ }
+ status = Blast_PosReadCheckpoint(psi_matrix->freq_ratios,
+ query->length, query->sequence,
+ psi_checkpoint,
+ pcore_msg);
+ if (status != 0) {
+ goto error_return;
+ }
+ Blast_KarlinBlkCopy(psi_matrix->kbp, sbp->kbp_gap_std[0]);
+ status = PSICreatePssmFromFrequencyRatios(query->sequence,
+ query->length, sbp,
+ psi_matrix->freq_ratios,
+ kPSSM_NoImpalaScaling,
+ &pssm);
+ if (0 != status) {
+ goto error_return;
+ }
+ for (i = 0; i < psi_matrix->pssm->ncols; i++) {
+ for (j = 0; j < psi_matrix->pssm->nrows; j++) {
+ psi_matrix->pssm->data[i][j] = pssm->pssm[i][j];
+ }
+ }
+ PSIMatrixFree(pssm);
+ sbp->psi_matrix = psi_matrix;
+ return 0;
+error_return:
+ if (psi_matrix)
+ SPsiBlastScoreMatrixFree(psi_matrix);
+ return status;
+}
+
+
+Int2
+Blast_RunSearch(SeqLoc* query_seqloc,
+ Blast_PsiCheckpointLoc * psi_checkpoint,
+ const BlastSeqSrc* seq_src,
+ SeqLoc* masking_locs,
+ const SBlastOptions* options,
+ BlastTabularFormatData* tf_data,
+ BlastHSPResults **results,
+ SeqLoc** filter_out,
+ Blast_SummaryReturn* extra_returns)
{
Int2 status = 0;
BLAST_SequenceBlk *query = NULL;
@@ -600,6 +665,20 @@ Blast_RunSearch(SeqLoc* query_seqloc,
if (status)
return status;
+ if (psi_checkpoint) {
+ core_msg = NULL;
+ status = s_SetupScoreBlkPssmFromChkpt(sbp, query, psi_checkpoint,
+ &core_msg);
+ if (core_msg) {
+ extra_returns->error =
+ Blast_MessageToSBlastMessage(core_msg, query_seqloc,
+ query_info,
+ options->believe_query);
+ core_msg = Blast_MessageFree(core_msg);
+ }
+ if (status)
+ return status;
+ }
if (filter_out) {
*filter_out =
BlastMaskLocToSeqLoc(kProgram, mask_loc, query_seqloc);
@@ -687,8 +766,10 @@ Blast_RunSearch(SeqLoc* query_seqloc,
return status;
}
-Int2
-Blast_DatabaseSearch(SeqLoc* query_seqloc, char* db_name,
+Int2
+Blast_DatabaseSearch(SeqLoc* query_seqloc,
+ Blast_PsiCheckpointLoc * psi_checkpoint,
+ char* db_name,
SeqLoc* masking_locs,
const SBlastOptions* options,
BlastTabularFormatData* tf_data,
@@ -733,9 +814,10 @@ Blast_DatabaseSearch(SeqLoc* query_seqloc, char* db_name,
if (extra_returns->error)
return -1;
- status =
- Blast_RunSearch(query_seqloc, seq_src, masking_locs, options, tf_data,
- &results, filter_out, extra_returns);
+ status =
+ Blast_RunSearch(query_seqloc, psi_checkpoint, seq_src,
+ masking_locs, options, tf_data, &results,
+ filter_out, extra_returns);
/* The ReadDBFILE structure will not be destroyed here, because the
initialising function used readdb_attach */
@@ -852,8 +934,10 @@ PHIBlastRunSearch(SeqLoc* query_seqloc, char* db_name, SeqLoc* masking_locs,
/* Masking at hash and on-the-fly tabular output are not applicable for
PHI BLAST, so pass NULL in corresponding arguments. */
status =
- Blast_RunSearch(query_seqloc, seq_src, masking_locs, options, NULL,
- &results, filter_out, extra_returns);
+ Blast_RunSearch(query_seqloc, (Blast_PsiCheckpointLoc *) NULL,
+ seq_src, masking_locs, options,
+ (BlastTabularFormatData*) NULL, &results,
+ filter_out, extra_returns);
/* The ReadDBFILE structure will not be destroyed here, because the
initialising function used readdb_attach */
@@ -909,9 +993,10 @@ Blast_TwoSeqLocSetsAdvanced(SeqLoc* query_seqloc,
if (extra_returns->error)
return -1;
- status =
- Blast_RunSearch(query_seqloc, seq_src, masking_locs, options, tf_data,
- &results, filter_out, extra_returns);
+ status =
+ Blast_RunSearch(query_seqloc, (Blast_PsiCheckpointLoc *) NULL,
+ seq_src, masking_locs, options, tf_data,
+ &results, filter_out, extra_returns);
/* The ReadDBFILE structure will not be destroyed here, because the
initialising function used readdb_attach */
@@ -932,6 +1017,20 @@ Blast_TwoSeqLocSetsAdvanced(SeqLoc* query_seqloc,
return status;
}
+void GeneticCodeSingletonInit()
+{
+ Uint1* gc = NULL;
+ GenCodeSingletonInit();
+ BLAST_GeneticCodeFind(BLAST_GENETIC_CODE, &gc);
+ GenCodeSingletonAdd(BLAST_GENETIC_CODE, gc);
+ free(gc);
+}
+
+void GeneticCodeSingletonFini()
+{
+ GenCodeSingletonFini();
+}
+
/* @} */
diff --git a/algo/blast/api/blast_api.h b/algo/blast/api/blast_api.h
index 25c76fb1..2957ec9c 100644
--- a/algo/blast/api/blast_api.h
+++ b/algo/blast/api/blast_api.h
@@ -1,4 +1,4 @@
-/* $Id: blast_api.h,v 1.7 2006/01/13 15:58:31 madden Exp $
+/* $Id: blast_api.h,v 1.10 2007/03/20 14:56:21 camacho Exp $
***************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -47,15 +47,27 @@ extern "C" {
#include <algo/blast/api/blast_tabular.h>
#include <algo/blast/api/blast_options_api.h>
#include <algo/blast/api/blast_seqalign.h>
+#include <algo/blast/api/blast_input.h>
/** @addtogroup CToolkitAlgoBlast
*
* @{
*/
+/** Initialize the genetic code singleton.
+ * This function must be called before running any BLAST search with the new
+ * engine */
+void GeneticCodeSingletonInit();
+
+/** Uinitialize the genetic code singleton.
+ * This function must be called after running any BLAST search with the new
+ * engine */
+void GeneticCodeSingletonFini();
+
/** Compares a list of SeqLoc's against a BLAST database using the
* BLAST algorithm.
* @param query_seqloc List of query Seq-loc's [in]
+ * @param psi_matrix_file a checkpoint file for PSSM searches [in]
* @param db_name Name of a BLAST database to search [in]
* @param masking_locs Locations in the queries that should be masked [in]
* @param options Search options [in]
@@ -64,8 +76,10 @@ extern "C" {
* @param filter_out Filtering locations [out]
* @param extra_returns Additional information about the search [out]
*/
-Int2
-Blast_DatabaseSearch(SeqLoc* query_seqloc, char* db_name,
+Int2
+Blast_DatabaseSearch(SeqLoc* query_seqloc,
+ Blast_PsiCheckpointLoc * psi_checkpoint,
+ char* db_name,
SeqLoc* masking_locs,
const SBlastOptions* options,
BlastTabularFormatData* tf_data,
@@ -96,6 +110,7 @@ Blast_TwoSeqLocSetsAdvanced(SeqLoc* query_seqloc,
/** Compare a list of query SeqLoc's against a source of subject sequences.
* @param query_seqloc List of query sequences locations [in]
+ * @param psi_matrix_file a checkpoint file for PSSM searches [in]
* @param seq_src Source of subject sequences [in]
* @param masking_locs Locations where query sequences should be masked. [in]
* @param options Search options [in]
@@ -106,8 +121,9 @@ Blast_TwoSeqLocSetsAdvanced(SeqLoc* query_seqloc,
* @param extra_returns Additional search statistits [out]
* @return 0 on success, -1 on failure.
*/
-Int2
-Blast_RunSearch(SeqLoc* query_seqloc,
+Int2
+Blast_RunSearch(SeqLoc* query_seqloc,
+ Blast_PsiCheckpointLoc * psi_checkpoint,
const BlastSeqSrc* seq_src,
SeqLoc* masking_locs,
const SBlastOptions* options,
diff --git a/algo/blast/api/blast_format.c b/algo/blast/api/blast_format.c
index 3d4ff10c..20b1f643 100644
--- a/algo/blast/api/blast_format.c
+++ b/algo/blast/api/blast_format.c
@@ -1,4 +1,4 @@
-/* $Id: blast_format.c,v 1.112 2006/10/06 12:22:28 madden Exp $
+/* $Id: blast_format.c,v 1.114 2007/01/19 14:32:31 madden Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,7 +32,7 @@
*/
#ifndef SKIP_DOXYGEN_PROCESSING
-static char const rcsid[] = "$Id: blast_format.c,v 1.112 2006/10/06 12:22:28 madden Exp $";
+static char const rcsid[] = "$Id: blast_format.c,v 1.114 2007/01/19 14:32:31 madden Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
#include <algo/blast/api/blast_format.h>
@@ -483,7 +483,10 @@ Int2 BlastFormattingInfoNew(EAlignView align_view,
if (align_view != eAlignViewXml && align_view < eAlignViewAsnText) {
FILE* outfp;
if ((outfp = FileOpen(outfile_name, "w")) == NULL)
- return -1;
+ {
+ ErrPostEx(SEV_WARNING, 0, 0, "Unable to open output file %s:", outfile_name);
+ return -1;
+ }
info->outfp = outfp;
} else {
char write_mode[3];
@@ -497,7 +500,10 @@ Int2 BlastFormattingInfoNew(EAlignView align_view,
strcpy(write_mode, "wb");
if ((info->aip = AsnIoOpen(filename_copy, write_mode)) == NULL)
- return -1;
+ {
+ ErrPostEx(SEV_WARNING, 0, 0, "Unable to open output file %s:", filename_copy);
+ return -1;
+ }
sfree(filename_copy);
}
info->is_seqalign_null = TRUE; /* will be updated in BLAST_FormatResults */
@@ -597,7 +603,7 @@ GetOldAlignType(EBlastProgramType prog, Boolean* db_is_na)
align_type = 3;
*db_is_na = FALSE;
break;
- case eBlastTypeTblastn:
+ case eBlastTypePsiTblastn: case eBlastTypeTblastn:
align_type = 4;
*db_is_na = TRUE;
break;
diff --git a/algo/blast/api/blast_input.c b/algo/blast/api/blast_input.c
index 6c9e50f1..364d1dfc 100644
--- a/algo/blast/api/blast_input.c
+++ b/algo/blast/api/blast_input.c
@@ -1,5 +1,5 @@
#ifndef SKIP_DOXYGEN_PROCESSING
-static char const rcsid[] = "$Id: blast_input.c,v 1.25 2006/05/24 21:17:50 camacho Exp $";
+static char const rcsid[] = "$Id: blast_input.c,v 1.30 2007/05/07 13:27:15 kans Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
/* ===========================================================================
*
@@ -37,6 +37,9 @@ static char const rcsid[] = "$Id: blast_input.c,v 1.25 2006/05/24 21:17:50 camac
#include <algo/blast/api/blast_seq.h>
#include <algo/blast/core/blast_filter.h>
#include <algo/blast/core/blast_setup.h>
+#include <algo/blast/core/blast_posit.h>
+#include <objscoremat.h>
+#include <asn.h>
/** @addtogroup CToolkitAlgoBlast
*
@@ -178,5 +181,517 @@ BLAST_GetQuerySeqLoc(FILE *infp, Boolean query_is_na, Uint1 strand,
return total_length;
}
+
+
+/**
+ * Read the query data stored in a PSI-BLAST checkpoint file.
+ *
+ * @param *pold_query_length length of the old query, as stored in the
+ * checkpoint file
+ * @param old_query a buffer of length at least query_length
+ * to hold the query data from the checkpoint
+ * file
+ * @param query_length the expected query length, and the length
+ * of the old_query buffer
+ * @param file the checkpoint file
+ * @return the number of query characters read,
+ * <= the value of the query_length parameter.
+ */
+static int
+s_GetOldQueryFromCheckpoint(Int4 * pold_query_length, Uint1 * old_query,
+ int query_length, FILE * file)
+{
+ int i;
+ int num_read; /* number of query characters read from disk */
+
+ ASSERT(query_length > 0);
+ if (1 != fread(pold_query_length, sizeof(Int4), 1, file)) {
+ *pold_query_length = 0;
+ return 0;
+ }
+ /* Read at most query_length bytes, no matter what the value of
+ * *pold_query_length is */
+ num_read = (int)fread(old_query, sizeof(Uint1), query_length, file);
+ for (i = 0; i < num_read; i++) {
+ old_query[i] = AMINOACID_TO_NCBISTDAA[old_query[i]];
+ }
+ return num_read;
+}
+
+
+/**
+ * Compare the query to another sequence, typically a query stored in
+ * a PSI-BLAST checkpoint file. Warn if they don't match.
+ * @param query one query to be compared
+ * @param query_length the length of query and old_query
+ * @param old_query the other query
+ * @return 0 if the sequences match, -1 otherwise */
+static Int4
+s_ValidateOldQuery(const Uint1 query[], int query_length,
+ const Uint1 old_query[], int old_query_length)
+{
+ int query_index;
+ /* Value for the X ambiguity character */
+ enum { eXchar = 21 };
+
+ if (query_length != old_query_length) {
+ ErrPostEx(SEV_WARNING, 0, 0, "Invalid usage of checkpoint recovery; "
+ "old query has length %ld, new query has length %ld",
+ (long) old_query_length, (long) query_length);
+ return -1;
+ }
+ for (query_index = 0; query_index < query_length; query_index++) {
+ if (old_query[query_index] != query[query_index]) {
+ char old_char = NCBISTDAA_TO_AMINOACID[old_query[query_index]];
+ char new_char = NCBISTDAA_TO_AMINOACID[query[query_index]];
+
+ if (old_query[query_index] != eXchar) {
+ if (query[query_index] == eXchar) {
+ ErrPostEx(SEV_WARNING, 0, 0,
+ "\nStored query has a %c at position "
+ "%d, while new query has a %c there.\n%c "
+ "appears in query sequence: The query "
+ "could be filtered. Run with \"-F F\" "
+ "option to turn the filter off.",
+ old_char, query_index, new_char, new_char);
+ } else {
+ ErrPostEx(SEV_WARNING, 0, 0,
+ "Stored query has a %c at position %d, "
+ "while new query has a %c there.",
+ old_char, query_index, new_char);
+ }
+ return -1;
+ } else { /* old_query[c] == eXchar */
+ ErrPostEx(SEV_WARNING, 0, 0,
+ "Stored query has a %c at position %d, "
+ "while new query has a %c there\n%c appears "
+ "in the stored query: The stored query may be "
+ "filtered. Run blastpgp with \"-F F\" option "
+ "to turn the filter off.",
+ old_char, query_index, new_char, old_char);
+ /* This is only a warning */
+ }
+ }
+ }
+ return 0;
+}
+
+
+/**
+ * Read frequency ratios from a PSI-BLAST checkpoint file; read the
+ * query data for the file before calling this routine.
+ *
+ * @param freq_ratios the frequency ratios [out]
+ * @param query_length the length of the query
+ * @param file file to read
+ *
+ * @return 0 on sucess, nonzero on error */
+static int
+s_PosReadFreqRatios(double ** freq_ratios,
+ int qlength,
+ FILE * file)
+{
+ int query_index; /* Query position (column) in the PSSM */
+ int stdaa_index; /* Index in the ncbi_stdaa alphabet */
+ int trueaa_index; /* Index in the ARND... alphabet of true
+ amino acids */
+ /* conversion from 28 letter NCBIstdaa alphabet to 20 letter order
+ * for true amino acids: ARNDCQEGHILKMFPSTWYV. */
+ static int alphaConvert[BLASTAA_SIZE] =
+ {(-1), 0, (-1), 4, 3, 6, 13, 7, 8, 9, 11, 10, 12, 2, 14, 5, 1, 15,
+ 16, 19, 17, (-1), 18, (-1), (-1), (-1), (-1), (-1)};
+ /* Buffer to hold frequency data in ARND... order */
+ double trueaa_buffer[PRO_TRUE_ALPHABET_SIZE];
+ int num_read; /* Number of items retrieved by a call to read */
+
+ for (query_index = 0; query_index < qlength; query_index++) {
+ num_read = (int)fread(trueaa_buffer, sizeof(double),
+ PRO_TRUE_ALPHABET_SIZE, file);
+ if (num_read < PRO_TRUE_ALPHABET_SIZE)
+ return -1;
+ for (stdaa_index = 0; stdaa_index < BLASTAA_SIZE; stdaa_index++) {
+ trueaa_index = alphaConvert[stdaa_index];
+ if (trueaa_index < 0) {
+ freq_ratios[query_index][stdaa_index] = 0.0;
+ } else {
+ freq_ratios[query_index][stdaa_index] =
+ trueaa_buffer[trueaa_index];
+ }
+ }
+ }
+ return 0;
+}
+
+
+/**
+ * Read frequency ratios from a standard format PSI-BLAST checkpoint file.
+ *
+ * @param freq_ratios the frequency ratios
+ * @param query_length the length of the query, and second dimension of
+ * freq_ratios
+ * @param query query sequence data
+ * @param file an open file to be read
+ * @param blast_msg a pointer to hold BLAST warnings.
+ *
+ * @return 0 on success, nonzero otherwise
+ */
+static int
+s_PosReadStdCheckpointFile(double ** freq_ratios,
+ int query_length,
+ const Uint1 * query,
+ FILE * file,
+ Blast_Message* *blast_msg)
+{
+ int chkpt_query_length = 0; /* Length of the query saved in the
+ checkpoint file */
+ int num_read = 0; /* number if query characters actually read from
+ the checkpoint file */
+ int status = 0; /* error status */
+ /* Buffer to hold the query from the checkpoint file */
+ Uint1 * chkpt_query = NULL;
+
+ chkpt_query = calloc(query_length, sizeof(Uint1));
+ if (NULL != chkpt_query) {
+ num_read = s_GetOldQueryFromCheckpoint(&chkpt_query_length,
+ chkpt_query, query_length,
+ file);
+ }
+ if (NULL == chkpt_query || num_read < chkpt_query_length) {
+ Blast_MessageWrite(blast_msg, eBlastSevFatal,
+ kBlastMessageNoContext,
+ "Blast_PosReadCheckpoint: "
+ "Failed to reconstruct previous query\n");
+ goto error_return;
+ }
+ status = s_ValidateOldQuery(query, query_length,
+ chkpt_query, chkpt_query_length);
+ free(chkpt_query);
+
+ if (0 != status)
+ goto error_return;
+
+ status = s_PosReadFreqRatios(freq_ratios, query_length, file);
+ if (0 != status)
+ goto error_return;
+
+ return 0;
+error_return:
+ Blast_MessageWrite(blast_msg, eBlastSevFatal,
+ kBlastMessageNoContext,
+ "Blast_PosReadCheckpoint: "
+ "Failed to recover data\n");
+ return -1;
+}
+
+
+/**
+ * Read frequency ratios from a standard format PSI-BLAST checkpoint file
+ * of the given name.
+ *
+ * @param freq_ratios the frequency ratios
+ * @param query_length the length of the query, and second dimension of
+ * freq_ratios
+ * @param query query sequence data
+ * @param file the name of the file to be read
+ * @param blast_msg a pointer to hold BLAST warnings.
+ *
+ * @return 0 on success, nonzero otherwise
+ */
+static int
+s_PosReadStdCheckpoint(Nlm_FloatHi ** freq_ratios,
+ int qlength,
+ const Uint1 * query,
+ const char * filename,
+ Blast_Message* *blast_msg)
+{
+ FILE * file = fopen(filename, "rb");
+ int status, close_status;
+
+ if (!file) {
+ Blast_MessageWrite(blast_msg, eBlastSevFatal,
+ kBlastMessageNoContext,
+ "Blast_PosReadCheckpointFile: "
+ "Could not open checkpoint file\n");
+ return -1;
+ }
+ status = s_PosReadStdCheckpointFile(freq_ratios, qlength, query,
+ file, blast_msg);
+ close_status = fclose(file);
+
+ return (0 == status) ? close_status : status;
+}
+
+
+static int
+s_PosReadAsnCheckpoint(double ** freq_ratios,
+ int query_length,
+ const Uint1 query[],
+ char fileName[],
+ int is_ascii_scoremat)
+{
+ AsnIoPtr infile = NULL;
+ PssmWithParametersPtr scoremat = NULL;
+ PssmPtr pssm = NULL;
+ PssmIntermediateDataPtr freqs = NULL;
+ ValNodePtr freq_list;
+ Bioseq *bsp;
+ int i, j, c;
+ enum { eXchar = 21 };
+
+ if (is_ascii_scoremat) {
+ infile = AsnIoOpen(fileName, "r");
+ } else {
+ infile = AsnIoOpen(fileName, "rb");
+ }
+ if (infile == NULL) {
+ ErrPostEx(SEV_WARNING, 0, 0,"Could not open scoremat file\n");
+ return -1;
+ }
+
+ scoremat = PssmWithParametersAsnRead(infile, NULL);
+ AsnIoClose(infile);
+ if (scoremat == NULL) {
+ ErrPostEx(SEV_WARNING, 0, 0,
+ "Could not read scoremat from input file\n");
+ return 1;
+ }
+ pssm = scoremat->pssm;
+ if (pssm == NULL) {
+ ErrPostEx(SEV_WARNING, 0, 0,"Scoremat is empty\n");
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+ freqs = pssm->intermediateData;
+ if (freqs == NULL) {
+ ErrPostEx(SEV_WARNING, 0, 0,
+ "Scoremat doesn't contain intermediate data\n");
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+ if (freqs->freqRatios == NULL) {
+ ErrPostEx(SEV_WARNING, 0, 0,
+ "Scoremat does not contain frequency ratios\n");
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+ if (pssm->numRows != BLASTAA_SIZE) {
+ ErrPostEx(SEV_WARNING, 0, 0, "Wrong alphabet size of %d in "
+ "input scoremat\n", pssm->numRows);
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+ if (!pssm->query || !pssm->query->data.ptrvalue) {
+ ErrPostEx(SEV_WARNING, 0, 0,
+ "Missing sequence data in input scoremat\n");
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+ bsp = (Bioseq *)(pssm->query->data.ptrvalue);
+ if (pssm->numColumns != bsp->length) {
+ ErrPostEx(SEV_WARNING, 0, 0, "Different sequence lengths "
+ "(%d and %d) in input scoremat\n", pssm->numColumns,
+ bsp->length);
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+ if (pssm->numColumns != query_length) {
+ ErrPostEx(SEV_WARNING, 0, 0, "Scoremat sequence length "
+ "(%d) does not match query length (%d)\n",
+ pssm->numColumns, query_length);
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+ if (!bsp->seq_data || !ISA_aa(bsp->mol)) {
+ ErrPostEx(SEV_WARNING, 0, 0,
+ "Sequence within checkpoint file has no data or is "
+ "not protein\n");
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+
+ if (bsp->seq_data_type == Seq_code_gap) {
+ ErrPostEx(SEV_WARNING, 0, 0,"Seq_code_gap passed to s_PosReadAsnCheckpoint\n");
+ return -1;
+ }
+
+ BSSeek((ByteStorePtr) bsp->seq_data, 0, SEEK_SET);
+
+ /* Convert sequence data into Seq_code_ncbistdaa */
+ if (bsp->seq_data_type != Seq_code_ncbistdaa) {
+
+ ByteStore* new_byte_store = BSConvertSeq((ByteStorePtr) bsp->seq_data,
+ Seq_code_ncbistdaa,
+ bsp->seq_data_type,
+ bsp->length);
+ if ( !new_byte_store ) {
+ ErrPostEx(SEV_FATAL, 1, 0, "Failed to convert Bioseq in ASN.1"
+ " PSSM to Seq_code_ncbistdaa");
+ }
+ bsp->seq_data = (SeqDataPtr) new_byte_store;
+ bsp->seq_data_type = Seq_code_ncbistdaa;
+ BSSeek((ByteStorePtr) bsp->seq_data, 0, SEEK_SET);
+ }
+
+ /* verify the input query is the same as the sequence
+ within the checkpoint file */
+
+ for (i = 0; i < query_length; i++) {
+ c = BSGetByte((ByteStorePtr) bsp->seq_data);
+ if (c == EOF) {
+ ErrPostEx(SEV_WARNING, 0, 0, "Premature end of sequence data\n");
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+ if (c != query[i]) {
+ char old_char = NCBISTDAA_TO_AMINOACID[query[i]];
+ char new_char = NCBISTDAA_TO_AMINOACID[c];
+ if (query[i] == eXchar) {
+ ErrPostEx(SEV_WARNING, 0, 0,
+ "Query sequence contains '%c' at position %d; "
+ "if filtering was used, rerun the search with "
+ "filtering turned off ('-F F')\n", old_char, i);
+ }
+ else {
+ ErrPostEx(SEV_WARNING, 0, 0,
+ "Query sequence contains '%c' at position %d, "
+ "while sequence withing checkpoint file contains "
+ "'%c' at this position\n",
+ old_char, i, new_char);
+ }
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+ }
+
+ /* Read in the frequency ratios, verify they fall
+ in the correct range, and verify that the linked list
+ of residue frequencies is exactly as long as it should be */
+
+ freq_list = freqs->freqRatios;
+ /* This is bad */
+ if (pssm->byRow == FALSE) {
+ j = 0;
+ for (i = 0; i < pssm->numColumns; i++) {
+ for (j = 0; j < pssm->numRows; j++) {
+ if (freq_list == NULL)
+ break;
+ freq_ratios[i][j] = freq_list->data.realvalue;
+
+ if (freq_ratios[i][j] < 0.0) {
+ ErrPostEx(SEV_WARNING, 0, 0, "position frequency (%d,%d) "
+ "out of bounds\n", i, j);
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+ freq_list = freq_list->next;
+ }
+ if (j < pssm->numRows)
+ break;
+ }
+ } else {
+ i = 0;
+ for (j = 0; j < pssm->numRows; j++) {
+ for (i = 0; i < pssm->numColumns; i++) {
+ if (freq_list == NULL)
+ break;
+ freq_ratios[i][j] = freq_list->data.realvalue;
+
+ if (freq_ratios[i][j] < 0.0) {
+ ErrPostEx(SEV_WARNING, 0, 0, "position frequency (%d,%d) "
+ "out of bounds\n", i, j);
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+ freq_list = freq_list->next;
+ }
+ if (i < pssm->numColumns)
+ break;
+ }
+ }
+ if (i < pssm->numColumns || j < pssm->numRows) {
+ ErrPostEx(SEV_WARNING, 0, 0, "Not enough frequency "
+ "ratios in input scoremat\n");
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+ if (freq_list != NULL) {
+ ErrPostEx(SEV_WARNING, 0, 0, "Too many frequency "
+ "ratios in input scoremat\n");
+ PssmWithParametersFree(scoremat);
+ return -1;
+ }
+ PssmWithParametersFree(scoremat);
+ return 0;
+}
+
+
+Blast_PsiCheckpointLoc *
+Blast_PsiCheckpointLocNew(EPsiCheckpointType checkpoint_type,
+ char * filename)
+{
+ Blast_PsiCheckpointLoc * psi_checkpoint =
+ malloc(sizeof(Blast_PsiCheckpointLoc));
+ if (psi_checkpoint) {
+ size_t length_filename = strlen(filename);
+ psi_checkpoint->filename = calloc(length_filename + 1, sizeof(char));
+ if (!psi_checkpoint->filename) {
+ free(psi_checkpoint);
+ psi_checkpoint = NULL;
+ }
+ memcpy(psi_checkpoint->filename, filename, length_filename + 1);
+ psi_checkpoint->checkpoint_type = checkpoint_type;
+ }
+ return psi_checkpoint;
+}
+
+
+void
+Blast_PsiCheckpointLocFree(Blast_PsiCheckpointLoc ** ppsi_checkpoint)
+{
+ Blast_PsiCheckpointLoc * psi_checkpoint = *ppsi_checkpoint;
+ if (psi_checkpoint) {
+ if (psi_checkpoint->filename) {
+ free(psi_checkpoint->filename);
+ }
+ free(psi_checkpoint);
+ }
+ *ppsi_checkpoint = NULL;
+}
+
+
+int
+Blast_PosReadCheckpoint(double ** freq_ratios,
+ int query_length,
+ const Uint1 * query,
+ Blast_PsiCheckpointLoc * psi_checkpoint,
+ Blast_Message* *blast_msg)
+{
+ int status;
+
+ switch(psi_checkpoint->checkpoint_type) {
+ case eStandardCheckpoint:
+ status = s_PosReadStdCheckpoint(freq_ratios, query_length,
+ query,
+ psi_checkpoint->filename,
+ blast_msg);
+ break;
+ case eAsnTextCheckpoint:
+ case eAsnBinaryCheckpoint:
+ {{
+ int is_ascii_checkpoint =
+ psi_checkpoint->checkpoint_type == eAsnTextCheckpoint;
+ status = s_PosReadAsnCheckpoint(freq_ratios,
+ query_length, query,
+ psi_checkpoint->filename,
+ is_ascii_checkpoint);
+ }}
+ break;
+ default:
+ ASSERT(0 && "Impossible type of checkpoint file");
+ status = -1;
+ break;
+ }
+ return status;
+}
/* @} */
diff --git a/algo/blast/api/blast_input.h b/algo/blast/api/blast_input.h
index 26d042e2..60586ef4 100644
--- a/algo/blast/api/blast_input.h
+++ b/algo/blast/api/blast_input.h
@@ -1,4 +1,4 @@
-/* $Id: blast_input.h,v 1.18 2006/04/21 14:33:44 madden Exp $
+/* $Id: blast_input.h,v 1.20 2007/03/12 16:12:46 madden Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -44,6 +44,7 @@ extern "C" {
#include <ncbi.h>
#include <algo/blast/core/blast_def.h>
+#include <algo/blast/core/blast_message.h>
/** @addtogroup CToolkitAlgoBlast
*
@@ -76,6 +77,52 @@ BLAST_GetQuerySeqLoc(FILE *infp, Boolean query_is_na, Uint1 strand,
Int4* num_queries, Boolean believe_query,
Int4 genetic_code);
+
+/** The possible file formats of a PSI-BLAST checkpoint file. */
+typedef enum EPsiCheckpointType {
+ eStandardCheckpoint = 0, /**< The useual PSI-BLAST binary format */
+ eAsnTextCheckpoint = 1, /**< ASN.1 text format */
+ eAsnBinaryCheckpoint = 2 /**< ASN.1 binary format */
+} EPsiCheckpointType;
+
+/** The location and type of a PSI-BLAST checkpoint file */
+typedef struct Blast_PsiCheckpointLoc {
+ EPsiCheckpointType checkpoint_type; /**< file format */
+ char * filename; /**< name of the file */
+} Blast_PsiCheckpointLoc;
+
+/** Create a new locator for a PSI-BLAST checkpoint file.
+ * @param checkpoint_type file format
+ * @param filename name of the file */
+Blast_PsiCheckpointLoc *
+Blast_PsiCheckpointLocNew(EPsiCheckpointType checkpoint_type,
+ char * filename);
+
+/** Free a PSI-BLAST checkpoint file locator */
+void
+Blast_PsiCheckpointLocFree(Blast_PsiCheckpointLoc ** psi_checkpoint);
+
+
+/**
+ * Read frequency ratios from a PSI-BLAST checkpoint file.
+ *
+ * @param freq_ratios the frequency ratios
+ * @param query_length the length of the query, and second dimension of
+ * freq_ratios
+ * @param query query sequence data
+ * @param psi_checkpoint location of the checkpoint data
+ * @param blast_msg a pointer to hold BLAST warnings.
+ *
+ * @return 0 on success, nonzero otherwise
+ */
+int
+Blast_PosReadCheckpoint(double ** freq_ratios,
+ int query_length,
+ const Uint1 * query,
+ Blast_PsiCheckpointLoc * psi_checkpoint,
+ Blast_Message* *blast_msg);
+
+
/* @} */
#ifdef __cplusplus
@@ -86,6 +133,19 @@ BLAST_GetQuerySeqLoc(FILE *infp, Boolean query_is_na, Uint1 strand,
* ===========================================================================
*
* $Log: blast_input.h,v $
+* Revision 1.20 2007/03/12 16:12:46 madden
+* - Create an enum EPsiCheckpointType that specifies the file format
+* of a PSI-BLAST checkpoint file.
+* - Define a new datatype Blast_PsiCheckpointLoc to
+* specify the location and type of a PSI-BLAST checkpoint file.
+* - Declare Blast_PsiCheckpointLocNew and Blast_PsiCheckpointLocFree.
+* [from Mike Gertz]
+*
+* Revision 1.19 2007/03/05 14:50:08 camacho
+* - Added a prototype for Blast_PosReadCheckpoint.
+* - Added core/blast_message.h to the includes because
+* Blast_PosReadCheckpoint has a Blast_Message ** parameter.
+*
* Revision 1.18 2006/04/21 14:33:44 madden
* BLAST_GetQuerySeqLoc parameter ctr is now a pointer to Int4, fixes case of more than 32k queries
*
diff --git a/algo/blast/api/blast_options_api.c b/algo/blast/api/blast_options_api.c
index f855c187..12c0f855 100644
--- a/algo/blast/api/blast_options_api.c
+++ b/algo/blast/api/blast_options_api.c
@@ -1,4 +1,4 @@
-/* $Id: blast_options_api.c,v 1.18 2006/04/26 12:45:28 madden Exp $
+/* $Id: blast_options_api.c,v 1.24 2007/03/20 15:17:16 kans Exp $
***************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -35,6 +35,7 @@
#include <algo/blast/core/blast_util.h>
#include <algo/blast/core/blast_filter.h>
#include <algo/blast/api/blast_seq.h>
+#include <algo/blast/core/gencode_singleton.h>
/** @addtogroup CToolkitAlgoBlast
*
@@ -83,11 +84,11 @@ Int2 SBlastOptionsNew(const char* program_name, SBlastOptions** options_out,
return status;
}
- if (program == eBlastTypeTblastn || program == eBlastTypeRpsTblastn ||
- program == eBlastTypeTblastx) {
- if ((status = BLAST_GeneticCodeFind(db_options->genetic_code,
- &db_options->gen_code_string)))
- return status;
+ if (Blast_SubjectIsTranslated(program) || program == eBlastTypeRpsTblastn) {
+ Uint1* gc = NULL;
+ BLAST_GeneticCodeFind(db_options->genetic_code, &gc);
+ GenCodeSingletonAdd(db_options->genetic_code, gc);
+ free(gc);
}
*options_out = options = (SBlastOptions*) calloc(1, sizeof(SBlastOptions));
@@ -145,7 +146,7 @@ Int2 SBlastOptionsSetWordSize(SBlastOptions* options, Int4 word_size)
return -1;
}
-Int2 SBlastOptionsSetThreshold(SBlastOptions* options, Int4 threshold)
+Int2 SBlastOptionsSetThreshold(SBlastOptions* options, double threshold)
{
if (!options || !options->lookup_options || !options->score_options)
@@ -189,6 +190,8 @@ Int2 SBlastOptionsSetWindowSize(SBlastOptions* options, Int4 window_size)
}
options->word_options->window_size = window_size;
+
+ return 0;
}
Int2 SBlastOptionsSetDiscMbParams(SBlastOptions* options, Int4 template_length,
@@ -197,7 +200,7 @@ Int2 SBlastOptionsSetDiscMbParams(SBlastOptions* options, Int4 template_length,
if (!options || !options->lookup_options)
return -1;
- options->lookup_options->lut_type = MB_LOOKUP_TABLE;
+ options->lookup_options->lut_type = eMBLookupTable;
options->lookup_options->mb_template_length = template_length;
options->lookup_options->mb_template_type = template_type;
@@ -318,14 +321,7 @@ Int2 SBlastOptionsSetDbGeneticCode(SBlastOptions* options, Int4 gc)
if (!options || !options->db_options)
return -1;
- /* If previously set genetic code is the same as the new one, there is no
- need to do anything. */
- if (options->db_options->genetic_code != gc) {
- options->db_options->genetic_code = gc;
- /* Free old genetic code string. */
- sfree(options->db_options->gen_code_string);
- status = BLAST_GeneticCodeFind(gc, &options->db_options->gen_code_string);
- }
+ options->db_options->genetic_code = gc;
return status;
diff --git a/algo/blast/api/blast_options_api.h b/algo/blast/api/blast_options_api.h
index b2e6b1a1..f9b10199 100644
--- a/algo/blast/api/blast_options_api.h
+++ b/algo/blast/api/blast_options_api.h
@@ -1,4 +1,4 @@
-/* $Id: blast_options_api.h,v 1.13 2006/06/08 21:46:24 papadopo Exp $
+/* $Id: blast_options_api.h,v 1.14 2007/03/08 14:57:31 kans Exp $
***************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -139,7 +139,7 @@ Int2 SBlastOptionsSetRewardPenaltyAndGapCosts(SBlastOptions* options,
* @return zero unless error (e.g., threshold is < zero)
*/
Int2 SBlastOptionsSetThreshold(SBlastOptions* options,
- Int4 threshold);
+ double threshold);
/** Set window size for two hit extension.
* @param options options Options structure to update. [in] [out]
diff --git a/algo/blast/api/blast_returns.c b/algo/blast/api/blast_returns.c
index 664b858d..186f054e 100644
--- a/algo/blast/api/blast_returns.c
+++ b/algo/blast/api/blast_returns.c
@@ -1,4 +1,4 @@
-/* $Id: blast_returns.c,v 1.34 2006/10/13 18:44:29 papadopo Exp $
+/* $Id: blast_returns.c,v 1.38 2007/07/10 15:28:07 papadopo Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -29,7 +29,7 @@
*/
#ifndef SKIP_DOXYGEN_PROCESSING
-static char const rcsid[] = "$Id: blast_returns.c,v 1.34 2006/10/13 18:44:29 papadopo Exp $";
+static char const rcsid[] = "$Id: blast_returns.c,v 1.38 2007/07/10 15:28:07 papadopo Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
#include <algo/blast/api/blast_returns.h>
@@ -194,8 +194,12 @@ Blast_GetParametersBuffer(EBlastProgramType program_number,
add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
}
if (search_params->threshold) {
- sprintf(buffer, "Neighboring words threshold: %ld",
- (long) search_params->threshold);
+ if (search_params->threshold == floor(search_params->threshold))
+ sprintf(buffer, "Neighboring words threshold: %ld",
+ (long)search_params->threshold);
+ else
+ sprintf(buffer, "Neighboring words threshold: %.2f",
+ search_params->threshold);
add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
}
if (search_params->window_size) {
@@ -366,10 +370,9 @@ s_SummaryDBStatsFill(EBlastProgramType program_number,
itr = BlastSeqSrcIteratorFree(itr);
}
}
-
- if (program_number == eBlastTypeTblastn ||
- program_number == eBlastTypeRpsTblastn ||
- program_number == eBlastTypeTblastx)
+
+ if (Blast_SubjectIsTranslated(program_number) ||
+ program_number == eBlastTypeRpsTblastn)
total_length /= 3;
(*db_stats)->dblength = total_length;
@@ -400,8 +403,11 @@ s_SummaryDBStatsFill(EBlastProgramType program_number,
/** FIXME: Should this be different for RPS BLAST? */
(*db_stats)->qlen = qlen;
(*db_stats)->eff_qlen = qlen - ((*db_stats)->hsp_length);
- (*db_stats)->eff_dblength =
- total_length - num_entries*((*db_stats)->hsp_length);
+ (*db_stats)->eff_dblength = total_length;
+ if (eff_len_options->db_length == 0) {
+ (*db_stats)->eff_dblength -= (Int8)num_entries *
+ ((*db_stats)->hsp_length);
+ }
(*db_stats)->eff_searchsp =
query_info->contexts[query_info->first_context].eff_searchsp;
if (eff_len_options &&
diff --git a/algo/blast/api/blast_returns.h b/algo/blast/api/blast_returns.h
index af3fb5cf..1e61283b 100644
--- a/algo/blast/api/blast_returns.h
+++ b/algo/blast/api/blast_returns.h
@@ -1,4 +1,4 @@
-/* $Id: blast_returns.h,v 1.14 2006/04/26 12:46:00 madden Exp $
+/* $Id: blast_returns.h,v 1.15 2007/07/10 15:28:07 papadopo Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -88,7 +88,7 @@ typedef struct Blast_SearchParams {
char* pattern; /**< phi-blast pattern used. */
char* entrez_query; /**< query to entrez */
double ethresh; /**< PSI-BLAST threshold to keep HSPs in profile. */
- Int4 threshold; /**< threshold to start extension of hits. */
+ double threshold; /**< threshold to start extension of hits. */
Int4 window_size; /**< max allowed distance between hits to init extension in 2-hit mode. */
} Blast_SearchParams;
diff --git a/algo/blast/api/blast_seq.c b/algo/blast/api/blast_seq.c
index 6854c54d..30484f0d 100644
--- a/algo/blast/api/blast_seq.c
+++ b/algo/blast/api/blast_seq.c
@@ -1,5 +1,5 @@
#ifndef SKIP_DOXYGEN_PROCESSING
-static char const rcsid[] = "$Id: blast_seq.c,v 1.85 2006/07/05 15:52:35 papadopo Exp $";
+static char const rcsid[] = "$Id: blast_seq.c,v 1.88 2007/03/14 19:50:30 papadopo Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
/*
* ===========================================================================
@@ -40,7 +40,6 @@ static char const rcsid[] = "$Id: blast_seq.c,v 1.85 2006/07/05 15:52:35 papadop
#include <algo/blast/core/blast_util.h>
#include <algo/blast/core/blast_encoding.h>
#include <algo/blast/core/blast_setup.h> /* For BlastSeqLoc_RestrictToInterval */
-#include <algo/blast/core/blast_inline.h>
/** @addtogroup CToolkitAlgoBlast
*
@@ -70,7 +69,7 @@ BlastSeqlocsHaveDuplicateIDs(SeqLoc* query_seqlocs)
return FALSE;
/* allocate hashtable */
- hashtable = (Uint4 *)calloc(1 << kLog2HashSize, sizeof(Uint4));
+ hashtable = (Uint4 *)calloc((size_t)1 << kLog2HashSize, sizeof(Uint4));
id_entries = (SeqIdHash *)malloc((kNumSeqs + 1) * sizeof(SeqIdHash));
for (slp = query_seqlocs, curr_id_num = 1; slp; slp = slp->next) {
@@ -81,7 +80,7 @@ BlastSeqlocsHaveDuplicateIDs(SeqLoc* query_seqlocs)
/* hash the ID of the next query sequence */
SeqIdLabel(id, buffer, sizeof(buffer), OM_LABEL_CONTENT);
- hashval = readdb_sequence_hash(buffer, strlen(buffer));
+ hashval = readdb_sequence_hash(buffer, (int)strlen(buffer));
hashval = hashval >> (32 - kLog2HashSize);
if (hashtable[hashval] != 0) {
Int4 offset = hashtable[hashval];
@@ -156,7 +155,7 @@ BlastMaskLocFromSeqLoc(SeqLoc* mask_seqlocs, SeqLoc* query_seqlocs,
retval = BlastMaskLocNew(kNumSeqs*kNumContexts);
/* create hashtable for query IDs */
- hashtable = (Uint4 *)calloc(1 << kLog2HashSize, sizeof(Uint4));
+ hashtable = (Uint4 *)calloc((size_t)1 << kLog2HashSize, sizeof(Uint4));
id_entries = (SeqIdHash *)malloc((kNumSeqs + 1) * sizeof(SeqIdHash));
/* add the ID of each query sequence to the hashtable */
@@ -166,7 +165,7 @@ BlastMaskLocFromSeqLoc(SeqLoc* mask_seqlocs, SeqLoc* query_seqlocs,
Char buffer[64];
SeqIdLabel(seq_id, buffer, sizeof(buffer), OM_LABEL_CONTENT);
- hashval = readdb_sequence_hash(buffer, strlen(buffer));
+ hashval = readdb_sequence_hash(buffer, (int)strlen(buffer));
hashval = hashval >> (32 - kLog2HashSize);
id_entries[curr_id_num].id = seq_id;
@@ -191,7 +190,7 @@ BlastMaskLocFromSeqLoc(SeqLoc* mask_seqlocs, SeqLoc* query_seqlocs,
mask_id = SeqLocId(current_mask);
SeqIdLabel(mask_id, buffer, sizeof(buffer), OM_LABEL_CONTENT);
- hashval = readdb_sequence_hash(buffer, strlen(buffer));
+ hashval = readdb_sequence_hash(buffer, (int)strlen(buffer));
hashval = hashval >> (32 - kLog2HashSize);
/* examine only the query IDs that hash to the same value */
@@ -588,7 +587,7 @@ Int2 BLAST_GeneticCodeFind(Int4 gc, Uint1** genetic_code)
if (!gen_code_eaa)
return -1;
smtp = SeqMapTableFind(Seq_code_ncbistdaa, Seq_code_ncbieaa);
- gen_code_length = StrLen(gen_code_eaa);
+ gen_code_length = (Int4)StrLen(gen_code_eaa);
*genetic_code = gen_code_stdaa = (Uint1*) calloc(gen_code_length+1, 1);
if (!gen_code_stdaa)
@@ -740,10 +739,7 @@ Int2 BLAST_SetUpQuery(EBlastProgramType program_number,
program_number == eBlastTypePhiBlastn) {
encoding = eBlastEncodingNucleotide;
num_frames = 2;
- } else if (program_number == eBlastTypeBlastp ||
- program_number == eBlastTypeRpsBlast ||
- program_number == eBlastTypeTblastn ||
- program_number == eBlastTypePhiBlastp) {
+ } else if (Blast_QueryIsProtein(program_number)) {
encoding = eBlastEncodingProtein;
num_frames = 1;
} else { /* blastx or rpstblastn, which is also essentially blastx */
@@ -785,8 +781,7 @@ Int2 BLAST_SetUpSubject(EBlastProgramType program_number,
EBlastEncoding encoding;
const Boolean kNucleotide = (program_number == eBlastTypeBlastn ||
program_number == eBlastTypePhiBlastn);
- const Boolean kTranslated = (program_number == eBlastTypeTblastn ||
- program_number == eBlastTypeTblastx);
+ const Boolean kTranslated = Blast_SubjectIsTranslated(program_number);
if (kNucleotide)
encoding = eBlastEncodingNucleotide;
diff --git a/algo/blast/api/blast_seq.h b/algo/blast/api/blast_seq.h
index e0c4ade9..5e652ad8 100644
--- a/algo/blast/api/blast_seq.h
+++ b/algo/blast/api/blast_seq.h
@@ -1,4 +1,4 @@
-/* $Id: blast_seq.h,v 1.28 2006/04/20 15:33:33 papadopo Exp $
+/* $Id: blast_seq.h,v 1.29 2006/11/21 17:21:20 papadopo Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -41,6 +41,7 @@ extern "C" {
#include <objseq.h>
#include <algo/blast/core/blast_def.h>
+#include <algo/blast/core/blast_query_info.h>
#include <algo/blast/core/blast_options.h>
/** @addtogroup CToolkitAlgoBlast
diff --git a/algo/blast/api/blast_seqalign.c b/algo/blast/api/blast_seqalign.c
index b3056a03..bbf2c094 100644
--- a/algo/blast/api/blast_seqalign.c
+++ b/algo/blast/api/blast_seqalign.c
@@ -1,4 +1,4 @@
-/* $Id: blast_seqalign.c,v 1.60 2006/06/13 14:42:44 papadopo Exp $
+/* $Id: blast_seqalign.c,v 1.62 2007/02/26 14:52:50 papadopo Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -29,7 +29,7 @@
*/
#ifndef SKIP_DOXYGEN_PROCESSING
-static char const rcsid[] = "$Id: blast_seqalign.c,v 1.60 2006/06/13 14:42:44 papadopo Exp $";
+static char const rcsid[] = "$Id: blast_seqalign.c,v 1.62 2007/02/26 14:52:50 papadopo Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
#include <algo/blast/api/blast_seqalign.h>
@@ -82,10 +82,6 @@ SBlastSeqalignArrayFree(SBlastSeqalignArray* seqalign_vec)
return NULL;
}
-/** Creates a score set corresponding to one HSP.
- * @param hsp HSP structure [in]
- * @return Score set for this HSP.
- */
ScorePtr
GetScoreSetFromBlastHsp(BlastHSP* hsp)
{
@@ -743,10 +739,8 @@ BlastHSPToSeqAlign(EBlastProgramType program, BlastHSP* hsp,
start1 = hsp->query.offset;
start2 = hsp->subject.offset;
- translate1 = (program == eBlastTypeBlastx || program == eBlastTypeTblastx ||
- program == eBlastTypeRpsTblastn);
- translate2 = (program == eBlastTypeTblastn || program == eBlastTypeTblastx);
-
+ translate1 = Blast_QueryIsTranslated(program);
+ translate2 = Blast_SubjectIsTranslated(program);
/* If no eGapAlignDecline regions exists output seqalign will be
regular Den-Seg or Std-seg */
diff --git a/algo/blast/api/blast_tabular.c b/algo/blast/api/blast_tabular.c
index 3f29fcd5..6058f59c 100644
--- a/algo/blast/api/blast_tabular.c
+++ b/algo/blast/api/blast_tabular.c
@@ -1,4 +1,4 @@
-/* $Id: blast_tabular.c,v 1.34 2006/06/09 17:44:50 papadopo Exp $
+/* $Id: blast_tabular.c,v 1.39 2007/03/20 15:17:16 kans Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -29,7 +29,7 @@
*/
#ifndef SKIP_DOXYGEN_PROCESSING
-static char const rcsid[] = "$Id: blast_tabular.c,v 1.34 2006/06/09 17:44:50 papadopo Exp $";
+static char const rcsid[] = "$Id: blast_tabular.c,v 1.39 2007/03/20 15:17:16 kans Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
#include <algo/blast/api/blast_tabular.h>
@@ -40,6 +40,7 @@ static char const rcsid[] = "$Id: blast_tabular.c,v 1.34 2006/06/09 17:44:50 pap
#include <algo/blast/api/blast_format.h>
#include <algo/blast/api/blast_seqalign.h>
#include <algo/blast/core/blast_seqsrc_impl.h>
+#include <algo/blast/core/gencode_singleton.h>
#include <txalign.h>
@@ -82,14 +83,11 @@ Blast_TabularFormatDataSetUp(BlastTabularFormatData* tf_data,
ASSERT(score_options && db_options);
- tf_data->perform_traceback =
- (score_options->gapped_calculation &&
- ext_options->ePrelimGapExt != eGreedyWithTracebackExt);
-
+ tf_data->perform_traceback = score_options->gapped_calculation;
tf_data->program = program;
tf_data->hsp_stream = hsp_stream;
tf_data->query = query;
- tf_data->gen_code_string = db_options->gen_code_string;
+ tf_data->gen_code_string = GenCodeSingletonFind(db_options->genetic_code);
/* Sequence source must be copied, to guarantee multi-thread safety. */
tf_data->seq_src = BlastSeqSrcCopy(seq_src);
/* Effective lengths must be duplicated in query info structure, because
@@ -171,9 +169,8 @@ FillNuclSequenceBuffers(EBlastProgramType program, BlastHSP* hsp,
Boolean reverse;
Boolean translate1, translate2;
- translate1 = (program == eBlastTypeBlastx || program == eBlastTypeTblastx ||
- program == eBlastTypeRpsTblastn);
- translate2 = (program == eBlastTypeTblastn || program == eBlastTypeTblastx);
+ translate1 = Blast_QueryIsTranslated(program);
+ translate2 = Blast_SubjectIsTranslated(program);
reverse = (hsp->query.frame != hsp->subject.frame);
@@ -364,7 +361,7 @@ void* Blast_TabularFormatThread(void* data)
Blast_TracebackFromHSPList(program, hsp_list, query,
seq_arg.seq, query_info, gap_align, gap_align->sbp, score_params,
- ext_params->options, hit_params, gen_code_string);
+ ext_params->options, hit_params, gen_code_string, NULL);
/* Return subject sequence unless it is needed for the sequence
printout */
if (tf_data->format_options != eBlastTabularAddSequences) {
diff --git a/algo/blast/api/dust_filter.c b/algo/blast/api/dust_filter.c
index db8c9bd7..29097583 100644
--- a/algo/blast/api/dust_filter.c
+++ b/algo/blast/api/dust_filter.c
@@ -1,5 +1,5 @@
#ifndef SKIP_DOXYGEN_PROCESSING
-static char const rcsid[] = "$Id: dust_filter.c,v 1.7 2006/05/24 21:17:50 camacho Exp $";
+static char const rcsid[] = "$Id: dust_filter.c,v 1.10 2007/01/24 14:31:16 madden Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
/*
@@ -44,14 +44,91 @@ static char const rcsid[] = "$Id: dust_filter.c,v 1.7 2006/05/24 21:17:50 camach
#include <algo/blast/api/seqsrc_readdb.h>
#include <algo/blast/core/blast_filter.h>
#include <algo/blast/core/blast_util.h>
-#include <algo/blast/core/blast_inline.h>
-#include <algo/blast/core/blast_dust.h>
+#include <blast_dust.h>
/** @addtogroup CToolkitAlgoBlast
*
* @{
*/
+
+/** Look for dustable locations
+ * @param loc returns locations [in|out]
+ * @param reg dust specific locations [in]
+ * @param nreg number of DREGION [in]
+ * @return 0 if no error
+ */
+static Int2
+s_GetDustLocations (BlastSeqLoc** loc, DREGION* reg, Int4 nreg)
+{
+ BlastSeqLoc* tail = NULL; /* pointer to tail of loc linked list */
+
+ if (!loc)
+ return -1;
+
+ *loc = NULL;
+
+ /* point to dusted locations */
+ if (nreg > 0) {
+ Int4 i;
+ for (i = 0; reg && i < nreg; i++) {
+ /* Cache the tail of the list to avoid the overhead of traversing the
+ * list when appending to it */
+ tail = BlastSeqLocNew(tail ? &tail : loc, reg->from, reg->to);
+ reg = reg->next;
+ }
+ }
+ return 0;
+}
+
+/** Dust provided sequence.
+ * @param sequence input sequence [in]
+ * @param length number of bases [in]
+ * @param offset where to start on input sequence [in]
+ * @param level dust parameter [in]
+ * @param window dust parameter [in]
+ * @param linker dust parameter [in]
+ * @param dust_loc returns locations [in|out]
+ * @return 0 if no error
+ */
+Int2 s_SeqBufferDust (Uint1* sequence, Int4 length, Int4 offset,
+ Int2 level, Int2 window, Int2 linker,
+ BlastSeqLoc** dust_loc)
+{
+ DREGION* reg,* regold;
+ Int4 nreg;
+ Int2 status = 0;
+
+ /* place for dusted regions */
+ regold = reg = (DREGION*) calloc(1, sizeof(DREGION));
+ if (!reg)
+ return -1;
+
+ nreg = DustSegs (sequence, length, offset, reg, (Int4)level,
+ (Int4)window, (Int4)linker);
+
+ status = s_GetDustLocations(dust_loc, reg, nreg);
+
+ /* clean up memory */
+ reg = regold;
+ while (reg)
+ {
+ regold = reg;
+ reg = reg->next;
+ sfree (regold);
+ }
+
+ return status;
+}
+
+/** Returns locations that should be masked according to dust.
+ * @param query_blk input sequence [in]
+ * @param query_info number of bases etc. [in]
+ * @param query_seqloc locations to be dusted [in]
+ * @param filter_options dust parameters provided here [in]
+ * @param filter_maskloc return value [in|out]
+ * @return 0 if no error
+ */
static Int2
s_GetFilteringLocations(BLAST_SequenceBlk* query_blk, BlastQueryInfo* query_info, SeqLoc* query_seqloc, const SBlastFilterOptions* filter_options, BlastMaskLoc** filter_maskloc)
{
@@ -92,7 +169,7 @@ s_GetFilteringLocations(BLAST_SequenceBlk* query_blk, BlastQueryInfo* query_info
if (BlastIsReverseStrand(kIsNucl, context) == TRUE)
{ /* Reverse this as it's on minus strand. */
BlastSeqLoc* filter_slp_rev = NULL;
- SeqBufferDust(buffer, query_length, 0, dust_options->level,
+ s_SeqBufferDust(buffer, query_length, 0, dust_options->level,
dust_options->window, dust_options->linker, &filter_slp);
/* Reverse this relative to the part of the query being searched, leave it up to
@@ -104,7 +181,7 @@ s_GetFilteringLocations(BLAST_SequenceBlk* query_blk, BlastQueryInfo* query_info
}
else
{
- SeqBufferDust(buffer, query_length, 0, dust_options->level,
+ s_SeqBufferDust(buffer, query_length, 0, dust_options->level,
dust_options->window, dust_options->linker, &filter_slp);
}
diff --git a/algo/blast/api/hspstream_queue.c b/algo/blast/api/hspstream_queue.c
index b94b1c89..9324f40d 100644
--- a/algo/blast/api/hspstream_queue.c
+++ b/algo/blast/api/hspstream_queue.c
@@ -1,4 +1,4 @@
-/* $Id: hspstream_queue.c,v 1.8 2005/04/12 17:59:08 dondosha Exp $
+/* $Id: hspstream_queue.c,v 1.9 2007/07/27 18:03:11 papadopo Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -34,7 +34,7 @@
#ifndef SKIP_DOXYGEN_PROCESSING
static char const rcsid[] =
- "$Id: hspstream_queue.c,v 1.8 2005/04/12 17:59:08 dondosha Exp $";
+ "$Id: hspstream_queue.c,v 1.9 2007/07/27 18:03:11 papadopo Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
@@ -203,6 +203,10 @@ BlastHSPListQueueNew(BlastHSPStream* hsp_stream, void* args)
SetMethod(hsp_stream, eWrite, fnptr);
fnptr.closeFn = &BlastHSPListQueueClose;
SetMethod(hsp_stream, eClose, fnptr);
+ fnptr.batch_read = NULL;
+ SetMethod(hsp_stream, eBatchRead, fnptr);
+ fnptr.mergeFn = NULL;
+ SetMethod(hsp_stream, eMerge, fnptr);
SetData(hsp_stream, args);
return hsp_stream;
diff --git a/algo/blast/api/repeats_filter.c b/algo/blast/api/repeats_filter.c
index 7a7aa05c..61259273 100644
--- a/algo/blast/api/repeats_filter.c
+++ b/algo/blast/api/repeats_filter.c
@@ -1,5 +1,5 @@
#ifndef SKIP_DOXYGEN_PROCESSING
-static char const rcsid[] = "$Id: repeats_filter.c,v 1.15 2006/05/24 21:17:50 camacho Exp $";
+static char const rcsid[] = "$Id: repeats_filter.c,v 1.18 2007/03/12 16:13:36 madden Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
/*
@@ -70,7 +70,7 @@ s_SetRepeatsSearchOptions(SBlastOptions* options)
SBlastOptionsSetFilterString(options, REPEATS_SEARCH_FILTER_STRING)))
return status;
- options->lookup_options->lut_type = NA_LOOKUP_TABLE;
+ options->lookup_options->lut_type = eNaLookupTable;
options->score_options->penalty = REPEATS_SEARCH_PENALTY;
options->score_options->gap_open = REPEATS_SEARCH_GAP_OPEN;
options->score_options->gap_extend = REPEATS_SEARCH_GAP_EXTEND;
@@ -227,8 +227,10 @@ Blast_FindRepeatFilterSeqLoc(SeqLoc* query_seqloc,
s_SetRepeatsSearchOptions(options);
- status =
- Blast_RunSearch(query_seqloc, seq_src, NULL, options, NULL,
+ status =
+ Blast_RunSearch(query_seqloc, (Blast_PsiCheckpointLoc *) NULL,
+ seq_src, (SeqLoc*) NULL, options,
+ (BlastTabularFormatData*) NULL,
&results, &filter_loc, sum_returns);
/* The ReadDBFILE structure will not be destroyed here, because the
diff --git a/algo/blast/api/seqsrc_multiseq.c b/algo/blast/api/seqsrc_multiseq.c
index a8ccdad3..1ffdfbef 100644
--- a/algo/blast/api/seqsrc_multiseq.c
+++ b/algo/blast/api/seqsrc_multiseq.c
@@ -1,4 +1,4 @@
-/* $Id: seqsrc_multiseq.c,v 1.24 2006/05/24 21:17:50 camacho Exp $
+/* $Id: seqsrc_multiseq.c,v 1.27 2007/05/16 18:11:11 camacho Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -30,7 +30,7 @@
*/
#ifndef SKIP_DOXYGEN_PROCESSING
-static char const rcsid[] = "$Id: seqsrc_multiseq.c,v 1.24 2006/05/24 21:17:50 camacho Exp $";
+static char const rcsid[] = "$Id: seqsrc_multiseq.c,v 1.27 2007/05/16 18:11:11 camacho Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
#include <algo/blast/api/seqsrc_multiseq.h>
@@ -171,6 +171,16 @@ s_MultiSeqGetNumSeqs(void* multiseq_handle, void* ignoreme)
return seq_info->num_seqs;
}
+/** Returns zero in this implementation as not supported without an alias file.
+ * @param multiseq_handle Pointer to the structure containing sequences [in]
+ * @param ignoreme Unused by this implementation [in]
+ */
+static Int4
+s_MultiSeqGetNumSeqsStats(void* multiseq_handle, void* ignoreme)
+{
+ return 0;
+}
+
/** Returns 0 as total length, indicating that this is NOT a database!
* @param multiseq_handle Pointer to the structure containing sequences [in]
* @param ignoreme Unused by this implementation [in]
@@ -181,6 +191,16 @@ s_MultiSeqGetTotLen(void* multiseq_handle, void* ignoreme)
return 0;
}
+/** Returns 0 for statistical total length as this is not supported without an alias file.
+ * @param multiseq_handle Pointer to the structure containing sequences [in]
+ * @param ignoreme Unused by this implementation [in]
+ */
+static Int8
+s_MultiSeqGetTotLenStats(void* multiseq_handle, void* ignoreme)
+{
+ return 0;
+}
+
/** Needed for completeness only.
* @param multiseq_handle Pointer to the structure containing sequences [in]
* @param ignoreme Unused by this implementation [in]
@@ -311,6 +331,13 @@ s_MultiSeqIteratorNext(void* multiseq_handle, BlastSeqSrcIterator* itr)
return retval;
}
+/** Resets the internal bookmark iterator (N/A in this case) */
+static void
+s_MultiSeqResetChunkIter(void* multiseq_handle)
+{
+ return;
+}
+
/** Multi sequence source destructor: frees its internal data structure and the
* BlastSeqSrc structure itself.
* @param seq_src BlastSeqSrc structure to free [in]
@@ -327,8 +354,6 @@ s_MultiSeqSrcFree(BlastSeqSrc* seq_src)
seq_info = (MultiSeqInfo*)_BlastSeqSrcImpl_GetDataStructure(seq_src);
seq_info = s_MultiSeqInfoFree(seq_info);
- sfree(seq_src);
-
return NULL;
}
@@ -376,14 +401,17 @@ s_MultiSeqSrcNew(BlastSeqSrc* retval, void* args)
_BlastSeqSrcImpl_SetCopyFnPtr(retval, &s_MultiSeqSrcCopy);
_BlastSeqSrcImpl_SetDataStructure(retval, (void*) seq_info);
_BlastSeqSrcImpl_SetGetNumSeqs(retval, &s_MultiSeqGetNumSeqs);
+ _BlastSeqSrcImpl_SetGetNumSeqsStats(retval, &s_MultiSeqGetNumSeqsStats);
_BlastSeqSrcImpl_SetGetMaxSeqLen(retval, &s_MultiSeqGetMaxLength);
_BlastSeqSrcImpl_SetGetAvgSeqLen(retval, &s_MultiSeqGetAvgLength);
_BlastSeqSrcImpl_SetGetTotLen(retval, &s_MultiSeqGetTotLen);
+ _BlastSeqSrcImpl_SetGetTotLenStats(retval, &s_MultiSeqGetTotLenStats);
_BlastSeqSrcImpl_SetGetName(retval, &s_MultiSeqGetName);
_BlastSeqSrcImpl_SetGetIsProt(retval, &s_MultiSeqGetIsProt);
_BlastSeqSrcImpl_SetGetSequence(retval, &s_MultiSeqGetSequence);
_BlastSeqSrcImpl_SetGetSeqLen(retval, &s_MultiSeqGetSeqLen);
_BlastSeqSrcImpl_SetIterNext(retval, &s_MultiSeqIteratorNext);
+ _BlastSeqSrcImpl_SetResetChunkIterator(retval, &s_MultiSeqResetChunkIter);
_BlastSeqSrcImpl_SetReleaseSequence(retval, &s_MultiSeqReleaseSequence);
return retval;
diff --git a/algo/blast/api/seqsrc_multiseq.h b/algo/blast/api/seqsrc_multiseq.h
index e5b92344..3655cfd3 100644
--- a/algo/blast/api/seqsrc_multiseq.h
+++ b/algo/blast/api/seqsrc_multiseq.h
@@ -1,4 +1,4 @@
-/* $Id: seqsrc_multiseq.h,v 1.9 2005/04/06 23:27:53 dondosha Exp $
+/* $Id: seqsrc_multiseq.h,v 1.10 2006/11/21 17:22:15 papadopo Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -33,7 +33,7 @@
#define MULTISEQ_SRC_H
#include <algo/blast/core/blast_seqsrc.h>
-#include <algo/blast/core/blast_def.h>
+#include <algo/blast/core/blast_program.h>
#include <algo/blast/core/blast_message.h>
#include <objloc.h>
diff --git a/algo/blast/api/seqsrc_readdb.c b/algo/blast/api/seqsrc_readdb.c
index dc6e99cd..5b1d35bd 100644
--- a/algo/blast/api/seqsrc_readdb.c
+++ b/algo/blast/api/seqsrc_readdb.c
@@ -1,4 +1,4 @@
-/* $Id: seqsrc_readdb.c,v 1.54 2006/05/31 19:00:52 camacho Exp $
+/* $Id: seqsrc_readdb.c,v 1.57 2007/05/15 15:29:56 camacho Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -29,7 +29,7 @@
*/
#ifndef SKIP_DOXYGEN_PROCESSING
-static char const rcsid[] = "$Id: seqsrc_readdb.c,v 1.54 2006/05/31 19:00:52 camacho Exp $";
+static char const rcsid[] = "$Id: seqsrc_readdb.c,v 1.57 2007/05/15 15:29:56 camacho Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
#include <algo/blast/api/seqsrc_readdb.h>
@@ -73,6 +73,22 @@ s_ReaddbGetNumSeqs(void* readdb_handle, void* ignoreme)
return dbnseqs;
}
+/** Retrieves a number of sequences in the BlastSeqSrc
+ * for use in calculating search space (and expect value).
+ * @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
+ * @param ignoreme Unused by this implementation [in]
+ */
+static Int4
+s_ReaddbGetNumSeqsStats(void* readdb_handle, void* ignoreme)
+{
+ ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
+ Int4 dbnseqs = 0;
+ Int8 dblength = 0;
+
+ readdb_get_stats_numbers(rdfp, &dbnseqs, &dblength);
+ return dbnseqs;
+}
+
/** Retrieves the total length of all sequences in the BlastSeqSrc.
* @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
* @param ignoreme Unused by this implementation [in]
@@ -88,6 +104,22 @@ s_ReaddbGetTotLen(void* readdb_handle, void* ignoreme)
return dblength;
}
+/** Retrieves the total length of all sequences in the BlastSeqSrc
+ * for use in calculating search space (and expect value).
+ * @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
+ * @param ignoreme Unused by this implementation [in]
+ */
+static Int8
+s_ReaddbGetTotLenStats(void* readdb_handle, void* ignoreme)
+{
+ ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
+ Int4 dbnseqs = 0;
+ Int8 dblength = 0;
+
+ readdb_get_stats_numbers(rdfp, &dbnseqs, &dblength);
+ return dblength;
+}
+
/** Retrieves the average length of sequences in the BlastSeqSrc.
* @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
* @param ignoreme Unused by this implementation [in]
@@ -433,6 +465,18 @@ s_ReaddbIteratorNext(void* readdb_handle, BlastSeqSrcIterator* itr)
return retval;
}
+/** Reset the ReadDBFilePtr's internal chunk "bookmark"
+ * @param readdb_handle Pointer to the ReadDBFILE structure [in]
+ */
+static void
+s_ReaddbResetChunkIterator(void* readdb_handle)
+{
+ ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
+ ASSERT(rdfp);
+ rdfp->shared_info->last_oid_assigned = 0;
+ return;
+}
+
/** Readdb sequence source destructor: frees its internal data structure and the
* BlastSeqSrc structure itself.
* @param bssp BlastSeqSrc structure to free [in]
@@ -444,7 +488,6 @@ s_ReaddbSeqSrcFree(BlastSeqSrc* bssp)
if (!bssp)
return NULL;
readdb_destruct((ReadDBFILEPtr)_BlastSeqSrcImpl_GetDataStructure(bssp));
- sfree(bssp);
return NULL;
}
@@ -484,14 +527,17 @@ s_InitNewReaddbSeqSrc(BlastSeqSrc* retval, ReadDBFILE* rdfp)
_BlastSeqSrcImpl_SetCopyFnPtr(retval, &s_ReaddbSeqSrcCopy);
_BlastSeqSrcImpl_SetDataStructure(retval, (void*) rdfp);
_BlastSeqSrcImpl_SetGetNumSeqs(retval, &s_ReaddbGetNumSeqs);
+ _BlastSeqSrcImpl_SetGetNumSeqsStats(retval, &s_ReaddbGetNumSeqsStats);
_BlastSeqSrcImpl_SetGetMaxSeqLen(retval, &s_ReaddbGetMaxLength);
_BlastSeqSrcImpl_SetGetAvgSeqLen(retval, &s_ReaddbGetAvgLength);
_BlastSeqSrcImpl_SetGetTotLen(retval, &s_ReaddbGetTotLen);
+ _BlastSeqSrcImpl_SetGetTotLenStats(retval, &s_ReaddbGetTotLenStats);
_BlastSeqSrcImpl_SetGetName(retval, &s_ReaddbGetName);
_BlastSeqSrcImpl_SetGetIsProt(retval, &s_ReaddbGetIsProt);
_BlastSeqSrcImpl_SetGetSequence(retval, &s_ReaddbGetSequence);
_BlastSeqSrcImpl_SetGetSeqLen(retval, &s_ReaddbGetSeqLen);
_BlastSeqSrcImpl_SetIterNext(retval, &s_ReaddbIteratorNext);
+ _BlastSeqSrcImpl_SetResetChunkIterator(retval, &s_ReaddbResetChunkIterator);
_BlastSeqSrcImpl_SetReleaseSequence(retval, &s_ReaddbReleaseSequence);
#ifdef KAPPA_PRINT_DIAGNOSTICS
_BlastSeqSrcImpl_SetGetGis(retval, &s_ReaddbGetGis);
diff --git a/algo/blast/api/twoseq_api.c b/algo/blast/api/twoseq_api.c
index 16b2e30c..7e66ce17 100644
--- a/algo/blast/api/twoseq_api.c
+++ b/algo/blast/api/twoseq_api.c
@@ -1,4 +1,4 @@
-/* $Id: twoseq_api.c,v 1.54 2006/01/23 16:39:50 papadopo Exp $
+/* $Id: twoseq_api.c,v 1.59 2007/03/20 15:17:16 kans Exp $
***************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -36,9 +36,10 @@
#include <algo/blast/core/blast_message.h>
#include <algo/blast/core/blast_util.h>
#include <algo/blast/core/blast_engine.h>
-#include <algo/blast/core/mb_lookup.h>
#include <algo/blast/core/blast_filter.h>
+#include <algo/blast/core/blast_nalookup.h>
#include <algo/blast/core/hspstream_collector.h>
+#include <algo/blast/core/gencode_singleton.h>
#include <algo/blast/api/seqsrc_multiseq.h>
#include <algo/blast/api/blast_seqalign.h>
#include <algo/blast/api/blast_seq.h>
@@ -114,11 +115,11 @@ s_TwoSeqBasicFillOptions(const BLAST_SummaryOptions* basic_options,
Int2 word_size = basic_options->word_size;
char *matrix;
- if (program_number == eBlastTypeTblastn ||
- program_number == eBlastTypeTblastx) {
- if ((status = BLAST_GeneticCodeFind(db_options->genetic_code,
- &db_options->gen_code_string)))
- return status;
+ if (Blast_SubjectIsTranslated(program_number)) {
+ Uint1* gc = NULL;
+ BLAST_GeneticCodeFind(db_options->genetic_code, &gc);
+ GenCodeSingletonAdd(db_options->genetic_code, gc);
+ free(gc);
}
if (program_number == eBlastTypeBlastn) {
@@ -137,7 +138,7 @@ s_TwoSeqBasicFillOptions(const BLAST_SummaryOptions* basic_options,
query_length > MEGABLAST_CUTOFF) {
do_megablast = TRUE;
if (basic_options->gapped_calculation)
- greedy_align = 1; /* one-pass, no ungapped */
+ greedy_align = 1;
}
@@ -184,7 +185,6 @@ s_TwoSeqBasicFillOptions(const BLAST_SummaryOptions* basic_options,
BLAST_FillInitialWordOptions(word_options,
program_number,
- (Boolean)greedy_align,
0, /* default window size. */
0); /* default ungapped X dropoff */