summaryrefslogtreecommitdiff
path: root/algo/blast/api
diff options
context:
space:
mode:
Diffstat (limited to 'algo/blast/api')
-rw-r--r--algo/blast/api/blast_format.c433
-rw-r--r--algo/blast/api/blast_format.h44
-rw-r--r--algo/blast/api/blast_returns.c405
-rw-r--r--algo/blast/api/blast_returns.h105
-rw-r--r--algo/blast/api/blast_seq.c16
-rw-r--r--algo/blast/api/blast_seqalign.c164
-rw-r--r--algo/blast/api/blast_seqalign.h15
-rw-r--r--algo/blast/api/blast_tabular.c238
-rw-r--r--algo/blast/api/blast_tabular.h101
-rw-r--r--algo/blast/api/hspstream_queue.c182
-rw-r--r--algo/blast/api/hspstream_queue.h64
-rw-r--r--algo/blast/api/multiseq_src.c6
-rw-r--r--algo/blast/api/seqsrc_readdb.c46
-rw-r--r--algo/blast/api/seqsrc_readdb.h9
-rw-r--r--algo/blast/api/twoseq_api.c262
-rw-r--r--algo/blast/api/twoseq_api.h77
16 files changed, 1585 insertions, 582 deletions
diff --git a/algo/blast/api/blast_format.c b/algo/blast/api/blast_format.c
index eb63b53a..4591df8d 100644
--- a/algo/blast/api/blast_format.c
+++ b/algo/blast/api/blast_format.c
@@ -1,4 +1,4 @@
-/* $Id: blast_format.c,v 1.48 2004/05/03 15:24:04 dondosha Exp $
+/* $Id: blast_format.c,v 1.51 2004/06/07 18:40:34 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -34,18 +34,19 @@ Contents: Formatting of BLAST results (SeqAlign)
Detailed Contents:
******************************************************************************
- * $Revision: 1.48 $
+ * $Revision: 1.51 $
* */
-static char const rcsid[] = "$Id: blast_format.c,v 1.48 2004/05/03 15:24:04 dondosha Exp $";
+static char const rcsid[] = "$Id: blast_format.c,v 1.51 2004/06/07 18:40:34 dondosha Exp $";
#include <algo/blast/api/blast_format.h>
#include <algo/blast/api/blast_seq.h>
#include <algo/blast/core/blast_filter.h>
#include <algo/blast/core/blast_util.h>
+#include <algo/blast/core/blast_seqsrc.h>
+#include <algo/blast/api/blast_returns.h>
#include <sequtil.h>
#include <txalign.h>
-#include <readdb.h>
extern Uint1 LIBCALL
BlastGetTypes PROTO((char* blast_program, Boolean* query_is_na,
@@ -91,19 +92,23 @@ extern CharPtr LIBCALL BlastGetReference (Boolean html);
extern CharPtr LIBCALL BlastGetVersionNumber PROTO((void));
extern CharPtr LIBCALL BlastGetReleaseDate PROTO((void));
-typedef struct TxDfDbInfo {
- struct TxDfDbInfo* next;
- Boolean is_protein;
- char* name;
- char* definition;
- char* date;
- Int8 total_length;
- Int4 number_seqs;
- Boolean subset; /* Print the subset message. */
-} TxDfDbInfo, *TxDfDbInfoPtr;
+/** This function is defined in distrib/tools/blastool.c; 1st argument is
+ * a pointer to TxDfDbInfo, defined in blastpri.h, which is identical to
+ * BLAST_DbSummary, defined in algo/blast/api/blast_returns.h
+ */
+extern Boolean LIBCALL
+PrintDbReport PROTO((BLAST_DbSummary* dbinfo, Int4 line_length, FILE *outfp));
+
+/** The following 3 functions are from distrib/tools/readdb.h */
+extern Boolean LIBCALL
+ReadDBBioseqFetchEnable PROTO((CharPtr program, CharPtr dbname, Boolean is_na,
+ Boolean now));
+
+extern void LIBCALL ReadDBBioseqFetchDisable PROTO((void));
extern Boolean LIBCALL
-PrintDbReport PROTO((TxDfDbInfo* dbinfo, Int4 line_length, FILE *outfp));
+PrintDbInformation PROTO((CharPtr database, Boolean is_aa, Int4 line_length,
+ FILE *outfp, Boolean html));
Int2 BlastFormattingOptionsNew(Uint1 program_number, char* file_out,
Int4 num_descriptions, Int4 num_alignments, Int4 align_view,
@@ -395,7 +400,7 @@ static MBXml* MBXmlInit(AsnIoPtr aip, CharPtr program, CharPtr database,
Int2 BLAST_FormatResults(SeqAlignPtr head, char* blast_database,
char* blast_program, Int4 num_queries,
SeqLocPtr query_slp, BlastMaskLoc* blast_mask,
- BlastFormattingOptions* format_options, Boolean is_ooframe)
+ const BlastFormattingOptions* format_options, Boolean is_ooframe)
{
SeqAlignPtr seqalign = head, sap, next_seqalign;
SeqLocPtr mask_loc, next_mask_loc = NULL, tmp_loc = NULL, mask_loc_head;
@@ -598,277 +603,20 @@ Int2 BLAST_FormatResults(SeqAlignPtr head, char* blast_database,
return 0;
}
-static TxDfDbInfo* LIBCALL
-TxDfDbInfoDestruct (TxDfDbInfo* dbinfo)
-{
- TxDfDbInfo* next;
-
- if (dbinfo == NULL)
- return NULL;
-
- while (dbinfo)
- {
- sfree(dbinfo->name);
- sfree(dbinfo->definition);
- sfree(dbinfo->date);
- next = dbinfo->next;
- sfree(dbinfo);
- dbinfo = next;
- }
-
- return dbinfo;
-}
-
-static TxDfDbInfo* BLAST_GetDbInfo(ReadDBFILEPtr rdfp)
-{
- TxDfDbInfo* dbinfo,* head = NULL,* dbinfo_var = NULL;
- char* chptr;
-
- while (rdfp) {
- dbinfo = calloc(1, sizeof(TxDfDbInfo));
- dbinfo->name = strdup(readdb_get_filename(rdfp));
-
- if((chptr = readdb_get_title(rdfp)) == NULL)
- chptr = readdb_get_filename(rdfp);
- dbinfo->definition = strdup(chptr);
-
- dbinfo->date = strdup(readdb_get_date(rdfp));
-
- dbinfo->is_protein = readdb_is_prot(rdfp);
-
- if (rdfp->aliaslen)
- dbinfo->total_length = rdfp->aliaslen;
- else
- dbinfo->total_length = readdb_get_dblen(rdfp);
- if (rdfp->aliasnseq)
- dbinfo->number_seqs = rdfp->aliasnseq;
- else
- dbinfo->number_seqs = readdb_get_num_entries(rdfp);
- if (head == NULL) {
- head = dbinfo;
- dbinfo_var = dbinfo;
- } else {
- dbinfo_var->next = dbinfo;
- dbinfo_var = dbinfo_var->next;
- }
- rdfp = rdfp->next;
- }
- return head;
-}
-/*
- adds the new string to the buffer, separating by a tilde.
- Checks the size of the buffer for FormatBlastParameters and
- allocates longer replacement if needed.
-*/
-
-static Boolean
-add_string_to_bufferEx(char* buffer, char* *old, Int2* old_length, Boolean add_tilde)
-
-{
- char* new,* ptr;
- Int2 length, new_length;
-
- length = (StringLen(*old));
-
- if((Int2)(StringLen(buffer)+length+3) > *old_length)
- {
- new_length = *old_length + 255;
- new = calloc(new_length, sizeof(char));
- if (*old_length > 0 && *old != NULL)
- {
- memcpy(new, *old, *old_length);
- sfree(*old);
- }
- *old = new;
- *old_length = new_length;
- }
-
- ptr = *old;
- ptr += length;
- if (add_tilde)
- {
- *ptr = '~';
- ptr++;
- }
-
- while (*buffer != NULLB)
- {
- *ptr = *buffer;
- buffer++; ptr++;
- }
-
- return TRUE;
-}
-
-static Boolean
-add_string_to_buffer(char* buffer, char* *old, Int2* old_length)
-
-{
- return add_string_to_bufferEx(buffer, old, old_length, TRUE);
-}
-
-
-
-/*
- Formats the BLAST parameters for the BLAST report.
- One char* is returned, newlines are indicated by tildes ('~').
-*/
-
-
-static char*
-FormatBlastParameters(Uint1 program_number,
- BlastScoringOptions* score_options,
- BlastScoreBlk* sbp, LookupTableOptions* lookup_options,
- BlastInitialWordOptions* word_options,
- BlastExtensionOptions* ext_options,
- BlastHitSavingOptions* hit_options,
- BlastQueryInfo* query_info, ReadDBFILEPtr rdfp,
- BlastReturnStat* return_stats)
-{
- Int4 cutoff = 0;
- char buffer[128];
- char* ret_buffer;
- Int2 ret_buffer_length;
- Int4 num_entries;
- Int8 total_length;
- Int4 qlen;
- double evalue;
- Boolean single_query = (query_info->last_context <= 1);
- Blast_KarlinBlk* kbp;
-
- ret_buffer = NULL;
- ret_buffer_length = 0;
-
-
- if (score_options->matrix) {
- sprintf(buffer, "Matrix: %s", score_options->matrix);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- }
-
- if (score_options->gapped_calculation) {
- sprintf(buffer, "Gap Penalties: Existence: %ld, Extension: %ld",
- (long) score_options->gap_open,
- (long) score_options->gap_extend);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- }
-
- if (rdfp) {
- readdb_get_totals_ex(rdfp, &total_length, &num_entries, TRUE);
- } else {
- num_entries = 1;
- total_length = 1000;
- }
-
- sprintf(buffer, "Number of Sequences: %ld", (long) num_entries);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- if (return_stats) {
- sprintf(buffer, "Number of Hits to DB: %s",
- Nlm_Int8tostr((Int8) return_stats->db_hits, 1));
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
-
- sprintf(buffer, "Number of extensions: %ld",
- (long) return_stats->init_extends);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- sprintf(buffer, "Number of successful extensions: %ld",
- (long) return_stats->good_init_extends);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
-
- if (hit_options->expect_value > 0.1) {
- sprintf(buffer, "Number of sequences better than %4.1f: %ld",
- hit_options->expect_value,
- (long) return_stats->number_of_seqs_better_E);
- } else {
- sprintf(buffer, "Number of sequences better than %3.1e: %ld",
- hit_options->expect_value,
- (long) return_stats->number_of_seqs_better_E);
- }
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
-
- if (score_options->gapped_calculation) {
- sprintf(buffer,
- "Number of HSP's better than %4.1f without gapping: %ld",
- hit_options->expect_value,
- (long) return_stats->prelim_gap_no_contest);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- sprintf(buffer,
- "Number of HSP's successfully gapped in prelim test: %ld",
- (long) return_stats->prelim_gap_passed);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- }
- }
- /* Total query length does not include the first and last sentinel byte */
- qlen = query_info->context_offsets[query_info->last_context+1] - 1;
- sprintf(buffer, "length of query: %ld", (long)qlen);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- sprintf(buffer, "length of database: %s", Nlm_Int8tostr (total_length, 1));
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
-
- if (single_query) {
- sprintf(buffer, "effective search space used: %s",
- Nlm_Int8tostr(
- query_info->eff_searchsp_array[query_info->first_context], 1));
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- }
- sprintf(buffer, "T: %ld", (long) lookup_options->threshold);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- sprintf(buffer, "A: %ld", (long) word_options->window_size);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
-
- if (!score_options->gapped_calculation)
- kbp = sbp->kbp[query_info->first_context];
- else
- kbp = sbp->kbp_gap[query_info->first_context];
-
- sprintf(buffer, "X1: %ld (%4.1f bits)",
- (long)return_stats->x_drop_ungapped, word_options->x_dropoff);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- if (score_options->gapped_calculation) {
- sprintf(buffer, "X2: %ld (%4.1f bits)",
- (long)return_stats->x_drop_gap, ext_options->gap_x_dropoff);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- sprintf(buffer, "X3: %ld (%4.1f bits)",
- (long)return_stats->x_drop_gap_final,
- ext_options->gap_x_dropoff_final);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
-
- sprintf(buffer, "S1: %ld (%4.1f bits)",
- (long)return_stats->gap_trigger, ext_options->gap_trigger);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- }
-
- cutoff = 0;
- if (single_query) {
- Int4 context = query_info->first_context;
- double searchsp = (double) query_info->eff_searchsp_array[context];
-
- /* For translated RPS blast the search space must be scaled down */
- if (program_number == blast_type_rpstblastn)
- searchsp = searchsp / NUM_FRAMES;
-
- evalue = hit_options->expect_value;
- BLAST_Cutoffs(&cutoff, &evalue, kbp, searchsp, FALSE, 0);
- sprintf(buffer, "S2: %ld (%4.1f bits)", (long) cutoff,
- (((cutoff)*(kbp->Lambda))-(kbp->logK))/NCBIMATH_LN2);
- add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
- }
- return ret_buffer;
-}
-
Int2 PrintOutputFooter(Uint1 program_number,
- BlastFormattingOptions* format_options,
- BlastScoringOptions* score_options, BlastScoreBlk* sbp,
- LookupTableOptions* lookup_options,
- BlastInitialWordOptions* word_options,
- BlastExtensionOptions* ext_options,
- BlastHitSavingOptions* hit_options,
- BlastQueryInfo* query_info, char* dbname,
- BlastReturnStat* return_stats,
- Boolean db_is_na)
+ const BlastFormattingOptions* format_options,
+ const BlastScoringOptions* score_options, const BlastScoreBlk* sbp,
+ const LookupTableOptions* lookup_options,
+ const BlastInitialWordOptions* word_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastQueryInfo* query_info, const BlastSeqSrc* seq_src,
+ const BlastDiagnostics* diagnostics)
{
FILE *outfp;
- TxDfDbInfo* dbinfo_head,* dbinfo;
+ BLAST_DbSummary* dbinfo_head,* dbinfo;
char* params_buffer;
- ReadDBFILEPtr rdfp = NULL;
if (!format_options || !format_options->outfp)
return -1;
@@ -882,14 +630,12 @@ Int2 PrintOutputFooter(Uint1 program_number,
fprintf(outfp, "<PRE>\n");
init_buff_ex(85);
- if (dbname && (rdfp = readdb_new(dbname, !db_is_na))) {
- dbinfo_head = BLAST_GetDbInfo(rdfp);
+ dbinfo_head = Blast_GetDbSummary(seq_src);
- for (dbinfo = dbinfo_head; dbinfo; dbinfo = dbinfo->next) {
- PrintDbReport(dbinfo, 70, outfp);
- }
- dbinfo_head = TxDfDbInfoDestruct(dbinfo_head);
+ for (dbinfo = dbinfo_head; dbinfo; dbinfo = dbinfo->next) {
+ PrintDbReport(dbinfo, 70, outfp);
}
+ dbinfo_head = Blast_DbSummaryFree(dbinfo_head);
if (sbp && sbp->kbp) {
Blast_KarlinBlk* ka_params;
@@ -908,20 +654,19 @@ Int2 PrintOutputFooter(Uint1 program_number,
}
params_buffer =
- FormatBlastParameters(program_number, score_options, sbp,
- lookup_options, word_options, ext_options, hit_options, query_info,
- rdfp, return_stats);
+ Blast_GetParametersBuffer(program_number, score_options, sbp,
+ lookup_options, word_options, ext_options,
+ hit_options, eff_len_options, query_info,
+ seq_src, diagnostics);
PrintTildeSepLines(params_buffer, 70, outfp);
sfree(params_buffer);
free_buff();
- readdb_destruct(rdfp);
-
return 0;
}
-Int2 BLAST_PrintOutputHeader(BlastFormattingOptions* format_options,
+Int2 BLAST_PrintOutputHeader(const BlastFormattingOptions* format_options,
Boolean is_megablast, char* dbname, Boolean is_protein)
{
if (format_options->align_view < 7) {
@@ -965,24 +710,15 @@ Int2 BLAST_PrintOutputHeader(BlastFormattingOptions* format_options,
#define BUFFER_LENGTH 256
-static void
-PrintSeqDefline(SeqIdPtr sip, char* descr, char** buffer_ptr,
+void
+Blast_SeqIdGetDefLine(SeqIdPtr sip, char* descr, char** buffer_ptr,
Boolean ncbi_gi, Boolean accession_only, Boolean seqid_only,
Boolean believe_local_id)
{
- char* seqid_buffer = NULL, *title = NULL, *defline_buffer;
+ char* seqid_buffer = NULL, *title = NULL, *defline_buffer = NULL;
Int4 gi;
Boolean numeric_sip_type = FALSE;
- if (!descr) {
- /* Get the title */
- BioseqPtr bsp = BioseqLockById(sip);
- title = strdup(BioseqGetTitle(bsp));
- BioseqUnlock(bsp);
- } else {
- title = descr;
- }
-
if ((believe_local_id || sip->choice != SEQID_LOCAL) &&
(sip->choice != SEQID_GENERAL ||
StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID"))) {
@@ -998,8 +734,20 @@ PrintSeqDefline(SeqIdPtr sip, char* descr, char** buffer_ptr,
GetAccessionFromSeqId(SeqIdFindBestAccession(sip),
&gi, &seqid_buffer);
}
- } else if (seqid_only && title) {
- seqid_buffer = StringTokMT(title, " \t\n\r", &title);
+ } else {
+ if (!descr) {
+ /* Get the title */
+ BioseqPtr bsp = BioseqLockById(sip);
+ if (bsp) {
+ title = strdup(BioseqGetTitle(bsp));
+ BioseqUnlock(bsp);
+ }
+ } else {
+ title = descr;
+ }
+
+ if (seqid_only)
+ seqid_buffer = StringTokMT(title, " \t\n\r", &title);
}
if (numeric_sip_type && !seqid_buffer) {
@@ -1021,35 +769,13 @@ PrintSeqDefline(SeqIdPtr sip, char* descr, char** buffer_ptr,
*buffer_ptr = defline_buffer;
}
-static void ScoreAndEvalueToBuffers(double bit_score, double evalue,
- char* *bit_score_buf, char* *evalue_buf)
-{
- if (evalue < 1.0e-180)
- sprintf(*evalue_buf, "0.0");
- else if (evalue < 1.0e-99)
- sprintf(*evalue_buf, "%2.0e", evalue);
- else if (evalue < 0.0009)
- sprintf(*evalue_buf, "%3.1e", evalue);
- else if (evalue < 1.0)
- sprintf(*evalue_buf, "%4.3f", evalue);
- else
- sprintf(*evalue_buf, "%5.1f", evalue);
-
- if (bit_score > 9999)
- sprintf(*bit_score_buf, "%4.3e", bit_score);
- else if (bit_score > 99.9)
- sprintf(*bit_score_buf, "%4.1f", bit_score);
- else
- sprintf(*bit_score_buf, "%4.2f", bit_score);
-}
-
/* This function might serve as a starting point for a callback function
* that prints results before the traceback stage, e.g. the on-the-fly
* tabular output, a la the -D3 option of the old megablast.
*/
void BLAST_PrintIntermediateResults(BlastHSPResults* results,
BlastQueryInfo* query_info, SeqLocPtr query_slp,
- ReadDBFILEPtr rdfp, SeqIdPtr seqid, BlastScoreBlk* sbp,
+ BlastSeqSrc* seq_src, BlastScoreBlk* sbp,
char* filename)
{
Int4 query_index, hit_index, hsp_index;
@@ -1061,33 +787,24 @@ void BLAST_PrintIntermediateResults(BlastHSPResults* results,
Int4 q_start, q_end, s_start, s_end;
FILE *outfp;
SeqLocPtr slp;
- char* subject_descr;
Blast_KarlinBlk* kbp;
- char* bit_score_buff,* eval_buff;
- double bit_score;
+ char bit_score_buff[10], eval_buff[10];
+ char* eval_buff_ptr = NULL;
BlastHSP* hsp;
+ ListNode* seqid_wrap = NULL;
- if (!results || !query_info || !query_slp || (!rdfp && !seqid) ||
+ if (!results || !query_info || !query_slp || !seq_src ||
!sbp || !filename)
return;
outfp = FileOpen(filename, "w");
- eval_buff = (char *) malloc(10);
- bit_score_buff = (char *) malloc(10);
-
- if (!rdfp) {
- /* Two sequences case */
- subject_id = seqid;
- subject_descr = NULL;
- }
-
for (query_index = 0, slp = query_slp;
query_index < results->num_queries && slp;
++query_index, slp = slp->next) {
hit_list = results->hitlist_array[query_index];
query_id = SeqLocId(slp);
- PrintSeqDefline(query_id, NULL, &query_buffer, TRUE, FALSE, FALSE,
+ Blast_SeqIdGetDefLine(query_id, NULL, &query_buffer, TRUE, FALSE, FALSE,
FALSE);
fprintf(outfp, "#Query = %s\n\n", query_buffer);
sfree(query_buffer);
@@ -1100,11 +817,16 @@ void BLAST_PrintIntermediateResults(BlastHSPResults* results,
for (hit_index = 0; hit_index < hit_list->hsplist_count;
++hit_index) {
hsp_list = hit_list->hsplist_array[hit_index];
- if (rdfp) {
- readdb_get_descriptor(rdfp, hsp_list->oid, &subject_id,
- &subject_descr);
+ BLASTSeqSrcGetSeqId(seq_src, (void*) &hsp_list->oid);
+ if (seqid_wrap->choice == BLAST_SEQSRC_C_SEQID) {
+ subject_id = (SeqId*) seqid_wrap->ptr;
+ ListNodeFree(seqid_wrap);
+ } else {
+ /* Could not retrieve id for this subject sequence. This should
+ never happen, but if it does, skip this HSP. */
+ continue;
}
- PrintSeqDefline(subject_id, subject_descr, &subject_buffer,
+ Blast_SeqIdGetDefLine(subject_id, NULL, &subject_buffer,
TRUE, FALSE, FALSE, TRUE);
fprintf(outfp, ">%s\n\n", subject_buffer);
sfree(subject_buffer);
@@ -1124,19 +846,18 @@ void BLAST_PrintIntermediateResults(BlastHSPResults* results,
}
kbp = sbp->kbp[hsp->context];
- bit_score = (hsp->score*kbp->Lambda - kbp->logK) /
- NCBIMATH_LN2;
- ScoreAndEvalueToBuffers(bit_score, hsp->evalue,
- &bit_score_buff, &eval_buff);
+ eval_buff_ptr = eval_buff;
+
+ ScoreAndEvalueToBuffers(hsp->bit_score, hsp->evalue,
+ bit_score_buff, &eval_buff_ptr, TRUE);
fprintf(outfp, "[%ld %ld] [%ld %ld] %s %s\n",
(long)q_start, (long)q_end, (long)s_start, (long)s_end,
- bit_score_buff, eval_buff);
+ bit_score_buff, eval_buff_ptr);
}
}
}
}
FileClose(outfp);
}
-
diff --git a/algo/blast/api/blast_format.h b/algo/blast/api/blast_format.h
index 28535ff6..24f3bfa9 100644
--- a/algo/blast/api/blast_format.h
+++ b/algo/blast/api/blast_format.h
@@ -1,4 +1,4 @@
-/* $Id: blast_format.h,v 1.20 2004/04/22 22:15:40 dondosha Exp $
+/* $Id: blast_format.h,v 1.22 2004/06/07 18:40:48 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,7 +32,7 @@ Author: Ilya Dondoshansky
Contents: Functions needed for formatting of BLAST results
******************************************************************************
- * $Revision: 1.20 $
+ * $Revision: 1.22 $
* */
#ifndef __BLAST_FORMAT__
#define __BLAST_FORMAT__
@@ -48,10 +48,11 @@ extern "C" {
#include <ncbi.h>
#include <asn.h>
#include <bxmlobj.h>
-#include <readdb.h>
#include <algo/blast/core/blast_options.h>
#include <algo/blast/core/blast_hits.h>
-#include <algo/blast/api/seqsrc_readdb.h>
+#include <algo/blast/core/blast_seqsrc.h>
+#include <algo/blast/core/blast_diagnostics.h>
+#include <algo/blast/api/twoseq_api.h>
/** Options for formatting BLAST results
*/
@@ -117,7 +118,7 @@ typedef struct MBXml {
Int2 BLAST_FormatResults(SeqAlignPtr head, char* blast_database,
char* blast_program, Int4 num_queries,
SeqLocPtr query_slp, BlastMaskLoc* mask_loc,
- BlastFormattingOptions* format_options, Boolean is_ooframe);
+ const BlastFormattingOptions* format_options, Boolean is_ooframe);
/** Print the summary at the end of the BLAST report.
* @param program_number Type of BLAST program [in]
@@ -128,20 +129,23 @@ Int2 BLAST_FormatResults(SeqAlignPtr head, char* blast_database,
* @param word_options Word finding options and parameters [in]
* @param ext_options Extension options and parameters [in]
* @param hit_options Hit saving options [in]
+ * @param eff_len_options Effective lengths options, containing user-specified
+ * values for database length or eff. search space [in]
* @param query_info Query information [in]
- * @param dbname BLAST database name [in]
- * @param return_stats Data about this run [in]
- * @param db_is_na TRUE if a nucleotide database [in]
+ * @param seq_src Source of subject sequences [in]
+ * @param diagnostics Data about this run [in]
*/
Int2 PrintOutputFooter(Uint1 program_number,
- BlastFormattingOptions* format_options,
- BlastScoringOptions* score_options, BlastScoreBlk* sbp,
- LookupTableOptions* lookup_options,
- BlastInitialWordOptions* word_options,
- BlastExtensionOptions* ext_options,
- BlastHitSavingOptions* hit_options,
- BlastQueryInfo* query_info, char* dbname,
- BlastReturnStat* return_stats, Boolean db_is_na);
+ const BlastFormattingOptions* format_options,
+ const BlastScoringOptions* score_options,
+ const BlastScoreBlk* sbp,
+ const LookupTableOptions* lookup_options,
+ const BlastInitialWordOptions* word_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastQueryInfo* query_info, const BlastSeqSrc* seq_src,
+ const BlastDiagnostics* diagnostics);
/** Prints the top part of the traditional BLAST output, including version,
* reference(s) and database information.
@@ -151,13 +155,17 @@ Int2 PrintOutputFooter(Uint1 program_number,
* @param dbname BLAST database name [in]
* @param is_protein Is the database protein or nucleotide? [in]
*/
-Int2 BLAST_PrintOutputHeader(BlastFormattingOptions* format_options,
+Int2 BLAST_PrintOutputHeader(const BlastFormattingOptions* format_options,
Boolean is_megablast, char* dbname, Boolean is_protein);
void BLAST_PrintIntermediateResults(BlastHSPResults* results,
BlastQueryInfo* query_info, SeqLocPtr query_slp,
- ReadDBFILEPtr rdfp, SeqIdPtr seqid, BlastScoreBlk* sbp,
+ BlastSeqSrc* seq_src, BlastScoreBlk* sbp,
char* filename);
+void
+Blast_SeqIdGetDefLine(SeqIdPtr sip, char* descr, char** buffer_ptr,
+ Boolean ncbi_gi, Boolean accession_only,
+ Boolean seqid_only, Boolean believe_local_id);
#ifdef __cplusplus
diff --git a/algo/blast/api/blast_returns.c b/algo/blast/api/blast_returns.c
new file mode 100644
index 00000000..6d385346
--- /dev/null
+++ b/algo/blast/api/blast_returns.c
@@ -0,0 +1,405 @@
+/* $Id: blast_returns.c,v 1.1 2004/05/14 17:19:03 dondosha Exp $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's offical duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================*/
+
+/*****************************************************************************
+
+File name: blast_returns.c
+
+Author: Ilya Dondoshansky
+
+Contents: Manipulating data returned from BLAST other than Seq-aligns
+
+Detailed Contents:
+
+******************************************************************************
+ * $Revision: 1.1 $
+ * */
+
+static char const rcsid[] = "$Id: blast_returns.c,v 1.1 2004/05/14 17:19:03 dondosha Exp $";
+
+#include <algo/blast/api/blast_returns.h>
+#include <algo/blast/api/blast_seq.h>
+#include <algo/blast/core/blast_filter.h>
+#include <algo/blast/core/blast_util.h>
+#include <algo/blast/core/blast_seqsrc.h>
+
+BLAST_DbSummary* LIBCALL
+Blast_DbSummaryFree (BLAST_DbSummary* dbinfo)
+{
+ BLAST_DbSummary* next;
+
+ if (dbinfo == NULL)
+ return NULL;
+
+ while (dbinfo)
+ {
+ sfree(dbinfo->name);
+ sfree(dbinfo->definition);
+ sfree(dbinfo->date);
+ next = dbinfo->next;
+ sfree(dbinfo);
+ dbinfo = next;
+ }
+
+ return dbinfo;
+}
+
+BLAST_DbSummary* Blast_GetDbSummary(const BlastSeqSrc* seq_src)
+{
+ BLAST_DbSummary* dbinfo = NULL;
+ char* chptr = NULL;
+
+ dbinfo = calloc(1, sizeof(BLAST_DbSummary));
+ dbinfo->name = BLASTSeqSrcGetName(seq_src);
+
+ if((chptr = BLASTSeqSrcGetDefinition(seq_src)) == NULL)
+ chptr = dbinfo->name;
+
+ if (chptr)
+ dbinfo->definition = strdup(chptr);
+
+ dbinfo->date = BLASTSeqSrcGetDate(seq_src);
+
+ dbinfo->is_protein = BLASTSeqSrcGetIsProt(seq_src);
+
+ if ((dbinfo->total_length = BLASTSeqSrcGetTotLen(seq_src)) == 0)
+ dbinfo->total_length = BLASTSeqSrcGetMaxSeqLen(seq_src);
+ dbinfo->number_seqs = BLASTSeqSrcGetNumSeqs(seq_src);
+
+ return dbinfo;
+}
+
+/*
+ adds the new string to the buffer, separating by a tilde.
+ Checks the size of the buffer for Blast_GetParametersBuffer and
+ allocates longer replacement if needed.
+*/
+
+static Boolean
+add_string_to_buffer(char* buffer, char* *old, Int2* old_length)
+
+{
+ char* new,* ptr;
+ Int2 length = 0, new_length;
+
+ if (!buffer)
+ return FALSE;
+
+ if (*old)
+ length = (strlen(*old));
+
+ if((Int2)(strlen(buffer)+length+3) > *old_length)
+ {
+ new_length = *old_length + 255;
+ new = calloc(new_length, sizeof(char));
+ if (*old_length > 0 && *old != NULL)
+ {
+ memcpy(new, *old, *old_length);
+ sfree(*old);
+ }
+ *old = new;
+ *old_length = new_length;
+ }
+
+ ptr = *old;
+ ptr += length;
+
+ /* Add a tilde */
+ *ptr = '~';
+ ptr++;
+
+ while (*buffer != NULLB)
+ {
+ *ptr = *buffer;
+ buffer++; ptr++;
+ }
+
+ return TRUE;
+}
+
+char*
+Blast_GetParametersBuffer(Uint1 program_number,
+ const BlastScoringOptions* score_options,
+ const BlastScoreBlk* sbp, const LookupTableOptions* lookup_options,
+ const BlastInitialWordOptions* word_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastQueryInfo* query_info, const BlastSeqSrc* seq_src,
+ const BlastDiagnostics* diagnostics)
+{
+ Int4 cutoff = 0;
+ char buffer[128];
+ char* ret_buffer;
+ Int2 ret_buffer_length;
+ Int4 num_entries = 0;
+ Int8 total_length = 0;
+ Int4 qlen = 0;
+ double evalue;
+ Int2 num_frames;
+ Boolean single_query;
+ Blast_KarlinBlk* kbp;
+ BlastUngappedStats* ungapped_stats = NULL;
+ BlastGappedStats* gapped_stats = NULL;
+ BlastRawCutoffs* raw_cutoffs = NULL;
+
+ ret_buffer = NULL;
+ ret_buffer_length = 0;
+
+ if (program_number == blast_type_blastx ||
+ program_number == blast_type_tblastx)
+ num_frames = NUM_FRAMES;
+ else if (program_number == blast_type_blastn)
+ num_frames = 2;
+ else
+ num_frames = 1;
+
+ single_query = (query_info->last_context < num_frames);
+
+ if (diagnostics) {
+ ungapped_stats = diagnostics->ungapped_stat;
+ gapped_stats = diagnostics->gapped_stat;
+ raw_cutoffs = diagnostics->cutoffs;
+ }
+
+ if (score_options->matrix) {
+ sprintf(buffer, "Matrix: %s", score_options->matrix);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ }
+
+ if (score_options->gapped_calculation) {
+ sprintf(buffer, "Gap Penalties: Existence: %ld, Extension: %ld",
+ (long) score_options->gap_open,
+ (long) score_options->gap_extend);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ }
+
+ if (eff_len_options->db_length)
+ total_length = eff_len_options->db_length;
+ else if (seq_src) {
+ if ((total_length = BLASTSeqSrcGetTotLen(seq_src)) == 0)
+ total_length = BLASTSeqSrcGetMaxSeqLen(seq_src);
+ }
+
+ if (program_number == blast_type_tblastn ||
+ program_number == blast_type_rpstblastn ||
+ program_number == blast_type_tblastx)
+ total_length /= 3;
+
+ if (eff_len_options->dbseq_num)
+ num_entries = eff_len_options->dbseq_num;
+ else if (seq_src)
+ num_entries = BLASTSeqSrcGetNumSeqs(seq_src);
+
+ sprintf(buffer, "Number of Sequences: %ld", (long) num_entries);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ if (ungapped_stats) {
+ sprintf(buffer, "Number of Hits to DB: %s",
+ Nlm_Int8tostr(ungapped_stats->lookup_hits, 1));
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+ sprintf(buffer, "Number of extensions: %ld",
+ (long) ungapped_stats->init_extends);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ sprintf(buffer, "Number of successful extensions: %ld",
+ (long) ungapped_stats->good_init_extends);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ }
+
+ if (gapped_stats) {
+ if (hit_options->expect_value > 0.1) {
+ sprintf(buffer, "Number of sequences better than %4.1f: %ld",
+ hit_options->expect_value,
+ (long) gapped_stats->num_seqs_passed);
+ } else {
+ sprintf(buffer, "Number of sequences better than %3.1e: %ld",
+ hit_options->expect_value,
+ (long) gapped_stats->num_seqs_passed);
+ }
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+ if (score_options->gapped_calculation) {
+ sprintf(buffer,
+ "Number of HSP's better than %4.1f without gapping: %ld",
+ hit_options->expect_value,
+ (long) gapped_stats->seqs_ungapped_passed);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ sprintf(buffer,
+ "Number of HSP's gapped: %ld",
+ (long) gapped_stats->extensions);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ sprintf(buffer,
+ "Number of HSP's successfully gapped: %ld",
+ (long) gapped_stats->good_extensions);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ sprintf(buffer,
+ "Number of extra gapped extensions for HSPs above %4.1f: %ld",
+ hit_options->expect_value,
+ (long) gapped_stats->extra_extensions);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ }
+ }
+
+ /* Query length makes sense only for single query sequence. */
+ if (single_query) {
+ qlen = BLAST_GetQueryLength(query_info, query_info->first_context);
+ sprintf(buffer, "Length of query: %ld", (long)qlen);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ }
+
+ sprintf(buffer, "Length of database: %s", Nlm_Int8tostr (total_length, 1));
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+ if (single_query) {
+ Int4 length_adjustment =
+ query_info->length_adjustments[query_info->first_context];
+ Int4 eff_qlen;
+ Int8 eff_dblen;
+ sprintf(buffer, "Length adjustment: %ld", (long) length_adjustment);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+ /** FIXME: Should this be different for RPS BLAST? */
+ eff_qlen = qlen - length_adjustment;
+ sprintf(buffer, "Effective length of query: %ld", (long)eff_qlen);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+ eff_dblen = total_length - num_entries*length_adjustment;
+ sprintf(buffer, "Effective length of database: %s",
+ Nlm_Int8tostr (eff_dblen , 1));
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ sprintf(buffer, "Effective search space: %8.0f",
+ ((double) eff_dblen)*((double) eff_qlen));
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ sprintf(buffer, "Effective search space used: %8.0f",
+ (double)query_info->eff_searchsp_array[query_info->first_context]);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ }
+ sprintf(buffer, "Neighboring words threshold: %ld",
+ (long) lookup_options->threshold);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ sprintf(buffer, "Window for multiple hits: %ld",
+ (long) word_options->window_size);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+ if (raw_cutoffs) {
+ kbp = sbp->kbp[query_info->first_context];
+ sprintf(buffer, "X1: %ld (%4.1f bits)",
+ (long)raw_cutoffs->x_drop_ungapped,
+ raw_cutoffs->x_drop_ungapped*kbp->Lambda/NCBIMATH_LN2);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ if (score_options->gapped_calculation) {
+ sprintf(buffer, "X2: %ld (%4.1f bits)",
+ (long)raw_cutoffs->x_drop_gap, ext_options->gap_x_dropoff);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ sprintf(buffer, "X3: %ld (%4.1f bits)",
+ (long)raw_cutoffs->x_drop_gap_final,
+ ext_options->gap_x_dropoff_final);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+
+ sprintf(buffer, "S1: %ld (%4.1f bits)",
+ (long)raw_cutoffs->gap_trigger, ext_options->gap_trigger);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ }
+ }
+ cutoff = 0;
+ if (single_query) {
+ Int4 context = query_info->first_context;
+ double searchsp = (double) query_info->eff_searchsp_array[context];
+
+ /* For translated RPS blast the search space must be scaled down */
+ if (program_number == blast_type_rpstblastn)
+ searchsp = searchsp / NUM_FRAMES;
+
+ evalue = hit_options->expect_value;
+ if (!score_options->gapped_calculation)
+ kbp = sbp->kbp[query_info->first_context];
+ else
+ kbp = sbp->kbp_gap[query_info->first_context];
+
+ BLAST_Cutoffs(&cutoff, &evalue, kbp, searchsp, FALSE, 0);
+ sprintf(buffer, "S2: %ld (%4.1f bits)", (long) cutoff,
+ (((cutoff)*(kbp->Lambda))-(kbp->logK))/NCBIMATH_LN2);
+ add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
+ }
+ return ret_buffer;
+}
+
+/** Save the Karlin-Altschul parameters calculated in the BLAST search.
+ * @param sbp Internal scoring block structure [in]
+ * @param context Index into the array of structures containing
+ * Karlin-Altschul parameters [in]
+ * @param sum_returns Returns summary structure [out]
+*/
+static void
+Blast_SummaryFillKAParameters(const BlastScoreBlk* sbp, Int4 context,
+ BLAST_SummaryReturn* sum_returns)
+{
+ Blast_KarlinBlk* kbp;
+
+ if (!sbp)
+ return;
+
+ if (sbp->kbp) {
+ kbp = sbp->kbp[context];
+ sum_returns->ka_params =
+ (BLAST_KAParameters*) malloc(sizeof(BLAST_KAParameters));
+ sum_returns->ka_params->Lambda = kbp->Lambda;
+ sum_returns->ka_params->K = kbp->K;
+ sum_returns->ka_params->H = kbp->H;
+ }
+
+ if (sbp->kbp_gap) {
+ kbp = sbp->kbp_gap[context];
+ sum_returns->ka_params_gap =
+ (BLAST_KAParameters*) malloc(sizeof(BLAST_KAParameters));
+ sum_returns->ka_params_gap->Lambda = kbp->Lambda;
+ sum_returns->ka_params_gap->K = kbp->K;
+ sum_returns->ka_params_gap->H = kbp->H;
+ }
+}
+
+void Blast_SummaryReturnFill(Uint1 program_number,
+ const BlastScoringOptions* score_options,
+ const BlastScoreBlk* sbp,
+ const LookupTableOptions* lookup_options,
+ const BlastInitialWordOptions* word_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastQueryInfo* query_info,
+ const BlastSeqSrc* seq_src,
+ const BlastDiagnostics* diagnostics,
+ BLAST_SummaryReturn** sum_returns_out)
+{
+ BLAST_SummaryReturn* sum_returns =
+ (BLAST_SummaryReturn*) calloc(1, sizeof(BLAST_SummaryReturn));
+ Blast_SummaryFillKAParameters(sbp, query_info->first_context, sum_returns);
+ sum_returns->params_buffer =
+ Blast_GetParametersBuffer(program_number, score_options, sbp,
+ lookup_options, word_options, ext_options,
+ hit_options, eff_len_options, query_info,
+ seq_src, diagnostics);
+ *sum_returns_out = sum_returns;
+}
diff --git a/algo/blast/api/blast_returns.h b/algo/blast/api/blast_returns.h
new file mode 100644
index 00000000..74c78992
--- /dev/null
+++ b/algo/blast/api/blast_returns.h
@@ -0,0 +1,105 @@
+/* $Id: blast_returns.h,v 1.1 2004/05/14 17:19:03 dondosha Exp $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's offical duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================*/
+
+/*****************************************************************************
+
+File name: blast_returns.h
+
+Author: Ilya Dondoshansky
+
+Contents: Manipulation of data returned from BLAST other than Seq-aligns
+
+******************************************************************************
+ * $Revision: 1.1 $
+ * */
+#ifndef __BLAST_RETURNS__
+#define __BLAST_RETURNS__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef NCBI_C_TOOLKIT
+#define NCBI_C_TOOLKIT
+#endif
+
+#include <algo/blast/core/blast_options.h>
+#include <algo/blast/core/blast_hits.h>
+#include <algo/blast/core/blast_seqsrc.h>
+#include <algo/blast/core/blast_diagnostics.h>
+#include <algo/blast/api/twoseq_api.h>
+
+typedef struct BLAST_DbSummary {
+ struct BLAST_DbSummary* next;
+ Boolean is_protein;
+ char* name;
+ char* definition;
+ char* date;
+ Int8 total_length;
+ Int4 number_seqs;
+ Boolean subset; /* Print the subset message. */
+} BLAST_DbSummary;
+
+BLAST_DbSummary* LIBCALL
+Blast_DbSummaryFree (BLAST_DbSummary* dbinfo);
+
+/** Retrieves necessary information from a sequence source and fills the
+ * BLAST_DbSummary structure.
+ */
+BLAST_DbSummary* Blast_GetDbSummary(const BlastSeqSrc* seq_src);
+
+/** Formats the BLAST parameters for the BLAST report.
+ * One char* is returned, newlines are indicated by tildes ('~').
+ */
+char*
+Blast_GetParametersBuffer(Uint1 program_number,
+ const BlastScoringOptions* score_options,
+ const BlastScoreBlk* sbp, const LookupTableOptions* lookup_options,
+ const BlastInitialWordOptions* word_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastQueryInfo* query_info, const BlastSeqSrc* seq_src,
+ const BlastDiagnostics* diagnostics);
+
+/** Fills the summary returns */
+void Blast_SummaryReturnFill(Uint1 program_number,
+ const BlastScoringOptions* score_options,
+ const BlastScoreBlk* sbp,
+ const LookupTableOptions* lookup_options,
+ const BlastInitialWordOptions* word_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastQueryInfo* query_info,
+ const BlastSeqSrc* seq_src,
+ const BlastDiagnostics* diagnostics,
+ BLAST_SummaryReturn** sum_returns_out);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !__BLAST_FORMAT__ */
+
diff --git a/algo/blast/api/blast_seq.c b/algo/blast/api/blast_seq.c
index c4bb1a2c..2eb93b5d 100644
--- a/algo/blast/api/blast_seq.c
+++ b/algo/blast/api/blast_seq.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: blast_seq.c,v 1.39 2004/04/16 14:44:07 papadopo Exp $";
+static char const rcsid[] = "$Id: blast_seq.c,v 1.40 2004/05/14 17:20:50 dondosha Exp $";
/*
* ===========================================================================
*
@@ -33,7 +33,7 @@ Author: Ilya Dondoshansky
Contents: Functions converting between SeqLocs and structures used in BLAST.
******************************************************************************
- * $Revision: 1.39 $
+ * $Revision: 1.40 $
* */
#include <seqport.h>
@@ -274,7 +274,7 @@ static Int4 BLAST_SetUpQueryInfo(SeqLocPtr slp, Uint1 program,
Uint4 max_length = 0;
if (translate)
- num_frames = 6;
+ num_frames = NUM_FRAMES;
else if (is_na)
num_frames = 2;
else
@@ -302,14 +302,14 @@ static Int4 BLAST_SetUpQueryInfo(SeqLocPtr slp, Uint1 program,
}
if ((context_offsets = (Int4*)
- malloc((total_contexts+1)*sizeof(Int4))) == NULL)
+ calloc((total_contexts+1), sizeof(Int4))) == NULL)
return -1;
if ((query_info->eff_searchsp_array =
- (Int8*) malloc(total_contexts*sizeof(Int8))) == NULL)
+ (Int8*) calloc(total_contexts, sizeof(Int8))) == NULL)
return -1;
if ((query_info->length_adjustments =
- (Int4*) malloc(total_contexts*sizeof(Int4))) == NULL)
+ (Int4*) calloc(total_contexts, sizeof(Int4))) == NULL)
return -1;
context_offsets[0] = 0;
@@ -542,7 +542,7 @@ BLAST_GetSequence(SeqLocPtr slp, BlastQueryInfo* query_info,
}
}
- if (num_frames == 6) {
+ if (num_frames == NUM_FRAMES) {
/* Sequence must be translated in 6 frames. This can only happen
for query - subject sequences are translated later. */
Int4 gc;
@@ -639,7 +639,7 @@ Int2 BLAST_SetUpQuery(Uint1 program_number, SeqLocPtr query_slp,
num_frames = 1;
} else {
encoding = NCBI4NA_ENCODING;
- num_frames = 6;
+ num_frames = NUM_FRAMES;
}
if ((status=BLAST_GetSequence(query_slp, *query_info, query_options,
diff --git a/algo/blast/api/blast_seqalign.c b/algo/blast/api/blast_seqalign.c
index ea6baeb6..9e32cb19 100644
--- a/algo/blast/api/blast_seqalign.c
+++ b/algo/blast/api/blast_seqalign.c
@@ -1,4 +1,4 @@
-/* $Id: blast_seqalign.c,v 1.31 2004/04/19 15:03:20 papadopo Exp $
+/* $Id: blast_seqalign.c,v 1.35 2004/06/08 17:47:24 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,10 +32,10 @@ Author: Ilya Dondoshansky
Contents: Conversion of BLAST results to the SeqAlign form
******************************************************************************
- * $Revision: 1.31 $
+ * $Revision: 1.35 $
* */
-static char const rcsid[] = "$Id: blast_seqalign.c,v 1.31 2004/04/19 15:03:20 papadopo Exp $";
+static char const rcsid[] = "$Id: blast_seqalign.c,v 1.35 2004/06/08 17:47:24 dondosha Exp $";
#include <algo/blast/api/blast_seqalign.h>
@@ -46,14 +46,12 @@ extern ScorePtr MakeBlastScore (ScorePtr PNTR old, CharPtr scoretype,
Nlm_FloatHi prob, Int4 score);
static ScorePtr
-GetScoreSetFromBlastHsp(Uint1 program_number, BlastHSP* hsp,
- BlastScoreBlk* sbp, BlastScoringOptions* score_options)
+GetScoreSetFromBlastHsp(BlastHSP* hsp)
{
ScorePtr score_set=NULL;
double prob;
Int4 score;
char* scoretype;
- Blast_KarlinBlk* kbp;
score = hsp->score;
if (score > 0)
@@ -77,16 +75,9 @@ GetScoreSetFromBlastHsp(Uint1 program_number, BlastHSP* hsp,
MakeBlastScore(&score_set, scoretype, prob, 0);
}
- if (!score_options->gapped_calculation) {
- kbp = sbp->kbp[hsp->context];
- } else {
- kbp = sbp->kbp_gap[hsp->context];
- }
-
/* Calculate bit score from the raw score */
- prob = ((hsp->score*kbp->Lambda) - kbp->logK)/NCBIMATH_LN2;
- if (prob >= 0.)
- MakeBlastScore(&score_set, "bit_score", prob, 0);
+ if (hsp->bit_score >= 0.)
+ MakeBlastScore(&score_set, "bit_score", hsp->bit_score, 0);
if (hsp->num_ident > 0)
MakeBlastScore(&score_set, "num_ident", 0.0, hsp->num_ident);
@@ -121,10 +112,8 @@ static Int2 AddGiListToScoreSet(ScorePtr score_set, ValNodePtr gi_list)
************************************************************************/
static DenseDiagPtr
-BLAST_HSPToDenseDiag(Uint1 program_number, DenseDiagPtr* old,
- BlastHSP* hsp, Boolean reverse,
- Int4 query_length, Int4 subject_length, BlastScoreBlk* sbp,
- BlastScoringOptions* score_options)
+BLAST_HSPToDenseDiag(DenseDiagPtr* old, BlastHSP* hsp, Boolean reverse,
+ Int4 query_length, Int4 subject_length)
{
DenseDiagPtr ddp, new;
@@ -180,8 +169,7 @@ BLAST_HSPToDenseDiag(Uint1 program_number, DenseDiagPtr* old,
new->starts[1] = subject_length - hsp->subject.offset - hsp->subject.length;
}
}
- new->scores =
- GetScoreSetFromBlastHsp(program_number, hsp, sbp, score_options);
+ new->scores = GetScoreSetFromBlastHsp(hsp);
/* Go to the end of the chain, and then attach "new" */
if (*old)
@@ -210,10 +198,8 @@ BLAST_HSPToDenseDiag(Uint1 program_number, DenseDiagPtr* old,
*
************************************************************************/
static StdSeg*
-BLAST_HSPToStdSeg(Uint1 program_number, StdSeg** old,
- BlastHSP* hsp, Int4 query_length, Int4 subject_length, SeqIdPtr sip,
- Boolean reverse, BlastScoreBlk* sbp,
- BlastScoringOptions* score_options)
+BLAST_HSPToStdSeg(StdSeg** old, BlastHSP* hsp, Int4 query_length,
+ Int4 subject_length, SeqIdPtr sip, Boolean reverse)
{
StdSeg* ssp,* new;
SeqIdPtr query_sip, subject_sip;
@@ -288,8 +274,7 @@ BLAST_HSPToStdSeg(Uint1 program_number, StdSeg** old,
}
new->loc = slp;
- new->scores =
- GetScoreSetFromBlastHsp(program_number, hsp, sbp, score_options);
+ new->scores = GetScoreSetFromBlastHsp(hsp);
/* Go to the end of the chain, and then attach "new" */
if (*old)
@@ -327,9 +312,8 @@ BLAST_HSPToStdSeg(Uint1 program_number, StdSeg** old,
static Int2
BLAST_UngappedHSPToSeqAlign(Uint1 program_number,
BlastHSPList* hsp_list, SeqIdPtr query_id,
- SeqIdPtr subject_id, BlastScoreBlk* sbp, Int4 query_length,
- Int4 subject_length, BlastScoringOptions* score_options,
- SeqAlignPtr* seqalign_ptr)
+ SeqIdPtr subject_id, Int4 query_length,
+ Int4 subject_length, SeqAlignPtr* seqalign_ptr)
{
BlastHSP* hsp;
DenseDiagPtr ddp_head=NULL, ddp;
@@ -361,13 +345,12 @@ BLAST_UngappedHSPToSeqAlign(Uint1 program_number,
sip->next = SeqIdDup(subject_id);
if (getdensediag) {
- ddp = BLAST_HSPToDenseDiag(program_number, &ddp_head, hsp, FALSE,
- query_length, subject_length, sbp, score_options);
+ ddp = BLAST_HSPToDenseDiag(&ddp_head, hsp, FALSE, query_length,
+ subject_length);
ddp->id = sip;
} else {
- ssp = BLAST_HSPToStdSeg(program_number, &ssp_head, hsp,
- query_length, subject_length, sip, FALSE, sbp,
- score_options);
+ ssp = BLAST_HSPToStdSeg(&ssp_head, hsp, query_length,
+ subject_length, sip, FALSE);
ssp->ids = sip;
}
sip = NULL; /* This SeqIdPtr is now on the SeqAlign. */
@@ -454,8 +437,8 @@ Boolean GapCollectDataForSeqalign(GapEditBlock* edit_block,
index=0;
for (i = 0, curr=curr_in; curr && i < numseg; curr=curr->next, i++) {
switch(curr->op_type) {
- case GAPALIGN_DECLINE:
- case GAPALIGN_SUB:
+ case eGapAlignDecline:
+ case eGapAlignSub:
if (strand1 != Seq_strand_minus) {
if(translate1 == FALSE)
begin1 = get_current_pos(start1, curr->num);
@@ -494,7 +477,7 @@ Boolean GapCollectDataForSeqalign(GapEditBlock* edit_block,
break;
- case GAPALIGN_DEL:
+ case eGapAlignDel:
begin1 = -1;
if (strand2 != Seq_strand_minus) {
if(translate2 == FALSE)
@@ -528,7 +511,7 @@ Boolean GapCollectDataForSeqalign(GapEditBlock* edit_block,
break;
- case GAPALIGN_INS:
+ case eGapAlignIns:
if (strand1 != Seq_strand_minus) {
if(translate1 == FALSE)
begin1 = get_current_pos(start1, curr->num);
@@ -581,7 +564,7 @@ static void GapCorrectUASequence(GapEditBlock* edit_block)
for (curr=edit_block->esp; curr; curr = curr->next) {
- if(curr->op_type == GAPALIGN_DECLINE && last_indel == TRUE) {
+ if(curr->op_type == eGapAlignDecline && last_indel == TRUE) {
/* This is invalid condition and regions should be
exchanged */
@@ -596,7 +579,7 @@ static void GapCorrectUASequence(GapEditBlock* edit_block)
last_indel = FALSE;
- if(curr->op_type == GAPALIGN_INS || curr->op_type == GAPALIGN_DEL) {
+ if(curr->op_type == eGapAlignIns || curr->op_type == eGapAlignDel) {
last_indel = TRUE;
curr_last2 = curr_last;
}
@@ -769,14 +752,14 @@ GapEditBlockToSeqAlign(GapEditBlock* edit_block, SeqIdPtr subject_id, SeqIdPtr q
for (curr=edit_block->esp; curr; curr = curr->next) {
numseg++;
- if(/*edit_block->discontinuous && */curr->op_type == GAPALIGN_DECLINE)
+ if(/*edit_block->discontinuous && */curr->op_type == eGapAlignDecline)
is_disc_align = TRUE;
}
start1 = edit_block->start1;
start2 = edit_block->start2;
- /* If no GAPALIGN_DECLINE regions exists output seqalign will be
+ /* If no eGapAlignDecline regions exists output seqalign will be
regular Den-Seg or Std-seg */
if(is_disc_align == FALSE) {
/* Please note, that edit_block passed only for data like
@@ -812,11 +795,11 @@ GapEditBlockToSeqAlign(GapEditBlock* edit_block, SeqIdPtr subject_id, SeqIdPtr q
for (numseg = 0, curr = curr_last;
curr; curr = curr->next, numseg++) {
- if(curr->op_type == GAPALIGN_DECLINE) {
+ if(curr->op_type == eGapAlignDecline) {
if(numseg != 0) { /* End of aligned area */
break;
} else {
- while(curr && curr->op_type == GAPALIGN_DECLINE) {
+ while(curr && curr->op_type == eGapAlignDecline) {
numseg++;
curr = curr->next;
}
@@ -935,7 +918,7 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
slp2 = NULL;
switch (curr->op_type) {
- case 0: /* deletion of three nucleotides. */
+ case eGapAlignDel: /* deletion of three nucleotides. */
first_shift = FALSE;
@@ -960,7 +943,7 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
break;
- case 6: /* insertion of three nucleotides. */
+ case eGapAlignIns: /* insertion of three nucleotides. */
/* If gap is followed after frameshift - we have to
add this element for the alignment to be correct */
@@ -1070,7 +1053,7 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
break;
- case 3: /* Substitution. */
+ case eGapAlignSub: /* Substitution. */
first_shift = FALSE;
@@ -1090,14 +1073,8 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
/* Nucleotide scale shifted by op_type */
seq_int2 = SeqIntNew();
- /* Adjusting last segment and new start point in
- nucleotide coordinates */
- /* if(seq_int2_last != NULL) {
- seq_int2_last->to = seq_int2_last->to - (3 - curr->op_type);
- start2 = start2 - (3 - curr->op_type);
- } */
-
- seq_int2->from = get_current_pos(&start2, curr->num*curr->op_type);
+ seq_int2->from =
+ get_current_pos(&start2, curr->num*(Uint1)curr->op_type);
seq_int2->to = start2 - 1;
/* Chop off three bases and one residue at a time.
@@ -1125,10 +1102,10 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
seq_int2_last = seq_int2; /* Will be used to adjust "to" value */
break;
- case 1: /* gap of two nucleotides. */
- case 2: /* Gap of one nucleotide. */
- case 4: /* Insertion of one nucleotide. */
- case 5: /* Insertion of two nucleotides. */
+ case eGapAlignDel2: /* gap of two nucleotides. */
+ case eGapAlignDel1: /* Gap of one nucleotide. */
+ case eGapAlignIns1: /* Insertion of one nucleotide. */
+ case eGapAlignIns2: /* Insertion of two nucleotides. */
if(first_shift == TRUE) { /* Second frameshift in a row */
/* Protein coordinates */
@@ -1147,7 +1124,8 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
/* Nucleotide scale shifted by op_type */
seq_int2 = SeqIntNew();
- seq_int2->from = get_current_pos(&start2, curr->op_type);
+ seq_int2->from =
+ get_current_pos(&start2, (Uint1)curr->op_type);
seq_int2->to = start2 - 1;
if(seq_int2->to >= original_length2) {
@@ -1181,10 +1159,8 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
/* If this substitution is following simple frameshift
we do not need to start new segment, but may continue
old one */
- /* printf("curr_num = %d (%d)\n", curr->num, curr->op_type); */
-
if(seq_int2_last != NULL) {
- get_current_pos(&start2, curr->num*(curr->op_type-3));
+ get_current_pos(&start2, curr->num*((Uint1)curr->op_type-3));
if(strand2 != Seq_strand_minus) {
seq_int2_last->to = start2 - 1;
} else {
@@ -1206,13 +1182,14 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
seq_int1_last++;
}
- } else if (curr->op_type > 3) {
+ } else if ((Uint1)curr->op_type > 3) {
/* Protein piece is empty */
ValNodeAddPointer(&slp1, SEQLOC_EMPTY, SeqIdDup(id1));
/* Simulating insertion of nucleotides */
seq_int2 = SeqIntNew();
- seq_int2->from = get_current_pos(&start2,
- curr->num*(curr->op_type-3));
+ seq_int2->from =
+ get_current_pos(&start2,
+ curr->num*((Uint1)curr->op_type-3));
seq_int2->to = start2 - 1;
if(seq_int2->to >= original_length2) {
@@ -1279,9 +1256,8 @@ BLAST_OOFEditBlockToSeqAlign(Uint1 program, GapEditBlock* edit_block,
static Int2
BLAST_GapInfoToSeqAlign(Uint1 program_number, BlastHSPList* hsp_list,
- SeqIdPtr query_id, SeqIdPtr subject_id, BlastScoreBlk* sbp,
- Int4 query_length, BlastScoringOptions* score_options,
- SeqAlignPtr* head_seqalign)
+ SeqIdPtr query_id, SeqIdPtr subject_id, Int4 query_length,
+ Boolean is_ooframe, SeqAlignPtr* head_seqalign)
{
Int2 status = 0;
BlastHSP** hsp_array;
@@ -1294,7 +1270,7 @@ BLAST_GapInfoToSeqAlign(Uint1 program_number, BlastHSPList* hsp_list,
for (index=0; index<hsp_list->hspcnt; index++) {
hsp_array[index]->gap_info->original_length1 = query_length;
- if (score_options->is_ooframe) {
+ if (is_ooframe) {
seqalign = BLAST_OOFEditBlockToSeqAlign(program_number,
hsp_array[index]->gap_info,
query_id, subject_id);
@@ -1310,9 +1286,7 @@ BLAST_GapInfoToSeqAlign(Uint1 program_number, BlastHSPList* hsp_list,
last_seqalign->next = seqalign;
last_seqalign = last_seqalign->next;
}
- seqalign->score =
- GetScoreSetFromBlastHsp(program_number, hsp_array[index], sbp,
- score_options);
+ seqalign->score = GetScoreSetFromBlastHsp(hsp_array[index]);
}
return status;
@@ -1320,9 +1294,8 @@ BLAST_GapInfoToSeqAlign(Uint1 program_number, BlastHSPList* hsp_list,
Int2 BLAST_ResultsToSeqAlign(Uint1 program_number,
BlastHSPResults* results, SeqLocPtr query_slp,
- BlastSeqSrc* bssp, SeqLocPtr subject_slp,
- BlastScoringOptions* score_options, BlastScoreBlk* sbp,
- Boolean is_gapped, SeqAlignPtr* head_seqalign)
+ BlastSeqSrc* seq_src, Boolean is_gapped, Boolean is_ooframe,
+ SeqAlignPtr* head_seqalign)
{
Int4 query_index, subject_index;
SeqLocPtr slp = query_slp;
@@ -1331,13 +1304,16 @@ Int2 BLAST_ResultsToSeqAlign(Uint1 program_number,
BlastHSPList* hsp_list;
SeqAlignPtr seqalign = NULL, last_seqalign = NULL;
Int4 subject_length = 0;
+ ListNode* seqid_wrap = NULL;
+ char* bad_id_str = NULL; /* In case an unknown id is returned from sequence
+ source */
*head_seqalign = NULL;
+ if (!results)
+ return 0;
- if (!bssp) {
- subject_id = SeqLocId(subject_slp);
- subject_length = SeqLocLen(subject_slp);
- }
+ if (!seq_src)
+ return -1;
for (query_index = 0; slp && query_index < results->num_queries;
++query_index, slp = slp->next) {
@@ -1351,22 +1327,30 @@ Int2 BLAST_ResultsToSeqAlign(Uint1 program_number,
hsp_list = hit_list->hsplist_array[subject_index];
if (!hsp_list)
continue;
- if (bssp) {
- char* id_str = BLASTSeqSrcGetSeqIdStr(bssp, (void*) &hsp_list->oid);
- subject_id = SeqIdParse(id_str);
- subject_length = BLASTSeqSrcGetSeqLen(bssp, (void*) &hsp_list->oid);
- sfree(id_str);
+ bad_id_str = NULL;
+ seqid_wrap = BLASTSeqSrcGetSeqId(seq_src, (void*) &hsp_list->oid);
+ if (seqid_wrap->choice == BLAST_SEQSRC_C_SEQID) {
+ subject_id = (SeqId*) seqid_wrap->ptr;
+ ListNodeFree(seqid_wrap);
+ } else {
+ /* Should not happen: wrong type of subject id returned;
+ Create a fake local id. */
+ bad_id_str = strdup("lcl|unknown");
+ subject_id = SeqIdParse(bad_id_str);
}
+ subject_length =
+ BLASTSeqSrcGetSeqLen(seq_src, (void*) &hsp_list->oid);
if (is_gapped) {
BLAST_GapInfoToSeqAlign(program_number, hsp_list, query_id,
- subject_id, sbp, SeqLocLen(slp), score_options, &seqalign);
+ subject_id, SeqLocLen(slp), is_ooframe, &seqalign);
} else {
BLAST_UngappedHSPToSeqAlign(program_number, hsp_list, query_id,
- subject_id, sbp, SeqLocLen(slp), subject_length,
- score_options, &seqalign);
+ subject_id, SeqLocLen(slp), subject_length, &seqalign);
}
- if (bssp)
+ /* If unknown seqid was allocated here, free it, since it has been
+ duplicated in the Seq-align */
+ if (bad_id_str)
SeqIdSetFree(subject_id);
if (seqalign) {
diff --git a/algo/blast/api/blast_seqalign.h b/algo/blast/api/blast_seqalign.h
index d78377dd..c6d009d7 100644
--- a/algo/blast/api/blast_seqalign.h
+++ b/algo/blast/api/blast_seqalign.h
@@ -1,4 +1,4 @@
-/* $Id: blast_seqalign.h,v 1.13 2004/03/12 15:18:53 coulouri Exp $
+/* $Id: blast_seqalign.h,v 1.16 2004/06/08 17:47:24 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,7 +32,7 @@ Author: Ilya Dondoshansky
Contents: Functions to convert BLAST results to the SeqAlign form
******************************************************************************
- * $Revision: 1.13 $
+ * $Revision: 1.16 $
* */
#ifndef __BLAST_SEQALIGN__
#define __BLAST_SEQALIGN__
@@ -55,17 +55,14 @@ extern "C" {
* @param program_number Type of BLAST program [in]
* @param results The BLAST results [in]
* @param query_slp List of query SeqLoc's [in]
- * @param bssp Pointer to the BLAST database wrapper structure [in]
- * @param subject_slp Subject SeqLoc (for two sequences search) [in]
- * @param score_options Scoring options block [in]
- * @param sbp Scoring and statistical information [in]
+ * @param seq_src Pointer to the BLAST database wrapper structure [in]
* @param is_gapped Is this a gapped alignment search? [in]
+ * @param is_ooframe Is this a search with out-of-frame gapping? [in]
* @param head_seqalign List of SeqAlign's [out]
*/
Int2 BLAST_ResultsToSeqAlign(Uint1 program_number, BlastHSPResults* results,
- SeqLocPtr query_slp, BlastSeqSrc* bssp, SeqLocPtr subject_slp,
- BlastScoringOptions* score_options, BlastScoreBlk* sbp,
- Boolean is_gapped, SeqAlignPtr* head_seqalign);
+ SeqLocPtr query_slp, BlastSeqSrc* seq_src,
+ Boolean is_gapped, Boolean is_ooframe, SeqAlignPtr* head_seqalign);
Boolean GapCollectDataForSeqalign(GapEditBlock* edit_block,
GapEditScript* curr_in, Int4 numseg,
diff --git a/algo/blast/api/blast_tabular.c b/algo/blast/api/blast_tabular.c
new file mode 100644
index 00000000..7963dade
--- /dev/null
+++ b/algo/blast/api/blast_tabular.c
@@ -0,0 +1,238 @@
+/* $Id: blast_tabular.c,v 1.3 2004/06/14 20:43:30 dondosha Exp $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's offical duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================*/
+
+/*****************************************************************************
+
+File name: blast_tabular.c
+
+Author: Ilya Dondoshansky
+
+Contents: On-the-fly tabular formatting of BLAST results
+
+Detailed Contents:
+
+******************************************************************************
+ * $Revision: 1.3 $
+ * */
+
+static char const rcsid[] = "$Id: blast_tabular.c,v 1.3 2004/06/14 20:43:30 dondosha Exp $";
+
+#include <algo/blast/api/blast_tabular.h>
+#include <algo/blast/core/blast_util.h>
+#include <algo/blast/core/blast_setup.h>
+#include <algo/blast/core/blast_engine.h>
+#include <algo/blast/core/blast_traceback.h>
+#include <algo/blast/api/blast_format.h>
+#include <txalign.h>
+
+BlastTabularFormatData*
+Blast_TabularFormatDataInit(Uint1 program, BlastHSPStream* hsp_stream,
+ BlastSeqSrc* seq_src, BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
+ const BlastScoringOptions* score_options, BlastScoreBlk* sbp,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ const BlastDatabaseOptions* db_options,
+ SeqLoc* query_slp, FILE* outfp)
+{
+ BlastTabularFormatData* tf_data =
+ (BlastTabularFormatData*) calloc(1, sizeof(BlastTabularFormatData));
+ tf_data->perform_traceback =
+ (score_options->gapped_calculation &&
+ ext_options->eTbackExt != eSkipTbck);
+
+ tf_data->program = program;
+ tf_data->hsp_stream = hsp_stream;
+ tf_data->query = query;
+ tf_data->gen_code_string = db_options->gen_code_string;
+ tf_data->query_slp = query_slp;
+ tf_data->outfp = outfp;
+ /* Sequence source must be copied, to guarantee multi-thread safety. */
+ tf_data->seq_src = BlastSeqSrcCopy(seq_src);
+ /* Effective lengths must be duplicated in query info structure, because
+ they might be changing in the preliminary search. */
+ tf_data->query_info = BlastQueryInfoDup(query_info);
+
+ /* If traceback will have to be performed before tabular output,
+ do the preparation for it here. */
+ if (tf_data->perform_traceback) {
+ BLAST_GapAlignSetUp(program, seq_src,
+ score_options, eff_len_options, ext_options, hit_options,
+ query_info, sbp, &tf_data->score_params, &tf_data->ext_params,
+ &tf_data->hit_params, &tf_data->eff_len_params, &tf_data->gap_align);
+ }
+
+ return tf_data;
+}
+
+void BlastTabularFormatDataFree(BlastTabularFormatData* tf_data)
+{
+ /* Free only the structures that have been initialized internally */
+ tf_data->query_info = BlastQueryInfoFree(tf_data->query_info);
+ tf_data->score_params = BlastScoringParametersFree(tf_data->score_params);
+ tf_data->ext_params = BlastExtensionParametersFree(tf_data->ext_params);
+ tf_data->hit_params = BlastHitSavingParametersFree(tf_data->hit_params);
+ tf_data->eff_len_params =
+ BlastEffectiveLengthsParametersFree(tf_data->eff_len_params);
+ tf_data->gap_align = BLAST_GapAlignStructFree(tf_data->gap_align);
+ tf_data->seq_src = BlastSeqSrcFree(tf_data->seq_src);
+ sfree(tf_data);
+}
+
+/* This function might serve as a starting point for a callback function
+ * that prints results before the traceback stage, e.g. the on-the-fly
+ * tabular output, a la the -D3 option of the old megablast.
+ */
+void* Blast_TabularFormatThread(void* data)
+{
+ BlastTabularFormatData* tf_data;
+ Uint1 program;
+ BlastHSPList* hsp_list = NULL;
+ BlastSeqSrc* seq_src;
+ BLAST_SequenceBlk* query = NULL;
+ BlastQueryInfo* query_info = NULL;
+ BlastScoringParameters* score_params = NULL;
+ BlastExtensionParameters* ext_params = NULL;
+ BlastHitSavingParameters* hit_params = NULL;
+ BlastEffectiveLengthsParameters* eff_len_params = NULL;
+ Uint1* gen_code_string = NULL;
+ BlastGapAlignStruct* gap_align = NULL;
+ Int4 query_index, index;
+ char* query_buffer = NULL;
+ char* subject_buffer = NULL;
+ Int4 q_start=0, q_end=0, s_start=0, s_end=0;
+ SeqLocPtr slp;
+ char bit_score_buff[10], eval_buff[10];
+ char* eval_buff_ptr = NULL;
+ BlastHSP* hsp;
+ SeqId** query_id_array = NULL;
+ Int4 align_length = 0;
+ Int4 num_gaps = 0, num_gap_opens = 0, num_mismatches = 0;
+ double perc_ident = 0;
+ GetSeqArg seq_arg;
+ Boolean one_seq_update_params;
+
+ tf_data = (BlastTabularFormatData*) data;
+ if (!tf_data || !tf_data->query_slp || !tf_data->hsp_stream ||
+ !tf_data->seq_src || !tf_data->outfp)
+ return NULL;
+
+ program = tf_data->program;
+ seq_src = tf_data->seq_src;
+
+ if (tf_data->perform_traceback) {
+ query = tf_data->query;
+ query_info = tf_data->query_info;
+ score_params = tf_data->score_params;
+ ext_params = tf_data->ext_params;
+ hit_params = tf_data->hit_params;
+ eff_len_params = tf_data->eff_len_params;
+ gap_align = tf_data->gap_align;
+ gen_code_string = tf_data->gen_code_string;
+
+ memset((void*) &seq_arg, 0, sizeof(seq_arg));
+ seq_arg.encoding = Blast_TracebackGetEncoding(program);
+ }
+
+ query_id_array =
+ (SeqId**) malloc(ValNodeLen(tf_data->query_slp)*sizeof(SeqId*));
+
+ for (index = 0, slp = tf_data->query_slp; slp; ++index, slp = slp->next) {
+ query_id_array[index] = SeqLocId(slp);
+ }
+
+ one_seq_update_params = (BLASTSeqSrcGetTotLen(seq_src) == 0);
+
+ while (BlastHSPStreamRead(tf_data->hsp_stream, &hsp_list)
+ != kBlastHSPStream_Eof) {
+ if (!hsp_list) {
+ /* This should not happen, but just in case */
+ continue;
+ }
+
+ /* Perform traceback if necessary */
+ if (tf_data->perform_traceback) {
+ BlastSequenceBlkClean(seq_arg.seq);
+ seq_arg.oid = hsp_list->oid;
+ if (BLASTSeqSrcGetSequence(seq_src, (void*) &seq_arg) < 0)
+ continue;
+
+ if (one_seq_update_params) {
+ Int2 status;
+ /* This is not a database search, so effective search spaces
+ need to be recalculated based on this subject sequence length */
+ if ((status = BLAST_OneSubjectUpdateParameters(program,
+ seq_arg.seq->length,
+ score_params->options,
+ query_info, gap_align->sbp,
+ ext_params, hit_params, NULL,
+ eff_len_params)) != 0) {
+ continue;
+ }
+ }
+
+ Blast_TracebackFromHSPList(program, hsp_list, query,
+ seq_arg.seq, query_info, gap_align, gap_align->sbp, score_params,
+ ext_params->options, hit_params, gen_code_string);
+ BLASTSeqSrcRetSequence(seq_src, (void*)&seq_arg);
+ /* Recalculate the bit scores, since they might have changed. */
+ Blast_HSPListGetBitScores(hsp_list,
+ score_params->options->gapped_calculation, gap_align->sbp);
+ }
+ subject_buffer =
+ BLASTSeqSrcGetSeqIdStr(seq_src, (void*) &hsp_list->oid);
+
+ for (index = 0; index < hsp_list->hspcnt; ++index) {
+ hsp = hsp_list->hsp_array[index];
+ query_index =
+ Blast_GetQueryIndexFromContext(hsp->context, program);
+ Blast_SeqIdGetDefLine(query_id_array[query_index], NULL,
+ &query_buffer, TRUE, FALSE, TRUE, FALSE);
+
+ eval_buff_ptr = eval_buff;
+ ScoreAndEvalueToBuffers(hsp->bit_score, hsp->evalue,
+ bit_score_buff, &eval_buff_ptr, FALSE);
+
+ /* Calculate percentage of identities */
+ Blast_HSPCalcLengthAndGaps(hsp, &align_length, &num_gaps,
+ &num_gap_opens);
+ perc_ident = ((double)hsp->num_ident)/align_length * 100;
+ num_mismatches = align_length - hsp->num_ident - num_gaps;
+
+ Blast_HSPGetAdjustedOffsets(hsp, &q_start, &q_end, &s_start, &s_end);
+
+ fprintf(tf_data->outfp,
+ "%s\t%s\t%.2f\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%s\t%s\n",
+ query_buffer, subject_buffer, perc_ident,
+ (long) align_length, (long) num_mismatches,
+ (long) num_gap_opens, (long) q_start, (long) q_end,
+ (long) s_start, (long) s_end, eval_buff, bit_score_buff);
+ }
+ fflush(tf_data->outfp);
+ }
+ /* Deallocate the formatting thread data structure */
+ BlastTabularFormatDataFree(tf_data);
+ return NULL;
+}
diff --git a/algo/blast/api/blast_tabular.h b/algo/blast/api/blast_tabular.h
new file mode 100644
index 00000000..7befa54f
--- /dev/null
+++ b/algo/blast/api/blast_tabular.h
@@ -0,0 +1,101 @@
+/* $Id: blast_tabular.h,v 1.2 2004/06/14 20:43:30 dondosha Exp $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's offical duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================*/
+
+/*****************************************************************************
+
+File name: blast_tabular.h
+
+Author: Ilya Dondoshansky
+
+Contents: Functions needed for formatting of BLAST results
+
+******************************************************************************
+ * $Revision: 1.2 $
+ * */
+#ifndef __BLAST_TABULAR__
+#define __BLAST_TABULAR__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef NCBI_C_TOOLKIT
+#define NCBI_C_TOOLKIT
+#endif
+
+#include <ncbi.h>
+#include <asn.h>
+#include <algo/blast/core/blast_hits.h>
+#include <algo/blast/core/lookup_wrap.h>
+#include <algo/blast/core/blast_seqsrc.h>
+#include <algo/blast/core/blast_hspstream.h>
+#include <algo/blast/core/blast_gapalign.h>
+#include <objloc.h>
+
+/** Data structure containing all information necessary for production of the
+ * tabular output.
+ */
+typedef struct BlastTabularFormatData {
+ Uint1 program; /**< Type of BLAST program */
+ BlastHSPStream* hsp_stream; /**< Source of the BLAST results */
+ BlastSeqSrc* seq_src; /**< Source of the subject sequences */
+ BLAST_SequenceBlk* query; /**< Query sequence */
+ BlastQueryInfo* query_info; /**< Query information, including context
+ offsets and effective lengths. */
+ BlastScoringParameters* score_params;
+ BlastExtensionParameters* ext_params;
+ BlastHitSavingParameters* hit_params;
+ BlastEffectiveLengthsParameters* eff_len_params;
+ Uint1* gen_code_string;
+ BlastGapAlignStruct* gap_align;
+ SeqLoc* query_slp; /**< Source of query sequences identifiers */
+ FILE* outfp; /**< Output stream */
+ Boolean perform_traceback; /**< Must gapped extension with traceback be
+ performed before formatting? */
+} BlastTabularFormatData;
+
+/** Function initializing the BlastTabularFormatData data structure fields. */
+BlastTabularFormatData*
+Blast_TabularFormatDataInit(Uint1 program, BlastHSPStream* hsp_stream,
+ BlastSeqSrc* seq_src, BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
+ const BlastScoringOptions* scoring_options, BlastScoreBlk* sbp,
+ const BlastEffectiveLengthsOptions* eff_len_options,
+ const BlastExtensionOptions* ext_options,
+ const BlastHitSavingOptions* hit_options,
+ const BlastDatabaseOptions* db_options, SeqLoc* query_slp, FILE* outfp);
+
+/** Free the tabular formatting data structure and all its internally
+ * allocated substructures.
+ */
+void BlastTabularFormatDataFree(BlastTabularFormatData* tf_data);
+
+/** Driver for the thread producing tabular output. */
+void* Blast_TabularFormatThread(void* data);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !__BLAST_TABULAR__ */
+
diff --git a/algo/blast/api/hspstream_queue.c b/algo/blast/api/hspstream_queue.c
new file mode 100644
index 00000000..ea5dac83
--- /dev/null
+++ b/algo/blast/api/hspstream_queue.c
@@ -0,0 +1,182 @@
+/* $Id: hspstream_queue.c,v 1.2 2004/06/08 17:46:35 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file hspstream_queue.c
+ * Implementation of the BlastHSPStream interface for producing BLAST results
+ * on the fly.
+ */
+
+static char const rcsid[] =
+ "$Id: hspstream_queue.c,v 1.2 2004/06/08 17:46:35 dondosha Exp $";
+
+
+#include <algo/blast/core/blast_hits.h>
+#include <algo/blast/api/hspstream_queue.h>
+#include <ncbithr.h>
+
+/** Default hit saving stream methods */
+
+static BlastHSPStream*
+BlastHSPListQueueFree(BlastHSPStream* hsp_stream)
+{
+ BlastHSPListQueueData* stream_data =
+ (BlastHSPListQueueData*) GetData(hsp_stream);
+ ListNode* node;
+
+ NlmSemaDestroy(stream_data->m_resultsSema);
+ NlmMutexDestroy(stream_data->m_resultsMutex);
+
+ for (node = stream_data->m_queueStart; node; node = node->next) {
+ node->ptr = (void*) Blast_HSPListFree((BlastHSPList*)node->ptr);
+ }
+ stream_data->m_queueStart = ListNodeFree(stream_data->m_queueStart);
+ sfree(stream_data);
+ sfree(hsp_stream);
+ return NULL;
+}
+
+static int
+BlastHSPListQueueRead(BlastHSPStream* hsp_stream,
+ BlastHSPList** hsp_list_out)
+{
+ BlastHSPListQueueData* stream_data =
+ (BlastHSPListQueueData*) GetData(hsp_stream);
+ int status = kBlastHSPStream_Error;
+
+ /* Lock the mutex */
+ NlmMutexLockEx(&stream_data->m_resultsMutex);
+
+ if (!stream_data->m_writingDone) {
+ while (!stream_data->m_writingDone && !stream_data->m_queueStart) {
+ /* Decrement the semaphore count to 0, then wait for it to be
+ * incremented. Note that mutex must be locked whenever the
+ * contents of the stream are checked, but it must be unlocked
+ * for the semaphore wait. */
+ NlmMutexUnlock(stream_data->m_resultsMutex);
+ NlmSemaWait(stream_data->m_resultsSema);
+ NlmMutexLockEx(&stream_data->m_resultsMutex);
+ }
+ }
+
+ if (!stream_data->m_queueStart) {
+ /* Nothing in the queue, but no more writing to the queue is expected. */
+ *hsp_list_out = NULL;
+ status = kBlastHSPStream_Eof;
+ } else {
+ ListNode* start_node = stream_data->m_queueStart;
+
+ *hsp_list_out = (BlastHSPList*) start_node->ptr;
+
+ stream_data->m_queueStart = start_node->next;
+ start_node->next = NULL;
+ ListNodeFree(start_node);
+ if (!stream_data->m_queueStart)
+ stream_data->m_queueEnd = NULL;
+ status = kBlastHSPStream_Success;
+ }
+
+ NlmMutexUnlock(stream_data->m_resultsMutex);
+
+ return status;
+}
+
+static int
+BlastHSPListQueueWrite(BlastHSPStream* hsp_stream,
+ BlastHSPList** hsp_list)
+{
+ BlastHSPListQueueData* stream_data =
+ (BlastHSPListQueueData*) GetData(hsp_stream);
+
+ /* If input is empty, don't do anything, but return success */
+ if (*hsp_list == NULL)
+ return kBlastHSPStream_Success;
+
+ /* If stream is closed for writing, return error */
+ if (stream_data->m_writingDone)
+ return kBlastHSPStream_Error;
+
+ NlmMutexLockEx(&stream_data->m_resultsMutex);
+ stream_data->m_queueEnd =
+ ListNodeAddPointer(&stream_data->m_queueEnd, 0, (void*)(*hsp_list));
+ if (!stream_data->m_queueStart)
+ stream_data->m_queueStart = stream_data->m_queueEnd;
+ /* Free the caller from this pointer's ownership. */
+ *hsp_list = NULL;
+ /* Increment the semaphore count. */
+ NlmSemaPost(stream_data->m_resultsSema);
+ NlmMutexUnlock(stream_data->m_resultsMutex);
+
+ return kBlastHSPStream_Success;
+}
+
+static void
+BlastHSPListQueueClose(BlastHSPStream* hsp_stream)
+{
+ BlastHSPListQueueData* stream_data =
+ (BlastHSPListQueueData*) GetData(hsp_stream);
+ NlmMutexLockEx(&stream_data->m_resultsMutex);
+ stream_data->m_writingDone = TRUE;
+ /* Increment the semaphore count so the reading thread can get out of
+ * the waiting state and check the m_writingDone variable. */
+ NlmSemaPost(stream_data->m_resultsSema);
+ NlmMutexUnlock(stream_data->m_resultsMutex);
+}
+
+static BlastHSPStream*
+BlastHSPListQueueNew(BlastHSPStream* hsp_stream, void* args)
+{
+ BlastHSPStreamFunctionPointerTypes fnptr;
+
+ fnptr.dtor = &BlastHSPListQueueFree;
+ SetMethod(hsp_stream, eDestructor, fnptr);
+ fnptr.method = &BlastHSPListQueueRead;
+ SetMethod(hsp_stream, eRead, fnptr);
+ fnptr.method = &BlastHSPListQueueWrite;
+ SetMethod(hsp_stream, eWrite, fnptr);
+ fnptr.closeFn = &BlastHSPListQueueClose;
+ SetMethod(hsp_stream, eClose, fnptr);
+
+ SetData(hsp_stream, args);
+ return hsp_stream;
+}
+
+BlastHSPStream* Blast_HSPListQueueInit()
+{
+ BlastHSPListQueueData* stream_data =
+ (BlastHSPListQueueData*) calloc(1, sizeof(BlastHSPListQueueData));
+ BlastHSPStreamNewInfo info;
+
+ /* At the start of the search there is nothing in the results queue, so
+ * initialize the semaphore count with 0. */
+ stream_data->m_resultsSema = NlmSemaInit(0);
+ info.constructor = &BlastHSPListQueueNew;
+ info.ctor_argument = (void*)stream_data;
+
+ return BlastHSPStreamNew(&info);
+}
diff --git a/algo/blast/api/hspstream_queue.h b/algo/blast/api/hspstream_queue.h
new file mode 100644
index 00000000..1b00b0e7
--- /dev/null
+++ b/algo/blast/api/hspstream_queue.h
@@ -0,0 +1,64 @@
+/* $Id: hspstream_queue.h,v 1.2 2004/06/08 17:46:35 dondosha Exp $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Ilya Dondoshansky
+ *
+ */
+
+/** @file hspstream_queue.h
+ * Implementation of the BlastHSPStream interface for producing results on the
+ * fly.
+ */
+
+#ifndef HSPSTREAM_QUEUE_H
+#define HSPSTREAM_QUEUE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <ncbithr.h>
+#include <algo/blast/core/blast_options.h>
+#include <algo/blast/core/blast_hits.h>
+#include <algo/blast/core/blast_seqsrc.h>
+#include <algo/blast/core/blast_hspstream.h>
+
+/** Data structure for the queue implementation of BlastHSPStream */
+typedef struct BlastHSPListQueueData {
+ ListNode* m_queueStart;
+ ListNode* m_queueEnd;
+ Boolean m_writingDone;
+ TNlmMutex m_resultsMutex;
+ TNlmSemaphore m_resultsSema;
+} BlastHSPListQueueData;
+
+/** Function to initialize the queue implementation of BlastHSPStream */
+BlastHSPStream* Blast_HSPListQueueInit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HSPSTREAM_QUEUE_H */
diff --git a/algo/blast/api/multiseq_src.c b/algo/blast/api/multiseq_src.c
index 31018649..c8e57990 100644
--- a/algo/blast/api/multiseq_src.c
+++ b/algo/blast/api/multiseq_src.c
@@ -1,4 +1,4 @@
-/* $Id: multiseq_src.c,v 1.5 2004/04/28 19:50:02 dondosha Exp $
+/* $Id: multiseq_src.c,v 1.6 2004/06/08 17:46:35 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -31,7 +31,7 @@
*
*/
-static char const rcsid[] = "$Id: multiseq_src.c,v 1.5 2004/04/28 19:50:02 dondosha Exp $";
+static char const rcsid[] = "$Id: multiseq_src.c,v 1.6 2004/06/08 17:46:35 dondosha Exp $";
#include <algo/blast/api/multiseq_src.h>
#include <algo/blast/core/blast_util.h>
@@ -57,7 +57,7 @@ static MultiSeqInfo* MultiSeqInfoNew(const SeqLoc* seqloc_list, Uint1 program)
index < num_seqs; ++index, seqloc_ptr = seqloc_ptr->next) {
retval->seqloc_array[index] = seqloc_ptr;
BLAST_SetUpSubject(program, seqloc_ptr, &retval->seqblk_array[index]);
- max_length = MAX(max_length, retval->seqblk_array[index]->length);
+ max_length = MAX(max_length, (Uint4)retval->seqblk_array[index]->length);
}
retval->max_length = max_length;
diff --git a/algo/blast/api/seqsrc_readdb.c b/algo/blast/api/seqsrc_readdb.c
index 1688c3cf..935dcb55 100644
--- a/algo/blast/api/seqsrc_readdb.c
+++ b/algo/blast/api/seqsrc_readdb.c
@@ -1,4 +1,4 @@
-/* $Id: seqsrc_readdb.c,v 1.27 2004/04/28 19:39:01 dondosha Exp $
+/* $Id: seqsrc_readdb.c,v 1.29 2004/06/07 17:14:58 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -30,7 +30,7 @@
*
*/
-static char const rcsid[] = "$Id: seqsrc_readdb.c,v 1.27 2004/04/28 19:39:01 dondosha Exp $";
+static char const rcsid[] = "$Id: seqsrc_readdb.c,v 1.29 2004/06/07 17:14:58 dondosha Exp $";
#include <algo/blast/api/seqsrc_readdb.h>
#include <algo/blast/core/blast_def.h>
@@ -197,8 +197,10 @@ static Int2 ReaddbRetSequence(void* readdb_handle, void* args)
ASSERT(readdb_args);
- if (readdb_args->seq->sequence_start_allocated)
+ if (readdb_args->seq->sequence_start_allocated) {
sfree(readdb_args->seq->sequence_start);
+ readdb_args->seq->sequence_start_allocated = FALSE;
+ }
return 0;
}
@@ -215,21 +217,26 @@ static char* ReaddbGetSeqIdStr(void* readdb_handle, void* args)
ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
Int4* oid = (Int4*) args;
SeqIdPtr sip = NULL;
+ char* descr = NULL;
char *seqid_str = NULL;
if (!rdfp || !oid)
return NULL;
- if ( !(seqid_str = (char*) malloc(sizeof(char)*SEQIDLEN_MAX)))
- return NULL;
-
- if (!readdb_get_descriptor(rdfp, *oid, &sip, NULL)) {
+ if (!readdb_get_descriptor(rdfp, *oid, &sip, &descr)) {
sfree(seqid_str);
return NULL;
}
- SeqIdWrite(sip, seqid_str, PRINTID_FASTA_LONG, SEQIDLEN_MAX-1);
-
+ if (sip->choice != SEQID_GENERAL ||
+ strcmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
+ if ( !(seqid_str = (char*) malloc(sizeof(char)*SEQIDLEN_MAX)))
+ return NULL;
+ SeqIdWrite(sip, seqid_str, PRINTID_FASTA_LONG, SEQIDLEN_MAX-1);
+ sfree(descr);
+ } else {
+ seqid_str = strtok(descr, " \t\n\r");
+ }
sip = SeqIdSetFree(sip);
return seqid_str;
@@ -445,6 +452,7 @@ BlastSeqSrc* ReaddbSeqSrcNew(BlastSeqSrc* retval, void* args)
/* Initialize the BlastSeqSrc structure fields with user-defined function
* pointers and rdfp */
SetDeleteFnPtr(retval, &ReaddbSeqSrcFree);
+ SetCopyFnPtr(retval, &ReaddbSeqSrcCopy);
SetDataStructure(retval, (void*) rdfp);
SetGetNumSeqs(retval, &ReaddbGetNumSeqs);
SetGetMaxSeqLen(retval, &ReaddbGetMaxLength);
@@ -478,9 +486,11 @@ BlastSeqSrc* ReaddbSeqSrcNew(BlastSeqSrc* retval, void* args)
while (rdfp && rdfp->stop < rargs->final_db_seq)
rdfp = rdfp->next;
/* Set last sequence for this and all subsequent rdfp's to the one
- in the arguments, making the subsequent rdfp's ranges empty. */
+ in the arguments, making the subsequent rdfp's ranges empty.
+ Note that final_db_seq in arguments is 1 beyond the last sequence
+ number to search. */
for ( ; rdfp; rdfp = rdfp->next)
- rdfp->stop = rargs->final_db_seq;
+ rdfp->stop = rargs->final_db_seq - 1;
}
return retval;
@@ -495,6 +505,20 @@ BlastSeqSrc* ReaddbSeqSrcFree(BlastSeqSrc* bssp)
return NULL;
}
+BlastSeqSrc* ReaddbSeqSrcCopy(BlastSeqSrc* bssp)
+{
+ ReadDBFILE* rdfp = NULL;
+
+ if (!bssp)
+ return NULL;
+
+ rdfp = readdb_attach((ReadDBFILEPtr)GetDataStructure(bssp));
+
+ SetDataStructure(bssp, (void*) rdfp);
+
+ return bssp;
+}
+
BlastSeqSrc*
ReaddbBlastSeqSrcInit(const char* dbname, Boolean is_prot, int first_seq,
int last_seq, void* extra_arg)
diff --git a/algo/blast/api/seqsrc_readdb.h b/algo/blast/api/seqsrc_readdb.h
index 0121a380..482222af 100644
--- a/algo/blast/api/seqsrc_readdb.h
+++ b/algo/blast/api/seqsrc_readdb.h
@@ -1,4 +1,4 @@
-/* $Id: seqsrc_readdb.h,v 1.9 2004/02/18 19:38:20 dondosha Exp $
+/* $Id: seqsrc_readdb.h,v 1.10 2004/06/07 17:15:18 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -69,6 +69,13 @@ BlastSeqSrc* ReaddbSeqSrcNew(BlastSeqSrc* bssp, void* args);
*/
BlastSeqSrc* ReaddbSeqSrcFree(BlastSeqSrc* bssp);
+/** Readdb sequence source copier:
+ * creates a new copy of the ReadDBFILE structure by calling readdb_attach.
+ * @param bssp BlastSeqSrc structure to copy [in]
+ * @return New BlastSeqSrc structure
+ */
+BlastSeqSrc* ReaddbSeqSrcCopy(BlastSeqSrc* bssp);
+
/** Initialize the sequence source structure.
* @param dbname BLAST database name [in]
* @param is_prot Is this a protein or nucleotide database? [in]
diff --git a/algo/blast/api/twoseq_api.c b/algo/blast/api/twoseq_api.c
index 9f3294ab..1a866bae 100644
--- a/algo/blast/api/twoseq_api.c
+++ b/algo/blast/api/twoseq_api.c
@@ -1,4 +1,4 @@
-/* $Id: twoseq_api.c,v 1.6 2004/05/05 15:30:13 dondosha Exp $
+/* $Id: twoseq_api.c,v 1.13 2004/06/08 17:47:24 dondosha Exp $
***************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -40,10 +40,15 @@
#include <algo/blast/core/blast_util.h>
#include <algo/blast/core/blast_engine.h>
#include <algo/blast/core/mb_lookup.h>
+#include <algo/blast/core/blast_filter.h>
+#include <algo/blast/core/hspstream_collector.h>
#include <algo/blast/api/multiseq_src.h>
#include <algo/blast/api/blast_seqalign.h>
#include <algo/blast/api/blast_seq.h>
#include <algo/blast/api/twoseq_api.h>
+#include <algo/blast/api/blast_returns.h>
+/* For the AdjustOffSetsInSeqalign function */
+#include <sequtil.h>
Int2 BLAST_SummaryOptionsInit(BLAST_SummaryOptions **options)
{
@@ -64,6 +69,9 @@ Int2 BLAST_SummaryOptionsInit(BLAST_SummaryOptions **options)
new_options->gapped_calculation = TRUE;
new_options->nucleotide_match = 1;
new_options->nucleotide_mismatch = -3;
+ new_options->word_threshold = 0;
+ new_options->init_seed_method = eDefaultSeedType;
+
*options = new_options;
return 0;
}
@@ -87,7 +95,7 @@ BLAST_FillOptions(Uint1 program_number,
BlastEffectiveLengthsOptions* eff_len_options,
PSIBlastOptions* psi_options,
BlastDatabaseOptions* db_options,
- BlastSeqSrc* seq_src, Int4 query_length,
+ Int4 query_length,
Int4 subject_length, RPSInfo *rps_info)
{
Boolean do_megablast = FALSE;
@@ -107,11 +115,12 @@ BLAST_FillOptions(Uint1 program_number,
/* If one of the sequences is large enough,
set up a megablast search */
- if (query_length > MEGABLAST_CUTOFF ||
+ if (basic_options->hint == eFast ||
+ query_length > MEGABLAST_CUTOFF ||
subject_length > MEGABLAST_CUTOFF) {
do_megablast = TRUE;
if (basic_options->gapped_calculation)
- greedy_align = TRUE; /* one-pass, no ungapped */
+ greedy_align = 1; /* one-pass, no ungapped */
}
/* For a megablast search or a blastn search with
@@ -122,20 +131,21 @@ BLAST_FillOptions(Uint1 program_number,
/* If megablast was turned on but the input indicates
a sensitive search is desired, switch to discontiguous
- megablast (hardwired to the 12-of-21 optimal template) */
+ megablast (hardwired to the 11-of-21 optimal template) */
- if (word_size == 0 && do_megablast &&
- basic_options->hint == eSensitive) {
- word_size = 12;
+ if (do_megablast && basic_options->hint == eSensitive) {
+ if (word_size == 0)
+ word_size = 11;
do_discontig = TRUE;
do_ag_blast = FALSE;
}
}
+
BLAST_FillLookupTableOptions(lookup_options,
program_number,
do_megablast,
- 0, /* default threshold */
+ basic_options->word_threshold,
word_size,
do_ag_blast,
0, /* no variable wordsize */
@@ -158,24 +168,27 @@ BLAST_FillOptions(Uint1 program_number,
BLAST_FillInitialWordOptions(word_options,
program_number,
- greedy_align,
+ (Boolean)greedy_align,
0, /* default for ungapped extensions */
0, /* no variable wordsize */
do_ag_blast,
do_megablast,
0); /* default ungapped X dropoff */
+ /* If we need to enforce a single-hit method, reset window size to 0.
+ To enforce two-hit method, set window size to a default non-zero
+ value */
+ if (basic_options->init_seed_method == eOneHit)
+ word_options->window_size = 0;
+ else if (basic_options->init_seed_method == eTwoHits)
+ word_options->window_size = BLAST_WINDOW_SIZE_PROT;
+
BLAST_FillExtensionOptions(ext_options,
program_number,
greedy_align,
- 0, /* default gapped X dropoff */
+ basic_options->gap_x_dropoff,
0); /* default final X dropoff */
- if (greedy_align == 1) {
- ext_options->algorithm_type = EXTEND_GREEDY;
- word_options->ungapped_extension = FALSE;
- }
-
if (basic_options->matrix == NULL)
matrix = "BLOSUM62";
else
@@ -183,12 +196,12 @@ BLAST_FillOptions(Uint1 program_number,
BLAST_FillScoringOptions(score_options,
program_number,
- greedy_align,
+ (Boolean)greedy_align,
basic_options->nucleotide_mismatch,
basic_options->nucleotide_match,
matrix,
- 0, /* default gap open penalty */
- 0); /* default gap extend penalty */
+ basic_options->gap_open,
+ basic_options->gap_extend);
score_options->gapped_calculation = basic_options->gapped_calculation;
@@ -197,23 +210,95 @@ BLAST_FillOptions(Uint1 program_number,
0); /* default number of alignments saved */
hit_options->percent_identity = 0; /* no percent identity cutoff */
-
- BLAST_FillEffectiveLengthsOptions(eff_len_options,
- 1, /* one sequence */
- subject_length, /* this long */
- 0); /* default search space */
+
+ eff_len_options->db_length = basic_options->db_length;
+
return 0;
}
Int2
-BLAST_TwoSequencesSearch(const BLAST_SummaryOptions *basic_options,
- BioseqPtr bsp1, BioseqPtr bsp2, SeqAlign **seqalign_out)
+BLAST_TwoSequencesSearch(BLAST_SummaryOptions *basic_options,
+ BioseqPtr bsp1, BioseqPtr bsp2,
+ SeqAlign **seqalign_out)
{
+ Uint1 program_type = blast_type_undefined;
SeqLocPtr query_slp = NULL; /* sequence variables */
SeqLocPtr subject_slp = NULL;
+ Boolean seq1_is_aa, seq2_is_aa;
+ Int2 status = 0;
+
+ /* sanity checks */
+
+ *seqalign_out = NULL;
+ if (bsp1 == NULL || bsp2 == NULL)
+ return 0;
+
+ seq1_is_aa = ISA_aa(bsp1->mol);
+ seq2_is_aa = ISA_aa(bsp2->mol);
+
+ /* Find program type consistent with the sequences. */
+ if (!seq1_is_aa && !seq2_is_aa) {
+ if (basic_options->program == eTblastx)
+ program_type = blast_type_tblastx;
+ else
+ program_type = blast_type_blastn;
+ } else if (seq1_is_aa && seq2_is_aa) {
+ program_type = blast_type_blastp;
+ } else if (!seq1_is_aa && seq2_is_aa) {
+ program_type = blast_type_blastx;
+ } else if (seq1_is_aa && !seq2_is_aa) {
+ program_type = blast_type_tblastn;
+ }
+
+ /* Check if program type in options is consistent with the one determined
+ from sequences. */
+ if (basic_options->program == eChoose)
+ basic_options->program = program_type;
+ else if (basic_options->program != program_type)
+ return -1;
+
+ /* Convert the bioseqs into seqlocs. */
+
+ ValNodeAddPointer(&query_slp, SEQLOC_WHOLE,
+ SeqIdDup(SeqIdFindBest(bsp1->id, SEQID_GI)));
+ if (!query_slp)
+ return -1;
+ ValNodeAddPointer(&subject_slp, SEQLOC_WHOLE,
+ SeqIdDup(SeqIdFindBest(bsp2->id, SEQID_GI)));
+ if (!subject_slp)
+ return -1;
+
+ status = BLAST_TwoSeqLocSets(basic_options, query_slp, subject_slp,
+ seqalign_out, NULL, NULL);
+ SeqLocFree(query_slp);
+ SeqLocFree(subject_slp);
+
+ return status;
+}
+
+static Int4 SeqLocListLen(SeqLoc* seqloc)
+{
+ Int4 length = 0;
+
+ for ( ; seqloc; seqloc = seqloc->next)
+ length += SeqLocLen(seqloc);
+
+ return length;
+}
+
+/** Compares one list of SeqLoc's against another list of SeqLoc's using the
+ * BLAST algorithm.
+ */
+Int2
+BLAST_TwoSeqLocSets(const BLAST_SummaryOptions *basic_options,
+ SeqLoc* query_seqloc, SeqLoc* subject_seqloc,
+ SeqAlign **seqalign_out,
+ SeqLoc** filter_out,
+ BLAST_SummaryReturn* *extra_returns)
+{
+ Uint1 program_type;
BlastSeqSrc *seq_src = NULL;
BLAST_SequenceBlk *query = NULL;
- Uint1 program_type;
BlastQueryInfo* query_info = NULL;
ListNode* lookup_segments = NULL; /* query filtering structures */
@@ -229,39 +314,31 @@ BLAST_TwoSequencesSearch(const BLAST_SummaryOptions *basic_options,
BlastEffectiveLengthsOptions* eff_len_options = NULL;
BlastScoreBlk* sbp = NULL;
BlastHSPResults* results = NULL;
- BlastReturnStat* return_stats = NULL;
+ BlastDiagnostics* diagnostics = NULL;
PSIBlastOptions* psi_options = NULL;
BlastDatabaseOptions* db_options = NULL;
RPSInfo *rps_info = NULL;
-
- Int2 status;
-
- /* sanity checks */
-
- *seqalign_out = NULL;
- if (bsp1 == NULL || bsp2 == NULL)
- return 0;
-
- if (bsp1->mol != bsp2->mol)
- return 0;
-
- /* decide which blast program to execute */
+ Int2 status = 0;
+ BlastHSPStream* hsp_stream;
switch(basic_options->program) {
- case eBlastp:
- program_type = blast_type_blastp;
- break;
case eBlastn:
- program_type = blast_type_blastn;
- break;
- case eChoose:
- if (ISA_aa(bsp1->mol))
- program_type = blast_type_blastp;
- else
- program_type = blast_type_blastn;
- break;
+ program_type = blast_type_blastn;
+ break;
+ case eBlastp:
+ program_type = blast_type_blastp;
+ break;
+ case eBlastx:
+ program_type = blast_type_blastx;
+ break;
+ case eTblastn:
+ program_type = blast_type_tblastn;
+ break;
+ case eTblastx:
+ program_type = blast_type_tblastx;
+ break;
default:
- return -1;
+ return -1;
}
/* fill in the engine-specific options */
@@ -273,36 +350,35 @@ BLAST_TwoSequencesSearch(const BLAST_SummaryOptions *basic_options,
if (status != 0)
goto bail_out;
+ if (program_type == blast_type_tblastn ||
+ program_type == blast_type_tblastx) {
+ if ((status = BLAST_GeneticCodeFind(db_options->genetic_code,
+ &db_options->gen_code_string)))
+ return status;
+ }
+
+ seq_src = MultiSeqSrcInit(subject_seqloc, program_type);
+ if (seq_src == NULL)
+ goto bail_out;
+
status = BLAST_FillOptions(program_type, basic_options,
lookup_options, query_options, word_options,
ext_options, hit_options, score_options,
eff_len_options, psi_options, db_options,
- NULL, bsp1->length, bsp2->length, rps_info);
+ SeqLocListLen(query_seqloc),
+ BLASTSeqSrcGetMaxSeqLen(seq_src),
+ rps_info);
if (status != 0)
goto bail_out;
- /* Convert the bioseqs into seqlocs. */
-
- ValNodeAddPointer(&query_slp, SEQLOC_WHOLE,
- SeqIdDup(SeqIdFindBest(bsp1->id, SEQID_GI)));
- ValNodeAddPointer(&subject_slp, SEQLOC_WHOLE,
- SeqIdDup(SeqIdFindBest(bsp2->id, SEQID_GI)));
- if (query_slp == NULL || subject_slp == NULL) {
- status = -1;
- goto bail_out;
- }
/* convert the seqlocs into SequenceBlks, and fill in query_info */
- status = BLAST_SetUpQuery(program_type, query_slp, query_options,
+ status = BLAST_SetUpQuery(program_type, query_seqloc, query_options,
&query_info, &query);
if (status != 0)
goto bail_out;
- seq_src = MultiSeqSrcInit(subject_slp, program_type);
- if (seq_src == NULL)
- goto bail_out;
-
/* perform final setup */
status = BLAST_ValidateOptions(program_type, ext_options,
@@ -311,37 +387,58 @@ BLAST_TwoSequencesSearch(const BLAST_SummaryOptions *basic_options,
goto bail_out;
status = BLAST_MainSetUp(program_type, query_options, score_options,
- hit_options, query, query_info, &lookup_segments,
+ hit_options, query, query_info, 1.0, &lookup_segments,
&filter_loc, &sbp, NULL);
if (status != 0)
goto bail_out;
- return_stats = (BlastReturnStat*) calloc(1, sizeof(BlastReturnStat));
- if (return_stats == NULL)
- goto bail_out;
+ if (extra_returns) {
+ if ((diagnostics = Blast_DiagnosticsInit()) == NULL)
+ goto bail_out;
+ }
- Blast_HSPResultsInit(query_info->num_queries, &results);
LookupTableWrapInit(query, lookup_options,
lookup_segments, sbp, &lookup_wrap, NULL);
+ /* Initialize the HSPList collector stream. Results should not be sorted
+ before reading from it. */
+ hsp_stream =
+ Blast_HSPListCollectorInit(program_type, hit_options,
+ query_info->num_queries, FALSE);
/* finally, do the search */
status = BLAST_SearchEngine(program_type, query, query_info,
seq_src, sbp, score_options, lookup_wrap, word_options,
ext_options, hit_options, eff_len_options,
- psi_options, db_options, results, return_stats);
+ psi_options, db_options, hsp_stream, diagnostics, &results);
+
+ hsp_stream = BlastHSPStreamFree(hsp_stream);
if (status != 0)
goto bail_out;
/* Convert results to the SeqAlign form */
- status = BLAST_ResultsToSeqAlign(program_type, results, query_slp, NULL,
- subject_slp, score_options, sbp, score_options->gapped_calculation,
- seqalign_out);
+ status = BLAST_ResultsToSeqAlign(program_type, results, query_seqloc,
+ seq_src, score_options->gapped_calculation,
+ score_options->is_ooframe, seqalign_out);
bail_out:
- if (return_stats)
- sfree(return_stats);
+ AdjustOffSetsInSeqAlign(*seqalign_out, query_seqloc, subject_seqloc);
+
+ if (!status && extra_returns) {
+ Blast_SummaryReturnFill(program_type, score_options, sbp,
+ lookup_options, word_options, ext_options,
+ hit_options, eff_len_options, query_info,
+ seq_src, diagnostics, extra_returns);
+ }
+
+ if (filter_out) {
+ *filter_out =
+ BlastMaskLocToSeqLoc(program_type, filter_loc, query_seqloc);
+ }
+
+ Blast_DiagnosticsFree(diagnostics);
+ BlastMaskLocFree(filter_loc);
BlastSeqSrcFree(seq_src);
LookupTableWrapFree(lookup_wrap);
ListNodeFreeData(lookup_segments);
@@ -358,8 +455,9 @@ bail_out:
BlastEffectiveLengthsOptionsFree(eff_len_options);
PSIBlastOptionsFree(psi_options);
BlastDatabaseOptionsFree(db_options);
- SeqLocFree(query_slp);
- SeqLocFree(subject_slp);
return status;
}
+
+
+
diff --git a/algo/blast/api/twoseq_api.h b/algo/blast/api/twoseq_api.h
index f449b699..4ba5f768 100644
--- a/algo/blast/api/twoseq_api.h
+++ b/algo/blast/api/twoseq_api.h
@@ -1,4 +1,4 @@
-/* $Id: twoseq_api.h,v 1.2 2004/03/24 19:14:21 papadopo Exp $
+/* $Id: twoseq_api.h,v 1.3 2004/05/14 17:24:03 dondosha Exp $
***************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -52,9 +52,13 @@
* megablast with word size 12 is used.
*/
enum blast_type {
- eChoose = 0, /**< blastn for nuc. sequences, blastp otherwise */
+ eChoose = 0, /**< Choose type of search by sequences molecule type:
+ n-n=blastn, p-p=blastp, n-p=blastx, p-n=tblastn */
eBlastn = 1, /**< blastn or megablast (determined automatically) */
- eBlastp = 2 /**< blastp search on two protein sequences */
+ eBlastp = 2, /**< blastp search between protein sequences */
+ eBlastx = 3, /**< blastx for nucleotide vs protein sequences */
+ eTblastn = 4, /**< tblastn for protein vs nucleotide sequences */
+ eTblastx = 5 /**< tblastx for translated nucleotide sequences */
};
/**
@@ -66,6 +70,14 @@ enum blast_hint {
eFast = 1 /**< trade off sensitivity for speed */
};
+typedef enum seed_type {
+ eDefaultSeedType = 0, /**< BLAST will decide which method to use based on
+ program and other information. */
+ eOneHit = 1, /**< Require only one initial hit for extension */
+ eTwoHits = 2 /**< Require more than one hit within a window
+ for extension */
+} seed_type;
+
/**
* The main user-visible setup structure for the API. This
* only makes a (small) subset of the complete options available
@@ -99,8 +111,43 @@ typedef struct {
for matching letters (default 1) */
Int4 nucleotide_mismatch; /**< For nucleotide searches, the penalty
for mismatching letters (default -3) */
+ Int4 gap_open; /**< Cost of opening a gap. Default=0, invokes
+ default values: 5 for nucleotide;
+ depends on matrix for protein search.*/
+ Int4 gap_extend; /**< Cost of extending a gap. Default=0,
+ invokes default values: 2 for nucleotide;
+ depends on matrix for protein search.*/
+ Int4 gap_x_dropoff; /**< Dropoff value for the gapped extension.
+ Default=0, invokes default values. */
+ double db_length; /**< Database length to use in statistical
+ calculations.
+ Default=0 means "database length" is set
+ to the subject sequence length for each
+ subject sequence. */
+ Int4 word_threshold; /**< Threshold for finding neighboring words
+ in protein searches. */
+ seed_type init_seed_method; /**< Single-hit or multiple-hit choice of
+ initial seeds for extension. */
} BLAST_SummaryOptions;
+/** Small structure containing the just those Karlin-Altschul parameters needed
+ * for the BLAST formatting */
+typedef struct BLAST_KAParameters {
+ double Lambda;
+ double K;
+ double H;
+} BLAST_KAParameters;
+
+/** Structure holding all calculated data returned from a BLAST search other
+ * than the alignment.
+ */
+typedef struct BLAST_SummaryReturn {
+ BLAST_KAParameters* ka_params; /**< Ungapped Karlin-Altschul parameters */
+ BLAST_KAParameters* ka_params_gap;/**< Gapped Karlin-Altschul parameters */
+ char* params_buffer; /**< Buffer holding the bottom of BLAST report. */
+} BLAST_SummaryReturn;
+
+
/**
* Allocate storage for an API setup structure and set the
* default options for it.
@@ -129,9 +176,31 @@ BLAST_SummaryOptions* BLAST_SummaryOptionsFree(BLAST_SummaryOptions *options);
* If search failed or no alignments were found, set to NULL [out]
* @return 0 for a successful search, nonzero if search failed
*/
-Int2 BLAST_TwoSequencesSearch(const BLAST_SummaryOptions *options,
+Int2 BLAST_TwoSequencesSearch(BLAST_SummaryOptions *options,
Bioseq *bsp1,
Bioseq *bsp2,
SeqAlign **seqalign_out);
+/**
+ * Perform a BLAST search on the two input sequences and return
+ * the list of alignments the search generates
+ * @param options Structure describing how the search will be configured [in]
+ * @param seqloc1 The first list of sequences (queries) to be compared.
+ * Filtering is applied only to these sequences [in]
+ * @param seqloc2 The second list of sequences (subjects) to be compared [in]
+ * @param seqalign_out The list of alignments generated by the search.
+ * Alignments are sorted by query; then by subject among
+ * same query alignments.
+ * If search failed or no alignments were found,
+ * set to NULL [out]
+ * @param filter_out Masking locations [out]
+ * @param extra_returns Data needed to print the bottom of BLAST report [out]
+ * @return 0 for a successful search, nonzero if search failed
+ */
+Int2 BLAST_TwoSeqLocSets(const BLAST_SummaryOptions *options,
+ SeqLoc* seqloc1, SeqLoc* seqloc2,
+ SeqAlign **seqalign_out,
+ SeqLoc** filter_out,
+ BLAST_SummaryReturn* *extra_returns);
+
#endif /* !_TWOSEQ_API_H_ */