summaryrefslogtreecommitdiff
path: root/algo
diff options
context:
space:
mode:
authorAaron M. Ucko <ucko@debian.org>2009-08-11 17:36:45 +0000
committerAaron M. Ucko <ucko@debian.org>2009-08-11 17:36:45 +0000
commitb4be7afc96f6bd0604ad7e6070c4baf8be8d808f (patch)
treecbb84659b8c016c34d5daf011b0248b85756d974 /algo
parentcb2452a815dd397299bc41d4ca4339883a4cf19e (diff)
[svn-upgrade] Integrating new upstream version, ncbi-tools6 (6.1.20090809)
Diffstat (limited to 'algo')
-rw-r--r--algo/blast/core/blast_extend.c13
-rw-r--r--algo/blast/core/blast_extend.h4
-rw-r--r--algo/blast/core/na_ungapped.c251
3 files changed, 152 insertions, 116 deletions
diff --git a/algo/blast/core/blast_extend.c b/algo/blast/core/blast_extend.c
index dcfd895c..6c09e454 100644
--- a/algo/blast/core/blast_extend.c
+++ b/algo/blast/core/blast_extend.c
@@ -1,4 +1,4 @@
-/* $Id: blast_extend.c,v 1.118 2009/01/05 16:54:38 kazimird Exp $
+/* $Id: blast_extend.c,v 1.119 2009/07/30 19:34:30 kazimird Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -30,7 +30,7 @@
#ifndef SKIP_DOXYGEN_PROCESSING
static char const rcsid[] =
- "$Id: blast_extend.c,v 1.118 2009/01/05 16:54:38 kazimird Exp $";
+ "$Id: blast_extend.c,v 1.119 2009/07/30 19:34:30 kazimird Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
#include <algo/blast/core/blast_extend.h>
@@ -80,7 +80,7 @@ s_BlastDiagTableFree(BLAST_DiagTable* diag_table)
{
if (diag_table) {
sfree(diag_table->hit_level_array);
-
+ sfree(diag_table->hit_len_array);
sfree(diag_table);
}
return NULL;
@@ -106,6 +106,7 @@ static Int4 s_BlastDiagClear(BLAST_DiagTable * diag)
for (i = 0; i < n; i++) {
diag_struct_array[i].flag = 0;
diag_struct_array[i].last_hit = -diag->window;
+ if (diag->hit_len_array) diag->hit_len_array[i] = 0;
}
return 0;
}
@@ -148,6 +149,10 @@ Int2 BlastExtendWordNew(Uint4 query_length,
diag_table->hit_level_array = (DiagStruct *)
calloc(diag_table->diag_array_length, sizeof(DiagStruct));
+ if (word_params->options->window_size) {
+ diag_table->hit_len_array = (Uint1 *)
+ calloc(diag_table->diag_array_length, sizeof(Uint1));
+ }
if (!diag_table->hit_level_array) {
sfree(ewp);
return -1;
@@ -173,7 +178,7 @@ Blast_ExtendWordExit(Blast_ExtendWord * ewp, Int4 subject_length)
}
} else if (ewp->hash_table) {
if (ewp->hash_table->offset >= INT4_MAX / 4) {
- ewp->hash_table->occupancy = 1;
+ ewp->hash_table->occupancy = 1;
ewp->hash_table->offset = ewp->hash_table->window;
memset(ewp->hash_table->backbone, 0,
ewp->hash_table->num_buckets * sizeof(Int4));
diff --git a/algo/blast/core/blast_extend.h b/algo/blast/core/blast_extend.h
index 01f1061a..c627fad1 100644
--- a/algo/blast/core/blast_extend.h
+++ b/algo/blast/core/blast_extend.h
@@ -1,4 +1,4 @@
-/* $Id: blast_extend.h,v 1.53 2008/07/23 16:55:47 kazimird Exp $
+/* $Id: blast_extend.h,v 1.54 2009/07/30 19:34:30 kazimird Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -66,6 +66,7 @@ typedef struct DiagHashCell {
Int4 diag; /**< This hit's diagonal */
Int4 level : 31; /**< This hit's offset in the subject sequence */
Uint4 hit_saved : 1; /**< Whether or not this hit has been saved */
+ Int4 hit_len; /**< The length of last hit */
Uint4 next; /**< Offset of next element in the chain */
} DiagHashCell;
@@ -76,6 +77,7 @@ typedef struct DiagHashCell {
typedef struct BLAST_DiagTable {
DiagStruct* hit_level_array;/**< Array to hold latest hits and their
lengths for all diagonals */
+ Uint1* hit_len_array; /**< Array to hold the lengthof the latest hit */
Int4 diag_array_length; /**< Smallest power of 2 longer than query length */
Int4 diag_mask; /**< Used to mask off everything above
min_diag_length (mask = min_diag_length-1). */
diff --git a/algo/blast/core/na_ungapped.c b/algo/blast/core/na_ungapped.c
index e9b63197..308fc13e 100644
--- a/algo/blast/core/na_ungapped.c
+++ b/algo/blast/core/na_ungapped.c
@@ -1,4 +1,4 @@
-/* $Id: na_ungapped.c,v 1.20 2009/06/22 13:54:32 kazimird Exp $
+/* $Id: na_ungapped.c,v 1.21 2009/07/30 19:34:30 kazimird Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -30,7 +30,7 @@
#ifndef SKIP_DOXYGEN_PROCESSING
static char const rcsid[] =
- "$Id: na_ungapped.c,v 1.20 2009/06/22 13:54:32 kazimird Exp $";
+ "$Id: na_ungapped.c,v 1.21 2009/07/30 19:34:30 kazimird Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
#include <algo/blast/core/na_ungapped.h>
@@ -251,10 +251,12 @@ s_NuclUngappedExtend(BLAST_SequenceBlk * query,
* @param diag The diagonal to be retrieved [in]
* @param level The offset of the last hit on the specified diagonal [out]
* @param hit_saved Whether or not the last hit on the specified diagonal was saved [out]
+ * @param hit_length length of the last hit on the specified diagonal [out]
* @return 1 if successful, 0 if no hit was found on the specified diagonal.
*/
static NCBI_INLINE Int4 s_BlastDiagHashRetrieve(BLAST_DiagHash * table,
Int4 diag, Int4 * level,
+ Int4 * hit_len,
Int4 * hit_saved)
{
/* see http://lxr.linux.no/source/include/linux/hash.h */
@@ -265,6 +267,7 @@ static NCBI_INLINE Int4 s_BlastDiagHashRetrieve(BLAST_DiagHash * table,
while (index) {
if (table->chain[index].diag == diag) {
*level = table->chain[index].level;
+ *hit_len = table->chain[index].hit_len;
*hit_saved = table->chain[index].hit_saved;
return 1;
}
@@ -280,40 +283,38 @@ static NCBI_INLINE Int4 s_BlastDiagHashRetrieve(BLAST_DiagHash * table,
* @param table The hash table [in]
* @param diag The diagonal to be stored [in]
* @param level The offset of the hit to be stored [in]
+ * @param len The length of the hit to be stored [in]
* @param hit_saved Whether or not this hit was stored [in]
* @param s_end Needed to clean up defunct entries [in]
* @param window_size Needed to clean up defunct entries [in]
- * @param min_step Needed to clean up defunct entries [in]
- * @param two_hits Needed to clean up defunct entries [in]
* @return 1 if successful, 0 if memory allocation failed.
*/
static NCBI_INLINE Int4 s_BlastDiagHashInsert(BLAST_DiagHash * table,
Int4 diag, Int4 level,
+ Int4 len,
Int4 hit_saved,
- Int4 s_end,
- Int4 window_size,
- Int4 min_step,
- Int4 two_hits)
+ Int4 s_off,
+ Int4 window_size)
{
Uint4 bucket = ((Uint4) diag * 0x9E370001) % DIAGHASH_NUM_BUCKETS;
Uint4 index = table->backbone[bucket];
+ DiagHashCell *cell = NULL;
while (index) {
/* if we find what we're looking for, save into it */
if (table->chain[index].diag == diag) {
table->chain[index].level = level;
+ table->chain[index].hit_len = len;
table->chain[index].hit_saved = hit_saved;
return 1;
}
/* otherwise, if this hit is stale, save into it. */
else {
- Int4 step = s_end - table->chain[index].level;
/* if this hit is stale, save into it. */
- if (!
- (step <= (Int4) min_step
- || (two_hits && step <= window_size))) {
+ if ( s_off - table->chain[index].level > window_size) {
table->chain[index].diag = diag;
table->chain[index].level = level;
+ table->chain[index].hit_len = len;
table->chain[index].hit_saved = hit_saved;
return 1;
}
@@ -324,7 +325,6 @@ static NCBI_INLINE Int4 s_BlastDiagHashInsert(BLAST_DiagHash * table,
/* if we got this far, we were unable to replace any existing entries. */
/* if there's no more room, allocate more */
-
if (table->occupancy == table->capacity) {
table->capacity *= 2;
table->chain =
@@ -333,15 +333,14 @@ static NCBI_INLINE Int4 s_BlastDiagHashInsert(BLAST_DiagHash * table,
return 0;
}
- {
- DiagHashCell *cell = table->chain + table->occupancy;
- cell->diag = diag;
- cell->level = level;
- cell->hit_saved = hit_saved;
- cell->next = table->backbone[bucket];
- table->backbone[bucket] = table->occupancy;
- table->occupancy++;
- }
+ cell = table->chain + table->occupancy;
+ cell->diag = diag;
+ cell->level = level;
+ cell->hit_len = len;
+ cell->hit_saved = hit_saved;
+ cell->next = table->backbone[bucket];
+ table->backbone[bucket] = table->occupancy;
+ table->occupancy++;
return 1;
}
@@ -421,6 +420,7 @@ s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk * query,
{
Int4 diag, real_diag;
Int4 s_end, s_off_pos, s_end_pos;
+ Int4 ext_right = 0;
BlastUngappedData *ungapped_data;
BlastUngappedData dummy_ungapped_data;
Int4 window_size = word_params->options->window_size;
@@ -429,6 +429,8 @@ s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk * query,
DiagStruct *hit_level_array;
BlastUngappedCutoffs *cutoffs = NULL;
Boolean two_hits = (window_size > 0);
+ Boolean found = FALSE;
+ Int4 Delta = MIN(5, window_size - word_length);
hit_level_array = diag_table->hit_level_array;
ASSERT(hit_level_array);
@@ -441,41 +443,56 @@ s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk * query,
s_off_pos = s_off + diag_table->offset;
s_end_pos = s_end + diag_table->offset;
- if (contiguous) {
- /* hit within the explored area should be rejected*/
- if (s_off_pos < last_hit) return 0;
+ /* hit within the explored area should be rejected*/
+ if (s_off_pos < last_hit) return 0;
- if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) {
- /* this must be the 1st hit */
- /* check to see if it can be extended to the right by
- word_length and therefore qualifies for a double-hit */
- Uint4 ext_right = s_BlastRightExtend(query, subject,
+ if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) {
+ /* check to see if it can be extended to the right by
+ word_length and therefore qualifies for a double-hit */
+ if (contiguous) {
+ ext_right = s_BlastRightExtend(query, subject,
q_off + word_length, s_end, query_info, word_length);
/* update the right end*/
s_end += ext_right;
s_end_pos += ext_right;
- if (ext_right < word_length) {
- /* if it is not a double hit, then it is a new hit */
+ }
+
+ if (ext_right < word_length) {
+ /* try off-diagonals */
+ Int4 orig_diag = real_diag + diag_table->diag_array_length;
+ Int4 s_a = s_off_pos + word_length - window_size;
+ Int4 s_b = s_end_pos - 2 * word_length;
+ Int4 delta;
+ if (Delta < 0) Delta = 0;
+ for (delta = 1; delta < Delta ; ++delta) {
+ Int4 off_diag = (orig_diag + delta) & diag_table->diag_mask;
+ Int4 off_s_end = hit_level_array[off_diag].last_hit;
+ Int4 off_s_l = diag_table->hit_len_array[off_diag];
+ if ( off_s_l
+ && off_s_end - delta >= s_a
+ && off_s_end - off_s_l <= s_b) {
+ found = TRUE;
+ break;
+ }
+ off_diag = (orig_diag - delta) & diag_table->diag_mask;
+ off_s_end = hit_level_array[off_diag].last_hit;
+ off_s_l = diag_table->hit_len_array[off_diag];
+ if ( off_s_l
+ && off_s_end >= s_a
+ && off_s_end - off_s_l + delta <= s_b) {
+ found = TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ /* This is a new hit */
hit_ready = 0;
- last_hit = s_end_pos;
- hit_saved = 0;
}
}
- } else {
- /* hit within the explored area should be rejected*/
- if (s_off_pos < last_hit) return 0;
-
- if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) {
- /* first hit */
- hit_ready = 0;
- last_hit = s_end_pos;
- hit_saved = 0;
- }
}
if (hit_ready) {
if (word_params->ungapped_extension) {
- /* Perform ungapped extension */
Int4 context = BSearchContextInfo(q_off, query_info);
cutoffs = word_params->cutoffs + context;
ungapped_data = &dummy_ungapped_data;
@@ -484,31 +501,27 @@ s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk * query,
word_params->nucl_score_table,
cutoffs->reduced_nucl_cutoff_score);
- last_hit = ungapped_data->length + ungapped_data->s_start
- + diag_table->offset;
+ if (found || ungapped_data->score >= cutoffs->cutoff_score) {
+ BlastUngappedData *final_data =
+ (BlastUngappedData *) malloc(sizeof(BlastUngappedData));
+ *final_data = *ungapped_data;
+ BLAST_SaveInitialHit(init_hitlist, q_off, s_off, final_data);
+ s_end_pos = ungapped_data->length + ungapped_data->s_start
+ + diag_table->offset;
+ } else {
+ hit_ready = 0;
+ }
} else {
ungapped_data = NULL;
- last_hit = s_end_pos;
- }
- if (ungapped_data == NULL) {
BLAST_SaveInitialHit(init_hitlist, q_off, s_off, ungapped_data);
- /* Set the "saved" flag for this hit */
- hit_saved = 1;
- } else if (ungapped_data->score >= cutoffs->cutoff_score) {
- BlastUngappedData *final_data =
- (BlastUngappedData *) malloc(sizeof(BlastUngappedData));
- *final_data = *ungapped_data;
- BLAST_SaveInitialHit(init_hitlist, q_off, s_off, final_data);
- /* Set the "saved" flag for this hit */
- hit_saved = 1;
- } else {
- /* Unset the "saved" flag for this hit */
- hit_saved = 0;
}
}
- hit_level_array[real_diag].last_hit = last_hit;
- hit_level_array[real_diag].flag = hit_saved;
+ hit_level_array[real_diag].last_hit = s_end_pos;
+ hit_level_array[real_diag].flag = hit_ready;
+ if (two_hits) {
+ diag_table->hit_len_array[real_diag] = (hit_ready) ? 0 : s_end_pos - s_off_pos;
+ }
return hit_ready;
}
@@ -543,7 +556,8 @@ s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk * query,
BlastInitHitList * init_hitlist)
{
Int4 diag;
- Int4 s_end, s_off_pos, s_end_pos;
+ Int4 s_end, s_off_pos, s_end_pos, s_l;
+ Int4 ext_right = 0;
BlastUngappedData *ungapped_data;
BlastUngappedData dummy_ungapped_data;
Int4 window_size = word_params->options->window_size;
@@ -551,6 +565,8 @@ s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk * query,
Int4 last_hit, hit_saved = 0;
BlastUngappedCutoffs *cutoffs = NULL;
Boolean two_hits = (window_size > 0);
+ Boolean found = FALSE;
+ Int4 Delta = MIN(5, window_size - word_length);
Int4 rc;
diag = s_off - q_off;
@@ -558,40 +574,60 @@ s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk * query,
s_off_pos = s_off + hash_table->offset;
s_end_pos = s_end + hash_table->offset;
- rc = s_BlastDiagHashRetrieve(hash_table, diag, &last_hit, &hit_saved);
+ rc = s_BlastDiagHashRetrieve(hash_table, diag, &last_hit, &s_l, &hit_saved);
/* if there is no record in hashtable, we set last_hit to be a very negative number */
- if(!rc) last_hit = 0;
- if (contiguous) {
- /* hit within the explored area should be rejected*/
- if (s_off_pos < last_hit) return 0;
-
- if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) {
- /* this must be the 1st hit */
- /* check to see if it can be extended to the right by
- word_length and therefore qualifies for a double-hit */
- Uint4 ext_right = s_BlastRightExtend(query, subject,
+ if(!rc) last_hit = 0;
+
+ /* hit within the explored area should be rejected*/
+ if (s_off_pos < last_hit) return 0;
+
+ if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) {
+ /* this must be the 1st hit */
+ /* check to see if it can be extended to the right by
+ word_length and therefore qualifies for a double-hit */
+ if (contiguous) {
+ ext_right = s_BlastRightExtend(query, subject,
q_off + word_length, s_end, query_info, word_length);
/* update the right end*/
s_end += ext_right;
s_end_pos += ext_right;
- if (ext_right < word_length) {
- /* if it is not a double hit, then it is a new hit */
+ }
+
+ if (ext_right < word_length) {
+ /* try off-diagonal */
+ Int4 s_a = s_off_pos + word_length - window_size;
+ Int4 s_b = s_end_pos - 2 * word_length;
+ Int4 delta;
+ if (Delta < 0) Delta = 0;
+ for (delta = 1; delta < Delta; ++delta) {
+ Int4 off_s_end = 0;
+ Int4 off_s_l = 0;
+ Int4 off_hit_saved = 0;
+ Int4 off_rc = s_BlastDiagHashRetrieve(hash_table, diag + delta,
+ &off_s_end, &off_s_l, &off_hit_saved);
+ if ( off_rc
+ && off_s_l
+ && off_s_end - delta >= s_a
+ && off_s_end - off_s_l <= s_b) {
+ found = TRUE;
+ break;
+ }
+ off_rc = s_BlastDiagHashRetrieve(hash_table, diag - delta,
+ &off_s_end, &off_s_l, &off_hit_saved);
+ if ( off_rc
+ && off_s_l
+ && off_s_end >= s_a
+ && off_s_end - off_s_l + delta <= s_b) {
+ found = TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ /* This is a new hit */
hit_ready = 0;
- last_hit = s_end_pos;
- hit_saved = 0;
}
}
- } else {
- /* hit within the explored area should be rejected*/
- if (s_off_pos < last_hit) return 0;
-
- if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) {
- /* first hit */
- hit_ready = 0;
- last_hit = s_end_pos;
- hit_saved = 0;
- }
}
if (hit_ready) {
@@ -605,32 +641,25 @@ s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk * query,
ungapped_data,
word_params->nucl_score_table,
cutoffs->reduced_nucl_cutoff_score);
-
- last_hit = ungapped_data->length + ungapped_data->s_start
- + hash_table->offset;
+ if (found || ungapped_data->score >= cutoffs->cutoff_score) {
+ BlastUngappedData *final_data =
+ (BlastUngappedData *) malloc(sizeof(BlastUngappedData));
+ *final_data = *ungapped_data;
+ BLAST_SaveInitialHit(init_hitlist, q_off, s_off, final_data);
+ s_end_pos = ungapped_data->length + ungapped_data->s_start
+ + hash_table->offset;
+ } else {
+ hit_ready = 0;
+ }
} else {
ungapped_data = NULL;
- last_hit = s_end_pos;
- }
- if (ungapped_data == NULL) {
BLAST_SaveInitialHit(init_hitlist, q_off, s_off, ungapped_data);
- /* Set the "saved" flag for this hit */
- hit_saved = 1;
- } else if (ungapped_data->score >= cutoffs->cutoff_score) {
- BlastUngappedData *final_data =
- (BlastUngappedData *) malloc(sizeof(BlastUngappedData));
- *final_data = *ungapped_data;
- BLAST_SaveInitialHit(init_hitlist, q_off, s_off, final_data);
- /* Set the "saved" flag for this hit */
- hit_saved = 1;
- } else {
- /* Unset the "saved" flag for this hit */
- hit_saved = 0;
}
}
-
- s_BlastDiagHashInsert(hash_table, diag, last_hit, hit_saved,
- s_end + hash_table->offset, window_size, word_length ,two_hits);
+
+ s_BlastDiagHashInsert(hash_table, diag, s_end_pos,
+ (hit_ready) ? 0 : s_end_pos - s_off_pos,
+ hit_ready, s_off_pos, window_size + Delta);
return hit_ready;
}