summaryrefslogtreecommitdiff
path: root/tools/blast.c
diff options
context:
space:
mode:
authorAaron M. Ucko <ucko@debian.org>2005-03-24 18:32:05 +0000
committerAaron M. Ucko <ucko@debian.org>2005-03-24 18:32:05 +0000
commitf06fc23cbc179836f402001f24176fc9d5725482 (patch)
tree39e97ad8f13a33296b32a3907f3409b056cf851b /tools/blast.c
parentccba467ae4f393d7acce357a9847bfe1fb77ccc7 (diff)
Load ncbi (6.1.20040616) into ncbi-tools6/branches/upstream/current.
Diffstat (limited to 'tools/blast.c')
-rw-r--r--tools/blast.c146
1 files changed, 82 insertions, 64 deletions
diff --git a/tools/blast.c b/tools/blast.c
index 991f1d63..2957847c 100644
--- a/tools/blast.c
+++ b/tools/blast.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: blast.c,v 6.405 2004/04/28 14:37:06 madden Exp $";
+static char const rcsid[] = "$Id: blast.c,v 6.406 2004/05/21 13:53:37 dondosha Exp $";
-/* $Id: blast.c,v 6.405 2004/04/28 14:37:06 madden Exp $
+/* $Id: blast.c,v 6.406 2004/05/21 13:53:37 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -49,9 +49,12 @@ Detailed Contents:
further manipulation.
******************************************************************************
- * $Revision: 6.405 $
+ * $Revision: 6.406 $
*
* $Log: blast.c,v $
+ * Revision 6.406 2004/05/21 13:53:37 dondosha
+ * Fix in BLASTMergeHitLists
+ *
* Revision 6.405 2004/04/28 14:37:06 madden
* Changes from Mike Gertz
* - modified the link_hsps routine to apply the gap_prob parameter to
@@ -2709,7 +2712,7 @@ CheckForRequiredRegion (BlastSearchBlkPtr search, Boolean strict)
if (hsp->query.offset > search->required_start ||
hsp->query.end < search->required_end)
{
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
}
else
{
@@ -2727,7 +2730,7 @@ CheckForRequiredRegion (BlastSearchBlkPtr search, Boolean strict)
}
else
{
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
}
@@ -2980,7 +2983,7 @@ BlastReevaluateWithAmbiguities (BlastSearchBlkPtr search, Int4 sequence_number)
}
else
{ /* Delete if this is now below the cutoff score. */
- hsp_array[index] = MemFree(hsp_array[index]);
+ hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
}
if (StringCmp(search->prog_name, "blastn") != 0)
@@ -6108,11 +6111,10 @@ static BLAST_HitListPtr
BLASTMergeHitLists(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist1,
BLAST_HitListPtr hitlist2, Int4 start, Boolean merge_hsps)
{
- BLAST_HSPPtr hsp, PNTR hspp1, PNTR hspp2;
+ BLAST_HSPPtr hsp, hsp_var, PNTR hspp1, PNTR hspp2;
Int4 index, index1, index2;
Int4 hspcnt1, hspcnt2, new_hspcnt = 0;
BLAST_HSPPtr PNTR new_hsp_array;
- Int4Ptr index_array1, index_array2;
if (hitlist1 == NULL) {
hitlist1 = (BLAST_HitListPtr)
@@ -6129,27 +6131,31 @@ BLASTMergeHitLists(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist1,
}
hspcnt1 = hspcnt2 = 0;
- hspp1 = (BLAST_HSPPtr PNTR) MemNew(hitlist1->hspcnt*sizeof(BLAST_HSPPtr));
- hspp2 = (BLAST_HSPPtr PNTR) MemNew(hitlist2->hspcnt*sizeof(BLAST_HSPPtr));
- index_array1 = (Int4Ptr) MemNew(hitlist1->hspcnt*sizeof(Int4));
- index_array2 = (Int4Ptr) MemNew(hitlist2->hspcnt*sizeof(Int4));
- for (index=0; index<hitlist1->hspcnt; index++) {
+ /* Put all HSPs that intersect the overlap region at the front of the
+ respective HSP arrays. */
+ for (index = 0; index < hitlist1->hspcnt; index++) {
hsp = hitlist1->hsp_array[index];
if (hsp->subject.end > start) {
- index_array1[hspcnt1] = index;
- hspp1[hspcnt1++] = hsp;
- } else
- new_hspcnt++;
+ /* At least part of this HSP lies in the overlap strip. */
+ hsp_var = hitlist1->hsp_array[hspcnt1];
+ hitlist1->hsp_array[hspcnt1] = hsp;
+ hitlist1->hsp_array[index] = hsp_var;
+ ++hspcnt1;
+ }
}
- for (index=0; index<hitlist2->hspcnt; index++) {
+ for (index = 0; index < hitlist2->hspcnt; index++) {
hsp = hitlist2->hsp_array[index];
if (hsp->subject.offset < start + DBSEQ_CHUNK_OVERLAP) {
- index_array2[hspcnt2] = index;
- hspp2[hspcnt2++] = hsp;
- } else
- new_hspcnt++;
+ /* At least part of this HSP lies in the overlap strip. */
+ hsp_var = hitlist2->hsp_array[hspcnt2];
+ hitlist2->hsp_array[hspcnt2] = hsp;
+ hitlist2->hsp_array[index] = hsp_var;
+ ++hspcnt2;
+ }
}
+ hspp1 = hitlist1->hsp_array;
+ hspp2 = hitlist2->hsp_array;
HeapSort(hspp1, hspcnt1, sizeof(BLAST_HSPPtr), diag_compare_hsps);
HeapSort(hspp2, hspcnt2, sizeof(BLAST_HSPPtr), diag_compare_hsps);
@@ -6164,75 +6170,75 @@ BLASTMergeHitLists(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist1,
if (merge_hsps) {
if (BLASTMergeHsps(search, hspp1[index], hspp2[index1],
start)) {
- /* Point the corresponding element of the full first HSP
- array to the new HSP */
- hitlist1->hsp_array[index_array1[index]] = hspp1[index];
- /* Free the corresponding element of the full second
- HSP array. */
- hitlist2->hsp_array[index_array2[index1]] =
- hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
- break;
+ /* Free the second HSP. */
+ hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
}
} else { /* No gap information available */
if (BLASTHspContained(hspp1[index], hspp2[index1])) {
- hspp1[index] = MemFree(hspp1[index]);
- /* Point the corresponding element of the full first HSP
- array to the new HSP; free the element of the second
- array. */
- hitlist1->hsp_array[index_array1[index]] = hspp2[index1];
- hitlist2->hsp_array[index_array2[index1]] =
- hspp2[index1] = NULL;
+ /* Point the first HSP to the new HSP; */
+ hspp1[index] = BLAST_HSPFree(hspp1[index]);
+ hspp1[index] = hspp2[index1];
+ hspp2[index1] = NULL;
+ /* This HSP has been removed, so break out of the inner
+ loop */
+ break;
} else if (BLASTHspContained(hspp2[index1], hspp1[index])) {
- /* Just free the corresponding element of the second
- HSP array */
- hitlist2->hsp_array[index_array2[index1]] =
- hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
+ hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
}
}
+ } else {
+ /* This and remaining HSPs are too far from the one being
+ checked */
+ break;
}
}
}
- new_hspcnt += hspcnt1;
- for (index=0; index<hspcnt2; index++) {
- if (hspp2[index] != NULL)
- new_hspcnt++;
- }
-
- hspp1 = MemFree(hspp1);
- hspp2 = MemFree(hspp2);
- index_array1 = MemFree(index_array1);
- index_array2 = MemFree(index_array2);
+ HspArrayPurge(hitlist2->hsp_array, hitlist2->hspcnt, FALSE);
+ /* The new number of HSPs is now the sum of the remaining counts in the
+ two lists, but if there is a restriction on the number of HSPs to keep,
+ it might have to be reduced. */
+ new_hspcnt = hitlist2->hspcnt + hitlist1->hspcnt;
+ if (search->pbp->hsp_num_max)
+ new_hspcnt = MIN(new_hspcnt, search->pbp->hsp_num_max);
+
if (new_hspcnt >= hitlist1->hspmax-1 && hitlist1->do_not_reallocate == FALSE) {
- new_hsp_array = (BLAST_HSPPtr PNTR) Realloc(hitlist1->hsp_array, new_hspcnt*2*sizeof(BLAST_HSPPtr));
+ Int4 new_allocated = 2*new_hspcnt;
+ if (search->pbp->hsp_num_max)
+ new_allocated = MIN(new_allocated, search->pbp->hsp_num_max);
+ new_hsp_array = (BLAST_HSPPtr PNTR)
+ Realloc(hitlist1->hsp_array, new_allocated*sizeof(BLAST_HSPPtr));
if (new_hsp_array == NULL) {
ErrPostEx(SEV_WARNING, 0, 0, "UNABLE to reallocate in BlastSaveCurrentHsp for ordinal id %ld, continuing with fixed array of %ld HSP's", (long) search->subject_id, (long) hitlist1->hspmax);
hitlist1->do_not_reallocate = TRUE;
} else {
hitlist1->hsp_array = new_hsp_array;
- hitlist1->hspmax = 2*new_hspcnt;
+ hitlist1->hspmax = new_allocated;
}
+ new_hspcnt = MIN(new_hspcnt, hitlist1->hspmax);
}
- if (new_hspcnt <= hitlist1->hspmax) {
- /* Capacity is enough to save all HSPs from both arrays */
+ if (new_hspcnt >= hitlist2->hspcnt + hitlist1->hspcnt) {
+ /* All HSPs from both arrays are saved */
for (index=hitlist1->hspcnt, index1=0;
index1<hitlist2->hspcnt; index1++) {
if (hitlist2->hsp_array[index1] != NULL)
hitlist1->hsp_array[index++] = hitlist2->hsp_array[index1];
}
} else {
- /* All HSPs cannot be saved; sort both arrays by score and save only
- hspmax best ones */
- new_hsp_array = (BLAST_HSPPtr PNTR)
- Malloc(hitlist1->hspmax*sizeof(BLAST_HSPPtr));
- HeapSort(hitlist1->hsp_array, hitlist1->hspcnt, sizeof(BLAST_HSPPtr),
- score_compare_hsps);
- HeapSort(hitlist2->hsp_array, hitlist2->hspcnt, sizeof(BLAST_HSPPtr),
+ /* Not all HSPs are be saved; sort both arrays by score and save only
+ the new_hspcnt best ones.
+ For the merged set of HSPs, allocate array the same size as in the
+ old HSP list. */
+ new_hsp_array = (BLAST_HSP**)
+ malloc(hitlist1->hspmax*sizeof(BLAST_HSP*));
+ HeapSort(hitlist1->hsp_array, hitlist1->hspcnt,
+ sizeof(BLAST_HSP*), score_compare_hsps);
+ HeapSort(hitlist2->hsp_array, hitlist2->hspcnt, sizeof(BLAST_HSP*),
score_compare_hsps);
index1 = index2 = 0;
- for (index = 0; index < hitlist1->hspmax; ++index) {
+ for (index = 0; index < new_hspcnt; ++index) {
if (index1 < hitlist1->hspcnt &&
(index2 >= hitlist2->hspcnt ||
(hitlist1->hsp_array[index1]->score >=
@@ -6254,11 +6260,12 @@ BLASTMergeHitLists(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist1,
BLAST_HSPFree(hitlist2->hsp_array[index2]);
}
/* Point hitlist1's HSP array to the new one */
+ hitlist1->hsp_array = (BLAST_HSP**) MemFree(hitlist1->hsp_array);
hitlist1->hsp_array = new_hsp_array;
}
hitlist1->hspcnt = index;
- /* Second hitlist now does not own any HSPs at all */
+ /* Second HSP list now does not own any HSPs */
hitlist2->hspcnt = 0;
return hitlist1;
@@ -9553,6 +9560,17 @@ int LIBCALLBACK RPSResultHspScoreCmp(VoidPtr v1, VoidPtr v2)
return 1;
else if (h1->score > h2->score)
return -1;
+
+ if( h1->subject_offset < h2->subject_offset )
+ return 1;
+ if( h1->subject_offset > h2->subject_offset )
+ return -1;
+
+ if( h1->subject_length < h2->subject_length )
+ return 1;
+ if( h1->subject_length > h2->subject_length )
+ return -1;
+
else return 0;
}
/*