summaryrefslogtreecommitdiff
path: root/algo/blast/core/aa_ungapped.h
blob: 20580d57f70fc819d4b36ef9c8bbaa8855a1b812 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
/* $Id: aa_ungapped.h,v 1.24 2005/11/16 14:31:36 madden Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 */

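/** @file aa_ungapped.h
 * Declarations for protein word finding and ungapped extension: the
 * one-hit and two-hit word finders, and the left/right extension routines
 * for both standard substitution matrices and position-specific (PSSM)
 * scoring.
 */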

#ifndef AA_UNGAPPED__H
#define AA_UNGAPPED__H

#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_lookup.h>
#include <algo/blast/core/blast_extend.h>

#ifdef __cplusplus
extern "C" {
#endif

/** Search a subject sequence for word hits.
 *
 * @param subject            subject sequence to scan [in]
 * @param query              query sequence [in]
 * @param lookup             lookup table [in]
 * @param matrix             substitution matrix [in]
 * @param word_params        word parameters; supply the cutoff and dropoff [in]
 * @param ewp                extension parameters; used for diagonal tracking [in]
 * @param offset_pairs       array that holds query and subject offsets [in]
 * @param offset_array_size  number of elements in each offset array [in]
 * @param init_hitlist       HSPs produced by the ungapped extension [out]
 * @param ungapped_stats     assorted hit counts; left unfilled when NULL [out]
 * @return NOTE(review): the meaning of the Int2 result is not stated in this
 *         header -- confirm against the implementation.
 */
Int2 BlastAaWordFinder(BLAST_SequenceBlk* subject,
                       BLAST_SequenceBlk* query,
                       LookupTableWrap* lookup,
                       Int4** matrix,
                       const BlastInitialWordParameters* word_params,
                       Blast_ExtendWord* ewp,
                       BlastOffsetPair* NCBI_RESTRICT offset_pairs,
                       Int4 offset_array_size,
                       BlastInitHitList* init_hitlist,
                       BlastUngappedStats* ungapped_stats);

/** Search a subject sequence for word hits, launching two-hit extensions.
 *
 * @param subject         subject sequence to scan [in]
 * @param query           query sequence [in]
 * @param lookup_wrap     lookup table [in]
 * @param diag            diagonal array structure [in/out]
 * @param matrix          substitution matrix [in]
 * @param cutoff          score an ungapped HSP must reach to be saved [in]
 * @param dropoff         X dropoff [in]
 * @param offset_pairs    array that holds query and subject offsets [in]
 * @param array_size      number of elements in each offset array [in]
 * @param ungapped_hsps   HSPs produced by the ungapped extension [out]
 * @param ungapped_stats  assorted hit counts; left unfilled when NULL [out]
 * @return NOTE(review): the meaning of the Int2 result is not stated in this
 *         header -- confirm against the implementation.
 */
Int2 BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk* subject,
                              const BLAST_SequenceBlk* query,
                              const LookupTableWrap* lookup_wrap,
                              BLAST_DiagTable* diag,
                              Int4** matrix,
                              Int4 cutoff,
                              Int4 dropoff,
                              BlastOffsetPair* NCBI_RESTRICT offset_pairs,
                              Int4 array_size,
                              BlastInitHitList* ungapped_hsps,
                              BlastUngappedStats* ungapped_stats);

/** Search a subject sequence for word hits, launching one-hit extensions.
 *
 * @param subject         subject sequence to scan [in]
 * @param query           query sequence [in]
 * @param lookup_wrap     lookup table [in]
 * @param diag            diagonal array structure [in/out]
 * @param matrix          substitution matrix [in]
 * @param cutoff          score an ungapped HSP must reach to be saved [in]
 * @param dropoff         X dropoff [in]
 * @param offset_pairs    array that holds query and subject offsets [in]
 * @param array_size      number of elements in each offset array [in]
 * @param ungapped_hsps   HSPs produced by the ungapped extensions [out]
 * @param ungapped_stats  assorted hit counts; left unfilled when NULL [out]
 * @return NOTE(review): the meaning of the Int2 result is not stated in this
 *         header -- confirm against the implementation.
 */
Int2 BlastAaWordFinder_OneHit(const BLAST_SequenceBlk* subject,
                              const BLAST_SequenceBlk* query,
                              const LookupTableWrap* lookup_wrap,
                              BLAST_DiagTable* diag,
                              Int4** matrix,
                              Int4 cutoff,
                              Int4 dropoff,
                              BlastOffsetPair* NCBI_RESTRICT offset_pairs,
                              Int4 array_size,
                              BlastInitHitList* ungapped_hsps,
                              BlastUngappedStats* ungapped_stats);

/**
 * Rightward ungapped extension. Starting from s_off in the subject and
 * q_off in the query, keep extending to the right; stop once the running
 * score turns negative or has fallen by dropoff or more.
 *
 * @param matrix        substitution matrix [in]
 * @param subject       subject sequence [in]
 * @param query         query sequence [in]
 * @param s_off         starting offset in the subject [in]
 * @param q_off         starting offset in the query [in]
 * @param dropoff       X dropoff parameter [in]
 * @param displacement  length of the extension [out]
 * @param maxscore      score obtained from a preceding left extension [in]
 * @param s_last_off    rightmost subject offset that was examined [out]
 * @return The score of the extension
 */
  Int4 BlastAaExtendRight(Int4** matrix,
                          const BLAST_SequenceBlk* subject,
                          const BLAST_SequenceBlk* query,
                          Int4 s_off,
                          Int4 q_off,
                          Int4 dropoff,
                          Int4* displacement,
                          Int4 maxscore,
                          Int4* s_last_off);

  /**
   * Rightward extension routine declared next to BlastAaExtendRight. Its
   * signature differs only in taking the query length (query_size) where
   * BlastAaExtendRight takes the query sequence block.
   *
   * NOTE(review): judging by the name and by the use_pssm flag documented on
   * BlastAaExtendOneHit / BlastAaExtendTwoHit, this is presumably the variant
   * used when the scoring matrix is position-specific -- confirm against the
   * implementation. The parameter notes below mirror those documented for
   * BlastAaExtendRight; TODO confirm they hold for this variant.
   *
   * @param matrix        scoring matrix (presumably position-specific) [in]
   * @param subject       subject sequence [in]
   * @param query_size    size of the query (presumably its length in
   *                      residues -- confirm) [in]
   * @param s_off         starting offset in the subject [in]
   * @param q_off         starting offset in the query [in]
   * @param dropoff       X dropoff parameter [in]
   * @param displacement  length of the extension [out]
   * @param maxscore      score obtained from a preceding left extension [in]
   * @param s_last_off    rightmost subject offset that was examined [out]
   * @return presumably the score of the extension, as for BlastAaExtendRight
   */
  Int4 BlastPSSMExtendRight(Int4** matrix,
                            const BLAST_SequenceBlk* subject,
                            Int4 query_size,
                            Int4 s_off,
                            Int4 q_off,
                            Int4 dropoff,
                            Int4* displacement,
                            Int4 maxscore,
                            Int4* s_last_off);


/**
 * Leftward ungapped extension. Starting from s_off in the subject and
 * q_off in the query, keep extending to the left; stop once the running
 * score turns negative or has fallen by dropoff or more.
 *
 * @param matrix        substitution matrix [in]
 * @param subject       subject sequence [in]
 * @param query         query sequence [in]
 * @param s_off         starting offset in the subject [in]
 * @param q_off         starting offset in the query [in]
 * @param dropoff       X dropoff parameter [in]
 * @param displacement  length of the extension [out]
 * @param score         score accumulated so far (probably from the initial
 *                      word hit) [in]
 * @return The score of the extension
 */
Int4 BlastAaExtendLeft(Int4** matrix,
                       const BLAST_SequenceBlk* subject,
                       const BLAST_SequenceBlk* query,
                       Int4 s_off,
                       Int4 q_off,
                       Int4 dropoff,
                       Int4* displacement,
                       Int4 score);

/**
 * Leftward extension routine declared next to BlastAaExtendLeft. Its
 * signature matches BlastAaExtendLeft except that it takes no query
 * sequence argument.
 *
 * NOTE(review): judging by the name and by the use_pssm flag documented on
 * BlastAaExtendOneHit / BlastAaExtendTwoHit, this is presumably the variant
 * used when the scoring matrix is position-specific -- confirm against the
 * implementation. The parameter notes below mirror those documented for
 * BlastAaExtendLeft; TODO confirm they hold for this variant.
 *
 * @param matrix        scoring matrix (presumably position-specific) [in]
 * @param subject       subject sequence [in]
 * @param s_off         starting offset in the subject [in]
 * @param q_off         starting offset in the query [in]
 * @param dropoff       X dropoff parameter [in]
 * @param displacement  length of the extension [out]
 * @param score         score accumulated so far [in]
 * @return presumably the score of the extension, as for BlastAaExtendLeft
 */
Int4 BlastPSSMExtendLeft(Int4** matrix,
                         const BLAST_SequenceBlk* subject,
                         Int4 s_off,
                         Int4 q_off,
                         Int4 dropoff,
                         Int4* displacement,
                         Int4 score);


/** One-hit extension: from the given hit, extend leftwards first and then
 * rightwards.
 *
 * @param matrix      substitution matrix [in]
 * @param subject     subject sequence [in]
 * @param query       query sequence [in]
 * @param s_off       subject offset of the hit [in]
 * @param q_off       query offset of the hit [in]
 * @param dropoff     X dropoff parameter [in]
 * @param hsp_q       query offset at which the HSP begins [out]
 * @param hsp_s       subject offset at which the HSP begins [out]
 * @param hsp_len     length of the HSP [out]
 * @param word_size   number of letters in the initial word hit [in]
 * @param use_pssm    TRUE when the scoring matrix is position-specific [in]
 * @param s_last_off  rightmost subject offset that was examined [out]
 * @return the score of the hsp.
 */
Int4 BlastAaExtendOneHit(Int4** matrix,
                         const BLAST_SequenceBlk* subject,
                         const BLAST_SequenceBlk* query,
                         Int4 s_off,
                         Int4 q_off,
                         Int4 dropoff,
                         Int4* hsp_q,
                         Int4* hsp_s,
                         Int4* hsp_len,
                         Int4 word_size,
                         Boolean use_pssm,
                         Int4* s_last_off);
	                 
/** Two-hit extension. For a pair of hits L (left) and R (right), start at R
 * and extend leftwards. The extension is abandoned if L is not reached;
 * otherwise a rightward extension is then started from R.
 *
 * @param matrix        substitution matrix [in]
 * @param subject       subject sequence [in]
 * @param query         query sequence [in]
 * @param s_left_off    left subject offset [in]
 * @param s_right_off   right subject offset [in]
 * @param q_right_off   right query offset [in]
 * @param dropoff       X dropoff parameter [in]
 * @param hsp_q         query offset at which the HSP begins [out]
 * @param hsp_s         subject offset at which the HSP begins [out]
 * @param hsp_len       length of the HSP [out]
 * @param use_pssm      TRUE when the scoring matrix is position-specific [in]
 * @param word_size     number of letters in one word [in]
 * @param right_extend  set to TRUE if a rightward extension took place [out]
 * @param s_last_off    rightmost subject offset that was examined [out]
 * @return the score of the hsp.
 */
Int4 BlastAaExtendTwoHit(Int4** matrix,
                         const BLAST_SequenceBlk* subject,
                         const BLAST_SequenceBlk* query,
                         Int4 s_left_off,
                         Int4 s_right_off,
                         Int4 q_right_off,
                         Int4 dropoff,
                         Int4* hsp_q,
                         Int4* hsp_s,
                         Int4* hsp_len,
                         Boolean use_pssm,
                         Int4 word_size,
                         Boolean* right_extend,
                         Int4* s_last_off);

#ifdef __cplusplus
}
#endif

#endif /* AA_UNGAPPED__H */