summaryrefslogtreecommitdiff
path: root/demo
diff options
context:
space:
mode:
Diffstat (limited to 'demo')
-rw-r--r--demo/blast_driver.c72
-rw-r--r--demo/blastall.c9
-rw-r--r--demo/copymat.c254
-rw-r--r--demo/debruijn.c113
-rw-r--r--demo/formatdb.c14
-rw-r--r--demo/megablast.c42
-rw-r--r--demo/rtestval.c6
-rw-r--r--demo/tbl2asn.c152
8 files changed, 474 insertions, 188 deletions
diff --git a/demo/blast_driver.c b/demo/blast_driver.c
index 8249d58f..65befc58 100644
--- a/demo/blast_driver.c
+++ b/demo/blast_driver.c
@@ -1,4 +1,4 @@
-/* $Id: blast.c,v 1.9 2003/10/23 20:15:37 dondosha Exp $
+/* $Id: blast_driver.c,v 1.16 2004/01/07 14:40:20 papadopo Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,10 +32,10 @@ Author: Ilya Dondoshansky
Contents: Main function for running BLAST
******************************************************************************
- * $Revision: 1.9 $
+ * $Revision: 1.16 $
* */
-static char const rcsid[] = "$Id: blast.c,v 1.9 2003/10/23 20:15:37 dondosha Exp $";
+static char const rcsid[] = "$Id: blast_driver.c,v 1.16 2004/01/07 14:40:20 papadopo Exp $";
#include <ncbi.h>
#include <sqnutils.h>
@@ -45,9 +45,9 @@ static char const rcsid[] = "$Id: blast.c,v 1.9 2003/10/23 20:15:37 dondosha Exp
#include <algo/blast/core/blast_message.h>
#include <algo/blast/core/blast_util.h>
#include <algo/blast/core/blast_engine.h>
-#include "blast_seqalign.h"
-#include "blast_format.h"
-#include "seqsrc_readdb.h"
+#include <algo/blast/api/blast_seqalign.h>
+#include <algo/blast/api/blast_format.h>
+#include <algo/blast/api/seqsrc_readdb.h>
#define NUMARG (sizeof(myargs)/sizeof(myargs[0]))
@@ -147,8 +147,9 @@ static Args myargs[] = {
"0", NULL, NULL, FALSE, 'y', ARG_INT, 0.0, 0, NULL},
{ "Do only ungapped alignment (always TRUE for tblastx)",/*ARG_UNGAPPED*/
"F", NULL, NULL, FALSE, 'u', ARG_BOOLEAN, 0.0, 0, NULL},
- { "Use greedy algorithm for gapped extensions", /* ARG_GREEDY */
- "F", NULL, NULL, FALSE, 'g', ARG_BOOLEAN, 0.0, 0, NULL},
+ { "Use greedy algorithm for gapped extensions:\n 0 no, 1 one-step, "
+ "2 two-step, 3 two-step with ungapped", /* ARG_GREEDY */
+ "F", NULL, NULL, FALSE, 'g', ARG_INT, 0.0, 0, NULL},
{ "Gap open penalty (default: non-affine if greedy; 5 if dyn. prog.)",
"0", NULL, NULL, FALSE, 'G', ARG_INT, 0.0, 0, NULL}, /* ARG_GAPOPEN */
{ "Gap extension penalty (default: non-affine if greedy; 2 otherwise)",
@@ -256,6 +257,14 @@ BLAST_FillOptions(LookupTableOptions* lookup_options,
if (myargs[ARG_STRIDE].intvalue)
lookup_options->scan_step = myargs[ARG_STRIDE].intvalue;
+
+ if (myargs[ARG_PHI].strvalue) {
+ lookup_options->phi_pattern = strdup(myargs[ARG_PHI].strvalue);
+ lookup_options->lut_type =
+ ((program_number == blast_type_blastn) ?
+ PHI_NA_LOOKUP : PHI_AA_LOOKUP);
+ hit_options->phi_align = TRUE;
+ }
BLAST_FillQuerySetUpOptions(query_setup_options, program_number,
myargs[ARG_FILTER].strvalue, myargs[ARG_STRAND].intvalue);
@@ -272,6 +281,25 @@ BLAST_FillOptions(LookupTableOptions* lookup_options,
BLAST_FillExtensionOptions(ext_options, program_number, greedy_extension,
myargs[ARG_XDROP].intvalue, myargs[ARG_XDROP_FINAL].intvalue);
+ if (greedy_extension) {
+ switch (myargs[ARG_GREEDY].intvalue) {
+ case 1:
+ ext_options->algorithm_type = EXTEND_GREEDY;
+ word_options->ungapped_extension = FALSE;
+ break;
+ case 2:
+ ext_options->algorithm_type = EXTEND_GREEDY_NO_TRACEBACK;
+ word_options->ungapped_extension = FALSE;
+ break;
+ case 3:
+ ext_options->algorithm_type = EXTEND_GREEDY_NO_TRACEBACK;
+ word_options->ungapped_extension = TRUE;
+ break;
+ default:
+ break;
+ }
+ }
+
BLAST_FillScoringOptions(score_options, program_number, greedy_extension,
myargs[ARG_MISMATCH].intvalue, myargs[ARG_MATCH].intvalue,
myargs[ARG_MATRIX].strvalue, myargs[ARG_GAPOPEN].intvalue,
@@ -304,10 +332,10 @@ BLAST_FillOptions(LookupTableOptions* lookup_options,
BLAST_FillEffectiveLengthsOptions(eff_len_options,
numseqs, totlen, (Int8) myargs[ARG_SEARCHSP].floatvalue);
- if (myargs[ARG_DBGENCODE].intvalue && db_options &&
- (program_number == blast_type_tblastn ||
- program_number == blast_type_tblastx)) {
- db_options->genetic_code = myargs[ARG_DBGENCODE].intvalue;
+ if (db_options && (program_number == blast_type_tblastn ||
+ program_number == blast_type_tblastx)) {
+ if (myargs[ARG_DBGENCODE].intvalue)
+ db_options->genetic_code = myargs[ARG_DBGENCODE].intvalue;
if ((status = BLAST_GeneticCodeFind(db_options->genetic_code,
&db_options->gen_code_string)))
return status;
@@ -338,13 +366,13 @@ Int2 Nlm_Main(void)
Int2 status;
QuerySetUpOptions* query_options=NULL;
BlastEffectiveLengthsOptions* eff_len_options=NULL;
- BlastMask* lcase_mask = NULL;
- BlastMask* filter_loc=NULL; /* All masking locations */
+ BlastMaskLoc* lcase_mask = NULL;
+ BlastMaskLoc* filter_loc=NULL; /* All masking locations */
SeqLoc* query_slp = NULL;
BlastScoreBlk* sbp = NULL;
FILE *infp, *outfp;
BlastQueryInfo* query_info;
- BlastResults* results = NULL;
+ BlastHSPResults* results = NULL;
Blast_Message* blast_message = NULL;
SeqAlign* seqalign;
BlastFormattingOptions* format_options;
@@ -413,6 +441,8 @@ Int2 Nlm_Main(void)
readdb_args = (ReaddbNewArgs*) malloc(sizeof(ReaddbNewArgs));
readdb_args->dbname = dbname = myargs[ARG_DB].strvalue;
readdb_args->is_protein = !db_is_na;
+ readdb_args->first_db_seq = 0;
+ readdb_args->final_db_seq = 0;
bssn_info.constructor = &ReaddbSeqSrcNew;
bssn_info.ctor_argument = (void*) readdb_args;
@@ -460,7 +490,7 @@ Int2 Nlm_Main(void)
}
if (translated_query) {
- BlastMaskDNAToProtein(&lcase_mask, query_slp);
+ BlastMaskLocDNAToProtein(&lcase_mask, query_slp);
}
status = BLAST_SetUpQuery(program_number, query_slp,
@@ -476,7 +506,7 @@ Int2 Nlm_Main(void)
if (translated_query) {
/* Filter locations were returned in protein coordinates; convert them
back to nucleotide here */
- BlastMaskProteinToDNA(&filter_loc, query_slp);
+ BlastMaskLocProteinToDNA(&filter_loc, query_slp);
}
if (status) {
@@ -490,8 +520,6 @@ Int2 Nlm_Main(void)
LookupTableWrapInit(query, lookup_options,
lookup_segments, sbp, &lookup_wrap);
- return_stats = (BlastReturnStat*) calloc(1, sizeof(BlastReturnStat));
-
if (bssp) {
BLAST_DatabaseSearchEngine(program_number, query, query_info,
bssp, sbp, score_options, lookup_wrap,
@@ -512,7 +540,7 @@ Int2 Nlm_Main(void)
/* Convert results to the SeqAlign form */
BLAST_ResultsToSeqAlign(program_number, results, query_slp, bssp,
- subject_slp, score_options, sbp, hit_options->gapped_calculation,
+ subject_slp, score_options, sbp, score_options->gapped_calculation,
&seqalign);
results = BLAST_ResultsFree(results);
@@ -530,6 +558,8 @@ Int2 Nlm_Main(void)
blast_program, query_info->num_queries, query_slp,
filter_loc, format_options, score_options->is_ooframe);
+ BlastMaskLocFree(filter_loc);
+
PrintOutputFooter(program_number, format_options, score_options, sbp,
lookup_options, word_options, ext_options, hit_options, query_info,
readdb_args, return_stats);
@@ -540,7 +570,9 @@ Int2 Nlm_Main(void)
query_slp = SeqLocSetFree(query_slp);
} /* End loop on sets of queries */
+ sfree(readdb_args);
subject = BlastSequenceBlkFree(subject);
+ subject_slp = SeqLocSetFree(subject_slp);
sfree(return_stats);
LookupTableOptionsFree(lookup_options);
BlastQuerySetUpOptionsFree(query_options);
diff --git a/demo/blastall.c b/demo/blastall.c
index 6f56f52f..41fcf1b7 100644
--- a/demo/blastall.c
+++ b/demo/blastall.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: blastall.c,v 6.135 2003/08/21 15:37:54 dondosha Exp $";
+static char const rcsid[] = "$Id: blastall.c,v 6.136 2003/11/05 22:28:06 dondosha Exp $";
-/* $Id: blastall.c,v 6.135 2003/08/21 15:37:54 dondosha Exp $
+/* $Id: blastall.c,v 6.136 2003/11/05 22:28:06 dondosha Exp $
**************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -28,6 +28,9 @@ static char const rcsid[] = "$Id: blastall.c,v 6.135 2003/08/21 15:37:54 dondosh
**************************************************************************
*
* $Log: blastall.c,v $
+ * Revision 6.136 2003/11/05 22:28:06 dondosha
+ * No need to shift subsequence coordinates in tabular output, since they are already shifted in the seqalign
+ *
* Revision 6.135 2003/08/21 15:37:54 dondosha
* Corrections for out-of-frame tabular output and megablast XML output
*
@@ -1571,7 +1574,7 @@ Int2 Main (void)
BlastPrintTabularResults(curr_seqalign, query_bsp, slp,
number_of_alignments, blast_program,
!options->gapped_calculation, options->is_ooframe,
- believe_query, from, 0, global_fp, NULL, (align_view == 9));
+ believe_query, 0, 0, global_fp, NULL, (align_view == 9));
SeqAlignSetFree(curr_seqalign);
}
diff --git a/demo/copymat.c b/demo/copymat.c
index 08231e58..adbdd1e8 100644
--- a/demo/copymat.c
+++ b/demo/copymat.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: copymat.c,v 6.25 2003/05/30 17:31:09 coulouri Exp $";
+static char const rcsid[] = "$Id: copymat.c,v 6.30 2004/01/30 20:34:45 coulouri Exp $";
/*
* ===========================================================================
@@ -36,6 +36,22 @@ Contents: main routines for copymatrices program to convert
score matrices output by makematrices into a single byte-encoded file.
$Log: copymat.c,v $
+Revision 6.30 2004/01/30 20:34:45 coulouri
+fix minor nit to FileWrite call
+
+Revision 6.29 2004/01/26 19:40:48 coulouri
+* Correct buffer overrun
+* Use offset rather than pointer in LookupBackboneCell
+
+Revision 6.28 2003/11/24 18:18:47 coulouri
+Correction to previous fix for 64-bit irix
+
+Revision 6.27 2003/11/21 18:01:15 ivanov
+Added extern definition for impalaMakeFileNames()
+
+Revision 6.26 2003/11/20 15:44:32 camacho
+Tom Madden's changes to use lookup table contruction code from algo/blast.
+
Revision 6.25 2003/05/30 17:31:09 coulouri
add rcsid
@@ -107,10 +123,39 @@ Changed a little format of RPS lookup tables file.
#include <sequtil.h>
#include <seqport.h>
#include <tofasta.h>
-#include <blast.h>
-#include <blastpri.h>
-#include <posit.h>
-#include <profiles.h>
+
+#ifndef MAXLINELEN
+# define MAXLINELEN 2000
+#endif
+#ifndef MAX_NAME_LENGTH
+# define MAX_NAME_LENGTH 500
+#endif
+#ifndef PRO_ALPHABET_SIZE
+# define PRO_ALPHABET_SIZE 26
+#endif
+#ifndef SORT_THRESHOLD
+# define SORT_THRESHOLD 20
+#endif
+#ifndef RPS_MAGIC_NUMBER
+# define RPS_MAGIC_NUMBER 7702
+#endif
+/*factor used to multiply the gapped K parameter to make it
+ more accurate in most cases*/
+#ifndef PRO_K_MULTIPLIER
+# define PRO_K_MULTIPLIER 1.2
+#endif
+#include <algo/blast/core/blast_lookup.h>
+#include <algo/blast/core/blast_options.h>
+
+typedef Int4 ScoreRow[PRO_ALPHABET_SIZE];
+extern Boolean LIBCALL
+IMPALAPrintHelp PROTO((Boolean html, Int4 line_length, Char * programName,
+ FILE *outfp));
+extern void LIBCALL
+impalaMakeFileNames PROTO((Char * matrixDbName, Char * auxiliaryFileName,
+ Char * mmapFileName, Char * seqFileName,
+ Char *matrixFileName, Char * ckptFileName,
+ Char *directoryPrefix));
#define NUMARG (sizeof(myargs)/sizeof(myargs[0]))
@@ -333,6 +378,40 @@ static Int4 findTotalLength(FILE *matrixAuxiliaryFile, Int4 numProfiles,
return(totalLength);
}
+static Boolean RPSUpdateOffsets(LookupTable *lookup)
+{
+ Uint4 len;
+ Int4 index;
+ Int4 num_used;
+ Int4 offset_diff;
+
+ len = lookup->backbone_size;
+ offset_diff = lookup->wordsize - 1;
+
+ /* Walk through table, copying info into mod_lt[] */
+ for(index = 0; index < len; index++) {
+
+ if((num_used=lookup->thick_backbone[index].num_used) <= 3)
+ {
+ while (num_used > 0)
+ {
+ num_used--;
+ lookup->thick_backbone[index].payload.entries[num_used] += offset_diff;
+ }
+ }
+ else
+ {
+ while (num_used > 0)
+ {
+ num_used--;
+ lookup->overflow [ lookup->thick_backbone[index].payload.overflow_cursor + num_used] += offset_diff;
+ }
+ }
+ }
+ return TRUE;
+}
+
+
/* #define RPS_THRESHOLD 11 */
/* #define RPS_WORDSIZE 3 */
@@ -341,50 +420,44 @@ static Int4 findTotalLength(FILE *matrixAuxiliaryFile, Int4 numProfiles,
pointers relative to the start of "mod_lookup_table_memory" chunk
RPS Blast will calculate real pointers in run time using these values
*/
-Boolean RPSUpdatePointers(LookupTablePtr lookup)
+Boolean RPSUpdatePointers(LookupTable *lookup, Uint4 *new_overflow, Uint4 *new_overflow_size)
{
- ModLAEntry * mod_lt;
Uint4 len;
Int4 index;
- ModLookupPositionPtr start_address;
+ Uint4 *start_address;
long mlpp_address;
- ModLookupPositionPtr *lpp;
+ Uint4 *new_overflow_cursor;
+ Int4 *src;
+ Int4 first_hit;
+
+ len = lookup->backbone_size;
- len = lookup->array_size;
- mod_lt=lookup->mod_lt;
- start_address = (ModLookupPositionPtr) lookup->mod_lookup_table_memory;
+ start_address = new_overflow_cursor = new_overflow;
/* Walk through table, copying info into mod_lt[] */
for(index = 0; index < len; index++) {
- if(mod_lt[index].num_used <= 3)
+ if(lookup->thick_backbone[index].num_used <= 3)
continue;
-#if 1
+ src = &(lookup->overflow[lookup->thick_backbone[index].payload.overflow_cursor]);
+ MemCpy(new_overflow_cursor, &src[1], sizeof(Uint4)*(lookup->thick_backbone[index].num_used-1));
+
+ mlpp_address = (long) new_overflow_cursor;
+
+ new_overflow_cursor += lookup->thick_backbone[index].num_used-1;
+ first_hit = src[0];
- /* Taking pointer to 4/8 bytes address */
- lpp= (ModLookupPositionPtr *) &mod_lt[index].entries[1];
-
- mlpp_address = (long) *lpp;
mlpp_address -= (long) start_address;
/* Now this is new relative address - usually small */
- *lpp = (ModLookupPositionPtr) mlpp_address;
-
-#if defined(OS_UNIX_IRIX)
- if(sizeof(ModLookupPositionPtr) == 8) { /* 64bit build */
- mlpp_address = mod_lt[index].entries[1];
- mod_lt[index].entries[1] = mod_lt[index].entries[2];
- mod_lt[index].entries[2] = mlpp_address;
- }
-#endif
-
-#else
- mod_lt[index].entries[1] -= (int) start_address;
- mod_lt[index].entries[2] = 0; /* Not used */
-#endif
+ lookup->thick_backbone[index].payload.entries[1] = (Int4) mlpp_address;
+ lookup->thick_backbone[index].payload.entries[0] = first_hit;
}
+
+ *new_overflow_size = new_overflow_cursor - new_overflow;
+
return TRUE;
}
@@ -392,17 +465,24 @@ Boolean RPSUpdatePointers(LookupTablePtr lookup)
Write lookup table to the disk into file "*.loo", which will be
used memory-mapped during RPS Blast search
*/
-Boolean RPSDumpLookupTable(LookupTablePtr lookup, FILE *fd)
+Boolean RPSDumpLookupTable(LookupTable *lookup, FILE *fd)
{
+ Uint4 *new_overflow;
+ Uint4 new_overflow_size;
- RPSUpdatePointers(lookup);
+ RPSUpdateOffsets(lookup);
- FileWrite(lookup->mod_lt, sizeof(ModLAEntry), 1+lookup->array_size, fd);
- if(lookup->mod_lookup_table_size) {
- FileWrite(lookup->mod_lookup_table_memory,
- lookup->mod_lookup_table_size,
- sizeof(ModLookupPosition), fd);
- }
+ new_overflow = malloc(lookup->overflow_size*sizeof(Uint4));
+ RPSUpdatePointers(lookup, new_overflow, &new_overflow_size);
+
+ FileWrite(lookup->thick_backbone, sizeof(LookupBackboneCell), lookup->backbone_size, fd);
+ if(new_overflow_size)
+ FileWrite(new_overflow,
+ sizeof(Uint4),
+ new_overflow_size,
+ fd);
+
+ sfree(new_overflow);
return TRUE;
}
@@ -415,25 +495,63 @@ Boolean RPSCreateLookupFile(ScoreRow *combinedMatrix, Int4 numProfiles,
Int4Ptr seqlens, CharPtr filename,
Nlm_FloatHi scalingFactor)
{
- LookupTablePtr lookup;
- BlastAllWordPtr all_words;
- Int4 start, i, header_size, all_length, magicNumber;
+ BlastScoreBlk *sbp;
+ DoubleInt *double_int;
FILE *fd;
+ Int4 **posMatrix;
+ Int4 start, i, header_size, all_length, magicNumber;
Int4Ptr offsets;
- BLAST_ScorePtr PNTR posMatrix;
Int4 num_lookups;
+ ListNode *lookup_segment=NULL;
+ LookupTable *lookup;
+ LookupTableWrap* lookup_wrap_ptr=NULL;
+ LookupTableOptions* lookup_options;
+
if((fd = FileOpen(filename, "wb")) == NULL)
return FALSE;
num_lookups = 1; /* Single lookup table for all set */
- lookup = lookup_new(PRO_ALPHABET_SIZE, 3, 0);
- all_words = BlastPopulateAllWordArrays(3, PRO_ALPHABET_SIZE);
+ all_length = seqlens[numProfiles] - seqlens[0];
+
+ posMatrix = MemNew((all_length + 1) * sizeof(Int4 *));
+ for (i = 0; i < all_length; i++) {
+ posMatrix[i] = (Int4 *) &(combinedMatrix[i][0]);
+ }
+
+ /* Last row is necessary */
+ posMatrix[all_length] = MemNew(sizeof(Int4) * PRO_ALPHABET_SIZE);
+
+ for(i = 0; i < PRO_ALPHABET_SIZE; i++) {
+ posMatrix[all_length][i] = -INT2_MAX;
+ }
+
+ sbp = BlastScoreBlkNew(BLASTAA_SEQ_CODE, 1);
+ sbp->posMatrix = posMatrix;
+ LookupTableOptionsNew(blast_type_blastp, &lookup_options);
+ BLAST_FillLookupTableOptions(lookup_options, blast_type_blastp, FALSE,
+ (Int4) (myargs[3].floatvalue*scalingFactor), myargs[4].intvalue, FALSE, FALSE, TRUE); /* add last arg for psi-blast?? */
+
+
+ double_int = (DoubleInt*) calloc(1, sizeof(DoubleInt));
+ double_int->i1 = 0;
+ double_int->i2 = all_length;
+
+ ListNodeAddPointer(&lookup_segment, 0, double_int);
+
+ /* Need query for psi-blast?? where to put the PSSM? */
+ LookupTableWrapInit(NULL, lookup_options, lookup_segment, sbp, &lookup_wrap_ptr);
+
+ sbp->posMatrix = NULL;
+ sbp = BlastScoreBlkFree(sbp);
+ lookup_options = LookupTableOptionsFree(lookup_options);
+ lookup_segment = ListNodeFreeData(lookup_segment);
+
+ lookup = (LookupTable*) lookup_wrap_ptr->lut;
/* Only Uint4 maximum length for lookup file allowed in current
implementation */
-
header_size = (numProfiles+1)*sizeof(Int4) + 8*sizeof(Int4);
/* Beginning of file will be allocated for lookup offsets */
@@ -441,32 +559,11 @@ Boolean RPSCreateLookupFile(ScoreRow *combinedMatrix, Int4 numProfiles,
offsets = MemNew(sizeof(Int4) * (num_lookups + 1));
- all_length = seqlens[numProfiles] - seqlens[0];
-
- posMatrix = MemNew((all_length + 1) * sizeof(BLAST_Score *));
- for (i = 0; i < all_length; i++) {
- posMatrix[i] = (BLAST_Score *) &(combinedMatrix[i][0]);
- }
-
- /* Last row is necessary */
- posMatrix[all_length] = MemNew(sizeof(BLAST_Score) * PRO_ALPHABET_SIZE);
- for(i = 0; i < PRO_ALPHABET_SIZE; i++) {
- posMatrix[all_length][i] = -INT2_MAX;
- }
offsets[0] = ftell(fd);
start = seqlens[0]; /* 0 */
- if(BlastNewFindWordsEx(lookup, &posMatrix[start], 0, all_length,
- all_words, (Int4) (myargs[3].floatvalue*scalingFactor),
- myargs[4].intvalue, 0) < 0) {
- ErrPostEx(SEV_ERROR, 0,0, "Failure to create llokup table");
- return FALSE;
- }
-
- lookup_position_aux_destruct(lookup);
-
RPSDumpLookupTable(lookup, fd);
i = 1;
@@ -477,9 +574,9 @@ Boolean RPSCreateLookupFile(ScoreRow *combinedMatrix, Int4 numProfiles,
magicNumber = RPS_MAGIC_NUMBER;
FileWrite(&magicNumber, sizeof(Int4), 1, fd); /* header[0] */
FileWrite(&num_lookups, sizeof(Int4), 1, fd); /* header[1] */
- FileWrite(&lookup->num_pos_added, sizeof(Int4), 1, fd); /* header[2] */
- FileWrite(&lookup->num_unique_pos_added, sizeof(Int4), 1, fd); /* header[3] */
- FileWrite(&lookup->mod_lookup_table_size, sizeof(Int4), 1, fd); /* header[4] */
+ FileWrite(&lookup->neighbor_matches, sizeof(Int4), 1, fd); /* header[2] */
+ FileWrite(&lookup->neighbor_matches, sizeof(Int4), 1, fd); /* header[3] */
+ FileWrite(&lookup->overflow_size, sizeof(Int4), 1, fd); /* header[4] */
/* Now writing recorded offsets in the beginning of the file */
@@ -487,27 +584,14 @@ Boolean RPSCreateLookupFile(ScoreRow *combinedMatrix, Int4 numProfiles,
FileWrite(offsets, sizeof(Int4), num_lookups + 1, fd);
FileClose(fd);
-/* comment out for now, why is this here?
- if (scalingFactor != 1.0) {
- for(j = 0; j < all_length; j++) {
- for(i = 0; i < PRO_ALPHABET_SIZE; i++) {
- combinedMatrix[j][i] /= scalingFactor;
- }
- }
- }
-*/
-
/* Final memory cleenup */
- lookup = lookup_destruct(lookup);
-
MemFree(posMatrix[all_length]);
MemFree(posMatrix);
- BlastAllWordDestruct(all_words);
-
return TRUE;
}
+
/* -- SSH --
Create file <database_name> (without extention), which is concatenation
of all FASTA files used. Used by RPS Blast.
diff --git a/demo/debruijn.c b/demo/debruijn.c
index 732745b0..c287351c 100644
--- a/demo/debruijn.c
+++ b/demo/debruijn.c
@@ -1,72 +1,107 @@
-#include <stdio.h>
-#include <util.h>
+/* $Id: debruijn.c,v 1.3 2004/01/05 20:44:55 coulouri Exp $
+
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's offical duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+
+*/
+
+static char const rcsid[] = "$Id: debruijn.c,v 1.3 2004/01/05 20:44:55 coulouri Exp $";
/*
* example driver for de Bruijn sequences.
*
* this code generates all n-mers over a protein
- * or dna alphabet. useful for creating fasta test sequences.
+ * or dna alphabet. useful for creating test sequences.
*/
-/* k = 22 */
-char proteinalphabet[] = "arndcqeghilkmfpstwyvbz";
+#include <ncbi.h>
+#include <algo/blast/core/lookup_util.h>
+
+static Args myargs[] = {
+ { "word size",
+ NULL, NULL, NULL, FALSE, 'n', ARG_INT, 0.0, 0, NULL },
+ { "alphabet\n"
+ "(supply 'ncbistdaa' or 'ncbi2na' for standard\n"
+ "alphabets, or supply your own alphabet)\n",
+ NULL, NULL, NULL, FALSE, 'a', ARG_STRING, 0.0, 0, NULL },
+};
-/* k = 4 */
-char dnaalphabet[] = "acgt";
+Uint1 ncbistdaa[] = "-abcdefghiklmnpqrstvwxyzu*";
+Uint1 ncbi2na[] = "acgt";
-int main(int argc, char *argv[])
+Int2 Main(void)
{
- int i;
- int n, k;
- char *output;
- int outputsize;
- char *alphabet;
+ Int4 i;
+ Int4 n, k;
+ Uint1 *output;
+ Int4 outputsize;
+ Uint1 *alphabet=NULL;
+
+ if ( ! GetArgs("debruijn", sizeof(myargs)/sizeof(myargs[0]), myargs) )
+ return 1;
+
+ n = myargs[0].intvalue;
- if (argc != 4)
+ if (n < 1)
{
- fprintf(stderr, "usage: %s n k prot-or-dna\n", argv[0]);
- fprintf(stderr, "where prot-or-dna = 0 for protein, 1 for dna\n");
- fprintf(stderr, "example: %s 3 22 0\n",argv[0]);
- fprintf(stderr, "example: %s 11 4 1\n",argv[0]);
- exit(1);
+ fprintf(stderr,"n must be greater than one.\n");
+ return 1;
}
-
- n = atoi(argv[1]);
- k = atoi(argv[2]);
-
- if (atoi(argv[3]) == 0)
- alphabet = proteinalphabet;
- else
- alphabet = dnaalphabet;
+ alphabet = myargs[1].strvalue;
+
+ if (strcmp("ncbistdaa", myargs[1].strvalue) == 0)
+ alphabet = ncbistdaa;
+
+ if (strcmp("ncbi2na", myargs[1].strvalue) == 0)
+ alphabet = ncbi2na;
+
+ k = strlen(alphabet);
+
/* output array needs:
* k^n bytes - to store the de Bruijn sequence
* n-1 bytes - to unwrap (see below)
* 1 byte - for the terminating NUL
*/
- outputsize = iexp(k,n) + (n-1) + 1;
- output = (char *) calloc( outputsize, sizeof(char));
+ outputsize = iexp(k,n) + (n-1);
+ output = (char *) malloc(outputsize + 1);
/* compute the (n,k) de Bruijn sequence */
debruijn(n,k,output,alphabet);
- /* but, we don't want a true cyclical de Bruijn sequence, we want
- * all words in a straight line. so we copy the first n-1 letters
+ /* We don't want a true cyclical de Bruijn sequence; we want
+ * all words in a straight line- copy the first n-1 letters
* to the end.
*/
for(i=0;i<(n-1);i++)
- output[outputsize-((n-1)+1)+i] = output[i];
-
- /* don't forget to NUL-terminate it */
+ output[outputsize-n+1+i] = output[i];
- output[outputsize-1] = '\0';
+ /* Terminate the string. */
- fprintf(stderr,"n (word size) = %d\n",n);
- fprintf(stderr,"k (alphabet size) = %d\n",k);
- fprintf(stderr,"output size = k^n + (n-1) + 1 = %d + %d + 1 = %d\n",iexp(k,n),n-1,outputsize);
- fprintf(stderr,"naive size would have been %d\n",n * iexp(k,n));
+ output[outputsize+1] = '\0';
puts(output);
diff --git a/demo/formatdb.c b/demo/formatdb.c
index 37e7a8ed..9a2143fe 100644
--- a/demo/formatdb.c
+++ b/demo/formatdb.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: formatdb.c,v 6.91 2003/10/01 18:59:56 camacho Exp $";
+static char const rcsid[] = "$Id: formatdb.c,v 6.92 2004/01/29 14:56:44 camacho Exp $";
/*****************************************************************************
@@ -32,11 +32,14 @@ static char const rcsid[] = "$Id: formatdb.c,v 6.91 2003/10/01 18:59:56 camacho
Version Creation Date: 10/01/96
- $Revision: 6.91 $
+ $Revision: 6.92 $
File Description: formats FASTA databases for use by BLAST
$Log: formatdb.c,v $
+ Revision 6.92 2004/01/29 14:56:44 camacho
+ Removed -A option, FORMATDB_VER_TEXT no longer supported
+
Revision 6.91 2003/10/01 18:59:56 camacho
Fix to creation of custom databases using a gi list and alias files when the
source database spans multiple volumes.
@@ -418,8 +421,6 @@ Args dump_args[] = {
"F", NULL, NULL, TRUE, 's', ARG_BOOLEAN, 0.0, 0, NULL},
{ "Verbose: check for non-unique string ids in the database",
"F", NULL, NULL, TRUE, 'V', ARG_BOOLEAN, 0.0, 0, NULL},
- { "Create ASN.1 structured deflines",
- "T", NULL, NULL, TRUE, 'A', ARG_BOOLEAN, 0.0, 0, NULL},
{ "Create an alias file with this name\n"
" use the gifile arg (below) if set to calculate db size\n"
" use the BLAST db specified with -i (above)",
@@ -452,7 +453,6 @@ enum {
dbsize_arg,
sparse_arg,
nonunique_arg,
- asn1_deflines_arg,
alias_fn_arg,
gifile_arg,
bin_gifile_arg,
@@ -669,9 +669,7 @@ Int2 Main(void)
dump_args[basename_arg].strvalue,
dump_args[alias_fn_arg].strvalue,
((Int8)dump_args[dbsize_arg].intvalue)*1000000, 0,
- dump_args[asn1_deflines_arg].intvalue ?
- FORMATDB_VER : FORMATDB_VER_TEXT ,
- FALSE, 0);
+ FORMATDB_VER, FALSE, 0);
if (options == NULL)
return 1;
diff --git a/demo/megablast.c b/demo/megablast.c
index 2fa5beb6..7db38590 100644
--- a/demo/megablast.c
+++ b/demo/megablast.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: megablast.c,v 6.107 2003/05/30 17:31:09 coulouri Exp $";
+static char const rcsid[] = "$Id: megablast.c,v 6.111 2004/01/27 20:47:18 dondosha Exp $";
-/* $Id: megablast.c,v 6.107 2003/05/30 17:31:09 coulouri Exp $
+/* $Id: megablast.c,v 6.111 2004/01/27 20:47:18 dondosha Exp $
**************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -28,6 +28,18 @@ static char const rcsid[] = "$Id: megablast.c,v 6.107 2003/05/30 17:31:09 coulou
**************************************************************************
* $Revision 6.13$ *
* $Log: megablast.c,v $
+ * Revision 6.111 2004/01/27 20:47:18 dondosha
+ * Set no_traceback option to 2 for -D4, 1 for -D0, 0 for others
+ *
+ * Revision 6.110 2003/11/05 22:28:06 dondosha
+ * No need to shift subsequence coordinates in tabular output, since they are already shifted in the seqalign
+ *
+ * Revision 6.109 2003/10/30 17:30:49 dondosha
+ * Fixed error in previous commit - output file pointer has to be set for all cases
+ *
+ * Revision 6.108 2003/10/29 17:48:15 dondosha
+ * Added -D4 option for 2-stage greedy gapped extension with format identical to -D2
+ *
* Revision 6.107 2003/05/30 17:31:09 coulouri
* add rcsid
*
@@ -396,7 +408,8 @@ enum {
MBLAST_ENDPOINTS = 0,
MBLAST_SEGMENTS,
MBLAST_ALIGNMENTS,
- MBLAST_ALIGN_INFO
+ MBLAST_ALIGN_INFO,
+ MBLAST_DELAYED_TRACEBACK
};
static int LIBCALLBACK
@@ -912,6 +925,7 @@ Int2 Main (void)
Boolean lcase_masking;
BlastOutputPtr boutp = NULL;
MBXmlPtr mbxp = NULL;
+ Boolean traditional_formatting;
StringCpy(buf, "megablast ");
StringNCat(buf, BlastGetVersionNumber(), sizeof(buf)-StringLen(buf)-1);
@@ -947,14 +961,17 @@ Int2 Main (void)
}
align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na);
-
+ traditional_formatting =
+ (myargs[12].intvalue == MBLAST_ALIGNMENTS ||
+ myargs[12].intvalue == MBLAST_DELAYED_TRACEBACK);
+
if (myargs[15].strvalue) {
if (myargs[15].strvalue[0] == 'f' || myargs[15].strvalue[0] == 'F' ||
myargs[15].strvalue[0] == '0')
believe_query = FALSE;
else
believe_query = TRUE;
- } else if (myargs[12].intvalue == MBLAST_ALIGNMENTS &&
+ } else if (traditional_formatting &&
!myargs[14].strvalue)
believe_query = FALSE;
else
@@ -1058,9 +1075,11 @@ Int2 Main (void)
options->gifile = StringSave(myargs[22].strvalue);
if (myargs[12].intvalue == MBLAST_ENDPOINTS)
- options->no_traceback = TRUE;
+ options->no_traceback = 1;
+ else if (myargs[12].intvalue == MBLAST_DELAYED_TRACEBACK)
+ options->no_traceback = 2;
else
- options->no_traceback = FALSE;
+ options->no_traceback = 0;
options->megablast_full_deflines = (Boolean) myargs[27].intvalue;
options->perc_identity = (FloatLo) myargs[30].floatvalue;
@@ -1077,10 +1096,9 @@ Int2 Main (void)
StrCpy(prefix, "");
global_fp = outfp;
- if (myargs[12].intvalue != MBLAST_ALIGNMENTS)
- options->output = outfp;
+ options->output = outfp;
- if (myargs[12].intvalue==MBLAST_ALIGNMENTS) {
+ if (traditional_formatting) {
if (align_view < 7) {
if (html) {
fprintf(outfp, "<HTML>\n<TITLE>MEGABLAST Search Results</TITLE>\n");
@@ -1233,7 +1251,7 @@ Int2 Main (void)
}
- if (myargs[12].intvalue==MBLAST_ALIGNMENTS) {
+ if (traditional_formatting) {
dbinfo = NULL;
ka_params = NULL;
ka_params_gap = NULL;
@@ -1333,7 +1351,7 @@ Int2 Main (void)
BlastPrintTabulatedResults(seqalign,
query_bsp_array[index], NULL, number_of_alignments,
blast_program, !options->gapped_calculation,
- believe_query, options->required_start, 0,
+ believe_query, 0, 0,
global_fp, (align_view == 9));
SeqAlignSetFree(seqalign);
diff --git a/demo/rtestval.c b/demo/rtestval.c
index 6752bbe7..1a3bd194 100644
--- a/demo/rtestval.c
+++ b/demo/rtestval.c
@@ -5,7 +5,7 @@
* check for stop codons
* Check for and fix non 3.0 asn spec things
*
-* $Id: rtestval.c,v 1.10 2003/05/13 16:02:42 coulouri Exp $
+* $Id: rtestval.c,v 1.11 2003/11/14 18:07:54 kans Exp $
*
*****************************************************************************/
#include <accid1.h>
@@ -158,6 +158,10 @@ Int2 Main(void)
vsp->cutoff = (Int2)(myargs[10].intvalue);
vsp->useSeqMgrIndexes = (Boolean)(myargs[12].intvalue); /* indexed validate */
vsp->validateAlignments = (Boolean)(myargs[13].intvalue);
+ if (vsp->validateAlignments) {
+ vsp->alignFindRemoteBsp = TRUE;
+ vsp->doSeqHistAssembly = TRUE;
+ }
vsp->farIDsInAlignments = (Boolean)(myargs[13].intvalue);
vsp->alwaysRequireIsoJTA = (Boolean)(myargs[14].intvalue);
vsp->farFetchCDSproducts = (Boolean) (myargs[15].intvalue && myargs[16].intvalue);
diff --git a/demo/tbl2asn.c b/demo/tbl2asn.c
index 1b36b887..a969f40e 100644
--- a/demo/tbl2asn.c
+++ b/demo/tbl2asn.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 5/5/00
*
-* $Revision: 6.71 $
+* $Revision: 6.79 $
*
* File Description:
*
@@ -61,7 +61,7 @@
#include <simple.h>
#include <aliparse.h>
-#define TBL2ASN_APP_VER "2.5"
+#define TBL2ASN_APP_VER "2.8"
CharPtr TBL2ASN_APPLICATION = TBL2ASN_APP_VER;
@@ -162,6 +162,7 @@ static void ValidateOneFile (
if (vsp != NULL) {
vsp->useSeqMgrIndexes = TRUE;
vsp->suppressContext = TRUE;
+ vsp->seqSubmitParent = TRUE;
oldErrSev = ErrSetMessageLevel (SEV_NONE);
ValidateSeqEntry (sep, vsp);
ValidStructFree (vsp);
@@ -552,6 +553,16 @@ static OrgStuff commonOrgStuff [] = {
"PLN", 1, 1, 39947
},
{
+ "Aspergillus nidulans FGSC A4", "",
+ "Eukaryota; Fungi; Ascomycota; Pezizomycotina; Eurotiomycetes; Eurotiales; Trichocomaceae; Emericella",
+ "PLN", 1, 4, 227321
+ },
+ {
+ "environmental sequence", "",
+ "unclassified; environmental samples",
+ "UNA", 1, 2, 256318
+ },
+ {
NULL, NULL, NULL, NULL, 0, 0, 0
}
};
@@ -778,42 +789,62 @@ static BioseqPtr AttachSeqAnnotEntity (Uint2 entityID, SeqAnnotPtr sap, Boolean
return bsp;
}
-static CharPtr TrimBracketsFromString (CharPtr str)
+static CharPtr TrimBracketsFromString (CharPtr str, SqnTagPtr stp)
{
Uchar ch; /* to use 8bit characters in multibyte languages */
+ Int2 count;
CharPtr dst;
CharPtr ptr;
- if (StringHasNoText (str)) return str;
+ if (StringHasNoText (str) || stp == NULL) return str;
/* remove bracketed fields */
+ count = 0;
dst = str;
ptr = str;
ch = *ptr;
while (ch != '\0') {
if (ch == '[') {
- ptr++;
- ch = *ptr;
- while (ch != '\0' && ch != ']' && ch != '"') {
+ if (count < stp->num_tags && (! stp->used [count])) {
+ *dst = ch;
+ dst++;
ptr++;
ch = *ptr;
- }
- if (ch == '"') {
+ while (ch != '\0' && ch != ']') {
+ *dst = ch;
+ dst++;
+ ptr++;
+ ch = *ptr;
+ }
+ *dst = ch;
+ dst++;
+ ptr++;
+ ch = *ptr;
+ } else {
ptr++;
ch = *ptr;
- while (ch != '\0' && ch != '"') {
+ while (ch != '\0' && ch != ']' && ch != '"') {
+ ptr++;
+ ch = *ptr;
+ }
+ if (ch == '"') {
+ ptr++;
+ ch = *ptr;
+ while (ch != '\0' && ch != '"') {
+ ptr++;
+ ch = *ptr;
+ }
+ }
+ while (ch != '\0' && ch != ']') {
ptr++;
ch = *ptr;
}
- }
- while (ch != '\0' && ch != ']') {
ptr++;
ch = *ptr;
}
- ptr++;
- ch = *ptr;
+ count++;
} else {
*dst = ch;
dst++;
@@ -1037,16 +1068,16 @@ static void ProcessOneNuc (
}
}
- if (stp != NULL) {
- SqnTagFree (stp);
- }
-
- TrimBracketsFromString (ttl);
+ TrimBracketsFromString (ttl, stp);
if (! StringHasNoText (ttl)) {
str = StringSave (ttl);
SeqDescrAddPointer (&(bsp->descr), Seq_descr_title, (Pointer) str);
}
+ if (stp != NULL) {
+ SqnTagFree (stp);
+ }
+
ValNodeFreeData (vnp);
}
@@ -1179,6 +1210,60 @@ static void ReplaceOnePeptide (
MemFree (str2);
}
+static void ReplaceOneRNA (
+ SimpleSeqPtr ssp,
+ Boolean conflict
+)
+
+{
+ ByteStorePtr bs;
+ BioseqPtr bsp;
+ SeqIdPtr sip;
+ CharPtr str1, str2;
+
+ if (ssp == NULL || ssp->numid < 1) return;
+
+ sip = MakeSeqID (ssp->id [0]);
+ bsp = BioseqFind (sip);
+ SeqIdFree (sip);
+ if (bsp == NULL || bsp->repr != Seq_repr_raw) return;
+
+ /* remove trailing X and * */
+
+ bs = ssp->seq;
+ ssp->seqlen = BSLen (bs);
+
+ str1 = BSMerge (ssp->seq, NULL);
+ str2 = GetSequenceByBsp (bsp);
+
+ if (StringCmp (str1, str2) != 0) {
+
+ /* swap sequence byte stores */
+
+ bs = bsp->seq_data;
+ bsp->seq_data = ssp->seq;
+ ssp->seq = bs;
+ bsp->length = BSLen (bsp->seq_data);
+ bsp->seq_data_type = Seq_code_iupacna;
+
+ /*
+ mrna = SeqMgrGetRNAgivenProduct (bsp, NULL);
+ if (mrna != NULL) {
+
+ if (conflict) {
+ mrna->excpt = TRUE;
+ if (StringHasNoText (mrna->except_text)) {
+ mrna->except_text = StringSave ("RNA editing");
+ }
+ }
+ }
+ */
+ }
+
+ MemFree (str1);
+ MemFree (str2);
+}
+
static Uint2 ProcessOneAsn (
FILE* fp,
BioSourcePtr src,
@@ -2128,6 +2213,29 @@ static void ProcessOneRecord (
FileClose (fp);
}
+ /* read one or more feature tables from .rna file */
+
+ fp = OpenOneFile (directory, base, ".rna");
+ if (fp != NULL) {
+
+ /* indexing needed to find mRNA from transcript product to set RNA editing exception */
+
+ SeqMgrIndexFeatures (entityID, NULL);
+
+ while ((dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, TRUE, TRUE, TRUE)) != NULL) {
+ if (datatype == OBJ_FASTA) {
+
+ ssp = (SimpleSeqPtr) dataptr;
+ ReplaceOneRNA (ssp, tbl->conflict);
+ SimpleSeqFree (ssp);
+
+ } else {
+ ObjMgrFree (datatype, dataptr);
+ }
+ }
+ FileClose (fp);
+ }
+
/* read one or more quality score blocks from .qvl file */
fp = OpenOneFile (directory, base, ".qvl");
@@ -2222,7 +2330,11 @@ static void ProcessOneRecord (
if (! tbl->genprodset) {
VisitFeaturesInSep (sep, NULL, ClearRnaProducts);
}
- InstantiateProteinTitles (entityID, NULL);
+ if (SeqMgrFeaturesAreIndexed (entityID)) {
+ InstantiateProteinTitles (entityID, NULL);
+ } else {
+ Message (MSG_POSTERR, "Unable to instantiate protein titles due to dropped index");
+ }
if (tbl->genprodset) {
/* need to reindex before instantiating mRNA titles */
SeqMgrIndexFeatures (entityID, NULL);