summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--VERSION2
-rw-r--r--access/ent2api.c6
-rw-r--r--algo/blast/core/blast_extend.c13
-rw-r--r--algo/blast/core/blast_extend.h4
-rw-r--r--algo/blast/core/na_ungapped.c251
-rw-r--r--api/asn2gnb3.c102
-rw-r--r--api/asn2gnb4.c92
-rw-r--r--api/asn2gnb5.c4
-rw-r--r--api/asn2gnb6.c5
-rw-r--r--api/asn2gnbi.h3
-rw-r--r--api/gbftdef.h3
-rw-r--r--api/gbftglob.c3
-rwxr-xr-xapi/macroapi.c158
-rw-r--r--api/seqport.c352
-rw-r--r--api/seqport.h240
-rw-r--r--api/sequtil.c9
-rw-r--r--api/sqnutil1.c64
-rw-r--r--api/sqnutil2.c15
-rw-r--r--api/sqnutil3.c157
-rwxr-xr-xapi/sqnutil4.c179
-rw-r--r--api/sqnutils.h11
-rw-r--r--api/valid.c26
-rw-r--r--checkout.date2
-rw-r--r--connect/ncbi_connection.c17
-rw-r--r--connect/ncbi_socket.c53
-rw-r--r--data/institution_codes.txt29
-rw-r--r--demo/asn2all.c4
-rw-r--r--demo/asn2gb.c4
-rw-r--r--demo/asnval.c6
-rw-r--r--demo/scantest.c36
-rwxr-xr-xdemo/src_chk.c1185
-rw-r--r--demo/tbl2asn.c18
-rw-r--r--desktop/pubdesc.c5
-rw-r--r--doc/man/Psequin.156
-rw-r--r--doc/man/asn2all.120
-rw-r--r--doc/man/asn2fsa.18
-rw-r--r--doc/man/asn2gb.116
-rw-r--r--doc/man/asn2idx.14
-rw-r--r--doc/man/asnval.120
-rw-r--r--doc/man/blast.1175
-rw-r--r--doc/man/cleanasn.192
-rw-r--r--doc/man/fa2htgs.18
-rw-r--r--doc/man/fastacmd.18
-rw-r--r--doc/man/formatdb.14
-rw-r--r--doc/man/formatrpsdb.12
-rw-r--r--doc/man/gene2xml.114
-rw-r--r--doc/man/idfetch.14
-rw-r--r--doc/man/nps2gps.110
-rw-r--r--doc/man/spidey.18
-rw-r--r--doc/man/subfuse.130
-rw-r--r--doc/man/taxblast.134
-rw-r--r--doc/man/tbl2asn.1176
-rw-r--r--doc/man/trna2sap.12
-rw-r--r--make/makenet.unx9
-rw-r--r--make/xCode/ncbictoolkit/ncbictoolkit.xcodeproj/project.pbxproj12
-rw-r--r--sequin/sequin.h4
-rw-r--r--sequin/sequin1.c198
-rw-r--r--sequin/sequin3.c36
-rw-r--r--sequin/sequin5.c50
-rw-r--r--sequin/sequin7.c59
-rw-r--r--sequin/sequin8.c49
61 files changed, 2606 insertions, 1560 deletions
diff --git a/VERSION b/VERSION
index c21b993b..84dcfd0b 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-Sun Jul 19 10:14:04 EDT 2009
+Sun Aug 9 10:13:45 EDT 2009
diff --git a/access/ent2api.c b/access/ent2api.c
index d8e2165e..7c5125ff 100644
--- a/access/ent2api.c
+++ b/access/ent2api.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/29/99
*
-* $Revision: 1.113 $
+* $Revision: 1.114 $
*
* File Description:
*
@@ -1079,7 +1079,8 @@ NLM_EXTERN Boolean ValidateEntrez2InfoPtrEx (
StringICmp (db, "nuccore") != 0 &&
StringICmp (db, "nucgss") != 0 &&
StringICmp (db, "nucest") != 0 &&
- StringICmp (db, "toolkit") != 0) {
+ StringICmp (db, "toolkit") != 0 &&
+ StringICmp (db, "blastdbinfo") != 0) {
sprintf (buf, "Database %s has no links", db);
ValNodeCopyStr (head, 0, buf);
rsult = FALSE;
@@ -1350,6 +1351,7 @@ NLM_EXTERN Boolean ValidateEntrez2InfoPtrEx (
} else if (StringICmp (last, "Comment") == 0 && StringICmp (str, "Comments") == 0) {
} else if (StringICmp (last, "SID") == 0 && StringICmp (str, "SidExternalID") == 0) {
} else if (StringICmp (last, "Platform") == 0 && StringICmp (str, "Platform Reporter Type") == 0) {
+ } else if (StringICmp (last, "Database") == 0 && StringICmp (str, "Database Name") == 0) {
} else {
sprintf (buf, "Menu names %s [%s] and %s [%s] may be unintended variants", last, dbnames [lastvnp->choice], str, dbnames [vnp->choice]);
ValNodeCopyStr (head, 0, buf);
diff --git a/algo/blast/core/blast_extend.c b/algo/blast/core/blast_extend.c
index dcfd895c..6c09e454 100644
--- a/algo/blast/core/blast_extend.c
+++ b/algo/blast/core/blast_extend.c
@@ -1,4 +1,4 @@
-/* $Id: blast_extend.c,v 1.118 2009/01/05 16:54:38 kazimird Exp $
+/* $Id: blast_extend.c,v 1.119 2009/07/30 19:34:30 kazimird Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -30,7 +30,7 @@
#ifndef SKIP_DOXYGEN_PROCESSING
static char const rcsid[] =
- "$Id: blast_extend.c,v 1.118 2009/01/05 16:54:38 kazimird Exp $";
+ "$Id: blast_extend.c,v 1.119 2009/07/30 19:34:30 kazimird Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
#include <algo/blast/core/blast_extend.h>
@@ -80,7 +80,7 @@ s_BlastDiagTableFree(BLAST_DiagTable* diag_table)
{
if (diag_table) {
sfree(diag_table->hit_level_array);
-
+ sfree(diag_table->hit_len_array);
sfree(diag_table);
}
return NULL;
@@ -106,6 +106,7 @@ static Int4 s_BlastDiagClear(BLAST_DiagTable * diag)
for (i = 0; i < n; i++) {
diag_struct_array[i].flag = 0;
diag_struct_array[i].last_hit = -diag->window;
+ if (diag->hit_len_array) diag->hit_len_array[i] = 0;
}
return 0;
}
@@ -148,6 +149,10 @@ Int2 BlastExtendWordNew(Uint4 query_length,
diag_table->hit_level_array = (DiagStruct *)
calloc(diag_table->diag_array_length, sizeof(DiagStruct));
+ if (word_params->options->window_size) {
+ diag_table->hit_len_array = (Uint1 *)
+ calloc(diag_table->diag_array_length, sizeof(Uint1));
+ }
if (!diag_table->hit_level_array) {
sfree(ewp);
return -1;
@@ -173,7 +178,7 @@ Blast_ExtendWordExit(Blast_ExtendWord * ewp, Int4 subject_length)
}
} else if (ewp->hash_table) {
if (ewp->hash_table->offset >= INT4_MAX / 4) {
- ewp->hash_table->occupancy = 1;
+ ewp->hash_table->occupancy = 1;
ewp->hash_table->offset = ewp->hash_table->window;
memset(ewp->hash_table->backbone, 0,
ewp->hash_table->num_buckets * sizeof(Int4));
diff --git a/algo/blast/core/blast_extend.h b/algo/blast/core/blast_extend.h
index 01f1061a..c627fad1 100644
--- a/algo/blast/core/blast_extend.h
+++ b/algo/blast/core/blast_extend.h
@@ -1,4 +1,4 @@
-/* $Id: blast_extend.h,v 1.53 2008/07/23 16:55:47 kazimird Exp $
+/* $Id: blast_extend.h,v 1.54 2009/07/30 19:34:30 kazimird Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -66,6 +66,7 @@ typedef struct DiagHashCell {
Int4 diag; /**< This hit's diagonal */
Int4 level : 31; /**< This hit's offset in the subject sequence */
Uint4 hit_saved : 1; /**< Whether or not this hit has been saved */
+ Int4 hit_len; /**< The length of last hit */
Uint4 next; /**< Offset of next element in the chain */
} DiagHashCell;
@@ -76,6 +77,7 @@ typedef struct DiagHashCell {
typedef struct BLAST_DiagTable {
DiagStruct* hit_level_array;/**< Array to hold latest hits and their
lengths for all diagonals */
+ Uint1* hit_len_array; /**< Array to hold the length of the latest hit */
Int4 diag_array_length; /**< Smallest power of 2 longer than query length */
Int4 diag_mask; /**< Used to mask off everything above
min_diag_length (mask = min_diag_length-1). */
diff --git a/algo/blast/core/na_ungapped.c b/algo/blast/core/na_ungapped.c
index e9b63197..308fc13e 100644
--- a/algo/blast/core/na_ungapped.c
+++ b/algo/blast/core/na_ungapped.c
@@ -1,4 +1,4 @@
-/* $Id: na_ungapped.c,v 1.20 2009/06/22 13:54:32 kazimird Exp $
+/* $Id: na_ungapped.c,v 1.21 2009/07/30 19:34:30 kazimird Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -30,7 +30,7 @@
#ifndef SKIP_DOXYGEN_PROCESSING
static char const rcsid[] =
- "$Id: na_ungapped.c,v 1.20 2009/06/22 13:54:32 kazimird Exp $";
+ "$Id: na_ungapped.c,v 1.21 2009/07/30 19:34:30 kazimird Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */
#include <algo/blast/core/na_ungapped.h>
@@ -251,10 +251,12 @@ s_NuclUngappedExtend(BLAST_SequenceBlk * query,
* @param diag The diagonal to be retrieved [in]
* @param level The offset of the last hit on the specified diagonal [out]
* @param hit_saved Whether or not the last hit on the specified diagonal was saved [out]
+ * @param hit_len Length of the last hit on the specified diagonal [out]
* @return 1 if successful, 0 if no hit was found on the specified diagonal.
*/
static NCBI_INLINE Int4 s_BlastDiagHashRetrieve(BLAST_DiagHash * table,
Int4 diag, Int4 * level,
+ Int4 * hit_len,
Int4 * hit_saved)
{
/* see http://lxr.linux.no/source/include/linux/hash.h */
@@ -265,6 +267,7 @@ static NCBI_INLINE Int4 s_BlastDiagHashRetrieve(BLAST_DiagHash * table,
while (index) {
if (table->chain[index].diag == diag) {
*level = table->chain[index].level;
+ *hit_len = table->chain[index].hit_len;
*hit_saved = table->chain[index].hit_saved;
return 1;
}
@@ -280,40 +283,38 @@ static NCBI_INLINE Int4 s_BlastDiagHashRetrieve(BLAST_DiagHash * table,
* @param table The hash table [in]
* @param diag The diagonal to be stored [in]
* @param level The offset of the hit to be stored [in]
+ * @param len The length of the hit to be stored [in]
* @param hit_saved Whether or not this hit was stored [in]
* @param s_end Needed to clean up defunct entries [in]
* @param window_size Needed to clean up defunct entries [in]
- * @param min_step Needed to clean up defunct entries [in]
- * @param two_hits Needed to clean up defunct entries [in]
* @return 1 if successful, 0 if memory allocation failed.
*/
static NCBI_INLINE Int4 s_BlastDiagHashInsert(BLAST_DiagHash * table,
Int4 diag, Int4 level,
+ Int4 len,
Int4 hit_saved,
- Int4 s_end,
- Int4 window_size,
- Int4 min_step,
- Int4 two_hits)
+ Int4 s_off,
+ Int4 window_size)
{
Uint4 bucket = ((Uint4) diag * 0x9E370001) % DIAGHASH_NUM_BUCKETS;
Uint4 index = table->backbone[bucket];
+ DiagHashCell *cell = NULL;
while (index) {
/* if we find what we're looking for, save into it */
if (table->chain[index].diag == diag) {
table->chain[index].level = level;
+ table->chain[index].hit_len = len;
table->chain[index].hit_saved = hit_saved;
return 1;
}
/* otherwise, if this hit is stale, save into it. */
else {
- Int4 step = s_end - table->chain[index].level;
/* if this hit is stale, save into it. */
- if (!
- (step <= (Int4) min_step
- || (two_hits && step <= window_size))) {
+ if ( s_off - table->chain[index].level > window_size) {
table->chain[index].diag = diag;
table->chain[index].level = level;
+ table->chain[index].hit_len = len;
table->chain[index].hit_saved = hit_saved;
return 1;
}
@@ -324,7 +325,6 @@ static NCBI_INLINE Int4 s_BlastDiagHashInsert(BLAST_DiagHash * table,
/* if we got this far, we were unable to replace any existing entries. */
/* if there's no more room, allocate more */
-
if (table->occupancy == table->capacity) {
table->capacity *= 2;
table->chain =
@@ -333,15 +333,14 @@ static NCBI_INLINE Int4 s_BlastDiagHashInsert(BLAST_DiagHash * table,
return 0;
}
- {
- DiagHashCell *cell = table->chain + table->occupancy;
- cell->diag = diag;
- cell->level = level;
- cell->hit_saved = hit_saved;
- cell->next = table->backbone[bucket];
- table->backbone[bucket] = table->occupancy;
- table->occupancy++;
- }
+ cell = table->chain + table->occupancy;
+ cell->diag = diag;
+ cell->level = level;
+ cell->hit_len = len;
+ cell->hit_saved = hit_saved;
+ cell->next = table->backbone[bucket];
+ table->backbone[bucket] = table->occupancy;
+ table->occupancy++;
return 1;
}
@@ -421,6 +420,7 @@ s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk * query,
{
Int4 diag, real_diag;
Int4 s_end, s_off_pos, s_end_pos;
+ Int4 ext_right = 0;
BlastUngappedData *ungapped_data;
BlastUngappedData dummy_ungapped_data;
Int4 window_size = word_params->options->window_size;
@@ -429,6 +429,8 @@ s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk * query,
DiagStruct *hit_level_array;
BlastUngappedCutoffs *cutoffs = NULL;
Boolean two_hits = (window_size > 0);
+ Boolean found = FALSE;
+ Int4 Delta = MIN(5, window_size - word_length);
hit_level_array = diag_table->hit_level_array;
ASSERT(hit_level_array);
@@ -441,41 +443,56 @@ s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk * query,
s_off_pos = s_off + diag_table->offset;
s_end_pos = s_end + diag_table->offset;
- if (contiguous) {
- /* hit within the explored area should be rejected*/
- if (s_off_pos < last_hit) return 0;
+ /* hit within the explored area should be rejected*/
+ if (s_off_pos < last_hit) return 0;
- if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) {
- /* this must be the 1st hit */
- /* check to see if it can be extended to the right by
- word_length and therefore qualifies for a double-hit */
- Uint4 ext_right = s_BlastRightExtend(query, subject,
+ if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) {
+ /* check to see if it can be extended to the right by
+ word_length and therefore qualifies for a double-hit */
+ if (contiguous) {
+ ext_right = s_BlastRightExtend(query, subject,
q_off + word_length, s_end, query_info, word_length);
/* update the right end*/
s_end += ext_right;
s_end_pos += ext_right;
- if (ext_right < word_length) {
- /* if it is not a double hit, then it is a new hit */
+ }
+
+ if (ext_right < word_length) {
+ /* try off-diagonals */
+ Int4 orig_diag = real_diag + diag_table->diag_array_length;
+ Int4 s_a = s_off_pos + word_length - window_size;
+ Int4 s_b = s_end_pos - 2 * word_length;
+ Int4 delta;
+ if (Delta < 0) Delta = 0;
+ for (delta = 1; delta < Delta ; ++delta) {
+ Int4 off_diag = (orig_diag + delta) & diag_table->diag_mask;
+ Int4 off_s_end = hit_level_array[off_diag].last_hit;
+ Int4 off_s_l = diag_table->hit_len_array[off_diag];
+ if ( off_s_l
+ && off_s_end - delta >= s_a
+ && off_s_end - off_s_l <= s_b) {
+ found = TRUE;
+ break;
+ }
+ off_diag = (orig_diag - delta) & diag_table->diag_mask;
+ off_s_end = hit_level_array[off_diag].last_hit;
+ off_s_l = diag_table->hit_len_array[off_diag];
+ if ( off_s_l
+ && off_s_end >= s_a
+ && off_s_end - off_s_l + delta <= s_b) {
+ found = TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ /* This is a new hit */
hit_ready = 0;
- last_hit = s_end_pos;
- hit_saved = 0;
}
}
- } else {
- /* hit within the explored area should be rejected*/
- if (s_off_pos < last_hit) return 0;
-
- if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) {
- /* first hit */
- hit_ready = 0;
- last_hit = s_end_pos;
- hit_saved = 0;
- }
}
if (hit_ready) {
if (word_params->ungapped_extension) {
- /* Perform ungapped extension */
Int4 context = BSearchContextInfo(q_off, query_info);
cutoffs = word_params->cutoffs + context;
ungapped_data = &dummy_ungapped_data;
@@ -484,31 +501,27 @@ s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk * query,
word_params->nucl_score_table,
cutoffs->reduced_nucl_cutoff_score);
- last_hit = ungapped_data->length + ungapped_data->s_start
- + diag_table->offset;
+ if (found || ungapped_data->score >= cutoffs->cutoff_score) {
+ BlastUngappedData *final_data =
+ (BlastUngappedData *) malloc(sizeof(BlastUngappedData));
+ *final_data = *ungapped_data;
+ BLAST_SaveInitialHit(init_hitlist, q_off, s_off, final_data);
+ s_end_pos = ungapped_data->length + ungapped_data->s_start
+ + diag_table->offset;
+ } else {
+ hit_ready = 0;
+ }
} else {
ungapped_data = NULL;
- last_hit = s_end_pos;
- }
- if (ungapped_data == NULL) {
BLAST_SaveInitialHit(init_hitlist, q_off, s_off, ungapped_data);
- /* Set the "saved" flag for this hit */
- hit_saved = 1;
- } else if (ungapped_data->score >= cutoffs->cutoff_score) {
- BlastUngappedData *final_data =
- (BlastUngappedData *) malloc(sizeof(BlastUngappedData));
- *final_data = *ungapped_data;
- BLAST_SaveInitialHit(init_hitlist, q_off, s_off, final_data);
- /* Set the "saved" flag for this hit */
- hit_saved = 1;
- } else {
- /* Unset the "saved" flag for this hit */
- hit_saved = 0;
}
}
- hit_level_array[real_diag].last_hit = last_hit;
- hit_level_array[real_diag].flag = hit_saved;
+ hit_level_array[real_diag].last_hit = s_end_pos;
+ hit_level_array[real_diag].flag = hit_ready;
+ if (two_hits) {
+ diag_table->hit_len_array[real_diag] = (hit_ready) ? 0 : s_end_pos - s_off_pos;
+ }
return hit_ready;
}
@@ -543,7 +556,8 @@ s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk * query,
BlastInitHitList * init_hitlist)
{
Int4 diag;
- Int4 s_end, s_off_pos, s_end_pos;
+ Int4 s_end, s_off_pos, s_end_pos, s_l;
+ Int4 ext_right = 0;
BlastUngappedData *ungapped_data;
BlastUngappedData dummy_ungapped_data;
Int4 window_size = word_params->options->window_size;
@@ -551,6 +565,8 @@ s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk * query,
Int4 last_hit, hit_saved = 0;
BlastUngappedCutoffs *cutoffs = NULL;
Boolean two_hits = (window_size > 0);
+ Boolean found = FALSE;
+ Int4 Delta = MIN(5, window_size - word_length);
Int4 rc;
diag = s_off - q_off;
@@ -558,40 +574,60 @@ s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk * query,
s_off_pos = s_off + hash_table->offset;
s_end_pos = s_end + hash_table->offset;
- rc = s_BlastDiagHashRetrieve(hash_table, diag, &last_hit, &hit_saved);
+ rc = s_BlastDiagHashRetrieve(hash_table, diag, &last_hit, &s_l, &hit_saved);
/* if there is no record in hashtable, we set last_hit to be a very negative number */
- if(!rc) last_hit = 0;
- if (contiguous) {
- /* hit within the explored area should be rejected*/
- if (s_off_pos < last_hit) return 0;
-
- if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) {
- /* this must be the 1st hit */
- /* check to see if it can be extended to the right by
- word_length and therefore qualifies for a double-hit */
- Uint4 ext_right = s_BlastRightExtend(query, subject,
+ if(!rc) last_hit = 0;
+
+ /* hit within the explored area should be rejected*/
+ if (s_off_pos < last_hit) return 0;
+
+ if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) {
+ /* this must be the 1st hit */
+ /* check to see if it can be extended to the right by
+ word_length and therefore qualifies for a double-hit */
+ if (contiguous) {
+ ext_right = s_BlastRightExtend(query, subject,
q_off + word_length, s_end, query_info, word_length);
/* update the right end*/
s_end += ext_right;
s_end_pos += ext_right;
- if (ext_right < word_length) {
- /* if it is not a double hit, then it is a new hit */
+ }
+
+ if (ext_right < word_length) {
+ /* try off-diagonal */
+ Int4 s_a = s_off_pos + word_length - window_size;
+ Int4 s_b = s_end_pos - 2 * word_length;
+ Int4 delta;
+ if (Delta < 0) Delta = 0;
+ for (delta = 1; delta < Delta; ++delta) {
+ Int4 off_s_end = 0;
+ Int4 off_s_l = 0;
+ Int4 off_hit_saved = 0;
+ Int4 off_rc = s_BlastDiagHashRetrieve(hash_table, diag + delta,
+ &off_s_end, &off_s_l, &off_hit_saved);
+ if ( off_rc
+ && off_s_l
+ && off_s_end - delta >= s_a
+ && off_s_end - off_s_l <= s_b) {
+ found = TRUE;
+ break;
+ }
+ off_rc = s_BlastDiagHashRetrieve(hash_table, diag - delta,
+ &off_s_end, &off_s_l, &off_hit_saved);
+ if ( off_rc
+ && off_s_l
+ && off_s_end >= s_a
+ && off_s_end - off_s_l + delta <= s_b) {
+ found = TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ /* This is a new hit */
hit_ready = 0;
- last_hit = s_end_pos;
- hit_saved = 0;
}
}
- } else {
- /* hit within the explored area should be rejected*/
- if (s_off_pos < last_hit) return 0;
-
- if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) {
- /* first hit */
- hit_ready = 0;
- last_hit = s_end_pos;
- hit_saved = 0;
- }
}
if (hit_ready) {
@@ -605,32 +641,25 @@ s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk * query,
ungapped_data,
word_params->nucl_score_table,
cutoffs->reduced_nucl_cutoff_score);
-
- last_hit = ungapped_data->length + ungapped_data->s_start
- + hash_table->offset;
+ if (found || ungapped_data->score >= cutoffs->cutoff_score) {
+ BlastUngappedData *final_data =
+ (BlastUngappedData *) malloc(sizeof(BlastUngappedData));
+ *final_data = *ungapped_data;
+ BLAST_SaveInitialHit(init_hitlist, q_off, s_off, final_data);
+ s_end_pos = ungapped_data->length + ungapped_data->s_start
+ + hash_table->offset;
+ } else {
+ hit_ready = 0;
+ }
} else {
ungapped_data = NULL;
- last_hit = s_end_pos;
- }
- if (ungapped_data == NULL) {
BLAST_SaveInitialHit(init_hitlist, q_off, s_off, ungapped_data);
- /* Set the "saved" flag for this hit */
- hit_saved = 1;
- } else if (ungapped_data->score >= cutoffs->cutoff_score) {
- BlastUngappedData *final_data =
- (BlastUngappedData *) malloc(sizeof(BlastUngappedData));
- *final_data = *ungapped_data;
- BLAST_SaveInitialHit(init_hitlist, q_off, s_off, final_data);
- /* Set the "saved" flag for this hit */
- hit_saved = 1;
- } else {
- /* Unset the "saved" flag for this hit */
- hit_saved = 0;
}
}
-
- s_BlastDiagHashInsert(hash_table, diag, last_hit, hit_saved,
- s_end + hash_table->offset, window_size, word_length ,two_hits);
+
+ s_BlastDiagHashInsert(hash_table, diag, s_end_pos,
+ (hit_ready) ? 0 : s_end_pos - s_off_pos,
+ hit_ready, s_off_pos, window_size + Delta);
return hit_ready;
}
diff --git a/api/asn2gnb3.c b/api/asn2gnb3.c
index 025c5b77..3fd89877 100644
--- a/api/asn2gnb3.c
+++ b/api/asn2gnb3.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.111 $
+* $Revision: 1.113 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -308,12 +308,6 @@ static void AddWGSMasterCommentString (
if (StringHasNoText (taxname)) {
taxname = "?";
}
- if (StringHasNoText (first)) {
- first = "?";
- }
- if (StringHasNoText (last)) {
- last = "?";
- }
ver [0] = '\0';
acclen = StringLen (wgsname);
if (acclen == 12) {
@@ -330,15 +324,30 @@ static void AddWGSMasterCommentString (
sprintf (buf, "The %s whole genome shotgun (WGS) project has the project accession %s.", taxname, wgsaccn);
FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
- sprintf (buf, " This version of the project (%s) has the accession number %s,", ver, wgsname);
+ sprintf (buf, " This version of the project (%s) has the accession number %s", ver, wgsname);
FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_EXPAND);
- if (StringCmp (first, last) != 0) {
- sprintf (buf, " and consists of sequences %s-%s.", first, last);
+ if (first == NULL && last == NULL) {
+ sprintf (buf, ".");
FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
} else {
- sprintf (buf, " and consists of sequence %s.", first);
- FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
+ if (first != NULL && last == NULL) {
+ last = first;
+ } else if (first == NULL && last != NULL) {
+ first = last;
+ }
+ if (StringDoesHaveText (first) && StringDoesHaveText (last)) {
+ if (StringCmp (first, last) != 0) {
+ sprintf (buf, ", and consists of sequences %s-%s.", first, last);
+ FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
+ } else {
+ sprintf (buf, ", and consists of sequence %s.", first);
+ FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
+ }
+ } else {
+ sprintf (buf, ".");
+ FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
+ }
}
}
@@ -1295,21 +1304,27 @@ static CharPtr GetStrForTpaOrRefSeqHist (
{
Boolean accn;
- Char buf [64];
+ Char buf [100];
DbtagPtr dbt;
Int4 gi;
ValNodePtr head = NULL;
SeqHistPtr hist;
SeqIdPtr id;
+ Int2 j;
+ int k;
+ Int2 max;
Boolean minus1;
Boolean minus2;
+ Int4 oldstop = -1;
+ Uint1 residue;
SeqAlignPtr salp;
SeqAlignPtr salptmp;
+ StreamCache sc;
SeqIdPtr sip;
Int4 start;
Int4 stop;
CharPtr str;
- Char tmp [80];
+ Char tmp [120];
if (bsp == NULL) return NULL;
hist = bsp->hist;
@@ -1344,6 +1359,65 @@ static CharPtr GetStrForTpaOrRefSeqHist (
ValNodeCopyStr (&head, 0, "TPA_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP");
}
}
+ if (isRefSeq && oldstop > -1 && oldstop < start) {
+ sprintf (tmp, "~%ld-%ld ",
+ (long) (oldstop + 1), (long) (start));
+ tmp [21] = '\0';
+ StringCpy (buf, " ");
+ k = 0;
+ if (StreamCacheSetup (bsp, NULL, 0, &sc)) {
+ if (start - oldstop < 15) {
+ StreamCacheSetPosition (&sc, oldstop);
+ buf [k] = '"';
+ k++;
+ max = start - oldstop;
+ for (j = 0; j < max; j++) {
+ residue = StreamCacheGetResidue (&sc);
+ buf [k] = (Char) residue;
+ k++;
+ }
+ buf [k] = '"';
+ k++;
+ } else {
+ StreamCacheSetPosition (&sc, oldstop);
+ buf [k] = '"';
+ k++;
+ for (j = 0; j < 4; j++) {
+ residue = StreamCacheGetResidue (&sc);
+ buf [k] = (Char) residue;
+ k++;
+ }
+ buf [k] = '.';
+ k++;
+ buf [k] = '.';
+ k++;
+ buf [k] = '.';
+ k++;
+ StreamCacheSetPosition (&sc, start - 4);
+ for (j = 0; j < 4; j++) {
+ residue = StreamCacheGetResidue (&sc);
+ buf [k] = (Char) residue;
+ k++;
+ }
+ buf [k] = '"';
+ k++;
+ }
+ } else {
+ /*
+ StringCpy (buf, "inserted base(s)");
+ */
+ }
+ buf [k] = '\0';
+ StringCat (buf, " ");
+ buf [18] = '\0';
+ StringCat (tmp, buf);
+ sprintf (buf, " %ld-%ld ",
+ (long) 1, (long) (start - oldstop));
+ buf [21] = '\0';
+ StringCat (tmp, buf);
+ ValNodeCopyStr (&head, 0, tmp);
+ }
+ oldstop = stop + 1;
if (id != NULL) {
SeqIdWrite (id, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 1);
if (id->choice == SEQID_GENERAL) {
diff --git a/api/asn2gnb4.c b/api/asn2gnb4.c
index ae78f805..5564f1e3 100644
--- a/api/asn2gnb4.c
+++ b/api/asn2gnb4.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.199 $
+* $Revision: 1.201 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -183,6 +183,7 @@ static FtQualType feat_qual_order [] = {
FTQUAL_label,
FTQUAL_cds_product,
FTQUAL_extra_products,
+ FTQUAL_UniProtKB_evidence,
FTQUAL_protein_id,
FTQUAL_transcript_id,
FTQUAL_db_xref,
@@ -376,6 +377,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
{ "trna_aa", Qual_class_ignore },
{ "codon_recognized", Qual_class_trna_codons },
{ "trna_codons", Qual_class_trna_codons },
+ { "UniProtKB_evidence", Qual_class_quote },
{ "usedin", Qual_class_usedin },
{ "xtra_products", Qual_class_xtraprds }
};
@@ -386,49 +388,50 @@ typedef struct qualfeatur {
FtQualType featurclass;
} QualFeatur, PNTR QualFeaturPtr;
-#define NUM_GB_QUALS 40
+#define NUM_GB_QUALS 41
static QualFeatur qualToFeature [NUM_GB_QUALS] = {
- { "allele", FTQUAL_allele },
- { "bound_moiety", FTQUAL_bound_moiety },
- { "clone", FTQUAL_clone },
- { "codon", FTQUAL_codon },
- { "compare", FTQUAL_compare },
- { "cons_splice", FTQUAL_cons_splice },
- { "cyt_map", FTQUAL_gene_cyt_map },
- { "direction", FTQUAL_direction },
- { "EC_number", FTQUAL_EC_number },
- { "estimated_length", FTQUAL_estimated_length },
- { "experiment", FTQUAL_experiment },
- { "frequency", FTQUAL_frequency },
- { "function", FTQUAL_function },
- { "gen_map", FTQUAL_gene_gen_map },
- { "inference", FTQUAL_inference },
- { "insertion_seq", FTQUAL_insertion_seq },
- { "label", FTQUAL_label },
- { "map", FTQUAL_map },
- { "mobile_element", FTQUAL_mobile_element },
- { "mod_base", FTQUAL_mod_base },
- { "ncRNA_class", FTQUAL_ncRNA_class },
- { "number", FTQUAL_number },
- { "old_locus_tag", FTQUAL_old_locus_tag },
- { "operon", FTQUAL_operon },
- { "organism", FTQUAL_organism },
- { "PCR_conditions", FTQUAL_PCR_conditions },
- { "phenotype", FTQUAL_phenotype },
- { "product", FTQUAL_product_quals },
- { "rad_map", FTQUAL_gene_rad_map },
- { "replace", FTQUAL_replace },
- { "rpt_family", FTQUAL_rpt_family },
- { "rpt_type", FTQUAL_rpt_type },
- { "rpt_unit", FTQUAL_rpt_unit },
- { "rpt_unit_range", FTQUAL_rpt_unit_range },
- { "rpt_unit_seq", FTQUAL_rpt_unit_seq },
- { "satellite", FTQUAL_satellite },
- { "standard_name", FTQUAL_standard_name },
- { "tag_peptide", FTQUAL_tag_peptide },
- { "transposon", FTQUAL_transposon },
- { "usedin", FTQUAL_usedin }
+ { "allele", FTQUAL_allele },
+ { "bound_moiety", FTQUAL_bound_moiety },
+ { "clone", FTQUAL_clone },
+ { "codon", FTQUAL_codon },
+ { "compare", FTQUAL_compare },
+ { "cons_splice", FTQUAL_cons_splice },
+ { "cyt_map", FTQUAL_gene_cyt_map },
+ { "direction", FTQUAL_direction },
+ { "EC_number", FTQUAL_EC_number },
+ { "estimated_length", FTQUAL_estimated_length },
+ { "experiment", FTQUAL_experiment },
+ { "frequency", FTQUAL_frequency },
+ { "function", FTQUAL_function },
+ { "gen_map", FTQUAL_gene_gen_map },
+ { "inference", FTQUAL_inference },
+ { "insertion_seq", FTQUAL_insertion_seq },
+ { "label", FTQUAL_label },
+ { "map", FTQUAL_map },
+ { "mobile_element", FTQUAL_mobile_element },
+ { "mod_base", FTQUAL_mod_base },
+ { "ncRNA_class", FTQUAL_ncRNA_class },
+ { "number", FTQUAL_number },
+ { "old_locus_tag", FTQUAL_old_locus_tag },
+ { "operon", FTQUAL_operon },
+ { "organism", FTQUAL_organism },
+ { "PCR_conditions", FTQUAL_PCR_conditions },
+ { "phenotype", FTQUAL_phenotype },
+ { "product", FTQUAL_product_quals },
+ { "rad_map", FTQUAL_gene_rad_map },
+ { "replace", FTQUAL_replace },
+ { "rpt_family", FTQUAL_rpt_family },
+ { "rpt_type", FTQUAL_rpt_type },
+ { "rpt_unit", FTQUAL_rpt_unit },
+ { "rpt_unit_range", FTQUAL_rpt_unit_range },
+ { "rpt_unit_seq", FTQUAL_rpt_unit_seq },
+ { "satellite", FTQUAL_satellite },
+ { "standard_name", FTQUAL_standard_name },
+ { "tag_peptide", FTQUAL_tag_peptide },
+ { "transposon", FTQUAL_transposon },
+ { "UniProtKB_evidence", FTQUAL_UniProtKB_evidence },
+ { "usedin", FTQUAL_usedin }
};
static Int2 GbqualToFeaturIndex (
@@ -927,6 +930,7 @@ static ValQual legalGbqualList [] = {
{ FEATDEF_CDS , FTQUAL_standard_name },
{ FEATDEF_PROT , FTQUAL_product },
+ { FEATDEF_PROT , FTQUAL_UniProtKB_evidence },
{ FEATDEF_preRNA , FTQUAL_allele },
{ FEATDEF_preRNA , FTQUAL_function },
@@ -3481,6 +3485,7 @@ static void FormatFeatureBlockQuals (
tmp = StringSave (gbq->val);
str = tmp;
len = StringLen (str);
+#if 0
if (len > 1 && *str == '(' && str [len - 1] == ')' /* &&
StringChr (str + 1, '(') == NULL /* && StringChr (str, ',') != NULL */) {
str++;
@@ -3511,6 +3516,7 @@ static void FormatFeatureBlockQuals (
str = ptr;
}
} else {
+#endif
if ((! ajp->flags.checkQualSyntax) || (ValidateRptUnit (str))) {
TrimSpacesAroundString (str);
if (idx == FTQUAL_rpt_unit_range) {
@@ -3526,7 +3532,9 @@ static void FormatFeatureBlockQuals (
FFAddOneChar(ffstring, '\n', FALSE);
}
}
+#if 0
}
+#endif
MemFree (tmp);
}
gbq = gbq->next;
diff --git a/api/asn2gnb5.c b/api/asn2gnb5.c
index f1be0ec4..99b3b6f5 100644
--- a/api/asn2gnb5.c
+++ b/api/asn2gnb5.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.154 $
+* $Revision: 1.155 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -154,9 +154,11 @@ static UrlData Nlm_url_base [] = {
{"GeneID", "http://www.ncbi.nlm.nih.gov/sites/entrez?db=gene&cmd=Retrieve&dopt=full_report&list_uids="},
{"GO", "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&depth=1&query=GO:"},
{"GOA", "http://www.ebi.ac.uk/ego/GProtein?ac="},
+ {"GreengenesID", "http://greengenes.lbl.gov/cgi-bin/show_one_record_v2.pl?prokMSA_id="},
{"GRIN", "http://www.ars-grin.gov/cgi-bin/npgs/acc/display.pl?"},
{"H-InvDB", "http://www.h-invitational.jp"},
{"HGNC", "http://www.genenames.org/data/hgnc_data.php?hgnc_id="},
+ {"HMPID", "http://www.hmpdacc-resources.org/cgi-bin/hmp_catalog/main.cgi?section=HmpSummary&page=displayHmpProject&hmp_id="},
{"HOMD", "http://www.homd.org/"},
{"HPRD", "http://www.hprd.org/protein/"},
{"HSSP", "http://srs.ebi.ac.uk/srsbin/cgi-bin/wgetz?-newId+-e+hssp-ID:"},
diff --git a/api/asn2gnb6.c b/api/asn2gnb6.c
index d9cda849..3abbb4f7 100644
--- a/api/asn2gnb6.c
+++ b/api/asn2gnb6.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.196 $
+* $Revision: 1.198 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -472,9 +472,11 @@ NLM_EXTERN CharPtr legalDbXrefs [] = {
"GeneID",
"GO",
"GOA",
+ "GreengenesID",
"GRIN",
"H-InvDB",
"HGNC",
+ "HMPID",
"HOMD",
"HSSP",
"IMGT/GENE-DB",
@@ -547,6 +549,7 @@ NLM_EXTERN CharPtr legalSrcDbXrefs [] = {
"IMGT/LIGM",
"JCM",
"MGI",
+ "MycoBank",
"NBRC",
"RZPD",
"taxon",
diff --git a/api/asn2gnbi.h b/api/asn2gnbi.h
index 4bf0a98b..fb2848c3 100644
--- a/api/asn2gnbi.h
+++ b/api/asn2gnbi.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 12/30/03
*
-* $Revision: 1.111 $
+* $Revision: 1.112 $
*
* File Description: New GenBank flatfile generator, internal header
*
@@ -718,6 +718,7 @@ typedef enum {
FTQUAL_trna_aa,
FTQUAL_trna_codons,
FTQUAL_trna_codons_note,
+ FTQUAL_UniProtKB_evidence,
FTQUAL_usedin,
FTQUAL_xtra_prod_quals,
ASN2GNBK_TOTAL_FEATUR
diff --git a/api/gbftdef.h b/api/gbftdef.h
index 04a2ab0b..069a018e 100644
--- a/api/gbftdef.h
+++ b/api/gbftdef.h
@@ -116,8 +116,9 @@
#define GBQUAL_mating_type 106
#define GBQUAL_satellite 107
#define GBQUAL_gene_synonym 108
+#define GBQUAL_UniProtKB_evidence 109
-#define ParFlat_TOTAL_GBQUAL 109
+#define ParFlat_TOTAL_GBQUAL 110
#define ParFlat_TOTAL_IntOr 3
#define ParFlat_TOTAL_LRB 3
#define ParFlat_TOTAL_Exp 2
diff --git a/api/gbftglob.c b/api/gbftglob.c
index a8b0e265..0bede378 100644
--- a/api/gbftglob.c
+++ b/api/gbftglob.c
@@ -63,7 +63,8 @@ static GbFeatName STATIC__ParFlat_GBQual_names[ParFlat_TOTAL_GBQUAL] = {
{"metagenomic", Class_none}, { "culture_collection", Class_text},
{"bio_material", Class_text}, { "ncRNA_class", Class_text},
{"tag_peptide", Class_text}, { "mating_type", Class_text},
- {"satellite", Class_text}, { "gene_synonym", Class_text}
+ {"satellite", Class_text}, { "gene_synonym", Class_text},
+ { "UniProtKB_evidence", Class_text}
};
NLM_EXTERN GbFeatNamePtr x_ParFlat_GBQual_names(void) {
diff --git a/api/macroapi.c b/api/macroapi.c
index 6341e97b..a1b92bf9 100755
--- a/api/macroapi.c
+++ b/api/macroapi.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11/8/2007
*
-* $Revision: 1.201 $
+* $Revision: 1.202 $
*
* File Description:
*
@@ -6416,35 +6416,6 @@ static CharPtr GetAnticodonLocString (SeqFeatPtr sfp)
-static SeqFeatPtr GetProtFeature (BioseqPtr protbsp)
-{
- SeqMgrFeatContext fcontext;
- SeqAnnotPtr sap;
- SeqFeatPtr prot_sfp;
- ProtRefPtr prp;
-
- if (protbsp == NULL) return NULL;
-
- prot_sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext);
- if (prot_sfp == NULL) {
- sap = protbsp->annot;
- while (sap != NULL && prot_sfp == NULL) {
- if (sap->type == 1) {
- prot_sfp = sap->data;
- while (prot_sfp != NULL
- && (prot_sfp->data.choice != SEQFEAT_PROT
- || (prp = prot_sfp->data.value.ptrvalue) == NULL
- || prp->processed != 0)) {
- prot_sfp = prot_sfp->next;
- }
- }
- sap = sap->next;
- }
- }
- return prot_sfp;
-}
-
-
static ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp)
{
BioseqPtr protbsp;
@@ -8344,9 +8315,8 @@ NLM_EXTERN Uint2 GetEntityIdFromObject (Uint1 choice, Pointer data)
ObjValNodePtr ovp;
SeqFeatPtr sfp;
BioseqPtr bsp;
- SeqMgrDescContext context;
- if (data == NULL) return NULL;
+ if (data == NULL) return 0;
switch (choice)
{
@@ -12144,7 +12114,6 @@ NLM_EXTERN Boolean SetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTy
ObjValNodePtr ovp;
GBBlockPtr gb;
Boolean was_empty;
- ValNodePtr molinfo_field;
if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE;
@@ -17982,120 +17951,6 @@ static void CreateDataForFeature (SeqFeatPtr sfp, Int4 feature_type)
}
-static void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep)
-{
- ByteStorePtr bs;
- CharPtr prot, ptr;
- BioseqPtr bsp;
- Char ch;
- Int4 i;
- SeqEntryPtr psep, nsep;
- MolInfoPtr mip;
- ValNodePtr vnp, descr;
- SeqFeatPtr prot_sfp;
- ProtRefPtr prp;
- Boolean partial5, partial3;
-
- if (cds == NULL) return;
-
- CheckSeqLocForPartial (cds->location, &partial5, &partial3);
-
- /* Create corresponding protein sequence data for the CDS */
-
- bs = ProteinFromCdRegionEx (cds, TRUE, FALSE);
- if (NULL == bs)
- return;
-
- prot = BSMerge (bs, NULL);
- bs = BSFree (bs);
- if (NULL == prot)
- return;
-
- ptr = prot;
- ch = *ptr;
- while (ch != '\0') {
- *ptr = TO_UPPER (ch);
- ptr++;
- ch = *ptr;
- }
- i = StringLen (prot);
- if (i > 0 && prot [i - 1] == '*') {
- prot [i - 1] = '\0';
- }
- bs = BSNew (1000);
- if (bs != NULL) {
- ptr = prot;
- BSWrite (bs, (VoidPtr) ptr, (Int4) StringLen (ptr));
- }
-
- /* Create the product protein Bioseq */
-
- bsp = BioseqNew ();
- if (NULL == bsp)
- return;
-
- bsp->repr = Seq_repr_raw;
- bsp->mol = Seq_mol_aa;
- bsp->seq_data_type = Seq_code_ncbieaa;
- bsp->seq_data = (SeqDataPtr) bs;
- bsp->length = BSLen (bs);
- bs = NULL;
- bsp->id = MakeNewProteinSeqId (cds->location, NULL);
- SeqMgrAddToBioseqIndex (bsp);
-
- /* Create a new SeqEntry for the Prot Bioseq */
-
- psep = SeqEntryNew ();
- if (NULL == psep)
- return;
-
- psep->choice = 1;
- psep->data.ptrvalue = (Pointer) bsp;
- SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, psep);
-
- /* Add a descriptor to the protein Bioseq */
-
- mip = MolInfoNew ();
- if (NULL == mip)
- return;
-
- mip->biomol = 8;
- mip->tech = 8;
- if (partial5 && partial3) {
- mip->completeness = 5;
- } else if (partial5) {
- mip->completeness = 3;
- } else if (partial3) {
- mip->completeness = 4;
- }
- vnp = CreateNewDescriptor (psep, Seq_descr_molinfo);
- if (NULL == vnp)
- return;
-
- vnp->data.ptrvalue = (Pointer) mip;
-
- /**/
-
- descr = ExtractBioSourceAndPubs (parent_sep);
-
- AddSeqEntryToSeqEntry (parent_sep, psep, TRUE);
- nsep = FindNucSeqEntry (parent_sep);
- ReplaceBioSourceAndPubs (parent_sep, descr);
- SetSeqFeatProduct (cds, bsp);
-
- prp = ProtRefNew ();
-
- if (prp != NULL) {
- prot_sfp = CreateNewFeature (psep, NULL, SEQFEAT_PROT, NULL);
- if (prot_sfp != NULL) {
- prot_sfp->data.value.ptrvalue = (Pointer) prp;
- SetSeqLocPartial (prot_sfp->location, partial5, partial3);
- prot_sfp->partial = (partial5 || partial3);
- }
- }
-}
-
-
static SeqLocPtr LocationFromApplyFeatureAction (BioseqPtr bsp, ApplyFeatureActionPtr action)
{
LocationIntervalPtr l;
@@ -18989,6 +18844,12 @@ static Boolean ConvertRNAToRNA (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureD
}
+static Boolean MiscFeatToCodingRegionConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
+{
+ return ConvertMiscFeatToCodingRegion (sfp);
+}
+
+
typedef struct convertfeattable {
Uint2 seqfeat_from;
Uint2 featdef_from;
@@ -19035,6 +18896,9 @@ static ConvertFeatTableData conversion_functions[] = {
{ SEQFEAT_IMP, FEATDEF_ANY, SEQFEAT_RNA, FEATDEF_ANY,
ConvertImpToRNAFunc,
"Creates an RNA feature of the specified subtype. Import feature key is discarded." },
+ { SEQFEAT_IMP, FEATDEF_misc_feature, SEQFEAT_CDREGION, FEATDEF_CDS,
+ MiscFeatToCodingRegionConvertFunc,
+ "Use misc_feature comment for coding region product name." },
{ SEQFEAT_REGION, FEATDEF_REGION, SEQFEAT_IMP, FEATDEF_ANY,
ConvertRegionToImp,
"Creates a misc_feature with the region name saved as a /note qualifier." },
diff --git a/api/seqport.c b/api/seqport.c
index 06857b6c..06f10c06 100644
--- a/api/seqport.c
+++ b/api/seqport.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/13/91
*
-* $Revision: 6.174 $
+* $Revision: 6.177 $
*
* File Description: Ports onto Bioseqs
*
@@ -4104,6 +4104,7 @@ NLM_EXTERN SeqLocPtr LIBCALL productLoc_to_locationLoc(SeqFeatPtr sfp, SeqLocPtr
SeqBondPtr sbp;
ValNode vn;
Boolean is_cdregion = FALSE;
+ Boolean partial5, partial3;
if ((sfp == NULL) || (productLoc == NULL)) return head;
if (sfp->data.choice == 3) is_cdregion = TRUE;
@@ -4114,14 +4115,12 @@ NLM_EXTERN SeqLocPtr LIBCALL productLoc_to_locationLoc(SeqFeatPtr sfp, SeqLocPtr
if (productLoc->choice == SEQLOC_BOND) /* fake this one in */
{
sbp = (SeqBondPtr)(productLoc->data.ptrvalue);
- tmp = productInterval_to_locationIntervals(sfp, sbp->a->point,
-sbp->a->point);
+ tmp = productInterval_to_locationIntervals(sfp, sbp->a->point, sbp->a->point, FALSE);
if (sbp->b == NULL) /* one point in bond */
return tmp;
SeqLocAdd(&head, tmp, TRUE, FALSE);
- tmp = productInterval_to_locationIntervals(sfp, sbp->b->point,
-sbp->b->point);
+ tmp = productInterval_to_locationIntervals(sfp, sbp->b->point, sbp->b->point, FALSE);
if (tmp == NULL)
return head;
@@ -4135,6 +4134,7 @@ sbp->b->point);
goto ret;
}
+ CheckSeqLocForPartial (productLoc, &partial5, &partial3);
slp = NULL;
while ((slp = SeqLocFindNext(productLoc, slp)) != NULL)
{
@@ -4142,7 +4142,7 @@ sbp->b->point);
product_stop = SeqLocStop(slp);
if ((product_start >= 0) && (product_stop >= 0))
{
- tmp = productInterval_to_locationIntervals(sfp, product_start, product_stop);
+ tmp = productInterval_to_locationIntervals(sfp, product_start, product_stop, partial5);
if(tmp != NULL)
load_fuzz_to_DNA(tmp, slp, TRUE);
while (tmp != NULL)
@@ -4189,6 +4189,7 @@ NLM_EXTERN SeqLocPtr LIBCALL aaFeatLoc_to_dnaFeatLoc(SeqFeatPtr sfp,
CdRegionPtr crp;
SeqIntPtr sp1, sp2;
BioseqPtr bsp;
+ Boolean aa_partialn, aa_partialc;
dnaLoc = aaLoc_to_dnaLoc(sfp, aa_loc);
if (dnaLoc == NULL) return dnaLoc;
@@ -4196,10 +4197,12 @@ NLM_EXTERN SeqLocPtr LIBCALL aaFeatLoc_to_dnaFeatLoc(SeqFeatPtr sfp,
if (! sfp->partial) /* no partial checks needed */
return dnaLoc;
+
+ CheckSeqLocForPartial (aa_loc, &aa_partialn, &aa_partialc);
crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
aaPos = SeqLocStart(aa_loc);
- if ((! aaPos) && (crp->frame > 1)) /* using first amino acid */
+ if ((! aaPos) && (crp->frame > 1) && aa_partialn) /* using first amino acid */
{
tmp1 = SeqLocFindNext(sfp->location, NULL);
tmp2 = SeqLocFindNext(dnaLoc, NULL);
@@ -4221,7 +4224,7 @@ NLM_EXTERN SeqLocPtr LIBCALL aaFeatLoc_to_dnaFeatLoc(SeqFeatPtr sfp,
}
dnaPartial = SeqLocPartialCheck(sfp->location);
- if (dnaPartial & SLP_STOP) /* missing 3' end of cdregion */
+ if ((dnaPartial & SLP_STOP) && aa_partialc) /* missing 3' end of cdregion */
{
sip = SeqLocId(aa_loc);
bsp = BioseqFindCore(sip);
@@ -4262,19 +4265,162 @@ NLM_EXTERN SeqLocPtr LIBCALL aaFeatLoc_to_dnaFeatLoc(SeqFeatPtr sfp,
return dnaLoc;
}
-/******************************************************************
-*
-* productInterval_to_locationIntervals(sfp, product_start, product_stop)
-* map the amino acid sequence to a chain of Seq-locs in the
-* DNA sequence through a CdRegion feature
-*
-******************************************************************/
-NLM_EXTERN SeqLocPtr LIBCALL productInterval_to_locationIntervals(SeqFeatPtr sfp, Int4 product_start, Int4
-product_stop)
+
+static SeqLocPtr
+NucLocFromProtInterval
+(SeqFeatPtr cds,
+ Int4 prot_start,
+ Int4 prot_stop,
+ Boolean n_partial)
{
- Int4 frame_offset, start_offset; /*for determine the reading frame*/
- SeqLocPtr slp = NULL;
CdRegionPtr crp;
+ Int4 nt_before = 0, aa_before = 0, nt_this, prev_nt = 0, part_codon;
+ SeqLocPtr result = NULL;
+ SeqLocPtr slp = NULL; /* used for iterating through locations in the coding region */
+ SeqLocPtr loc; /* used for creating interval on NT sequence */
+ Boolean first_loc = TRUE;
+ Int4 cds_int_start, cds_int_stop, cds_int_len;
+ Int4 frame_start = 0;
+ Int4 aa_int_start, aa_int_stop, aa_len, this_aa, aa_needed, aa_unneeded, aa_accumulated = 0;
+ Int4 aa_from_this_interval;
+ Uint1 strand;
+
+ if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION || prot_start < 0 || prot_stop < prot_start) {
+ return NULL;
+ }
+
+ crp = (CdRegionPtr) cds->data.value.ptrvalue;
+ if (crp == NULL) {
+ return NULL;
+ }
+ if (crp->frame > 1) {
+ frame_start = crp->frame - 1;
+ }
+
+ aa_len = prot_stop - prot_start + 1;
+
+ while((slp = SeqLocFindNext(cds->location, slp)) != NULL) {
+ cds_int_len = SeqLocLen (slp);
+ cds_int_start = SeqLocStart (slp);
+ cds_int_stop = SeqLocStop (slp);
+ strand = SeqLocStrand (slp);
+
+ if (first_loc) {
+ if (strand == Seq_strand_minus) {
+ cds_int_stop -= frame_start;
+ } else {
+ cds_int_start += frame_start;
+ }
+ cds_int_len -= frame_start;
+ }
+
+ /* calculate the number of NT that "count" for this interval -
+ * don't include the NT in a partial codon at the beginning of
+ * the feature, but do include NT from a partial codon at
+ * the end of the previous interval.
+ */
+ nt_this = cds_int_len + prev_nt;
+ part_codon = nt_this % 3;
+ nt_this -= part_codon;
+
+ /* calculate how many AA are covered by this interval */
+ this_aa = nt_this / 3;
+
+ if (aa_before + this_aa >= prot_start) {
+
+ /* figure out whether to take all of this interval, or just part of it */
+ aa_from_this_interval = this_aa;
+
+ /* 5' end (left for plus strand, right for minus) */
+ if (aa_before < prot_start) {
+ /* skip some at the beginning */
+ aa_unneeded = prot_start - aa_before;
+ aa_from_this_interval -= aa_unneeded;
+
+ if (strand == Seq_strand_minus) {
+ aa_int_stop = cds_int_stop + prev_nt - (3 * aa_unneeded);
+ } else {
+ aa_int_start = cds_int_start - prev_nt + (3 * aa_unneeded);
+ }
+ } else {
+ /* start at the beginning */
+ if (strand == Seq_strand_minus) {
+ aa_int_stop = cds_int_stop;
+ if (first_loc) {
+ if (n_partial) {
+ /* put frame shift back in, if first loc and n-partial */
+ aa_int_stop += frame_start;
+ } else if (aa_before == prot_start) {
+ /* starts in this interval, but after "remainder" of previous codon */
+ aa_int_stop -= prev_nt;
+ }
+ }
+ } else {
+ aa_int_start = cds_int_start;
+ if (first_loc) {
+ if (n_partial) {
+ /* put frame shift back in, if first loc and n-partial */
+ aa_int_start -= frame_start;
+ } else if (aa_before == prot_start) {
+ /* starts in this interval, but after "remainder" of previous codon */
+ aa_int_start += prev_nt;
+ }
+ }
+ }
+ }
+
+ /* 3' end (right for plus strand, left for minus) */
+ if (aa_accumulated + aa_from_this_interval < aa_len) {
+ if (strand == Seq_strand_minus) {
+ aa_int_start = cds_int_start;
+ } else {
+ aa_int_stop = cds_int_stop;
+ }
+ } else {
+ /* just take the part that we need */
+ aa_needed = aa_len - aa_accumulated;
+ aa_unneeded = aa_from_this_interval - aa_needed;
+
+ if (strand == Seq_strand_minus) {
+ aa_int_start = cds_int_start + part_codon + (3 * aa_unneeded);
+ } else {
+ aa_int_stop = cds_int_stop - part_codon - (3 * aa_unneeded);
+ }
+ aa_from_this_interval -= aa_unneeded;
+ }
+
+ /* note - if aa_int_start > aa_int_stop, that means we eliminated
+ * both ends of the interval.
+ */
+ if (aa_int_start <= aa_int_stop) {
+ /* aa_accumulated now includes the number of complete codons that have
+ * been accounted for (not counting a partial codon at the end of this
+ * interval, if any)
+ */
+ aa_accumulated += aa_from_this_interval;
+
+ /* add interval to result */
+ loc = SeqLocIntNew(aa_int_start, aa_int_stop, strand, SeqLocId(slp));
+ SeqLocAdd(&result, loc, TRUE, FALSE);
+ }
+ }
+
+ first_loc = FALSE;
+ aa_before += this_aa;
+ prev_nt = part_codon;
+
+ if (aa_before > prot_stop) {
+ break;
+ }
+ }
+
+ return result;
+}
+
+
+static SeqLocPtr NaLocFromNaInterval (SeqFeatPtr sfp, Int4 product_start, Int4 product_stop)
+{
+ SeqLocPtr slp = NULL;
SeqLocPtr location_loc, loc; /*for the sfp.location location*/
Boolean is_end; /**is the end for process reached?**/
@@ -4283,70 +4429,26 @@ product_stop)
Int4 cur_pos; /**current sfp.product sequence position in process**/
Int4 product_len; /**length of the sfp.product **/
- Boolean is_new; /**Is cur_pos at the begin of new exon?**/
- Int4 end_partial; /*the end of aa is a partial codon*/
Int4 d_start, d_stop; /*the start and the stop of the sfp.location sequence*/
Int4 offset; /*offset from the start of the current exon*/
Int4 aa_len;
Uint1 strand;
Int4 p_end_pos; /*the end of the product sequence in the current loc*/
- Int4 first_partial; /*first codon is a partial*/
- Boolean is_cdregion = FALSE;
-
-
-
- if(sfp->data.choice ==3) /* cdregion must take into account 3 base/aa */
- {
- is_cdregion = TRUE;
-
- crp = (CdRegionPtr) sfp->data.value.ptrvalue;
- if(!crp)
- {
- return NULL;
- }
-
- if(crp->frame>0)
- {
- frame_offset = crp->frame-1;
- }
- else
- {
- frame_offset = 0;
- }
- start_offset = frame_offset;
- }
- else
- {
- start_offset = 0;
- frame_offset = 0;
- }
cur_pos= product_start;
product_len = 0;
is_end = FALSE;
p_start = 0;
- first_partial = 0;
- end_partial = 0;
slp = NULL;
location_loc= NULL;
while(!is_end && ((slp = SeqLocFindNext(sfp->location, slp))!=NULL))
{
product_len += SeqLocLen(slp);
- if (is_cdregion)
- {
- end_partial = ((product_len - start_offset)%3);
- p_stop = (product_len - start_offset)/3 -1;
- if(end_partial != 0)
- ++p_stop;
- }
- else
- {
- p_stop = product_len - start_offset - 1;
- }
+ p_stop = product_len - 1;
p_end_pos = p_stop;
- if(p_stop > product_stop || (p_stop == product_stop && end_partial == 0))
+ if(p_stop >= product_stop)
{
p_stop = product_stop; /**check if the end is reached**/
is_end = TRUE;
@@ -4354,21 +4456,7 @@ product_stop)
if(p_stop >= cur_pos) /*get the exon*/
{
- is_new = (p_start == cur_pos); /*start a new exon?*/
- if(is_new) /**special case of the first partial**/
- {
- offset = 0;
- }
- else if (is_cdregion)
- {
- if(frame_offset && p_start >0)
- ++p_start;
- offset = 3*(cur_pos - p_start) + frame_offset;
- }
- else
- {
- offset = cur_pos - p_start;
- }
+ offset = cur_pos - p_start;
strand = SeqLocStrand(slp);
if(strand == Seq_strand_minus)
@@ -4377,47 +4465,20 @@ product_stop)
d_start = SeqLocStart(slp) + offset;
d_stop = d_start;
- /*first codon*/
- if(is_cdregion && is_new && product_len == SeqLocLen(slp))
- {
- if(strand == Seq_strand_minus)
- d_stop -= frame_offset;
- else
- d_stop += frame_offset;
- }
- aa_len = MIN(p_stop, product_stop) - cur_pos +1;
- if(end_partial != 0 && (p_end_pos >= product_start && p_end_pos <= product_stop))
- {
- --aa_len;
- }
- if(first_partial > 0)
- {
- --aa_len;
- }
- if(strand == Seq_strand_minus)
+
+ aa_len = MIN(p_stop, product_stop) - cur_pos +1;
+
+ if(strand == Seq_strand_minus)
{
if(aa_len >= 0)
{
- if (is_cdregion)
- d_stop -= (3*aa_len - 1);
- else
- d_stop -= (aa_len - 1);
+ d_stop -= (aa_len - 1);
}
else
{
++d_stop;
}
-
- if(first_partial >0)
- d_stop -= first_partial;
-
- first_partial = 0;
- if (end_partial > 0 && (p_end_pos >= product_start && p_end_pos <= product_stop))
- {
- d_stop -= end_partial;
- first_partial = 3 - end_partial;
- }
-
+
d_stop = MAX(d_stop, SeqLocStart(slp));
loc = SeqLocIntNew(d_stop, d_start, strand, SeqLocId(slp));
}
@@ -4425,53 +4486,46 @@ product_stop)
{
if(aa_len >= 0)
{
- if (is_cdregion)
- d_stop += (3*aa_len - 1);
- else
- d_stop += (aa_len - 1);
+ d_stop += (aa_len - 1);
}
else
--d_stop;
- if(first_partial > 0)
- d_stop += first_partial;
- first_partial = 0;
- if (end_partial> 0 && (p_end_pos >= product_start && p_end_pos <= product_stop))
- {
- d_stop += end_partial;
- first_partial = 3 - end_partial;
- }
d_stop = MIN(d_stop, SeqLocStop(slp));
loc = SeqLocIntNew(d_start, d_stop, strand, SeqLocId(slp));
}
SeqLocAdd(&location_loc, loc, TRUE, FALSE);
- if(end_partial != 0)
- cur_pos = p_stop;
- else
- cur_pos = p_stop+1;
- }
+ cur_pos = p_stop+1;
+ }
+ p_start = p_stop +1;
- if(end_partial != 0)
- {
- p_start = p_stop;
- }
- else
- {
- p_start = p_stop +1;
- }
-
- if (is_cdregion)
- {
- frame_offset = (product_len - start_offset)%3;
- if(frame_offset >0)
- frame_offset = 3-frame_offset;
- }
+ }/**end of while(slp && !is_end) **/
- }/**end of while(slp && !is_end) **/
+ return location_loc;
+}
+
+/******************************************************************
+*
+* productInterval_to_locationIntervals(sfp, product_start, product_stop, aa_partialn)
+* map the amino acid sequence to a chain of Seq-locs in the
+* DNA sequence through a CdRegion feature
+*
+******************************************************************/
+NLM_EXTERN SeqLocPtr LIBCALL
+productInterval_to_locationIntervals
+(SeqFeatPtr sfp,
+ Int4 product_start,
+ Int4 product_stop,
+ Boolean aa_partialn)
+{
- return location_loc;
+ if (sfp->data.choice == SEQFEAT_CDREGION) {
+ return NucLocFromProtInterval (sfp, product_start, product_stop, aa_partialn);
+ } else {
+ return NaLocFromNaInterval (sfp, product_start, product_stop);
+ }
}
@@ -4557,7 +4611,7 @@ merge, Int4Ptr frame, Boolean allowTerminator)
a_left += 3;
}
}
- if (a_right > (bsp->length) * 3 - 1) {
+ if (a_right > (bsp->length) * 3 - 1 && !allowTerminator) {
CheckSeqLocForPartial (slp, &partial5, &partial3);
strand = SeqLocStrand (slp);
if ((partial5 && strand != Seq_strand_minus) || (partial3 && strand == Seq_strand_minus)) {
@@ -4570,7 +4624,7 @@ merge, Int4Ptr frame, Boolean allowTerminator)
aa_from = a_left / 3;
aa_to = a_right / 3;
- if (aa_to > end_pos)
+ if (aa_to > end_pos && !allowTerminator)
aa_to = end_pos;
if (merge)
diff --git a/api/seqport.h b/api/seqport.h
index 9928a18e..22cddc39 100644
--- a/api/seqport.h
+++ b/api/seqport.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/13/91
*
-* $Revision: 6.59 $
+* $Revision: 6.60 $
*
* File Description: Ports onto Bioseqs
*
@@ -38,242 +38,6 @@
* Date Name Description of modification
* ------- ---------- -----------------------------------------------------
*
-*
-* $Log: seqport.h,v $
-* Revision 6.59 2008/02/12 18:56:52 bollin
-* Made ReverseSeqData and ComplementSeqData extern
-*
-* Revision 6.58 2007/05/30 18:10:06 kans
-* added KNOWN_GAP_AS_PLUS to distinguish known-length from unknown-length gaps, use for validation
-*
-* Revision 6.57 2006/12/20 20:08:24 kans
-* added SUPPRESS_VIRT_SEQ and STREAM_VIRT_AS_PLUS, moved STREAM_CORRECT_INVAL
-*
-* Revision 6.56 2006/12/18 15:42:58 kans
-* made MakeCodeBreakList public so validator can check for unnecessary transl excepts
-*
-* Revision 6.55 2006/11/15 18:02:59 kans
-* ProteinFromCdRegionExEx and TransTableTranslateCdRegionEx take farProdFetchOK argument
-*
-* Revision 6.54 2006/11/06 17:16:38 kans
-* added stream flag to allow negative gi numbers by NCBI ID group
-*
-* Revision 6.53 2006/07/13 17:06:39 bollin
-* use Uint4 instead of Uint2 for itemID values
-* removed unused variables
-* resolved compiler warnings
-*
-* Revision 6.52 2006/05/19 18:40:07 kans
-* added protein equivalent of nucleotide SeqSearch finite state machine
-*
-* Revision 6.51 2005/08/24 15:14:31 kans
-* modified MolWtForLoc to use StreamCache, added MolWtForBsp and MolWtForStr
-*
-* Revision 6.50 2005/06/01 20:27:06 kans
-* added MapNa4ByteToIUPACplusGapString
-*
-* Revision 6.49 2005/03/15 14:35:44 kans
-* seqport stream gap control flags (2-bit set) are STREAM_EXPAND_GAPS, GAP_TO_SINGLE_DASH, and EXPAND_GAPS_TO_DASHES
-*
-* Revision 6.48 2005/03/14 22:48:11 kans
-* inserted STREAM_INDICATE_GAPS before STREAM_CORRECT_INVAL, will mark gap with 251 instead of N or X
-*
-* Revision 6.47 2004/11/29 17:12:42 kans
-* added SearchFlgType for expandPattern, allowOneMismatch, justTopStrand arguments
-*
-* Revision 6.46 2004/11/26 18:53:09 kans
-* SeqSearchAddNucleotidePattern takes expandPattern, allowOneMismatch arguments
-*
-* Revision 6.45 2004/10/27 22:15:34 kans
-* added STREAM_CORRECT_INVAL flag to SeqPortStream
-*
-* Revision 6.44 2004/07/16 19:37:37 kans
-* SeqPortStream and FastaStream functions return Int4, negative count if any fetch failures
-*
-* Revision 6.43 2004/05/12 18:55:33 kans
-* StreamCache takes SeqLocPtr as well as BioseqPtr optional arguments, slp version is equivalent of SeqPortNewByLoc
-*
-* Revision 6.42 2004/04/27 20:09:26 kans
-* StreamCacheGetResidue returns Uint1 because Char might be signed, preventing IS_residue from working
-*
-* Revision 6.41 2004/04/27 18:15:12 kans
-* added StreamCache functions that provide buffered request-driven access to sequence via SeqPortStream
-*
-* Revision 6.40 2004/04/14 12:39:01 kans
-* SeqPortStreamLoc is public function, SeqPortStreamRaw directly uncompresses byte store, avoids any SeqPort calls - still need more efficient way to reverse complement without a big buffer
-*
-* Revision 6.39 2004/04/08 20:19:21 kans
-* SeqPortStreamInt is external
-*
-* Revision 6.38 2004/03/15 19:54:54 kans
-* SeqPortStream takes expandable bit flags parameter
-*
-* Revision 6.37 2004/02/25 19:07:45 kans
-* ProteinFromCdRegionExEx and TransTableTranslateCdRegionEx return alternative start flag
-*
-* Revision 6.36 2003/11/18 17:08:46 kans
-* added MapNa4ByteTo4BitString, use in seqport read and get char
-*
-* Revision 6.35 2003/11/17 22:44:31 kans
-* added MapNa2ByteTo4BitString in preparation for faster SeqPortRead from 2na to 4na
-*
-* Revision 6.34 2003/11/05 21:17:22 bollin
-* added new option for Retranslate Coding Regions to handle stop codons at end of complete CDS during retranslate while ignoring stop codons
-*
-* Revision 6.33 2002/11/11 18:02:40 kans
-* added SeqPortStream to efficiently stream through a sequence
-*
-* Revision 6.32 2002/07/08 15:08:59 kans
-* made ReadCodingRegionBases extern
-*
-* Revision 6.31 2002/05/13 21:41:32 kans
-* added ConvertNsToGaps
-*
-* Revision 6.30 2001/02/18 20:58:52 kans
-* added GetSequenceByBsp
-*
-* Revision 6.29 2000/12/18 18:03:26 kans
-* added GetScoresbySeqId
-*
-* Revision 6.28 2000/09/24 23:31:18 kans
-* added GetSequenceByFeature
-*
-* Revision 6.27 2000/09/24 22:52:47 kans
-* added GetSequenceByIdOrAccnDotVer
-*
-* Revision 6.26 2000/09/05 21:33:50 kans
-* productInterval_to_locationIntervals replaces aaInterval_to_dnaIntervals, also works for mRNA feature (JO)
-*
-* Revision 6.25 2000/08/31 18:12:54 shavirin
-* Added new function TransTableFreeAll().
-*
-* Revision 6.24 2000/08/11 18:09:49 kans
-* GetScoresbyAccessionDotVersion passes length back through new parameter
-*
-* Revision 6.23 2000/08/11 18:03:25 kans
-* added GetScoresbyAccessionDotVersion - prototyped in seqport.h but implemented in sqnutil2.c
-*
-* Revision 6.22 2000/08/10 17:22:38 kans
-* added GetDNAbyAccessionDotVersion for genome processing effort
-*
-* Revision 6.21 2000/08/04 15:45:22 kans
-* added ContigRevComp - still need to implement for delta bioseqs
-*
-* Revision 6.20 2000/08/03 19:02:54 kans
-* added PersistentTransTableByGenCode and PersistentTransTableByCdRegion
-*
-* Revision 6.19 2000/08/01 20:02:58 kans
-* separate macros for IsOrfStart, IsAmbigStart, IsAnyStart
-*
-* Revision 6.18 2000/07/22 22:45:37 kans
-* more work on trans table translation functions
-*
-* Revision 6.17 2000/07/21 15:28:36 kans
-* first pass at TransTableTranslate functions - more work remains
-*
-* Revision 6.16 2000/07/05 17:02:12 kans
-* added spp->gapIsZero, SeqPortSet_do_virtualEx, using ncbi4na with gap of 0 to distinguish quality scores under N versus quality scores under gap
-*
-* Revision 6.15 2000/05/23 20:41:17 ostell
-* added MolWtForLoc()
-*
-* Revision 6.14 1999/11/17 00:56:33 kans
-* improved seqsearch fsa, removed protein part, still need to allow single mismatch
-*
-* Revision 6.13 1999/11/12 21:00:50 kans
-* added TransTableProcessBioseq for 6-frame translation, SeqSearchAddNucleotidePattern and SeqSearchAddProteinPattern for SeqSearch
-*
-* Revision 6.12 1999/11/11 00:58:28 kans
-* added SeqSearch sequence search finite state machine - still need more functions to add protein patterns, read from rsite file
-*
-* Revision 6.11 1999/10/06 22:09:02 kans
-* ComposeCodonsRecognizedString to handle degenerate codons
-*
-* Revision 6.10 1999/08/06 20:22:19 kans
-* TransTable simplified to eliminate single and double letter states
-*
-* Revision 6.9 1999/08/06 02:20:16 kans
-* finite state machine for 6-frame translation and orf search enhanced to handle nucleotide ambiguity characters
-*
-* Revision 6.8 1999/02/12 20:48:24 kans
-* made fast byte expansion functions public
-*
-* Revision 6.7 1998/12/14 20:56:24 kans
-* dnaLoc_to_aaLoc takes allowTerminator parameter to handle stop codons created by polyA tail
-*
-* Revision 6.6 1998/11/16 21:10:08 kans
-* added IsATGStart and IsAltStart macros
-*
-* Revision 6.5 1998/11/16 17:20:31 kans
-* nextBase in codon fsa is Uint1, cast state array index to int in macros
-*
-* Revision 6.4 1998/11/14 00:30:21 kans
-* added TransTableInit and macros for 6-frame translation and orf-finding finite state machine
-*
-* Revision 6.3 1998/09/16 21:40:42 kans
-* added SPCacheQ for rapid 2na/4na to iupacna conversion
-*
-* Revision 6.2 1998/02/24 15:09:17 kans
-* made AAForCodon prototype public
-*
-* Revision 6.1 1997/09/16 15:31:31 kans
-* added aaFeatLoc_to_dnaFeatLoc (JO)
-*
-* Revision 6.0 1997/08/25 18:07:16 madden
-* Revision changed to 6.0
-*
-* Revision 5.5 1997/08/15 17:02:44 madden
-* Added new function ProteinFromCdRegionEx with remove_trailingX Boolean
-*
-* Revision 5.4 1997/06/19 18:38:52 vakatov
-* [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
-*
-* Revision 5.3 1997/03/06 22:47:54 shavirin
-* Moved definitions for SPCompress functions from sequtil.h
-*
- * Revision 5.2 1996/08/09 15:27:47 ostell
- * added BioseqRev(), BioseqComp(), BioseqRevComp()
- *
- * Revision 5.1 1996/07/15 19:04:18 epstein
- * add new param to dnaLoc_to_aaLoc() to optionally report frame
- *
- * Revision 5.0 1996/05/28 13:23:23 ostell
- * Set to revision 5.0
- *
- * Revision 4.8 1996/01/30 16:28:52 ostell
- * fixed type in comment
- *
- * Revision 4.7 1996/01/30 16:24:04 ostell
- * added merge argument to dnaLoc_to_aaLoc()
- * change calls to SeqLocPackage
- *
- * Revision 4.6 1996/01/29 22:03:52 ostell
- * added aaLoc_to_dnaLoc() and dnsLoc_to_aaLoc()
- *
- * Revision 4.5 1996/01/28 07:00:05 ostell
- * made fisxes to support deeply nexted segmented seqports
- *
- * Revision 4.4 1996/01/27 22:19:00 ostell
- * added SeqPortSet_.. functions
- * refined support for virtual seqeunces
- *
- * Revision 4.3 1996/01/10 22:25:25 ostell
- * added aaInterval_to_seqloc()
- *
- * Revision 4.2 1995/12/29 21:31:44 ostell
- * made SeqPort helper functions public for use by edutil for delta seqs
- *
- * Revision 4.1 1995/12/26 22:29:34 ostell
- * added support for delta seq to SeqPort
- *
- * Revision 4.0 1995/07/26 13:49:01 ostell
- * force revision to 4.0
- *
- * Revision 2.14 1995/05/15 21:46:05 ostell
- * added Log line
- *
-*
-*
* ==========================================================================
*/
@@ -683,7 +447,7 @@ NLM_EXTERN SeqLocPtr LIBCALL aaFeatLoc_to_dnaFeatLoc(SeqFeatPtr sfp, SeqLocPtr a
* DNA sequence through a CdRegion feature
*
******************************************************************/
-NLM_EXTERN SeqLocPtr LIBCALL productInterval_to_locationIntervals (SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop);
+NLM_EXTERN SeqLocPtr LIBCALL productInterval_to_locationIntervals (SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop, Boolean aa_partialn);
/*-------------- BioseqRevComp () ---------------------------*/
/***********************************************************************
diff --git a/api/sequtil.c b/api/sequtil.c
index 84d5ee9b..b4f067c6 100644
--- a/api/sequtil.c
+++ b/api/sequtil.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.284 $
+* $Revision: 6.285 $
*
* File Description: Sequence Utilities for objseq and objsset
*
@@ -9257,7 +9257,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"GE") == 0) ||
(StringICmp(temp,"GH") == 0) ||
(StringICmp(temp,"GO") == 0) ||
- (StringICmp(temp,"GR") == 0) ) { /* NCBI EST */
+ (StringICmp(temp,"GR") == 0) ||
+ (StringICmp(temp,"GT") == 0) ) { /* NCBI EST */
retcode = ACCN_NCBI_EST;
} else if ((StringICmp(temp,"BV") == 0) ||
(StringICmp(temp,"GF") == 0)) { /* NCBI STS */
@@ -9291,10 +9292,6 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"GL") == 0)) { /* NCBI segmented set header Bioseq */
retcode = ACCN_NCBI_SEGSET;
} else if ((StringICmp(temp,"AS") == 0) ||
- (StringICmp(temp,"GO") == 0) ||
- (StringICmp(temp,"GP") == 0) ||
- (StringICmp(temp,"GQ") == 0) ||
- (StringICmp(temp,"GT") == 0) ||
(StringICmp(temp,"GU") == 0) ||
(StringICmp(temp,"GV") == 0) ||
(StringICmp(temp,"GW") == 0) ||
diff --git a/api/sqnutil1.c b/api/sqnutil1.c
index 532dc7e8..44282e91 100644
--- a/api/sqnutil1.c
+++ b/api/sqnutil1.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.532 $
+* $Revision: 6.534 $
*
* File Description:
*
@@ -4203,6 +4203,10 @@ static Boolean HandledGBQualOnProt (SeqFeatPtr sfp, GBQualPtr gbq)
return FALSE;
}
+ if (StringICmp (gbq->qual, "UniProtKB_evidence") == 0) {
+ return FALSE;
+ }
+
return TRUE; /* all other gbquals not appropriate on protein features */
}
@@ -4506,10 +4510,13 @@ static void CleanupConsSplice (GBQualPtr gbq)
gbq->val = str;
}
-static void ExpandParenGroup (GBQualPtr headgbq)
+static Boolean ExpandParenGroup (GBQualPtr headgbq)
{
+ Char ch;
GBQualPtr lastgbq;
+ size_t len;
+ Int2 nesting;
GBQualPtr newgbq;
GBQualPtr nextqual;
CharPtr ptr;
@@ -4518,8 +4525,34 @@ static void ExpandParenGroup (GBQualPtr headgbq)
nextqual = headgbq->next;
lastgbq = headgbq;
- tmp = StringSave (headgbq->val);
- str = tmp + 1;
+ ptr = headgbq->val;
+ tmp = StringSave (ptr + 1);
+ len = StringLen (tmp);
+ if (len > 0 && tmp [len - 1] == ')') {
+ tmp [len - 1] = '\0';
+ }
+ str = tmp;
+ nesting = 0;
+ ptr = str;
+ ch = *ptr;
+ while (ch != '\0') {
+ if (ch == '(') {
+ nesting++;
+ } else if (ch == ')') {
+ nesting--;
+ if (nesting < 0) {
+ MemFree (tmp);
+ return FALSE;
+ }
+ } else if (ch == ',') {
+ if (nesting < 0) {
+ MemFree (tmp);
+ return FALSE;
+ }
+ }
+ ptr++;
+ ch = *ptr;
+ }
while (! StringHasNoText (str)) {
ptr = StringChr (str, ',');
if (ptr == NULL) {
@@ -4541,6 +4574,7 @@ static void ExpandParenGroup (GBQualPtr headgbq)
str = ptr;
}
MemFree (tmp);
+ return TRUE;
}
static Boolean IsBaseRange (CharPtr str)
@@ -4598,10 +4632,13 @@ static void ModernizeFeatureGBQuals (SeqFeatPtr sfp)
str [len - 1] = ')';
}
if (len > 1 && *str == '(' && str [len - 1] == ')' /* && StringChr (str + 1, '(') == NULL */) {
- ExpandParenGroup (gbq);
- nextqual = gbq->next;
- /* individual parsed out (xxx,xxx) qualifiers will be processed next, now get rid of original */
- unlink = TRUE;
+ if (ExpandParenGroup (gbq)) {
+ nextqual = gbq->next;
+ /* individual parsed out (xxx,xxx) qualifiers will be processed next, now get rid of original */
+ unlink = TRUE;
+ } else {
+ unlink = FALSE;
+ }
} else {
unlink = FALSE;
}
@@ -4620,10 +4657,13 @@ static void ModernizeFeatureGBQuals (SeqFeatPtr sfp)
str [len - 1] = ')';
}
if (len > 1 && *str == '(' && str [len - 1] == ')' && StringChr (str + 1, '(') == NULL) {
- ExpandParenGroup (gbq);
- nextqual = gbq->next;
- /* individual parsed out (xxx,xxx) qualifiers will be processed next, now get rid of original */
- unlink = TRUE;
+ if (ExpandParenGroup (gbq)) {
+ nextqual = gbq->next;
+ /* individual parsed out (xxx,xxx) qualifiers will be processed next, now get rid of original */
+ unlink = TRUE;
+ } else {
+ unlink = FALSE;
+ }
} else {
unlink = FALSE;
}
diff --git a/api/sqnutil2.c b/api/sqnutil2.c
index f31cb69b..f768aedc 100644
--- a/api/sqnutil2.c
+++ b/api/sqnutil2.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.391 $
+* $Revision: 6.392 $
*
* File Description:
*
@@ -11965,8 +11965,9 @@ NLM_EXTERN Boolean FeatureOkForFeatureList (SeqFeatPtr sfp, ValNodePtr feature_l
NLM_EXTERN SeqFeatPtr GetGeneForFeature (SeqFeatPtr sfp)
{
+ BioseqPtr bsp;
GeneRefPtr grp;
- SeqFeatPtr overlap_gene;
+ SeqFeatPtr overlap_gene = NULL;
Boolean is_suppressed;
SeqMgrFeatContext fcontext;
@@ -11975,9 +11976,15 @@ NLM_EXTERN SeqFeatPtr GetGeneForFeature (SeqFeatPtr sfp)
if (is_suppressed) return NULL;
if (grp != NULL) {
- overlap_gene = SeqMgrGetGeneByLocusTag (BioseqFindFromSeqLoc(sfp->location), grp->locus_tag, &fcontext);
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ if (bsp == NULL) return NULL;
+ if (StringDoesHaveText (grp->locus_tag)) {
+ overlap_gene = SeqMgrGetGeneByLocusTag (bsp, grp->locus_tag, &fcontext);
+ } else if (StringDoesHaveText (grp->locus)) {
+ overlap_gene = SeqMgrGetFeatureByLabel (bsp, grp->locus, SEQFEAT_GENE, 0, &fcontext);
+ }
} else {
- overlap_gene = SeqMgrGetOverlappingGene(sfp->location, &fcontext);
+ overlap_gene = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
}
return overlap_gene;
}
diff --git a/api/sqnutil3.c b/api/sqnutil3.c
index eba9ae27..a4bcf72f 100644
--- a/api/sqnutil3.c
+++ b/api/sqnutil3.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/7/00
*
-* $Revision: 6.510 $
+* $Revision: 6.514 $
*
* File Description:
*
@@ -11669,14 +11669,19 @@ static Boolean GetOverlappingTRNAs (BioseqPtr bsp, SeqLocPtr slp, Int4 loc_right
SeqFeatPtr sfp;
SeqMgrFeatContext context;
Boolean found_any = FALSE;
+ Uint1 slp_strand, rna_strand;
if (bsp == NULL || slp == NULL || list == NULL) return FALSE;
+ slp_strand = SeqLocStrand (slp);
for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_RNA, FEATDEF_tRNA, &context);
sfp != NULL && context.left <= loc_right;
sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_RNA, FEATDEF_tRNA, &context))
{
- if (SeqLocStrand (sfp->location) == SeqLocStrand (slp) && SeqLocCompare (sfp->location, slp) != SLC_NO_MATCH) {
+ rna_strand = SeqLocStrand (sfp->location);
+ if (((slp_strand == Seq_strand_minus && rna_strand == Seq_strand_minus)
+ || (slp_strand != Seq_strand_minus && rna_strand != Seq_strand_minus))
+ && SeqLocCompare (sfp->location, slp) != SLC_NO_MATCH) {
ValNodeAddPointer (list, OBJ_SEQFEAT, sfp);
found_any = TRUE;
}
@@ -17601,46 +17606,141 @@ static Boolean CouldExtendRight (BioseqPtr bsp, Int4 pos)
}
-static Boolean ExtendPartialSeqIntToEndOrGap (SeqIntPtr sint, BioseqPtr bsp)
+/*
+ * Extend5PartialSeqIntToEndOrGap
+ *
+ * Moves the 5' end of an interval outward to a nearby gap or to the end of
+ * the sequence, and returns how many positions the end was moved (0 if it
+ * was left alone).
+ *
+ * The 5' end is sint->to on the minus strand and sint->from otherwise.
+ * Nothing happens unless the matching fuzz pointer (if_to / if_from) is
+ * non-NULL and the end is not already at the sequence boundary.
+ *
+ *   - gap distance of 1 or 2, or any distance > -1 when short_only is FALSE:
+ *     the end is moved by that distance;
+ *   - otherwise, when short_only is FALSE or the end lies within the first /
+ *     last three positions: the end is moved to the sequence boundary;
+ *   - otherwise the interval is unchanged.
+ *
+ * Presumably DistanceToUpstreamGap / DistanceToDownstreamGap return a
+ * negative value when there is no gap -- confirm against their definitions.
+ *
+ * NOTE(review): the NULL-argument path returns FALSE from an Int4 function;
+ * that is the value 0, but "return 0" would state the intent.
+ */
+NLM_EXTERN Int4
+Extend5PartialSeqIntToEndOrGap
+(SeqIntPtr sint,
+ BioseqPtr bsp,
+ Boolean short_only)
{
- Boolean rval = FALSE;
- Int4 distance;
+ Int4 distance = 0;
if (sint == NULL || bsp == NULL) {
return FALSE;
}
- if (sint->if_from != NULL && sint->from != 0) {
- if (sint->from < 3) {
- sint->from = 0;
- rval = TRUE;
- } else if (bsp->repr == Seq_repr_delta) {
- /* wasn't close to the sequence end, but perhaps it is close to a gap */
+ /* minus strand: the 5' end is the "to" coordinate, so extend downstream */
+ if (sint->strand == Seq_strand_minus) {
+ if (sint->if_to != NULL && sint->to != bsp->length - 1) {
+ distance = DistanceToDownstreamGap (sint->to, bsp);
+ if (distance == 1 || distance == 2 || (distance > -1 && !short_only)) {
+ sint->to += distance;
+ } else if (!short_only || sint->to > bsp->length - 4) {
+ distance = bsp->length - 1 - sint->to;
+ sint->to = bsp->length - 1;
+ } else {
+ distance = 0;
+ }
+ }
+ } else {
+ /* any other strand: the 5' end is the "from" coordinate, so extend upstream */
+ if (sint->if_from != NULL && sint->from != 0) {
distance = DistanceToUpstreamGap (sint->from, bsp);
- if (distance == 1 || distance == 2) {
+ if (distance == 1 || distance == 2 || (distance > -1 && !short_only)) {
sint->from -= distance;
- rval = TRUE;
+ } else if (!short_only || sint->from < 3) {
+ distance = sint->from;
+ sint->from = 0;
+ } else {
+ distance = 0;
}
}
}
- if (sint->if_to != NULL && sint->to != bsp->length - 1) {
- if (sint->to > bsp->length - 4) {
- sint->to = bsp->length - 1;
- rval = TRUE;
- } else if (bsp->repr == Seq_repr_delta) {
- /* wasn't close to the sequence end, but perhaps it is close to a gap */
+ return distance;
+}
+
+
+/*
+ * Extend3PartialSeqIntToEndOrGap
+ *
+ * Moves the 3' end of an interval outward to a nearby gap or to the end of
+ * the sequence, and returns how many positions the end was moved (0 if it
+ * was left alone).
+ *
+ * The 3' end is sint->from on the minus strand and sint->to otherwise.
+ * Nothing happens unless the matching fuzz pointer (if_from / if_to) is
+ * non-NULL and the end is not already at the sequence boundary.
+ *
+ *   - gap distance of 1 or 2, or any distance > -1 when short_only is FALSE:
+ *     the end is moved by that distance;
+ *   - otherwise, when short_only is FALSE or the end lies within the first /
+ *     last three positions: the end is moved to the sequence boundary;
+ *   - otherwise the interval is unchanged.
+ *
+ * Presumably DistanceToUpstreamGap / DistanceToDownstreamGap return a
+ * negative value when there is no gap -- confirm against their definitions.
+ *
+ * NOTE(review): the NULL-argument path returns FALSE from an Int4 function;
+ * that is the value 0, but "return 0" would state the intent.
+ */
+NLM_EXTERN Int4
+Extend3PartialSeqIntToEndOrGap
+(SeqIntPtr sint,
+ BioseqPtr bsp,
+ Boolean short_only)
+{
+ Int4 distance = 0;
+
+ if (sint == NULL || bsp == NULL) {
+ return FALSE;
+ }
+
+ /* minus strand: the 3' end is the "from" coordinate, so extend upstream */
+ if (sint->strand == Seq_strand_minus) {
+ if (sint->if_from != NULL && sint->from != 0) {
+ distance = DistanceToUpstreamGap (sint->from, bsp);
+ if (distance == 1 || distance == 2 || (distance > -1 && !short_only)) {
+ sint->from -= distance;
+ } else if (!short_only || sint->from < 3) {
+ distance = sint->from;
+ sint->from = 0;
+ } else {
+ distance = 0;
+ }
+ }
+ } else {
+ /* any other strand: the 3' end is the "to" coordinate, so extend downstream */
+ if (sint->if_to != NULL && sint->to != bsp->length - 1) {
distance = DistanceToDownstreamGap (sint->to, bsp);
- if (distance == 1 || distance == 2) {
+ if (distance == 1 || distance == 2 || (distance > -1 && !short_only)) {
sint->to += distance;
- rval = TRUE;
+ } else if (!short_only || sint->to > bsp->length - 4) {
+ distance = bsp->length - 1 - sint->to;
+ sint->to = bsp->length - 1;
+ } else {
+ distance = 0;
}
}
}
+ return distance;
+}
+
+
+
+/*
+ * Tries to extend both ends of sint in "short only" mode (5' end first, then
+ * 3' end) and reports whether either end actually moved.
+ */
+static Boolean ExtendPartialSeqIntToEndOrGap (SeqIntPtr sint, BioseqPtr bsp)
+{
+  Int4    moved5;
+  Int4    moved3;
+  Boolean rval;
+
+  /* both extensions are always attempted; each returns the distance moved */
+  moved5 = Extend5PartialSeqIntToEndOrGap (sint, bsp, TRUE);
+  moved3 = Extend3PartialSeqIntToEndOrGap (sint, bsp, TRUE);
+
+  rval = (Boolean) (moved5 > 0 || moved3 > 0);
+
return rval;
}
+/*
+ * ExtendSeqLocToEndOrGap
+ *
+ * Extends one end of a location to the nearest gap or sequence end and
+ * returns the distance moved.  end5 selects the 5' end (TRUE) or the 3' end
+ * (FALSE).  A SEQLOC_INT is extended directly with short_only == FALSE; for
+ * SEQLOC_MIX and SEQLOC_PACKED_INT the first (5') or last (3') component is
+ * extended instead.  Any other location type is left untouched and yields 0.
+ */
+NLM_EXTERN Int4 ExtendSeqLocToEndOrGap (SeqLocPtr slp, BioseqPtr bsp, Boolean end5)
+{
+  SeqLocPtr component;
+
+  if (slp == NULL || bsp == NULL) return 0;
+
+  if (slp->choice == SEQLOC_INT) {
+    if (end5) {
+      return Extend5PartialSeqIntToEndOrGap (slp->data.ptrvalue, bsp, FALSE);
+    }
+    return Extend3PartialSeqIntToEndOrGap (slp->data.ptrvalue, bsp, FALSE);
+  }
+
+  if (slp->choice != SEQLOC_MIX && slp->choice != SEQLOC_PACKED_INT) {
+    return 0;
+  }
+
+  /* compound location: pick the first component for 5', the last for 3' */
+  component = slp->data.ptrvalue;
+  if (! end5) {
+    while (component != NULL && component->next != NULL) {
+      component = component->next;
+    }
+  }
+
+  /* an empty component list reaches the NULL guard above and returns 0 */
+  return ExtendSeqLocToEndOrGap (component, bsp, end5);
+}
+
+
NLM_EXTERN SeqFeatPtr FindBestProtein (Uint2 entityID, SeqLocPtr product)
{
@@ -22801,6 +22901,21 @@ BarcodeValidateOneSeqEntry
}
}
}
+ if (show_all) {
+ for (vnp = pass_fail_list; vnp != NULL; vnp = vnp->next) {
+ res = (BarcodeTestResultsPtr) vnp->data.ptrvalue;
+ SeqIdWrite (SeqIdFindBest (res->bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
+ reason = GetBarcodeTestFailureReasons (res);
+ BarcodeValPrintStr (ofp, " <message severity=\"INFO\" seq-id=\"%s\">", id_buf);
+ if (PassBarcodeTests(res)) {
+ BarcodeValPrintStr (ofp, NULL, "PASS");
+ } else {
+ BarcodeValPrintStr (ofp, "FAIL (%s)", reason == NULL ? "" : reason);
+ }
+ BarcodeValPrintStr (ofp, NULL, "</message>\n");
+ reason = MemFree (reason);
+ }
+ }
} else {
if (show_header) {
if (ofp == NULL) {
diff --git a/api/sqnutil4.c b/api/sqnutil4.c
index 3579f132..26d7b9b0 100755
--- a/api/sqnutil4.c
+++ b/api/sqnutil4.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 12/27/2007
*
-* $Revision: 1.57 $
+* $Revision: 1.59 $
*
* File Description:
* This file contains functions for automatically generating definition lines.
@@ -12319,6 +12319,7 @@ NLM_EXTERN Boolean ConvertImpToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
{
ifp->key = StringSave (featname);
}
+ sfp->idx.subtype = 0;
return TRUE;
}
@@ -12569,6 +12570,182 @@ static void InstantiateMatPeptideProductForProteinFeature (SeqFeatPtr sfp, Point
}
+/*
+ * ExtraCDSCreationActions
+ *
+ * Builds the protein product for a coding region and wires it into the record:
+ *   1. translates cds with ProteinFromCdRegionEx, upper-cases the residues
+ *      and drops a trailing '*';
+ *   2. wraps the residues in a new raw ncbieaa protein Bioseq and SeqEntry;
+ *   3. attaches a MolInfo descriptor whose completeness mirrors the 5'/3'
+ *      partialness of cds->location;
+ *   4. adds the protein to parent_sep, sets it as the product of cds, and
+ *      creates a Prot-ref feature on the protein with the same partial flags.
+ *
+ * Returns without setting a product if cds is NULL, translation yields
+ * nothing, or an allocation fails; objects built up to that point are freed.
+ */
+NLM_EXTERN void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep)
+{
+  ByteStorePtr  bs;
+  CharPtr       prot, ptr;
+  BioseqPtr     bsp;
+  Char          ch;
+  Int4          i;
+  SeqEntryPtr   psep, nsep;
+  MolInfoPtr    mip;
+  ValNodePtr    vnp, descr;
+  SeqFeatPtr    prot_sfp;
+  ProtRefPtr    prp;
+  Boolean       partial5, partial3;
+
+  if (cds == NULL) return;
+
+  CheckSeqLocForPartial (cds->location, &partial5, &partial3);
+
+  /* Create corresponding protein sequence data for the CDS */
+
+  bs = ProteinFromCdRegionEx (cds, TRUE, FALSE);
+  if (NULL == bs)
+    return;
+
+  prot = BSMerge (bs, NULL);
+  bs = BSFree (bs);
+  if (NULL == prot)
+    return;
+
+  ptr = prot;
+  ch = *ptr;
+  while (ch != '\0') {
+    *ptr = TO_UPPER (ch);
+    ptr++;
+    ch = *ptr;
+  }
+  i = StringLen (prot);
+  if (i > 0 && prot [i - 1] == '*') {
+    prot [i - 1] = '\0';
+  }
+  bs = BSNew (1000);
+  if (bs != NULL) {
+    BSWrite (bs, (VoidPtr) prot, (Int4) StringLen (prot));
+  }
+  /* the residues have been copied into bs; release the merged string */
+  prot = MemFree (prot);
+  if (NULL == bs)
+    return;
+
+  /* Create the product protein Bioseq */
+
+  bsp = BioseqNew ();
+  if (NULL == bsp) {
+    BSFree (bs);
+    return;
+  }
+
+  bsp->repr = Seq_repr_raw;
+  bsp->mol = Seq_mol_aa;
+  bsp->seq_data_type = Seq_code_ncbieaa;
+  bsp->seq_data = (SeqDataPtr) bs;
+  bsp->length = BSLen (bs);
+  bs = NULL;    /* ownership of the byte store has moved to bsp */
+  bsp->id = MakeNewProteinSeqId (cds->location, NULL);
+  SeqMgrAddToBioseqIndex (bsp);
+
+  /* Create a new SeqEntry for the Prot Bioseq */
+
+  psep = SeqEntryNew ();
+  if (NULL == psep) {
+    BioseqFree (bsp);
+    return;
+  }
+
+  psep->choice = 1;
+  psep->data.ptrvalue = (Pointer) bsp;    /* psep now owns bsp */
+  SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, psep);
+
+  /* Add a descriptor to the protein Bioseq */
+
+  mip = MolInfoNew ();
+  if (NULL == mip) {
+    SeqEntryFree (psep);
+    return;
+  }
+
+  /* NOTE(review): 8 / 8 are presumably biomol "peptide" and tech
+     "concept-trans" -- confirm against the MolInfo definition */
+  mip->biomol = 8;
+  mip->tech = 8;
+  if (partial5 && partial3) {
+    mip->completeness = 5;
+  } else if (partial5) {
+    mip->completeness = 3;
+  } else if (partial3) {
+    mip->completeness = 4;
+  }
+  vnp = CreateNewDescriptor (psep, Seq_descr_molinfo);
+  if (NULL == vnp) {
+    MolInfoFree (mip);
+    SeqEntryFree (psep);
+    return;
+  }
+
+  vnp->data.ptrvalue = (Pointer) mip;
+
+  /* Insert the protein into the parent entry; the source and pub
+     descriptors are pulled off beforehand and put back afterwards */
+
+  descr = ExtractBioSourceAndPubs (parent_sep);
+
+  AddSeqEntryToSeqEntry (parent_sep, psep, TRUE);
+  nsep = FindNucSeqEntry (parent_sep);
+  ReplaceBioSourceAndPubs (parent_sep, descr);
+  SetSeqFeatProduct (cds, bsp);
+
+  /* Give the new protein a Prot-ref feature carrying the CDS partial flags */
+
+  prp = ProtRefNew ();
+
+  if (prp != NULL) {
+    prot_sfp = CreateNewFeature (psep, NULL, SEQFEAT_PROT, NULL);
+    if (prot_sfp != NULL) {
+      prot_sfp->data.value.ptrvalue = (Pointer) prp;
+      SetSeqLocPartial (prot_sfp->location, partial5, partial3);
+      prot_sfp->partial = (partial5 || partial3);
+    } else {
+      /* no feature to hang the Prot-ref on; do not leak it */
+      ProtRefFree (prp);
+    }
+  }
+}
+
+
+/*
+ * GetProtFeature
+ *
+ * Returns a protein feature for protbsp, or NULL if there is none.  The
+ * feature index is consulted first (SeqMgrGetNextFeature with FEATDEF_PROT).
+ * If that finds nothing, the feature tables attached directly to the Bioseq
+ * (annots with type 1) are walked, and the first SEQFEAT_PROT feature whose
+ * ProtRef is present and has processed == 0 is returned.
+ */
+NLM_EXTERN SeqFeatPtr GetProtFeature (BioseqPtr protbsp)
+{
+  SeqMgrFeatContext fcontext;
+  SeqAnnotPtr       annot;
+  SeqFeatPtr        candidate;
+  ProtRefPtr        prp;
+
+  if (protbsp == NULL) return NULL;
+
+  candidate = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext);
+  if (candidate != NULL) return candidate;
+
+  /* nothing found via the index: scan the Bioseq's own annotations */
+  for (annot = protbsp->annot; annot != NULL; annot = annot->next) {
+    if (annot->type != 1) continue;
+    for (candidate = annot->data; candidate != NULL; candidate = candidate->next) {
+      if (candidate->data.choice != SEQFEAT_PROT) continue;
+      prp = candidate->data.value.ptrvalue;
+      if (prp != NULL && prp->processed == 0) {
+        return candidate;
+      }
+    }
+  }
+
+  return NULL;
+}
+
+
+/*
+ * ConvertMiscFeatToCodingRegion
+ *
+ * Turns a misc_feature into a coding region in place.  The ImpFeat payload
+ * is freed and replaced by a new CdRegion, and the cached subtype is reset
+ * to 0.  If the Bioseq for the feature location can be found, a protein
+ * product is created with ExtraCDSCreationActions, and a non-blank feature
+ * comment is moved onto the protein feature as its name.
+ *
+ * Returns FALSE if sfp is NULL or is not a misc_feature, TRUE otherwise.
+ */
+NLM_EXTERN Boolean ConvertMiscFeatToCodingRegion (SeqFeatPtr sfp)
+{
+  BioseqPtr  bsp, prot_bsp;
+  SeqFeatPtr prot;
+  ProtRefPtr prp;
+
+  if (sfp == NULL || sfp->idx.subtype != FEATDEF_misc_feature) {
+    return FALSE;
+  }
+
+  sfp->data.value.ptrvalue = ImpFeatFree (sfp->data.value.ptrvalue);
+  sfp->data.value.ptrvalue = CdRegionNew ();
+  sfp->data.choice = SEQFEAT_CDREGION;
+  sfp->idx.subtype = 0;
+
+  bsp = BioseqFindFromSeqLoc (sfp->location);
+  if (bsp != NULL) {
+    ExtraCDSCreationActions (sfp, GetBestTopParentForData (bsp->idx.entityID, bsp));
+    if (!StringHasNoText (sfp->comment)) {
+      prot_bsp = BioseqFindFromSeqLoc (sfp->product);
+      prot = GetProtFeature (prot_bsp);
+      prp = NULL;
+      if (prot != NULL) {
+        prp = (ProtRefPtr) prot->data.value.ptrvalue;
+      }
+      /* hand the comment over only when there is a ProtRef to receive it;
+         otherwise it stays on the feature instead of being lost */
+      if (prp != NULL) {
+        ValNodeAddPointer (&prp->name, 0, sfp->comment);
+        sfp->comment = NULL;
+      }
+    }
+  }
+
+  return TRUE;
+}
+
+
NLM_EXTERN void InstantiateMatPeptideProducts (SeqEntryPtr sep)
{
VisitFeaturesInSep (sep, NULL, InstantiateMatPeptideProductForProteinFeature);
diff --git a/api/sqnutils.h b/api/sqnutils.h
index 2765b115..03a2cb4b 100644
--- a/api/sqnutils.h
+++ b/api/sqnutils.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.377 $
+* $Revision: 6.379 $
*
* File Description:
*
@@ -1746,7 +1746,9 @@ NLM_EXTERN Boolean ConvertImpToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to);
NLM_EXTERN Boolean ConvertRegionToRNAFunc (SeqFeatPtr sfp, Uint2 featdef_to);
NLM_EXTERN Boolean ConvertGeneToMiscFeatFunc (SeqFeatPtr sfp, Uint2 featdef_to);
NLM_EXTERN Boolean ConvertProtToProtFunc (SeqFeatPtr sfp, Uint2 featdef_to);
-
+NLM_EXTERN Boolean ConvertMiscFeatToCodingRegion (SeqFeatPtr sfp);
+NLM_EXTERN void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep);
+NLM_EXTERN SeqFeatPtr GetProtFeature (BioseqPtr protbsp);
NLM_EXTERN void InstantiateMatPeptideProducts (SeqEntryPtr sep);
@@ -1816,6 +1818,11 @@ NLM_EXTERN Boolean AutoConvertCDSToMiscFeat (SeqFeatPtr cds, Boolean remove_orig
NLM_EXTERN AuthListPtr PNTR GetAuthListForPub (PubPtr the_pub);
NLM_EXTERN void RemoveConsortiumFromPub (PubPtr pub);
+NLM_EXTERN Int4 Extend5PartialSeqIntToEndOrGap (SeqIntPtr sint, BioseqPtr bsp, Boolean short_only);
+NLM_EXTERN Int4 Extend3PartialSeqIntToEndOrGap (SeqIntPtr sint, BioseqPtr bsp, Boolean short_only);
+NLM_EXTERN Int4 ExtendSeqLocToEndOrGap (SeqLocPtr slp, BioseqPtr bsp, Boolean end5);
+
+
#ifdef __cplusplus
}
#endif
diff --git a/api/valid.c b/api/valid.c
index e65ea6e8..ecb40052 100644
--- a/api/valid.c
+++ b/api/valid.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/1/94
*
-* $Revision: 6.1245 $
+* $Revision: 6.1247 $
*
* File Description: Sequence editing utilities
*
@@ -10365,7 +10365,7 @@ NLM_EXTERN Boolean ParseStructuredVoucher (
CharPtr tmp;
if (StringHasNoText (subname)) return FALSE;
- if (StringLen (subname) < 5) return FALSE;
+ if (StringLen (subname) < 3) return FALSE;
TrimSpacesAroundString (subname);
ptr = StringChr (subname, ':');
@@ -13642,6 +13642,22 @@ static Boolean FeatureSequencesIdentical (SeqFeatPtr sfp, SeqFeatPtr lastsfp)
return rsult;
}
+/*
+ * Returns TRUE only when both features resolve to a gene through
+ * GetGeneForFeature and the two genes are different features.  Returns
+ * FALSE if either argument is NULL or either gene cannot be found.
+ */
+static Boolean GeneXrefsDifferent (SeqFeatPtr sfp, SeqFeatPtr lastsfp)
+
+{
+  SeqFeatPtr  this_gene;
+  SeqFeatPtr  prev_gene;
+
+  if (sfp != NULL && lastsfp != NULL) {
+    this_gene = GetGeneForFeature (sfp);
+    prev_gene = GetGeneForFeature (lastsfp);
+    if (this_gene != NULL && prev_gene != NULL && this_gene != prev_gene) {
+      return TRUE;
+    }
+  }
+
+  return FALSE;
+}
+
static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bvsp)
{
@@ -14057,6 +14073,9 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
/* do not report if mRNAs are linked to two different CDSs */
} else if (fcontext.sap == sap) {
if (samelabel) {
+ if (GeneXrefsDifferent (sfp, last)) {
+ severity = SEV_WARNING;
+ }
ValidErr (vsp, severity, ERR_SEQ_FEAT_FeatContentDup, "Duplicate feature");
} else if (featdeftype != FEATDEF_PUB) {
if (fcontext.partialL != partialL || fcontext.partialR != partialR) {
@@ -14099,6 +14118,9 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
}
} else {
if (samelabel) {
+ if (GeneXrefsDifferent (sfp, last)) {
+ severity = SEV_WARNING;
+ }
ValidErr (vsp, severity, ERR_SEQ_FEAT_FeatContentDup, "Duplicate feature (packaged in different feature table)");
} else if (featdeftype != FEATDEF_PUB) {
if (suppress_duplicate_messages && (featdeftype == FEATDEF_CDS || featdeftype == FEATDEF_mRNA) && HaveUniqueFeatIDXrefs (xref, sfp->xref)) {
diff --git a/checkout.date b/checkout.date
index ed3c8324..0204f581 100644
--- a/checkout.date
+++ b/checkout.date
@@ -1 +1 @@
-Sun Jul 19 10:12:36 EDT 2009
+Sun Aug 9 10:12:32 EDT 2009
diff --git a/connect/ncbi_connection.c b/connect/ncbi_connection.c
index 9370a176..dda0c396 100644
--- a/connect/ncbi_connection.c
+++ b/connect/ncbi_connection.c
@@ -1,4 +1,4 @@
-/* $Id: ncbi_connection.c,v 6.59 2009/07/13 15:04:37 kazimird Exp $
+/* $Id: ncbi_connection.c,v 6.60 2009/07/28 13:04:32 kazimird Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -83,9 +83,9 @@
#define CONN_NOT_NULL(s_c, f_n) CONN_NOT_NULL_EX(s_c, f_n, eIO_InvalidArg)
#ifdef _DEBUG
-# define CONN_TRACE(f_n, msg) CONN_LOG(0, f_n, eLOG_Trace, msg)
+# define CONN_TRACE(f_n, msg) CONN_LOG(0, f_n, eLOG_Trace, msg)
#else
-# define CONN_TRACE(f_n, msg) ((void) 0)
+# define CONN_TRACE(f_n, msg) ((void) 0)
#endif /*_DEBUG*/
@@ -409,9 +409,8 @@ extern EIO_Status CONN_Wait
: eIO_NotSupported;
if (status != eIO_Success) {
- const char* errmsg = (event == eIO_Read
- ? "Read event failed"
- : "Write event failed");
+ static const char* kErrMsg[] = { "Read event failed",
+ "Write event failed" };
ELOG_Level level;
switch (status) {
case eIO_Timeout:
@@ -419,10 +418,8 @@ extern EIO_Status CONN_Wait
level = eLOG_Warning;
else if (timeout->sec | timeout->usec)
level = eLOG_Trace;
- else {
- CONN_TRACE(Wait, errmsg);
+ else
return status;
- }
break;
case eIO_Closed:
level = event == eIO_Read ? eLOG_Trace : eLOG_Error;
@@ -434,7 +431,7 @@ extern EIO_Status CONN_Wait
level = eLOG_Error;
break;
}
- CONN_LOG(14, Wait, level, errmsg);
+ CONN_LOG(14, Wait, level, kErrMsg[event != eIO_Read]);
}
return status;
}
diff --git a/connect/ncbi_socket.c b/connect/ncbi_socket.c
index 30413870..10bba048 100644
--- a/connect/ncbi_socket.c
+++ b/connect/ncbi_socket.c
@@ -1,4 +1,4 @@
-/* $Id: ncbi_socket.c,v 6.282 2009/07/13 15:04:37 kazimird Exp $
+/* $Id: ncbi_socket.c,v 6.283 2009/07/30 16:24:29 kazimird Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -436,8 +436,8 @@ static const char* s_ID(const SOCK sock, char* buf)
/* Put socket description to the message, then log the transferred data
*/
-static void s_DoLog(const SOCK sock, EIO_Event event,
- const void* data, size_t size, const void* ptr)
+static void s_DoLog(ELOG_Level level, const SOCK sock, EIO_Event event,
+ const void* data, size_t size, const void* ptr)
{
const struct sockaddr* sa = (const struct sockaddr*) ptr;
const char* what;
@@ -493,7 +493,7 @@ static void s_DoLog(const SOCK sock, EIO_Event event,
else
strcpy(tail, "???");
}
- CORE_LOGF_X(112, eLOG_Trace,
+ CORE_LOGF_X(112, level,
("%s%s%s", s_ID(sock, _id), head, tail));
break;
@@ -527,11 +527,7 @@ static void s_DoLog(const SOCK sock, EIO_Event event,
*tail = '\0';
}
- CORE_DATAF_EXX(109, !size && data &&
- (sock->type == eDatagram
- || (sock->n_read | sock->n_written))
- ? eLOG_Error : eLOG_Trace,
- data, size,
+ CORE_DATAF_EXX(109, level, data, size,
("%s%.*s%s%s%s", s_ID(sock, _id), n, what,
sock->type == eDatagram
? (event == eIO_Read ? " from " : " to ")
@@ -569,7 +565,7 @@ static void s_DoLog(const SOCK sock, EIO_Event event,
head + 1, sizeof(head) - 1);
} else
*head = '\0';
- CORE_LOGF_X(113, eLOG_Trace,
+ CORE_LOGF_X(113, level,
("%s%s%s (out: %s, in: %s)", s_ID(sock, _id),
ptr ? (const char*) ptr :
sock->keep ? "Leaving" : "Closing", head,
@@ -1783,11 +1779,13 @@ static EIO_Status s_Recv(SOCK sock,
x_error == SOCK_ECONNABORTED ||
x_error == SOCK_ENETRESET))) {
/* statistics & logging */
- if ((x_read < 0 && (sock->n_read | sock->n_written)) ||
+ if (x_read < 0 ||
((sock->log == eOn || (sock->log == eDefault && s_Log == eOn))
&& (!sock->session || flag > 0))) {
- s_DoLog(sock, eIO_Read, (x_read < 0 ? (void*) &x_error :
- x_read > 0 ? buf : 0),
+ s_DoLog(x_read < 0 && sock->n_read && sock->n_written
+ ? eLOG_Error : eLOG_Trace, sock, eIO_Read,
+ x_read < 0 ? (void*) &x_error :
+ x_read > 0 ? buf : 0,
(size_t)(x_read < 0 ? 0 : x_read), 0);
}
@@ -1930,7 +1928,7 @@ static EIO_Status s_Read(SOCK sock,
/* statistics & logging */
if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn)){
- s_DoLog(sock, eIO_Read, x_read > 0 ? x_buf :
+ s_DoLog(eLOG_Trace, sock, eIO_Read, x_read > 0 ? x_buf :
status == eIO_Success ? 0 : (void*) &x_error,
status != eIO_Success ? 0 : x_read, " [decrypt]");
}
@@ -2184,11 +2182,12 @@ static EIO_Status s_Send(SOCK sock,
x_error == SOCK_ENETRESET ||
x_error == SOCK_ECONNABORTED))){
/* statistics & logging */
- if ((x_written < 0 && (sock->n_read | sock->n_written)) ||
+ if (x_written < 0 ||
((sock->log == eOn || (sock->log == eDefault && s_Log == eOn))
&& (!sock->session || flag > 0))) {
- s_DoLog(sock, eIO_Write, (x_written < 0
- ? (void*) &x_error : data),
+ s_DoLog(x_written < 0 && sock->n_read && sock->n_written
+ ? eLOG_Error : eLOG_Trace, sock, eIO_Write,
+ x_written < 0 ? (void*) &x_error : data,
(size_t)(x_written < 0 ? 0 : x_written),
flag < 0 ? "" : 0);
}
@@ -2337,7 +2336,7 @@ static EIO_Status s_WriteData(SOCK sock,
/* statistics & logging */
if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn)) {
- s_DoLog(sock, eIO_Write,
+ s_DoLog(eLOG_Trace, sock, eIO_Write,
status == eIO_Success ? data : (void*) &x_error,
status != eIO_Success ? 0 : *n_written, " [encrypt]");
}
@@ -2685,7 +2684,7 @@ static EIO_Status s_Close(SOCK sock, int abort)
/* statistics & logging */
if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn))
- s_DoLog(sock, eIO_Close, 0, 0, abort ? "Aborting" : 0);
+ s_DoLog(eLOG_Trace, sock, eIO_Close, 0, 0, abort ? "Aborting" : 0);
} else
abort = 1;
@@ -2897,7 +2896,7 @@ static EIO_Status s_Connect(SOCK sock,
/* statistics & logging */
if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn))
- s_DoLog(sock, eIO_Open, 0, 0, &addr.sa);
+ s_DoLog(eLOG_Trace, sock, eIO_Open, 0, 0, &addr.sa);
/* establish connection to the peer */
sock->connected = 0;
@@ -3791,7 +3790,7 @@ static EIO_Status s_Accept(LSOCK lsock,
/* statistics & logging */
if ((*sock)->log == eOn || ((*sock)->log == eDefault && s_Log == eOn))
- s_DoLog(*sock, eIO_Open, 0, 0, &addr.sa);
+ s_DoLog(eLOG_Trace, *sock, eIO_Open, 0, 0, &addr.sa);
return eIO_Success;
}
@@ -4170,7 +4169,7 @@ extern EIO_Status SOCK_CreateOnTopEx(const void* handle,
/* statistics & logging */
if (x_sock->log == eOn || (x_sock->log == eDefault && s_Log == eOn))
- s_DoLog(x_sock, eIO_Open, &peer, 0, &peer.sa);
+ s_DoLog(eLOG_Trace, x_sock, eIO_Open, &peer, 0, &peer.sa);
/* success */
*sock = x_sock;
@@ -5125,7 +5124,7 @@ extern EIO_Status DSOCK_CreateEx(SOCK* sock, TSOCK_Flags flags)
/* statistics & logging */
if ((*sock)->log == eOn || ((*sock)->log == eDefault && s_Log == eOn))
- s_DoLog(*sock, eIO_Open, 0, 0, 0);
+ s_DoLog(eLOG_Trace, *sock, eIO_Open, 0, 0, 0);
return eIO_Success;
}
@@ -5170,7 +5169,7 @@ extern EIO_Status DSOCK_Bind(SOCK sock, unsigned short port)
/* statistics & logging */
if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn))
- s_DoLog(sock, eIO_Open, 0, 0, (struct sockaddr*) &addr);
+ s_DoLog(eLOG_Trace, sock, eIO_Open, 0, 0, (struct sockaddr*) &addr);
return eIO_Success;
}
@@ -5254,7 +5253,7 @@ extern EIO_Status DSOCK_Connect(SOCK sock,
/* statistics & logging */
if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn))
- s_DoLog(sock, eIO_Open, &peer, 0, (struct sockaddr*) &peer);
+ s_DoLog(eLOG_Trace, sock, eIO_Open, &peer, 0, (struct sockaddr*)&peer);
return eIO_Success;
}
@@ -5338,7 +5337,7 @@ extern EIO_Status DSOCK_SendMsg(SOCK sock,
/* statistics & logging */
if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn)){
- s_DoLog(sock, eIO_Write, x_msg, (size_t) x_written,
+ s_DoLog(eLOG_Trace, sock, eIO_Write, x_msg, (size_t) x_written,
(struct sockaddr*) &addr);
}
@@ -5484,7 +5483,7 @@ extern EIO_Status DSOCK_RecvMsg(SOCK sock,
/* statistics & logging */
if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn)){
- s_DoLog(sock, eIO_Read, x_msg, (size_t) x_read,
+ s_DoLog(eLOG_Trace, sock, eIO_Read, x_msg, (size_t) x_read,
(struct sockaddr*) &addr);
}
diff --git a/data/institution_codes.txt b/data/institution_codes.txt
index be1a1d09..d9255588 100644
--- a/data/institution_codes.txt
+++ b/data/institution_codes.txt
@@ -462,6 +462,7 @@ BM<GBR-LONDON> s The Natural History Museum, Department of Botany
BMAM s Beijing Natural History Museum
BMB s Booth Museum of Natural History
BMBN<UK> s Booth Museum of Natural History
+BMCC c Brittany Microbe Culture collection
BMFM-UNAM c Culture Collection of Fungal Pathogens Strains from the Basic Mycology Laboratory of the Department of Microbiology and Parasitology, Faculty of Medicine, UNAM
BMGB s Barbados Museum and Historical Society
BMH s Bournemouth Natural Science Society Museum, herbarium
@@ -799,6 +800,7 @@ CEEF s Escuela Nacional de Ciencias Forestales
CEET s El Colegio de la Frontera Sur, Colleccion de Insectos Asociados a Plantas Cultivadas en la Frontera Sur
CEL s University of Illinois, Crop Sciences Department
CELM s Coleccion Entomologica "Luis Maria Murillo"
+CELMS c Collection of Environmental and Laboratory Microbial Strains
CEMBP s Centre of Excellence in Marine Biology
CEN s EMBRAPA Recursos Geneticos e Biotecnologia - CENARGEN
CENA<BRZ> s Centro de Energia Nuclear na Agricultura, Universidade de Sao Paulo
@@ -909,6 +911,7 @@ CIMNH s Albertson College of Idaho, Orma J. Smith Museum of Natural History
CIMSC c Collezione Instituto di Microbiologia
CINC s University of Cincinnati, Biological Sciences Department
CIP<COL> s Centro de Investigaciones Pesqueras
+CIP<ECU> c International Potato Center
CIP<FRA> c Pasteur Institute Collection, Biological Resource Center of Pasteur Institute (CRBIP)
CIP<PER> b Centro Internacional de las Papas
CIPDE c Collection of Insect Pathogens, Dept. of Entomology
@@ -972,6 +975,16 @@ CMMEX s Universidad Autonoma de Baja California
CMMI s Chinese Academy of Traditional Medicine
CMML s Colorado State University
CMN s Canadian Museum of Nature
+CMN:Annelid s Canadian Museum of Nature, Annelid Collection
+CMN:Bird s Canadian Museum of Nature, Bird Collection
+CMN:Crustacean s Canadian Museum of Nature, Crustacean Collection
+CMN:Fish s Canadian Museum of Nature, Fish Collection
+CMN:GenInvert s Canadian Museum of Nature, General Invertebrate Collection
+CMN:Herp s Canadian Museum of Nature, Amphibian and Reptile Collection
+CMN:Insect s Canadian Museum of Nature, Insect Collection
+CMN:Mammal s Canadian Museum of Nature, Mammal Collection
+CMN:Mollusc s Canadian Museum of Nature, Mollusc Collection
+CMN:Parasite s Canadian Museum of Nature, Parasite Collection
CMNAR s Canadian Museum of Nature, Amphibian and Reptile Collection
CMNC s Canadian Museum of Nature, Neotropical Cerambycidae Collection
CMNFI s Canadian Museum of Nature, Fish Collection
@@ -1335,6 +1348,7 @@ DNHM<USA-UT> s Dinosaur Natural History Museum
DNPM s Setor de Paleontologia do Departamento Nacional de Producao Mineral
DNS s Dundee Naturalists' Society
DO s Societe d'Agriculture Sciences et Arts
+DOA c Department Of Agriculture
DOMO s Collegio Mellerio Rosmini
DOR s Dorset County Museum
DORC s Dorset County Museum
@@ -2429,7 +2443,8 @@ INV s Inverness Museum and Art Gallery
INVA s Invergordon Academy
INVAM c International Culture Collection of (Vesicular) Arbuscular Mycorrhizal Fungi
INVEMAR s Instituto de Investigaciones Marinas de Punta de Betin
-IO s Instituto Oceanografico da Universidade de Sao Paulo
+IO<BRA> s Instituto Oceanografico da Universidade de Sao Paulo
+IO<PRT> s Instituto de Oceanografia da Universidade de Lisboa
IOAN s Shirshov Institute of Oceanography
IOC c Colecao de Culturas de Fungos do Instituto Oswaldo Cruz
IOCAS s Institute of Oceanology, Chinese Academy of Scineces
@@ -2504,6 +2519,7 @@ ISMC s Indiana Department of Natural Resources
ISNHC s State Historical Society of Iowa
ISNP s Istituto Sperimentale per la Nutrizione delle Piante
ISP c International Cooperative Project for Description and Deposition of Type Cultures
+ISPaVe c Centro di Ricerca per la Patologia Vegetale
ISRA s Royal Academy
ISRI c Indonesian Sugar Research Institute, Pusat Penelitian Perkebunan Gula Indonesia
ISS c Collection of Bacteria
@@ -3432,6 +3448,7 @@ MMNH<USA-MN> s Bell Museum of Natural History
MMNHS s Macedonian Museum of Natural History
MMNS s Mississippi Museum of Natural Science
MMP s Museo de Mar del Plata (Argentina)
+MMRF c Marine Microbial Reference Facility
MMS s Montshire Museum of Science
MMTT s Iran National Museum of Natural History
MMUE s Museum of Manchester University
@@ -3515,6 +3532,7 @@ MP<ZAF> s Transvaal Museum
MPA s Ecole National Superieure Agronomique, Biologie et Pathologie Vegetales
MPC s Monterey Peninsula College, Life Science Museum
MPCA s Museo Provincial "Carlos Ameghino"
+MPCNyO s Museo Provincial de Ciencias Naturales, Puerto Madryn
MPCRM s Museo Paleontologico Cittadino della Rocca
MPE s F. R. Long Herbarium
MPEF-PV s Muso Paleontologico Egidio Fergulio
@@ -3813,6 +3831,7 @@ NCE s University of Newcastle upon Tyne, School of Biological Sciences
NCFB c National Collection of Food Bacteria
NCH s Norwich Botanical Society
NCHU s National Chung Hsing University
+NCHU:ZOOL s National Chung Hsing University, Department of Life Science
NCIM c National Collection of Industrial Microorganisms
NCIMB c National Collections of Industrial Food and Marine Bacteria (incorporating the NCFB)
NCIP<IDN> s Pusat Penelitian dan Pengembangan Oseanologi
@@ -5478,6 +5497,7 @@ TU<EST> s University of Tartu
TU<USA-LA> s Tulane University, Museum of Natural History
TUAT s Tokyo University of Agriculture
TUB s Eberhard-Karls-Universitaet Tuebingen, Institut fuer Biologie I
+TUBSB b Tohoku University Brassica Seed Bank
TUC s University of Arizona, Ecology and Evolutionary Biology Department
TUCH s Tribhuvan University, Central Department of Botany
TUFIL s Tokyo University of Fisheries, Ichthyological Laboratory
@@ -6172,13 +6192,14 @@ WNC s University of North Carolina Wilmington, Department of Biology and Marine
WNHM s Oklahoma Baptist University, Webster Natural History Museum
WNLM s Niederoesterreichisches Landesmuseum
WNMU s Western New Mexico University Museum
-WNMU:Bird Western New Mexico University Museum, bird collection
-WNMU:Fish Western New Mexico University Museum, fish collection
-WNMU:Mamm Western New Mexico University Museum, mammal collection
+WNMU:Bird s Western New Mexico University Museum, bird collection
+WNMU:Fish s Western New Mexico University Museum, fish collection
+WNMU:Mamm s Western New Mexico University Museum, mammal collection
WNRE s Whiteshell Nuclear Research Establishment
WNS s Wiesbaden Naturwissenschaftliche Sammlung der Stadt
WNU s Northwest University, Biology Department
WOCB s University of Windsor, Biological Sciences Department
+WOCSB b Wheeler Orchid Collection and Species Bank
WOH s Southwestern Oklahoma State University, Biology Department
WOLL s University of Wollongong, Department of Biological Sciences
WOS s City Museum and Art Gallery
diff --git a/demo/asn2all.c b/demo/asn2all.c
index 9c666e20..f81b5eb5 100644
--- a/demo/asn2all.c
+++ b/demo/asn2all.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/26/04
*
-* $Revision: 1.63 $
+* $Revision: 1.64 $
*
* File Description:
*
@@ -53,7 +53,7 @@
#include <pmfapi.h>
#include <lsqfetch.h>
-#define ASN2ALL_APP_VER "5.1"
+#define ASN2ALL_APP_VER "5.2"
CharPtr ASN2ALL_APPLICATION = ASN2ALL_APP_VER;
diff --git a/demo/asn2gb.c b/demo/asn2gb.c
index 68db0965..7197d7ce 100644
--- a/demo/asn2gb.c
+++ b/demo/asn2gb.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 6.133 $
+* $Revision: 6.134 $
*
* File Description: New GenBank flatfile generator application
*
@@ -54,7 +54,7 @@
/* asn2gnbi.h needed to test PUBSEQGetAccnVer in accpubseq.c */
#include <asn2gnbi.h>
-#define ASN2GB_APP_VER "7.1"
+#define ASN2GB_APP_VER "7.2"
CharPtr ASN2GB_APPLICATION = ASN2GB_APP_VER;
diff --git a/demo/asnval.c b/demo/asnval.c
index b8009ad4..30562d98 100644
--- a/demo/asnval.c
+++ b/demo/asnval.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11/3/04
*
-* $Revision: 1.96 $
+* $Revision: 1.98 $
*
* File Description:
*
@@ -60,7 +60,7 @@
#include <accpubseq.h>
#endif
-#define ASNVAL_APP_VER "7.2"
+#define ASNVAL_APP_VER "7.3"
CharPtr ASNVAL_APPLICATION = ASNVAL_APP_VER;
@@ -804,7 +804,7 @@ static void DoValidation (
}
xml_header = GetXmlHeaderText(cutoff);
}
- if (!BarcodeValidateOneSeqEntry (ofp, sep, FALSE,
+ if (!BarcodeValidateOneSeqEntry (ofp, sep, TRUE,
vfp->verbosity == 4,
!vfp->has_errors,
xml_header)) {
diff --git a/demo/scantest.c b/demo/scantest.c
index 15701c17..2c7251e1 100644
--- a/demo/scantest.c
+++ b/demo/scantest.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/20/95
*
-* $Revision: 6.56 $
+* $Revision: 6.57 $
*
* File Description:
* template for custom scans of ASN.1 release files
@@ -911,6 +911,39 @@ static void FindMuidCitations (
}
}
+/* Seq-graph callback: prints a GPHLOC line for a graph with no location and a
+   GPHWHL line for a graph whose location choice is SEQLOC_WHOLE.  userdata is
+   cast to ChangeDataPtr; nothing is printed unless its tdp and tdp->fp are set. */
+static void FindWholeGraphLocs (
+  SeqGraphPtr sgp,
+  Pointer userdata
+)
+
+{
+  ChangeDataPtr  changes;
+  CharPtr        label = NULL;
+  ThrdDataPtr    thread;
+
+  changes = (ChangeDataPtr) userdata;
+  if (sgp == NULL || changes == NULL) return;
+  thread = changes->tdp;
+  if (thread == NULL || thread->fp == NULL) return;
+
+  if (sgp->loc == NULL) {
+    label = "GPHLOC";
+  } else if (sgp->loc->choice == SEQLOC_WHOLE) {
+    label = "GPHWHL";
+  }
+  if (label == NULL) return;
+
+  /* the final argument is a tab in verbose mode and a single space otherwise */
+  if (thread->verbose) {
+    TSPrintLine (thread->fp, label, thread->id, NULL, NULL, "\t");
+  } else {
+    TSPrintLine (thread->fp, label, thread->id, NULL, NULL, " ");
+  }
+}
+
static void RnaProtCmntTrailingCommaFix (
SeqFeatPtr sfp,
Pointer userdata
@@ -1501,6 +1534,7 @@ static void DoReport (
VisitBioSourcesInSep (sep, (Pointer) &cdbefore, LookForSemicolonedVouchers);
VisitFeaturesInSep (sep, (Pointer) &cdbefore, FindCommaInGene);
VisitFeaturesInSep (sep, (Pointer) &cdbefore, FindMuidCitations);
+ VisitGraphsInSep (sep, (Pointer) &cdbefore, FindWholeGraphLocs);
tmp = Se2Bs (sep);
if (! BSEqual (bs, tmp)) {
diff --git a/demo/src_chk.c b/demo/src_chk.c
index cb5ffe30..57741db0 100755
--- a/demo/src_chk.c
+++ b/demo/src_chk.c
@@ -1,398 +1,787 @@
-/* src_chk.c
-* ===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information (NCBI)
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government do not place any restriction on its use or reproduction.
-* We would, however, appreciate having the NCBI and the author cited in
-* any work or product based on this material
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* ===========================================================================
-*
-* File Name: src_chk.c
-*
-* Author: Colleen Bollin
-*
-* Version Creation Date: 4/12/07
-*
-* $Revision: 1.10 $
-*
-* File Description:
-*
-* Modifications:
-* --------------------------------------------------------------------------
-* Date Name Description of modification
-* ------- ---------- -----------------------------------------------------
-*
-*
-* ==========================================================================
-*/
-
-#include <ncbi.h>
-#include <objall.h>
-#include <objsset.h>
-#include <objsub.h>
-#include <objfdef.h>
-#include <sequtil.h>
-#include <gather.h>
-#include <sqnutils.h>
-#include <explore.h>
-#include <pmfapi.h>
-#define NLM_GENERATED_CODE_PROTO
-#include <asnmacro.h>
-#include <objmacro.h>
-#include <macroapi.h>
-
-#define SRC_CHK_APP_VER "1.0"
-
-CharPtr SRC_CHK_APPLICATION = SRC_CHK_APP_VER;
-
-
-static ValNodePtr CollectFieldList(BioseqPtr bsp)
-{
- BioSourcePtr biop;
- SeqDescrPtr sdp;
- SeqMgrDescContext dcontext;
- ValNodePtr list = NULL, vnp;
-
- for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
- sdp != NULL;
- sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext)) {
- biop = (BioSourcePtr) sdp->data.ptrvalue;
- vnp = GetSourceQualFieldListFromBioSource (biop);
- ValNodeLink (&list, vnp);
- }
- return list;
-}
-
-
-static void PrintHeader (FILE *fp, ValNodePtr field_list)
-{
- CharPtr txt;
-
- if (fp == NULL || field_list == NULL) {
- return;
- }
- /* first field accession, second field GI, third field tax ID */
- fprintf (fp, "\t\tTaxID");
- while (field_list != NULL) {
- txt = SummarizeFieldType (field_list);
- fprintf (fp, "\t%s", txt);
- txt = MemFree (txt);
- field_list = field_list->next;
- }
- fprintf (fp, "\n");
-}
-
-
-static Int4 GetTaxIdFromOrgRef (OrgRefPtr orp)
-{
- Int4 tax_id = -1;
- ValNodePtr vnp;
- DbtagPtr d;
-
- if (orp != NULL)
- {
- for (vnp = orp->db; vnp != NULL; vnp = vnp->next)
- {
- d = (DbtagPtr) vnp->data.ptrvalue;
- if (StringCmp(d->db, "taxon") == 0)
- {
- tax_id = d->tag->id;
- break;
- }
- }
- }
- return tax_id;
-}
-
-
-static void PrintBioSourceLine (FILE *fp, BioSourcePtr biop, ValNodePtr field_list)
-{
- CharPtr txt;
-
- if (fp == NULL || biop == NULL || field_list == NULL) {
- return;
- }
-
- fprintf (fp, "\t%d", GetTaxIdFromOrgRef(biop->org));
-
- while (field_list != NULL) {
- txt = GetSourceQualFromBioSource (biop, field_list->data.ptrvalue, NULL);
- fprintf (fp, "\t%s", txt == NULL ? "" : txt);
- txt = MemFree (txt);
- field_list = field_list->next;
- }
-}
-
-
-static void PrintBioseqLines (FILE *fp, BioseqPtr bsp, ValNodePtr field_list)
-{
- SeqDescrPtr sdp;
- SeqMgrDescContext dcontext;
- Char id_txt[255], id_txt2[255];
- SeqIdPtr sip, sip_gi = NULL, sip_gb = NULL;
-
- if (fp == NULL || bsp == NULL || field_list == NULL) {
- return;
- }
-
- for (sip = bsp->id; sip != NULL; sip = sip->next) {
- if (sip->choice == SEQID_GENBANK
- || (sip->choice == SEQID_EMBL && sip_gb == NULL)
- || (sip->choice == SEQID_SWISSPROT && sip_gb == NULL)
- || (sip->choice == SEQID_DDBJ && sip_gb == NULL)
- || (sip->choice == SEQID_PIR && sip_gb == NULL)) {
- sip_gb = sip;
- } else if (sip->choice == SEQID_GI) {
- sip_gi = sip;
- }
- }
-
- if (sip_gb == NULL && sip_gi == NULL) {
- SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_txt, PRINTID_REPORT, sizeof (id_txt) - 1);
- id_txt2[0] = 0;
- } else {
- if (sip_gb == NULL) {
- id_txt[0] = 0;
- } else {
- SeqIdWrite (sip_gb, id_txt, PRINTID_REPORT, sizeof (id_txt) - 1);
- }
- if (sip_gi == NULL) {
- id_txt2[0] = 0;
- } else {
- SeqIdWrite (sip_gi, id_txt2, PRINTID_REPORT, sizeof (id_txt2) - 1);
- }
- }
-
- for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
- sdp != NULL;
- sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext)) {
- fprintf (fp, "%s\t%s", id_txt, id_txt2);
- PrintBioSourceLine (fp, sdp->data.ptrvalue, field_list);
- fprintf (fp, "\n");
- }
-}
-
-
-static void PrintBioseqErrorLine (FILE *fp, SeqIdPtr sip)
-{
- Char id_txt[255];
-
- if (fp == NULL || sip == NULL) {
- return;
- }
-
- SeqIdWrite (sip, id_txt, PRINTID_REPORT, sizeof (id_txt) - 1);
-
- if (sip->choice == SEQID_GI) {
- fprintf (fp, "\t%s\n", id_txt);
- } else {
- fprintf (fp, "%s\t\n", id_txt);
- }
-}
-
-
-static Boolean IsAllDigits (CharPtr str)
-{
- CharPtr cp;
-
- if (StringHasNoText (str)) return FALSE;
-
- cp = str;
- while (*cp != 0 && isdigit (*cp)) {
- cp++;
- }
- if (*cp == 0) {
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-
-static SeqIdPtr SmartGuessMakeId (CharPtr str)
-{
- CharPtr id_txt;
- SeqIdPtr sip = NULL;
-
- if (StringHasNoText (str)) {
- return NULL;
- } else if (StringChr (str, '|') != NULL) {
- sip = MakeSeqID (str);
- } else if (IsAllDigits (str)) {
- id_txt = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + 4));
- sprintf (id_txt, "gi|%s", str);
- sip = MakeSeqID (id_txt);
- id_txt = MemFree (id_txt);
- } else {
- id_txt = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + 4));
- sprintf (id_txt, "gb|%s", str);
- sip = MakeSeqID (id_txt);
- id_txt = MemFree (id_txt);
- }
- return sip;
-}
-
-
-/* Args structure contains command-line arguments */
-
-#define i_argInputFile 0
-#define o_argOutputFile 1
-
-Args myargs [] = {
- {"Input File", NULL, NULL, NULL,
- TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
- {"Output File", NULL, NULL, NULL,
- TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL}
-};
-
-
-static void SortFieldListForSrcChk (ValNodePtr PNTR field_list)
-{
- ValNodePtr vnp, vnp_s, vnp_prev = NULL;
-
- if (field_list == NULL || *field_list == NULL) return;
-
- SortUniqueFieldTypeList (field_list);
-
- /* move taxname to front of list */
- for (vnp = *field_list; vnp != NULL; vnp_prev = vnp, vnp = vnp->next) {
- if (vnp->choice == FieldType_source_qual) {
- vnp_s = vnp->data.ptrvalue;
- if (vnp_s != NULL
- && vnp_s->choice == SourceQualChoice_textqual
- && vnp_s->data.intvalue == Source_qual_taxname) {
- /* only need to move if not already at front of list */
- if (vnp_prev != NULL) {
- vnp_prev->next = vnp->next;
- vnp->next = *field_list;
- *field_list = vnp;
- }
- break;
- }
- }
- }
-
-
-}
-
-
-Int2 Main(void)
-{
- Char app [64];
- Int4 rval = 0;
- CharPtr id_file, line;
- ReadBufferData rbd;
- ValNodePtr field_list = NULL;
- SeqIdPtr sip;
- ValNodePtr bsp_list = NULL, vnp;
- BioseqPtr bsp;
- FILE *fp;
-
-
- /* standard setup */
-
- ErrSetFatalLevel (SEV_MAX);
- ErrClearOptFlags (EO_SHOW_USERSTR);
- UseLocalAsnloadDataAndErrMsg ();
- ErrPathReset ();
-
- /* finish resolving internal connections in ASN.1 parse tables */
-
- if (! AllObjLoad ()) {
- Message (MSG_FATAL, "AllObjLoad failed");
- return 1;
- }
- if (! SubmitAsnLoad ()) {
- Message (MSG_FATAL, "SubmitAsnLoad failed");
- return 1;
- }
- if (! FeatDefSetLoad ()) {
- Message (MSG_FATAL, "FeatDefSetLoad failed");
- return 1;
- }
- if (! SeqCodeSetLoad ()) {
- Message (MSG_FATAL, "SeqCodeSetLoad failed");
- return 1;
- }
- if (! GeneticCodeTableLoad ()) {
- Message (MSG_FATAL, "GeneticCodeTableLoad failed");
- return 1;
- }
-
- PubSeqFetchEnable ();
-
- /* process command line arguments */
-
- sprintf (app, "src_chk %s", SRC_CHK_APPLICATION);
- if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
- return 0;
- }
-
- id_file = (CharPtr) myargs [i_argInputFile].strvalue;
-
- rbd.fp = FileOpen (id_file, "r");
- if (rbd.fp == NULL) {
- Message (MSG_ERROR, "Unable to open %s", (CharPtr) myargs [i_argInputFile].strvalue);
- return 1;
- }
- rbd.current_data = NULL;
- line = AbstractReadFunction (&rbd);
- while (line != NULL && line[0] != EOF) {
- if (!StringHasNoText (line)) {
-
- sip = SmartGuessMakeId (line);
- bsp = BioseqLockById (sip);
- if (bsp == NULL) {
- printf ("Unable to download Bioseq for %s\n", line);
- } else {
- ValNodeLink (&field_list, CollectFieldList (bsp));
- BioseqUnlock (bsp);
- }
- ValNodeAddPointer (&bsp_list, 0, sip);
- }
- line = MemFree (line);
- line = AbstractReadFunction (&rbd);
- }
-
- FileClose (rbd.fp);
-
- SortFieldListForSrcChk (&field_list);
-
- fp = FileOpen ((CharPtr) myargs [o_argOutputFile].strvalue, "w");
- if (fp == NULL) {
- Message (MSG_ERROR, "Unable to open %s", (CharPtr) myargs [o_argOutputFile].strvalue);
- rval = 1;
- } else {
- PrintHeader (fp, field_list);
- for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
- bsp = BioseqLockById (vnp->data.ptrvalue);
- if (bsp == NULL) {
- PrintBioseqErrorLine (fp, vnp->data.ptrvalue);
- } else {
- PrintBioseqLines (fp, bsp, field_list);
- }
- BioseqUnlock (bsp);
- vnp->data.ptrvalue = SeqIdFree (vnp->data.ptrvalue);
- }
- }
- FileClose (fp);
- bsp_list = ValNodeFree (bsp_list);
- field_list = FieldTypeListFree (field_list);
- return rval;
-}
+/* src_chk.c
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information (NCBI)
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government do not place any restriction on its use or reproduction.
+* We would, however, appreciate having the NCBI and the author cited in
+* any work or product based on this material
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* ===========================================================================
+*
+* File Name: src_chk.c
+*
+* Author: Colleen Bollin
+*
+* Version Creation Date: 4/12/07
+*
+* $Revision: 1.11 $
+*
+* File Description:
+*
+* Modifications:
+* --------------------------------------------------------------------------
+* Date Name Description of modification
+* ------- ---------- -----------------------------------------------------
+*
+*
+* ==========================================================================
+*/
+
+#include <ncbi.h>
+#include <objall.h>
+#include <objsset.h>
+#include <objsub.h>
+#include <objfdef.h>
+#include <sequtil.h>
+#include <gather.h>
+#include <sqnutils.h>
+#include <explore.h>
+#include <pmfapi.h>
+#define NLM_GENERATED_CODE_PROTO
+#include <asnmacro.h>
+#include <objmacro.h>
+#include <macroapi.h>
+#ifdef INTERNAL_NCBI_SRC_CHK
+#include <accpubseq.h>
+#endif
+
+#define SRC_CHK_APP_VER "1.0"
+
+CharPtr SRC_CHK_APPLICATION = SRC_CHK_APP_VER;
+
+#ifdef INTERNAL_NCBI_SRC_CHK
+static CharPtr dirsubfetchproc = "DirSubBioseqFetch";
+
+static CharPtr dirsubfetchcmd = NULL;
+
+/*
+*  ReadFromDirSub
+*
+*  Runs the script obtained from GetAppParam ("SEQUIN", "DIRSUB", "FETCHSCRIPT")
+*  (looked up on first use and cached in dirsubfetchcmd) for accession accn,
+*  directs its output to a temporary file, and reads that file with
+*  ReadAsnFastaOrFlatFile.
+*
+*  *datatype and *entityID, when non-NULL, are zeroed on entry and then passed
+*  on to ReadAsnFastaOrFlatFile.  Returns the pointer ReadAsnFastaOrFlatFile
+*  returns, or NULL when accn is blank, no script is configured, the command
+*  line would not fit in the command buffer, or the temporary file cannot be
+*  opened.  The temporary file is removed before returning.
+*
+*  NOTE(review): accn is inserted unquoted into a shell command line - callers
+*  must not pass untrusted text.
+*/
+extern Pointer ReadFromDirSub (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
+extern Pointer ReadFromDirSub (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
+
+{
+  Char     cmmd [256];
+  Pointer  dataptr;
+  FILE*    fp;
+  Char     path [PATH_MAX];
+
+  if (datatype != NULL) {
+    *datatype = 0;
+  }
+  if (entityID != NULL) {
+    *entityID = 0;
+  }
+  if (StringHasNoText (accn)) return NULL;
+
+  if (dirsubfetchcmd == NULL) {
+    if (GetAppParam ("SEQUIN", "DIRSUB", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
+      dirsubfetchcmd = StringSaveNoNull (cmmd);
+    }
+  }
+  if (dirsubfetchcmd == NULL) return NULL;
+
+  TmpNam (path);
+
+  /* cmmd is a fixed-size buffer: refuse the request rather than overrun it
+     (9 = literal characters of the longer format below plus the NUL) */
+  if (StringLen (dirsubfetchcmd) + StringLen (accn) + StringLen (path) + 9 > sizeof (cmmd)) {
+    FileRemove (path);
+    return NULL;
+  }
+
+#ifdef OS_UNIX
+  sprintf (cmmd, "csh %s %s > %s", dirsubfetchcmd, accn, path);
+  system (cmmd);
+#endif
+#ifdef OS_MSWIN
+  sprintf (cmmd, "%s %s -o %s", dirsubfetchcmd, accn, path);
+  system (cmmd);
+#endif
+
+  fp = FileOpen (path, "r");
+  if (fp == NULL) {
+    FileRemove (path);
+    return NULL;
+  }
+  dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
+  FileClose (fp);
+  FileRemove (path);
+  return dataptr;
+}
+
+
+/*
+*  DirSubBioseqFetchFunc
+*
+*  Fetch callback (registered by DirSubFetchEnable).  data is cast to an
+*  OMProcControlPtr whose input_data is the SeqId to fetch; only SEQID_GENBANK
+*  ids with a non-blank accession are handled.  The accession is given to the
+*  SEQUIN / DIRSUB / FETCHSCRIPT script, whose output is read from a temporary
+*  file with ReadAsnFastaOrFlatFile.
+*
+*  Returns OM_MSG_RET_DONE after setting output_data and output_entityID,
+*  OM_MSG_RET_OK when the script output yields no data, and OM_MSG_RET_ERROR
+*  for every other failure (including a command line that would not fit in
+*  the command buffer).
+*
+*  NOTE(review): the accession is inserted unquoted into a shell command line.
+*/
+static Int2 LIBCALLBACK DirSubBioseqFetchFunc (Pointer data)
+
+{
+  BioseqPtr         bsp;
+  Char              cmmd [256];
+  Pointer           dataptr;
+  Uint2             datatype;
+  Uint2             entityID;
+  FILE*             fp;
+  OMProcControlPtr  ompcp;
+  ObjMgrProcPtr     ompp;
+  Char              path [PATH_MAX];
+  SeqEntryPtr       sep = NULL;
+  SeqIdPtr          sip;
+  TextSeqIdPtr      tsip;
+
+  ompcp = (OMProcControlPtr) data;
+  if (ompcp == NULL) return OM_MSG_RET_ERROR;
+  ompp = ompcp->proc;
+  if (ompp == NULL) return OM_MSG_RET_ERROR;
+  sip = (SeqIdPtr) ompcp->input_data;
+  if (sip == NULL) return OM_MSG_RET_ERROR;
+
+  if (sip->choice != SEQID_GENBANK) return OM_MSG_RET_ERROR;
+  tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+  if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
+
+  if (dirsubfetchcmd == NULL) {
+    if (GetAppParam ("SEQUIN", "DIRSUB", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
+      dirsubfetchcmd = StringSaveNoNull (cmmd);
+    }
+  }
+  if (dirsubfetchcmd == NULL) return OM_MSG_RET_ERROR;
+
+  TmpNam (path);
+
+  /* cmmd is a fixed-size buffer: fail the fetch rather than overrun it
+     (9 = literal characters of the longer format below plus the NUL) */
+  if (StringLen (dirsubfetchcmd) + StringLen (tsip->accession) + StringLen (path) + 9 > sizeof (cmmd)) {
+    FileRemove (path);
+    return OM_MSG_RET_ERROR;
+  }
+
+#ifdef OS_UNIX
+  sprintf (cmmd, "csh %s %s > %s", dirsubfetchcmd, tsip->accession, path);
+  system (cmmd);
+#endif
+#ifdef OS_MSWIN
+  sprintf (cmmd, "%s %s -o %s", dirsubfetchcmd, tsip->accession, path);
+  system (cmmd);
+#endif
+
+  fp = FileOpen (path, "r");
+  if (fp == NULL) {
+    FileRemove (path);
+    return OM_MSG_RET_ERROR;
+  }
+  dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
+  FileClose (fp);
+  FileRemove (path);
+
+  if (dataptr == NULL) return OM_MSG_RET_OK;
+
+  sep = GetTopSeqEntryForEntityID (entityID);
+  if (sep == NULL) return OM_MSG_RET_ERROR;
+  bsp = BioseqFindInSeqEntry (sip, sep);
+  ompcp->output_data = (Pointer) bsp;
+  ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
+  return OM_MSG_RET_DONE;
+}
+
+/* Registers DirSubBioseqFetchFunc with ObjMgrProcLoad as an OMPROC_FETCH
+   procedure taking OBJ_SEQID and producing OBJ_BIOSEQ.  Always returns TRUE;
+   the value returned by ObjMgrProcLoad is not examined. */
+static Boolean DirSubFetchEnable (void)
+
+{
+  ObjMgrProcLoad (OMPROC_FETCH,
+                  dirsubfetchproc, dirsubfetchproc,
+                  OBJ_SEQID, 0,
+                  OBJ_BIOSEQ, 0,
+                  NULL,
+                  DirSubBioseqFetchFunc,
+                  PROC_PRIORITY_DEFAULT);
+
+  return TRUE;
+}
+
+static CharPtr smartfetchproc = "SmartBioseqFetch";
+
+static CharPtr smartfetchcmd = NULL;
+
+/*
+*  ReadFromSmart
+*
+*  Runs the script obtained from GetAppParam ("SEQUIN", "SMART", "FETCHSCRIPT")
+*  (looked up on first use and cached in smartfetchcmd) for accession accn,
+*  directs its output to a temporary file, and reads that file with
+*  ReadAsnFastaOrFlatFile.
+*
+*  *datatype and *entityID, when non-NULL, are zeroed on entry and then passed
+*  on to ReadAsnFastaOrFlatFile.  Returns the pointer ReadAsnFastaOrFlatFile
+*  returns, or NULL when accn is blank, no script is configured, the command
+*  line would not fit in the command buffer, or the temporary file cannot be
+*  opened.  The temporary file is removed before returning.
+*
+*  NOTE(review): accn is inserted unquoted into a shell command line - callers
+*  must not pass untrusted text.
+*/
+extern Pointer ReadFromSmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
+extern Pointer ReadFromSmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
+
+{
+  Char     cmmd [256];
+  Pointer  dataptr;
+  FILE*    fp;
+  Char     path [PATH_MAX];
+
+  if (datatype != NULL) {
+    *datatype = 0;
+  }
+  if (entityID != NULL) {
+    *entityID = 0;
+  }
+  if (StringHasNoText (accn)) return NULL;
+
+  if (smartfetchcmd == NULL) {
+    if (GetAppParam ("SEQUIN", "SMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
+      smartfetchcmd = StringSaveNoNull (cmmd);
+    }
+  }
+  if (smartfetchcmd == NULL) return NULL;
+
+  TmpNam (path);
+
+  /* cmmd is a fixed-size buffer: refuse the request rather than overrun it
+     (9 = literal characters of the longer format below plus the NUL) */
+  if (StringLen (smartfetchcmd) + StringLen (accn) + StringLen (path) + 9 > sizeof (cmmd)) {
+    FileRemove (path);
+    return NULL;
+  }
+
+#ifdef OS_UNIX
+  sprintf (cmmd, "csh %s %s > %s", smartfetchcmd, accn, path);
+  system (cmmd);
+#endif
+#ifdef OS_MSWIN
+  sprintf (cmmd, "%s %s -o %s", smartfetchcmd, accn, path);
+  system (cmmd);
+#endif
+
+  fp = FileOpen (path, "r");
+  if (fp == NULL) {
+    FileRemove (path);
+    return NULL;
+  }
+  dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
+  FileClose (fp);
+  FileRemove (path);
+  return dataptr;
+}
+
+
+/*
+*  SmartBioseqFetchFunc
+*
+*  Fetch callback (registered by SmartFetchEnable).  data is cast to an
+*  OMProcControlPtr whose input_data is the SeqId to fetch; only SEQID_GENBANK
+*  ids with a non-blank accession are handled.  The accession is given to the
+*  SEQUIN / SMART / FETCHSCRIPT script, whose output is read from a temporary
+*  file with ReadAsnFastaOrFlatFile.
+*
+*  Returns OM_MSG_RET_DONE after setting output_data and output_entityID,
+*  OM_MSG_RET_OK when the script output yields no data, and OM_MSG_RET_ERROR
+*  for every other failure (including a command line that would not fit in
+*  the command buffer).
+*
+*  NOTE(review): the accession is inserted unquoted into a shell command line.
+*/
+static Int2 LIBCALLBACK SmartBioseqFetchFunc (Pointer data)
+
+{
+  BioseqPtr         bsp;
+  Char              cmmd [256];
+  Pointer           dataptr;
+  Uint2             datatype;
+  Uint2             entityID;
+  FILE*             fp;
+  OMProcControlPtr  ompcp;
+  ObjMgrProcPtr     ompp;
+  Char              path [PATH_MAX];
+  SeqEntryPtr       sep = NULL;
+  SeqIdPtr          sip;
+  TextSeqIdPtr      tsip;
+
+  ompcp = (OMProcControlPtr) data;
+  if (ompcp == NULL) return OM_MSG_RET_ERROR;
+  ompp = ompcp->proc;
+  if (ompp == NULL) return OM_MSG_RET_ERROR;
+  sip = (SeqIdPtr) ompcp->input_data;
+  if (sip == NULL) return OM_MSG_RET_ERROR;
+
+  if (sip->choice != SEQID_GENBANK) return OM_MSG_RET_ERROR;
+  tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+  if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
+
+  if (smartfetchcmd == NULL) {
+    if (GetAppParam ("SEQUIN", "SMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
+      smartfetchcmd = StringSaveNoNull (cmmd);
+    }
+  }
+  if (smartfetchcmd == NULL) return OM_MSG_RET_ERROR;
+
+  TmpNam (path);
+
+  /* cmmd is a fixed-size buffer: fail the fetch rather than overrun it
+     (9 = literal characters of the longer format below plus the NUL) */
+  if (StringLen (smartfetchcmd) + StringLen (tsip->accession) + StringLen (path) + 9 > sizeof (cmmd)) {
+    FileRemove (path);
+    return OM_MSG_RET_ERROR;
+  }
+
+#ifdef OS_UNIX
+  sprintf (cmmd, "csh %s %s > %s", smartfetchcmd, tsip->accession, path);
+  system (cmmd);
+#endif
+#ifdef OS_MSWIN
+  sprintf (cmmd, "%s %s -o %s", smartfetchcmd, tsip->accession, path);
+  system (cmmd);
+#endif
+
+  fp = FileOpen (path, "r");
+  if (fp == NULL) {
+    FileRemove (path);
+    return OM_MSG_RET_ERROR;
+  }
+  dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
+  FileClose (fp);
+  FileRemove (path);
+
+  if (dataptr == NULL) return OM_MSG_RET_OK;
+
+  sep = GetTopSeqEntryForEntityID (entityID);
+  if (sep == NULL) return OM_MSG_RET_ERROR;
+  bsp = BioseqFindInSeqEntry (sip, sep);
+  ompcp->output_data = (Pointer) bsp;
+  ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
+  return OM_MSG_RET_DONE;
+}
+
+/* Registers SmartBioseqFetchFunc with ObjMgrProcLoad as an OMPROC_FETCH
+   procedure taking OBJ_SEQID and producing OBJ_BIOSEQ.  Always returns TRUE;
+   the value returned by ObjMgrProcLoad is not examined. */
+static Boolean SmartFetchEnable (void)
+
+{
+  ObjMgrProcLoad (OMPROC_FETCH,
+                  smartfetchproc, smartfetchproc,
+                  OBJ_SEQID, 0,
+                  OBJ_BIOSEQ, 0,
+                  NULL,
+                  SmartBioseqFetchFunc,
+                  PROC_PRIORITY_DEFAULT);
+
+  return TRUE;
+}
+
+static CharPtr tpasmartfetchproc = "TPASmartBioseqFetch";
+
+static CharPtr tpasmartfetchcmd = NULL;
+
+/*
+*  ReadFromTPASmart
+*
+*  Runs the script obtained from GetAppParam ("SEQUIN", "TPASMART",
+*  "FETCHSCRIPT") (looked up on first use and cached in tpasmartfetchcmd) for
+*  accession accn, directs its output to a temporary file, and reads that file
+*  with ReadAsnFastaOrFlatFile.
+*
+*  *datatype and *entityID, when non-NULL, are zeroed on entry and then passed
+*  on to ReadAsnFastaOrFlatFile.  Returns the pointer ReadAsnFastaOrFlatFile
+*  returns, or NULL when accn is blank, no script is configured, the command
+*  line would not fit in the command buffer, or the temporary file cannot be
+*  opened.  The temporary file is removed before returning.
+*
+*  NOTE(review): accn is inserted unquoted into a shell command line - callers
+*  must not pass untrusted text.
+*/
+extern Pointer ReadFromTPASmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
+extern Pointer ReadFromTPASmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
+
+{
+  Char     cmmd [256];
+  Pointer  dataptr;
+  FILE*    fp;
+  Char     path [PATH_MAX];
+
+  if (datatype != NULL) {
+    *datatype = 0;
+  }
+  if (entityID != NULL) {
+    *entityID = 0;
+  }
+  if (StringHasNoText (accn)) return NULL;
+
+  if (tpasmartfetchcmd == NULL) {
+    if (GetAppParam ("SEQUIN", "TPASMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
+      tpasmartfetchcmd = StringSaveNoNull (cmmd);
+    }
+  }
+  if (tpasmartfetchcmd == NULL) return NULL;
+
+  TmpNam (path);
+
+  /* cmmd is a fixed-size buffer: refuse the request rather than overrun it
+     (9 = literal characters of the longer format below plus the NUL) */
+  if (StringLen (tpasmartfetchcmd) + StringLen (accn) + StringLen (path) + 9 > sizeof (cmmd)) {
+    FileRemove (path);
+    return NULL;
+  }
+
+#ifdef OS_UNIX
+  sprintf (cmmd, "csh %s %s > %s", tpasmartfetchcmd, accn, path);
+  system (cmmd);
+#endif
+#ifdef OS_MSWIN
+  sprintf (cmmd, "%s %s -o %s", tpasmartfetchcmd, accn, path);
+  system (cmmd);
+#endif
+
+  fp = FileOpen (path, "r");
+  if (fp == NULL) {
+    FileRemove (path);
+    return NULL;
+  }
+  dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
+  FileClose (fp);
+  FileRemove (path);
+  return dataptr;
+}
+
+
+/*
+*  TPASmartBioseqFetchFunc
+*
+*  Fetch callback (registered by TPASmartFetchEnable).  data is cast to an
+*  OMProcControlPtr whose input_data is the SeqId to fetch; only SEQID_TPG ids
+*  with a non-blank accession are handled.  The accession is given to the
+*  SEQUIN / TPASMART / FETCHSCRIPT script, whose output is read from a
+*  temporary file with ReadAsnFastaOrFlatFile.
+*
+*  Returns OM_MSG_RET_DONE after setting output_data and output_entityID,
+*  OM_MSG_RET_OK when the script output yields no data, and OM_MSG_RET_ERROR
+*  for every other failure (including a command line that would not fit in
+*  the command buffer).
+*
+*  NOTE(review): the accession is inserted unquoted into a shell command line.
+*/
+static Int2 LIBCALLBACK TPASmartBioseqFetchFunc (Pointer data)
+
+{
+  BioseqPtr         bsp;
+  Char              cmmd [256];
+  Pointer           dataptr;
+  Uint2             datatype;
+  Uint2             entityID;
+  FILE*             fp;
+  OMProcControlPtr  ompcp;
+  ObjMgrProcPtr     ompp;
+  Char              path [PATH_MAX];
+  SeqEntryPtr       sep = NULL;
+  SeqIdPtr          sip;
+  TextSeqIdPtr      tsip;
+
+  ompcp = (OMProcControlPtr) data;
+  if (ompcp == NULL) return OM_MSG_RET_ERROR;
+  ompp = ompcp->proc;
+  if (ompp == NULL) return OM_MSG_RET_ERROR;
+  sip = (SeqIdPtr) ompcp->input_data;
+  if (sip == NULL) return OM_MSG_RET_ERROR;
+
+  if (sip->choice != SEQID_TPG) return OM_MSG_RET_ERROR;
+  tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+  if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
+
+  if (tpasmartfetchcmd == NULL) {
+    if (GetAppParam ("SEQUIN", "TPASMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
+      tpasmartfetchcmd = StringSaveNoNull (cmmd);
+    }
+  }
+  if (tpasmartfetchcmd == NULL) return OM_MSG_RET_ERROR;
+
+  TmpNam (path);
+
+  /* cmmd is a fixed-size buffer: fail the fetch rather than overrun it
+     (9 = literal characters of the longer format below plus the NUL) */
+  if (StringLen (tpasmartfetchcmd) + StringLen (tsip->accession) + StringLen (path) + 9 > sizeof (cmmd)) {
+    FileRemove (path);
+    return OM_MSG_RET_ERROR;
+  }
+
+#ifdef OS_UNIX
+  sprintf (cmmd, "csh %s %s > %s", tpasmartfetchcmd, tsip->accession, path);
+  system (cmmd);
+#endif
+#ifdef OS_MSWIN
+  sprintf (cmmd, "%s %s -o %s", tpasmartfetchcmd, tsip->accession, path);
+  system (cmmd);
+#endif
+
+  fp = FileOpen (path, "r");
+  if (fp == NULL) {
+    FileRemove (path);
+    return OM_MSG_RET_ERROR;
+  }
+  dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
+  FileClose (fp);
+  FileRemove (path);
+
+  if (dataptr == NULL) return OM_MSG_RET_OK;
+
+  sep = GetTopSeqEntryForEntityID (entityID);
+  if (sep == NULL) return OM_MSG_RET_ERROR;
+  bsp = BioseqFindInSeqEntry (sip, sep);
+  ompcp->output_data = (Pointer) bsp;
+  ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
+  return OM_MSG_RET_DONE;
+}
+
+/* Registers TPASmartBioseqFetchFunc with ObjMgrProcLoad as an OMPROC_FETCH
+   procedure taking OBJ_SEQID and producing OBJ_BIOSEQ.  Always returns TRUE;
+   the value returned by ObjMgrProcLoad is not examined. */
+static Boolean TPASmartFetchEnable (void)
+
+{
+  ObjMgrProcLoad (OMPROC_FETCH,
+                  tpasmartfetchproc, tpasmartfetchproc,
+                  OBJ_SEQID, 0,
+                  OBJ_BIOSEQ, 0,
+                  NULL,
+                  TPASmartBioseqFetchFunc,
+                  PROC_PRIORITY_DEFAULT);
+
+  return TRUE;
+}
+#endif
+
+
+/* Walks every Seq_descr_source descriptor that SeqMgrGetNextDescriptor yields
+   for bsp and returns the concatenation of the lists produced by
+   GetSourceQualFieldListFromBioSource for each one (NULL if there are none). */
+static ValNodePtr CollectFieldList(BioseqPtr bsp)
+{
+  SeqMgrDescContext  context;
+  ValNodePtr         fields = NULL;
+  ValNodePtr         one_source;
+  SeqDescrPtr        src_desc;
+
+  src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
+  while (src_desc != NULL) {
+    one_source = GetSourceQualFieldListFromBioSource ((BioSourcePtr) src_desc->data.ptrvalue);
+    ValNodeLink (&fields, one_source);
+    src_desc = SeqMgrGetNextDescriptor (bsp, src_desc, Seq_descr_source, &context);
+  }
+
+  return fields;
+}
+
+
+/* Writes the tab-delimited header row to fp: two empty leading columns
+   (accession and GI), "TaxID", then one column per entry of field_list
+   labelled with SummarizeFieldType.  Does nothing if fp or field_list is
+   NULL. */
+static void PrintHeader (FILE *fp, ValNodePtr field_list)
+{
+  CharPtr txt;
+
+  if (fp == NULL || field_list == NULL) {
+    return;
+  }
+  /* first field accession, second field GI, third field tax ID */
+  fprintf (fp, "\t\tTaxID");
+  for (; field_list != NULL; field_list = field_list->next) {
+    txt = SummarizeFieldType (field_list);
+    /* never hand a NULL pointer to %s; an unnamed field still gets its column */
+    fprintf (fp, "\t%s", txt == NULL ? "" : txt);
+    txt = MemFree (txt);
+  }
+  fprintf (fp, "\n");
+}
+
+
+/* Returns the integer id of the first "taxon" Dbtag in orp->db, or -1 when
+   orp is NULL or no usable taxon entry exists.  Entries with a NULL Dbtag or
+   a NULL tag are skipped instead of being dereferenced. */
+static Int4 GetTaxIdFromOrgRef (OrgRefPtr orp)
+{
+  DbtagPtr    d;
+  ValNodePtr  vnp;
+
+  if (orp == NULL) {
+    return -1;
+  }
+
+  for (vnp = orp->db; vnp != NULL; vnp = vnp->next) {
+    d = (DbtagPtr) vnp->data.ptrvalue;
+    if (d == NULL || d->tag == NULL) {
+      continue;
+    }
+    if (StringCmp (d->db, "taxon") == 0) {
+      return d->tag->id;
+    }
+  }
+  return -1;
+}
+
+
+/* Appends to the current output line of fp a tab and the tax id of biop
+   (as returned by GetTaxIdFromOrgRef), followed by one tab-prefixed column
+   per entry of field_list holding the value GetSourceQualFromBioSource
+   returns for it (empty when NULL).  No newline is written.  Does nothing if
+   any argument is NULL. */
+static void PrintBioSourceLine (FILE *fp, BioSourcePtr biop, ValNodePtr field_list)
+{
+  CharPtr txt;
+
+  if (fp == NULL || biop == NULL || field_list == NULL) {
+    return;
+  }
+
+  /* Int4 is not guaranteed to be int, so convert explicitly to match the format */
+  fprintf (fp, "\t%ld", (long) GetTaxIdFromOrgRef (biop->org));
+
+  for (; field_list != NULL; field_list = field_list->next) {
+    txt = GetSourceQualFromBioSource (biop, field_list->data.ptrvalue, NULL);
+    fprintf (fp, "\t%s", txt == NULL ? "" : txt);
+    txt = MemFree (txt);
+  }
+}
+
+
+/* Writes one output row per Seq_descr_source descriptor found for bsp.  Each
+   row starts with two id columns followed by the columns written by
+   PrintBioSourceLine.
+
+   Column 1 holds a GenBank id if the Bioseq has one, otherwise the first
+   EMBL / SwissProt / DDBJ / PIR id seen before any such id was chosen;
+   column 2 holds the GI.  When the Bioseq has neither, column 1 holds whatever
+   SeqIdFindBest (bsp->id, SEQID_GENBANK) selects.  Does nothing if any
+   argument is NULL. */
+static void PrintBioseqLines (FILE *fp, BioseqPtr bsp, ValNodePtr field_list)
+{
+  SeqDescrPtr        sdp;
+  SeqMgrDescContext  dcontext;
+  Char               id_txt[255], id_txt2[255];
+  SeqIdPtr           sip, sip_gi = NULL, sip_gb = NULL;
+
+  if (fp == NULL || bsp == NULL || field_list == NULL) {
+    return;
+  }
+
+  for (sip = bsp->id; sip != NULL; sip = sip->next) {
+    if (sip->choice == SEQID_GENBANK
+        || (sip->choice == SEQID_EMBL && sip_gb == NULL)
+        || (sip->choice == SEQID_SWISSPROT && sip_gb == NULL)
+        || (sip->choice == SEQID_DDBJ && sip_gb == NULL)
+        || (sip->choice == SEQID_PIR && sip_gb == NULL)) {
+      sip_gb = sip;
+    } else if (sip->choice == SEQID_GI) {
+      sip_gi = sip;
+    }
+  }
+
+  /* start from empty strings so a column is well defined even when no id is
+     written into it */
+  id_txt[0] = 0;
+  id_txt2[0] = 0;
+
+  if (sip_gb == NULL && sip_gi == NULL) {
+    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_txt, PRINTID_REPORT, sizeof (id_txt) - 1);
+  } else {
+    if (sip_gb != NULL) {
+      SeqIdWrite (sip_gb, id_txt, PRINTID_REPORT, sizeof (id_txt) - 1);
+    }
+    if (sip_gi != NULL) {
+      SeqIdWrite (sip_gi, id_txt2, PRINTID_REPORT, sizeof (id_txt2) - 1);
+    }
+  }
+
+  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+       sdp != NULL;
+       sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext)) {
+    fprintf (fp, "%s\t%s", id_txt, id_txt2);
+    PrintBioSourceLine (fp, sdp->data.ptrvalue, field_list);
+    fprintf (fp, "\n");
+  }
+}
+
+
+/* Writes the row used for an id whose Bioseq could not be obtained: the id
+   text alone, placed in the second column for a GI and in the first column
+   for any other id type.  Does nothing if fp or sip is NULL. */
+static void PrintBioseqErrorLine (FILE *fp, SeqIdPtr sip)
+{
+  Char  label [255];
+
+  if (fp != NULL && sip != NULL) {
+    SeqIdWrite (sip, label, PRINTID_REPORT, sizeof (label) - 1);
+
+    if (sip->choice != SEQID_GI) {
+      fprintf (fp, "%s\t\n", label);
+    } else {
+      fprintf (fp, "\t%s\n", label);
+    }
+  }
+}
+
+
+/* Returns TRUE when str has text (per StringHasNoText) and every character
+   in it is a decimal digit; FALSE otherwise. */
+static Boolean IsAllDigits (CharPtr str)
+{
+  CharPtr cp;
+
+  if (StringHasNoText (str)) return FALSE;
+
+  for (cp = str; *cp != 0; cp++) {
+    /* isdigit is only defined for unsigned char values and EOF, so a plain
+       (possibly negative) char must be converted first */
+    if (! isdigit ((unsigned char) *cp)) {
+      return FALSE;
+    }
+  }
+  return TRUE;
+}
+
+
+/* Builds a SeqId from one line of user input.  Text containing '|' is passed
+   to MakeSeqID unchanged; an all-digit string is treated as a GI ("gi|...");
+   anything else is treated as a GenBank accession ("gb|...").  Returns NULL
+   for blank input or if the temporary string cannot be allocated; otherwise
+   returns whatever MakeSeqID returns. */
+static SeqIdPtr SmartGuessMakeId (CharPtr str)
+{
+  CharPtr   id_txt;
+  CharPtr   prefix;
+  SeqIdPtr  sip;
+
+  if (StringHasNoText (str)) {
+    return NULL;
+  }
+  if (StringChr (str, '|') != NULL) {
+    return MakeSeqID (str);
+  }
+
+  prefix = IsAllDigits (str) ? "gi|" : "gb|";
+
+  /* room for the 3-character prefix, the text and the terminating NUL */
+  id_txt = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + 4));
+  if (id_txt == NULL) {
+    return NULL;
+  }
+  sprintf (id_txt, "%s%s", prefix, str);
+  sip = MakeSeqID (id_txt);
+  id_txt = MemFree (id_txt);
+  return sip;
+}
+
+
+/* Args structure contains command-line arguments */
+
+/* Positions of the entries in myargs below; Main indexes the table with
+   these, so they must stay in step with the order of the entries. */
+#define i_argInputFile 0
+#define o_argOutputFile 1
+
+/* -i names the file Main reads identifiers from (one per line);
+   -o names the file the report is written to. */
+Args myargs [] = {
+ {"Input File", NULL, NULL, NULL,
+ TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
+ {"Output File", NULL, NULL, NULL,
+ TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL}
+};
+
+
+/* Passes *field_list through SortUniqueFieldTypeList, then relinks the first
+   source-qualifier entry that is the taxname text qualifier so that it becomes
+   the head of the list.  Does nothing if field_list or *field_list is NULL. */
+static void SortFieldListForSrcChk (ValNodePtr PNTR field_list)
+{
+  ValNodePtr  before = NULL;
+  ValNodePtr  cur;
+  ValNodePtr  qual;
+
+  if (field_list == NULL || *field_list == NULL) return;
+
+  SortUniqueFieldTypeList (field_list);
+
+  /* find the taxname entry, remembering the node that precedes it */
+  for (cur = *field_list; cur != NULL; before = cur, cur = cur->next) {
+    if (cur->choice != FieldType_source_qual) continue;
+
+    qual = cur->data.ptrvalue;
+    if (qual == NULL) continue;
+    if (qual->choice != SourceQualChoice_textqual) continue;
+    if (qual->data.intvalue != Source_qual_taxname) continue;
+
+    /* a NULL predecessor means taxname is already first */
+    if (before != NULL) {
+      before->next = cur->next;
+      cur->next = *field_list;
+      *field_list = cur;
+    }
+    break;
+  }
+}
+
+
+/*
+*  Main
+*
+*  Reads identifiers, one per line, from the -i file, turns each into a SeqId
+*  with SmartGuessMakeId and locks the corresponding Bioseq to collect the
+*  source-qualifier fields in use.  It then writes a tab-delimited report to
+*  the -o file: a header row followed by the rows produced by PrintBioseqLines
+*  for each id, or by PrintBioseqErrorLine when the Bioseq cannot be locked.
+*
+*  Returns 0 on success (and when GetArgs declines), 1 when a load step, the
+*  fetch setup, or opening either file fails.
+*/
+Int2 Main(void)
+{
+  Char            app [64];
+  Int2            rval = 0;
+  CharPtr         id_file, line;
+  ReadBufferData  rbd;
+  ValNodePtr      field_list = NULL;
+  SeqIdPtr        sip;
+  ValNodePtr      bsp_list = NULL, vnp;
+  BioseqPtr       bsp;
+  FILE            *fp;
+
+
+  /* standard setup */
+
+  ErrSetFatalLevel (SEV_MAX);
+  ErrClearOptFlags (EO_SHOW_USERSTR);
+  UseLocalAsnloadDataAndErrMsg ();
+  ErrPathReset ();
+
+  /* finish resolving internal connections in ASN.1 parse tables */
+
+  if (! AllObjLoad ()) {
+    Message (MSG_FATAL, "AllObjLoad failed");
+    return 1;
+  }
+  if (! SubmitAsnLoad ()) {
+    Message (MSG_FATAL, "SubmitAsnLoad failed");
+    return 1;
+  }
+  if (! FeatDefSetLoad ()) {
+    Message (MSG_FATAL, "FeatDefSetLoad failed");
+    return 1;
+  }
+  if (! SeqCodeSetLoad ()) {
+    Message (MSG_FATAL, "SeqCodeSetLoad failed");
+    return 1;
+  }
+  if (! GeneticCodeTableLoad ()) {
+    Message (MSG_FATAL, "GeneticCodeTableLoad failed");
+    return 1;
+  }
+
+#ifdef INTERNAL_NCBI_SRC_CHK
+  DirSubFetchEnable ();
+  SmartFetchEnable ();
+  TPASmartFetchEnable ();
+
+  if (! PUBSEQBioseqFetchEnable ("src_chk", FALSE)) {
+    Message (MSG_POSTERR, "PUBSEQBioseqFetchEnable failed");
+    return 1;
+  }
+#else
+  PubSeqFetchEnable ();
+#endif
+
+  /* process command line arguments */
+
+  sprintf (app, "src_chk %s", SRC_CHK_APPLICATION);
+  if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
+    return 0;
+  }
+
+  id_file = (CharPtr) myargs [i_argInputFile].strvalue;
+
+  rbd.fp = FileOpen (id_file, "r");
+  if (rbd.fp == NULL) {
+    Message (MSG_ERROR, "Unable to open %s", (CharPtr) myargs [i_argInputFile].strvalue);
+    return 1;
+  }
+  rbd.current_data = NULL;
+
+  /* first pass: remember every id and gather the fields its sources use */
+  line = AbstractReadFunction (&rbd);
+  while (line != NULL && line[0] != EOF) {
+    if (!StringHasNoText (line)) {
+
+      sip = SmartGuessMakeId (line);
+      bsp = BioseqLockById (sip);
+      if (bsp == NULL) {
+        printf ("Unable to download Bioseq for %s\n", line);
+      } else {
+        ValNodeLink (&field_list, CollectFieldList (bsp));
+        BioseqUnlock (bsp);
+      }
+      /* ids that could not be fetched stay in the list so the report still
+         gets a row for them */
+      ValNodeAddPointer (&bsp_list, 0, sip);
+    }
+    line = MemFree (line);
+    line = AbstractReadFunction (&rbd);
+  }
+
+  FileClose (rbd.fp);
+
+  SortFieldListForSrcChk (&field_list);
+
+  /* second pass: write the report */
+  fp = FileOpen ((CharPtr) myargs [o_argOutputFile].strvalue, "w");
+  if (fp == NULL) {
+    Message (MSG_ERROR, "Unable to open %s", (CharPtr) myargs [o_argOutputFile].strvalue);
+    rval = 1;
+  } else {
+    PrintHeader (fp, field_list);
+    for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
+      sip = (SeqIdPtr) vnp->data.ptrvalue;
+      bsp = BioseqLockById (sip);
+      if (bsp == NULL) {
+        PrintBioseqErrorLine (fp, sip);
+      } else {
+        PrintBioseqLines (fp, bsp, field_list);
+        /* only a Bioseq that was actually locked is unlocked */
+        BioseqUnlock (bsp);
+      }
+    }
+    FileClose (fp);
+  }
+
+  /* release the ids on every path, including a failed output open */
+  for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
+    vnp->data.ptrvalue = SeqIdFree ((SeqIdPtr) vnp->data.ptrvalue);
+  }
+  bsp_list = ValNodeFree (bsp_list);
+  field_list = FieldTypeListFree (field_list);
+
+  return rval;
+}
diff --git a/demo/tbl2asn.c b/demo/tbl2asn.c
index a70afa46..ceae97b3 100644
--- a/demo/tbl2asn.c
+++ b/demo/tbl2asn.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 5/5/00
*
-* $Revision: 6.295 $
+* $Revision: 6.297 $
*
* File Description:
*
@@ -73,7 +73,7 @@ static char *date_of_compilation = __DATE__;
#include <objmacro.h>
#include <macroapi.h>
-#define TBL2ASN_APP_VER "14.2"
+#define TBL2ASN_APP_VER "14.3"
CharPtr TBL2ASN_APPLICATION = TBL2ASN_APP_VER;
@@ -6175,7 +6175,8 @@ static void ProcessOneRecord (
{
AsnTypePtr atp_bssse;
BioSourcePtr biop;
- BioseqPtr bsp;
+ BioseqPtr bsp, feat_bsp;
+ Boolean already_converted_ids = FALSE;
BioseqSetPtr bssp = NULL;
Char buf [256];
SeqMgrFeatContext context;
@@ -6338,6 +6339,15 @@ static void ProcessOneRecord (
if (datatype == OBJ_SEQANNOT) {
sap = (SeqAnnotPtr) dataptr;
+
+ if (!StringHasNoText (tbl->center) && !already_converted_ids) {
+ feat_bsp = GetBioseqReferencedByAnnot (sap, entityID);
+ if (feat_bsp == NULL) {
+ VisitBioseqsInSep (sep, tbl->center, MakeGenomeCenterID);
+ already_converted_ids = TRUE;
+ }
+ }
+
ProcessOneAnnot (sap, entityID, tbl);
} else {
@@ -6681,7 +6691,7 @@ static void ProcessOneRecord (
}
}
- if (StringDoesHaveText (tbl->center)) {
+ if (StringDoesHaveText (tbl->center) && !already_converted_ids) {
VisitBioseqsInSep (sep, tbl->center, MakeGenomeCenterID);
}
diff --git a/desktop/pubdesc.c b/desktop/pubdesc.c
index 65b7e5e5..a606528b 100644
--- a/desktop/pubdesc.c
+++ b/desktop/pubdesc.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/28/95
*
-* $Revision: 6.70 $
+* $Revision: 6.71 $
*
* File Description:
*
@@ -554,6 +554,9 @@ static CitBookPtr PutATProc (PubdescPagePtr ppp)
{
vnp->choice = 1;
vnp->data.ptrvalue = SaveStringFromTextAndStripNewlines (ppp->xa_info);
+ if (vnp->data.ptrvalue == NULL) {
+ vnp->data.ptrvalue = StringSave ("?");
+ }
}
vnp = ValNodeNew (vnphead);
if (vnp != NULL)
diff --git a/doc/man/Psequin.1 b/doc/man/Psequin.1
index 79eafce7..7957e049 100644
--- a/doc/man/Psequin.1
+++ b/doc/man/Psequin.1
@@ -3,20 +3,20 @@
Psequin \- submit sequences to Genbank, EMBL, and DDBJ
.SH SYNOPSIS
.B Psequin
-[\|\fB-b\fP\|]
-[\|\fB-bse\fP\|]
-[\|\fB-e\fP\|]
-[\|\fB-f\fP\ \fIfilename\fP\|]
-[\|\fB-gc\fP\|]
-[\|\fB-h\fP\|]
-[\|\fB-oldaln\fP\|]
-[\|\fB-oldasn\fP\|]
-[\|\fB-oldgph\fP\|]
-[\|\fB-oldseq\fP\|]
-[\|\fB-oldsource\fP\|]
-[\|\fB-s\fP\|]
-[\|\fB-w\fP\|]
-[\|\fB-x\fP\|]
+[\|\fB\-b\fP\|]
+[\|\fB\-bse\fP\|]
+[\|\fB\-e\fP\|]
+[\|\fB\-f\fP\ \fIfilename\fP\|]
+[\|\fB\-gc\fP\|]
+[\|\fB\-h\fP\|]
+[\|\fB\-oldaln\fP\|]
+[\|\fB\-oldasn\fP\|]
+[\|\fB\-oldgph\fP\|]
+[\|\fB\-oldseq\fP\|]
+[\|\fB\-oldsource\fP\|]
+[\|\fB\-s\fP\|]
+[\|\fB\-w\fP\|]
+[\|\fB\-x\fP\|]
.SH DESCRIPTION
\fBPsequin\fP is a program designed to aid in the submission of
sequences to the GenBank, EMBL, and DDBJ sequence databases. It was
@@ -46,46 +46,46 @@ is edited. It can display features on the sequence during editing, and
allows feature intervals to be adjusted by direct manipulation.
.SH OPTIONS
.TP
-\fB-b\fP
+\fB\-b\fP
Bioseq-set mode
.TP
-\fB-bse\fP
+\fB\-bse\fP
binseqentry mode
.TP
-\fB-e\fP
+\fB\-e\fP
Entrez mode
.TP
-\fB-f\fP\ \fIfilename\fP
+\fB\-f\fP\ \fIfilename\fP
read from \fIfilename\fP
.TP
-\fB-gc\fP
+\fB\-gc\fP
genome center mode
.TP
-\fB-h\fP
+\fB\-h\fP
turn off automatic help
.TP
-\fB-oldaln\fP
+\fB\-oldaln\fP
use old alignment reader
.TP
-\fB-oldasn\fP
+\fB\-oldasn\fP
leave as old ASN.1
.TP
-\fB-oldgph\fP
+\fB\-oldgph\fP
use old graphic view
.TP
-\fB-oldseq\fP
+\fB\-oldseq\fP
use old sequence view
.TP
-\fB-oldsource\fP
+\fB\-oldsource\fP
use old flat-file source format
.TP
-\fB-s\fP
+\fB\-s\fP
subtool mode
.TP
-\fB-w\fP
+\fB\-w\fP
workbench mode
.TP
-\fB-x\fP
+\fB\-x\fP
read from standard input
.SH AUTHOR
The National Center for Biotechnology Information.
diff --git a/doc/man/asn2all.1 b/doc/man/asn2all.1
index 19f76172..a105a5c5 100644
--- a/doc/man/asn2all.1
+++ b/doc/man/asn2all.1
@@ -1,4 +1,4 @@
-.TH ASN2ALL 1 2007-10-19 NCBI "NCBI Tools User's Manual"
+.TH ASN2ALL 1 2009-07-31 NCBI "NCBI Tools User's Manual"
.SH NAME
asn2all \- generate reports from ASN.1 biological data
.SH SYNOPSIS
@@ -47,16 +47,16 @@ decompression.
.PP
In \fBasn2all\fP, the name of the file to be processed is specified by
the \fB\-i\fP command line argument.
-Use \fB-a\ t\fP to indicate that it is a release file and \fB-b\fP to
+Use \fB\-a\ t\fP to indicate that it is a release file and \fB\-b\fP to
indicate that it is binary ASN.1.
A text ASN.1 file obtained from Entrez can be processed by using
-\fB-a\ a\fP instead of \fB-a\ t\ -b\fP.
+\fB\-a\ a\fP instead of \fB\-a\ t\ \-b\fP.
.PP
Nucleotide and protein records can be processed simultaneously.
-Use the \fB-o\fP argument to indicate the nucleotide output file, and
-the \fB-v\fP argument for the protein output file.
+Use the \fB\-o\fP argument to indicate the nucleotide output file, and
+the \fB\-v\fP argument for the protein output file.
.PP
-The \fB-f\fP argument determines the format to be generated, and is
+The \fB\-f\fP argument determines the format to be generated, and is
documented in more detail (along with other options) in the following
section.
.SH OPTIONS
@@ -79,7 +79,9 @@ Input ASN.1 type:
.RS
.PD 0
.IP a
-any (autodetected; default)
+Automatic (default)
+.IP z
+Any
.IP e
Seq-entry
.IP b
@@ -110,6 +112,8 @@ Output Format:
GenBank/GenPept (default)
.IP f
FASTA
+.IP d
+CDS FASTA
.IP t
Sequin-style 5-column feature table
.IP y
@@ -169,7 +173,7 @@ File selection suffix when working with entire directories.
The command
.RS
.sp
- asn2all -i gbpri1.aso -a t -b -f g -o gbpri1.nuc -v gbpri1.prt
+ asn2all \-i gbpri1.aso \-a t \-b \-f g \-o gbpri1.nuc \-v gbpri1.prt
.sp
.RE
will generate GenBank and GenPept reports from \fBgbpri1.aso\fP.
diff --git a/doc/man/asn2fsa.1 b/doc/man/asn2fsa.1
index f7d5082b..ed4da24f 100644
--- a/doc/man/asn2fsa.1
+++ b/doc/man/asn2fsa.1
@@ -1,4 +1,4 @@
-.TH ASN2FSA 1 2005-05-16 NCBI "NCBI Tools User's Manual"
+.TH ASN2FSA 1 2009-07-31 NCBI "NCBI Tools User's Manual"
.SH NAME
asn2fsa \- convert biological sequence data from ASN.1 to FASTA
.SH SYNOPSIS
@@ -54,7 +54,9 @@ Input ASN.1 type:
.RS
.PD 0
.IP a
-any (autodetected; default)
+Automatic (default)
+.IP z
+Any
.IP e
Seq-entry
.IP b
@@ -126,7 +128,7 @@ Protein output file name
File selection substring (\fB.ent\fP by default) [String]
.TP
\fB\-z\fP
-Print quality score gap as -1
+Print quality score gap as \-1
.SH AUTHOR
The National Center for Biotechnology Information.
.SH SEE ALSO
diff --git a/doc/man/asn2gb.1 b/doc/man/asn2gb.1
index a7357724..8cd09d3c 100644
--- a/doc/man/asn2gb.1
+++ b/doc/man/asn2gb.1
@@ -1,10 +1,11 @@
-.TH ASN2GB 1 2007-10-19 NCBI "NCBI Tools User's Manual"
+.TH ASN2GB 1 2009-07-31 NCBI "NCBI Tools User's Manual"
.SH NAME
asn2gb \- convert ASN.1 biological data to a GenBank-style flat format
.SH SYNOPSIS
.B asn2gb
[\|\fB\-\fP\|]
[\|\fB\-A\fP\ \fIaccession\fP\|]
+[\|\fB\-F\fP\|]
[\|\fB\-a\fP\ \fIasn-type\fP\|]
[\|\fB\-b\fP\|]
[\|\fB\-c\fP\|]
@@ -40,10 +41,14 @@ Print usage message
\fB\-A\fP\ \fIaccession\fP
Accession to fetch
.TP
+\fB\-F\fP
+Fetch remote annotations
+.TP
\fB\-a\fP\ \fIasn-type\fP
ASN.1 Type:
.RS
.PD 0
+.IP "[Single record]"
.IP a
Any (autodetected; default)
.IP e
@@ -54,6 +59,9 @@ Bioseq
bioseq-Set
.IP m
seq-subMit
+.IP q
+Catenated
+.IP "[Release file; components individually processed and freed]"
.IP t
baTch bioseq-set
.IP u
@@ -195,10 +203,14 @@ Report
.IP 2
Sequin/Release
.IP 3
-asn2gb/asn2flat
+asn2gb SSEC/nocleanup
.IP 4
asn2flat BSEC/nocleanup
.IP 5
+asn2gb/asn2flat
+.IP 6
+asn2gb NEW dbxref/OLD dbxref
+.IP 7
oldasn2gb/newasn2gb
.PD
.RE
diff --git a/doc/man/asn2idx.1 b/doc/man/asn2idx.1
index 87a57839..8b3275aa 100644
--- a/doc/man/asn2idx.1
+++ b/doc/man/asn2idx.1
@@ -1,4 +1,4 @@
-.TH ASN2IDX 1 2007-10-19 NCBI "NCBI Tools User's Manual"
+.TH ASN2IDX 1 2008-12-13 NCBI "NCBI Tools User's Manual"
.SH NAME
asn2idx \- index ASN.1 Bioseq-sets for fast access to individual sequences
.SH SYNOPSIS
@@ -29,7 +29,7 @@ Bioseq-sets are Binary
Required Subdirectory
.TP
\fB\-f\fP\ \fIfilter\fP
-Filter (default = \fBgbcon,gbest,gbgss,gbsts\fP)
+Filter (default = \fBgbcon,gbest,gbgss,gbhtg,gbsts\fP)
.TP
\fB\-p\fP\ \fIpath\fP
Path to Files
diff --git a/doc/man/asnval.1 b/doc/man/asnval.1
index 29673f03..23980291 100644
--- a/doc/man/asnval.1
+++ b/doc/man/asnval.1
@@ -1,14 +1,16 @@
-.TH ASNVAL 1 2007-10-19 NCBI "NCBI Tools User's Manual"
+.TH ASNVAL 1 2009-07-31 NCBI "NCBI Tools User's Manual"
.SH NAME
asnval \- validate ASN.1 biological sequence records
.SH SYNOPSIS
.B asnval
[\|\fB\-\fP\|]
[\|\fB\-A\fP\|]
+[\|\fB\-B\fP\|]
[\|\fB\-C\fP\ \fIN\fP\|]
[\|\fB\-E\fP\ \fIstr\fP\|]
[\|\fB\-G\fP\|]
[\|\fB\-J\fP\|]
+[\|\fB\-K\fP\|]
[\|\fB\-L\fP\ \fIfilename\fP\|]
[\|\fB\-M\fP\|]
[\|\fB\-N\fP\ \fIflags\fP\|]
@@ -46,6 +48,9 @@ Print usage message
\fB\-A\fP
Validate Alignments
.TP
+\fB\-B\fP
+Validate Barcodes
+.TP
\fB\-C\fP\ \fIN\fP
Max count
.TP
@@ -58,6 +63,9 @@ GI lookup from accession
\fB\-J\fP
Require ISO-JTA?
.TP
+\fB\-K\fP
+Summary to error file
+.TP
\fB\-L\fP\ \fIfilename\fP
Log File
.TP
@@ -90,9 +98,9 @@ informational
.IP 2
warning
.IP 3
-error (default for \fB-Q\fP)
+error (default for \fB\-Q\fP)
.IP 4
-grounds for rejection (default for \fB-P\fP, \fB-R\fP)
+grounds for rejection (default for \fB\-P\fP, \fB\-R\fP)
.PD
.RE
.TP
@@ -122,7 +130,9 @@ Input ASN.1 type:
.RS
.PD 0
.IP a
-Any (autodetected; default)
+Automatic (default)
+.IP z
+Any
.IP e
seq-Entry
.IP b
@@ -172,7 +182,7 @@ Remote Fetching from ID
Recurse
.TP
\fB\-v\fP\ \fIN\fP
-Verbosity, from 0 (default) to 3
+Verbosity, from \fB0\fP to \fB4\fP (\fB1\fP by default)
.TP
\fB\-x\fP\ \fIstr\fP
File selection substring (\fB.ent\fP by default)
diff --git a/doc/man/blast.1 b/doc/man/blast.1
index e6205e04..8a007f0e 100644
--- a/doc/man/blast.1
+++ b/doc/man/blast.1
@@ -1,4 +1,4 @@
-.TH BLAST 1 2007-10-19 NCBI "NCBI Tools User's Manual"
+.TH BLAST 1 2009-08-02 NCBI "NCBI Tools User's Manual"
.SH NAME
bl2seq, blast, blastall, blastall_old, blastcl3, blastpgp, impala, megablast, rpsblast, seedtop \- Basic Local Alignment Search Tool
.SH SYNOPSIS
@@ -32,7 +32,10 @@ bl2seq, blast, blastall, blastall_old, blastcl3, blastpgp, impala, megablast, rp
[\|\fB\-r\fP\ \fIN\fP\|]
[\|\fB\-t\fP\ \fIN\fP\|]
.PP
-.B blast
+.\" Debian renames blast to blast2 to avoid clashing with an unrelated
+.\" blast executable.
+.ds bx blast
+.B \*(bx
[\|\fB\-\fP\|]
[\|\fB\-B\fP\ \fIN\fP\|]
[\|\fB\-D\fP\ \fIN\fP\|]
@@ -372,6 +375,7 @@ bl2seq, blast, blastall, blastall_old, blastcl3, blastpgp, impala, megablast, rp
[\|\fB\-G\fP\ \fIN\fP\|]
[\|\fB\-I\fP\|]
[\|\fB\-J\fP\|]
+[\|\fB\-K\fP\ \fIN\fP\|]
[\|\fB\-M\fP\ \fIstr\fP\|]
[\|\fB\-O\fP\ \fIfilename\fP\|]
[\|\fB\-S\fP\ \fIN\fP\|]
@@ -395,7 +399,7 @@ are documented together because they have a lot of common options.
the blastn or blastp algorithm. Both sequences must be either
nucleotides or proteins.
.PP
-\fBblast\fP compares a sequence against either a local database or a
+\fB\*(bx\fP compares a sequence against either a local database or a
second sequence; it incorporates most of the functionality of both
\fBbl2seq\fP and \fBblastall\fP, but uses a semi-experimental new
internal engine.
@@ -404,7 +408,7 @@ internal engine.
local database for a sequence.
\fBblastall\fP uses a newer engine than \fBblastall_old\fP by default,
but supports using the older engine as well (when invoked with the
-option \fB-V\ F\fP).
+option \fB\-V\ F\fP).
.PP
\fBblastcl3\fP accesses the newest NCBI BLAST search engine (version
2.0). The software behind BLAST version 2.0 was written from scratch
@@ -428,7 +432,7 @@ compare two large sets of sequences against each other.
.PP
\fBrpsblast\fP (Reverse PSI-BLAST) searches a query sequence against a
database of profiles. This is the opposite of PSI-BLAST that searches
-a profile against a database of sequences, hence the 'Reverse'.
+a profile against a database of sequences, hence the `Reverse'.
\fBrpsblast\fP uses a BLAST-like algorithm, finding single- or
double-word hits and then performing an ungapped extension on these
candidate matches. If a sufficiently high-scoring ungapped alignment
@@ -485,7 +489,7 @@ Input sequences in the form of accession.version
Multiple Hits window size; generally defaults to 0 (for single-hit
extensions), but defaults to 40 when using discontiguous templates.
.TP
-\fB\-B\fP\ \fIN\fP (blast)
+\fB\-B\fP\ \fIN\fP (\*(bx)
Produce on-the-fly output:
.RS
.PD 0
@@ -508,15 +512,16 @@ Number of concatenated queries, in blastn or tblastn mode
\fB\-B\fP\ \fIfilename\fP (blastpgp)
Input Alignment File for PSI-BLAST Restart
.TP
-\fB\-C\fP\ \fIX\fP (blast, blastall, blastall_old, blastcl3)
+\fB\-C\fP\ \fIX\fP (\*(bx, blastall, blastall_old, blastcl3)
Use composition-based statistics for blastp or tblastn:
.RS
.PD 0
-.IP "D or d"
-Default (equivalent to \fBT\fP)
+.IP "T, t, D, or d"
+Default (equivalent to \fB1\fP for \fB\*(bx\fP and \fBblastall_old\fP
+and to \fB2\fP for \fBblastall\fP and \fBblastcl3\fP)
.IP "0, F, or f"
No composition-based statistics
-.IP "1, T, or t"
+.IP 1
Composition-based statistics as in \fINAR\fP 29:2994-3005, 2001
.IP 2
Composition-based score adjustment as in \fIBioinformatics\fP 21:902-911,
@@ -528,7 +533,7 @@ Composition-based score adjustment as in \fIBioinformatics\fP 21:902-911,
.RE
.RS
When enabling statistics in blastall, blastall_old, or blastcl3 (\fIi.e.\fP,
-not blast), appending \fBu\fP (case-insensitive) to the mode enables
+not \*(bx), appending \fBu\fP (case-insensitive) to the mode enables
use of unified p-values combining alignment and compositional p-values
in round 1 only.
.RE
@@ -550,7 +555,7 @@ tabular
.PD
.RE
.TP
-\fB\-D\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3)
+\fB\-D\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3)
Translate sequences in the database according to genetic code \fIN\fP
in /usr/share/ncbi/data/gc.prt (default is 1; only applies to tblast*)
.TP
@@ -577,39 +582,39 @@ incremental binary ASN.1
Cost decline to align (default = 99999)
.TP
\fB\-E\fP\ \fIN\fP (bl2seq, blastcl3, megablast)
-Extending a gap costs \fIN\fP (-1 invokes default behavior)
+Extending a gap costs \fIN\fP (\-1 invokes default behavior)
.TP
-\fB\-E\fP\ \fIN\fP (blast, blastall, blastall_old)
-Extending a gap costs \fIN\fP (-1 invokes default behavior:
+\fB\-E\fP\ \fIN\fP (\*(bx, blastall, blastall_old)
+Extending a gap costs \fIN\fP (\-1 invokes default behavior:
non-affine if greedy, 2 otherwise)
.TP
\fB\-E\fP\ \fIN\fP (blastpgp, impala, seedtop)
Extending a gap costs \fIN\fP (default is 1)
.TP
-\fB\-F\fP\ \fIstr\fP (bl2seq, blast, blastall, blastall_old, blastpgp,
+\fB\-F\fP\ \fIstr\fP (bl2seq, \*(bx, blastall, blastall_old, blastpgp,
blastcl3, impala, megablast, rpsblast)
Filter options for DUST or SEG; defaults to \fBT\fP for bl2seq,
-blast, blastall, blastall_old, blastcl3, and megablast, and to
+\*(bx, blastall, blastall_old, blastcl3, and megablast, and to
\fBF\fP for blastpgp, impala, and rpsblast.
.TP
\fB\-F\fP (seedtop)
Filter sequence with SEG.
.TP
\fB\-G\fP\ \fIN\fP (bl2seq, blastcl3, megablast)
-Opening a gap costs \fIN\fP (-1 invokes default behavior)
+Opening a gap costs \fIN\fP (\-1 invokes default behavior)
.TP
-\fB\-G\fP\ \fIN\fP (blast, blastall, blastall_old)
-Opening a gap costs \fIN\fP (-1 invokes default behavior: non-affine
+\fB\-G\fP\ \fIN\fP (\*(bx, blastall, blastall_old)
+Opening a gap costs \fIN\fP (\-1 invokes default behavior: non-affine
if greedy, 5 if using dynamic programming)
.TP
\fB\-G\fP\ \fIN\fP (blastpgp, impala, seedtop)
Opening a gap costs \fIN\fP (default is 11)
.TP
-\fB\-H\fP (blast)
+\fB\-H\fP (\*(bx)
Produce HTML output
.TP
\fB\-H\fP\ \fIN\fP (blastpgp)
-End of required region in query (-1 indicates end of query)
+End of required region in query (\-1 indicates end of query)
.TP
\fB\-H\fP (impala)
Print help (different from usage message)
@@ -617,41 +622,46 @@ Print help (different from usage message)
\fB\-H\fP\ \fIN\fP (megablast)
Maximal number of HSPs to save per database sequence (default is 0, unlimited)
.TP
-\fB\-I\fP\ \(dq\fIstart\ stop\fP\(dq (bl2seq, blast)
+\fB\-I\fP\ \(dq\fIstart\ stop\fP\(dq (bl2seq, \*(bx)
Location on first (query) sequence (applies only if file specified
-with \fB-i\fP contains a single sequence)
+with \fB\-i\fP contains a single sequence)
.TP
\fB\-I\fP (blastall, blastall_old, blastcl3, blastpgp, impala, megablast,
rpsblast, seedtop)
Show GIs in deflines
.TP
-\fB\-J\fP\ \(dq\fIstart\ stop\fP\(dq (bl2seq, blast)
+\fB\-J\fP\ \(dq\fIstart\ stop\fP\(dq (bl2seq, \*(bx)
Location on second (subject) sequence (applies only if file specified
-with \fB-j\fP contains a single sequence)
+with \fB\-j\fP contains a single sequence)
.TP
\fB\-J\fP (blastall, blastall_old, blastcl3, blastpgp, impala, megablast,
rpsblast, seedtop)
Believe the query defline
.TP
-\fB\-K\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3, blastpgp)
-Number of best hits from a region to keep (off by default, if used a
-value of 100 is recommended)
+\fB\-K\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp)
+Number of best hits from a region to keep.
+Off by default.
+If used, a value of 100 is recommended.
+Very high values of \fB\-v\fP or \fB\-b\fP are also suggested.
+.TP
+\fB\-K\fP\ \fIN\fP (seedtop)
+Internal hit buffer size multiplier (wrt query length; default = 2)
.TP
-\fB\-L\fP (blast)
+\fB\-L\fP (\*(bx)
Use (classical Mega BLAST) lookup table with width 12
.TP
\fB\-L\fP\ \fIstart,stop\fP (blastall, blastall_old, blastcl3, megablast,
rpsblast)
Location on query sequence (for rpsblast, only valid in blastp mode)
.TP
-\fB\-M\fP\ \fIstr\fP (bl2seq, blast, blastall, blastall_old, blastcl3,
+\fB\-M\fP\ \fIstr\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3,
blastpgp, impala, seedtop)
Use matrix \fIstr\fP (default = BLOSUM62)
.TP
\fB\-M\fP\ \fIN\fP (megablast)
Maximal total length of queries for a single search (default = 5000000)
.TP
-\fB\-N\fP (blast)
+\fB\-N\fP (\*(bx)
Show only accessions for sequence IDs in tabular output
.TP
\fB\-N\fP\ \fIX\fP (blastpgp, rpsblast)
@@ -676,7 +686,7 @@ Write (ASN.1) sequence alignments to \fIfilename\fP; only valid for
blastpgp, impala, rpsblast, and seedtop with \fB\-J\fP, and only valid
for megablast with \fB\-D2\fP.
.TP
-\fB\-P\fP\ \fIX\fP (blast)
+\fB\-P\fP\ \fIX\fP (\*(bx)
Identity percentage cut-off
.TP
\fB\-P\fP\ \fIN\fP (blastall, blastall_old, blastcl3, blastpgp, rpsblast)
@@ -689,7 +699,7 @@ Read matrix profiles from database \fIfilename\fP
\fB\-P\fP\ \fIN\fP (megablast)
Maximal number of positions for a hash value (set to 0 [default] to ignore)
.TP
-\fB\-Q\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3)
+\fB\-Q\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3)
Translate query according to genetic code \fIN\fP in
/usr/share/ncbi/data/gc.prt (default is 1)
.TP
@@ -697,9 +707,9 @@ Translate query according to genetic code \fIN\fP in
Output File for PSI-BLAST Matrix in ASCII
.TP
\fB\-Q\fP\ \fIfilename\fP (megablast)
-Masked query output; requires \fB-D\ 2\fP
+Masked query output; requires \fB\-D\ 2\fP
.TP
-\fB\-R\fP (blast)
+\fB\-R\fP (\*(bx)
Compute locally optimal Smith-Waterman alignments.
(This option is only available for gapped tblastn.)
.TP
@@ -715,7 +725,7 @@ Input File for PSI-BLAST Restart
\fB\-R\fP (megablast)
Report the log information at the end of output
.TP
-\fB\-S\fP\ \fIN\fP (bl2seq, blast, blastall, blastall_old, blastcl3,
+\fB\-S\fP\ \fIN\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3,
megablast)
Query strands to search against database for blastn, blastx, tblastx:
.RS
@@ -739,7 +749,7 @@ Cutoff cost (default = 30)
rpsblast)
Produce HTML output
.TP
-\fB\-T\fP\ \fIN\fP (blast)
+\fB\-T\fP\ \fIN\fP (\*(bx)
Type of a discontiguous word template:
.RS
.PD 0
@@ -759,10 +769,10 @@ Use lower case filtering for the query sequence
\fB\-V\fP (bl2seq, blastall, megablast)
Force use of legacy engine
.TP
-\fB\-V\fP (blast)
+\fB\-V\fP (\*(bx)
Use variable word size approach to database scanning
.TP
-\fB\-W\fP\ \fIN\fP (bl2seq, blast, blastall, blastall_old, blastcl3,
+\fB\-W\fP\ \fIN\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3,
blastpgp, megablast, rpsblast)
Use words of size \fIN\fP (length of best perfect match; zero invokes
default behavior, except with megablast, which defaults to 28, and
@@ -770,7 +780,7 @@ blastpgp, which defaults to 3. The default values for the other
commands vary with "program": 11 for blastn, 28 for megablast, and 3
for everything else.)
.TP
-\fB\-X\fP\ \fIN\fP (bl2seq, blast, blastall, blastall_old, blastcl3,
+\fB\-X\fP\ \fIN\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3,
blastpgp, megablast, rpsblast, seedtop)
X dropoff value for gapped alignment (in bits) (zero invokes default
behavior, except with megablast, which defaults to 20, and rpsblast
@@ -778,39 +788,43 @@ and seedtop, which default to 15. The default values for the other
commands vary with "program": 30 for blastn, 20 for megablast, 0 for
tblastx, and 15 for everything else.)
.TP
-\fB\-Y\fP\ \fIX\fP (bl2seq, blast, blastall, blastall_old, blastcl3,
+\fB\-Y\fP\ \fIX\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3,
blastpgp, megablast, rpsblast)
Effective length of the search space (use zero for the real size)
.TP
-\fB\-Z\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3, blastpgp,
+\fB\-Z\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp,
megablast, rpsblast)
X dropoff value for final [dynamic programming?] gapped alignment in
-bits (default is 50 for blastn and megablast, 0 for tblastx, 25 for
+bits (default is 100 for blastn and megablast, 0 for tblastx, 25 for
others)
.TP
\fB\-a\fP\ \fIfilename\fP (bl2seq)
Write text ASN.1 output to \fIfilename\fP
.TP
-\fB\-a\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3, blastpgp,
+\fB\-a\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp,
impala, megablast, rpsblast)
Number of threads to use (default is one)
.TP
-\fB\-b\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3, blastpgp,
+\fB\-b\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp,
impala, megablast, rpsblast)
Number of database sequences to show alignments for (B) (default is 250)
.TP
-\fB\-c\fP (blast)
+\fB\-c\fP (\*(bx)
Mask lower case
.TP
-\fB\-c\fP\ \fIN\fP (blastpgp, impala)
-Constant in pseudocounts for multipass version (default is 9)
+\fB\-c\fP\ \fIN\fP (blastpgp)
+Constant in pseudocounts for multipass version; 0 (default) uses
+entropy method; otherwise a value near 30 is recommended
+.TP
+\fB\-c\fP\ \fIN\fP (impala)
+Constant in pseudocounts for multipass version (default is 10)
.TP
\fB\-d\fP\ \fIN\fP (bl2seq)
Use theoretical DB size of \fIN\fP (zero stands for the real size)
.TP
-\fB\-d\fP\ \fIstr\fP (blast, blastall, blastall_old, blastcl3, blastpgp,
+\fB\-d\fP\ \fIstr\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp,
impala, megablast, seedtop)
-Database to use (default is nr for all executables except blast,
+Database to use (default is nr for all executables except \*(bx,
which requires a second FASTA sequence if this is not set)
.TP
\fB\-d\fP\ \fIfilename\fP (rpsblast)
@@ -819,7 +833,7 @@ RPS BLAST Database
\fB\-e\fP\ \fIX\fP
Expectation value (E) (default = 10.0)
.TP
-\fB\-f\fP\ \fIX\fP (blast, blastall, blastall_old, blastcl3)
+\fB\-f\fP\ \fIX\fP (\*(bx, blastall, blastall_old, blastcl3)
Threshold for extending hits, default if zero: 0 for blastn and
megablast, 11 for blastp, 12 for blastx, and 13 for tblasn and
tblastx.
@@ -828,7 +842,7 @@ tblastx.
Threshold for extending hits (default 11)
.TP
\fB\-f\fP (megablast)
-Show full IDs in the output (default - only GIs or accessions)
+Show full IDs in the output (default: only GIs or accessions)
.TP
\fB\-f\fP (seedtop)
Force searching for patterns even if they are too likely
@@ -836,14 +850,14 @@ Force searching for patterns even if they are too likely
\fB\-g\ F\fP (bl2seq, blastall, blastall_old, blastcl3)
Do not perform gapped alignment (N/A for tblastx)
.TP
-\fB\-g\fP (blast)
+\fB\-g\fP (\*(bx)
Use greedy algorithm for gapped extensions
.TP
\fB\-g\ F\fP (megablast)
Make discontiguous megablast generate words for every base of the
database (mandatory with the current BLAST engine)
.TP
-\fB\-h\fP\ \fIN\fP (blast)
+\fB\-h\fP\ \fIN\fP (\*(bx)
Frame shift penalty for out-of-frame gapping (blastx, tblastn only;
default is zero)
.TP
@@ -855,13 +869,13 @@ for blastpgp, 0.005 for impala)
Read (first, query) sequence or set from \fIfilename\fP (default is
stdin; not needed for blastpgp if restarting from scoremat)
.TP
-\fB\-j\fP\ \fIfilename\fP (bl2seq, blast)
+\fB\-j\fP\ \fIfilename\fP (bl2seq, \*(bx)
Read second (subject) sequence or set from \fIfilename\fP
.TP
\fB\-j\fP\ \fIN\fP (blastpgp)
Maximum number of passes to use in multipass version (default = 1)
.TP
-\fB\-k\fP\ \fIstr\fP (blast)
+\fB\-k\fP\ \fIstr\fP (\*(bx)
Pattern for PHI-BLAST
.TP
\fB\-k\fP\ \fIfilename\fP (blastpgp, seedtop)
@@ -876,7 +890,7 @@ Log messages to \fIfilename\fP rather than standard error.
\fB\-m\fP (bl2seq)
Use Mega Blast for search
.TP
-\fB\-m\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3, blastpgp,
+\fB\-m\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp,
impala, megablast, rpsblast)
alignment view options:
.RS
@@ -908,7 +922,7 @@ ASN.1 binary (not available for impala or rpsblast)
.PD
.RE
.TP
-\fB\-n\fP (blast)
+\fB\-n\fP (\*(bx)
Show GIs in sequence IDs
.TP
\fB\-n\fP (blastall, blastall_old, blastcl3)
@@ -920,7 +934,7 @@ Use non-greedy (dynamic programming) extension for affine gap scores
\fB\-o\fP\ \fIfilename\fP
Write final alignment report to \fIfilename\fP rather than stdout
.TP
-\fB\-p\fP\ \fIstr\fP (bl2seq, blast, blastall, blastall_old, blastcl3)
+\fB\-p\fP\ \fIstr\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3)
Use the "program" (comparison type) \fIstr\fP. The \fBDESCRIPTION\fP
section covers this option in more detail.
.TP
@@ -944,10 +958,10 @@ indicates which sequences contain a pattern
.PD
.RE
.TP
-\fB\-q\fP\ \fIN\fP (bl2seq, blast, blastall, blastall_old, blastcl3,
+\fB\-q\fP\ \fIN\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3,
megablast, seedtop)
-Penalty for a nucleotide mismatch (blastn only) (default = -10 for
-seedtop, -3 for everything else)
+Penalty for a nucleotide mismatch (blastn only) (default = \-10 for
+seedtop, \-3 for everything else)
.TP
\fB\-q\fP\ \fIN\fP (blastpgp)
ASN.1 Scoremat input of checkpoint data:
@@ -962,12 +976,12 @@ restart from binary scoremat checkpoint file
.PD
.RE
.TP
-\fB\-r\fP\ \fIN\fP (bl2seq, blast, blastall, blastall_old, blastcl3,
+\fB\-r\fP\ \fIN\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3,
megablast, seedtop)
Reward for a nucleotide match (blastn only) (default = 10 for seedtop,
--10 for everything else)
+1 for everything else)
.TP
-\fB\-s\fP (blast)
+\fB\-s\fP (\*(bx)
No-op (formerly requested generating words for every base of the database)
.TP
\fB\-s\fP (blastall, blastall_old, blastcl3, blastpgp)
@@ -978,24 +992,24 @@ tblastn mode.
\fB\-s\fP\ \fIN\fP (megablast)
Minimal hit score to report (0 for default behavior)
.TP
-\fB\-t\fP\ \fIN\fP (bl2seq, blast, blastall, blastall_old, blastcl3)
+\fB\-t\fP\ \fIN\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3)
Length of a discontiguous word template (the largest intron allowed in
a translated nucleotide sequence when linking multiple distinct
assignments; default = 0; negative values disable linking for blastall,
blastall_old, and blastcl3.)
.TP
\fB\-t\fP\ \fIN\fP[\|\fBu\fP\|] (blastpgp)
-Composition-based statistics mode. The first character is interpreted
-as follows:
+Composition-based score adjustment.
+The first character is interpreted as follows:
.RS
.PD 0
.IP "0, F, or f"
no composition-based statistics
-.IP "1, T, or t"
+.IP 1
composition-based statistics as in \fINAR\fP 29:2994\-3005, 2001
-.IP 2
+.IP "2, T, or t"
composition-based score adjustment as in \fIBioinformatics\fP
-21:902-911, 2005, conditioned on sequence properties in round 1
+21:902-911, 2005, conditioned on sequence properties in round 1 (default)
.IP 3
composition-based score adjustment as in \fIBioinformatics\fP
21:902-911, 2005, unconditionally in round 1
@@ -1009,7 +1023,7 @@ alignment p-value and compositional p-value in round 1 only.
\fB\-t\fP\ \fIN\fP (megablast)
Length of a discontiguous word template (contiguous word if 0 [default])
.TP
-\fB\-u\fP (blast)
+\fB\-u\fP (\*(bx)
Do only ungapped alignment (always TRUE for tblastx)
.TP
\fB\-u\fP\ \fIstr\fP (blastcl3)
@@ -1022,24 +1036,24 @@ ASN.1 Scoremat output of checkpoint data:
.IP 0
no scoremat output (default)
.IP 1
-output ASCII scoremat checkpoint file (requires \fB-J\fP)
+output ASCII scoremat checkpoint file (requires \fB\-J\fP)
.IP 2
-output binary scoremat checkpoint file (requires \fB-J\fP)
+output binary scoremat checkpoint file (requires \fB\-J\fP)
.PD
.RE
.TP
-\fB\-v\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3, blastpgp,
+\fB\-v\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp,
impala, megablast, rpsblast)
Number of one-line descriptions to show (V) (default = 500)
.TP
-\fB\-w\fP\ \fIN\fP (blast)
+\fB\-w\fP\ \fIN\fP (\*(bx)
Window size (max. allowed distance between a pair of initial hits; 0
-invokes default behavior, -1 turns off multiple hits)
+invokes default behavior, \-1 turns off multiple hits)
.TP
\fB\-w\fP\ \fIN\fP (blastall, blastall_old, blastcl3)
Frame shift penalty (OOF algorithm for blastx)
.TP
-\fB\-y\fP\ \fIX\fP (blast, blastall, blastall_old, blastcl3, blastpgp,
+\fB\-y\fP\ \fIX\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp,
impala, rpsblast)
X dropoff for ungapped extensions in bits (0.0 invokes default
behavior: 20 for blastn, 10 for megablast, and 7 for all others.)
@@ -1047,7 +1061,7 @@ behavior: 20 for blastn, 10 for megablast, and 7 for all others.)
\fB\-y\fP\ \fIN\fP (megablast)
X dropoff value for ungapped extension (default is 10)
.TP
-\fB\-z\fP\ \fIN\fP (blast)
+\fB\-z\fP\ \fIN\fP (\*(bx)
Longest intron length for uneven gap HSP linking (tblastn only;
default is 0)
.TP
@@ -1066,6 +1080,7 @@ The National Center for Biotechnology Information.
.BR formatdb (1),
.BR formatrpsdb (1),
.BR makemat (1),
+.BR taxblast (1),
blast.html,
seedtop.html,
<http://www.ncbi.nlm.nih.gov/BLAST/>.
diff --git a/doc/man/cleanasn.1 b/doc/man/cleanasn.1
index b0ccaae9..403e34e5 100644
--- a/doc/man/cleanasn.1
+++ b/doc/man/cleanasn.1
@@ -1,22 +1,30 @@
-.TH CLEANASN 1 2007-10-19 NCBI "NCBI Tools User's Manual"
+.TH CLEANASN 1 2009-07-31 NCBI "NCBI Tools User's Manual"
.SH NAME
cleanasn \- clean up irregularities in NCBI ASN.1 objects
.SH SYNOPSIS
.B cleanasn
[\|\fB\-\fP\|]
+[\|\fB\-D\fP\ \fIstr\fP\|]
[\|\fB\-F\fP\ \fIstr\fP\|]
[\|\fB\-K\fP\ \fIstr\fP\|]
+[\|\fB\-L\fP\ \fIfilename\fP\|]
+[\|\fB\-M\fP\ \fIfilename\fP\|]
[\|\fB\-N\fP\ \fIstr\fP\|]
+[\|\fB\-P\fP\|]
+[\|\fB\-Q\fP\ \fIstr\fP\|]
[\|\fB\-R\fP\|]
[\|\fB\-T\fP\|]
+[\|\fB\-U\fP\ \fIstr\fP\|]
+[\|\fB\-X\fP\ \fIstr\fP\|]
[\|\fB\-a\fP\ \fIstr\fP\|]
[\|\fB\-b\fP\|]
[\|\fB\-c\fP\|]
[\|\fB\-f\fP\ \fIstr\fP\|]
[\|\fB\-i\fP\ \fIfilename\fP\|]
-[\|\fB\-l\fP\ \fIfilename\fP\|]
+[\|\fB\-m\fP\ \fIstr\fP\|]
[\|\fB\-o\fP\ \fIfilename\fP\|]
[\|\fB\-p\fP\ \fIpath\fP\|]
+[\|\fB\-q\fP\ \fIpath\fP\|]
[\|\fB\-r\fP\ \fIpath\fP\|]
[\|\fB\-x\fP\ \fIext\fP\|]
.SH DESCRIPTION
@@ -28,6 +36,15 @@ A summary of options is included below.
\fB\-\fP
Print usage message
.TP
+\fB\-D\fP\ \fIstr\fP
+Clean up descriptors, per the flags in str:
+.RS
+.PD 0
+.IP t
+Remove Title
+.PD
+.RE
+.TP
\fB\-F\fP\ \fIstr\fP
Clean up features, per the flags in str:
.RS
@@ -47,11 +64,25 @@ Perform a general cleanup, per the flags in str:
.PD 0
.IP b
BasicSeqEntryCleanup
+.IP p
+C++ BasicCleanup (via an external utility)
.IP s
SeriousSeqEntryCleanup
+.IP g
+GpipeSeqEntryCleanup
+.IP n
+Normalize Descriptor Order
+.IP u
+Remove NcbiCleanup User Objects
.PD
.RE
.TP
+\fB\-L\fP\ \fIfilename\fP
+Log file
+.TP
+\fB\-M\fP\ \fIfilename\fP
+Macro file
+.TP
\fB\-N\fP\ \fIstr\fP
Clean up links, per the flags in str:
.RS
@@ -67,12 +98,50 @@ ClearFeatureIDs
.PD
.RE
.TP
+\fB\-P\fP
+Publication Lookup
+.TP
+\fB\-Q\fP\ \fIstr\fP
+Report:
+.RS
+.PD 0
+.IP r
+ASN.1 BSEC/SSEC Report
+.IP g
+GenBank SSEC Diff
+.IP m
+Modernization
+.PD
+.RE
+.TP
\fB\-R\fP
Remote fetching from ID (NCBI sequence databases)
.TP
\fB\-T\fP
Taxonomy Lookup
.TP
+\fB\-U\fP\ \fIstr\fP
+Modernize, per the flags in str:
+.RS
+.PD 0
+.IP g
+Genes
+.IP r
+RNA
+.IP p
+PCR Primers
+.PD
+.RE
+.TP
+\fB\-X\fP\ \fIstr\fP
+Miscellaneous options, per str:
+.RS
+.PD 0
+.IP d
+Automatic definition line
+.PD
+.RE
+.TP
\fB\-a\fP\ \fIstr\fP
ASN.1 type
.RS
@@ -104,8 +173,20 @@ Substring filter
\fB\-i\fP\ \fIfilename\fP
Single input file (defaults to stdin)
.TP
-\fB\-l\fP\ \fIfilename\fP
-Log file
+\fB\-m\fP\ \fIstr\fP
+Flatfile mode:
+.RS
+.PD 0
+.IP r
+Release
+.IP e
+Entrez
+.IP s
+Sequin
+.IP d
+Dump
+.PD
+.RE
.TP
\fB\-o\fP\ \fIfilename\fP
Single output file (defaults to stdout)
@@ -113,6 +194,9 @@ Single output file (defaults to stdout)
\fB\-p\fP\ \fIpath\fP
Process all matching files in \fIpath\fP
.TP
+\fB\-q\fP\ \fIpath\fP
+Ffdiff executable (default is /netopt/genbank/subtool/bin/ffdiff)
+.TP
\fB\-r\fP\ \fIpath\fP
Path for results
.TP
diff --git a/doc/man/fa2htgs.1 b/doc/man/fa2htgs.1
index b604fa02..1186452f 100644
--- a/doc/man/fa2htgs.1
+++ b/doc/man/fa2htgs.1
@@ -101,7 +101,7 @@ For example:
Contig2 + 1 SP6 left
Contig3 + 1
- Contig1 - T7 right
+ Contig1 \- T7 right
.fi
The first column is the contig name, the second is the orientation,
@@ -180,12 +180,12 @@ Length of sequence in bp (default = 0). The length is checked against
the actual number of bases we get. For phase 1 and 2 sequence it is
also used to estimate gap lengths. For phase 1 and 2 records, it is
important to use a number GREATER than the amount of provided
-nucleotide, otherwise this will generate false 'gaps'. Here is
+nucleotide, otherwise this will generate false `gaps'. Here is
assumed that the putative full length of the BAC or cosmid will be
-used. There should be at least 20 to 30 'n' in between the segments
+used. There should be at least 20 to 30 `n' in between the segments
(you can check for these in Sequin), as this will ensure proper
behavior when this sequence is used with BLAST. Otherwise
-'artifactual' unrelated segment neighbors may be brought into
+`artifactual' unrelated segment neighbors may be brought into
proximity of each other.
.TP
\fB\-m\fP
diff --git a/doc/man/fastacmd.1 b/doc/man/fastacmd.1
index bad55531..afa5ab71 100644
--- a/doc/man/fastacmd.1
+++ b/doc/man/fastacmd.1
@@ -21,11 +21,11 @@ fastacmd \- retrieve FASTA sequences from a BLAST database
[\|\fB\-t\fP\|]
.SH DESCRIPTION
\fBfastacmd\fP retrieves FASTA formatted sequences from a
-\fBblast\fP(1) database formatted using the '\fB\-o\fP' option. An
+\fBblast\fP(1) database formatted using the `\fB\-o\fP' option. An
example \fBfastacmd\fP call would be
.PP
.ce
-fastacmd -d nr -s p38398
+fastacmd \-d nr \-s p38398
.SH OPTIONS
A summary of options is included below.
.TP
@@ -92,7 +92,7 @@ Type of file:
.RS
.PD 0
.IP G
-guess (default) - look for protein, then nucleotide
+guess (default): look for protein, then nucleotide
.IP T
protein
.IP F
@@ -103,7 +103,7 @@ nucleotide
\fB\-s\fP\ \fIstr\fP
Comma-delimited search string(s).
GIs, accessions, loci, or fullSeq-id strings may be used,
-\fIe.g.\fP, \fB555\fP, \fBAC147927\fP, \fB'gnl|dbname|tag'\fP
+\fIe.g.\fP, \fB555\fP, \fBAC147927\fP, \fB\(aqgnl|dbname|tag\(aq\fP
.TP
\fB\-t\fP
Definition line should contain target GI only
diff --git a/doc/man/formatdb.1 b/doc/man/formatdb.1
index 23e4e233..d56460ed 100644
--- a/doc/man/formatdb.1
+++ b/doc/man/formatdb.1
@@ -98,10 +98,10 @@ GSS's, and HTGS's.
Title for database file [String]
.TP
\fB\-v\fP\ \fIN\fP
-Break up large FASTA files into 'volumes' of size \fIN\fP million
+Break up large FASTA files into `volumes' of size \fIN\fP million
letters (4000 by default). As part of the creation of a volume,
\fBformatdb\fP writes a new type of BLAST database file, called an
-alias file, with the extension 'nal' or 'pal'.
+alias file, with the extension `nal' or `pal'.
.SH AUTHOR
The National Center for Biotechnology Information.
.SH SEE ALSO
diff --git a/doc/man/formatrpsdb.1 b/doc/man/formatrpsdb.1
index 25846419..73efcac0 100644
--- a/doc/man/formatrpsdb.1
+++ b/doc/man/formatrpsdb.1
@@ -22,7 +22,7 @@ sequences into a database suitable for use with Reverse Position
Specific (RPS) Blast.
Each input sequence, together with its position-specific scoring
matrix (PSSM), is ASN.1 encoded into a PssmWithParameters (or
-'scoremat') object and resides in a separate file.
+`scoremat') object and resides in a separate file.
Scoremat objects can be created using \fBblastpgp\fP.
\fBFormatrpsdb\fP is given a list of these files and produces the
corresponding database.
diff --git a/doc/man/gene2xml.1 b/doc/man/gene2xml.1
index 9ec74b00..0bbd9283 100644
--- a/doc/man/gene2xml.1
+++ b/doc/man/gene2xml.1
@@ -37,31 +37,31 @@ File is Binary
File is Compressed
.TP
\fB\-i\fP\ \fIfilename\fP
-Single Input file (standard input by default) when not using \fB-p\fP
+Single Input file (standard input by default) when not using \fB\-p\fP
.TP
\fB\-l\fP
-Log processing (list files processed when using \fB-p\fP)
+Log processing (list files processed when using \fB\-p\fP)
.TP
\fB\-o\fP\ \fIfilename\fP
-Single Output file (standard output by default) when not using \fB-p\fP
+Single Output file (standard output by default) when not using \fB\-p\fP
.TP
\fB\-p\fP\ \fIpath\fP
Path to Files (if processing an entire directory)
.TP
\fB\-r\fP\ \fIpath\fP
-Path for Results when using \fB-p\fP; defaults to the input directory
+Path for Results when using \fB\-p\fP; defaults to the input directory
.TP
\fB\-t\fP\ \fIN\fP
Limit to the given Taxon ID (per \fBhttp://www.ncbi.nlm.nih.gov/Taxonomy/\fP)
.TP
\fB\-x\fP
-Extract .ags -> text .agc (format previously distributed)
+Extract .ags to text .agc (format previously distributed)
.TP
\fB\-y\fP
-Combine .agc -> text .ags (for testing)
+Combine .agc to text .ags (for testing)
.TP
\fB\-z\fP
-Combine .agc -> binary .ags, then gzip
+Combine .agc to binary .ags, then gzip
.SH AUTHOR
The National Center for Biotechnology Information.
.SH SEE ALSO
diff --git a/doc/man/idfetch.1 b/doc/man/idfetch.1
index 4d378114..0ce4651c 100644
--- a/doc/man/idfetch.1
+++ b/doc/man/idfetch.1
@@ -33,7 +33,7 @@ Add the specified feature types (comma-delimited); allowed values are
CDD, SNP, SNP_graph, MGC, HPRD, STS, tRNA, and microRNA.
.TP
\fB\-G\fP\ \fIfilename\fP
-File with list of GIs, (versioned) accessions, FASTA SeqID's to dump
+File with list of GIs, (versioned) accessions, FASTA SeqIDs to dump
.TP
\fB\-Q\fP\ \fIfilename\fP
Generate GI list by Entrez query in \fIfilename\fP; requires \fB\-dn\fP
@@ -67,7 +67,7 @@ Entity number (retrieval number) to dump
Flattened SeqId. Possible formats:
.br
\fItype\fP([\fIname\fP][,[\fIaccession\fP][,[\fIrelease\fP][,\fIversion\fP]]])
-as '5(HUMHBB)'
+as \(aq5(HUMHBB)\(aq
.br
\fItype\fP=\fIaccession\fP
.br
diff --git a/doc/man/nps2gps.1 b/doc/man/nps2gps.1
index eaf43926..f3b9a704 100644
--- a/doc/man/nps2gps.1
+++ b/doc/man/nps2gps.1
@@ -1,11 +1,13 @@
-.TH NPS2GPS 1 2007-10-19 NCBI "NCBI Tools User's Manual"
+.TH NPS2GPS 1 2008-12-13 NCBI "NCBI Tools User's Manual"
.SH NAME
nps2gps \- convert nucleotide-protein sets to ASN.1 genomic product sets
.SH SYNOPSIS
.B nps2gps
[\|\fB\-\fP\|]
+[\|\fB\-D\fP\|]
[\|\fB\-F\fP\|]
[\|\fB\-L\fP\|]
+[\|\fB\-P\fP\|]
[\|\fB\-R\fP\|]
[\|\fB\-f\fP\ \fIstr\fP\|]
[\|\fB\-i\fP\ \fIfilename\fP\|]
@@ -23,12 +25,18 @@ A summary of options is included below.
\fB\-\fP
Print usage message
.TP
+\fB\-D\fP
+RefSeq mRNA Titles
+.TP
\fB\-F\fP
Map by Feature ID
.TP
\fB\-L\fP
Lock components in advance
.TP
+\fB\-P\fP
+mRNA ID from Protein
+.TP
\fB\-R\fP
Enable Remote fetching from ID
.TP
diff --git a/doc/man/spidey.1 b/doc/man/spidey.1
index f3e2836f..4dd3014b 100644
--- a/doc/man/spidey.1
+++ b/doc/man/spidey.1
@@ -170,7 +170,7 @@ terminal introns from 100kb to 240kb and for all others from 35kb to
.TP
\fB\-a\fP\ \fIfilename\fP
Output file for alignments when directed to a separate file with
-\fB-p\ 3\fP (default = spidey.aln).
+\fB\-p\ 3\fP (default = spidey.aln).
.TP
\fB\-c\fP\ \fIN\fP
Identity cutoff, in percent, for quality control purposes.
@@ -198,14 +198,14 @@ can substitute the desired accession number for the filename.
Print ASN.1 alignment?
.TP
\fB\-k\fP\ \fIfilename\fP
-File for ASN.1 output with \fB-k\fP (default = spidey.asn).
+File for ASN.1 output with \fB\-k\fP (default = spidey.asn).
.TP
\fB\-l\fP\ \fIN\fP
Length coverage cutoff, in percent.
.TP
\fB\-m\fP\ \fIfilename\fP
Input file containing the mRNA sequence(s) in ASN.1 or FASTA format,
-or a list of their accessions (with \fB-G\fP). If your computer is
+or a list of their accessions (with \fB\-G\fP). If your computer is
running on a network that can access GenBank, you can substitute a
single accession number for the filename.
.TP
@@ -213,7 +213,7 @@ single accession number for the filename.
Number of gene models to return per input mRNA (default = 1).
.TP
\fB\-o\fP\ \fIstr\fP
-Main output file (default = stdout; contents controlled by \fB-p\fP).
+Main output file (default = stdout; contents controlled by \fB\-p\fP).
.TP
\fB\-p\fP\ \fIN\fP
Print alignment?
diff --git a/doc/man/subfuse.1 b/doc/man/subfuse.1
new file mode 100644
index 00000000..762906e6
--- /dev/null
+++ b/doc/man/subfuse.1
@@ -0,0 +1,30 @@
+.TH SUBFUSE 1 2008-12-13 NCBI "NCBI Tools User's Manual"
+.SH NAME
+subfuse \- merge GenBank submissions
+.SH SYNOPSIS
+.B subfuse
+[\|\fB\-\fP\|]
+[\|\fB\-o\fP\ \fIfilename\fP\|]
+[\|\fB\-p\fP\ \fIpath\fP\|]
+[\|\fB\-x\fP\ \fIext\fP\|]
+.SH DESCRIPTION
+\fBsubfuse\fP is a utility to consolidate multiple GenBank submissions
+into a single batch submission.
+.SH OPTIONS
+A summary of options is included below.
+.TP
+\fB\-\fP
+Print usage message
+.TP
+\fB\-o\fP\ \fIfilename\fP
+Output file (\fBstdout\fP by default)
+.TP
+\fB\-p\fP\ \fIpath\fP
+Path to files
+.TP
+\fB\-x\fP\ \fIext\fP
+Input filename suffix (\fB.sqn\fP by default)
+.SH SEE ALSO
+.BR sequin (1).
+.SH AUTHOR
+The National Center for Biotechnology Information.
diff --git a/doc/man/taxblast.1 b/doc/man/taxblast.1
new file mode 100644
index 00000000..1fc9109b
--- /dev/null
+++ b/doc/man/taxblast.1
@@ -0,0 +1,34 @@
+.TH TAXBLAST 1 2008-12-13 NCBI "NCBI Tools User's Manual"
+.SH NAME
+taxblast \- taxonomy-enabled BLAST
+.SH SYNOPSIS
+.B taxblast
+[\|\fB\-\fP\|]
+[\|\fB\-d\fP\ \fIstr\fP\|]
+\fB\-i\fP\ \fIfilename\fP
+[\|\fB\-o\fP\ \fIfilename\fP\|]
+[\|\fB\-p\fP\|]
+.SH DESCRIPTION
+\fBtaxblast\fP is a variant of BLAST that makes use of taxonomic
+information.
+.SH OPTIONS
+A summary of options is included below.
+.TP
+\fB\-\fP
+Print usage message
+.TP
+\fB\-d\fP\ \fIstr\fP
+Database used to get SeqAnnot ASN.1 (\fBnr\fP by default)
+.TP
+\fB\-i\fP\ \fIfilename\fP
+Input ASN.1 File (SeqAnnot)
+.TP
+\fB\-o\fP\ \fIfilename\fP
+Output file name (stdout by default)
+.TP
+\fB\-p\fP
+Sequence is DNA
+.SH SEE ALSO
+.BR blast (1).
+.SH AUTHOR
+The National Center for Biotechnology Information.
diff --git a/doc/man/tbl2asn.1 b/doc/man/tbl2asn.1
index e1b984c6..6b8a3dfa 100644
--- a/doc/man/tbl2asn.1
+++ b/doc/man/tbl2asn.1
@@ -1,19 +1,18 @@
-.TH TBL2ASN 1 2007-10-19 NCBI "NCBI Tools User's Manual"
+.TH TBL2ASN 1 2009-07-31 NCBI "NCBI Tools User's Manual"
.SH NAME
tbl2asn \- prepare a GenBank submission using an ASCII feature table
.SH SYNOPSIS
.B tbl2asn
[\|\fB\-\fP\|]
-[\|\fB\-B\fP\ \fIstr\fP\|]
+[\|\fB\-A\fP\ \fIstr\fP\|]
[\|\fB\-C\fP\ \fIstr\fP\|]
[\|\fB\-D\fP\ \fIfilename\fP\|]
-[\|\fB\-E\fP\ \fIstr\fP\|]
-[\|\fB\-F\fP\ \fIstr\fP|]
+[\|\fB\-E\fP\|]
+[\|\fB\-F\fP\ \fIstr\fP\|]
[\|\fB\-G\fP\ \fIstr\fP\|]
-[\|\fB\-H\fP\|]
+[\|\fB\-H\fP\ \fIstr\fP\|]
[\|\fB\-K\fP\|]
[\|\fB\-L\fP\|]
-[\|\fB\-M\fP\ \fIstr\fP\|]
[\|\fB\-O\fP\|]
[\|\fB\-P\fP\|]
[\|\fB\-Q\fP\|]
@@ -21,22 +20,20 @@ tbl2asn \- prepare a GenBank submission using an ASCII feature table
[\|\fB\-S\fP\|]
[\|\fB\-T\fP\|]
[\|\fB\-U\fP\|]
+[\|\fB\-V\fP\ \fIstr\fP\|]
[\|\fB\-W\fP\|]
[\|\fB\-X\fP\ \fIstr\fP\|]
[\|\fB\-Y\fP\ \fIfilename\fP\|]
+[\|\fB\-Z\fP\ \fIfilename\fP\|]
[\|\fB\-a\fP\ \fIstr\fP\|]
[\|\fB\-b\fP\|]
-[\|\fB\-c\fP\|]
-[\|\fB\-d\fP\|]
-[\|\fB\-e\fP\|]
+[\|\fB\-c\fP\ \fIstr\fP\|]
[\|\fB\-f\fP\ \fIfilename\fP\|]
[\|\fB\-g\fP\|]
[\|\fB\-h\fP\|]
[\|\fB\-i\fP\ \fIfilename\fP\|]
[\|\fB\-j\fP\ \fIstr\fP\|]
-[\|\fB\-k\fP\|]
-[\|\fB\-l\fP\|]
-[\|\fB\-m\fP\|]
+[\|\fB\-k\fP\ \fIstr\fP\|]
[\|\fB\-n\fP\ \fIstr\fP\|]
[\|\fB\-o\fP\ \fIfilename\fP\|]
[\|\fB\-p\fP\ \fIstr\fP\|]
@@ -46,7 +43,6 @@ tbl2asn \- prepare a GenBank submission using an ASCII feature table
[\|\fB\-t\fP\ \fIfilename\fP\|]
[\|\fB\-u\fP\|]
[\|\fB\-v\fP\|]
-[\|\fB\-w\fP\ \fIN\fP\|]
[\|\fB\-x\fP\ \fIstr\fP\|]
[\|\fB\-y\fP\ \fIstr\fP\|]
[\|\fB\-z\fP\|]
@@ -63,8 +59,8 @@ A summary of options is included below.
\fB\-\fP
Print usage message
.TP
-\fB\-B\fP\ \fIstr\fP
-Alignment Beginning gap characters
+\fB\-A\fP\ \fIstr\fP
+Accession
.TP
\fB\-C\fP\ \fIstr\fP
Genome Center tag
@@ -72,17 +68,29 @@ Genome Center tag
\fB\-D\fP\ \fIfilename\fP
Descriptors file
.TP
-\fB\-E\fP\ \fIstr\fP
-Alignment End gap characters
+\fB\-E\fP
+Recurse
.TP
\fB\-F\fP
Feature ID links (\fBo\fP by Overlap, \fBp\fP by Product)
.TP
\fB\-G\fP\ \fIstr\fP
+Alignment Gap Flags (comma separated fields, e.g., \fBp,\-,\-,\-,?,.\fP )
+\fBn\fP Nucleotide or \fBp\fP Protein,
+Begin, Middle, End Gap Characters,
+Missing Characters, Match Characters
Alignment middle Gap characters
.TP
-\fB\-H\fP
-Implicit gaps
+\fB\-H\fP\ \fIstr\fP
+Hold until publication:
+.RS
+.PD 0
+.IP y
+For one year
+.IP \fImm/dd/yyyy\fP
+Until the specified date
+.PD
+.RE
.TP
\fB\-K\fP
Safe Bioseq-set
@@ -90,20 +98,17 @@ Safe Bioseq-set
\fB\-L\fP
Force Local protein_id/transcript_id
.TP
-\fB\-M\fP\ \fIstr\fP
-Alignment Match characters
-.TP
\fB\-O\fP
Allow run-on ORFs
.TP
\fB\-P\fP
-Alignment is Proteins
+Remote publication lookup
.TP
\fB\-Q\fP
Special mRNA titles
.TP
\fB\-R\fP
-Remote fetching from ID
+Remote sequence record fetching from ID
.TP
\fB\-S\fP
Smart feature annotation
@@ -114,29 +119,84 @@ Remote Taxonomy lookup
\fB\-U\fP
Remove Unnecessary gene xref
.TP
+\fB\-V\fP\ \fIstr\fP
+Verification (combine any of the following letters)
+.RS
+.PD 0
+.IP v
+Validate with Normal Stringency
+.IP r
+Validate without Country Check
+.IP b
+Generate GenBank Flatfile
+.IP g
+Generate Gene Report
+.PD
+.RE
+.TP
\fB\-W\fP
Log progress
.TP
\fB\-X\fP\ \fIstr\fP
-Alignment missing characters
+Extra flags (combine any of the following letters)
+.RS
+.PD 0
+.IP C
+Apply comments in \fB.cmt\fP files to all sequences
+.PD
+.RE
.TP
\fB\-Y\fP\ \fIfilename\fP
Read a comment string from \fIfilename\fP
.TP
+\fB\-Z\fP\ \fIfilename\fP
+Write a discrepancy report to \fIfilename\fP
+.TP
\fB\-a\fP\ \fIstr\fP
-Accession
+File type:
+.RS
+.PD 0
+.IP a
+Any (default)
+.IP r20u
+Runs of 20+ Ns are gaps, 100 Ns are unknown length
+.IP r20k
+Runs of 20+ Ns are gaps, 100 Ns are known length
+.IP s
+FASTA Set (\fBs\fP Batch, \fBs1\fP Pop, \fBs2\fP Phy, \fBs3\fP Mut,
+\fBs4\fP Eco)
+.IP d
+FASTA Delta
+.IP di
+FASTA Delta with Implicit Gaps
+.IP l
+FASTA+Gap Alignment
+.IP z
+FASTA with Gap Lines
+.IP e
+PHRAP/ACE
+.PD
+.RE
.TP
\fB\-b\fP
-Generate GenBank file
-.TP
-\fB\-c\fP
-Annotate longest ORF
-.TP
-\fB\-d\fP
-Read FASTAs as Delta
+Generate GenBank file (deprecated in favor of \fB\-V b\fP)
.TP
-\fB\-e\fP
-Read PHRAP/ACE format
+\fB\-c\fP\ \fIstr\fP
+Cleanup (combine any of the following letters)
+.RS
+.PD 0
+.IP d
+Correct Collection Dates (assume month first)
+.IP D
+Correct Collection Dates (assume day first)
+.IP b
+Append note to coding regions that overlap other coding regions with
+similar product names and do not contain 'ABC'
+.IP x
+Extend partial ends of features by one or two nucleotides to abut gaps
+or sequence ends
+.PD
+.RE
.TP
\fB\-f\fP\ \fIfilename\fP
Single table file
@@ -153,14 +213,20 @@ Single input file
\fB\-j\fP\ \fIstr\fP
Source qualifiers
.TP
-\fB\-k\fP
-Set conflict on mismatch
-.TP
-\fB\-l\fP
-Read FASTA+Gap Alignment
-.TP
-\fB\-m\fP
-Allow alternative starts
+\fB\-k\fP\ \fIstr\fP
+CDS flags (combine any of the following letters)
+.RS
+.PD 0
+.IP c
+Annotate Longest ORF
+.IP r
+Allow Runon ORFs
+.IP m
+Allow Alternative Starts
+.IP k
+Set Conflict on Mismatch
+.PD
+.RE
.TP
\fB\-n\fP\ \fIstr\fP
Organism name
@@ -187,36 +253,20 @@ Read template from \fIfilename\fP
Convert GenProdSet to NucProtSet
.TP
\fB\-v\fP
-Validate
-.TP
-\fB\-w\fP \fIN\fP
-FASTA set class
-.RS
-.PD 0
-.IP 0
-unspecified (default)
-.IP 1
-population study
-.IP 2
-phylogenetic study
-.IP 3
-set of mutations
-.IP 4
-ecological sample study
-.PD
-.RE
+Validate (deprecated in favor of \fB\-V v\fP)
.TP
\fB\-x\fP\ \fIstr\fP
Suffix (default = \fB.fsa\fP)
.TP
\fB\-y\fP\ \fIstr\fP
Comment
.TP
\fB\-z\fP
-Read FASTAs with gap lines
+Clean up log file
.SH AUTHOR
The National Center for Biotechnology Information.
.SH SEE ALSO
+.ad l
.BR Psequin (1),
.BR sbtedit (1),
tbl2asn.txt,
diff --git a/doc/man/trna2sap.1 b/doc/man/trna2sap.1
index 752cc64f..7e3d90f6 100644
--- a/doc/man/trna2sap.1
+++ b/doc/man/trna2sap.1
@@ -68,7 +68,7 @@ Annotation Title (normally \(lqtRNAscan-SE\(rq).
Ignore Undetermined tRNAs
.TP
\fB\-x\fP\ \fIstr\fP
-File selection suffix with \fB-p\fP (\fB.trna\fP by default).
+File selection suffix with \fB\-p\fP (\fB.trna\fP by default).
.SH AUTHOR
The National Center for Biotechnology Information.
.SH SEE ALSO
diff --git a/make/makenet.unx b/make/makenet.unx
index 27f811b4..81ba4197 100644
--- a/make/makenet.unx
+++ b/make/makenet.unx
@@ -1,6 +1,6 @@
# makefile for network demo programs and network entrez
#
-# $Id: makenet.unx,v 6.231 2008/12/10 21:23:29 ucko Exp $
+# $Id: makenet.unx,v 6.233 2009/08/05 20:06:36 ucko Exp $
# test, ignore
#
# Sun with unbundled ANSI compiler [ make CC=acc RAN=ranlib ]
@@ -389,7 +389,7 @@ utilities : $(EXEUTIL)
vibutilities : $(EXEUTILVIB)
-.NO_PARALLEL: copy nocopy Tentrez sequin Psequin sbtedit Ssequin elecpcr asn2fast asn2asn cleanasn cspeedtest sugint Nbatch Nbatch3 Nentrcmd seqget idfetch test_nc bi_socket test_ncbi_dsock debug_server rtestval rasn2ff asn2gb asn2gb_psf asn2fsa asn2fsa_psf tbl2asn tbl2asn_psf raw2delt aceread_tst asn2all gene2xml asnval asnval_psf asndisc asndisc_psf demo_aceread_tst asnmacro asnstrip flint gbseqget insdseqget trna2sap trna2tbl testent2 entrez2 spidey dotmatrix ingenue condense bl2seq
+.NO_PARALLEL: copy nocopy Tentrez sequin Psequin sbtedit Ssequin elecpcr asn2fast asn2asn cleanasn cspeedtest sugint Nbatch Nbatch3 Nentrcmd seqget idfetch test_nc bi_socket test_ncbi_dsock debug_server rtestval rasn2ff asn2gb asn2gb_psf asn2fsa asn2fsa_psf tbl2asn tbl2asn_psf raw2delt aceread_tst asn2all gene2xml asnval asnval_psf asndisc asndisc_psf demo_aceread_tst asnmacro asnstrip flint gbseqget insdseqget trna2sap trna2tbl testent2 entrez2 spidey dotmatrix ingenue condense bl2seq src_chk src_chk_psf
.WAIT:
echo Waiting...go
@@ -1468,4 +1468,9 @@ bl2bag.cgi : bl2bag.c
src_chk : src_chk.c
$(CC) -o src_chk $(LDFLAGS) src_chk.c $(LIB2) $(LIB1) $(OTHERLIBS)
+src_chk_psf : src_chk.c
+ $(CC) -DINTERNAL_NCBI_SRC_CHK -g -o src_chk_psf $(LDFLAGS) src_chk.c \
+ $(LIB_PS) $(LIB23) $(LIBCOMPADJ) $(LIB2) $(LIB1) \
+ $(NCBI_SYBLIBS_CT) $(OTHERLIBS)
+
##
diff --git a/make/xCode/ncbictoolkit/ncbictoolkit.xcodeproj/project.pbxproj b/make/xCode/ncbictoolkit/ncbictoolkit.xcodeproj/project.pbxproj
index 510fab6e..efa29733 100644
--- a/make/xCode/ncbictoolkit/ncbictoolkit.xcodeproj/project.pbxproj
+++ b/make/xCode/ncbictoolkit/ncbictoolkit.xcodeproj/project.pbxproj
@@ -753,8 +753,6 @@
3734FD650FF2AF7D004C8F4B /* ncbi_host_infop.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F9490FF2AF7C004C8F4B /* ncbi_host_infop.h */; settings = {ATTRIBUTES = (Public, ); }; };
3734FD670FF2AF7D004C8F4B /* ncbi_http_connector.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F94B0FF2AF7C004C8F4B /* ncbi_http_connector.h */; settings = {ATTRIBUTES = (Public, ); }; };
3734FD690FF2AF7D004C8F4B /* ncbi_lb.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F94D0FF2AF7C004C8F4B /* ncbi_lb.h */; settings = {ATTRIBUTES = (Public, ); }; };
- 3734FD6A0FF2AF7D004C8F4B /* ncbi_lbsm.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F94E0FF2AF7C004C8F4B /* ncbi_lbsm.h */; settings = {ATTRIBUTES = (Public, ); }; };
- 3734FD6B0FF2AF7D004C8F4B /* ncbi_lbsm_ipc.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F94F0FF2AF7C004C8F4B /* ncbi_lbsm_ipc.h */; settings = {ATTRIBUTES = (Public, ); }; };
3734FD6C0FF2AF7D004C8F4B /* ncbi_lbsmd.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F9500FF2AF7C004C8F4B /* ncbi_lbsmd.h */; settings = {ATTRIBUTES = (Public, ); }; };
3734FD6F0FF2AF7D004C8F4B /* ncbi_local.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F9530FF2AF7C004C8F4B /* ncbi_local.h */; settings = {ATTRIBUTES = (Public, ); }; };
3734FD710FF2AF7D004C8F4B /* ncbi_memory_connector.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F9550FF2AF7C004C8F4B /* ncbi_memory_connector.h */; settings = {ATTRIBUTES = (Public, ); }; };
@@ -1055,8 +1053,6 @@
379747C30FF3C77600138501 /* ncbi_host_infop.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F9490FF2AF7C004C8F4B /* ncbi_host_infop.h */; };
379747C40FF3C77600138501 /* ncbi_http_connector.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F94B0FF2AF7C004C8F4B /* ncbi_http_connector.h */; };
379747C50FF3C77600138501 /* ncbi_lb.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F94D0FF2AF7C004C8F4B /* ncbi_lb.h */; };
- 379747C60FF3C77600138501 /* ncbi_lbsm.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F94E0FF2AF7C004C8F4B /* ncbi_lbsm.h */; };
- 379747C70FF3C77600138501 /* ncbi_lbsm_ipc.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F94F0FF2AF7C004C8F4B /* ncbi_lbsm_ipc.h */; };
379747C80FF3C77600138501 /* ncbi_lbsmd.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F9500FF2AF7C004C8F4B /* ncbi_lbsmd.h */; };
379747C90FF3C77600138501 /* ncbi_local.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F9530FF2AF7C004C8F4B /* ncbi_local.h */; };
379747CA0FF3C77600138501 /* ncbi_memory_connector.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F9550FF2AF7C004C8F4B /* ncbi_memory_connector.h */; };
@@ -1199,8 +1195,6 @@
379747C30FF3C77600138501 /* ncbi_host_infop.h in CopyFiles */,
379747C40FF3C77600138501 /* ncbi_http_connector.h in CopyFiles */,
379747C50FF3C77600138501 /* ncbi_lb.h in CopyFiles */,
- 379747C60FF3C77600138501 /* ncbi_lbsm.h in CopyFiles */,
- 379747C70FF3C77600138501 /* ncbi_lbsm_ipc.h in CopyFiles */,
379747C80FF3C77600138501 /* ncbi_lbsmd.h in CopyFiles */,
379747C90FF3C77600138501 /* ncbi_local.h in CopyFiles */,
379747CA0FF3C77600138501 /* ncbi_memory_connector.h in CopyFiles */,
@@ -2112,8 +2106,6 @@
3734F9490FF2AF7C004C8F4B /* ncbi_host_infop.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_host_infop.h; path = ../../../connect/ncbi_host_infop.h; sourceTree = SOURCE_ROOT; };
3734F94B0FF2AF7C004C8F4B /* ncbi_http_connector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_http_connector.h; path = ../../../connect/ncbi_http_connector.h; sourceTree = SOURCE_ROOT; };
3734F94D0FF2AF7C004C8F4B /* ncbi_lb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_lb.h; path = ../../../connect/ncbi_lb.h; sourceTree = SOURCE_ROOT; };
- 3734F94E0FF2AF7C004C8F4B /* ncbi_lbsm.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_lbsm.h; path = ../../../connect/ncbi_lbsm.h; sourceTree = SOURCE_ROOT; };
- 3734F94F0FF2AF7C004C8F4B /* ncbi_lbsm_ipc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_lbsm_ipc.h; path = ../../../connect/ncbi_lbsm_ipc.h; sourceTree = SOURCE_ROOT; };
3734F9500FF2AF7C004C8F4B /* ncbi_lbsmd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_lbsmd.h; path = ../../../connect/ncbi_lbsmd.h; sourceTree = SOURCE_ROOT; };
3734F9530FF2AF7C004C8F4B /* ncbi_local.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_local.h; path = ../../../connect/ncbi_local.h; sourceTree = SOURCE_ROOT; };
3734F9550FF2AF7C004C8F4B /* ncbi_memory_connector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_memory_connector.h; path = ../../../connect/ncbi_memory_connector.h; sourceTree = SOURCE_ROOT; };
@@ -3663,8 +3655,6 @@
3734F9490FF2AF7C004C8F4B /* ncbi_host_infop.h */,
3734F94B0FF2AF7C004C8F4B /* ncbi_http_connector.h */,
3734F94D0FF2AF7C004C8F4B /* ncbi_lb.h */,
- 3734F94E0FF2AF7C004C8F4B /* ncbi_lbsm.h */,
- 3734F94F0FF2AF7C004C8F4B /* ncbi_lbsm_ipc.h */,
3734F9500FF2AF7C004C8F4B /* ncbi_lbsmd.h */,
3734F9530FF2AF7C004C8F4B /* ncbi_local.h */,
3734F9550FF2AF7C004C8F4B /* ncbi_memory_connector.h */,
@@ -4413,8 +4403,6 @@
3734FD650FF2AF7D004C8F4B /* ncbi_host_infop.h in Headers */,
3734FD670FF2AF7D004C8F4B /* ncbi_http_connector.h in Headers */,
3734FD690FF2AF7D004C8F4B /* ncbi_lb.h in Headers */,
- 3734FD6A0FF2AF7D004C8F4B /* ncbi_lbsm.h in Headers */,
- 3734FD6B0FF2AF7D004C8F4B /* ncbi_lbsm_ipc.h in Headers */,
3734FD6C0FF2AF7D004C8F4B /* ncbi_lbsmd.h in Headers */,
3734FD6F0FF2AF7D004C8F4B /* ncbi_local.h in Headers */,
3734FD710FF2AF7D004C8F4B /* ncbi_memory_connector.h in Headers */,
diff --git a/sequin/sequin.h b/sequin/sequin.h
index 8da7be9e..2b757fb2 100644
--- a/sequin/sequin.h
+++ b/sequin/sequin.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/22/95
*
-* $Revision: 6.576 $
+* $Revision: 6.579 $
*
* File Description:
*
@@ -1828,6 +1828,8 @@ ReplaceComplexLocation
NLM_EXTERN void CleanupCDD (IteM i);
+NLM_EXTERN void ReportNonTSABioseqs (BioseqPtr bsp, Pointer userdata);
+
#ifdef OS_MSWIN
NLM_EXTERN Int4 RunSilent(const char *cmdline);
#endif
diff --git a/sequin/sequin1.c b/sequin/sequin1.c
index 473d065c..36616fdb 100644
--- a/sequin/sequin1.c
+++ b/sequin/sequin1.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/22/95
*
-* $Revision: 6.705 $
+* $Revision: 6.709 $
*
* File Description:
*
@@ -131,7 +131,7 @@ static char *time_of_compilation = "now";
#include <Gestalt.h>
#endif
-#define SEQ_APP_VER "9.50"
+#define SEQ_APP_VER "9.55"
CharPtr SEQUIN_APPLICATION = SEQ_APP_VER;
CharPtr SEQUIN_SERVICES = NULL;
@@ -280,7 +280,7 @@ static Boolean dirsubMode = FALSE;
static MenU newDescMenu = NULL;
static MenU newFeatMenu = NULL;
static MenU advTableMenu = NULL;
-static MenU sucMenu = NULL;
+static IteM sucItem = NULL;
static MenU newPubMenu = NULL;
static MenU batchApplyMenu = NULL;
static MenU batchEditMenu = NULL;
@@ -687,51 +687,161 @@ extern Boolean WriteTheEntityID (Uint2 entityID, CharPtr path, Boolean binary)
return rsult;
}
-extern Boolean PropagateFromGenBankBioseqSet (SeqEntryPtr sep, Boolean ask)
+static ValNodePtr ExtractGivenSeqDescrUserObject (ValNodePtr PNTR headptr, CharPtr str, CharPtr cls)
{
- BioseqPtr bsp;
- BioseqSetPtr bssp;
- Uint1 _class;
- SeqEntryPtr seqentry;
- ValNodePtr sourcedescr;
+ Boolean extract_it;
+ ValNodePtr last = NULL, vnp;
+ ObjectIdPtr oip;
+ UserObjectPtr uop;
- if (sep != NULL) {
- if (sep->choice == 2 && sep->data.ptrvalue != NULL) {
- bssp = (BioseqSetPtr) sep->data.ptrvalue;
- _class = bssp->_class;
- sourcedescr = bssp->descr;
- if (sourcedescr == NULL) return FALSE;
- if (_class == 7) {
- if (ask) {
- if (Message (MSG_YN, "Propagate descriptors from top-level set?") == ANS_NO) return FALSE;
- }
- seqentry = bssp->seq_set;
- while (seqentry != NULL) {
- if (seqentry->data.ptrvalue != NULL) {
- if (seqentry->choice == 1) {
- bsp = (BioseqPtr) seqentry->data.ptrvalue;
- ValNodeLink (&(bsp->descr),
- AsnIoMemCopy ((Pointer) sourcedescr,
- (AsnReadFunc) SeqDescrAsnRead,
- (AsnWriteFunc) SeqDescrAsnWrite));
- } else if (seqentry->choice == 2) {
- bssp = (BioseqSetPtr) seqentry->data.ptrvalue;
- ValNodeLink (&(bssp->descr),
- AsnIoMemCopy ((Pointer) sourcedescr,
- (AsnReadFunc) SeqDescrAsnRead,
- (AsnWriteFunc) SeqDescrAsnWrite));
+ if (headptr == NULL) return NULL;
+ vnp = *headptr;
+
+ while (vnp != NULL) {
+ extract_it = FALSE;
+ if (vnp->choice == Seq_descr_user) {
+ uop = (UserObjectPtr) vnp->data.ptrvalue;
+ if (uop != NULL) {
+ if (StringDoesHaveText (cls)) {
+ if (StringICmp (uop->_class, cls) == 0) {
+ extract_it = TRUE;
+ }
+ }
+ if (StringDoesHaveText (str)) {
+ oip = uop->type;
+ if (oip != NULL) {
+ if (StringICmp (oip->str, str) == 0) {
+ extract_it = TRUE;
}
}
- seqentry = seqentry->next;
}
- bssp = (BioseqSetPtr) sep->data.ptrvalue;
- bssp->descr = SeqDescrFree (bssp->descr);
- return TRUE;
}
}
+ if (extract_it) {
+ if (last == NULL) {
+ *headptr = vnp->next;
+ } else {
+ last->next = vnp->next;
+ }
+ vnp->next = NULL;
+ return vnp;
+ } else {
+ last = vnp;
+ vnp = vnp->next;
+ }
}
- return FALSE;
+
+ return NULL;
+}
+
+typedef struct propgenbankdata {
+ Boolean ask;
+ Boolean asked;
+ Boolean bail;
+ Boolean changed;
+} PropGenbankData, PNTR PropGenBankPtr;
+
+static void DoPropagateFromGenBankBioseqSet (
+ BioseqSetPtr seqset,
+ Pointer userdata
+)
+
+{
+ BioseqPtr bsp;
+ BioseqSetPtr bssp;
+ PropGenBankPtr pgp;
+ SeqEntryPtr seqentry;
+ ValNodePtr smartuserobj;
+ ValNodePtr sourcedescr;
+ UserObjectPtr uop;
+
+ if (seqset == NULL) return;
+ if (seqset->_class != BioseqseqSet_class_genbank) return;
+ pgp = (PropGenBankPtr) userdata;
+ if (pgp == NULL) return;
+
+ seqentry = seqset->seq_set;
+ sourcedescr = seqset->descr;
+ if (sourcedescr == NULL) return;
+
+ /* if only descriptor is tracking user object, skip */
+ if (sourcedescr->next == NULL && sourcedescr->choice == Seq_descr_user) {
+ uop = (UserObjectPtr) sourcedescr->data.ptrvalue;
+ if (uop != NULL && StringICmp (uop->_class, "SMART_V1.0") == 0) return;
+ }
+
+ /* optionally ask if propagation is desired */
+ if (pgp->ask) {
+ if (! pgp->asked) {
+ if (Message (MSG_YN, "Propagate descriptors from top-level set?") == ANS_NO) {
+ pgp->bail = TRUE;
+ }
+ pgp->asked = TRUE;
+ }
+ }
+ if (pgp->bail) return;
+
+ /* disconnect descriptors from parent bssp */
+ seqset->descr = NULL;
+
+ /* extract tracking user object */
+ smartuserobj = ExtractGivenSeqDescrUserObject (&sourcedescr, NULL, "SMART_V1.0");
+
+ while (seqentry != NULL) {
+ if (seqentry->data.ptrvalue != NULL) {
+ if (seqentry->choice == 1) {
+ bsp = (BioseqPtr) seqentry->data.ptrvalue;
+ ValNodeLink (&(bsp->descr),
+ AsnIoMemCopy ((Pointer) sourcedescr,
+ (AsnReadFunc) SeqDescrAsnRead,
+ (AsnWriteFunc) SeqDescrAsnWrite));
+ } else if (seqentry->choice == 2) {
+ bssp = (BioseqSetPtr) seqentry->data.ptrvalue;
+ ValNodeLink (&(bssp->descr),
+ AsnIoMemCopy ((Pointer) sourcedescr,
+ (AsnReadFunc) SeqDescrAsnRead,
+ (AsnWriteFunc) SeqDescrAsnWrite));
+ }
+ pgp->changed = TRUE;
+ }
+ seqentry = seqentry->next;
+ }
+
+ /* free extracted original descriptors now that copies are propagated */
+ SeqDescrFree (sourcedescr);
+
+ /* restore tracking user object */
+ if (smartuserobj != NULL) {
+ ValNodeLink (&(seqset->descr), smartuserobj);
+ }
+
+ /* recurse */
+ VisitSetsInSet (seqset, userdata, DoPropagateFromGenBankBioseqSet);
+}
+
+extern Boolean PropagateFromGenBankBioseqSet (SeqEntryPtr sep, Boolean ask)
+
+{
+ BioseqSetPtr bssp;
+ PropGenbankData pdp;
+
+ if (sep == NULL) return FALSE;
+ if (! IS_Bioseq_set (sep)) return FALSE;
+
+ bssp = (BioseqSetPtr) sep->data.ptrvalue;
+ if (bssp == NULL) return FALSE;
+ if (bssp->_class != BioseqseqSet_class_genbank) return FALSE;
+
+ MemSet ((Pointer) &pdp, 0, sizeof (PropGenbankData));
+ pdp.ask = ask;
+ pdp.asked = FALSE;
+ pdp.bail = FALSE;
+ pdp.changed = FALSE;
+
+ DoPropagateFromGenBankBioseqSet (bssp, (Pointer) &pdp);
+
+ return pdp.changed;
}
static void ForcePropagate (IteM i)
@@ -5105,7 +5215,7 @@ static void BioseqViewFormActivated (WindoW w)
(HANDLE) newDescMenu,
(HANDLE) newFeatMenu,
(HANDLE) advTableMenu,
- (HANDLE) sucMenu,
+ (HANDLE) sucItem,
(HANDLE) newPubMenu,
(HANDLE) batchApplyMenu,
(HANDLE) batchEditMenu,
@@ -5386,7 +5496,7 @@ static void MacDeactProc (WindoW w)
(HANDLE) newDescMenu,
(HANDLE) newFeatMenu,
(HANDLE) advTableMenu,
- (HANDLE) sucMenu,
+ (HANDLE) sucItem,
(HANDLE) newPubMenu,
(HANDLE) batchApplyMenu,
(HANDLE) batchEditMenu,
@@ -9437,6 +9547,10 @@ static void SetupMacMenus (void)
/*
submitItem = CommandItem (m, "Submit to NCBI", SubmitToNCBI);
*/
+ /*
+ SeparatorItem (m);
+ CommandItem (m, "Propagate Top Descriptors", ForcePropagate);
+ */
SeparatorItem (m);
printItem = FormCommandItem (m, "Print", NULL, VIB_MSG_PRINT);
SeparatorItem (m);
@@ -9679,7 +9793,7 @@ static void SetupMacMenus (void)
CommandItem (newFeatMenu, "Generate Definition Line", AutoDef);
advTableMenu = SubMenu (newFeatMenu, "Advanced Table Readers");
CommandItem (advTableMenu, "Load Structured Comments from Table", SubmitterCreateStructuredComments);
- sucMenu = CommandItem (newFeatMenu, "Sort Unique Count By Group", SUCSubmitterProc);
+ sucItem = CommandItem (newFeatMenu, "Sort Unique Count By Group", SUCSubmitterProc);
}
#endif
diff --git a/sequin/sequin3.c b/sequin/sequin3.c
index f3ea4587..d05dc001 100644
--- a/sequin/sequin3.c
+++ b/sequin/sequin3.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/22/95
*
-* $Revision: 6.975 $
+* $Revision: 6.978 $
*
* File Description:
*
@@ -190,6 +190,22 @@ static ValNodePtr ApplyTranscriptomeIdListWithProgress (ValNodePtr ids_list, Loc
}
+/*
+ * ReportNonTSABioseqs
+ *
+ * Bioseq visitor (passed to VisitBioseqsInSep by the TSA table commands)
+ * that writes one line to an open log for each bioseq that has no history
+ * assembly.
+ *
+ * bsp      - bioseq being visited; ignored when NULL or when ISA_aa
+ *            (bsp->mol) is true.
+ * userdata - must be a LogInfoPtr with an open file (lip->fp != NULL);
+ *            otherwise the call does nothing.
+ *
+ * Side effects: on a report, appends "<id> has no TSA table" to lip->fp
+ * and sets lip->data_in_log to TRUE.
+ */
+NLM_EXTERN void ReportNonTSABioseqs (BioseqPtr bsp, Pointer userdata)
+{
+ LogInfoPtr lip;
+ Char id_str[255];
+
+ /* note: lip is assigned inside the condition before its fields are tested */
+ if (bsp == NULL || (lip = (LogInfoPtr) userdata) == NULL || lip->fp == NULL || ISA_aa (bsp->mol)) {
+ return;
+ }
+ /* a missing hist, or a hist without an assembly, is what gets reported */
+ if (bsp->hist == NULL || bsp->hist->assembly == NULL) {
+ /* label the bioseq with the id chosen by SeqIdFindBest for SEQID_GENBANK */
+ SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_str, PRINTID_REPORT, sizeof (id_str) - 1);
+ fprintf (lip->fp, "%s has no TSA table\n", id_str);
+ lip->data_in_log = TRUE;
+ }
+}
+
+
static void AddTSATableToBioseq (IteM i)
{
BaseFormPtr bfp;
@@ -256,16 +272,18 @@ static void AddTSATableToBioseq (IteM i)
ValNodeLink (&coverage_report, err_list);
err_list = coverage_report;
+ lip = OpenLog ("TSA Table Problems");
if (err_list != NULL) {
- lip = OpenLog ("TSA Table Problems");
for (vnp = err_list; vnp != NULL; vnp = vnp->next) {
fprintf (lip->fp, "%s\n", vnp->data.ptrvalue);
}
lip->data_in_log = TRUE;
- CloseLog (lip);
- lip = FreeLog (lip);
err_list = ValNodeFreeData (err_list);
}
+ VisitBioseqsInSep (sep, lip, ReportNonTSABioseqs);
+ CloseLog (lip);
+ lip = FreeLog (lip);
+
ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
}
@@ -304,16 +322,18 @@ static void RefreshTSATables (IteM i)
err_list = coverage_report;
+ lip = OpenLog ("TSA Table Problems");
if (err_list != NULL) {
- lip = OpenLog ("TSA Table Problems");
for (vnp = err_list; vnp != NULL; vnp = vnp->next) {
fprintf (lip->fp, "%s\n", vnp->data.ptrvalue);
}
lip->data_in_log = TRUE;
- CloseLog (lip);
- lip = FreeLog (lip);
err_list = ValNodeFreeData (err_list);
}
+ VisitBioseqsInSep (sep, lip, ReportNonTSABioseqs);
+ CloseLog (lip);
+ lip = FreeLog (lip);
+
ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
}
@@ -22383,7 +22403,7 @@ static void MakeSpecialEditMenu (MenU m, BaseFormPtr bfp)
SeparatorItem (s);
x = SubMenu (s, "Extend Partial Features");
- i = CommandItem (x, "All", ExtendPartialFeatures);
+ i = CommandItem (x, "All to Ends", ExtendPartialFeatures);
SetObjectExtra (i, bfp, NULL);
i = CommandItem (x, "With Constraint", ExtendPartialFeaturesWithConstraint);
SetObjectExtra (i, bfp, NULL);
diff --git a/sequin/sequin5.c b/sequin/sequin5.c
index 856cfa02..28490f98 100644
--- a/sequin/sequin5.c
+++ b/sequin/sequin5.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 8/26/97
*
-* $Revision: 6.673 $
+* $Revision: 6.675 $
*
* File Description:
*
@@ -2631,6 +2631,13 @@ static Boolean ConvertCDSToMatPeptide (SeqFeatPtr sfp, Uint2 featdef_to, Pointer
return AutoConvertCDSToMiscFeat (sfp, extradata == NULL ? TRUE : !(*((BoolPtr) extradata)));
}
+
+/*
+ * ConvertMiscFeatureToCDSFunction
+ *
+ * Adapter giving ConvertMiscFeatToCodingRegion the three-argument
+ * signature used by the conversion callbacks in ConvertFeaturesTable.
+ *
+ * sfp         - feature handed on to ConvertMiscFeatToCodingRegion.
+ * featdef_to  - unused.
+ * extradata   - unused.
+ *
+ * Returns whatever ConvertMiscFeatToCodingRegion returns.
+ */
+static Boolean ConvertMiscFeatureToCDSFunction (SeqFeatPtr sfp, Uint2 featdef_to, Pointer extradata)
+{
+ return ConvertMiscFeatToCodingRegion (sfp);
+}
+
+
extern EnumFieldAssoc enum_bond_alist [];
extern EnumFieldAssoc enum_site_alist [];
@@ -17049,6 +17056,7 @@ static Boolean ConvertImpToImp (SeqFeatPtr, Uint2 featdef_to, Pointer extradata)
static Boolean ConvertRNAToRNA (SeqFeatPtr, Uint2 featdef_to, Pointer extradata);
static Boolean ConvertProtToProt (SeqFeatPtr, Uint2 featdef_to, Pointer extradata);
static Boolean ConvertCDSToMatPeptide (SeqFeatPtr sfp, Uint2 featdef_to, Pointer extradata);
+static Boolean ConvertMiscFeatureToCDSFunction (SeqFeatPtr sfp, Uint2 featdef_to, Pointer extradata);
static ConvertFeatureProcsData ConvertFeaturesTable[] = {
{ SEQFEAT_CDREGION, FEATDEF_CDS, SEQFEAT_RNA, FEATDEF_ANY,
@@ -17084,6 +17092,9 @@ static ConvertFeatureProcsData ConvertFeaturesTable[] = {
"If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n"
"EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on the new feature.\n"
"Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." },
+ { SEQFEAT_IMP, FEATDEF_misc_feature, SEQFEAT_CDREGION, FEATDEF_CDS,
+ NULL, NULL, NULL, ConvertMiscFeatureToCDSFunction, NULL,
+ "Use misc_feature comment for coding region product name." },
{ SEQFEAT_IMP, FEATDEF_ANY, SEQFEAT_RNA, FEATDEF_misc_RNA,
NULL, NULL, NULL, ConvertImpToSpecialRNA, NULL,
"Creates a misc_RNA. Import feature key is discarded." },
@@ -18546,6 +18557,7 @@ static Boolean FeatureRemoveOrConvertAction (Pointer userdata)
OrigFeatPtr ofp;
SeqFeatPtr sfp;
Boolean rval = TRUE;
+ SeqEntryPtr create_sep;
if (userdata == NULL) return FALSE;
@@ -18637,7 +18649,12 @@ static Boolean FeatureRemoveOrConvertAction (Pointer userdata)
{
continue;
}
- sfp = CreateNewFeature (ofp->sep, NULL, ofp->sfp->data.choice, ofp->sfp);
+ if (IS_Bioseq_set (ofp->sep)) {
+ create_sep = FindNucSeqEntry (ofp->sep);
+ } else {
+ create_sep = ofp->sep;
+ }
+ sfp = CreateNewFeature (create_sep, NULL, ofp->sfp->data.choice, ofp->sfp);
}
}
mrfp->feat_list = ValNodeFreeData (mrfp->feat_list);
@@ -29439,35 +29456,6 @@ static void GetCombinedCDSLocationCallback (SeqFeatPtr sfp, Pointer userdata)
}
-static SeqFeatPtr GetProtFeature (BioseqPtr protbsp)
-{
- SeqMgrFeatContext fcontext;
- SeqAnnotPtr sap;
- SeqFeatPtr prot_sfp;
- ProtRefPtr prp;
-
- if (protbsp == NULL) return NULL;
-
- prot_sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext);
- if (prot_sfp == NULL) {
- sap = protbsp->annot;
- while (sap != NULL && prot_sfp == NULL) {
- if (sap->type == 1) {
- prot_sfp = sap->data;
- while (prot_sfp != NULL
- && (prot_sfp->data.choice != SEQFEAT_PROT
- || (prp = prot_sfp->data.value.ptrvalue) == NULL
- || prp->processed != 0)) {
- prot_sfp = prot_sfp->next;
- }
- }
- sap = sap->next;
- }
- }
- return prot_sfp;
-}
-
-
static void ApplyProductName (CombineCDSPtr ccp, SeqFeatPtr new_cds)
{
BioseqPtr first_prot_bsp, new_prot_bsp;
diff --git a/sequin/sequin7.c b/sequin/sequin7.c
index 2c14f1da..b3fe1881 100644
--- a/sequin/sequin7.c
+++ b/sequin/sequin7.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/3/98
*
-* $Revision: 6.359 $
+* $Revision: 6.360 $
*
* File Description:
*
@@ -12720,13 +12720,28 @@ static void BarcodeReportPolymorphism (ButtoN b)
}
+/*
+ * ApplyBarcodeDbxrefsBtn
+ *
+ * Push-button callback for the barcode tool's "Apply Dbxrefs" button.
+ * Runs ApplyBarcodeDbxrefsToBioseq over every bioseq in the top seq-entry
+ * of the tool's entity, then flags the entity as modified and requests a
+ * redraw.
+ *
+ * b - the button; its object-extra must be the BarcodeToolPtr for the
+ *     dialog.  Nothing happens when that pointer is NULL.
+ */
+static void ApplyBarcodeDbxrefsBtn (ButtoN b)
+{
+ BarcodeToolPtr drfp;
+
+ drfp = (BarcodeToolPtr) GetObjectExtra (b);
+ if (drfp == NULL) return;
+
+ /* the visitor is given no userdata (NULL) */
+ VisitBioseqsInSep (GetTopSeqEntryForEntityID (drfp->input_entityID), NULL, ApplyBarcodeDbxrefsToBioseq);
+
+ /* mark the record dirty and notify registered viewers of the change */
+ ObjMgrSetDirtyFlag (drfp->input_entityID, TRUE);
+ ObjMgrSendMsg (OM_MSG_UPDATE, drfp->input_entityID, 0, 0);
+ Update();
+}
+
+
extern void BarcodeTestTool (IteM i)
{
BaseFormPtr bfp;
BarcodeToolPtr drfp;
SeqEntryPtr sep;
GrouP h;
- GrouP c, c3;
+ GrouP c, c3, c4, c5;
ButtoN b;
WindoW w;
OMUserDataPtr omudp;
@@ -12782,37 +12797,43 @@ extern void BarcodeTestTool (IteM i)
drfp->pass_fail_summary = StaticPrompt (h, "0 Pass, 0 Fail", 20 * stdCharWidth, dialogTextHeight, programFont, 'l');
RefreshBarcodeList(drfp);
- c3 = HiddenGroup (h, 10, 0, NULL);
- SetGroupSpacing (c3, 10, 10);
- b = PushButton (c3, "Compliance Report", BarcodeTestComplianceReport);
+ c4 = HiddenGroup (h, 5, 0, NULL);
+ SetGroupSpacing (c4, 10, 10);
+
+ b = PushButton (c4, "Compliance Report", BarcodeTestComplianceReport);
SetObjectExtra (b, drfp, NULL);
- b = PushButton (c3, "Failure Report", BarcodeReportButton);
+ b = PushButton (c4, "Failure Report", BarcodeReportButton);
SetObjectExtra (b, drfp, NULL);
- b = PushButton (c3, "Comprehensive Report", BarcodeComprehensiveReportButton);
+ b = PushButton (c4, "Comprehensive Report", BarcodeComprehensiveReportButton);
SetObjectExtra (b, drfp, NULL);
-
- b = PushButton (c3, "Report Polymorphism", BarcodeReportPolymorphism);
+ b = PushButton (c4, "Report Polymorphism", BarcodeReportPolymorphism);
SetObjectExtra (b, drfp, NULL);
- b = PushButton (c3, "Replace Tags", BarcodeTestImportTagTable);
+ c5 = HiddenGroup (h, 5, 0, NULL);
+ SetGroupSpacing (c5, 10, 10);
+ b = PushButton (c5, "Apply Dbxrefs", ApplyBarcodeDbxrefsBtn);
+ SetObjectExtra (b, drfp, NULL);
+ b = PushButton (c5, "Replace Tags", BarcodeTestImportTagTable);
SetObjectExtra (b, drfp, NULL);
- b = PushButton (c3, "Add New Tags", BarcodeTestApplyTagTable);
+ b = PushButton (c5, "Add New Tags", BarcodeTestApplyTagTable);
SetObjectExtra (b, drfp, NULL);
- b = PushButton (c3, "Make Tag Table", BarcodeTestMakeTagTable);
+ b = PushButton (c5, "Make Tag Table", BarcodeTestMakeTagTable);
+ SetObjectExtra (b, drfp, NULL);
+ b = PushButton (c5, "Refresh List", BarcodeRefreshButton);
SetObjectExtra (b, drfp, NULL);
- b = PushButton (c3, "Refresh List", BarcodeRefreshButton);
+ c3 = HiddenGroup (h, 4, 0, NULL);
+ SetGroupSpacing (c3, 10, 10);
+ b = PushButton (c3, "Remove BARCODE Keyword from Selected", RemoveSelectedKeywordsBtn);
SetObjectExtra (b, drfp, NULL);
- b = PushButton (c3, "Remove BARCODE Keyword from Selected Sequences", RemoveSelectedKeywordsBtn);
+ b = PushButton (c3, "Add BARCODE Keyword to BARCODE Tech", AddBarcodeKeywordBtn);
SetObjectExtra (b, drfp, NULL);
- b = PushButton (c3, "Add BARCODE Keyword to BARCODE Tech Sequences", AddBarcodeKeywordBtn);
+ b = PushButton (c3, "Remove BARCODE Tech from Selected", RemoveSelectedTechBtn);
SetObjectExtra (b, drfp, NULL);
- c = HiddenGroup (h, 5, 0, NULL);
+ c = HiddenGroup (h, 4, 0, NULL);
SetGroupSpacing (c, 10, 10);
- b = PushButton (c, "Remove BARCODE Tech from Selected Sequences", RemoveSelectedTechBtn);
- SetObjectExtra (b, drfp, NULL);
drfp->undo = PushButton (c, "Undo", BarcodeUndoButton);
SetObjectExtra (drfp->undo, drfp, NULL);
@@ -12825,7 +12846,7 @@ extern void BarcodeTestTool (IteM i)
PushButton (c, "Dismiss", StdCancelButtonProc);
- AlignObjects (ALIGN_CENTER, (HANDLE) drfp->clickable_list, (HANDLE) drfp->pass_fail_summary, (HANDLE) c3, (HANDLE) c, NULL);
+ AlignObjects (ALIGN_CENTER, (HANDLE) drfp->clickable_list, (HANDLE) drfp->pass_fail_summary, (HANDLE) c4, (HANDLE) c5, (HANDLE) c3, (HANDLE) c, NULL);
RealizeWindow (w);
diff --git a/sequin/sequin8.c b/sequin/sequin8.c
index 2e9c7b03..4b3d4018 100644
--- a/sequin/sequin8.c
+++ b/sequin/sequin8.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/3/98
*
-* $Revision: 6.537 $
+* $Revision: 6.540 $
*
* File Description:
*
@@ -403,7 +403,8 @@ extern void ExtendSeqLocToPosition (SeqLocPtr slp, Boolean end5, Int4 pos)
}
}
-static void ExtendOnePartialFeatureEx (SeqFeatPtr sfp, Boolean extend5, Boolean extend3)
+
+static void ExtendOnePartialFeatureExEx (SeqFeatPtr sfp, Boolean extend5, Boolean extend3, Boolean stop_at_gap)
{
BioseqPtr bsp;
Boolean partial3, partial5;
@@ -416,7 +417,11 @@ static void ExtendOnePartialFeatureEx (SeqFeatPtr sfp, Boolean extend5, Boolean
CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
if (partial5 && extend5)
{
- start_diff = ExtendSeqLocToEnd (sfp->location, bsp, TRUE);
+ if (stop_at_gap) {
+ start_diff = ExtendSeqLocToEndOrGap (sfp->location, bsp, TRUE);
+ } else {
+ start_diff = ExtendSeqLocToEnd (sfp->location, bsp, TRUE);
+ }
if (start_diff > 0 && sfp->data.choice == SEQFEAT_CDREGION) {
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
if (crp != NULL) {
@@ -429,10 +434,27 @@ static void ExtendOnePartialFeatureEx (SeqFeatPtr sfp, Boolean extend5, Boolean
}
if (partial3 && extend3)
{
- ExtendSeqLocToEnd (sfp->location, bsp, FALSE);
+ if (stop_at_gap) {
+ ExtendSeqLocToEndOrGap (sfp->location, bsp, FALSE);
+ } else {
+ ExtendSeqLocToEnd (sfp->location, bsp, FALSE);
+ }
}
}
+
+/*
+ * ExtendOnePartialFeatureEx
+ *
+ * Keeps the pre-existing three-argument interface: forwards to
+ * ExtendOnePartialFeatureExEx with stop_at_gap set to FALSE, so the
+ * ExtendSeqLocToEnd path is taken rather than ExtendSeqLocToEndOrGap.
+ *
+ * sfp     - feature to extend.
+ * extend5 - allow extension of a 5' partial end.
+ * extend3 - allow extension of a 3' partial end.
+ */
+static void ExtendOnePartialFeatureEx (SeqFeatPtr sfp, Boolean extend5, Boolean extend3)
+{
+ ExtendOnePartialFeatureExEx (sfp, extend5, extend3, FALSE);
+}
+
+
+/*
+ * ExtendOnePartialFeatureToEndOrGap
+ *
+ * Variant of ExtendOnePartialFeature that enables both the 5' and 3'
+ * extensions and passes stop_at_gap = TRUE to
+ * ExtendOnePartialFeatureExEx.
+ *
+ * sfp      - feature to extend.
+ * userdata - unused.  NOTE(review): the (SeqFeatPtr, Pointer) signature
+ *            suggests use as a feature-visitor callback - confirm at the
+ *            call site.
+ */
+static void ExtendOnePartialFeatureToEndOrGap (SeqFeatPtr sfp, Pointer userdata)
+{
+ ExtendOnePartialFeatureExEx (sfp, TRUE, TRUE, TRUE);
+}
+
+
static void ExtendOnePartialFeature (SeqFeatPtr sfp, Pointer userdata)
{
ExtendOnePartialFeatureEx (sfp, TRUE, TRUE);
@@ -491,6 +513,7 @@ typedef struct extendpartialfeaturesform {
DialoG feature_type;
ButtoN extend5;
ButtoN extend3;
+ ButtoN stop_at_gaps;
DialoG string_constraint;
ButtoN leave_dlg_up;
} ExtendPartialFeaturesFormData, PNTR ExtendPartialFeaturesFormPtr;
@@ -505,7 +528,7 @@ static void DoExtendPartialFeatures (ButtoN b)
ValNodePtr vnp;
StringConstraintPtr scp;
ValNodePtr object_list;
- Boolean extend5, extend3;
+ Boolean extend5, extend3, stop_at_gaps;
f = (ExtendPartialFeaturesFormPtr) GetObjectExtra (b);
if (f == NULL) return;
@@ -538,9 +561,10 @@ static void DoExtendPartialFeatures (ButtoN b)
extend5 = GetStatus (f->extend5);
extend3 = GetStatus (f->extend3);
+ stop_at_gaps = GetStatus (f->stop_at_gaps);
for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) {
- ExtendOnePartialFeatureEx (vnp->data.ptrvalue, extend5, extend3);
+ ExtendOnePartialFeatureExEx (vnp->data.ptrvalue, extend5, extend3, stop_at_gaps);
}
}
object_list = ValNodeFree (object_list);
@@ -601,6 +625,9 @@ extern void ExtendPartialFeaturesWithConstraint (IteM i)
SetStatus (f->extend5, TRUE);
f->extend3 = CheckBox (g, "Extend partial 3'", NULL);
SetStatus (f->extend3, TRUE);
+
+ f->stop_at_gaps = CheckBox (h, "Stop at gaps", NULL);
+ SetStatus (f->stop_at_gaps, TRUE);
p2 = StaticPrompt (h, "Optional Constraint", 0, dialogTextHeight, programFont, 'c');
f->string_constraint = StringConstraintDialog (h, "Where feature text", FALSE, NULL, NULL);
@@ -614,6 +641,7 @@ extern void ExtendPartialFeaturesWithConstraint (IteM i)
AlignObjects (ALIGN_CENTER, (HANDLE) p1,
(HANDLE) f->feature_type,
(HANDLE) g,
+ (HANDLE) f->stop_at_gaps,
(HANDLE) p2,
(HANDLE) f->string_constraint,
(HANDLE) c,
@@ -12229,6 +12257,7 @@ static void AcceptTSAAssembly (ButtoN b)
ValNodePtr err_list, coverage_report, vnp, ids_list, match_errs;
SeqAlignPtr salp, salp_next;
LogInfoPtr lip;
+ SeqEntryPtr sep;
frm = (TSAAssemblyFormPtr) GetObjectExtra (b);
if (frm == NULL) {
@@ -12269,16 +12298,18 @@ static void AcceptTSAAssembly (ButtoN b)
ValNodeLink (&coverage_report, err_list);
err_list = coverage_report;
+ lip = OpenLog ("TSA Table Problems");
if (err_list != NULL) {
- lip = OpenLog ("TSA Table Problems");
for (vnp = err_list; vnp != NULL; vnp = vnp->next) {
fprintf (lip->fp, "%s\n", vnp->data.ptrvalue);
}
lip->data_in_log = TRUE;
- CloseLog (lip);
- lip = FreeLog (lip);
err_list = ValNodeFreeData (err_list);
}
+ sep = GetTopSeqEntryForEntityID (frm->input_entityID);
+ VisitBioseqsInSep (sep, lip, ReportNonTSABioseqs);
+ CloseLog (lip);
+ lip = FreeLog (lip);
}
ObjMgrSetDirtyFlag (frm->input_entityID, TRUE);