diff options
61 files changed, 2606 insertions, 1560 deletions
@@ -1 +1 @@ -Sun Jul 19 10:14:04 EDT 2009 +Sun Aug 9 10:13:45 EDT 2009 diff --git a/access/ent2api.c b/access/ent2api.c index d8e2165e..7c5125ff 100644 --- a/access/ent2api.c +++ b/access/ent2api.c @@ -29,7 +29,7 @@ * * Version Creation Date: 7/29/99 * -* $Revision: 1.113 $ +* $Revision: 1.114 $ * * File Description: * @@ -1079,7 +1079,8 @@ NLM_EXTERN Boolean ValidateEntrez2InfoPtrEx ( StringICmp (db, "nuccore") != 0 && StringICmp (db, "nucgss") != 0 && StringICmp (db, "nucest") != 0 && - StringICmp (db, "toolkit") != 0) { + StringICmp (db, "toolkit") != 0 && + StringICmp (db, "blastdbinfo") != 0) { sprintf (buf, "Database %s has no links", db); ValNodeCopyStr (head, 0, buf); rsult = FALSE; @@ -1350,6 +1351,7 @@ NLM_EXTERN Boolean ValidateEntrez2InfoPtrEx ( } else if (StringICmp (last, "Comment") == 0 && StringICmp (str, "Comments") == 0) { } else if (StringICmp (last, "SID") == 0 && StringICmp (str, "SidExternalID") == 0) { } else if (StringICmp (last, "Platform") == 0 && StringICmp (str, "Platform Reporter Type") == 0) { + } else if (StringICmp (last, "Database") == 0 && StringICmp (str, "Database Name") == 0) { } else { sprintf (buf, "Menu names %s [%s] and %s [%s] may be unintended variants", last, dbnames [lastvnp->choice], str, dbnames [vnp->choice]); ValNodeCopyStr (head, 0, buf); diff --git a/algo/blast/core/blast_extend.c b/algo/blast/core/blast_extend.c index dcfd895c..6c09e454 100644 --- a/algo/blast/core/blast_extend.c +++ b/algo/blast/core/blast_extend.c @@ -1,4 +1,4 @@ -/* $Id: blast_extend.c,v 1.118 2009/01/05 16:54:38 kazimird Exp $ +/* $Id: blast_extend.c,v 1.119 2009/07/30 19:34:30 kazimird Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -30,7 +30,7 @@ #ifndef SKIP_DOXYGEN_PROCESSING static char const rcsid[] = - "$Id: blast_extend.c,v 1.118 2009/01/05 16:54:38 kazimird Exp $"; + "$Id: blast_extend.c,v 1.119 2009/07/30 19:34:30 kazimird Exp $"; #endif /* SKIP_DOXYGEN_PROCESSING */ 
#include <algo/blast/core/blast_extend.h> @@ -80,7 +80,7 @@ s_BlastDiagTableFree(BLAST_DiagTable* diag_table) { if (diag_table) { sfree(diag_table->hit_level_array); - + sfree(diag_table->hit_len_array); sfree(diag_table); } return NULL; @@ -106,6 +106,7 @@ static Int4 s_BlastDiagClear(BLAST_DiagTable * diag) for (i = 0; i < n; i++) { diag_struct_array[i].flag = 0; diag_struct_array[i].last_hit = -diag->window; + if (diag->hit_len_array) diag->hit_len_array[i] = 0; } return 0; } @@ -148,6 +149,10 @@ Int2 BlastExtendWordNew(Uint4 query_length, diag_table->hit_level_array = (DiagStruct *) calloc(diag_table->diag_array_length, sizeof(DiagStruct)); + if (word_params->options->window_size) { + diag_table->hit_len_array = (Uint1 *) + calloc(diag_table->diag_array_length, sizeof(Uint1)); + } if (!diag_table->hit_level_array) { sfree(ewp); return -1; @@ -173,7 +178,7 @@ Blast_ExtendWordExit(Blast_ExtendWord * ewp, Int4 subject_length) } } else if (ewp->hash_table) { if (ewp->hash_table->offset >= INT4_MAX / 4) { - ewp->hash_table->occupancy = 1; + ewp->hash_table->occupancy = 1; ewp->hash_table->offset = ewp->hash_table->window; memset(ewp->hash_table->backbone, 0, ewp->hash_table->num_buckets * sizeof(Int4)); diff --git a/algo/blast/core/blast_extend.h b/algo/blast/core/blast_extend.h index 01f1061a..c627fad1 100644 --- a/algo/blast/core/blast_extend.h +++ b/algo/blast/core/blast_extend.h @@ -1,4 +1,4 @@ -/* $Id: blast_extend.h,v 1.53 2008/07/23 16:55:47 kazimird Exp $ +/* $Id: blast_extend.h,v 1.54 2009/07/30 19:34:30 kazimird Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -66,6 +66,7 @@ typedef struct DiagHashCell { Int4 diag; /**< This hit's diagonal */ Int4 level : 31; /**< This hit's offset in the subject sequence */ Uint4 hit_saved : 1; /**< Whether or not this hit has been saved */ + Int4 hit_len; /**< The length of last hit */ Uint4 next; /**< Offset of next element in the chain */ } DiagHashCell; @@ 
-76,6 +77,7 @@ typedef struct DiagHashCell { typedef struct BLAST_DiagTable { DiagStruct* hit_level_array;/**< Array to hold latest hits and their lengths for all diagonals */ + Uint1* hit_len_array; /**< Array to hold the length of the latest hit */ Int4 diag_array_length; /**< Smallest power of 2 longer than query length */ Int4 diag_mask; /**< Used to mask off everything above min_diag_length (mask = min_diag_length-1). */ diff --git a/algo/blast/core/na_ungapped.c b/algo/blast/core/na_ungapped.c index e9b63197..308fc13e 100644 --- a/algo/blast/core/na_ungapped.c +++ b/algo/blast/core/na_ungapped.c @@ -1,4 +1,4 @@ -/* $Id: na_ungapped.c,v 1.20 2009/06/22 13:54:32 kazimird Exp $ +/* $Id: na_ungapped.c,v 1.21 2009/07/30 19:34:30 kazimird Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -30,7 +30,7 @@ #ifndef SKIP_DOXYGEN_PROCESSING static char const rcsid[] = - "$Id: na_ungapped.c,v 1.20 2009/06/22 13:54:32 kazimird Exp $"; + "$Id: na_ungapped.c,v 1.21 2009/07/30 19:34:30 kazimird Exp $"; #endif /* SKIP_DOXYGEN_PROCESSING */ #include <algo/blast/core/na_ungapped.h> @@ -251,10 +251,12 @@ s_NuclUngappedExtend(BLAST_SequenceBlk * query, * @param diag The diagonal to be retrieved [in] * @param level The offset of the last hit on the specified diagonal [out] * @param hit_saved Whether or not the last hit on the specified diagonal was saved [out] + * @param hit_len Length of the last hit on the specified diagonal [out] * @return 1 if successful, 0 if no hit was found on the specified diagonal. 
*/ static NCBI_INLINE Int4 s_BlastDiagHashRetrieve(BLAST_DiagHash * table, Int4 diag, Int4 * level, + Int4 * hit_len, Int4 * hit_saved) { /* see http://lxr.linux.no/source/include/linux/hash.h */ @@ -265,6 +267,7 @@ static NCBI_INLINE Int4 s_BlastDiagHashRetrieve(BLAST_DiagHash * table, while (index) { if (table->chain[index].diag == diag) { *level = table->chain[index].level; + *hit_len = table->chain[index].hit_len; *hit_saved = table->chain[index].hit_saved; return 1; } @@ -280,40 +283,38 @@ static NCBI_INLINE Int4 s_BlastDiagHashRetrieve(BLAST_DiagHash * table, * @param table The hash table [in] * @param diag The diagonal to be stored [in] * @param level The offset of the hit to be stored [in] + * @param len The length of the hit to be stored [in] * @param hit_saved Whether or not this hit was stored [in] * @param s_end Needed to clean up defunct entries [in] * @param window_size Needed to clean up defunct entries [in] - * @param min_step Needed to clean up defunct entries [in] - * @param two_hits Needed to clean up defunct entries [in] * @return 1 if successful, 0 if memory allocation failed. */ static NCBI_INLINE Int4 s_BlastDiagHashInsert(BLAST_DiagHash * table, Int4 diag, Int4 level, + Int4 len, Int4 hit_saved, - Int4 s_end, - Int4 window_size, - Int4 min_step, - Int4 two_hits) + Int4 s_off, + Int4 window_size) { Uint4 bucket = ((Uint4) diag * 0x9E370001) % DIAGHASH_NUM_BUCKETS; Uint4 index = table->backbone[bucket]; + DiagHashCell *cell = NULL; while (index) { /* if we find what we're looking for, save into it */ if (table->chain[index].diag == diag) { table->chain[index].level = level; + table->chain[index].hit_len = len; table->chain[index].hit_saved = hit_saved; return 1; } /* otherwise, if this hit is stale, save into it. */ else { - Int4 step = s_end - table->chain[index].level; /* if this hit is stale, save into it. */ - if (! 
- (step <= (Int4) min_step - || (two_hits && step <= window_size))) { + if ( s_off - table->chain[index].level > window_size) { table->chain[index].diag = diag; table->chain[index].level = level; + table->chain[index].hit_len = len; table->chain[index].hit_saved = hit_saved; return 1; } @@ -324,7 +325,6 @@ static NCBI_INLINE Int4 s_BlastDiagHashInsert(BLAST_DiagHash * table, /* if we got this far, we were unable to replace any existing entries. */ /* if there's no more room, allocate more */ - if (table->occupancy == table->capacity) { table->capacity *= 2; table->chain = @@ -333,15 +333,14 @@ static NCBI_INLINE Int4 s_BlastDiagHashInsert(BLAST_DiagHash * table, return 0; } - { - DiagHashCell *cell = table->chain + table->occupancy; - cell->diag = diag; - cell->level = level; - cell->hit_saved = hit_saved; - cell->next = table->backbone[bucket]; - table->backbone[bucket] = table->occupancy; - table->occupancy++; - } + cell = table->chain + table->occupancy; + cell->diag = diag; + cell->level = level; + cell->hit_len = len; + cell->hit_saved = hit_saved; + cell->next = table->backbone[bucket]; + table->backbone[bucket] = table->occupancy; + table->occupancy++; return 1; } @@ -421,6 +420,7 @@ s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk * query, { Int4 diag, real_diag; Int4 s_end, s_off_pos, s_end_pos; + Int4 ext_right = 0; BlastUngappedData *ungapped_data; BlastUngappedData dummy_ungapped_data; Int4 window_size = word_params->options->window_size; @@ -429,6 +429,8 @@ s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk * query, DiagStruct *hit_level_array; BlastUngappedCutoffs *cutoffs = NULL; Boolean two_hits = (window_size > 0); + Boolean found = FALSE; + Int4 Delta = MIN(5, window_size - word_length); hit_level_array = diag_table->hit_level_array; ASSERT(hit_level_array); @@ -441,41 +443,56 @@ s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk * query, s_off_pos = s_off + diag_table->offset; s_end_pos = s_end + diag_table->offset; - if (contiguous) { - /* 
hit within the explored area should be rejected*/ - if (s_off_pos < last_hit) return 0; + /* hit within the explored area should be rejected*/ + if (s_off_pos < last_hit) return 0; - if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) { - /* this must be the 1st hit */ - /* check to see if it can be extended to the right by - word_length and therefore qualifies for a double-hit */ - Uint4 ext_right = s_BlastRightExtend(query, subject, + if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) { + /* check to see if it can be extended to the right by + word_length and therefore qualifies for a double-hit */ + if (contiguous) { + ext_right = s_BlastRightExtend(query, subject, q_off + word_length, s_end, query_info, word_length); /* update the right end*/ s_end += ext_right; s_end_pos += ext_right; - if (ext_right < word_length) { - /* if it is not a double hit, then it is a new hit */ + } + + if (ext_right < word_length) { + /* try off-diagonals */ + Int4 orig_diag = real_diag + diag_table->diag_array_length; + Int4 s_a = s_off_pos + word_length - window_size; + Int4 s_b = s_end_pos - 2 * word_length; + Int4 delta; + if (Delta < 0) Delta = 0; + for (delta = 1; delta < Delta ; ++delta) { + Int4 off_diag = (orig_diag + delta) & diag_table->diag_mask; + Int4 off_s_end = hit_level_array[off_diag].last_hit; + Int4 off_s_l = diag_table->hit_len_array[off_diag]; + if ( off_s_l + && off_s_end - delta >= s_a + && off_s_end - off_s_l <= s_b) { + found = TRUE; + break; + } + off_diag = (orig_diag - delta) & diag_table->diag_mask; + off_s_end = hit_level_array[off_diag].last_hit; + off_s_l = diag_table->hit_len_array[off_diag]; + if ( off_s_l + && off_s_end >= s_a + && off_s_end - off_s_l + delta <= s_b) { + found = TRUE; + break; + } + } + if (!found) { + /* This is a new hit */ hit_ready = 0; - last_hit = s_end_pos; - hit_saved = 0; } } - } else { - /* hit within the explored area should be rejected*/ - if (s_off_pos < last_hit) return 0; - - if 
(two_hits && (hit_saved || s_end_pos > last_hit + window_size )) { - /* first hit */ - hit_ready = 0; - last_hit = s_end_pos; - hit_saved = 0; - } } if (hit_ready) { if (word_params->ungapped_extension) { - /* Perform ungapped extension */ Int4 context = BSearchContextInfo(q_off, query_info); cutoffs = word_params->cutoffs + context; ungapped_data = &dummy_ungapped_data; @@ -484,31 +501,27 @@ s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk * query, word_params->nucl_score_table, cutoffs->reduced_nucl_cutoff_score); - last_hit = ungapped_data->length + ungapped_data->s_start - + diag_table->offset; + if (found || ungapped_data->score >= cutoffs->cutoff_score) { + BlastUngappedData *final_data = + (BlastUngappedData *) malloc(sizeof(BlastUngappedData)); + *final_data = *ungapped_data; + BLAST_SaveInitialHit(init_hitlist, q_off, s_off, final_data); + s_end_pos = ungapped_data->length + ungapped_data->s_start + + diag_table->offset; + } else { + hit_ready = 0; + } } else { ungapped_data = NULL; - last_hit = s_end_pos; - } - if (ungapped_data == NULL) { BLAST_SaveInitialHit(init_hitlist, q_off, s_off, ungapped_data); - /* Set the "saved" flag for this hit */ - hit_saved = 1; - } else if (ungapped_data->score >= cutoffs->cutoff_score) { - BlastUngappedData *final_data = - (BlastUngappedData *) malloc(sizeof(BlastUngappedData)); - *final_data = *ungapped_data; - BLAST_SaveInitialHit(init_hitlist, q_off, s_off, final_data); - /* Set the "saved" flag for this hit */ - hit_saved = 1; - } else { - /* Unset the "saved" flag for this hit */ - hit_saved = 0; } } - hit_level_array[real_diag].last_hit = last_hit; - hit_level_array[real_diag].flag = hit_saved; + hit_level_array[real_diag].last_hit = s_end_pos; + hit_level_array[real_diag].flag = hit_ready; + if (two_hits) { + diag_table->hit_len_array[real_diag] = (hit_ready) ? 
0 : s_end_pos - s_off_pos; + } return hit_ready; } @@ -543,7 +556,8 @@ s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk * query, BlastInitHitList * init_hitlist) { Int4 diag; - Int4 s_end, s_off_pos, s_end_pos; + Int4 s_end, s_off_pos, s_end_pos, s_l; + Int4 ext_right = 0; BlastUngappedData *ungapped_data; BlastUngappedData dummy_ungapped_data; Int4 window_size = word_params->options->window_size; @@ -551,6 +565,8 @@ s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk * query, Int4 last_hit, hit_saved = 0; BlastUngappedCutoffs *cutoffs = NULL; Boolean two_hits = (window_size > 0); + Boolean found = FALSE; + Int4 Delta = MIN(5, window_size - word_length); Int4 rc; diag = s_off - q_off; @@ -558,40 +574,60 @@ s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk * query, s_off_pos = s_off + hash_table->offset; s_end_pos = s_end + hash_table->offset; - rc = s_BlastDiagHashRetrieve(hash_table, diag, &last_hit, &hit_saved); + rc = s_BlastDiagHashRetrieve(hash_table, diag, &last_hit, &s_l, &hit_saved); /* if there is no record in hashtable, we set last_hit to be a very negative number */ - if(!rc) last_hit = 0; - if (contiguous) { - /* hit within the explored area should be rejected*/ - if (s_off_pos < last_hit) return 0; - - if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) { - /* this must be the 1st hit */ - /* check to see if it can be extended to the right by - word_length and therefore qualifies for a double-hit */ - Uint4 ext_right = s_BlastRightExtend(query, subject, + if(!rc) last_hit = 0; + + /* hit within the explored area should be rejected*/ + if (s_off_pos < last_hit) return 0; + + if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) { + /* this must be the 1st hit */ + /* check to see if it can be extended to the right by + word_length and therefore qualifies for a double-hit */ + if (contiguous) { + ext_right = s_BlastRightExtend(query, subject, q_off + word_length, s_end, query_info, word_length); /* update the right end*/ 
s_end += ext_right; s_end_pos += ext_right; - if (ext_right < word_length) { - /* if it is not a double hit, then it is a new hit */ + } + + if (ext_right < word_length) { + /* try off-diagonal */ + Int4 s_a = s_off_pos + word_length - window_size; + Int4 s_b = s_end_pos - 2 * word_length; + Int4 delta; + if (Delta < 0) Delta = 0; + for (delta = 1; delta < Delta; ++delta) { + Int4 off_s_end = 0; + Int4 off_s_l = 0; + Int4 off_hit_saved = 0; + Int4 off_rc = s_BlastDiagHashRetrieve(hash_table, diag + delta, + &off_s_end, &off_s_l, &off_hit_saved); + if ( off_rc + && off_s_l + && off_s_end - delta >= s_a + && off_s_end - off_s_l <= s_b) { + found = TRUE; + break; + } + off_rc = s_BlastDiagHashRetrieve(hash_table, diag - delta, + &off_s_end, &off_s_l, &off_hit_saved); + if ( off_rc + && off_s_l + && off_s_end >= s_a + && off_s_end - off_s_l + delta <= s_b) { + found = TRUE; + break; + } + } + if (!found) { + /* This is a new hit */ hit_ready = 0; - last_hit = s_end_pos; - hit_saved = 0; } } - } else { - /* hit within the explored area should be rejected*/ - if (s_off_pos < last_hit) return 0; - - if (two_hits && (hit_saved || s_end_pos > last_hit + window_size )) { - /* first hit */ - hit_ready = 0; - last_hit = s_end_pos; - hit_saved = 0; - } } if (hit_ready) { @@ -605,32 +641,25 @@ s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk * query, ungapped_data, word_params->nucl_score_table, cutoffs->reduced_nucl_cutoff_score); - - last_hit = ungapped_data->length + ungapped_data->s_start - + hash_table->offset; + if (found || ungapped_data->score >= cutoffs->cutoff_score) { + BlastUngappedData *final_data = + (BlastUngappedData *) malloc(sizeof(BlastUngappedData)); + *final_data = *ungapped_data; + BLAST_SaveInitialHit(init_hitlist, q_off, s_off, final_data); + s_end_pos = ungapped_data->length + ungapped_data->s_start + + hash_table->offset; + } else { + hit_ready = 0; + } } else { ungapped_data = NULL; - last_hit = s_end_pos; - } - if (ungapped_data == NULL) { 
BLAST_SaveInitialHit(init_hitlist, q_off, s_off, ungapped_data); - /* Set the "saved" flag for this hit */ - hit_saved = 1; - } else if (ungapped_data->score >= cutoffs->cutoff_score) { - BlastUngappedData *final_data = - (BlastUngappedData *) malloc(sizeof(BlastUngappedData)); - *final_data = *ungapped_data; - BLAST_SaveInitialHit(init_hitlist, q_off, s_off, final_data); - /* Set the "saved" flag for this hit */ - hit_saved = 1; - } else { - /* Unset the "saved" flag for this hit */ - hit_saved = 0; } } - - s_BlastDiagHashInsert(hash_table, diag, last_hit, hit_saved, - s_end + hash_table->offset, window_size, word_length ,two_hits); + + s_BlastDiagHashInsert(hash_table, diag, s_end_pos, + (hit_ready) ? 0 : s_end_pos - s_off_pos, + hit_ready, s_off_pos, window_size + Delta); return hit_ready; } diff --git a/api/asn2gnb3.c b/api/asn2gnb3.c index 025c5b77..3fd89877 100644 --- a/api/asn2gnb3.c +++ b/api/asn2gnb3.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.111 $ +* $Revision: 1.113 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -308,12 +308,6 @@ static void AddWGSMasterCommentString ( if (StringHasNoText (taxname)) { taxname = "?"; } - if (StringHasNoText (first)) { - first = "?"; - } - if (StringHasNoText (last)) { - last = "?"; - } ver [0] = '\0'; acclen = StringLen (wgsname); if (acclen == 12) { @@ -330,15 +324,30 @@ static void AddWGSMasterCommentString ( sprintf (buf, "The %s whole genome shotgun (WGS) project has the project accession %s.", taxname, wgsaccn); FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND); - sprintf (buf, " This version of the project (%s) has the accession number %s,", ver, wgsname); + sprintf (buf, " This version of the project (%s) has the accession number %s", ver, wgsname); FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_EXPAND); - if (StringCmp (first, last) != 0) { - sprintf (buf, " and consists of sequences %s-%s.", first, last); + if (first == NULL && last == NULL) 
{ + sprintf (buf, "."); FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND); } else { - sprintf (buf, " and consists of sequence %s.", first); - FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND); + if (first != NULL && last == NULL) { + last = first; + } else if (first == NULL && last != NULL) { + first = last; + } + if (StringDoesHaveText (first) && StringDoesHaveText (last)) { + if (StringCmp (first, last) != 0) { + sprintf (buf, ", and consists of sequences %s-%s.", first, last); + FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND); + } else { + sprintf (buf, ", and consists of sequence %s.", first); + FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND); + } + } else { + sprintf (buf, "."); + FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND); + } } } @@ -1295,21 +1304,27 @@ static CharPtr GetStrForTpaOrRefSeqHist ( { Boolean accn; - Char buf [64]; + Char buf [100]; DbtagPtr dbt; Int4 gi; ValNodePtr head = NULL; SeqHistPtr hist; SeqIdPtr id; + Int2 j; + int k; + Int2 max; Boolean minus1; Boolean minus2; + Int4 oldstop = -1; + Uint1 residue; SeqAlignPtr salp; SeqAlignPtr salptmp; + StreamCache sc; SeqIdPtr sip; Int4 start; Int4 stop; CharPtr str; - Char tmp [80]; + Char tmp [120]; if (bsp == NULL) return NULL; hist = bsp->hist; @@ -1344,6 +1359,65 @@ static CharPtr GetStrForTpaOrRefSeqHist ( ValNodeCopyStr (&head, 0, "TPA_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP"); } } + if (isRefSeq && oldstop > -1 && oldstop < start) { + sprintf (tmp, "~%ld-%ld ", + (long) (oldstop + 1), (long) (start)); + tmp [21] = '\0'; + StringCpy (buf, " "); + k = 0; + if (StreamCacheSetup (bsp, NULL, 0, &sc)) { + if (start - oldstop < 15) { + StreamCacheSetPosition (&sc, oldstop); + buf [k] = '"'; + k++; + max = start - oldstop; + for (j = 0; j < max; j++) { + residue = StreamCacheGetResidue (&sc); + buf [k] = (Char) residue; + k++; + } + buf [k] = '"'; + k++; + } else { + StreamCacheSetPosition (&sc, oldstop); + buf [k] = '"'; + k++; + for (j = 0; j < 4; 
j++) { + residue = StreamCacheGetResidue (&sc); + buf [k] = (Char) residue; + k++; + } + buf [k] = '.'; + k++; + buf [k] = '.'; + k++; + buf [k] = '.'; + k++; + StreamCacheSetPosition (&sc, start - 4); + for (j = 0; j < 4; j++) { + residue = StreamCacheGetResidue (&sc); + buf [k] = (Char) residue; + k++; + } + buf [k] = '"'; + k++; + } + } else { + /* + StringCpy (buf, "inserted base(s)"); + */ + } + buf [k] = '\0'; + StringCat (buf, " "); + buf [18] = '\0'; + StringCat (tmp, buf); + sprintf (buf, " %ld-%ld ", + (long) 1, (long) (start - oldstop)); + buf [21] = '\0'; + StringCat (tmp, buf); + ValNodeCopyStr (&head, 0, tmp); + } + oldstop = stop + 1; if (id != NULL) { SeqIdWrite (id, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 1); if (id->choice == SEQID_GENERAL) { diff --git a/api/asn2gnb4.c b/api/asn2gnb4.c index ae78f805..5564f1e3 100644 --- a/api/asn2gnb4.c +++ b/api/asn2gnb4.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.199 $ +* $Revision: 1.201 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -183,6 +183,7 @@ static FtQualType feat_qual_order [] = { FTQUAL_label, FTQUAL_cds_product, FTQUAL_extra_products, + FTQUAL_UniProtKB_evidence, FTQUAL_protein_id, FTQUAL_transcript_id, FTQUAL_db_xref, @@ -376,6 +377,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = { { "trna_aa", Qual_class_ignore }, { "codon_recognized", Qual_class_trna_codons }, { "trna_codons", Qual_class_trna_codons }, + { "UniProtKB_evidence", Qual_class_quote }, { "usedin", Qual_class_usedin }, { "xtra_products", Qual_class_xtraprds } }; @@ -386,49 +388,50 @@ typedef struct qualfeatur { FtQualType featurclass; } QualFeatur, PNTR QualFeaturPtr; -#define NUM_GB_QUALS 40 +#define NUM_GB_QUALS 41 static QualFeatur qualToFeature [NUM_GB_QUALS] = { - { "allele", FTQUAL_allele }, - { "bound_moiety", FTQUAL_bound_moiety }, - { "clone", FTQUAL_clone }, - { "codon", FTQUAL_codon }, - { "compare", FTQUAL_compare }, - { 
"cons_splice", FTQUAL_cons_splice }, - { "cyt_map", FTQUAL_gene_cyt_map }, - { "direction", FTQUAL_direction }, - { "EC_number", FTQUAL_EC_number }, - { "estimated_length", FTQUAL_estimated_length }, - { "experiment", FTQUAL_experiment }, - { "frequency", FTQUAL_frequency }, - { "function", FTQUAL_function }, - { "gen_map", FTQUAL_gene_gen_map }, - { "inference", FTQUAL_inference }, - { "insertion_seq", FTQUAL_insertion_seq }, - { "label", FTQUAL_label }, - { "map", FTQUAL_map }, - { "mobile_element", FTQUAL_mobile_element }, - { "mod_base", FTQUAL_mod_base }, - { "ncRNA_class", FTQUAL_ncRNA_class }, - { "number", FTQUAL_number }, - { "old_locus_tag", FTQUAL_old_locus_tag }, - { "operon", FTQUAL_operon }, - { "organism", FTQUAL_organism }, - { "PCR_conditions", FTQUAL_PCR_conditions }, - { "phenotype", FTQUAL_phenotype }, - { "product", FTQUAL_product_quals }, - { "rad_map", FTQUAL_gene_rad_map }, - { "replace", FTQUAL_replace }, - { "rpt_family", FTQUAL_rpt_family }, - { "rpt_type", FTQUAL_rpt_type }, - { "rpt_unit", FTQUAL_rpt_unit }, - { "rpt_unit_range", FTQUAL_rpt_unit_range }, - { "rpt_unit_seq", FTQUAL_rpt_unit_seq }, - { "satellite", FTQUAL_satellite }, - { "standard_name", FTQUAL_standard_name }, - { "tag_peptide", FTQUAL_tag_peptide }, - { "transposon", FTQUAL_transposon }, - { "usedin", FTQUAL_usedin } + { "allele", FTQUAL_allele }, + { "bound_moiety", FTQUAL_bound_moiety }, + { "clone", FTQUAL_clone }, + { "codon", FTQUAL_codon }, + { "compare", FTQUAL_compare }, + { "cons_splice", FTQUAL_cons_splice }, + { "cyt_map", FTQUAL_gene_cyt_map }, + { "direction", FTQUAL_direction }, + { "EC_number", FTQUAL_EC_number }, + { "estimated_length", FTQUAL_estimated_length }, + { "experiment", FTQUAL_experiment }, + { "frequency", FTQUAL_frequency }, + { "function", FTQUAL_function }, + { "gen_map", FTQUAL_gene_gen_map }, + { "inference", FTQUAL_inference }, + { "insertion_seq", FTQUAL_insertion_seq }, + { "label", FTQUAL_label }, + { "map", FTQUAL_map }, + { 
"mobile_element", FTQUAL_mobile_element }, + { "mod_base", FTQUAL_mod_base }, + { "ncRNA_class", FTQUAL_ncRNA_class }, + { "number", FTQUAL_number }, + { "old_locus_tag", FTQUAL_old_locus_tag }, + { "operon", FTQUAL_operon }, + { "organism", FTQUAL_organism }, + { "PCR_conditions", FTQUAL_PCR_conditions }, + { "phenotype", FTQUAL_phenotype }, + { "product", FTQUAL_product_quals }, + { "rad_map", FTQUAL_gene_rad_map }, + { "replace", FTQUAL_replace }, + { "rpt_family", FTQUAL_rpt_family }, + { "rpt_type", FTQUAL_rpt_type }, + { "rpt_unit", FTQUAL_rpt_unit }, + { "rpt_unit_range", FTQUAL_rpt_unit_range }, + { "rpt_unit_seq", FTQUAL_rpt_unit_seq }, + { "satellite", FTQUAL_satellite }, + { "standard_name", FTQUAL_standard_name }, + { "tag_peptide", FTQUAL_tag_peptide }, + { "transposon", FTQUAL_transposon }, + { "UniProtKB_evidence", FTQUAL_UniProtKB_evidence }, + { "usedin", FTQUAL_usedin } }; static Int2 GbqualToFeaturIndex ( @@ -927,6 +930,7 @@ static ValQual legalGbqualList [] = { { FEATDEF_CDS , FTQUAL_standard_name }, { FEATDEF_PROT , FTQUAL_product }, + { FEATDEF_PROT , FTQUAL_UniProtKB_evidence }, { FEATDEF_preRNA , FTQUAL_allele }, { FEATDEF_preRNA , FTQUAL_function }, @@ -3481,6 +3485,7 @@ static void FormatFeatureBlockQuals ( tmp = StringSave (gbq->val); str = tmp; len = StringLen (str); +#if 0 if (len > 1 && *str == '(' && str [len - 1] == ')' /* && StringChr (str + 1, '(') == NULL /* && StringChr (str, ',') != NULL */) { str++; @@ -3511,6 +3516,7 @@ static void FormatFeatureBlockQuals ( str = ptr; } } else { +#endif if ((! 
ajp->flags.checkQualSyntax) || (ValidateRptUnit (str))) { TrimSpacesAroundString (str); if (idx == FTQUAL_rpt_unit_range) { @@ -3526,7 +3532,9 @@ static void FormatFeatureBlockQuals ( FFAddOneChar(ffstring, '\n', FALSE); } } +#if 0 } +#endif MemFree (tmp); } gbq = gbq->next; diff --git a/api/asn2gnb5.c b/api/asn2gnb5.c index f1be0ec4..99b3b6f5 100644 --- a/api/asn2gnb5.c +++ b/api/asn2gnb5.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.154 $ +* $Revision: 1.155 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -154,9 +154,11 @@ static UrlData Nlm_url_base [] = { {"GeneID", "http://www.ncbi.nlm.nih.gov/sites/entrez?db=gene&cmd=Retrieve&dopt=full_report&list_uids="}, {"GO", "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&depth=1&query=GO:"}, {"GOA", "http://www.ebi.ac.uk/ego/GProtein?ac="}, + {"GreengenesID", "http://greengenes.lbl.gov/cgi-bin/show_one_record_v2.pl?prokMSA_id="}, {"GRIN", "http://www.ars-grin.gov/cgi-bin/npgs/acc/display.pl?"}, {"H-InvDB", "http://www.h-invitational.jp"}, {"HGNC", "http://www.genenames.org/data/hgnc_data.php?hgnc_id="}, + {"HMPID", "http://www.hmpdacc-resources.org/cgi-bin/hmp_catalog/main.cgi?section=HmpSummary&page=displayHmpProject&hmp_id="}, {"HOMD", "http://www.homd.org/"}, {"HPRD", "http://www.hprd.org/protein/"}, {"HSSP", "http://srs.ebi.ac.uk/srsbin/cgi-bin/wgetz?-newId+-e+hssp-ID:"}, diff --git a/api/asn2gnb6.c b/api/asn2gnb6.c index d9cda849..3abbb4f7 100644 --- a/api/asn2gnb6.c +++ b/api/asn2gnb6.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.196 $ +* $Revision: 1.198 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -472,9 +472,11 @@ NLM_EXTERN CharPtr legalDbXrefs [] = { "GeneID", "GO", "GOA", + "GreengenesID", "GRIN", "H-InvDB", "HGNC", + "HMPID", "HOMD", "HSSP", "IMGT/GENE-DB", @@ -547,6 +549,7 @@ NLM_EXTERN CharPtr legalSrcDbXrefs [] = { "IMGT/LIGM", "JCM", "MGI", + "MycoBank", "NBRC", 
"RZPD", "taxon", diff --git a/api/asn2gnbi.h b/api/asn2gnbi.h index 4bf0a98b..fb2848c3 100644 --- a/api/asn2gnbi.h +++ b/api/asn2gnbi.h @@ -29,7 +29,7 @@ * * Version Creation Date: 12/30/03 * -* $Revision: 1.111 $ +* $Revision: 1.112 $ * * File Description: New GenBank flatfile generator, internal header * @@ -718,6 +718,7 @@ typedef enum { FTQUAL_trna_aa, FTQUAL_trna_codons, FTQUAL_trna_codons_note, + FTQUAL_UniProtKB_evidence, FTQUAL_usedin, FTQUAL_xtra_prod_quals, ASN2GNBK_TOTAL_FEATUR diff --git a/api/gbftdef.h b/api/gbftdef.h index 04a2ab0b..069a018e 100644 --- a/api/gbftdef.h +++ b/api/gbftdef.h @@ -116,8 +116,9 @@ #define GBQUAL_mating_type 106 #define GBQUAL_satellite 107 #define GBQUAL_gene_synonym 108 +#define GBQUAL_UniProtKB_evidence 109 -#define ParFlat_TOTAL_GBQUAL 109 +#define ParFlat_TOTAL_GBQUAL 110 #define ParFlat_TOTAL_IntOr 3 #define ParFlat_TOTAL_LRB 3 #define ParFlat_TOTAL_Exp 2 diff --git a/api/gbftglob.c b/api/gbftglob.c index a8b0e265..0bede378 100644 --- a/api/gbftglob.c +++ b/api/gbftglob.c @@ -63,7 +63,8 @@ static GbFeatName STATIC__ParFlat_GBQual_names[ParFlat_TOTAL_GBQUAL] = { {"metagenomic", Class_none}, { "culture_collection", Class_text}, {"bio_material", Class_text}, { "ncRNA_class", Class_text}, {"tag_peptide", Class_text}, { "mating_type", Class_text}, - {"satellite", Class_text}, { "gene_synonym", Class_text} + {"satellite", Class_text}, { "gene_synonym", Class_text}, + { "UniProtKB_evidence", Class_text} }; NLM_EXTERN GbFeatNamePtr x_ParFlat_GBQual_names(void) { diff --git a/api/macroapi.c b/api/macroapi.c index 6341e97b..a1b92bf9 100755 --- a/api/macroapi.c +++ b/api/macroapi.c @@ -29,7 +29,7 @@ * * Version Creation Date: 11/8/2007 * -* $Revision: 1.201 $ +* $Revision: 1.202 $ * * File Description: * @@ -6416,35 +6416,6 @@ static CharPtr GetAnticodonLocString (SeqFeatPtr sfp) -static SeqFeatPtr GetProtFeature (BioseqPtr protbsp) -{ - SeqMgrFeatContext fcontext; - SeqAnnotPtr sap; - SeqFeatPtr prot_sfp; - ProtRefPtr prp; - - if 
(protbsp == NULL) return NULL; - - prot_sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext); - if (prot_sfp == NULL) { - sap = protbsp->annot; - while (sap != NULL && prot_sfp == NULL) { - if (sap->type == 1) { - prot_sfp = sap->data; - while (prot_sfp != NULL - && (prot_sfp->data.choice != SEQFEAT_PROT - || (prp = prot_sfp->data.value.ptrvalue) == NULL - || prp->processed != 0)) { - prot_sfp = prot_sfp->next; - } - } - sap = sap->next; - } - } - return prot_sfp; -} - - static ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp) { BioseqPtr protbsp; @@ -8344,9 +8315,8 @@ NLM_EXTERN Uint2 GetEntityIdFromObject (Uint1 choice, Pointer data) ObjValNodePtr ovp; SeqFeatPtr sfp; BioseqPtr bsp; - SeqMgrDescContext context; - if (data == NULL) return NULL; + if (data == NULL) return 0; switch (choice) { @@ -12144,7 +12114,6 @@ NLM_EXTERN Boolean SetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTy ObjValNodePtr ovp; GBBlockPtr gb; Boolean was_empty; - ValNodePtr molinfo_field; if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE; @@ -17982,120 +17951,6 @@ static void CreateDataForFeature (SeqFeatPtr sfp, Int4 feature_type) } -static void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep) -{ - ByteStorePtr bs; - CharPtr prot, ptr; - BioseqPtr bsp; - Char ch; - Int4 i; - SeqEntryPtr psep, nsep; - MolInfoPtr mip; - ValNodePtr vnp, descr; - SeqFeatPtr prot_sfp; - ProtRefPtr prp; - Boolean partial5, partial3; - - if (cds == NULL) return; - - CheckSeqLocForPartial (cds->location, &partial5, &partial3); - - /* Create corresponding protein sequence data for the CDS */ - - bs = ProteinFromCdRegionEx (cds, TRUE, FALSE); - if (NULL == bs) - return; - - prot = BSMerge (bs, NULL); - bs = BSFree (bs); - if (NULL == prot) - return; - - ptr = prot; - ch = *ptr; - while (ch != '\0') { - *ptr = TO_UPPER (ch); - ptr++; - ch = *ptr; - } - i = StringLen (prot); - if (i > 0 && prot [i - 1] == '*') { - prot [i - 1] = '\0'; - } - bs 
= BSNew (1000); - if (bs != NULL) { - ptr = prot; - BSWrite (bs, (VoidPtr) ptr, (Int4) StringLen (ptr)); - } - - /* Create the product protein Bioseq */ - - bsp = BioseqNew (); - if (NULL == bsp) - return; - - bsp->repr = Seq_repr_raw; - bsp->mol = Seq_mol_aa; - bsp->seq_data_type = Seq_code_ncbieaa; - bsp->seq_data = (SeqDataPtr) bs; - bsp->length = BSLen (bs); - bs = NULL; - bsp->id = MakeNewProteinSeqId (cds->location, NULL); - SeqMgrAddToBioseqIndex (bsp); - - /* Create a new SeqEntry for the Prot Bioseq */ - - psep = SeqEntryNew (); - if (NULL == psep) - return; - - psep->choice = 1; - psep->data.ptrvalue = (Pointer) bsp; - SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, psep); - - /* Add a descriptor to the protein Bioseq */ - - mip = MolInfoNew (); - if (NULL == mip) - return; - - mip->biomol = 8; - mip->tech = 8; - if (partial5 && partial3) { - mip->completeness = 5; - } else if (partial5) { - mip->completeness = 3; - } else if (partial3) { - mip->completeness = 4; - } - vnp = CreateNewDescriptor (psep, Seq_descr_molinfo); - if (NULL == vnp) - return; - - vnp->data.ptrvalue = (Pointer) mip; - - /**/ - - descr = ExtractBioSourceAndPubs (parent_sep); - - AddSeqEntryToSeqEntry (parent_sep, psep, TRUE); - nsep = FindNucSeqEntry (parent_sep); - ReplaceBioSourceAndPubs (parent_sep, descr); - SetSeqFeatProduct (cds, bsp); - - prp = ProtRefNew (); - - if (prp != NULL) { - prot_sfp = CreateNewFeature (psep, NULL, SEQFEAT_PROT, NULL); - if (prot_sfp != NULL) { - prot_sfp->data.value.ptrvalue = (Pointer) prp; - SetSeqLocPartial (prot_sfp->location, partial5, partial3); - prot_sfp->partial = (partial5 || partial3); - } - } -} - - static SeqLocPtr LocationFromApplyFeatureAction (BioseqPtr bsp, ApplyFeatureActionPtr action) { LocationIntervalPtr l; @@ -18989,6 +18844,12 @@ static Boolean ConvertRNAToRNA (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureD } +static Boolean MiscFeatToCodingRegionConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options) 
+{ + return ConvertMiscFeatToCodingRegion (sfp); +} + + typedef struct convertfeattable { Uint2 seqfeat_from; Uint2 featdef_from; @@ -19035,6 +18896,9 @@ static ConvertFeatTableData conversion_functions[] = { { SEQFEAT_IMP, FEATDEF_ANY, SEQFEAT_RNA, FEATDEF_ANY, ConvertImpToRNAFunc, "Creates an RNA feature of the specified subtype. Import feature key is discarded." }, + { SEQFEAT_IMP, FEATDEF_misc_feature, SEQFEAT_CDREGION, FEATDEF_CDS, + MiscFeatToCodingRegionConvertFunc, + "Use misc_feature comment for coding region product name." }, { SEQFEAT_REGION, FEATDEF_REGION, SEQFEAT_IMP, FEATDEF_ANY, ConvertRegionToImp, "Creates a misc_feature with the region name saved as a /note qualifier." }, diff --git a/api/seqport.c b/api/seqport.c index 06857b6c..06f10c06 100644 --- a/api/seqport.c +++ b/api/seqport.c @@ -29,7 +29,7 @@ * * Version Creation Date: 7/13/91 * -* $Revision: 6.174 $ +* $Revision: 6.177 $ * * File Description: Ports onto Bioseqs * @@ -4104,6 +4104,7 @@ NLM_EXTERN SeqLocPtr LIBCALL productLoc_to_locationLoc(SeqFeatPtr sfp, SeqLocPtr SeqBondPtr sbp; ValNode vn; Boolean is_cdregion = FALSE; + Boolean partial5, partial3; if ((sfp == NULL) || (productLoc == NULL)) return head; if (sfp->data.choice == 3) is_cdregion = TRUE; @@ -4114,14 +4115,12 @@ NLM_EXTERN SeqLocPtr LIBCALL productLoc_to_locationLoc(SeqFeatPtr sfp, SeqLocPtr if (productLoc->choice == SEQLOC_BOND) /* fake this one in */ { sbp = (SeqBondPtr)(productLoc->data.ptrvalue); - tmp = productInterval_to_locationIntervals(sfp, sbp->a->point, -sbp->a->point); + tmp = productInterval_to_locationIntervals(sfp, sbp->a->point, sbp->a->point, FALSE); if (sbp->b == NULL) /* one point in bond */ return tmp; SeqLocAdd(&head, tmp, TRUE, FALSE); - tmp = productInterval_to_locationIntervals(sfp, sbp->b->point, -sbp->b->point); + tmp = productInterval_to_locationIntervals(sfp, sbp->b->point, sbp->b->point, FALSE); if (tmp == NULL) return head; @@ -4135,6 +4134,7 @@ sbp->b->point); goto ret; } + 
CheckSeqLocForPartial (productLoc, &partial5, &partial3); slp = NULL; while ((slp = SeqLocFindNext(productLoc, slp)) != NULL) { @@ -4142,7 +4142,7 @@ sbp->b->point); product_stop = SeqLocStop(slp); if ((product_start >= 0) && (product_stop >= 0)) { - tmp = productInterval_to_locationIntervals(sfp, product_start, product_stop); + tmp = productInterval_to_locationIntervals(sfp, product_start, product_stop, partial5); if(tmp != NULL) load_fuzz_to_DNA(tmp, slp, TRUE); while (tmp != NULL) @@ -4189,6 +4189,7 @@ NLM_EXTERN SeqLocPtr LIBCALL aaFeatLoc_to_dnaFeatLoc(SeqFeatPtr sfp, CdRegionPtr crp; SeqIntPtr sp1, sp2; BioseqPtr bsp; + Boolean aa_partialn, aa_partialc; dnaLoc = aaLoc_to_dnaLoc(sfp, aa_loc); if (dnaLoc == NULL) return dnaLoc; @@ -4196,10 +4197,12 @@ NLM_EXTERN SeqLocPtr LIBCALL aaFeatLoc_to_dnaFeatLoc(SeqFeatPtr sfp, if (! sfp->partial) /* no partial checks needed */ return dnaLoc; + + CheckSeqLocForPartial (aa_loc, &aa_partialn, &aa_partialc); crp = (CdRegionPtr)(sfp->data.value.ptrvalue); aaPos = SeqLocStart(aa_loc); - if ((! aaPos) && (crp->frame > 1)) /* using first amino acid */ + if ((! 
aaPos) && (crp->frame > 1) && aa_partialn) /* using first amino acid */ { tmp1 = SeqLocFindNext(sfp->location, NULL); tmp2 = SeqLocFindNext(dnaLoc, NULL); @@ -4221,7 +4224,7 @@ NLM_EXTERN SeqLocPtr LIBCALL aaFeatLoc_to_dnaFeatLoc(SeqFeatPtr sfp, } dnaPartial = SeqLocPartialCheck(sfp->location); - if (dnaPartial & SLP_STOP) /* missing 3' end of cdregion */ + if ((dnaPartial & SLP_STOP) && aa_partialc) /* missing 3' end of cdregion */ { sip = SeqLocId(aa_loc); bsp = BioseqFindCore(sip); @@ -4262,19 +4265,162 @@ NLM_EXTERN SeqLocPtr LIBCALL aaFeatLoc_to_dnaFeatLoc(SeqFeatPtr sfp, return dnaLoc; } -/****************************************************************** -* -* productInterval_to_locationIntervals(sfp, product_start, product_stop) -* map the amino acid sequence to a chain of Seq-locs in the -* DNA sequence through a CdRegion feature -* -******************************************************************/ -NLM_EXTERN SeqLocPtr LIBCALL productInterval_to_locationIntervals(SeqFeatPtr sfp, Int4 product_start, Int4 -product_stop) + +static SeqLocPtr +NucLocFromProtInterval +(SeqFeatPtr cds, + Int4 prot_start, + Int4 prot_stop, + Boolean n_partial) { - Int4 frame_offset, start_offset; /*for determine the reading frame*/ - SeqLocPtr slp = NULL; CdRegionPtr crp; + Int4 nt_before = 0, aa_before = 0, nt_this, prev_nt = 0, part_codon; + SeqLocPtr result = NULL; + SeqLocPtr slp = NULL; /* used for iterating through locations in the coding region */ + SeqLocPtr loc; /* used for creating interval on NT sequence */ + Boolean first_loc = TRUE; + Int4 cds_int_start, cds_int_stop, cds_int_len; + Int4 frame_start = 0; + Int4 aa_int_start, aa_int_stop, aa_len, this_aa, aa_needed, aa_unneeded, aa_accumulated = 0; + Int4 aa_from_this_interval; + Uint1 strand; + + if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION || prot_start < 0 || prot_stop < prot_start) { + return NULL; + } + + crp = (CdRegionPtr) cds->data.value.ptrvalue; + if (crp == NULL) { + return NULL; + } + if 
(crp->frame > 1) { + frame_start = crp->frame - 1; + } + + aa_len = prot_stop - prot_start + 1; + + while((slp = SeqLocFindNext(cds->location, slp)) != NULL) { + cds_int_len = SeqLocLen (slp); + cds_int_start = SeqLocStart (slp); + cds_int_stop = SeqLocStop (slp); + strand = SeqLocStrand (slp); + + if (first_loc) { + if (strand == Seq_strand_minus) { + cds_int_stop -= frame_start; + } else { + cds_int_start += frame_start; + } + cds_int_len -= frame_start; + } + + /* calculate the number of NT that "count" for this interval - + * don't include the NT in a partial codon at the beginning of + * of the feature, but do include NT from a partial codon at + * the end of the previous interval. + */ + nt_this = cds_int_len + prev_nt; + part_codon = nt_this % 3; + nt_this -= part_codon; + + /* calculate how many AA are covered by this interval */ + this_aa = nt_this / 3; + + if (aa_before + this_aa >= prot_start) { + + /* figure out whether to take all of this interval, or just part of it */ + aa_from_this_interval = this_aa; + + /* 5' end (left for plus strand, right for minus) */ + if (aa_before < prot_start) { + /* skip some at the beginning */ + aa_unneeded = prot_start - aa_before; + aa_from_this_interval -= aa_unneeded; + + if (strand == Seq_strand_minus) { + aa_int_stop = cds_int_stop + prev_nt - (3 * aa_unneeded); + } else { + aa_int_start = cds_int_start - prev_nt + (3 * aa_unneeded); + } + } else { + /* start at the beginning */ + if (strand == Seq_strand_minus) { + aa_int_stop = cds_int_stop; + if (first_loc) { + if (n_partial) { + /* put frame shift back in, if first loc and n-partial */ + aa_int_stop += frame_start; + } else if (aa_before == prot_start) { + /* starts in this interval, but after "remainder" of previous codon */ + aa_int_stop -= prev_nt; + } + } + } else { + aa_int_start = cds_int_start; + if (first_loc) { + if (n_partial) { + /* put frame shift back in, if first loc and n-partial */ + aa_int_start -= frame_start; + } else if (aa_before == 
prot_start) { + /* starts in this interval, but after "remainder" of previous codon */ + aa_int_start += prev_nt; + } + } + } + } + + /* 3' end (right for plus strand, left for minus) */ + if (aa_accumulated + aa_from_this_interval < aa_len) { + if (strand == Seq_strand_minus) { + aa_int_start = cds_int_start; + } else { + aa_int_stop = cds_int_stop; + } + } else { + /* just take the part that we need */ + aa_needed = aa_len - aa_accumulated; + aa_unneeded = aa_from_this_interval - aa_needed; + + if (strand == Seq_strand_minus) { + aa_int_start = cds_int_start + part_codon + (3 * aa_unneeded); + } else { + aa_int_stop = cds_int_stop - part_codon - (3 * aa_unneeded); + } + aa_from_this_interval -= aa_unneeded; + } + + /* note - if aa_int_start > aa_int_stop, that means we eliminated + * both ends of the interval. + */ + if (aa_int_start <= aa_int_stop) { + /* aa_accumulated now includes the number of complete codons that have + * been accounted for (not counting a partial codon at the end of this + * interval, if any + */ + aa_accumulated += aa_from_this_interval; + + /* add interval to result */ + loc = SeqLocIntNew(aa_int_start, aa_int_stop, strand, SeqLocId(slp)); + SeqLocAdd(&result, loc, TRUE, FALSE); + } + } + + first_loc = FALSE; + aa_before += this_aa; + prev_nt = part_codon; + + if (aa_before > prot_stop) { + break; + } + } + + return result; +} + + +static SeqLocPtr NaLocFromNaInterval (SeqFeatPtr sfp, Int4 product_start, Int4 product_stop) +{ + SeqLocPtr slp = NULL; SeqLocPtr location_loc, loc; /*for the sfp.location location*/ Boolean is_end; /**is the end for process reached?**/ @@ -4283,70 +4429,26 @@ product_stop) Int4 cur_pos; /**current sfp.product sequence position in process**/ Int4 product_len; /**length of the sfp.product **/ - Boolean is_new; /**Is cur_pos at the begin of new exon?**/ - Int4 end_partial; /*the end of aa is a partial codon*/ Int4 d_start, d_stop; /*the start and the stop of the sfp.location sequence*/ Int4 offset; /*offset from 
the start of the current exon*/ Int4 aa_len; Uint1 strand; Int4 p_end_pos; /*the end of the product sequence in the current loc*/ - Int4 first_partial; /*first codon is a partial*/ - Boolean is_cdregion = FALSE; - - - - if(sfp->data.choice ==3) /* cdregion must take into account 3 base/aa */ - { - is_cdregion = TRUE; - - crp = (CdRegionPtr) sfp->data.value.ptrvalue; - if(!crp) - { - return NULL; - } - - if(crp->frame>0) - { - frame_offset = crp->frame-1; - } - else - { - frame_offset = 0; - } - start_offset = frame_offset; - } - else - { - start_offset = 0; - frame_offset = 0; - } cur_pos= product_start; product_len = 0; is_end = FALSE; p_start = 0; - first_partial = 0; - end_partial = 0; slp = NULL; location_loc= NULL; while(!is_end && ((slp = SeqLocFindNext(sfp->location, slp))!=NULL)) { product_len += SeqLocLen(slp); - if (is_cdregion) - { - end_partial = ((product_len - start_offset)%3); - p_stop = (product_len - start_offset)/3 -1; - if(end_partial != 0) - ++p_stop; - } - else - { - p_stop = product_len - start_offset - 1; - } + p_stop = product_len - 1; p_end_pos = p_stop; - if(p_stop > product_stop || (p_stop == product_stop && end_partial == 0)) + if(p_stop >= product_stop) { p_stop = product_stop; /**check if the end is reached**/ is_end = TRUE; @@ -4354,21 +4456,7 @@ product_stop) if(p_stop >= cur_pos) /*get the exon*/ { - is_new = (p_start == cur_pos); /*start a new exon?*/ - if(is_new) /**special case of the first partial**/ - { - offset = 0; - } - else if (is_cdregion) - { - if(frame_offset && p_start >0) - ++p_start; - offset = 3*(cur_pos - p_start) + frame_offset; - } - else - { - offset = cur_pos - p_start; - } + offset = cur_pos - p_start; strand = SeqLocStrand(slp); if(strand == Seq_strand_minus) @@ -4377,47 +4465,20 @@ product_stop) d_start = SeqLocStart(slp) + offset; d_stop = d_start; - /*first codon*/ - if(is_cdregion && is_new && product_len == SeqLocLen(slp)) - { - if(strand == Seq_strand_minus) - d_stop -= frame_offset; - else - d_stop += 
frame_offset; - } - aa_len = MIN(p_stop, product_stop) - cur_pos +1; - if(end_partial != 0 && (p_end_pos >= product_start && p_end_pos <= product_stop)) - { - --aa_len; - } - if(first_partial > 0) - { - --aa_len; - } - if(strand == Seq_strand_minus) + + aa_len = MIN(p_stop, product_stop) - cur_pos +1; + + if(strand == Seq_strand_minus) { if(aa_len >= 0) { - if (is_cdregion) - d_stop -= (3*aa_len - 1); - else - d_stop -= (aa_len - 1); + d_stop -= (aa_len - 1); } else { ++d_stop; } - - if(first_partial >0) - d_stop -= first_partial; - - first_partial = 0; - if (end_partial > 0 && (p_end_pos >= product_start && p_end_pos <= product_stop)) - { - d_stop -= end_partial; - first_partial = 3 - end_partial; - } - + d_stop = MAX(d_stop, SeqLocStart(slp)); loc = SeqLocIntNew(d_stop, d_start, strand, SeqLocId(slp)); } @@ -4425,53 +4486,46 @@ product_stop) { if(aa_len >= 0) { - if (is_cdregion) - d_stop += (3*aa_len - 1); - else - d_stop += (aa_len - 1); + d_stop += (aa_len - 1); } else --d_stop; - if(first_partial > 0) - d_stop += first_partial; - first_partial = 0; - if (end_partial> 0 && (p_end_pos >= product_start && p_end_pos <= product_stop)) - { - d_stop += end_partial; - first_partial = 3 - end_partial; - } d_stop = MIN(d_stop, SeqLocStop(slp)); loc = SeqLocIntNew(d_start, d_stop, strand, SeqLocId(slp)); } SeqLocAdd(&location_loc, loc, TRUE, FALSE); - if(end_partial != 0) - cur_pos = p_stop; - else - cur_pos = p_stop+1; - } + cur_pos = p_stop+1; + } + p_start = p_stop +1; - if(end_partial != 0) - { - p_start = p_stop; - } - else - { - p_start = p_stop +1; - } - - if (is_cdregion) - { - frame_offset = (product_len - start_offset)%3; - if(frame_offset >0) - frame_offset = 3-frame_offset; - } + }/**end of while(slp && !is_end) **/ - }/**end of while(slp && !is_end) **/ + return location_loc; +} + +/****************************************************************** +* +* productInterval_to_locationIntervals(sfp, product_start, product_stop) +* map the amino acid sequence to 
a chain of Seq-locs in the +* DNA sequence through a CdRegion feature +* +******************************************************************/ +NLM_EXTERN SeqLocPtr LIBCALL +productInterval_to_locationIntervals +(SeqFeatPtr sfp, + Int4 product_start, + Int4 product_stop, + Boolean aa_partialn) +{ - return location_loc; + if (sfp->data.choice == SEQFEAT_CDREGION) { + return NucLocFromProtInterval (sfp, product_start, product_stop, aa_partialn); + } else { + return NaLocFromNaInterval (sfp, product_start, product_stop); + } } @@ -4557,7 +4611,7 @@ merge, Int4Ptr frame, Boolean allowTerminator) a_left += 3; } } - if (a_right > (bsp->length) * 3 - 1) { + if (a_right > (bsp->length) * 3 - 1 && !allowTerminator) { CheckSeqLocForPartial (slp, &partial5, &partial3); strand = SeqLocStrand (slp); if ((partial5 && strand != Seq_strand_minus) || (partial3 && strand == Seq_strand_minus)) { @@ -4570,7 +4624,7 @@ merge, Int4Ptr frame, Boolean allowTerminator) aa_from = a_left / 3; aa_to = a_right / 3; - if (aa_to > end_pos) + if (aa_to > end_pos && !allowTerminator) aa_to = end_pos; if (merge) diff --git a/api/seqport.h b/api/seqport.h index 9928a18e..22cddc39 100644 --- a/api/seqport.h +++ b/api/seqport.h @@ -29,7 +29,7 @@ * * Version Creation Date: 7/13/91 * -* $Revision: 6.59 $ +* $Revision: 6.60 $ * * File Description: Ports onto Bioseqs * @@ -38,242 +38,6 @@ * Date Name Description of modification * ------- ---------- ----------------------------------------------------- * -* -* $Log: seqport.h,v $ -* Revision 6.59 2008/02/12 18:56:52 bollin -* Made ReverseSeqData and ComplementSeqData extern -* -* Revision 6.58 2007/05/30 18:10:06 kans -* added KNOWN_GAP_AS_PLUS to distinguish known-length from unknown-length gaps, use for validation -* -* Revision 6.57 2006/12/20 20:08:24 kans -* added SUPPRESS_VIRT_SEQ and STREAM_VIRT_AS_PLUS, moved STREAM_CORRECT_INVAL -* -* Revision 6.56 2006/12/18 15:42:58 kans -* made MakeCodeBreakList public so validator can check for unnecessary 
transl excepts -* -* Revision 6.55 2006/11/15 18:02:59 kans -* ProteinFromCdRegionExEx and TransTableTranslateCdRegionEx take farProdFetchOK argument -* -* Revision 6.54 2006/11/06 17:16:38 kans -* added stream flag to allow negative gi numbers by NCBI ID group -* -* Revision 6.53 2006/07/13 17:06:39 bollin -* use Uint4 instead of Uint2 for itemID values -* removed unused variables -* resolved compiler warnings -* -* Revision 6.52 2006/05/19 18:40:07 kans -* added protein equivalent of nucleotide SeqSearch finite state machine -* -* Revision 6.51 2005/08/24 15:14:31 kans -* modified MolWtForLoc to use StreamCache, added MolWtForBsp and MolWtForStr -* -* Revision 6.50 2005/06/01 20:27:06 kans -* added MapNa4ByteToIUPACplusGapString -* -* Revision 6.49 2005/03/15 14:35:44 kans -* seqport stream gap control flags (2-bit set) are STREAM_EXPAND_GAPS, GAP_TO_SINGLE_DASH, and EXPAND_GAPS_TO_DASHES -* -* Revision 6.48 2005/03/14 22:48:11 kans -* inserted STREAM_INDICATE_GAPS before STREAM_CORRECT_INVAL, will mark gap with 251 instead of N or X -* -* Revision 6.47 2004/11/29 17:12:42 kans -* added SearchFlgType for expandPattern, allowOneMismatch, justTopStrand arguments -* -* Revision 6.46 2004/11/26 18:53:09 kans -* SeqSearchAddNucleotidePattern takes expandPattern, allowOneMismatch arguments -* -* Revision 6.45 2004/10/27 22:15:34 kans -* added STREAM_CORRECT_INVAL flag to SeqPortStream -* -* Revision 6.44 2004/07/16 19:37:37 kans -* SeqPortStream and FastaStream functions return Int4, negative count if any fetch failures -* -* Revision 6.43 2004/05/12 18:55:33 kans -* StreamCache takes SeqLocPtr as well as BioseqPtr optional arguments, slp version is equivalent of SeqPortNewByLoc -* -* Revision 6.42 2004/04/27 20:09:26 kans -* StreamCacheGetResidue returns Uint1 because Char might be signed, preventing IS_residue from working -* -* Revision 6.41 2004/04/27 18:15:12 kans -* added StreamCache functions that provide buffered request-driven access to sequence via 
SeqPortStream -* -* Revision 6.40 2004/04/14 12:39:01 kans -* SeqPortStreamLoc is public function, SeqPortStreamRaw directly uncompresses byte store, avoids any SeqPort calls - still need more efficient way to reverse complement without a big buffer -* -* Revision 6.39 2004/04/08 20:19:21 kans -* SeqPortStreamInt is external -* -* Revision 6.38 2004/03/15 19:54:54 kans -* SeqPortStream takes expandable bit flags parameter -* -* Revision 6.37 2004/02/25 19:07:45 kans -* ProteinFromCdRegionExEx and TransTableTranslateCdRegionEx return alternative start flag -* -* Revision 6.36 2003/11/18 17:08:46 kans -* added MapNa4ByteTo4BitString, use in seqport read and get char -* -* Revision 6.35 2003/11/17 22:44:31 kans -* added MapNa2ByteTo4BitString in preparation for faster SeqPortRead from 2na to 4na -* -* Revision 6.34 2003/11/05 21:17:22 bollin -* added new option for Retranslate Coding Regions to handle stop codons at end of complete CDS during retranslate while ignoring stop codons -* -* Revision 6.33 2002/11/11 18:02:40 kans -* added SeqPortStream to efficiently stream through a sequence -* -* Revision 6.32 2002/07/08 15:08:59 kans -* made ReadCodingRegionBases extern -* -* Revision 6.31 2002/05/13 21:41:32 kans -* added ConvertNsToGaps -* -* Revision 6.30 2001/02/18 20:58:52 kans -* added GetSequenceByBsp -* -* Revision 6.29 2000/12/18 18:03:26 kans -* added GetScoresbySeqId -* -* Revision 6.28 2000/09/24 23:31:18 kans -* added GetSequenceByFeature -* -* Revision 6.27 2000/09/24 22:52:47 kans -* added GetSequenceByIdOrAccnDotVer -* -* Revision 6.26 2000/09/05 21:33:50 kans -* productInterval_to_locationIntervals replaces aaInterval_to_dnaIntervals, also works for mRNA feature (JO) -* -* Revision 6.25 2000/08/31 18:12:54 shavirin -* Added new function TransTableFreeAll(). 
-* -* Revision 6.24 2000/08/11 18:09:49 kans -* GetScoresbyAccessionDotVersion passes length back through new parameter -* -* Revision 6.23 2000/08/11 18:03:25 kans -* added GetScoresbyAccessionDotVersion - prototyped in seqport.h but implemented in sqnutil2.c -* -* Revision 6.22 2000/08/10 17:22:38 kans -* added GetDNAbyAccessionDotVersion for genome processing effort -* -* Revision 6.21 2000/08/04 15:45:22 kans -* added ContigRevComp - still need to implement for delta bioseqs -* -* Revision 6.20 2000/08/03 19:02:54 kans -* added PersistentTransTableByGenCode and PersistentTransTableByCdRegion -* -* Revision 6.19 2000/08/01 20:02:58 kans -* separate macros for IsOrfStart, IsAmbigStart, IsAnyStart -* -* Revision 6.18 2000/07/22 22:45:37 kans -* more work on trans table translation functions -* -* Revision 6.17 2000/07/21 15:28:36 kans -* first pass at TransTableTranslate functions - more work remains -* -* Revision 6.16 2000/07/05 17:02:12 kans -* added spp->gapIsZero, SeqPortSet_do_virtualEx, using ncbi4na with gap of 0 to distinguish quality scores under N versus quality scores under gap -* -* Revision 6.15 2000/05/23 20:41:17 ostell -* added MolWtForLoc() -* -* Revision 6.14 1999/11/17 00:56:33 kans -* improved seqsearch fsa, removed protein part, still need to allow single mismatch -* -* Revision 6.13 1999/11/12 21:00:50 kans -* added TransTableProcessBioseq for 6-frame translation, SeqSearchAddNucleotidePattern and SeqSearchAddProteinPattern for SeqSearch -* -* Revision 6.12 1999/11/11 00:58:28 kans -* added SeqSearch sequence search finite state machine - still need more functions to add protein patterns, read from rsite file -* -* Revision 6.11 1999/10/06 22:09:02 kans -* ComposeCodonsRecognizedString to handle degenerate codons -* -* Revision 6.10 1999/08/06 20:22:19 kans -* TransTable simplified to eliminate single and double letter states -* -* Revision 6.9 1999/08/06 02:20:16 kans -* finite state machine for 6-frame translation and orf search enhanced 
to handle nucleotide ambiguity characters -* -* Revision 6.8 1999/02/12 20:48:24 kans -* made fast byte expansion functions public -* -* Revision 6.7 1998/12/14 20:56:24 kans -* dnaLoc_to_aaLoc takes allowTerminator parameter to handle stop codons created by polyA tail -* -* Revision 6.6 1998/11/16 21:10:08 kans -* added IsATGStart and IsAltStart macros -* -* Revision 6.5 1998/11/16 17:20:31 kans -* nextBase in codon fsa is Uint1, cast state array index to int in macros -* -* Revision 6.4 1998/11/14 00:30:21 kans -* added TransTableInit and macros for 6-frame translation and orf-finding finite state machine -* -* Revision 6.3 1998/09/16 21:40:42 kans -* added SPCacheQ for rapid 2na/4na to iupacna conversion -* -* Revision 6.2 1998/02/24 15:09:17 kans -* made AAForCodon prototype public -* -* Revision 6.1 1997/09/16 15:31:31 kans -* added aaFeatLoc_to_dnaFeatLoc (JO) -* -* Revision 6.0 1997/08/25 18:07:16 madden -* Revision changed to 6.0 -* -* Revision 5.5 1997/08/15 17:02:44 madden -* Added new function ProteinFromCdRegionEx with remove_trailingX Boolean -* -* Revision 5.4 1997/06/19 18:38:52 vakatov -* [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization -* -* Revision 5.3 1997/03/06 22:47:54 shavirin -* Moved definitions for SPCompress functions from sequtil.h -* - * Revision 5.2 1996/08/09 15:27:47 ostell - * added BioseqRev(), BioseqComp(), BioseqRevComp() - * - * Revision 5.1 1996/07/15 19:04:18 epstein - * add new param to dnaLoc_to_aaLoc() to optionally report frame - * - * Revision 5.0 1996/05/28 13:23:23 ostell - * Set to revision 5.0 - * - * Revision 4.8 1996/01/30 16:28:52 ostell - * fixed type in comment - * - * Revision 4.7 1996/01/30 16:24:04 ostell - * added merge argument to dnaLoc_to_aaLoc() - * change calls to SeqLocPackage - * - * Revision 4.6 1996/01/29 22:03:52 ostell - * added aaLoc_to_dnaLoc() and dnsLoc_to_aaLoc() - * - * Revision 4.5 1996/01/28 07:00:05 ostell - * made fisxes to support deeply nexted segmented seqports - * - * Revision 
4.4 1996/01/27 22:19:00 ostell - * added SeqPortSet_.. functions - * refined support for virtual seqeunces - * - * Revision 4.3 1996/01/10 22:25:25 ostell - * added aaInterval_to_seqloc() - * - * Revision 4.2 1995/12/29 21:31:44 ostell - * made SeqPort helper functions public for use by edutil for delta seqs - * - * Revision 4.1 1995/12/26 22:29:34 ostell - * added support for delta seq to SeqPort - * - * Revision 4.0 1995/07/26 13:49:01 ostell - * force revision to 4.0 - * - * Revision 2.14 1995/05/15 21:46:05 ostell - * added Log line - * -* -* * ========================================================================== */ @@ -683,7 +447,7 @@ NLM_EXTERN SeqLocPtr LIBCALL aaFeatLoc_to_dnaFeatLoc(SeqFeatPtr sfp, SeqLocPtr a * DNA sequence through a CdRegion feature * ******************************************************************/ -NLM_EXTERN SeqLocPtr LIBCALL productInterval_to_locationIntervals (SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop); +NLM_EXTERN SeqLocPtr LIBCALL productInterval_to_locationIntervals (SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop, Boolean aa_partialn); /*-------------- BioseqRevComp () ---------------------------*/ /*********************************************************************** diff --git a/api/sequtil.c b/api/sequtil.c index 84d5ee9b..b4f067c6 100644 --- a/api/sequtil.c +++ b/api/sequtil.c @@ -29,7 +29,7 @@ * * Version Creation Date: 4/1/91 * -* $Revision: 6.284 $ +* $Revision: 6.285 $ * * File Description: Sequence Utilities for objseq and objsset * @@ -9257,7 +9257,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) (StringICmp(temp,"GE") == 0) || (StringICmp(temp,"GH") == 0) || (StringICmp(temp,"GO") == 0) || - (StringICmp(temp,"GR") == 0) ) { /* NCBI EST */ + (StringICmp(temp,"GR") == 0) || + (StringICmp(temp,"GT") == 0) ) { /* NCBI EST */ retcode = ACCN_NCBI_EST; } else if ((StringICmp(temp,"BV") == 0) || (StringICmp(temp,"GF") == 0)) { /* NCBI STS */ @@ -9291,10 +9292,6 @@ NLM_EXTERN Uint4 LIBCALL 
WHICH_db_accession (CharPtr s) (StringICmp(temp,"GL") == 0)) { /* NCBI segmented set header Bioseq */ retcode = ACCN_NCBI_SEGSET; } else if ((StringICmp(temp,"AS") == 0) || - (StringICmp(temp,"GO") == 0) || - (StringICmp(temp,"GP") == 0) || - (StringICmp(temp,"GQ") == 0) || - (StringICmp(temp,"GT") == 0) || (StringICmp(temp,"GU") == 0) || (StringICmp(temp,"GV") == 0) || (StringICmp(temp,"GW") == 0) || diff --git a/api/sqnutil1.c b/api/sqnutil1.c index 532dc7e8..44282e91 100644 --- a/api/sqnutil1.c +++ b/api/sqnutil1.c @@ -29,7 +29,7 @@ * * Version Creation Date: 9/2/97 * -* $Revision: 6.532 $ +* $Revision: 6.534 $ * * File Description: * @@ -4203,6 +4203,10 @@ static Boolean HandledGBQualOnProt (SeqFeatPtr sfp, GBQualPtr gbq) return FALSE; } + if (StringICmp (gbq->qual, "UniProtKB_evidence") == 0) { + return FALSE; + } + return TRUE; /* all other gbquals not appropriate on protein features */ } @@ -4506,10 +4510,13 @@ static void CleanupConsSplice (GBQualPtr gbq) gbq->val = str; } -static void ExpandParenGroup (GBQualPtr headgbq) +static Boolean ExpandParenGroup (GBQualPtr headgbq) { + Char ch; GBQualPtr lastgbq; + size_t len; + Int2 nesting; GBQualPtr newgbq; GBQualPtr nextqual; CharPtr ptr; @@ -4518,8 +4525,34 @@ static void ExpandParenGroup (GBQualPtr headgbq) nextqual = headgbq->next; lastgbq = headgbq; - tmp = StringSave (headgbq->val); - str = tmp + 1; + ptr = headgbq->val; + tmp = StringSave (ptr + 1); + len = StringLen (tmp); + if (len > 0 && tmp [len - 1] == ')') { + tmp [len - 1] = '\0'; + } + str = tmp; + nesting = 0; + ptr = str; + ch = *ptr; + while (ch != '\0') { + if (ch == '(') { + nesting++; + } else if (ch == ')') { + nesting--; + if (nesting < 0) { + MemFree (tmp); + return FALSE; + } + } else if (ch == ',') { + if (nesting < 0) { + MemFree (tmp); + return FALSE; + } + } + ptr++; + ch = *ptr; + } while (! 
StringHasNoText (str)) { ptr = StringChr (str, ','); if (ptr == NULL) { @@ -4541,6 +4574,7 @@ static void ExpandParenGroup (GBQualPtr headgbq) str = ptr; } MemFree (tmp); + return TRUE; } static Boolean IsBaseRange (CharPtr str) @@ -4598,10 +4632,13 @@ static void ModernizeFeatureGBQuals (SeqFeatPtr sfp) str [len - 1] = ')'; } if (len > 1 && *str == '(' && str [len - 1] == ')' /* && StringChr (str + 1, '(') == NULL */) { - ExpandParenGroup (gbq); - nextqual = gbq->next; - /* individual parsed out (xxx,xxx) qualifiers will be processed next, now get rid of original */ - unlink = TRUE; + if (ExpandParenGroup (gbq)) { + nextqual = gbq->next; + /* individual parsed out (xxx,xxx) qualifiers will be processed next, now get rid of original */ + unlink = TRUE; + } else { + unlink = FALSE; + } } else { unlink = FALSE; } @@ -4620,10 +4657,13 @@ static void ModernizeFeatureGBQuals (SeqFeatPtr sfp) str [len - 1] = ')'; } if (len > 1 && *str == '(' && str [len - 1] == ')' && StringChr (str + 1, '(') == NULL) { - ExpandParenGroup (gbq); - nextqual = gbq->next; - /* individual parsed out (xxx,xxx) qualifiers will be processed next, now get rid of original */ - unlink = TRUE; + if (ExpandParenGroup (gbq)) { + nextqual = gbq->next; + /* individual parsed out (xxx,xxx) qualifiers will be processed next, now get rid of original */ + unlink = TRUE; + } else { + unlink = FALSE; + } } else { unlink = FALSE; } diff --git a/api/sqnutil2.c b/api/sqnutil2.c index f31cb69b..f768aedc 100644 --- a/api/sqnutil2.c +++ b/api/sqnutil2.c @@ -29,7 +29,7 @@ * * Version Creation Date: 9/2/97 * -* $Revision: 6.391 $ +* $Revision: 6.392 $ * * File Description: * @@ -11965,8 +11965,9 @@ NLM_EXTERN Boolean FeatureOkForFeatureList (SeqFeatPtr sfp, ValNodePtr feature_l NLM_EXTERN SeqFeatPtr GetGeneForFeature (SeqFeatPtr sfp) { + BioseqPtr bsp; GeneRefPtr grp; - SeqFeatPtr overlap_gene; + SeqFeatPtr overlap_gene = NULL; Boolean is_suppressed; SeqMgrFeatContext fcontext; @@ -11975,9 +11976,15 @@ NLM_EXTERN 
SeqFeatPtr GetGeneForFeature (SeqFeatPtr sfp) if (is_suppressed) return NULL; if (grp != NULL) { - overlap_gene = SeqMgrGetGeneByLocusTag (BioseqFindFromSeqLoc(sfp->location), grp->locus_tag, &fcontext); + bsp = BioseqFindFromSeqLoc (sfp->location); + if (bsp == NULL) return NULL; + if (StringDoesHaveText (grp->locus_tag)) { + overlap_gene = SeqMgrGetGeneByLocusTag (bsp, grp->locus_tag, &fcontext); + } else if (StringDoesHaveText (grp->locus)) { + overlap_gene = SeqMgrGetFeatureByLabel (bsp, grp->locus, SEQFEAT_GENE, 0, &fcontext); + } } else { - overlap_gene = SeqMgrGetOverlappingGene(sfp->location, &fcontext); + overlap_gene = SeqMgrGetOverlappingGene (sfp->location, &fcontext); } return overlap_gene; } diff --git a/api/sqnutil3.c b/api/sqnutil3.c index eba9ae27..a4bcf72f 100644 --- a/api/sqnutil3.c +++ b/api/sqnutil3.c @@ -29,7 +29,7 @@ * * Version Creation Date: 2/7/00 * -* $Revision: 6.510 $ +* $Revision: 6.514 $ * * File Description: * @@ -11669,14 +11669,19 @@ static Boolean GetOverlappingTRNAs (BioseqPtr bsp, SeqLocPtr slp, Int4 loc_right SeqFeatPtr sfp; SeqMgrFeatContext context; Boolean found_any = FALSE; + Uint1 slp_strand, rna_strand; if (bsp == NULL || slp == NULL || list == NULL) return FALSE; + slp_strand = SeqLocStrand (slp); for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_RNA, FEATDEF_tRNA, &context); sfp != NULL && context.left <= loc_right; sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_RNA, FEATDEF_tRNA, &context)) { - if (SeqLocStrand (sfp->location) == SeqLocStrand (slp) && SeqLocCompare (sfp->location, slp) != SLC_NO_MATCH) { + rna_strand = SeqLocStrand (sfp->location); + if (((slp_strand == Seq_strand_minus && rna_strand == Seq_strand_minus) + || (slp_strand != Seq_strand_minus && rna_strand != Seq_strand_minus)) + && SeqLocCompare (sfp->location, slp) != SLC_NO_MATCH) { ValNodeAddPointer (list, OBJ_SEQFEAT, sfp); found_any = TRUE; } @@ -17601,46 +17606,141 @@ static Boolean CouldExtendRight (BioseqPtr bsp, Int4 pos) } -static Boolean 
ExtendPartialSeqIntToEndOrGap (SeqIntPtr sint, BioseqPtr bsp) +NLM_EXTERN Int4 +Extend5PartialSeqIntToEndOrGap +(SeqIntPtr sint, + BioseqPtr bsp, + Boolean short_only) { - Boolean rval = FALSE; - Int4 distance; + Int4 distance = 0; if (sint == NULL || bsp == NULL) { return FALSE; } - if (sint->if_from != NULL && sint->from != 0) { - if (sint->from < 3) { - sint->from = 0; - rval = TRUE; - } else if (bsp->repr == Seq_repr_delta) { - /* wasn't close to the sequence end, but perhaps it is close to a gap */ + if (sint->strand == Seq_strand_minus) { + if (sint->if_to != NULL && sint->to != bsp->length - 1) { + distance = DistanceToDownstreamGap (sint->to, bsp); + if (distance == 1 || distance == 2 || (distance > -1 && !short_only)) { + sint->to += distance; + } else if (!short_only || sint->to > bsp->length - 4) { + distance = bsp->length - 1 - sint->to; + sint->to = bsp->length - 1; + } else { + distance = 0; + } + } + } else { + if (sint->if_from != NULL && sint->from != 0) { distance = DistanceToUpstreamGap (sint->from, bsp); - if (distance == 1 || distance == 2) { + if (distance == 1 || distance == 2 || (distance > -1 && !short_only)) { sint->from -= distance; - rval = TRUE; + } else if (!short_only || sint->from < 3) { + distance = sint->from; + sint->from = 0; + } else { + distance = 0; } } } - if (sint->if_to != NULL && sint->to != bsp->length - 1) { - if (sint->to > bsp->length - 4) { - sint->to = bsp->length - 1; - rval = TRUE; - } else if (bsp->repr == Seq_repr_delta) { - /* wasn't close to the sequence end, but perhaps it is close to a gap */ + return distance; +} + + +NLM_EXTERN Int4 +Extend3PartialSeqIntToEndOrGap +(SeqIntPtr sint, + BioseqPtr bsp, + Boolean short_only) +{ + Int4 distance = 0; + + if (sint == NULL || bsp == NULL) { + return FALSE; + } + + if (sint->strand == Seq_strand_minus) { + if (sint->if_from != NULL && sint->from != 0) { + distance = DistanceToUpstreamGap (sint->from, bsp); + if (distance == 1 || distance == 2 || (distance > -1 && 
!short_only)) { + sint->from -= distance; + } else if (!short_only || sint->from < 3) { + distance = sint->from; + sint->from = 0; + } else { + distance = 0; + } + } + } else { + if (sint->if_to != NULL && sint->to != bsp->length - 1) { distance = DistanceToDownstreamGap (sint->to, bsp); - if (distance == 1 || distance == 2) { + if (distance == 1 || distance == 2 || (distance > -1 && !short_only)) { sint->to += distance; - rval = TRUE; + } else if (!short_only || sint->to > bsp->length - 4) { + distance = bsp->length - 1 - sint->to; + sint->to = bsp->length - 1; + } else { + distance = 0; } } } + return distance; +} + + + +static Boolean ExtendPartialSeqIntToEndOrGap (SeqIntPtr sint, BioseqPtr bsp) +{ + Boolean rval = FALSE; + if (Extend5PartialSeqIntToEndOrGap (sint, bsp, TRUE) > 0) { + rval = TRUE; + } + + if (Extend3PartialSeqIntToEndOrGap (sint, bsp, TRUE) > 0) { + rval = TRUE; + } + return rval; } +NLM_EXTERN Int4 ExtendSeqLocToEndOrGap (SeqLocPtr slp, BioseqPtr bsp, Boolean end5) +{ + Int4 diff = 0; + SeqLocPtr slp_index; + + if (slp == NULL || bsp == NULL) return 0; + + switch (slp->choice) + { + case SEQLOC_INT: + if (end5) { + diff = Extend5PartialSeqIntToEndOrGap (slp->data.ptrvalue, bsp, FALSE); + } else { + diff = Extend3PartialSeqIntToEndOrGap (slp->data.ptrvalue, bsp, FALSE); + } + break; + case SEQLOC_MIX: + case SEQLOC_PACKED_INT: + if (end5) { + /* take the first one */ + diff = ExtendSeqLocToEndOrGap (slp->data.ptrvalue, bsp, end5); + } else { + /* take the last one */ + for (slp_index = slp->data.ptrvalue; slp_index != NULL && slp_index->next != NULL; slp_index = slp_index->next) { + } + if (slp_index != NULL) { + diff = ExtendSeqLocToEndOrGap (slp_index, bsp, end5); + } + } + break; + } + + return diff; +} + + NLM_EXTERN SeqFeatPtr FindBestProtein (Uint2 entityID, SeqLocPtr product) { @@ -22801,6 +22901,21 @@ BarcodeValidateOneSeqEntry } } } + if (show_all) { + for (vnp = pass_fail_list; vnp != NULL; vnp = vnp->next) { + res = 
(BarcodeTestResultsPtr) vnp->data.ptrvalue; + SeqIdWrite (SeqIdFindBest (res->bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1); + reason = GetBarcodeTestFailureReasons (res); + BarcodeValPrintStr (ofp, " <message severity=\"INFO\" seq-id=\"%s\">", id_buf); + if (PassBarcodeTests(res)) { + BarcodeValPrintStr (ofp, NULL, "PASS"); + } else { + BarcodeValPrintStr (ofp, "FAIL (%s)", reason == NULL ? "" : reason); + } + BarcodeValPrintStr (ofp, NULL, "</message>\n"); + reason = MemFree (reason); + } + } } else { if (show_header) { if (ofp == NULL) { diff --git a/api/sqnutil4.c b/api/sqnutil4.c index 3579f132..26d7b9b0 100755 --- a/api/sqnutil4.c +++ b/api/sqnutil4.c @@ -29,7 +29,7 @@ * * Version Creation Date: 12/27/2007 * -* $Revision: 1.57 $ +* $Revision: 1.59 $ * * File Description: * This file contains functions for automatically generating definition lines. @@ -12319,6 +12319,7 @@ NLM_EXTERN Boolean ConvertImpToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to) { ifp->key = StringSave (featname); } + sfp->idx.subtype = 0; return TRUE; } @@ -12569,6 +12570,182 @@ static void InstantiateMatPeptideProductForProteinFeature (SeqFeatPtr sfp, Point } +NLM_EXTERN void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep) +{ + ByteStorePtr bs; + CharPtr prot, ptr; + BioseqPtr bsp; + Char ch; + Int4 i; + SeqEntryPtr psep, nsep; + MolInfoPtr mip; + ValNodePtr vnp, descr; + SeqFeatPtr prot_sfp; + ProtRefPtr prp; + Boolean partial5, partial3; + + if (cds == NULL) return; + + CheckSeqLocForPartial (cds->location, &partial5, &partial3); + + /* Create corresponding protein sequence data for the CDS */ + + bs = ProteinFromCdRegionEx (cds, TRUE, FALSE); + if (NULL == bs) + return; + + prot = BSMerge (bs, NULL); + bs = BSFree (bs); + if (NULL == prot) + return; + + ptr = prot; + ch = *ptr; + while (ch != '\0') { + *ptr = TO_UPPER (ch); + ptr++; + ch = *ptr; + } + i = StringLen (prot); + if (i > 0 && prot [i - 1] == '*') { + prot [i - 1] = '\0'; + } + bs = BSNew 
(1000); + if (bs != NULL) { + ptr = prot; + BSWrite (bs, (VoidPtr) ptr, (Int4) StringLen (ptr)); + } + + /* Create the product protein Bioseq */ + + bsp = BioseqNew (); + if (NULL == bsp) + return; + + bsp->repr = Seq_repr_raw; + bsp->mol = Seq_mol_aa; + bsp->seq_data_type = Seq_code_ncbieaa; + bsp->seq_data = (SeqDataPtr) bs; + bsp->length = BSLen (bs); + bs = NULL; + bsp->id = MakeNewProteinSeqId (cds->location, NULL); + SeqMgrAddToBioseqIndex (bsp); + + /* Create a new SeqEntry for the Prot Bioseq */ + + psep = SeqEntryNew (); + if (NULL == psep) + return; + + psep->choice = 1; + psep->data.ptrvalue = (Pointer) bsp; + SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, psep); + + /* Add a descriptor to the protein Bioseq */ + + mip = MolInfoNew (); + if (NULL == mip) + return; + + mip->biomol = 8; + mip->tech = 8; + if (partial5 && partial3) { + mip->completeness = 5; + } else if (partial5) { + mip->completeness = 3; + } else if (partial3) { + mip->completeness = 4; + } + vnp = CreateNewDescriptor (psep, Seq_descr_molinfo); + if (NULL == vnp) + return; + + vnp->data.ptrvalue = (Pointer) mip; + + /**/ + + descr = ExtractBioSourceAndPubs (parent_sep); + + AddSeqEntryToSeqEntry (parent_sep, psep, TRUE); + nsep = FindNucSeqEntry (parent_sep); + ReplaceBioSourceAndPubs (parent_sep, descr); + SetSeqFeatProduct (cds, bsp); + + prp = ProtRefNew (); + + if (prp != NULL) { + prot_sfp = CreateNewFeature (psep, NULL, SEQFEAT_PROT, NULL); + if (prot_sfp != NULL) { + prot_sfp->data.value.ptrvalue = (Pointer) prp; + SetSeqLocPartial (prot_sfp->location, partial5, partial3); + prot_sfp->partial = (partial5 || partial3); + } + } +} + + +NLM_EXTERN SeqFeatPtr GetProtFeature (BioseqPtr protbsp) +{ + SeqMgrFeatContext fcontext; + SeqAnnotPtr sap; + SeqFeatPtr prot_sfp; + ProtRefPtr prp; + + if (protbsp == NULL) return NULL; + + prot_sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext); + if (prot_sfp == NULL) { + sap = protbsp->annot; + while (sap != NULL && prot_sfp == 
NULL) { + if (sap->type == 1) { + prot_sfp = sap->data; + while (prot_sfp != NULL + && (prot_sfp->data.choice != SEQFEAT_PROT + || (prp = prot_sfp->data.value.ptrvalue) == NULL + || prp->processed != 0)) { + prot_sfp = prot_sfp->next; + } + } + sap = sap->next; + } + } + return prot_sfp; +} + + +NLM_EXTERN Boolean ConvertMiscFeatToCodingRegion (SeqFeatPtr sfp) +{ + BioseqPtr bsp, prot_bsp; + SeqFeatPtr prot; + ProtRefPtr prp; + + if (sfp == NULL || sfp->idx.subtype != FEATDEF_misc_feature) { + return FALSE; + } + + sfp->data.value.ptrvalue = ImpFeatFree (sfp->data.value.ptrvalue); + sfp->data.value.ptrvalue = CdRegionNew (); + sfp->data.choice = SEQFEAT_CDREGION; + sfp->idx.subtype = 0; + + bsp = BioseqFindFromSeqLoc (sfp->location); + if (bsp != NULL) { + ExtraCDSCreationActions (sfp, GetBestTopParentForData (bsp->idx.entityID, bsp)); + if (!StringHasNoText (sfp->comment)) { + prot_bsp = BioseqFindFromSeqLoc (sfp->product); + prot = GetProtFeature (prot_bsp); + if (prot != NULL) { + prp = prot->data.value.ptrvalue; + ValNodeAddPointer (&prp->name, 0, sfp->comment); + sfp->comment = NULL; + } + } + } + + return TRUE; +} + + NLM_EXTERN void InstantiateMatPeptideProducts (SeqEntryPtr sep) { VisitFeaturesInSep (sep, NULL, InstantiateMatPeptideProductForProteinFeature); diff --git a/api/sqnutils.h b/api/sqnutils.h index 2765b115..03a2cb4b 100644 --- a/api/sqnutils.h +++ b/api/sqnutils.h @@ -29,7 +29,7 @@ * * Version Creation Date: 9/2/97 * -* $Revision: 6.377 $ +* $Revision: 6.379 $ * * File Description: * @@ -1746,7 +1746,9 @@ NLM_EXTERN Boolean ConvertImpToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to); NLM_EXTERN Boolean ConvertRegionToRNAFunc (SeqFeatPtr sfp, Uint2 featdef_to); NLM_EXTERN Boolean ConvertGeneToMiscFeatFunc (SeqFeatPtr sfp, Uint2 featdef_to); NLM_EXTERN Boolean ConvertProtToProtFunc (SeqFeatPtr sfp, Uint2 featdef_to); - +NLM_EXTERN Boolean ConvertMiscFeatToCodingRegion (SeqFeatPtr sfp); +NLM_EXTERN void ExtraCDSCreationActions (SeqFeatPtr cds, 
SeqEntryPtr parent_sep); +NLM_EXTERN SeqFeatPtr GetProtFeature (BioseqPtr protbsp); NLM_EXTERN void InstantiateMatPeptideProducts (SeqEntryPtr sep); @@ -1816,6 +1818,11 @@ NLM_EXTERN Boolean AutoConvertCDSToMiscFeat (SeqFeatPtr cds, Boolean remove_orig NLM_EXTERN AuthListPtr PNTR GetAuthListForPub (PubPtr the_pub); NLM_EXTERN void RemoveConsortiumFromPub (PubPtr pub); +NLM_EXTERN Int4 Extend5PartialSeqIntToEndOrGap (SeqIntPtr sint, BioseqPtr bsp, Boolean short_only); +NLM_EXTERN Int4 Extend3PartialSeqIntToEndOrGap (SeqIntPtr sint, BioseqPtr bsp, Boolean short_only); +NLM_EXTERN Int4 ExtendSeqLocToEndOrGap (SeqLocPtr slp, BioseqPtr bsp, Boolean end5); + + #ifdef __cplusplus } #endif diff --git a/api/valid.c b/api/valid.c index e65ea6e8..ecb40052 100644 --- a/api/valid.c +++ b/api/valid.c @@ -29,7 +29,7 @@ * * Version Creation Date: 1/1/94 * -* $Revision: 6.1245 $ +* $Revision: 6.1247 $ * * File Description: Sequence editing utilities * @@ -10365,7 +10365,7 @@ NLM_EXTERN Boolean ParseStructuredVoucher ( CharPtr tmp; if (StringHasNoText (subname)) return FALSE; - if (StringLen (subname) < 5) return FALSE; + if (StringLen (subname) < 3) return FALSE; TrimSpacesAroundString (subname); ptr = StringChr (subname, ':'); @@ -13642,6 +13642,22 @@ static Boolean FeatureSequencesIdentical (SeqFeatPtr sfp, SeqFeatPtr lastsfp) return rsult; } +static Boolean GeneXrefsDifferent (SeqFeatPtr sfp, SeqFeatPtr lastsfp) + +{ + SeqFeatPtr gene, lastgene; + + if (sfp == NULL || lastsfp == NULL) return FALSE; + + gene = GetGeneForFeature (sfp); + lastgene = GetGeneForFeature (lastsfp); + if (gene == NULL || lastgene == NULL) return FALSE; + + if (gene != lastgene) return TRUE; + + return FALSE; +} + static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bvsp) { @@ -14057,6 +14073,9 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv /* do not report if mRNAs are linked to two different CDSs */ } else if (fcontext.sap == sap) { if 
(samelabel) { + if (GeneXrefsDifferent (sfp, last)) { + severity = SEV_WARNING; + } ValidErr (vsp, severity, ERR_SEQ_FEAT_FeatContentDup, "Duplicate feature"); } else if (featdeftype != FEATDEF_PUB) { if (fcontext.partialL != partialL || fcontext.partialR != partialR) { @@ -14099,6 +14118,9 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv } } else { if (samelabel) { + if (GeneXrefsDifferent (sfp, last)) { + severity = SEV_WARNING; + } ValidErr (vsp, severity, ERR_SEQ_FEAT_FeatContentDup, "Duplicate feature (packaged in different feature table)"); } else if (featdeftype != FEATDEF_PUB) { if (suppress_duplicate_messages && (featdeftype == FEATDEF_CDS || featdeftype == FEATDEF_mRNA) && HaveUniqueFeatIDXrefs (xref, sfp->xref)) { diff --git a/checkout.date b/checkout.date index ed3c8324..0204f581 100644 --- a/checkout.date +++ b/checkout.date @@ -1 +1 @@ -Sun Jul 19 10:12:36 EDT 2009 +Sun Aug 9 10:12:32 EDT 2009 diff --git a/connect/ncbi_connection.c b/connect/ncbi_connection.c index 9370a176..dda0c396 100644 --- a/connect/ncbi_connection.c +++ b/connect/ncbi_connection.c @@ -1,4 +1,4 @@ -/* $Id: ncbi_connection.c,v 6.59 2009/07/13 15:04:37 kazimird Exp $ +/* $Id: ncbi_connection.c,v 6.60 2009/07/28 13:04:32 kazimird Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -83,9 +83,9 @@ #define CONN_NOT_NULL(s_c, f_n) CONN_NOT_NULL_EX(s_c, f_n, eIO_InvalidArg) #ifdef _DEBUG -# define CONN_TRACE(f_n, msg) CONN_LOG(0, f_n, eLOG_Trace, msg) +# define CONN_TRACE(f_n, msg) CONN_LOG(0, f_n, eLOG_Trace, msg) #else -# define CONN_TRACE(f_n, msg) ((void) 0) +# define CONN_TRACE(f_n, msg) ((void) 0) #endif /*_DEBUG*/ @@ -409,9 +409,8 @@ extern EIO_Status CONN_Wait : eIO_NotSupported; if (status != eIO_Success) { - const char* errmsg = (event == eIO_Read - ? 
"Read event failed" - : "Write event failed"); + static const char* kErrMsg[] = { "Read event failed", + "Write event failed" }; ELOG_Level level; switch (status) { case eIO_Timeout: @@ -419,10 +418,8 @@ extern EIO_Status CONN_Wait level = eLOG_Warning; else if (timeout->sec | timeout->usec) level = eLOG_Trace; - else { - CONN_TRACE(Wait, errmsg); + else return status; - } break; case eIO_Closed: level = event == eIO_Read ? eLOG_Trace : eLOG_Error; @@ -434,7 +431,7 @@ extern EIO_Status CONN_Wait level = eLOG_Error; break; } - CONN_LOG(14, Wait, level, errmsg); + CONN_LOG(14, Wait, level, kErrMsg[event != eIO_Read]); } return status; } diff --git a/connect/ncbi_socket.c b/connect/ncbi_socket.c index 30413870..10bba048 100644 --- a/connect/ncbi_socket.c +++ b/connect/ncbi_socket.c @@ -1,4 +1,4 @@ -/* $Id: ncbi_socket.c,v 6.282 2009/07/13 15:04:37 kazimird Exp $ +/* $Id: ncbi_socket.c,v 6.283 2009/07/30 16:24:29 kazimird Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -436,8 +436,8 @@ static const char* s_ID(const SOCK sock, char* buf) /* Put socket description to the message, then log the transferred data */ -static void s_DoLog(const SOCK sock, EIO_Event event, - const void* data, size_t size, const void* ptr) +static void s_DoLog(ELOG_Level level, const SOCK sock, EIO_Event event, + const void* data, size_t size, const void* ptr) { const struct sockaddr* sa = (const struct sockaddr*) ptr; const char* what; @@ -493,7 +493,7 @@ static void s_DoLog(const SOCK sock, EIO_Event event, else strcpy(tail, "???"); } - CORE_LOGF_X(112, eLOG_Trace, + CORE_LOGF_X(112, level, ("%s%s%s", s_ID(sock, _id), head, tail)); break; @@ -527,11 +527,7 @@ static void s_DoLog(const SOCK sock, EIO_Event event, *tail = '\0'; } - CORE_DATAF_EXX(109, !size && data && - (sock->type == eDatagram - || (sock->n_read | sock->n_written)) - ? 
eLOG_Error : eLOG_Trace, - data, size, + CORE_DATAF_EXX(109, level, data, size, ("%s%.*s%s%s%s", s_ID(sock, _id), n, what, sock->type == eDatagram ? (event == eIO_Read ? " from " : " to ") @@ -569,7 +565,7 @@ static void s_DoLog(const SOCK sock, EIO_Event event, head + 1, sizeof(head) - 1); } else *head = '\0'; - CORE_LOGF_X(113, eLOG_Trace, + CORE_LOGF_X(113, level, ("%s%s%s (out: %s, in: %s)", s_ID(sock, _id), ptr ? (const char*) ptr : sock->keep ? "Leaving" : "Closing", head, @@ -1783,11 +1779,13 @@ static EIO_Status s_Recv(SOCK sock, x_error == SOCK_ECONNABORTED || x_error == SOCK_ENETRESET))) { /* statistics & logging */ - if ((x_read < 0 && (sock->n_read | sock->n_written)) || + if (x_read < 0 || ((sock->log == eOn || (sock->log == eDefault && s_Log == eOn)) && (!sock->session || flag > 0))) { - s_DoLog(sock, eIO_Read, (x_read < 0 ? (void*) &x_error : - x_read > 0 ? buf : 0), + s_DoLog(x_read < 0 && sock->n_read && sock->n_written + ? eLOG_Error : eLOG_Trace, sock, eIO_Read, + x_read < 0 ? (void*) &x_error : + x_read > 0 ? buf : 0, (size_t)(x_read < 0 ? 0 : x_read), 0); } @@ -1930,7 +1928,7 @@ static EIO_Status s_Read(SOCK sock, /* statistics & logging */ if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn)){ - s_DoLog(sock, eIO_Read, x_read > 0 ? x_buf : + s_DoLog(eLOG_Trace, sock, eIO_Read, x_read > 0 ? x_buf : status == eIO_Success ? 0 : (void*) &x_error, status != eIO_Success ? 0 : x_read, " [decrypt]"); } @@ -2184,11 +2182,12 @@ static EIO_Status s_Send(SOCK sock, x_error == SOCK_ENETRESET || x_error == SOCK_ECONNABORTED))){ /* statistics & logging */ - if ((x_written < 0 && (sock->n_read | sock->n_written)) || + if (x_written < 0 || ((sock->log == eOn || (sock->log == eDefault && s_Log == eOn)) && (!sock->session || flag > 0))) { - s_DoLog(sock, eIO_Write, (x_written < 0 - ? (void*) &x_error : data), + s_DoLog(x_written < 0 && sock->n_read && sock->n_written + ? eLOG_Error : eLOG_Trace, sock, eIO_Write, + x_written < 0 ? 
(void*) &x_error : data, (size_t)(x_written < 0 ? 0 : x_written), flag < 0 ? "" : 0); } @@ -2337,7 +2336,7 @@ static EIO_Status s_WriteData(SOCK sock, /* statistics & logging */ if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn)) { - s_DoLog(sock, eIO_Write, + s_DoLog(eLOG_Trace, sock, eIO_Write, status == eIO_Success ? data : (void*) &x_error, status != eIO_Success ? 0 : *n_written, " [encrypt]"); } @@ -2685,7 +2684,7 @@ static EIO_Status s_Close(SOCK sock, int abort) /* statistics & logging */ if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn)) - s_DoLog(sock, eIO_Close, 0, 0, abort ? "Aborting" : 0); + s_DoLog(eLOG_Trace, sock, eIO_Close, 0, 0, abort ? "Aborting" : 0); } else abort = 1; @@ -2897,7 +2896,7 @@ static EIO_Status s_Connect(SOCK sock, /* statistics & logging */ if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn)) - s_DoLog(sock, eIO_Open, 0, 0, &addr.sa); + s_DoLog(eLOG_Trace, sock, eIO_Open, 0, 0, &addr.sa); /* establish connection to the peer */ sock->connected = 0; @@ -3791,7 +3790,7 @@ static EIO_Status s_Accept(LSOCK lsock, /* statistics & logging */ if ((*sock)->log == eOn || ((*sock)->log == eDefault && s_Log == eOn)) - s_DoLog(*sock, eIO_Open, 0, 0, &addr.sa); + s_DoLog(eLOG_Trace, *sock, eIO_Open, 0, 0, &addr.sa); return eIO_Success; } @@ -4170,7 +4169,7 @@ extern EIO_Status SOCK_CreateOnTopEx(const void* handle, /* statistics & logging */ if (x_sock->log == eOn || (x_sock->log == eDefault && s_Log == eOn)) - s_DoLog(x_sock, eIO_Open, &peer, 0, &peer.sa); + s_DoLog(eLOG_Trace, x_sock, eIO_Open, &peer, 0, &peer.sa); /* success */ *sock = x_sock; @@ -5125,7 +5124,7 @@ extern EIO_Status DSOCK_CreateEx(SOCK* sock, TSOCK_Flags flags) /* statistics & logging */ if ((*sock)->log == eOn || ((*sock)->log == eDefault && s_Log == eOn)) - s_DoLog(*sock, eIO_Open, 0, 0, 0); + s_DoLog(eLOG_Trace, *sock, eIO_Open, 0, 0, 0); return eIO_Success; } @@ -5170,7 +5169,7 @@ extern EIO_Status DSOCK_Bind(SOCK sock, unsigned 
short port) /* statistics & logging */ if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn)) - s_DoLog(sock, eIO_Open, 0, 0, (struct sockaddr*) &addr); + s_DoLog(eLOG_Trace, sock, eIO_Open, 0, 0, (struct sockaddr*) &addr); return eIO_Success; } @@ -5254,7 +5253,7 @@ extern EIO_Status DSOCK_Connect(SOCK sock, /* statistics & logging */ if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn)) - s_DoLog(sock, eIO_Open, &peer, 0, (struct sockaddr*) &peer); + s_DoLog(eLOG_Trace, sock, eIO_Open, &peer, 0, (struct sockaddr*)&peer); return eIO_Success; } @@ -5338,7 +5337,7 @@ extern EIO_Status DSOCK_SendMsg(SOCK sock, /* statistics & logging */ if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn)){ - s_DoLog(sock, eIO_Write, x_msg, (size_t) x_written, + s_DoLog(eLOG_Trace, sock, eIO_Write, x_msg, (size_t) x_written, (struct sockaddr*) &addr); } @@ -5484,7 +5483,7 @@ extern EIO_Status DSOCK_RecvMsg(SOCK sock, /* statistics & logging */ if (sock->log == eOn || (sock->log == eDefault && s_Log == eOn)){ - s_DoLog(sock, eIO_Read, x_msg, (size_t) x_read, + s_DoLog(eLOG_Trace, sock, eIO_Read, x_msg, (size_t) x_read, (struct sockaddr*) &addr); } diff --git a/data/institution_codes.txt b/data/institution_codes.txt index be1a1d09..d9255588 100644 --- a/data/institution_codes.txt +++ b/data/institution_codes.txt @@ -462,6 +462,7 @@ BM<GBR-LONDON> s The Natural History Museum, Department of Botany BMAM s Beijing Natural History Museum BMB s Booth Museum of Natural History BMBN<UK> s Booth Museum of Natural History +BMCC c Brittany Microbe Culture collection BMFM-UNAM c Culture Collection of Fungal Pathogens Strains from the Basic Mycology Laboratory of the Department of Microbiology and Parasitology, Faculty of Medicine, UNAM BMGB s Barbados Museum and Historical Society BMH s Bournemouth Natural Science Society Museum, herbarium @@ -799,6 +800,7 @@ CEEF s Escuela Nacional de Ciencias Forestales CEET s El Colegio de la Frontera Sur, Colleccion de Insectos 
Asociados a Plantas Cultivadas en la Frontera Sur CEL s University of Illinois, Crop Sciences Department CELM s Coleccion Entomologica "Luis Maria Murillo" +CELMS c Collection of Environmental and Laboratory Microbial Strains CEMBP s Centre of Excellence in Marine Biology CEN s EMBRAPA Recursos Geneticos e Biotecnologia - CENARGEN CENA<BRZ> s Centro de Energia Nuclear na Agricultura, Universidade de Sao Paulo @@ -909,6 +911,7 @@ CIMNH s Albertson College of Idaho, Orma J. Smith Museum of Natural History CIMSC c Collezione Instituto di Microbiologia CINC s University of Cincinnati, Biological Sciences Department CIP<COL> s Centro de Investigaciones Pesqueras +CIP<ECU> c International Potato Center CIP<FRA> c Pasteur Institute Collection, Biological Resource Center of Pasteur Institute (CRBIP) CIP<PER> b Centro Internacional de las Papas CIPDE c Collection of Insect Pathogens, Dept. of Entomology @@ -972,6 +975,16 @@ CMMEX s Universidad Autonoma de Baja California CMMI s Chinese Academy of Traditional Medicine CMML s Colorado State University CMN s Canadian Museum of Nature +CMN:Annelid s Canadian Museum of Nature, Annelid Collection +CMN:Bird s Canadian Museum of Nature, Bird Collection +CMN:Crustacean s Canadian Museum of Nature, Crustacean Collection +CMN:Fish s Canadian Museum of Nature, Fish Collection +CMN:GenInvert Canadian Museum of Nature, General Invertebrate Collextion +CMN:Herp s Canadian Museum of Nature, Amphibian and Reptile Collection +CMN:Insect s Canadian Museum of Nature, Insect Collection +CMN:Mammal s Canadian Museum of Nature, Mammal Collection +CMN:Mollusc s Canadian Museum of Nature, Mollusc Collection +CMN:Parasite s Canadian Museum of Nature, Parasite Collection CMNAR s Canadian Museum of Nature, Amphibian and Reptile Collection CMNC s Canadian Museum of Nature, Neotropical Cerambycidae Collection CMNFI s Canadian Museum of Nature, Fish Collection @@ -1335,6 +1348,7 @@ DNHM<USA-UT> s Dinosaur Natural History Museum DNPM s Setor de 
Paleontologia do Departamento Nacional de Producao Mineral DNS s Dundee Naturalists' Society DO s Societe d'Agriculture Sciences et Arts +DOA c Department Of Agriculture DOMO s Collegio Mellerio Rosmini DOR s Dorset County Museum DORC s Dorset County Museum @@ -2429,7 +2443,8 @@ INV s Inverness Museum and Art Gallery INVA s Invergordon Academy INVAM c International Culture Collection of (Vesicular) Arbuscular Mycorrhizal Fungi INVEMAR s Instituto de Investigaciones Marinas de Punta de Betin -IO s Instituto Oceanografico da Universidade de Sao Paulo +IO<BRA> s Instituto Oceanografico da Universidade de Sao Paulo +IO<PRT> s Instituto de Oceanografia da Universidade de Lisboa IOAN s Shirshov Institute of Oceanography IOC c Colecao de Culturas de Fungos do Instituto Oswaldo Cruz IOCAS s Institute of Oceanology, Chinese Academy of Scineces @@ -2504,6 +2519,7 @@ ISMC s Indiana Department of Natural Resources ISNHC s State Historical Society of Iowa ISNP s Istituto Sperimentale per la Nutrizione delle Piante ISP c International Cooperative Project for Description and Deposition of Type Cultures +ISPaVe c Centro di Ricerca per la Patologia Vegetale ISRA s Royal Academy ISRI c Indonesian Sugar Research Institute, Pusat Penelitian Perkebunan Gula Indonesia ISS c Collection of Bacteria @@ -3432,6 +3448,7 @@ MMNH<USA-MN> s Bell Museum of Natural History MMNHS s Macedonian Museum of Natural History MMNS s Mississippi Museum of Natural Science MMP s Museo de Mar del Plata (Argentina) +MMRF c Marine Microbial Reference Facility MMS s Montshire Museum of Science MMTT s Iran National Museum of Natural History MMUE s Museum of Manchester University @@ -3515,6 +3532,7 @@ MP<ZAF> s Transvaal Museum MPA s Ecole National Superieure Agronomique, Biologie et Pathologie Vegetales MPC s Monterey Peninsula College, Life Science Museum MPCA s Museo Provincial "Carlos Ameghino" +MPCNyO s Museo Provincial de Ciencias Naturales, Puerto Madryn MPCRM s Museo Paleontologico Cittadino della Rocca 
MPE s F. R. Long Herbarium MPEF-PV s Muso Paleontologico Egidio Fergulio @@ -3813,6 +3831,7 @@ NCE s University of Newcastle upon Tyne, School of Biological Sciences NCFB c National Collection of Food Bacteria NCH s Norwich Botanical Society NCHU s National Chung Hsing University +NCHU:ZOOL s National Chung Hsing University, Department of Life Science NCIM c National Collection of Industrial Microorganisms NCIMB c National Collections of Industrial Food and Marine Bacteria (incorporating the NCFB) NCIP<IDN> s Pusat Penelitian dan Pengembangan Oseanologi @@ -5478,6 +5497,7 @@ TU<EST> s University of Tartu TU<USA-LA> s Tulane University, Museum of Natural History TUAT s Tokyo University of Agriculture TUB s Eberhard-Karls-Universitaet Tuebingen, Institut fuer Biologie I +TUBSB b Tohoku University Brassica Seed Bank TUC s University of Arizona, Ecology and Evolutionary Biology Department TUCH s Tribhuvan University, Central Department of Botany TUFIL s Tokyo University of Fisheries, Ichthyological Laboratory @@ -6172,13 +6192,14 @@ WNC s University of North Carolina Wilmington, Department of Biology and Marine WNHM s Oklahoma Baptist University, Webster Natural History Museum WNLM s Niederoesterreichisches Landesmuseum WNMU s Western New Mexico University Museum -WNMU:Bird Western New Mexico University Museum, bird collection -WNMU:Fish Western New Mexico University Museum, fish collection -WNMU:Mamm Western New Mexico University Museum, mammal collection +WNMU:Bird s Western New Mexico University Museum, bird collection +WNMU:Fish s Western New Mexico University Museum, fish collection +WNMU:Mamm s Western New Mexico University Museum, mammal collection WNRE s Whiteshell Nuclear Research Establishment WNS s Wiesbaden Naturwissenschaftliche Sammlung der Stadt WNU s Northwest University, Biology Department WOCB s University of Windsor, Biological Sciences Department +WOCSB b Wheeler Orchid Collection and Species Bank WOH s Southwestern Oklahoma State University, 
Biology Department WOLL s University of Wollongong, Department of Biological Sciences WOS s City Museum and Art Gallery diff --git a/demo/asn2all.c b/demo/asn2all.c index 9c666e20..f81b5eb5 100644 --- a/demo/asn2all.c +++ b/demo/asn2all.c @@ -29,7 +29,7 @@ * * Version Creation Date: 7/26/04 * -* $Revision: 1.63 $ +* $Revision: 1.64 $ * * File Description: * @@ -53,7 +53,7 @@ #include <pmfapi.h> #include <lsqfetch.h> -#define ASN2ALL_APP_VER "5.1" +#define ASN2ALL_APP_VER "5.2" CharPtr ASN2ALL_APPLICATION = ASN2ALL_APP_VER; diff --git a/demo/asn2gb.c b/demo/asn2gb.c index 68db0965..7197d7ce 100644 --- a/demo/asn2gb.c +++ b/demo/asn2gb.c @@ -29,7 +29,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 6.133 $ +* $Revision: 6.134 $ * * File Description: New GenBank flatfile generator application * @@ -54,7 +54,7 @@ /* asn2gnbi.h needed to test PUBSEQGetAccnVer in accpubseq.c */ #include <asn2gnbi.h> -#define ASN2GB_APP_VER "7.1" +#define ASN2GB_APP_VER "7.2" CharPtr ASN2GB_APPLICATION = ASN2GB_APP_VER; diff --git a/demo/asnval.c b/demo/asnval.c index b8009ad4..30562d98 100644 --- a/demo/asnval.c +++ b/demo/asnval.c @@ -29,7 +29,7 @@ * * Version Creation Date: 11/3/04 * -* $Revision: 1.96 $ +* $Revision: 1.98 $ * * File Description: * @@ -60,7 +60,7 @@ #include <accpubseq.h> #endif -#define ASNVAL_APP_VER "7.2" +#define ASNVAL_APP_VER "7.3" CharPtr ASNVAL_APPLICATION = ASNVAL_APP_VER; @@ -804,7 +804,7 @@ static void DoValidation ( } xml_header = GetXmlHeaderText(cutoff); } - if (!BarcodeValidateOneSeqEntry (ofp, sep, FALSE, + if (!BarcodeValidateOneSeqEntry (ofp, sep, TRUE, vfp->verbosity == 4, !vfp->has_errors, xml_header)) { diff --git a/demo/scantest.c b/demo/scantest.c index 15701c17..2c7251e1 100644 --- a/demo/scantest.c +++ b/demo/scantest.c @@ -29,7 +29,7 @@ * * Version Creation Date: 1/20/95 * -* $Revision: 6.56 $ +* $Revision: 6.57 $ * * File Description: * template for custom scans of ASN.1 release files @@ -911,6 +911,39 @@ static void FindMuidCitations 
( } } +static void FindWholeGraphLocs ( + SeqGraphPtr sgp, + Pointer userdata +) + +{ + ChangeDataPtr cdp; + SeqLocPtr slp; + ThrdDataPtr tdp; + + if (sgp == NULL) return; + cdp = (ChangeDataPtr) userdata; + if (cdp == NULL) return; + tdp = cdp->tdp; + if (tdp == NULL) return; + if (tdp->fp == NULL) return; + + slp = sgp->loc; + if (slp == NULL) { + if (tdp->verbose) { + TSPrintLine (tdp->fp, "GPHLOC", tdp->id, NULL, NULL, "\t"); + } else { + TSPrintLine (tdp->fp, "GPHLOC", tdp->id, NULL, NULL, " "); + } + } else if (slp->choice == SEQLOC_WHOLE) { + if (tdp->verbose) { + TSPrintLine (tdp->fp, "GPHWHL", tdp->id, NULL, NULL, "\t"); + } else { + TSPrintLine (tdp->fp, "GPHWHL", tdp->id, NULL, NULL, " "); + } + } +} + static void RnaProtCmntTrailingCommaFix ( SeqFeatPtr sfp, Pointer userdata @@ -1501,6 +1534,7 @@ static void DoReport ( VisitBioSourcesInSep (sep, (Pointer) &cdbefore, LookForSemicolonedVouchers); VisitFeaturesInSep (sep, (Pointer) &cdbefore, FindCommaInGene); VisitFeaturesInSep (sep, (Pointer) &cdbefore, FindMuidCitations); + VisitGraphsInSep (sep, (Pointer) &cdbefore, FindWholeGraphLocs); tmp = Se2Bs (sep); if (! BSEqual (bs, tmp)) { diff --git a/demo/src_chk.c b/demo/src_chk.c index cb5ffe30..57741db0 100755 --- a/demo/src_chk.c +++ b/demo/src_chk.c @@ -1,398 +1,787 @@ -/* src_chk.c -* =========================================================================== -* -* PUBLIC DOMAIN NOTICE -* National Center for Biotechnology Information (NCBI) -* -* This software/database is a "United States Government Work" under the -* terms of the United States Copyright Act. It was written as part of -* the author's official duties as a United States Government employee and -* thus cannot be copyrighted. This software/database is freely available -* to the public for use. The National Library of Medicine and the U.S. -* Government do not place any restriction on its use or reproduction. 
-* We would, however, appreciate having the NCBI and the author cited in -* any work or product based on this material -* -* Although all reasonable efforts have been taken to ensure the accuracy -* and reliability of the software and data, the NLM and the U.S. -* Government do not and cannot warrant the performance or results that -* may be obtained by using this software or data. The NLM and the U.S. -* Government disclaim all warranties, express or implied, including -* warranties of performance, merchantability or fitness for any particular -* purpose. -* -* =========================================================================== -* -* File Name: src_chk.c -* -* Author: Colleen Bollin -* -* Version Creation Date: 4/12/07 -* -* $Revision: 1.10 $ -* -* File Description: -* -* Modifications: -* -------------------------------------------------------------------------- -* Date Name Description of modification -* ------- ---------- ----------------------------------------------------- -* -* -* ========================================================================== -*/ - -#include <ncbi.h> -#include <objall.h> -#include <objsset.h> -#include <objsub.h> -#include <objfdef.h> -#include <sequtil.h> -#include <gather.h> -#include <sqnutils.h> -#include <explore.h> -#include <pmfapi.h> -#define NLM_GENERATED_CODE_PROTO -#include <asnmacro.h> -#include <objmacro.h> -#include <macroapi.h> - -#define SRC_CHK_APP_VER "1.0" - -CharPtr SRC_CHK_APPLICATION = SRC_CHK_APP_VER; - - -static ValNodePtr CollectFieldList(BioseqPtr bsp) -{ - BioSourcePtr biop; - SeqDescrPtr sdp; - SeqMgrDescContext dcontext; - ValNodePtr list = NULL, vnp; - - for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext); - sdp != NULL; - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext)) { - biop = (BioSourcePtr) sdp->data.ptrvalue; - vnp = GetSourceQualFieldListFromBioSource (biop); - ValNodeLink (&list, vnp); - } - return list; -} - - -static void PrintHeader 
(FILE *fp, ValNodePtr field_list) -{ - CharPtr txt; - - if (fp == NULL || field_list == NULL) { - return; - } - /* first field accession, second field GI, third field tax ID */ - fprintf (fp, "\t\tTaxID"); - while (field_list != NULL) { - txt = SummarizeFieldType (field_list); - fprintf (fp, "\t%s", txt); - txt = MemFree (txt); - field_list = field_list->next; - } - fprintf (fp, "\n"); -} - - -static Int4 GetTaxIdFromOrgRef (OrgRefPtr orp) -{ - Int4 tax_id = -1; - ValNodePtr vnp; - DbtagPtr d; - - if (orp != NULL) - { - for (vnp = orp->db; vnp != NULL; vnp = vnp->next) - { - d = (DbtagPtr) vnp->data.ptrvalue; - if (StringCmp(d->db, "taxon") == 0) - { - tax_id = d->tag->id; - break; - } - } - } - return tax_id; -} - - -static void PrintBioSourceLine (FILE *fp, BioSourcePtr biop, ValNodePtr field_list) -{ - CharPtr txt; - - if (fp == NULL || biop == NULL || field_list == NULL) { - return; - } - - fprintf (fp, "\t%d", GetTaxIdFromOrgRef(biop->org)); - - while (field_list != NULL) { - txt = GetSourceQualFromBioSource (biop, field_list->data.ptrvalue, NULL); - fprintf (fp, "\t%s", txt == NULL ? 
"" : txt); - txt = MemFree (txt); - field_list = field_list->next; - } -} - - -static void PrintBioseqLines (FILE *fp, BioseqPtr bsp, ValNodePtr field_list) -{ - SeqDescrPtr sdp; - SeqMgrDescContext dcontext; - Char id_txt[255], id_txt2[255]; - SeqIdPtr sip, sip_gi = NULL, sip_gb = NULL; - - if (fp == NULL || bsp == NULL || field_list == NULL) { - return; - } - - for (sip = bsp->id; sip != NULL; sip = sip->next) { - if (sip->choice == SEQID_GENBANK - || (sip->choice == SEQID_EMBL && sip_gb == NULL) - || (sip->choice == SEQID_SWISSPROT && sip_gb == NULL) - || (sip->choice == SEQID_DDBJ && sip_gb == NULL) - || (sip->choice == SEQID_PIR && sip_gb == NULL)) { - sip_gb = sip; - } else if (sip->choice == SEQID_GI) { - sip_gi = sip; - } - } - - if (sip_gb == NULL && sip_gi == NULL) { - SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_txt, PRINTID_REPORT, sizeof (id_txt) - 1); - id_txt2[0] = 0; - } else { - if (sip_gb == NULL) { - id_txt[0] = 0; - } else { - SeqIdWrite (sip_gb, id_txt, PRINTID_REPORT, sizeof (id_txt) - 1); - } - if (sip_gi == NULL) { - id_txt2[0] = 0; - } else { - SeqIdWrite (sip_gi, id_txt2, PRINTID_REPORT, sizeof (id_txt2) - 1); - } - } - - for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext); - sdp != NULL; - sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext)) { - fprintf (fp, "%s\t%s", id_txt, id_txt2); - PrintBioSourceLine (fp, sdp->data.ptrvalue, field_list); - fprintf (fp, "\n"); - } -} - - -static void PrintBioseqErrorLine (FILE *fp, SeqIdPtr sip) -{ - Char id_txt[255]; - - if (fp == NULL || sip == NULL) { - return; - } - - SeqIdWrite (sip, id_txt, PRINTID_REPORT, sizeof (id_txt) - 1); - - if (sip->choice == SEQID_GI) { - fprintf (fp, "\t%s\n", id_txt); - } else { - fprintf (fp, "%s\t\n", id_txt); - } -} - - -static Boolean IsAllDigits (CharPtr str) -{ - CharPtr cp; - - if (StringHasNoText (str)) return FALSE; - - cp = str; - while (*cp != 0 && isdigit (*cp)) { - cp++; - } - if (*cp == 0) { - return 
TRUE; - } else { - return FALSE; - } -} - - -static SeqIdPtr SmartGuessMakeId (CharPtr str) -{ - CharPtr id_txt; - SeqIdPtr sip = NULL; - - if (StringHasNoText (str)) { - return NULL; - } else if (StringChr (str, '|') != NULL) { - sip = MakeSeqID (str); - } else if (IsAllDigits (str)) { - id_txt = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + 4)); - sprintf (id_txt, "gi|%s", str); - sip = MakeSeqID (id_txt); - id_txt = MemFree (id_txt); - } else { - id_txt = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + 4)); - sprintf (id_txt, "gb|%s", str); - sip = MakeSeqID (id_txt); - id_txt = MemFree (id_txt); - } - return sip; -} - - -/* Args structure contains command-line arguments */ - -#define i_argInputFile 0 -#define o_argOutputFile 1 - -Args myargs [] = { - {"Input File", NULL, NULL, NULL, - TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL}, - {"Output File", NULL, NULL, NULL, - TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL} -}; - - -static void SortFieldListForSrcChk (ValNodePtr PNTR field_list) -{ - ValNodePtr vnp, vnp_s, vnp_prev = NULL; - - if (field_list == NULL || *field_list == NULL) return; - - SortUniqueFieldTypeList (field_list); - - /* move taxname to front of list */ - for (vnp = *field_list; vnp != NULL; vnp_prev = vnp, vnp = vnp->next) { - if (vnp->choice == FieldType_source_qual) { - vnp_s = vnp->data.ptrvalue; - if (vnp_s != NULL - && vnp_s->choice == SourceQualChoice_textqual - && vnp_s->data.intvalue == Source_qual_taxname) { - /* only need to move if not already at front of list */ - if (vnp_prev != NULL) { - vnp_prev->next = vnp->next; - vnp->next = *field_list; - *field_list = vnp; - } - break; - } - } - } - - -} - - -Int2 Main(void) -{ - Char app [64]; - Int4 rval = 0; - CharPtr id_file, line; - ReadBufferData rbd; - ValNodePtr field_list = NULL; - SeqIdPtr sip; - ValNodePtr bsp_list = NULL, vnp; - BioseqPtr bsp; - FILE *fp; - - - /* standard setup */ - - ErrSetFatalLevel (SEV_MAX); - ErrClearOptFlags (EO_SHOW_USERSTR); - UseLocalAsnloadDataAndErrMsg (); 
- ErrPathReset (); - - /* finish resolving internal connections in ASN.1 parse tables */ - - if (! AllObjLoad ()) { - Message (MSG_FATAL, "AllObjLoad failed"); - return 1; - } - if (! SubmitAsnLoad ()) { - Message (MSG_FATAL, "SubmitAsnLoad failed"); - return 1; - } - if (! FeatDefSetLoad ()) { - Message (MSG_FATAL, "FeatDefSetLoad failed"); - return 1; - } - if (! SeqCodeSetLoad ()) { - Message (MSG_FATAL, "SeqCodeSetLoad failed"); - return 1; - } - if (! GeneticCodeTableLoad ()) { - Message (MSG_FATAL, "GeneticCodeTableLoad failed"); - return 1; - } - - PubSeqFetchEnable (); - - /* process command line arguments */ - - sprintf (app, "src_chk %s", SRC_CHK_APPLICATION); - if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) { - return 0; - } - - id_file = (CharPtr) myargs [i_argInputFile].strvalue; - - rbd.fp = FileOpen (id_file, "r"); - if (rbd.fp == NULL) { - Message (MSG_ERROR, "Unable to open %s", (CharPtr) myargs [i_argInputFile].strvalue); - return 1; - } - rbd.current_data = NULL; - line = AbstractReadFunction (&rbd); - while (line != NULL && line[0] != EOF) { - if (!StringHasNoText (line)) { - - sip = SmartGuessMakeId (line); - bsp = BioseqLockById (sip); - if (bsp == NULL) { - printf ("Unable to download Bioseq for %s\n", line); - } else { - ValNodeLink (&field_list, CollectFieldList (bsp)); - BioseqUnlock (bsp); - } - ValNodeAddPointer (&bsp_list, 0, sip); - } - line = MemFree (line); - line = AbstractReadFunction (&rbd); - } - - FileClose (rbd.fp); - - SortFieldListForSrcChk (&field_list); - - fp = FileOpen ((CharPtr) myargs [o_argOutputFile].strvalue, "w"); - if (fp == NULL) { - Message (MSG_ERROR, "Unable to open %s", (CharPtr) myargs [o_argOutputFile].strvalue); - rval = 1; - } else { - PrintHeader (fp, field_list); - for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) { - bsp = BioseqLockById (vnp->data.ptrvalue); - if (bsp == NULL) { - PrintBioseqErrorLine (fp, vnp->data.ptrvalue); - } else { - PrintBioseqLines (fp, bsp, field_list); - } - 
BioseqUnlock (bsp); - vnp->data.ptrvalue = SeqIdFree (vnp->data.ptrvalue); - } - } - FileClose (fp); - bsp_list = ValNodeFree (bsp_list); - field_list = FieldTypeListFree (field_list); - return rval; -} +/* src_chk.c
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information (NCBI)
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government do not place any restriction on its use or reproduction.
+* We would, however, appreciate having the NCBI and the author cited in
+* any work or product based on this material
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* ===========================================================================
+*
+* File Name: src_chk.c
+*
+* Author: Colleen Bollin
+*
+* Version Creation Date: 4/12/07
+*
+* $Revision: 1.11 $
+*
+* File Description:
+*
+* Modifications:
+* --------------------------------------------------------------------------
+* Date Name Description of modification
+* ------- ---------- -----------------------------------------------------
+*
+*
+* ==========================================================================
+*/
+
+#include <ncbi.h>
+#include <objall.h>
+#include <objsset.h>
+#include <objsub.h>
+#include <objfdef.h>
+#include <sequtil.h>
+#include <gather.h>
+#include <sqnutils.h>
+#include <explore.h>
+#include <pmfapi.h>
+#define NLM_GENERATED_CODE_PROTO
+#include <asnmacro.h>
+#include <objmacro.h>
+#include <macroapi.h>
+#ifdef INTERNAL_NCBI_SRC_CHK
+#include <accpubseq.h>
+#endif
+
+#define SRC_CHK_APP_VER "1.0" /* version text; Main appends it to "src_chk " to build the name passed to GetArgs */
+/* Non-static global initialised to the version text above; read by Main. */
+CharPtr SRC_CHK_APPLICATION = SRC_CHK_APP_VER;
+
+#ifdef INTERNAL_NCBI_SRC_CHK
+static CharPtr dirsubfetchproc = "DirSubBioseqFetch"; /* passed twice to ObjMgrProcLoad by DirSubFetchEnable */
+/* Cached SEQUIN / DIRSUB / FETCHSCRIPT application parameter; stays NULL until a fetch routine reads it successfully. */
+static CharPtr dirsubfetchcmd = NULL;
+
+/*
+*  ReadFromDirSub
+*
+*  Runs the script named by the SEQUIN / DIRSUB / FETCHSCRIPT application
+*  parameter for one accession, sends its output to a temporary file, and
+*  parses that file with ReadAsnFastaOrFlatFile.  The temporary file is
+*  removed before returning.
+*
+*  accn      accession handed to the script; NULL or blank text is rejected
+*  datatype  optional out value, reset to 0 on entry, then filled by the reader
+*  entityID  optional out value, reset to 0 on entry, then filled by the reader
+*
+*  Returns whatever ReadAsnFastaOrFlatFile returns, or NULL when accn is
+*  blank, no script is configured, the command line would not fit in the
+*  command buffer, or the temporary file cannot be opened.
+*
+*  NOTE(review): accn is pasted into a shell command without quoting, so
+*  callers must not pass untrusted text here.
+*/
+extern Pointer ReadFromDirSub (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
+extern Pointer ReadFromDirSub (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
+
+{
+  Char     cmmd [256];
+  Pointer  dataptr;
+  FILE*    fp;
+  Char     path [PATH_MAX];
+  Char     shellcmd [PATH_MAX + 512];
+
+  if (datatype != NULL) {
+    *datatype = 0;
+  }
+  if (entityID != NULL) {
+    *entityID = 0;
+  }
+  if (StringHasNoText (accn)) return NULL;
+
+  /* the script name is looked up once and kept for later calls */
+  if (dirsubfetchcmd == NULL) {
+    if (GetAppParam ("SEQUIN", "DIRSUB", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
+      dirsubfetchcmd = StringSaveNoNull (cmmd);
+    }
+  }
+  if (dirsubfetchcmd == NULL) return NULL;
+
+  /* the longest command is "csh " + script + " " + accn + " > " + path + NUL;
+     path is at most PATH_MAX, so budget for that and refuse oversized input
+     instead of letting sprintf write past the end of shellcmd */
+  if (StringLen (dirsubfetchcmd) + StringLen (accn) + PATH_MAX + 16 > sizeof (shellcmd)) {
+    return NULL;
+  }
+
+  TmpNam (path);
+
+#ifdef OS_UNIX
+  sprintf (shellcmd, "csh %s %s > %s", dirsubfetchcmd, accn, path);
+  system (shellcmd);
+#endif
+#ifdef OS_MSWIN
+  sprintf (shellcmd, "%s %s -o %s", dirsubfetchcmd, accn, path);
+  system (shellcmd);
+#endif
+
+  /* a script that failed to produce output shows up here as an unopenable file */
+  fp = FileOpen (path, "r");
+  if (fp == NULL) {
+    FileRemove (path);
+    return NULL;
+  }
+  dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
+  FileClose (fp);
+  FileRemove (path);
+  return dataptr;
+}
+
+
+/*
+*  DirSubBioseqFetchFunc
+*
+*  Fetch callback registered by DirSubFetchEnable.  data is treated as an
+*  OMProcControlPtr whose input_data is a SeqIdPtr.  Only SEQID_GENBANK ids
+*  with a non-blank accession are handled: the configured DirSub script is
+*  run for that accession, its output is parsed, and the matching Bioseq is
+*  stored in output_data together with the entity ID of the top Seq-entry.
+*
+*  Returns OM_MSG_RET_ERROR when the input is unusable, no script is
+*  configured, the command line would not fit in the command buffer, the
+*  temporary file cannot be opened, or no top Seq-entry is found;
+*  OM_MSG_RET_OK when the script output could not be parsed; and
+*  OM_MSG_RET_DONE otherwise (output_data may still be NULL if the id was
+*  not found in the loaded entry).
+*
+*  NOTE(review): the accession is pasted into a shell command unquoted.
+*/
+static Int2 LIBCALLBACK DirSubBioseqFetchFunc (Pointer data)
+
+{
+  BioseqPtr         bsp;
+  Char              cmmd [256];
+  Pointer           dataptr;
+  Uint2             datatype = 0;
+  Uint2             entityID = 0;
+  FILE*             fp;
+  OMProcControlPtr  ompcp;
+  ObjMgrProcPtr     ompp;
+  Char              path [PATH_MAX];
+  SeqEntryPtr       sep = NULL;
+  Char              shellcmd [PATH_MAX + 512];
+  SeqIdPtr          sip;
+  TextSeqIdPtr      tsip;
+
+  ompcp = (OMProcControlPtr) data;
+  if (ompcp == NULL) return OM_MSG_RET_ERROR;
+  ompp = ompcp->proc;
+  if (ompp == NULL) return OM_MSG_RET_ERROR;
+  sip = (SeqIdPtr) ompcp->input_data;
+  if (sip == NULL) return OM_MSG_RET_ERROR;
+
+  if (sip->choice != SEQID_GENBANK) return OM_MSG_RET_ERROR;
+  tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+  if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
+
+  /* the script name is looked up once and kept for later calls */
+  if (dirsubfetchcmd == NULL) {
+    if (GetAppParam ("SEQUIN", "DIRSUB", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
+      dirsubfetchcmd = StringSaveNoNull (cmmd);
+    }
+  }
+  if (dirsubfetchcmd == NULL) return OM_MSG_RET_ERROR;
+
+  /* refuse input that would make sprintf write past the end of shellcmd;
+     path contributes at most PATH_MAX bytes */
+  if (StringLen (dirsubfetchcmd) + StringLen (tsip->accession) + PATH_MAX + 16 > sizeof (shellcmd)) {
+    return OM_MSG_RET_ERROR;
+  }
+
+  TmpNam (path);
+
+#ifdef OS_UNIX
+  sprintf (shellcmd, "csh %s %s > %s", dirsubfetchcmd, tsip->accession, path);
+  system (shellcmd);
+#endif
+#ifdef OS_MSWIN
+  sprintf (shellcmd, "%s %s -o %s", dirsubfetchcmd, tsip->accession, path);
+  system (shellcmd);
+#endif
+
+  fp = FileOpen (path, "r");
+  if (fp == NULL) {
+    FileRemove (path);
+    return OM_MSG_RET_ERROR;
+  }
+  dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
+  FileClose (fp);
+  FileRemove (path);
+
+  /* nothing parsed: report OK rather than an error */
+  if (dataptr == NULL) return OM_MSG_RET_OK;
+
+  sep = GetTopSeqEntryForEntityID (entityID);
+  if (sep == NULL) return OM_MSG_RET_ERROR;
+  bsp = BioseqFindInSeqEntry (sip, sep);
+  ompcp->output_data = (Pointer) bsp;
+  ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
+  return OM_MSG_RET_DONE;
+}
+
+/*
+*  DirSubFetchEnable
+*
+*  Hands DirSubBioseqFetchFunc to ObjMgrProcLoad as an OMPROC_FETCH
+*  procedure, using dirsubfetchproc for both name arguments and
+*  OBJ_SEQID / OBJ_BIOSEQ as the type arguments.  The ObjMgrProcLoad result
+*  is not inspected; this function always returns TRUE.
+*/
+static Boolean DirSubFetchEnable (void)
+
+{
+  ObjMgrProcLoad (OMPROC_FETCH,
+                  dirsubfetchproc,
+                  dirsubfetchproc,
+                  OBJ_SEQID, 0,
+                  OBJ_BIOSEQ, 0,
+                  NULL,
+                  DirSubBioseqFetchFunc,
+                  PROC_PRIORITY_DEFAULT);
+
+  return TRUE;
+}
+
+static CharPtr smartfetchproc = "SmartBioseqFetch"; /* passed twice to ObjMgrProcLoad by SmartFetchEnable */
+/* Cached SEQUIN / SMART / FETCHSCRIPT application parameter; stays NULL until a fetch routine reads it successfully. */
+static CharPtr smartfetchcmd = NULL;
+
+/*
+*  ReadFromSmart
+*
+*  Runs the script named by the SEQUIN / SMART / FETCHSCRIPT application
+*  parameter for one accession, sends its output to a temporary file, and
+*  parses that file with ReadAsnFastaOrFlatFile.  The temporary file is
+*  removed before returning.
+*
+*  accn      accession handed to the script; NULL or blank text is rejected
+*  datatype  optional out value, reset to 0 on entry, then filled by the reader
+*  entityID  optional out value, reset to 0 on entry, then filled by the reader
+*
+*  Returns whatever ReadAsnFastaOrFlatFile returns, or NULL when accn is
+*  blank, no script is configured, the command line would not fit in the
+*  command buffer, or the temporary file cannot be opened.
+*
+*  NOTE(review): accn is pasted into a shell command without quoting, so
+*  callers must not pass untrusted text here.
+*/
+extern Pointer ReadFromSmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
+extern Pointer ReadFromSmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
+
+{
+  Char     cmmd [256];
+  Pointer  dataptr;
+  FILE*    fp;
+  Char     path [PATH_MAX];
+  Char     shellcmd [PATH_MAX + 512];
+
+  if (datatype != NULL) {
+    *datatype = 0;
+  }
+  if (entityID != NULL) {
+    *entityID = 0;
+  }
+  if (StringHasNoText (accn)) return NULL;
+
+  /* the script name is looked up once and kept for later calls */
+  if (smartfetchcmd == NULL) {
+    if (GetAppParam ("SEQUIN", "SMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
+      smartfetchcmd = StringSaveNoNull (cmmd);
+    }
+  }
+  if (smartfetchcmd == NULL) return NULL;
+
+  /* the longest command is "csh " + script + " " + accn + " > " + path + NUL;
+     path is at most PATH_MAX, so budget for that and refuse oversized input
+     instead of letting sprintf write past the end of shellcmd */
+  if (StringLen (smartfetchcmd) + StringLen (accn) + PATH_MAX + 16 > sizeof (shellcmd)) {
+    return NULL;
+  }
+
+  TmpNam (path);
+
+#ifdef OS_UNIX
+  sprintf (shellcmd, "csh %s %s > %s", smartfetchcmd, accn, path);
+  system (shellcmd);
+#endif
+#ifdef OS_MSWIN
+  sprintf (shellcmd, "%s %s -o %s", smartfetchcmd, accn, path);
+  system (shellcmd);
+#endif
+
+  /* a script that failed to produce output shows up here as an unopenable file */
+  fp = FileOpen (path, "r");
+  if (fp == NULL) {
+    FileRemove (path);
+    return NULL;
+  }
+  dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
+  FileClose (fp);
+  FileRemove (path);
+  return dataptr;
+}
+
+
+/*
+*  SmartBioseqFetchFunc
+*
+*  Fetch callback registered by SmartFetchEnable.  data is treated as an
+*  OMProcControlPtr whose input_data is a SeqIdPtr.  Only SEQID_GENBANK ids
+*  with a non-blank accession are handled: the configured Smart script is
+*  run for that accession, its output is parsed, and the matching Bioseq is
+*  stored in output_data together with the entity ID of the top Seq-entry.
+*
+*  Returns OM_MSG_RET_ERROR when the input is unusable, no script is
+*  configured, the command line would not fit in the command buffer, the
+*  temporary file cannot be opened, or no top Seq-entry is found;
+*  OM_MSG_RET_OK when the script output could not be parsed; and
+*  OM_MSG_RET_DONE otherwise (output_data may still be NULL if the id was
+*  not found in the loaded entry).
+*
+*  NOTE(review): the accession is pasted into a shell command unquoted.
+*/
+static Int2 LIBCALLBACK SmartBioseqFetchFunc (Pointer data)
+
+{
+  BioseqPtr         bsp;
+  Char              cmmd [256];
+  Pointer           dataptr;
+  Uint2             datatype = 0;
+  Uint2             entityID = 0;
+  FILE*             fp;
+  OMProcControlPtr  ompcp;
+  ObjMgrProcPtr     ompp;
+  Char              path [PATH_MAX];
+  SeqEntryPtr       sep = NULL;
+  Char              shellcmd [PATH_MAX + 512];
+  SeqIdPtr          sip;
+  TextSeqIdPtr      tsip;
+
+  ompcp = (OMProcControlPtr) data;
+  if (ompcp == NULL) return OM_MSG_RET_ERROR;
+  ompp = ompcp->proc;
+  if (ompp == NULL) return OM_MSG_RET_ERROR;
+  sip = (SeqIdPtr) ompcp->input_data;
+  if (sip == NULL) return OM_MSG_RET_ERROR;
+
+  if (sip->choice != SEQID_GENBANK) return OM_MSG_RET_ERROR;
+  tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+  if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
+
+  /* the script name is looked up once and kept for later calls */
+  if (smartfetchcmd == NULL) {
+    if (GetAppParam ("SEQUIN", "SMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
+      smartfetchcmd = StringSaveNoNull (cmmd);
+    }
+  }
+  if (smartfetchcmd == NULL) return OM_MSG_RET_ERROR;
+
+  /* refuse input that would make sprintf write past the end of shellcmd;
+     path contributes at most PATH_MAX bytes */
+  if (StringLen (smartfetchcmd) + StringLen (tsip->accession) + PATH_MAX + 16 > sizeof (shellcmd)) {
+    return OM_MSG_RET_ERROR;
+  }
+
+  TmpNam (path);
+
+#ifdef OS_UNIX
+  sprintf (shellcmd, "csh %s %s > %s", smartfetchcmd, tsip->accession, path);
+  system (shellcmd);
+#endif
+#ifdef OS_MSWIN
+  sprintf (shellcmd, "%s %s -o %s", smartfetchcmd, tsip->accession, path);
+  system (shellcmd);
+#endif
+
+  fp = FileOpen (path, "r");
+  if (fp == NULL) {
+    FileRemove (path);
+    return OM_MSG_RET_ERROR;
+  }
+  dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
+  FileClose (fp);
+  FileRemove (path);
+
+  /* nothing parsed: report OK rather than an error */
+  if (dataptr == NULL) return OM_MSG_RET_OK;
+
+  sep = GetTopSeqEntryForEntityID (entityID);
+  if (sep == NULL) return OM_MSG_RET_ERROR;
+  bsp = BioseqFindInSeqEntry (sip, sep);
+  ompcp->output_data = (Pointer) bsp;
+  ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
+  return OM_MSG_RET_DONE;
+}
+
+/*
+*  SmartFetchEnable
+*
+*  Hands SmartBioseqFetchFunc to ObjMgrProcLoad as an OMPROC_FETCH
+*  procedure, using smartfetchproc for both name arguments and
+*  OBJ_SEQID / OBJ_BIOSEQ as the type arguments.  The ObjMgrProcLoad result
+*  is not inspected; this function always returns TRUE.
+*/
+static Boolean SmartFetchEnable (void)
+
+{
+  ObjMgrProcLoad (OMPROC_FETCH,
+                  smartfetchproc,
+                  smartfetchproc,
+                  OBJ_SEQID, 0,
+                  OBJ_BIOSEQ, 0,
+                  NULL,
+                  SmartBioseqFetchFunc,
+                  PROC_PRIORITY_DEFAULT);
+
+  return TRUE;
+}
+
+static CharPtr tpasmartfetchproc = "TPASmartBioseqFetch"; /* passed twice to ObjMgrProcLoad by TPASmartFetchEnable */
+/* Cached SEQUIN / TPASMART / FETCHSCRIPT application parameter; stays NULL until a fetch routine reads it successfully. */
+static CharPtr tpasmartfetchcmd = NULL;
+
+/*
+*  ReadFromTPASmart
+*
+*  Runs the script named by the SEQUIN / TPASMART / FETCHSCRIPT application
+*  parameter for one accession, sends its output to a temporary file, and
+*  parses that file with ReadAsnFastaOrFlatFile.  The temporary file is
+*  removed before returning.
+*
+*  accn      accession handed to the script; NULL or blank text is rejected
+*  datatype  optional out value, reset to 0 on entry, then filled by the reader
+*  entityID  optional out value, reset to 0 on entry, then filled by the reader
+*
+*  Returns whatever ReadAsnFastaOrFlatFile returns, or NULL when accn is
+*  blank, no script is configured, the command line would not fit in the
+*  command buffer, or the temporary file cannot be opened.
+*
+*  NOTE(review): accn is pasted into a shell command without quoting, so
+*  callers must not pass untrusted text here.
+*/
+extern Pointer ReadFromTPASmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
+extern Pointer ReadFromTPASmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
+
+{
+  Char     cmmd [256];
+  Pointer  dataptr;
+  FILE*    fp;
+  Char     path [PATH_MAX];
+  Char     shellcmd [PATH_MAX + 512];
+
+  if (datatype != NULL) {
+    *datatype = 0;
+  }
+  if (entityID != NULL) {
+    *entityID = 0;
+  }
+  if (StringHasNoText (accn)) return NULL;
+
+  /* the script name is looked up once and kept for later calls */
+  if (tpasmartfetchcmd == NULL) {
+    if (GetAppParam ("SEQUIN", "TPASMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
+      tpasmartfetchcmd = StringSaveNoNull (cmmd);
+    }
+  }
+  if (tpasmartfetchcmd == NULL) return NULL;
+
+  /* the longest command is "csh " + script + " " + accn + " > " + path + NUL;
+     path is at most PATH_MAX, so budget for that and refuse oversized input
+     instead of letting sprintf write past the end of shellcmd */
+  if (StringLen (tpasmartfetchcmd) + StringLen (accn) + PATH_MAX + 16 > sizeof (shellcmd)) {
+    return NULL;
+  }
+
+  TmpNam (path);
+
+#ifdef OS_UNIX
+  sprintf (shellcmd, "csh %s %s > %s", tpasmartfetchcmd, accn, path);
+  system (shellcmd);
+#endif
+#ifdef OS_MSWIN
+  sprintf (shellcmd, "%s %s -o %s", tpasmartfetchcmd, accn, path);
+  system (shellcmd);
+#endif
+
+  /* a script that failed to produce output shows up here as an unopenable file */
+  fp = FileOpen (path, "r");
+  if (fp == NULL) {
+    FileRemove (path);
+    return NULL;
+  }
+  dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
+  FileClose (fp);
+  FileRemove (path);
+  return dataptr;
+}
+
+
+/*
+*  TPASmartBioseqFetchFunc
+*
+*  Fetch callback registered by TPASmartFetchEnable.  data is treated as an
+*  OMProcControlPtr whose input_data is a SeqIdPtr.  Only SEQID_TPG ids with
+*  a non-blank accession are handled: the configured TPASmart script is run
+*  for that accession, its output is parsed, and the matching Bioseq is
+*  stored in output_data together with the entity ID of the top Seq-entry.
+*
+*  Returns OM_MSG_RET_ERROR when the input is unusable, no script is
+*  configured, the command line would not fit in the command buffer, the
+*  temporary file cannot be opened, or no top Seq-entry is found;
+*  OM_MSG_RET_OK when the script output could not be parsed; and
+*  OM_MSG_RET_DONE otherwise (output_data may still be NULL if the id was
+*  not found in the loaded entry).
+*
+*  NOTE(review): the accession is pasted into a shell command unquoted.
+*/
+static Int2 LIBCALLBACK TPASmartBioseqFetchFunc (Pointer data)
+
+{
+  BioseqPtr         bsp;
+  Char              cmmd [256];
+  Pointer           dataptr;
+  Uint2             datatype = 0;
+  Uint2             entityID = 0;
+  FILE*             fp;
+  OMProcControlPtr  ompcp;
+  ObjMgrProcPtr     ompp;
+  Char              path [PATH_MAX];
+  SeqEntryPtr       sep = NULL;
+  Char              shellcmd [PATH_MAX + 512];
+  SeqIdPtr          sip;
+  TextSeqIdPtr      tsip;
+
+  ompcp = (OMProcControlPtr) data;
+  if (ompcp == NULL) return OM_MSG_RET_ERROR;
+  ompp = ompcp->proc;
+  if (ompp == NULL) return OM_MSG_RET_ERROR;
+  sip = (SeqIdPtr) ompcp->input_data;
+  if (sip == NULL) return OM_MSG_RET_ERROR;
+
+  if (sip->choice != SEQID_TPG) return OM_MSG_RET_ERROR;
+  tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+  if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
+
+  /* the script name is looked up once and kept for later calls */
+  if (tpasmartfetchcmd == NULL) {
+    if (GetAppParam ("SEQUIN", "TPASMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
+      tpasmartfetchcmd = StringSaveNoNull (cmmd);
+    }
+  }
+  if (tpasmartfetchcmd == NULL) return OM_MSG_RET_ERROR;
+
+  /* refuse input that would make sprintf write past the end of shellcmd;
+     path contributes at most PATH_MAX bytes */
+  if (StringLen (tpasmartfetchcmd) + StringLen (tsip->accession) + PATH_MAX + 16 > sizeof (shellcmd)) {
+    return OM_MSG_RET_ERROR;
+  }
+
+  TmpNam (path);
+
+#ifdef OS_UNIX
+  sprintf (shellcmd, "csh %s %s > %s", tpasmartfetchcmd, tsip->accession, path);
+  system (shellcmd);
+#endif
+#ifdef OS_MSWIN
+  sprintf (shellcmd, "%s %s -o %s", tpasmartfetchcmd, tsip->accession, path);
+  system (shellcmd);
+#endif
+
+  fp = FileOpen (path, "r");
+  if (fp == NULL) {
+    FileRemove (path);
+    return OM_MSG_RET_ERROR;
+  }
+  dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
+  FileClose (fp);
+  FileRemove (path);
+
+  /* nothing parsed: report OK rather than an error */
+  if (dataptr == NULL) return OM_MSG_RET_OK;
+
+  sep = GetTopSeqEntryForEntityID (entityID);
+  if (sep == NULL) return OM_MSG_RET_ERROR;
+  bsp = BioseqFindInSeqEntry (sip, sep);
+  ompcp->output_data = (Pointer) bsp;
+  ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
+  return OM_MSG_RET_DONE;
+}
+
+/*
+*  TPASmartFetchEnable
+*
+*  Hands TPASmartBioseqFetchFunc to ObjMgrProcLoad as an OMPROC_FETCH
+*  procedure, using tpasmartfetchproc for both name arguments and
+*  OBJ_SEQID / OBJ_BIOSEQ as the type arguments.  The ObjMgrProcLoad result
+*  is not inspected; this function always returns TRUE.
+*/
+static Boolean TPASmartFetchEnable (void)
+
+{
+  ObjMgrProcLoad (OMPROC_FETCH,
+                  tpasmartfetchproc,
+                  tpasmartfetchproc,
+                  OBJ_SEQID, 0,
+                  OBJ_BIOSEQ, 0,
+                  NULL,
+                  TPASmartBioseqFetchFunc,
+                  PROC_PRIORITY_DEFAULT);
+
+  return TRUE;
+}
+#endif
+
+
+/*
+*  CollectFieldList
+*
+*  Walks every Seq_descr_source descriptor that SeqMgrGetNextDescriptor
+*  yields for bsp and concatenates, in visiting order, the lists returned
+*  by GetSourceQualFieldListFromBioSource for each one.
+*
+*  Returns the combined list (NULL when nothing was collected); the caller
+*  owns it.
+*/
+static ValNodePtr CollectFieldList(BioseqPtr bsp)
+{
+  SeqMgrDescContext  context;
+  SeqDescrPtr        desc;
+  ValNodePtr         fields = NULL;
+
+  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
+  while (desc != NULL) {
+    ValNodeLink (&fields,
+                 GetSourceQualFieldListFromBioSource ((BioSourcePtr) desc->data.ptrvalue));
+    desc = SeqMgrGetNextDescriptor (bsp, desc, Seq_descr_source, &context);
+  }
+
+  return fields;
+}
+
+
+/*
+*  PrintHeader
+*
+*  Writes the tab-delimited header row to fp: two empty leading columns
+*  (accession and GI), "TaxID", then one column per entry of field_list
+*  labelled with SummarizeFieldType, and a trailing newline.
+*
+*  Nothing is written when fp or field_list is NULL.  A field whose
+*  summary comes back NULL gets an empty column label, the same way
+*  PrintBioSourceLine prints a missing value.
+*/
+static void PrintHeader (FILE *fp, ValNodePtr field_list)
+{
+  CharPtr txt;
+
+  if (fp == NULL || field_list == NULL) {
+    return;
+  }
+  /* first field accession, second field GI, third field tax ID */
+  fprintf (fp, "\t\tTaxID");
+  while (field_list != NULL) {
+    txt = SummarizeFieldType (field_list);
+    /* passing NULL for %s is undefined, so substitute an empty label */
+    fprintf (fp, "\t%s", txt == NULL ? "" : txt);
+    txt = MemFree (txt);
+    field_list = field_list->next;
+  }
+  fprintf (fp, "\n");
+}
+
+
+/*
+*  GetTaxIdFromOrgRef
+*
+*  Scans orp->db for the first Dbtag whose db name is exactly "taxon" and
+*  returns the integer id stored in its tag.
+*
+*  Returns -1 when orp is NULL or no usable "taxon" entry exists.  List
+*  nodes with no Dbtag, and "taxon" entries with no tag, are skipped
+*  instead of being dereferenced.
+*/
+static Int4 GetTaxIdFromOrgRef (OrgRefPtr orp)
+{
+  Int4       tax_id = -1;
+  ValNodePtr vnp;
+  DbtagPtr   d;
+
+  if (orp != NULL)
+  {
+    for (vnp = orp->db; vnp != NULL; vnp = vnp->next)
+    {
+      d = (DbtagPtr) vnp->data.ptrvalue;
+      /* guard against empty list nodes and tag-less Dbtags */
+      if (d != NULL && d->tag != NULL && StringCmp(d->db, "taxon") == 0)
+      {
+        tax_id = d->tag->id;
+        break;
+      }
+    }
+  }
+  return tax_id;
+}
+
+
+/*
+*  PrintBioSourceLine
+*
+*  Appends to the current output row a tab plus the taxonomy id of
+*  biop->org (as reported by GetTaxIdFromOrgRef), followed by one
+*  tab-prefixed column per entry of field_list holding the value returned
+*  by GetSourceQualFromBioSource.  Missing values print as empty columns.
+*  No newline is written; the caller finishes the row.
+*
+*  Nothing is written when fp, biop or field_list is NULL.
+*/
+static void PrintBioSourceLine (FILE *fp, BioSourcePtr biop, ValNodePtr field_list)
+{
+  CharPtr txt;
+
+  if (fp == NULL || biop == NULL || field_list == NULL) {
+    return;
+  }
+
+  /* Int4 is not guaranteed to be int, so print it through long; the
+     emitted text is the same */
+  fprintf (fp, "\t%ld", (long) GetTaxIdFromOrgRef(biop->org));
+
+  while (field_list != NULL) {
+    txt = GetSourceQualFromBioSource (biop, field_list->data.ptrvalue, NULL);
+    fprintf (fp, "\t%s", txt == NULL ? "" : txt);
+    txt = MemFree (txt);
+    field_list = field_list->next;
+  }
+}
+
+
+/*
+*  PrintBioseqLines
+*
+*  Emits one output row per Seq_descr_source descriptor found for bsp.
+*  Each row starts with two id columns followed by the columns written by
+*  PrintBioSourceLine.
+*
+*  First column: the last SEQID_GENBANK id in bsp->id, otherwise the first
+*  EMBL / SWISSPROT / DDBJ / PIR id seen before any GenBank id.  Second
+*  column: the last SEQID_GI id.  When neither kind is present the first
+*  column is written from SeqIdFindBest (bsp->id, SEQID_GENBANK) and the
+*  second is empty.
+*
+*  Nothing is written when fp, bsp or field_list is NULL.
+*/
+static void PrintBioseqLines (FILE *fp, BioseqPtr bsp, ValNodePtr field_list)
+{
+  SeqMgrDescContext  context;
+  SeqDescrPtr        desc;
+  Char               acc_col [255], gi_col [255];
+  SeqIdPtr           id, gi_id = NULL, acc_id = NULL;
+
+  if (fp == NULL || bsp == NULL || field_list == NULL) {
+    return;
+  }
+
+  /* pick the ids that feed the two leading columns */
+  for (id = bsp->id; id != NULL; id = id->next) {
+    if (id->choice == SEQID_GENBANK) {
+      /* GenBank always wins, even over an earlier pick */
+      acc_id = id;
+    } else if (id->choice == SEQID_EMBL
+               || id->choice == SEQID_SWISSPROT
+               || id->choice == SEQID_DDBJ
+               || id->choice == SEQID_PIR) {
+      /* the other accession types only fill an empty slot */
+      if (acc_id == NULL) {
+        acc_id = id;
+      }
+    } else if (id->choice == SEQID_GI) {
+      gi_id = id;
+    }
+  }
+
+  /* second column: GI text or empty */
+  if (gi_id != NULL) {
+    SeqIdWrite (gi_id, gi_col, PRINTID_REPORT, sizeof (gi_col) - 1);
+  } else {
+    gi_col[0] = 0;
+  }
+
+  /* first column: accession text, empty when only a GI exists, or the
+     best available id when the record has neither */
+  if (acc_id != NULL) {
+    SeqIdWrite (acc_id, acc_col, PRINTID_REPORT, sizeof (acc_col) - 1);
+  } else if (gi_id != NULL) {
+    acc_col[0] = 0;
+  } else {
+    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), acc_col, PRINTID_REPORT, sizeof (acc_col) - 1);
+  }
+
+  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
+  while (desc != NULL) {
+    fprintf (fp, "%s\t%s", acc_col, gi_col);
+    PrintBioSourceLine (fp, desc->data.ptrvalue, field_list);
+    fprintf (fp, "\n");
+    desc = SeqMgrGetNextDescriptor (bsp, desc, Seq_descr_source, &context);
+  }
+}
+
+
+/*
+*  PrintBioseqErrorLine
+*
+*  Writes a row that carries only the id: the id text goes in the second
+*  column when sip is a SEQID_GI, otherwise in the first column.  Main uses
+*  this for ids whose Bioseq could not be locked.
+*
+*  Nothing is written when fp or sip is NULL.
+*/
+static void PrintBioseqErrorLine (FILE *fp, SeqIdPtr sip)
+{
+  Char  label [255];
+
+  if (fp != NULL && sip != NULL) {
+    SeqIdWrite (sip, label, PRINTID_REPORT, sizeof (label) - 1);
+
+    if (sip->choice != SEQID_GI) {
+      fprintf (fp, "%s\t\n", label);
+    } else {
+      fprintf (fp, "\t%s\n", label);
+    }
+  }
+}
+
+
+/*
+*  IsAllDigits
+*
+*  Returns TRUE when str has text (per StringHasNoText) and every
+*  character in it is a decimal digit; FALSE otherwise, including for NULL
+*  or blank input.
+*/
+static Boolean IsAllDigits (CharPtr str)
+{
+  CharPtr cp;
+
+  if (StringHasNoText (str)) return FALSE;
+
+  /* isdigit is only defined for unsigned char values and EOF, so a plain
+     char with the high bit set must be converted first */
+  for (cp = str; *cp != 0; cp++) {
+    if (! isdigit ((unsigned char) *cp)) {
+      return FALSE;
+    }
+  }
+  return TRUE;
+}
+
+
+/*
+*  SmartGuessMakeId
+*
+*  Builds a SeqId from one line of user input:
+*    - text containing '|' is passed to MakeSeqID unchanged;
+*    - text made only of digits is prefixed with "gi|";
+*    - anything else is prefixed with "gb|".
+*
+*  Returns the result of MakeSeqID, or NULL when str has no text or the
+*  temporary buffer cannot be allocated.  The caller owns the returned id.
+*/
+static SeqIdPtr SmartGuessMakeId (CharPtr str)
+{
+  CharPtr  id_txt;
+  CharPtr  prefix;
+  SeqIdPtr sip = NULL;
+
+  if (StringHasNoText (str)) {
+    return NULL;
+  }
+  if (StringChr (str, '|') != NULL) {
+    return MakeSeqID (str);
+  }
+
+  prefix = IsAllDigits (str) ? "gi|" : "gb|";
+
+  /* three prefix characters plus the terminating NUL */
+  id_txt = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + 4));
+  if (id_txt == NULL) {
+    return NULL;
+  }
+  sprintf (id_txt, "%s%s", prefix, str);
+  sip = MakeSeqID (id_txt);
+  id_txt = MemFree (id_txt);
+
+  return sip;
+}
+
+
+/* Args structure contains command-line arguments */
+
+#define i_argInputFile 0 /* index of the -i entry in myargs */
+#define o_argOutputFile 1 /* index of the -o entry in myargs */
+/* Argument table handed to GetArgs by Main; entry order must match the two index macros above. */
+Args myargs [] = {
+ {"Input File", NULL, NULL, NULL, /* Main opens this file and reads one id per line */
+ TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
+ {"Output File", NULL, NULL, NULL, /* Main writes the tab-delimited table here */
+ TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL}
+};
+
+
+/*
+*  SortFieldListForSrcChk
+*
+*  Runs SortUniqueFieldTypeList on *field_list, then moves the first
+*  source-qualifier entry that is a text qualifier with value
+*  Source_qual_taxname to the head of the list, so that it becomes the
+*  first field column.  All other entries keep their relative order.
+*
+*  Does nothing when field_list or *field_list is NULL.
+*/
+static void SortFieldListForSrcChk (ValNodePtr PNTR field_list)
+{
+  ValNodePtr  node, prev, qual;
+
+  if (field_list == NULL || *field_list == NULL) return;
+
+  SortUniqueFieldTypeList (field_list);
+
+  /* move taxname to front of list */
+  prev = NULL;
+  node = *field_list;
+  while (node != NULL) {
+    qual = NULL;
+    if (node->choice == FieldType_source_qual) {
+      qual = (ValNodePtr) node->data.ptrvalue;
+    }
+    if (qual != NULL
+        && qual->choice == SourceQualChoice_textqual
+        && qual->data.intvalue == Source_qual_taxname) {
+      /* unlink and re-insert at the head unless it is already first */
+      if (prev != NULL) {
+        prev->next = node->next;
+        node->next = *field_list;
+        *field_list = node;
+      }
+      return;
+    }
+    prev = node;
+    node = node->next;
+  }
+}
+
+
+/*
+*  Main
+*
+*  Program entry point.  Loads the ASN.1 object tables, enables sequence
+*  fetching, reads one id per line from the -i file, and writes a
+*  tab-delimited table to the -o file.
+*
+*  Pass 1: each non-blank input line is turned into a SeqId, the Bioseq is
+*  locked, and its source-qualifier fields are added to the column set.
+*  Ids that cannot be fetched are reported on stdout but still queued.
+*  Pass 2: the header is printed, then the rows for every queued id; ids
+*  whose Bioseq cannot be locked get an id-only row.
+*
+*  Returns 0 on success or when GetArgs fails, 1 when a table load, fetch
+*  setup, or file open fails.
+*/
+Int2 Main(void)
+{
+  Char            app [64];
+  Int4            rval = 0;
+  CharPtr         id_file, line;
+  ReadBufferData  rbd;
+  ValNodePtr      field_list = NULL;
+  SeqIdPtr        sip;
+  ValNodePtr      bsp_list = NULL, vnp;
+  BioseqPtr       bsp;
+  FILE            *fp;
+
+
+  /* standard setup */
+
+  ErrSetFatalLevel (SEV_MAX);
+  ErrClearOptFlags (EO_SHOW_USERSTR);
+  UseLocalAsnloadDataAndErrMsg ();
+  ErrPathReset ();
+
+  /* finish resolving internal connections in ASN.1 parse tables */
+
+  if (! AllObjLoad ()) {
+    Message (MSG_FATAL, "AllObjLoad failed");
+    return 1;
+  }
+  if (! SubmitAsnLoad ()) {
+    Message (MSG_FATAL, "SubmitAsnLoad failed");
+    return 1;
+  }
+  if (! FeatDefSetLoad ()) {
+    Message (MSG_FATAL, "FeatDefSetLoad failed");
+    return 1;
+  }
+  if (! SeqCodeSetLoad ()) {
+    Message (MSG_FATAL, "SeqCodeSetLoad failed");
+    return 1;
+  }
+  if (! GeneticCodeTableLoad ()) {
+    Message (MSG_FATAL, "GeneticCodeTableLoad failed");
+    return 1;
+  }
+
+#ifdef INTERNAL_NCBI_SRC_CHK
+  DirSubFetchEnable ();
+  SmartFetchEnable ();
+  TPASmartFetchEnable ();
+
+  if (! PUBSEQBioseqFetchEnable ("src_chk", FALSE)) {
+    Message (MSG_POSTERR, "PUBSEQBioseqFetchEnable failed");
+    return 1;
+  }
+#else
+  PubSeqFetchEnable ();
+#endif
+
+  /* process command line arguments */
+
+  sprintf (app, "src_chk %s", SRC_CHK_APPLICATION);
+  if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
+    return 0;
+  }
+
+  id_file = (CharPtr) myargs [i_argInputFile].strvalue;
+
+  rbd.fp = FileOpen (id_file, "r");
+  if (rbd.fp == NULL) {
+    Message (MSG_ERROR, "Unable to open %s", (CharPtr) myargs [i_argInputFile].strvalue);
+    return 1;
+  }
+
+  /* pass 1: queue every id and gather the union of source fields */
+  rbd.current_data = NULL;
+  line = AbstractReadFunction (&rbd);
+  while (line != NULL && line[0] != EOF) {
+    if (!StringHasNoText (line)) {
+
+      sip = SmartGuessMakeId (line);
+      bsp = BioseqLockById (sip);
+      if (bsp == NULL) {
+        printf ("Unable to download Bioseq for %s\n", line);
+      } else {
+        ValNodeLink (&field_list, CollectFieldList (bsp));
+        BioseqUnlock (bsp);
+      }
+      /* queued even on failure so pass 2 can emit an id-only row */
+      ValNodeAddPointer (&bsp_list, 0, sip);
+    }
+    line = MemFree (line);
+    line = AbstractReadFunction (&rbd);
+  }
+
+  FileClose (rbd.fp);
+
+  SortFieldListForSrcChk (&field_list);
+
+  /* pass 2: write the table */
+  fp = FileOpen ((CharPtr) myargs [o_argOutputFile].strvalue, "w");
+  if (fp == NULL) {
+    Message (MSG_ERROR, "Unable to open %s", (CharPtr) myargs [o_argOutputFile].strvalue);
+    rval = 1;
+  } else {
+    PrintHeader (fp, field_list);
+    for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
+      bsp = BioseqLockById (vnp->data.ptrvalue);
+      if (bsp == NULL) {
+        PrintBioseqErrorLine (fp, vnp->data.ptrvalue);
+      } else {
+        PrintBioseqLines (fp, bsp, field_list);
+        /* only unlock what was actually locked */
+        BioseqUnlock (bsp);
+      }
+    }
+    /* close only a stream that was opened */
+    FileClose (fp);
+  }
+
+  /* release the queued ids on both the success and the failure path;
+     ValNodeFree is only relied on here to release the list nodes */
+  for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
+    vnp->data.ptrvalue = SeqIdFree (vnp->data.ptrvalue);
+  }
+  bsp_list = ValNodeFree (bsp_list);
+  field_list = FieldTypeListFree (field_list);
+
+  return rval;
+}
diff --git a/demo/tbl2asn.c b/demo/tbl2asn.c index a70afa46..ceae97b3 100644 --- a/demo/tbl2asn.c +++ b/demo/tbl2asn.c @@ -29,7 +29,7 @@ * * Version Creation Date: 5/5/00 * -* $Revision: 6.295 $ +* $Revision: 6.297 $ * * File Description: * @@ -73,7 +73,7 @@ static char *date_of_compilation = __DATE__; #include <objmacro.h> #include <macroapi.h> -#define TBL2ASN_APP_VER "14.2" +#define TBL2ASN_APP_VER "14.3" CharPtr TBL2ASN_APPLICATION = TBL2ASN_APP_VER; @@ -6175,7 +6175,8 @@ static void ProcessOneRecord ( { AsnTypePtr atp_bssse; BioSourcePtr biop; - BioseqPtr bsp; + BioseqPtr bsp, feat_bsp; + Boolean already_converted_ids = FALSE; BioseqSetPtr bssp = NULL; Char buf [256]; SeqMgrFeatContext context; @@ -6338,6 +6339,15 @@ static void ProcessOneRecord ( if (datatype == OBJ_SEQANNOT) { sap = (SeqAnnotPtr) dataptr; + + if (!StringHasNoText (tbl->center) && !already_converted_ids) { + feat_bsp = GetBioseqReferencedByAnnot (sap, entityID); + if (feat_bsp == NULL) { + VisitBioseqsInSep (sep, tbl->center, MakeGenomeCenterID); + already_converted_ids = TRUE; + } + } + ProcessOneAnnot (sap, entityID, tbl); } else { @@ -6681,7 +6691,7 @@ static void ProcessOneRecord ( } } - if (StringDoesHaveText (tbl->center)) { + if (StringDoesHaveText (tbl->center) && !already_converted_ids) { VisitBioseqsInSep (sep, tbl->center, MakeGenomeCenterID); } diff --git a/desktop/pubdesc.c b/desktop/pubdesc.c index 65b7e5e5..a606528b 100644 --- a/desktop/pubdesc.c +++ b/desktop/pubdesc.c @@ -29,7 +29,7 @@ * * Version Creation Date: 7/28/95 * -* $Revision: 6.70 $ +* $Revision: 6.71 $ * * File Description: * @@ -554,6 +554,9 @@ static CitBookPtr PutATProc (PubdescPagePtr ppp) { vnp->choice = 1; vnp->data.ptrvalue = SaveStringFromTextAndStripNewlines (ppp->xa_info); + if (vnp->data.ptrvalue == NULL) { + vnp->data.ptrvalue = StringSave ("?"); + } } vnp = ValNodeNew (vnphead); if (vnp != NULL) diff --git a/doc/man/Psequin.1 b/doc/man/Psequin.1 index 79eafce7..7957e049 100644 --- a/doc/man/Psequin.1 
+++ b/doc/man/Psequin.1 @@ -3,20 +3,20 @@ Psequin \- submit sequences to Genbank, EMBL, and DDBJ .SH SYNOPSIS .B Psequin -[\|\fB-b\fP\|] -[\|\fB-bse\fP\|] -[\|\fB-e\fP\|] -[\|\fB-f\fP\ \fIfilename\fP\|] -[\|\fB-gc\fP\|] -[\|\fB-h\fP\|] -[\|\fB-oldaln\fP\|] -[\|\fB-oldasn\fP\|] -[\|\fB-oldgph\fP\|] -[\|\fB-oldseq\fP\|] -[\|\fB-oldsource\fP\|] -[\|\fB-s\fP\|] -[\|\fB-w\fP\|] -[\|\fB-x\fP\|] +[\|\fB\-b\fP\|] +[\|\fB\-bse\fP\|] +[\|\fB\-e\fP\|] +[\|\fB\-f\fP\ \fIfilename\fP\|] +[\|\fB\-gc\fP\|] +[\|\fB\-h\fP\|] +[\|\fB\-oldaln\fP\|] +[\|\fB\-oldasn\fP\|] +[\|\fB\-oldgph\fP\|] +[\|\fB\-oldseq\fP\|] +[\|\fB\-oldsource\fP\|] +[\|\fB\-s\fP\|] +[\|\fB\-w\fP\|] +[\|\fB\-x\fP\|] .SH DESCRIPTION \fBPsequin\fP is a program designed to aid in the submission of sequences to the GenBank, EMBL, and DDBJ sequence databases. It was @@ -46,46 +46,46 @@ is edited. It can display features on the sequence during editing, and allows feature intervals to be adjusted by direct manipulation. .SH OPTIONS .TP -\fB-b\fP +\fB\-b\fP Bioseq-set mode .TP -\fB-bse\fP +\fB\-bse\fP binseqentry mode .TP -\fB-e\fP +\fB\-e\fP Entrez mode .TP -\fB-f\fP\ \fIfilename\fP +\fB\-f\fP\ \fIfilename\fP read from \fIfilename\fP .TP -\fB-gc\fP +\fB\-gc\fP genome center mode .TP -\fB-h\fP +\fB\-h\fP turn off automatic help .TP -\fB-oldaln\fP +\fB\-oldaln\fP use old alignment reader .TP -\fB-oldasn\fP +\fB\-oldasn\fP leave as old ASN.1 .TP -\fB-oldgph\fP +\fB\-oldgph\fP use old graphic view .TP -\fB-oldseq\fP +\fB\-oldseq\fP use old sequence view .TP -\fB-oldsource\fP +\fB\-oldsource\fP use old flat-file source format .TP -\fB-s\fP +\fB\-s\fP subtool mode .TP -\fB-w\fP +\fB\-w\fP workbench mode .TP -\fB-x\fP +\fB\-x\fP read from standard input .SH AUTHOR The National Center for Biotechnology Information. 
diff --git a/doc/man/asn2all.1 b/doc/man/asn2all.1 index 19f76172..a105a5c5 100644 --- a/doc/man/asn2all.1 +++ b/doc/man/asn2all.1 @@ -1,4 +1,4 @@ -.TH ASN2ALL 1 2007-10-19 NCBI "NCBI Tools User's Manual" +.TH ASN2ALL 1 2009-07-31 NCBI "NCBI Tools User's Manual" .SH NAME asn2all \- generate reports from ASN.1 biological data .SH SYNOPSIS @@ -47,16 +47,16 @@ decompression. .PP In \fBasn2all\fP, the name of the file to be processed is specified by the \fB\-i\fP command line argument. -Use \fB-a\ t\fP to indicate that it is a release file and \fB-b\fP to +Use \fB\-a\ t\fP to indicate that it is a release file and \fB\-b\fP to indicate that it is binary ASN.1. A text ASN.1 file obtained from Entrez can be processed by using -\fB-a\ a\fP instead of \fB-a\ t\ -b\fP. +\fB\-a\ a\fP instead of \fB\-a\ t\ \-b\fP. .PP Nucleotide and protein records can be processed simultaneously. -Use the \fB-o\fP argument to indicate the nucleotide output file, and -the \fB-v\fP argument for the protein output file. +Use the \fB\-o\fP argument to indicate the nucleotide output file, and +the \fB\-v\fP argument for the protein output file. .PP -The \fB-f\fP argument determines the format to be generated, and is +The \fB\-f\fP argument determines the format to be generated, and is documented in more detail (along with other options) in the following section. .SH OPTIONS @@ -79,7 +79,9 @@ Input ASN.1 type: .RS .PD 0 .IP a -any (autodetected; default) +Automatic (default) +.IP z +Any .IP e Seq-entry .IP b @@ -110,6 +112,8 @@ Output Format: GenBank/GenPept (default) .IP f FASTA +.IP d +CDS FASTA .IP t Sequin-style 5-column feature table .IP y @@ -169,7 +173,7 @@ File selection suffix when working with entire directories. The command .RS .sp - asn2all -i gbpri1.aso -a t -b -f g -o gbpri1.nuc -v gbpri1.prt + asn2all \-i gbpri1.aso \-a t \-b \-f g \-o gbpri1.nuc \-v gbpri1.prt .sp .RE will generate GenBank and GenPept reports from \fBgbpri1.aso\fP. 
diff --git a/doc/man/asn2fsa.1 b/doc/man/asn2fsa.1 index f7d5082b..ed4da24f 100644 --- a/doc/man/asn2fsa.1 +++ b/doc/man/asn2fsa.1 @@ -1,4 +1,4 @@ -.TH ASN2FSA 1 2005-05-16 NCBI "NCBI Tools User's Manual" +.TH ASN2FSA 1 2009-07-31 NCBI "NCBI Tools User's Manual" .SH NAME asn2fsa \- convert biological sequence data from ASN.1 to FASTA .SH SYNOPSIS @@ -54,7 +54,9 @@ Input ASN.1 type: .RS .PD 0 .IP a -any (autodetected; default) +Automatic (default) +.IP z +Any .IP e Seq-entry .IP b @@ -126,7 +128,7 @@ Protein output file name File selection substring (\fB.ent\fP by default) [String] .TP \fB\-z\fP -Print quality score gap as -1 +Print quality score gap as \-1 .SH AUTHOR The National Center for Biotechnology Information. .SH SEE ALSO diff --git a/doc/man/asn2gb.1 b/doc/man/asn2gb.1 index a7357724..8cd09d3c 100644 --- a/doc/man/asn2gb.1 +++ b/doc/man/asn2gb.1 @@ -1,10 +1,11 @@ -.TH ASN2GB 1 2007-10-19 NCBI "NCBI Tools User's Manual" +.TH ASN2GB 1 2009-07-31 NCBI "NCBI Tools User's Manual" .SH NAME asn2gb \- convert ASN.1 biological data to a GenBank-style flat format .SH SYNOPSIS .B asn2gb [\|\fB\-\fP\|] [\|\fB\-A\fP\ \fIaccession\fP\|] +[\|\fB\-F\fP\|] [\|\fB\-a\fP\ \fIasn-type\fP\|] [\|\fB\-b\fP\|] [\|\fB\-c\fP\|] @@ -40,10 +41,14 @@ Print usage message \fB\-A\fP\ \fIaccession\fP Accession to fetch .TP +\fB\-F\fP +Fetch remote annotations +.TP \fB\-a\fP\ \fIasn-type\fP ASN.1 Type: .RS .PD 0 +.IP "[Single record]" .IP a Any (autodetected; default) .IP e @@ -54,6 +59,9 @@ Bioseq bioseq-Set .IP m seq-subMit +.IP q +Catenated +.IP "[Release file; components individually processed and freed]" .IP t baTch bioseq-set .IP u @@ -195,10 +203,14 @@ Report .IP 2 Sequin/Release .IP 3 -asn2gb/asn2flat +asn2gb SSEC/nocleanup .IP 4 asn2flat BSEC/nocleanup .IP 5 +asn2gb/asn2flat +.IP 6 +asn2gb NEW dbxref/OLD dbxref +.IP 7 oldasn2gb/newasn2gb .PD .RE diff --git a/doc/man/asn2idx.1 b/doc/man/asn2idx.1 index 87a57839..8b3275aa 100644 --- a/doc/man/asn2idx.1 +++ b/doc/man/asn2idx.1 @@ 
-1,4 +1,4 @@ -.TH ASN2IDX 1 2007-10-19 NCBI "NCBI Tools User's Manual" +.TH ASN2IDX 1 2008-12-13 NCBI "NCBI Tools User's Manual" .SH NAME asn2idx \- index ASN.1 Bioseq-sets for fast access to individual sequences .SH SYNOPSIS @@ -29,7 +29,7 @@ Bioseq-sets are Binary Required Subdirectory .TP \fB\-f\fP\ \fIfilter\fP -Filter (default = \fBgbcon,gbest,gbgss,gbsts\fP) +Filter (default = \fBgbcon,gbest,gbgss,gbhtg,gbsts\fP) .TP \fB\-p\fP\ \fIpath\fP Path to Files diff --git a/doc/man/asnval.1 b/doc/man/asnval.1 index 29673f03..23980291 100644 --- a/doc/man/asnval.1 +++ b/doc/man/asnval.1 @@ -1,14 +1,16 @@ -.TH ASNVAL 1 2007-10-19 NCBI "NCBI Tools User's Manual" +.TH ASNVAL 1 2009-07-31 NCBI "NCBI Tools User's Manual" .SH NAME asnval \- validate ASN.1 biological sequence records .SH SYNOPSIS .B asnval [\|\fB\-\fP\|] [\|\fB\-A\fP\|] +[\|\fB\-B\fP\|] [\|\fB\-C\fP\ \fIN\fP\|] [\|\fB\-E\fP\ \fIstr\fP\|] [\|\fB\-G\fP\|] [\|\fB\-J\fP\|] +[\|\fB\-K\fP\|] [\|\fB\-L\fP\ \fIfilename\fP\|] [\|\fB\-M\fP\|] [\|\fB\-N\fP\ \fIflags\fP\|] @@ -46,6 +48,9 @@ Print usage message \fB\-A\fP Validate Alignments .TP +\fB\-B\fP +Validate Barcodes +.TP \fB\-C\fP\ \fIN\fP Max count .TP @@ -58,6 +63,9 @@ GI lookup from accession \fB\-J\fP Require ISO-JTA? 
.TP +\fB\-K\fP +Summary to error file +.TP \fB\-L\fP\ \fIfilename\fP Log File .TP @@ -90,9 +98,9 @@ informational .IP 2 warning .IP 3 -error (default for \fB-Q\fP) +error (default for \fB\-Q\fP) .IP 4 -grounds for rejection (default for \fB-P\fP, \fB-R\fP) +grounds for rejection (default for \fB\-P\fP, \fB\-R\fP) .PD .RE .TP @@ -122,7 +130,9 @@ Input ASN.1 type: .RS .PD 0 .IP a -Any (autodetected; default) +Automatic (default) +.IP z +Any .IP e seq-Entry .IP b @@ -172,7 +182,7 @@ Remote Fetching from ID Recurse .TP \fB\-v\fP\ \fIN\fP -Verbosity, from 0 (default) to 3 +Verbosity, from \fB0\fP to \fB4\fP (\fB1\fP by default) .TP \fB\-x\fP\ \fIstr\fP File selection substring (\fB.ent\fP by default) diff --git a/doc/man/blast.1 b/doc/man/blast.1 index e6205e04..8a007f0e 100644 --- a/doc/man/blast.1 +++ b/doc/man/blast.1 @@ -1,4 +1,4 @@ -.TH BLAST 1 2007-10-19 NCBI "NCBI Tools User's Manual" +.TH BLAST 1 2009-08-02 NCBI "NCBI Tools User's Manual" .SH NAME bl2seq, blast, blastall, blastall_old, blastcl3, blastpgp, impala, megablast, rpsblast, seedtop \- Basic Local Alignment Search Tool .SH SYNOPSIS @@ -32,7 +32,10 @@ bl2seq, blast, blastall, blastall_old, blastcl3, blastpgp, impala, megablast, rp [\|\fB\-r\fP\ \fIN\fP\|] [\|\fB\-t\fP\ \fIN\fP\|] .PP -.B blast +.\" Debian renames blast to blast2 to avoid clashing with an unrelated +.\" blast executable. +.ds bx blast +.B \*(bx [\|\fB\-\fP\|] [\|\fB\-B\fP\ \fIN\fP\|] [\|\fB\-D\fP\ \fIN\fP\|] @@ -372,6 +375,7 @@ bl2seq, blast, blastall, blastall_old, blastcl3, blastpgp, impala, megablast, rp [\|\fB\-G\fP\ \fIN\fP\|] [\|\fB\-I\fP\|] [\|\fB\-J\fP\|] +[\|\fB\-K\fP\ \fIN\fP\|] [\|\fB\-M\fP\ \fIstr\fP\|] [\|\fB\-O\fP\ \fIfilename\fP\|] [\|\fB\-S\fP\ \fIN\fP\|] @@ -395,7 +399,7 @@ are documented together because they have a lot of common options. the blastn or blastp algorithm. Both sequences must be either nucleotides or proteins. 
.PP -\fBblast\fP compares a sequence against either a local database or a +\fB\*(bx\fP compares a sequence against either a local database or a second sequence; it incorporates most of the functionality of both \fBbl2seq\fP and \fBblastall\fP, but uses a semi-experimental new internal engine. @@ -404,7 +408,7 @@ internal engine. local database for a sequence. \fBblastall\fP uses a newer engine than \fBblastall_old\fP by default, but supports using the older engine as well (when invoked with the -option \fB-V\ F\fP). +option \fB\-V\ F\fP). .PP \fBblastcl3\fP accesses the newest NCBI BLAST search engine (version 2.0). The software behind BLAST version 2.0 was written from scratch @@ -428,7 +432,7 @@ compare two large sets of sequences against each other. .PP \fBrpsblast\fP (Reverse PSI-BLAST) searches a query sequence against a database of profiles. This is the opposite of PSI-BLAST that searches -a profile against a database of sequences, hence the 'Reverse'. +a profile against a database of sequences, hence the `Reverse'. \fBrpsblast\fP uses a BLAST-like algorithm, finding single- or double-word hits and then performing an ungapped extension on these candidate matches. If a sufficiently high-scoring ungapped alignment @@ -485,7 +489,7 @@ Input sequences in the form of accession.version Multiple Hits window size; generally defaults to 0 (for single-hit extensions), but defaults to 40 when using discontiguous templates. 
.TP -\fB\-B\fP\ \fIN\fP (blast) +\fB\-B\fP\ \fIN\fP (\*(bx) Produce on-the-fly output: .RS .PD 0 @@ -508,15 +512,16 @@ Number of concatenated queries, in blastn or tblastn mode \fB\-B\fP\ \fIfilename\fP (blastpgp) Input Alignment File for PSI-BLAST Restart .TP -\fB\-C\fP\ \fIX\fP (blast, blastall, blastall_old, blastcl3) +\fB\-C\fP\ \fIX\fP (\*(bx, blastall, blastall_old, blastcl3) Use composition-based statistics for blastp or tblastn: .RS .PD 0 -.IP "D or d" -Default (equivalent to \fBT\fP) +.IP "T, t, D, or d" +Default (equivalent to \fB1\fP for \fB\*(bx\fP and \fBblastall_old\fP +and to \fB2\fP for \fBblastall\fP and \fBblastcl3\fP) .IP "0, F, or f" No composition-based statistics -.IP "1, T, or t" +.IP 1 Composition-based statistics as in \fINAR\fP 29:2994-3005, 2001 .IP 2 Composition-based score adjustment as in \fIBioinformatics\fP 21:902-911, @@ -528,7 +533,7 @@ Composition-based score adjustment as in \fIBioinformatics\fP 21:902-911, .RE .RS When enabling statistics in blastall, blastall_old, or blastcl3 (\fIi.e.\fP, -not blast), appending \fBu\fP (case-insensitive) to the mode enables +not \*(bx), appending \fBu\fP (case-insensitive) to the mode enables use of unified p-values combining alignment and compositional p-values in round 1 only. 
.RE @@ -550,7 +555,7 @@ tabular .PD .RE .TP -\fB\-D\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3) +\fB\-D\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3) Translate sequences in the database according to genetic code \fIN\fP in /usr/share/ncbi/data/gc.prt (default is 1; only applies to tblast*) .TP @@ -577,39 +582,39 @@ incremental binary ASN.1 Cost decline to align (default = 99999) .TP \fB\-E\fP\ \fIN\fP (bl2seq, blastcl3, megablast) -Extending a gap costs \fIN\fP (-1 invokes default behavior) +Extending a gap costs \fIN\fP (\-1 invokes default behavior) .TP -\fB\-E\fP\ \fIN\fP (blast, blastall, blastall_old) -Extending a gap costs \fIN\fP (-1 invokes default behavior: +\fB\-E\fP\ \fIN\fP (\*(bx, blastall, blastall_old) +Extending a gap costs \fIN\fP (\-1 invokes default behavior: non-affine if greedy, 2 otherwise) .TP \fB\-E\fP\ \fIN\fP (blastpgp, impala, seedtop) Extending a gap costs \fIN\fP (default is 1) .TP -\fB\-F\fP\ \fIstr\fP (bl2seq, blast, blastall, blastall_old, blastpgp, +\fB\-F\fP\ \fIstr\fP (bl2seq, \*(bx, blastall, blastall_old, blastpgp, blastcl3, impala, megablast, rpsblast) Filter options for DUST or SEG; defaults to \fBT\fP for bl2seq, -blast, blastall, blastall_old, blastcl3, and megablast, and to +\*(bx, blastall, blastall_old, blastcl3, and megablast, and to \fBF\fP for blastpgp, impala, and rpsblast. .TP \fB\-F\fP (seedtop) Filter sequence with SEG. 
.TP \fB\-G\fP\ \fIN\fP (bl2seq, blastcl3, megablast) -Opening a gap costs \fIN\fP (-1 invokes default behavior) +Opening a gap costs \fIN\fP (\-1 invokes default behavior) .TP -\fB\-G\fP\ \fIN\fP (blast, blastall, blastall_old) -Opening a gap costs \fIN\fP (-1 invokes default behavior: non-affine +\fB\-G\fP\ \fIN\fP (\*(bx, blastall, blastall_old) +Opening a gap costs \fIN\fP (\-1 invokes default behavior: non-affine if greedy, 5 if using dynamic programming) .TP \fB\-G\fP\ \fIN\fP (blastpgp, impala, seedtop) Opening a gap costs \fIN\fP (default is 11) .TP -\fB\-H\fP (blast) +\fB\-H\fP (\*(bx) Produce HTML output .TP \fB\-H\fP\ \fIN\fP (blastpgp) -End of required region in query (-1 indicates end of query) +End of required region in query (\-1 indicates end of query) .TP \fB\-H\fP (impala) Print help (different from usage message) @@ -617,41 +622,46 @@ Print help (different from usage message) \fB\-H\fP\ \fIN\fP (megablast) Maximal number of HSPs to save per database sequence (default is 0, unlimited) .TP -\fB\-I\fP\ \(dq\fIstart\ stop\fP\(dq (bl2seq, blast) +\fB\-I\fP\ \(dq\fIstart\ stop\fP\(dq (bl2seq, \*(bx) Location on first (query) sequence (applies only if file specified -with \fB-i\fP contains a single sequence) +with \fB\-i\fP contains a single sequence) .TP \fB\-I\fP (blastall, blastall_old, blastcl3, blastpgp, impala, megablast, rpsblast, seedtop) Show GIs in deflines .TP -\fB\-J\fP\ \(dq\fIstart\ stop\fP\(dq (bl2seq, blast) +\fB\-J\fP\ \(dq\fIstart\ stop\fP\(dq (bl2seq, \*(bx) Location on second (subject) sequence (applies only if file specified -with \fB-j\fP contains a single sequence) +with \fB\-j\fP contains a single sequence) .TP \fB\-J\fP (blastall, blastall_old, blastcl3, blastpgp, impala, megablast, rpsblast, seedtop) Believe the query defline .TP -\fB\-K\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3, blastpgp) -Number of best hits from a region to keep (off by default, if used a -value of 100 is recommended) +\fB\-K\fP\ \fIN\fP (\*(bx, 
blastall, blastall_old, blastcl3, blastpgp) +Number of best hits from a region to keep. +Off by default. +If used, a value of 100 is recommended. +Very high values of \fB\-v\fP or \fB\-b\fP are also suggested. +.TP +\fB\-K\fP\ \fIN\fP (seedtop) +Internal hit buffer size multiplier (relative to query length; default = 2) .TP -\fB\-L\fP (blast) +\fB\-L\fP (\*(bx) Use (classical Mega BLAST) lookup table with width 12 .TP \fB\-L\fP\ \fIstart,stop\fP (blastall, blastall_old, blastcl3, megablast, rpsblast) Location on query sequence (for rpsblast, only valid in blastp mode) .TP -\fB\-M\fP\ \fIstr\fP (bl2seq, blast, blastall, blastall_old, blastcl3, +\fB\-M\fP\ \fIstr\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3, blastpgp, impala, seedtop) Use matrix \fIstr\fP (default = BLOSUM62) .TP \fB\-M\fP\ \fIN\fP (megablast) Maximal total length of queries for a single search (default = 5000000) .TP -\fB\-N\fP (blast) +\fB\-N\fP (\*(bx) Show only accessions for sequence IDs in tabular output .TP \fB\-N\fP\ \fIX\fP (blastpgp, rpsblast) @@ -676,7 +686,7 @@ Write (ASN.1) sequence alignments to \fIfilename\fP; only valid for blastpgp, impala, rpsblast, and seedtop with \fB\-J\fP, and only valid for megablast with \fB\-D2\fP. 
.TP -\fB\-P\fP\ \fIX\fP (blast) +\fB\-P\fP\ \fIX\fP (\*(bx) Identity percentage cut-off .TP \fB\-P\fP\ \fIN\fP (blastall, blastall_old, blastcl3, blastpgp, rpsblast) @@ -689,7 +699,7 @@ Read matrix profiles from database \fIfilename\fP \fB\-P\fP\ \fIN\fP (megablast) Maximal number of positions for a hash value (set to 0 [default] to ignore) .TP -\fB\-Q\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3) +\fB\-Q\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3) Translate query according to genetic code \fIN\fP in /usr/share/ncbi/data/gc.prt (default is 1) .TP @@ -697,9 +707,9 @@ Translate query according to genetic code \fIN\fP in Output File for PSI-BLAST Matrix in ASCII .TP \fB\-Q\fP\ \fIfilename\fP (megablast) -Masked query output; requires \fB-D\ 2\fP +Masked query output; requires \fB\-D\ 2\fP .TP -\fB\-R\fP (blast) +\fB\-R\fP (\*(bx) Compute locally optimal Smith-Waterman alignments. (This option is only available for gapped tblastn.) .TP @@ -715,7 +725,7 @@ Input File for PSI-BLAST Restart \fB\-R\fP (megablast) Report the log information at the end of output .TP -\fB\-S\fP\ \fIN\fP (bl2seq, blast, blastall, blastall_old, blastcl3, +\fB\-S\fP\ \fIN\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3, megablast) Query strands to search against database for blastn, blastx, tblastx: .RS @@ -739,7 +749,7 @@ Cutoff cost (default = 30) rpsblast) Produce HTML output .TP -\fB\-T\fP\ \fIN\fP (blast) +\fB\-T\fP\ \fIN\fP (\*(bx) Type of a discontiguous word template: .RS .PD 0 @@ -759,10 +769,10 @@ Use lower case filtering for the query sequence \fB\-V\fP (bl2seq, blastall, megablast) Force use of legacy engine .TP -\fB\-V\fP (blast) +\fB\-V\fP (\*(bx) Use variable word size approach to database scanning .TP -\fB\-W\fP\ \fIN\fP (bl2seq, blast, blastall, blastall_old, blastcl3, +\fB\-W\fP\ \fIN\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3, blastpgp, megablast, rpsblast) Use words of size \fIN\fP (length of best perfect match; zero invokes default behavior, 
except with megablast, which defaults to 28, and @@ -770,7 +780,7 @@ blastpgp, which defaults to 3. The default values for the other commands vary with "program": 11 for blastn, 28 for megablast, and 3 for everything else.) .TP -\fB\-X\fP\ \fIN\fP (bl2seq, blast, blastall, blastall_old, blastcl3, +\fB\-X\fP\ \fIN\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3, blastpgp, megablast, rpsblast, seedtop) X dropoff value for gapped alignment (in bits) (zero invokes default behavior, except with megablast, which defaults to 20, and rpsblast @@ -778,39 +788,43 @@ and seedtop, which default to 15. The default values for the other commands vary with "program": 30 for blastn, 20 for megablast, 0 for tblastx, and 15 for everything else.) .TP -\fB\-Y\fP\ \fIX\fP (bl2seq, blast, blastall, blastall_old, blastcl3, +\fB\-Y\fP\ \fIX\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3, blastpgp, megablast, rpsblast) Effective length of the search space (use zero for the real size) .TP -\fB\-Z\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3, blastpgp, +\fB\-Z\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp, megablast, rpsblast) X dropoff value for final [dynamic programming?] 
gapped alignment in -bits (default is 50 for blastn and megablast, 0 for tblastx, 25 for +bits (default is 100 for blastn and megablast, 0 for tblastx, 25 for others) .TP \fB\-a\fP\ \fIfilename\fP (bl2seq) Write text ASN.1 output to \fIfilename\fP .TP -\fB\-a\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3, blastpgp, +\fB\-a\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp, impala, megablast, rpsblast) Number of threads to use (default is one) .TP -\fB\-b\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3, blastpgp, +\fB\-b\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp, impala, megablast, rpsblast) Number of database sequences to show alignments for (B) (default is 250) .TP -\fB\-c\fP (blast) +\fB\-c\fP (\*(bx) Mask lower case .TP -\fB\-c\fP\ \fIN\fP (blastpgp, impala) -Constant in pseudocounts for multipass version (default is 9) +\fB\-c\fP\ \fIN\fP (blastpgp) +Constant in pseudocounts for multipass version; 0 (default) uses +entropy method; otherwise a value near 30 is recommended +.TP +\fB\-c\fP\ \fIN\fP (impala) +Constant in pseudocounts for multipass version (default is 10) .TP \fB\-d\fP\ \fIN\fP (bl2seq) Use theoretical DB size of \fIN\fP (zero stands for the real size) .TP -\fB\-d\fP\ \fIstr\fP (blast, blastall, blastall_old, blastcl3, blastpgp, +\fB\-d\fP\ \fIstr\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp, impala, megablast, seedtop) -Database to use (default is nr for all executables except blast, +Database to use (default is nr for all executables except \*(bx, which requires a second FASTA sequence if this is not set) .TP \fB\-d\fP\ \fIfilename\fP (rpsblast) @@ -819,7 +833,7 @@ RPS BLAST Database \fB\-e\fP\ \fIX\fP Expectation value (E) (default = 10.0) .TP -\fB\-f\fP\ \fIX\fP (blast, blastall, blastall_old, blastcl3) +\fB\-f\fP\ \fIX\fP (\*(bx, blastall, blastall_old, blastcl3) Threshold for extending hits, default if zero: 0 for blastn and megablast, 11 for blastp, 12 for blastx, and 13 for tblasn and 
tblastx. @@ -828,7 +842,7 @@ tblastx. Threshold for extending hits (default 11) .TP \fB\-f\fP (megablast) -Show full IDs in the output (default - only GIs or accessions) +Show full IDs in the output (default: only GIs or accessions) .TP \fB\-f\fP (seedtop) Force searching for patterns even if they are too likely @@ -836,14 +850,14 @@ Force searching for patterns even if they are too likely \fB\-g\ F\fP (bl2seq, blastall, blastall_old, blastcl3) Do not perform gapped alignment (N/A for tblastx) .TP -\fB\-g\fP (blast) +\fB\-g\fP (\*(bx) Use greedy algorithm for gapped extensions .TP \fB\-g\ F\fP (megablast) Make discontiguous megablast generate words for every base of the database (mandatory with the current BLAST engine) .TP -\fB\-h\fP\ \fIN\fP (blast) +\fB\-h\fP\ \fIN\fP (\*(bx) Frame shift penalty for out-of-frame gapping (blastx, tblastn only; default is zero) .TP @@ -855,13 +869,13 @@ for blastpgp, 0.005 for impala) Read (first, query) sequence or set from \fIfilename\fP (default is stdin; not needed for blastpgp if restarting from scoremat) .TP -\fB\-j\fP\ \fIfilename\fP (bl2seq, blast) +\fB\-j\fP\ \fIfilename\fP (bl2seq, \*(bx) Read second (subject) sequence or set from \fIfilename\fP .TP \fB\-j\fP\ \fIN\fP (blastpgp) Maximum number of passes to use in multipass version (default = 1) .TP -\fB\-k\fP\ \fIstr\fP (blast) +\fB\-k\fP\ \fIstr\fP (\*(bx) Pattern for PHI-BLAST .TP \fB\-k\fP\ \fIfilename\fP (blastpgp, seedtop) @@ -876,7 +890,7 @@ Log messages to \fIfilename\fP rather than standard error. 
\fB\-m\fP (bl2seq) Use Mega Blast for search .TP -\fB\-m\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3, blastpgp, +\fB\-m\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp, impala, megablast, rpsblast) alignment view options: .RS @@ -908,7 +922,7 @@ ASN.1 binary (not available for impala or rpsblast) .PD .RE .TP -\fB\-n\fP (blast) +\fB\-n\fP (\*(bx) Show GIs in sequence IDs .TP \fB\-n\fP (blastall, blastall_old, blastcl3) @@ -920,7 +934,7 @@ Use non-greedy (dynamic programming) extension for affine gap scores \fB\-o\fP\ \fIfilename\fP Write final alignment report to \fIfilename\fP rather than stdout .TP -\fB\-p\fP\ \fIstr\fP (bl2seq, blast, blastall, blastall_old, blastcl3) +\fB\-p\fP\ \fIstr\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3) Use the "program" (comparison type) \fIstr\fP. The \fBDESCRIPTION\fP section covers this option in more detail. .TP @@ -944,10 +958,10 @@ indicates which sequences contain a pattern .PD .RE .TP -\fB\-q\fP\ \fIN\fP (bl2seq, blast, blastall, blastall_old, blastcl3, +\fB\-q\fP\ \fIN\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3, megablast, seedtop) -Penalty for a nucleotide mismatch (blastn only) (default = -10 for -seedtop, -3 for everything else) +Penalty for a nucleotide mismatch (blastn only) (default = \-10 for +seedtop, \-3 for everything else) .TP \fB\-q\fP\ \fIN\fP (blastpgp) ASN.1 Scoremat input of checkpoint data: @@ -962,12 +976,12 @@ restart from binary scoremat checkpoint file .PD .RE .TP -\fB\-r\fP\ \fIN\fP (bl2seq, blast, blastall, blastall_old, blastcl3, +\fB\-r\fP\ \fIN\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3, megablast, seedtop) Reward for a nucleotide match (blastn only) (default = 10 for seedtop, --10 for everything else) +1 for everything else) .TP -\fB\-s\fP (blast) +\fB\-s\fP (\*(bx) No-op (formerly requested generating words for every base of the database) .TP \fB\-s\fP (blastall, blastall_old, blastcl3, blastpgp) @@ -978,24 +992,24 @@ tblastn mode. 
\fB\-s\fP\ \fIN\fP (megablast) Minimal hit score to report (0 for default behavior) .TP -\fB\-t\fP\ \fIN\fP (bl2seq, blast, blastall, blastall_old, blastcl3) +\fB\-t\fP\ \fIN\fP (bl2seq, \*(bx, blastall, blastall_old, blastcl3) Length of a discontiguous word template (the largest intron allowed in a translated nucleotide sequence when linking multiple distinct assignments; default = 0; negative values disable linking for blastall, blastall_old, and blastcl3.) .TP \fB\-t\fP\ \fIN\fP[\|\fBu\fP\|] (blastpgp) -Composition-based statistics mode. The first character is interpreted -as follows: +Composition-based score adjustment. +The first character is interpreted as follows: .RS .PD 0 .IP "0, F, or f" no composition-based statistics -.IP "1, T, or t" +.IP 1 composition-based statistics as in \fINAR\fP 29:2994\-3005, 2001 -.IP 2 +.IP "2, T, or t" composition-based score adjustment as in \fIBioinformatics\fP -21:902-911, 2005, conditioned on sequence properties in round 1 +21:902-911, 2005, conditioned on sequence properties in round 1 (default) .IP 3 composition-based score adjustment as in \fIBioinformatics\fP 21:902-911, 2005, unconditionally in round 1 @@ -1009,7 +1023,7 @@ alignment p-value and compositional p-value in round 1 only. 
\fB\-t\fP\ \fIN\fP (megablast) Length of a discontiguous word template (contiguous word if 0 [default]) .TP -\fB\-u\fP (blast) +\fB\-u\fP (\*(bx) Do only ungapped alignment (always TRUE for tblastx) .TP \fB\-u\fP\ \fIstr\fP (blastcl3) @@ -1022,24 +1036,24 @@ ASN.1 Scoremat output of checkpoint data: .IP 0 no scoremat output (default) .IP 1 -output ASCII scoremat checkpoint file (requires \fB-J\fP) +output ASCII scoremat checkpoint file (requires \fB\-J\fP) .IP 2 -output binary scoremat checkpoint file (requires \fB-J\fP) +output binary scoremat checkpoint file (requires \fB\-J\fP) .PD .RE .TP -\fB\-v\fP\ \fIN\fP (blast, blastall, blastall_old, blastcl3, blastpgp, +\fB\-v\fP\ \fIN\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp, impala, megablast, rpsblast) Number of one-line descriptions to show (V) (default = 500) .TP -\fB\-w\fP\ \fIN\fP (blast) +\fB\-w\fP\ \fIN\fP (\*(bx) Window size (max. allowed distance between a pair of initial hits; 0 -invokes default behavior, -1 turns off multiple hits) +invokes default behavior, \-1 turns off multiple hits) .TP \fB\-w\fP\ \fIN\fP (blastall, blastall_old, blastcl3) Frame shift penalty (OOF algorithm for blastx) .TP -\fB\-y\fP\ \fIX\fP (blast, blastall, blastall_old, blastcl3, blastpgp, +\fB\-y\fP\ \fIX\fP (\*(bx, blastall, blastall_old, blastcl3, blastpgp, impala, rpsblast) X dropoff for ungapped extensions in bits (0.0 invokes default behavior: 20 for blastn, 10 for megablast, and 7 for all others.) @@ -1047,7 +1061,7 @@ behavior: 20 for blastn, 10 for megablast, and 7 for all others.) \fB\-y\fP\ \fIN\fP (megablast) X dropoff value for ungapped extension (default is 10) .TP -\fB\-z\fP\ \fIN\fP (blast) +\fB\-z\fP\ \fIN\fP (\*(bx) Longest intron length for uneven gap HSP linking (tblastn only; default is 0) .TP @@ -1066,6 +1080,7 @@ The National Center for Biotechnology Information. 
.BR formatdb (1), .BR formatrpsdb (1), .BR makemat (1), +.BR taxblast (1), blast.html, seedtop.html, <http://www.ncbi.nlm.nih.gov/BLAST/>. diff --git a/doc/man/cleanasn.1 b/doc/man/cleanasn.1 index b0ccaae9..403e34e5 100644 --- a/doc/man/cleanasn.1 +++ b/doc/man/cleanasn.1 @@ -1,22 +1,30 @@ -.TH CLEANASN 1 2007-10-19 NCBI "NCBI Tools User's Manual" +.TH CLEANASN 1 2009-07-31 NCBI "NCBI Tools User's Manual" .SH NAME cleanasn \- clean up irregularities in NCBI ASN.1 objects .SH SYNOPSIS .B cleanasn [\|\fB\-\fP\|] +[\|\fB\-D\fP\ \fIstr\fP\|] [\|\fB\-F\fP\ \fIstr\fP\|] [\|\fB\-K\fP\ \fIstr\fP\|] +[\|\fB\-L\fP\ \fIfilename\fP\|] +[\|\fB\-M\fP\ \fIfilename\fP\|] [\|\fB\-N\fP\ \fIstr\fP\|] +[\|\fB\-P\fP\|] +[\|\fB\-Q\fP\ \fIstr\fP\|] [\|\fB\-R\fP\|] [\|\fB\-T\fP\|] +[\|\fB\-U\fP\ \fIstr\fP\|] +[\|\fB\-X\fP\ \fIstr\fP\|] [\|\fB\-a\fP\ \fIstr\fP\|] [\|\fB\-b\fP\|] [\|\fB\-c\fP\|] [\|\fB\-f\fP\ \fIstr\fP\|] [\|\fB\-i\fP\ \fIfilename\fP\|] -[\|\fB\-l\fP\ \fIfilename\fP\|] +[\|\fB\-m\fP\ \fIstr\fP\|] [\|\fB\-o\fP\ \fIfilename\fP\|] [\|\fB\-p\fP\ \fIpath\fP\|] +[\|\fB\-q\fP\ \fIpath\fP\|] [\|\fB\-r\fP\ \fIpath\fP\|] [\|\fB\-x\fP\ \fIext\fP\|] .SH DESCRIPTION @@ -28,6 +36,15 @@ A summary of options is included below. 
\fB\-\fP Print usage message .TP +\fB\-D\fP\ \fIstr\fP +Clean up descriptors, per the flags in str: +.RS +.PD 0 +.IP t +Remove Title +.PD +.RE +.TP \fB\-F\fP\ \fIstr\fP Clean up features, per the flags in str: .RS @@ -47,11 +64,25 @@ Perform a general cleanup, per the flags in str: .PD 0 .IP b BasicSeqEntryCleanup +.IP p +C++ BasicCleanup (via an external utility) .IP s SeriousSeqEntryCleanup +.IP g +GpipeSeqEntryCleanup +.IP n +Normalize Descriptor Order +.IP u +Remove NcbiCleanup User Objects .PD .RE .TP +\fB\-L\fP\ \fIfilename\fP +Log file +.TP +\fB\-M\fP\ \fIfilename\fP +Macro file +.TP \fB\-N\fP\ \fIstr\fP Clean up links, per the flags in str: .RS @@ -67,12 +98,50 @@ ClearFeatureIDs .PD .RE .TP +\fB\-P\fP +Publication Lookup +.TP +\fB\-Q\fP\ \fIstr\fP +Report: +.RS +.PD 0 +.IP r +ASN.1 BSEC/SSEC Report +.IP g +GenBank SSEC Diff +.IP m +Modernization +.PD +.RE +.TP \fB\-R\fP Remote fetching from ID (NCBI sequence databases) .TP \fB\-T\fP Taxonomy Lookup .TP +\fB\-U\fP\ \fIstr\fP +Modernize, per the flags in str: +.RS +.PD 0 +.IP g +Genes +.IP r +RNA +.IP p +PCR Primers +.PD +.RE +.TP +\fB\-X\fP\ \fIstr\fP +Miscellaneous options, per str: +.RS +.PD 0 +.IP d +Automatic definition line +.PD +.RE +.TP \fB\-a\fP\ \fIstr\fP ASN.1 type .RS @@ -104,8 +173,20 @@ Substring filter \fB\-i\fP\ \fIfilename\fP Single input file (defaults to stdin) .TP -\fB\-l\fP\ \fIfilename\fP -Log file +\fB\-m\fP\ \fIstr\fP +Flatfile mode: +.RS +.PD 0 +.IP r +Release +.IP e +Entrez +.IP s +Sequin +.IP d +Dump +.PD +.RE .TP \fB\-o\fP\ \fIfilename\fP Single output file (defaults to stdout) @@ -113,6 +194,9 @@ Single output file (defaults to stdout) \fB\-p\fP\ \fIpath\fP Process all matching files in \fIpath\fP .TP +\fB\-q\fP\ \fIpath\fP +Ffdiff executable (default is /netopt/genbank/subtool/bin/ffdiff) +.TP \fB\-r\fP\ \fIpath\fP Path for results .TP diff --git a/doc/man/fa2htgs.1 b/doc/man/fa2htgs.1 index b604fa02..1186452f 100644 --- a/doc/man/fa2htgs.1 +++ b/doc/man/fa2htgs.1 @@ -101,7 
+101,7 @@ For example: Contig2 + 1 SP6 left Contig3 + 1 - Contig1 - T7 right + Contig1 \- T7 right .fi The first column is the contig name, the second is the orientation, @@ -180,12 +180,12 @@ Length of sequence in bp (default = 0). The length is checked against the actual number of bases we get. For phase 1 and 2 sequence it is also used to estimate gap lengths. For phase 1 and 2 records, it is important to use a number GREATER than the amount of provided -nucleotide, otherwise this will generate false 'gaps'. Here is +nucleotide, otherwise this will generate false `gaps'. Here is assumed that the putative full length of the BAC or cosmid will be -used. There should be at least 20 to 30 'n' in between the segments +used. There should be at least 20 to 30 `n' in between the segments (you can check for these in Sequin), as this will ensure proper behavior when this sequence is used with BLAST. Otherwise -'artifactual' unrelated segment neighbors may be brought into +`artifactual' unrelated segment neighbors may be brought into proximity of each other. .TP \fB\-m\fP diff --git a/doc/man/fastacmd.1 b/doc/man/fastacmd.1 index bad55531..afa5ab71 100644 --- a/doc/man/fastacmd.1 +++ b/doc/man/fastacmd.1 @@ -21,11 +21,11 @@ fastacmd \- retrieve FASTA sequences from a BLAST database [\|\fB\-t\fP\|] .SH DESCRIPTION \fBfastacmd\fP retrieves FASTA formatted sequences from a -\fBblast\fP(1) database formatted using the '\fB\-o\fP' option. An +\fBblast\fP(1) database formatted using the `\fB\-o\fP' option. An example \fBfastacmd\fP call would be .PP .ce -fastacmd -d nr -s p38398 +fastacmd \-d nr \-s p38398 .SH OPTIONS A summary of options is included below. .TP @@ -92,7 +92,7 @@ Type of file: .RS .PD 0 .IP G -guess (default) - look for protein, then nucleotide +guess (default): look for protein, then nucleotide .IP T protein .IP F @@ -103,7 +103,7 @@ nucleotide \fB\-s\fP\ \fIstr\fP Comma-delimited search string(s). 
GIs, accessions, loci, or fullSeq-id strings may be used, -\fIe.g.\fP, \fB555\fP, \fBAC147927\fP, \fB'gnl|dbname|tag'\fP +\fIe.g.\fP, \fB555\fP, \fBAC147927\fP, \fB\(aqgnl|dbname|tag\(aq\fP .TP \fB\-t\fP Definition line should contain target GI only diff --git a/doc/man/formatdb.1 b/doc/man/formatdb.1 index 23e4e233..d56460ed 100644 --- a/doc/man/formatdb.1 +++ b/doc/man/formatdb.1 @@ -98,10 +98,10 @@ GSS's, and HTGS's. Title for database file [String] .TP \fB\-v\fP\ \fIN\fP -Break up large FASTA files into 'volumes' of size \fIN\fP million +Break up large FASTA files into `volumes' of size \fIN\fP million letters (4000 by default). As part of the creation of a volume, \fBformatdb\fP writes a new type of BLAST database file, called an -alias file, with the extension 'nal' or 'pal'. +alias file, with the extension `nal' or `pal'. .SH AUTHOR The National Center for Biotechnology Information. .SH SEE ALSO diff --git a/doc/man/formatrpsdb.1 b/doc/man/formatrpsdb.1 index 25846419..73efcac0 100644 --- a/doc/man/formatrpsdb.1 +++ b/doc/man/formatrpsdb.1 @@ -22,7 +22,7 @@ sequences into a database suitable for use with Reverse Position Specific (RPS) Blast. Each input sequence, together with its position-specific scoring matrix (PSSM), is ASN.1 encoded into a PssmWithParameters (or -'scoremat') object and resides in a separate file. +`scoremat') object and resides in a separate file. Scoremat objects can be created using \fBblastpgp\fP. \fBFormatrpsdb\fP is given a list of these files and produces the corresponding database. 
diff --git a/doc/man/gene2xml.1 b/doc/man/gene2xml.1 index 9ec74b00..0bbd9283 100644 --- a/doc/man/gene2xml.1 +++ b/doc/man/gene2xml.1 @@ -37,31 +37,31 @@ File is Binary File is Compressed .TP \fB\-i\fP\ \fIfilename\fP -Single Input file (standard input by default) when not using \fB-p\fP +Single Input file (standard input by default) when not using \fB\-p\fP .TP \fB\-l\fP -Log processing (list files processed when using \fB-p\fP) +Log processing (list files processed when using \fB\-p\fP) .TP \fB\-o\fP\ \fIfilename\fP -Single Output file (standard output by default) when not using \fB-p\fP +Single Output file (standard output by default) when not using \fB\-p\fP .TP \fB\-p\fP\ \fIpath\fP Path to Files (if processing an entire directory) .TP \fB\-r\fP\ \fIpath\fP -Path for Results when using \fB-p\fP; defaults to the input directory +Path for Results when using \fB\-p\fP; defaults to the input directory .TP \fB\-t\fP\ \fIN\fP Limit to the given Taxon ID (per \fBhttp://www.ncbi.nlm.nih.gov/Taxonomy/\fP) .TP \fB\-x\fP -Extract .ags -> text .agc (format previously distributed) +Extract .ags to text .agc (format previously distributed) .TP \fB\-y\fP -Combine .agc -> text .ags (for testing) +Combine .agc to text .ags (for testing) .TP \fB\-z\fP -Combine .agc -> binary .ags, then gzip +Combine .agc to binary .ags, then gzip .SH AUTHOR The National Center for Biotechnology Information. .SH SEE ALSO diff --git a/doc/man/idfetch.1 b/doc/man/idfetch.1 index 4d378114..0ce4651c 100644 --- a/doc/man/idfetch.1 +++ b/doc/man/idfetch.1 @@ -33,7 +33,7 @@ Add the specified feature types (comma-delimited); allowed values are CDD, SNP, SNP_graph, MGC, HPRD, STS, tRNA, and microRNA. 
.TP \fB\-G\fP\ \fIfilename\fP -File with list of GIs, (versioned) accessions, FASTA SeqID's to dump +File with list of GIs, (versioned) accessions, FASTA SeqIDs to dump .TP \fB\-Q\fP\ \fIfilename\fP Generate GI list by Entrez query in \fIfilename\fP; requires \fB\-dn\fP @@ -67,7 +67,7 @@ Entity number (retrieval number) to dump Flattened SeqId. Possible formats: .br \fItype\fP([\fIname\fP][,[\fIaccession\fP][,[\fIrelease\fP][,\fIversion\fP]]]) -as '5(HUMHBB)' +as \(aq5(HUMHBB)\(aq .br \fItype\fP=\fIaccession\fP .br diff --git a/doc/man/nps2gps.1 b/doc/man/nps2gps.1 index eaf43926..f3b9a704 100644 --- a/doc/man/nps2gps.1 +++ b/doc/man/nps2gps.1 @@ -1,11 +1,13 @@ -.TH NPS2GPS 1 2007-10-19 NCBI "NCBI Tools User's Manual" +.TH NPS2GPS 1 2008-12-13 NCBI "NCBI Tools User's Manual" .SH NAME nps2gps \- convert nucleotide-protein sets to ASN.1 genomic product sets .SH SYNOPSIS .B nps2gps [\|\fB\-\fP\|] +[\|\fB\-D\fP\|] [\|\fB\-F\fP\|] [\|\fB\-L\fP\|] +[\|\fB\-P\fP\|] [\|\fB\-R\fP\|] [\|\fB\-f\fP\ \fIstr\fP\|] [\|\fB\-i\fP\ \fIfilename\fP\|] @@ -23,12 +25,18 @@ A summary of options is included below. \fB\-\fP Print usage message .TP +\fB\-D\fP +RefSeq mRNA Titles +.TP \fB\-F\fP Map by Feature ID .TP \fB\-L\fP Lock components in advance .TP +\fB\-P\fP +mRNA ID from Protein +.TP \fB\-R\fP Enable Remote fetching from ID .TP diff --git a/doc/man/spidey.1 b/doc/man/spidey.1 index f3e2836f..4dd3014b 100644 --- a/doc/man/spidey.1 +++ b/doc/man/spidey.1 @@ -170,7 +170,7 @@ terminal introns from 100kb to 240kb and for all others from 35kb to .TP \fB\-a\fP\ \fIfilename\fP Output file for alignments when directed to a separate file with -\fB-p\ 3\fP (default = spidey.aln). +\fB\-p\ 3\fP (default = spidey.aln). .TP \fB\-c\fP\ \fIN\fP Identity cutoff, in percent, for quality control purposes. @@ -198,14 +198,14 @@ can substitute the desired accession number for the filename. Print ASN.1 alignment? .TP \fB\-k\fP\ \fIfilename\fP -File for ASN.1 output with \fB-k\fP (default = spidey.asn). 
+File for ASN.1 output with \fB\-k\fP (default = spidey.asn). .TP \fB\-l\fP\ \fIN\fP Length coverage cutoff, in percent. .TP \fB\-m\fP\ \fIfilename\fP Input file containing the mRNA sequence(s) in ASN.1 or FASTA format, -or a list of their accessions (with \fB-G\fP). If your computer is +or a list of their accessions (with \fB\-G\fP). If your computer is running on a network that can access GenBank, you can substitute a single accession number for the filename. .TP @@ -213,7 +213,7 @@ single accession number for the filename. Number of gene models to return per input mRNA (default = 1). .TP \fB\-o\fP\ \fIstr\fP -Main output file (default = stdout; contents controlled by \fB-p\fP). +Main output file (default = stdout; contents controlled by \fB\-p\fP). .TP \fB\-p\fP\ \fIN\fP Print alignment? diff --git a/doc/man/subfuse.1 b/doc/man/subfuse.1 new file mode 100644 index 00000000..762906e6 --- /dev/null +++ b/doc/man/subfuse.1 @@ -0,0 +1,30 @@ +.TH SUBFUSE 1 2008-12-13 NCBI "NCBI Tools User's Manual" +.SH NAME +subfuse \- merge GenBank submissions +.SH SYNOPSIS +.B subfuse +[\|\fB\-\fP\|] +[\|\fB\-o\fP\ \fIfilename\fP\|] +[\|\fB\-p\fP\ \fIpath\fP\|] +[\|\fB\-x\fP\ \fIext\fP\|] +.SH DESCRIPTION +\fBsubfuse\fP is a utility to consolidate multiple GenBank submissions +into a single batch submission. +.SH OPTIONS +A summary of options is included below. +.TP +\fB\-\fP +Print usage message +.TP +\fB\-o\fP\ \fIfilename\fP +Output file (\fBstdout\fP by default) +.TP +\fB\-p\fP\ \fIpath\fP +Path to files +.TP +\fB\-x\fP\ \fIext\fP +Input filename suffix (\fB.sqn\fP by default) +.SH SEE ALSO +.BR sequin (1). +.SH AUTHOR +The National Center for Biotechnology Information.
diff --git a/doc/man/taxblast.1 b/doc/man/taxblast.1 new file mode 100644 index 00000000..1fc9109b --- /dev/null +++ b/doc/man/taxblast.1 @@ -0,0 +1,34 @@ +.TH TAXBLAST 1 2008-12-13 NCBI "NCBI Tools User's Manual" +.SH NAME +taxblast \- taxonomy-enabled BLAST +.SH SYNOPSIS +.B taxblast +[\|\fB\-\fP\|] +[\|\fB\-d\fP\ \fIstr\fP\|] +\fB\-i\fP\ \fIfilename\fP +[\|\fB\-o\fP\ \fIfilename\fP\|] +[\|\fB\-p\fP\|] +.SH DESCRIPTION +\fBtaxblast\fP is a variant of BLAST that makes use of taxonomic +information. +.SH OPTIONS +A summary of options is included below. +.TP +\fB\-\fP +Print usage message +.TP +\fB\-d\fP\ \fIstr\fP +Database used to get SeqAnnot ASN.1 (\fBnr\fP by default) +.TP +\fB\-i\fP\ \fIfilename\fP +Input ASN.1 File (SeqAnnot) +.TP +\fB\-o\fP\ \fIfilename\fP +Output file name (stdout by default) +.TP +\fB\-p\fP +Sequence is DNA +.SH SEE ALSO +.BR BLAST (1). +.SH AUTHOR +The National Center for Biotechnology Information. diff --git a/doc/man/tbl2asn.1 b/doc/man/tbl2asn.1 index e1b984c6..6b8a3dfa 100644 --- a/doc/man/tbl2asn.1 +++ b/doc/man/tbl2asn.1 @@ -1,19 +1,18 @@ -.TH TBL2ASN 1 2007-10-19 NCBI "NCBI Tools User's Manual" +.TH TBL2ASN 1 2009-07-31 NCBI "NCBI Tools User's Manual" .SH NAME tbl2asn \- prepare a GenBank submission using an ASCII feature table .SH SYNOPSIS .B tbl2asn [\|\fB\-\fP\|] -[\|\fB\-B\fP\ \fIstr\fP\|] +[\|\fB\-A\fP\ \fIstr\fP\|] [\|\fB\-C\fP\ \fIstr\fP\|] [\|\fB\-D\fP\ \fIfilename\fP\|] -[\|\fB\-E\fP\ \fIstr\fP\|] -[\|\fB\-F\fP\ \fIstr\fP|] +[\|\fB\-E\fP\|] +[\|\fB\-F\fP\ \fIstr\fP\|] [\|\fB\-G\fP\ \fIstr\fP\|] -[\|\fB\-H\fP\|] +[\|\fB\-H\fP\ \fIstr\fP\|] [\|\fB\-K\fP\|] [\|\fB\-L\fP\|] -[\|\fB\-M\fP\ \fIstr\fP\|] [\|\fB\-O\fP\|] [\|\fB\-P\fP\|] [\|\fB\-Q\fP\|] @@ -21,22 +20,20 @@ tbl2asn \- prepare a GenBank submission using an ASCII feature table [\|\fB\-S\fP\|] [\|\fB\-T\fP\|] [\|\fB\-U\fP\|] +[\|\fB\-V\fP\ \fIstr\fP\|] [\|\fB\-W\fP\|] [\|\fB\-X\fP\ \fIstr\fP\|] [\|\fB\-Y\fP\ \fIfilename\fP\|] +[\|\fB\-Z\fP\ \fIfilename\fP\|] 
[\|\fB\-a\fP\ \fIstr\fP\|] [\|\fB\-b\fP\|] -[\|\fB\-c\fP\|] -[\|\fB\-d\fP\|] -[\|\fB\-e\fP\|] +[\|\fB\-c\fP\ \fIstr\fP\|] [\|\fB\-f\fP\ \fIfilename\fP\|] [\|\fB\-g\fP\|] [\|\fB\-h\fP\|] [\|\fB\-i\fP\ \fIfilename\fP\|] [\|\fB\-j\fP\ \fIstr\fP\|] -[\|\fB\-k\fP\|] -[\|\fB\-l\fP\|] -[\|\fB\-m\fP\|] +[\|\fB\-k\fP\ \fIstr\fP\|] [\|\fB\-n\fP\ \fIstr\fP\|] [\|\fB\-o\fP\ \fIfilename\fP\|] [\|\fB\-p\fP\ \fIstr\fP\|] @@ -46,7 +43,6 @@ tbl2asn \- prepare a GenBank submission using an ASCII feature table [\|\fB\-t\fP\ \fIfilename\fP\|] [\|\fB\-u\fP\|] [\|\fB\-v\fP\|] -[\|\fB\-w\fP\ \fIN\fP\|] [\|\fB\-x\fP\ \fIstr\fP\|] [\|\fB\-y\fP\ \fIstr\fP\|] [\|\fB\-z\fP\|] @@ -63,8 +59,8 @@ A summary of options is included below. \fB\-\fP Print usage message .TP -\fB\-B\fP\ \fIstr\fP -Alignment Beginning gap characters +\fB\-A\fP\ \fIstr\fP +Accession .TP \fB\-C\fP\ \fIstr\fP Genome Center tag @@ -72,17 +68,29 @@ Genome Center tag \fB\-D\fP\ \fIfilename\fP Descriptors file .TP -\fB\-E\fP\ \fIstr\fP -Alignment End gap characters +\fB\-E\fP +Recurse .TP \fB\-F\fP Feature ID links (\fBo\fP by Overlap, \fBp\fP by Product) .TP \fB\-G\fP\ \fIstr\fP +Alignment Gap Flags (comma separated fields, e.g., \fBp,\-,\-,\-,?,.\fP ) +\fBn\fP Nucleotide or \fBp\fP Protein, +Begin, Middle, End Gap Characters, +Missing Characters, Match Characters Alignment middle Gap characters .TP -\fB\-H\fP -Implicit gaps +\fB\-H\fP\ \fIstr\fP +Hold until publication: +.RS +.PD 0 +.IP y +For one year +.IP \fImm/dd/yyyy\fP +Until the specified date +.PD +.RE .TP \fB\-K\fP Safe Bioseq-set @@ -90,20 +98,17 @@ Safe Bioseq-set \fB\-L\fP Force Local protein_id/transcript_id .TP -\fB\-M\fP\ \fIstr\fP -Alignment Match characters -.TP \fB\-O\fP Allow run-on ORFs .TP \fB\-P\fP -Alignment is Proteins +Remote publication lookup .TP \fB\-Q\fP Special mRNA titles .TP \fB\-R\fP -Remote fetching from ID +Remote sequence record fetching from ID .TP \fB\-S\fP Smart feature annotation @@ -114,29 +119,84 @@ Remote Taxonomy lookup \fB\-U\fP
Remove Unnecessary gene xref .TP +\fB\-V\fP\ \fIstr\fP +Verification (combine any of the following letters) +.RS +.PD 0 +.IP v +Validate with Normal Stringency +.IP r +Validate without Country Check +.IP b +Generate GenBank Flatfile +.IP g +Generate Gene Report +.PD +.RE +.TP \fB\-W\fP Log progress .TP \fB\-X\fP\ \fIstr\fP -Alignment missing characters +Extra flags (combine any of the following letters) +.RS +.PD 0 +.IP C +Apply comments in \fB.cmt\fP files to all sequences +.PD +.RE .TP \fB\-Y\fP\ \fIfilename\fP Read a comment string from \fIfilename\fP .TP +\fB\-Z\fP\ \fIfilename\fP +Write a discrepancy report to \fIfilename\fP +.TP \fB\-a\fP\ \fIstr\fP -Accession +File type: +.RS +.PD 0 +.IP a +Any (default) +.IP r20u +Runs of 20+ Ns are gaps, 100 Ns are unknown length +.IP r20k +Runs of 20+ Ns are gaps, 100 Ns are known length +.IP s +FASTA Set (\fBs\fP Batch, \fBs1\fP Pop, \fBs2\fP Phy, \fBs3\fP Mut, +\fBs4\fP Eco) +.IP d +FASTA Delta +.IP di +FASTA Delta with Implicit Gaps +.IP l +FASTA+Gap Alignment +.IP z +FASTA with Gap Lines +.IP e +PHRAP/ACE +.PD +.RE .TP \fB\-b\fP -Generate GenBank file -.TP -\fB\-c\fP -Annotate longest ORF -.TP -\fB\-d\fP -Read FASTAs as Delta +Generate GenBank file (deprecated in favor of \fB\-V b\fP) .TP -\fB\-e\fP -Read PHRAP/ACE format +\fB\-c\fP\ \fIstr\fP +Cleanup (combine any of the following letters) +.RS +.PD 0 +.IP d +Correct Collection Dates (assume month first) +.IP D +Correct Collection Dates (assume day first) +.IP b +Append note to coding regions that overlap other coding regions with +similar product names and do not contain \(aqABC\(aq +.IP x +Extend partial ends of features by one or two nucleotides to abut gaps +or sequence ends +.PD +.RE .TP \fB\-f\fP\ \fIfilename\fP Single table file @@ -153,14 +213,20 @@ Single input file \fB\-j\fP\ \fIstr\fP Source qualifiers .TP -\fB\-k\fP -Set conflict on mismatch -.TP -\fB\-l\fP -Read FASTA+Gap Alignment -.TP -\fB\-m\fP -Allow alternative starts +\fB\-k\fP\ \fIstr\fP +CDS flags
(combine any of the following letters) +.RS +.PD 0 +.IP c +Annotate Longest ORF +.IP r +Allow Runon ORFs +.IP m +Allow Alternative Starts +.IP k +Set Conflict on Mismatch +.PD +.RE .TP \fB\-n\fP\ \fIstr\fP Organism name @@ -187,36 +253,20 @@ Read template from \fIfilename\fP Convert GenProdSet to NucProtSet .TP \fB\-v\fP -Validate -.TP -\fB\-w\fP \fIN\fP -FASTA set class -.RS -.PD 0 -.IP 0 -unspecified (default) -.IP 1 -population study -.IP 2 -phylogenetic study -.IP 3 -set of mutations -.IP 4 -ecological sample study -.PD -.RE +Validate (deprecated in favor of \fB\-V v\fP) .TP \fB\-x\fP\ \fIstr\fP Suffix (default = \fB.fsa\fP) .TP \fB\-y\fP\ \fIstr\fP Comment .TP \fB\-z\fP -Read FASTAs with gap lines +Clean up log file .SH AUTHOR The National Center for Biotechnology Information.
.SH SEE ALSO diff --git a/make/makenet.unx b/make/makenet.unx index 27f811b4..81ba4197 100644 --- a/make/makenet.unx +++ b/make/makenet.unx @@ -1,6 +1,6 @@ # makefile for network demo programs and network entrez # -# $Id: makenet.unx,v 6.231 2008/12/10 21:23:29 ucko Exp $ +# $Id: makenet.unx,v 6.233 2009/08/05 20:06:36 ucko Exp $ # test, ignore # # Sun with unbundled ANSI compiler [ make CC=acc RAN=ranlib ] @@ -389,7 +389,7 @@ utilities : $(EXEUTIL) vibutilities : $(EXEUTILVIB) -.NO_PARALLEL: copy nocopy Tentrez sequin Psequin sbtedit Ssequin elecpcr asn2fast asn2asn cleanasn cspeedtest sugint Nbatch Nbatch3 Nentrcmd seqget idfetch test_nc bi_socket test_ncbi_dsock debug_server rtestval rasn2ff asn2gb asn2gb_psf asn2fsa asn2fsa_psf tbl2asn tbl2asn_psf raw2delt aceread_tst asn2all gene2xml asnval asnval_psf asndisc asndisc_psf demo_aceread_tst asnmacro asnstrip flint gbseqget insdseqget trna2sap trna2tbl testent2 entrez2 spidey dotmatrix ingenue condense bl2seq +.NO_PARALLEL: copy nocopy Tentrez sequin Psequin sbtedit Ssequin elecpcr asn2fast asn2asn cleanasn cspeedtest sugint Nbatch Nbatch3 Nentrcmd seqget idfetch test_nc bi_socket test_ncbi_dsock debug_server rtestval rasn2ff asn2gb asn2gb_psf asn2fsa asn2fsa_psf tbl2asn tbl2asn_psf raw2delt aceread_tst asn2all gene2xml asnval asnval_psf asndisc asndisc_psf demo_aceread_tst asnmacro asnstrip flint gbseqget insdseqget trna2sap trna2tbl testent2 entrez2 spidey dotmatrix ingenue condense bl2seq src_chk src_chk_psf .WAIT: echo Waiting...go @@ -1468,4 +1468,9 @@ bl2bag.cgi : bl2bag.c src_chk : src_chk.c $(CC) -o src_chk $(LDFLAGS) src_chk.c $(LIB2) $(LIB1) $(OTHERLIBS) +src_chk_psf : src_chk.c + $(CC) -DINTERNAL_NCBI_SRC_CHK -g -o src_chk_psf $(LDFLAGS) src_chk.c \ + $(LIB_PS) $(LIB23) $(LIBCOMPADJ) $(LIB2) $(LIB1) \ + $(NCBI_SYBLIBS_CT) $(OTHERLIBS) + ## diff --git a/make/xCode/ncbictoolkit/ncbictoolkit.xcodeproj/project.pbxproj b/make/xCode/ncbictoolkit/ncbictoolkit.xcodeproj/project.pbxproj index 510fab6e..efa29733 
100644 --- a/make/xCode/ncbictoolkit/ncbictoolkit.xcodeproj/project.pbxproj +++ b/make/xCode/ncbictoolkit/ncbictoolkit.xcodeproj/project.pbxproj @@ -753,8 +753,6 @@ 3734FD650FF2AF7D004C8F4B /* ncbi_host_infop.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F9490FF2AF7C004C8F4B /* ncbi_host_infop.h */; settings = {ATTRIBUTES = (Public, ); }; }; 3734FD670FF2AF7D004C8F4B /* ncbi_http_connector.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F94B0FF2AF7C004C8F4B /* ncbi_http_connector.h */; settings = {ATTRIBUTES = (Public, ); }; }; 3734FD690FF2AF7D004C8F4B /* ncbi_lb.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F94D0FF2AF7C004C8F4B /* ncbi_lb.h */; settings = {ATTRIBUTES = (Public, ); }; }; - 3734FD6A0FF2AF7D004C8F4B /* ncbi_lbsm.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F94E0FF2AF7C004C8F4B /* ncbi_lbsm.h */; settings = {ATTRIBUTES = (Public, ); }; }; - 3734FD6B0FF2AF7D004C8F4B /* ncbi_lbsm_ipc.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F94F0FF2AF7C004C8F4B /* ncbi_lbsm_ipc.h */; settings = {ATTRIBUTES = (Public, ); }; }; 3734FD6C0FF2AF7D004C8F4B /* ncbi_lbsmd.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F9500FF2AF7C004C8F4B /* ncbi_lbsmd.h */; settings = {ATTRIBUTES = (Public, ); }; }; 3734FD6F0FF2AF7D004C8F4B /* ncbi_local.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F9530FF2AF7C004C8F4B /* ncbi_local.h */; settings = {ATTRIBUTES = (Public, ); }; }; 3734FD710FF2AF7D004C8F4B /* ncbi_memory_connector.h in Headers */ = {isa = PBXBuildFile; fileRef = 3734F9550FF2AF7C004C8F4B /* ncbi_memory_connector.h */; settings = {ATTRIBUTES = (Public, ); }; }; @@ -1055,8 +1053,6 @@ 379747C30FF3C77600138501 /* ncbi_host_infop.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F9490FF2AF7C004C8F4B /* ncbi_host_infop.h */; }; 379747C40FF3C77600138501 /* ncbi_http_connector.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F94B0FF2AF7C004C8F4B /* ncbi_http_connector.h */; }; 379747C50FF3C77600138501 /* ncbi_lb.h in 
CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F94D0FF2AF7C004C8F4B /* ncbi_lb.h */; }; - 379747C60FF3C77600138501 /* ncbi_lbsm.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F94E0FF2AF7C004C8F4B /* ncbi_lbsm.h */; }; - 379747C70FF3C77600138501 /* ncbi_lbsm_ipc.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F94F0FF2AF7C004C8F4B /* ncbi_lbsm_ipc.h */; }; 379747C80FF3C77600138501 /* ncbi_lbsmd.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F9500FF2AF7C004C8F4B /* ncbi_lbsmd.h */; }; 379747C90FF3C77600138501 /* ncbi_local.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F9530FF2AF7C004C8F4B /* ncbi_local.h */; }; 379747CA0FF3C77600138501 /* ncbi_memory_connector.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3734F9550FF2AF7C004C8F4B /* ncbi_memory_connector.h */; }; @@ -1199,8 +1195,6 @@ 379747C30FF3C77600138501 /* ncbi_host_infop.h in CopyFiles */, 379747C40FF3C77600138501 /* ncbi_http_connector.h in CopyFiles */, 379747C50FF3C77600138501 /* ncbi_lb.h in CopyFiles */, - 379747C60FF3C77600138501 /* ncbi_lbsm.h in CopyFiles */, - 379747C70FF3C77600138501 /* ncbi_lbsm_ipc.h in CopyFiles */, 379747C80FF3C77600138501 /* ncbi_lbsmd.h in CopyFiles */, 379747C90FF3C77600138501 /* ncbi_local.h in CopyFiles */, 379747CA0FF3C77600138501 /* ncbi_memory_connector.h in CopyFiles */, @@ -2112,8 +2106,6 @@ 3734F9490FF2AF7C004C8F4B /* ncbi_host_infop.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_host_infop.h; path = ../../../connect/ncbi_host_infop.h; sourceTree = SOURCE_ROOT; }; 3734F94B0FF2AF7C004C8F4B /* ncbi_http_connector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_http_connector.h; path = ../../../connect/ncbi_http_connector.h; sourceTree = SOURCE_ROOT; }; 3734F94D0FF2AF7C004C8F4B /* ncbi_lb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_lb.h; path = ../../../connect/ncbi_lb.h; 
sourceTree = SOURCE_ROOT; }; - 3734F94E0FF2AF7C004C8F4B /* ncbi_lbsm.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_lbsm.h; path = ../../../connect/ncbi_lbsm.h; sourceTree = SOURCE_ROOT; }; - 3734F94F0FF2AF7C004C8F4B /* ncbi_lbsm_ipc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_lbsm_ipc.h; path = ../../../connect/ncbi_lbsm_ipc.h; sourceTree = SOURCE_ROOT; }; 3734F9500FF2AF7C004C8F4B /* ncbi_lbsmd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_lbsmd.h; path = ../../../connect/ncbi_lbsmd.h; sourceTree = SOURCE_ROOT; }; 3734F9530FF2AF7C004C8F4B /* ncbi_local.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_local.h; path = ../../../connect/ncbi_local.h; sourceTree = SOURCE_ROOT; }; 3734F9550FF2AF7C004C8F4B /* ncbi_memory_connector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ncbi_memory_connector.h; path = ../../../connect/ncbi_memory_connector.h; sourceTree = SOURCE_ROOT; }; @@ -3663,8 +3655,6 @@ 3734F9490FF2AF7C004C8F4B /* ncbi_host_infop.h */, 3734F94B0FF2AF7C004C8F4B /* ncbi_http_connector.h */, 3734F94D0FF2AF7C004C8F4B /* ncbi_lb.h */, - 3734F94E0FF2AF7C004C8F4B /* ncbi_lbsm.h */, - 3734F94F0FF2AF7C004C8F4B /* ncbi_lbsm_ipc.h */, 3734F9500FF2AF7C004C8F4B /* ncbi_lbsmd.h */, 3734F9530FF2AF7C004C8F4B /* ncbi_local.h */, 3734F9550FF2AF7C004C8F4B /* ncbi_memory_connector.h */, @@ -4413,8 +4403,6 @@ 3734FD650FF2AF7D004C8F4B /* ncbi_host_infop.h in Headers */, 3734FD670FF2AF7D004C8F4B /* ncbi_http_connector.h in Headers */, 3734FD690FF2AF7D004C8F4B /* ncbi_lb.h in Headers */, - 3734FD6A0FF2AF7D004C8F4B /* ncbi_lbsm.h in Headers */, - 3734FD6B0FF2AF7D004C8F4B /* ncbi_lbsm_ipc.h in Headers */, 3734FD6C0FF2AF7D004C8F4B /* ncbi_lbsmd.h in Headers */, 3734FD6F0FF2AF7D004C8F4B /* ncbi_local.h in Headers */, 
3734FD710FF2AF7D004C8F4B /* ncbi_memory_connector.h in Headers */, diff --git a/sequin/sequin.h b/sequin/sequin.h index 8da7be9e..2b757fb2 100644 --- a/sequin/sequin.h +++ b/sequin/sequin.h @@ -29,7 +29,7 @@ * * Version Creation Date: 1/22/95 * -* $Revision: 6.576 $ +* $Revision: 6.579 $ * * File Description: * @@ -1828,6 +1828,8 @@ ReplaceComplexLocation NLM_EXTERN void CleanupCDD (IteM i); +NLM_EXTERN void ReportNonTSABioseqs (BioseqPtr bsp, Pointer userdata); + #ifdef OS_MSWIN NLM_EXTERN Int4 RunSilent(const char *cmdline); #endif diff --git a/sequin/sequin1.c b/sequin/sequin1.c index 473d065c..36616fdb 100644 --- a/sequin/sequin1.c +++ b/sequin/sequin1.c @@ -29,7 +29,7 @@ * * Version Creation Date: 1/22/95 * -* $Revision: 6.705 $ +* $Revision: 6.709 $ * * File Description: * @@ -131,7 +131,7 @@ static char *time_of_compilation = "now"; #include <Gestalt.h> #endif -#define SEQ_APP_VER "9.50" +#define SEQ_APP_VER "9.55" CharPtr SEQUIN_APPLICATION = SEQ_APP_VER; CharPtr SEQUIN_SERVICES = NULL; @@ -280,7 +280,7 @@ static Boolean dirsubMode = FALSE; static MenU newDescMenu = NULL; static MenU newFeatMenu = NULL; static MenU advTableMenu = NULL; -static MenU sucMenu = NULL; +static IteM sucItem = NULL; static MenU newPubMenu = NULL; static MenU batchApplyMenu = NULL; static MenU batchEditMenu = NULL; @@ -687,51 +687,161 @@ extern Boolean WriteTheEntityID (Uint2 entityID, CharPtr path, Boolean binary) return rsult; } -extern Boolean PropagateFromGenBankBioseqSet (SeqEntryPtr sep, Boolean ask) +static ValNodePtr ExtractGivenSeqDescrUserObject (ValNodePtr PNTR headptr, CharPtr str, CharPtr cls) { - BioseqPtr bsp; - BioseqSetPtr bssp; - Uint1 _class; - SeqEntryPtr seqentry; - ValNodePtr sourcedescr; + Boolean extract_it; + ValNodePtr last = NULL, vnp; + ObjectIdPtr oip; + UserObjectPtr uop; - if (sep != NULL) { - if (sep->choice == 2 && sep->data.ptrvalue != NULL) { - bssp = (BioseqSetPtr) sep->data.ptrvalue; - _class = bssp->_class; - sourcedescr = bssp->descr; - if 
(sourcedescr == NULL) return FALSE; - if (_class == 7) { - if (ask) { - if (Message (MSG_YN, "Propagate descriptors from top-level set?") == ANS_NO) return FALSE; - } - seqentry = bssp->seq_set; - while (seqentry != NULL) { - if (seqentry->data.ptrvalue != NULL) { - if (seqentry->choice == 1) { - bsp = (BioseqPtr) seqentry->data.ptrvalue; - ValNodeLink (&(bsp->descr), - AsnIoMemCopy ((Pointer) sourcedescr, - (AsnReadFunc) SeqDescrAsnRead, - (AsnWriteFunc) SeqDescrAsnWrite)); - } else if (seqentry->choice == 2) { - bssp = (BioseqSetPtr) seqentry->data.ptrvalue; - ValNodeLink (&(bssp->descr), - AsnIoMemCopy ((Pointer) sourcedescr, - (AsnReadFunc) SeqDescrAsnRead, - (AsnWriteFunc) SeqDescrAsnWrite)); + if (headptr == NULL) return NULL; + vnp = *headptr; + + while (vnp != NULL) { + extract_it = FALSE; + if (vnp->choice == Seq_descr_user) { + uop = (UserObjectPtr) vnp->data.ptrvalue; + if (uop != NULL) { + if (StringDoesHaveText (cls)) { + if (StringICmp (uop->_class, cls) == 0) { + extract_it = TRUE; + } + } + if (StringDoesHaveText (str)) { + oip = uop->type; + if (oip != NULL) { + if (StringICmp (oip->str, str) == 0) { + extract_it = TRUE; } } - seqentry = seqentry->next; } - bssp = (BioseqSetPtr) sep->data.ptrvalue; - bssp->descr = SeqDescrFree (bssp->descr); - return TRUE; } } + if (extract_it) { + if (last == NULL) { + *headptr = vnp->next; + } else { + last->next = vnp->next; + } + vnp->next = NULL; + return vnp; + } else { + last = vnp; + vnp = vnp->next; + } } - return FALSE; + + return NULL; +} + +typedef struct propgenbankdata { + Boolean ask; + Boolean asked; + Boolean bail; + Boolean changed; +} PropGenbankData, PNTR PropGenBankPtr; + +static void DoPropagateFromGenBankBioseqSet ( + BioseqSetPtr seqset, + Pointer userdata +) + +{ + BioseqPtr bsp; + BioseqSetPtr bssp; + PropGenBankPtr pgp; + SeqEntryPtr seqentry; + ValNodePtr smartuserobj; + ValNodePtr sourcedescr; + UserObjectPtr uop; + + if (seqset == NULL) return; + if (seqset->_class != 
BioseqseqSet_class_genbank) return; + pgp = (PropGenBankPtr) userdata; + if (pgp == NULL) return; + + seqentry = seqset->seq_set; + sourcedescr = seqset->descr; + if (sourcedescr == NULL) return; + + /* if only descriptor is tracking user object, skip */ + if (sourcedescr->next == NULL && sourcedescr->choice == Seq_descr_user) { + uop = (UserObjectPtr) sourcedescr->data.ptrvalue; + if (uop != NULL && StringICmp (uop->_class, "SMART_V1.0") == 0) return; + } + + /* optionally ask if propagation is desired */ + if (pgp->ask) { + if (! pgp->asked) { + if (Message (MSG_YN, "Propagate descriptors from top-level set?") == ANS_NO) { + pgp->bail = TRUE; + } + pgp->asked = TRUE; + } + } + if (pgp->bail) return; + + /* disconnect descriptors from parent bssp */ + seqset->descr = NULL; + + /* extract tracking user object */ + smartuserobj = ExtractGivenSeqDescrUserObject (&sourcedescr, NULL, "SMART_V1.0"); + + while (seqentry != NULL) { + if (seqentry->data.ptrvalue != NULL) { + if (seqentry->choice == 1) { + bsp = (BioseqPtr) seqentry->data.ptrvalue; + ValNodeLink (&(bsp->descr), + AsnIoMemCopy ((Pointer) sourcedescr, + (AsnReadFunc) SeqDescrAsnRead, + (AsnWriteFunc) SeqDescrAsnWrite)); + } else if (seqentry->choice == 2) { + bssp = (BioseqSetPtr) seqentry->data.ptrvalue; + ValNodeLink (&(bssp->descr), + AsnIoMemCopy ((Pointer) sourcedescr, + (AsnReadFunc) SeqDescrAsnRead, + (AsnWriteFunc) SeqDescrAsnWrite)); + } + pgp->changed = TRUE; + } + seqentry = seqentry->next; + } + + /* free extracted original descriptors now that copies are propagated */ + SeqDescrFree (sourcedescr); + + /* restore tracking user object */ + if (smartuserobj != NULL) { + ValNodeLink (&(seqset->descr), smartuserobj); + } + + /* recurse */ + VisitSetsInSet (seqset, userdata, DoPropagateFromGenBankBioseqSet); +} + +extern Boolean PropagateFromGenBankBioseqSet (SeqEntryPtr sep, Boolean ask) + +{ + BioseqSetPtr bssp; + PropGenbankData pdp; + + if (sep == NULL) return FALSE; + if (! 
IS_Bioseq_set (sep)) return FALSE; + + bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (bssp == NULL) return FALSE; + if (bssp->_class != BioseqseqSet_class_genbank) return FALSE; + + MemSet ((Pointer) &pdp, 0, sizeof (PropGenbankData)); + pdp.ask = ask; + pdp.asked = FALSE; + pdp.bail = FALSE; + pdp.changed = FALSE; + + DoPropagateFromGenBankBioseqSet (bssp, (Pointer) &pdp); + + return pdp.changed; } static void ForcePropagate (IteM i) @@ -5105,7 +5215,7 @@ static void BioseqViewFormActivated (WindoW w) (HANDLE) newDescMenu, (HANDLE) newFeatMenu, (HANDLE) advTableMenu, - (HANDLE) sucMenu, + (HANDLE) sucItem, (HANDLE) newPubMenu, (HANDLE) batchApplyMenu, (HANDLE) batchEditMenu, @@ -5386,7 +5496,7 @@ static void MacDeactProc (WindoW w) (HANDLE) newDescMenu, (HANDLE) newFeatMenu, (HANDLE) advTableMenu, - (HANDLE) sucMenu, + (HANDLE) sucItem, (HANDLE) newPubMenu, (HANDLE) batchApplyMenu, (HANDLE) batchEditMenu, @@ -9437,6 +9547,10 @@ static void SetupMacMenus (void) /* submitItem = CommandItem (m, "Submit to NCBI", SubmitToNCBI); */ + /* + SeparatorItem (m); + CommandItem (m, "Propagate Top Descriptors", ForcePropagate); + */ SeparatorItem (m); printItem = FormCommandItem (m, "Print", NULL, VIB_MSG_PRINT); SeparatorItem (m); @@ -9679,7 +9793,7 @@ static void SetupMacMenus (void) CommandItem (newFeatMenu, "Generate Definition Line", AutoDef); advTableMenu = SubMenu (newFeatMenu, "Advanced Table Readers"); CommandItem (advTableMenu, "Load Structured Comments from Table", SubmitterCreateStructuredComments); - sucMenu = CommandItem (newFeatMenu, "Sort Unique Count By Group", SUCSubmitterProc); + sucItem = CommandItem (newFeatMenu, "Sort Unique Count By Group", SUCSubmitterProc); } #endif diff --git a/sequin/sequin3.c b/sequin/sequin3.c index f3ea4587..d05dc001 100644 --- a/sequin/sequin3.c +++ b/sequin/sequin3.c @@ -29,7 +29,7 @@ * * Version Creation Date: 1/22/95 * -* $Revision: 6.975 $ +* $Revision: 6.978 $ * * File Description: * @@ -190,6 +190,22 @@ static ValNodePtr 
ApplyTranscriptomeIdListWithProgress (ValNodePtr ids_list, Loc } +NLM_EXTERN void ReportNonTSABioseqs (BioseqPtr bsp, Pointer userdata) +{ + LogInfoPtr lip; + Char id_str[255]; + + if (bsp == NULL || (lip = (LogInfoPtr) userdata) == NULL || lip->fp == NULL || ISA_aa (bsp->mol)) { + return; + } + if (bsp->hist == NULL || bsp->hist->assembly == NULL) { + SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_str, PRINTID_REPORT, sizeof (id_str) - 1); + fprintf (lip->fp, "%s has no TSA table\n", id_str); + lip->data_in_log = TRUE; + } +} + + static void AddTSATableToBioseq (IteM i) { BaseFormPtr bfp; @@ -256,16 +272,18 @@ static void AddTSATableToBioseq (IteM i) ValNodeLink (&coverage_report, err_list); err_list = coverage_report; + lip = OpenLog ("TSA Table Problems"); if (err_list != NULL) { - lip = OpenLog ("TSA Table Problems"); for (vnp = err_list; vnp != NULL; vnp = vnp->next) { fprintf (lip->fp, "%s\n", vnp->data.ptrvalue); } lip->data_in_log = TRUE; - CloseLog (lip); - lip = FreeLog (lip); err_list = ValNodeFreeData (err_list); } + VisitBioseqsInSep (sep, lip, ReportNonTSABioseqs); + CloseLog (lip); + lip = FreeLog (lip); + ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0); } @@ -304,16 +322,18 @@ static void RefreshTSATables (IteM i) err_list = coverage_report; + lip = OpenLog ("TSA Table Problems"); if (err_list != NULL) { - lip = OpenLog ("TSA Table Problems"); for (vnp = err_list; vnp != NULL; vnp = vnp->next) { fprintf (lip->fp, "%s\n", vnp->data.ptrvalue); } lip->data_in_log = TRUE; - CloseLog (lip); - lip = FreeLog (lip); err_list = ValNodeFreeData (err_list); } + VisitBioseqsInSep (sep, lip, ReportNonTSABioseqs); + CloseLog (lip); + lip = FreeLog (lip); + ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0); } @@ -22383,7 +22403,7 @@ static void MakeSpecialEditMenu (MenU m, BaseFormPtr bfp) SeparatorItem (s); x = SubMenu (s, "Extend Partial 
Features"); - i = CommandItem (x, "All", ExtendPartialFeatures); + i = CommandItem (x, "All to Ends", ExtendPartialFeatures); SetObjectExtra (i, bfp, NULL); i = CommandItem (x, "With Constraint", ExtendPartialFeaturesWithConstraint); SetObjectExtra (i, bfp, NULL); diff --git a/sequin/sequin5.c b/sequin/sequin5.c index 856cfa02..28490f98 100644 --- a/sequin/sequin5.c +++ b/sequin/sequin5.c @@ -29,7 +29,7 @@ * * Version Creation Date: 8/26/97 * -* $Revision: 6.673 $ +* $Revision: 6.675 $ * * File Description: * @@ -2631,6 +2631,13 @@ static Boolean ConvertCDSToMatPeptide (SeqFeatPtr sfp, Uint2 featdef_to, Pointer return AutoConvertCDSToMiscFeat (sfp, extradata == NULL ? TRUE : !(*((BoolPtr) extradata))); } + +static Boolean ConvertMiscFeatureToCDSFunction (SeqFeatPtr sfp, Uint2 featdef_to, Pointer extradata) +{ + return ConvertMiscFeatToCodingRegion (sfp); +} + + extern EnumFieldAssoc enum_bond_alist []; extern EnumFieldAssoc enum_site_alist []; @@ -17049,6 +17056,7 @@ static Boolean ConvertImpToImp (SeqFeatPtr, Uint2 featdef_to, Pointer extradata) static Boolean ConvertRNAToRNA (SeqFeatPtr, Uint2 featdef_to, Pointer extradata); static Boolean ConvertProtToProt (SeqFeatPtr, Uint2 featdef_to, Pointer extradata); static Boolean ConvertCDSToMatPeptide (SeqFeatPtr sfp, Uint2 featdef_to, Pointer extradata); +static Boolean ConvertMiscFeatureToCDSFunction (SeqFeatPtr sfp, Uint2 featdef_to, Pointer extradata); static ConvertFeatureProcsData ConvertFeaturesTable[] = { { SEQFEAT_CDREGION, FEATDEF_CDS, SEQFEAT_RNA, FEATDEF_ANY, @@ -17084,6 +17092,9 @@ static ConvertFeatureProcsData ConvertFeaturesTable[] = { "If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n" "EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on 
the new feature.\n" "Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." }, + { SEQFEAT_IMP, FEATDEF_misc_feature, SEQFEAT_CDREGION, FEATDEF_CDS, + NULL, NULL, NULL, ConvertMiscFeatureToCDSFunction, NULL, + "Use misc_feature comment for coding region product name." }, { SEQFEAT_IMP, FEATDEF_ANY, SEQFEAT_RNA, FEATDEF_misc_RNA, NULL, NULL, NULL, ConvertImpToSpecialRNA, NULL, "Creates a misc_RNA. Import feature key is discarded." }, @@ -18546,6 +18557,7 @@ static Boolean FeatureRemoveOrConvertAction (Pointer userdata) OrigFeatPtr ofp; SeqFeatPtr sfp; Boolean rval = TRUE; + SeqEntryPtr create_sep; if (userdata == NULL) return FALSE; @@ -18637,7 +18649,12 @@ static Boolean FeatureRemoveOrConvertAction (Pointer userdata) { continue; } - sfp = CreateNewFeature (ofp->sep, NULL, ofp->sfp->data.choice, ofp->sfp); + if (IS_Bioseq_set (ofp->sep)) { + create_sep = FindNucSeqEntry (ofp->sep); + } else { + create_sep = ofp->sep; + } + sfp = CreateNewFeature (create_sep, NULL, ofp->sfp->data.choice, ofp->sfp); } } mrfp->feat_list = ValNodeFreeData (mrfp->feat_list); @@ -29439,35 +29456,6 @@ static void GetCombinedCDSLocationCallback (SeqFeatPtr sfp, Pointer userdata) } -static SeqFeatPtr GetProtFeature (BioseqPtr protbsp) -{ - SeqMgrFeatContext fcontext; - SeqAnnotPtr sap; - SeqFeatPtr prot_sfp; - ProtRefPtr prp; - - if (protbsp == NULL) return NULL; - - prot_sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext); - if (prot_sfp == NULL) { - sap = protbsp->annot; - while (sap != NULL && prot_sfp == NULL) { - if (sap->type == 1) { - prot_sfp = sap->data; - while (prot_sfp != NULL - && (prot_sfp->data.choice != SEQFEAT_PROT - || (prp = prot_sfp->data.value.ptrvalue) == NULL - || prp->processed != 0)) { - prot_sfp = prot_sfp->next; - } - } - sap = sap->next; - } - } - return prot_sfp; -} - - static void ApplyProductName (CombineCDSPtr ccp, SeqFeatPtr new_cds) { BioseqPtr first_prot_bsp, new_prot_bsp; diff --git 
a/sequin/sequin7.c b/sequin/sequin7.c index 2c14f1da..b3fe1881 100644 --- a/sequin/sequin7.c +++ b/sequin/sequin7.c @@ -29,7 +29,7 @@ * * Version Creation Date: 1/3/98 * -* $Revision: 6.359 $ +* $Revision: 6.360 $ * * File Description: * @@ -12720,13 +12720,28 @@ static void BarcodeReportPolymorphism (ButtoN b) } +static void ApplyBarcodeDbxrefsBtn (ButtoN b) +{ + BarcodeToolPtr drfp; + + drfp = (BarcodeToolPtr) GetObjectExtra (b); + if (drfp == NULL) return; + + VisitBioseqsInSep (GetTopSeqEntryForEntityID (drfp->input_entityID), NULL, ApplyBarcodeDbxrefsToBioseq); + + ObjMgrSetDirtyFlag (drfp->input_entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, drfp->input_entityID, 0, 0); + Update(); +} + + extern void BarcodeTestTool (IteM i) { BaseFormPtr bfp; BarcodeToolPtr drfp; SeqEntryPtr sep; GrouP h; - GrouP c, c3; + GrouP c, c3, c4, c5; ButtoN b; WindoW w; OMUserDataPtr omudp; @@ -12782,37 +12797,43 @@ extern void BarcodeTestTool (IteM i) drfp->pass_fail_summary = StaticPrompt (h, "0 Pass, 0 Fail", 20 * stdCharWidth, dialogTextHeight, programFont, 'l'); RefreshBarcodeList(drfp); - c3 = HiddenGroup (h, 10, 0, NULL); - SetGroupSpacing (c3, 10, 10); - b = PushButton (c3, "Compliance Report", BarcodeTestComplianceReport); + c4 = HiddenGroup (h, 5, 0, NULL); + SetGroupSpacing (c4, 10, 10); + + b = PushButton (c4, "Compliance Report", BarcodeTestComplianceReport); SetObjectExtra (b, drfp, NULL); - b = PushButton (c3, "Failure Report", BarcodeReportButton); + b = PushButton (c4, "Failure Report", BarcodeReportButton); SetObjectExtra (b, drfp, NULL); - b = PushButton (c3, "Comprehensive Report", BarcodeComprehensiveReportButton); + b = PushButton (c4, "Comprehensive Report", BarcodeComprehensiveReportButton); SetObjectExtra (b, drfp, NULL); - - b = PushButton (c3, "Report Polymorphism", BarcodeReportPolymorphism); + b = PushButton (c4, "Report Polymorphism", BarcodeReportPolymorphism); SetObjectExtra (b, drfp, NULL); - b = PushButton (c3, "Replace Tags", 
BarcodeTestImportTagTable); + c5 = HiddenGroup (h, 5, 0, NULL); + SetGroupSpacing (c5, 10, 10); + b = PushButton (c5, "Apply Dbxrefs", ApplyBarcodeDbxrefsBtn); + SetObjectExtra (b, drfp, NULL); + b = PushButton (c5, "Replace Tags", BarcodeTestImportTagTable); SetObjectExtra (b, drfp, NULL); - b = PushButton (c3, "Add New Tags", BarcodeTestApplyTagTable); + b = PushButton (c5, "Add New Tags", BarcodeTestApplyTagTable); SetObjectExtra (b, drfp, NULL); - b = PushButton (c3, "Make Tag Table", BarcodeTestMakeTagTable); + b = PushButton (c5, "Make Tag Table", BarcodeTestMakeTagTable); + SetObjectExtra (b, drfp, NULL); + b = PushButton (c5, "Refresh List", BarcodeRefreshButton); SetObjectExtra (b, drfp, NULL); - b = PushButton (c3, "Refresh List", BarcodeRefreshButton); + c3 = HiddenGroup (h, 4, 0, NULL); + SetGroupSpacing (c3, 10, 10); + b = PushButton (c3, "Remove BARCODE Keyword from Selected", RemoveSelectedKeywordsBtn); SetObjectExtra (b, drfp, NULL); - b = PushButton (c3, "Remove BARCODE Keyword from Selected Sequences", RemoveSelectedKeywordsBtn); + b = PushButton (c3, "Add BARCODE Keyword to BARCODE Tech", AddBarcodeKeywordBtn); SetObjectExtra (b, drfp, NULL); - b = PushButton (c3, "Add BARCODE Keyword to BARCODE Tech Sequences", AddBarcodeKeywordBtn); + b = PushButton (c3, "Remove BARCODE Tech from Selected", RemoveSelectedTechBtn); SetObjectExtra (b, drfp, NULL); - c = HiddenGroup (h, 5, 0, NULL); + c = HiddenGroup (h, 4, 0, NULL); SetGroupSpacing (c, 10, 10); - b = PushButton (c, "Remove BARCODE Tech from Selected Sequences", RemoveSelectedTechBtn); - SetObjectExtra (b, drfp, NULL); drfp->undo = PushButton (c, "Undo", BarcodeUndoButton); SetObjectExtra (drfp->undo, drfp, NULL); @@ -12825,7 +12846,7 @@ extern void BarcodeTestTool (IteM i) PushButton (c, "Dismiss", StdCancelButtonProc); - AlignObjects (ALIGN_CENTER, (HANDLE) drfp->clickable_list, (HANDLE) drfp->pass_fail_summary, (HANDLE) c3, (HANDLE) c, NULL); + AlignObjects (ALIGN_CENTER, (HANDLE) 
drfp->clickable_list, (HANDLE) drfp->pass_fail_summary, (HANDLE) c4, (HANDLE) c5, (HANDLE) c3, (HANDLE) c, NULL); RealizeWindow (w); diff --git a/sequin/sequin8.c b/sequin/sequin8.c index 2e9c7b03..4b3d4018 100644 --- a/sequin/sequin8.c +++ b/sequin/sequin8.c @@ -29,7 +29,7 @@ * * Version Creation Date: 2/3/98 * -* $Revision: 6.537 $ +* $Revision: 6.540 $ * * File Description: * @@ -403,7 +403,8 @@ extern void ExtendSeqLocToPosition (SeqLocPtr slp, Boolean end5, Int4 pos) } } -static void ExtendOnePartialFeatureEx (SeqFeatPtr sfp, Boolean extend5, Boolean extend3) + +static void ExtendOnePartialFeatureExEx (SeqFeatPtr sfp, Boolean extend5, Boolean extend3, Boolean stop_at_gap) { BioseqPtr bsp; Boolean partial3, partial5; @@ -416,7 +417,11 @@ static void ExtendOnePartialFeatureEx (SeqFeatPtr sfp, Boolean extend5, Boolean CheckSeqLocForPartial (sfp->location, &partial5, &partial3); if (partial5 && extend5) { - start_diff = ExtendSeqLocToEnd (sfp->location, bsp, TRUE); + if (stop_at_gap) { + start_diff = ExtendSeqLocToEndOrGap (sfp->location, bsp, TRUE); + } else { + start_diff = ExtendSeqLocToEnd (sfp->location, bsp, TRUE); + } if (start_diff > 0 && sfp->data.choice == SEQFEAT_CDREGION) { crp = (CdRegionPtr) sfp->data.value.ptrvalue; if (crp != NULL) { @@ -429,10 +434,27 @@ static void ExtendOnePartialFeatureEx (SeqFeatPtr sfp, Boolean extend5, Boolean } if (partial3 && extend3) { - ExtendSeqLocToEnd (sfp->location, bsp, FALSE); + if (stop_at_gap) { + ExtendSeqLocToEndOrGap (sfp->location, bsp, FALSE); + } else { + ExtendSeqLocToEnd (sfp->location, bsp, FALSE); + } } } + +static void ExtendOnePartialFeatureEx (SeqFeatPtr sfp, Boolean extend5, Boolean extend3) +{ + ExtendOnePartialFeatureExEx (sfp, extend5, extend3, FALSE); +} + + +static void ExtendOnePartialFeatureToEndOrGap (SeqFeatPtr sfp, Pointer userdata) +{ + ExtendOnePartialFeatureExEx (sfp, TRUE, TRUE, TRUE); +} + + static void ExtendOnePartialFeature (SeqFeatPtr sfp, Pointer userdata) { 
ExtendOnePartialFeatureEx (sfp, TRUE, TRUE); @@ -491,6 +513,7 @@ typedef struct extendpartialfeaturesform { DialoG feature_type; ButtoN extend5; ButtoN extend3; + ButtoN stop_at_gaps; DialoG string_constraint; ButtoN leave_dlg_up; } ExtendPartialFeaturesFormData, PNTR ExtendPartialFeaturesFormPtr; @@ -505,7 +528,7 @@ static void DoExtendPartialFeatures (ButtoN b) ValNodePtr vnp; StringConstraintPtr scp; ValNodePtr object_list; - Boolean extend5, extend3; + Boolean extend5, extend3, stop_at_gaps; f = (ExtendPartialFeaturesFormPtr) GetObjectExtra (b); if (f == NULL) return; @@ -538,9 +561,10 @@ static void DoExtendPartialFeatures (ButtoN b) extend5 = GetStatus (f->extend5); extend3 = GetStatus (f->extend3); + stop_at_gaps = GetStatus (f->stop_at_gaps); for (vnp = object_list; vnp != NULL; vnp = vnp->next) { if (vnp->choice == OBJ_SEQFEAT && vnp->data.ptrvalue != NULL) { - ExtendOnePartialFeatureEx (vnp->data.ptrvalue, extend5, extend3); + ExtendOnePartialFeatureExEx (vnp->data.ptrvalue, extend5, extend3, stop_at_gaps); } } object_list = ValNodeFree (object_list); @@ -601,6 +625,9 @@ extern void ExtendPartialFeaturesWithConstraint (IteM i) SetStatus (f->extend5, TRUE); f->extend3 = CheckBox (g, "Extend partial 3'", NULL); SetStatus (f->extend3, TRUE); + + f->stop_at_gaps = CheckBox (h, "Stop at gaps", NULL); + SetStatus (f->stop_at_gaps, TRUE); p2 = StaticPrompt (h, "Optional Constraint", 0, dialogTextHeight, programFont, 'c'); f->string_constraint = StringConstraintDialog (h, "Where feature text", FALSE, NULL, NULL); @@ -614,6 +641,7 @@ extern void ExtendPartialFeaturesWithConstraint (IteM i) AlignObjects (ALIGN_CENTER, (HANDLE) p1, (HANDLE) f->feature_type, (HANDLE) g, + (HANDLE) f->stop_at_gaps, (HANDLE) p2, (HANDLE) f->string_constraint, (HANDLE) c, @@ -12229,6 +12257,7 @@ static void AcceptTSAAssembly (ButtoN b) ValNodePtr err_list, coverage_report, vnp, ids_list, match_errs; SeqAlignPtr salp, salp_next; LogInfoPtr lip; + SeqEntryPtr sep; frm = 
(TSAAssemblyFormPtr) GetObjectExtra (b); if (frm == NULL) { @@ -12269,16 +12298,18 @@ static void AcceptTSAAssembly (ButtoN b) ValNodeLink (&coverage_report, err_list); err_list = coverage_report; + lip = OpenLog ("TSA Table Problems"); if (err_list != NULL) { - lip = OpenLog ("TSA Table Problems"); for (vnp = err_list; vnp != NULL; vnp = vnp->next) { fprintf (lip->fp, "%s\n", vnp->data.ptrvalue); } lip->data_in_log = TRUE; - CloseLog (lip); - lip = FreeLog (lip); err_list = ValNodeFreeData (err_list); } + sep = GetTopSeqEntryForEntityID (frm->input_entityID); + VisitBioseqsInSep (sep, lip, ReportNonTSABioseqs); + CloseLog (lip); + lip = FreeLog (lip); } ObjMgrSetDirtyFlag (frm->input_entityID, TRUE); |