diff options
author | Aaron M. Ucko <ucko@debian.org> | 2005-04-29 15:34:56 +0000 |
---|---|---|
committer | Aaron M. Ucko <ucko@debian.org> | 2005-04-29 15:34:56 +0000 |
commit | 402b112099aa816a02fd502b7f0261a99fe7126a (patch) | |
tree | 614fef3fedb6a920352586fc76cd1b0c828cffd2 /api/edutil.c | |
parent | e0f3c07fe198b2ecfa46997942fd22a48655373e (diff) |
Load /tmp/.../ncbi-tools6-6.1.20050429 into
ncbi-tools6/branches/upstream/current.
Diffstat (limited to 'api/edutil.c')
-rw-r--r-- | api/edutil.c | 937 |
1 files changed, 773 insertions, 164 deletions
diff --git a/api/edutil.c b/api/edutil.c index 6169301f..525a8bcb 100644 --- a/api/edutil.c +++ b/api/edutil.c @@ -29,7 +29,7 @@ * * Version Creation Date: 2/4/94 * -* $Revision: 6.39 $ +* $Revision: 6.47 $ * * File Description: Sequence editing utilities * @@ -39,6 +39,36 @@ * ------- ---------- ----------------------------------------------------- * * $Log: edutil.c,v $ +* Revision 6.47 2005/04/28 20:10:31 bollin +* added new function AdjustFeaturesForInsertion which is called by BioseqInsert +* and also by a new function in sequin3.c for converting a raw bioseq to a delta +* and inserting gaps +* +* Revision 6.46 2005/04/06 19:33:15 bollin +* made it possible to insert and remove gaps from delta sequences +* +* Revision 6.45 2005/03/18 20:51:10 bollin +* only change frame when CDS location has been changed, change anticodon locations +* and code breaks when locations have just been shifted +* +* Revision 6.44 2005/03/08 21:14:44 bollin +* strand argument in SeqLocCopyRegion is Seq_strand_minus when features +* should be reverse-complemented, does not actually indicate the strand to +* which a feature should be copied +* +* Revision 6.43 2005/02/28 16:53:40 bollin +* corrected Unix compiler warnings +* +* Revision 6.42 2005/02/28 16:08:35 bollin +* added utilities for editing delta sequences +* +* Revision 6.41 2005/01/24 17:00:58 bollin +* only change frames, fix code break locations, and fix anticodon locations +* when feature location is changed in SeqFeatDelete +* +* Revision 6.40 2004/11/17 21:19:18 lavr +* AffectedFeatFree() to return NULL on afp == NULL +* * Revision 6.39 2004/10/08 16:04:16 bollin * added ability to check when an action will remove a feature * @@ -1032,7 +1062,6 @@ NLM_EXTERN Boolean LIBCALL SeqDeleteByLoc (SeqLocPtr slp, Boolean do_feat, Boole Boolean retval = FALSE; Int2 numloc, i = 0, ctr, pick, totloc; SeqLocPtr PNTR locs, PNTR tlocs, PNTR theorder; - SeqIdPtr the_id = NULL; BioseqPtr bsp; Int4 tstart, tstop; @@ -1193,35 +1222,38 @@ NLM_EXTERN Int2 LIBCALL SeqFeatDelete (SeqFeatPtr sfp, SeqIdPtr target, Int4 fro si.from = from; si.to = to; - CheckSeqLocForPartial (sfp->location, &partial5, &partial3); - strand = SeqLocStrand (sfp->location); - bsp = BioseqFindFromSeqLoc (sfp->location); + CheckSeqLocForPartial (sfp->location, &partial5, &partial3); + strand = SeqLocStrand (sfp->location); + bsp = BioseqFindFromSeqLoc (sfp->location); sfp->location = SeqLocDelete(sfp->location, target, from, to, merge, &changed); sfp->product = SeqLocDelete(sfp->product, target, from, to, merge, &changed); if (sfp->location == NULL) return 2; - + switch (sfp->data.choice) { case SEQFEAT_CDREGION: /* cdregion */ crp = (CdRegionPtr)(sfp->data.value.ptrvalue); - /* adjust frame */ - if ((strand == Seq_strand_minus && bsp != NULL && to == bsp->length - 1 && partial5) - || (strand != Seq_strand_minus && from == 0 && partial5)) - { - if (crp->frame == 0) - { - crp->frame = 1; - } - new_frame = crp->frame - ((to - from + 1) % 3); - if (new_frame < 1) + if (changed) + { + /* adjust frame */ + if ((strand == Seq_strand_minus && bsp != NULL && to == bsp->length - 1 && partial5) + || (strand != Seq_strand_minus && from == 0 && partial5)) { - new_frame += 3; - } - crp->frame = new_frame; - } + if (crp->frame == 0) + { + crp->frame = 1; + } + new_frame = crp->frame - ((to - from + 1) % 3); + if (new_frame < 1) + { + new_frame += 3; + } + crp->frame = new_frame; + } + } /* fix code_break locations */ prevcbp = NULL; for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp) @@ -2145,7 +2177,7 @@ static Int2 LIBCALL IndexedSeqFeatsCopy (BioseqPtr newbsp, BioseqPtr oldbsp, Int { Int2 ctr=0; - SeqFeatPtr head=NULL, sfp, last=NULL, newsfp; + SeqFeatPtr sfp, last=NULL, newsfp; SeqInt si; ValNode vn; ValNodePtr region; @@ -2249,7 +2281,7 @@ NLM_EXTERN Int2 LIBCALL SeqFeatsCopy (BioseqPtr newbsp, BioseqPtr oldbsp, Int4 f { Int2 ctr=0; BioseqContextPtr bcp = NULL; - SeqFeatPtr head=NULL, sfp, last=NULL, newsfp; + SeqFeatPtr sfp, last=NULL, newsfp; SeqInt si; ValNode vn; ValNodePtr region; @@ -2359,14 +2391,13 @@ NLM_EXTERN Int2 LIBCALL SeqFeatsCopy (BioseqPtr newbsp, BioseqPtr oldbsp, Int4 f NLM_EXTERN SeqLocPtr LIBCALL SeqLocCopyRegion(SeqIdPtr newid, SeqLocPtr head, BioseqPtr oldbsp, Int4 from, Int4 to, Uint1 strand, BoolPtr split) { - SeqLocPtr newhead = NULL, last=NULL, tmp, slp, prev, next, thead; + SeqLocPtr newhead = NULL, tmp, slp, prev, next, thead; SeqIntPtr sip, sip2; SeqPntPtr spp, spp2; PackSeqPntPtr pspp, pspp2; SeqBondPtr sbp, sbp2; SeqIdPtr sidp, oldids; Int4 numpnt, i, tpos, len, intcnt, othercnt; - Pointer ptr = NULL; Boolean dropped_one; IntFuzzPtr ifp; ValNode vn; @@ -2634,10 +2665,6 @@ NLM_EXTERN SeqLocPtr LIBCALL SeqLocCopyRegion(SeqIdPtr newid, SeqLocPtr head, Bi sip2->if_from = sip2->if_to; sip2->if_to = ifp; } - else if (sip2->strand == Seq_strand_minus) - { - sip2->strand = strand; - } newhead = ValNodeNew(NULL); newhead->choice = SEQLOC_INT; @@ -2823,6 +2850,134 @@ NLM_EXTERN void LIBCALL IntFuzzClip(IntFuzzPtr ifp, Int4 from, Int4 to, Uint1 st return; } +extern void +AdjustFeaturesForInsertion +(BioseqPtr tobsp, + SeqIdPtr to_id, + Int4 pos, + Int4 len, + Boolean do_split) +{ + Uint2 entityID; + SeqFeatPtr sfp; + CdRegionPtr crp; + CodeBreakPtr cbp, prevcbp, nextcbp; + RnaRefPtr rrp; + tRNAPtr trp; + SeqMgrFeatContext fcontext; + ValNodePtr prods, vnp; + BioseqContextPtr bcp; + + if (tobsp == NULL || to_id == NULL) + { + return; + } + + entityID = ObjMgrGetEntityIDForPointer (tobsp); + if (entityID > 0 && SeqMgrFeaturesAreIndexed (entityID)) { + sfp = NULL; + while ((sfp = SeqMgrGetNextFeature (tobsp, sfp, 0, 0, &fcontext)) != NULL) + { + sfp->location = SeqLocInsert (sfp->location, to_id,pos, len, do_split, NULL); + switch (sfp->data.choice) + { + case SEQFEAT_CDREGION: /* cdregion */ + crp = (CdRegionPtr)(sfp->data.value.ptrvalue); + prevcbp = NULL; + for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp) + { + nextcbp = cbp->next; + cbp->loc = SeqLocInsert (cbp->loc, to_id,pos, len, do_split, NULL); + if (cbp->loc == NULL) + { + if (prevcbp != NULL) + prevcbp->next = nextcbp; + else + crp->code_break = nextcbp; + cbp->next = NULL; + CodeBreakFree (cbp); + } + else + prevcbp = cbp; + } + break; + case SEQFEAT_RNA: + rrp = (RnaRefPtr)(sfp->data.value.ptrvalue); + if (rrp->ext.choice == 2) /* tRNA */ + { + trp = (tRNAPtr)(rrp->ext.value.ptrvalue); + if (trp->anticodon != NULL) + { + trp->anticodon = SeqLocInsert (trp->anticodon, to_id,pos, len, do_split, NULL); + } + } + break; + default: + break; + } + } + + /* adjust features pointing by product */ + prods = SeqMgrGetSfpProductList (tobsp); + for (vnp = prods; vnp != NULL; vnp = vnp->next) { + sfp = (SeqFeatPtr) vnp->data.ptrvalue; + if (sfp == NULL) continue; + sfp->product = SeqLocInsert (sfp->product, to_id,pos, len, do_split, NULL); + } + + } else { + bcp = BioseqContextNew(tobsp); + sfp = NULL; + /* adjust features pointing by location */ + while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL) + { + sfp->location = SeqLocInsert(sfp->location, to_id,pos, len, do_split, NULL); + switch (sfp->data.choice) + { + case SEQFEAT_CDREGION: /* cdregion */ + crp = (CdRegionPtr)(sfp->data.value.ptrvalue); + prevcbp = NULL; + for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp) + { + nextcbp = cbp->next; + cbp->loc = SeqLocInsert(cbp->loc, to_id,pos, len, do_split, NULL); + if (cbp->loc == NULL) + { + if (prevcbp != NULL) + prevcbp->next = nextcbp; + else + crp->code_break = nextcbp; + cbp->next = NULL; + CodeBreakFree(cbp); + } + else + prevcbp = cbp; + } + break; + case SEQFEAT_RNA: + rrp = (RnaRefPtr)(sfp->data.value.ptrvalue); + if (rrp->ext.choice == 2) /* tRNA */ + { + trp = (tRNAPtr)(rrp->ext.value.ptrvalue); + if (trp->anticodon != NULL) + { + trp->anticodon = SeqLocInsert(trp->anticodon, to_id,pos, len, do_split, NULL); + } + } + break; + default: + break; + } + } + + sfp = NULL; + /* adjust features pointing by product */ + while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 1)) != NULL) + sfp->product = SeqLocInsert(sfp->product, to_id,pos, len, do_split, NULL); + BioseqContextFree(bcp); + } +} + /***************************************************************************** * * BioseqInsert (from_id, from, to, strand, to_id, pos, from_feat, to_feat, @@ -2884,10 +3039,6 @@ NLM_EXTERN Boolean LIBCALL BioseqInsert (SeqIdPtr from_id, Int4 from, Int4 to, U RnaRefPtr rrp; tRNAPtr trp; SeqEntryPtr oldscope; - Uint2 entityID; - SeqMgrFeatContext fcontext; - ValNodePtr prods, vnp; - if ((from_id == NULL) || (to_id == NULL)) return FALSE; @@ -3100,110 +3251,7 @@ NLM_EXTERN Boolean LIBCALL BioseqInsert (SeqIdPtr from_id, Int4 from, Int4 to, U if (to_feat) /* fix up sourceid Bioseq feature table(s) */ { - entityID = ObjMgrGetEntityIDForPointer (tobsp); - if (entityID > 0 && SeqMgrFeaturesAreIndexed (entityID)) { - sfp = NULL; - while ((sfp = SeqMgrGetNextFeature (tobsp, sfp, 0, 0, &fcontext)) != NULL) - { - sfp->location = SeqLocInsert (sfp->location, to_id,pos, len, do_split, NULL); - switch (sfp->data.choice) - { - case SEQFEAT_CDREGION: /* cdregion */ - crp = (CdRegionPtr)(sfp->data.value.ptrvalue); - prevcbp = NULL; - for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp) - { - nextcbp = cbp->next; - cbp->loc = SeqLocInsert (cbp->loc, to_id,pos, len, do_split, NULL); - if (cbp->loc == NULL) - { - if (prevcbp != NULL) - prevcbp->next = nextcbp; - else - crp->code_break = nextcbp; - cbp->next = NULL; - CodeBreakFree (cbp); - } - else - prevcbp = cbp; - } - break; - case SEQFEAT_RNA: - rrp = (RnaRefPtr)(sfp->data.value.ptrvalue); - if (rrp->ext.choice == 2) /* tRNA */ - { - trp = (tRNAPtr)(rrp->ext.value.ptrvalue); - if (trp->anticodon != NULL) - { - trp->anticodon = SeqLocInsert (trp->anticodon, to_id,pos, len, do_split, NULL); - } - } - break; - default: - break; - } - } - - /* adjust features pointing by product */ - prods = SeqMgrGetSfpProductList (tobsp); - for (vnp = prods; vnp != NULL; vnp = vnp->next) { - sfp = (SeqFeatPtr) vnp->data.ptrvalue; - if (sfp == NULL) continue; - sfp->product = SeqLocInsert (sfp->product, to_id,pos, len, do_split, NULL); - } - - } else { - bcp = BioseqContextNew(tobsp); - sfp = NULL; - /* adjust features pointing by location */ - while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL) - { - sfp->location = SeqLocInsert(sfp->location, to_id,pos, len, do_split, NULL); - switch (sfp->data.choice) - { - case SEQFEAT_CDREGION: /* cdregion */ - crp = (CdRegionPtr)(sfp->data.value.ptrvalue); - prevcbp = NULL; - for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp) - { - nextcbp = cbp->next; - cbp->loc = SeqLocInsert(cbp->loc, to_id,pos, len, do_split, NULL); - if (cbp->loc == NULL) - { - if (prevcbp != NULL) - prevcbp->next = nextcbp; - else - crp->code_break = nextcbp; - cbp->next = NULL; - CodeBreakFree(cbp); - } - else - prevcbp = cbp; - } - break; - case SEQFEAT_RNA: - rrp = (RnaRefPtr)(sfp->data.value.ptrvalue); - if (rrp->ext.choice == 2) /* tRNA */ - { - trp = (tRNAPtr)(rrp->ext.value.ptrvalue); - if (trp->anticodon != NULL) - { - trp->anticodon = SeqLocInsert(trp->anticodon, to_id,pos, len, do_split, NULL); - } - } - break; - default: - break; - } - } - - sfp = NULL; - /* adjust features pointing by product */ - while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 1)) != NULL) - sfp->product = SeqLocInsert(sfp->product, to_id,pos, len, do_split, NULL); - - BioseqContextFree(bcp); - } + AdjustFeaturesForInsertion (tobsp, to_id, pos, len, do_split); } if (from_feat) /* add source Bioseq features to sourceid */ @@ -3882,11 +3930,12 @@ static AffectedFeatPtr AffectedFeatNew (void) afp->feat_before = NULL; afp->feat_after = NULL; } + return afp; } static AffectedFeatPtr AffectedFeatFree (AffectedFeatPtr afp) { - if (afp == NULL) return; + if (afp == NULL) return NULL; afp->feat_before = SeqFeatFree (afp->feat_before); afp->feat_after = SeqFeatFree (afp->feat_after); afp = MemFree (afp); @@ -4024,7 +4073,7 @@ NLM_EXTERN void SeqEdInsertAdjustRNA * * *****************************************************************************/ -static SeqLocPtr LIBCALL SeqEdSeqLocInsert (SeqLocPtr head, BioseqPtr target, Int4 pos, Int4 len, +NLM_EXTERN SeqLocPtr LIBCALL SeqEdSeqLocInsert (SeqLocPtr head, BioseqPtr target, Int4 pos, Int4 len, Boolean split, SeqIdPtr newid) { SeqIntPtr sip, sip2; @@ -4873,20 +4922,390 @@ static void SeqEdInsertAdjustFeat (SeqFeatPtr sfp, SeqEdJournalPtr sejp, Int4 in } } +static Boolean IsDeltaSeqGap (DeltaSeqPtr dsp) +{ + SeqLitPtr slip; + if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL) + { + return FALSE; + } + slip = (SeqLitPtr) (dsp->data.ptrvalue); + if (slip->seq_data == NULL) + { + return TRUE; + } + else + { + return FALSE; + } +} + +static Boolean IsDeltaSeqUnknownGap (DeltaSeqPtr dsp) +{ + SeqLitPtr slip; + if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL) + { + return FALSE; + } + slip = (SeqLitPtr) (dsp->data.ptrvalue); + if (slip->seq_data == NULL && slip->fuzz != NULL && slip->fuzz->choice == 4) + { + return TRUE; + } + else + { + return FALSE; + } +} + +static DeltaSeqPtr GetDeltaSeqForOffset (BioseqPtr bsp, Int4 offset, Int4Ptr seqstart) +{ + Int4 curr_pos = 0; + Boolean found = FALSE; + SeqLocPtr slp; + SeqLitPtr slip = NULL; + DeltaSeqPtr dsp; + + if (bsp == NULL || bsp->repr != Seq_repr_delta + || bsp->seq_ext_type != 4 || bsp->seq_ext == NULL + || offset < 0) + { + return NULL; + } + + if (seqstart != NULL) + { + *seqstart = 0; + } + dsp = (DeltaSeqPtr) bsp->seq_ext; + while (dsp != NULL && !found) + { + if (dsp->data.ptrvalue == NULL) continue; + if (dsp->choice == 1) + { /* SeqLoc */ + slp = (SeqLocPtr)(dsp->data.ptrvalue); + curr_pos += SeqLocLen (slp); + } + else if (dsp->choice == 2) + { + slip = (SeqLitPtr) (dsp->data.ptrvalue); + curr_pos += slip->length; + } + if (curr_pos > offset + || (curr_pos == offset + && (dsp->next == NULL || ! IsDeltaSeqGap (dsp)))) + { + found = TRUE; + } + else + { + if (seqstart != NULL) + { + *seqstart = curr_pos; + } + dsp=dsp->next; + } + } + + return dsp; +} + +static Boolean +SeqEdInsertByteStore +(ByteStorePtr seq_data, + Int4 insert_point, + CharPtr char_data, + Int4 num_chars, + Uint1 moltype) +{ + Char ch; + Int4 i; + + if (seq_data == NULL || insert_point < 0 || char_data == NULL || num_chars < 1) + { + return FALSE; + } + BSSeek(seq_data, insert_point, SEEK_SET); + Nlm_BSAdd(seq_data, num_chars, FALSE); + BSSeek(seq_data, insert_point, SEEK_SET); + for (i = 0; i < num_chars; i++) + { + ch = TO_UPPER (char_data [i]); + if ( ISA_na (moltype) ) { + if (ch == 'U') ch = 'T'; + if (ch == 'X') ch = 'N'; + if ( StringChr ("EFIJLOPQXZ-.*", ch) == NULL ) { + BSPutByte ( seq_data, (Int2) ch ); + } + } + else + { + if ( StringChr("JO-.", ch) == NULL ) { + BSPutByte ( seq_data, (Int2) ch ); + } + } + } + return TRUE; +} + +static Boolean SeqEdInsertRaw (SeqEdJournalPtr sejp, Int4 insert_point) +{ + Boolean rval; + + if (sejp == NULL || sejp->bsp == NULL || sejp->bsp->repr != Seq_repr_raw + || sejp->char_data == NULL || sejp->num_chars == 0 || insert_point < 0) + { + return FALSE; + } + + rval = SeqEdInsertByteStore (sejp->bsp->seq_data, insert_point, + sejp->char_data, sejp->num_chars, sejp->moltype); + + if (rval) + { + sejp->bsp->length += sejp->num_chars; + } + return rval; +} + +static Boolean +SeqEdInsertIntoDeltaGap +(DeltaSeqPtr dsp, + SeqEdJournalPtr sejp, + Int4 insert_point) +{ + SeqLitPtr slip, slip_data, slip_second_gap; + Boolean rval = FALSE; + DeltaSeqPtr dsp_data, dsp_second_gap; + IntFuzzPtr ifp = NULL; + + if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL) + { + return rval; + } + slip = (SeqLitPtr) dsp->data.ptrvalue; + if (slip->seq_data != NULL) + { + return rval; + } + + if (slip->fuzz != NULL && slip->fuzz->choice == 4) + { + ifp = IntFuzzNew (); + ifp->choice = 4; + } + + /* split the gap in two and create a new DeltaSeqPtr in the middle */ + slip_data = SeqLitNew (); + slip_data->seq_data_type = Seq_code_iupacna; + slip_data->seq_data = BSNew (sejp->num_chars); + rval = SeqEdInsertByteStore (slip_data->seq_data, 0, + sejp->char_data, sejp->num_chars, sejp->moltype); + if (rval) + { + slip_data->length = sejp->num_chars; + /* create second gap */ + slip_second_gap = SeqLitNew (); + slip_second_gap->length = slip->length - insert_point; + slip_second_gap->fuzz = ifp; + /* truncate first gap */ + slip->length = insert_point; + dsp_data = ValNodeNew (NULL); + dsp_data->choice = 2; + dsp_data->data.ptrvalue = slip_data; + dsp_second_gap = ValNodeNew (NULL); + dsp_second_gap->choice = 2; + dsp_second_gap->data.ptrvalue = slip_second_gap; + dsp_second_gap->next = dsp->next; + dsp_data->next = dsp_second_gap; + dsp->next = dsp_data; + } + return rval; +} + +static Boolean SeqEdInsertDelta (SeqEdJournalPtr sejp, Int4 insert_point) +{ + DeltaSeqPtr dsp; + SeqLitPtr slip; + Int4 seqstart = 0; + ByteStorePtr bs_new; + Boolean rval; + + if (sejp == NULL || sejp->bsp == NULL || sejp->bsp->repr != Seq_repr_delta + || sejp->bsp->seq_ext_type != 4 + || sejp->char_data == NULL || sejp->num_chars == 0 + || insert_point < 0) + { + return FALSE; + } + + dsp = GetDeltaSeqForOffset (sejp->bsp, insert_point, &seqstart); + + if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL) + { + return FALSE; + } + + slip = (SeqLitPtr) dsp->data.ptrvalue; + insert_point -= seqstart; + + if (IsDeltaSeqGap (dsp)) + { + rval = SeqEdInsertIntoDeltaGap (dsp, sejp, insert_point); + } + else + { + if (slip->seq_data_type != Seq_code_iupacna) + { + bs_new = BSConvertSeq(slip->seq_data, Seq_code_iupacna, + slip->seq_data_type, + slip->length); + slip->seq_data_type = Seq_code_iupacna; + slip->seq_data = bs_new; + } + + rval = SeqEdInsertByteStore (slip->seq_data, insert_point, + sejp->char_data, sejp->num_chars, + sejp->moltype); + } + + if (rval) + { + slip->length += sejp->num_chars; + sejp->bsp->length += sejp->num_chars; + } + return rval; +} + +static Boolean +SeqEdInsertGap (SeqEdJournalPtr sejp, Int4 insert_point) +{ + DeltaSeqPtr dsp, dsp_gap, dsp_after; + Int4 seqstart = 0; + SeqLitPtr slip, slip_before, slip_gap, slip_after; + ByteStorePtr bs_new; + + if (sejp == NULL || sejp->bsp == NULL || sejp->bsp->repr != Seq_repr_delta + || sejp->bsp->seq_ext_type != 4 + || sejp->char_data == NULL || sejp->num_chars == 0 + || insert_point < 0) + { + return FALSE; + } + + dsp = GetDeltaSeqForOffset (sejp->bsp, insert_point, &seqstart); + + if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL) + { + return FALSE; + } + + slip_gap = SeqLitNew (); + slip_gap->seq_data_type = 0; + slip_gap->seq_data = NULL; + slip_gap->length = sejp->num_chars; + if (sejp->unknown_gap) + { + slip_gap->fuzz = IntFuzzNew (); + slip_gap->fuzz->choice = 4; + } + + slip = (SeqLitPtr) (dsp->data.ptrvalue); + + /* make insert_point relative to start of this SeqLit */ + insert_point -= seqstart; + + if (insert_point == 0) + { + /* insert gap before */ + dsp_after = ValNodeNew (NULL); + dsp_after->choice = 2; + dsp_after->data.ptrvalue = slip; + dsp_after->next = dsp->next; + dsp->next = dsp_after; + dsp->data.ptrvalue = slip_gap; + } + else if (insert_point == slip->length) + { + /* insert gap after */ + dsp_after = ValNodeNew (NULL); + dsp_after->choice = 2; + dsp_after->data.ptrvalue = slip_gap; + dsp_after->next = dsp->next; + dsp->next = dsp_after; + } + else if (IsDeltaSeqUnknownGap (dsp)) + { + /* can't insert gap inside gap of unknown length */ + slip_gap = SeqLitFree (slip_gap); + return FALSE; + } + else if (IsDeltaSeqGap (dsp) && !sejp->unknown_gap) + { + slip_gap = SeqLitFree (slip_gap); + slip->length += sejp->num_chars; + } + else + { + slip_before = SeqLitNew (); + slip_before->seq_data_type = Seq_code_iupacna; + slip_before->length = insert_point; + + slip_after = SeqLitNew (); + slip_after->seq_data_type = Seq_code_iupacna; + slip_after->length = slip->length - insert_point; + + if (slip->seq_data != NULL) + { + if (slip->seq_data_type != Seq_code_iupacna) + { + bs_new = BSConvertSeq(slip->seq_data, Seq_code_iupacna, + slip->seq_data_type, + slip->length); + slip->seq_data_type = Seq_code_iupacna; + slip->seq_data = bs_new; + } + slip_before->seq_data = BSNew (slip_before->length); + slip_after->seq_data = BSNew (slip_after->length); + + BSSeek(slip->seq_data, 0, SEEK_SET); + BSInsertFromBS (slip_before->seq_data, slip->seq_data, slip_before->length); + BSInsertFromBS (slip_after->seq_data, slip->seq_data, slip_after->length); + } + + dsp_after = ValNodeNew (NULL); + dsp_after->choice = 2; + dsp_after->data.ptrvalue = slip_after; + dsp_after->next = dsp->next; + + dsp_gap = ValNodeNew (NULL); + dsp_gap->choice = 2; + dsp_gap->data.ptrvalue = slip_gap; + dsp_gap->next = dsp_after; + + dsp->data.ptrvalue = slip_before; + dsp->next = dsp_gap; + slip = SeqLitFree (slip); + } + + sejp->bsp->length += sejp->num_chars; + + return TRUE; +} + NLM_EXTERN Boolean SeqEdInsert (SeqEdJournalPtr sejp) { Int4 len; - Int4 i; - Char ch; SeqFeatPtr sfp; SeqMgrFeatContext fcontext; ValNodePtr prods, vnp; BioseqContextPtr bcp; Int4 insert_point; Boolean recreated_feats = FALSE; + Boolean rval = FALSE; - if (sejp == NULL || sejp->bsp == NULL || sejp->bsp->repr != Seq_repr_raw + if (sejp == NULL || sejp->bsp == NULL || sejp->char_data == NULL || sejp->num_chars == 0) { return FALSE; @@ -4906,29 +5325,24 @@ SeqEdInsert (SeqEdJournalPtr sejp) if ((insert_point < 0) || (insert_point > len)) return FALSE; - BSSeek(sejp->bsp->seq_data, insert_point, SEEK_SET); - Nlm_BSAdd(sejp->bsp->seq_data, sejp->num_chars, FALSE); - BSSeek(sejp->bsp->seq_data, insert_point, SEEK_SET); - for (i = 0; i < sejp->num_chars; i++) + if (sejp->action == eSeqEdInsertGap || sejp->action == eSeqEdDeleteGap) { - ch = TO_UPPER (sejp->char_data [i]); - if ( ISA_na (sejp->moltype) ) { - if (ch == 'U') ch = 'T'; - if (ch == 'X') ch = 'N'; - if ( StringChr ("EFIJLOPQXZ-.*", ch) == NULL ) { - BSPutByte ( sejp->bsp->seq_data, (Int2) ch ); - } - } - else - { - if ( StringChr("JO-.", ch) == NULL ) { - BSPutByte ( sejp->bsp->seq_data, (Int2) ch ); - } - } - } - - sejp->bsp->length += sejp->num_chars; - + rval = SeqEdInsertGap (sejp, insert_point); + } + else if (sejp->bsp->repr == Seq_repr_raw) + { + rval = SeqEdInsertRaw (sejp, insert_point); + } + else if (sejp->bsp->repr == Seq_repr_delta) + { + rval = SeqEdInsertDelta (sejp, insert_point); + } + + if (!rval) + { + return FALSE; + } + /* fix features */ if (sejp->entityID > 0 && SeqMgrFeaturesAreIndexed (sejp->entityID)) { @@ -4979,6 +5393,7 @@ SeqEdInsert (SeqEdJournalPtr sejp) return TRUE; } + /* This section contains code for deleting from sequences and feature locations, adapted from * that found in edutil.c */ @@ -5068,6 +5483,169 @@ static Int2 LIBCALL SeqEdSeqFeatDelete (SeqFeatPtr sfp, BioseqPtr target, Int4 f return 0; } +static Boolean SeqEdDeleteFromDeltaSeq (DeltaSeqPtr dsp, Int4 from, Int4 to) +{ + ByteStorePtr bs_new; + SeqLitPtr slip; + + if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL) + { + return FALSE; + } + + slip = (SeqLitPtr) dsp->data.ptrvalue; + + if (from < 0 || to > slip->length) + { + return FALSE; + } + if (to < 0) + { + to = slip->length - 1; + } + + if (! IsDeltaSeqGap (dsp)) + { + if (slip->seq_data_type != Seq_code_iupacna) + { + bs_new = BSConvertSeq(slip->seq_data, Seq_code_iupacna, + slip->seq_data_type, + slip->length); + slip->seq_data_type = Seq_code_iupacna; + slip->seq_data = bs_new; + } + BSSeek(slip->seq_data, from, SEEK_SET); + Nlm_BSDelete (slip->seq_data, to - from + 1); + } + slip->length -= (to - from + 1); + + return TRUE; +} + +static void DeleteFromSeqLit (SeqLitPtr slip, Int4 from, Int4 to) +{ + ByteStorePtr bs_new; + + if (slip == NULL) + { + return; + } + if (from < 0) + { + from = 0; + } + + if (to > slip->length - 1 || to < 0) + { + to = slip->length - 1; + } + + if (slip->seq_data != NULL) + { + if (slip->seq_data_type != Seq_code_iupacna) + { + bs_new = BSConvertSeq(slip->seq_data, Seq_code_iupacna, + slip->seq_data_type, + slip->length); + slip->seq_data_type = Seq_code_iupacna; + slip->seq_data = bs_new; + } + BSSeek(slip->seq_data, from, SEEK_SET); + Nlm_BSDelete (slip->seq_data, to - from + 1); + } + slip->length -= (to - from + 1); +} + +static Boolean SeqEdDeleteFromDeltaBsp (BioseqPtr bsp, Int4 from, Int4 to) +{ + Boolean retval = FALSE; + DeltaSeqPtr dsp, dsp_next, prev_dsp; + SeqLitPtr slip; + Int4 curr_pos = 0; + Int4 del_to, del_from; + Int4 piece_len; + SeqLocPtr slp; + + if (bsp == NULL || bsp->repr != Seq_repr_delta + || bsp->seq_ext_type != 4 || bsp->seq_ext == NULL) + { + return retval; + } + + prev_dsp = NULL; + dsp = (DeltaSeqPtr) bsp->seq_ext; + while (dsp != NULL && curr_pos < to) + { + dsp_next = dsp->next; + piece_len = 0; + /* remove empty dsps */ + if (dsp->data.ptrvalue == NULL) + { + /* skip */ + prev_dsp = dsp; + } + else if (dsp->choice == 1) + { /* SeqLoc */ + slp = (SeqLocPtr)(dsp->data.ptrvalue); + piece_len = SeqLocLen (slp); + prev_dsp = dsp; + } + else if (dsp->choice == 2) + { + slip = (SeqLitPtr) (dsp->data.ptrvalue); + piece_len = slip->length; + if (curr_pos + piece_len > from) + { + if (from > curr_pos) + { + del_from = from - curr_pos; + } + else + { + del_from = 0; + } + + if (to - curr_pos < slip->length - 1) + { + del_to = to - curr_pos; + } + else + { + del_to = slip->length - 1; + } + DeleteFromSeqLit (slip, del_from, del_to); + + /* remove empty delta seq parts */ + if (slip->length == 0) + { + if (prev_dsp == NULL) + { + bsp->seq_ext = dsp->next; + } + else + { + prev_dsp->next = dsp->next; + } + dsp->next = NULL; + slip = SeqLitFree (slip); + dsp = ValNodeFree (dsp); + } + else + { + prev_dsp = dsp; + } + } + else + { + prev_dsp = dsp; + } + } + curr_pos += piece_len; + dsp = dsp_next; + } + return TRUE; +} + static Boolean SeqEdDeleteFromSegOrDeltaBsp (BioseqPtr bsp, Int4 from, Int4 to) { SeqLocPtr tmp, head; @@ -5354,10 +5932,13 @@ NLM_EXTERN Boolean SeqEdDeleteFromBsp (SeqEdJournalPtr sejp, BoolPtr pfeats_dele retval = TRUE; break; case Seq_repr_seg: - case Seq_repr_delta: /* update segmented sequence */ retval = SeqEdDeleteFromSegOrDeltaBsp (sejp->bsp, sejp->offset, sejp->offset + sejp->num_chars - 1); break; + case Seq_repr_delta: + /* update delta sequence */ + retval = SeqEdDeleteFromDeltaBsp (sejp->bsp, sejp->offset, sejp->offset + sejp->num_chars - 1); + break; case Seq_repr_map: /* map bioseq */ retval = SeqEdDeleteFromMapBioseq (sejp->bsp, sejp->offset, sejp->offset + sejp->num_chars - 1); @@ -5390,6 +5971,34 @@ NLM_EXTERN Boolean SeqEdDeleteFromBsp (SeqEdJournalPtr sejp, BoolPtr pfeats_dele return retval; } +/* this function will indicate whether the interval on the Bioseq specified contains + * any gaps of unknown length. + */ +static Boolean DoesIntervalContainUnknownGap (BioseqPtr bsp, Int4 from, Int4 to) +{ + DeltaSeqPtr from_dsp, to_dsp, this_dsp; + Int4 from_start = 0, to_start = 0; + Boolean unknown_gap = FALSE; + + if (bsp == NULL || from < 0 || from >= bsp->length || to < 0 || to >= bsp->length) + { + return FALSE; + } + + from_dsp = GetDeltaSeqForOffset (bsp, from, &from_start); + to_dsp = GetDeltaSeqForOffset (bsp, to, &to_start); + + this_dsp = from_dsp; + while (!unknown_gap && this_dsp != NULL && (to_dsp == NULL || this_dsp != to_dsp->next)) + { + unknown_gap = IsDeltaSeqUnknownGap (this_dsp); + this_dsp = this_dsp->next; + } + + return unknown_gap; +} + + /* This section of code deals with editing the sequence by inserting and removing characters. * Functions are needed to change the indices for the affected features so that they will * display properly. |