summaryrefslogtreecommitdiff
path: root/api/salpedit.c
diff options
context:
space:
mode:
authorAaron M. Ucko <ucko@debian.org>2005-03-23 17:39:07 +0000
committerAaron M. Ucko <ucko@debian.org>2005-03-23 17:39:07 +0000
commita7c5bff619aa4850d09825888b5f46c7cd086f11 (patch)
tree2ab2dd2ff2977a5e0bcfdd90a17ca4f1a73181fc /api/salpedit.c
parentbfbecafe8b6142e8356e8447f0822b43e94b64b7 (diff)
Load ncbi (6.1.20010709) into ncbi-tools6/branches/upstream/current.
Diffstat (limited to 'api/salpedit.c')
-rw-r--r--api/salpedit.c4304
1 files changed, 4304 insertions, 0 deletions
diff --git a/api/salpedit.c b/api/salpedit.c
new file mode 100644
index 00000000..ae017372
--- /dev/null
+++ b/api/salpedit.c
@@ -0,0 +1,4304 @@
+#include <ncbi.h>
+#include <ncbimisc.h> /* ValNodeLen */
+#include <sequtil.h> /* for GetScoreAndEvalue, SeqIdDupList */
+#include <salpedit.h>
+#include <salpacc.h>
+#include <seqport.h>
+
+#define MIN_DIAG_LEN 20 /*minimal amount of overlap required for build_overlap_list<-MergeTwoAlignList */
+
+static Boolean DenseSegInsert(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, DenseSegPtr dsp, Boolean merge, BoolPtr seq_insert);
+static SeqAlignPtr AttachSeqInAlign(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, SeqAlignPtr head);
+
+static void OrderInt4(Int4Ptr x, Int4Ptr y) /* replace jzmisc: swap */
+{
+ Int4 temp;
+
+ if((*x) > (*y)){
+ temp = *x;
+ *x = *y;
+ *y = temp;
+ }
+}
+
+
+static SeqAlignPtr LIBCALL is_salp_in_sap (SeqAnnotPtr sap, Uint1 choice)
+{
+ SeqAlignPtr salp = NULL;
+
+ if (sap != NULL) {
+ for (; sap!= NULL; sap=sap->next) {
+ if (sap->type == choice) {
+ salp = (SeqAlignPtr) sap->data;
+ return salp;
+ }
+ }
+ }
+ return NULL;
+}
+
+/*******************************************
+***
+*** SeqAlignBioseqDeleteById
+*** if DenseDiag: delete the SeqAlign
+*** if DenseSeg: delete the SeqId, the starts positions,
+*** and the corresponding strands
+***
+********************************************/
+
+NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignBioseqDeleteById (SeqAlignPtr salphead, SeqIdPtr sip)
+{
+ SeqAlignPtr salp,
+ presalp = NULL,
+ nextsalp;
+ SeqIdPtr presip, nextsip, tmpsip;
+ DenseSegPtr dsp;
+ Int4 j, k;
+ Int2 index = 0;
+
+ salp = salphead;
+ while (salp!= NULL)
+ {
+ nextsalp = salp->next;
+ tmpsip =SeqIdPtrFromSeqAlign (salp);
+ if ((SeqIdOrderInBioseqIdList(sip, tmpsip)) > 0)
+ {
+ if (salp->segtype == 1 || salp->dim == 2)
+ {
+ if (salphead==NULL)
+ salphead = nextsalp;
+ if (presalp!=NULL)
+ presalp->next = nextsalp;
+ salp->next = NULL;
+ salp = SeqAlignFree (salp);
+ if (presalp!=NULL)
+ salp = presalp;
+ }
+ else if (salp->segtype == 2)
+ {
+ dsp = (DenseSegPtr) salp->segs;
+ presip = NULL;
+ while (tmpsip!=NULL)
+ {
+ nextsip = tmpsip->next;
+ if (SeqIdForSameBioseq (tmpsip, sip))
+ {
+ if (presip==NULL) {
+ dsp->ids = nextsip;
+ } else {
+ presip->next = nextsip;
+ }
+ tmpsip->next = NULL;
+ SeqIdFree (tmpsip);
+ k=0;
+ for (j=0; j<dsp->dim*dsp->numseg; j++) {
+ if (((j-index) % (dsp->dim))!=0) {
+ dsp->starts[k] = dsp->starts[j];
+ k++;
+ }
+ }
+ k=0;
+ if (dsp->strands) {
+ for (j=0; j<dsp->dim*dsp->numseg; j++) {
+ if (((j-index) % (dsp->dim))!=0) {
+ dsp->strands[k] = dsp->strands[j];
+ k++;
+ }
+ }
+ }
+ dsp->dim--;
+ salp->dim--;
+ }
+ else
+ presip = tmpsip;
+ tmpsip = nextsip;
+ index++;
+ }
+ }
+ }
+ presalp = salp;
+ salp = nextsalp;
+ }
+ return salphead;
+}
+
+static void SeqAlignBioseqDeleteByIdCallback (SeqEntryPtr sep, Pointer mydata,
+ Int4 index, Int2 indent)
+{
+ BioseqPtr bsp;
+ BioseqSetPtr bssp;
+ SeqAlignPtr salp,
+ salptmp;
+ SeqIdPtr sip;
+
+ sip = (SeqIdPtr)(mydata);
+ if (sip!=NULL && sep != NULL && sep->data.ptrvalue) {
+ if (IS_Bioseq(sep)) {
+ bsp = (BioseqPtr) sep->data.ptrvalue;
+ if (bsp!=NULL) {
+ salp = is_salp_in_sap (bsp->annot, 2);
+ if (salp!=NULL) {
+ for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
+ salptmp = SeqAlignBioseqDeleteById (salp, sip);
+ }
+ }
+ }
+ else if(IS_Bioseq_set(sep)) {
+ bssp = (BioseqSetPtr)sep->data.ptrvalue;
+ if (bssp!=NULL) {
+ salp = is_salp_in_sap (bssp->annot, 2);
+ if (salp!=NULL) {
+ for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
+ salptmp = SeqAlignBioseqDeleteById (salp, sip);
+ }
+ }
+ }
+ }
+}
+
+NLM_EXTERN Pointer LIBCALL SeqAlignBioseqDeleteByIdFromSeqEntry (SeqEntryPtr sep, SeqIdPtr sip)
+{
+ SeqEntryExplore (sep, (Pointer)sip, SeqAlignBioseqDeleteByIdCallback);
+ return sep;
+}
+
+/**************************************************************************
+***
+* Functions for editing a Seq-align object
+*
+***************************************************************************
+***/
+static SeqAlignPtr SeqAlignInsert(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, SeqAlignPtr head, Boolean merge, Boolean add_insertion)
+{
+ DenseSegPtr dsp;
+ DenseDiagPtr ddp;
+ StdSegPtr ssp;
+ Boolean seq_insert;
+ BioseqPtr bsp;
+ SeqAlignPtr align;
+
+ align = head;
+ seq_insert = FALSE;
+ while(align)
+ {
+ switch(align->segtype)
+ {
+ case 0:
+ break;
+ case 1:
+ ddp = (DenseDiagPtr)(align->segs);
+ break;
+ case 2:
+ dsp = (DenseSegPtr)(align->segs);
+ DenseSegInsert(from_id, from, to, strand, to_id, pos, dsp, merge, &seq_insert);
+ break;
+ case 3:
+ ssp = (StdSegPtr)(align->segs);
+ break;
+ default:
+ break;
+ }
+ align = align->next;
+ }
+
+ /*sequence is not inserted*/
+ if(!seq_insert && add_insertion)
+ {
+ bsp = BioseqFind(from_id);
+ if(bsp->repr != Seq_repr_seg)
+ head= AttachSeqInAlign(from_id, from, to, strand, to_id, pos, head);
+ }
+ return head;
+
+}
+
+static Int2 get_t_order(SeqIdPtr ids, SeqIdPtr t_id)
+{
+ Int2 t_order = 0;
+
+ if(t_id == NULL)
+ return -1;
+ while(ids)
+ {
+ if(SeqIdForSameBioseq(t_id, ids))
+ return t_order;
+ ++t_order;
+ ids = ids->next;
+ }
+ return -1;
+
+}
+
+
+
+static Boolean get_seg(ValNodePtr v_starts, Int2 dim, Int4Ptr starts)
+{
+ Int2 i;
+
+ for(i=0; i<dim && v_starts != NULL; ++i, v_starts = v_starts->next)
+ starts[i] = v_starts->data.intvalue;
+ return (i == dim);
+}
+
+
+static ValNodePtr Free_node(ValNodePtr v_starts, ValNodePtr pv_starts, Int2 dim)
+{
+ Int2 i;
+ ValNodePtr next = NULL;
+
+ for(i =0; i<dim; ++i)
+ {
+ next= v_starts->next;
+ v_starts->next = NULL;
+ ValNodeFree(v_starts);
+ v_starts = next;
+ }
+
+ while(dim > 1)
+ {
+ pv_starts = pv_starts->next;
+ --dim;
+ }
+ pv_starts->next = next;
+ return next;
+
+}
+
+static Boolean is_gap_segment(Int4Ptr c_starts, Int2 dim)
+{
+ Int2 i;
+ Int2 numgap = 0;
+
+ for(i =0; i<dim; ++i)
+ {
+ if(c_starts[i] == -1)
+ ++numgap;
+ }
+ return (numgap >= (dim -1));
+}
+
+static void mod_prev_start(ValNodePtr p_starts, Int4Ptr c_starts, Int4Ptr c_strands, Int2 dim)
+{
+ Int2 i;
+
+ for(i =0; i<dim; ++i)
+ {
+ if(c_strands[i] == (Int4)Seq_strand_minus)
+ p_starts->data.intvalue = c_starts[i];
+ p_starts = p_starts->next;
+ }
+}
+
+/****************************************************************************
+***
+* return TRUE if needs another round of modification
+*
+*****************************************************************************
+***/
+static Boolean mod_alignment(ValNodePtr v_starts, ValNodePtr v_strands, ValNodePtr v_lens, Int2 dim, Int2 m_order, Int2Ptr n_segs)
+{
+ ValNodePtr pv_starts, pv_lens, pv_strands;
+ Boolean has_prev;
+ Int4 c_len, p_len=0;
+ Int4 p_start, c_start;
+ Int2 i;
+ Boolean retval;
+ Int4Ptr cur_starts, prev_starts;
+ Int4Ptr cur_strands, prev_strands;
+ Boolean merge;
+
+ retval = FALSE;
+ cur_starts = (Int4Ptr) MemNew((size_t)dim * sizeof(Int4));
+ prev_starts = (Int4Ptr) MemNew((size_t)dim * sizeof(Int4));
+ cur_strands = (Int4Ptr) MemNew((size_t)dim * sizeof(Int4));
+ prev_strands = (Int4Ptr) MemNew((size_t)dim * sizeof(Int4));
+
+ has_prev = FALSE;
+ pv_starts = NULL;
+ pv_strands = NULL;
+ pv_lens = NULL;
+ while(v_starts && v_strands && v_lens)
+ {
+ get_seg(v_starts, dim, cur_starts);
+ get_seg(v_strands, dim, cur_strands);
+ c_len = v_lens->data.intvalue;
+ merge = FALSE;
+
+ if(has_prev) /*check if the two can be merged*/
+ {
+ merge = TRUE;
+ for(i =0; i<dim; ++i)
+ {
+ p_start = prev_starts[i];
+ c_start = cur_starts[i];
+ if(cur_strands[i] != prev_strands[i])
+ {
+ merge = FALSE;
+ break;
+ }
+ if(p_start == -1 && c_start != -1)
+ {
+ merge = FALSE;
+ break;
+ }
+ if(p_start != -1 && c_start == -1)
+ {
+ merge = FALSE;
+ break;
+ }
+ if(p_start != -1 && c_start != -1)
+ {
+ if(cur_strands[i] == (Int4)Seq_strand_minus)
+ {
+ if(c_start + c_len != p_start)
+ {
+ merge = FALSE;
+ break;
+ }
+ }
+ else
+ {
+ if(p_start + p_len != c_start)
+ {
+ merge = FALSE;
+ break;
+ }
+ }
+ }
+ }
+ }
+ if(merge)
+ {
+ p_len += c_len;
+ pv_lens->data.intvalue = p_len;
+ mod_prev_start(pv_starts, cur_starts, cur_strands, dim);
+ v_starts = Free_node(v_starts, pv_starts, dim);
+ v_strands = Free_node(v_strands, pv_strands, dim);
+ v_lens = Free_node(v_lens, pv_lens, 1);
+ -- (*n_segs);
+ retval = TRUE;
+ }
+ else
+ {
+ pv_starts = v_starts;
+ pv_strands = v_strands;
+ pv_lens = v_lens;
+ MemCopy(prev_starts, cur_starts, (size_t)dim * sizeof(Int4));
+ MemCopy(prev_strands, cur_strands, (size_t)dim * sizeof(Int4));
+ p_len = c_len;
+ for(i =0; i<dim; ++i)
+ {
+ v_starts = v_starts->next;
+ v_strands = v_strands->next;
+ }
+ v_lens = v_lens->next;
+ has_prev = TRUE;
+
+ }
+
+ }
+
+ MemFree(prev_starts);
+ MemFree(cur_starts);
+ MemFree(prev_strands);
+ MemFree(cur_strands);
+ return retval;
+
+}
+
+static void load_new_data(Int4Ptr starts, Uint1Ptr strands, Int4Ptr lens, ValNodePtr v_starts, ValNodePtr v_strands, ValNodePtr v_lens, Int2 dim, Int2 cur_seg)
+{
+ Int2 i, j;
+
+ for (i =0; i<cur_seg; ++i)
+ {
+ for(j =0; j<dim; ++j)
+ {
+ starts[i*dim+j] = v_starts->data.intvalue;
+ strands[i*dim+j] = (Uint1)(v_strands->data.intvalue);
+ v_starts = v_starts->next;
+ v_strands = v_strands->next;
+ }
+ lens[i] = v_lens->data.intvalue;
+ v_lens = v_lens->next;
+ }
+
+}
+
+
+static SeqIdPtr get_nth_id(SeqIdPtr ids, Int2 order)
+{
+ Int2 i;
+
+ i =0;
+ while(ids){
+ if(order == i)
+ return ids;
+ ++i;
+ ids = ids->next;
+ }
+
+ return NULL;
+}
+
+static DenseSegPtr make_dsp(ValNodePtr v_starts, ValNodePtr v_lens, ValNodePtr v_strands, Int2 m_order, Int2 dim, DenseSegPtr prev, Int2 cur_seg, Boolean merge, SeqIdPtr m_sip, SeqIdPtr s_sip)
+{
+ Int4Ptr starts, lens;
+ Uint1Ptr strands;
+ DenseSegPtr dsp;
+
+ /*while(merge)*/
+ if(merge)
+ merge = mod_alignment(v_starts, v_strands, v_lens, dim, m_order, &cur_seg);
+ starts = (Int4Ptr) MemNew((size_t)(dim* cur_seg) * sizeof(Int4));
+ lens= (Int4Ptr) MemNew((size_t)(cur_seg) * sizeof(Int4));
+ strands= (Uint1Ptr) MemNew((size_t)(dim * cur_seg) * sizeof(Uint1));
+
+ load_new_data(starts, strands, lens, v_starts, v_strands, v_lens, dim, cur_seg);
+ ValNodeFree(v_starts);
+ ValNodeFree(v_strands);
+ ValNodeFree(v_lens);
+ if(prev)
+ {
+ MemFree(prev->starts);
+ MemFree(prev->lens);
+ MemFree(prev->strands);
+ prev->numseg = cur_seg;
+ prev->starts = starts;
+ prev->strands = strands;
+ prev->lens = lens;
+ return prev;
+ }
+ else
+ {
+ if(m_sip == NULL || s_sip == NULL)
+ {
+ Message(MSG_ERROR, "Fail to get the new DenseSeg");
+ MemFree(starts);
+ MemFree(lens);
+ MemFree(strands);
+ return NULL;
+ }
+ dsp = DenseSegNew();
+ dsp->dim = dim;
+ dsp->ids = SeqIdDup(m_sip);
+ dsp->ids->next = SeqIdDup(s_sip);
+ dsp->numseg = cur_seg;
+ dsp->starts = starts;
+ dsp->lens = lens;
+ dsp->strands = strands;
+ return dsp;
+ }
+
+}
+
+
+static Boolean DenseSegInsert(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, DenseSegPtr dsp, Boolean merge, BoolPtr seq_insert)
+{
+ Int2 cur_seg, i, j;
+ Int2 t_order; /*the order of target sequence*/
+ Int4 t_start, t_stop, t_max;
+ Int4 cur_start;
+ Int2 dim;
+ Int4Ptr starts, lens;
+ Uint1 cur_strand;
+ Uint1Ptr strands;
+ ValNodePtr v_starts, v_lens, v_strands;
+ SeqIdPtr c_id;
+ Int4 ins_len;
+ Boolean add_insert_seg;
+ Boolean has_source_id;
+ Int4 gap_size, s_gap_size;
+ Int4 s_last, t_last, s_first, t_first;
+ Int2 s_order = -1;
+
+
+
+ if(pos <0)
+ return FALSE;
+ t_max = 0;
+ t_order = get_t_order(dsp->ids, to_id);
+ if(t_order == -1)
+ return FALSE;
+ dim = dsp->dim;
+ starts = dsp->starts;
+ lens = dsp->lens;
+ strands = dsp->strands;
+
+ cur_seg = 0;
+ v_starts = NULL;
+ v_lens = NULL;
+ v_strands = NULL;
+ ins_len = to - from +1;
+ has_source_id = FALSE;
+ for(c_id = dsp->ids, i =0; c_id != NULL; c_id = c_id->next, ++i)
+ {
+ if(SeqIdMatch(c_id, from_id))
+ {
+ s_order = i;
+ has_source_id = TRUE;
+ break;
+ }
+ }
+
+
+ /*check if it can be inserted before the start of the alignment*/
+ gap_size = -1;
+ s_gap_size = -1;
+ if(*seq_insert == FALSE && has_source_id)
+ {
+ t_first = dsp->starts[t_order];
+ cur_strand = dsp->strands[t_order];
+ if(t_first != -1)
+ {
+ if(cur_strand == Seq_strand_minus)
+ {
+ t_first += dsp->lens[0];
+ if(t_first <= pos)
+ gap_size = pos - t_first;
+ }
+ else /*plus strand*/
+ {
+ if(t_first >=pos )
+ gap_size = t_first - pos;
+ }
+ }
+
+ s_first = dsp->starts[s_order];
+ cur_strand = dsp->strands[s_order];
+ if(s_first != -1)
+ {
+
+ if(cur_strand == Seq_strand_minus)
+ {
+ s_first += dsp->lens[0];
+ if(s_first <= from)
+ s_gap_size = from - s_first;
+ }
+ else
+ {
+ if(s_first > 0)
+ {
+ --s_first;
+ if(s_first >=to)
+ s_gap_size = s_first - to;
+ }
+ }
+ }
+ if(gap_size >= 0 && s_gap_size >= 0 && gap_size == s_gap_size)
+ {
+ for(j = 0; j<dim; ++j)
+ {
+ cur_start = -1;
+ cur_strand = dsp->strands[dim*(dsp->numseg-1) + j];
+ if(j == t_order)
+ {
+ cur_start = pos;
+ }
+ if(j == s_order)
+ {
+ if(cur_strand == Seq_strand_minus)
+ cur_start = from - gap_size;
+ else
+ cur_start = from;
+ }
+ ValNodeAddInt(&v_starts, 0, cur_start);
+ ValNodeAddInt(&v_strands, 0, cur_strand);
+ }
+ ValNodeAddInt(&v_lens, 0, (ins_len + gap_size));
+ *seq_insert = TRUE;
+ ++cur_seg;
+ }
+ }
+
+
+
+ for(i =0; i<dsp->numseg; ++i)
+ {
+ t_start = starts[dim*i +t_order];
+ if(t_start != -1)
+ {
+ t_stop = t_start + lens[i] -1;
+ t_max = MAX(t_stop, t_max);
+
+ /*insertion is downstrems of the current segment*/
+ /*copy everything*/
+ if(pos > t_stop)
+ { /*downstream insertions*/
+ for(j=0; j<dim; ++j)
+ {
+ cur_start = starts[dim*i + j];
+ cur_strand = (strands[dim*i + j]);
+ ValNodeAddInt(&v_starts, 0, cur_start);
+ ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
+ }
+ ValNodeAddInt(&v_lens, 0, lens[i]);
+ ++cur_seg;
+ }
+
+ /*insertion is upstream, add offset*/
+ if(pos < t_start)
+ {
+ for(j=0; j<dim; ++j)
+ {
+ cur_start = starts[dim*i + j];
+ cur_strand = (strands[dim*i + j]);
+ if(j == t_order)
+ cur_start += ins_len;
+ ValNodeAddInt(&v_starts, 0, cur_start);
+ ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
+ }
+ ValNodeAddInt(&v_lens, 0, lens[i]);
+ ++cur_seg;
+ }
+
+ /*insert within the current segment*/
+ if(pos >= t_start && pos <= t_stop) /*insert within*/
+ {
+ /*if the segment is broken, add the first half*/
+ if(pos > t_start)
+ {
+ for(j = 0; j<dim; ++j)
+ {
+ cur_start = starts[dim*i+j];
+ cur_strand = strands[dim*i + j];
+ ValNodeAddInt(&v_starts, 0, cur_start);
+ ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
+ }
+ ValNodeAddInt(&v_lens, 0, (pos - t_start));
+ ++cur_seg;
+ }
+
+ /*add the inserted segment*/
+ if(i == 0)
+ add_insert_seg = (has_source_id && *seq_insert == FALSE);
+ else
+ add_insert_seg = TRUE;
+
+ if(add_insert_seg)
+ {
+ for(j = 0; j<dim ; ++j)
+ {
+ cur_start = -1;
+ cur_strand = strands[dim*i + j];
+ if(has_source_id && j == s_order)
+ {
+ *seq_insert = TRUE;
+ cur_start = from;
+ cur_strand = strand;
+ }
+ if(j == t_order)
+ cur_start = pos;
+ ValNodeAddInt(&v_starts, 0, cur_start);
+ ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
+ }
+ ValNodeAddInt(&v_lens, 0, ins_len);
+ ++cur_seg;
+ }
+
+ /*add the leftover segment*/
+ for(j=0; j<dim; ++j)
+ {
+ cur_start = starts[dim*i+j];
+ cur_strand = strands[dim*i + j];
+ if(j == t_order)
+ cur_start = pos + ins_len;
+ ValNodeAddInt(&v_starts, 0, cur_start);
+ ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
+ }
+ ValNodeAddInt(&v_lens, 0, (lens[i] - (pos - t_start)));
+ ++cur_seg;
+ }
+ }
+
+ else /*gaps in the target sequence*/
+ {
+ for(j=0; j<dim; ++j)
+ {
+ cur_start = starts[dim*i + j];
+ cur_strand = (strands[dim*i + j]);
+ ValNodeAddInt(&v_starts, 0, cur_start);
+ ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
+ }
+ ValNodeAddInt(&v_lens, 0, lens[i]);
+ ++cur_seg;
+ }
+ }
+
+
+ /*attach to the end of the sequence*/
+ gap_size = -1;
+ s_gap_size = -1;
+ if(*seq_insert == FALSE && has_source_id)
+ {
+ t_last = dsp->starts[dim*(dsp->numseg-1) +t_order];
+ cur_strand = dsp->strands[dim*(dsp->numseg-1) + t_order];
+ if(t_last != -1)
+ {
+ if(cur_strand == Seq_strand_minus)
+ {
+ t_last -= 1;
+ if(pos <= t_last)
+ gap_size = t_last - pos;
+ }
+ else /*plus strand*/
+ {
+ t_last += dsp->lens[dsp->numseg-1];
+ if(pos >= t_last)
+ gap_size = pos - t_last;
+ }
+ }
+
+ s_last = dsp->starts[dim*(dsp->numseg-1) +s_order];
+ cur_strand = dsp->strands[dim*(dsp->numseg-1) + s_order];
+ if(s_last != -1)
+ {
+
+ if(cur_strand == Seq_strand_minus)
+ {
+ s_last -= 1;
+ s_gap_size = s_last - to;
+ }
+ else
+ {
+ s_last += dsp->lens[dsp->numseg-1];
+ s_gap_size = from - s_last;
+ }
+ }
+ if(gap_size >=0 && s_gap_size >=0 && gap_size == s_gap_size)
+ {
+ for(j = 0; j<dim; ++j)
+ {
+ cur_start = -1;
+ cur_strand = dsp->strands[dim*(dsp->numseg-1) + j];
+ if(j == t_order)
+ {
+ if(cur_strand == Seq_strand_minus)
+ cur_start = t_last - (ins_len + gap_size -1);
+ else
+ cur_start = t_last;
+ }
+ if(j == s_order)
+ {
+ if(cur_strand == Seq_strand_minus)
+ cur_start = from;
+ else
+ cur_start = from - gap_size;
+ }
+ ValNodeAddInt(&v_starts, 0, cur_start);
+ ValNodeAddInt(&v_strands, 0, cur_strand);
+ }
+ ValNodeAddInt(&v_lens, 0, (ins_len + gap_size));
+ *seq_insert = TRUE;
+ ++cur_seg;
+ }
+ }
+
+
+ dsp = make_dsp(v_starts, v_lens, v_strands, t_order, dim, dsp, cur_seg, merge, NULL, NULL);
+ return TRUE;
+}
+
+
+static ValNodePtr store_data(Int4 val, ValNodePtr head)
+{
+ ValNodeAddInt(&head, 0, val);
+ return head;
+}
+
+static ValNodePtr store_reverse_data(Int4 val, ValNodePtr head)
+{
+ ValNodePtr new_vnp;
+
+ new_vnp = ValNodeNew(NULL);
+ new_vnp->data.intvalue = val;
+
+ new_vnp->next = head;
+ return new_vnp;
+}
+
+static ValNodePtr find_last_node(ValNodePtr head)
+{
+ if(head != NULL)
+ {
+ while(head->next != NULL)
+ head = head->next;
+ return head;
+ }
+ return NULL;
+
+}
+
+static ValNodePtr link_to_end(ValNodePtr new_vnp, ValNodePtr head)
+{
+ ValNodePtr l_node;
+
+ if(head == NULL)
+ return new_vnp;
+ l_node = find_last_node(head);
+ l_node->next = new_vnp;
+ return head;
+}
+
+static ValNodePtr find_nth_node(ValNodePtr vnp, Int2 order)
+{
+ Int2 i;
+
+ i =0;
+ while(vnp ){
+ if(i == order)
+ return vnp;
+ vnp = vnp->next;
+ ++i;
+ }
+ return NULL;
+}
+
+static Int4 get_last_pos(ValNodePtr v_starts, ValNodePtr v_lens)
+{
+ ValNodePtr prev;
+ Int4 start, pos;
+
+ prev = NULL;
+ while(v_starts->next != NULL)
+ {
+ prev = v_starts;
+ v_starts = v_starts->next;
+ }
+ if(prev == NULL)
+ return -1;
+ start = prev->data.intvalue;
+ while(v_lens->next != NULL)
+ v_lens = v_lens->next;
+ pos = start + v_lens->data.intvalue -1;
+ return pos;
+}
+
+
+
+
+/*************************************************************************
+***
+* only consider the master sequence is on plus strand
+* and m_order is always == 0
+*
+**************************************************************************
+***/
+static Boolean merge_two_seg(ValNodePtr PNTR v_starts, ValNodePtr vs_starts, ValNodePtr PNTR v_lens, ValNodePtr vs_lens, ValNodePtr PNTR v_strands, ValNodePtr vs_strands, Int4 pos, Uint1 s_strand, Int2 numseg)
+{
+ Int4 gap, last_pos;
+ ValNodePtr l_start, l_len;
+
+ gap = (*v_starts)->data.intvalue - (pos);
+ if(gap >=0)
+ {
+ /*if(gap > 0)
+ {
+ *v_starts = store_reverse_data(-1, *v_starts);
+ *v_starts = store_reverse_data(pos, *v_starts);
+ *v_strands = store_reverse_data(s_strand, *v_strands);
+ *v_strands = store_reverse_data(1, *v_strands);
+ *v_lens = store_reverse_data(gap+1, *v_lens);
+ }*/
+ last_pos = get_last_pos(vs_starts, vs_lens);
+ if(last_pos == -1)
+ return FALSE;
+
+ gap = (*v_starts)->data.intvalue - last_pos -1;
+ if(gap >0)
+ return FALSE;
+ *v_starts = link_to_end(*v_starts, vs_starts);
+ *v_strands = link_to_end(*v_strands, vs_strands);
+ *v_lens = link_to_end(*v_lens, vs_lens);
+ return TRUE;
+ }
+
+ l_start = find_nth_node(*v_starts, 2*(numseg-1));
+ l_len= find_nth_node(*v_lens, (numseg-1));
+ last_pos = l_start->data.intvalue + l_len->data.intvalue -1;
+ gap = pos - last_pos;
+ if(gap >=0)
+ {
+ /*if(gap >0)
+ {
+ *v_starts = store_data(pos, *v_starts);
+ *v_starts = store_data(-1, *v_starts);
+ *v_strands = store_data(1, *v_strands);
+ *v_strands = store_data(s_strand, *v_strands);
+ *v_lens = store_data(gap+1, *v_lens);
+ }*/
+ gap = vs_starts->data.intvalue - last_pos -1;
+ if(gap >0)
+ return FALSE;
+ *v_starts = link_to_end(vs_starts, *v_starts);
+ *v_strands = link_to_end(vs_strands, *v_strands);
+ *v_lens = link_to_end(vs_lens, *v_lens);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+
+/************************************************************************
+***
+* load the portion of alignment (from, to, strand) in f_dsp to
+* the interval where to_id points to
+* from_id has already been inserted into to_id, so the region
+* can be mapped to the interval on to_id
+*
+*************************************************************************
+***/
+static Boolean dsp_process(DenseSegPtr f_dsp, Int4 from, Int4 to, Uint1 strand, SeqIdPtr from_id, SeqIdPtr to_id, Int4 pos, ValNodePtr PNTR vs_starts, ValNodePtr PNTR vs_strands, ValNodePtr PNTR vs_lens)
+{
+ Int4Ptr starts, lens;
+ Uint1Ptr strands;
+ Uint1 i_strand;
+ SeqLocPtr m_loc;
+ Int4 m_start, m_stop, m_pos;
+ Int4 off_start, off_stop;
+ Int4 t_start, t_stop;
+ Int4 s_start, s_len;
+ Int2 m_order, i;
+ Boolean is_found;
+ BioseqPtr tbsp;
+
+
+ if(f_dsp == NULL)
+ return FALSE;
+ tbsp = BioseqFind(to_id);
+ if(tbsp == NULL)
+ return FALSE;
+ starts = f_dsp->starts;
+ lens = f_dsp->lens;
+ strands = f_dsp->strands;
+ m_loc = SeqLocIntNew(0, 0, 1, from_id);
+ m_pos = -1;
+ m_order = get_t_order(f_dsp->ids, from_id);
+ is_found = FALSE;
+ *vs_starts = NULL;
+ *vs_strands = NULL;
+ *vs_lens = NULL;
+
+ for(i = 0; i<f_dsp->numseg; ++i)
+ {
+ m_start = starts[2*i+m_order];
+ if(m_start != -1){
+ m_stop = m_start + lens[i] -1;
+ m_pos = m_stop +1;
+ }
+ else
+ {
+ m_start = m_pos;
+ m_stop = m_pos;
+ }
+ if(m_start != -1 && m_stop != -1)
+ {
+ if(!(m_start > to) || (m_stop < from))
+ {
+ off_start = MAX(0, from-m_start);
+ off_stop = MAX(0, m_stop - to);
+ /*if(tbsp->repr == Seq_repr_seg)
+ {
+ m_start = MAX(m_start, from);
+ m_stop = MIN(m_stop, to);
+ update_seq_loc(m_start, m_stop, strand, m_loc);
+ t_start = GetOffsetInBioseq(m_loc, tbsp, SEQLOC_LEFT_END);
+ t_stop = GetOffsetInBioseq(m_loc, tbsp, SEQLOC_RIGHT_END);
+ }
+ else
+ {*/
+ if(strand == Seq_strand_minus)
+ {
+ t_start = (pos-1) -(to - from) + MAX((to-m_stop), 0);
+ t_stop = pos -1 - MAX((m_start-from), 0);
+ }
+ else
+ {
+ t_start = (pos-1) - (to - from) + MAX((m_start-from), 0);
+ t_stop = pos-1 - MAX((to - m_stop), 0);
+ }
+ /*}*/
+ if(t_start != -1 && t_stop != -1)
+ {
+ OrderInt4(&t_start, &t_stop);
+ s_start = starts[2*i +1-m_order];
+ if(s_start != -1)
+ {
+ if(strands[2*i] == strands[2*i+1])
+ s_start += off_start;
+ else
+ s_start += off_stop;
+ }
+ s_len = lens[i] - (off_start + off_stop);
+ if(s_len >0)
+ {
+ is_found = TRUE;
+ i_strand = strands[2*i+1-m_order];
+ if(starts[2*i+m_order] == -1)
+ t_start = -1;
+ if(strand != strands[2*i+m_order]){ /*reverse the seg order*/
+ i_strand = 3 - i_strand;
+ *vs_starts = store_reverse_data(s_start, *vs_starts);
+ *vs_starts = store_reverse_data(t_start, *vs_starts);
+ *vs_strands = store_reverse_data((Int4)i_strand, *vs_strands);
+ *vs_strands = store_reverse_data(1, *vs_strands);
+ *vs_lens = store_reverse_data(s_len, *vs_lens);
+ }
+ else
+ {
+ *vs_starts = store_data(t_start, *vs_starts);
+ *vs_starts = store_data(s_start, *vs_starts);
+ *vs_strands = store_data(1, *vs_strands);
+ *vs_strands = store_data((Int4)i_strand, *vs_strands);
+ *vs_lens = store_data(s_len, *vs_lens);
+ }
+ } /*end of if(s_len>0)*/
+ }
+ }
+ }
+ }
+ SeqLocFree(m_loc);
+ return is_found;
+
+}
+
+
+static Int2 get_cur_seg(ValNodePtr lens)
+{
+ Int2 i=0;
+
+ while(lens)
+ {
+ ++i;
+ lens = lens->next;
+ }
+ return i;
+}
+
+/************************************************************************
+***
+* DenseSegMerge(only for the two-dimentional pairwise alignment
+* pos is the position on the merged sequence
+*
+*************************************************************************
+***/
+static Boolean DenseSegMerge(DenseSegPtr f_dsp, SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, DenseSegPtr t_dsp)
+{
+ Int2 f_order, t_order;
+ SeqIdPtr fs_id, ts_id;
+ Int2 i, j;
+ ValNodePtr v_starts, v_lens, v_strands, vs_starts, vs_lens, vs_strands;
+ Int4Ptr starts, lens;
+ Uint1Ptr strands;
+ Int2 cur_seg;
+ Uint1 i_strand;
+ BioseqPtr t_bsp;
+
+ t_order = get_t_order(t_dsp->ids, to_id);
+ if(t_order == -1)
+ return FALSE;
+ f_order = get_t_order(f_dsp->ids, from_id);
+ if(f_order == -1)
+ return FALSE;
+
+ fs_id = get_nth_id(f_dsp->ids, 1-f_order);
+ ts_id = get_nth_id(t_dsp->ids, 1-t_order);
+ if(!SeqIdForSameBioseq(fs_id, ts_id))
+ return FALSE;
+
+ vs_starts =NULL;
+ vs_strands = NULL;
+ vs_lens = NULL;
+ if(!dsp_process(f_dsp, from, to, strand, from_id, to_id, pos, &vs_starts, &vs_strands, &vs_lens)) /*the alignment is not within [from, to]*/
+ return TRUE;
+ i_strand = (Uint1)(vs_strands->next->data.intvalue);
+ t_bsp = BioseqFind(to_id);
+ if(pos == BioseqGetLen(t_bsp)) /*insertion at the end*/
+ pos = pos - (to - from +1) -1;
+
+ v_starts =NULL;
+ v_strands = NULL;
+ v_lens = NULL;
+ starts = t_dsp->starts;
+ lens = t_dsp->lens;
+ strands = t_dsp->strands;
+ for(i =0; i<t_dsp->numseg; ++i)
+ {
+ for(j =0; j<2; ++j)
+ {
+ v_starts = store_data(starts[2*i +j], v_starts);
+ v_strands = store_data(strands[2*i+j], v_strands);
+ }
+ v_lens = store_data(lens[i], v_lens);
+ }
+
+ if(!merge_two_seg(&v_starts, vs_starts, &v_lens, vs_lens, &v_strands, vs_strands, pos, i_strand, t_dsp->numseg)) /*fail to brought togather*/
+ {
+ ValNodeFree(v_starts);
+ ValNodeFree(vs_starts);
+ ValNodeFree(v_lens);
+ ValNodeFree(vs_lens);
+ ValNodeFree(v_strands);
+ ValNodeFree(vs_strands);
+ return FALSE;
+ }
+
+ cur_seg = get_cur_seg(v_lens);
+ t_dsp = make_dsp(v_starts, v_lens, v_strands, t_order, 2, t_dsp, cur_seg, TRUE, to_id, fs_id);
+ return TRUE;
+}
+
+static SeqAlignPtr make_new_align(DenseSegPtr f_dsp, Int4 from, Int4 to, Uint1 strand, SeqIdPtr from_id, SeqIdPtr to_id, Int4 pos, SeqAlignPtr head)
+{
+ ValNodePtr vs_starts, vs_strands, vs_lens;
+ DenseSegPtr dsp;
+ Int2 cur_seg;
+ SeqAlignPtr align;
+ SeqIdPtr fs_id;
+
+ if(!dsp_process(f_dsp, from, to, strand, from_id, to_id, pos, &vs_starts, &vs_strands, &vs_lens)) /*the alignment is not within [from, to]*/
+ return head;
+
+ fs_id = f_dsp->ids->next;
+ cur_seg = get_cur_seg(vs_lens);
+ dsp = make_dsp(vs_starts, vs_lens, vs_strands, 0, 2, NULL, cur_seg, TRUE, to_id, fs_id);
+ if(dsp)
+ {
+ align = SeqAlignNew();
+ align->segtype = 2;
+ align->segs = dsp;
+ align->type =3;
+ if(head == NULL)
+ head = align;
+ else
+ head = SeqAlignLink(align, head);
+ }
+ return head;
+}
+
+
+
+static SeqAlignPtr JZSeqAlignMerge(SeqAlignPtr f_align, SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, SeqAlignPtr t_align)
+{
+ SeqAlignPtr new_salp, curr;
+ DenseSegPtr f_dsp, t_dsp;
+ Boolean is_found;
+
+ if(f_align == NULL)
+ return t_align;
+
+ new_salp = NULL;
+ while(f_align)
+ {
+ if(f_align->segtype == 2)
+ {
+ f_dsp = (DenseSegPtr) f_align->segs;
+ is_found = FALSE;
+ curr = t_align;
+ while(curr && !is_found)
+ {
+ if(curr->segtype ==2)
+ {
+ t_dsp = (DenseSegPtr) curr->segs;
+ is_found = DenseSegMerge(f_dsp, from_id, from, to, strand, to_id, pos, t_dsp);
+ }
+ curr = curr->next;
+ }
+
+ if(!is_found)
+ new_salp = make_new_align(f_dsp, from, to, strand, from_id, to_id, pos, new_salp);
+
+
+ f_align = f_align->next;
+ }
+ }
+
+ if(new_salp != NULL){
+ if(t_align == NULL)
+ t_align = new_salp;
+ else
+ t_align = SeqAlignLink(new_salp, t_align);
+ }
+ return t_align;
+
+
+}
+
+
+
+/***************************************************************************
+***
+* AttachSeqInAlign(): for a sequence not found in the previous alignment
+* data and it is used in producing the alignment, it would be attached
+* as a part of the alignment node for the seq-hist of the new sequence
+*
+******************************************************************************
+***/
+static SeqAlignPtr AttachSeqInAlign(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, SeqAlignPtr head)
+{
+ DenseSegPtr dsp;
+ ValNodePtr v_starts, v_lens, v_strands;
+ SeqAlignPtr align;
+
+ v_starts = NULL;
+ v_lens = NULL;
+ v_strands = NULL;
+
+
+ ValNodeAddInt(&v_starts, 0, pos);
+ ValNodeAddInt(&v_starts, 0, from);
+ ValNodeAddInt(&v_strands, 0, (Int4)Seq_strand_plus);
+ ValNodeAddInt(&v_strands, 0, (Int4)strand);
+ ValNodeAddInt(&v_lens, 0, (to - from +1));
+ dsp = make_dsp(v_starts, v_lens, v_strands, 0, 2, NULL, 1, TRUE, to_id, from_id);
+ align = SeqAlignNew();
+ align->segtype = 2;
+ align->segs = dsp;
+ align->type =3;
+ if(head == NULL)
+ head = align;
+ else
+ head = SeqAlignLink(align, head);
+ return head;
+
+}
+
+
+static void mod_old_align(SeqAlignPtr PNTR old, SeqIdPtr sip)
+{
+ SeqAlignPtr curr, prev;
+ SeqIdPtr c_sip;
+
+ prev = NULL;
+ curr = *old;
+
+ while(curr)
+ {
+ c_sip = SeqAlignId(curr, 1);
+ if(SeqIdForSameBioseq(c_sip, sip))
+ {
+ if(prev == NULL)
+ *old= curr->next;
+ else
+ prev->next = curr->next;
+ curr->next = NULL;
+ SeqAlignFree(curr);
+ return;
+ }
+ prev = curr;
+ curr = curr->next;
+ }
+}
+
+static void attach_new_align(SeqAlignPtr new_salp, SeqAlignPtr PNTR old) {
+ SeqIdPtr sip;
+ SeqAlignPtr align;
+
+ if(new_salp == NULL)
+ return;
+ align = new_salp;
+ while(align)
+ {
+ sip = SeqAlignId(align, 1);
+ if(sip)
+ mod_old_align(old, sip);
+ align= align->next;
+ }
+
+ if(*old== NULL)
+ *old = new_salp;
+ else
+ (*old) = SeqAlignLink(new_salp, (*old));
+
+}
+
+static void delete_bad_node(ValNodePtr PNTR head, Uint1 choice)
+{
+ ValNodePtr prev, next, curr;
+
+ curr = *head;
+ prev = NULL;
+
+ while(curr)
+ {
+ next = curr->next;
+ if(curr->choice != choice)
+ {
+ if(prev == NULL)
+ *head = next;
+ else
+ prev->next = next;
+ curr->next = NULL;
+ ValNodeFree(curr);
+ }
+ else
+ prev = curr;
+
+ curr = next;
+ }
+}
+static Int4 count_coverage (BoolPtr used_list, Int4 size)
+{
+ Int4 i;
+ Int4 count;
+
+ count = 0;
+ for(i = 0; i<size; ++i)
+ {
+ if(used_list[i] != 0)
+ ++count;
+ }
+ return count;
+}
+
+
+static Int2 filter_seqid_order;
+static int LIBCALLBACK CompareChainProc(VoidPtr ptr1, VoidPtr ptr2)
+{
+ ValNodePtr vnp1;
+ ValNodePtr vnp2;
+ DenseDiagPtr ddp1, ddp2;
+ Int4 start1, stop1, start2, stop2;
+
+ start1 = -1;
+ start2 = -1;
+ if (ptr1 != NULL && ptr2 != NULL) {
+ vnp1 = *((ValNodePtr PNTR) ptr1);
+ vnp2 = *((ValNodePtr PNTR) ptr2);
+ if (vnp1 != NULL && vnp2 != NULL) {
+ ddp1 = (DenseDiagPtr) vnp1->data.ptrvalue;
+ ddp2 = (DenseDiagPtr) vnp2->data.ptrvalue;
+ start1 = ddp1->starts[filter_seqid_order];
+ stop1 = start1 + ddp1->len -1;
+ start2 = ddp2->starts[filter_seqid_order];
+ stop2 = start2 + ddp2->len -1;
+ if(stop1> stop2)
+ return 1;
+ else if(stop1 < stop2)
+ return -1;
+ else if (stop1 == stop2)
+ {
+ if(ddp1->len > ddp2->len)
+ return -1;
+ else if(ddp1->len < ddp2->len)
+ return 1;
+ else
+ return 0;
+ }
+ }
+ }
+ return 0;
+}
+
+
+/*
+* return 0 for no overlap
+* return 1 for _2 overlap too much with _1
+* return 2 for _1 overlap too much with _1
+*/
+static Uint1 has_too_much_overlap (Int4 start_1, Int4 stop_1, Int4 start_2, Int4 stop_2)
+{
+ Int4 m_start, m_stop;
+ FloatHi value;
+ Int4 len_1, len_2;
+
+ if(start_2 > stop_1 || stop_2 < start_1) /*no overlap */
+ return 0;
+
+ if(start_2 >= start_1 && stop_2 <= stop_1) /*completely contained within*/
+ return 1;
+
+ if(start_1 >= start_2 && stop_1 <= start_2)
+ return 2;
+
+ m_start = MAX(start_1, start_2);
+ m_stop = MIN(stop_1, stop_2);
+
+ len_1 = stop_1 - start_1 +1 ;
+ len_2 = stop_2 - start_2 + 1;
+
+ value = (FloatHi)(m_stop - m_start + 1)/(FloatHi)(MIN(len_1, len_2));
+ if(value >= 0.7)
+ {
+ if(len_1 >= len_2)
+ return 1;
+ else
+ return 2;
+ }
+ else
+ return 0;
+}
+
+
+
+static Int4Ptr convert_ddp_to_array (ValNodePtr ddp_list, Uint1 strand, Int2 s_order, Int4Ptr p_num)
+{
+ Int4Ptr val;
+ Int4 num;
+ ValNodePtr curr;
+ DenseDiagPtr ddp;
+ Int4 i;
+
+ num = 0;
+ for(curr = ddp_list; curr != NULL; curr = curr->next)
+ {
+ ++num;
+ }
+
+ val = (Int4Ptr) MemNew((size_t)num * sizeof(Int4));
+
+ /*if strand == minus, check the descending order of the stop.
+ otherwise, check the ascending order of the start */
+
+ i = 0;
+ for(curr = ddp_list; curr != NULL; curr = curr->next)
+ {
+ ddp = (DenseDiagPtr) curr->data.ptrvalue;
+ if(strand == Seq_strand_minus)
+ val[i++] = ddp->starts[1-s_order] + ddp->len -1;
+ else
+ val[i++] = ddp->starts[1-s_order];
+ }
+
+ *p_num = num;
+ return val;
+}
+
+
+static Int4 get_score_from_list (ScorePtr scores)
+{
+ ObjectIdPtr oip;
+
+ if(scores == NULL)
+ return 0;
+ while(scores)
+ {
+ oip = scores->id;
+ if(oip && StringCmp(oip->str, "score") == 0)
+ return scores->value.intvalue;
+ scores = scores->next;
+ }
+
+ return 0;
+}
+
+
+static Int4 get_score_from_DenseDiag (DenseDiagPtr ddp)
+{
+ return get_score_from_list (ddp->scores);
+}
+
+static Boolean filter_this_seg (DenseDiagPtr curr, DenseDiagPtr PNTR h_list, Int2 order)
+{
+ Int4 start, stop, t_start, t_stop;
+ Int4 a_start, a_stop;
+ Int4 score, t_score;
+ DenseDiagPtr prev, next, list;
+ Boolean found;
+ Int4 len;
+
+ start = curr->starts[order];
+ stop = start + curr->len -1;
+ score = get_score_from_DenseDiag (curr);
+ prev = NULL;
+ list = *h_list;
+ while(list)
+ {
+ next = list->next;
+ t_start = list->starts[order];
+ t_stop = t_start + list->len -1;
+ t_score = get_score_from_DenseDiag (list);
+
+ found = FALSE;
+ if(!(t_start > stop || t_stop < start))
+ {
+ a_start = MAX(t_start, start);
+ a_stop = MIN(t_stop, stop);
+ len = MAX(t_stop, stop) - MIN(t_start, start);
+ if((FloatHi)(a_stop - a_start)/(FloatHi)len > 0.80)
+ {
+ if(score > t_score)
+ {
+ if(prev == NULL)
+ *h_list = next;
+ else
+ prev->next = next;
+ list->next = NULL;
+ DenseDiagFree(list);
+ found = TRUE;
+ }
+ }
+ }
+
+ if(!found)
+ prev = list;
+ list = next;
+ }
+
+ list = *h_list;
+ while(list)
+ {
+ t_start = list->starts[order];
+ t_stop = t_start+ list->len -1;
+ t_score = get_score_from_DenseDiag (list);
+
+ if(!(t_start > stop || t_stop < start))
+ {
+ a_start = MAX(t_start, start);
+ a_stop = MIN(t_stop, stop);
+ if((FloatHi)(a_stop - a_start)/(FloatHi)(stop - start) > 0.50)
+ {
+ if(score < t_score)
+ return TRUE;
+ }
+ }
+ list = list->next;
+ }
+
+ return FALSE;
+}
+
+
+static Boolean is_same_orientation(Uint1 strand_1, Uint1 strand_2)
+{
+ if(strand_1 == strand_2)
+ return TRUE;
+ if(strand_1 == Seq_strand_minus)
+ return (strand_2 == Seq_strand_minus);
+ else
+ return (strand_2 != Seq_strand_minus);
+}
+
+static void filter_wrong_orientation (SeqAlignPtr align, Boolean same_orient)
+{
+ DenseDiagPtr ddp, prev, next;
+ Boolean del;
+
+ ddp = (DenseDiagPtr) align->segs;
+ prev = NULL;
+
+ while(ddp)
+ {
+ next = ddp->next;
+ del = (is_same_orientation(ddp->strands[0], ddp->strands[1]) != same_orient);
+ if(del)
+ {
+ if(prev == NULL)
+ align->segs = next;
+ else
+ prev->next = next;
+ ddp->next = NULL;
+ DenseDiagFree(ddp);
+ }
+ else
+ prev = ddp;
+ ddp = next;
+ }
+}
+
+
+static FloatHi get_overlap(BoolPtr status, Int4 from, Int4 len, Int4Ptr poverlap)
+{
+ Int4 i;
+ Int4 overlap_len;
+
+ overlap_len = 0;
+ for(i = 0; i<len; ++i)
+ {
+ if(status[i+from])
+ ++overlap_len;
+ }
+
+ *poverlap = overlap_len;
+ return (FloatHi)overlap_len/(FloatHi)len;
+}
+
+
+static Boolean is_overlap(BoolPtr status, Int4 from, Int4 len)
+{
+ Int4 i;
+ Int4 overlap_len;
+
+ overlap_len = 0;
+ for(i = 0; i<len; ++i)
+ {
+ if(status[i+from])
+ ++overlap_len;
+ }
+
+ if((FloatHi)overlap_len/(FloatHi)len> 0.7)
+ return TRUE;
+ else
+ {
+ MemSet(status+from, 1, (size_t)(len) * sizeof(Boolean));
+ return FALSE;
+ }
+}
+
+
+static void set_ddp_status(ValNodePtr ddp_list, Int4 first_len, Int4 second_len)
+{
+ BoolPtr first_status, second_status;
+ DenseDiagPtr ddp;
+ FloatHi val_1, val_2;
+ Int4 overlap_1, overlap_2;
+
+ first_status = (BoolPtr) MemNew((size_t)first_len * sizeof(Boolean));
+ second_status = (BoolPtr) MemNew((size_t)second_len * sizeof(Boolean));
+
+ while(ddp_list)
+ {
+ ddp = (DenseDiagPtr) ddp_list->data.ptrvalue;
+ val_1 = get_overlap(first_status, ddp->starts[0], ddp->len, &overlap_1);
+ val_2 = get_overlap(second_status, ddp->starts[1], ddp->len, &overlap_2);
+
+ if(val_1 >= 0.95 || val_2 >=0.95
+ || (ddp->len - MAX(overlap_1, overlap_2) < MIN(50, ddp->len-10))
+ || (val_1 > 0.7 && val_2 >0.7))
+ {
+ /* if(ddp->starts[0] == 35)
+ printf("val_1 = %lf, val_2 = %lf, overlap_1 = %ld, overlap_2 = %ld, len = %ld\n", val_1, val_2, overlap_1, overlap_2, ddp->len);
+ */
+ ddp_list->choice = 0;
+ }
+ else
+ {
+ MemSet(first_status+ddp->starts[0], 1, (size_t)(ddp->len) * sizeof(Boolean));
+ MemSet(second_status+ddp->starts[1], 1, (size_t)(ddp->len) * sizeof(Boolean));
+ ddp_list->choice = 1;
+ }
+ ddp_list = ddp_list->next;
+ }
+
+ MemFree(first_status);
+ MemFree(second_status);
+}
+
+static Boolean free_diag_in_list(ValNodePtr ddp_list, DenseDiagPtr ddp)
+{
+ DenseDiagPtr c_ddp;
+
+ while(ddp_list)
+ {
+ c_ddp = (DenseDiagPtr) ddp_list->data.ptrvalue;
+ if(c_ddp == ddp)
+ return (ddp_list->choice == 1);
+ ddp_list = ddp_list->next;
+ }
+
+ return FALSE;
+}
+
+static void filter_repeats (ValNodePtr PNTR ddp_list, Int4 first_len, Int4 second_len)
+{
+
+ set_ddp_status(*ddp_list, first_len, second_len);
+ delete_bad_node(ddp_list, 1);
+}
+
+static Int4Ptr get_process_list (ValNodePtr ddp_list)
+{
+ Int4 num, i, j;
+ ValNodePtr curr;
+ Int4Ptr order, score_list;
+ DenseDiagPtr ddp;
+ Boolean change = TRUE;
+ Int4 temp;
+
+ num = 0;
+ for(curr = ddp_list; curr != NULL; curr = curr->next)
+ ++num;
+
+ score_list = (Int4Ptr) MemNew((size_t)num * sizeof(ValNodePtr));
+ order = (Int4Ptr) MemNew((size_t)num * sizeof(Int4));
+
+ i = 0;
+ for(curr = ddp_list; curr != NULL; curr = curr->next)
+ {
+ ddp = (DenseDiagPtr) curr->data.ptrvalue;
+ score_list[i] = get_score_from_DenseDiag (ddp);
+ order[i] = i;
+ ++i;
+ }
+
+ for(i = 0; i<num-1 && change; ++i)
+ {
+ change = FALSE;
+ for(j = 0; j<num-i-1; ++j)
+ {
+ if(score_list[j+1] > score_list[j])
+ {
+
+ change =TRUE;
+ temp = score_list[j];
+ score_list[j] = score_list[j+1];
+ score_list[j+1] = temp;
+
+ temp = order[j];
+ order[j] = order[j+1];
+ order[j+1] = temp;
+ }
+ }
+ }
+
+ MemFree(score_list);
+ return order;
+}
+
+
+static ValNodePtr get_nth_node(ValNodePtr vnp, Int4 order)
+{
+ Int4 i;
+
+ i = 0;
+ while(vnp)
+ {
+ if(i == order)
+ return vnp;
+ ++i;
+ vnp = vnp->next;
+ }
+
+ return NULL;
+}
+
+static Uint1 get_alignment_orientation(ValNodePtr PNTR pddp_list, BioseqPtr bsp, Int2 s_order)
+{
+ Int4 num;
+ Int4Ptr score_order;
+ ValNodePtr curr;
+ DenseDiagPtr ddp;
+ BoolPtr plus_used_list, minus_used_list;
+ Int4 plus_count, minus_count;
+ Int4 i;
+ ValNodePtr list, ddp_list;
+ Uint1 strand, t_strand;
+
+ ddp_list = *pddp_list;
+ num = 0;
+ for(curr = ddp_list; curr != NULL; curr = curr->next)
+ ++num;
+
+ score_order = get_process_list (ddp_list);
+ plus_used_list = (BoolPtr) MemNew((size_t)bsp->length * sizeof(Boolean));
+ minus_used_list = (BoolPtr) MemNew((size_t)bsp->length * sizeof(Boolean));
+
+
+
+ for(i = 0; i<MIN(num, 10); ++i)
+ {
+ curr = get_nth_node(ddp_list, score_order[i]);
+ if(curr != NULL)
+ {
+ ddp = (DenseDiagPtr) curr->data.ptrvalue;
+ if(ddp->strands[0] != ddp->strands[1] &&
+ (ddp->strands[0] == Seq_strand_minus ||
+ ddp->strands[1] == Seq_strand_minus))
+ MemSet(minus_used_list+ddp->starts[s_order], 1,
+ (size_t)(ddp->len) * sizeof(Boolean));
+ else
+ MemSet(plus_used_list+ddp->starts[s_order], 1,
+ (size_t)(ddp->len) * sizeof(Boolean));
+ }
+ }
+
+ plus_count = count_coverage (plus_used_list, bsp->length);
+ minus_count = count_coverage (minus_used_list, bsp->length);
+ MemFree(plus_used_list);
+ MemFree(minus_used_list);
+
+ if(plus_count == 0 && minus_count == 0)
+ {
+ MemFree(score_order);
+ return 0;
+ }
+ else if(plus_count > minus_count)
+ strand = Seq_strand_plus;
+ else
+ strand = Seq_strand_minus;
+
+ /*only load the ddp_list with the right orientation*/
+ list = NULL;
+ for(i = 0; i<num; ++i)
+ {
+ curr = get_nth_node(ddp_list, score_order[i]);
+ ddp = (DenseDiagPtr) curr->data.ptrvalue;
+ if(ddp->strands[0] != ddp->strands[1] &&
+ (ddp->strands[0] == Seq_strand_minus ||
+ ddp->strands[1] == Seq_strand_minus))
+ t_strand = Seq_strand_minus;
+ else
+ t_strand = Seq_strand_plus;
+ if(t_strand == strand)
+ ValNodeAddPointer(&list, 0, ddp);
+ }
+ ValNodeFree(*pddp_list);
+ *pddp_list = list;
+ MemFree(score_order);
+ return strand;
+
+
+}
+
+
+
+/*
+* check to see if ddp falls in the order of the existing list
+*
+*/
+static Boolean load_ddp_in_order(ValNodePtr PNTR h_list, DenseDiagPtr ddp, Boolean reverse, Boolean add)
+{
+ Int4 m_stop, s_stop;
+ Int4 mt_stop, st_stop;
+ ValNodePtr curr, prev;
+ DenseDiagPtr t_ddp;
+ Boolean load;
+ ValNodePtr vnp;
+ Int4 i;
+
+ m_stop = ddp->starts[0] + ddp->len -1;
+ s_stop = ddp->starts[1] + ddp->len -1;
+
+ for(curr = *h_list; curr != NULL; curr = curr->next)
+ {
+ t_ddp = (DenseDiagPtr) curr->data.ptrvalue;
+ if(ddp->len <= t_ddp->len)
+ {
+ for(i = 0; i<2; ++i)
+ {
+ if(ddp->starts[i] >= t_ddp->starts[i])
+ {
+ if(t_ddp->starts[i] + t_ddp->len >= ddp->starts[i] + ddp->len)
+ return FALSE;
+ }
+ }
+ }
+ }
+ prev = NULL;
+ curr = *h_list;
+ while(curr)
+ {
+ t_ddp = (DenseDiagPtr) curr->data.ptrvalue;
+ mt_stop = t_ddp->starts[0] + t_ddp->len -1;
+ st_stop = t_ddp->starts[1] + t_ddp->len -1;
+ if(m_stop <= mt_stop)
+ {
+ if(reverse)
+ load = (s_stop >= st_stop);
+ else
+ load = (s_stop <= st_stop);
+ /*check with the previous order*/
+ if(load)
+ {
+ if(prev != NULL)
+ {
+ t_ddp = (DenseDiagPtr) prev->data.ptrvalue;
+ mt_stop = t_ddp->starts[0] + t_ddp->len -1;
+ st_stop = t_ddp->starts[1] + t_ddp->len -1;
+
+ if(m_stop < mt_stop)
+ load = FALSE;
+ else
+ {
+ if(reverse)
+ load = (s_stop <= st_stop);
+ else
+ load = (s_stop >= st_stop);
+ }
+ }
+ }
+ if(load)
+ {
+ if(add)
+ {
+ vnp = ValNodeNew(NULL);
+ vnp->choice = 1;
+ vnp->data.ptrvalue = ddp;
+ if(prev == NULL)
+ *h_list = vnp;
+ else
+ prev->next = vnp;
+ vnp->next = curr;
+ }
+ return TRUE;
+ }
+ else
+ return FALSE;
+ }
+ prev = curr;
+ curr = curr->next;
+ }
+
+ if(prev != NULL)
+ {
+ if(reverse)
+ load = (s_stop <= st_stop);
+ else
+ load = (s_stop >= st_stop);
+
+ if(load)
+ {
+ vnp = ValNodeNew(NULL);
+ vnp->choice = 1;
+ vnp->data.ptrvalue = ddp;
+ prev->next = vnp;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+
+
+static Int4Ptr load_ddp_scores(ValNodePtr ddp_list, Int4Ptr num)
+{
+ DenseDiagPtr ddp;
+ Int4 i;
+ Int4Ptr score_list;
+ ValNodePtr curr;
+
+ i = 0;
+ for(curr = ddp_list; curr != NULL; curr = curr->next)
+ ++i;
+ if(i == 0)
+ return NULL;
+ *num = i;
+ score_list = (Int4Ptr) MemNew((size_t)i * sizeof(Int4));
+ i = 0;
+ for(curr = ddp_list; curr != NULL; curr = curr->next)
+ {
+ ddp = (DenseDiagPtr) curr->data.ptrvalue;
+ score_list[i] = get_score_from_list (ddp->scores);
+ ++i;
+ }
+ return score_list;
+}
+
+
+static Boolean is_ddp_overlap(DenseDiagPtr ddp, BoolPtr first_status, BoolPtr second_status)
+{
+ FloatHi val_1, val_2;
+ Int4 overlap_1, overlap_2;
+
+ val_1 = get_overlap(first_status, ddp->starts[0], ddp->len, &overlap_1);
+ val_2 = get_overlap(second_status, ddp->starts[1], ddp->len, &overlap_2);
+
+ if(val_1 >=0.95 || val_2 >=0.95 || (val_1 > 0.7 && val_2 >0.7))
+ return TRUE;
+
+ /*maximum non-overlap region is less than 50-bp*/
+ if(ddp->len - MAX(overlap_1, overlap_2) < MIN(50, ddp->len-10))
+ return TRUE;
+ return FALSE;
+}
+
+
+/*
+* lns.c - Given a sequence of numbers, this program computes
+* the Longest Nondecreasing Subsequence.
+*
+* Feb. 20, 1997
+*
+* Program by Jinghui Zhang & Kun-Mao Chao
+* Toolkit version : H. Sicotte, 1/04/1999
+*/
+
+/* Input :
+ call lns()
+ S : a sequence of numbers;
+ len_s : the length of the sequences;
+ Output:
+ LNS : the positions of the Longest Nondecreasing Subsequence;
+ return value : the length of the LNS;
+ Note: "-1" entries are allowed.
+*/
+
+/* Input :
+ SS : a sequence of numbers; (without "-1" entries)
+ len_ss : the length of the sequences;
+ Output:
+ LNS : the positions of the Longest Nondecreasing Subsequence;
+ return value : the length of the LNS;
+*/
+static Int4 pure_lns(Int4Ptr SS, Int4 len_ss, Int4Ptr LNS)
+{
+ Int4 len_lns;
+ Int4Ptr PREV=NULL,BEST=NULL,BEST_POS=NULL;
+ Int4 i, low, high, mid;
+ Int4 t;
+
+ len_lns = 1;
+ if(len_ss) {
+ PREV = (Int4Ptr) Malloc(sizeof(Int4)*len_ss);
+ BEST = (Int4Ptr) Malloc(sizeof(Int4)*len_ss);
+ BEST_POS = (Int4Ptr) Malloc(sizeof(Int4)*len_ss);
+ } else
+ return 0;
+ BEST[0] = SS[0];
+ BEST_POS[0] = 0;
+ PREV[0] = -1;
+ for (i=1; i<len_ss; ++i) {
+ t = SS[i];
+ low = 0;
+ high = len_lns-1;
+ while (low < high) {
+ mid = (low + high) / 2;
+ if (t >= BEST[mid]) low = mid + 1;
+ else high = mid;
+ }
+ if (t < BEST[low]) {
+ if (low == 0) PREV[i] = -1;
+ else PREV[i] = BEST_POS[low-1];
+ BEST[low] = t;
+ BEST_POS[low] = i;
+ } else {
+ BEST[len_lns] = t;
+ BEST_POS[len_lns] = i;
+ PREV[i] = BEST_POS[len_lns-1];
+ ++len_lns;
+ }
+ }
+
+ /* trace back */
+
+ t = BEST_POS[len_lns-1];
+ for (i=len_lns-1; i>=0; --i) {
+ LNS[i] = t;
+ t = PREV[t];
+ }
+
+ if(PREV)
+ Free(PREV);
+ if(BEST)
+ Free(BEST);
+ if(BEST_POS)
+ Free(BEST_POS);
+ return len_lns;
+}
+static Int4 lns(Int4Ptr S, Int4 len_s, Int4Ptr LNS)
+{
+ Int4Ptr SS=NULL;
+ Int4 len_ss, len_lns;
+ Int4 i, j;
+ SS=(Int4Ptr)Malloc(len_s*sizeof(Int4));
+ /* screen out those "-1" entries */
+ len_ss = 0;
+ for (i=0; i<len_s; ++i)
+ if (S[i] >= 0)
+ SS[len_ss++] = S[i];
+
+ len_lns = pure_lns(SS, len_ss, LNS);
+
+ /* add the offset */
+ for (i=0, j=0; j<len_lns; ++i)
+ if (S[i] == SS[LNS[j]]) LNS[j++] = i;
+
+ if(SS)
+ Free(SS);
+ return len_lns;
+}
+
+
+
+
+static void sort_list_order(ValNodePtr PNTR ph_list, Uint1 strand)
+{
+ ValNodePtr h_list;
+ ValNodePtr curr;
+ DenseDiagPtr ddp;
+ Int4 val;
+ ValNodePtr best_list = NULL;
+
+
+ h_list = *ph_list;
+ if(h_list == NULL || h_list->next == NULL)
+ return;
+ ddp = (DenseDiagPtr) h_list->data.ptrvalue;
+ ValNodeAddPointer(&best_list, 0, ddp);
+ for(curr = h_list->next; curr != NULL; curr = curr->next)
+ {
+ ddp = (DenseDiagPtr) curr->data.ptrvalue;
+ load_ddp_in_order(&best_list, ddp, (Boolean) (strand==Seq_strand_minus), TRUE);
+ }
+
+
+ filter_seqid_order = 0;
+ h_list = ValNodeSort(h_list, CompareChainProc);
+ {
+ Int4 len_S=0,len_lns=0,rlen_lns=0;
+ Int4Ptr S=NULL,rS=NULL,LNS=NULL,rLNS=NULL,this_LNS;
+ Int4 i,j;
+ len_S = ValNodeLen(h_list);
+ if(len_S>0) {
+ S=(Int4Ptr)Malloc(len_S*sizeof(Int4));
+ rS=(Int4Ptr)Malloc(len_S*sizeof(Int4));
+ LNS=(Int4Ptr)Malloc(len_S*sizeof(Int4));
+ rLNS=(Int4Ptr)Malloc(len_S*sizeof(Int4));
+ for(curr = h_list,i=0; curr != NULL; curr = curr->next)
+ {
+ ddp = (DenseDiagPtr) curr->data.ptrvalue;
+ if(strand == Seq_strand_minus)
+ S[i++] = ddp->starts[1];
+ else
+ S[i++] = ddp->starts[1] + ddp->len-1;
+ }
+ for(i=len_S-1,j=0;i>=0;i--,j++)
+ rS[j]=S[i];
+ len_lns = lns(S, len_S, LNS);
+ rlen_lns = lns(rS, len_S, rLNS);
+ }
+ if (len_lns <= 0 || rlen_lns <=0) {
+ val=-1;
+ Message(MSG_ERROR, "Fail in get_lns");
+ if(len_S) {
+ Free(S);
+ Free(rS);
+ Free(LNS);
+ Free(rLNS);
+ }
+ exit(2);
+ } else if(rlen_lns > len_lns) {
+ val = 1; /* usr rLNS */
+ len_lns=rlen_lns;
+ this_LNS = rLNS;
+ } else {
+ val =0; /* use LNS */
+ this_LNS = LNS;
+ }
+ /* inconsistent orientation from LNS */
+ if((strand == Seq_strand_minus && val == 0) ||
+ (strand != Seq_strand_minus && val== 1))
+ {
+ ValNodeFree(h_list);
+ *ph_list = best_list;
+ if(len_S) {
+ Free(S);
+ Free(rS);
+ Free(LNS);
+ Free(rLNS);
+ }
+ return;
+ }
+
+ ValNodeFree(best_list);
+ for(i=0;i<len_lns;i++) {
+ val = this_LNS[i];
+ if(strand == Seq_strand_minus)
+ val =len_S -1 - val;
+ curr = get_nth_node(h_list, val);
+ if(curr != NULL)
+ curr->choice = 1;
+ }
+ if(len_S) {
+ Free(S);
+ Free(rS);
+ Free(LNS);
+ Free(rLNS);
+ }
+
+ }
+ /*
+ {
+ FILE *tmp_fp;
+ tmp_fp = FileOpen("Ins.test", "w");
+ for(curr = h_list; curr != NULL; curr = curr->next)
+ {
+ ddp = curr->data.ptrvalue;
+ if(strand == Seq_strand_minus)
+ fprintf(tmp_fp, "%ld\t", ddp->starts[1]);
+ else
+ fprintf(tmp_fp, "%ld\t", ddp->starts[1]+ddp->len-1);
+ }
+ fprintf(tmp_fp, "\n");
+ FileClose(tmp_fp);
+ system("get_lns < Ins.test >out");
+
+ tmp_fp = FileOpen("out", "r");
+ fscanf(tmp_fp, "%ld\n", &val);
+ if(val == -1)
+ {
+ Message(MSG_ERROR, "Fail in get_lns");
+ exit(1);
+ }
+ // inconsistent orientation from LNS //
+ if((strand == Seq_strand_minus && val == 0) ||
+ (strand != Seq_strand_minus && val== 1))
+ {
+ FileClose(tmp_fp);
+ ValNodeFree(h_list);
+ *ph_list = best_list;
+ return;
+ }
+
+
+ ValNodeFree(best_list);
+ num = ValNodeLen(h_list);
+ while(fscanf(tmp_fp, "%ld\n", &val) != EOF)
+ {
+ if(strand == Seq_strand_minus)
+ val = num -1 - val;
+ curr = get_nth_node(h_list, val);
+ if(curr != NULL)
+ curr->choice = 1;
+ }
+ FileClose(tmp_fp);
+ }
+*/
+ delete_bad_node(&h_list, 1);
+
+ if(h_list == NULL)
+ {
+ Message(MSG_ERROR, "No good node in sort_list_order");
+ exit(1);
+ }
+
+ *ph_list = h_list;
+}
+
+
+
+static ValNodePtr check_ddp_order(ValNodePtr ddp_list, Uint1 strand, Int4 first_len, Int4 second_len)
+{
+ DenseDiagPtr ddp;
+ ValNodePtr h_list = NULL, t_list;
+ ValNodePtr curr;
+ Int4 num, i;
+ Boolean reverse;
+ BoolPtr first_status, second_status;
+ Int4 p_score;
+ Int4Ptr score_list;
+ Boolean is_end;
+
+
+ if(ddp_list == NULL || ddp_list->next == NULL)
+ return ddp_list;
+ score_list = load_ddp_scores(ddp_list, &num);
+ if(score_list == NULL)
+ return ddp_list;
+
+ for(curr = ddp_list; curr != NULL; curr = curr->next)
+ curr->choice = 0;
+ p_score = score_list[0];
+ first_status = (BoolPtr) MemNew((size_t)first_len * sizeof(Boolean));
+ second_status = (BoolPtr) MemNew((size_t)second_len * sizeof(Boolean));
+
+ reverse = (strand == Seq_strand_minus);
+ h_list = NULL;
+ is_end = FALSE;
+ while(!is_end)
+ {
+ t_list = NULL;
+ for(i= 0, curr = ddp_list; curr != NULL; curr = curr->next)
+ {
+ if(p_score/(score_list[i]) <=2)
+ {
+ if(curr->choice == 0)
+ {
+ curr->choice = 1;
+ ddp = (DenseDiagPtr) curr->data.ptrvalue;
+ if(h_list == NULL ||
+ load_ddp_in_order(&h_list, ddp, reverse, FALSE))
+ {
+ if(!is_ddp_overlap(ddp, first_status, second_status))
+ ValNodeAddPointer(&t_list, 0, ddp);
+ }
+ }
+ if(curr->next == NULL)
+ {
+ p_score = score_list[i];
+ is_end = TRUE;
+ break;
+ }
+ }
+ else
+ {
+ p_score = score_list[i];
+ break;
+ }
+ ++i;
+ }
+ if(t_list != NULL)
+ {
+ sort_list_order(&t_list, strand);
+ if(h_list == NULL)
+ {
+ h_list = t_list;
+ for(curr = t_list; curr != NULL; curr = curr->next)
+ {
+ ddp = (DenseDiagPtr) curr->data.ptrvalue;
+ MemSet(first_status+ddp->starts[0], 1, (size_t)(ddp->len) * sizeof(Boolean));
+ MemSet(second_status+ddp->starts[1], 1, (size_t)(ddp->len) * sizeof(Boolean));
+ }
+ }
+ else
+ {
+ for(curr = t_list; curr != NULL; curr = curr->next)
+ {
+ ddp = (DenseDiagPtr) curr->data.ptrvalue;
+ if(load_ddp_in_order(&h_list, ddp, reverse, TRUE))
+ {
+ MemSet(first_status+ddp->starts[0], 1, (size_t)(ddp->len) * sizeof(Boolean));
+ MemSet(second_status+ddp->starts[1], 1, (size_t)(ddp->len) * sizeof(Boolean));
+ }
+ }
+ ValNodeFree(t_list);
+ }
+ }
+ if(is_end)
+ break;
+ }
+
+ if(h_list == NULL)
+ {
+ Message(MSG_ERROR, "No good node in check_ddp_order");
+ exit(1);
+ }
+
+ MemFree(score_list);
+ MemFree(first_status);
+ MemFree(second_status);
+ ValNodeFree(ddp_list);
+ return h_list;
+
+
+}
+
+
+
+/*
+*
+* assume the align is the alignment computed from BLAST. Get rid of
+* all the smaller segments that are out of the main diagnol. the ValNodePtr
+* returns a list of dense-diags that are on the main diagnol. s_sip specifies
+* the sequence that will have non-redundant coverage
+*/
+NLM_EXTERN ValNodePtr FilterSeqAlign(SeqAlignPtr align, SeqIdPtr s_sip, Uint1Ptr p_strand)
+{
+ Int2 order = 0, s_order;
+ DenseDiagPtr ddp;
+ SeqIdPtr sip;
+ Uint1 strand;
+ BioseqPtr bsp;
+ ValNodePtr ddp_list;
+ BioseqPtr first_bsp, second_bsp;
+
+ if(s_sip == NULL || align == NULL)
+ return NULL;
+ if(align->segtype != 1)
+ return NULL;
+ bsp = BioseqFind(s_sip);
+ if(bsp == NULL)
+ {
+ ErrPostEx(SEV_WARNING, 0, 0, "Fail to find Bioseq for s_sip");
+ return NULL;
+ }
+
+ ddp_list = NULL;
+ s_order = -1;
+ for(ddp = (DenseDiagPtr) align->segs; ddp != NULL; ddp = ddp->next)
+ {
+ ValNodeAddPointer(&ddp_list, 0, ddp);
+ if(s_order == -1)
+ {
+ for(sip = ddp->id, order = 0; sip != NULL; sip = sip->next, ++order)
+ {
+ if(SeqIdMatch(s_sip, sip))
+ {
+ s_order = order;
+ }
+ }
+ }
+ }
+ if(s_order == -1)
+ {
+ printf("error in finding the right order\n");
+ exit(1);
+ }
+
+ /*ddp_list only contains the alignment with the right orientation*/
+ /* it also has been sorted according to the order of the scores*/
+ strand = get_alignment_orientation(&ddp_list, bsp, s_order);
+
+ if(strand == 0)
+ return NULL;
+
+ ddp = (DenseDiagPtr) align->segs;
+ first_bsp = BioseqFind(ddp->id);
+ second_bsp = BioseqFind(ddp->id->next);
+ /* filter_wrong_orientation (align, strand == Seq_strand_plus); */
+ filter_repeats(&ddp_list, first_bsp->length, second_bsp->length);
+
+ if(ddp_list == NULL)
+ {
+ Message(MSG_ERROR, "Nothing leftover after filter_repeats");
+ exit(1);
+ }
+ *p_strand = strand;
+ return check_ddp_order(ddp_list, strand, first_bsp->length, second_bsp->length);
+
+}
+
+static Int4 get_dsp_align_len(DenseSegPtr dsp)
+{
+ Int2 i;
+ Int4 len = 0;
+
+ for(i = 0; i<dsp->numseg; ++i)
+ {
+ if(dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1)
+ len += dsp->lens[i];
+ }
+
+ return len;
+}
+
+/*
+*
+* convert the Dense-seg to Dense-diag
+* take the end point of aligned segment and make an alignment of
+* Dense-diag
+*
+*/
+
+static ScorePtr make_seg_score(Int4 align_len, Int4 seg_len, Int4 total_score)
+{
+ Int4 score;
+ ScorePtr sp;
+ ObjectIdPtr oip;
+
+ /* score = (seg_len * total_score)/align_len; */
+ score = total_score;
+ sp = ScoreNew();
+ oip = ObjectIdNew();
+ oip->str = StringSave("score");
+ sp->id = oip;
+ sp->choice = 1;
+ sp->value.intvalue = score;
+ return sp;
+}
+
+
+static DenseDiagPtr SeqAlignConvertDspToDdp(SeqAlignPtr align, DenseDiagPtr PNTR h_ddp)
+{
+ DenseSegPtr dsp;
+ DenseDiagPtr ddp, prev;
+ Int2 i;
+ Int4 score;
+ Int4 align_len;
+
+ if(align == NULL || align->segtype != 2)
+ return NULL;
+ dsp = (DenseSegPtr) align->segs;
+ if(dsp == NULL || dsp->dim != 2)
+ return NULL;
+ align_len = get_dsp_align_len(dsp);
+
+ if(*h_ddp != NULL)
+ {
+ prev = *h_ddp;
+ while(prev->next != NULL)
+ prev = prev->next;
+ }
+ else
+ prev = NULL;
+ score = get_score_from_list (align->score);
+ for(i = 0; i<dsp->numseg; ++i)
+ {
+ if(dsp->starts[2*i] != -1 &&
+ dsp->starts[2*i+1] != -1 && dsp->lens[i]>0)
+ {
+ ddp = DenseDiagNew();
+ ddp->dim = 2;
+ ddp->id = SeqIdDup(dsp->ids);
+ ddp->id->next = SeqIdDup(dsp->ids->next);
+ ddp->starts = (Int4Ptr) MemNew((size_t)2 * sizeof(Int4));
+ ddp->starts[0] = dsp->starts[2*i];
+ ddp->starts[1] = dsp->starts[2*i + 1];
+ ddp->len = dsp->lens[i];
+ ddp->strands = (Uint1Ptr) MemNew((size_t)2 * sizeof(Uint1));
+ if(dsp->strands != NULL)
+ {
+ ddp->strands[0] = dsp->strands[2*i];
+ ddp->strands[1] = dsp->strands[2*i+1];
+ }
+ ddp->scores = make_seg_score(align_len, dsp->lens[i], score);
+ if(*h_ddp == NULL)
+ *h_ddp= ddp;
+ else
+ prev->next = ddp;
+ prev = ddp;
+ }
+ }
+
+ return (*h_ddp);
+
+}
+
+/*
+* convert a list of DenseSeg to DenseDiag
+*
+*/
+NLM_EXTERN SeqAlignPtr SeqAlignConvertDspToDdpList(SeqAlignPtr align)
+{
+ SeqAlignPtr n_align;
+ DenseDiagPtr h_ddp = NULL;
+
+
+ while(align)
+ {
+ SeqAlignConvertDspToDdp(align, &h_ddp);
+ align = align->next;
+ }
+ if(h_ddp != NULL)
+ {
+ n_align = SeqAlignNew();
+ n_align->segtype = 1;
+ n_align->segs = h_ddp;
+ return n_align;
+ }
+ else
+ return NULL;
+
+}
+
+static SeqAlignPtr clean_up_align_list(SeqAlignPtr PNTR h_align, ValNodePtr head)
+{
+ DenseDiagPtr ddp, n_ddp, p_ddp, c_ddp;
+ SeqAlignPtr prev, next;
+ Boolean found;
+ ValNodePtr curr;
+ SeqAlignPtr align;
+
+
+ prev = NULL;
+ align = *h_align;
+ while(align)
+ {
+ next = align->next;
+
+ ddp = (DenseDiagPtr) align->segs;
+ p_ddp = NULL;
+ while(ddp)
+ {
+ found = FALSE;
+ n_ddp = ddp->next;
+ for(curr = head; curr != NULL; curr = curr->next)
+ {
+ c_ddp = (DenseDiagPtr) curr->data.ptrvalue;
+ if(ddp == c_ddp)
+ {
+ found = TRUE;
+ break;
+ }
+ }
+ if(!found)
+ {
+ if(p_ddp == NULL)
+ align->segs = n_ddp;
+ else
+ p_ddp->next = n_ddp;
+ ddp->next = NULL;
+ DenseDiagFree(ddp);
+ }
+ else
+ p_ddp = ddp;
+ ddp = n_ddp;
+ }
+ if(align->segs == NULL)
+ {
+ if(prev == NULL)
+ *h_align = next;
+ else
+ prev->next = next;
+ align->next = NULL;
+ SeqAlignFree(align);
+ }
+ else
+ prev = align;
+ align = next;
+ }
+
+ return (*h_align);
+}
+
+
+
+
+/*
+* return a Seq-align that was made up from the DenseDiag in the
+* right order. The Seq-align is freshly created
+* the chain of the Seq-align should be for the same Seq-id
+*
+*/
+NLM_EXTERN SeqAlignPtr FilterDenseSegAlign(SeqAlignPtr align, SeqIdPtr sip)
+{
+ SeqAlignPtr h_align;
+ Uint1 strand;
+ ValNodePtr head = NULL;
+
+ h_align = SeqAlignConvertDspToDdpList(align);
+ if(h_align == NULL)
+ return NULL;
+ head = FilterSeqAlign(h_align, sip, &strand);
+
+ if(head == NULL)
+ {
+ SeqAlignFree(h_align);
+ return NULL;
+ }
+ clean_up_align_list(&h_align, head);
+ ValNodeFree(head);
+ return h_align;
+}
+
+
+
+/*
+*
+* from the align, figure out the maximum and minimum range on the two sequences.
+* the two Seq-locs can then be sent for the seond path algorithm to compute
+* sequence alignment
+* align is normally computed from a heuristic method, such as BLAST
+* return TRUE for success and FLASE for failure
+*/
+static Boolean FigureAlignRange(SeqAlignPtr align, SeqLocPtr m_loc, SeqLocPtr s_loc)
+{
+ DenseDiagPtr ddp;
+ ValNodePtr head, vnp;
+ SeqIntPtr sint;
+ Int4 s_max =-1 , s_min = -1;
+ Int4 m_max = -1, m_min = -1;
+ Uint1 strand;
+ Int2 s_order, order;
+ SeqIdPtr sip;
+ Uint1 m_strand, s_strand;
+
+
+
+ m_strand = 0;
+ s_strand = 0;
+ head = FilterSeqAlign(align, SeqLocId(s_loc), &strand);
+ if(head == NULL)
+ return FALSE;
+
+ ddp = (DenseDiagPtr) head->data.ptrvalue;
+ s_order = -1;
+ order = 0;
+ for(sip = ddp->id; sip != NULL; sip = sip->next, ++order)
+ {
+ if(SeqIdForSameBioseq(sip, SeqLocId(s_loc)))
+ {
+ s_order = order;
+ break;
+ }
+ }
+ if(s_order == -1)
+ {
+ ValNodeFree(head);
+ return FALSE;
+ }
+ s_min = ddp->starts[s_order];
+ if(strand == Seq_strand_minus)
+ {
+ m_max = ddp->starts[1-s_order] + ddp->len -1;
+ }
+ else
+ m_min = ddp->starts[1-s_order];
+ vnp = head;
+ while(vnp->next != NULL)
+ {
+ ddp = (DenseDiagPtr) vnp->data.ptrvalue;
+ /* printf("%ld, %ld, len = %ld\n", ddp->starts[0], ddp->starts[1], ddp->len); */
+ vnp = vnp->next;
+ }
+ ddp = (DenseDiagPtr) vnp->data.ptrvalue;
+
+ s_max = ddp->starts[s_order] + ddp->len -1;
+ if(strand == Seq_strand_minus)
+ {
+ m_min = ddp->starts[1-s_order];
+ }
+ else
+ m_max = ddp->starts[1-s_order] + ddp->len -1;
+
+ if(m_min > m_max)
+ {
+
+ Message(MSG_ERROR, "m_min = %ld, m_max = %ld", m_min, m_max);
+ ValNodeFree(head);
+ return FALSE;
+ }
+ ValNodeFree(head);
+
+ if(s_min == -1 || s_max == -1 || m_min == -1 || m_max == -1)
+ return FALSE;
+
+ m_strand = Seq_strand_plus;
+ s_strand = strand;
+
+
+ sint = (SeqIntPtr) m_loc->data.ptrvalue;
+ sint->from = m_min;
+ sint->to = m_max;
+ sint->strand = m_strand;
+
+ sint = (SeqIntPtr) s_loc->data.ptrvalue;
+ sint->from = s_min;
+ sint->to = s_max;
+ sint->strand = s_strand;
+
+ return TRUE;
+}
+
+
+
+NLM_EXTERN Boolean is_loc_overlap(SeqLocPtr loc_1, SeqLocPtr loc_2, Int4Ptr min_val, Int4Ptr max_val, Int4 gap_len, Int4 max_align_size, Int4Ptr p_gap_val)
+{
+ Int4 start_1, stop_1, start_2, stop_2;
+
+ start_1 = SeqLocStart(loc_1);
+ stop_1 = SeqLocStop(loc_1);
+ start_2 = SeqLocStart(loc_2);
+ stop_2 = SeqLocStop(loc_2);
+
+ /*strict overlap*/
+ if(!(stop_1 < start_2 || start_1 > stop_2))
+ {
+ *min_val = MAX(start_1, start_2);
+ *max_val = MIN(stop_1, stop_2);
+ return TRUE;
+ }
+
+ if(gap_len == 0)
+ return FALSE;
+ if(stop_2 > stop_1)
+ {
+ /* if(start_2 - stop_1 <= gap_len) */
+ if(start_2 - stop_1 <= max_align_size)
+ {
+ *min_val = stop_1;
+ *max_val = start_2;
+ if(p_gap_val != NULL)
+ *p_gap_val = start_2 - stop_1;
+ return TRUE;
+ }
+ else
+ return FALSE;
+ }
+ else
+ {
+ /* if(start_1 - stop_2 <= gap_len) */
+ if(start_1 - stop_2 <= max_align_size)
+ {
+ *min_val = stop_2;
+ *max_val = start_1;
+ if(p_gap_val != NULL)
+ *p_gap_val = start_1 - stop_2;
+ return TRUE;
+ }
+ else
+ return FALSE;
+ }
+
+}
+
+
+/*
+*
+* ppos is the position of the known sequence, trying to find the matching sequence
+* position
+*/
+NLM_EXTERN Int4 find_matching_position(DenseSegPtr dsp, Int4 pos, Int2 pos_order)
+{
+ Int2 i;
+ Int4 start_1, start_2;
+ Int4 offset;
+
+
+ for(i = 0; i<dsp->numseg; ++i)
+ {
+ start_1 = dsp->starts[2*i + pos_order];
+ start_2 = dsp->starts[2*i + 1 - pos_order];
+
+ if(start_1 != -1 && start_2 != -1)
+ {
+ if(pos >= start_1 && pos <= start_1 + dsp->lens[i] -1)
+ {
+ if(dsp->strands[pos_order] == Seq_strand_minus)
+ {
+ offset = start_1 + dsp->lens[i] - 1 - pos;
+ }
+ else
+ offset = pos - start_1;
+
+ if(dsp->strands[1 - pos_order] == Seq_strand_minus)
+ return (start_2 + dsp->lens[i] -1 - offset);
+ else
+ return (start_2 + offset);
+ }
+ }
+ }
+
+
+ return -1;
+}
+
+static void SeqAnnotWrite(SeqAlignPtr align)
+{
+ SeqAnnotPtr annot;
+ AsnIoPtr aip;
+
+ annot = SeqAnnotNew();
+ annot->type = 2;
+ annot->data = align;
+
+ aip = AsnIoOpen("temp2.sat", "w");
+ SeqAnnotAsnWrite(annot, aip, NULL);
+ AsnIoClose(aip);
+
+ annot->data = NULL;
+ SeqAnnotFree(annot);
+}
+
+NLM_EXTERN Boolean select_overlap_loc(SeqLocPtr loc_1_1, SeqLocPtr loc_2_1, SeqLocPtr loc_1_2, SeqLocPtr loc_2_2, Int2Ptr p_order)
+{
+ Int4 start_1, stop_1, start_2, stop_2;
+ Int4 overlap_1, overlap_2;
+
+ overlap_1 = 0;
+ overlap_2 = 0;
+
+ start_1 = SeqLocStart(loc_1_1);
+ stop_1 = SeqLocStop(loc_1_1);
+
+ start_2 = SeqLocStart(loc_2_1);
+ stop_2 = SeqLocStop(loc_2_1);
+
+ if(!(start_2 > stop_1 || stop_2 < start_1))
+ {
+ if(stop_2 >= stop_1)
+ overlap_1 = MAX(0, stop_1 - start_2 + 1);
+ else
+ overlap_1 = MAX(0, stop_2 - start_1 + 1);
+ }
+
+ start_1 = SeqLocStart(loc_1_2);
+ stop_1 = SeqLocStop(loc_1_2);
+
+ start_2 = SeqLocStart(loc_2_2);
+ stop_2 = SeqLocStop(loc_2_2);
+
+ if(!(start_2 > stop_1 || stop_2 < start_1))
+ {
+ if(stop_2 >= stop_1)
+ overlap_2 = MAX(0, stop_1 - start_2 + 1);
+ else
+ overlap_2 = MAX(0, stop_2 - start_1 + 1);
+ }
+
+ if(overlap_1 == 0 && overlap_2 == 0)
+ return FALSE;
+ else
+ {
+ if(overlap_1 >= overlap_2)
+ *p_order = 0;
+ else
+ *p_order = 1;
+ return TRUE;
+ }
+}
+
+
+
+/*###############################################################
+*
+* functions for merge the two alignments
+*
+################################################################*/
+
+
+NLM_EXTERN Int4 get_align_len_in_overlap (SeqAlignPtr align, Int4 from, Int4 to, Int2 order)
+{
+ DenseSegPtr dsp;
+ Int2 i;
+ Int4 start, stop;
+ Int4 align_len = 0;
+
+ dsp = (DenseSegPtr) align->segs;
+ for(i = 0; i<dsp->numseg; ++i)
+ {
+ if(dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1)
+ {
+ start = dsp->starts[2*i + order];
+ stop = dsp->starts[2*i + order] + dsp->lens[i] -1;
+ if(!(start > to || stop < from))
+ {
+ start = MAX(start, from);
+ stop = MIN(stop, to);
+
+ if(stop >= start)
+ align_len += (stop - start + 1);
+ }
+ }
+ }
+
+ return align_len;
+}
+
+
+
+static SeqAnnotPtr open_annot(CharPtr f_name)
+{
+ SeqAnnotPtr annot;
+ AsnIoPtr aip;
+
+ aip = AsnIoOpen(f_name, "r");
+ annot = SeqAnnotAsnRead(aip, NULL);
+ AsnIoClose(aip);
+ return annot;
+}
+
+typedef struct segdata {
+ Int2 seg_order;
+ Int4 overlap_len;
+}SegOrder, PNTR SegOrderPtr;
+
+/*
+* find the longest diagnol in the overlapping region
+*
+*/
+static int LIBCALLBACK SegOrderCompProc (VoidPtr ptr1, VoidPtr ptr2)
+{
+ ValNodePtr vnp1, vnp2;
+ SegOrderPtr sop1, sop2;
+
+ if (ptr1 != NULL && ptr2 != NULL) {
+ vnp1 = *((ValNodePtr PNTR) ptr1);
+ vnp2 = *((ValNodePtr PNTR) ptr2);
+ if (vnp1 != NULL && vnp2 != NULL) {
+ sop1 = (SegOrderPtr) vnp1->data.ptrvalue;
+ sop2 = (SegOrderPtr) vnp2->data.ptrvalue;
+ if(sop1 != NULL && sop2 != NULL)
+ {
+ if(sop1->overlap_len > sop2->overlap_len)
+ return -1;
+ else if(sop1->overlap_len < sop2->overlap_len)
+ return 1;
+ else
+ return 0;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static ValNodePtr find_overlap_diagnol(SeqAlignPtr align, Int4 from, Int4 to, Int2 order)
+{
+ Int2 i;
+ DenseSegPtr dsp;
+ Int4 start, stop;
+ SegOrderPtr sop;
+ ValNodePtr seg_list;
+
+ seg_list = NULL;
+ dsp = (DenseSegPtr) align->segs;
+ for(i = 0; i<dsp->numseg; ++i)
+ {
+ if(dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1)
+ {
+ start = dsp->starts[2*i+order];
+ stop = start + dsp->lens[i] -1;
+ if(!(start > to || stop < from))
+ {
+
+ start = MAX(start, from);
+ stop = MIN(stop, to);
+ sop = (SegOrderPtr) MemNew(sizeof(SegOrder));
+ sop->seg_order = i;
+ sop->overlap_len = stop - start + 1;
+ ValNodeAddPointer(&seg_list, 0, sop);
+ }
+ }
+ }
+
+ if(seg_list != NULL)
+ return ValNodeSort(seg_list, SegOrderCompProc);
+ else
+ return NULL;
+}
+
+static Boolean is_dsp_same(DenseSegPtr dsp_1, DenseSegPtr dsp_2)
+{
+ if(dsp_1 == NULL || dsp_2 == NULL)
+ return FALSE;
+ if(dsp_1->ids == NULL || dsp_2->ids == NULL)
+ return FALSE;
+ if(!SeqIdMatch(dsp_1->ids, dsp_2->ids))
+ return FALSE;
+
+ if(dsp_1->ids->next == NULL || dsp_2->ids->next == NULL)
+ return FALSE;
+ if(!SeqIdMatch(dsp_1->ids->next, dsp_2->ids->next))
+ return FALSE;
+
+ if(dsp_1->strands == NULL || dsp_2->strands == NULL)
+ return FALSE;
+
+ if(!is_same_orientation(dsp_1->strands[0], dsp_2->strands[0]))
+ return FALSE;
+
+ if(!is_same_orientation(dsp_1->strands[1], dsp_2->strands[1]))
+ return FALSE;
+
+ return TRUE;
+}
+
+static Int4 get_score_value(ScorePtr sp)
+{
+ ObjectIdPtr oip;
+
+ while(sp)
+ {
+ if(sp->id)
+ {
+ oip = sp->id;
+ if(oip && oip->str && StringCmp(oip->str, "score") == 0)
+ {
+ if(sp->choice == 1)
+ return sp->value.intvalue;
+ }
+ }
+ sp = sp->next;
+ }
+
+ return -1;
+}
+
+static void load_big_score (SeqAlignPtr align_1, SeqAlignPtr align_2)
+{
+ Int4 val_1, val_2;
+
+ val_1 = get_score_value(align_1->score);
+ val_2 = get_score_value(align_2->score);
+
+ if(val_1 < val_2)
+ {
+ ScoreSetFree(align_1->score);
+ align_1->score = align_2->score;
+ align_2->score = NULL;
+ }
+}
+
+/*
+ merge two seqaligns: align_1 contains the merged seqalign, align_2 is clobbered.
+ reorders the seqalign on input so that align_1 is "left" of align_2
+ in display-style coordinates.
+
+ */
+
+NLM_EXTERN Boolean merge_two_align(SeqAlignPtr align_1, SeqAlignPtr align_2, Int4 from, Int4 to, Int2 order)
+{
+ ValNodePtr seg_list_1, seg_list_2;
+ SegOrderPtr sop_1, sop_2;
+ ValNodePtr curr_1, curr_2;
+ Boolean found;
+ Int2 seg_order_1, seg_order_2;
+ DenseSegPtr dsp_1, dsp_2;
+ Int4 start_1, stop_1, start_2, stop_2;
+ Int4 overlap_1, overlap_2;
+ Int4 extend_len;
+ Int2 n_seg_num, i, j;
+ Int4Ptr starts, lens;
+ Uint1Ptr strands;
+ Int4 m_start_1, s_start_1, len_1;
+ Int4 m_start_2, s_start_2, len_2;
+ Int4 offset_1;
+ if(!align_1 || !align_2 || align_1->segtype != 2 || align_2->segtype!=2)
+ return FALSE;
+ dsp_1 = (DenseSegPtr) align_1->segs;
+ dsp_2 = (DenseSegPtr) align_2->segs;
+
+ if(!is_dsp_same(dsp_1, dsp_2))
+ return FALSE;
+ seg_list_1 = find_overlap_diagnol(align_1, from, to, order);
+ if(seg_list_1 == NULL)
+ return FALSE;
+ seg_list_2 = find_overlap_diagnol(align_2, from, to, order);
+ if(seg_list_2 == NULL)
+ {
+ ValNodeFreeData(seg_list_1);
+ return FALSE;
+ }
+
+ if(SeqAlignStart(align_1,0)>SeqAlignStart(align_2,0)) {
+ VoidPtr segs;
+ ValNodePtr vnp;
+ segs = align_1->segs;
+ align_1->segs = align_2->segs;
+ align_2->segs = segs;
+ dsp_1 = (DenseSegPtr) align_1->segs;
+ dsp_2 = (DenseSegPtr) align_2->segs;
+ vnp = seg_list_1;
+ seg_list_1 = seg_list_2;
+ seg_list_2 = vnp;
+ }
+
+ found = FALSE;
+ for(curr_1 = seg_list_1; curr_1 != NULL && !found; curr_1 = curr_1->next)
+ {
+ sop_1 = (SegOrderPtr) curr_1->data.ptrvalue;
+ seg_order_1 = sop_1->seg_order;
+ m_start_1 = dsp_1->starts[2*seg_order_1 + order];
+ s_start_1 = dsp_1->starts[2*seg_order_1 + 1 - order];
+ len_1 = dsp_1->lens[seg_order_1];
+ for(curr_2 = seg_list_2; curr_2 != NULL && !found; curr_2 = curr_2->next)
+ {
+ sop_2 = (SegOrderPtr) curr_2->data.ptrvalue;
+ seg_order_2 = sop_2->seg_order;
+
+ m_start_2 = dsp_2->starts[2*seg_order_2 + order];
+ s_start_2 = dsp_2->starts[2*seg_order_2 + 1 - order];
+ len_2 = dsp_2->lens[seg_order_2];
+
+
+ found = TRUE;
+ start_1 = dsp_1->starts[2*seg_order_1 + order];
+ stop_1 = start_1 + dsp_1->lens[seg_order_1] -1;
+
+ start_2 = dsp_2->starts[2*seg_order_2 + order];
+ stop_2 = start_2 + dsp_2->lens[seg_order_2] -1;
+
+ /*check if the overlapping region on the sequence overlaps??*/
+ if(start_1 > stop_2 || stop_1 < start_2)
+ found = FALSE;
+
+ offset_1 = 0;
+ /*trim the tail of the first alignment and the head of the second*/
+ if(found)
+ {
+ if(dsp_1->strands[order] == Seq_strand_minus)
+ {
+ /*trim the head of the second alignment*/
+ /* <====-------===================== first
+ <================================ second
+ trim ^^^^^^^^^^^^XXXXXXXXXXXXXXXXXXXXX overlap
+ */
+ if(stop_2 > stop_1)
+ {
+ offset_1 = stop_2 - stop_1;
+ dsp_2->lens[seg_order_2] -= offset_1;
+ if(dsp_2->strands[1-order] != Seq_strand_minus)
+ dsp_2->starts[2*seg_order_2 + 1 - order] += offset_1;
+
+ stop_2 = stop_1;
+ }
+ /*trim the tail of the first alignment*/
+ /*
+ ^^^^^^^^^^ trim
+ <=================================== first
+ <=======================------========= second
+ XXXXXXXXXXXXXXXXXXXXXXX overlap
+ */
+ if(start_1 < start_2)
+ {
+ offset_1 = start_2 - start_1;
+ dsp_1->starts[2*seg_order_1 + order] += offset_1;
+ dsp_1->lens[seg_order_1] -= offset_1;
+ if(dsp_1->strands[1-order] == Seq_strand_minus)
+ dsp_1->starts[2*seg_order_1 + 1 - order] += offset_1;
+
+ start_1 = start_2;
+ }
+ overlap_1 = start_1 - stop_2;
+ }
+ else
+ {
+ /*trim the head of the second alignment*/
+ /*
+ ====----====================> first
+ ================================ second
+ ^^^^^^^^XXXXXXXXXXXXXXXXXXXX
+ trim overlap
+ */
+ if(start_2 < start_1)
+ {
+ offset_1 = start_1 - start_2;
+ dsp_2->starts[2*seg_order_2 + order] += offset_1;
+ if(dsp_2->strands[1-order] != Seq_strand_minus)
+
+ dsp_2->starts[2*seg_order_2 + 1 - order] += offset_1;
+ dsp_2->lens[seg_order_2] -= offset_1;
+ start_2 = start_1;
+ }
+ /*trim the tail of the first alignment*/
+ /*
+ overlap trim
+ XXXXXXXXXXXXXXXXXXXXXXX^^^^^^^^^^
+ =================================> first
+ ========================------============= second
+ */
+
+ if(stop_1 > stop_2)
+ {
+ offset_1 = stop_1 - stop_2;
+ dsp_1->lens[seg_order_1] -= offset_1;
+ if(dsp_1->strands[1-order] == Seq_strand_minus)
+
+ dsp_1->starts[2*seg_order_1 + 1 - order] += offset_1;
+ stop_1 = stop_2;
+ }
+ overlap_1 = stop_1 - start_2;
+ }
+ if(found && overlap_1 <=0)
+ found = FALSE;
+ }
+
+
+ /*check the second sequence*/
+ if(found)
+ {
+ start_1 = dsp_1->starts[2*seg_order_1 + 1 - order];
+ stop_1 = start_1 + dsp_1->lens[seg_order_1] -1;
+
+ start_2 = dsp_2->starts[2*seg_order_2 + 1 - order];
+ stop_2 = start_2 + dsp_2->lens[seg_order_2] -1;
+
+ /*check if the overlapping region on the sequence overlaps??*/
+ if(start_1 > stop_2 || stop_1 < start_2)
+ found = FALSE;
+ }
+ if(found)
+ {
+ overlap_2 = MIN(stop_2,stop_1) - MAX(start_1,start_2);
+ if(found && overlap_2 <=0)
+ found = FALSE;
+ }
+
+ if(found && overlap_2 != overlap_1)
+ found = FALSE;
+
+ if(found)
+ {
+ if(dsp_1->strands[1-order] == Seq_strand_minus)
+ {
+ stop_1 = dsp_1->starts[2*seg_order_1 + 1 - order] +
+ dsp_1->lens[seg_order_1] -1;
+ stop_2 = dsp_2->starts[2*seg_order_2 + 1 - order] +
+ dsp_2->lens[seg_order_2] -1;
+ extend_len = stop_1 - stop_2;
+ }
+ else
+ {
+ start_1 = dsp_1->starts[2*seg_order_1 + 1 - order];
+ start_2 = dsp_2->starts[2*seg_order_2 + 1 - order];
+ extend_len = start_2 - start_1;
+ }
+ if(dsp_2->strands[order] != Seq_strand_minus)
+ {
+ dsp_2->starts[2*seg_order_2 + order] -= extend_len;
+ }
+ if(dsp_2->strands[1-order] != Seq_strand_minus)
+ {
+ dsp_2->starts[2*seg_order_2 + 1 - order] -= extend_len;
+ }
+
+ dsp_2->lens[seg_order_2] += extend_len;
+
+ n_seg_num = seg_order_1 + (dsp_2->numseg - seg_order_2);
+ if(n_seg_num < 1)
+ found = FALSE;
+ }
+ if(found)
+ {
+
+ starts = (Int4Ptr) MemNew((size_t)(2*n_seg_num) * sizeof(Int4));
+ strands = (Uint1Ptr) MemNew((size_t)(2*n_seg_num) * sizeof(Uint1));
+ lens = (Int4Ptr) MemNew((size_t) n_seg_num * sizeof(Int4));
+
+ j = 0;
+ if(seg_order_1 > 0)
+ {
+ for(i = 0; i<seg_order_1; ++i)
+ {
+ starts[2*j] = dsp_1->starts[2*i];
+ starts[2*j+1] = dsp_1->starts[2*i+1];
+ strands[2*j] = dsp_1->strands[2*i];
+ strands[2*j+1] = dsp_1->strands[2*i+1];
+
+ lens[j] = dsp_1->lens[i];
+ ++j;
+ }
+ }
+
+ for(i = seg_order_2; i<dsp_2->numseg; ++i)
+ {
+ starts[2*j] = dsp_2->starts[2*i];
+ starts[2*j+1] = dsp_2->starts[2*i+1];
+ strands[2*j] = dsp_2->strands[2*i];
+ strands[2*j+1] = dsp_2->strands[2*i+1];
+ lens[j] = dsp_2->lens[i];
+ ++j;
+ }
+
+ dsp_1->numseg = n_seg_num;
+ MemFree(dsp_1->starts);
+ MemFree(dsp_1->strands);
+ MemFree(dsp_1->lens);
+
+ dsp_1->starts = starts;
+ dsp_1->strands = strands;
+ dsp_1->lens = lens;
+ load_big_score (align_1, align_2);
+ break;
+ }
+ else
+ {
+ dsp_1->starts[2*seg_order_1 + order] = m_start_1;
+ dsp_1->starts[2*seg_order_1 + 1 - order] = s_start_1;
+ dsp_1->lens[seg_order_1] = len_1;
+
+ dsp_2->starts[2*seg_order_2 + order] = m_start_2;
+ dsp_2->starts[2*seg_order_2 + 1 - order] = s_start_2;
+ dsp_2->lens[seg_order_2] = len_2;
+ }
+ }
+ }
+
+ ValNodeFreeData(seg_list_1);
+ ValNodeFreeData(seg_list_2);
+ return found;
+
+}
+
+static SeqAlignPtr extract_align_from_list(SeqAlignPtr PNTR list, SeqAlignPtr align)
+{
+ SeqAlignPtr curr, prev;
+
+ prev = NULL;
+ curr = *list;
+ while(curr)
+ {
+ if(curr == align)
+ {
+ if(prev == NULL)
+ *list = curr->next;
+ else
+ prev->next = curr->next;
+ curr->next = NULL;
+ return curr;
+ }
+ prev = curr;
+ curr = curr->next;
+ }
+ return NULL;
+}
+
+NLM_EXTERN void SeqAlignReverse(SeqAlignPtr align, Int2 order)
+{
+ DenseSegPtr dsp;
+ Int4Ptr starts, lens;
+ Int2 numseg, i, j;
+
+ if(align->segtype != 2)
+ return;
+ dsp = (DenseSegPtr) align->segs;
+ if(dsp == NULL || dsp->strands == NULL || dsp->strands[order] != Seq_strand_minus)
+ return;
+
+ numseg = dsp->numseg;
+ starts = (Int4Ptr) MemNew((size_t)2*numseg * sizeof(Int4));
+ lens = (Int4Ptr) MemNew((size_t)numseg * sizeof(Int4));
+
+ for(i = numseg-1, j = 0; i>=0; --i, ++j)
+ {
+ starts[2*j] = dsp->starts[2*i];
+ starts[2*j+1] = dsp->starts[2*i+1];
+ lens[j] = dsp->lens[i];
+ dsp->strands[2*i] = 3 - dsp->strands[2*i];
+ dsp->strands[2*i + 1] = 3 - dsp->strands[2*i + 1];
+ }
+
+ MemFree(dsp->starts);
+ dsp->starts = starts;
+ MemFree(dsp->lens);
+ dsp->lens = lens;
+}
+
+typedef struct align_overlap {
+ SeqAlignPtr align;
+ Int4 overlap_len;
+ Int4 align_len;
+}AlignOverlap, PNTR AlignOverlapPtr;
+
+static int LIBCALLBACK AlignOverlapCompProc (VoidPtr ptr1, VoidPtr ptr2)
+{
+ ValNodePtr vnp1, vnp2;
+ AlignOverlapPtr sop1, sop2;
+
+ if (ptr1 != NULL && ptr2 != NULL) {
+ vnp1 = *((ValNodePtr PNTR) ptr1);
+ vnp2 = *((ValNodePtr PNTR) ptr2);
+ if (vnp1 != NULL && vnp2 != NULL) {
+ sop1 = (AlignOverlapPtr) vnp1->data.ptrvalue;
+ sop2 = (AlignOverlapPtr) vnp2->data.ptrvalue;
+ if(sop1 != NULL && sop2 != NULL)
+ {
+ if(sop1->overlap_len > sop2->overlap_len)
+ return -1;
+ else if(sop1->overlap_len < sop2->overlap_len)
+ return 1;
+ else if(sop1->align_len > sop2->align_len)
+ return -1;
+ else if(sop1->align_len < sop2->align_len)
+ return 1;
+ else
+ return 0;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+
+static ValNodePtr build_overlap_list(SeqAlignPtr align, Int4 from, Int4 to, Int2 order)
+{
+ Int4 t_len;
+ ValNodePtr list = NULL;
+ AlignOverlapPtr aop;
+
+ /*sort the alignment by score*/
+ while(align)
+ {
+ /*check if the alignment is significant*/
+ t_len = get_align_len_in_overlap (align, from, to, order);
+ if(t_len > MIN_DIAG_LEN)
+ {
+ aop = (AlignOverlapPtr) MemNew(sizeof(AlignOverlap));
+ aop->overlap_len = t_len;
+ aop->align = align;
+ aop->align_len = SeqAlignLength(align);
+ ValNodeAddPointer(&list, 0, aop);
+ }
+ align = align->next;
+ }
+
+ if(list != NULL)
+ return ValNodeSort(list, AlignOverlapCompProc);
+ else
+ return NULL;
+
+}
+
+
+
+NLM_EXTERN Boolean MergeTwoAlignList (SeqAlignPtr h_align_1, SeqAlignPtr PNTR p_align_2, Int4 from, Int4 to, Int2 order)
+{
+
+ ValNodePtr list_1, list_2;
+ AlignOverlapPtr aop_1, aop_2;
+ ValNodePtr curr_1, curr_2;
+ SeqAlignPtr align_1, align_2;
+ Boolean found;
+ SeqAlignPtr h_align_2;
+ Boolean retval = FALSE;
+
+ h_align_2 = *p_align_2;
+ if(p_align_2 == NULL || h_align_1 == NULL || (h_align_2 = *p_align_2)== NULL)
+ return FALSE;
+
+ list_1 = build_overlap_list(h_align_1, from, to, order);
+ if(list_1 == NULL)
+ return FALSE;
+ list_2 = build_overlap_list(h_align_2, from, to, order);
+ if(list_2 == NULL)
+ {
+ ValNodeFreeData(list_1);
+ return FALSE;
+ }
+
+ for(curr_1 = list_1; curr_1 != NULL; curr_1 = curr_1->next)
+ {
+ aop_1 = (AlignOverlapPtr) curr_1->data.ptrvalue;
+ align_1 = aop_1->align;
+ SeqAlignReverse(align_1, order);
+
+ found = FALSE;
+ for(curr_2 = list_2; !found && curr_2 != NULL; curr_2 = curr_2->next)
+ {
+ aop_2 = (AlignOverlapPtr) curr_2->data.ptrvalue;
+ if(aop_2->align != NULL)
+ {
+ align_2 = aop_2->align;
+ SeqAlignReverse(align_2, order);
+ if(merge_two_align(align_1, align_2, from, to, 0))
+ {
+ retval = TRUE;
+ aop_2->align = NULL;
+ if(extract_align_from_list(p_align_2, align_2) != NULL)
+ SeqAlignFree(align_2);
+ found = TRUE;
+ }
+ }
+ }
+ }
+
+ ValNodeFreeData(list_1);
+ ValNodeFreeData(list_2);
+
+ return retval;
+}
+
+
+
+/*
+*
+* functions related to reduce the redundant level of the view
+*/
+
+typedef struct align_info {
+ SeqAlignPtr align;
+ Int4 align_len;
+}AlignInfo, PNTR AlignInfoPtr;
+
+static int LIBCALLBACK CompareAlignInfoProc(VoidPtr ptr1, VoidPtr ptr2)
+{
+ AlignInfoPtr aip_1, aip_2;
+
+ if (ptr1 != NULL && ptr2 != NULL) {
+ aip_1 = (AlignInfoPtr) ptr1;
+ aip_2 = (AlignInfoPtr) ptr2;
+ if(aip_1->align_len > aip_2->align_len)
+ return -1;
+ else if(aip_1->align_len < aip_2->align_len)
+ return 1;
+ else return 0;
+ }
+ return 0;
+}
+
+typedef struct align_inforeal {
+ SeqAlignPtr align;
+ Nlm_FloatHi rvalue;
+}AlignInfoReal, PNTR AlignInfoRealPtr;
+
+
+static int LIBCALLBACK CompareAlignInfoRealProc(VoidPtr ptr1, VoidPtr ptr2)
+{
+ AlignInfoRealPtr aip_1, aip_2;
+
+ if (ptr1 != NULL && ptr2 != NULL) {
+ aip_1 = (AlignInfoRealPtr) ptr1;
+ aip_2 = (AlignInfoRealPtr) ptr2;
+ if(aip_1->rvalue > aip_2->rvalue)
+ return -1;
+ else if(aip_1->rvalue < aip_2->rvalue)
+ return 1;
+ else return 0;
+ }
+ return 0;
+}
+
+
+
+/*
+ Generic Function to Sort SeqAligns.. must
+ provide Comparison Callback function
+*/
+
+
+NLM_EXTERN SeqAlignPtr SeqAlignSort(SeqAlignPtr salp,SeqAlignSortCB sort_fn) {
+ SeqAlignPtr sap, PNTR salp_list;
+ Int4 n,i;
+
+ if(salp==NULL || salp->next==NULL)
+ return salp;
+
+ n=1;
+ sap = salp->next;
+ while (sap != NULL)
+ {
+ n++;
+ sap = sap->next;
+ }
+ salp_list = Calloc(n, sizeof (SeqAlignPtr));
+ for(sap=salp,i=0;i<n;i++,sap=sap->next) {
+ salp_list[i]=sap;
+ }
+ HeapSort (salp_list, n, sizeof (SeqAlignPtr), sort_fn);
+ sap = salp = salp_list[0];
+ for (i = 1; i < n; i++) {
+ sap->next = salp_list[i];
+ sap = sap->next;
+ }
+ sap->next = NULL;
+ MemFree (salp_list);
+ return salp;
+}
+
+
+
+NLM_EXTERN SeqAlignPtr SeqAlignSortByLength(SeqAlignPtr salp)
+{
+ SeqAlignPtr sap;
+ AlignInfoPtr salp_list;
+ Int4 n,i;
+
+ if(salp==NULL || salp->next==NULL)
+ return salp;
+
+ n=1;
+ sap = salp->next;
+ while (sap != NULL)
+ {
+ n++;
+ sap = sap->next;
+ }
+ salp_list = Calloc(n, sizeof (AlignInfo));
+ for(sap=salp,i=0;i<n;i++,sap=sap->next) {
+ salp_list[i].align=sap;
+ salp_list[i].align_len = SeqAlignLength(sap);
+ }
+ HeapSort (salp_list, n, sizeof (AlignInfo), CompareAlignInfoProc);
+ sap = salp = salp_list[0].align;
+ for (i = 1; i < n; i++) {
+ sap->next = salp_list[i].align;
+ sap = sap->next;
+ }
+ sap->next = NULL;
+ MemFree (salp_list);
+ return salp;
+}
+
+NLM_EXTERN SeqAlignPtr SeqAlignSortByScore(SeqAlignPtr salp)
+{
+ SeqAlignPtr sap;
+ AlignInfoPtr salp_list;
+ Int4 n,i;
+
+ if(salp==NULL || salp->next==NULL)
+ return salp;
+
+ n=1;
+ sap = salp->next;
+ while (sap != NULL)
+ {
+ n++;
+ sap = sap->next;
+ }
+ salp_list = Calloc(n, sizeof (AlignInfo));
+ {
+ Int4 score,number;
+ Nlm_FloatHi bit_score,evalue;
+ for(sap=salp,i=0;i<n;i++,sap=sap->next) {
+ salp_list[i].align=sap;
+ GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
+ salp_list[i].align_len = score;
+ }
+ }
+ HeapSort (salp_list, n, sizeof (AlignInfo), CompareAlignInfoProc);
+ sap = salp = salp_list[0].align;
+ for (i = 1; i < n; i++) {
+ sap->next = salp_list[i].align;
+ sap = sap->next;
+ }
+ sap->next = NULL;
+ MemFree (salp_list);
+ return salp;
+}
+
+
+NLM_EXTERN SeqAlignPtr SeqAlignSortByEvalue(SeqAlignPtr salp)
+{
+ SeqAlignPtr sap;
+ AlignInfoRealPtr salp_list;
+ Int4 n,i;
+
+ if(salp==NULL || salp->next==NULL)
+ return salp;
+
+ n=1;
+ sap = salp->next;
+ while (sap != NULL)
+ {
+ n++;
+ sap = sap->next;
+ }
+ salp_list = Calloc(n, sizeof (AlignInfoReal));
+ {
+ Int4 score,number;
+ Nlm_FloatHi bit_score,evalue;
+ for(sap=salp,i=0;i<n;i++,sap=sap->next) {
+ salp_list[i].align=sap;
+ GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
+ salp_list[i].rvalue = evalue;
+ }
+ }
+ HeapSort (salp_list, n, sizeof (AlignInfoReal), CompareAlignInfoRealProc);
+ /* reverse order */
+ sap = salp = salp_list[n-1].align;
+ for (i = n-2; i >=0; i--) {
+ sap->next = salp_list[i].align;
+ sap = sap->next;
+ }
+ sap->next = NULL;
+ MemFree (salp_list);
+ return salp;
+}
+
+NLM_EXTERN SeqAlignPtr SeqAlignSortByStart(SeqAlignPtr salp,Int2 order)
+{
+ SeqAlignPtr sap;
+ AlignInfoPtr salp_list;
+ Int4 n,i;
+
+ if(salp==NULL || salp->next==NULL)
+ return salp;
+
+ n=1;
+ sap = salp->next;
+ while (sap != NULL)
+ {
+ n++;
+ sap = sap->next;
+ }
+ salp_list = Calloc(n, sizeof (AlignInfo));
+ {
+ for(sap=salp,i=0;i<n;i++,sap=sap->next) {
+ salp_list[i].align=sap;
+ salp_list[i].align_len = SeqAlignStart(sap,order);
+ }
+ }
+ HeapSort (salp_list, n, sizeof (AlignInfo), CompareAlignInfoProc);
+ /* reverse order */
+ sap = salp = salp_list[n-1].align;
+ for (i = n-2; i >=0; i--) {
+ sap->next = salp_list[i].align;
+ sap = sap->next;
+ }
+ sap->next = NULL;
+ MemFree (salp_list);
+ return salp;
+}
+
+
+static Int4 get_align_mid_point(SeqAlignPtr align, Int2 order)
+{
+ DenseSegPtr dsp;
+ Int4 start, stop;
+ Uint1 strand, val;
+ SeqIdPtr sip;
+
+ if(align->segtype == 2)
+ {
+ dsp = (DenseSegPtr) align->segs;
+ val = 0;
+ for(sip = dsp->ids; sip != NULL; sip = sip->next)
+ {
+ if(val == order)
+ {
+ SeqAlignStartStopById(align, sip, &start, &stop, &strand);
+ return (start + stop)/2;
+ }
+ ++val;
+ }
+ }
+
+ return -1;
+}
+NLM_EXTERN SeqAlignPtr SeqAlignSortByRegion(SeqAlignPtr salp,Int2 order)
+{
+ SeqAlignPtr sap;
+ AlignInfoPtr salp_list;
+ Int4 n,i;
+
+ if(salp==NULL || salp->next==NULL)
+ return salp;
+
+ n=1;
+ sap = salp->next;
+ while (sap != NULL)
+ {
+ n++;
+ sap = sap->next;
+ }
+ salp_list = Calloc(n, sizeof (AlignInfo));
+ for(sap=salp,i=0;i<n;i++,sap=sap->next) {
+ salp_list[i].align=sap;
+ salp_list[i].align_len = get_align_mid_point(sap, order);
+ }
+ HeapSort (salp_list, n, sizeof (AlignInfo), CompareAlignInfoProc);
+ /* reverse order */
+ sap = salp = salp_list[n-1].align;
+ for (i = n-2; i >=0; i--) {
+ sap->next = salp_list[i].align;
+ sap = sap->next;
+ }
+ sap->next = NULL;
+ MemFree (salp_list);
+ return salp;
+}
+
+
+
+
+static Int4 get_align_list_len(SeqAlignPtr align)
+{
+ Int4 len = 0;
+
+ while(align)
+ {
+ len += SeqAlignLength(align);
+ align = align->next;
+ }
+
+ return len;
+}
+
+
+static void save_output (SeqIdPtr sip, CharPtr f_name, SeqAlignPtr align)
+{
+ AsnIoPtr aip;
+ BioseqPtr bsp;
+ SeqEntryPtr sep;
+
+ bsp = BioseqFind(sip);
+ sep = SeqEntryFind(sip);
+
+ if(bsp == NULL || sep == NULL)
+ return;
+
+ bsp->hist = SeqHistNew();
+ bsp->hist->assembly = align;
+
+ aip = AsnIoOpen(f_name, "w");
+ SeqEntryAsnWrite(sep, aip, NULL);
+ AsnIoClose(aip);
+
+ bsp->hist->assembly = NULL;
+ SeqHistFree(bsp->hist);
+ bsp->hist = NULL;
+}
+
+
+NLM_EXTERN SeqAlignPtr find_best_align(SeqAlignPtr align, Int4Ptr pmax_score)
+{
+ SeqAlignPtr curr, best_align;
+ Int4 score, max_score = 0, number;
+ Nlm_FloatHi bit_score, evalue;
+
+ best_align = NULL;
+ for(curr = align; curr != NULL; curr = curr->next)
+ {
+ GetScoreAndEvalue(curr, &score, &bit_score, &evalue , &number);
+ if(score > max_score)
+ {
+ max_score = score;
+ best_align = curr;
+ }
+ }
+
+
+ *pmax_score = max_score;
+ return best_align;
+}
+
+
+
+/*flip the order of the first and the second sequences*/
+NLM_EXTERN void SeqAlignSwapOrder(SeqAlignPtr align)
+{
+ SeqAlignPtr curr;
+ DenseSegPtr dsp;
+ SeqIdPtr sip;
+ Int2 i;
+ Int4 temp;
+ Uint1 strand;
+
+ for(curr = align; curr != NULL; curr = curr->next)
+ {
+ SeqAlignReverse(curr, 1);
+ dsp = (DenseSegPtr) curr->segs;
+
+ /*switching the Seq-ids*/
+ sip = dsp->ids->next;
+ dsp->ids->next = NULL;
+ sip->next = dsp->ids;
+ dsp->ids = sip;
+
+ for(i = 0; i<dsp->numseg; ++i)
+ {
+ temp = dsp->starts[2*i];
+ dsp->starts[2*i] = dsp->starts[2*i+1];
+ dsp->starts[2*i+1] = temp;
+
+ strand = dsp->strands[2*i];
+ dsp->strands[2*i] = dsp->strands[2*i+1];
+ dsp->strands[2*i+1] = strand;
+ }
+
+ }
+ return;
+}
+
+
+
+static void reverse_seqloc (SeqLocPtr s_loc)
+{
+ SeqLocPtr slp, t_slp, n_slp;
+ SeqIntPtr sint;
+ Uint1 strand;
+
+ if(s_loc == NULL || s_loc->choice != SEQLOC_MIX)
+ return;
+ slp = NULL;
+ t_slp = NULL;
+ while((slp = SeqLocFindNext(s_loc, slp)) != NULL)
+ {
+ if(slp->choice == SEQLOC_INT)
+ {
+ sint = (SeqIntPtr) slp->data.ptrvalue;
+ strand = sint->strand;
+ n_slp = SeqLocIntNew(sint->from, sint->to, 3-strand, sint->id);
+ if(t_slp == NULL)
+ t_slp = n_slp;
+ else
+ {
+ n_slp->next = t_slp;
+ t_slp = n_slp;
+ }
+ }
+ }
+ SeqLocSetFree((SeqLocPtr) s_loc->data.ptrvalue);
+ s_loc->data.ptrvalue = t_slp;
+}
+
+
+static BioseqPtr create_fake_bioseq(SeqLocPtr seq_loc, CharPtr fake_name)
+{
+
+ BioseqPtr bsp;
+ SeqPortPtr spp;
+ ByteStorePtr b_store;
+ Uint1 residue;
+ Uint1 code;
+ SeqLocPtr slp;
+ Int4 length;
+
+ code = Seq_code_iupacna;
+ length = SeqLocLen(seq_loc);
+ b_store = BSNew(length + 2);
+ BSSeek(b_store, 0, SEEK_SET);
+
+ slp = NULL;
+ length = 0;
+ while((slp = SeqLocFindNext(seq_loc, slp)) != NULL)
+ {
+ if(slp->choice == SEQLOC_INT || slp->choice == SEQLOC_WHOLE)
+ {
+ spp = SeqPortNewByLoc(slp, code);
+ residue = SeqPortGetResidue(spp);
+ while(residue != SEQPORT_EOF)
+ {
+ if(IS_ALPHA(residue))
+ BSPutByte(b_store, (Int2)residue);
+ residue = SeqPortGetResidue(spp);
+ }
+ SeqPortFree(spp);
+ length += SeqLocLen(slp);
+ }
+ }
+
+ bsp = BioseqNew();
+ bsp->id = local_id_make(fake_name);
+ bsp->seq_data = b_store;
+ bsp->repr = Seq_repr_raw;
+ bsp->mol = Seq_mol_dna;
+ bsp->length = length;
+ bsp->topology = 1;
+ bsp->seq_data_type = code;
+ return bsp;
+
+
+}
+
+static SeqLocPtr dup_mul_locs(SeqLocPtr slp)
+{
+ Int4 start, stop;
+ Uint1 strand;
+ SeqLocPtr t_slp, h_slp;
+ SeqLocPtr curr;
+
+ curr = NULL;
+ h_slp = NULL;
+ while((curr = SeqLocFindNext(slp, curr)) != NULL)
+ {
+ start = SeqLocStart(curr);
+ stop = SeqLocStop(curr);
+ strand = SeqLocStrand(curr);
+ t_slp = SeqLocIntNew(start, stop, strand, SeqLocId(curr));
+ ValNodeLink(&h_slp, t_slp);
+ }
+
+ if(h_slp->next == NULL)
+ return h_slp;
+ else
+ {
+ t_slp = ValNodeNew(NULL);
+ t_slp->choice = SEQLOC_MIX;
+ t_slp->data.ptrvalue = h_slp;
+ return t_slp;
+ }
+}
+
+
+static Boolean second_seq_has_gap(BioseqPtr s_bsp)
+{
+ SeqLocPtr slp;
+ SeqIdPtr sip;
+ BioseqPtr t_bsp;
+
+ for(slp = (SeqLocPtr) s_bsp->seq_ext; slp != NULL; slp = slp->next)
+ {
+ if(slp->choice != SEQLOC_NULL)
+ {
+ sip = SeqLocId(slp);
+ if(sip != NULL)
+ {
+ t_bsp = BioseqFind(sip);
+ if(t_bsp != NULL)
+ {
+ if(t_bsp->repr == Seq_repr_virtual)
+ return TRUE;
+ }
+ }
+ }
+ }
+
+ return FALSE;
+}
+
+
+static SeqIdPtr get_best_id(BioseqPtr bsp)
+{
+ SeqIdPtr sip;
+
+ sip = SeqIdFindBest(bsp->id, 0);
+ if(sip == NULL || sip->choice != SEQID_GI)
+ {
+ for(sip = bsp->id; sip != NULL; sip = sip->next)
+ {
+ if(sip->choice == SEQID_LOCAL)
+ return sip;
+ }
+ return bsp->id;
+ }
+ else
+ return sip;
+}
+
+
+/*gap = unknown length is separated as separate segments. This
+ is to make sure that create_fake_bioseq won't include those
+ segments
+ */
+
+static Boolean is_virtual_segment(SeqIdPtr sip)
+{
+ BioseqPtr bsp;
+ ObjectIdPtr oip;
+
+ bsp = BioseqFind(sip);
+ if(bsp != NULL)
+ {
+ if(bsp->repr == Seq_repr_virtual || bsp->repr == Seq_repr_map)
+ return TRUE;
+ else
+ return FALSE;
+ }
+ if(sip->choice == SEQID_LOCAL)
+ {
+ oip = (ObjectIdPtr) sip->data.ptrvalue;
+ return (oip->str && StringCmp(oip->str, "virtual") == 0);
+ }
+
+ return FALSE;
+}
+
+
+static SeqLocPtr build_slp_for_gcontig (BioseqPtr gcontig_bsp, SeqIdPtr gcontig_sip)
+{
+ SeqLocPtr slp, h_slp = NULL, t_slp;
+ Int4 start = 0, stop = -1;
+ SeqIdPtr sip;
+ Boolean update;
+
+
+ for(slp = (SeqLocPtr) gcontig_bsp->seq_ext; slp != NULL; slp = slp->next)
+ {
+ if(slp->choice != SEQLOC_NULL)
+ {
+ sip = SeqLocId(slp);
+ /* if(sip->choice != SEQID_GI) */
+ if(sip != NULL)
+ {
+ update = FALSE;
+ if(is_virtual_segment(sip))
+ {
+ t_slp = SeqLocIntNew(start, stop, Seq_strand_plus, gcontig_sip);
+ ValNodeLink(&h_slp, t_slp);
+ update = TRUE;
+ }
+ stop += SeqLocLen(slp);
+ if(update)
+ start = stop + 1;
+
+ }
+ }
+ else
+ {
+ if(start < stop)
+ {
+ t_slp = SeqLocIntNew(start, stop, Seq_strand_plus, gcontig_sip);
+ ValNodeLink(&h_slp, t_slp);
+ start = stop + 1;
+ }
+ }
+ }
+
+ if(start < stop)
+ {
+ t_slp = SeqLocIntNew(start, stop, Seq_strand_plus, gcontig_sip);
+ ValNodeLink(&h_slp, t_slp);
+ }
+
+ if(h_slp == NULL)
+ return NULL;
+
+ if(h_slp->next != NULL)
+ {
+ t_slp = ValNodeNew(NULL);
+ t_slp->choice = SEQLOC_MIX;
+ t_slp->data.ptrvalue = h_slp;
+ return t_slp;
+ }
+ else
+ return h_slp;
+}
+
+
+/* Reverses the strand of a Linked list of SeqAlignPtr. */
+NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignListReverseStrand (SeqAlignPtr salp)
+{
+ SeqAlignPtr salptmp;
+ Int4Ptr lenp;
+ Int4Ptr startp;
+ Uint1Ptr strandp;
+ Int4 numseg;
+ Int2 dim;
+ Int4 j, k, n, tmp;
+
+ for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
+ {
+ if (salptmp->segtype == 2)
+ {
+ DenseSegPtr dsp;
+ dsp = (DenseSegPtr) salptmp->segs;
+ strandp = dsp->strands;
+ numseg = dsp->numseg;
+ dim = dsp->dim;
+ lenp = dsp->lens;
+ startp = dsp->starts;
+ for (j=0; j < numseg*dim && strandp!=NULL; j++, strandp++)
+ {
+ if (*strandp == Seq_strand_minus)
+ *strandp = Seq_strand_plus;
+ else if (*strandp != Seq_strand_minus)
+ *strandp = Seq_strand_minus;
+ }
+ for (j=0, k=numseg-1; j<numseg/2; j++, k--) {
+ tmp=lenp[j];
+ lenp[j]=lenp[k];
+ lenp[k]=tmp;
+ }
+ for (j=0, k=(dim*numseg-dim); j<(dim*numseg-1)/2; j+=dim, k-=dim) {
+ for (n=0; n<dim; n++) {
+ tmp=startp[j+n];
+ startp[j+n]=startp[k+n];
+ startp[k+n]=tmp;
+ }
+ }
+ } else if (salptmp->segtype ==1) {
+ DenseDiagPtr ddp,ddp_next,ddp_prev;
+ ddp_prev=NULL;
+ ddp = (DenseDiagPtr) salptmp->segs;
+ while(ddp!=NULL) {
+ strandp = ddp->strands;
+ dim = ddp->dim;
+ for (j=0; j < dim && strandp!=NULL; j++, strandp++)
+ {
+ if (*strandp == Seq_strand_minus)
+ *strandp = Seq_strand_plus;
+ else if (*strandp != Seq_strand_minus)
+ *strandp = Seq_strand_minus;
+ }
+ ddp_next = ddp->next;
+ if(salptmp->type==3 || salptmp->type==1) {
+ ddp->next = ddp_prev;
+ ddp_prev =ddp;
+ }
+ ddp = ddp_next;
+ }
+ if(salptmp->type==3 || salptmp->type==1)
+ salptmp->segs = ddp_prev;
+ }
+ }
+ return salp;
+}
+
+NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignDup (SeqAlignPtr salp)
+{
+ SeqAnnotPtr sap,
+ sap2;
+ SeqAlignPtr salp2 = NULL,
+ next;
+
+ next = salp->next;
+ salp->next = NULL;
+ sap = SeqAnnotForSeqAlign (salp);
+ sap2 = (SeqAnnotPtr) AsnIoMemCopy ((Pointer) sap, (AsnReadFunc) SeqAnnotAsnRead, (AsnWriteFunc) SeqAnnotAsnWrite);
+ if (sap2!=NULL) {
+ salp2 = (SeqAlignPtr)sap2->data;
+ sap2->data = NULL;
+ SeqAnnotFree (sap2);
+ }
+ if (next != NULL)
+ salp->next = next;
+ sap->data = NULL;
+ SeqAnnotFree (sap);
+ return salp2;
+}
+
+NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignListDup (SeqAlignPtr salp)
+{
+ SeqAnnotPtr sap,
+ sap2;
+ SeqAlignPtr salp2 = NULL;
+ if (salp == NULL) return(NULL);
+ sap = SeqAnnotForSeqAlign (salp);
+ if (sap == NULL) return(NULL);
+ sap2 = (SeqAnnotPtr) AsnIoMemCopy ((Pointer) sap, (AsnReadFunc) SeqAnnotAsnRead, (AsnWriteFunc) SeqAnnotAsnWrite);
+ if (sap2!=NULL) {
+ salp2 = sap2->data;
+ sap2->data = NULL;
+ SeqAnnotFree (sap2);
+ }
+ sap->data = NULL;
+ SeqAnnotFree (sap);
+ return salp2;
+}
+