diff options
author | Aaron M. Ucko <ucko@debian.org> | 2005-03-24 18:32:05 +0000 |
---|---|---|
committer | Aaron M. Ucko <ucko@debian.org> | 2005-03-24 18:32:05 +0000 |
commit | f06fc23cbc179836f402001f24176fc9d5725482 (patch) | |
tree | 39e97ad8f13a33296b32a3907f3409b056cf851b /sequin | |
parent | ccba467ae4f393d7acce357a9847bfe1fb77ccc7 (diff) |
Load ncbi (6.1.20040616) into ncbi-tools6/branches/upstream/current.
Diffstat (limited to 'sequin')
-rw-r--r-- | sequin/sequin.h | 10 | ||||
-rw-r--r-- | sequin/sequin1.c | 297 | ||||
-rw-r--r-- | sequin/sequin10.c | 446 | ||||
-rw-r--r-- | sequin/sequin2.c | 200 | ||||
-rw-r--r-- | sequin/sequin3.c | 112 | ||||
-rw-r--r-- | sequin/sequin4.c | 98 | ||||
-rw-r--r-- | sequin/sequin5.c | 12 | ||||
-rw-r--r-- | sequin/sequin6.c | 762 | ||||
-rw-r--r-- | sequin/sequin7.c | 202 | ||||
-rw-r--r-- | sequin/sequin8.c | 36 | ||||
-rw-r--r-- | sequin/sequin9.c | 75 |
11 files changed, 1894 insertions, 356 deletions
diff --git a/sequin/sequin.h b/sequin/sequin.h index 5458109f..bed4357a 100644 --- a/sequin/sequin.h +++ b/sequin/sequin.h @@ -29,7 +29,7 @@ * * Version Creation Date: 1/22/95 * -* $Revision: 6.134 $ +* $Revision: 6.138 $ * * File Description: * @@ -228,6 +228,8 @@ extern void ConsolidateOrganismNotes (IteM i); extern void ConsolidateLikeModifiersWithSemicolons (IteM i); extern void ConsolidateLikeModifiersWithoutSemicolons (IteM i); +extern void CountryLookup (IteM i); + extern void ExtendPartialFeatures (IteM i); extern void TrimOrganismName (IteM i); @@ -485,6 +487,8 @@ extern void SetupEditSecondary (MenU m, BaseFormPtr bfp); extern void EditLocusProc (IteM i); extern void ConvertToLocalProc (IteM i); +extern ValNodePtr BuildDescriptorValNodeList (void); + extern void RemoveFeature (IteM i); extern void RemoveDescriptor (IteM i); @@ -499,6 +503,8 @@ extern void MakeExonsFromMRNAIntervals (IteM i); extern Int2 LIBCALLBACK CreateDeleteByTextWindow (Pointer data); extern Int2 LIBCALLBACK CreateSegregateByTextWindow (Pointer data); +extern Int2 LIBCALLBACK CreateSegregateByFeatureWindow (Pointer data); +extern Int2 LIBCALLBACK CreateSegregateByDescriptorWindow (Pointer data); extern Int2 LIBCALLBACK RemoveExtraneousSets (Pointer data); extern void RemoveOrphanProteins (Uint2 entityID, SeqEntryPtr sep); extern void ParseAsnOrFlatfileToAnywhere (IteM i); @@ -552,6 +558,7 @@ extern void DownloadAndExtendProc (ButtoN b); extern void UpdateSeqAfterDownload (BaseFormPtr bfp, BioseqPtr oldbsp, BioseqPtr newbsp); extern void ExtendSeqAfterDownload (BaseFormPtr bfp, BioseqPtr oldbsp, BioseqPtr newbsp); extern void NewUpdateSequence (IteM i); +extern void NewUpdateSequenceNewBlast (IteM i); extern void NewExtendSequence (IteM i); extern void FastaNucDirectToSeqEdProc (IteM i); @@ -562,6 +569,7 @@ extern void ParseAntiCodonsFromtRNAComment (IteM i); extern void RemoveAlignment (IteM i); extern void RemoveGraph (IteM i); extern void RemoveProteins (IteM i); +extern void RemoveProteinsAndRenormalize (IteM i); extern void GlobalAddTranslExcept (IteM i); diff --git a/sequin/sequin1.c b/sequin/sequin1.c index d5f65dd5..a768978d 100644 --- a/sequin/sequin1.c +++ b/sequin/sequin1.c @@ -29,7 +29,7 @@ * * Version Creation Date: 1/22/95 * -* $Revision: 6.450 $ +* $Revision: 6.458 $ * * File Description: * @@ -129,7 +129,7 @@ static char *time_of_compilation = "now"; #include <Gestalt.h> #endif -#define SEQ_APP_VER "5.22" +#define SEQ_APP_VER "5.25" #ifndef CODECENTER static char* sequin_version_binary = "Sequin Indexer Services Version " SEQ_APP_VER " " __DATE__ " " __TIME__; @@ -1099,7 +1099,7 @@ static void SmartResetProc (IteM i) status = sm_usr_data->header->status; sm_usr_data->header->status = SMStatClosed; SMSendMsgToClient(sm_usr_data); - sm_usr_data->header->status = status; + sm_usr_data->header->status = (SMStatusCode)status; return; } } @@ -3084,7 +3084,7 @@ static void PrintExtraErrorInstructions (FILE *fp, CharPtr message) "\nMiddle gap characters are used to maintain the spacing " "inside an alignment. These are not nucleotides and will " "not appear as part of your sequence file.\n" -"Missing characters are used to represent indeterminate/ambiguous " +"Ambiguous/unknown characters are used to represent indeterminate/ambiguous " "nucleotides. These will appear in your sequence file as 'n'.\n" "Match characters are used to indicate positions where " "sequences are identical to the first sequence. These will be " @@ -3131,9 +3131,25 @@ static void WalkErrorList (TErrorInfoPtr list, FILE *fp) } +static Int4 CountNucleotides (CharPtr sequence) +{ + Int4 num = 0; + CharPtr cp; + + if (sequence == NULL) return 0; + for (cp = sequence; *cp != 0; cp++) + { + if (*cp != '-') + { + num++; + } + } + return num; +} + static void PrintAlignmentSummary (TAlignmentFilePtr afp, FILE *fp) { - Int4 index; + Int4 index; if (fp == NULL) return; @@ -3142,18 +3158,35 @@ static void PrintAlignmentSummary (TAlignmentFilePtr afp, FILE *fp) } else { fprintf (fp, "Found %d sequences\n", afp->num_sequences); fprintf (fp, "Found %d organisms\n", afp->num_organisms); - for (index = 0; index < afp->num_sequences; index++) + if (afp->num_sequences == afp->num_segments * afp->num_organisms) { - fprintf (fp, "\t%s\t", afp->ids [index]); - if (index < afp->num_organisms) { - fprintf (fp, "%s\n", afp->organisms [index]); - } else { - fprintf (fp, "No organism information\n"); - } + for (index = 0; index < afp->num_sequences; index++) + { + fprintf (fp, "\t%s\t%d nucleotides\t", afp->ids [index], + CountNucleotides (afp->sequences[index])); + if (index / afp->num_segments < afp->num_organisms) { + fprintf (fp, "%s\n", afp->organisms [index / afp->num_segments]); + } else { + fprintf (fp, "No organism information\n"); + } + } } - while (index < afp->num_organisms) { - fprintf (fp, "Unclaimed organism: %s\n", afp->organisms [index]); - index++; + else + { + for (index = 0; index < afp->num_sequences; index++) + { + fprintf (fp, "\t%s\t%d nucleotides\t", afp->ids [index], + CountNucleotides (afp->sequences[index])); + if (index < afp->num_organisms) { + fprintf (fp, "%s\n", afp->organisms [index]); + } else { + fprintf (fp, "No organism information\n"); + } + } + while (index < afp->num_organisms) { + fprintf (fp, "Unclaimed organism: %s\n", afp->organisms [index]); + index++; + } } } } @@ -3194,19 +3227,33 @@ typedef struct alphabetformdata { static Boolean DoSequenceLengthsMatch (TAlignmentFilePtr afp) { - int seq_index; - Int4 seq_len; + int seq_index; + int curr_seg; + Int4Ptr seq_len; + Boolean rval; if (afp == NULL || afp->sequences == NULL || afp->num_sequences == 0) { return TRUE; } - seq_len = StringLen (afp->sequences[0]); - for (seq_index = 1; seq_index < afp->num_sequences; seq_index++) { - if (StringLen (afp->sequences[seq_index]) != seq_len) { - return FALSE; + + seq_len = (Int4Ptr) MemNew (sizeof (Int4) * afp->num_segments); + if (seq_len == NULL) return FALSE; + for (seq_index = 0; seq_index < afp->num_segments; seq_index ++) + { + seq_len [seq_index] = StringLen (afp->sequences[seq_index]); + } + + curr_seg = 0; + rval = TRUE; + for (seq_index = afp->num_segments; seq_index < afp->num_sequences && rval; seq_index++) { + if (StringLen (afp->sequences[seq_index]) != seq_len[curr_seg]) { + rval = FALSE; } + curr_seg ++; + if (curr_seg >= afp->num_segments) curr_seg = 0; } - return TRUE; + MemFree (seq_len); + return rval; } extern SeqEntryPtr @@ -3260,7 +3307,7 @@ SeqEntryFromAlignmentFile if (afp != NULL) { if (afp->num_organisms == 0 && no_org_err_msg != NULL) { Message (MSG_ERROR, no_org_err_msg); - } else if (afp->num_organisms != 0 && afp->num_organisms != afp->num_sequences) { + } else if (afp->num_organisms != 0 && afp->num_organisms != afp->num_sequences && afp->num_organisms * afp->num_segments != afp->num_sequences) { Message (MSG_ERROR, "Number of organisms must match number of sequences!"); } else { ans = ANS_YES; @@ -3405,16 +3452,16 @@ static void BuildGetAlphabetDialog (IteM i) SetGroupSpacing (h, 10, 10); g = HiddenGroup (h, 2, 4, NULL); - StaticPrompt (g, "Missing", 0, dialogTextHeight, programFont, 'c'); - afp->missing = DialogText (g, "?", 5, NULL); + StaticPrompt (g, "Ambiguous/Unknown", 0, dialogTextHeight, programFont, 'c'); + afp->missing = DialogText (g, "?Nn", 5, NULL); StaticPrompt (g, "Match", 0, dialogTextHeight, programFont, 'c'); afp->match = DialogText (g, ".", 5, NULL); StaticPrompt (g, "Beginning Gap", 0, dialogTextHeight, programFont, 'c'); - afp->beginning_gap = DialogText (g, "-.nN", 5, NULL); + afp->beginning_gap = DialogText (g, "-.?nN", 5, NULL); StaticPrompt (g, "Middle Gap", 0, dialogTextHeight, programFont, 'c'); - afp->middle_gap = DialogText (g, "-nN", 5, NULL); + afp->middle_gap = DialogText (g, "-", 5, NULL); StaticPrompt (g, "End Gap", 0, dialogTextHeight, programFont, 'c'); - afp->end_gap = DialogText (g, "-?", 5, NULL); + afp->end_gap = DialogText (g, "-.?nN", 5, NULL); StaticPrompt (g, "Sequence Type", 0, dialogTextHeight, programFont, 'c'); afp->sequence_type = PopupList (g, TRUE, NULL); PopupItem (afp->sequence_type, "Nucleotide"); @@ -4356,8 +4403,6 @@ static void CloseProc (BaseFormPtr bfp) OMUserDataPtr omudp; ObjMgrDataPtr tmp; #ifdef USE_SMARTNET - ObjMgrDataPtr PNTR omdp; - int fd; SMUserDataPtr sm_usr_data = NULL; #endif @@ -9801,15 +9846,20 @@ static void s_GetTpaInfo (SequencesFormPtr sqfp) Update (); } +static CharPtr tpaString = NULL; + static void FinishPuttingTogether (ForM f) { - BaseFormPtr bfp; - BioseqSetPtr bssp; - Uint2 entityID = 0; - Int2 handled; - SeqEntryPtr sep = NULL; + BaseFormPtr bfp; + BioseqSetPtr bssp; + Uint2 entityID = 0; + Int2 handled; + ObjMgrDataPtr omdp; + SubmitBlockPtr sbp; + SeqEntryPtr sep = NULL; SequencesFormPtr sqfp; + SeqSubmitPtr ssp; bfp = (BaseFormPtr) GetObjectExtra (f); if (bfp != NULL) { @@ -9821,6 +9871,22 @@ static void FinishPuttingTogether (ForM f) } /*#endif*/ entityID = PackageFormResults (globalsbp, sep, TRUE); + sqfp = (SequencesFormPtr) bfp; + if (SEQ_TPA_SUBMISSION == sqfp->submType && entityID > 0) { + omdp = ObjMgrGetData (entityID); + if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) { + ssp = (SeqSubmitPtr) omdp->dataptr; + if (ssp != NULL && ssp->datatype == 1) { + sbp = ssp->sub; + if (sbp != NULL) { + if (sbp->comment == NULL && StringDoesHaveText (tpaString)) { + sbp->comment = tpaString; + tpaString = NULL; + } + } + } + } + } globalsbp = NULL; WatchCursor (); seqviewprocs.forceSeparateViewer = TRUE; @@ -9931,24 +9997,13 @@ static void BackToFormat (ButtoN b) } } -static void GetOrgAndSeq (ButtoN b) +static void FinishOrgAndSeq (void) { - FormatBlockPtr fbp; - MonitorPtr mon; - ForM w; + MonitorPtr mon; + ForM w; WatchCursor (); - Hide (formatForm); - fbp = (FormatBlockPtr) FormToPointer (formatForm); - if (fbp != NULL) { - globalFormatBlock.seqPackage = fbp->seqPackage; - globalFormatBlock.seqFormat = fbp->seqFormat; - globalFormatBlock.numSeqs = fbp->numSeqs; - globalFormatBlock.submType = fbp->submType; - } - MemFree (fbp); - WatchCursor (); mon = MonitorStrNewEx ("Sequin New Submission", 30, FALSE); MonitorStrValue (mon, "Creating Sequences Form"); Update (); @@ -9970,6 +10025,27 @@ static void GetOrgAndSeq (ButtoN b) Update (); } +static void BackToSubmitter (ButtoN b) + +{ + MsgAnswer ans; + + ans = Message (MSG_OKC, "Are you sure? Format information will be lost."); + if (ans == ANS_CANCEL) return; + Hide (formatForm); + Update (); + PointerToForm (initSubmitForm, globalsbp); + globalsbp = SequinBlockFree (globalsbp); + Show (initSubmitForm); + Select (initSubmitForm); + SendHelpScrollMessage (helpForm, "Submitting Authors Form", NULL); + Update (); + globalFormatBlock.seqPackage = SEQ_PKG_SINGLE; + globalFormatBlock.seqFormat = SEQ_FMT_FASTA; + globalFormatBlock.numSeqs = 0; + globalFormatBlock.submType = SEQ_ORIG_SUBMISSION; +} + static void GetFormat (ButtoN b) { @@ -9995,25 +10071,118 @@ static void GetFormat (ButtoN b) Update (); } -static void BackToSubmitter (ButtoN b) +static WindoW tpaWindow = NULL; +static TexT tpaText = NULL; +static ButtoN tpaNext = NULL; +/* tpaString defined above FinishPuttingTogether */ + +static void TpaPrev (ButtoN b) { - MsgAnswer ans; + Hide (tpaWindow); + tpaString = MemFree (tpaString); + SetTitle (tpaText, ""); + Show (formatForm); + Select (formatForm); + SendHelpScrollMessage (helpForm, "Sequence Format Form", NULL); + Update (); +} + +static void TpaNext (ButtoN b) + +{ + tpaString = MemFree (tpaString); + tpaString = SaveStringFromText (tpaText); + if (StringHasNoText (tpaString)) { + Message (MSG_OK, "The requested information is required in order for you to be able to proceed with a TPA submission"); + return; + } + Hide (tpaWindow); + WatchCursor (); + FinishOrgAndSeq (); +} + +static void TpaText (TexT t) + +{ + if (TextHasNoText (t)) { + SafeDisable (tpaNext); + } else { + SafeEnable (tpaNext); + } +} + +static CharPtr tpaMssg = "\ +Third party annotation records require a publication describing the biological \ +experiments used as evidence for the annotation. Please provide information \ +regarding the nature of these experiments."; + +static void DoTpaForm (void) + +{ + GrouP c, h, p; + + if (tpaWindow == NULL) { + tpaWindow = FixedWindow (-50, -33, -10, -10, "TPA Evidence", NULL); + h = HiddenGroup (tpaWindow, -1, 0, NULL); + SetGroupSpacing (h, 10, 10); + + p = MultiLinePrompt (h, tpaMssg, 30 * stdCharWidth, programFont); + + tpaText = ScrollText (h, 30, 5, programFont, TRUE, TpaText); + + c = HiddenGroup (h, 2, 0, NULL); + PushButton (c, "<< Prev Form", TpaPrev); + tpaNext = PushButton (c, "Next Form >>", TpaNext); + + AlignObjects (ALIGN_CENTER, (HANDLE) p, (HANDLE) tpaText, (HANDLE) c, NULL); + + RealizeWindow (tpaWindow); + } + tpaString = MemFree (tpaString); + SafeSetTitle (tpaText, ""); + SafeDisable (tpaNext); + Show (tpaWindow); + Select (tpaWindow); +} + +static void GetOrgAndSeq (ButtoN b) + +{ + /* + MsgAnswer ans; + */ + FormatBlockPtr fbp; + Boolean is_tpa = FALSE; - ans = Message (MSG_OKC, "Are you sure? Format information will be lost."); - if (ans == ANS_CANCEL) return; Hide (formatForm); - Update (); - PointerToForm (initSubmitForm, globalsbp); - globalsbp = SequinBlockFree (globalsbp); - Show (initSubmitForm); - Select (initSubmitForm); - SendHelpScrollMessage (helpForm, "Submitting Authors Form", NULL); - Update (); - globalFormatBlock.seqPackage = SEQ_PKG_SINGLE; - globalFormatBlock.seqFormat = SEQ_FMT_FASTA; - globalFormatBlock.numSeqs = 0; - globalFormatBlock.submType = SEQ_ORIG_SUBMISSION; + fbp = (FormatBlockPtr) FormToPointer (formatForm); + if (fbp != NULL) { + globalFormatBlock.seqPackage = fbp->seqPackage; + globalFormatBlock.seqFormat = fbp->seqFormat; + globalFormatBlock.numSeqs = fbp->numSeqs; + globalFormatBlock.submType = fbp->submType; + is_tpa = (Boolean) (globalFormatBlock.submType == SEQ_TPA_SUBMISSION); + } + MemFree (fbp); + if (is_tpa) { + DoTpaForm (); + /* + ans = Message (MSG_YN, "%s", tpaMssg); + if (ans == ANS_YES) { + WatchCursor (); + FinishOrgAndSeq (); + } else { + Show (formatForm); + Select (formatForm); + SendHelpScrollMessage (helpForm, "Sequence Format Form", NULL); + Update (); + } + */ + } else { + WatchCursor (); + FinishOrgAndSeq (); + } } static void BackToStartup (ButtoN b) diff --git a/sequin/sequin10.c b/sequin/sequin10.c index a27aa03d..36bc94d2 100644 --- a/sequin/sequin10.c +++ b/sequin/sequin10.c @@ -29,7 +29,7 @@ * * Version Creation Date: 9/3/2003 * -* $Revision: 1.213 $ +* $Revision: 1.217 $ * * File Description: * @@ -69,16 +69,16 @@ static void ListClauses ( Boolean suppress_final_and ); -static void LabelClauses ( - ValNodePtr clause_list, - Uint1 biomol, - BioseqPtr bsp -); +static void LabelClauses +( ValNodePtr clause_list, + Uint1 biomol, + BioseqPtr bsp, + Boolean suppress_locus_tag); -static CharPtr GetProductName ( - SeqFeatPtr cds, - BioseqPtr bsp -); +static CharPtr GetProductName +( SeqFeatPtr cds, + BioseqPtr bsp, + Boolean suppress_locus_tag); #define DEFLINE_FEATLIST 1 #define DEFLINE_CLAUSEPLUS 2 @@ -112,8 +112,8 @@ typedef struct featureclause { FeatureClausePtr NewFeatureClause ( SeqFeatPtr sfp, - BioseqPtr bsp -); + BioseqPtr bsp, + Boolean suppress_locus_tag); static void PluralizeConsolidatedClauseDescription ( FeatureClausePtr fcp @@ -126,7 +126,8 @@ typedef Boolean (LIBCALLBACK *ShouldRemoveFunction) ( BioseqPtr bsp, Boolean isLonely, Boolean isRequested, - Boolean isSegment + Boolean isSegment, + Boolean suppress_locus_tag ); /* This section of the code contains some functions for dealing with @@ -344,6 +345,8 @@ static ModifierItemGlobalData DefLineModifiers[] = { { "Isolation-source" , FALSE, SUBSRC_isolation_source , FALSE }, { "Lab-host" , FALSE, SUBSRC_lab_host , FALSE }, { "Map" , FALSE, SUBSRC_map , FALSE }, + { "Note-OrgMod" , TRUE, ORGMOD_other , FALSE }, + { "Note-SubSrc" , FALSE, SUBSRC_other , FALSE }, { "Old-lineage" , TRUE , ORGMOD_old_lineage , FALSE }, { "Old-name" , TRUE , ORGMOD_old_name , FALSE }, { "Pathovar" , TRUE , ORGMOD_pathovar , FALSE }, @@ -407,6 +410,8 @@ typedef enum { DEFLINE_POS_Isolation_source, DEFLINE_POS_Lab_host, DEFLINE_POS_Map, + DEFLINE_POS_Note_orgmod, + DEFLINE_POS_Note_subsrc, DEFLINE_POS_Old_lineage, DEFLINE_POS_Old_name, DEFLINE_POS_Pathovar, @@ -2316,16 +2321,15 @@ static Boolean LIBCALLBACK IsGene ( return TRUE; } -static CharPtr GetGeneName ( -GeneRefPtr grp -) +static CharPtr GetGeneName (GeneRefPtr grp, Boolean suppress_locus_tag) { ValNodePtr syn; if (grp == NULL) return NULL; if (SeqMgrGeneIsSuppressed (grp)) return NULL; if (StringDoesHaveText (grp->locus)) return grp->locus; - if (StringDoesHaveText (grp->locus_tag)) return grp->locus_tag; + if (! suppress_locus_tag && StringDoesHaveText (grp->locus_tag)) + return grp->locus_tag; if (StringDoesHaveText (grp->desc)) return grp->desc; for (syn = grp->syn; syn != NULL; syn = syn->next) { @@ -2335,9 +2339,7 @@ GeneRefPtr grp return NULL; } -static CharPtr GetAlleleName ( -GeneRefPtr grp -) +static CharPtr GetAlleleName (GeneRefPtr grp, Boolean suppress_locus_tag) { size_t lenallele; size_t lengenename; @@ -2346,7 +2348,7 @@ GeneRefPtr grp if (grp == NULL) return NULL; if (StringHasNoText (grp->allele)) return NULL; - gene_name = GetGeneName (grp); + gene_name = GetGeneName (grp, suppress_locus_tag); if (StringHasNoText (gene_name)) return NULL; lenallele = StringLen (grp->allele); lengenename = StringLen (gene_name); @@ -2378,20 +2380,20 @@ GeneRefPtr grp /* This function compares the gene names and allele names of the gene * to see if they match. */ -static Boolean DoGenesMatch ( - GeneRefPtr grp1, - GeneRefPtr grp2 -) +static Boolean DoGenesMatch +(GeneRefPtr grp1, + GeneRefPtr grp2, + Boolean suppress_locus_tag) { CharPtr name1; CharPtr name2; - name1 = GetGeneName (grp1); - name2 = GetGeneName (grp2); + name1 = GetGeneName (grp1, suppress_locus_tag); + name2 = GetGeneName (grp2, suppress_locus_tag); if (StringCmp (name1, name2) != 0) return FALSE; - name1 = GetAlleleName (grp1); - name2 = GetAlleleName (grp2); + name1 = GetAlleleName (grp1, suppress_locus_tag); + name2 = GetAlleleName (grp2, suppress_locus_tag); if ((name1 == NULL && name2 != NULL) || (name1 != NULL && name2 == NULL)) { @@ -3710,8 +3712,8 @@ static void GroupAltSplicedExons ( */ static void ExpandAltSplicedExons ( ValNodePtr clause_list, - BioseqPtr bsp -) + BioseqPtr bsp, + Boolean suppress_locus_tag) { ValNodePtr clause, rest_of_list, featlist, new_clause; FeatureClausePtr fcp, new_fcp; @@ -3744,7 +3746,7 @@ static void ExpandAltSplicedExons ( { new_clause = ValNodeNew (clause); if (new_clause == NULL) return; - new_fcp = NewFeatureClause (featlist->data.ptrvalue, bsp); + new_fcp = NewFeatureClause (featlist->data.ptrvalue, bsp, suppress_locus_tag); if (new_fcp == NULL) return; new_fcp->grp = fcp->grp; new_fcp->is_alt_spliced = fcp->is_alt_spliced; @@ -3765,7 +3767,7 @@ static void ExpandAltSplicedExons ( } else { - ExpandAltSplicedExons (fcp->featlist, bsp); + ExpandAltSplicedExons (fcp->featlist, bsp, suppress_locus_tag); } } } @@ -3777,11 +3779,11 @@ static void ExpandAltSplicedExons ( * than one clause, while other features should really only belong to * one clause. */ -static Boolean AddGeneToClauses ( - SeqFeatPtr gene, +static Boolean AddGeneToClauses +( SeqFeatPtr gene, CharPtr gene_productname, - ValNodePtr clause_list -) + ValNodePtr clause_list, + Boolean suppress_locus_tag) { ValNodePtr clause; FeatureClausePtr fcp; @@ -3817,7 +3819,7 @@ static Boolean AddGeneToClauses ( } } - if (fcp->grp != NULL && DoGenesMatch (fcp->grp, grp)) + if (fcp->grp != NULL && DoGenesMatch (fcp->grp, grp, suppress_locus_tag)) { used_gene = TRUE; if (gene_productname != NULL @@ -3849,9 +3851,7 @@ static Boolean AddGeneToClauses ( /* This function iterates through the list of features and calls * AddGeneToClauses for each gene feature it finds. */ -static void GroupGenes ( - ValNodePtr PNTR clause_list -) +static void GroupGenes (ValNodePtr PNTR clause_list, Boolean suppress_locus_tag) { ValNodePtr vnp; ValNodePtr featlist; @@ -3870,7 +3870,7 @@ static void GroupGenes ( { AddGeneToClauses (featlist->data.ptrvalue, fcp->feature_label_data.productname, - vnp->next); + vnp->next, suppress_locus_tag); } } } @@ -3883,11 +3883,11 @@ static void GroupGenes ( * mRNA can apply to more than one clause, while other features should * really only belong to one clause. */ -static Boolean AddmRNAToClauses ( - SeqFeatPtr mRNA, +static Boolean AddmRNAToClauses +( SeqFeatPtr mRNA, ValNodePtr clause_list, - BioseqPtr bsp -) + BioseqPtr bsp, + Boolean suppress_locus_tag) { ValNodePtr clause; FeatureClausePtr fcp; @@ -3900,7 +3900,7 @@ static Boolean AddmRNAToClauses ( if (clause_list == NULL) return FALSE; used_mRNA = FALSE; - productname = GetProductName (mRNA, bsp); + productname = GetProductName (mRNA, bsp, suppress_locus_tag); if (productname == NULL) return TRUE; for (clause = clause_list; clause != NULL; clause = clause->next) @@ -3953,7 +3953,8 @@ static Boolean AddmRNAToClauses ( */ static void GroupmRNAs ( ValNodePtr PNTR clause_list, - BioseqPtr bsp + BioseqPtr bsp, + Boolean suppress_locus_tag ) { ValNodePtr vnp; @@ -3971,7 +3972,7 @@ static void GroupmRNAs ( && featlist->choice == DEFLINE_FEATLIST && IsmRNA (featlist->data.ptrvalue)) { - if (AddmRNAToClauses (featlist->data.ptrvalue, *clause_list, bsp)) + if (AddmRNAToClauses (featlist->data.ptrvalue, *clause_list, bsp, suppress_locus_tag)) { fcp->delete_me = TRUE; } @@ -4130,10 +4131,10 @@ static CharPtr GetFeatureTypeWord ( * If none of the above conditions apply, the sequence indexing context label * will be used to obtain the product name for the feature. */ -static CharPtr GetProductName ( - SeqFeatPtr cds, - BioseqPtr bsp -) +static CharPtr GetProductName +( SeqFeatPtr cds, + BioseqPtr bsp, + Boolean suppress_locus_tag) { CharPtr protein_name; CharPtr semicolon; @@ -4172,7 +4173,7 @@ static CharPtr GetProductName ( { grp = (GeneRefPtr) cds->data.value.ptrvalue; if (grp == NULL) return NULL; - gene_name = GetGeneName (grp); + gene_name = GetGeneName (grp, suppress_locus_tag); if (grp->desc != NULL && StringCmp (grp->desc, gene_name) != 0) { @@ -4222,8 +4223,8 @@ static CharPtr GetProductName ( static FeatureClausePtr FindProductInFeatureList ( FeatureClausePtr fcp, ValNodePtr clause_list, - matchFunction itemmatch -) + matchFunction itemmatch, + Boolean suppress_locus_tag) { ValNodePtr vnp; FeatureClausePtr vnp_fcp; @@ -4233,7 +4234,7 @@ static FeatureClausePtr FindProductInFeatureList ( if (vnp->choice == DEFLINE_CLAUSEPLUS && vnp->data.ptrvalue != NULL) { vnp_fcp = vnp->data.ptrvalue; - if (DoGenesMatch (vnp_fcp->grp, fcp->grp) + if (DoGenesMatch (vnp_fcp->grp, fcp->grp, suppress_locus_tag) && vnp_fcp->featlist != NULL && vnp_fcp->featlist->choice == DEFLINE_FEATLIST && itemmatch (vnp_fcp->featlist->data.ptrvalue)) @@ -4242,7 +4243,8 @@ static FeatureClausePtr FindProductInFeatureList ( } else { - vnp_fcp = FindProductInFeatureList (fcp, vnp_fcp->featlist, itemmatch); + vnp_fcp = FindProductInFeatureList (fcp, vnp_fcp->featlist, + itemmatch, suppress_locus_tag); if (vnp_fcp != NULL) return vnp_fcp; } } @@ -4258,10 +4260,10 @@ static FeatureClausePtr FindProductInFeatureList ( * If there is a gene and a product, the description will be the name of * the product followed by the name of the gene in parentheses. */ -static CharPtr GetGeneProtDescription ( - FeatureClausePtr fcp, - BioseqPtr bsp -) +static CharPtr GetGeneProtDescription +( FeatureClausePtr fcp, + BioseqPtr bsp, + Boolean suppress_locus_tag) { SeqFeatPtr sfp; CharPtr protein_name; @@ -4285,7 +4287,7 @@ static CharPtr GetGeneProtDescription ( } else { - protein_name = GetProductName (sfp, bsp); + protein_name = GetProductName (sfp, bsp, suppress_locus_tag); if (protein_name == NULL && IsGene (sfp)) { @@ -4296,7 +4298,7 @@ static CharPtr GetGeneProtDescription ( description_length += StringLen (protein_name); } - gene_name = GetGeneName (fcp->grp); + gene_name = GetGeneName (fcp->grp, suppress_locus_tag); if (gene_name != NULL) { description_length += StringLen (gene_name); @@ -4339,10 +4341,10 @@ static matchFunction productfeatures[] = { /* This function finds gene features without products and looks for * features that might provide products for them. */ -static void FindGeneProducts ( - ValNodePtr clause_list, - BioseqPtr bsp -) +static void FindGeneProducts +( ValNodePtr clause_list, + BioseqPtr bsp, + Boolean suppress_locus_tag) { ValNodePtr vnp; FeatureClausePtr fcp, productfcp; @@ -4364,7 +4366,8 @@ static void FindGeneProducts ( for (i=0; i < NumProductFeatureTypes && productfcp == NULL; i++) { productfcp = FindProductInFeatureList (fcp, clause_list, - productfeatures[i]); + productfeatures[i], + suppress_locus_tag); } if (productfcp != NULL) { @@ -4377,7 +4380,8 @@ static void FindGeneProducts ( else { fcp->feature_label_data.productname - = GetProductName (productfcp->featlist->data.ptrvalue, bsp); + = GetProductName (productfcp->featlist->data.ptrvalue, + bsp, suppress_locus_tag); } if (fcp->feature_label_data.description != NULL) { @@ -4385,12 +4389,12 @@ static void FindGeneProducts ( fcp->feature_label_data.description = NULL; } fcp->feature_label_data.description = - GetGeneProtDescription (fcp, bsp); + GetGeneProtDescription (fcp, bsp, suppress_locus_tag); } } else { - FindGeneProducts (fcp->featlist, bsp); + FindGeneProducts (fcp->featlist, bsp, suppress_locus_tag); } } } @@ -4434,10 +4438,10 @@ static CharPtr GetExonDescription ( return label; } -static CharPtr GetFeatureDescription ( - FeatureClausePtr fcp, - BioseqPtr bsp -) +static CharPtr GetFeatureDescription +( FeatureClausePtr fcp, + BioseqPtr bsp, + Boolean suppress_locus_tag) { SeqFeatPtr sfp; @@ -4488,7 +4492,7 @@ static CharPtr GetFeatureDescription ( } else { - return GetGeneProtDescription (fcp, bsp); + return GetGeneProtDescription (fcp, bsp, suppress_locus_tag); } } @@ -4548,11 +4552,11 @@ static void LIBCALLBACK GetPromoterFeatureLabel ( * subfeatures of the clause, or the interval could be a combination of the * last two items if the feature is a CDS. */ -static CharPtr GetGenericInterval ( - FeatureClausePtr fcp, - Uint1 biomol, - BioseqPtr bsp -) +static CharPtr GetGenericInterval +( FeatureClausePtr fcp, + Uint1 biomol, + BioseqPtr bsp, + Boolean suppress_locus_tag) { CharPtr interval; Boolean partial5, partial3; @@ -4589,7 +4593,7 @@ static CharPtr GetGenericInterval ( { suppress_final_and = TRUE; } - LabelClauses (featlist, biomol, bsp); + LabelClauses (featlist, biomol, bsp, suppress_locus_tag); ListClauses (featlist, &strings, FALSE, suppress_final_and); subfeatlist = MergeValNodeStrings (strings, FALSE); ValNodeFreeData (strings); @@ -4647,12 +4651,12 @@ static CharPtr GetGenericInterval ( * for more of the specific feature types, to reduce the number of times * that the feature must be identified as being a certain type. */ -static void LIBCALLBACK GetGenericFeatureLabel ( - FeatureClausePtr fcp, - BioseqPtr bsp, - Uint1 biomol, - FeatureLabelPtr flp -) +static void LIBCALLBACK GetGenericFeatureLabel +( FeatureClausePtr fcp, + BioseqPtr bsp, + Uint1 biomol, + FeatureLabelPtr flp, + Boolean suppress_locus_tag) { SeqFeatPtr main_feat; @@ -4673,13 +4677,13 @@ static void LIBCALLBACK GetGenericFeatureLabel ( } if (flp->productname == NULL) { - flp->productname = GetProductName (main_feat, bsp); + flp->productname = GetProductName (main_feat, bsp, suppress_locus_tag); } if (flp->description == NULL && (! IsMiscRNA (main_feat) || StringStr (flp->productname, "spacer") == NULL )) { - flp->description = GetFeatureDescription (fcp, bsp); + flp->description = GetFeatureDescription (fcp, bsp, suppress_locus_tag); } } @@ -4711,11 +4715,11 @@ typedef enum { NumDefLineFeatLabels } DefLineFeatLabel; -static void LabelFeature ( - BioseqPtr bsp, - Uint1 biomol, - FeatureClausePtr new_clauseplus -) +static void LabelFeature +( BioseqPtr bsp, + Uint1 biomol, + FeatureClausePtr new_clauseplus, + Boolean suppress_locus_tag) { Int4 i; SeqFeatPtr main_feat; @@ -4726,11 +4730,12 @@ static void LabelFeature ( { main_feat = (SeqFeatPtr) new_clauseplus->featlist->data.ptrvalue; - new_clauseplus->allelename = GetAlleleName (new_clauseplus->grp); + new_clauseplus->allelename = GetAlleleName (new_clauseplus->grp, + suppress_locus_tag); if (new_clauseplus->interval == NULL) { new_clauseplus->interval = - GetGenericInterval (new_clauseplus, biomol, bsp); + GetGenericInterval (new_clauseplus, biomol, bsp, suppress_locus_tag); } for (i=0; i < NumDefLineFeatLabels; i++) @@ -4745,7 +4750,7 @@ static void LabelFeature ( } GetGenericFeatureLabel ( new_clauseplus, bsp, biomol, - &new_clauseplus->feature_label_data); + &new_clauseplus->feature_label_data, suppress_locus_tag); return; } } @@ -4919,11 +4924,11 @@ static void TrimUnwantedWordsFromAltSpliceProductName ( * must have the same gene, must share a complete interval, and must have * similarly named products. */ -static CharPtr MeetAltSpliceRules ( - FeatureClausePtr cdsfcp1, +static CharPtr MeetAltSpliceRules +( FeatureClausePtr cdsfcp1, FeatureClausePtr cdsfcp2, - BioseqPtr bsp -) + BioseqPtr bsp, + Boolean suppress_locus_tag) { SeqFeatPtr cds1, cds2; CharPtr match_string; @@ -4937,7 +4942,7 @@ static CharPtr MeetAltSpliceRules ( cds1 = cdsfcp1->featlist->data.ptrvalue; cds2 = cdsfcp2->featlist->data.ptrvalue; - if (! DoGenesMatch (cdsfcp1->grp, cdsfcp2->grp)) + if (! DoGenesMatch (cdsfcp1->grp, cdsfcp2->grp, suppress_locus_tag)) return NULL; if ( (res = TestFeatOverlap (cds1, cds2, COMMON_INTERVAL)) != -1) @@ -5048,10 +5053,10 @@ static void MoveSubclauses ( /* a comment and a data.choice value that indicates alt splicing */ /* we remove the second alternatively spliced CDS feature from the list */ -static void FindAltSplices ( - ValNodePtr clause_list, - BioseqPtr bsp -) +static void FindAltSplices +( ValNodePtr clause_list, + BioseqPtr bsp, + Boolean suppress_locus_tag) { FeatureClausePtr fcp1, fcp2; ValNodePtr cdsclause1, cdsclause2; @@ -5069,7 +5074,7 @@ static void FindAltSplices ( if (fcp1->feature_label_data.productname == NULL) { fcp1->feature_label_data.productname = - GetProductName (fcp1->featlist->data.ptrvalue, bsp); + GetProductName (fcp1->featlist->data.ptrvalue, bsp, suppress_locus_tag); } searchclause = cdsclause1->next; cdsclause2 = FindNextCDSClause (searchclause); @@ -5079,9 +5084,9 @@ static void FindAltSplices ( if (fcp2->feature_label_data.productname == NULL) { fcp2->feature_label_data.productname = - GetProductName (fcp2->featlist->data.ptrvalue, bsp); + GetProductName (fcp2->featlist->data.ptrvalue, bsp, suppress_locus_tag); } - combined_protein_name = MeetAltSpliceRules (fcp1, fcp2, bsp); + combined_protein_name = MeetAltSpliceRules (fcp1, fcp2, bsp, suppress_locus_tag); if (combined_protein_name != NULL) { /* get rid of variant, splice variant, splice product, isoform, etc.*/ @@ -5134,18 +5139,18 @@ static void FindAltSplices ( DeleteFeatureClauses (&clause_list); } -static void LabelClauses ( - ValNodePtr clause_list, - Uint1 biomol, - BioseqPtr bsp -) +static void LabelClauses +( ValNodePtr clause_list, + Uint1 biomol, + BioseqPtr bsp, + Boolean suppress_locus_tag) { ValNodePtr clause; clause = clause_list; while (clause != NULL) { - LabelFeature ( bsp, biomol, clause->data.ptrvalue); + LabelFeature ( bsp, biomol, clause->data.ptrvalue, suppress_locus_tag); clause = clause->next; } } @@ -5176,10 +5181,10 @@ static CharPtr separators [] = { #define num_separators 3 -static ValNodePtr GetMiscRNAelements ( - SeqFeatPtr misc_rna, - BioseqPtr bsp -) +static ValNodePtr GetMiscRNAelements +( SeqFeatPtr misc_rna, + BioseqPtr bsp, + Boolean suppress_locus_tag) { CharPtr buffer; Int4 i, best_i; @@ -5198,7 +5203,7 @@ static ValNodePtr GetMiscRNAelements ( to_free = NULL; if (misc_rna == NULL) return NULL; - buffer = GetProductName (misc_rna, bsp); + buffer = GetProductName (misc_rna, bsp, suppress_locus_tag); to_free = buffer; if (buffer == NULL) { @@ -5265,7 +5270,7 @@ static ValNodePtr GetMiscRNAelements ( word_i++) {} if (word_i < NUM_MISC_RNA_WORDS) { - fcp = NewFeatureClause ( misc_rna, bsp); + fcp = NewFeatureClause ( misc_rna, bsp, suppress_locus_tag); if (fcp == NULL) return NULL; if (word_i == MISC_RNA_WORD_INTERNAL_SPACER || word_i == MISC_RNA_WORD_EXTERNAL_SPACER @@ -5345,8 +5350,8 @@ static ValNodePtr GetMiscRNAelements ( */ static void ReplaceRNAClauses ( ValNodePtr PNTR clause_list, - BioseqPtr bsp -) + BioseqPtr bsp, + Boolean suppress_locus_tag) { FeatureClausePtr fcp; SeqFeatPtr main_feat; @@ -5368,7 +5373,7 @@ static void ReplaceRNAClauses ( if (IsrRNA (main_feat) || IsMiscRNA (main_feat)) { - replacement_clauses = GetMiscRNAelements ( main_feat, bsp ); + replacement_clauses = GetMiscRNAelements ( main_feat, bsp, suppress_locus_tag ); if (replacement_clauses != NULL) { for (vnp = replacement_clauses; vnp->next != NULL; vnp = vnp->next) {} @@ -6309,7 +6314,7 @@ static void ListClauses ( "pseudogene mRNA")==0) && clause_len > StringLen ("precursor") && StringCmp ( thisclause->feature_label_data.description - + clause_len - StringLen ("precursor"), + + clause_len - StringLen ("precursor") - 1, "precursor") == 0) { print_comma_between_description_and_typeword = TRUE; @@ -6483,7 +6488,8 @@ static Boolean LIBCALLBACK ShouldRemoveExon ( BioseqPtr bsp, Boolean isLonely, Boolean isRequested, - Boolean isSegment + Boolean isSegment, + Boolean suppress_locus_tag ) { Boolean partial3, partial5; @@ -6513,13 +6519,13 @@ static Boolean LIBCALLBACK ShouldRemoveCDS ( BioseqPtr bsp, Boolean isLonely, Boolean isRequested, - Boolean isSegment -) + Boolean isSegment, + Boolean suppress_locus_tag) { CharPtr description; Boolean retval = FALSE; - description = GetGeneProtDescription (this_fcp, bsp); + description = GetGeneProtDescription (this_fcp, bsp, suppress_locus_tag); if (StringHasNoText (description)) { retval = TRUE; @@ -6534,7 +6540,8 @@ static Boolean LIBCALLBACK ShouldRemoveNoncodingProductFeat ( FeatureClausePtr this_fcp, BioseqPtr bsp, Boolean isLonely, Boolean isRequested, - Boolean isSegment + Boolean isSegment, + Boolean suppress_locus_tag ) { if (isRequested) return FALSE; @@ -6547,7 +6554,8 @@ static Boolean LIBCALLBACK ShouldRemovePromoter ( FeatureClausePtr this_fcp, BioseqPtr bsp, Boolean isLonely, Boolean isRequested, - Boolean isSegment + Boolean isSegment, + Boolean suppress_locus_tag ) { if (isLonely || isRequested) return FALSE; @@ -6561,7 +6569,8 @@ static Boolean LIBCALLBACK ShouldRemoveLTR ( BioseqPtr bsp, Boolean isLonely, Boolean isRequested, - Boolean isSegment + Boolean isSegment, + Boolean suppress_locus_tag ) { if (isLonely || isRequested) @@ -6577,7 +6586,8 @@ static Boolean LIBCALLBACK ShouldRemove3UTR ( BioseqPtr bsp, Boolean isLonely, Boolean isRequested, - Boolean isSegment + Boolean isSegment, + Boolean suppress_locus_tag ) { if (isLonely || isRequested) @@ -6593,7 +6603,8 @@ static Boolean LIBCALLBACK ShouldRemove5UTR ( BioseqPtr bsp, Boolean isLonely, Boolean isRequested, - Boolean isSegment + Boolean isSegment, + Boolean suppress_locus_tag ) { if (isLonely || isRequested) @@ -6608,28 +6619,29 @@ static Boolean LIBCALLBACK ShouldRemoveIntron ( FeatureClausePtr this_fcp, BioseqPtr bsp, Boolean isLonely, Boolean isRequested, - Boolean isSegment + Boolean isSegment, + Boolean suppress_locus_tag ) { if (isLonely || isRequested) return FALSE; else return TRUE; } -static Boolean LIBCALLBACK ShouldRemoveGeneric ( - SeqFeatPtr sfp, +static Boolean LIBCALLBACK ShouldRemoveGeneric +( SeqFeatPtr sfp, FeatureClausePtr parent_fcp, FeatureClausePtr this_fcp, BioseqPtr bsp, Boolean isLonely, Boolean isRequested, - Boolean isSegment -) + Boolean isSegment, + Boolean suppress_locus_tag) { CharPtr productname; Boolean rval; rval = FALSE; - if (IsMiscRNA (sfp) && ( productname = GetProductName (sfp, bsp)) != NULL) + if (IsMiscRNA (sfp) && ( productname = GetProductName (sfp, bsp, suppress_locus_tag)) != NULL) { if (StringStr (productname, "trans-spliced leader") != NULL) { @@ -6688,6 +6700,7 @@ typedef struct deflinefeaturerequestlist { Boolean remove_subfeatures; DefLineType feature_list_type; Int4 misc_feat_parse_rule; + Boolean suppress_locus_tags; } DeflineFeatureRequestList, PNTR DeflineFeatureRequestListPtr; static void InitFeatureRequests ( @@ -6704,6 +6717,7 @@ static void InitFeatureRequests ( feature_requests->remove_subfeatures = FALSE; feature_requests->feature_list_type = DEFLINE_USE_FEATURES; feature_requests->misc_feat_parse_rule = 2; + feature_requests->suppress_locus_tags = FALSE; } static Boolean RemoveCondition ( @@ -6722,17 +6736,19 @@ static Boolean RemoveCondition ( { if (remove_items[i].itemmatch (sfp)) return remove_items[i].ShouldRemove (sfp, parent_fcp, this_fcp, bsp, - isLonely, feature_requests->items[i].keep, isSegment); + isLonely, feature_requests->items[i].keep, + isSegment, + feature_requests->suppress_locus_tags); } return ShouldRemoveGeneric(sfp, parent_fcp, this_fcp, bsp, isLonely, FALSE, - isSegment); + isSegment, feature_requests->suppress_locus_tags); } -static Boolean FindOtherGeneClause ( - ValNodePtr feature_list, +static Boolean FindOtherGeneClause +( ValNodePtr feature_list, ValNodePtr me, - GeneRefPtr grp -) + GeneRefPtr grp, + Boolean suppress_locus_tag) { ValNodePtr vnp; FeatureClausePtr fcp; @@ -6747,11 +6763,11 @@ static Boolean FindOtherGeneClause ( fcp = vnp->data.ptrvalue; if (fcp->delete_me) continue; if ( fcp->grp == grp - || (fcp->grp != NULL && DoGenesMatch (fcp->grp, grp))) + || (fcp->grp != NULL && DoGenesMatch (fcp->grp, grp, suppress_locus_tag))) { return TRUE; } - if ( FindOtherGeneClause (fcp->featlist, me, grp)) + if ( FindOtherGeneClause (fcp->featlist, me, grp, suppress_locus_tag)) { return TRUE; } @@ -6760,11 +6776,11 @@ static Boolean FindOtherGeneClause ( return FALSE; } -static void RemoveGenesMentionedElsewhere ( - ValNodePtr PNTR feature_list, - ValNodePtr search_list, - Boolean delete_now -) +static void RemoveGenesMentionedElsewhere +( ValNodePtr PNTR feature_list, + ValNodePtr search_list, + Boolean delete_now, + Boolean suppress_locus_tag) { ValNodePtr vnp; FeatureClausePtr fcp; @@ -6780,13 +6796,13 @@ static void RemoveGenesMentionedElsewhere ( } if ( IsGene (fcp->featlist->data.ptrvalue) && fcp->featlist->next == NULL - && FindOtherGeneClause ( search_list, vnp, fcp->grp)) + && FindOtherGeneClause ( search_list, vnp, fcp->grp, suppress_locus_tag)) { fcp->delete_me = TRUE; } else { - RemoveGenesMentionedElsewhere ( &(fcp->featlist), search_list, FALSE); + RemoveGenesMentionedElsewhere ( &(fcp->featlist), search_list, FALSE, suppress_locus_tag); } } } @@ -7295,8 +7311,8 @@ static void ConsolidateClauses ( ValNodePtr PNTR list, BioseqPtr bsp, Uint1 biomol, - Boolean delete_now -) + Boolean delete_now, + Boolean suppress_locus_tag) { ValNodePtr vnp; FeatureClausePtr fcp; @@ -7317,14 +7333,14 @@ static void ConsolidateClauses ( continue; } - ConsolidateClauses (&(fcp->featlist), bsp, biomol, FALSE); + ConsolidateClauses (&(fcp->featlist), bsp, biomol, FALSE, suppress_locus_tag); if (last_cds_fcp == NULL) { last_cds_fcp = fcp; if (fcp->feature_label_data.description == NULL) { - last_desc = GetGeneProtDescription (fcp, bsp); + last_desc = GetGeneProtDescription (fcp, bsp, suppress_locus_tag); } else { @@ -7344,7 +7360,7 @@ static void ConsolidateClauses ( { if (fcp->feature_label_data.description == NULL) { - new_desc = GetGeneProtDescription (fcp, bsp); + new_desc = GetGeneProtDescription (fcp, bsp, suppress_locus_tag); } else { @@ -7385,7 +7401,7 @@ static void ConsolidateClauses ( MemFree (last_cds_fcp->interval); } last_cds_fcp->interval = - GetGenericInterval (last_cds_fcp, biomol, bsp); + GetGenericInterval (last_cds_fcp, biomol, bsp, suppress_locus_tag); MemFree (new_desc); } else @@ -7405,8 +7421,8 @@ static void ConsolidateClauses ( static void CountUnknownGenes ( ValNodePtr PNTR clause_list, - BioseqPtr bsp -) + BioseqPtr bsp, + Boolean suppress_locus_tag) { FeatureClausePtr fcp, new_fcp; ValNodePtr vnp, new_vnp; @@ -7423,8 +7439,8 @@ static void CountUnknownGenes ( && (fcp = vnp->data.ptrvalue) != NULL && ! fcp->is_unknown) { - CountUnknownGenes (&(fcp->featlist), bsp); - gene_name = GetGeneProtDescription (fcp, bsp); + CountUnknownGenes (&(fcp->featlist), bsp, suppress_locus_tag); + gene_name = GetGeneProtDescription (fcp, bsp, suppress_locus_tag); if (StringCmp (gene_name, "unknown") == 0 && fcp->featlist != NULL && fcp->featlist->choice == DEFLINE_FEATLIST) @@ -7433,7 +7449,8 @@ static void CountUnknownGenes ( { new_vnp = ValNodeNew (*clause_list); if (new_vnp == NULL) return; - new_fcp = NewFeatureClause (fcp->featlist->data.ptrvalue, bsp); + new_fcp = NewFeatureClause (fcp->featlist->data.ptrvalue, + bsp, suppress_locus_tag); new_fcp->is_unknown = TRUE; new_vnp->choice = DEFLINE_CLAUSEPLUS; new_vnp->data.ptrvalue = new_fcp; @@ -7481,10 +7498,10 @@ static void ReplaceDefinitionLine ( MemFree (defline); } -FeatureClausePtr NewFeatureClause ( - SeqFeatPtr sfp, - BioseqPtr bsp -) +FeatureClausePtr NewFeatureClause +( SeqFeatPtr sfp, + BioseqPtr bsp, + Boolean suppress_locus_tag) { FeatureClausePtr fcp; Boolean partial5, partial3; @@ -7530,7 +7547,7 @@ FeatureClausePtr NewFeatureClause ( } if (IsCDS (sfp)) { - fcp->feature_label_data.productname = GetProductName (sfp, bsp); + fcp->feature_label_data.productname = GetProductName (sfp, bsp, suppress_locus_tag); } fcp->featlist = ValNodeNew (NULL); if (fcp->featlist == NULL) @@ -7545,9 +7562,7 @@ FeatureClausePtr NewFeatureClause ( return fcp; } -static ValNodePtr GetFeatureList ( - BioseqPtr bsp -) +static ValNodePtr GetFeatureList (BioseqPtr bsp, Boolean suppress_locus_tag) { ValNodePtr head, vnp; SeqFeatPtr sfp; @@ -7563,7 +7578,7 @@ static ValNodePtr GetFeatureList ( { if (IsRecognizedFeature (sfp)) { - fcp = NewFeatureClause (sfp, bsp); + fcp = NewFeatureClause (sfp, bsp, suppress_locus_tag); if (fcp == NULL) return NULL; fcp->numivals = fcontext.numivals; fcp->ivals = fcontext.ivals; @@ -7834,7 +7849,8 @@ static Boolean IntervalIntersectsIvals static ValNodePtr GrabTraversingGenes (ValNodePtr parent_feature_list, SeqMgrSegmentContextPtr context, - BioseqPtr parent_bsp) + BioseqPtr parent_bsp, + Boolean suppress_locus_tag) { FeatureClausePtr fcp, new_fcp; ValNodePtr clause; @@ -7855,7 +7871,8 @@ static ValNodePtr GrabTraversingGenes && fcp->ivals != NULL && fcp->numivals > 0) { if (IntervalIntersectsIvals (fcp->numivals, fcp->ivals, context)) { - new_fcp = NewFeatureClause (fcp->featlist->data.ptrvalue, parent_bsp); + new_fcp = NewFeatureClause (fcp->featlist->data.ptrvalue, parent_bsp, + suppress_locus_tag); if (new_fcp == NULL) return FALSE; vnp = ValNodeNew (segment_feature_list); if (vnp == NULL) return FALSE; @@ -7883,8 +7900,7 @@ static CharPtr BuildFeatureClauses ( static Boolean LIBCALLBACK GetFeatureClauseForSeg ( SeqLocPtr slp, - SeqMgrSegmentContextPtr context -) + SeqMgrSegmentContextPtr context) { SegmentDefLineFeatureClausePtr sdlp; ValNodePtr clause, tmp_parent_list; @@ -7932,7 +7948,8 @@ static Boolean LIBCALLBACK GetFeatureClauseForSeg ( && stop >= context->cumOffset) { new_fcp = NewFeatureClause (fcp->featlist->data.ptrvalue, - sdlp->parent_bsp); + sdlp->parent_bsp, + sdlp->feature_requests->suppress_locus_tags); if (new_fcp == NULL) return FALSE; vnp = ValNodeNew (segment_feature_list); if (vnp == NULL) return FALSE; @@ -7945,7 +7962,8 @@ static Boolean LIBCALLBACK GetFeatureClauseForSeg ( if (segment_feature_list == NULL) { segment_feature_list = GrabTraversingGenes (sdlp->parent_feature_list, - context, sdlp->parent_bsp); + context, sdlp->parent_bsp, + sdlp->feature_requests->suppress_locus_tags); } entityID = ObjMgrGetEntityIDForPointer (bsp); @@ -7996,15 +8014,15 @@ static CharPtr BuildFeatureClauses ( if (feature_requests->feature_list_type == DEFLINE_USE_FEATURES && ( ! isSegment || (seg_feature_list != NULL && *seg_feature_list != NULL))) { - GroupmRNAs (feature_list, bsp); + GroupmRNAs (feature_list, bsp, feature_requests->suppress_locus_tags); /* genes are added to other clauses */ - GroupGenes (feature_list); + GroupGenes (feature_list, feature_requests->suppress_locus_tags); if (! feature_requests->suppress_alt_splice_phrase) { /* find alt-spliced CDSs */ - FindAltSplices (*feature_list, bsp); + FindAltSplices (*feature_list, bsp, feature_requests->suppress_locus_tags); } GroupAltSplicedExons (feature_list, bsp, TRUE); @@ -8012,9 +8030,9 @@ static CharPtr BuildFeatureClauses ( /* now group clauses */ GroupAllClauses ( feature_list, bsp ); - ExpandAltSplicedExons (*feature_list, bsp); + ExpandAltSplicedExons (*feature_list, bsp, feature_requests->suppress_locus_tags); - FindGeneProducts (*feature_list, bsp); + FindGeneProducts (*feature_list, bsp, feature_requests->suppress_locus_tags); if (seg_feature_list != NULL && *seg_feature_list != NULL) { @@ -8027,7 +8045,8 @@ static CharPtr BuildFeatureClauses ( /* remove exons and other unwanted features */ RemoveUnwantedFeatures (feature_list, bsp, isSegment, feature_requests); - RemoveGenesMentionedElsewhere (feature_list, *feature_list, TRUE); + RemoveGenesMentionedElsewhere (feature_list, *feature_list, TRUE, + feature_requests->suppress_locus_tags); if (feature_requests->remove_subfeatures) { @@ -8036,7 +8055,7 @@ static CharPtr BuildFeatureClauses ( DeleteOperonSubfeatures (feature_list, TRUE); - CountUnknownGenes (feature_list, bsp); + CountUnknownGenes (feature_list, bsp, feature_requests->suppress_locus_tags); if (feature_requests->misc_feat_parse_rule == 1) { @@ -8047,7 +8066,7 @@ static CharPtr BuildFeatureClauses ( RemoveUnwantedMiscFeats (feature_list, TRUE); } - ReplaceRNAClauses (feature_list, bsp); + ReplaceRNAClauses (feature_list, bsp, feature_requests->suppress_locus_tags); /* take any exons on the minus strand */ /* and reverse their order within the clause */ @@ -8055,9 +8074,11 @@ static CharPtr BuildFeatureClauses ( RenameExonSequences ( feature_list, bsp, TRUE); - LabelClauses (*feature_list, molecule_type, bsp); + LabelClauses (*feature_list, molecule_type, bsp, + feature_requests->suppress_locus_tags); - ConsolidateClauses (feature_list, bsp, molecule_type, TRUE); + ConsolidateClauses (feature_list, bsp, molecule_type, TRUE, + feature_requests->suppress_locus_tags); /* this allows genes to be listed together even if they are from */ /* separate sequences */ @@ -8096,17 +8117,26 @@ static Int2 GetProductFlagFromCDSProductNames (BioseqPtr bsp) SeqFeatPtr cds = NULL; Int2 product_flag; Int2 i; + CharPtr found; + Char ch; product_flag = 0; - for (cds = SeqMgrGetNextFeature (bsp, cds, SEQFEAT_CDREGION, 0, &context); cds != NULL && product_flag == 0; cds = cds->next) + for (cds = SeqMgrGetNextFeature (bsp, cds, SEQFEAT_CDREGION, 0, &context); + cds != NULL && product_flag == 0; + cds = cds->next) { - for (i = 1; organelleByPopup[i] != NULL && product_flag == 0; i++) - { - if (StringStr (context.label, organelleByPopup[i])) - { - product_flag = i; - } - } + for (i = 1; organelleByPopup[i] != NULL && product_flag == 0; i++) + { + found = StringStr (context.label, organelleByPopup[i]); + if (found != NULL) + { + ch = *(found + StringLen (organelleByPopup[i])); + if (ch == 0 || ch == ' ') + { + product_flag = i; + } + } + } } return product_flag; @@ -8162,7 +8192,8 @@ static void BuildDefLineFeatClauseList ( sdld.parent_bsp = bsp; sdld.molecule_type = GetMoleculeType (bsp, entityID); - sdld.parent_feature_list = GetFeatureList (bsp); + sdld.parent_feature_list = GetFeatureList (bsp, + feature_requests->suppress_locus_tags); sdld.feature_requests = feature_requests; sdld.product_flag = product_flag; @@ -8201,7 +8232,7 @@ static void BuildDefLineFeatClauseList ( if (bsp == NULL) return; if ( SpecialHandlingForSpecialTechniques (bsp)) return; molecule_type = GetMoleculeType (bsp, entityID); - head = GetFeatureList (bsp); + head = GetFeatureList (bsp, feature_requests->suppress_locus_tags); /* get default product flag if necessary */ if (product_flag == -1 || product_flag == DEFAULT_ORGANELLE_CLAUSE) { @@ -8280,6 +8311,7 @@ typedef struct deflineformdata { GrouP featureOptsGrp; PopuP misc_feat_parse_rule; ButtoN alternate_splice_flag; + ButtoN suppress_locus_tags; } DefLineFormData, PNTR DefLineFormPtr; static void DefLineFormMessageProc (ForM f, Int2 mssg) @@ -8435,6 +8467,9 @@ static void DoAutoDefLine (ButtoN b) dlfp->feature_requests.remove_subfeatures = GetStatus (dlfp->remove_subfeatures); + dlfp->feature_requests.suppress_locus_tags = + GetStatus (dlfp->suppress_locus_tags); + dlfp->feature_requests.misc_feat_parse_rule = GetValue (dlfp->misc_feat_parse_rule); @@ -8796,6 +8831,10 @@ static GrouP CreateDefLineFormFeatureOptionsGroup ( "Suppress transposon and insertion sequence subfeatures", NULL); SetStatus (dlfp->remove_subfeatures, FALSE); + dlfp->suppress_locus_tags = CheckBox (dlfp->featureOptsGrp, + "Suppress locus tags", NULL); + SetStatus (dlfp->suppress_locus_tags, FALSE); + g = NormalGroup (dlfp->featureOptsGrp, 3, 0, "Optional Features", programFont, NULL); @@ -8824,6 +8863,7 @@ static GrouP CreateDefLineFormFeatureOptionsGroup ( (HANDLE) dlfp->alternate_splice_flag, (HANDLE) dlfp->suppress_alt_splice_phrase, (HANDLE) dlfp->remove_subfeatures, + (HANDLE) dlfp->suppress_locus_tags, (HANDLE) g, (HANDLE) r, NULL); diff --git a/sequin/sequin2.c b/sequin/sequin2.c index c7302c3b..16d97958 100644 --- a/sequin/sequin2.c +++ b/sequin/sequin2.c @@ -29,7 +29,7 @@ * * Version Creation Date: 1/22/95 * -* $Revision: 6.168 $ +* $Revision: 6.172 $ * * File Description: * @@ -1095,7 +1095,7 @@ End Gap: When some of the sequences in an alignment are shorter \ or longer than others, end gap characters are added to the end \ of the sequence to maintain the correct spacing. These will \ not appear in your sequence file.\n\ -Missing: These characters are used to represent \ +Ambiguous/Unknown: These characters are used to represent \ indeterminate/ambiguous nucleotides. These will appear in your \ sequence file as 'n'.\n\ Match: These characters are used to indicate positions where \ @@ -1187,6 +1187,7 @@ static Boolean ImportPhylipDialog (DialoG d, CharPtr filename) CountTitlesWithoutOrganisms (sep); } else { + SendHelpScrollMessage (helpForm, "Organism and Sequences Form", "Nucleotide Page"); SetPhylipDocInstructions (ppp); } } else { @@ -1259,7 +1260,7 @@ static DialoG CreatePhylipDialog (GrouP h, CharPtr title, CharPtr text, a = NormalGroup (m, 4, 0, "Sequence Characters", systemFont, NULL); StaticPrompt (a, "Beginning Gap", 0, dialogTextHeight, systemFont, 'c'); ppp->beginning_gap = DialogText (a, "-.Nn?", 5, NULL); - StaticPrompt (a, "Missing", 0, dialogTextHeight, systemFont, 'c'); + StaticPrompt (a, "Ambiguous/Unknown", 0, dialogTextHeight, systemFont, 'c'); ppp->missing = DialogText (a, "?Nn", 5, NULL); StaticPrompt (a, "Middle Gap", 0, dialogTextHeight, systemFont, 'c'); ppp->middle_gap = DialogText (a, "-.", 5, NULL); @@ -7848,6 +7849,8 @@ extern void SqnNewAlign (BioseqPtr bsp1, BioseqPtr bsp2, SeqAlignPtr PNTR salp) } +/* This section of code is for the Remove Sequences From Alignments function. */ + typedef struct alignmentsequencelist { SeqIdPtr sip; Char descr[255]; @@ -7860,12 +7863,70 @@ typedef struct removeseqfromaligndata { SeqEntryPtr sep; } RemoveSeqFromAlignData, PNTR RemoveSeqFromAlignPtr; -static void RemoveOneSequenceFromAlignment (SeqIdPtr sip, SeqAlignPtr salp) +/* This function will remove DenDiag and pairwise alignments if they contain + * the sequence identified by sip, otherwise it will remove the sequence from + * the alignment. + */ +static SeqAlignPtr RemoveOneSequenceFromAlignment (SeqIdPtr sip, SeqAlignPtr salphead) { - if (FindSeqIdinSeqAlign (salp, sip)) { - SeqAlignIDCache (salp, sip); + Uint4 seqid_order; + SeqIdPtr tmpsip; + SeqAlignPtr salp, salp_next, prev_salp, remove_salp, last_remove; + + if (!FindSeqIdinSeqAlign (salphead, sip)) return; + + salp = salphead; + prev_salp = NULL; + remove_salp = NULL; + last_remove = NULL; + while (salp != NULL) + { + salp_next = salp->next; + tmpsip = SeqIdPtrFromSeqAlign (salp); + seqid_order = SeqIdOrderInBioseqIdList(sip, tmpsip); + if (seqid_order == 0) + { + /* do nothing for this subalignment */ + prev_salp = salp; + } + else if (salp->dim == 2 || salphead->segtype ==1) + { + /* This is for a pairwise alignment or a DENDIAG alignment */ + if (prev_salp == NULL) + { + salphead = salp->next; + } + else + { + prev_salp->next = salp->next; + } + /* save the alignments that we want to free in a list and get rid of them + * at the end - freeing them beforehand causes problems with listing the + * IDs in the alignment. + */ + salp->next = NULL; + if (remove_salp == NULL) + { + remove_salp = salp; + } + else + { + last_remove->next = salp; + } + last_remove = salp; + } + else + { + SeqAlignBioseqDeleteById (salphead, sip); + prev_salp = salp; + } + salp = salp_next; } + /* Now we can free the alignment */ + SeqAlignFree (remove_salp); + return salphead; } + static void RemoveSequenceFromAlignmentsCallback (SeqAnnotPtr sap, Pointer userdata) { SeqAlignPtr salp; @@ -7875,19 +7936,94 @@ static void RemoveSequenceFromAlignmentsCallback (SeqAnnotPtr sap, Pointer userd salp = (SeqAlignPtr) sap->data; if (salp == NULL) return; sip = (SeqIdPtr) userdata; - RemoveOneSequenceFromAlignment (sip, salp); + sap->data = RemoveOneSequenceFromAlignment (sip, salp); + /* if we've deleted all of the alignments, get rid of the annotation as well */ + if (sap->data == NULL) + { + sap->idx.deleteme = TRUE; + } +} + +typedef struct checkforremovesequencefromalignments +{ + Boolean found_problem; + SeqIdPtr sip; +} CheckForRemoveSequenceFromAlignmentsData, PNTR CheckForRemoveSequenceFromAlignmentsPtr; + +/* This is the callback function for looking for pairwise alignments. +/* If we delete the first sequence in a pairwise alignment, we end up deleting + * the entire alignment because that sequence is paired with every other sequence. + */ +static void CheckForRemoveSequenceFromAlignmentsProblemsCallback (SeqAnnotPtr sap, Pointer userdata) +{ + CheckForRemoveSequenceFromAlignmentsPtr p; + SeqAlignPtr salphead, salp; + Uint4 seqid_order; + SeqIdPtr tmpsip; + + if (sap == NULL || sap->type != 2 + || (p = (CheckForRemoveSequenceFromAlignmentsPtr)userdata) == NULL + || p->found_problem) + { + return; + } + salphead = (SeqAlignPtr) sap->data; + if (salphead == NULL) return; + + if (!FindSeqIdinSeqAlign (salphead, p->sip)) + { + return; + } + for (salp = salphead; salp != NULL; salp = salp->next) + { + tmpsip = SeqIdPtrFromSeqAlign (salp); + seqid_order = SeqIdOrderInBioseqIdList(p->sip, tmpsip); + if (seqid_order == 0) + { + continue; + } + else if (seqid_order == 1 && salp->dim == 2) + { + p->found_problem = TRUE; + } + } } static void DoRemoveSequencesFromAlignment (ButtoN b) { RemoveSeqFromAlignPtr rp; + WindoW w; ValNodePtr vnp; Int2 val; AlignmentSequenceListPtr aslp; - + CheckForRemoveSequenceFromAlignmentsData data; + if (b == NULL) return; rp = (RemoveSeqFromAlignPtr) GetObjectExtra (b); if (rp == NULL) return; + + w = (WindoW) rp->form; + Hide (w); + /* first, check for pairwise alignments */ + val = 1; + for (vnp = rp->sequence_list; vnp != NULL; vnp = vnp->next) { + aslp = vnp->data.ptrvalue; + if (aslp == NULL) continue; + if (GetItemStatus (rp->sequence_list_ctrl, val)) { + data.sip = aslp->sip; + data.found_problem = FALSE; + VisitAnnotsInSep (rp->sep, (Pointer) &data, CheckForRemoveSequenceFromAlignmentsProblemsCallback); + if (data.found_problem) + { + Message (MSG_ERROR, "One of the selected sequences is the first in a pairwise alignment." + " You must convert the alignment to a multiple alignment before trying to remove this sequence."); + Remove (rp->form); + return; + } + } + val++; + } + val = 1; for (vnp = rp->sequence_list; vnp != NULL; vnp = vnp->next) { aslp = vnp->data.ptrvalue; @@ -7897,11 +8033,39 @@ static void DoRemoveSequencesFromAlignment (ButtoN b) } val++; } + + ValNodeFree (rp->sequence_list); + rp->sequence_list = NULL; + DeleteMarkedObjects (rp->input_entityID, 0, NULL); ObjMgrSetDirtyFlag (rp->input_entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, rp->input_entityID, 0, 0); Remove (rp->form); } +/* This function is used so that a sequence ID will only appear once in the list, + * even if it appears in more than one alignment or subalignment. + */ +static Boolean IsIDAlreadyInList (SeqIdPtr sip, ValNodePtr list) +{ + ValNodePtr vnp; + AlignmentSequenceListPtr aslp; + + if (sip == NULL) return FALSE; + + for (vnp = list; vnp != NULL; vnp = vnp->next) + { + aslp = (AlignmentSequenceListPtr) vnp->data.ptrvalue; + if (aslp != NULL && SeqIdComp (aslp->sip, sip) == SIC_YES) + { + return TRUE; + } + } + return FALSE; +} + +/* This function creates the list of sequence IDs and descriptions to use in + * the Remove Sequences From Alignments dialog. + */ static void ListSequencesInAlignmentsCallback (SeqAnnotPtr sap, Pointer userdata) { SeqAlignPtr salp; @@ -7914,11 +8078,13 @@ static void ListSequencesInAlignmentsCallback (SeqAnnotPtr sap, Pointer userdata if (sap == NULL || sap->type != 2 || userdata == NULL) return; salp = (SeqAlignPtr) sap->data; - if (salp == NULL) return; - list = (ValNodePtr PNTR)userdata; - sip_list = SeqAlignIDList (salp); - if (sip_list == NULL) return; - for (sip = sip_list; sip != NULL; sip = sip->next) { + while (salp != NULL) + { + list = (ValNodePtr PNTR)userdata; + sip_list = SeqAlignIDList (salp); + if (sip_list == NULL) return; + for (sip = sip_list; sip != NULL; sip = sip->next) { + if (IsIDAlreadyInList (sip, *list)) continue; aslp = (AlignmentSequenceListPtr) MemNew (sizeof (AlignmentSequenceListData)); if (aslp == NULL) return; aslp->sip = sip; @@ -7933,7 +8099,7 @@ static void ListSequencesInAlignmentsCallback (SeqAnnotPtr sap, Pointer userdata offset ++; } SeqIdWrite (bsp_sip, aslp->descr + offset, PRINTID_TEXTID_ACCESSION, 254 - offset); - offset += StringLen (aslp->descr); + offset = StringLen (aslp->descr); } } else { SeqIdWrite (sip, aslp->descr, PRINTID_TEXTID_ACCESSION, 254); @@ -7942,7 +8108,9 @@ static void ListSequencesInAlignmentsCallback (SeqAnnotPtr sap, Pointer userdata vnp->data.ptrvalue = aslp; if (*list == NULL) { *list = vnp; - } + } + } + salp = salp->next; } } @@ -7992,6 +8160,7 @@ extern void RemoveSequencesFromAlignment (IteM i) rp = (RemoveSeqFromAlignPtr) MemNew (sizeof (RemoveSeqFromAlignData)); if (rp == NULL) return; + rp->input_entityID = bfp->input_entityID; rp->sep = GetTopSeqEntryForEntityID (bfp->input_entityID); if (rp->sep == NULL) { MemFree (rp); @@ -8035,4 +8204,5 @@ extern void RemoveSequencesFromAlignment (IteM i) Update (); } +/* End of Remove Sequences From Alignments function code. */ diff --git a/sequin/sequin3.c b/sequin/sequin3.c index c574dd52..912199c2 100644 --- a/sequin/sequin3.c +++ b/sequin/sequin3.c @@ -29,7 +29,7 @@ * * Version Creation Date: 1/22/95 * -* $Revision: 6.377 $ +* $Revision: 6.384 $ * * File Description: * @@ -1074,6 +1074,57 @@ static void RemoveAllGeneXrefs (IteM i) ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0); } +static void DoRefreshGeneXrefs (SeqFeatPtr sfp, Pointer userdata) + +{ + SeqFeatXrefPtr curr; + GeneRefPtr grp, grpfeat; + SeqFeatPtr gene; + SeqMgrFeatContext fcontext; + BioseqPtr bsp; + + if (sfp == NULL) return; + + for (curr = sfp->xref; curr != NULL; curr = curr->next) + { + if (curr->data.choice == SEQFEAT_GENE) { + grp = (GeneRefPtr) curr->data.value.ptrvalue; + if (grp != NULL) + { + bsp = BioseqFindFromSeqLoc (sfp->location); + gene = SeqMgrGetFeatureByLabel (bsp, grp->locus, SEQFEAT_GENE, 0, &fcontext); + if (gene != NULL && gene->data.choice == SEQFEAT_GENE) { + grpfeat = (GeneRefPtr) gene->data.value.ptrvalue; + if (grpfeat != NULL) { + GeneRefFree (grp); + grp = GeneRefDup (grpfeat); + curr->data.value.ptrvalue = grp; + } + } + } + } + } +} + +static void RefreshGeneXRefs (IteM i) + +{ + BaseFormPtr bfp; + SeqEntryPtr sep; + +#ifdef WIN_MAC + bfp = currentFormDataPtr; +#else + bfp = GetObjectExtra (i); +#endif + if (bfp == NULL) return; + sep = GetTopSeqEntryForEntityID (bfp->input_entityID); + if (sep == NULL) return; + VisitFeaturesInSep (sep, NULL, DoRefreshGeneXrefs); + ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0); +} + static ValNodePtr RemoveDbxrefList (ValNodePtr vnp) { @@ -1634,7 +1685,47 @@ static void RawSeqToDeltaSeq (IteM i) if (bfp == NULL) return; sep = GetTopSeqEntryForEntityID (bfp->input_entityID); if (sep == NULL) return; - VisitBioseqsInSep (sep, (Pointer) bfp, ConvertNsToGaps); + VisitBioseqsInSep (sep, NULL, ConvertNsToGaps); + ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0); +} + +static void RawSeqToDeltaSeqUnknownLengthGaps (IteM i) + +{ + BaseFormPtr bfp; + SeqEntryPtr sep; + Int4 unknown_gap_size = 100; + +#ifdef WIN_MAC + bfp = currentFormDataPtr; +#else + bfp = GetObjectExtra (i); +#endif + if (bfp == NULL) return; + sep = GetTopSeqEntryForEntityID (bfp->input_entityID); + if (sep == NULL) return; + VisitBioseqsInSep (sep, &unknown_gap_size, ConvertNsToGaps); + ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0); +} + +static void RawSeqToDeltaSeqUnknown100LengthGaps (IteM i) + +{ + BaseFormPtr bfp; + SeqEntryPtr sep; + Int4 unknown_gap_size = -1; + +#ifdef WIN_MAC + bfp = currentFormDataPtr; +#else + bfp = GetObjectExtra (i); +#endif + if (bfp == NULL) return; + sep = GetTopSeqEntryForEntityID (bfp->input_entityID); + if (sep == NULL) return; + VisitBioseqsInSep (sep, &unknown_gap_size, ConvertNsToGaps); ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0); } @@ -10314,6 +10405,9 @@ extern void SetupSpecialMenu (MenU m, BaseFormPtr bfp) i = CommandItem (s, "Genus-Species Fixup", GenSpecTaxonFixup); SetObjectExtra (i, bfp, NULL); SeparatorItem (s); + i = CommandItem (s, "Country Fixup", CountryLookup); + SetObjectExtra (i, bfp, NULL); + SeparatorItem (s); i = CommandItem (s, "Set Source Focus", SetSourceFocus); SetObjectExtra (i, bfp, NULL); i = CommandItem (s, "Clear Source Focus", ClearSourceFocus); @@ -10371,6 +10465,8 @@ extern void SetupSpecialMenu (MenU m, BaseFormPtr bfp) SetObjectExtra (i, bfp, NULL); i = CommandItem (s, "Remove Proteins", RemoveProteins); SetObjectExtra (i, bfp, NULL); + i = CommandItem (s, "Remove Proteins and Renormalize Nuc-Prot Sets", RemoveProteinsAndRenormalize); + SetObjectExtra (i, bfp, NULL); SeparatorItem (s); i = CommandItem (s, "Remove Source Qual", RemoveSource); SetObjectExtra (i, bfp, NULL); @@ -10566,6 +10662,9 @@ extern void SetupSpecialMenu (MenU m, BaseFormPtr bfp) i = CommandItem (s, "Resolve Colliding Local IDs", ResolveExistingLocalIDs); SetObjectExtra (i, bfp, NULL); } + SeparatorItem (s); + i = CommandItem (s, "Refresh Gene Xrefs", RefreshGeneXRefs); + SetObjectExtra (i, bfp, NULL); s = SubMenu (m, "Edit/ E"); i = CommandItem (s, "Edit Qualifiers", EditQualifier); @@ -10624,7 +10723,7 @@ extern void SetupSpecialMenu (MenU m, BaseFormPtr bfp) s = SubMenu (m, "Transform/ T"); i = CommandItem (s, "Correct CDS Genetic Codes", CorrectCDSGenCodes); SetObjectExtra (i, bfp, NULL); - i = CommandItem (s, "Correct CDS Propagate Crud", FixCdsAfterPropagate); + i = CommandItem (s, "Cleanup CDS partials after propagation", FixCdsAfterPropagate); SetObjectExtra (i, bfp, NULL); SeparatorItem (s); i = CommandItem (s, "Trim Ns from Bioseqs", TrimNsFromNucs); @@ -10799,7 +10898,12 @@ extern void SetupSpecialMenu (MenU m, BaseFormPtr bfp) SetObjectExtra (i, bfp, NULL); SeparatorItem (s); } - i = CommandItem (s, "Raw Sequence with Ns to Delta Sequence", RawSeqToDeltaSeq); + x = SubMenu (s, "Raw Sequence with Ns to Delta Sequence"); + i = CommandItem (x, "All Known Length Gaps", RawSeqToDeltaSeq); + SetObjectExtra (i, bfp, NULL); + i = CommandItem (x, "Unknown Length Gaps for 100 Ns", RawSeqToDeltaSeqUnknownLengthGaps); + SetObjectExtra (i, bfp, NULL); + i = CommandItem (x, "Unknown Length 100 Gaps for All Ns", RawSeqToDeltaSeqUnknown100LengthGaps); SetObjectExtra (i, bfp, NULL); s = SubMenu (m, "Misc/ M"); diff --git a/sequin/sequin4.c b/sequin/sequin4.c index d21e8e4c..457c5add 100644 --- a/sequin/sequin4.c +++ b/sequin/sequin4.c @@ -29,7 +29,7 @@ * * Version Creation Date: 6/28/96 * -* $Revision: 6.210 $ +* $Revision: 6.214 $ * * File Description: * @@ -74,6 +74,7 @@ #include <aliparse.h> #include <spidey.h> #include <ent2api.h> +#include <valid.h> #define REGISTER_UPDATESEGSET ObjMgrProcLoadEx (OMPROC_FILTER,"Update Segmented Set","UpdateSegSet",0,0,0,0,NULL,UpdateSegSet,PROC_PRIORITY_DEFAULT, "Indexer") @@ -97,6 +98,10 @@ #define REGISTER_SEGREGATE_BY_TEXT ObjMgrProcLoadEx (OMPROC_FILTER, "Segregate By Text","SegregateByText",0,0,0,0,NULL,CreateSegregateByTextWindow,PROC_PRIORITY_DEFAULT, "Indexer") +#define REGISTER_SEGREGATE_BY_FEATURE ObjMgrProcLoadEx (OMPROC_FILTER, "Segregate By Feature","SegregateByFeature",0,0,0,0,NULL,CreateSegregateByFeatureWindow,PROC_PRIORITY_DEFAULT, "Indexer") + +#define REGISTER_SEGREGATE_BY_DESCRIPTOR ObjMgrProcLoadEx (OMPROC_FILTER, "Segregate By Descriptor","SegregateByDescriptor",0,0,0,0,NULL,CreateSegregateByDescriptorWindow,PROC_PRIORITY_DEFAULT, "Indexer") + #define REGISTER_CONVERTSEQALIGN ObjMgrProcLoadEx (OMPROC_FILTER,"Convert SeqAlign","ConvertSeqAlign",0,0,0,0,NULL,ConvertToTrueMultipleAlignment,PROC_PRIORITY_DEFAULT, "Alignment") #define REGISTER_MAKESEQALIGN ObjMgrProcLoadEx (OMPROC_FILTER,"Make SeqAlign","CreateSeqAlign",0,0,0,0,NULL,GenerateSeqAlignFromSeqEntry,PROC_PRIORITY_DEFAULT, "Alignment") @@ -6509,6 +6514,8 @@ extern void SetupSequinFilters (void) if (indexerVersion) { REGISTER_DELETE_BY_TEXT; + REGISTER_SEGREGATE_BY_FEATURE; + REGISTER_SEGREGATE_BY_DESCRIPTOR; REGISTER_SEGREGATE_BY_TEXT; REGISTER_FIND_NON_ACGT; REGISTER_BSP_INDEX; @@ -8148,3 +8155,92 @@ extern void ConsolidateOrganismNotes (IteM i) ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0); } +static void CountryLookupProc (BioSourcePtr biop, Pointer userdata) +{ + CharPtr PNTR list; + CharPtr PNTR ptr; + SubSourcePtr ssp; + CharPtr cp, before, newname; + Int4 len_cntry, len_qual, len_name; + + if (biop == NULL || (list = (CharPtr PNTR)userdata) == NULL) + { + return; + } + + for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) + { + if (ssp->subtype != SUBSRC_country || ssp->name == NULL) continue; + for (ptr = list; ptr != NULL && *ptr != NULL; ptr++) + { + len_cntry = StringLen (*ptr); + cp = StringStr (ssp->name, *ptr); + if (cp != NULL && !isalpha (cp [len_cntry])) + { + len_qual = StringLen (ssp->name); + if (cp == ssp->name) + { + if (len_cntry == len_qual || ssp->name [len_cntry] == ':') + { + /* exact match, don't need to do anything */ + return; + } + ssp->name [len_cntry] = ':'; + return; + } + else + { + if (isalpha (*(cp - 1))) + { + /* not really a match, part of another word */ + continue; + } + else + { + newname = (CharPtr) MemNew (len_qual + 3); + *(cp - 1) = 0; + before = StringSave (ssp->name); + StringNCpy (newname, *ptr, len_cntry); + newname [len_cntry] = ':'; + newname [len_cntry + 1] = ' '; + StringNCpy (newname + len_cntry + 2, before, StringLen (before)); + StringCpy (newname + len_cntry + 2 + StringLen (before), cp + len_cntry); + len_name = StringLen (newname); + while (isspace (newname[len_name - 1]) || ispunct (newname [len_name - 1])) + { + newname [len_name - 1] = 0; + len_name --; + } + before = MemFree (before); + MemFree (ssp->name); + ssp->name = newname; + } + } + } + } + } +} + +extern void CountryLookup (IteM i) +{ + BaseFormPtr bfp; + SeqEntryPtr sep; + CharPtr PNTR list; + + +#ifdef WIN_MAC + bfp = currentFormDataPtr; +#else + bfp = GetObjectExtra (i); +#endif + if (bfp == NULL) return; + sep = GetTopSeqEntryForEntityID (bfp->input_entityID); + if (sep == NULL) return; + + list = GetValidCountryList (); + if (list == NULL) return; + VisitBioSourcesInSep (sep, list, CountryLookupProc); + ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0); +} + diff --git a/sequin/sequin5.c b/sequin/sequin5.c index db716219..2e45fc3c 100644 --- a/sequin/sequin5.c +++ b/sequin/sequin5.c @@ -29,7 +29,7 @@ * * Version Creation Date: 8/26/97 * -* $Revision: 6.169 $ +* $Revision: 6.171 $ * * File Description: * @@ -3266,7 +3266,7 @@ static void BlastCDD (BioseqPtr bsp, Pointer userdata) /* do blast search */ - salp = BlastBioseqNet (bl3hp, bsp, "blastp", "oasis_sap", options, + salp = BlastBioseqNet (bl3hp, bsp, "blastp", "cdd", options, NULL, &error_returns, NULL); /* BlastErrorPrintExtra (error_returns, TRUE, stdout); */ @@ -3314,7 +3314,7 @@ extern void SimpleCDDBlastProc (IteM i) /* blast fetch enable needed to retrieve by general SeqID */ - BlastNetBioseqFetchEnable (bl3hp, "oasis_sap", FALSE, TRUE); + BlastNetBioseqFetchEnable (bl3hp, "cdd", FALSE, TRUE); bf.bl3hp = bl3hp; bf.options = options; @@ -3330,7 +3330,7 @@ extern void SimpleCDDBlastProc (IteM i) BlastFini (bl3hp); options = BLASTOptionDelete (options); - BlastNetBioseqFetchDisable (bl3hp, "oasis_sap", FALSE); + BlastNetBioseqFetchDisable (bl3hp, "cdd", FALSE); ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0); @@ -9301,14 +9301,14 @@ static void ProcessBioSourceFunc (BioSourcePtr biop, SourceFormPtr sfp, Boolean if (ssp != NULL) { foundit = StringISearch (ssp->name, sfp->findStr); while (foundit != NULL) { - offset = foundit - ssp->name; + offset = foundit - ssp->name + 1; EditSourceString (&(ssp->name), sfp, foundit); foundit = StringISearch (ssp->name + offset, sfp->findStr); } } else if (mod != NULL) { foundit = StringISearch (mod->subname, sfp->findStr); while (foundit != NULL) { - offset = foundit - mod->subname; + offset = foundit - mod->subname + 1; EditSourceString (&(mod->subname), sfp, foundit); foundit = StringISearch (mod->subname + offset, sfp->findStr); } diff --git a/sequin/sequin6.c b/sequin/sequin6.c index 4a0f4b4e..33c8c607 100644 --- a/sequin/sequin6.c +++ b/sequin/sequin6.c @@ -29,7 +29,7 @@ * * Version Creation Date: 11/12/97 * -* $Revision: 6.162 $ +* $Revision: 6.164 $ * * File Description: * @@ -75,6 +75,7 @@ END_ENUM_ALIST #define IMPORT_FEAT_TYPE 7 #define DEFLINE_TYPE 8 #define FEATURE_NOTE_TYPE 9 +#define PUBLICATION_TYPE 10 #define NUMBER_OF_TYPES 7 #define NUMBER_OF_TYPES_WITH_DEFLINE 8 @@ -91,7 +92,9 @@ static ENUM_ALIST(target_field_alist) {"DefLine", DEFLINE_TYPE}, END_ENUM_ALIST -#define NUMBER_OF_SEGREGATE_TYPES 9 +#define NUMBER_OF_SEGREGATE_TYPES 10 +#define NUMBER_OF_PARSE_TYPES 9 + static ENUM_ALIST(segregate_target_field_alist) {" ", 0}, {"Gene", GENE_TYPE}, @@ -103,8 +106,34 @@ static ENUM_ALIST(segregate_target_field_alist) {"Import Feature", IMPORT_FEAT_TYPE}, {"DefLine", DEFLINE_TYPE}, {"Feature Note", FEATURE_NOTE_TYPE}, + {"Publication", PUBLICATION_TYPE}, +END_ENUM_ALIST + +#define NUMBER_OF_PARSE_TYPES 9 + +static ENUM_ALIST(parse_target_field_alist) + {" ", 0}, + {"Gene", GENE_TYPE}, + {"CDS", CDS_TYPE}, + {"Prot", PROT_TYPE}, + {"RNA", RNA_TYPE}, + {"BioSource", BIOSOURCE_TYPE}, + {"OrgMod and SubSource", ORGMOD_SUBSOURCE_TYPE}, + {"Import Feature", IMPORT_FEAT_TYPE}, + {"DefLine", DEFLINE_TYPE}, + {"Feature Note", FEATURE_NOTE_TYPE}, END_ENUM_ALIST +#define PUBLICATION_PUBLISHED_FIELD 1 +#define PUBLICATION_INPRESS_FIELD 2 +#define PUBLICATION_UNPUB_FIELD 3 + +static ENUM_ALIST (publication_field_alist) + {" ", 0}, + {"Published", PUBLICATION_PUBLISHED_FIELD}, + {"In Press", PUBLICATION_INPRESS_FIELD}, + {"Unpublished", PUBLICATION_UNPUB_FIELD}, +END_ENUM_ALIST #define EXT_NUMBER_OF_TYPES 7 @@ -350,7 +379,7 @@ static ENUM_ALIST (subsource_subtype_and_note_alist) END_ENUM_ALIST -#define NUM_SUBTARGET_POPUPS 10 +#define NUM_SUBTARGET_POPUPS 11 static GbFeatName ParseQualifierList[] = { {"allele", Class_text}, {"anticodon", Class_pos_aa}, @@ -1618,6 +1647,246 @@ static Boolean DoFeaturesContainText_Callback return found; } +typedef struct objstringdata +{ + CharPtr match; + Boolean found; +} ObjStringData, PNTR ObjStringPtr; + +static void LIBCALLBACK AsnWriteRemoveForDCallBack (AsnExpOptStructPtr pAEOS) + +{ + CharPtr pchFind; + CharPtr pchSource; + ObjStringPtr osp; + + osp = (ObjStringPtr) pAEOS->data; + if (ISA_STRINGTYPE (AsnFindBaseIsa (pAEOS->atp))) { + pchSource = (CharPtr) pAEOS->dvp->ptrvalue; + pchFind = osp->match; + if (StringSearch (pchSource, pchFind) != NULL) { + osp->found = TRUE; + } + } +} + +static Boolean ObjectHasSubstring (ObjMgrTypePtr omtp, AsnIoPtr aip, Pointer ptr, ObjStringPtr osp) + +{ + osp->found = FALSE; + (omtp->asnwrite) (ptr, aip, NULL); + return osp->found; +} + +static Uint1 GetPubStatus (PubdescPtr pdp) +{ + ValNodePtr vnp; + CitGenPtr cgp; + CitArtPtr cap; + CitJourPtr cjp; + CitBookPtr cbp; + CitSubPtr csp; + MedlineEntryPtr mlp; + ImprintPtr ip = NULL; + Uint1 status = 255; /* 255 is currently not a valid status */ + + if (pdp == NULL) return status; + + for (vnp = pdp->pub; vnp != NULL && ip == NULL; vnp = vnp->next) + { + switch (vnp->choice) + { + case PUB_Gen: + cgp = (CitGenPtr) vnp->data.ptrvalue; + if (cgp != NULL && StringICmp (cgp->cit, "Unpublished")) + { + return PUB_STATUS_UNPUBLISHED; + } + break; + case PUB_Article: + case PUB_Medline: + if (vnp->choice == PUB_Article) + { + cap = (CitArtPtr) vnp->data.ptrvalue; + } + else + { + cap = NULL; + mlp = (MedlineEntryPtr) vnp->data.ptrvalue; + if (mlp != NULL) + { + cap = mlp->cit; + } + } + if (cap != NULL && cap->from == 1) + { + cjp = (CitJourPtr) cap->fromptr; + if (cjp != NULL) + { + ip = cjp->imp; + } + } + break; + case PUB_Man: + case PUB_Book: + cbp = (CitBookPtr) vnp->data.ptrvalue; + if (cbp != NULL) + { + ip = cbp->imp; + } + break; + case PUB_Sub: + csp = (CitSubPtr) vnp->data.ptrvalue; + if (csp != NULL) + { + ip = csp->imp; + } + break; + } + } + if (ip != NULL) + { + status = ip->prepub; + } + return status; +} + +static Boolean DoesPubStatusMatch (PubdescPtr pdp, ConvertFormPtr cfp) +{ + Uint1 pub_status; + + if (pdp == NULL || cfp == NULL) return FALSE; + if (cfp->subtype == 0) return TRUE; + + pub_status = GetPubStatus (pdp); + + if (cfp->subtype == PUBLICATION_PUBLISHED_FIELD + && pub_status == PUB_STATUS_PUBLISHED) + { + return TRUE; + } + else if (cfp->subtype == PUBLICATION_INPRESS_FIELD + && pub_status == PUB_STATUS_IN_PRESS) + { + return TRUE; + } + else if (cfp->subtype == PUBLICATION_UNPUB_FIELD + && pub_status == PUB_STATUS_UNPUBLISHED) + { + return TRUE; + } + else + { + return FALSE; + } +} + +static Boolean DoesSequenceHavePubWithText (BioseqPtr bsp, ConvertFormPtr cfp) +{ + AsnExpOptPtr aeop; + AsnIoPtr aip; + ObjStringData osd; + SeqMgrDescContext dcontext; + SeqDescrPtr sdp; + SeqMgrFeatContext fcontext; + SeqFeatPtr sfp; + Boolean rval = FALSE; + ObjMgrPtr omp; + ObjMgrTypePtr omtp; + PubdescPtr pdp; + + if (bsp == NULL || cfp == NULL) return FALSE; + omp = ObjMgrGet (); + if (omp == NULL) return FALSE; + omtp = ObjMgrTypeFind (omp, OBJ_SEQDESC, NULL, NULL); + if (omtp == NULL) return FALSE; + + aip = AsnIoNullOpen (); + aeop = AsnExpOptNew (aip, NULL, NULL, AsnWriteRemoveForDCallBack); + if (aeop != NULL) { + aeop->user_data = (Pointer) &osd; + } + osd.match = cfp->deleteStr; + + /* look for publication descriptors */ + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext); + while (sdp != NULL && !rval) { + if (ObjectHasSubstring (omtp, aip, (Pointer) sdp, &osd)) { + pdp = (PubdescPtr) sdp->data.ptrvalue; + if (DoesPubStatusMatch (pdp, cfp)) + { + rval = TRUE; + } + } + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_pub, &dcontext); + } + + if (!rval) + { + omtp = ObjMgrTypeFind (omp, OBJ_SEQFEAT, NULL, NULL); + if (omtp != NULL) + { + /* look for publication features */ + sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_PUB, &fcontext); + while (sfp != NULL && !rval) + { + if (ObjectHasSubstring (omtp, aip, (Pointer) sfp, &osd)) + { + pdp = (PubdescPtr) sfp->data.value.ptrvalue; + if (DoesPubStatusMatch (pdp, cfp)) + { + rval = TRUE; + } + } + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_PUB, &fcontext); + } + } + } + + AsnIoClose (aip); + return rval; +} + +static Boolean DoesNucProtSetHavePubWithText (BioseqSetPtr bssp, ConvertFormPtr cfp) +{ + AsnExpOptPtr aeop; + AsnIoPtr aip; + ObjStringData osd; + SeqDescrPtr sdp; + Boolean rval = FALSE; + ObjMgrPtr omp; + ObjMgrTypePtr omtp; + PubdescPtr pdp; + + if (bssp == NULL || cfp == NULL) return FALSE; + omp = ObjMgrGet (); + if (omp == NULL) return FALSE; + omtp = ObjMgrTypeFind (omp, OBJ_SEQDESC, NULL, NULL); + if (omtp == NULL) return FALSE; + + aip = AsnIoNullOpen (); + aeop = AsnExpOptNew (aip, NULL, NULL, AsnWriteRemoveForDCallBack); + if (aeop != NULL) { + aeop->user_data = (Pointer) &osd; + } + osd.match = cfp->deleteStr; + + /* look for publication descriptors */ + sdp = bssp->descr; + while (sdp != NULL && !rval) { + if (sdp->choice == Seq_descr_pub && ObjectHasSubstring (omtp, aip, (Pointer) sdp, &osd)) { + pdp = (PubdescPtr) sdp->data.ptrvalue; + if (DoesPubStatusMatch (pdp, cfp)) + { + rval = TRUE; + } + } + sdp = sdp->next; + } + + AsnIoClose (aip); + return rval; +} static Boolean DoesSequenceContainText (BioseqPtr bsp, ConvertFormPtr cfp) { @@ -1659,6 +1928,9 @@ static Boolean DoesSequenceContainText (BioseqPtr bsp, ConvertFormPtr cfp) found = TRUE; } break; + case PUBLICATION_TYPE : + found = DoesSequenceHavePubWithText (bsp, cfp); + break; default: break; } @@ -1671,6 +1943,10 @@ static Boolean DoesNucProtSetContainText (BioseqSetPtr bssp, ConvertFormPtr cfp) BioseqPtr bsp; if (bssp == NULL) return FALSE; + if (cfp->type == PUBLICATION_TYPE && DoesNucProtSetHavePubWithText (bssp, cfp)) + { + return TRUE; + } for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { if (IS_Bioseq (sep)) { bsp = (BioseqPtr) sep->data.ptrvalue; @@ -1879,8 +2155,13 @@ static void SegregateByText_Callback (ButtoN b) } else cfp->type = (Int2) val; - - if (cfp->type != DEFLINE_TYPE && cfp->type != FEATURE_NOTE_TYPE) { + + if (cfp->type == PUBLICATION_TYPE) + { + GetEnumPopup (cfp->subtarget [cfp->type], cfp->alists [cfp->type], &val); + cfp->subtype = (Int2) val; + } + else if (cfp->type != DEFLINE_TYPE && cfp->type != FEATURE_NOTE_TYPE) { GetEnumPopup (cfp->subtarget [cfp->type], cfp->alists [cfp->type], &val); if (0 == val) { Remove (cfp->form); @@ -1957,7 +2238,7 @@ static void SetSegregateAcceptButton (Handle a) SafeDisable (cfp->accept); return; } - } else if (val != DEFLINE_TYPE) { + } else if (val != DEFLINE_TYPE && val != PUBLICATION_TYPE) { cfp->type = (Int2) val; if (!GetEnumPopup (cfp->subtarget [cfp->type], @@ -2125,6 +2406,7 @@ extern Int2 LIBCALLBACK CreateSegregateByTextWindow (Pointer data) cfp->alists [BIOSOURCE_TYPE] = orgref_field_alist; cfp->alists [ORGMOD_SUBSOURCE_TYPE] = subsource_and_orgmod_note_subtype_alist; cfp->alists [IMPORT_FEAT_TYPE] = impfeat_field_alist; + cfp->alists [PUBLICATION_TYPE] = publication_field_alist; cfp->feature_list = BuildFeatureValNodeList (TRUE, "All", 255, TRUE, FALSE); x = HiddenGroup (p, 0, 0, NULL); @@ -2177,6 +2459,468 @@ extern Int2 LIBCALLBACK CreateSegregateByTextWindow (Pointer data) return OM_MSG_RET_OK; } + +typedef struct segregatefeatdata { + FEATURE_FORM_BLOCK + + PopuP type_popup; + ValNodePtr type_list; + ButtoN accept; + + BioseqSetPtr target_set; + Uint2 segregate_type; + Boolean is_feat; +} SegregateFeatData, PNTR SegregateFeatPtr; + +static void CleanupSegregateFeatPage (GraphiC g, VoidPtr data) + +{ + SegregateFeatPtr sfp; + + sfp = (SegregateFeatPtr) data; + MemFree (sfp); + StdCleanupFormProc (g, data); +} + +static Boolean DoesSequenceContainFeatureType (BioseqPtr bsp, SegregateFeatPtr sfp) +{ + SeqMgrFeatContext context; + SeqFeatPtr feat; + + feat = NULL; + while ((feat = SeqMgrGetNextFeature (bsp, feat, 0, 0, &context)) != NULL) + { + if (feat->idx.subtype == sfp->segregate_type) + { + return TRUE; + } + } + return FALSE; +} + +static Boolean DoesNucProtSetContainFeatureType (BioseqSetPtr bssp, SegregateFeatPtr sfp) +{ + SeqEntryPtr sep; + BioseqPtr bsp; + + if (bssp == NULL) return FALSE; + for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { + if (IS_Bioseq (sep)) { + bsp = (BioseqPtr) sep->data.ptrvalue; + if (DoesSequenceContainFeatureType (bsp, sfp)) { + return TRUE; + } + } + } + return FALSE; +} + +static Boolean DoesSequenceContainDescriptorType (BioseqPtr bsp, SegregateFeatPtr sfp) +{ + SeqMgrDescContext context; + SeqDescPtr desc; + + if((desc = SeqMgrGetNextDescriptor (bsp, NULL, sfp->segregate_type, &context)) != NULL) + { + return TRUE; + } + return FALSE; +} + +typedef struct checkdescdata { + Uint2 segregate_type; + Boolean found; +} CheckDescData, PNTR CheckDescPtr; + +static void DoesSetContainDescriptorType_Callback (SeqDescPtr sdp, Pointer userdata) +{ + CheckDescPtr p; + + if (sdp == NULL || userdata == NULL) return; + p = (CheckDescPtr) userdata; + if (p->found) return; + if (sdp->choice == p->segregate_type) p->found = TRUE; +} + +static Boolean DoesNucProtSetContainDescriptorType (BioseqSetPtr bssp, SegregateFeatPtr sfp) +{ + CheckDescData d; + + if (bssp == NULL) return FALSE; + d.found = FALSE; + d.segregate_type = sfp->segregate_type; + VisitDescriptorsInSet (bssp, &d, DoesSetContainDescriptorType_Callback); + return d.found; +} + +static Boolean WantToSegregateSequence (BioseqPtr bsp, SegregateFeatPtr sfp) +{ + if (bsp == NULL || sfp == NULL) return FALSE; + if (sfp->is_feat) + { + return DoesSequenceContainFeatureType (bsp, sfp); + } + else + { + return DoesSequenceContainDescriptorType (bsp, sfp); + } +} + +static Boolean WantToSegregateNucProtSet (BioseqSetPtr bssp, SegregateFeatPtr sfp) +{ + if (bssp == NULL || sfp == NULL) return FALSE; + if (sfp->is_feat) + { + return DoesNucProtSetContainFeatureType (bssp, sfp); + } + else + { + return DoesNucProtSetContainDescriptorType (bssp, sfp); + } +} + +/*=========================================================================*/ +/* */ +/* SegregateItemsByFeature () - Given a feature type, move bioseqs */ +/* containing those features to a new popset. */ +/* */ +/*=========================================================================*/ + +static void SegregateItemsByFeatureOrDescriptor +(SeqEntryPtr seqlist, + SegregateFeatPtr sfp, + BioseqSetPtr set1, + BioseqSetPtr set2) +{ + + BioseqPtr bsp; + BioseqSetPtr this_bssp; + SeqEntryPtr this_list; + SeqEntryPtr sep, next_sep; + SeqEntryPtr set1last, set2last; + + + if (sfp == NULL || set1 == NULL || set2 == NULL || seqlist == NULL) + return; + + set1last = set1->seq_set; + while (set1last != NULL && set1last->next != NULL) { + set1last = set1last->next; + } + set2last = set2->seq_set; + while (set2last != NULL && set2last->next != NULL) { + set2last = set2last->next; + } + + sep = seqlist; + while (sep != NULL) { + next_sep = sep->next; + if (IS_Bioseq_set (sep)) { + this_bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (this_bssp->_class == BioseqseqSet_class_nuc_prot) { + if (WantToSegregateNucProtSet (this_bssp, sfp)) { + if (set2last == NULL) { + set2->seq_set = sep; + } else { + set2last->next = sep; + } + set2last = sep; + } else { + if (set1last == NULL) { + set1->seq_set = sep; + } else { + set1last->next = sep; + } + set1last = sep; + } + sep->next = NULL; + } else { + this_list = this_bssp->seq_set; + this_bssp->seq_set = NULL; + SegregateItemsByFeatureOrDescriptor (this_list, sfp, set1, set2); + } + } else if (IS_Bioseq (sep)) { + bsp = (BioseqPtr) sep->data.ptrvalue; + if (WantToSegregateSequence (bsp, sfp)) { + if (set2last == NULL) { + set2->seq_set = sep; + } else { + set2last->next = sep; + } + set2last = sep; + } else { + if (set1last == NULL) { + set1->seq_set = sep; + } else { + set1last->next = sep; + } + set1last = sep; + } + sep->next = NULL; + } + sep = next_sep; + } +} + + +/*=========================================================================*/ +/* */ +/* SegregateByFeatureOrDescriptor_Callback () - Segregates sequences that */ +/* contain a selected feature. */ +/* */ +/*=========================================================================*/ + +static void SegregateByFeatureOrDescriptor_Callback (ButtoN b) +{ + SegregateFeatPtr sfp; + SeqEntryPtr sep; + SeqEntryPtr tmp1, tmp2; + UIEnum val; + BioseqSetPtr bssp; + BioseqSetPtr parent_set; + SeqEntryPtr seqlist; + BioseqSetPtr newset1, newset2; + ObjMgrDataPtr omdptop; + ObjMgrData omdata; + Uint2 parenttype; + Pointer parentptr; + SeqEntryPtr last_sep; + ValNodePtr vnp; + + /* Check the initial conditions and get the sequence */ + sfp = (SegregateFeatPtr) GetObjectExtra (b); + if (sfp == NULL || sfp->input_entityID == 0 || sfp->target_set == NULL) { + Remove (sfp->form); + return; + } + + sep = GetTopSeqEntryForEntityID (sfp->input_entityID); + if (sep == NULL) { + Remove (sfp->form); + return; + } + + SaveSeqEntryObjMgrData (sep, &omdptop, &omdata); + GetSeqEntryParent (sep, &parentptr, &parenttype); + + bssp = sfp->target_set; + + parent_set = (BioseqSetPtr)(bssp->idx.parentptr); + seqlist = bssp->seq_set; + bssp->seq_set = NULL; + + if (parent_set == NULL || parent_set->seq_set == NULL) { + newset1 = BioseqSetNew (); + if (newset1 == NULL) return; + newset2 = BioseqSetNew (); + if (newset2 == NULL) return; + newset1->_class = bssp->_class; + newset2->_class = bssp->_class; + tmp1 = SeqEntryNew (); + if (tmp1 == NULL) return; + tmp1->choice = 2; + tmp1->data.ptrvalue = (Pointer) newset1; + tmp2 = SeqEntryNew (); + if (tmp2 == NULL) return; + tmp2->choice = 2; + tmp2->data.ptrvalue = (Pointer) newset2; + bssp->seq_set = tmp1; + tmp1->next = tmp2; + bssp->_class = BioseqseqSet_class_genbank; + /* Propagate descriptors down */ + ValNodeLink (&(newset1->descr), + AsnIoMemCopy ((Pointer) bssp->descr, + (AsnReadFunc) SeqDescrAsnRead, + (AsnWriteFunc) SeqDescrAsnWrite)); + ValNodeLink (&(newset2->descr), + AsnIoMemCopy ((Pointer) bssp->descr, + (AsnReadFunc) SeqDescrAsnRead, + (AsnWriteFunc) SeqDescrAsnWrite)); + bssp->descr = SeqDescrFree (bssp->descr); + } else { + last_sep = parent_set->seq_set; + newset1 = bssp; + newset2 = BioseqSetNew (); + if (newset2 == NULL) return; + newset2->_class = newset1->_class; + tmp1 = SeqEntryNew (); + if (tmp1 == NULL) return; + tmp1->choice = 2; + tmp1->data.ptrvalue = (Pointer) newset2; + while (last_sep != NULL && last_sep->next != NULL) { + last_sep = last_sep->next; + } + if (last_sep == NULL) return; + last_sep->next = tmp1; + /* copy descriptors horizontally */ + ValNodeLink (&(newset2->descr), + AsnIoMemCopy ((Pointer) bssp->descr, + (AsnReadFunc) SeqDescrAsnRead, + (AsnWriteFunc) SeqDescrAsnWrite)); + } + + /* Get the feature to look for */ + val = GetValue (sfp->type_popup); + for (vnp = sfp->type_list; vnp != NULL && val > 1; vnp = vnp->next, val--) + { + } + if (vnp == NULL || val != 1) + { + Remove (sfp->form); + return; + } + sfp->segregate_type = vnp->choice; + + /* Display the 'working' cursor */ + + WatchCursor (); + Update (); + + /* Do the search and move sequences */ + SegregateItemsByFeatureOrDescriptor (seqlist, sfp, newset1, newset2); + + /* Remove the window and update things */ + SeqMgrLinkSeqEntry (sep, parenttype, parentptr); + RestoreSeqEntryObjMgrData (sep, omdptop, &omdata); + ObjMgrSetDirtyFlag (sfp->input_entityID, TRUE); + ObjMgrSendMsg (OM_MSG_UPDATE, sfp->input_entityID, 0, 0); + + ArrowCursor (); + Update (); + Remove (sfp->form); + + /* Return successfully */ + return; +} + + +/*=========================================================================*/ +/* */ +/* CreateSegregateByFeatureWindow () - Creates and then displays the window*/ +/* for getting segregate by text info from the user.*/ +/* */ +/*=========================================================================*/ + +static Int2 LIBCALLBACK CreateSegregateByFeatureOrDescriptorWindow (Pointer data, Boolean is_feat) +{ + GrouP c; + SegregateFeatPtr sfp; + GrouP g; + GrouP h; + OMProcControlPtr ompcp; + StdEditorProcsPtr sepp; + WindoW w; + ValNodePtr vnp; + + /* Check parameters and get a pointer to the current data */ + + ompcp = (OMProcControlPtr) data; + if (ompcp == NULL) + return OM_MSG_RET_ERROR; + + if (ompcp->input_itemtype != OBJ_BIOSEQSET || ompcp->input_data == NULL) { + Message (MSG_ERROR, "Must select Bioseq Set!"); + return OM_MSG_RET_ERROR; + } + + /* Create a new window, and a struct */ + /* to pass around the data in. */ + + sfp = (SegregateFeatPtr) MemNew (sizeof (SegregateFeatData)); + if (sfp == NULL) + return OM_MSG_RET_ERROR; + sfp->is_feat = is_feat; + + if (sfp->is_feat) + { + w = FixedWindow (-50, -33, -10, -10, "Segregate By Feature", + StdCloseWindowProc); + } + else + { + w = FixedWindow (-50, -33, -10, -10, "Segregate By Descriptor", + StdCloseWindowProc); + } + + SetObjectExtra (w, sfp, CleanupSegregateFeatPage); + sfp->form = (ForM) w; + + sepp = (StdEditorProcsPtr) GetAppProperty ("StdEditorForm"); + if (sepp != NULL) { + SetActivate (w, sepp->activateForm); + sfp->appmessage = sepp->handleMessages; + } + + sfp->input_entityID = ompcp->input_entityID; + sfp->input_itemID = ompcp->input_itemID; + sfp->input_itemtype = ompcp->input_itemtype; + sfp->target_set = (BioseqSetPtr)ompcp->input_data; + + sepp = (StdEditorProcsPtr) GetAppProperty ("StdEditorForm"); + if (sepp != NULL) { + SetActivate (w, sepp->activateForm); + sfp->appmessage = sepp->handleMessages; + } + + /* Add the popup lists */ + + h = HiddenGroup (w, -1, 0, NULL); + SetGroupSpacing (h, 10, 10); + + g = HiddenGroup (h, 3, 0, NULL); + + if (sfp->is_feat) + { + StaticPrompt (g, "Segregate sequences with the feature", 0, dialogTextHeight, + programFont, 'l'); + sfp->type_list = BuildFeatureValNodeList (TRUE, NULL, 0, TRUE, FALSE); + } + else + { + StaticPrompt (g, "Segregate sequences with the descriptor", 0, dialogTextHeight, + programFont, 'l'); + sfp->type_list = BuildDescriptorValNodeList (); + } + + sfp->type_popup = PopupList (g, TRUE, NULL); + SetObjectExtra (sfp->type_popup, sfp, NULL); + for (vnp = sfp->type_list; vnp != NULL; vnp = vnp->next) + { + PopupItem (sfp->type_popup, (CharPtr) vnp->data.ptrvalue); + } + SetValue (sfp->type_popup, 1); + + /* Add Accept and Cancel buttons */ + + c = HiddenGroup (h, 4, 0, NULL); + sfp->accept = DefaultButton (c, "Accept", SegregateByFeatureOrDescriptor_Callback); + SetObjectExtra (sfp->accept, sfp, NULL); + PushButton (c, "Cancel", StdCancelButtonProc); + + /* Line things up nicely */ + + AlignObjects (ALIGN_LEFT, (HANDLE) g, (HANDLE) c, (HANDLE) h, NULL); + + /* Display the window now */ + + RealizeWindow (w); + Show (w); + Select (w); + Select (sfp->accept); + Update (); + return OM_MSG_RET_OK; +} + +extern Int2 LIBCALLBACK CreateSegregateByFeatureWindow (Pointer data) +{ + return CreateSegregateByFeatureOrDescriptorWindow (data, TRUE); +} + +extern Int2 LIBCALLBACK CreateSegregateByDescriptorWindow (Pointer data) +{ + return CreateSegregateByFeatureOrDescriptorWindow (data, FALSE); +} + static CharPtr SaveOrReplaceStringCopy (ConvertFormPtr cfp, CharPtr str, CharPtr current) { @@ -2919,7 +3663,7 @@ static void ConvertFromFlatFile (Uint2 entityID, SeqEntryPtr sep, ConvertFormPtr bssp = (BioseqSetPtr) sep->data.ptrvalue; } else return; - ajp = asn2gnbk_setup (bsp, bssp, NULL, format, SEQUIN_MODE, NORMAL_STYLE, 0, 0, 0, NULL); + ajp = asn2gnbk_setup (bsp, bssp, NULL, (FmtType)format, SEQUIN_MODE, NORMAL_STYLE, 0, 0, 0, NULL); if (ajp != NULL) { goOn = TRUE; for (index = 0; index < ajp->numParagraphs && goOn; index++) { @@ -3618,7 +4362,7 @@ static void BuildParseToAnywhereDialog (IteM i, Int4 parsetype) SetObjectExtra (w, cfp, CleanupParseForm); cfp->form = (ForM) w; cfp->formmessage = ConvertMessageProc; - cfp->target_alist = segregate_target_field_alist; + cfp->target_alist = parse_target_field_alist; cfp->set_accept_proc = (PupActnProc) SetSegregateAcceptButton; sepp = (StdEditorProcsPtr) GetAppProperty ("StdEditorForm"); @@ -3688,7 +4432,7 @@ static void BuildParseToAnywhereDialog (IteM i, Int4 parsetype) x = HiddenGroup (p, 0, 0, NULL); - for (j = 1; j <= NUMBER_OF_SEGREGATE_TYPES; j++) { + for (j = 1; j <= NUMBER_OF_PARSE_TYPES; j++) { if (j == ORGMOD_SUBSOURCE_TYPE) { cfp->subtarget [j] = (PopuP) SingleList (x, 10, 8, (LstActnProc) cfp->set_accept_proc); SetObjectExtra (cfp->subtarget [j], cfp, NULL); diff --git a/sequin/sequin7.c b/sequin/sequin7.c index 7b8c7e92..63218505 100644 --- a/sequin/sequin7.c +++ b/sequin/sequin7.c @@ -29,7 +29,7 @@ * * Version Creation Date: 1/3/98 * -* $Revision: 6.144 $ +* $Revision: 6.148 $ * * File Description: * @@ -7195,9 +7195,11 @@ static Boolean CDSMeetsStringConstraint (SeqFeatPtr sfp, extern Boolean MeetsStringConstraint (SeqFeatPtr sfp, CharPtr findThisStr) { - GBQualPtr gbqp; - GeneRefPtr grp; - RnaRefPtr rrp; + GBQualPtr gbqp; + GeneRefPtr grp; + RnaRefPtr rrp; + SeqMgrFeatContext context; + Boolean have_context = FALSE; /* If no string constraint, then everyone matches */ @@ -7226,6 +7228,15 @@ extern Boolean MeetsStringConstraint (SeqFeatPtr sfp, gbqp = gbqp->next; } + if (SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, 0, 0, sfp, &context) != NULL) + { + if (StringISearch (context.label, findThisStr)) + { + return TRUE; + } + have_context = TRUE; + } + if (sfp->data.choice == SEQFEAT_GENE) { grp = sfp->data.value.ptrvalue; @@ -7250,6 +7261,15 @@ extern Boolean MeetsStringConstraint (SeqFeatPtr sfp, if (StringISearch ((CharPtr) rrp->ext.value.ptrvalue, findThisStr)) return TRUE; } + else if (rrp->type == 3 && rrp->ext.choice == 2 && have_context) + { + /* look for the label as it appears to the user */ + if (StringNCmp(findThisStr, "tRNA-", 5) == 0 + && StringISearch (context.label, findThisStr + 5)) + { + return TRUE; + } + } } /* If we got to here, then the string constraint was not found */ @@ -8123,7 +8143,7 @@ static void MarkProteinCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, In } } -extern void RemoveProteins (IteM i) +extern void RemoveProteinsAndOptionallyRenormalize (IteM i, Boolean renormalize) { BaseFormPtr bfp; @@ -8167,12 +8187,26 @@ extern void RemoveProteins (IteM i) ValNodeFree (vnp); SeqMgrLinkSeqEntry (sep, parenttype, parentptr); RestoreSeqEntryObjMgrData (sep, omdptop, &omdata); + if (renormalize) + { + RenormalizeNucProtSets (sep, TRUE); + } ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE); ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0); ObjMgrDeSelect (0, 0, 0, 0, NULL); Update (); } +extern void RemoveProteins (IteM i) +{ + RemoveProteinsAndOptionallyRenormalize (i, FALSE); +} + +extern void RemoveProteinsAndRenormalize (IteM i) +{ + RemoveProteinsAndOptionallyRenormalize (i, TRUE); +} + #define EDIT_FIVE_PRIME 1 #define EDIT_THREE_PRIME 2 @@ -9009,7 +9043,7 @@ WriteAlignmentInterleaveToFile MemSet (printed_line, ' ', printed_line_len - 2); label_pos = alnlabels + (row - 1) * (label_len + 1) * sizeof (Char); MemCpy (printed_line, label_pos, StringLen (label_pos)); - AlignmentIntervalToString (salp, row, start, stop, 1, FALSE, + AlignmentIntervalToString (salp, row, start, stop, 1, TRUE, seqbuf, alnbuf, &alnbuf_len); MemCpy (printed_line + label_len + 1, alnbuf, alnbuf_len); fprintf (fp, printed_line); @@ -9028,8 +9062,95 @@ WriteAlignmentInterleaveToFile } } +static void WriteAlignmentContiguousToFile +(SeqAlignPtr salp, + FILE *fp) +{ + Int4 num_segments; + SeqAlignPtr tmp_salp; + Int4 idx; + CharPtr PNTR alnlabels = NULL; + Int4Ptr label_len = NULL; + Int4Ptr aln_len = NULL; + Uint1Ptr alnbuf = NULL; + Uint1Ptr seqbuf = NULL; + CharPtr printed_line = NULL; + Int4 alnbuf_len; + Int4 printed_line_len; + CharPtr label_pos; + Int4 row, start, stop; + Int4 seq_chars_per_row = 80; + + if (salp == NULL || fp == NULL) return; + + num_segments = 0; + for (tmp_salp = salp; tmp_salp != NULL; tmp_salp = tmp_salp->next) + { + num_segments++; + } + + + /* get labels and lengths for all segments */ + alnlabels = (CharPtr PNTR) MemNew (sizeof (CharPtr) * num_segments); + label_len = (Int4Ptr) MemNew (sizeof (Int4) * num_segments); + aln_len = (Int4Ptr) MemNew (sizeof (Int4) * num_segments); + if (alnlabels != NULL && label_len != NULL && aln_len != NULL) + { + for (tmp_salp = salp, idx = 0; tmp_salp != NULL, idx < num_segments; tmp_salp = tmp_salp->next, idx++) + { + alnlabels [idx] = GetSeqAlignLabels (tmp_salp, &label_len[idx]); + aln_len [idx]= AlnMgr2GetAlnLength(tmp_salp, FALSE); + + } + + /* get buffers */ + alnbuf = (Uint1Ptr) MemNew (seq_chars_per_row * sizeof (Uint1)); + seqbuf = (Uint1Ptr) MemNew (seq_chars_per_row * sizeof (Uint1)); + printed_line_len = seq_chars_per_row + 3; + printed_line = (CharPtr) MemNew (printed_line_len * sizeof (Char)); + if (alnbuf != NULL && seqbuf != NULL && printed_line != NULL) { + printed_line [ printed_line_len - 1] = 0; + printed_line [ printed_line_len - 2] = '\n'; + + for (row = 1; row <= salp->dim; row++) { + if (salp->next != NULL) + { + fprintf (fp, "[\n"); + } + for (tmp_salp = salp, idx = 0; tmp_salp != NULL, idx < num_segments; tmp_salp = tmp_salp->next, idx++) + { + label_pos = alnlabels [idx] + (row - 1) * (label_len[idx] + 1) * sizeof (Char); + fprintf (fp, ">%s\n", label_pos); + start = 0; + stop = seq_chars_per_row - 1; + while (start < aln_len [idx]) { + MemSet (printed_line, ' ', printed_line_len - 2); + AlignmentIntervalToString (tmp_salp, row, start, stop, 1, TRUE, + seqbuf, alnbuf, &alnbuf_len); + MemCpy (printed_line, alnbuf, alnbuf_len); + fprintf (fp, printed_line); + start = stop + 1; + stop += seq_chars_per_row; + } + fprintf (fp, "\n"); + } + if (salp->next != NULL) + { + fprintf (fp, "]\n"); + } + } + } + MemFree (alnbuf); + MemFree (seqbuf); + MemFree (printed_line); + } + MemFree (label_len); + MemFree (alnlabels); + MemFree (aln_len); +} + static void -WriteAlignmentContiguousToFile +OldWriteAlignmentContiguousToFile (SeqAlignPtr salp, FILE *fp) { @@ -9085,26 +9206,83 @@ WriteAlignmentContiguousToFile } } +static SetAlignmentDim (SeqAlignPtr salp) +{ + AMAlignIndex2Ptr amaip; + DenseSegPtr dsp; + + if (salp == NULL || salp->dim > 0 || salp->saip == NULL) return; + + if (salp->saip->indextype == INDEX_PARENT) + { + amaip = (AMAlignIndex2Ptr)(salp->saip); + salp->dim = amaip->sharedaln->dim; + } + else if (salp->saip->indextype == INDEX_CHILD) + { + dsp = (DenseSegPtr)(salp->segs); + salp->dim = dsp->dim; + } +} + +static void IndexAlignmentSet (SeqAlignPtr salp) +{ + SeqAlignPtr tmp_salp, next_salp; + + if (salp == NULL || salp->saip != NULL) return; + + if (salp->next != NULL && salp->dim > 2) + { + for (tmp_salp = salp; tmp_salp != NULL; tmp_salp = tmp_salp->next) + { + next_salp = tmp_salp->next; + tmp_salp->next = NULL; + if (tmp_salp->segtype == SAS_DENSEG && tmp_salp->next == NULL) { + AlnMgr2IndexSingleChildSeqAlign(tmp_salp); + } else { + AlnMgr2IndexSeqAlign(tmp_salp); + } + SetAlignmentDim (tmp_salp); + tmp_salp->next = next_salp; + } + } + else + { + if (salp->segtype == SAS_DENSEG && salp->next == NULL) { + AlnMgr2IndexSingleChildSeqAlign(salp); + } else { + AlnMgr2IndexSeqAlign(salp); + } + SetAlignmentDim (salp); + } +} + static void WriteSeqEntryAlignmentToFile (SeqEntryPtr sep, FILE *fp, Boolean Interleave) { BioseqSetPtr bssp; SeqAnnotPtr sap; - SeqAlignPtr salp; + SeqAlignPtr salp = NULL; if (sep == NULL || ! IS_Bioseq_set (sep)) return; bssp = (BioseqSetPtr) sep->data.ptrvalue; if (bssp == NULL) return; for (sap = bssp->annot; sap != NULL; sap = sap->next) { if (sap->type == 2) { - salp = (SeqAlignPtr) sap->data; - if (salp->saip == NULL) { - AlnMgr2IndexSingleChildSeqAlign (salp); - } + salp = SeqAlignListDup((SeqAlignPtr) sap->data); + IndexAlignmentSet (salp); + if (Interleave) { + if (salp->next != NULL) + { + Message (MSG_ERROR, "Unable to write segmented alignments as interleave"); + return; + } WriteAlignmentInterleaveToFile (salp, fp); } else { WriteAlignmentContiguousToFile (salp, fp); } + SeqAlignFree (salp); + salp = NULL; } } diff --git a/sequin/sequin8.c b/sequin/sequin8.c index 4030fd05..fd9ede6d 100644 --- a/sequin/sequin8.c +++ b/sequin/sequin8.c @@ -29,7 +29,7 @@ * * Version Creation Date: 2/3/98 * -* $Revision: 6.255 $ +* $Revision: 6.256 $ * * File Description: * @@ -3343,6 +3343,27 @@ static int LIBCALLBACK SortMostUsedDescriptorsFirst (VoidPtr ptr1, VoidPtr ptr2) } } +extern ValNodePtr BuildDescriptorValNodeList (void) +{ + Int4 j; + ValNodePtr vnp; + ValNodePtr head = NULL; + + for (j = 1; descNames [j] != NULL; j++) { + if (StringHasNoText (descNames [j])) continue; + vnp = ValNodeNew (head); + if (head == NULL) { + head = vnp; + } + if (vnp != NULL) { + vnp->choice = j; + vnp->data.ptrvalue = StringSave (descNames [j]); + } + } + head = SortValNode (head, SortMostUsedDescriptorsFirst); + return head; +} + static void RemoveAsnObject (IteM i, Boolean feature) { @@ -3413,18 +3434,7 @@ static void RemoveAsnObject (IteM i, Boolean feature) if (feature) { head = BuildFeatureValNodeList (TRUE, "All", ALL_FEATURES, TRUE, FALSE); } else { - for (j = 1; descNames [j] != NULL; j++) { - if (StringHasNoText (descNames [j])) continue; - vnp = ValNodeNew (head); - if (head == NULL) { - head = vnp; - } - if (vnp != NULL) { - vnp->choice = j; - vnp->data.ptrvalue = StringSave (descNames [j]); - } - } - head = SortValNode (head, SortMostUsedDescriptorsFirst); + head = BuildDescriptorValNodeList(); } if (head != NULL) { diff --git a/sequin/sequin9.c b/sequin/sequin9.c index 0f8a4845..802fb0c2 100644 --- a/sequin/sequin9.c +++ b/sequin/sequin9.c @@ -29,7 +29,7 @@ * * Version Creation Date: 4/20/99 * -* $Revision: 6.254 $ +* $Revision: 6.260 $ * * File Description: * @@ -4445,7 +4445,8 @@ static TransTablePtr GetTranslationTable (CdRegionPtr crp, Boolean PNTR table_is static CharPtr ExtendProtein5 (SeqFeatPtr sfp, - Uint2 input_entityID) + Uint2 input_entityID, + Boolean force_partial) { CdRegionPtr crp; TransTablePtr tbl = NULL; @@ -4486,8 +4487,14 @@ static CharPtr ExtendProtein5 strand = SeqLocStrand (sfp->location); sip = SeqLocId (sfp->location); offset = -1; + + start = GetOffsetInBioseq (test_slp, nucBsp, SEQLOC_START); + if (start == 0) + { + stop_looking = TRUE; + } - while (! found_start && ! found_stop && ! stop_looking) { + while (((! found_start && ! found_stop) || force_partial) && ! stop_looking) { start = GetOffsetInBioseq (test_slp, nucBsp, SEQLOC_START); stop = GetOffsetInBioseq (test_slp, nucBsp, SEQLOC_STOP); if (strand == Seq_strand_minus) { @@ -4537,7 +4544,7 @@ static CharPtr ExtendProtein5 MemFree (bases); } } - + SeqLocFree (test_slp); if (! found_stop) { start = GetOffsetInBioseq (sfp->location, nucBsp, SEQLOC_START); @@ -4552,12 +4559,16 @@ static CharPtr ExtendProtein5 CheckSeqLocForPartial (sfp->location, &partial5, &partial3); SetSeqLocPartial (sfp->location, TRUE, partial3); sfp->partial = TRUE; + if (crp->frame == 0) + { + crp->frame = 1; + } if (strand == Seq_strand_minus) { sfp->location = ExpandSeqLoc (stop, nucBsp->length - 1, strand, nucBsp, sfp->location); - crp->frame = (nucBsp->length - 1 - start) % 3 + 1; + crp->frame = (nucBsp->length - 1 - start + crp->frame - 1) % 3 + 1; } else { sfp->location = ExpandSeqLoc (0, stop, strand, nucBsp, sfp->location); - crp->frame = start % 3 + 1; + crp->frame = (start + crp->frame - 1) % 3 + 1; } } } @@ -4573,7 +4584,8 @@ static CharPtr ExtendProtein5 static CharPtr ExtendProtein3 (SeqFeatPtr sfp, - Uint2 input_entityID) + Uint2 input_entityID, + Boolean force_partial) { BioseqPtr nucBsp; Int4 max_stop, min_start, start, stop; @@ -4609,7 +4621,14 @@ static CharPtr ExtendProtein3 contains_stop = FALSE; contains_start = FALSE; newprot = NULL; - while (! contains_stop && + /* need to initialize newprot in case we're already at the edge */ + if ((strand != Seq_strand_minus && stop == max_stop) + || (strand == Seq_strand_minus && stop == min_start)) + { + newprot = FixProteinString (sfp, strand, FALSE, &truncated, + &contains_start, &contains_stop); + } + while ((! contains_stop || force_partial) && ( (strand == Seq_strand_minus && stop > min_start) || (strand != Seq_strand_minus && stop < max_stop))) { @@ -4634,7 +4653,7 @@ static CharPtr ExtendProtein3 &contains_start, &contains_stop); } - if (! contains_stop) { + if (! contains_stop || force_partial) { start = GetOffsetInBioseq (sfp->location, nucBsp, SEQLOC_START); stop = GetOffsetInBioseq (sfp->location, nucBsp, SEQLOC_STOP); if (strand == Seq_strand_minus) { @@ -4673,6 +4692,8 @@ PrepareUpdatePtrForProtein Uint1 strand; SeqLocPtr newloc; BioseqPtr nucBsp; + Boolean partial5, partial3; + if (sfp == NULL || sfp->idx.subtype != FEATDEF_CDS @@ -4683,7 +4704,9 @@ PrepareUpdatePtrForProtein { return NULL; } - + + CheckSeqLocForPartial (sfp->location, &partial3, &partial5); + nucBsp = GetBioseqGivenSeqLoc (sfp->location, input_entityID); if (nucBsp == NULL) return NULL; newloc = SeqLocMerge (nucBsp, sfp->location, NULL, FALSE, FALSE, FALSE); @@ -4701,17 +4724,18 @@ PrepareUpdatePtrForProtein &contains_start, &contains_stop); /* Must do 3' end first, otherwise may truncate at stops introduced by expanding 5' end for partiality */ - if (! contains_stop && extend_proteins3 && transl_except_len == 0) { + if ((! contains_stop && extend_proteins3 && transl_except_len == 0) + || ((extend_proteins3 || partial3) && !truncate_proteins)) { MemFree (newprot); - newprot = ExtendProtein3 (sfp, input_entityID); + newprot = ExtendProtein3 (sfp, input_entityID, partial3 && !truncate_proteins); if (newprot == NULL) return NULL; *extended3 = TRUE; } else { *extended3 = FALSE; } - if (! contains_start && extend_proteins5) { + if (! contains_start && (extend_proteins5 || partial5)) { MemFree (newprot); - newprot = ExtendProtein5 (sfp, input_entityID); + newprot = ExtendProtein5 (sfp, input_entityID, partial5); if (newprot == NULL) return NULL; *extended5 = TRUE; } else { @@ -6818,7 +6842,7 @@ static void DetermineButtonState (UpsDataPtr udp, /* Replace */ - else if (udp->new5 >= udp->old5 && udp->new3 >= udp->old3) { + else { SetValue (udp->rmc, 1); Disable (*extend5ButtonPtr); Disable (*extend3ButtonPtr); @@ -6827,7 +6851,7 @@ static void DetermineButtonState (UpsDataPtr udp, } /* Patch */ - +/* This section removed - do not set patch as a default else if (udp->new5 <= udp->old5 && udp->new3 <= udp->old3) { SetValue (udp->rmc, 4); Disable (*extend5ButtonPtr); @@ -6836,14 +6860,13 @@ static void DetermineButtonState (UpsDataPtr udp, udp->recomb2 = udp->aln_length; /* If patch sequence matches, must be feature propagation only */ - - if (StringNICmp (udp->seq1 + udp->old5 - udp->new5, +/* if (StringNICmp (udp->seq1 + udp->old5 - udp->new5, udp->seq2, StringLen (udp->seq2)) == 0) { SetValue (udp->sfb, 2); Disable (udp->sfb); } - } + } */ /* If no features, must be sequence update only */ @@ -7561,7 +7584,7 @@ static Int2 UpdateNextBioseqInFastaSet (UpsDataPtr udp) /* */ /*=====================================================================*/ -extern void UpdateFastaSetEx (IteM i, Boolean use_new_blast) +static void UpdateFastaSetEx (IteM i, Boolean use_new_blast) { BaseFormPtr bfp; FILE *fp; @@ -7788,13 +7811,13 @@ extern void NewExtendSequence (IteM i) NewUpdateOrExtendSequence (i, FALSE, FALSE); } -extern void NewExtendSequenceNewBlast (IteM i) +static void NewExtendSequenceNewBlast (IteM i) { NewUpdateOrExtendSequence (i, FALSE, TRUE); } -extern void UpdateSeqAfterDownloadEx +static void UpdateSeqAfterDownloadEx (BaseFormPtr bfp, BioseqPtr oldbsp, BioseqPtr newbsp, @@ -7870,7 +7893,7 @@ extern void UpdateSeqAfterDownload UpdateSeqAfterDownloadEx (bfp, oldbsp, newbsp, FALSE); } -extern void ExtendSeqAfterDownloadEx +static void ExtendSeqAfterDownloadEx (BaseFormPtr bfp, BioseqPtr oldbsp, BioseqPtr newbsp, @@ -9018,6 +9041,7 @@ static ForM FeaturePropagateForm ( fdp->transPast = CheckBox (g, "Translate CDS after partial 3' boundary", NULL); fdp->fixCDS = CheckBox (g, "Cleanup CDS partials after propagation", NULL); + SetStatus (fdp->fixCDS, TRUE); fdp->fuseJoints = CheckBox (g, "Fuse adjacent propagated intervals", NULL); SetStatus (fdp->fuseJoints, TRUE); @@ -10106,11 +10130,6 @@ NLM_EXTERN SeqAlignPtr Sqn_LocalAlign2SeqEx (BioseqPtr bsp1, BioseqPtr bsp2, Boo return sap_final; } -NLM_EXTERN SeqAlignPtr Sqn_LocalAlign2Seq (BioseqPtr bsp1, BioseqPtr bsp2, BoolPtr revcomp) -{ - return Sqn_LocalAlign2SeqEx (bsp1, bsp2, revcomp, FALSE); -} - /* End of implementation of the new BLAST library . */ |