diff options
author | Aaron M. Ucko <ucko@debian.org> | 2005-03-23 15:50:17 +0000 |
---|---|---|
committer | Aaron M. Ucko <ucko@debian.org> | 2005-03-23 15:50:17 +0000 |
commit | 5d9c18cdc6c0e622f123be548f6f7b8ba827d3ac (patch) | |
tree | 2b219ac945a1f81c6a6ffc09fa0db76191716644 /api/seqmgr.c |
[svn-inject] Installing original source of ncbi-tools6 (6.0.2)
Diffstat (limited to 'api/seqmgr.c')
-rw-r--r-- | api/seqmgr.c | 6228 |
1 files changed, 6228 insertions, 0 deletions
diff --git a/api/seqmgr.c b/api/seqmgr.c new file mode 100644 index 00000000..66b1532f --- /dev/null +++ b/api/seqmgr.c @@ -0,0 +1,6228 @@ +/* seqmgr.c +* =========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. +* +* =========================================================================== +* +* File Name: seqmgr.c +* +* Author: James Ostell +* +* Version Creation Date: 9/94 +* +* $Revision: 6.69 $ +* +* File Description: Manager for Bioseqs and BioseqSets +* +* Modifications: +* -------------------------------------------------------------------------- +* Date Name Description of modification +* ------- ---------- ----------------------------------------------------- +* +* $Log: seqmgr.c,v $ +* Revision 6.69 1999/01/14 21:50:04 kans +* added packaging level to descriptor context structure +* +* Revision 6.68 1998/12/09 22:08:49 kans +* fixed a typo that prevented cleanup of index on top bioseq set +* +* Revision 6.67 1998/12/07 16:55:08 kans +* fixed memory leak of extra block on top bioseqset when automatically cleaned up from objmgr +* +* Revision 6.66 1998/11/28 20:08:23 kans +* was not initializing or incrementing nummRNAs and numCDSs variables +* +* Revision 6.65 1998/11/24 22:21:24 kans +* index mRNA and CDS by position, allow arbitrary sorted feature array index +* +* Revision 6.64 1998/10/29 01:07:10 kans +* indexing makes new sep if not on top bsp or bssp +* +* Revision 6.63 1998/10/29 00:50:39 kans +* calls BasicSeqEntryCleanup at beginning of indexing +* +* Revision 6.62 1998/10/27 14:53:30 kans +* get overlapping gene/pub/source takes SeqLocPtr - pass in sfp->location +* +* Revision 6.61 1998/10/22 23:41:49 kans +* feat context has bsp, partial flags, far location flag, GetDesired functions can work on entity entity if using itemID +* +* Revision 6.60 1998/10/22 16:05:55 kans +* removed labeltype parameter from SeqMgrIndexFeatures, changed index parameter/field to Uint2 +* +* Revision 6.59 1998/10/20 17:45:58 kans +* distinctive messages for whether far accession was handled or not +* +* Revision 6.58 1998/10/16 21:42:02 kans +* fixed comment on last sort criteria to parent seq-annot +* +* Revision 6.57 1998/10/16 21:36:37 kans +* feature sorting continues with internal intervals, subtype, label, and finally seqannot parent +* +* Revision 6.56 1998/10/13 20:59:48 kans +* SeqMgrGetOmdpForBioseq uses new field in Bioseq to avoid binary search lookup +* +* Revision 6.55 1998/10/09 15:45:13 kans +* more informative errors in SeqMgr indexing +* +* Revision 6.54 1998/09/30 14:49:37 kans +* set scope for FindAppropriateBioseq, FindFirstLocalBioseq, over and above gather scope, which does not apply to the above calls even from within a gather callback +* +* Revision 6.53 1998/09/29 20:06:07 kans +* FindFirstLocalBioseq and GetOffsetInFirstLocalBioseq to deal with far segments more gracefully than just not indexing the feature +* +* Revision 6.52 1998/09/29 15:07:17 kans +* corrected logic for seqDescFilter, seqFeatFilter, and featDefFilter in explore functions +* +* Revision 6.51 1998/09/23 16:41:07 kans +* added SeqMgrGetDesiredDescriptor +* +* Revision 6.50 1998/09/22 18:17:01 kans +* descriptor index flag now tracked properly, separate from itemID +* +* Revision 6.49 1998/09/22 18:01:25 kans +* had been skipping bssp for lastDescrItemID +* +* Revision 6.48 1998/09/22 16:55:51 kans +* added SeqMgrGetDesiredFeature and position index field +* +* Revision 6.47 1998/09/22 13:11:59 kans +* locationFilter parameter to explore features function +* +* Revision 6.46 1998/09/01 19:25:25 kans +* context parameter in get best protein, get cds/rna given product +* +* Revision 6.45 1998/08/24 18:27:09 kans +* removed solaris -v -fd warnings +* +* Revision 6.44 1998/08/21 21:32:36 kans +* populate ivals array (start/stop pairs) for indexed features +* +* Revision 6.43 1998/08/21 20:18:59 kans +* added SeqMgrExploreSegments, indexing features on segmented bioseq +* +* Revision 6.42 1998/08/19 16:26:48 kans +* MakeReversedSeqIdString called from original location of code, also finished support for biosource feature indexing +* +* Revision 6.41 1998/08/18 21:43:54 kans +* SeqIdWithinBioseq finds appropriate SeqID in bsp->id chain for use with SeqLocAinB, allowing multiple IDs on a protein bioseq +* +* Revision 6.40 1998/08/16 22:36:24 kans +* fixed direct map up from part to segmented bioseq +* +* Revision 6.39 1998/08/14 15:40:37 kans +* SeqMgrMapPartToSegmentedBioseq neede LIBCALL, speeded up function by adding map up on part if fetched +* +* Revision 6.38 1998/08/13 22:31:45 kans +* SeqMgrMapPartToSegmentedBioseq to speed up GetOffsetInBioseq, start of indexing segments, also index biosource by location for binary search (Wheelan) +* +* Revision 6.37 1998/08/12 22:16:29 kans +* sort seg-parts array by SeqId, handle seqloc_int and seqloc_whole +* +* Revision 6.36 1998/08/12 21:25:04 kans +* forgot to free allocated partslist array +* +* Revision 6.35 1998/08/12 21:10:37 kans +* added parts index to speed segmented bioseq mapping +* +* Revision 6.34 1998/07/23 17:30:41 kans +* get overlapping gene was nulling out sfp, then dereferencing +* +* Revision 6.33 1998/07/23 13:08:56 kans +* SeqMgrGetOverlappingGene and SeqMgrGetOverlappingPub take optional context pointer +* +* Revision 6.32 1998/07/23 01:15:19 kans +* minor fix to not return ignored feature +* +* Revision 6.31 1998/07/23 01:11:33 kans +* added SeqMgrGetOverlappingPub, gene overlap works even when gene spans circular origin +* +* Revision 6.30 1998/07/16 22:30:55 kans +* improved gene overlap function +* +* Revision 6.29 1998/07/16 16:56:38 kans +* added parent BioseqSetPtr field to SeqMgrBioseqContext, check most recent bioseq first when indexing features +* +* Revision 6.28 1998/07/06 16:15:17 kans +* fixed typo in explore bioseqs callback +* +* Revision 6.27 1998/07/06 15:57:29 kans +* SeqMgrExploreBioseqs takes entityID or ptr +* +* Revision 6.26 1998/07/06 15:30:15 kans +* scope on index explore, added SeqMgrExploreBioseqs +* +* Revision 6.25 1998/07/02 22:30:44 kans +* process product before indexing by location, ErrPostItem if cannot find bioseq for location +* +* Revision 6.24 1998/07/02 17:52:33 kans +* CreateBioseqExtraBlock was not being called for protein bioseq to link back to CDS, which was seen first +* +* Revision 6.23 1998/07/01 19:13:16 kans +* SMFeatBlock.data is allocated array of reasonable size +* +* Revision 6.22 1998/06/30 22:08:02 kans +* SeqMgrFeaturesAreIndexed takes entityID, returns time_t time stamp of latest indexing +* +* Revision 6.21 1998/06/30 14:28:00 kans +* changed GetSeqFeat, which collided with asn2ff4 for some linkers +* +* Revision 6.20 1998/06/30 14:20:16 kans +* changes to heap sort order to put genes first, then rnas, if ranges are equal +* +* Revision 6.19 1998/06/30 12:56:45 kans +* code fixes, public functions moved to explore.h +* +* Revision 6.18 1998/06/29 23:37:37 kans +* added context structure for all explores, index every bioseq in an entity +* +* Revision 6.17 1998/06/29 03:06:41 kans +* fixed two conditionals in ProcessFeatureProducts +* +* Revision 6.16 1998/06/29 02:29:44 kans +* new context get descriptor and feature functions now working, get gene not always working +* +* Revision 6.15 1998/06/29 01:33:27 kans +* added SeqMgrGetNextDescriptor and SeqMgrGetNextFeature +* +* Revision 6.14 1998/06/29 00:24:00 kans +* several changes to new indexing functions +* +* Revision 6.13 1998/06/28 03:44:18 kans +* omdp->parentptr is bssp, not omdp, so use ObjMgrFindByData to get higher descriptors +* +* Revision 6.12 1998/06/28 03:15:09 kans +* missing break statement caused features to be ignored +* +* Revision 6.11 1998/06/28 02:38:15 kans +* simplified filters, finished best gene, explore functions +* +* Revision 6.10 1998/06/27 22:23:49 kans +* improvements and further implementation of new indexing, exploration functions +* +* Revision 6.9 1998/06/27 00:03:45 kans +* fix feature heap sort, post increment feature insertion index, look for best prot +* when setting cds back pointer, and merge descriptor count and feature collect callbacks +* +* Revision 6.8 1998/06/26 22:36:24 kans +* initial work on tracking sorted features, and cds and prot links, for rapid collection +* +* Revision 6.7 1998/05/01 16:13:13 kans +* caching of gi with NULL seqID allowed with protection against calling SeqIdDup +* +* Revision 6.6 1998/04/20 22:38:08 kans +* should prevent caching of gi with NULL seqID +* +* Revision 6.5 1998/04/08 16:52:08 kans +* casts to ValNodeLen calls +* +* Revision 6.4 1998/03/30 21:02:25 ostell +* removed check for parenttype != 0 on call to ObjMgrConnect in SeqMgrLinkSeqEntry +* so that disconnects from sets would work as well as connects +* +* Revision 6.3 1997/11/19 22:14:42 ostell +* added support for multithreaded programs +* +* Revision 6.2 1997/09/25 18:20:14 tatiana +* fixing -1 bug for gaps in CountGapsInDeltaSeq +* +* Revision 6.1 1997/09/11 15:55:40 ostell +* Added support for SetColor messages +* +* Revision 6.0 1997/08/25 18:07:06 madden +* Revision changed to 6.0 +* +* Revision 5.20 1997/07/31 16:06:49 kans +* BioseqLockById clears scope if first call to BioseqFindFunc fails, tries again +* +* Revision 5.19 1997/07/30 19:44:46 kans +* bug fix by Serge Bazhin +* +* Revision 5.18 1997/07/28 13:29:41 ostell +* Moved GetUniGeneIDForSeqId() to seqmgr.c +* +* Revision 5.17 1997/07/15 17:37:43 ostell +* fixed problems with duplicate Bioseqs in BioseqFindFunc +* +* Revision 5.16 1997/07/09 21:11:53 ostell +* added support for indexed seqid lookups of bioseqs +* +* Revision 5.15 1997/06/19 18:38:43 vakatov +* [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization +* +* Revision 5.14 1997/06/17 17:59:09 kans +* GetSeqIdForGI now uses binary search in cache +* +* Revision 5.13 1997/06/17 16:33:57 kans +* first pass at cache in SeqIdForGi +* +* Revision 5.12 1997/03/26 14:01:37 ostell +* removed OMUserData from new copy of Bioseq when uncaching to stop +* memory leak +* + * Revision 5.11 1997/02/24 21:46:17 ostell + * in BioseqFindFunc, when checking with scope a failure occurs, now it + * checks again without scope. + * + * Revision 5.10 1997/01/23 22:38:21 ostell + * added missing newline at end of file (sigh) + * + * Revision 5.9 1997/01/23 22:37:14 ostell + * minor change to seqmgr.h for new indexing + * + * Revision 5.7 1997/01/08 22:48:50 tatiana + * buf and buflen arguments added to CountGapsInDeltaSeq() + * + * Revision 5.6 1996/08/22 14:50:05 ostell + * initialized static arrays in BioseqFindFunc + * + * Revision 5.5 1996/08/21 13:33:33 ostell + * added cachig to BioseqFindFunc + * + * Revision 5.4 1996/08/06 19:56:03 kans + * for SEQLOC_WHOLE, must call SeqIdFindBest on bsp->id + * + * Revision 5.3 1996/08/05 15:57:26 chappey + * in BioseqReloadFunc, the OMUserDataPtr is passed to the new + * ObjMgrDataPtr, and is not deleted anymore. + * + * Revision 5.2 1996/07/25 02:32:26 ostell + * added CountGapsInDeltaSeq() + * + * Revision 5.1 1996/07/19 22:13:13 ostell + * added SpreadGapsInDeltaSeq() + * + * Revision 5.0 1996/05/28 13:23:23 ostell + * Set to revision 5.0 + * + * Revision 4.7 1996/03/19 19:05:17 kans + * SeqEntrySetScope now returns old scope, not new scope + * + * Revision 4.6 1996/01/23 14:44:38 kans + * added Pointer casts to MemSet + * + * Revision 4.5 1995/12/22 14:43:59 ostell + * added reload code to BioseqLockById + * break out relad from cache code to be used as part of gather locking + * with BioseqReload + * + * Revision 4.4 1995/12/09 23:12:41 kans + * SeqEntryFind now can deal with a Seq-Submit ultimate parent + * + * Revision 4.3 1995/12/04 21:40:05 ostell + * added GetSeqIdForGI() and GetGIForSeqId() + * + * Revision 4.2 1995/10/03 15:50:37 ostell + * added support for selection by region.. now fully implemented + * + * Revision 4.1 1995/09/30 03:38:31 ostell + * Changed ObjMgrMessage functions to pass a structure + * Added support for selecting regions + * Added ability to remove entity when no more views on it + * + * Revision 4.0 1995/07/26 13:49:01 ostell + * force revision to 4.0 + * + * Revision 1.16 1995/05/15 21:46:05 ostell + * added Log line + * +* +* +* +* ========================================================================== +*/ + +/** for ErrPostEx() ****/ + +static char *this_module = "ncbiapi"; +#define THIS_MODULE this_module +static char *this_file = __FILE__; +#define THIS_FILE this_file + +/**********************/ + +#include <explore.h> /* new public functions prototyped here */ +#include <seqmgr.h> /* the interface */ +#include <sequtil.h> /* CLEAN THIS UP LATER? */ +#include <gather.h> +#include <subutil.h> +#include <ncbithr.h> +#include <objfdef.h> +#include <sqnutils.h> + +/***************************************************************************** +* +* Bioseq Management +* +*****************************************************************************/ + +static BioseqPtr LIBCALLBACK BSFetchFunc PROTO((SeqIdPtr sid, Uint1 ld_type)); +static BioseqPtr NEAR BioseqFindFunc PROTO((SeqIdPtr sid, Boolean reload_from_cache)); +static Boolean NEAR SeqMgrGenericSelect PROTO((SeqLocPtr region, Int2 type, + Uint1Ptr rgb)); +static BioseqPtr NEAR BioseqReloadFunc PROTO((SeqIdPtr sid, ObjMgrDataPtr omdp)); + +static Boolean NEAR SeqMgrProcessNonIndexedBioseq PROTO((void)); +static Boolean NEAR SeqMgrAddIndexElement PROTO((SeqMgrPtr smp, BioseqPtr bsp, CharPtr buf)); +static void NEAR RevStringUpper PROTO((CharPtr str)); +static BSFetchTop NEAR SeqMgrGetFetchTop (void); + + +/***************************************************************************** +* +* Return the current SeqMgr +* SeqMgrGet is obsolete +* SeqMgrReadLock, ReadUnlock, WriteLock, WriteUnlock are thread safe +* +*****************************************************************************/ +static TNlmMutex smp_mutex = NULL; +static SeqMgrPtr global_smp = NULL; +static TNlmRWlock smp_RWlock = NULL; +static TNlmRWlock sgi_RWlock = NULL; + +/***************************************************************************** +* +* Return the current SeqMgr +* Initialize if not done already +* This function will become obsolete +* +*****************************************************************************/ +NLM_EXTERN SeqMgrPtr LIBCALL SeqMgrGet (void) +{ + Int4 ret; + SeqMgrPtr smp; + + if (global_smp != NULL) + return global_smp; + + ret = NlmMutexLockEx(&smp_mutex); /* protect this section */ + if (ret) /* error */ + { + ErrPostEx(SEV_FATAL,0,0,"SeqMgrGet failed [%ld]", (long)ret); + return NULL; + } + + if (global_smp == NULL) /* check again after mutex */ + { + /*** have to initialize it **/ + smp = (SeqMgrPtr) MemNew (sizeof(SeqMgr)); + smp->bsfetch = BSFetchFunc; /* BioseqFetch default */ + smp->fetch_on_lock = TRUE; /* fetch when locking */ + smp_RWlock = NlmRWinit(); /* initialize RW lock */ + sgi_RWlock = NlmRWinit(); /* initialize RW lock */ + global_smp = smp; /* do this last for mutex safety */ + } + + NlmMutexUnlock(smp_mutex); + + return global_smp; +} + +/***************************************************************************** +* +* SeqMgrReadLock() +* Initialize if not done already +* A thread can have only one read or write lock at a time +* Many threads can have read locks +* Only one thread can have a write lock +* No other threads may have read locks if a write lock is granted +* If another thread holds a write lock, this call blocks until write +* is unlocked. +* +*****************************************************************************/ +NLM_EXTERN SeqMgrPtr LIBCALL SeqMgrReadLock (void) +{ + SeqMgrPtr smp; + Int4 ret; + + smp = SeqMgrGet(); /* ensure initialization */ + + ret = NlmRWrdlock(smp_RWlock); + if (ret != 0) + { + ErrPostEx(SEV_ERROR,0,0,"SeqMgrReadLock: RWrdlock error [%ld]", + (long)ret); + return NULL; + } + return smp; +} + +/***************************************************************************** +* +* SeqMgrWriteLock +* Initialize if not done already +* A thread can have only one read or write lock at a time +* Many threads can have read locks +* Only one thread can have a write lock +* No other threads may have read locks if a write lock is granted +* If another thread holds a read or write lock, this call blocks until write +* is unlocked. +* +*****************************************************************************/ +NLM_EXTERN SeqMgrPtr LIBCALL SeqMgrWriteLock (void) +{ + SeqMgrPtr smp; + Int4 ret; + + smp = SeqMgrGet(); /* ensure initialization */ + + ret = NlmRWwrlock(smp_RWlock); + if (ret != 0) + { + ErrPostEx(SEV_ERROR,0,0,"SeqMgrWriteLock: RWwrlock error [%ld]", + (long)ret); + return NULL; + } + smp->is_write_locked = TRUE; + return smp; +} + + +/***************************************************************************** +* +* SeqMgrUnlock() +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SeqMgrUnlock (void) +{ + SeqMgrPtr smp; + Int4 ret; + + smp = SeqMgrGet(); /* ensure initialization */ + + ret = NlmRWunlock(smp_RWlock); + if (ret != 0) + { + ErrPostEx(SEV_ERROR,0,0,"SeqMgrUnlock: RWunlock error [%ld]", + (long)ret); + return FALSE; + } + smp->is_write_locked = FALSE; /* can't be write locked */ + return TRUE; +} + +/**************************************************************************** +* +* RevStringUpper(str) +* Up cases and reverses string +* to get different parts early for SeqId StringCmps +* +*****************************************************************************/ +static void NEAR RevStringUpper (CharPtr str) +{ + CharPtr nd; + Char tmp; + + if (str == NULL) + return; + nd = str; + while (*nd != '\0') + nd++; + nd--; + + while (nd > str) + { + tmp = TO_UPPER(*nd); + *nd = TO_UPPER(*str); + *str = tmp; + nd--; str++; + } + + if (nd == str) + *nd = TO_UPPER(*nd); + return; +} + +static Boolean MakeReversedSeqIdString (SeqIdPtr sid, CharPtr buf, size_t len) + +{ + Uint1 oldchoice; + CharPtr tmp; + TextSeqIdPtr tsip; + + if (sid == NULL || buf == NULL || len < 1) return FALSE; + oldchoice = 0; + switch (sid->choice) { + case SEQID_GI: + sprintf (buf, "%ld", (long)(sid->data.ptrvalue)); + break; + case SEQID_EMBL: + case SEQID_DDBJ: + oldchoice = sid->choice; + sid->choice = SEQID_GENBANK; + case SEQID_GENBANK: + case SEQID_PIR: + case SEQID_OTHER: + case SEQID_SWISSPROT: + case SEQID_PRF: + tsip = (TextSeqIdPtr) (sid->data.ptrvalue); + if (tsip->accession != NULL) { + tmp = tsip->name; + tsip->name = NULL; + SeqIdWrite (sid, buf, PRINTID_FASTA_SHORT, len); + tsip->name = tmp; + } else { + SeqIdWrite (sid, buf, PRINTID_FASTA_SHORT, len); + } + if (oldchoice) + sid->choice = oldchoice; + break; + default: + SeqIdWrite (sid, buf, PRINTID_FASTA_SHORT, len); + break; + } + RevStringUpper (buf); + return TRUE; +} + +/***************************************************************************** +* +* SeqEntrySetScope(sep) +* scopes global seqentry searches to sep +* setting sep=NULL, opens scope to all seqentries in memory +* returns the current scope +* +*****************************************************************************/ +NLM_EXTERN SeqEntryPtr LIBCALL SeqEntrySetScope(SeqEntryPtr sep) +{ + SeqEntryPtr curr = NULL; + SeqMgrPtr smp; + Int2 i, j; + SMScopePtr smsp; + TNlmThread thr; + Boolean found; + + smp = SeqMgrWriteLock(); + if (smp == NULL) goto ret; + thr = NlmThreadSelf(); + found = FALSE; + for (i = 0, smsp = smp->scope; i < smp->num_scope; i++, smsp++) + { + if (NlmThreadCompare(thr, smsp->thr)) + { + curr = smsp->SEscope; + smsp->SEscope = sep; + if (sep == NULL) /* removing one? */ + { + smp->num_scope--; + j = smp->num_scope - i; /* number to move */ + if (j) /* not last one */ + MemCopy(smsp, (smsp+1), (size_t)(j * sizeof(SMScope))); + } + goto ret; /* all done */ + } + } + + /* thread not on list */ + if (sep == NULL) + goto ret; /* nothing to do */ + + i = smp->num_scope; + j = smp->total_scope; + if (j == i) /* need more room */ + { + j += 20; /* new size */ + smsp = smp->scope; + smp->scope = MemNew((size_t)(j * sizeof(SMScope))); + MemCopy(smp->scope, smsp, (size_t)(i * sizeof(SMScope))); + smp->total_scope = j; + MemFree(smsp); + } + + smp->scope[i].thr = thr; + smp->scope[i].SEscope = sep; + smp->num_scope++; + +ret: SeqMgrUnlock(); + return curr; +} + +/***************************************************************************** +* +* SeqEntryGetScope(sep) +* returns the current scope or NULL if none set +* +*****************************************************************************/ +NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryGetScope(void) +{ + SeqMgrPtr smp; + SeqEntryPtr scope = NULL; + Int2 i; + SMScopePtr smsp; + TNlmThread thr; + + smp = SeqMgrReadLock(); + if (smp == NULL) return FALSE; + thr = NlmThreadSelf(); + for (i = 0, smsp = smp->scope; i < smp->num_scope; i++, smsp++) + { + if (NlmThreadCompare(thr, smsp->thr)) + { + scope = smsp->SEscope; + break; + } + } + SeqMgrUnlock(); + return scope; +} + +/***************************************************************************** +* +* BioseqFind(SeqIdPtr) +* Just checks in object loaded memory +* Will also restore a Bioseq that has been cached out +* +*****************************************************************************/ +NLM_EXTERN BioseqPtr LIBCALL BioseqFind (SeqIdPtr sid) +{ + return BioseqFindFunc(sid, TRUE); +} + +/***************************************************************************** +* +* BioseqFindCore(sid) +* Finds a Bioseq in memory based on SeqId when only "core" elements needed +* Will NOT restore a Bioseq that has been cached out by SeqMgr +* This function is for use ONLY by functions that only need the parts +* of the Bioseq left when cached out. This includes the SeqId chain, +* and non-pointer components of the Bioseq. +* +*****************************************************************************/ +NLM_EXTERN BioseqPtr LIBCALL BioseqFindCore (SeqIdPtr sip) +{ + return BioseqFindFunc(sip, FALSE); +} + +/***************************************************************************** +* +* BioseqFindEntity(sid, itemIDptr) +* Finds a Bioseq in memory based on SeqId +* Will NOT restore a Bioseq that has been cached out by SeqMgr +* returns EntityID if found, otherwise 0 +* itemIDptr is set to the value for itemID in ObjMgr functions +* itemtype is OBJ_BIOSEQ of course +* +*****************************************************************************/ +NLM_EXTERN Uint2 LIBCALL BioseqFindEntity (SeqIdPtr sip, Uint2Ptr itemIDptr) +{ + BioseqPtr bsp; + Uint2 entityID = 0; + + *itemIDptr = 0; + bsp = BioseqFindCore(sip); + if (bsp == NULL) return entityID; /* not found */ + entityID = ObjMgrGetEntityIDForPointer((Pointer)bsp); + if (! entityID) + return entityID; + + *itemIDptr = GatherItemIDByData(entityID, OBJ_BIOSEQ, (Pointer)bsp); + return entityID; +} + +/******************************************************************************** +* +* BioseqReload (omdp, lockit) +* reloads the cached SeqEntry at top of omdp +* if (lockit) locks the record +* +*********************************************************************************/ + +NLM_EXTERN ObjMgrDataPtr LIBCALL BioseqReload(ObjMgrDataPtr omdp, Boolean lockit) +{ + BioseqPtr bsp = NULL; + ObjMgrDataPtr retval = NULL; + Int2 j; + ObjMgrPtr omp; + + if (omdp == NULL) return retval; + if (! ((omdp->datatype == OBJ_BIOSEQ) || (omdp->datatype == OBJ_BIOSEQSET))) + return retval; + if (omdp->parentptr != NULL) + { + omp = ObjMgrReadLock(); + omdp = ObjMgrFindTop(omp, omdp); + ObjMgrUnlock(); + if (omdp == NULL) + return retval; + } + + if (omdp->tempload == TL_CACHED) /* only need to reload if cached */ + { + bsp = BioseqReloadFunc (NULL, omdp); + if (bsp == NULL) + return retval; + omp = ObjMgrReadLock(); + j = ObjMgrLookup(omp, (Pointer)bsp); + omdp = ObjMgrFindTop(omp, omp->datalist[j]); + ObjMgrUnlock(); + } + + if (lockit) + { + ObjMgrLock(omdp->datatype, omdp->dataptr, TRUE); + } + + return omdp; +} + +static BSFetchTop NEAR SeqMgrGetFetchTop (void) +{ + SeqMgrPtr smp; + BSFetchTop bsftp=NULL; + + smp = SeqMgrReadLock(); + if (smp == NULL) return bsftp; + bsftp = smp->bsfetch; + SeqMgrUnlock(); + return bsftp; +} + +static BioseqPtr NEAR BioseqReloadFunc (SeqIdPtr sid, ObjMgrDataPtr omdp) +{ + Int2 j; + ObjMgrDataPtr oldomdp; + OMUserDataPtr omudp, next; + ObjMgrProcPtr ompp; + OMProcControl ompc; + BioseqPtr bsp= NULL; + Int2 ret; + ObjMgrPtr omp; + BSFetchTop bsftp=NULL; + + ompp = NULL; + omp = ObjMgrReadLock(); + for (omudp = omdp->userdata; omudp != NULL; omudp = omudp->next) + { + if (omudp->proctype == OMPROC_FETCH) /* caching function */ + { + ompp = ObjMgrProcFind(omp, omudp->procid, NULL, 0); + if (ompp != NULL) + break; + } + } + ObjMgrUnlock(); + + if (ompp == NULL) + return bsp; + if (ompp->outputtype != OBJ_BIOSEQ) + return bsp; + + oldomdp = omdp; + omdp = NULL; + bsftp = SeqMgrGetFetchTop(); + if (bsftp != NULL) + { + if (ompp != NULL) /* fetch proc left a signal */ + { /* rerun fetch */ + MemSet((Pointer)(&ompc), 0, sizeof(OMProcControl)); + ompc.input_data = sid; + ompc.input_entityID = oldomdp->EntityID; + ompc.proc = ompp; + ret = (* (ompp->func))((Pointer)&ompc); + switch (ret) + { + case OM_MSG_RET_ERROR: + ErrShow(); + break; + case OM_MSG_RET_DEL: + break; + case OM_MSG_RET_OK: + break; + case OM_MSG_RET_DONE: + omp = ObjMgrWriteLock(); + ObjMgrSetTempLoad (omp, ompc.output_data); + ObjMgrUnlock(); + bsp = (BioseqPtr)(ompc.output_data); + break; + default: + break; + } + } + + if (bsp == NULL) /* nope, try regular fetch */ + { + bsp = (*(bsftp))(sid, BSFETCH_TEMP); + } + + if (bsp != NULL) + { + omp = ObjMgrReadLock(); + j = ObjMgrLookup(omp, (Pointer)bsp); + omdp = ObjMgrFindTop(omp, omp->datalist[j]); + ObjMgrUnlock(); + omdp->EntityID = oldomdp->EntityID; + oldomdp->EntityID = 0; + + omudp = omdp->userdata; + while (omudp != NULL) + { + next = omudp->next; + if (omudp->freefunc != NULL) + (*(omudp->freefunc))(omudp->userdata.ptrvalue); + MemFree(omudp); + omudp = next; + } + omdp->userdata = oldomdp->userdata; + oldomdp->userdata = NULL; + + if (oldomdp->choice != NULL) + SeqEntryFree(oldomdp->choice); + else + { + switch(oldomdp->datatype) + { + case OBJ_BIOSEQ: + BioseqFree((BioseqPtr)(oldomdp->dataptr)); + break; + case OBJ_BIOSEQSET: + BioseqSetFree((BioseqSetPtr)(oldomdp->dataptr)); + break; + default: + ErrPostEx(SEV_ERROR,0,0,"BioseqFindFunc: delete unknown type [%d]", + (int)(oldomdp->datatype)); + break; + } + } + } + } + return bsp; +} +/** static func used internally **/ + +/******************************************* +* +* WARNING: if you change BIOSEQ_CACHE_NUM, you have to change the +* number of NULL in the initialization of the 2 static pointer arrays +* below +* +*******************************************/ +/* nb: this cache is cleared in SeqMgrDeleteFromBioseqIndex() */ +#define BIOSEQ_CACHE_NUM 3 +static SeqEntryPtr se_cache[BIOSEQ_CACHE_NUM] = { + NULL, NULL, NULL}; /* for a few platforms */ +static ObjMgrDataPtr omdp_cache[BIOSEQ_CACHE_NUM] = { + NULL, NULL, NULL}; /* for a few platforms */ +static TNlmMutex smp_cache_mutex = NULL; + +static BioseqPtr NEAR BioseqFindFunc (SeqIdPtr sid, Boolean reload_from_cache) +{ + Int4 i, j, num, imin, imax, ret; + SeqIdIndexElementPtr PNTR sipp; + CharPtr tmp; + Char buf[80]; + Boolean do_return; + SeqMgrPtr smp; + ObjMgrPtr omp; + ObjMgrDataPtr omdp; + BioseqPtr bsp = NULL, tbsp; + SeqEntryPtr scope; + + if (sid == NULL) + return NULL; + + ret = NlmMutexLockEx(&smp_cache_mutex); /* protect this section */ + if (ret) /* error */ + { + ErrPostEx(SEV_FATAL,0,0,"BioseqFindFunc cache mutex failed [%ld]", (long)ret); + return NULL; + } + + do_return = FALSE; + scope = SeqEntryGetScope(); /* first check the cache */ + for (i = 0; i < BIOSEQ_CACHE_NUM; i++) + { + if (omdp_cache[i] == NULL) + break; + omdp = omdp_cache[i]; + if (omdp->datatype == OBJ_BIOSEQ) + { + if ((scope == NULL) || (scope == se_cache[i])) + { + bsp = (BioseqPtr)(omdp->dataptr); + + if (BioseqMatch(bsp, sid)) + { + for (j = i; j > 0; j--) /* shift to top of cache */ + { + omdp_cache[j] = omdp_cache[j-1]; + se_cache[j] = se_cache[j-1]; + } + omdp_cache[0] = omdp; + se_cache[0] = scope; + + if (! reload_from_cache) + { + do_return = TRUE; + goto done_cache; + } + + omp = ObjMgrReadLock(); + omdp = ObjMgrFindTop(omp, omdp); + ObjMgrUnlock(); + if (omdp->tempload != TL_CACHED) + { + do_return = TRUE; + goto done_cache; + } + + bsp = BioseqReloadFunc(sid, omdp); + + if (bsp == NULL) + { + + ErrPostEx(SEV_ERROR,0,0,"BioseqFindFunc: couldn't uncache"); + } + do_return = TRUE; + goto done_cache; + } + } + } + } +done_cache: + NlmMutexUnlock(smp_cache_mutex); + if (do_return) /* all done */ + { + return bsp; + } + + bsp = NULL; /* resetting it */ + + SeqMgrProcessNonIndexedBioseq(); /* make sure all are indexed */ + + /* stringify as in SeqMgrAdd */ + + MakeReversedSeqIdString (sid, buf, 79); /* common function to make id, call RevStringUpper */ + + /* + oldchoice = 0; + switch (sid->choice) + { + case SEQID_GI: + sprintf(buf, "%ld", (long)(sid->data.ptrvalue)); + break; + case SEQID_EMBL: + case SEQID_DDBJ: + oldchoice = sid->choice; + sid->choice = SEQID_GENBANK; + case SEQID_GENBANK: + case SEQID_PIR: + case SEQID_OTHER: + case SEQID_SWISSPROT: + case SEQID_PRF: + tsip = (TextSeqIdPtr)(sid->data.ptrvalue); + if (tsip->accession != NULL) + { + tmp = tsip->name; + tsip->name = NULL; + SeqIdWrite(sid, buf, PRINTID_FASTA_SHORT, 79); + tsip->name = tmp; + } + else + SeqIdWrite(sid, buf, PRINTID_FASTA_SHORT, 79); + if (oldchoice) + sid->choice = oldchoice; + break; + default: + SeqIdWrite(sid, buf, PRINTID_FASTA_SHORT, 79); + break; + } + + RevStringUpper(buf); + */ + + imin = 0; + smp = SeqMgrReadLock(); + imax = smp->BioseqIndexCnt - 1; + sipp = smp->BioseqIndex; + + num = -1; + + while (imax >= imin) + { + i = (imax + imin)/2; + tmp = sipp[i]->str; + if ((j = StringCmp(tmp, buf)) > 0) + imax = i - 1; + else if (j < 0) + imin = i + 1; + else + { + num = i; + break; + } + } + + if (num < 0) /* couldn't find it */ + { + /* + Message(MSG_ERROR, "[1] Couldn't find [%s]", buf); + */ + SeqMgrUnlock(); + return NULL; + } + + + if (scope != NULL) /* check in scope */ + { + tbsp = (BioseqPtr)(sipp[num]->omdp->dataptr); + if (ObjMgrIsChild(scope->data.ptrvalue, tbsp)) + { + bsp = tbsp; + omdp = sipp[num]->omdp; + } + else + { /* not in scope, could be duplicate SeqId */ + i = num-1; + while ((i >= 0) && (bsp == NULL) && (! StringCmp(sipp[i]->str, buf))) /* back up */ + { + tbsp = (BioseqPtr)(sipp[i]->omdp->dataptr); + if (ObjMgrIsChild(scope->data.ptrvalue, tbsp)) + { + bsp = tbsp; + omdp = sipp[i]->omdp; + } + i--; + } + i = num + 1; + imax = smp->BioseqIndexCnt - 1; + while ((bsp == NULL) && (i <= imax) && (! StringCmp(sipp[i]->str, buf))) + { + tbsp = (BioseqPtr)(sipp[i]->omdp->dataptr); + if (ObjMgrIsChild(scope->data.ptrvalue, tbsp)) + { + bsp = tbsp; + omdp = sipp[i]->omdp; + } + i++; + } + } + } + else /* no scope set */ + { + omdp = sipp[num]->omdp; + bsp = (BioseqPtr)(omdp->dataptr); + } + + SeqMgrUnlock(); + + if (bsp == NULL) /* not found */ + { + /* + Message(MSG_ERROR, "[2] Couldn't find [%s]", buf); + */ + return bsp; + } + + ret = NlmMutexLockEx(&smp_cache_mutex); /* protect this section */ + if (ret) /* error */ + { + ErrPostEx(SEV_FATAL,0,0,"BioseqFindFunc2 cache mutex failed [%ld]", (long)ret); + return NULL; + } + + for (j = (BIOSEQ_CACHE_NUM - 1); j > 0; j--) /* shift to top of cache */ + { + omdp_cache[j] = omdp_cache[j-1]; + se_cache[j] = se_cache[j-1]; + } + omdp_cache[0] = omdp; + se_cache[0] = scope; + + NlmMutexUnlock(smp_cache_mutex); + + if (! reload_from_cache) + return bsp; + + omp = ObjMgrReadLock(); + omdp = ObjMgrFindTop(omp, omdp); + ObjMgrUnlock(); + if (omdp == NULL) + { + bsp = NULL; + goto ret; + } + if (omdp->tempload == TL_CACHED) + bsp = BioseqReloadFunc(sid, omdp); +ret: + return bsp; +} + +/***************************************************************************** +* +* SeqMgrFreeCache() +* frees all cached SeqEntrys +* returns FALSE if any errors occurred +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SeqMgrFreeCache(void) +{ + return ObjMgrFreeCache(OBJ_SEQENTRY); +} + +/***************************************************************************** +* +* BioseqLockById(SeqIdPtr) +* Finds the Bioseq and locks it +* Makes sure appropriate BioseqContent is present +* +*****************************************************************************/ +NLM_EXTERN BioseqPtr LIBCALL BioseqLockById (SeqIdPtr sid) +{ + BioseqPtr bsp = NULL; + SeqMgrPtr smp; + SeqEntryPtr oldscope = NULL; + BSFetchTop bsftp; + Boolean fetch_on_lock; + + if (sid == NULL) return bsp; + + bsp = BioseqFindFunc(sid, TRUE); + if (bsp == NULL) + { + smp = SeqMgrReadLock(); + if (smp == NULL) return bsp; + fetch_on_lock = smp->fetch_on_lock; + bsftp = smp->bsfetch; + SeqMgrUnlock(); + + if (fetch_on_lock) + { + oldscope = SeqEntrySetScope (NULL); + if (oldscope != NULL) { + bsp = BioseqFindFunc(sid, TRUE); + SeqEntrySetScope (oldscope); + } + if (bsp == NULL && bsftp != NULL) + bsp = (*(bsftp))(sid, BSFETCH_TEMP); + } + } + + if (bsp == NULL) return NULL; + + ObjMgrLock(OBJ_BIOSEQ, (Pointer)bsp, TRUE); + return bsp; +} + +/***************************************************************************** +* +* BioseqUnlockById(SeqIdPtr sip) +* Frees a Bioseq to be dumped from memory if necessary +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL BioseqUnlockById (SeqIdPtr sip) +{ + BioseqPtr bsp; + + if (sip == NULL) return FALSE; + + bsp = BioseqFindFunc(sip, FALSE); + if (bsp == NULL) + return FALSE; + + ObjMgrLock(OBJ_BIOSEQ, (Pointer)bsp, FALSE); + return TRUE; +} + +/***************************************************************************** +* +* BioseqLock(BioseqPtr) +* Locks a Bioseq +* Any cached data is returned to memory +* +*****************************************************************************/ +NLM_EXTERN BioseqPtr LIBCALL BioseqLock (BioseqPtr bsp) +{ + if (bsp == NULL) return NULL; + + ObjMgrLock(OBJ_BIOSEQ, (Pointer)bsp, TRUE); + + return bsp; +} + +/***************************************************************************** +* +* BioseqUnlock(BioseqPtr) +* Frees a Bioseq to be dumped from memory if necessary +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL BioseqUnlock (BioseqPtr bsp) +{ + if (bsp == NULL) return FALSE; + + if (ObjMgrLock(OBJ_BIOSEQ, (Pointer)bsp, FALSE) >= 0) + return TRUE; + else + return FALSE; +} + +/***************************************************************************** +* +* BioseqFetch(SeqIdPtr, flag) +* loads bioseq into memory if possible +* first trys LocalLoad +* they trys EntrezLoad +* +*****************************************************************************/ +static BioseqPtr LIBCALLBACK BSFetchFunc (SeqIdPtr sid, Uint1 ld_type) +{ + BioseqPtr bsp = NULL; + ObjMgrProcPtr ompp; + OMProcControl ompc; + Int2 ret; + ObjMgrPtr omp; + + ompp = NULL; + while ((ompp = ObjMgrProcFindNext(NULL, OMPROC_FETCH, OBJ_SEQID, OBJ_BIOSEQ, ompp)) != NULL) + { + MemSet((Pointer)(&ompc), 0, sizeof(OMProcControl)); + ompc.input_data = sid; + ompc.proc = ompp; + ret = (* (ompp->func))((Pointer)&ompc); + switch (ret) + { + case OM_MSG_RET_ERROR: + ErrShow(); + break; + case OM_MSG_RET_DEL: + break; + case OM_MSG_RET_OK: + break; + case OM_MSG_RET_DONE: + if (ld_type == BSFETCH_TEMP) + { + omp = ObjMgrWriteLock(); + ObjMgrSetTempLoad (omp, ompc.output_data); + ObjMgrUnlock(); + } + bsp = (BioseqPtr)(ompc.output_data); + break; + default: + break; + } + if (bsp != NULL) /* got one */ + break; + } + + return bsp; +} + + +NLM_EXTERN BioseqPtr LIBCALL BioseqFetch (SeqIdPtr sid, Uint1 ld_type) +{ + BSFetchTop bsftp; + BioseqPtr bsp; + + bsp = BioseqFindFunc(sid, TRUE); + if (bsp != NULL) return bsp; + + bsftp = SeqMgrGetFetchTop(); + if (bsftp == NULL) return NULL; + + return (*(bsftp))(sid, ld_type); +} + +/***************************************************************************** +* +* GetGIForSeqId(SeqIdPtr) +* returns the GI for a SeqId +* returns 0 if can't find it +* +*****************************************************************************/ +NLM_EXTERN Int4 LIBCALL GetGIForSeqId (SeqIdPtr sid) +{ + BioseqPtr bsp = NULL; + ObjMgrProcPtr ompp; + OMProcControl ompc; + Int2 ret; + SeqIdPtr sip; + Int4 gi=0; + + + if (sid == NULL) + return gi; + + if (sid->choice == SEQID_GI) + return sid->data.intvalue; + + bsp = BioseqFindCore(sid); + if (bsp != NULL) + { + for (sip = bsp->id; sip != NULL; sip = sip->next) + { + if (sip->choice == SEQID_GI) + return sip->data.intvalue; + } + } + + + ompp = NULL; + while ((ompp = ObjMgrProcFindNext(NULL, OMPROC_FETCH, OBJ_SEQID, OBJ_SEQID, ompp)) != NULL) + { + if ((ompp->subinputtype == 0) && (ompp->suboutputtype == SEQID_GI)) + { + MemSet((Pointer)(&ompc), 0, sizeof(OMProcControl)); + ompc.input_data = sid; + ompc.proc = ompp; + ret = (* (ompp->func))((Pointer)&ompc); + switch (ret) + { + case OM_MSG_RET_ERROR: + ErrShow(); + break; + case OM_MSG_RET_DEL: + break; + case OM_MSG_RET_OK: + break; + case OM_MSG_RET_DONE: + sip = (SeqIdPtr)(ompc.output_data); + if (sip != NULL) + { + if (sip->choice == SEQID_GI) + { + gi = sip->data.intvalue; + SeqIdFree(sip); + return gi; + } + SeqIdFree(sip); + } + break; + default: + break; + } + } + } + + return gi; +} + + +/***************************************************************************** +* +* GetSeqIdForGI(Int4) +* returns the SeqId for a GI +* returns NULL if can't find it +* The returned SeqId is allocated. Caller must free it. +* +*****************************************************************************/ +typedef struct seqidblock { + Int4 uid; + SeqIdPtr sip; +} SeqIdBlock, PNTR SeqIdBlockPtr; + +static ValNodePtr seqidgicache = NULL; +static ValNodePtr PNTR seqidgiarray = NULL; +static Int2 seqidcount = 0; +static TNlmRWlock sid_RWlock = NULL; + +static void RecordInSeqIdGiCache (Int4 gi, SeqIdPtr sip) + +{ + ValNodePtr vnp, tmp; + ValNodePtr PNTR prev; + SeqIdBlockPtr sibp; + Int4 retval; + + /* if (sip == NULL) return; okay to cache NULL because we protect against SeqIdDup */ + + retval = NlmRWwrlock(sgi_RWlock); + if (retval != 0) + { + ErrPostEx(SEV_ERROR,0,0,"RecSeqIdGi: RWwrlock error [%ld]", + (long)retval); + return; + } + + + vnp = ValNodeNew (NULL); + if (vnp == NULL) goto ret; + sibp = (SeqIdBlockPtr) MemNew (sizeof (SeqIdBlock)); + if (sibp == NULL) { + MemFree (vnp); + goto ret; + } + + sibp->uid = gi; + if (sip != NULL) { + sibp->sip = SeqIdDup (sip); + } + vnp->data.ptrvalue = (Pointer) sibp; + + if (seqidgicache == NULL) { + seqidgicache = vnp; + goto ret; + } + + seqidgiarray = MemFree (seqidgiarray); + + prev = (ValNodePtr PNTR) (&seqidgicache); + tmp = seqidgicache; + while (tmp != NULL) { + sibp = (SeqIdBlockPtr) tmp->data.ptrvalue; + if (sibp != NULL) { + if (sibp->uid > gi) { + if (prev != NULL) { + vnp->next = *prev; + *prev = vnp; + } + goto ret; + } else if (sibp->uid == gi) { + goto ret; + } else { + prev = (ValNodePtr PNTR) (& (tmp->next)); + } + } else { + prev = (ValNodePtr PNTR) (& (tmp->next)); + } + tmp = tmp->next; + } + if (prev != NULL) { + vnp->next = *prev; + *prev = vnp; + } +ret: + retval = NlmRWunlock(sgi_RWlock); + if (retval != 0) + { + ErrPostEx(SEV_ERROR,0,0,"RecSeqIdGiUnlock: RWunlock error [%ld]", + (long)retval); + } + return; +} + +static Boolean FetchFromSeqIdGiCache (Int4 gi, SeqIdPtr PNTR sipp) + +{ + ValNodePtr vnp; + SeqIdBlockPtr sibp = NULL; + Int2 i; + Int2 left, right, mid; + Int4 compare, ret; + Boolean done = FALSE; + + if (sipp == NULL) return done; + *sipp = NULL; + if (seqidgicache == NULL) return done; + + if (seqidgiarray == NULL) { + ret = NlmRWwrlock(sgi_RWlock); + if (ret != 0) + { + ErrPostEx(SEV_ERROR,0,0,"SeqIdGi: RWwrlock error [%ld]", + (long)ret); + return done; + } + + if (seqidgiarray == NULL) + { + seqidcount = (Int2) ValNodeLen (seqidgicache); + seqidgiarray = MemNew (sizeof (ValNodePtr) * (size_t) (seqidcount + 1)); + if (seqidgiarray != NULL) { + i = 0; + for (vnp = seqidgicache; vnp != NULL; vnp = vnp->next) { + seqidgiarray [i] = vnp; + i++; + } + } + } + ret = NlmRWunlock(sgi_RWlock); + if (ret != 0) + { + ErrPostEx(SEV_ERROR,0,0,"SeqIdGiUnlock: RWunlock error [%ld]", + (long)ret); + return done; + } + + } + + ret = NlmRWrdlock(sgi_RWlock); + if (ret != 0) + { + ErrPostEx(SEV_ERROR,0,0,"SeqIdGi: RWrdlock error [%ld]", + (long)ret); + return done; + } + + if (seqidgiarray != NULL) { + left = 1; + right = seqidcount; + while (left <= right) { + mid = (left + right) / 2; + compare = 0; + vnp = seqidgiarray [mid - 1]; + if (vnp != NULL) { + sibp = (SeqIdBlockPtr) vnp->data.ptrvalue; + if (sibp != NULL) { + compare = gi - sibp->uid; + } + } + if (compare <= 0) { + right = mid - 1; + } + if (compare >= 0) { + left = mid + 1; + } + } + if (left > right + 1 && sibp != NULL) { + if (sibp->sip != NULL) { + *sipp = SeqIdDup (sibp->sip); + } + done = TRUE; + } + } + + + ret = NlmRWunlock(sgi_RWlock); + if (ret != 0) + { + ErrPostEx(SEV_ERROR,0,0,"SeqIdGiUnlock: RWunlock error [%ld]", + (long)ret); + } + + return done; +} + +NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdForGI (Int4 gi) +{ + BioseqPtr bsp = NULL; + ObjMgrProcPtr ompp; + OMProcControl ompc; + Int2 ret; + SeqIdPtr sip, sip2=NULL, other=NULL, gb=NULL; + ValNode vn; + + + if (gi <= 0) + return sip2; + + vn.choice = SEQID_GI; + vn.data.intvalue = gi; + vn.next = NULL; + + bsp = BioseqFindCore(&vn); + if (bsp != NULL) + { + for (sip = bsp->id; sip != NULL; sip = sip->next) + { + switch (sip->choice) + { + case SEQID_LOCAL: /* object id */ + case SEQID_GIBBSQ: + case SEQID_GIBBMT: + case SEQID_PATENT: + case SEQID_GENERAL: + other = sip; + break; + case SEQID_GI: + break; + case SEQID_GENBANK: + case SEQID_EMBL: + case SEQID_PIR: + case SEQID_SWISSPROT: + case SEQID_DDBJ: + case SEQID_PRF: + case SEQID_PDB: + gb = sip; + break; + default: + if (other == NULL) + other = sip; + break; + } + } + } + + + if (gb != NULL) + sip2 = gb; + else if (other != NULL) + sip2 = other; + + if (sip2 != NULL) + return SeqIdDup(sip2); + + if (FetchFromSeqIdGiCache (gi, &sip2)) { + return sip2; + } + + ompp = NULL; + while ((ompp = ObjMgrProcFindNext(NULL, OMPROC_FETCH, OBJ_SEQID, OBJ_SEQID, ompp)) != NULL) + { + if ((ompp->subinputtype == SEQID_GI) && (ompp->suboutputtype == 0)) + { + MemSet((Pointer)(&ompc), 0, sizeof(OMProcControl)); + ompc.input_data = &vn; + ompc.proc = ompp; + ret = (* (ompp->func))((Pointer)&ompc); + switch (ret) + { + case OM_MSG_RET_ERROR: + ErrShow(); + break; + case OM_MSG_RET_DEL: + break; + case OM_MSG_RET_OK: + break; + case OM_MSG_RET_DONE: + sip2 = (SeqIdPtr)(ompc.output_data); + if (sip2 != NULL) + { + RecordInSeqIdGiCache (gi, sip2); + return sip2; + } + break; + default: + break; + } + } + } + + RecordInSeqIdGiCache (gi, sip2); + return sip2; +} + +/***************************************************************************** +* +* SeqEntryFind(sip) +* returns top level seqentry for sip +* +*****************************************************************************/ +NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryFind (SeqIdPtr sid) +{ + BioseqPtr bsp; + ObjMgrDataPtr omdp; + ObjMgrDataPtr PNTR omdpp; + SeqEntryPtr result=NULL; + SeqSubmitPtr ssp; + Int2 i; + ObjMgrPtr omp; + + bsp = BioseqFind(sid); + if (bsp == NULL) return result; + + omp = ObjMgrReadLock(); + omdpp = omp->datalist; + + i = ObjMgrLookup(omp, (Pointer)bsp); + omdp = omdpp[i]; + while (omdp->parentptr != NULL) + { + i = ObjMgrLookup(omp, (omdp->parentptr)); + omdp = omdpp[i]; + } + + if (omdp->datatype == OBJ_SEQSUB) { + ssp = (SeqSubmitPtr) omdp->dataptr; + if (ssp != NULL && ssp->datatype == 1) { + result = (SeqEntryPtr) ssp->data; + } + } else { + result = omdp->choice; + } + ObjMgrUnlock(); + return result; +} + +/***************************************************************************** +* +* BioseqContextPtr BioseqContextNew (bsp) +* +*****************************************************************************/ +NLM_EXTERN BioseqContextPtr LIBCALL BioseqContextNew (BioseqPtr bsp) +{ + ObjMgrDataPtr omdp; + ObjMgrDataPtr PNTR omdpp; + Int2 i, ctr=0; + SeqEntryPtr seps[BIOSEQCONTEXTMAX]; + BioseqContextPtr bcp; + ObjMgrPtr omp; + + if (bsp == NULL) + return NULL; + + + bcp = MemNew(sizeof(BioseqContext)); + bcp->bsp = bsp; + bcp->se.choice = 1; /* bioseq */ + bcp->se.data.ptrvalue = bsp; + + omp = ObjMgrReadLock(); + if (omp == NULL) return BioseqContextFree(bcp); + omdpp = omp->datalist; + + i = ObjMgrLookup(omp, (Pointer)bsp); + omdp = omdpp[i]; + + if (omdp->choice != NULL) + { + seps[ctr] = omdp->choice; + ctr++; + + while (omdp->parentptr != NULL) + { + i = ObjMgrLookup(omp, (omdp->parentptr)); + omdp = omdpp[i]; + if (omdp->choice != NULL) + { + if (ctr == BIOSEQCONTEXTMAX) + ErrPostEx(SEV_ERROR, 0,0, "BioseqContextNew: more than %d levels",(int)ctr); + else + { + seps[ctr] = omdp->choice; + ctr++; + } + } + } + + bcp->count = ctr; + for (i = 0; i < bcp->count; i++) + { + ctr--; + bcp->context[i] = seps[ctr]; + } + } + + if (omdp->tempload == TL_CACHED) + { + ErrPostEx(SEV_ERROR,0,0,"BioseqContextNew: bsp is TL_CACHED"); + bcp = BioseqContextFree(bcp); + } + + ObjMgrUnlock(); + + return bcp; +} + +/***************************************************************************** +* +* BioseqContextFree(bcp) +* +*****************************************************************************/ +NLM_EXTERN BioseqContextPtr LIBCALL BioseqContextFree(BioseqContextPtr bcp) +{ + return MemFree(bcp); +} + +/***************************************************************************** +* +* BioseqContextGetSeqDescr(bcp, type, curr, SeqEntryPtr PNTR sep) +* returns pointer to the next SeqDescr of this type +* type gives type of Seq-descr +* if (type == 0) +* get them all +* curr is NULL or previous node of this type found +* moves up from bsp +* if (sep != NULL) sep set to SeqEntryPtr containing the SeqDescr. +* +*****************************************************************************/ +NLM_EXTERN ValNodePtr LIBCALL BioseqContextGetSeqDescr (BioseqContextPtr bcp, Int2 type, ValNodePtr curr, SeqEntryPtr PNTR the_sep) /* the last one you used */ +{ + Int2 i; + ValNodePtr tmp; + Boolean found = FALSE; + BioseqPtr bsp; + BioseqSetPtr bssp; + + if (bcp == NULL) return NULL; + + if (the_sep != NULL) + *the_sep = NULL; + + if (bcp->count == 0) /* just a Bioseq */ + { + tmp = BioseqGetSeqDescr(bcp->bsp, type, curr); + if (the_sep != NULL) *the_sep = bcp->context[1]; + return tmp; + } + + i = bcp->count - 1; + if (curr != NULL) /* find where we are */ + { + while ((i >= 0) && (! found)) + { + if (IS_Bioseq(bcp->context[i])) + { + bsp = (BioseqPtr)((bcp->context[i])->data.ptrvalue); + tmp = bsp->descr; + } + else + { + bssp = (BioseqSetPtr)((bcp->context[i])->data.ptrvalue); + tmp = bssp->descr; + } + while ((tmp != curr) && (tmp != NULL)) + tmp = tmp->next; + if (tmp == curr) + { + found = TRUE; + tmp = tmp->next; + } + else + i--; + } + if (! found) /* can't find it! */ + return NULL; + } + else /* get first one */ + { + tmp = bcp->bsp->descr; + } + + while (i >= 0) + { + while (tmp != NULL) + { + if ((! type) || ((Int2)(tmp->choice) == type)) + { + if (the_sep != NULL) *the_sep = bcp->context[i]; + return tmp; + } + tmp = tmp->next; + } + i--; + if (i >= 0) + { + if (IS_Bioseq(bcp->context[i])) + { + bsp = (BioseqPtr)((bcp->context[i])->data.ptrvalue); + tmp = bsp->descr; + } + else + { + bssp = (BioseqSetPtr)((bcp->context[i])->data.ptrvalue); + tmp = bssp->descr; + } + } + } + return NULL; +} + +/***************************************************************************** +* +* BioseqContextGetSeqFeat(bcp, type, curr, sapp) +* returns pointer to the next Seq-feat of this type +* type gives type of Seq-descr +* if (type == 0) +* get them all +* curr is NULL or previous node of this type found +* moves up from bsp +* if (sapp != NULL) is filled with SeqAnnotPtr containing the SeqFeat +* in: +* 0 = sfp->location only +* 1 = sfp->product only +* 2 = either of above +* +*****************************************************************************/ +NLM_EXTERN SeqFeatPtr LIBCALL BioseqContextGetSeqFeat (BioseqContextPtr bcp, Int2 type, + SeqFeatPtr curr, SeqAnnotPtr PNTR sapp, Int2 in) /* the last one you used */ +{ + SeqEntryPtr sep; + + if (bcp == NULL) return NULL; + + if (sapp != NULL) + *sapp = NULL; + + if (bcp->count == 0) /* just a BioseqSeq */ + sep = &(bcp->se); + else + sep = bcp->context[0]; + + return SeqEntryGetSeqFeat (sep, type, curr, sapp, in, bcp->bsp); +} + +typedef struct smgetseqfeat { + Boolean hit; + SeqFeatPtr last, + this; + SeqAnnotPtr sap; + SeqLocPtr slp1, slp2; + Int2 in, type; +} SMGetSeqFeat, PNTR GetSeqFeatPtr; + +NLM_EXTERN void GetSeqFeatCallback (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent); + +/***************************************************************************** +* +* SeqEntryGetSeqFeat(sep, type, curr, sapp) +* returns pointer to the next Seq-feat of this type +* type gives type of SeqFeat +* if (type == 0) +* get them all +* curr is NULL or previous node of this type found +* moves up from bsp +* if (sapp != NULL) is filled with SeqAnnotPtr containing the SeqFeat +* if (bsp != NULL) then for that Bioseq match on location by "in" +* in: +* 0 = sfp->location only +* 1 = sfp->product only +* 2 = either of above +* +*****************************************************************************/ +NLM_EXTERN SeqFeatPtr LIBCALL SeqEntryGetSeqFeat (SeqEntryPtr sep, Int2 type, + SeqFeatPtr curr, SeqAnnotPtr PNTR sapp, Int2 in, BioseqPtr bsp) /* the last one you used */ +{ + SMGetSeqFeat gsf; + ValNode vn1, vn2; + + if (sep == NULL) return NULL; + + if (sapp != NULL) + *sapp = NULL; + + if (curr == NULL) + gsf.hit = TRUE; + else + gsf.hit = FALSE; + gsf.last = curr; + gsf.this = NULL; + gsf.sap = NULL; + gsf.type = type; + gsf.in = in; + if (bsp != NULL) /* matching by Bioseq */ + { + if ((bsp->repr == Seq_repr_seg) || (bsp->repr == Seq_repr_ref)) + { + vn2.choice = SEQLOC_MIX; + vn2.data.ptrvalue = bsp->seq_ext; + gsf.slp2 = (SeqLocPtr)(&vn2); + } + else + gsf.slp2 = NULL; + + vn1.choice = SEQLOC_WHOLE; + vn1.data.ptrvalue = (Pointer) SeqIdFindBest (bsp->id, 0); + gsf.slp1 = (SeqLocPtr)(&vn1); + } + else + gsf.slp1 = NULL; + + SeqEntryExplore (sep, (Pointer)(&gsf), GetSeqFeatCallback); + + if (sapp != NULL) + *sapp = gsf.sap; + + return gsf.this; +} + +NLM_EXTERN void GetSeqFeatCallback (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent) +{ + GetSeqFeatPtr gsfp; + BioseqPtr bsp; + BioseqSetPtr bssp; + SeqAnnotPtr sap; + SeqFeatPtr sfp, last; + Boolean hit, gotit = FALSE; + Uint1 type; + SeqLocPtr slp1, slp2, slp; + Int2 i, in, retval; + + gsfp = (GetSeqFeatPtr)data; + if (gsfp->this != NULL) /* got it */ + return; + + last = gsfp->last; + hit = gsfp->hit; + type = (Uint1)(gsfp->type); + if (gsfp->slp1 != NULL) /* matching by Bioseq */ + { + slp1 = gsfp->slp1; + slp2 = gsfp->slp2; + in = gsfp->in; + } + else + slp1 = NULL; + + if (IS_Bioseq(sep)) + { + bsp = (BioseqPtr)(sep->data.ptrvalue); + sap = bsp->annot; + } + else + { + bssp = (BioseqSetPtr)(sep->data.ptrvalue); + sap = bssp->annot; + } + + while (sap != NULL) + { + if (sap->type == 1) /* feature table */ + { + for (sfp = (SeqFeatPtr)(sap->data); sfp != NULL; sfp = sfp->next) + { + if (! hit) /* still looking */ + { + if (sfp == last) + { + hit = TRUE; + gsfp->hit = TRUE; + } + } + else + { + if ((! type) || (type == sfp->data.choice)) + { + if (slp1 != NULL) /* look for feats on a bioseq */ + { + for (i = 0; i < 2; i++) + { + if ((i == 0) && (in != 1)) + slp = sfp->location; + else if ((i==1) && (in != 0)) + slp = sfp->product; + else + slp = NULL; + if (slp != NULL) + { + retval = SeqLocCompare(slp, slp1); + if (retval) + { + gotit = TRUE; + break; + } + + if (slp2 != NULL) + { + retval = SeqLocCompare(slp, slp2); + if (retval) + { + gotit = TRUE; + break; + } + } + } + } + } + else + gotit = TRUE; + if (gotit) + { + gsfp->this = sfp; + gsfp->sap = sap; + return; + } + } + } + } + } + + sap = sap->next; + } + + return; +} + +/***************************************************************************** +* +* BioseqContextGetTitle(bcp) +* returns first title for Bioseq in this context +* +*****************************************************************************/ +NLM_EXTERN CharPtr LIBCALL BioseqContextGetTitle(BioseqContextPtr bcp) +{ + CharPtr title = NULL; + ValNodePtr vnp; + + vnp = BioseqContextGetSeqDescr(bcp, Seq_descr_title, NULL, NULL); + if (vnp != NULL) + title = (CharPtr)vnp->data.ptrvalue; + return title; +} + +/***************************************************************************** +* +* SeqMgr Functions +* +*****************************************************************************/ + +/***************************************************************************** +* +* SeqMgrSeqEntry(type, data, sep) +* Adds the SeqEntryPtr pointing directly to this Bioseq or BioseqSet +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SeqMgrSeqEntry (Uint1 type, Pointer data, SeqEntryPtr sep) +{ + return ObjMgrSetChoice (OBJ_SEQENTRY, sep, data); +} + +/***************************************************************************** +* +* SeqMgrGetSeqEntryForData(data) +* returns SeqEntryPtr for a BioseqPtr or BioseqSetPtr +* sep must have been put in SeqMgr using SeqMgrSeqEntry +* the Bioseq/BioseqSets it is a part of must also be in SeqMgr +* returns NULL on failure. +* +*****************************************************************************/ +NLM_EXTERN SeqEntryPtr LIBCALL SeqMgrGetSeqEntryForData (Pointer data) +{ + return ObjMgrGetChoiceForData(data); +} + +/***************************************************************************** +* +* SeqMgrGetEntityIDForSeqEntry(sep) +* returns the EntityID for a SeqEntryPtr +* sep must have been put in SeqMgr using SeqMgrSeqEntry +* the Bioseq/BioseqSets it is a part of must also be in SeqMgr +* This function will move up to the top of the SeqEntry tree it may be +* in. If top level EntityID is 0, one is assigned at this point. +* If an element is moved under a different hierarchy, its EntityID will +* change. +* returns 0 on failure. +* +*****************************************************************************/ +NLM_EXTERN Int2 LIBCALL SeqMgrGetEntityIDForSeqEntry (SeqEntryPtr sep) +{ + return ObjMgrGetEntityIDForChoice (sep); +} + +/***************************************************************************** +* +* SeqMgrGetSeqEntryForEntityID (id) +* +*****************************************************************************/ +NLM_EXTERN SeqEntryPtr LIBCALL SeqMgrGetSeqEntryForEntityID (Int2 id) +{ + return ObjMgrGetChoiceForEntityID (id); +} + +/***************************************************************************** +* +* SeqMgrSetBSFetchTop (fetch, data) +* sets the BSFetchTop routine to "fetch" +* returns previous value +* set to NULL to turn off all fetching for that type +* +* Current value can be called directly as BioseqFetch(); +* Default is +* 1) looks in memory +* 2) looks locally if LocalBSFetch is set +* 3) looks remotely if RemoteBSFetch is set +* +*****************************************************************************/ +NLM_EXTERN BSFetchTop LIBCALL SeqMgrSetBSFetchTop (BSFetchTop fetch, Pointer data) +{ + SeqMgrPtr smp; + BSFetchTop tmp = NULL; + + smp = SeqMgrWriteLock(); + if (smp == NULL) return tmp; + + tmp = smp->bsfetch; + smp->bsfetch = fetch; + smp->TopData = data; + SeqMgrUnlock(); + return tmp; +} + +/***************************************************************************** +* +* SeqMgrSetFetchOnLock(value) +* if value = TRUE, manager will try to fetch the bioseq if not in +* memory, when BioseqLock is called +* if FALSE, BioseqLock will only look in memory +* returns previous value of fetch_on_lock +* default is to fetch_on_lock +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SeqMgrSetFetchOnLock (Boolean value) +{ + Boolean tmp=FALSE; + SeqMgrPtr smp; + + smp = SeqMgrWriteLock(); + if (smp == NULL) return tmp; + + tmp = smp->fetch_on_lock; + smp->fetch_on_lock = value; + SeqMgrUnlock(); + return tmp; +} + +/***************************************************************************** +* +* SeqMgrLinkSeqEntry(sep, parenttype, parentptr) +* connects all component seq-entries by traversing the linked list +* all calling SeqMgrConnect and SeqMgrSeqEntry appropriately +* if parenttype != 0, then assumes seqentry is contained in parentptr +* and should be connected to it +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SeqMgrLinkSeqEntry (SeqEntryPtr sep, Uint2 parenttype, Pointer parentptr) +{ + SeqEntryPtr sep2; + BioseqSetPtr bssp; + Uint2 the_type; + + if (sep == NULL) + return FALSE; + + if (IS_Bioseq(sep)) + the_type = OBJ_BIOSEQ; + else + the_type = OBJ_BIOSEQSET; + + SeqMgrSeqEntry((Uint1)the_type, sep->data.ptrvalue, sep); + + /**** if (parenttype != 0) ****/ + ObjMgrConnect(the_type, sep->data.ptrvalue, parenttype, parentptr); + + if (! IS_Bioseq(sep)) + { + bssp = (BioseqSetPtr)(sep->data.ptrvalue); + for (sep2 = bssp->seq_set; sep2 != NULL; sep2 = sep2->next) + { + SeqMgrLinkSeqEntry (sep2, the_type, sep->data.ptrvalue); + } + } + return TRUE; +} +/***************************************************************************** +* +* Selection Functions for data objects based on SeqLoc +* See also general selection in objmgr.h +* +*****************************************************************************/ + +/***************************************************************************** +* +* SeqMgrSelect(region) +* region is a SeqLocPtr +* It can only apply to one Bioseq +* selected area will be extreme left and right ends +* fuzziness is ignored +* if something else selected, deselects it first, then selects requested +* item +* to select without deselecting something else, use SeqMgrAlsoSelect() +* returns TRUE if item is now currently selected, FALSE if not +* "region" is always copied. Caller is responsible for managment of +* SeqLoc that is passed in. +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SeqMgrSelect (SeqLocPtr region) +{ + return SeqMgrGenericSelect(region, 1, NULL); +} + +NLM_EXTERN Boolean LIBCALL SeqMgrAlsoSelect (SeqLocPtr region) +{ + return SeqMgrGenericSelect(region, 2, NULL); +} + +/***************************************************************************** +* +* SeqMgrDeSelect(region) +* if this item was selected, then deselects and returns TRUE +* else returns FALSE +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SeqMgrDeSelect (SeqLocPtr region) +{ + return SeqMgrGenericSelect(region, 3, NULL); +} + +/***************************************************************************** +* +* SeqMgrSetColor(region, rgb) +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SeqMgrSetColor (SeqLocPtr region, Uint1Ptr rgb) +{ + if (rgb == NULL) return FALSE; + return SeqMgrGenericSelect(region, 4, rgb); +} + +static Boolean NEAR SeqMgrGenericSelect (SeqLocPtr region, Int2 type, + Uint1Ptr rgb) +{ + SeqInt si; + ValNode vn; + SeqIdPtr sip; + Uint2 entityID, itemID; + + if (region == NULL) return FALSE; + + sip = SeqLocId(region); + if (sip == NULL) return FALSE; + + entityID = BioseqFindEntity(sip, &itemID); + if (entityID == 0) return FALSE; + + MemSet((Pointer)(&si), 0, sizeof(SeqInt)); + MemSet((Pointer)(&vn), 0, sizeof(ValNode)); + + si.id = sip; + si.from = SeqLocStart(region); + si.to = SeqLocStop(region); + si.strand = SeqLocStrand(region); + + if ((si.from < 0) || (si.to < 0) || (si.from > si.to)) return FALSE; + + vn.choice = SEQLOC_INT; + vn.data.ptrvalue = &si; + + switch (type) + { + case 1: + return ObjMgrSelect(entityID, itemID, OBJ_BIOSEQ, OM_REGION_SEQLOC, &vn); + case 2: + return ObjMgrAlsoSelect(entityID, itemID, OBJ_BIOSEQ, OM_REGION_SEQLOC, &vn); + case 3: + return ObjMgrDeSelect(entityID, itemID, OBJ_BIOSEQ, OM_REGION_SEQLOC, &vn); + case 4: + return ObjMgrSetColor(entityID, itemID, OBJ_BIOSEQ, + OM_REGION_SEQLOC, &vn, rgb); + default: + break; + } + + return FALSE; +} + +/***************************************************************************** +* +* SpreadGapsInDeltaSeq(BioseqPtr bsp) +* bsp must be a delta seq +* function counts deltas with known lengths ( = known_len) +* counts deltas which are gaps of unknown length ( = unk_count) +* these can delta of length 0, delta with fuzz = lim (unk), +* or SEQLOC_NULL +* converts all unknown gaps to delta with fuzz = lim(unk) +* sets length of all unknown gaps to +* (bsp->length - known_len)/unk_count +* any reminder spread over first few gaps +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SpreadGapsInDeltaSeq (BioseqPtr bsp) +{ + Boolean retval = FALSE; + Int4 known_len = 0, + total_gap, gap_len, + unk_count = 0, + remainder; + DeltaSeqPtr dsp; + SeqLocPtr slocp; + SeqLitPtr slp; + IntFuzzPtr ifp; + + if (bsp == NULL) return retval; + if ((bsp->repr != Seq_repr_delta) || (bsp->seq_ext == NULL)) + return retval; + + retval = TRUE; /* can function */ + + for (dsp = (DeltaSeqPtr)(bsp->seq_ext); dsp != NULL; dsp = dsp->next) + { + switch (dsp->choice) + { + case 1: /* SeqLocPtr */ + slocp = (SeqLocPtr)(dsp->data.ptrvalue); + if (slocp == NULL) break; + if (slocp->choice == SEQLOC_NULL) /* convert it */ + { + SeqLocFree(slocp); + slp = SeqLitNew(); + dsp->choice = 2; + dsp->data.ptrvalue = slp; + ifp = IntFuzzNew(); + slp->fuzz = ifp; + ifp->choice = 4; /* lim - type unk */ + unk_count++; + } + else /* count length */ + known_len += SeqLocLen(slocp); + break; + case 2: /* SeqLitPtr */ + slp = (SeqLitPtr)(dsp->data.ptrvalue); + if (slp == NULL) break; + if (slp->seq_data != NULL) /* not a gap */ + { + known_len += slp->length; + break; + } + ifp = slp->fuzz; + if (slp->length == 0) /* unknown length */ + { + unk_count++; + if (ifp != NULL) + { + if (ifp->choice != 4) /* not lim */ + ifp = IntFuzzFree(ifp); + else if (ifp->a != 0) /* not unk */ + ifp = IntFuzzFree(ifp); + } + if (ifp == NULL) + { + ifp = IntFuzzNew(); + ifp->choice = 4; /* lim - unk */ + slp->fuzz = ifp; + } + } + else /* gap length was set */ + { + if (ifp == NULL) /* no fuzz - count length */ + known_len += slp->length; + else /* might be a guess */ + { + if ((ifp->choice == 4) && (ifp->a == 0)) /* lim - unk */ + unk_count++; + else + known_len += slp->length; + } + } + break; + default: + break; + } + + } + + if (unk_count == 0) /* no unknown gaps */ + return retval; + + total_gap = bsp->length - known_len; + if (total_gap < 0) + total_gap = 0; + gap_len = total_gap / unk_count; + remainder = total_gap - (gap_len * unk_count); + + for (dsp = (DeltaSeqPtr)(bsp->seq_ext); dsp != NULL; dsp = dsp->next) + { + switch (dsp->choice) + { + case 1: /* SeqLocPtr */ + break; + case 2: /* SeqLitPtr */ + slp = (SeqLitPtr)(dsp->data.ptrvalue); + if (slp == NULL) break; + if (slp->seq_data != NULL) break; + ifp = slp->fuzz; + if (ifp == NULL) break; + if ((ifp->choice != 4) || (ifp->a != 0)) + break; + slp->length = gap_len; + if (remainder) + { + slp->length++; + remainder--; + } + break; + default: + break; + } + } + + return retval; +} + +/***************************************************************************** +* +* CountGapsInDeltaSeq(BioseqPtr bsp, &num_segs, &num_gaps, &known_residues, &num_gaps_faked) +* bsp must be a delta seq +* function counts deltas and returns a profile +* num_segs = total number of segments +* num_gaps = total number of segments representing gaps +* known_residues = number of real residues in the sequence (not gaps) +* num_gaps_faked = number of gaps where real length is not known, but where +* a length was guessed by spreading the total gap length +* out over all gaps evenly. +* +* NOTE: any of these pointers except bsp can be NULL +* +* returns TRUE if values in argument were set. +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL CountGapsInDeltaSeq (BioseqPtr bsp, Int4Ptr num_segs, Int4Ptr num_gaps, Int4Ptr known_residues, Int4Ptr num_gaps_faked, CharPtr buf, Int2 buflen) +{ + Boolean retval = FALSE; + Int4 residues = 0, + segs = 0, + gaps = 0, + len = 0, + fake_gaps = 0, + from = 0, + tlen = 0; + DeltaSeqPtr dsp; + SeqLocPtr slocp; + SeqLitPtr slp; + IntFuzzPtr ifp; + Boolean unk; + static Char tmp[128]; + Int2 diff; + + if (bsp == NULL) return retval; + if ((bsp->repr != Seq_repr_delta) || (bsp->seq_ext == NULL)) + return retval; + + retval = TRUE; /* can function */ + + + for (dsp = (DeltaSeqPtr)(bsp->seq_ext); dsp != NULL; dsp = dsp->next) + { + segs++; + from = len + 1; + switch (dsp->choice) + { + case 1: /* SeqLocPtr */ + slocp = (SeqLocPtr)(dsp->data.ptrvalue); + if (slocp == NULL) break; + if (slocp->choice == SEQLOC_NULL) /* gap */ + { + gaps++; + sprintf(tmp, "* %ld %ld gap of unknown length~", from, len); + diff = LabelCopy(buf, tmp, buflen); + buflen -= diff; + buf += diff; + } + else { /* count length */ + residues += SeqLocLen(slocp); + if (buf != NULL) { + tlen = SeqLocLen(slocp); + len += tlen; + sprintf(tmp, "* %8ld %8ld: contig of %ld bp in length~", from, len, tlen); + diff = LabelCopy(buf, tmp, buflen); + buflen -= diff; + buf += diff; + } + } + break; + case 2: /* SeqLitPtr */ + slp = (SeqLitPtr)(dsp->data.ptrvalue); + if (slp == NULL) break; + tlen = slp->length; + len += tlen; + if (slp->seq_data != NULL) + { + residues += slp->length; + if (buf) { + sprintf(tmp, "* %8ld %8ld: contig of %ld bp in length~", from, len, tlen); + diff = LabelCopy(buf, tmp, buflen); + buflen -= diff; + buf += diff; + } + } + else + { + unk = FALSE; + gaps++; + ifp = slp->fuzz; + if (ifp != NULL) + { + if ((ifp->choice == 4) && (ifp->a == 0)) { + unk = TRUE; + fake_gaps++; + if (buf) { + if (from > len) { + sprintf(tmp, "* gap of unknown length~"); + } else { + sprintf(tmp, "* %8ld %8ld: gap of unknown length~", from, len); + } + diff = LabelCopy(buf, tmp, buflen); + buflen -= diff; + buf += diff; + } + } + } + if (!unk && buf) { + sprintf(tmp, "* %8ld %ld: gap of %8ld bp~", from, len, tlen); + diff = LabelCopy(buf, tmp, buflen); + buflen -= diff; + buf += diff; + } + } + break; + default: + break; + } + } + + if (num_segs != NULL) + *num_segs = segs; + if (num_gaps != NULL) + *num_gaps = gaps; + if (known_residues != NULL) + *known_residues = residues; + if (num_gaps_faked != NULL) + *num_gaps_faked = fake_gaps; + + return retval; +} + + +/***************************************************************************** +* +* SeqMgrAdd(type, data) +* adds a Bioseq or BioseqSet to the sequence manager +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SeqMgrAdd (Uint2 type, Pointer data) +{ + Boolean retval; + + retval = ObjMgrAdd(type, data); + if (type != OBJ_BIOSEQ) + return retval; + + SeqMgrAddToBioseqIndex((BioseqPtr)data); + + return retval; + +} + + +/***************************************************************************** +* +* SeqMgrDelete(type, data) +* deletes a Bioseq or BioseqSet from the sequence manager +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SeqMgrDelete (Uint2 type, Pointer data) +{ + if (type == OBJ_BIOSEQ) /* remove id indexes */ + SeqMgrDeleteFromBioseqIndex((BioseqPtr)data); + + return ObjMgrDelete(type, data); +} + +static Boolean NEAR SeqMgrAddIndexElement(SeqMgrPtr smp, BioseqPtr bsp, CharPtr buf) +{ + SeqIdIndexElementPtr sip, PNTR sipp; + SeqIdIndexBlockPtr sibp, prev; + Int4 imin, imax, i, j; + CharPtr tmp, newstr; + ObjMgrDataPtr omdp; + ObjMgrPtr omp; + + omp = ObjMgrReadLock(); + omdp = ObjMgrFindByData(omp, (Pointer)bsp); /* caching protection */ + ObjMgrUnlock(); + if (omdp == NULL) + { + return FALSE; + } + + sipp = smp->BioseqIndex; + if (smp->BioseqIndexCnt >= smp->BioseqIndexNum) /* expand space */ + { + prev = NULL; + for (sibp = smp->BioseqIndexData; sibp != NULL; sibp = sibp->next) + prev = sibp; + sibp = MemNew(sizeof(SeqIdIndexBlock)); + if (prev != NULL) + prev->next = sibp; + else + smp->BioseqIndexData = sibp; + + smp->BioseqIndex = MemNew((smp->BioseqIndexNum + 100) * +sizeof(SeqIdIndexElementPtr)); + MemCopy(smp->BioseqIndex, sipp, (smp->BioseqIndexNum * +sizeof(SeqIdIndexElementPtr))); + MemFree(sipp); + smp->BioseqIndexNum += 100; + sipp = smp->BioseqIndex; + for (i = 0, j = smp->BioseqIndexCnt; i < 100; i++, j++) + sipp[j] = &(sibp->sid[i]); + } + + i = smp->BioseqIndexCnt; /* empties are at the end */ + sip = sipp[i]; + sip->omdp = omdp; /* fill in the values */ + sip->str = StringSave(buf); + newstr = sip->str; + RevStringUpper(newstr); /* try to avoid case check */ + + imin = 0; /* find where it goes */ + imax = i-1; + if (imax >= 0) + tmp = sipp[imax]->str; + if ((i) && (StringCmp(newstr, sipp[imax]->str) < 0)) + { + i = (imax + imin) / 2; + while (imax > imin) + { + tmp = sipp[i]->str; + if ((j = StringCmp(newstr, tmp)) < 0) + imax = i - 1; + else if (j > 0) + imin = i + 1; + else + break; + i = (imax + imin)/2; + } + + if (StringCmp(newstr, sipp[i]->str) > 0) /* check for off by 1 */ + { + i++; + } + + + imax = smp->BioseqIndexCnt - 1; /* open the array */ + while (imax >= i) + { + sipp[imax+1] = sipp[imax]; + imax--; + } + } + + sipp[i] = sip; /* put in the pointer in order */ + smp->BioseqIndexCnt++; /* got one more */ + return TRUE; +} + +/***************************************************************************** +* +* SeqMgrProcessNonIndexedBioseq() +* Indexes a BioseqPtr by SeqId(s) +* +*****************************************************************************/ +static Boolean NEAR SeqMgrProcessNonIndexedBioseq(void) +{ + BioseqPtr PNTR bspp, bsp; + Int4 i, total, k; + SeqIdPtr sip; + Char buf[80]; + CharPtr tmp; + Uint1 oldchoice; + Boolean indexed; + TextSeqIdPtr tsip; + SeqMgrPtr smp; + + smp = SeqMgrReadLock(); + if (! smp->NonIndexedBioseqCnt) + { + SeqMgrUnlock(); + return TRUE; + } + SeqMgrUnlock(); + + smp = SeqMgrWriteLock(); + if (! smp->NonIndexedBioseqCnt) + { + SeqMgrUnlock(); + return TRUE; + } + + total = smp->NonIndexedBioseqCnt; + bspp = smp->NonIndexedBioseq; + for (i = 0; i < total; i++) + { + indexed = FALSE; + bsp = bspp[i]; + if (bsp != NULL) + { + if (bsp->id != NULL) + { + indexed = TRUE; + for (sip = bsp->id; sip != NULL; sip = sip->next) + { + oldchoice = 0; + switch (sip->choice) + { + case SEQID_GI: + sprintf(buf, "%ld", (long)(sip->data.ptrvalue)); + SeqMgrAddIndexElement(smp, bsp, buf); + break; + case SEQID_EMBL: + case SEQID_DDBJ: + oldchoice = sip->choice; + sip->choice = SEQID_GENBANK; + case SEQID_GENBANK: + case SEQID_PIR: + case SEQID_OTHER: + case SEQID_SWISSPROT: + case SEQID_PRF: + tsip = (TextSeqIdPtr)(sip->data.ptrvalue); + if (tsip->name != NULL) + { + tmp = tsip->accession; + tsip->accession = NULL; + SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, 79); + SeqMgrAddIndexElement(smp, bsp, buf); + tsip->accession = tmp; + } + tmp = tsip->name; + tsip->name = NULL; + SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, 79); + SeqMgrAddIndexElement(smp, bsp, buf); + tsip->name = tmp; + if (oldchoice) + sip->choice = oldchoice; + break; + default: + SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, 79); + SeqMgrAddIndexElement(smp, bsp, buf); + break; + } + } + } + } + if (indexed) + bspp[i] = NULL; + } + + for (i = 0; i < total; i++) + { + if (bspp[i] == NULL) + { + total--; + for (k = i; k < total; k++) + bspp[k] = bspp[k+1]; + i--; + } + } + + smp->NonIndexedBioseqCnt = total; + + SeqMgrUnlock(); + + return TRUE; +} + + + +/***************************************************************************** +* +* SeqMgrAddToBioseqIndex(bsp) +* Indexes a BioseqPtr by SeqId(s) +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SeqMgrAddToBioseqIndex (BioseqPtr bsp) +{ + SeqMgrPtr smp; + BioseqPtr PNTR bspp; + + if (bsp == NULL) + return FALSE; + + smp = SeqMgrWriteLock(); + /* increase array as +needed */ + if (smp->NonIndexedBioseqCnt >= smp->NonIndexedBioseqNum) + { + bspp = smp->NonIndexedBioseq; + smp->NonIndexedBioseq = MemNew((smp->NonIndexedBioseqNum + 10) * +sizeof (BioseqPtr)); + MemCopy(smp->NonIndexedBioseq, bspp, (smp->NonIndexedBioseqNum * +sizeof(BioseqPtr))); + MemFree(bspp); + smp->NonIndexedBioseqNum += 10; + } + + smp->NonIndexedBioseq[smp->NonIndexedBioseqCnt] = bsp; + smp->NonIndexedBioseqCnt++; + + SeqMgrUnlock(); + + SeqMgrProcessNonIndexedBioseq(); + + return TRUE; +} + + +/***************************************************************************** +* +* SeqMgrDeleteDeleteFromBioseqIndex(bsp) +* Removes index on BioseqPtr SeqIds +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SeqMgrDeleteFromBioseqIndex (BioseqPtr bsp) +{ + SeqMgrPtr smp; + SeqIdIndexElementPtr PNTR sipp, sip; + Int4 i, j, num; + BioseqPtr PNTR bspp; + ObjMgrDataPtr omdp; + ObjMgrPtr omp; + + smp = SeqMgrWriteLock(); + /* check if not +indexed yet */ + if (smp->NonIndexedBioseqCnt > 0) + { + num = smp->NonIndexedBioseqCnt; + bspp = smp->NonIndexedBioseq; + for (i = 0; i < num; i++) + { + if (bspp[i] == bsp) + { + num--; + for (j = i; j < num; j++) + bspp[j] = bspp[j+1]; + i--; + } + } + smp->NonIndexedBioseqCnt = num; + } + + num = smp->BioseqIndexCnt; + sipp = smp->BioseqIndex; + omp = ObjMgrReadLock(); + omdp = ObjMgrFindByData(omp, (Pointer)bsp); + ObjMgrUnlock(); + + for (i = 0; i < BIOSEQ_CACHE_NUM; i++) /* remove from BioseqFind cache */ + { + if (omdp_cache[i] == omdp) + { + omdp_cache[i] = NULL; + se_cache[i] = NULL; + } + } + + for (i = 0; i < num; i++) + { + if (sipp[i]->omdp == omdp) + { + sipp[i]->omdp = NULL; + sipp[i]->str = MemFree(sipp[i]->str); + sip = sipp[i]; + for (j = i; j < (num-1); j++) + sipp[j] = sipp[j+1]; + sipp[j] = sip; + num--; i--; + } + } + + smp->BioseqIndexCnt = num; + + SeqMgrUnlock(); + + return TRUE; +} + + +/***************************************************************************** +* +* SeqMgrReplaceInBioseqIndex(bsp) +* Replaces index on BioseqPtr SeqIds +* +*****************************************************************************/ +NLM_EXTERN Boolean LIBCALL SeqMgrReplaceInBioseqIndex (BioseqPtr bsp) +{ + SeqMgrDeleteFromBioseqIndex(bsp); + return SeqMgrAddToBioseqIndex(bsp); +} + +/***************************************************************************** +* +* GetUniGeneIDForSeqId(SeqIdPtr) +* returns the UniGene ID for a SeqId +* returns 0 if can't find it, or not a legal unigene id +* This only applies to genomes division of entrez +* +*****************************************************************************/ + +/***************************************************************** +* +* IT IS a KLUDGE!! Add 1,000,000 to the unigene id +* +*****************************************************************/ +#define KLUDGE_UNIGENE 1000000 /*the kludge offset val add to unigene sequence*/ +#define KLUDGE_FlyBase 2000000 /*the kludge offset for FlyBase*/ +#define KLUDGE_JACKSON 3000000 /*the kludge offset for the Mouse data*/ +#define KLUDGE_JRGP 4000000 /*the kludge offset for the rice data*/ +#define KLUDGE_CESC 5000000 /*the kludge offset for the C. elegans data*/ +#define KLUDGE_BSNR 6000000 /*the kludge offset for the B. subtilis data*/ +#define KLUDGE_HUMGEN 7000000 /*the kludge offset for the Human genomic data*/ +#define KLUDGE_YGG 8000000 /*the kludge offset for the yeast data*/ +#define KLUDGE_NCBICG 9000000 /*the kludge offset for small genomes*/ +#define KLUDGE_MAIZE 10000000 /*the kludge offset for corn*/ + +NLM_EXTERN Int4 LIBCALL GetUniGeneIDForSeqId (SeqIdPtr sip) +{ + DbtagPtr db_tag; + ObjectIdPtr oip; + + if (sip == NULL) + return 0; + + + if(sip->choice != SEQID_GENERAL) + return 0; + + db_tag = sip->data.ptrvalue; + if(db_tag == NULL || db_tag->db == NULL) + return 0; + + oip = db_tag->tag; + if(oip == NULL || oip->id == 0) + return 0; + + if(StringCmp(db_tag->db, "UNIGENE") == 0) + return (KLUDGE_UNIGENE+ oip->id); + if(StringCmp(db_tag->db, "UniGene") == 0) + return (KLUDGE_UNIGENE+ oip->id); + if(StringCmp(db_tag->db, "FlyBase") == 0) + return (KLUDGE_FlyBase+ oip->id); + if(StringCmp(db_tag->db, "JACKSON") == 0) + return (KLUDGE_JACKSON+ oip->id); + if(StringCmp(db_tag->db, "JRGP") == 0) + return (KLUDGE_JRGP + oip->id); + if(StringCmp(db_tag->db, "CESC") == 0) + return (KLUDGE_CESC + oip->id); + if(StringCmp(db_tag->db, "BSNR") == 0) + return (KLUDGE_BSNR + oip->id); + if(StringCmp(db_tag->db, "HUMGEN") == 0) + return (KLUDGE_HUMGEN + oip->id); + if(StringCmp(db_tag->db, "YGG") == 0) + return (KLUDGE_YGG + oip->id); + if(StringCmp(db_tag->db, "NCBICG") == 0) + return (KLUDGE_NCBICG + oip->id); + if(StringCmp(db_tag->db, "MAIZE") == 0) + return (KLUDGE_MAIZE + oip->id); + return 0; + +} + + +/***************************************************************************** +* +* BioseqExtra extensions to preindex for rapid retrieval +* +*****************************************************************************/ + +/* +* remaining to be done are mapping tables for rapid coordinate conversion +* between genome record and parts, genomic DNA and mRNA, and mRNA and protein +*/ + +static ObjMgrDataPtr SeqMgrGetOmdpForPointer (Pointer ptr) + +{ + ObjMgrDataPtr omdp; + ObjMgrPtr omp; + + if (ptr == NULL) return NULL; + omp = ObjMgrWriteLock (); + omdp = ObjMgrFindByData (omp, ptr); + ObjMgrUnlock (); + return omdp; +} + +static ObjMgrDataPtr SeqMgrGetOmdpForBioseq (BioseqPtr bsp) + +{ + ObjMgrDataPtr omdp; + ObjMgrPtr omp; + + if (bsp == NULL) return NULL; + omdp = (ObjMgrDataPtr) bsp->omdp; + if (omdp != NULL) return omdp; + omp = ObjMgrWriteLock (); + omdp = ObjMgrFindByData (omp, bsp); + ObjMgrUnlock (); + bsp->omdp = (Pointer) omdp; + return omdp; +} + +static SeqEntryPtr SeqMgrGetTopSeqEntryForEntity (Uint2 entityID) + +{ + ObjMgrDataPtr omdp; + SeqSubmitPtr ssp; + + omdp = ObjMgrGetData (entityID); + if (omdp == NULL) return FALSE; + switch (omdp->datatype) { + case OBJ_SEQSUB : + ssp = (SeqSubmitPtr) omdp->dataptr; + if (ssp != NULL && ssp->datatype == 1) { + return (SeqEntryPtr) ssp->data; + } + break; + case OBJ_BIOSEQ : + case OBJ_BIOSEQSET : + return (SeqEntryPtr) omdp->choice; + default : + break; + } + return NULL; +} + +static Boolean SeqMgrClearBioseqExtraData (ObjMgrDataPtr omdp) + +{ + BioseqExtraPtr bspextra; + SMFeatBlockPtr currf; + SMSeqIdxPtr currp; + Int2 i; + SMFeatItemPtr itemf; + SMFeatBlockPtr nextf; + SMSeqIdxPtr nextp; + + if (omdp == NULL) return FALSE; + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return FALSE; + + /* free sorted arrays of pointers into data blocks */ + + bspextra->featsByID = MemFree (bspextra->featsByID); + bspextra->featsBySfp = MemFree (bspextra->featsBySfp); + bspextra->featsByPos = MemFree (bspextra->featsByPos); + bspextra->genesByPos = MemFree (bspextra->genesByPos); + bspextra->mRNAsByPos = MemFree (bspextra->mRNAsByPos); + bspextra->CDSsByPos = MemFree (bspextra->CDSsByPos); + bspextra->pubsByPos = MemFree (bspextra->pubsByPos); + bspextra->orgsByPos = MemFree (bspextra->orgsByPos); + + /* free arrays to speed mapping from parts to segmented bioseq */ + + bspextra->partsByLoc = MemFree (bspextra->partsByLoc); + bspextra->partsBySeqId = MemFree (bspextra->partsBySeqId); + + /* free data blocks of feature information */ + + currf = bspextra->featlisthead; + while (currf != NULL) { + nextf = currf->next; + + if (currf->data != NULL) { + + /* free allocated label strings within block items */ + + for (i = 0; i < currf->index; i++) { + itemf = &(currf->data [i]); + MemFree (itemf->label); + MemFree (itemf->ivals); + } + + /* free array of SMFeatItems */ + + MemFree (currf->data); + } + + MemFree (currf); + currf = nextf; + } + + /* free data blocks of parts to segment mapping information */ + + currp = bspextra->segparthead; + while (currp != NULL) { + nextp = currp->next; + SeqLocFree (currp->slp); + MemFree (currp->seqIdOfPart); + MemFree (currp); + currp = nextp; + } + + /* clean interval list once implemented */ + + bspextra->featlisthead = NULL; + bspextra->featlisttail = NULL; + bspextra->segparthead = NULL; + + bspextra->numfeats = 0; + bspextra->numgenes = 0; + bspextra->nummRNAs = 0; + bspextra->numCDSs = 0; + bspextra->numpubs = 0; + bspextra->numorgs = 0; + bspextra->numsegs = 0; + + bspextra->min = INT4_MAX; + bspextra->blocksize = 50; + + bspextra->protFeat = NULL; + bspextra->cdsOrRnaFeat = NULL; + + /* free genome - parts mapping arrays when they are added */ + + return TRUE; +} + +static Boolean DoSeqMgrFreeBioseqExtraData (ObjMgrDataPtr omdp) + +{ + if (omdp == NULL) return FALSE; + if (omdp->datatype != OBJ_BIOSEQ && omdp->datatype != OBJ_BIOSEQSET) return FALSE; + if (omdp->extradata != NULL) { + SeqMgrClearBioseqExtraData (omdp); + omdp->extradata = MemFree (omdp->extradata); + omdp->reapextra = NULL; + omdp->reloadextra = NULL; + omdp->freeextra = NULL; + } + return TRUE; +} + +/* object manager callbacks to reap, reload, and free extra bioseq data */ + +NLM_EXTERN Pointer LIBCALLBACK SeqMgrReapBioseqExtraFunc (Pointer data) + +{ + BioseqExtraPtr bspextra; + SMFeatBlockPtr curr; + Int2 i; + SMFeatItemPtr item; + ObjMgrDataPtr omdp; + + omdp = (ObjMgrDataPtr) data; + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL; + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return NULL; + + /* loop through data blocks of feature information */ + + curr = bspextra->featlisthead; + while (curr != NULL) { + + /* NULL out pointers to cached out feature and annot */ + + if (curr->data != NULL) { + for (i = 0; i < curr->index; i++) { + item = &(curr->data [i]); + item->sfp = NULL; + item->sap = NULL; + } + } + + curr = curr->next; + } + + return NULL; +} + +NLM_EXTERN Pointer LIBCALLBACK SeqMgrReloadBioseqExtraFunc (Pointer data) + +{ + return NULL; +} + +NLM_EXTERN Pointer LIBCALLBACK SeqMgrFreeBioseqExtraFunc (Pointer data) + +{ + DoSeqMgrFreeBioseqExtraData ((ObjMgrDataPtr) data); + return NULL; +} + +/***************************************************************************** +* +* SeqMgrClearFeatureIndexes clears every bioseq in an entity +* +*****************************************************************************/ + +static void SeqMgrClearIndexesProc (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent) + +{ + BioseqPtr bsp; + BioseqSetPtr bssp; + ObjMgrDataPtr omdp = NULL; + BoolPtr rsult; + + if (sep == NULL || (! IS_Bioseq (sep))) return; + if (IS_Bioseq (sep)) { + bsp = (BioseqPtr) sep->data.ptrvalue; + if (bsp == NULL) return; + omdp = SeqMgrGetOmdpForBioseq (bsp); + } else if (IS_Bioseq_set (sep)) { + bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (bssp == NULL) return; + omdp = SeqMgrGetOmdpForPointer (bssp); + } + if (omdp != NULL && DoSeqMgrFreeBioseqExtraData (omdp)) { + rsult = (BoolPtr) mydata; + *rsult = TRUE; + } +} + +NLM_EXTERN Boolean LIBCALL SeqMgrClearFeatureIndexes (Uint2 entityID, Pointer ptr) + +{ + ObjMgrDataPtr omdp; + Boolean rsult = FALSE; + SeqEntryPtr sep; + + if (entityID == 0) { + entityID = ObjMgrGetEntityIDForPointer (ptr); + } + if (entityID == 0) return FALSE; + sep = SeqMgrGetTopSeqEntryForEntity (entityID); + if (sep == NULL) return FALSE; + SeqEntryExplore (sep, (Pointer) (&rsult), SeqMgrClearIndexesProc); + + /* clear out object manager time of indexing flag and master feature itemID list */ + + omdp = ObjMgrGetData (entityID); + if (omdp != NULL) { + omdp->indexed = 0; + SeqMgrClearBioseqExtraData (omdp); + omdp->extradata = MemFree (omdp->extradata); + omdp->reapextra = NULL; + omdp->reloadextra = NULL; + omdp->freeextra = NULL; + } + return rsult; +} + +/***************************************************************************** +* +* FindAppropriateBioseq finds the segmented bioseq if location is join on parts +* +*****************************************************************************/ + +static BioseqPtr FindAppropriateBioseq (SeqLocPtr loc, BioseqPtr tryfirst) + +{ + BioseqPtr bsp = NULL; + BioseqExtraPtr bspextra; + BioseqSetPtr bssp; + ObjMgrDataPtr omdp; + BioseqPtr part; + SeqEntryPtr sep; + SeqIdPtr sip; + SeqLocPtr slp; + + if (loc == NULL) return NULL; + sip = SeqLocId (loc); + if (sip != NULL) { + if (tryfirst != NULL && SeqIdIn (sip, tryfirst->id)) { + bsp = tryfirst; + } else { + bsp = BioseqFind (sip); + } + + /* first see if this is raw local part of segmented bioseq */ + + if (bsp != NULL && bsp->repr == Seq_repr_raw) { + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp != NULL && omdp->datatype == OBJ_BIOSEQ) { + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra != NULL) { + if (bspextra->parentBioseq != NULL) { + bsp = bspextra->parentBioseq; + } + } + } + } + return bsp; + } + + /* otherwise assume location is on multiple parts of a segmented set */ + + slp = SeqLocFindNext (loc, NULL); + if (slp == NULL) return NULL; + sip = SeqLocId (slp); + if (sip == NULL) return NULL; + part = BioseqFind (sip); + if (part == NULL) return NULL; + omdp = SeqMgrGetOmdpForBioseq (part); + while (omdp != NULL) { + if (omdp->datatype == OBJ_BIOSEQSET) { + bssp = (BioseqSetPtr) omdp->dataptr; + if (bssp != NULL) { + if (bssp->_class == BioseqseqSet_class_segset) { + for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { + if (IS_Bioseq (sep)) { + bsp = (BioseqPtr) sep->data.ptrvalue; + if (bsp != NULL) { + return bsp; + } + } + } + } + } + } + omdp = SeqMgrGetOmdpForPointer (omdp->parentptr); + } + return NULL; +} + +/***************************************************************************** +* +* FindFirstLocalBioseq is called as a last resort if FindAppropriateBioseq +* fails, and it scans the feature location to find the first local bioseq +* referenced by a feature interval +* +*****************************************************************************/ + +static BioseqPtr FindFirstLocalBioseq (SeqLocPtr loc) + +{ + BioseqPtr bsp; + SeqIdPtr sip; + SeqLocPtr slp = NULL; + + if (loc == NULL) return NULL; + + while ((slp = SeqLocFindNext (loc, slp)) != NULL) { + sip = SeqLocId (slp); + if (sip != NULL) { + bsp = BioseqFind (sip); + if (bsp != NULL) return bsp; + } + } + + return NULL; +} + +/***************************************************************************** +* +* GetOffsetInFirstLocalBioseq is called to get the intervals on last resort bioseqs +* +*****************************************************************************/ + +static Int4 GetOffsetInFirstLocalBioseq (SeqLocPtr loc, BioseqPtr in, Uint1 which_end) + +{ + SeqLocPtr slp = NULL; + Int4 val; + + if (loc == NULL) return -1; + + while ((slp = SeqLocFindNext (loc, slp)) != NULL) { + val = GetOffsetInBioseq (slp, in, which_end); + if (val != -1) return val; + } + + return -1; +} + +/***************************************************************************** +* +* SeqMgrFindSMFeatItemPtr and SeqMgrFindSMFeatItemByID return SMFeatItemPtr +* to access internal fields +* SeqMgrGetDesiredDescriptor and SeqMgrGetDesiredFeature take an itemID, +* position index, or SeqDescPtr or SeqFeatPtr, return the SeqDescPtr or +* SeqFeatPtr, and fill in the context structure +* +*****************************************************************************/ + +NLM_EXTERN SMFeatItemPtr LIBCALL SeqMgrFindSMFeatItemPtr (SeqFeatPtr sfp) + +{ + SMFeatItemPtr PNTR array; + BioseqPtr bsp; + BioseqExtraPtr bspextra; + SMFeatBlockPtr curr; + Int2 i; + SMFeatItemPtr item; + Int4 L; + Int4 mid; + ObjMgrDataPtr omdp; + Int4 R; + + if (sfp == NULL) return NULL; + bsp = FindAppropriateBioseq (sfp->location, NULL); + if (bsp == NULL) { + bsp = FindFirstLocalBioseq (sfp->location); + } + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL; + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return NULL; + + /* first try array sorted by SeqFeatPtr value */ + + array = bspextra->featsBySfp; + if (array != NULL && bspextra->numfeats > 0) { + L = 0; + R = bspextra->numfeats - 1; + while (L < R) { + mid = (L + R) / 2; + item = array [mid]; + if (item != NULL && item->sfp < sfp) { + L = mid + 1; + } else { + R = mid; + } + } + + item = array [R]; + if (item->sfp == sfp) return item; + } + + /* now look in feature indices for cached feature information */ + + curr = bspextra->featlisthead; + while (curr != NULL) { + + if (curr->data != NULL) { + for (i = 0; i < curr->index; i++) { + item = &(curr->data [i]); + if (item->sfp == sfp && (! item->ignore)) return item; + } + } + + curr = curr->next; + } + + return NULL; +} + +NLM_EXTERN SMFeatItemPtr LIBCALL SeqMgrFindSMFeatItemByID (Uint2 entityID, BioseqPtr bsp, Uint2 itemID) + +{ + SMFeatItemPtr PNTR array; + BioseqExtraPtr bspextra; + SMFeatBlockPtr curr; + Int2 i; + SMFeatItemPtr item; + Int4 L; + Int4 mid; + ObjMgrDataPtr omdp; + Int4 R; + + if (entityID > 0) { + omdp = ObjMgrGetData (entityID); + if (omdp == NULL) return NULL; + } else { + if (bsp == NULL) return NULL; + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL; + } + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return NULL; + + /* first try array sorted by itemID value */ + + array = bspextra->featsByID; + if (array != NULL && bspextra->numfeats > 0) { + L = 0; + R = bspextra->numfeats - 1; + while (L < R) { + mid = (L + R) / 2; + item = array [mid]; + if (item != NULL && item->itemID < itemID) { + L = mid + 1; + } else { + R = mid; + } + } + + item = array [R]; + if (item->itemID == itemID) return item; + } + + /* now look in feature indices for cached feature information */ + + curr = bspextra->featlisthead; + while (curr != NULL) { + + if (curr->data != NULL) { + for (i = 0; i < curr->index; i++) { + item = &(curr->data [i]); + if (item->itemID == itemID && (! item->ignore)) return item; + } + } + + curr = curr->next; + } + + return NULL; +} + +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetDesiredFeature (Uint2 entityID, BioseqPtr bsp, + Uint2 itemID, Uint2 index, SeqFeatPtr sfp, + SeqMgrFeatContext PNTR context) + +{ + SMFeatItemPtr PNTR array; + BioseqExtraPtr bspextra; + SeqFeatPtr curr; + SMFeatItemPtr item = NULL; + ObjMgrDataPtr omdp; + + if (entityID > 0) { + omdp = ObjMgrGetData (entityID); + if (omdp == NULL) return NULL; + } else { + if (bsp == NULL) return NULL; + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL; + } + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return NULL; + + if (itemID > 0) { + item = SeqMgrFindSMFeatItemByID (entityID, bsp, itemID); + } else if (index > 0) { + array = bspextra->featsByPos; + if (array != NULL && bspextra->numfeats > 0 && index <= bspextra->numfeats) { + item = array [index - 1]; + } + } else if (sfp != NULL) { + item = SeqMgrFindSMFeatItemPtr (sfp); + } + if (item == NULL) return NULL; + + entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr); + + curr = item->sfp; + if (curr != NULL && context != NULL && (! item->ignore)) { + context->entityID = entityID; + context->itemID = item->itemID; + context->sfp = curr; + context->sap = item->sap; + context->bsp = item->bsp; + context->label = item->label; + context->left = item->left; + context->right = item->right; + context->partialL = item->partialL; + context->partialR = item->partialR; + context->farloc = item->farloc; + context->strand = item->strand; + context->seqfeattype = FindFeatFromFeatDefType (item->subtype); + context->featdeftype = item->subtype; + context->numivals = item->numivals; + context->ivals = item->ivals; + context->userdata = NULL; + context->omdp = (Pointer) omdp; + context->index = item->index + 1; + } + return curr; +} + +static ValNodePtr DesiredDescriptorPerBioseq (SeqEntryPtr sep, BioseqPtr bsp, + Uint2 itemID, Uint2 index, ValNodePtr sdp, + SeqMgrDescContext PNTR context) + +{ + BioseqSetPtr bssp; + ValNodePtr curr = NULL; + SeqEntryPtr tmp; + + if (sep != NULL) { + if (IS_Bioseq (sep)) { + bsp = (BioseqPtr) sep->data.ptrvalue; + if (bsp == NULL) return NULL; + } else if (IS_Bioseq_set (sep)) { + bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (bssp == NULL) return NULL; + for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) { + curr = DesiredDescriptorPerBioseq (tmp, NULL, itemID, index, sdp, context); + if (curr != NULL) return curr; + } + return NULL; + } + } + + if (bsp == NULL) return NULL; + + while ((curr = SeqMgrGetNextDescriptor (bsp, curr, 0, context)) != NULL) { + if (itemID > 0 && itemID == context->itemID) return curr; + if (index > 0 && index == context->index) return curr; + if (sdp != NULL && sdp == curr) return curr; + } + + return NULL; +} + +NLM_EXTERN ValNodePtr LIBCALL SeqMgrGetDesiredDescriptor (Uint2 entityID, BioseqPtr bsp, + Uint2 itemID, Uint2 index, ValNodePtr sdp, + SeqMgrDescContext PNTR context) + +{ + SeqMgrDescContext dfaultcontext; + SeqEntryPtr sep; + + if (context == NULL) { + context = &dfaultcontext; + } + + if (entityID > 0) { + sep = SeqMgrGetTopSeqEntryForEntity (entityID); + if (sep == NULL) return NULL; + return DesiredDescriptorPerBioseq (sep, NULL, itemID, index, sdp, context); + } else if (bsp != NULL) { + return DesiredDescriptorPerBioseq (NULL, bsp, itemID, index, sdp, context); + } + + return NULL; +} + +/***************************************************************************** +* +* RecordFeaturesInBioseqs callback explores bioseqs, bioseq sets, and features, +* keeping a running total of the descriptor item counts, and records specific +* information about features on each bioseq +* +*****************************************************************************/ + +typedef struct extraindex { + BioseqPtr lastbsp; + SeqAnnotPtr lastsap; + BioseqSetPtr lastbssp; + SMSeqIdxPtr segpartail; + Int4 cumulative; + Uint2 descrcount; + Uint2 featcount; +} ExtraIndex, PNTR ExtraIndexPtr; + +static void SetDescriptorCounts (ValNodePtr sdp, ExtraIndexPtr exindx, Pointer thisitem, Uint2 thistype) + +{ + Uint2 count = 0; + ObjMgrDataPtr omdp; + + /* count bioseq or bioseq set descriptors, to calculate omdp.lastDescrItemID */ + + if (sdp == NULL || exindx == NULL) return; + if (thistype == OBJ_BIOSEQ) { + omdp = SeqMgrGetOmdpForBioseq ((BioseqPtr) thisitem); + } else { + omdp = SeqMgrGetOmdpForPointer (thisitem); + } + if (omdp == NULL) return; + + omdp->lastDescrItemID = exindx->descrcount; + while (sdp != NULL) { + count++; + sdp = sdp->next; + } + exindx->descrcount += count; +} + +static void CreateBioseqExtraBlock (ObjMgrDataPtr omdp, BioseqPtr bsp) + +{ + BioseqExtraPtr bspextra; + + if (omdp == NULL || omdp->extradata != NULL) return; + + bspextra = (BioseqExtraPtr) MemNew (sizeof (BioseqExtra)); + omdp->extradata = (Pointer) bspextra; + if (bspextra == NULL) return; + + omdp->reapextra = SeqMgrReapBioseqExtraFunc; + omdp->reloadextra = SeqMgrReloadBioseqExtraFunc; + omdp->freeextra = SeqMgrFreeBioseqExtraFunc; + + bspextra->bsp = bsp; + bspextra->omdp = omdp; + bspextra->min = INT4_MAX; +} + +static SeqIdPtr SeqIdWithinBioseq (BioseqPtr bsp, SeqLocPtr slp) + +{ + SeqIdPtr a; + SeqIdPtr b; + + if (bsp == NULL || slp == NULL) return NULL; + a = SeqLocId (slp); + if (a == NULL) return NULL; + for (b = bsp->id; b != NULL; b = b->next) { + if (SeqIdComp (a, b) == SIC_YES) return b; + } + return NULL; +} + +static void ProcessFeatureProducts (SeqFeatPtr sfp, Uint2 itemID, GatherContextPtr gcp) + +{ + BioseqPtr bsp; + BioseqExtraPtr bspextra; + Char buf [81]; + CharPtr ctmp; + Int4 diff; + CharPtr loclbl; + Int4 min; + ObjMgrDataPtr omdp; + CharPtr prodlbl; + SeqFeatPtr prt; + CharPtr ptmp; + SeqAnnotPtr sap; + SeqIdPtr sip; + SeqLocPtr slp; + ValNode vn; + + if (sfp == NULL || sfp->product == NULL) return; + if (sfp->data.choice != SEQFEAT_CDREGION && sfp->data.choice != SEQFEAT_RNA) return; + + sip = SeqLocId (sfp->product); + if (sip == NULL) return; + bsp = BioseqFind (sip); + if (bsp == NULL) return; + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return; + + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) { + CreateBioseqExtraBlock (omdp, bsp); + bspextra = (BioseqExtraPtr) omdp->extradata; + } + if (bspextra == NULL) return; + + /* cds or rna reference stored in product bioseq's omdp.cdsOrRnaFeat */ + + if (bspextra->cdsOrRnaFeat != NULL && bspextra->cdsOrRnaFeat != sfp) { + FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT); + ctmp = SeqLocPrint (sfp->location); + loclbl = ctmp; + if (loclbl == NULL) { + loclbl = "?"; + } + ptmp = SeqLocPrint (sfp->product); + prodlbl = ptmp; + if (prodlbl == NULL) { + prodlbl = "?"; + } + ErrPostItem (SEV_WARNING, 0, 0, + "SeqMgr indexing cds or rna progenitor already set - Feature: %s - Location [%s] - Product [%s]", + buf, loclbl, prodlbl); + MemFree (ctmp); + MemFree (ptmp); + } + + if (omdp->tempload == TL_NOT_TEMP) { + bspextra->cdsOrRnaFeat = sfp; + } + if (sfp->data.choice == SEQFEAT_RNA) return; + + /* if protFeat exists it was set by exhaustive gather on protein bioseq */ + + if (bspextra->protFeat != NULL) return; + + /* calculate largest protein feature on cds's product bioseq */ + + min = INT4_MAX; + vn.choice = SEQLOC_WHOLE; + vn.data.ptrvalue = (Pointer) bsp->id; + vn.next = NULL; + slp = (Pointer) (&vn); + + sap = bsp->annot; + while (sap != NULL) { + if (sap->type == 1) { + prt = (SeqFeatPtr) sap->data; + while (prt != NULL) { + if (prt->data.choice == SEQFEAT_PROT) { + + /* get SeqId in bioseq that matches SeqId used for location */ + + vn.data.ptrvalue = SeqIdWithinBioseq (bsp, prt->location); + + diff = SeqLocAinB (prt->location, slp); + if (diff >= 0) { + if (diff < min) { + min = diff; + if (omdp->tempload == TL_NOT_TEMP) { + bspextra->protFeat = prt; + } + } + } + } + prt = prt->next; + } + } + sap = sap->next; + } +} + +static void RecordOneFeature (BioseqExtraPtr bspextra, ObjMgrDataPtr omdp, + BioseqPtr bsp, ExtraIndexPtr exindx, SeqFeatPtr sfp, + Int4 left, Int4 right, Uint2 itemID, Boolean farloc, + Boolean ignore) + +{ + Char buf [129]; + SMFeatBlockPtr curr; + Int2 i; + SMFeatItemPtr item; + Int4Ptr ivals; + SeqLocPtr loc; + SMFeatBlockPtr next; + Int2 numivals = 0; + Boolean single_interval; + SeqLocPtr slp = NULL; + Uint1 subtype = 0; + + if (bspextra == NULL || omdp == NULL || bsp == NULL || exindx == NULL || sfp == NULL) return; + + if (bspextra->featlisttail != NULL) { + + /* just in case blocksize should was not set for some reason */ + + if (bspextra->blocksize < 1) { + bspextra->blocksize = 5; + } + + curr = bspextra->featlisttail; + if (curr->index >= bspextra->blocksize) { + + /* allocate next chunk in linked list of blocks */ + + next = (SMFeatBlockPtr) MemNew (sizeof (SMFeatBlock)); + curr->next = next; + + if (next != NULL) { + bspextra->featlisttail = next; + curr = next; + } + } + + if (curr->index < bspextra->blocksize) { + + /* allocate data block if not yet done for this chunk */ + + if (curr->data == NULL) { + curr->data = (SMFeatItemPtr) MemNew (sizeof (SMFeatItem) * (size_t) (bspextra->blocksize)); + } + + /* now record desired information about current feature */ + + if (curr->data != NULL) { + item = &(curr->data [curr->index]); + if (omdp->tempload == TL_NOT_TEMP) { + item->sfp = sfp; + item->sap = exindx->lastsap; + item->bsp = bsp; + } + FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT); + item->label = StringSaveNoNull (buf); + item->left = left; + item->right = right; + CheckSeqLocForPartial (sfp->location, &(item->partialL), &(item->partialR)); + item->farloc = farloc; + item->strand = SeqLocStrand (sfp->location); + subtype = FindFeatDefType (sfp); + item->subtype = subtype; + item->itemID = itemID; + item->ignore = ignore; + + /* record start/stop pairs of intervals on target bioseq */ + + single_interval = (Boolean) (item->subtype == FEATDEF_GENE || + item->subtype == FEATDEF_PUB); + loc = SeqLocMerge (bsp, sfp->location, NULL, single_interval, TRUE, FALSE); + + slp = NULL; + while ((slp = SeqLocFindNext (loc, slp)) != NULL) { + numivals++; + } + if (numivals > 0) { + ivals = MemNew (sizeof (Int4) * (numivals * 2)); + item->ivals = ivals; + item->numivals = numivals; + slp = NULL; + i = 0; + while ((slp = SeqLocFindNext (loc, slp)) != NULL) { + ivals [i] = SeqLocStart (slp); + i++; + ivals [i] = SeqLocStop (slp); + i++; + } + } + SeqLocFree (loc); + } + + /* increment count on current block */ + + (curr->index)++; + + /* count all features, per bioseq and per entity */ + + (bspextra->numfeats)++; + (exindx->featcount)++; + + /* count all gene, publication, and biosource features separately */ + + if (subtype == FEATDEF_GENE) { + (bspextra->numgenes)++; + } + if (subtype == FEATDEF_mRNA) { + (bspextra->nummRNAs)++; + } + if (subtype == FEATDEF_CDS) { + (bspextra->numCDSs)++; + } + if (subtype == FEATDEF_PUB) { + (bspextra->numpubs)++; + } + if (subtype == FEATDEF_BIOSRC) { + (bspextra->numorgs)++; + } + + } + } +} + +/* callback for recording features and descriptor, prot, and cdsOrRna information */ + +static Boolean RecordFeaturesInBioseqs (GatherContextPtr gcp) + +{ + BioseqPtr bsp = NULL; + BioseqExtraPtr bspextra; + BioseqSetPtr bssp = NULL; + Char buf [81]; + Int2 count; + CharPtr ctmp; + Int4 diff; + ExtraIndexPtr exindx; + Int4 left; + CharPtr loclbl; + ObjMgrDataPtr omdp; + Int4 right; + SeqAnnotPtr sap = NULL; + ValNodePtr sdp = NULL; + SeqFeatPtr sfp = NULL; + SeqLocPtr slp; + SeqFeatPtr tmp; + Boolean usingLocalBsp = FALSE; + ValNode vn; + + switch (gcp->thistype) { + case OBJ_BIOSEQ : + bsp = (BioseqPtr) gcp->thisitem; + if (bsp == NULL) return TRUE; + sdp = bsp->descr; + break; + case OBJ_BIOSEQSET : + bssp = (BioseqSetPtr) gcp->thisitem; + if (bssp == NULL) return TRUE; + sdp = bssp->descr; + break; + case OBJ_SEQANNOT : + sap = (SeqAnnotPtr) gcp->thisitem; + break; + case OBJ_SEQFEAT : + sfp = (SeqFeatPtr) gcp->thisitem; + break; + default : + return TRUE; + } + + exindx = (ExtraIndexPtr) gcp->userdata; + if (exindx == NULL) return FALSE; + + /* save bspItemID to support bioseq explore functions */ + + if (bsp != NULL) { + + /* save last BioseqPtr to check first for appropriate bioseq */ + + exindx->lastbsp = bsp; + + /* blocksize for new block based only on features packaged on bioseq */ + + exindx->lastbssp = NULL; + + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp != NULL) { + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) { + CreateBioseqExtraBlock (omdp, bsp); + bspextra = (BioseqExtraPtr) omdp->extradata; + } + if (bspextra != NULL) { + bspextra->bspItemID = gcp->itemID; + } + } + } + + /* save last BioseqSetPtr to calculate blocksize from bioseq set and bioseq features, + features on bioseq set presumably being CDS or mRNA and applying only to nucleotides */ + + if (bssp != NULL) { + exindx->lastbssp = bssp; + } + + /* count bioseq or bioseq set descriptors, to calculate lastDescrItemID */ + + if (sdp != NULL) { + SetDescriptorCounts (sdp, exindx, gcp->thisitem, gcp->thistype); + return TRUE; + } + + /* save SeqAnnotPtr containing next features to be gathered */ + + if (sap != NULL) { + exindx->lastsap = sap; + return TRUE; + } + + /* otherwise index features on every bioseq in entity */ + + if (sfp == NULL) return TRUE; + + /* cds or rna reference stored in product bioseq's omdp.cdsOrRnaFeat, + best protein feature in omdp.protFeat (do before adding CDS) */ + + if (sfp->product != NULL) { + ProcessFeatureProducts (sfp, gcp->itemID, gcp); + } + + bsp = FindAppropriateBioseq (sfp->location, exindx->lastbsp); + + /* failure here can be due to SeqLoc that references far accession */ + + if (bsp == NULL) { + + /* if far accession, find first local bioseq on any location interval */ + + bsp = FindFirstLocalBioseq (sfp->location); + + /* report whether far accession was able to be handled */ + + FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT); + ctmp = SeqLocPrint (sfp->location); + loclbl = ctmp; + if (loclbl == NULL) { + loclbl = "?"; + } + + if (bsp == NULL) { + ErrPostItem (SEV_WARNING, 0, 0, + "SeqMgr indexing feature location problem - Feature: %s - Location [%s]", + buf, loclbl); + } else { + /* + ErrPostItem (SEV_INFO, 0, 0, + "SeqMgr indexing detected and handled far accession - Feature: %s - Location [%s]", + buf, loclbl); + */ + } + MemFree (ctmp); + + if (bsp == NULL) return TRUE; + usingLocalBsp = TRUE; + } + + /* assume subsequent features will be on this bioseq */ + + exindx->lastbsp = bsp; + + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL) return TRUE; + + /* now prepare for adding feature to index */ + + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) { + CreateBioseqExtraBlock (omdp, bsp); + bspextra = (BioseqExtraPtr) omdp->extradata; + } + if (bspextra == NULL) return TRUE; + + /* get extreme left and right extents of feature location */ + + if (usingLocalBsp) { + + left = GetOffsetInFirstLocalBioseq (sfp->location, bsp, SEQLOC_LEFT_END); + if (left == -1) return TRUE; + right = GetOffsetInFirstLocalBioseq (sfp->location, bsp, SEQLOC_RIGHT_END); + if (right == -1) return TRUE; + + } else { + + left = GetOffsetInBioseq (sfp->location, bsp, SEQLOC_LEFT_END); + if (left == -1) return TRUE; + right = GetOffsetInBioseq (sfp->location, bsp, SEQLOC_RIGHT_END); + if (right == -1) return TRUE; + + } + + /* if indexing protein bioseq, store largest protein feature */ + + if (sfp->data.choice == SEQFEAT_PROT) { + vn.choice = SEQLOC_WHOLE; + vn.data.ptrvalue = (Pointer) bsp->id; + vn.next = NULL; + slp = (Pointer) &vn; + + /* get SeqId in bioseq that matches SeqId used for location */ + + vn.data.ptrvalue = (Pointer) SeqIdWithinBioseq (bsp, sfp->location); + + diff = SeqLocAinB (sfp->location, slp); + if (diff >= 0) { + if (diff < bspextra->min) { + bspextra->min = diff; + if (omdp->tempload == TL_NOT_TEMP) { + bspextra->protFeat = sfp; + } + } + } + } + + /* add feature item to linked list of blocks */ + + if (bspextra->featlisthead == NULL) { + bspextra->featlisthead = (SMFeatBlockPtr) MemNew (sizeof (SMFeatBlock)); + + /* for first feature indexed on this bioseq, quickly see if few or many + additional features, since most features on a bioseq are packaged in + the same list, and most proteins only have one bioseq */ + + for (tmp = sfp, count = 0; + tmp != NULL && count < 50; + tmp = tmp->next, count++) continue; + + /* extend count if above features were packaged on a bioseq set (presumably CDS or mRNA) */ + + if (exindx->lastbssp != NULL) { + for (sap = bsp->annot; sap != NULL; sap = sap->next) { + if (sap->type == 1) { + + for (tmp = (SeqFeatPtr) sap->data; + tmp != NULL && count < 50; + tmp = tmp->next, count++) continue; + + } + } + } + + bspextra->blocksize = count; + } + if (bspextra->featlisttail == NULL) { + bspextra->featlisttail = bspextra->featlisthead; + } + + if (bspextra->featlisttail != NULL) { + + /* if feature spans origin, record with left < 0 */ + + if (left > right && bsp->topology == TOPOLOGY_CIRCULAR) { + left -= bsp->length; + } + + RecordOneFeature (bspextra, omdp, bsp, exindx, sfp, left, + right, gcp->itemID, usingLocalBsp, FALSE); + + /* record gene, publication, and biosource features twice if spanning the origin */ + + if (left < 0 && bsp->topology == TOPOLOGY_CIRCULAR) { + if (sfp->data.choice == SEQFEAT_GENE || + sfp->data.choice == SEQFEAT_PUB || + sfp->data.choice == SEQFEAT_BIOSRC) { + + RecordOneFeature (bspextra, omdp, bsp, exindx, sfp, left + bsp->length, + right + bsp->length, gcp->itemID, usingLocalBsp, TRUE); + + } + } + } + + return TRUE; +} + +/***************************************************************************** +* +* RecordSegmentsInBioseqs callback explores bioseq segments +* +*****************************************************************************/ + +static Boolean RecordSegmentsInBioseqs (GatherContextPtr gcp) + +{ + BioseqPtr bsp = NULL; + BioseqExtraPtr bspextra; + Char buf [80]; + ExtraIndexPtr exindx; + Int4 from; + ObjMgrDataPtr omdp; + SMSeqIdxPtr segpartptr; + SeqIdPtr sid; + SeqIntPtr sipp; + SeqLocPtr slp = NULL; + Uint1 strand; + Int4 to; + + switch (gcp->thistype) { + case OBJ_BIOSEQ : + bsp = (BioseqPtr) gcp->thisitem; + if (bsp == NULL) return TRUE; + break; + case OBJ_BIOSEQ_SEG : + slp = (SeqLocPtr) gcp->thisitem; + if (slp == NULL) return TRUE; + break; + default : + return TRUE; + } + + exindx = (ExtraIndexPtr) gcp->userdata; + if (exindx == NULL) return FALSE; + + if (bsp != NULL) { + if (bsp->repr == Seq_repr_seg) { + exindx->lastbsp = bsp; + } else { + exindx->lastbsp = NULL; + } + exindx->cumulative = 0; + return TRUE; + } + + if (slp == NULL) return TRUE; + + bsp = exindx->lastbsp; + if (bsp == NULL) return TRUE; + + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL) return TRUE; + + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) { + CreateBioseqExtraBlock (omdp, bsp); + bspextra = (BioseqExtraPtr) omdp->extradata; + } + if (bspextra == NULL) return TRUE; + + if (slp->choice == SEQLOC_INT && slp->data.ptrvalue != NULL) { + sipp = (SeqIntPtr) (slp->data.ptrvalue); + from = sipp->from; + to = sipp->to; + strand = sipp->strand; + } else { + from = 0; + to = SeqLocLen (slp) - 1; + strand = SeqLocStrand (slp); + } + + if (to - from + 1 < 1) return TRUE; + + /* create and fill in SMSeqIdx element */ + + segpartptr = MemNew (sizeof (SMSeqIdx)); + if (segpartptr != NULL) { + sid = SeqLocId (slp); + if (MakeReversedSeqIdString (sid, buf, sizeof (buf) - 1)) { + segpartptr->slp = AsnIoMemCopy (slp, + (AsnReadFunc) SeqLocAsnRead, + (AsnWriteFunc) SeqLocAsnWrite); + segpartptr->seqIdOfPart = StringSave (buf); + segpartptr->parentBioseq = bsp; + segpartptr->cumOffset = exindx->cumulative; + segpartptr->from = from; + segpartptr->to = to; + segpartptr->strand = strand; + segpartptr->itemID = gcp->itemID; + } + } + + exindx->cumulative += (to - from + 1); + + /* link into segparthead list of parts IDs */ + + if (bspextra->segparthead == NULL) { + bspextra->segparthead = segpartptr; + exindx->segpartail = segpartptr; + } else if (exindx->segpartail != NULL) { + exindx->segpartail->next = segpartptr; + exindx->segpartail = segpartptr; + } + + return TRUE; +} + +/***************************************************************************** +* +* SortFeatItemListByID callback sorts array into feature item table by itemID +* SortFeatItemListBySfp callback sorts array into feature item table by feature pointer +* SortFeatItemListByPos callback sorts array into feature item table by feature position +* +*****************************************************************************/ + +static int LIBCALLBACK SortFeatItemListByID (VoidPtr vp1, VoidPtr vp2) + +{ + SMFeatItemPtr PNTR spp1 = vp1; + SMFeatItemPtr PNTR spp2 = vp2; + SMFeatItemPtr sp1; + SMFeatItemPtr sp2; + + if (spp1 == NULL || spp2 == NULL) return 0; + sp1 = *((SMFeatItemPtr PNTR) spp1); + sp2 = *((SMFeatItemPtr PNTR) spp2); + if (sp1 == NULL || sp2 == NULL) return 0; + + if (sp1->itemID > sp2->itemID) { + return 1; + } else if (sp1->itemID < sp2->itemID) { + return -1; + } + + return 0; +} + +static int LIBCALLBACK SortFeatItemListBySfp (VoidPtr vp1, VoidPtr vp2) + +{ + SMFeatItemPtr PNTR spp1 = vp1; + SMFeatItemPtr PNTR spp2 = vp2; + SMFeatItemPtr sp1; + SMFeatItemPtr sp2; + + if (spp1 == NULL || spp2 == NULL) return 0; + sp1 = *((SMFeatItemPtr PNTR) spp1); + sp2 = *((SMFeatItemPtr PNTR) spp2); + if (sp1 == NULL || sp2 == NULL) return 0; + + if (sp1->sfp > sp2->sfp) { + return 1; + } else if (sp1->sfp < sp2->sfp) { + return -1; + } + + return 0; +} + +static int LIBCALLBACK SortFeatItemListByPos (VoidPtr vp1, VoidPtr vp2) + +{ + Int2 compare; + Int2 i; + Int2 j; + Int2 numivals; + SMFeatItemPtr PNTR spp1 = vp1; + SMFeatItemPtr PNTR spp2 = vp2; + SMFeatItemPtr sp1; + SMFeatItemPtr sp2; + + if (spp1 == NULL || spp2 == NULL) return 0; + sp1 = *((SMFeatItemPtr PNTR) spp1); + sp2 = *((SMFeatItemPtr PNTR) spp2); + if (sp1 == NULL || sp2 == NULL) return 0; + + /* feature with smallest left extreme is first */ + + if (sp1->left > sp2->left) { + return 1; + } else if (sp1->left < sp2->left) { + return -1; + + /* reversing order so that longest feature is first */ + + } else if (sp1->right > sp2->right) { + return -1; /* was 1 */ + } else if (sp1->right < sp2->right) { + return 1; /* was -1 */ + + /* given identical extremes, put gene features first */ + + } else if (sp1->subtype == FEATDEF_GENE) { + return -1; + } else if (sp2->subtype == FEATDEF_GENE) { + return 1; + + /* then rna features */ + + } else if (FindFeatFromFeatDefType (sp1->subtype) == SEQFEAT_RNA) { + return -1; + } else if (FindFeatFromFeatDefType (sp2->subtype) == SEQFEAT_RNA) { + return 1; + + /* then cds features */ + + } else if (sp1->subtype == FEATDEF_CDS) { + return -1; + } else if (sp2->subtype == FEATDEF_CDS) { + return 1; + } + + /* next compare internal intervals */ + + numivals = MIN (sp1->numivals, sp2->numivals); + if (numivals > 1 && sp1->ivals != NULL && sp2->ivals != NULL) { + for (i = 0, j = 0; i < numivals; i++) { + + /* check left interval */ + + if (sp1->ivals [i] > sp2->ivals [i]) { + return 1; + } else if (sp1->ivals [i] < sp2->ivals [i]) { + return -1; + } + j++; + + /* check right interval */ + + if (sp1->ivals [i] > sp2->ivals [i]) { + return -1; /* was 1 */ + } else if (sp1->ivals [i] < sp2->ivals [i]) { + return 1; /* was -1 */ + } + j++; + } + } + + /* next compare other feature subtypes */ + + if (sp1->subtype < sp2->subtype) { + return -1; + } else if (sp1->subtype > sp2->subtype) { + return 1; + } + + /* then compare feature label */ + + compare = StringCmp (sp1->label, sp2->label); + if (compare > 0) { + return 1; + } else if (compare < 0) { + return -1; + } + + /* last compare parent seq-annot */ + + if (sp1->sap > sp2->sap) { + return 1; + } else if (sp1->sap < sp2->sap) { + return -1; + } + + return 0; +} + +/***************************************************************************** +* +* IndexSegmentedParts callback builds index to speed up mapping +* of parts to segmented bioseqs +* +*****************************************************************************/ + +static int LIBCALLBACK SortSeqIdxArray (VoidPtr ptr1, VoidPtr ptr2) + +{ + Int2 compare; + SMSeqIdxPtr PNTR partp1 = ptr1; + SMSeqIdxPtr PNTR partp2 = ptr2; + SMSeqIdxPtr part1, part2; + + if (partp1 == NULL || partp2 == NULL) return 0; + part1 = *((SMSeqIdxPtr PNTR) partp1); + part2 = *((SMSeqIdxPtr PNTR) partp2); + if (part1 == NULL || part2 == NULL) return 0; + compare = StringCmp (part1->seqIdOfPart, part2->seqIdOfPart); + if (compare > 0) { + return 1; + } else if (compare < 0) { + return -1; + } + return 0; +} + +static void IndexSegmentedParts (SeqEntryPtr sep, BioseqPtr PNTR lastsegbsp) + +{ + BioseqPtr bsp; + BioseqExtraPtr bspextra; + BioseqSetPtr bssp; + Int2 i; + Int2 numsegs = 0; + Int4 cumulative = 0; + ObjMgrDataPtr omdp; + SMSeqIdxPtr PNTR partsByLoc; + SMSeqIdxPtr PNTR partsBySeqId; + SMSeqIdxPtr segpartptr; + + if (sep == NULL) return; + if (IS_Bioseq_set (sep)) { + bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (bssp == NULL) return; + for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { + IndexSegmentedParts (sep, lastsegbsp); + } + if (bssp->_class == BioseqseqSet_class_segset && lastsegbsp != NULL) { + *lastsegbsp = NULL; + } + return; + } + + if (! IS_Bioseq (sep)) return; + bsp = (BioseqPtr) sep->data.ptrvalue; + if (bsp == NULL) return; + + /* check for raw part packaged with segmented bioseq */ + + if (bsp->repr == Seq_repr_raw && lastsegbsp != NULL && *lastsegbsp != NULL) { + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL) return; + + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) { + CreateBioseqExtraBlock (omdp, bsp); + bspextra = (BioseqExtraPtr) omdp->extradata; + } + if (bspextra == NULL) return; + + /* now record segmented parent of raw part if all are packaged together */ + + bspextra->parentBioseq = *lastsegbsp; + return; + } + + if (bsp->repr != Seq_repr_seg) return; + + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL) return; + + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) { + CreateBioseqExtraBlock (omdp, bsp); + bspextra = (BioseqExtraPtr) omdp->extradata; + } + if (bspextra == NULL) return; + + if (lastsegbsp != NULL) { + *lastsegbsp = bsp; + } + + for (segpartptr = bspextra->segparthead; + segpartptr != NULL; + segpartptr = segpartptr->next) { + numsegs++; + } + + bspextra->numsegs = numsegs; + segpartptr = bspextra->segparthead; + if (numsegs < 1 || segpartptr == NULL) return; + + partsByLoc = (SMSeqIdxPtr PNTR) MemNew (sizeof (SMSeqIdxPtr) * (numsegs + 1)); + bspextra->partsByLoc = partsByLoc; + + if (partsByLoc != NULL) { + i = 0; + while (i < numsegs && segpartptr != NULL) { + partsByLoc [i] = segpartptr; + segpartptr = segpartptr->next; + i++; + } + + partsBySeqId = (SMSeqIdxPtr PNTR) MemNew (sizeof (SMSeqIdxPtr) * (numsegs + 1)); + bspextra->partsBySeqId = partsBySeqId; + + if (partsBySeqId != NULL) { + for (i = 0; i < numsegs; i++) { + partsBySeqId [i] = partsByLoc [i]; + } + + /* sort array by SeqId for binary search */ + + HeapSort ((Pointer) partsBySeqId, numsegs, sizeof (SMSeqIdxPtr), SortSeqIdxArray); + } + + } +} + +/***************************************************************************** +* +* IndexRecordedFeatures callback builds sorted arrays of features and genes +* +*****************************************************************************/ + +static void IndexRecordedFeatures (SeqEntryPtr sep) + +{ + BioseqPtr bsp; + BioseqExtraPtr bspextra; + BioseqSetPtr bssp; + SMFeatBlockPtr curr; + SMFeatItemPtr PNTR featsByID; + SMFeatItemPtr PNTR featsBySfp; + SMFeatItemPtr PNTR featsByPos; + SMFeatItemPtr PNTR genesByPos; + SMFeatItemPtr PNTR mRNAsByPos; + SMFeatItemPtr PNTR CDSsByPos; + SMFeatItemPtr PNTR pubsByPos; + SMFeatItemPtr PNTR orgsByPos; + Int4 i; + Int4 j; + SMFeatItemPtr item; + Int4 numfeats; + Int4 numgenes; + Int4 nummRNAs; + Int4 numCDSs; + Int4 numpubs; + Int4 numorgs; + ObjMgrDataPtr omdp; + + if (sep == NULL) return; + if (IS_Bioseq_set (sep)) { + bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (bssp == NULL) return; + for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { + IndexRecordedFeatures (sep); + } + return; + } + + if (! IS_Bioseq (sep)) return; + bsp = (BioseqPtr) sep->data.ptrvalue; + + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL) return; + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return; + + numfeats = bspextra->numfeats; + numgenes = bspextra->numgenes; + nummRNAs = bspextra->nummRNAs; + numCDSs = bspextra->numCDSs; + numpubs = bspextra->numpubs; + numorgs = bspextra->numorgs; + + curr = bspextra->featlisthead; + + if (bspextra->numfeats > 0 && curr != NULL) { + + /* build array of pointers into feature items */ + + featsByID = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numfeats + 1)); + bspextra->featsByID = featsByID; + + if (featsByID != NULL) { + i = 0; + j = 0; + while (i < numfeats && curr != NULL) { + if (j >= curr->index || j >= bspextra->blocksize) { + j = 0; + curr = curr->next; + } + if (curr != NULL && j < curr->index && curr->data != NULL) { + featsByID [i] = &(curr->data [j]); + i++; + j++; + } + } + if (i < numfeats) { + ErrPostEx (SEV_WARNING, 0, 0, "SeqMgr indexing feature table build problem"); + } + + featsBySfp = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numfeats + 1)); + bspextra->featsBySfp = featsBySfp; + + if (featsBySfp != NULL) { + for (i = 0; i < numfeats; i++) { + featsBySfp [i] = featsByID [i]; + } + + /* sort all features by SeqFeatPtr value */ + + HeapSort ((VoidPtr) featsBySfp, (size_t) numfeats, sizeof (SMFeatItemPtr), SortFeatItemListBySfp); + } + + featsByPos = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numfeats + 1)); + bspextra->featsByPos = featsByPos; + + if (featsByPos != NULL) { + for (i = 0; i < numfeats; i++) { + featsByPos [i] = featsByID [i]; + } + + /* sort all features by feature location on bioseq */ + + HeapSort ((VoidPtr) featsByPos, (size_t) numfeats, sizeof (SMFeatItemPtr), SortFeatItemListByPos); + + for (i = 0; i < numfeats; i++) { + item = featsByPos [i]; + if (item != NULL) { + item->index = i; + } + } + + /* build subarray of sorted gene features for lookup by overlap */ + + if (numgenes > 0) { + + genesByPos = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numgenes + 1)); + bspextra->genesByPos = genesByPos; + + if (genesByPos != NULL) { + i = 0; + j = 0; + while (i < numfeats && j < numgenes) { + item = featsByPos [i]; + if (item->subtype == FEATDEF_GENE) { + genesByPos [j] = item; + j++; + } + i++; + } + } + } + + /* build subarray of sorted mRNA features for lookup by overlap */ + + if (nummRNAs > 0) { + + mRNAsByPos = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (nummRNAs + 1)); + bspextra->mRNAsByPos = mRNAsByPos; + + if (mRNAsByPos != NULL) { + i = 0; + j = 0; + while (i < numfeats && j < nummRNAs) { + item = featsByPos [i]; + if (item->subtype == FEATDEF_mRNA) { + mRNAsByPos [j] = item; + j++; + } + i++; + } + } + } + + /* build subarray of sorted CDS features for lookup by overlap */ + + if (numCDSs > 0) { + + CDSsByPos = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numCDSs + 1)); + bspextra->CDSsByPos = CDSsByPos; + + if (CDSsByPos != NULL) { + i = 0; + j = 0; + while (i < numfeats && j < numCDSs) { + item = featsByPos [i]; + if (item->subtype == FEATDEF_CDS) { + CDSsByPos [j] = item; + j++; + } + i++; + } + } + } + + /* build subarray of sorted publication features for lookup by overlap */ + + if (numpubs > 0) { + + pubsByPos = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numpubs + 1)); + bspextra->pubsByPos = pubsByPos; + + if (pubsByPos != NULL) { + i = 0; + j = 0; + while (i < numfeats && j < numpubs) { + item = featsByPos [i]; + if (item->subtype == FEATDEF_PUB) { + pubsByPos [j] = item; + j++; + } + i++; + } + } + } + + /* build subarray of sorted biosource features for lookup by overlap */ + + if (numorgs > 0) { + + orgsByPos = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numorgs + 1)); + bspextra->orgsByPos = orgsByPos; + + if (orgsByPos != NULL) { + i = 0; + j = 0; + while (i < numfeats && j < numorgs) { + item = featsByPos [i]; + if (item->subtype == FEATDEF_BIOSRC) { + orgsByPos [j] = item; + j++; + } + i++; + } + } + } + } + + } + } +} + +/***************************************************************************** +* +* IndexFeaturesOnEntity makes feature pointers across all Bioseqs in entity +* +*****************************************************************************/ + +static void IndexFeaturesOnEntity (SeqEntryPtr sep, SMFeatItemPtr PNTR featsByID, Int4Ptr countP) + +{ + BioseqPtr bsp; + BioseqExtraPtr bspextra; + BioseqSetPtr bssp; + Int4 count; + Int4 i; + Int4 numfeats; + ObjMgrDataPtr omdp; + + if (sep == NULL || featsByID == NULL || countP == NULL) return; + if (IS_Bioseq_set (sep)) { + bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (bssp == NULL) return; + for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { + IndexFeaturesOnEntity (sep, featsByID, countP); + } + return; + } + + if (! IS_Bioseq (sep)) return; + bsp = (BioseqPtr) sep->data.ptrvalue; + if (bsp == NULL) return; + + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL) return; + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return; + + numfeats = bspextra->numfeats; + if (bspextra->featsByID != NULL || numfeats > 0) { + count = *countP; + + for (i = 0; i < numfeats; i++, count++) { + featsByID [count] = bspextra->featsByID [i]; + } + + *countP = count; + } +} + +/***************************************************************************** +* +* SeqMgrReindexBioseqExtraData refreshes internal indices for rapid retrieval +* +*****************************************************************************/ + +NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeatures (Uint2 entityID, Pointer ptr) + +{ + BioseqExtraPtr bspextra; + Int4 count; + ExtraIndex exind; + SMFeatItemPtr PNTR featsByID; + GatherScope gs; + BioseqPtr lastsegbsp = NULL; + SeqEntryPtr oldscope; + ObjMgrDataPtr omdp; + SeqEntryPtr sep; + + if (entityID == 0) { + entityID = ObjMgrGetEntityIDForPointer (ptr); + } + if (entityID == 0) return 0; + + /* reset any existing index data on all bioseqs in entity */ + + SeqMgrClearFeatureIndexes (entityID, NULL); + + /* want to scope to bioseqs within the entity, to allow for colliding IDs */ + + sep = SeqMgrGetTopSeqEntryForEntity (entityID); + + /* make top SeqEntry if only Bioseq or BioseqSet was read */ + + if (sep == NULL) { + omdp = ObjMgrGetData (entityID); + if (omdp != NULL) { + if (omdp->datatype == OBJ_BIOSEQ || omdp->datatype == OBJ_BIOSEQSET) { + sep = SeqEntryNew (); + if (sep != NULL) { + if (omdp->datatype == OBJ_BIOSEQ) { + sep->choice = 1; + sep->data.ptrvalue = omdp->dataptr; + SeqMgrSeqEntry (SM_BIOSEQ, omdp->dataptr, sep); + } else { + sep->choice = 2; + sep->data.ptrvalue = omdp->dataptr; + SeqMgrSeqEntry (SM_BIOSEQSET, omdp->dataptr, sep); + } + } + sep = GetTopSeqEntryForEntityID (entityID); + } + } + } + + if (sep == NULL) return 0; + + /* clean up many old-style ASN.1 problems without changing structure */ + + BasicSeqEntryCleanup (sep); + + /* set scope for FindAppropriateBioseq, FindFirstLocalBioseq */ + + oldscope = SeqEntrySetScope (sep); + + /* gather all segmented locations */ + + exind.lastbsp = NULL; + exind.lastsap = NULL; + exind.lastbssp = NULL; + exind.segpartail = NULL; + exind.descrcount = 0; + exind.featcount = 0; + + MemSet ((Pointer) (&gs), 0, sizeof (GatherScope)); + MemSet ((Pointer) (gs.ignore), (int) (TRUE), (size_t) (OBJ_MAX * sizeof (Boolean))); + gs.ignore [OBJ_BIOSEQ] = FALSE; + gs.ignore [OBJ_BIOSEQSET] = FALSE; + gs.ignore [OBJ_BIOSEQ_SEG] = FALSE; + gs.scope = sep; + GatherEntity (entityID, (Pointer) (&exind), RecordSegmentsInBioseqs, &gs); + + /* build indexes to speed mapping of parts to segmented bioseq */ + + lastsegbsp = NULL; + + IndexSegmentedParts (sep, &lastsegbsp); + + /* now gather to get descriptor itemID counts on each bioseq or bioseq set, + and record features on the bioseq indicated by the feature location */ + + exind.lastbsp = NULL; + exind.lastsap = NULL; + exind.lastbssp = NULL; + exind.segpartail = NULL; + exind.descrcount = 0; + exind.featcount = 0; + + MemSet ((Pointer) (&gs), 0, sizeof (GatherScope)); + MemSet ((Pointer) (gs.ignore), (int) (TRUE), (size_t) (OBJ_MAX * sizeof (Boolean))); + gs.ignore [OBJ_BIOSEQ] = FALSE; + gs.ignore [OBJ_BIOSEQSET] = FALSE; + gs.ignore [OBJ_SEQANNOT] = FALSE; + gs.ignore [OBJ_SEQFEAT] = FALSE; + gs.scope = sep; + GatherEntity (entityID, (Pointer) (&exind), RecordFeaturesInBioseqs, &gs); + + /* finish building array of sorted features on each indexed bioseq */ + + IndexRecordedFeatures (sep); + + /* resetset scope used to limit FindAppropriateBioseq, FindFirstLocalBioseq */ + + SeqEntrySetScope (oldscope); + + /* stamp top of entity with time of indexing */ + + omdp = ObjMgrGetData (entityID); + if (omdp != NULL) { + omdp->indexed = GetSecs (); + + /* master index of feature if top of entity is not a Bioseq */ + + if (omdp->extradata == NULL && omdp->datatype != OBJ_BIOSEQ) { + + CreateBioseqExtraBlock (omdp, NULL); + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra != NULL) { + + /* make master index of features by itemID at top of entity */ + + if (exind.featcount > 0) { + featsByID = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (exind.featcount + 1)); + if (featsByID != NULL) { + count = 0; + IndexFeaturesOnEntity (sep, featsByID, &count); + + /* sort all features on entity-wide list by itemID */ + + HeapSort ((VoidPtr) featsByID, (size_t) count, sizeof (SMFeatItemPtr), SortFeatItemListByID); + + bspextra->featsByID = featsByID; + bspextra->numfeats = count; + } + } + } + } + } + + return entityID; +} + +/***************************************************************************** +* +* SeqMgrIsBioseqIndexed checks for presence of time of indexing stamp +* +*****************************************************************************/ + +NLM_EXTERN time_t LIBCALL SeqMgrFeaturesAreIndexed (Uint2 entityID) + +{ + ObjMgrDataPtr omdp; + + if (entityID == 0) return 0; + omdp = ObjMgrGetData (entityID); + if (omdp == NULL) return 0; + return omdp->indexed; +} + +/***************************************************************************** +* +* SeqMgrGetBestProteinFeature and SeqMgrGetCDSgivenProduct take a protein +* bioseq to get the best protein feature or encoding CDS +* SeqMgrGetRNAgivenProduct takes an mRNA (cDNA) bioseq and gets encoding mRNA +* feature on the genomic bioseq +* +*****************************************************************************/ + +static void SetContextForFeature (SeqFeatPtr sfp, SeqMgrFeatContext PNTR context, ObjMgrDataPtr omdp) + +{ + SMFeatItemPtr best; + + if (sfp == NULL || context == NULL || omdp == NULL) return; + best = SeqMgrFindSMFeatItemPtr (sfp); + if (best == NULL) return; + context->entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr); + context->itemID = best->itemID; + context->sfp = best->sfp; + context->sap = best->sap; + context->bsp = best->bsp; + context->label = best->label; + context->left = best->left; + context->right = best->right; + context->partialL = best->partialL; + context->partialR = best->partialR; + context->farloc = best->farloc; + context->strand = best->strand; + context->seqfeattype = FindFeatFromFeatDefType (best->subtype); + context->featdeftype = best->subtype; + context->numivals = best->numivals; + context->ivals = best->ivals; + context->userdata = NULL; + context->omdp = (Pointer) omdp; + context->index = best->index + 1; +} + +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetBestProteinFeature (BioseqPtr bsp, + SeqMgrFeatContext PNTR context) + +{ + BioseqExtraPtr bspextra; + ObjMgrDataPtr omdp; + + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL; + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return NULL; + SetContextForFeature (bspextra->protFeat, context, omdp); + return bspextra->protFeat; +} + +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetCDSgivenProduct (BioseqPtr bsp, + SeqMgrFeatContext PNTR context) + +{ + BioseqExtraPtr bspextra; + ObjMgrDataPtr omdp; + SeqFeatPtr sfp; + + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL; + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return NULL; + sfp = bspextra->cdsOrRnaFeat; + if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return NULL; + SetContextForFeature (sfp, context, omdp); + return sfp; +} + +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetRNAgivenProduct (BioseqPtr bsp, + SeqMgrFeatContext PNTR context) + +{ + BioseqExtraPtr bspextra; + ObjMgrDataPtr omdp; + SeqFeatPtr sfp; + + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL; + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return NULL; + sfp = bspextra->cdsOrRnaFeat; + if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return NULL; + SetContextForFeature (sfp, context, omdp); + return sfp; +} + +/***************************************************************************** +* +* SeqMgrGetGeneXref, SeqMgrGeneIsSuppressed, SeqMgrGetOverlappingGene, +* and SeqMgrGetOverlappingPub +* +*****************************************************************************/ + +static Boolean HasNoText (CharPtr str) + +{ + Char ch; + + if (str != NULL) { + ch = *str; + while (ch != '\0') { + if (ch > ' ') { + return FALSE; + } + str++; + ch = *str; + } + } + return TRUE; +} + +NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXref (SeqFeatPtr sfp) + +{ + GeneRefPtr grp = NULL; + SeqFeatXrefPtr xref; + + if (sfp == NULL) return NULL; + xref = sfp->xref; + while (xref != NULL && xref->data.choice != SEQFEAT_GENE) { + xref = xref->next; + } + if (xref != NULL) { + grp = (GeneRefPtr) xref->data.value.ptrvalue; + } + return grp; +} + +NLM_EXTERN Boolean LIBCALL SeqMgrGeneIsSuppressed (GeneRefPtr grp) + +{ + if (grp == NULL) return FALSE; + if (grp != NULL && HasNoText (grp->locus) && HasNoText (grp->allele) && + HasNoText (grp->desc) && HasNoText (grp->maploc) && + grp->db == NULL && grp->syn == NULL) return TRUE; + return FALSE; +} + +static SeqFeatPtr SeqMgrGetBestOverlappingFeat (SeqLocPtr slp, Uint2 subtype, + SMFeatItemPtr PNTR array, Int4 num, Int4Ptr pos, + SeqMgrFeatContext PNTR context) + +{ + SMFeatItemPtr best = NULL; + BioseqPtr bsp; + BioseqExtraPtr bspextra; + Int4 diff; + SMFeatItemPtr feat; + Uint2 index = 0; + Int4 L; + Int4 left; + Int4 max; + Int4 mid; + ObjMgrDataPtr omdp; + Int4 R; + Int4 right; + Uint1 strand; + + if (pos != NULL) { + *pos = 0; + } + if (slp == NULL) return NULL; + bsp = FindAppropriateBioseq (slp, NULL); + if (bsp == NULL) { + bsp = FindFirstLocalBioseq (slp); + } + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL; + + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return NULL; + + switch (subtype) { + case FEATDEF_GENE : + array = bspextra->genesByPos; + num = bspextra->numgenes; + break; + case FEATDEF_CDS : + array = bspextra->CDSsByPos; + num = bspextra->numCDSs; + break; + case FEATDEF_mRNA : + array = bspextra->mRNAsByPos; + num = bspextra->nummRNAs; + break; + case FEATDEF_PUB : + array = bspextra->pubsByPos; + num = bspextra->numpubs; + break; + case FEATDEF_BIOSRC : + array = bspextra->orgsByPos; + num = bspextra->numorgs; + break; + default : + break; + } + + if (array == NULL || num < 1) return NULL; + + left = GetOffsetInBioseq (slp, bsp, SEQLOC_LEFT_END); + if (left == -1) return NULL; + right = GetOffsetInBioseq (slp, bsp, SEQLOC_RIGHT_END); + if (right == -1) return NULL; + + /* if feature spans origin, normalize with left < 0 */ + + if (left > right && bsp->topology == TOPOLOGY_CIRCULAR) { + left -= bsp->length; + } + + /* binary search to leftmost candidate within the xxxByPos array */ + + L = 0; + R = num - 1; + while (L < R) { + mid = (L + R) / 2; + feat = array [mid]; + if (feat != NULL && feat->right < left) { + L = mid + 1; + } else { + R = mid; + } + } + + /* linear scan to smallest covering gene, publication, or biosource */ + + best = NULL; + index = 0; + + feat = array [R]; + max = INT4_MAX; + strand = SeqLocStrand (slp); + while (R < num && feat != NULL && feat->left <= right) { + if (feat->left <= left && feat->right >= right) { + if (feat->strand == strand || + strand == Seq_strand_unknown || + feat->strand == Seq_strand_unknown) { + diff = (left - feat->left) + (feat->right - right); + if (diff < max) { + best = feat; + index = R; + max = diff; + } + } + } + R++; + feat = array [R]; + } + + if (best != NULL) { + if (pos != NULL) { + *pos = index + 1; + } + if (context != NULL) { + context->entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr); + context->itemID = best->itemID; + context->sfp = best->sfp; + context->sap = best->sap; + context->bsp = best->bsp; + context->label = best->label; + context->left = best->left; + context->right = best->right; + context->partialL = best->partialL; + context->partialR = best->partialR; + context->farloc = best->farloc; + context->strand = best->strand; + context->seqfeattype = FindFeatFromFeatDefType (best->subtype); + context->featdeftype = best->subtype; + context->numivals = best->numivals; + context->ivals = best->ivals; + context->userdata = NULL; + context->omdp = (Pointer) omdp; + context->index = best->index + 1; + } + return best->sfp; + } + + return NULL; +} + +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingGene (SeqLocPtr slp, SeqMgrFeatContext PNTR context) + +{ + return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_GENE, NULL, 0, NULL, context); +} + +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingmRNA (SeqLocPtr slp, SeqMgrFeatContext PNTR context) + +{ + return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_mRNA, NULL, 0, NULL, context); +} + +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingCDS (SeqLocPtr slp, SeqMgrFeatContext PNTR context) + +{ + return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_CDS, NULL, 0, NULL, context); +} + +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingPub (SeqLocPtr slp, SeqMgrFeatContext PNTR context) + +{ + return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_PUB, NULL, 0, NULL, context); +} + +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingSource (SeqLocPtr slp, SeqMgrFeatContext PNTR context) + +{ + return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_BIOSRC, NULL, 0, NULL, context); +} + +/***************************************************************************** +* +* SeqMgrBuildFeatureIndex builds a sorted array index for any feature type +* (gene, mRNA, CDS, publication, and biosource have built-in arrays) +* SeqMgrGetOverlappingFeature uses the array to find feature overlap, +* returning the position in the index +* SeqMgrGetFeatureInIndex gets an arbitrary feature indexed by the array +* +*****************************************************************************/ + +NLM_EXTERN VoidPtr LIBCALL SeqMgrBuildFeatureIndex (BioseqPtr bsp, Int4Ptr num, + Uint1 seqFeatChoice, Uint1 featDefChoice) + +{ + SMFeatItemPtr PNTR array; + BioseqExtraPtr bspextra; + SMFeatItemPtr PNTR featsByPos; + Int4 i; + Int4 j; + SMFeatItemPtr item; + Int4 numfeats; + Int4 numitems; + ObjMgrDataPtr omdp; + Uint1 seqfeattype; + + if (num != NULL) { + *num = 0; + } + if (bsp == NULL) return NULL; + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL; + + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return NULL; + + featsByPos = bspextra->featsByPos; + numfeats = bspextra->numfeats; + if (featsByPos == NULL || numfeats < 1) return NULL; + + for (i = 0, numitems = 0; i < numfeats; i++) { + item = featsByPos [i]; + if (item->subtype == seqFeatChoice) { + numitems++; + } + } + if (numitems < 1) return NULL; + + array = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numitems + 1)); + if (array == NULL) return NULL; + + i = 0; + j = 0; + while (i < numfeats && j < numitems) { + item = featsByPos [i]; + seqfeattype = FindFeatFromFeatDefType (item->subtype); + if ((seqFeatChoice == 0 || seqfeattype == seqFeatChoice) && + (featDefChoice == 0 || item->subtype == featDefChoice)) { + array [j] = item; + j++; + } + i++; + } + + if (num != NULL) { + *num = numitems; + } + + return (VoidPtr) array; +} + +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeature (SeqLocPtr slp, VoidPtr featarray, + Int4 numfeats, Int4Ptr position, + SeqMgrFeatContext PNTR context) + +{ + return SeqMgrGetBestOverlappingFeat (slp, 0, (SMFeatItemPtr PNTR) featarray, + numfeats, position, context); +} + +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetFeatureInIndex (BioseqPtr bsp, VoidPtr featarray, + Int4 numfeats, Uint2 index, + SeqMgrFeatContext PNTR context) + +{ + SMFeatItemPtr PNTR array; + SeqFeatPtr curr; + Uint2 entityID; + SMFeatItemPtr item = NULL; + ObjMgrDataPtr omdp; + + if (bsp == NULL || featarray == NULL || numfeats < 1) return NULL; + if (index < 1 || index > numfeats) return NULL; + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL; + + array = (SMFeatItemPtr PNTR) featarray; + item = array [index - 1]; + if (item == NULL) return NULL; + + entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr); + + curr = item->sfp; + if (curr != NULL && context != NULL && (! item->ignore)) { + context->entityID = entityID; + context->itemID = item->itemID; + context->sfp = curr; + context->sap = item->sap; + context->bsp = item->bsp; + context->label = item->label; + context->left = item->left; + context->right = item->right; + context->partialL = item->partialL; + context->partialR = item->partialR; + context->farloc = item->farloc; + context->strand = item->strand; + context->seqfeattype = FindFeatFromFeatDefType (item->subtype); + context->featdeftype = item->subtype; + context->numivals = item->numivals; + context->ivals = item->ivals; + context->userdata = NULL; + context->omdp = (Pointer) omdp; + context->index = item->index + 1; + } + return curr; +} + +/***************************************************************************** +* +* SeqMgrGetNextDescriptor and SeqMgrGetNextFeature +* +*****************************************************************************/ + +NLM_EXTERN ValNodePtr LIBCALL SeqMgrGetNextDescriptor (BioseqPtr bsp, ValNodePtr curr, + Uint1 seqDescChoice, + SeqMgrDescContext PNTR context) + +{ + BioseqSetPtr bssp; + Uint2 entityID; + ObjMgrDataPtr omdp; + SeqEntryPtr sep; + ValNode vn; + + if (context == NULL) return NULL; + + /* if curr is NULL, initialize context fields (in user's stack) */ + + if (curr == NULL) { + if (bsp == NULL) return NULL; + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL; + + context->omdp = (Pointer) omdp; + context->itemID = omdp->lastDescrItemID; + context->index = 0; + context->level = 0; + + /* start curr just before beginning of bioseq descriptor list */ + + curr = &vn; + vn.choice = 0; + vn.data.ptrvalue = 0; + vn.next = bsp->descr; + } + + omdp = (ObjMgrDataPtr) context->omdp; + if (omdp == NULL) return NULL; + + entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr); + sep = ObjMgrGetChoiceForData (omdp->dataptr); + + /* now look for next appropriate descriptor after curr in current chain */ + + while (curr != NULL) { + curr = curr->next; + if (curr != NULL) { + (context->itemID)++; + (context->index)++; + if (seqDescChoice == 0 || curr->choice == seqDescChoice) { + context->entityID = entityID; + context->sdp = curr; + context->sep = sep; + context->seqdesctype = curr->choice; + context->userdata = NULL; + context->omdp = (Pointer) omdp; + return curr; + } + } + } + + /* now go up omdp chain looking for next descriptor */ + + while (curr == NULL) { + omdp = SeqMgrGetOmdpForPointer (omdp->parentptr); + if (omdp == NULL) return NULL; + + /* update current omdp in context */ + + context->omdp = (Pointer) omdp; + context->itemID = omdp->lastDescrItemID; + + switch (omdp->datatype) { + case OBJ_BIOSEQ : + bsp = (BioseqPtr) omdp->dataptr; + curr = bsp->descr; + break; + case OBJ_BIOSEQSET : + bssp = (BioseqSetPtr) omdp->dataptr; + curr = bssp->descr; + break; + default : + break; + } + + sep = ObjMgrGetChoiceForData (omdp->dataptr); + (context->level)++; + + /* now look for first appropriate descriptor in current chain */ + + while (curr != NULL) { + (context->itemID)++; + (context->index)++; + if (seqDescChoice == 0 || curr->choice == seqDescChoice) { + context->entityID = entityID; + context->sdp = curr; + context->sep = sep; + context->seqdesctype = curr->choice; + context->userdata = NULL; + context->omdp = (Pointer) omdp; + return curr; + } + curr = curr->next; + } + } + + return curr; +} + +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetNextFeature (BioseqPtr bsp, SeqFeatPtr curr, + Uint1 seqFeatChoice, Uint1 featDefChoice, + SeqMgrFeatContext PNTR context) + +{ + BioseqExtraPtr bspextra; + Uint2 entityID; + SMFeatItemPtr PNTR featsByPos; + Uint2 i; + SMFeatItemPtr item; + ObjMgrDataPtr omdp; + Uint1 seqfeattype; + + if (context == NULL) return NULL; + + /* if curr is NULL, initialize context fields (in user's stack) */ + + if (curr == NULL) { + if (bsp == NULL) return NULL; + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL; + + context->omdp = (Pointer) omdp; + context->index = 0; + } + + omdp = (ObjMgrDataPtr) context->omdp; + if (omdp == NULL) return NULL; + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return NULL; + featsByPos = bspextra->featsByPos; + if (featsByPos == NULL || bspextra->numfeats < 1) return NULL; + + entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr); + + i = context->index; + + /* now look for next appropriate feature */ + + while (i < bspextra->numfeats) { + item = featsByPos [i]; + if (item != NULL) { + curr = item->sfp; + i++; + if (curr != NULL) { + seqfeattype = FindFeatFromFeatDefType (item->subtype); + if ((seqFeatChoice == 0 || seqfeattype == seqFeatChoice) && + (featDefChoice == 0 || item->subtype == featDefChoice) && + (! item->ignore)) { + context->entityID = entityID; + context->itemID = item->itemID; + context->sfp = curr; + context->sap = item->sap; + context->bsp = item->bsp; + context->label = item->label; + context->left = item->left; + context->right = item->right; + context->partialL = item->partialL; + context->partialR = item->partialR; + context->farloc = item->farloc; + context->strand = item->strand; + context->seqfeattype = seqfeattype; + context->featdeftype = item->subtype; + context->numivals = item->numivals; + context->ivals = item->ivals; + context->userdata = NULL; + context->omdp = (Pointer) omdp; + context->index = item->index + 1; + return curr; + } + } + } + } + + return NULL; +} + +/***************************************************************************** +* +* SeqMgrExploreBioseqs, SeqMgrExploreDescriptors, and SeqMgrExploreFeatures +* +*****************************************************************************/ + +static Boolean JustExamineBioseqs (SeqEntryPtr sep, BioseqSetPtr bssp, + SeqMgrBioseqContextPtr context, + SeqMgrBioseqExploreProc userfunc, + Boolean nucs, Boolean prots, Boolean parts) + +{ + BioseqPtr bsp; + BioseqExtraPtr bspextra; + ObjMgrDataPtr omdp; + + if (sep == NULL || context == NULL || userfunc == NULL) return FALSE; + + if (IS_Bioseq (sep)) { + bsp = (BioseqPtr) sep->data.ptrvalue; + if (bsp == NULL) return TRUE; + + /* check for desired molecule type */ + + if (ISA_na (bsp->mol) && (! nucs)) return TRUE; + if (ISA_aa (bsp->mol) && (! prots)) return TRUE; + + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return TRUE; + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return TRUE; + + context->itemID = bspextra->bspItemID; + context->bsp = bsp; + context->sep = sep; + context->bssp = bssp; + context->omdp = omdp; + (context->index)++; + + /* continue until user function returns FALSE, then exit all recursions */ + + if (! userfunc (bsp, context)) return FALSE; + return TRUE; + } + + if (IS_Bioseq_set (sep)) { + bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (bssp == NULL) return TRUE; + + /* check to see if parts should be explored */ + + if (bssp->_class == BioseqseqSet_class_parts && (! parts)) return TRUE; + + /* recursively explore bioseq set until user function returns FALSE */ + + for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { + if (! JustExamineBioseqs (sep, bssp, context, userfunc, nucs, prots, parts)) return FALSE; + } + } + + return TRUE; +} + +NLM_EXTERN Boolean LIBCALL SeqMgrExploreBioseqs (Uint2 entityID, Pointer ptr, Pointer userdata, + SeqMgrBioseqExploreProc userfunc, + Boolean nucs, Boolean prots, Boolean parts) + +{ + SeqMgrBioseqContext context; + SeqEntryPtr sep; + + if (entityID == 0) { + entityID = ObjMgrGetEntityIDForPointer (ptr); + } + if (entityID == 0) return 0; + sep = SeqMgrGetTopSeqEntryForEntity (entityID); + if (sep == NULL) return FALSE; + if (userfunc == NULL) return FALSE; + + context.entityID = entityID; + context.index = 0; + context.userdata = userdata; + + /* recursive call to explore SeqEntry and pass appropriate bioseqs to user */ + + JustExamineBioseqs (sep, NULL, &context, userfunc, nucs, prots, parts); + + return TRUE; +} + +NLM_EXTERN Boolean LIBCALL SeqMgrExploreSegments (BioseqPtr bsp, Pointer userdata, + SeqMgrSegmentExploreProc userfunc) + +{ + BioseqExtraPtr bspextra; + SeqMgrSegmentContext context; + Uint2 entityID; + Uint2 i; + ObjMgrDataPtr omdp; + SMSeqIdxPtr PNTR partsByLoc; + SMSeqIdxPtr segpartptr; + SeqLocPtr slp; + + if (bsp == NULL || bsp->repr != Seq_repr_seg) return FALSE; + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return FALSE; + if (userfunc == NULL) return FALSE; + entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr); + + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return FALSE; + partsByLoc = bspextra->partsByLoc; + if (partsByLoc == NULL || bspextra->numsegs < 1) return FALSE; + + for (i = 0; i < bspextra->numsegs; i++) { + segpartptr = partsByLoc [i]; + if (segpartptr != NULL) { + slp = segpartptr->slp; + context.entityID = entityID; + context.itemID = segpartptr->itemID; + context.slp = slp; + context.parent = segpartptr->parentBioseq; + context.cumOffset = segpartptr->cumOffset; + context.from = segpartptr->from; + context.to = segpartptr->to; + context.strand = segpartptr->strand; + context.userdata = userdata; + context.omdp = (Pointer) omdp; + context.index = i + 1; + if (! userfunc (slp, &context)) return TRUE; + } + } + + return TRUE; +} + +NLM_EXTERN Boolean LIBCALL SeqMgrExploreDescriptors (BioseqPtr bsp, Pointer userdata, + SeqMgrDescExploreProc userfunc, + BoolPtr seqDescFilter) + +{ + BioseqSetPtr bssp; + SeqMgrDescContext context; + Uint2 entityID; + Uint2 itemID; + ObjMgrDataPtr omdp; + ValNodePtr sdp; + SeqEntryPtr sep; + + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return FALSE; + if (userfunc == NULL) return FALSE; + entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr); + + context.index = 0; + context.level = 0; + while (omdp != NULL) { + itemID = omdp->lastDescrItemID; + sdp = NULL; + switch (omdp->datatype) { + case OBJ_BIOSEQ : + bsp = (BioseqPtr) omdp->dataptr; + sdp = bsp->descr; + break; + case OBJ_BIOSEQSET : + bssp = (BioseqSetPtr) omdp->dataptr; + sdp = bssp->descr; + break; + default : + break; + } + + sep = ObjMgrGetChoiceForData (omdp->dataptr); + + /* call for every appropriate descriptor in current chain */ + + while (sdp != NULL) { + itemID++; + if (seqDescFilter == NULL || seqDescFilter [sdp->choice]) { + context.entityID = entityID; + context.itemID = itemID; + context.sdp = sdp; + context.sep = sep; + context.seqdesctype = sdp->choice; + context.userdata = userdata; + context.omdp = (Pointer) omdp; + (context.index)++; + if (! userfunc (sdp, &context)) return TRUE; + } + sdp = sdp->next; + } + + /* now go up omdp chain looking for next descriptor */ + + omdp = SeqMgrGetOmdpForPointer (omdp->parentptr); + (context.level)++; + } + return TRUE; +} + +NLM_EXTERN Boolean LIBCALL SeqMgrExploreFeatures (BioseqPtr bsp, Pointer userdata, + SeqMgrFeatExploreProc userfunc, + SeqLocPtr locationFilter, + BoolPtr seqFeatFilter, BoolPtr featDefFilter) + +{ + BioseqExtraPtr bspextra; + SeqMgrFeatContext context; + Uint2 entityID; + SMFeatItemPtr PNTR featsByPos; + Uint2 i; + SMFeatItemPtr item; + Int4 left = INT4_MIN; + ObjMgrDataPtr omdp; + Int4 right = INT4_MAX; + Uint1 seqfeattype; + SeqFeatPtr sfp; + + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return FALSE; + if (userfunc == NULL) return FALSE; + entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr); + + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return FALSE; + featsByPos = bspextra->featsByPos; + if (featsByPos == NULL || bspextra->numfeats < 1) return FALSE; + + if (locationFilter != NULL) { + left = GetOffsetInBioseq (locationFilter, bsp, SEQLOC_LEFT_END); + if (left == -1) left = INT4_MIN; + right = GetOffsetInBioseq (locationFilter, bsp, SEQLOC_RIGHT_END); + if (right == -1) right = INT4_MAX; + } + + /* call for every appropriate feature in sorted list */ + + for (i = 0; i < bspextra->numfeats; i++) { + item = featsByPos [i]; + if (item != NULL) { + sfp = item->sfp; + seqfeattype = FindFeatFromFeatDefType (item->subtype); + if ((seqFeatFilter == NULL || seqFeatFilter [seqfeattype]) && + (featDefFilter == NULL || featDefFilter [item->subtype]) && + (locationFilter == NULL || (item->right >= left && item->left <= right)) && + (! item->ignore)) { + context.entityID = entityID; + context.itemID = item->itemID; + context.sfp = sfp; + context.sap = item->sap; + context.bsp = item->bsp; + context.label = item->label; + context.left = item->left; + context.right = item->right; + context.partialL = item->partialL; + context.partialR = item->partialR; + context.farloc = item->farloc; + context.strand = item->strand; + context.seqfeattype = seqfeattype; + context.featdeftype = item->subtype; + context.numivals = item->numivals; + context.ivals = item->ivals; + context.userdata = userdata; + context.omdp = (Pointer) omdp; + context.index = item->index + 1; + if (! userfunc (sfp, &context)) return TRUE; + } + } + } + return TRUE; +} + +/***************************************************************************** +* +* SeqMgrMapPartToSegmentedBioseq can speed up sequtil's CheckPointInBioseq +* for indexed part bioseq to segmented bioseq mapping +* +*****************************************************************************/ + +static SMSeqIdxPtr BinarySearchPartToSegmentMap (BioseqPtr in, Int4 pos, BioseqPtr bsp, SeqIdPtr sip) + +{ + BioseqExtraPtr bspextra; + Char buf [80]; + Int2 compare; + ObjMgrDataPtr omdp; + SMSeqIdxPtr PNTR partsBySeqId; + SMSeqIdxPtr segpartptr; + Int2 L, R, mid; + + if (in == NULL) return NULL; + omdp = SeqMgrGetOmdpForBioseq (in); + if (omdp == NULL) return NULL; + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) return NULL; + + partsBySeqId = bspextra->partsBySeqId; + if (partsBySeqId == NULL || bspextra->numsegs < 1) return NULL; + + if (bsp != NULL) { + sip = bsp->id; + } + if (sip == NULL) return NULL; + + /* binary search into array on segmented bioseq sorted by part seqID (reversed) string */ + + while (sip != NULL) { + if (MakeReversedSeqIdString (sip, buf, sizeof (buf) - 1)) { + L = 0; + R = bspextra->numsegs - 1; + while (L < R) { + mid = (L + R) / 2; + segpartptr = partsBySeqId [mid]; + compare = StringCmp (segpartptr->seqIdOfPart, buf); + if (compare < 0) { + L = mid + 1; + } else { + R = mid; + } + } + segpartptr = partsBySeqId [R]; + if (StringCmp (segpartptr->seqIdOfPart, buf) == 0) { + if (pos >= segpartptr->from && pos <= segpartptr->to) { + return segpartptr; + } + } + } + sip = sip->next; + } + + return NULL; +} + +NLM_EXTERN Int4 LIBCALL SeqMgrMapPartToSegmentedBioseq (BioseqPtr in, Int4 pos, BioseqPtr bsp, SeqIdPtr sip) + +{ + BioseqExtraPtr bspextra; + SMSeqIdxPtr currp; + SMSeqIdxPtr nextp; + ObjMgrDataPtr omdp; + SMSeqIdxPtr segpartptr; + + if (in == NULL) return -1; + + /* first check to see if part has been loaded and single map up block installed */ + + if (bsp != NULL) { + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp != NULL) { + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra != NULL) { + + /* no need for partsByLoc or partsBySeqId arrays, just use segparthead linked list */ + + for (segpartptr = bspextra->segparthead; segpartptr != NULL; segpartptr = segpartptr->next) { + if (segpartptr->parentBioseq == in) { + if (pos >= segpartptr->from && pos <= segpartptr->to) { + + /* success, immediate return with mapped up value */ + + if (segpartptr->strand == Seq_strand_minus) { + return segpartptr->cumOffset + (segpartptr->to - pos); + } else { + return segpartptr->cumOffset + (pos - segpartptr->from); + } + } + } + } + } + } + } + + /* otherwise do binary search on segmented bioseq mapping data */ + + segpartptr = BinarySearchPartToSegmentMap (in, pos, bsp, sip); + if (segpartptr == NULL) return -1; + + if (pos >= segpartptr->from && pos <= segpartptr->to) { + + /* install map up block on part, if it has been loaded, to speed up next search */ + + if (bsp != NULL) { + omdp = SeqMgrGetOmdpForBioseq (bsp); + if (omdp != NULL) { + bspextra = (BioseqExtraPtr) omdp->extradata; + if (bspextra == NULL) { + CreateBioseqExtraBlock (omdp, bsp); + bspextra = (BioseqExtraPtr) omdp->extradata; + } + if (bspextra != NULL) { + + /* clean up any old map up info on part */ + + for (currp = bspextra->segparthead; currp != NULL; currp = nextp) { + nextp = currp->next; + SeqLocFree (currp->slp); + MemFree (currp->seqIdOfPart); + MemFree (currp); + } + bspextra->segparthead = NULL; + bspextra->numsegs = 0; + bspextra->partsByLoc = MemFree (bspextra->partsByLoc); + bspextra->partsBySeqId = MemFree (bspextra->partsBySeqId); + + /* allocate single map up block */ + + currp = MemNew (sizeof (SMSeqIdx)); + if (currp != NULL) { + currp->slp = AsnIoMemCopy (segpartptr->slp, + (AsnReadFunc) SeqLocAsnRead, + (AsnWriteFunc) SeqLocAsnWrite); + currp->seqIdOfPart = StringSave (segpartptr->seqIdOfPart); + currp->parentBioseq = segpartptr->parentBioseq; + currp->cumOffset = segpartptr->cumOffset; + currp->from = segpartptr->from; + currp->to = segpartptr->to; + currp->strand = segpartptr->strand; + } + + /* add new map up block to part */ + + bspextra->segparthead = currp; + } + } + } + + /* now return offset result */ + + if (segpartptr->strand == Seq_strand_minus) { + return segpartptr->cumOffset + (segpartptr->to - pos); + } else { + return segpartptr->cumOffset + (pos - segpartptr->from); + } + } + return -1; +} + |