Load ncbi (6.1.20031028) into ncbi-tools6/branches/upstream/current.

author: Aaron M. Ucko <ucko@debian.org> 2005-03-23 23:49:09 +0000
committer: Aaron M. Ucko <ucko@debian.org> 2005-03-23 23:49:09 +0000
commit: 5349ec8772bc373e4c2349a04e57d7952c006326 (patch)
tree: b733fe1df8c0c2d2418b3ce82ebcbd3d0db12a1f /asn/asn.all
parent: 0eff2d00595b4adcf6f1c4e6bbbcf0f416c70310 (diff)
1 files changed, 202 insertions, 2 deletions
diff --git a/asn/asn.all b/asn/asn.all
index aaf0f21b..a59bafef 100644
--- a/asn/asn.all
+++ b/asn/asn.all
@@ -738,7 +738,7 @@ Seq-loc-equiv ::= SET OF Seq-loc      -- for a set of equivalent locations
 END
     
 
---$Revision: 6.6 $
+--$Revision: 6.7 $
 --**********************************************************************
 --
 --  NCBI Sequence elements
@@ -751,7 +751,7 @@ NCBI-Sequence DEFINITIONS ::=
 BEGIN
 
 EXPORTS Bioseq, Seq-annot, Pubdesc, Seq-descr, Seqdesc, Numbering, Heterogen,
-        Seq-hist, GIBB-mol;
+        Seq-hist, GIBB-mol, Seq-literal;
 
 IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
         Seq-align FROM NCBI-Seqalign
@@ -2876,3 +2876,203 @@ PrintFormText ::= SEQUENCE {
     
 END
 
+--$Revision: 6.4 $
+--*********************************************************
+--
+-- ASN.1 and XML for the components of a GenBank format sequence
+-- J.Ostell 2002
+--
+--*********************************************************
+
+NCBI-GBSeq DEFINITIONS ::=
+BEGIN
+
+--********
+--  GBSeq represents the elements in a GenBank style report
+--    of a sequence with some small additions to structure and support
+--    for protein (GenPept) versions of GenBank format as seen in
+--    Entrez. While this represents the simplification, reduction of
+--    detail, and flattening to a single sequence perspective of GenBank
+--    format (compared with the full ASN.1 or XML from which GenBank and
+--    this format is derived at NCBI), it is presented in ASN.1 or XML for
+--    automated parsing and processing. It is hoped that this compromise
+--    will be useful for those bulk processing at the GenBank format level
+--    of detail today. Since it is a compromise, a number of pragmatic
+--    decisions have been made.
+--
+--  In pursuit of simplicity and familiarity a number of
+--    fields do not have full substructure defined here where there is
+--    already a standard GenBank format string. For example:
+--
+--    Date  DD-Mon-YYYY
+--    Authors   LastName, Intials (with periods)
+--   Journal   JounalName Volume (issue), page-range (year)
+--   FeatureLocations as per GenBank feature table, but FeatureIntervals
+--    may also be provided as a convenience
+--   FeatureQualifiers  as per GenBank feature table
+--   Primary has a string that represents a table to construct
+--    a third party (TPA) sequence.
+--   other-seqids can have strings with the "vertical bar format" sequence
+--    identifiers used in BLAST for example, when they are non-genbank types.
+--    Currently in GenBank format you only see GI, but there are others, like
+--    patents, submitter clone names, etc which will appear here, as they
+--    always have in the ASN.1 format, and full XML format.
+--   source-db is a formatted text block for peptides in GenPept format that
+--    carries information from the source protein database.
+--
+--  There are also a number of elements that could have been
+--   more exactly specified, but in the interest of simplicity
+--   have been simply left as options. For example..
+--
+--  accession and accession.version will always appear in a GenBank record
+--   they are optional because this format can also be used for non-GenBank
+--   sequences, and in that case will have only "other-seqids".
+--
+--  sequences will normally all have "sequence" filled in. But contig records
+--    will have a "join" statement in the "contig" slot, and no "sequence".
+--    We also may consider a retrieval option with no sequence of any kind
+--     and no feature table to quickly check minimal values.
+--
+--  a reference may have an author list, or be from a consortium, or both.
+--
+--  some fields, such as taxonomy, do appear as separate elements in GenBank
+--    format but without a specific linetype (in GenBank format this comes
+--    under ORGANISM). Another example is the separation of primary accession
+--    from the list of secondary accessions. In GenBank format primary
+--    accession is just the first one on the list that includes all secondaries
+--    after it.
+--
+--  create-date deserves special comment. The date you see on the right hand
+--    side of the LOCUS line in GenBank format is actually the last date the
+--    the record was modified (or the update-date). The date the record was
+--    first submitted to GenBank appears in the first submission citation in
+--    the reference section. Internally in the databases and ASN.1 NCBI keeps
+--    the first date the record was released into the sequence database at
+--    NCBI as create-date. For records from EMBL, which supports create-date,
+--    it is the date provided by EMBL. For DDBJ records, which do not supply
+--    a create-date (same as GenBank format) the create-date is the first date
+--    NCBI saw the record from DDBJ. For older GenBank records, before NCBI
+--    took responsibility for GenBank, it is just the first date NCBI saw the
+--    record. Create-date can be very useful, so we expose it here, but users
+--    must understand it is only an approximation and comes from many sources,
+--    and with many exceptions and caveats. It does NOT tell you the first
+--    date the public might have seen this record and thus is NOT an accurate
+--    measure for legal issues of precedence.
+--
+--********
+
+GBSeq ::= SEQUENCE {
+	locus VisibleString ,
+	length INTEGER ,
+	strandedness INTEGER {
+		not-set (0) ,
+		single-stranded (1) ,
+		double-stranded (2) ,
+		mixed-stranded (3) } DEFAULT not-set ,
+	moltype INTEGER {
+		nucleic-acid (0) ,
+		dna (1) ,
+		rna (2) ,
+		trna (3) ,
+		rrna (4) ,
+		mrna (5) ,
+		urna (6) ,
+		snrna (7) ,
+		snorna (8) ,
+		peptide (9) } DEFAULT nucleic-acid ,
+	topology INTEGER {
+		linear (1) ,
+		circular (2) } DEFAULT linear ,
+	division VisibleString ,
+	update-date VisibleString ,
+	create-date VisibleString ,
+	definition VisibleString ,
+	primary-accession VisibleString OPTIONAL ,
+	accession-version VisibleString OPTIONAL ,
+	other-seqids SEQUENCE OF GBSeqid OPTIONAL ,
+	secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL,
+	keywords SEQUENCE OF GBKeyword OPTIONAL ,
+	segment VisibleString OPTIONAL ,
+	source VisibleString ,
+	organism VisibleString ,
+	taxonomy VisibleString ,
+	references SEQUENCE OF GBReference ,
+	comment VisibleString OPTIONAL ,
+	primary VisibleString OPTIONAL ,
+	source-db VisibleString OPTIONAL ,
+	feature-table SEQUENCE OF GBFeature OPTIONAL ,
+	sequence VisibleString OPTIONAL ,  -- Optional for other dump forms
+	contig VisibleString OPTIONAL }
+
+	GBSecondary-accn ::= VisibleString
+
+	GBSeqid ::= VisibleString
+
+	GBKeyword ::= VisibleString
+
+	GBReference ::= SEQUENCE {
+		reference VisibleString ,
+		authors SEQUENCE OF GBAuthor OPTIONAL ,
+		consortium VisibleString OPTIONAL ,
+		title VisibleString OPTIONAL ,
+		journal VisibleString ,
+		medline INTEGER OPTIONAL ,
+		pubmed INTEGER OPTIONAL ,
+		remark VisibleString OPTIONAL }
+
+	GBAuthor ::= VisibleString
+
+	GBFeature ::= SEQUENCE {
+		key VisibleString ,
+		location VisibleString ,
+		intervals SEQUENCE OF GBInterval OPTIONAL ,
+		quals SEQUENCE OF GBQualifier OPTIONAL }
+
+	GBInterval ::= SEQUENCE {
+		from INTEGER OPTIONAL ,
+		to INTEGER OPTIONAL ,
+		point INTEGER OPTIONAL ,
+		accession VisibleString }
+
+	GBQualifier ::= SEQUENCE {
+		name VisibleString ,
+		value VisibleString OPTIONAL }
+
+	GBSet ::= SEQUENCE OF GBSeq
+		
+END
+
+--$Revision: 6.1 $
+--**********************************************************************
+--
+--  ASN.1 for a tiny Bioseq in XML
+--    basically a structured FASTA file with a few extras
+--    in this case we drop all modularity of components
+--      All ids are Optional - simpler structure, less checking
+--      Components of organism are hard coded - can't easily add or change
+--      sequence is just string whether DNA or protein
+--  by James Ostell, 2000
+--
+--**********************************************************************
+
+NCBI-TSeq DEFINITIONS ::=
+BEGIN
+
+TSeq ::= SEQUENCE {
+	seqtype ENUMERATED {
+		nucleotide (1),
+		protein (2) },
+	gi INTEGER OPTIONAL,
+	accver VisibleString OPTIONAL,
+	sid VisibleString OPTIONAL,
+	local VisibleString OPTIONAL,
+	taxid INTEGER OPTIONAL,
+	orgname VisibleString OPTIONAL,
+	defline VisibleString,
+	length INTEGER,
+	sequence VisibleString }
+
+TSeqSet ::= SEQUENCE OF TSeq    -- a bunch of them
+
+END
+
author	Aaron M. Ucko <ucko@debian.org>	2005-03-23 23:49:09 +0000
committer	Aaron M. Ucko <ucko@debian.org>	2005-03-23 23:49:09 +0000
commit	5349ec8772bc373e4c2349a04e57d7952c006326 (patch)
tree	b733fe1df8c0c2d2418b3ce82ebcbd3d0db12a1f /asn/asn.all
parent	0eff2d00595b4adcf6f1c4e6bbbcf0f416c70310 (diff)