summaryrefslogtreecommitdiff
path: root/asn/asn.all
diff options
context:
space:
mode:
authorAaron M. Ucko <ucko@debian.org>2005-03-23 23:49:09 +0000
committerAaron M. Ucko <ucko@debian.org>2005-03-23 23:49:09 +0000
commit5349ec8772bc373e4c2349a04e57d7952c006326 (patch)
treeb733fe1df8c0c2d2418b3ce82ebcbd3d0db12a1f /asn/asn.all
parent0eff2d00595b4adcf6f1c4e6bbbcf0f416c70310 (diff)
Load ncbi (6.1.20031028) into ncbi-tools6/branches/upstream/current.
Diffstat (limited to 'asn/asn.all')
-rw-r--r--asn/asn.all204
1 files changed, 202 insertions, 2 deletions
diff --git a/asn/asn.all b/asn/asn.all
index aaf0f21b..a59bafef 100644
--- a/asn/asn.all
+++ b/asn/asn.all
@@ -738,7 +738,7 @@ Seq-loc-equiv ::= SET OF Seq-loc -- for a set of equivalent locations
END
---$Revision: 6.6 $
+--$Revision: 6.7 $
--**********************************************************************
--
-- NCBI Sequence elements
@@ -751,7 +751,7 @@ NCBI-Sequence DEFINITIONS ::=
BEGIN
EXPORTS Bioseq, Seq-annot, Pubdesc, Seq-descr, Seqdesc, Numbering, Heterogen,
- Seq-hist, GIBB-mol;
+ Seq-hist, GIBB-mol, Seq-literal;
IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
Seq-align FROM NCBI-Seqalign
@@ -2876,3 +2876,203 @@ PrintFormText ::= SEQUENCE {
END
+--$Revision: 6.4 $
+--*********************************************************
+--
+-- ASN.1 and XML for the components of a GenBank format sequence
+-- J.Ostell 2002
+--
+--*********************************************************
+
+NCBI-GBSeq DEFINITIONS ::=
+BEGIN
+
+--********
+-- GBSeq represents the elements in a GenBank style report
+-- of a sequence with some small additions to structure and support
+-- for protein (GenPept) versions of GenBank format as seen in
+-- Entrez. While this represents the simplification, reduction of
+-- detail, and flattening to a single sequence perspective of GenBank
+-- format (compared with the full ASN.1 or XML from which GenBank and
+-- this format is derived at NCBI), it is presented in ASN.1 or XML for
+-- automated parsing and processing. It is hoped that this compromise
+-- will be useful for those bulk processing at the GenBank format level
+-- of detail today. Since it is a compromise, a number of pragmatic
+-- decisions have been made.
+--
+-- In pursuit of simplicity and familiarity a number of
+-- fields do not have full substructure defined here where there is
+-- already a standard GenBank format string. For example:
+--
+-- Date DD-Mon-YYYY
+-- Authors LastName, Initials (with periods)
+-- Journal JournalName Volume (issue), page-range (year)
+-- FeatureLocations as per GenBank feature table, but FeatureIntervals
+-- may also be provided as a convenience
+-- FeatureQualifiers as per GenBank feature table
+-- Primary has a string that represents a table to construct
+-- a third party (TPA) sequence.
+-- other-seqids can have strings with the "vertical bar format" sequence
+-- identifiers used in BLAST for example, when they are non-genbank types.
+-- Currently in GenBank format you only see GI, but there are others, like
+-- patents, submitter clone names, etc which will appear here, as they
+-- always have in the ASN.1 format, and full XML format.
+-- source-db is a formatted text block for peptides in GenPept format that
+-- carries information from the source protein database.
+--
+-- There are also a number of elements that could have been
+-- more exactly specified, but in the interest of simplicity
+-- have been simply left as options. For example:
+--
+-- accession and accession.version will always appear in a GenBank record
+-- they are optional because this format can also be used for non-GenBank
+-- sequences, and in that case will have only "other-seqids".
+--
+-- sequences will normally all have "sequence" filled in. But contig records
+-- will have a "join" statement in the "contig" slot, and no "sequence".
+-- We also may consider a retrieval option with no sequence of any kind
+-- and no feature table to quickly check minimal values.
+--
+-- a reference may have an author list, or be from a consortium, or both.
+--
+-- some fields, such as taxonomy, do appear as separate elements in GenBank
+-- format but without a specific linetype (in GenBank format this comes
+-- under ORGANISM). Another example is the separation of primary accession
+-- from the list of secondary accessions. In GenBank format primary
+-- accession is just the first one on the list that includes all secondaries
+-- after it.
+--
+-- create-date deserves special comment. The date you see on the right hand
+-- side of the LOCUS line in GenBank format is actually the last date the
+-- record was modified (or the update-date). The date the record was
+-- first submitted to GenBank appears in the first submission citation in
+-- the reference section. Internally in the databases and ASN.1 NCBI keeps
+-- the first date the record was released into the sequence database at
+-- NCBI as create-date. For records from EMBL, which supports create-date,
+-- it is the date provided by EMBL. For DDBJ records, which do not supply
+-- a create-date (same as GenBank format) the create-date is the first date
+-- NCBI saw the record from DDBJ. For older GenBank records, before NCBI
+-- took responsibility for GenBank, it is just the first date NCBI saw the
+-- record. Create-date can be very useful, so we expose it here, but users
+-- must understand it is only an approximation and comes from many sources,
+-- and with many exceptions and caveats. It does NOT tell you the first
+-- date the public might have seen this record and thus is NOT an accurate
+-- measure for legal issues of precedence.
+--
+--********
+
+GBSeq ::= SEQUENCE { -- one sequence record at GenBank (or GenPept) report level of detail
+ locus VisibleString ,
+ length INTEGER ,
+ strandedness INTEGER {
+ not-set (0) ,
+ single-stranded (1) ,
+ double-stranded (2) ,
+ mixed-stranded (3) } DEFAULT not-set ,
+ moltype INTEGER {
+ nucleic-acid (0) ,
+ dna (1) ,
+ rna (2) ,
+ trna (3) ,
+ rrna (4) ,
+ mrna (5) ,
+ urna (6) ,
+ snrna (7) ,
+ snorna (8) ,
+ peptide (9) } DEFAULT nucleic-acid ,
+ topology INTEGER {
+ linear (1) ,
+ circular (2) } DEFAULT linear ,
+ division VisibleString ,
+ update-date VisibleString , -- DD-Mon-YYYY; last modification date, as shown on the GenBank LOCUS line
+ create-date VisibleString , -- DD-Mon-YYYY; first date NCBI released or saw the record, an approximation only
+ definition VisibleString ,
+ primary-accession VisibleString OPTIONAL , -- always present for GenBank records; optional for non-GenBank sequences
+ accession-version VisibleString OPTIONAL , -- always present for GenBank records; optional for non-GenBank sequences
+ other-seqids SEQUENCE OF GBSeqid OPTIONAL , -- may hold "vertical bar format" ids for non-GenBank id types
+ secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL,
+ keywords SEQUENCE OF GBKeyword OPTIONAL ,
+ segment VisibleString OPTIONAL ,
+ source VisibleString ,
+ organism VisibleString ,
+ taxonomy VisibleString , -- in GenBank format this text comes under ORGANISM, with no linetype of its own
+ references SEQUENCE OF GBReference ,
+ comment VisibleString OPTIONAL ,
+ primary VisibleString OPTIONAL , -- string representing a table to construct a third party (TPA) sequence
+ source-db VisibleString OPTIONAL , -- formatted text block for GenPept peptides, from the source protein database
+ feature-table SEQUENCE OF GBFeature OPTIONAL ,
+ sequence VisibleString OPTIONAL , -- Optional for other dump forms
+ contig VisibleString OPTIONAL } -- contig records carry a "join" statement here and no "sequence"
+
+ GBSecondary-accn ::= VisibleString -- element type of GBSeq.secondary-accessions
+
+ GBSeqid ::= VisibleString -- element type of GBSeq.other-seqids; may be a "vertical bar format" id as used in BLAST
+
+ GBKeyword ::= VisibleString -- element type of GBSeq.keywords
+
+ GBReference ::= SEQUENCE { -- one entry of GBSeq.references
+ reference VisibleString ,
+ authors SEQUENCE OF GBAuthor OPTIONAL , -- a reference may have an author list, a consortium, or both
+ consortium VisibleString OPTIONAL ,
+ title VisibleString OPTIONAL ,
+ journal VisibleString , -- GenBank format string: JournalName Volume (issue), page-range (year)
+ medline INTEGER OPTIONAL ,
+ pubmed INTEGER OPTIONAL ,
+ remark VisibleString OPTIONAL }
+
+ GBAuthor ::= VisibleString -- GenBank format string: LastName, Initials (with periods)
+
+ GBFeature ::= SEQUENCE { -- one entry of GBSeq.feature-table
+ key VisibleString ,
+ location VisibleString , -- location string as per the GenBank feature table
+ intervals SEQUENCE OF GBInterval OPTIONAL , -- may also be provided as a convenience alongside location
+ quals SEQUENCE OF GBQualifier OPTIONAL } -- qualifiers as per the GenBank feature table
+
+ GBInterval ::= SEQUENCE { -- element type of GBFeature.intervals
+ from INTEGER OPTIONAL , -- NOTE(review): presumably from/to are given together for a range; confirm with producer
+ to INTEGER OPTIONAL ,
+ point INTEGER OPTIONAL , -- NOTE(review): presumably used instead of from/to for a single position; confirm
+ accession VisibleString } -- the only mandatory member of this type
+
+ GBQualifier ::= SEQUENCE { -- element type of GBFeature.quals
+ name VisibleString ,
+ value VisibleString OPTIONAL } -- NOTE(review): presumably absent for qualifiers that take no value; confirm
+
+ GBSet ::= SEQUENCE OF GBSeq -- a list of GBSeq records
+
+END
+
+--$Revision: 6.1 $
+--**********************************************************************
+--
+-- ASN.1 for a tiny Bioseq in XML
+-- basically a structured FASTA file with a few extras
+-- in this case we drop all modularity of components
+-- All ids are Optional - simpler structure, less checking
+-- Components of organism are hard coded - can't easily add or change
+-- sequence is just string whether DNA or protein
+-- by James Ostell, 2000
+--
+--**********************************************************************
+
+NCBI-TSeq DEFINITIONS ::=
+BEGIN
+
+TSeq ::= SEQUENCE { -- tiny Bioseq: basically a structured FASTA entry with a few extras
+ seqtype ENUMERATED {
+ nucleotide (1),
+ protein (2) },
+ gi INTEGER OPTIONAL, -- all ids are optional: simpler structure, less checking
+ accver VisibleString OPTIONAL,
+ sid VisibleString OPTIONAL,
+ local VisibleString OPTIONAL,
+ taxid INTEGER OPTIONAL, -- organism components are hard coded as separate members
+ orgname VisibleString OPTIONAL,
+ defline VisibleString,
+ length INTEGER,
+ sequence VisibleString } -- plain string whether DNA or protein
+
+TSeqSet ::= SEQUENCE OF TSeq -- a list of TSeq records
+
+END
+