diff options
author | Aaron M. Ucko <ucko@debian.org> | 2005-03-23 23:49:09 +0000 |
---|---|---|
committer | Aaron M. Ucko <ucko@debian.org> | 2005-03-23 23:49:09 +0000 |
commit | 5349ec8772bc373e4c2349a04e57d7952c006326 (patch) | |
tree | b733fe1df8c0c2d2418b3ce82ebcbd3d0db12a1f /asn/asn.all | |
parent | 0eff2d00595b4adcf6f1c4e6bbbcf0f416c70310 (diff) |
Load ncbi (6.1.20031028) into ncbi-tools6/branches/upstream/current.
Diffstat (limited to 'asn/asn.all')
-rw-r--r-- | asn/asn.all | 204 |
1 files changed, 202 insertions, 2 deletions
diff --git a/asn/asn.all b/asn/asn.all index aaf0f21b..a59bafef 100644 --- a/asn/asn.all +++ b/asn/asn.all @@ -738,7 +738,7 @@ Seq-loc-equiv ::= SET OF Seq-loc -- for a set of equivalent locations END ---$Revision: 6.6 $ +--$Revision: 6.7 $ --********************************************************************** -- -- NCBI Sequence elements @@ -751,7 +751,7 @@ NCBI-Sequence DEFINITIONS ::= BEGIN EXPORTS Bioseq, Seq-annot, Pubdesc, Seq-descr, Seqdesc, Numbering, Heterogen, - Seq-hist, GIBB-mol; + Seq-hist, GIBB-mol, Seq-literal; IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General Seq-align FROM NCBI-Seqalign @@ -2876,3 +2876,203 @@ PrintFormText ::= SEQUENCE { END +--$Revision: 6.4 $ +--********************************************************* +-- +-- ASN.1 and XML for the components of a GenBank format sequence +-- J.Ostell 2002 +-- +--********************************************************* + +NCBI-GBSeq DEFINITIONS ::= +BEGIN + +--******** +-- GBSeq represents the elements in a GenBank style report +-- of a sequence with some small additions to structure and support +-- for protein (GenPept) versions of GenBank format as seen in +-- Entrez. While this represents the simplification, reduction of +-- detail, and flattening to a single sequence perspective of GenBank +-- format (compared with the full ASN.1 or XML from which GenBank and +-- this format is derived at NCBI), it is presented in ASN.1 or XML for +-- automated parsing and processing. It is hoped that this compromise +-- will be useful for those bulk processing at the GenBank format level +-- of detail today. Since it is a compromise, a number of pragmatic +-- decisions have been made. +-- +-- In pursuit of simplicity and familiarity a number of +-- fields do not have full substructure defined here where there is +-- already a standard GenBank format string. 
For example: +-- +-- Date DD-Mon-YYYY +-- Authors LastName, Initials (with periods) +-- Journal JournalName Volume (issue), page-range (year) +-- FeatureLocations as per GenBank feature table, but FeatureIntervals +-- may also be provided as a convenience +-- FeatureQualifiers as per GenBank feature table +-- Primary has a string that represents a table to construct +-- a third party (TPA) sequence. +-- other-seqids can have strings with the "vertical bar format" sequence +-- identifiers used in BLAST for example, when they are non-genbank types. +-- Currently in GenBank format you only see GI, but there are others, like +-- patents, submitter clone names, etc which will appear here, as they +-- always have in the ASN.1 format, and full XML format. +-- source-db is a formatted text block for peptides in GenPept format that +-- carries information from the source protein database. +-- +-- There are also a number of elements that could have been +-- more exactly specified, but in the interest of simplicity +-- have been simply left as options. For example: +-- +-- accession and accession.version will always appear in a GenBank record +-- they are optional because this format can also be used for non-GenBank +-- sequences, and in that case will have only "other-seqids". +-- +-- sequences will normally all have "sequence" filled in. But contig records +-- will have a "join" statement in the "contig" slot, and no "sequence". +-- We also may consider a retrieval option with no sequence of any kind +-- and no feature table to quickly check minimal values. +-- +-- a reference may have an author list, or be from a consortium, or both. +-- +-- some fields, such as taxonomy, do appear as separate elements in GenBank +-- format but without a specific linetype (in GenBank format this comes +-- under ORGANISM). Another example is the separation of primary accession +-- from the list of secondary accessions. 
In GenBank format primary +-- accession is just the first one on the list that includes all secondaries +-- after it. +-- +-- create-date deserves special comment. The date you see on the right hand +-- side of the LOCUS line in GenBank format is actually the last date the +-- record was modified (or the update-date). The date the record was +-- first submitted to GenBank appears in the first submission citation in +-- the reference section. Internally in the databases and ASN.1 NCBI keeps +-- the first date the record was released into the sequence database at +-- NCBI as create-date. For records from EMBL, which supports create-date, +-- it is the date provided by EMBL. For DDBJ records, which do not supply +-- a create-date (same as GenBank format) the create-date is the first date +-- NCBI saw the record from DDBJ. For older GenBank records, before NCBI +-- took responsibility for GenBank, it is just the first date NCBI saw the +-- record. Create-date can be very useful, so we expose it here, but users +-- must understand it is only an approximation and comes from many sources, +-- and with many exceptions and caveats. It does NOT tell you the first +-- date the public might have seen this record and thus is NOT an accurate +-- measure for legal issues of precedence. 
+-- +--******** + +GBSeq ::= SEQUENCE { + locus VisibleString , + length INTEGER , + strandedness INTEGER { + not-set (0) , + single-stranded (1) , + double-stranded (2) , + mixed-stranded (3) } DEFAULT not-set , + moltype INTEGER { + nucleic-acid (0) , + dna (1) , + rna (2) , + trna (3) , + rrna (4) , + mrna (5) , + urna (6) , + snrna (7) , + snorna (8) , + peptide (9) } DEFAULT nucleic-acid , + topology INTEGER { + linear (1) , + circular (2) } DEFAULT linear , + division VisibleString , + update-date VisibleString , + create-date VisibleString , + definition VisibleString , + primary-accession VisibleString OPTIONAL , + accession-version VisibleString OPTIONAL , + other-seqids SEQUENCE OF GBSeqid OPTIONAL , + secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL, + keywords SEQUENCE OF GBKeyword OPTIONAL , + segment VisibleString OPTIONAL , + source VisibleString , + organism VisibleString , + taxonomy VisibleString , + references SEQUENCE OF GBReference , + comment VisibleString OPTIONAL , + primary VisibleString OPTIONAL , + source-db VisibleString OPTIONAL , + feature-table SEQUENCE OF GBFeature OPTIONAL , + sequence VisibleString OPTIONAL , -- Optional for other dump forms + contig VisibleString OPTIONAL } + + GBSecondary-accn ::= VisibleString + + GBSeqid ::= VisibleString + + GBKeyword ::= VisibleString + + GBReference ::= SEQUENCE { + reference VisibleString , + authors SEQUENCE OF GBAuthor OPTIONAL , + consortium VisibleString OPTIONAL , + title VisibleString OPTIONAL , + journal VisibleString , + medline INTEGER OPTIONAL , + pubmed INTEGER OPTIONAL , + remark VisibleString OPTIONAL } + + GBAuthor ::= VisibleString + + GBFeature ::= SEQUENCE { + key VisibleString , + location VisibleString , + intervals SEQUENCE OF GBInterval OPTIONAL , + quals SEQUENCE OF GBQualifier OPTIONAL } + + GBInterval ::= SEQUENCE { + from INTEGER OPTIONAL , + to INTEGER OPTIONAL , + point INTEGER OPTIONAL , + accession VisibleString } + + GBQualifier ::= SEQUENCE { + name 
VisibleString , + value VisibleString OPTIONAL } + + GBSet ::= SEQUENCE OF GBSeq + +END + +--$Revision: 6.1 $ +--********************************************************************** +-- +-- ASN.1 for a tiny Bioseq in XML +-- basically a structured FASTA file with a few extras +-- in this case we drop all modularity of components +-- All ids are Optional - simpler structure, less checking +-- Components of organism are hard coded - can't easily add or change +-- sequence is just string whether DNA or protein +-- by James Ostell, 2000 +-- +--********************************************************************** + +NCBI-TSeq DEFINITIONS ::= +BEGIN + +TSeq ::= SEQUENCE { + seqtype ENUMERATED { + nucleotide (1), + protein (2) }, + gi INTEGER OPTIONAL, + accver VisibleString OPTIONAL, + sid VisibleString OPTIONAL, + local VisibleString OPTIONAL, + taxid INTEGER OPTIONAL, + orgname VisibleString OPTIONAL, + defline VisibleString, + length INTEGER, + sequence VisibleString } + +TSeqSet ::= SEQUENCE OF TSeq -- a bunch of them + +END + |