summaryrefslogtreecommitdiff
path: root/asn
diff options
context:
space:
mode:
authorAaron M. Ucko <ucko@debian.org>2005-03-23 20:49:08 +0000
committerAaron M. Ucko <ucko@debian.org>2005-03-23 20:49:08 +0000
commitee1ab2cbbf85d439732174f321efc1114f19f749 (patch)
tree4c803451e8507be875a478b39bdd31702f0ea281 /asn
parentc36b9906c3ef791147b3643f9e485cc02568819f (diff)
Load ncbi (6.1.20020828) into ncbi-tools6/branches/upstream/current.
Diffstat (limited to 'asn')
-rw-r--r--asn/asn.all25
-rw-r--r--asn/asnpub.all5
-rw-r--r--asn/gbseq.asn166
-rw-r--r--asn/general.asn5
-rw-r--r--asn/makestat.bat2
-rwxr-xr-xasn/makestat.unx2
-rw-r--r--asn/seq.asn12
-rw-r--r--asn/seqfeat.asn8
-rw-r--r--asn/tinyseq.asn34
9 files changed, 245 insertions, 14 deletions
diff --git a/asn/asn.all b/asn/asn.all
index 1b3b2e5d..f1c2d87b 100644
--- a/asn/asn.all
+++ b/asn/asn.all
@@ -8,7 +8,7 @@
--
--**********************************************************************
---$Revision: 6.3 $
+--$Revision: 6.4 $
--**********************************************************************
--
-- NCBI General Data elements
@@ -97,7 +97,8 @@ Person-id ::= CHOICE {
name Name-std , -- structured name
ml VisibleString , -- MEDLINE name (semi-structured)
-- eg. "Jones RM"
- str VisibleString } -- unstructured name
+ str VisibleString, -- unstructured name
+ consortium VisibleString } -- consortium name
Name-std ::= SEQUENCE { -- Structured names
last VisibleString ,
@@ -737,7 +738,7 @@ Seq-loc-equiv ::= SET OF Seq-loc -- for a set of equivalent locations
END
---$Revision: 6.5 $
+--$Revision: 6.6 $
--**********************************************************************
--
-- NCBI Sequence elements
@@ -866,13 +867,21 @@ MolInfo ::= SEQUENCE {
other (255) } -- use Source.techexp
DEFAULT unknown ,
techexp VisibleString OPTIONAL , -- explanation if tech not enough
+ --
+ -- Completeness is not indicated in most records. For genomes, assume
+ -- the sequences are incomplete unless specifically marked as complete.
+ -- For mRNAs, assume the ends are not known exactly unless marked as
+ -- having the left or right end.
+ --
completeness INTEGER {
unknown (0) ,
complete (1) , -- complete biological entity
partial (2) , -- partial but no details given
- no-left (3), -- missing 5' or NH3 end
+ no-left (3) , -- missing 5' or NH3 end
no-right (4) , -- missing 3' or COOH end
no-ends (5) , -- missing both ends
+ has-left (6) , -- 5' or NH3 end present
+ has-right (7) , -- 3' or COOH end present
other (255) } DEFAULT unknown }
@@ -1501,7 +1510,7 @@ PDB-replace ::= SEQUENCE {
END
---$Revision: 6.7 $
+--$Revision: 6.9 $
--**********************************************************************
--
-- NCBI Sequence Feature elements
@@ -1781,7 +1790,8 @@ Gene-ref ::= SEQUENCE {
maploc VisibleString OPTIONAL , -- descriptive map location
pseudo BOOLEAN DEFAULT FALSE , -- pseudogene
db SET OF Dbtag OPTIONAL , -- ids in other dbases
- syn SET OF VisibleString OPTIONAL } -- synonyms for locus
+ syn SET OF VisibleString OPTIONAL , -- synonyms for locus
+ locus-tag VisibleString OPTIONAL } -- systematic gene name (e.g., MI0001, ORF0069)
END
@@ -1864,6 +1874,9 @@ OrgMod ::= SEQUENCE {
anamorph (29) ,
teleomorph (30) ,
breed (31) ,
+ gb-acronym (32) , -- used by taxonomy database
+ gb-anamorph (33) , -- used by taxonomy database
+ gb-synonym (34) , -- used by taxonomy database
old-lineage (253) ,
old-name (254) ,
other (255) } , -- ASN5: old-name (254) will be added to next spec
diff --git a/asn/asnpub.all b/asn/asnpub.all
index 94f27044..7cd8735f 100644
--- a/asn/asnpub.all
+++ b/asn/asnpub.all
@@ -1,4 +1,4 @@
---$Revision: 6.3 $
+--$Revision: 6.4 $
--**********************************************************************
--
-- NCBI General Data elements
@@ -87,7 +87,8 @@ Person-id ::= CHOICE {
name Name-std , -- structured name
ml VisibleString , -- MEDLINE name (semi-structured)
-- eg. "Jones RM"
- str VisibleString } -- unstructured name
+ str VisibleString, -- unstructured name
+ consortium VisibleString } -- consortium name
Name-std ::= SEQUENCE { -- Structured names
last VisibleString ,
diff --git a/asn/gbseq.asn b/asn/gbseq.asn
new file mode 100644
index 00000000..be39c295
--- /dev/null
+++ b/asn/gbseq.asn
@@ -0,0 +1,166 @@
+--$Revision: 6.3 $
+--*********************************************************
+--
+-- ASN.1 and XML for the components of a GenBank format sequence
+-- J.Ostell 2002
+--
+--*********************************************************
+
+NCBI-GBSeq DEFINITIONS ::=
+BEGIN
+
+--********
+-- GBSeq represents the elements in a GenBank style report
+-- of a sequence with some small additions to structure and support
+-- for protein (GenPept) versions of GenBank format as seen in
+-- Entrez. While this represents the simplification, reduction of
+-- detail, and flattening to a single sequence perspective of GenBank
+-- format (compared with the full ASN.1 or XML from which GenBank and
+-- this format are derived at NCBI), it is presented in ASN.1 or XML for
+-- automated parsing and processing. It is hoped that this compromise
+-- will be useful for those bulk processing at the GenBank format level
+-- of detail today. Since it is a compromise, a number of pragmatic
+-- decisions have been made.
+--
+-- In pursuit of simplicity and familiarity a number of
+-- fields do not have full substructure defined here where there is
+-- already a standard GenBank format string. For example:
+--
+-- Date DD-Mon-YYYY
+-- Authors LastName, Initials (with periods)
+-- Journal JournalName Volume (issue), page-range (year)
+-- FeatureLocations as per GenBank feature table, but FeatureIntervals
+-- may also be provided as a convenience
+-- FeatureQualifiers as per GenBank feature table
+-- Primary has a string that represents a table to construct
+-- a third party (TPA) sequence.
+-- other-seqids can have strings with the "vertical bar format" sequence
+-- identifiers used in BLAST for example, when they are non-genbank types.
+-- Currently in GenBank format you only see GI, but there are others, like
+-- patents, submitter clone names, etc which will appear here, as they
+-- always have in the ASN.1 format, and full XML format.
+-- source-db is a formatted text block for peptides in GenPept format that
+-- carries information from the source protein database.
+--
+-- There are also a number of elements that could have been
+-- more exactly specified, but in the interest of simplicity
+-- have been simply left as options. For example:
+--
+-- accession and accession.version will always appear in a GenBank record
+-- they are optional because this format can also be used for non-GenBank
+-- sequences, and in that case will have only "other-seqids".
+--
+-- sequences will normally all have "sequence" filled in. But contig records
+-- will have a "join" statement in the "contig" slot, and no "sequence".
+-- We also may consider a retrieval option with no sequence of any kind
+-- and no feature table to quickly check minimal values.
+--
+-- a reference may have an author list, or be from a consortium, or both.
+--
+-- some fields, such as taxonomy, do appear as separate elements in GenBank
+-- format but without a specific linetype (in GenBank format this comes
+-- under ORGANISM). Another example is the separation of primary accession
+-- from the list of secondary accessions. In GenBank format primary
+-- accession is just the first one on the list that includes all secondaries
+-- after it.
+--
+-- create-date deserves special comment. The date you see on the right hand
+-- side of the LOCUS line in GenBank format is actually the last date the
+-- record was modified (or the update-date). The date the record was
+-- first submitted to GenBank appears in the first submission citation in
+-- the reference section. Internally in the databases and ASN.1 NCBI keeps
+-- the first date the record was released into the sequence database at
+-- NCBI as create-date. For records from EMBL, which supports create-date,
+-- it is the date provided by EMBL. For DDBJ records, which do not supply
+-- a create-date (same as GenBank format) the create-date is the first date
+-- NCBI saw the record from DDBJ. For older GenBank records, before NCBI
+-- took responsibility for GenBank, it is just the first date NCBI saw the
+-- record. Create-date can be very useful, so we expose it here, but users
+-- must understand it is only an approximation and comes from many sources,
+-- and with many exceptions and caveats. It does NOT tell you the first
+-- date the public might have seen this record and thus is NOT an accurate
+-- measure for legal issues of precedence.
+--
+--********
+
+GBSeq ::= SEQUENCE {
+ locus VisibleString ,
+ length INTEGER ,
+ strandedness INTEGER {
+ not-set (0) ,
+ single-stranded (1) ,
+ double-stranded (2) ,
+ mixed-stranded (3) } DEFAULT not-set ,
+ moltype INTEGER {
+ nucleic-acid (0) ,
+ dna (1) ,
+ rna (2) ,
+ trna (3) ,
+ rrna (4) ,
+ mrna (5) ,
+ urna (6) ,
+ snrna (7) ,
+ snorna (8) ,
+ peptide (9) } DEFAULT nucleic-acid ,
+ topology INTEGER {
+ linear (1) ,
+ circular (2) } DEFAULT linear ,
+ division VisibleString ,
+ update-date VisibleString ,
+ create-date VisibleString ,
+ definition VisibleString ,
+ primary-accession VisibleString OPTIONAL ,
+ accession-version VisibleString OPTIONAL ,
+ other-seqids SEQUENCE OF Seqid OPTIONAL ,
+ secondary-accessions SEQUENCE OF Secondary-accession OPTIONAL,
+ keywords SEQUENCE OF Keyword OPTIONAL ,
+ segment VisibleString OPTIONAL ,
+ source VisibleString ,
+ organism VisibleString ,
+ taxonomy VisibleString ,
+ references SEQUENCE OF GBReference ,
+ comment VisibleString OPTIONAL ,
+ primary VisibleString OPTIONAL ,
+ source-db VisibleString OPTIONAL ,
+ feature-table SEQUENCE OF GBFeature OPTIONAL ,
+ sequence VisibleString OPTIONAL , -- Optional for other dump forms
+ contig VisibleString OPTIONAL }
+
+ Secondary-accession ::= VisibleString
+
+ Seqid ::= VisibleString
+
+ Keyword ::= VisibleString
+
+ GBReference ::= SEQUENCE {
+ reference VisibleString ,
+ authors SEQUENCE OF Author OPTIONAL ,
+ consortium VisibleString OPTIONAL ,
+ title VisibleString OPTIONAL ,
+ journal VisibleString ,
+ medline INTEGER OPTIONAL ,
+ pubmed INTEGER OPTIONAL ,
+ remark VisibleString OPTIONAL }
+
+ Author ::= VisibleString
+
+ GBFeature ::= SEQUENCE {
+ key VisibleString ,
+ location VisibleString ,
+ intervals SEQUENCE OF GBInterval OPTIONAL ,
+ quals SEQUENCE OF GBQualifier OPTIONAL }
+
+ GBInterval ::= SEQUENCE {
+ from INTEGER OPTIONAL ,
+ to INTEGER OPTIONAL ,
+ point INTEGER OPTIONAL ,
+ accession VisibleString }
+
+ GBQualifier ::= SEQUENCE {
+ name VisibleString ,
+ value VisibleString OPTIONAL }
+
+ GBSet ::= SEQUENCE OF GBSeq
+
+END
+
diff --git a/asn/general.asn b/asn/general.asn
index 4fbd99b5..654fff8d 100644
--- a/asn/general.asn
+++ b/asn/general.asn
@@ -1,4 +1,4 @@
---$Revision: 6.3 $
+--$Revision: 6.4 $
--**********************************************************************
--
-- NCBI General Data elements
@@ -87,7 +87,8 @@ Person-id ::= CHOICE {
name Name-std , -- structured name
ml VisibleString , -- MEDLINE name (semi-structured)
-- eg. "Jones RM"
- str VisibleString } -- unstructured name
+ str VisibleString, -- unstructured name
+ consortium VisibleString } -- consortium name
Name-std ::= SEQUENCE { -- Structured names
last VisibleString ,
diff --git a/asn/makestat.bat b/asn/makestat.bat
index b8f88ffc..4fbe8342 100644
--- a/asn/makestat.bat
+++ b/asn/makestat.bat
@@ -38,3 +38,5 @@ asntool -m ..\asn\medlars.asn -o asnmdrs.h
asntool -m ..\asn\proj.asn -o asnproj.h
asntool -m ..\access\entrez2.asn -o asnent2.h
asntool -m ..\access\mim.asn -o asnmim.h
+asntool -m ..\asn\gbseq.asn -o asngbseq.h
+asntool -m ..\asn\tinyseq.asn -o asntseq.h
diff --git a/asn/makestat.unx b/asn/makestat.unx
index f8fdcd5c..728499d7 100755
--- a/asn/makestat.unx
+++ b/asn/makestat.unx
@@ -38,3 +38,5 @@ asntool -m ../asn/medlars.asn -o asnmdrs.h
asntool -m ../asn/proj.asn -o asnproj.h
asntool -m ../access/entrez2.asn -o asnent2.h
asntool -m ../access/mim.asn -o asnmim.h
+asntool -m ../asn/gbseq.asn -o asngbseq.h
+asntool -m ../asn/tinyseq.asn -o asntseq.h
diff --git a/asn/seq.asn b/asn/seq.asn
index 49c55d0d..0c397967 100644
--- a/asn/seq.asn
+++ b/asn/seq.asn
@@ -1,4 +1,4 @@
---$Revision: 6.5 $
+--$Revision: 6.6 $
--**********************************************************************
--
-- NCBI Sequence elements
@@ -127,13 +127,21 @@ MolInfo ::= SEQUENCE {
other (255) } -- use Source.techexp
DEFAULT unknown ,
techexp VisibleString OPTIONAL , -- explanation if tech not enough
+ --
+ -- Completeness is not indicated in most records. For genomes, assume
+ -- the sequences are incomplete unless specifically marked as complete.
+ -- For mRNAs, assume the ends are not known exactly unless marked as
+ -- having the left or right end.
+ --
completeness INTEGER {
unknown (0) ,
complete (1) , -- complete biological entity
partial (2) , -- partial but no details given
- no-left (3), -- missing 5' or NH3 end
+ no-left (3) , -- missing 5' or NH3 end
no-right (4) , -- missing 3' or COOH end
no-ends (5) , -- missing both ends
+ has-left (6) , -- 5' or NH3 end present
+ has-right (7) , -- 3' or COOH end present
other (255) } DEFAULT unknown }
diff --git a/asn/seqfeat.asn b/asn/seqfeat.asn
index 8d796562..7fff56e9 100644
--- a/asn/seqfeat.asn
+++ b/asn/seqfeat.asn
@@ -1,4 +1,4 @@
---$Revision: 6.7 $
+--$Revision: 6.9 $
--**********************************************************************
--
-- NCBI Sequence Feature elements
@@ -278,7 +278,8 @@ Gene-ref ::= SEQUENCE {
maploc VisibleString OPTIONAL , -- descriptive map location
pseudo BOOLEAN DEFAULT FALSE , -- pseudogene
db SET OF Dbtag OPTIONAL , -- ids in other dbases
- syn SET OF VisibleString OPTIONAL } -- synonyms for locus
+ syn SET OF VisibleString OPTIONAL , -- synonyms for locus
+ locus-tag VisibleString OPTIONAL } -- systematic gene name (e.g., MI0001, ORF0069)
END
@@ -361,6 +362,9 @@ OrgMod ::= SEQUENCE {
anamorph (29) ,
teleomorph (30) ,
breed (31) ,
+ gb-acronym (32) , -- used by taxonomy database
+ gb-anamorph (33) , -- used by taxonomy database
+ gb-synonym (34) , -- used by taxonomy database
old-lineage (253) ,
old-name (254) ,
other (255) } , -- ASN5: old-name (254) will be added to next spec
diff --git a/asn/tinyseq.asn b/asn/tinyseq.asn
new file mode 100644
index 00000000..d16a2949
--- /dev/null
+++ b/asn/tinyseq.asn
@@ -0,0 +1,34 @@
+--$Revision: 6.1 $
+--**********************************************************************
+--
+-- ASN.1 for a tiny Bioseq in XML
+-- basically a structured FASTA file with a few extras
+-- in this case we drop all modularity of components
+-- All ids are Optional - simpler structure, less checking
+-- Components of organism are hard coded - can't easily add or change
+-- sequence is just string whether DNA or protein
+-- by James Ostell, 2000
+--
+--**********************************************************************
+
+NCBI-TSeq DEFINITIONS ::=
+BEGIN
+
+TSeq ::= SEQUENCE {
+ seqtype ENUMERATED {
+ nucleotide (1),
+ protein (2) },
+ gi INTEGER OPTIONAL,
+ accver VisibleString OPTIONAL,
+ sid VisibleString OPTIONAL,
+ local VisibleString OPTIONAL,
+ taxid INTEGER OPTIONAL,
+ orgname VisibleString OPTIONAL,
+ defline VisibleString,
+ length INTEGER,
+ sequence VisibleString }
+
+TSeqSet ::= SEQUENCE OF TSeq -- a bunch of them
+
+END
+