summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--LICENSE.md2
-rw-r--r--debian/changelog8
-rw-r--r--debian/control4
-rw-r--r--debian/copyright4
-rwxr-xr-xdebian/rules3
-rw-r--r--dune-project2
-rw-r--r--sexplib0.opam4
-rw-r--r--src/dune (renamed from dune)0
-rw-r--r--src/lazy_group_id.ml21
-rw-r--r--src/lazy_group_id.mli9
-rw-r--r--src/raw_grammar.ml217
-rw-r--r--src/sexp.ml (renamed from sexp.ml)40
-rw-r--r--src/sexp.mli (renamed from sexp.mli)34
-rw-r--r--src/sexp_conv.ml (renamed from sexp_conv.ml)0
-rw-r--r--src/sexp_conv.mli (renamed from sexp_conv.mli)0
-rw-r--r--src/sexp_conv_error.ml (renamed from sexp_conv_error.ml)9
-rw-r--r--src/sexpable.ml (renamed from sexpable.ml)0
-rw-r--r--src/sexplib0.ml9
18 files changed, 351 insertions, 15 deletions
diff --git a/LICENSE.md b/LICENSE.md
index 6029120..ef90205 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,6 +1,6 @@
The MIT License
-Copyright (c) 2005--2019 Jane Street Group, LLC <opensource@janestreet.com>
+Copyright (c) 2005--2020 Jane Street Group, LLC <opensource@janestreet.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/debian/changelog b/debian/changelog
index ab154f1..4e90b82 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,11 @@
+ocaml-sexplib0 (0.14.0-1) unstable; urgency=medium
+
+ * New upstream release
+ * Bump debhelper compat level to 13
+ * Bump Standards-Version to 4.5.0
+
+ -- Stéphane Glondu <glondu@debian.org> Fri, 24 Jul 2020 09:17:56 +0200
+
ocaml-sexplib0 (0.13.0-1) unstable; urgency=medium
* New upstream release
diff --git a/debian/control b/debian/control
index 20c1628..2bf1bd2 100644
--- a/debian/control
+++ b/debian/control
@@ -4,11 +4,11 @@ Maintainer: Debian OCaml Maintainers <debian-ocaml-maint@lists.debian.org>
Uploaders:
Stéphane Glondu <glondu@debian.org>
Build-Depends:
- debhelper-compat (= 12),
+ debhelper-compat (= 13),
ocaml-nox,
ocaml-dune,
dh-ocaml
-Standards-Version: 4.4.1
+Standards-Version: 4.5.0
Rules-Requires-Root: no
Section: ocaml
Homepage: https://github.com/janestreet/sexplib0
diff --git a/debian/copyright b/debian/copyright
index 5af0689..9860800 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -1,11 +1,11 @@
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Files: *
-Copyright: © 2005-2019 Jane Street Group, LLC
+Copyright: © 2005-2020 Jane Street Group, LLC
License: MIT
Files: debian/*
-Copyright: © 2019 Stéphane Glondu
+Copyright: © 2019-2020 Stéphane Glondu
License: MIT
License: MIT
diff --git a/debian/rules b/debian/rules
index 51ac126..267c0f7 100755
--- a/debian/rules
+++ b/debian/rules
@@ -15,5 +15,4 @@ override_dh_auto_install:
dune install --destdir=$(DESTDIR) --prefix=/usr --libdir=..$(OCAML_STDLIB_DIR)
rm -f $(DESTDIR)/usr/doc/sexplib0/LICENSE.md
-override_dh_missing:
- dh_missing --fail-missing
+override_dh_dwz:
diff --git a/dune-project b/dune-project
index 598db56..eb10bcb 100644
--- a/dune-project
+++ b/dune-project
@@ -1 +1 @@
-(lang dune 1.5) \ No newline at end of file
+(lang dune 1.10) \ No newline at end of file
diff --git a/sexplib0.opam b/sexplib0.opam
index ac23449..80d083b 100644
--- a/sexplib0.opam
+++ b/sexplib0.opam
@@ -1,5 +1,5 @@
opam-version: "2.0"
-version: "v0.13.0"
+version: "v0.14.0"
maintainer: "opensource@janestreet.com"
authors: ["Jane Street Group, LLC <opensource@janestreet.com>"]
homepage: "https://github.com/janestreet/sexplib0"
@@ -12,7 +12,7 @@ build: [
]
depends: [
"ocaml" {>= "4.04.2"}
- "dune" {>= "1.5.1"}
+ "dune" {>= "2.0.0"}
]
synopsis: "Library containing the definition of S-expressions and some base converters"
description: "
diff --git a/dune b/src/dune
index 4fb59af..4fb59af 100644
--- a/dune
+++ b/src/dune
diff --git a/src/lazy_group_id.ml b/src/lazy_group_id.ml
new file mode 100644
index 0000000..8481668
--- /dev/null
+++ b/src/lazy_group_id.ml
@@ -0,0 +1,21 @@
+(** Why allocate a lazy value instead of storing the int directly?
+
+ We generate many more sexp grammars than actually get used, so we prefer to defer the
+ id until we need it. The compiler can optimize away allocations that nobody touches.
+*)
+
+type t = int Lazy.t
+
+let create =
+ let next = ref 0 in
+ fun () -> lazy (
+ (* As long as we don't give up the global OCaml runtime lock by allocating, we can
+ treat the read and write as atomic. See "20.12.2 Parallel execution of long-running
+ C code" in the 4.09 manual. *)
+ let id = !next in
+ next := id + 1;
+ id)
+
+let force (t : t) = Lazy.force t
+
+let compare a b = compare (force a) (force b)
diff --git a/src/lazy_group_id.mli b/src/lazy_group_id.mli
new file mode 100644
index 0000000..c02568c
--- /dev/null
+++ b/src/lazy_group_id.mli
@@ -0,0 +1,9 @@
+(** [Lazy_group_id] is a cheap way to allocate unique integer identifiers for sexp
+ grammars. See [raw_grammar.ml] for details. *)
+
+type t
+
+val compare : t -> t -> int
+
+val create : unit -> t
+val force : t -> int
diff --git a/src/raw_grammar.ml b/src/raw_grammar.ml
new file mode 100644
index 0000000..e6e4396
--- /dev/null
+++ b/src/raw_grammar.ml
@@ -0,0 +1,217 @@
+(** Representation of S-expression grammars *)
+
+(** This module defines the representation of S-expression grammars produced by
+ [@@deriving sexp_grammar]. It introduces an AST to represent these grammars and a
+ notion of "group" to represent the grammars of a mutually recursive set of OCaml
+ type declarations.
+
+ The grammar for a given type expression can be constructed via: {[
+
+ [%sexp_grammar: <type>]
+
+ ]}
+
+ {3 Goals and non-goals}
+
+ Functionality goals: With post-processing, sexp grammars can be pretty-printed in a
+ human-readable format and provide enough information to implement completion and
+ validation tools.
+
+ Performance goals: [@@deriving sexp_grammar] adds minimal overhead and introduces no
+ toplevel side effect. The compiler can lift the vast majority of ASTs generated by
+ [@@deriving sexp_grammar] as global constants. Common sub-grammars are usually shared,
+ particularly when they derive from multiple applications of the same functor.
+
+ Non-goals: Stability, although we will make changes backwards-compatible or at least
+ provide a reasonable upgrade path.
+
+ In what follows, we describe how this is achieved.
+
+ {3 Encoding of generated grammars to maximize sharing}
+
+ A [group] contains the grammars for all types of a mutually recursive group of OCaml
+ type declarations.
+
+ To ensure maximum sharing, a group is split into two parts:
+
+ - The [generic_group] depends only on the textual type declarations. Where the type
+ declaration refers to an existing concrete type, the generic group takes a variable
+ to represent the grammar of that type. This means that the compiler can lift each
+ type declaration in the source code to a shared global constant.
+
+ - The [group] binds the type variables of the [generic_group], either to concrete
+ grammars where the type declaration refers to a concrete type, or to another
+ variable where the type declaration itself was polymorphic.
+
+ To understand this point better, imagine the following type declaration {[
+
+ type t = X of u
+
+ ]} were explicitly split into its [generic_group] and [group] parts: {[
+
+ type 'u t_generic = X of 'u
+ type t = u t_generic
+
+ ]}
+
+ If [u] came from a functor argument, it's easy to see that [t_generic] would be
+ exactly the same in all applications of the functor and only [t] would vary. The
+ grammar of [t_generic], which is the biggest part, would be shared between all
+ applications of the functor.
+
+ {3 Processing of grammars}
+
+ The [Raw_grammar.t] type optimizes for performance over ease of use. To help users
+ process the raw grammars into a more usable form, we keep two identifiers in the
+ generated grammars:
+
+ - The [generic_group_id] uniquely identifies a [generic_group]. It is a hash of the
+ generic group itself. (It is okay that this scheme would conflate identical type
+ declarations, because the resulting generic groups would be identical as well.)
+
+ - The [group_id] uniquely identifies a [group]. It is a unique integer, generated
+ lazily so that we don't create a side effect at module creation time.
+
+ The exact processing would depend on the final application. We expect that a typical
+ consumer of sexp grammars would define less-indirected equivalents of the [t] and
+ [group] types, possibly re-using the [_ type_] and [Atom.t] types.
+*)
+
+(** The label of a field, constructor, or constant. *)
+type label = string
+
+type generic_group_id = string
+type group_id = Lazy_group_id.t
+
+(** Variable names. These are used to improve readability of the printed grammars.
+ Internally, we use numerical indices to represent variables; see [Implicit_var]
+ below. *)
+type var_name = string
+
+type type_name = string
+
+(** A grammatical type which classifies atoms. *)
+module Atom = struct
+ type t =
+ | String (** Any atom. *)
+ | Bool (** One of [true], [false], [True], or [False]. *)
+ | Char (** A single-character atom. *)
+ | Float (** An atom which parses as a {!float}. *)
+ | Int (** An atom which parses as an integer, such as {!int} or {!int64}. *)
+ | This of { ignore_capitalization : bool; string : string }
+ (** Exactly that string, possibly modulo case in the first character. *)
+end
+
+(** A grammatical type which classifies sexps. Corresponds to a non-terminal in a
+ context-free grammar. *)
+type 't type_ =
+ | Any (** Any list or atom. *)
+ | Apply of 't type_ * 't type_ list (** Assign types to (explicit) type variables. *)
+ | Atom of Atom.t (** An atom, in particular one of the given {!Atom.t}. *)
+ | Explicit_bind of var_name list * 't type_
+ (** In [Explicit_bind ([ "a"; "b" ], Explicit_var 0)], [Explicit_var 0] is ["a"]. One
+ must bind all available type variables: free variables are not permitted. *)
+ | Explicit_var of int
+ (** Indices for type variables, e.g. ['a], introduced by polymorphic definitions.
+
+ Unlike de Bruijn indices, these are always bound by the nearest ancestral
+ [Explicit_bind]. *)
+ | Grammar of 't (** Embeds other types in a grammar. *)
+ | Implicit_var of int
+ (** Indices for type constructors, e.g. [int], in scope. Unlike de Bruijn indices, these
+ are always bound by the [implicit_vars] of the nearest enclosing [generic_group].
+ *)
+ | List of 't sequence_type
+ (** A list of a certain form. Depending on the {!sequence_type}, this might
+ correspond to an OCaml tuple, list, or embedded record. *)
+ | Option of 't type_
+ (** An optional value. Either syntax recognized by [option_of_sexp] is supported:
+ [(Some 42)] or [(42)] for a value and [None] or [()] for no value. *)
+ | Record of 't record_type
+ (** A list of lists, representing a record of the given {!record_type}. For
+ validation, [Record recty] is equivalent to [List [Fields recty]]. *)
+ | Recursive of type_name
+ (** A type in the same mutually recursive group, possibly the current one. *)
+ | Union of 't type_ list
+ (** Any sexp matching any of the given types. {!Variant} should be preferred when
+ possible, especially for complex types, since validation and other algorithms may
+ behave exponentially.
+
+ One useful special case is [Union []], the empty type. This is occasionally
+ generated for things such as abstract types. *)
+ | Variant of 't variant_type (** A sexp which matches the given {!variant_type}. *)
+
+(** A grammatical type which classifies sequences of sexps. Here, a "sequence" may mean
+ either a list on its own or, say, the sexps following a constructor in a list
+ matching a {!variant_type}.
+
+ Certain operations may greatly favor simple sequence types. For example, matching
+ [List [ Many type_ ]] is easy for any type [type_] (assuming [type_] itself is
+ easy), but [List [ Many type1; Many type2 ]] may require backtracking. Grammars
+ derived from OCaml types will only have "nice" sequence types. *)
+and 't sequence_type = 't component list
+
+(** Part of a sequence of sexps. *)
+and 't component =
+ | One of 't type_ (** Exactly one sexp of the given type. *)
+ | Optional of 't type_ (** One sexp of the given type, or nothing at all. *)
+ | Many of 't type_ (** Any number of sexps, each of the given type. *)
+ | Fields of 't record_type
+ (** A succession of lists, collectively defining a record of the given {!record_type}.
+ The fields may appear in any order. The number of lists is not necessarily fixed,
+ as some fields may be optional. In particular, if all fields are optional, there
+ may be zero lists. *)
+
+(** A tagged union of grammatical types. Grammars derived from OCaml variants will have
+ variant types. *)
+and 't variant_type =
+ { ignore_capitalization : bool
+ (** If true, the grammar is insensitive to the case of the first letter of the label.
+ This matches the behavior of derived [t_of_sexp] functions. *)
+ ; alts : (label * 't sequence_type) list
+ (** An association list of labels (constructors) to sequence types. A matching sexp is
+ a list whose head is the label as an atom and whose tail matches the given
+ sequence type. As a special case, an alternative whose sequence is empty matches
+ an atom rather than a list (i.e., [label] rather than [(label)]). This is in
+ keeping with generated [t_of_sexp] functions.
+
+ As a workaround, to match [(label)] one could use
+ [("label", [ Optional (Union []) ])]. *)
+ }
+
+(** A collection of field definitions specifying a record type. Consists of an
+ association list from labels to fields and an [allow_extra_fields] flag. *)
+and 't record_type =
+ { allow_extra_fields: bool
+ ; fields: (label * 't field) list
+ }
+
+(** A field in a record. *)
+and 't field =
+ { optional : bool (** If true, the field is optional. *)
+ ; args : 't sequence_type
+ (** A sequence type which the arguments to the field must match. An empty sequence is
+ permissible but would not be generated for any OCaml type. *)
+ }
+
+type t =
+ | Ref of type_name * group
+ | Inline of t type_
+
+and group =
+ { gid : group_id
+ ; generic_group : generic_group
+ ; origin : string
+ (** [origin] provides a human-readable hint as to where the type was defined.
+
+ For a globally unique identifier, use [gid] instead.
+
+ See [ppx/ppx_sexp_conv/test/expect/test_origin.ml] for examples. *)
+ ; apply_implicit : t list
+ }
+
+and generic_group =
+ { implicit_vars : var_name list
+ ; ggid : generic_group_id
+ ; types : (type_name * t type_) list
+ }
diff --git a/sexp.ml b/src/sexp.ml
index 5e35024..c23ca2d 100644
--- a/sexp.ml
+++ b/src/sexp.ml
@@ -12,6 +12,7 @@ type t = Atom of string | List of t list
let sexp_of_t t = t
let t_of_sexp t = t
+let t_sexp_grammar = Raw_grammar.Inline Any
let rec compare_list a b =
match a, b with
@@ -295,7 +296,44 @@ include Printing
let of_float_style : [ `Underscores | `No_underscores ] ref = ref `No_underscores
let of_int_style : [ `Underscores | `No_underscores ] ref = ref `No_underscores
-module Private = Printing
+module Private = struct
+ include Printing
+
+ module Raw_grammar = struct
+ include Raw_grammar
+
+ module Builtin = struct
+ let unit_sexp_grammar = Inline (List [])
+ let bool_sexp_grammar = Inline (Atom Bool)
+ let string_sexp_grammar = Inline (Atom String)
+ let bytes_sexp_grammar = string_sexp_grammar
+ let char_sexp_grammar = Inline (Atom Char)
+ let int_sexp_grammar = Inline (Atom Int)
+ let float_sexp_grammar = Inline (Atom Float)
+ let int32_sexp_grammar = Inline (Atom Int)
+ let int64_sexp_grammar = Inline (Atom Int)
+ let nativeint_sexp_grammar = Inline (Atom Int)
+ let ref_sexp_grammar = Inline (Explicit_bind ([ "'a" ], Explicit_var 0))
+ let lazy_t_sexp_grammar = Inline (Explicit_bind ([ "'a" ], Explicit_var 0))
+ let option_sexp_grammar = Inline (Explicit_bind ([ "'a" ], Option (Explicit_var 0)))
+
+ let list_sexp_grammar =
+ Inline (Explicit_bind ([ "'a" ], List [ Many (Explicit_var 0) ]))
+ ;;
+
+ let array_sexp_grammar = list_sexp_grammar
+ end
+
+ let empty_sexp_grammar = Inline (Union [])
+ let opaque_sexp_grammar = empty_sexp_grammar
+ let fun_sexp_grammar = empty_sexp_grammar
+ let tuple2_sexp_grammar =
+ Inline
+ (Explicit_bind
+ ([ "'a"; "'b" ], List [ One (Explicit_var 0); One (Explicit_var 1) ]))
+ ;;
+ end
+end
let message name fields =
let rec conv_fields = function
diff --git a/sexp.mli b/src/sexp.mli
index bb00698..4d3f1cf 100644
--- a/sexp.mli
+++ b/src/sexp.mli
@@ -6,6 +6,7 @@ type t = Atom of string | List of t list
creating a circular dependency *)
val t_of_sexp : t -> t
val sexp_of_t : t -> t
+val t_sexp_grammar : Raw_grammar.t
val equal : t -> t -> bool
val compare : t -> t -> int
@@ -88,11 +89,41 @@ val to_string : t -> string
val of_float_style : [ `Underscores | `No_underscores ] ref
val of_int_style : [ `Underscores | `No_underscores ] ref
-
(*_ See the Jane Street Style Guide for an explanation of [Private] submodules:
https://opensource.janestreet.com/standards/#private-submodules *)
module Private : sig
+
+ (*_ exported for downstream tools *)
+ module Raw_grammar : sig
+ include module type of struct
+ include Raw_grammar
+ end
+
+ module Builtin : sig
+ val unit_sexp_grammar : t
+ val bool_sexp_grammar : t
+ val string_sexp_grammar : t
+ val bytes_sexp_grammar : t
+ val char_sexp_grammar : t
+ val int_sexp_grammar : t
+ val float_sexp_grammar : t
+ val int32_sexp_grammar : t
+ val int64_sexp_grammar : t
+ val nativeint_sexp_grammar : t
+ val ref_sexp_grammar : t
+ val lazy_t_sexp_grammar : t
+ val option_sexp_grammar : t
+ val list_sexp_grammar : t
+ val array_sexp_grammar : t
+ end
+
+ val empty_sexp_grammar : t
+ val opaque_sexp_grammar : t
+ val fun_sexp_grammar : t
+ val tuple2_sexp_grammar : t
+ end
+
(*_ Exported for sexplib *)
val size : t -> int * int
@@ -112,4 +143,5 @@ module Private : sig
val mach_maybe_esc_str : string -> string
val must_escape : string -> bool
val esc_str : string -> string
+
end
diff --git a/sexp_conv.ml b/src/sexp_conv.ml
index c185c02..c185c02 100644
--- a/sexp_conv.ml
+++ b/src/sexp_conv.ml
diff --git a/sexp_conv.mli b/src/sexp_conv.mli
index 90c2625..90c2625 100644
--- a/sexp_conv.mli
+++ b/src/sexp_conv.mli
diff --git a/sexp_conv_error.ml b/src/sexp_conv_error.ml
index 562d3a5..c41a375 100644
--- a/sexp_conv_error.ml
+++ b/src/sexp_conv_error.ml
@@ -39,6 +39,12 @@ let unexpected_stag loc sexp =
(* Errors concerning records *)
+let record_sexp_bool_with_payload loc sexp =
+ let msg =
+ loc ^
+ "_of_sexp: record conversion: a [sexp.bool] field was given a payload." in
+ of_sexp_error msg sexp
+
let record_only_pairs_expected loc sexp =
let msg =
loc ^
@@ -117,8 +123,5 @@ let empty_list_invalid_poly_var loc sexp =
of_sexp_error (
loc ^ "_of_sexp: the empty list is an invalid polymorphic variant") sexp
-let silly_type loc sexp =
- of_sexp_error (loc ^ "_of_sexp: trying to convert a silly type") sexp
-
let empty_type loc sexp =
of_sexp_error (loc ^ "_of_sexp: trying to convert an empty type") sexp
diff --git a/sexpable.ml b/src/sexpable.ml
index 4049fa2..4049fa2 100644
--- a/sexpable.ml
+++ b/src/sexpable.ml
diff --git a/src/sexplib0.ml b/src/sexplib0.ml
new file mode 100644
index 0000000..48c7527
--- /dev/null
+++ b/src/sexplib0.ml
@@ -0,0 +1,9 @@
+module Sexp = Sexp
+module Sexp_conv = Sexp_conv
+module Sexp_conv_error = Sexp_conv_error
+module Sexpable = Sexpable
+
+module Private = struct
+ module Lazy_group_id = Lazy_group_id
+ module Raw_grammar = Raw_grammar
+end