diff options
-rw-r--r-- | LICENSE.md | 2 | ||||
-rw-r--r-- | debian/changelog | 8 | ||||
-rw-r--r-- | debian/control | 4 | ||||
-rw-r--r-- | debian/copyright | 4 | ||||
-rwxr-xr-x | debian/rules | 3 | ||||
-rw-r--r-- | dune-project | 2 | ||||
-rw-r--r-- | sexplib0.opam | 4 | ||||
-rw-r--r-- | src/dune (renamed from dune) | 0 | ||||
-rw-r--r-- | src/lazy_group_id.ml | 21 | ||||
-rw-r--r-- | src/lazy_group_id.mli | 9 | ||||
-rw-r--r-- | src/raw_grammar.ml | 217 | ||||
-rw-r--r-- | src/sexp.ml (renamed from sexp.ml) | 40 | ||||
-rw-r--r-- | src/sexp.mli (renamed from sexp.mli) | 34 | ||||
-rw-r--r-- | src/sexp_conv.ml (renamed from sexp_conv.ml) | 0 | ||||
-rw-r--r-- | src/sexp_conv.mli (renamed from sexp_conv.mli) | 0 | ||||
-rw-r--r-- | src/sexp_conv_error.ml (renamed from sexp_conv_error.ml) | 9 | ||||
-rw-r--r-- | src/sexpable.ml (renamed from sexpable.ml) | 0 | ||||
-rw-r--r-- | src/sexplib0.ml | 9 |
18 files changed, 351 insertions, 15 deletions
@@ -1,6 +1,6 @@ The MIT License -Copyright (c) 2005--2019 Jane Street Group, LLC <opensource@janestreet.com> +Copyright (c) 2005--2020 Jane Street Group, LLC <opensource@janestreet.com> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/debian/changelog b/debian/changelog index ab154f1..4e90b82 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +ocaml-sexplib0 (0.14.0-1) unstable; urgency=medium + + * New upstream release + * Bump debhelper compat level to 13 + * Bump Standards-Version to 4.5.0 + + -- Stéphane Glondu <glondu@debian.org> Fri, 24 Jul 2020 09:17:56 +0200 + ocaml-sexplib0 (0.13.0-1) unstable; urgency=medium * New upstream release diff --git a/debian/control b/debian/control index 20c1628..2bf1bd2 100644 --- a/debian/control +++ b/debian/control @@ -4,11 +4,11 @@ Maintainer: Debian OCaml Maintainers <debian-ocaml-maint@lists.debian.org> Uploaders: Stéphane Glondu <glondu@debian.org> Build-Depends: - debhelper-compat (= 12), + debhelper-compat (= 13), ocaml-nox, ocaml-dune, dh-ocaml -Standards-Version: 4.4.1 +Standards-Version: 4.5.0 Rules-Requires-Root: no Section: ocaml Homepage: https://github.com/janestreet/sexplib0 diff --git a/debian/copyright b/debian/copyright index 5af0689..9860800 100644 --- a/debian/copyright +++ b/debian/copyright @@ -1,11 +1,11 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Files: * -Copyright: © 2005-2019 Jane Street Group, LLC +Copyright: © 2005-2020 Jane Street Group, LLC License: MIT Files: debian/* -Copyright: © 2019 Stéphane Glondu +Copyright: © 2019-2020 Stéphane Glondu License: MIT License: MIT diff --git a/debian/rules b/debian/rules index 51ac126..267c0f7 100755 --- a/debian/rules +++ b/debian/rules @@ -15,5 +15,4 @@ override_dh_auto_install: dune install --destdir=$(DESTDIR) --prefix=/usr --libdir=..$(OCAML_STDLIB_DIR) rm -f 
$(DESTDIR)/usr/doc/sexplib0/LICENSE.md -override_dh_missing: - dh_missing --fail-missing +override_dh_dwz: diff --git a/dune-project b/dune-project index 598db56..eb10bcb 100644 --- a/dune-project +++ b/dune-project @@ -1 +1 @@ -(lang dune 1.5)
\ No newline at end of file +(lang dune 1.10)
\ No newline at end of file diff --git a/sexplib0.opam b/sexplib0.opam index ac23449..80d083b 100644 --- a/sexplib0.opam +++ b/sexplib0.opam @@ -1,5 +1,5 @@ opam-version: "2.0" -version: "v0.13.0" +version: "v0.14.0" maintainer: "opensource@janestreet.com" authors: ["Jane Street Group, LLC <opensource@janestreet.com>"] homepage: "https://github.com/janestreet/sexplib0" @@ -12,7 +12,7 @@ build: [ ] depends: [ "ocaml" {>= "4.04.2"} - "dune" {>= "1.5.1"} + "dune" {>= "2.0.0"} ] synopsis: "Library containing the definition of S-expressions and some base converters" description: " diff --git a/src/lazy_group_id.ml b/src/lazy_group_id.ml new file mode 100644 index 0000000..8481668 --- /dev/null +++ b/src/lazy_group_id.ml @@ -0,0 +1,21 @@ +(** Why allocate a lazy value instead of storing the int directly? + + We generate many more sexp grammars than actually get used, so we prefer to defer the + id until we need it. The compiler can optimize away allocations that nobody touches. +*) + +type t = int Lazy.t + +let create = + let next = ref 0 in + fun () -> lazy ( + (* As long as we don't give up the global OCaml runtime lock by allocating, we can + treat the read and write as atomic. See "20.12.2 Parallel execution of long-running + C code" in the 4.09 manual. *) + let id = !next in + next := id + 1; + id) + +let force (t : t) = Lazy.force t + +let compare a b = compare (force a) (force b) diff --git a/src/lazy_group_id.mli b/src/lazy_group_id.mli new file mode 100644 index 0000000..c02568c --- /dev/null +++ b/src/lazy_group_id.mli @@ -0,0 +1,9 @@ +(** [Lazy_group_id] is a cheap way to allocate unique integer identifiers for sexp + grammars. See [sexp_intf.ml] for details.
*) + +type t + +val compare : t -> t -> int + +val create : unit -> t +val force : t -> int diff --git a/src/raw_grammar.ml b/src/raw_grammar.ml new file mode 100644 index 0000000..e6e4396 --- /dev/null +++ b/src/raw_grammar.ml @@ -0,0 +1,217 @@ +(** Representation of S-expression grammars *) + +(** This module defines the representation of S-expression grammars produced by + [@@deriving sexp_grammar]. It introduces an AST to represent these grammars and a + notion of "group" to represent the grammars of a mutually recursive set of OCaml + type declarations. + + The grammar for a given type expression can be constructed via: {[ + + [%sexp_grammar: <type>] + + ]} + + {3 Goals and non-goals} + + Functionality goals: With post-processing, sexp grammars can be pretty-printed in a + human-readable format and provide enough information to implement completion and + validation tools. + + Performance goals: [@@deriving sexp_grammar] adds minimal overhead and introduces no + toplevel side effect. The compiler can lift the vast majority of ASTs generated by + [@@deriving sexp_grammar] as global constants. Common sub-grammars are usually shared, + particularly when they derive from multiple applications of the same functor. + + Non-goals: Stability, although we will make changes backwards-compatible or at least + provide a reasonable upgrade path. + + In what follows, we describe how this is achieved. + + {3 Encoding of generated grammars to maximize sharing} + + A [group] contains the grammars for all types of a mutually recursive group of OCaml + type declarations. + + To ensure maximum sharing, a group is split into two parts: + + - The [generic_group] depends only on the textual type declarations. Where the type + declaration refers to an existing concrete type, the generic group takes a variable + to represent the grammar of that type. This means that the compiler can lift each + type declaration in the source code to a shared global constant.
+ + - The [group] binds the type variables of the [generic_group], either to concrete + grammars where the type declaration refers to a concrete type, or to another + variable where the type declaration itself was polymorphic. + + To understand this point better, imagine the following type declaration {[ + + type t = X of u + + ]} were explicitly split into its [generic_group] and [group] parts: {[ + + type 'u t_generic = X of 'u + type t = u t_generic + + ]} + + If [u] came from a functor argument, it's easy to see that [t_generic] would be + exactly the same in all applications of the functor and only [t] would vary. The + grammar of [t_generic], which is the biggest part, would be shared between all + applications of the functor. + + {3 Processing of grammars} + + The [Raw_grammar.t] type optimizes for performance over ease of use. To help users + process the raw grammars into a more usable form, we keep two identifiers in the + generated grammars: + + - The [generic_group_id] uniquely identifies a [generic_group]. It is a hash of the + generic group itself. (It is okay that this scheme would conflate identical type + declarations, because the resulting generic groups would be identical as well.) + + - The [group_id] uniquely identifies a [group]. It is a unique integer, generated + lazily so that we don't create a side effect at module creation time. + + The exact processing would depend on the final application. We expect that a typical + consumer of sexp grammars would define less-indirected equivalents of the [t] and + [group] types, possibly re-using the [_ type_] and [Atom.t] types. +*) + +(** The label of a field, constructor, or constant. *) +type label = string + +type generic_group_id = string +type group_id = Lazy_group_id.t + +(** Variable names. These are used to improve readability of the printed grammars. + Internally, we use numerical indices to represent variables; see [Implicit_var] + below. 
*) +type var_name = string + +type type_name = string + +(** A grammatical type which classifies atoms. *) +module Atom = struct + type t = + | String (** Any atom. *) + | Bool (** One of [true], [false], [True], or [False]. *) + | Char (** A single-character atom. *) + | Float (** An atom which parses as a {!float}. *) + | Int (** An atom which parses as an integer, such as {!int} or {!int64}. *) + | This of { ignore_capitalization : bool; string : string } + (** Exactly that string, possibly modulo case in the first character. *) +end + +(** A grammatical type which classifies sexps. Corresponds to a non-terminal in a + context-free grammar. *) +type 't type_ = + | Any (** Any list or atom. *) + | Apply of 't type_ * 't type_ list (** Assign types to (explicit) type variables. *) + | Atom of Atom.t (** An atom, in particular one of the given {!Atom.t}. *) + | Explicit_bind of var_name list * 't type_ + (** In [Explicit_bind ([ "a"; "b" ], Explicit_var 0)], [Explicit_var 0] is ["a"]. One must bind + all available type variables: free variables are not permitted. *) + | Explicit_var of int + (** Indices for type variables, e.g. ['a], introduced by polymorphic definitions. + + Unlike de Bruijn indices, these are always bound by the nearest ancestral + [Explicit_bind]. *) + | Grammar of 't (** Embeds other types in a grammar. *) + | Implicit_var of int + (** Indices for type constructors, e.g. [int], in scope. Unlike de Bruijn indices, these + are always bound by the [implicit_vars] of the nearest enclosing [generic_group]. + *) + | List of 't sequence_type + (** A list of a certain form. Depending on the {!sequence_type}, this might + correspond to an OCaml tuple, list, or embedded record. *) + | Option of 't type_ + (** An optional value. Either syntax recognized by [option_of_sexp] is supported: + [(Some 42)] or [(42)] for a value and [None] or [()] for no value. *) + | Record of 't record_type + (** A list of lists, representing a record of the given {!record_type}.
For + validation, [Record recty] is equivalent to [List [Fields recty]]. *) + | Recursive of type_name + (** A type in the same mutually recursive group, possibly the current one. *) + | Union of 't type_ list + (** Any sexp matching any of the given types. {!Variant} should be preferred when + possible, especially for complex types, since validation and other algorithms may + behave exponentially. + + One useful special case is [Union []], the empty type. This is occasionally + generated for things such as abstract types. *) + | Variant of 't variant_type (** A sexp which matches the given {!variant_type}. *) + +(** A grammatical type which classifies sequences of sexps. Here, a "sequence" may mean + either a list on its own or, say, the sexps following a constructor in a list + matching a {!variant_type}. + + Certain operations may greatly favor simple sequence types. For example, matching + [List [ Many type_ ]] is easy for any type [type_] (assuming [type_] itself is + easy), but [List [ Many type1; Many type2 ]] may require backtracking. Grammars + derived from OCaml types will only have "nice" sequence types. *) +and 't sequence_type = 't component list + +(** Part of a sequence of sexps. *) +and 't component = + | One of 't type_ (** Exactly one sexp of the given type. *) + | Optional of 't type_ (** One sexp of the given type, or nothing at all. *) + | Many of 't type_ (** Any number of sexps, each of the given type. *) + | Fields of 't record_type + (** A succession of lists, collectively defining a record of the given {!record_type}. + The fields may appear in any order. The number of lists is not necessarily fixed, + as some fields may be optional. In particular, if all fields are optional, there + may be zero lists. *) + +(** A tagged union of grammatical types. Grammars derived from OCaml variants will have + variant types. 
*) +and 't variant_type = + { ignore_capitalization : bool + (** If true, the grammar is insensitive to the case of the first letter of the label. + This matches the behavior of derived [sexp_of_t] functions. *) + ; alts : (label * 't sequence_type) list + (** An association list of labels (constructors) to sequence types. A matching sexp is + a list whose head is the label as an atom and whose tail matches the given + sequence type. As a special case, an alternative whose sequence is empty matches + an atom rather than a list (i.e., [label] rather than [(label)]). This is in + keeping with generated [t_of_sexp] functions. + + As a workaround, to match [(label)] one could use + [("label", [ Optional (Union []) ])]. *) + } + +(** A collection of field definitions specifying a record type. Consists only of an + association list from labels to fields. *) +and 't record_type = + { allow_extra_fields: bool + ; fields: (label * 't field) list + } + +(** A field in a record. *) +and 't field = + { optional : bool (** If true, the field is optional. *) + ; args : 't sequence_type + (** A sequence type which the arguments to the field must match. An empty sequence is + permissible but would not be generated for any OCaml type. *) + } + +type t = + | Ref of type_name * group + | Inline of t type_ + +and group = + { gid : group_id + ; generic_group : generic_group + ; origin : string + (** [origin] provides a human-readable hint as to where the type was defined. + + For a globally unique identifier, use [gid] instead. + + See [ppx/ppx_sexp_conv/test/expect/test_origin.ml] for examples. 
*) + ; apply_implicit : t list + } + +and generic_group = + { implicit_vars : var_name list + ; ggid : generic_group_id + ; types : (type_name * t type_) list + } @@ -12,6 +12,7 @@ type t = Atom of string | List of t list let sexp_of_t t = t let t_of_sexp t = t +let t_sexp_grammar = Raw_grammar.Inline Any let rec compare_list a b = match a, b with @@ -295,7 +296,44 @@ include Printing let of_float_style : [ `Underscores | `No_underscores ] ref = ref `No_underscores let of_int_style : [ `Underscores | `No_underscores ] ref = ref `No_underscores -module Private = Printing +module Private = struct + include Printing + + module Raw_grammar = struct + include Raw_grammar + + module Builtin = struct + let unit_sexp_grammar = Inline (List []) + let bool_sexp_grammar = Inline (Atom Bool) + let string_sexp_grammar = Inline (Atom String) + let bytes_sexp_grammar = string_sexp_grammar + let char_sexp_grammar = Inline (Atom Char) + let int_sexp_grammar = Inline (Atom Int) + let float_sexp_grammar = Inline (Atom Float) + let int32_sexp_grammar = Inline (Atom Int) + let int64_sexp_grammar = Inline (Atom Int) + let nativeint_sexp_grammar = Inline (Atom Int) + let ref_sexp_grammar = Inline (Explicit_bind ([ "'a" ], Explicit_var 0)) + let lazy_t_sexp_grammar = Inline (Explicit_bind ([ "'a" ], Explicit_var 0)) + let option_sexp_grammar = Inline (Explicit_bind ([ "'a" ], Option (Explicit_var 0))) + + let list_sexp_grammar = + Inline (Explicit_bind ([ "'a" ], List [ Many (Explicit_var 0) ])) + ;; + + let array_sexp_grammar = list_sexp_grammar + end + + let empty_sexp_grammar = Inline (Union []) + let opaque_sexp_grammar = empty_sexp_grammar + let fun_sexp_grammar = empty_sexp_grammar + let tuple2_sexp_grammar = + Inline + (Explicit_bind + ([ "'a"; "'b" ], List [ One (Explicit_var 0); One (Explicit_var 1) ])) + ;; + end +end let message name fields = let rec conv_fields = function @@ -6,6 +6,7 @@ type t = Atom of string | List of t list creating a circular dependency *) val t_of_sexp : 
t -> t val sexp_of_t : t -> t +val t_sexp_grammar : Raw_grammar.t val equal : t -> t -> bool val compare : t -> t -> int @@ -88,11 +89,41 @@ val to_string : t -> string val of_float_style : [ `Underscores | `No_underscores ] ref val of_int_style : [ `Underscores | `No_underscores ] ref - (*_ See the Jane Street Style Guide for an explanation of [Private] submodules: https://opensource.janestreet.com/standards/#private-submodules *) module Private : sig + + (*_ exported for downstream tools *) + module Raw_grammar : sig + include module type of struct + include Raw_grammar + end + + module Builtin : sig + val unit_sexp_grammar : t + val bool_sexp_grammar : t + val string_sexp_grammar : t + val bytes_sexp_grammar : t + val char_sexp_grammar : t + val int_sexp_grammar : t + val float_sexp_grammar : t + val int32_sexp_grammar : t + val int64_sexp_grammar : t + val nativeint_sexp_grammar : t + val ref_sexp_grammar : t + val lazy_t_sexp_grammar : t + val option_sexp_grammar : t + val list_sexp_grammar : t + val array_sexp_grammar : t + end + + val empty_sexp_grammar : t + val opaque_sexp_grammar : t + val fun_sexp_grammar : t + val tuple2_sexp_grammar : t + end + (*_ Exported for sexplib *) val size : t -> int * int @@ -112,4 +143,5 @@ module Private : sig val mach_maybe_esc_str : string -> string val must_escape : string -> bool val esc_str : string -> string + end diff --git a/sexp_conv.ml b/src/sexp_conv.ml index c185c02..c185c02 100644 --- a/sexp_conv.ml +++ b/src/sexp_conv.ml diff --git a/sexp_conv.mli b/src/sexp_conv.mli index 90c2625..90c2625 100644 --- a/sexp_conv.mli +++ b/src/sexp_conv.mli diff --git a/sexp_conv_error.ml b/src/sexp_conv_error.ml index 562d3a5..c41a375 100644 --- a/sexp_conv_error.ml +++ b/src/sexp_conv_error.ml @@ -39,6 +39,12 @@ let unexpected_stag loc sexp = (* Errors concerning records *) +let record_sexp_bool_with_payload loc sexp = + let msg = + loc ^ + "_of_sexp: record conversion: a [sexp.bool] field was given a payload." 
in + of_sexp_error msg sexp + let record_only_pairs_expected loc sexp = let msg = loc ^ @@ -117,8 +123,5 @@ let empty_list_invalid_poly_var loc sexp = of_sexp_error ( loc ^ "_of_sexp: the empty list is an invalid polymorphic variant") sexp -let silly_type loc sexp = - of_sexp_error (loc ^ "_of_sexp: trying to convert a silly type") sexp - let empty_type loc sexp = of_sexp_error (loc ^ "_of_sexp: trying to convert an empty type") sexp diff --git a/sexpable.ml b/src/sexpable.ml index 4049fa2..4049fa2 100644 --- a/sexpable.ml +++ b/src/sexpable.ml diff --git a/src/sexplib0.ml b/src/sexplib0.ml new file mode 100644 index 0000000..48c7527 --- /dev/null +++ b/src/sexplib0.ml @@ -0,0 +1,9 @@ +module Sexp = Sexp +module Sexp_conv = Sexp_conv +module Sexp_conv_error = Sexp_conv_error +module Sexpable = Sexpable + +module Private = struct + module Lazy_group_id = Lazy_group_id + module Raw_grammar = Raw_grammar +end |