diff options
author | Julien Puydt <jpuydt@debian.org> | 2022-08-15 13:59:22 +0200 |
---|---|---|
committer | Julien Puydt <jpuydt@debian.org> | 2022-08-15 13:59:22 +0200 |
commit | b0292b78bed3be1301edd77d712de8f86cec714e (patch) | |
tree | 128442321e79f41f9275735b97b1bed01a8aa913 | |
parent | 3aa58303c4d5f409cd71414da9cd497e5deaf14f (diff) | |
parent | aa0b49810808485e222ade34feef98152ef378bf (diff) |
Update upstream source from tag 'upstream/0.15.1'
Update to upstream version '0.15.1'
with Debian dir 1b647c90b7ca49ed76b6b2244fce2897f5451d0e
-rw-r--r-- | LICENSE.md | 2 | ||||
-rw-r--r-- | README.md | 9 | ||||
-rw-r--r-- | sexplib0.opam | 6 | ||||
-rw-r--r-- | src/lazy_group_id.ml | 21 | ||||
-rw-r--r-- | src/lazy_group_id.mli | 9 | ||||
-rw-r--r-- | src/raw_grammar.ml | 217 | ||||
-rw-r--r-- | src/sexp.ml | 225 | ||||
-rw-r--r-- | src/sexp.mli | 56 | ||||
-rw-r--r-- | src/sexp_conv.ml | 472 | ||||
-rw-r--r-- | src/sexp_conv.mli | 162 | ||||
-rw-r--r-- | src/sexp_conv_error.ml | 77 | ||||
-rw-r--r-- | src/sexp_conv_grammar.ml | 29 | ||||
-rw-r--r-- | src/sexp_conv_grammar.mli | 20 | ||||
-rw-r--r-- | src/sexp_grammar.ml | 191 | ||||
-rw-r--r-- | src/sexpable.ml | 10 | ||||
-rw-r--r-- | src/sexplib0.ml | 6 |
16 files changed, 736 insertions, 776 deletions
@@ -1,6 +1,6 @@ The MIT License -Copyright (c) 2005--2020 Jane Street Group, LLC <opensource@janestreet.com> +Copyright (c) 2005--2022 Jane Street Group, LLC <opensource@janestreet.com> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md new file mode 100644 index 0000000..6977148 --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +"Sexplib0 - a low-dep version of Sexplib" +========================================= + +`sexplib0` is a lightweight portion of `sexplib`, for situations where a +dependency on `sexplib` is problematic. + +It has the type definition and the printing functions, but not parsing. + +See [sexplib](https://github.com/janestreet/sexplib) for documentation. diff --git a/sexplib0.opam b/sexplib0.opam index 80d083b..0bcc2a1 100644 --- a/sexplib0.opam +++ b/sexplib0.opam @@ -1,7 +1,7 @@ opam-version: "2.0" -version: "v0.14.0" -maintainer: "opensource@janestreet.com" -authors: ["Jane Street Group, LLC <opensource@janestreet.com>"] +version: "v0.15.0" +maintainer: "Jane Street developers" +authors: ["Jane Street Group, LLC"] homepage: "https://github.com/janestreet/sexplib0" bug-reports: "https://github.com/janestreet/sexplib0/issues" dev-repo: "git+https://github.com/janestreet/sexplib0.git" diff --git a/src/lazy_group_id.ml b/src/lazy_group_id.ml deleted file mode 100644 index 8481668..0000000 --- a/src/lazy_group_id.ml +++ /dev/null @@ -1,21 +0,0 @@ -(** Why allocate a ref instead of storing the int directly? - - We generate many more sexp grammars than actually get used, so we prefer to defer the - id until we need it. The compiler can optimize away allocations that nobody touches. -*) - -type t = int Lazy.t - -let create = - let next = ref 0 in - fun () -> lazy ( - (* As long as we don't give up the global Ocaml runtime lock by allocating, we can - treat the read and write as atomic. See "20.12.2 Parallel execution of long-running - C code" in the 4.09 manual. *) - let id = !next in - next := id + 1; - id) - -let force (t : t) = Lazy.force t - -let compare a b = compare (force a) (force b) diff --git a/src/lazy_group_id.mli b/src/lazy_group_id.mli deleted file mode 100644 index c02568c..0000000 --- a/src/lazy_group_id.mli +++ /dev/null @@ -1,9 +0,0 @@ -(** [Lazy_group_id] is a cheap way to allocate unique integer identifiers for sexp - grammars. See [sexp_intf.ml] for details. *) - -type t - -val compare : t -> t -> int - -val create : unit -> t -val force : t -> int diff --git a/src/raw_grammar.ml b/src/raw_grammar.ml deleted file mode 100644 index e6e4396..0000000 --- a/src/raw_grammar.ml +++ /dev/null @@ -1,217 +0,0 @@ -(** Representation of S-expression grammars *) - -(** This module defines the representation of S-expression grammars produced by - [@@deriving sexp_grammar]. It introduces an AST to represent these grammars and a - notion of "group" to represent the grammars of a mutually recursive set of OCaml - type declaration. - - The grammar for a given type expression can be constructed via: {[ - - [%sexp_grammar: <type>] - - ]} - - {3 Goals and non-goals} - - Functionality goals: With post-processing, sexp grammars can be pretty-printed in a - human-readable format and provides enough information to implement completion and - validation tools. - - Performance goals: [@@deriving sexp_grammar] adds minimal overhead and introduces no - toplevel side effect. The compiler can lift the vast majority of ASTs generated by - [@@deriving sexp_grammar] as global constants. Common sub-grammars are usually shared, - particularly when they derive from multiple applications of the same functor. - - Non-goals: Stability, although we will make changes backwards-compatible or at least - provide a reasonable upgrade path. - - In what follows, we describe how this is achieved. - - {3 Encoding of generated grammars to maximize sharing} - - A [group] contains the grammars for all types of a mutually recursive group of OCaml - type declarations. - - To ensure maximum sharing, a group is split into two parts: - - - The [generic_group] depends only on the textual type declarations. Where the type - declaration refers to an existing concrete type, the generic group takes a variable - to represent the grammar of that type. This means that the compiler can lift each - type declaration in the source code to a shared global constant. - - - The [group] binds the type variables of the [generic_group], either to concrete - grammars where the type declaration refers to a concrete type, or to another - variable where the type declaration itself was polymorphic. - - To understand this point better, imagine the following type declaration {[ - - type t = X of u - - ]} were explicitly split into its [generic_group] and [group] parts: {[ - - type 'u t_generic = X of 'u - type t = u t_generic - - ]} - - If [u] came from a functor argument, it's easy to see that [t_generic] would be - exactly the same in all applications of the functor and only [t] would vary. The - grammar of [t_generic], which is the biggest part, would be shared between all - applications of the functor. - - {3 Processing of grammars} - - The [Raw_grammar.t] type optimizes for performance over ease of use. To help users - process the raw grammars into a more usable form, we keep two identifiers in the - generated grammars: - - - The [generic_group_id] uniquely identifies a [generic_group]. It is a hash of the - generic group itself. (It is okay that this scheme would conflate identical type - declarations, because the resulting generic groups would be identical as well.) - - - The [group_id] uniquely identifies a [group]. It is a unique integer, generated - lazily so that we don't create a side effect at module creation time. - - The exact processing would depend on the final application. We expect that a typical - consumer of sexp grammars would define less-indirected equivalents of the [t] and - [group] types, possibly re-using the [_ type_] and [Atom.t] types. -*) - -(** The label of a field, constructor, or constant. *) -type label = string - -type generic_group_id = string -type group_id = Lazy_group_id.t - -(** Variable names. These are used to improve readability of the printed grammars. - Internally, we use numerical indices to represent variables; see [Implicit_var] - below. *) -type var_name = string - -type type_name = string - -(** A grammatical type which classifies atoms. *) -module Atom = struct - type t = - | String (** Any atom. *) - | Bool (** One of [true], [false], [True], or [False]. *) - | Char (** A single-character atom. *) - | Float (** An atom which parses as a {!float}. *) - | Int (** An atom which parses as an integer, such as {!int} or {!int64}. *) - | This of { ignore_capitalization : bool; string : string } - (** Exactly that string, possibly modulo case in the first character. *) -end - -(** A grammatical type which classifies sexps. Corresponds to a non-terminal in a - context-free grammar. *) -type 't type_ = - | Any (** Any list or atom. *) - | Apply of 't type_ * 't type_ list (** Assign types to (explicit) type variables. *) - | Atom of Atom.t (** An atom, in particular one of the given {!Atom.t}. *) - | Explicit_bind of var_name list * 't type_ - (** In [Bind ([ "a"; "b" ], Explicit_var 0)], [Explicit_var 0] is ["a"]. One must bind - all available type variables: free variables are not permitted. *) - | Explicit_var of int - (** Indices for type variables, e.g. ['a], introduced by polymorphic definitions. - - Unlike de Bruijn indices, these are always bound by the nearest ancestral - [Explicit_bind]. *) - | Grammar of 't (** Embeds other types in a grammar. *) - | Implicit_var of int - (** Indices for type constructors, e.g. [int], in scope. Unlike de Bruijn indices, these - are always bound by the [implicit_vars] of the nearest enclosing [generic_groups]. - *) - | List of 't sequence_type - (** A list of a certain form. Depending on the {!sequence_type}, this might - correspond to an OCaml tuple, list, or embedded record. *) - | Option of 't type_ - (** An optional value. Either syntax recognized by [option_of_sexp] is supported: - [(Some 42)] or [(42)] for a value and [None] or [()] for no value. *) - | Record of 't record_type - (** A list of lists, representing a record of the given {!record_type}. For - validation, [Record recty] is equivalent to [List [Fields recty]]. *) - | Recursive of type_name - (** A type in the same mutually recursive group, possibly the current one. *) - | Union of 't type_ list - (** Any sexp matching any of the given types. {!Variant} should be preferred when - possible, especially for complex types, since validation and other algorithms may - behave exponentially. - - One useful special case is [Union []], the empty type. This is occasionally - generated for things such as abstract types. *) - | Variant of 't variant_type (** A sexp which matches the given {!variant_type}. *) - -(** A grammatical type which classifies sequences of sexps. Here, a "sequence" may mean - either a list on its own or, say, the sexps following a constructor in a list - matching a {!variant_type}. - - Certain operations may greatly favor simple sequence types. For example, matching - [List [ Many type_ ]] is easy for any type [type_] (assuming [type_] itself is - easy), but [List [ Many type1; Many type2 ]] may require backtracking. Grammars - derived from OCaml types will only have "nice" sequence types. *) -and 't sequence_type = 't component list - -(** Part of a sequence of sexps. *) -and 't component = - | One of 't type_ (** Exactly one sexp of the given type. *) - | Optional of 't type_ (** One sexp of the given type, or nothing at all. *) - | Many of 't type_ (** Any number of sexps, each of the given type. *) - | Fields of 't record_type - (** A succession of lists, collectively defining a record of the given {!record_type}. - The fields may appear in any order. The number of lists is not necessarily fixed, - as some fields may be optional. In particular, if all fields are optional, there - may be zero lists. *) - -(** A tagged union of grammatical types. Grammars derived from OCaml variants will have - variant types. *) -and 't variant_type = - { ignore_capitalization : bool - (** If true, the grammar is insensitive to the case of the first letter of the label. - This matches the behavior of derived [sexp_of_t] functions. *) - ; alts : (label * 't sequence_type) list - (** An association list of labels (constructors) to sequence types. A matching sexp is - a list whose head is the label as an atom and whose tail matches the given - sequence type. As a special case, an alternative whose sequence is empty matches - an atom rather than a list (i.e., [label] rather than [(label)]). This is in - keeping with generated [t_of_sexp] functions. - - As a workaround, to match [(label)] one could use - [("label", [ Optional (Union []) ])]. *) - } - -(** A collection of field definitions specifying a record type. Consists only of an - association list from labels to fields. *) -and 't record_type = - { allow_extra_fields: bool - ; fields: (label * 't field) list - } - -(** A field in a record. *) -and 't field = - { optional : bool (** If true, the field is optional. *) - ; args : 't sequence_type - (** A sequence type which the arguments to the field must match. An empty sequence is - permissible but would not be generated for any OCaml type. *) - } - -type t = - | Ref of type_name * group - | Inline of t type_ - -and group = - { gid : group_id - ; generic_group : generic_group - ; origin : string - (** [origin] provides a human-readable hint as to where the type was defined. - - For a globally unique identifier, use [gid] instead. - - See [ppx/ppx_sexp_conv/test/expect/test_origin.ml] for examples. *) - ; apply_implicit : t list - } - -and generic_group = - { implicit_vars : var_name list - ; ggid : generic_group_id - ; types : (type_name * t type_) list - } diff --git a/src/sexp.ml b/src/sexp.ml index c23ca2d..d706fee 100644 --- a/src/sexp.ml +++ b/src/sexp.ml @@ -3,41 +3,42 @@ (* blit_string doesn't exist in [StdLabels.Bytes]... *) let bytes_blit_string ~src ~src_pos ~dst ~dst_pos ~len = Bytes.blit_string src src_pos dst dst_pos len +;; open StdLabels open Format (** Type of S-expressions *) -type t = Atom of string | List of t list +type t = + | Atom of string + | List of t list let sexp_of_t t = t let t_of_sexp t = t -let t_sexp_grammar = Raw_grammar.Inline Any let rec compare_list a b = match a, b with - | [] , [] -> 0 - | [] , _ -> -1 - | _ , [] -> 1 - | x::xs, y::ys -> + | [], [] -> 0 + | [], _ -> -1 + | _, [] -> 1 + | x :: xs, y :: ys -> let res = compare x y in - if res <> 0 then res - else compare_list xs ys + if res <> 0 then res else compare_list xs ys and compare a b = - if a == b then - 0 - else + if a == b + then 0 + else ( match a, b with | Atom a, Atom b -> String.compare a b | Atom _, _ -> -1 | _, Atom _ -> 1 - | List a, List b -> compare_list a b + | List a, List b -> compare_list a b) +;; let equal a b = compare a b = 0 exception Not_found_s of t - exception Of_sexp_error of exn * t module Printing = struct @@ -49,69 +50,94 @@ module Printing = struct let must_escape str = let len = String.length str in - len = 0 || + len = 0 + || let rec loop str ix = match str.[ix] with | '"' | '(' | ')' | ';' | '\\' -> true - | '|' -> ix > 0 && let next = ix - 1 in Char.equal str.[next] '#' || loop str next - | '#' -> ix > 0 && let next = ix - 1 in Char.equal str.[next] '|' || loop str next + | '|' -> + ix > 0 + && + let next = ix - 1 in + Char.equal str.[next] '#' || loop str next + | '#' -> + ix > 0 + && + let next = ix - 1 in + Char.equal str.[next] '|' || loop str next | '\000' .. '\032' | '\127' .. '\255' -> true | _ -> ix > 0 && loop str (ix - 1) in loop str (len - 1) + ;; let escaped s = let n = ref 0 in for i = 0 to String.length s - 1 do - n := !n + - (match String.unsafe_get s i with - | '\"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2 - | ' ' .. '~' -> 1 - | _ -> 4) + n + := !n + + + match String.unsafe_get s i with + | '\"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2 + | ' ' .. '~' -> 1 + | _ -> 4 done; - if !n = String.length s then s else begin + if !n = String.length s + then s + else ( let s' = Bytes.create !n in n := 0; for i = 0 to String.length s - 1 do - begin match String.unsafe_get s i with - | ('\"' | '\\') as c -> - Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n c - | '\n' -> - Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n 'n' - | '\t' -> - Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n 't' - | '\r' -> - Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n 'r' - | '\b' -> - Bytes.unsafe_set s' !n '\\'; incr n; Bytes.unsafe_set s' !n 'b' - | (' ' .. '~') as c -> Bytes.unsafe_set s' !n c - | c -> - let a = Char.code c in - Bytes.unsafe_set s' !n '\\'; - incr n; - Bytes.unsafe_set s' !n (Char.chr (48 + a / 100)); - incr n; - Bytes.unsafe_set s' !n (Char.chr (48 + (a / 10) mod 10)); - incr n; - Bytes.unsafe_set s' !n (Char.chr (48 + a mod 10)); - end; + (match String.unsafe_get s i with + | ('\"' | '\\') as c -> + Bytes.unsafe_set s' !n '\\'; + incr n; + Bytes.unsafe_set s' !n c + | '\n' -> + Bytes.unsafe_set s' !n '\\'; + incr n; + Bytes.unsafe_set s' !n 'n' + | '\t' -> + Bytes.unsafe_set s' !n '\\'; + incr n; + Bytes.unsafe_set s' !n 't' + | '\r' -> + Bytes.unsafe_set s' !n '\\'; + incr n; + Bytes.unsafe_set s' !n 'r' + | '\b' -> + Bytes.unsafe_set s' !n '\\'; + incr n; + Bytes.unsafe_set s' !n 'b' + | ' ' .. '~' as c -> Bytes.unsafe_set s' !n c + | c -> + let a = Char.code c in + Bytes.unsafe_set s' !n '\\'; + incr n; + Bytes.unsafe_set s' !n (Char.chr (48 + (a / 100))); + incr n; + Bytes.unsafe_set s' !n (Char.chr (48 + (a / 10 mod 10))); + incr n; + Bytes.unsafe_set s' !n (Char.chr (48 + (a mod 10)))); incr n done; - Bytes.unsafe_to_string s' - end + Bytes.unsafe_to_string s') + ;; let esc_str str = let estr = escaped str in let elen = String.length estr in let res = Bytes.create (elen + 2) in bytes_blit_string ~src:estr ~src_pos:0 ~dst:res ~dst_pos:1 ~len:elen; - Bytes.unsafe_set res 0 '"'; + Bytes.unsafe_set res 0 '"'; Bytes.unsafe_set res (elen + 1) '"'; Bytes.unsafe_to_string res + ;; let index_of_newline str start = - try Some (String.index_from str start '\n') - with Not_found -> None + try Some (String.index_from str start '\n') with + | Not_found -> None + ;; let get_substring str index end_pos_opt = let end_pos = @@ -120,24 +146,26 @@ module Printing = struct | Some end_pos -> end_pos in String.sub str ~pos:index ~len:(end_pos - index) + ;; let is_one_line str = match index_of_newline str 0 with | None -> true | Some index -> index + 1 = String.length str + ;; let pp_hum_maybe_esc_str ppf str = - if not (must_escape str) then - pp_print_string ppf str - else if is_one_line str then - pp_print_string ppf (esc_str str) - else begin + if not (must_escape str) + then pp_print_string ppf str + else if is_one_line str + then pp_print_string ppf (esc_str str) + else ( let rec loop index = let next_newline = index_of_newline str index in let next_line = get_substring str index next_newline in pp_print_string ppf (escaped next_line); match next_newline with - | None -> () + | None -> () | Some newline_index -> pp_print_string ppf "\\"; pp_force_newline ppf (); @@ -149,11 +177,10 @@ module Printing = struct pp_print_string ppf " \""; loop 0; pp_print_string ppf "\""; - pp_close_box ppf (); - end + pp_close_box ppf ()) + ;; - let mach_maybe_esc_str str = - if must_escape str then esc_str str else str + let mach_maybe_esc_str str = if must_escape str then esc_str str else str (* Output of S-expressions to formatters *) @@ -174,6 +201,7 @@ module Printing = struct | [] -> pp_print_string ppf ")"; pp_close_box ppf () + ;; let rec pp_mach_internal may_need_space ppf = function | Atom str -> @@ -187,24 +215,27 @@ module Printing = struct let may_need_space = pp_mach_internal false ppf h in pp_mach_rest may_need_space ppf t; false - | List [] -> pp_print_string ppf "()"; false + | List [] -> + pp_print_string ppf "()"; + false and pp_mach_rest may_need_space ppf = function | h :: t -> let may_need_space = pp_mach_internal may_need_space ppf h in pp_mach_rest may_need_space ppf t | [] -> pp_print_string ppf ")" + ;; let pp_hum ppf sexp = pp_hum_indent !default_indent ppf sexp - let pp_mach ppf sexp = ignore (pp_mach_internal false ppf sexp) let pp = pp_mach (* Sexp size *) - let rec size_loop (v, c as acc) = function + let rec size_loop ((v, c) as acc) = function | Atom str -> v + 1, c + String.length str | List lst -> List.fold_left lst ~init:acc ~f:size_loop + ;; let size sexp = size_loop (0, 0) sexp @@ -213,6 +244,7 @@ module Printing = struct let to_buffer_hum ~buf ?(indent = !default_indent) sexp = let ppf = Format.formatter_of_buffer buf in Format.fprintf ppf "%a@?" (pp_hum_indent indent) sexp + ;; let to_buffer_mach ~buf sexp = let rec loop may_need_space = function @@ -227,13 +259,17 @@ module Printing = struct let may_need_space = loop false h in loop_rest may_need_space t; false - | List [] -> Buffer.add_string buf "()"; false + | List [] -> + Buffer.add_string buf "()"; + false and loop_rest may_need_space = function | h :: t -> let may_need_space = loop may_need_space h in loop_rest may_need_space t - | [] -> Buffer.add_char buf ')' in + | [] -> Buffer.add_char buf ')' + in ignore (loop false sexp) + ;; let to_buffer = to_buffer_mach @@ -250,13 +286,17 @@ module Printing = struct let may_need_space = loop false h in loop_rest may_need_space t; false - | List [] -> add_string buf "()"; false + | List [] -> + add_string buf "()"; + false and loop_rest may_need_space = function | h :: t -> let may_need_space = loop may_need_space h in loop_rest may_need_space t - | [] -> add_char buf ')' in + | [] -> add_char buf ')' + in ignore (loop false sexp) + ;; (* The maximum size of a thing on the minor heap is 256 words. Previously, this size of the returned buffer here was 4096 bytes, which @@ -275,12 +315,15 @@ module Printing = struct (* String conversions *) let to_string_hum ?indent = function - | Atom str when (match index_of_newline str 0 with None -> true | Some _ -> false) -> - mach_maybe_esc_str str + | Atom str + when match index_of_newline str 0 with + | None -> true + | Some _ -> false -> mach_maybe_esc_str str | sexp -> let buf = buffer () in to_buffer_hum ?indent sexp ~buf; Buffer.contents buf + ;; let to_string_mach = function | Atom str -> mach_maybe_esc_str str @@ -288,59 +331,27 @@ module Printing = struct let buf = buffer () in to_buffer_mach sexp ~buf; Buffer.contents buf + ;; let to_string = to_string_mach end + include Printing let of_float_style : [ `Underscores | `No_underscores ] ref = ref `No_underscores -let of_int_style : [ `Underscores | `No_underscores ] ref = ref `No_underscores +let of_int_style : [ `Underscores | `No_underscores ] ref = ref `No_underscores module Private = struct include Printing - - module Raw_grammar = struct - include Raw_grammar - - module Builtin = struct - let unit_sexp_grammar = Inline (List []) - let bool_sexp_grammar = Inline (Atom Bool) - let string_sexp_grammar = Inline (Atom String) - let bytes_sexp_grammar = string_sexp_grammar - let char_sexp_grammar = Inline (Atom Char) - let int_sexp_grammar = Inline (Atom Int) - let float_sexp_grammar = Inline (Atom Float) - let int32_sexp_grammar = Inline (Atom Int) - let int64_sexp_grammar = Inline (Atom Int) - let nativeint_sexp_grammar = Inline (Atom Int) - let ref_sexp_grammar = Inline (Explicit_bind ([ "'a" ], Explicit_var 0)) - let lazy_t_sexp_grammar = Inline (Explicit_bind ([ "'a" ], Explicit_var 0)) - let option_sexp_grammar = Inline (Explicit_bind ([ "'a" ], Option (Explicit_var 0))) - - let list_sexp_grammar = - Inline (Explicit_bind ([ "'a" ], List [ Many (Explicit_var 0) ])) - ;; - - let array_sexp_grammar = list_sexp_grammar - end - - let empty_sexp_grammar = Inline (Union []) - let opaque_sexp_grammar = empty_sexp_grammar - let fun_sexp_grammar = empty_sexp_grammar - let tuple2_sexp_grammar = - Inline - (Explicit_bind - ([ "'a"; "'b" ], List [ One (Explicit_var 0); One (Explicit_var 1) ])) - ;; - end end let message name fields = let rec conv_fields = function | [] -> [] | (fname, fsexp) :: rest -> - match fname with - | "" -> fsexp :: conv_fields rest - | _ -> List [ Atom fname; fsexp ] :: conv_fields rest + (match fname with + | "" -> fsexp :: conv_fields rest + | _ -> List [ Atom fname; fsexp ] :: conv_fields rest) in List (Atom name :: conv_fields fields) +;; diff --git a/src/sexp.mli b/src/sexp.mli index 4d3f1cf..fe68a3c 100644 --- a/src/sexp.mli +++ b/src/sexp.mli @@ -1,13 +1,13 @@ (** Type of S-expressions *) -type t = Atom of string | List of t list +type t = + | Atom of string + | List of t list (*_ We don't use [@@deriving sexp] as this would generated references to [Sexplib], creating a circular dependency *) val t_of_sexp : t -> t val sexp_of_t : t -> t -val t_sexp_grammar : Raw_grammar.t - val equal : t -> t -> bool val compare : t -> t -> int @@ -76,7 +76,7 @@ val pp : Format.formatter -> t -> unit string in human readable form with indentation level [indent]. @param indent default = [!default_indent] *) -val to_string_hum : ?indent : int -> t -> string +val to_string_hum : ?indent:int -> t -> string (** [to_string_mach sexp] converts S-expression [sexp] to a string in machine readable (i.e. most compact) form. *) @@ -88,60 +88,28 @@ val to_string : t -> string (** {1 Styles} *) val of_float_style : [ `Underscores | `No_underscores ] ref -val of_int_style : [ `Underscores | `No_underscores ] ref +val of_int_style : [ `Underscores | `No_underscores ] ref + (*_ See the Jane Street Style Guide for an explanation of [Private] submodules: https://opensource.janestreet.com/standards/#private-submodules *) module Private : sig - - (*_ exported for downstream tools *) - module Raw_grammar : sig - include module type of struct - include Raw_grammar - end - - module Builtin : sig - val unit_sexp_grammar : t - val bool_sexp_grammar : t - val string_sexp_grammar : t - val bytes_sexp_grammar : t - val char_sexp_grammar : t - val int_sexp_grammar : t - val float_sexp_grammar : t - val int32_sexp_grammar : t - val int64_sexp_grammar : t - val nativeint_sexp_grammar : t - val ref_sexp_grammar : t - val lazy_t_sexp_grammar : t - val option_sexp_grammar : t - val list_sexp_grammar : t - val array_sexp_grammar : t - end - - val empty_sexp_grammar : t - val opaque_sexp_grammar : t - val fun_sexp_grammar : t - val tuple2_sexp_grammar : t - end - (*_ Exported for sexplib *) val size : t -> int * int - val buffer : unit -> Buffer.t + val to_buffer : buf:Buffer.t -> t -> unit + val to_buffer_hum : buf:Buffer.t -> ?indent:int -> t -> unit + val to_buffer_mach : buf:Buffer.t -> t -> unit - val to_buffer : buf:Buffer.t -> t -> unit - val to_buffer_hum : buf:Buffer.t -> ?indent:int -> t -> unit - val to_buffer_mach : buf:Buffer.t -> t -> unit val to_buffer_gen - : buf : 'buffer - -> add_char : ('buffer -> char -> unit) - -> add_string : ('buffer -> string -> unit) + : buf:'buffer + -> add_char:('buffer -> char -> unit) + -> add_string:('buffer -> string -> unit) -> t -> unit val mach_maybe_esc_str : string -> string val must_escape : string -> bool val esc_str : string -> string - end diff --git a/src/sexp_conv.ml b/src/sexp_conv.ml index c185c02..9cc83e7 100644 --- a/src/sexp_conv.ml +++ b/src/sexp_conv.ml @@ -1,16 +1,10 @@ (* Utility Module for S-expression Conversions *) -let polymorphic_compare = compare + open StdLabels open MoreLabels open Printf open Sexp -type sexp_bool = bool -type 'a sexp_option = 'a option -type 'a sexp_list = 'a list -type 'a sexp_array = 'a array -type 'a sexp_opaque = 'a - (* Conversion of OCaml-values to S-expressions *) external format_float : string -> float -> string = "caml_format_float" @@ -20,22 +14,17 @@ external format_float : string -> float -> string = "caml_format_float" which was converted from a decimal (string) with <= 15 significant digits. So it's worth trying first to avoid things like "3.1400000000000001". - See comment above [to_string_round_trippable] in {!Core_kernel.Float} for + See comment above [to_string_round_trippable] in {!Core.Float} for detailed explanation and examples. *) let default_string_of_float = ref (fun x -> let y = format_float "%.15G" x in - if (float_of_string y) = x then - y - else - format_float "%.17G" x) + if float_of_string y = x then y else format_float "%.17G" x) ;; let read_old_option_format = ref true let write_old_option_format = ref true - let list_map f l = List.rev (List.rev_map l ~f) - let sexp_of_unit () = List [] let sexp_of_bool b = Atom (string_of_bool b) let sexp_of_string str = Atom str @@ -50,16 +39,17 @@ let sexp_of_ref sexp_of__a rf = sexp_of__a !rf let sexp_of_lazy_t sexp_of__a lv = sexp_of__a (Lazy.force lv) let sexp_of_option sexp_of__a = function - | Some x when !write_old_option_format -> List [sexp_of__a x] - | Some x -> List [Atom "some"; sexp_of__a x] + | Some x when !write_old_option_format -> List [ sexp_of__a x ] + | Some x -> List [ Atom "some"; sexp_of__a x ] | None when !write_old_option_format -> List [] | None -> Atom "none" +;; -let sexp_of_pair sexp_of__a sexp_of__b (a, b) = - List [sexp_of__a a; sexp_of__b b] +let sexp_of_pair sexp_of__a sexp_of__b (a, b) = List [ sexp_of__a a; sexp_of__b b ] let sexp_of_triple sexp_of__a sexp_of__b sexp_of__c (a, b, c) = - List [sexp_of__a a; sexp_of__b b; sexp_of__c c] + List [ sexp_of__a a; sexp_of__b b; sexp_of__c c ] +;; (* List.rev (List.rev_map ...) is tail recursive, the OCaml standard library List.map is NOT. *) @@ -71,15 +61,16 @@ let sexp_of_array sexp_of__a ar = lst_ref := sexp_of__a ar.(i) :: !lst_ref done; List !lst_ref +;; let sexp_of_hashtbl sexp_of_key sexp_of_val htbl = - let coll ~key:k ~data:v acc = List [sexp_of_key k; sexp_of_val v] :: acc in + let coll ~key:k ~data:v acc = List [ sexp_of_key k; sexp_of_val v ] :: acc in List (Hashtbl.fold htbl ~init:[] ~f:coll) +;; let sexp_of_opaque _ = Atom "<opaque>" let sexp_of_fun _ = Atom "<fun>" - (* Exception converter registration and lookup *) module Exn_converter = struct @@ -90,94 +81,52 @@ module Exn_converter = struct (* Fast and automatic exception registration *) - module Int = struct - type t = int - - let compare t1 t2 = polymorphic_compare (t1 : int) t2 + module Registration = struct + type t = + { sexp_of_exn : exn -> Sexp.t + ; (* If [printexc = true] then this sexp converter is used for Printexc.to_string *) + printexc : bool + } end - module Exn_ids = Map.Make (Int) - - module Obj = struct - module Extension_constructor = struct - [@@@ocaml.warning "-3"] + module Exn_table = Ephemeron.K1.Make (struct type t = extension_constructor - let id = Obj.extension_id - let of_val = Obj.extension_constructor - end - end - let exn_id_map - : (Obj.Extension_constructor.t, exn -> Sexp.t) Ephemeron.K1.t Exn_ids.t ref = - ref Exn_ids.empty - - (* [Obj.extension_id] works on both the exception itself, and the extension slot of the - exception. *) - let rec clean_up_handler (slot : Obj.Extension_constructor.t) = - let id = Obj.Extension_constructor.id slot in - let old_exn_id_map = !exn_id_map in - let new_exn_id_map = Exn_ids.remove id old_exn_id_map in - (* This trick avoids mutexes and should be fairly efficient *) - if !exn_id_map != old_exn_id_map then - clean_up_handler slot - else - exn_id_map := new_exn_id_map + let equal = ( == ) + let hash = Obj.Extension_constructor.id + end) + + let the_exn_table : Registration.t Exn_table.t = Exn_table.create 17 (* Ephemerons are used so that [sexp_of_exn] closure don't keep the extension_constructor live. *) - let add ?(finalise = true) extension_constructor sexp_of_exn = - let id = Obj.Extension_constructor.id extension_constructor in - let rec loop () = - let old_exn_id_map = !exn_id_map in - let ephe = Ephemeron.K1.create () in - Ephemeron.K1.set_data ephe sexp_of_exn; - Ephemeron.K1.set_key ephe extension_constructor; - let new_exn_id_map = Exn_ids.add old_exn_id_map ~key:id ~data:ephe in - (* This trick avoids mutexes and should be fairly efficient *) - if !exn_id_map != old_exn_id_map then - loop () - else begin - exn_id_map := new_exn_id_map; - if finalise then - try - Gc.finalise clean_up_handler extension_constructor - with Invalid_argument _ -> - (* Pre-allocated extension constructors cannot be finalised *) - () - end - in - loop () - - let add_auto ?finalise exn sexp_of_exn = - add ?finalise (Obj.Extension_constructor.of_val exn) sexp_of_exn - - let find_auto exn = - let id = Obj.Extension_constructor.id (Obj.Extension_constructor.of_val exn) in - match Exn_ids.find id !exn_id_map with - | exception Not_found -> None - | ephe -> - match Ephemeron.K1.get_data ephe with - | None -> None - | Some sexp_of_exn -> Some (sexp_of_exn exn) + let add ?(printexc = true) ?finalise:_ extension_constructor sexp_of_exn = + Exn_table.add the_exn_table extension_constructor { sexp_of_exn; printexc } + ;; + let find_auto ~for_printexc exn = + let extension_constructor = Obj.Extension_constructor.of_val exn in + match Exn_table.find_opt the_exn_table extension_constructor with + | None -> None + | Some { sexp_of_exn; printexc } -> + (match for_printexc, printexc with + | false, _ | _, true -> Some (sexp_of_exn exn) + | true, false -> None) + ;; module For_unit_tests_only = struct - let size () = Exn_ids.fold !exn_id_map ~init:0 ~f:(fun ~key:_ ~data:ephe acc -> - match Ephemeron.K1.get_data ephe with - | None -> acc - | Some _ -> acc + 1 - ) + let size () = (Exn_table.stats_alive the_exn_table).num_bindings end - end -let sexp_of_exn_opt exn = Exn_converter.find_auto exn - +let sexp_of_exn_opt_for_printexc exn = Exn_converter.find_auto ~for_printexc:true exn +let sexp_of_exn_opt exn = Exn_converter.find_auto ~for_printexc:false exn let sexp_of_exn exn = match sexp_of_exn_opt exn with - | None -> List [Atom (Printexc.to_string exn)] + | None -> List [ Atom (Printexc.to_string exn) ] | Some sexp -> sexp +;; let exn_to_string e = Sexp.to_string_hum (sexp_of_exn e) @@ -187,269 +136,280 @@ let exn_to_string e = Sexp.to_string_hum (sexp_of_exn e) registered, which is what we want. *) let () = Printexc.register_printer (fun exn -> - match sexp_of_exn_opt exn with + match sexp_of_exn_opt_for_printexc exn with | None -> None - | Some sexp -> - Some (Sexp.to_string_hum ~indent:2 sexp)) + | Some sexp -> Some (Sexp.to_string_hum ~indent:2 sexp)) +;; + +let printexc_prefer_sexp exn = + match sexp_of_exn_opt exn with + | None -> Printexc.to_string exn + | Some sexp -> Sexp.to_string_hum ~indent:2 sexp +;; (* Conversion of S-expressions to OCaml-values *) exception Of_sexp_error = Sexp.Of_sexp_error let record_check_extra_fields = ref true - let of_sexp_error_exn exc sexp = raise (Of_sexp_error (exc, sexp)) - let of_sexp_error what sexp = raise (Of_sexp_error (Failure what, sexp)) -let unit_of_sexp sexp = match sexp with +let unit_of_sexp sexp = + match sexp with | List [] -> () | Atom _ | List _ -> of_sexp_error "unit_of_sexp: empty list needed" sexp +;; -let bool_of_sexp sexp = match sexp with +let bool_of_sexp sexp = + match sexp with | Atom ("true" | "True") -> true | Atom ("false" | "False") -> false | Atom _ -> of_sexp_error "bool_of_sexp: unknown string" sexp | List _ -> of_sexp_error "bool_of_sexp: atom needed" sexp +;; -let string_of_sexp sexp = match sexp with +let string_of_sexp sexp = + match sexp with | Atom str -> str | List _ -> of_sexp_error "string_of_sexp: atom needed" sexp +;; -let bytes_of_sexp sexp = match sexp with +let bytes_of_sexp sexp = + match sexp with | Atom str -> Bytes.of_string str | List _ -> of_sexp_error "bytes_of_sexp: atom needed" sexp +;; - -let char_of_sexp sexp = match sexp with +let char_of_sexp sexp = + match sexp with | Atom str -> - if String.length str <> 1 then - of_sexp_error - "char_of_sexp: atom string must contain one character only" sexp; + if String.length str <> 1 + then of_sexp_error "char_of_sexp: atom string must contain one character only" sexp; str.[0] | List _ -> of_sexp_error "char_of_sexp: atom needed" sexp +;; -let int_of_sexp sexp = match sexp with +let int_of_sexp sexp = + match sexp with | Atom str -> - (try int_of_string str - with exc -> of_sexp_error ("int_of_sexp: " ^ exn_to_string exc) sexp) + (try int_of_string str with + | exc -> of_sexp_error ("int_of_sexp: " ^ exn_to_string exc) sexp) | List _ -> of_sexp_error "int_of_sexp: atom needed" sexp +;; -let float_of_sexp sexp = match sexp with +let float_of_sexp sexp = + match sexp with | Atom str -> - (try float_of_string str - with exc -> - of_sexp_error ("float_of_sexp: " ^ exn_to_string exc) sexp) + (try float_of_string str with + | exc -> of_sexp_error ("float_of_sexp: " ^ exn_to_string exc) sexp) | List _ -> of_sexp_error "float_of_sexp: atom needed" sexp +;; -let int32_of_sexp sexp = match sexp with +let int32_of_sexp sexp = + match sexp with | Atom str -> - (try Int32.of_string str - with exc -> - of_sexp_error ("int32_of_sexp: " ^ exn_to_string exc) sexp) + (try Int32.of_string str with + | exc -> of_sexp_error ("int32_of_sexp: " ^ exn_to_string exc) sexp) | List _ -> of_sexp_error "int32_of_sexp: atom needed" sexp +;; -let int64_of_sexp sexp = match sexp with +let int64_of_sexp sexp = + match sexp with | Atom str -> - (try Int64.of_string str - with exc -> - of_sexp_error ("int64_of_sexp: " ^ exn_to_string exc) sexp) + (try Int64.of_string str with + | exc -> of_sexp_error ("int64_of_sexp: " ^ exn_to_string exc) sexp) | List _ -> of_sexp_error "int64_of_sexp: atom needed" sexp +;; -let nativeint_of_sexp sexp = match sexp with +let nativeint_of_sexp sexp = + match sexp with | Atom str -> - (try Nativeint.of_string str - with exc -> - of_sexp_error ("nativeint_of_sexp: " ^ exn_to_string exc) sexp) + (try Nativeint.of_string str with + | exc -> of_sexp_error ("nativeint_of_sexp: " ^ exn_to_string exc) sexp) | List _ -> of_sexp_error "nativeint_of_sexp: atom needed" sexp +;; let ref_of_sexp a__of_sexp sexp = ref (a__of_sexp sexp) let lazy_t_of_sexp a__of_sexp sexp = Lazy.from_val (a__of_sexp sexp) let option_of_sexp a__of_sexp sexp = - if !read_old_option_format then + if !read_old_option_format + then ( match sexp with | List [] | Atom ("none" | "None") -> None - | List [el] | List [Atom ("some" | "Some"); el] -> Some (a__of_sexp el) - | List _ -> - of_sexp_error "option_of_sexp: list must represent optional value" sexp - | Atom _ -> of_sexp_error "option_of_sexp: only none can be atom" sexp - else + | List [ el ] | List [ Atom ("some" | "Some"); el ] -> Some (a__of_sexp el) + | List _ -> of_sexp_error "option_of_sexp: list must represent optional value" sexp + | Atom _ -> of_sexp_error "option_of_sexp: only none can be atom" sexp) + else ( match sexp with | Atom ("none" | "None") -> None - | List [Atom ("some" | "Some"); el] -> Some (a__of_sexp el) + | List [ Atom ("some" | "Some"); el ] -> Some (a__of_sexp el) | Atom _ -> of_sexp_error "option_of_sexp: only none can be atom" sexp - | List _ -> of_sexp_error "option_of_sexp: list must be (some el)" sexp + | List _ -> of_sexp_error "option_of_sexp: list must be (some el)" sexp) +;; -let pair_of_sexp a__of_sexp b__of_sexp sexp = match sexp with - | List [a_sexp; b_sexp] -> +let pair_of_sexp a__of_sexp b__of_sexp sexp = + match sexp with + | List [ a_sexp; b_sexp ] -> let a = a__of_sexp a_sexp in let b = b__of_sexp b_sexp in a, b | List _ -> - of_sexp_error - "pair_of_sexp: list must contain exactly two elements only" sexp + of_sexp_error "pair_of_sexp: list must contain exactly two elements only" sexp | Atom _ -> of_sexp_error "pair_of_sexp: list needed" sexp +;; -let triple_of_sexp a__of_sexp b__of_sexp c__of_sexp sexp = match sexp with - | List [a_sexp; b_sexp; c_sexp] -> +let triple_of_sexp a__of_sexp b__of_sexp c__of_sexp sexp = + match sexp with + | List [ a_sexp; b_sexp; c_sexp ] -> let a = a__of_sexp a_sexp in let b = b__of_sexp b_sexp in let c = c__of_sexp c_sexp in a, b, c | List _ -> - of_sexp_error - "triple_of_sexp: list must contain exactly three elements only" sexp + of_sexp_error "triple_of_sexp: list must contain exactly three elements only" sexp | Atom _ -> of_sexp_error "triple_of_sexp: list needed" sexp +;; -let list_of_sexp a__of_sexp sexp = match sexp with +let list_of_sexp a__of_sexp sexp = + match sexp with | List lst -> let rev_lst = List.rev_map lst ~f:a__of_sexp in List.rev rev_lst | Atom _ -> of_sexp_error "list_of_sexp: list needed" sexp +;; -let array_of_sexp a__of_sexp sexp = match sexp with +let array_of_sexp a__of_sexp sexp = + match sexp with | List [] -> [||] | List (h :: t) -> let len = List.length t + 1 in let res = Array.make len (a__of_sexp h) in let rec loop i = function | [] -> res - | h :: t -> res.(i) <- a__of_sexp h; loop (i + 1) t in + | h :: t -> + res.(i) <- a__of_sexp h; + loop (i + 1) t + in loop 1 t | Atom _ -> of_sexp_error "array_of_sexp: list needed" sexp +;; -let hashtbl_of_sexp key_of_sexp val_of_sexp sexp = match sexp with +let hashtbl_of_sexp key_of_sexp val_of_sexp sexp = + match sexp with | List lst -> let htbl = Hashtbl.create 0 in let act = function - | List [k_sexp; v_sexp] -> + | List [ k_sexp; v_sexp ] -> Hashtbl.add htbl ~key:(key_of_sexp k_sexp) ~data:(val_of_sexp v_sexp) - | List _ | Atom _ -> - of_sexp_error "hashtbl_of_sexp: tuple list needed" sexp + | List _ | Atom _ -> of_sexp_error "hashtbl_of_sexp: tuple list needed" sexp in List.iter lst ~f:act; htbl | Atom _ -> of_sexp_error "hashtbl_of_sexp: list needed" sexp +;; let opaque_of_sexp sexp = of_sexp_error "opaque_of_sexp: cannot convert opaque values" sexp +;; + +let fun_of_sexp sexp = of_sexp_error "fun_of_sexp: cannot convert function values" sexp + +(* Sexp Grammars *) -let fun_of_sexp sexp = - of_sexp_error "fun_of_sexp: cannot convert function values" sexp +include Sexp_conv_grammar (* Registering default exception printers *) -let get_flc_error name (file, line, chr) = - Atom (sprintf "%s %s:%d:%d" name file line chr) +let get_flc_error name (file, line, chr) = Atom (sprintf "%s %s:%d:%d" name file line chr) let () = List.iter - ~f:(fun (extension_constructor, handler) -> Exn_converter.add ~finalise:false extension_constructor handler) - [ - ( - [%extension_constructor Assert_failure], - (function - | Assert_failure arg -> get_flc_error "Assert_failure" arg - | _ -> assert false) - );( - [%extension_constructor Exit], - (function - | Exit -> Atom "Exit" - | _ -> assert false) - );( - [%extension_constructor End_of_file], - (function - | End_of_file -> Atom "End_of_file" - | _ -> assert false) - );( - [%extension_constructor Failure], - (function - | Failure arg -> List [Atom "Failure"; Atom arg ] - | _ -> assert false) - );( - [%extension_constructor Not_found], - (function - | Not_found -> Atom "Not_found" - | _ -> assert false) - );( - [%extension_constructor Invalid_argument], - (function - | Invalid_argument arg -> List [Atom "Invalid_argument"; Atom arg ] - | _ -> assert false) - );( - [%extension_constructor Match_failure], - (function - | Match_failure arg -> get_flc_error "Match_failure" arg - | _ -> assert false) - );( - [%extension_constructor Not_found_s], - (function - | Not_found_s arg -> List [Atom "Not_found_s"; arg ] - | _ -> assert false) - );( - [%extension_constructor Sys_error], - (function - | Sys_error arg -> List [Atom "Sys_error"; Atom arg ] - | _ -> assert false) - );( - [%extension_constructor Arg.Help], - (function - | Arg.Help arg -> List [Atom "Arg.Help"; Atom arg ] - | _ -> assert false) - );( - [%extension_constructor Arg.Bad], - (function - | Arg.Bad arg -> List [Atom "Arg.Bad"; Atom arg ] - | _ -> assert false) - );( - [%extension_constructor Lazy.Undefined], - (function - | Lazy.Undefined -> Atom "Lazy.Undefined" - | _ -> assert false) - );( - [%extension_constructor Parsing.Parse_error], - (function - | Parsing.Parse_error -> Atom "Parsing.Parse_error" - | _ -> assert false) - );( - [%extension_constructor Queue.Empty], - (function - | Queue.Empty -> Atom "Queue.Empty" - | _ -> assert false) - );( - [%extension_constructor Scanf.Scan_failure], - (function - | Scanf.Scan_failure arg -> List [Atom "Scanf.Scan_failure"; Atom arg ] - | _ -> assert false) - );( - [%extension_constructor Stack.Empty], - (function - | Stack.Empty -> Atom "Stack.Empty" - | _ -> assert false) - );( - [%extension_constructor Stream.Failure], - (function - | Stream.Failure -> Atom "Stream.Failure" - | _ -> assert false) - );( - [%extension_constructor Stream.Error], - (function - | Stream.Error arg -> List [Atom "Stream.Error"; Atom arg ] - | _ -> assert false) - );( - [%extension_constructor Sys.Break], - (function - | Sys.Break -> Atom "Sys.Break" - | _ -> assert false) - );( - [%extension_constructor Of_sexp_error], - (function - | Of_sexp_error (exc, sexp) -> - List [Atom "Sexplib.Conv.Of_sexp_error"; sexp_of_exn exc; sexp] - | _ -> assert false) - ); + ~f:(fun (extension_constructor, handler) -> + Exn_converter.add ~printexc:false ~finalise:false extension_constructor handler) + [ ( [%extension_constructor Assert_failure] + , function + | Assert_failure arg -> get_flc_error "Assert_failure" arg + | _ -> assert false ) + ; ( [%extension_constructor Exit] + , function + | Exit -> Atom "Exit" + | _ -> assert false ) + ; ( [%extension_constructor End_of_file] + , function + | End_of_file -> Atom "End_of_file" + | _ -> assert false ) + ; ( [%extension_constructor Failure] + , function + | Failure arg -> List [ Atom "Failure"; Atom arg ] + | _ -> assert false ) + ; ( [%extension_constructor Not_found] + , function + | Not_found -> Atom "Not_found" + | _ -> assert false ) + ; ( [%extension_constructor Invalid_argument] + , function + | Invalid_argument arg -> List [ Atom "Invalid_argument"; Atom arg ] + | _ -> assert false ) + ; ( [%extension_constructor Match_failure] + , function + | Match_failure arg -> get_flc_error "Match_failure" arg + | _ -> assert false ) + ; ( [%extension_constructor Not_found_s] + , function + | Not_found_s arg -> List [ Atom "Not_found_s"; arg ] + | _ -> assert false ) + ; ( [%extension_constructor Sys_error] + , function + | Sys_error arg -> List [ Atom "Sys_error"; Atom arg ] + | _ -> assert false ) + ; ( [%extension_constructor Arg.Help] + , function + | Arg.Help arg -> List [ Atom "Arg.Help"; Atom arg ] + | _ -> assert false ) + ; ( [%extension_constructor Arg.Bad] + , function + | Arg.Bad arg -> List [ Atom "Arg.Bad"; Atom arg ] + | _ -> assert false ) + ; ( [%extension_constructor Lazy.Undefined] + , function + | Lazy.Undefined -> Atom "Lazy.Undefined" + | _ -> assert false ) + ; ( [%extension_constructor Parsing.Parse_error] + , function + | Parsing.Parse_error -> Atom "Parsing.Parse_error" + | _ -> assert false ) + ; ( [%extension_constructor Queue.Empty] + , function + | Queue.Empty -> Atom "Queue.Empty" + | _ -> assert false ) + ; ( [%extension_constructor Scanf.Scan_failure] + , function + | Scanf.Scan_failure arg -> List [ Atom "Scanf.Scan_failure"; Atom arg ] + | _ -> assert false ) + ; ( [%extension_constructor Stack.Empty] + , function + | Stack.Empty -> Atom "Stack.Empty" + | _ -> assert false ) + ; ( [%extension_constructor Sys.Break] + , function + | Sys.Break -> Atom "Sys.Break" + | _ -> assert false ) ] +;; + +let () = + List.iter + ~f:(fun (extension_constructor, handler) -> + Exn_converter.add ~printexc:true ~finalise:false extension_constructor handler) + [ ( [%extension_constructor Of_sexp_error] + , function + | Of_sexp_error (exc, sexp) -> + List [ Atom "Sexplib.Conv.Of_sexp_error"; sexp_of_exn exc; sexp ] + | _ -> assert false ) + ] +;; external ignore : _ -> unit = "%ignore" external ( = ) : 'a -> 'a -> bool = "%equal" diff --git a/src/sexp_conv.mli b/src/sexp_conv.mli index 90c2625..5690eb2 100644 --- a/src/sexp_conv.mli +++ b/src/sexp_conv.mli @@ -1,34 +1,20 @@ (** Utility Module for S-expression Conversions *) -(** Dummy definitions for "optional" options, lists, and for opaque types *) -type sexp_bool = bool -[@@deprecated "[since 2019-03] use [@sexp.bool] instead"] -type 'a sexp_option = 'a option -[@@deprecated "[since 2019-03] use [@sexp.option] instead"] -type 'a sexp_list = 'a list -[@@deprecated "[since 2019-03] use [@sexp.list] instead"] -type 'a sexp_array = 'a array -[@@deprecated "[since 2019-03] use [@sexp.array] instead"] -type 'a sexp_opaque = 'a -[@@deprecated "[since 2019-03] use [@sexp.opaque] instead"] - (** {6 Conversion of OCaml-values to S-expressions} *) -val default_string_of_float : (float -> string) ref (** [default_string_of_float] reference to the default function used to convert floats to strings. Initially set to [fun n -> sprintf "%.20G" n]. *) +val default_string_of_float : (float -> string) ref -val write_old_option_format : bool ref (** [write_old_option_format] reference for the default option format used to write option values. If set to [true], the old-style option format will be used, the new-style one otherwise. Initially set to [true]. *) +val write_old_option_format : bool ref - -val read_old_option_format : bool ref (** [read_old_option_format] reference for the default option format used to read option values. [Of_sexp_error] will be raised with old-style option values if this reference is set to [false]. @@ -37,241 +23,259 @@ val read_old_option_format : bool ref the only way to avoid breaking old code with the standard macros. Initially set to [true]. *) +val read_old_option_format : bool ref (** We re-export a tail recursive map function, because some modules override the standard library functions (e.g. [StdLabels]) which wrecks havoc with the camlp4 extension. *) val list_map : ('a -> 'b) -> 'a list -> 'b list -val sexp_of_unit : unit -> Sexp.t (** [sexp_of_unit ()] converts a value of type [unit] to an S-expression. *) +val sexp_of_unit : unit -> Sexp.t -val sexp_of_bool : bool -> Sexp.t (** [sexp_of_bool b] converts the value [x] of type [bool] to an S-expression. *) +val sexp_of_bool : bool -> Sexp.t -val sexp_of_string : string -> Sexp.t (** [sexp_of_bool str] converts the value [str] of type [string] to an S-expression. *) +val sexp_of_string : string -> Sexp.t -val sexp_of_bytes : bytes -> Sexp.t (** [sexp_of_bool str] converts the value [str] of type [bytes] to an S-expression. *) +val sexp_of_bytes : bytes -> Sexp.t -val sexp_of_char : char -> Sexp.t (** [sexp_of_char c] converts the value [c] of type [char] to an S-expression. *) +val sexp_of_char : char -> Sexp.t -val sexp_of_int : int -> Sexp.t (** [sexp_of_int n] converts the value [n] of type [int] to an S-expression. *) +val sexp_of_int : int -> Sexp.t -val sexp_of_float : float -> Sexp.t (** [sexp_of_float n] converts the value [n] of type [float] to an S-expression. *) +val sexp_of_float : float -> Sexp.t -val sexp_of_int32 : int32 -> Sexp.t (** [sexp_of_int32 n] converts the value [n] of type [int32] to an S-expression. *) +val sexp_of_int32 : int32 -> Sexp.t -val sexp_of_int64 : int64 -> Sexp.t (** [sexp_of_int64 n] converts the value [n] of type [int64] to an S-expression. *) +val sexp_of_int64 : int64 -> Sexp.t -val sexp_of_nativeint : nativeint -> Sexp.t (** [sexp_of_nativeint n] converts the value [n] of type [nativeint] to an S-expression. *) +val sexp_of_nativeint : nativeint -> Sexp.t -val sexp_of_ref : ('a -> Sexp.t) -> 'a ref -> Sexp.t (** [sexp_of_ref conv r] converts the value [r] of type ['a ref] to an S-expression. Uses [conv] to convert values of type ['a] to an S-expression. *) +val sexp_of_ref : ('a -> Sexp.t) -> 'a ref -> Sexp.t -val sexp_of_lazy_t : ('a -> Sexp.t) -> 'a lazy_t -> Sexp.t (** [sexp_of_lazy_t conv l] converts the value [l] of type ['a lazy_t] to an S-expression. Uses [conv] to convert values of type ['a] to an S-expression. *) +val sexp_of_lazy_t : ('a -> Sexp.t) -> 'a lazy_t -> Sexp.t -val sexp_of_option : ('a -> Sexp.t) -> 'a option -> Sexp.t (** [sexp_of_option conv opt] converts the value [opt] of type ['a option] to an S-expression. Uses [conv] to convert values of type ['a] to an S-expression. *) +val sexp_of_option : ('a -> Sexp.t) -> 'a option -> Sexp.t -val sexp_of_pair : ('a -> Sexp.t) -> ('b -> Sexp.t) -> 'a * 'b -> Sexp.t (** [sexp_of_pair conv1 conv2 pair] converts a pair to an S-expression. It uses its first argument to convert the first element of the pair, and its second argument to convert the second element of the pair. *) +val sexp_of_pair : ('a -> Sexp.t) -> ('b -> Sexp.t) -> 'a * 'b -> Sexp.t -val sexp_of_triple : - ('a -> Sexp.t) -> ('b -> Sexp.t) -> ('c -> Sexp.t) -> 'a * 'b * 'c -> Sexp.t (** [sexp_of_triple conv1 conv2 conv3 triple] converts a triple to an S-expression using [conv1], [conv2], and [conv3] to convert its elements. *) +val sexp_of_triple + : ('a -> Sexp.t) + -> ('b -> Sexp.t) + -> ('c -> Sexp.t) + -> 'a * 'b * 'c + -> Sexp.t -val sexp_of_list : ('a -> Sexp.t) -> 'a list -> Sexp.t (** [sexp_of_list conv lst] converts the value [lst] of type ['a list] to an S-expression. Uses [conv] to convert values of type ['a] to an S-expression. *) +val sexp_of_list : ('a -> Sexp.t) -> 'a list -> Sexp.t -val sexp_of_array : ('a -> Sexp.t) -> 'a array -> Sexp.t (** [sexp_of_array conv ar] converts the value [ar] of type ['a array] to an S-expression. Uses [conv] to convert values of type ['a] to an S-expression. *) +val sexp_of_array : ('a -> Sexp.t) -> 'a array -> Sexp.t -val sexp_of_hashtbl : - ('a -> Sexp.t) -> ('b -> Sexp.t) -> ('a, 'b) Hashtbl.t -> Sexp.t (** [sexp_of_hashtbl conv_key conv_value htbl] converts the value [htbl] of type [('a, 'b) Hashtbl.t] to an S-expression. Uses [conv_key] to convert the hashtable keys of type ['a], and [conv_value] to convert hashtable values of type ['b] to S-expressions. *) +val sexp_of_hashtbl : ('a -> Sexp.t) -> ('b -> Sexp.t) -> ('a, 'b) Hashtbl.t -> Sexp.t -val sexp_of_opaque : 'a -> Sexp.t (** [sexp_of_opaque x] converts the value [x] of opaque type to an S-expression. This means the user need not provide converters, but the result cannot be interpreted. *) +val sexp_of_opaque : 'a -> Sexp.t -val sexp_of_fun : ('a -> 'b) -> Sexp.t (** [sexp_of_fun f] converts the value [f] of function type to a dummy S-expression. Functions cannot be serialized as S-expressions, but at least a placeholder can be generated for pretty-printing. *) - +val sexp_of_fun : ('a -> 'b) -> Sexp.t (** {6 Conversion of S-expressions to OCaml-values} *) -exception Of_sexp_error of exn * Sexp.t (** [Of_sexp_error (exn, sexp)] the exception raised when an S-expression could not be successfully converted to an OCaml-value. *) +exception Of_sexp_error of exn * Sexp.t -val record_check_extra_fields : bool ref (** [record_check_extra_fields] checks for extra (= unknown) fields in record S-expressions. *) +val record_check_extra_fields : bool ref -val of_sexp_error : string -> Sexp.t -> 'a (** [of_sexp_error reason sexp] @raise Of_sexp_error (Failure reason, sexp). *) +val of_sexp_error : string -> Sexp.t -> 'a -val of_sexp_error_exn : exn -> Sexp.t -> 'a (** [of_sexp_error exc sexp] @raise Of_sexp_error (exc, sexp). *) +val of_sexp_error_exn : exn -> Sexp.t -> 'a -val unit_of_sexp : Sexp.t -> unit (** [unit_of_sexp sexp] converts S-expression [sexp] to a value of type [unit]. *) +val unit_of_sexp : Sexp.t -> unit -val bool_of_sexp : Sexp.t -> bool (** [bool_of_sexp sexp] converts S-expression [sexp] to a value of type [bool]. *) +val bool_of_sexp : Sexp.t -> bool -val string_of_sexp : Sexp.t -> string (** [string_of_sexp sexp] converts S-expression [sexp] to a value of type [string]. *) +val string_of_sexp : Sexp.t -> string -val bytes_of_sexp : Sexp.t -> bytes (** [bytes_of_sexp sexp] converts S-expression [sexp] to a value of type [bytes]. *) +val bytes_of_sexp : Sexp.t -> bytes -val char_of_sexp : Sexp.t -> char (** [char_of_sexp sexp] converts S-expression [sexp] to a value of type [char]. *) +val char_of_sexp : Sexp.t -> char -val int_of_sexp : Sexp.t -> int (** [int_of_sexp sexp] converts S-expression [sexp] to a value of type [int]. *) +val int_of_sexp : Sexp.t -> int -val float_of_sexp : Sexp.t -> float (** [float_of_sexp sexp] converts S-expression [sexp] to a value of type [float]. *) +val float_of_sexp : Sexp.t -> float -val int32_of_sexp : Sexp.t -> int32 (** [int32_of_sexp sexp] converts S-expression [sexp] to a value of type [int32]. *) +val int32_of_sexp : Sexp.t -> int32 -val int64_of_sexp : Sexp.t -> int64 (** [int64_of_sexp sexp] converts S-expression [sexp] to a value of type [int64]. *) +val int64_of_sexp : Sexp.t -> int64 -val nativeint_of_sexp : Sexp.t -> nativeint (** [nativeint_of_sexp sexp] converts S-expression [sexp] to a value of type [nativeint]. *) +val nativeint_of_sexp : Sexp.t -> nativeint -val ref_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a ref (** [ref_of_sexp conv sexp] converts S-expression [sexp] to a value of type ['a ref] using conversion function [conv], which converts an S-expression to a value of type ['a]. *) +val ref_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a ref -val lazy_t_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a lazy_t (** [lazy_t_of_sexp conv sexp] converts S-expression [sexp] to a value of type ['a lazy_t] using conversion function [conv], which converts an S-expression to a value of type ['a]. *) +val lazy_t_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a lazy_t -val option_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a option (** [option_of_sexp conv sexp] converts S-expression [sexp] to a value of type ['a option] using conversion function [conv], which converts an S-expression to a value of type ['a]. *) +val option_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a option -val pair_of_sexp : (Sexp.t -> 'a) -> (Sexp.t -> 'b) -> Sexp.t -> 'a * 'b (** [pair_of_sexp conv1 conv2 sexp] converts S-expression [sexp] to a pair of type ['a * 'b] using conversion functions [conv1] and [conv2], which convert S-expressions to values of type ['a] and ['b] respectively. *) +val pair_of_sexp : (Sexp.t -> 'a) -> (Sexp.t -> 'b) -> Sexp.t -> 'a * 'b -val triple_of_sexp : - (Sexp.t -> 'a) -> (Sexp.t -> 'b) -> (Sexp.t -> 'c) -> Sexp.t -> 'a * 'b * 'c (** [triple_of_sexp conv1 conv2 conv3 sexp] converts S-expression [sexp] to a triple of type ['a * 'b * 'c] using conversion functions [conv1], [conv2], and [conv3], which convert S-expressions to values of type ['a], ['b], and ['c] respectively. *) +val triple_of_sexp + : (Sexp.t -> 'a) + -> (Sexp.t -> 'b) + -> (Sexp.t -> 'c) + -> Sexp.t + -> 'a * 'b * 'c -val list_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a list (** [list_of_sexp conv sexp] converts S-expression [sexp] to a value of type ['a list] using conversion function [conv], which converts an S-expression to a value of type ['a]. *) +val list_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a list -val array_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a array (** [array_of_sexp conv sexp] converts S-expression [sexp] to a value of type ['a array] using conversion function [conv], which converts an S-expression to a value of type ['a]. *) +val array_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a array -val hashtbl_of_sexp : - (Sexp.t -> 'a) -> (Sexp.t -> 'b) -> Sexp.t -> ('a, 'b) Hashtbl.t (** [hashtbl_of_sexp conv_key conv_value sexp] converts S-expression [sexp] to a value of type [('a, 'b) Hashtbl.t] using conversion function [conv_key], which converts an S-expression to hashtable key of type ['a], and function [conv_value], which converts an S-expression to hashtable value of type ['b]. *) +val hashtbl_of_sexp : (Sexp.t -> 'a) -> (Sexp.t -> 'b) -> Sexp.t -> ('a, 'b) Hashtbl.t -val opaque_of_sexp : Sexp.t -> 'a (** [opaque_of_sexp sexp] @raise Of_sexp_error when attempting to convert an S-expression to an opaque value. *) +val opaque_of_sexp : Sexp.t -> 'a -val fun_of_sexp : Sexp.t -> 'a (** [fun_of_sexp sexp] @raise Of_sexp_error when attempting to convert an S-expression to a function. *) +val fun_of_sexp : Sexp.t -> 'a +(** Sexp Grammars *) + +include module type of struct + include Sexp_conv_grammar +end (** Exception converters *) -val sexp_of_exn : exn -> Sexp.t (** [sexp_of_exn exc] converts exception [exc] to an S-expression. If no suitable converter is found, the standard converter in [Printexc] will be used to generate an atomic S-expression. *) +val sexp_of_exn : exn -> Sexp.t + +(** Converts an exception to a string via sexp, falling back to [Printexc.to_string] if no + sexp conversion is registered for this exception. + + This is different from [Printexc.to_string] in that it additionally uses the sexp + converters registered with [~printexc:false]. Another difference is that the behavior + of [Printexc] can be overridden with [Printexc.register], but here we always try sexp + conversion first. +*) +val printexc_prefer_sexp : exn -> string -val sexp_of_exn_opt : exn -> Sexp.t option (** [sexp_of_exn_opt exc] converts exception [exc] to [Some sexp]. If no suitable converter is found, [None] is returned instead. *) +val sexp_of_exn_opt : exn -> Sexp.t option module Exn_converter : sig - val add_auto : ?finalise : bool -> exn -> (exn -> Sexp.t) -> unit - [@@deprecated "[since 2016-07] use Conv.Exn_converter.add"] - - val add : ?finalise : bool -> extension_constructor -> (exn -> Sexp.t) -> unit - (** [add ?finalise constructor sexp_of_exn] registers exception S-expression + (** [add constructor sexp_of_exn] registers exception S-expression converter [sexp_of_exn] for exceptions with the given [constructor]. - NOTE: If [finalise] is [true], then the exception will be automatically - registered for removal with the GC (default). Finalisation will not work - with exceptions that have been allocated outside the heap, which is the - case for some standard ones e.g. [Sys_error]. - - @param finalise default = [true] *) + NOTE: [finalise] is ignored, and provided only for backward compatibility. *) + val add + : ?printexc:bool + -> ?finalise:bool + -> extension_constructor + -> (exn -> Sexp.t) + -> unit module For_unit_tests_only : sig val size : unit -> int @@ -279,7 +283,7 @@ module Exn_converter : sig end (**/**) + (*_ For the syntax extension *) external ignore : _ -> unit = "%ignore" external ( = ) : 'a -> 'a -> bool = "%equal" - diff --git a/src/sexp_conv_error.ml b/src/sexp_conv_error.ml index c41a375..6272673 100644 --- a/src/sexp_conv_error.ml +++ b/src/sexp_conv_error.ml @@ -11,117 +11,130 @@ exception Of_sexp_error = Of_sexp_error let tuple_of_size_n_expected loc n sexp = of_sexp_error (sprintf "%s_of_sexp: tuple of size %d expected" loc n) sexp - +;; (* Errors concerning sum types *) let stag_no_args loc sexp = - of_sexp_error (loc ^ "_of_sexp: sum tag does not take arguments") sexp + of_sexp_error (loc ^ "_of_sexp: this constructor does not take arguments") sexp +;; let stag_incorrect_n_args loc tag sexp = - let msg = - sprintf "%s_of_sexp: sum tag %S has incorrect number of arguments" loc tag - in + let msg = sprintf "%s_of_sexp: sum tag %S has incorrect number of arguments" loc tag in of_sexp_error msg sexp +;; let stag_takes_args loc sexp = - of_sexp_error (loc ^ "_of_sexp: sum tag must be a structured value") sexp + of_sexp_error (loc ^ "_of_sexp: this constructor requires arguments") sexp +;; let nested_list_invalid_sum loc sexp = - of_sexp_error (loc ^ "_of_sexp: a nested list is an invalid sum") sexp + of_sexp_error (loc ^ "_of_sexp: expected a variant type, saw a nested list") sexp +;; let empty_list_invalid_sum loc sexp = - of_sexp_error (loc ^ "_of_sexp: the empty list is an invalid sum") sexp + of_sexp_error (loc ^ "_of_sexp: expected a variant type, saw an empty list") sexp +;; let unexpected_stag loc sexp = - of_sexp_error (loc ^ "_of_sexp: unexpected sum tag") sexp - + of_sexp_error (loc ^ "_of_sexp: unexpected variant constructor") sexp +;; (* Errors concerning records *) let record_sexp_bool_with_payload loc sexp = let msg = - loc ^ - "_of_sexp: record conversion: a [sexp.bool] field was given a payload." in + loc ^ "_of_sexp: record conversion: a [sexp.bool] field was given a payload." + in of_sexp_error msg sexp +;; let record_only_pairs_expected loc sexp = let msg = - loc ^ - "_of_sexp: record conversion: only pairs expected, \ - their first element must be an atom" in + loc + ^ "_of_sexp: record conversion: only pairs expected, their first element must be an \ + atom" + in of_sexp_error msg sexp +;; let record_superfluous_fields ~what ~loc rev_fld_names sexp = let fld_names_str = String.concat (List.rev rev_fld_names) ~sep:" " in let msg = sprintf "%s_of_sexp: %s: %s" loc what fld_names_str in of_sexp_error msg sexp +;; let record_duplicate_fields loc rev_fld_names sexp = record_superfluous_fields ~what:"duplicate fields" ~loc rev_fld_names sexp +;; let record_extra_fields loc rev_fld_names sexp = record_superfluous_fields ~what:"extra fields" ~loc rev_fld_names sexp +;; let rec record_get_undefined_loop fields = function | [] -> String.concat (List.rev fields) ~sep:" " | (true, field) :: rest -> record_get_undefined_loop (field :: fields) rest | _ :: rest -> record_get_undefined_loop fields rest +;; let record_undefined_elements loc sexp lst = let undefined = record_get_undefined_loop [] lst in let msg = - sprintf "%s_of_sexp: the following record elements were undefined: %s" - loc undefined + sprintf "%s_of_sexp: the following record elements were undefined: %s" loc undefined in of_sexp_error msg sexp +;; let record_list_instead_atom loc sexp = let msg = loc ^ "_of_sexp: list instead of atom for record expected" in of_sexp_error msg sexp +;; let record_poly_field_value loc sexp = let msg = - loc ^ - "_of_sexp: cannot convert values of types resulting from polymorphic \ - record fields" + loc + ^ "_of_sexp: cannot convert values of types resulting from polymorphic record fields" in of_sexp_error msg sexp - +;; (* Errors concerning polymorphic variants *) exception No_variant_match -let no_variant_match () = - raise No_variant_match +let no_variant_match () = raise No_variant_match let no_matching_variant_found loc sexp = of_sexp_error (loc ^ "_of_sexp: no matching variant found") sexp +;; let ptag_no_args loc sexp = - of_sexp_error ( - loc ^ "_of_sexp: polymorphic variant does not take arguments") sexp + of_sexp_error (loc ^ "_of_sexp: polymorphic variant does not take arguments") sexp +;; let ptag_incorrect_n_args loc cnstr sexp = let msg = sprintf "%s_of_sexp: polymorphic variant tag %S has incorrect number of arguments" - loc cnstr + loc + cnstr in of_sexp_error msg sexp +;; let ptag_takes_args loc sexp = - of_sexp_error (loc ^ "_of_sexp: polymorphic variant tag takes an argument") - sexp + of_sexp_error (loc ^ "_of_sexp: polymorphic variant tag takes an argument") sexp +;; let nested_list_invalid_poly_var loc sexp = - of_sexp_error ( - loc ^ "_of_sexp: a nested list is an invalid polymorphic variant") sexp + of_sexp_error (loc ^ "_of_sexp: a nested list is an invalid polymorphic variant") sexp +;; let empty_list_invalid_poly_var loc sexp = - of_sexp_error ( - loc ^ "_of_sexp: the empty list is an invalid polymorphic variant") sexp + of_sexp_error (loc ^ "_of_sexp: the empty list is an invalid polymorphic variant") sexp +;; let empty_type loc sexp = of_sexp_error (loc ^ "_of_sexp: trying to convert an empty type") sexp +;; diff --git a/src/sexp_conv_grammar.ml b/src/sexp_conv_grammar.ml new file mode 100644 index 0000000..f8fd7ef --- /dev/null +++ b/src/sexp_conv_grammar.ml @@ -0,0 +1,29 @@ +let unit_sexp_grammar : unit Sexp_grammar.t = { untyped = List Empty } +let bool_sexp_grammar : bool Sexp_grammar.t = { untyped = Bool } +let string_sexp_grammar : string Sexp_grammar.t = { untyped = String } +let bytes_sexp_grammar : bytes Sexp_grammar.t = { untyped = String } +let char_sexp_grammar : char Sexp_grammar.t = { untyped = Char } +let int_sexp_grammar : int Sexp_grammar.t = { untyped = Integer } +let float_sexp_grammar : float Sexp_grammar.t = { untyped = Float } +let int32_sexp_grammar : int32 Sexp_grammar.t = { untyped = Integer } +let int64_sexp_grammar : int64 Sexp_grammar.t = { untyped = Integer } +let nativeint_sexp_grammar : nativeint Sexp_grammar.t = { untyped = Integer } +let sexp_t_sexp_grammar : Sexp.t Sexp_grammar.t = { untyped = Any "Sexp.t" } +let ref_sexp_grammar grammar = Sexp_grammar.coerce grammar +let lazy_t_sexp_grammar grammar = Sexp_grammar.coerce grammar + +let option_sexp_grammar ({ untyped } : _ Sexp_grammar.t) : _ option Sexp_grammar.t = + { untyped = Option untyped } +;; + +let list_sexp_grammar ({ untyped } : _ Sexp_grammar.t) : _ list Sexp_grammar.t = + { untyped = List (Many untyped) } +;; + +let array_sexp_grammar ({ untyped } : _ Sexp_grammar.t) : _ array Sexp_grammar.t = + { untyped = List (Many untyped) } +;; + +let empty_sexp_grammar : _ Sexp_grammar.t = { untyped = Union [] } +let opaque_sexp_grammar = empty_sexp_grammar +let fun_sexp_grammar = empty_sexp_grammar diff --git a/src/sexp_conv_grammar.mli b/src/sexp_conv_grammar.mli new file mode 100644 index 0000000..eb2fcca --- /dev/null +++ b/src/sexp_conv_grammar.mli @@ -0,0 +1,20 @@ +(** Sexp grammar definitions. *) + +val unit_sexp_grammar : unit Sexp_grammar.t +val bool_sexp_grammar : bool Sexp_grammar.t +val string_sexp_grammar : string Sexp_grammar.t +val bytes_sexp_grammar : bytes Sexp_grammar.t +val char_sexp_grammar : char Sexp_grammar.t +val int_sexp_grammar : int Sexp_grammar.t +val float_sexp_grammar : float Sexp_grammar.t +val int32_sexp_grammar : int32 Sexp_grammar.t +val int64_sexp_grammar : int64 Sexp_grammar.t +val nativeint_sexp_grammar : nativeint Sexp_grammar.t +val sexp_t_sexp_grammar : Sexp.t Sexp_grammar.t +val ref_sexp_grammar : 'a Sexp_grammar.t -> 'a ref Sexp_grammar.t +val lazy_t_sexp_grammar : 'a Sexp_grammar.t -> 'a lazy_t Sexp_grammar.t +val option_sexp_grammar : 'a Sexp_grammar.t -> 'a option Sexp_grammar.t +val list_sexp_grammar : 'a Sexp_grammar.t -> 'a list Sexp_grammar.t +val array_sexp_grammar : 'a Sexp_grammar.t -> 'a array Sexp_grammar.t +val opaque_sexp_grammar : 'a Sexp_grammar.t +val fun_sexp_grammar : 'a Sexp_grammar.t diff --git a/src/sexp_grammar.ml b/src/sexp_grammar.ml new file mode 100644 index 0000000..00bfdca --- /dev/null +++ b/src/sexp_grammar.ml @@ -0,0 +1,191 @@ +(** Representation of S-expression grammars *) + +(** This module defines a representation for s-expression grammars. Using ppx_sexp_conv + and [[@@deriving sexp_grammar]] produces a grammar that is compatible with the derived + [of_sexp] for a given type. + + As with other derived definitions, polymorphic types derive a function that takes a + grammar for each type argument and produces a grammar for the monomorphized type. + + Monomorphic types derive a grammar directly. To avoid top-level side effects, + [[@@deriving sexp_grammar]] wraps grammars in the [Lazy] constructor as needed. + + This type may change over time as our needs for expressive grammars change. We will + attempt to make changes backward-compatible, or at least provide a reasonable upgrade + path. *) + +[@@@warning "-30"] (* allow duplicate field names *) + +(** Grammar of a sexp. *) +type grammar = + | Any of string (** accepts any sexp; string is a type name for human readability *) + | Bool (** accepts the atoms "true" or "false", modulo capitalization *) + | Char (** accepts any single-character atom *) + | Integer (** accepts any atom matching ocaml integer syntax, regardless of bit width *) + | Float (** accepts any atom matching ocaml float syntax *) + | String (** accepts any atom *) + | Option of grammar (** accepts an option, both [None] vs [Some _] and [()] vs [(_)]. *) + | List of list_grammar (** accepts a list *) + | Variant of variant (** accepts clauses keyed by a leading or sole atom *) + | Union of grammar list (** accepts a sexp if any of the listed grammars accepts it *) + | Tagged of grammar with_tag + (** annotates a grammar with a client-specific key/value pair *) + | Tyvar of string + (** Name of a type variable, e.g. [Tyvar "a"] for ['a]. Only meaningful when the body of + the innermost enclosing [defn] defines a corresponding type variable. *) + | Tycon of string * grammar list + (** Type constructor applied to arguments. For example, [Tycon ("list", [ Integer ])] + represents [int list]. Only meaningful when the innermost enclosing [Recursive] + grammar defines a corresponding type constructor. *) + | Recursive of grammar * defn list + (** [Recursive (grammar, definitions)] allows [grammar] to refer to type constructors + from the mutually recursive [definitions]. The definitions may also refer to each + others' type constructors. + + Ordinarily, [grammar] itself is just a [Tycon] argument, although technically it can + be any grammar. + + For example, the following definitions define a binary tree parameterized by a type + stored at its leaves. + + {[ + let defns = + [ { tycon = "tree" + ; tyvars = ["a"] + ; grammar = + Variant + { name_kind = Capitalized + ; clauses = + [ { name = "Node" + ; args = Cons (Tycon ("node", [Tyvar "a"]), Empty) + } + ; { name = "Tree" + ; args = Cons (Tycon ("leaf", [Tyvar "a"]), Empty) + } + ] + } + } + ; { tycon = "node" + ; tyvars = ["a"] + ; grammar = List (Many (Tycon "tree", [Tyvar "a"])) + } + ; { tycon = "leaf" + ; tyvars = ["a"] + ; grammar = [Tyvar "a"] + } + ] + ;; + ]} + + Normally, the type of a tree storing integers would be written like this: + + {[ + Recursive (Tycon ("tree", [ Integer ]), defns) + ]} + + It is equivalent, though needlessly verbose, to replace the [Tycon] reference with + the grammar of ["tree"], substituting [Integer] for [Tyvar "a"]: + + {[ + Recursive + ( Variant + { name_kind = Capitalized + ; clauses = + [ { name = "Node" + ; args = Cons (Tycon ("node", [Tyvar "a"]), Empty) + } + ; { name = "Tree" + ; args = Cons (Tycon ("leaf", [Tyvar "a"]), Empty) + } + ] + } + , defns ) + ]} + *) + | Lazy of grammar lazy_t + (** Lazily computed grammar. Use [Lazy] to avoid top-level side effects. To define + recursive grammars, use [Recursive] instead. *) + +(** Grammar of a list of sexps. *) +and list_grammar = + | Empty (** accepts an empty list of sexps *) + | Cons of grammar * list_grammar + (** accepts a non-empty list with head and tail matching the given grammars *) + | Many of grammar (** accepts zero or more sexps, each matching the given grammar *) + | Fields of record (** accepts sexps representing fields of a record *) + +(** Case sensitivity options for names of variant constructors. *) +and case_sensitivity = + | Case_insensitive (** Comparison is case insensitive. Used for custom parsers. *) + | Case_sensitive (** Comparison is case sensitive. Used for polymorphic variants. *) + | Case_sensitive_except_first_character + (** Comparison is case insensitive for the first character and case sensitive afterward. + Used for regular variants. *) + +(** Grammar of variants. Accepts any sexp matching one of the clauses. *) +and variant = + { case_sensitivity : case_sensitivity + ; clauses : clause with_tag_list list + } + +(** Grammar of a single variant clause. Accepts sexps based on the [clause_kind]. *) +and clause = + { name : string + ; clause_kind : clause_kind + } + +(** Grammar of a single variant clause's contents. [Atom_clause] accepts an atom matching + the clause's name. [List_clause] accepts a list whose head is an atom matching the + clause's name and whose tail matches [args]. The clause's name is matched modulo the + variant's [name_kind]. *) +and clause_kind = + | Atom_clause + | List_clause of { args : list_grammar } + +(** Grammar of a record. Accepts any list of sexps specifying each of the fields, + regardless of order. If [allow_extra_fields] is specified, ignores sexps with names + not found in [fields]. *) +and record = + { allow_extra_fields : bool + ; fields : field with_tag_list list + } + +(** Grammar of a record field. A field must show up exactly once in a record if + [required], or at most once otherwise. Accepts a list headed by [name] as an atom, + followed by sexps matching [args]. *) +and field = + { name : string + ; required : bool + ; args : list_grammar + } + +(** Grammar tagged with client-specific key/value pair. *) +and 'a with_tag = + { key : string + ; value : Sexp.t + ; grammar : 'a + } + +and 'a with_tag_list = + | Tag of 'a with_tag_list with_tag + | No_tag of 'a + +(** Grammar of a recursive type definition. Names the [tycon] being defined, and the + [tyvars] it takes as parameters. Specifies the [grammar] of the [tycon]. The grammar + may refer to any of the [tyvars], and to any of the [tycon]s from the same set of + [Recursive] definitions. *) +and defn = + { tycon : string + ; tyvars : string list + ; grammar : grammar + } + +(** Top-level grammar type. Has a phantom type parameter to associate each grammar with + the type its sexps represent. This makes it harder to apply grammars to the wrong + type, while grammars can still be easily coerced to a new type if needed. *) +type _ t = { untyped : grammar } [@@unboxed] + +let coerce (type a b) ({ untyped = _ } as t : a t) : b t = t + +(** This reserved key is used for all tags generated from doc comments. *) +let doc_comment_tag = "sexp_grammar.doc_comment" diff --git a/src/sexpable.ml b/src/sexpable.ml index 4049fa2..4d62f4f 100644 --- a/src/sexpable.ml +++ b/src/sexpable.ml @@ -23,10 +23,16 @@ module type S3 = sig type ('a, 'b, 'c) t val t_of_sexp - : (Sexp.t -> 'a) -> (Sexp.t -> 'b) -> (Sexp.t -> 'c) -> Sexp.t + : (Sexp.t -> 'a) + -> (Sexp.t -> 'b) + -> (Sexp.t -> 'c) + -> Sexp.t -> ('a, 'b, 'c) t val sexp_of_t - : ('a -> Sexp.t) -> ('b -> Sexp.t) -> ('c -> Sexp.t) -> ('a, 'b, 'c) t + : ('a -> Sexp.t) + -> ('b -> Sexp.t) + -> ('c -> Sexp.t) + -> ('a, 'b, 'c) t -> Sexp.t end diff --git a/src/sexplib0.ml b/src/sexplib0.ml index 48c7527..b13037a 100644 --- a/src/sexplib0.ml +++ b/src/sexplib0.ml @@ -1,9 +1,5 @@ module Sexp = Sexp module Sexp_conv = Sexp_conv module Sexp_conv_error = Sexp_conv_error +module Sexp_grammar = Sexp_grammar module Sexpable = Sexpable - -module Private = struct - module Lazy_group_id = Lazy_group_id - module Raw_grammar = Raw_grammar -end |