From 1218de7fc39977167f5e06ed6c1741cc87d8885a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Glondu?= Date: Wed, 3 Aug 2016 13:29:53 +0200 Subject: Import biniou_1.0.12.orig.tar.gz [dgit import orig biniou_1.0.12.orig.tar.gz] --- .gitignore | 14 + Changes | 27 ++ INSTALL | 30 ++ LICENSE | 24 ++ META.in | 7 + Makefile | 175 ++++++++++ README.md | 21 ++ bdump.ml | 74 +++++ bi_dump.ml | 93 ++++++ bi_inbuf.ml | 106 ++++++ bi_inbuf.mli | 108 +++++++ bi_io.ml | 950 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ bi_io.mli | 195 +++++++++++ bi_outbuf.ml | 121 +++++++ bi_outbuf.mli | 121 +++++++ bi_share.ml | 55 ++++ bi_share.mli | 38 +++ bi_stream.ml | 148 +++++++++ bi_stream.mli | 38 +++ bi_util.ml | 45 +++ bi_util.mli | 15 + bi_vint.ml | 171 ++++++++++ bi_vint.mli | 69 ++++ biniou-format.txt | 213 ++++++++++++ test_biniou.ml | 229 +++++++++++++ 25 files changed, 3087 insertions(+) create mode 100644 .gitignore create mode 100644 Changes create mode 100644 INSTALL create mode 100644 LICENSE create mode 100644 META.in create mode 100644 Makefile create mode 100644 README.md create mode 100644 bdump.ml create mode 100644 bi_dump.ml create mode 100644 bi_inbuf.ml create mode 100644 bi_inbuf.mli create mode 100644 bi_io.ml create mode 100644 bi_io.mli create mode 100644 bi_outbuf.ml create mode 100644 bi_outbuf.mli create mode 100644 bi_share.ml create mode 100644 bi_share.mli create mode 100644 bi_stream.ml create mode 100644 bi_stream.mli create mode 100644 bi_util.ml create mode 100644 bi_util.mli create mode 100644 bi_vint.ml create mode 100644 bi_vint.mli create mode 100644 biniou-format.txt create mode 100644 test_biniou.ml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..76fa4f8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +*~ +*.cm[ioxat] +*.[oa] +*.cmx[as] +*.cmti +*.annot +META +bdump +test-stream.dat +test2.bin +*.byte +*.native +*.exe +test_biniou diff --git a/Changes b/Changes new file mode 100644 index 0000000..60d9830 --- /dev/null +++ b/Changes @@ -0,0 +1,27 @@ +Releases of biniou +================== + +!!! = some incompatibilities +opt = optimizations ++ui = additions in the user interface +-ui = restrictions in the user interface +bug = bug or security fix +doc = major changes in the documentation +pkg = changes in the structure of the package or in the installation procedure + +2012-03-19 1.0.2: [+ui] support for flushing output to abstract OO channels + +2012-01-03 1.0.1: [+ui] new experimental array streaming utility + [+ui] compiling with -g, allowing stack trace recording + [doc] fixed description of the format for string encoding + +2010-12-04 1.0.0: [+ui] added system of references for sharing values + [!!!] new variant `Shared + [!!!] new fields in input and output buffers + [+ui] bdump: option -h to specify alternate dictionary + [pkg] standalone reference for the biniou format + +2010-09-13 0.9.1: [bug] fixed Bi_inbuf.from_channel + [pkg] added INSTALL file + +2010-08-04 0.9.0: first release diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..15345ad --- /dev/null +++ b/INSTALL @@ -0,0 +1,30 @@ + Installation instructions for biniou + + +Requirements: + +- Objective Caml (>= 3.11 is fine, earlier versions are probably fine too) +- GNU make +- Findlib (`ocamlfind' command) +- easy-format + + +GODI makes the installation process straightforward, +although other package managers can be equally convenient. + + +Manual installation is done using: + +make # or `make all' for the bytecode-only version + +make install # or `make BINDIR=/foo/bin install' for installing executables + # in a place other than the guessed default. + + +Uninstallation: + +make uninstall + + + +Bugs and feedback should be sent to Martin Jambon . diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..152fad5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ +Copyright (c) 2010 Martin Jambon +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/META.in b/META.in new file mode 100644 index 0000000..4a5788d --- /dev/null +++ b/META.in @@ -0,0 +1,7 @@ +name = "biniou" +version = "@@VERSION@@" +description = "Extensible binary serialization format" +requires = "easy-format" +archive(byte) = "biniou.cma" +archive(native) = "biniou.cmxa" +archive(native,plugin) = "biniou.cmxs" diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a398630 --- /dev/null +++ b/Makefile @@ -0,0 +1,175 @@ +VERSION = 1.0.12 + +FLAGS = -g -annot -bin-annot +PACKS = easy-format + +ifeq "$(shell ocamlfind ocamlc -config |grep os_type)" "os_type: Win32" +EXE=.exe +else +EXE= +endif + +BEST = $(shell \ + if ocamlfind ocamlopt 2>/dev/null; then \ + echo .native; \ + else \ + echo .byte; \ + fi \ +) + +NATDYNLINK = $(shell \ + if [ -f `ocamlfind ocamlc -where`/dynlink.cmxa ]; then \ + echo YES; \ + else \ + echo NO; \ + fi \ +) + +ifeq "${NATDYNLINK}" "YES" +CMXS=biniou.cmxs +endif + +.PHONY: default all byte opt install doc test +default: all test_biniou$(EXE) + +ifeq "$(BEST)" ".native" +all: byte opt doc META +else +all: byte doc META +endif + +byte: biniou.cma bdump.byte +opt: biniou.cmxa $(CMXS) bdump.native + +test: test_biniou$(EXE) + ./$< + +ifndef PREFIX + PREFIX = $(shell dirname $$(dirname $$(which ocamlfind))) + export PREFIX +endif + +ifndef BINDIR + BINDIR = $(PREFIX)/bin + export BINDIR +endif + +META: META.in Makefile + sed -e 's:@@VERSION@@:$(VERSION):' META.in > META + +SOURCES = bi_util.mli bi_util.ml \ + bi_share.mli bi_share.ml \ + bi_outbuf.mli bi_outbuf.ml bi_inbuf.mli bi_inbuf.ml \ + bi_vint.mli bi_vint.ml bi_io.mli bi_io.ml \ + bi_dump.ml bi_stream.mli bi_stream.ml + +MLI = $(filter %.mli, $(SOURCES)) +ML = $(filter %.ml, $(SOURCES)) +CMI = $(MLI:.mli=.cmi) +CMT = $(MLI:.mli=.cmt) +ANNOT = $(MLI:.mli=.annot) +CMO = $(ML:.ml=.cmo) +CMX = $(ML:.ml=.cmx) +O = $(ML:.ml=.o) + +biniou.cma: $(SOURCES) Makefile + ocamlfind ocamlc -a $(FLAGS) -o biniou.cma \ + -package "$(PACKS)" $(SOURCES) + +biniou.cmxa: $(SOURCES) Makefile + ocamlfind ocamlopt -a $(FLAGS) \ + -o biniou.cmxa -package "$(PACKS)" $(SOURCES) + +biniou.cmxs: biniou.cmxa + ocamlfind ocamlopt -shared -linkall -I . -o $@ $^ + +bdump.byte: biniou.cma bdump.ml + ocamlfind ocamlc -o $@ $(FLAGS) \ + -package $(PACKS) -linkpkg $^ + +bdump.native: biniou.cmxa bdump.ml + ocamlfind ocamlopt -o $@ $(FLAGS) \ + -package $(PACKS) -linkpkg $^ + +test_biniou.byte: biniou.cma test_biniou.ml + ocamlfind ocamlc -o $@ $(FLAGS) \ + -package "$(PACKS) unix" -linkpkg $^ + +test_biniou.native: biniou.cmxa test_biniou.ml + ocamlfind ocamlopt -o $@ $(FLAGS) \ + -package "$(PACKS) unix" -linkpkg $^ + +%$(EXE): %$(BEST) + cp $< $@ + +doc: doc/index.html +doc/index.html: $(MLI) + mkdir -p doc + ocamlfind ocamldoc -d doc -html -package easy-format $(MLI) + +install: META byte + if [ -f bdump.native ]; then \ + cp bdump.native $(BINDIR)/bdump$(EXE); \ + else \ + cp bdump.byte $(BINDIR)/bdump$(EXE); \ + fi + ocamlfind install biniou META \ + $(MLI) $(CMI) $(CMT) $(ANNOT) $(CMO) biniou.cma \ + -optional $(CMX) $(O) biniou.cmxa biniou.a biniou.cmxs + +uninstall: + rm -f $(BINDIR)/bdump{.exe,} + ocamlfind remove biniou + +.PHONY: clean + +clean: + rm -f *.o *.a *.cm[ioxa] *.cmxa *~ *.annot META + rm -f {bdump,test_biniou}{.exe,.byte,.native,} + rm -rf doc + rm -f test.bin test_channels.bin + +SUBDIRS = +SVNURL = svn://svn.forge.ocamlcore.org/svnroot/biniou/trunk/biniou + +.PHONY: archive +archive: + @echo "Making archive for version $(VERSION)" + @if [ -z "$$WWW" ]; then \ + echo '*** Environment variable WWW is undefined ***' >&2; \ + exit 1; \ + fi + @if [ -n "$$(svn status -q)" ]; then \ + echo "*** There are uncommitted changes, aborting. ***" >&2; \ + exit 1; \ + fi + $(MAKE) && ./bdump -help > $$WWW/bdump-help.txt + mkdir -p $$WWW/biniou-doc + $(MAKE) doc && cp doc/* $$WWW/biniou-doc/ + rm -rf /tmp/biniou /tmp/biniou-$(VERSION) && \ + cd /tmp && \ + svn co "$(SVNURL)" && \ + for x in "." $(SUBDIRS); do \ + rm -rf /tmp/biniou/$$x/.svn; \ + done && \ + cd /tmp && cp -r biniou biniou-$(VERSION) && \ + tar czf biniou.tar.gz biniou && \ + tar cjf biniou.tar.bz2 biniou && \ + tar czf biniou-$(VERSION).tar.gz biniou-$(VERSION) && \ + tar cjf biniou-$(VERSION).tar.bz2 biniou-$(VERSION) + mv /tmp/biniou.tar.gz /tmp/biniou.tar.bz2 ../releases + mv /tmp/biniou-$(VERSION).tar.gz \ + /tmp/biniou-$(VERSION).tar.bz2 ../releases + cp ../releases/biniou.tar.gz $$WWW/ + cp ../releases/biniou.tar.bz2 $$WWW/ + cp ../releases/biniou-$(VERSION).tar.gz $$WWW/ + cp ../releases/biniou-$(VERSION).tar.bz2 $$WWW/ + cd ../releases && \ + svn add biniou.tar.gz biniou.tar.bz2 \ + biniou-$(VERSION).tar.gz biniou-$(VERSION).tar.bz2 && \ + svn commit -m "biniou version $(VERSION)" + cp LICENSE $$WWW/biniou-license.txt + cp Changes $$WWW/biniou-changes.txt + cp biniou-format.txt $$WWW/biniou-format.txt + echo 'let biniou_version = "$(VERSION)"' \ + > $$WWW/biniou-version.ml diff --git a/README.md b/README.md new file mode 100644 index 0000000..193adc8 --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +Biniou +====== + +Biniou (pronounced "be new") is a binary data format designed for speed, +safety, ease of use and backward compatibility as protocols evolve. +Biniou is vastly equivalent to JSON in terms of functionality but allows +implementations several times faster (4 times faster than +[yojson](https://github.com/mjambon/yojson)), with +25-35% space savings. + +Biniou data can be decoded into human-readable form without knowledge +of type definitions except for field and variant names which are +represented by 31-bit hashes. A program named `bdump` is provided for +routine visualization of biniou data files. + +The program [atdgen](https://mjambon.github.io/atdgen-doc/) +is used to derive OCaml-Biniou serializers and +deserializers from type definitions. + +Biniou format specification: +https://mjambon.github.io/atdgen-doc/biniou-format.txt diff --git a/bdump.ml b/bdump.ml new file mode 100644 index 0000000..2b8f1a4 --- /dev/null +++ b/bdump.ml @@ -0,0 +1,74 @@ +open Printf + +let () = + let file = ref None in + let dic = ref [] in + let dic_file = ref (Bi_dump.default_dict_path ()) in + let use_global_dictionary = ref true in + let options = [ + "-d", Arg.String (fun s -> dic := Bi_dump.load_lines !dic s), + "file + File containing words to add to the dictionary, one per line"; + + "-h", Arg.String (fun s -> dic_file := Some s), + "file + Location of the dictionary used for unhashing. + Default: $HOME/.bdump-dict on Unix, + $HOMEPATH\\_bdump-dict on Windows"; + + "-w", Arg.String (fun s -> dic := List.rev_append (Bi_dump.split s) !dic), + "word1,word2,... + Comma-separated list of words to add to the dictionary"; + + "-x", Arg.Clear use_global_dictionary, + sprintf " + Do not load nor update the dictionary used for name + unhashing."; + ] + in + let msg = sprintf "Usage: %s [file] [options]" Sys.argv.(0) in + let error () = + Arg.usage options msg in + let set_file s = + match !file with + None -> file := Some s + | Some _ -> error () + in + Arg.parse options set_file msg; + + if !use_global_dictionary then ( + let must_save = !dic <> [] in + dic := Bi_dump.load_dictionary !dic_file !dic; + if must_save then + Bi_dump.save_dictionary !dic_file !dic + ); + + let unhash = Bi_io.make_unhash !dic in + let ic = + match !file with + None -> stdin + | Some s -> open_in_bin s + in + let inbuf = Bi_inbuf.from_string (Bi_dump.load ic) in + let value_count = ref 0 in + Printexc.record_backtrace true; + (try + while true do + (try ignore (Bi_inbuf.peek inbuf) + with Bi_inbuf.End_of_input -> raise Exit); + Bi_io.print_view_of_tree (Bi_io.read_tree ~unhash inbuf); + print_newline (); + incr value_count; + done; + with + Exit -> () + | e -> + Printf.eprintf "Broken input after reading %i value%s: \ + exception %s\n" + !value_count (if !value_count > 1 then "s" else "") + (Printexc.to_string e); + Printexc.print_backtrace stderr; + flush stderr + ); + + close_in ic diff --git a/bi_dump.ml b/bi_dump.ml new file mode 100644 index 0000000..c967260 --- /dev/null +++ b/bi_dump.ml @@ -0,0 +1,93 @@ +open Printf + +(* +let split s = Str.split (Str.regexp ",") s +*) + +let split s = + let acc = ref [] in + let stop = ref (String.length s) in + for i = !stop - 1 downto 0 do + if s.[i] = ',' then ( + let start = i + 1 in + acc := String.sub s start (!stop - start) :: !acc; + stop := i + ) + done; + String.sub s 0 !stop :: !acc + + +let load_lines accu s = + let ic = open_in s in + let l = ref accu in + (try + while true do + l := input_line ic :: List.rev !l + done + with End_of_file -> + close_in ic + ); + !l + +let load ic = + let buf = Buffer.create 1000 in + try + while true do + Buffer.add_char buf (input_char ic); + done; + assert false + with End_of_file -> + Buffer.contents buf + +let ( // ) = Filename.concat + +let default_dict_path () = + try + match Sys.os_type with + "Unix" -> Some (Sys.getenv "HOME" // ".bdump-dict") + | "Win32" -> Some (Sys.getenv "HOMEPATH" // "_bdump-dict") + | "Cygwin" -> Some (Sys.getenv "HOME" // ".bdump-dict") + | _ -> None + with Not_found -> + None + +let load_dictionary dic_file accu = + match dic_file with + None -> accu + | Some fn -> + if Sys.file_exists fn then + try + load_lines accu fn + with e -> + failwith (sprintf "Cannot load dictionary from %S: %s\n%!" + fn (Printexc.to_string e)) + else + accu + +let write_uniq oc a = + if Array.length a > 0 then ( + fprintf oc "%s\n" a.(0); + ignore ( + Array.fold_left ( + fun last x -> + if last <> x then + fprintf oc "%s\n" x; + x + ) a.(0) a + ) + ) + +let save_dictionary dic_file l = + match dic_file with + None -> () + | Some fn -> + let a = Array.of_list l in + Array.sort String.compare a; + let oc = open_out fn in + let finally () = close_out_noerr oc in + try + write_uniq oc a; + finally () + with e -> + finally (); + raise e diff --git a/bi_inbuf.ml b/bi_inbuf.ml new file mode 100644 index 0000000..d30ecb0 --- /dev/null +++ b/bi_inbuf.ml @@ -0,0 +1,106 @@ +type t = { + mutable i_s : string; + mutable i_pos : int; + mutable i_len : int; + mutable i_offs : int; + mutable i_max_len : int; + i_refill : (t -> int -> unit); + i_shared : Bi_share.Rd.tbl; +} + +exception End_of_input + +let try_preread ib n = + if ib.i_len - ib.i_pos < n then ( + ib.i_refill ib n; + min (ib.i_len - ib.i_pos) n + ) + else + n + +let read ib n = + let pos = ib.i_pos in + if ib.i_len - pos >= n then ( + ib.i_pos <- pos + n; + pos + ) + else + if try_preread ib n >= n then + let pos = ib.i_pos in + ib.i_pos <- ib.i_pos + n; + pos + else + raise End_of_input + +let read_char ib = + let pos = ib.i_pos in + if ib.i_len - pos > 0 then ( + let c = String.unsafe_get ib.i_s pos in + ib.i_pos <- pos + 1; + c + ) + else + if try_preread ib 1 > 0 then + let pos = ib.i_pos in + let c = String.unsafe_get ib.i_s pos in + ib.i_pos <- pos + 1; + c + else + raise End_of_input + +let peek ib = + let pos = ib.i_pos in + if ib.i_len - pos > 0 then ( + String.unsafe_get ib.i_s pos + ) + else + if try_preread ib 1 > 0 then + String.unsafe_get ib.i_s ib.i_pos + else + raise End_of_input + +let from_string ?(pos = 0) ?(shrlen = 16) s = { + i_s = s; + i_pos = pos; + i_len = String.length s; + i_offs = -pos; + i_max_len = String.length s; + i_refill = (fun ib n -> ()); + i_shared = Bi_share.Rd.create shrlen; +} + +(* + Like Pervasives.really_input but returns the number of bytes + read instead of raising End_of_file when the end of file is reached. +*) +let rec not_really_input ic s pos len accu = + let n = input ic s pos len in + if n < len && n > 0 then + not_really_input ic s (pos + n) (len - n) (accu + n) + else + accu + n + +let refill_from_channel ic ib n = + if n > ib.i_max_len then + invalid_arg "Bi_inbuf.refill_from_channel" + else ( + let rem_len = ib.i_len - ib.i_pos in + if rem_len < n then + let s = ib.i_s in + String.blit s ib.i_pos s 0 rem_len; + let to_read = n - rem_len in + let really_read = not_really_input ic s rem_len to_read 0 in + ib.i_offs <- ib.i_offs + ib.i_pos; + ib.i_pos <- 0; + ib.i_len <- rem_len + really_read + ) + +let from_channel ?(len = 4096) ?(shrlen = 16) ic = { + i_s = String.create len; + i_pos = 0; + i_len = 0; + i_offs = 0; + i_max_len = len; + i_refill = refill_from_channel ic; + i_shared = Bi_share.Rd.create shrlen; +} diff --git a/bi_inbuf.mli b/bi_inbuf.mli new file mode 100644 index 0000000..2998412 --- /dev/null +++ b/bi_inbuf.mli @@ -0,0 +1,108 @@ +(** Input buffer *) + +type t = { + mutable i_s : string; + (** This is the buffer string. + It can be accessed for reading but should normally only + be written to or replaced only by the [i_refill] function. + *) + + mutable i_pos : int; + (** This is the current position in the input buffer. + All data before that may be erased at anytime. *) + + mutable i_len : int; + (** + This is the position of the first byte of invalid input data. + Data starting at [i_pos] and ending at [i_len-1] is considered + valid input data that is available to the user. + Beware that calls to [try_preread], [read] and other read functions + may move data around and therefore modify the values + of [i_pos] and [i_len] in order to keep pointing to the + correct data segment. + *) + + mutable i_offs : int; + (** Length of data read and discarded from the buffer. + This indicates the position in the input stream of + the first byte of the buffer. + The current position in the input stream is [i_offs + i_pos]. + The total length of input stream data put into the buffer is + [i_offs + i_len]. + *) + + mutable i_max_len : int; + (** This is the length of [i_s]. *) + + i_refill : t -> int -> unit; + (** + Function called when not enough data is available in the buffer. + The int argument is the total number of bytes that must be + available starting at position [i_pos] when the function returns. + This function typically does nothing if all input data already has been + placed into the buffer. + The [i_pos] and [i_len] fields can be modified the [i_refill] function, + as long as the available data that was starting from [i_pos] + still starts from the new value of [i_pos]. + All the other fields can be modified as well. + *) + + i_shared : Bi_share.Rd.tbl; + (** + Hash table used to map positions in the input stream to + shared objects (if any). + *) +} + +exception End_of_input + (** + Exception raised by all the functions of this module + when it is not possible to return a valid result + because there is not enough data to read from the buffer. + *) + +val try_preread : t -> int -> int + (** + [try_preread ib n] make at least [n] bytes available for reading + in [ib.i_s], unless the end of the input is reached. + The result indicates how many bytes were made available. If smaller than + [n], the result indicates that the end of the input was reached. + [ib.i_pos] is set to point to the first available byte. + *) + +val read : t -> int -> int + (** + [read ib n] makes at least [n] bytes available for reading or raises + the [End_of_input] exception. + The result is the position of the first available byte. + [ib.i_pos] is moved to point to the next position after the [n] bytes. + @raise End_of_input if there is less than [n] bytes + before the end of input. + *) + +val read_char : t -> char + (** + Read just one byte. + @raise End_of_input if the end of input has already been reached. + *) + +val peek : t -> char + (** + Return the next byte without moving forward. + @raise End_of_input if the end of input has already been reached. + *) + +val from_string : ?pos:int -> ?shrlen:int -> string -> t + (** + Create an input buffer from a string. + @param pos position to start from. Default: 0. + @param shrlen initial length of the table used to store shared values. + *) + +val from_channel : ?len:int -> ?shrlen:int -> in_channel -> t + (** + Create an input buffer from an in_channel. + Such a buffer is not extensible and [read] requests may not exceed [len]. + @param len buffer length. + @param shrlen initial length of the table used to store shared values. + *) diff --git a/bi_io.ml b/bi_io.ml new file mode 100644 index 0000000..98e6c6d --- /dev/null +++ b/bi_io.ml @@ -0,0 +1,950 @@ +open Printf + +open Bi_outbuf +open Bi_inbuf + +type node_tag = int + +let bool_tag = 0 +let int8_tag = 1 +let int16_tag = 2 +let int32_tag = 3 +let int64_tag = 4 +let float32_tag = 11 +let float64_tag = 12 +let uvint_tag = 16 +let svint_tag = 17 +let string_tag = 18 +let array_tag = 19 +let tuple_tag = 20 +let record_tag = 21 +let num_variant_tag = 22 +let variant_tag = 23 +let unit_tag = 24 +let table_tag = 25 +let shared_tag = 26 + +type hash = int + +(* + Data tree, for testing purposes. +*) +type tree = + [ `Unit + | `Bool of bool + | `Int8 of char + | `Int16 of int + | `Int32 of Int32.t + | `Int64 of Int64.t + | `Float32 of float + | `Float64 of float + | `Uvint of int + | `Svint of int + | `String of string + | `Array of (node_tag * tree array) option + | `Tuple of tree array + | `Record of (string option * hash * tree) array + | `Num_variant of (int * tree option) + | `Variant of (string option * hash * tree option) + | `Table of + ((string option * hash * node_tag) array * tree array array) option + | `Shared of tree ] + +(* extend sign bit *) +let make_signed x = + if x > 0x3FFFFFFF then x - (1 lsl 31) else x + +(* + Same function as the one used for OCaml variants and object methods. +*) +let hash_name s = + let accu = ref 0 in + for i = 0 to String.length s - 1 do + accu := 223 * !accu + Char.code s.[i] + done; + (* reduce to 31 bits *) + accu := !accu land (1 lsl 31 - 1); + (* make it signed for 64 bits architectures *) + make_signed !accu + + +(* + Structure of a hashtag: 4 bytes, + + argbit 7bits 8bits 8bits 8bits + +---------------------+ + 31-bit hash + + argbit = 1 iff hashtag is followed by an argument, this is always 1 for + record fields. + +*) +let write_hashtag ob h has_arg = + let h = h land 0x7fffffff in + let pos = Bi_outbuf.alloc ob 4 in + let s = ob.o_s in + String.unsafe_set s (pos+3) (Char.chr (h land 0xff)); + let h = h lsr 8 in + String.unsafe_set s (pos+2) (Char.chr (h land 0xff)); + let h = h lsr 8 in + String.unsafe_set s (pos+1) (Char.chr (h land 0xff)); + let h = h lsr 8 in + String.unsafe_set s pos ( + Char.chr ( + if has_arg then h lor 0x80 + else h + ) + ) + +let string_of_hashtag h has_arg = + let ob = Bi_outbuf.create 4 in + write_hashtag ob h has_arg; + Bi_outbuf.contents ob + +let read_hashtag ib cont = + let i = Bi_inbuf.read ib 4 in + let s = ib.i_s in + let x0 = Char.code s.[i] in + let has_arg = x0 >= 0x80 in + let x1 = (x0 land 0x7f) lsl 24 in + let x2 = (Char.code s.[i+1]) lsl 16 in + let x3 = (Char.code s.[i+2]) lsl 8 in + let x4 = Char.code s.[i+3] in + let h = make_signed (x1 lor x2 lor x3 lor x4) in + + cont ib h has_arg + + +let read_field_hashtag ib = + let i = Bi_inbuf.read ib 4 in + let s = ib.i_s in + let x0 = Char.code (String.unsafe_get s i) in + if x0 < 0x80 then + Bi_util.error "Corrupted data (invalid field hashtag)"; + let x1 = (x0 land 0x7f) lsl 24 in + let x2 = (Char.code (String.unsafe_get s (i+1))) lsl 16 in + let x3 = (Char.code (String.unsafe_get s (i+2))) lsl 8 in + let x4 = Char.code (String.unsafe_get s (i+3)) in + make_signed (x1 lor x2 lor x3 lor x4) + + +type int7 = int + +let write_numtag ob i has_arg = + if i < 0 || i > 0x7f then + Bi_util.error "Corrupted data (invalid numtag)"; + let x = + if has_arg then i lor 0x80 + else i + in + Bi_outbuf.add_char ob (Char.chr x) + +let read_numtag ib cont = + let i = Bi_inbuf.read ib 1 in + let x = Char.code ib.i_s.[i] in + let has_arg = x >= 0x80 in + cont ib (x land 0x7f) has_arg + +let make_unhash l = + let tbl = Hashtbl.create (4 * List.length l) in + List.iter ( + fun s -> + let h = hash_name s in + try + match Hashtbl.find tbl h with + Some s' -> + if s <> s' then + failwith ( + sprintf + "Bi_io.make_unhash: \ + %S and %S have the same hash, please pick another name" + s s' + ) + | None -> assert false + + with Not_found -> Hashtbl.add tbl h (Some s) + ) l; + fun h -> + try Hashtbl.find tbl h + with Not_found -> None + + +let write_tag ob x = + Bi_outbuf.add_char ob (Char.chr x) + +let write_untagged_unit ob () = + Bi_outbuf.add_char ob '\x00' + +let write_untagged_bool ob x = + Bi_outbuf.add_char ob (if x then '\x01' else '\x00') + +let write_untagged_char ob x = + Bi_outbuf.add_char ob x + +let write_untagged_int8 ob x = + Bi_outbuf.add_char ob (Char.chr x) + +let write_untagged_int16 ob x = + Bi_outbuf.add_char ob (Char.chr (x lsr 8)); + Bi_outbuf.add_char ob (Char.chr (x land 0xff)) + +let write_untagged_int32 ob x = + let high = Int32.to_int (Int32.shift_right_logical x 16) in + Bi_outbuf.add_char ob (Char.chr (high lsr 8)); + Bi_outbuf.add_char ob (Char.chr (high land 0xff)); + let low = Int32.to_int x in + Bi_outbuf.add_char ob (Char.chr ((low lsr 8) land 0xff)); + Bi_outbuf.add_char ob (Char.chr (low land 0xff)) + +let write_untagged_float32 ob x = + write_untagged_int32 ob (Int32.bits_of_float x) + +let float_endianness = lazy ( + match String.unsafe_get (Obj.magic 1.0) 0 with + '\x3f' -> `Big + | '\x00' -> `Little + | _ -> assert false +) + +let read_untagged_float64 ib = + let i = Bi_inbuf.read ib 8 in + let s = ib.i_s in + let x = Obj.new_block Obj.double_tag 8 in + (match Lazy.force float_endianness with + `Little -> + for j = 0 to 7 do + String.unsafe_set (Obj.obj x) (7-j) (String.unsafe_get s (i+j)) + done + | `Big -> + for j = 0 to 7 do + String.unsafe_set (Obj.obj x) j (String.unsafe_get s (i+j)) + done + ); + (Obj.obj x : float) + +let write_untagged_float64 ob x = + let i = Bi_outbuf.alloc ob 8 in + let s = ob.o_s in + (match Lazy.force float_endianness with + `Little -> + for j = 0 to 7 do + String.unsafe_set s (i+j) (String.unsafe_get (Obj.magic x) (7-j)) + done + | `Big -> + for j = 0 to 7 do + String.unsafe_set s (i+j) (String.unsafe_get (Obj.magic x) j) + done + ) + +(* +let write_untagged_int64 ob x = + let x4 = Int64.to_int (Int64.shift_right_logical x 48) in + Bi_outbuf.add_char ob (Char.chr (x4 lsr 8)); + Bi_outbuf.add_char ob (Char.chr (x4 land 0xff)); + let x3 = Int64.to_int (Int64.shift_right_logical x 32) in + Bi_outbuf.add_char ob (Char.chr ((x3 lsr 8) land 0xff)); + Bi_outbuf.add_char ob (Char.chr (x3 land 0xff)); + let x2 = Int64.to_int (Int64.shift_right_logical x 16) in + Bi_outbuf.add_char ob (Char.chr ((x2 lsr 8) land 0xff)); + Bi_outbuf.add_char ob (Char.chr (x2 land 0xff)); + let x1 = Int64.to_int x in + Bi_outbuf.add_char ob (Char.chr ((x1 lsr 8) land 0xff)); + Bi_outbuf.add_char ob (Char.chr (x1 land 0xff)) +*) + +let write_untagged_int64 ob x = + write_untagged_float64 ob (Int64.float_of_bits x) + + +let safety_test () = + let s = "\x3f\xf0\x06\x05\x04\x03\x02\x01" in + let x = 1.00146962706651288 in + let y = read_untagged_float64 (Bi_inbuf.from_string s) in + if x <> y then + assert false; + let ob = Bi_outbuf.create 8 in + write_untagged_float64 ob x; + if Bi_outbuf.contents ob <> s then + assert false + + + +let write_untagged_string ob s = + Bi_vint.write_uvint ob (String.length s); + Bi_outbuf.add_string ob s + +let write_untagged_uvint = Bi_vint.write_uvint +let write_untagged_svint = Bi_vint.write_svint + +let write_unit ob () = + write_tag ob unit_tag; + write_untagged_unit ob () + +let write_bool ob x = + write_tag ob bool_tag; + write_untagged_bool ob x + +let write_char ob x = + write_tag ob int8_tag; + write_untagged_char ob x + +let write_int8 ob x = + write_tag ob int8_tag; + write_untagged_int8 ob x + +let write_int16 ob x = + write_tag ob int16_tag; + write_untagged_int16 ob x + +let write_int32 ob x = + write_tag ob int32_tag; + write_untagged_int32 ob x + +let write_int64 ob x = + write_tag ob int64_tag; + write_untagged_int64 ob x + +let write_float32 ob x = + write_tag ob float32_tag; + write_untagged_float32 ob x + +let write_float64 ob x = + write_tag ob float64_tag; + write_untagged_float64 ob x + +let write_string ob x = + write_tag ob string_tag; + write_untagged_string ob x + +let write_uvint ob x = + write_tag ob uvint_tag; + write_untagged_uvint ob x + +let write_svint ob x = + write_tag ob svint_tag; + write_untagged_svint ob x + + + + +let rec write_t ob tagged (x : tree) = + match x with + `Unit -> + if tagged then + write_tag ob unit_tag; + write_untagged_unit ob () + + | `Bool x -> + if tagged then + write_tag ob bool_tag; + write_untagged_bool ob x + + | `Int8 x -> + if tagged then + write_tag ob int8_tag; + write_untagged_char ob x + + | `Int16 x -> + if tagged then + write_tag ob int16_tag; + write_untagged_int16 ob x + + | `Int32 x -> + if tagged then + write_tag ob int32_tag; + write_untagged_int32 ob x + + | `Int64 x -> + if tagged then + write_tag ob int64_tag; + write_untagged_int64 ob x + + | `Float32 x -> + if tagged then + write_tag ob float32_tag; + write_untagged_float32 ob x + + | `Float64 x -> + if tagged then + write_tag ob float64_tag; + write_untagged_float64 ob x + + | `Uvint x -> + if tagged then + write_tag ob uvint_tag; + Bi_vint.write_uvint ob x + + | `Svint x -> + if tagged then + write_tag ob svint_tag; + Bi_vint.write_svint ob x + + | `String s -> + if tagged then + write_tag ob string_tag; + write_untagged_string ob s + + | `Array o -> + if tagged then + write_tag ob array_tag; + (match o with + None -> Bi_vint.write_uvint ob 0 + | Some (node_tag, a) -> + let len = Array.length a in + Bi_vint.write_uvint ob len; + if len > 0 then ( + write_tag ob node_tag; + Array.iter (write_t ob false) a + ) + ) + + | `Tuple a -> + if tagged then + write_tag ob tuple_tag; + Bi_vint.write_uvint ob (Array.length a); + Array.iter (write_t ob true) a + + | `Record a -> + if tagged then + write_tag ob record_tag; + Bi_vint.write_uvint ob (Array.length a); + Array.iter (write_field ob) a + + | `Num_variant (i, x) -> + if tagged then + write_tag ob num_variant_tag; + write_numtag ob i (x <> None); + (match x with + None -> () + | Some v -> write_t ob true v) + + | `Variant (o, h, x) -> + if tagged then + write_tag ob variant_tag; + write_hashtag ob h (x <> None); + (match x with + None -> () + | Some v -> write_t ob true v) + + | `Table o -> + if tagged then + write_tag ob table_tag; + (match o with + None -> Bi_vint.write_uvint ob 0 + | Some (fields, a) -> + let row_num = Array.length a in + Bi_vint.write_uvint ob row_num; + if row_num > 0 then + let col_num = Array.length fields in + Bi_vint.write_uvint ob col_num; + Array.iter ( + fun (name, h, tag) -> + write_hashtag ob h true; + write_tag ob tag + ) fields; + if row_num > 0 then ( + for i = 0 to row_num - 1 do + let ai = a.(i) in + if Array.length ai <> col_num then + invalid_arg "Bi_io.write_t: Malformed `Table"; + for j = 0 to col_num - 1 do + write_t ob false ai.(j) + done + done + ) + ) + + | `Shared x -> + if tagged then + write_tag ob shared_tag; + let offset = + Bi_share.Wr.put ob.o_shared + (x, Bi_share.dummy_type_id) (ob.o_offs + ob.o_len) in + Bi_vint.write_uvint ob offset; + if offset = 0 then + write_t ob true x + +and write_field ob (s, h, x) = + write_hashtag ob h true; + write_t ob true x + +let write_tree ob x = + write_t ob true x + +let string_of_tree x = + let ob = Bi_outbuf.create 1000 in + write_tree ob x; + Bi_outbuf.contents ob + +let tag_of_tree (x : tree) = + match x with + `Unit -> unit_tag + | `Bool _ -> bool_tag + | `Int8 _ -> int8_tag + | `Int16 _ -> int16_tag + | `Int32 _ -> int32_tag + | `Int64 _ -> int64_tag + | `Float32 _ -> float32_tag + | `Float64 _ -> float64_tag + | `Uvint _ -> uvint_tag + | `Svint _ -> svint_tag + | `String _ -> string_tag + | `Array _ -> array_tag + | `Tuple _ -> tuple_tag + | `Record _ -> record_tag + | `Num_variant _ -> num_variant_tag + | `Variant _ -> variant_tag + | `Table _ -> table_tag + | `Shared _ -> shared_tag + + +let read_tag ib = + Char.code (Bi_inbuf.read_char ib) + +let read_untagged_unit ib = + match Bi_inbuf.read_char ib with + '\x00' -> () + | _ -> Bi_util.error "Corrupted data (unit value)" + +let read_untagged_bool ib = + match Bi_inbuf.read_char ib with + '\x00' -> false + | '\x01' -> true + | _ -> Bi_util.error "Corrupted data (bool value)" + +let read_untagged_char ib = Bi_inbuf.read_char ib + +let read_untagged_int8 ib = + Char.code (Bi_inbuf.read_char ib) + +let read_untagged_int16 ib = + let i = Bi_inbuf.read ib 2 in + let s = ib.i_s in + ((Char.code s.[i]) lsl 8) lor (Char.code s.[i+1]) + + +let read_untagged_int32 ib = + let i = Bi_inbuf.read ib 4 in + let s = ib.i_s in + let x1 = + Int32.of_int (((Char.code s.[i ]) lsl 8) lor (Char.code s.[i+1])) in + let x2 = + Int32.of_int (((Char.code s.[i+2]) lsl 8) lor (Char.code s.[i+3])) in + Int32.logor (Int32.shift_left x1 16) x2 + +let read_untagged_float32 ib = + Int32.float_of_bits (read_untagged_int32 ib) + +(* +let read_untagged_int64 ib = + let i = Bi_inbuf.read ib 8 in + let s = ib.i_s in + let x1 = + Int64.of_int (((Char.code s.[i ]) lsl 8) lor (Char.code s.[i+1])) in + let x2 = + Int64.of_int (((Char.code s.[i+2]) lsl 8) lor (Char.code s.[i+3])) in + let x3 = + Int64.of_int (((Char.code s.[i+4]) lsl 8) lor (Char.code s.[i+5])) in + let x4 = + Int64.of_int (((Char.code s.[i+6]) lsl 8) lor (Char.code s.[i+7])) in + Int64.logor (Int64.shift_left x1 48) + (Int64.logor (Int64.shift_left x2 32) + (Int64.logor (Int64.shift_left x3 16) x4)) +*) + +let read_untagged_int64 ib = + Int64.bits_of_float (read_untagged_float64 ib) + + + +let read_untagged_string ib = + let len = Bi_vint.read_uvint ib in + let str = String.create len in + let pos = ref 0 in + let rem = ref len in + while !rem > 0 do + let bytes_read = Bi_inbuf.try_preread ib !rem in + if bytes_read = 0 then + Bi_util.error "Corrupted data (string)" + else ( + String.blit ib.i_s ib.i_pos str !pos bytes_read; + ib.i_pos <- ib.i_pos + bytes_read; + pos := !pos + bytes_read; + rem := !rem - bytes_read + ) + done; + str + +let read_untagged_uvint = Bi_vint.read_uvint +let read_untagged_svint = Bi_vint.read_svint + +let read_unit ib = read_untagged_unit ib; `Unit + +let read_bool ib = `Bool (read_untagged_bool ib) + +let read_int8 ib = `Int8 (read_untagged_char ib) + +let read_int16 ib = `Int16 (read_untagged_int16 ib) + +let read_int32 ib = `Int32 (read_untagged_int32 ib) + +let read_int64 ib = `Int64 (read_untagged_int64 ib) + +let read_float32 ib = + `Float32 (read_untagged_float32 ib) + +let read_float64 ib = + `Float64 (read_untagged_float64 ib) + +let read_uvint ib = `Uvint (read_untagged_uvint ib) +let read_svint ib = `Svint (read_untagged_svint ib) + +let read_string ib = `String (read_untagged_string ib) + +let print s = print_string s; print_newline () + +let read_tree ?(unhash = make_unhash []) ib : tree = + + let rec read_array ib = + let len = Bi_vint.read_uvint ib in + if len = 0 then `Array None + else + let tag = read_tag ib in + let read = reader_of_tag tag in + `Array (Some (tag, Array.init len (fun _ -> read ib))) + + and read_tuple ib = + let len = Bi_vint.read_uvint ib in + `Tuple (Array.init len (fun _ -> read_tree ib)) + + and read_field ib = + let h = read_field_hashtag ib in + let name = unhash h in + let x = read_tree ib in + (name, h, x) + + and read_record ib = + let len = Bi_vint.read_uvint ib in + `Record (Array.init len (fun _ -> read_field ib)) + + and read_num_variant_cont ib i has_arg = + let x = + if has_arg then + Some (read_tree ib) + else + None + in + `Num_variant (i, x) + + and read_num_variant ib = + read_numtag ib read_num_variant_cont + + and read_variant_cont ib h has_arg = + let name = unhash h in + let x = + if has_arg then + Some (read_tree ib) + else + None + in + `Variant (name, h, x) + + and read_variant ib = + read_hashtag ib read_variant_cont + + and read_table ib = + let row_num = Bi_vint.read_uvint ib in + if row_num = 0 then + `Table None + else + let col_num = Bi_vint.read_uvint ib in + let fields = + Array.init col_num ( + fun _ -> + let h = read_field_hashtag ib in + let name = unhash h in + let tag = read_tag ib in + (name, h, tag) + ) + in + let readers = + Array.map (fun (name, h, tag) -> reader_of_tag tag) fields in + let a = + Array.init row_num + (fun _ -> + Array.init col_num (fun j -> readers.(j) ib)) + in + `Table (Some (fields, a)) + + and read_shared ib = + let pos = ib.i_offs + ib.i_pos in + let offset = Bi_vint.read_uvint ib in + if offset = 0 then + let rec r = `Shared r in + Bi_share.Rd.put ib.i_shared + (pos, Bi_share.dummy_type_id) (Obj.repr r); + let x = read_tree ib in + Obj.set_field (Obj.repr r) 1 (Obj.repr x); + r + else + Obj.obj (Bi_share.Rd.get ib.i_shared + (pos - offset, Bi_share.dummy_type_id)) + + and reader_of_tag = function + 0 (* bool *) -> read_bool + | 1 (* int8 *) -> read_int8 + | 2 (* int16 *) -> read_int16 + | 3 (* int32 *) -> read_int32 + | 4 (* int64 *) -> read_int64 + | 11 (* float32 *) -> read_float32 + | 12 (* float64 *) -> read_float64 + | 16 (* uvint *) -> read_uvint + | 17 (* svint *) -> read_svint + | 18 (* string *) -> read_string + | 19 (* array *) -> read_array + | 20 (* tuple *) -> read_tuple + | 21 (* record *) -> read_record + | 22 (* num_variant *) -> read_num_variant + | 23 (* variant *) -> read_variant + | 24 (* unit *) -> read_unit + | 25 (* table *) -> read_table + | 26 (* shared *) -> read_shared + | _ -> Bi_util.error "Corrupted data (invalid tag)" + + and read_tree ib : tree = + reader_of_tag (read_tag ib) ib + + in + read_tree ib + +let tree_of_string ?unhash s = read_tree ?unhash (Bi_inbuf.from_string s) + + +let skip_bytes ib n = ignore (Bi_inbuf.read ib n) + +let skip_unit ib = skip_bytes ib 1 +let skip_bool ib = skip_bytes ib 1 +let skip_int8 ib = skip_bytes ib 1 +let skip_int16 ib = skip_bytes ib 2 +let skip_int32 ib = skip_bytes ib 4 +let skip_int64 ib = skip_bytes ib 8 +let skip_float32 ib = skip_bytes ib 4 +let skip_float64 ib = skip_bytes ib 8 +let skip_uvint ib = ignore (read_untagged_uvint ib) +let skip_svint ib = ignore (read_untagged_svint ib) + +let skip_string ib = + let len = Bi_vint.read_uvint ib in + skip_bytes ib len + +let rec skip_array ib = + let len = Bi_vint.read_uvint ib in + if len = 0 then () + else + let tag = read_tag ib in + let read = skipper_of_tag tag in + for i = 1 to len do + read ib + done + +and skip_tuple ib = + let len = Bi_vint.read_uvint ib in + for i = 1 to len do + skip ib + done + +and skip_field ib = + ignore (read_field_hashtag ib); + skip ib + +and skip_record ib = + let len = Bi_vint.read_uvint ib in + for i = 1 to len do + skip_field ib + done + +and skip_num_variant_cont ib i has_arg = + if has_arg then + skip ib + +and skip_num_variant ib = + read_numtag ib skip_num_variant_cont + +and skip_variant_cont ib h has_arg = + if has_arg then + skip ib + +and skip_variant ib = + read_hashtag ib skip_variant_cont + +and skip_table ib = + let row_num = Bi_vint.read_uvint ib in + if row_num = 0 then + () + else + let col_num = Bi_vint.read_uvint ib in + let readers = + Array.init col_num ( + fun _ -> + ignore (read_field_hashtag ib); + skipper_of_tag (read_tag ib) + ) + in + for i = 1 to row_num do + for j = 1 to col_num do + readers.(j) ib + done + done + +and skipper_of_tag = function + 0 (* bool *) -> skip_bool + | 1 (* int8 *) -> skip_int8 + | 2 (* int16 *) -> skip_int16 + | 3 (* int32 *) -> skip_int32 + | 4 (* int64 *) -> skip_int64 + | 11 (* float32 *) -> skip_float32 + | 12 (* float64 *) -> skip_float64 + | 16 (* uvint *) -> skip_uvint + | 17 (* svint *) -> skip_svint + | 18 (* string *) -> skip_string + | 19 (* array *) -> skip_array + | 20 (* tuple *) -> skip_tuple + | 21 (* record *) -> skip_record + | 22 (* num_variant *) -> skip_num_variant + | 23 (* variant *) -> skip_variant + | 24 (* unit *) -> skip_unit + | 25 (* table *) -> skip_table + | _ -> Bi_util.error "Corrupted data (invalid tag)" + +and skip ib : unit = + skipper_of_tag (read_tag ib) ib + + +(* Equivalent of Array.map that guarantees a left-to-right order *) +let array_map f a = + let len = Array.length a in + if len = 0 then [||] + else ( + let r = Array.make len (f (Array.unsafe_get a 0)) in + for i = 1 to len - 1 do + Array.unsafe_set r i (f (Array.unsafe_get a i)) + done; + r + ) + + +module Pp = +struct + open Easy_format + + let array = list + let record = list + let tuple = { list with + space_after_opening = false; + space_before_closing = false; + align_closing = false } + let variant = { list with + separators_stick_left = true } + + let map f a = Array.to_list (array_map f a) + + let rec format shared (x : tree) = + match x with + `Unit -> Atom ("unit", atom) + | `Bool x -> Atom ((if x then "true" else "false"), atom) + | `Int8 x -> Atom (sprintf "0x%02x" (Char.code x), atom) + | `Int16 x -> Atom (sprintf "0x%04x" x, atom) + | `Int32 x -> Atom (sprintf "0x%08lx" x, atom) + | `Int64 x -> Atom (sprintf "0x%016Lx" x, atom) + | `Float32 x -> Atom (string_of_float x, atom) + | `Float64 x -> Atom (string_of_float x, atom) + | `Uvint x -> Atom (string_of_int x, atom) + | `Svint x -> Atom (string_of_int x, atom) + | `String s -> Atom (sprintf "%S" s, atom) + | `Array None -> Atom ("[]", atom) + | `Array (Some (_, a)) -> + List (("[", ",", "]", array), map (format shared) a) + | `Tuple a -> List (("(", ",", ")", tuple), map (format shared) a) + | `Record a -> List (("{", ",", "}", record), map (format_field shared) a) + | `Num_variant (i, o) -> + let suffix = + if i = 0 then "" + else string_of_int i + in + (match o with + None -> Atom ("None" ^ suffix, atom) + | Some x -> + let cons = Atom ("Some" ^ suffix, atom) in + Label ((cons, label), format shared x)) + | `Variant (opt_name, h, o) -> + let name = + match opt_name with + None -> sprintf "#%08lx" (Int32.of_int h) + | Some s -> sprintf "%S" s + in + (match o with + None -> Atom ("<" ^ name ^ ">", atom) + | Some x -> + List (("<", "", ">", tuple), + [ Label ((Atom (name ^ ":", atom), label), + format shared x) ]) + ) + | `Table None -> Atom ("[]", atom) + | `Table (Some (header, aa)) -> + let record_array = + `Array ( + Some ( + record_tag, + Array.map ( + fun a -> + `Record ( + Array.mapi ( + fun i x -> + let s, h, _ = header.(i) in + (s, h, x) + ) a + ) + ) aa + ) + ) in + format shared record_array + + | `Shared x -> + let tbl, p = shared in + incr p; + let pos = !p in + let offset = Bi_share.Wr.put tbl (x, Bi_share.dummy_type_id) pos in + if offset = 0 then + Label ((Atom (sprintf "shared%i ->" pos, atom), label), + format shared x) + else + Atom (sprintf "shared%i" (pos - offset), atom) + + and format_field shared (o, h, x) = + let s = + match o with + None -> sprintf "#%08lx" (Int32.of_int h) + | Some s -> sprintf "%S" s + in + Label ((Atom (sprintf "%s:" s, atom), label), format shared x) +end + +let init () = (Bi_share.Wr.create 512, ref 0) + +let view_of_tree t = + Easy_format.Pretty.to_string (Pp.format (init ()) t) + +let print_view_of_tree t = + Easy_format.Pretty.to_stdout (Pp.format (init ()) t) + +let output_view_of_tree oc t = + Easy_format.Pretty.to_channel oc (Pp.format (init ()) t) + +let view ?unhash s = + view_of_tree (tree_of_string ?unhash s) + +let print_view ?unhash s = + print_view_of_tree (tree_of_string ?unhash s) + +let output_view ?unhash oc s = + output_view_of_tree oc (tree_of_string ?unhash s) diff --git a/bi_io.mli b/bi_io.mli new file mode 100644 index 0000000..979c9dc --- /dev/null +++ b/bi_io.mli @@ -0,0 +1,195 @@ +(** Input and output functions for the Biniou serialization format *) + +(** {1 Node tags} *) + +type node_tag = int + +val bool_tag : node_tag (** Tag indicating a bool node. *) +val int8_tag : node_tag (** Tag indicating an int8 node. *) +val int16_tag : node_tag (** Tag indicating an int16 node. *) +val int32_tag : node_tag (** Tag indicating an int32 node. *) +val int64_tag : node_tag (** Tag indicating an int64 node. *) +val float32_tag : node_tag (** Tag indicating a float32 node. *) +val float64_tag : node_tag (** Tag indicating a float64 node. *) +val uvint_tag : node_tag (** Tag indicating a uvint node. *) +val svint_tag : node_tag (** Tag indicating an svint node. *) +val string_tag : node_tag (** Tag indicating a string node. *) +val array_tag : node_tag (** Tag indicating an array node. *) +val tuple_tag : node_tag (** Tag indicating a tuple node. *) +val record_tag : node_tag (** Tag indicating a record node. *) +val num_variant_tag : node_tag (** Tag indicating a num_variant node. *) +val variant_tag : node_tag (** Tag indicating a variant node. *) +val unit_tag : node_tag (** Tag indicating a unit node. *) +val table_tag : node_tag (** Tag indicating a table node. *) +val shared_tag : node_tag (** Tag indicating a shared node. *) + +val write_tag : Bi_outbuf.t -> node_tag -> unit + (** Write one-byte tag to a buffer. *) + +val read_tag : Bi_inbuf.t -> node_tag + (** Read one-byte tag from a buffer. *) + + +(** {1 Tags of variants and record fields} *) + +type hash = int (** 31-bit hash *) + +val hash_name : string -> hash + (** Hash function used to compute field name tags and variant tags from + their full name. *) + +val write_hashtag : Bi_outbuf.t -> hash -> bool -> unit + (** [write_hashtag ob h has_arg] writes variant tag [h] to buffer [ob]. + [has_arg] indicates whether the variant has an argument. + This function can be used for record field names as well, + in which case [has_arg] may only be [true]. *) + +val string_of_hashtag : hash -> bool -> string + (** Same as [write_hashtag] but writes to a string. *) + +val read_hashtag : + Bi_inbuf.t -> + (Bi_inbuf.t -> hash -> bool -> 'a) -> 'a + (** [read_hashtag ib f] reads a variant tag as hash [h] and flag [has_arg] + and returns [f h has_arg]. *) + +val read_field_hashtag : Bi_inbuf.t -> hash + (** [read_field_hashtag ib] reads a field tag and returns the 31-bit hash. *) + +val make_unhash : string list -> (hash -> string option) + (** Compute the hash of each string of the input list + and return a function that converts a hash back + to the original string. Lookups do not allocate memory blocks. + @raise Failure if the input list contains two different strings + with the same hash. + *) + +type int7 = int + (** 7-bit int used to represent a num_variant tag. *) + +val write_numtag : Bi_outbuf.t -> int7 -> bool -> unit + (** [write_numtag ob i has_arg] writes the tag of a num_variant. + The tag name is represented by [i] which must be within \[0, 127\] + and the flag [has_arg] which indicates the presence of an argument. *) + +val read_numtag : + Bi_inbuf.t -> + (Bi_inbuf.t -> int7 -> bool -> 'a) -> 'a + (** [read_numtag ib f] reads a num_variant tag + and processes the tag name [i] and flag [has_arg] + using [f]. *) + + +(** {1 Atom writers} *) + +(** The [write_untagged_] functions write an untagged value (VAL) + to an output buffer + while the other [write_] functions write a tagged value (TAGVAL). *) + +val write_untagged_unit : Bi_outbuf.t -> unit -> unit +val write_untagged_bool : Bi_outbuf.t -> bool -> unit +val write_untagged_char : Bi_outbuf.t -> char -> unit +val write_untagged_int8 : Bi_outbuf.t -> int -> unit +val write_untagged_int16 : Bi_outbuf.t -> int -> unit +val write_untagged_int32 : Bi_outbuf.t -> int32 -> unit +val write_untagged_int64 : Bi_outbuf.t -> int64 -> unit +val write_untagged_float32 : Bi_outbuf.t -> float -> unit +val write_untagged_float64 : Bi_outbuf.t -> float -> unit +val write_untagged_string : Bi_outbuf.t -> string -> unit +val write_untagged_uvint : Bi_outbuf.t -> int -> unit +val write_untagged_svint : Bi_outbuf.t -> int -> unit + +val write_unit : Bi_outbuf.t -> unit -> unit +val write_bool : Bi_outbuf.t -> bool -> unit +val write_char : Bi_outbuf.t -> char -> unit +val write_int8 : Bi_outbuf.t -> int -> unit +val write_int16 : Bi_outbuf.t -> int -> unit +val write_int32 : Bi_outbuf.t -> int32 -> unit +val write_int64 : Bi_outbuf.t -> int64 -> unit +val write_float32 : Bi_outbuf.t -> float -> unit +val write_float64 : Bi_outbuf.t -> float -> unit +val write_string : Bi_outbuf.t -> string -> unit +val write_uvint : Bi_outbuf.t -> int -> unit +val write_svint : Bi_outbuf.t -> int -> unit + +(** {1 Atom readers} *) + +(** The [read_untagged_] functions read an untagged value (VAL) + from an input buffer. *) + +val read_untagged_unit : Bi_inbuf.t -> unit +val read_untagged_bool : Bi_inbuf.t -> bool +val read_untagged_char : Bi_inbuf.t -> char +val read_untagged_int8 : Bi_inbuf.t -> int +val read_untagged_int16 : Bi_inbuf.t -> int +val read_untagged_int32 : Bi_inbuf.t -> int32 +val read_untagged_int64 : Bi_inbuf.t -> int64 +val read_untagged_float32 : Bi_inbuf.t -> float +val read_untagged_float64 : Bi_inbuf.t -> float +val read_untagged_string : Bi_inbuf.t -> string +val read_untagged_uvint : Bi_inbuf.t -> int +val read_untagged_svint : Bi_inbuf.t -> int + +val skip : Bi_inbuf.t -> unit + (** Read and discard a value. Useful for skipping unknown record fields. *) + + +(** {1 Generic tree} *) + +type tree = + [ + | `Unit + | `Bool of bool + | `Int8 of char + | `Int16 of int + | `Int32 of Int32.t + | `Int64 of Int64.t + | `Float32 of float + | `Float64 of float + | `Uvint of int + | `Svint of int + | `String of string + | `Array of (node_tag * tree array) option + | `Tuple of tree array + | `Record of (string option * hash * tree) array + | `Num_variant of (int * tree option) + | `Variant of (string option * hash * tree option) + | `Table of + ((string option * hash * node_tag) array * tree array array) option + | `Shared of tree ] + (** Tree representing serialized data, useful for testing + and for untyped transformations. *) + +val write_tree : Bi_outbuf.t -> tree -> unit + (** Serialization of a tree to a buffer. *) + +val string_of_tree : tree -> string + (** Serialization of a tree into a string. *) + +val read_tree : ?unhash:(hash -> string option) -> Bi_inbuf.t -> tree + (** Deserialization of a tree from a buffer. *) + +val tree_of_string : ?unhash:(hash -> string option) -> string -> tree + (** Deserialization of a tree from a string. *) + +val tag_of_tree : tree -> node_tag + (** Returns the node tag of the given tree. *) + + +val view_of_tree : tree -> string +val view : + ?unhash:(hash -> string option) -> string -> string + (** Prints a human-readable representation of the data into a string. *) + +val print_view_of_tree : tree -> unit +val print_view : + ?unhash:(hash -> string option) -> string -> unit + (** Prints a human-readable representation of the data to stdout. *) + +val output_view_of_tree : out_channel -> tree -> unit +val output_view : + ?unhash:(hash -> string option) -> out_channel -> string -> unit + (** Prints a human-readable representation of the data to an out_channel. *) + +val safety_test : unit -> unit + (** Check that certain low-level hacks work as expected *) diff --git a/bi_outbuf.ml b/bi_outbuf.ml new file mode 100644 index 0000000..d49f80c --- /dev/null +++ b/bi_outbuf.ml @@ -0,0 +1,121 @@ +type t = { + mutable o_s : string; + mutable o_max_len : int; + mutable o_len : int; + mutable o_offs : int; + o_init_len : int; + o_make_room : (t -> int -> unit); + mutable o_shared : Bi_share.Wr.tbl; + o_shared_init_len : int; +} + +let really_extend b n = + let slen0 = b.o_max_len in + let reqlen = b.o_len + n in + let slen = + let x = max reqlen (2 * slen0) in + if x <= Sys.max_string_length then x + else + if Sys.max_string_length < reqlen then + invalid_arg "Buf.extend: reached Sys.max_string_length" + else + Sys.max_string_length + in + let s = String.create slen in + String.blit b.o_s 0 s 0 b.o_len; + b.o_s <- s; + b.o_max_len <- slen + +let flush_to_output abstract_output b n = + abstract_output b.o_s 0 b.o_len; + b.o_offs <- b.o_offs + b.o_len; + b.o_len <- 0; + if n > b.o_max_len then + really_extend b n + +let flush_to_channel oc = flush_to_output (output oc) + + +let create ?(make_room = really_extend) ?(shrlen = 16) n = { + o_s = String.create n; + o_max_len = n; + o_len = 0; + o_offs = 0; + o_init_len = n; + o_make_room = make_room; + o_shared = Bi_share.Wr.create shrlen; + o_shared_init_len = shrlen; +} + +let create_channel_writer ?(len = 4096) ?shrlen oc = + create ~make_room:(flush_to_channel oc) ?shrlen len + +let flush_channel_writer b = + b.o_make_room b 0 + +let create_output_writer ?(len = 4096) ?shrlen out = + create ~make_room:(flush_to_output out#output) ?shrlen len + +let flush_output_writer = flush_channel_writer + + +(* + Guarantee that the buffer string has enough room for n additional bytes. +*) +let extend b n = + if b.o_len + n > b.o_max_len then + b.o_make_room b n + +let alloc b n = + extend b n; + let pos = b.o_len in + b.o_len <- pos + n; + pos + +let add_substring b s pos len = + extend b len; + String.blit s pos b.o_s b.o_len len; + b.o_len <- b.o_len + len + +let add_string b s = + add_substring b s 0 (String.length s) + + +let add_char b c = + let pos = alloc b 1 in + b.o_s.[pos] <- c + +let unsafe_add_char b c = + let len = b.o_len in + b.o_s.[len] <- c; + b.o_len <- len + 1 + +let add_char2 b c1 c2 = + let pos = alloc b 2 in + let s = b.o_s in + String.unsafe_set s pos c1; + String.unsafe_set s (pos+1) c2 + +let add_char4 b c1 c2 c3 c4 = + let pos = alloc b 4 in + let s = b.o_s in + String.unsafe_set s pos c1; + String.unsafe_set s (pos+1) c2; + String.unsafe_set s (pos+2) c3; + String.unsafe_set s (pos+3) c4 + + + +let clear b = + b.o_offs <- 0; + b.o_len <- 0; + Bi_share.Wr.clear b.o_shared + +let reset b = + if String.length b.o_s <> b.o_init_len then + b.o_s <- String.create b.o_init_len; + b.o_offs <- 0; + b.o_len <- 0; + b.o_shared <- Bi_share.Wr.create b.o_shared_init_len + +let contents b = String.sub b.o_s 0 b.o_len diff --git a/bi_outbuf.mli b/bi_outbuf.mli new file mode 100644 index 0000000..fb1482b --- /dev/null +++ b/bi_outbuf.mli @@ -0,0 +1,121 @@ +(** Output buffer *) + +type t = { + mutable o_s : string; + (** Buffer string *) + + mutable o_max_len : int; + (** Same as [String.length s] *) + + mutable o_len : int; + (** Length of the data present in the buffer = current position + in the buffer *) + + mutable o_offs : int; + (** Length of data written and flushed out of the buffer. + The total number of bytes written to the buffer + is therefore [o_offs + o_len]. *) + + o_init_len : int; + (** Initial length of the buffer *) + + o_make_room : t -> int -> unit; + (** + [make_room buf n] must provide space for at least the requested + number of bytes [n], typically by reallocating a larger buffer + string or by flushing the data to a channel. + This function is only called when there is not enough space for [n] + bytes. + *) + + mutable o_shared : Bi_share.Wr.tbl; + (** + Hash table used to map shared objects to positions in the input stream. + *) + + o_shared_init_len : int; + (** + Initial length of the [o_shared] table. + *) +} + +val really_extend : t -> int -> unit + (** + Default make_room function: reallocate a larger buffer string. + *) + +val flush_to_channel : out_channel -> t -> int -> unit + (** + Alternate make_room function: write to an out_channel. + *) + +val create : ?make_room:(t -> int -> unit) -> ?shrlen:int -> int -> t + (** + Create a buffer. The default [make_room] function is [really_extend]. + @param shrlen initial size of the table used to store shared values. + *) + +val contents : t -> string + (** + Returns the data currently in the buffer. + *) + +val create_channel_writer : ?len:int -> ?shrlen:int -> out_channel -> t +val flush_channel_writer : t -> unit + (** + Pair of convenience functions for creating a buffer that + flushes data to an out_channel when it is full. + *) + +val create_output_writer : + ?len:int -> ?shrlen:int -> < output : string -> int -> int -> int; .. > -> t +val flush_output_writer : t -> unit + (** + Pair of convenience functions for creating a buffer that + flushes data to an object when it is full. + *) + + +val extend : t -> int -> unit + (** + Guarantee that the buffer string has enough room for n additional bytes. + *) + +val alloc : t -> int -> int + (** + [alloc buf n] makes room for [n] bytes and returns the position + of the first byte in the buffer string [buf.s]. + It behaves as if [n] arbitrary bytes were added and it is + the user's responsibility to set them to some meaningful values + by accessing [buf.s] directly. + *) + +val add_string : t -> string -> unit + (** Add a string to the buffer. *) + +val add_substring : t -> string -> int -> int -> unit + (** [add_substring dst src srcpos len] copies [len] bytes from + string [src] to buffer [dst] starting from position [srcpos]. *) + +val add_char : t -> char -> unit + (** Add a byte to the buffer. *) + +val add_char2 : t -> char -> char -> unit + (** Add two bytes to the buffer. *) + +val add_char4 : t -> char -> char -> char -> char -> unit + (** Add four bytes to the buffer. *) + +val unsafe_add_char : t -> char -> unit + (** Add a byte to the buffer without checking that there is enough + room for it. *) + +val clear : t -> unit + (** Remove any data present in the buffer and in the table holding + shared objects. *) + +val reset : t -> unit + (** Remove any data present in the buffer and reset it to its original + size. + Remove any data present in the table holding shared objects + and reset it to its original size. *) diff --git a/bi_share.ml b/bi_share.ml new file mode 100644 index 0000000..b864ebb --- /dev/null +++ b/bi_share.ml @@ -0,0 +1,55 @@ +type type_id = int + +let dummy_type_id = 0 + +let create_type_id = + let n = ref dummy_type_id in + fun () -> + incr n; + if !n < 0 then + failwith "Bi_share.Rd_poly.create_type_id: \ + exhausted available type_id's" + else + !n + +module Wr = +struct + module H = Hashtbl.Make ( + struct + type t = Obj.t * type_id + let equal (x1, t1) (x2, t2) = x1 == x2 && t1 == t2 + let hash = Hashtbl.hash + end + ) + + type tbl = int H.t + + let create = H.create + let clear tbl = + if H.length tbl > 0 then + H.clear tbl + + let put tbl k pos = + try + let pos0 = H.find tbl (Obj.magic k) in + pos - pos0 + with Not_found -> + H.add tbl (Obj.magic k) pos; + 0 +end + +module Rd = +struct + type tbl = ((int * type_id), Obj.t) Hashtbl.t + + let create n = Hashtbl.create n + let clear = Hashtbl.clear + + let put tbl pos x = + Hashtbl.add tbl pos x + + let get tbl pos = + try Hashtbl.find tbl pos + with Not_found -> + Bi_util.error "Corrupted data (invalid reference)" +end diff --git a/bi_share.mli b/bi_share.mli new file mode 100644 index 0000000..a27d3af --- /dev/null +++ b/bi_share.mli @@ -0,0 +1,38 @@ +(** \[not for general use\] *) + +(**/**) + +type type_id +val dummy_type_id : type_id +val create_type_id : unit -> type_id + +module Wr : +sig + type tbl + val create : int -> tbl + val clear : tbl -> unit + + val put : tbl -> ('a * type_id) -> int -> int + (** [put tbl x pos] returns 0 if [x] is not already in the table + and adds [x] to the table. [pos] is the absolute position + of the first byte of the ref value excluding its tag. + If [x] is found in the table, then the difference between + [pos] and the original position is returned. + *) +end + +module Rd : +sig + type tbl + val create : int -> tbl + val clear : tbl -> unit + + val put : tbl -> (int * type_id) -> Obj.t -> unit + (** [put tbl pos x] puts the position of a new shared value into the + table. [pos] is the absolute position of the first byte + of the ref value excluding its tag. *) + + val get : tbl -> (int * type_id) -> Obj.t + (** [get tbl pos] returns the value stored at this position + or raises a {!Bi_util.Error} exception. *) +end diff --git a/bi_stream.ml b/bi_stream.ml new file mode 100644 index 0000000..88f8d7b --- /dev/null +++ b/bi_stream.ml @@ -0,0 +1,148 @@ +open Printf + +let error s = failwith ("Bi_stream: " ^ s) + +let input_int64 ic = + match Sys.word_size with + 64 -> + let n = ref 0 in + for i = 1 to 8 do + n := (!n lsl 8) lor (input_byte ic); + done; + if !n < 0 then + error "Corrupted stream: excessive chunk length"; + !n + | 32 -> + for i = 1 to 4 do + if input_byte ic <> 0 then + error "Chunk length exceeds supported range on this platform" + done; + let n = ref 0 in + for i = 1 to 4 do + n := (!n lsl 8) lor (input_byte ic); + done; + if !n < 0 then + error "Chunk length exceeds supported range on this platform"; + !n + | n -> + error (sprintf "unsupported word size (%i)" n) + +let output_int64 oc n = + match Sys.word_size with + 64 -> + let n = ref n in + for i = 1 to 8 do + output_char oc (char_of_int (!n lsr 56)); + n := !n lsl 8 + done + | 32 -> + output_string oc "\000\000\000\000"; + let n = ref n in + for i = 1 to 4 do + output_char oc (char_of_int (!n lsr 24)); + n := !n lsl 8 + done + | n -> + error (sprintf "unsupported word size (%i)" n) + +let rec read_chunk of_string ic = + match input_char ic with + '\001' -> + let len = input_int64 ic in + if len > Sys.max_string_length then + error + (sprintf + "Corrupted stream: excessive chunk length (%i bytes)" len); + let s = String.create len in + really_input ic s 0 len; + Some (of_string s) + + | '\000' -> None + + | c -> error (sprintf "Corrupted stream: %C" c) + + +let flatten st = + let a = ref [| |] in + let pos = ref 0 in + let rec next i = + if !pos >= Array.length !a then ( + match Stream.peek st with + None -> None + | Some a' -> + Stream.junk st; + a := a'; + pos := 0; + next i + ) + else ( + let x = (!a).(!pos) in + incr pos; + Some x + ) + in + Stream.from next + + +let read_stream of_string ic = + flatten (Stream.from (fun i -> read_chunk of_string ic)) + +let rev_array_of_list l = + match l with + [] -> [||] + | x :: tl -> + let r = ref tl in + let len = List.length l in + let a = Array.make len x in + for i = len - 2 downto 0 do + match !r with + hd :: tl -> + a.(i) <- hd; + r := tl; + | [] -> assert false + done; + a + +let write_stream ?(chunk_len = 1024) to_string oc st = + let n = ref 0 in + let acc = ref [] in + let flush_chunk () = + let a = rev_array_of_list !acc in + acc := []; + n := 0; + let s = to_string a in + output_char oc '\001'; + output_int64 oc (String.length s); + output_string oc s + in + Stream.iter ( + fun x -> + incr n; + acc := x :: !acc; + if !n >= chunk_len then + flush_chunk () + ) st; + if !n > 0 then + flush_chunk (); + output_char oc '\000' + + +let test l = + List.iter (fun x -> assert (x >= 0 && x <= 9)) l; + let to_string a = + String.concat "" (List.map string_of_int (Array.to_list a)) + in + let of_string s = + Array.init (String.length s) (fun i -> int_of_string (String.make 1 s.[i])) + in + let st = Stream.of_list l in + let oc = open_out "test-stream.dat" in + write_stream ~chunk_len:2 to_string oc st; + close_out oc; + + let ic = open_in "test-stream.dat" in + let st' = read_stream of_string ic in + let l' = ref [] in + Stream.iter (fun i -> l' := i :: !l') st'; + close_in ic; + l = List.rev !l' diff --git a/bi_stream.mli b/bi_stream.mli new file mode 100644 index 0000000..6233749 --- /dev/null +++ b/bi_stream.mli @@ -0,0 +1,38 @@ +(** Streaming utilities (experimental) *) + +(** + This module offers a streaming interface for representing long lists + of elements that cannot fit in memory. + Stream items are serialized as chunks of configurable length. + + Stream format (independent from the biniou serialization format): + +{v + ( ONE INT64 BYTE* )* ZERO +v} + + where [INT64] is the length of a chunk (unsigned big-endian 64-bit int), + i.e. the number of following [BYTE]s. + [ONE] and [ZERO] are the single-byte representations of 1 and 0 and are used + to indicate whether the end of the stream is reached. +*) + +val read_stream : (string -> 'a array) -> in_channel -> 'a Stream.t + (** [read_stream of_string ic] creates an OCaml stream from + an input channel [ic]. The data come in chunks and each chunk + is converted from a string to an array by calling [of_string]. *) + +val write_stream : + ?chunk_len:int -> + ('a array -> string) -> out_channel -> 'a Stream.t -> unit + (** [write_stream to_string oc st] writes an OCaml stream to the + output channel [oc]. It creates chunks of [chunk_len], + except for the last chunk which is usually smaller. + @param chunk_len has a default value of 1024. The limit + supported by this OCaml implementation on 32-bit + platforms is 16777215. + *) + +(**/**) + +val test : int list -> bool diff --git a/bi_util.ml b/bi_util.ml new file mode 100644 index 0000000..222177c --- /dev/null +++ b/bi_util.ml @@ -0,0 +1,45 @@ +exception Error of string + +let error s = raise (Error s) + + +(* + Debugging utilities. +*) + +let string8_of_int x = + let s = String.create 8 in + for i = 0 to 7 do + s.[7-i] <- Char.chr (0xff land (x lsr (8 * i))) + done; + s + +let string4_of_int x = + let s = String.create 4 in + for i = 0 to 3 do + s.[3-i] <- Char.chr (0xff land (x lsr (8 * i))) + done; + s + +let print_bits ?(pos = 0) ?len s = + let slen = String.length s in + if pos < 0 || (pos > 0 && pos >= slen) then + invalid_arg "Bi_util.print_bits"; + let len = + match len with + None -> slen - pos + | Some len -> + if len > slen - pos then invalid_arg "Bi_util.print_bits" + else len + in + + let r = String.create (len * 9) in + for i = 0 to len - 1 do + let k = i * 9 in + let x = Char.code s.[pos+i] in + for j = 0 to 7 do + r.[k+j] <- if (x lsr (7 - j)) land 1 = 0 then '0' else '1' + done; + r.[k+8] <- if (i + 1) mod 8 = 0 then '\n' else ' ' + done; + r diff --git a/bi_util.mli b/bi_util.mli new file mode 100644 index 0000000..7d6601b --- /dev/null +++ b/bi_util.mli @@ -0,0 +1,15 @@ +(** Error handling etc. *) + +exception Error of string + (** Multipurpose exception normally raised when invalid data + is found by a read or write operation. *) + +val error : string -> 'a + (** [error msg] is equivalent to [raise (Error msg)]. *) + + +(**/**) + +val string8_of_int : int -> string +val string4_of_int : int -> string +val print_bits : ?pos:int -> ?len:int -> string -> string diff --git a/bi_vint.ml b/bi_vint.ml new file mode 100644 index 0000000..ac37297 --- /dev/null +++ b/bi_vint.ml @@ -0,0 +1,171 @@ +(* Variable-byte encoding of 8-byte integers (starting from 0). *) + +open Printf +open Bi_outbuf +open Bi_inbuf + +type uint = int + +(* Word size in bytes *) +let word_size = + if 0x7fffffff = -1 then 4 + else 8 + +(* Maximum int size in bits *) +let max_int_bits = + 8 * word_size - 1 + +(* Maximum length of a vint decodable into an OCaml int, + maximum value of the highest byte of the largest vint supported *) +let max_vint_bytes, max_highest_byte = + if max_int_bits mod 7 = 0 then + let m = max_int_bits / 7 in + let h = 1 lsl 7 - 1 in + m, h + else + let m = max_int_bits / 7 + 1 in + let h = 1 lsl (max_int_bits mod 7) - 1 in + m, h + +let check_highest_byte x = + if x > max_highest_byte then + Bi_util.error "Vint exceeding range of OCaml ints" + + +let unsigned_of_signed i = + if i >= 0 then + (* + 0 -> 0 + 1 -> 2 + 2 -> 4 + 3 -> 6 + *) + i lsl 1 + else + (* + -1 -> 1 + -2 -> 3 + -3 -> 5 + *) + ((-1-i) lsl 1) lor 1 + +let signed_of_unsigned i = + if i land 1 = 0 then i lsr 1 + else -1 - (i lsr 1) + +let write_uvint buf i = + Bi_outbuf.extend buf max_vint_bytes; + + let x = ref i in + while !x lsr 7 <> 0 do + let byte = 0x80 lor (!x land 0x7f) in + Bi_outbuf.unsafe_add_char buf (Char.chr byte); + x := !x lsr 7; + done; + Bi_outbuf.unsafe_add_char buf (Char.chr !x) + +let write_svint buf i = + write_uvint buf (unsigned_of_signed i) + +(* convenience *) +let uvint_of_uint ?buf i = + let buffer = + match buf with + | None -> Bi_outbuf.create 10 + | Some b -> b + in + Bi_outbuf.clear buffer; + write_uvint buffer i; + Bi_outbuf.contents buffer + +let svint_of_int ?buf i = + uvint_of_uint ?buf (unsigned_of_signed i) + + +let read_uvint ib = + let avail = Bi_inbuf.try_preread ib max_vint_bytes in + let s = ib.i_s in + let pos = ib.i_pos in + let x = ref 0 in + (try + for i = 0 to avail - 1 do + let b = Char.code s.[pos+i] in + x := ((b land 0x7f) lsl (7*i)) lor !x; + if b < 0x80 then ( + ib.i_pos <- pos + i + 1; + if i + 1 = max_vint_bytes then + check_highest_byte b; + raise Exit + ) + done; + Bi_util.error "Unterminated vint or vint exceeding range of OCaml ints" + with Exit -> () + ); + !x + + +let read_svint ib = + signed_of_unsigned (read_uvint ib) + +(* convenience *) + +let check_end_of_input ib = + if Bi_inbuf.try_preread ib 1 > 0 then + Bi_util.error "Junk input after end of vint" + +let uint_of_uvint s = + let ib = Bi_inbuf.from_string s in + let x = read_uvint ib in + check_end_of_input ib; + x + +let int_of_svint s = + let ib = Bi_inbuf.from_string s in + let x = read_svint ib in + check_end_of_input ib; + x + + +(* + Testing +*) + +let string_of_list l = + let ob = Bi_outbuf.create 100 in + List.iter (write_uvint ob) l; + Bi_outbuf.contents ob + +let rec read_list ib = + if ib.i_pos < ib.i_len then + let x = read_uvint ib in + x :: read_list ib + else + [] + +let list_of_string s = + read_list (Bi_inbuf.from_string s) + +let print_list l = + List.iter ( + fun i -> + printf "dec %i\nhex %x\nbin %s\n" i i + (Bi_util.print_bits (Bi_util.string8_of_int i)) + ) l + +let test () = + let l = [ + 0; + 0xfffffff; + (0x01020304 lsl 32) lor 0x05060708; + max_int; + min_int + ] in + printf "Input:\n"; + print_list l; + let l' = list_of_string (string_of_list l) in + printf "Output:\n"; + print_list l'; + if l = l' then + print_endline "SUCCESS" + else + print_endline "FAILURE" diff --git a/bi_vint.mli b/bi_vint.mli new file mode 100644 index 0000000..5a4e247 --- /dev/null +++ b/bi_vint.mli @@ -0,0 +1,69 @@ +(** Vint: variable-length representation of integers *) + +(** + This module currently provides only conversions between vint and the + OCaml int type. Here are the current limits of OCaml ints on + 32-bit and 64-bit systems: +{v + word length (bits) 32 64 + + int length (bits) 31 63 + + min_int (lowest signed int) 0x40000000 0x4000000000000000 + -1073741824 -4611686018427387904 + + max_int (greatest signed int) 0x3fffffff 0x3fffffffffffffff + 1073741823 4611686018427387903 + + lowest unsigned int 0x0 0x0 + 0 0 + + greatest unsigned int 0x7fffffff 0x7fffffffffffffff + 2147483647 9223372036854775807 + + maximum vint length (data bits) 31 63 + maximum vint length (total bytes) 5 9 +v} +*) + +type uint = int + (** Unsigned int. + Note that ints (signed) and uints use the same representation + for integers within \[0, [max_int]\]. + *) + +val uvint_of_uint : ?buf:Bi_outbuf.t -> uint -> string + (** Convert an unsigned int to a vint. + @param buf existing output buffer that could be reused by this function + instead of creating a new one. *) + +val svint_of_int : ?buf:Bi_outbuf.t -> int -> string + (** Convert a signed int to a vint. + @param buf existing output buffer that could be reused by this function + instead of creating a new one. *) + +val uint_of_uvint : string -> uint + (** Interpret a vint as an unsigned int. + @raise Bi_util.Error if the input string is not a single valid uvint + that is representable using the uint type. *) + +val int_of_svint : string -> int + (** Interpret a vint as a signed int. + @raise Bi_util.Error if the input string is not a single valid svint + that is representable using the int type. *) + +val write_uvint : Bi_outbuf.t -> uint -> unit + (** Write an unsigned int to a buffer. *) + +val write_svint : Bi_outbuf.t -> int -> unit + (** Write a signed int to a buffer. *) + +val read_uvint : Bi_inbuf.t -> uint + (** Read an unsigned int from a buffer. + @raise Bi_util.Error if there is no data to read from or if the + uvint is not representable using the uint type. *) + +val read_svint : Bi_inbuf.t -> int + (** Read a signed int from a buffer. + @raise Bi_util.Error if there is no data to read from or if the + svint is not representable using the int type. *) diff --git a/biniou-format.txt b/biniou-format.txt new file mode 100644 index 0000000..6177899 --- /dev/null +++ b/biniou-format.txt @@ -0,0 +1,213 @@ + The Biniou format + ----------------- + +Contents: + +1. Grammar +2. Tags +3. Fixed-length types +4. Vints +5. Field and variant name hashing +6. Numeric variants + + + +1. Grammar + + + TAGVAL ::= TAG VAL // A biniou value with its matching tag + + VAL ::= ATOM + | ARRAY + | TUPLE + | RECORD + | NUM_VARIANT + | VARIANT + | TABLE + | SHARED + + ATOM ::= unit // 0, using one byte + | bool // 0 for false, 1 for true, using one byte + | int8 // 1 arbitrary byte + | int16 // 2 arbitrary bytes + | int32 // 4 arbitrary bytes + | int64 // 8 arbitrary bytes + | float32 // IEEE-754 binary32 + | float64 // IEEE-754 binary64 + | uvint // unsigned variable-length int + | svint // signed variable-length int + | STRING // sequence of any number of bytes prefixed by its length + + STRING ::= LENGTH byte* + ARRAY ::= LENGTH (TAG VAL* )? + NUM_VARIANT ::= NUM_VARIANT_TAG TAGVAL? + VARIANT ::= VARIANT_TAG TAGVAL? + TUPLE ::= LENGTH TAGVAL* + RECORD ::= LENGTH (FIELD_TAG TAGVAL)* + TABLE ::= LENGTH (LENGTH (FIELD_TAG TAG)* (VAL* )* )? // list of records + + SHARED ::= OFFSET TAGVAL? // Value given iff the offset is 0. + // Otherwise, the offset indicates the + // relative position to the left of a SHARED + // to which we are redirected. + + TAG ::= int8 // identifies a type of node + LENGTH ::= uvint + OFFSET ::= uvint + NUM_VARIANT_TAG ::= int8 // 0-127 if no argument, 128-255 if has argument + VARIANT_TAG ::= int32 // first bit indicates argument, then 31-bit hash + FIELD_TAG ::= int32 // 31-bit hash (first bit always 1) + + + +2. Tags + + +Tags indicate the shallow structure of any biniou value. + +The biniou format is such that the tag of any value is known +from the input data. This allows decoding biniou data as a tree +where each node represents a biniou value, without requiring external +type information. + +The tag values for the various kinds of biniou values are: + + Type of value Tag + --------------------------- + bool 0 + int8 1 + int16 2 + int32 3 + int64 4 + float32 11 + float64 12 + uvint 16 + svint 17 + string 18 + ARRAY 19 + TUPLE 20 + RECORD 21 + NUM_VARIANT 22 + VARIANT 23 + unit 24 + TABLE 25 + SHARED 26 + + + +3. Fixed-length types + + +Atomic values of type unit, bool, int8, int16, int32, int64, float32 and +float64 represent arbitrary sequences of 1, 2, 4 or 8 bytes. + +In order to make the visualization of data easier, +the default interpretation of these values shall be used: + + Length + in bytes Type of value Default interpretation + --------------------------------------------------------------------- + 1 unit 0 represents the unit value + 1 bool 0 represents false, 1 represents true + 1 int8 unsigned 8-bit int + 2 int16 big endian unsigned 16-bit int + 4 int32 big endian unsigned 32-bit int + 8 int64 big endian unsigned 64-bit int + 4 float32 big endian IEEE-754 binary32 (float) + 8 float64 big endian IEEE-754 binary64 (double) + + + +4. Vints + + +Vints are a variable-length, byte-aligned representation of +positive integers. + +A vint is represented by a sequence of bytes from least significant +to most significant. In all the bytes except the last one, the +high bit is set to 1 and indicates that more bytes follow. +The high bit of the last byte is set to 0. +The remaining 7 bits in each byte represent data. + +Here is the representation of some sample values: + + 0xxxxxxx + 0 00000000 + 1 00000001 + 2 00000010 + 127 01111111 + + 1xxxxxxx 0xxxxxxx + 128 10000000 00000001 + 129 10000001 00000001 + 255 11111111 00000001 + 256 11111111 00000010 + 16383 11111111 01111111 + + 1xxxxxxx 1xxxxxxx 0xxxxxxx + 16384 10000000 10000000 00000001 + 16385 10000001 10000000 00000001 + + +Positive integers can be represented by standard vints. +We call this representation unsigned vint or uvint. + +Arbitrary integers can also be represented using vints, after mapping +to positive integers. We call this representation signed vint or svint. +Positive numbers and 0 are mapped to even numbers and negative numbers +are mapped to odd positive numbers. Here is the mapping for +small numbers: + + vint unsigned signed + representation interpretation interpretation + (uvint) (svint) + 0xxxxxx0 + 00000000 0 0 + 00000010 2 1 + 00000100 4 2 + 00000110 6 3 + + 0xxxxxx1 + 00000001 1 -1 + 00000011 3 -2 + 00000101 5 -3 + + + +5. Field and variant name hashing + + +Record field names and variant names are represented by a +31-bit tag which must be a hash of the name. The following +hash function must be used: + + hash(s): + h <- 0 + for i = 0 to length(s) - 1 do + h <- 223 * h + s[i] + done + h <- h mod 2^31 + return h + +For example, hash("Hello") is 0x37eea2f2. + +A full field tag or variant tag is made of 32 bits. +The first bit is 0 for variants without an argument, and 1 for +variants with an argument or record fields. +The remaining 31 bits are the hash of field or variant name described above. + + + +6. Numeric variants + + +Numeric variants are a more compact alternative to variants using +32-bit hash-based tags since the tag of numeric variants +takes only one byte. + +The most common use of numeric variants is for an option type. +A value of type option is either None or Some value, +e.g. None, Some 123 or Some 0. +This allows to represent undefined values without +reserving a special value called null or undefined. diff --git a/test_biniou.ml b/test_biniou.ml new file mode 100644 index 0000000..f75b46d --- /dev/null +++ b/test_biniou.ml @@ -0,0 +1,229 @@ +open Printf + +open Bi_io + +let rec deep_cycle = `Tuple [| `Shared deep_cycle |] + +let test_tree : tree = + `Tuple [| + `Unit; + `Num_variant (0, None); + `Num_variant (0, Some (`Svint 127)); + `Array (Some (svint_tag, [| `Svint 1; `Svint 2 |])); + `Tuple [| `Shared deep_cycle; `Shared deep_cycle |]; + `Record [| + (Some "abc", hash_name "abc", `String "hello"); + (Some "number", hash_name "number", `Svint 123); + (Some "variant1", hash_name "variant1", + `Variant (Some "Foo", hash_name "Foo", Some (`Svint (-456)))); + (Some "variant2", hash_name "variant2", + `Variant (Some "Bar", hash_name "Bar", None)); + |]; + `Table ( + Some ( + [| (Some "name", hash_name "name", string_tag); + (Some "age", hash_name "age", uvint_tag) |], + [| + [| `String "Francisco"; `Uvint 67 |]; + [| `String "Mateo"; `Uvint 23 |]; + [| `String "Clara"; `Uvint 27 |]; + [| `String "Jose"; `Uvint 39 |]; + |] + ) + ); + `Array ( + Some ( + array_tag, + [| + `Array ( + Some ( + float64_tag, + [| `Float64 1.234567; `Float64 2.345678; `Float64 3.456789; |] + ) + ); + `Array ( + Some ( + float64_tag, + [| `Float64 4.567890; `Float64 5.678901; `Float64 6.789012 |] + ) + ); + `Array ( + Some ( + float64_tag, + [| `Float64 7.890123; `Float64 8.901234; `Float64 9.012345 |] + ) + ); + `Array ( + Some ( + float64_tag, + [| `Float64 10.123456; `Float64 11.234567; `Float64 12.345678 |] + ) + ); + |] + ) + ) +|] + +let unhash = make_unhash [ "abc"; "number"; + "variant1"; "variant2"; + "Foo"; "Bar"; + "name"; "age" ] + +let test () = + let s = string_of_tree test_tree in + let test_tree2 = tree_of_string ~unhash s in + (s, String.length s, test_tree2, test_tree2 = test_tree) + + +let test_json () = + let s = + "[\ + null,\ + null,\ + 127,\ + [1,2],\ + [[1,[1]],1]\ + {\"abc\":\"hello\",\ + \"number\":123,\ + \"variant1\":[\"Foo\",-456],\ + \"variant2\":\"Bar\"},\ + [[1,\"first\"],[2,\"second\"],[3,\"third\"],[4,\"fourth\"]],\ + [\ + {\"name\":\"Francisco\",\"age\":67},\ + {\"name\":\"Mateo\",\"age\":23},\ + {\"name\":\"Clara\",\"age\":27},\ + {\"name\":\"Jose\",\"age\":39}\ + ],\ + [\ + [1.234567,2.345678,3.456789],\ + [4.567890,5.678901,6.789012],\ + [7.890123,8.901234,9.012345],\ + [10.123456,11.234567,12.345678]\ + ],\ + ]" in + s, String.length s + +type foo = { + abc : string; + number : int; + variant1 : [ `Foo of int ]; + variant2 : [ `Bar ] +} + +type person = { + name : string; + age : int +} + +let native_test_tree = + ( + (), + None, + Some 127, + [| 1; 2 |], + { abc = "hello"; + number = 123; + variant1 = `Foo (-456); + variant2 = `Bar }, + [| + 1, "first"; + 2, "second"; + 3, "third"; + 4, "fourth"; + |], + [| + { name = "Francisco"; age = 67 }; + { name = "Mateo"; age = 23 }; + { name = "Clara"; age = 27 }; + { name = "Jose"; age = 39 }; + |], + [| + [| 1.234567; 2.345678; 3.456789 |]; + [| 4.567890; 5.678901; 6.789012 |]; + [| 7.890123; 8.901234; 9.012345 |]; + [| 10.123456; 11.234567; 12.345678 |] + |] + ) + +let marshal x = Marshal.to_string x [(*Marshal.No_sharing*)] +let unmarshal s = Marshal.from_string s 0 + +let native_test_tree_marshalled = marshal native_test_tree + +let marshal_wr_perf n = + for i = 1 to n do + ignore (marshal native_test_tree) + done + +let marshal_rd_perf n = + for i = 1 to n do + ignore (unmarshal native_test_tree_marshalled) + done + +let test_tree_binioued = string_of_tree test_tree + +let biniou_wr_perf n = + for i = 1 to n do + ignore (string_of_tree test_tree) + done + +let biniou_rd_perf n = + for i = 1 to n do + ignore (tree_of_string test_tree_binioued) + done + +let time name f x = + let t1 = Unix.gettimeofday () in + ignore (f x); + let t2 = Unix.gettimeofday () in + Printf.printf "%s: %.3f\n%!" name (t2 -. t1) + +let wr_perf () = + let n = 1_000_000 in + time "wr biniou" biniou_wr_perf n; + time "wr marshal" marshal_wr_perf n + +let rd_perf () = + let n = 1_000_000 in + time "rd biniou" biniou_rd_perf n; + time "rd marshal" marshal_rd_perf n + +let eq x y = + Marshal.to_string x [] = Marshal.to_string y [] + +let test_channels x = + let file = "test_channels.bin" in + let oc = open_out_bin file in + let ob = Bi_outbuf.create_channel_writer oc in + write_tree ob x; + Bi_outbuf.flush_channel_writer ob; + close_out oc; + let ic = open_in_bin file in + let ib = Bi_inbuf.from_channel ic in + let x' = read_tree ib in + if not (eq x x') then ( + printf "Error in writing or reading via channels:\n"; + Bi_io.print_view (string_of_tree x'); + print_newline (); + ) + +let () = + Bi_io.safety_test (); + let s = string_of_tree test_tree in + Bi_io.print_view s; + print_newline (); + let x = tree_of_string s in + if s <> string_of_tree x then + printf "Error in writing or reading\n%!"; + + test_channels x; + + let oc = open_out_bin "test.bin" in + output_string oc s; + close_out oc; + + wr_perf (); + rd_perf (); + + assert (Bi_stream.test [5; 3; 8; 4]); + assert (Bi_stream.test []) -- cgit v1.2.3 From 167f35f591e238102ddf1bc26ff49a15506a2cb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Glondu?= Date: Wed, 3 Aug 2016 22:19:16 +0200 Subject: Import biniou_1.0.12-2.debian.tar.xz [dgit import tarball biniou 1.0.12-2 biniou_1.0.12-2.debian.tar.xz] --- bdump.1 | 82 ++++++++++++++++++++++++++++++++++++++++++ bdump.mkd | 28 +++++++++++++++ changelog | 65 +++++++++++++++++++++++++++++++++ compat | 1 + control | 55 ++++++++++++++++++++++++++++ copyright | 50 ++++++++++++++++++++++++++ gbp.conf | 2 ++ libbiniou-ocaml-dev.docs | 1 + libbiniou-ocaml-dev.install.in | 10 ++++++ libbiniou-ocaml-dev.manpages | 1 + libbiniou-ocaml-dev.ocamldoc | 1 + libbiniou-ocaml.install.in | 3 ++ rules | 47 ++++++++++++++++++++++++ source/format | 1 + watch | 2 ++ 15 files changed, 349 insertions(+) create mode 100644 bdump.1 create mode 100644 bdump.mkd create mode 100644 changelog create mode 100644 compat create mode 100644 control create mode 100644 copyright create mode 100644 gbp.conf create mode 100644 libbiniou-ocaml-dev.docs create mode 100644 libbiniou-ocaml-dev.install.in create mode 100644 libbiniou-ocaml-dev.manpages create mode 100644 libbiniou-ocaml-dev.ocamldoc create mode 100644 libbiniou-ocaml.install.in create mode 100755 rules create mode 100644 source/format create mode 100644 watch diff --git a/bdump.1 b/bdump.1 new file mode 100644 index 0000000..603d198 --- /dev/null +++ b/bdump.1 @@ -0,0 +1,82 @@ +.\" groff -man -Tascii bdump.1 +.\" ========================================================================== +.\" ============= Synopsis =================================================== +.\" ========================================================================== +.TH BDUMP 1 "June 2013" BDUMP "User Manuals" +.SH NAME +bdump \- visualization of biniou data +.SH SYNOPSIS +.B bdump \fR[\fIOptions...\fR] [\fIfile\fR] +.\" ========================================================================== +.\" ============= Description ================================================ +.\" ========================================================================== +.SH DESCRIPTION +.B bdump +prints biniou data files in human readable form. For the +conversion of hash tags into meaningful names, +.B bdump +maintains a dictionary of words, see option +.I -h\fR. +New words can be added to this dictionary with +.I -d +and +.I -w\fR. The dictionary is saved between invocation, see option +.I -x +and +.I -h\fR. +.P +If the +.I file +argument is omitted, +.B bdump +reads data from standard input. +.\" +.\" ========================================================================== +.\" ================ Options ================================================= +.\" ========================================================================== +.\" +.SH OPTIONS +.\" ===================== -d file ============================================ +.TP +.B "-d file" +load +.I file +containing words to add to the dictionary, one per line +.\" ===================== -h file ============================================ +.TP +.B "-h file" +set dictionary file for unhashing. Default is +.I $HOME/.bdump-dict +on Unix and +.I $HOMEPATH\_bdump-dict +on Windows +.\" ===================== -w word1,word2,... ================================= +.TP +.B "-w word1,word2,..." +add words to the dictionary +.\" ===================== -x ================================================ +.TP +.B "-x" +neither load nor update the dictionary file (see option +.I -h\fR) +.\" ===================== -help ============================================== +.TP +.B "-help | --help" +print options and exit +.\" +.\" ========================================================================== +.\" ================ SEE ALSO ================================================ +.\" ========================================================================== +.\" +.SH SEE ALSO +.TP +the \fBbiniou\fR web site, \fIhttp://mjambon.com/biniou.html\fR +.\" +.\" ========================================================================== +.\" ================ Author ================================================== +.\" ========================================================================== +.\" +.SH AUTHOR +This manual page was written by Sylvain Le Gall + and Hendrik Tews , +specifically for the Debian project (and may be used by others). diff --git a/bdump.mkd b/bdump.mkd new file mode 100644 index 0000000..ba93350 --- /dev/null +++ b/bdump.mkd @@ -0,0 +1,28 @@ +% this is the old source of the man page, it is currently not used +% translate with pandoc -s -w man $^ -o $@ +% BDUMP(1) bdump User Manual +% Sylvain Le Gall +% December 5, 2010 + +# NAME + +bdump - dump biniou data. + +# SYNOPSIS + +bdump [options] file\* + +# DESCRIPTION + +Dump biniou data on standard output. + +# OPTIONS + +-w word1,word2,... +: Comma-separated list of words to add to the dictionary + +-d file +: File containing words to add to the dictionary, one per line + +-help|\--help +: Display list of options diff --git a/changelog b/changelog new file mode 100644 index 0000000..fb09ab8 --- /dev/null +++ b/changelog @@ -0,0 +1,65 @@ +biniou (1.0.12-2) unstable; urgency=medium + + * Team upload + * Fix FTBFS on bytecode architectures + + -- Stéphane Glondu Wed, 03 Aug 2016 22:19:16 +0200 + +biniou (1.0.12-1) unstable; urgency=medium + + * Team upload + * New upstream release + * Update Homepage and debian/watch + * Update Vcs-* + * Bump Standards-Version to 3.9.8 + + -- Stéphane Glondu Wed, 03 Aug 2016 13:29:53 +0200 + +biniou (1.0.9-1) unstable; urgency=medium + + * Team upload + * New upstream release + * Bump Standards-Version to 3.9.5 + + -- Stéphane Glondu Fri, 31 Jan 2014 11:13:23 +0100 + +biniou (1.0.8-1) unstable; urgency=low + + * Team upload + * New upstream release + - remove patch fix-bi_stream-for-32-bit-OCaml, since upstream changes + now deal explicitly with 32-bit platforms + + -- Stéphane Glondu Thu, 11 Jul 2013 11:36:49 +0200 + +biniou (1.0.6-1) unstable; urgency=low + + [ Sylvain Le Gall ] + * Remove Sylvain Le Gall from uploaders + + [ Hendrik Tews ] + * update watch + * add myself as uploader + * bump debhelper compat level and standards version + * update Vcs, dependencies and package description + * update copyright + * rename and adapt debian-changes patch + * add patch fix-bi-stream-32 + * update man page + * install api docs in api/html + + -- Hendrik Tews Thu, 13 Jun 2013 14:01:31 +0200 + +biniou (1.0.0-1) unstable; urgency=low + + * Team upload + * New upstream release + * Bump Standards-Version to 3.9.2 + + -- Stéphane Glondu Mon, 11 Jul 2011 00:02:27 +0200 + +biniou (0.9.1-1) unstable; urgency=low + + * Initial release. (Closes: #605672) + + -- Sylvain Le Gall Sun, 05 Dec 2010 00:07:39 +0100 diff --git a/compat b/compat new file mode 100644 index 0000000..ec63514 --- /dev/null +++ b/compat @@ -0,0 +1 @@ +9 diff --git a/control b/control new file mode 100644 index 0000000..b7e1b2b --- /dev/null +++ b/control @@ -0,0 +1,55 @@ +Source: biniou +Section: ocaml +Priority: optional +Maintainer: Debian OCaml Maintainers +Uploaders: Hendrik Tews +Build-Depends: + ocaml-nox (>= 3.11.1-3~), + ocaml-best-compilers, + ocaml-findlib, + dh-ocaml (>= 0.9~), + debhelper (>= 9), + libeasy-format-ocaml-dev +Standards-Version: 3.9.8 +Homepage: https://github.com/mjambon/biniou +Vcs-Git: https://anonscm.debian.org/git/pkg-ocaml-maint/packages/biniou.git +Vcs-Browser: https://anonscm.debian.org/git/pkg-ocaml-maint/packages/biniou.git + +Package: libbiniou-ocaml-dev +Architecture: any +Depends: + ${ocaml:Depends}, + ${shlibs:Depends}, + ${misc:Depends} +Provides: ${ocaml:Provides} +Recommends: ocaml-findlib +Description: flexible binary data format in OCaml - development files + Biniou is a binary data format designed for speed, safety, ease of use + and backward compatibility as protocols evolve. Biniou is vastly + equivalent to JSON in terms of functionality but allows implementations + about 4 times as fast (see godi-yojson for comparison), with 25-35% + space savings. Biniou data can be decoded into human-readable form + without knowledge of type definitions except for field and variant names + which are represented by 31-bit hashes. + . + This package contains the development files needed for programming + with the library. + +Package: libbiniou-ocaml +Architecture: any +Depends: + ${ocaml:Depends}, + ${shlibs:Depends}, + ${misc:Depends} +Provides: ${ocaml:Provides} +Recommends: ocaml-findlib +Description: flexible binary data format in OCaml - plugins + Biniou is a binary data format designed for speed, safety, ease of use + and backward compatibility as protocols evolve. Biniou is vastly + equivalent to JSON in terms of functionality but allows implementations + about 4 times as fast (see godi-yojson for comparison), with 25-35% + space savings. Biniou data can be decoded into human-readable form + without knowledge of type definitions except for field and variant names + which are represented by 31-bit hashes. + . + This package contains the shared runtime libraries. diff --git a/copyright b/copyright new file mode 100644 index 0000000..ccef55e --- /dev/null +++ b/copyright @@ -0,0 +1,50 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: Biniou +Upstream-Contact: Martin Jambon + +Files: * +Copyright: Copyright (c) 2010 Martin Jambon +License: BSD-3-clause + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + . + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Files: debian/* +Copyright: 2010 Sylvain Le Gall + 2013 Hendrik Tews +License: GPL-3+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + . + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + . + You should have received a copy of the GNU General Public License + along with this program. If not, see . + . + On Debian systems, the full text of the GNU General Public + License version 3 can be found in the file + `/usr/share/common-licenses/GPL-3'. diff --git a/gbp.conf b/gbp.conf new file mode 100644 index 0000000..cec628c --- /dev/null +++ b/gbp.conf @@ -0,0 +1,2 @@ +[DEFAULT] +pristine-tar = True diff --git a/libbiniou-ocaml-dev.docs b/libbiniou-ocaml-dev.docs new file mode 100644 index 0000000..eedd89b --- /dev/null +++ b/libbiniou-ocaml-dev.docs @@ -0,0 +1 @@ +api diff --git a/libbiniou-ocaml-dev.install.in b/libbiniou-ocaml-dev.install.in new file mode 100644 index 0000000..a4a290c --- /dev/null +++ b/libbiniou-ocaml-dev.install.in @@ -0,0 +1,10 @@ +@OCamlStdlibDir@/biniou/*.annot +@OCamlStdlibDir@/biniou/*.cmt +@OCamlStdlibDir@/biniou/*.cmo +@OCamlStdlibDir@/biniou/*.cmi +@OCamlStdlibDir@/biniou/*.ml* +OPT: @OCamlStdlibDir@/biniou/*.o +OPT: @OCamlStdlibDir@/biniou/*.cmxa +OPT: @OCamlStdlibDir@/biniou/*.a +OPT: @OCamlStdlibDir@/biniou/*.cmx +/usr/bin/bdump diff --git a/libbiniou-ocaml-dev.manpages b/libbiniou-ocaml-dev.manpages new file mode 100644 index 0000000..aeb224c --- /dev/null +++ b/libbiniou-ocaml-dev.manpages @@ -0,0 +1 @@ +debian/bdump.1 diff --git a/libbiniou-ocaml-dev.ocamldoc b/libbiniou-ocaml-dev.ocamldoc new file mode 100644 index 0000000..5143b80 --- /dev/null +++ b/libbiniou-ocaml-dev.ocamldoc @@ -0,0 +1 @@ +-d api/html --doc-base-generate diff --git a/libbiniou-ocaml.install.in b/libbiniou-ocaml.install.in new file mode 100644 index 0000000..6fb3b34 --- /dev/null +++ b/libbiniou-ocaml.install.in @@ -0,0 +1,3 @@ +@OCamlStdlibDir@/biniou/META +@OCamlStdlibDir@/biniou/*.cma +DYN: @OCamlStdlibDir@/biniou/*.cmxs diff --git a/rules b/rules new file mode 100755 index 0000000..4e307ad --- /dev/null +++ b/rules @@ -0,0 +1,47 @@ +#!/usr/bin/make -f +# -*- makefile -*- + +# Uncomment this to turn on verbose mode. +#export DH_VERBOSE=1 +#export DH_OPTIONS=-v + +DESTDIR=$(CURDIR)/debian/tmp + +include /usr/share/ocaml/ocamlvars.mk + +OCAMLFIND_DESTDIR=$(DESTDIR)/$(OCAML_STDLIB_DIR) +export OCAMLFIND_DESTDIR +OCAMLFIND_LDCONF=ignore +export OCAMLFIND_LDCONF + +%: + dh $@ --with ocaml + + +.PHONY: override_dh_auto_build +override_dh_auto_build: +ifeq ($(OCAML_HAVE_OCAMLOPT),yes) + $(MAKE) +else + $(MAKE) all bdump.byte +endif + $(MAKE) doc + + +.PHONY: override_dh_auto_install +override_dh_auto_install: + mkdir -p '$(OCAMLFIND_DESTDIR)' + mkdir -p '$(DESTDIR)/usr/bin' + make install 'BINDIR=$(DESTDIR)/usr/bin' + + +.PHONY: override_dh_install +override_dh_install: + dh_install --fail-missing + + +.PHONY: override_dh_installdocs +override_dh_installdocs: + mkdir api + mv doc api/html + dh_installdocs diff --git a/source/format b/source/format new file mode 100644 index 0000000..163aaf8 --- /dev/null +++ b/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/watch b/watch new file mode 100644 index 0000000..138cb90 --- /dev/null +++ b/watch @@ -0,0 +1,2 @@ +version=3 +https://github.com/mjambon/biniou/tags .*/v([\d\.]+)\.tar\.gz -- cgit v1.2.3