summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Stephane Glondu <steph@glondu.net> 2022-02-20 17:37:27 +0100
committer Stephane Glondu <steph@glondu.net> 2022-02-20 17:37:27 +0100
commit 084bfcb45334fdd9cbc7ffec804b225fae82ad56 (patch)
tree 4250fa700ab952f990706f92e56955599564dc6b
parent 9db54116bb06f17fa8d8256490a91c13767ec1ae (diff)
New upstream version 1.0.3
-rw-r--r-- B0.ml 62
-rw-r--r-- BRZO 1
-rw-r--r-- CHANGES.md 6
-rw-r--r-- LICENSE.md 2
-rw-r--r-- README.md 7
-rw-r--r-- _tags 4
-rw-r--r-- doc/api.odocl 1
-rw-r--r-- doc/index.mld 17
-rw-r--r-- opam 52
-rw-r--r-- pkg/META 4
-rw-r--r-- src/uutf.ml 7
-rw-r--r-- src/uutf.mli 75
-rw-r--r-- test/test.ml 5
-rw-r--r-- test/tests.itarget 3
-rw-r--r-- test/utftrip.ml 7
15 files changed, 176 insertions, 77 deletions
diff --git a/B0.ml b/B0.ml
new file mode 100644
index 0000000..b04a3bd
--- /dev/null
+++ b/B0.ml
@@ -0,0 +1,62 @@
+open B0_kit.V000
+open B00_std
+
+
+(* OCaml library names *)
+
+let uutf = B0_ocaml.libname "uutf"
+let unix = B0_ocaml.libname "unix"
+let cmdliner = B0_ocaml.libname "cmdliner"
+
+(* Libraries *)
+
+let uutf_lib =
+ let srcs = Fpath.[`Dir (v "src")] in
+ let requires = [] in
+ B0_ocaml.lib uutf ~doc:"The uutf library" ~srcs ~requires
+
+(* Tests *)
+
+let test =
+ let srcs = Fpath.[`File (v "test/test.ml")] in
+ let meta = B0_meta.(empty |> tag test) in
+ let requires = [ uutf ] in
+ B0_ocaml.exe "test" ~doc:"Test suite" ~srcs ~meta ~requires
+
+let utftrip =
+ let doc = "Recode UTF-{8,16,16LE,16BE} and latin1 from stdin to stdout" in
+ let srcs = Fpath.[`File (v "test/utftrip.ml")] in
+ let requires = [unix; uutf; cmdliner] in
+ B0_ocaml.exe "utftrip" ~doc ~srcs ~requires
+
+(* Packs *)
+
+let default =
+ let meta =
+ let open B0_meta in
+ empty
+ |> tag B0_opam.tag
+ |> add authors ["The uutf programmers"]
+ |> add maintainers ["Daniel Bünzli <daniel.buenzl i@erratique.ch>"]
+ |> add homepage "https://erratique.ch/software/uutf"
+ |> add online_doc "https://erratique.ch/software/uutf/doc/"
+ |> add licenses ["ISC"]
+ |> add repo "git+https://erratique.ch/repos/uutf.git"
+ |> add issues "https://github.com/dbuenzli/uutf/issues"
+ |> add description_tags
+ ["unicode"; "text"; "utf-8"; "utf-16"; "codec"; "org:erratique"]
+ |> add B0_opam.Meta.depopts ["cmdliner", ""]
+ |> add B0_opam.Meta.conflicts
+ [ "cmdliner", {|< "0.9.8"|}]
+ |> add B0_opam.Meta.depends
+ [ "ocaml", {|>= "4.03.0"|};
+ "ocamlfind", {|build|};
+ "ocamlbuild", {|build|};
+ "topkg", {|build & >= "1.0.3"|};
+ ]
+ |> add B0_opam.Meta.build
+ {|[["ocaml" "pkg/pkg.ml" "build" "--dev-pkg" "%{dev}%"
+ "--with-cmdliner" "%{cmdliner:installed}%"]]|}
+ in
+ B0_pack.v "default" ~doc:"uutf package" ~meta ~locked:true @@
+ B0_unit.list ()
diff --git a/BRZO b/BRZO
new file mode 100644
index 0000000..5a47b6b
--- /dev/null
+++ b/BRZO
@@ -0,0 +1 @@
+(srcs-x pkg) \ No newline at end of file
diff --git a/CHANGES.md b/CHANGES.md
index 7a38b6a..50b1554 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,9 @@
+v1.0.3 2022-02-03
+-----------------
+
+- Support for OCaml 5.00, thanks to Kate (@kit-ty-kate) for
+ the patch.
+
v1.0.2 2019-02-05 La Forclaz (VS)
---------------------------------
diff --git a/LICENSE.md b/LICENSE.md
index 1ccbd72..6f8c261 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,4 +1,4 @@
-Copyright (c) 2016 Daniel C. Bünzli
+Copyright (c) 2016 The uutf programmers
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
diff --git a/README.md b/README.md
index 64d451a..2935a89 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
Uutf — Non-blocking streaming Unicode codec for OCaml
-------------------------------------------------------------------------------
-v1.0.2
+v1.0.3
Uutf is a non-blocking streaming codec to decode and encode the UTF-8,
UTF-16, UTF-16LE and UTF-16BE encoding schemes. It can efficiently
@@ -9,7 +9,8 @@ character position tracking and support newline normalization.
Functions are also provided to fold over the characters of UTF encoded
OCaml string values and to directly encode characters in OCaml
-Buffer.t values.
+Buffer.t values. **Note** that since OCaml 4.14, that functionality
+can be found in the Stdlib and you are encouraged to migrate to it.
Uutf has no dependency and is distributed under the ISC license.
@@ -33,7 +34,7 @@ The documentation and API reference is automatically generated from
the source interfaces. It can be consulted [online][doc] or via
`odig doc uutf`.
-[doc]: http://erratique.ch/software/uutf/doc/Uutf
+[doc]: http://erratique.ch/software/uutf/doc/
## Sample programs
diff --git a/_tags b/_tags
index 1bfe0bf..974e80f 100644
--- a/_tags
+++ b/_tags
@@ -1,7 +1,7 @@
-true : bin_annot, safe_string, package(bytes), package(uchar)
+true : bin_annot, safe_string
+<_b0> : -traverse
<src> : include
-
<test> : include
<test/utftrip.*> : package(unix), package(cmdliner)
<test/examples.*> : package(unix) \ No newline at end of file
diff --git a/doc/api.odocl b/doc/api.odocl
deleted file mode 100644
index 4b879fc..0000000
--- a/doc/api.odocl
+++ /dev/null
@@ -1 +0,0 @@
-Uutf
diff --git a/doc/index.mld b/doc/index.mld
new file mode 100644
index 0000000..1ba1460
--- /dev/null
+++ b/doc/index.mld
@@ -0,0 +1,17 @@
+{0 Uutf {%html: <span class="version">v1.0.3</span>%}}
+
+Uutf is a non-blocking streaming codec to decode and encode the UTF-8,
+UTF-16, UTF-16LE and UTF-16BE encoding schemes. It can efficiently
+work character by character without blocking on IO. Decoders perform
+character position tracking and support newline normalization.
+
+Functions are also provided to fold over the characters of UTF encoded
+OCaml string values and to directly encode characters in OCaml
+{!Buffer.t} values. {b Note} that since OCaml 4.14, that functionality
+can be found in the Stdlib and you are encouraged to migrate to it.
+
+{1:api API}
+
+{!modules:
+Uutf
+}
diff --git a/opam b/opam
index 3894cc3..42c72cc 100644
--- a/opam
+++ b/opam
@@ -1,23 +1,35 @@
-version: "1.0.2"
+version: "1.0.3"
opam-version: "2.0"
-maintainer: "Daniel Bünzli <daniel.buenzl i@erratique.ch>"
-authors: ["Daniel Bünzli <daniel.buenzl i@erratique.ch>"]
-homepage: "http://erratique.ch/software/uutf"
-doc: "http://erratique.ch/software/uutf/doc/Uutf"
-dev-repo: "git+http://erratique.ch/repos/uutf.git"
+name: "uutf"
+synopsis: """Non-blocking streaming Unicode codec for OCaml"""
+maintainer: ["Daniel Bünzli <daniel.buenzl i@erratique.ch>"]
+authors: ["The uutf programmers"]
+homepage: "https://erratique.ch/software/uutf"
+doc: "https://erratique.ch/software/uutf/doc/"
+dev-repo: "git+https://erratique.ch/repos/uutf.git"
bug-reports: "https://github.com/dbuenzli/uutf/issues"
-tags: [ "unicode" "text" "utf-8" "utf-16" "codec" "org:erratique" ]
-license: "ISC"
-depends: [
- "ocaml" {>= "4.01.0"}
- "ocamlfind" {build}
- "ocamlbuild" {build}
- "topkg" {build}
- "uchar"
-]
+license: ["ISC"]
+tags: ["unicode" "text" "utf-8" "utf-16" "codec" "org:erratique"]
+depends: ["ocaml" {>= "4.03.0"}
+ "ocamlfind" {build}
+ "ocamlbuild" {build}
+ "topkg" {build & >= "1.0.3"}]
depopts: ["cmdliner"]
-conflicts: ["cmdliner" { < "0.9.6"} ]
-build: [[
- "ocaml" "pkg/pkg.ml" "build"
- "--pinned" "%{pinned}%"
- "--with-cmdliner" "%{cmdliner:installed}%" ]] \ No newline at end of file
+conflicts: ["cmdliner" {< "0.9.8"}]
+build: [["ocaml" "pkg/pkg.ml" "build" "--dev-pkg" "%{dev}%"
+ "--with-cmdliner" "%{cmdliner:installed}%"]]
+description: """
+Uutf is a non-blocking streaming codec to decode and encode the UTF-8,
+UTF-16, UTF-16LE and UTF-16BE encoding schemes. It can efficiently
+work character by character without blocking on IO. Decoders perform
+character position tracking and support newline normalization.
+
+Functions are also provided to fold over the characters of UTF encoded
+OCaml string values and to directly encode characters in OCaml
+Buffer.t values. **Note** that since OCaml 4.14, that functionality
+can be found in the Stdlib and you are encouraged to migrate to it.
+
+Uutf has no dependency and is distributed under the ISC license.
+
+Home page: http://erratique.ch/software/uutf
+Contact: Daniel Bünzli `<daniel.buenzl i@erratique.ch>`"""
diff --git a/pkg/META b/pkg/META
index adc5f7d..29c73f2 100644
--- a/pkg/META
+++ b/pkg/META
@@ -1,6 +1,6 @@
-version = "1.0.2"
+version = "1.0.3"
description = "Non-blocking streaming Unicode codec for OCaml"
-requires = "bytes uchar"
+requires = ""
archive(byte) = "uutf.cma"
archive(native) = "uutf.cmxa"
plugin(byte) = "uutf.cma"
diff --git a/src/uutf.ml b/src/uutf.ml
index e0fc60c..eafca5f 100644
--- a/src/uutf.ml
+++ b/src/uutf.ml
@@ -1,7 +1,6 @@
(*---------------------------------------------------------------------------
- Copyright (c) 2012 Daniel C. Bünzli. All rights reserved.
+ Copyright (c) 2012 The uutf programmers. All rights reserved.
Distributed under the ISC license, see terms at the end of the file.
- uutf v1.0.2
---------------------------------------------------------------------------*)
let io_buffer_size = 65536 (* IO_BUFFER_SIZE 4.0.0 *)
@@ -31,7 +30,7 @@ let u_rep = Uchar.unsafe_of_int 0xFFFD (* replacement character. *)
type encoding = [ `UTF_8 | `UTF_16 | `UTF_16BE | `UTF_16LE ]
type decoder_encoding = [ encoding | `US_ASCII | `ISO_8859_1 ]
-let encoding_of_string s = match String.uppercase s with (* IANA names. *)
+let encoding_of_string s = match String.uppercase_ascii s with (* IANA names. *)
| "UTF-8" -> Some `UTF_8
| "UTF-16" -> Some `UTF_16
| "UTF-16LE" -> Some `UTF_16LE
@@ -807,7 +806,7 @@ module Buffer = struct
end
(*---------------------------------------------------------------------------
- Copyright (c) 2012 Daniel C. Bünzli
+ Copyright (c) 2012 The uutf programmers
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
diff --git a/src/uutf.mli b/src/uutf.mli
index c63fc19..9c34820 100644
--- a/src/uutf.mli
+++ b/src/uutf.mli
@@ -1,7 +1,6 @@
(*---------------------------------------------------------------------------
- Copyright (c) 2012 Daniel C. Bünzli. All rights reserved.
+ Copyright (c) 2012 The uutf programmers. All rights reserved.
Distributed under the ISC license, see terms at the end of the file.
- uutf v1.0.2
---------------------------------------------------------------------------*)
(** Non-blocking streaming Unicode codec.
@@ -13,32 +12,34 @@
character without blocking on IO. Decoders perform
character position tracking and support {{!nln}newline normalization}.
- Functions are also provided to {{!String} fold over} the
- characters of UTF encoded OCaml string values and to
- {{!Buffer}directly encode} characters in OCaml {!Buffer.t}
- values.
+ Functions are also provided to {{!String} fold over} the characters
+ of UTF encoded OCaml string values and to {{!Buffer}directly encode}
+ characters in OCaml {!Stdlib.Buffer.t} values. {b Note} that since OCaml
+ 4.14, that functionality can be found in {!Stdlib.String} and
+ {!Stdlib.Buffer} and you are encouraged to migrate to it.
See {{:#examples}examples} of use.
- {e v1.0.2 — {{:http://erratique.ch/software/uutf }homepage}}
-
- {3 References}
- {ul
- {- The Unicode Consortium.
- {e {{:http://www.unicode.org/versions/latest}The Unicode Standard}}.
- (latest version)}}
+ {b References}
+ {ul
+ {- The Unicode Consortium.
+ {e {{:http://www.unicode.org/versions/latest}The Unicode Standard}}.
+ (latest version)}}
*)
(** {1:ucharcsts Special Unicode characters} *)
val u_bom : Uchar.t
(** [u_bom] is the {{:http://unicode.org/glossary/#byte_order_mark}byte
- order mark} (BOM) character ([U+FEFF]). *)
+ order mark} (BOM) character ([U+FEFF]). From OCaml 4.06 on, use
+ {!Uchar.bom}. *)
val u_rep : Uchar.t
(** [u_rep] is the
{{:http://unicode.org/glossary/#replacement_character}replacement}
- character ([U+FFFD]). *)
+ character ([U+FFFD]). From OCaml 4.06 on, use
+ {!Uchar.rep}. *)
+
(** {1:schemes Unicode encoding schemes} *)
@@ -121,7 +122,7 @@ val decoder : ?nln:[< nln] -> ?encoding:[< decoder_encoding] -> [< src] ->
can only be [`UTF_8], [`UTF_16BE] or [`UTF_16LE]. The heuristic
looks at the first three bytes of input (or less if impossible)
and takes the {e first} matching byte pattern in the table below.
-{[
+{v
xx = any byte
.. = any byte or no byte (input too small)
pp = positive byte
@@ -137,7 +138,7 @@ pp 00 .. | `UTF_16LE | ASCII UTF-16LE and U+0000 is often forbidden
uu .. .. | `UTF_8 | ASCII UTF-8 or valid UTF-8 first byte.
xx xx .. | `UTF_16BE | Not UTF-8 => UTF-16, no BOM => UTF-16BE
.. .. .. | `UTF_8 | Single malformed UTF-8 byte or no input.
-]}
+v}
This heuristic is compatible both with BOM based
recognition and
{{:http://tools.ietf.org/html/rfc4627#section-3}JSON-like encoding
@@ -153,12 +154,12 @@ xx xx .. | `UTF_16BE | Not UTF-8 => UTF-16, no BOM => UTF-16BE
and character count of the last decoded character (including
[`Malformed] ones) are respectively returned by {!decoder_line},
{!decoder_col}, {!decoder_byte_count} and {!decoder_count}. Before
- the first call to {!decode} the line number is [1] and the column
- is [0]. Each {!decode} returning [`Uchar] or [`Malformed]
+ the first call to {!val-decode} the line number is [1] and the column
+ is [0]. Each {!val-decode} returning [`Uchar] or [`Malformed]
increments the column until a newline. On a newline, the line
number is incremented and the column set to zero. For example the
line is [2] and column [0] after the first newline was
- decoded. This can be understood as if {!decode} was moving an
+ decoded. This can be understood as if {!val-decode} was moving an
insertion point to the right in the data. A {e newline} is
anything normalized by [`Readline], see {!nln}.
@@ -205,7 +206,7 @@ val set_decoder_encoding : decoder -> [< decoder_encoding] -> unit
(** [set_decoder_encoding d enc] changes the decoded encoding
to [enc] after decoding started.
- {b Warning.} Call only after {!decode} was called on [d] and that the
+ {b Warning.} Call only after {!val-decode} was called on [d] and that the
last call to it returned something different from [`Await] or data may
be lost. After encoding guess wait for at least three [`Uchar]s. *)
@@ -213,25 +214,25 @@ val set_decoder_encoding : decoder -> [< decoder_encoding] -> unit
val decoder_line : decoder -> int
(** [decoder_line d] is the line number of the last
- decoded (or malformed) character. See {!decoder} for details. *)
+ decoded (or malformed) character. See {!val-decoder} for details. *)
val decoder_col : decoder -> int
(** [decoder_col d] is the column number of the last decoded
- (or malformed) character. See {!decoder} for details. *)
+ (or malformed) character. See {!val-decoder} for details. *)
val decoder_byte_count : decoder -> int
(** [decoder_byte_count d] is the number of bytes already decoded on
- [d] (including malformed ones). This is the last {!decode}'s
+ [d] (including malformed ones). This is the last {!val-decode}'s
end byte offset counting from the beginning of the stream. *)
val decoder_count : decoder -> int
(** [decoder_count d] is the number of characters already decoded on [d]
- (including malformed ones). See {!decoder} for details. *)
+ (including malformed ones). See {!val-decoder} for details. *)
val decoder_removed_bom : decoder -> bool
(** [decoder_removed_bom d] is [true] iff an {e initial}
{{:http://unicode.org/glossary/#byte_order_mark}BOM} was
- removed from the input stream. See {!decoder} for details. *)
+ removed from the input stream. See {!val-decoder} for details. *)
val decoder_src : decoder -> src
(** [decoder_src d] is [d]'s input source. *)
@@ -267,7 +268,7 @@ val encode :
{ul
{- [`Partial] iff [e] has a [`Manual] destination and needs more output
storage. The client must use {!Manual.dst} to provide a new buffer
- and then call {!encode} with [`Await] until [`Ok] is returned.}
+ and then call {!val-encode} with [`Await] until [`Ok] is returned.}
{- [`Ok] when the encoder is ready to encode a new [`Uchar] or [`End]}}
For [`Manual] destination, encoding [`End] always returns
@@ -293,15 +294,15 @@ val encoder_dst : encoder -> dst
module Manual : sig
val src : decoder -> Bytes.t -> int -> int -> unit
(** [src d s j l] provides [d] with [l] bytes to read, starting at
- [j] in [s]. This byte range is read by calls to {!decode} with [d]
+ [j] in [s]. This byte range is read by calls to {!val-decode} with [d]
until [`Await] is returned. To signal the end of input call the function
with [l = 0]. *)
val dst : encoder -> Bytes.t -> int -> int -> unit
(** [dst e s j l] provides [e] with [l] bytes to write, starting
- at [j] in [s]. This byte range is written by calls to {!encode} with [e]
- until [`Partial] is returned. Use {!dst_rem} to know the remaining
- number of non-written free bytes in [s]. *)
+ at [j] in [s]. This byte range is written by calls to
+ {!val-encode} with [e] until [`Partial] is returned. Use {!dst_rem} to
+ know the remaining number of non-written free bytes in [s]. *)
val dst_rem : encoder -> int
(** [dst_rem e] is the remaining number of non-written, free bytes
@@ -310,7 +311,10 @@ end
(** {1:strbuf String folders and Buffer encoders} *)
-(** Fold over the characters of UTF encoded OCaml [string] values. *)
+(** Fold over the characters of UTF encoded OCaml [string] values.
+
+ {b Note.} Since OCaml 4.14, UTF decoders are available in
+ {!Stdlib.String}. You are encouraged to migrate to them. *)
module String : sig
(** {1 Encoding guess} *)
@@ -358,7 +362,10 @@ module String : sig
[String.length s - pos]. *)
end
-(** UTF encode characters in OCaml {!Buffer.t} values. *)
+(** UTF encode characters in OCaml {!Buffer.t} values.
+
+ {b Note.} Since OCaml 4.06, these encoders are available in
+ {!Stdlib.Buffer}. You are encouraged to migrate to them. *)
module Buffer : sig
(** {1 Buffer encoders} *)
@@ -487,7 +494,7 @@ end
*)
(*---------------------------------------------------------------------------
- Copyright (c) 2012 Daniel C. Bünzli
+ Copyright (c) 2012 The uutf programmers
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
diff --git a/test/test.ml b/test/test.ml
index 57be950..0f622a6 100644
--- a/test/test.ml
+++ b/test/test.ml
@@ -1,7 +1,6 @@
(*---------------------------------------------------------------------------
- Copyright (c) 2012 Daniel C. Bünzli. All rights reserved.
+ Copyright (c) 2012 The uutf programmers. All rights reserved.
Distributed under the ISC license, see terms at the end of the file.
- uutf v1.0.2
---------------------------------------------------------------------------*)
let u_nl = Uchar.of_int 0x000A
@@ -377,7 +376,7 @@ let test () =
let () = if not (!Sys.interactive) then test ()
(*---------------------------------------------------------------------------
- Copyright (c) 2012 Daniel C. Bünzli
+ Copyright (c) 2012 The uutf programmers
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
diff --git a/test/tests.itarget b/test/tests.itarget
deleted file mode 100644
index 7b23501..0000000
--- a/test/tests.itarget
+++ /dev/null
@@ -1,3 +0,0 @@
-test.native
-examples.native
-utftrip.native \ No newline at end of file
diff --git a/test/utftrip.ml b/test/utftrip.ml
index 714af74..627bf0c 100644
--- a/test/utftrip.ml
+++ b/test/utftrip.ml
@@ -1,7 +1,6 @@
(*---------------------------------------------------------------------------
- Copyright (c) 2012 Daniel C. Bünzli. All rights reserved.
+ Copyright (c) 2012 The uutf programmers. All rights reserved.
Distributed under the ISC license, see terms at the end of the file.
- uutf v1.0.2
---------------------------------------------------------------------------*)
let str = Printf.sprintf
@@ -385,14 +384,14 @@ let cmd =
in
Term.(pure do_cmd $ cmd $ file $ sin $ sout $ use_unix $ usize $
ienc $ oenc $ nln $ rseed $ rcount),
- Term.info "utftrip" ~version:"v1.0.2" ~doc ~man
+ Term.info "utftrip" ~version:"v1.0.3" ~doc ~man
let () = match Term.eval cmd with
| `Error _ -> exit 1
| _ -> if !input_malformed then exit 2 else exit 0
(*---------------------------------------------------------------------------
- Copyright (c) 2012 Daniel C. Bünzli
+ Copyright (c) 2012 The uutf programmers
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above