summaryrefslogtreecommitdiff
path: root/src/lib/sedlexing.mli
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/sedlexing.mli')
-rw-r--r--src/lib/sedlexing.mli66
1 files changed, 52 insertions, 14 deletions
diff --git a/src/lib/sedlexing.mli b/src/lib/sedlexing.mli
index 9df2f17..b129ff1 100644
--- a/src/lib/sedlexing.mli
+++ b/src/lib/sedlexing.mli
@@ -45,27 +45,37 @@ exception MalFormed
Uchars [a], a position [pos] and a code point count [n]. The
function should put [n] code points or less in [a], starting at
position [pos], and return the number of characters provided. A
- return value of 0 means end of input. *)
-val create : (Uchar.t array -> int -> int -> int) -> lexbuf
-
-(** set the initial tracked input position for [lexbuf].
- If set to [Lexing.dummy_pos], Sedlexing will not track position
- information for you. *)
-val set_position : lexbuf -> Lexing.position -> unit
+ return value of 0 means end of input. The [bytes_per_char] argument is
+ optional. If unspecified, byte positions are the same as code point
+ positions. *)
+val create :
+ ?bytes_per_char:(Uchar.t -> int) ->
+ (Uchar.t array -> int -> int -> int) ->
+ lexbuf
+
+(** Set the initial tracked input position, in code points, for [lexbuf].
+ If [bytes_position] is unspecified, the byte position is set to the
+ same value as the code point position. *)
+val set_position :
+ ?bytes_position:Lexing.position -> lexbuf -> Lexing.position -> unit
(** [set_filename lexbuf file] sets the filename to [file] in
[lexbuf]. It also sets the {!Lexing.pos_fname} field in
returned {!Lexing.position} records. *)
val set_filename : lexbuf -> string -> unit
-(** Create a lexbuf from a stream of Unicode code points. *)
-val from_gen : Uchar.t Gen.t -> lexbuf
+(** Create a lexbuf from a stream of Unicode code points. [bytes_per_char] is
+ optional. If unspecified, byte positions are the same as code point positions. *)
+val from_gen : ?bytes_per_char:(Uchar.t -> int) -> Uchar.t Gen.t -> lexbuf
-(** Create a lexbuf from an array of Unicode code points. *)
-val from_int_array : int array -> lexbuf
+(** Create a lexbuf from an array of Unicode code points. [bytes_per_char] is
+ optional. If unspecified, byte positions are the same as code point positions. *)
+val from_int_array : ?bytes_per_char:(Uchar.t -> int) -> int array -> lexbuf
-(** Create a lexbuf from an array of Unicode code points. *)
-val from_uchar_array : Uchar.t array -> lexbuf
+(** Create a lexbuf from an array of Unicode code points. [bytes_per_char] is
+ optional. If unspecified, byte positions are the same as code point positions. *)
+val from_uchar_array :
+ ?bytes_per_char:(Uchar.t -> int) -> Uchar.t array -> lexbuf
(** {6 Interface for lexers semantic actions} *)
@@ -78,29 +88,57 @@ val from_uchar_array : Uchar.t array -> lexbuf
The first code point of the stream has offset 0. *)
val lexeme_start : lexbuf -> int
+(** [Sedlexing.lexeme_bytes_start lexbuf] returns the offset in the
+ input stream of the first byte of the matched string.
+ The first code point of the stream has offset 0. *)
+val lexeme_bytes_start : lexbuf -> int
+
(** [Sedlexing.lexeme_end lexbuf] returns the offset in the input
stream of the character following the last code point of the
matched string. The first character of the stream has offset
0. *)
val lexeme_end : lexbuf -> int
+(** [Sedlexing.lexeme_bytes_end lexbuf] returns the offset in the input
+ stream of the byte following the last code point of the
+ matched string. The first character of the stream has offset
+ 0. *)
+val lexeme_bytes_end : lexbuf -> int
+
(** [Sedlexing.loc lexbuf] returns the pair
[(Sedlexing.lexeme_start lexbuf,Sedlexing.lexeme_end
lexbuf)]. *)
val loc : lexbuf -> int * int
+(** [Sedlexing.bytes_loc lexbuf] returns the pair
+ [(Sedlexing.lexeme_bytes_start lexbuf,Sedlexing.lexeme_bytes_end
+ lexbuf)]. *)
+val bytes_loc : lexbuf -> int * int
+
(** [Sedlexing.lexeme_length lexbuf] returns the difference
[(Sedlexing.lexeme_end lexbuf) - (Sedlexing.lexeme_start
lexbuf)], that is, the length (in code points) of the matched
string. *)
val lexeme_length : lexbuf -> int
+(** [Sedlexing.lexeme_bytes_length lexbuf] returns the difference
+ [(Sedlexing.lexeme_bytes_end lexbuf) - (Sedlexing.lexeme_bytes_start
+ lexbuf)], that is, the length (in bytes) of the matched
+ string. *)
+val lexeme_bytes_length : lexbuf -> int
+
(** [Sedlexing.lexing_positions lexbuf] returns the start and end
- positions of the current token, using a record of type
+ positions, in code points, of the current token, using a record of type
[Lexing.position]. This is intended for consumption
by parsers like those generated by [Menhir]. *)
val lexing_positions : lexbuf -> Lexing.position * Lexing.position
+(** [Sedlexing.lexing_bytes_positions lexbuf] returns the start and end
+ positions, in bytes, of the current token, using a record of type
+ [Lexing.position]. This is intended for consumption
+ by parsers like those generated by [Menhir]. *)
+val lexing_bytes_positions : lexbuf -> Lexing.position * Lexing.position
+
(** [Sedlexing.new_line lexbuf] increments the line count and
sets the beginning of line to the current position, as though
a newline character had been encountered in the input. *)