diff options
Diffstat (limited to 'src/lib/sedlexing.mli')
-rw-r--r-- | src/lib/sedlexing.mli | 66 |
1 files changed, 52 insertions, 14 deletions
diff --git a/src/lib/sedlexing.mli b/src/lib/sedlexing.mli index 9df2f17..b129ff1 100644 --- a/src/lib/sedlexing.mli +++ b/src/lib/sedlexing.mli @@ -45,27 +45,37 @@ exception MalFormed Uchars [a], a position [pos] and a code point count [n]. The function should put [n] code points or less in [a], starting at position [pos], and return the number of characters provided. A - return value of 0 means end of input. *) -val create : (Uchar.t array -> int -> int -> int) -> lexbuf - -(** set the initial tracked input position for [lexbuf]. - If set to [Lexing.dummy_pos], Sedlexing will not track position - information for you. *) -val set_position : lexbuf -> Lexing.position -> unit + return value of 0 means end of input. The [bytes_per_char] argument is + optional. If unspecified, byte positions are the same as code point + positions. *) +val create : + ?bytes_per_char:(Uchar.t -> int) -> + (Uchar.t array -> int -> int -> int) -> + lexbuf + +(** set the initial tracked input position, in code points, for [lexbuf]. + If [bytes_position] is unspecified, the byte position is set to the same value as the code + point position. *) +val set_position : + ?bytes_position:Lexing.position -> lexbuf -> Lexing.position -> unit (** [set_filename lexbuf file] sets the filename to [file] in [lexbuf]. It also sets the {!Lexing.pos_fname} field in returned {!Lexing.position} records. *) val set_filename : lexbuf -> string -> unit -(** Create a lexbuf from a stream of Unicode code points. *) -val from_gen : Uchar.t Gen.t -> lexbuf +(** Create a lexbuf from a stream of Unicode code points. [bytes_per_char] is + optional. If unspecified, byte positions are the same as code point positions. *) +val from_gen : ?bytes_per_char:(Uchar.t -> int) -> Uchar.t Gen.t -> lexbuf -(** Create a lexbuf from an array of Unicode code points. *) -val from_int_array : int array -> lexbuf +(** Create a lexbuf from an array of Unicode code points. [bytes_per_char] is + optional. If unspecified, byte positions are the same as code point positions. 
*) +val from_int_array : ?bytes_per_char:(Uchar.t -> int) -> int array -> lexbuf -(** Create a lexbuf from an array of Unicode code points. *) -val from_uchar_array : Uchar.t array -> lexbuf +(** Create a lexbuf from an array of Unicode code points. [bytes_per_char] is + optional. If unspecified, byte positions are the same as code point positions. *) +val from_uchar_array : + ?bytes_per_char:(Uchar.t -> int) -> Uchar.t array -> lexbuf (** {6 Interface for lexers semantic actions} *) @@ -78,29 +88,57 @@ val from_uchar_array : Uchar.t array -> lexbuf The first code point of the stream has offset 0. *) val lexeme_start : lexbuf -> int +(** [Sedlexing.lexeme_bytes_start lexbuf] returns the offset in the + input stream of the first byte of the matched string. + The first code point of the stream has offset 0. *) +val lexeme_bytes_start : lexbuf -> int + (** [Sedlexing.lexeme_end lexbuf] returns the offset in the input stream of the character following the last code point of the matched string. The first character of the stream has offset 0. *) val lexeme_end : lexbuf -> int +(** [Sedlexing.lexeme_bytes_end lexbuf] returns the offset in the input + stream of the byte following the last code point of the + matched string. The first character of the stream has offset + 0. *) +val lexeme_bytes_end : lexbuf -> int + (** [Sedlexing.loc lexbuf] returns the pair [(Sedlexing.lexeme_start lexbuf,Sedlexing.lexeme_end lexbuf)]. *) val loc : lexbuf -> int * int +(** [Sedlexing.bytes_loc lexbuf] returns the pair + [(Sedlexing.lexeme_bytes_start lexbuf,Sedlexing.lexeme_bytes_end + lexbuf)]. *) +val bytes_loc : lexbuf -> int * int + (** [Sedlexing.lexeme_length lexbuf] returns the difference [(Sedlexing.lexeme_end lexbuf) - (Sedlexing.lexeme_start lexbuf)], that is, the length (in code points) of the matched string. 
*) val lexeme_length : lexbuf -> int +(** [Sedlexing.lexeme_bytes_length lexbuf] returns the difference + [(Sedlexing.lexeme_bytes_end lexbuf) - (Sedlexing.lexeme_bytes_start + lexbuf)], that is, the length (in bytes) of the matched + string. *) +val lexeme_bytes_length : lexbuf -> int + (** [Sedlexing.lexing_positions lexbuf] returns the start and end - positions of the current token, using a record of type + positions, in code points, of the current token, using a record of type [Lexing.position]. This is intended for consumption by parsers like those generated by [Menhir]. *) val lexing_positions : lexbuf -> Lexing.position * Lexing.position +(** [Sedlexing.lexing_bytes_positions lexbuf] returns the start and end + positions, in bytes, of the current token, using a record of type + [Lexing.position]. This is intended for consumption + by parsers like those generated by [Menhir]. *) +val lexing_bytes_positions : lexbuf -> Lexing.position * Lexing.position + (** [Sedlexing.new_line lexbuf] increments the line count and sets the beginning of line to the current position, as though a newline character had been encountered in the input. *) |