diff options
-rw-r--r-- | morsmall.opam | 9 | ||||
-rw-r--r-- | src/AST.ml | 5 | ||||
-rw-r--r-- | src/CST_to_AST.ml | 105 | ||||
-rw-r--r-- | src/Makefile | 2 | ||||
-rw-r--r-- | src/dune | 4 | ||||
-rw-r--r-- | src/ext/extFormat.ml | 40 | ||||
-rw-r--r-- | src/ext/extPervasives.ml | 23 | ||||
-rw-r--r-- | src/morsmall.ml | 5 | ||||
-rw-r--r-- | src/morsmall.mli | 1 | ||||
-rw-r--r-- | src/safePrinter.ml | 8 | ||||
-rw-r--r-- | src/utilities/dune | 4 | ||||
-rw-r--r-- | src/utilities/testParser.ml | 244 | ||||
-rw-r--r-- | src/utilities/testParser.mli | 37 | ||||
-rw-r--r-- | tests/generator.ml | 6 |
14 files changed, 360 insertions, 133 deletions
diff --git a/morsmall.opam b/morsmall.opam index f89f1ee..9eb45f7 100644 --- a/morsmall.opam +++ b/morsmall.opam @@ -6,7 +6,7 @@ description: """ A concise AST for POSIX shell """ -version: "0.1" +version: "0.2.0" license: "GPL3" maintainer: "Nicolas Jeannerod <nicolas.jeannerod@irif.fr>" @@ -17,9 +17,10 @@ bug-reports: "https://github.com/colis-anr/morsmall/issues" dev-repo: "git+ssh://git@github.com/colis-anr/morsmall.git" depends: [ - "dune" {build} - "morbig" - "ocaml" {build & >= "4.04"} + "dune" {build} + "morbig" {>= "0.10.0"} + "ocaml" {>= "4.04"} + "ppx_deriving" {build} ] build: [ @@ -33,6 +33,7 @@ and character_range = char list and attribute = | NoAttribute + | ParameterLength of word | UseDefaultValues of word | AssignDefaultValues of word | IndicateErrorifNullorUnset of word @@ -47,11 +48,9 @@ and word_component = | DoubleQuoted of word | Variable of name * attribute | Subshell of program - | Name of string (* FIXME: do we really want that? *) - | Assignment of assignment (* and that? *) | GlobAll | GlobAny - | GlobRange of character_range + | BracketExpression of (Morbig.CST.bracket_expression [@equal (=)] [@opaque]) and word = word_component list and word' = word located diff --git a/src/CST_to_AST.ml b/src/CST_to_AST.ml index d97da95..001ae71 100644 --- a/src/CST_to_AST.ml +++ b/src/CST_to_AST.ml @@ -756,76 +756,85 @@ and word_double_quoted__to__word (Word (_, word_cst)) = (* CST.word_cst -> AST.word *) and word_cst__to__word (word_cst : word_cst) : AST.word = - List.map word_component__to__word_component word_cst + List.map word_component__to__word word_cst + |> List.flatten and word_cst_double_quoted__to__word (word_cst : word_cst) : AST.word = - List.map word_component_double_quoted__to__word_component word_cst + List.map word_component_double_quoted__to__word word_cst + |> List.flatten (* CST.word_component -> AST.word_component *) -and word_component__to__word_component = function - | WordSubshell (_, program') -> - AST.Subshell (program'__to__program program') +and word_component__to__word = function + | WordEmpty -> + [] | WordName name -> - AST.Name name (* FIXME: literal? *) - | WordAssignmentWord assignment_word -> - AST.Assignment (assignment_word__to__assignment assignment_word) - | WordDoubleQuoted word -> - AST.DoubleQuoted (word_double_quoted__to__word word) + [AST.Literal name] + | WordLiteral literal -> + [AST.Literal literal] + | WordAssignmentWord (Name name, Word (_, word_cst)) -> + [AST.Literal name; + AST.Literal "="] + @ word_cst__to__word word_cst | WordSingleQuoted (Word (_, [WordLiteral literal])) -> - AST.Literal literal + [AST.Literal literal] | WordSingleQuoted (Word (_, [])) -> - AST.Literal "" + [AST.Literal ""] | WordSingleQuoted _ -> - assert false - | WordLiteral literal -> - AST.Literal literal + assert false + | WordSubshell (_, program') -> + [AST.Subshell (program'__to__program program')] + | WordDoubleQuoted word -> + [AST.DoubleQuoted (word_double_quoted__to__word word)] | WordVariable (VariableAtom (name, variable_attribute)) -> - AST.Variable (name, variable_attribute__to__attribute variable_attribute) + [AST.Variable (name, variable_attribute__to__attribute variable_attribute)] | WordGlobAll -> - AST.GlobAll + [AST.GlobAll] | WordGlobAny -> - AST.GlobAny - | WordGlobRange (Range char_list) -> - AST.GlobRange char_list - | WordOther | WordEmpty -> - assert false - -and word_component_double_quoted__to__word_component = function + [AST.GlobAny] + | WordReBracketExpression bracket_expression -> + [AST.BracketExpression bracket_expression] + +and word_component_double_quoted__to__word = function + | WordEmpty -> + [] + | WordName literal | WordLiteral literal -> + [AST.Literal literal] | WordSubshell (_, program') -> - AST.Subshell (program'__to__program program') - | WordName name -> - AST.Name name (* FIXME: literal? *) - | WordAssignmentWord assignment_word -> - AST.Assignment (assignment_word__to__assignment assignment_word) - | WordLiteral literal -> - AST.Literal literal + [AST.Subshell (program'__to__program program')] + | WordAssignmentWord (Name name, Word (_, word_cst)) -> + [AST.Literal name; + AST.Literal "="] + @ word_cst_double_quoted__to__word word_cst | WordVariable (VariableAtom (name, variable_attribute)) -> - AST.Variable (name, variable_attribute__to__attribute variable_attribute) + [AST.Variable (name, variable_attribute__to__attribute variable_attribute)] + | WordReBracketExpression bracket_expression -> + [AST.BracketExpression bracket_expression] | WordDoubleQuoted _ | WordSingleQuoted _ - | WordGlobAll | WordGlobAny | WordGlobRange _ - | WordOther | WordEmpty -> - assert false + | WordGlobAll | WordGlobAny -> + assert false and variable_attribute__to__attribute = function | NoAttribute -> - AST.NoAttribute - | UseDefaultValues word -> - AST.UseDefaultValues (word__to__word word) - | AssignDefaultValues word -> - AST.AssignDefaultValues (word__to__word word) - | IndicateErrorifNullorUnset word -> - AST.IndicateErrorifNullorUnset (word__to__word word) - | UseAlternativeValue word -> - AST.UseAlternativeValue (word__to__word word) + AST.NoAttribute + | ParameterLength word -> + AST.ParameterLength (word__to__word word) + | UseDefaultValues (_, word) -> + AST.UseDefaultValues (word__to__word word) + | AssignDefaultValues (_, word) -> + AST.AssignDefaultValues (word__to__word word) + | IndicateErrorifNullorUnset (_, word) -> + AST.IndicateErrorifNullorUnset (word__to__word word) + | UseAlternativeValue (_, word) -> + AST.UseAlternativeValue (word__to__word word) | RemoveSmallestSuffixPattern word -> - AST.RemoveSmallestSuffixPattern (word__to__word word) + AST.RemoveSmallestSuffixPattern (word__to__word word) | RemoveLargestSuffixPattern word -> - AST.RemoveLargestSuffixPattern (word__to__word word) + AST.RemoveLargestSuffixPattern (word__to__word word) | RemoveSmallestPrefixPattern word -> - AST.RemoveSmallestPrefixPattern (word__to__word word) + AST.RemoveSmallestPrefixPattern (word__to__word word) | RemoveLargestPrefixPattern word -> - AST.RemoveLargestPrefixPattern (word__to__word word) + AST.RemoveLargestPrefixPattern (word__to__word word) (* CST.name -> AST.name *) diff --git a/src/Makefile b/src/Makefile index 6891285..a83741e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,4 +1,4 @@ default: headers: - headache -h .header *.ml *.mli + headache -h .header *.ml *.mli utilities/*.ml utilities/*.mli @@ -1,8 +1,6 @@ -(copy_files ext/*) - (library (name morsmall) (public_name morsmall) (preprocess (pps ppx_deriving.std)) - (libraries morbig) + (libraries morbig morsmall.utilities) (flags :standard -w +A-4-30-42)) diff --git a/src/ext/extFormat.ml b/src/ext/extFormat.ml deleted file mode 100644 index c58df51..0000000 --- a/src/ext/extFormat.ml +++ /dev/null @@ -1,40 +0,0 @@ -(******************************************************************************) -(* *) -(* RSCLI *) -(* *) -(* A command-line interface for RSCDS tunes *) -(* Copyright (C) 2017 Nicolas "Niols" Jeannerod *) -(* *) -(* This program is free software: you can redistribute it and/or modify *) -(* it under the terms of the GNU General Public License as published by *) -(* the Free Software Foundation, either version 3 of the License, or *) -(* (at your option) any later version. *) -(* *) -(* This program is distributed in the hope that it will be useful, *) -(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) -(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) -(* GNU General Public License for more details. *) -(* *) -(* You should have received a copy of the GNU General Public License *) -(* along with this program. If not, see <http://www.gnu.org/licenses/>. *) -(* *) -(******************************************************************************) - -type 'a fprintf = Format.formatter -> 'a -> unit - -let to_string__of__fprintf fprintf value = - let buf = Buffer.create 16 in - let ppf = Format.formatter_of_buffer buf in - fprintf ppf value; - Format.fprintf ppf "@?"; - Buffer.contents buf - -let to_channel__of__fprintf fprintf channel value = - let ppf = Format.formatter_of_out_channel channel in - fprintf ppf value; - Format.fprintf ppf "@?" - -let to_file__of__fprintf fprintf filename value = - let oc = open_out filename in - to_channel__of__fprintf fprintf oc value; - close_out oc diff --git a/src/ext/extPervasives.ml b/src/ext/extPervasives.ml deleted file mode 100644 index 3c1ff7b..0000000 --- a/src/ext/extPervasives.ml +++ /dev/null @@ -1,23 +0,0 @@ -(******************************************************************************) -(* *) -(* Morsmall *) -(* A concise AST for POSIX shell *) -(* *) -(* Copyright (C) 2017 Yann Régis-Gianas, Ralf Treinen, Nicolas Jeannerod *) -(* *) -(* This program is free software: you can redistribute it and/or modify *) -(* it under the terms of the GNU General Public License as published by *) -(* the Free Software Foundation, either version 3 of the License, or *) -(* (at your option) any later version. *) -(* *) -(* This program is distributed in the hope that it will be useful, *) -(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) -(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) -(* GNU General Public License for more details. *) -(* *) -(* You should have received a copy of the GNU General Public License *) -(* along with this program. If not, see <http://www.gnu.org/licenses/>. *) -(* *) -(******************************************************************************) - -let (||>) f g x = f x |> g diff --git a/src/morsmall.ml b/src/morsmall.ml index 444b9c8..6200005 100644 --- a/src/morsmall.ml +++ b/src/morsmall.ml @@ -22,10 +22,10 @@ exception SyntaxError of Location.lexing_position let parse_file filename = - let open Morbig.API in + let open Morbig in ( try - Morbig.API.parse_file filename + Morbig.parse_file filename with | Errors.DuringParsing position | Errors.DuringLexing (position, _) -> @@ -43,3 +43,4 @@ module AST = AST module Location = Location module SafePrinter = SafePrinter module CST_to_AST = CST_to_AST +module Utilities = Morsmall_utilities diff --git a/src/morsmall.mli b/src/morsmall.mli index 1420e7b..c216bad 100644 --- a/src/morsmall.mli +++ b/src/morsmall.mli @@ -43,3 +43,4 @@ val pp_print_debug : Format.formatter -> AST.program -> unit module Location = Location module SafePrinter = SafePrinter module CST_to_AST = CST_to_AST +module Utilities = Morsmall_utilities diff --git a/src/safePrinter.ml b/src/safePrinter.ml index 59b1ff0..1d5ac6d 100644 --- a/src/safePrinter.ml +++ b/src/safePrinter.ml @@ -39,15 +39,11 @@ and pp_word_component ppf = function (*FIXME*) fpf ppf "${%s}" variable | Subshell command_list -> fpf ppf "$(%a)" pp_command'_list command_list - | Name name -> - fpf ppf "%s" name - | Assignment assignment -> - pp_assignment ppf assignment | GlobAll -> fpf ppf "*" | GlobAny -> fpf ppf "?" - | GlobRange _char_range -> + | BracketExpression _bracket_expression -> assert false (* AST.word *) @@ -91,7 +87,7 @@ and pp_pattern ppf = function and pp_pattern' ppf pattern' = Location.on_located (pp_pattern ppf) pattern' - + (* AST.assignement *) and pp_assignment ppf (variable, word) = diff --git a/src/utilities/dune b/src/utilities/dune new file mode 100644 index 0000000..ce2c816 --- /dev/null +++ b/src/utilities/dune @@ -0,0 +1,4 @@ +(library + (name morsmall_utilities) + (public_name morsmall.utilities) + (libraries morbig)) diff --git a/src/utilities/testParser.ml b/src/utilities/testParser.ml new file mode 100644 index 0000000..deae0bd --- /dev/null +++ b/src/utilities/testParser.ml @@ -0,0 +1,244 @@ +(***************************************************************************) +(* Morsmall *) +(* A concise AST for POSIX shell *) +(* *) +(* Copyright (C) 2017,2018,2019 Yann Régis-Gianas, Ralf Treinen, *) +(* Nicolas Jeannerod *) +(* *) +(* This program is free software: you can redistribute it and/or modify *) +(* it under the terms of the GNU General Public License as published by *) +(* the Free Software Foundation, either version 3 of the License, or *) +(* (at your option) any later version. *) +(* *) +(* This program is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) +(* GNU General Public License for more details. *) +(* *) +(* You should have received a copy of the GNU General Public License *) +(* along with this program. If not, see <http://www.gnu.org/licenses/>. *) +(***************************************************************************) + +(** abstract syntax of test expressions *) + +type expression = + | And of expression * expression + | Or of expression * expression + | Not of expression + | Binary of string * string * string (* (op,arg_left,arg_right) *) + | Unary of string * string (* (op,arg) *) + | Single of string (* arg *) + +exception Parse_error + +type token = + | UnOp of string (* unary operators -e, -f, etc. *) + | BinOp of string (* binary operators -eq, =, etc. *) + | AndOp (* -a *) + | OrOp (* -o *) + | NotOp (* ! *) + | ParL (* ( *) + | ParR (* ) *) + | BracketR (* ] *) + | String of string (* all the rest *) + | EOF + +let to_token s = match s with + (* file existence and type *) + | "-e" | "-d" | "-f" | "-b" | "-c" | "-h" | "-L" | "-p" | "-S" -> UnOp s + (* file attributes *) + | "-g" | "-u" | "-s" | "-r" | "-w" | "-x" -> UnOp s + (* GNU extension on files *) + | "-G" | "-O" | "-k" -> UnOp s + (* GNU extension on files *) + | "-nt" | "-ot" | "-ef" -> BinOp s + (* unary operators on strings *) + | "-n" | "-z" -> UnOp s + (* binary operators on strings *) + | "=" | "!=" -> BinOp s + (* binary operators on integers *) + | "-eq" | "-ne" | "-gt" | "-ge" | "-lt" | "-le" -> BinOp s + (* unary operator on file descriptor *) + | "-t" -> UnOp s + | "-a" -> AndOp + | "-o" -> OrOp + | "(" -> ParL + | ")" -> ParR + | "]" -> BracketR + | "!" -> NotOp + | _ -> String s + +let parse ?(bracket=false) wl = + let tokenbuf = + wl + |> List.map Morbig.remove_quotes + |> List.map to_token + |> ref + in + let lookup () = + match !tokenbuf with + | h::_ -> h + | [] -> EOF + in + let pop () = + match !tokenbuf with + | _::r -> tokenbuf := r + | [] -> assert false + in + + let rec parse_S () = + let exp = parse_S' () in + if bracket then + if lookup () = BracketR then + pop () + else + raise Parse_error; + if lookup () = EOF then + exp + else + raise Parse_error + + and parse_S' () = + match lookup () with + | EOF | BracketR -> None + | _ -> Some (parse_disj ()) + + and parse_disj () = + let head = parse_conj () in + match parse_disj' () with + | None -> head + | Some rest -> Or (head,rest) + + and parse_disj' () = + match lookup () with + | EOF | BracketR | ParR -> None + | OrOp -> pop (); Some (parse_disj ()) + | _ -> raise Parse_error + + and parse_conj () = + let head = parse_literal () in + match parse_conj' () with + | None -> head + | Some rest -> And (head, rest) + + and parse_conj' () = + match lookup () with + | OrOp | EOF | BracketR | ParR -> None + | AndOp -> pop (); Some (parse_conj ()) + | _ -> raise Parse_error + + and parse_literal () = + match lookup () with + | NotOp -> pop (); Not (parse_atom ()) + | UnOp _ | ParL | String _ -> parse_atom () + | _ -> raise Parse_error + + and parse_atom () = + match lookup () with + | UnOp op -> + pop (); + (match lookup () with + | String s -> pop (); Unary (op,s) + | _ -> raise Parse_error) + | ParL -> + pop (); + let exp = parse_disj () in + (match lookup () with + | ParR -> pop (); exp + | _ -> raise Parse_error) + | String s -> + pop (); + (match parse_atom' () with + | None -> Single s + | Some (binop,rightarg) -> Binary (binop,s,rightarg)) + | _ -> raise Parse_error + + and parse_atom' () = + match lookup () with + | AndOp | OrOp | EOF | BracketR -> None + | BinOp binop -> + pop (); + (match lookup () with + | String rightarg | UnOp rightarg | BinOp rightarg + -> pop (); Some (binop,rightarg) + | _ -> raise Parse_error) + | _ -> raise Parse_error + in + + parse_S () + + +(* + +grammar of test expressions: + +<S> -> EOF | <disj> EOF +<disj> -> <conj> | <conj> -o <disj> +<conj> -> <literal> | <literal> -a <conj> +<literal> -> <atom> | ! <atom> +<atom> -> string | unop string | string binop string | ( <disj> ) + +grammar in LL(1): + +<S> -> <S'> EOF +<S'> -> EPSILON | <disj> +<disj> -> <conj> <disj'> +<disj'> -> EPSILON | -o <disj> +<conj> -> <literal> <conj'> +<conj'> -> EPSILON | -a <conj> +<literal> -> <atom> | ! <atom> +<atom> -> string <atom'> | unop string | ( <disj> ) +<atom'> -> EPSILON | binop string + +annulating non-terminals: { <disj'>, <conj'>, <atom'> } + +nonterminal | Fi_1 +------------+-------------------- +<S> | string, unop, (, ! +<disj> | string, unop, (, ! +<disj'> | -o +<conj> | string, unop, (, ! +<conj'> | -a +<literal> | string, unop, (, ! +<atom> | string, unop, ( +<atom'> | binop + +right side | FIRST_1 +-------------------+--------------------- +<disj> EOF | string, unop, (, ! +<conj> <disj'> | string, unop, (, ! +-o <disj> | -o +<literal> <conj'> | string, unop, (, ! +-a <conj> | -a +<atom> | string, unop, ( +! <atom> | ! +string <atom'> | string +unop string | unop +( <disj> ) | ( +binop string | binop + +nonterminal | FOLLOW_1 +------------+-------------------- +<S> | \emptyset +<disj> | EOF, ) +<disj'> | EOF, ) +<conj> | -o, EOF, ) +<conj'> | -o, EOF, ) +<literal> | -a, -o, EOF, ) +<atom> | -a, -o, EOF, ) +<atom'> | -a, -o, EOF, ) + +Hence we have the following requirements for being LL(1): + +nonterminal | must be mutually disjoint +------------+-------------------------- +<S> | --- +<disj> | --- +<disj'> | EOF, ), -o +<conj> | --- +<conj'> | -o, EOF, ), -a +<literal> | string, unop, (, ! +<atom> | string, unop, ( +<atom'> | -a, -o, EOF, ), binop + +*) diff --git a/src/utilities/testParser.mli b/src/utilities/testParser.mli new file mode 100644 index 0000000..bee7386 --- /dev/null +++ b/src/utilities/testParser.mli @@ -0,0 +1,37 @@ +(***************************************************************************) +(* Morsmall *) +(* A concise AST for POSIX shell *) +(* *) +(* Copyright (C) 2017,2018,2019 Yann Régis-Gianas, Ralf Treinen, *) +(* Nicolas Jeannerod *) +(* *) +(* This program is free software: you can redistribute it and/or modify *) +(* it under the terms of the GNU General Public License as published by *) +(* the Free Software Foundation, either version 3 of the License, or *) +(* (at your option) any later version. *) +(* *) +(* This program is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) +(* GNU General Public License for more details. *) +(* *) +(* You should have received a copy of the GNU General Public License *) +(* along with this program. If not, see <http://www.gnu.org/licenses/>. *) +(***************************************************************************) + +(** abstract syntax of test expressions *) + +type expression = + | And of expression * expression + | Or of expression * expression + | Not of expression + | Binary of string * string * string (* (op,arg_left,arg_right) *) + | Unary of string * string (* (op,arg) *) + | Single of string (* arg *) + +exception Parse_error + +(** [parse ~bracket wl] parses the list of words [wl] as a test + expression (or [None] if [wl] is empty). If [bracket] is [true] + then the last word of [wl] must be a right bracket. *) +val parse: ?bracket:bool -> string list -> expression option diff --git a/tests/generator.ml b/tests/generator.ml index 178a065..dc3d5fa 100644 --- a/tests/generator.ml +++ b/tests/generator.ml @@ -105,8 +105,8 @@ and g_pattern' p = dummy_locate g_pattern p and g_assignment p = - { variable = choose [|1,"x";2,"y";3,"z";4,"choucroute"|] ; - word = g_word (d p) } + (choose [|1,"x";2,"y";3,"z";4,"choucroute"|], + g_word (d p)) and g_assignment' p = dummy_locate g_assignment p @@ -127,7 +127,7 @@ and g_redirection_kind _p = and g_program p = g_list ~prob:0.5 ~limit:3 (fun () -> g_command' (d p)) - + and g_command p = if p.depth <= 0 then g_simple_command (d p) |