summaryrefslogtreecommitdiff
path: root/src/AST.ml
diff options
context:
space:
mode:
Diffstat (limited to 'src/AST.ml')
-rw-r--r--src/AST.ml248
1 files changed, 248 insertions, 0 deletions
diff --git a/src/AST.ml b/src/AST.ml
new file mode 100644
index 0000000..44f9e3b
--- /dev/null
+++ b/src/AST.ml
@@ -0,0 +1,248 @@
+(***************************************************************************)
+(* Morsmall *)
+(* A concise AST for POSIX shell *)
+(* *)
+(* Copyright (C) 2017,2018,2019 Yann Régis-Gianas, Ralf Treinen, *)
+(* Nicolas Jeannerod *)
+(* *)
+(* This program is free software: you can redistribute it and/or modify *)
+(* it under the terms of the GNU General Public License as published by *)
+(* the Free Software Foundation, either version 3 of the License, or *)
+(* (at your option) any later version. *)
+(* *)
+(* This program is distributed in the hope that it will be useful, *)
+(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
+(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)
+(* GNU General Public License for more details. *)
+(* *)
+(* You should have received a copy of the GNU General Public License *)
+(* along with this program. If not, see <http://www.gnu.org/licenses/>. *)
+(***************************************************************************)
+
+(** Names in Shell are just strings with a few additional
+ conditions. *)
+
+type name = string
+
+(** The type {!word} is a description of words in Shell. {e See POSIX,
+ 2 Shell & Utilities, 2.3 Token Recognition} *)
+
+and character_range = char list
+
+and attribute =
+ | NoAttribute
+ | ParameterLength of word
+ | UseDefaultValues of word
+ | AssignDefaultValues of word
+ | IndicateErrorifNullorUnset of word
+ | UseAlternativeValue of word
+ | RemoveSmallestSuffixPattern of word
+ | RemoveLargestSuffixPattern of word
+ | RemoveSmallestPrefixPattern of word
+ | RemoveLargestPrefixPattern of word
+
+and word_component =
+ | WTildePrefix of string
+ | WLiteral of string
+ | WDoubleQuoted of word
+ | WVariable of name * attribute
+ | WSubshell of program
+ | WGlobAll
+ | WGlobAny
+ | WBracketExpression of (Morbig.CST.bracket_expression [@equal (=)] [@opaque])
+
+and word = word_component list
+and word' = word Location.located
+
+(** For now, a {!pattern} is just a {!word}. *)
+
+and pattern = word list
+and pattern' = pattern Location.located
+
+(** An assignment is just a pair of a {!name} and a {!word}. *)
+
+and assignment = name * word
+and assignment' = assignment Location.located
+
+(** A file descriptor {!descr} is an integer. *)
+
+and descr = int
+
+(** The following description does contain all the semantic subtleties
+ of POSIX Shell. Such a description can be found in the document
+ {{:http://pubs.opengroup.org/onlinepubs/9699919799.2016edition/}IEEE
+ Std 1003.1™-2008, 2016 Edition}. In the following, we will refer to
+ it simple as POSIX.
+
+ The type {!command} contains the definition of a Shell command in
+ Morsmall. Compared to what can be found in the POSIX standard or in
+ {{:https://github.com/colis-anr/morbig}Morbig}, this type is rather
+ small. This is because a lot of syntactically distinct scripts that
+ are semantically equivalent are identified in here. For instance,
+ all the following scripts are equivalent and identified as such:
+ {[
+ if t1; then c1; elif t2; then c2; fi
+ if { t1; }; then c1; elif { { t2; }; }; fi
+ if t1; then c1; else if t2; then c2; fi; fi
+ ]}
+
+ {2 Simple Command}
+
+ {e See POSIX, 2 Shell & Utilities, 2.9.1 Simple Command}
+
+ {2 Lists and Pipelines}
+
+ {e See POSIX, 2 Shell & Utilities, 2.9.2 Pipelines and 2.9.3 Lists}
+
+ - {b Asynchronous Lists.} When encountering [Async c], the Shell
+ shall execute [c] asynchronously in a subshell. This means that the
+ shell shall not wait for the command to finish before executing the
+ next command.
+
+ - {b Sequential Lists.} {i A contrario}, the commands [c1] and [c2]
+ in [Seq (c1, c2)] shall be executed sequentially.
+
+ - {b AND Lists.} In [And (c1, c2)], [c1] shall be executed
+ first. If its exit status is zero, [c2] shall be executed. The
+ commands are expanded only if they are executed.
+
+ - {b OR Lists.} In [Or (c1, c2)], [c1] shall be executed first. If
+ its exit status is non-zero, [c2] shall be executed.
+
+ - {b Pipeline.} In [Pipe (c1, c2)], the standard output of [c1]
+ shall be connected to the standard input of [c2]. The standard
+ input, standard output, or both of a command shall be considered to
+ be assigned by the pipeline before any redirection specified by
+ redirection operators that are part of the command.
+
+ - {b Negation.} The command [Not c] has the same behaviour as [c],
+ except for the exit status that shall be the logical NOT of the
+ exit status of [c].
+
+ {2 Compound Commands}
+
+ {e See POSIX, 2 Shell & Utilities, 2.9.4 Compound Commands}
+
+ - {b The Subshell Environment.} [Subshell c] shall execute [c] a
+ subshell environment. Variable assignments and built-in commands
+ that affect the environment shall not remain in effect after the
+ list finishes.
+
+ - {b The for Loop.} [For (x, l, c)] shall execute a sequence of
+ commands [c] for each member in a list of items. It is to be noted
+ that [l] is non-mandatory and is thus an option. Besides, there is
+ an important semantic difference between [None] and [Some
+ \[\]]. The former appears in a for loop where the list of words
+ appear but is empty. In that case, the for loops through the empty
+ list. The latter appears in a for loop where the list of words has
+ been omitted. In that case, the for loops through the positional
+ parameters.
+
+ - {b The case Conditional Construct.} [Case (w, \[(\[p11;...\],c1);
+ ...\])] shall execute the compound-list corresponding to the first
+ one of several patterns that is matched by the string resulting
+ from the expansion of the given word [w]. In order from the
+ beginning to the end of the case statement, each pattern [p*] shall
+ be subjected to expansion, and the result of these expansions shall
+ be compared against the expansion of [w]. After the first match, no
+ more patterns shall be expanded, and the corresponding [c*] shall
+ be executed. The order of expansion and comparison of multiple
+ patterns that label the same statement is unspecified.
+
+ - {b The if Conditional Construct.} [If (c1, c2, c3)] shall execute
+ [c1] and use its exit status to determine whether to execute [c2]
+ or [c3]. In fact, [c3] is not mandatory and is thus an option.
+
+ - {b The while Loop.} [While (c1, c2)] shall continuously execute
+ [c2] as long as [c1] has a zero exit status.
+
+ - {b The until Loop.} [Until (c1, c2)] shall continuously execute
+ [c2] as long as [c1] has a non-zero exit status.
+
+ {2 Function Definition Command}
+
+ {e See POSIX, 2 Shell & Utilities, 2.9.5 Function Definition
+ Command}
+
+ A function is a user-defined name that is used as a simple command
+ to call a compound command with new positional parameters. A
+ function is defined with a {e function definition command},
+ [Function (name, body)].
+
+ This function definition command defines a function named [name:
+ string] and with body [body: command]. The [body] shall be executed
+ whenever [name] is specified as the name of a simple command.
+
+ {2 Redirection}
+
+ {e See POSIX, 2 Shell & Utilities, 2.7 Redirections}
+
+ *)
+
+(** {1 Type Definitions}
+
+ The type [command] describes a command in the AST. All the command
+ semantics are described at the top of this document. *)
+
+and program = command' list
+
+and command =
+ (* Simple Commands *)
+ | Simple of assignment' list * word' list
+
+ (* Lists *)
+ | Async of command
+ | Seq of command' * command'
+ | And of command' * command'
+ | Or of command' * command'
+
+ (* Pipelines *)
+ | Not of command'
+ | Pipe of command' * command'
+
+ (* Compound Command's *)
+ | Subshell of command'
+ | For of name * word list option * command'
+ | Case of word * case_item' list
+ | If of command' * command' * command' option
+ | While of command' * command'
+ | Until of command' * command'
+
+ (* Function Definition Command' *)
+ | Function of name * command'
+
+ (* Redirection *)
+ | Redirection of command' * descr * kind * word
+ | HereDocument of command' * descr * word'
+
+and command' = command Location.located
+
+and case_item = pattern' * command' option
+
+and case_item' = case_item Location.located
+
+and kind =
+ | Output (* > *)
+ | OutputDuplicate (* >& *)
+ | OutputAppend (* >> *)
+ | OutputClobber (* >| *)
+ | Input (* < *)
+ | InputDuplicate (* <& *)
+ | InputOutput (* <> *)
+
+[@@deriving
+ eq,
+ show {with_path=false},
+ yojson {exn=true},
+ visitors {variety = "iter"; ancestors=["Location.located_iter"]; nude=true},
+ visitors {variety = "map"; ancestors=["Location.located_map"]; nude=true},
+ visitors {variety = "reduce"; ancestors=["Location.located_reduce"]; nude=true},
+ visitors {variety = "mapreduce"; ancestors=["Location.located_mapreduce"]; nude=true},
+ visitors {variety = "iter2"; ancestors=["Location.located_iter2"]; nude=true},
+ visitors {variety = "map2"; ancestors=["Location.located_map2"]; nude=true},
+ visitors {variety = "reduce2"; ancestors=["Location.located_reduce2"]; nude=true}
+]
+
+let default_redirection_descriptor = function
+ | Output | OutputDuplicate | OutputAppend | OutputClobber -> 1
+ | Input | InputDuplicate | InputOutput -> 0