summary | refs | log | tree | commit | diff
path: root/src/scan.l
diff options
context:
space:
mode:
author: Demi Obenour <demiobenour@gmail.com>, 2016-09-27 10:54:16 -0400
committer: Will Estes <westes575@gmail.com>, 2016-10-19 20:39:06 -0400
commit: 4bffa41e4ed434f1e2ba64ac5a8fe661fa089cfb (patch)
tree: 378e886700ac96e8c316f354f10e1e9a622144ea /src/scan.l
parent: 7dcb10c048744192ab873ee10fb6a5d8b1640434 (diff)
Improved M4 quotation
This fixes M4 quotation of certain strings beginning with `yy` (in section 3 of the input file only) and character literals. The new quotation method is also less brittle and faster. Tests that relied on the old behavior were fixed. Also, `yyconst` is no longer defined; use `const` (which `yyconst` was unconditionally defined as) instead.
Diffstat (limited to 'src/scan.l')
-rw-r--r-- src/scan.l 104
1 files changed, 54 insertions, 50 deletions
diff --git a/src/scan.l b/src/scan.l
index 9a2bd83..ecf238a 100644
--- a/src/scan.l
+++ b/src/scan.l
@@ -38,8 +38,8 @@ extern bool tablesverify, tablesext;
extern int trlcontxt; /* Set in parse.y for each rule. */
extern const char *escaped_qstart, *escaped_qend;
-#define M4QSTART "[["
-#define M4QEND "]]"
+#define M4QSTART "[""["
+#define M4QEND "]""]"
#define SECT3_ESCAPED_QSTART "[" M4QEND M4QSTART "[" M4QEND M4QSTART
#define SECT3_ESCAPED_QEND M4QEND "]" M4QSTART M4QEND "]" M4QSTART
@@ -51,8 +51,8 @@ extern const char *escaped_qstart, *escaped_qend;
action_define( def, 1 ); \
}
-#define ACTION_ECHO_QSTART add_action (escaped_qstart)
-#define ACTION_ECHO_QEND add_action (escaped_qend)
+#define ACTION_ECHO_QSTART add_action (SECT3_ESCAPED_QSTART)
+#define ACTION_ECHO_QEND add_action (SECT3_ESCAPED_QEND)
#define ACTION_M4_IFDEF(def, should_define) \
do{ \
@@ -101,6 +101,10 @@ extern const char *escaped_qstart, *escaped_qend;
if ( getenv("POSIXLY_CORRECT") ) \
posix_compat = true;
+#define START_CODEBLOCK do { add_action(M4QSTART); BEGIN(CODEBLOCK); } while(0)
+#define END_CODEBLOCK do { add_action(M4QEND); BEGIN(INITIAL); } while (0)
+#define CODEBLOCK_QSTART "[]""][""[""[]""][""["
+#define CODEBLOCK_QEND "]""]""][""[""]""]""][""["
%}
%option caseless nodefault noreject stack noyy_top_state
@@ -112,7 +116,7 @@ extern const char *escaped_qstart, *escaped_qend;
%x GROUP_WITH_PARAMS
%x GROUP_MINUS_PARAMS
%x EXTENDED_COMMENT
-%x COMMENT_DISCARD
+%x COMMENT_DISCARD CODE_COMMENT
%x SECT3_NOESCAPE
%x CHARACTER_CONSTANT
@@ -135,8 +139,8 @@ CCL_EXPR ("[:"^?[[:alpha:]]+":]")
LEXOPT [aceknopr]
-M4QSTART "[["
-M4QEND "]]"
+M4QSTART "[""["
+M4QEND "]""]"
%%
static int bracelevel, didadef, indented_code;
@@ -149,8 +153,8 @@ M4QEND "]]"
<INITIAL>{
- ^{WS} indented_code = true; BEGIN(CODEBLOCK);
- ^"/*" ACTION_ECHO; yy_push_state( COMMENT );
+ ^{WS} indented_code = true; START_CODEBLOCK;
+ ^"/*" add_action("/*[""["); yy_push_state( COMMENT );
^#{OPTWS}line{WS} yy_push_state( LINEDIR );
^"%s"{NAME}? return SCDECL;
^"%x"{NAME}? return XSCDECL;
@@ -158,7 +162,7 @@ M4QEND "]]"
++linenum;
line_directive_out(NULL, 1);
indented_code = false;
- BEGIN(CODEBLOCK);
+ START_CODEBLOCK;
}
^"%top"[[:blank:]]*"{"[[:blank:]]*{NL} {
brace_start_line = linenum;
@@ -213,12 +217,18 @@ M4QEND "]]"
}
-<COMMENT>{
- "*/" ACTION_ECHO; yy_pop_state();
- "*" ACTION_ECHO;
- [^*\n] ACTION_ECHO;
+<COMMENT,CODE_COMMENT>{ /* */
+ [^\[\]\*\n]* ACTION_ECHO;
+ . ACTION_ECHO;
+
{NL} ++linenum; ACTION_ECHO;
}
+<COMMENT>{
+ "*/" add_action("*/]""]"); yy_pop_state();
+}
+<CODE_COMMENT>{
+ "*/" ACTION_ECHO; yy_pop_state();
+}
<COMMENT_DISCARD>{
/* This is the same as COMMENT, but is discarded rather than output. */
@@ -245,21 +255,20 @@ M4QEND "]]"
}
. /* ignore spurious characters */
}
-<ACTION,CODEBLOCK,ACTION_STRING,PERCENT_BRACE_ACTION,COMMENT,CHARACTER_CONSTANT>{
- M4|YY|m4 add_action(M4QSTART); ACTION_ECHO; add_action(M4QEND);
+<ACTION,CODEBLOCK,ACTION_STRING,PERCENT_BRACE_ACTION,CHARACTER_CONSTANT,
+COMMENT,CODE_COMMENT>{
{M4QSTART} ACTION_ECHO_QSTART;
{M4QEND} ACTION_ECHO_QEND;
}
<CODEBLOCK>{
- ^"%}".*{NL} ++linenum; BEGIN(INITIAL);
- . ACTION_ECHO;
-
+ ^"%}".*{NL} ++linenum; END_CODEBLOCK;
+ [^\n%\[\]]* ACTION_ECHO;
+ . ACTION_ECHO;
{NL} {
++linenum;
ACTION_ECHO;
- if ( indented_code )
- BEGIN(INITIAL);
+ if ( indented_code ) END_CODEBLOCK;
}
}
@@ -284,13 +293,7 @@ M4QEND "]]"
{M4QSTART} buf_strnappend(&top_buf, escaped_qstart, (int) strlen(escaped_qstart));
{M4QEND} buf_strnappend(&top_buf, escaped_qend, (int) strlen(escaped_qend));
- m4|M4|YY {
- buf_strnappend(&top_buf, M4QSTART, 2);
- buf_strnappend(&top_buf, yytext, 2);
- buf_strnappend(&top_buf, M4QEND, 2);
- }
-
- ([^{}\r\nmMY\[\]]+)|[^{}\r\n] {
+ ([^{}\r\n\[\]]+)|[^{}\r\n] {
buf_strnappend(&top_buf, yytext, yyleng);
}
@@ -545,6 +548,7 @@ M4QEND "]]"
yyless(amt);
}
else {
+ add_action("]""]");
continued_action = true;
++linenum;
return '\n';
@@ -614,9 +618,8 @@ M4QEND "]]"
^"%%".* {
sectnum = 3;
- BEGIN(SECT3);
+ BEGIN(no_section3_escape ? SECT3_NOESCAPE : SECT3);
outn("/* Begin user sect3 */");
- //fwrite(M4QSTART, 1, 2, yyout);
yyterminate(); /* to stop the parser */
}
@@ -891,7 +894,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */
<PERCENT_BRACE_ACTION>{
{OPTWS}"%}".* bracelevel = 0;
- <ACTION>"/*" ACTION_ECHO; yy_push_state( COMMENT );
+ <ACTION>"/*" ACTION_ECHO; yy_push_state( CODE_COMMENT );
<CODEBLOCK,ACTION>{
"reject" {
@@ -912,7 +915,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */
(doing_codeblock && indented_code) )
{
if ( doing_rule_action )
- add_action( "\tYY_BREAK\n" );
+ add_action( "\tYY_BREAK]""]\n" );
doing_rule_action = doing_codeblock = false;
BEGIN(SECT2);
@@ -937,7 +940,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */
if ( bracelevel == 0 )
{
if ( doing_rule_action )
- add_action( "\tYY_BREAK\n" );
+ add_action( "\tYY_BREAK]""]\n" );
doing_rule_action = false;
BEGIN(SECT2);
@@ -960,7 +963,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */
. ACTION_ECHO;
}
-<COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT><<EOF>> {
+<COMMENT,CODE_COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT><<EOF>> {
synerr( _( "EOF encountered inside an action" ) );
yyterminate();
}
@@ -979,25 +982,26 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */
return CHAR;
}
-
<SECT3>{
- "M4"|"m4"|"YY" {
- if (no_section3_escape) {
- ECHO;
- } else
- fprintf (yyout, "[[%s]]", yytext);
+ {M4QSTART} fputs(escaped_qstart, yyout);
+ {M4QEND} fputs(escaped_qend, yyout);
+ [^\[\]\n]*(\n?) ECHO;
+ .|\n ECHO;
+ <<EOF>> {
+ sectnum = 0;
+ yyterminate();
+ }
+}
+<SECT3_NOESCAPE>{
+ {M4QSTART} fprintf(yyout, "[""[%s]""]", escaped_qstart);
+ {M4QEND} fprintf(yyout, "[""[%s]""]", escaped_qend);
+ [^\[\]\n]*(\n?) ECHO;
+ (.|\n) ECHO;
+ <<EOF>> {
+ sectnum = 0;
+ yyterminate();
}
- {M4QSTART} fwrite (escaped_qstart, 1, strlen(escaped_qstart) - 0, yyout);
- {M4QEND} fwrite (escaped_qend, 1, strlen(escaped_qend) - 0, yyout);
- [^\[\]MmY\n]*(\n?) ECHO;
- (.|\n) ECHO;
- <<EOF>> {
- //fwrite(M4QEND, 1, strlen(M4QEND), yyout);
- sectnum = 0;
- yyterminate();
- }
}
-
<*>.|\n format_synerr( _( "bad character: %s" ), yytext );
%%