summaryrefslogtreecommitdiff
path: root/parse.y
diff options
context:
space:
mode:
authorManoj Srivastava <srivasta@golden-gryphon.com>2003-12-03 22:33:17 -0800
committerManoj Srivastava <srivasta@golden-gryphon.com>2003-12-03 22:33:17 -0800
commitc2b22e08bd48278f2cf125f054c9f6286e345ff0 (patch)
tree3c0ab722c83ef33913ad293af7d56ce2c4e1fcc9 /parse.y
parentedc848712307fe5c881364e12e520e9fe58d9969 (diff)
Imported Upstream version 2.5.31
Diffstat (limited to 'parse.y')
-rw-r--r--parse.y288
1 files changed, 212 insertions, 76 deletions
diff --git a/parse.y b/parse.y
index c6d7eea..4cde223 100644
--- a/parse.y
+++ b/parse.y
@@ -1,40 +1,62 @@
/* parse.y - parser for flex input */
%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
-%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS
+%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER
+%token OPT_TABLES
%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
-%{
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Vern Paxson.
- *
- * The United States Government has rights in this work pursuant
- * to contract no. DE-AC03-76SF00098 between the United States
- * Department of Energy and the University of California.
+/*
+ *POSIX and AT&T lex place the
+ * precedence of the repeat operator, {}, below that of concatenation.
+ * Thus, ab{3} is ababab. Most other POSIX utilities use an Extended
+ * Regular Expression (ERE) precedence that has the repeat operator
+ * higher than concatenation. This causes ab{3} to yield abbb.
*
- * Redistribution and use in source and binary forms with or without
- * modification are permitted provided that: (1) source distributions retain
- * this entire copyright notice and comment, and (2) distributions including
- * binaries display the following acknowledgement: ``This product includes
- * software developed by the University of California, Berkeley and its
- * contributors'' in the documentation or other materials provided with the
- * distribution and in all advertising materials mentioning features or use
- * of this software. Neither the name of the University nor the names of
- * its contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ * In order to support the POSIX and AT&T precedence and the flex
+ * precedence we define two token sets for the begin and end tokens of
+ * the repeat operator, '{' and '}'. The lexical scanner chooses
+ * which tokens to return based on whether posix_compat or lex_compat
+ * are specified. Specifying either posix_compat or lex_compat will
+ * cause flex to parse scanner files as per the AT&T and
+ * POSIX-mandated behavior.
*/
-/* $Header: /home/daffy/u0/vern/flex/RCS/parse.y,v 2.28 95/04/21 11:51:51 vern Exp $ */
+%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
+
+
+%{
+/* Copyright (c) 1990 The Regents of the University of California. */
+/* All rights reserved. */
+
+/* This code is derived from software contributed to Berkeley by */
+/* Vern Paxson. */
+
+/* The United States Government has rights in this work pursuant */
+/* to contract no. DE-AC03-76SF00098 between the United States */
+/* Department of Energy and the University of California. */
+
+/* This file is part of flex. */
+
+/* Redistribution and use in source and binary forms, with or without */
+/* modification, are permitted provided that the following conditions */
+/* are met: */
+
+/* 1. Redistributions of source code must retain the above copyright */
+/* notice, this list of conditions and the following disclaimer. */
+/* 2. Redistributions in binary form must reproduce the above copyright */
+/* notice, this list of conditions and the following disclaimer in the */
+/* documentation and/or other materials provided with the distribution. */
+/* Neither the name of the University nor the names of its contributors */
+/* may be used to endorse or promote products derived from this software */
+/* without specific prior written permission. */
+
+/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
+/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
+/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
+/* PURPOSE. */
/* Some versions of bison are broken in that they use alloca() but don't
* declare it properly. The following is the patented (just kidding!)
@@ -48,6 +70,7 @@
#endif
#include "flexdef.h"
+#include "tables.h"
/* The remainder of the alloca() cruft has to come after including flexdef.h,
* so HAVE_ALLOCA_H is (possibly) defined.
@@ -86,14 +109,21 @@ int scon_stk_ptr;
static int madeany = false; /* whether we've made the '.' character class */
int previous_continued_action; /* whether the previous rule's action was '|' */
+#define format_warn3(fmt, a1, a2) \
+ do{ \
+ char fw3_msg[MAXLINE];\
+ snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
+ warn( fw3_msg );\
+ }while(0)
+
/* Expand a POSIX character class expression. */
#define CCL_EXPR(func) \
- { \
+ do{ \
int c; \
for ( c = 0; c < csize; ++c ) \
if ( isascii(c) && func(c) ) \
ccladd( currccl, c ); \
- }
+ }while(0)
/* While POSIX defines isblank(), it's not ANSI C. */
#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
@@ -123,7 +153,7 @@ goal : initlex sect1 sect1end sect2 initforrule
*/
default_rule = num_rules;
- finish_rule( def_rule, false, 0, 0 );
+ finish_rule( def_rule, false, 0, 0, 0);
for ( i = 1; i <= lastsc; ++i )
scset[i] = mkbranch( scset[i], def_rule );
@@ -150,7 +180,7 @@ sect1 : sect1 startconddecl namelist1
| sect1 options
|
| error
- { synerr( "unknown error processing section 1" ); }
+ { synerr( _("unknown error processing section 1") ); }
;
sect1end : SECTEND
@@ -175,7 +205,7 @@ namelist1 : namelist1 NAME
{ scinstal( nmstr, xcluflg ); }
| error
- { synerr( "bad start condition list" ); }
+ { synerr( _("bad start condition list") ); }
;
options : OPTION_OP optionlist
@@ -194,6 +224,10 @@ option : OPT_OUTFILE '=' NAME
{ prefix = copy_string( nmstr ); }
| OPT_YYCLASS '=' NAME
{ yyclass = copy_string( nmstr ); }
+ | OPT_HEADER '=' NAME
+ { headerfilename = copy_string( nmstr ); }
+ | OPT_TABLES '=' NAME
+ { tablesext = true; tablesfilename = copy_string( nmstr ); }
;
sect2 : sect2 scon initforrule flexrule '\n'
@@ -220,7 +254,7 @@ flexrule : '^' rule
{
pat = $2;
finish_rule( pat, variable_trail_rule,
- headcnt, trailcnt );
+ headcnt, trailcnt , previous_continued_action);
if ( scon_stk_ptr > 0 )
{
@@ -256,7 +290,7 @@ flexrule : '^' rule
{
pat = $1;
finish_rule( pat, variable_trail_rule,
- headcnt, trailcnt );
+ headcnt, trailcnt , previous_continued_action);
if ( scon_stk_ptr > 0 )
{
@@ -300,7 +334,7 @@ flexrule : '^' rule
}
| error
- { synerr( "unrecognized rule" ); }
+ { synerr( _("unrecognized rule") ); }
;
scon_stk_ptr :
@@ -336,7 +370,7 @@ namelist2 : namelist2 ',' sconname
| sconname
| error
- { synerr( "bad start condition list" ); }
+ { synerr( _("bad start condition list") ); }
;
sconname : NAME
@@ -392,6 +426,7 @@ rule : re2 re
/* Mark as variable. */
varlength = true;
headcnt = 0;
+
}
if ( lex_compat || (varlength && headcnt == 0) )
@@ -418,7 +453,7 @@ rule : re2 re
}
| re2 re '$'
- { synerr( "trailing context used twice" ); }
+ { synerr( _("trailing context used twice") ); }
| re '$'
{
@@ -431,7 +466,7 @@ rule : re2 re
if ( trlcontxt )
{
- synerr( "trailing context used twice" );
+ synerr( _("trailing context used twice") );
$$ = mkstate( SYM_EPSILON );
}
@@ -500,7 +535,7 @@ re2 : re '/'
*/
if ( trlcontxt )
- synerr( "trailing context used twice" );
+ synerr( _("trailing context used twice") );
else
trlcontxt = true;
@@ -529,6 +564,69 @@ series : series singleton
| singleton
{ $$ = $1; }
+
+ | series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
+ {
+ varlength = true;
+
+ if ( $3 > $5 || $3 < 0 )
+ {
+ synerr( _("bad iteration values") );
+ $$ = $1;
+ }
+ else
+ {
+ if ( $3 == 0 )
+ {
+ if ( $5 <= 0 )
+ {
+ synerr(
+ _("bad iteration values") );
+ $$ = $1;
+ }
+ else
+ $$ = mkopt(
+ mkrep( $1, 1, $5 ) );
+ }
+ else
+ $$ = mkrep( $1, $3, $5 );
+ }
+ }
+
+ | series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
+ {
+ varlength = true;
+
+ if ( $3 <= 0 )
+ {
+ synerr( _("iteration value must be positive") );
+ $$ = $1;
+ }
+
+ else
+ $$ = mkrep( $1, $3, INFINITY );
+ }
+
+ | series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
+ {
+ /* The series could be something like "(foo)",
+ * in which case we have no idea what its length
+ * is, so we punt here.
+ */
+ varlength = true;
+
+ if ( $3 <= 0 )
+ {
+ synerr( _("iteration value must be positive")
+ );
+ $$ = $1;
+ }
+
+ else
+ $$ = link_machines( $1,
+ copysingl( $1, $3 - 1 ) );
+ }
+
;
singleton : singleton '*'
@@ -550,13 +648,13 @@ singleton : singleton '*'
$$ = mkopt( $1 );
}
- | singleton '{' NUMBER ',' NUMBER '}'
+ | singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
{
varlength = true;
if ( $3 > $5 || $3 < 0 )
{
- synerr( "bad iteration values" );
+ synerr( _("bad iteration values") );
$$ = $1;
}
else
@@ -566,7 +664,7 @@ singleton : singleton '*'
if ( $5 <= 0 )
{
synerr(
- "bad iteration values" );
+ _("bad iteration values") );
$$ = $1;
}
else
@@ -578,13 +676,13 @@ singleton : singleton '*'
}
}
- | singleton '{' NUMBER ',' '}'
+ | singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
{
varlength = true;
if ( $3 <= 0 )
{
- synerr( "iteration value must be positive" );
+ synerr( _("iteration value must be positive") );
$$ = $1;
}
@@ -592,7 +690,7 @@ singleton : singleton '*'
$$ = mkrep( $1, $3, INFINITY );
}
- | singleton '{' NUMBER '}'
+ | singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
{
/* The singleton could be something like "(foo)",
* in which case we have no idea what its length
@@ -602,7 +700,7 @@ singleton : singleton '*'
if ( $3 <= 0 )
{
- synerr( "iteration value must be positive" );
+ synerr( _("iteration value must be positive") );
$$ = $1;
}
@@ -648,6 +746,9 @@ singleton : singleton '*'
++rulelen;
+ if (ccl_has_nl[$1])
+ rule_has_nl[num_rules] = true;
+
$$ = mkstate( -$1 );
}
@@ -655,6 +756,9 @@ singleton : singleton '*'
{
++rulelen;
+ if (ccl_has_nl[$1])
+ rule_has_nl[num_rules] = true;
+
$$ = mkstate( -$1 );
}
@@ -671,6 +775,9 @@ singleton : singleton '*'
if ( caseins && $1 >= 'A' && $1 <= 'Z' )
$1 = clower( $1 );
+ if ($1 == nlch)
+ rule_has_nl[num_rules] = true;
+
$$ = mkstate( $1 );
}
;
@@ -687,16 +794,42 @@ fullccl : '[' ccl ']'
ccl : ccl CHAR '-' CHAR
{
- if ( caseins )
- {
- if ( $2 >= 'A' && $2 <= 'Z' )
- $2 = clower( $2 );
- if ( $4 >= 'A' && $4 <= 'Z' )
- $4 = clower( $4 );
- }
+
+ if (caseins)
+ {
+ /* Squish the character range to lowercase only if BOTH
+ * ends of the range are uppercase.
+ */
+ if (isupper ($2) && isupper ($4))
+ {
+ $2 = tolower ($2);
+ $4 = tolower ($4);
+ }
+
+ /* If one end of the range has case and the other
+ * does not, or the cases are different, then we're not
+ * sure what range the user is trying to express.
+ * Examples: [@-z] or [S-t]
+ */
+ else if (has_case ($2) != has_case ($4)
+ || (has_case ($2) && (b_islower ($2) != b_islower ($4))))
+ format_warn3 (
+ _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
+ $2, $4);
+
+ /* If the range spans uppercase characters but not
+ * lowercase (or vice-versa), then should we automatically
+ * include lowercase characters in the range?
+ * Example: [@-_] spans [a-z] but not [A-Z]
+ */
+ else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
+ format_warn3 (
+ _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
+ $2, $4);
+ }
if ( $2 > $4 )
- synerr( "negative range in character class" );
+ synerr( _("negative range in character class") );
else
{
@@ -739,23 +872,23 @@ ccl : ccl CHAR '-' CHAR
}
;
-ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum) }
- | CCE_ALPHA { CCL_EXPR(isalpha) }
- | CCE_BLANK { CCL_EXPR(IS_BLANK) }
- | CCE_CNTRL { CCL_EXPR(iscntrl) }
- | CCE_DIGIT { CCL_EXPR(isdigit) }
- | CCE_GRAPH { CCL_EXPR(isgraph) }
- | CCE_LOWER { CCL_EXPR(islower) }
- | CCE_PRINT { CCL_EXPR(isprint) }
- | CCE_PUNCT { CCL_EXPR(ispunct) }
- | CCE_SPACE { CCL_EXPR(isspace) }
+ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum); }
+ | CCE_ALPHA { CCL_EXPR(isalpha); }
+ | CCE_BLANK { CCL_EXPR(IS_BLANK); }
+ | CCE_CNTRL { CCL_EXPR(iscntrl); }
+ | CCE_DIGIT { CCL_EXPR(isdigit); }
+ | CCE_GRAPH { CCL_EXPR(isgraph); }
+ | CCE_LOWER { CCL_EXPR(islower); }
+ | CCE_PRINT { CCL_EXPR(isprint); }
+ | CCE_PUNCT { CCL_EXPR(ispunct); }
+ | CCE_SPACE { CCL_EXPR(isspace); }
| CCE_UPPER {
if ( caseins )
- CCL_EXPR(islower)
+ CCL_EXPR(islower);
else
- CCL_EXPR(isupper)
+ CCL_EXPR(isupper);
}
- | CCE_XDIGIT { CCL_EXPR(isxdigit) }
+ | CCE_XDIGIT { CCL_EXPR(isxdigit); }
;
string : string CHAR
@@ -763,6 +896,9 @@ string : string CHAR
if ( caseins && $2 >= 'A' && $2 <= 'Z' )
$2 = clower( $2 );
+ if ( $2 == nlch )
+ rule_has_nl[num_rules] = true;
+
++rulelen;
$$ = link_machines( $1, mkstate( $2 ) );
@@ -815,7 +951,7 @@ void build_eof_action()
/* format_synerr - write out formatted syntax error */
void format_synerr( msg, arg )
-char msg[], arg[];
+const char *msg, arg[];
{
char errmsg[MAXLINE];
@@ -827,7 +963,7 @@ char msg[], arg[];
/* synerr - report a syntax error */
void synerr( str )
-char str[];
+const char *str;
{
syntaxerror = true;
pinpoint_message( str );
@@ -837,7 +973,7 @@ char str[];
/* format_warn - write out formatted warning */
void format_warn( msg, arg )
-char msg[], arg[];
+const char *msg, arg[];
{
char warn_msg[MAXLINE];
@@ -849,7 +985,7 @@ char msg[], arg[];
/* warn - report a warning, unless -w was given */
void warn( str )
-char str[];
+const char *str;
{
line_warning( str, linenum );
}
@@ -859,7 +995,7 @@ char str[];
*/
void format_pinpoint_message( msg, arg )
-char msg[], arg[];
+const char *msg, arg[];
{
char errmsg[MAXLINE];
@@ -871,7 +1007,7 @@ char msg[], arg[];
/* pinpoint_message - write out a message, pinpointing its location */
void pinpoint_message( str )
-char str[];
+const char *str;
{
line_pinpoint( str, linenum );
}
@@ -880,7 +1016,7 @@ char str[];
/* line_warning - report a warning at a given line, unless -w was given */
void line_warning( str, line )
-char str[];
+const char *str;
int line;
{
char warning[MAXLINE];
@@ -896,10 +1032,10 @@ int line;
/* line_pinpoint - write out a message, pinpointing it at the given line */
void line_pinpoint( str, line )
-char str[];
+const char *str;
int line;
{
- fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
+ fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
}
@@ -908,6 +1044,6 @@ int line;
*/
void yyerror( msg )
-char msg[];
+const char *msg;
{
}