Imported Upstream version 2.5.31

author: Manoj Srivastava <srivasta@golden-gryphon.com> 2003-12-03 22:33:17 -0800
committer: Manoj Srivastava <srivasta@golden-gryphon.com> 2003-12-03 22:33:17 -0800
commit: c2b22e08bd48278f2cf125f054c9f6286e345ff0 (patch)
tree: 3c0ab722c83ef33913ad293af7d56ce2c4e1fcc9 /parse.y
parent: edc848712307fe5c881364e12e520e9fe58d9969 (diff)
1 files changed, 212 insertions, 76 deletions
diff --git a/parse.y b/parse.y
index c6d7eea..4cde223 100644
--- a/parse.y
+++ b/parse.y
@@ -1,40 +1,62 @@
 /* parse.y - parser for flex input */
 
 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
-%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS
+%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER
+%token OPT_TABLES
 
 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
 
-%{
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Vern Paxson.
- * 
- * The United States Government has rights in this work pursuant
- * to contract no. DE-AC03-76SF00098 between the United States
- * Department of Energy and the University of California.
+/*
+ *POSIX and AT&T lex place the
+ * precedence of the repeat operator, {}, below that of concatenation.
+ * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
+ * Regular Expression (ERE) precedence that has the repeat operator
+ * higher than concatenation.  This causes ab{3} to yield abbb.
  *
- * Redistribution and use in source and binary forms with or without
- * modification are permitted provided that: (1) source distributions retain
- * this entire copyright notice and comment, and (2) distributions including
- * binaries display the following acknowledgement:  ``This product includes
- * software developed by the University of California, Berkeley and its
- * contributors'' in the documentation or other materials provided with the
- * distribution and in all advertising materials mentioning features or use
- * of this software.  Neither the name of the University nor the names of
- * its contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ * In order to support the POSIX and AT&T precedence and the flex
+ * precedence we define two token sets for the begin and end tokens of
+ * the repeat operator, '{' and '}'.  The lexical scanner chooses
+ * which tokens to return based on whether posix_compat or lex_compat
+ * are specified. Specifying either posix_compat or lex_compat will
+ * cause flex to parse scanner files as per the AT&T and
+ * POSIX-mandated behavior.
  */
 
-/* $Header: /home/daffy/u0/vern/flex/RCS/parse.y,v 2.28 95/04/21 11:51:51 vern Exp $ */
+%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
+
+
+%{
+/*  Copyright (c) 1990 The Regents of the University of California. */
+/*  All rights reserved. */
+
+/*  This code is derived from software contributed to Berkeley by */
+/*  Vern Paxson. */
+
+/*  The United States Government has rights in this work pursuant */
+/*  to contract no. DE-AC03-76SF00098 between the United States */
+/*  Department of Energy and the University of California. */
+
+/*  This file is part of flex. */
+
+/*  Redistribution and use in source and binary forms, with or without */
+/*  modification, are permitted provided that the following conditions */
+/*  are met: */
+
+/*  1. Redistributions of source code must retain the above copyright */
+/*     notice, this list of conditions and the following disclaimer. */
+/*  2. Redistributions in binary form must reproduce the above copyright */
+/*     notice, this list of conditions and the following disclaimer in the */
+/*     documentation and/or other materials provided with the distribution. */
 
+/*  Neither the name of the University nor the names of its contributors */
+/*  may be used to endorse or promote products derived from this software */
+/*  without specific prior written permission. */
+
+/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
+/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
+/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
+/*  PURPOSE. */
 
 /* Some versions of bison are broken in that they use alloca() but don't
  * declare it properly.  The following is the patented (just kidding!)
@@ -48,6 +70,7 @@
 #endif
 
 #include "flexdef.h"
+#include "tables.h"
 
 /* The remainder of the alloca() cruft has to come after including flexdef.h,
  * so HAVE_ALLOCA_H is (possibly) defined.
@@ -86,14 +109,21 @@ int scon_stk_ptr;
 static int madeany = false;  /* whether we've made the '.' character class */
 int previous_continued_action;	/* whether the previous rule's action was '|' */
 
+#define format_warn3(fmt, a1, a2) \
+	do{ \
+        char fw3_msg[MAXLINE];\
+        snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
+        warn( fw3_msg );\
+	}while(0)
+
 /* Expand a POSIX character class expression. */
 #define CCL_EXPR(func) \
-	{ \
+	do{ \
 	int c; \
 	for ( c = 0; c < csize; ++c ) \
 		if ( isascii(c) && func(c) ) \
 			ccladd( currccl, c ); \
-	}
+	}while(0)
 
 /* While POSIX defines isblank(), it's not ANSI C. */
 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
@@ -123,7 +153,7 @@ goal		:  initlex sect1 sect1end sect2 initforrule
 			 */
 			default_rule = num_rules;
 
-			finish_rule( def_rule, false, 0, 0 );
+			finish_rule( def_rule, false, 0, 0, 0);
 
 			for ( i = 1; i <= lastsc; ++i )
 				scset[i] = mkbranch( scset[i], def_rule );
@@ -150,7 +180,7 @@ sect1		:  sect1 startconddecl namelist1
 		|  sect1 options
 		|
 		|  error
-			{ synerr( "unknown error processing section 1" ); }
+			{ synerr( _("unknown error processing section 1") ); }
 		;
 
 sect1end	:  SECTEND
@@ -175,7 +205,7 @@ namelist1	:  namelist1 NAME
 			{ scinstal( nmstr, xcluflg ); }
 
 		|  error
-			{ synerr( "bad start condition list" ); }
+			{ synerr( _("bad start condition list") ); }
 		;
 
 options		:  OPTION_OP optionlist
@@ -194,6 +224,10 @@ option		:  OPT_OUTFILE '=' NAME
 			{ prefix = copy_string( nmstr ); }
 		|  OPT_YYCLASS '=' NAME
 			{ yyclass = copy_string( nmstr ); }
+		|  OPT_HEADER '=' NAME
+			{ headerfilename = copy_string( nmstr ); }
+	    |  OPT_TABLES '=' NAME
+            { tablesext = true; tablesfilename = copy_string( nmstr ); }
 		;
 
 sect2		:  sect2 scon initforrule flexrule '\n'
@@ -220,7 +254,7 @@ flexrule	:  '^' rule
 			{
 			pat = $2;
 			finish_rule( pat, variable_trail_rule,
-				headcnt, trailcnt );
+				headcnt, trailcnt , previous_continued_action);
 
 			if ( scon_stk_ptr > 0 )
 				{
@@ -256,7 +290,7 @@ flexrule	:  '^' rule
 			{
 			pat = $1;
 			finish_rule( pat, variable_trail_rule,
-				headcnt, trailcnt );
+				headcnt, trailcnt , previous_continued_action);
 
 			if ( scon_stk_ptr > 0 )
 				{
@@ -300,7 +334,7 @@ flexrule	:  '^' rule
 			}
 
 		|  error
-			{ synerr( "unrecognized rule" ); }
+			{ synerr( _("unrecognized rule") ); }
 		;
 
 scon_stk_ptr	:
@@ -336,7 +370,7 @@ namelist2	:  namelist2 ',' sconname
 		|  sconname
 
 		|  error
-			{ synerr( "bad start condition list" ); }
+			{ synerr( _("bad start condition list") ); }
 		;
 
 sconname	:  NAME
@@ -392,6 +426,7 @@ rule		:  re2 re
 				/* Mark as variable. */
 				varlength = true;
 				headcnt = 0;
+
 				}
 
 			if ( lex_compat || (varlength && headcnt == 0) )
@@ -418,7 +453,7 @@ rule		:  re2 re
 			}
 
 		|  re2 re '$'
-			{ synerr( "trailing context used twice" ); }
+			{ synerr( _("trailing context used twice") ); }
 
 		|  re '$'
 			{
@@ -431,7 +466,7 @@ rule		:  re2 re
 
 			if ( trlcontxt )
 				{
-				synerr( "trailing context used twice" );
+				synerr( _("trailing context used twice") );
 				$$ = mkstate( SYM_EPSILON );
 				}
 
@@ -500,7 +535,7 @@ re2		:  re '/'
 			 */
 
 			if ( trlcontxt )
-				synerr( "trailing context used twice" );
+				synerr( _("trailing context used twice") );
 			else
 				trlcontxt = true;
 
@@ -529,6 +564,69 @@ series		:  series singleton
 
 		|  singleton
 			{ $$ = $1; }
+
+		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
+			{
+			varlength = true;
+
+			if ( $3 > $5 || $3 < 0 )
+				{
+				synerr( _("bad iteration values") );
+				$$ = $1;
+				}
+			else
+				{
+				if ( $3 == 0 )
+					{
+					if ( $5 <= 0 )
+						{
+						synerr(
+						_("bad iteration values") );
+						$$ = $1;
+						}
+					else
+						$$ = mkopt(
+							mkrep( $1, 1, $5 ) );
+					}
+				else
+					$$ = mkrep( $1, $3, $5 );
+				}
+			}
+
+		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
+			{
+			varlength = true;
+
+			if ( $3 <= 0 )
+				{
+				synerr( _("iteration value must be positive") );
+				$$ = $1;
+				}
+
+			else
+				$$ = mkrep( $1, $3, INFINITY );
+			}
+
+		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
+			{
+			/* The series could be something like "(foo)",
+			 * in which case we have no idea what its length
+			 * is, so we punt here.
+			 */
+			varlength = true;
+
+			if ( $3 <= 0 )
+				{
+				  synerr( _("iteration value must be positive")
+					  );
+				$$ = $1;
+				}
+
+			else
+				$$ = link_machines( $1,
+						copysingl( $1, $3 - 1 ) );
+			}
+
 		;
 
 singleton	:  singleton '*'
@@ -550,13 +648,13 @@ singleton	:  singleton '*'
 			$$ = mkopt( $1 );
 			}
 
-		|  singleton '{' NUMBER ',' NUMBER '}'
+		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
 			{
 			varlength = true;
 
 			if ( $3 > $5 || $3 < 0 )
 				{
-				synerr( "bad iteration values" );
+				synerr( _("bad iteration values") );
 				$$ = $1;
 				}
 			else
@@ -566,7 +664,7 @@ singleton	:  singleton '*'
 					if ( $5 <= 0 )
 						{
 						synerr(
-						"bad iteration values" );
+						_("bad iteration values") );
 						$$ = $1;
 						}
 					else
@@ -578,13 +676,13 @@ singleton	:  singleton '*'
 				}
 			}
 
-		|  singleton '{' NUMBER ',' '}'
+		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
 			{
 			varlength = true;
 
 			if ( $3 <= 0 )
 				{
-				synerr( "iteration value must be positive" );
+				synerr( _("iteration value must be positive") );
 				$$ = $1;
 				}
 
@@ -592,7 +690,7 @@ singleton	:  singleton '*'
 				$$ = mkrep( $1, $3, INFINITY );
 			}
 
-		|  singleton '{' NUMBER '}'
+		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
 			{
 			/* The singleton could be something like "(foo)",
 			 * in which case we have no idea what its length
@@ -602,7 +700,7 @@ singleton	:  singleton '*'
 
 			if ( $3 <= 0 )
 				{
-				synerr( "iteration value must be positive" );
+				synerr( _("iteration value must be positive") );
 				$$ = $1;
 				}
 
@@ -648,6 +746,9 @@ singleton	:  singleton '*'
 
 			++rulelen;
 
+			if (ccl_has_nl[$1])
+				rule_has_nl[num_rules] = true;
+
 			$$ = mkstate( -$1 );
 			}
 
@@ -655,6 +756,9 @@ singleton	:  singleton '*'
 			{
 			++rulelen;
 
+			if (ccl_has_nl[$1])
+				rule_has_nl[num_rules] = true;
+
 			$$ = mkstate( -$1 );
 			}
 
@@ -671,6 +775,9 @@ singleton	:  singleton '*'
 			if ( caseins && $1 >= 'A' && $1 <= 'Z' )
 				$1 = clower( $1 );
 
+			if ($1 == nlch)
+				rule_has_nl[num_rules] = true;
+
 			$$ = mkstate( $1 );
 			}
 		;
@@ -687,16 +794,42 @@ fullccl		:  '[' ccl ']'
 
 ccl		:  ccl CHAR '-' CHAR
 			{
-			if ( caseins )
-				{
-				if ( $2 >= 'A' && $2 <= 'Z' )
-					$2 = clower( $2 );
-				if ( $4 >= 'A' && $4 <= 'Z' )
-					$4 = clower( $4 );
-				}
+
+			if (caseins)
+			  {
+			    /* Squish the character range to lowercase only if BOTH
+			     * ends of the range are uppercase.
+			     */
+			    if (isupper ($2) && isupper ($4))
+			      {
+				$2 = tolower ($2);
+				$4 = tolower ($4);
+			      }
+
+			    /* If one end of the range has case and the other
+			     * does not, or the cases are different, then we're not
+			     * sure what range the user is trying to express.
+			     * Examples: [@-z] or [S-t]
+			     */
+			    else if (has_case ($2) != has_case ($4)
+				     || (has_case ($2) && (b_islower ($2) != b_islower ($4))))
+			      format_warn3 (
+			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
+					    $2, $4);
+
+			    /* If the range spans uppercase characters but not
+			     * lowercase (or vice-versa), then should we automatically
+			     * include lowercase characters in the range?
+			     * Example: [@-_] spans [a-z] but not [A-Z]
+			     */
+			    else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
+			      format_warn3 (
+			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
+					    $2, $4);
+			  }
 
 			if ( $2 > $4 )
-				synerr( "negative range in character class" );
+				synerr( _("negative range in character class") );
 
 			else
 				{
@@ -739,23 +872,23 @@ ccl		:  ccl CHAR '-' CHAR
 			}
 		;
 
-ccl_expr:	   CCE_ALNUM	{ CCL_EXPR(isalnum) }
-		|  CCE_ALPHA	{ CCL_EXPR(isalpha) }
-		|  CCE_BLANK	{ CCL_EXPR(IS_BLANK) }
-		|  CCE_CNTRL	{ CCL_EXPR(iscntrl) }
-		|  CCE_DIGIT	{ CCL_EXPR(isdigit) }
-		|  CCE_GRAPH	{ CCL_EXPR(isgraph) }
-		|  CCE_LOWER	{ CCL_EXPR(islower) }
-		|  CCE_PRINT	{ CCL_EXPR(isprint) }
-		|  CCE_PUNCT	{ CCL_EXPR(ispunct) }
-		|  CCE_SPACE	{ CCL_EXPR(isspace) }
+ccl_expr:	   CCE_ALNUM	{ CCL_EXPR(isalnum); }
+		|  CCE_ALPHA	{ CCL_EXPR(isalpha); }
+		|  CCE_BLANK	{ CCL_EXPR(IS_BLANK); }
+		|  CCE_CNTRL	{ CCL_EXPR(iscntrl); }
+		|  CCE_DIGIT	{ CCL_EXPR(isdigit); }
+		|  CCE_GRAPH	{ CCL_EXPR(isgraph); }
+		|  CCE_LOWER	{ CCL_EXPR(islower); }
+		|  CCE_PRINT	{ CCL_EXPR(isprint); }
+		|  CCE_PUNCT	{ CCL_EXPR(ispunct); }
+		|  CCE_SPACE	{ CCL_EXPR(isspace); }
 		|  CCE_UPPER	{
 				if ( caseins )
-					CCL_EXPR(islower)
+					CCL_EXPR(islower);
 				else
-					CCL_EXPR(isupper)
+					CCL_EXPR(isupper);
 				}
-		|  CCE_XDIGIT	{ CCL_EXPR(isxdigit) }
+		|  CCE_XDIGIT	{ CCL_EXPR(isxdigit); }
 		;
 		
 string		:  string CHAR
@@ -763,6 +896,9 @@ string		:  string CHAR
 			if ( caseins && $2 >= 'A' && $2 <= 'Z' )
 				$2 = clower( $2 );
 
+			if ( $2 == nlch )
+				rule_has_nl[num_rules] = true;
+
 			++rulelen;
 
 			$$ = link_machines( $1, mkstate( $2 ) );
@@ -815,7 +951,7 @@ void build_eof_action()
 /* format_synerr - write out formatted syntax error */
 
 void format_synerr( msg, arg )
-char msg[], arg[];
+const char *msg, arg[];
 	{
 	char errmsg[MAXLINE];
 
@@ -827,7 +963,7 @@ char msg[], arg[];
 /* synerr - report a syntax error */
 
 void synerr( str )
-char str[];
+const char *str;
 	{
 	syntaxerror = true;
 	pinpoint_message( str );
@@ -837,7 +973,7 @@ char str[];
 /* format_warn - write out formatted warning */
 
 void format_warn( msg, arg )
-char msg[], arg[];
+const char *msg, arg[];
 	{
 	char warn_msg[MAXLINE];
 
@@ -849,7 +985,7 @@ char msg[], arg[];
 /* warn - report a warning, unless -w was given */
 
 void warn( str )
-char str[];
+const char *str;
 	{
 	line_warning( str, linenum );
 	}
@@ -859,7 +995,7 @@ char str[];
  */
 
 void format_pinpoint_message( msg, arg )
-char msg[], arg[];
+const char *msg, arg[];
 	{
 	char errmsg[MAXLINE];
 
@@ -871,7 +1007,7 @@ char msg[], arg[];
 /* pinpoint_message - write out a message, pinpointing its location */
 
 void pinpoint_message( str )
-char str[];
+const char *str;
 	{
 	line_pinpoint( str, linenum );
 	}
@@ -880,7 +1016,7 @@ char str[];
 /* line_warning - report a warning at a given line, unless -w was given */
 
 void line_warning( str, line )
-char str[];
+const char *str;
 int line;
 	{
 	char warning[MAXLINE];
@@ -896,10 +1032,10 @@ int line;
 /* line_pinpoint - write out a message, pinpointing it at the given line */
 
 void line_pinpoint( str, line )
-char str[];
+const char *str;
 int line;
 	{
-	fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
+	fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
 	}
 
 
@@ -908,6 +1044,6 @@ int line;
  */
 
 void yyerror( msg )
-char msg[];
+const char *msg;
 	{
 	}
author	Manoj Srivastava <srivasta@golden-gryphon.com>	2003-12-03 22:33:17 -0800
committer	Manoj Srivastava <srivasta@golden-gryphon.com>	2003-12-03 22:33:17 -0800
commit	c2b22e08bd48278f2cf125f054c9f6286e345ff0 (patch)
tree	3c0ab722c83ef33913ad293af7d56ce2c4e1fcc9 /parse.y
parent	edc848712307fe5c881364e12e520e9fe58d9969 (diff)