diff options
author | Manoj Srivastava <srivasta@golden-gryphon.com> | 2003-12-03 22:33:17 -0800 |
---|---|---|
committer | Manoj Srivastava <srivasta@golden-gryphon.com> | 2003-12-03 22:33:17 -0800 |
commit | c2b22e08bd48278f2cf125f054c9f6286e345ff0 (patch) | |
tree | 3c0ab722c83ef33913ad293af7d56ce2c4e1fcc9 /parse.y | |
parent | edc848712307fe5c881364e12e520e9fe58d9969 (diff) |
Imported Upstream version 2.5.31
Diffstat (limited to 'parse.y')
-rw-r--r-- | parse.y | 288 |
1 files changed, 212 insertions, 76 deletions
@@ -1,40 +1,62 @@ /* parse.y - parser for flex input */ %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP -%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS +%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER +%token OPT_TABLES %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT -%{ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Vern Paxson. - * - * The United States Government has rights in this work pursuant - * to contract no. DE-AC03-76SF00098 between the United States - * Department of Energy and the University of California. +/* + *POSIX and AT&T lex place the + * precedence of the repeat operator, {}, below that of concatenation. + * Thus, ab{3} is ababab. Most other POSIX utilities use an Extended + * Regular Expression (ERE) precedence that has the repeat operator + * higher than concatenation. This causes ab{3} to yield abbb. * - * Redistribution and use in source and binary forms with or without - * modification are permitted provided that: (1) source distributions retain - * this entire copyright notice and comment, and (2) distributions including - * binaries display the following acknowledgement: ``This product includes - * software developed by the University of California, Berkeley and its - * contributors'' in the documentation or other materials provided with the - * distribution and in all advertising materials mentioning features or use - * of this software. Neither the name of the University nor the names of - * its contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * In order to support the POSIX and AT&T precedence and the flex + * precedence we define two token sets for the begin and end tokens of + * the repeat operator, '{' and '}'. The lexical scanner chooses + * which tokens to return based on whether posix_compat or lex_compat + * are specified. Specifying either posix_compat or lex_compat will + * cause flex to parse scanner files as per the AT&T and + * POSIX-mandated behavior. */ -/* $Header: /home/daffy/u0/vern/flex/RCS/parse.y,v 2.28 95/04/21 11:51:51 vern Exp $ */ +%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX + + +%{ +/* Copyright (c) 1990 The Regents of the University of California. */ +/* All rights reserved. */ + +/* This code is derived from software contributed to Berkeley by */ +/* Vern Paxson. */ + +/* The United States Government has rights in this work pursuant */ +/* to contract no. DE-AC03-76SF00098 between the United States */ +/* Department of Energy and the University of California. */ + +/* This file is part of flex. */ + +/* Redistribution and use in source and binary forms, with or without */ +/* modification, are permitted provided that the following conditions */ +/* are met: */ + +/* 1. Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* 2. Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution. */ +/* Neither the name of the University nor the names of its contributors */ +/* may be used to endorse or promote products derived from this software */ +/* without specific prior written permission. */ + +/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ +/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ +/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ +/* PURPOSE. */ /* Some versions of bison are broken in that they use alloca() but don't * declare it properly. The following is the patented (just kidding!) @@ -48,6 +70,7 @@ #endif #include "flexdef.h" +#include "tables.h" /* The remainder of the alloca() cruft has to come after including flexdef.h, * so HAVE_ALLOCA_H is (possibly) defined. @@ -86,14 +109,21 @@ int scon_stk_ptr; static int madeany = false; /* whether we've made the '.' character class */ int previous_continued_action; /* whether the previous rule's action was '|' */ +#define format_warn3(fmt, a1, a2) \ + do{ \ + char fw3_msg[MAXLINE];\ + snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\ + warn( fw3_msg );\ + }while(0) + /* Expand a POSIX character class expression. */ #define CCL_EXPR(func) \ - { \ + do{ \ int c; \ for ( c = 0; c < csize; ++c ) \ if ( isascii(c) && func(c) ) \ ccladd( currccl, c ); \ - } + }while(0) /* While POSIX defines isblank(), it's not ANSI C. */ #define IS_BLANK(c) ((c) == ' ' || (c) == '\t') @@ -123,7 +153,7 @@ goal : initlex sect1 sect1end sect2 initforrule */ default_rule = num_rules; - finish_rule( def_rule, false, 0, 0 ); + finish_rule( def_rule, false, 0, 0, 0); for ( i = 1; i <= lastsc; ++i ) scset[i] = mkbranch( scset[i], def_rule ); @@ -150,7 +180,7 @@ sect1 : sect1 startconddecl namelist1 | sect1 options | | error - { synerr( "unknown error processing section 1" ); } + { synerr( _("unknown error processing section 1") ); } ; sect1end : SECTEND @@ -175,7 +205,7 @@ namelist1 : namelist1 NAME { scinstal( nmstr, xcluflg ); } | error - { synerr( "bad start condition list" ); } + { synerr( _("bad start condition list") ); } ; options : OPTION_OP optionlist @@ -194,6 +224,10 @@ option : OPT_OUTFILE '=' NAME { prefix = copy_string( nmstr ); } | OPT_YYCLASS '=' NAME { yyclass = copy_string( nmstr ); } + | OPT_HEADER '=' NAME + { headerfilename = copy_string( nmstr ); } + | OPT_TABLES '=' NAME + { tablesext = true; tablesfilename = copy_string( nmstr ); } ; sect2 : sect2 scon initforrule flexrule '\n' @@ -220,7 +254,7 @@ flexrule : '^' rule { pat = $2; finish_rule( pat, variable_trail_rule, - headcnt, trailcnt ); + headcnt, trailcnt , previous_continued_action); if ( scon_stk_ptr > 0 ) { @@ -256,7 +290,7 @@ flexrule : '^' rule { pat = $1; finish_rule( pat, variable_trail_rule, - headcnt, trailcnt ); + headcnt, trailcnt , previous_continued_action); if ( scon_stk_ptr > 0 ) { @@ -300,7 +334,7 @@ flexrule : '^' rule } | error - { synerr( "unrecognized rule" ); } + { synerr( _("unrecognized rule") ); } ; scon_stk_ptr : @@ -336,7 +370,7 @@ namelist2 : namelist2 ',' sconname | sconname | error - { synerr( "bad start condition list" ); } + { synerr( _("bad start condition list") ); } ; sconname : NAME @@ -392,6 +426,7 @@ rule : re2 re /* Mark as variable. */ varlength = true; headcnt = 0; + } if ( lex_compat || (varlength && headcnt == 0) ) @@ -418,7 +453,7 @@ rule : re2 re } | re2 re '$' - { synerr( "trailing context used twice" ); } + { synerr( _("trailing context used twice") ); } | re '$' { @@ -431,7 +466,7 @@ rule : re2 re if ( trlcontxt ) { - synerr( "trailing context used twice" ); + synerr( _("trailing context used twice") ); $$ = mkstate( SYM_EPSILON ); } @@ -500,7 +535,7 @@ re2 : re '/' */ if ( trlcontxt ) - synerr( "trailing context used twice" ); + synerr( _("trailing context used twice") ); else trlcontxt = true; @@ -529,6 +564,69 @@ series : series singleton | singleton { $$ = $1; } + + | series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX + { + varlength = true; + + if ( $3 > $5 || $3 < 0 ) + { + synerr( _("bad iteration values") ); + $$ = $1; + } + else + { + if ( $3 == 0 ) + { + if ( $5 <= 0 ) + { + synerr( + _("bad iteration values") ); + $$ = $1; + } + else + $$ = mkopt( + mkrep( $1, 1, $5 ) ); + } + else + $$ = mkrep( $1, $3, $5 ); + } + } + + | series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX + { + varlength = true; + + if ( $3 <= 0 ) + { + synerr( _("iteration value must be positive") ); + $$ = $1; + } + + else + $$ = mkrep( $1, $3, INFINITY ); + } + + | series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX + { + /* The series could be something like "(foo)", + * in which case we have no idea what its length + * is, so we punt here. + */ + varlength = true; + + if ( $3 <= 0 ) + { + synerr( _("iteration value must be positive") + ); + $$ = $1; + } + + else + $$ = link_machines( $1, + copysingl( $1, $3 - 1 ) ); + } + ; singleton : singleton '*' @@ -550,13 +648,13 @@ singleton : singleton '*' $$ = mkopt( $1 ); } - | singleton '{' NUMBER ',' NUMBER '}' + | singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX { varlength = true; if ( $3 > $5 || $3 < 0 ) { - synerr( "bad iteration values" ); + synerr( _("bad iteration values") ); $$ = $1; } else @@ -566,7 +664,7 @@ singleton : singleton '*' if ( $5 <= 0 ) { synerr( - "bad iteration values" ); + _("bad iteration values") ); $$ = $1; } else @@ -578,13 +676,13 @@ singleton : singleton '*' } } - | singleton '{' NUMBER ',' '}' + | singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX { varlength = true; if ( $3 <= 0 ) { - synerr( "iteration value must be positive" ); + synerr( _("iteration value must be positive") ); $$ = $1; } @@ -592,7 +690,7 @@ singleton : singleton '*' $$ = mkrep( $1, $3, INFINITY ); } - | singleton '{' NUMBER '}' + | singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX { /* The singleton could be something like "(foo)", * in which case we have no idea what its length @@ -602,7 +700,7 @@ singleton : singleton '*' if ( $3 <= 0 ) { - synerr( "iteration value must be positive" ); + synerr( _("iteration value must be positive") ); $$ = $1; } @@ -648,6 +746,9 @@ singleton : singleton '*' ++rulelen; + if (ccl_has_nl[$1]) + rule_has_nl[num_rules] = true; + $$ = mkstate( -$1 ); } @@ -655,6 +756,9 @@ singleton : singleton '*' { ++rulelen; + if (ccl_has_nl[$1]) + rule_has_nl[num_rules] = true; + $$ = mkstate( -$1 ); } @@ -671,6 +775,9 @@ singleton : singleton '*' if ( caseins && $1 >= 'A' && $1 <= 'Z' ) $1 = clower( $1 ); + if ($1 == nlch) + rule_has_nl[num_rules] = true; + $$ = mkstate( $1 ); } ; @@ -687,16 +794,42 @@ fullccl : '[' ccl ']' ccl : ccl CHAR '-' CHAR { - if ( caseins ) - { - if ( $2 >= 'A' && $2 <= 'Z' ) - $2 = clower( $2 ); - if ( $4 >= 'A' && $4 <= 'Z' ) - $4 = clower( $4 ); - } + + if (caseins) + { + /* Squish the character range to lowercase only if BOTH + * ends of the range are uppercase. + */ + if (isupper ($2) && isupper ($4)) + { + $2 = tolower ($2); + $4 = tolower ($4); + } + + /* If one end of the range has case and the other + * does not, or the cases are different, then we're not + * sure what range the user is trying to express. + * Examples: [@-z] or [S-t] + */ + else if (has_case ($2) != has_case ($4) + || (has_case ($2) && (b_islower ($2) != b_islower ($4)))) + format_warn3 ( + _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"), + $2, $4); + + /* If the range spans uppercase characters but not + * lowercase (or vice-versa), then should we automatically + * include lowercase characters in the range? + * Example: [@-_] spans [a-z] but not [A-Z] + */ + else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4)) + format_warn3 ( + _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"), + $2, $4); + } if ( $2 > $4 ) - synerr( "negative range in character class" ); + synerr( _("negative range in character class") ); else { @@ -739,23 +872,23 @@ ccl : ccl CHAR '-' CHAR } ; -ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum) } - | CCE_ALPHA { CCL_EXPR(isalpha) } - | CCE_BLANK { CCL_EXPR(IS_BLANK) } - | CCE_CNTRL { CCL_EXPR(iscntrl) } - | CCE_DIGIT { CCL_EXPR(isdigit) } - | CCE_GRAPH { CCL_EXPR(isgraph) } - | CCE_LOWER { CCL_EXPR(islower) } - | CCE_PRINT { CCL_EXPR(isprint) } - | CCE_PUNCT { CCL_EXPR(ispunct) } - | CCE_SPACE { CCL_EXPR(isspace) } +ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum); } + | CCE_ALPHA { CCL_EXPR(isalpha); } + | CCE_BLANK { CCL_EXPR(IS_BLANK); } + | CCE_CNTRL { CCL_EXPR(iscntrl); } + | CCE_DIGIT { CCL_EXPR(isdigit); } + | CCE_GRAPH { CCL_EXPR(isgraph); } + | CCE_LOWER { CCL_EXPR(islower); } + | CCE_PRINT { CCL_EXPR(isprint); } + | CCE_PUNCT { CCL_EXPR(ispunct); } + | CCE_SPACE { CCL_EXPR(isspace); } | CCE_UPPER { if ( caseins ) - CCL_EXPR(islower) + CCL_EXPR(islower); else - CCL_EXPR(isupper) + CCL_EXPR(isupper); } - | CCE_XDIGIT { CCL_EXPR(isxdigit) } + | CCE_XDIGIT { CCL_EXPR(isxdigit); } ; string : string CHAR @@ -763,6 +896,9 @@ string : string CHAR if ( caseins && $2 >= 'A' && $2 <= 'Z' ) $2 = clower( $2 ); + if ( $2 == nlch ) + rule_has_nl[num_rules] = true; + ++rulelen; $$ = link_machines( $1, mkstate( $2 ) ); @@ -815,7 +951,7 @@ void build_eof_action() /* format_synerr - write out formatted syntax error */ void format_synerr( msg, arg ) -char msg[], arg[]; +const char *msg, arg[]; { char errmsg[MAXLINE]; @@ -827,7 +963,7 @@ char msg[], arg[]; /* synerr - report a syntax error */ void synerr( str ) -char str[]; +const char *str; { syntaxerror = true; pinpoint_message( str ); @@ -837,7 +973,7 @@ char str[]; /* format_warn - write out formatted warning */ void format_warn( msg, arg ) -char msg[], arg[]; +const char *msg, arg[]; { char warn_msg[MAXLINE]; @@ -849,7 +985,7 @@ char msg[], arg[]; /* warn - report a warning, unless -w was given */ void warn( str ) -char str[]; +const char *str; { line_warning( str, linenum ); } @@ -859,7 +995,7 @@ char str[]; */ void format_pinpoint_message( msg, arg ) -char msg[], arg[]; +const char *msg, arg[]; { char errmsg[MAXLINE]; @@ -871,7 +1007,7 @@ char msg[], arg[]; /* pinpoint_message - write out a message, pinpointing its location */ void pinpoint_message( str ) -char str[]; +const char *str; { line_pinpoint( str, linenum ); } @@ -880,7 +1016,7 @@ char str[]; /* line_warning - report a warning at a given line, unless -w was given */ void line_warning( str, line ) -char str[]; +const char *str; int line; { char warning[MAXLINE]; @@ -896,10 +1032,10 @@ int line; /* line_pinpoint - write out a message, pinpointing it at the given line */ void line_pinpoint( str, line ) -char str[]; +const char *str; int line; { - fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str ); + fprintf( stderr, "%s:%d: %s\n", infilename, line, str ); } @@ -908,6 +1044,6 @@ int line; */ void yyerror( msg ) -char msg[]; +const char *msg; { } |