diff options
-rw-r--r-- | configure.in | 1 | ||||
-rw-r--r-- | doc/flex.texi | 22 | ||||
-rw-r--r-- | parse.y | 39 | ||||
-rw-r--r-- | scan.l | 15 | ||||
-rw-r--r-- | tests/Makefile.am | 2 | ||||
-rw-r--r-- | tests/descriptions | 1 | ||||
-rw-r--r-- | tests/test-ccl/.cvsignore | 9 | ||||
-rw-r--r-- | tests/test-ccl/Makefile.am | 44 | ||||
-rw-r--r-- | tests/test-ccl/scanner.l | 68 | ||||
-rw-r--r-- | tests/test-ccl/test.input | 10 |
10 files changed, 207 insertions, 4 deletions
diff --git a/configure.in b/configure.in index 27d5223..dd6abed 100644 --- a/configure.in +++ b/configure.in @@ -146,6 +146,7 @@ tests/test-top/Makefile tests/test-rescan-nr/Makefile tests/test-rescan-r/Makefile tests/test-quotes/Makefile +tests/test-ccl/Makefile dnl --new-test-here-- This line is processed by tests/create-test. ) diff --git a/doc/flex.texi b/doc/flex.texi index 130cf09..c43d3ee 100644 --- a/doc/flex.texi +++ b/doc/flex.texi @@ -886,7 +886,10 @@ For example, the following character classes are all equivalent: @end verbatim @end example -Some notes on patterns are in order. +A word of caution. Character classes are expanded immediately when seen in the @code{flex} input. +This means the character classes are sensitive to the locale in which @code{flex} +is executed, and the resulting scanner will not be sensitive to the runtime locale. +This may or may not be desirable. @itemize @@ -927,6 +930,23 @@ unfortunately the inconsistency is historically entrenched. Matching newlines means that a pattern like @samp{[^"]*} can match the entire input unless there's another quote in the input. +Flex allows negation of character class expressions by prepending @samp{^} to +the POSIX character class name. + +@example +@verbatim + [:^alnum:] [:^alpha:] [:^blank:] + [:^cntrl:] [:^digit:] [:^graph:] + [:^lower:] [:^print:] [:^punct:] + [:^space:] [:^upper:] [:^xdigit:] +@end verbatim +@end example + +Flex will issue a warning if the expressions @samp{[:^upper:]} and +@samp{[:^lower:]} appear in a case-insensitive scanner, since their meaning is +unclear. The current behavior is to skip them entirely, but this may change +without notice in future revisions of flex. 
+ @cindex trailing context, limits of @cindex ^ as non-special character in patterns @cindex $ as normal character in patterns @@ -7,6 +7,8 @@ %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT +%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH +%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT /* *POSIX and AT&T lex place the * precedence of the repeat operator, {}, below that of concatenation. @@ -125,6 +127,15 @@ int previous_continued_action; /* whether the previous rule's action was '|' */ ccladd( currccl, c ); \ }while(0) +/* negated class */ +#define CCL_NEG_EXPR(func) \ + do{ \ + int c; \ + for ( c = 0; c < csize; ++c ) \ + if ( !func(c) ) \ + ccladd( currccl, c ); \ + }while(0) + /* While POSIX defines isblank(), it's not ANSI C. */ #define IS_BLANK(c) ((c) == ' ' || (c) == '\t') @@ -872,7 +883,8 @@ ccl : ccl CHAR '-' CHAR } ; -ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum); } +ccl_expr: + CCE_ALNUM { CCL_EXPR(isalnum); } | CCE_ALPHA { CCL_EXPR(isalpha); } | CCE_BLANK { CCL_EXPR(IS_BLANK); } | CCE_CNTRL { CCL_EXPR(iscntrl); } @@ -882,13 +894,36 @@ ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum); } | CCE_PRINT { CCL_EXPR(isprint); } | CCE_PUNCT { CCL_EXPR(ispunct); } | CCE_SPACE { CCL_EXPR(isspace); } + | CCE_XDIGIT { CCL_EXPR(isxdigit); } | CCE_UPPER { if ( caseins ) CCL_EXPR(islower); else CCL_EXPR(isupper); } - | CCE_XDIGIT { CCL_EXPR(isxdigit); } + + | CCE_NEG_ALNUM { CCL_NEG_EXPR(isalnum); } + | CCE_NEG_ALPHA { CCL_NEG_EXPR(isalpha); } + | CCE_NEG_BLANK { CCL_NEG_EXPR(IS_BLANK); } + | CCE_NEG_CNTRL { CCL_NEG_EXPR(iscntrl); } + | CCE_NEG_DIGIT { CCL_NEG_EXPR(isdigit); } + | CCE_NEG_GRAPH { CCL_NEG_EXPR(isgraph); } + | CCE_NEG_PRINT { CCL_NEG_EXPR(isprint); } + | CCE_NEG_PUNCT { CCL_NEG_EXPR(ispunct); } + | CCE_NEG_SPACE { CCL_NEG_EXPR(isspace); } + | CCE_NEG_XDIGIT { CCL_NEG_EXPR(isxdigit); } + | 
CCE_NEG_LOWER { + if ( caseins ) + warn(_("[:^lower:] is ambiguous in case insensitive scanner")); + else + CCL_NEG_EXPR(islower); + } + | CCE_NEG_UPPER { + if ( caseins ) + warn(_("[:^upper:] is ambiguous in case insensitive scanner")); + else + CCL_NEG_EXPR(isupper); + } ; string : string CHAR @@ -117,7 +117,7 @@ ESCSEQ (\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2})) FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ}) CCL_CHAR ([^\\\n\]]|{ESCSEQ}) -CCL_EXPR ("[:"[[:alpha:]]+":]") +CCL_EXPR ("[:"^?[[:alpha:]]+":]") LEXOPT [aceknopr] @@ -708,6 +708,19 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */ "[:space:]" BEGIN(CCL); return CCE_SPACE; "[:upper:]" BEGIN(CCL); return CCE_UPPER; "[:xdigit:]" BEGIN(CCL); return CCE_XDIGIT; + + "[:^alnum:]" BEGIN(CCL); return CCE_NEG_ALNUM; + "[:^alpha:]" BEGIN(CCL); return CCE_NEG_ALPHA; + "[:^blank:]" BEGIN(CCL); return CCE_NEG_BLANK; + "[:^cntrl:]" BEGIN(CCL); return CCE_NEG_CNTRL; + "[:^digit:]" BEGIN(CCL); return CCE_NEG_DIGIT; + "[:^graph:]" BEGIN(CCL); return CCE_NEG_GRAPH; + "[:^lower:]" BEGIN(CCL); return CCE_NEG_LOWER; + "[:^print:]" BEGIN(CCL); return CCE_NEG_PRINT; + "[:^punct:]" BEGIN(CCL); return CCE_NEG_PUNCT; + "[:^space:]" BEGIN(CCL); return CCE_NEG_SPACE; + "[:^upper:]" BEGIN(CCL); return CCE_NEG_UPPER; + "[:^xdigit:]" BEGIN(CCL); return CCE_NEG_XDIGIT; {CCL_EXPR} { format_synerr( _( "bad character class expression: %s" ), diff --git a/tests/Makefile.am b/tests/Makefile.am index c1f6366..0644674 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -26,6 +26,7 @@ dist_noinst_SCRIPTS = \ create-test DIST_SUBDIRS = \ + test-ccl \ test-quotes \ test-rescan-r \ test-rescan-nr \ @@ -70,6 +71,7 @@ DIST_SUBDIRS = \ test-table-opts SUBDIRS = \ + test-ccl \ test-quotes \ test-rescan-r \ test-rescan-nr \ diff --git a/tests/descriptions b/tests/descriptions index a8c3ebf..c8f3aaf 100644 --- a/tests/descriptions +++ b/tests/descriptions @@ -8,6 +8,7 @@ basic-r - Simple scanner, reentrant. bison-nr - Ordinary bison-bridge. 
bison-yylloc - Reentrant scanner + pure parser. Requires bison. bison-yylval - Reentrant scanner + pure parser. Requires bison. +ccl - Character classes. c-cpp-nr - Compile a C scanner with C++ compiler, nonreentrant. c-cpp-r - Compile a C scanner with C++ compiler, reentrant. c++-basic - The C++ scanner. diff --git a/tests/test-ccl/.cvsignore b/tests/test-ccl/.cvsignore new file mode 100644 index 0000000..2f65350 --- /dev/null +++ b/tests/test-ccl/.cvsignore @@ -0,0 +1,9 @@ +Makefile +Makefile.in +parser.c +parser.h +scanner.c +TEMPLATE +OUTPUT +.deps +test-ccl diff --git a/tests/test-ccl/Makefile.am b/tests/test-ccl/Makefile.am new file mode 100644 index 0000000..0a5fdc0 --- /dev/null +++ b/tests/test-ccl/Makefile.am @@ -0,0 +1,44 @@ +# This file is part of flex. + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: + +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. + +# Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. + +# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE. 
+ +FLEX = $(top_builddir)/flex + +builddir = @builddir@ + +EXTRA_DIST = scanner.l test.input +CLEANFILES = scanner.c scanner.h test-ccl OUTPUT $(OBJS) +OBJS = scanner.o + +AM_CPPFLAGS = -I$(srcdir) -I$(builddir) -I$(top_srcdir) -I$(top_builddir) + +testname = test-ccl + +scanner.c: $(srcdir)/scanner.l + $(FLEX) $(LFLAGS) $< + +$(testname)$(EXEEXT): $(OBJS) + $(CC) -o $@ $(LDFLAGS) $(OBJS) $(LOADLIBES) + +test: $(testname)$(EXEEXT) + ./$(testname)$(EXEEXT) < $(srcdir)/test.input + +.c.o: + $(CC) -c -o $@ $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $< diff --git a/tests/test-ccl/scanner.l b/tests/test-ccl/scanner.l new file mode 100644 index 0000000..749ac71 --- /dev/null +++ b/tests/test-ccl/scanner.l @@ -0,0 +1,68 @@ +/* + * This file is part of flex. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE. + */ + +%{ +/* A template scanner file to build "scanner.c". 
*/ +#include <stdio.h> +#include <stdlib.h> +#include "config.h" +/*#include "parser.h" */ + +%} + +%option 8bit outfile="scanner.c" prefix="test" +%option nounput nomain noyywrap +%option warn + + +%% + +"^alpha:"[[:^alpha:]]+@alpha@\n printf("OK: %s", yytext); ++yylineno; return 1; +"^digit:"[[:^digit:]]+@digit@\n printf("OK: %s", yytext); ++yylineno; return 1; +"^alnum:"[[:^alnum:]]+@alnum@\n printf("OK: %s", yytext); ++yylineno; return 1; +"^upper:"[[:^upper:]]+@upper@\n printf("OK: %s", yytext); ++yylineno; return 1; +"^lower:"[[:^lower:]]+@lower@\n printf("OK: %s", yytext); ++yylineno; return 1; +"^space:"[[:^space:]]+@space@\n printf("OK: %s", yytext); ++yylineno; return 1; +"^blank:"[[:^blank:]]+@blank@\n printf("OK: %s", yytext); ++yylineno; return 1; +"^punct:"[[:^punct:]]+@punct@\n printf("OK: %s", yytext); ++yylineno; return 1; +"^cntrl:"[[:^cntrl:]]+@cntrl@\n printf("OK: %s", yytext); ++yylineno; return 1; +"^xdigit:"[[:^xdigit:]]+@xdigit@\n printf("OK: %s", yytext); ++yylineno; return 1; + +.|\n { + printf("ERROR: at line %d\n", yylineno); + abort(); + } +%% + +int main(void); + +int +main () +{ + yyin = stdin; + yyout = stdout; + while (yylex()) + ; + printf("TEST RETURNING OK.\n"); + return 0; +} diff --git a/tests/test-ccl/test.input b/tests/test-ccl/test.input new file mode 100644 index 0000000..c8c005a --- /dev/null +++ b/tests/test-ccl/test.input @@ -0,0 +1,10 @@ +^alpha:0123456789 ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@alpha@ +^digit:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@digit@ +^alnum:~!@#$%^&*(){}[]':;"<>,./?\+=_-`@alnum@ +^upper:abcdefghijklmnopqrstuvwxyz0123456789 ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@upper@ +^lower:ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@lower@ +^space:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@space@ 
+^blank:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@blank@ +^punct:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF Z@punct@ +^cntrl:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@cntrl@ +^xdigit:ghijklmnopqrstuvwxyzGHIJKLMNOPQRSTUVWXYZ ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@xdigit@ |