summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- configure.in 1
-rw-r--r-- doc/flex.texi 22
-rw-r--r-- parse.y 39
-rw-r--r-- scan.l 15
-rw-r--r-- tests/Makefile.am 2
-rw-r--r-- tests/descriptions 1
-rw-r--r-- tests/test-ccl/.cvsignore 9
-rw-r--r-- tests/test-ccl/Makefile.am 44
-rw-r--r-- tests/test-ccl/scanner.l 68
-rw-r--r-- tests/test-ccl/test.input 10
10 files changed, 207 insertions, 4 deletions
diff --git a/configure.in b/configure.in
index 27d5223..dd6abed 100644
--- a/configure.in
+++ b/configure.in
@@ -146,6 +146,7 @@ tests/test-top/Makefile
tests/test-rescan-nr/Makefile
tests/test-rescan-r/Makefile
tests/test-quotes/Makefile
+tests/test-ccl/Makefile
dnl --new-test-here-- This line is processed by tests/create-test.
)
diff --git a/doc/flex.texi b/doc/flex.texi
index 130cf09..c43d3ee 100644
--- a/doc/flex.texi
+++ b/doc/flex.texi
@@ -886,7 +886,10 @@ For example, the following character classes are all equivalent:
@end verbatim
@end example
-Some notes on patterns are in order.
+A word of caution. Character classes are expanded immediately when seen in the @code{flex} input.
+This means the character classes are sensitive to the locale in which @code{flex}
+is executed, and the resulting scanner will not be sensitive to the runtime locale.
+This may or may not be desirable.
@itemize
@@ -927,6 +930,23 @@ unfortunately the inconsistency is historically entrenched. Matching
newlines means that a pattern like @samp{[^"]*} can match the entire
input unless there's another quote in the input.
+Flex allows negation of character class expressions by prepending @samp{^} to
+the POSIX character class name.
+
+@example
+@verbatim
+ [:^alnum:] [:^alpha:] [:^blank:]
+ [:^cntrl:] [:^digit:] [:^graph:]
+ [:^lower:] [:^print:] [:^punct:]
+ [:^space:] [:^upper:] [:^xdigit:]
+@end verbatim
+@end example
+
+Flex will issue a warning if either of the expressions @samp{[:^upper:]} or
+@samp{[:^lower:]} appears in a case-insensitive scanner, since their meaning is
+unclear. The current behavior is to skip them entirely, but this may change
+without notice in future revisions of flex.
+
@cindex trailing context, limits of
@cindex ^ as non-special character in patterns
@cindex $ as normal character in patterns
diff --git a/parse.y b/parse.y
index 0f56e04..e7c79cf 100644
--- a/parse.y
+++ b/parse.y
@@ -7,6 +7,8 @@
%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
+%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
+%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
/*
*POSIX and AT&T lex place the
* precedence of the repeat operator, {}, below that of concatenation.
@@ -125,6 +127,15 @@ int previous_continued_action; /* whether the previous rule's action was '|' */
ccladd( currccl, c ); \
}while(0)
+/* Negated class: add to currccl every c in [0, csize) for which func(c) is false. */
+#define CCL_NEG_EXPR(func) \
+ do{ \
+ int c; \
+ for ( c = 0; c < csize; ++c ) \
+ if ( !func(c) ) \
+ ccladd( currccl, c ); \
+ }while(0)
+
/* While POSIX defines isblank(), it's not ANSI C. */
#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
@@ -872,7 +883,8 @@ ccl : ccl CHAR '-' CHAR
}
;
-ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum); }
+ccl_expr:
+ CCE_ALNUM { CCL_EXPR(isalnum); }
| CCE_ALPHA { CCL_EXPR(isalpha); }
| CCE_BLANK { CCL_EXPR(IS_BLANK); }
| CCE_CNTRL { CCL_EXPR(iscntrl); }
@@ -882,13 +894,36 @@ ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum); }
| CCE_PRINT { CCL_EXPR(isprint); }
| CCE_PUNCT { CCL_EXPR(ispunct); }
| CCE_SPACE { CCL_EXPR(isspace); }
+ | CCE_XDIGIT { CCL_EXPR(isxdigit); }
| CCE_UPPER {
if ( caseins )
CCL_EXPR(islower);
else
CCL_EXPR(isupper);
}
- | CCE_XDIGIT { CCL_EXPR(isxdigit); }
+
+ | CCE_NEG_ALNUM { CCL_NEG_EXPR(isalnum); }
+ | CCE_NEG_ALPHA { CCL_NEG_EXPR(isalpha); }
+ | CCE_NEG_BLANK { CCL_NEG_EXPR(IS_BLANK); }
+ | CCE_NEG_CNTRL { CCL_NEG_EXPR(iscntrl); }
+ | CCE_NEG_DIGIT { CCL_NEG_EXPR(isdigit); }
+ | CCE_NEG_GRAPH { CCL_NEG_EXPR(isgraph); }
+ | CCE_NEG_PRINT { CCL_NEG_EXPR(isprint); }
+ | CCE_NEG_PUNCT { CCL_NEG_EXPR(ispunct); }
+ | CCE_NEG_SPACE { CCL_NEG_EXPR(isspace); }
+ | CCE_NEG_XDIGIT { CCL_NEG_EXPR(isxdigit); }
+ | CCE_NEG_LOWER { /* meaning unclear when case-insensitive: warn, add nothing */
+ if ( caseins )
+ warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
+ else
+ CCL_NEG_EXPR(islower);
+ }
+ | CCE_NEG_UPPER { /* same policy as [:^lower:]; message kept parallel */
+ if ( caseins )
+ warn(_("[:^upper:] is ambiguous in case insensitive scanner"));
+ else
+ CCL_NEG_EXPR(isupper);
+ }
;
string : string CHAR
diff --git a/scan.l b/scan.l
index cdbd3e8..cff5386 100644
--- a/scan.l
+++ b/scan.l
@@ -117,7 +117,7 @@ ESCSEQ (\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ})
CCL_CHAR ([^\\\n\]]|{ESCSEQ})
-CCL_EXPR ("[:"[[:alpha:]]+":]")
+CCL_EXPR ("[:"^?[[:alpha:]]+":]")
LEXOPT [aceknopr]
@@ -708,6 +708,19 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */
"[:space:]" BEGIN(CCL); return CCE_SPACE;
"[:upper:]" BEGIN(CCL); return CCE_UPPER;
"[:xdigit:]" BEGIN(CCL); return CCE_XDIGIT;
+
+ "[:^alnum:]" BEGIN(CCL); return CCE_NEG_ALNUM;
+ "[:^alpha:]" BEGIN(CCL); return CCE_NEG_ALPHA;
+ "[:^blank:]" BEGIN(CCL); return CCE_NEG_BLANK;
+ "[:^cntrl:]" BEGIN(CCL); return CCE_NEG_CNTRL;
+ "[:^digit:]" BEGIN(CCL); return CCE_NEG_DIGIT;
+ "[:^graph:]" BEGIN(CCL); return CCE_NEG_GRAPH;
+ "[:^lower:]" BEGIN(CCL); return CCE_NEG_LOWER;
+ "[:^print:]" BEGIN(CCL); return CCE_NEG_PRINT;
+ "[:^punct:]" BEGIN(CCL); return CCE_NEG_PUNCT;
+ "[:^space:]" BEGIN(CCL); return CCE_NEG_SPACE;
+ "[:^upper:]" BEGIN(CCL); return CCE_NEG_UPPER;
+ "[:^xdigit:]" BEGIN(CCL); return CCE_NEG_XDIGIT;
{CCL_EXPR} {
format_synerr(
_( "bad character class expression: %s" ),
diff --git a/tests/Makefile.am b/tests/Makefile.am
index c1f6366..0644674 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -26,6 +26,7 @@ dist_noinst_SCRIPTS = \
create-test
DIST_SUBDIRS = \
+ test-ccl \
test-quotes \
test-rescan-r \
test-rescan-nr \
@@ -70,6 +71,7 @@ DIST_SUBDIRS = \
test-table-opts
SUBDIRS = \
+ test-ccl \
test-quotes \
test-rescan-r \
test-rescan-nr \
diff --git a/tests/descriptions b/tests/descriptions
index a8c3ebf..c8f3aaf 100644
--- a/tests/descriptions
+++ b/tests/descriptions
@@ -8,6 +8,7 @@ basic-r - Simple scanner, reentrant.
bison-nr - Ordinary bison-bridge.
bison-yylloc - Reentrant scanner + pure parser. Requires bison.
bison-yylval - Reentrant scanner + pure parser. Requires bison.
+ccl - Character classes.
c-cpp-nr - Compile a C scanner with C++ compiler, nonreentrant.
c-cpp-r - Compile a C scanner with C++ compiler, reentrant.
c++-basic - The C++ scanner.
diff --git a/tests/test-ccl/.cvsignore b/tests/test-ccl/.cvsignore
new file mode 100644
index 0000000..2f65350
--- /dev/null
+++ b/tests/test-ccl/.cvsignore
@@ -0,0 +1,9 @@
+Makefile
+Makefile.in
+parser.c
+parser.h
+scanner.c
+TEMPLATE
+OUTPUT
+.deps
+test-ccl
diff --git a/tests/test-ccl/Makefile.am b/tests/test-ccl/Makefile.am
new file mode 100644
index 0000000..0a5fdc0
--- /dev/null
+++ b/tests/test-ccl/Makefile.am
@@ -0,0 +1,44 @@
+# This file is part of flex.
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+
+# Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+
+# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE.
+
+FLEX = $(top_builddir)/flex
+
+builddir = @builddir@
+
+EXTRA_DIST = scanner.l test.input
+# Clean the built test program under the same name the link rule produces.
+CLEANFILES = scanner.c scanner.h $(testname)$(EXEEXT) OUTPUT $(OBJS)
+OBJS = scanner.o
+
+AM_CPPFLAGS = -I$(srcdir) -I$(builddir) -I$(top_srcdir) -I$(top_builddir)
+
+testname = test-ccl
+
+scanner.c: $(srcdir)/scanner.l
+ $(FLEX) $(LFLAGS) $<
+
+$(testname)$(EXEEXT): $(OBJS)
+ $(CC) -o $@ $(LDFLAGS) $(OBJS) $(LOADLIBES)
+
+test: $(testname)$(EXEEXT)
+ ./$(testname)$(EXEEXT) < $(srcdir)/test.input
+
+.c.o:
+ $(CC) -c -o $@ $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $<
diff --git a/tests/test-ccl/scanner.l b/tests/test-ccl/scanner.l
new file mode 100644
index 0000000..749ac71
--- /dev/null
+++ b/tests/test-ccl/scanner.l
@@ -0,0 +1,68 @@
+/*
+ * This file is part of flex.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE.
+ */
+
+%{
+/* A template scanner file to build "scanner.c". */
+#include <stdio.h>
+#include <stdlib.h>
+#include "config.h"
+/*#include "parser.h" */
+
+%}
+
+%option 8bit outfile="scanner.c" prefix="test"
+%option nounput nomain noyywrap
+%option warn
+
+
+%%
+
+"^alpha:"[[:^alpha:]]+@alpha@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^digit:"[[:^digit:]]+@digit@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^alnum:"[[:^alnum:]]+@alnum@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^upper:"[[:^upper:]]+@upper@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^lower:"[[:^lower:]]+@lower@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^space:"[[:^space:]]+@space@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^blank:"[[:^blank:]]+@blank@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^punct:"[[:^punct:]]+@punct@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^cntrl:"[[:^cntrl:]]+@cntrl@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^xdigit:"[[:^xdigit:]]+@xdigit@\n printf("OK: %s", yytext); ++yylineno; return 1;
+
+.|\n {
+ printf("ERROR: at line %d\n", yylineno); /* input that no rule above consumed */
+ abort(); /* terminate abnormally */
+ }
+%%
+
+int main(void);
+/* Test driver: scan stdin until yylex() returns 0, then report success. */
+int
+main (void)
+{
+ yyin = stdin;
+ yyout = stdout;
+ while (yylex())
+ ; /* all work happens inside the rule actions */
+ printf("TEST RETURNING OK.\n");
+ return 0;
+}
diff --git a/tests/test-ccl/test.input b/tests/test-ccl/test.input
new file mode 100644
index 0000000..c8c005a
--- /dev/null
+++ b/tests/test-ccl/test.input
@@ -0,0 +1,10 @@
+^alpha:0123456789 ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@alpha@
+^digit:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@digit@
+^alnum:~!@#$%^&*(){}[]':;"<>,./?\+=_-`@alnum@
+^upper:abcdefghijklmnopqrstuvwxyz0123456789 ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@upper@
+^lower:ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@lower@
+^space:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@space@
+^blank:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@blank@
+^punct:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF Z@punct@
+^cntrl:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@cntrl@
+^xdigit:ghijklmnopqrstuvwxyzGHIJKLMNOPQRSTUVWXYZ ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@xdigit@