summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Millaway <john43@users.sourceforge.net>2006-03-28 20:47:11 +0000
committerJohn Millaway <john43@users.sourceforge.net>2006-03-28 20:47:11 +0000
commit771ed6ddf3cba36aeca375641297b9b254c7d74b (patch)
treea98d84490a3296202cdca6ee932062c439ae9618
parentf6faf861a7f39e893f502107a6c767aadc82ac49 (diff)
Added ccl union operator.
Added test in test suite for ccl union operator. Documented ccl union operator. Removed crufty ccl cache to prevent parser problems.
-rw-r--r--ccl.c31
-rw-r--r--doc/flex.texi7
-rw-r--r--flexdef.h1
-rw-r--r--parse.y6
-rw-r--r--scan.l8
-rw-r--r--sym.c1
-rw-r--r--tests/test-ccl/scanner.l4
-rw-r--r--tests/test-ccl/test.input2
8 files changed, 55 insertions, 5 deletions
diff --git a/ccl.c b/ccl.c
index d23611d..8d66bb6 100644
--- a/ccl.c
+++ b/ccl.c
@@ -151,6 +151,37 @@ ccl_set_diff (int a, int b)
return d;
}
+/* ccl_set_union - return a new ccl (from cclinit) to which every member of ccl a, then every member of ccl b, has been passed via ccladd. */
+int
+ccl_set_union (int a, int b)
+{
+ int d, i;
+
+ /* d is the fresh ccl returned by cclinit(); it is the result. */
+ d = cclinit();
+
+ /* Feed a's ccllen[a] entries, read from ccltbl starting at cclmap[a], to ccladd. */
+ for (i = 0; i < ccllen[a]; ++i)
+ ccladd (d, ccltbl[cclmap[a] + i]);
+
+ /* Same for b's ccllen[b] entries, read from ccltbl starting at cclmap[b]. */
+ for (i = 0; i < ccllen[b]; ++i)
+ ccladd (d, ccltbl[cclmap[b] + i]);
+
+ /* Debug trace of a, b and d to stderr; the constant-false guard keeps it disabled. */
+ if (0){
+ fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, a);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, b);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, d);
+ fprintf(stderr, "\n)\n");
+ }
+ return d;
+}
+
/* cclinit - return an empty ccl */
diff --git a/doc/flex.texi b/doc/flex.texi
index 04aff51..54be073 100644
--- a/doc/flex.texi
+++ b/doc/flex.texi
@@ -997,6 +997,13 @@ just the single character @samp{a}). The @samp{@{-@}} operator is left
associative, so @samp{[abc]@{-@}[b]@{-@}[c]} is the same as @samp{[a]}. Be careful
not to accidentally create an empty set, which will never match.
+@item
+
+The @samp{@{+@}} operator computes the union of two character classes. For
+example, @samp{[a-z]@{+@}[0-9]} is the same as @samp{[a-z0-9]}. This operator
+is useful when preceded by the result of a difference operation, as in
+@samp{[[:alpha:]]@{-@}[[:lower:]]@{+@}[q]}, which is equivalent to
+@samp{[A-Zq]} in the "C" locale.
@cindex trailing context, limits of
@cindex ^ as non-special character in patterns
diff --git a/flexdef.h b/flexdef.h
index 4136578..2b3b760 100644
--- a/flexdef.h
+++ b/flexdef.h
@@ -725,6 +725,7 @@ extern void ccladd PROTO ((int, int)); /* add a single character to a ccl */
extern int cclinit PROTO ((void)); /* make an empty ccl */
extern void cclnegate PROTO ((int)); /* negate a ccl */
extern int ccl_set_diff (int a, int b); /* set difference of two ccls. */
+extern int ccl_set_union (int a, int b); /* set union of two ccls. */
/* List the members of a set of characters in CCL form. */
extern void list_character_set PROTO ((FILE *, int[]));
diff --git a/parse.y b/parse.y
index e66deb8..251cc72 100644
--- a/parse.y
+++ b/parse.y
@@ -10,7 +10,7 @@
%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
-%left CCL_OP_DIFF
+%left CCL_OP_DIFF CCL_OP_UNION
/*
*POSIX and AT&T lex place the
@@ -760,7 +760,6 @@ singleton : singleton '*'
| fullccl
{
- if ( ! cclsorted )
/* Sort characters for fast searching. We
* use a shell sort since this list could
* be large.
@@ -810,7 +809,8 @@ singleton : singleton '*'
}
;
fullccl:
- fullccl CCL_OP_DIFF braceccl { $$ = ccl_set_diff ($1, $3); }
+ fullccl CCL_OP_DIFF braceccl { $$ = ccl_set_diff ($1, $3); }
+ | fullccl CCL_OP_UNION braceccl { $$ = ccl_set_union ($1, $3); }
| braceccl
;
diff --git a/scan.l b/scan.l
index 5af40a7..e06279f 100644
--- a/scan.l
+++ b/scan.l
@@ -621,7 +621,12 @@ M4QEND "]]"
/* Check to see if we've already encountered this
* ccl.
*/
- if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
+ if (0 /* <--- This "0" effectively disables the reuse of a
+ * character class (purely based on its source text).
+ * The reason it was disabled is so yacc/bison can parse
+ * ccl operations, such as ccl difference and union.
+ */
+ && (cclval = ccllookup( (Char *) nmstr )) != 0 )
{
if ( input() != ']' )
synerr( _( "bad character class" ) );
@@ -647,6 +652,7 @@ M4QEND "]]"
}
}
"{-}" return CCL_OP_DIFF;
+ "{+}" return CCL_OP_UNION;
/* Check for :space: at the end of the rule so we don't
diff --git a/sym.c b/sym.c
index 7aecae9..8d0b2e9 100644
--- a/sym.c
+++ b/sym.c
@@ -168,7 +168,6 @@ static struct hash_entry *findsym (sym, table, table_size)
return &empty_entry;
}
-
/* hashfunct - compute the hash value for "str" and hash size "hash_size" */
static int hashfunct (str, hash_size)
diff --git a/tests/test-ccl/scanner.l b/tests/test-ccl/scanner.l
index 914795c..330278a 100644
--- a/tests/test-ccl/scanner.l
+++ b/tests/test-ccl/scanner.l
@@ -55,6 +55,10 @@
^"abcd-bc:"([abcd]{-}[bc])+@abcd-bc@\n printf("OK: %s", yytext); ++yylineno; return 1;
^"abcde-b-c:"([abcde]{-}[b]{-}[c])+@abcde-b-c@\n printf("OK: %s", yytext); ++yylineno; return 1;
^"^XY-^XYZ:"([^XY]{-}[^XYZ])+@^XY-^XYZ@\n printf("OK: %s", yytext); ++yylineno; return 1;
+
+^"a+d:"([[:alpha:]]{+}[[:digit:]])+"@a+d@"\n a_ok();
+^"a-u+Q:"([[:alpha:]]{-}[[:upper:]]{+}[Q])+"@a-u+Q@"\n a_ok();
+
^"ia:"(?i:a)+@ia@\n printf("OK: %s", yytext); ++yylineno; return 1;
^"iabc:"(?i:abc)+@iabc@\n printf("OK: %s", yytext); ++yylineno; return 1;
^"ia-c:"(?i:[a-c]+)@ia-c@\n printf("OK: %s", yytext); ++yylineno; return 1;
diff --git a/tests/test-ccl/test.input b/tests/test-ccl/test.input
index f38b4d6..b318fe6 100644
--- a/tests/test-ccl/test.input
+++ b/tests/test-ccl/test.input
@@ -13,6 +13,8 @@ l-xyz:abcdefghijklmnopqrstuvw@l-xyz@
abcd-bc:aaaaddddaaaa@abcd-bc@
abcde-b-c:aaaaddddeeee@abcde-b-c@
^XY-^XYZ:ZZZZZZZZZZZ@^XY-^XYZ@
+a+d:abc0123xyz789@a+d@
+a-u+Q:abcQQQQxyz@a-u+Q@
ia:AaAa@ia@
iabc:ABCabcAbCaBc@iabc@
ia-c:ABCabcAbCaBc@ia-c@