diff options
| field | value | date |
|---|---|---|
| author | John Millaway <john43@users.sourceforge.net> | 2006-03-28 20:47:11 +0000 |
| committer | John Millaway <john43@users.sourceforge.net> | 2006-03-28 20:47:11 +0000 |
| commit | 771ed6ddf3cba36aeca375641297b9b254c7d74b (patch) | |
| tree | a98d84490a3296202cdca6ee932062c439ae9618 | |
| parent | f6faf861a7f39e893f502107a6c767aadc82ac49 (diff) | |

Added ccl union operator.
Added test in test suite for ccl union operator.
Documented ccl union operator.
Removed crufty ccl cache to prevent parser problems.

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | ccl.c | 31 |
| -rw-r--r-- | doc/flex.texi | 7 |
| -rw-r--r-- | flexdef.h | 1 |
| -rw-r--r-- | parse.y | 6 |
| -rw-r--r-- | scan.l | 8 |
| -rw-r--r-- | sym.c | 1 |
| -rw-r--r-- | tests/test-ccl/scanner.l | 4 |
| -rw-r--r-- | tests/test-ccl/test.input | 2 |

8 files changed, 55 insertions, 5 deletions
@@ -151,6 +151,37 @@ ccl_set_diff (int a, int b) return d; } +/* ccl_set_union - create a new ccl as the set union of the two given ccls. */ +int +ccl_set_union (int a, int b) +{ + int d, i; + + /* create new class */ + d = cclinit(); + + /* Add all of a */ + for (i = 0; i < ccllen[a]; ++i) + ccladd (d, ccltbl[cclmap[a] + i]); + + /* Add all of b */ + for (i = 0; i < ccllen[b]; ++i) + ccladd (d, ccltbl[cclmap[b] + i]); + + /* debug */ + if (0){ + fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d); + fprintf(stderr, "\n "); + dump_cclp (stderr, a); + fprintf(stderr, "\n "); + dump_cclp (stderr, b); + fprintf(stderr, "\n "); + dump_cclp (stderr, d); + fprintf(stderr, "\n)\n"); + } + return d; +} + /* cclinit - return an empty ccl */ diff --git a/doc/flex.texi b/doc/flex.texi index 04aff51..54be073 100644 --- a/doc/flex.texi +++ b/doc/flex.texi @@ -997,6 +997,13 @@ just the single character @samp{a}). The @samp{@{-@}} operator is left associative, so @samp{[abc]@{-@}[b]@{-@}[c]} is the same as @samp{[a]}. Be careful not to accidently create an empty set, which will never match. +@item + +The @samp{@{+@}} operator computes the union of two character classes. For +example, @samp{[a-z]@{+@}[0-9]} is the same as @samp{[a-z0-9]}. This operator +is useful when preceded by the result of a difference operation, as in, +@samp{[[:alpha:]]@{-@}[[:lower:]]@{+@}[q]}, which is equivalent to +@samp{[A-Zq]} in the "C" locale. @cindex trailing context, limits of @cindex ^ as non-special character in patterns @@ -725,6 +725,7 @@ extern void ccladd PROTO ((int, int)); /* add a single character to a ccl */ extern int cclinit PROTO ((void)); /* make an empty ccl */ extern void cclnegate PROTO ((int)); /* negate a ccl */ extern int ccl_set_diff (int a, int b); /* set difference of two ccls. */ +extern int ccl_set_union (int a, int b); /* set union of two ccls. */ /* List the members of a set of characters in CCL form. 
*/ extern void list_character_set PROTO ((FILE *, int[])); @@ -10,7 +10,7 @@ %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT -%left CCL_OP_DIFF +%left CCL_OP_DIFF CCL_OP_UNION /* *POSIX and AT&T lex place the @@ -760,7 +760,6 @@ singleton : singleton '*' | fullccl { - if ( ! cclsorted ) /* Sort characters for fast searching. We * use a shell sort since this list could * be large. @@ -810,7 +809,8 @@ singleton : singleton '*' } ; fullccl: - fullccl CCL_OP_DIFF braceccl { $$ = ccl_set_diff ($1, $3); } + fullccl CCL_OP_DIFF braceccl { $$ = ccl_set_diff ($1, $3); } + | fullccl CCL_OP_UNION braceccl { $$ = ccl_set_union ($1, $3); } | braceccl ; @@ -621,7 +621,12 @@ M4QEND "]]" /* Check to see if we've already encountered this * ccl. */ - if ( (cclval = ccllookup( (Char *) nmstr )) != 0 ) + if (0 /* <--- This "0" effectively disables the reuse of a + * character class (purely based on its source text). + * The reason it was disabled is so yacc/bison can parse + * ccl operations, such as ccl difference and union. 
+ */ + && (cclval = ccllookup( (Char *) nmstr )) != 0 ) { if ( input() != ']' ) synerr( _( "bad character class" ) ); @@ -647,6 +652,7 @@ M4QEND "]]" } } "{-}" return CCL_OP_DIFF; + "{+}" return CCL_OP_UNION; /* Check for :space: at the end of the rule so we don't @@ -168,7 +168,6 @@ static struct hash_entry *findsym (sym, table, table_size) return &empty_entry; } - /* hashfunct - compute the hash value for "str" and hash size "hash_size" */ static int hashfunct (str, hash_size) diff --git a/tests/test-ccl/scanner.l b/tests/test-ccl/scanner.l index 914795c..330278a 100644 --- a/tests/test-ccl/scanner.l +++ b/tests/test-ccl/scanner.l @@ -55,6 +55,10 @@ ^"abcd-bc:"([abcd]{-}[bc])+@abcd-bc@\n printf("OK: %s", yytext); ++yylineno; return 1; ^"abcde-b-c:"([abcde]{-}[b]{-}[c])+@abcde-b-c@\n printf("OK: %s", yytext); ++yylineno; return 1; ^"^XY-^XYZ:"([^XY]{-}[^XYZ])+@^XY-^XYZ@\n printf("OK: %s", yytext); ++yylineno; return 1; + +^"a+d:"([[:alpha:]]{+}[[:digit:]])+"@a+d@"\n a_ok(); +^"a-u+Q:"([[:alpha:]]{-}[[:upper:]]{+}[Q])+"@a-u+Q@"\n a_ok(); + ^"ia:"(?i:a)+@ia@\n printf("OK: %s", yytext); ++yylineno; return 1; ^"iabc:"(?i:abc)+@iabc@\n printf("OK: %s", yytext); ++yylineno; return 1; ^"ia-c:"(?i:[a-c]+)@ia-c@\n printf("OK: %s", yytext); ++yylineno; return 1; diff --git a/tests/test-ccl/test.input b/tests/test-ccl/test.input index f38b4d6..b318fe6 100644 --- a/tests/test-ccl/test.input +++ b/tests/test-ccl/test.input @@ -13,6 +13,8 @@ l-xyz:abcdefghijklmnopqrstuvw@l-xyz@ abcd-bc:aaaaddddaaaa@abcd-bc@ abcde-b-c:aaaaddddeeee@abcde-b-c@ ^XY-^XYZ:ZZZZZZZZZZZ@^XY-^XYZ@ +a+d:abc0123xyz789@a+d@ +a-u+Q:abcQQQQxyz@a-u+Q@ ia:AaAa@ia@ iabc:ABCabcAbCaBc@iabc@ ia-c:ABCabcAbCaBc@ia-c@ |