diff options
author | John Millaway <john43@users.sourceforge.net> | 2006-03-22 19:06:36 +0000 |
---|---|---|
committer | John Millaway <john43@users.sourceforge.net> | 2006-03-22 19:06:36 +0000 |
commit | 68a3b55eb3b423cae377c0a25db8ff30ec8bd02d (patch) | |
tree | 8f7c307ec65eb4cb72264ffe40be77e8c49457a5 | |
parent | ba6cb02a7bd77eced9d15a59bdf0e1ff1d8dee50 (diff) |
Added set difference operator {-} for character classes.
-rw-r--r-- | ccl.c | 80 | ||||
-rw-r--r-- | flexdef.h | 1 | ||||
-rw-r--r-- | parse.y | 12 | ||||
-rw-r--r-- | scan.l | 1 | ||||
-rw-r--r-- | tests/test-ccl/scanner.l | 26 | ||||
-rw-r--r-- | tests/test-ccl/test.input | 5 |
6 files changed, 113 insertions, 12 deletions
@@ -33,6 +33,23 @@ #include "flexdef.h" +/* return true if the chr is in the ccl. Takes negation into account. */ +static bool +ccl_contains (const int cclp, const int ch) +{ + int ind, len, i; + + len = ccllen[cclp]; + ind = cclmap[cclp]; + + for (i = 0; i < len; ++i) + if (ccltbl[ind + i] == ch) + return !cclng[cclp]; + + return cclng[cclp]; +} + + /* ccladd - add a single character to a ccl */ void ccladd (cclp, ch) @@ -71,6 +88,69 @@ void ccladd (cclp, ch) ccltbl[newpos] = ch; } +/* dump_cclp - same thing as list_character_set, but for cclps. */ + +static void dump_cclp (FILE* file, int cclp) +{ + register int i; + + putc ('[', file); + + for (i = 0; i < csize; ++i) { + if (ccl_contains(cclp, i)){ + register int start_char = i; + + putc (' ', file); + + fputs (readable_form (i), file); + + while (++i < csize && ccl_contains(cclp,i)) ; + + if (i - 1 > start_char) + /* this was a run */ + fprintf (file, "-%s", + readable_form (i - 1)); + + putc (' ', file); + } + } + + putc (']', file); +} + + + +/* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */ +int +ccl_set_diff (int a, int b) +{ + int d, ch; + + /* create new class */ + d = cclinit(); + + /* In order to handle negation, we spin through all possible chars, + * adding each char in a that is not in b. + * (This could be O(n^2), but n is small and bounded.) 
+ */ + for ( ch = 0; ch < csize; ++ch ) + if (ccl_contains (a, ch) && !ccl_contains(b, ch)) + ccladd (d, ch); + + /* debug */ + if (0){ + fprintf(stderr, "ccl_set_diff ("); + fprintf(stderr, "\n "); + dump_cclp (stderr, a); + fprintf(stderr, "\n "); + dump_cclp (stderr, b); + fprintf(stderr, "\n "); + dump_cclp (stderr, d); + fprintf(stderr, "\n)\n"); + } + return d; +} + /* cclinit - return an empty ccl */ @@ -719,6 +719,7 @@ extern int yylval; extern void ccladd PROTO ((int, int)); /* add a single character to a ccl */ extern int cclinit PROTO ((void)); /* make an empty ccl */ extern void cclnegate PROTO ((int)); /* negate a ccl */ +extern int ccl_set_diff (int a, int b); /* set difference of two ccls. */ /* List the members of a set of characters in CCL form. */ extern void list_character_set PROTO ((FILE *, int[])); @@ -9,6 +9,9 @@ %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT + +%left CCL_OP_DIFF + /* *POSIX and AT&T lex place the * precedence of the repeat operator, {}, below that of concatenation. 
@@ -792,9 +795,14 @@ singleton : singleton '*' $$ = mkstate( $1 ); } ; +fullccl: + fullccl CCL_OP_DIFF braceccl { $$ = ccl_set_diff ($1, $3); } + | braceccl + ; -fullccl : '[' ccl ']' - { $$ = $2; } +braceccl: + + '[' ccl ']' { $$ = $2; } | '[' '^' ccl ']' { @@ -592,6 +592,7 @@ M4QEND "]]" return '['; } } + "{-}" return CCL_OP_DIFF; /* Check for :space: at the end of the rule so we don't * wrap the expanded regex in '(' ')' -- breaking trailing diff --git a/tests/test-ccl/scanner.l b/tests/test-ccl/scanner.l index 749ac71..383c6bc 100644 --- a/tests/test-ccl/scanner.l +++ b/tests/test-ccl/scanner.l @@ -37,16 +37,22 @@ %% -"^alpha:"[[:^alpha:]]+@alpha@\n printf("OK: %s", yytext); ++yylineno; return 1; -"^digit:"[[:^digit:]]+@digit@\n printf("OK: %s", yytext); ++yylineno; return 1; -"^alnum:"[[:^alnum:]]+@alnum@\n printf("OK: %s", yytext); ++yylineno; return 1; -"^upper:"[[:^upper:]]+@upper@\n printf("OK: %s", yytext); ++yylineno; return 1; -"^lower:"[[:^lower:]]+@lower@\n printf("OK: %s", yytext); ++yylineno; return 1; -"^space:"[[:^space:]]+@space@\n printf("OK: %s", yytext); ++yylineno; return 1; -"^blank:"[[:^blank:]]+@blank@\n printf("OK: %s", yytext); ++yylineno; return 1; -"^punct:"[[:^punct:]]+@punct@\n printf("OK: %s", yytext); ++yylineno; return 1; -"^cntrl:"[[:^cntrl:]]+@cntrl@\n printf("OK: %s", yytext); ++yylineno; return 1; -"^xdigit:"[[:^xdigit:]]+@xdigit@\n printf("OK: %s", yytext); ++yylineno; return 1; +^"^alpha:"[[:^alpha:]]+@alpha@\n printf("OK: %s", yytext); ++yylineno; return 1; +^"^digit:"[[:^digit:]]+@digit@\n printf("OK: %s", yytext); ++yylineno; return 1; +^"^alnum:"[[:^alnum:]]+@alnum@\n printf("OK: %s", yytext); ++yylineno; return 1; +^"^upper:"[[:^upper:]]+@upper@\n printf("OK: %s", yytext); ++yylineno; return 1; +^"^lower:"[[:^lower:]]+@lower@\n printf("OK: %s", yytext); ++yylineno; return 1; +^"^space:"[[:^space:]]+@space@\n printf("OK: %s", yytext); ++yylineno; return 1; +^"^blank:"[[:^blank:]]+@blank@\n printf("OK: %s", yytext); 
++yylineno; return 1; +^"^punct:"[[:^punct:]]+@punct@\n printf("OK: %s", yytext); ++yylineno; return 1; +^"^cntrl:"[[:^cntrl:]]+@cntrl@\n printf("OK: %s", yytext); ++yylineno; return 1; +^"^xdigit:"[[:^xdigit:]]+@xdigit@\n printf("OK: %s", yytext); ++yylineno; return 1; + +^"a-d:"[[:alpha:]]{-}[[:digit:]]+@a-d@\n printf("OK: %s", yytext); ++yylineno; return 1; +^"l-xyz:"([[:lower:]]{-}[xyz])+@l-xyz@\n printf("OK: %s", yytext); ++yylineno; return 1; +^"abcd-bc:"([abcd]{-}[bc])+@abcd-bc@\n printf("OK: %s", yytext); ++yylineno; return 1; +^"abcde-b-c:"([abcde]{-}[b]{-}[c])+@abcde-b-c@\n printf("OK: %s", yytext); ++yylineno; return 1; +^"^XY-^XYZ:"([^XY]{-}[^XYZ])+@^XY-^XYZ@\n printf("OK: %s", yytext); ++yylineno; return 1; .|\n { printf("ERROR: at line %d\n", yylineno); diff --git a/tests/test-ccl/test.input b/tests/test-ccl/test.input index c8c005a..74c96a9 100644 --- a/tests/test-ccl/test.input +++ b/tests/test-ccl/test.input @@ -8,3 +8,8 @@ ^punct:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF Z@punct@ ^cntrl:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@cntrl@ ^xdigit:ghijklmnopqrstuvwxyzGHIJKLMNOPQRSTUVWXYZ ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@xdigit@ +a-d:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@a-d@ +l-xyz:abcdefghijklmnopqrstuvw@l-xyz@ +abcd-bc:aaaaddddaaaa@abcd-bc@ +abcde-b-c:aaaaddddeeee@abcde-b-c@ +^XY-^XYZ:ZZZZZZZZZZZ@^XY-^XYZ@ |