summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John Millaway <john43@users.sourceforge.net> 2006-03-22 19:06:36 +0000
committer John Millaway <john43@users.sourceforge.net> 2006-03-22 19:06:36 +0000
commit 68a3b55eb3b423cae377c0a25db8ff30ec8bd02d (patch)
tree 8f7c307ec65eb4cb72264ffe40be77e8c49457a5
parent ba6cb02a7bd77eced9d15a59bdf0e1ff1d8dee50 (diff)
Added set difference operator {-} for character classes.
-rw-r--r-- ccl.c 80
-rw-r--r-- flexdef.h 1
-rw-r--r-- parse.y 12
-rw-r--r-- scan.l 1
-rw-r--r-- tests/test-ccl/scanner.l 26
-rw-r--r-- tests/test-ccl/test.input 5
6 files changed, 113 insertions, 12 deletions
diff --git a/ccl.c b/ccl.c
index 1e3c0bb..d23611d 100644
--- a/ccl.c
+++ b/ccl.c
@@ -33,6 +33,23 @@
#include "flexdef.h"
+/* ccl_contains - return true if ch is in the ccl cclp. Takes negation into account: a match among the ccllen[cclp] stored entries yields !cclng[cclp], no match yields cclng[cclp]. */
+static bool
+ccl_contains (const int cclp, const int ch)
+{
+ int ind, len, i;
+
+ len = ccllen[cclp];
+ ind = cclmap[cclp];
+
+ for (i = 0; i < len; ++i)
+ if (ccltbl[ind + i] == ch)
+ return !cclng[cclp];
+
+ return cclng[cclp];
+}
+
+
/* ccladd - add a single character to a ccl */
void ccladd (cclp, ch)
@@ -71,6 +88,69 @@ void ccladd (cclp, ch)
ccltbl[newpos] = ch;
}
+/* dump_cclp - same thing as list_character_set, but for cclps: writes the members of cclp (as reported by ccl_contains, for characters 0..csize-1) to file between '[' and ']', printing consecutive members as a first-last range. */
+
+static void dump_cclp (FILE* file, int cclp)
+{
+ register int i;
+
+ putc ('[', file);
+
+ for (i = 0; i < csize; ++i) {
+ if (ccl_contains(cclp, i)){
+ register int start_char = i;
+
+ putc (' ', file);
+
+ fputs (readable_form (i), file);
+
+ while (++i < csize && ccl_contains(cclp,i)) ;
+
+ if (i - 1 > start_char)
+ /* this was a run of more than one character, so also print its last member */
+ fprintf (file, "-%s",
+ readable_form (i - 1));
+
+ putc (' ', file);
+ }
+ }
+
+ putc (']', file);
+}
+
+
+
+/* ccl_set_diff - create a new ccl (via cclinit) holding the set difference of the two given ccls, i.e. every character in a that is not in b, and return it. */
+int
+ccl_set_diff (int a, int b)
+{
+ int d, ch;
+
+ /* create new class */
+ d = cclinit();
+
+ /* In order to handle negation, we spin through all possible chars,
+ * adding each char in a that is not in b.
+ * (This could be O(n^2), but n is small and bounded.)
+ */
+ for ( ch = 0; ch < csize; ++ch )
+ if (ccl_contains (a, ch) && !ccl_contains(b, ch))
+ ccladd (d, ch);
+
+ /* debug dump of a, b and d to stderr; disabled by the constant-false condition */
+ if (0){
+ fprintf(stderr, "ccl_set_diff (");
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, a);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, b);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, d);
+ fprintf(stderr, "\n)\n");
+ }
+ return d;
+}
+
/* cclinit - return an empty ccl */
diff --git a/flexdef.h b/flexdef.h
index 15abf9d..7e38602 100644
--- a/flexdef.h
+++ b/flexdef.h
@@ -719,6 +719,7 @@ extern int yylval;
extern void ccladd PROTO ((int, int)); /* add a single character to a ccl */
extern int cclinit PROTO ((void)); /* make an empty ccl */
extern void cclnegate PROTO ((int)); /* negate a ccl */
+extern int ccl_set_diff (int a, int b); /* set difference of two ccls. */
/* List the members of a set of characters in CCL form. */
extern void list_character_set PROTO ((FILE *, int[]));
diff --git a/parse.y b/parse.y
index e7c79cf..1a37341 100644
--- a/parse.y
+++ b/parse.y
@@ -9,6 +9,9 @@
%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
+
+%left CCL_OP_DIFF
+
/*
*POSIX and AT&T lex place the
* precedence of the repeat operator, {}, below that of concatenation.
@@ -792,9 +795,14 @@ singleton : singleton '*'
$$ = mkstate( $1 );
}
;
+fullccl:
+ fullccl CCL_OP_DIFF braceccl { $$ = ccl_set_diff ($1, $3); }
+ | braceccl
+ ;
-fullccl : '[' ccl ']'
- { $$ = $2; }
+braceccl:
+
+ '[' ccl ']' { $$ = $2; }
| '[' '^' ccl ']'
{
diff --git a/scan.l b/scan.l
index cff5386..5393ab0 100644
--- a/scan.l
+++ b/scan.l
@@ -592,6 +592,7 @@ M4QEND "]]"
return '[';
}
}
+ "{-}" return CCL_OP_DIFF;
/* Check for :space: at the end of the rule so we don't
* wrap the expanded regex in '(' ')' -- breaking trailing
diff --git a/tests/test-ccl/scanner.l b/tests/test-ccl/scanner.l
index 749ac71..383c6bc 100644
--- a/tests/test-ccl/scanner.l
+++ b/tests/test-ccl/scanner.l
@@ -37,16 +37,22 @@
%%
-"^alpha:"[[:^alpha:]]+@alpha@\n printf("OK: %s", yytext); ++yylineno; return 1;
-"^digit:"[[:^digit:]]+@digit@\n printf("OK: %s", yytext); ++yylineno; return 1;
-"^alnum:"[[:^alnum:]]+@alnum@\n printf("OK: %s", yytext); ++yylineno; return 1;
-"^upper:"[[:^upper:]]+@upper@\n printf("OK: %s", yytext); ++yylineno; return 1;
-"^lower:"[[:^lower:]]+@lower@\n printf("OK: %s", yytext); ++yylineno; return 1;
-"^space:"[[:^space:]]+@space@\n printf("OK: %s", yytext); ++yylineno; return 1;
-"^blank:"[[:^blank:]]+@blank@\n printf("OK: %s", yytext); ++yylineno; return 1;
-"^punct:"[[:^punct:]]+@punct@\n printf("OK: %s", yytext); ++yylineno; return 1;
-"^cntrl:"[[:^cntrl:]]+@cntrl@\n printf("OK: %s", yytext); ++yylineno; return 1;
-"^xdigit:"[[:^xdigit:]]+@xdigit@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"^alpha:"[[:^alpha:]]+@alpha@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"^digit:"[[:^digit:]]+@digit@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"^alnum:"[[:^alnum:]]+@alnum@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"^upper:"[[:^upper:]]+@upper@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"^lower:"[[:^lower:]]+@lower@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"^space:"[[:^space:]]+@space@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"^blank:"[[:^blank:]]+@blank@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"^punct:"[[:^punct:]]+@punct@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"^cntrl:"[[:^cntrl:]]+@cntrl@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"^xdigit:"[[:^xdigit:]]+@xdigit@\n printf("OK: %s", yytext); ++yylineno; return 1;
+
+^"a-d:"[[:alpha:]]{-}[[:digit:]]+@a-d@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"l-xyz:"([[:lower:]]{-}[xyz])+@l-xyz@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"abcd-bc:"([abcd]{-}[bc])+@abcd-bc@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"abcde-b-c:"([abcde]{-}[b]{-}[c])+@abcde-b-c@\n printf("OK: %s", yytext); ++yylineno; return 1;
+^"^XY-^XYZ:"([^XY]{-}[^XYZ])+@^XY-^XYZ@\n printf("OK: %s", yytext); ++yylineno; return 1;
.|\n {
printf("ERROR: at line %d\n", yylineno);
diff --git a/tests/test-ccl/test.input b/tests/test-ccl/test.input
index c8c005a..74c96a9 100644
--- a/tests/test-ccl/test.input
+++ b/tests/test-ccl/test.input
@@ -8,3 +8,8 @@
^punct:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF Z@punct@
^cntrl:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@cntrl@
^xdigit:ghijklmnopqrstuvwxyzGHIJKLMNOPQRSTUVWXYZ ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@xdigit@
+a-d:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@a-d@
+l-xyz:abcdefghijklmnopqrstuvw@l-xyz@
+abcd-bc:aaaaddddaaaa@abcd-bc@
+abcde-b-c:aaaaddddeeee@abcde-b-c@
+^XY-^XYZ:ZZZZZZZZZZZ@^XY-^XYZ@