summaryrefslogtreecommitdiff
path: root/src/ccl.c
diff options
context:
space:
mode:
authorWill Estes <westes575@gmail.com>2014-02-14 07:52:31 -0500
committerWill Estes <westes575@gmail.com>2014-02-16 09:19:51 -0500
commit269620b90047cf95cc8f65c98ca599d4fe18e779 (patch)
tree3fe9f5c8153e91d652284ae5af934ece646274d9 /src/ccl.c
parent638d8a1448fa3d2241c71aff016d323c8654373c (diff)
move flex program sources into src/ directory
The *.[chly] sources are now in the src directory. This implies a bunch of changes in Makefile.am and friends to account for the new location. The .gitignore files are now more local to places where various object files and generated source files occur.
Diffstat (limited to 'src/ccl.c')
-rw-r--r--src/ccl.c310
1 files changed, 310 insertions, 0 deletions
diff --git a/src/ccl.c b/src/ccl.c
new file mode 100644
index 0000000..8d66bb6
--- /dev/null
+++ b/src/ccl.c
@@ -0,0 +1,310 @@
+/* ccl - routines for character classes */
+
+/* Copyright (c) 1990 The Regents of the University of California. */
+/* All rights reserved. */
+
+/* This code is derived from software contributed to Berkeley by */
+/* Vern Paxson. */
+
+/* The United States Government has rights in this work pursuant */
+/* to contract no. DE-AC03-76SF00098 between the United States */
+ /* Department of Energy and the University of California. */
+
+/* This file is part of flex. */
+
+/* Redistribution and use in source and binary forms, with or without */
+/* modification, are permitted provided that the following conditions */
+/* are met: */
+
+/* 1. Redistributions of source code must retain the above copyright */
+/* notice, this list of conditions and the following disclaimer. */
+/* 2. Redistributions in binary form must reproduce the above copyright */
+/* notice, this list of conditions and the following disclaimer in the */
+/* documentation and/or other materials provided with the distribution. */
+
+/* Neither the name of the University nor the names of its contributors */
+/* may be used to endorse or promote products derived from this software */
+/* without specific prior written permission. */
+
+/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
+/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
+/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
+/* PURPOSE. */
+
+#include "flexdef.h"
+
+/* return true if the chr is in the ccl. Takes negation into account. */
+static bool
+ccl_contains (const int cclp, const int ch)
+{
+ int ind, len, i;
+
+ len = ccllen[cclp];
+ ind = cclmap[cclp];
+
+ for (i = 0; i < len; ++i)
+ if (ccltbl[ind + i] == ch)
+ return !cclng[cclp];
+
+ return cclng[cclp];
+}
+
+
+/* ccladd - add a single character to a ccl */
+
+void ccladd (cclp, ch)
+ int cclp;
+ int ch;
+{
+ int ind, len, newpos, i;
+
+ check_char (ch);
+
+ len = ccllen[cclp];
+ ind = cclmap[cclp];
+
+ /* check to see if the character is already in the ccl */
+
+ for (i = 0; i < len; ++i)
+ if (ccltbl[ind + i] == ch)
+ return;
+
+ /* mark newlines */
+ if (ch == nlch)
+ ccl_has_nl[cclp] = true;
+
+ newpos = ind + len;
+
+ if (newpos >= current_max_ccl_tbl_size) {
+ current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
+
+ ++num_reallocs;
+
+ ccltbl = reallocate_Character_array (ccltbl,
+ current_max_ccl_tbl_size);
+ }
+
+ ccllen[cclp] = len + 1;
+ ccltbl[newpos] = ch;
+}
+
+/* dump_cclp - same thing as list_character_set, but for cclps. */
+
+static void dump_cclp (FILE* file, int cclp)
+{
+ register int i;
+
+ putc ('[', file);
+
+ for (i = 0; i < csize; ++i) {
+ if (ccl_contains(cclp, i)){
+ register int start_char = i;
+
+ putc (' ', file);
+
+ fputs (readable_form (i), file);
+
+ while (++i < csize && ccl_contains(cclp,i)) ;
+
+ if (i - 1 > start_char)
+ /* this was a run */
+ fprintf (file, "-%s",
+ readable_form (i - 1));
+
+ putc (' ', file);
+ }
+ }
+
+ putc (']', file);
+}
+
+
+
+/* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
+int
+ccl_set_diff (int a, int b)
+{
+ int d, ch;
+
+ /* create new class */
+ d = cclinit();
+
+ /* In order to handle negation, we spin through all possible chars,
+ * addding each char in a that is not in b.
+ * (This could be O(n^2), but n is small and bounded.)
+ */
+ for ( ch = 0; ch < csize; ++ch )
+ if (ccl_contains (a, ch) && !ccl_contains(b, ch))
+ ccladd (d, ch);
+
+ /* debug */
+ if (0){
+ fprintf(stderr, "ccl_set_diff (");
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, a);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, b);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, d);
+ fprintf(stderr, "\n)\n");
+ }
+ return d;
+}
+
+/* ccl_set_union - create a new ccl as the set union of the two given ccls. */
+int
+ccl_set_union (int a, int b)
+{
+ int d, i;
+
+ /* create new class */
+ d = cclinit();
+
+ /* Add all of a */
+ for (i = 0; i < ccllen[a]; ++i)
+ ccladd (d, ccltbl[cclmap[a] + i]);
+
+ /* Add all of b */
+ for (i = 0; i < ccllen[b]; ++i)
+ ccladd (d, ccltbl[cclmap[b] + i]);
+
+ /* debug */
+ if (0){
+ fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, a);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, b);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, d);
+ fprintf(stderr, "\n)\n");
+ }
+ return d;
+}
+
+
+/* cclinit - return an empty ccl */
+
+int cclinit ()
+{
+ if (++lastccl >= current_maxccls) {
+ current_maxccls += MAX_CCLS_INCREMENT;
+
+ ++num_reallocs;
+
+ cclmap =
+ reallocate_integer_array (cclmap, current_maxccls);
+ ccllen =
+ reallocate_integer_array (ccllen, current_maxccls);
+ cclng = reallocate_integer_array (cclng, current_maxccls);
+ ccl_has_nl =
+ reallocate_bool_array (ccl_has_nl,
+ current_maxccls);
+ }
+
+ if (lastccl == 1)
+ /* we're making the first ccl */
+ cclmap[lastccl] = 0;
+
+ else
+ /* The new pointer is just past the end of the last ccl.
+ * Since the cclmap points to the \first/ character of a
+ * ccl, adding the length of the ccl to the cclmap pointer
+ * will produce a cursor to the first free space.
+ */
+ cclmap[lastccl] =
+ cclmap[lastccl - 1] + ccllen[lastccl - 1];
+
+ ccllen[lastccl] = 0;
+ cclng[lastccl] = 0; /* ccl's start out life un-negated */
+ ccl_has_nl[lastccl] = false;
+
+ return lastccl;
+}
+
+
+/* cclnegate - negate the given ccl */
+
+void cclnegate (cclp)
+ int cclp;
+{
+ cclng[cclp] = 1;
+ ccl_has_nl[cclp] = !ccl_has_nl[cclp];
+}
+
+
+/* list_character_set - list the members of a set of characters in CCL form
+ *
+ * Writes to the given file a character-class representation of those
+ * characters present in the given CCL. A character is present if it
+ * has a non-zero value in the cset array.
+ */
+
+void list_character_set (file, cset)
+ FILE *file;
+ int cset[];
+{
+ register int i;
+
+ putc ('[', file);
+
+ for (i = 0; i < csize; ++i) {
+ if (cset[i]) {
+ register int start_char = i;
+
+ putc (' ', file);
+
+ fputs (readable_form (i), file);
+
+ while (++i < csize && cset[i]) ;
+
+ if (i - 1 > start_char)
+ /* this was a run */
+ fprintf (file, "-%s",
+ readable_form (i - 1));
+
+ putc (' ', file);
+ }
+ }
+
+ putc (']', file);
+}
+
+/** Determines if the range [c1-c2] is unambiguous in a case-insensitive
+ * scanner. Specifically, if a lowercase or uppercase character, x, is in the
+ * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
+ * be in the range. If not, then this range is ambiguous, and the function
+ * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that
+ * [a-z] will be labeled ambiguous because it does not include [A-Z].
+ *
+ * @param c1 the lower end of the range
+ * @param c2 the upper end of the range
+ * @return true if [c1-c2] is not ambiguous for a caseless scanner.
+ */
+bool range_covers_case (int c1, int c2)
+{
+ int i, o;
+
+ for (i = c1; i <= c2; i++) {
+ if (has_case (i)) {
+ o = reverse_case (i);
+ if (o < c1 || c2 < o)
+ return false;
+ }
+ }
+ return true;
+}
+
+/** Reverse the case of a character, if possible.
+ * @return c if case-reversal does not apply.
+ */
+int reverse_case (int c)
+{
+ return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
+}
+
+/** Return true if c is uppercase or lowercase. */
+bool has_case (int c)
+{
+ return (isupper (c) || islower (c)) ? true : false;
+}