diff options
Diffstat (limited to 'ccl.c')
-rw-r--r-- | ccl.c | 310 |
1 files changed, 0 insertions, 310 deletions
@@ -1,310 +0,0 @@ -/* ccl - routines for character classes */ - -/* Copyright (c) 1990 The Regents of the University of California. */ -/* All rights reserved. */ - -/* This code is derived from software contributed to Berkeley by */ -/* Vern Paxson. */ - -/* The United States Government has rights in this work pursuant */ -/* to contract no. DE-AC03-76SF00098 between the United States */ - /* Department of Energy and the University of California. */ - -/* This file is part of flex. */ - -/* Redistribution and use in source and binary forms, with or without */ -/* modification, are permitted provided that the following conditions */ -/* are met: */ - -/* 1. Redistributions of source code must retain the above copyright */ -/* notice, this list of conditions and the following disclaimer. */ -/* 2. Redistributions in binary form must reproduce the above copyright */ -/* notice, this list of conditions and the following disclaimer in the */ -/* documentation and/or other materials provided with the distribution. */ - -/* Neither the name of the University nor the names of its contributors */ -/* may be used to endorse or promote products derived from this software */ -/* without specific prior written permission. */ - -/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ -/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ -/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ -/* PURPOSE. */ - -#include "flexdef.h" - -/* return true if the chr is in the ccl. Takes negation into account. */ -static bool -ccl_contains (const int cclp, const int ch) -{ - int ind, len, i; - - len = ccllen[cclp]; - ind = cclmap[cclp]; - - for (i = 0; i < len; ++i) - if (ccltbl[ind + i] == ch) - return !cclng[cclp]; - - return cclng[cclp]; -} - - -/* ccladd - add a single character to a ccl */ - -void ccladd (cclp, ch) - int cclp; - int ch; -{ - int ind, len, newpos, i; - - check_char (ch); - - len = ccllen[cclp]; - ind = cclmap[cclp]; - - /* check to see if the character is already in the ccl */ - - for (i = 0; i < len; ++i) - if (ccltbl[ind + i] == ch) - return; - - /* mark newlines */ - if (ch == nlch) - ccl_has_nl[cclp] = true; - - newpos = ind + len; - - if (newpos >= current_max_ccl_tbl_size) { - current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; - - ++num_reallocs; - - ccltbl = reallocate_Character_array (ccltbl, - current_max_ccl_tbl_size); - } - - ccllen[cclp] = len + 1; - ccltbl[newpos] = ch; -} - -/* dump_cclp - same thing as list_character_set, but for cclps. */ - -static void dump_cclp (FILE* file, int cclp) -{ - register int i; - - putc ('[', file); - - for (i = 0; i < csize; ++i) { - if (ccl_contains(cclp, i)){ - register int start_char = i; - - putc (' ', file); - - fputs (readable_form (i), file); - - while (++i < csize && ccl_contains(cclp,i)) ; - - if (i - 1 > start_char) - /* this was a run */ - fprintf (file, "-%s", - readable_form (i - 1)); - - putc (' ', file); - } - } - - putc (']', file); -} - - - -/* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */ -int -ccl_set_diff (int a, int b) -{ - int d, ch; - - /* create new class */ - d = cclinit(); - - /* In order to handle negation, we spin through all possible chars, - * addding each char in a that is not in b. - * (This could be O(n^2), but n is small and bounded.) - */ - for ( ch = 0; ch < csize; ++ch ) - if (ccl_contains (a, ch) && !ccl_contains(b, ch)) - ccladd (d, ch); - - /* debug */ - if (0){ - fprintf(stderr, "ccl_set_diff ("); - fprintf(stderr, "\n "); - dump_cclp (stderr, a); - fprintf(stderr, "\n "); - dump_cclp (stderr, b); - fprintf(stderr, "\n "); - dump_cclp (stderr, d); - fprintf(stderr, "\n)\n"); - } - return d; -} - -/* ccl_set_union - create a new ccl as the set union of the two given ccls. */ -int -ccl_set_union (int a, int b) -{ - int d, i; - - /* create new class */ - d = cclinit(); - - /* Add all of a */ - for (i = 0; i < ccllen[a]; ++i) - ccladd (d, ccltbl[cclmap[a] + i]); - - /* Add all of b */ - for (i = 0; i < ccllen[b]; ++i) - ccladd (d, ccltbl[cclmap[b] + i]); - - /* debug */ - if (0){ - fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d); - fprintf(stderr, "\n "); - dump_cclp (stderr, a); - fprintf(stderr, "\n "); - dump_cclp (stderr, b); - fprintf(stderr, "\n "); - dump_cclp (stderr, d); - fprintf(stderr, "\n)\n"); - } - return d; -} - - -/* cclinit - return an empty ccl */ - -int cclinit () -{ - if (++lastccl >= current_maxccls) { - current_maxccls += MAX_CCLS_INCREMENT; - - ++num_reallocs; - - cclmap = - reallocate_integer_array (cclmap, current_maxccls); - ccllen = - reallocate_integer_array (ccllen, current_maxccls); - cclng = reallocate_integer_array (cclng, current_maxccls); - ccl_has_nl = - reallocate_bool_array (ccl_has_nl, - current_maxccls); - } - - if (lastccl == 1) - /* we're making the first ccl */ - cclmap[lastccl] = 0; - - else - /* The new pointer is just past the end of the last ccl. - * Since the cclmap points to the \first/ character of a - * ccl, adding the length of the ccl to the cclmap pointer - * will produce a cursor to the first free space. - */ - cclmap[lastccl] = - cclmap[lastccl - 1] + ccllen[lastccl - 1]; - - ccllen[lastccl] = 0; - cclng[lastccl] = 0; /* ccl's start out life un-negated */ - ccl_has_nl[lastccl] = false; - - return lastccl; -} - - -/* cclnegate - negate the given ccl */ - -void cclnegate (cclp) - int cclp; -{ - cclng[cclp] = 1; - ccl_has_nl[cclp] = !ccl_has_nl[cclp]; -} - - -/* list_character_set - list the members of a set of characters in CCL form - * - * Writes to the given file a character-class representation of those - * characters present in the given CCL. A character is present if it - * has a non-zero value in the cset array. - */ - -void list_character_set (file, cset) - FILE *file; - int cset[]; -{ - register int i; - - putc ('[', file); - - for (i = 0; i < csize; ++i) { - if (cset[i]) { - register int start_char = i; - - putc (' ', file); - - fputs (readable_form (i), file); - - while (++i < csize && cset[i]) ; - - if (i - 1 > start_char) - /* this was a run */ - fprintf (file, "-%s", - readable_form (i - 1)); - - putc (' ', file); - } - } - - putc (']', file); -} - -/** Determines if the range [c1-c2] is unambiguous in a case-insensitive - * scanner. Specifically, if a lowercase or uppercase character, x, is in the - * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also - * be in the range. If not, then this range is ambiguous, and the function - * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that - * [a-z] will be labeled ambiguous because it does not include [A-Z]. - * - * @param c1 the lower end of the range - * @param c2 the upper end of the range - * @return true if [c1-c2] is not ambiguous for a caseless scanner. - */ -bool range_covers_case (int c1, int c2) -{ - int i, o; - - for (i = c1; i <= c2; i++) { - if (has_case (i)) { - o = reverse_case (i); - if (o < c1 || c2 < o) - return false; - } - } - return true; -} - -/** Reverse the case of a character, if possible. - * @return c if case-reversal does not apply. - */ -int reverse_case (int c) -{ - return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c); -} - -/** Return true if c is uppercase or lowercase. */ -bool has_case (int c) -{ - return (isupper (c) || islower (c)) ? true : false; -} |