summaryrefslogtreecommitdiff
path: root/parse.y
diff options
context:
space:
mode:
authorJohn Millaway <john43@users.sourceforge.net>2002-12-16 23:33:08 +0000
committerJohn Millaway <john43@users.sourceforge.net>2002-12-16 23:33:08 +0000
commit604e6483d56d1da0a19491c2a5a35f7bf048afc9 (patch)
treea3d7a1979c6f47d23c0bafa7a62ff7678d8a934f /parse.y
parent956b9210a1e161b760ce6c5cebf5e298866188c5 (diff)
Fixed bug submitted by Bruce Lilly <blilly@erols.com> where character
ranges would yield unexpected behavior in a caseless scanner. Also, flex now emits a warning if the range looks like trouble.
Diffstat (limited to 'parse.y')
-rw-r--r--parse.y47
1 files changed, 40 insertions, 7 deletions
diff --git a/parse.y b/parse.y
index 5e6167c..4cde223 100644
--- a/parse.y
+++ b/parse.y
@@ -109,6 +109,13 @@ int scon_stk_ptr;
static int madeany = false; /* whether we've made the '.' character class */
int previous_continued_action; /* whether the previous rule's action was '|' */
+#define format_warn3(fmt, a1, a2) \
+ do{ \
+ char fw3_msg[MAXLINE];\
+ snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
+ warn( fw3_msg );\
+ }while(0)
+
/* Expand a POSIX character class expression. */
#define CCL_EXPR(func) \
do{ \
@@ -787,13 +794,39 @@ fullccl : '[' ccl ']'
ccl : ccl CHAR '-' CHAR
{
- if ( caseins )
- {
- if ( $2 >= 'A' && $2 <= 'Z' )
- $2 = clower( $2 );
- if ( $4 >= 'A' && $4 <= 'Z' )
- $4 = clower( $4 );
- }
+
+ if (caseins)
+ {
+ /* Squish the character range to lowercase only if BOTH
+ * ends of the range are uppercase.
+ */
+ if (isupper ($2) && isupper ($4))
+ {
+ $2 = tolower ($2);
+ $4 = tolower ($4);
+ }
+
+ /* If one end of the range has case and the other
+ * does not, or the cases are different, then we're not
+ * sure what range the user is trying to express.
+ * Examples: [@-z] or [S-t]
+ */
+ else if (has_case ($2) != has_case ($4)
+ || (has_case ($2) && (b_islower ($2) != b_islower ($4))))
+ format_warn3 (
+ _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
+ $2, $4);
+
+ /* If the range spans uppercase characters but not
+ * lowercase (or vice-versa), then should we automatically
+ * include lowercase characters in the range?
+ * Example: [@-_] spans [a-z] but not [A-Z]
+ */
+ else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
+ format_warn3 (
+ _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
+ $2, $4);
+ }
if ( $2 > $4 )
synerr( _("negative range in character class") );