summary | refs | log | tree | commit | diff
path: root/tool/src/org/antlr/v4/misc/CharSupport.java
diff options
context:
space:
mode:
Diffstat (limited to 'tool/src/org/antlr/v4/misc/CharSupport.java')
-rw-r--r-- tool/src/org/antlr/v4/misc/CharSupport.java 153
1 files changed, 153 insertions, 0 deletions
diff --git a/tool/src/org/antlr/v4/misc/CharSupport.java b/tool/src/org/antlr/v4/misc/CharSupport.java
new file mode 100644
index 0000000..22e9648
--- /dev/null
+++ b/tool/src/org/antlr/v4/misc/CharSupport.java
@@ -0,0 +1,153 @@
+/*
+ * [The "BSD license"]
+ * Copyright (c) 2012 Terence Parr
+ * Copyright (c) 2012 Sam Harwell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.antlr.v4.misc;
+
+import org.antlr.v4.runtime.Lexer;
+
+/**
+ * Static helpers for converting between character values and the quoted,
+ * backslash-escaped form in which characters and strings are written as
+ * ANTLR literals.
+ */
+public class CharSupport {
+    /** When converting ANTLR char and string literals, here is the
+     *  value set of escape chars. The table is indexed by the character
+     *  that follows the backslash; an entry of 0 means "not a known escape".
+     */
+    public static int ANTLRLiteralEscapedCharValue[] = new int[255];
+
+    /** Given a char, we need to be able to show as an ANTLR literal.
+     *  The table is indexed by the character value; a {@code null} entry
+     *  means the character has no dedicated escape sequence.
+     */
+    public static String ANTLRLiteralCharValueEscape[] = new String[255];
+
+    static {
+        ANTLRLiteralEscapedCharValue['n'] = '\n';
+        ANTLRLiteralEscapedCharValue['r'] = '\r';
+        ANTLRLiteralEscapedCharValue['t'] = '\t';
+        ANTLRLiteralEscapedCharValue['b'] = '\b';
+        ANTLRLiteralEscapedCharValue['f'] = '\f';
+        ANTLRLiteralEscapedCharValue['\\'] = '\\';
+        ANTLRLiteralEscapedCharValue['\''] = '\'';
+        ANTLRLiteralEscapedCharValue['"'] = '"';
+        ANTLRLiteralCharValueEscape['\n'] = "\\n";
+        ANTLRLiteralCharValueEscape['\r'] = "\\r";
+        ANTLRLiteralCharValueEscape['\t'] = "\\t";
+        ANTLRLiteralCharValueEscape['\b'] = "\\b";
+        ANTLRLiteralCharValueEscape['\f'] = "\\f";
+        ANTLRLiteralCharValueEscape['\\'] = "\\\\";
+        ANTLRLiteralCharValueEscape['\''] = "\\'";
+    }
+
+    /** Return a string representing the escaped char for code c. E.g., If c
+     *  has value 0x100, you will get "\u0100". ASCII gets the usual
+     *  char (non-hex) representation. Control characters are spit out
+     *  as unicode. While this is specially set up for returning Java strings,
+     *  it can be used by any language target that has the same syntax. :)
+     *
+     *  @param c the character value to render
+     *  @return the single-quoted literal for {@code c}, or the text
+     *          {@code '<INVALID>'} when {@code c} is negative, below
+     *          {@code Lexer.MIN_CHAR_VALUE}, or above 0xFFFF
+     */
+    public static String getANTLRCharLiteralForChar(int c) {
+        // Values above 0xFFFF cannot be written with a 4-digit escape; the
+        // (char) casts and the hex padding below would silently truncate them
+        // and report a different character. Negative values cannot index the
+        // escape table.
+        if ( c<Lexer.MIN_CHAR_VALUE || c<0 || c>Character.MAX_VALUE ) {
+            return "'<INVALID>'";
+        }
+        if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
+            return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
+        }
+        if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
+             !Character.isISOControl((char)c) ) {
+            // Backslash and quote are normally served by the escape table
+            // above; these checks only matter if that public, mutable table
+            // has had the entries cleared.
+            if ( c=='\\' ) {
+                return "'\\\\'";
+            }
+            if ( c=='\'' ) {
+                return "'\\''";
+            }
+            return '\''+Character.toString((char)c)+'\'';
+        }
+        // turn on the bit above max "\uFFFF" value so that we pad with zeros
+        // then only take last 4 digits
+        String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
+        return "'\\u"+hex+"'";
+    }
+
+    /** Given a literal like (the 3 char sequence with single quotes) 'a',
+     *  return the int value of 'a'. Convert escape sequences here also.
+     *  Return -1 if not single char.
+     *
+     *  @param literal the literal including its surrounding quotes
+     *  @return the character value, or -1 if {@code literal} is null, shorter
+     *          than three characters, or does not denote exactly one character
+     */
+    public static int getCharValueFromGrammarCharLiteral(String literal) {
+        if ( literal==null || literal.length()<3 ) return -1;
+        return getCharValueFromCharInGrammarLiteral(literal.substring(1,literal.length()-1));
+    }
+
+    /** Given char x or \t or \u1234 return the char value;
+     *  Unnecessary escapes like '\{' yield -1.
+     *
+     *  @param cstr the text between the quotes: one plain character, a
+     *         two-character backslash escape, or a six-character
+     *         backslash-u escape with four hex digits
+     *  @return the character value, or -1 if {@code cstr} is null or is not
+     *          one of the recognised forms
+     */
+    public static int getCharValueFromCharInGrammarLiteral(String cstr) {
+        if ( cstr==null ) return -1;
+        switch ( cstr.length() ) {
+            case 1 :
+                // 'x'
+                return cstr.charAt(0); // no escape char
+            case 2 :
+                if ( cstr.charAt(0)!='\\' ) return -1;
+                // '\x'  (antlr lexer will catch invalid char)
+                if ( Character.isDigit(cstr.charAt(1)) ) return -1;
+                int escChar = cstr.charAt(1);
+                // The table only covers low character values; anything
+                // beyond it is by definition not a known escape.
+                if ( escChar>=ANTLRLiteralEscapedCharValue.length ) return -1;
+                int charVal = ANTLRLiteralEscapedCharValue[escChar];
+                if ( charVal==0 ) return -1;
+                return charVal;
+            case 6 :
+                // '\u1234'
+                if ( !cstr.startsWith("\\u") ) return -1;
+                return parseHexDigits(cstr, 2);
+            default :
+                return -1;
+        }
+    }
+
+    /** Parse the characters of {@code s} from index {@code start} to the end
+     *  as unsigned hexadecimal digits. Returns -1 if any character is not a
+     *  hex digit (including a leading sign), rather than throwing.
+     */
+    private static int parseHexDigits(String s, int start) {
+        int value = 0;
+        for (int i = start; i<s.length(); i++) {
+            int digit = Character.digit(s.charAt(i), 16);
+            if ( digit<0 ) return -1;
+            value = (value<<4) | digit;
+        }
+        return value;
+    }
+
+    /** Strip the surrounding quotes from a grammar string literal and
+     *  translate the escape sequences inside it.
+     *
+     *  Each backslash escape is decoded with
+     *  {@link #getCharValueFromCharInGrammarLiteral}; an escape that is not
+     *  recognised is copied to the result verbatim, backslash included.
+     *  A lone backslash just before the closing quote, or a backslash-u
+     *  escape with fewer than four characters before the closing quote,
+     *  ends the conversion: that escape and anything after it are dropped.
+     *
+     *  @param literal the literal including its first and last quote
+     *         character; must not be null
+     *  @return the decoded contents
+     */
+    public static String getStringFromGrammarStringLiteral(String literal) {
+        StringBuilder buf = new StringBuilder();
+        int i = 1; // skip first quote
+        int n = literal.length()-1; // skip last quote
+        while ( i < n ) { // scan all but last quote
+            int end = i+1;
+            if ( literal.charAt(i) == '\\' ) {
+                end = i+2;
+                if ( (i+1)>=n ) break; // ignore spurious \ on end
+                if ( literal.charAt(i+1) == 'u' ) end = i+6;
+            }
+            if ( end>n ) break; // truncated \\u escape
+            String esc = literal.substring(i, end);
+            int c = getCharValueFromCharInGrammarLiteral(esc);
+            if ( c==-1 ) { buf.append(esc); }
+            else buf.append((char)c);
+            i = end;
+        }
+        return buf.toString();
+    }
+
+    /** Return {@code s} with its first character converted to upper case.
+     *
+     *  @param s the string to capitalize
+     *  @return the capitalized string; {@code s} itself when it is null or
+     *          empty, since there is no first character to convert
+     */
+    public static String capitalize(String s) {
+        if ( s==null || s.length()==0 ) return s;
+        return Character.toUpperCase(s.charAt(0)) + s.substring(1);
+    }
+}