summaryrefslogtreecommitdiff
path: root/tool/src/org/antlr/v4/misc
diff options
context:
space:
mode:
author Andrius Merkys <andrius.merkys@gmail.com> 2018-11-23 04:52:08 -0500
committer Andrius Merkys <andrius.merkys@gmail.com> 2018-11-23 04:52:08 -0500
commit 7b19e9be5be41c69c451b63c526bee059881f9b1 (patch)
tree 699bf0523df6868d15843981ea9914ac096ee270 /tool/src/org/antlr/v4/misc
parent 1d0464db4ec5e5c20b2ae62bb3c4eceaa6840bde (diff)
New upstream version 4.7.1
Diffstat (limited to 'tool/src/org/antlr/v4/misc')
-rw-r--r-- tool/src/org/antlr/v4/misc/CharSupport.java 147
-rw-r--r-- tool/src/org/antlr/v4/misc/EscapeSequenceParsing.java 186
-rw-r--r-- tool/src/org/antlr/v4/misc/FrequencySet.java 2
-rw-r--r-- tool/src/org/antlr/v4/misc/Graph.java 2
-rw-r--r-- tool/src/org/antlr/v4/misc/MutableInt.java 2
-rw-r--r-- tool/src/org/antlr/v4/misc/OrderedHashMap.java 2
-rw-r--r-- tool/src/org/antlr/v4/misc/Utils.java 5
7 files changed, 296 insertions, 50 deletions
diff --git a/tool/src/org/antlr/v4/misc/CharSupport.java b/tool/src/org/antlr/v4/misc/CharSupport.java
index 50db0d9..47e0033 100644
--- a/tool/src/org/antlr/v4/misc/CharSupport.java
+++ b/tool/src/org/antlr/v4/misc/CharSupport.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
@@ -7,6 +7,10 @@
package org.antlr.v4.misc;
import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.misc.Interval;
+import org.antlr.v4.runtime.misc.IntervalSet;
+
+import java.util.Iterator;
/** */
public class CharSupport {
@@ -26,47 +30,48 @@ public class CharSupport {
ANTLRLiteralEscapedCharValue['b'] = '\b';
ANTLRLiteralEscapedCharValue['f'] = '\f';
ANTLRLiteralEscapedCharValue['\\'] = '\\';
- ANTLRLiteralEscapedCharValue['\''] = '\'';
- ANTLRLiteralEscapedCharValue['"'] = '"';
- ANTLRLiteralEscapedCharValue['-'] = '-';
- ANTLRLiteralEscapedCharValue[']'] = ']';
ANTLRLiteralCharValueEscape['\n'] = "\\n";
ANTLRLiteralCharValueEscape['\r'] = "\\r";
ANTLRLiteralCharValueEscape['\t'] = "\\t";
ANTLRLiteralCharValueEscape['\b'] = "\\b";
ANTLRLiteralCharValueEscape['\f'] = "\\f";
ANTLRLiteralCharValueEscape['\\'] = "\\\\";
- ANTLRLiteralCharValueEscape['\''] = "\\'";
}
/** Return a string representing the escaped char for code c. E.g., If c
- * has value 0x100, you will get "\u0100". ASCII gets the usual
- * char (non-hex) representation. Control characters are spit out
- * as unicode. While this is specially set up for returning Java strings,
- * it can be used by any language target that has the same syntax. :)
+ * has value 0x100, you will get "\\u0100". ASCII gets the usual
+ * char (non-hex) representation. Non-ASCII characters are spit out
+ * as \\uXXXX or \\u{XXXXXX} escapes.
*/
public static String getANTLRCharLiteralForChar(int c) {
- if ( c< Lexer.MIN_CHAR_VALUE ) {
- return "'<INVALID>'";
- }
- if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
- return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
+ String result;
+ if ( c < Lexer.MIN_CHAR_VALUE ) {
+ result = "<INVALID>";
}
- if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
- !Character.isISOControl((char)c) ) {
- if ( c=='\\' ) {
- return "'\\\\'";
+ else {
+ String charValueEscape = c < ANTLRLiteralCharValueEscape.length ? ANTLRLiteralCharValueEscape[c] : null;
+ if (charValueEscape != null) {
+ result = charValueEscape;
+ }
+ else if (Character.UnicodeBlock.of((char) c) == Character.UnicodeBlock.BASIC_LATIN &&
+ !Character.isISOControl((char) c)) {
+ if (c == '\\') {
+ result = "\\\\";
+ }
+ else if (c == '\'') {
+ result = "\\'";
+ }
+ else {
+ result = Character.toString((char) c);
+ }
}
- if ( c=='\'') {
- return "'\\''";
+ else if (c <= 0xFFFF) {
+ result = String.format("\\u%04X", c);
+ } else {
+ result = String.format("\\u{%06X}", c);
}
- return '\''+Character.toString((char)c)+'\'';
}
- // turn on the bit above max "\uFFFF" value so that we pad with zeros
- // then only take last 4 digits
- String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
- String unicodeStr = "'\\u"+hex+"'";
- return unicodeStr;
+ return '\'' + result + '\'';
}
/** Given a literal like (the 3 char sequence with single quotes) 'a',
@@ -87,11 +92,26 @@ public class CharSupport {
if ( literal.charAt(i) == '\\' ) {
end = i+2;
if ( i+1 < n && literal.charAt(i+1) == 'u' ) {
- for (end = i + 2; end < i + 6; end++) {
- if ( end>n ) return null; // invalid escape sequence.
- char charAt = literal.charAt(end);
- if (!Character.isDigit(charAt) && !(charAt >= 'a' && charAt <= 'f') && !(charAt >= 'A' && charAt <= 'F')) {
- return null; // invalid escape sequence.
+ if ( i+2 < n && literal.charAt(i+2) == '{' ) { // extended escape sequence
+ end = i + 3;
+ while (true) {
+ if ( end + 1 > n ) return null; // invalid escape sequence.
+ char charAt = literal.charAt(end++);
+ if (charAt == '}') {
+ break;
+ }
+ if (!Character.isDigit(charAt) && !(charAt >= 'a' && charAt <= 'f') && !(charAt >= 'A' && charAt <= 'F')) {
+ return null; // invalid escape sequence.
+ }
+ }
+ }
+ else {
+ for (end = i + 2; end < i + 6; end++) {
+ if ( end>n ) return null; // invalid escape sequence.
+ char charAt = literal.charAt(end);
+ if (!Character.isDigit(charAt) && !(charAt >= 'a' && charAt <= 'f') && !(charAt >= 'A' && charAt <= 'F')) {
+ return null; // invalid escape sequence.
+ }
}
}
}
@@ -102,13 +122,13 @@ public class CharSupport {
if ( c==-1 ) {
return null; // invalid escape sequence.
}
- else buf.append((char)c);
+ else buf.appendCodePoint(c);
i = end;
}
return buf.toString();
}
- /** Given char x or \t or \u1234 return the char value;
+ /** Given char x or \\t or \\u1234 return the char value;
* Unnecessary escapes like '\{' yield -1.
*/
public static int getCharValueFromCharInGrammarLiteral(String cstr) {
@@ -119,28 +139,67 @@ public class CharSupport {
case 2:
if ( cstr.charAt(0)!='\\' ) return -1;
// '\x' (antlr lexer will catch invalid char)
- if ( Character.isDigit(cstr.charAt(1)) ) return -1;
- int escChar = cstr.charAt(1);
+ char escChar = cstr.charAt(1);
+ if (escChar == '\'') return escChar; // escape quote only in string literals.
int charVal = ANTLRLiteralEscapedCharValue[escChar];
- if ( charVal==0 ) return -1;
+ if (charVal == 0) return -1;
return charVal;
case 6:
- // '\u1234'
+ // '\\u1234' or '\\u{12}'
if ( !cstr.startsWith("\\u") ) return -1;
- String unicodeChars = cstr.substring(2, cstr.length());
- int result = -1;
- try {
- result = Integer.parseInt(unicodeChars, 16);
+ int startOff;
+ int endOff;
+ if ( cstr.charAt(2) == '{' ) {
+ startOff = 3;
+ endOff = cstr.indexOf('}');
}
- catch (NumberFormatException e) {
+ else {
+ startOff = 2;
+ endOff = cstr.length();
}
- return result;
+ return parseHexValue(cstr, startOff, endOff);
default:
+ if ( cstr.startsWith("\\u{") ) {
+ return parseHexValue(cstr, 3, cstr.indexOf('}'));
+ }
return -1;
}
}
+ public static int parseHexValue(String cstr, int startOff, int endOff) {
+ if (startOff < 0 || endOff < 0) {
+ return -1;
+ }
+ String unicodeChars = cstr.substring(startOff, endOff);
+ int result = -1;
+ try {
+ result = Integer.parseInt(unicodeChars, 16);
+ }
+ catch (NumberFormatException e) {
+ }
+ return result;
+ }
+
public static String capitalize(String s) {
return Character.toUpperCase(s.charAt(0)) + s.substring(1);
}
+
+ public static String getIntervalSetEscapedString(IntervalSet intervalSet) {
+ StringBuilder buf = new StringBuilder();
+ Iterator<Interval> iter = intervalSet.getIntervals().iterator();
+ while (iter.hasNext()) {
+ Interval interval = iter.next();
+ buf.append(getRangeEscapedString(interval.a, interval.b));
+ if (iter.hasNext()) {
+ buf.append(" | ");
+ }
+ }
+ return buf.toString();
+ }
+
+ public static String getRangeEscapedString(int codePointStart, int codePointEnd) {
+ return codePointStart != codePointEnd
+ ? getANTLRCharLiteralForChar(codePointStart) + ".." + getANTLRCharLiteralForChar(codePointEnd)
+ : getANTLRCharLiteralForChar(codePointStart);
+ }
}
diff --git a/tool/src/org/antlr/v4/misc/EscapeSequenceParsing.java b/tool/src/org/antlr/v4/misc/EscapeSequenceParsing.java
new file mode 100644
index 0000000..d34988d
--- /dev/null
+++ b/tool/src/org/antlr/v4/misc/EscapeSequenceParsing.java
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+package org.antlr.v4.misc;
+
+import org.antlr.v4.runtime.misc.IntervalSet;
+import org.antlr.v4.unicode.UnicodeData;
+
+import java.util.Objects;
+
+/**
+ * Utility class to parse escapes like:
+ * \\n
+ * \\uABCD
+ * \\u{10ABCD}
+ * \\p{Foo}
+ * \\P{Bar}
+ * \\p{Baz=Blech}
+ * \\P{Baz=Blech}
+ */
+public abstract class EscapeSequenceParsing {
+ public static class Result {
+ public enum Type {
+ INVALID,
+ CODE_POINT,
+ PROPERTY
+ };
+
+ public final Type type;
+ public final int codePoint;
+ public final IntervalSet propertyIntervalSet;
+ public final int startOffset;
+ public final int parseLength;
+
+ public Result(Type type, int codePoint, IntervalSet propertyIntervalSet, int startOffset, int parseLength) {
+ this.type = type;
+ this.codePoint = codePoint;
+ this.propertyIntervalSet = propertyIntervalSet;
+ this.startOffset = startOffset;
+ this.parseLength = parseLength;
+ }
+
+ @Override
+ public String toString() {
+ return String.format(
+ "%s type=%s codePoint=%d propertyIntervalSet=%s parseLength=%d",
+ super.toString(),
+ type,
+ codePoint,
+ propertyIntervalSet,
+ parseLength);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof Result)) {
+ return false;
+ }
+ Result that = (Result) other;
+ if (this == that) {
+ return true;
+ }
+ return Objects.equals(this.type, that.type) &&
+ Objects.equals(this.codePoint, that.codePoint) &&
+ Objects.equals(this.propertyIntervalSet, that.propertyIntervalSet) &&
+ Objects.equals(this.parseLength, that.parseLength);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(type, codePoint, propertyIntervalSet, parseLength);
+ }
+ }
+
+ /**
+ * Parses a single escape sequence starting at {@code startOff}.
+ *
+ * Returns a type of INVALID if no valid escape sequence was found, a Result otherwise.
+ */
+ public static Result parseEscape(String s, int startOff) {
+ int offset = startOff;
+ if (offset + 2 > s.length() || s.codePointAt(offset) != '\\') {
+ return invalid(startOff, s.length()-1);
+ }
+ // Move past backslash
+ offset++;
+ int escaped = s.codePointAt(offset);
+ // Move past escaped code point
+ offset += Character.charCount(escaped);
+ if (escaped == 'u') {
+ // \\u{1} is the shortest we support
+ if (offset + 3 > s.length()) {
+ return invalid(startOff, s.length()-1);
+ }
+ int hexStartOffset;
+ int hexEndOffset; // appears to be exclusive
+ if (s.codePointAt(offset) == '{') {
+ hexStartOffset = offset + 1;
+ hexEndOffset = s.indexOf('}', hexStartOffset);
+ if (hexEndOffset == -1) {
+ return invalid(startOff, s.length()-1);
+ }
+ offset = hexEndOffset + 1;
+ }
+ else {
+ if (offset + 4 > s.length()) {
+ return invalid(startOff, s.length()-1);
+ }
+ hexStartOffset = offset;
+ hexEndOffset = offset + 4;
+ offset = hexEndOffset;
+ }
+ int codePointValue = CharSupport.parseHexValue(s, hexStartOffset, hexEndOffset);
+ if (codePointValue == -1 || codePointValue > Character.MAX_CODE_POINT) {
+ return invalid(startOff, startOff+6-1);
+ }
+ return new Result(
+ Result.Type.CODE_POINT,
+ codePointValue,
+ IntervalSet.EMPTY_SET,
+ startOff,
+ offset - startOff);
+ }
+ else if (escaped == 'p' || escaped == 'P') {
+ // \p{L} is the shortest we support
+ if (offset + 3 > s.length()) {
+ return invalid(startOff, s.length()-1);
+ }
+ if (s.codePointAt(offset) != '{') {
+ return invalid(startOff, offset);
+ }
+ int openBraceOffset = offset;
+ int closeBraceOffset = s.indexOf('}', openBraceOffset);
+ if (closeBraceOffset == -1) {
+ return invalid(startOff, s.length()-1);
+ }
+ String propertyName = s.substring(openBraceOffset + 1, closeBraceOffset);
+ IntervalSet propertyIntervalSet = UnicodeData.getPropertyCodePoints(propertyName);
+ if (propertyIntervalSet == null) {
+ return invalid(startOff, closeBraceOffset);
+ }
+ offset = closeBraceOffset + 1;
+ if (escaped == 'P') {
+ propertyIntervalSet = propertyIntervalSet.complement(IntervalSet.COMPLETE_CHAR_SET);
+ }
+ return new Result(
+ Result.Type.PROPERTY,
+ -1,
+ propertyIntervalSet,
+ startOff,
+ offset - startOff);
+ }
+ else if (escaped < CharSupport.ANTLRLiteralEscapedCharValue.length) {
+ int codePoint = CharSupport.ANTLRLiteralEscapedCharValue[escaped];
+ if (codePoint == 0) {
+ if (escaped != ']' && escaped != '-') { // escape ']' and '-' only in char sets.
+ return invalid(startOff, startOff+1);
+ }
+ else {
+ codePoint = escaped;
+ }
+ }
+ return new Result(
+ Result.Type.CODE_POINT,
+ codePoint,
+ IntervalSet.EMPTY_SET,
+ startOff,
+ offset - startOff);
+ }
+ else {
+ return invalid(startOff,s.length()-1);
+ }
+ }
+
+ private static Result invalid(int start, int stop) { // start..stop is inclusive
+ return new Result(
+ Result.Type.INVALID,
+ 0,
+ IntervalSet.EMPTY_SET,
+ start,
+ stop - start + 1);
+ }
+}
diff --git a/tool/src/org/antlr/v4/misc/FrequencySet.java b/tool/src/org/antlr/v4/misc/FrequencySet.java
index d3d4c73..343f12b 100644
--- a/tool/src/org/antlr/v4/misc/FrequencySet.java
+++ b/tool/src/org/antlr/v4/misc/FrequencySet.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
diff --git a/tool/src/org/antlr/v4/misc/Graph.java b/tool/src/org/antlr/v4/misc/Graph.java
index ec4c147..ea10cc0 100644
--- a/tool/src/org/antlr/v4/misc/Graph.java
+++ b/tool/src/org/antlr/v4/misc/Graph.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
diff --git a/tool/src/org/antlr/v4/misc/MutableInt.java b/tool/src/org/antlr/v4/misc/MutableInt.java
index 607b8f6..007f4c2 100644
--- a/tool/src/org/antlr/v4/misc/MutableInt.java
+++ b/tool/src/org/antlr/v4/misc/MutableInt.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
diff --git a/tool/src/org/antlr/v4/misc/OrderedHashMap.java b/tool/src/org/antlr/v4/misc/OrderedHashMap.java
index 67f9deb..7a988f9 100644
--- a/tool/src/org/antlr/v4/misc/OrderedHashMap.java
+++ b/tool/src/org/antlr/v4/misc/OrderedHashMap.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
diff --git a/tool/src/org/antlr/v4/misc/Utils.java b/tool/src/org/antlr/v4/misc/Utils.java
index d9a2473..abdedb7 100644
--- a/tool/src/org/antlr/v4/misc/Utils.java
+++ b/tool/src/org/antlr/v4/misc/Utils.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
@@ -122,7 +122,8 @@ public class Utils {
public static void setSize(List<?> list, int size) {
if (size < list.size()) {
list.subList(size, list.size()).clear();
- } else {
+ }
+ else {
while (size > list.size()) {
list.add(null);
}