summaryrefslogtreecommitdiff
path: root/runtime/Java/src/org/antlr/v4/runtime/atn/ATNDeserializer.java
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/Java/src/org/antlr/v4/runtime/atn/ATNDeserializer.java')
-rw-r--r--runtime/Java/src/org/antlr/v4/runtime/atn/ATNDeserializer.java117
1 files changed, 99 insertions, 18 deletions
diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNDeserializer.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNDeserializer.java
index 9821b8b..0ebcdeb 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNDeserializer.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNDeserializer.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
@@ -45,6 +45,12 @@ public class ATNDeserializer {
*/
private static final UUID ADDED_LEXER_ACTIONS;
/**
+ * This UUID indicates the serialized ATN contains two sets of
+ * IntervalSets, where the second set's values are encoded as
+ * 32-bit integers to support the full Unicode SMP range up to U+10FFFF.
+ */
+ private static final UUID ADDED_UNICODE_SMP;
+ /**
* This list contains all of the currently supported UUIDs, ordered by when
* the feature first appeared in this branch.
*/
@@ -61,15 +67,59 @@ public class ATNDeserializer {
BASE_SERIALIZED_UUID = UUID.fromString("33761B2D-78BB-4A43-8B0B-4F5BEE8AACF3");
ADDED_PRECEDENCE_TRANSITIONS = UUID.fromString("1DA0C57D-6C06-438A-9B27-10BCB3CE0F61");
ADDED_LEXER_ACTIONS = UUID.fromString("AADB8D7E-AEEF-4415-AD2B-8204D6CF042E");
+ ADDED_UNICODE_SMP = UUID.fromString("59627784-3BE5-417A-B9EB-8131A7286089");
SUPPORTED_UUIDS = new ArrayList<UUID>();
SUPPORTED_UUIDS.add(BASE_SERIALIZED_UUID);
SUPPORTED_UUIDS.add(ADDED_PRECEDENCE_TRANSITIONS);
SUPPORTED_UUIDS.add(ADDED_LEXER_ACTIONS);
+ SUPPORTED_UUIDS.add(ADDED_UNICODE_SMP);
+
+ SERIALIZED_UUID = ADDED_UNICODE_SMP;
+ }
+
+ interface UnicodeDeserializer {
+ // Wrapper for toInt() or toInt32()
+ int readUnicode(char[] data, int p);
+
+ // Work around Java not allowing mutation of captured variables
+ // by returning amount by which to increment p after each read
+ int size();
+ }
- SERIALIZED_UUID = ADDED_LEXER_ACTIONS;
+ enum UnicodeDeserializingMode {
+ UNICODE_BMP,
+ UNICODE_SMP
}
+ static UnicodeDeserializer getUnicodeDeserializer(UnicodeDeserializingMode mode) {
+ if (mode == UnicodeDeserializingMode.UNICODE_BMP) {
+ return new UnicodeDeserializer() {
+ @Override
+ public int readUnicode(char[] data, int p) {
+ return toInt(data[p]);
+ }
+
+ @Override
+ public int size() {
+ return 1;
+ }
+ };
+ }
+ else {
+ return new UnicodeDeserializer() {
+ @Override
+ public int readUnicode(char[] data, int p) {
+ return toInt32(data, p);
+ }
+
+ @Override
+ public int size() {
+ return 2;
+ }
+ };
+ }
+ }
private final ATNDeserializationOptions deserializationOptions;
@@ -98,7 +148,7 @@ public class ATNDeserializer {
* serialized ATN at or after the feature identified by {@code feature} was
* introduced; otherwise, {@code false}.
*/
- protected boolean isFeatureSupported(UUID feature, UUID actualUuid) {
+ static protected boolean isFeatureSupported(UUID feature, UUID actualUuid) {
int featureIndex = SUPPORTED_UUIDS.indexOf(feature);
if (featureIndex < 0) {
return false;
@@ -110,7 +160,22 @@ public class ATNDeserializer {
@SuppressWarnings("deprecation")
public ATN deserialize(char[] data) {
data = data.clone();
- // don't adjust the first value since that's the version number
+
+ // Each char value in data is shifted by +2 at the entry to this method.
+ // This is an encoding optimization targeting the serialized values 0
+ // and -1 (serialized to 0xFFFF), each of which is very common in the
+ // serialized form of the ATN. In the modified UTF-8 that Java uses for
+ // compiled string literals, these two character values have multi-byte
+ // forms. By shifting each value by +2, they become characters 2 and 1
+ // prior to writing the string, each of which has a single-byte
+ // representation. Since the shift occurs in the tool during ATN
+ // serialization, each target is responsible for adjusting the values
+ // during deserialization.
+ //
+ // As a special case, note that the first element of data is not
+ // adjusted because it contains the major version number of the
+ // serialized ATN, which was fixed at 3 at the time the value shifting
+ // was implemented.
for (int i = 1; i < data.length; i++) {
data[i] = (char)(data[i] - 2);
}
@@ -243,22 +308,14 @@ public class ATNDeserializer {
// SETS
//
List<IntervalSet> sets = new ArrayList<IntervalSet>();
- int nsets = toInt(data[p++]);
- for (int i=0; i<nsets; i++) {
- int nintervals = toInt(data[p]);
- p++;
- IntervalSet set = new IntervalSet();
- sets.add(set);
- boolean containsEof = toInt(data[p++]) != 0;
- if (containsEof) {
- set.add(-1);
- }
+ // First, read all sets with 16-bit Unicode code points <= U+FFFF.
+ p = deserializeSets(data, p, sets, getUnicodeDeserializer(UnicodeDeserializingMode.UNICODE_BMP));
- for (int j=0; j<nintervals; j++) {
- set.add(toInt(data[p]), toInt(data[p + 1]));
- p += 2;
- }
+ // Next, if the ATN was serialized with the Unicode SMP feature,
+ // deserialize sets with 32-bit arguments <= U+10FFFF.
+ if (isFeatureSupported(ADDED_UNICODE_SMP, uuid)) {
+ p = deserializeSets(data, p, sets, getUnicodeDeserializer(UnicodeDeserializingMode.UNICODE_SMP));
}
//
@@ -495,6 +552,30 @@ public class ATNDeserializer {
return atn;
}
+ private int deserializeSets(char[] data, int p, List<IntervalSet> sets, UnicodeDeserializer unicodeDeserializer) {
+ int nsets = toInt(data[p++]);
+ for (int i=0; i<nsets; i++) {
+ int nintervals = toInt(data[p]);
+ p++;
+ IntervalSet set = new IntervalSet();
+ sets.add(set);
+
+ boolean containsEof = toInt(data[p++]) != 0;
+ if (containsEof) {
+ set.add(-1);
+ }
+
+ for (int j=0; j<nintervals; j++) {
+ int a = unicodeDeserializer.readUnicode(data, p);
+ p += unicodeDeserializer.size();
+ int b = unicodeDeserializer.readUnicode(data, p);
+ p += unicodeDeserializer.size();
+ set.add(a, b);
+ }
+ }
+ return p;
+ }
+
/**
* Analyze the {@link StarLoopEntryState} states in the specified ATN to set
* the {@link StarLoopEntryState#isPrecedenceDecision} field to the