diff options
Diffstat (limited to 'tool-testsuite/test/org')
49 files changed, 1682 insertions, 105 deletions
diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/BaseJavaToolTest.java b/tool-testsuite/test/org/antlr/v4/test/tool/BaseJavaToolTest.java index d13af2d..6c2dd46 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/BaseJavaToolTest.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/BaseJavaToolTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/InterpreterTreeTextProvider.java b/tool-testsuite/test/org/antlr/v4/test/tool/InterpreterTreeTextProvider.java index d973939..adaf598 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/InterpreterTreeTextProvider.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/InterpreterTreeTextProvider.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/Java.g4 b/tool-testsuite/test/org/antlr/v4/test/tool/Java.g4 index e3e39f6..7db38ef 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/Java.g4 +++ b/tool-testsuite/test/org/antlr/v4/test/tool/Java.g4 @@ -1140,7 +1140,7 @@ SignedInteger fragment Sign - : [+-] + : [+\-] ; fragment diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/JavaUnicodeInputStream.java b/tool-testsuite/test/org/antlr/v4/test/tool/JavaUnicodeInputStream.java index cdbf9c3..0496992 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/JavaUnicodeInputStream.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/JavaUnicodeInputStream.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. 
All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/ParserInterpreterForTesting.java b/tool-testsuite/test/org/antlr/v4/test/tool/ParserInterpreterForTesting.java index 2f5ac4d..6fbee43 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/ParserInterpreterForTesting.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/ParserInterpreterForTesting.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestASTStructure.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestASTStructure.java index 7211efd..c1d6238 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestASTStructure.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestASTStructure.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNConstruction.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNConstruction.java index 5764175..0452b2b 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNConstruction.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNConstruction.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
* Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ @@ -115,6 +115,129 @@ public class TestATNConstruction extends BaseJavaToolTest { "s4->RuleStop_A_2\n"; checkTokensRule(g, null, expecting); } + @Test public void testCharSet() throws Exception { + LexerGrammar g = new LexerGrammar( + "lexer grammar P;\n"+ + "A : [abc] ;" + ); + String expecting = + "s0->RuleStart_A_1\n" + + "RuleStart_A_1->s3\n" + + "s3-{97..99}->s4\n" + + "s4->RuleStop_A_2\n"; + checkTokensRule(g, null, expecting); + } + @Test public void testCharSetRange() throws Exception { + LexerGrammar g = new LexerGrammar( + "lexer grammar P;\n"+ + "A : [a-c] ;" + ); + String expecting = + "s0->RuleStart_A_1\n" + + "RuleStart_A_1->s3\n" + + "s3-{97..99}->s4\n" + + "s4->RuleStop_A_2\n"; + checkTokensRule(g, null, expecting); + } + @Test public void testCharSetUnicodeBMPEscape() throws Exception { + LexerGrammar g = new LexerGrammar( + "lexer grammar P;\n"+ + "A : [\\uABCD] ;" + ); + String expecting = + "s0->RuleStart_A_1\n" + + "RuleStart_A_1->s3\n" + + "s3-43981->s4\n" + + "s4->RuleStop_A_2\n"; + checkTokensRule(g, null, expecting); + } + @Test public void testCharSetUnicodeBMPEscapeRange() throws Exception { + LexerGrammar g = new LexerGrammar( + "lexer grammar P;\n"+ + "A : [a-c\\uABCD-\\uABFF] ;" + ); + String expecting = + "s0->RuleStart_A_1\n" + + "RuleStart_A_1->s3\n" + + "s3-{97..99, 43981..44031}->s4\n" + + "s4->RuleStop_A_2\n"; + checkTokensRule(g, null, expecting); + } + @Test public void testCharSetUnicodeSMPEscape() throws Exception { + LexerGrammar g = new LexerGrammar( + "lexer grammar P;\n"+ + "A : [\\u{10ABCD}] ;" + ); + String expecting = + "s0->RuleStart_A_1\n" + + "RuleStart_A_1->s3\n" + + "s3-1092557->s4\n" + + "s4->RuleStop_A_2\n"; + checkTokensRule(g, null, expecting); + } + @Test public void testCharSetUnicodeSMPEscapeRange() throws Exception { + LexerGrammar g = new LexerGrammar( + "lexer grammar P;\n"+ + "A 
: [a-c\\u{10ABCD}-\\u{10ABFF}] ;" + ); + String expecting = + "s0->RuleStart_A_1\n" + + "RuleStart_A_1->s3\n" + + "s3-{97..99, 1092557..1092607}->s4\n" + + "s4->RuleStop_A_2\n"; + checkTokensRule(g, null, expecting); + } + @Test public void testCharSetUnicodePropertyEscape() throws Exception { + // The Gothic script is long dead and unlikely to change (which would + // cause this test to fail) + LexerGrammar g = new LexerGrammar( + "lexer grammar P;\n"+ + "A : [\\p{Gothic}] ;" + ); + String expecting = + "s0->RuleStart_A_1\n" + + "RuleStart_A_1->s3\n" + + "s3-{66352..66378}->s4\n" + + "s4->RuleStop_A_2\n"; + checkTokensRule(g, null, expecting); + } + @Test public void testCharSetUnicodePropertyInvertEscape() throws Exception { + LexerGrammar g = new LexerGrammar( + "lexer grammar P;\n"+ + "A : [\\P{Gothic}] ;" + ); + String expecting = + "s0->RuleStart_A_1\n" + + "RuleStart_A_1->s3\n" + + "s3-{0..66351, 66379..1114111}->s4\n" + + "s4->RuleStop_A_2\n"; + checkTokensRule(g, null, expecting); + } + @Test public void testCharSetUnicodeMultiplePropertyEscape() throws Exception { + // Ditto the Mahajani script. Not going to change soon. I hope. 
+ LexerGrammar g = new LexerGrammar( + "lexer grammar P;\n"+ + "A : [\\p{Gothic}\\p{Mahajani}] ;" + ); + String expecting = + "s0->RuleStart_A_1\n" + + "RuleStart_A_1->s3\n" + + "s3-{66352..66378, 69968..70006}->s4\n" + + "s4->RuleStop_A_2\n"; + checkTokensRule(g, null, expecting); + } + @Test public void testCharSetUnicodePropertyOverlap() throws Exception { + LexerGrammar g = new LexerGrammar( + "lexer grammar P;\n"+ + "A : [\\p{ASCII_Hex_Digit}\\p{Hex_Digit}] ;" + ); + String expecting = + "s0->RuleStart_A_1\n" + + "RuleStart_A_1->s3\n" + + "s3-{48..57, 65..70, 97..102, 65296..65305, 65313..65318, 65345..65350}->s4\n" + + "s4->RuleStop_A_2\n"; + checkTokensRule(g, null, expecting); + } @Test public void testRangeOrRange() throws Exception { LexerGrammar g = new LexerGrammar( "lexer grammar P;\n"+ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNDeserialization.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNDeserialization.java index 5f19024..bb4b15a 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNDeserialization.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNDeserialization.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNInterpreter.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNInterpreter.java index 14e82ae..201463e 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNInterpreter.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNInterpreter.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
* Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNLexerInterpreter.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNLexerInterpreter.java index 0324d39..0d06a03 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNLexerInterpreter.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNLexerInterpreter.java @@ -1,13 +1,13 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ package org.antlr.v4.test.tool; -import org.antlr.v4.runtime.ANTLRInputStream; import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.atn.ATN; import org.antlr.v4.runtime.atn.ATNState; import org.antlr.v4.runtime.misc.Utils; @@ -121,6 +121,94 @@ public class TestATNLexerInterpreter extends BaseJavaToolTest { checkLexerMatches(lg, "c", expecting); } + @Test public void testLexerSetUnicodeBMP() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ('\u611B'|'\u611C')\n ;"); + String expecting = "ID, EOF"; + checkLexerMatches(lg, "\u611B", expecting); + } + + @Test public void testLexerNotSetUnicodeBMP() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ~('\u611B'|'\u611C')\n ;"); + String expecting = "ID, EOF"; + checkLexerMatches(lg, "\u611D", expecting); + } + + @Test public void testLexerNotSetUnicodeBMPMatchesSMP() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ~('\u611B'|'\u611C')\n ;"); + String expecting = "ID, EOF"; + checkLexerMatches(lg, new StringBuilder().appendCodePoint(0x1F4A9).toString(), expecting); + } + + @Test public void 
testLexerSetUnicodeSMP() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ('\\u{1F4A9}'|'\\u{1F4AA}')\n ;"); + String expecting = "ID, EOF"; + checkLexerMatches(lg, new StringBuilder().appendCodePoint(0x1F4A9).toString(), expecting); + } + + @Test public void testLexerNotBMPSetMatchesUnicodeSMP() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ~('a'|'b')\n ;"); + String expecting = "ID, EOF"; + checkLexerMatches(lg, new StringBuilder().appendCodePoint(0x1F4A9).toString(), expecting); + } + + @Test public void testLexerNotBMPSetMatchesBMP() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ~('a'|'b')\n ;"); + String expecting = "ID, EOF"; + checkLexerMatches(lg, "\u611B", expecting); + } + + @Test public void testLexerNotBMPSetMatchesSMP() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ~('a'|'b')\n ;"); + String expecting = "ID, EOF"; + checkLexerMatches(lg, new StringBuilder().appendCodePoint(0x1F4A9).toString(), expecting); + } + + @Test public void testLexerNotSMPSetMatchesBMP() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ~('\\u{1F4A9}'|'\\u{1F4AA}')\n ;"); + String expecting = "ID, EOF"; + checkLexerMatches(lg, "\u611B", expecting); + } + + @Test public void testLexerNotSMPSetMatchesSMP() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ~('\\u{1F4A9}'|'\\u{1F4AA}')\n ;"); + String expecting = "ID, EOF"; + checkLexerMatches(lg, new StringBuilder().appendCodePoint(0x1D7C0).toString(), expecting); + } + + @Test public void testLexerRangeUnicodeSMP() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ('\\u{1F4A9}'..'\\u{1F4B0}')\n ;"); + String expecting = "ID, EOF"; + checkLexerMatches(lg, new StringBuilder().appendCodePoint(0x1F4AF).toString(), expecting); + } + + @Test public void 
testLexerRangeUnicodeBMPToSMP() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ('\\u611B'..'\\u{1F4B0}')\n ;"); + String expecting = "ID, EOF"; + checkLexerMatches(lg, new StringBuilder().appendCodePoint(0x12001).toString(), expecting); + } + @Test public void testLexerKeywordIDAmbiguity() throws Exception { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ @@ -293,7 +381,7 @@ public class TestATNLexerInterpreter extends BaseJavaToolTest { protected void checkLexerMatches(LexerGrammar lg, String inputString, String expecting) { ATN atn = createATN(lg, true); - CharStream input = new ANTLRInputStream(inputString); + CharStream input = CharStreams.fromString(inputString); ATNState startState = atn.modeNameToStartState.get("DEFAULT_MODE"); DOTGenerator dot = new DOTGenerator(lg); // System.out.println(dot.getDOT(startState, true)); diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNParserPrediction.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNParserPrediction.java index 3049fd8..9e58fe9 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNParserPrediction.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNParserPrediction.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java index 5e2b57e..d8afa5c 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. 
All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ @@ -291,6 +291,113 @@ public class TestATNSerialization extends BaseJavaToolTest { assertEquals(expecting, result); } + @Test public void testLexerUnicodeSMPLiteralSerializedToSet() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "INT : '\\u{1F4A9}' ;"); + String expecting = + "max type 1\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:BASIC 0\n" + + "4:BASIC 0\n" + + "rule 0:1 1\n" + + "mode 0:0\n" + + "0:128169..128169\n" + + "0->1 EPSILON 0,0,0\n" + + "1->3 EPSILON 0,0,0\n" + + "3->4 SET 0,0,0\n" + + "4->2 EPSILON 0,0,0\n" + + "0:0\n"; + ATN atn = createATN(lg, true); + String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); + assertEquals(expecting, result); + } + + @Test public void testLexerUnicodeSMPRangeSerializedToSet() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "INT : ('a'..'\\u{1F4A9}') ;"); + String expecting = + "max type 1\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:BASIC 0\n" + + "4:BASIC 0\n" + + "rule 0:1 1\n" + + "mode 0:0\n" + + "0:'a'..128169\n" + + "0->1 EPSILON 0,0,0\n" + + "1->3 EPSILON 0,0,0\n" + + "3->4 SET 0,0,0\n" + + "4->2 EPSILON 0,0,0\n" + + "0:0\n"; + ATN atn = createATN(lg, true); + String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); + assertEquals(expecting, result); + } + + @Test public void testLexerUnicodeSMPSetSerializedAfterBMPSet() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "SMP : ('\\u{1F4A9}' | '\\u{1F4AF}') ;\n"+ + "BMP : ('a' | 'x') ;"); + String expecting = + "max type 2\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:RULE_START 1\n" + + "4:RULE_STOP 1\n" + + "5:BASIC 0\n" + + "6:BASIC 0\n" + + "7:BASIC 1\n" + 
+ "8:BASIC 1\n" + + "rule 0:1 1\n" + + "rule 1:3 2\n" + + "mode 0:0\n" + + "0:'a'..'a', 'x'..'x'\n" + + "1:128169..128169, 128175..128175\n" + + "0->1 EPSILON 0,0,0\n" + + "0->3 EPSILON 0,0,0\n" + + "1->5 EPSILON 0,0,0\n" + + "3->7 EPSILON 0,0,0\n" + + "5->6 SET 1,0,0\n" + + "6->2 EPSILON 0,0,0\n" + + "7->8 SET 0,0,0\n" + + "8->4 EPSILON 0,0,0\n" + + "0:0\n"; + ATN atn = createATN(lg, true); + String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); + assertEquals(expecting, result); + } + + @Test public void testLexerNotLiteral() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "INT : ~'a' ;"); + String expecting = + "max type 1\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:BASIC 0\n" + + "4:BASIC 0\n" + + "rule 0:1 1\n" + + "mode 0:0\n" + + "0:'a'..'a'\n" + + "0->1 EPSILON 0,0,0\n" + + "1->3 EPSILON 0,0,0\n" + + "3->4 NOT_SET 0,0,0\n" + + "4->2 EPSILON 0,0,0\n" + + "0:0\n"; + ATN atn = createATN(lg, true); + String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); + assertEquals(expecting, result); + } + @Test public void testLexerRange() throws Exception { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ @@ -518,6 +625,222 @@ public class TestATNSerialization extends BaseJavaToolTest { assertEquals(expecting, result); } + @Test public void testLexerUnicodeUnescapedBMPNotSet() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ~('\u4E9C'|'\u4E9D')\n ;"); + String expecting = + "max type 1\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:BASIC 0\n" + + "4:BASIC 0\n" + + "rule 0:1 1\n" + + "mode 0:0\n" + + "0:'\\u4E9C'..'\\u4E9D'\n" + + "0->1 EPSILON 0,0,0\n" + + "1->3 EPSILON 0,0,0\n" + + "3->4 NOT_SET 0,0,0\n" + + "4->2 EPSILON 0,0,0\n" + + "0:0\n"; + ATN atn = createATN(lg, true); + String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); + 
assertEquals(expecting, result); + } + + @Test public void testLexerUnicodeUnescapedBMPSetWithRange() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ('\u4E9C'|'\u4E9D'|'\u6C5F'|'\u305F'..'\u307B')\n ;"); + String expecting = + "max type 1\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:BASIC 0\n" + + "4:BASIC 0\n" + + "rule 0:1 1\n" + + "mode 0:0\n" + + "0:'\\u305F'..'\\u307B', '\\u4E9C'..'\\u4E9D', '\\u6C5F'..'\\u6C5F'\n" + + "0->1 EPSILON 0,0,0\n" + + "1->3 EPSILON 0,0,0\n" + + "3->4 SET 0,0,0\n" + + "4->2 EPSILON 0,0,0\n" + + "0:0\n"; + ATN atn = createATN(lg, true); + String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); + assertEquals(expecting, result); + } + + @Test public void testLexerUnicodeUnescapedBMPNotSetWithRange() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ~('\u4E9C'|'\u4E9D'|'\u6C5F'|'\u305F'..'\u307B')\n ;"); + String expecting = + "max type 1\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:BASIC 0\n" + + "4:BASIC 0\n" + + "rule 0:1 1\n" + + "mode 0:0\n" + + "0:'\\u305F'..'\\u307B', '\\u4E9C'..'\\u4E9D', '\\u6C5F'..'\\u6C5F'\n" + + "0->1 EPSILON 0,0,0\n" + + "1->3 EPSILON 0,0,0\n" + + "3->4 NOT_SET 0,0,0\n" + + "4->2 EPSILON 0,0,0\n" + + "0:0\n"; + ATN atn = createATN(lg, true); + String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); + assertEquals(expecting, result); + } + + @Test public void testLexerUnicodeEscapedBMPNotSet() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ~('\\u4E9C'|'\\u4E9D')\n ;"); + String expecting = + "max type 1\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:BASIC 0\n" + + "4:BASIC 0\n" + + "rule 0:1 1\n" + + "mode 0:0\n" + + "0:'\\u4E9C'..'\\u4E9D'\n" + + "0->1 EPSILON 0,0,0\n" + + "1->3 EPSILON 0,0,0\n" + + "3->4 NOT_SET 0,0,0\n" + + "4->2 
EPSILON 0,0,0\n" + + "0:0\n"; + ATN atn = createATN(lg, true); + String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); + assertEquals(expecting, result); + } + + @Test public void testLexerUnicodeEscapedBMPSetWithRange() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ('\\u4E9C'|'\\u4E9D'|'\\u6C5F'|'\\u305F'..'\\u307B')\n ;"); + String expecting = + "max type 1\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:BASIC 0\n" + + "4:BASIC 0\n" + + "rule 0:1 1\n" + + "mode 0:0\n" + + "0:'\\u305F'..'\\u307B', '\\u4E9C'..'\\u4E9D', '\\u6C5F'..'\\u6C5F'\n" + + "0->1 EPSILON 0,0,0\n" + + "1->3 EPSILON 0,0,0\n" + + "3->4 SET 0,0,0\n" + + "4->2 EPSILON 0,0,0\n" + + "0:0\n"; + ATN atn = createATN(lg, true); + String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); + assertEquals(expecting, result); + } + + @Test public void testLexerUnicodeEscapedBMPNotSetWithRange() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ~('\\u4E9C'|'\\u4E9D'|'\\u6C5F'|'\\u305F'..'\\u307B')\n ;"); + String expecting = + "max type 1\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:BASIC 0\n" + + "4:BASIC 0\n" + + "rule 0:1 1\n" + + "mode 0:0\n" + + "0:'\\u305F'..'\\u307B', '\\u4E9C'..'\\u4E9D', '\\u6C5F'..'\\u6C5F'\n" + + "0->1 EPSILON 0,0,0\n" + + "1->3 EPSILON 0,0,0\n" + + "3->4 NOT_SET 0,0,0\n" + + "4->2 EPSILON 0,0,0\n" + + "0:0\n"; + ATN atn = createATN(lg, true); + String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); + assertEquals(expecting, result); + } + + @Test public void testLexerUnicodeEscapedSMPNotSet() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ~('\\u{1F4A9}'|'\\u{1F4AA}')\n ;"); + String expecting = + "max type 1\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:BASIC 0\n" + + "4:BASIC 0\n" 
+ + "rule 0:1 1\n" + + "mode 0:0\n" + + "0:128169..128170\n" + + "0->1 EPSILON 0,0,0\n" + + "1->3 EPSILON 0,0,0\n" + + "3->4 NOT_SET 0,0,0\n" + + "4->2 EPSILON 0,0,0\n" + + "0:0\n"; + ATN atn = createATN(lg, true); + String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); + assertEquals(expecting, result); + } + + @Test public void testLexerUnicodeEscapedSMPSetWithRange() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ('\\u{1F4A9}'|'\\u{1F4AA}'|'\\u{1F441}'|'\\u{1D40F}'..'\\u{1D413}')\n ;"); + String expecting = + "max type 1\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:BASIC 0\n" + + "4:BASIC 0\n" + + "rule 0:1 1\n" + + "mode 0:0\n" + + "0:119823..119827, 128065..128065, 128169..128170\n" + + "0->1 EPSILON 0,0,0\n" + + "1->3 EPSILON 0,0,0\n" + + "3->4 SET 0,0,0\n" + + "4->2 EPSILON 0,0,0\n" + + "0:0\n"; + ATN atn = createATN(lg, true); + String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); + assertEquals(expecting, result); + } + + @Test public void testLexerUnicodeEscapedSMPNotSetWithRange() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ~('\\u{1F4A9}'|'\\u{1F4AA}'|'\\u{1F441}'|'\\u{1D40F}'..'\\u{1D413}')\n ;"); + String expecting = + "max type 1\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:BASIC 0\n" + + "4:BASIC 0\n" + + "rule 0:1 1\n" + + "mode 0:0\n" + + "0:119823..119827, 128065..128065, 128169..128170\n" + + "0->1 EPSILON 0,0,0\n" + + "1->3 EPSILON 0,0,0\n" + + "3->4 NOT_SET 0,0,0\n" + + "4->2 EPSILON 0,0,0\n" + + "0:0\n"; + ATN atn = createATN(lg, true); + String result = ATNSerializer.getDecoded(atn, Arrays.asList(lg.getTokenNames())); + assertEquals(expecting, result); + } + @Test public void testLexerWildcardWithMode() throws Exception { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ diff --git 
a/tool-testsuite/test/org/antlr/v4/test/tool/TestActionSplitter.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestActionSplitter.java index 391da8e..d376c7b 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestActionSplitter.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestActionSplitter.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestActionTranslation.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestActionTranslation.java index db73a81..e601cec 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestActionTranslation.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestActionTranslation.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestAmbigParseTrees.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestAmbigParseTrees.java index 546517e..a91ade8 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestAmbigParseTrees.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestAmbigParseTrees.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. 
*/ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestAttributeChecks.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestAttributeChecks.java index 33ca19b..f36511e 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestAttributeChecks.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestAttributeChecks.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestBasicSemanticErrors.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestBasicSemanticErrors.java index 661506d..f6e2ad2 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestBasicSemanticErrors.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestBasicSemanticErrors.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestBufferedTokenStream.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestBufferedTokenStream.java index d9cbab5..b5548fa 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestBufferedTokenStream.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestBufferedTokenStream.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. 
*/ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java index 7574c7c..8fb6fb7 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestCommonTokenStream.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestCommonTokenStream.java index 2453add..6098ae5 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestCommonTokenStream.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestCommonTokenStream.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestCompositeGrammars.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestCompositeGrammars.java index 9b89b0e..0336878 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestCompositeGrammars.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestCompositeGrammars.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. 
*/ @@ -17,6 +17,7 @@ import org.junit.Test; import java.io.File; +import static org.antlr.v4.test.runtime.BaseRuntimeTest.writeFile; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; @@ -60,6 +61,181 @@ public class TestCompositeGrammars extends BaseJavaToolTest { assertEquals(0, equeue.size()); } + @Test public void testImportIntoLexerGrammar() throws Exception { + BaseRuntimeTest.mkdir(tmpdir); + + String master = + "lexer grammar M;\n" + + "import S;\n" + + "A : 'a';\n" + + "B : 'b';\n"; + writeFile(tmpdir, "M.g4", master); + + String slave = + "lexer grammar S;\n" + + "C : 'c';\n"; + writeFile(tmpdir, "S.g4", slave); + + ErrorQueue equeue = BaseRuntimeTest.antlrOnString(tmpdir, "Java", "M.g4", false, "-lib", tmpdir); + assertEquals(0, equeue.errors.size()); + } + + @Test public void testImportModesIntoLexerGrammar() throws Exception { + BaseRuntimeTest.mkdir(tmpdir); + + String master = + "lexer grammar M;\n" + + "import S;\n" + + "A : 'a' -> pushMode(X);\n" + + "B : 'b';\n"; + writeFile(tmpdir, "M.g4", master); + + String slave = + "lexer grammar S;\n" + + "D : 'd';\n" + + "mode X;\n" + + "C : 'c' -> popMode;\n"; + writeFile(tmpdir, "S.g4", slave); + + ErrorQueue equeue = BaseRuntimeTest.antlrOnString(tmpdir, "Java", "M.g4", false, "-lib", tmpdir); + assertEquals(0, equeue.errors.size()); + } + + @Test public void testImportChannelsIntoLexerGrammar() throws Exception { + BaseRuntimeTest.mkdir(tmpdir); + + String master = + "lexer grammar M;\n" + + "import S;\n" + + "channels {CH_A, CH_B}\n" + + "A : 'a' -> channel(CH_A);\n" + + "B : 'b' -> channel(CH_B);\n"; + writeFile(tmpdir, "M.g4", master); + + String slave = + "lexer grammar S;\n" + + "C : 'c';\n"; + writeFile(tmpdir, "S.g4", slave); + + ErrorQueue equeue = BaseRuntimeTest.antlrOnString(tmpdir, "Java", "M.g4", false, "-lib", tmpdir); + assertEquals(0, equeue.errors.size()); + } + + @Test public void testImportMixedChannelsIntoLexerGrammar() throws Exception { + 
BaseRuntimeTest.mkdir(tmpdir); + + String master = + "lexer grammar M;\n" + + "import S;\n" + + "channels {CH_A, CH_B}\n" + + "A : 'a' -> channel(CH_A);\n" + + "B : 'b' -> channel(CH_B);\n"; + writeFile(tmpdir, "M.g4", master); + + String slave = + "lexer grammar S;\n" + + "channels {CH_C}\n" + + "C : 'c' -> channel(CH_C);\n"; + writeFile(tmpdir, "S.g4", slave); + + ErrorQueue equeue = BaseRuntimeTest.antlrOnString(tmpdir, "Java", "M.g4", false, "-lib", tmpdir); + assertEquals(0, equeue.errors.size()); + } + + @Test public void testImportClashingChannelsIntoLexerGrammar() throws Exception { + BaseRuntimeTest.mkdir(tmpdir); + + String master = + "lexer grammar M;\n" + + "import S;\n" + + "channels {CH_A, CH_B, CH_C}\n" + + "A : 'a' -> channel(CH_A);\n" + + "B : 'b' -> channel(CH_B);\n" + + "C : 'C' -> channel(CH_C);\n"; + writeFile(tmpdir, "M.g4", master); + + String slave = + "lexer grammar S;\n" + + "channels {CH_C}\n" + + "C : 'c' -> channel(CH_C);\n"; + writeFile(tmpdir, "S.g4", slave); + + ErrorQueue equeue = BaseRuntimeTest.antlrOnString(tmpdir, "Java", "M.g4", false, "-lib", tmpdir); + assertEquals(0, equeue.errors.size()); + } + + @Test public void testMergeModesIntoLexerGrammar() throws Exception { + BaseRuntimeTest.mkdir(tmpdir); + + String master = + "lexer grammar M;\n" + + "import S;\n" + + "A : 'a' -> pushMode(X);\n" + + "mode X;\n" + + "B : 'b';\n"; + writeFile(tmpdir, "M.g4", master); + + String slave = + "lexer grammar S;\n" + + "D : 'd';\n" + + "mode X;\n" + + "C : 'c' -> popMode;\n"; + writeFile(tmpdir, "S.g4", slave); + + ErrorQueue equeue = BaseRuntimeTest.antlrOnString(tmpdir, "Java", "M.g4", false, "-lib", tmpdir); + assertEquals(0, equeue.errors.size()); + } + + @Test public void testEmptyModesInLexerGrammar() throws Exception { + BaseRuntimeTest.mkdir(tmpdir); + + String master = + "lexer grammar M;\n" + + "import S;\n" + + "A : 'a';\n" + + "C : 'e';\n" + + "B : 'b';\n"; + writeFile(tmpdir, "M.g4", master); + + String slave = + "lexer 
grammar S;\n" + + "D : 'd';\n" + + "mode X;\n" + + "C : 'c' -> popMode;\n"; + writeFile(tmpdir, "S.g4", slave); + + ErrorQueue equeue = BaseRuntimeTest.antlrOnString(tmpdir, "Java", "M.g4", false, "-lib", tmpdir); + assertEquals(0, equeue.errors.size()); + } + + @Test public void testCombinedGrammarImportsModalLexerGrammar() throws Exception { + BaseRuntimeTest.mkdir(tmpdir); + + String master = + "grammar M;\n" + + "import S;\n" + + "A : 'a';\n" + + "B : 'b';\n" + + "r : A B;\n"; + writeFile(tmpdir, "M.g4", master); + + String slave = + "lexer grammar S;\n" + + "D : 'd';\n" + + "mode X;\n" + + "C : 'c' -> popMode;\n"; + writeFile(tmpdir, "S.g4", slave); + + ErrorQueue equeue = BaseRuntimeTest.antlrOnString(tmpdir, "Java", "M.g4", false, "-lib", tmpdir); + assertEquals(1, equeue.errors.size()); + ANTLRMessage msg = equeue.errors.get(0); + assertEquals(ErrorType.MODE_NOT_IN_LEXER, msg.getErrorType()); + assertEquals("X", msg.getArgs()[0]); + assertEquals(3, msg.line); + assertEquals(5, msg.charPosition); + assertEquals("M.g4", new File(msg.fileName).getName()); + } + @Test public void testDelegatesSeeSameTokenType() throws Exception { String slaveS = "parser grammar S;\n"+ @@ -460,7 +636,7 @@ public class TestCompositeGrammars extends BaseJavaToolTest { BaseRuntimeTest.mkdir(tmpdir); writeFile(tmpdir, "Java.g4", slave); String found = execParser("NewJava.g4", master, "NewJavaParser", "NewJavaLexer", - null, null, "compilationUnit", "package Foo;", debug); + null, null, "compilationUnit", "package Foo;", debug); assertEquals(null, found); assertNull(stderrDuringParse); } @@ -488,7 +664,7 @@ public class TestCompositeGrammars extends BaseJavaToolTest { BaseRuntimeTest.mkdir(tmpdir); writeFile(tmpdir, "Java.g4", slave); String found = execParser("T.g4", master, "TParser", "TLexer", - null, null, "s", "a=b", debug); + null, null, "s", "a=b", debug); assertEquals(null, found); assertNull(stderrDuringParse); } diff --git 
a/tool-testsuite/test/org/antlr/v4/test/tool/TestDollarParser.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestDollarParser.java index 03f0f0e..6103e44 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestDollarParser.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestDollarParser.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestErrorSets.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestErrorSets.java index 7071500..a0bdc18 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestErrorSets.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestErrorSets.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestEscapeSequenceParsing.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestEscapeSequenceParsing.java new file mode 100644 index 0000000..e4b3c37 --- /dev/null +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestEscapeSequenceParsing.java @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.tool; + +import org.antlr.v4.misc.EscapeSequenceParsing; +import org.antlr.v4.runtime.misc.IntervalSet; +import org.junit.Test; + +import static org.antlr.v4.misc.EscapeSequenceParsing.Result; +import static org.junit.Assert.assertEquals; + +public class TestEscapeSequenceParsing { + @Test + public void testParseEmpty() { + assertEquals( + EscapeSequenceParsing.Result.Type.INVALID, + EscapeSequenceParsing.parseEscape("", 0).type); + } + + @Test + public void testParseJustBackslash() { + assertEquals( + EscapeSequenceParsing.Result.Type.INVALID, + EscapeSequenceParsing.parseEscape("\\", 0).type); + } + + @Test + public void testParseInvalidEscape() { + assertEquals( + EscapeSequenceParsing.Result.Type.INVALID, + EscapeSequenceParsing.parseEscape("\\z", 0).type); + } + + @Test + public void testParseNewline() { + assertEquals( + new Result(Result.Type.CODE_POINT, '\n', IntervalSet.EMPTY_SET, 0,2), + EscapeSequenceParsing.parseEscape("\\n", 0)); + } + + @Test + public void testParseTab() { + assertEquals( + new Result(Result.Type.CODE_POINT, '\t', IntervalSet.EMPTY_SET, 0,2), + EscapeSequenceParsing.parseEscape("\\t", 0)); + } + + @Test + public void testParseUnicodeTooShort() { + assertEquals( + EscapeSequenceParsing.Result.Type.INVALID, + EscapeSequenceParsing.parseEscape("\\uABC", 0).type); + } + + @Test + public void testParseUnicodeBMP() { + assertEquals( + new Result(Result.Type.CODE_POINT, 0xABCD, IntervalSet.EMPTY_SET, 0,6), + EscapeSequenceParsing.parseEscape("\\uABCD", 0)); + } + + @Test + public void testParseUnicodeSMPTooShort() { + assertEquals( + EscapeSequenceParsing.Result.Type.INVALID, + EscapeSequenceParsing.parseEscape("\\u{}", 0).type); + } + + @Test + public void testParseUnicodeSMPMissingCloseBrace() { + assertEquals( + EscapeSequenceParsing.Result.Type.INVALID, + EscapeSequenceParsing.parseEscape("\\u{12345", 0).type); + } + + @Test + public void testParseUnicodeTooBig() { + assertEquals( + 
EscapeSequenceParsing.Result.Type.INVALID, + EscapeSequenceParsing.parseEscape("\\u{110000}", 0).type); + } + + @Test + public void testParseUnicodeSMP() { + assertEquals( + new Result(Result.Type.CODE_POINT, 0x10ABCD, IntervalSet.EMPTY_SET, 0,10), + EscapeSequenceParsing.parseEscape("\\u{10ABCD}", 0)); + } + + @Test + public void testParseUnicodePropertyTooShort() { + assertEquals( + EscapeSequenceParsing.Result.Type.INVALID, + EscapeSequenceParsing.parseEscape("\\p{}", 0).type); + } + + @Test + public void testParseUnicodePropertyMissingCloseBrace() { + assertEquals( + EscapeSequenceParsing.Result.Type.INVALID, + EscapeSequenceParsing.parseEscape("\\p{1234", 0).type); + } + + @Test + public void testParseUnicodeProperty() { + assertEquals( + new Result(Result.Type.PROPERTY, -1, IntervalSet.of(66560, 66639), 0,11), + EscapeSequenceParsing.parseEscape("\\p{Deseret}", 0)); + } + + @Test + public void testParseUnicodePropertyInvertedTooShort() { + assertEquals( + EscapeSequenceParsing.Result.Type.INVALID, + EscapeSequenceParsing.parseEscape("\\P{}", 0).type); + } + + @Test + public void testParseUnicodePropertyInvertedMissingCloseBrace() { + assertEquals( + EscapeSequenceParsing.Result.Type.INVALID, + EscapeSequenceParsing.parseEscape("\\P{Deseret", 0).type); + } + + @Test + public void testParseUnicodePropertyInverted() { + IntervalSet expected = IntervalSet.of(0, 66559); + expected.add(66640, Character.MAX_CODE_POINT); + assertEquals( + new Result(Result.Type.PROPERTY, -1, expected, 0, 11), + EscapeSequenceParsing.parseEscape("\\P{Deseret}", 0)); + } +} diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestFastQueue.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestFastQueue.java index 926be09..3bf4f4a 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestFastQueue.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestFastQueue.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. 
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestGrammarParserInterpreter.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestGrammarParserInterpreter.java index b15ed09..def2642 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestGrammarParserInterpreter.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestGrammarParserInterpreter.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestGraphNodes.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestGraphNodes.java index fd7b781..cb2cfcb 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestGraphNodes.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestGraphNodes.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestIntervalSet.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestIntervalSet.java index 1d3157b..59bcba3 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestIntervalSet.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestIntervalSet.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
* Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestLeftRecursionToolIssues.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestLeftRecursionToolIssues.java index 1ac210c..04159db 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestLeftRecursionToolIssues.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestLeftRecursionToolIssues.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestLexerActions.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestLexerActions.java index 526efed..069c6d6 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestLexerActions.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestLexerActions.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestLookaheadTrees.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestLookaheadTrees.java index 3fdad47..e560fd5 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestLookaheadTrees.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestLookaheadTrees.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. 
*/ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestParseTreeMatcher.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestParseTreeMatcher.java index a750880..c5af500 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestParseTreeMatcher.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestParseTreeMatcher.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestParserExec.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestParserExec.java index fc9a62f..14005c3 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestParserExec.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestParserExec.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestParserInterpreter.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestParserInterpreter.java index 2a8952f..b424fc0 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestParserInterpreter.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestParserInterpreter.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. 
*/ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestParserProfiler.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestParserProfiler.java index e18aa75..a5cd58a 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestParserProfiler.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestParserProfiler.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestPerformance.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestPerformance.java index 70ee764..b5bd606 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestPerformance.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestPerformance.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. 
*/ @@ -33,6 +33,7 @@ import org.antlr.v4.runtime.atn.PredictionMode; import org.antlr.v4.runtime.dfa.DFA; import org.antlr.v4.runtime.dfa.DFAState; import org.antlr.v4.runtime.misc.Interval; +import org.antlr.v4.runtime.misc.MurmurHash; import org.antlr.v4.runtime.misc.ParseCancellationException; import org.antlr.v4.runtime.tree.ErrorNode; import org.antlr.v4.runtime.tree.ParseTree; @@ -76,9 +77,8 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicIntegerArray; import java.util.logging.Level; import java.util.logging.Logger; -import java.util.zip.CRC32; -import java.util.zip.Checksum; +import static org.antlr.v4.test.runtime.BaseRuntimeTest.writeFile; import static org.hamcrest.CoreMatchers.instanceOf; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; @@ -825,7 +825,7 @@ public class TestPerformance extends BaseJavaToolTest { results.add(futureChecksum); } - Checksum checksum = new CRC32(); + MurmurHashChecksum checksum = new MurmurHashChecksum(); int currentIndex = -1; for (Future<FileParseResult> future : results) { currentIndex++; @@ -1115,14 +1115,11 @@ public class TestPerformance extends BaseJavaToolTest { assertTrue(success); } - private static void updateChecksum(Checksum checksum, int value) { - checksum.update((value) & 0xFF); - checksum.update((value >>> 8) & 0xFF); - checksum.update((value >>> 16) & 0xFF); - checksum.update((value >>> 24) & 0xFF); + private static void updateChecksum(MurmurHashChecksum checksum, int value) { + checksum.update(value); } - private static void updateChecksum(Checksum checksum, Token token) { + private static void updateChecksum(MurmurHashChecksum checksum, Token token) { if (token == null) { checksum.update(0); return; @@ -1154,7 +1151,7 @@ public class TestPerformance extends BaseJavaToolTest { @Override public FileParseResult parseFile(CharStream input, int currentPass, int thread) { - final Checksum checksum = new CRC32(); + final 
MurmurHashChecksum checksum = new MurmurHashChecksum(); final long startTime = System.nanoTime(); assert thread >= 0 && thread < NUMBER_OF_THREADS; @@ -1292,7 +1289,7 @@ public class TestPerformance extends BaseJavaToolTest { throw ex; } - tokens.reset(); + tokens.seek(0); if (REUSE_PARSER && parser != null) { parser.setInputStream(tokens); } else { @@ -1835,9 +1832,9 @@ public class TestPerformance extends BaseJavaToolTest { private static final int ENTER_RULE = 3; private static final int EXIT_RULE = 4; - private final Checksum checksum; + private final MurmurHashChecksum checksum; - public ChecksumParseTreeListener(Checksum checksum) { + public ChecksumParseTreeListener(MurmurHashChecksum checksum) { this.checksum = checksum; } @@ -1928,6 +1925,24 @@ public class TestPerformance extends BaseJavaToolTest { } } + private static class MurmurHashChecksum { + private int value; + private int count; + + public MurmurHashChecksum() { + this.value = MurmurHash.initialize(); + } + + public void update(int value) { + this.value = MurmurHash.update(this.value, value); + this.count++; + } + + public int getValue() { + return MurmurHash.finish(value, count); + } + } + @Test(timeout = 20000) public void testExponentialInclude() { String grammarFormat = diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestScopeParsing.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestScopeParsing.java index 6c8f589..dda7574 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestScopeParsing.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestScopeParsing.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. 
*/ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java index 6d56775..ad53c03 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ @@ -291,21 +291,36 @@ public class TestSymbolIssues extends BaseJavaToolTest { String[] test = { "grammar L;\n" + "\n" + - "rule1 // Correct (Alternatives)\n" + - " : t1 = a #aLabel\n" + - " | t1 = b #bLabel\n" + + "rule1 // Correct (Alternatives)\n" + + " : t1=a #aLabel\n" + + " | t1=b #bLabel\n" + " ;\n" + "rule2 //Incorrect type casting in generated code (RULE_LABEL)\n" + - " : t2 = a | t2 = b\n" + + " : t2=a | t2=b\n" + " ;\n" + "rule3\n" + - " : t3 += a+ b t3 += c+ //Incorrect type casting in generated code (RULE_LIST_LABEL)\n" + + " : t3+=a+ b t3+=c+ //Incorrect type casting in generated code (RULE_LIST_LABEL)\n" + " ;\n" + "rule4\n" + - " : a t4 = A b t4 = B c // Correct (TOKEN_LABEL)\n" + + " : a t4=A b t4=B c // Correct (TOKEN_LABEL)\n" + " ;\n" + "rule5\n" + - " : a t5 += A b t5 += B c // Correct (TOKEN_LIST_LABEL)\n" + + " : a t5+=A b t5+=B c // Correct (TOKEN_LIST_LABEL)\n" + + " ;\n" + + "rule6 // Correct (https://github.com/antlr/antlr4/issues/1543)\n" + + " : t6=a #t6_1_Label\n" + + " | t6=rule6 b (t61=c)? t62=rule6 #t6_2_Label\n" + + " | t6=A a (t61=B)? t62=A #t6_3_Label\n" + + " ;\n" + + "rule7 // Incorrect (https://github.com/antlr/antlr4/issues/1543)\n" + + " : a\n" + + " | t7=rule7 b (t71=c)? t72=rule7 \n" + + " | t7=A a (t71=B)? 
t72=A \n" + + " ;\n" + + "rule8 // Correct (https://github.com/antlr/antlr4/issues/1543)\n" + + " : a\n" + + " | t8=rule8 a t8=rule8\n" + + " | t8=rule8 b t8=rule8\n" + " ;\n" + "a: A;\n" + "b: B;\n" + @@ -314,8 +329,54 @@ public class TestSymbolIssues extends BaseJavaToolTest { "B: 'b';\n" + "C: 'c';\n", - "error(" + ErrorType.LABEL_TYPE_CONFLICT.code + "): L.g4:8:15: label t2=b type mismatch with previous definition: t2=a\n" + - "error(" + ErrorType.LABEL_TYPE_CONFLICT.code + "): L.g4:11:17: label t3+=c type mismatch with previous definition: t3+=a\n" + "error(" + ErrorType.LABEL_TYPE_CONFLICT.code + "): L.g4:8:13: label t2=b type mismatch with previous definition: t2=a\n" + + "error(" + ErrorType.LABEL_TYPE_CONFLICT.code + "): L.g4:11:15: label t3+=c type mismatch with previous definition: t3+=a\n" + + + "error(" + ErrorType.LABEL_TYPE_CONFLICT.code + "): L.g4:24:0: label t7 type mismatch with previous definition: TOKEN_LABEL!=RULE_LABEL\n" + + "error(" + ErrorType.LABEL_TYPE_CONFLICT.code + "): L.g4:24:0: label t71 type mismatch with previous definition: RULE_LABEL!=TOKEN_LABEL\n" + + "error(" + ErrorType.LABEL_TYPE_CONFLICT.code + "): L.g4:24:0: label t72 type mismatch with previous definition: RULE_LABEL!=TOKEN_LABEL\n" + }; + + testErrors(test, false); + } + + // https://github.com/antlr/antlr4/issues/1543 + @Test public void testLabelsForTokensWithMixedTypesLRWithLabels() { + String[] test = { + "grammar L;\n" + + "\n" + + "expr\n" + + " : left=A '+' right=A #primary\n" + + " | left=expr '-' right=expr #sub\n" + + " ;\n" + + "\n" + + "A: 'a';\n" + + "B: 'b';\n" + + "C: 'c';\n", + + "" + }; + + testErrors(test, false); + } + + // https://github.com/antlr/antlr4/issues/1543 + @Test + public void testLabelsForTokensWithMixedTypesLRWithoutLabels() { + String[] test = { + "grammar L;\n" + + "\n" + + "expr\n" + + " : left=A '+' right=A\n" + + " | left=expr '-' right=expr\n" + + " ;\n" + + "\n" + + "A: 'a';\n" + + "B: 'b';\n" + + "C: 'c';\n", + + "error(" + 
ErrorType.LABEL_TYPE_CONFLICT.code + "): L.g4:3:0: label left type mismatch with previous definition: TOKEN_LABEL!=RULE_LABEL\n" + + "error(" + ErrorType.LABEL_TYPE_CONFLICT.code + "): L.g4:3:0: label right type mismatch with previous definition: RULE_LABEL!=TOKEN_LABEL\n" }; testErrors(test, false); @@ -327,12 +388,50 @@ public class TestSymbolIssues extends BaseJavaToolTest { "TOKEN_RANGE: [aa-f];\n" + "TOKEN_RANGE_2: [A-FD-J];\n" + "TOKEN_RANGE_3: 'Z' | 'K'..'R' | 'O'..'V';\n" + - "TOKEN_RANGE_4: 'g'..'l' | [g-l];\n", // Handling in ATNOptimizer. + "TOKEN_RANGE_4: 'g'..'l' | [g-l];\n" + + "TOKEN_RANGE_WITHOUT_COLLISION: '_' | [a-zA-Z];\n" + + "TOKEN_RANGE_WITH_ESCAPED_CHARS: [\\n-\\r] | '\\n'..'\\r';", + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars 'a'..'f' used multiple times in set [aa-f]\n" + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars 'D'..'J' used multiple times in set [A-FD-J]\n" + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars 'O'..'V' used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars 'g' used multiple times in set 'g'..'l'\n" + + "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars '\\n' used multiple times in set '\\n'..'\\r'\n" + }; - "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars \"a-f\" used multiple times in set [aa-f]\n" + - "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars \"D-J\" used multiple times in set [A-FD-J]\n" + - "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars \"O-V\" used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" + - "warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars \"g-l\" used multiple times in set [g-l]\n" + testErrors(test, false); + } + + @Test public void testUnreachableTokens() { + String[] 
test = { + "lexer grammar Test;\n" + + "TOKEN1: 'as' 'df' | 'qwer';\n" + + "TOKEN2: [0-9];\n" + + "TOKEN3: 'asdf';\n" + + "TOKEN4: 'q' 'w' 'e' 'r' | A;\n" + + "TOKEN5: 'aaaa';\n" + + "TOKEN6: 'asdf';\n" + + "TOKEN7: 'qwer'+;\n" + + "TOKEN8: 'a' 'b' | 'b' | 'a' 'b';\n" + + "fragment\n" + + "TOKEN9: 'asdf' | 'qwer' | 'qwer';\n" + + "TOKEN10: '\\r\\n' | '\\r\\n';\n" + + "TOKEN11: '\\r\\n';\n" + + "\n" + + "mode MODE1;\n" + + "TOKEN12: 'asdf';\n" + + "\n" + + "fragment A: 'A';", + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:4:0: One of the token TOKEN3 values unreachable. asdf is always overlapped by token TOKEN1\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:5:0: One of the token TOKEN4 values unreachable. qwer is always overlapped by token TOKEN1\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:7:0: One of the token TOKEN6 values unreachable. asdf is always overlapped by token TOKEN1\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:7:0: One of the token TOKEN6 values unreachable. asdf is always overlapped by token TOKEN3\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:9:0: One of the token TOKEN8 values unreachable. ab is always overlapped by token TOKEN8\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:11:0: One of the token TOKEN9 values unreachable. qwer is always overlapped by token TOKEN9\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:12:0: One of the token TOKEN10 values unreachable. \\r\\n is always overlapped by token TOKEN10\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:13:0: One of the token TOKEN11 values unreachable. \\r\\n is always overlapped by token TOKEN10\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:13:0: One of the token TOKEN11 values unreachable. 
\\r\\n is always overlapped by token TOKEN10\n" }; testErrors(test, false); diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestTokenPositionOptions.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestTokenPositionOptions.java index c6af3ee..a7527d7 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestTokenPositionOptions.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestTokenPositionOptions.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestTokenTypeAssignment.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestTokenTypeAssignment.java index c338eec..4c8d7fe 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestTokenTypeAssignment.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestTokenTypeAssignment.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. 
*/ @@ -141,6 +141,24 @@ public class TestTokenTypeAssignment extends BaseJavaToolTest { assertEquals("'\\n'", literals.toArray()[0]); } + @Test public void testParserCharLiteralWithBasicUnicodeEscape() throws Exception { + Grammar g = new Grammar( + "grammar t;\n"+ + "a : '\\uABCD';\n"); + Set<?> literals = g.stringLiteralToTypeMap.keySet(); + // must store literals how they appear in the antlr grammar + assertEquals("'\\uABCD'", literals.toArray()[0]); + } + + @Test public void testParserCharLiteralWithExtendedUnicodeEscape() throws Exception { + Grammar g = new Grammar( + "grammar t;\n"+ + "a : '\\u{1ABCD}';\n"); + Set<?> literals = g.stringLiteralToTypeMap.keySet(); + // must store literals how they appear in the antlr grammar + assertEquals("'\\u{1ABCD}'", literals.toArray()[0]); + } + protected void checkSymbols(Grammar g, String rulesStr, String allValidTokensStr) diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java index 639c638..a8bb864 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. 
*/ @@ -8,6 +8,7 @@ package org.antlr.v4.test.tool; import org.antlr.v4.Tool; import org.antlr.v4.tool.ErrorType; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -60,6 +61,16 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest { super.testSetUp(); } + @Test + public void AllErrorCodesDistinct() { + ErrorType[] errorTypes = ErrorType.class.getEnumConstants(); + for (int i = 0; i < errorTypes.length; i++) { + for (int j = i + 1; j < errorTypes.length; j++) { + Assert.assertNotEquals(errorTypes[i].code, errorTypes[j].code); + } + } + } + @Test public void testA() { super.testErrors(A, true); } @Test public void testExtraColon() { @@ -258,11 +269,11 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest { "grammar A;\n" + "tokens{Foo}\n" + "b : Foo ;\n" + - "X : 'foo' -> popmode;\n" + // "meant" to use -> popMode - "Y : 'foo' -> token(Foo);", // "meant" to use -> type(Foo) + "X : 'foo1' -> popmode;\n" + // "meant" to use -> popMode + "Y : 'foo2' -> token(Foo);", // "meant" to use -> type(Foo) - "error(" + ErrorType.INVALID_LEXER_COMMAND.code + "): A.g4:4:13: lexer command popmode does not exist or is not supported by the current target\n" + - "error(" + ErrorType.INVALID_LEXER_COMMAND.code + "): A.g4:5:13: lexer command token does not exist or is not supported by the current target\n" + "error(" + ErrorType.INVALID_LEXER_COMMAND.code + "): A.g4:4:14: lexer command popmode does not exist or is not supported by the current target\n" + + "error(" + ErrorType.INVALID_LEXER_COMMAND.code + "): A.g4:5:14: lexer command token does not exist or is not supported by the current target\n" }; super.testErrors(pair, true); } @@ -272,11 +283,11 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest { "grammar A;\n" + "tokens{Foo}\n" + "b : Foo ;\n" + - "X : 'foo' -> popMode(Foo);\n" + // "meant" to use -> popMode - "Y : 'foo' -> type;", // "meant" to use -> type(Foo) + "X : 'foo1' -> popMode(Foo);\n" + // "meant" to use -> popMode + 
"Y : 'foo2' -> type;", // "meant" to use -> type(Foo) - "error(" + ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT.code + "): A.g4:4:13: lexer command popMode does not take any arguments\n" + - "error(" + ErrorType.MISSING_LEXER_COMMAND_ARGUMENT.code + "): A.g4:5:13: missing argument for lexer command type\n" + "error(" + ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT.code + "): A.g4:4:14: lexer command popMode does not take any arguments\n" + + "error(" + ErrorType.MISSING_LEXER_COMMAND_ARGUMENT.code + "): A.g4:5:14: missing argument for lexer command type\n" }; super.testErrors(pair, true); } @@ -433,13 +444,33 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest { } /** + * This is a regression test for https://github.com/antlr/antlr4/issues/1815 + * "Null ptr exception in SqlBase.g4" + */ + @Test public void testDoubleQuoteInTwoStringLiterals() { + String grammar = + "lexer grammar A;\n" + + "STRING : '\\\"' '\\\"' 'x' ;"; + String expected = + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:10: invalid escape sequence \\\"\n"+ + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:15: invalid escape sequence \\\"\n"; + + String[] pair = new String[] { + grammar, + expected + }; + + super.testErrors(pair, true); + } + + /** * This test ensures that the {@link ErrorType#INVALID_ESCAPE_SEQUENCE} * error is not reported for escape sequences that are known to be valid. 
*/ @Test public void testValidEscapeSequences() { String grammar = "lexer grammar A;\n" + - "NORMAL_ESCAPE : '\\b \\t \\n \\f \\r \\\" \\' \\\\';\n" + + "NORMAL_ESCAPE : '\\b \\t \\n \\f \\r \\' \\\\';\n" + "UNICODE_ESCAPE : '\\u0001 \\u00A1 \\u00a1 \\uaaaa \\uAAAA';\n"; String expected = ""; @@ -462,9 +493,10 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest { "lexer grammar A;\n" + "RULE : 'Foo \\uAABG \\x \\u';\n"; String expected = - "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence\n" + - "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence\n" + - "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence\n"; + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence \\uAABG\n" + + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence \\x\n" + + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n" + + "warning("+ErrorType.EPSILON_TOKEN.code+"): A.g4:2:0: non-fragment lexer rule RULE can match the empty string\n"; String[] pair = new String[] { grammar, @@ -501,25 +533,70 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest { super.testErrors(pair, true); } - @Test public void testInvalidCharSetAndRange() { + @Test public void testInvalidCharSetsAndStringLiterals() { String grammar = "lexer grammar Test;\n" + - "INVALID_RANGE: 'GH'..'LM';\n" + - "INVALID_RANGE_2: 'F'..'A' | 'Z';\n" + - "VALID_STRING_LITERALS: '\\u1234' | '\\t' | [\\-\\]];\n" + - "INVALID_CHAR_SET: [f-az][];\n" + - "INVALID_CHAR_SET_2: [\\u24\\uA2][\\u24];\n" + //https://github.com/antlr/antlr4/issues/1077 - "INVALID_CHAR_SET_3: [\\t\\{];"; + "INVALID_STRING_LITERAL: '\\\"' | '\\]' | '\\u24';\n" + + "INVALID_STRING_LITERAL_RANGE: 'GH'..'LM';\n" + + "INVALID_CHAR_SET: [\\u24\\uA2][\\{];\n" + //https://github.com/antlr/antlr4/issues/1077 + 
"EMPTY_STRING_LITERAL_RANGE: 'F'..'A' | 'Z';\n" + + "EMPTY_CHAR_SET: [f-az][];\n" + + "START_HYPHEN_IN_CHAR_SET: [-z];\n" + + "END_HYPHEN_IN_CHAR_SET: [a-];\n" + + "SINGLE_HYPHEN_IN_CHAR_SET: [-];\n" + + "VALID_STRING_LITERALS: '\\u1234' | '\\t' | '\\'';\n" + + "VALID_CHAR_SET: [`\\-=\\]];"; String expected = - "error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:2:23: multi-character literals are not allowed in lexer sets: 'GH'\n" + - "error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:2:29: multi-character literals are not allowed in lexer sets: 'LM'\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:3:26: string literals and sets cannot be empty: 'F'..'A'\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:23: string literals and sets cannot be empty: [f-a]\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:29: string literals and sets cannot be empty: []\n" + - "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:23: invalid escape sequence\n" + - "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:33: invalid escape sequence\n" + - "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:23: invalid escape sequence\n"; + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:31: invalid escape sequence \\\"\n" + + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:38: invalid escape sequence \\]\n" + + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:45: invalid escape sequence \\u24\n" + + "error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:30: multi-character literals are not allowed in lexer sets: 'GH'\n" + + "error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:36: multi-character literals are not allowed in lexer sets: 'LM'\n" + + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid 
escape sequence \\u24\\u\n" + + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence \\{\n" + + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" + + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" + + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n" + + "warning("+ ErrorType.EPSILON_TOKEN.code + "): Test.g4:2:0: non-fragment lexer rule INVALID_STRING_LITERAL can match the empty string\n"; + + String[] pair = new String[] { + grammar, + expected + }; + + super.testErrors(pair, true); + } + + @Test public void testInvalidUnicodeEscapesInCharSet() { + String grammar = + "lexer grammar Test;\n" + + "INVALID_EXTENDED_UNICODE_EMPTY: [\\u{}];\n" + + "INVALID_EXTENDED_UNICODE_NOT_TERMINATED: [\\u{];\n" + + "INVALID_EXTENDED_UNICODE_TOO_LONG: [\\u{110000}];\n" + + "INVALID_UNICODE_PROPERTY_EMPTY: [\\p{}];\n" + + "INVALID_UNICODE_PROPERTY_NOT_TERMINATED: [\\p{];\n" + + "INVALID_INVERTED_UNICODE_PROPERTY_EMPTY: [\\P{}];\n" + + "INVALID_UNICODE_PROPERTY_UNKNOWN: [\\p{NotAProperty}];\n" + + "INVALID_INVERTED_UNICODE_PROPERTY_UNKNOWN: [\\P{NotAProperty}];\n" + + "UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}];\n" + + "UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE_2: [\\p{Letter}-Z];\n" + + "UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE_3: [A-\\p{Number}];\n" + + "INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n"; + + String expected = + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:32: invalid escape sequence \\u{}\n"+ + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:41: invalid escape sequence \\u{\n"+ + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:35: invalid 
escape sequence \\u{110\n"+ + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:5:32: invalid escape sequence \\p{}\n"+ + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:41: invalid escape sequence \\p{\n"+ + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:41: invalid escape sequence \\P{}\n"+ + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:8:34: invalid escape sequence \\p{NotAProperty}\n"+ + "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:9:43: invalid escape sequence \\P{NotAProperty}\n"+ + "error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:10:39: unicode property escapes not allowed in lexer charset range: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}]\n" + + "error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:11:41: unicode property escapes not allowed in lexer charset range: [\\p{Letter}-Z]\n" + + "error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:12:41: unicode property escapes not allowed in lexer charset range: [A-\\p{Number}]\n" + + "error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:13:48: unicode property escapes not allowed in lexer charset range: [\\P{Uppercase_Letter}-\\P{Number}]\n"; String[] pair = new String[] { grammar, @@ -729,4 +806,20 @@ public class TestToolSyntaxErrors extends BaseJavaToolTest { String[] pair = { grammar, expected }; super.testErrors(pair, true); } + + // Test for https://github.com/antlr/antlr4/issues/1556 + @Test public void testRangeInParserGrammar() { + String grammar = + "grammar T;\n"+ + "a: 'A'..'Z' ;\n"; + String expected = + "error(" + ErrorType.TOKEN_RANGE_IN_PARSER.code + "): T.g4:2:4: token ranges not allowed in parser: 'A'..'Z'\n"; + + String[] pair = new String[] { + grammar, + expected + }; + + super.testErrors(pair, true); + } } diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestTopologicalSort.java 
b/tool-testsuite/test/org/antlr/v4/test/tool/TestTopologicalSort.java index 2c0b596..7e3dfef 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestTopologicalSort.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestTopologicalSort.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnbufferedCharStream.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnbufferedCharStream.java index fec0936..11f781d 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnbufferedCharStream.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnbufferedCharStream.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. 
*/ @@ -313,6 +313,30 @@ public class TestUnbufferedCharStream extends BaseJavaToolTest { assertEquals(expecting, tokens.getTokens().toString()); } + @Test public void testUnicodeSMP() throws Exception { + TestingUnbufferedCharStream input = createStream("\uD83C\uDF0E"); + assertEquals(0x1F30E, input.LA(1)); + assertEquals("\uD83C\uDF0E", input.getBuffer()); + input.consume(); + assertEquals(IntStream.EOF, input.LA(1)); + assertEquals("\uFFFF", input.getBuffer()); + } + + @Test(expected = RuntimeException.class) + public void testDanglingHighSurrogateAtEOFThrows() throws Exception { + createStream("\uD83C"); + } + + @Test(expected = RuntimeException.class) + public void testDanglingHighSurrogateThrows() throws Exception { + createStream("\uD83C\u0123"); + } + + @Test(expected = RuntimeException.class) + public void testDanglingLowSurrogateThrows() throws Exception { + createStream("\uDF0E"); + } + protected static TestingUnbufferedCharStream createStream(String text) { return new TestingUnbufferedCharStream(new StringReader(text)); } @@ -336,7 +360,13 @@ public class TestUnbufferedCharStream extends BaseJavaToolTest { */ public String getRemainingBuffer() { if ( n==0 ) return ""; - return new String(data,p,n-p); + int len = n; + if (data[len-1] == IntStream.EOF) { + // Don't pass -1 to new String(). + return new String(data,p,len-p-1) + "\uFFFF"; + } else { + return new String(data,p,len-p); + } } /** For testing. What's in moving window buffer into data stream. @@ -344,7 +374,14 @@ public class TestUnbufferedCharStream extends BaseJavaToolTest { */ public String getBuffer() { if ( n==0 ) return ""; - return new String(data,0,n); + int len = n; + // Don't pass -1 to new String(). + if (data[len-1] == IntStream.EOF) { + // Don't pass -1 to new String(). 
+ return new String(data,0,len-1) + "\uFFFF"; + } else { + return new String(data,0,len); + } } } diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnbufferedTokenStream.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnbufferedTokenStream.java index 5d9795d..700fe74 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnbufferedTokenStream.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnbufferedTokenStream.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeData.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeData.java new file mode 100644 index 0000000..231c5af --- /dev/null +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeData.java @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.tool; + +import java.util.Map; + +import org.antlr.v4.unicode.UnicodeData; +import org.antlr.v4.runtime.misc.IntervalSet; + +import org.junit.Test; +import org.junit.Rule; +import org.junit.rules.ExpectedException; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestUnicodeData { + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + public void testUnicodeGeneralCategoriesLatin() { + assertTrue(UnicodeData.getPropertyCodePoints("Lu").contains('X')); + assertFalse(UnicodeData.getPropertyCodePoints("Lu").contains('x')); + assertTrue(UnicodeData.getPropertyCodePoints("Ll").contains('x')); + assertFalse(UnicodeData.getPropertyCodePoints("Ll").contains('X')); + assertTrue(UnicodeData.getPropertyCodePoints("L").contains('X')); + assertTrue(UnicodeData.getPropertyCodePoints("L").contains('x')); + assertTrue(UnicodeData.getPropertyCodePoints("N").contains('0')); + assertTrue(UnicodeData.getPropertyCodePoints("Z").contains(' ')); + } + + @Test + public void testUnicodeGeneralCategoriesBMP() { + assertTrue(UnicodeData.getPropertyCodePoints("Lu").contains('\u1E3A')); + assertFalse(UnicodeData.getPropertyCodePoints("Lu").contains('\u1E3B')); + assertTrue(UnicodeData.getPropertyCodePoints("Ll").contains('\u1E3B')); + assertFalse(UnicodeData.getPropertyCodePoints("Ll").contains('\u1E3A')); + assertTrue(UnicodeData.getPropertyCodePoints("L").contains('\u1E3A')); + assertTrue(UnicodeData.getPropertyCodePoints("L").contains('\u1E3B')); + assertTrue(UnicodeData.getPropertyCodePoints("N").contains('\u1BB0')); + assertFalse(UnicodeData.getPropertyCodePoints("N").contains('\u1E3A')); + assertTrue(UnicodeData.getPropertyCodePoints("Z").contains('\u2028')); + assertFalse(UnicodeData.getPropertyCodePoints("Z").contains('\u1E3A')); + } + + @Test + public void testUnicodeGeneralCategoriesSMP() { + 
assertTrue(UnicodeData.getPropertyCodePoints("Lu").contains(0x1D5D4)); + assertFalse(UnicodeData.getPropertyCodePoints("Lu").contains(0x1D770)); + assertTrue(UnicodeData.getPropertyCodePoints("Ll").contains(0x1D770)); + assertFalse(UnicodeData.getPropertyCodePoints("Ll").contains(0x1D5D4)); + assertTrue(UnicodeData.getPropertyCodePoints("L").contains(0x1D5D4)); + assertTrue(UnicodeData.getPropertyCodePoints("L").contains(0x1D770)); + assertTrue(UnicodeData.getPropertyCodePoints("N").contains(0x11C50)); + assertFalse(UnicodeData.getPropertyCodePoints("N").contains(0x1D5D4)); + } + + @Test + public void testUnicodeCategoryAliases() { + assertTrue(UnicodeData.getPropertyCodePoints("Lowercase_Letter").contains('x')); + assertFalse(UnicodeData.getPropertyCodePoints("Lowercase_Letter").contains('X')); + assertTrue(UnicodeData.getPropertyCodePoints("Letter").contains('x')); + assertFalse(UnicodeData.getPropertyCodePoints("Letter").contains('0')); + assertTrue(UnicodeData.getPropertyCodePoints("Enclosing_Mark").contains(0x20E2)); + assertFalse(UnicodeData.getPropertyCodePoints("Enclosing_Mark").contains('x')); + } + + @Test + public void testUnicodeBinaryProperties() { + assertTrue(UnicodeData.getPropertyCodePoints("Emoji").contains(0x1F4A9)); + assertFalse(UnicodeData.getPropertyCodePoints("Emoji").contains('X')); + assertTrue(UnicodeData.getPropertyCodePoints("alnum").contains('9')); + assertFalse(UnicodeData.getPropertyCodePoints("alnum").contains(0x1F4A9)); + assertTrue(UnicodeData.getPropertyCodePoints("Dash").contains('-')); + assertTrue(UnicodeData.getPropertyCodePoints("Hex").contains('D')); + assertFalse(UnicodeData.getPropertyCodePoints("Hex").contains('Q')); + } + + @Test + public void testUnicodeBinaryPropertyAliases() { + assertTrue(UnicodeData.getPropertyCodePoints("Ideo").contains('\u611B')); + assertFalse(UnicodeData.getPropertyCodePoints("Ideo").contains('X')); + assertTrue(UnicodeData.getPropertyCodePoints("Soft_Dotted").contains('\u0456')); + 
assertFalse(UnicodeData.getPropertyCodePoints("Soft_Dotted").contains('X')); + assertTrue(UnicodeData.getPropertyCodePoints("Noncharacter_Code_Point").contains('\uFFFF')); + assertFalse(UnicodeData.getPropertyCodePoints("Noncharacter_Code_Point").contains('X')); + } + + @Test + public void testUnicodeScripts() { + assertTrue(UnicodeData.getPropertyCodePoints("Zyyy").contains('0')); + assertTrue(UnicodeData.getPropertyCodePoints("Latn").contains('X')); + assertTrue(UnicodeData.getPropertyCodePoints("Hani").contains(0x4E04)); + assertTrue(UnicodeData.getPropertyCodePoints("Cyrl").contains(0x0404)); + } + + @Test + public void testUnicodeScriptEquals() { + assertTrue(UnicodeData.getPropertyCodePoints("Script=Zyyy").contains('0')); + assertTrue(UnicodeData.getPropertyCodePoints("Script=Latn").contains('X')); + assertTrue(UnicodeData.getPropertyCodePoints("Script=Hani").contains(0x4E04)); + assertTrue(UnicodeData.getPropertyCodePoints("Script=Cyrl").contains(0x0404)); + } + + @Test + public void testUnicodeScriptAliases() { + assertTrue(UnicodeData.getPropertyCodePoints("Common").contains('0')); + assertTrue(UnicodeData.getPropertyCodePoints("Latin").contains('X')); + assertTrue(UnicodeData.getPropertyCodePoints("Han").contains(0x4E04)); + assertTrue(UnicodeData.getPropertyCodePoints("Cyrillic").contains(0x0404)); + } + + @Test + public void testUnicodeBlocks() { + assertTrue(UnicodeData.getPropertyCodePoints("InASCII").contains('0')); + assertTrue(UnicodeData.getPropertyCodePoints("InCJK").contains(0x4E04)); + assertTrue(UnicodeData.getPropertyCodePoints("InCyrillic").contains(0x0404)); + assertTrue(UnicodeData.getPropertyCodePoints("InMisc_Pictographs").contains(0x1F4A9)); + } + + @Test + public void testUnicodeBlockEquals() { + assertTrue(UnicodeData.getPropertyCodePoints("Block=ASCII").contains('0')); + assertTrue(UnicodeData.getPropertyCodePoints("Block=CJK").contains(0x4E04)); + assertTrue(UnicodeData.getPropertyCodePoints("Block=Cyrillic").contains(0x0404)); + 
assertTrue(UnicodeData.getPropertyCodePoints("Block=Misc_Pictographs").contains(0x1F4A9));
+    }
+
+    @Test
+    public void testUnicodeBlockAliases() {
+        assertTrue(UnicodeData.getPropertyCodePoints("InBasic_Latin").contains('0'));
+        assertTrue(UnicodeData.getPropertyCodePoints("InMiscellaneous_Mathematical_Symbols_B").contains(0x29BE));
+    }
+
+    @Test
+    public void testEnumeratedPropertyEquals() { // spot-checks "Property=Value" lookups; each query matches the value its message names
+        assertTrue(
+                "U+1F481 INFORMATION DESK PERSON is an emoji modifier base",
+                UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=E_Base").contains(0x1F481));
+
+        assertFalse(
+                "U+1F47E ALIEN MONSTER is not an emoji modifier",
+                UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=E_Base").contains(0x1F47E));
+
+        assertTrue(
+                "U+0E33 THAI CHARACTER SARA AM is a spacing mark",
+                UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=SpacingMark").contains(0x0E33)); // was E_Base / 0x1F481 (copy-paste)
+
+        assertFalse(
+                "U+1038 MYANMAR SIGN VISARGA is not a spacing mark",
+                UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=SpacingMark").contains(0x1038)); // was E_Base, which passed trivially
+
+        assertTrue(
+                "U+00A1 INVERTED EXCLAMATION MARK has ambiguous East Asian Width",
+                UnicodeData.getPropertyCodePoints("East_Asian_Width=Ambiguous").contains(0x00A1));
+
+        assertFalse(
+                "U+00A2 CENT SIGN does not have ambiguous East Asian Width",
+                UnicodeData.getPropertyCodePoints("East_Asian_Width=Ambiguous").contains(0x00A2));
+
+    }
+
+    @Test
+    public void extendedPictographic() {
+        assertTrue(
+                "U+1F588 BLACK PUSHPIN is in Extended Pictographic",
+                UnicodeData.getPropertyCodePoints("Extended_Pictographic").contains(0x1F588));
+        assertFalse(
+                "0 is not in Extended Pictographic",
+                UnicodeData.getPropertyCodePoints("Extended_Pictographic").contains('0'));
+    }
+
+    @Test
+    public void emojiPresentation() {
+        assertTrue(
+                "U+1F4A9 PILE OF POO is in EmojiPresentation=EmojiDefault",
+                UnicodeData.getPropertyCodePoints("EmojiPresentation=EmojiDefault").contains(0x1F4A9));
+        assertFalse(
+                "0 is not in EmojiPresentation=EmojiDefault",
+
UnicodeData.getPropertyCodePoints("EmojiPresentation=EmojiDefault").contains('0')); + assertFalse( + "A is not in EmojiPresentation=EmojiDefault", + UnicodeData.getPropertyCodePoints("EmojiPresentation=EmojiDefault").contains('A')); + assertFalse( + "U+1F4A9 PILE OF POO is not in EmojiPresentation=TextDefault", + UnicodeData.getPropertyCodePoints("EmojiPresentation=TextDefault").contains(0x1F4A9)); + assertTrue( + "0 is in EmojiPresentation=TextDefault", + UnicodeData.getPropertyCodePoints("EmojiPresentation=TextDefault").contains('0')); + assertFalse( + "A is not in EmojiPresentation=TextDefault", + UnicodeData.getPropertyCodePoints("EmojiPresentation=TextDefault").contains('A')); + } + + @Test + public void testPropertyCaseInsensitivity() { + assertTrue(UnicodeData.getPropertyCodePoints("l").contains('x')); + assertFalse(UnicodeData.getPropertyCodePoints("l").contains('0')); + assertTrue(UnicodeData.getPropertyCodePoints("common").contains('0')); + assertTrue(UnicodeData.getPropertyCodePoints("Alnum").contains('0')); + } + + @Test + public void testPropertyDashSameAsUnderscore() { + assertTrue(UnicodeData.getPropertyCodePoints("InLatin-1").contains('\u00F0')); + } + + @Test + public void modifyingUnicodeDataShouldThrow() { + thrown.expect(IllegalStateException.class); + thrown.expectMessage("can't alter readonly IntervalSet"); + UnicodeData.getPropertyCodePoints("L").add(0x12345); + } +} diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java new file mode 100644 index 0000000..2792462 --- /dev/null +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */
+
+package org.antlr.v4.test.tool;
+
+import org.antlr.v4.codegen.UnicodeEscapes;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestUnicodeEscapes { // checks the text UnicodeEscapes appends for U+0061, U+ABCD and U+1F4A9 in Java, Python and Swift style
+    @Test
+    public void latinJavaEscape() { // U+0061: expects backslash, 'u', then four hex digits
+        StringBuilder sb = new StringBuilder();
+        UnicodeEscapes.appendJavaStyleEscapedCodePoint(0x0061, sb);
+        assertEquals("\\u0061", sb.toString());
+    }
+
+    @Test
+    public void latinPythonEscape() { // U+0061: expected text is the same as the Java-style case
+        StringBuilder sb = new StringBuilder();
+        UnicodeEscapes.appendPythonStyleEscapedCodePoint(0x0061, sb);
+        assertEquals("\\u0061", sb.toString());
+    }
+
+    @Test
+    public void latinSwiftEscape() { // U+0061: expects the hex digits wrapped in braces
+        StringBuilder sb = new StringBuilder();
+        UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0x0061, sb);
+        assertEquals("\\u{0061}", sb.toString());
+    }
+
+    @Test
+    public void bmpJavaEscape() { // U+ABCD: expects a single four-hex-digit escape
+        StringBuilder sb = new StringBuilder();
+        UnicodeEscapes.appendJavaStyleEscapedCodePoint(0xABCD, sb);
+        assertEquals("\\uABCD", sb.toString());
+    }
+
+    @Test
+    public void bmpPythonEscape() { // U+ABCD: expected text is the same as the Java-style case
+        StringBuilder sb = new StringBuilder();
+        UnicodeEscapes.appendPythonStyleEscapedCodePoint(0xABCD, sb);
+        assertEquals("\\uABCD", sb.toString());
+    }
+
+    @Test
+    public void bmpSwiftEscape() { // U+ABCD: expects the hex digits wrapped in braces
+        StringBuilder sb = new StringBuilder();
+        UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0xABCD, sb);
+        assertEquals("\\u{ABCD}", sb.toString());
+    }
+
+    @Test
+    public void smpJavaEscape() { // U+1F4A9: expects two four-hex-digit escapes, D83D then DCA9 (the UTF-16 surrogate pair)
+        StringBuilder sb = new StringBuilder();
+        UnicodeEscapes.appendJavaStyleEscapedCodePoint(0x1F4A9, sb);
+        assertEquals("\\uD83D\\uDCA9", sb.toString());
+    }
+
+    @Test
+    public void smpPythonEscape() { // U+1F4A9: expects backslash, capital 'U', then eight hex digits
+        StringBuilder sb = new StringBuilder();
+        UnicodeEscapes.appendPythonStyleEscapedCodePoint(0x1F4A9, sb);
+        assertEquals("\\U0001F4A9", sb.toString());
+    }
+
+    @Test
+    public void smpSwiftEscape() { // U+1F4A9: expects the five hex digits in braces, no surrogate split
+        StringBuilder sb = new StringBuilder();
+        UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0x1F4A9, sb);
+        assertEquals("\\u{1F4A9}", sb.toString());
+    }
+}
diff --git
a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeGrammar.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeGrammar.java new file mode 100644 index 0000000..fb19531 --- /dev/null +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeGrammar.java @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.tool; + +import org.antlr.v4.gui.Trees; +import org.antlr.v4.runtime.ANTLRInputStream; +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.LexerInterpreter; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.tool.Grammar; +import org.antlr.v4.tool.GrammarParserInterpreter; +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; + +import static org.junit.Assert.assertEquals; + +public class TestUnicodeGrammar extends BaseJavaToolTest { + @Test + public void unicodeBMPLiteralInGrammar() throws Exception { + String grammarText = + "grammar Unicode;\n" + + "r : 'hello' WORLD;\n" + + "WORLD : ('world' | '\\u4E16\\u754C' | '\\u1000\\u1019\\u1039\\u1018\\u102C' );\n" + + "WS : [ \\t\\r\\n]+ -> skip;\n"; + String inputText = "hello \u4E16\u754C"; + assertEquals( + "(r:1 " + inputText + ")", + parseTreeForGrammarWithInput( + grammarText, + "r", + inputText)); + } + + // TODO: This test cannot pass unless we change either the grammar + // parser to decode surrogate pair literals to code points (which + // would break existing clients) or to treat them as an + // alternative: + // + // '\\uD83C\\uDF0D' -> ('\\u{1F30E}' | '\\uD83C\\uDF0D') + // + // but I worry that might cause parse ambiguity if we're not careful. 
+ //@Test + public void unicodeSurrogatePairLiteralInGrammar() throws Exception { + String grammarText = + "grammar Unicode;\n" + + "r : 'hello' WORLD;\n" + + "WORLD : ('\\uD83C\\uDF0D' | '\\uD83C\\uDF0E' | '\\uD83C\\uDF0F' );\n" + + "WS : [ \\t\\r\\n]+ -> skip;\n"; + String inputText = new StringBuilder("hello ") + .appendCodePoint(0x1F30E) + .toString(); + assertEquals( + "(r:1 " + inputText + ")", + parseTreeForGrammarWithInput( + grammarText, + "r", + inputText)); + } + + @Test + public void unicodeSMPLiteralInGrammar() throws Exception { + String grammarText = + "grammar Unicode;\n" + + "r : 'hello' WORLD;\n" + + "WORLD : ('\\u{1F30D}' | '\\u{1F30E}' | '\\u{1F30F}' );\n" + + "WS : [ \\t\\r\\n]+ -> skip;\n"; + String inputText = new StringBuilder("hello ") + .appendCodePoint(0x1F30E) + .toString(); + assertEquals( + "(r:1 " + inputText + ")", + parseTreeForGrammarWithInput( + grammarText, + "r", + inputText)); + } + + @Test + public void unicodeSMPRangeInGrammar() throws Exception { + String grammarText = + "grammar Unicode;\n" + + "r : 'hello' WORLD;\n" + + "WORLD : ('\\u{1F30D}'..'\\u{1F30F}' );\n" + + "WS : [ \\t\\r\\n]+ -> skip;\n"; + String inputText = new StringBuilder("hello ") + .appendCodePoint(0x1F30E) + .toString(); + assertEquals( + "(r:1 " + inputText + ")", + parseTreeForGrammarWithInput( + grammarText, + "r", + inputText)); + } + + @Test + public void matchingDanglingSurrogateInInput() throws Exception { + String grammarText = + "grammar Unicode;\n" + + "r : 'hello' WORLD;\n" + + "WORLD : ('\\uD83C' | '\\uD83D' | '\\uD83E' );\n" + + "WS : [ \\t\\r\\n]+ -> skip;\n"; + String inputText = "hello \uD83C"; + assertEquals( + "(r:1 " + inputText + ")", + parseTreeForGrammarWithInput( + grammarText, + "r", + inputText)); + } + + @Test + public void binaryGrammar() throws Exception { + String grammarText = + "grammar Binary;\n" + + "r : HEADER PACKET+ FOOTER;\n" + + "HEADER : '\\u0002\\u0000\\u0001\\u0007';\n" + + "PACKET : '\\u00D0' ('\\u00D1' | 
'\\u00D2' | '\\u00D3') +;\n" + + "FOOTER : '\\u00FF';\n"; + byte[] toParse = new byte[] { + (byte)0x02, (byte)0x00, (byte)0x01, (byte)0x07, + (byte)0xD0, (byte)0xD2, (byte)0xD2, (byte)0xD3, (byte)0xD3, (byte)0xD3, + (byte)0xD0, (byte)0xD3, (byte)0xD3, (byte)0xD1, + (byte)0xFF + }; + CharStream charStream; + try (ByteArrayInputStream is = new ByteArrayInputStream(toParse); + // Note we use ISO_8859_1 to treat all byte values as Unicode "characters" from + // U+0000 to U+00FF. + InputStreamReader isr = new InputStreamReader(is, StandardCharsets.ISO_8859_1)) { + charStream = new ANTLRInputStream(isr); + } + Grammar grammar = new Grammar(grammarText); + LexerInterpreter lexEngine = grammar.createLexerInterpreter(charStream); + CommonTokenStream tokens = new CommonTokenStream(lexEngine); + GrammarParserInterpreter parser = grammar.createGrammarParserInterpreter(tokens); + ParseTree parseTree = parser.parse(grammar.rules.get("r").index); + InterpreterTreeTextProvider nodeTextProvider = + new InterpreterTreeTextProvider(grammar.getRuleNames()); + String result = Trees.toStringTree(parseTree, nodeTextProvider); + + assertEquals( + "(r:1 \u0002\u0000\u0001\u0007 \u00D0\u00D2\u00D2\u00D3\u00D3\u00D3 \u00D0\u00D3\u00D3\u00D1 \u00FF)", + result); + } + + private static String parseTreeForGrammarWithInput( + String grammarText, + String rootRule, + String inputText) throws Exception { + Grammar grammar = new Grammar(grammarText); + LexerInterpreter lexEngine = grammar.createLexerInterpreter( + CharStreams.fromString(inputText)); + CommonTokenStream tokens = new CommonTokenStream(lexEngine); + GrammarParserInterpreter parser = grammar.createGrammarParserInterpreter(tokens); + ParseTree parseTree = parser.parse(grammar.rules.get(rootRule).index); + InterpreterTreeTextProvider nodeTextProvider = + new InterpreterTreeTextProvider(grammar.getRuleNames()); + return Trees.toStringTree(parseTree, nodeTextProvider); + } +} diff --git 
a/tool-testsuite/test/org/antlr/v4/test/tool/TestVocabulary.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestVocabulary.java index b956494..6a880c7 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestVocabulary.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestVocabulary.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestXPath.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestXPath.java index 7dbd741..d3b0b0a 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestXPath.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestXPath.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ |