summaryrefslogtreecommitdiff
path: root/tool/src/org/antlr/v4/semantics/SymbolChecks.java
diff options
context:
space:
mode:
authorAndrius Merkys <andrius.merkys@gmail.com>2018-11-23 04:52:08 -0500
committerAndrius Merkys <andrius.merkys@gmail.com>2018-11-23 04:52:08 -0500
commit7b19e9be5be41c69c451b63c526bee059881f9b1 (patch)
tree699bf0523df6868d15843981ea9914ac096ee270 /tool/src/org/antlr/v4/semantics/SymbolChecks.java
parent1d0464db4ec5e5c20b2ae62bb3c4eceaa6840bde (diff)
New upstream version 4.7.1
Diffstat (limited to 'tool/src/org/antlr/v4/semantics/SymbolChecks.java')
-rw-r--r--tool/src/org/antlr/v4/semantics/SymbolChecks.java326
1 files changed, 245 insertions, 81 deletions
diff --git a/tool/src/org/antlr/v4/semantics/SymbolChecks.java b/tool/src/org/antlr/v4/semantics/SymbolChecks.java
index 5eccef5..abbf22d 100644
--- a/tool/src/org/antlr/v4/semantics/SymbolChecks.java
+++ b/tool/src/org/antlr/v4/semantics/SymbolChecks.java
@@ -1,12 +1,15 @@
/*
- * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.semantics;
+import org.antlr.runtime.tree.CommonTree;
+import org.antlr.runtime.tree.Tree;
import org.antlr.v4.automata.LexerATNFactory;
+import org.antlr.v4.parse.ANTLRLexer;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.tool.Alternative;
@@ -16,11 +19,15 @@ import org.antlr.v4.tool.ErrorManager;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LabelElementPair;
+import org.antlr.v4.tool.LabelType;
+import org.antlr.v4.tool.LeftRecursiveRule;
import org.antlr.v4.tool.LexerGrammar;
import org.antlr.v4.tool.Rule;
+import org.antlr.v4.tool.ast.AltAST;
import org.antlr.v4.tool.ast.GrammarAST;
-import org.antlr.v4.tool.LabelType;
+import org.antlr.v4.tool.ast.TerminalAST;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
@@ -35,42 +42,31 @@ import java.util.Set;
* Side-effect: strip away redef'd rules.
*/
public class SymbolChecks {
- Grammar g;
- SymbolCollector collector;
- Map<String, Rule> nameToRuleMap = new HashMap<String, Rule>();
+ Grammar g;
+ SymbolCollector collector;
+ Map<String, Rule> nameToRuleMap = new HashMap<String, Rule>();
Set<String> tokenIDs = new HashSet<String>();
- Map<String, Set<String>> actionScopeToActionNames = new HashMap<String, Set<String>>();
-// DoubleKeyMap<String, String, GrammarAST> namedActions =
-// new DoubleKeyMap<String, String, GrammarAST>();
+ Map<String, Set<String>> actionScopeToActionNames = new HashMap<String, Set<String>>();
public ErrorManager errMgr;
protected final Set<String> reservedNames = new HashSet<String>();
+
{
reservedNames.addAll(LexerATNFactory.getCommonConstants());
}
- public SymbolChecks(Grammar g, SymbolCollector collector) {
- this.g = g;
- this.collector = collector;
+ public SymbolChecks(Grammar g, SymbolCollector collector) {
+ this.g = g;
+ this.collector = collector;
this.errMgr = g.tool.errMgr;
- for (GrammarAST tokenId : collector.tokenIDRefs) {
- tokenIDs.add(tokenId.getText());
- }
- /*
- System.out.println("rules="+collector.rules);
- System.out.println("rulerefs="+collector.rulerefs);
- System.out.println("tokenIDRefs="+collector.tokenIDRefs);
- System.out.println("terminals="+collector.terminals);
- System.out.println("strings="+collector.strings);
- System.out.println("tokensDef="+collector.tokensDefs);
- System.out.println("actions="+collector.actions);
- System.out.println("scopes="+collector.scopes);
- */
- }
-
- public void process() {
+ for (GrammarAST tokenId : collector.tokenIDRefs) {
+ tokenIDs.add(tokenId.getText());
+ }
+ }
+
+ public void process() {
// methods affect fields, but no side-effects outside this object
// So, call order sensitive
// First collect all rules for later use in checkForLabelConflict()
@@ -79,7 +75,6 @@ public class SymbolChecks {
}
checkReservedNames(g.rules.values());
checkActionRedefinitions(collector.namedActions);
- checkForTokenConflicts(collector.tokenIDRefs); // sets tokenIDs
checkForLabelConflicts(g.rules.values());
}
@@ -92,7 +87,8 @@ public class SymbolChecks {
nameNode = (GrammarAST) ampersandAST.getChild(0);
if (ampersandAST.getChildCount() == 2) {
name = nameNode.getText();
- } else {
+ }
+ else {
scope = nameNode.getText();
name = ampersandAST.getChild(1).getText();
}
@@ -103,72 +99,126 @@ public class SymbolChecks {
}
if (!scopeActions.contains(name)) {
scopeActions.add(name);
- } else {
+ }
+ else {
errMgr.grammarError(ErrorType.ACTION_REDEFINITION,
g.fileName, nameNode.token, name);
}
}
}
- public void checkForTokenConflicts(List<GrammarAST> tokenIDRefs) {
-// for (GrammarAST a : tokenIDRefs) {
-// Token t = a.token;
-// String ID = t.getText();
-// tokenIDs.add(ID);
-// }
- }
-
- /** Make sure a label doesn't conflict with another symbol.
- * Labels must not conflict with: rules, tokens, scope names,
- * return values, parameters, and rule-scope dynamic attributes
- * defined in surrounding rule. Also they must have same type
- * for repeated defs.
- */
- public void checkForLabelConflicts(Collection<Rule> rules) {
+ /**
+ * Make sure a label doesn't conflict with another symbol.
+ * Labels must not conflict with: rules, tokens, scope names,
+ * return values, parameters, and rule-scope dynamic attributes
+ * defined in surrounding rule. Also they must have same type
+ * for repeated defs.
+ */
+ public void checkForLabelConflicts(Collection<Rule> rules) {
for (Rule r : rules) {
checkForAttributeConflicts(r);
- Map<String, LabelElementPair> labelNameSpace =
- new HashMap<String, LabelElementPair>();
- for (int i = 1; i <= r.numberOfAlts; i++) {
- if (r.hasAltSpecificContexts()) {
- labelNameSpace.clear();
- }
+ Map<String, LabelElementPair> labelNameSpace = new HashMap<>();
+ for (int i = 1; i <= r.numberOfAlts; i++) {
Alternative a = r.alt[i];
for (List<LabelElementPair> pairs : a.labelDefs.values()) {
- for (LabelElementPair p : pairs) {
- checkForLabelConflict(r, p.label);
- String name = p.label.getText();
- LabelElementPair prev = labelNameSpace.get(name);
- if (prev == null) labelNameSpace.put(name, p);
- else checkForTypeMismatch(prev, p);
+ if (r.hasAltSpecificContexts()) {
+ // Collect labelName-labeledRules map for rule with alternative labels.
+ Map<String, List<LabelElementPair>> labelPairs = new HashMap<>();
+ for (LabelElementPair p : pairs) {
+ String labelName = findAltLabelName(p.label);
+ if (labelName != null) {
+ List<LabelElementPair> list;
+ if (labelPairs.containsKey(labelName)) {
+ list = labelPairs.get(labelName);
+ }
+ else {
+ list = new ArrayList<>();
+ labelPairs.put(labelName, list);
+ }
+ list.add(p);
+ }
+ }
+
+ for (List<LabelElementPair> internalPairs : labelPairs.values()) {
+ labelNameSpace.clear();
+ checkLabelPairs(r, labelNameSpace, internalPairs);
+ }
+ }
+ else {
+ checkLabelPairs(r, labelNameSpace, pairs);
}
}
}
}
}
- void checkForTypeMismatch(LabelElementPair prevLabelPair, LabelElementPair labelPair) {
+ private void checkLabelPairs(Rule r, Map<String, LabelElementPair> labelNameSpace, List<LabelElementPair> pairs) {
+ for (LabelElementPair p : pairs) {
+ checkForLabelConflict(r, p.label);
+ String name = p.label.getText();
+ LabelElementPair prev = labelNameSpace.get(name);
+ if (prev == null) {
+ labelNameSpace.put(name, p);
+ }
+ else {
+ checkForTypeMismatch(r, prev, p);
+ }
+ }
+ }
+
+ private String findAltLabelName(CommonTree label) {
+ if (label == null) {
+ return null;
+ }
+ else if (label instanceof AltAST) {
+ AltAST altAST = (AltAST) label;
+ if (altAST.altLabel != null) {
+ return altAST.altLabel.toString();
+ }
+ else if (altAST.leftRecursiveAltInfo != null) {
+ return altAST.leftRecursiveAltInfo.altLabel.toString();
+ }
+ else {
+ return findAltLabelName(label.parent);
+ }
+ }
+ else {
+ return findAltLabelName(label.parent);
+ }
+ }
+
+ private void checkForTypeMismatch(Rule r, LabelElementPair prevLabelPair, LabelElementPair labelPair) {
// label already defined; if same type, no problem
if (prevLabelPair.type != labelPair.type) {
- String typeMismatchExpr = labelPair.type + "!=" + prevLabelPair.type;
+ // Current behavior: take a token of rule declaration in case of left-recursive rule
+ // Desired behavior: take a token of proper label declaration in case of left-recursive rule
+ // See https://github.com/antlr/antlr4/pull/1585
+ // Such behavior is referring to the fact that the warning is typically reported on the actual label redefinition,
+ // but for left-recursive rules the warning is reported on the enclosing rule.
+ org.antlr.runtime.Token token = r instanceof LeftRecursiveRule
+ ? ((GrammarAST) r.ast.getChild(0)).getToken()
+ : labelPair.label.token;
errMgr.grammarError(
ErrorType.LABEL_TYPE_CONFLICT,
g.fileName,
- labelPair.label.token,
+ token,
labelPair.label.getText(),
- typeMismatchExpr);
+ labelPair.type + "!=" + prevLabelPair.type);
}
if (!prevLabelPair.element.getText().equals(labelPair.element.getText()) &&
(prevLabelPair.type.equals(LabelType.RULE_LABEL) || prevLabelPair.type.equals(LabelType.RULE_LIST_LABEL)) &&
(labelPair.type.equals(LabelType.RULE_LABEL) || labelPair.type.equals(LabelType.RULE_LIST_LABEL))) {
+ org.antlr.runtime.Token token = r instanceof LeftRecursiveRule
+ ? ((GrammarAST) r.ast.getChild(0)).getToken()
+ : labelPair.label.token;
String prevLabelOp = prevLabelPair.type.equals(LabelType.RULE_LIST_LABEL) ? "+=" : "=";
String labelOp = labelPair.type.equals(LabelType.RULE_LIST_LABEL) ? "+=" : "=";
errMgr.grammarError(
ErrorType.LABEL_TYPE_CONFLICT,
g.fileName,
- labelPair.label.token,
+ token,
labelPair.label.getText() + labelOp + labelPair.element.getText(),
prevLabelPair.label.getText() + prevLabelOp + prevLabelPair.element.getText());
}
@@ -225,11 +275,11 @@ public class SymbolChecks {
for (Attribute attribute : attributes.attributes.values()) {
if (ruleNames.contains(attribute.name)) {
errMgr.grammarError(
- errorType,
- g.fileName,
- attribute.token != null ? attribute.token : ((GrammarAST)r.ast.getChild(0)).token,
- attribute.name,
- r.name);
+ errorType,
+ g.fileName,
+ attribute.token != null ? attribute.token : ((GrammarAST) r.ast.getChild(0)).token,
+ attribute.name,
+ r.name);
}
}
}
@@ -242,11 +292,11 @@ public class SymbolChecks {
Set<String> conflictingKeys = attributes.intersection(referenceAttributes);
for (String key : conflictingKeys) {
errMgr.grammarError(
- errorType,
- g.fileName,
- attributes.get(key).token != null ? attributes.get(key).token : ((GrammarAST) r.ast.getChild(0)).token,
- key,
- r.name);
+ errorType,
+ g.fileName,
+ attributes.get(key).token != null ? attributes.get(key).token : ((GrammarAST)r.ast.getChild(0)).token,
+ key,
+ r.name);
}
}
@@ -275,8 +325,122 @@ public class SymbolChecks {
}
}
- // CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline)
+ /**
+ * Algorithm steps:
+ * 1. Collect all simple string literals (i.e. 'asdf', 'as' 'df', but not [a-z]+, 'a'..'z')
+ * for all lexer rules in each mode except of autogenerated tokens ({@link #getSingleTokenValues(Rule) getSingleTokenValues})
+ * 2. Compare every string literal with each other ({@link #checkForOverlap(Grammar, Rule, Rule, List<String>, List<String>) checkForOverlap})
+ * and throw TOKEN_UNREACHABLE warning if the same string found.
+ * Complexity: O(m * n^2 / 2), approximately equals to O(n^2)
+ * where m - number of modes, n - average number of lexer rules per mode.
+ * See also testUnreachableTokens unit test for details.
+ */
+ public void checkForUnreachableTokens(Grammar g) {
+ if (g.isLexer()) {
+ LexerGrammar lexerGrammar = (LexerGrammar)g;
+ for (List<Rule> rules : lexerGrammar.modes.values()) {
+ // Collect string literal lexer rules for each mode
+ List<Rule> stringLiteralRules = new ArrayList<>();
+ List<List<String>> stringLiteralValues = new ArrayList<>();
+ for (int i = 0; i < rules.size(); i++) {
+ Rule rule = rules.get(i);
+
+ List<String> ruleStringAlts = getSingleTokenValues(rule);
+ if (ruleStringAlts != null && ruleStringAlts.size() > 0) {
+ stringLiteralRules.add(rule);
+ stringLiteralValues.add(ruleStringAlts);
+ }
+ }
+
+ // Check string sets intersection
+ for (int i = 0; i < stringLiteralRules.size(); i++) {
+ List<String> firstTokenStringValues = stringLiteralValues.get(i);
+ Rule rule1 = stringLiteralRules.get(i);
+ checkForOverlap(g, rule1, rule1, firstTokenStringValues, stringLiteralValues.get(i));
+
+ // Check fragment rules only with themself
+ if (!rule1.isFragment()) {
+ for (int j = i + 1; j < stringLiteralRules.size(); j++) {
+ Rule rule2 = stringLiteralRules.get(j);
+ if (!rule2.isFragment()) {
+ checkForOverlap(g, rule1, stringLiteralRules.get(j), firstTokenStringValues, stringLiteralValues.get(j));
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * {@return} list of simple string literals for rule {@param rule}
+ */
+ private List<String> getSingleTokenValues(Rule rule)
+ {
+ List<String> values = new ArrayList<>();
+ for (Alternative alt : rule.alt) {
+ if (alt != null) {
+ // select first alt if token has a command
+ Tree rootNode = alt.ast.getChildCount() == 2 &&
+ alt.ast.getChild(0) instanceof AltAST && alt.ast.getChild(1) instanceof GrammarAST
+ ? alt.ast.getChild(0)
+ : alt.ast;
+
+ if (rootNode.getTokenStartIndex() == -1) {
+ continue; // ignore autogenerated tokens from combined grammars that start with T__
+ }
+ // Ignore alt if contains not only string literals (repetition, optional)
+ boolean ignore = false;
+ StringBuilder currentValue = new StringBuilder();
+ for (int i = 0; i < rootNode.getChildCount(); i++) {
+ Tree child = rootNode.getChild(i);
+ if (!(child instanceof TerminalAST)) {
+ ignore = true;
+ break;
+ }
+
+ TerminalAST terminalAST = (TerminalAST)child;
+ if (terminalAST.token.getType() != ANTLRLexer.STRING_LITERAL) {
+ ignore = true;
+ break;
+ }
+ else {
+ String text = terminalAST.token.getText();
+ currentValue.append(text.substring(1, text.length() - 1));
+ }
+ }
+
+ if (!ignore) {
+ values.add(currentValue.toString());
+ }
+ }
+ }
+ return values;
+ }
+
+ /**
+ * For same rule compare values from next index:
+ * TOKEN_WITH_SAME_VALUES: 'asdf' | 'asdf';
+ * For different rules compare from start value:
+ * TOKEN1: 'asdf';
+ * TOKEN2: 'asdf';
+ */
+ private void checkForOverlap(Grammar g, Rule rule1, Rule rule2, List<String> firstTokenStringValues, List<String> secondTokenStringValues) {
+ for (int i = 0; i < firstTokenStringValues.size(); i++) {
+ int secondTokenInd = rule1 == rule2 ? i + 1 : 0;
+ String str1 = firstTokenStringValues.get(i);
+ for (int j = secondTokenInd; j < secondTokenStringValues.size(); j++) {
+ String str2 = secondTokenStringValues.get(j);
+ if (str1.equals(str2)) {
+ errMgr.grammarError(ErrorType.TOKEN_UNREACHABLE, g.fileName,
+ ((GrammarAST) rule2.ast.getChild(0)).token, rule2.name, str2, rule1.name);
+ }
+ }
+ }
+ }
+
+ // CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline)
public void checkRuleArgs(Grammar g, List<GrammarAST> rulerefs) {
if ( rulerefs==null ) return;
for (GrammarAST ref : rulerefs) {
@@ -285,12 +449,12 @@ public class SymbolChecks {
GrammarAST arg = (GrammarAST)ref.getFirstChildWithType(ANTLRParser.ARG_ACTION);
if ( arg!=null && (r==null || r.args==null) ) {
errMgr.grammarError(ErrorType.RULE_HAS_NO_ARGS,
- g.fileName, ref.token, ruleName);
+ g.fileName, ref.token, ruleName);
}
- else if ( arg==null && (r!=null&&r.args!=null) ) {
+ else if ( arg==null && (r!=null && r.args!=null) ) {
errMgr.grammarError(ErrorType.MISSING_RULE_ARGS,
- g.fileName, ref.token, ruleName);
+ g.fileName, ref.token, ruleName);
}
}
}
@@ -299,18 +463,18 @@ public class SymbolChecks {
for (GrammarAST dot : qualifiedRuleRefs) {
GrammarAST grammar = (GrammarAST)dot.getChild(0);
GrammarAST rule = (GrammarAST)dot.getChild(1);
- g.tool.log("semantics", grammar.getText()+"."+rule.getText());
+ g.tool.log("semantics", grammar.getText()+"."+rule.getText());
Grammar delegate = g.getImportedGrammar(grammar.getText());
if ( delegate==null ) {
errMgr.grammarError(ErrorType.NO_SUCH_GRAMMAR_SCOPE,
- g.fileName, grammar.token, grammar.getText(),
- rule.getText());
+ g.fileName, grammar.token, grammar.getText(),
+ rule.getText());
}
else {
if ( g.getRule(grammar.getText(), rule.getText())==null ) {
errMgr.grammarError(ErrorType.NO_SUCH_RULE_IN_SCOPE,
- g.fileName, rule.token, grammar.getText(),
- rule.getText());
+ g.fileName, rule.token, grammar.getText(),
+ rule.getText());
}
}
}