New upstream version 4.7.1

author: Andrius Merkys <andrius.merkys@gmail.com> 2018-11-23 04:52:08 -0500
committer: Andrius Merkys <andrius.merkys@gmail.com> 2018-11-23 04:52:08 -0500
commit: 7b19e9be5be41c69c451b63c526bee059881f9b1 (patch)
tree: 699bf0523df6868d15843981ea9914ac096ee270 /tool/src/org/antlr/v4/semantics/SymbolChecks.java
parent: 1d0464db4ec5e5c20b2ae62bb3c4eceaa6840bde (diff)
1 files changed, 245 insertions, 81 deletions
diff --git a/tool/src/org/antlr/v4/semantics/SymbolChecks.java b/tool/src/org/antlr/v4/semantics/SymbolChecks.java
index 5eccef5..abbf22d 100644
--- a/tool/src/org/antlr/v4/semantics/SymbolChecks.java
+++ b/tool/src/org/antlr/v4/semantics/SymbolChecks.java
@@ -1,12 +1,15 @@
 /*
- * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
  * Use of this file is governed by the BSD 3-clause license that
  * can be found in the LICENSE.txt file in the project root.
  */
 
 package org.antlr.v4.semantics;
 
+import org.antlr.runtime.tree.CommonTree;
+import org.antlr.runtime.tree.Tree;
 import org.antlr.v4.automata.LexerATNFactory;
+import org.antlr.v4.parse.ANTLRLexer;
 import org.antlr.v4.parse.ANTLRParser;
 import org.antlr.v4.runtime.Token;
 import org.antlr.v4.tool.Alternative;
@@ -16,11 +19,15 @@ import org.antlr.v4.tool.ErrorManager;
 import org.antlr.v4.tool.ErrorType;
 import org.antlr.v4.tool.Grammar;
 import org.antlr.v4.tool.LabelElementPair;
+import org.antlr.v4.tool.LabelType;
+import org.antlr.v4.tool.LeftRecursiveRule;
 import org.antlr.v4.tool.LexerGrammar;
 import org.antlr.v4.tool.Rule;
+import org.antlr.v4.tool.ast.AltAST;
 import org.antlr.v4.tool.ast.GrammarAST;
-import org.antlr.v4.tool.LabelType;
+import org.antlr.v4.tool.ast.TerminalAST;
 
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -35,42 +42,31 @@ import java.util.Set;
  *  Side-effect: strip away redef'd rules.
  */
 public class SymbolChecks {
-    Grammar g;
-    SymbolCollector collector;
-    Map<String, Rule> nameToRuleMap = new HashMap<String, Rule>();
+	Grammar g;
+	SymbolCollector collector;
+	Map<String, Rule> nameToRuleMap = new HashMap<String, Rule>();
 	Set<String> tokenIDs = new HashSet<String>();
-    Map<String, Set<String>> actionScopeToActionNames = new HashMap<String, Set<String>>();
-//	DoubleKeyMap<String, String, GrammarAST> namedActions =
-//		new DoubleKeyMap<String, String, GrammarAST>();
+	Map<String, Set<String>> actionScopeToActionNames = new HashMap<String, Set<String>>();
 
 	public ErrorManager errMgr;
 
 	protected final Set<String> reservedNames = new HashSet<String>();
+
 	{
 		reservedNames.addAll(LexerATNFactory.getCommonConstants());
 	}
 
-    public SymbolChecks(Grammar g, SymbolCollector collector) {
-        this.g = g;
-        this.collector = collector;
+	public SymbolChecks(Grammar g, SymbolCollector collector) {
+		this.g = g;
+		this.collector = collector;
 		this.errMgr = g.tool.errMgr;
 
-        for (GrammarAST tokenId : collector.tokenIDRefs) {
-            tokenIDs.add(tokenId.getText());
-        }
-        /*
-        System.out.println("rules="+collector.rules);
-        System.out.println("rulerefs="+collector.rulerefs);
-        System.out.println("tokenIDRefs="+collector.tokenIDRefs);
-        System.out.println("terminals="+collector.terminals);
-        System.out.println("strings="+collector.strings);
-        System.out.println("tokensDef="+collector.tokensDefs);
-        System.out.println("actions="+collector.actions);
-        System.out.println("scopes="+collector.scopes);
-         */
-    }
-
-    public void process() {
+		for (GrammarAST tokenId : collector.tokenIDRefs) {
+			tokenIDs.add(tokenId.getText());
+		}
+	}
+
+	public void process() {
 		// methods affect fields, but no side-effects outside this object
 		// So, call order sensitive
 		// First collect all rules for later use in checkForLabelConflict()
@@ -79,7 +75,6 @@ public class SymbolChecks {
 		}
 		checkReservedNames(g.rules.values());
 		checkActionRedefinitions(collector.namedActions);
-		checkForTokenConflicts(collector.tokenIDRefs);  // sets tokenIDs
 		checkForLabelConflicts(g.rules.values());
 	}
 
@@ -92,7 +87,8 @@ public class SymbolChecks {
 			nameNode = (GrammarAST) ampersandAST.getChild(0);
 			if (ampersandAST.getChildCount() == 2) {
 				name = nameNode.getText();
-			} else {
+			}
+			else {
 				scope = nameNode.getText();
 				name = ampersandAST.getChild(1).getText();
 			}
@@ -103,72 +99,126 @@ public class SymbolChecks {
 			}
 			if (!scopeActions.contains(name)) {
 				scopeActions.add(name);
-			} else {
+			}
+			else {
 				errMgr.grammarError(ErrorType.ACTION_REDEFINITION,
 						g.fileName, nameNode.token, name);
 			}
 		}
 	}
 
-	public void checkForTokenConflicts(List<GrammarAST> tokenIDRefs) {
-//        for (GrammarAST a : tokenIDRefs) {
-//            Token t = a.token;
-//            String ID = t.getText();
-//            tokenIDs.add(ID);
-//        }
-	}
-
-    /** Make sure a label doesn't conflict with another symbol.
-     *  Labels must not conflict with: rules, tokens, scope names,
-     *  return values, parameters, and rule-scope dynamic attributes
-     *  defined in surrounding rule.  Also they must have same type
-     *  for repeated defs.
-     */
-    public void checkForLabelConflicts(Collection<Rule> rules) {
+	/**
+	 * Make sure a label doesn't conflict with another symbol.
+	 * Labels must not conflict with: rules, tokens, scope names,
+	 * return values, parameters, and rule-scope dynamic attributes
+	 * defined in surrounding rule.  Also they must have same type
+	 * for repeated defs.
+	 */
+	public void checkForLabelConflicts(Collection<Rule> rules) {
 		for (Rule r : rules) {
 			checkForAttributeConflicts(r);
-			Map<String, LabelElementPair> labelNameSpace =
-					new HashMap<String, LabelElementPair>();
-			for (int i = 1; i <= r.numberOfAlts; i++) {
-				if (r.hasAltSpecificContexts()) {
-					labelNameSpace.clear();
-				}
 
+			Map<String, LabelElementPair> labelNameSpace = new HashMap<>();
+			for (int i = 1; i <= r.numberOfAlts; i++) {
 				Alternative a = r.alt[i];
 				for (List<LabelElementPair> pairs : a.labelDefs.values()) {
-					for (LabelElementPair p : pairs) {
-						checkForLabelConflict(r, p.label);
-						String name = p.label.getText();
-						LabelElementPair prev = labelNameSpace.get(name);
-						if (prev == null) labelNameSpace.put(name, p);
-						else checkForTypeMismatch(prev, p);
+					if (r.hasAltSpecificContexts()) {
+						// Collect labelName-labeledRules map for rule with alternative labels.
+						Map<String, List<LabelElementPair>> labelPairs = new HashMap<>();
+						for (LabelElementPair p : pairs) {
+							String labelName = findAltLabelName(p.label);
+							if (labelName != null) {
+								List<LabelElementPair> list;
+								if (labelPairs.containsKey(labelName)) {
+									list = labelPairs.get(labelName);
+								}
+								else {
+									list = new ArrayList<>();
+									labelPairs.put(labelName, list);
+								}
+								list.add(p);
+							}
+						}
+
+						for (List<LabelElementPair> internalPairs : labelPairs.values()) {
+							labelNameSpace.clear();
+							checkLabelPairs(r, labelNameSpace, internalPairs);
+						}
+					}
+					else {
+						checkLabelPairs(r, labelNameSpace, pairs);
 					}
 				}
 			}
 		}
 	}
 
-    void checkForTypeMismatch(LabelElementPair prevLabelPair, LabelElementPair labelPair) {
+	private void checkLabelPairs(Rule r, Map<String, LabelElementPair> labelNameSpace, List<LabelElementPair> pairs) {
+		for (LabelElementPair p : pairs) {
+			checkForLabelConflict(r, p.label);
+			String name = p.label.getText();
+			LabelElementPair prev = labelNameSpace.get(name);
+			if (prev == null) {
+				labelNameSpace.put(name, p);
+			}
+			else {
+				checkForTypeMismatch(r, prev, p);
+			}
+		}
+	}
+
+	private String findAltLabelName(CommonTree label) {
+		if (label == null) {
+			return null;
+		}
+		else if (label instanceof AltAST) {
+			AltAST altAST = (AltAST) label;
+			if (altAST.altLabel != null) {
+				return altAST.altLabel.toString();
+			}
+			else if (altAST.leftRecursiveAltInfo != null) {
+				return altAST.leftRecursiveAltInfo.altLabel.toString();
+			}
+			else {
+				return findAltLabelName(label.parent);
+			}
+		}
+		else {
+			return findAltLabelName(label.parent);
+		}
+	}
+
+	private void checkForTypeMismatch(Rule r, LabelElementPair prevLabelPair, LabelElementPair labelPair) {
 		// label already defined; if same type, no problem
 		if (prevLabelPair.type != labelPair.type) {
-			String typeMismatchExpr = labelPair.type + "!=" + prevLabelPair.type;
+			// Current behavior: take a token of rule declaration in case of left-recursive rule
+			// Desired behavior: take a token of proper label declaration in case of left-recursive rule
+			// See https://github.com/antlr/antlr4/pull/1585
+			// Such behavior is referring to the fact that the warning is typically reported on the actual label redefinition,
+			//   but for left-recursive rules the warning is reported on the enclosing rule.
+			org.antlr.runtime.Token token = r instanceof LeftRecursiveRule
+					? ((GrammarAST) r.ast.getChild(0)).getToken()
+					: labelPair.label.token;
 			errMgr.grammarError(
 					ErrorType.LABEL_TYPE_CONFLICT,
 					g.fileName,
-					labelPair.label.token,
+					token,
 					labelPair.label.getText(),
-					typeMismatchExpr);
+					labelPair.type + "!=" + prevLabelPair.type);
 		}
 		if (!prevLabelPair.element.getText().equals(labelPair.element.getText()) &&
 			(prevLabelPair.type.equals(LabelType.RULE_LABEL) || prevLabelPair.type.equals(LabelType.RULE_LIST_LABEL)) &&
 			(labelPair.type.equals(LabelType.RULE_LABEL) || labelPair.type.equals(LabelType.RULE_LIST_LABEL))) {
 
+			org.antlr.runtime.Token token = r instanceof LeftRecursiveRule
+					? ((GrammarAST) r.ast.getChild(0)).getToken()
+					: labelPair.label.token;
 			String prevLabelOp = prevLabelPair.type.equals(LabelType.RULE_LIST_LABEL) ? "+=" : "=";
 			String labelOp = labelPair.type.equals(LabelType.RULE_LIST_LABEL) ? "+=" : "=";
 			errMgr.grammarError(
 					ErrorType.LABEL_TYPE_CONFLICT,
 					g.fileName,
-					labelPair.label.token,
+					token,
 					labelPair.label.getText() + labelOp + labelPair.element.getText(),
 					prevLabelPair.label.getText() + prevLabelOp + prevLabelPair.element.getText());
 		}
@@ -225,11 +275,11 @@ public class SymbolChecks {
 		for (Attribute attribute : attributes.attributes.values()) {
 			if (ruleNames.contains(attribute.name)) {
 				errMgr.grammarError(
-					errorType,
-					g.fileName,
-					attribute.token != null ? attribute.token : ((GrammarAST)r.ast.getChild(0)).token,
-					attribute.name,
-					r.name);
+						errorType,
+						g.fileName,
+						attribute.token != null ? attribute.token : ((GrammarAST) r.ast.getChild(0)).token,
+						attribute.name,
+						r.name);
 			}
 		}
 	}
@@ -242,11 +292,11 @@ public class SymbolChecks {
 		Set<String> conflictingKeys = attributes.intersection(referenceAttributes);
 		for (String key : conflictingKeys) {
 			errMgr.grammarError(
-				errorType,
-				g.fileName,
-				attributes.get(key).token != null ? attributes.get(key).token : ((GrammarAST) r.ast.getChild(0)).token,
-				key,
-				r.name);
+					errorType,
+					g.fileName,
+					attributes.get(key).token != null ? attributes.get(key).token : ((GrammarAST)r.ast.getChild(0)).token,
+					key,
+					r.name);
 		}
 	}
 
@@ -275,8 +325,122 @@ public class SymbolChecks {
 		}
 	}
 
-	// CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline)
+	/**
+	 * Algorithm steps:
+	 * 1. Collect all simple string literals (i.e. 'asdf', 'as' 'df', but not [a-z]+, 'a'..'z')
+	 *    for all lexer rules in each mode except of autogenerated tokens ({@link #getSingleTokenValues(Rule) getSingleTokenValues})
+	 * 2. Compare every string literal with each other ({@link #checkForOverlap(Grammar, Rule, Rule, List<String>, List<String>) checkForOverlap})
+	 *    and throw TOKEN_UNREACHABLE warning if the same string found.
+	 * Complexity: O(m * n^2 / 2), approximately equals to O(n^2)
+	 * where m - number of modes, n - average number of lexer rules per mode.
+	 * See also testUnreachableTokens unit test for details.
+	 */
+	public void checkForUnreachableTokens(Grammar g) {
+		if (g.isLexer()) {
+			LexerGrammar lexerGrammar = (LexerGrammar)g;
+			for (List<Rule> rules : lexerGrammar.modes.values()) {
+				// Collect string literal lexer rules for each mode
+				List<Rule> stringLiteralRules = new ArrayList<>();
+				List<List<String>> stringLiteralValues = new ArrayList<>();
+				for (int i = 0; i < rules.size(); i++) {
+					Rule rule = rules.get(i);
+
+					List<String> ruleStringAlts = getSingleTokenValues(rule);
+					if (ruleStringAlts != null && ruleStringAlts.size() > 0) {
+						stringLiteralRules.add(rule);
+						stringLiteralValues.add(ruleStringAlts);
+					}
+				}
+
+				// Check string sets intersection
+				for (int i = 0; i < stringLiteralRules.size(); i++) {
+					List<String> firstTokenStringValues = stringLiteralValues.get(i);
+					Rule rule1 =  stringLiteralRules.get(i);
+					checkForOverlap(g, rule1, rule1, firstTokenStringValues, stringLiteralValues.get(i));
+
+					// Check fragment rules only with themself
+					if (!rule1.isFragment()) {
+						for (int j = i + 1; j < stringLiteralRules.size(); j++) {
+							Rule rule2 = stringLiteralRules.get(j);
+							if (!rule2.isFragment()) {
+								checkForOverlap(g, rule1, stringLiteralRules.get(j), firstTokenStringValues, stringLiteralValues.get(j));
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	/**
+	 * {@return} list of simple string literals for rule {@param rule}
+	 */
+	private List<String> getSingleTokenValues(Rule rule)
+	{
+		List<String> values = new ArrayList<>();
+		for (Alternative alt : rule.alt) {
+			if (alt != null) {
+				// select first alt if token has a command
+				Tree rootNode = alt.ast.getChildCount() == 2 &&
+						alt.ast.getChild(0) instanceof AltAST && alt.ast.getChild(1) instanceof GrammarAST
+						? alt.ast.getChild(0)
+						: alt.ast;
+
+				if (rootNode.getTokenStartIndex() == -1) {
+					continue; // ignore autogenerated tokens from combined grammars that start with T__
+				}
 
+				// Ignore alt if contains not only string literals (repetition, optional)
+				boolean ignore = false;
+				StringBuilder currentValue = new StringBuilder();
+				for (int i = 0; i < rootNode.getChildCount(); i++) {
+					Tree child = rootNode.getChild(i);
+					if (!(child instanceof TerminalAST)) {
+						ignore = true;
+						break;
+					}
+
+					TerminalAST terminalAST = (TerminalAST)child;
+					if (terminalAST.token.getType() != ANTLRLexer.STRING_LITERAL) {
+						ignore = true;
+						break;
+					}
+					else {
+						String text = terminalAST.token.getText();
+						currentValue.append(text.substring(1, text.length() - 1));
+					}
+				}
+
+				if (!ignore) {
+					values.add(currentValue.toString());
+				}
+			}
+		}
+		return values;
+	}
+
+	/**
+	 * For same rule compare values from next index:
+	 * TOKEN_WITH_SAME_VALUES: 'asdf' | 'asdf';
+	 * For different rules compare from start value:
+	 * TOKEN1: 'asdf';
+	 * TOKEN2: 'asdf';
+	 */
+	private void checkForOverlap(Grammar g, Rule rule1, Rule rule2, List<String> firstTokenStringValues, List<String> secondTokenStringValues) {
+		for (int i = 0; i < firstTokenStringValues.size(); i++) {
+			int secondTokenInd = rule1 == rule2 ? i + 1 : 0;
+			String str1 = firstTokenStringValues.get(i);
+			for (int j = secondTokenInd; j < secondTokenStringValues.size(); j++) {
+				String str2 = secondTokenStringValues.get(j);
+				if (str1.equals(str2)) {
+					errMgr.grammarError(ErrorType.TOKEN_UNREACHABLE, g.fileName,
+							((GrammarAST) rule2.ast.getChild(0)).token, rule2.name, str2, rule1.name);
+				}
+			}
+		}
+	}
+
+	// CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline)
 	public void checkRuleArgs(Grammar g, List<GrammarAST> rulerefs) {
 		if ( rulerefs==null ) return;
 		for (GrammarAST ref : rulerefs) {
@@ -285,12 +449,12 @@ public class SymbolChecks {
 			GrammarAST arg = (GrammarAST)ref.getFirstChildWithType(ANTLRParser.ARG_ACTION);
 			if ( arg!=null && (r==null || r.args==null) ) {
 				errMgr.grammarError(ErrorType.RULE_HAS_NO_ARGS,
-										  g.fileName, ref.token, ruleName);
+						g.fileName, ref.token, ruleName);
 
 			}
-			else if ( arg==null && (r!=null&&r.args!=null) ) {
+			else if ( arg==null && (r!=null && r.args!=null) ) {
 				errMgr.grammarError(ErrorType.MISSING_RULE_ARGS,
-										  g.fileName, ref.token, ruleName);
+						g.fileName, ref.token, ruleName);
 			}
 		}
 	}
@@ -299,18 +463,18 @@ public class SymbolChecks {
 		for (GrammarAST dot : qualifiedRuleRefs) {
 			GrammarAST grammar = (GrammarAST)dot.getChild(0);
 			GrammarAST rule = (GrammarAST)dot.getChild(1);
-            g.tool.log("semantics", grammar.getText()+"."+rule.getText());
+			g.tool.log("semantics", grammar.getText()+"."+rule.getText());
 			Grammar delegate = g.getImportedGrammar(grammar.getText());
 			if ( delegate==null ) {
 				errMgr.grammarError(ErrorType.NO_SUCH_GRAMMAR_SCOPE,
-										  g.fileName, grammar.token, grammar.getText(),
-										  rule.getText());
+						g.fileName, grammar.token, grammar.getText(),
+						rule.getText());
 			}
 			else {
 				if ( g.getRule(grammar.getText(), rule.getText())==null ) {
 					errMgr.grammarError(ErrorType.NO_SUCH_RULE_IN_SCOPE,
-											  g.fileName, rule.token, grammar.getText(),
-											  rule.getText());
+							g.fileName, rule.token, grammar.getText(),
+							rule.getText());
 				}
 			}
 		}
author	Andrius Merkys <andrius.merkys@gmail.com>	2018-11-23 04:52:08 -0500
committer	Andrius Merkys <andrius.merkys@gmail.com>	2018-11-23 04:52:08 -0500
commit	7b19e9be5be41c69c451b63c526bee059881f9b1 (patch)
tree	699bf0523df6868d15843981ea9914ac096ee270 /tool/src/org/antlr/v4/semantics/SymbolChecks.java
parent	1d0464db4ec5e5c20b2ae62bb3c4eceaa6840bde (diff)