Skip to content

Commit

Permalink
Merge branch '#4307' into 'dev'
Browse files Browse the repository at this point in the history
#4307

See merge request monticore/monticore!1048
  • Loading branch information
MisterErwin committed Oct 27, 2024
2 parents 422c996 + dd59980 commit 1c4b49c
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.RuleTransition;
import org.antlr.v4.runtime.atn.Transition;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.IntervalSet;
Expand All @@ -20,20 +21,20 @@
import java.util.stream.Collectors;

public class MCErrorListener extends BaseErrorListener {

protected MCParser parser = null;

/**
* This character (NO-BREAK SPACE) separates the error message
* from the context where the error occurred.
*/
public final static char CONTEXT_SEPARATOR = '\u00A0';

/**
 * Creates a listener that is bound to the given parser.
 *
 * @param parser the parser kept in {@link #parser}
 */
public MCErrorListener(MCParser parser) {
  // The superclass constructor is invoked implicitly.
  this.parser = parser;
}

@Override
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) {
// Improve error message
Expand All @@ -52,10 +53,28 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
}
} else if (e == null && msg.startsWith("extraneous input '")
&& containsRule(((Parser) recognizer).getExpectedTokens(), recognizer.getVocabulary(), "Name")) {
// We have received an unwanted token (msg: extraneous input), but also expect a Name
// We have received an unwanted token (msg: extraneous input), but also expect a Name*
// (Keywords are excluded from the Name production - to include them, Name& (plus keywords) should be used)
// (*): The name might actually be a nokeyword production, i.e., with a semantic predicate

// Check for the rules which the ATN would change into using epsilon transitions (to find nokeyword rules)
Set<Map.Entry<Integer, String>> epsilonRules = new HashSet<>();
getExpectedRulesWithTokens(recognizer.getATN(), recognizer.getState(), recognizer.getVocabulary(), new HashMap<>(), epsilonRules);

List<String> noKeywordRules = extractNoKeywordTokens(recognizer, epsilonRules);

msg = msg.replace("extraneous input", "unexpected keyword");
msg = msg.replaceFirst("' expecting", "', expecting");

// Handle nokeyword rules, if present
if (!noKeywordRules.isEmpty()) {
msg = msg.substring(0, msg.indexOf("', expecting ") + "', expecting ".length());
// Join the list [a, b, c, d] as "a, b, c or d"
msg += String.join(" or ",
String.join(", ", noKeywordRules.subList(0, noKeywordRules.size() - 1)),
noKeywordRules.get(noKeywordRules.size() - 1)
);
}
} else if (e instanceof FailedPredicateException
&& offendingSymbol instanceof CommonToken
&& msg.startsWith("rule nokeyword_")
Expand All @@ -72,28 +91,17 @@ && containsRule(((Parser) recognizer).getExpectedTokens(), recognizer.getVocabul
String expectedTokens = e.getExpectedTokens().toString(recognizer.getVocabulary());

// Check for the rules which the ATN would change into using epsilon transitions
Set<Map.Entry<Integer, String>> epsilonRules = getExpectedRulesWithTokens(recognizer.getATN(),
e.getOffendingState(), recognizer.getVocabulary(), new HashSet<>());

Pattern nokeywordPattern = Pattern.compile("nokeyword_(.*)_[0-9]*");
// Turn the next expected rules into a human readable format:
String noKeywordRules = epsilonRules.stream().map(r -> {
// r.key = ruleIndex, r.value=next tokens of the transition(s)
if (r.getValue().startsWith("'")) // already a terminal
return r.getValue();
// Check if the rule is a noKeyword rule (added by the MC generator)
// the expected token (r.value) is most likely a Name (but constrained by a predicate)
String rulename = recognizer.getRuleNames()[r.getKey()];
Matcher m = nokeywordPattern.matcher(rulename);
if (m.matches()) // if it is a nokeyword rule, we are able to extract the no-keyword from the rule name
return "'" + m.group(1) + "'";
// Another rule would have been possible, but the predicate did not allow it
// We just output the expected token with a hint in that case
return r.getValue() + " (with additional constraints from " + rulename + ")";
}).collect(Collectors.joining(" or "));
Set<Map.Entry<Integer, String>> epsilonRules = new HashSet<>();
getExpectedRulesWithTokens(recognizer.getATN(), e.getOffendingState(), recognizer.getVocabulary(), new HashMap<>(), epsilonRules);

List<String> noKeywordRules = extractNoKeywordTokens(recognizer, epsilonRules);

if (!noKeywordRules.isEmpty()) {
expectedTokens = noKeywordRules;
// Join the list [a, b, c, d] as "a, b, c or d"
expectedTokens = String.join(" or ",
String.join(", ", noKeywordRules.subList(0, noKeywordRules.size() - 1)),
noKeywordRules.get(noKeywordRules.size() - 1)
);
}
msg += ", expecting " + expectedTokens;
}
Expand All @@ -120,6 +128,26 @@ && containsRule(((Parser) recognizer).getExpectedTokens(), recognizer.getVocabul
parser.setErrors(true);
}

/**
 * Matches the names of nokeyword rules (added by the MC generator);
 * group 1 is the keyword text. Compiled once instead of on every call.
 */
private static final Pattern NO_KEYWORD_RULE_PATTERN = Pattern.compile("nokeyword_(.*)_[0-9]*");

/**
 * Turns the next expected rules into a human readable format.
 *
 * @param recognizer   supplies the rule names for the rule indices
 * @param epsilonRules entries of ruleIndex -> next token(s) of the transition(s)
 * @return one description per distinct expectation: the terminal itself,
 *         the quoted no-keyword of a nokeyword rule, or the expected token
 *         with a hint naming the constraining rule
 */
private static List<String> extractNoKeywordTokens(Recognizer<?, ?> recognizer, Set<Map.Entry<Integer, String>> epsilonRules) {
  return epsilonRules.stream().map(r -> {
    // r.key = ruleIndex, r.value = next tokens of the transition(s)
    String tokens = r.getValue();
    if (tokens.startsWith("'")) {
      // already a terminal
      return tokens;
    }
    // Check if the rule is a noKeyword rule (added by the MC generator);
    // the expected token is most likely a Name (but constrained by a predicate)
    String rulename = recognizer.getRuleNames()[r.getKey()];
    Matcher m = NO_KEYWORD_RULE_PATTERN.matcher(rulename);
    if (m.matches()) {
      // the no-keyword can be extracted from the rule name
      return "'" + m.group(1) + "'";
    }
    // Another rule would have been possible, but the predicate did not allow it.
    // We just output the expected token with a hint in that case
    return tokens + " (with additional constraints from " + rulename + ")";
  })
      // The same terminal may be reachable via several rules; report it only once
      .distinct()
      .collect(Collectors.toList());
}

/**
* @param set an IntervalSet of tokens
* @param vocabulary the token vocabulary
Expand Down Expand Up @@ -164,27 +192,47 @@ protected String getOffendingLine(String entireInput, int tokenIndex) {
/**
* Similiar to {@link ATN#getExpectedTokens(int, RuleContext)},
* but we also return the rule numbers
* @return a set of ruleIndex -> expected token(s) entries
* @param expected a set of ruleIndex -> expected token(s) entries
* @return whether an empty input is accepted
*/
public Set<Map.Entry<Integer, String>> getExpectedRulesWithTokens(ATN atn, int stateNumber, Vocabulary vocabulary, Set<Integer> visitedStates) {
Set<Map.Entry<Integer, String>> expected = new HashSet<>();
if (stateNumber < 0 || stateNumber >= atn.states.size() || visitedStates.contains(stateNumber)) {
return expected;
public boolean getExpectedRulesWithTokens(ATN atn, int stateNumber, Vocabulary vocabulary, Map<Integer, Boolean> visitedStates, Set<Map.Entry<Integer, String>> expected) {
if (stateNumber < 0 || stateNumber >= atn.states.size() || visitedStates.containsKey(stateNumber)) {
return visitedStates.get(stateNumber);
}

visitedStates.add(stateNumber);
visitedStates.put(stateNumber, false);

ATNState state = atn.states.get(stateNumber);
// Stop Backtracking in case of empty?
if (state.getStateType() == ATNState.RULE_STOP) {
visitedStates.put(stateNumber, true);
return true;
}
boolean mightBeEmptyA = false;
for (Transition t : state.getTransitions()) {
if (t.isEpsilon() && t.target.stateNumber != ATNState.INVALID_STATE_NUMBER) {
// Follow the epsilon transition
expected.addAll(getExpectedRulesWithTokens(atn, t.target.stateNumber, vocabulary, visitedStates));
boolean mightBeEmpty = getExpectedRulesWithTokens(atn, t.target.stateNumber, vocabulary, visitedStates, expected);
if (mightBeEmpty) {
// The rule allows empty input between RULE_START and RULE_STOP
if (t.getSerializationType() == Transition.RULE) {
// Continue with the next transition of this rule
mightBeEmpty = getExpectedRulesWithTokens(atn, ((RuleTransition) t).followState.stateNumber, vocabulary, visitedStates, expected);
if (mightBeEmpty)
mightBeEmptyA = true;
} else {
// Pass the might-be-empty to the state calling this state
mightBeEmptyA = true;
}
}
} else {
// A non epsilon transition =>
expected.add(Map.entry(t.target.ruleIndex, atn.nextTokens(state).toString(vocabulary)));
}
}
return expected;
if (mightBeEmptyA)
visitedStates.put(stateNumber, true);
return mightBeEmptyA;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,28 @@ grammar ParseErrors extends de.monticore.MCBasics {

NoSpaceTest = {noSpace(2)}? "@" Name;

// Example productions based on CDs:
// a "cd" keyword followed by any number of elements
CDLike = "cd" CDLikeElement*;

// Implemented by CDLikeTerm, CDLikeKey and CDLikeAlt
interface CDLikeElement;

// Element introduced by the terminal "class" (after an optional Modifier)
CDLikeTerm implements CDLikeElement = Modifier "class" Name "{" "}";
// Element introduced by the key-constant "package"
CDLikeKey implements CDLikeElement = key("package") Name "{" "}";
// Element whose introducing terminal is chosen via the CDLikeAltElem interface
CDLikeAlt implements CDLikeElement = Modifier CDLikeAltElem Name ";";

// Various alternatives via interfaces
interface CDLikeAltElem;
CDAssocTypeAssoc implements CDLikeAltElem = "association";
CDAssocTypeComp implements CDLikeAltElem = "composition";

// Modifier may be empty (Stereo is optional and the alternatives may repeat zero times)
Modifier = Stereo? (["public"] | [public:"+"] | ["private"] )*;

Stereo = "<<" Name ">>";

// the 'Unrelated' MUST not be contained within CDLike Elements
// NOTE(review): presumably this means 'Unrelated' must not be reported as an
// expected token for CDLike, although this production also starts with Modifier - confirm
SomeUnrelatedProd = Modifier "Unrelated";

// Two alternatives, each starting with its own terminal followed by a Name
P = "ax" Name | "bx" Name;

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import parseerrors.ParseErrorsMill;
import parseerrors._parser.ParseErrorsParser;
Expand Down Expand Up @@ -79,6 +80,16 @@ public void TestKeyConstant1Keyword() throws IOException {
"^", Log.getFindings().get(0).getMsg());
}

@Test
public void TestKeyConstant1KeywordInName() throws IOException {
  // Right after the key-constant a Name is expected, but a keyword is given
  parser.parse_StringTestKeyConstant1("keyconst1 keyword");
  Assertions.assertTrue(parser.hasErrors());

  String expectedMsg = "mismatched keyword 'keyword', expecting Name (found: KEYWORD3480559081) in rule stack: [TestKeyConstant1]\u00A0\n"
      + "keyconst1 keyword\n"
      + " ^";
  Assertions.assertEquals(expectedMsg, Log.getFindings().get(0).getMsg());
}

@Test
public void TestKeyConstantAlt1Incorrect() throws IOException {
// An incorrect name is used at a location, where we expect a key-constant (Name with semantic predicate) (within an alt)
Expand All @@ -94,7 +105,7 @@ public void TestKeyConstantAlt1Keyword() throws IOException {
// A keyword is used at a location, where we expect a key-constant (Name with semantic predicate) (within an alt)
parser.parse_StringTestKeyConstantAlt1("keyword abc");
Assertions.assertTrue(parser.hasErrors());
Assertions.assertEquals("unexpected keyword 'keyword', expecting Name in rule stack: [TestKeyConstantAlt1]\u00A0\n" +
Assertions.assertEquals("unexpected keyword 'keyword', expecting 'keyconst1' or 'keyconst2' in rule stack: [TestKeyConstantAlt1]\u00A0\n" +
"keyword abc\n" +
"^", Log.getFindings().get(0).getMsg());
}
Expand All @@ -113,13 +124,14 @@ public void TestKeyConstantAlt2Incorrect() throws IOException {
public void TestKeyConstantAlt2Keyword() throws IOException {
// A keyword is used at a location, where we expect a key-constant (Name with semantic predicate) (within a direct alt)
parser.parse_StringTestKeyConstantAlt2("keyword abc");

// In case of single-token deletion error recovery, the correct expected no-keywords should also be reported
Assertions.assertTrue(parser.hasErrors());
Assertions.assertEquals("unexpected keyword 'keyword', expecting Name in rule stack: [TestKeyConstantAlt2]\u00A0\n" +
Assertions.assertEquals("unexpected keyword 'keyword', expecting 'keyconst1' or 'keyconst2' in rule stack: [TestKeyConstantAlt2]\u00A0\n" +
"keyword abc\n" +
"^", Log.getFindings().get(0).getMsg());
}


@Test
public void TestNoKeyWAlt1() throws IOException {
// A keyword is used at a location, where we expect a key-constant (Name with semantic predicate)
Expand All @@ -146,15 +158,15 @@ public void testSepListDot() throws IOException {
// Wrong separator used (dot instead of comma)
parser.parse_StringTestSepList("seplist a.b");
Assertions.assertTrue(parser.hasErrors());
Assertions.assertEquals("Expected EOF but found token [@2,9:9='.',<10>,1:9]", Log.getFindings().get(0).getMsg());
Assertions.assertEquals("Expected EOF but found token [@2,9:9='.',<18>,1:9]", Log.getFindings().get(0).getMsg());
}

@Test
public void testSepListNone() throws IOException {
// No separator used
parser.parse_StringTestSepList("seplist a b");
Assertions.assertTrue(parser.hasErrors());
Assertions.assertEquals("Expected EOF but found token [@2,10:10='b',<16>,1:10]", Log.getFindings().get(0).getMsg());
Assertions.assertEquals("Expected EOF but found token [@2,10:10='b',<29>,1:10]", Log.getFindings().get(0).getMsg());
}


Expand Down Expand Up @@ -183,7 +195,7 @@ public void testCompInvalidKey() throws IOException {
// The KeyConstant does not match ICompKeyInvalid (an empty string is also allowed)
parser.parse_StringComp("component MyName { \n ICompKeyInvalid \n }");
Assertions.assertTrue(parser.hasErrors());
Assertions.assertEquals("no viable alternative at input 'ICompKeyInvalid', expecting 'ICompKW' or 'ICompKey' or '}' in rule stack: [Comp]\u00A0\n" +
Assertions.assertEquals("no viable alternative at input 'ICompKeyInvalid', expecting 'ICompKW', 'ICompKey' or '}' in rule stack: [Comp]\u00A0\n" +
" ICompKeyInvalid \n" +
" ^", Log.getFindings().get(0).getMsg());
}
Expand Down Expand Up @@ -224,7 +236,7 @@ public void testUnknownAlts() throws IOException {
// An incorrect input is used in an alt-context
parser.parse_StringUnknownAlts("X");
Assertions.assertTrue(parser.hasErrors());
Assertions.assertEquals("no viable alternative at input 'X', expecting 'UnknownAltsKey' or 'UnknownAltsT' or Name (with additional constraints from unknownAlts) in rule stack: [UnknownAlts]\u00A0\n" +
Assertions.assertEquals("no viable alternative at input 'X', expecting 'UnknownAltsKey', 'UnknownAltsT' or Name (with additional constraints from unknownAlts) in rule stack: [UnknownAlts]\u00A0\n" +
"X\n" +
"^", Log.getFindings().get(0).getMsg());
}
Expand Down Expand Up @@ -259,5 +271,52 @@ public void testEmptyName() throws IOException {
"^", Log.getFindings().get(0).getMsg());
}

@Test
public void testCDClass() throws IOException {
  // 'cl' follows "cd", but it does not start any of the CDLike elements
  parser.parse_StringCDLike("cd cl");
  Assertions.assertTrue(parser.hasErrors());

  String expectedMsg = "no viable alternative at input 'cl', expecting 'private', 'class', 'composition', '<<', '+', 'public', 'package' or 'association' in rule stack: [CDLike]\u00A0\n"
      + "cd cl\n"
      + " ^";
  Assertions.assertEquals(expectedMsg, Log.getFindings().get(0).getMsg());
}

@Test
public void testCDPub() throws IOException {
  // 'publ' follows "cd", but it does not start any of the CDLike elements
  parser.parse_StringCDLike("cd publ");
  Assertions.assertTrue(parser.hasErrors());

  String expectedMsg = "no viable alternative at input 'publ', expecting 'private', 'class', 'composition', '<<', '+', 'public', 'package' or 'association' in rule stack: [CDLike]\u00A0\n"
      + "cd publ\n"
      + " ^";
  Assertions.assertEquals(expectedMsg, Log.getFindings().get(0).getMsg());
}

@Test
public void testCDPack() throws IOException {
  // The input ends right after the key-constant 'package', where a Name is expected
  parser.parse_StringCDLike("cd package");
  Assertions.assertTrue(parser.hasErrors());

  String expectedMsg = "mismatched keyword '<EOF>', expecting Name (found: EOF) in rule stack: [CDLike, CDLikeElement, CDLikeKey]\u00A0\n"
      + "cd package\n"
      + " ^";
  Assertions.assertEquals(expectedMsg, Log.getFindings().get(0).getMsg());
}

@Test
public void testCDAssoc() throws IOException {
  // 'xxx' appears between a class element and an association element
  parser.parse_StringCDLike("cd \n class C1{}\n xxx\n association A1;");
  Assertions.assertTrue(parser.hasErrors());

  String expectedMsg = "no viable alternative at input 'xxx', expecting 'private', 'class', 'composition', '<<', '+', 'public', 'package' or 'association' in rule stack: [CDLike]\u00A0\n"
      + " xxx\n"
      + " ^";
  Assertions.assertEquals(expectedMsg, Log.getFindings().get(0).getMsg());
}

@Test
public void testP() throws IOException {
  // The first token matches neither 'ax' nor 'bx', the two alternatives of P
  parser.parse_StringP("KeyWord abc");
  Assertions.assertTrue(parser.hasErrors());

  String expectedMsg = "mismatched input 'KeyWord' expecting {'bx', 'ax'} (found: Name) in rule stack: [P]\u00A0\n"
      + "KeyWord abc" + "\n"
      + "^";
  Assertions.assertEquals(expectedMsg, Log.getFindings().get(0).getMsg());
}
}

0 comments on commit 1c4b49c

Please sign in to comment.