diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/AlternativeCombinator.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/AlternativeCombinator.kt index cb10cec..bd17fa6 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/AlternativeCombinator.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/AlternativeCombinator.kt @@ -38,9 +38,9 @@ open class AlternativeCombinator(vararg val tokens: Combinator) : Combinator { return match(value, offset, Combinator::SemanticMatch) } - override fun parse(value: String, offset: Int): Sequence = + override fun parse(value: String, offset: Int, frontier: Frontier): Sequence = // Offer every alternative's matches, so the order of options no longer affects correctness. - tokens.asSequence().flatMap { it.parse(value, offset) } + tokens.asSequence().flatMap { it.parse(value, offset, frontier) } override fun toString(): String = toStringIndented(0) diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Combinator.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Combinator.kt index b9140ad..b0f5411 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Combinator.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Combinator.kt @@ -40,8 +40,12 @@ interface Combinator { * carries a `valid` flag for the strict (semantic) check. Because every alternative is offered * rather than the first greedy one committed to, matching is complete — e.g. * Seq(ZeroOrMore("a"), "a") on "aa" matches, because ZeroOrMore offers the shorter match too. + * + * [frontier] records the deepest offset reached and what was expected there, so that even when no + * path succeeds we can localize the error (and, later, drive completion). Combinators thread the + * same instance into their children; leaf matchers report themselves to it. */ - fun parse(value: String, offset: Int): Sequence + fun parse(value: String, offset: Int, frontier: Frontier = Frontier()): Sequence fun toStringIndented(indent: Int): String } diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/EOF.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/EOF.kt index aea72e8..d62c78e 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/EOF.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/EOF.kt @@ -17,8 +17,10 @@ class EOF : Combinator { } } - override fun parse(value: String, offset: Int): Sequence = - if (offset == value.length) sequenceOf(Parse(offset, emptyList())) else emptySequence() + override fun parse(value: String, offset: Int, frontier: Frontier): Sequence { + frontier.reached(offset, this) // we expect end-of-input here + return if (offset == value.length) sequenceOf(Parse(offset, emptyList())) else emptySequence() + } override fun toStringIndented(indent: Int): String { return "EOF" diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/FlexibleLiteralChoiceTerminal.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/FlexibleLiteralChoiceTerminal.kt index c24012a..601189f 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/FlexibleLiteralChoiceTerminal.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/FlexibleLiteralChoiceTerminal.kt @@ -91,7 +91,8 @@ class FlexibleLiteralChoiceTerminal(vararg val choices: String) : TerminalCombin return NoMatch.copy(longestMatch = offset) } - override fun parse(value: String, offset: Int): Sequence { + override fun parse(value: String, offset: Int, frontier: Frontier): Sequence { + frontier.reached(offset, this) // Lenient shape match (so a wrong token like AF_BOGUS still matches and can be highlighted), // valid only if the matched text is one of the exact choices. val m = syntaticMatch.matchAt(value, offset) ?: return emptySequence() diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Frontier.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Frontier.kt new file mode 100644 index 0000000..d081b64 --- /dev/null +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Frontier.kt @@ -0,0 +1,42 @@ +package net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.grammar + +/** + * The "frontier" — a high-water mark recorder threaded through parse() (#467 step 3). + * + * parse() only ever returns *successful* matches, so when a value is malformed the failing paths + * vanish and we lose all trace of how far we got. The frontier is a side-channel that survives + * failure: every leaf matcher (a terminal, or EOF) reports itself here when it is consulted at an + * offset, and the frontier keeps only the DEEPEST offset reached and the set of matchers wanted + * there. + * + * That gives two things from one mechanism: + * - error localization: the deepest offset is where parsing got stuck, and `expected` is what + * would have been valid there; + * - the seed of completion (#343): "what could come next at this position?" is the same question. + * + * It is mutable and shared across the whole (lazy) exploration of a single value on purpose — it is + * the global deepest-reach across every path tried. + */ +class Frontier { + /** The deepest offset at which any leaf matcher was consulted. */ + var position: Int = 0 + private set + + private val expectedAtPosition = linkedSetOf() + + /** The matchers consulted at [position] — i.e. what the grammar was hoping to see there. */ + val expected: Set get() = expectedAtPosition + + /** Record that [matcher] was consulted at [offset]. Only the deepest offset's matchers are kept. */ + fun reached(offset: Int, matcher: Combinator) { + when { + offset > position -> { + position = offset + expectedAtPosition.clear() + expectedAtPosition.add(matcher) + } + offset == position -> expectedAtPosition.add(matcher) + // offset < position: a shallower path, ignore. + } + } +} diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/IntegerTerminal.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/IntegerTerminal.kt index 744b5a3..45e4356 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/IntegerTerminal.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/IntegerTerminal.kt @@ -29,7 +29,8 @@ class IntegerTerminal(private val minInclusive: Long,private val maxExclusive: L } } - override fun parse(value: String, offset: Int): Sequence { + override fun parse(value: String, offset: Int, frontier: Frontier): Sequence { + frontier.reached(offset, this) val m = intRegex.matchAt(value, offset) ?: return emptySequence() val text = m.value // Lenient: any integer matches (so we can locate it); valid only if it is within range. diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/LiteralChoiceTerminal.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/LiteralChoiceTerminal.kt index 17d6fbf..e7ae2cb 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/LiteralChoiceTerminal.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/LiteralChoiceTerminal.kt @@ -24,11 +24,13 @@ class LiteralChoiceTerminal(vararg var choices: String) : TerminalCombinator { return match(value, offset) } - override fun parse(value: String, offset: Int): Sequence = + override fun parse(value: String, offset: Int, frontier: Frontier): Sequence { + frontier.reached(offset, this) // Offer every choice that matches here (e.g. both ":" and "::"); each is always strictly valid. - choices.asSequence() + return choices.asSequence() .filter { value.startsWith(it, offset) } .map { Parse(offset + it.length, listOf(ParsedToken(offset, offset + it.length, it, this, valid = true))) } + } override fun toString(): String { return if (choices.size == 1) { diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/OneOrMore.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/OneOrMore.kt index fc999cd..e9d7518 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/OneOrMore.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/OneOrMore.kt @@ -40,15 +40,15 @@ class OneOrMore(val combinator : Combinator) : Combinator { return match(value, offset, combinator::SemanticMatch) } - override fun parse(value: String, offset: Int): Sequence { + override fun parse(value: String, offset: Int, frontier: Frontier): Sequence { // Same as ZeroOrMore, but the first repetition is mandatory (and must make progress). fun extend(from: Parse): Sequence = sequence { yield(from) - for (step in combinator.parse(value, from.end)) { + for (step in combinator.parse(value, from.end, frontier)) { if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens))) } } - return combinator.parse(value, offset).filter { it.end > offset }.flatMap { extend(it) } + return combinator.parse(value, offset, frontier).filter { it.end > offset }.flatMap { extend(it) } } override fun toString(): String = toStringIndented(0) diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Parse.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Parse.kt index aa3c389..8e63d2c 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Parse.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Parse.kt @@ -34,8 +34,12 @@ sealed interface ParseOutcome { /** A path consumed the whole value, but a token is not strictly valid (well-formed but wrong). */ data class SemanticError(val badToken: ParsedToken) : ParseOutcome - /** No path consumed the whole value. [furthest] is how far any path got (for error localization). */ - data class SyntaxError(val furthest: Int) : ParseOutcome + /** + * No path consumed the whole value. [furthest] is the deepest offset any path reached, and + * [expected] is the set of matchers the grammar was hoping to see there (for error localization, + * and the seed of completion). + */ + data class SyntaxError(val furthest: Int, val expected: Set) : ParseOutcome } /** Every way [this] grammar can consume the entire [value]. */ @@ -46,15 +50,19 @@ fun Combinator.fullParses(value: String): Sequence = * One lenient parse answers both questions the old two passes did: * - syntactic ("could be this, color it"): did any path consume the whole value? * - semantic ("actually valid"): did any such path use only valid tokens? + * + * A single shared [Frontier] rides along, so a SyntaxError can report where parsing got stuck. */ fun Combinator.validate(value: String): ParseOutcome { + val frontier = Frontier() var firstBad: ParsedToken? = null - for (p in fullParses(value)) { + for (p in parse(value, 0, frontier)) { + if (p.end != value.length) continue val bad = p.tokens.firstOrNull { !it.valid } if (bad == null) return ParseOutcome.Valid // short-circuit on the first fully-valid full parse if (firstBad == null) firstBad = bad } if (firstBad != null) return ParseOutcome.SemanticError(firstBad) - val furthest = parse(value, 0).maxOfOrNull { it.end } ?: 0 - return ParseOutcome.SyntaxError(furthest) + // No full parse: exhausting the loop above has populated the frontier with the deepest reach. + return ParseOutcome.SyntaxError(frontier.position, frontier.expected) } diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/RegexTerminal.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/RegexTerminal.kt index f31b854..c338b7f 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/RegexTerminal.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/RegexTerminal.kt @@ -18,7 +18,8 @@ class RegexTerminal(syntaticMatchStr : String, semanticMatchStr: String ) : Term return MatchResult(listOf(matchResult.value), offset + matchResult.value.length, listOf(this), offset + matchResult.value.length) } - override fun parse(value: String, offset: Int): Sequence { + override fun parse(value: String, offset: Int, frontier: Frontier): Sequence { + frontier.reached(offset, this) // The syntactic regex gives the lenient span; valid iff the semantic regex matches that same span. val syn = syntaticMatch.matchAt(value, offset) ?: return emptySequence() val text = syn.value diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Repeat.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Repeat.kt index 3cec7fa..c624e6f 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Repeat.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Repeat.kt @@ -62,13 +62,13 @@ class Repeat(val combinator : Combinator, val minInclusive: Int, val maxExclusiv return match(value, offset, combinator::SemanticMatch) } - override fun parse(value: String, offset: Int): Sequence { + override fun parse(value: String, offset: Int, frontier: Frontier): Sequence { // Offer every repetition count in [minInclusive, maxExclusive] (maxExclusive is the cap on the // count, mirroring the existing match() loop). Yield only once enough repetitions have happened. fun extend(from: Parse, count: Int): Sequence = sequence { if (count >= minInclusive) yield(from) if (count < maxExclusive) { - for (step in combinator.parse(value, from.end)) { + for (step in combinator.parse(value, from.end, frontier)) { if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens), count + 1)) } } diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/SequenceCombinator.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/SequenceCombinator.kt index c959fd5..0692677 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/SequenceCombinator.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/SequenceCombinator.kt @@ -55,12 +55,12 @@ open class SequenceCombinator(vararg val tokens: Combinator) : Combinator { return MatchResult(resultTokens, index, resultTerminals, maxLength) } - override fun parse(value: String, offset: Int): Sequence { + override fun parse(value: String, offset: Int, frontier: Frontier): Sequence { // Thread each possibility of one part into the next: the cartesian product of the parts. var results = sequenceOf(Parse(offset, emptyList())) for (token in tokens) { results = results.flatMap { acc -> - token.parse(value, acc.end).map { next -> Parse(next.end, acc.tokens + next.tokens) } + token.parse(value, acc.end, frontier).map { next -> Parse(next.end, acc.tokens + next.tokens) } } } return results diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/WhitespaceTerminal.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/WhitespaceTerminal.kt index cf9be72..541a4ba 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/WhitespaceTerminal.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/WhitespaceTerminal.kt @@ -27,7 +27,8 @@ class WhitespaceTerminal : TerminalCombinator { return match(value, offset) } - override fun parse(value: String, offset: Int): Sequence { + override fun parse(value: String, offset: Int, frontier: Frontier): Sequence { + frontier.reached(offset, this) var end = offset while (end < value.length && value[end].isWhitespace()) end++ return if (end == offset) emptySequence() diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrMore.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrMore.kt index 0b68640..c94bf12 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrMore.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrMore.kt @@ -42,12 +42,12 @@ class ZeroOrMore(val combinator : Combinator) : Combinator { return match(value, offset, combinator::SemanticMatch) } - override fun parse(value: String, offset: Int): Sequence { + override fun parse(value: String, offset: Int, frontier: Frontier): Sequence { // Offer EVERY repetition count (0, 1, 2, ...), not just the greedy maximum. The `> from.end` // guard keeps an inner matcher that can match empty from looping forever. fun extend(from: Parse): Sequence = sequence { yield(from) // stop repeating here... - for (step in combinator.parse(value, from.end)) { + for (step in combinator.parse(value, from.end, frontier)) { if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens))) // ...or take one more } } diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrOne.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrOne.kt index 279026b..6305bf4 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrOne.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrOne.kt @@ -40,9 +40,9 @@ class ZeroOrOne(val combinator : Combinator) : Combinator { return match(value, offset, combinator::SemanticMatch) } - override fun parse(value: String, offset: Int): Sequence = + override fun parse(value: String, offset: Int, frontier: Frontier): Sequence = // Both the empty match and whatever the inner matcher offers. - sequenceOf(Parse(offset, emptyList())) + combinator.parse(value, offset) + sequenceOf(Parse(offset, emptyList())) + combinator.parse(value, offset, frontier) override fun toString(): String = toStringIndented(0) diff --git a/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ParseTest.kt b/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ParseTest.kt index 8e91dab..9e6b8ce 100644 --- a/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ParseTest.kt +++ b/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ParseTest.kt @@ -2,6 +2,7 @@ package net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.gra import net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.ai.ConfigParseAddressFamiliesOptionValue import org.junit.Assert.assertEquals +import org.junit.Assert.assertNotNull import org.junit.Assert.assertTrue import org.junit.Test @@ -44,13 +45,26 @@ class ParseTest { assertEquals("AF_BOGUS", (semantic as ParseOutcome.SemanticError).badToken.text) // Comma breaks the shape after "AF_INET" -> syntax error (malformed, not just an unknown name). - val syntax = grammar.validate("AF_INET, AF_INET6") - assertTrue(syntax is ParseOutcome.SyntaxError) - // KNOWN LIMITATION: `furthest` is best-effort. This grammar ends in EOF(), so the outer - // Seq(..., EOF()) drops the partial "AF_INET" path when EOF fails, and furthest collapses to 0 - // (we'd want 7). Precise localization needs the frontier/expected-set layer — the same machinery - // that powers completion (#343) — which is deliberately not in this step. Pinned to document it. - assertEquals(0, (syntax as ParseOutcome.SyntaxError).furthest) + // Thanks to the frontier layer we now report WHERE it got stuck (offset 7, the comma) and WHAT + // was expected there: another whitespace-separated family, or end-of-input. + val syntax = grammar.validate("AF_INET, AF_INET6") as? ParseOutcome.SyntaxError + assertNotNull(syntax) + assertEquals(7, syntax!!.furthest) + assertTrue(syntax.expected.any { it is WhitespaceTerminal }) + assertTrue(syntax.expected.any { it is EOF }) + } + + @Test + fun testFrontierSeedsCompletionAtStart() { + // The frontier's "expected set" is exactly what completion (#343) needs: at the caret position, + // which tokens could legally come next? For the empty value at offset 0, the grammar expects + // "none", the "~" inversion prefix, or an address-family name. + val grammar = ConfigParseAddressFamiliesOptionValue().combinator + val outcome = grammar.validate("") as? ParseOutcome.SyntaxError + assertNotNull(outcome) + assertEquals(0, outcome!!.furthest) + assertTrue(outcome.expected.any { it is FlexibleLiteralChoiceTerminal }) // the AF_* names + assertTrue(outcome.expected.any { it is LiteralChoiceTerminal }) // "none" / "~" } @Test