Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ open class AlternativeCombinator(vararg val tokens: Combinator) : Combinator {
return match(value, offset, Combinator::SemanticMatch)
}

override fun parse(value: String, offset: Int): Sequence<Parse> =
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> =
// Offer every alternative's matches, so the order of options no longer affects correctness.
tokens.asSequence().flatMap { it.parse(value, offset) }
tokens.asSequence().flatMap { it.parse(value, offset, frontier) }

override fun toString(): String = toStringIndented(0)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,12 @@ interface Combinator {
* carries a `valid` flag for the strict (semantic) check. Because every alternative is offered
* rather than the first greedy one committed to, matching is complete — e.g.
* Seq(ZeroOrMore("a"), "a") on "aa" matches, because ZeroOrMore offers the shorter match too.
*
* [frontier] records the deepest offset reached and what was expected there, so that even when no
* path succeeds we can localize the error (and, later, drive completion). Combinators thread the
* same instance into their children; leaf matchers report themselves to it.
*/
fun parse(value: String, offset: Int): Sequence<Parse>
fun parse(value: String, offset: Int, frontier: Frontier = Frontier()): Sequence<Parse>

fun toStringIndented(indent: Int): String
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ class EOF : Combinator {
}
}

override fun parse(value: String, offset: Int): Sequence<Parse> =
if (offset == value.length) sequenceOf(Parse(offset, emptyList())) else emptySequence()
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
frontier.reached(offset, this) // we expect end-of-input here
return if (offset == value.length) sequenceOf(Parse(offset, emptyList())) else emptySequence()
}

override fun toStringIndented(indent: Int): String {
return "EOF"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ class FlexibleLiteralChoiceTerminal(vararg val choices: String) : TerminalCombin
return NoMatch.copy(longestMatch = offset)
}

override fun parse(value: String, offset: Int): Sequence<Parse> {
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
frontier.reached(offset, this)
// Lenient shape match (so a wrong token like AF_BOGUS still matches and can be highlighted),
// valid only if the matched text is one of the exact choices.
val m = syntaticMatch.matchAt(value, offset) ?: return emptySequence()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.grammar

/**
 * High-water-mark recorder that is passed along through parse() calls (#467 step 3).
 *
 * parse() yields only *successful* matches, so a malformed value leaves no trace of how far the
 * grammar got before giving up. A Frontier is the side-channel that outlives those failures: leaf
 * matchers (terminals and EOF) call [reached] whenever they are consulted, and the frontier
 * remembers the DEEPEST offset seen together with every matcher reported at that offset.
 *
 * One mechanism, two uses:
 *  - error localization: [position] is where parsing got stuck, and [expected] is what would have
 *    been acceptable there;
 *  - the seed of completion (#343), which asks the same "what may come next here?" question.
 *
 * The instance is deliberately mutable and shared across the whole (lazy) exploration of a single
 * value, so it reflects the deepest reach over every path tried. [expected] is a live read-only
 * view of the internal set: its contents keep changing for as long as [reached] is being called.
 */
class Frontier {
    /** Deepest offset at which a matcher has been reported so far; starts at 0. */
    var position: Int = 0
        private set

    // Insertion-ordered, so [expected] iterates matchers in the order they were first reported.
    private val wantedHere = LinkedHashSet<Combinator>()

    /** Matchers reported at [position], i.e. what the grammar was hoping to find there. */
    val expected: Set<Combinator>
        get() = wantedHere

    /**
     * Notes that [matcher] was consulted at [offset].
     *
     * An offset shallower than [position] is ignored; a deeper offset discards everything recorded
     * so far and becomes the new [position]; an equal offset simply adds [matcher] to the set.
     */
    fun reached(offset: Int, matcher: Combinator) {
        if (offset < position) return // a shallower path: nothing worth recording
        if (offset > position) {
            // New high-water mark: whatever was wanted at the old position no longer matters.
            position = offset
            wantedHere.clear()
        }
        wantedHere.add(matcher)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ class IntegerTerminal(private val minInclusive: Long,private val maxExclusive: L
}
}

override fun parse(value: String, offset: Int): Sequence<Parse> {
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
frontier.reached(offset, this)
val m = intRegex.matchAt(value, offset) ?: return emptySequence()
val text = m.value
// Lenient: any integer matches (so we can locate it); valid only if it is within range.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ class LiteralChoiceTerminal(vararg var choices: String) : TerminalCombinator {
return match(value, offset)
}

override fun parse(value: String, offset: Int): Sequence<Parse> =
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
frontier.reached(offset, this)
// Offer every choice that matches here (e.g. both ":" and "::"); each is always strictly valid.
choices.asSequence()
return choices.asSequence()
.filter { value.startsWith(it, offset) }
.map { Parse(offset + it.length, listOf(ParsedToken(offset, offset + it.length, it, this, valid = true))) }
}

override fun toString(): String {
return if (choices.size == 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,15 @@ class OneOrMore(val combinator : Combinator) : Combinator {
return match(value, offset, combinator::SemanticMatch)
}

override fun parse(value: String, offset: Int): Sequence<Parse> {
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
// Same as ZeroOrMore, but the first repetition is mandatory (and must make progress).
fun extend(from: Parse): Sequence<Parse> = sequence {
yield(from)
for (step in combinator.parse(value, from.end)) {
for (step in combinator.parse(value, from.end, frontier)) {
if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens)))
}
}
return combinator.parse(value, offset).filter { it.end > offset }.flatMap { extend(it) }
return combinator.parse(value, offset, frontier).filter { it.end > offset }.flatMap { extend(it) }
}

override fun toString(): String = toStringIndented(0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,12 @@ sealed interface ParseOutcome {
/** A path consumed the whole value, but a token is not strictly valid (well-formed but wrong). */
data class SemanticError(val badToken: ParsedToken) : ParseOutcome

/** No path consumed the whole value. [furthest] is how far any path got (for error localization). */
data class SyntaxError(val furthest: Int) : ParseOutcome
/**
* No path consumed the whole value. [furthest] is the deepest offset any path reached, and
* [expected] is the set of matchers the grammar was hoping to see there (for error localization,
* and the seed of completion).
*/
data class SyntaxError(val furthest: Int, val expected: Set<Combinator>) : ParseOutcome
}

/** Every way [this] grammar can consume the entire [value]. */
Expand All @@ -46,15 +50,19 @@ fun Combinator.fullParses(value: String): Sequence<Parse> =
* One lenient parse answers both questions the old two passes did:
* - syntactic ("could be this, color it"): did any path consume the whole value?
* - semantic ("actually valid"): did any such path use only valid tokens?
*
* A single shared [Frontier] rides along, so a SyntaxError can report where parsing got stuck.
*/
fun Combinator.validate(value: String): ParseOutcome {
val frontier = Frontier()
var firstBad: ParsedToken? = null
for (p in fullParses(value)) {
for (p in parse(value, 0, frontier)) {
if (p.end != value.length) continue
val bad = p.tokens.firstOrNull { !it.valid }
if (bad == null) return ParseOutcome.Valid // short-circuit on the first fully-valid full parse
if (firstBad == null) firstBad = bad
}
if (firstBad != null) return ParseOutcome.SemanticError(firstBad)
val furthest = parse(value, 0).maxOfOrNull { it.end } ?: 0
return ParseOutcome.SyntaxError(furthest)
// No full parse: exhausting the loop above has populated the frontier with the deepest reach.
return ParseOutcome.SyntaxError(frontier.position, frontier.expected)
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ class RegexTerminal(syntaticMatchStr : String, semanticMatchStr: String ) : Term
return MatchResult(listOf(matchResult.value), offset + matchResult.value.length, listOf(this), offset + matchResult.value.length)
}

override fun parse(value: String, offset: Int): Sequence<Parse> {
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
frontier.reached(offset, this)
// The syntactic regex gives the lenient span; valid iff the semantic regex matches that same span.
val syn = syntaticMatch.matchAt(value, offset) ?: return emptySequence()
val text = syn.value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,13 @@ class Repeat(val combinator : Combinator, val minInclusive: Int, val maxExclusiv
return match(value, offset, combinator::SemanticMatch)
}

override fun parse(value: String, offset: Int): Sequence<Parse> {
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
// Offer every repetition count in [minInclusive, maxExclusive] (maxExclusive is the cap on the
// count, mirroring the existing match() loop). Yield only once enough repetitions have happened.
fun extend(from: Parse, count: Int): Sequence<Parse> = sequence {
if (count >= minInclusive) yield(from)
if (count < maxExclusive) {
for (step in combinator.parse(value, from.end)) {
for (step in combinator.parse(value, from.end, frontier)) {
if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens), count + 1))
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@ open class SequenceCombinator(vararg val tokens: Combinator) : Combinator {
return MatchResult(resultTokens, index, resultTerminals, maxLength)
}

override fun parse(value: String, offset: Int): Sequence<Parse> {
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
// Thread each possibility of one part into the next: the cartesian product of the parts.
var results = sequenceOf(Parse(offset, emptyList()))
for (token in tokens) {
results = results.flatMap { acc ->
token.parse(value, acc.end).map { next -> Parse(next.end, acc.tokens + next.tokens) }
token.parse(value, acc.end, frontier).map { next -> Parse(next.end, acc.tokens + next.tokens) }
}
}
return results
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class WhitespaceTerminal : TerminalCombinator {
return match(value, offset)
}

override fun parse(value: String, offset: Int): Sequence<Parse> {
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
frontier.reached(offset, this)
var end = offset
while (end < value.length && value[end].isWhitespace()) end++
return if (end == offset) emptySequence()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@ class ZeroOrMore(val combinator : Combinator) : Combinator {
return match(value, offset, combinator::SemanticMatch)
}

override fun parse(value: String, offset: Int): Sequence<Parse> {
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
// Offer EVERY repetition count (0, 1, 2, ...), not just the greedy maximum. The `> from.end`
// guard keeps an inner matcher that can match empty from looping forever.
fun extend(from: Parse): Sequence<Parse> = sequence {
yield(from) // stop repeating here...
for (step in combinator.parse(value, from.end)) {
for (step in combinator.parse(value, from.end, frontier)) {
if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens))) // ...or take one more
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ class ZeroOrOne(val combinator : Combinator) : Combinator {
return match(value, offset, combinator::SemanticMatch)
}

override fun parse(value: String, offset: Int): Sequence<Parse> =
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> =
// Both the empty match and whatever the inner matcher offers.
sequenceOf(Parse(offset, emptyList())) + combinator.parse(value, offset)
sequenceOf(Parse(offset, emptyList())) + combinator.parse(value, offset, frontier)

override fun toString(): String = toStringIndented(0)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.gra

import net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.ai.ConfigParseAddressFamiliesOptionValue
import org.junit.Assert.assertEquals
import org.junit.Assert.assertNotNull
import org.junit.Assert.assertTrue
import org.junit.Test

Expand Down Expand Up @@ -44,13 +45,26 @@ class ParseTest {
assertEquals("AF_BOGUS", (semantic as ParseOutcome.SemanticError).badToken.text)

// Comma breaks the shape after "AF_INET" -> syntax error (malformed, not just an unknown name).
val syntax = grammar.validate("AF_INET, AF_INET6")
assertTrue(syntax is ParseOutcome.SyntaxError)
// KNOWN LIMITATION: `furthest` is best-effort. This grammar ends in EOF(), so the outer
// Seq(..., EOF()) drops the partial "AF_INET" path when EOF fails, and furthest collapses to 0
// (we'd want 7). Precise localization needs the frontier/expected-set layer — the same machinery
// that powers completion (#343) — which is deliberately not in this step. Pinned to document it.
assertEquals(0, (syntax as ParseOutcome.SyntaxError).furthest)
// Thanks to the frontier layer we now report WHERE it got stuck (offset 7, the comma) and WHAT
// was expected there: another whitespace-separated family, or end-of-input.
val syntax = grammar.validate("AF_INET, AF_INET6") as? ParseOutcome.SyntaxError
assertNotNull(syntax)
assertEquals(7, syntax!!.furthest)
assertTrue(syntax.expected.any { it is WhitespaceTerminal })
assertTrue(syntax.expected.any { it is EOF })
}

@Test
fun testFrontierSeedsCompletionAtStart() {
    // The frontier's "expected set" is exactly what completion (#343) needs: at the caret position,
    // which tokens could legally come next? For the empty value at offset 0, the grammar expects
    // "none", the "~" inversion prefix, or an address-family name.
    val grammar = ConfigParseAddressFamiliesOptionValue().combinator
    val result = grammar.validate("")
    // Fail with an AssertionError that names the actual outcome, and get a non-null SyntaxError
    // for the checks below without resorting to `!!`.
    val outcome = result as? ParseOutcome.SyntaxError
        ?: throw AssertionError("expected a SyntaxError for the empty value but got $result")
    // Nothing was consumed, so the deepest offset reached is the very start of the value.
    assertEquals(0, outcome.furthest)
    assertTrue(outcome.expected.any { it is FlexibleLiteralChoiceTerminal }) // the AF_* names
    assertTrue(outcome.expected.any { it is LiteralChoiceTerminal }) // "none" / "~"
}

@Test
Expand Down
Loading