Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ open class AlternativeCombinator(vararg val tokens: Combinator) : Combinator {
return match(value, offset, Combinator::SemanticMatch)
}

override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
    // Every alternative is tried from the same offset and all of their matches are offered,
    // lazily and in declaration order, so correctness does not depend on the order of options.
    // The same frontier instance is handed to each alternative.
    val alternatives = tokens.asSequence()
    return alternatives.flatMap { alternative -> alternative.parse(value, offset, frontier) }
}
override fun parse(value: String, offset: Int): Sequence<ParseStep> = sequence {
    // Try each alternative from the same offset, in declaration order, and pass along everything
    // it produces: its matches and its dead ends alike. Option order therefore does not affect
    // correctness, and a branch that fails still reports what it expected.
    for (alternative in tokens) {
        yieldAll(alternative.parse(value, offset))
    }
}

override fun toString(): String = toStringIndented(0)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,20 @@ interface Combinator {
fun SemanticMatch(value : String, offset: Int): MatchResult

/**
* List-of-successes matcher (#467). Returns EVERY way this combinator can consume [value]
* starting at [offset], lazily; an empty sequence means no match.
* List-of-successes matcher (#467). Returns EVERY way this combinator can proceed from [offset] in
* [value], lazily, as a stream of [ParseStep]s: a [Parse] for each way it matched, and a [Stuck]
* for each dead end (carrying where it got stuck and what was expected there).
*
* This lives alongside Syntactic/SemanticMatch and is a single lenient pass: each [ParsedToken]
* carries a `valid` flag for the strict (semantic) check. Because every alternative is offered
* rather than the first greedy one committed to, matching is complete — e.g.
* Seq(ZeroOrMore("a"), "a") on "aa" matches, because ZeroOrMore offers the shorter match too.
*
* [frontier] records the deepest offset reached and what was expected there, so that even when no
* path succeeds we can localize the error (and, later, drive completion). Combinators thread the
* same instance into their children; leaf matchers report themselves to it.
* Returning [Stuck] as a value (rather than an empty sequence) means failure information — how far
* we got and what we expected — travels back through the return value, so no side channel is
* needed to localize errors.
*/
fun parse(value: String, offset: Int, frontier: Frontier = Frontier()): Sequence<Parse>
fun parse(value: String, offset: Int): Sequence<ParseStep>

fun toStringIndented(indent: Int): String
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ class EOF : Combinator {
}
}

override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
    // Always tell the frontier that end-of-input is what we expect at this offset.
    frontier.reached(offset, this)
    // Anything other than the exact end of the value is "no match" (an empty sequence).
    val atEnd = offset == value.length
    if (!atEnd) return emptySequence()
    // At the end: a zero-width match that produces no tokens.
    return sequenceOf(Parse(offset, emptyList()))
}
override fun parse(value: String, offset: Int): Sequence<ParseStep> {
    // Exactly one step is always produced: a zero-width, token-less match when [offset] is the end
    // of [value], otherwise a dead end at [offset] that names this matcher as what was expected.
    val step: ParseStep = when (offset) {
        value.length -> Parse(offset, emptyList())
        else -> Stuck(offset, setOf(this))
    }
    return sequenceOf(step)
}

override fun toStringIndented(indent: Int): String {
return "EOF"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,10 @@ class FlexibleLiteralChoiceTerminal(vararg val choices: String) : TerminalCombin
return NoMatch.copy(longestMatch = offset)
}

override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
frontier.reached(offset, this)
override fun parse(value: String, offset: Int): Sequence<ParseStep> {
// Lenient shape match (so a wrong token like AF_BOGUS still matches and can be highlighted),
// valid only if the matched text is one of the exact choices.
val m = syntaticMatch.matchAt(value, offset) ?: return emptySequence()
val m = syntaticMatch.matchAt(value, offset) ?: return sequenceOf(Stuck(offset, setOf(this)))
val text = m.value
val valid = choices.any { it == text }
return sequenceOf(Parse(offset + text.length, listOf(ParsedToken(offset, offset + text.length, text, this, valid))))
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@ class IntegerTerminal(private val minInclusive: Long,private val maxExclusive: L
}
}

override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
frontier.reached(offset, this)
val m = intRegex.matchAt(value, offset) ?: return emptySequence()
override fun parse(value: String, offset: Int): Sequence<ParseStep> {
val m = intRegex.matchAt(value, offset) ?: return sequenceOf(Stuck(offset, setOf(this)))
val text = m.value
// Lenient: any integer matches (so we can locate it); valid only if it is within range.
val valid = text.toLongOrNull()?.let { it >= minInclusive && it < maxExclusive } ?: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ class LiteralChoiceTerminal(vararg var choices: String) : TerminalCombinator {
return match(value, offset)
}

// NOTE(review): this span is diff residue; two versions of parse() are interleaved line by line.
// Version threading a Frontier: signature plus its report-to-frontier call.
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
frontier.reached(offset, this)
// Version returning ParseStep: no frontier parameter; a failure to match is returned as a Stuck value.
override fun parse(value: String, offset: Int): Sequence<ParseStep> {
// Offer every choice that matches here (e.g. both ":" and "::"); each is always strictly valid.
// Frontier version: lazy filter; yields an empty sequence when no choice is a prefix at [offset].
return choices.asSequence()
.filter { value.startsWith(it, offset) }
// ParseStep version: filter eagerly so the "nothing matched" case can be detected and turned into
// a single Stuck at [offset] that names this terminal as what was expected.
val matches = choices.filter { value.startsWith(it, offset) }
return if (matches.isEmpty()) sequenceOf(Stuck(offset, setOf(this)))
else matches.asSequence()
// Shared tail: one Parse per matching choice, ending just past the matched literal and carrying a
// single token that spans [offset, offset + length) and is marked valid.
.map { Parse(offset + it.length, listOf(ParsedToken(offset, offset + it.length, it, this, valid = true))) }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,23 @@ class OneOrMore(val combinator : Combinator) : Combinator {
return match(value, offset, combinator::SemanticMatch)
}

// NOTE(review): this span is diff residue; the Frontier-threading version and the ParseStep-returning
// version of parse() are interleaved line by line.
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
override fun parse(value: String, offset: Int): Sequence<ParseStep> {
// Same as ZeroOrMore, but the first repetition is mandatory (and must make progress).
// extend(from): offers `from` itself (stop repeating here), then every longer parse obtained by
// matching the inner combinator again at from.end, recursively and lazily.
fun extend(from: Parse): Sequence<Parse> = sequence {
fun extend(from: Parse): Sequence<ParseStep> = sequence {
yield(from)
// Frontier version: only repetitions that consume input are followed.
for (step in combinator.parse(value, from.end, frontier)) {
if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens)))
// ParseStep version: same progress guard for matches; a dead end of the inner combinator is passed
// through unchanged, so it is emitted alongside the already-yielded `from`.
for (step in combinator.parse(value, from.end)) {
when (step) {
// The progress guard (step.end > from.end) means each recursive call starts at a strictly larger offset.
is Parse -> if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens)))
is Stuck -> yield(step)
}
}
}
// ParseStep version of the entry point: the first repetition must consume input before extend() runs.
return combinator.parse(value, offset).flatMap { step ->
when (step) {
// NOTE(review): a zero-width first match produces an empty sequence here, i.e. no Parse and no
// Stuck. That looks at odds with the "failure is a value, not an absence" convention stated in
// the file-header comment of the ParseStep types — confirm whether a Stuck should be emitted.
is Parse -> if (step.end > offset) extend(step) else emptySequence()
is Stuck -> sequenceOf<ParseStep>(step) // the mandatory first repetition failed
}
}
// Frontier version of the entry point: drop zero-width first matches, then extend each remaining one.
return combinator.parse(value, offset, frontier).filter { it.end > offset }.flatMap { extend(it) }
}

override fun toString(): String = toStringIndented(0)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,32 @@
package net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.grammar

/*
* List-of-successes matcher (GitHub #467, step 2).
* List-of-successes matcher (GitHub #467).
*
* These types support a second matching method, `Combinator.parse()`, that lives ALONGSIDE the
* existing SyntacticMatch / SemanticMatch on every combinator. Nothing here is wired into
* GrammarOptionValue yet — the goal is to flesh the approach out on the real combinators and
* validate it against the real grammars in tests before deciding to migrate the caller.
*
* Where the existing engine returns ONE greedy result and runs two near-identical passes, parse()
* returns EVERY way a combinator can match (lazily), and folds the strict "semantic" check into a
* returns EVERY way a combinator can proceed (lazily), and folds the strict "semantic" check into a
* `valid` flag on each token. So one lenient pass answers both questions, and greedy traps like
* Seq(ZeroOrMore("a"), "a") on "aa" resolve themselves (see Combinator.parse docs).
* Seq(ZeroOrMore("a"), "a") on "aa" resolve themselves.
*
* FAILURE IS A VALUE, NOT AN ABSENCE
* ----------------------------------
* A matcher does not signal "no match" by returning an empty sequence. It returns a [Stuck] — a
* first-class value carrying the offset it got stuck at and what it was hoping to see. That single
* decision is why error localization needs no side-channel: when Seq(..., EOF()) can't finish, the
* EOF failure rides back up the return value as a Stuck(offset=7, {EOF}), so we still know we
* reached offset 7. (Earlier this was modelled as an empty sequence, which threw the offset away and
* forced a mutable "frontier" object to be threaded through parse() to recover it.)
*
* SIMPLER ALTERNATIVE (for the record): instead of returning Stuck values, you can thread a mutable
* accumulator ("frontier") through parse() that every leaf matcher writes its deepest reach into.
* That is less code and a touch lazier, but it splits the data flow across two channels — successes
* via the return value, failures via a pass-by-reference side effect — which is the asymmetry this
* design removes by making both kinds of result travel the same way.
*/

/** A single terminal token, with the strict-validity verdict (the old "semantic" check) folded in. */
Expand All @@ -23,8 +38,18 @@ data class ParsedToken(
val valid: Boolean,
)

/** One way a combinator consumed input from some offset: it ended at [end], producing [tokens]. */
data class Parse(val end: Int, val tokens: List<ParsedToken>)
/**
 * One step a matcher can take from an offset: either it consumed input ([Parse]) or it got [Stuck].
 * Sealed, so a `when` over a step can be checked for exhaustiveness without an `else` branch.
 */
sealed interface ParseStep

/**
 * A successful match: consumed input up to [end], producing [tokens] (each with its `valid` flag).
 * The terminal matchers build [end] as start offset plus matched length, so it is the offset just
 * past the consumed text; a match that consumes nothing has [end] equal to its start offset.
 */
data class Parse(val end: Int, val tokens: List<ParsedToken>) : ParseStep

/**
 * A dead end: matching could not proceed at [offset], where [expected] is the set of matchers the
 * grammar was hoping to see. Carrying this as a value (rather than an empty result) is what lets us
 * localize errors and, later, drive completion — both are "what was expected at this offset?".
 * Terminal matchers create it with themselves as the only element of [expected].
 */
data class Stuck(val offset: Int, val expected: Set<Combinator>) : ParseStep

/** The outcome of validating a whole value against a grammar via parse(). */
sealed interface ParseOutcome {
Expand All @@ -36,33 +61,44 @@ sealed interface ParseOutcome {

/**
* No path consumed the whole value. [furthest] is the deepest offset any path reached, and
* [expected] is the set of matchers the grammar was hoping to see there (for error localization,
* and the seed of completion).
* [expected] is what the grammar was hoping to see there (for error localization / completion).
*/
data class SyntaxError(val furthest: Int, val expected: Set<Combinator>) : ParseOutcome
}

/** Every way [this] grammar can consume the entire [value]. */
/** Every way [this] grammar can consume the entire [value] (successful steps only). */
fun Combinator.fullParses(value: String): Sequence<Parse> =
// NOTE(review): diff residue — two alternative bodies follow.
// Body for parse() returning Sequence<Parse>: keep the parses that end exactly at the end of [value].
parse(value, 0).filter { it.end == value.length }
// Body for parse() returning Sequence<ParseStep>: discard Stuck steps first, then apply the same
// "ends at value.length" filter. The result stays lazy.
parse(value, 0).filterIsInstance<Parse>().filter { it.end == value.length }

/**
* One lenient parse answers both questions the old two passes did:
* - syntactic ("could be this, color it"): did any path consume the whole value?
* - semantic ("actually valid"): did any such path use only valid tokens?
*
* A single shared [Frontier] rides along, so a SyntaxError can report where parsing got stuck.
* On failure we fold the steps back into the deepest offset reached (by a [Stuck] or by a partial
* [Parse]) and the union of what the [Stuck]s at that offset expected — the "frontier", computed from
* the return value rather than mutated into it.
*/
// NOTE(review): this span is diff residue; the Frontier-based body and the ParseStep-based body of
// validate() are interleaved. Comments below say which version each run of lines belongs to.
fun Combinator.validate(value: String): ParseOutcome {
// Frontier version: a single mutable Frontier is passed into parse() and read back on failure.
val frontier = Frontier()
// First invalid token seen on any full parse; reported only if no fully-valid full parse exists.
var firstBad: ParsedToken? = null
// Frontier version of the loop: ignore partial parses, short-circuit on a fully-valid full parse.
for (p in parse(value, 0, frontier)) {
if (p.end != value.length) continue
val bad = p.tokens.firstOrNull { !it.valid }
if (bad == null) return ParseOutcome.Valid // short-circuit on the first fully-valid full parse
if (firstBad == null) firstBad = bad
// ParseStep version: the frontier is two locals instead of an object. `furthest` is the deepest
// offset seen so far; `expected` is what Stuck steps at exactly that offset were hoping to see.
var furthest = 0
var expected = emptySet<Combinator>()

for (step in parse(value, 0)) {
when (step) {
is Parse -> {
// Only parses that consume the whole value are candidates for Valid / SemanticError.
if (step.end == value.length) {
val bad = step.tokens.firstOrNull { !it.valid }
if (bad == null) return ParseOutcome.Valid // first fully-valid full parse wins; short-circuit
if (firstBad == null) firstBad = bad
}
// A parse that reaches further than anything seen so far moves the frontier forward; no Stuck has
// reported an expectation at that new offset yet, so the expected set is reset.
if (step.end > furthest) { furthest = step.end; expected = emptySet() }
}
is Stuck -> when {
// A deeper dead end replaces the frontier; one at the same depth adds to the expected set.
// Shallower dead ends match neither branch and are ignored.
step.offset > furthest -> { furthest = step.offset; expected = step.expected }
step.offset == furthest -> expected = expected + step.expected
}
}
}
// Frontier version of the result: semantic error if a full parse existed, else read the frontier.
if (firstBad != null) return ParseOutcome.SemanticError(firstBad)
// No full parse: exhausting the loop above has populated the frontier with the deepest reach.
return ParseOutcome.SyntaxError(frontier.position, frontier.expected)

// ParseStep version of the result: a full parse with an invalid token takes precedence
// (SemanticError); otherwise no path consumed the whole value (SyntaxError at the folded frontier).
return firstBad?.let { ParseOutcome.SemanticError(it) } ?: ParseOutcome.SyntaxError(furthest, expected)
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@ class RegexTerminal(syntaticMatchStr : String, semanticMatchStr: String ) : Term
return MatchResult(listOf(matchResult.value), offset + matchResult.value.length, listOf(this), offset + matchResult.value.length)
}

override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
frontier.reached(offset, this)
override fun parse(value: String, offset: Int): Sequence<ParseStep> {
// The syntactic regex gives the lenient span; valid iff the semantic regex matches that same span.
val syn = syntaticMatch.matchAt(value, offset) ?: return emptySequence()
val syn = syntaticMatch.matchAt(value, offset) ?: return sequenceOf(Stuck(offset, setOf(this)))
val text = syn.value
val valid = semanticMatch.matchAt(value, offset)?.value == text
return sequenceOf(Parse(offset + text.length, listOf(ParsedToken(offset, offset + text.length, text, this, valid))))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,18 @@ class Repeat(val combinator : Combinator, val minInclusive: Int, val maxExclusiv
return match(value, offset, combinator::SemanticMatch)
}

override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
override fun parse(value: String, offset: Int): Sequence<ParseStep> {
// Offer every repetition count in [minInclusive, maxExclusive] (maxExclusive is the cap on the
// count, mirroring the existing match() loop). Yield only once enough repetitions have happened.
fun extend(from: Parse, count: Int): Sequence<Parse> = sequence {
// count, mirroring the existing match() loop). Yield only once enough repetitions have happened;
// a failed attempt at another repetition is carried as a Stuck.
fun extend(from: Parse, count: Int): Sequence<ParseStep> = sequence {
if (count >= minInclusive) yield(from)
if (count < maxExclusive) {
for (step in combinator.parse(value, from.end, frontier)) {
if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens), count + 1))
for (step in combinator.parse(value, from.end)) {
when (step) {
is Parse -> if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens), count + 1))
is Stuck -> yield(step)
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,21 @@ open class SequenceCombinator(vararg val tokens: Combinator) : Combinator {
return MatchResult(resultTokens, index, resultTerminals, maxLength)
}

override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
// Thread each possibility of one part into the next: the cartesian product of the parts.
var results = sequenceOf(Parse(offset, emptyList()))
override fun parse(value: String, offset: Int): Sequence<ParseStep> {
// Thread each successful possibility of one part into the next (the cartesian product). A part
// that gets stuck — or a path that already got stuck — carries its dead end forward unchanged.
var results: Sequence<ParseStep> = sequenceOf(Parse(offset, emptyList()))
for (token in tokens) {
results = results.flatMap { acc ->
token.parse(value, acc.end, frontier).map { next -> Parse(next.end, acc.tokens + next.tokens) }
when (acc) {
is Stuck -> sequenceOf(acc) // path already dead-ended; carry it forward
is Parse -> token.parse(value, acc.end).map { step ->
when (step) {
is Parse -> Parse(step.end, acc.tokens + step.tokens)
is Stuck -> step // this part got stuck after acc; propagate the dead end
}
}
}
}
}
return results
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,10 @@ class WhitespaceTerminal : TerminalCombinator {
return match(value, offset)
}

// NOTE(review): this span is diff residue; two versions of parse() are interleaved line by line.
// Version threading a Frontier: signature plus its report-to-frontier call.
override fun parse(value: String, offset: Int, frontier: Frontier): Sequence<Parse> {
frontier.reached(offset, this)
// Version returning ParseStep: no frontier parameter.
override fun parse(value: String, offset: Int): Sequence<ParseStep> {
// Shared: scan forward over the maximal run of whitespace characters starting at [offset].
var end = offset
while (end < value.length && value[end].isWhitespace()) end++
// Frontier version: no whitespace at [offset] means "no match", expressed as an empty sequence.
return if (end == offset) emptySequence()
// ParseStep version: no whitespace at [offset] is a dead end that names this terminal as expected.
return if (end == offset) sequenceOf(Stuck(offset, setOf(this)))
// Shared: otherwise exactly one match — the whole run as a single token marked valid.
else sequenceOf(Parse(end, listOf(ParsedToken(offset, end, value.substring(offset, end), this, valid = true))))
}

Expand Down
Loading
Loading