Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ open class AlternativeCombinator(vararg val tokens: Combinator) : Combinator {
return match(value, offset, Combinator::SemanticMatch)
}

/**
 * Enumerates the parses of every alternative in [tokens] at [offset].
 *
 * Alternatives are consulted lazily, in declaration order, and all of their results are
 * concatenated, so no alternative is dropped merely because an earlier one matched.
 */
override fun parse(value: String, offset: Int): Sequence<Parse> = sequence {
    for (alternative in tokens) {
        yieldAll(alternative.parse(value, offset))
    }
}

/** Renders this combinator by delegating to [toStringIndented] at indent level 0. */
override fun toString(): String {
    return toStringIndented(0)
}

override fun toStringIndented(indent: Int): String {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,16 @@ interface Combinator {
*/
fun SemanticMatch(value : String, offset: Int): MatchResult

/**
 * List-of-successes matcher (#467): enumerates, lazily, EVERY way this combinator can consume
 * [value] starting at [offset]. An empty sequence means the combinator does not match there.
 *
 * This lives alongside SyntacticMatch/SemanticMatch and is a single lenient pass: each
 * [ParsedToken] carries a `valid` flag recording the strict (semantic) verdict. Because every
 * alternative is offered rather than only the first greedy one being committed to, matching is
 * complete — e.g. Seq(ZeroOrMore("a"), "a") on "aa" matches, because ZeroOrMore also offers the
 * shorter match.
 *
 * @param value the full text being matched.
 * @param offset index into [value] at which this combinator should start matching.
 * @return one [Parse] per way of matching; each records the end offset reached and the tokens produced.
 */
fun parse(value: String, offset: Int): Sequence<Parse>

fun toStringIndented(indent: Int): String
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ class EOF : Combinator {
}
}

/**
 * Matches only at the very end of [value].
 *
 * Yields a single empty parse (no tokens, end == [offset]) when [offset] equals the length of
 * [value]; otherwise yields nothing.
 */
override fun parse(value: String, offset: Int): Sequence<Parse> {
    if (offset != value.length) return emptySequence()
    return sequenceOf(Parse(offset, emptyList()))
}

/** Always renders as the fixed label "EOF"; [indent] is not used. */
override fun toStringIndented(indent: Int): String = "EOF"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,15 @@ class FlexibleLiteralChoiceTerminal(vararg val choices: String) : TerminalCombin
return NoMatch.copy(longestMatch = offset)
}

/**
 * Lenient match on the token's shape, with the strict check folded into the token's `valid` flag.
 *
 * Whatever `syntaticMatch` matches at [offset] becomes the token (so a wrong name such as
 * AF_BOGUS is still located and can be highlighted); the token is valid only when its text is
 * exactly one of [choices]. Yields nothing when the shape does not match at [offset].
 */
override fun parse(value: String, offset: Int): Sequence<Parse> {
    val matched = syntaticMatch.matchAt(value, offset)?.value ?: return emptySequence()
    val end = offset + matched.length
    val isKnownChoice = matched in choices
    val token = ParsedToken(offset, end, matched, this, isKnownChoice)
    return sequenceOf(Parse(end, listOf(token)))
}

override fun toString(): String {
return if (choices.size == 1) {
"Literal(\"${choices[0]}\")"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ class IntegerTerminal(private val minInclusive: Long,private val maxExclusive: L
}
}

/**
 * Lenient integer match: anything `intRegex` accepts at [offset] yields one token, so the
 * number can be located even when it is out of range.
 *
 * The token is valid only when its text parses as a Long that lies in
 * [minInclusive, maxExclusive); text that does not fit in a Long is treated as invalid.
 */
override fun parse(value: String, offset: Int): Sequence<Parse> {
    val matched = intRegex.matchAt(value, offset)?.value ?: return emptySequence()
    val end = offset + matched.length
    val number = matched.toLongOrNull()
    val inRange = number != null && number >= minInclusive && number < maxExclusive
    val token = ParsedToken(offset, end, matched, this, inRange)
    return sequenceOf(Parse(end, listOf(token)))
}

/** Renders as `Int(min,max)` using the configured bounds. */
override fun toString(): String = "Int($minInclusive,$maxExclusive)"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ class LiteralChoiceTerminal(vararg var choices: String) : TerminalCombinator {
return match(value, offset)
}

/**
 * Offers one parse for every entry of [choices] that [value] starts with at [offset]
 * (e.g. both ":" and "::" when both are choices), in the order the choices are declared.
 *
 * A literal that matches is by definition strictly valid, so every token has `valid = true`.
 */
override fun parse(value: String, offset: Int): Sequence<Parse> {
    val matching = choices.asSequence().filter { choice -> value.startsWith(choice, offset) }
    return matching.map { choice ->
        val end = offset + choice.length
        Parse(end, listOf(ParsedToken(offset, end, choice, this, valid = true)))
    }
}

override fun toString(): String {
return if (choices.size == 1) {
"Literal(\"${choices[0]}\")"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,17 @@ class OneOrMore(val combinator : Combinator) : Combinator {
return match(value, offset, combinator::SemanticMatch)
}

/**
 * Offers every repetition count of [combinator] from one upwards, shortest first.
 *
 * The first repetition is mandatory, and every repetition (including the first) must consume
 * at least one character; steps that make no progress are discarded, which also stops an
 * inner matcher that can match empty from recursing forever.
 */
override fun parse(value: String, offset: Int): Sequence<Parse> {
    // All parses that start with `prefix` and append zero or more further repetitions.
    fun repetitionsFrom(prefix: Parse): Sequence<Parse> = sequence {
        yield(prefix)
        val progressing = combinator.parse(value, prefix.end).filter { it.end > prefix.end }
        for (step in progressing) {
            yieldAll(repetitionsFrom(Parse(step.end, prefix.tokens + step.tokens)))
        }
    }

    return combinator.parse(value, offset)
        .filter { first -> first.end > offset }
        .flatMap { first -> repetitionsFrom(first) }
}

/** Renders this combinator by delegating to [toStringIndented] at indent level 0. */
override fun toString(): String {
    return toStringIndented(0)
}

override fun toStringIndented(indent: Int): String {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.grammar

/*
* List-of-successes matcher (GitHub #467, step 2).
*
* These types support a second matching method, `Combinator.parse()`, that lives ALONGSIDE the
* existing SyntacticMatch / SemanticMatch on every combinator. Nothing here is wired into
* GrammarOptionValue yet — the goal is to flesh the approach out on the real combinators and
* validate it against the real grammars in tests before deciding to migrate the caller.
*
* Where the existing engine returns ONE greedy result and runs two near-identical passes, parse()
* returns EVERY way a combinator can match (lazily), and folds the strict "semantic" check into a
* `valid` flag on each token. So one lenient pass answers both questions, and greedy traps like
* Seq(ZeroOrMore("a"), "a") on "aa" resolve themselves (see Combinator.parse docs).
*/

/**
 * A single terminal token, with the strict-validity verdict (the old "semantic" check) folded in.
 *
 * @property start offset in the parsed value where the token begins (inclusive).
 * @property end offset just past the token's last character (exclusive).
 * @property text the matched text.
 * @property terminal the terminal combinator that produced this token.
 * @property valid true when the token also passes the terminal's strict check; false when it is
 *   only a lenient (well-formed but wrong) match.
 */
data class ParsedToken(
val start: Int,
val end: Int,
val text: String,
val terminal: TerminalCombinator,
val valid: Boolean,
)

/**
 * One way a combinator consumed input from some offset.
 *
 * @property end the offset the match reached (just past the consumed text).
 * @property tokens the terminal tokens produced along the way, in match order; may be empty
 *   (for example for an empty match).
 */
data class Parse(val end: Int, val tokens: List<ParsedToken>)

/**
 * The outcome of validating a whole value against a grammar via parse().
 *
 * Exactly one of three verdicts: fully valid, well-formed but semantically wrong, or malformed.
 */
sealed interface ParseOutcome {
/** Some path consumed the whole value with every token strictly valid. */
object Valid : ParseOutcome

/**
 * A path consumed the whole value, but a token is not strictly valid (well-formed but wrong).
 * [badToken] is that offending token.
 */
data class SemanticError(val badToken: ParsedToken) : ParseOutcome

/**
 * No path consumed the whole value. [furthest] is how far any path got (for error localization);
 * it is best-effort and is 0 when no path matched at all.
 */
data class SyntaxError(val furthest: Int) : ParseOutcome
}

/**
 * Every way this grammar can consume the entire [value]: the parses from offset 0 whose end
 * is exactly the length of [value]. Evaluated lazily.
 */
fun Combinator.fullParses(value: String): Sequence<Parse> {
    val wholeLength = value.length
    return parse(value, 0).filter { candidate -> candidate.end == wholeLength }
}

/**
 * Validates [value] against this grammar with ONE lenient enumeration of `parse(value, 0)`,
 * answering both questions the old two passes did:
 *  - syntactic ("could be this, color it"): did any path consume the whole value?
 *  - semantic ("actually valid"): did any such path use only valid tokens?
 *
 * The furthest offset reached is tracked during the same pass, so the parse space — which can be
 * large, since every alternative and repetition count is offered — is never enumerated a second
 * time just to localize a syntax error.
 *
 * @param value the complete option value to check.
 * @return [ParseOutcome.Valid] as soon as a full parse with only valid tokens is seen;
 *   otherwise [ParseOutcome.SemanticError] carrying the first invalid token of the first full
 *   parse encountered; otherwise [ParseOutcome.SyntaxError] with the largest end offset any
 *   parse reached (0 when nothing matched at all).
 */
fun Combinator.validate(value: String): ParseOutcome {
    var firstBad: ParsedToken? = null
    var furthest: Int? = null
    for (p in parse(value, 0)) {
        val reached = furthest
        if (reached == null || p.end > reached) furthest = p.end

        // Only parses that consume the whole value can make it valid or semantically wrong.
        if (p.end != value.length) continue

        // Short-circuit on the first fully-valid full parse.
        val bad = p.tokens.firstOrNull { !it.valid } ?: return ParseOutcome.Valid
        if (firstBad == null) firstBad = bad
    }
    if (firstBad != null) return ParseOutcome.SemanticError(firstBad)
    return ParseOutcome.SyntaxError(furthest ?: 0)
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ class RegexTerminal(syntaticMatchStr : String, semanticMatchStr: String ) : Term
return MatchResult(listOf(matchResult.value), offset + matchResult.value.length, listOf(this), offset + matchResult.value.length)
}

/**
 * The syntactic regex decides the (lenient) span of the token; the token is valid only when
 * the semantic regex, applied at the same [offset], matches exactly that same text.
 *
 * Yields nothing when the syntactic regex does not match at [offset].
 */
override fun parse(value: String, offset: Int): Sequence<Parse> {
    val lenient = syntaticMatch.matchAt(value, offset)?.value ?: return emptySequence()
    val strict = semanticMatch.matchAt(value, offset)?.value
    val end = offset + lenient.length
    val valid = strict == lenient
    return sequenceOf(Parse(end, listOf(ParsedToken(offset, end, lenient, this, valid))))
}

/** Renders as `Regex("<pattern>")`, showing the pattern of the semantic regex. */
override fun toString(): String = "Regex(\"${semanticMatch.pattern}\")"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,20 @@ class Repeat(val combinator : Combinator, val minInclusive: Int, val maxExclusiv
return match(value, offset, combinator::SemanticMatch)
}

/**
 * Offers every allowed repetition count of [combinator], shortest first.
 *
 * A parse is yielded once at least [minInclusive] repetitions have been taken, and another
 * repetition is attempted only while fewer than [maxExclusive] have been taken — so
 * [maxExclusive] acts as the cap on the count (a count equal to it is still offered),
 * mirroring the existing match() loop. Each repetition must consume at least one character.
 */
override fun parse(value: String, offset: Int): Sequence<Parse> {
    // All parses that start with `prefix`, which already holds `done` repetitions.
    fun repetitionsFrom(prefix: Parse, done: Int): Sequence<Parse> = sequence {
        if (done >= minInclusive) yield(prefix)
        if (done >= maxExclusive) return@sequence
        for (step in combinator.parse(value, prefix.end)) {
            if (step.end <= prefix.end) continue // no progress: skip
            yieldAll(repetitionsFrom(Parse(step.end, prefix.tokens + step.tokens), done + 1))
        }
    }

    return repetitionsFrom(Parse(offset, emptyList()), 0)
}

/** Renders this combinator by delegating to [toStringIndented] at indent level 0. */
override fun toString(): String {
    return toStringIndented(0)
}

override fun toStringIndented(indent: Int): String {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,17 @@ open class SequenceCombinator(vararg val tokens: Combinator) : Combinator {
return MatchResult(resultTokens, index, resultTerminals, maxLength)
}

/**
 * Matches the parts in [tokens] one after another, threading every possibility of one part
 * into the next (the cartesian product of the parts' parses).
 *
 * Each resulting parse ends where its last part ended and carries the tokens of all parts in
 * order. With no parts at all, the result is the single empty parse at [offset].
 */
override fun parse(value: String, offset: Int): Sequence<Parse> {
    val start = sequenceOf(Parse(offset, emptyList()))
    return tokens.fold(start) { prefixes, part ->
        prefixes.flatMap { prefix ->
            part.parse(value, prefix.end).map { tail -> Parse(tail.end, prefix.tokens + tail.tokens) }
        }
    }
}

/** Renders this combinator by delegating to [toStringIndented] at indent level 0. */
override fun toString(): String {
    return toStringIndented(0)
}

override fun toStringIndented(indent: Int): String {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ class WhitespaceTerminal : TerminalCombinator {
return match(value, offset)
}

/**
 * Consumes the maximal run of whitespace characters starting at [offset].
 *
 * Yields one parse with a single, always-valid token covering that run, or nothing when the
 * character at [offset] is not whitespace (or [offset] is at the end of [value]).
 */
override fun parse(value: String, offset: Int): Sequence<Parse> {
    var cursor = offset
    while (cursor < value.length && value[cursor].isWhitespace()) {
        cursor += 1
    }
    if (cursor == offset) return emptySequence()

    val run = value.substring(offset, cursor)
    return sequenceOf(Parse(cursor, listOf(ParsedToken(offset, cursor, run, this, valid = true))))
}

/** Renders as the regex-style label `\s+`. */
override fun toString(): String = "\\s+"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@ class ZeroOrMore(val combinator : Combinator) : Combinator {
return match(value, offset, combinator::SemanticMatch)
}

/**
 * Offers EVERY repetition count of [combinator] (0, 1, 2, ...), shortest first, rather than
 * only the greedy maximum.
 *
 * Each additional repetition must consume at least one character; steps that make no progress
 * are discarded, which keeps an inner matcher that can match empty from recursing forever.
 */
override fun parse(value: String, offset: Int): Sequence<Parse> {
    // All parses that start with `prefix` and append zero or more further repetitions.
    fun repetitionsFrom(prefix: Parse): Sequence<Parse> = sequence {
        yield(prefix) // option 1: stop repeating here
        val progressing = combinator.parse(value, prefix.end).filter { it.end > prefix.end }
        for (step in progressing) {
            // option 2: take one more repetition and continue from there
            yieldAll(repetitionsFrom(Parse(step.end, prefix.tokens + step.tokens)))
        }
    }

    return repetitionsFrom(Parse(offset, emptyList()))
}

/** Renders this combinator by delegating to [toStringIndented] at indent level 0. */
override fun toString(): String {
    return toStringIndented(0)
}

override fun toStringIndented(indent: Int): String {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ class ZeroOrOne(val combinator : Combinator) : Combinator {
return match(value, offset, combinator::SemanticMatch)
}

/**
 * Offers the empty match (nothing consumed, no tokens) first, followed by every parse the
 * inner [combinator] offers at [offset].
 */
override fun parse(value: String, offset: Int): Sequence<Parse> {
    val skipped = sequenceOf(Parse(offset, emptyList()))
    val taken = combinator.parse(value, offset)
    return skipped + taken
}

/** Renders this combinator by delegating to [toStringIndented] at indent level 0. */
override fun toString(): String {
    return toStringIndented(0)
}

override fun toStringIndented(indent: Int): String {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.grammar

import net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.ai.ConfigParseAddressFamiliesOptionValue
import org.junit.Assert.assertEquals
import org.junit.Assert.assertTrue
import org.junit.Test

/**
 * Tests for the list-of-successes matcher `Combinator.parse()` (#467 step 2).
 *
 * parse()/validate() are exercised against the EXISTING combinator classes and the REAL
 * production grammars: none of the 200+ grammar definitions changed, the same combinators simply
 * grew a second matching method.
 */
class ParseTest {

    /** True when validating [value] against [grammar] yields [ParseOutcome.Valid]. */
    private fun isValid(grammar: Combinator, value: String): Boolean {
        return grammar.validate(value) == ParseOutcome.Valid
    }

    @Test
    fun testRealAddressFamiliesGrammarValidates() {
        // The production grammar as-is, taken directly from the validator.
        val grammar = ConfigParseAddressFamiliesOptionValue().combinator

        val accepted = listOf(
            "none",
            "AF_INET",
            "AF_INET AF_INET6",
            "AF_UNIX AF_NETLINK",
            "~AF_PACKET",
            "~AF_INET AF_INET6",
            "AF_BRIDGE AF_X25 AF_AX25",
            "AF_LOCAL",
            "AF_DECnet",
            "AF_VSOCK AF_XDP AF_MCTP",
            "~AF_UNIX AF_INET AF_INET6 AF_NETLINK AF_PACKET",
        )
        val rejected = listOf(
            "inet",
            "AF_inet",
            "AF_INET, AF_INET6",
            "~ AF_PACKET",
            "NONE",
            "AF_BOGUS",
            "AF_INETZ",
            "AF_INET AF_MADEUP",
            "AF_DECNET",
        )

        accepted.forEach { candidate ->
            assertTrue("expected valid: '$candidate'", isValid(grammar, candidate))
        }
        rejected.forEach { candidate ->
            assertTrue("expected invalid: '$candidate'", !isValid(grammar, candidate))
        }
    }

    @Test
    fun testAddressFamiliesErrorKinds() {
        val grammar = ConfigParseAddressFamiliesOptionValue().combinator

        // Right shape but unknown name: a semantic error that points at the offending token.
        val unknownName = grammar.validate("AF_BOGUS")
        assertTrue(unknownName is ParseOutcome.SemanticError)
        assertEquals("AF_BOGUS", (unknownName as ParseOutcome.SemanticError).badToken.text)

        // The comma after "AF_INET" breaks the shape: a syntax error, not merely an unknown name.
        val malformed = grammar.validate("AF_INET, AF_INET6")
        assertTrue(malformed is ParseOutcome.SyntaxError)
        // KNOWN LIMITATION: `furthest` is best-effort. The grammar ends in EOF(), so the outer
        // Seq(..., EOF()) discards the partial "AF_INET" path once EOF fails, and furthest falls back
        // to 0 (7 would be ideal). Precise localization needs the frontier/expected-set layer — the
        // same machinery that powers completion (#343) — which is intentionally out of scope for this
        // step. The assertion pins the current behavior to document it.
        assertEquals(0, (malformed as ParseOutcome.SyntaxError).furthest)
    }

    @Test
    fun testRealIpv6GrammarValidates() {
        // IPV6_ADDR is the real, hand-ordered Alt of 15+ forms from Combinators.kt. The old engine
        // depended on that ordering to dodge greedy traps; parse() explores every form, so order
        // does not matter here.
        val grammar = SequenceCombinator(IPV6_ADDR, EOF())

        val accepted = listOf("::", "::1", "fe80::1", "2001:db8::1", "1:2:3:4:5:6:7:8", "::ffff:192.168.0.1")
        val rejected = listOf("2001:db8:::1", "1:2:3:4:5:6:7:8:9", "gggg::1", "")

        accepted.forEach { candidate ->
            assertTrue("expected valid IPv6: '$candidate'", isValid(grammar, candidate))
        }
        rejected.forEach { candidate ->
            assertTrue("expected invalid IPv6: '$candidate'", !isValid(grammar, candidate))
        }
    }

    @Test
    fun testIntegerRangeGrammar() {
        // Same shape as the config_parse_ip_port grammar: a port in [0, 65536).
        val grammar = SequenceCombinator(IntegerTerminal(0, 65536), EOF())

        assertTrue(isValid(grammar, "0"))
        assertTrue(isValid(grammar, "65535"))
        assertTrue(!isValid(grammar, "65536")) // well-formed integer, but outside the range
        assertTrue(!isValid(grammar, "-1"))
        assertTrue(!isValid(grammar, "80x"))

        // The integer itself matched; only the range check failed.
        assertTrue(grammar.validate("65536") is ParseOutcome.SemanticError)
    }

    @Test
    fun testGreedyCaseTheOldEngineFails() {
        // Built from the SAME combinator classes the old engine uses. Seq(ZeroOrMore("a"), "a") on
        // "aa" fails under SyntacticMatch/SemanticMatch (the star swallows both a's) but succeeds
        // under parse().
        val grammar = SequenceCombinator(
            ZeroOrMore(LiteralChoiceTerminal("a")),
            LiteralChoiceTerminal("a"),
            EOF(),
        )

        assertTrue(isValid(grammar, "a"))
        assertTrue(isValid(grammar, "aa"))
        assertTrue(isValid(grammar, "aaa"))
        assertTrue(!isValid(grammar, "")) // at least one "a" is required
        assertTrue(!isValid(grammar, "ab")) // trailing junk

        // Show that the old engine really does reject "aa", so the difference is documented and
        // not only asserted for the new matcher.
        val oldEngineFullMatch = grammar.SemanticMatch("aa", 0).matchResult
        assertEquals(-1, oldEngineFullMatch)
    }
}
Loading