From 5fb3d64ca80bfd1b303aafffac78bba000a19ee1 Mon Sep 17 00:00:00 2001 From: Steve Ramage Date: Sun, 21 Jun 2026 12:22:47 -0700 Subject: [PATCH] =?UTF-8?q?feat:=20grammar=20coloring=20engine=20layer=20?= =?UTF-8?q?=E2=80=94=20roles,=20defaults,=20optional=20Labeled=20(#467=20#?= =?UTF-8?q?342)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the (pure, IntelliJ-free) substrate for grammar-based syntax coloring. Coloring is automatic and needs NO grammar changes; an optional wrapper handles composite spans. - Role { ENUM, LITERAL, OPERATOR, IDENTIFIER } + Region(start, end, role). - defaultRole(terminal): IntegerTerminal -> LITERAL; Literal/FlexibleLiteralChoice -> ENUM, or OPERATOR when all choices are punctuation (":", "+", "=", "~", "/", ...); RegexTerminal -> IDENTIFIER; whitespace/other -> uncoloured. - Labeled(role, inner): optional, transparent wrapper. Matching is delegated to inner unchanged (SyntacticMatch/SemanticMatch/parse), so it affects only colour — e.g. Labeled(LITERAL, IPV4_ADDR) paints 127.0.0.1 as one literal instead of per-octet. - Parse gains a defaulted `regions` field; the merging combinators (Seq, ZeroOrMore, OneOrMore, Repeat) thread it. colorize(value) returns labeled regions plus per-token defaults for anything not inside a labeled region. Role -> TextAttributes mapping is deliberately left to the IntelliJ layer (next MR: annotator + color settings, behind the experimental flag). Tests: default roles; automatic colouring of the unchanged RestrictAddressFamilies grammar (operator/enum, whitespace uncoloured); Labeled collapsing an IPv4 address to one LITERAL; and Labeled being transparent to validation. 
Refs #467 #342

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 .../optionvalues/grammar/Coloring.kt          | 63 +++++++++++++++++++
 .../optionvalues/grammar/Labeled.kt           | 28 +++++++++
 .../optionvalues/grammar/OneOrMore.kt         |  2 +-
 .../optionvalues/grammar/Parse.kt             |  7 ++-
 .../optionvalues/grammar/Repeat.kt            |  2 +-
 .../grammar/SequenceCombinator.kt             |  2 +-
 .../optionvalues/grammar/ZeroOrMore.kt        |  2 +-
 .../optionvalues/grammar/ColoringTest.kt      | 53 ++++++++++++++++
 8 files changed, 153 insertions(+), 6 deletions(-)
 create mode 100644 src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Coloring.kt
 create mode 100644 src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Labeled.kt
 create mode 100644 src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ColoringTest.kt

diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Coloring.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Coloring.kt
new file mode 100644
index 0000000..87e80ac
--- /dev/null
+++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Coloring.kt
@@ -0,0 +1,63 @@
+package net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.grammar
+
+/*
+ * Grammar-based syntax coloring (#467 / #342).
+ *
+ * Coloring is OPTIONAL and mostly automatic: [colorize] assigns each matched token a [Role] from
+ * its terminal's [defaultRole], so existing grammars are coloured with no changes. Where a composite
+ * span should read as one unit (e.g. an IP address rather than per-octet), wrap it in [Labeled],
+ * which paints the whole span with one role and is otherwise transparent to matching.
+ *
+ * Roles are abstract; the role -> TextAttributes mapping lives in the IntelliJ layer.
+ */
+
+/** Semantic role of a coloured span. */
+enum class Role {
+    /** A value chosen from a fixed set of words (e.g. `none`, `verity`, `AF_INET`). */
+    ENUM,
+
+    /** A literal value: a number, or a composite value span wrapped in [Labeled] (e.g. an IP). */
+    LITERAL,
+
+    /** A punctuation separator/operator (e.g. `:`, `+`, `=`, `~`, `/`). */
+    OPERATOR,
+
+    /** A free-form identifier (e.g. a regex-matched name). */
+    IDENTIFIER,
+}
+
+/** A coloured span `[start, end)` and its [role]. */
+data class Region(val start: Int, val end: Int, val role: Role)
+
+/**
+ * The role a terminal should get when it is NOT wrapped in [Labeled]. `null` means "do not colour"
+ * (whitespace, and anything we don't recognise).
+ */
+fun defaultRole(terminal: TerminalCombinator): Role? = when (terminal) {
+    is IntegerTerminal -> Role.LITERAL
+    is LiteralChoiceTerminal -> if (terminal.choices.allPunctuation()) Role.OPERATOR else Role.ENUM
+    is FlexibleLiteralChoiceTerminal -> if (terminal.choices.allPunctuation()) Role.OPERATOR else Role.ENUM
+    is RegexTerminal -> Role.IDENTIFIER
+    else -> null // WhitespaceTerminal, and any future terminal types: uncoloured by default
+}
+
+private fun Array<out String>.allPunctuation(): Boolean =
+    isNotEmpty() && all { choice -> choice.isNotEmpty() && choice.none(Char::isLetterOrDigit) }
+
+/**
+ * The coloured regions for [value]. Explicit [Labeled] regions win; any token not inside a labeled
+ * region gets its terminal's [defaultRole]. Returns empty if no full parse exists — we don't colour
+ * values that don't match the grammar.
+ */
+fun Combinator.colorize(value: String): List<Region> {
+    val parse = parse(value, 0).filterIsInstance<Parse>().firstOrNull { it.end == value.length } ?: return emptyList()
+
+    val regions = parse.regions.toMutableList()
+    for (token in parse.tokens) {
+        val role = defaultRole(token.terminal) ?: continue
+        if (regions.none { token.start >= it.start && token.end <= it.end }) {
+            regions.add(Region(token.start, token.end, role))
+        }
+    }
+    return regions.sortedBy { it.start }
+}
diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Labeled.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Labeled.kt
new file mode 100644
index 0000000..a08c148
--- /dev/null
+++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Labeled.kt
@@ -0,0 +1,28 @@
+package net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.grammar
+
+/**
+ * Wraps [inner] and tags the whole matched span with a coloring [role] (#467 / #342).
+ *
+ * It is OPTIONAL and TRANSPARENT: matching (SyntacticMatch / SemanticMatch / parse) is delegated to
+ * [inner] unchanged, so wrapping a sub-grammar affects only coloring, never validation or
+ * completion. Use it where a composite value should read as one unit — e.g.
+ * `Labeled(Role.LITERAL, IPV4_ADDR)` colors `127.0.0.1` as a single literal instead of per-octet.
+ */
+class Labeled(private val role: Role, private val inner: Combinator) : Combinator {
+
+    override fun SyntacticMatch(value: String, offset: Int): MatchResult = inner.SyntacticMatch(value, offset)
+
+    override fun SemanticMatch(value: String, offset: Int): MatchResult = inner.SemanticMatch(value, offset)
+
+    override fun parse(value: String, offset: Int): Sequence<ParseStep> =
+        inner.parse(value, offset).map { step ->
+            when (step) {
+                is Parse ->
+                    if (step.end > offset) Parse(step.end, step.tokens, step.regions + Region(offset, step.end, role))
+                    else step // matched nothing; no region to add
+                is Stuck -> step
+            }
+        }
+
+    override fun toStringIndented(indent: Int): String = inner.toStringIndented(indent)
+}
diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/OneOrMore.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/OneOrMore.kt
index 77c3850..6579a18 100644
--- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/OneOrMore.kt
+++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/OneOrMore.kt
@@ -46,7 +46,7 @@ class OneOrMore(val combinator : Combinator) : Combinator {
             yield(from)
             for (step in combinator.parse(value, from.end)) {
                 when (step) {
-                    is Parse -> if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens)))
+                    is Parse -> if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens, from.regions + step.regions)))
                     is Stuck -> yield(step)
                 }
             }
diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Parse.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Parse.kt
index f861626..395c28e 100644
--- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Parse.kt
+++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Parse.kt
@@ -41,8 +41,11 @@ data class ParsedToken(
 /** One step a matcher can take from an offset: either it consumed input ([Parse]) or it got [Stuck]. */
 sealed interface ParseStep
 
-/** A successful match: consumed input up to [end], producing [tokens] (each with its `valid` flag). */
-data class Parse(val end: Int, val tokens: List<ParsedToken>) : ParseStep
+/**
+ * A successful match: consumed input up to [end], producing [tokens] (each with its `valid` flag).
+ * [regions] carries any coloring spans contributed by [Labeled] wrappers (empty for most parses).
+ */
+data class Parse(val end: Int, val tokens: List<ParsedToken>, val regions: List<Region> = emptyList()) : ParseStep
 
 /**
  * A dead end: matching could not proceed at [offset], where [expected] is the set of matchers the
diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Repeat.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Repeat.kt
index c3621ea..4286723 100644
--- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Repeat.kt
+++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Repeat.kt
@@ -71,7 +71,7 @@ class Repeat(val combinator : Combinator, val minInclusive: Int, val maxExclusiv
             if (count < maxExclusive) {
                 for (step in combinator.parse(value, from.end)) {
                     when (step) {
-                        is Parse -> if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens), count + 1))
+                        is Parse -> if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens, from.regions + step.regions), count + 1))
                         is Stuck -> yield(step)
                     }
                 }
diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/SequenceCombinator.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/SequenceCombinator.kt
index c4c4ad1..73c3657 100644
--- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/SequenceCombinator.kt
+++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/SequenceCombinator.kt
@@ -65,7 +65,7 @@ open class SequenceCombinator(vararg val tokens: Combinator) : Combinator {
                 is Stuck -> sequenceOf(acc) // path already dead-ended; carry it forward
                 is Parse -> token.parse(value, acc.end).map { step ->
                     when (step) {
-                        is Parse -> Parse(step.end, acc.tokens + step.tokens)
+                        is Parse -> Parse(step.end, acc.tokens + step.tokens, acc.regions + step.regions)
                         is Stuck -> step // this part got stuck after acc; propagate the dead end
                     }
                 }
diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrMore.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrMore.kt
index 0739388..3dd4787 100644
--- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrMore.kt
+++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrMore.kt
@@ -50,7 +50,7 @@ class ZeroOrMore(val combinator : Combinator) : Combinator {
             yield(from) // stop repeating here...
             for (step in combinator.parse(value, from.end)) {
                 when (step) {
-                    is Parse -> if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens)))
+                    is Parse -> if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens, from.regions + step.regions)))
                     is Stuck -> yield(step) // couldn't take another repetition; remember where/why
                 }
             }
diff --git a/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ColoringTest.kt b/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ColoringTest.kt
new file mode 100644
index 0000000..b800dde
--- /dev/null
+++ b/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ColoringTest.kt
@@ -0,0 +1,53 @@
+package net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.grammar
+
+import net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.ai.ConfigParseAddressFamiliesOptionValue
+import org.junit.Assert.assertEquals
+import org.junit.Test
+
+/** Unit tests for grammar coloring: default roles, automatic coloring, and the optional Labeled wrapper. */
+class ColoringTest {
+
+    @Test
+    fun testDefaultRoles() {
+        assertEquals(Role.LITERAL, defaultRole(IntegerTerminal(0, 10)))
+        assertEquals(Role.ENUM, defaultRole(LiteralChoiceTerminal("none")))
+        assertEquals(Role.ENUM, defaultRole(FlexibleLiteralChoiceTerminal("AF_INET", "AF_INET6")))
+        assertEquals(Role.OPERATOR, defaultRole(LiteralChoiceTerminal(":")))
+        assertEquals(Role.OPERATOR, defaultRole(LiteralChoiceTerminal("~")))
+        assertEquals(Role.IDENTIFIER, defaultRole(RegexTerminal("[a-z]+", "[a-z]+")))
+        assertEquals(null, defaultRole(WhitespaceTerminal()))
+    }
+
+    @Test
+    fun testAutomaticColoringNeedsNoGrammarChanges() {
+        // The real RestrictAddressFamilies grammar, unchanged: "~" is an operator, families are enums,
+        // whitespace is uncoloured.
+        val grammar = ConfigParseAddressFamiliesOptionValue().combinator
+        val regions = grammar.colorize("~AF_INET AF_INET6")
+        assertEquals(
+            listOf(
+                Region(0, 1, Role.OPERATOR), // ~
+                Region(1, 8, Role.ENUM), // AF_INET
+                Region(9, 17, Role.ENUM), // AF_INET6 (the space at 8..9 is uncoloured)
+            ),
+            regions,
+        )
+    }
+
+    @Test
+    fun testLabeledPaintsACompositeSpanAsOneUnit() {
+        // Without Labeled an IPv4 address would colour per octet/dot; wrapping it makes it one LITERAL.
+        val grammar = SequenceCombinator(Labeled(Role.LITERAL, IPV4_ADDR), EOF())
+        assertEquals(listOf(Region(0, 7, Role.LITERAL)), grammar.colorize("1.2.3.4"))
+    }
+
+    @Test
+    fun testLabeledIsTransparentToValidation() {
+        // Wrapping changes only colour: validation behaves exactly as the bare grammar.
+        val bare = SequenceCombinator(IPV4_ADDR, EOF())
+        val labeled = SequenceCombinator(Labeled(Role.LITERAL, IPV4_ADDR), EOF())
+        assertEquals(bare.validate("1.2.3.4"), labeled.validate("1.2.3.4"))
+        assertEquals(ParseOutcome.Valid, labeled.validate("1.2.3.4"))
+        assertEquals(bare.validate("999.0.0.1")::class, labeled.validate("999.0.0.1")::class)
+    }
+}