From ed0008a704479ed1dcdc573477f459cf73153536 Mon Sep 17 00:00:00 2001 From: Erick Date: Sat, 13 Jun 2026 11:52:53 -0700 Subject: [PATCH] feat(l3): dedicated l3Llm config slot for abstraction pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit L3 abstraction runs on the main `llm`, which on cheap models (gemini-2.5-flash-lite) over-extracts and truncates the JSON, producing "'constraints'/'inference' must be an array" failures — Violet logged 180 such failures and produced only 2 world-model facts total. Add an `l3Llm` config slot (same SkillEvolverSchema shape as `skillEvolver`). Blank inherits the main `llm` (zero behavior change); set explicitly to run the clustering → world-model pass on a stronger model. L3 is async/off the turn-response path, so a slower-but-correct model has no impact on companion latency. Wiring mirrors skillEvolver: schema + blank default + secret redaction + bootstrap client + PipelineDeps/PipelineHandle field + orchestrator handle passthrough, consumed at the L3 subscriber attach (`deps.l3Llm ?? deps.llm`). --- .../core/config/defaults.ts | 14 +++++++ apps/memos-local-plugin/core/config/schema.ts | 2 + apps/memos-local-plugin/core/pipeline/deps.ts | 4 +- .../core/pipeline/memory-core.ts | 40 +++++++++++++++++++ .../core/pipeline/orchestrator.ts | 1 + .../memos-local-plugin/core/pipeline/types.ts | 15 +++++++ .../tests/unit/pipeline/orchestrator.test.ts | 13 ++++++ 7 files changed, 88 insertions(+), 1 deletion(-) diff --git a/apps/memos-local-plugin/core/config/defaults.ts b/apps/memos-local-plugin/core/config/defaults.ts index 1cf2d2cf6..ae63896df 100644 --- a/apps/memos-local-plugin/core/config/defaults.ts +++ b/apps/memos-local-plugin/core/config/defaults.ts @@ -53,6 +53,19 @@ export const DEFAULT_CONFIG: ResolvedConfig = { temperature: 0, timeoutMs: 60_000, }, + l3Llm: { + // Empty by default — falls back to the shared `llm` settings. 
+ // Operators set this when they want a stronger model for L3 + // abstraction. L3 runs off the turn-response path, so a slower + // but more reliable model improves world-model quality without + // affecting companion latency. + provider: "", + endpoint: "", + model: "", + apiKey: "", + temperature: 0, + timeoutMs: 60_000, + }, algorithm: { lightweightMemory: { enabled: true, @@ -307,6 +320,7 @@ export const SECRET_FIELD_PATHS: readonly string[] = Object.freeze([ "embedding.apiKey", "llm.apiKey", "skillEvolver.apiKey", + "l3Llm.apiKey", "hub.teamToken", "hub.userToken", ]); diff --git a/apps/memos-local-plugin/core/config/schema.ts b/apps/memos-local-plugin/core/config/schema.ts index 7c9ff193b..8f44f1d1d 100644 --- a/apps/memos-local-plugin/core/config/schema.ts +++ b/apps/memos-local-plugin/core/config/schema.ts @@ -558,6 +558,8 @@ export const ConfigSchema = Type.Object({ embedding: EmbeddingSchema, llm: LlmSchema, skillEvolver: SkillEvolverSchema, + /** Dedicated model slot for L3 abstraction. Same shape as skillEvolver. */ + l3Llm: SkillEvolverSchema, algorithm: AlgorithmSchema, hub: HubSchema, telemetry: TelemetrySchema, diff --git a/apps/memos-local-plugin/core/pipeline/deps.ts b/apps/memos-local-plugin/core/pipeline/deps.ts index fd9c2bbf0..1f7f3823f 100644 --- a/apps/memos-local-plugin/core/pipeline/deps.ts +++ b/apps/memos-local-plugin/core/pipeline/deps.ts @@ -273,7 +273,9 @@ export function buildPipelineSubscribers( repos: deps.repos, l2Bus: buses.l2, l3Bus: buses.l3, - llm: deps.llm, + // Dedicated L3 model when `config.l3Llm.*` is set; otherwise the + // bootstrap aliases `l3Llm` to the main `llm`, so this is a no-op. + llm: deps.l3Llm ?? 
deps.llm, log: log.child({ channel: "core.memory.l3" }), config: algorithm.l3Abstraction, }); diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts index b4e331c71..89877d850 100644 --- a/apps/memos-local-plugin/core/pipeline/memory-core.ts +++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts @@ -425,6 +425,45 @@ export async function bootstrapMemoryCoreFull( }); } + + // Dedicated LLM for L3 abstraction (mirrors skillEvolver above). + // L3 clustering → world-model abstraction is an infrequent async pass + // that is quality- and shape-sensitive (cheap models over-extract and + // truncate the JSON, producing "'constraints' must be an array"). It runs + // off the turn-response path, so a slower-but-correct model here has no + // impact on companion latency. Blank → falls back to the main `llm`. + let l3Llm: ReturnType<typeof createLlmClient> | null = null; + try { + const l3c = (config as { l3Llm?: { provider?: string; model?: string; endpoint?: string; apiKey?: string; temperature?: number; timeoutMs?: number } }).l3Llm; + const l3Model = (l3c?.model ?? "").trim(); + const l3Provider = (l3c?.provider ?? "").trim(); + if (l3Model && l3Provider) { + l3Llm = createLlmClient({ + provider: l3Provider, + model: l3Model, + endpoint: l3c?.endpoint ?? "", + apiKey: l3c?.apiKey ?? "", + temperature: l3c?.temperature ?? 0, + timeoutMs: l3c?.timeoutMs ?? 60_000, + maxRetries: 3, + fallbackToHost: true, + onError: (d: { provider: string; model: string; message: string; code?: string; at?: number }) => + log.warn("l3Llm.llm_error", d), + } as never); + log.info("l3Llm.ready", { + provider: l3Provider, + model: l3Model, + source: "l3Llm", + }); + } + } catch (err) { + log.warn("l3Llm.unavailable", { + err: err instanceof Error ? err.message : String(err), + fallback: "main llm", + }); + } + + + // 3. Pipeline.
const deps: PipelineDeps = { agent: options.agent, @@ -434,6 +473,7 @@ export async function bootstrapMemoryCoreFull( repos, llm, reflectLlm: reflectLlm ?? llm, + l3Llm: l3Llm ?? llm, embedder, log, namespace, diff --git a/apps/memos-local-plugin/core/pipeline/orchestrator.ts b/apps/memos-local-plugin/core/pipeline/orchestrator.ts index 75dc7e244..0c9e43322 100644 --- a/apps/memos-local-plugin/core/pipeline/orchestrator.ts +++ b/apps/memos-local-plugin/core/pipeline/orchestrator.ts @@ -1526,6 +1526,7 @@ export function createPipeline(deps: PipelineDeps): PipelineHandle { repos: deps.repos, llm: deps.llm, reflectLlm: deps.reflectLlm, + l3Llm: deps.l3Llm, embedder: deps.embedder, sessionManager: session.sessionManager, episodeManager: session.episodeManager, diff --git a/apps/memos-local-plugin/core/pipeline/types.ts b/apps/memos-local-plugin/core/pipeline/types.ts index 7969b7d39..b1dceed23 100644 --- a/apps/memos-local-plugin/core/pipeline/types.ts +++ b/apps/memos-local-plugin/core/pipeline/types.ts @@ -138,6 +138,14 @@ export interface PipelineDeps { * absent. Summarization and per-turn lite capture still use `llm`. */ reflectLlm: LlmClient | null; + /** + * Dedicated LLM for L3 abstraction (clustering → world-model facts). + * Built from `config.l3Llm.*` when configured; falls back to `llm` + * when absent. L3 runs off the turn-response path, so a stronger + * (slower) model here improves abstraction quality without affecting + * companion latency. + */ + l3Llm: LlmClient | null; embedder: Embedder | null; log: Logger; namespace: RuntimeNamespace; @@ -169,6 +177,13 @@ export interface PipelineHandle { * status instead of falling back to the summary LLM. */ readonly reflectLlm: LlmClient | null; + /** + * Dedicated client for L3 abstraction. When `l3Llm.*` is blank this is the + * same instance as `llm`; when configured it carries its own model so the + * clustering → world-model pass can run on a stronger LLM than the cheap + * high-frequency main model. 
+ */ + readonly l3Llm: LlmClient | null; readonly embedder: Embedder | null; // Subscribers / runners. diff --git a/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts b/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts index f4826a6f3..ec0fb3693 100644 --- a/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts +++ b/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts @@ -49,6 +49,7 @@ function buildDeps( repos: h.repos, llm: null, reflectLlm: null, + l3Llm: null, embedder, log: rootLogger.child({ channel: "test.pipeline" }), namespace: { agentKind: "openclaw", profileId: "main" }, @@ -75,6 +76,18 @@ afterEach(async () => { }); describe("pipeline/orchestrator", () => { + it("threads a dedicated l3Llm through to the handle", () => { + const l3Llm = fakeLlm({ completeJson: {} }); + pipeline = createPipeline({ ...buildDeps(dbHandle!), l3Llm }); + expect(pipeline.l3Llm).toBe(l3Llm); + }); + + it("leaves l3Llm null on the handle when not configured", () => { + pipeline = createPipeline(buildDeps(dbHandle!)); + expect(pipeline.l3Llm).toBeNull(); + }); + + it("wires session → episode → turn end cleanly", async () => { pipeline = createPipeline(buildDeps(dbHandle!)); const turn: TurnInputDTO = {