From ed0008a704479ed1dcdc573477f459cf73153536 Mon Sep 17 00:00:00 2001
From: Erick <chiefmojo@chiefmojo.com>
Date: Sat, 13 Jun 2026 11:52:53 -0700
Subject: [PATCH] feat(l3): dedicated l3Llm config slot for abstraction pass
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

L3 abstraction currently runs on the main `llm`. Cheap models
(gemini-2.5-flash-lite) over-extract and truncate the JSON,
producing "'constraints'/'inference' must be an array" failures —
Violet logged 180 such failures and produced only 2 world-model
facts total.

Add an `l3Llm` config slot (same SkillEvolverSchema shape as
`skillEvolver`). Left blank, it inherits the main `llm` (zero
behavior change); set both `provider` and `model` to run the
clustering → world-model pass on a stronger model. L3 is async and
off the turn-response path, so a slower-but-correct model has no
impact on companion latency.

Wiring mirrors skillEvolver: schema + blank default + secret
redaction + bootstrap client + PipelineDeps/PipelineHandle field
+ orchestrator handle passthrough, consumed at the L3 subscriber
attach (`deps.l3Llm ?? deps.llm`).
---
 .../core/config/defaults.ts                   | 14 +++++++
 apps/memos-local-plugin/core/config/schema.ts |  2 +
 apps/memos-local-plugin/core/pipeline/deps.ts |  4 +-
 .../core/pipeline/memory-core.ts              | 40 +++++++++++++++++++
 .../core/pipeline/orchestrator.ts             |  1 +
 .../memos-local-plugin/core/pipeline/types.ts | 15 +++++++
 .../tests/unit/pipeline/orchestrator.test.ts  | 13 ++++++
 7 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/apps/memos-local-plugin/core/config/defaults.ts b/apps/memos-local-plugin/core/config/defaults.ts
index 1cf2d2cf6..ae63896df 100644
--- a/apps/memos-local-plugin/core/config/defaults.ts
+++ b/apps/memos-local-plugin/core/config/defaults.ts
@@ -53,6 +53,19 @@ export const DEFAULT_CONFIG: ResolvedConfig = {
     temperature: 0,
     timeoutMs: 60_000,
   },
+  l3Llm: {
+    // Empty by default — falls back to the shared `llm` settings.
+    // Operators set this when they want a stronger model for L3
+    // abstraction. L3 runs off the turn-response path, so a slower
+    // but more reliable model improves world-model quality without
+    // affecting companion latency.
+    provider: "",
+    endpoint: "",
+    model: "",
+    apiKey: "",
+    temperature: 0,
+    timeoutMs: 60_000,
+  },
   algorithm: {
     lightweightMemory: {
       enabled: true,
@@ -307,6 +320,7 @@ export const SECRET_FIELD_PATHS: readonly string[] = Object.freeze([
   "embedding.apiKey",
   "llm.apiKey",
   "skillEvolver.apiKey",
+  "l3Llm.apiKey",
   "hub.teamToken",
   "hub.userToken",
 ]);
diff --git a/apps/memos-local-plugin/core/config/schema.ts b/apps/memos-local-plugin/core/config/schema.ts
index 7c9ff193b..8f44f1d1d 100644
--- a/apps/memos-local-plugin/core/config/schema.ts
+++ b/apps/memos-local-plugin/core/config/schema.ts
@@ -558,6 +558,8 @@ export const ConfigSchema = Type.Object({
   embedding: EmbeddingSchema,
   llm: LlmSchema,
   skillEvolver: SkillEvolverSchema,
+  /** Dedicated model slot for L3 abstraction. Same shape as skillEvolver. */
+  l3Llm: SkillEvolverSchema,
   algorithm: AlgorithmSchema,
   hub: HubSchema,
   telemetry: TelemetrySchema,
diff --git a/apps/memos-local-plugin/core/pipeline/deps.ts b/apps/memos-local-plugin/core/pipeline/deps.ts
index fd9c2bbf0..1f7f3823f 100644
--- a/apps/memos-local-plugin/core/pipeline/deps.ts
+++ b/apps/memos-local-plugin/core/pipeline/deps.ts
@@ -273,7 +273,9 @@ export function buildPipelineSubscribers(
     repos: deps.repos,
     l2Bus: buses.l2,
     l3Bus: buses.l3,
-    llm: deps.llm,
+    // Dedicated L3 model when `config.l3Llm.*` is set. The bootstrap already
+    // aliases a blank `l3Llm` to `llm`; `??` covers callers that pass null.
+    llm: deps.l3Llm ?? deps.llm,
     log: log.child({ channel: "core.memory.l3" }),
     config: algorithm.l3Abstraction,
   });
diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts
index b4e331c71..89877d850 100644
--- a/apps/memos-local-plugin/core/pipeline/memory-core.ts
+++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts
@@ -425,6 +425,45 @@ export async function bootstrapMemoryCoreFull(
     });
   }
 
+
+  // Dedicated LLM for L3 abstraction (mirrors skillEvolver above).
+  // L3 clustering → world-model abstraction is an infrequent async pass
+  // that is quality- and shape-sensitive (cheap models over-extract and
+  // truncate the JSON, producing "'constraints' must be an array"). It runs
+  // off the turn-response path, so a slower-but-correct model here has no
+  // impact on companion latency. Blank → falls back to the main `llm`.
+  let l3Llm: ReturnType<typeof createLlmClient> | null = null;
+  try {
+    const l3c = (config as { l3Llm?: { provider?: string; model?: string; endpoint?: string; apiKey?: string; temperature?: number; timeoutMs?: number } }).l3Llm;
+    const l3Model = (l3c?.model ?? "").trim();
+    const l3Provider = (l3c?.provider ?? "").trim();
+    if (l3Model && l3Provider) {
+      l3Llm = createLlmClient({
+        provider: l3Provider,
+        model: l3Model,
+        endpoint: l3c?.endpoint ?? "",
+        apiKey: l3c?.apiKey ?? "",
+        temperature: l3c?.temperature ?? 0,
+        timeoutMs: l3c?.timeoutMs ?? 60_000,
+        maxRetries: 3,
+        fallbackToHost: true,
+        onError: (d: { provider: string; model: string; message: string; code?: string; at?: number }) =>
+          log.warn("l3Llm.llm_error", d),
+      } as never);
+      log.info("l3Llm.ready", {
+        provider: l3Provider,
+        model: l3Model,
+        source: "l3Llm",
+      });
+    }
+  } catch (err) {
+    log.warn("l3Llm.unavailable", {
+      err: err instanceof Error ? err.message : String(err),
+      fallback: "main llm",
+    });
+  }
+
+
   // 3. Pipeline.
   const deps: PipelineDeps = {
     agent: options.agent,
@@ -434,6 +473,7 @@ export async function bootstrapMemoryCoreFull(
     repos,
     llm,
     reflectLlm: reflectLlm ?? llm,
+    l3Llm: l3Llm ?? llm,
     embedder,
     log,
     namespace,
diff --git a/apps/memos-local-plugin/core/pipeline/orchestrator.ts b/apps/memos-local-plugin/core/pipeline/orchestrator.ts
index 75dc7e244..0c9e43322 100644
--- a/apps/memos-local-plugin/core/pipeline/orchestrator.ts
+++ b/apps/memos-local-plugin/core/pipeline/orchestrator.ts
@@ -1526,6 +1526,7 @@ export function createPipeline(deps: PipelineDeps): PipelineHandle {
     repos: deps.repos,
     llm: deps.llm,
     reflectLlm: deps.reflectLlm,
+    l3Llm: deps.l3Llm,
     embedder: deps.embedder,
     sessionManager: session.sessionManager,
     episodeManager: session.episodeManager,
diff --git a/apps/memos-local-plugin/core/pipeline/types.ts b/apps/memos-local-plugin/core/pipeline/types.ts
index 7969b7d39..b1dceed23 100644
--- a/apps/memos-local-plugin/core/pipeline/types.ts
+++ b/apps/memos-local-plugin/core/pipeline/types.ts
@@ -138,6 +138,14 @@ export interface PipelineDeps {
    * absent. Summarization and per-turn lite capture still use `llm`.
    */
   reflectLlm: LlmClient | null;
+  /**
+   * Dedicated LLM for L3 abstraction (clustering → world-model facts).
+   * Built from `config.l3Llm.*` when configured; falls back to `llm`
+   * when absent. L3 runs off the turn-response path, so a stronger
+   * (slower) model here improves abstraction quality without affecting
+   * companion latency.
+   */
+  l3Llm: LlmClient | null;
   embedder: Embedder | null;
   log: Logger;
   namespace: RuntimeNamespace;
@@ -169,6 +177,13 @@ export interface PipelineHandle {
    * status instead of falling back to the summary LLM.
    */
   readonly reflectLlm: LlmClient | null;
+  /**
+   * Dedicated client for L3 abstraction. When `l3Llm.*` is blank this is the
+   * same instance as `llm`; when configured it carries its own model so the
+   * clustering → world-model pass can run on a stronger LLM than the cheap
+   * high-frequency main model.
+   */
+  readonly l3Llm: LlmClient | null;
   readonly embedder: Embedder | null;
 
   // Subscribers / runners.
diff --git a/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts b/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts
index f4826a6f3..ec0fb3693 100644
--- a/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts
+++ b/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts
@@ -49,6 +49,7 @@ function buildDeps(
     repos: h.repos,
     llm: null,
     reflectLlm: null,
+    l3Llm: null,
     embedder,
     log: rootLogger.child({ channel: "test.pipeline" }),
     namespace: { agentKind: "openclaw", profileId: "main" },
@@ -75,6 +76,18 @@ afterEach(async () => {
 });
 
 describe("pipeline/orchestrator", () => {
+  it("threads a dedicated l3Llm through to the handle", () => {
+    const l3Llm = fakeLlm({ completeJson: {} });
+    pipeline = createPipeline({ ...buildDeps(dbHandle!), l3Llm });
+    expect(pipeline.l3Llm).toBe(l3Llm);
+  });
+
+  it("leaves l3Llm null on the handle when not configured", () => {
+    pipeline = createPipeline(buildDeps(dbHandle!));
+    expect(pipeline.l3Llm).toBeNull();
+  });
+
+
   it("wires session → episode → turn end cleanly", async () => {
     pipeline = createPipeline(buildDeps(dbHandle!));
     const turn: TurnInputDTO = {