MemTensor · chiefmojo · Jun 13, 2026
diff --git a/apps/memos-local-plugin/core/config/defaults.ts b/apps/memos-local-plugin/core/config/defaults.ts
@@ -53,6 +53,19 @@ export const DEFAULT_CONFIG: ResolvedConfig = {
     temperature: 0,
     timeoutMs: 60_000,
   },
+  l3Llm: {
+    // Empty by default — L3 then reuses the shared `llm` client.
+    // Set both `provider` and `model` to run L3 abstraction on a
+    // stronger model. L3 runs off the turn-response path, so a slower
+    // but more reliable model improves world-model quality without
+    // affecting companion latency.
+    provider: "",
+    endpoint: "",
+    model: "",
+    apiKey: "",
+    temperature: 0,
+    timeoutMs: 60_000,
+  },
   algorithm: {
     lightweightMemory: {
       enabled: true,
@@ -307,6 +320,7 @@ export const SECRET_FIELD_PATHS: readonly string[] = Object.freeze([
   "embedding.apiKey",
   "llm.apiKey",
   "skillEvolver.apiKey",
+  "l3Llm.apiKey",
   "hub.teamToken",
   "hub.userToken",
 ]);
diff --git a/apps/memos-local-plugin/core/config/schema.ts b/apps/memos-local-plugin/core/config/schema.ts
@@ -558,6 +558,8 @@ export const ConfigSchema = Type.Object({
   embedding: EmbeddingSchema,
   llm: LlmSchema,
   skillEvolver: SkillEvolverSchema,
+  /** Dedicated model slot for L3 abstraction. Same shape as skillEvolver. */
+  l3Llm: SkillEvolverSchema,
   algorithm: AlgorithmSchema,
   hub: HubSchema,
   telemetry: TelemetrySchema,

diff --git a/apps/memos-local-plugin/core/pipeline/deps.ts b/apps/memos-local-plugin/core/pipeline/deps.ts
@@ -273,7 +273,9 @@ export function buildPipelineSubscribers(
     repos: deps.repos,
     l2Bus: buses.l2,
     l3Bus: buses.l3,
-    llm: deps.llm,
+    // Dedicated L3 client when `config.l3Llm.*` is set. Bootstrap already
+    // aliases `l3Llm` to `llm` otherwise; `??` only covers `l3Llm: null` deps.
+    llm: deps.l3Llm ?? deps.llm,
     log: log.child({ channel: "core.memory.l3" }),
     config: algorithm.l3Abstraction,
   });

diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts
@@ -425,6 +425,45 @@ export async function bootstrapMemoryCoreFull(
     });
   }
 
+
+  // Dedicated LLM for L3 abstraction (mirrors skillEvolver above).
+  // L3 clustering → world-model abstraction is an infrequent async pass
+  // that is quality- and shape-sensitive (cheap models over-extract and
+  // truncate the JSON, producing "'constraints' must be an array"). It runs
+  // off the turn-response path, so a slower-but-correct model here has no
+  // impact on companion latency. Blank provider or model → main `llm` is used.
+  let l3Llm: ReturnType<typeof createLlmClient> | null = null;
+  try {
+    const l3c = (config as { l3Llm?: { provider?: string; model?: string; endpoint?: string; apiKey?: string; temperature?: number; timeoutMs?: number } }).l3Llm;
+    const l3Model = (l3c?.model ?? "").trim();
+    const l3Provider = (l3c?.provider ?? "").trim();
+    if (l3Model && l3Provider) {
+      l3Llm = createLlmClient({
+        provider: l3Provider,
+        model: l3Model,
+        endpoint: l3c?.endpoint ?? "",
+        apiKey: l3c?.apiKey ?? "",
+        temperature: l3c?.temperature ?? 0,
+        timeoutMs: l3c?.timeoutMs ?? 60_000,
+        maxRetries: 3,
+        fallbackToHost: true,
+        onError: (d: { provider: string; model: string; message: string; code?: string; at?: number }) =>
+          log.warn("l3Llm.llm_error", d),
+      } as never);
+      log.info("l3Llm.ready", {
+        provider: l3Provider,
+        model: l3Model,
+        source: "l3Llm",
+      });
+    }
+  } catch (err) {
+    log.warn("l3Llm.unavailable", {
+      err: err instanceof Error ? err.message : String(err),
+      fallback: "main llm",
+    });
+  }
+
+
   // 3. Pipeline.
   const deps: PipelineDeps = {
     agent: options.agent,
@@ -434,6 +473,7 @@ export async function bootstrapMemoryCoreFull(
     repos,
     llm,
     reflectLlm: reflectLlm ?? llm,
+    l3Llm: l3Llm ?? llm,
     embedder,
     log,
     namespace,

diff --git a/apps/memos-local-plugin/core/pipeline/orchestrator.ts b/apps/memos-local-plugin/core/pipeline/orchestrator.ts
@@ -1526,6 +1526,7 @@ export function createPipeline(deps: PipelineDeps): PipelineHandle {
     repos: deps.repos,
     llm: deps.llm,
     reflectLlm: deps.reflectLlm,
+    l3Llm: deps.l3Llm,
     embedder: deps.embedder,
     sessionManager: session.sessionManager,
     episodeManager: session.episodeManager,

diff --git a/apps/memos-local-plugin/core/pipeline/types.ts b/apps/memos-local-plugin/core/pipeline/types.ts
@@ -138,6 +138,14 @@ export interface PipelineDeps {
    * absent. Summarization and per-turn lite capture still use `llm`.
    */
   reflectLlm: LlmClient | null;
+  /**
+   * Dedicated LLM for L3 abstraction (clustering → world-model facts).
+   * Built from `config.l3Llm.*` when configured; falls back to `llm`
+   * when absent. L3 runs off the turn-response path, so a stronger
+   * (slower) model here improves abstraction quality without affecting
+   * companion latency.
+   */
+  l3Llm: LlmClient | null;
   embedder: Embedder | null;
   log: Logger;
   namespace: RuntimeNamespace;
@@ -169,6 +177,13 @@ export interface PipelineHandle {
    * status instead of falling back to the summary LLM.
    */
   readonly reflectLlm: LlmClient | null;
+  /**
+   * Client for L3 abstraction, passed through from `PipelineDeps.l3Llm`.
+   * Bootstrap supplies the main `llm` instance when `l3Llm.*` is blank and a
+   * dedicated client (its own model) when configured, so the clustering →
+   * world-model pass can use a stronger LLM. `null` when deps carry none.
+   */
+  readonly l3Llm: LlmClient | null;
   readonly embedder: Embedder | null;
 
   // Subscribers / runners.

diff --git a/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts b/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts
@@ -49,6 +49,7 @@ function buildDeps(
     repos: h.repos,
     llm: null,
     reflectLlm: null,
+    l3Llm: null,
     embedder,
     log: rootLogger.child({ channel: "test.pipeline" }),
     namespace: { agentKind: "openclaw", profileId: "main" },
@@ -75,6 +76,18 @@ afterEach(async () => {
 });
 
 describe("pipeline/orchestrator", () => {
+  it("threads a dedicated l3Llm through to the handle", () => {
+    const l3Llm = fakeLlm({ completeJson: {} });
+    pipeline = createPipeline({ ...buildDeps(dbHandle!), l3Llm });
+    expect(pipeline.l3Llm).toBe(l3Llm);
+  });
+
+  it("leaves l3Llm null on the handle when not configured", () => {
+    pipeline = createPipeline(buildDeps(dbHandle!));
+    expect(pipeline.l3Llm).toBeNull();
+  });
+
+
   it("wires session → episode → turn end cleanly", async () => {
     pipeline = createPipeline(buildDeps(dbHandle!));
     const turn: TurnInputDTO = {