rohitg00 · efenex · May 19, 2026 · May 20, 2026
diff --git a/src/functions/smart-search.ts b/src/functions/smart-search.ts
@@ -15,6 +15,41 @@ import { logger } from "../logger.js";
 // full content is fetched via memory_lesson_recall when the caller needs it.
 const LESSON_CONTENT_PREVIEW_CHARS = 240;
 
+// v4-B: detect "who is/who's X", "what is/what's X", and "what does X
+// mean" patterns and pull out X so we can boost hits that name it directly.
+// BM25 already rewards the exact phrase, but for short named-concept
+// queries (typically 2–4 tokens) the question scaffolding ("who is the")
+// adds noise that depresses true matches relative to broader, busier
+// observations. This is the v4-A "careful generator" regression in
+// docs/plans/v4-lineage-test-case-careful-generator.md, surfacing in
+// smart-search rather than lineage.
+const NAMED_CONCEPT_PATTERNS: RegExp[] = [
+  /^\s*who\s+is\s+(?:the\s+|a\s+|an\s+)?(.+?)\s*\??\s*$/i,
+  /^\s*what\s+is\s+(?:the\s+|a\s+|an\s+)?(.+?)\s*\??\s*$/i,
+  /^\s*what(?:'s|\sis)\s+(?:the\s+|a\s+|an\s+)?(.+?)\s*\??\s*$/i,
+  /^\s*what\s+does\s+(.+?)\s+mean\s*\??\s*$/i,
+  /^\s*who(?:'s|\sis)\s+(?:the\s+|a\s+|an\s+)?(.+?)\s*\??\s*$/i,
+];
+
+export function extractNamedConcept(query: string): string | null {
+  if (!query) return null;
+  for (const re of NAMED_CONCEPT_PATTERNS) {
+    const m = re.exec(query);
+    if (m && m[1]) {
+      const phrase = m[1].trim().replace(/[?.!]+$/, "").trim();
+      // Skip degenerate matches: phrases under 3 characters (e.g. "it",
+      // "x") or over 6 tokens — those aren't real named concepts.
+      if (phrase.length >= 3 && phrase.split(/\s+/).length <= 6) {
+        return phrase;
+      }
+    }
+  }
+  return null;
+}
+
+const NAMED_CONCEPT_TITLE_BOOST = 2.0;
+const NAMED_CONCEPT_BODY_BOOST = 1.3;
+
 export function registerSmartSearchFunction(
   sdk: ISdk,
   kv: StateKV,
@@ -85,13 +120,59 @@ export function registerSmartSearchFunction(
       // extra lesson lookup adds no wallclock when the underlying calls
       // can overlap. Lesson recall is best-effort: if mem::lesson-recall
       // fails or returns unexpected shape, log + fall back to empty.
-      const [hybridResults, lessons] = await Promise.all([
-        searchFn(data.query, limit),
+      // Pull deeper for named-concept queries so the post-rank boost has
+      // material to work with even when BM25 mis-ranks the exact match
+      // due to question-scaffolding noise.
+      const namedConcept = extractNamedConcept(data.query);
+      const searchLimit = namedConcept ? Math.min(limit * 3, 100) : limit;
+      const [rawHybridResults, rawLessons] = await Promise.all([
+        searchFn(data.query, searchLimit),
         includeLessons
-          ? recallLessons(sdk, data.query, lessonLimit, data.project)
+          ? recallLessons(sdk, data.query, lessonLimit, data.project, namedConcept ?? undefined)
           : Promise.resolve([]),
       ]);
 
+      let hybridResults = rawHybridResults;
+      let lessons = rawLessons;
+      if (namedConcept) {
+        const phrase = namedConcept.toLowerCase();
+        const boostHybrid = (r: HybridSearchResult): HybridSearchResult => {
+          const title = (r.observation.title || "").toLowerCase();
+          const narrative = (r.observation.narrative || "").toLowerCase();
+          // Multiplicative: title AND narrative both match → 2.0 × 1.3 = 2.6×.
+          // CodeRabbit caught the prior else-if capping dual matches at 2.0×.
+          let mult = 1;
+          if (title.includes(phrase)) mult *= NAMED_CONCEPT_TITLE_BOOST;
+          if (narrative.includes(phrase)) mult *= NAMED_CONCEPT_BODY_BOOST;
+          return mult === 1 ? r : { ...r, combinedScore: r.combinedScore * mult };
+        };
+        hybridResults = rawHybridResults
+          .map(boostHybrid)
+          .sort((a, b) => b.combinedScore - a.combinedScore)
+          .slice(0, limit);
+        // Prefer boostMatched (set by recallLessons against the full
+        // pre-truncation content and context) over the 240-char preview.
+        // When it is not true (e.g. recallLessons was called without
+        // boostPhrase), fall back to scanning the preview in l.content.
+        lessons = rawLessons
+          .map((l) => {
+            const matched =
+              (l as CompactLessonResult & { boostMatched?: boolean }).boostMatched === true ||
+              (typeof l.content === "string" && l.content.toLowerCase().includes(phrase));
+            if (!matched) return l;
+            return { ...l, score: (l.score ?? 0) * NAMED_CONCEPT_TITLE_BOOST };
+          })
+          .sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
+        logger.info("Smart search named-concept boost applied", {
+          query: data.query,
+          concept: namedConcept,
+          boostedHybrid: hybridResults.filter((r) => r.combinedScore !== rawHybridResults.find((x) => x.observation.id === r.observation.id)?.combinedScore).length,
+        });
+      } else if (rawHybridResults.length > limit) {
+        // No named concept: nothing to boost, just trim; keep the original ordering.
+        hybridResults = rawHybridResults.slice(0, limit);
+      }
+
       const compact: CompactSearchResult[] = hybridResults.map((r) => ({
         obsId: r.observation.id,
         sessionId: r.sessionId,
@@ -127,25 +208,36 @@ async function recallLessons(
   query: string,
   limit: number,
   project?: string,
+  boostPhrase?: string,
 ): Promise<CompactLessonResult[]> {
   try {
     const result = (await sdk.trigger({
       function_id: "mem::lesson-recall",
       payload: { query, limit, project },
     })) as { success?: boolean; lessons?: Array<Lesson & { score?: number }> };
     if (!result?.success || !Array.isArray(result.lessons)) return [];
-    return result.lessons.map((l) => ({
-      lessonId: l.id,
-      content:
-        l.content.length > LESSON_CONTENT_PREVIEW_CHARS
-          ? l.content.slice(0, LESSON_CONTENT_PREVIEW_CHARS) + "…"
-          : l.content,
-      confidence: l.confidence,
-      score: l.score ?? l.confidence,
-      createdAt: l.createdAt,
-      project: l.project,
-      tags: l.tags ?? [],
-    }));
+    const phraseLower = boostPhrase?.toLowerCase();
+    return result.lessons.map((l) => {
+      // Decide boost match against the FULL pre-truncation content (plus
+      // context) so a phrase that lives past the 240-char preview window
+      // can still signal relevance. CodeRabbit caught this on #571.
+      const boostMatched = phraseLower
+        ? `${l.content ?? ""} ${l.context ?? ""}`.toLowerCase().includes(phraseLower)
+        : false;
+      return {
+        lessonId: l.id,
+        content:
+          l.content.length > LESSON_CONTENT_PREVIEW_CHARS
+            ? l.content.slice(0, LESSON_CONTENT_PREVIEW_CHARS) + "…"
+            : l.content,
+        confidence: l.confidence,
+        score: l.score ?? l.confidence,
+        createdAt: l.createdAt,
+        project: l.project,
+        tags: l.tags ?? [],
+        boostMatched,
+      };
+    });
   } catch (err) {
     logger.warn("Smart search: mem::lesson-recall failed; returning empty lesson list", {
       error: err instanceof Error ? err.message : String(err),

diff --git a/src/types.ts b/src/types.ts
@@ -274,6 +274,12 @@ export interface CompactLessonResult {
   createdAt: string;
   project?: string;
   tags: string[];
+  /**
+   * Set by recallLessons: true when the FULL pre-truncation content
+   * (or context) contained the named-concept boost phrase, else false.
+   * Smart-search boosts on true without re-scanning the preview. See #571.
+   */
+  boostMatched?: boolean;
 }
 
 export interface TimelineEntry {

diff --git a/test/smart-search.test.ts b/test/smart-search.test.ts
@@ -292,3 +292,109 @@ describe("Smart Search Function", () => {
     });
   });
 });
+
+import { extractNamedConcept } from "../src/functions/smart-search.js";
+
+describe("extractNamedConcept (v4-B)", () => {
+  it("matches 'who is X' / 'what is X' / 'what does X mean'", () => {
+    expect(extractNamedConcept("who is the careful generator?")).toBe("careful generator");
+    expect(extractNamedConcept("what is a circuit breaker")).toBe("circuit breaker");
+    expect(extractNamedConcept("what's the auth middleware?")).toBe("auth middleware");
+    expect(extractNamedConcept("what does eventual consistency mean?")).toBe("eventual consistency");
+  });
+  it("returns null for non-named-concept queries", () => {
+    expect(extractNamedConcept("fix the bug in observe.ts")).toBeNull();
+    expect(extractNamedConcept("recent decisions")).toBeNull();
+    expect(extractNamedConcept("")).toBeNull();
+  });
+  it("rejects degenerate phrases (too short or too long)", () => {
+    expect(extractNamedConcept("what is it?")).toBeNull();
+    expect(extractNamedConcept("what is x")).toBeNull();
+    expect(extractNamedConcept(
+      "what is the eight token thing we discussed earlier on the call",
+    )).toBeNull(); // >6 tokens
+  });
+});
+
+describe("Smart Search named-concept boost (v4-B)", () => {
+  let sdk: ReturnType<typeof mockSdk>;
+  let kv: ReturnType<typeof mockKV>;
+  let searchResults: HybridSearchResult[];
+
+  beforeEach(async () => {
+    sdk = mockSdk();
+    kv = mockKV();
+
+    // Two observations: the one whose TITLE names the concept ("careful
+    // generator") starts with a LOWER bm25 score so we can prove the
+    // boost re-ranks it above the busier observation.
+    const obsNamed = makeObs({
+      id: "obs_named",
+      sessionId: "ses_1",
+      title: "Tier 2 — careful generator (Qwen3.6-35B-A3B-FP8)",
+      narrative: "Picked Qwen3.6 for the careful generator role on vast.",
+    });
+    const obsBusy = makeObs({
+      id: "obs_busy",
+      sessionId: "ses_1",
+      title: "Refactor the request handler — moved validation",
+      narrative: "Random unrelated session work.",
+    });
+
+    searchResults = [
+      // BM25 prefers the busier observation (more tokens), so without
+      // boost the named-concept obs ranks SECOND.
+      {
+        observation: obsBusy,
+        bm25Score: 0.9,
+        vectorScore: 0,
+        combinedScore: 0.9,
+        sessionId: "ses_1",
+      },
+      {
+        observation: obsNamed,
+        bm25Score: 0.6,
+        vectorScore: 0,
+        combinedScore: 0.6,
+        sessionId: "ses_1",
+      },
+    ];
+
+    const session: Session = {
+      id: "ses_1",
+      project: "p",
+      cwd: "/tmp",
+      startedAt: "2026-02-01T00:00:00Z",
+      status: "completed",
+      observationCount: 2,
+    };
+    await kv.set("mem:sessions", "ses_1", session);
+    await kv.set("mem:obs:ses_1", "obs_named", obsNamed);
+    await kv.set("mem:obs:ses_1", "obs_busy", obsBusy);
+
+    const searchFn = async (_query: string, _limit: number) => searchResults;
+    registerSmartSearchFunction(sdk as never, kv as never, searchFn);
+  });
+
+  it("named-concept query boosts the title-matching observation to rank #1", async () => {
+    const result = (await sdk.trigger("mem::smart-search", {
+      query: "who is the careful generator?",
+      includeLessons: false,
+    })) as { results: CompactSearchResult[] };
+    expect(result.results.length).toBe(2);
+    expect(result.results[0].obsId).toBe("obs_named"); // title-boosted above busier obs
+    expect(result.results[1].obsId).toBe("obs_busy");
+    // Title and narrative both contain the phrase, so the boosted score
+    // is 0.6 × 2.0 × 1.3 = 1.56; it must at least clear 1.0.
+    expect(result.results[0].score).toBeGreaterThan(1.0);
+  });
+
+  it("non-named-concept query preserves original ordering", async () => {
+    const result = (await sdk.trigger("mem::smart-search", {
+      query: "refactor request handler",
+      includeLessons: false,
+    })) as { results: CompactSearchResult[] };
+    expect(result.results[0].obsId).toBe("obs_busy"); // unchanged: bm25 0.9 > 0.6
+    expect(result.results[1].obsId).toBe("obs_named");
+  });
+});