From d1fcb71b396bc96fbf7a590c016da391a07a4ed9 Mon Sep 17 00:00:00 2001 From: Ruben de Smet Date: Wed, 20 May 2026 01:44:26 +0200 Subject: [PATCH 1/2] v4-b: smart-search named-concept ranker boost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For "who is X" / "what is X" / "what does X mean" queries, BM25 ranks busier observations above the records that actually name X — the question-scaffolding tokens add noise that dilutes the true match signal. Pre-existing regression test: docs/plans/v4-lineage-test-case-careful-generator.md (Gap exposed there, but the fix lives in smart-search rather than lineage since smart-search is the lessons-first ranker used by the recall paths). Approach: detect the question pattern at handler entry, extract the concept phrase, deepen the BM25 sweep to limit*3 (capped at 100) so the boost has candidates, then post-multiply combinedScore by 2.0 for title matches and 1.3 for narrative matches, re-sort, trim to limit. Lessons whose content names the concept get the same 2.0 title-boost. Phrases shorter than 3 characters or longer than 6 tokens are skipped (degenerate). Original ordering is preserved on non-named-concept queries. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/functions/smart-search.ts | 77 +++++++++++++++++++++++- test/smart-search.test.ts | 106 ++++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+), 2 deletions(-) diff --git a/src/functions/smart-search.ts b/src/functions/smart-search.ts index c80b1f87..6ccf53c2 100644 --- a/src/functions/smart-search.ts +++ b/src/functions/smart-search.ts @@ -15,6 +15,41 @@ import { logger } from "../logger.js"; // full content is fetched via memory_lesson_recall when the caller needs it. const LESSON_CONTENT_PREVIEW_CHARS = 240; +// v4-B: detect "who is X" / "what is X" / "what does X mean" patterns +// and pull out X so we can boost hits that name the concept directly. 
+// BM25 already rewards the exact phrase, but for short named-concept +// queries (typically 2–4 tokens) the question scaffolding ("who is the") +// adds noise that depresses true matches relative to broader, busier +// observations. This is the v4-A "careful generator" regression in +// docs/plans/v4-lineage-test-case-careful-generator.md, surfacing in +// smart-search rather than lineage. +const NAMED_CONCEPT_PATTERNS: RegExp[] = [ + /^\s*who\s+is\s+(?:the\s+|a\s+|an\s+)?(.+?)\s*\??\s*$/i, + /^\s*what\s+is\s+(?:the\s+|a\s+|an\s+)?(.+?)\s*\??\s*$/i, + /^\s*what(?:'s|\sis)\s+(?:the\s+|a\s+|an\s+)?(.+?)\s*\??\s*$/i, + /^\s*what\s+does\s+(.+?)\s+mean\s*\??\s*$/i, + /^\s*who(?:'s|\sis)\s+(?:the\s+|a\s+|an\s+)?(.+?)\s*\??\s*$/i, +]; + +export function extractNamedConcept(query: string): string | null { + if (!query) return null; + for (const re of NAMED_CONCEPT_PATTERNS) { + const m = re.exec(query); + if (m && m[1]) { + const phrase = m[1].trim().replace(/[?.!]+$/, "").trim(); + // Skip degenerate matches (single very short token like "it", + // "this") — those aren't real named concepts. + if (phrase.length >= 3 && phrase.split(/\s+/).length <= 6) { + return phrase; + } + } + } + return null; +} + +const NAMED_CONCEPT_TITLE_BOOST = 2.0; +const NAMED_CONCEPT_BODY_BOOST = 1.3; + export function registerSmartSearchFunction( sdk: ISdk, kv: StateKV, @@ -85,13 +120,51 @@ export function registerSmartSearchFunction( // extra lesson lookup adds no wallclock when the underlying calls // can overlap. Lesson recall is best-effort: if mem::lesson-recall // fails or returns unexpected shape, log + fall back to empty. - const [hybridResults, lessons] = await Promise.all([ - searchFn(data.query, limit), + // Pull deeper for named-concept queries so the post-rank boost has + // material to work with even when BM25 mis-ranks the exact match + // due to question-scaffolding noise. + const namedConcept = extractNamedConcept(data.query); + const searchLimit = namedConcept ? 
Math.min(limit * 3, 100) : limit; + const [rawHybridResults, rawLessons] = await Promise.all([ + searchFn(data.query, searchLimit), includeLessons ? recallLessons(sdk, data.query, lessonLimit, data.project) : Promise.resolve([]), ]); + let hybridResults = rawHybridResults; + let lessons = rawLessons; + if (namedConcept) { + const phrase = namedConcept.toLowerCase(); + const boostHybrid = (r: HybridSearchResult): HybridSearchResult => { + const title = (r.observation.title || "").toLowerCase(); + const narrative = (r.observation.narrative || "").toLowerCase(); + let mult = 1; + if (title.includes(phrase)) mult = NAMED_CONCEPT_TITLE_BOOST; + else if (narrative.includes(phrase)) mult = NAMED_CONCEPT_BODY_BOOST; + return mult === 1 ? r : { ...r, combinedScore: r.combinedScore * mult }; + }; + hybridResults = rawHybridResults + .map(boostHybrid) + .sort((a, b) => b.combinedScore - a.combinedScore) + .slice(0, limit); + lessons = rawLessons + .map((l) => { + const text = `${l.content ?? ""}`.toLowerCase(); + if (!text.includes(phrase)) return l; + return { ...l, score: (l.score ?? 0) * NAMED_CONCEPT_TITLE_BOOST }; + }) + .sort((a, b) => (b.score ?? 0) - (a.score ?? 0)); + logger.info("Smart search named-concept boost applied", { + query: data.query, + concept: namedConcept, + boostedHybrid: hybridResults.filter((r) => r.combinedScore !== rawHybridResults.find((x) => x.observation.id === r.observation.id)?.combinedScore).length, + }); + } else if (rawHybridResults.length > limit) { + // No boost path needed to trim; keep the original ordering. 
+ hybridResults = rawHybridResults.slice(0, limit); + } + const compact: CompactSearchResult[] = hybridResults.map((r) => ({ obsId: r.observation.id, sessionId: r.sessionId, diff --git a/test/smart-search.test.ts b/test/smart-search.test.ts index 9d0c94e0..62b6edfd 100644 --- a/test/smart-search.test.ts +++ b/test/smart-search.test.ts @@ -292,3 +292,109 @@ describe("Smart Search Function", () => { }); }); }); + +import { extractNamedConcept } from "../src/functions/smart-search.js"; + +describe("extractNamedConcept (v4-B)", () => { + it("matches 'who is X' / 'what is X' / 'what does X mean'", () => { + expect(extractNamedConcept("who is the careful generator?")).toBe("careful generator"); + expect(extractNamedConcept("what is a circuit breaker")).toBe("circuit breaker"); + expect(extractNamedConcept("what's the auth middleware?")).toBe("auth middleware"); + expect(extractNamedConcept("what does eventual consistency mean?")).toBe("eventual consistency"); + }); + it("returns null for non-named-concept queries", () => { + expect(extractNamedConcept("fix the bug in observe.ts")).toBeNull(); + expect(extractNamedConcept("recent decisions")).toBeNull(); + expect(extractNamedConcept("")).toBeNull(); + }); + it("rejects degenerate phrases (too short or too long)", () => { + expect(extractNamedConcept("what is it?")).toBeNull(); + expect(extractNamedConcept("what is x")).toBeNull(); + expect(extractNamedConcept( + "what is the eight token thing we discussed earlier on the call", + )).toBeNull(); // >6 tokens + }); +}); + +describe("Smart Search named-concept boost (v4-B)", () => { + let sdk: ReturnType; + let kv: ReturnType; + let searchResults: HybridSearchResult[]; + + beforeEach(async () => { + sdk = mockSdk(); + kv = mockKV(); + + // Two observations: the one whose TITLE names the concept ("careful + // generator") starts with a LOWER bm25 score so we can prove the + // boost re-ranks it above the busier observation. 
+ const obsNamed = makeObs({ + id: "obs_named", + sessionId: "ses_1", + title: "Tier 2 — careful generator (Qwen3.6-35B-A3B-FP8)", + narrative: "Picked Qwen3.6 for the careful generator role on vast.", + }); + const obsBusy = makeObs({ + id: "obs_busy", + sessionId: "ses_1", + title: "Refactor the request handler — moved validation", + narrative: "Random unrelated session work.", + }); + + searchResults = [ + // BM25 prefers the busier observation (more tokens), so without + // boost the named-concept obs ranks SECOND. + { + observation: obsBusy, + bm25Score: 0.9, + vectorScore: 0, + combinedScore: 0.9, + sessionId: "ses_1", + }, + { + observation: obsNamed, + bm25Score: 0.6, + vectorScore: 0, + combinedScore: 0.6, + sessionId: "ses_1", + }, + ]; + + const session: Session = { + id: "ses_1", + project: "p", + cwd: "/tmp", + startedAt: "2026-02-01T00:00:00Z", + status: "completed", + observationCount: 2, + }; + await kv.set("mem:sessions", "ses_1", session); + await kv.set("mem:obs:ses_1", "obs_named", obsNamed); + await kv.set("mem:obs:ses_1", "obs_busy", obsBusy); + + const searchFn = async (_query: string, _limit: number) => searchResults; + registerSmartSearchFunction(sdk as never, kv as never, searchFn); + }); + + it("named-concept query boosts the title-matching observation to rank #1", async () => { + const result = (await sdk.trigger("mem::smart-search", { + query: "who is the careful generator?", + includeLessons: false, + })) as { results: CompactSearchResult[] }; + expect(result.results.length).toBe(2); + expect(result.results[0].obsId).toBe("obs_named"); // title-boosted above busier obs + expect(result.results[1].obsId).toBe("obs_busy"); + // Score on the boosted hit must exceed the original 0.6 by the + // title-boost factor (2.0x). 
+ expect(result.results[0].score).toBeGreaterThan(1.0); + }); + + it("non-named-concept query preserves original ordering", async () => { + const result = (await sdk.trigger("mem::smart-search", { + query: "refactor request handler", + includeLessons: false, + })) as { results: CompactSearchResult[] }; + expect(result.results[0].obsId).toBe("obs_busy"); // unchanged: bm25 0.9 > 0.6 + expect(result.results[1].obsId).toBe("obs_named"); + }); +}); From 997d25d6fe198d3b2ed23557fb3ad89a900ff396 Mon Sep 17 00:00:00 2001 From: Ruben de Smet Date: Wed, 20 May 2026 18:10:59 +0200 Subject: [PATCH 2/2] fix(smart-search): apply both multipliers + boost lessons against full content MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit caught two issues on #571: 1. The boost branch used `if (title) ... else if (narrative) ...`, capping observations that contain the concept in BOTH fields at the title-only 2.0× multiplier. The feature is specified as multiplicative — title-and-narrative matches now compound to 2.0 × 1.3 = 2.6×. Single-field matches behave as before. 2. The lesson boost path was scanning the 240-char preview emitted by recallLessons, not the lesson's full pre-truncation content. Any concept that appeared past the preview boundary silently missed the boost. Fix: thread the concept phrase into recallLessons via a new `boostPhrase` parameter. The function now decides the match against `content + context` BEFORE truncation, stamps each result with `boostMatched: boolean`, and the smart-search caller uses that flag instead of re-scanning the preview. `boostMatched` added as an optional field on CompactLessonResult. Callers that don't pass `boostPhrase` get `boostMatched: false`; whenever `boostMatched` is not `true`, the smart-search caller falls back to scanning the (truncated) content for the phrase, preserving the pre-fix behavior for any non-smart-search caller of recallLessons. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/functions/smart-search.ts | 53 ++++++++++++++++++++++++----------- src/types.ts | 6 ++++ 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/src/functions/smart-search.ts b/src/functions/smart-search.ts index 6ccf53c2..660d9972 100644 --- a/src/functions/smart-search.ts +++ b/src/functions/smart-search.ts @@ -128,7 +128,7 @@ export function registerSmartSearchFunction( const [rawHybridResults, rawLessons] = await Promise.all([ searchFn(data.query, searchLimit), includeLessons - ? recallLessons(sdk, data.query, lessonLimit, data.project) + ? recallLessons(sdk, data.query, lessonLimit, data.project, namedConcept ?? undefined) : Promise.resolve([]), ]); @@ -139,19 +139,27 @@ export function registerSmartSearchFunction( const boostHybrid = (r: HybridSearchResult): HybridSearchResult => { const title = (r.observation.title || "").toLowerCase(); const narrative = (r.observation.narrative || "").toLowerCase(); + // Multiplicative: title AND narrative both match → 2.0 × 1.3 = 2.6×. + // CodeRabbit caught the prior else-if capping dual matches at 2.0×. let mult = 1; - if (title.includes(phrase)) mult = NAMED_CONCEPT_TITLE_BOOST; - else if (narrative.includes(phrase)) mult = NAMED_CONCEPT_BODY_BOOST; + if (title.includes(phrase)) mult *= NAMED_CONCEPT_TITLE_BOOST; + if (narrative.includes(phrase)) mult *= NAMED_CONCEPT_BODY_BOOST; return mult === 1 ? r : { ...r, combinedScore: r.combinedScore * mult }; }; hybridResults = rawHybridResults .map(boostHybrid) .sort((a, b) => b.combinedScore - a.combinedScore) .slice(0, limit); + // Use boostMatched (set by recallLessons against the full + // pre-truncation content) instead of re-scanning the 240-char + // preview here. Falls back to scanning preview if boostMatched + // is absent (recallLessons called without boostPhrase). lessons = rawLessons .map((l) => { - const text = `${l.content ?? 
""}`.toLowerCase(); - if (!text.includes(phrase)) return l; + const matched = + (l as CompactLessonResult & { boostMatched?: boolean }).boostMatched === true || + (typeof l.content === "string" && l.content.toLowerCase().includes(phrase)); + if (!matched) return l; return { ...l, score: (l.score ?? 0) * NAMED_CONCEPT_TITLE_BOOST }; }) .sort((a, b) => (b.score ?? 0) - (a.score ?? 0)); @@ -200,6 +208,7 @@ async function recallLessons( query: string, limit: number, project?: string, + boostPhrase?: string, ): Promise { try { const result = (await sdk.trigger({ @@ -207,18 +216,28 @@ async function recallLessons( payload: { query, limit, project }, })) as { success?: boolean; lessons?: Array }; if (!result?.success || !Array.isArray(result.lessons)) return []; - return result.lessons.map((l) => ({ - lessonId: l.id, - content: - l.content.length > LESSON_CONTENT_PREVIEW_CHARS - ? l.content.slice(0, LESSON_CONTENT_PREVIEW_CHARS) + "…" - : l.content, - confidence: l.confidence, - score: l.score ?? l.confidence, - createdAt: l.createdAt, - project: l.project, - tags: l.tags ?? [], - })); + const phraseLower = boostPhrase?.toLowerCase(); + return result.lessons.map((l) => { + // Decide boost match against the FULL pre-truncation content so a + // phrase that lives past the 240-char preview window can still + // signal relevance. CodeRabbit caught this on #571. + const boostMatched = phraseLower + ? `${l.content ?? ""} ${l.context ?? ""}`.toLowerCase().includes(phraseLower) + : false; + return { + lessonId: l.id, + content: + l.content.length > LESSON_CONTENT_PREVIEW_CHARS + ? l.content.slice(0, LESSON_CONTENT_PREVIEW_CHARS) + "…" + : l.content, + confidence: l.confidence, + score: l.score ?? l.confidence, + createdAt: l.createdAt, + project: l.project, + tags: l.tags ?? [], + boostMatched, + }; + }); } catch (err) { logger.warn("Smart search: mem::lesson-recall failed; returning empty lesson list", { error: err instanceof Error ? 
err.message : String(err), diff --git a/src/types.ts b/src/types.ts index 72e347b3..642a2c72 100644 --- a/src/types.ts +++ b/src/types.ts @@ -274,6 +274,12 @@ export interface CompactLessonResult { createdAt: string; project?: string; tags: string[]; + /** + * Set by recallLessons when the FULL pre-truncation content + * matched the named-concept boost phrase. Smart-search uses this + * to skip re-scanning the truncated preview. See #571. + */ + boostMatched?: boolean; } export interface TimelineEntry {