// v4-B: detect "who is X" / "what is X" / "what does X mean" questions and
// pull out X so hits that name the concept directly can be boosted.
// BM25 already rewards the exact phrase, but for short named-concept
// queries (typically 2–4 tokens) the question scaffolding ("who is the")
// adds noise that depresses true matches relative to broader, busier
// observations. This is the v4-A "careful generator" regression in
// docs/plans/v4-lineage-test-case-careful-generator.md, surfacing in
// smart-search rather than lineage.
//
// Every pattern captures the concept in group 1 and drops one optional
// leading article so the phrase can be substring-matched against titles.
const NAMED_CONCEPT_PATTERNS: readonly RegExp[] = [
  // "who is X", "what is X", "who's X", "what's X" (straight or typographic
  // apostrophe, any amount of whitespace between the words).
  /^\s*(?:who|what)(?:['\u2019]s|\s+is)\s+(?:(?:the|an?)\s+)?(.+?)\s*\??\s*$/i,
  // "what does X mean"
  /^\s*what\s+does\s+(?:(?:the|an?)\s+)?(.+?)\s+mean\s*\??\s*$/i,
];

// Phrases that satisfy the length rule but never name a concept: pronouns,
// demonstratives, and a bare article left over from e.g. "what is the".
const NON_CONCEPT_PHRASES: ReadonlySet<string> = new Set([
  "the",
  "this",
  "that",
  "these",
  "those",
  "them",
  "they",
  "she",
  "him",
  "her",
  "you",
  "one",
  "there",
  "here",
  "what",
  "who",
  "which",
]);

// Shorter phrases ("it", "x") are treated as degenerate.
const NAMED_CONCEPT_MIN_CHARS = 3;
// Longer phrases are ordinary sentences, not a named concept.
const NAMED_CONCEPT_MAX_TOKENS = 6;

/**
 * Extracts the concept being asked about from a definitional question.
 *
 * @param query Raw user query, e.g. `"who is the careful generator?"`.
 * @returns The concept phrase with question scaffolding, leading article and
 *   trailing punctuation removed (original casing preserved), or `null` when
 *   the query is not a named-concept question or the captured phrase is
 *   degenerate (under 3 characters, over 6 tokens, or only a pronoun /
 *   demonstrative / article).
 */
export function extractNamedConcept(query: string): string | null {
  if (!query) return null;
  for (const pattern of NAMED_CONCEPT_PATTERNS) {
    const captured = pattern.exec(query)?.[1];
    if (!captured) continue;
    // The lazy capture can still end in punctuation ("X?!"); strip it.
    const phrase = captured.trim().replace(/[?.!]+$/, "").trim();
    if (phrase.length < NAMED_CONCEPT_MIN_CHARS) continue;
    if (phrase.split(/\s+/).length > NAMED_CONCEPT_MAX_TOKENS) continue;
    if (NON_CONCEPT_PHRASES.has(phrase.toLowerCase())) continue;
    return phrase;
  }
  return null;
}

// Score multipliers applied when the extracted concept appears in a hit's
// title / narrative; a hit matching both receives the product of the two.
const NAMED_CONCEPT_TITLE_BOOST = 2.0;
const NAMED_CONCEPT_BODY_BOOST = 1.3;
r : { ...r, combinedScore: r.combinedScore * mult }; + }; + hybridResults = rawHybridResults + .map(boostHybrid) + .sort((a, b) => b.combinedScore - a.combinedScore) + .slice(0, limit); + // Use boostMatched (set by recallLessons against the full + // pre-truncation content) instead of re-scanning the 240-char + // preview here. Falls back to scanning preview if boostMatched + // is absent (recallLessons called without boostPhrase). + lessons = rawLessons + .map((l) => { + const matched = + (l as CompactLessonResult & { boostMatched?: boolean }).boostMatched === true || + (typeof l.content === "string" && l.content.toLowerCase().includes(phrase)); + if (!matched) return l; + return { ...l, score: (l.score ?? 0) * NAMED_CONCEPT_TITLE_BOOST }; + }) + .sort((a, b) => (b.score ?? 0) - (a.score ?? 0)); + logger.info("Smart search named-concept boost applied", { + query: data.query, + concept: namedConcept, + boostedHybrid: hybridResults.filter((r) => r.combinedScore !== rawHybridResults.find((x) => x.observation.id === r.observation.id)?.combinedScore).length, + }); + } else if (rawHybridResults.length > limit) { + // No boost path needed to trim; keep the original ordering. + hybridResults = rawHybridResults.slice(0, limit); + } + const compact: CompactSearchResult[] = hybridResults.map((r) => ({ obsId: r.observation.id, sessionId: r.sessionId, @@ -127,6 +208,7 @@ async function recallLessons( query: string, limit: number, project?: string, + boostPhrase?: string, ): Promise { try { const result = (await sdk.trigger({ @@ -134,18 +216,28 @@ async function recallLessons( payload: { query, limit, project }, })) as { success?: boolean; lessons?: Array }; if (!result?.success || !Array.isArray(result.lessons)) return []; - return result.lessons.map((l) => ({ - lessonId: l.id, - content: - l.content.length > LESSON_CONTENT_PREVIEW_CHARS - ? l.content.slice(0, LESSON_CONTENT_PREVIEW_CHARS) + "…" - : l.content, - confidence: l.confidence, - score: l.score ?? 
l.confidence, - createdAt: l.createdAt, - project: l.project, - tags: l.tags ?? [], - })); + const phraseLower = boostPhrase?.toLowerCase(); + return result.lessons.map((l) => { + // Decide boost match against the FULL pre-truncation content so a + // phrase that lives past the 240-char preview window can still + // signal relevance. CodeRabbit caught this on #571. + const boostMatched = phraseLower + ? `${l.content ?? ""} ${l.context ?? ""}`.toLowerCase().includes(phraseLower) + : false; + return { + lessonId: l.id, + content: + l.content.length > LESSON_CONTENT_PREVIEW_CHARS + ? l.content.slice(0, LESSON_CONTENT_PREVIEW_CHARS) + "…" + : l.content, + confidence: l.confidence, + score: l.score ?? l.confidence, + createdAt: l.createdAt, + project: l.project, + tags: l.tags ?? [], + boostMatched, + }; + }); } catch (err) { logger.warn("Smart search: mem::lesson-recall failed; returning empty lesson list", { error: err instanceof Error ? err.message : String(err), diff --git a/src/types.ts b/src/types.ts index 72e347b3..642a2c72 100644 --- a/src/types.ts +++ b/src/types.ts @@ -274,6 +274,12 @@ export interface CompactLessonResult { createdAt: string; project?: string; tags: string[]; + /** + * Set by recallLessons when the FULL pre-truncation content + * matched the named-concept boost phrase. Smart-search uses this + * to skip re-scanning the truncated preview. See #571. 
import { extractNamedConcept } from "../src/functions/smart-search.js";

// Parser-level coverage: which queries yield a concept phrase and which do not.
describe("extractNamedConcept (v4-B)", () => {
  it("matches 'who is X' / 'what is X' / 'what does X mean'", () => {
    const expectations: Array<[string, string]> = [
      ["who is the careful generator?", "careful generator"],
      ["what is a circuit breaker", "circuit breaker"],
      ["what's the auth middleware?", "auth middleware"],
      ["what does eventual consistency mean?", "eventual consistency"],
    ];
    for (const [question, concept] of expectations) {
      expect(extractNamedConcept(question)).toBe(concept);
    }
  });

  it("returns null for non-named-concept queries", () => {
    const plainQueries = ["fix the bug in observe.ts", "recent decisions", ""];
    for (const question of plainQueries) {
      expect(extractNamedConcept(question)).toBeNull();
    }
  });

  it("rejects degenerate phrases (too short or too long)", () => {
    const degenerateQueries = [
      "what is it?",
      "what is x",
      // >6 tokens
      "what is the eight token thing we discussed earlier on the call",
    ];
    for (const question of degenerateQueries) {
      expect(extractNamedConcept(question)).toBeNull();
    }
  });
});

// End-to-end coverage: the registered mem::smart-search function re-ranks
// hits for named-concept queries and leaves other queries untouched.
describe("Smart Search named-concept boost (v4-B)", () => {
  let sdk: ReturnType<typeof mockSdk>;
  let kv: ReturnType<typeof mockKV>;
  let searchResults: HybridSearchResult[];

  // Builds a hit whose combined score comes from BM25 alone.
  const bm25OnlyHit = (
    observation: HybridSearchResult["observation"],
    score: number,
  ): HybridSearchResult => ({
    observation,
    bm25Score: score,
    vectorScore: 0,
    combinedScore: score,
    sessionId: "ses_1",
  });

  // Runs mem::smart-search with lesson recall disabled and returns the hits.
  const runSearch = async (query: string): Promise<CompactSearchResult[]> => {
    const response = (await sdk.trigger("mem::smart-search", {
      query,
      includeLessons: false,
    })) as { results: CompactSearchResult[] };
    return response.results;
  };

  beforeEach(async () => {
    sdk = mockSdk();
    kv = mockKV();

    // Fixture: the observation that names the concept in its TITLE is given
    // the weaker BM25 score, so only the boost can lift it to first place.
    const obsNamed = makeObs({
      id: "obs_named",
      sessionId: "ses_1",
      title: "Tier 2 — careful generator (Qwen3.6-35B-A3B-FP8)",
      narrative: "Picked Qwen3.6 for the careful generator role on vast.",
    });
    const obsBusy = makeObs({
      id: "obs_busy",
      sessionId: "ses_1",
      title: "Refactor the request handler — moved validation",
      narrative: "Random unrelated session work.",
    });

    // Un-boosted order: busy observation first (0.9), named one second (0.6).
    searchResults = [bm25OnlyHit(obsBusy, 0.9), bm25OnlyHit(obsNamed, 0.6)];

    const session: Session = {
      id: "ses_1",
      project: "p",
      cwd: "/tmp",
      startedAt: "2026-02-01T00:00:00Z",
      status: "completed",
      observationCount: 2,
    };
    await kv.set("mem:sessions", "ses_1", session);
    await kv.set("mem:obs:ses_1", "obs_named", obsNamed);
    await kv.set("mem:obs:ses_1", "obs_busy", obsBusy);

    const searchFn = async (_query: string, _limit: number) => searchResults;
    registerSmartSearchFunction(sdk as never, kv as never, searchFn);
  });

  it("named-concept query boosts the title-matching observation to rank #1", async () => {
    const hits = await runSearch("who is the careful generator?");
    expect(hits.length).toBe(2);
    // Title-boosted hit overtakes the busier observation.
    expect(hits[0].obsId).toBe("obs_named");
    expect(hits[1].obsId).toBe("obs_busy");
    // 0.6 scaled by the 2.0x title boost must land above 1.0.
    expect(hits[0].score).toBeGreaterThan(1.0);
  });

  it("non-named-concept query preserves original ordering", async () => {
    const hits = await runSearch("refactor request handler");
    // No boost applies, so BM25 order stands: 0.9 > 0.6.
    expect(hits[0].obsId).toBe("obs_busy");
    expect(hits[1].obsId).toBe("obs_named");
  });
});