diff --git a/src/coding/proxy/logging/db.py b/src/coding/proxy/logging/db.py index 8470966..79cd9e2 100644 --- a/src/coding/proxy/logging/db.py +++ b/src/coding/proxy/logging/db.py @@ -335,6 +335,20 @@ async def set_session_title(self, session_key: str, title: str) -> None: ) await self._db.commit() + async def update_empty_session_title(self, session_key: str, title: str) -> None: + """为标题为空的 session 补写标题(幂等,仅覆盖空标题行). + + 使用 ``AND title = ''`` 条件确保不覆盖已有标题, + 即使行已存在但标题为空也会被更新。 + """ + if not self._db or not title or not session_key: + return + await self._db.execute( + "UPDATE session_meta SET title = ? WHERE session_key = ? AND title = ''", + (title, session_key), + ) + await self._db.commit() + async def get_session_titles(self, session_keys: list[str]) -> dict[str, str]: """批量查询 session 标题.""" if not self._db or not session_keys: diff --git a/src/coding/proxy/routing/executor.py b/src/coding/proxy/routing/executor.py index 328f10e..f900a2d 100644 --- a/src/coding/proxy/routing/executor.py +++ b/src/coding/proxy/routing/executor.py @@ -50,11 +50,13 @@ CompatibilityStatus, build_canonical_request, ) -from ..model.compat import CanonicalRequest +from ..model.compat import CanonicalMessagePart, CanonicalRequest logger = logging.getLogger(__name__) _SESSION_TITLE_MAX_LEN = 600 +# 回退标题截取长度 — 工具结果等非用户直接输入的摘要上限。 +_FALLBACK_TITLE_MAX_LEN = 80 # Claude Code 注入的"噪声"标签 — 系统级上下文,不应进入 Session 标题。 # 这些标签由 CC harness 在首个 user 消息 content 中拼接,高度同质, @@ -63,7 +65,8 @@ r"<(?Psystem-reminder|user-preferences|" r"local-command-stdout|local-command-stderr|" r"bash-input|bash-stdout|bash-stderr|" - r"ide_selection|stdin|system_instruction|session)\b[^>]*>" + r"ide_selection|stdin|system_instruction|session|" + r"artifactMetadata|thinking)\b[^>]*>" r".*?", flags=re.DOTALL | re.IGNORECASE, ) @@ -109,13 +112,19 @@ def _sanitize_user_text(raw: str) -> str: return re.sub(r"\s+", " ", cleaned).strip() -def _extract_session_title(request: CanonicalRequest) -> str: - """从规范化请求中提取首个用户消息文本作为 session 标题。 +# ── Session 标题提取: 多层级回退策略 ────────────────────────────── +# +# Level 1: user TEXT → 噪声剥离 → 首条非空文本 (原有逻辑) +# Level 2: user TOOL_RESULT → text 截取 → "[Tool output] " +# Level 3: user IMAGE → 计数 → "[1 Image]" / "[N Images]" +# Level 4: 请求元数据 → tool_names / model → "[Tool call] Bash, Read" +# / "[Session] claude-opus-4-8" +# ───────────────────────────────────────────────────────────────── - 跳过 Claude Code 注入的系统级 XML 块(system-reminder、user-preferences 等), - 确保标题反映用户真实输入而非高同质化的系统模板。 - """ - for part in request.messages: + +def _extract_title_from_user_text(messages: list[CanonicalMessagePart]) -> str: + """Level 1: 从 user TEXT 部分提取经噪声剥离后的首条非空文本.""" + for part in messages: if part.role != "user" or part.type != CanonicalPartType.TEXT: continue cleaned = _sanitize_user_text(part.text) @@ -124,6 +133,59 @@ def _extract_session_title(request: CanonicalRequest) -> str: return "" +def _extract_title_from_tool_results(messages: list[CanonicalMessagePart]) -> str: + """Level 2: 从 user TOOL_RESULT 部分截取文本摘要.""" + for part in messages: + if part.role != "user" or part.type != CanonicalPartType.TOOL_RESULT: + continue + if not part.text: + continue + cleaned = _sanitize_user_text(part.text) + if cleaned: + snippet = cleaned[:_FALLBACK_TITLE_MAX_LEN] + return f"[Tool output] {snippet}" + return "" + + +def _extract_title_from_images(messages: list[CanonicalMessagePart]) -> str: + """Level 3: 统计 user IMAGE 部分数量,生成图片描述标题.""" + count = sum( + 1 for p in messages if p.role == "user" and p.type == CanonicalPartType.IMAGE + ) + if count == 0: + return "" + return f"[{count} Image{'s' if count > 1 else ''}]" + + +def _extract_title_from_metadata(request: CanonicalRequest) -> str: + """Level 4: 从请求元数据 (tool_names / model) 合成兜底标题.""" + if request.tool_names: + names = ", ".join(request.tool_names[:3]) + return f"[Tool call] {names}" + if request.model: + return f"[Session] {request.model}" + return "" + + +def _extract_session_title(request: CanonicalRequest) -> str: + """从规范化请求中提取 session 标题 — 多层级回退策略。 + + 依次尝试: user TEXT 噪声剥离 → TOOL_RESULT 摘要 → IMAGE 计数 → 元数据兜底。 + 任意级别命中即返回,确保 Dashboard 尽可能展示有辨识度的标题。 + """ + messages = request.messages + for extractor in ( + _extract_title_from_user_text, + _extract_title_from_tool_results, + _extract_title_from_images, + ): + title = extractor(messages) + if title: + return title[:_SESSION_TITLE_MAX_LEN] + # Level 4 依赖 request 元数据,签名不同 + return _extract_title_from_metadata(request)[:_SESSION_TITLE_MAX_LEN] + + def _build_semantic_rejection_diagnostic(body: dict[str, Any]) -> str: """构建语义拒绝的请求体诊断上下文. @@ -663,6 +725,13 @@ async def execute_stream( await self._recorder.set_session_title( canonical_request.session_key, title ) + else: + # 延迟标题补写: 若 session 尚无标题,尝试从当前请求中提取并回写。 + title = _extract_session_title(canonical_request) + if title: + await self._recorder.update_empty_session_title( + canonical_request.session_key, title + ) incompatible_reasons: list[str] = [] effective_tiers = self._resolve_effective_tiers(canonical_request.session_key) last_idx = len(effective_tiers) - 1 @@ -842,6 +911,13 @@ async def execute_message( await self._recorder.set_session_title( canonical_request.session_key, title ) + else: + # 延迟标题补写: 若 session 尚无标题,尝试从当前请求中提取并回写。 + title = _extract_session_title(canonical_request) + if title: + await self._recorder.update_empty_session_title( + canonical_request.session_key, title + ) incompatible_reasons: list[str] = [] effective_tiers = self._resolve_effective_tiers(canonical_request.session_key) last_idx = len(effective_tiers) - 1 diff --git a/src/coding/proxy/routing/usage_recorder.py b/src/coding/proxy/routing/usage_recorder.py index 8887c09..e83a8fe 100644 --- a/src/coding/proxy/routing/usage_recorder.py +++ b/src/coding/proxy/routing/usage_recorder.py @@ -33,6 +33,11 @@ async def set_session_title(self, session_key: str, title: str) -> None: if self._token_logger: await self._token_logger.set_session_title(session_key, title) + async def update_empty_session_title(self, session_key: str, title: str) -> None: + """为标题为空的 session 补写标题(委托给 TokenLogger).""" + if self._token_logger: + await self._token_logger.update_empty_session_title(session_key, title) + # ── 用量信息构建 ────────────────────────────────────── @staticmethod diff --git a/tests/test_router_executor.py b/tests/test_router_executor.py index e10bac6..2c0d093 100644 --- a/tests/test_router_executor.py +++ b/tests/test_router_executor.py @@ -20,10 +20,15 @@ build_canonical_request, ) from coding.proxy.routing.executor import ( + _FALLBACK_TITLE_MAX_LEN, _SESSION_TITLE_MAX_LEN, _VENDOR_PROTOCOL_LABEL_MAP, _build_semantic_rejection_diagnostic, _extract_session_title, + _extract_title_from_images, + _extract_title_from_metadata, + _extract_title_from_tool_results, + _extract_title_from_user_text, _has_tool_results, _is_likely_request_format_error, _log_vendor_response_error, @@ -2258,6 +2263,20 @@ def test_strips_session_tag_multiline(self): raw = "\nline1\nline2\n真实标题" assert _sanitize_user_text(raw) == "真实标题" + def test_strips_artifact_metadata_tag(self): + """```` 标签应被完整剥离.""" + raw = "artifact context用户文本" + assert _sanitize_user_text(raw) == "用户文本" + + def test_strips_thinking_tag(self): + """```` 标签应被完整剥离.""" + raw = "内部推理过程用户实际提问" + assert _sanitize_user_text(raw) == "用户实际提问" + + def test_strips_thinking_tag_multiline(self): + raw = "\nline1\nline2\n清理后文本" + assert _sanitize_user_text(raw) == "清理后文本" + class TestExtractSessionTitle: """``_extract_session_title`` — 端到端从 CanonicalRequest 抽取标题.""" @@ -2304,14 +2323,16 @@ def test_extracts_slash_command(self): req = self._build_request([{"role": "user", "content": raw}]) assert _extract_session_title(req) == "/commit feat: 新增标题清洗" - def test_returns_empty_when_only_noise(self): + def test_returns_metadata_fallback_when_only_noise(self): + """纯噪声文本回退到 Level 4 元数据兜底(使用 model 名称).""" raw = "纯噪声" req = self._build_request([{"role": "user", "content": raw}]) - assert _extract_session_title(req) == "" + assert _extract_session_title(req) == "[Session] test" - def test_returns_empty_for_no_user_messages(self): + def test_returns_metadata_fallback_for_no_user_messages(self): + """无 user 消息时回退到 Level 4 元数据兜底.""" req = self._build_request([{"role": "assistant", "content": "你好"}]) - assert _extract_session_title(req) == "" + assert _extract_session_title(req) == "[Session] test" def test_skips_noise_only_part_to_find_real_input(self): """首个 user text part 全噪声时,fallback 到下一个非空 user part.""" @@ -2338,3 +2359,266 @@ def test_skips_assistant_role(self): ] req = self._build_request(messages) assert _extract_session_title(req) == "新的用户问题" + + +# ═══════════════════════════════════════════════════════════════════ +# 多层级回退标题提取测试 +# ═══════════════════════════════════════════════════════════════════ + + +class TestExtractTitleFromUserText: + """Level 1 辅助函数 ``_extract_title_from_user_text``.""" + + def test_returns_first_non_empty_user_text(self): + from coding.proxy.model.compat import CanonicalMessagePart, CanonicalPartType + + msgs = [ + CanonicalMessagePart( + type=CanonicalPartType.TEXT, role="user", text="用户输入" + ), + ] + assert _extract_title_from_user_text(msgs) == "用户输入" + + def test_skips_assistant_text(self): + from coding.proxy.model.compat import CanonicalMessagePart, CanonicalPartType + + msgs = [ + CanonicalMessagePart( + type=CanonicalPartType.TEXT, role="assistant", text="助手回复" + ), + CanonicalMessagePart( + type=CanonicalPartType.TEXT, role="user", text="用户问题" + ), + ] + assert _extract_title_from_user_text(msgs) == "用户问题" + + def test_returns_empty_for_noise_only(self): + from coding.proxy.model.compat import CanonicalMessagePart, CanonicalPartType + + msgs = [ + CanonicalMessagePart( + type=CanonicalPartType.TEXT, + role="user", + text="纯噪声", + ), + ] + assert _extract_title_from_user_text(msgs) == "" + + +class TestExtractTitleFromToolResults: + """Level 2 辅助函数 ``_extract_title_from_tool_results``.""" + + def test_extracts_tool_result_text(self): + from coding.proxy.model.compat import CanonicalMessagePart, CanonicalPartType + + msgs = [ + CanonicalMessagePart( + type=CanonicalPartType.TOOL_RESULT, + role="user", + text="file contents here", + ), + ] + title = _extract_title_from_tool_results(msgs) + assert title == "[Tool output] file contents here" + + def test_skips_empty_tool_result(self): + from coding.proxy.model.compat import CanonicalMessagePart, CanonicalPartType + + msgs = [ + CanonicalMessagePart( + type=CanonicalPartType.TOOL_RESULT, role="user", text="" + ), + ] + assert _extract_title_from_tool_results(msgs) == "" + + def test_truncates_long_tool_result(self): + from coding.proxy.model.compat import CanonicalMessagePart, CanonicalPartType + + long_text = "A" * 200 + msgs = [ + CanonicalMessagePart( + type=CanonicalPartType.TOOL_RESULT, role="user", text=long_text + ), + ] + title = _extract_title_from_tool_results(msgs) + assert title.startswith("[Tool output] ") + assert len(title) <= len("[Tool output] ") + _FALLBACK_TITLE_MAX_LEN + + def test_sanitizes_noise_in_tool_result(self): + from coding.proxy.model.compat import CanonicalMessagePart, CanonicalPartType + + msgs = [ + CanonicalMessagePart( + type=CanonicalPartType.TOOL_RESULT, + role="user", + text="noiseclean output", + ), + ] + title = _extract_title_from_tool_results(msgs) + assert title == "[Tool output] clean output" + + def test_returns_empty_when_all_noise(self): + from coding.proxy.model.compat import CanonicalMessagePart, CanonicalPartType + + msgs = [ + CanonicalMessagePart( + type=CanonicalPartType.TOOL_RESULT, + role="user", + text="纯噪声", + ), + ] + assert _extract_title_from_tool_results(msgs) == "" + + +class TestExtractTitleFromImages: + """Level 3 辅助函数 ``_extract_title_from_images``.""" + + def test_single_image(self): + from coding.proxy.model.compat import CanonicalMessagePart, CanonicalPartType + + msgs = [ + CanonicalMessagePart(type=CanonicalPartType.IMAGE, role="user"), + ] + assert _extract_title_from_images(msgs) == "[1 Image]" + + def test_multiple_images(self): + from coding.proxy.model.compat import CanonicalMessagePart, CanonicalPartType + + msgs = [ + CanonicalMessagePart(type=CanonicalPartType.IMAGE, role="user"), + CanonicalMessagePart(type=CanonicalPartType.IMAGE, role="user"), + CanonicalMessagePart(type=CanonicalPartType.IMAGE, role="user"), + ] + assert _extract_title_from_images(msgs) == "[3 Images]" + + def test_no_images(self): + from coding.proxy.model.compat import CanonicalMessagePart, CanonicalPartType + + msgs = [ + CanonicalMessagePart(type=CanonicalPartType.TEXT, role="user", text="文本"), + ] + assert _extract_title_from_images(msgs) == "" + + def test_skips_assistant_images(self): + from coding.proxy.model.compat import CanonicalMessagePart, CanonicalPartType + + msgs = [ + CanonicalMessagePart(type=CanonicalPartType.IMAGE, role="assistant"), + ] + assert _extract_title_from_images(msgs) == "" + + +class TestExtractTitleFromMetadata: + """Level 4 辅助函数 ``_extract_title_from_metadata``.""" + + @staticmethod + def _build_request_with_meta(tool_names: list[str] | None = None, model: str = ""): + body: dict = {"model": model, "messages": []} + if tool_names: + body["tools"] = [{"name": n} for n in tool_names] + return build_canonical_request(body, {}) + + def test_uses_tool_names(self): + req = self._build_request_with_meta( + tool_names=["Bash", "Read", "Edit"], model="claude-opus-4-8" + ) + assert _extract_title_from_metadata(req) == "[Tool call] Bash, Read, Edit" + + def test_limits_to_three_tool_names(self): + req = self._build_request_with_meta( + tool_names=["Bash", "Read", "Edit", "Write", "Grep"], model="test" + ) + assert _extract_title_from_metadata(req) == "[Tool call] Bash, Read, Edit" + + def test_uses_model_when_no_tools(self): + req = self._build_request_with_meta(tool_names=[], model="claude-sonnet-4-6") + assert _extract_title_from_metadata(req) == "[Session] claude-sonnet-4-6" + + def test_returns_empty_when_nothing(self): + req = self._build_request_with_meta(tool_names=[], model="") + assert _extract_title_from_metadata(req) == "" + + +class TestExtractSessionTitleFallback: + """``_extract_session_title`` 多层级回退集成测试.""" + + @staticmethod + def _build_request(messages: list[dict], **extra): + body: dict = {"model": "test-model", "messages": messages, **extra} + return build_canonical_request(body, {}) + + def test_level1_takes_priority(self): + """Level 1 命中时不回退到 Level 2.""" + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "用户真实问题"}, + { + "type": "tool_result", + "tool_use_id": "tu_1", + "content": "工具输出", + }, + ], + } + ] + req = self._build_request(messages) + assert _extract_session_title(req) == "用户真实问题" + + def test_level2_when_no_text(self): + """无 user TEXT 时,回退到 Level 2 TOOL_RESULT.""" + messages = [ + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "tu_1", + "content": [{"type": "text", "text": "文件内容摘要"}], + }, + ], + } + ] + req = self._build_request(messages) + assert _extract_session_title(req) == "[Tool output] 文件内容摘要" + + def test_level3_when_only_images(self): + """无 TEXT 和 TOOL_RESULT 时,回退到 Level 3 IMAGE.""" + messages = [ + { + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "abc", + }, + }, + ], + } + ] + req = self._build_request(messages) + assert _extract_session_title(req) == "[1 Image]" + + def test_level4_uses_tool_names(self): + """所有消息级别均无内容时,回退到 Level 4 元数据.""" + req = self._build_request([], tools=[{"name": "Bash"}, {"name": "Read"}]) + assert _extract_session_title(req) == "[Tool call] Bash, Read" + + def test_level4_uses_model_name(self): + """无 tools 时,Level 4 使用 model 名称.""" + req = self._build_request([]) + assert _extract_session_title(req) == "[Session] test-model" + + def test_fallback_cascade_full(self): + """Level 1 全噪声 → Level 2 全噪声 → Level 3 无图 → Level 4 模型名.""" + messages = [ + { + "role": "user", + "content": "纯噪声", + }, + ] + req = self._build_request(messages) + assert _extract_session_title(req) == "[Session] test-model"