From fa0e8b4f25bb49a37adae9a223302250c4988436 Mon Sep 17 00:00:00 2001 From: ThreeFish Date: Sat, 23 May 2026 23:20:26 +0800 Subject: [PATCH 1/7] =?UTF-8?q?diag(executor):=20=E4=B8=BA=E8=AF=AD?= =?UTF-8?q?=E4=B9=89=E6=8B=92=E7=BB=9D=E8=B7=AF=E5=BE=84=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E8=AF=B7=E6=B1=82=E4=BD=93=E5=8F=AF=E7=96=91=E5=8F=82=E6=95=B0?= =?UTF-8?q?=E8=AF=8A=E6=96=AD=E6=97=A5=E5=BF=97;?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 在 execute_message 和 execute_stream 的 semantic rejection 日志中 附加请求体参数快照(thinking/extended_thinking/reasoning_effort 顶层参数、 会话历史中 thinking blocks 数量、cache_control 存在情况、模型名、消息数), 用于定位 zhipu glm-4.7 [1210] 参数校验拒绝的具体祸根参数。 🤖 Generated with [Claude Code](https://github.com/claude), [CodeX](https://openai.com), [Gemini](https://github.com/apps/gemini-code-assist) Co-Authored-By: Aurelius Huang --- src/coding/proxy/routing/executor.py | 68 +++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/src/coding/proxy/routing/executor.py b/src/coding/proxy/routing/executor.py index 7eac6c3..74273af 100644 --- a/src/coding/proxy/routing/executor.py +++ b/src/coding/proxy/routing/executor.py @@ -48,6 +48,58 @@ logger = logging.getLogger(__name__) +def _build_semantic_rejection_diagnostic(body: dict[str, Any]) -> str: + """构建语义拒绝的请求体诊断上下文. 
+ + 在 semantic rejection 日志中附加请求体的可疑参数快照, + 用于定位供应商参数校验失败的具体祸根参数。 + """ + parts: list[str] = [] + # 顶层不兼容参数 + for key in ("thinking", "extended_thinking", "reasoning_effort"): + if key in body: + val = body[key] + parts.append(f"{key}={val!r:.80}") + # 会话历史中的 thinking blocks + thinking_count = 0 + for msg in body.get("messages", []): + content = msg.get("content") + if not isinstance(content, list): + continue + for block in content: + if isinstance(block, dict) and block.get("type") in ( + "thinking", + "redacted_thinking", + ): + thinking_count += 1 + if thinking_count: + parts.append(f"thinking_blocks_in_history={thinking_count}") + # cache_control 存在检测 + has_cc = False + for section in ( + body.get("system", []) if isinstance(body.get("system"), list) else [], + *( + m.get("content", []) + for m in body.get("messages", []) + if isinstance(m.get("content"), list) + ), + body.get("tools", []), + ): + if isinstance(section, list): + for item in section: + if isinstance(item, dict) and "cache_control" in item: + has_cc = True + break + if has_cc: + break + if has_cc: + parts.append("cache_control_fields=present") + # 模型 + 消息数 + parts.append(f"model={body.get('model', 'N/A')}") + parts.append(f"messages={len(body.get('messages', []))}") + return f" [{', '.join(parts)}]" if parts else "" + + def _log_http_error_detail( tier_name: str, exc: Exception, @@ -601,12 +653,14 @@ async def execute_message( ) if not is_last and is_semantic: + diagnostic = _build_semantic_rejection_diagnostic(body) logger.warning( - "Tier %s semantic rejection (type=%s, msg=%s), " + "Tier %s semantic rejection (type=%s, msg=%s)%s, " "trying next tier without recording failure", tier.name, resp.error_type or resp.status_code, (resp.error_message or "N/A")[:200], + diagnostic, ) failed_tier_name = tier.name continue @@ -838,6 +892,18 @@ async def _handle_http_error( ) if semantic_rejection and not is_last: + if request_body is not None: + diagnostic = 
_build_semantic_rejection_diagnostic(request_body) + logger.warning( + "Tier %s stream semantic rejection (type=%s, msg=%s)%s, " + "trying next tier without recording failure", + tier.name, + error.get("type") if isinstance(error, dict) else None, + (error.get("message") if isinstance(error, dict) else "N/A")[ + :200 + ], + diagnostic, + ) return True, tier.name, exc rl_info = parse_rate_limit_headers( From fe37f07c7f95672229ca4c9b48e00fe74af6afa0 Mon Sep 17 00:00:00 2001 From: ThreeFish Date: Mon, 25 May 2026 18:03:46 +0800 Subject: [PATCH 2/7] =?UTF-8?q?diag(executor):=20=E6=89=A9=E5=B1=95?= =?UTF-8?q?=E8=AF=AD=E4=B9=89=E6=8B=92=E7=BB=9D=E8=AF=8A=E6=96=AD=E6=97=A5?= =?UTF-8?q?=E5=BF=97=E8=A6=86=E7=9B=96=E8=8C=83=E5=9B=B4=E8=87=B3=20system?= =?UTF-8?q?/tools/sampling/metadata/content=5Ftypes=20=E7=AD=89=E7=BB=B4?= =?UTF-8?q?=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #244 部署后的诊断日志反转了原推断:失败请求均不含 thinking/cache_control, 说明祸根在更细粒度的参数。扩展 _build_semantic_rejection_diagnostic 函数: 新增维度(仅存在时输出): - system 形态(string/blocks + cache_control 计数) - tools 数量 + tool_choice 形态 - 采样参数(max_tokens/temperature/top_p/top_k/stop_sequences) - stream / metadata_keys - messages.content 类型分布(含 string content) - 请求体字节数估算(json.dumps) 新增 14 个单元测试(TestBuildSemanticRejectionDiagnostic)覆盖各字段组合 与真实失败请求形态。所有测试通过(1478 passed)。 --- src/coding/proxy/routing/executor.py | 115 ++++++++++++--- tests/test_router_executor.py | 213 +++++++++++++++++++++++++++ 2 files changed, 304 insertions(+), 24 deletions(-) diff --git a/src/coding/proxy/routing/executor.py b/src/coding/proxy/routing/executor.py index 74273af..a81486b 100644 --- a/src/coding/proxy/routing/executor.py +++ b/src/coding/proxy/routing/executor.py @@ -6,6 +6,7 @@ from __future__ import annotations +import json import logging import time from collections.abc import AsyncIterator @@ -53,50 +54,116 @@ def _build_semantic_rejection_diagnostic(body: dict[str, Any]) -> str: 在 semantic 
rejection 日志中附加请求体的可疑参数快照, 用于定位供应商参数校验失败的具体祸根参数。 + + 覆盖范围: + * 模型 / messages 数(baseline) + * thinking 系列顶层参数 + history thinking_blocks 数 + * system 形态(string / blocks,含 cache_control 计数) + * tools 数量 + tool_choice 形态 + * 采样参数(max_tokens / temperature / top_p / top_k / stop_sequences) + * stream / metadata 形态 + * cache_control 存在性 + * messages.content 类型分布 + * 请求体大小估算(json.dumps 字节数) """ parts: list[str] = [] - # 顶层不兼容参数 + + # ── 模型 + 消息数(baseline,始终输出)── + parts.append(f"model={body.get('model', 'N/A')}") + parts.append(f"messages={len(body.get('messages', []))}") + + # ── 顶层 thinking 系列参数 ── for key in ("thinking", "extended_thinking", "reasoning_effort"): if key in body: val = body[key] parts.append(f"{key}={val!r:.80}") - # 会话历史中的 thinking blocks + + # ── system 形态 ── + system = body.get("system") + if isinstance(system, str): + parts.append(f"system_kind=string(len={len(system)})") + elif isinstance(system, list): + cc_count = sum( + 1 for item in system if isinstance(item, dict) and "cache_control" in item + ) + if cc_count: + parts.append(f"system_blocks={len(system)},cc={cc_count}") + else: + parts.append(f"system_blocks={len(system)}") + + # ── tools 与 tool_choice ── + tools = body.get("tools") + if isinstance(tools, list): + parts.append(f"tools={len(tools)}") + tool_choice = body.get("tool_choice") + if tool_choice is not None: + parts.append(f"tool_choice={tool_choice!r:.60}") + + # ── 采样参数(仅存在时输出)── + for key in ("max_tokens", "temperature", "top_p", "top_k"): + if key in body: + parts.append(f"{key}={body[key]!r:.40}") + stop_sequences = body.get("stop_sequences") + if isinstance(stop_sequences, list) and stop_sequences: + parts.append(f"stop_sequences={len(stop_sequences)}") + + # ── stream / metadata ── + if "stream" in body: + parts.append(f"stream={body['stream']}") + metadata = body.get("metadata") + if isinstance(metadata, dict) and metadata: + parts.append(f"metadata_keys={len(metadata)}") + + # ── 会话历史中的 thinking blocks 与 content_types 分布 ── 
thinking_count = 0 + content_type_counts: dict[str, int] = {} for msg in body.get("messages", []): content = msg.get("content") + if isinstance(content, str): + content_type_counts["string"] = content_type_counts.get("string", 0) + 1 + continue if not isinstance(content, list): continue for block in content: - if isinstance(block, dict) and block.get("type") in ( - "thinking", - "redacted_thinking", - ): + if not isinstance(block, dict): + continue + btype = block.get("type") + if isinstance(btype, str): + content_type_counts[btype] = content_type_counts.get(btype, 0) + 1 + if btype in ("thinking", "redacted_thinking"): thinking_count += 1 if thinking_count: parts.append(f"thinking_blocks_in_history={thinking_count}") - # cache_control 存在检测 + if content_type_counts: + type_repr = ",".join(f"{k}:{v}" for k, v in sorted(content_type_counts.items())) + parts.append(f"content_types={{{type_repr}}}") + + # ── cache_control 存在检测(messages / tools,不含 system 因已单独统计)── has_cc = False - for section in ( - body.get("system", []) if isinstance(body.get("system"), list) else [], - *( - m.get("content", []) - for m in body.get("messages", []) - if isinstance(m.get("content"), list) - ), - body.get("tools", []), - ): - if isinstance(section, list): - for item in section: - if isinstance(item, dict) and "cache_control" in item: - has_cc = True - break + sections: list[Any] = [] + for m in body.get("messages", []): + if isinstance(m.get("content"), list): + sections.append(m["content"]) + if isinstance(body.get("tools"), list): + sections.append(body["tools"]) + for section in sections: + for item in section: + if isinstance(item, dict) and "cache_control" in item: + has_cc = True + break if has_cc: break if has_cc: parts.append("cache_control_fields=present") - # 模型 + 消息数 - parts.append(f"model={body.get('model', 'N/A')}") - parts.append(f"messages={len(body.get('messages', []))}") + + # ── 请求体大小估算 ── + try: + body_bytes = len(json.dumps(body, ensure_ascii=False).encode("utf-8")) + 
parts.append(f"body_bytes={body_bytes}") + except (TypeError, ValueError): + # 极少数情况下 body 含非可序列化对象,跳过 + pass + return f" [{', '.join(parts)}]" if parts else "" diff --git a/tests/test_router_executor.py b/tests/test_router_executor.py index 1e40ea6..53982fe 100644 --- a/tests/test_router_executor.py +++ b/tests/test_router_executor.py @@ -21,6 +21,7 @@ ) from coding.proxy.routing.executor import ( _VENDOR_PROTOCOL_LABEL_MAP, + _build_semantic_rejection_diagnostic, _has_tool_results, _is_likely_request_format_error, _log_vendor_response_error, @@ -1948,3 +1949,215 @@ def test_returns_body_for_unknown_tier(self): result = exec_inst._prepare_body_for_tier(body, tier, source_vendor="zhipu") assert result is body + + +class TestBuildSemanticRejectionDiagnostic: + """覆盖 _build_semantic_rejection_diagnostic 函数 — 用于诊断 [1210] 等供应商语义拒绝. + + 重点验证: + - baseline 字段(model / messages)始终输出 + - 仅当参数存在时才输出相关项(避免日志噪声) + - 各字段输出格式稳定 + """ + + def test_baseline_minimal_body(self): + """最小请求体:仅输出 model + messages.""" + body = {"model": "glm-5-turbo", "messages": [{"role": "user", "content": "hi"}]} + result = _build_semantic_rejection_diagnostic(body) + assert "model=glm-5-turbo" in result + assert "messages=1" in result + # 不应输出未使用的字段 + assert "thinking" not in result + assert "tools" not in result + assert "cache_control" not in result + + def test_includes_thinking_param(self): + body = { + "model": "glm-5-turbo", + "messages": [], + "thinking": {"type": "enabled", "budget_tokens": 1024}, + } + result = _build_semantic_rejection_diagnostic(body) + assert "thinking=" in result + assert "budget_tokens" in result + + def test_includes_system_string(self): + body = { + "model": "glm-5-turbo", + "messages": [], + "system": "You are helpful." 
* 5, + } + result = _build_semantic_rejection_diagnostic(body) + assert "system_kind=string(len=" in result + + def test_includes_system_blocks_with_cache_control(self): + body = { + "model": "glm-5-turbo", + "messages": [], + "system": [ + { + "type": "text", + "text": "rule1", + "cache_control": {"type": "ephemeral"}, + }, + {"type": "text", "text": "rule2"}, + ], + } + result = _build_semantic_rejection_diagnostic(body) + assert "system_blocks=2,cc=1" in result + + def test_includes_tools_and_tool_choice(self): + body = { + "model": "glm-5-turbo", + "messages": [], + "tools": [{"name": "a"}, {"name": "b"}, {"name": "c"}], + "tool_choice": {"type": "auto"}, + } + result = _build_semantic_rejection_diagnostic(body) + assert "tools=3" in result + assert "tool_choice=" in result + + def test_includes_sampling_params(self): + body = { + "model": "glm-5-turbo", + "messages": [], + "max_tokens": 8192, + "temperature": 0.7, + "top_p": 0.9, + "top_k": 40, + "stop_sequences": ["\n\n", "END"], + } + result = _build_semantic_rejection_diagnostic(body) + assert "max_tokens=8192" in result + assert "temperature=0.7" in result + assert "top_p=0.9" in result + assert "top_k=40" in result + assert "stop_sequences=2" in result + + def test_includes_stream_and_metadata(self): + body = { + "model": "glm-5-turbo", + "messages": [], + "stream": True, + "metadata": {"user_id": "x", "session_id": "y"}, + } + result = _build_semantic_rejection_diagnostic(body) + assert "stream=True" in result + assert "metadata_keys=2" in result + + def test_content_type_distribution(self): + body = { + "model": "glm-5-turbo", + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "hi"}, + {"type": "text", "text": "bye"}, + {"type": "image", "source": {}}, + ], + }, + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "t1", "name": "x", "input": {}}, + ], + }, + ], + } + result = _build_semantic_rejection_diagnostic(body) + # 排序为字母序 + assert 
"content_types={image:1,text:2,tool_use:1}" in result + + def test_content_type_string_messages(self): + """messages.content 为 string 时计入 string:N.""" + body = { + "model": "glm-5-turbo", + "messages": [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + ], + } + result = _build_semantic_rejection_diagnostic(body) + assert "content_types={string:2}" in result + + def test_thinking_blocks_in_history(self): + body = { + "model": "glm-5-turbo", + "messages": [ + { + "role": "assistant", + "content": [ + {"type": "thinking", "thinking": "..."}, + {"type": "redacted_thinking", "data": "..."}, + {"type": "text", "text": "result"}, + ], + } + ], + } + result = _build_semantic_rejection_diagnostic(body) + assert "thinking_blocks_in_history=2" in result + + def test_cache_control_in_messages_or_tools(self): + body = { + "model": "glm-5-turbo", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "x", + "cache_control": {"type": "ephemeral"}, + }, + ], + } + ], + } + result = _build_semantic_rejection_diagnostic(body) + assert "cache_control_fields=present" in result + + def test_body_bytes_estimated(self): + body = {"model": "glm-5-turbo", "messages": [{"role": "user", "content": "ok"}]} + result = _build_semantic_rejection_diagnostic(body) + assert "body_bytes=" in result + + def test_body_bytes_skipped_when_unserializable(self): + """请求体含非可序列化对象时不抛异常.""" + + class NonSerializable: + pass + + body = { + "model": "glm-5-turbo", + "messages": [], + "metadata": {"obj": NonSerializable()}, + } + # 不应抛异常 + result = _build_semantic_rejection_diagnostic(body) + assert "model=glm-5-turbo" in result + + def test_combined_real_world_failure_case(self): + """模拟真实失败请求形态(messages=1,无 thinking/cache_control,含 system + tools).""" + body = { + "model": "glm-5-turbo", + "messages": [{"role": "user", "content": "需要修复一个 bug"}], + "system": [{"type": "text", "text": "You are Claude Code."}], + "tools": [{"name": "Read"}, 
{"name": "Edit"}], + "max_tokens": 8192, + "temperature": 1.0, + "metadata": {"user_id": "x"}, + "stream": True, + } + result = _build_semantic_rejection_diagnostic(body) + assert "model=glm-5-turbo" in result + assert "messages=1" in result + assert "system_blocks=1" in result + assert "tools=2" in result + assert "max_tokens=8192" in result + assert "temperature=1.0" in result + assert "metadata_keys=1" in result + assert "stream=True" in result + # 不应包含未出现的项 + assert "thinking_blocks_in_history" not in result + assert "cache_control_fields" not in result From 95aca87ed5362a5ffbae23d73da7e866b9492197 Mon Sep 17 00:00:00 2001 From: ThreeFish Date: Mon, 25 May 2026 18:03:54 +0800 Subject: [PATCH 3/7] =?UTF-8?q?docs(agents):=20=E8=AE=B0=E5=BD=95=20zhipu?= =?UTF-8?q?=20[1210]=20=E8=AF=8A=E6=96=AD=E9=98=B6=E6=AE=B5=E8=BF=9B?= =?UTF-8?q?=E5=B1=95=EF=BC=88=E8=AF=81=E6=8D=AE=E5=8F=8D=E8=BD=AC=20+=20St?= =?UTF-8?q?ep=201=20v2=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/agents/issue.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/docs/agents/issue.md b/docs/agents/issue.md index 8583087..af58832 100644 --- a/docs/agents/issue.md +++ b/docs/agents/issue.md @@ -230,3 +230,47 @@ SUM(input_tokens + output_tokens - 历次 PR 中 cache token 字段的引入是渐进式的(schema 已有四列、`log()` 入参齐全、Overview 已全口径消费),但部分聚合视图的口径升级被遗漏;任何向 `usage_log` 增列后,**必须**审计所有 `SUM(input_tokens` / `SUM(output_tokens` 出现处的聚合表达式是否需要同步更新。 - 跨标签页同一指标(如"总 Tokens")的口径一致性,建议在添加新视图时主动与 Overview 现有口径做交叉核对,必要时在 SQL 注释中标注口径来源,便于后续 review。 + +--- + +## Zhipu vendor 间歇性 `[1210][API 调用参数有误]` 拒绝(诊断阶段) + +**问题描述** + +Zhipu vendor 作为首选 tier 时,处理 `claude-haiku-* → glm-5-turbo` 的部分请求被上游直接拒绝: + +``` +WARNING Tier zhipu semantic rejection + (type=invalid_request_error, + msg=[1210][API 调用参数有误,请检查文档。][...]) + [model=claude-haiku-4-5-20251001, messages=1], trying next tier without recording failure +INFO Tier anthropic message 
succeeded (took over from failed tier: zhipu) +``` + +失败请求统一表现为 `duration<1s + tokens=[0 0 0 0]`,被 zhipu 在入口校验阶段直接拒绝、未消耗任何 token。两次观察窗口失败率分别为 4%(2026-05-23 22:24,glm-4.7 旧映射)与 27%(2026-05-25 17:26+,glm-5-turbo 当前映射),均触发降级至 anthropic / copilot。 + +**表因** + +`is_semantic_rejection` 检测到 zhipu 返回 `invalid_request_error + 1210` 含「API 调用参数有误」中文标记,判定为语义拒绝,跳过下一层 tier。1210 是智谱官方错误码,[官方文档](https://docs.bigmodel.cn/cn/api/api-code) 定义为「参数格式/类型不符规范」(区别于 1213「必需字段缺失」、1214「字段参数非法」)。 + +**根因(仍在收集证据)** + +PR #244 的初版诊断字段仅覆盖 `thinking / thinking_blocks / cache_control / model / messages`,但 2026-05-25 17:26 后的诊断日志显示失败请求**均不含**上述任何字段。说明真正祸根在更细粒度的参数(system / tools / max_tokens / sampling / metadata / content_types / body_size 等)。 + +**处理方式(分阶段)** + +- **Step 1(PR #244,已合并)**:在 `executor.py::_build_semantic_rejection_diagnostic` 中输出 thinking / cache_control 相关字段 — 但证据反转,覆盖不足以定位真因。 +- **Step 1 v2(本次)**:扩展诊断函数覆盖 `system_kind|blocks(+cc)` / `tools` / `tool_choice` / 采样参数 / `stream` / `metadata_keys` / `content_types` / `body_bytes` 等维度。所有项「仅存在时输出」以控制日志噪声。配套 14 个单元测试(`TestBuildSemanticRejectionDiagnostic`)覆盖各字段组合。 +- **Step 2(待定)**:依据扩展诊断日志的新证据,定位具体祸根参数后再施修复(候选路径:`ZhipuVendor._prepare_request` 参数剥离 / 调用现有 `normalize_for_zhipu` / pre-validation 警告)。 + +**后续防范** + +- **「无证据,不下结论」**:当初版诊断字段无法覆盖根因时,禁止反复猜测,应优先扩展诊断维度抓取更多线索。本次先扩展再修复的迭代节奏可作为同类「黑盒 API 报错」问题的范式。 +- **诊断字段设计原则**:所有诊断项应「仅存在时输出」,避免常态化噪声;输出格式紧凑(`key=val`)便于日志检索;参数值用 `!r:.N` 截断防止巨型对象灌入日志。 +- **错误码差异化**:智谱 12xx 系列错误码语义并不等价(1210 ≠ 1213 ≠ 1214),未来面对类似 `[code][message]` 形式的供应商错误时,应优先查阅其官方错误码字典,避免基于错误消息字面意思的误判。 + +**同类问题影响与处理注意事项** + +- 其他薄透传 vendor(minimax / kimi / doubao / alibaba / xiaomi)共用 `NativeAnthropicVendor._prepare_request`,若它们也开始报「参数错误」类语义拒绝,可复用本次扩展的诊断函数定位差异。 +- 若证据指向 `tools` 字段(如工具 schema 不兼容)、`metadata` 字段(如自定义键被 zhipu 拒收)等具体路径,修复时应优先复用 `convert/vendor_channels.py` 中已有的 `normalize_for_zhipu` / `strip_thinking_blocks` 工具,避免在 vendor 内部重复实现剥离逻辑。 +- 部署 Step 1 v2 后,建议观察至少 48 小时收集足够样本(>20 次失败),通过失败/成功请求形态对比统计找出**唯一差异维度**,再进入 Step 2。 
From 751a92226424dd0a09bd00ba869c0b15ffaaf891 Mon Sep 17 00:00:00 2001 From: ThreeFish Date: Mon, 25 May 2026 18:04:00 +0800 Subject: [PATCH 4/7] =?UTF-8?q?build(version):=20=E7=89=88=E6=9C=AC?= =?UTF-8?q?=E5=8F=B7=E5=8D=87=E7=BA=A7=E8=87=B3=200.4.1a6;?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6437035..d09469f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "coding-proxy" -version = "0.4.1a5" +version = "0.4.1a6" description = "A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao..." readme = "README.md" requires-python = ">=3.12" diff --git a/uv.lock b/uv.lock index 68360a8..bbf5556 100644 --- a/uv.lock +++ b/uv.lock @@ -74,7 +74,7 @@ wheels = [ [[package]] name = "coding-proxy" -version = "0.4.1a5" +version = "0.4.1a6" source = { editable = "." 
} dependencies = [ { name = "aiosqlite" }, From 4dee2cbf162419f5853e14eb72e9e58962f4b4fb Mon Sep 17 00:00:00 2001 From: ThreeFish Date: Tue, 26 May 2026 17:13:32 +0800 Subject: [PATCH 5/7] =?UTF-8?q?fix(zhipu):=20=E5=85=BC=E5=AE=B9=E8=BD=AC?= =?UTF-8?q?=E6=8D=A2=20thinking.type=3Dadaptive=20=E4=B8=BA=20GLM=20?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E7=9A=84=20enabled=20=E6=A0=BC=E5=BC=8F?= =?UTF-8?q?=EF=BC=8C=E4=BF=AE=E5=A4=8D=20[1210]=20=E8=AF=AD=E4=B9=89?= =?UTF-8?q?=E6=8B=92=E7=BB=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 基于 2026-05-26 16:30–16:31 日志证据(8 次连续拒绝均含 thinking.type=adaptive), 在 ZhipuVendor._prepare_request 中实现兼容转换: - adaptive → enabled(budget=16000):保留 thinking 能力,使用 GLM 原生确认支持的格式 - 新增 _build_zhipu_request_snapshot 诊断快照(成功/失败统一格式,可 diff 对比) - 扩展语义拒绝日志错误体截断(200→500 字符),保留完整字段级诊断 - metadata 暂不处理,待进一步诊断确认兼容性 Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/agents/issue.md | 12 ++- src/coding/proxy/convert/vendor_channels.py | 6 +- src/coding/proxy/routing/executor.py | 13 ++- src/coding/proxy/vendors/zhipu.py | 94 +++++++++++++++++++-- tests/test_vendors.py | 27 +++++- tests/test_zhipu.py | 47 ++++++++++- 6 files changed, 178 insertions(+), 21 deletions(-) diff --git a/docs/agents/issue.md b/docs/agents/issue.md index af58832..c202b8a 100644 --- a/docs/agents/issue.md +++ b/docs/agents/issue.md @@ -253,15 +253,19 @@ INFO Tier anthropic message succeeded (took over from failed tier: zhipu) `is_semantic_rejection` 检测到 zhipu 返回 `invalid_request_error + 1210` 含「API 调用参数有误」中文标记,判定为语义拒绝,跳过下一层 tier。1210 是智谱官方错误码,[官方文档](https://docs.bigmodel.cn/cn/api/api-code) 定义为「参数格式/类型不符规范」(区别于 1213「必需字段缺失」、1214「字段参数非法」)。 -**根因(仍在收集证据)** +**根因(已定位,修复中)** -PR #244 的初版诊断字段仅覆盖 `thinking / thinking_blocks / cache_control / model / messages`,但 2026-05-25 17:26 后的诊断日志显示失败请求**均不含**上述任何字段。说明真正祸根在更细粒度的参数(system / tools / max_tokens / sampling / metadata / content_types / body_size 等)。 +PR #247 (Step 1 v2) 部署后,2026-05-26 16:30–16:31 
的诊断日志显示 8 次连续拒绝**全部携带 `thinking={"type": "adaptive"}`**(Anthropic Claude 4.x 新增的参数类型),而同一时段其他会话的请求持续成功。之前 curl 测试仅验证了 `{"type": "enabled"}`,未覆盖 `adaptive` 类型。GLM 可能不支持此特定类型值,导致 [1210] 参数校验失败。 **处理方式(分阶段)** - **Step 1(PR #244,已合并)**:在 `executor.py::_build_semantic_rejection_diagnostic` 中输出 thinking / cache_control 相关字段 — 但证据反转,覆盖不足以定位真因。 -- **Step 1 v2(本次)**:扩展诊断函数覆盖 `system_kind|blocks(+cc)` / `tools` / `tool_choice` / 采样参数 / `stream` / `metadata_keys` / `content_types` / `body_bytes` 等维度。所有项「仅存在时输出」以控制日志噪声。配套 14 个单元测试(`TestBuildSemanticRejectionDiagnostic`)覆盖各字段组合。 -- **Step 2(待定)**:依据扩展诊断日志的新证据,定位具体祸根参数后再施修复(候选路径:`ZhipuVendor._prepare_request` 参数剥离 / 调用现有 `normalize_for_zhipu` / pre-validation 警告)。 +- **Step 1 v2(PR #247,已合并)**:扩展诊断函数覆盖 `system_kind|blocks(+cc)` / `tools` / `tool_choice` / 采样参数 / `stream` / `metadata_keys` / `content_types` / `body_bytes` 等维度。所有项「仅存在时输出」以控制日志噪声。配套 14 个单元测试(`TestBuildSemanticRejectionDiagnostic`)覆盖各字段组合。 +- **Step 2(进行中)**:基于 Step 1 v2 的日志证据,在 `ZhipuVendor._prepare_request` 中实现 **兼容转换**(而非移除): + - `thinking.type="adaptive"` → `{"type": "enabled", "budget_tokens": 16000}`(保留 thinking 能力) + - 新增 `_build_zhipu_request_snapshot` 诊断快照,同时覆盖成功/失败请求,建立可对比证据链 + - 扩展语义拒绝日志的错误体截断限制(200 → 500 字符),保留完整字段级诊断 + - `metadata` 暂不处理(待进一步诊断确认兼容性) **后续防范** diff --git a/src/coding/proxy/convert/vendor_channels.py b/src/coding/proxy/convert/vendor_channels.py index 52b7f44..456a9b3 100644 --- a/src/coding/proxy/convert/vendor_channels.py +++ b/src/coding/proxy/convert/vendor_channels.py @@ -369,8 +369,10 @@ def _strip_cache_control(body: dict[str, Any]) -> int: # ── zhipu 共享清洗函数 ────────────────────────────────────────── -# 跨供应商转换时主动剥离的顶层参数(首选 tier 场景由 _prepare_request 原样透传, -# GLM 原生支持 thinking / 静默忽略 cache_control 和 reasoning_effort,不会触发 400)。 +# 跨供应商转换时主动剥离的顶层参数。 +# 首选 tier 场景的 thinking.type=adaptive 兼容转换由 +# ZhipuVendor._prepare_request 处理(转换为 enabled + budget,保留功能), +# 此处仅负责 failover 路径的全量剥离(跨供应商 thinking signature 失效)。 _ZHIPU_UNSUPPORTED_PARAMS: 
frozenset[str] = frozenset( {"thinking", "extended_thinking", "reasoning_effort"} ) diff --git a/src/coding/proxy/routing/executor.py b/src/coding/proxy/routing/executor.py index c418fc5..de761ac 100644 --- a/src/coding/proxy/routing/executor.py +++ b/src/coding/proxy/routing/executor.py @@ -860,12 +860,15 @@ async def execute_message( if not is_last and is_semantic: diagnostic = _build_semantic_rejection_diagnostic(body) + # zhipu 等供应商的错误体含字段级诊断(如 [1210] 错误码 + request_id), + # 500 字符足以覆盖完整错误体,避免截断丢失关键细节 + err_msg = (resp.error_message or "N/A")[:500] logger.warning( "Tier %s semantic rejection (type=%s, msg=%s)%s, " "trying next tier without recording failure", tier.name, resp.error_type or resp.status_code, - (resp.error_message or "N/A")[:200], + err_msg, diagnostic, ) failed_tier_name = tier.name @@ -1100,14 +1103,16 @@ async def _handle_http_error( if semantic_rejection and not is_last: if request_body is not None: diagnostic = _build_semantic_rejection_diagnostic(request_body) + stream_err_msg = ( + error.get("message") if isinstance(error, dict) else "N/A" + ) + # 扩展至 500 字符以保留完整字段级诊断信息 logger.warning( "Tier %s stream semantic rejection (type=%s, msg=%s)%s, " "trying next tier without recording failure", tier.name, error.get("type") if isinstance(error, dict) else None, - (error.get("message") if isinstance(error, dict) else "N/A")[ - :200 - ], + stream_err_msg[:500], diagnostic, ) return True, tier.name, exc diff --git a/src/coding/proxy/vendors/zhipu.py b/src/coding/proxy/vendors/zhipu.py index e7ed8c7..065b48a 100644 --- a/src/coding/proxy/vendors/zhipu.py +++ b/src/coding/proxy/vendors/zhipu.py @@ -1,15 +1,17 @@ -"""智谱 GLM 供应商 — 原生 Anthropic 兼容端点薄透传代理. +"""智谱 GLM 供应商 — 原生 Anthropic 兼容端点代理(兼容转换 + 429 重试). -官方端点 (https://open.bigmodel.cn/api/anthropic) 已完整支持 -Anthropic Messages API 协议,本模块仅做两项最小适配: +官方端点 (https://open.bigmodel.cn/api/anthropic) 支持大部分 +Anthropic Messages API 协议,本模块做以下适配: 1. 模型名映射(Claude -> GLM) 2. 认证头替换(x-api-key) + 3. 
首选 tier 参数兼容转换(_prepare_request) -注意:实测验证 GLM 的 Anthropic 兼容端点对以下参数的处理方式: -- thinking 参数:原生支持(GLM 有自己的 thinking 机制) +实测验证 GLM 对 Anthropic 扩展参数的处理方式: +- thinking.type="enabled":原生支持(GLM 有自己的 thinking 机制) +- thinking.type="adaptive":不支持,触发 [1210] 参数错误 → 转换为 enabled + budget - cache_control 字段:静默忽略(GLM 使用隐式自动缓存) - reasoning_effort 参数:静默忽略 -以上参数均不会导致 400 错误,因此不需要在 _prepare_request 中剥离。 +- metadata 字段:暂不处理(待进一步诊断确认兼容性) 额外提供 429 Rate Limit 专用重试挽回机制: - max_attempt = 5(1 初始 + 4 重试) @@ -20,6 +22,7 @@ from __future__ import annotations import asyncio +import json import logging from collections.abc import AsyncIterator from typing import Any @@ -69,6 +72,49 @@ def __init__( super().__init__(config, model_mapper, failover_config) self._rl_retry = _RATE_LIMIT_RETRY + # ── 首选 tier 参数兼容转换 ──────────────────────────────── + + # adaptive thinking → enabled 的默认预算(Anthropic 推荐的 adaptive 等价值) + _ADAPTIVE_THINKING_BUDGET = 16000 + + async def _prepare_request( + self, + request_body: dict[str, Any], + headers: dict[str, Any], + ) -> tuple[dict[str, Any], dict[str, str]]: + """深拷贝 + 模型映射 + 认证头替换 + GLM 兼容转换. 
+ + 当 zhipu 作为首选 tier 时(source_vendor=None),请求体来自原始客户端, + 不经过跨供应商转换通道。此处对已知的 GLM 不兼容参数做兼容转换(而非移除), + 保留完整的 CC (Claude Code) 功能特性。 + """ + body, new_headers = await super()._prepare_request(request_body, headers) + + adaptations: list[str] = [] + + # thinking.type="adaptive" 是 Anthropic Claude 4.x 新增的类型, + # GLM 不支持此类型值,会触发 [1210] 参数错误。 + # 转换为 enabled + budget 保留 thinking 能力。 + thinking = body.get("thinking") + if isinstance(thinking, dict) and thinking.get("type") == "adaptive": + body["thinking"] = { + "type": "enabled", + "budget_tokens": self._ADAPTIVE_THINKING_BUDGET, + } + adaptations.append( + f"converted_thinking_adaptive→enabled" + f"(budget={self._ADAPTIVE_THINKING_BUDGET})" + ) + + if adaptations: + logger.debug( + "ZhipuVendor first-tier compat: %s%s", + ", ".join(adaptations), + _build_zhipu_request_snapshot(body), + ) + + return body, new_headers + # ── 非流式:429 重试 ──────────────────────────────────── async def send_message( @@ -186,3 +232,39 @@ def _compute_retry_delay_from_response( # 向后兼容别名 ZhipuBackend = ZhipuVendor + + +def _build_zhipu_request_snapshot(body: dict[str, Any]) -> str: + """构建发往 zhipu 请求的轻量参数快照,用于诊断日志. 
+ + 输出格式与 executor._build_semantic_rejection_diagnostic 一致, + 使成功请求和失败请求的日志可直接 diff 对比,定位差异维度。 + + 仅在转换发生时输出(DEBUG 级别),避免常态化日志噪声。 + """ + parts: list[str] = [] + parts.append(f"messages={len(body.get('messages', []))}") + + thinking = body.get("thinking") + if isinstance(thinking, dict): + parts.append(f"thinking_type={thinking.get('type', 'unknown')}") + + metadata = body.get("metadata") + if isinstance(metadata, dict) and metadata: + parts.append(f"metadata_keys={len(metadata)}") + + tools = body.get("tools") + if isinstance(tools, list): + parts.append(f"tools={len(tools)}") + + system = body.get("system") + if isinstance(system, list): + parts.append(f"system_blocks={len(system)}") + + try: + body_bytes = len(json.dumps(body, ensure_ascii=False).encode("utf-8")) + parts.append(f"body_bytes={body_bytes}") + except (TypeError, ValueError): + pass + + return f" [{', '.join(parts)}]" if parts else "" diff --git a/tests/test_vendors.py b/tests/test_vendors.py index 3ac0477..bc72602 100644 --- a/tests/test_vendors.py +++ b/tests/test_vendors.py @@ -396,7 +396,7 @@ async def test_zhipu_prepare_request_preserves_metadata(): @pytest.mark.asyncio async def test_zhipu_prepare_request_preserves_thinking(): - """ZhipuVendor._prepare_request 应原样保留 thinking 字段(GLM 原生支持).""" + """ZhipuVendor._prepare_request 应原样保留 thinking.type=enabled(GLM 原生支持).""" mapper = ModelMapper([]) zhipu_vendor = ZhipuVendor(ZhipuConfig(api_key="sk-test"), mapper) body = { @@ -405,12 +405,35 @@ async def test_zhipu_prepare_request_preserves_thinking(): "thinking": {"type": "enabled", "budget_tokens": 10000}, } prepared_body, _ = await zhipu_vendor._prepare_request(body, {}) - # thinking 原样透传(GLM 原生支持 thinking) + # thinking.type=enabled 原样透传(GLM 原生支持) assert prepared_body["thinking"] == {"type": "enabled", "budget_tokens": 10000} # 原始 body 不应被修改 assert body["thinking"]["budget_tokens"] == 10000 +@pytest.mark.asyncio +async def test_zhipu_prepare_request_converts_thinking_adaptive(): + 
"""ZhipuVendor._prepare_request 应将 thinking.type=adaptive 转换为 enabled+budget. + + GLM 不支持 adaptive 类型,转换为已确认安全的 enabled + budget_tokens 格式, + 保留 thinking 能力不被阉割。 + """ + mapper = ModelMapper([]) + zhipu_vendor = ZhipuVendor(ZhipuConfig(api_key="sk-test"), mapper) + body = { + "model": "claude-opus-4-7", + "messages": [], + "thinking": {"type": "adaptive"}, + } + prepared_body, _ = await zhipu_vendor._prepare_request(body, {}) + + # adaptive 应被转换为 enabled + budget + assert prepared_body["thinking"]["type"] == "enabled" + assert prepared_body["thinking"]["budget_tokens"] == 16000 + # 原始 body 不应被修改 + assert body["thinking"] == {"type": "adaptive"} + + @pytest.mark.asyncio async def test_zhipu_prepare_request_preserves_anthropic_beta_header(): zhipu_vendor = ZhipuVendor(ZhipuConfig(api_key="sk-test"), ModelMapper([])) diff --git a/tests/test_zhipu.py b/tests/test_zhipu.py index aef567a..aa05b21 100644 --- a/tests/test_zhipu.py +++ b/tests/test_zhipu.py @@ -78,7 +78,7 @@ def test_unknown_model_falls_back_to_default(self, zhipu_vendor): class TestRequestPassthrough: - """验证 _prepare_request 仅修改 model 和 headers.""" + """验证 _prepare_request 的模型映射、headers 替换和兼容转换.""" @pytest.mark.asyncio async def test_body_passthrough_except_model(self, zhipu_vendor): @@ -103,12 +103,13 @@ async def test_body_passthrough_except_model(self, zhipu_vendor): # 仅 model 被映射 assert prepared_body["model"] == "glm-5.1" - # 其余字段原样保留(GLM 原生支持 thinking,静默忽略 cache_control) + # thinking.type=enabled 原样保留(GLM 原生支持) + assert prepared_body["thinking"] == {"type": "enabled", "budget_tokens": 5000} + # 其余字段原样保留 assert prepared_body["max_tokens"] == 1024 assert prepared_body["temperature"] == 0.7 assert prepared_body["top_p"] == 0.9 assert prepared_body["stream"] is True - assert prepared_body["thinking"] == {"type": "enabled", "budget_tokens": 5000} assert prepared_body["metadata"] == {"user_id": "test-user"} assert prepared_body["system"] == "You are a helpful assistant." 
assert len(prepared_body["tools"]) == 3 @@ -116,6 +117,46 @@ async def test_body_passthrough_except_model(self, zhipu_vendor): # 原始 body 未被修改(deep copy) assert body["model"] == "claude-sonnet-4-20250514" + @pytest.mark.asyncio + async def test_thinking_adaptive_converted_to_enabled(self, zhipu_vendor): + """thinking.type=adaptive 应被转换为 enabled+budget(GLM 不支持 adaptive).""" + body = { + "model": "claude-opus-4-7", + "messages": [], + "thinking": {"type": "adaptive"}, + } + prepared_body, _ = await zhipu_vendor._prepare_request(body, {}) + + assert prepared_body["thinking"]["type"] == "enabled" + assert prepared_body["thinking"]["budget_tokens"] == 16000 + # 原始 body 未被修改 + assert body["thinking"] == {"type": "adaptive"} + + @pytest.mark.asyncio + async def test_thinking_enabled_preserved_unchanged(self, zhipu_vendor): + """thinking.type=enabled 应原样保留(GLM 原生支持).""" + body = { + "model": "claude-sonnet-4-20250514", + "messages": [], + "thinking": {"type": "enabled", "budget_tokens": 8000}, + } + prepared_body, _ = await zhipu_vendor._prepare_request(body, {}) + + assert prepared_body["thinking"] == {"type": "enabled", "budget_tokens": 8000} + assert body["thinking"]["budget_tokens"] == 8000 + + @pytest.mark.asyncio + async def test_no_thinking_param_unchanged(self, zhipu_vendor): + """无 thinking 参数时不触发任何转换.""" + body = { + "model": "claude-sonnet-4-20250514", + "messages": [{"role": "user", "content": "hi"}], + } + prepared_body, _ = await zhipu_vendor._prepare_request(body, {}) + + assert "thinking" not in prepared_body + assert prepared_body["model"] == "glm-5.1" + @pytest.mark.asyncio async def test_headers_replaces_auth(self, zhipu_vendor): """验证 x-api-key 被正确设置,authorization 被剥离.""" From 735433d2feff45ae4248239eab4054149a845819 Mon Sep 17 00:00:00 2001 From: ThreeFish Date: Tue, 26 May 2026 17:20:37 +0800 Subject: [PATCH 6/7] =?UTF-8?q?fix(executor):=20=E5=88=A0=E9=99=A4?= =?UTF-8?q?=E6=97=A7=E7=89=88=E9=87=8D=E5=A4=8D=E5=AE=9A=E4=B9=89=E7=9A=84?= 
=?UTF-8?q?=20=5Fbuild=5Fsemantic=5Frejection=5Fdiagnostic=20=E5=87=BD?= =?UTF-8?q?=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 1 v2 扩展版本与 Step 1 旧版本同名重复定义,Python 运行时后者覆盖前者 不报错但旧版成为死代码。删除旧版仅保留扩展版本。 🤖 Generated with [Claude Code](https://github.com/claude), [CodeX](https://openai.com), [Gemini](https://github.com/apps/gemini-code-assist) Co-Authored-By: Aurelius Huang --- src/coding/proxy/routing/executor.py | 52 ---------------------------- 1 file changed, 52 deletions(-) diff --git a/src/coding/proxy/routing/executor.py b/src/coding/proxy/routing/executor.py index de761ac..4ad2fdd 100644 --- a/src/coding/proxy/routing/executor.py +++ b/src/coding/proxy/routing/executor.py @@ -124,58 +124,6 @@ def _extract_session_title(request: CanonicalRequest) -> str: return "" -def _build_semantic_rejection_diagnostic(body: dict[str, Any]) -> str: - """构建语义拒绝的请求体诊断上下文. - - 在 semantic rejection 日志中附加请求体的可疑参数快照, - 用于定位供应商参数校验失败的具体祸根参数。 - """ - parts: list[str] = [] - # 顶层不兼容参数 - for key in ("thinking", "extended_thinking", "reasoning_effort"): - if key in body: - val = body[key] - parts.append(f"{key}={val!r:.80}") - # 会话历史中的 thinking blocks - thinking_count = 0 - for msg in body.get("messages", []): - content = msg.get("content") - if not isinstance(content, list): - continue - for block in content: - if isinstance(block, dict) and block.get("type") in ( - "thinking", - "redacted_thinking", - ): - thinking_count += 1 - if thinking_count: - parts.append(f"thinking_blocks_in_history={thinking_count}") - # cache_control 存在检测 - has_cc = False - for section in ( - body.get("system", []) if isinstance(body.get("system"), list) else [], - *( - m.get("content", []) - for m in body.get("messages", []) - if isinstance(m.get("content"), list) - ), - body.get("tools", []), - ): - if isinstance(section, list): - for item in section: - if isinstance(item, dict) and "cache_control" in item: - has_cc = True - break - if has_cc: - 
break - if has_cc: - parts.append("cache_control_fields=present") - # 模型 + 消息数 - parts.append(f"model={body.get('model', 'N/A')}") - parts.append(f"messages={len(body.get('messages', []))}") - return f" [{', '.join(parts)}]" if parts else "" - - def _build_semantic_rejection_diagnostic(body: dict[str, Any]) -> str: """构建语义拒绝的请求体诊断上下文. From d65aa144704017a1c48fbeb6877e2257b08f492c Mon Sep 17 00:00:00 2001 From: ThreeFish Date: Wed, 27 May 2026 22:53:18 +0800 Subject: [PATCH 7/7] =?UTF-8?q?feat(vendor-channels):=20=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=E8=BF=87=E6=B8=A1=E7=AE=A1=E7=BA=BF=E8=AF=8A=E6=96=AD=E5=BF=AB?= =?UTF-8?q?=E7=85=A7=E4=B8=8E=20Anthropic=20=E9=85=8D=E5=AF=B9=E8=87=AA?= =?UTF-8?q?=E6=A3=80;?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 在 prepare_zhipu_to_anthropic 管线中新增两个辅助函数: 1. _dump_message_digest: 输出各阶段消息结构摘要(DEBUG 级别), 用于过渡管线变换前后的可观测性诊断 2. _validate_anthropic_pairing: 独立的 tool_use/tool_result 配对 自检(纯检测,不修改),定位 enforce/sanity 未覆盖的边界 case 🤖 Generated with [Claude Code](https://github.com/claude), [CodeX](https://openai.com), [Gemini](https://github.com/apps/gemini-code-assist) Co-Authored-By: Aurelius Huang --- src/coding/proxy/convert/vendor_channels.py | 126 ++++++++++- tests/test_vendor_channels.py | 218 ++++++++++++++++++++ 2 files changed, 343 insertions(+), 1 deletion(-) diff --git a/src/coding/proxy/convert/vendor_channels.py b/src/coding/proxy/convert/vendor_channels.py index 456a9b3..030f7a3 100644 --- a/src/coding/proxy/convert/vendor_channels.py +++ b/src/coding/proxy/convert/vendor_channels.py @@ -52,6 +52,117 @@ def get_transition_channel( # ── 共享辅助函数 ────────────────────────────────────────────── +def _dump_message_digest( + messages: list[dict[str, Any]], + *, + max_messages: int = 10, + label: str = "", +) -> None: + """输出前 N 条消息的结构摘要(role + content_type_counts),用于过渡管线诊断. 
+ + 仅在 DEBUG 级别输出,且仅在消息数 > 0 时才输出,避免噪声。 + """ + if not messages or not logger.isEnabledFor(logging.DEBUG): + return + parts: list[str] = [f"[{label}]" if label else ""] + limit = min(len(messages), max_messages) + for idx in range(limit): + msg = messages[idx] + role = msg.get("role", "?") if isinstance(msg, dict) else "?" + content = msg.get("content") if isinstance(msg, dict) else None + if isinstance(content, list): + type_counts: dict[str, int] = {} + for b in content: + if isinstance(b, dict): + t = b.get("type", "?") + type_counts[t] = type_counts.get(t, 0) + 1 + else: + type_counts["raw"] = type_counts.get("raw", 0) + 1 + counts_str = ",".join(f"{t}:{c}" for t, c in sorted(type_counts.items())) + elif isinstance(content, str): + counts_str = f"str({len(content)})" + else: + counts_str = "empty" + parts.append(f"{idx}:{role}[{counts_str}]") + if len(messages) > max_messages: + parts.append(f"...+{len(messages) - max_messages}more") + logger.debug("Transition digest %s", " ".join(parts)) + + +def _validate_anthropic_pairing( + messages: list[dict[str, Any]], + *, + context: str = "", +) -> list[str]: + """独立的 Anthropic tool_use/tool_result 配对自检(过渡管线末端执行). 
+ + 与 ``_enforce_pairing_sanity_pass`` 不同,此函数: + - 不修改消息列表(纯检测) + - 针对每个 assistant + tool_use,精确记录下一条 user 消息中匹配/缺失的 ID + - 发现不一致时输出 WARNING 级别日志含 message index 与具体 ID + + Returns: + 检测到的问题描述列表(空列表表示全部通过)。 + """ + issues: list[str] = [] + for i, msg in enumerate(messages): + if not isinstance(msg, dict) or msg.get("role") != "assistant": + continue + content = msg.get("content") + if not isinstance(content, list): + continue + tool_use_ids = [ + b["id"] + for b in content + if isinstance(b, dict) and b.get("type") == "tool_use" and b.get("id") + ] + if not tool_use_ids: + continue + + next_idx = i + 1 + if next_idx >= len(messages): + issues.append(f"messages[{i}]: assistant with tool_uses at end of list") + continue + + next_msg = messages[next_idx] + if not isinstance(next_msg, dict) or next_msg.get("role") != "user": + issues.append( + f"messages[{i}]: next messages[{next_idx}] is not user " + f"(role={next_msg.get('role') if isinstance(next_msg, dict) else '?'})" + ) + continue + + user_content = next_msg.get("content") + if not isinstance(user_content, list): + user_content = [] + + result_ids = { + b["tool_use_id"] + for b in user_content + if isinstance(b, dict) + and b.get("type") == "tool_result" + and isinstance(b.get("tool_use_id"), str) + } + + missing = [uid for uid in tool_use_ids if uid not in result_ids] + if missing: + issue = ( + f"messages[{i}]: {len(missing)}/{len(tool_use_ids)} tool_use(s) " + f"without tool_result in messages[{next_idx}]: {missing[:5]}" + ) + issues.append(issue) + + if issues: + prefix = f"[{context}] " if context else "" + logger.warning( + "Anthropic pairing validation: %s%d issue(s) found: %s", + prefix, + len(issues), + "; ".join(issues), + ) + return issues + + def strip_thinking_blocks(body: dict[str, Any]) -> int: """从 assistant 消息中移除 thinking/redacted_thinking 块(就地). @@ -678,6 +789,7 @@ def prepare_zhipu_to_anthropic( 2. 改写 ``srvtoolu_*`` ID 与 ``server_tool_use`` 类型为标准 Anthropic 形式 3. 
强制 tool_use/tool_result 配对(单遍正向扫描) 4. 剥离 thinking blocks(signature 无效) + 5. 独立的 Anthropic 兼容性自检(纯检测,不修改,定位 enforce/sanity 未覆盖的边界 case) 所有变换均为幂等操作,安全地在已清理的请求体上重复执行。 @@ -686,6 +798,10 @@ def prepare_zhipu_to_anthropic( """ prepared = copy.deepcopy(body) adaptations: list[str] = [] + msgs = prepared.get("messages", []) + + # ── 过渡管线诊断:变换前快照 ── + _dump_message_digest(msgs, label="zhipu→anthropic.before") # Step 1: 剥离 zhipu 私有 content block 类型(如 server_tool_use_delta) removed_vendor_blocks = _remove_vendor_blocks(prepared, _ZHIPU_VENDOR_BLOCK_TYPES) @@ -696,16 +812,24 @@ def prepare_zhipu_to_anthropic( rewritten, _ = _rewrite_srvtoolu_ids(prepared) if rewritten: adaptations.append(f"rewritten_{rewritten}_srvtoolu_ids") + _dump_message_digest(msgs, label="zhipu→anthropic.after_rewrite") # Step 3: 强制 tool_use/tool_result 配对 - pairing_fixes = enforce_anthropic_tool_pairing(prepared.get("messages", [])) + pairing_fixes = enforce_anthropic_tool_pairing(msgs) if pairing_fixes: adaptations.extend(pairing_fixes) + _dump_message_digest(msgs, label="zhipu→anthropic.after_enforce") # Step 4: 剥离 thinking blocks(zhipu signature 无效) stripped = strip_thinking_blocks(prepared) if stripped: adaptations.append(f"stripped_{stripped}_thinking_blocks") + _dump_message_digest(msgs, label="zhipu→anthropic.after_strip") + + # Step 5: 独立的 Anthropic 兼容性自检(纯检测,不修改) + validation_issues = _validate_anthropic_pairing(msgs, context="zhipu→anthropic") + if validation_issues: + adaptations.append("anthropic_pairing_validation_issues") return prepared, adaptations diff --git a/tests/test_vendor_channels.py b/tests/test_vendor_channels.py index f9c9bb5..d99a4c6 100644 --- a/tests/test_vendor_channels.py +++ b/tests/test_vendor_channels.py @@ -2225,3 +2225,221 @@ def test_preserves_supported_params(self): assert result["stream"] is True assert result["metadata"] == {"user_id": "test"} assert adaptations == [] + + +class TestDumpMessageDigest: + """``_dump_message_digest`` 诊断快照函数测试.""" + + def 
test_outputs_nothing_on_empty_messages(self, caplog): + import logging + + from coding.proxy.convert.vendor_channels import _dump_message_digest + + with caplog.at_level( + logging.DEBUG, logger="coding.proxy.convert.vendor_channels" + ): + _dump_message_digest([], label="test") + assert "Transition digest" not in caplog.text + + def test_outputs_structure_for_first_n_messages(self, caplog): + import logging + + from coding.proxy.convert.vendor_channels import _dump_message_digest + + messages = [ + {"role": "user", "content": [{"type": "text", "text": "hi"}]}, + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "toolu_1", "name": "bash", "input": {}}, + ], + }, + { + "role": "user", + "content": [ + {"type": "tool_result", "tool_use_id": "toolu_1", "content": "ok"}, + ], + }, + ] + with caplog.at_level( + logging.DEBUG, logger="coding.proxy.convert.vendor_channels" + ): + _dump_message_digest(messages, label="test") + assert "test" in caplog.text + assert "0:user" in caplog.text + assert "1:assistant" in caplog.text + assert "tool_use:1" in caplog.text + + +class TestValidateAnthropicPairing: + """``_validate_anthropic_pairing`` 独立配对自检测试.""" + + def test_no_issues_for_correct_pairing(self): + from coding.proxy.convert.vendor_channels import _validate_anthropic_pairing + + messages = [ + {"role": "user", "content": "go"}, + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "toolu_1", "name": "bash", "input": {}}, + ], + }, + { + "role": "user", + "content": [ + {"type": "tool_result", "tool_use_id": "toolu_1", "content": "ok"}, + ], + }, + ] + issues = _validate_anthropic_pairing(messages) + assert issues == [] + + def test_detects_missing_tool_result(self): + from coding.proxy.convert.vendor_channels import _validate_anthropic_pairing + + messages = [ + {"role": "user", "content": "go"}, + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "toolu_1", "name": "bash", "input": {}}, + ], + }, + {"role": "user", 
"content": [{"type": "text", "text": "no result"}]}, + ] + issues = _validate_anthropic_pairing(messages) + assert len(issues) == 1 + assert "toolu_1" in issues[0] + assert "messages[1]" in issues[0] + + def test_detects_non_user_after_assistant_with_tool_use(self): + from coding.proxy.convert.vendor_channels import _validate_anthropic_pairing + + messages = [ + {"role": "user", "content": "go"}, + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "toolu_1", "name": "bash", "input": {}}, + ], + }, + { + "role": "assistant", + "content": [{"type": "text", "text": "another assistant"}], + }, + ] + issues = _validate_anthropic_pairing(messages) + assert len(issues) == 1 + assert "not user" in issues[0] + + def test_detects_assistant_with_tool_use_at_end_of_list(self): + from coding.proxy.convert.vendor_channels import _validate_anthropic_pairing + + messages = [ + {"role": "user", "content": "go"}, + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "toolu_1", "name": "bash", "input": {}}, + ], + }, + ] + issues = _validate_anthropic_pairing(messages) + assert len(issues) == 1 + assert "end of list" in issues[0] + + def test_partial_missing_only_reports_missing_ids(self): + from coding.proxy.convert.vendor_channels import _validate_anthropic_pairing + + messages = [ + {"role": "user", "content": "go"}, + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "toolu_1", "name": "bash", "input": {}}, + {"type": "tool_use", "id": "toolu_2", "name": "read", "input": {}}, + ], + }, + { + "role": "user", + "content": [ + {"type": "tool_result", "tool_use_id": "toolu_1", "content": "ok"}, + ], + }, + ] + issues = _validate_anthropic_pairing(messages) + assert len(issues) == 1 + assert "toolu_2" in issues[0] + assert "1/2" in issues[0] + + def test_integration_with_zhipu_to_anthropic_channel(self): + """验证 prepare_zhipu_to_anthropic 在末端执行自检且 adaptations 包含标签.""" + from coding.proxy.convert.vendor_channels import 
prepare_zhipu_to_anthropic + + body = { + "model": "claude-opus-4-7", + "messages": [ + {"role": "user", "content": "go"}, + { + "role": "assistant", + "content": [ + { + "type": "server_tool_use", + "id": "srvtoolu_01", + "name": "bash", + "input": {}, + }, + ], + }, + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "srvtoolu_01", + "content": "ok", + }, + ], + }, + ], + } + result, adaptations = prepare_zhipu_to_anthropic(body) + # 自检通过,不应包含 validation_issues 标签 + assert "anthropic_pairing_validation_issues" not in adaptations + + def test_integration_detects_enforce_missed_issue(self): + """构造一个理论上 enforce 可能遗漏的场景,验证自检能捕获. + + 场景:两条连续 assistant 消息,第一条的 tool_result 被第二条的 + existing_result_ids"冒领"(相同 ID 碰撞场景的模拟)。 + 虽然当前 enforce 实现下不太可能自然产生此场景,但自检应能捕获。 + """ + from coding.proxy.convert.vendor_channels import ( + _validate_anthropic_pairing, + ) + + # 手动构造一个 enforce 后仍存在配对缺陷的 body + messages = [ + {"role": "user", "content": "go"}, + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "toolu_x", "name": "bash", "input": {}}, + ], + }, + { + "role": "user", + "content": [ + # tool_result 缺失 toolu_x,但有不相关的 tool_result + { + "type": "tool_result", + "tool_use_id": "toolu_other", + "content": "wrong", + }, + ], + }, + ] + issues = _validate_anthropic_pairing(messages) + assert len(issues) == 1 + assert "toolu_x" in issues[0]