diff --git a/docs/agents/issue.md b/docs/agents/issue.md index 8583087..af58832 100644 --- a/docs/agents/issue.md +++ b/docs/agents/issue.md @@ -230,3 +230,47 @@ SUM(input_tokens + output_tokens - 历次 PR 中 cache token 字段的引入是渐进式的(schema 已有四列、`log()` 入参齐全、Overview 已全口径消费),但部分聚合视图的口径升级被遗漏;任何向 `usage_log` 增列后,**必须**审计所有 `SUM(input_tokens` / `SUM(output_tokens` 出现处的聚合表达式是否需要同步更新。 - 跨标签页同一指标(如"总 Tokens")的口径一致性,建议在添加新视图时主动与 Overview 现有口径做交叉核对,必要时在 SQL 注释中标注口径来源,便于后续 review。 + +--- + +## Zhipu vendor 间歇性 `[1210][API 调用参数有误]` 拒绝(诊断阶段) + +**问题描述** + +Zhipu vendor 作为首选 tier 时,处理 `claude-haiku-* → glm-5-turbo` 的部分请求被上游直接拒绝: + +``` +WARNING Tier zhipu semantic rejection + (type=invalid_request_error, + msg=[1210][API 调用参数有误,请检查文档。][...]) + [model=claude-haiku-4-5-20251001, messages=1], trying next tier without recording failure +INFO Tier anthropic message succeeded (took over from failed tier: zhipu) +``` + +失败请求统一表现为 `duration<1s + tokens=[0 0 0 0]`,被 zhipu 在入口校验阶段直接拒绝、未消耗任何 token。两次观察窗口失败率分别为 4%(2026-05-23 22:24,glm-4.7 旧映射)与 27%(2026-05-25 17:26+,glm-5-turbo 当前映射),均触发降级至 anthropic / copilot。 + +**表因** + +`is_semantic_rejection` 检测到 zhipu 返回 `invalid_request_error + 1210` 含「API 调用参数有误」中文标记,判定为语义拒绝,跳至下一层 tier(不记录失败)。1210 是智谱官方错误码,[官方文档](https://docs.bigmodel.cn/cn/api/api-code) 定义为「参数格式/类型不符规范」(区别于 1213「必需字段缺失」、1214「字段参数非法」)。 + +**根因(仍在收集证据)** + +PR #244 的初版诊断字段仅覆盖 `thinking / thinking_blocks / cache_control / model / messages`,但 2026-05-25 17:26 后的诊断日志显示失败请求**均不含**上述任何字段。说明真正祸根在更细粒度的参数(system / tools / max_tokens / sampling / metadata / content_types / body_size 等)。 + +**处理方式(分阶段)** + +- **Step 1(PR #244,已合并)**:在 `executor.py::_build_semantic_rejection_diagnostic` 中输出 thinking / cache_control 相关字段 — 但证据反转,覆盖不足以定位真因。 +- **Step 1 v2(本次)**:扩展诊断函数覆盖 `system_kind|blocks(+cc)` / `tools` / `tool_choice` / 采样参数 / `stream` / `metadata_keys` / `content_types` / `body_bytes` 等维度。所有项「仅存在时输出」以控制日志噪声。配套 14 个单元测试(`TestBuildSemanticRejectionDiagnostic`)覆盖各字段组合。 +- **Step 
def _build_semantic_rejection_diagnostic(body: dict[str, Any]) -> str:
    """Build the request-body diagnostic suffix for semantic-rejection logs.

    The returned text is appended to the "semantic rejection" log line so the
    shape of the rejected request can be compared with successful ones when
    hunting for the parameter a vendor refuses.

    This helper runs on an error path for requests the vendor already
    considers malformed, so it must never raise: unexpected shapes (for
    example ``messages`` not being a list, or non-dict entries inside it)
    are reported or skipped instead of being dereferenced.

    Reported items, in output order (everything except the baseline is
    emitted only when present, to keep log noise down):

    * ``model`` / ``messages`` count (baseline, always emitted)
    * top-level ``thinking`` / ``extended_thinking`` / ``reasoning_effort``
    * ``system`` shape: string length, or block count plus the number of
      blocks carrying ``cache_control``
    * ``tools`` count and ``tool_choice``
    * sampling parameters: ``max_tokens`` / ``temperature`` / ``top_p`` /
      ``top_k`` / number of ``stop_sequences``
    * ``stream`` and the number of ``metadata`` keys
    * number of thinking blocks in the message history
    * distribution of ``messages[*].content`` block types
    * whether any message block or tool carries ``cache_control``
    * approximate body size (UTF-8 byte length of ``json.dumps``)

    Args:
        body: The request body that was sent to the rejecting vendor.

    Returns:
        A string of the form ``" [key=val, key=val, ...]"``.
    """
    parts: list[str] = []

    # -- baseline: model + message count (always emitted) --
    parts.append(f"model={body.get('model', 'N/A')}")
    raw_messages = body.get("messages", [])
    if isinstance(raw_messages, (list, tuple)):
        messages = raw_messages
        parts.append(f"messages={len(messages)}")
    else:
        # A non-list ``messages`` is itself a strong hint about the rejection;
        # report its type rather than crashing on len().
        messages = []
        parts.append(f"messages=invalid({type(raw_messages).__name__})")

    # -- top-level thinking-related parameters --
    for key in ("thinking", "extended_thinking", "reasoning_effort"):
        if key in body:
            # ``!r:.80`` takes the repr and truncates it to 80 characters.
            parts.append(f"{key}={body[key]!r:.80}")

    # -- system prompt shape --
    system = body.get("system")
    if isinstance(system, str):
        parts.append(f"system_kind=string(len={len(system)})")
    elif isinstance(system, list):
        cc_count = sum(
            1 for item in system if isinstance(item, dict) and "cache_control" in item
        )
        if cc_count:
            parts.append(f"system_blocks={len(system)},cc={cc_count}")
        else:
            parts.append(f"system_blocks={len(system)}")

    # -- tools and tool_choice --
    tools = body.get("tools")
    if isinstance(tools, list):
        parts.append(f"tools={len(tools)}")
    tool_choice = body.get("tool_choice")
    if tool_choice is not None:
        parts.append(f"tool_choice={tool_choice!r:.60}")

    # -- sampling parameters --
    for key in ("max_tokens", "temperature", "top_p", "top_k"):
        if key in body:
            parts.append(f"{key}={body[key]!r:.40}")
    stop_sequences = body.get("stop_sequences")
    if isinstance(stop_sequences, list) and stop_sequences:
        parts.append(f"stop_sequences={len(stop_sequences)}")

    # -- stream / metadata --
    if "stream" in body:
        parts.append(f"stream={body['stream']}")
    metadata = body.get("metadata")
    if isinstance(metadata, dict) and metadata:
        parts.append(f"metadata_keys={len(metadata)}")

    # -- single pass over the history: thinking blocks, content types, and
    #    cache_control presence on message blocks --
    thinking_count = 0
    content_type_counts: dict[str, int] = {}
    has_cc = False
    for msg in messages:
        if not isinstance(msg, dict):
            # Malformed entry; nothing to inspect.
            continue
        content = msg.get("content")
        if isinstance(content, str):
            content_type_counts["string"] = content_type_counts.get("string", 0) + 1
            continue
        if not isinstance(content, list):
            continue
        for block in content:
            if not isinstance(block, dict):
                continue
            if "cache_control" in block:
                has_cc = True
            btype = block.get("type")
            if isinstance(btype, str):
                content_type_counts[btype] = content_type_counts.get(btype, 0) + 1
                if btype in ("thinking", "redacted_thinking"):
                    thinking_count += 1
    if thinking_count:
        parts.append(f"thinking_blocks_in_history={thinking_count}")
    if content_type_counts:
        # Sorted by type name so the log output is stable across requests.
        type_repr = ",".join(f"{k}:{v}" for k, v in sorted(content_type_counts.items()))
        parts.append(f"content_types={{{type_repr}}}")

    # -- cache_control presence (messages / tools; system is counted above) --
    if not has_cc and isinstance(tools, list):
        has_cc = any(
            isinstance(tool, dict) and "cache_control" in tool for tool in tools
        )
    if has_cc:
        parts.append("cache_control_fields=present")

    # -- approximate request body size --
    try:
        body_bytes = len(json.dumps(body, ensure_ascii=False).encode("utf-8"))
    except (TypeError, ValueError, RecursionError):
        # Non-serializable, circular, or pathologically nested bodies: the
        # size is best-effort only, so omit it rather than fail the log call.
        pass
    else:
        parts.append(f"body_bytes={body_bytes}")

    # ``parts`` always holds at least the two baseline entries.
    return f" [{', '.join(parts)}]"
class TestBuildSemanticRejectionDiagnostic:
    """Tests for ``_build_semantic_rejection_diagnostic``.

    The helper produces the request-shape suffix attached to vendor
    semantic-rejection logs (e.g. Zhipu ``[1210]``). These tests verify that:

    - the baseline fields (model / messages) are always emitted;
    - every other item is emitted only when the parameter is present,
      to avoid log noise;
    - the output format of each field is stable.
    """

    def test_baseline_minimal_body(self):
        """A minimal body only reports model, messages, and derived items."""
        body = {"model": "glm-5-turbo", "messages": [{"role": "user", "content": "hi"}]}
        result = _build_semantic_rejection_diagnostic(body)
        assert "model=glm-5-turbo" in result
        assert "messages=1" in result
        # Parameters that are absent from the body must not be reported.
        assert "thinking" not in result
        assert "tools" not in result
        assert "cache_control" not in result

    def test_includes_thinking_param(self):
        """A top-level ``thinking`` parameter is reported via its repr."""
        body = {
            "model": "glm-5-turbo",
            "messages": [],
            "thinking": {"type": "enabled", "budget_tokens": 1024},
        }
        result = _build_semantic_rejection_diagnostic(body)
        assert "thinking=" in result
        assert "budget_tokens" in result

    def test_includes_system_string(self):
        """A string ``system`` prompt is reported with its exact length."""
        body = {
            "model": "glm-5-turbo",
            "messages": [],
            "system": "You are helpful." * 5,
        }
        result = _build_semantic_rejection_diagnostic(body)
        # 16 characters repeated 5 times.
        assert "system_kind=string(len=80)" in result

    def test_includes_system_blocks_with_cache_control(self):
        """Block-form ``system`` reports block count and cache_control count."""
        body = {
            "model": "glm-5-turbo",
            "messages": [],
            "system": [
                {
                    "type": "text",
                    "text": "rule1",
                    "cache_control": {"type": "ephemeral"},
                },
                {"type": "text", "text": "rule2"},
            ],
        }
        result = _build_semantic_rejection_diagnostic(body)
        assert "system_blocks=2,cc=1" in result

    def test_includes_tools_and_tool_choice(self):
        """Tool count and the repr of ``tool_choice`` are both reported."""
        body = {
            "model": "glm-5-turbo",
            "messages": [],
            "tools": [{"name": "a"}, {"name": "b"}, {"name": "c"}],
            "tool_choice": {"type": "auto"},
        }
        result = _build_semantic_rejection_diagnostic(body)
        assert "tools=3" in result
        assert "tool_choice={'type': 'auto'}" in result

    def test_includes_sampling_params(self):
        """Each sampling parameter present in the body is reported."""
        body = {
            "model": "glm-5-turbo",
            "messages": [],
            "max_tokens": 8192,
            "temperature": 0.7,
            "top_p": 0.9,
            "top_k": 40,
            "stop_sequences": ["\n\n", "END"],
        }
        result = _build_semantic_rejection_diagnostic(body)
        assert "max_tokens=8192" in result
        assert "temperature=0.7" in result
        assert "top_p=0.9" in result
        assert "top_k=40" in result
        assert "stop_sequences=2" in result

    def test_includes_stream_and_metadata(self):
        """``stream`` and the number of metadata keys are reported."""
        body = {
            "model": "glm-5-turbo",
            "messages": [],
            "stream": True,
            "metadata": {"user_id": "x", "session_id": "y"},
        }
        result = _build_semantic_rejection_diagnostic(body)
        assert "stream=True" in result
        assert "metadata_keys=2" in result

    def test_content_type_distribution(self):
        """Content block types are counted across all messages."""
        body = {
            "model": "glm-5-turbo",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "hi"},
                        {"type": "text", "text": "bye"},
                        {"type": "image", "source": {}},
                    ],
                },
                {
                    "role": "assistant",
                    "content": [
                        {"type": "tool_use", "id": "t1", "name": "x", "input": {}},
                    ],
                },
            ],
        }
        result = _build_semantic_rejection_diagnostic(body)
        # Types are listed in alphabetical order.
        assert "content_types={image:1,text:2,tool_use:1}" in result

    def test_content_type_string_messages(self):
        """String-valued ``content`` is counted under ``string``."""
        body = {
            "model": "glm-5-turbo",
            "messages": [
                {"role": "user", "content": "hello"},
                {"role": "assistant", "content": "hi"},
            ],
        }
        result = _build_semantic_rejection_diagnostic(body)
        assert "content_types={string:2}" in result

    def test_thinking_blocks_in_history(self):
        """Both ``thinking`` and ``redacted_thinking`` blocks are counted."""
        body = {
            "model": "glm-5-turbo",
            "messages": [
                {
                    "role": "assistant",
                    "content": [
                        {"type": "thinking", "thinking": "..."},
                        {"type": "redacted_thinking", "data": "..."},
                        {"type": "text", "text": "result"},
                    ],
                }
            ],
        }
        result = _build_semantic_rejection_diagnostic(body)
        assert "thinking_blocks_in_history=2" in result

    def test_cache_control_in_messages_or_tools(self):
        """``cache_control`` on a message block or on a tool is flagged."""
        in_messages = {
            "model": "glm-5-turbo",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "x",
                            "cache_control": {"type": "ephemeral"},
                        },
                    ],
                }
            ],
        }
        result = _build_semantic_rejection_diagnostic(in_messages)
        assert "cache_control_fields=present" in result

        # The tools path must be detected independently of messages.
        in_tools = {
            "model": "glm-5-turbo",
            "messages": [],
            "tools": [{"name": "a", "cache_control": {"type": "ephemeral"}}],
        }
        result = _build_semantic_rejection_diagnostic(in_tools)
        assert "cache_control_fields=present" in result

    def test_body_bytes_estimated(self):
        """A serializable body gets a ``body_bytes`` estimate."""
        body = {"model": "glm-5-turbo", "messages": [{"role": "user", "content": "ok"}]}
        result = _build_semantic_rejection_diagnostic(body)
        assert "body_bytes=" in result

    def test_body_bytes_skipped_when_unserializable(self):
        """A non-serializable body must not raise; the size is omitted."""

        class NonSerializable:
            pass

        body = {
            "model": "glm-5-turbo",
            "messages": [],
            "metadata": {"obj": NonSerializable()},
        }
        # Must not raise.
        result = _build_semantic_rejection_diagnostic(body)
        assert "model=glm-5-turbo" in result
        # Other fields are still reported; only the size estimate is dropped.
        assert "metadata_keys=1" in result
        assert "body_bytes=" not in result

    def test_combined_real_world_failure_case(self):
        """Mirror an observed failing request.

        One message, no thinking / cache_control, with system + tools.
        """
        body = {
            "model": "glm-5-turbo",
            "messages": [{"role": "user", "content": "需要修复一个 bug"}],
            "system": [{"type": "text", "text": "You are Claude Code."}],
            "tools": [{"name": "Read"}, {"name": "Edit"}],
            "max_tokens": 8192,
            "temperature": 1.0,
            "metadata": {"user_id": "x"},
            "stream": True,
        }
        result = _build_semantic_rejection_diagnostic(body)
        assert "model=glm-5-turbo" in result
        assert "messages=1" in result
        assert "system_blocks=1" in result
        assert "tools=2" in result
        assert "max_tokens=8192" in result
        assert "temperature=1.0" in result
        assert "metadata_keys=1" in result
        assert "stream=True" in result
        # Items that do not occur in the body must not be reported.
        assert "thinking_blocks_in_history" not in result
        assert "cache_control_fields" not in result