Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

## [Unreleased]

- feat(dashboard): Model Calling 实时监控扩展至全 vendor / 全 model(仅 CC 场景),其他 vendor 在 monitor 模式下仅计数不限流,Zhipu 保留 limited 模式 + FIFO 排队;
- feat(concurrency): 新增 `peak_pending_recent` 最近 10s 排队峰值追踪,瞬时排队释放后前端仍可见"曾排队 N" 余晖徽章;
- perf(dashboard): Model Calling 轮询间隔由 5000ms 缩短至 1500ms,提升瞬时排队可观测性;
- refactor(vendors): `ModelConcurrencyLimiter` 重构为 `ModelConcurrencyController`,统一 monitor / limited 双模式抽象(保留旧名别名);并发控制由 vendor 内部迁移至 executor 层 `track_in_flight` 包裹,行为对所有 vendor 一致;

## [v0.5.0](https://github.com/ThreeFish-AI/coding-proxy/releases/tag/v0.5.0) - 2026-05-27

> [!IMPORTANT]
Expand Down
24 changes: 14 additions & 10 deletions src/coding/proxy/routing/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,15 +689,17 @@ async def execute_stream(
tier.name, failed_tier_name, session_record, body
)
body_for_tier = self._prepare_body_for_tier(body, tier, source_vendor)
async for chunk in tier.vendor.send_message_stream(
body_for_tier, headers
):
parse_usage_from_chunk(
chunk,
usage,
vendor_label=_VENDOR_PROTOCOL_LABEL_MAP.get(tier.name),
)
yield chunk, tier.name
_mapped_model = tier.vendor.map_model(body.get("model", ""))
async with tier.vendor.track_in_flight(_mapped_model):
async for chunk in tier.vendor.send_message_stream(
body_for_tier, headers
):
parse_usage_from_chunk(
chunk,
usage,
vendor_label=_VENDOR_PROTOCOL_LABEL_MAP.get(tier.name),
)
yield chunk, tier.name

info = self._recorder.build_usage_info(usage)
if has_missing_input_usage_signals(info):
Expand Down Expand Up @@ -863,7 +865,9 @@ async def execute_message(
tier.name, failed_tier_name, session_record, body
)
body_for_tier = self._prepare_body_for_tier(body, tier, source_vendor)
resp = await tier.vendor.send_message(body_for_tier, headers)
_mapped_model = tier.vendor.map_model(body.get("model", ""))
async with tier.vendor.track_in_flight(_mapped_model):
resp = await tier.vendor.send_message(body_for_tier, headers)

if resp.status_code < 400:
duration = int((time.monotonic() - start) * 1000)
Expand Down
51 changes: 36 additions & 15 deletions src/coding/proxy/server/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,10 @@ def _build_favicon() -> bytes:
background: rgba(251,146,60,.15);
color: #fb923c;
}
.mc-badge-peak {
background: rgba(148,163,184,.12);
color: #94a3b8;
}
.mc-badge-active {
background: rgba(74,222,128,.12);
color: #4ade80;
Expand Down Expand Up @@ -1282,10 +1286,12 @@ def _build_favicon() -> bytes:
models.push({
vendor: tier.name,
model: model,
limit: d.limit || 0,
mode: d.mode || 'limited',
limit: d.limit,
in_use: d.in_use || 0,
available: d.available || 0,
available: d.available,
pending: d.pending || 0,
peak_pending_recent: d.peak_pending_recent || 0,
});
}
}
Expand All @@ -1298,18 +1304,33 @@ def _build_favicon() -> bytes:
var html = '<div class="mc-grid">';
for (var k = 0; k < models.length; k++) {
var m = models[k];
var pct = m.limit > 0 ? Math.round((m.in_use / m.limit) * 100) : 0;
var barClass = pct <= 50 ? 'mc-low' : (pct <= 80 ? 'mc-mid' : 'mc-high');

html += '<div class="mc-model-row">'
+ '<span class="mc-model-name">' + escapeHtml(m.vendor + '/' + m.model) + '</span>'
+ '<div class="mc-bar-wrap"><div class="mc-bar-fill ' + barClass + '" style="width:' + pct + '%"></div></div>'
+ '<div class="mc-stats">'
+ '<span class="mc-badge mc-badge-active">' + m.in_use
+ '/<span class="mc-limit-editable" data-tier="' + escapeHtml(m.vendor) + '" data-model="' + escapeHtml(m.model) + '" data-limit="' + m.limit + '" title="点击修改并行度">' + m.limit + '</span></span>'
+ (m.pending > 0 ? '<span class="mc-badge mc-badge-pending">⏳ ' + m.pending + '</span>' : '')
+ '</div>'
+ '</div>';

if (m.mode === 'monitor') {
// monitor 模式:纯计数徽章,无 limit/进度条
html += '<div class="mc-model-row">'
+ '<span class="mc-model-name">' + escapeHtml(m.vendor + '/' + m.model) + '</span>'
+ '<div class="mc-bar-wrap"></div>'
+ '<div class="mc-stats">'
+ '<span class="mc-badge mc-badge-active">' + m.in_use + '</span>'
+ '</div>'
+ '</div>';
} else {
// limited 模式:保留现有渲染(进度条 + limit 编辑)
var limit = m.limit || 0;
var pct = limit > 0 ? Math.round((m.in_use / limit) * 100) : 0;
var barClass = pct <= 50 ? 'mc-low' : (pct <= 80 ? 'mc-mid' : 'mc-high');

html += '<div class="mc-model-row">'
+ '<span class="mc-model-name">' + escapeHtml(m.vendor + '/' + m.model) + '</span>'
+ '<div class="mc-bar-wrap"><div class="mc-bar-fill ' + barClass + '" style="width:' + pct + '%"></div></div>'
+ '<div class="mc-stats">'
+ '<span class="mc-badge mc-badge-active">' + m.in_use
+ '/<span class="mc-limit-editable" data-tier="' + escapeHtml(m.vendor) + '" data-model="' + escapeHtml(m.model) + '" data-limit="' + limit + '" title="点击修改并行度">' + limit + '</span></span>'
+ (m.pending > 0 ? '<span class="mc-badge mc-badge-pending">⏳ ' + m.pending + '</span>' : '')
+ (m.pending === 0 && m.peak_pending_recent > 0 ? '<span class="mc-badge mc-badge-peak">🕘 曾排队 ' + m.peak_pending_recent + '</span>' : '')
+ '</div>'
+ '</div>';
}
}
html += '</div>';
wrap.innerHTML = html;
Expand All @@ -1325,7 +1346,7 @@ def _build_favicon() -> bytes:
}).catch(function() {});
}
tick();
_mcTimer = setInterval(tick, 5000);
_mcTimer = setInterval(tick, 1500);
}
function stopModelCallingPoll() {
if (_mcTimer) { clearInterval(_mcTimer); _mcTimer = null; }
Expand Down
13 changes: 6 additions & 7 deletions src/coding/proxy/server/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,16 +254,15 @@ async def update_concurrency(request: Request) -> Response:
for tier in router.tiers:
if tier.name == tier_name:
vendor = tier.vendor
update_fn = getattr(vendor, "update_concurrency", None)
if update_fn is None:
try:
vendor.update_concurrency(model, limit)
except ValueError as exc:
return json_error_response(
400,
422,
error_type="invalid_request_error",
message=f"vendor '{tier_name}' does not support concurrency",
message=str(exc),
)
try:
update_fn(model, limit)
except (ValueError, AttributeError) as exc:
except AttributeError as exc:
return json_error_response(
400, error_type="invalid_request_error", message=str(exc)
)
Expand Down
25 changes: 25 additions & 0 deletions src/coding/proxy/vendors/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
)
from ..compat.session_store import CompatSessionRecord
from ..config.schema import FailoverConfig
from .concurrency import ModelConcurrencyController

logger = logging.getLogger(__name__)

Expand All @@ -63,6 +64,8 @@ def __init__(
self._client: httpx.AsyncClient | None = None
self._compat_trace: CompatibilityTrace | None = None
self._compat_session_record: CompatSessionRecord | None = None
# 默认 monitor 模式(仅计数不限流);子类可覆盖为 limited 模式
self._concurrency_controller = ModelConcurrencyController(None)

def _get_client(self) -> httpx.AsyncClient:
if self._client is None or self._client.is_closed:
Expand Down Expand Up @@ -246,8 +249,30 @@ def get_diagnostics(self) -> dict[str, Any]:
diagnostics: dict[str, Any] = {}
if self._compat_trace is not None:
diagnostics["compat"] = self._compat_trace.to_dict()
concurrency = self._concurrency_controller.get_diagnostics()
if concurrency:
diagnostics["concurrency"] = concurrency
return diagnostics

def track_in_flight(self, mapped_model: str):
    """Return the async context manager that tracks one in-flight request.

    Args:
        mapped_model: Vendor-side model name the request is sent to.

    Returns:
        The context manager produced by the concurrency controller's
        ``track`` for ``mapped_model``, or a no-op context when the model
        name is empty (defensive handling).
    """
    if mapped_model:
        return self._concurrency_controller.track(mapped_model)

    # Nothing to key the tracking on: hand back a context that does nothing.
    from contextlib import nullcontext

    return nullcontext()

def update_concurrency(self, model: str, limit: int) -> None:
    """Update the concurrency limit of a model at runtime.

    The default implementation forwards to
    ``_concurrency_controller.set_limit``.

    Args:
        model: Name of the model whose limit is changed.
        limit: New concurrency limit for that model.

    Raises:
        ValueError: Raised by the controller when it runs in monitor mode,
            where no limit can be set.
    """
    controller = self._concurrency_controller
    controller.set_limit(model, limit)

def should_trigger_failover(
self, status_code: int, body: dict[str, Any] | None
) -> bool:
Expand Down
Loading
Loading