diff --git a/src/coding/proxy/server/dashboard.py b/src/coding/proxy/server/dashboard.py index 54533e6..c81b72c 100644 --- a/src/coding/proxy/server/dashboard.py +++ b/src/coding/proxy/server/dashboard.py @@ -557,6 +557,89 @@ def _build_favicon() -> bytes: .tab-btn:focus-visible { outline: 2px solid var(--accent-blue); outline-offset: 2px; } .tab-pane { display: none; } .tab-pane.active { display: block; } + + /* ── Model Calling 实时状态 ────────────────────────── */ + .model-calling-card { + margin-bottom: 5px; + } + .mc-empty { + text-align: center; + color: var(--text-muted); + padding: 16px 0; + font-size: 13px; + } + .mc-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(320px, 1fr)); + gap: 8px; + } + .mc-model-row { + display: flex; + align-items: center; + gap: 10px; + padding: 8px 12px; + background: var(--bg-secondary); + border-radius: var(--radius-sm); + border: 1px solid var(--border-subtle); + } + .mc-model-name { + font-family: 'JetBrains Mono', monospace; + font-size: 12px; + color: var(--text-primary); + min-width: 140px; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + } + .mc-bar-wrap { + flex: 1; + min-width: 60px; + height: 6px; + background: rgba(255,255,255,.06); + border-radius: 3px; + overflow: hidden; + } + .mc-bar-fill { + height: 100%; + border-radius: 3px; + transition: width .3s ease, background .3s ease; + } + .mc-bar-fill.mc-low { background: var(--accent-green); } + .mc-bar-fill.mc-mid { background: var(--accent-yellow); } + .mc-bar-fill.mc-high { background: var(--accent-red); } + .mc-stats { + display: flex; + align-items: center; + gap: 6px; + font-size: 11px; + font-family: 'JetBrains Mono', monospace; + color: var(--text-muted); + white-space: nowrap; + } + .mc-badge { + display: inline-flex; + align-items: center; + padding: 1px 6px; + border-radius: 4px; + font-size: 10px; + font-weight: 600; + font-family: 'JetBrains Mono', monospace; + } + .mc-badge-pending { + background: 
rgba(251,146,60,.15); + color: #fb923c; + } + .mc-badge-active { + background: rgba(74,222,128,.12); + color: #4ade80; + } + .mc-vendor-tag { + font-size: 10px; + color: var(--text-muted); + background: rgba(255,255,255,.06); + padding: 1px 6px; + border-radius: 3px; + } @@ -626,6 +709,14 @@ def _build_favicon() -> bytes: + +
+
📡 Model Calling 实时状态
+
+
加载中…
+
+
+
// ── Model Calling live status ─────────────────────────────────

// Coerce a diagnostics counter to a finite, non-negative number so that
// only numbers are ever concatenated into the markup built below.
function _mcCount(value) {
  var n = Number(value);
  return (isFinite(n) && n > 0) ? n : 0;
}

// Flatten status.tiers[*].diagnostics.concurrency into one record per model.
// Tiers without a `concurrency` entry are skipped.
function _mcCollectModels(status) {
  var tiers = (status && status.tiers) || [];
  var models = [];
  for (var i = 0; i < tiers.length; i++) {
    var tier = tiers[i] || {};
    var conc = (tier.diagnostics || {}).concurrency;
    if (!conc) continue;
    var names = Object.keys(conc);
    for (var j = 0; j < names.length; j++) {
      var d = conc[names[j]] || {};  // tolerate a null per-model entry
      models.push({
        vendor: tier.name,
        model: names[j],
        limit: _mcCount(d.limit),
        in_use: _mcCount(d.in_use),
        available: _mcCount(d.available),
        pending: _mcCount(d.pending),
      });
    }
  }
  return models;
}

// Render the per-model concurrency rows into #model-calling-wrap.
// `status` is the /api/status payload; a missing container is a no-op.
// NOTE(review): the element tags below were reconstructed from the mc-* CSS
// class names — confirm them against the dashboard template.
function updateModelCalling(status) {
  var wrap = document.getElementById('model-calling-wrap');
  if (!wrap) return;

  var models = _mcCollectModels(status);
  if (!models.length) {
    wrap.innerHTML = '<div class="mc-empty">无活跃模型调用</div>';
    return;
  }

  var html = '<div class="mc-grid">';
  for (var k = 0; k < models.length; k++) {
    var m = models[k];
    // Clamp so an in_use count above the limit cannot produce a >100% width.
    var pct = m.limit > 0 ? Math.min(100, Math.round((m.in_use / m.limit) * 100)) : 0;
    var barClass = pct <= 50 ? 'mc-low' : (pct <= 80 ? 'mc-mid' : 'mc-high');

    html += '<div class="mc-model-row">' +
      '<span class="mc-model-name">' + escapeHtml(m.vendor + '/' + m.model) + '</span>' +
      '<div class="mc-bar-wrap">' +
      '<div class="mc-bar-fill ' + barClass + '" style="width:' + pct + '%"></div>' +
      '</div>' +
      '<div class="mc-stats">' +
      '<span class="mc-badge mc-badge-active">' + m.in_use + '/' + m.limit + '</span>' +
      (m.pending > 0 ? '<span class="mc-badge mc-badge-pending">⏳ ' + m.pending + '</span>' : '') +
      '</div>' +
      '</div>';
  }
  html += '</div>';
  wrap.innerHTML = html;
}

// Dedicated short-interval polling for the Model Calling card.
var _mcTimer = null;

// (Re)start polling: fetch once immediately, then every 5 seconds.
function startModelCallingPoll() {
  stopModelCallingPoll();  // never leave two intervals running
  function tick() {
    fetchJSON('/api/status').then(function(status) {
      updateModelCalling(status);
    }).catch(function() {
      // Best-effort: keep the last rendered state; the next tick retries.
    });
  }
  tick();
  _mcTimer = setInterval(tick, 5000);
}

// Stop polling if it is running.
function stopModelCallingPoll() {
  if (_mcTimer !== null) {
    clearInterval(_mcTimer);
    _mcTimer = null;
  }
}
def get_diagnostics(self) -> dict[str, Any]:
    """Return the vendor's runtime diagnostics.

    Starts from the parent class's diagnostics and, when a concurrency
    limiter is configured, stores the limiter's own diagnostics under the
    ``"concurrency"`` key.
    """
    info = super().get_diagnostics()
    limiter = self._concurrency_limiter
    if limiter is None:
        # No limiter configured: report the base diagnostics unchanged.
        return info
    info["concurrency"] = limiter.get_diagnostics()
    return info