From ac1e2f01ecc2a6c3c2c8e02b43a18e26c5a50fb2 Mon Sep 17 00:00:00 2001 From: nagar-decart Date: Sun, 19 Apr 2026 15:19:47 +0300 Subject: [PATCH 01/16] Add livekit transport opt-in alongside aiortc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Side-by-side WebRTC transport support for the inference server's new livekit path. aiortc stays the default and is fully back-compat. - packages/sdk/src/realtime/transports/livekit.ts: new LiveKitConnection. Public surface (connect/send/cleanup/ getPeerConnection/websocketMessagesEmitter/setImageBase64/state) matches WebRTCConnection so WebRTCManager can swap implementations. Control WS is identical (prompt / set_image / session_id / tick acks); the only differences are the media handshake (livekit_join → livekit_room_info, then Room.connect + publishTrack). - packages/sdk/src/realtime/transports/index.ts: shared TransportKind type + re-exports. - packages/sdk/src/realtime/webrtc-manager.ts: gains an optional transport: "aiortc" | "livekit" field in WebRTCConfig. The constructor dispatches to LiveKitConnection when opted in, WebRTCConnection otherwise. All manager state machine logic (reconnect, buffer, emit) is transport-agnostic. - packages/sdk/src/realtime/client.ts: RealTimeClientConnectOptions now accepts `transport`; it's threaded into the manager config. - package.json: adds livekit-client ^2.0.0. Typecheck passes; all 145 existing unit tests still pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/sdk/package.json | 1 + packages/sdk/src/realtime/client.ts | 6 + packages/sdk/src/realtime/transports/index.ts | 21 + .../sdk/src/realtime/transports/livekit.ts | 383 ++++++++++++++++++ packages/sdk/src/realtime/webrtc-manager.ts | 35 +- pnpm-lock.yaml | 289 +++++++++---- 6 files changed, 647 insertions(+), 88 deletions(-) create mode 100644 packages/sdk/src/realtime/transports/index.ts create mode 100644 packages/sdk/src/realtime/transports/livekit.ts diff --git a/packages/sdk/package.json b/packages/sdk/package.json index 393c82c1..2088643d 100644 --- a/packages/sdk/package.json +++ b/packages/sdk/package.json @@ -56,6 +56,7 @@ "vitest": "^4.0.18" }, "dependencies": { + "livekit-client": "^2.0.0", "mitt": "^3.0.1", "p-retry": "^6.2.1", "zod": "^4.0.17" diff --git a/packages/sdk/src/realtime/client.ts b/packages/sdk/src/realtime/client.ts index f1869b0a..f3a59f72 100644 --- a/packages/sdk/src/realtime/client.ts +++ b/packages/sdk/src/realtime/client.ts @@ -93,6 +93,11 @@ const realTimeClientConnectOptionsSchema = z.object({ }), initialState: realTimeClientInitialStateSchema.optional(), customizeOffer: createAsyncFunctionSchema(z.function()).optional(), + // Opt-in per-session WebRTC transport. Defaults to "aiortc" (current + // shipping behavior). Set to "livekit" to join a LiveKit SFU room; the + // inference pod must have livekit in TRANSPORTS_ENABLED or the session + // will be rejected. + transport: z.enum(["aiortc", "livekit"]).optional(), }); export type RealTimeClientConnectOptions = Omit, "model"> & { model: ModelDefinition | CustomModelDefinition; @@ -194,6 +199,7 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => { modelName: options.model.name, initialImage, initialPrompt, + transport: options.transport, }); const manager = webrtcManager; diff --git a/packages/sdk/src/realtime/transports/index.ts b/packages/sdk/src/realtime/transports/index.ts new file mode 100644 index 00000000..23cb1876 --- /dev/null +++ b/packages/sdk/src/realtime/transports/index.ts @@ -0,0 +1,21 @@ +/** + * Transport dispatch for the realtime SDK. + * + * Today the SDK ships two transports: + * - aiortc (default, shipping): raw RTCPeerConnection + WebSocket signaling. + * The inference server handles media via aiortc server-side. + * - livekit (opt-in): joins a LiveKit SFU room; the inference server + * publishes/subscribes in the same room. + * + * Both transports talk to bouncer via the same WS URL. The transport-specific + * difference is only in the media setup handshake (SDP offer vs. room_info) + * and which media stack moves frames. Control messages (prompt, set_image, + * session_id, generation_tick, acks) flow over the bouncer WS for both. + */ + +export type TransportKind = "aiortc" | "livekit"; + +// Re-export the two concrete connections via a named surface so consumers +// (WebRTCManager) don't need to import from each file individually. +export { WebRTCConnection as AiortcConnection } from "../webrtc-connection"; +export { LiveKitConnection } from "./livekit"; diff --git a/packages/sdk/src/realtime/transports/livekit.ts b/packages/sdk/src/realtime/transports/livekit.ts new file mode 100644 index 00000000..3ffeadf6 --- /dev/null +++ b/packages/sdk/src/realtime/transports/livekit.ts @@ -0,0 +1,383 @@ +/** + * LiveKit transport for the realtime SDK. + * + * Control flow mirrors WebRTCConnection (same WS URL, same control messages + * for prompt/set_image/init/session_id/generation_tick). The only differences + * are in the media handshake: + * + * Client → bouncer WS: { type: "livekit_join" } + * bouncer/inference → { type: "livekit_room_info", livekit_url, token, room_name } + * Client → LiveKit SFU: Room.connect(url, token) + publishTrack(...) + * + * Public surface matches WebRTCConnection enough that WebRTCManager can swap + * implementations behind a `transport` option. + */ + +import mitt from "mitt"; +import { + ConnectionState as LKConnectionState, + Room, + RoomEvent, + Track, + TrackEvent, + type RemoteTrack, + type RemoteTrackPublication, + type RemoteParticipant, +} from "livekit-client"; + +import type { Logger } from "../../utils/logger"; +import { buildUserAgent } from "../../utils/user-agent"; +import type { DiagnosticEmitter } from "../diagnostics"; +import type { + ConnectionState, + GenerationTickMessage, + IncomingWebRTCMessage, + OutgoingWebRTCMessage, + PromptAckMessage, + SessionIdMessage, + SetImageAckMessage, +} from "../types"; + +const AVATAR_SETUP_TIMEOUT_MS = 30_000; +const ROOM_INFO_TIMEOUT_MS = 15_000; + +interface LiveKitCallbacks { + onRemoteStream?: (stream: MediaStream) => void; + onStateChange?: (state: ConnectionState) => void; + onError?: (error: Error) => void; + modelName?: string; + initialImage?: string; + initialPrompt?: { text: string; enhance?: boolean }; + logger?: Logger; + onDiagnostic?: DiagnosticEmitter; +} + +type WsMessageEvents = { + promptAck: PromptAckMessage; + setImageAck: SetImageAckMessage; + sessionId: SessionIdMessage; + generationTick: GenerationTickMessage; +}; + +const noopDiagnostic: DiagnosticEmitter = () => {}; + +interface RoomInfoMessage { + type: "livekit_room_info"; + livekit_url: string; + token: string; + room_name: string; +} + +export class LiveKitConnection { + private ws: WebSocket | null = null; + private room: Room | null = null; + private localStream: MediaStream | null = null; + private connectionReject: ((error: Error) => void) | null = null; + private logger: Logger; + private emitDiagnostic: DiagnosticEmitter; + state: ConnectionState = "disconnected"; + websocketMessagesEmitter = mitt(); + + constructor(private callbacks: LiveKitCallbacks = {}) { + this.logger = callbacks.logger ?? { debug() {}, info() {}, warn() {}, error() {} }; + this.emitDiagnostic = callbacks.onDiagnostic ?? noopDiagnostic; + } + + getPeerConnection(): RTCPeerConnection | null { + // No raw PC for the LiveKit transport. Stats come from room.getStats(). + return null; + } + + async connect(url: string, localStream: MediaStream | null, timeout: number, integration?: string): Promise { + this.localStream = localStream; + + // Append user agent exactly like the aiortc transport. + const userAgent = encodeURIComponent(buildUserAgent(integration)); + const separator = url.includes("?") ? "&" : "?"; + const wsUrl = `${url}${separator}user_agent=${userAgent}`; + + let rejectConnect!: (error: Error) => void; + const connectAbort = new Promise((_, reject) => { + rejectConnect = reject; + }); + connectAbort.catch(() => {}); + this.connectionReject = (error) => rejectConnect(error); + + try { + // Phase 1 — control WS + livekit_join/livekit_room_info handshake. + const roomInfoStart = performance.now(); + await this.openControlWs(wsUrl, timeout); + const roomInfo = await this.requestRoomInfo(); + this.emitDiagnostic("phaseTiming", { + phase: "websocket", + durationMs: performance.now() - roomInfoStart, + success: true, + }); + + // Phase 2 — join the SFU room and publish local tracks. + const roomStart = performance.now(); + await this.joinRoom(roomInfo); + this.emitDiagnostic("phaseTiming", { + phase: "webrtc-handshake", + durationMs: performance.now() - roomStart, + success: true, + }); + + // Phase 3 — optional: send initial prompt over control WS. + if (this.callbacks.initialPrompt) { + await this.sendInitialPrompt(this.callbacks.initialPrompt); + } + + this.setState("connected"); + } catch (error) { + this.cleanup(); + throw error; + } finally { + this.connectionReject = null; + } + } + + send(message: OutgoingWebRTCMessage): boolean { + if (this.ws?.readyState === WebSocket.OPEN) { + this.ws.send(JSON.stringify(message)); + return true; + } + this.logger.warn("Message dropped: WebSocket is not open"); + return false; + } + + async setImageBase64( + imageBase64: string | null, + options?: { prompt?: string | null; enhance?: boolean; timeout?: number }, + ): Promise { + return new Promise((resolve, reject) => { + const timeoutId = setTimeout(() => { + this.websocketMessagesEmitter.off("setImageAck", listener); + reject(new Error("Image send timed out")); + }, options?.timeout ?? AVATAR_SETUP_TIMEOUT_MS); + + const listener = (msg: SetImageAckMessage) => { + clearTimeout(timeoutId); + this.websocketMessagesEmitter.off("setImageAck", listener); + if (msg.success) { + resolve(); + } else { + reject(new Error(msg.error ?? "Failed to send image")); + } + }; + this.websocketMessagesEmitter.on("setImageAck", listener); + + const message: { + type: "set_image"; + image_data: string | null; + prompt?: string | null; + enhance_prompt?: boolean; + } = { type: "set_image", image_data: imageBase64 }; + if (options?.prompt !== undefined) message.prompt = options.prompt; + if (options?.enhance !== undefined) message.enhance_prompt = options.enhance; + + if (!this.send(message)) { + clearTimeout(timeoutId); + this.websocketMessagesEmitter.off("setImageAck", listener); + reject(new Error("WebSocket is not open")); + } + }); + } + + cleanup(): void { + this.setState("disconnected"); + if (this.room) { + // Fire and forget — disconnect is async but we don't want to await + // during cleanup paths. + this.room.disconnect().catch(() => {}); + this.room = null; + } + if (this.ws) { + try { + this.ws.close(); + } catch { + // ignore + } + this.ws = null; + } + this.localStream = null; + } + + // ------------------------------------------------------------------------- + // Private — control WS + // ------------------------------------------------------------------------- + + private async openControlWs(wsUrl: string, timeout: number): Promise { + await new Promise((resolve, reject) => { + const timer = setTimeout(() => reject(new Error("WebSocket timeout")), timeout); + this.ws = new WebSocket(wsUrl); + this.ws.onopen = () => { + clearTimeout(timer); + resolve(); + }; + this.ws.onclose = (e) => { + this.logger.info("LiveKit control WS closed", { code: e.code, reason: e.reason }); + // If the room is still connecting this also aborts the connect flow. + this.connectionReject?.(new Error(`WebSocket closed: ${e.code} ${e.reason}`)); + if (!this.room || this.room.state !== LKConnectionState.Connected) { + this.setState("disconnected"); + } + }; + this.ws.onerror = () => { + // Error events don't carry details; onclose handles state transitions. + }; + this.ws.onmessage = (e) => { + try { + this.handleControlMessage(JSON.parse(e.data)); + } catch (error) { + this.logger.error("LiveKit control WS message parse error", { error: String(error) }); + } + }; + }); + } + + private async requestRoomInfo(): Promise { + this.send({ type: "livekit_join" } as unknown as OutgoingWebRTCMessage); + return await new Promise((resolve, reject) => { + const timer = setTimeout(() => { + cleanup(); + reject(new Error(`livekit_room_info timeout (${ROOM_INFO_TIMEOUT_MS}ms)`)); + }, ROOM_INFO_TIMEOUT_MS); + + const handler = (msg: IncomingWebRTCMessage | RoomInfoMessage) => { + if ((msg as RoomInfoMessage).type === "livekit_room_info") { + cleanup(); + resolve(msg as RoomInfoMessage); + } else if ((msg as { type: string }).type === "error") { + cleanup(); + reject(new Error((msg as { error?: string }).error ?? "server error")); + } + }; + const cleanup = () => { + clearTimeout(timer); + this.pendingRoomInfoResolvers = this.pendingRoomInfoResolvers.filter((h) => h !== handler); + }; + this.pendingRoomInfoResolvers.push(handler); + }); + } + + private pendingRoomInfoResolvers: Array<(msg: IncomingWebRTCMessage | RoomInfoMessage) => void> = []; + + private handleControlMessage(msg: IncomingWebRTCMessage | RoomInfoMessage): void { + // First give pending livekit_room_info awaiters a chance. + for (const resolver of [...this.pendingRoomInfoResolvers]) { + resolver(msg); + } + + // Then fan out control-plane acks to the shared emitter (same events + // WebRTCConnection emits so RealTimeClient consumes both identically). + const typed = msg as IncomingWebRTCMessage; + switch (typed.type) { + case "prompt_ack": + this.websocketMessagesEmitter.emit("promptAck", typed); + break; + case "set_image_ack": + this.websocketMessagesEmitter.emit("setImageAck", typed); + break; + case "session_id": + this.websocketMessagesEmitter.emit("sessionId", typed); + break; + case "generation_tick": + this.websocketMessagesEmitter.emit("generationTick", typed); + break; + } + } + + // ------------------------------------------------------------------------- + // Private — LiveKit room + // ------------------------------------------------------------------------- + + private async joinRoom(info: RoomInfoMessage): Promise { + this.room = new Room({ + adaptiveStream: false, + dynacast: false, + }); + + this.room.on(RoomEvent.TrackSubscribed, (track: RemoteTrack, _pub: RemoteTrackPublication, _p: RemoteParticipant) => { + if (track.kind === Track.Kind.Video || track.kind === Track.Kind.Audio) { + // Compose a MediaStream for the SDK consumer. We attach the track + // element so downstream TrackEvent.VideoPlaybackStarted fires and + // the browser actually starts decoding — the element isn't kept. + track.attach(); + const mediaStreamTrack = track.mediaStreamTrack; + if (mediaStreamTrack) { + const stream = new MediaStream([mediaStreamTrack]); + this.callbacks.onRemoteStream?.(stream); + } + track.on(TrackEvent.VideoPlaybackStarted, () => { + this.setState("generating"); + }); + } + }); + + this.room.on(RoomEvent.Connected, () => { + this.logger.info("LiveKit room connected"); + }); + this.room.on(RoomEvent.Disconnected, (reason?: unknown) => { + this.logger.info("LiveKit room disconnected", { reason: String(reason) }); + this.setState("disconnected"); + }); + + await this.room.connect(info.livekit_url, info.token); + + // Publish local tracks. Inference server expects a video track; audio is optional. + if (this.localStream) { + for (const track of this.localStream.getTracks()) { + if (track.kind === "video") { + await this.room.localParticipant.publishTrack(track, { + simulcast: true, + source: Track.Source.Camera, + }); + } else { + await this.room.localParticipant.publishTrack(track); + } + } + } + } + + private async sendInitialPrompt(prompt: { text: string; enhance?: boolean }): Promise { + return new Promise((resolve, reject) => { + const timeoutId = setTimeout(() => { + this.websocketMessagesEmitter.off("promptAck", listener); + reject(new Error("Prompt send timed out")); + }, AVATAR_SETUP_TIMEOUT_MS); + + const listener = (msg: PromptAckMessage) => { + if (msg.prompt === prompt.text) { + clearTimeout(timeoutId); + this.websocketMessagesEmitter.off("promptAck", listener); + if (msg.success) { + resolve(); + } else { + reject(new Error(msg.error ?? "Failed to send prompt")); + } + } + }; + this.websocketMessagesEmitter.on("promptAck", listener); + + const message = { + type: "prompt", + prompt: prompt.text, + enhance: prompt.enhance ?? false, + } as unknown as OutgoingWebRTCMessage; + + if (!this.send(message)) { + clearTimeout(timeoutId); + this.websocketMessagesEmitter.off("promptAck", listener); + reject(new Error("WebSocket is not open")); + } + }); + } + + private setState(state: ConnectionState): void { + if (this.state !== state) { + this.state = state; + this.callbacks.onStateChange?.(state); + } + } +} diff --git a/packages/sdk/src/realtime/webrtc-manager.ts b/packages/sdk/src/realtime/webrtc-manager.ts index 71408fb7..c5cfe354 100644 --- a/packages/sdk/src/realtime/webrtc-manager.ts +++ b/packages/sdk/src/realtime/webrtc-manager.ts @@ -2,9 +2,15 @@ import pRetry, { AbortError } from "p-retry"; import type { Logger } from "../utils/logger"; import type { DiagnosticEmitter } from "./diagnostics"; +import { LiveKitConnection } from "./transports/livekit"; +import type { TransportKind } from "./transports"; import type { ConnectionState, OutgoingMessage } from "./types"; import { WebRTCConnection } from "./webrtc-connection"; +// Shared shape both connection types expose — narrows the union for +// WebRTCManager so both transports can be driven uniformly. +type TransportConnection = WebRTCConnection | LiveKitConnection; + export interface WebRTCConfig { webrtcUrl: string; integration?: string; @@ -19,6 +25,13 @@ export interface WebRTCConfig { modelName?: string; initialImage?: string; initialPrompt?: { text: string; enhance?: boolean }; + /** + * Selects the underlying WebRTC transport. Default is "aiortc" for + * back-compat with existing deployments. Set to "livekit" to join a + * LiveKit SFU room (requires the inference pod to enable it in + * TRANSPORTS_ENABLED). + */ + transport?: TransportKind; } const PERMANENT_ERRORS = [ @@ -40,7 +53,7 @@ const RETRY_OPTIONS = { } as const; export class WebRTCManager { - private connection: WebRTCConnection; + private connection: TransportConnection; private config: WebRTCConfig; private logger: Logger; private localStream: MediaStream | null = null; @@ -54,19 +67,27 @@ export class WebRTCManager { constructor(config: WebRTCConfig) { this.config = config; this.logger = config.logger ?? { debug() {}, info() {}, warn() {}, error() {} }; - this.connection = new WebRTCConnection({ + const transport: TransportKind = config.transport ?? "aiortc"; + const sharedOpts = { onRemoteStream: config.onRemoteStream, - onStateChange: (state) => this.handleConnectionStateChange(state), + onStateChange: (state: ConnectionState) => this.handleConnectionStateChange(state), onError: config.onError, - customizeOffer: config.customizeOffer, - vp8MinBitrate: config.vp8MinBitrate, - vp8StartBitrate: config.vp8StartBitrate, modelName: config.modelName, initialImage: config.initialImage, initialPrompt: config.initialPrompt, logger: this.logger, onDiagnostic: config.onDiagnostic, - }); + }; + if (transport === "livekit") { + this.connection = new LiveKitConnection(sharedOpts); + } else { + this.connection = new WebRTCConnection({ + ...sharedOpts, + customizeOffer: config.customizeOffer, + vp8MinBitrate: config.vp8MinBitrate, + vp8StartBitrate: config.vp8StartBitrate, + }); + } } private emitState(state: ConnectionState): void { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a766cc3c..a85a9f2e 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -317,6 +317,9 @@ importers: packages/sdk: dependencies: + livekit-client: + specifier: ^2.0.0 + version: 2.18.3(@types/dom-mediacapture-record@1.0.22) mitt: specifier: ^3.0.1 version: 3.0.1 @@ -522,6 +525,9 @@ packages: cpu: [x64] os: [win32] + '@bufbuild/protobuf@1.10.1': + resolution: {integrity: sha512-wJ8ReQbHxsAfXhrf9ixl0aYbZorRuOWpBNzm8pL8ftmSxQx/wnJD5Eg861NwJU/czy2VXFIebCeZnZrI9rktIQ==} + '@bundled-es-modules/cookie@2.0.1': resolution: {integrity: sha512-8o+5fRPLNbjbdGRRmJj3h6Hh1AQJf2dk3qQ/5ZFb+PXkRNiSoMGGUKlsgLfrxneb72axVJyIYji64E2+nNfYyw==} @@ -578,14 +584,17 @@ packages: resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==} engines: {node: '>=12'} - '@emnapi/core@1.7.1': - resolution: {integrity: sha512-o1uhUASyo921r2XtHYOHy7gdkGLge8ghBEQHMWmyJFoXlpU58kIrhhN3w26lpQb6dspetweapMn2CSNwQ8I4wg==} + '@emnapi/core@1.9.2': + resolution: {integrity: sha512-UC+ZhH3XtczQYfOlu3lNEkdW/p4dsJ1r/bP7H8+rhao3TTTMO1ATq/4DdIi23XuGoFY+Cz0JmCbdVl0hz9jZcA==} '@emnapi/runtime@1.7.1': resolution: {integrity: sha512-PVtJr5CmLwYAU9PZDMITZoR5iAOShYREoR45EyyLrbntV50mdePTgUn4AmOw90Ifcj+x2kRjdzr1HP3RrNiHGA==} - '@emnapi/wasi-threads@1.1.0': - resolution: {integrity: sha512-WI0DdZ8xFSbgMjR1sFsKABJ/C5OnRrjT06JXbZKexJGrDuPTzZdDYfFlsgcCXCyf+suG5QU2e/y1Wo2V/OapLQ==} + '@emnapi/runtime@1.9.2': + resolution: {integrity: sha512-3U4+MIWHImeyu1wnmVygh5WlgfYDtyf0k8AbLhMFxOipihf6nrWC4syIm/SwEeec0mNSafiiNnMJwbza/Is6Lw==} + + '@emnapi/wasi-threads@1.2.1': + resolution: {integrity: sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w==} '@esbuild-plugins/node-globals-polyfill@0.2.3': resolution: {integrity: sha512-r3MIryXDeXDOZh7ih1l/yE9ZLORCd5e8vWg02azWRGj5SPTuoh69A2AIyn0Z31V/kHBfZ4HgWJ+OK3GTTwLmnw==} @@ -1345,12 +1354,21 @@ packages: resolution: {integrity: sha512-f5DRIOZf7wxogefH03RjMPMdBF7ADTWUMoOs9kaJo06EfwF+aFhMZMDZxHg/Xe12hptN9xoZjGso2fdjapBRIA==} engines: {node: '>=10'} + '@livekit/mutex@1.1.1': + resolution: {integrity: sha512-EsshAucklmpuUAfkABPxJNhzj9v2sG7JuzFDL4ML1oJQSV14sqrpTYnsaOudMAw9yOaW53NU3QQTlUQoRs4czw==} + + '@livekit/protocol@1.45.3': + resolution: {integrity: sha512-WmMxBTsy4dRBqcrswFwUUlgq3Z0nnhOqKR6tX749Rb/PcB1yBMUtrHxZvcsS6qi3/5+86zHeVG+exmu1sZqfJg==} + '@mswjs/interceptors@0.39.7': resolution: {integrity: sha512-sURvQbbKsq5f8INV54YJgJEdk8oxBanqkTiXXd33rKmofFCwZLhLRszPduMZ9TA9b8/1CHc/IJmOlBHJk2Q5AQ==} engines: {node: '>=18'} - '@napi-rs/wasm-runtime@1.1.1': - resolution: {integrity: sha512-p64ah1M1ld8xjWv3qbvFwHiFVWrq1yFvV4f7w+mzaqiR4IlSgkqhcRdHwsGgomwzBH51sRY4NEowLxnaBjcW/A==} + '@napi-rs/wasm-runtime@1.1.4': + resolution: {integrity: sha512-3NQNNgA1YSlJb/kMH1ildASP9HW7/7kYnRI2szWJaofaS1hWmbGI4H+d3+22aGzXXN9IJ+n+GiFVcGipJP18ow==} + peerDependencies: + '@emnapi/core': ^1.7.1 + '@emnapi/runtime': ^1.7.1 '@next/env@15.5.7': resolution: {integrity: sha512-4h6Y2NyEkIEN7Z8YxkA27pq6zTkS09bUSYC0xjd0NpwFxjnIKeZEeH591o5WECSmjpUhLn3H2QLJcDye3Uzcvg==} @@ -1535,8 +1553,8 @@ packages: '@open-draft/until@2.1.0': resolution: {integrity: sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg==} - '@oxc-project/types@0.114.0': - resolution: {integrity: sha512-//nBfbzHQHvJs8oFIjv6coZ6uxQ4alLfiPe6D5vit6c4pmxATHHlVwgB1k+Hv4yoAMyncdxgRBF5K4BYWUCzvA==} + '@oxc-project/types@0.126.0': + resolution: {integrity: sha512-oGfVtjAgwQVVpfBrbtk4e1XDyWHRFta6BS3GWVzrF8xYBT2VGQAk39yJS/wFSMrZqoiCU4oghT3Ch0HaHGIHcQ==} '@polka/url@1.0.0-next.29': resolution: {integrity: sha512-wwQAWhWSuHaag8c4q/KN/vCoeOJYshAIvMQwD4GpSb3OiZklFfvAgmj0VCBBImRpuF/aFgIRzllXlVX93Jevww==} @@ -1544,79 +1562,91 @@ packages: '@quansync/fs@0.1.4': resolution: {integrity: sha512-vy/41FCdnIalPTQCb2Wl0ic1caMdzGus4ktDp+gpZesQNydXcx8nhh8qB3qMPbGkictOTaXgXEUUfQEm8DQYoA==} - '@rolldown/binding-android-arm64@1.0.0-rc.5': - resolution: {integrity: sha512-zCEmUrt1bggwgBgeKLxNj217J1OrChrp3jJt24VK9jAharSTeVaHODNL+LpcQVhRz+FktYWfT9cjo5oZ99ZLpg==} + '@rolldown/binding-android-arm64@1.0.0-rc.16': + resolution: {integrity: sha512-rhY3k7Bsae9qQfOtph2Pm2jZEA+s8Gmjoz4hhmx70K9iMQ/ddeae+xhRQcM5IuVx5ry1+bGfkvMn7D6MJggVSA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [android] - '@rolldown/binding-darwin-arm64@1.0.0-rc.5': - resolution: {integrity: sha512-ZP9xb9lPAex36pvkNWCjSEJW/Gfdm9I3ssiqOFLmpZ/vosPXgpoGxCmh+dX1Qs+/bWQE6toNFXWWL8vYoKoK9Q==} + '@rolldown/binding-darwin-arm64@1.0.0-rc.16': + resolution: {integrity: sha512-rNz0yK078yrNn3DrdgN+PKiMOW8HfQ92jQiXxwX8yW899ayV00MLVdaCNeVBhG/TbH3ouYVObo8/yrkiectkcQ==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [darwin] - '@rolldown/binding-darwin-x64@1.0.0-rc.5': - resolution: {integrity: sha512-7IdrPunf6dp9mywMgTOKMMGDnMHQ6+h5gRl6LW8rhD8WK2kXX0IwzcM5Zc0B5J7xQs8QWOlKjv8BJsU/1CD3pg==} + '@rolldown/binding-darwin-x64@1.0.0-rc.16': + resolution: {integrity: sha512-r/OmdR00HmD4i79Z//xO06uEPOq5hRXdhw7nzkxQxwSavs3PSHa1ijntdpOiZ2mzOQ3fVVu8C1M19FoNM+dMUQ==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [darwin] - '@rolldown/binding-freebsd-x64@1.0.0-rc.5': - resolution: {integrity: sha512-o/JCk+dL0IN68EBhZ4DqfsfvxPfMeoM6cJtxORC1YYoxGHZyth2Kb2maXDb4oddw2wu8iIbnYXYPEzBtAF5CAg==} + '@rolldown/binding-freebsd-x64@1.0.0-rc.16': + resolution: {integrity: sha512-KcRE5w8h0OnjUatG8pldyD14/CQ5Phs1oxfR+3pKDjboHRo9+MkqQaiIZlZRpsxC15paeXme/I127tUa9TXJ6g==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [freebsd] - '@rolldown/binding-linux-arm-gnueabihf@1.0.0-rc.5': - resolution: {integrity: sha512-IIBwTtA6VwxQLcEgq2mfrUgam7VvPZjhd/jxmeS1npM+edWsrrpRLHUdze+sk4rhb8/xpP3flemgcZXXUW6ukw==} + '@rolldown/binding-linux-arm-gnueabihf@1.0.0-rc.16': + resolution: {integrity: sha512-bT0guA1bpxEJ/ZhTRniQf7rNF8ybvXOuWbNIeLABaV5NGjx4EtOWBTSRGWFU9ZWVkPOZ+HNFP8RMcBokBiZ0Kg==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm] os: [linux] - '@rolldown/binding-linux-arm64-gnu@1.0.0-rc.5': - resolution: {integrity: sha512-KSol1De1spMZL+Xg7K5IBWXIvRWv7+pveaxFWXpezezAG7CS6ojzRjtCGCiLxQricutTAi/LkNWKMsd2wNhMKQ==} + '@rolldown/binding-linux-arm64-gnu@1.0.0-rc.16': + resolution: {integrity: sha512-+tHktCHWV8BDQSjemUqm/Jl/TPk3QObCTIjmdDy/nlupcujZghmKK2962LYrqFpWu+ai01AN/REOH3NEpqvYQg==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] - '@rolldown/binding-linux-arm64-musl@1.0.0-rc.5': - resolution: {integrity: sha512-WFljyDkxtXRlWxMjxeegf7xMYXxUr8u7JdXlOEWKYgDqEgxUnSEsVDxBiNWQ1D5kQKwf8Wo4sVKEYPRhCdsjwA==} + '@rolldown/binding-linux-arm64-musl@1.0.0-rc.16': + resolution: {integrity: sha512-3fPzdREH806oRLxpTWW1Gt4tQHs0TitZFOECB2xzCFLPKnSOy90gwA7P29cksYilFO6XVRY1kzga0cL2nRjKPg==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] - '@rolldown/binding-linux-x64-gnu@1.0.0-rc.5': - resolution: {integrity: sha512-CUlplTujmbDWp2gamvrqVKi2Or8lmngXT1WxsizJfts7JrvfGhZObciaY/+CbdbS9qNnskvwMZNEhTPrn7b+WA==} + '@rolldown/binding-linux-ppc64-gnu@1.0.0-rc.16': + resolution: {integrity: sha512-EKwI1tSrLs7YVw+JPJT/G2dJQ1jl9qlTTTEG0V2Ok/RdOenRfBw2PQdLPyjhIu58ocdBfP7vIRN/pvMsPxs/AQ==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [ppc64] + os: [linux] + + '@rolldown/binding-linux-s390x-gnu@1.0.0-rc.16': + resolution: {integrity: sha512-Uknladnb3Sxqu6SEcqBldQyJUpk8NleooZEc0MbRBJ4inEhRYWZX0NJu12vNf2mqAq7gsofAxHrGghiUYjhaLQ==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [s390x] + os: [linux] + + '@rolldown/binding-linux-x64-gnu@1.0.0-rc.16': + resolution: {integrity: sha512-FIb8+uG49sZBtLTn+zt1AJ20TqVcqWeSIyoVt0or7uAWesgKaHbiBh6OpA/k9v0LTt+PTrb1Lao133kP4uVxkg==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] - '@rolldown/binding-linux-x64-musl@1.0.0-rc.5': - resolution: {integrity: sha512-wdf7g9NbVZCeAo2iGhsjJb7I8ZFfs6X8bumfrWg82VK+8P6AlLXwk48a1ASiJQDTS7Svq2xVzZg3sGO2aXpHRA==} + '@rolldown/binding-linux-x64-musl@1.0.0-rc.16': + resolution: {integrity: sha512-RuERhF9/EgWxZEXYWCOaViUWHIboceK4/ivdtQ3R0T44NjLkIIlGIAVAuCddFxsZ7vnRHtNQUrt2vR2n2slB2w==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] - '@rolldown/binding-openharmony-arm64@1.0.0-rc.5': - resolution: {integrity: sha512-0CWY7ubu12nhzz+tkpHjoG3IRSTlWYe0wrfJRf4qqjqQSGtAYgoL9kwzdvlhaFdZ5ffVeyYw9qLsChcjUMEloQ==} + '@rolldown/binding-openharmony-arm64@1.0.0-rc.16': + resolution: {integrity: sha512-mXcXnvd9GpazCxeUCCnZ2+YF7nut+ZOEbE4GtaiPtyY6AkhZWbK70y1KK3j+RDhjVq5+U8FySkKRb/+w0EeUwA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [openharmony] - '@rolldown/binding-wasm32-wasi@1.0.0-rc.5': - resolution: {integrity: sha512-LztXnGzv6t2u830mnZrFLRVqT/DPJ9DL4ZTz/y93rqUVkeHjMMYIYaFj+BUthiYxbVH9dH0SZYufETspKY/NhA==} - engines: {node: '>=14.0.0'} + '@rolldown/binding-wasm32-wasi@1.0.0-rc.16': + resolution: {integrity: sha512-3Q2KQxnC8IJOLqXmUMoYwyIPZU9hzRbnHaoV3Euz+VVnjZKcY8ktnNP8T9R4/GGQtb27C/UYKABxesKWb8lsvQ==} + engines: {node: ^20.19.0 || >=22.12.0} cpu: [wasm32] - '@rolldown/binding-win32-arm64-msvc@1.0.0-rc.5': - resolution: {integrity: sha512-jUct1XVeGtyjqJXEAfvdFa8xoigYZ2rge7nYEm70ppQxpfH9ze2fbIrpHmP2tNM2vL/F6Dd0CpXhpjPbC6bSxQ==} + '@rolldown/binding-win32-arm64-msvc@1.0.0-rc.16': + resolution: {integrity: sha512-tj7XRemQcOcFwv7qhpUxMTBbI5mWMlE4c1Omhg5+h8GuLXzyj8HviYgR+bB2DMDgRqUE+jiDleqSCRjx4aYk/Q==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [win32] - '@rolldown/binding-win32-x64-msvc@1.0.0-rc.5': - resolution: {integrity: sha512-VQ8F9ld5gw29epjnVGdrx8ugiLTe8BMqmhDYy7nGbdeDo4HAt4bgdZvLbViEhg7DZyHLpiEUlO5/jPSUrIuxRQ==} + '@rolldown/binding-win32-x64-msvc@1.0.0-rc.16': + resolution: {integrity: sha512-PH5DRZT+F4f2PTXRXR8uJxnBq2po/xFtddyabTJVJs/ZYVHqXPEgNIr35IHTEa6bpa0Q8Awg+ymkTaGnKITw4g==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [win32] @@ -1627,8 +1657,8 @@ packages: '@rolldown/pluginutils@1.0.0-beta.40': resolution: {integrity: sha512-s3GeJKSQOwBlzdUrj4ISjJj5SfSh+aqn0wjOar4Bx95iV1ETI7F6S/5hLcfAxZ9kXDcyrAkxPlqmd1ZITttf+w==} - '@rolldown/pluginutils@1.0.0-rc.5': - resolution: {integrity: sha512-RxlLX/DPoarZ9PtxVrQgZhPoor987YtKQqCo5zkjX+0S0yLJ7Vv515Wk6+xtTL67VONKJKxETWZwuZjss2idYw==} + '@rolldown/pluginutils@1.0.0-rc.16': + resolution: {integrity: sha512-45+YtqxLYKDWQouLKCrpIZhke+nXxhsw+qAHVzHDVwttyBlHNBVs2K25rDXrZzhpTp9w1FlAlvweV1H++fdZoA==} '@rollup/rollup-android-arm-eabi@4.46.2': resolution: {integrity: sha512-Zj3Hl6sN34xJtMv7Anwb5Gu01yujyE/cLBDB2gnHTAHaWS1Z38L7kuSG+oAh0giZMqG060f/YBStXtMH6FvPMA==} @@ -1886,6 +1916,9 @@ packages: '@types/deep-eql@4.0.2': resolution: {integrity: sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==} + '@types/dom-mediacapture-record@1.0.22': + resolution: {integrity: sha512-mUMZLK3NvwRLcAAT9qmcK+9p7tpU2FHdDsntR3YI4+GY88XrgG4XiE7u1Q2LAN2/FZOz/tdMDC3GQCR4T8nFuw==} + '@types/estree@1.0.8': resolution: {integrity: sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==} @@ -2460,6 +2493,10 @@ packages: resolution: {integrity: sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==} engines: {node: '>= 0.6'} + events@3.3.0: + resolution: {integrity: sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==} + engines: {node: '>=0.8.x'} + exit-hook@2.2.1: resolution: {integrity: sha512-eNTPlAD67BmP31LDINZ3U7HSF8l57TxOY2PmBJ1shpCvpnxBF93mWCE8YHBnXs8qiUZJc9WDcWIeC3a2HIAMfw==} engines: {node: '>=6'} @@ -2664,6 +2701,9 @@ packages: resolution: {integrity: sha512-twQoecYPiVA5K/h6SxtORw/Bs3ar+mLUtoPSc7iMXzQzK8d7eJ/R09wmTwAjiamETn1cXYPGfNnu7DMoHgu12w==} hasBin: true + jose@6.2.2: + resolution: {integrity: sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==} + js-tokens@4.0.0: resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==} @@ -2687,6 +2727,15 @@ packages: jsonc-parser@3.3.1: resolution: {integrity: sha512-HUgH65KyejrUFPvHFPbqOY0rsFip3Bo5wb4ngvdi1EpCYWUQDC5V+Y7mZws+DLkr4M//zQJoanu1SP+87Dv1oQ==} + livekit-client@2.18.3: + resolution: {integrity: sha512-A8QDaVPo+Ye35bJFyKe6PjMOtY33dmdRXGKP/3+BG48ynEES3YwFzHbsPHJiScgI4OZouNef3Ew/BPazXKwo8Q==} + peerDependencies: + '@types/dom-mediacapture-record': ^1 + + loglevel@1.9.2: + resolution: {integrity: sha512-HgMmCqIJSAKqo68l0rS2AanEWfkxaZ5wNiEFb5ggm08lDs9Xl2KxBlX3PTcaD2chBM1gXAYf491/M2Rv8Jwayg==} + engines: {node: '>= 0.6.0'} + loupe@3.2.0: resolution: {integrity: sha512-2NCfZcT5VGVNX9mSZIxLRkEAegDGBpuQZBy13desuHeVORmBDyAET4TkJr4SjqQy3A8JDofMN6LpkK8Xcm/dlw==} @@ -3068,8 +3117,8 @@ packages: vue-tsc: optional: true - rolldown@1.0.0-rc.5: - resolution: {integrity: sha512-0AdalTs6hNTioaCYIkAa7+xsmHBfU5hCNclZnM/lp7lGGDuUOb6N4BVNtwiomybbencDjq/waKjTImqiGCs5sw==} + rolldown@1.0.0-rc.16: + resolution: {integrity: sha512-rzi5WqKzEZw3SooTt7cgm4eqIoujPIyGcJNGFL7iPEuajQw7vxMHUkXylu4/vhCkJGXsgRmxqMKXUpT6FEgl0g==} engines: {node: ^20.19.0 || >=22.12.0} hasBin: true @@ -3091,6 +3140,9 @@ packages: rou3@0.7.12: resolution: {integrity: sha512-iFE4hLDuloSWcD7mjdCDhx2bKcIsYbtOTpfH5MHHLSKMOUyjqQXTeZVa289uuwEGEKFoE/BAPbhaU4B774nceg==} + rxjs@7.8.2: + resolution: {integrity: sha512-dhKf903U/PQZY6boNNtAGdWbG85WAbjT/1xYoZIC7FAY0yWapOBQVsVrDl58W86//e1VpMNBtRV4MaXfdMySFA==} + safe-buffer@5.2.1: resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==} @@ -3100,6 +3152,13 @@ packages: scheduler@0.27.0: resolution: {integrity: sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==} + sdp-transform@2.15.0: + resolution: {integrity: sha512-KrOH82c/W+GYQ0LHqtr3caRpM3ITglq3ljGUIb8LTki7ByacJZ9z+piSGiwZDsRyhQbYBOBJgr2k6X4BZXi3Kw==} + hasBin: true + + sdp@3.2.2: + resolution: {integrity: sha512-xZocWwfyp4hkbN4hLWxMjmv2Q8aNa9MhmOZ7L9aCZPT+dZsgRr6wZRrSYE3HTdyk/2pZKPSgqI7ns7Een1xMSA==} + semver@6.3.1: resolution: {integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==} hasBin: true @@ -3382,6 +3441,9 @@ packages: resolution: {integrity: sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==} engines: {node: '>= 0.6'} + typed-emitter@2.1.0: + resolution: {integrity: sha512-g/KzbYKbH5C2vPkaXGu8DJlHrGKHLsM25Zg9WuC9pMGfuvT+X25tZQWo5fK1BjBm8+UrVE9LDCvaY0CQk+fXDA==} + typescript@5.9.2: resolution: {integrity: sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==} engines: {node: '>=14.17'} @@ -3666,6 +3728,10 @@ packages: webpack-virtual-modules@0.6.2: resolution: {integrity: sha512-66/V2i5hQanC51vBQKPH4aI8NMAcBW59FVBs+rC7eGHupMyfn34q7rZIE+ETlJ+XTevqfUhVVBgSUNSW2flEUQ==} + webrtc-adapter@9.0.5: + resolution: {integrity: sha512-U9vjByy/sK2OMXu5mmfuZFKTMIUQe34c0JXRO+oDrxJTsntdYT2iIFwYMOV7HhMTuktcZLGf2W1N/OcSf9ssWg==} + engines: {node: '>=6.0.0', npm: '>=3.10.0'} + whatwg-encoding@3.1.1: resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==} engines: {node: '>=18'} @@ -3943,6 +4009,8 @@ snapshots: '@biomejs/cli-win32-x64@2.3.8': optional: true + '@bufbuild/protobuf@1.10.1': {} + '@bundled-es-modules/cookie@2.0.1': dependencies: cookie: 0.7.2 @@ -3982,9 +4050,9 @@ snapshots: dependencies: '@jridgewell/trace-mapping': 0.3.9 - '@emnapi/core@1.7.1': + '@emnapi/core@1.9.2': dependencies: - '@emnapi/wasi-threads': 1.1.0 + '@emnapi/wasi-threads': 1.2.1 tslib: 2.8.1 optional: true @@ -3993,7 +4061,12 @@ snapshots: tslib: 2.8.1 optional: true - '@emnapi/wasi-threads@1.1.0': + '@emnapi/runtime@1.9.2': + dependencies: + tslib: 2.8.1 + optional: true + + '@emnapi/wasi-threads@1.2.1': dependencies: tslib: 2.8.1 optional: true @@ -4463,6 +4536,12 @@ snapshots: string-argv: 0.3.2 type-detect: 4.1.0 + '@livekit/mutex@1.1.1': {} + + '@livekit/protocol@1.45.3': + dependencies: + '@bufbuild/protobuf': 1.10.1 + '@mswjs/interceptors@0.39.7': dependencies: '@open-draft/deferred-promise': 2.2.0 @@ -4472,10 +4551,10 @@ snapshots: outvariant: 1.4.3 strict-event-emitter: 0.5.1 - '@napi-rs/wasm-runtime@1.1.1': + '@napi-rs/wasm-runtime@1.1.4(@emnapi/core@1.9.2)(@emnapi/runtime@1.9.2)': dependencies: - '@emnapi/core': 1.7.1 - '@emnapi/runtime': 1.7.1 + '@emnapi/core': 1.9.2 + '@emnapi/runtime': 1.9.2 '@tybys/wasm-util': 0.10.1 optional: true @@ -4629,7 +4708,7 @@ snapshots: '@open-draft/until@2.1.0': {} - '@oxc-project/types@0.114.0': {} + '@oxc-project/types@0.126.0': {} '@polka/url@1.0.0-next.29': {} @@ -4637,52 +4716,60 @@ snapshots: dependencies: quansync: 0.2.10 - '@rolldown/binding-android-arm64@1.0.0-rc.5': + '@rolldown/binding-android-arm64@1.0.0-rc.16': optional: true - '@rolldown/binding-darwin-arm64@1.0.0-rc.5': + '@rolldown/binding-darwin-arm64@1.0.0-rc.16': optional: true - '@rolldown/binding-darwin-x64@1.0.0-rc.5': + '@rolldown/binding-darwin-x64@1.0.0-rc.16': optional: true - '@rolldown/binding-freebsd-x64@1.0.0-rc.5': + '@rolldown/binding-freebsd-x64@1.0.0-rc.16': optional: true - '@rolldown/binding-linux-arm-gnueabihf@1.0.0-rc.5': + '@rolldown/binding-linux-arm-gnueabihf@1.0.0-rc.16': optional: true - '@rolldown/binding-linux-arm64-gnu@1.0.0-rc.5': + '@rolldown/binding-linux-arm64-gnu@1.0.0-rc.16': optional: true - '@rolldown/binding-linux-arm64-musl@1.0.0-rc.5': + '@rolldown/binding-linux-arm64-musl@1.0.0-rc.16': optional: true - '@rolldown/binding-linux-x64-gnu@1.0.0-rc.5': + '@rolldown/binding-linux-ppc64-gnu@1.0.0-rc.16': optional: true - '@rolldown/binding-linux-x64-musl@1.0.0-rc.5': + '@rolldown/binding-linux-s390x-gnu@1.0.0-rc.16': optional: true - '@rolldown/binding-openharmony-arm64@1.0.0-rc.5': + '@rolldown/binding-linux-x64-gnu@1.0.0-rc.16': optional: true - '@rolldown/binding-wasm32-wasi@1.0.0-rc.5': + '@rolldown/binding-linux-x64-musl@1.0.0-rc.16': + optional: true + + '@rolldown/binding-openharmony-arm64@1.0.0-rc.16': + optional: true + + '@rolldown/binding-wasm32-wasi@1.0.0-rc.16': dependencies: - '@napi-rs/wasm-runtime': 1.1.1 + '@emnapi/core': 1.9.2 + '@emnapi/runtime': 1.9.2 + '@napi-rs/wasm-runtime': 1.1.4(@emnapi/core@1.9.2)(@emnapi/runtime@1.9.2) optional: true - '@rolldown/binding-win32-arm64-msvc@1.0.0-rc.5': + '@rolldown/binding-win32-arm64-msvc@1.0.0-rc.16': optional: true - '@rolldown/binding-win32-x64-msvc@1.0.0-rc.5': + '@rolldown/binding-win32-x64-msvc@1.0.0-rc.16': optional: true '@rolldown/pluginutils@1.0.0-beta.27': {} '@rolldown/pluginutils@1.0.0-beta.40': {} - '@rolldown/pluginutils@1.0.0-rc.5': {} + '@rolldown/pluginutils@1.0.0-rc.16': {} '@rollup/rollup-android-arm-eabi@4.46.2': optional: true @@ -5002,6 +5089,8 @@ snapshots: '@types/deep-eql@4.0.2': {} + '@types/dom-mediacapture-record@1.0.22': {} + '@types/estree@1.0.8': {} '@types/express-serve-static-core@4.19.7': @@ -5711,6 +5800,8 @@ snapshots: etag@1.8.1: {} + events@3.3.0: {} + exit-hook@2.2.1: {} expect-type@1.2.2: {} @@ -5922,6 +6013,8 @@ snapshots: jiti@2.5.1: {} + jose@6.2.2: {} + js-tokens@4.0.0: {} js-tokens@9.0.1: {} @@ -5936,6 +6029,21 @@ snapshots: jsonc-parser@3.3.1: {} + livekit-client@2.18.3(@types/dom-mediacapture-record@1.0.22): + dependencies: + '@livekit/mutex': 1.1.1 + '@livekit/protocol': 1.45.3 + '@types/dom-mediacapture-record': 1.0.22 + events: 3.3.0 + jose: 6.2.2 + loglevel: 1.9.2 + sdp-transform: 2.15.0 + tslib: 2.8.1 + typed-emitter: 2.1.0 + webrtc-adapter: 9.0.5 + + loglevel@1.9.2: {} + loupe@3.2.0: {} lru-cache@5.1.1: @@ -6305,7 +6413,7 @@ snapshots: rettime@0.7.0: {} - rolldown-plugin-dts@0.15.6(rolldown@1.0.0-rc.5)(typescript@5.9.2): + rolldown-plugin-dts@0.15.6(rolldown@1.0.0-rc.16)(typescript@5.9.2): dependencies: '@babel/generator': 7.28.5 '@babel/parser': 7.28.5 @@ -6315,31 +6423,33 @@ snapshots: debug: 4.4.1 dts-resolver: 2.1.1 get-tsconfig: 4.10.1 - rolldown: 1.0.0-rc.5 + rolldown: 1.0.0-rc.16 optionalDependencies: typescript: 5.9.2 transitivePeerDependencies: - oxc-resolver - supports-color - rolldown@1.0.0-rc.5: + rolldown@1.0.0-rc.16: dependencies: - '@oxc-project/types': 0.114.0 - '@rolldown/pluginutils': 1.0.0-rc.5 + '@oxc-project/types': 0.126.0 + '@rolldown/pluginutils': 1.0.0-rc.16 optionalDependencies: - '@rolldown/binding-android-arm64': 1.0.0-rc.5 - '@rolldown/binding-darwin-arm64': 1.0.0-rc.5 - '@rolldown/binding-darwin-x64': 1.0.0-rc.5 - '@rolldown/binding-freebsd-x64': 1.0.0-rc.5 - '@rolldown/binding-linux-arm-gnueabihf': 1.0.0-rc.5 - '@rolldown/binding-linux-arm64-gnu': 1.0.0-rc.5 - '@rolldown/binding-linux-arm64-musl': 1.0.0-rc.5 - '@rolldown/binding-linux-x64-gnu': 1.0.0-rc.5 - '@rolldown/binding-linux-x64-musl': 1.0.0-rc.5 - '@rolldown/binding-openharmony-arm64': 1.0.0-rc.5 - '@rolldown/binding-wasm32-wasi': 1.0.0-rc.5 - '@rolldown/binding-win32-arm64-msvc': 1.0.0-rc.5 - '@rolldown/binding-win32-x64-msvc': 1.0.0-rc.5 + '@rolldown/binding-android-arm64': 1.0.0-rc.16 + '@rolldown/binding-darwin-arm64': 1.0.0-rc.16 + '@rolldown/binding-darwin-x64': 1.0.0-rc.16 + '@rolldown/binding-freebsd-x64': 1.0.0-rc.16 + '@rolldown/binding-linux-arm-gnueabihf': 1.0.0-rc.16 + '@rolldown/binding-linux-arm64-gnu': 1.0.0-rc.16 + '@rolldown/binding-linux-arm64-musl': 1.0.0-rc.16 + '@rolldown/binding-linux-ppc64-gnu': 1.0.0-rc.16 + '@rolldown/binding-linux-s390x-gnu': 1.0.0-rc.16 + '@rolldown/binding-linux-x64-gnu': 1.0.0-rc.16 + '@rolldown/binding-linux-x64-musl': 1.0.0-rc.16 + '@rolldown/binding-openharmony-arm64': 1.0.0-rc.16 + '@rolldown/binding-wasm32-wasi': 1.0.0-rc.16 + '@rolldown/binding-win32-arm64-msvc': 1.0.0-rc.16 + '@rolldown/binding-win32-x64-msvc': 1.0.0-rc.16 rollup-plugin-inject@3.0.2: dependencies: @@ -6383,12 +6493,21 @@ snapshots: rou3@0.7.12: {} + rxjs@7.8.2: + dependencies: + tslib: 2.8.1 + optional: true + safe-buffer@5.2.1: {} safer-buffer@2.1.2: {} scheduler@0.27.0: {} + sdp-transform@2.15.0: {} + + sdp@3.2.2: {} + semver@6.3.1: {} semver@7.7.3: {} @@ -6674,8 +6793,8 @@ snapshots: diff: 8.0.2 empathic: 2.0.0 hookable: 5.5.3 - rolldown: 1.0.0-rc.5 - rolldown-plugin-dts: 0.15.6(rolldown@1.0.0-rc.5)(typescript@5.9.2) + rolldown: 1.0.0-rc.16 + rolldown-plugin-dts: 0.15.6(rolldown@1.0.0-rc.16)(typescript@5.9.2) semver: 7.7.3 tinyexec: 1.0.1 tinyglobby: 0.2.14 @@ -6707,6 +6826,10 @@ snapshots: media-typer: 0.3.0 mime-types: 2.1.35 + typed-emitter@2.1.0: + optionalDependencies: + rxjs: 7.8.2 + typescript@5.9.2: {} typescript@5.9.3: {} @@ -6934,6 +7057,10 @@ snapshots: webpack-virtual-modules@0.6.2: {} + webrtc-adapter@9.0.5: + dependencies: + sdp: 3.2.2 + whatwg-encoding@3.1.1: dependencies: iconv-lite: 0.6.3 From edb61bd6edc569e2db5f185b88c0cea54a96bfb3 Mon Sep 17 00:00:00 2001 From: nagar-decart Date: Sun, 19 Apr 2026 16:53:13 +0300 Subject: [PATCH 02/16] demo: add transport radio toggle + pass it to realtime.connect index.html now has aiortc | livekit radios that feed realtime.connect({ transport }), so the dev demo at sdk.decart.local can flip between the two transports without a code change. Default stays aiortc so existing sanity tests are unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/sdk/index.html | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/packages/sdk/index.html b/packages/sdk/index.html index 2d5f9488..dc43acc5 100644 --- a/packages/sdk/index.html +++ b/packages/sdk/index.html @@ -241,6 +241,11 @@

Configuration

+
+ + + +
@@ -532,8 +537,11 @@

Console Logs

initialImage = await initialImageResponse.blob(); } + const selectedTransport = document.querySelector('input[name="transport"]:checked')?.value ?? 'aiortc'; + addLog(`Transport: ${selectedTransport}`, 'info'); decartRealtime = await decartClient.realtime.connect(localStream, { model, + transport: selectedTransport, onRemoteStream: (stream) => { addLog('Received remote stream from Decart', 'success'); elements.remoteVideo.srcObject = stream; From 6fcc5de654ac3f3ddb7f1dc0748368bba290b429 Mon Sep 17 00:00:00 2001 From: nagar-decart Date: Mon, 20 Apr 2026 09:36:22 +0300 Subject: [PATCH 03/16] feat(realtime): emit serverMetrics event for both transports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inference server gained an opt-in periodic `{"type": "server_metrics"}` WS emission (DecartAI/api PR forthcoming) that the webrtc-bench tool subscribes to for per-session fps / latency / queue-depth numbers. Surface it through the SDK so consumers can do: rtClient.on("serverMetrics", (msg) => ...) Changes: - types.ts: new ServerMetricsMessage type; added to IncomingWebRTCMessage. - webrtc-connection.ts (aiortc): parse `type: "server_metrics"` and emit on the internal websocketMessagesEmitter. - transports/livekit.ts: same, inside handleControlMessage switch. - client.ts: add `serverMetrics` to public Events, wire the listener so the internal emitter fans out to the public RealTimeClient.on surface. Default off — the server only emits when the client's realtime URL has `?emit_server_metrics=1`. Normal SDK consumers see nothing unless they explicitly opt in. Typecheck passes; 145/145 unit tests still green. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/sdk/src/realtime/client.ts | 20 ++++++++++++++++++- .../sdk/src/realtime/transports/livekit.ts | 6 ++++++ packages/sdk/src/realtime/types.ts | 20 ++++++++++++++++++- .../sdk/src/realtime/webrtc-connection.ts | 9 +++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/packages/sdk/src/realtime/client.ts b/packages/sdk/src/realtime/client.ts index f3a59f72..374433f4 100644 --- a/packages/sdk/src/realtime/client.ts +++ b/packages/sdk/src/realtime/client.ts @@ -15,7 +15,12 @@ import { type SubscribeOptions, } from "./subscribe-client"; import { type ITelemetryReporter, NullTelemetryReporter, TelemetryReporter } from "./telemetry-reporter"; -import type { ConnectionState, GenerationTickMessage, SessionIdMessage } from "./types"; +import type { + ConnectionState, + GenerationTickMessage, + ServerMetricsMessage, + SessionIdMessage, +} from "./types"; import { WebRTCManager } from "./webrtc-manager"; import { type WebRTCStats, WebRTCStatsCollector } from "./webrtc-stats"; @@ -109,6 +114,12 @@ export type Events = { generationTick: { seconds: number }; diagnostic: DiagnosticEvent; stats: WebRTCStats; + /** + * Optional server-side per-session metrics. Emitted only when the client + * connects with `?emit_server_metrics=1` on the realtime URL (consumed by + * the webrtc-bench tool). Normal SDK consumers can ignore this event. + */ + serverMetrics: ServerMetricsMessage; }; export type RealTimeClient = { @@ -264,6 +275,13 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => { }; manager.getWebsocketMessageEmitter().on("generationTick", tickListener); + // Opt-in server-side metrics — fans WebRTCConnection + LiveKitConnection + // emissions out onto the public RealTimeClient.on("serverMetrics", …). + const serverMetricsListener = (msg: ServerMetricsMessage) => { + emitOrBuffer("serverMetrics", msg); + }; + manager.getWebsocketMessageEmitter().on("serverMetrics", serverMetricsListener); + await manager.connect(inputStream); const methods = realtimeMethods(manager, imageToBase64); diff --git a/packages/sdk/src/realtime/transports/livekit.ts b/packages/sdk/src/realtime/transports/livekit.ts index 3ffeadf6..35e96005 100644 --- a/packages/sdk/src/realtime/transports/livekit.ts +++ b/packages/sdk/src/realtime/transports/livekit.ts @@ -34,6 +34,7 @@ import type { IncomingWebRTCMessage, OutgoingWebRTCMessage, PromptAckMessage, + ServerMetricsMessage, SessionIdMessage, SetImageAckMessage, } from "../types"; @@ -57,6 +58,7 @@ type WsMessageEvents = { setImageAck: SetImageAckMessage; sessionId: SessionIdMessage; generationTick: GenerationTickMessage; + serverMetrics: ServerMetricsMessage; }; const noopDiagnostic: DiagnosticEmitter = () => {}; @@ -285,6 +287,10 @@ export class LiveKitConnection { case "generation_tick": this.websocketMessagesEmitter.emit("generationTick", typed); break; + case "server_metrics": + // Opt-in server-side stats for the webrtc-bench tool. + this.websocketMessagesEmitter.emit("serverMetrics", typed); + break; } } diff --git a/packages/sdk/src/realtime/types.ts b/packages/sdk/src/realtime/types.ts index e1618e8e..79ead52c 100644 --- a/packages/sdk/src/realtime/types.ts +++ b/packages/sdk/src/realtime/types.ts @@ -64,6 +64,23 @@ export type GenerationEndedMessage = { reason: string; }; +/** + * Per-session server-side metrics, emitted periodically when the WS URL + * carries `?emit_server_metrics=1` (default off). Used by the webrtc-bench + * tool. Fields mirror inference_server/rt/metrics_emitter.py. + */ +export type ServerMetricsMessage = { + type: "server_metrics"; + session_id: string; + input_fps?: number; + output_fps?: number; + input_frame_count?: number; + output_frame_count?: number; + pipeline_latency_ms?: number; + video_in_qsize?: number; + video_out_qsize?: number; +}; + export type SessionIdMessage = { type: "session_id"; session_id: string; @@ -85,7 +102,8 @@ export type IncomingWebRTCMessage = | GenerationStartedMessage | GenerationTickMessage | GenerationEndedMessage - | SessionIdMessage; + | SessionIdMessage + | ServerMetricsMessage; // Outgoing message types (to server) export type OutgoingWebRTCMessage = diff --git a/packages/sdk/src/realtime/webrtc-connection.ts b/packages/sdk/src/realtime/webrtc-connection.ts index dc5802b9..632b59bf 100644 --- a/packages/sdk/src/realtime/webrtc-connection.ts +++ b/packages/sdk/src/realtime/webrtc-connection.ts @@ -9,6 +9,7 @@ import type { IncomingWebRTCMessage, OutgoingWebRTCMessage, PromptAckMessage, + ServerMetricsMessage, SessionIdMessage, SetImageAckMessage, } from "./types"; @@ -35,6 +36,7 @@ type WsMessageEvents = { setImageAck: SetImageAckMessage; sessionId: SessionIdMessage; generationTick: GenerationTickMessage; + serverMetrics: ServerMetricsMessage; }; const noopDiagnostic: DiagnosticEmitter = () => {}; @@ -254,6 +256,13 @@ export class WebRTCConnection { return; } + if (msg.type === "server_metrics") { + // Optional, opted into via `?emit_server_metrics=1` on the WS URL. + // Consumed by the webrtc-bench tool; ignored by normal SDK clients. + this.websocketMessagesEmitter.emit("serverMetrics", msg); + return; + } + // All other messages require peer connection if (!this.pc) return; From 64aea11a7242ec033f7bf07df57149d61a95c485 Mon Sep 17 00:00:00 2001 From: nagar-decart Date: Mon, 20 Apr 2026 10:37:03 +0300 Subject: [PATCH 04/16] feat(realtime): emit markerConfig event for both transports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forwards the inference server's E2E pixel-latency handshake (message type "marker_config") to SDK consumers. Symmetric with serverMetrics — opt-in via ?pixel_latency=1 on the realtime WS URL. The webrtc-bench tool uses this to align its PixelMarkerReader's search window with the server's actual stamp dimensions (which can differ from the client stamp dims when the server transcodes). Normal consumers ignore the event. - types.ts: MarkerConfigMessage + add to IncomingWebRTCMessage union. - webrtc-connection.ts + transports/livekit.ts: parse type == "marker_config" and emit on the transport's websocketMessagesEmitter. - client.ts: expose as a public markerConfig event on RealTimeClient, via the same emitOrBuffer path as serverMetrics. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/sdk/src/realtime/client.ts | 17 +++++++++++++++++ .../sdk/src/realtime/transports/livekit.ts | 7 +++++++ packages/sdk/src/realtime/types.ts | 19 ++++++++++++++++++- .../sdk/src/realtime/webrtc-connection.ts | 11 +++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/packages/sdk/src/realtime/client.ts b/packages/sdk/src/realtime/client.ts index 374433f4..6d90e266 100644 --- a/packages/sdk/src/realtime/client.ts +++ b/packages/sdk/src/realtime/client.ts @@ -18,6 +18,7 @@ import { type ITelemetryReporter, NullTelemetryReporter, TelemetryReporter } fro import type { ConnectionState, GenerationTickMessage, + MarkerConfigMessage, ServerMetricsMessage, SessionIdMessage, } from "./types"; @@ -120,6 +121,15 @@ export type Events = { * the webrtc-bench tool). Normal SDK consumers can ignore this event. */ serverMetrics: ServerMetricsMessage; + /** + * Server→client handshake for E2E pixel-latency marker stamping. Emitted + * once after the server first re-stamps a client marker onto an output + * frame, carrying the server's actual stamp dimensions. Opt-in via + * `?pixel_latency=1&stamp_width=...&stamp_height=...`. The webrtc-bench + * tool uses this to align its PixelMarkerReader's search window; normal + * SDK consumers can ignore this event. + */ + markerConfig: MarkerConfigMessage; }; export type RealTimeClient = { @@ -282,6 +292,13 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => { }; manager.getWebsocketMessageEmitter().on("serverMetrics", serverMetricsListener); + // Opt-in E2E pixel-latency handshake. Same fan-out pattern; consumed + // by the webrtc-bench tool to configure its PixelMarkerReader. + const markerConfigListener = (msg: MarkerConfigMessage) => { + emitOrBuffer("markerConfig", msg); + }; + manager.getWebsocketMessageEmitter().on("markerConfig", markerConfigListener); + await manager.connect(inputStream); const methods = realtimeMethods(manager, imageToBase64); diff --git a/packages/sdk/src/realtime/transports/livekit.ts b/packages/sdk/src/realtime/transports/livekit.ts index 35e96005..68041ab3 100644 --- a/packages/sdk/src/realtime/transports/livekit.ts +++ b/packages/sdk/src/realtime/transports/livekit.ts @@ -32,6 +32,7 @@ import type { ConnectionState, GenerationTickMessage, IncomingWebRTCMessage, + MarkerConfigMessage, OutgoingWebRTCMessage, PromptAckMessage, ServerMetricsMessage, @@ -59,6 +60,7 @@ type WsMessageEvents = { sessionId: SessionIdMessage; generationTick: GenerationTickMessage; serverMetrics: ServerMetricsMessage; + markerConfig: MarkerConfigMessage; }; const noopDiagnostic: DiagnosticEmitter = () => {}; @@ -291,6 +293,11 @@ export class LiveKitConnection { // Opt-in server-side stats for the webrtc-bench tool. this.websocketMessagesEmitter.emit("serverMetrics", typed); break; + case "marker_config": + // Server→client handshake for E2E pixel-latency marker stamping. + // Opt-in; consumed by the webrtc-bench tool's PixelMarkerReader. + this.websocketMessagesEmitter.emit("markerConfig", typed); + break; } } diff --git a/packages/sdk/src/realtime/types.ts b/packages/sdk/src/realtime/types.ts index 79ead52c..a4ada596 100644 --- a/packages/sdk/src/realtime/types.ts +++ b/packages/sdk/src/realtime/types.ts @@ -81,6 +81,22 @@ export type ServerMetricsMessage = { video_out_qsize?: number; }; +/** + * Server→client handshake for E2E pixel-latency marker stamping. Sent once + * by the inference server the first time it re-stamps a client-placed + * marker onto an output frame. Carries the server's actual stamp + * dimensions, which can differ from the client-sent `stamp_width` / + * `stamp_height` when the server transcodes. The client's + * `PixelMarkerReader` should use these to scale its search window. + * Only sent when the WS URL carries `?pixel_latency=1` (default off). + * Fields mirror inference_server/rt/ws.py::_send_marker_config. + */ +export type MarkerConfigMessage = { + type: "marker_config"; + stamp_width: number; + stamp_height: number; +}; + export type SessionIdMessage = { type: "session_id"; session_id: string; @@ -103,7 +119,8 @@ export type IncomingWebRTCMessage = | GenerationTickMessage | GenerationEndedMessage | SessionIdMessage - | ServerMetricsMessage; + | ServerMetricsMessage + | MarkerConfigMessage; // Outgoing message types (to server) export type OutgoingWebRTCMessage = diff --git a/packages/sdk/src/realtime/webrtc-connection.ts b/packages/sdk/src/realtime/webrtc-connection.ts index 632b59bf..95d57f63 100644 --- a/packages/sdk/src/realtime/webrtc-connection.ts +++ b/packages/sdk/src/realtime/webrtc-connection.ts @@ -7,6 +7,7 @@ import type { ConnectionState, GenerationTickMessage, IncomingWebRTCMessage, + MarkerConfigMessage, OutgoingWebRTCMessage, PromptAckMessage, ServerMetricsMessage, @@ -37,6 +38,7 @@ type WsMessageEvents = { sessionId: SessionIdMessage; generationTick: GenerationTickMessage; serverMetrics: ServerMetricsMessage; + markerConfig: MarkerConfigMessage; }; const noopDiagnostic: DiagnosticEmitter = () => {}; @@ -263,6 +265,15 @@ export class WebRTCConnection { return; } + if (msg.type === "marker_config") { + // Server→client handshake for E2E pixel-latency. Opted into via + // `?pixel_latency=1` on the WS URL. The bench tool forwards this to + // its PixelMarkerReader to align the search window with the server's + // actual stamp dimensions (may differ under transcoding). + this.websocketMessagesEmitter.emit("markerConfig", msg); + return; + } + // All other messages require peer connection if (!this.pc) return; From d44b1248f1898fc33d721ac66bf1b8248f290a41 Mon Sep 17 00:00:00 2001 From: nagar-decart Date: Mon, 20 Apr 2026 11:16:56 +0300 Subject: [PATCH 05/16] feat(realtime): remove markerConfig event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E2E pixel-latency no longer negotiates stamp dimensions between client and server — both sides use a fixed protocol and auto-detect the received scale. The marker_config WS message is gone, so drop the MarkerConfigMessage type and the event plumbing across client.ts, webrtc-connection.ts, transports/livekit.ts, and types.ts. Reverts the prior markerConfig addition on this branch; the webrtc-bench tool in api#1095 handles scale detection inside its PixelMarkerReader. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/sdk/src/realtime/client.ts | 17 ----------------- .../sdk/src/realtime/transports/livekit.ts | 7 ------- packages/sdk/src/realtime/types.ts | 19 +------------------ .../sdk/src/realtime/webrtc-connection.ts | 11 ----------- 4 files changed, 1 insertion(+), 53 deletions(-) diff --git a/packages/sdk/src/realtime/client.ts b/packages/sdk/src/realtime/client.ts index 6d90e266..374433f4 100644 --- a/packages/sdk/src/realtime/client.ts +++ b/packages/sdk/src/realtime/client.ts @@ -18,7 +18,6 @@ import { type ITelemetryReporter, NullTelemetryReporter, TelemetryReporter } fro import type { ConnectionState, GenerationTickMessage, - MarkerConfigMessage, ServerMetricsMessage, SessionIdMessage, } from "./types"; @@ -121,15 +120,6 @@ export type Events = { * the webrtc-bench tool). Normal SDK consumers can ignore this event. */ serverMetrics: ServerMetricsMessage; - /** - * Server→client handshake for E2E pixel-latency marker stamping. Emitted - * once after the server first re-stamps a client marker onto an output - * frame, carrying the server's actual stamp dimensions. Opt-in via - * `?pixel_latency=1&stamp_width=...&stamp_height=...`. The webrtc-bench - * tool uses this to align its PixelMarkerReader's search window; normal - * SDK consumers can ignore this event. - */ - markerConfig: MarkerConfigMessage; }; export type RealTimeClient = { @@ -292,13 +282,6 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => { }; manager.getWebsocketMessageEmitter().on("serverMetrics", serverMetricsListener); - // Opt-in E2E pixel-latency handshake. Same fan-out pattern; consumed - // by the webrtc-bench tool to configure its PixelMarkerReader. - const markerConfigListener = (msg: MarkerConfigMessage) => { - emitOrBuffer("markerConfig", msg); - }; - manager.getWebsocketMessageEmitter().on("markerConfig", markerConfigListener); - await manager.connect(inputStream); const methods = realtimeMethods(manager, imageToBase64); diff --git a/packages/sdk/src/realtime/transports/livekit.ts b/packages/sdk/src/realtime/transports/livekit.ts index 68041ab3..35e96005 100644 --- a/packages/sdk/src/realtime/transports/livekit.ts +++ b/packages/sdk/src/realtime/transports/livekit.ts @@ -32,7 +32,6 @@ import type { ConnectionState, GenerationTickMessage, IncomingWebRTCMessage, - MarkerConfigMessage, OutgoingWebRTCMessage, PromptAckMessage, ServerMetricsMessage, @@ -60,7 +59,6 @@ type WsMessageEvents = { sessionId: SessionIdMessage; generationTick: GenerationTickMessage; serverMetrics: ServerMetricsMessage; - markerConfig: MarkerConfigMessage; }; const noopDiagnostic: DiagnosticEmitter = () => {}; @@ -293,11 +291,6 @@ export class LiveKitConnection { // Opt-in server-side stats for the webrtc-bench tool. this.websocketMessagesEmitter.emit("serverMetrics", typed); break; - case "marker_config": - // Server→client handshake for E2E pixel-latency marker stamping. - // Opt-in; consumed by the webrtc-bench tool's PixelMarkerReader. - this.websocketMessagesEmitter.emit("markerConfig", typed); - break; } } diff --git a/packages/sdk/src/realtime/types.ts b/packages/sdk/src/realtime/types.ts index a4ada596..79ead52c 100644 --- a/packages/sdk/src/realtime/types.ts +++ b/packages/sdk/src/realtime/types.ts @@ -81,22 +81,6 @@ export type ServerMetricsMessage = { video_out_qsize?: number; }; -/** - * Server→client handshake for E2E pixel-latency marker stamping. Sent once - * by the inference server the first time it re-stamps a client-placed - * marker onto an output frame. Carries the server's actual stamp - * dimensions, which can differ from the client-sent `stamp_width` / - * `stamp_height` when the server transcodes. The client's - * `PixelMarkerReader` should use these to scale its search window. - * Only sent when the WS URL carries `?pixel_latency=1` (default off). - * Fields mirror inference_server/rt/ws.py::_send_marker_config. - */ -export type MarkerConfigMessage = { - type: "marker_config"; - stamp_width: number; - stamp_height: number; -}; - export type SessionIdMessage = { type: "session_id"; session_id: string; @@ -119,8 +103,7 @@ export type IncomingWebRTCMessage = | GenerationTickMessage | GenerationEndedMessage | SessionIdMessage - | ServerMetricsMessage - | MarkerConfigMessage; + | ServerMetricsMessage; // Outgoing message types (to server) export type OutgoingWebRTCMessage = diff --git a/packages/sdk/src/realtime/webrtc-connection.ts b/packages/sdk/src/realtime/webrtc-connection.ts index 95d57f63..632b59bf 100644 --- a/packages/sdk/src/realtime/webrtc-connection.ts +++ b/packages/sdk/src/realtime/webrtc-connection.ts @@ -7,7 +7,6 @@ import type { ConnectionState, GenerationTickMessage, IncomingWebRTCMessage, - MarkerConfigMessage, OutgoingWebRTCMessage, PromptAckMessage, ServerMetricsMessage, @@ -38,7 +37,6 @@ type WsMessageEvents = { sessionId: SessionIdMessage; generationTick: GenerationTickMessage; serverMetrics: ServerMetricsMessage; - markerConfig: MarkerConfigMessage; }; const noopDiagnostic: DiagnosticEmitter = () => {}; @@ -265,15 +263,6 @@ export class WebRTCConnection { return; } - if (msg.type === "marker_config") { - // Server→client handshake for E2E pixel-latency. Opted into via - // `?pixel_latency=1` on the WS URL. The bench tool forwards this to - // its PixelMarkerReader to align the search window with the server's - // actual stamp dimensions (may differ under transcoding). - this.websocketMessagesEmitter.emit("markerConfig", msg); - return; - } - // All other messages require peer connection if (!this.pc) return; From 33e8f5ec486de0490be450af269a9246ccfc1ff7 Mon Sep 17 00:00:00 2001 From: nagar-decart Date: Tue, 21 Apr 2026 11:19:42 +0300 Subject: [PATCH 06/16] feat(realtime): stats event for livekit + simulcast-safe outbound bitrate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes that let non-aiortc transports see the same `stats` event stream and that keep the reported outbound bitrate sensible under simulcast: 1. Transport-agnostic stats source. Introduce `StatsProvider`: `{ getStats(): Promise }`. `RTCPeerConnection` already satisfies it (aiortc path, back-compat); LiveKitConnection now supplies an aggregator that walks every local and remote track in the Room, calls `track.getRTCStatsReport()`, and merges the per-track reports into one RTCStatsReport-shaped Map. That's the minimum surface `WebRTCStatsCollector.parse()` needs — it iterates with `.forEach` and keys off `report.type`. Before: LiveKitConnection.getPeerConnection() returned null, so the SDK never started its stats collector for livekit sessions and no `stats` events fired. Now livekit sessions emit stats on the same cadence (and with the same payload shape) as aiortc. Client code (`startStatsCollection` / `handleConnectionStateChange`) now consults `manager.getStatsProvider()` instead of `manager.getPeerConnection()`. The identity check (so we don't restart the collector on every state change) still works because both the provider and the PC are stable references per connection. 2. Simulcast-safe outbound bitrate. Simulcast emits one `outbound-rtp` report per spatial layer (3 layers is typical). The parser used to overwrite `outboundVideo` with whichever layer `forEach` visited last — each layer has its own `bytesSent` counter, so across ticks the "last visited" layer would alternate and `bytesSent - prevBytesSentVideo` went violently negative. We saw `bitrateOutKbps` down to -6589 in bench results. Accumulate `bytesSent` + `packetsSent` across every outbound-rtp video report; compute the bitrate once, after the forEach, against the summed total. Also clamp the result to `Math.max(0, ...)` since `bytesSent` can transiently drop when tracks are added/removed mid-session (new simulcast layer ramping up, publisher swap). For scalar fields (resolution, fps, qualityLimitationReason), pick the highest-resolution active layer so reported frame dimensions match what's on the wire. Verified against staging: 3-region x 2-transport smoke produces 0 negative `bitrateOutKbps` samples and livekit scenarios now report bitrate/fps/rtt/jitter/resolution alongside aiortc. --- packages/sdk/src/realtime/client.ts | 44 ++++++-- .../sdk/src/realtime/transports/livekit.ts | 95 +++++++++++++++- .../sdk/src/realtime/webrtc-connection.ts | 6 + packages/sdk/src/realtime/webrtc-manager.ts | 9 ++ packages/sdk/src/realtime/webrtc-stats.ts | 103 ++++++++++++++---- 5 files changed, 222 insertions(+), 35 deletions(-) diff --git a/packages/sdk/src/realtime/client.ts b/packages/sdk/src/realtime/client.ts index 374433f4..8c3794bc 100644 --- a/packages/sdk/src/realtime/client.ts +++ b/packages/sdk/src/realtime/client.ts @@ -22,7 +22,7 @@ import type { SessionIdMessage, } from "./types"; import { WebRTCManager } from "./webrtc-manager"; -import { type WebRTCStats, WebRTCStatsCollector } from "./webrtc-stats"; +import { type StatsProvider, type WebRTCStats, WebRTCStatsCollector } from "./webrtc-stats"; async function blobToBase64(blob: Blob): Promise { return new Promise((resolve, reject) => { @@ -183,12 +183,22 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => { } : undefined; - const url = `${baseUrl}${options.model.urlPath}`; + // Preserve any query string already present on baseUrl (e.g. the + // webrtc-bench tool appends opt-in flags like ?emit_server_metrics=1) + // by splitting it off before prepending the model path, then merging + // with the SDK's own params. Without this the final URL ended up as + // `baseUrl?X=Y/v1/stream?api_key=...` — two `?` separators, which + // every WS server rejects with 404. + const baseQueryIdx = baseUrl.indexOf("?"); + const baseOrigin = baseQueryIdx === -1 ? baseUrl : baseUrl.slice(0, baseQueryIdx); + const baseExtraQuery = baseQueryIdx === -1 ? "" : baseUrl.slice(baseQueryIdx + 1); + const url = `${baseOrigin}${options.model.urlPath}`; + const extraQueryPrefix = baseExtraQuery ? `${baseExtraQuery}&` : ""; const { emitter: eventEmitter, emitOrBuffer, flush, stop } = createEventBuffer(); webrtcManager = new WebRTCManager({ - webrtcUrl: `${url}?api_key=${encodeURIComponent(apiKey)}&model=${encodeURIComponent(options.model.name)}`, + webrtcUrl: `${url}?${extraQueryPrefix}api_key=${encodeURIComponent(apiKey)}&model=${encodeURIComponent(options.model.name)}`, integration, logger, onDiagnostic: (name, data) => { @@ -287,7 +297,7 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => { const methods = realtimeMethods(manager, imageToBase64); let statsCollector: WebRTCStatsCollector | null = null; - let statsCollectorPeerConnection: RTCPeerConnection | null = null; + let statsCollectorSource: StatsProvider | null = null; // Video stall detection state (Twilio pattern: fps < 0.5 = stalled) const STALL_FPS_THRESHOLD = 0.5; @@ -299,10 +309,13 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => { videoStalled = false; stallStartMs = 0; statsCollector = new WebRTCStatsCollector(); - const pc = manager.getPeerConnection(); - statsCollectorPeerConnection = pc; - if (pc) { - statsCollector.start(pc, (stats) => { + // For aiortc this is the raw RTCPeerConnection; for livekit it's + // an aggregator over the Room's tracks. Either way the collector + // just calls `.getStats()` and parses the returned report. + const source = manager.getStatsProvider(); + statsCollectorSource = source; + if (source) { + statsCollector.start(source, (stats) => { emitOrBuffer("stats", stats); telemetryReporter.addStats(stats); @@ -324,7 +337,7 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => { return () => { statsCollector?.stop(); statsCollector = null; - statsCollectorPeerConnection = null; + statsCollectorSource = null; }; }; @@ -337,8 +350,8 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => { return; } - const peerConnection = manager.getPeerConnection(); - if (!peerConnection || peerConnection === statsCollectorPeerConnection) { + const source = manager.getStatsProvider(); + if (!source || source === statsCollectorSource) { return; } @@ -400,7 +413,14 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => { const subscribe = async (options: SubscribeOptions): Promise => { const { sid, ip, port } = decodeSubscribeToken(options.token); - const subscribeUrl = `${baseUrl}/subscribe/${encodeURIComponent(sid)}?IP=${encodeURIComponent(ip)}&port=${encodeURIComponent(port)}&api_key=${encodeURIComponent(apiKey)}`; + // Same baseUrl-with-query handling as the connect() path above — split + // existing query off before appending the /subscribe/ path so we end up + // with a single `?` separator. + const subQueryIdx = baseUrl.indexOf("?"); + const subBaseOrigin = subQueryIdx === -1 ? baseUrl : baseUrl.slice(0, subQueryIdx); + const subBaseExtraQuery = subQueryIdx === -1 ? "" : baseUrl.slice(subQueryIdx + 1); + const subExtraQueryPrefix = subBaseExtraQuery ? `${subBaseExtraQuery}&` : ""; + const subscribeUrl = `${subBaseOrigin}/subscribe/${encodeURIComponent(sid)}?${subExtraQueryPrefix}IP=${encodeURIComponent(ip)}&port=${encodeURIComponent(port)}&api_key=${encodeURIComponent(apiKey)}`; const { emitter: eventEmitter, emitOrBuffer, flush, stop } = createEventBuffer(); diff --git a/packages/sdk/src/realtime/transports/livekit.ts b/packages/sdk/src/realtime/transports/livekit.ts index 35e96005..d7cab6ee 100644 --- a/packages/sdk/src/realtime/transports/livekit.ts +++ b/packages/sdk/src/realtime/transports/livekit.ts @@ -20,6 +20,7 @@ import { RoomEvent, Track, TrackEvent, + type LocalTrack, type RemoteTrack, type RemoteTrackPublication, type RemoteParticipant, @@ -28,6 +29,7 @@ import { import type { Logger } from "../../utils/logger"; import { buildUserAgent } from "../../utils/user-agent"; import type { DiagnosticEmitter } from "../diagnostics"; +import type { StatsProvider } from "../webrtc-stats"; import type { ConnectionState, GenerationTickMessage, @@ -77,6 +79,7 @@ export class LiveKitConnection { private connectionReject: ((error: Error) => void) | null = null; private logger: Logger; private emitDiagnostic: DiagnosticEmitter; + private statsProvider: StatsProvider | null = null; state: ConnectionState = "disconnected"; websocketMessagesEmitter = mitt(); @@ -86,10 +89,24 @@ export class LiveKitConnection { } getPeerConnection(): RTCPeerConnection | null { - // No raw PC for the LiveKit transport. Stats come from room.getStats(). + // No raw PC for the LiveKit transport — the SFU hides the PCs behind + // a Room abstraction. Callers that need stats should use + // getStatsProvider() instead; it aggregates per-track `getRTCStatsReport()` + // results into an RTCStatsReport-shaped object. return null; } + /** + * Stats provider for the livekit transport. Aggregates + * `track.getRTCStatsReport()` from every local (outbound) and remote + * (inbound) track in the room into a single RTCStatsReport-compatible + * Map. That's the minimum surface WebRTCStatsCollector.parse() needs — + * it calls `.forEach` and keys off `report.type`. + */ + getStatsProvider(): StatsProvider | null { + return this.statsProvider; + } + async connect(url: string, localStream: MediaStream | null, timeout: number, integration?: string): Promise { this.localStream = localStream; @@ -194,6 +211,7 @@ export class LiveKitConnection { this.room.disconnect().catch(() => {}); this.room = null; } + this.statsProvider = null; if (this.ws) { try { this.ws.close(); @@ -331,6 +349,12 @@ export class LiveKitConnection { await this.room.connect(info.livekit_url, info.token); + // Wire up the stats provider now that the room has local+remote + // participant objects available. Held by reference here so the SDK + // client's identity-check in handleConnectionStateChange() sees a + // stable provider for this room. + this.statsProvider = createLiveKitStatsProvider(this.room); + // Publish local tracks. Inference server expects a video track; audio is optional. if (this.localStream) { for (const track of this.localStream.getTracks()) { @@ -387,3 +411,72 @@ export class LiveKitConnection { } } } + +/** + * Build a StatsProvider that aggregates `track.getRTCStatsReport()` across + * every local and remote track in a livekit Room into a single + * RTCStatsReport-shaped Map. + * + * Why this shape: `WebRTCStatsCollector.parse()` expects to call + * `.forEach((stat) => { ... })` on the returned object and keys off each + * entry's `type` (inbound-rtp, outbound-rtp, candidate-pair). The standard + * `RTCStatsReport` is an iterable map-like — our aggregate mimics that by + * returning a `Map` (structurally compatible with the spec). + * + * Each livekit track's `getRTCStatsReport()` under the hood calls + * `RTCRtpSender.getStats()` / `RTCRtpReceiver.getStats()`, which in all + * current browsers returns the track's inbound-rtp/outbound-rtp plus the + * associated candidate-pair and transport reports. Stitching them together + * per-tick gives us a report that looks like an aiortc-style + * `RTCPeerConnection.getStats()` for parsing purposes. + * + * Key collisions (candidate-pair ids repeat across publisher+subscriber + * PCs) are namespaced with a monotonic suffix so `forEach` sees every + * entry once. `parse()` only cares about the last `candidate-pair` where + * `state == "succeeded"`, so duplicate candidate-pair entries are harmless. + */ +function createLiveKitStatsProvider(room: Room): StatsProvider { + let uid = 0; + + const collectFromTrack = async ( + track: LocalTrack | RemoteTrack | undefined, + entries: Array<[string, unknown]>, + ): Promise => { + if (!track) return; + let report: RTCStatsReport | undefined; + try { + report = await track.getRTCStatsReport(); + } catch { + // Track is likely unmuted/unattached or the PC is mid-teardown — skip it. + return; + } + if (!report) return; + report.forEach((stat, id) => { + entries.push([`${id}#${uid++}`, stat]); + }); + }; + + return { + async getStats(): Promise { + const entries: Array<[string, unknown]> = []; + + // Outbound: the local participant's published tracks (what we send to + // the SFU — the server reads these as its inbound video/audio). + for (const pub of room.localParticipant.trackPublications.values()) { + await collectFromTrack(pub.track, entries); + } + + // Inbound: every remote participant's tracks (what the server sends + // back to us — the model output). + for (const participant of room.remoteParticipants.values()) { + for (const pub of participant.trackPublications.values()) { + await collectFromTrack(pub.track as RemoteTrack | undefined, entries); + } + } + + // `Map` is structurally compatible with `RTCStatsReport` for the + // callers we care about (WebRTCStatsCollector.parse uses forEach). + return new Map(entries) as unknown as RTCStatsReport; + }, + }; +} diff --git a/packages/sdk/src/realtime/webrtc-connection.ts b/packages/sdk/src/realtime/webrtc-connection.ts index 632b59bf..8312ab25 100644 --- a/packages/sdk/src/realtime/webrtc-connection.ts +++ b/packages/sdk/src/realtime/webrtc-connection.ts @@ -3,6 +3,7 @@ import mitt from "mitt"; import type { Logger } from "../utils/logger"; import { buildUserAgent } from "../utils/user-agent"; import type { DiagnosticEmitter, IceCandidateEvent } from "./diagnostics"; +import type { StatsProvider } from "./webrtc-stats"; import type { ConnectionState, GenerationTickMessage, @@ -59,6 +60,11 @@ export class WebRTCConnection { return this.pc; } + /** RTCPeerConnection already satisfies StatsProvider (has getStats()). */ + getStatsProvider(): StatsProvider | null { + return this.pc; + } + async connect(url: string, localStream: MediaStream | null, timeout: number, integration?: string): Promise { const deadline = Date.now() + timeout; this.localStream = localStream; diff --git a/packages/sdk/src/realtime/webrtc-manager.ts b/packages/sdk/src/realtime/webrtc-manager.ts index c5cfe354..e7d251e9 100644 --- a/packages/sdk/src/realtime/webrtc-manager.ts +++ b/packages/sdk/src/realtime/webrtc-manager.ts @@ -6,6 +6,7 @@ import { LiveKitConnection } from "./transports/livekit"; import type { TransportKind } from "./transports"; import type { ConnectionState, OutgoingMessage } from "./types"; import { WebRTCConnection } from "./webrtc-connection"; +import type { StatsProvider } from "./webrtc-stats"; // Shared shape both connection types expose — narrows the union for // WebRTCManager so both transports can be driven uniformly. @@ -261,6 +262,14 @@ export class WebRTCManager { return this.connection.getPeerConnection(); } + /** + * Stats source for WebRTCStatsCollector. For aiortc this is the raw + * RTCPeerConnection; for livekit it's an aggregator over room tracks. + */ + getStatsProvider(): StatsProvider | null { + return this.connection.getStatsProvider(); + } + getWebsocketMessageEmitter() { return this.connection.websocketMessagesEmitter; } diff --git a/packages/sdk/src/realtime/webrtc-stats.ts b/packages/sdk/src/realtime/webrtc-stats.ts index 42319a47..ce25587e 100644 --- a/packages/sdk/src/realtime/webrtc-stats.ts +++ b/packages/sdk/src/realtime/webrtc-stats.ts @@ -60,11 +60,21 @@ export type StatsOptions = { intervalMs?: number; }; +/** + * Transport-agnostic source of `RTCStatsReport`. `RTCPeerConnection` already + * satisfies it (its `getStats()` returns `Promise`); the + * LiveKit transport provides a custom adapter that aggregates per-track stats + * from the room. See `transports/livekit.ts` for the livekit impl. + */ +export interface StatsProvider { + getStats(): Promise; +} + const DEFAULT_INTERVAL_MS = 1000; const MIN_INTERVAL_MS = 500; export class WebRTCStatsCollector { - private pc: RTCPeerConnection | null = null; + private source: StatsProvider | null = null; private intervalId: ReturnType | null = null; private prevBytesVideo = 0; private prevBytesAudio = 0; @@ -83,10 +93,10 @@ export class WebRTCStatsCollector { this.intervalMs = Math.max(options.intervalMs ?? DEFAULT_INTERVAL_MS, MIN_INTERVAL_MS); } - /** Attach to a peer connection and start polling. */ - start(pc: RTCPeerConnection, onStats: (stats: WebRTCStats) => void): void { + /** Attach to a stats provider (RTCPeerConnection or equivalent) and start polling. */ + start(source: StatsProvider, onStats: (stats: WebRTCStats) => void): void { this.stop(); - this.pc = pc; + this.source = source; this.onStats = onStats; this.prevBytesVideo = 0; this.prevBytesAudio = 0; @@ -106,7 +116,7 @@ export class WebRTCStatsCollector { clearInterval(this.intervalId); this.intervalId = null; } - this.pc = null; + this.source = null; this.onStats = null; } @@ -115,14 +125,14 @@ export class WebRTCStatsCollector { } private async collect(): Promise { - if (!this.pc || !this.onStats) return; + if (!this.source || !this.onStats) return; try { - const rawStats = await this.pc.getStats(); + const rawStats = await this.source.getStats(); const stats = this.parse(rawStats); this.onStats(stats); } catch { - // PC might be closed; stop silently + // Source might be closed; stop silently this.stop(); } } @@ -131,9 +141,13 @@ export class WebRTCStatsCollector { const now = performance.now(); const elapsed = this.prevTimestamp > 0 ? (now - this.prevTimestamp) / 1000 : 0; + // Explicit NonNullable aliases so TypeScript can track field + // mutations inside the `forEach` closure below — otherwise it narrows + // the `| null` union to `never` after the first assignment. + type OutboundVideo = NonNullable; let video: WebRTCStats["video"] = null; let audio: WebRTCStats["audio"] = null; - let outboundVideo: WebRTCStats["outboundVideo"] = null; + let outboundVideo: OutboundVideo | null = null; const connection: WebRTCStats["connection"] = { currentRoundTripTime: null, availableOutgoingBitrate: null, @@ -176,21 +190,48 @@ export class WebRTCStatsCollector { } if (report.type === "outbound-rtp" && report.kind === "video") { + // Simulcast produces one outbound-rtp report per spatial layer + // (3 layers is common). Earlier versions picked whichever layer + // `forEach` visited last, which (a) underreports total outbound + // traffic and (b) causes bitrate to go violently negative across + // ticks because layer byte counters are independent and the "last + // visited" layer alternates. Accumulate byte/packet totals across + // every layer; pick scalar fields (resolution, fps, quality- + // limitation reason) from the highest-resolution layer so the + // reported frame size matches what's actually on the wire. const r = report as Record; const bytesSent = (r.bytesSent as number) ?? 0; - const outBitrate = elapsed > 0 ? ((bytesSent - this.prevBytesSentVideo) * 8) / elapsed : 0; - this.prevBytesSentVideo = bytesSent; - - outboundVideo = { - qualityLimitationReason: (r.qualityLimitationReason as string) ?? "none", - qualityLimitationDurations: (r.qualityLimitationDurations as Record) ?? {}, - bytesSent, - packetsSent: (r.packetsSent as number) ?? 0, - framesPerSecond: (r.framesPerSecond as number) ?? 0, - frameWidth: (r.frameWidth as number) ?? 0, - frameHeight: (r.frameHeight as number) ?? 0, - bitrate: Math.round(outBitrate), - }; + const packetsSent = (r.packetsSent as number) ?? 0; + const frameWidth = (r.frameWidth as number) ?? 0; + const frameHeight = (r.frameHeight as number) ?? 0; + const pixels = frameWidth * frameHeight; + + if (outboundVideo === null) { + outboundVideo = { + qualityLimitationReason: (r.qualityLimitationReason as string) ?? "none", + qualityLimitationDurations: (r.qualityLimitationDurations as Record) ?? {}, + bytesSent, + packetsSent, + framesPerSecond: (r.framesPerSecond as number) ?? 0, + frameWidth, + frameHeight, + bitrate: 0, + }; + } else { + outboundVideo.bytesSent += bytesSent; + outboundVideo.packetsSent += packetsSent; + // Promote scalar fields whenever a higher-resolution layer + // appears — we want reported resolution to match the largest + // active layer, not the lowest. + if (pixels > outboundVideo.frameWidth * outboundVideo.frameHeight) { + outboundVideo.frameWidth = frameWidth; + outboundVideo.frameHeight = frameHeight; + outboundVideo.framesPerSecond = (r.framesPerSecond as number) ?? 0; + outboundVideo.qualityLimitationReason = (r.qualityLimitationReason as string) ?? "none"; + outboundVideo.qualityLimitationDurations = + (r.qualityLimitationDurations as Record) ?? {}; + } + } } if (report.type === "inbound-rtp" && report.kind === "audio") { @@ -220,6 +261,24 @@ export class WebRTCStatsCollector { } }); + // Compute outbound video bitrate after the loop, now that we know + // the summed bytesSent across all simulcast layers. Doing it per- + // report would misattribute deltas to whichever layer came last. + // + // Cast via `unknown` because TypeScript can't track the non-null + // assignment inside the forEach closure above — flow analysis sees + // only the initial `let outboundVideo = null` and narrows to `never`. + const ov = outboundVideo as unknown as OutboundVideo | null; + if (ov !== null) { + const outBitrate = + elapsed > 0 ? ((ov.bytesSent - this.prevBytesSentVideo) * 8) / elapsed : 0; + // Clamp to zero: when tracks are added/removed mid-session (new + // simulcast layer, publisher swap) total bytesSent can transiently + // drop. Negative bitrate is nonsensical to downstream consumers. + ov.bitrate = Math.max(0, Math.round(outBitrate)); + this.prevBytesSentVideo = ov.bytesSent; + } + this.prevTimestamp = now; return { From 1eb2bbea297f3e1950a2908499db3481179fb73b Mon Sep 17 00:00:00 2001 From: nagar-decart Date: Wed, 22 Apr 2026 22:03:56 +0300 Subject: [PATCH 07/16] feat(realtime): restore selectedCandidatePairs in stats.connection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bench callers (and presumably other stats consumers) need to know which ICE candidate path the current session is using — relayed TURN vs direct UDP, the local/remote IPs and port, the transport protocol. That signal disappeared when an earlier refactor projected the parser's output down to just `currentRoundTripTime` + `availableOutgoingBitrate` on `connection`. Restore it: - `WebRTCStats.connection.selectedCandidatePairs: Array<{ local, remote }>` exposing { candidateType, address, port, protocol } per side. - Parser now collects `localCandidateId` / `remoteCandidateId` from succeeded candidate-pair reports and, after the main forEach, looks each ID up in rawStats to produce the resolved pair (rawStats entry order isn't guaranteed — the pair may appear before its referenced candidates). - Handles both the older `ip` and newer `address` fields on `local-candidate` / `remote-candidate` reports. Net effect: bench's `SdkStatsCollector.onStats` (which already defensively reads `stats.connection.selectedCandidatePairs`) will now populate `iceCandidate` for every session. Before this change, that field was always undefined under the SDK transport, so every bench run logged `iceCandidate: None` and diagnosing relay vs direct sessions was impossible. --- packages/sdk/src/realtime/webrtc-stats.ts | 62 +++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/packages/sdk/src/realtime/webrtc-stats.ts b/packages/sdk/src/realtime/webrtc-stats.ts index ce25587e..bd96dcca 100644 --- a/packages/sdk/src/realtime/webrtc-stats.ts +++ b/packages/sdk/src/realtime/webrtc-stats.ts @@ -52,9 +52,32 @@ export type WebRTCStats = { currentRoundTripTime: number | null; /** Available outgoing bitrate estimate in bits/sec, or null if unavailable. */ availableOutgoingBitrate: number | null; + /** + * Selected ICE candidate pairs (usually one per PC). Populated from + * the `candidate-pair` report with state="succeeded" plus the matching + * `local-candidate` / `remote-candidate` lookups. Lets diagnostic tools + * tell direct-UDP sessions from TURN-relayed ones — the path affects + * jitter and failure modes, so this is essential signal for + * benchmarking and incident triage. + */ + selectedCandidatePairs: Array<{ + local: IceCandidateInfo; + remote: IceCandidateInfo; + }>; }; }; +/** One side of an ICE candidate pair (sender or receiver). */ +export type IceCandidateInfo = { + /** "host" | "srflx" | "prflx" | "relay" */ + candidateType: string; + /** IP (v4 or v6). May be `""` for mDNS-obfuscated host candidates. */ + address: string; + port: number; + /** "udp" | "tcp" */ + protocol: string; +}; + export type StatsOptions = { /** Polling interval in milliseconds. Default: 1000. Minimum: 500. */ intervalMs?: number; @@ -151,8 +174,16 @@ export class WebRTCStatsCollector { const connection: WebRTCStats["connection"] = { currentRoundTripTime: null, availableOutgoingBitrate: null, + selectedCandidatePairs: [], }; + // First pass — collect succeeded candidate-pair IDs. Resolving them + // into local/remote candidate objects happens after the main forEach + // so we have access to every report (ordering of rawStats is not + // guaranteed: a succeeded pair's local-candidate may appear before + // or after it). + const succeededPairs: Array<{ localId: string; remoteId: string }> = []; + rawStats.forEach((report) => { if (report.type === "inbound-rtp" && report.kind === "video") { const bytesReceived = ((report as Record).bytesReceived as number) ?? 0; @@ -257,10 +288,41 @@ export class WebRTCStatsCollector { if (r.state === "succeeded") { connection.currentRoundTripTime = (r.currentRoundTripTime as number) ?? null; connection.availableOutgoingBitrate = (r.availableOutgoingBitrate as number) ?? null; + const localId = r.localCandidateId as string | undefined; + const remoteId = r.remoteCandidateId as string | undefined; + if (localId && remoteId) { + succeededPairs.push({ localId, remoteId }); + } } } }); + // Resolve candidate IDs to their local/remote-candidate reports now + // that we've seen every entry in the rawStats map. `rawStats.get()` + // is O(1) on the spec-compliant Map, so per-pair resolution is cheap. + if (succeededPairs.length > 0) { + const toInfo = (id: string): IceCandidateInfo | null => { + const c = (rawStats as unknown as Map).get(id) as + | Record + | undefined; + if (!c) return null; + return { + // browsers may report `ip` (older spec) or `address` (newer). Prefer `address`. + candidateType: (c.candidateType as string) ?? "", + address: ((c.address as string) ?? (c.ip as string) ?? "") as string, + port: (c.port as number) ?? 0, + protocol: (c.protocol as string) ?? "", + }; + }; + for (const { localId, remoteId } of succeededPairs) { + const local = toInfo(localId); + const remote = toInfo(remoteId); + if (local && remote) { + connection.selectedCandidatePairs.push({ local, remote }); + } + } + } + // Compute outbound video bitrate after the loop, now that we know // the summed bytesSent across all simulcast layers. Doing it per- // report would misattribute deltas to whichever layer came last. From e286a420e485d62c57c6b557e96e4dcdaa9b4ac5 Mon Sep 17 00:00:00 2001 From: nagar-decart Date: Wed, 22 Apr 2026 22:11:15 +0300 Subject: [PATCH 08/16] feat(realtime): expand WebRTCStats with ~25 standard RTCStats fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consumers (benchmark/observability) need the full set of fields that the WebRTC spec exposes via `RTCInboundRtpStreamStats` / `RTCOutboundRtpStreamStats` / `RemoteInboundRtpStreamStats`. The SDK's parser previously projected those down to a small curated set (bitrate, fps, jitter, freezes) and dropped everything diagnostic — so downstream code that tried to read e.g. `stats.video.avgJitterBufferMs` silently got undefined for months. Restored fields (inbound video): - framesReceived, keyFramesDecoded - nackCount, nackCountDelta, pliCount, firCount - avgDecodeTimeMs (totalDecodeTime / framesDecoded) - avgProcessingDelayMs (totalProcessingDelay / framesDecoded) - avgJitterBufferMs (jitterBufferDelay / jitterBufferEmittedCount) - avgInterFrameDelayMs (totalInterFrameDelay / framesDecoded) - interFrameDelayVarianceMs (σ from total+totalSquared — tells you how much the decoder's inter-frame arrival is jittering) - jitterBufferTargetDelayMs, jitterBufferMinimumDelayMs (current target vs minimum buffer depth — answers "is Chrome running a deep adaptive buffer?") - decoderImplementation Restored fields (outbound video): - targetBitrateKbps (BWE's current target — separate from the actual-bytes-sent-derived `bitrate` field) - avgEncodeTimeMs, avgPacketSendDelayMs, avgQp - nackCount, pliCount, firCount (received from remote — recovery request counters) - retransmittedBytesSent, retransmittedPacketsSent - encoderImplementation New block: - `remoteInbound { fractionLost, jitter, roundTripTime }` from the remote-inbound-rtp report. Tells you "what does the remote side think about its reception of our outbound" — independent of our own observations. Simulcast aggregation unchanged: the outbound-rtp block still accumulates per-spatial-layer totals for bytesSent/packetsSent/retransmit counters, picks scalar fields (resolution, fps, quality-limit, targetBitrate, avgEncodeTime, encoderImplementation) from the highest-resolution layer. All derived averages return null instead of 0 when the denominator is 0 (before any frames decode). Avoids the ambiguity of `avg = 0` meaning either "genuinely instant" or "no samples yet". Unblocks bench-side diagnosis of bimodal session behavior: the jitter buffer depth + inter-frame delay variance + targetBitrate signals, together, let you tell whether a bad session is running with a deep receive buffer, irregular decoder input timing, or a BWE that didn't adapt — each of which points to a different root cause. --- packages/sdk/src/realtime/webrtc-stats.ts | 187 +++++++++++++++++++++- 1 file changed, 185 insertions(+), 2 deletions(-) diff --git a/packages/sdk/src/realtime/webrtc-stats.ts b/packages/sdk/src/realtime/webrtc-stats.ts index bd96dcca..aacb5d6d 100644 --- a/packages/sdk/src/realtime/webrtc-stats.ts +++ b/packages/sdk/src/realtime/webrtc-stats.ts @@ -3,6 +3,8 @@ export type WebRTCStats = { video: { framesDecoded: number; framesDropped: number; + framesReceived: number; + keyFramesDecoded: number; framesPerSecond: number; frameWidth: number; frameHeight: number; @@ -22,6 +24,39 @@ export type WebRTCStats = { freezeCountDelta: number; /** Delta: freeze duration (seconds) since previous sample. */ freezeDurationDelta: number; + /** NACKs sent to the sender (requesting packet retransmission). */ + nackCount: number; + nackCountDelta: number; + /** PLIs sent to the sender (full frame retransmission request). */ + pliCount: number; + /** FIRs sent to the sender (forced intra-refresh request). */ + firCount: number; + /** + * Average decode time (ms/frame), cumulative since stream start. + * Derived from totalDecodeTime/framesDecoded. `null` if the browser + * hasn't produced the underlying counters yet. + */ + avgDecodeTimeMs: number | null; + /** Average jitter-buffer time (ms/frame emitted). Cumulative. */ + avgJitterBufferMs: number | null; + /** + * Average total processing delay (ms/frame decoded) — from network + * receive to decoder output. Cumulative. + */ + avgProcessingDelayMs: number | null; + /** Average inter-frame delay at the decoder (ms). */ + avgInterFrameDelayMs: number | null; + /** + * Std-dev of inter-frame delay (ms), computed from + * totalInterFrameDelay + totalSquaredInterFrameDelay. + */ + interFrameDelayVarianceMs: number | null; + /** Current target delay of the jitter buffer (ms). */ + jitterBufferTargetDelayMs: number | null; + /** Current minimum delay of the jitter buffer (ms). */ + jitterBufferMinimumDelayMs: number | null; + /** Which decoder the browser picked (e.g. "libvpx", "ExternalDecoder"). */ + decoderImplementation: string; } | null; audio: { bytesReceived: number; @@ -46,6 +81,37 @@ export type WebRTCStats = { frameHeight: number; /** Estimated outbound bitrate in bits/sec, computed from bytesSent delta. */ bitrate: number; + /** Encoder's current target bitrate in kbps (BWE output). */ + targetBitrateKbps: number | null; + /** Average encode time per frame (ms), cumulative. */ + avgEncodeTimeMs: number | null; + /** Average packet send delay (ms), cumulative. */ + avgPacketSendDelayMs: number | null; + /** Average quantization parameter across encoded frames (lower is better). */ + avgQp: number | null; + /** NACKs received from receiver (retransmission requests). */ + nackCount: number; + /** PLIs received from receiver. */ + pliCount: number; + /** FIRs received from receiver. */ + firCount: number; + retransmittedBytesSent: number; + retransmittedPacketsSent: number; + /** Which encoder the browser picked (e.g. "libvpx", "SimulcastEncoderAdapter"). */ + encoderImplementation: string; + } | null; + /** + * Remote-inbound stats — what the far end reports *about its reception + * of our outbound stream*. Answers "does the server think we're lossy?" + * independently of what we see locally. Populated from + * `remote-inbound-rtp` reports. + */ + remoteInbound: { + fractionLost: number | null; + /** In seconds. */ + jitter: number | null; + /** In seconds. Often more accurate than connection.currentRoundTripTime. */ + roundTripTime: number | null; } | null; connection: { /** Current round-trip time in seconds, or null if unavailable. */ @@ -109,6 +175,7 @@ export class WebRTCStatsCollector { private prevFreezeCount = 0; private prevFreezeDuration = 0; private prevPacketsLostAudio = 0; + private prevNackCountInbound = 0; private onStats: ((stats: WebRTCStats) => void) | null = null; private intervalMs: number; @@ -130,6 +197,7 @@ export class WebRTCStatsCollector { this.prevFreezeCount = 0; this.prevFreezeDuration = 0; this.prevPacketsLostAudio = 0; + this.prevNackCountInbound = 0; this.intervalId = setInterval(() => this.collect(), this.intervalMs); } @@ -171,6 +239,7 @@ export class WebRTCStatsCollector { let video: WebRTCStats["video"] = null; let audio: WebRTCStats["audio"] = null; let outboundVideo: OutboundVideo | null = null; + let remoteInbound: WebRTCStats["remoteInbound"] = null; const connection: WebRTCStats["connection"] = { currentRoundTripTime: null, availableOutgoingBitrate: null, @@ -195,10 +264,60 @@ export class WebRTCStatsCollector { const framesDropped = (r.framesDropped as number) ?? 0; const freezeCount = (r.freezeCount as number) ?? 0; const freezeDuration = (r.totalFreezesDuration as number) ?? 0; + const framesDecoded = (r.framesDecoded as number) ?? 0; + const nackCount = (r.nackCount as number) ?? 0; + + // Browser cumulative counters — averages below are + // (cumulativeTotal / denominator). `jitterBufferEmittedCount` is + // the canonical denominator per the WebRTC stats spec for the + // `jitterBuffer*` averages; `framesDecoded` for decode/processing + // averages. + const jbEmitted = (r.jitterBufferEmittedCount as number) ?? 0; + const totalDecodeTime = (r.totalDecodeTime as number) ?? 0; + const totalProcessingDelay = (r.totalProcessingDelay as number) ?? 0; + const totalInterFrameDelay = (r.totalInterFrameDelay as number) ?? 0; + const totalSquaredInterFrameDelay = (r.totalSquaredInterFrameDelay as number) ?? 0; + const jitterBufferDelay = (r.jitterBufferDelay as number) ?? 0; + const jitterBufferTargetDelay = (r.jitterBufferTargetDelay as number) ?? 0; + const jitterBufferMinimumDelay = (r.jitterBufferMinimumDelay as number) ?? 0; + + const avgDecodeTimeMs = framesDecoded > 0 + ? (totalDecodeTime / framesDecoded) * 1000 + : null; + const avgProcessingDelayMs = framesDecoded > 0 + ? (totalProcessingDelay / framesDecoded) * 1000 + : null; + const avgInterFrameDelayMs = framesDecoded > 0 + ? (totalInterFrameDelay / framesDecoded) * 1000 + : null; + // Variance σ² = E[X²] - E[X]² ; std-dev = sqrt(σ²). Report std-dev + // in ms — more actionable than variance for a threshold-based + // "is the path jittery" check. + const interFrameDelayVarianceMs = + framesDecoded > 0 + ? Math.sqrt( + Math.max( + 0, + totalSquaredInterFrameDelay / framesDecoded - + Math.pow(totalInterFrameDelay / framesDecoded, 2), + ), + ) * 1000 + : null; + const avgJitterBufferMs = jbEmitted > 0 + ? (jitterBufferDelay / jbEmitted) * 1000 + : null; + const jitterBufferTargetDelayMs = jbEmitted > 0 + ? (jitterBufferTargetDelay / jbEmitted) * 1000 + : null; + const jitterBufferMinimumDelayMs = jbEmitted > 0 + ? (jitterBufferMinimumDelay / jbEmitted) * 1000 + : null; video = { - framesDecoded: (r.framesDecoded as number) ?? 0, + framesDecoded, framesDropped, + framesReceived: (r.framesReceived as number) ?? 0, + keyFramesDecoded: (r.keyFramesDecoded as number) ?? 0, framesPerSecond: (r.framesPerSecond as number) ?? 0, frameWidth: (r.frameWidth as number) ?? 0, frameHeight: (r.frameHeight as number) ?? 0, @@ -213,11 +332,24 @@ export class WebRTCStatsCollector { framesDroppedDelta: Math.max(0, framesDropped - this.prevFramesDropped), freezeCountDelta: Math.max(0, freezeCount - this.prevFreezeCount), freezeDurationDelta: Math.max(0, freezeDuration - this.prevFreezeDuration), + nackCount, + nackCountDelta: Math.max(0, nackCount - this.prevNackCountInbound), + pliCount: (r.pliCount as number) ?? 0, + firCount: (r.firCount as number) ?? 0, + avgDecodeTimeMs, + avgJitterBufferMs, + avgProcessingDelayMs, + avgInterFrameDelayMs, + interFrameDelayVarianceMs, + jitterBufferTargetDelayMs, + jitterBufferMinimumDelayMs, + decoderImplementation: (r.decoderImplementation as string) ?? "", }; this.prevPacketsLostVideo = packetsLost; this.prevFramesDropped = framesDropped; this.prevFreezeCount = freezeCount; this.prevFreezeDuration = freezeDuration; + this.prevNackCountInbound = nackCount; } if (report.type === "outbound-rtp" && report.kind === "video") { @@ -236,6 +368,24 @@ export class WebRTCStatsCollector { const frameWidth = (r.frameWidth as number) ?? 0; const frameHeight = (r.frameHeight as number) ?? 0; const pixels = frameWidth * frameHeight; + const framesEncoded = (r.framesEncoded as number) ?? 0; + const totalEncodeTime = (r.totalEncodeTime as number) ?? 0; + const totalPacketSendDelay = (r.totalPacketSendDelay as number) ?? 0; + const qpSum = (r.qpSum as number) ?? 0; + const nackCount = (r.nackCount as number) ?? 0; + const pliCount = (r.pliCount as number) ?? 0; + const firCount = (r.firCount as number) ?? 0; + const retransmittedBytesSent = (r.retransmittedBytesSent as number) ?? 0; + const retransmittedPacketsSent = (r.retransmittedPacketsSent as number) ?? 0; + const targetBitrate = (r.targetBitrate as number | undefined) ?? null; + + const avgEncodeTimeMs = framesEncoded > 0 + ? (totalEncodeTime / framesEncoded) * 1000 + : null; + const avgPacketSendDelayMs = packetsSent > 0 + ? (totalPacketSendDelay / packetsSent) * 1000 + : null; + const avgQp = framesEncoded > 0 ? qpSum / framesEncoded : null; if (outboundVideo === null) { outboundVideo = { @@ -247,13 +397,30 @@ export class WebRTCStatsCollector { frameWidth, frameHeight, bitrate: 0, + targetBitrateKbps: targetBitrate != null ? Math.round(targetBitrate / 1000) : null, + avgEncodeTimeMs, + avgPacketSendDelayMs, + avgQp, + nackCount, + pliCount, + firCount, + retransmittedBytesSent, + retransmittedPacketsSent, + encoderImplementation: (r.encoderImplementation as string) ?? "", }; } else { outboundVideo.bytesSent += bytesSent; outboundVideo.packetsSent += packetsSent; + outboundVideo.nackCount += nackCount; + outboundVideo.pliCount += pliCount; + outboundVideo.firCount += firCount; + outboundVideo.retransmittedBytesSent += retransmittedBytesSent; + outboundVideo.retransmittedPacketsSent += retransmittedPacketsSent; // Promote scalar fields whenever a higher-resolution layer // appears — we want reported resolution to match the largest - // active layer, not the lowest. + // active layer, not the lowest. avgEncodeTime / targetBitrate / + // encoderImplementation are also most representative of the + // primary layer. if (pixels > outboundVideo.frameWidth * outboundVideo.frameHeight) { outboundVideo.frameWidth = frameWidth; outboundVideo.frameHeight = frameHeight; @@ -261,10 +428,25 @@ export class WebRTCStatsCollector { outboundVideo.qualityLimitationReason = (r.qualityLimitationReason as string) ?? "none"; outboundVideo.qualityLimitationDurations = (r.qualityLimitationDurations as Record) ?? {}; + outboundVideo.targetBitrateKbps = + targetBitrate != null ? Math.round(targetBitrate / 1000) : null; + outboundVideo.avgEncodeTimeMs = avgEncodeTimeMs; + outboundVideo.avgPacketSendDelayMs = avgPacketSendDelayMs; + outboundVideo.avgQp = avgQp; + outboundVideo.encoderImplementation = (r.encoderImplementation as string) ?? ""; } } } + if (report.type === "remote-inbound-rtp" && report.kind === "video") { + const r = report as Record; + remoteInbound = { + fractionLost: (r.fractionLost as number | undefined) ?? null, + jitter: (r.jitter as number | undefined) ?? null, + roundTripTime: (r.roundTripTime as number | undefined) ?? null, + }; + } + if (report.type === "inbound-rtp" && report.kind === "audio") { const bytesReceived = ((report as Record).bytesReceived as number) ?? 0; const bitrate = elapsed > 0 ? ((bytesReceived - this.prevBytesAudio) * 8) / elapsed : 0; @@ -349,6 +531,7 @@ export class WebRTCStatsCollector { audio, outboundVideo, connection, + remoteInbound, }; } } From 5be1d75438428ceeb71b3da93b45e73357775a8f Mon Sep 17 00:00:00 2001 From: nagar-decart Date: Thu, 23 Apr 2026 10:36:35 +0300 Subject: [PATCH 09/16] feat(realtime): expose livekit publishTrack overrides (simulcast + max bitrate) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Callers (benchmark tool) need per-session control over the client-side livekit publisher. The simulcast flag and maxBitrate directly affect how the SFU routes a client's uplink — until now both were hardcoded (simulcast=true, no explicit maxBitrate → Chrome BWE picks the rate). New `realtime.connect` options: - `livekitPublishSimulcast` — forwarded to `publishTrack(simulcast)` - `livekitPublishMaxBitrateKbps` — forwarded as `videoEncoding.maxBitrate` (kbps → bps) on publishTrack Plumbing: RealTimeClient schema → WebRTCManager.config → LiveKitCallbacks → publishTrack opts. Aiortc ignores both. Log: LiveKitConnection.joinRoom now emits a single info log with the effective publish config (simulcast + maxBitrate) at connect time so bench logs can be grepped to confirm the values that actually took effect. --- packages/sdk/src/realtime/client.ts | 6 ++++++ packages/sdk/src/realtime/transports/livekit.ts | 17 ++++++++++++++++- packages/sdk/src/realtime/webrtc-manager.ts | 15 ++++++++++++++- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/packages/sdk/src/realtime/client.ts b/packages/sdk/src/realtime/client.ts index 8c3794bc..01fad694 100644 --- a/packages/sdk/src/realtime/client.ts +++ b/packages/sdk/src/realtime/client.ts @@ -103,6 +103,10 @@ const realTimeClientConnectOptionsSchema = z.object({ // inference pod must have livekit in TRANSPORTS_ENABLED or the session // will be rejected. transport: z.enum(["aiortc", "livekit"]).optional(), + // Client-side livekit publish overrides. Ignored on aiortc. Both + // default to the livekit-client defaults when omitted. + livekitPublishSimulcast: z.boolean().optional(), + livekitPublishMaxBitrateKbps: z.number().positive().optional(), }); export type RealTimeClientConnectOptions = Omit, "model"> & { model: ModelDefinition | CustomModelDefinition; @@ -221,6 +225,8 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => { initialImage, initialPrompt, transport: options.transport, + livekitPublishSimulcast: options.livekitPublishSimulcast, + livekitPublishMaxBitrateKbps: options.livekitPublishMaxBitrateKbps, }); const manager = webrtcManager; diff --git a/packages/sdk/src/realtime/transports/livekit.ts b/packages/sdk/src/realtime/transports/livekit.ts index d7cab6ee..0286bf74 100644 --- a/packages/sdk/src/realtime/transports/livekit.ts +++ b/packages/sdk/src/realtime/transports/livekit.ts @@ -53,6 +53,10 @@ interface LiveKitCallbacks { initialPrompt?: { text: string; enhance?: boolean }; logger?: Logger; onDiagnostic?: DiagnosticEmitter; + /** Override livekit-client `publishTrack` simulcast option. Defaults to true. */ + publishSimulcast?: boolean; + /** Explicit client-side uplink cap in kbps. Omit for Chrome BWE default. */ + publishMaxBitrateKbps?: number; } type WsMessageEvents = { @@ -357,11 +361,22 @@ export class LiveKitConnection { // Publish local tracks. Inference server expects a video track; audio is optional. if (this.localStream) { + const publishSimulcast = this.callbacks.publishSimulcast ?? true; + const maxBitrate = this.callbacks.publishMaxBitrateKbps != null + ? this.callbacks.publishMaxBitrateKbps * 1000 + : undefined; + this.logger.info("LiveKit client publish config", { + simulcast: publishSimulcast, + maxBitrate, + }); for (const track of this.localStream.getTracks()) { if (track.kind === "video") { await this.room.localParticipant.publishTrack(track, { - simulcast: true, + simulcast: publishSimulcast, source: Track.Source.Camera, + ...(maxBitrate != null + ? { videoEncoding: { maxBitrate, maxFramerate: 30 } } + : {}), }); } else { await this.room.localParticipant.publishTrack(track); diff --git a/packages/sdk/src/realtime/webrtc-manager.ts b/packages/sdk/src/realtime/webrtc-manager.ts index e7d251e9..8887764c 100644 --- a/packages/sdk/src/realtime/webrtc-manager.ts +++ b/packages/sdk/src/realtime/webrtc-manager.ts @@ -26,6 +26,15 @@ export interface WebRTCConfig { modelName?: string; initialImage?: string; initialPrompt?: { text: string; enhance?: boolean }; + /** + * Client-side publish options for the livekit transport. Ignored on + * aiortc. Forwarded to `LocalParticipant.publishTrack(...)` in the + * livekit transport. Useful for diagnostic/benchmark tooling — lets + * callers cap the client's uplink encoder or toggle simulcast without + * modifying SDK internals. + */ + livekitPublishSimulcast?: boolean; + livekitPublishMaxBitrateKbps?: number; /** * Selects the underlying WebRTC transport. Default is "aiortc" for * back-compat with existing deployments. Set to "livekit" to join a @@ -80,7 +89,11 @@ export class WebRTCManager { onDiagnostic: config.onDiagnostic, }; if (transport === "livekit") { - this.connection = new LiveKitConnection(sharedOpts); + this.connection = new LiveKitConnection({ + ...sharedOpts, + publishSimulcast: config.livekitPublishSimulcast, + publishMaxBitrateKbps: config.livekitPublishMaxBitrateKbps, + }); } else { this.connection = new WebRTCConnection({ ...sharedOpts, From 84e0a6f31102d45fcea6570b97ab518bf559bde1 Mon Sep 17 00:00:00 2001 From: nagar-decart Date: Thu, 23 Apr 2026 12:15:54 +0300 Subject: [PATCH 10/16] feat(realtime): default livekit publish cap to 2500 kbps; expose adaptiveStream + dynacast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - publishMaxBitrateKbps: undefined → 2500 kbps (matches server default); pass `null` to explicitly opt out and let Chrome BWE run uncapped. Three-state semantic preserved end-to-end (zod schema → WebRTCManager → LiveKitConnection). - adaptiveStream + dynacast: new configurable callback fields on LiveKitConnection, plumbed through WebRTCConfig as `livekitAdaptiveStream` / `livekitDynacast`. Both still default to `false` (unchanged behavior). Primary consumer is webrtc-bench, which sweeps these for LiveKit quality experiments without forking the SDK. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/sdk/src/realtime/client.ts | 14 +++++-- .../sdk/src/realtime/transports/livekit.ts | 38 +++++++++++++++---- packages/sdk/src/realtime/webrtc-manager.ts | 16 +++++++- 3 files changed, 57 insertions(+), 11 deletions(-) diff --git a/packages/sdk/src/realtime/client.ts b/packages/sdk/src/realtime/client.ts index 01fad694..b9883451 100644 --- a/packages/sdk/src/realtime/client.ts +++ b/packages/sdk/src/realtime/client.ts @@ -103,10 +103,16 @@ const realTimeClientConnectOptionsSchema = z.object({ // inference pod must have livekit in TRANSPORTS_ENABLED or the session // will be rejected. transport: z.enum(["aiortc", "livekit"]).optional(), - // Client-side livekit publish overrides. Ignored on aiortc. Both - // default to the livekit-client defaults when omitted. + // Client-side livekit publish overrides. Ignored on aiortc. + // - `livekitPublishSimulcast` defaults to true (livekit-client default). + // - `livekitPublishMaxBitrateKbps` defaults to 2500. Pass `null` to + // opt out of the cap entirely (lets Chrome BWE run unclamped). + // - `livekitAdaptiveStream` / `livekitDynacast` default to `false` + // (matches historical shipped behavior). Exposed for the bench tool. livekitPublishSimulcast: z.boolean().optional(), - livekitPublishMaxBitrateKbps: z.number().positive().optional(), + livekitPublishMaxBitrateKbps: z.union([z.number().positive(), z.null()]).optional(), + livekitAdaptiveStream: z.boolean().optional(), + livekitDynacast: z.boolean().optional(), }); export type RealTimeClientConnectOptions = Omit, "model"> & { model: ModelDefinition | CustomModelDefinition; @@ -227,6 +233,8 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => { transport: options.transport, livekitPublishSimulcast: options.livekitPublishSimulcast, livekitPublishMaxBitrateKbps: options.livekitPublishMaxBitrateKbps, + livekitAdaptiveStream: options.livekitAdaptiveStream, + livekitDynacast: options.livekitDynacast, }); const manager = webrtcManager; diff --git a/packages/sdk/src/realtime/transports/livekit.ts b/packages/sdk/src/realtime/transports/livekit.ts index 0286bf74..42c74ebf 100644 --- a/packages/sdk/src/realtime/transports/livekit.ts +++ b/packages/sdk/src/realtime/transports/livekit.ts @@ -55,10 +55,26 @@ interface LiveKitCallbacks { onDiagnostic?: DiagnosticEmitter; /** Override livekit-client `publishTrack` simulcast option. Defaults to true. */ publishSimulcast?: boolean; - /** Explicit client-side uplink cap in kbps. Omit for Chrome BWE default. */ - publishMaxBitrateKbps?: number; + /** + * Client-side uplink cap in kbps. Defaults to 2500 to match the + * server-side publish cap (see inference_server/rt/livekit/conn.py). + * Pass `null` explicitly to omit the cap entirely and let Chrome BWE + * run uncapped. + */ + publishMaxBitrateKbps?: number | null; + /** + * livekit-client `Room` options. Both default to `false` — matches the + * current shipped behavior. Exposed primarily so the webrtc-bench tool + * can sweep these without forking the SDK. Enabling either in production + * changes quality/bandwidth trade-offs, so leave them off unless you + * know what you're doing. + */ + adaptiveStream?: boolean; + dynacast?: boolean; } +const DEFAULT_PUBLISH_MAX_BITRATE_KBPS = 2500; + type WsMessageEvents = { promptAck: PromptAckMessage; setImageAck: SetImageAckMessage; @@ -322,8 +338,8 @@ export class LiveKitConnection { private async joinRoom(info: RoomInfoMessage): Promise { this.room = new Room({ - adaptiveStream: false, - dynacast: false, + adaptiveStream: this.callbacks.adaptiveStream ?? false, + dynacast: this.callbacks.dynacast ?? false, }); this.room.on(RoomEvent.TrackSubscribed, (track: RemoteTrack, _pub: RemoteTrackPublication, _p: RemoteParticipant) => { @@ -362,12 +378,20 @@ export class LiveKitConnection { // Publish local tracks. Inference server expects a video track; audio is optional. if (this.localStream) { const publishSimulcast = this.callbacks.publishSimulcast ?? true; - const maxBitrate = this.callbacks.publishMaxBitrateKbps != null - ? this.callbacks.publishMaxBitrateKbps * 1000 - : undefined; + // Three-state resolution for the bitrate cap: + // undefined → apply the SDK default (2500 kbps) + // null → explicit opt-out, no cap (Chrome BWE runs unclamped) + // number → explicit kbps value + const configuredBitrateKbps = this.callbacks.publishMaxBitrateKbps; + const maxBitrate = + configuredBitrateKbps === null + ? undefined + : (configuredBitrateKbps ?? DEFAULT_PUBLISH_MAX_BITRATE_KBPS) * 1000; this.logger.info("LiveKit client publish config", { simulcast: publishSimulcast, maxBitrate, + adaptiveStream: this.callbacks.adaptiveStream ?? false, + dynacast: this.callbacks.dynacast ?? false, }); for (const track of this.localStream.getTracks()) { if (track.kind === "video") { diff --git a/packages/sdk/src/realtime/webrtc-manager.ts b/packages/sdk/src/realtime/webrtc-manager.ts index 8887764c..ad6b69ca 100644 --- a/packages/sdk/src/realtime/webrtc-manager.ts +++ b/packages/sdk/src/realtime/webrtc-manager.ts @@ -32,9 +32,21 @@ export interface WebRTCConfig { * livekit transport. Useful for diagnostic/benchmark tooling — lets * callers cap the client's uplink encoder or toggle simulcast without * modifying SDK internals. + * + * `livekitPublishMaxBitrateKbps`: undefined → SDK default (2500 kbps); + * `null` → explicit opt-out, no cap (let Chrome BWE run unclamped); + * a positive number → explicit kbps value. */ livekitPublishSimulcast?: boolean; - livekitPublishMaxBitrateKbps?: number; + livekitPublishMaxBitrateKbps?: number | null; + /** + * livekit-client `Room` options. Both default to `false`. Exposed for + * the bench tool; enabling either changes quality/bandwidth + * trade-offs, so leave them off in production unless you've verified + * the behavior end-to-end. + */ + livekitAdaptiveStream?: boolean; + livekitDynacast?: boolean; /** * Selects the underlying WebRTC transport. Default is "aiortc" for * back-compat with existing deployments. Set to "livekit" to join a @@ -93,6 +105,8 @@ export class WebRTCManager { ...sharedOpts, publishSimulcast: config.livekitPublishSimulcast, publishMaxBitrateKbps: config.livekitPublishMaxBitrateKbps, + adaptiveStream: config.livekitAdaptiveStream, + dynacast: config.livekitDynacast, }); } else { this.connection = new WebRTCConnection({ From 81489d0bfc42b0685f136c5923d9cc5d3489f757 Mon Sep 17 00:00:00 2001 From: Verion1 Date: Thu, 23 Apr 2026 14:27:16 +0300 Subject: [PATCH 11/16] fix(demo): mobile layout, video aspect frames; default LiveKit publish 3500 kbps Made-with: Cursor --- packages/sdk/index.html | 166 +++++++++++++----- .../sdk/src/realtime/transports/livekit.ts | 2 +- 2 files changed, 119 insertions(+), 49 deletions(-) diff --git a/packages/sdk/index.html b/packages/sdk/index.html index dc43acc5..cd237eb4 100644 --- a/packages/sdk/index.html +++ b/packages/sdk/index.html @@ -7,50 +7,68 @@ @@ -243,8 +303,10 @@

Configuration

- - +
+ + +
@@ -261,11 +323,15 @@

Configuration

📹 Local Camera

- +
+ +

🤖 Decart Output

- +
+ +
@@ -350,11 +416,15 @@

Video File Processing

diff --git a/packages/sdk/src/realtime/transports/livekit.ts b/packages/sdk/src/realtime/transports/livekit.ts index 42c74ebf..27b3aacd 100644 --- a/packages/sdk/src/realtime/transports/livekit.ts +++ b/packages/sdk/src/realtime/transports/livekit.ts @@ -73,7 +73,7 @@ interface LiveKitCallbacks { dynacast?: boolean; } -const DEFAULT_PUBLISH_MAX_BITRATE_KBPS = 2500; +const DEFAULT_PUBLISH_MAX_BITRATE_KBPS = 3500; type WsMessageEvents = { promptAck: PromptAckMessage; From 97bcee4e248d73ccfcad264602a4237a84750273 Mon Sep 17 00:00:00 2001 From: Verion1 Date: Thu, 23 Apr 2026 16:06:42 +0300 Subject: [PATCH 12/16] fix(demo): mobile portrait camera + enable info-level logging - Request camera in portrait (swapped w/h + facingMode: user) on mobile - Pass createConsoleLogger('info') so LiveKit logs actually print Made-with: Cursor --- packages/sdk/index.html | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/packages/sdk/index.html b/packages/sdk/index.html index cd237eb4..ddaf431a 100644 --- a/packages/sdk/index.html +++ b/packages/sdk/index.html @@ -436,7 +436,7 @@

Console Logs