Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
ac1e2f0
Add livekit transport opt-in alongside aiortc
nagar-decart Apr 19, 2026
edb61bd
demo: add transport radio toggle + pass it to realtime.connect
nagar-decart Apr 19, 2026
6fcc5de
feat(realtime): emit serverMetrics event for both transports
nagar-decart Apr 20, 2026
64aea11
feat(realtime): emit markerConfig event for both transports
nagar-decart Apr 20, 2026
d44b124
feat(realtime): remove markerConfig event
nagar-decart Apr 20, 2026
33e8f5e
feat(realtime): stats event for livekit + simulcast-safe outbound bit…
nagar-decart Apr 21, 2026
1eb2bbe
feat(realtime): restore selectedCandidatePairs in stats.connection
nagar-decart Apr 22, 2026
e286a42
feat(realtime): expand WebRTCStats with ~25 standard RTCStats fields
nagar-decart Apr 22, 2026
5be1d75
feat(realtime): expose livekit publishTrack overrides (simulcast + ma…
nagar-decart Apr 23, 2026
84e0a6f
feat(realtime): default livekit publish cap to 2500 kbps; expose adap…
nagar-decart Apr 23, 2026
81489d0
fix(demo): mobile layout, video aspect frames; default LiveKit publis…
VerioN1 Apr 23, 2026
97bcee4
fix(demo): mobile portrait camera + enable info-level logging
VerioN1 Apr 23, 2026
1cf468c
fix(demo): mirror SDK logs in page; robust mobile portrait camera
VerioN1 Apr 23, 2026
dc475bb
fix(realtime,demo): reliable mobile detection + unconditional transpo…
VerioN1 Apr 23, 2026
ebf477b
style(demo): pink background for video sections
VerioN1 Apr 23, 2026
2876cb3
feat(realtime): add client-side livekit publishTrack knobs (codec, ma…
nagar-decart Apr 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
265 changes: 209 additions & 56 deletions packages/sdk/index.html

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions packages/sdk/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
"vitest": "^4.0.18"
},
"dependencies": {
"livekit-client": "^2.0.0",
"mitt": "^3.0.1",
"p-retry": "^6.2.1",
"zod": "^4.0.17"
Expand Down
92 changes: 79 additions & 13 deletions packages/sdk/src/realtime/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,14 @@ import {
type SubscribeOptions,
} from "./subscribe-client";
import { type ITelemetryReporter, NullTelemetryReporter, TelemetryReporter } from "./telemetry-reporter";
import type { ConnectionState, GenerationTickMessage, SessionIdMessage } from "./types";
import type {
ConnectionState,
GenerationTickMessage,
ServerMetricsMessage,
SessionIdMessage,
} from "./types";
import { WebRTCManager } from "./webrtc-manager";
import { type WebRTCStats, WebRTCStatsCollector } from "./webrtc-stats";
import { type StatsProvider, type WebRTCStats, WebRTCStatsCollector } from "./webrtc-stats";

async function blobToBase64(blob: Blob): Promise<string> {
return new Promise((resolve, reject) => {
Expand Down Expand Up @@ -93,6 +98,26 @@ const realTimeClientConnectOptionsSchema = z.object({
}),
initialState: realTimeClientInitialStateSchema.optional(),
customizeOffer: createAsyncFunctionSchema(z.function()).optional(),
// Opt-in per-session WebRTC transport. Defaults to "aiortc" (current
// shipping behavior). Set to "livekit" to join a LiveKit SFU room; the
// inference pod must have livekit in TRANSPORTS_ENABLED or the session
// will be rejected.
transport: z.enum(["aiortc", "livekit"]).optional(),
// Client-side livekit publish overrides. Ignored on aiortc.
// - `livekitPublishSimulcast` defaults to true (livekit-client default).
// - `livekitPublishMaxBitrateKbps` defaults to 2500. Pass `null` to
// opt out of the cap entirely (lets Chrome BWE run unclamped).
// - `livekitAdaptiveStream` / `livekitDynacast` default to `false`
// (matches historical shipped behavior). Exposed for the bench tool.
livekitPublishSimulcast: z.boolean().optional(),
livekitPublishMaxBitrateKbps: z.union([z.number().positive(), z.null()]).optional(),
livekitAdaptiveStream: z.boolean().optional(),
livekitDynacast: z.boolean().optional(),
// Client-side publishTrack knobs: pin a codec (symmetric with
// server), override the 30 fps default, choose degradation behavior.
livekitPublishCodec: z.enum(["vp8", "vp9", "h264", "av1"]).optional(),
livekitPublishMaxFramerate: z.number().positive().optional(),
livekitDegradationPreference: z.enum(["balanced", "maintain-framerate", "maintain-resolution"]).optional(),
});
export type RealTimeClientConnectOptions = Omit<z.infer<typeof realTimeClientConnectOptionsSchema>, "model"> & {
model: ModelDefinition | CustomModelDefinition;
Expand All @@ -104,6 +129,12 @@ export type Events = {
generationTick: { seconds: number };
diagnostic: DiagnosticEvent;
stats: WebRTCStats;
/**
* Optional server-side per-session metrics. Emitted only when the client
* connects with `?emit_server_metrics=1` on the realtime URL (consumed by
* the webrtc-bench tool). Normal SDK consumers can ignore this event.
*/
serverMetrics: ServerMetricsMessage;
};

export type RealTimeClient = {
Expand Down Expand Up @@ -167,12 +198,22 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => {
}
: undefined;

const url = `${baseUrl}${options.model.urlPath}`;
// Preserve any query string already present on baseUrl (e.g. the
// webrtc-bench tool appends opt-in flags like ?emit_server_metrics=1)
// by splitting it off before prepending the model path, then merging
// with the SDK's own params. Without this the final URL ended up as
// `baseUrl?X=Y/v1/stream?api_key=...` — two `?` separators, which
// every WS server rejects with 404.
const baseQueryIdx = baseUrl.indexOf("?");
const baseOrigin = baseQueryIdx === -1 ? baseUrl : baseUrl.slice(0, baseQueryIdx);
const baseExtraQuery = baseQueryIdx === -1 ? "" : baseUrl.slice(baseQueryIdx + 1);
const url = `${baseOrigin}${options.model.urlPath}`;
const extraQueryPrefix = baseExtraQuery ? `${baseExtraQuery}&` : "";

const { emitter: eventEmitter, emitOrBuffer, flush, stop } = createEventBuffer<Events>();

webrtcManager = new WebRTCManager({
webrtcUrl: `${url}?api_key=${encodeURIComponent(apiKey)}&model=${encodeURIComponent(options.model.name)}`,
webrtcUrl: `${url}?${extraQueryPrefix}api_key=${encodeURIComponent(apiKey)}&model=${encodeURIComponent(options.model.name)}`,
integration,
logger,
onDiagnostic: (name, data) => {
Expand All @@ -194,6 +235,14 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => {
modelName: options.model.name,
initialImage,
initialPrompt,
transport: options.transport,
livekitPublishSimulcast: options.livekitPublishSimulcast,
livekitPublishMaxBitrateKbps: options.livekitPublishMaxBitrateKbps,
livekitAdaptiveStream: options.livekitAdaptiveStream,
livekitDynacast: options.livekitDynacast,
livekitPublishCodec: options.livekitPublishCodec,
livekitPublishMaxFramerate: options.livekitPublishMaxFramerate,
livekitDegradationPreference: options.livekitDegradationPreference,
});

const manager = webrtcManager;
Expand Down Expand Up @@ -258,12 +307,19 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => {
};
manager.getWebsocketMessageEmitter().on("generationTick", tickListener);

// Opt-in server-side metrics — fans WebRTCConnection + LiveKitConnection
// emissions out onto the public RealTimeClient.on("serverMetrics", …).
const serverMetricsListener = (msg: ServerMetricsMessage) => {
emitOrBuffer("serverMetrics", msg);
};
manager.getWebsocketMessageEmitter().on("serverMetrics", serverMetricsListener);

await manager.connect(inputStream);

const methods = realtimeMethods(manager, imageToBase64);

let statsCollector: WebRTCStatsCollector | null = null;
let statsCollectorPeerConnection: RTCPeerConnection | null = null;
let statsCollectorSource: StatsProvider | null = null;

// Video stall detection state (Twilio pattern: fps < 0.5 = stalled)
const STALL_FPS_THRESHOLD = 0.5;
Expand All @@ -275,10 +331,13 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => {
videoStalled = false;
stallStartMs = 0;
statsCollector = new WebRTCStatsCollector();
const pc = manager.getPeerConnection();
statsCollectorPeerConnection = pc;
if (pc) {
statsCollector.start(pc, (stats) => {
// For aiortc this is the raw RTCPeerConnection; for livekit it's
// an aggregator over the Room's tracks. Either way the collector
// just calls `.getStats()` and parses the returned report.
const source = manager.getStatsProvider();
statsCollectorSource = source;
if (source) {
statsCollector.start(source, (stats) => {
emitOrBuffer("stats", stats);
telemetryReporter.addStats(stats);

Expand All @@ -300,7 +359,7 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => {
return () => {
statsCollector?.stop();
statsCollector = null;
statsCollectorPeerConnection = null;
statsCollectorSource = null;
};
};

Expand All @@ -313,8 +372,8 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => {
return;
}

const peerConnection = manager.getPeerConnection();
if (!peerConnection || peerConnection === statsCollectorPeerConnection) {
const source = manager.getStatsProvider();
if (!source || source === statsCollectorSource) {
return;
}

Expand Down Expand Up @@ -376,7 +435,14 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => {

const subscribe = async (options: SubscribeOptions): Promise<RealTimeSubscribeClient> => {
const { sid, ip, port } = decodeSubscribeToken(options.token);
const subscribeUrl = `${baseUrl}/subscribe/${encodeURIComponent(sid)}?IP=${encodeURIComponent(ip)}&port=${encodeURIComponent(port)}&api_key=${encodeURIComponent(apiKey)}`;
// Same baseUrl-with-query handling as the connect() path above — split
// existing query off before appending the /subscribe/ path so we end up
// with a single `?` separator.
const subQueryIdx = baseUrl.indexOf("?");
const subBaseOrigin = subQueryIdx === -1 ? baseUrl : baseUrl.slice(0, subQueryIdx);
const subBaseExtraQuery = subQueryIdx === -1 ? "" : baseUrl.slice(subQueryIdx + 1);
const subExtraQueryPrefix = subBaseExtraQuery ? `${subBaseExtraQuery}&` : "";
const subscribeUrl = `${subBaseOrigin}/subscribe/${encodeURIComponent(sid)}?${subExtraQueryPrefix}IP=${encodeURIComponent(ip)}&port=${encodeURIComponent(port)}&api_key=${encodeURIComponent(apiKey)}`;

const { emitter: eventEmitter, emitOrBuffer, flush, stop } = createEventBuffer<SubscribeEvents>();

Expand Down
21 changes: 21 additions & 0 deletions packages/sdk/src/realtime/transports/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/**
* Transport dispatch for the realtime SDK.
*
* Today the SDK ships two transports:
* - aiortc (default, shipping): raw RTCPeerConnection + WebSocket signaling.
* The inference server handles media via aiortc server-side.
* - livekit (opt-in): joins a LiveKit SFU room; the inference server
* publishes/subscribes in the same room.
*
* Both transports talk to bouncer via the same WS URL. The transport-specific
* difference is only in the media setup handshake (SDP offer vs. room_info)
* and which media stack moves frames. Control messages (prompt, set_image,
* session_id, generation_tick, acks) flow over the bouncer WS for both.
*/

export type TransportKind = "aiortc" | "livekit";

// Re-export the two concrete connections via a named surface so consumers
// (WebRTCManager) don't need to import from each file individually.
export { WebRTCConnection as AiortcConnection } from "../webrtc-connection";
export { LiveKitConnection } from "./livekit";
Loading
Loading