From e768e530249999773aae14769ac23a3ab8509796 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 7 May 2026 12:35:27 +0100 Subject: [PATCH 1/4] feat(webapp): reload LLM pricing registry on Redis pub/sub Subscribe to LLM_PRICING_RELOAD_CHANNEL on the worker Redis. Any process that publishes on the channel triggers an immediate reload of the in-memory model registry. The 5-minute periodic reload stays as a backstop. Lets pricing and model changes propagate to the live registry within seconds instead of up to 5 minutes. --- .../llm-pricing-registry-reload-channel.md | 6 ++++ apps/webapp/app/env.server.ts | 1 + .../app/v3/llmPricingRegistry.server.ts | 35 ++++++++++++++++++- 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 .server-changes/llm-pricing-registry-reload-channel.md diff --git a/.server-changes/llm-pricing-registry-reload-channel.md b/.server-changes/llm-pricing-registry-reload-channel.md new file mode 100644 index 00000000000..ec1daad0a31 --- /dev/null +++ b/.server-changes/llm-pricing-registry-reload-channel.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +The LLM pricing registry now reloads from the database whenever a publish lands on `LLM_PRICING_RELOAD_CHANNEL` on the worker Redis, instead of waiting for the next 5-minute interval. LLM model and pricing changes reflect in cost enrichment within seconds. 
diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index 13e9e5dacbd..f270c9037b2 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -1424,6 +1424,7 @@ const EnvironmentSchema = z // LLM cost tracking LLM_COST_TRACKING_ENABLED: BoolEnv.default(true), LLM_PRICING_RELOAD_INTERVAL_MS: z.coerce.number().int().default(5 * 60 * 1000), // 5 minutes + LLM_PRICING_RELOAD_CHANNEL: z.string().default("llm-registry:reload"), LLM_PRICING_SEED_ON_STARTUP: BoolEnv.default(false), LLM_PRICING_READY_TIMEOUT_MS: z.coerce.number().int().default(500), LLM_METRICS_BATCH_SIZE: z.coerce.number().int().default(5000), diff --git a/apps/webapp/app/v3/llmPricingRegistry.server.ts b/apps/webapp/app/v3/llmPricingRegistry.server.ts index 2212c41779d..4931cb30c0f 100644 --- a/apps/webapp/app/v3/llmPricingRegistry.server.ts +++ b/apps/webapp/app/v3/llmPricingRegistry.server.ts @@ -1,7 +1,9 @@ import { ModelPricingRegistry, seedLlmPricing } from "@internal/llm-model-catalog"; import { prisma, $replica } from "~/db.server"; import { env } from "~/env.server"; +import { logger } from "~/services/logger.server"; import { signalsEmitter } from "~/services/signals.server"; +import { createRedisClient } from "~/redis.server"; import { singleton } from "~/utils/singleton"; import { setLlmPricingRegistry } from "./utils/enrichCreatableEvents.server"; @@ -27,7 +29,7 @@ export const llmPricingRegistry = singleton("llmPricingRegistry", () => { console.error("Failed to initialize LLM pricing registry", err); }); - // Periodic reload + // Periodic reload (backstop for the pub/sub path below) const reloadInterval = env.LLM_PRICING_RELOAD_INTERVAL_MS; const interval = setInterval(() => { registry.reload().catch((err) => { @@ -35,11 +37,42 @@ export const llmPricingRegistry = singleton("llmPricingRegistry", () => { }); }, reloadInterval); + // Pub/sub reload — billing's LLM registry worker publishes on this channel + // immediately after writing new/changed 
model rows, so all webapp pods see + // updates within ~1s instead of waiting for the next interval tick. + const subscriber = createRedisClient("llm-pricing:subscriber", { + keyPrefix: "llm-pricing:subscriber:", + host: env.COMMON_WORKER_REDIS_HOST, + port: env.COMMON_WORKER_REDIS_PORT, + username: env.COMMON_WORKER_REDIS_USERNAME, + password: env.COMMON_WORKER_REDIS_PASSWORD, + tlsDisabled: env.COMMON_WORKER_REDIS_TLS_DISABLED === "true", + clusterMode: env.COMMON_WORKER_REDIS_CLUSTER_MODE_ENABLED === "1", + }); + + subscriber.subscribe(env.LLM_PRICING_RELOAD_CHANNEL).catch((err) => { + logger.warn("Failed to subscribe to LLM pricing reload channel", { + channel: env.LLM_PRICING_RELOAD_CHANNEL, + error: err instanceof Error ? err.message : String(err), + }); + }); + + subscriber.on("message", (channel) => { + if (channel !== env.LLM_PRICING_RELOAD_CHANNEL) return; + registry.reload().catch((err) => { + logger.warn("Failed to reload LLM pricing registry from pub/sub", { + error: err instanceof Error ? err.message : String(err), + }); + }); + }); + signalsEmitter.on("SIGTERM", () => { clearInterval(interval); + void subscriber.quit().catch(() => {}); }); signalsEmitter.on("SIGINT", () => { clearInterval(interval); + void subscriber.quit().catch(() => {}); }); return registry; From 169958206e2feb550661cec0539845a03a64fd35 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 7 May 2026 13:16:35 +0100 Subject: [PATCH 2/4] feat(webapp): debounce LLM pricing registry reloads Coalesce reload calls from the pub/sub subscriber so a burst of publishes only triggers one reload. The first publish in a window schedules a reload at T+LLM_PRICING_RELOAD_DEBOUNCE_MS (default 1s); subsequent publishes during that window are no-ops because the trailing reload will pick up everything when it queries the DB. 
Bounds reload rate to at most 1 per debounce window regardless of publisher chattiness, so a runaway upstream publisher can't fan out into a flood of full-table-scan reloads across every webapp pod. --- apps/webapp/app/env.server.ts | 1 + .../app/v3/llmPricingRegistry.server.ts | 29 +++++++++++++++---- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index f270c9037b2..2ddac2709db 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -1425,6 +1425,7 @@ const EnvironmentSchema = z LLM_COST_TRACKING_ENABLED: BoolEnv.default(true), LLM_PRICING_RELOAD_INTERVAL_MS: z.coerce.number().int().default(5 * 60 * 1000), // 5 minutes LLM_PRICING_RELOAD_CHANNEL: z.string().default("llm-registry:reload"), + LLM_PRICING_RELOAD_DEBOUNCE_MS: z.coerce.number().int().default(1000), LLM_PRICING_SEED_ON_STARTUP: BoolEnv.default(false), LLM_PRICING_READY_TIMEOUT_MS: z.coerce.number().int().default(500), LLM_METRICS_BATCH_SIZE: z.coerce.number().int().default(5000), diff --git a/apps/webapp/app/v3/llmPricingRegistry.server.ts b/apps/webapp/app/v3/llmPricingRegistry.server.ts index 4931cb30c0f..31afd48a088 100644 --- a/apps/webapp/app/v3/llmPricingRegistry.server.ts +++ b/apps/webapp/app/v3/llmPricingRegistry.server.ts @@ -57,21 +57,40 @@ export const llmPricingRegistry = singleton("llmPricingRegistry", () => { }); }); + // Coalesce reload calls so a burst of publishes only triggers one reload. + // A reload always fires within LLM_PRICING_RELOAD_DEBOUNCE_MS of the first + // publish in a burst; subsequent publishes during that window are no-ops + // because the trailing-edge reload will pick up everything when it queries + // the DB. Bounds reload rate to at most 1 / debounce-window regardless of + // how chatty the publisher is. 
+ const debounceMs = env.LLM_PRICING_RELOAD_DEBOUNCE_MS; + let pendingReloadTimer: NodeJS.Timeout | null = null; + + function scheduleReload() { + if (pendingReloadTimer) return; + pendingReloadTimer = setTimeout(() => { + pendingReloadTimer = null; + registry.reload().catch((err) => { + logger.warn("Failed to reload LLM pricing registry from pub/sub", { + error: err instanceof Error ? err.message : String(err), + }); + }); + }, debounceMs); + } + subscriber.on("message", (channel) => { if (channel !== env.LLM_PRICING_RELOAD_CHANNEL) return; - registry.reload().catch((err) => { - logger.warn("Failed to reload LLM pricing registry from pub/sub", { - error: err instanceof Error ? err.message : String(err), - }); - }); + scheduleReload(); }); signalsEmitter.on("SIGTERM", () => { clearInterval(interval); + if (pendingReloadTimer) clearTimeout(pendingReloadTimer); void subscriber.quit().catch(() => {}); }); signalsEmitter.on("SIGINT", () => { clearInterval(interval); + if (pendingReloadTimer) clearTimeout(pendingReloadTimer); void subscriber.quit().catch(() => {}); }); From 1381c9d4e7509b4acc1de15ec37add379eb5f4db Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 7 May 2026 15:47:49 +0100 Subject: [PATCH 3/4] feat(webapp): make LLM pricing pub/sub subscription opt-in per process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Subscribing every replica to the reload channel — admin dashboards, workers, anything that imports the registry — fans out a full-table reload across processes that don't actually need real-time pricing freshness. The 5-minute interval is enough for those. Add LLM_PRICING_RELOAD_PUBSUB_ENABLED (default true). Set false on non-OTel services in multi-service deployments so only the span-ingesting processes subscribe and reload on publish. Default-true preserves current behavior for single-service self-hosted deployments without any env tuning. 
--- apps/webapp/app/env.server.ts | 6 + .../app/v3/llmPricingRegistry.server.ts | 107 ++++++++++-------- 2 files changed, 63 insertions(+), 50 deletions(-) diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index 2ddac2709db..487361f936c 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -1426,6 +1426,12 @@ const EnvironmentSchema = z LLM_PRICING_RELOAD_INTERVAL_MS: z.coerce.number().int().default(5 * 60 * 1000), // 5 minutes LLM_PRICING_RELOAD_CHANNEL: z.string().default("llm-registry:reload"), LLM_PRICING_RELOAD_DEBOUNCE_MS: z.coerce.number().int().default(1000), + // Whether to subscribe this process to the LLM_PRICING_RELOAD_CHANNEL. + // Defaults to true so single-service self-hosted deployments work without + // tuning. In multi-service deployments, set this to false on services + // that don't ingest spans (dashboard, workers) — only the OTel-ingesting + // services need the registry to reload in real time. + LLM_PRICING_RELOAD_PUBSUB_ENABLED: BoolEnv.default(true), LLM_PRICING_SEED_ON_STARTUP: BoolEnv.default(false), LLM_PRICING_READY_TIMEOUT_MS: z.coerce.number().int().default(500), LLM_METRICS_BATCH_SIZE: z.coerce.number().int().default(5000), diff --git a/apps/webapp/app/v3/llmPricingRegistry.server.ts b/apps/webapp/app/v3/llmPricingRegistry.server.ts index 31afd48a088..9b82882fadf 100644 --- a/apps/webapp/app/v3/llmPricingRegistry.server.ts +++ b/apps/webapp/app/v3/llmPricingRegistry.server.ts @@ -37,62 +37,69 @@ export const llmPricingRegistry = singleton("llmPricingRegistry", () => { }); }, reloadInterval); - // Pub/sub reload — billing's LLM registry worker publishes on this channel - // immediately after writing new/changed model rows, so all webapp pods see - // updates within ~1s instead of waiting for the next interval tick. 
- const subscriber = createRedisClient("llm-pricing:subscriber", { - keyPrefix: "llm-pricing:subscriber:", - host: env.COMMON_WORKER_REDIS_HOST, - port: env.COMMON_WORKER_REDIS_PORT, - username: env.COMMON_WORKER_REDIS_USERNAME, - password: env.COMMON_WORKER_REDIS_PASSWORD, - tlsDisabled: env.COMMON_WORKER_REDIS_TLS_DISABLED === "true", - clusterMode: env.COMMON_WORKER_REDIS_CLUSTER_MODE_ENABLED === "1", - }); + // Pub/sub reload is opt-in per process. Without it, the registry stays + // accurate via the existing 5-minute interval. In multi-service deployments + // we only want the OTel-ingesting services subscribed — the dashboard and + // worker services don't need real-time pricing freshness and shouldn't pile + // onto each publish with a full-table reload. + if (env.LLM_PRICING_RELOAD_PUBSUB_ENABLED) { + const subscriber = createRedisClient("llm-pricing:subscriber", { + keyPrefix: "llm-pricing:subscriber:", + host: env.COMMON_WORKER_REDIS_HOST, + port: env.COMMON_WORKER_REDIS_PORT, + username: env.COMMON_WORKER_REDIS_USERNAME, + password: env.COMMON_WORKER_REDIS_PASSWORD, + tlsDisabled: env.COMMON_WORKER_REDIS_TLS_DISABLED === "true", + clusterMode: env.COMMON_WORKER_REDIS_CLUSTER_MODE_ENABLED === "1", + }); - subscriber.subscribe(env.LLM_PRICING_RELOAD_CHANNEL).catch((err) => { - logger.warn("Failed to subscribe to LLM pricing reload channel", { - channel: env.LLM_PRICING_RELOAD_CHANNEL, - error: err instanceof Error ? err.message : String(err), + subscriber.subscribe(env.LLM_PRICING_RELOAD_CHANNEL).catch((err) => { + logger.warn("Failed to subscribe to LLM pricing reload channel", { + channel: env.LLM_PRICING_RELOAD_CHANNEL, + error: err instanceof Error ? err.message : String(err), + }); }); - }); - // Coalesce reload calls so a burst of publishes only triggers one reload. 
- // A reload always fires within LLM_PRICING_RELOAD_DEBOUNCE_MS of the first - // publish in a burst; subsequent publishes during that window are no-ops - // because the trailing-edge reload will pick up everything when it queries - // the DB. Bounds reload rate to at most 1 / debounce-window regardless of - // how chatty the publisher is. - const debounceMs = env.LLM_PRICING_RELOAD_DEBOUNCE_MS; - let pendingReloadTimer: NodeJS.Timeout | null = null; - - function scheduleReload() { - if (pendingReloadTimer) return; - pendingReloadTimer = setTimeout(() => { - pendingReloadTimer = null; - registry.reload().catch((err) => { - logger.warn("Failed to reload LLM pricing registry from pub/sub", { - error: err instanceof Error ? err.message : String(err), + // Coalesce reload calls so a burst of publishes only triggers one + // reload. The first publish schedules a reload at + // T+LLM_PRICING_RELOAD_DEBOUNCE_MS; subsequent publishes during that + // window are no-ops because the trailing reload picks up everything + // when it queries the DB. Bounds reload rate to at most 1 per debounce + // window regardless of publisher chattiness. + const debounceMs = env.LLM_PRICING_RELOAD_DEBOUNCE_MS; + let pendingReloadTimer: NodeJS.Timeout | null = null; + + function scheduleReload() { + if (pendingReloadTimer) return; + pendingReloadTimer = setTimeout(() => { + pendingReloadTimer = null; + registry.reload().catch((err) => { + logger.warn("Failed to reload LLM pricing registry from pub/sub", { + error: err instanceof Error ? 
err.message : String(err), + }); }); - }); - }, debounceMs); - } + }, debounceMs); + } - subscriber.on("message", (channel) => { - if (channel !== env.LLM_PRICING_RELOAD_CHANNEL) return; - scheduleReload(); - }); + subscriber.on("message", (channel) => { + if (channel !== env.LLM_PRICING_RELOAD_CHANNEL) return; + scheduleReload(); + }); - signalsEmitter.on("SIGTERM", () => { - clearInterval(interval); - if (pendingReloadTimer) clearTimeout(pendingReloadTimer); - void subscriber.quit().catch(() => {}); - }); - signalsEmitter.on("SIGINT", () => { - clearInterval(interval); - if (pendingReloadTimer) clearTimeout(pendingReloadTimer); - void subscriber.quit().catch(() => {}); - }); + signalsEmitter.on("SIGTERM", () => { + clearInterval(interval); + if (pendingReloadTimer) clearTimeout(pendingReloadTimer); + void subscriber.quit().catch(() => {}); + }); + signalsEmitter.on("SIGINT", () => { + clearInterval(interval); + if (pendingReloadTimer) clearTimeout(pendingReloadTimer); + void subscriber.quit().catch(() => {}); + }); + } else { + signalsEmitter.on("SIGTERM", () => clearInterval(interval)); + signalsEmitter.on("SIGINT", () => clearInterval(interval)); + } return registry; }); From 0985da35d9aa1b163e6e30488cd86cebbc0cf5e5 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 7 May 2026 15:51:28 +0100 Subject: [PATCH 4/4] fix(webapp): default LLM_PRICING_RELOAD_PUBSUB_ENABLED to off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most processes that import the registry (dashboard, workers, single- service self-hosted webapp) don't actually need real-time pricing freshness — the existing 5-minute interval is fine. Only OTel-ingesting services where pricing directly affects span cost enrichment need to subscribe. Default off, opt-in on the span-ingesting services in multi-service deployments. 
Self-hosters running a single webapp can flip it on if they want near-real-time freshness (about a second with the default debounce), but the default keeps reload load off processes that don't benefit from it. --- apps/webapp/app/env.server.ts | 10 +++++----- apps/webapp/app/v3/llmPricingRegistry.server.ts | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index 487361f936c..6b58896429d 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -1427,11 +1427,11 @@ const EnvironmentSchema = z LLM_PRICING_RELOAD_CHANNEL: z.string().default("llm-registry:reload"), LLM_PRICING_RELOAD_DEBOUNCE_MS: z.coerce.number().int().default(1000), // Whether to subscribe this process to the LLM_PRICING_RELOAD_CHANNEL. - // Defaults to true so single-service self-hosted deployments work without - // tuning. In multi-service deployments, set this to false on services - // that don't ingest spans (dashboard, workers) — only the OTel-ingesting - // services need the registry to reload in real time. - LLM_PRICING_RELOAD_PUBSUB_ENABLED: BoolEnv.default(true), + // Default off — only OTel-ingesting services need real-time pricing + // freshness; dashboard/worker processes are fine on the existing + // 5-minute periodic reload. In multi-service deployments, set this to + // true on the span-ingesting services. 
+ LLM_PRICING_RELOAD_PUBSUB_ENABLED: BoolEnv.default(false), LLM_PRICING_SEED_ON_STARTUP: BoolEnv.default(false), LLM_PRICING_READY_TIMEOUT_MS: z.coerce.number().int().default(500), LLM_METRICS_BATCH_SIZE: z.coerce.number().int().default(5000), diff --git a/apps/webapp/app/v3/llmPricingRegistry.server.ts b/apps/webapp/app/v3/llmPricingRegistry.server.ts index 9b82882fadf..eb186e15213 100644 --- a/apps/webapp/app/v3/llmPricingRegistry.server.ts +++ b/apps/webapp/app/v3/llmPricingRegistry.server.ts @@ -37,11 +37,11 @@ export const llmPricingRegistry = singleton("llmPricingRegistry", () => { }); }, reloadInterval); - // Pub/sub reload is opt-in per process. Without it, the registry stays - // accurate via the existing 5-minute interval. In multi-service deployments - // we only want the OTel-ingesting services subscribed — the dashboard and - // worker services don't need real-time pricing freshness and shouldn't pile - // onto each publish with a full-table reload. + // Pub/sub reload is opt-in per process (default off). Without it, the + // registry stays accurate via the existing 5-minute interval. Enable on + // the OTel-ingesting services where pricing freshness directly affects + // span cost enrichment; dashboard and worker services don't need it and + // shouldn't pile onto each publish with a full-table reload. if (env.LLM_PRICING_RELOAD_PUBSUB_ENABLED) { const subscriber = createRedisClient("llm-pricing:subscriber", { keyPrefix: "llm-pricing:subscriber:",