From e768e530249999773aae14769ac23a3ab8509796 Mon Sep 17 00:00:00 2001
From: Eric Allam <eallam@icloud.com>
Date: Thu, 7 May 2026 12:35:27 +0100
Subject: [PATCH 1/4] feat(webapp): reload LLM pricing registry on Redis
 pub/sub

Subscribe to LLM_PRICING_RELOAD_CHANNEL on the worker Redis. Any
process that publishes on the channel triggers an immediate reload
of the in-memory model registry. The 5-minute periodic reload stays
as a backstop.

Lets pricing and model changes propagate to the live registry within
seconds instead of up to 5 minutes.
---
 .../llm-pricing-registry-reload-channel.md    |  6 ++++
 apps/webapp/app/env.server.ts                 |  1 +
 .../app/v3/llmPricingRegistry.server.ts       | 35 ++++++++++++++++++-
 3 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 .server-changes/llm-pricing-registry-reload-channel.md

diff --git a/.server-changes/llm-pricing-registry-reload-channel.md b/.server-changes/llm-pricing-registry-reload-channel.md
new file mode 100644
index 00000000000..ec1daad0a31
--- /dev/null
+++ b/.server-changes/llm-pricing-registry-reload-channel.md
@@ -0,0 +1,6 @@
+---
+area: webapp
+type: improvement
+---
+
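+The LLM pricing registry can now reload from the database whenever a publish lands on `LLM_PRICING_RELOAD_CHANNEL` on the worker Redis, instead of waiting for the next 5-minute interval. The subscription is opt-in per process via `LLM_PRICING_RELOAD_PUBSUB_ENABLED` (default off); on processes where it is enabled, LLM model and pricing changes reflect in cost enrichment within seconds.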
diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts
index 13e9e5dacbd..f270c9037b2 100644
--- a/apps/webapp/app/env.server.ts
+++ b/apps/webapp/app/env.server.ts
@@ -1424,6 +1424,7 @@ const EnvironmentSchema = z
     // LLM cost tracking
     LLM_COST_TRACKING_ENABLED: BoolEnv.default(true),
     LLM_PRICING_RELOAD_INTERVAL_MS: z.coerce.number().int().default(5 * 60 * 1000), // 5 minutes
+    LLM_PRICING_RELOAD_CHANNEL: z.string().default("llm-registry:reload"),
     LLM_PRICING_SEED_ON_STARTUP: BoolEnv.default(false),
     LLM_PRICING_READY_TIMEOUT_MS: z.coerce.number().int().default(500),
     LLM_METRICS_BATCH_SIZE: z.coerce.number().int().default(5000),
diff --git a/apps/webapp/app/v3/llmPricingRegistry.server.ts b/apps/webapp/app/v3/llmPricingRegistry.server.ts
index 2212c41779d..4931cb30c0f 100644
--- a/apps/webapp/app/v3/llmPricingRegistry.server.ts
+++ b/apps/webapp/app/v3/llmPricingRegistry.server.ts
@@ -1,7 +1,9 @@
 import { ModelPricingRegistry, seedLlmPricing } from "@internal/llm-model-catalog";
 import { prisma, $replica } from "~/db.server";
 import { env } from "~/env.server";
+import { logger } from "~/services/logger.server";
 import { signalsEmitter } from "~/services/signals.server";
+import { createRedisClient } from "~/redis.server";
 import { singleton } from "~/utils/singleton";
 import { setLlmPricingRegistry } from "./utils/enrichCreatableEvents.server";
 
@@ -27,7 +29,7 @@ export const llmPricingRegistry = singleton("llmPricingRegistry", () => {
     console.error("Failed to initialize LLM pricing registry", err);
   });
 
-  // Periodic reload
+  // Periodic reload (the default refresh path; also the backstop when pub/sub reload is on)
   const reloadInterval = env.LLM_PRICING_RELOAD_INTERVAL_MS;
   const interval = setInterval(() => {
     registry.reload().catch((err) => {
@@ -35,11 +37,42 @@ export const llmPricingRegistry = singleton("llmPricingRegistry", () => {
     });
   }, reloadInterval);
 
+  // Pub/sub reload — billing's LLM registry worker publishes on this channel
+  // immediately after writing new/changed model rows, so all webapp pods see
+  // updates within ~1s instead of waiting for the next interval tick.
+  const subscriber = createRedisClient("llm-pricing:subscriber", {
+    keyPrefix: "llm-pricing:subscriber:",
+    host: env.COMMON_WORKER_REDIS_HOST,
+    port: env.COMMON_WORKER_REDIS_PORT,
+    username: env.COMMON_WORKER_REDIS_USERNAME,
+    password: env.COMMON_WORKER_REDIS_PASSWORD,
+    tlsDisabled: env.COMMON_WORKER_REDIS_TLS_DISABLED === "true",
+    clusterMode: env.COMMON_WORKER_REDIS_CLUSTER_MODE_ENABLED === "1",
+  });
+
+  subscriber.subscribe(env.LLM_PRICING_RELOAD_CHANNEL).catch((err) => {
+    logger.warn("Failed to subscribe to LLM pricing reload channel", {
+      channel: env.LLM_PRICING_RELOAD_CHANNEL,
+      error: err instanceof Error ? err.message : String(err),
+    });
+  });
+
+  subscriber.on("message", (channel) => {
+    if (channel !== env.LLM_PRICING_RELOAD_CHANNEL) return;
+    registry.reload().catch((err) => {
+      logger.warn("Failed to reload LLM pricing registry from pub/sub", {
+        error: err instanceof Error ? err.message : String(err),
+      });
+    });
+  });
+
   signalsEmitter.on("SIGTERM", () => {
     clearInterval(interval);
+    void subscriber.quit().catch(() => {});
   });
   signalsEmitter.on("SIGINT", () => {
     clearInterval(interval);
+    void subscriber.quit().catch(() => {});
   });
 
   return registry;

From 169958206e2feb550661cec0539845a03a64fd35 Mon Sep 17 00:00:00 2001
From: Eric Allam <eallam@icloud.com>
Date: Thu, 7 May 2026 13:16:35 +0100
Subject: [PATCH 2/4] feat(webapp): debounce LLM pricing registry reloads

Coalesce reload calls from the pub/sub subscriber so a burst of
publishes only triggers one reload. The first publish in a window
schedules a reload at T+LLM_PRICING_RELOAD_DEBOUNCE_MS (default 1s);
subsequent publishes during that window are no-ops because the
trailing reload will pick up everything when it queries the DB.

Bounds reload rate to at most 1 per debounce window regardless of
publisher chattiness, so a runaway upstream publisher can't fan out
into a flood of full-table-scan reloads across every webapp pod.
---
 apps/webapp/app/env.server.ts                 |  1 +
 .../app/v3/llmPricingRegistry.server.ts       | 29 +++++++++++++++----
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts
index f270c9037b2..2ddac2709db 100644
--- a/apps/webapp/app/env.server.ts
+++ b/apps/webapp/app/env.server.ts
@@ -1425,6 +1425,7 @@ const EnvironmentSchema = z
     LLM_COST_TRACKING_ENABLED: BoolEnv.default(true),
     LLM_PRICING_RELOAD_INTERVAL_MS: z.coerce.number().int().default(5 * 60 * 1000), // 5 minutes
     LLM_PRICING_RELOAD_CHANNEL: z.string().default("llm-registry:reload"),
+    LLM_PRICING_RELOAD_DEBOUNCE_MS: z.coerce.number().int().default(1000),
     LLM_PRICING_SEED_ON_STARTUP: BoolEnv.default(false),
     LLM_PRICING_READY_TIMEOUT_MS: z.coerce.number().int().default(500),
     LLM_METRICS_BATCH_SIZE: z.coerce.number().int().default(5000),
diff --git a/apps/webapp/app/v3/llmPricingRegistry.server.ts b/apps/webapp/app/v3/llmPricingRegistry.server.ts
index 4931cb30c0f..31afd48a088 100644
--- a/apps/webapp/app/v3/llmPricingRegistry.server.ts
+++ b/apps/webapp/app/v3/llmPricingRegistry.server.ts
@@ -57,21 +57,40 @@ export const llmPricingRegistry = singleton("llmPricingRegistry", () => {
     });
   });
 
+  // Coalesce reload calls so a burst of publishes only triggers one reload.
+  // A reload always fires within LLM_PRICING_RELOAD_DEBOUNCE_MS of the first
+  // publish in a burst; subsequent publishes during that window are no-ops
+  // because the trailing-edge reload will pick up everything when it queries
+  // the DB. Bounds reload rate to at most 1 / debounce-window regardless of
+  // how chatty the publisher is.
+  const debounceMs = env.LLM_PRICING_RELOAD_DEBOUNCE_MS;
+  let pendingReloadTimer: NodeJS.Timeout | null = null;
+
+  function scheduleReload() {
+    if (pendingReloadTimer) return;
+    pendingReloadTimer = setTimeout(() => {
+      pendingReloadTimer = null;
+      registry.reload().catch((err) => {
+        logger.warn("Failed to reload LLM pricing registry from pub/sub", {
+          error: err instanceof Error ? err.message : String(err),
+        });
+      });
+    }, debounceMs);
+  }
+
   subscriber.on("message", (channel) => {
     if (channel !== env.LLM_PRICING_RELOAD_CHANNEL) return;
-    registry.reload().catch((err) => {
-      logger.warn("Failed to reload LLM pricing registry from pub/sub", {
-        error: err instanceof Error ? err.message : String(err),
-      });
-    });
+    scheduleReload();
   });
 
   signalsEmitter.on("SIGTERM", () => {
     clearInterval(interval);
+    if (pendingReloadTimer) clearTimeout(pendingReloadTimer);
     void subscriber.quit().catch(() => {});
   });
   signalsEmitter.on("SIGINT", () => {
     clearInterval(interval);
+    if (pendingReloadTimer) clearTimeout(pendingReloadTimer);
     void subscriber.quit().catch(() => {});
   });
 

From 1381c9d4e7509b4acc1de15ec37add379eb5f4db Mon Sep 17 00:00:00 2001
From: Eric Allam <eallam@icloud.com>
Date: Thu, 7 May 2026 15:47:49 +0100
Subject: [PATCH 3/4] feat(webapp): make LLM pricing pub/sub subscription
 opt-in per process
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Subscribing every replica to the reload channel — admin dashboards,
workers, anything that imports the registry — fans out a full-table
reload across processes that don't actually need real-time pricing
freshness. The 5-minute interval is enough for those.

Add LLM_PRICING_RELOAD_PUBSUB_ENABLED (default true). Set false on
non-OTel services in multi-service deployments so only the
span-ingesting processes subscribe and reload on publish.

Default-true preserves current behavior for single-service self-hosted
deployments without any env tuning.
---
 apps/webapp/app/env.server.ts                 |   6 +
 .../app/v3/llmPricingRegistry.server.ts       | 107 ++++++++++--------
 2 files changed, 63 insertions(+), 50 deletions(-)

diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts
index 2ddac2709db..487361f936c 100644
--- a/apps/webapp/app/env.server.ts
+++ b/apps/webapp/app/env.server.ts
@@ -1426,6 +1426,12 @@ const EnvironmentSchema = z
     LLM_PRICING_RELOAD_INTERVAL_MS: z.coerce.number().int().default(5 * 60 * 1000), // 5 minutes
     LLM_PRICING_RELOAD_CHANNEL: z.string().default("llm-registry:reload"),
     LLM_PRICING_RELOAD_DEBOUNCE_MS: z.coerce.number().int().default(1000),
+    // Whether to subscribe this process to the LLM_PRICING_RELOAD_CHANNEL.
+    // Defaults to true so single-service self-hosted deployments work without
+    // tuning. In multi-service deployments, set this to false on services
+    // that don't ingest spans (dashboard, workers) — only the OTel-ingesting
+    // services need the registry to reload in real time.
+    LLM_PRICING_RELOAD_PUBSUB_ENABLED: BoolEnv.default(true),
     LLM_PRICING_SEED_ON_STARTUP: BoolEnv.default(false),
     LLM_PRICING_READY_TIMEOUT_MS: z.coerce.number().int().default(500),
     LLM_METRICS_BATCH_SIZE: z.coerce.number().int().default(5000),
diff --git a/apps/webapp/app/v3/llmPricingRegistry.server.ts b/apps/webapp/app/v3/llmPricingRegistry.server.ts
index 31afd48a088..9b82882fadf 100644
--- a/apps/webapp/app/v3/llmPricingRegistry.server.ts
+++ b/apps/webapp/app/v3/llmPricingRegistry.server.ts
@@ -37,62 +37,69 @@ export const llmPricingRegistry = singleton("llmPricingRegistry", () => {
     });
   }, reloadInterval);
 
-  // Pub/sub reload — billing's LLM registry worker publishes on this channel
-  // immediately after writing new/changed model rows, so all webapp pods see
-  // updates within ~1s instead of waiting for the next interval tick.
-  const subscriber = createRedisClient("llm-pricing:subscriber", {
-    keyPrefix: "llm-pricing:subscriber:",
-    host: env.COMMON_WORKER_REDIS_HOST,
-    port: env.COMMON_WORKER_REDIS_PORT,
-    username: env.COMMON_WORKER_REDIS_USERNAME,
-    password: env.COMMON_WORKER_REDIS_PASSWORD,
-    tlsDisabled: env.COMMON_WORKER_REDIS_TLS_DISABLED === "true",
-    clusterMode: env.COMMON_WORKER_REDIS_CLUSTER_MODE_ENABLED === "1",
-  });
+  // Pub/sub reload is opt-in per process. Without it, the registry stays
+  // accurate via the existing 5-minute interval. In multi-service deployments
+  // we only want the OTel-ingesting services subscribed — the dashboard and
+  // worker services don't need real-time pricing freshness and shouldn't pile
+  // onto each publish with a full-table reload.
+  if (env.LLM_PRICING_RELOAD_PUBSUB_ENABLED) {
+    const subscriber = createRedisClient("llm-pricing:subscriber", {
+      keyPrefix: "llm-pricing:subscriber:",
+      host: env.COMMON_WORKER_REDIS_HOST,
+      port: env.COMMON_WORKER_REDIS_PORT,
+      username: env.COMMON_WORKER_REDIS_USERNAME,
+      password: env.COMMON_WORKER_REDIS_PASSWORD,
+      tlsDisabled: env.COMMON_WORKER_REDIS_TLS_DISABLED === "true",
+      clusterMode: env.COMMON_WORKER_REDIS_CLUSTER_MODE_ENABLED === "1",
+    });
 
-  subscriber.subscribe(env.LLM_PRICING_RELOAD_CHANNEL).catch((err) => {
-    logger.warn("Failed to subscribe to LLM pricing reload channel", {
-      channel: env.LLM_PRICING_RELOAD_CHANNEL,
-      error: err instanceof Error ? err.message : String(err),
+    subscriber.subscribe(env.LLM_PRICING_RELOAD_CHANNEL).catch((err) => {
+      logger.warn("Failed to subscribe to LLM pricing reload channel", {
+        channel: env.LLM_PRICING_RELOAD_CHANNEL,
+        error: err instanceof Error ? err.message : String(err),
+      });
     });
-  });
 
-  // Coalesce reload calls so a burst of publishes only triggers one reload.
-  // A reload always fires within LLM_PRICING_RELOAD_DEBOUNCE_MS of the first
-  // publish in a burst; subsequent publishes during that window are no-ops
-  // because the trailing-edge reload will pick up everything when it queries
-  // the DB. Bounds reload rate to at most 1 / debounce-window regardless of
-  // how chatty the publisher is.
-  const debounceMs = env.LLM_PRICING_RELOAD_DEBOUNCE_MS;
-  let pendingReloadTimer: NodeJS.Timeout | null = null;
-
-  function scheduleReload() {
-    if (pendingReloadTimer) return;
-    pendingReloadTimer = setTimeout(() => {
-      pendingReloadTimer = null;
-      registry.reload().catch((err) => {
-        logger.warn("Failed to reload LLM pricing registry from pub/sub", {
-          error: err instanceof Error ? err.message : String(err),
+    // Coalesce reload calls so a burst of publishes only triggers one
+    // reload. The first publish schedules a reload at
+    // T+LLM_PRICING_RELOAD_DEBOUNCE_MS; subsequent publishes during that
+    // window are no-ops because the trailing reload picks up everything
+    // when it queries the DB. Bounds reload rate to at most 1 per debounce
+    // window regardless of publisher chattiness.
+    const debounceMs = env.LLM_PRICING_RELOAD_DEBOUNCE_MS;
+    let pendingReloadTimer: NodeJS.Timeout | null = null;
+
+    function scheduleReload() {
+      if (pendingReloadTimer) return;
+      pendingReloadTimer = setTimeout(() => {
+        pendingReloadTimer = null;
+        registry.reload().catch((err) => {
+          logger.warn("Failed to reload LLM pricing registry from pub/sub", {
+            error: err instanceof Error ? err.message : String(err),
+          });
         });
-      });
-    }, debounceMs);
-  }
+      }, debounceMs);
+    }
 
-  subscriber.on("message", (channel) => {
-    if (channel !== env.LLM_PRICING_RELOAD_CHANNEL) return;
-    scheduleReload();
-  });
+    subscriber.on("message", (channel) => {
+      if (channel !== env.LLM_PRICING_RELOAD_CHANNEL) return;
+      scheduleReload();
+    });
 
-  signalsEmitter.on("SIGTERM", () => {
-    clearInterval(interval);
-    if (pendingReloadTimer) clearTimeout(pendingReloadTimer);
-    void subscriber.quit().catch(() => {});
-  });
-  signalsEmitter.on("SIGINT", () => {
-    clearInterval(interval);
-    if (pendingReloadTimer) clearTimeout(pendingReloadTimer);
-    void subscriber.quit().catch(() => {});
-  });
+    signalsEmitter.on("SIGTERM", () => {
+      clearInterval(interval);
+      if (pendingReloadTimer) clearTimeout(pendingReloadTimer);
+      void subscriber.quit().catch(() => {});
+    });
+    signalsEmitter.on("SIGINT", () => {
+      clearInterval(interval);
+      if (pendingReloadTimer) clearTimeout(pendingReloadTimer);
+      void subscriber.quit().catch(() => {});
+    });
+  } else {
+    signalsEmitter.on("SIGTERM", () => clearInterval(interval));
+    signalsEmitter.on("SIGINT", () => clearInterval(interval));
+  }
 
   return registry;
 });

From 0985da35d9aa1b163e6e30488cd86cebbc0cf5e5 Mon Sep 17 00:00:00 2001
From: Eric Allam <eallam@icloud.com>
Date: Thu, 7 May 2026 15:51:28 +0100
Subject: [PATCH 4/4] fix(webapp): default LLM_PRICING_RELOAD_PUBSUB_ENABLED to
 off
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most processes that import the registry (dashboard, workers, single-
service self-hosted webapp) don't actually need real-time pricing
freshness — the existing 5-minute interval is fine. Only OTel-ingesting
services where pricing directly affects span cost enrichment need to
subscribe.

Default off, opt-in on the span-ingesting services in multi-service
deployments. Self-hosters running a single webapp can flip it on if
they want pricing changes picked up within seconds (each reload fires
LLM_PRICING_RELOAD_DEBOUNCE_MS, default 1s, after the first publish
in a burst), but the default keeps reload load off processes that
don't benefit from it.
---
 apps/webapp/app/env.server.ts                   | 10 +++++-----
 apps/webapp/app/v3/llmPricingRegistry.server.ts | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts
index 487361f936c..6b58896429d 100644
--- a/apps/webapp/app/env.server.ts
+++ b/apps/webapp/app/env.server.ts
@@ -1427,11 +1427,11 @@ const EnvironmentSchema = z
     LLM_PRICING_RELOAD_CHANNEL: z.string().default("llm-registry:reload"),
     LLM_PRICING_RELOAD_DEBOUNCE_MS: z.coerce.number().int().default(1000),
     // Whether to subscribe this process to the LLM_PRICING_RELOAD_CHANNEL.
-    // Defaults to true so single-service self-hosted deployments work without
-    // tuning. In multi-service deployments, set this to false on services
-    // that don't ingest spans (dashboard, workers) — only the OTel-ingesting
-    // services need the registry to reload in real time.
-    LLM_PRICING_RELOAD_PUBSUB_ENABLED: BoolEnv.default(true),
+    // Default off — only OTel-ingesting services need real-time pricing
+    // freshness; dashboard/worker processes are fine on the existing
+    // 5-minute periodic reload. In multi-service deployments, set this to
+    // true on the span-ingesting services.
+    LLM_PRICING_RELOAD_PUBSUB_ENABLED: BoolEnv.default(false),
     LLM_PRICING_SEED_ON_STARTUP: BoolEnv.default(false),
     LLM_PRICING_READY_TIMEOUT_MS: z.coerce.number().int().default(500),
     LLM_METRICS_BATCH_SIZE: z.coerce.number().int().default(5000),
diff --git a/apps/webapp/app/v3/llmPricingRegistry.server.ts b/apps/webapp/app/v3/llmPricingRegistry.server.ts
index 9b82882fadf..eb186e15213 100644
--- a/apps/webapp/app/v3/llmPricingRegistry.server.ts
+++ b/apps/webapp/app/v3/llmPricingRegistry.server.ts
@@ -37,11 +37,11 @@ export const llmPricingRegistry = singleton("llmPricingRegistry", () => {
     });
   }, reloadInterval);
 
-  // Pub/sub reload is opt-in per process. Without it, the registry stays
-  // accurate via the existing 5-minute interval. In multi-service deployments
-  // we only want the OTel-ingesting services subscribed — the dashboard and
-  // worker services don't need real-time pricing freshness and shouldn't pile
-  // onto each publish with a full-table reload.
+  // Pub/sub reload is opt-in per process (default off). Without it, the
+  // registry stays accurate via the existing 5-minute interval. Enable on
+  // the OTel-ingesting services where pricing freshness directly affects
+  // span cost enrichment; dashboard and worker services don't need it and
+  // shouldn't pile onto each publish with a full-table reload.
   if (env.LLM_PRICING_RELOAD_PUBSUB_ENABLED) {
     const subscriber = createRedisClient("llm-pricing:subscriber", {
       keyPrefix: "llm-pricing:subscriber:",