From ac4c9f9421dc3c0676111be73675c93dfb7e9267 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Fri, 15 May 2026 15:44:39 -0700 Subject: [PATCH] Reduce chat request retention --- web/src/app/api/v1/chat/completions/_post.ts | 10 +-- web/src/llm-api/canopywave.ts | 7 +- web/src/llm-api/deepseek.ts | 7 +- web/src/llm-api/fireworks.ts | 7 +- web/src/llm-api/helpers.ts | 89 +++++++++++++++++++- web/src/llm-api/moonshot.ts | 7 +- web/src/llm-api/openai.ts | 26 +++--- web/src/llm-api/opencode-zen.ts | 7 +- web/src/llm-api/openrouter.ts | 11 ++- web/src/llm-api/siliconflow.ts | 7 +- 10 files changed, 143 insertions(+), 35 deletions(-) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 81851eab2b..8fb66930be 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -42,6 +42,7 @@ import type { NextRequest } from 'next/server' import type { ChatCompletionRequestBody } from '@/llm-api/types' +import { createRequestAuditRecord } from '@/llm-api/helpers' import { CanopyWaveError, handleCanopyWaveNonStream, @@ -874,9 +875,7 @@ export async function postChatCompletions(params: { // Log detailed error information for debugging const errorDetails = openrouterError?.toJSON() - const shouldRecordMessages = freebuffAccessTier !== 'limited' - const { messages: _messages, ...bodyWithoutMessages } = body - const telemetryBody = shouldRecordMessages ? body : bodyWithoutMessages + const telemetryBody = createRequestAuditRecord(body) const providerLabel = siliconflowError ? 'SiliconFlow' : opencodeZenError @@ -904,9 +903,8 @@ export async function postChatCompletions(params: { messageCount: Array.isArray(typedBody.messages) ? typedBody.messages.length : 0, - ...(shouldRecordMessages - ? { messages: typedBody.messages } - : { messagesOmitted: true, accessTier: freebuffAccessTier }), + messagesOmitted: true, + accessTier: freebuffAccessTier, providerStatusCode: ( openrouterError ?? fireworksError ?? diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts index 4af0588040..3d15b1d27e 100644 --- a/web/src/llm-api/canopywave.ts +++ b/web/src/llm-api/canopywave.ts @@ -6,6 +6,7 @@ import { env } from '@codebuff/internal/env' import { consumeCreditsForMessage, + createRequestAuditRecord, extractRequestMetadata, insertMessageToBigQuery, } from './helpers' @@ -165,6 +166,7 @@ export async function handleCanopyWaveNonStream({ const originalModel = body.model const startTime = new Date() const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) + const auditRequest = createRequestAuditRecord(body) const response = await createCanopyWaveRequest({ body, originalModel, fetch }) @@ -181,7 +183,7 @@ export async function handleCanopyWaveNonStream({ messageId: data.id, userId, startTime, - request: body, + request: auditRequest, reasoningText, responseText: content, usageData, @@ -242,6 +244,7 @@ export async function handleCanopyWaveStream({ const originalModel = body.model const startTime = new Date() const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) + const auditRequest = createRequestAuditRecord(body) const response = await createCanopyWaveRequest({ body, originalModel, fetch }) @@ -305,7 +308,7 @@ export async function handleCanopyWaveStream({ clientRequestId, costMode, startTime, - request: body, + request: auditRequest, originalModel, line, state, diff --git a/web/src/llm-api/deepseek.ts b/web/src/llm-api/deepseek.ts index 531db8908d..359bf9738b 100644 --- a/web/src/llm-api/deepseek.ts +++ b/web/src/llm-api/deepseek.ts @@ -7,6 +7,7 @@ import { env } from '@codebuff/internal/env' import { consumeCreditsForMessage, + createRequestAuditRecord, extractRequestMetadata, insertMessageToBigQuery, } from './helpers' @@ -203,6 +204,7 @@ export async function handleDeepSeekNonStream({ body, logger, }) + const auditRequest = createRequestAuditRecord(body) const response = await createDeepSeekRequest({ body, originalModel, fetch }) @@ -222,7 +224,7 @@ export async function handleDeepSeekNonStream({ messageId: data.id, userId, startTime, - request: body, + request: auditRequest, reasoningText, responseText: content, usageData, @@ -286,6 +288,7 @@ export async function handleDeepSeekStream({ body, logger, }) + const auditRequest = createRequestAuditRecord(body) const skipDisconnectedBilling = isDeepSeekV4FlashModel(body.model) const response = await createDeepSeekRequest({ body, originalModel, fetch }) @@ -355,7 +358,7 @@ export async function handleDeepSeekStream({ clientRequestId, costMode, startTime, - request: body, + request: auditRequest, originalModel, line, state, diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 80d9988f01..2bd9cbe796 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -11,6 +11,7 @@ import { env } from '@codebuff/internal/env' import { FIREWORKS_DEPLOYMENT_MAP } from './fireworks-config' import { consumeCreditsForMessage, + createRequestAuditRecord, extractRequestMetadata, insertMessageToBigQuery, } from './helpers' @@ -273,6 +274,7 @@ export async function handleFireworksNonStream({ body, logger, }) + const auditRequest = createRequestAuditRecord(body) const response = await createFireworksRequestWithFallback({ body, @@ -298,7 +300,7 @@ export async function handleFireworksNonStream({ messageId: data.id, userId, startTime, - request: body, + request: auditRequest, reasoningText, responseText: content, usageData, @@ -362,6 +364,7 @@ export async function handleFireworksStream({ body, logger, }) + const auditRequest = createRequestAuditRecord(body) const response = await createFireworksRequestWithFallback({ body, @@ -431,7 +434,7 @@ export async function handleFireworksStream({ clientRequestId, costMode, startTime, - request: body, + request: auditRequest, originalModel, line, state, diff --git a/web/src/llm-api/helpers.ts b/web/src/llm-api/helpers.ts index dfee0f306b..80920c77e9 100644 --- a/web/src/llm-api/helpers.ts +++ b/web/src/llm-api/helpers.ts @@ -23,6 +23,85 @@ export type UsageData = { cost: number } +export function createRequestAuditRecord(body: unknown) { + // TODO: Add a separate append-only message_request BigQuery table for full + // raw request bodies, inserted before streaming starts. Keeping only this + // summary here avoids retaining huge chat requests until provider streams end. + if (typeof body !== 'object' || body === null || Array.isArray(body)) { + return { invalid_request_shape: true } + } + + const typedBody = body as Partial + const messages = Array.isArray(typedBody.messages) + ? typedBody.messages + : undefined + const tools = Array.isArray(typedBody.tools) ? typedBody.tools : undefined + + const messageRoleCounts = messages?.reduce>( + (counts, message) => { + const role = + typeof message === 'object' && message !== null && 'role' in message + ? String(message.role) + : 'unknown' + counts[role] = (counts[role] ?? 0) + 1 + return counts + }, + {}, + ) + + return { + model: typeof typedBody.model === 'string' ? typedBody.model : undefined, + stream: + typeof typedBody.stream === 'boolean' ? typedBody.stream : undefined, + temperature: + typeof typedBody.temperature === 'number' + ? typedBody.temperature + : undefined, + max_tokens: + typeof typedBody.max_tokens === 'number' + ? typedBody.max_tokens + : undefined, + max_completion_tokens: + typeof typedBody.max_completion_tokens === 'number' + ? typedBody.max_completion_tokens + : undefined, + top_p: typeof typedBody.top_p === 'number' ? typedBody.top_p : undefined, + reasoning_effort: + typeof typedBody.reasoning_effort === 'string' + ? typedBody.reasoning_effort + : undefined, + reasoning_enabled: + typeof typedBody.reasoning?.enabled === 'boolean' + ? typedBody.reasoning.enabled + : undefined, + reasoning_effort_nested: + typeof typedBody.reasoning?.effort === 'string' + ? typedBody.reasoning.effort + : undefined, + usage_include: + typeof typedBody.usage?.include === 'boolean' + ? typedBody.usage.include + : undefined, + codebuff_metadata: + typeof typedBody.codebuff_metadata === 'object' && + typedBody.codebuff_metadata !== null + ? { ...typedBody.codebuff_metadata } + : undefined, + message_count: messages?.length ?? 0, + message_role_counts: messageRoleCounts, + messages_omitted: !!messages, + tool_count: tools?.length ?? 0, + tool_names: tools + ?.map((tool) => + typeof tool === 'object' && tool !== null + ? tool.function?.name + : undefined, + ) + .filter((name): name is string => typeof name === 'string'), + tools_omitted: !!tools, + } +} + export function extractRequestMetadata(params: { body: unknown logger: Logger @@ -35,14 +114,20 @@ export function extractRequestMetadata(params: { const rawClientId = metadata?.client_id const clientId = typeof rawClientId === 'string' ? rawClientId : null if (!clientId) { - logger.warn({ body }, 'Received request without client_id') + logger.warn( + { request: createRequestAuditRecord(body) }, + 'Received request without client_id', + ) } const rawRunId = metadata?.run_id const clientRequestId: string | null = typeof rawRunId === 'string' ? rawRunId : null if (!clientRequestId) { - logger.warn({ body }, 'Received request without run_id') + logger.warn( + { request: createRequestAuditRecord(body) }, + 'Received request without run_id', + ) } const n = metadata?.n diff --git a/web/src/llm-api/moonshot.ts b/web/src/llm-api/moonshot.ts index 74b350dd04..aa48c3b5b2 100644 --- a/web/src/llm-api/moonshot.ts +++ b/web/src/llm-api/moonshot.ts @@ -6,6 +6,7 @@ import { env } from '@codebuff/internal/env' import { consumeCreditsForMessage, + createRequestAuditRecord, extractRequestMetadata, insertMessageToBigQuery, } from './helpers' @@ -288,6 +289,7 @@ export async function handleMoonshotNonStream({ body, logger, }) + const auditRequest = createRequestAuditRecord(body) const response = await createMoonshotRequest({ body, originalModel, fetch }) if (!response.ok) { @@ -306,7 +308,7 @@ export async function handleMoonshotNonStream({ messageId: data.id, userId, startTime, - request: body, + request: auditRequest, reasoningText, responseText: content, usageData, @@ -368,6 +370,7 @@ export async function handleMoonshotStream({ body, logger, }) + const auditRequest = createRequestAuditRecord(body) const response = await createMoonshotRequest({ body, originalModel, fetch }) if (!response.ok) { @@ -435,7 +438,7 @@ export async function handleMoonshotStream({ clientRequestId, costMode, startTime, - request: body, + request: auditRequest, originalModel, line, state, diff --git a/web/src/llm-api/openai.ts b/web/src/llm-api/openai.ts index 960ef63c99..45e5c92bdf 100644 --- a/web/src/llm-api/openai.ts +++ b/web/src/llm-api/openai.ts @@ -6,6 +6,7 @@ import { env } from '@codebuff/internal/env' import { consumeCreditsForMessage, + createRequestAuditRecord, extractRequestMetadata, insertMessageToBigQuery, } from './helpers' @@ -63,7 +64,6 @@ const OUTPUT_TOKEN_COSTS: Record = { // Extended timeout for deep-thinking models (e.g., gpt-5.x) that can take // a long time to start streaming. const OPENAI_HEADERS_TIMEOUT_MS = 30 * 60 * 1000 - const openaiAgent = new Agent({ headersTimeout: OPENAI_HEADERS_TIMEOUT_MS, bodyTimeout: 0, @@ -234,8 +234,10 @@ export async function handleOpenAINonStream({ body, logger, }) + const auditRequest = createRequestAuditRecord(body) - const modelShortName = extractShortModelName(body.model) + const originalModel = body.model + const modelShortName = extractShortModelName(originalModel) const openaiBody = buildOpenAIBody(body, modelShortName) openaiBody.stream = false if (n) openaiBody.n = n @@ -276,7 +278,7 @@ export async function handleOpenAINonStream({ messageId: data.id, userId, startTime, - request: body, + request: auditRequest, reasoningText, responseText, usageData, @@ -297,7 +299,7 @@ export async function handleOpenAINonStream({ clientId, clientRequestId, startTime, - model: body.model, + model: originalModel, reasoningText, responseText, usageData, @@ -332,7 +334,7 @@ export async function handleOpenAINonStream({ messageId: data.id, userId, startTime, - request: body, + request: auditRequest, reasoningText, responseText: content, usageData, @@ -353,7 +355,7 @@ export async function handleOpenAINonStream({ clientId, clientRequestId, startTime, - model: body.model, + model: originalModel, reasoningText, responseText: content, usageData, @@ -393,8 +395,10 @@ export async function handleOpenAIStream({ body, logger, }) + const auditRequest = createRequestAuditRecord(body) - const modelShortName = extractShortModelName(body.model) + const originalModel = body.model + const modelShortName = extractShortModelName(originalModel) const openaiBody = buildOpenAIBody(body, modelShortName) openaiBody.stream = true openaiBody.stream_options = { include_usage: true } @@ -527,7 +531,7 @@ export async function handleOpenAIStream({ messageId: obj.id, userId, startTime, - request: body, + request: auditRequest, reasoningText, responseText, usageData, @@ -548,7 +552,7 @@ export async function handleOpenAIStream({ clientId, clientRequestId, startTime, - model: body.model, + model: originalModel, reasoningText, responseText, usageData, @@ -615,7 +619,7 @@ export async function handleOpenAIStream({ messageId: obj.id, userId, startTime, - request: body, + request: auditRequest, reasoningText, responseText, usageData, @@ -636,7 +640,7 @@ export async function handleOpenAIStream({ clientId, clientRequestId, startTime, - model: body.model, + model: originalModel, reasoningText, responseText, usageData, diff --git a/web/src/llm-api/opencode-zen.ts b/web/src/llm-api/opencode-zen.ts index cdac6e20c1..6f060eabe0 100644 --- a/web/src/llm-api/opencode-zen.ts +++ b/web/src/llm-api/opencode-zen.ts @@ -7,6 +7,7 @@ import { env } from '@codebuff/internal/env' import { consumeCreditsForMessage, + createRequestAuditRecord, extractRequestMetadata, insertMessageToBigQuery, } from './helpers' @@ -260,6 +261,7 @@ export async function handleOpenCodeZenNonStream({ body, logger, }) + const auditRequest = createRequestAuditRecord(body) const response = await createOpenCodeZenRequest({ body, @@ -282,7 +284,7 @@ export async function handleOpenCodeZenNonStream({ messageId: data.id, userId, startTime, - request: body, + request: auditRequest, reasoningText, responseText: content, usageData, @@ -344,6 +346,7 @@ export async function handleOpenCodeZenStream({ body, logger, }) + const auditRequest = createRequestAuditRecord(body) const response = await createOpenCodeZenRequest({ body, @@ -415,7 +418,7 @@ export async function handleOpenCodeZenStream({ clientRequestId, costMode, startTime, - request: body, + request: auditRequest, originalModel, line, state, diff --git a/web/src/llm-api/openrouter.ts b/web/src/llm-api/openrouter.ts index 0047445f0c..e0062c88e6 100644 --- a/web/src/llm-api/openrouter.ts +++ b/web/src/llm-api/openrouter.ts @@ -6,6 +6,7 @@ import { env } from '@codebuff/internal/env' import { consumeCreditsForMessage, + createRequestAuditRecord, extractRequestMetadata, insertMessageToBigQuery, } from './helpers' @@ -154,6 +155,7 @@ export async function handleOpenRouterNonStream({ body, logger, }) + const auditRequest = createRequestAuditRecord(body) const byok = openrouterApiKey !== null // If n > 1, make n parallel requests @@ -200,7 +202,7 @@ export async function handleOpenRouterNonStream({ messageId: firstData.id, userId, startTime, - request: body, + request: auditRequest, reasoningText, responseText, usageData: aggregatedUsage, @@ -272,7 +274,7 @@ export async function handleOpenRouterNonStream({ messageId: data.id, userId, startTime, - request: body, + request: auditRequest, reasoningText, responseText: content, usageData, @@ -337,6 +339,7 @@ export async function handleOpenRouterStream({ const startTime = new Date() const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) + const auditRequest = createRequestAuditRecord(body) const byok = openrouterApiKey !== null const response = await createOpenRouterRequest({ @@ -388,7 +391,7 @@ export async function handleOpenRouterStream({ byok, startTime, state, - request: body, + request: auditRequest, fetch, logger, insertMessage: insertMessageBigquery, @@ -448,7 +451,7 @@ export async function handleOpenRouterStream({ costMode, byok, startTime, - request: body, + request: auditRequest, line, state, logger, diff --git a/web/src/llm-api/siliconflow.ts b/web/src/llm-api/siliconflow.ts index 936c3f7b28..49a67c162a 100644 --- a/web/src/llm-api/siliconflow.ts +++ b/web/src/llm-api/siliconflow.ts @@ -6,6 +6,7 @@ import { env } from '@codebuff/internal/env' import { consumeCreditsForMessage, + createRequestAuditRecord, extractRequestMetadata, insertMessageToBigQuery, } from './helpers' @@ -130,6 +131,7 @@ export async function handleSiliconFlowNonStream({ const originalModel = body.model const startTime = new Date() const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) + const auditRequest = createRequestAuditRecord(body) const response = await createSiliconFlowRequest({ body, originalModel, fetch }) @@ -146,7 +148,7 @@ export async function handleSiliconFlowNonStream({ messageId: data.id, userId, startTime, - request: body, + request: auditRequest, reasoningText, responseText: content, usageData, @@ -207,6 +209,7 @@ export async function handleSiliconFlowStream({ const originalModel = body.model const startTime = new Date() const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) + const auditRequest = createRequestAuditRecord(body) const response = await createSiliconFlowRequest({ body, originalModel, fetch }) @@ -270,7 +273,7 @@ export async function handleSiliconFlowStream({ clientRequestId, costMode, startTime, - request: body, + request: auditRequest, originalModel, line, state,