Skip to content

Commit ac4c9f9

Browse files
committed
Reduce chat request retention
1 parent 6656987 commit ac4c9f9

10 files changed

Lines changed: 143 additions & 35 deletions

File tree

web/src/app/api/v1/chat/completions/_post.ts

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import type { NextRequest } from 'next/server'
4242

4343
import type { ChatCompletionRequestBody } from '@/llm-api/types'
4444

45+
import { createRequestAuditRecord } from '@/llm-api/helpers'
4546
import {
4647
CanopyWaveError,
4748
handleCanopyWaveNonStream,
@@ -874,9 +875,7 @@ export async function postChatCompletions(params: {
874875

875876
// Log detailed error information for debugging
876877
const errorDetails = openrouterError?.toJSON()
877-
const shouldRecordMessages = freebuffAccessTier !== 'limited'
878-
const { messages: _messages, ...bodyWithoutMessages } = body
879-
const telemetryBody = shouldRecordMessages ? body : bodyWithoutMessages
878+
const telemetryBody = createRequestAuditRecord(body)
880879
const providerLabel = siliconflowError
881880
? 'SiliconFlow'
882881
: opencodeZenError
@@ -904,9 +903,8 @@ export async function postChatCompletions(params: {
904903
messageCount: Array.isArray(typedBody.messages)
905904
? typedBody.messages.length
906905
: 0,
907-
...(shouldRecordMessages
908-
? { messages: typedBody.messages }
909-
: { messagesOmitted: true, accessTier: freebuffAccessTier }),
906+
messagesOmitted: true,
907+
accessTier: freebuffAccessTier,
910908
providerStatusCode: (
911909
openrouterError ??
912910
fireworksError ??

web/src/llm-api/canopywave.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { env } from '@codebuff/internal/env'
66

77
import {
88
consumeCreditsForMessage,
9+
createRequestAuditRecord,
910
extractRequestMetadata,
1011
insertMessageToBigQuery,
1112
} from './helpers'
@@ -165,6 +166,7 @@ export async function handleCanopyWaveNonStream({
165166
const originalModel = body.model
166167
const startTime = new Date()
167168
const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger })
169+
const auditRequest = createRequestAuditRecord(body)
168170

169171
const response = await createCanopyWaveRequest({ body, originalModel, fetch })
170172

@@ -181,7 +183,7 @@ export async function handleCanopyWaveNonStream({
181183
messageId: data.id,
182184
userId,
183185
startTime,
184-
request: body,
186+
request: auditRequest,
185187
reasoningText,
186188
responseText: content,
187189
usageData,
@@ -242,6 +244,7 @@ export async function handleCanopyWaveStream({
242244
const originalModel = body.model
243245
const startTime = new Date()
244246
const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger })
247+
const auditRequest = createRequestAuditRecord(body)
245248

246249
const response = await createCanopyWaveRequest({ body, originalModel, fetch })
247250

@@ -305,7 +308,7 @@ export async function handleCanopyWaveStream({
305308
clientRequestId,
306309
costMode,
307310
startTime,
308-
request: body,
311+
request: auditRequest,
309312
originalModel,
310313
line,
311314
state,

web/src/llm-api/deepseek.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { env } from '@codebuff/internal/env'
77

88
import {
99
consumeCreditsForMessage,
10+
createRequestAuditRecord,
1011
extractRequestMetadata,
1112
insertMessageToBigQuery,
1213
} from './helpers'
@@ -203,6 +204,7 @@ export async function handleDeepSeekNonStream({
203204
body,
204205
logger,
205206
})
207+
const auditRequest = createRequestAuditRecord(body)
206208

207209
const response = await createDeepSeekRequest({ body, originalModel, fetch })
208210

@@ -222,7 +224,7 @@ export async function handleDeepSeekNonStream({
222224
messageId: data.id,
223225
userId,
224226
startTime,
225-
request: body,
227+
request: auditRequest,
226228
reasoningText,
227229
responseText: content,
228230
usageData,
@@ -286,6 +288,7 @@ export async function handleDeepSeekStream({
286288
body,
287289
logger,
288290
})
291+
const auditRequest = createRequestAuditRecord(body)
289292
const skipDisconnectedBilling = isDeepSeekV4FlashModel(body.model)
290293

291294
const response = await createDeepSeekRequest({ body, originalModel, fetch })
@@ -355,7 +358,7 @@ export async function handleDeepSeekStream({
355358
clientRequestId,
356359
costMode,
357360
startTime,
358-
request: body,
361+
request: auditRequest,
359362
originalModel,
360363
line,
361364
state,

web/src/llm-api/fireworks.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import { env } from '@codebuff/internal/env'
1111
import { FIREWORKS_DEPLOYMENT_MAP } from './fireworks-config'
1212
import {
1313
consumeCreditsForMessage,
14+
createRequestAuditRecord,
1415
extractRequestMetadata,
1516
insertMessageToBigQuery,
1617
} from './helpers'
@@ -273,6 +274,7 @@ export async function handleFireworksNonStream({
273274
body,
274275
logger,
275276
})
277+
const auditRequest = createRequestAuditRecord(body)
276278

277279
const response = await createFireworksRequestWithFallback({
278280
body,
@@ -298,7 +300,7 @@ export async function handleFireworksNonStream({
298300
messageId: data.id,
299301
userId,
300302
startTime,
301-
request: body,
303+
request: auditRequest,
302304
reasoningText,
303305
responseText: content,
304306
usageData,
@@ -362,6 +364,7 @@ export async function handleFireworksStream({
362364
body,
363365
logger,
364366
})
367+
const auditRequest = createRequestAuditRecord(body)
365368

366369
const response = await createFireworksRequestWithFallback({
367370
body,
@@ -431,7 +434,7 @@ export async function handleFireworksStream({
431434
clientRequestId,
432435
costMode,
433436
startTime,
434-
request: body,
437+
request: auditRequest,
435438
originalModel,
436439
line,
437440
state,

web/src/llm-api/helpers.ts

Lines changed: 87 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,85 @@ export type UsageData = {
2323
cost: number
2424
}
2525

26+
/**
 * Builds a compact summary of a chat completion request body for logging and
 * audit storage, without carrying the message contents or tool definitions.
 *
 * Scalar sampling/config fields are copied only when they have the expected
 * primitive type; anything else is reported as `undefined`. Messages are
 * reduced to a total count and a per-role tally, and tools to a count and the
 * list of function names. `codebuff_metadata` is shallow-copied as-is.
 *
 * @param body - The raw request body; may be any value.
 * @returns `{ invalid_request_shape: true }` when `body` is not a non-null,
 *   non-array object; otherwise the summary record described above.
 */
export function createRequestAuditRecord(body: unknown) {
  // TODO: Add a separate append-only message_request BigQuery table for full
  // raw request bodies, inserted before streaming starts. Keeping only this
  // summary here avoids retaining huge chat requests until provider streams end.
  if (typeof body !== 'object' || body === null || Array.isArray(body)) {
    return { invalid_request_shape: true }
  }

  const typedBody = body as Partial<ChatCompletionRequestBody>
  const messages = Array.isArray(typedBody.messages)
    ? typedBody.messages
    : undefined
  const tools = Array.isArray(typedBody.tools) ? typedBody.tools : undefined

  // Roles come straight from the client. Tally them in a Map rather than a
  // plain object so that roles colliding with Object.prototype members
  // ("constructor", "toString", "__proto__", ...) are counted as ordinary keys
  // instead of reading an inherited value or hitting the __proto__ setter.
  let messageRoleCounts: Record<string, number> | undefined
  if (messages) {
    const roleTally = new Map<string, number>()
    for (const message of messages) {
      const role =
        typeof message === 'object' && message !== null && 'role' in message
          ? String(message.role)
          : 'unknown'
      roleTally.set(role, (roleTally.get(role) ?? 0) + 1)
    }
    // Object.fromEntries defines own data properties, so every role (including
    // "__proto__") ends up as a regular, serialisable key.
    messageRoleCounts = Object.fromEntries(roleTally)
  }

  return {
    model: typeof typedBody.model === 'string' ? typedBody.model : undefined,
    stream:
      typeof typedBody.stream === 'boolean' ? typedBody.stream : undefined,
    temperature:
      typeof typedBody.temperature === 'number'
        ? typedBody.temperature
        : undefined,
    max_tokens:
      typeof typedBody.max_tokens === 'number'
        ? typedBody.max_tokens
        : undefined,
    max_completion_tokens:
      typeof typedBody.max_completion_tokens === 'number'
        ? typedBody.max_completion_tokens
        : undefined,
    top_p: typeof typedBody.top_p === 'number' ? typedBody.top_p : undefined,
    reasoning_effort:
      typeof typedBody.reasoning_effort === 'string'
        ? typedBody.reasoning_effort
        : undefined,
    reasoning_enabled:
      typeof typedBody.reasoning?.enabled === 'boolean'
        ? typedBody.reasoning.enabled
        : undefined,
    reasoning_effort_nested:
      typeof typedBody.reasoning?.effort === 'string'
        ? typedBody.reasoning.effort
        : undefined,
    usage_include:
      typeof typedBody.usage?.include === 'boolean'
        ? typedBody.usage.include
        : undefined,
    codebuff_metadata:
      typeof typedBody.codebuff_metadata === 'object' &&
      typedBody.codebuff_metadata !== null
        ? { ...typedBody.codebuff_metadata }
        : undefined,
    message_count: messages?.length ?? 0,
    message_role_counts: messageRoleCounts,
    // True whenever a messages array was present (its contents are never kept).
    messages_omitted: !!messages,
    tool_count: tools?.length ?? 0,
    tool_names: tools
      ?.map((tool) =>
        typeof tool === 'object' && tool !== null
          ? tool.function?.name
          : undefined,
      )
      .filter((name): name is string => typeof name === 'string'),
    // True whenever a tools array was present (only names are kept).
    tools_omitted: !!tools,
  }
}
104+
26105
export function extractRequestMetadata(params: {
27106
body: unknown
28107
logger: Logger
@@ -35,14 +114,20 @@ export function extractRequestMetadata(params: {
35114
const rawClientId = metadata?.client_id
36115
const clientId = typeof rawClientId === 'string' ? rawClientId : null
37116
if (!clientId) {
38-
logger.warn({ body }, 'Received request without client_id')
117+
logger.warn(
118+
{ request: createRequestAuditRecord(body) },
119+
'Received request without client_id',
120+
)
39121
}
40122

41123
const rawRunId = metadata?.run_id
42124
const clientRequestId: string | null =
43125
typeof rawRunId === 'string' ? rawRunId : null
44126
if (!clientRequestId) {
45-
logger.warn({ body }, 'Received request without run_id')
127+
logger.warn(
128+
{ request: createRequestAuditRecord(body) },
129+
'Received request without run_id',
130+
)
46131
}
47132

48133
const n = metadata?.n

web/src/llm-api/moonshot.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { env } from '@codebuff/internal/env'
66

77
import {
88
consumeCreditsForMessage,
9+
createRequestAuditRecord,
910
extractRequestMetadata,
1011
insertMessageToBigQuery,
1112
} from './helpers'
@@ -288,6 +289,7 @@ export async function handleMoonshotNonStream({
288289
body,
289290
logger,
290291
})
292+
const auditRequest = createRequestAuditRecord(body)
291293

292294
const response = await createMoonshotRequest({ body, originalModel, fetch })
293295
if (!response.ok) {
@@ -306,7 +308,7 @@ export async function handleMoonshotNonStream({
306308
messageId: data.id,
307309
userId,
308310
startTime,
309-
request: body,
311+
request: auditRequest,
310312
reasoningText,
311313
responseText: content,
312314
usageData,
@@ -368,6 +370,7 @@ export async function handleMoonshotStream({
368370
body,
369371
logger,
370372
})
373+
const auditRequest = createRequestAuditRecord(body)
371374

372375
const response = await createMoonshotRequest({ body, originalModel, fetch })
373376
if (!response.ok) {
@@ -435,7 +438,7 @@ export async function handleMoonshotStream({
435438
clientRequestId,
436439
costMode,
437440
startTime,
438-
request: body,
441+
request: auditRequest,
439442
originalModel,
440443
line,
441444
state,

0 commit comments

Comments
 (0)