From 04a7f567b0eeee8312ba202ad8916cf66b8079e5 Mon Sep 17 00:00:00 2001 From: nonoqing Date: Mon, 18 May 2026 19:10:20 +0800 Subject: [PATCH 1/5] feat(usage): add cache_creation_token_count to UnifiedTokenUsage --- src/crates/agent-stream/src/lib.rs | 1 + src/crates/ai-adapters/src/stream/types/anthropic.rs | 1 + src/crates/ai-adapters/src/stream/types/gemini.rs | 1 + src/crates/ai-adapters/src/stream/types/openai.rs | 1 + src/crates/ai-adapters/src/stream/types/responses.rs | 1 + src/crates/ai-adapters/src/stream/types/unified.rs | 10 ++++++++++ 6 files changed, 15 insertions(+) diff --git a/src/crates/agent-stream/src/lib.rs b/src/crates/agent-stream/src/lib.rs index ad32cdd81..5256f844b 100644 --- a/src/crates/agent-stream/src/lib.rs +++ b/src/crates/agent-stream/src/lib.rs @@ -1071,6 +1071,7 @@ mod tests { total_token_count: total_tokens, reasoning_token_count: None, cached_content_token_count: None, + cache_creation_token_count: None, } } diff --git a/src/crates/ai-adapters/src/stream/types/anthropic.rs b/src/crates/ai-adapters/src/stream/types/anthropic.rs index b785069bf..1e341be08 100644 --- a/src/crates/ai-adapters/src/stream/types/anthropic.rs +++ b/src/crates/ai-adapters/src/stream/types/anthropic.rs @@ -61,6 +61,7 @@ impl From for UnifiedTokenUsage { (None, None) => None, (read, creation) => Some(read.unwrap_or(0) + creation.unwrap_or(0)), }, + cache_creation_token_count: None, } } } diff --git a/src/crates/ai-adapters/src/stream/types/gemini.rs b/src/crates/ai-adapters/src/stream/types/gemini.rs index 4927c91b3..c1a7407d1 100644 --- a/src/crates/ai-adapters/src/stream/types/gemini.rs +++ b/src/crates/ai-adapters/src/stream/types/gemini.rs @@ -104,6 +104,7 @@ impl From for UnifiedTokenUsage { total_token_count: usage.total_token_count, reasoning_token_count, cached_content_token_count: usage.cached_content_token_count, + cache_creation_token_count: None, } } } diff --git a/src/crates/ai-adapters/src/stream/types/openai.rs 
b/src/crates/ai-adapters/src/stream/types/openai.rs index 03aeda399..0d05c4b17 100644 --- a/src/crates/ai-adapters/src/stream/types/openai.rs +++ b/src/crates/ai-adapters/src/stream/types/openai.rs @@ -27,6 +27,7 @@ impl From for UnifiedTokenUsage { cached_content_token_count: usage .prompt_tokens_details .and_then(|prompt_tokens_details| prompt_tokens_details.cached_tokens), + cache_creation_token_count: None, } } } diff --git a/src/crates/ai-adapters/src/stream/types/responses.rs b/src/crates/ai-adapters/src/stream/types/responses.rs index 8e9b48071..a58da6366 100644 --- a/src/crates/ai-adapters/src/stream/types/responses.rs +++ b/src/crates/ai-adapters/src/stream/types/responses.rs @@ -62,6 +62,7 @@ impl From for UnifiedTokenUsage { cached_content_token_count: usage .input_tokens_details .map(|details| details.cached_tokens), + cache_creation_token_count: None, } } } diff --git a/src/crates/ai-adapters/src/stream/types/unified.rs b/src/crates/ai-adapters/src/stream/types/unified.rs index 27048acc5..fb5948eea 100644 --- a/src/crates/ai-adapters/src/stream/types/unified.rs +++ b/src/crates/ai-adapters/src/stream/types/unified.rs @@ -66,6 +66,16 @@ pub struct UnifiedTokenUsage { pub total_token_count: u32, #[serde(skip_serializing_if = "Option::is_none")] pub reasoning_token_count: Option, + /// Cache READ tokens (i.e., served from cache this call). Universal across + /// providers: OpenAI `cached_tokens`, DeepSeek `prompt_cache_hit_tokens`, + /// Anthropic `cache_read_input_tokens`, Gemini `cachedContentTokenCount`. + /// Hit rate consumers must use this as numerator and `prompt_token_count` + /// as denominator. #[serde(skip_serializing_if = "Option::is_none")] pub cached_content_token_count: Option, + /// Cache WRITE tokens (only Anthropic reports this per-token; others either + /// have no creation concept or bill creation by storage time). Disjoint from + /// `cached_content_token_count`. Do NOT include in hit-rate numerator. 
+ #[serde(skip_serializing_if = "Option::is_none", default)] + pub cache_creation_token_count: Option, } From 027c630b7ed62247f7df675cb9f02076e5e1b88b Mon Sep 17 00:00:00 2001 From: nonoqing Date: Mon, 18 May 2026 20:15:43 +0800 Subject: [PATCH 2/5] fix(usage): Anthropic cached_content_token_count is reads only, not read+creation sum Previously the Anthropic adapter summed cache_read_input_tokens and cache_creation_input_tokens into cached_content_token_count, which made any downstream hit-rate metric (cached / prompt) wrongly count cache writes as hits. Cache writes now go to the new cache_creation_token_count field. Behavior change: Anthropic TokenUsageRecord.cached_tokens values will be lower than pre-fix records for equivalent traffic, because creation tokens no longer inflate the count. Hit-rate dashboards built on this should annotate the discontinuity. --- .../ai-adapters/src/stream/types/anthropic.rs | 99 ++++++++++++++++--- 1 file changed, 88 insertions(+), 11 deletions(-) diff --git a/src/crates/ai-adapters/src/stream/types/anthropic.rs b/src/crates/ai-adapters/src/stream/types/anthropic.rs index 1e341be08..d598b7bc2 100644 --- a/src/crates/ai-adapters/src/stream/types/anthropic.rs +++ b/src/crates/ai-adapters/src/stream/types/anthropic.rs @@ -45,23 +45,27 @@ impl Usage { impl From for UnifiedTokenUsage { fn from(value: Usage) -> Self { - let cache_read = value.cache_read_input_tokens.unwrap_or(0); - let cache_creation = value.cache_creation_input_tokens.unwrap_or(0); - let prompt_token_count = value.input_tokens.unwrap_or(0) + cache_read + cache_creation; + let cache_read = value.cache_read_input_tokens; + let cache_creation = value.cache_creation_input_tokens; + + // prompt_token_count = total context tokens occupied (industry-standard + // "input tokens" metric). For Anthropic this is the three disjoint + // components summed; for other providers the API reports this directly. 
+ let prompt_token_count = value.input_tokens.unwrap_or(0) + + cache_read.unwrap_or(0) + + cache_creation.unwrap_or(0); let candidates_token_count = value.output_tokens.unwrap_or(0); + Self { prompt_token_count, candidates_token_count, total_token_count: prompt_token_count + candidates_token_count, reasoning_token_count: None, - cached_content_token_count: match ( - value.cache_read_input_tokens, - value.cache_creation_input_tokens, - ) { - (None, None) => None, - (read, creation) => Some(read.unwrap_or(0) + creation.unwrap_or(0)), - }, - cache_creation_token_count: None, + // cached_content_token_count = cache READS only. This is the + // numerator for `cache hit rate = cached / prompt`. Writes go + // to cache_creation_token_count below. + cached_content_token_count: cache_read, + cache_creation_token_count: cache_creation, } } } @@ -211,3 +215,76 @@ impl From for String { format!("{}: {}", value.error_type, value.message) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::stream::types::unified::UnifiedTokenUsage; + + #[test] + fn cached_content_token_count_is_reads_only_not_sum() { + let raw = r#"{ + "input_tokens": 100, + "output_tokens": 50, + "cache_read_input_tokens": 30, + "cache_creation_input_tokens": 20 + }"#; + let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage"); + let unified: UnifiedTokenUsage = usage.into(); + + // cached_content_token_count must be reads only — NOT read + creation. + // This guarantees `cached_content / prompt` is a correct hit rate. + assert_eq!(unified.cached_content_token_count, Some(30)); + assert_eq!(unified.cache_creation_token_count, Some(20)); + + // prompt_token_count keeps "total context" semantic (matches industry + // standard "input tokens" metric across providers). 
+ assert_eq!(unified.prompt_token_count, 150); + assert_eq!(unified.candidates_token_count, 50); + assert_eq!(unified.total_token_count, 200); + + // Hit rate computed by downstream: + // 30 / 150 == 20% (correct: only reads count as hits) + // Pre-fix this would have been wrongly 50/150 == 33%. + } + + #[test] + fn absent_cache_fields_stay_none() { + let raw = r#"{ "input_tokens": 100, "output_tokens": 50 }"#; + let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage"); + let unified: UnifiedTokenUsage = usage.into(); + assert_eq!(unified.cached_content_token_count, None); + assert_eq!(unified.cache_creation_token_count, None); + } + + #[test] + fn zero_cache_fields_are_some_zero_not_none() { + // Cache support reported but zero this call must be distinguishable + // from "provider did not report cache fields at all". + let raw = r#"{ + "input_tokens": 100, + "output_tokens": 50, + "cache_read_input_tokens": 0, + "cache_creation_input_tokens": 0 + }"#; + let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage"); + let unified: UnifiedTokenUsage = usage.into(); + assert_eq!(unified.cached_content_token_count, Some(0)); + assert_eq!(unified.cache_creation_token_count, Some(0)); + } + + #[test] + fn only_read_present_no_creation() { + let raw = r#"{ + "input_tokens": 100, + "output_tokens": 50, + "cache_read_input_tokens": 30 + }"#; + let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage"); + let unified: UnifiedTokenUsage = usage.into(); + assert_eq!(unified.cached_content_token_count, Some(30)); + assert_eq!(unified.cache_creation_token_count, None); + // prompt_token_count = input + read (no creation contribution) + assert_eq!(unified.prompt_token_count, 130); + } +} From 0f0be7b145e68333946c65a8863e516cf6a720b7 Mon Sep 17 00:00:00 2001 From: nonoqing Date: Mon, 18 May 2026 20:42:39 +0800 Subject: [PATCH 3/5] feat(usage): capture DeepSeek prompt_cache_hit_tokens extension The OpenAI-compatible deserializer 
previously silently dropped DeepSeek's prompt_cache_hit_tokens / prompt_cache_miss_tokens fields, leaving DeepSeek cache hit rate permanently unknown. Map prompt_cache_hit_tokens (preferred) or prompt_tokens_details.cached_tokens (fallback) to cached_content_token_count so the hit-rate formula works for DeepSeek. --- .../ai-adapters/src/stream/types/openai.rs | 102 +++++++++++++++++- 1 file changed, 99 insertions(+), 3 deletions(-) diff --git a/src/crates/ai-adapters/src/stream/types/openai.rs b/src/crates/ai-adapters/src/stream/types/openai.rs index 0d05c4b17..fe7ff5c9d 100644 --- a/src/crates/ai-adapters/src/stream/types/openai.rs +++ b/src/crates/ai-adapters/src/stream/types/openai.rs @@ -15,18 +15,33 @@ struct OpenAIUsage { #[serde(default)] total_tokens: u32, prompt_tokens_details: Option, + /// DeepSeek extension. Subset of `prompt_tokens`. Absent on non-DeepSeek + /// providers. Prefer this over `prompt_tokens_details.cached_tokens` when + /// both are present — DeepSeek-native is the authoritative source. + #[serde(default)] + prompt_cache_hit_tokens: Option, + /// DeepSeek extension. Equals `prompt_tokens - prompt_cache_hit_tokens`. + /// Deserialized so a future `#[serde(deny_unknown_fields)]` doesn't reject the + /// payload; not propagated (derivable from `prompt_tokens` and the hit count). + #[serde(default)] + #[allow(dead_code)] + prompt_cache_miss_tokens: Option, } impl From for UnifiedTokenUsage { fn from(usage: OpenAIUsage) -> Self { + let standard_cached = usage + .prompt_tokens_details + .and_then(|details| details.cached_tokens); + // DeepSeek extension wins when both present. 
+ let cache_read = usage.prompt_cache_hit_tokens.or(standard_cached); + Self { prompt_token_count: usage.prompt_tokens, candidates_token_count: usage.completion_tokens, total_token_count: usage.total_tokens, reasoning_token_count: None, - cached_content_token_count: usage - .prompt_tokens_details - .and_then(|prompt_tokens_details| prompt_tokens_details.cached_tokens), + cached_content_token_count: cache_read, cache_creation_token_count: None, } } @@ -693,4 +708,85 @@ mod tests { assert_eq!(responses.len(), 1); assert!(responses[0].tool_call.is_some()); } + + #[test] + fn standard_openai_cached_tokens_maps_through() { + let raw = r#"{ + "id": "chatcmpl_test", + "created": 1, + "model": "gpt-test", + "choices": [], + "usage": { + "prompt_tokens": 100, + "completion_tokens": 20, + "total_tokens": 120, + "prompt_tokens_details": { "cached_tokens": 40 } + } + }"#; + let data: OpenAISSEData = serde_json::from_str(raw).expect("valid openai sse data"); + let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone(); + assert_eq!(usage.cached_content_token_count, Some(40)); + assert_eq!(usage.cache_creation_token_count, None); + } + + #[test] + fn deepseek_prompt_cache_hit_tokens_is_captured() { + // Pre-fix this field was silently dropped (serde ignores unknown keys by default). + let raw = r#"{ + "id": "chatcmpl_test", + "created": 1, + "model": "deepseek-chat", + "choices": [], + "usage": { + "prompt_tokens": 100, + "completion_tokens": 20, + "total_tokens": 120, + "prompt_cache_hit_tokens": 64, + "prompt_cache_miss_tokens": 36 + } + }"#; + let data: OpenAISSEData = serde_json::from_str(raw).expect("valid deepseek sse data"); + let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone(); + assert_eq!(usage.cached_content_token_count, Some(64)); + } + + #[test] + fn deepseek_extension_preferred_over_standard_cached_tokens_if_both() { + // Defensive: if a proxy forwards both, prefer the DeepSeek-native field. 
+ let raw = r#"{ + "id": "chatcmpl_test", + "created": 1, + "model": "deepseek-chat", + "choices": [], + "usage": { + "prompt_tokens": 100, + "completion_tokens": 20, + "total_tokens": 120, + "prompt_cache_hit_tokens": 64, + "prompt_tokens_details": { "cached_tokens": 0 } + } + }"#; + let data: OpenAISSEData = serde_json::from_str(raw).expect("valid proxy payload"); + let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone(); + assert_eq!(usage.cached_content_token_count, Some(64)); + } + + #[test] + fn openai_no_cache_fields_stays_none() { + let raw = r#"{ + "id": "chatcmpl_test", + "created": 1, + "model": "gpt-test", + "choices": [], + "usage": { + "prompt_tokens": 100, + "completion_tokens": 20, + "total_tokens": 120 + } + }"#; + let data: OpenAISSEData = serde_json::from_str(raw).expect("valid openai sse data"); + let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone(); + assert_eq!(usage.cached_content_token_count, None); + assert_eq!(usage.cache_creation_token_count, None); + } } From 364abb5f6558bd38f5c479e7e660c07830857c29 Mon Sep 17 00:00:00 2001 From: nonoqing Date: Mon, 18 May 2026 20:51:20 +0800 Subject: [PATCH 4/5] test(usage): guard Gemini cache_creation_token_count stays None --- .../ai-adapters/src/stream/types/gemini.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/crates/ai-adapters/src/stream/types/gemini.rs b/src/crates/ai-adapters/src/stream/types/gemini.rs index c1a7407d1..6bd6a15d7 100644 --- a/src/crates/ai-adapters/src/stream/types/gemini.rs +++ b/src/crates/ai-adapters/src/stream/types/gemini.rs @@ -771,4 +771,21 @@ mod tests { .and_then(|metadata| metadata.get("promptFeedback")) .is_some()); } + + #[test] + fn gemini_cache_creation_is_always_none() { + let payload = serde_json::json!({ + "candidates": [{ "content": { "parts": [{ "text": "answer" }] } }], + "usageMetadata": { + "promptTokenCount": 100, + "candidatesTokenCount": 20, + "totalTokenCount": 120, + 
"cachedContentTokenCount": 35 + } + }); + let data: GeminiSSEData = serde_json::from_value(payload).expect("gemini payload"); + let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone(); + assert_eq!(usage.cached_content_token_count, Some(35)); + assert_eq!(usage.cache_creation_token_count, None); + } } From c33264fd597c107b23f96d4f20b419483c51cab3 Mon Sep 17 00:00:00 2001 From: nonoqing Date: Mon, 18 May 2026 21:24:30 +0800 Subject: [PATCH 5/5] fix(desktop): supply agent_type: None in DialogTurnData test fixtures The agent_type field on DialogTurnData was added by an earlier agent-tools refactor but three test fixtures in agentic_api.rs were not updated, breaking `cargo test -p bitfun-desktop` (lib test compile failure). None matches the field's documented use for non-user-dialog or utility turns. --- src/apps/desktop/src/api/agentic_api.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/apps/desktop/src/api/agentic_api.rs b/src/apps/desktop/src/api/agentic_api.rs index ded19c5e6..095982d43 100644 --- a/src/apps/desktop/src/api/agentic_api.rs +++ b/src/apps/desktop/src/api/agentic_api.rs @@ -1469,6 +1469,7 @@ mod tests { session_id: "session-1".to_string(), timestamp: 1, kind: Default::default(), + agent_type: None, user_message: UserMessageData { id: "user-1".to_string(), content: "hello".to_string(), @@ -1529,6 +1530,7 @@ mod tests { session_id: "session-1".to_string(), timestamp: 1, kind: Default::default(), + agent_type: None, user_message: UserMessageData { id: "user-1".to_string(), content: "hello".to_string(), @@ -1586,6 +1588,7 @@ mod tests { session_id: "session-1".to_string(), timestamp: 1, kind: Default::default(), + agent_type: None, user_message: UserMessageData { id: "user-1".to_string(), content: "hello".to_string(),