diff --git a/src/apps/desktop/src/api/agentic_api.rs b/src/apps/desktop/src/api/agentic_api.rs index ded19c5e6..095982d43 100644 --- a/src/apps/desktop/src/api/agentic_api.rs +++ b/src/apps/desktop/src/api/agentic_api.rs @@ -1469,6 +1469,7 @@ mod tests { session_id: "session-1".to_string(), timestamp: 1, kind: Default::default(), + agent_type: None, user_message: UserMessageData { id: "user-1".to_string(), content: "hello".to_string(), @@ -1529,6 +1530,7 @@ mod tests { session_id: "session-1".to_string(), timestamp: 1, kind: Default::default(), + agent_type: None, user_message: UserMessageData { id: "user-1".to_string(), content: "hello".to_string(), @@ -1586,6 +1588,7 @@ mod tests { session_id: "session-1".to_string(), timestamp: 1, kind: Default::default(), + agent_type: None, user_message: UserMessageData { id: "user-1".to_string(), content: "hello".to_string(), diff --git a/src/crates/agent-stream/src/lib.rs b/src/crates/agent-stream/src/lib.rs index ad32cdd81..5256f844b 100644 --- a/src/crates/agent-stream/src/lib.rs +++ b/src/crates/agent-stream/src/lib.rs @@ -1071,6 +1071,7 @@ mod tests { total_token_count: total_tokens, reasoning_token_count: None, cached_content_token_count: None, + cache_creation_token_count: None, } } diff --git a/src/crates/ai-adapters/src/stream/types/anthropic.rs b/src/crates/ai-adapters/src/stream/types/anthropic.rs index b785069bf..d598b7bc2 100644 --- a/src/crates/ai-adapters/src/stream/types/anthropic.rs +++ b/src/crates/ai-adapters/src/stream/types/anthropic.rs @@ -45,22 +45,27 @@ impl Usage { impl From for UnifiedTokenUsage { fn from(value: Usage) -> Self { - let cache_read = value.cache_read_input_tokens.unwrap_or(0); - let cache_creation = value.cache_creation_input_tokens.unwrap_or(0); - let prompt_token_count = value.input_tokens.unwrap_or(0) + cache_read + cache_creation; + let cache_read = value.cache_read_input_tokens; + let cache_creation = value.cache_creation_input_tokens; + + // prompt_token_count = total context tokens occupied (industry-standard + // "input tokens" metric). For Anthropic this is the three disjoint + // components summed; for other providers the API reports this directly. + let prompt_token_count = value.input_tokens.unwrap_or(0) + + cache_read.unwrap_or(0) + + cache_creation.unwrap_or(0); let candidates_token_count = value.output_tokens.unwrap_or(0); + Self { prompt_token_count, candidates_token_count, total_token_count: prompt_token_count + candidates_token_count, reasoning_token_count: None, - cached_content_token_count: match ( - value.cache_read_input_tokens, - value.cache_creation_input_tokens, - ) { - (None, None) => None, - (read, creation) => Some(read.unwrap_or(0) + creation.unwrap_or(0)), - }, + // cached_content_token_count = cache READS only. This is the + // numerator for `cache hit rate = cached / prompt`. Writes go + // to cache_creation_token_count below. + cached_content_token_count: cache_read, + cache_creation_token_count: cache_creation, } } } @@ -210,3 +215,76 @@ impl From for String { format!("{}: {}", value.error_type, value.message) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::stream::types::unified::UnifiedTokenUsage; + + #[test] + fn cached_content_token_count_is_reads_only_not_sum() { + let raw = r#"{ + "input_tokens": 100, + "output_tokens": 50, + "cache_read_input_tokens": 30, + "cache_creation_input_tokens": 20 + }"#; + let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage"); + let unified: UnifiedTokenUsage = usage.into(); + + // cached_content_token_count must be reads only — NOT read + creation. + // This guarantees `cached_content / prompt` is a correct hit rate. + assert_eq!(unified.cached_content_token_count, Some(30)); + assert_eq!(unified.cache_creation_token_count, Some(20)); + + // prompt_token_count keeps "total context" semantic (matches industry + // standard "input tokens" metric across providers). + assert_eq!(unified.prompt_token_count, 150); + assert_eq!(unified.candidates_token_count, 50); + assert_eq!(unified.total_token_count, 200); + + // Hit rate computed by downstream: + // 30 / 150 == 20% (correct: only reads count as hits) + // Pre-fix this would have been wrongly 50/150 == 33%. + } + + #[test] + fn absent_cache_fields_stay_none() { + let raw = r#"{ "input_tokens": 100, "output_tokens": 50 }"#; + let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage"); + let unified: UnifiedTokenUsage = usage.into(); + assert_eq!(unified.cached_content_token_count, None); + assert_eq!(unified.cache_creation_token_count, None); + } + + #[test] + fn zero_cache_fields_are_some_zero_not_none() { + // Cache support reported but zero this call must be distinguishable + // from "provider did not report cache fields at all". + let raw = r#"{ + "input_tokens": 100, + "output_tokens": 50, + "cache_read_input_tokens": 0, + "cache_creation_input_tokens": 0 + }"#; + let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage"); + let unified: UnifiedTokenUsage = usage.into(); + assert_eq!(unified.cached_content_token_count, Some(0)); + assert_eq!(unified.cache_creation_token_count, Some(0)); + } + + #[test] + fn only_read_present_no_creation() { + let raw = r#"{ + "input_tokens": 100, + "output_tokens": 50, + "cache_read_input_tokens": 30 + }"#; + let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage"); + let unified: UnifiedTokenUsage = usage.into(); + assert_eq!(unified.cached_content_token_count, Some(30)); + assert_eq!(unified.cache_creation_token_count, None); + // prompt_token_count = input + read (no creation contribution) + assert_eq!(unified.prompt_token_count, 130); + } +} diff --git a/src/crates/ai-adapters/src/stream/types/gemini.rs b/src/crates/ai-adapters/src/stream/types/gemini.rs index 4927c91b3..6bd6a15d7 100644 --- a/src/crates/ai-adapters/src/stream/types/gemini.rs +++ b/src/crates/ai-adapters/src/stream/types/gemini.rs @@ -104,6 +104,7 @@ impl From for UnifiedTokenUsage { total_token_count: usage.total_token_count, reasoning_token_count, cached_content_token_count: usage.cached_content_token_count, + cache_creation_token_count: None, } } } @@ -770,4 +771,21 @@ mod tests { .and_then(|metadata| metadata.get("promptFeedback")) .is_some()); } + + #[test] + fn gemini_cache_creation_is_always_none() { + let payload = serde_json::json!({ + "candidates": [{ "content": { "parts": [{ "text": "answer" }] } }], + "usageMetadata": { + "promptTokenCount": 100, + "candidatesTokenCount": 20, + "totalTokenCount": 120, + "cachedContentTokenCount": 35 + } + }); + let data: GeminiSSEData = serde_json::from_value(payload).expect("gemini payload"); + let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone(); + assert_eq!(usage.cached_content_token_count, Some(35)); + assert_eq!(usage.cache_creation_token_count, None); + } } diff --git a/src/crates/ai-adapters/src/stream/types/openai.rs b/src/crates/ai-adapters/src/stream/types/openai.rs index 03aeda399..fe7ff5c9d 100644 --- a/src/crates/ai-adapters/src/stream/types/openai.rs +++ b/src/crates/ai-adapters/src/stream/types/openai.rs @@ -15,18 +15,34 @@ struct OpenAIUsage { #[serde(default)] total_tokens: u32, prompt_tokens_details: Option, + /// DeepSeek extension. Subset of `prompt_tokens`. Absent on non-DeepSeek + /// providers. Prefer this over `prompt_tokens_details.cached_tokens` when + /// both are present — DeepSeek-native is the authoritative source. + #[serde(default)] + prompt_cache_hit_tokens: Option, + /// DeepSeek extension. Equals `prompt_tokens - prompt_cache_hit_tokens`. + /// Deserialized so a future strict serde lint doesn't reject the payload; + /// not propagated (the miss count is derivable from the other two). + #[serde(default)] + #[allow(dead_code)] + prompt_cache_miss_tokens: Option, } impl From for UnifiedTokenUsage { fn from(usage: OpenAIUsage) -> Self { + let standard_cached = usage + .prompt_tokens_details + .and_then(|details| details.cached_tokens); + // DeepSeek extension wins when both present. + let cache_read = usage.prompt_cache_hit_tokens.or(standard_cached); + Self { prompt_token_count: usage.prompt_tokens, candidates_token_count: usage.completion_tokens, total_token_count: usage.total_tokens, reasoning_token_count: None, - cached_content_token_count: usage - .prompt_tokens_details - .and_then(|prompt_tokens_details| prompt_tokens_details.cached_tokens), + cached_content_token_count: cache_read, + cache_creation_token_count: None, } } } @@ -692,4 +708,85 @@ mod tests { assert_eq!(responses.len(), 1); assert!(responses[0].tool_call.is_some()); } + + #[test] + fn standard_openai_cached_tokens_maps_through() { + let raw = r#"{ + "id": "chatcmpl_test", + "created": 1, + "model": "gpt-test", + "choices": [], + "usage": { + "prompt_tokens": 100, + "completion_tokens": 20, + "total_tokens": 120, + "prompt_tokens_details": { "cached_tokens": 40 } + } + }"#; + let data: OpenAISSEData = serde_json::from_str(raw).expect("valid openai sse data"); + let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone(); + assert_eq!(usage.cached_content_token_count, Some(40)); + assert_eq!(usage.cache_creation_token_count, None); + } + + #[test] + fn deepseek_prompt_cache_hit_tokens_is_captured() { + // Pre-fix this field was silently dropped (strict serde, unknown key). + let raw = r#"{ + "id": "chatcmpl_test", + "created": 1, + "model": "deepseek-chat", + "choices": [], + "usage": { + "prompt_tokens": 100, + "completion_tokens": 20, + "total_tokens": 120, + "prompt_cache_hit_tokens": 64, + "prompt_cache_miss_tokens": 36 + } + }"#; + let data: OpenAISSEData = serde_json::from_str(raw).expect("valid deepseek sse data"); + let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone(); + assert_eq!(usage.cached_content_token_count, Some(64)); + } + + #[test] + fn deepseek_extension_preferred_over_standard_cached_tokens_if_both() { + // Defensive: if a proxy forwards both, prefer the DeepSeek-native field. + let raw = r#"{ + "id": "chatcmpl_test", + "created": 1, + "model": "deepseek-chat", + "choices": [], + "usage": { + "prompt_tokens": 100, + "completion_tokens": 20, + "total_tokens": 120, + "prompt_cache_hit_tokens": 64, + "prompt_tokens_details": { "cached_tokens": 0 } + } + }"#; + let data: OpenAISSEData = serde_json::from_str(raw).expect("valid proxy payload"); + let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone(); + assert_eq!(usage.cached_content_token_count, Some(64)); + } + + #[test] + fn openai_no_cache_fields_stays_none() { + let raw = r#"{ + "id": "chatcmpl_test", + "created": 1, + "model": "gpt-test", + "choices": [], + "usage": { + "prompt_tokens": 100, + "completion_tokens": 20, + "total_tokens": 120 + } + }"#; + let data: OpenAISSEData = serde_json::from_str(raw).expect("valid openai sse data"); + let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone(); + assert_eq!(usage.cached_content_token_count, None); + assert_eq!(usage.cache_creation_token_count, None); + } } diff --git a/src/crates/ai-adapters/src/stream/types/responses.rs b/src/crates/ai-adapters/src/stream/types/responses.rs index 8e9b48071..a58da6366 100644 --- a/src/crates/ai-adapters/src/stream/types/responses.rs +++ b/src/crates/ai-adapters/src/stream/types/responses.rs @@ -62,6 +62,7 @@ impl From for UnifiedTokenUsage { cached_content_token_count: usage .input_tokens_details .map(|details| details.cached_tokens), + cache_creation_token_count: None, } } } diff --git a/src/crates/ai-adapters/src/stream/types/unified.rs b/src/crates/ai-adapters/src/stream/types/unified.rs index 27048acc5..fb5948eea 100644 --- a/src/crates/ai-adapters/src/stream/types/unified.rs +++ b/src/crates/ai-adapters/src/stream/types/unified.rs @@ -66,6 +66,16 @@ pub struct UnifiedTokenUsage { pub total_token_count: u32, #[serde(skip_serializing_if = "Option::is_none")] pub reasoning_token_count: Option, + /// Cache READ tokens (i.e., served from cache this call). Universal across + /// providers: OpenAI `cached_tokens`, DeepSeek `prompt_cache_hit_tokens`, + /// Anthropic `cache_read_input_tokens`, Gemini `cachedContentTokenCount`. + /// Hit rate consumers must use this as numerator and `prompt_token_count` + /// as denominator. #[serde(skip_serializing_if = "Option::is_none")] pub cached_content_token_count: Option, + /// Cache WRITE tokens (only Anthropic reports this per-token; others either + /// have no creation concept or bill creation by storage time). Disjoint from + /// `cached_content_token_count`. Do NOT include in hit-rate numerator. + #[serde(skip_serializing_if = "Option::is_none", default)] + pub cache_creation_token_count: Option, }