Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/apps/desktop/src/api/agentic_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1469,6 +1469,7 @@ mod tests {
session_id: "session-1".to_string(),
timestamp: 1,
kind: Default::default(),
agent_type: None,
user_message: UserMessageData {
id: "user-1".to_string(),
content: "hello".to_string(),
Expand Down Expand Up @@ -1529,6 +1530,7 @@ mod tests {
session_id: "session-1".to_string(),
timestamp: 1,
kind: Default::default(),
agent_type: None,
user_message: UserMessageData {
id: "user-1".to_string(),
content: "hello".to_string(),
Expand Down Expand Up @@ -1586,6 +1588,7 @@ mod tests {
session_id: "session-1".to_string(),
timestamp: 1,
kind: Default::default(),
agent_type: None,
user_message: UserMessageData {
id: "user-1".to_string(),
content: "hello".to_string(),
Expand Down
1 change: 1 addition & 0 deletions src/crates/agent-stream/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,7 @@ mod tests {
total_token_count: total_tokens,
reasoning_token_count: None,
cached_content_token_count: None,
cache_creation_token_count: None,
}
}

Expand Down
98 changes: 88 additions & 10 deletions src/crates/ai-adapters/src/stream/types/anthropic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,22 +45,27 @@ impl Usage {

/// Converts Anthropic's raw `usage` counters into the provider-neutral
/// [`UnifiedTokenUsage`].
///
/// Every source counter is optional. A missing counter contributes `0` to the
/// sums, while the two cache counters are forwarded as-is so that "not
/// reported" (`None`) stays distinguishable from "reported as zero"
/// (`Some(0)`).
impl From<Usage> for UnifiedTokenUsage {
    fn from(value: Usage) -> Self {
        let cache_read = value.cache_read_input_tokens;
        let cache_creation = value.cache_creation_input_tokens;

        // prompt_token_count = total context tokens occupied (industry-standard
        // "input tokens" metric). For Anthropic this is the three disjoint
        // components summed; for other providers the API reports this directly.
        // Saturating adds keep an absurdly large counter from panicking in
        // debug builds or wrapping around in release builds.
        let prompt_token_count = value
            .input_tokens
            .unwrap_or(0)
            .saturating_add(cache_read.unwrap_or(0))
            .saturating_add(cache_creation.unwrap_or(0));
        let candidates_token_count = value.output_tokens.unwrap_or(0);

        Self {
            prompt_token_count,
            candidates_token_count,
            total_token_count: prompt_token_count.saturating_add(candidates_token_count),
            reasoning_token_count: None,
            // cached_content_token_count = cache READS only. This is the
            // numerator for `cache hit rate = cached / prompt`. Writes go
            // to cache_creation_token_count below.
            cached_content_token_count: cache_read,
            cache_creation_token_count: cache_creation,
        }
    }
}
Expand Down Expand Up @@ -210,3 +215,76 @@ impl From<AnthropicSSEErrorDetails> for String {
format!("{}: {}", value.error_type, value.message)
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::stream::types::unified::UnifiedTokenUsage;

    /// Parses an Anthropic `usage` JSON object and converts it into the
    /// unified representation, panicking if the JSON does not deserialize.
    fn unify(raw: &str) -> UnifiedTokenUsage {
        let parsed: Usage = serde_json::from_str(raw).expect("valid anthropic usage");
        parsed.into()
    }

    #[test]
    fn cached_content_token_count_is_reads_only_not_sum() {
        let unified = unify(
            r#"{
"input_tokens": 100,
"output_tokens": 50,
"cache_read_input_tokens": 30,
"cache_creation_input_tokens": 20
}"#,
        );

        // Reads and writes are surfaced in separate fields; the read field
        // must never be read + creation, otherwise `cached_content / prompt`
        // stops being a valid hit rate.
        assert_eq!(unified.cached_content_token_count, Some(30));
        assert_eq!(unified.cache_creation_token_count, Some(20));

        // The prompt count keeps its "total context" meaning: 100 + 30 + 20.
        assert_eq!(unified.prompt_token_count, 150);
        assert_eq!(unified.candidates_token_count, 50);
        assert_eq!(unified.total_token_count, 200);

        // Resulting downstream hit rate: 30 / 150 == 20% (reads only).
        // With read + creation as numerator it would be 50 / 150 == 33%.
    }

    #[test]
    fn absent_cache_fields_stay_none() {
        let unified = unify(r#"{ "input_tokens": 100, "output_tokens": 50 }"#);

        assert_eq!(unified.cached_content_token_count, None);
        assert_eq!(unified.cache_creation_token_count, None);
    }

    #[test]
    fn zero_cache_fields_are_some_zero_not_none() {
        // An explicit zero ("cache supported, nothing hit or written on this
        // call") has to remain distinguishable from the fields being absent.
        let unified = unify(
            r#"{
"input_tokens": 100,
"output_tokens": 50,
"cache_read_input_tokens": 0,
"cache_creation_input_tokens": 0
}"#,
        );

        assert_eq!(unified.cached_content_token_count, Some(0));
        assert_eq!(unified.cache_creation_token_count, Some(0));
    }

    #[test]
    fn only_read_present_no_creation() {
        let unified = unify(
            r#"{
"input_tokens": 100,
"output_tokens": 50,
"cache_read_input_tokens": 30
}"#,
        );

        assert_eq!(unified.cached_content_token_count, Some(30));
        assert_eq!(unified.cache_creation_token_count, None);
        // With no creation counter the prompt count is just input + read.
        assert_eq!(unified.prompt_token_count, 130);
    }
}
18 changes: 18 additions & 0 deletions src/crates/ai-adapters/src/stream/types/gemini.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ impl From<GeminiUsageMetadata> for UnifiedTokenUsage {
total_token_count: usage.total_token_count,
reasoning_token_count,
cached_content_token_count: usage.cached_content_token_count,
cache_creation_token_count: None,
}
}
}
Expand Down Expand Up @@ -770,4 +771,21 @@ mod tests {
.and_then(|metadata| metadata.get("promptFeedback"))
.is_some());
}

#[test]
fn gemini_cache_creation_is_always_none() {
    // The payload carries a cached-read counter only; the unified usage must
    // forward it and leave the cache-creation field unset.
    let sse: GeminiSSEData = serde_json::from_value(serde_json::json!({
        "candidates": [{ "content": { "parts": [{ "text": "answer" }] } }],
        "usageMetadata": {
            "promptTokenCount": 100,
            "candidatesTokenCount": 20,
            "totalTokenCount": 120,
            "cachedContentTokenCount": 35
        }
    }))
    .expect("gemini payload");

    let responses = sse.into_unified_responses();
    let usage = responses[0].usage.as_ref().expect("usage");

    assert_eq!(usage.cached_content_token_count, Some(35));
    assert_eq!(usage.cache_creation_token_count, None);
}
}
103 changes: 100 additions & 3 deletions src/crates/ai-adapters/src/stream/types/openai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,34 @@ struct OpenAIUsage {
#[serde(default)]
total_tokens: u32,
prompt_tokens_details: Option<PromptTokensDetails>,
/// DeepSeek extension. Subset of `prompt_tokens`. Absent on non-DeepSeek
/// providers. Prefer this over `prompt_tokens_details.cached_tokens` when
/// both are present — DeepSeek-native is the authoritative source.
#[serde(default)]
prompt_cache_hit_tokens: Option<u32>,
/// DeepSeek extension. Equals `prompt_tokens - prompt_cache_hit_tokens`.
/// Deserialized so the payload would still parse if unknown-field rejection
/// (e.g. `#[serde(deny_unknown_fields)]`) were ever enabled on this struct;
/// not propagated (the miss count is derivable from the other two).
#[serde(default)]
#[allow(dead_code)]
prompt_cache_miss_tokens: Option<u32>,
}

/// Converts an OpenAI-compatible `usage` object into the provider-neutral
/// [`UnifiedTokenUsage`].
///
/// The cache-read count comes from one of two places:
/// - `prompt_cache_hit_tokens` (DeepSeek extension), used whenever present;
/// - `prompt_tokens_details.cached_tokens` (standard OpenAI), used otherwise.
///
/// If neither is present the cache-read count stays `None`. This conversion
/// never sets a cache-creation count.
impl From<OpenAIUsage> for UnifiedTokenUsage {
    fn from(usage: OpenAIUsage) -> Self {
        let standard_cached = usage
            .prompt_tokens_details
            .and_then(|details| details.cached_tokens);
        // DeepSeek extension wins when both present.
        let cache_read = usage.prompt_cache_hit_tokens.or(standard_cached);

        Self {
            prompt_token_count: usage.prompt_tokens,
            candidates_token_count: usage.completion_tokens,
            total_token_count: usage.total_tokens,
            reasoning_token_count: None,
            // Initialised exactly once, from the already-resolved value:
            // `prompt_tokens_details` was moved out of `usage` above and
            // cannot be read again here.
            cached_content_token_count: cache_read,
            cache_creation_token_count: None,
        }
    }
}
Expand Down Expand Up @@ -692,4 +708,85 @@ mod tests {
assert_eq!(responses.len(), 1);
assert!(responses[0].tool_call.is_some());
}

#[test]
fn standard_openai_cached_tokens_maps_through() {
    // Only the standard `prompt_tokens_details.cached_tokens` counter is
    // present, so it must surface unchanged as the cache-read count.
    let payload = r#"{
"id": "chatcmpl_test",
"created": 1,
"model": "gpt-test",
"choices": [],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 20,
"total_tokens": 120,
"prompt_tokens_details": { "cached_tokens": 40 }
}
}"#;
    let parsed: OpenAISSEData = serde_json::from_str(payload).expect("valid openai sse data");

    let responses = parsed.into_unified_responses();
    let usage = responses[0].usage.as_ref().expect("usage");

    assert_eq!(usage.cached_content_token_count, Some(40));
    assert_eq!(usage.cache_creation_token_count, None);
}

#[test]
fn deepseek_prompt_cache_hit_tokens_is_captured() {
    // Regression guard: DeepSeek's `prompt_cache_hit_tokens` must reach the
    // unified cache-read count instead of being silently dropped.
    // NOTE(review): the earlier note blamed "strict serde"; serde ignores
    // unknown keys by default, which is what a silent drop looks like —
    // confirm no `deny_unknown_fields` is involved.
    let payload = r#"{
"id": "chatcmpl_test",
"created": 1,
"model": "deepseek-chat",
"choices": [],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 20,
"total_tokens": 120,
"prompt_cache_hit_tokens": 64,
"prompt_cache_miss_tokens": 36
}
}"#;
    let parsed: OpenAISSEData = serde_json::from_str(payload).expect("valid deepseek sse data");

    let responses = parsed.into_unified_responses();
    let usage = responses[0].usage.as_ref().expect("usage");

    assert_eq!(usage.cached_content_token_count, Some(64));
}

#[test]
fn deepseek_extension_preferred_over_standard_cached_tokens_if_both() {
    // A proxy may forward both counters; when they disagree (64 vs 0 here)
    // the DeepSeek-native value is the one that must be reported.
    let payload = r#"{
"id": "chatcmpl_test",
"created": 1,
"model": "deepseek-chat",
"choices": [],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 20,
"total_tokens": 120,
"prompt_cache_hit_tokens": 64,
"prompt_tokens_details": { "cached_tokens": 0 }
}
}"#;
    let parsed: OpenAISSEData = serde_json::from_str(payload).expect("valid proxy payload");

    let responses = parsed.into_unified_responses();
    let usage = responses[0].usage.as_ref().expect("usage");

    assert_eq!(usage.cached_content_token_count, Some(64));
}

#[test]
fn openai_no_cache_fields_stays_none() {
    // No cache counter of either flavour is present, so both unified cache
    // fields must stay unset rather than defaulting to zero.
    let payload = r#"{
"id": "chatcmpl_test",
"created": 1,
"model": "gpt-test",
"choices": [],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 20,
"total_tokens": 120
}
}"#;
    let parsed: OpenAISSEData = serde_json::from_str(payload).expect("valid openai sse data");

    let responses = parsed.into_unified_responses();
    let usage = responses[0].usage.as_ref().expect("usage");

    assert_eq!(usage.cached_content_token_count, None);
    assert_eq!(usage.cache_creation_token_count, None);
}
}
1 change: 1 addition & 0 deletions src/crates/ai-adapters/src/stream/types/responses.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ impl From<ResponsesUsage> for UnifiedTokenUsage {
cached_content_token_count: usage
.input_tokens_details
.map(|details| details.cached_tokens),
cache_creation_token_count: None,
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions src/crates/ai-adapters/src/stream/types/unified.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,16 @@ pub struct UnifiedTokenUsage {
pub total_token_count: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_token_count: Option<u32>,
/// Cache READ tokens (i.e., served from cache this call). Universal across
/// providers: OpenAI `cached_tokens`, DeepSeek `prompt_cache_hit_tokens`,
/// Anthropic `cache_read_input_tokens`, Gemini `cachedContentTokenCount`.
/// Hit rate consumers must use this as numerator and `prompt_token_count`
/// as denominator.
#[serde(skip_serializing_if = "Option::is_none")]
pub cached_content_token_count: Option<u32>,
/// Cache WRITE tokens (only Anthropic reports this per-token; others either
/// have no creation concept or bill creation by storage time). Disjoint from
/// `cached_content_token_count`. Do NOT include in hit-rate numerator.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub cache_creation_token_count: Option<u32>,
}
Loading