Skip to content

Commit 9bfb4b6

Browse files
authored
Merge pull request #774 from nonoqing/yuyiqing/dev
feat(token usage)
2 parents da1954a + c33264f commit 9bfb4b6

7 files changed

Lines changed: 221 additions & 13 deletions

File tree

src/apps/desktop/src/api/agentic_api.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1469,6 +1469,7 @@ mod tests {
14691469
session_id: "session-1".to_string(),
14701470
timestamp: 1,
14711471
kind: Default::default(),
1472+
agent_type: None,
14721473
user_message: UserMessageData {
14731474
id: "user-1".to_string(),
14741475
content: "hello".to_string(),
@@ -1529,6 +1530,7 @@ mod tests {
15291530
session_id: "session-1".to_string(),
15301531
timestamp: 1,
15311532
kind: Default::default(),
1533+
agent_type: None,
15321534
user_message: UserMessageData {
15331535
id: "user-1".to_string(),
15341536
content: "hello".to_string(),
@@ -1586,6 +1588,7 @@ mod tests {
15861588
session_id: "session-1".to_string(),
15871589
timestamp: 1,
15881590
kind: Default::default(),
1591+
agent_type: None,
15891592
user_message: UserMessageData {
15901593
id: "user-1".to_string(),
15911594
content: "hello".to_string(),

src/crates/agent-stream/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1071,6 +1071,7 @@ mod tests {
10711071
total_token_count: total_tokens,
10721072
reasoning_token_count: None,
10731073
cached_content_token_count: None,
1074+
cache_creation_token_count: None,
10741075
}
10751076
}
10761077

src/crates/ai-adapters/src/stream/types/anthropic.rs

Lines changed: 88 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -45,22 +45,27 @@ impl Usage {
4545

4646
impl From<Usage> for UnifiedTokenUsage {
4747
fn from(value: Usage) -> Self {
48-
let cache_read = value.cache_read_input_tokens.unwrap_or(0);
49-
let cache_creation = value.cache_creation_input_tokens.unwrap_or(0);
50-
let prompt_token_count = value.input_tokens.unwrap_or(0) + cache_read + cache_creation;
48+
let cache_read = value.cache_read_input_tokens;
49+
let cache_creation = value.cache_creation_input_tokens;
50+
51+
// prompt_token_count = total context tokens occupied (industry-standard
52+
// "input tokens" metric). For Anthropic this is the three disjoint
53+
// components summed; for other providers the API reports this directly.
54+
let prompt_token_count = value.input_tokens.unwrap_or(0)
55+
+ cache_read.unwrap_or(0)
56+
+ cache_creation.unwrap_or(0);
5157
let candidates_token_count = value.output_tokens.unwrap_or(0);
58+
5259
Self {
5360
prompt_token_count,
5461
candidates_token_count,
5562
total_token_count: prompt_token_count + candidates_token_count,
5663
reasoning_token_count: None,
57-
cached_content_token_count: match (
58-
value.cache_read_input_tokens,
59-
value.cache_creation_input_tokens,
60-
) {
61-
(None, None) => None,
62-
(read, creation) => Some(read.unwrap_or(0) + creation.unwrap_or(0)),
63-
},
64+
// cached_content_token_count = cache READS only. This is the
65+
// numerator for `cache hit rate = cached / prompt`. Writes go
66+
// to cache_creation_token_count below.
67+
cached_content_token_count: cache_read,
68+
cache_creation_token_count: cache_creation,
6469
}
6570
}
6671
}
@@ -210,3 +215,76 @@ impl From<AnthropicSSEErrorDetails> for String {
210215
format!("{}: {}", value.error_type, value.message)
211216
}
212217
}
218+
219+
#[cfg(test)]
220+
mod tests {
221+
use super::*;
222+
use crate::stream::types::unified::UnifiedTokenUsage;
223+
224+
#[test]
225+
fn cached_content_token_count_is_reads_only_not_sum() {
226+
let raw = r#"{
227+
"input_tokens": 100,
228+
"output_tokens": 50,
229+
"cache_read_input_tokens": 30,
230+
"cache_creation_input_tokens": 20
231+
}"#;
232+
let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage");
233+
let unified: UnifiedTokenUsage = usage.into();
234+
235+
// cached_content_token_count must be reads only — NOT read + creation.
236+
// This guarantees `cached_content / prompt` is a correct hit rate.
237+
assert_eq!(unified.cached_content_token_count, Some(30));
238+
assert_eq!(unified.cache_creation_token_count, Some(20));
239+
240+
// prompt_token_count keeps "total context" semantic (matches industry
241+
// standard "input tokens" metric across providers).
242+
assert_eq!(unified.prompt_token_count, 150);
243+
assert_eq!(unified.candidates_token_count, 50);
244+
assert_eq!(unified.total_token_count, 200);
245+
246+
// Hit rate computed by downstream:
247+
// 30 / 150 == 20% (correct: only reads count as hits)
248+
// Pre-fix this would have been wrongly 50/150 == 33%.
249+
}
250+
251+
#[test]
252+
fn absent_cache_fields_stay_none() {
253+
let raw = r#"{ "input_tokens": 100, "output_tokens": 50 }"#;
254+
let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage");
255+
let unified: UnifiedTokenUsage = usage.into();
256+
assert_eq!(unified.cached_content_token_count, None);
257+
assert_eq!(unified.cache_creation_token_count, None);
258+
}
259+
260+
#[test]
261+
fn zero_cache_fields_are_some_zero_not_none() {
262+
// A provider that reports cache fields with a value of zero must be distinguishable
263+
// from "provider did not report cache fields at all".
264+
let raw = r#"{
265+
"input_tokens": 100,
266+
"output_tokens": 50,
267+
"cache_read_input_tokens": 0,
268+
"cache_creation_input_tokens": 0
269+
}"#;
270+
let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage");
271+
let unified: UnifiedTokenUsage = usage.into();
272+
assert_eq!(unified.cached_content_token_count, Some(0));
273+
assert_eq!(unified.cache_creation_token_count, Some(0));
274+
}
275+
276+
#[test]
277+
fn only_read_present_no_creation() {
278+
let raw = r#"{
279+
"input_tokens": 100,
280+
"output_tokens": 50,
281+
"cache_read_input_tokens": 30
282+
}"#;
283+
let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage");
284+
let unified: UnifiedTokenUsage = usage.into();
285+
assert_eq!(unified.cached_content_token_count, Some(30));
286+
assert_eq!(unified.cache_creation_token_count, None);
287+
// prompt_token_count = input + read (no creation contribution)
288+
assert_eq!(unified.prompt_token_count, 130);
289+
}
290+
}

src/crates/ai-adapters/src/stream/types/gemini.rs

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,7 @@ impl From<GeminiUsageMetadata> for UnifiedTokenUsage {
104104
total_token_count: usage.total_token_count,
105105
reasoning_token_count,
106106
cached_content_token_count: usage.cached_content_token_count,
107+
cache_creation_token_count: None,
107108
}
108109
}
109110
}
@@ -770,4 +771,21 @@ mod tests {
770771
.and_then(|metadata| metadata.get("promptFeedback"))
771772
.is_some());
772773
}
774+
775+
#[test]
776+
fn gemini_cache_creation_is_always_none() {
777+
let payload = serde_json::json!({
778+
"candidates": [{ "content": { "parts": [{ "text": "answer" }] } }],
779+
"usageMetadata": {
780+
"promptTokenCount": 100,
781+
"candidatesTokenCount": 20,
782+
"totalTokenCount": 120,
783+
"cachedContentTokenCount": 35
784+
}
785+
});
786+
let data: GeminiSSEData = serde_json::from_value(payload).expect("gemini payload");
787+
let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone();
788+
assert_eq!(usage.cached_content_token_count, Some(35));
789+
assert_eq!(usage.cache_creation_token_count, None);
790+
}
773791
}

src/crates/ai-adapters/src/stream/types/openai.rs

Lines changed: 100 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,18 +15,34 @@ struct OpenAIUsage {
1515
#[serde(default)]
1616
total_tokens: u32,
1717
prompt_tokens_details: Option<PromptTokensDetails>,
18+
/// DeepSeek extension. Subset of `prompt_tokens`. Absent on non-DeepSeek
19+
/// providers. Prefer this over `prompt_tokens_details.cached_tokens` when
20+
/// both are present — DeepSeek-native is the authoritative source.
21+
#[serde(default)]
22+
prompt_cache_hit_tokens: Option<u32>,
23+
/// DeepSeek extension. Equals `prompt_tokens - prompt_cache_hit_tokens`.
24+
/// Deserialized so that enabling `#[serde(deny_unknown_fields)]` later would not reject the payload;
25+
/// not propagated (the miss count is derivable from the other two).
26+
#[serde(default)]
27+
#[allow(dead_code)]
28+
prompt_cache_miss_tokens: Option<u32>,
1829
}
1930

2031
impl From<OpenAIUsage> for UnifiedTokenUsage {
2132
fn from(usage: OpenAIUsage) -> Self {
33+
let standard_cached = usage
34+
.prompt_tokens_details
35+
.and_then(|details| details.cached_tokens);
36+
// DeepSeek extension wins when both present.
37+
let cache_read = usage.prompt_cache_hit_tokens.or(standard_cached);
38+
2239
Self {
2340
prompt_token_count: usage.prompt_tokens,
2441
candidates_token_count: usage.completion_tokens,
2542
total_token_count: usage.total_tokens,
2643
reasoning_token_count: None,
27-
cached_content_token_count: usage
28-
.prompt_tokens_details
29-
.and_then(|prompt_tokens_details| prompt_tokens_details.cached_tokens),
44+
cached_content_token_count: cache_read,
45+
cache_creation_token_count: None,
3046
}
3147
}
3248
}
@@ -692,4 +708,85 @@ mod tests {
692708
assert_eq!(responses.len(), 1);
693709
assert!(responses[0].tool_call.is_some());
694710
}
711+
712+
#[test]
713+
fn standard_openai_cached_tokens_maps_through() {
714+
let raw = r#"{
715+
"id": "chatcmpl_test",
716+
"created": 1,
717+
"model": "gpt-test",
718+
"choices": [],
719+
"usage": {
720+
"prompt_tokens": 100,
721+
"completion_tokens": 20,
722+
"total_tokens": 120,
723+
"prompt_tokens_details": { "cached_tokens": 40 }
724+
}
725+
}"#;
726+
let data: OpenAISSEData = serde_json::from_str(raw).expect("valid openai sse data");
727+
let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone();
728+
assert_eq!(usage.cached_content_token_count, Some(40));
729+
assert_eq!(usage.cache_creation_token_count, None);
730+
}
731+
732+
#[test]
733+
fn deepseek_prompt_cache_hit_tokens_is_captured() {
734+
// Pre-fix this field was silently dropped (serde ignores unknown keys by default).
735+
let raw = r#"{
736+
"id": "chatcmpl_test",
737+
"created": 1,
738+
"model": "deepseek-chat",
739+
"choices": [],
740+
"usage": {
741+
"prompt_tokens": 100,
742+
"completion_tokens": 20,
743+
"total_tokens": 120,
744+
"prompt_cache_hit_tokens": 64,
745+
"prompt_cache_miss_tokens": 36
746+
}
747+
}"#;
748+
let data: OpenAISSEData = serde_json::from_str(raw).expect("valid deepseek sse data");
749+
let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone();
750+
assert_eq!(usage.cached_content_token_count, Some(64));
751+
}
752+
753+
#[test]
754+
fn deepseek_extension_preferred_over_standard_cached_tokens_if_both() {
755+
// Defensive: if a proxy forwards both, prefer the DeepSeek-native field.
756+
let raw = r#"{
757+
"id": "chatcmpl_test",
758+
"created": 1,
759+
"model": "deepseek-chat",
760+
"choices": [],
761+
"usage": {
762+
"prompt_tokens": 100,
763+
"completion_tokens": 20,
764+
"total_tokens": 120,
765+
"prompt_cache_hit_tokens": 64,
766+
"prompt_tokens_details": { "cached_tokens": 0 }
767+
}
768+
}"#;
769+
let data: OpenAISSEData = serde_json::from_str(raw).expect("valid proxy payload");
770+
let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone();
771+
assert_eq!(usage.cached_content_token_count, Some(64));
772+
}
773+
774+
#[test]
775+
fn openai_no_cache_fields_stays_none() {
776+
let raw = r#"{
777+
"id": "chatcmpl_test",
778+
"created": 1,
779+
"model": "gpt-test",
780+
"choices": [],
781+
"usage": {
782+
"prompt_tokens": 100,
783+
"completion_tokens": 20,
784+
"total_tokens": 120
785+
}
786+
}"#;
787+
let data: OpenAISSEData = serde_json::from_str(raw).expect("valid openai sse data");
788+
let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone();
789+
assert_eq!(usage.cached_content_token_count, None);
790+
assert_eq!(usage.cache_creation_token_count, None);
791+
}
695792
}

src/crates/ai-adapters/src/stream/types/responses.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,7 @@ impl From<ResponsesUsage> for UnifiedTokenUsage {
6262
cached_content_token_count: usage
6363
.input_tokens_details
6464
.map(|details| details.cached_tokens),
65+
cache_creation_token_count: None,
6566
}
6667
}
6768
}

src/crates/ai-adapters/src/stream/types/unified.rs

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,16 @@ pub struct UnifiedTokenUsage {
6666
pub total_token_count: u32,
6767
#[serde(skip_serializing_if = "Option::is_none")]
6868
pub reasoning_token_count: Option<u32>,
69+
/// Cache READ tokens (i.e., served from cache this call). Universal across
70+
/// providers: OpenAI `cached_tokens`, DeepSeek `prompt_cache_hit_tokens`,
71+
/// Anthropic `cache_read_input_tokens`, Gemini `cachedContentTokenCount`.
72+
/// Hit rate consumers must use this as numerator and `prompt_token_count`
73+
/// as denominator.
6974
#[serde(skip_serializing_if = "Option::is_none")]
7075
pub cached_content_token_count: Option<u32>,
76+
/// Cache WRITE tokens (only Anthropic reports this per-token; others either
77+
/// have no creation concept or bill creation by storage time). Disjoint from
78+
/// `cached_content_token_count`. Do NOT include in hit-rate numerator.
79+
#[serde(skip_serializing_if = "Option::is_none", default)]
80+
pub cache_creation_token_count: Option<u32>,
7181
}

0 commit comments

Comments
 (0)