From 04a7f567b0eeee8312ba202ad8916cf66b8079e5 Mon Sep 17 00:00:00 2001
From: nonoqing <y_yqingfiona@163.com>
Date: Mon, 18 May 2026 19:10:20 +0800
Subject: [PATCH 1/5] feat(usage): add cache_creation_token_count to
 UnifiedTokenUsage

---
 src/crates/agent-stream/src/lib.rs                   |  1 +
 src/crates/ai-adapters/src/stream/types/anthropic.rs |  1 +
 src/crates/ai-adapters/src/stream/types/gemini.rs    |  1 +
 src/crates/ai-adapters/src/stream/types/openai.rs    |  1 +
 src/crates/ai-adapters/src/stream/types/responses.rs |  1 +
 src/crates/ai-adapters/src/stream/types/unified.rs   | 10 ++++++++++
 6 files changed, 15 insertions(+)
diff --git a/src/crates/agent-stream/src/lib.rs b/src/crates/agent-stream/src/lib.rs
index ad32cdd81..5256f844b 100644
--- a/src/crates/agent-stream/src/lib.rs
+++ b/src/crates/agent-stream/src/lib.rs
@@ -1071,6 +1071,7 @@ mod tests {
             total_token_count: total_tokens,
             reasoning_token_count: None,
             cached_content_token_count: None,
+            cache_creation_token_count: None,
         }
     }
 
diff --git a/src/crates/ai-adapters/src/stream/types/anthropic.rs b/src/crates/ai-adapters/src/stream/types/anthropic.rs
index b785069bf..1e341be08 100644
--- a/src/crates/ai-adapters/src/stream/types/anthropic.rs
+++ b/src/crates/ai-adapters/src/stream/types/anthropic.rs
@@ -61,6 +61,7 @@ impl From<Usage> for UnifiedTokenUsage {
                 (None, None) => None,
                 (read, creation) => Some(read.unwrap_or(0) + creation.unwrap_or(0)),
             },
+            cache_creation_token_count: None,
         }
     }
 }
diff --git a/src/crates/ai-adapters/src/stream/types/gemini.rs b/src/crates/ai-adapters/src/stream/types/gemini.rs
index 4927c91b3..c1a7407d1 100644
--- a/src/crates/ai-adapters/src/stream/types/gemini.rs
+++ b/src/crates/ai-adapters/src/stream/types/gemini.rs
@@ -104,6 +104,7 @@ impl From<GeminiUsageMetadata> for UnifiedTokenUsage {
             total_token_count: usage.total_token_count,
             reasoning_token_count,
             cached_content_token_count: usage.cached_content_token_count,
+            cache_creation_token_count: None,
         }
     }
 }
diff --git a/src/crates/ai-adapters/src/stream/types/openai.rs b/src/crates/ai-adapters/src/stream/types/openai.rs
index 03aeda399..0d05c4b17 100644
--- a/src/crates/ai-adapters/src/stream/types/openai.rs
+++ b/src/crates/ai-adapters/src/stream/types/openai.rs
@@ -27,6 +27,7 @@ impl From<OpenAIUsage> for UnifiedTokenUsage {
             cached_content_token_count: usage
                 .prompt_tokens_details
                 .and_then(|prompt_tokens_details| prompt_tokens_details.cached_tokens),
+            cache_creation_token_count: None,
         }
     }
 }
diff --git a/src/crates/ai-adapters/src/stream/types/responses.rs b/src/crates/ai-adapters/src/stream/types/responses.rs
index 8e9b48071..a58da6366 100644
--- a/src/crates/ai-adapters/src/stream/types/responses.rs
+++ b/src/crates/ai-adapters/src/stream/types/responses.rs
@@ -62,6 +62,7 @@ impl From<ResponsesUsage> for UnifiedTokenUsage {
             cached_content_token_count: usage
                 .input_tokens_details
                 .map(|details| details.cached_tokens),
+            cache_creation_token_count: None,
         }
     }
 }
diff --git a/src/crates/ai-adapters/src/stream/types/unified.rs b/src/crates/ai-adapters/src/stream/types/unified.rs
index 27048acc5..fb5948eea 100644
--- a/src/crates/ai-adapters/src/stream/types/unified.rs
+++ b/src/crates/ai-adapters/src/stream/types/unified.rs
@@ -66,6 +66,16 @@ pub struct UnifiedTokenUsage {
     pub total_token_count: u32,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub reasoning_token_count: Option<u32>,
+    /// Cache READ tokens (i.e., served from cache this call). Universal across
+    /// providers: OpenAI `cached_tokens`, DeepSeek `prompt_cache_hit_tokens`,
+    /// Anthropic `cache_read_input_tokens`, Gemini `cachedContentTokenCount`.
+    /// Hit-rate consumers must use this as the numerator and
+    /// `prompt_token_count` as the denominator.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub cached_content_token_count: Option<u32>,
+    /// Cache WRITE tokens (only Anthropic reports this per-token; others either
+    /// have no creation concept or bill creation by storage time). Disjoint from
+    /// `cached_content_token_count`. Do NOT include in hit-rate numerator.
+    #[serde(skip_serializing_if = "Option::is_none", default)]
+    pub cache_creation_token_count: Option<u32>,
 }

From 027c630b7ed62247f7df675cb9f02076e5e1b88b Mon Sep 17 00:00:00 2001
From: nonoqing <y_yqingfiona@163.com>
Date: Mon, 18 May 2026 20:15:43 +0800
Subject: [PATCH 2/5] fix(usage): Anthropic cached_content_token_count is reads
 only, not read+creation sum

Previously the Anthropic adapter summed cache_read_input_tokens and
cache_creation_input_tokens into cached_content_token_count, which made any
downstream hit-rate metric (cached / prompt) wrongly count cache writes as
hits. Cache writes now go to the new cache_creation_token_count field.

Behavior change: Anthropic TokenUsageRecord.cached_tokens values will be lower
than pre-fix records for equivalent traffic, because creation tokens no longer
inflate the count. Hit-rate dashboards built on this should annotate the
discontinuity.
---
 .../ai-adapters/src/stream/types/anthropic.rs | 99 ++++++++++++++++---
 1 file changed, 88 insertions(+), 11 deletions(-)

diff --git a/src/crates/ai-adapters/src/stream/types/anthropic.rs b/src/crates/ai-adapters/src/stream/types/anthropic.rs
index 1e341be08..d598b7bc2 100644
--- a/src/crates/ai-adapters/src/stream/types/anthropic.rs
+++ b/src/crates/ai-adapters/src/stream/types/anthropic.rs
@@ -45,23 +45,27 @@ impl Usage {
 
 impl From<Usage> for UnifiedTokenUsage {
     fn from(value: Usage) -> Self {
-        let cache_read = value.cache_read_input_tokens.unwrap_or(0);
-        let cache_creation = value.cache_creation_input_tokens.unwrap_or(0);
-        let prompt_token_count = value.input_tokens.unwrap_or(0) + cache_read + cache_creation;
+        let cache_read = value.cache_read_input_tokens;
+        let cache_creation = value.cache_creation_input_tokens;
+
+        // prompt_token_count = total context tokens occupied (industry-standard
+        // "input tokens" metric). For Anthropic this is the three disjoint
+        // components summed; for other providers the API reports this directly.
+        let prompt_token_count = value.input_tokens.unwrap_or(0)
+            + cache_read.unwrap_or(0)
+            + cache_creation.unwrap_or(0);
         let candidates_token_count = value.output_tokens.unwrap_or(0);
+
         Self {
             prompt_token_count,
             candidates_token_count,
             total_token_count: prompt_token_count + candidates_token_count,
             reasoning_token_count: None,
-            cached_content_token_count: match (
-                value.cache_read_input_tokens,
-                value.cache_creation_input_tokens,
-            ) {
-                (None, None) => None,
-                (read, creation) => Some(read.unwrap_or(0) + creation.unwrap_or(0)),
-            },
-            cache_creation_token_count: None,
+            // cached_content_token_count = cache READS only. This is the
+            // numerator for `cache hit rate = cached / prompt`. Writes go
+            // to cache_creation_token_count below.
+            cached_content_token_count: cache_read,
+            cache_creation_token_count: cache_creation,
         }
     }
 }
@@ -211,3 +215,76 @@ impl From<AnthropicSSEErrorDetails> for String {
         format!("{}: {}", value.error_type, value.message)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::stream::types::unified::UnifiedTokenUsage;
+
+    #[test]
+    fn cached_content_token_count_is_reads_only_not_sum() {
+        let raw = r#"{
+            "input_tokens": 100,
+            "output_tokens": 50,
+            "cache_read_input_tokens": 30,
+            "cache_creation_input_tokens": 20
+        }"#;
+        let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage");
+        let unified: UnifiedTokenUsage = usage.into();
+
+        // cached_content_token_count must be reads only — NOT read + creation.
+        // This guarantees `cached_content / prompt` is a correct hit rate.
+        assert_eq!(unified.cached_content_token_count, Some(30));
+        assert_eq!(unified.cache_creation_token_count, Some(20));
+
+        // prompt_token_count keeps "total context" semantic (matches industry
+        // standard "input tokens" metric across providers).
+        assert_eq!(unified.prompt_token_count, 150);
+        assert_eq!(unified.candidates_token_count, 50);
+        assert_eq!(unified.total_token_count, 200);
+
+        // Hit rate computed by downstream:
+        //   30 / 150 == 20% (correct: only reads count as hits)
+        // Pre-fix this would have been wrongly 50/150 == 33%.
+    }
+
+    #[test]
+    fn absent_cache_fields_stay_none() {
+        let raw = r#"{ "input_tokens": 100, "output_tokens": 50 }"#;
+        let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage");
+        let unified: UnifiedTokenUsage = usage.into();
+        assert_eq!(unified.cached_content_token_count, None);
+        assert_eq!(unified.cache_creation_token_count, None);
+    }
+
+    #[test]
+    fn zero_cache_fields_are_some_zero_not_none() {
+        // Cache fields reported as zero for this call must stay distinguishable
+        // from "provider did not report cache fields at all".
+        let raw = r#"{
+            "input_tokens": 100,
+            "output_tokens": 50,
+            "cache_read_input_tokens": 0,
+            "cache_creation_input_tokens": 0
+        }"#;
+        let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage");
+        let unified: UnifiedTokenUsage = usage.into();
+        assert_eq!(unified.cached_content_token_count, Some(0));
+        assert_eq!(unified.cache_creation_token_count, Some(0));
+    }
+
+    #[test]
+    fn only_read_present_no_creation() {
+        let raw = r#"{
+            "input_tokens": 100,
+            "output_tokens": 50,
+            "cache_read_input_tokens": 30
+        }"#;
+        let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage");
+        let unified: UnifiedTokenUsage = usage.into();
+        assert_eq!(unified.cached_content_token_count, Some(30));
+        assert_eq!(unified.cache_creation_token_count, None);
+        // prompt_token_count = input + read (no creation contribution)
+        assert_eq!(unified.prompt_token_count, 130);
+    }
+}

From 0f0be7b145e68333946c65a8863e516cf6a720b7 Mon Sep 17 00:00:00 2001
From: nonoqing <y_yqingfiona@163.com>
Date: Mon, 18 May 2026 20:42:39 +0800
Subject: [PATCH 3/5] feat(usage): capture DeepSeek prompt_cache_hit_tokens
 extension

The OpenAI-compatible deserializer previously silently dropped DeepSeek's
prompt_cache_hit_tokens / prompt_cache_miss_tokens fields, leaving DeepSeek
cache hit rate permanently unknown. Map prompt_cache_hit_tokens (preferred)
or prompt_tokens_details.cached_tokens (fallback) to
cached_content_token_count so the hit-rate formula works for DeepSeek.
---
 .../ai-adapters/src/stream/types/openai.rs    | 102 +++++++++++++++++-
 1 file changed, 99 insertions(+), 3 deletions(-)

diff --git a/src/crates/ai-adapters/src/stream/types/openai.rs b/src/crates/ai-adapters/src/stream/types/openai.rs
index 0d05c4b17..fe7ff5c9d 100644
--- a/src/crates/ai-adapters/src/stream/types/openai.rs
+++ b/src/crates/ai-adapters/src/stream/types/openai.rs
@@ -15,18 +15,33 @@ struct OpenAIUsage {
     #[serde(default)]
     total_tokens: u32,
     prompt_tokens_details: Option<PromptTokensDetails>,
+    /// DeepSeek extension. Subset of `prompt_tokens`. Absent on non-DeepSeek
+    /// providers. Prefer this over `prompt_tokens_details.cached_tokens` when
+    /// both are present — DeepSeek-native is the authoritative source.
+    #[serde(default)]
+    prompt_cache_hit_tokens: Option<u32>,
+    /// DeepSeek extension. Equals `prompt_tokens - prompt_cache_hit_tokens`.
+    /// Declared so the payload still deserializes if `deny_unknown_fields` is
+    /// ever enabled; not propagated (the miss count is derivable from the other two).
+    #[serde(default)]
+    #[allow(dead_code)]
+    prompt_cache_miss_tokens: Option<u32>,
 }
 
 impl From<OpenAIUsage> for UnifiedTokenUsage {
     fn from(usage: OpenAIUsage) -> Self {
+        let standard_cached = usage
+            .prompt_tokens_details
+            .and_then(|details| details.cached_tokens);
+        // DeepSeek extension wins when both present.
+        let cache_read = usage.prompt_cache_hit_tokens.or(standard_cached);
+
         Self {
             prompt_token_count: usage.prompt_tokens,
             candidates_token_count: usage.completion_tokens,
             total_token_count: usage.total_tokens,
             reasoning_token_count: None,
-            cached_content_token_count: usage
-                .prompt_tokens_details
-                .and_then(|prompt_tokens_details| prompt_tokens_details.cached_tokens),
+            cached_content_token_count: cache_read,
             cache_creation_token_count: None,
         }
     }
@@ -693,4 +708,85 @@ mod tests {
         assert_eq!(responses.len(), 1);
         assert!(responses[0].tool_call.is_some());
     }
+
+    #[test]
+    fn standard_openai_cached_tokens_maps_through() {
+        let raw = r#"{
+            "id": "chatcmpl_test",
+            "created": 1,
+            "model": "gpt-test",
+            "choices": [],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 20,
+                "total_tokens": 120,
+                "prompt_tokens_details": { "cached_tokens": 40 }
+            }
+        }"#;
+        let data: OpenAISSEData = serde_json::from_str(raw).expect("valid openai sse data");
+        let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone();
+        assert_eq!(usage.cached_content_token_count, Some(40));
+        assert_eq!(usage.cache_creation_token_count, None);
+    }
+
+    #[test]
+    fn deepseek_prompt_cache_hit_tokens_is_captured() {
+        // Pre-fix this field was silently dropped (serde ignores unknown keys by default).
+        let raw = r#"{
+            "id": "chatcmpl_test",
+            "created": 1,
+            "model": "deepseek-chat",
+            "choices": [],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 20,
+                "total_tokens": 120,
+                "prompt_cache_hit_tokens": 64,
+                "prompt_cache_miss_tokens": 36
+            }
+        }"#;
+        let data: OpenAISSEData = serde_json::from_str(raw).expect("valid deepseek sse data");
+        let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone();
+        assert_eq!(usage.cached_content_token_count, Some(64));
+    }
+
+    #[test]
+    fn deepseek_extension_preferred_over_standard_cached_tokens_if_both() {
+        // Defensive: if a proxy forwards both, prefer the DeepSeek-native field.
+        let raw = r#"{
+            "id": "chatcmpl_test",
+            "created": 1,
+            "model": "deepseek-chat",
+            "choices": [],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 20,
+                "total_tokens": 120,
+                "prompt_cache_hit_tokens": 64,
+                "prompt_tokens_details": { "cached_tokens": 0 }
+            }
+        }"#;
+        let data: OpenAISSEData = serde_json::from_str(raw).expect("valid proxy payload");
+        let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone();
+        assert_eq!(usage.cached_content_token_count, Some(64));
+    }
+
+    #[test]
+    fn openai_no_cache_fields_stays_none() {
+        let raw = r#"{
+            "id": "chatcmpl_test",
+            "created": 1,
+            "model": "gpt-test",
+            "choices": [],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 20,
+                "total_tokens": 120
+            }
+        }"#;
+        let data: OpenAISSEData = serde_json::from_str(raw).expect("valid openai sse data");
+        let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone();
+        assert_eq!(usage.cached_content_token_count, None);
+        assert_eq!(usage.cache_creation_token_count, None);
+    }
 }

From 364abb5f6558bd38f5c479e7e660c07830857c29 Mon Sep 17 00:00:00 2001
From: nonoqing <y_yqingfiona@163.com>
Date: Mon, 18 May 2026 20:51:20 +0800
Subject: [PATCH 4/5] test(usage): guard Gemini cache_creation_token_count
 stays None

---
 .../ai-adapters/src/stream/types/gemini.rs      | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/crates/ai-adapters/src/stream/types/gemini.rs b/src/crates/ai-adapters/src/stream/types/gemini.rs
index c1a7407d1..6bd6a15d7 100644
--- a/src/crates/ai-adapters/src/stream/types/gemini.rs
+++ b/src/crates/ai-adapters/src/stream/types/gemini.rs
@@ -771,4 +771,21 @@ mod tests {
             .and_then(|metadata| metadata.get("promptFeedback"))
             .is_some());
     }
+
+    #[test]
+    fn gemini_cache_creation_is_always_none() {
+        let payload = serde_json::json!({
+            "candidates": [{ "content": { "parts": [{ "text": "answer" }] } }],
+            "usageMetadata": {
+                "promptTokenCount": 100,
+                "candidatesTokenCount": 20,
+                "totalTokenCount": 120,
+                "cachedContentTokenCount": 35
+            }
+        });
+        let data: GeminiSSEData = serde_json::from_value(payload).expect("gemini payload");
+        let usage = data.into_unified_responses()[0].usage.as_ref().expect("usage").clone();
+        assert_eq!(usage.cached_content_token_count, Some(35));
+        assert_eq!(usage.cache_creation_token_count, None);
+    }
 }

From c33264fd597c107b23f96d4f20b419483c51cab3 Mon Sep 17 00:00:00 2001
From: nonoqing <y_yqingfiona@163.com>
Date: Mon, 18 May 2026 21:24:30 +0800
Subject: [PATCH 5/5] fix(desktop): supply agent_type: None in DialogTurnData
 test fixtures

The agent_type field on DialogTurnData was added by an earlier
agent-tools refactor but three test fixtures in agentic_api.rs were
not updated, breaking `cargo test -p bitfun-desktop` (lib test
compile failure). `None` matches the field's documented use for
non-user-dialog or utility turns.
---
 src/apps/desktop/src/api/agentic_api.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/apps/desktop/src/api/agentic_api.rs b/src/apps/desktop/src/api/agentic_api.rs
index ded19c5e6..095982d43 100644
--- a/src/apps/desktop/src/api/agentic_api.rs
+++ b/src/apps/desktop/src/api/agentic_api.rs
@@ -1469,6 +1469,7 @@ mod tests {
             session_id: "session-1".to_string(),
             timestamp: 1,
             kind: Default::default(),
+            agent_type: None,
             user_message: UserMessageData {
                 id: "user-1".to_string(),
                 content: "hello".to_string(),
@@ -1529,6 +1530,7 @@ mod tests {
             session_id: "session-1".to_string(),
             timestamp: 1,
             kind: Default::default(),
+            agent_type: None,
             user_message: UserMessageData {
                 id: "user-1".to_string(),
                 content: "hello".to_string(),
@@ -1586,6 +1588,7 @@ mod tests {
             session_id: "session-1".to_string(),
             timestamp: 1,
             kind: Default::default(),
+            agent_type: None,
             user_message: UserMessageData {
                 id: "user-1".to_string(),
                 content: "hello".to_string(),