@@ -45,22 +45,27 @@ impl Usage {
4545
4646impl From < Usage > for UnifiedTokenUsage {
4747 fn from ( value : Usage ) -> Self {
48- let cache_read = value. cache_read_input_tokens . unwrap_or ( 0 ) ;
49- let cache_creation = value. cache_creation_input_tokens . unwrap_or ( 0 ) ;
50- let prompt_token_count = value. input_tokens . unwrap_or ( 0 ) + cache_read + cache_creation;
48+ let cache_read = value. cache_read_input_tokens ;
49+ let cache_creation = value. cache_creation_input_tokens ;
50+
51+ // prompt_token_count = total context tokens occupied (industry-standard
52+ // "input tokens" metric). For Anthropic this is the three disjoint
53+ // components summed; for other providers the API reports this directly.
54+ let prompt_token_count = value. input_tokens . unwrap_or ( 0 )
55+ + cache_read. unwrap_or ( 0 )
56+ + cache_creation. unwrap_or ( 0 ) ;
5157 let candidates_token_count = value. output_tokens . unwrap_or ( 0 ) ;
58+
5259 Self {
5360 prompt_token_count,
5461 candidates_token_count,
5562 total_token_count : prompt_token_count + candidates_token_count,
5663 reasoning_token_count : None ,
57- cached_content_token_count : match (
58- value. cache_read_input_tokens ,
59- value. cache_creation_input_tokens ,
60- ) {
61- ( None , None ) => None ,
62- ( read, creation) => Some ( read. unwrap_or ( 0 ) + creation. unwrap_or ( 0 ) ) ,
63- } ,
64+ // cached_content_token_count = cache READS only. This is the
65+ // numerator for `cache hit rate = cached / prompt`. Writes go
66+ // to cache_creation_token_count below.
67+ cached_content_token_count : cache_read,
68+ cache_creation_token_count : cache_creation,
6469 }
6570 }
6671}
@@ -210,3 +215,76 @@ impl From<AnthropicSSEErrorDetails> for String {
210215 format ! ( "{}: {}" , value. error_type, value. message)
211216 }
212217}
218+
/// Unit tests pinning how Anthropic `Usage` JSON maps onto `UnifiedTokenUsage`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::stream::types::unified::UnifiedTokenUsage;

    /// Parses `raw` as an Anthropic `Usage` payload and converts it to the
    /// unified representation; panics if the fixture is not valid JSON for
    /// `Usage`.
    fn unified_from_json(raw: &str) -> UnifiedTokenUsage {
        let usage: Usage = serde_json::from_str(raw).expect("valid anthropic usage");
        usage.into()
    }

    #[test]
    fn cached_content_token_count_is_reads_only_not_sum() {
        let unified = unified_from_json(
            r#"{
            "input_tokens": 100,
            "output_tokens": 50,
            "cache_read_input_tokens": 30,
            "cache_creation_input_tokens": 20
        }"#,
        );

        // cached_content_token_count must be reads only — NOT read + creation.
        // This guarantees `cached_content / prompt` is a correct hit rate.
        assert_eq!(unified.cached_content_token_count, Some(30));
        assert_eq!(unified.cache_creation_token_count, Some(20));

        // prompt_token_count keeps "total context" semantic (matches industry
        // standard "input tokens" metric across providers).
        assert_eq!(unified.prompt_token_count, 150);
        assert_eq!(unified.candidates_token_count, 50);
        assert_eq!(unified.total_token_count, 200);

        // Hit rate computed by downstream:
        //   30 / 150 == 20%  (correct: only reads count as hits)
        // Pre-fix this would have been wrongly 50/150 == 33%.
    }

    #[test]
    fn absent_cache_fields_stay_none() {
        let unified = unified_from_json(r#"{ "input_tokens": 100, "output_tokens": 50 }"#);
        assert_eq!(unified.cached_content_token_count, None);
        assert_eq!(unified.cache_creation_token_count, None);
    }

    #[test]
    fn zero_cache_fields_are_some_zero_not_none() {
        // Cache support reported but zero this call must be distinguishable
        // from "provider did not report cache fields at all".
        let unified = unified_from_json(
            r#"{
            "input_tokens": 100,
            "output_tokens": 50,
            "cache_read_input_tokens": 0,
            "cache_creation_input_tokens": 0
        }"#,
        );
        assert_eq!(unified.cached_content_token_count, Some(0));
        assert_eq!(unified.cache_creation_token_count, Some(0));
    }

    #[test]
    fn only_read_present_no_creation() {
        let unified = unified_from_json(
            r#"{
            "input_tokens": 100,
            "output_tokens": 50,
            "cache_read_input_tokens": 30
        }"#,
        );
        assert_eq!(unified.cached_content_token_count, Some(30));
        assert_eq!(unified.cache_creation_token_count, None);
        // prompt_token_count = input + read (no creation contribution)
        assert_eq!(unified.prompt_token_count, 130);
    }
}
0 commit comments