From 28c10df5c4813ba85491a5d76fb303f7ac29aaa1 Mon Sep 17 00:00:00 2001 From: Dvir Rezenman Date: Wed, 10 Jun 2026 13:33:03 +0300 Subject: [PATCH 1/3] fix(langchain): emit cache creation tokens --- .../instrumentation/langchain/span_utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py index 245cbac1ad..b0ff85f047 100644 --- a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py +++ b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py @@ -394,6 +394,7 @@ def set_chat_response_usage( output_tokens = 0 total_tokens = 0 cache_read_tokens = 0 + cache_creation_tokens = 0 # Early return if no generations to avoid potential issues if not response.generations: @@ -424,6 +425,9 @@ def set_chat_response_usage( "input_token_details", {} ) cache_read_tokens += input_token_details.get("cache_read", 0) + cache_creation_tokens += input_token_details.get( + "cache_creation", 0 + ) except Exception as e: # If there's any issue processing usage metadata, continue without it logger.warning("Error processing usage metadata: %s", e) @@ -434,6 +438,7 @@ def set_chat_response_usage( or output_tokens > 0 or total_tokens > 0 or cache_read_tokens > 0 + or cache_creation_tokens > 0 ): _set_span_attribute( span, @@ -455,6 +460,11 @@ def set_chat_response_usage( GenAIAttributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read_tokens, ) + _set_span_attribute( + span, + SpanAttributes.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, + cache_creation_tokens, + ) if record_token_usage: vendor = span.attributes.get(GenAIAttributes.GEN_AI_PROVIDER_NAME, "langchain") From 3cbc2458e284e74ea3e9826345b7eaa231c5b38d Mon Sep 17 00:00:00 2001 From: Dvir Rezenman Date: Tue, 16 Jun 2026 12:14:03 
+0300 Subject: [PATCH 2/3] use OpenTelemetry semconv cache attribute constants --- .../opentelemetry/instrumentation/langchain/span_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py index b0ff85f047..9cc8f3c743 100644 --- a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py +++ b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py @@ -437,8 +437,8 @@ def set_chat_response_usage( input_tokens > 0 or output_tokens > 0 or total_tokens > 0 - or cache_read_tokens > 0 - or cache_creation_tokens > 0 + or cache_read_tokens >= 0 + or cache_creation_tokens >= 0 ): _set_span_attribute( span, @@ -462,7 +462,7 @@ def set_chat_response_usage( ) _set_span_attribute( span, - SpanAttributes.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, + GenAIAttributes.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, cache_creation_tokens, ) if record_token_usage: From bc84aaf55d6b1650b362c0f144648ed8f63ca7f4 Mon Sep 17 00:00:00 2001 From: Dvir Rezenman Date: Tue, 16 Jun 2026 12:47:53 +0300 Subject: [PATCH 3/3] emitting 0 for cache tokens + PR comment fix --- .../instrumentation/langchain/span_utils.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py index 9cc8f3c743..09d365daaf 100644 --- a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py +++ b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py @@ -393,8 +393,8 @@ def 
set_chat_response_usage( input_tokens = 0 output_tokens = 0 total_tokens = 0 - cache_read_tokens = 0 - cache_creation_tokens = 0 + cache_read_tokens = None + cache_creation_tokens = None # Early return if no generations to avoid potential issues if not response.generations: @@ -424,10 +424,12 @@ def set_chat_response_usage( input_token_details = generation.message.usage_metadata.get( "input_token_details", {} ) - cache_read_tokens += input_token_details.get("cache_read", 0) - cache_creation_tokens += input_token_details.get( - "cache_creation", 0 - ) + raw_cache_read = input_token_details.get("cache_read") + if isinstance(raw_cache_read, (int, float)): + cache_read_tokens = (cache_read_tokens or 0) + raw_cache_read + raw_cache_creation = input_token_details.get("cache_creation") + if isinstance(raw_cache_creation, (int, float)): + cache_creation_tokens = (cache_creation_tokens or 0) + raw_cache_creation except Exception as e: # If there's any issue processing usage metadata, continue without it logger.warning("Error processing usage metadata: %s", e) @@ -437,8 +439,8 @@ def set_chat_response_usage( input_tokens > 0 or output_tokens > 0 or total_tokens > 0 - or cache_read_tokens >= 0 - or cache_creation_tokens >= 0 + or cache_read_tokens is not None + or cache_creation_tokens is not None ): _set_span_attribute( span,