From 6965c648ef91ec22bc3a1d65a9511a17e9b2faa3 Mon Sep 17 00:00:00 2001 From: Achuth Narayan Rajagopal Date: Thu, 21 May 2026 22:01:39 +0000 Subject: [PATCH] feat(telemetry): add user.id to gen_ai.user.message log records Please note that this change adds `user.id` field to an existing opt-in: `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT`. user.id is no longer set as a span attribute; it is emitted only on log records. --- .../adk/telemetry/_experimental_semconv.py | 8 +- src/google/adk/telemetry/tracing.py | 60 ++++++-- .../telemetry/test_node_functional.py | 2 - tests/unittests/telemetry/test_spans.py | 130 +++++++++++++++++- 4 files changed, 182 insertions(+), 18 deletions(-) diff --git a/src/google/adk/telemetry/_experimental_semconv.py b/src/google/adk/telemetry/_experimental_semconv.py index 6ffd5beaaa..db6f8a9375 100644 --- a/src/google/adk/telemetry/_experimental_semconv.py +++ b/src/google/adk/telemetry/_experimental_semconv.py @@ -433,8 +433,14 @@ def _to_system_instructions( def set_operation_details_common_attributes( operation_details_common_attributes: MutableMapping[str, AttributeValue], attributes: Mapping[str, AttributeValue], -): + log_only_attributes: Mapping[str, AttributeValue] | None = None, +) -> None: operation_details_common_attributes.update(attributes) + if log_only_attributes and get_content_capturing_mode() in ( + 'EVENT_ONLY', + 'SPAN_AND_EVENT', + ): + operation_details_common_attributes.update(log_only_attributes) async def set_operation_details_attributes_from_request( diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 7b813be16d..2b06135ad9 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -66,6 +66,7 @@ from .. import version from ..utils.model_name_utils import is_gemini_model +from ._experimental_semconv import get_content_capturing_mode from ._experimental_semconv import is_experimental_semconv from ._experimental_semconv import maybe_log_completion_details from ._experimental_semconv import set_operation_details_attributes_from_request @@ -556,20 +557,26 @@ def use_generate_content_span( common_attributes = { GEN_AI_AGENT_NAME: invocation_context.agent.name, GEN_AI_CONVERSATION_ID: invocation_context.session.id, - USER_ID: invocation_context.session.user_id, 'gcp.vertex.agent.event_id': model_response_event.id, 'gcp.vertex.agent.invocation_id': invocation_context.invocation_id, } + log_only_common_attributes = {} + if invocation_context.session.user_id is not None: + log_only_common_attributes[USER_ID] = invocation_context.session.user_id if ( _is_gemini_agent(invocation_context.agent) and _instrumented_with_opentelemetry_instrumentation_google_genai() ): - with _use_extra_generate_content_attributes(common_attributes): + with _use_extra_generate_content_attributes( + common_attributes, + log_only_extra_attributes=log_only_common_attributes, + ): yield else: with _use_native_generate_content_span_stable_semconv( llm_request=llm_request, common_attributes=common_attributes, + log_only_common_attributes=log_only_common_attributes, ) as span: yield span.span @@ -590,24 +597,32 @@ async def use_inference_span( common_attributes = { GEN_AI_AGENT_NAME: invocation_context.agent.name, GEN_AI_CONVERSATION_ID: invocation_context.session.id, - USER_ID: invocation_context.session.user_id, 'gcp.vertex.agent.event_id': model_response_event.id, 'gcp.vertex.agent.invocation_id': invocation_context.invocation_id, } + log_only_common_attributes = {} + if invocation_context.session.user_id is not None: + log_only_common_attributes[USER_ID] = invocation_context.session.user_id if ( _is_gemini_agent(invocation_context.agent) and _instrumented_with_opentelemetry_instrumentation_google_genai() ): - with _use_extra_generate_content_attributes(common_attributes): + with _use_extra_generate_content_attributes( + common_attributes, + log_only_extra_attributes=log_only_common_attributes, + ): yield else: async with _use_native_generate_content_span( llm_request=llm_request, common_attributes=common_attributes, + log_only_common_attributes=log_only_common_attributes, ) as gc_span: if is_experimental_semconv(): set_operation_details_common_attributes( - gc_span.operation_details_common_attributes, common_attributes + gc_span.operation_details_common_attributes, + common_attributes, + log_only_attributes=log_only_common_attributes, ) try: yield gc_span @@ -664,6 +679,7 @@ def _instrumented_with_opentelemetry_instrumentation_google_genai() -> bool: @contextmanager def _use_extra_generate_content_attributes( extra_attributes: Mapping[str, AttributeValue], + log_only_extra_attributes: Mapping[str, AttributeValue] | None = None, ): try: from opentelemetry.instrumentation.google_genai import GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY @@ -675,13 +691,25 @@ def _use_extra_generate_content_attributes( + ' Please upgrade to version to 0.6b0 or above.' ) yield + return - tok = otel_context.attach( - otel_context.set_value( - GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY, extra_attributes - ) + ctx = otel_context.set_value( + GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY, extra_attributes ) + if log_only_extra_attributes: + try: + from opentelemetry.instrumentation.google_genai import GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY + + ctx = otel_context.set_value( + GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY, + log_only_extra_attributes, + context=ctx, + ) + except (ImportError, AttributeError): + pass + + tok = otel_context.attach(ctx) try: yield finally: @@ -713,6 +741,7 @@ def _set_common_generate_content_attributes( def _use_native_generate_content_span_stable_semconv( llm_request: LlmRequest, common_attributes: Mapping[str, AttributeValue], + log_only_common_attributes: Mapping[str, AttributeValue] | None = None, ) -> Iterator[GenerateContentSpan]: with tracer.start_as_current_span( f"generate_content {llm_request.model or ''}" @@ -734,12 +763,18 @@ def _use_native_generate_content_span_stable_semconv( attributes={GEN_AI_SYSTEM: _guess_gemini_system_name()}, ) ) + user_message_attributes = {GEN_AI_SYSTEM: _guess_gemini_system_name()} + if _should_log_prompt_response_content() and log_only_common_attributes: + user_id = log_only_common_attributes.get(USER_ID) + if user_id is not None: + user_message_attributes[USER_ID] = user_id + for content in llm_request.contents: otel_logger.emit( LogRecord( event_name='gen_ai.user.message', body={'content': _serialize_content_with_elision(content)}, - attributes={GEN_AI_SYSTEM: _guess_gemini_system_name()}, + attributes=user_message_attributes, ) ) @@ -750,10 +785,13 @@ def _use_native_generate_content_span_stable_semconv( async def _use_native_generate_content_span( llm_request: LlmRequest, common_attributes: Mapping[str, AttributeValue], + log_only_common_attributes: Mapping[str, AttributeValue] | None = None, ) -> AsyncIterator[GenerateContentSpan]: if not is_experimental_semconv(): with _use_native_generate_content_span_stable_semconv( - llm_request, common_attributes + llm_request, + common_attributes, + log_only_common_attributes=log_only_common_attributes, ) as gc_span: yield gc_span return diff --git a/tests/unittests/telemetry/test_node_functional.py b/tests/unittests/telemetry/test_node_functional.py index 63bb7d0688..661cbb3d45 100644 --- a/tests/unittests/telemetry/test_node_functional.py +++ b/tests/unittests/telemetry/test_node_functional.py @@ -255,7 +255,6 @@ async def some_node(ctx, node_input): ), 'gen_ai.request.model': 'mock', 'gen_ai.system': 'gemini', - 'user.id': 'some_user', }, children=[ SpanDigest( @@ -327,7 +326,6 @@ async def some_node(ctx, node_input): ), 'gen_ai.request.model': 'mock', 'gen_ai.system': 'gemini', - 'user.id': 'some_user', }, ), ], diff --git a/tests/unittests/telemetry/test_spans.py b/tests/unittests/telemetry/test_spans.py index c0e4cc20b9..c42c6d725c 100644 --- a/tests/unittests/telemetry/test_spans.py +++ b/tests/unittests/telemetry/test_spans.py @@ -27,6 +27,7 @@ from google.adk.sessions.in_memory_session_service import InMemorySessionService from google.adk.telemetry._experimental_semconv import _safe_json_serialize_no_whitespaces from google.adk.telemetry.tracing import _safe_json_serialize +from google.adk.telemetry.tracing import _use_extra_generate_content_attributes from google.adk.telemetry.tracing import ADK_CAPTURE_MESSAGE_CONTENT_IN_SPANS from google.adk.telemetry.tracing import GCP_MCP_SERVER_DESTINATION_ID from google.adk.telemetry.tracing import trace_agent_invocation @@ -810,12 +811,14 @@ async def test_trace_send_data_disabling_request_response_content( return_value='test_system', ) @pytest.mark.parametrize('capture_content', [True, False]) +@pytest.mark.parametrize('user_id', ['some-user-id', None]) async def test_generate_content_span( mock_guess_system_name, mock_tracer, mock_otel_logger, monkeypatch, capture_content, + user_id, ): """Test native generate_content span creation with attributes and logs.""" # Arrange @@ -830,7 +833,7 @@ async def test_generate_content_span( agent = LlmAgent(name='test_agent', model='not-a-gemini-model') invocation_context = await _create_invocation_context(agent) - + invocation_context.session.user_id = user_id system_instruction = types.Content( parts=[types.Part.from_text(text='You are a helpful assistant.')], ) @@ -889,11 +892,15 @@ async def test_generate_content_span( mock_span.set_attributes.assert_called_once_with({ GEN_AI_AGENT_NAME: invocation_context.agent.name, GEN_AI_CONVERSATION_ID: invocation_context.session.id, - USER_ID: invocation_context.session.user_id, 'gcp.vertex.agent.event_id': 'event-123', 'gcp.vertex.agent.invocation_id': invocation_context.invocation_id, }) + all_set_attribute_keys = [ + call.args[0] for call in mock_span.set_attribute.call_args_list + ] + assert USER_ID not in all_set_attribute_keys + # Assert Logs assert mock_otel_logger.emit.call_count == 4 @@ -932,8 +939,11 @@ async def test_generate_content_span( assert len(user_logs) == 2 assert expected_user1_body == user_logs[0].body assert expected_user2_body == user_logs[1].body + expected_user_log_attributes = {GEN_AI_SYSTEM: 'test_system'} + if capture_content and user_id is not None: + expected_user_log_attributes[USER_ID] = user_id for log in user_logs: - assert log.attributes == {GEN_AI_SYSTEM: 'test_system'} + assert log.attributes == expected_user_log_attributes choice_log = next( (lr for lr in log_records if lr.event_name == 'gen_ai.choice'), @@ -944,6 +954,52 @@ async def test_generate_content_span( assert choice_log.attributes == {GEN_AI_SYSTEM: 'test_system'} +@pytest.mark.asyncio +@mock.patch( + 'google.adk.telemetry.tracing._use_extra_generate_content_attributes' +) +async def test_generate_content_span_with_genai_instrumentation( + mock_use_extra, + monkeypatch, +): + """Test that genai-instrumentation delegation branch does not forward USER_ID in attributes.""" + monkeypatch.setattr( + 'google.adk.telemetry.tracing._instrumented_with_opentelemetry_instrumentation_google_genai', + lambda: True, + ) + # _is_gemini_agent returns true for gemini models. + agent = LlmAgent(name='test_agent', model='gemini-1.5-pro') + invocation_context = await _create_invocation_context(agent) + + llm_request = LlmRequest( + model='gemini-1.5-pro', + contents=[types.Content(role='user', parts=[types.Part(text='Hello')])], + ) + + model_response_event = mock.MagicMock() + model_response_event.id = 'event-123' + + mock_cm = mock.MagicMock() + mock_use_extra.return_value = mock_cm + + async with use_inference_span( + llm_request, invocation_context, model_response_event + ): + pass + + mock_use_extra.assert_called_once() + args, _ = mock_use_extra.call_args + common_attributes = args[0] + + assert GEN_AI_AGENT_NAME in common_attributes + assert GEN_AI_CONVERSATION_ID in common_attributes + assert 'gcp.vertex.agent.event_id' in common_attributes + assert 'gcp.vertex.agent.invocation_id' in common_attributes + + # USER_ID should NOT be in common_attributes passed to the genai instrumentor + assert USER_ID not in common_attributes + + def _mock_callable_tool(): """Description of some tool.""" return 'result' @@ -1001,12 +1057,14 @@ def _mock_tool_dict() -> types.ToolDict: 'capture_content', ['SPAN_AND_EVENT', 'EVENT_ONLY', 'SPAN_ONLY', 'NO_CONTENT'], ) +@pytest.mark.parametrize('user_id', ['some-user-id', None]) async def test_generate_content_span_with_experimental_semconv( mock_guess_system_name, mock_tracer, mock_otel_logger, monkeypatch, capture_content, + user_id, ): """Test native generate_content span creation with attributes and logs with experimental semconv enabled.""" # Arrange @@ -1025,6 +1083,7 @@ async def test_generate_content_span_with_experimental_semconv( agent = LlmAgent(name='test_agent', model='not-a-gemini-model') invocation_context = await _create_invocation_context(agent) + invocation_context.session.user_id = user_id system_instruction = types.Content( parts=[types.Part.from_text(text='You are a helpful assistant.')], @@ -1209,11 +1268,15 @@ async def test_generate_content_span_with_experimental_semconv( mock_span.set_attributes.assert_called_once_with({ GEN_AI_AGENT_NAME: invocation_context.agent.name, GEN_AI_CONVERSATION_ID: invocation_context.session.id, - USER_ID: invocation_context.session.user_id, 'gcp.vertex.agent.event_id': 'event-123', 'gcp.vertex.agent.invocation_id': invocation_context.invocation_id, }) + all_set_attribute_keys = [ + call.args[0] for call in mock_span.set_attribute.call_args_list + ] + assert USER_ID not in all_set_attribute_keys + if capture_content in ['SPAN_AND_EVENT', 'SPAN_ONLY']: mock_span.set_attribute.assert_any_call( GEN_AI_SYSTEM_INSTRUCTIONS, @@ -1260,6 +1323,15 @@ async def test_generate_content_span_with_experimental_semconv( attributes = operation_details_log.attributes + if ( + capture_content in ['EVENT_ONLY', 'SPAN_AND_EVENT'] + and user_id is not None + ): + assert USER_ID in attributes + assert attributes[USER_ID] == user_id + else: + assert USER_ID not in attributes + if capture_content in ['SPAN_AND_EVENT', 'EVENT_ONLY']: assert GEN_AI_SYSTEM_INSTRUCTIONS in attributes assert ( @@ -1397,3 +1469,53 @@ def test_safe_json_serialize_no_whitespaces_circular_dict_returns_not_serializab obj = {} obj['self'] = obj assert _safe_json_serialize_no_whitespaces(obj) == '' + + +def test_use_extra_generate_content_attributes_upgraded_version(monkeypatch): + # Arrange: Mock the presence of the new event-only context key in the contrib module + from opentelemetry.instrumentation import google_genai + + mock_event_only_key = 'MOCKED_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY' + monkeypatch.setattr( + google_genai, + 'GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY', + mock_event_only_key, + raising=False, + ) + + # Act: Run the helper with mock.patch on the otel context + with mock.patch('opentelemetry.context.set_value') as mock_set_value: + with _use_extra_generate_content_attributes( + extra_attributes={'span.attr': 'value'}, + log_only_extra_attributes={USER_ID: 'user_123'}, + ): + pass + + # Assert: Verify set_value was called with the mocked event-only key + mock_set_value.assert_any_call( + mock_event_only_key, + {USER_ID: 'user_123'}, + context=mock.ANY, + ) + + +def test_use_extra_generate_content_attributes_older_version(monkeypatch): + # Arrange: Simulate an older version by deleting the key if present + from opentelemetry.instrumentation import google_genai + + if hasattr( + google_genai, 'GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY' + ): + monkeypatch.delattr( + google_genai, 'GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY' + ) + + # Act & Assert: Ensure execution does not throw any ImportError/AttributeError + try: + with _use_extra_generate_content_attributes( + extra_attributes={'span.attr': 'value'}, + log_only_extra_attributes={USER_ID: 'user_123'}, + ): + pass + except Exception as e: # pylint: disable=broad-exception-caught + pytest.fail(f'Graceful degradation failed: {e}')