Skip to content

Commit 31ffe34

Browse files
google-genai-bot authored and copybara-github committed
feat: Add telemetry and metrics recording capabilities
The key changes include:

1. **New `Instrumentation.java` class:** This class provides a unified context manager for instrumenting agent invocations and tool executions. It uses OpenTelemetry to create trace spans, record exceptions, and manage the scope of telemetry contexts. It includes inner classes `AgentInvocation` and `ToolExecution`, both implementing `AutoCloseable` to automatically handle the lifecycle of spans and metric recording.
2. **New `Metrics.java` class:** This utility class is responsible for defining and recording various OpenTelemetry metrics (histograms) related to ADK components. These metrics cover:
   * Agent invocation duration, request size, response size, and workflow steps.
   * Tool execution duration, request size, and response size.

   The class uses `GlobalOpenTelemetry` to get a `Meter` and defines static histogram instances. It includes methods to record values for these metrics, often including attributes like agent name, tool name, and error type.
3. **New Unit Tests:**
   * `InstrumentationTest.java`: Contains unit tests for the `Instrumentation` class, verifying that spans are created correctly, contexts are managed, and metrics are recorded for both successful and error scenarios during agent invocations and tool executions. It uses `OpenTelemetryRule` for testing.
   * `MetricsTest.java`: Contains unit tests for the `Metrics` class, ensuring that the various static methods correctly record histogram data with the expected values and attributes. It also uses `OpenTelemetryRule`.

PiperOrigin-RevId: 917904663
1 parent 5ee51fd commit 31ffe34

5 files changed

Lines changed: 827 additions & 1 deletion

File tree

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
/*
2+
* Copyright 2026 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.adk.telemetry;
18+
19+
import com.google.adk.agents.BaseAgent;
20+
import com.google.adk.agents.InvocationContext;
21+
import com.google.adk.events.Event;
22+
import com.google.adk.tools.BaseTool;
23+
import io.opentelemetry.api.trace.Span;
24+
import io.opentelemetry.api.trace.StatusCode;
25+
import io.opentelemetry.context.Context;
26+
import io.opentelemetry.context.Scope;
27+
import java.time.Duration;
28+
import java.util.ArrayList;
29+
import java.util.Collections;
30+
import java.util.List;
31+
import java.util.Map;
32+
import java.util.concurrent.atomic.AtomicBoolean;
33+
import org.jspecify.annotations.Nullable;
34+
import org.slf4j.Logger;
35+
import org.slf4j.LoggerFactory;
36+
37+
/** Unified context manager utility class for agent and tool execution telemetry in ADK. */
38+
public final class Instrumentation {
39+
40+
private static final Logger logger = LoggerFactory.getLogger(Instrumentation.class);
41+
42+
private Instrumentation() {}
43+
44+
/** Stores all telemetry related state. */
45+
public static final class TelemetryContext {
46+
private final Context otelContext;
47+
private @Nullable Event functionResponseEvent;
48+
49+
public TelemetryContext(Context otelContext) {
50+
this.otelContext = otelContext;
51+
}
52+
53+
public Context otelContext() {
54+
return otelContext;
55+
}
56+
57+
public @Nullable Event functionResponseEvent() {
58+
return functionResponseEvent;
59+
}
60+
61+
public void setFunctionResponseEvent(@Nullable Event functionResponseEvent) {
62+
this.functionResponseEvent = functionResponseEvent;
63+
}
64+
}
65+
66+
/** Base class for AutoCloseable telemetry tracking scopes. */
67+
public abstract static class ClosableTelemetryScope implements AutoCloseable {
68+
protected final long startTimeNanos;
69+
protected final Span span;
70+
protected final Scope scope;
71+
protected final TelemetryContext telemetryContext;
72+
protected @Nullable Throwable caughtError;
73+
protected final AtomicBoolean closed = new AtomicBoolean(false);
74+
75+
@SuppressWarnings("MustBeClosedChecker")
76+
ClosableTelemetryScope(Span span) {
77+
this.startTimeNanos = System.nanoTime();
78+
this.span = span;
79+
this.scope = span.makeCurrent();
80+
this.telemetryContext = new TelemetryContext(Context.current());
81+
}
82+
83+
public TelemetryContext context() {
84+
return telemetryContext;
85+
}
86+
87+
public void setError(Throwable caughtError) {
88+
this.caughtError = caughtError;
89+
span.recordException(caughtError);
90+
span.setStatus(StatusCode.ERROR, caughtError.getMessage());
91+
}
92+
93+
@Override
94+
public final void close() {
95+
if (closed.getAndSet(true)) {
96+
return;
97+
}
98+
try {
99+
beforeSpanEnd();
100+
span.end();
101+
Duration elapsed = Duration.ofNanos(System.nanoTime() - startTimeNanos);
102+
try {
103+
recordMetrics(elapsed, caughtError);
104+
} catch (RuntimeException e) {
105+
handleMetricsError(e);
106+
}
107+
} finally {
108+
scope.close();
109+
}
110+
}
111+
112+
/** Hook for subclasses to run code before span ends. */
113+
protected void beforeSpanEnd() {}
114+
115+
/** Hook for subclasses to record metrics. */
116+
protected abstract void recordMetrics(Duration elapsed, @Nullable Throwable error);
117+
118+
/** Hook for subclasses to handle metrics recording errors. */
119+
protected abstract void handleMetricsError(RuntimeException e);
120+
}
121+
122+
/** AutoCloseable telemetry tracking scope for agent invocations. */
123+
public static final class AgentInvocation extends ClosableTelemetryScope {
124+
private final BaseAgent agent;
125+
private final InvocationContext ctx;
126+
private final List<Event> events = Collections.synchronizedList(new ArrayList<>());
127+
128+
public AgentInvocation(InvocationContext ctx, BaseAgent agent) {
129+
super(Tracing.getTracer().spanBuilder("invoke_agent " + agent.name()).startSpan());
130+
this.agent = agent;
131+
this.ctx = ctx;
132+
Tracing.traceAgentInvocation(span, agent.name(), agent.description(), ctx);
133+
}
134+
135+
public InvocationContext getCtx() {
136+
return ctx;
137+
}
138+
139+
public void addEvent(Event event) {
140+
events.add(event);
141+
}
142+
143+
@Override
144+
protected void recordMetrics(Duration elapsed, @Nullable Throwable error) {
145+
Metrics.recordAgentInvocationDuration(agent.name(), elapsed, error);
146+
Metrics.recordAgentRequestSize(agent.name(), ctx.userContent().orElse(null));
147+
Metrics.recordAgentResponseSize(agent.name(), events);
148+
Metrics.recordAgentWorkflowSteps(agent.name(), events);
149+
}
150+
151+
@Override
152+
protected void handleMetricsError(RuntimeException e) {
153+
logger.error("Failed to record agent metrics for agent {}", agent.name(), e);
154+
}
155+
}
156+
157+
/** AutoCloseable telemetry tracking scope for tool executions. */
158+
public static final class ToolExecution extends ClosableTelemetryScope {
159+
private final BaseTool tool;
160+
private final BaseAgent agent;
161+
private final Map<String, Object> functionArgs;
162+
163+
public ToolExecution(BaseTool tool, BaseAgent agent, Map<String, Object> functionArgs) {
164+
super(Tracing.getTracer().spanBuilder("execute_tool " + tool.name()).startSpan());
165+
this.tool = tool;
166+
this.agent = agent;
167+
this.functionArgs = functionArgs;
168+
}
169+
170+
@Override
171+
protected void beforeSpanEnd() {
172+
Event responseEvent = caughtError == null ? context().functionResponseEvent() : null;
173+
Tracing.traceToolExecution(
174+
span,
175+
tool.name(),
176+
tool.description(),
177+
tool.getClass().getSimpleName(),
178+
functionArgs,
179+
responseEvent,
180+
caughtError);
181+
}
182+
183+
@Override
184+
protected void recordMetrics(Duration elapsed, @Nullable Throwable error) {
185+
Metrics.recordToolExecutionDuration(tool.name(), agent.name(), elapsed, error);
186+
Metrics.recordToolRequestSize(tool.name(), agent.name(), functionArgs);
187+
Event responseEvent = error == null ? context().functionResponseEvent() : null;
188+
Metrics.recordToolResponseSize(tool.name(), agent.name(), responseEvent);
189+
}
190+
191+
@Override
192+
protected void handleMetricsError(RuntimeException e) {
193+
logger.error("Failed to record tool execution duration for tool {}", tool.name(), e);
194+
}
195+
}
196+
197+
/** Creates an AgentInvocation context to record agent invocation telemetry. */
198+
public static AgentInvocation recordAgentInvocation(InvocationContext ctx, BaseAgent agent) {
199+
return new AgentInvocation(ctx, agent);
200+
}
201+
202+
/** Creates a ToolExecution context to record tool execution telemetry. */
203+
public static ToolExecution recordToolExecution(
204+
BaseTool tool, BaseAgent agent, Map<String, Object> functionArgs) {
205+
return new ToolExecution(tool, agent, functionArgs);
206+
}
207+
}

0 commit comments

Comments
 (0)