From 926598e76249de49a089824d8f95fe0b9e00abbc Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Wed, 25 Mar 2026 16:48:36 -0500 Subject: [PATCH 1/6] feat!: split track_metrics_of into sync and async (track_metrics_of_async) variants feat: add optional graph_key to all LDAIConfigTracker track_* methods for graph correlation feat: add track_tool_call/track_tool_calls to LDAIConfigTracker feat: add graph_key property to AIGraphTracker feat: make AIGraphTracker.track_total_tokens accept Optional[TokenUsage], skip when None or total <= 0 feat: add LangChainHelper.get_tool_calls_from_response and sum_token_usage_from_messages feat: extract OpenAIHelper.get_ai_usage_from_response; delegate get_ai_metrics_from_response to it refactor: remove node-scoped methods from AIGraphTracker (track_node_invocation, track_tool_call, track_node_judge_response) refactor: use time.time_ns() for sub-millisecond precision in duration calculations --- .../src/ldai_langchain/langchain_helper.py | 38 +++ packages/sdk/server-ai/README.md | 6 +- .../sdk/server-ai/src/ldai/judge/__init__.py | 2 +- .../sdk/server-ai/src/ldai/managed_model.py | 2 +- .../src/ldai/providers/runner_factory.py | 2 +- packages/sdk/server-ai/src/ldai/tracker.py | 259 ++++++++++-------- packages/sdk/server-ai/tests/test_judge.py | 16 +- packages/sdk/server-ai/tests/test_tracker.py | 153 ++++++++++- 8 files changed, 346 insertions(+), 132 deletions(-) diff --git a/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_helper.py b/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_helper.py index 5061a1b..35eb396 100644 --- a/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_helper.py +++ b/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_helper.py @@ -115,3 +115,41 @@ def get_ai_metrics_from_response(response: Any) -> LDAIMetrics: :return: LDAIMetrics with success status and token usage """ return LDAIMetrics(success=True, usage=get_ai_usage_from_response(response)) + + +def get_tool_calls_from_response(response: Any) -> List[str]: + """ + Get tool call names from a LangChain provider response. + + :param response: The response from the LangChain model + :return: List of tool names in order, or empty list if none + """ + names: List[str] = [] + if hasattr(response, 'tool_calls') and response.tool_calls: + for tc in response.tool_calls: + n = tc.get('name') + if n: + names.append(str(n)) + return names + + +def sum_token_usage_from_messages(messages: List[Any]) -> Optional[TokenUsage]: + """ + Sum token usage across LangChain messages using get_ai_usage_from_response per message. + + :param messages: List of message objects (e.g. from a graph state) + :return: Aggregated TokenUsage, or None if no usage on any message + """ + in_sum = 0 + out_sum = 0 + total_sum = 0 + for m in messages: + u = get_ai_usage_from_response(m) + if u is None: + continue + in_sum += u.input + out_sum += u.output + total_sum += u.total + if in_sum == 0 and out_sum == 0 and total_sum == 0: + return None + return TokenUsage(total=total_sum, input=in_sum, output=out_sum) diff --git a/packages/sdk/server-ai/README.md b/packages/sdk/server-ai/README.md index 3aa7bae..3720dc6 100644 --- a/packages/sdk/server-ai/README.md +++ b/packages/sdk/server-ai/README.md @@ -150,8 +150,8 @@ async def main(): # Create LangChain model from configuration llm = await LangChainProvider.create_langchain_model(ai_config) - # Use with tracking - response = await ai_config.tracker.track_metrics_of( + # Use with tracking (sync invoke) + response = ai_config.tracker.track_metrics_of( lambda: llm.invoke(messages), lambda result: LangChainProvider.get_ai_metrics_from_response(result) ) @@ -190,7 +190,7 @@ async def main(): temperature=ai_config.model.get_parameter('temperature') if ai_config.model else 0.5, ) - result = await ai_config.tracker.track_metrics_of( + result = await ai_config.tracker.track_metrics_of_async( call_custom_provider, map_custom_provider_metrics ) diff --git a/packages/sdk/server-ai/src/ldai/judge/__init__.py b/packages/sdk/server-ai/src/ldai/judge/__init__.py index b364fb0..a842db6 100644 --- a/packages/sdk/server-ai/src/ldai/judge/__init__.py +++ b/packages/sdk/server-ai/src/ldai/judge/__init__.py @@ -71,7 +71,7 @@ async def evaluate( messages = self._construct_evaluation_messages(input_text, output_text) assert self._evaluation_response_structure is not None - response = await self._ai_config_tracker.track_metrics_of( + response = await self._ai_config_tracker.track_metrics_of_async( lambda: self._model_runner.invoke_structured_model(messages, self._evaluation_response_structure), lambda result: result.metrics, ) diff --git a/packages/sdk/server-ai/src/ldai/managed_model.py b/packages/sdk/server-ai/src/ldai/managed_model.py index 28bab2f..c1ef021 100644 --- a/packages/sdk/server-ai/src/ldai/managed_model.py +++ b/packages/sdk/server-ai/src/ldai/managed_model.py @@ -48,7 +48,7 @@ async def invoke(self, prompt: str) -> ModelResponse: config_messages = self._ai_config.messages or [] all_messages = config_messages + self._messages - response = await self._tracker.track_metrics_of( + response = await self._tracker.track_metrics_of_async( lambda: self._model_runner.invoke_model(all_messages), lambda result: result.metrics, ) diff --git a/packages/sdk/server-ai/src/ldai/providers/runner_factory.py b/packages/sdk/server-ai/src/ldai/providers/runner_factory.py index 3612bac..190704e 100644 --- a/packages/sdk/server-ai/src/ldai/providers/runner_factory.py +++ b/packages/sdk/server-ai/src/ldai/providers/runner_factory.py @@ -77,7 +77,7 @@ def _with_fallback( continue result = fn(provider_factory) if result is not None: - log.debug(f"Successfully created capability using provider '{provider_type}'") + log.debug(f"Successfully invoked create function with provider '{provider_type}'") return result except Exception as exc: log.warning(f"Provider '{provider_type}' failed: {exc}") diff --git a/packages/sdk/server-ai/src/ldai/tracker.py b/packages/sdk/server-ai/src/ldai/tracker.py index d4f0912..e50f0e4 100644 --- a/packages/sdk/server-ai/src/ldai/tracker.py +++ b/packages/sdk/server-ai/src/ldai/tracker.py @@ -1,7 +1,7 @@ import time from dataclasses import dataclass from enum import Enum -from typing import Any, Dict, List, Optional +from typing import Any, Callable, Dict, Iterable, List, Optional from ldclient import Context, LDClient @@ -98,46 +98,54 @@ def __init__( self._context = context self._summary = LDAIMetricSummary() - def __get_track_data(self): + def __get_track_data(self, graph_key: Optional[str] = None) -> dict: """ Get tracking data for events. + :param graph_key: When set, include ``graphKey`` in the payload. :return: Dictionary containing variation and config keys. """ - return { + data = { "variationKey": self._variation_key, "configKey": self._config_key, "version": self._version, "modelName": self._model_name, "providerName": self._provider_name, } + if graph_key is not None: + return {**data, "graphKey": graph_key} + return data - def track_duration(self, duration: int) -> None: + def track_duration(self, duration: int, *, graph_key: Optional[str] = None) -> None: """ Manually track the duration of an AI operation. :param duration: Duration in milliseconds. + :param graph_key: When set, include ``graphKey`` in the event payload (e.g. config-level metrics inside a graph). """ self._summary._duration = duration self._ld_client.track( - "$ld:ai:duration:total", self._context, self.__get_track_data(), duration + "$ld:ai:duration:total", self._context, self.__get_track_data(graph_key), duration ) - def track_time_to_first_token(self, time_to_first_token: int) -> None: + def track_time_to_first_token( + self, time_to_first_token: int, *, graph_key: Optional[str] = None + ) -> None: """ Manually track the time to first token of an AI operation. :param time_to_first_token: Time to first token in milliseconds. + :param graph_key: When set, include ``graphKey`` in the event payload. """ self._summary._time_to_first_token = time_to_first_token self._ld_client.track( "$ld:ai:tokens:ttf", self._context, - self.__get_track_data(), + self.__get_track_data(graph_key), time_to_first_token, ) - def track_duration_of(self, func): + def track_duration_of(self, func, *, graph_key: Optional[str] = None): """ Automatically track the duration of an AI operation. @@ -145,21 +153,43 @@ def track_duration_of(self, func): track the duration. The exception will be re-thrown. :param func: Function to track (synchronous only). + :param graph_key: When set, passed through to :meth:`track_duration`. :return: Result of the tracked function. """ - start_time = time.time() + start_ns = time.time_ns() try: result = func() finally: - end_time = time.time() - duration = int((end_time - start_time) * 1000) # duration in milliseconds - self.track_duration(duration) + duration = (time.time_ns() - start_ns) // 1_000_000 # duration in milliseconds + self.track_duration(duration, graph_key=graph_key) return result - async def track_metrics_of(self, func, metrics_extractor): + def _track_from_metrics_extractor( + self, + result: Any, + metrics_extractor: Callable[[Any], Any], + *, + graph_key: Optional[str] = None, + ) -> Any: + metrics = metrics_extractor(result) + if metrics.success: + self.track_success(graph_key=graph_key) + else: + self.track_error(graph_key=graph_key) + if metrics.usage: + self.track_tokens(metrics.usage, graph_key=graph_key) + return result + + def track_metrics_of( + self, + func: Callable[[], Any], + metrics_extractor: Callable[[Any], Any], + *, + graph_key: Optional[str] = None, + ) -> Any: """ - Track metrics for a generic AI operation. + Track metrics for a synchronous AI operation. This function will track the duration of the operation, extract metrics using the provided metrics extractor function, and track success or error status accordingly. @@ -168,47 +198,59 @@ async def track_metrics_of(self, func, metrics_extractor): In the case the provided function throws, this function will record the duration and an error. A failed operation will not have any token usage data. - :param func: Async function which executes the operation + For async operations, use :meth:`track_metrics_of_async`. + + :param func: Synchronous callable that runs the operation :param metrics_extractor: Function that extracts LDAIMetrics from the operation result + :param graph_key: When set, include ``graphKey`` on emitted config-level events. :return: The result of the operation """ - start_time = time.time() - result = None + start_ns = time.time_ns() try: - result = await func() + result = func() except Exception as err: - end_time = time.time() - duration = int((end_time - start_time) * 1000) - self.track_duration(duration) - self.track_error() + duration = (time.time_ns() - start_ns) // 1_000_000 + self.track_duration(duration, graph_key=graph_key) + self.track_error(graph_key=graph_key) raise err - # Track duration after successful call - end_time = time.time() - duration = int((end_time - start_time) * 1000) - self.track_duration(duration) + duration = (time.time_ns() - start_ns) // 1_000_000 + self.track_duration(duration, graph_key=graph_key) + return self._track_from_metrics_extractor(result, metrics_extractor, graph_key=graph_key) - # Extract metrics after successful AI call - from ldai.providers.types import LDAIMetrics - metrics = metrics_extractor(result) + async def track_metrics_of_async( + self, func, metrics_extractor, *, graph_key: Optional[str] = None + ): + """ + Track metrics for an async AI operation (``func`` is awaited). - # Track success/error based on metrics - if metrics.success: - self.track_success() - else: - self.track_error() + Same event semantics as :meth:`track_metrics_of`. - # Track token usage if available - if metrics.usage: - self.track_tokens(metrics.usage) + :param func: Async callable or zero-arg callable that returns an awaitable when called + :param metrics_extractor: Function that extracts LDAIMetrics from the operation result + :param graph_key: When set, include ``graphKey`` on emitted config-level events. + :return: The result of the operation + """ + start_ns = time.time_ns() + result = None + try: + result = await func() + except Exception as err: + duration = (time.time_ns() - start_ns) // 1_000_000 + self.track_duration(duration, graph_key=graph_key) + self.track_error(graph_key=graph_key) + raise err - return result + duration = (time.time_ns() - start_ns) // 1_000_000 + self.track_duration(duration, graph_key=graph_key) + return self._track_from_metrics_extractor(result, metrics_extractor, graph_key=graph_key) - def track_eval_scores(self, scores: Dict[str, Any]) -> None: + def track_eval_scores(self, scores: Dict[str, Any], *, graph_key: Optional[str] = None) -> None: """ Track evaluation scores for multiple metrics. :param scores: Dictionary mapping metric keys to their evaluation scores (EvalScore objects) + :param graph_key: When set, include ``graphKey`` in the event payload. """ from ldai.providers.types import EvalScore @@ -218,22 +260,23 @@ def track_eval_scores(self, scores: Dict[str, Any]) -> None: self._ld_client.track( metric_key, self._context, - self.__get_track_data(), + self.__get_track_data(graph_key=graph_key), eval_score.score ) - def track_judge_response(self, judge_response: Any) -> None: + def track_judge_response(self, judge_response: Any, *, graph_key: Optional[str] = None) -> None: """ Track a judge response, including evaluation scores with judge config key. :param judge_response: JudgeResponse object containing evals and success status + :param graph_key: When set, include ``graphKey`` in the event payload. """ from ldai.providers.types import EvalScore, JudgeResponse if isinstance(judge_response, JudgeResponse): # Track evaluation scores with judge config key included in metadata if judge_response.evals: - track_data = self.__get_track_data() + track_data = self.__get_track_data(graph_key=graph_key) if judge_response.judge_config_key: track_data = {**track_data, 'judgeConfigKey': judge_response.judge_config_key} @@ -246,44 +289,49 @@ def track_judge_response(self, judge_response: Any) -> None: eval_score.score ) - def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None: + def track_feedback(self, feedback: Dict[str, FeedbackKind], *, graph_key: Optional[str] = None) -> None: """ Track user feedback for an AI operation. :param feedback: Dictionary containing feedback kind. + :param graph_key: When set, include ``graphKey`` in the event payload. """ self._summary._feedback = feedback if feedback["kind"] == FeedbackKind.Positive: self._ld_client.track( "$ld:ai:feedback:user:positive", self._context, - self.__get_track_data(), + self.__get_track_data(graph_key=graph_key), 1, ) elif feedback["kind"] == FeedbackKind.Negative: self._ld_client.track( "$ld:ai:feedback:user:negative", self._context, - self.__get_track_data(), + self.__get_track_data(graph_key=graph_key), 1, ) - def track_success(self) -> None: + def track_success(self, *, graph_key: Optional[str] = None) -> None: """ Track a successful AI generation. + + :param graph_key: When set, include ``graphKey`` in the event payload. """ self._summary._success = True self._ld_client.track( - "$ld:ai:generation:success", self._context, self.__get_track_data(), 1 + "$ld:ai:generation:success", self._context, self.__get_track_data(graph_key=graph_key), 1 ) - def track_error(self) -> None: + def track_error(self, *, graph_key: Optional[str] = None) -> None: """ Track an unsuccessful AI generation attempt. + + :param graph_key: When set, include ``graphKey`` in the event payload. """ self._summary._success = False self._ld_client.track( - "$ld:ai:generation:error", self._context, self.__get_track_data(), 1 + "$ld:ai:generation:error", self._context, self.__get_track_data(graph_key=graph_key), 1 ) def track_openai_metrics(self, func): @@ -303,18 +351,16 @@ def track_openai_metrics(self, func): :param func: Function to track. :return: Result of the tracked function. """ - start_time = time.time() + start_ns = time.time_ns() try: result = func() - end_time = time.time() - duration = int((end_time - start_time) * 1000) + duration = (time.time_ns() - start_ns) // 1_000_000 self.track_duration(duration) self.track_success() if hasattr(result, "usage") and hasattr(result.usage, "to_dict"): self.track_tokens(_openai_to_token_usage(result.usage.to_dict())) except Exception: - end_time = time.time() - duration = int((end_time - start_time) * 1000) + duration = (time.time_ns() - start_ns) // 1_000_000 self.track_duration(duration) self.track_error() raise @@ -343,35 +389,64 @@ def track_bedrock_converse_metrics(self, res: dict) -> dict: self.track_tokens(_bedrock_to_token_usage(res["usage"])) return res - def track_tokens(self, tokens: TokenUsage) -> None: + def track_tokens(self, tokens: TokenUsage, *, graph_key: Optional[str] = None) -> None: """ Track token usage metrics. :param tokens: Token usage data from either custom, OpenAI, or Bedrock sources. + :param graph_key: When set, include ``graphKey`` in the event payload. """ self._summary._usage = tokens + td = self.__get_track_data(graph_key=graph_key) if tokens.total > 0: self._ld_client.track( "$ld:ai:tokens:total", self._context, - self.__get_track_data(), + td, tokens.total, ) if tokens.input > 0: self._ld_client.track( "$ld:ai:tokens:input", self._context, - self.__get_track_data(), + td, tokens.input, ) if tokens.output > 0: self._ld_client.track( "$ld:ai:tokens:output", self._context, - self.__get_track_data(), + td, tokens.output, ) + def track_tool_call(self, tool_key: str, *, graph_key: Optional[str] = None) -> None: + """ + Track a tool invocation for this configuration (standalone or within a graph). + + :param tool_key: Identifier of the tool that was invoked. + :param graph_key: When set, include ``graphKey`` in the event payload. + """ + track_data = {**self.__get_track_data(graph_key=graph_key), "toolKey": tool_key} + self._ld_client.track( + "$ld:ai:tool_call", + self._context, + track_data, + 1, + ) + + def track_tool_calls( + self, tool_keys: Iterable[str], *, graph_key: Optional[str] = None + ) -> None: + """ + Track multiple tool invocations for this configuration. + + :param tool_keys: Tool identifiers (e.g. from a model response). + :param graph_key: When set, include ``graphKey`` on each event. + """ + for tool_key in tool_keys: + self.track_tool_call(tool_key, graph_key=graph_key) + def get_summary(self) -> LDAIMetricSummary: """ Get the current summary of AI metrics. @@ -437,6 +512,11 @@ def __init__( self._version = version self._context = context + @property + def graph_key(self) -> str: + """Graph configuration key used in tracking payloads.""" + return self._graph_key + def __get_track_data(self): """ Get tracking data for events. @@ -485,12 +565,14 @@ def track_latency(self, duration: int) -> None: duration, ) - def track_total_tokens(self, tokens: TokenUsage) -> None: + def track_total_tokens(self, tokens: Optional[TokenUsage] = None) -> None: """ Track aggregated token usage across the entire graph invocation. - :param tokens: Token usage data. + :param tokens: Token usage data, or ``None`` when usage is unknown. """ + if tokens is None or tokens.total <= 0: + return self._ld_client.track( "$ld:ai:graph:total_tokens", self._context, @@ -535,63 +617,6 @@ def track_judge_response(self, response: Any) -> None: eval_score.score, ) - def track_node_invocation(self, config_key: str) -> None: - """ - Track when a node is invoked during graph execution. - - :param config_key: The configuration key of the node being invoked. - """ - track_data = {**self.__get_track_data(), "configKey": config_key} - self._ld_client.track( - "$ld:ai:graph:node_invocation", - self._context, - track_data, - 1, - ) - - def track_tool_call(self, config_key: str, tool_key: str) -> None: - """ - Track tool calls made by nodes during graph execution. - - :param config_key: The configuration key of the node making the tool call. - :param tool_key: The key of the tool being called. - """ - track_data = { - **self.__get_track_data(), - "configKey": config_key, - "toolKey": tool_key, - } - self._ld_client.track( - "$ld:ai:graph:tool_call", - self._context, - track_data, - 1, - ) - - def track_node_judge_response(self, config_key: str, response: Any) -> None: - """ - Track judge responses for a specific node. - - :param config_key: The configuration key of the node being evaluated. - :param response: JudgeResponse object containing evals and success status. - """ - from ldai.providers.types import EvalScore, JudgeResponse - - if isinstance(response, JudgeResponse): - if response.evals: - track_data = {**self.__get_track_data(), "configKey": config_key} - if response.judge_config_key: - track_data = {**track_data, "judgeConfigKey": response.judge_config_key} - - for metric_key, eval_score in response.evals.items(): - if isinstance(eval_score, EvalScore): - self._ld_client.track( - metric_key, - self._context, - track_data, - eval_score.score, - ) - def track_redirect(self, source_key: str, redirected_target: str) -> None: """ Track when a node redirects to a different target than originally specified. diff --git a/packages/sdk/server-ai/tests/test_judge.py b/packages/sdk/server-ai/tests/test_judge.py index 8326b72..e61ac4a 100644 --- a/packages/sdk/server-ai/tests/test_judge.py +++ b/packages/sdk/server-ai/tests/test_judge.py @@ -156,7 +156,7 @@ async def test_evaluate_success_with_valid_response( ) mock_runner.invoke_structured_model.return_value = mock_response - tracker.track_metrics_of = AsyncMock(return_value=mock_response) + tracker.track_metrics_of_async = AsyncMock(return_value=mock_response) judge = Judge(judge_config_with_key, tracker, mock_runner) @@ -183,7 +183,7 @@ async def test_evaluate_success_with_evaluation_response_shape( metrics=LDAIMetrics(success=True), ) mock_runner.invoke_structured_model.return_value = mock_response - tracker.track_metrics_of = AsyncMock(return_value=mock_response) + tracker.track_metrics_of_async = AsyncMock(return_value=mock_response) judge = Judge(judge_config_with_key, tracker, mock_runner) result = await judge.evaluate("What is feature flagging?", "Feature flagging is...") @@ -206,7 +206,7 @@ async def test_evaluate_handles_missing_evaluation_in_response( ) mock_runner.invoke_structured_model.return_value = mock_response - tracker.track_metrics_of = AsyncMock(return_value=mock_response) + tracker.track_metrics_of_async = AsyncMock(return_value=mock_response) judge = Judge(judge_config_with_key, tracker, mock_runner) @@ -231,7 +231,7 @@ async def test_evaluate_handles_invalid_score( ) mock_runner.invoke_structured_model.return_value = mock_response - tracker.track_metrics_of = AsyncMock(return_value=mock_response) + tracker.track_metrics_of_async = AsyncMock(return_value=mock_response) judge = Judge(judge_config_with_key, tracker, mock_runner) @@ -253,7 +253,7 @@ async def test_evaluate_handles_missing_reasoning( ) mock_runner.invoke_structured_model.return_value = mock_response - tracker.track_metrics_of = AsyncMock(return_value=mock_response) + tracker.track_metrics_of_async = AsyncMock(return_value=mock_response) judge = Judge(judge_config_with_key, tracker, mock_runner) @@ -269,7 +269,7 @@ async def test_evaluate_handles_exception( ): """Evaluate should handle exceptions gracefully.""" mock_runner.invoke_structured_model.side_effect = Exception("Provider error") - tracker.track_metrics_of = AsyncMock(side_effect=Exception("Provider error")) + tracker.track_metrics_of_async = AsyncMock(side_effect=Exception("Provider error")) judge = Judge(judge_config_with_key, tracker, mock_runner) @@ -311,7 +311,7 @@ async def test_evaluate_messages_calls_evaluate( ) mock_runner.invoke_structured_model.return_value = mock_response - tracker.track_metrics_of = AsyncMock(return_value=mock_response) + tracker.track_metrics_of_async = AsyncMock(return_value=mock_response) judge = Judge(judge_config_with_key, tracker, mock_runner) @@ -328,7 +328,7 @@ async def test_evaluate_messages_calls_evaluate( assert result is not None assert result.success is True - assert tracker.track_metrics_of.called + assert tracker.track_metrics_of_async.called class TestEvaluationSchemaBuilder: diff --git a/packages/sdk/server-ai/tests/test_tracker.py b/packages/sdk/server-ai/tests/test_tracker.py index 57f13fd..5fea61a 100644 --- a/packages/sdk/server-ai/tests/test_tracker.py +++ b/packages/sdk/server-ai/tests/test_tracker.py @@ -5,7 +5,8 @@ from ldclient import Config, Context, LDClient from ldclient.integrations.test_data import TestData -from ldai.tracker import FeedbackKind, LDAIConfigTracker, TokenUsage +from ldai.providers.types import LDAIMetrics +from ldai.tracker import AIGraphTracker, FeedbackKind, LDAIConfigTracker, TokenUsage @pytest.fixture @@ -440,3 +441,153 @@ def test_error_overwrites_success(client: LDClient): client.track.assert_has_calls(calls) # type: ignore assert tracker.get_summary().success is False + + +def _base_td() -> dict: + return { + "variationKey": "variation-key", + "configKey": "config-key", + "version": 3, + "modelName": "fakeModel", + "providerName": "fakeProvider", + } + + +def test_config_tracker_includes_graph_key_when_provided(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker( + client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context + ) + expected = {**_base_td(), "graphKey": "my-graph"} + tracker.track_success(graph_key="my-graph") + client.track.assert_called_with("$ld:ai:generation:success", context, expected, 1) # type: ignore + + +def test_config_tracker_track_tokens_with_graph_key(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker( + client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context + ) + tokens = TokenUsage(10, 4, 6) + expected = {**_base_td(), "graphKey": "g1"} + tracker.track_tokens(tokens, graph_key="g1") + client.track.assert_any_call("$ld:ai:tokens:total", context, expected, 10) # type: ignore + + +def test_config_tracker_track_feedback_with_graph_key(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker( + client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context + ) + expected = {**_base_td(), "graphKey": "gx"} + tracker.track_feedback({"kind": FeedbackKind.Positive}, graph_key="gx") + client.track.assert_called_with( + "$ld:ai:feedback:user:positive", context, expected, 1 + ) # type: ignore + + +def test_config_tracker_track_tool_call(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker( + client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context + ) + expected = {**_base_td(), "toolKey": "search"} + tracker.track_tool_call("search") + client.track.assert_called_with("$ld:ai:tool_call", context, expected, 1) # type: ignore + + +def test_config_tracker_track_tool_call_with_graph_key(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker( + client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context + ) + expected = {**_base_td(), "graphKey": "my-graph", "toolKey": "calc"} + tracker.track_tool_call("calc", graph_key="my-graph") + client.track.assert_called_with("$ld:ai:tool_call", context, expected, 1) # type: ignore + + +def test_config_tracker_track_tool_calls(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker( + client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context + ) + tracker.track_tool_calls(["a", "b"], graph_key="g") + assert client.track.call_count == 2 # type: ignore + client.track.assert_any_call( + "$ld:ai:tool_call", + context, + {**_base_td(), "graphKey": "g", "toolKey": "a"}, + 1, + ) # type: ignore + client.track.assert_any_call( + "$ld:ai:tool_call", + context, + {**_base_td(), "graphKey": "g", "toolKey": "b"}, + 1, + ) # type: ignore + + +def test_config_tracker_track_metrics_of(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker( + client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context + ) + + def fn(): + return "done" + + def extract(r): + return LDAIMetrics(success=True, usage=TokenUsage(5, 2, 3)) + + out = tracker.track_metrics_of(fn, extract) + assert out == "done" + calls = client.track.mock_calls # type: ignore + assert any(c.args[0] == "$ld:ai:generation:success" for c in calls) + assert any(c.args[0] == "$ld:ai:tokens:total" and c.args[3] == 5 for c in calls) + + +@pytest.mark.asyncio +async def test_config_tracker_track_metrics_of_async_passes_graph_key(client: LDClient): + context = Context.create("user-key") + tracker = LDAIConfigTracker( + client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context + ) + + async def fn(): + return "ok" + + def extract(r): + return LDAIMetrics(success=True, usage=TokenUsage(5, 2, 3)) + + await tracker.track_metrics_of_async(fn, extract, graph_key="gg") + gk_td = {**_base_td(), "graphKey": "gg"} + calls = client.track.mock_calls # type: ignore + assert any( + c.args[0] == "$ld:ai:generation:success" and c.args[2] == gk_td for c in calls + ) + + +def test_ai_graph_tracker_graph_key_property(client: LDClient): + context = Context.create("user-key") + g = AIGraphTracker(client, "variation-key", "graph-key", 2, context) + assert g.graph_key == "graph-key" + + +def test_ai_graph_tracker_track_total_tokens_skips_none_and_nonpositive(client: LDClient): + context = Context.create("user-key") + g = AIGraphTracker(client, "variation-key", "graph-key", 2, context) + g.track_total_tokens(None) + g.track_total_tokens(TokenUsage(0, 0, 0)) + client.track.assert_not_called() # type: ignore + + +def test_ai_graph_tracker_track_total_tokens_tracks_when_positive(client: LDClient): + context = Context.create("user-key") + g = AIGraphTracker(client, "variation-key", "graph-key", 2, context) + g.track_total_tokens(TokenUsage(42, 30, 12)) + client.track.assert_called_with( # type: ignore + "$ld:ai:graph:total_tokens", + context, + {"variationKey": "variation-key", "graphKey": "graph-key", "version": 2}, + 42, + ) From 4aca10e34943aff72877d9cec2c9687af2923918 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Wed, 25 Mar 2026 17:02:15 -0500 Subject: [PATCH 2/6] refactor: use time.perf_counter_ns() instead of time.time_ns() for duration measurement perf_counter_ns is monotonic and designed for elapsed-time measurement; time.time_ns reflects wall-clock time and can go backward due to NTP or clock adjustments. --- packages/sdk/server-ai/src/ldai/tracker.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/packages/sdk/server-ai/src/ldai/tracker.py b/packages/sdk/server-ai/src/ldai/tracker.py index e50f0e4..3accc1f 100644 --- a/packages/sdk/server-ai/src/ldai/tracker.py +++ b/packages/sdk/server-ai/src/ldai/tracker.py @@ -156,11 +156,11 @@ def track_duration_of(self, func, *, graph_key: Optional[str] = None): :param graph_key: When set, passed through to :meth:`track_duration`. :return: Result of the tracked function. """ - start_ns = time.time_ns() + start_ns = time.perf_counter_ns() try: result = func() finally: - duration = (time.time_ns() - start_ns) // 1_000_000 # duration in milliseconds + duration = (time.perf_counter_ns() - start_ns) // 1_000_000 # duration in milliseconds self.track_duration(duration, graph_key=graph_key) return result @@ -205,16 +205,16 @@ def track_metrics_of( :param graph_key: When set, include ``graphKey`` on emitted config-level events. :return: The result of the operation """ - start_ns = time.time_ns() + start_ns = time.perf_counter_ns() try: result = func() except Exception as err: - duration = (time.time_ns() - start_ns) // 1_000_000 + duration = (time.perf_counter_ns() - start_ns) // 1_000_000 self.track_duration(duration, graph_key=graph_key) self.track_error(graph_key=graph_key) raise err - duration = (time.time_ns() - start_ns) // 1_000_000 + duration = (time.perf_counter_ns() - start_ns) // 1_000_000 self.track_duration(duration, graph_key=graph_key) return self._track_from_metrics_extractor(result, metrics_extractor, graph_key=graph_key) @@ -231,17 +231,17 @@ async def track_metrics_of_async( :param graph_key: When set, include ``graphKey`` on emitted config-level events. :return: The result of the operation """ - start_ns = time.time_ns() + start_ns = time.perf_counter_ns() result = None try: result = await func() except Exception as err: - duration = (time.time_ns() - start_ns) // 1_000_000 + duration = (time.perf_counter_ns() - start_ns) // 1_000_000 self.track_duration(duration, graph_key=graph_key) self.track_error(graph_key=graph_key) raise err - duration = (time.time_ns() - start_ns) // 1_000_000 + duration = (time.perf_counter_ns() - start_ns) // 1_000_000 self.track_duration(duration, graph_key=graph_key) return self._track_from_metrics_extractor(result, metrics_extractor, graph_key=graph_key) @@ -351,16 +351,16 @@ def track_openai_metrics(self, func): :param func: Function to track. :return: Result of the tracked function. """ - start_ns = time.time_ns() + start_ns = time.perf_counter_ns() try: result = func() - duration = (time.time_ns() - start_ns) // 1_000_000 + duration = (time.perf_counter_ns() - start_ns) // 1_000_000 self.track_duration(duration) self.track_success() if hasattr(result, "usage") and hasattr(result.usage, "to_dict"): self.track_tokens(_openai_to_token_usage(result.usage.to_dict())) except Exception: - duration = (time.time_ns() - start_ns) // 1_000_000 + duration = (time.perf_counter_ns() - start_ns) // 1_000_000 self.track_duration(duration) self.track_error() raise From a183f12086def01ccade1aa7ce0f6aa0d9a27797 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Thu, 26 Mar 2026 08:16:59 -0500 Subject: [PATCH 3/6] docs: update LangChain README example to use track_metrics_of_async --- packages/ai-providers/server-ai-langchain/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/ai-providers/server-ai-langchain/README.md b/packages/ai-providers/server-ai-langchain/README.md index a58dc00..be7125e 100644 --- a/packages/ai-providers/server-ai-langchain/README.md +++ b/packages/ai-providers/server-ai-langchain/README.md @@ -138,7 +138,7 @@ provider = await LangChainProvider.create(config) async def invoke(): return await provider.invoke_model(messages) -response = await config.tracker.track_metrics_of( +response = await config.tracker.track_metrics_of_async( invoke, lambda r: r.metrics ) From 91cd3005b6caf0ef360f461e5016b70b01292dee Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Thu, 26 Mar 2026 12:55:46 -0500 Subject: [PATCH 4/6] fix: mutate track data dict in place and guard tool_calls with isinstance check Co-Authored-By: Claude Sonnet 4.6 --- .../server-ai-langchain/src/ldai_langchain/langchain_helper.py | 2 +- packages/sdk/server-ai/src/ldai/tracker.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_helper.py b/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_helper.py index 35eb396..e160061 100644 --- a/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_helper.py +++ b/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_helper.py @@ -125,7 +125,7 @@ def get_tool_calls_from_response(response: Any) -> List[str]: :return: List of tool names in order, or empty list if none """ names: List[str] = [] - if hasattr(response, 'tool_calls') and response.tool_calls: + if hasattr(response, 'tool_calls') and isinstance(response.tool_calls, list): for tc in response.tool_calls: n = tc.get('name') if n: diff --git a/packages/sdk/server-ai/src/ldai/tracker.py b/packages/sdk/server-ai/src/ldai/tracker.py index 3accc1f..6fbc46d 100644 --- a/packages/sdk/server-ai/src/ldai/tracker.py +++ b/packages/sdk/server-ai/src/ldai/tracker.py @@ -113,7 +113,7 @@ def __get_track_data(self, graph_key: Optional[str] = None) -> dict: "providerName": self._provider_name, } if graph_key is not None: - return {**data, "graphKey": graph_key} + data['graphKey'] = graph_key return data def track_duration(self, duration: int, *, graph_key: Optional[str] = None) -> None: From d331775a269e1dfa48f446a5d60f819428c0430d Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Thu, 26 Mar 2026 12:59:08 -0500 Subject: [PATCH 5/6] fix: export get_tool_calls_from_response and sum_token_usage_from_messages; add tests Co-Authored-By: Claude Sonnet 4.6 --- .../src/ldai_langchain/__init__.py | 4 + .../tests/test_langchain_provider.py | 86 ++++++++++++++++++- 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/packages/ai-providers/server-ai-langchain/src/ldai_langchain/__init__.py b/packages/ai-providers/server-ai-langchain/src/ldai_langchain/__init__.py index 2b88026..cb455e5 100644 --- a/packages/ai-providers/server-ai-langchain/src/ldai_langchain/__init__.py +++ b/packages/ai-providers/server-ai-langchain/src/ldai_langchain/__init__.py @@ -3,7 +3,9 @@ create_langchain_model, get_ai_metrics_from_response, get_ai_usage_from_response, + get_tool_calls_from_response, map_provider, + sum_token_usage_from_messages, ) from ldai_langchain.langchain_model_runner import LangChainModelRunner from ldai_langchain.langchain_runner_factory import LangChainRunnerFactory @@ -18,5 +20,7 @@ 'create_langchain_model', 'get_ai_metrics_from_response', 'get_ai_usage_from_response', + 'get_tool_calls_from_response', 'map_provider', + 'sum_token_usage_from_messages', ] diff --git a/packages/ai-providers/server-ai-langchain/tests/test_langchain_provider.py b/packages/ai-providers/server-ai-langchain/tests/test_langchain_provider.py index b78fde8..9ce4e88 100644 --- a/packages/ai-providers/server-ai-langchain/tests/test_langchain_provider.py +++ b/packages/ai-providers/server-ai-langchain/tests/test_langchain_provider.py @@ -7,7 +7,15 @@ from ldai import LDMessage -from ldai_langchain import LangChainModelRunner, LangChainRunnerFactory, convert_messages_to_langchain, get_ai_metrics_from_response, map_provider +from ldai_langchain import ( + LangChainModelRunner, + LangChainRunnerFactory, + convert_messages_to_langchain, + get_ai_metrics_from_response, + get_tool_calls_from_response, + map_provider, + sum_token_usage_from_messages, +) class TestConvertMessages: @@ -237,6 +245,82 @@ async def test_returns_success_false_when_structured_model_invocation_throws_err assert result.metrics.usage is None +class TestGetToolCallsFromResponse: + """Tests for get_tool_calls_from_response.""" + + def test_returns_tool_call_names_in_order(self): + """Should return tool call names from response.tool_calls.""" + mock_response = MagicMock() + mock_response.tool_calls = [ + {'name': 'search', 'args': {}}, + {'name': 'calculator', 'args': {}}, + ] + assert get_tool_calls_from_response(mock_response) == ['search', 'calculator'] + + def test_returns_empty_list_when_tool_calls_is_empty(self): + """Should return empty list when tool_calls is an empty list.""" + mock_response = MagicMock() + mock_response.tool_calls = [] + assert get_tool_calls_from_response(mock_response) == [] + + def test_returns_empty_list_when_no_tool_calls_attribute(self): + """Should return empty list when response has no tool_calls attribute.""" + mock_response = MagicMock(spec=[]) + assert get_tool_calls_from_response(mock_response) == [] + + def test_returns_empty_list_when_tool_calls_is_not_a_list(self): + """Should return empty list when tool_calls is not a list.""" + mock_response = MagicMock() + mock_response.tool_calls = 'not-a-list' + assert get_tool_calls_from_response(mock_response) == [] + + def test_skips_tool_calls_without_name(self): + """Should skip tool calls that have no name.""" + mock_response = MagicMock() + mock_response.tool_calls = [{'args': {}}, {'name': 'search', 'args': {}}] + assert get_tool_calls_from_response(mock_response) == ['search'] + + +class TestSumTokenUsageFromMessages: + """Tests for sum_token_usage_from_messages.""" + + def test_sums_usage_across_messages(self): + """Should sum token usage from all messages.""" + msg1 = AIMessage(content='a') + msg1.usage_metadata = {'total_tokens': 10, 'input_tokens': 6, 'output_tokens': 4} + msg2 = AIMessage(content='b') + msg2.usage_metadata = {'total_tokens': 20, 'input_tokens': 12, 'output_tokens': 8} + + result = sum_token_usage_from_messages([msg1, msg2]) + + assert result is not None + assert result.total == 30 + assert result.input == 18 + assert result.output == 12 + + def test_returns_none_when_no_usage_on_any_message(self): + """Should return None when no message has usage metadata.""" + msg = AIMessage(content='hello') + assert sum_token_usage_from_messages([msg]) is None + + def test_returns_none_for_empty_list(self): + """Should return None for an empty message list.""" + assert sum_token_usage_from_messages([]) is None + + def test_skips_messages_without_usage(self): + """Should skip messages that have no usage and sum the rest.""" + msg1 = AIMessage(content='a') + msg2 = AIMessage(content='b') + msg2.usage_metadata = {'total_tokens': 5, 'input_tokens': 3, 'output_tokens': 2} + + result = sum_token_usage_from_messages([msg1, msg2]) + + assert result is not None + assert result.total == 5 + assert result.input == 3 + assert result.output == 2 + + class TestGetLlm: """Tests for LangChainModelRunner.get_llm.""" From cbcb5aea139bf6d3bdf63b3aa377c773d2b472c0 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Thu, 26 Mar 2026 13:16:12 -0500 Subject: [PATCH 6/6] fix: wrap long docstring line in tracker.py Co-Authored-By: Claude Sonnet 4.6 --- packages/sdk/server-ai/src/ldai/tracker.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/sdk/server-ai/src/ldai/tracker.py b/packages/sdk/server-ai/src/ldai/tracker.py index 6fbc46d..c84365a 100644 --- a/packages/sdk/server-ai/src/ldai/tracker.py +++ b/packages/sdk/server-ai/src/ldai/tracker.py @@ -121,7 +121,8 @@ def track_duration(self, duration: int, *, graph_key: Optional[str] = None) -> N Manually track the duration of an AI operation. :param duration: Duration in milliseconds. - :param graph_key: When set, include ``graphKey`` in the event payload (e.g. config-level metrics inside a graph). + :param graph_key: When set, include ``graphKey`` in the event payload + (e.g. config-level metrics inside a graph). """ self._summary._duration = duration self._ld_client.track(