From d9970094ca151796dce0907ce6c7d288e2b451e4 Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Thu, 7 May 2026 13:39:39 +0200 Subject: [PATCH 01/11] feat(span-first): Add before_send_span --- sentry_sdk/_types.py | 15 +++++++++++++++ sentry_sdk/client.py | 2 ++ sentry_sdk/consts.py | 4 ++++ sentry_sdk/utils.py | 9 +++++++++ 4 files changed, 30 insertions(+) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index ad3fa35849..9ee1fe4270 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -317,6 +317,21 @@ class SDKInfo(TypedDict): MetricProcessor = Callable[[Metric, Hint], Optional[Metric]] + SpanSnapshot = TypedDict( + "SpanSnapshot", + { + "trace_id": str, + "span_id": str, + "name": str, + "status": str, + "is_segment": bool, + "start_timestamp": float, + "end_timestamp": float, + "parent_span_id": Optional[str], + "attributes": Attributes, + }, + ) + # TODO: Make a proper type definition for this (PRs welcome!) Breadcrumb = Dict[str, Any] diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 9f795d2489..251cc8c9bb 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -949,6 +949,8 @@ def _capture_telemetry( before_send = get_before_send_log(self.options) elif ty == "metric": before_send = get_before_send_metric(self.options) # type: ignore + elif ty == "span": + before_send = get_before_send_span(self.options) # type: ignore if before_send is not None: telemetry = before_send(telemetry, {}) # type: ignore diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index c35da0e22a..cdb118ecdd 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -46,6 +46,7 @@ class CompressionAlgo(Enum): from typing_extensions import Literal, TypedDict import sentry_sdk + from sentry_sdk.traces import StreamedSpan from sentry_sdk._types import ( BreadcrumbProcessor, ContinuousProfilerMode, @@ -85,6 +86,9 @@ class CompressionAlgo(Enum): "before_send_metric": Optional[Callable[[Metric, Hint], Optional[Metric]]], "trace_lifecycle": Optional[Literal["static", "stream"]], "ignore_spans": Optional[IgnoreSpansConfig], + "before_send_span": Optional[ + Callable[[StreamedSpan, Hint], Optional[StreamedSpan]] + ], "suppress_asgi_chained_exceptions": Optional[bool], }, total=False, diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 5051a3d9d2..33c0b3ea06 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -2111,6 +2111,15 @@ def get_before_send_metric( ) +def get_before_send_span( + options: "Optional[dict[str, Any]]", +) -> "Optional[Callable[[Metric, Hint], Optional[StreamedSpan]]]": + if options is None: + return None + + return options["_experiments"].get("before_send_span") + + def format_attribute(val: "Any") -> "AttributeValue": """ Turn unsupported attribute value types into an AttributeValue. From 58caff952d5b0c4add7e76a104c2bc0d82b1a0c9 Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Thu, 7 May 2026 16:24:29 +0200 Subject: [PATCH 02/11] . --- sentry_sdk/traces.py | 11 ------ tests/tracing/test_span_streaming.py | 54 ++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/sentry_sdk/traces.py b/sentry_sdk/traces.py index 4f96a48920..7336c92156 100644 --- a/sentry_sdk/traces.py +++ b/sentry_sdk/traces.py @@ -573,17 +573,6 @@ def _set_segment_attributes(self) -> None: self.set_attribute("process.command_args", sys.argv) - def _to_dict(self) -> SpanSnapshot: - res = { - "trace_id": self.trace_id, - "span_id": self.span_id, - "name": self._name if self._name is not None else "", - "status": self._status, - "is_segment": self._is_segment(), - "start_timestamp": self._start_timestamp.timestamp(), - "attributes": self.get_attributes(), - } - if self._timestamp: res["end_timestamp"] = self._timestamp.timestamp() diff --git a/tests/tracing/test_span_streaming.py b/tests/tracing/test_span_streaming.py index 0e095b5147..0b818b1f43 100644 --- a/tests/tracing/test_span_streaming.py +++ b/tests/tracing/test_span_streaming.py @@ -270,6 +270,60 @@ def traces_sampler(sampling_context): with sentry_sdk.traces.start_span(name="span", attributes={"first": False}): ... +def test_before_send_span(sentry_init, capture_items): + def before_send_span(span, hint): + return None + + sentry_init( + _experiments={ + "before_send_span": before_send_span. + "trace_lifecycle": "stream", + }, + ) + + items = capture_items("span") + + with sentry_sdk.traces.start_span(name="dropped", attributes={"drop": True}): + ... + with sentry_sdk.traces.start_span(name="retained", attributes={"drop": False}): + ... + + sentry_sdk.get_client().flush() + spans = [item.payload for item in items] + + assert len(spans) == 1 + (span,) = spans + + assert span["name"] == "retained" + assert span["attributes"]["drop"] is False + +def test_before_send_span_invalid_return_value(sentry_init, capture_items): + def before_send_span(span, hint): + # Spans can't be dropped in before_send_span + return None + + sentry_init( + _experiments={ + "before_send_span": before_send_span. + "trace_lifecycle": "stream", + }, + ) + + items = capture_items("span") + + with sentry_sdk.traces.start_span(name="dropped", attributes={"drop": True}): + ... + with sentry_sdk.traces.start_span(name="retained", attributes={"drop": False}): + ... + + sentry_sdk.get_client().flush() + spans = [item.payload for item in items] + + assert len(spans) == 1 + (span,) = spans + + assert span["name"] == "retained" + assert span["attributes"]["drop"] is False def test_span_attributes(sentry_init, capture_items): sentry_init( From 3af5562fa0da5aac3618db9a4c7372915b653d48 Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Fri, 8 May 2026 10:30:02 +0200 Subject: [PATCH 03/11] . --- tests/tracing/test_span_streaming.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/tracing/test_span_streaming.py b/tests/tracing/test_span_streaming.py index 0b818b1f43..b9a136b4e6 100644 --- a/tests/tracing/test_span_streaming.py +++ b/tests/tracing/test_span_streaming.py @@ -270,13 +270,14 @@ def traces_sampler(sampling_context): with sentry_sdk.traces.start_span(name="span", attributes={"first": False}): ... + def test_before_send_span(sentry_init, capture_items): def before_send_span(span, hint): - return None + span.set_attribute("", "") sentry_init( _experiments={ - "before_send_span": before_send_span. + "before_send_span": before_send_span, "trace_lifecycle": "stream", }, ) @@ -297,6 +298,7 @@ def before_send_span(span, hint): assert span["name"] == "retained" assert span["attributes"]["drop"] is False + def test_before_send_span_invalid_return_value(sentry_init, capture_items): def before_send_span(span, hint): # Spans can't be dropped in before_send_span @@ -304,7 +306,7 @@ def before_send_span(span, hint): sentry_init( _experiments={ - "before_send_span": before_send_span. + "before_send_span": before_send_span, "trace_lifecycle": "stream", }, ) @@ -325,6 +327,7 @@ def before_send_span(span, hint): assert span["name"] == "retained" assert span["attributes"]["drop"] is False + def test_span_attributes(sentry_init, capture_items): sentry_init( traces_sample_rate=1.0, From 4339f3fbab292e871cfcccc3469efe677230c02f Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Fri, 8 May 2026 15:07:52 +0200 Subject: [PATCH 04/11] feat(span-first): Support before_send_span --- sentry_sdk/client.py | 43 +++++++++++-- sentry_sdk/traces.py | 8 --- sentry_sdk/utils.py | 3 +- tests/tracing/test_span_streaming.py | 96 ++++++++++++++++++++++++---- 4 files changed, 123 insertions(+), 27 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 3b50d71868..98a5800970 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -3,6 +3,7 @@ import random import socket from collections.abc import Mapping +from copy import deepcopy from datetime import datetime, timezone from importlib import import_module from typing import TYPE_CHECKING, List, Dict, cast, overload @@ -25,10 +26,12 @@ logger, get_before_send_log, get_before_send_metric, + get_before_send_span, has_logs_enabled, has_metrics_enabled, ) from sentry_sdk.serializer import serialize +from sentry_sdk.traces import StreamedSpan from sentry_sdk.tracing import trace from sentry_sdk.tracing_utils import has_span_streaming_enabled from sentry_sdk.transport import ( @@ -71,7 +74,6 @@ from sentry_sdk.scope import Scope from sentry_sdk.session import Session from sentry_sdk.spotlight import SpotlightClient - from sentry_sdk.traces import StreamedSpan from sentry_sdk.transport import Transport, Item from sentry_sdk._log_batcher import LogBatcher from sentry_sdk._metrics_batcher import MetricsBatcher @@ -938,25 +940,54 @@ def _capture_telemetry( ty: str, scope: "Scope", ) -> None: - # Capture attributes-based telemetry (logs, metrics, spansV2) + """ + Capture attributes-based telemetry (logs, metrics, streamed spans). + + Apply any attributes set on the scope to it, and run the user's + before_send_{telemetry} on it, if applicable. + """ if telemetry is None: return scope.apply_to_telemetry(telemetry) before_send = None + if ty == "log": before_send = get_before_send_log(self.options) + snapshot = telemetry + elif ty == "metric": before_send = get_before_send_metric(self.options) + snapshot = telemetry + elif ty == "span": before_send = get_before_send_span(self.options) + # We don't want to expose the actual underlying span in + # before_send_span to not allow arbitrary edits. Expose a copy + # instead. + snapshot = deepcopy(telemetry) if before_send is not None: - telemetry = before_send(telemetry, {}) - - if telemetry is None: - return + result = before_send(snapshot, {}) + + # Logs and metrics can be dropped in their respective + # before_send, so if we get None, don't queue them for sending. + if ty in ("log", "metric"): + if result is None: + return + + # Spans can't be dropped in before_send_span by design. They can + # be altered though (name and attributes can be changed, e.g. to + # sanitize). + # + # If we get anything but a StreamedSpan back from before_send_span, + # just ignore it. Otherwise, take the returned StreamedSpan and + # merge it with the original. + elif ty == "span": + if isinstance(result, StreamedSpan): + telemetry._attributes = result._attributes + telemetry._name = result._name batcher = None if ty == "log": diff --git a/sentry_sdk/traces.py b/sentry_sdk/traces.py index 0b3f0821da..f49760f03b 100644 --- a/sentry_sdk/traces.py +++ b/sentry_sdk/traces.py @@ -574,14 +574,6 @@ def _set_segment_attributes(self) -> None: self.set_attribute("process.command_args", sys.argv) - if self._timestamp: - res["end_timestamp"] = self._timestamp.timestamp() - - if self._parent_span_id: - res["parent_span_id"] = self._parent_span_id - - return res - class NoOpStreamedSpan(StreamedSpan): __slots__ = ( diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 33c0b3ea06..76f1919e98 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -77,6 +77,7 @@ Metric, SerializedAttributeValue, ) + from sentry_sdk.traces import StreamedSpan P = ParamSpec("P") R = TypeVar("R") @@ -2113,7 +2114,7 @@ def get_before_send_metric( def get_before_send_span( options: "Optional[dict[str, Any]]", -) -> "Optional[Callable[[Metric, Hint], Optional[StreamedSpan]]]": +) -> "Optional[Callable[[StreamedSpan, Hint], Optional[StreamedSpan]]]": if options is None: return None diff --git a/tests/tracing/test_span_streaming.py b/tests/tracing/test_span_streaming.py index b9a136b4e6..4e876b7527 100644 --- a/tests/tracing/test_span_streaming.py +++ b/tests/tracing/test_span_streaming.py @@ -271,11 +271,19 @@ def traces_sampler(sampling_context): ... -def test_before_send_span(sentry_init, capture_items): +def test_before_send_span_basic(sentry_init, capture_items): def before_send_span(span, hint): - span.set_attribute("", "") + assert isinstance(span, StreamedSpan) + + span.name = "Better span name" + span.remove_attribute("drop") + span.set_attribute("sanitize", "[Removed]") + span.set_attribute("add", "new") + + return span sentry_init( + traces_sample_rate=1.0, _experiments={ "before_send_span": before_send_span, "trace_lifecycle": "stream", @@ -284,9 +292,13 @@ def before_send_span(span, hint): items = capture_items("span") - with sentry_sdk.traces.start_span(name="dropped", attributes={"drop": True}): - ... - with sentry_sdk.traces.start_span(name="retained", attributes={"drop": False}): + with sentry_sdk.traces.start_span( + name="span", + attributes={ + "drop": True, + "sanitize": "myamazingpassword", + }, + ): ... sentry_sdk.get_client().flush() @@ -295,16 +307,20 @@ def before_send_span(span, hint): assert len(spans) == 1 (span,) = spans - assert span["name"] == "retained" - assert span["attributes"]["drop"] is False + assert span["name"] == "Better span name" + assert "drop" not in span["attributes"] + assert span["attributes"]["sanitize"] == "[Removed]" + assert span["attributes"]["add"] == "new" def test_before_send_span_invalid_return_value(sentry_init, capture_items): def before_send_span(span, hint): - # Spans can't be dropped in before_send_span + # Spans can't be dropped in before_send_span, so unsupported return + # values will be ignored return None sentry_init( + traces_sample_rate=1.0, _experiments={ "before_send_span": before_send_span, "trace_lifecycle": "stream", @@ -313,9 +329,34 @@ def before_send_span(span, hint): items = capture_items("span") - with sentry_sdk.traces.start_span(name="dropped", attributes={"drop": True}): + with sentry_sdk.traces.start_span(name="span"): ... - with sentry_sdk.traces.start_span(name="retained", attributes={"drop": False}): + + sentry_sdk.get_client().flush() + spans = [item.payload for item in items] + + assert len(spans) == 1 + (span,) = spans + + assert span["name"] == "span" + + +def test_before_send_span_unsupported_edit(sentry_init, capture_items): + def before_send_span(span, hint): + # Anything beyond attribute and name changes will be ignored + span._trace_id = "my-trace-id" + + sentry_init( + traces_sample_rate=1.0, + _experiments={ + "before_send_span": before_send_span, + "trace_lifecycle": "stream", + }, + ) + + items = capture_items("span") + + with sentry_sdk.traces.start_span(name="span"): ... sentry_sdk.get_client().flush() @@ -324,8 +365,39 @@ def before_send_span(span, hint): assert len(spans) == 1 (span,) = spans - assert span["name"] == "retained" - assert span["attributes"]["drop"] is False + assert span["name"] == "span" + assert span["trace_id"] != "my-trace-id" + + +def test_before_send_span_doesnt_receive_ignored_spans(sentry_init, capture_items): + before_send_span_called = False + + def before_send_span(span, hint): + nonlocal before_send_span_called + before_send_span_called = True + return span + + sentry_init( + traces_sample_rate=1.0, + _experiments={ + "before_send_span": before_send_span, + "trace_lifecycle": "stream", + "ignore_spans": [ + "ignored", + ], + }, + ) + + items = capture_items("span") + + with sentry_sdk.traces.start_span(name="ignored"): + ... + + sentry_sdk.get_client().flush() + spans = [item.payload for item in items] + + assert not spans + assert not before_send_span_called def test_span_attributes(sentry_init, capture_items): From 0c165af5b542256a41b1dc23691518f2aae1fb0c Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Fri, 8 May 2026 15:08:29 +0200 Subject: [PATCH 05/11] remove unused type --- sentry_sdk/_types.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index 9ee1fe4270..ad3fa35849 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -317,21 +317,6 @@ class SDKInfo(TypedDict): MetricProcessor = Callable[[Metric, Hint], Optional[Metric]] - SpanSnapshot = TypedDict( - "SpanSnapshot", - { - "trace_id": str, - "span_id": str, - "name": str, - "status": str, - "is_segment": bool, - "start_timestamp": float, - "end_timestamp": float, - "parent_span_id": Optional[str], - "attributes": Attributes, - }, - ) - # TODO: Make a proper type definition for this (PRs welcome!) Breadcrumb = Dict[str, Any] From 22438326df9086b167d8faaf8ed64b6b5e72d007 Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Mon, 11 May 2026 12:55:20 +0200 Subject: [PATCH 06/11] Rework like in js --- sentry_sdk/_span_batcher.py | 50 +++++++++++----------------- sentry_sdk/_types.py | 15 +++++++++ sentry_sdk/client.py | 37 ++++++++++---------- sentry_sdk/consts.py | 4 +-- sentry_sdk/traces.py | 22 +++++++++++- tests/tracing/test_span_streaming.py | 24 ++++++++----- 6 files changed, 92 insertions(+), 60 deletions(-) diff --git a/sentry_sdk/_span_batcher.py b/sentry_sdk/_span_batcher.py index 275462b21c..8579ae770a 100644 --- a/sentry_sdk/_span_batcher.py +++ b/sentry_sdk/_span_batcher.py @@ -13,10 +13,10 @@ if TYPE_CHECKING: from typing import Any, Callable, Optional - from sentry_sdk.traces import StreamedSpan + from sentry_sdk._types import SpanJSON -class SpanBatcher(Batcher["StreamedSpan"]): +class SpanBatcher(Batcher["SpanJSON"]): # MAX_BEFORE_FLUSH should be lower than MAX_BEFORE_DROP, so that there is # a bit of a buffer for spans that appear between the trigger to flush # and actually flushing the buffer. @@ -42,7 +42,7 @@ def __init__( # by trace_id, so that we can then send the buckets each in its own # envelope. # trace_id -> span buffer - self._span_buffer: dict[str, list["StreamedSpan"]] = defaultdict(list) + self._span_buffer: dict[str, list["SpanJSON"]] = defaultdict(list) self._running_size: dict[str, int] = defaultdict(lambda: 0) self._capture_func = capture_func self._record_lost_func = record_lost_func @@ -99,7 +99,7 @@ def _flush_loop(self) -> None: self._flush() self._last_full_flush = time.monotonic() - def add(self, span: "StreamedSpan") -> None: + def add(self, span: "SpanJSON") -> None: # Bail out if the current thread is already executing batcher code. # This prevents deadlocks when code running inside the batcher (e.g. # _add_to_envelope during flush, or _flush_event.wait/set) triggers @@ -115,7 +115,7 @@ def add(self, span: "StreamedSpan") -> None: return None with self._lock: - size = len(self._span_buffer[span.trace_id]) + size = len(self._span_buffer[span["trace_id"]]) if size >= self.MAX_BEFORE_DROP: self._record_lost_func( reason="queue_overflow", @@ -124,14 +124,15 @@ def add(self, span: "StreamedSpan") -> None: ) return None - self._span_buffer[span.trace_id].append(span) - self._running_size[span.trace_id] += self._estimate_size(span) + self._span_buffer[span["trace_id"]].append(span) + self._running_size[span["trace_id"]] += self._estimate_size(span) if ( size + 1 >= self.MAX_BEFORE_FLUSH - or self._running_size[span.trace_id] >= self.MAX_BYTES_BEFORE_FLUSH + or self._running_size[span["trace_id"]] + >= self.MAX_BYTES_BEFORE_FLUSH ): - self._pending_flush.add(span.trace_id) + self._pending_flush.add(span["trace_id"]) notify = True else: notify = False @@ -142,12 +143,12 @@ def add(self, span: "StreamedSpan") -> None: self._active.flag = False @staticmethod - def _estimate_size(item: "StreamedSpan") -> int: + def _estimate_size(item: "SpanJSON") -> int: # Rough estimate of serialized span size that's quick to compute. # 210 is the rough size of the payload without attributes, and then we # estimate the attributes separately. estimate = 210 - for value in item._attributes.values(): + for value in item["attributes"].values(): estimate += 50 if isinstance(value, str): @@ -158,26 +159,15 @@ def _estimate_size(item: "StreamedSpan") -> int: return estimate @staticmethod - def _to_transport_format(item: "StreamedSpan") -> "Any": - res: "dict[str, Any]" = { - "trace_id": item.trace_id, - "span_id": item.span_id, - "name": item._name if item._name is not None else "", - "status": item._status, - "is_segment": item._is_segment(), - "start_timestamp": item._start_timestamp.timestamp(), - } - - if item._end_timestamp: - res["end_timestamp"] = item._end_timestamp.timestamp() - - if item._parent_span_id: - res["parent_span_id"] = item._parent_span_id - - if item._attributes: + def _to_transport_format(item: "SpanJSON") -> "Any": + res = {k: v for k, v in item.items() if k not in ("_segment_span",)} + + if item.get("attributes"): res["attributes"] = { - k: serialize_attribute(v) for (k, v) in item._attributes.items() + k: serialize_attribute(v) for (k, v) in item["attributes"].items() } + else: + del res["attributes"] return res @@ -201,7 +191,7 @@ def _flush(self, only_pending: bool = False) -> None: if not spans: continue - dsc = spans[0]._dynamic_sampling_context() + dsc = spans[0]["_segment_span"]._dynamic_sampling_context() # Max per envelope is 1000, so if we happen to have more than # 1000 spans in one bucket, we'll need to separate them. diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index ad3fa35849..16bbf4803d 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -317,6 +317,21 @@ class SDKInfo(TypedDict): MetricProcessor = Callable[[Metric, Hint], Optional[Metric]] + SpanJSON = TypedDict( + "SpanJSON", + { + "trace_id": str, + "span_id": str, + "parent_span_id": NotRequired[str], + "name": str, + "status": str, + "is_segment": bool, + "start_timestamp": float, + "end_timestamp": NotRequired[float], + "attributes": NotRequired[Attributes], + }, + ) + # TODO: Make a proper type definition for this (PRs welcome!) Breadcrumb = Dict[str, Any] diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 98a5800970..9f06d4f900 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -3,7 +3,6 @@ import random import socket from collections.abc import Mapping -from copy import deepcopy from datetime import datetime, timezone from importlib import import_module from typing import TYPE_CHECKING, List, Dict, cast, overload @@ -955,50 +954,50 @@ def _capture_telemetry( if ty == "log": before_send = get_before_send_log(self.options) - snapshot = telemetry + serialized = telemetry elif ty == "metric": before_send = get_before_send_metric(self.options) - snapshot = telemetry + serialized = telemetry elif ty == "span": before_send = get_before_send_span(self.options) - # We don't want to expose the actual underlying span in - # before_send_span to not allow arbitrary edits. Expose a copy - # instead. - snapshot = deepcopy(telemetry) + serialized = telemetry._to_json() if before_send is not None: - result = before_send(snapshot, {}) + serialized = before_send(serialized, {}) # Logs and metrics can be dropped in their respective # before_send, so if we get None, don't queue them for sending. if ty in ("log", "metric"): - if result is None: + if serialized is None: return # Spans can't be dropped in before_send_span by design. They can - # be altered though (name and attributes can be changed, e.g. to - # sanitize). - # - # If we get anything but a StreamedSpan back from before_send_span, - # just ignore it. Otherwise, take the returned StreamedSpan and - # merge it with the original. + # be altered though (e.g. to sanitize). elif ty == "span": - if isinstance(result, StreamedSpan): - telemetry._attributes = result._attributes - telemetry._name = result._name + if isinstance(serialized, dict) and serialized: + # TODO[ivana]: Figure out the merging/validation here + pass + else: + serialized = telemetry._to_json() + logger.debug( + "[Tracing] Invalid return value from before_send_span. Using original span." + ) batcher = None if ty == "log": batcher = self.log_batcher + elif ty == "metric": batcher = self.metrics_batcher + elif ty == "span": + serialized["_segment_span"] = telemetry._segment batcher = self.span_batcher if batcher is not None: - batcher.add(telemetry) # type: ignore + batcher.add(serialized) # type: ignore def _capture_log(self, log: "Optional[Log]", scope: "Scope") -> None: self._capture_telemetry(log, "log", scope) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 09dda88566..c81581132f 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -46,7 +46,6 @@ class CompressionAlgo(Enum): from typing_extensions import Literal, TypedDict import sentry_sdk - from sentry_sdk.traces import StreamedSpan from sentry_sdk._types import ( BreadcrumbProcessor, ContinuousProfilerMode, @@ -57,6 +56,7 @@ class CompressionAlgo(Enum): Log, Metric, ProfilerMode, + SpanJSON, TracesSampler, TransactionProcessor, ) @@ -87,7 +87,7 @@ class CompressionAlgo(Enum): "trace_lifecycle": Optional[Literal["static", "stream"]], "ignore_spans": Optional[IgnoreSpansConfig], "before_send_span": Optional[ - Callable[[StreamedSpan, Hint], Optional[StreamedSpan]] + Callable[[SpanJSON, Hint], Optional[SpanJSON]] ], "suppress_asgi_chained_exceptions": Optional[bool], }, diff --git a/sentry_sdk/traces.py b/sentry_sdk/traces.py index f49760f03b..c4b11cd0ee 100644 --- a/sentry_sdk/traces.py +++ b/sentry_sdk/traces.py @@ -43,7 +43,7 @@ Union, ) - from sentry_sdk._types import Attributes, AttributeValue + from sentry_sdk._types import Attributes, AttributeValue, SpanJSON from sentry_sdk.profiler.continuous_profiler import ContinuousProfile P = ParamSpec("P") @@ -574,6 +574,26 @@ def _set_segment_attributes(self) -> None: self.set_attribute("process.command_args", sys.argv) + def _to_json(self) -> "SpanJSON": + res = { + "trace_id": self.trace_id, + "span_id": self.span_id, + "name": self._name if self._name is not None else "", + "status": self._status, + "is_segment": self._is_segment(), + "start_timestamp": self._start_timestamp.timestamp(), + } + + if self._end_timestamp: + res["end_timestamp"] = self._end_timestamp.timestamp() + + if self._parent_span_id: + res["parent_span_id"] = self._parent_span_id + + res["attributes"] = {k: v for k, v in self._attributes.items()} + + return res + class NoOpStreamedSpan(StreamedSpan): __slots__ = ( diff --git a/tests/tracing/test_span_streaming.py b/tests/tracing/test_span_streaming.py index 4e876b7527..b5cb001745 100644 --- a/tests/tracing/test_span_streaming.py +++ b/tests/tracing/test_span_streaming.py @@ -273,12 +273,12 @@ def traces_sampler(sampling_context): def test_before_send_span_basic(sentry_init, capture_items): def before_send_span(span, hint): - assert isinstance(span, StreamedSpan) + assert isinstance(span, dict) - span.name = "Better span name" - span.remove_attribute("drop") - span.set_attribute("sanitize", "[Removed]") - span.set_attribute("add", "new") + span["name"] = "Better span name" + del span["attributes"]["drop"] + span["attributes"]["sanitize"] = "[Removed]" + span["attributes"]["add"] = "new" return span @@ -313,11 +313,17 @@ def before_send_span(span, hint): assert span["attributes"]["add"] == "new" -def test_before_send_span_invalid_return_value(sentry_init, capture_items): +@pytest.mark.parametrize( + "return_value", + [None, {}, {"not_a_span": True}], +) +def test_before_send_span_invalid_return_value( + sentry_init, capture_items, return_value +): def before_send_span(span, hint): # Spans can't be dropped in before_send_span, so unsupported return # values will be ignored - return None + return return_value sentry_init( traces_sample_rate=1.0, @@ -344,7 +350,9 @@ def before_send_span(span, hint): def test_before_send_span_unsupported_edit(sentry_init, capture_items): def before_send_span(span, hint): # Anything beyond attribute and name changes will be ignored - span._trace_id = "my-trace-id" + span["trace_id"] = "my-trace-id" + + return span sentry_init( traces_sample_rate=1.0, From 4c264394c95c67469e19d8bfc04df1b6915601b3 Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Mon, 11 May 2026 15:05:47 +0200 Subject: [PATCH 07/11] . --- sentry_sdk/_span_batcher.py | 2 +- sentry_sdk/client.py | 24 +++++++++++++++++------- tests/tracing/test_span_batcher.py | 2 +- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/sentry_sdk/_span_batcher.py b/sentry_sdk/_span_batcher.py index 8579ae770a..096194d353 100644 --- a/sentry_sdk/_span_batcher.py +++ b/sentry_sdk/_span_batcher.py @@ -148,7 +148,7 @@ def _estimate_size(item: "SpanJSON") -> int: # 210 is the rough size of the payload without attributes, and then we # estimate the attributes separately. estimate = 210 - for value in item["attributes"].values(): + for value in (item.get("attributes") or {}).values(): estimate += 50 if isinstance(value, str): diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 9f06d4f900..e2bfef44c1 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -967,24 +967,34 @@ def _capture_telemetry( if before_send is not None: serialized = before_send(serialized, {}) - # Logs and metrics can be dropped in their respective - # before_send, so if we get None, don't queue them for sending. if ty in ("log", "metric"): + # Logs and metrics can be dropped in their respective + # before_send, so if we get None, don't queue them for sending. if serialized is None: return - # Spans can't be dropped in before_send_span by design. They can - # be altered though (e.g. to sanitize). elif ty == "span": + # Spans can't be dropped in before_send_span by design. They can + # be altered though (e.g. to sanitize). Only allow changes to + # name and attributes. if isinstance(serialized, dict) and serialized: - # TODO[ivana]: Figure out the merging/validation here - pass + if "name" not in serialized: + logger.debug( + "[Tracing] Invalid return value from before_send_span. Using original span." + ) + telemetry.name = serialized["name"] + if serialized.get("attributes"): + telemetry._attributes = {} + for k, v in serialized.get("attributes") or {}: + telemetry.set_attribute(k, v) + else: - serialized = telemetry._to_json() logger.debug( "[Tracing] Invalid return value from before_send_span. Using original span." ) + serialized = telemetry._to_json() + batcher = None if ty == "log": batcher = self.log_batcher diff --git a/tests/tracing/test_span_batcher.py b/tests/tracing/test_span_batcher.py index 4286691785..fd575b8b83 100644 --- a/tests/tracing/test_span_batcher.py +++ b/tests/tracing/test_span_batcher.py @@ -236,7 +236,7 @@ def test_weight_based_flushing_by_attribute_size( with sentry_sdk.traces.start_span(name="small span") as bare_span: pass - bare_span_size = SpanBatcher._estimate_size(bare_span) + bare_span_size = SpanBatcher._estimate_size(bare_span._to_json()) big_attr = "x" * bare_span_size monkeypatch.setattr(SpanBatcher, "MAX_BYTES_BEFORE_FLUSH", bare_span_size * 3) From 0d543b9c96c2a04486fac0152064b4f3e6067ab5 Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Mon, 11 May 2026 15:36:10 +0200 Subject: [PATCH 08/11] . --- sentry_sdk/_types.py | 2 ++ sentry_sdk/client.py | 18 +++++++----------- sentry_sdk/traces.py | 2 +- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index 16bbf4803d..f952baf44c 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -140,6 +140,7 @@ def substituted_because_contains_sensitive_data(cls) -> "AnnotatedValue": if TYPE_CHECKING: from collections.abc import Container, MutableMapping, Sequence from datetime import datetime + from sentry_sdk.traces import StreamedSpan from types import TracebackType from typing import Any, Callable, Dict, Mapping, NotRequired, Optional, Type @@ -329,6 +330,7 @@ class SDKInfo(TypedDict): "start_timestamp": float, "end_timestamp": NotRequired[float], "attributes": NotRequired[Attributes], + "_segment_span": NotRequired[StreamedSpan], }, ) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index e2bfef44c1..6add129ac6 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -962,10 +962,10 @@ def _capture_telemetry( elif ty == "span": before_send = get_before_send_span(self.options) - serialized = telemetry._to_json() + serialized = telemetry._to_json() # type: ignore[union-attr] if before_send is not None: - serialized = before_send(serialized, {}) + serialized = before_send(serialized, {}) # type: ignore[arg-type] if ty in ("log", "metric"): # Logs and metrics can be dropped in their respective @@ -973,19 +973,15 @@ def _capture_telemetry( if serialized is None: return - elif ty == "span": + elif ty == "span" and isinstance(telemetry, StreamedSpan): # Spans can't be dropped in before_send_span by design. They can # be altered though (e.g. to sanitize). Only allow changes to # name and attributes. - if isinstance(serialized, dict) and serialized: - if "name" not in serialized: - logger.debug( - "[Tracing] Invalid return value from before_send_span. Using original span." - ) - telemetry.name = serialized["name"] + if isinstance(serialized, dict) and serialized and "name" in serialized: + telemetry.name = serialized["name"] # type: ignore[typeddict-item] if serialized.get("attributes"): telemetry._attributes = {} - for k, v in serialized.get("attributes") or {}: + for k, v in (serialized.get("attributes") or {}).items(): telemetry.set_attribute(k, v) else: @@ -1003,7 +999,7 @@ def _capture_telemetry( batcher = self.metrics_batcher elif ty == "span": - serialized["_segment_span"] = telemetry._segment + serialized["_segment_span"] = telemetry._segment # type: ignore batcher = self.span_batcher if batcher is not None: diff --git a/sentry_sdk/traces.py b/sentry_sdk/traces.py index c4b11cd0ee..f0ea5b6780 100644 --- a/sentry_sdk/traces.py +++ b/sentry_sdk/traces.py @@ -575,7 +575,7 @@ def _set_segment_attributes(self) -> None: self.set_attribute("process.command_args", sys.argv) def _to_json(self) -> "SpanJSON": - res = { + res: "SpanJSON" = { "trace_id": self.trace_id, "span_id": self.span_id, "name": self._name if self._name is not None else "", From 280a1201e1e5c9f1cdf2be49b4c63e0a1f0a1d9b Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Mon, 11 May 2026 16:05:41 +0200 Subject: [PATCH 09/11] fix sqlalchemy test --- tests/integrations/sqlalchemy/test_sqlalchemy.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/integrations/sqlalchemy/test_sqlalchemy.py b/tests/integrations/sqlalchemy/test_sqlalchemy.py index d942d5fea3..a938ad9d7b 100644 --- a/tests/integrations/sqlalchemy/test_sqlalchemy.py +++ b/tests/integrations/sqlalchemy/test_sqlalchemy.py @@ -1080,19 +1080,19 @@ class Person(Base): class fake_record_sql_queries: # noqa: N801 def __init__(self, *args, **kwargs): - with record_sql_queries_supporting_streaming( + self._ctx_mgr = record_sql_queries_supporting_streaming( *args, **kwargs - ) as span: - self.span = span + ) + def __enter__(self): + self.span = self._ctx_mgr.__enter__() self.span._start_timestamp = datetime(2024, 1, 1, microsecond=0) self.span._end_timestamp = datetime(2024, 1, 1, microsecond=101000) - - def __enter__(self): return self.span def __exit__(self, type, value, traceback): - pass + self.span._end_timestamp = None + self._ctx_mgr.__exit__(type, value, traceback) with mock.patch( "sentry_sdk.integrations.sqlalchemy.record_sql_queries_supporting_streaming", From 8aca36a104fb1252f5594c41909d7aab78b44272 Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Mon, 11 May 2026 16:16:29 +0200 Subject: [PATCH 10/11] . --- sentry_sdk/client.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 6add129ac6..7fc8b67e4c 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -979,10 +979,9 @@ def _capture_telemetry( # name and attributes. if isinstance(serialized, dict) and serialized and "name" in serialized: telemetry.name = serialized["name"] # type: ignore[typeddict-item] - if serialized.get("attributes"): - telemetry._attributes = {} - for k, v in (serialized.get("attributes") or {}).items(): - telemetry.set_attribute(k, v) + telemetry._attributes = {} + for k, v in (serialized.get("attributes") or {}).items(): + telemetry.set_attribute(k, v) else: logger.debug( @@ -999,6 +998,8 @@ def _capture_telemetry( batcher = self.metrics_batcher elif ty == "span": + # We need a reference to the segment span in the batcher to populate + # the DSC serialized["_segment_span"] = telemetry._segment # type: ignore batcher = self.span_batcher From 9221b12c003a9b6d1743996c8617f9200db4cb6f Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Mon, 11 May 2026 16:21:15 +0200 Subject: [PATCH 11/11] . --- sentry_sdk/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 7fc8b67e4c..a79df1f2a4 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -985,7 +985,7 @@ def _capture_telemetry( else: logger.debug( - "[Tracing] Invalid return value from before_send_span. Using original span." + "[Tracing] Invalid return value from before_send_span. Keeping original span." ) serialized = telemetry._to_json()