Rework before_send_span to operate on serialized span dicts (SpanJSON), like in JS

sentrivana · sentrivana · commit 22438326df90 · 2026-05-11T12:55:20.000+02:00
diff --git a/sentry_sdk/_span_batcher.py b/sentry_sdk/_span_batcher.py
@@ -13,10 +13,10 @@
 
 if TYPE_CHECKING:
     from typing import Any, Callable, Optional
-    from sentry_sdk.traces import StreamedSpan
+    from sentry_sdk._types import SpanJSON
 
 
-class SpanBatcher(Batcher["StreamedSpan"]):
+class SpanBatcher(Batcher["SpanJSON"]):
     # MAX_BEFORE_FLUSH should be lower than MAX_BEFORE_DROP, so that there is
     # a bit of a buffer for spans that appear between the trigger to flush
     # and actually flushing the buffer.
@@ -42,7 +42,7 @@ def __init__(
         # by trace_id, so that we can then send the buckets each in its own
         # envelope.
         # trace_id -> span buffer
-        self._span_buffer: dict[str, list["StreamedSpan"]] = defaultdict(list)
+        self._span_buffer: dict[str, list["SpanJSON"]] = defaultdict(list)
         self._running_size: dict[str, int] = defaultdict(lambda: 0)
         self._capture_func = capture_func
         self._record_lost_func = record_lost_func
@@ -99,7 +99,7 @@ def _flush_loop(self) -> None:
                 self._flush()
                 self._last_full_flush = time.monotonic()
 
-    def add(self, span: "StreamedSpan") -> None:
+    def add(self, span: "SpanJSON") -> None:
         # Bail out if the current thread is already executing batcher code.
         # This prevents deadlocks when code running inside the batcher (e.g.
         # _add_to_envelope during flush, or _flush_event.wait/set) triggers
@@ -115,7 +115,7 @@ def add(self, span: "StreamedSpan") -> None:
                 return None
 
             with self._lock:
-                size = len(self._span_buffer[span.trace_id])
+                size = len(self._span_buffer[span["trace_id"]])
                 if size >= self.MAX_BEFORE_DROP:
                     self._record_lost_func(
                         reason="queue_overflow",
@@ -124,14 +124,15 @@ def add(self, span: "StreamedSpan") -> None:
                     )
                     return None
 
-                self._span_buffer[span.trace_id].append(span)
-                self._running_size[span.trace_id] += self._estimate_size(span)
+                self._span_buffer[span["trace_id"]].append(span)
+                self._running_size[span["trace_id"]] += self._estimate_size(span)
 
                 if (
                     size + 1 >= self.MAX_BEFORE_FLUSH
-                    or self._running_size[span.trace_id] >= self.MAX_BYTES_BEFORE_FLUSH
+                    or self._running_size[span["trace_id"]]
+                    >= self.MAX_BYTES_BEFORE_FLUSH
                 ):
-                    self._pending_flush.add(span.trace_id)
+                    self._pending_flush.add(span["trace_id"])
                     notify = True
                 else:
                     notify = False
@@ -142,12 +143,12 @@ def add(self, span: "StreamedSpan") -> None:
             self._active.flag = False
 
     @staticmethod
-    def _estimate_size(item: "StreamedSpan") -> int:
+    def _estimate_size(item: "SpanJSON") -> int:
         # Rough estimate of serialized span size that's quick to compute.
         # 210 is the rough size of the payload without attributes, and then we
         # estimate the attributes separately.
         estimate = 210
-        for value in item._attributes.values():
+        for value in item["attributes"].values():
             estimate += 50
 
             if isinstance(value, str):
@@ -158,26 +159,15 @@ def _estimate_size(item: "StreamedSpan") -> int:
         return estimate
 
     @staticmethod
-    def _to_transport_format(item: "StreamedSpan") -> "Any":
-        res: "dict[str, Any]" = {
-            "trace_id": item.trace_id,
-            "span_id": item.span_id,
-            "name": item._name if item._name is not None else "<unlabeled span>",
-            "status": item._status,
-            "is_segment": item._is_segment(),
-            "start_timestamp": item._start_timestamp.timestamp(),
-        }
-
-        if item._end_timestamp:
-            res["end_timestamp"] = item._end_timestamp.timestamp()
-
-        if item._parent_span_id:
-            res["parent_span_id"] = item._parent_span_id
-
-        if item._attributes:
+    def _to_transport_format(item: "SpanJSON") -> "Any":
+        """Return the transport-format dict: no internal keys, attributes serialized."""
+        res = {k: v for k, v in item.items() if k != "_segment_span"}
+        if item.get("attributes"):
             res["attributes"] = {
-                k: serialize_attribute(v) for (k, v) in item._attributes.items()
+                k: serialize_attribute(v) for (k, v) in item["attributes"].items()
             }
+        else:
+            res.pop("attributes", None)  # key is optional and may be absent
 
         return res
 
@@ -201,7 +191,7 @@ def _flush(self, only_pending: bool = False) -> None:
                 if not spans:
                     continue
 
-                dsc = spans[0]._dynamic_sampling_context()
+                dsc = spans[0]["_segment_span"]._dynamic_sampling_context()
 
                 # Max per envelope is 1000, so if we happen to have more than
                 # 1000 spans in one bucket, we'll need to separate them.
diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py
@@ -317,6 +317,21 @@ class SDKInfo(TypedDict):
 
     MetricProcessor = Callable[[Metric, Hint], Optional[Metric]]
 
+    SpanJSON = TypedDict(
+        "SpanJSON",
+        {
+            "trace_id": str,
+            "span_id": str,
+            "parent_span_id": NotRequired[str],  # omitted if the span has no parent
+            "name": str,  # "<unlabeled span>" if no name was set
+            "status": str,
+            "is_segment": bool,
+            "start_timestamp": float,  # from datetime.timestamp()
+            "end_timestamp": NotRequired[float],  # omitted if there is no end time
+            "attributes": NotRequired[Attributes],
+        },  # NOTE(review): "_segment_span" is also set at runtime but not declared
+    )
+
     # TODO: Make a proper type definition for this (PRs welcome!)
     Breadcrumb = Dict[str, Any]
 
diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py
@@ -3,7 +3,6 @@
 import random
 import socket
 from collections.abc import Mapping
-from copy import deepcopy
 from datetime import datetime, timezone
 from importlib import import_module
 from typing import TYPE_CHECKING, List, Dict, cast, overload
@@ -955,50 +954,50 @@ def _capture_telemetry(
 
         if ty == "log":
             before_send = get_before_send_log(self.options)
-            snapshot = telemetry
+            serialized = telemetry
 
         elif ty == "metric":
             before_send = get_before_send_metric(self.options)
-            snapshot = telemetry
+            serialized = telemetry
 
         elif ty == "span":
             before_send = get_before_send_span(self.options)
-            # We don't want to expose the actual underlying span in
-            # before_send_span to not allow arbitrary edits. Expose a copy
-            # instead.
-            snapshot = deepcopy(telemetry)
+            serialized = telemetry._to_json()
 
         if before_send is not None:
-            result = before_send(snapshot, {})
+            serialized = before_send(serialized, {})
 
             # Logs and metrics can be dropped in their respective
             # before_send, so if we get None, don't queue them for sending.
             if ty in ("log", "metric"):
-                if result is None:
+                if serialized is None:
                     return
 
             # Spans can't be dropped in before_send_span by design. They can
-            # be altered though (name and attributes can be changed, e.g. to
-            # sanitize).
-            #
-            # If we get anything but a StreamedSpan back from before_send_span,
-            # just ignore it. Otherwise, take the returned StreamedSpan and
-            # merge it with the original.
+            # be altered though (e.g. to sanitize).
             elif ty == "span":
-                if isinstance(result, StreamedSpan):
-                    telemetry._attributes = result._attributes
-                    telemetry._name = result._name
+                if isinstance(serialized, dict) and serialized:
+                    # TODO[ivana]: Figure out the merging/validation here
+                    pass
+                else:
+                    serialized = telemetry._to_json()
+                    logger.debug(
+                        "[Tracing] Invalid return value from before_send_span. Using original span."
+                    )
 
         batcher = None
         if ty == "log":
             batcher = self.log_batcher
+
         elif ty == "metric":
             batcher = self.metrics_batcher
+
         elif ty == "span":
+            serialized["_segment_span"] = telemetry._segment
             batcher = self.span_batcher
 
         if batcher is not None:
-            batcher.add(telemetry)  # type: ignore
+            batcher.add(serialized)  # type: ignore
 
     def _capture_log(self, log: "Optional[Log]", scope: "Scope") -> None:
         self._capture_telemetry(log, "log", scope)
diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
@@ -46,7 +46,6 @@ class CompressionAlgo(Enum):
     from typing_extensions import Literal, TypedDict
 
     import sentry_sdk
-    from sentry_sdk.traces import StreamedSpan
     from sentry_sdk._types import (
         BreadcrumbProcessor,
         ContinuousProfilerMode,
@@ -57,6 +56,7 @@ class CompressionAlgo(Enum):
         Log,
         Metric,
         ProfilerMode,
+        SpanJSON,
         TracesSampler,
         TransactionProcessor,
     )
@@ -87,7 +87,7 @@ class CompressionAlgo(Enum):
             "trace_lifecycle": Optional[Literal["static", "stream"]],
             "ignore_spans": Optional[IgnoreSpansConfig],
             "before_send_span": Optional[
-                Callable[[StreamedSpan, Hint], Optional[StreamedSpan]]
+                Callable[[SpanJSON, Hint], Optional[SpanJSON]]
             ],
             "suppress_asgi_chained_exceptions": Optional[bool],
         },
diff --git a/sentry_sdk/traces.py b/sentry_sdk/traces.py
@@ -43,7 +43,7 @@
         Union,
     )
 
-    from sentry_sdk._types import Attributes, AttributeValue
+    from sentry_sdk._types import Attributes, AttributeValue, SpanJSON
     from sentry_sdk.profiler.continuous_profiler import ContinuousProfile
 
     P = ParamSpec("P")
@@ -574,6 +574,26 @@ def _set_segment_attributes(self) -> None:
 
         self.set_attribute("process.command_args", sys.argv)
 
+    def _to_json(self) -> "SpanJSON":
+        """Return a ``SpanJSON`` dict snapshot of this span's current state."""
+        res: "SpanJSON" = {
+            "trace_id": self.trace_id,
+            "span_id": self.span_id,
+            "name": self._name if self._name is not None else "<unlabeled span>",
+            "status": self._status,
+            "is_segment": self._is_segment(),
+            "start_timestamp": self._start_timestamp.timestamp(),
+        }
+
+        if self._end_timestamp:
+            res["end_timestamp"] = self._end_timestamp.timestamp()
+        if self._parent_span_id:
+            res["parent_span_id"] = self._parent_span_id
+
+        # Shallow copy: adding/removing keys on the result won't touch the span.
+        res["attributes"] = dict(self._attributes)
+        return res
+
 
 class NoOpStreamedSpan(StreamedSpan):
     __slots__ = (
diff --git a/tests/tracing/test_span_streaming.py b/tests/tracing/test_span_streaming.py
@@ -273,12 +273,12 @@ def traces_sampler(sampling_context):
 
 def test_before_send_span_basic(sentry_init, capture_items):
     def before_send_span(span, hint):
-        assert isinstance(span, StreamedSpan)
+        assert isinstance(span, dict)
 
-        span.name = "Better span name"
-        span.remove_attribute("drop")
-        span.set_attribute("sanitize", "[Removed]")
-        span.set_attribute("add", "new")
+        span["name"] = "Better span name"
+        del span["attributes"]["drop"]
+        span["attributes"]["sanitize"] = "[Removed]"
+        span["attributes"]["add"] = "new"
 
         return span
 
@@ -313,11 +313,17 @@ def before_send_span(span, hint):
     assert span["attributes"]["add"] == "new"
 
 
-def test_before_send_span_invalid_return_value(sentry_init, capture_items):
+@pytest.mark.parametrize(
+    "return_value",
+    [None, {}, {"not_a_span": True}],
+)
+def test_before_send_span_invalid_return_value(
+    sentry_init, capture_items, return_value
+):
     def before_send_span(span, hint):
         # Spans can't be dropped in before_send_span, so unsupported return
         # values will be ignored
-        return None
+        return return_value
 
     sentry_init(
         traces_sample_rate=1.0,
@@ -344,7 +350,9 @@ def before_send_span(span, hint):
 def test_before_send_span_unsupported_edit(sentry_init, capture_items):
     def before_send_span(span, hint):
         # Anything beyond attribute and name changes will be ignored
-        span._trace_id = "my-trace-id"
+        span["trace_id"] = "my-trace-id"
+
+        return span
 
     sentry_init(
         traces_sample_rate=1.0,