feat(cold): hoist write SLO and stream-setup timeout into the handle

prestwich · claude · prestwich · commit f5fdaaea35aa · 2026-04-30T13:21:19.000-04:00
Adds two accessors to the `ColdStorageBackend` trait:

    fn read_timeout(&self) -> Option<Duration> { None }
    fn write_timeout(&self) -> Option<Duration> { None }

Wired through `MdbxColdBackend`, `SqlColdBackend`, and `EitherCold`.
`MemColdBackend` returns `None` (already-documented test exemption).

Two behaviour changes use these:

1. The advisory write-SLO WARN moves from the MDBX backend
   (`warn_on_overrun` per-method) to `ColdStorage::spawn_write`. Timing
   is now captured before `write_sem` acquisition, so the elapsed value
   covers the queue wait, the read drain, and the commit end-to-end. The
   failure shape that wedged production at #56 — slow readers gating
   writes — now surfaces as a write-SLO violation rather than as a
   sub-threshold backend timing.

2. `stream_logs`'s setup `get_latest_block` is wrapped in
   `tokio::time::timeout(backend.read_timeout(), ...)`, falling back to
   `DEFAULT_STREAM_SETUP_TIMEOUT` (500 ms) when the backend returns
   `None`. Without this, a stuck point lookup (cold MDBX page) or a
   saturated PG pool parking on `acquire_timeout` could pin N concurrent
   setup callers indefinitely with no permit cap. The setup read still
   bypasses `read_sem` and the drain barrier by design.

Also drops the now-unused `tracing` dep from `signet-cold-mdbx` and
updates the type docs to point at the handle's new WARN path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/crates/cold-mdbx/Cargo.toml b/crates/cold-mdbx/Cargo.toml
@@ -24,7 +24,6 @@ signet-libmdbx.workspace = true
 signet-storage-types.workspace = true
 thiserror.workspace = true
 tokio.workspace = true
-tracing.workspace = true
 
 [dev-dependencies]
 signet-hot-mdbx = { workspace = true, features = ["test-utils"] }
diff --git a/crates/cold-mdbx/src/backend.rs b/crates/cold-mdbx/src/backend.rs
@@ -125,7 +125,7 @@ fn produce_log_stream_blocking(
     for block_num in from..=to {
         // Check the deadline before starting each block so we
         // don't begin reading after the caller's timeout.
-        if std::time::Instant::now() > deadline {
+        if Instant::now() > deadline {
             let _ = sender.blocking_send(Err(ColdStorageError::StreamDeadlineExceeded));
             return;
         }
@@ -149,7 +149,7 @@ fn produce_log_stream_blocking(
         for result in iter {
             // Per-receipt deadline check bounds iteration cost across
             // blocks with many receipts.
-            if std::time::Instant::now() > deadline {
+            if Instant::now() > deadline {
                 let _ = sender.blocking_send(Err(ColdStorageError::StreamDeadlineExceeded));
                 return;
             }
@@ -165,7 +165,7 @@ fn produce_log_stream_blocking(
                 // so without this check a single block with thousands
                 // of matching logs can run arbitrarily past the
                 // deadline.
-                if std::time::Instant::now() > deadline {
+                if Instant::now() > deadline {
                     let _ = sender.blocking_send(Err(ColdStorageError::StreamDeadlineExceeded));
                     return;
                 }
@@ -226,10 +226,11 @@ fn produce_log_stream_blocking(
 ///   `tokio::time::timeout`. Callers that need fail-fast behavior on
 ///   stuck I/O should apply their own timeout at the call site.
 /// - **Writes** (`append_block`, `append_blocks`, `truncate_above`,
-///   `drain_above`) record elapsed time against `write_timeout` and
-///   emit a [`tracing::warn!`] on overrun, but the commit is
-///   uninterruptible: `write_timeout` is an SLO/alerting signal only,
-///   not a hard abort.
+///   `drain_above`) are uninterruptible MDBX commits. The handle reads
+///   the threshold via `ColdStorageBackend::write_timeout`, measures
+///   end-to-end latency (including `write_sem` wait and the read drain)
+///   against it, and emits a `tracing::warn!` on overrun;
+///   `write_timeout` is an SLO/alerting signal only, not a hard abort.
 #[derive(Clone)]
 pub struct MdbxColdBackend {
     /// The MDBX environment.
@@ -239,7 +240,7 @@ pub struct MdbxColdBackend {
     /// lookups do NOT consult this deadline — see the type-level docs.
     read_timeout: Duration,
     /// Advisory deadline for write operations. Writes that exceed this are
-    /// logged via [`tracing::warn!`] but still report success.
+    /// logged via `tracing::warn!` but still report success.
     write_timeout: Duration,
 }
 
@@ -262,17 +263,30 @@ impl MdbxColdBackend {
     /// lookups (`get_header`, `get_transaction`, etc.) do NOT consult
     /// this deadline — see the type-level docs on [`MdbxColdBackend`]
     /// for the exemption rationale and its operational implications.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `read_timeout` is zero — a zero deadline is a
+    /// configuration mistake, not a "disable" signal, and the trait
+    /// contract requires a real bound.
     #[must_use]
-    pub const fn with_read_timeout(mut self, read_timeout: Duration) -> Self {
+    pub fn with_read_timeout(mut self, read_timeout: Duration) -> Self {
+        assert!(!read_timeout.is_zero(), "read_timeout must be non-zero");
         self.read_timeout = read_timeout;
         self
     }
 
     /// Set the advisory write deadline. Writes exceeding this threshold
-    /// emit a [`tracing::warn!`] but still report success to the caller;
+    /// emit a `tracing::warn!` but still report success to the caller;
     /// MDBX commits are uninterruptible.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `write_timeout` is zero. See
+    /// [`with_read_timeout`](Self::with_read_timeout).
     #[must_use]
-    pub const fn with_write_timeout(mut self, write_timeout: Duration) -> Self {
+    pub fn with_write_timeout(mut self, write_timeout: Duration) -> Self {
+        assert!(!write_timeout.is_zero(), "write_timeout must be non-zero");
         self.write_timeout = write_timeout;
         self
     }
@@ -726,6 +740,13 @@ impl MdbxColdBackend {
                     if !filter.matches(&log) {
                         continue;
                     }
+                    // Per-log deadline check: a single receipt with
+                    // thousands of matching logs would otherwise run
+                    // unchecked past the deadline. Mirrors the
+                    // streaming path in `produce_log_stream_blocking`.
+                    if Instant::now() > deadline {
+                        return Err(MdbxColdError::Timeout(read_timeout));
+                    }
                     if results.len() >= max_logs {
                         return Err(MdbxColdError::TooManyLogs(max_logs));
                     }
@@ -928,67 +949,47 @@ impl ColdStorageRead for MdbxColdBackend {
     }
 }
 
-/// Log an advisory warning if a successful write exceeded the threshold.
-///
-/// Only logs on success: a failed write that overran the threshold already
-/// surfaces a `Backend` error to the caller, and a noisy overrun WARN would
-/// poison SLO alerting built on this signal.
-fn warn_on_overrun(op: &'static str, elapsed: Duration, threshold: Duration, is_ok: bool) {
-    if is_ok && elapsed > threshold {
-        tracing::warn!(
-            op,
-            elapsed_ms = elapsed.as_millis() as u64,
-            threshold_ms = threshold.as_millis() as u64,
-            "mdbx write exceeded advisory write timeout",
-        );
-    }
-}
-
 impl ColdStorageWrite for MdbxColdBackend {
     async fn append_block(&self, data: BlockData) -> ColdResult<()> {
-        let threshold = self.write_timeout;
         let this = self.clone();
-        let start = Instant::now();
-        let result = tokio::task::spawn_blocking(move || this.append_block_inner(data))
+        tokio::task::spawn_blocking(move || this.append_block_inner(data))
             .await
-            .map_err(|_| ColdStorageError::TaskTerminated)?;
-        warn_on_overrun("append_block", start.elapsed(), threshold, result.is_ok());
-        Ok(result?)
+            .map_err(|_| ColdStorageError::TaskTerminated)?
+            .map_err(ColdStorageError::from)
     }
 
     async fn append_blocks(&self, data: Vec<BlockData>) -> ColdResult<()> {
-        let threshold = self.write_timeout;
         let this = self.clone();
-        let start = Instant::now();
-        let result = tokio::task::spawn_blocking(move || this.append_blocks_inner(data))
+        tokio::task::spawn_blocking(move || this.append_blocks_inner(data))
             .await
-            .map_err(|_| ColdStorageError::TaskTerminated)?;
-        warn_on_overrun("append_blocks", start.elapsed(), threshold, result.is_ok());
-        Ok(result?)
+            .map_err(|_| ColdStorageError::TaskTerminated)?
+            .map_err(ColdStorageError::from)
     }
 
     async fn truncate_above(&self, block: BlockNumber) -> ColdResult<()> {
-        let threshold = self.write_timeout;
         let this = self.clone();
-        let start = Instant::now();
-        let result = tokio::task::spawn_blocking(move || this.truncate_above_inner(block))
+        tokio::task::spawn_blocking(move || this.truncate_above_inner(block))
             .await
-            .map_err(|_| ColdStorageError::TaskTerminated)?;
-        warn_on_overrun("truncate_above", start.elapsed(), threshold, result.is_ok());
-        Ok(result?)
+            .map_err(|_| ColdStorageError::TaskTerminated)?
+            .map_err(ColdStorageError::from)
     }
 }
 
 impl ColdStorageBackend for MdbxColdBackend {
+    fn read_timeout(&self) -> Option<Duration> {
+        Some(self.read_timeout)
+    }
+
+    fn write_timeout(&self) -> Option<Duration> {
+        Some(self.write_timeout)
+    }
+
     async fn drain_above(&self, block: BlockNumber) -> ColdResult<Vec<Vec<ColdReceipt>>> {
-        let threshold = self.write_timeout;
         let this = self.clone();
-        let start = Instant::now();
-        let result = tokio::task::spawn_blocking(move || this.drain_above_inner(block))
+        tokio::task::spawn_blocking(move || this.drain_above_inner(block))
             .await
-            .map_err(|_| ColdStorageError::TaskTerminated)?;
-        warn_on_overrun("drain_above", start.elapsed(), threshold, result.is_ok());
-        Ok(result?)
+            .map_err(|_| ColdStorageError::TaskTerminated)?
+            .map_err(ColdStorageError::from)
     }
 }
 
diff --git a/crates/cold-sql/src/backend.rs b/crates/cold-sql/src/backend.rs
@@ -150,8 +150,15 @@ impl SqlColdBackend {
     /// On Postgres this sets `statement_timeout` on every transaction
     /// opened by a read method. On SQLite the value is stored but
     /// not enforced — SQLite has no equivalent mechanism.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `d` is shorter than 1 ms (it truncates to 0 ms).
+    /// Postgres interprets `statement_timeout = 0` as "no timeout",
+    /// which would silently disable the trait-level mandatory-timeout contract.
     #[must_use]
-    pub const fn with_read_timeout(mut self, d: Duration) -> Self {
+    pub fn with_read_timeout(mut self, d: Duration) -> Self {
+        assert!(d.as_millis() >= 1, "read_timeout must be >= 1ms (got {d:?})");
         self.read_timeout = d;
         self
     }
@@ -161,8 +168,13 @@ impl SqlColdBackend {
     /// On Postgres this sets `statement_timeout` on every transaction
     /// opened by a write method. On SQLite the value is stored but
     /// not enforced — SQLite has no equivalent mechanism.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `d` is shorter than 1 ms. See [`with_read_timeout`](Self::with_read_timeout).
     #[must_use]
-    pub const fn with_write_timeout(mut self, d: Duration) -> Self {
+    pub fn with_write_timeout(mut self, d: Duration) -> Self {
+        assert!(d.as_millis() >= 1, "write_timeout must be >= 1ms (got {d:?})");
         self.write_timeout = d;
         self
     }
@@ -1571,6 +1583,14 @@ impl ColdStorageWrite for SqlColdBackend {
 }
 
 impl ColdStorageBackend for SqlColdBackend {
+    fn read_timeout(&self) -> Option<Duration> {
+        Some(self.read_timeout)
+    }
+
+    fn write_timeout(&self) -> Option<Duration> {
+        Some(self.write_timeout)
+    }
+
     async fn drain_above(&self, block: BlockNumber) -> ColdResult<Vec<Vec<ColdReceipt>>> {
         let bn = to_i64(block);
         let mut tx = self.begin_write().await.map_err(ColdStorageError::from)?;
diff --git a/crates/cold/src/handle.rs b/crates/cold/src/handle.rs
@@ -39,6 +39,46 @@ use tracing::Instrument;
 /// Default maximum deadline for streaming operations.
 const DEFAULT_MAX_STREAM_DEADLINE: Duration = Duration::from_secs(60);
 
+/// Default fallback for the stream-setup `get_latest_block` deadline
+/// when the backend does not advertise a [`read_timeout`]. Picked to
+/// match the SQL/MDBX defaults so behaviour is predictable.
+///
+/// [`read_timeout`]: crate::ColdStorageBackend::read_timeout
+const DEFAULT_STREAM_SETUP_TIMEOUT: Duration = Duration::from_millis(500);
+
+/// Emit an advisory WARN if a successful write exceeded its end-to-end
+/// SLO target. Only fires on `Ok`: a failed write already surfaces an
+/// error to the caller, and a noisy overrun WARN on top would poison
+/// alerting built on this signal.
+fn warn_on_write_overrun(
+    op: &'static str,
+    elapsed: Duration,
+    threshold: Option<Duration>,
+    is_ok: bool,
+) {
+    let Some(threshold) = threshold else { return };
+    if is_ok && elapsed > threshold {
+        tracing::warn!(
+            op,
+            elapsed_ms = elapsed.as_millis() as u64,
+            threshold_ms = threshold.as_millis() as u64,
+            "cold write exceeded end-to-end write timeout (queue + drain + commit)",
+        );
+    }
+}
+
+/// Log a `JoinError` from a tracked spawn before mapping to
+/// [`ColdStorageError::TaskTerminated`]. A panic inside the spawned body
+/// is otherwise indistinguishable from graceful shutdown for the
+/// caller, which is a poor on-call signal.
+fn log_join_error(op: &'static str, e: &tokio::task::JoinError) {
+    if e.is_panic() {
+        tracing::error!(op, error = %e, "cold storage spawned task panicked");
+    } else if e.is_cancelled() {
+        tracing::debug!(op, "cold storage spawned task cancelled");
+    }
+}
+
 /// Maximum concurrent read operations.
 const MAX_CONCURRENT_READERS: usize = 64;
 
@@ -170,7 +210,10 @@ impl<B: ColdStorageBackend> ColdStorage<B> {
                 .in_current_span(),
             )
             .await
-            .map_err(|_| ColdStorageError::TaskTerminated)?
+            .map_err(|e| {
+                log_join_error(op, &e);
+                ColdStorageError::TaskTerminated
+            })?
     }
 
     /// Spawn a write task under the `write_sem` permit, holding a full drain
@@ -189,15 +232,22 @@ impl<B: ColdStorageBackend> ColdStorage<B> {
         F: FnOnce(Arc<Inner<B>>) -> Fut + Send + 'static,
         Fut: std::future::Future<Output = ColdResult<T>> + Send,
     {
-        let wait = Instant::now();
+        // End-to-end SLO start: capture before permit acquisition so the
+        // measurement covers `write_sem` queueing and the read drain in
+        // addition to the backend commit. This is the failure shape the
+        // PR targets — a slow drain followed by a fast commit must surface
+        // as an SLO violation, not as a sub-threshold backend timing.
+        let e2e_start = Instant::now();
+        let threshold = self.inner.backend.write_timeout();
+
         let write_permit = self
             .inner
             .write_sem
             .clone()
             .acquire_owned()
             .await
             .map_err(|_| ColdStorageError::TaskTerminated)?;
-        metrics::record_permit_wait("write", wait.elapsed());
+        metrics::record_permit_wait("write", e2e_start.elapsed());
 
         let drain_wait = Instant::now();
         let drain = self
@@ -223,12 +273,16 @@ impl<B: ColdStorageBackend> ColdStorage<B> {
                     if let Err(ref e) = result {
                         metrics::record_op_error(op, e.kind());
                     }
+                    warn_on_write_overrun(op, e2e_start.elapsed(), threshold, result.is_ok());
                     result
                 }
                 .in_current_span(),
             )
             .await
-            .map_err(|_| ColdStorageError::TaskTerminated)?
+            .map_err(|e| {
+                log_join_error(op, &e);
+                ColdStorageError::TaskTerminated
+            })?
     }
 
     // ==========================================================================
@@ -518,15 +572,28 @@ impl<B: ColdStorageBackend> ColdStorage<B> {
         // bypass `read_sem` and the drain barrier: a stream asking for
         // "latest" should observe latest at setup time even alongside an
         // in-flight write.
+        //
+        // Wrap the setup read in a wall-clock timeout so a stuck backend
+        // (cold MDBX page, saturated PG pool) cannot stall N concurrent
+        // setup callers indefinitely. The future drops on timeout but the
+        // backend work continues — same trade-off the rest of the design
+        // accepts.
         let to = match filter.get_to_block() {
             Some(to) => to,
-            None => match self.inner.backend.get_latest_block().await? {
-                Some(latest) => latest,
-                None => {
-                    let (_tx, rx) = mpsc::channel(1);
-                    return Ok(ReceiverStream::new(rx));
+            None => {
+                let setup_to =
+                    self.inner.backend.read_timeout().unwrap_or(DEFAULT_STREAM_SETUP_TIMEOUT);
+                let latest = tokio::time::timeout(setup_to, self.inner.backend.get_latest_block())
+                    .await
+                    .map_err(|_| ColdStorageError::DeadlineExceeded(setup_to))??;
+                match latest {
+                    Some(latest) => latest,
+                    None => {
+                        let (_tx, rx) = mpsc::channel(1);
+                        return Ok(ReceiverStream::new(rx));
+                    }
                 }
-            },
+            }
         };
 
         let wait = Instant::now();
diff --git a/crates/cold/src/traits.rs b/crates/cold/src/traits.rs
diff --git a/crates/storage/src/either.rs b/crates/storage/src/either.rs