diff --git a/crates/starknet_transaction_prover/Cargo.toml b/crates/starknet_transaction_prover/Cargo.toml index e07643988fa..64f28d46536 100644 --- a/crates/starknet_transaction_prover/Cargo.toml +++ b/crates/starknet_transaction_prover/Cargo.toml @@ -45,7 +45,7 @@ starknet_patricia_storage.workspace = true starknet_proof_verifier.workspace = true tempfile.workspace = true thiserror.workspace = true -tokio = { workspace = true, features = ["macros", "process", "rt-multi-thread", "time"] } +tokio = { workspace = true, features = ["macros", "process", "rt-multi-thread", "signal", "time"] } tokio-rustls.workspace = true tower = { workspace = true, features = ["util"] } tower-http = { workspace = true, features = [ @@ -56,7 +56,7 @@ tower-http = { workspace = true, features = [ ] } tower_ohttp.workspace = true tracing.workspace = true -tracing-subscriber = { workspace = true, features = ["env-filter"] } +tracing-subscriber = { workspace = true, features = ["env-filter", "json"] } url.workspace = true [dev-dependencies] diff --git a/crates/starknet_transaction_prover/src/main.rs b/crates/starknet_transaction_prover/src/main.rs index 4dacb4c05d3..6251edf4589 100644 --- a/crates/starknet_transaction_prover/src/main.rs +++ b/crates/starknet_transaction_prover/src/main.rs @@ -22,6 +22,7 @@ async fn main() -> anyhow::Result<()> { }; use starknet_transaction_prover::server::cors::{build_cors_layer, cors_mode}; use starknet_transaction_prover::server::log_redact::redact_url_host; + use starknet_transaction_prover::server::panic::install_panic_hook; use starknet_transaction_prover::server::rpc_api::ProvingRpcServer; use starknet_transaction_prover::server::rpc_impl::ProvingRpcServerImpl; use starknet_transaction_prover::server::{ @@ -29,8 +30,9 @@ async fn main() -> anyhow::Result<()> { OhttpJsonrpseeLayer, OHTTP_JSONRPSEE_BODY_BUILDER, }; + use tokio::signal::unix::{signal, SignalKind}; use tower_ohttp::OhttpGateway; - use tracing::info; + use tracing::{info, warn}; use 
tracing_subscriber::prelude::*; use tracing_subscriber::{fmt, EnvFilter}; @@ -46,6 +48,11 @@ async fn main() -> anyhow::Result<()> { LogFormat::Text => registry.with(fmt::layer()).init(), } + // Install after tracing init so the hook's `error!` macro reaches the + // subscriber. A panic before this line still hits the default stderr + // handler. + install_panic_hook(); + let config = ServiceConfig::from_args(args)?; // Startup banner — version + chain id + redacted RPC host only. No URLs @@ -108,6 +115,66 @@ async fn main() -> anyhow::Result<()> { "JSON-RPC proving server is running." ); + // Bridge SIGTERM/SIGINT into jsonrpsee's `ServerHandle::stop` so + // container teardown becomes visible in logs. Both handlers are + // installed eagerly: if one fails, we still want the other to drive + // a graceful shutdown rather than silently dropping it. + let sigterm = signal(SignalKind::terminate()) + .inspect_err(|err| warn!(error = %err, "Failed to install SIGTERM handler")) + .ok(); + let sigint = signal(SignalKind::interrupt()) + .inspect_err(|err| warn!(error = %err, "Failed to install SIGINT handler")) + .ok(); + let shutdown_handle = server_handle.clone(); + tokio::spawn(async move { + let (mut sigterm, mut sigint) = (sigterm, sigint); + let signal_name = match (&mut sigterm, &mut sigint) { + (Some(t), Some(i)) => tokio::select! { + _ = t.recv() => "SIGTERM", + _ = i.recv() => "SIGINT", + }, + (Some(t), None) => { + t.recv().await; + "SIGTERM" + } + (None, Some(i)) => { + i.recv().await; + "SIGINT" + } + (None, None) => return, + }; + info!(event = "shutdown_started", signal = signal_name, "Shutting down JSON-RPC server."); + if let Err(err) = shutdown_handle.stop() { + warn!(error = %err, "Failed to stop JSON-RPC server cleanly"); + } + + // Stay live for a second signal and force-exit. 
Tokio's OS-level + // signal handler keeps intercepting SIGTERM/SIGINT even after the + // first one fires (tokio-rs/tokio#7905); if we let our Signal + // instances drop, a second Ctrl+C would be silently swallowed and + // a stuck graceful-shutdown could only be killed with SIGKILL. + // Re-await the already-registered handlers and exit non-zero on + // the second hit so an operator can always reclaim the process. + match (&mut sigterm, &mut sigint) { + (Some(t), Some(i)) => { + tokio::select! { + _ = t.recv() => {}, + _ = i.recv() => {}, + } + } + (Some(t), None) => { + t.recv().await; + } + (None, Some(i)) => { + i.recv().await; + } + (None, None) => return, + } + warn!(event = "force_exit", "Received second termination signal; forcing exit."); + std::process::exit(1); + }); + server_handle.stopped().await; + info!(event = "shutdown_complete", "JSON-RPC server stopped."); Ok(()) } diff --git a/crates/starknet_transaction_prover/src/server.rs b/crates/starknet_transaction_prover/src/server.rs index 7ebaf05d002..9c6fec3712c 100644 --- a/crates/starknet_transaction_prover/src/server.rs +++ b/crates/starknet_transaction_prover/src/server.rs @@ -33,6 +33,7 @@ pub mod health; pub mod log_redact; #[cfg(test)] pub mod mock_rpc; +pub mod panic; pub mod request_log; pub mod rpc_api; pub mod rpc_impl; diff --git a/crates/starknet_transaction_prover/src/server/panic.rs b/crates/starknet_transaction_prover/src/server/panic.rs new file mode 100644 index 00000000000..dcc732d0103 --- /dev/null +++ b/crates/starknet_transaction_prover/src/server/panic.rs @@ -0,0 +1,53 @@ +//! Process-wide panic hook for the prover. +//! +//! Without an explicit hook, panics in `tokio::spawn`ed work hit the runtime's +//! default handler and print to stderr in an ad-hoc format. We want one +//! structured `tracing` event with location + backtrace so log aggregators +//! can index it. The hook only emits a log line — runtime abort-on-panic +//! behavior is preserved. 
+
+use std::backtrace::Backtrace;
+use std::panic::PanicHookInfo;
+
+use tracing::error;
+
+#[cfg(test)]
+#[path = "panic_test.rs"]
+mod panic_test;
+
+/// Installs `panic_hook` as the process-wide panic hook, replacing any previously set hook.
+pub fn install_panic_hook() {
+    std::panic::set_hook(Box::new(panic_hook));
+}
+
+/// Emits one `tracing` error event carrying the panic location, message and a backtrace.
+fn panic_hook(info: &PanicHookInfo<'_>) {
+    let message = extract_payload(info);
+    let location = info
+        .location()
+        .map(|loc| format!("{}:{}:{}", loc.file(), loc.line(), loc.column()))
+        .unwrap_or_else(|| "<unknown>".to_string());
+    let backtrace = Backtrace::force_capture();
+    error!(
+        event = "panic",
+        location = %location,
+        message = %message,
+        backtrace = %backtrace,
+        "Service panicked",
+    );
+}
+
+/// Best-effort extraction of the panic payload — supports the common
+/// `panic!("string literal")` and `panic!("{fmt}", ...)` cases. Returns
+/// `"<non-string payload>"` for arbitrary types.
+///
+/// Replace with `PanicHookInfo::payload_as_str()` once the pinned toolchain
+/// (nightly-2025-07-14) ships it as stable (gated behind `panic_payload_as_str`).
+pub(crate) fn extract_payload(info: &PanicHookInfo<'_>) -> String {
+    let payload = info.payload();
+    payload
+        .downcast_ref::<&'static str>()
+        .map(|s| (*s).to_string())
+        .or_else(|| payload.downcast_ref::<String>().cloned())
+        .unwrap_or_else(|| "<non-string payload>".to_string())
+}
diff --git a/crates/starknet_transaction_prover/src/server/panic_test.rs b/crates/starknet_transaction_prover/src/server/panic_test.rs
new file mode 100644
index 00000000000..7e0f3f038a3
--- /dev/null
+++ b/crates/starknet_transaction_prover/src/server/panic_test.rs
@@ -0,0 +1,30 @@
+use std::sync::{Arc, Mutex};
+
+use crate::server::panic::extract_payload;
+
+/// Runs `f` with a temporary panic hook installed and returns the text that
+/// `extract_payload` produced for the panic, or an empty string if `f` did not panic.
+/// The hook that was active before the call is reinstalled before returning.
+fn capture_payload<F: FnOnce() + std::panic::UnwindSafe>(f: F) -> String {
+    let captured: Arc<Mutex<Option<String>>> = Arc::new(Mutex::new(None));
+    let prev_hook = std::panic::take_hook();
+    let writer = Arc::clone(&captured);
+    std::panic::set_hook(Box::new(move |info| {
+        *writer.lock().unwrap() = Some(extract_payload(info));
+    }));
+    let _ = std::panic::catch_unwind(f);
+    std::panic::set_hook(prev_hook);
+    // NOTE(review): bound to a local, presumably so the lock guard drops before `captured`.
+    let value = captured.lock().unwrap().clone().unwrap_or_default();
+    value
+}
+
+// Panic-capturing tests share global state (the panic hook), so they must
+// run serially. Keep as a single `#[test]` so ordering is explicit.
+#[test]
+fn extracts_static_str_and_formatted_payloads() {
+    assert_eq!(capture_payload(|| panic!("static literal")), "static literal");
+    assert_eq!(capture_payload(|| panic!("formatted {}", 42)), "formatted 42");
+    // Payloads that are neither `&str` nor `String` fall back to the placeholder.
+    assert_eq!(capture_payload(|| std::panic::panic_any(42_u32)), "<non-string payload>");
+}