From 7325e148363191100da6ec9bba52ce18d3a51a52 Mon Sep 17 00:00:00 2001 From: bitsandfoxes Date: Wed, 20 May 2026 20:42:53 +0200 Subject: [PATCH 1/2] first iteration --- examples/example.c | 43 +++ include/sentry.h | 42 +++ src/CMakeLists.txt | 2 + src/backends/native/sentry_crash_context.h | 16 + src/backends/native/sentry_crash_daemon.c | 361 +++++++++++++++++++-- src/backends/sentry_backend_native.c | 26 ++ src/sentry_app_hang.c | 123 +++++++ src/sentry_app_hang.h | 72 ++++ src/sentry_options.c | 2 + src/sentry_options.h | 2 + tests/test_integration_native.py | 43 +++ tests/unit/CMakeLists.txt | 1 + tests/unit/test_app_hang.c | 153 +++++++++ tests/unit/tests.inc | 11 + 14 files changed, 864 insertions(+), 33 deletions(-) create mode 100644 src/sentry_app_hang.c create mode 100644 src/sentry_app_hang.h create mode 100644 tests/unit/test_app_hang.c diff --git a/examples/example.c b/examples/example.c index 1c5ef292b..9deb10543 100644 --- a/examples/example.c +++ b/examples/example.c @@ -20,6 +20,7 @@ #ifdef SENTRY_PLATFORM_WINDOWS # include +# include # include # define sleep_s(SECONDS) Sleep((SECONDS) * 1000) #else @@ -533,6 +534,22 @@ run_threads(thread_func_t func) } #endif +#if defined(SENTRY_PLATFORM_WINDOWS) +static unsigned __stdcall +app_hang_demo_thread(void *arg) +{ + (void)arg; + /* Heartbeat for 500 ms to latch this thread as the target. */ + for (int i = 0; i < 10; i++) { + sentry_app_hang_heartbeat(); + Sleep(50); + } + /* Freeze for 3x the configured timeout (3000 ms). 
*/ + Sleep(3000); + return 0; +} +#endif + int main(int argc, char **argv) { @@ -784,6 +801,13 @@ main(int argc, char **argv) } } +#if defined(SENTRY_PLATFORM_WINDOWS) + if (has_arg(argc, argv, "app-hang")) { + sentry_options_set_app_hang_enabled(options, 1); + sentry_options_set_app_hang_timeout_ms(options, 1000); + } +#endif + // E2E test mode: generate unique test ID for event correlation char e2e_test_id[37] = { 0 }; if (has_arg(argc, argv, "e2e-test")) { @@ -795,6 +819,25 @@ main(int argc, char **argv) return EXIT_FAILURE; } +#if defined(SENTRY_PLATFORM_WINDOWS) + /* app-hang: spawn the demo thread BEFORE any other post-init work so it + * begins heartbeating immediately. The thread freezes for 3x the timeout, + * giving the daemon time to detect the hang and ship the envelope. We wait + * for it here so main does not exit before the transport has flushed. + * NOTE: this mode is intentionally exclusive – do not combine with crash/ + * abort/etc. since those would terminate the process first. */ + if (has_arg(argc, argv, "app-hang")) { + HANDLE t = (HANDLE)_beginthreadex( + NULL, 0, app_hang_demo_thread, NULL, 0, NULL); + if (t) { + WaitForSingleObject(t, INFINITE); + CloseHandle(t); + } + sentry_close(); + return EXIT_SUCCESS; + } +#endif + if (has_arg(argc, argv, "user-consent-revoke")) { sentry_user_consent_revoke(); } diff --git a/include/sentry.h b/include/sentry.h index dc68aab2a..a74e71ecc 100644 --- a/include/sentry.h +++ b/include/sentry.h @@ -1678,6 +1678,48 @@ SENTRY_EXPERIMENTAL_API void sentry_options_set_attach_session_replay( SENTRY_EXPERIMENTAL_API void sentry_options_set_session_replay_duration( sentry_options_t *opts, uint32_t duration_ms); +/** + * Enable app-hang detection in the native crash backend. + * + * When enabled, the out-of-process daemon monitors a designated thread in the + * host via a shared-memory heartbeat. 
If the heartbeat goes stale for longer + * than the configured timeout, the daemon walks the thread's stack remotely and + * emits an `ApplicationNotResponding` event. The host process keeps running. + * + * Off by default. This setting only has an effect when using the `native` + * backend. In this initial release the feature is Windows-only; the call is a + * silent no-op on other platforms. + */ +SENTRY_EXPERIMENTAL_API void sentry_options_set_app_hang_enabled( + sentry_options_t *opts, int enabled); + +/** + * Sets the heartbeat-staleness threshold (in milliseconds) used by the + * app-hang detector. Default 5000 ms. + * + * Read by the daemon once at startup; changes after `sentry_init` have no + * effect. + */ +SENTRY_EXPERIMENTAL_API void sentry_options_set_app_hang_timeout_ms( + sentry_options_t *opts, uint64_t timeout_ms); + +/** + * Signal that the calling thread is alive. + * + * Call this from the thread you want monitored (typically the main / game + * thread). The first call latches the calling thread's id as the target; + * subsequent calls from the same thread refresh the heartbeat timestamp. Calls + * from any other thread are dropped — so a stray heartbeat from a worker + * thread cannot mask a frozen main thread. + * + * Cost: approximately one system call plus a relaxed 64-bit store. Safe to + * call from a per-frame hook in a game engine. + * + * No-op if app-hang detection is not enabled in options, or if the native + * backend is not active, or on non-Windows platforms. + */ +SENTRY_EXPERIMENTAL_API void sentry_app_hang_heartbeat(void); + /** * Sets the path to the crashpad handler if the crashpad backend is used. 
* + * diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6086dbaaf..a29f7e88b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,8 @@ sentry_target_sources_cwd(sentry sentry_alloc.c sentry_alloc.h + sentry_app_hang.c + sentry_app_hang.h sentry_attachment.c sentry_attachment.h sentry_backend.c diff --git a/src/backends/native/sentry_crash_context.h b/src/backends/native/sentry_crash_context.h index fb9d0f1cb..c48da658f 100644 --- a/src/backends/native/sentry_crash_context.h +++ b/src/backends/native/sentry_crash_context.h @@ -321,6 +321,22 @@ typedef struct { uint32_t module_count; sentry_module_info_t modules[SENTRY_CRASH_MAX_MODULES]; + /* App-hang detection (Windows-only, native backend only). + * + * Sync model: + * - app_hang_enabled, app_hang_timeout_ms: written by host before daemon + * is signalled ready; read by daemon at startup. No further mutation. + * - app_hang_target_tid: latched once by host on first heartbeat (release + * store via InterlockedCompareExchange64). Daemon reads, never writes. + * - app_hang_last_heartbeat_ms: written on every heartbeat with a relaxed + * 64-bit store. Daemon reads with a relaxed load. Torn reads are + * tolerated — the daemon ignores a heartbeat ahead of its own clock and + * requires several consecutive stale ticks before firing. 
*/ + bool app_hang_enabled; + uint64_t app_hang_timeout_ms; + volatile uint64_t app_hang_target_tid; + volatile uint64_t app_hang_last_heartbeat_ms; + } sentry_crash_context_t; // Shared memory size: calculated at compile-time based on actual struct size diff --git a/src/backends/native/sentry_crash_daemon.c b/src/backends/native/sentry_crash_daemon.c index b440aa1d2..cac74f227 100644 --- a/src/backends/native/sentry_crash_daemon.c +++ b/src/backends/native/sentry_crash_daemon.c @@ -2,6 +2,7 @@ #include "minidump/sentry_minidump_writer.h" #include "sentry_alloc.h" +#include "sentry_app_hang.h" #include "sentry_attachment.h" #include "sentry_core.h" #include "sentry_crash_ipc.h" @@ -1942,15 +1943,66 @@ build_stacktrace_from_ctx(const sentry_crash_context_t *ctx) return build_stacktrace_for_thread(ctx, SIZE_MAX); } +/* Describes which kind of native event we are building. `s_crash_kind` + * drives the crash path; `s_app_hang_kind` drives the app-hang flow on + * Windows. + * + * Invariant: if `include_signal_meta` is true, `exception_type` must be NULL + * (the signal-derived path). Setting an override type AND requesting signal + * metadata is incoherent — there is no signal in the override case. + */ +typedef struct { + /* Override exception `type` string. NULL = derive from the crash signal + * (e.g. "SIGSEGV" on Unix, "EXCEPTION" on Windows). */ + const char *exception_type; + /* Override exception `value` string. Used only when `exception_type` is + * non-NULL; ignored otherwise. */ + const char *exception_value; + /* `mechanism.type` JSON value, e.g. "signalhandler" or "AppHang". */ + const char *mechanism_type; + /* `mechanism.handled` JSON value. false for fatal crashes, true for + * recoverable events like app hangs. */ + bool mechanism_handled; + /* Event `level` JSON value, e.g. "fatal" or "error". */ + const char *level; + /* Attach `mechanism.meta.signal` payload? Must be false when + * `exception_type` is non-NULL (see struct invariant). 
*/ + bool include_signal_meta; +} sentry_native_event_kind_t; + +/* Crash-path event kind: signal-derived type/value, fatal level, unhandled. */ +static const sentry_native_event_kind_t s_crash_kind = { + .exception_type = NULL, + .exception_value = NULL, + .mechanism_type = "signalhandler", + .mechanism_handled = false, + .level = "fatal", + .include_signal_meta = true, +}; + +#if defined(SENTRY_PLATFORM_WINDOWS) +/* App-hang event kind: ANR-style, handled, error level. The per-event + * `exception_value` (freeze duration message) is filled in at capture time. */ +static const sentry_native_event_kind_t s_app_hang_kind = { + .exception_type = "ApplicationNotResponding", + .exception_value = NULL, /* filled in per-event below */ + .mechanism_type = "AppHang", + .mechanism_handled = true, + .level = "error", + .include_signal_meta = false, +}; +#endif + /** * Build native crash event with exception, mechanism, and debug_meta * * @param ctx Crash context * @param event_file_path Path to event file from parent process + * @param kind Event-kind descriptor controlling exception/mechanism/level */ static sentry_value_t -build_native_crash_event( - const sentry_crash_context_t *ctx, const char *event_file_path) +build_native_crash_event(const sentry_crash_context_t *ctx, + const char *event_file_path, const sentry_native_event_kind_t *kind) { // Read base event from parent's file sentry_value_t event = sentry_value_new_null(); @@ -1976,50 +2028,69 @@ build_native_crash_event( sentry_value_set_by_key( event, "platform", sentry_value_new_string("native")); - // Set level to fatal - sentry_value_set_by_key(event, "level", sentry_value_new_string("fatal")); + // Set level (varies by event kind: "fatal" for crash, "error" for app hang) + sentry_value_set_by_key( + event, "level", sentry_value_new_string(kind->level)); // Build exception - const char *signal_name = "UNKNOWN"; + /* Function-scope so exc_value (which may point into this buffer) remains + * valid after the `else` 
block below. Previously declared inside the + * else: out of scope by the time exc_value is read -> UB per C99 6.2.4. */ + char crash_value_buf[128]; + const char *exc_type; + const char *exc_value; + + if (kind->exception_type) { + exc_type = kind->exception_type; + exc_value = kind->exception_value ? kind->exception_value : ""; + } else { + const char *signal_name; #if defined(SENTRY_PLATFORM_UNIX) - int signal_number = ctx->platform.signum; - signal_name = get_signal_name(signal_number); + signal_name = get_signal_name(ctx->platform.signum); #elif defined(SENTRY_PLATFORM_WINDOWS) - // Exception code is used directly below as unsigned - signal_name = "EXCEPTION"; + signal_name = "EXCEPTION"; +#else + signal_name = "UNKNOWN"; #endif + exc_type = signal_name; + snprintf(crash_value_buf, sizeof(crash_value_buf), "Fatal crash: %s", + signal_name); + exc_value = crash_value_buf; + } sentry_value_t exc = sentry_value_new_object(); - sentry_value_set_by_key(exc, "type", sentry_value_new_string(signal_name)); - - char value_buf[128]; - snprintf(value_buf, sizeof(value_buf), "Fatal crash: %s", signal_name); - sentry_value_set_by_key(exc, "value", sentry_value_new_string(value_buf)); + sentry_value_set_by_key(exc, "type", sentry_value_new_string(exc_type)); + sentry_value_set_by_key(exc, "value", sentry_value_new_string(exc_value)); // Add mechanism sentry_value_t mechanism = sentry_value_new_object(); - sentry_value_set_by_key( - mechanism, "type", sentry_value_new_string("signalhandler")); + sentry_value_set_by_key(mechanism, "type", + sentry_value_new_string(kind->mechanism_type)); sentry_value_set_by_key( mechanism, "synthetic", sentry_value_new_bool(true)); - sentry_value_set_by_key(mechanism, "handled", sentry_value_new_bool(false)); + sentry_value_set_by_key(mechanism, "handled", + sentry_value_new_bool(kind->mechanism_handled)); - // Add signal metadata - sentry_value_t meta = sentry_value_new_object(); - sentry_value_t signal_info = sentry_value_new_object(); + // 
Add signal metadata (only relevant for signal-handler/crash events) + if (kind->include_signal_meta) { + sentry_value_t meta = sentry_value_new_object(); + sentry_value_t signal_info = sentry_value_new_object(); #if defined(SENTRY_PLATFORM_WINDOWS) - // Windows exception codes are unsigned 32-bit values (e.g., 0xC0000005) - // Use uint64 to preserve the unsigned value for the symbolicator - sentry_value_set_by_key(signal_info, "number", - sentry_value_new_uint64((uint64_t)ctx->platform.exception_code)); + // Windows exception codes are unsigned 32-bit values (e.g., 0xC0000005) + // Use uint64 to preserve the unsigned value for the symbolicator + sentry_value_set_by_key(signal_info, "number", + sentry_value_new_uint64((uint64_t)ctx->platform.exception_code)); #else - sentry_value_set_by_key( - signal_info, "number", sentry_value_new_int32(signal_number)); + sentry_value_set_by_key(signal_info, "number", + sentry_value_new_int32(ctx->platform.signum)); #endif - sentry_value_set_by_key( - signal_info, "name", sentry_value_new_string(signal_name)); - sentry_value_set_by_key(meta, "signal", signal_info); - sentry_value_set_by_key(mechanism, "meta", meta); + /* By the struct invariant, include_signal_meta is only true when + * exception_type is NULL, so exc_type holds the signal name here. 
*/ + sentry_value_set_by_key( + signal_info, "name", sentry_value_new_string(exc_type)); + sentry_value_set_by_key(meta, "signal", signal_info); + sentry_value_set_by_key(mechanism, "meta", meta); + } sentry_value_set_by_key(exc, "mechanism", mechanism); @@ -2296,12 +2367,13 @@ static bool write_envelope_with_native_stacktrace(const sentry_options_t *options, const char *envelope_path, const sentry_crash_context_t *ctx, const char *event_file_path, const char *minidump_path, - sentry_path_t *run_folder) + sentry_path_t *run_folder, const sentry_native_event_kind_t *kind) { // Build native crash event (always include threads with names) SENTRY_DEBUGF("write_envelope_with_native_stacktrace: minidump_path=%s", minidump_path ? minidump_path : "(null)"); - sentry_value_t event = build_native_crash_event(ctx, event_file_path); + sentry_value_t event + = build_native_crash_event(ctx, event_file_path, kind); // Serialize event to JSON char *event_json = sentry_value_to_json(event); @@ -2541,6 +2613,126 @@ write_envelope_with_native_stacktrace(const sentry_options_t *options, return true; } +#if defined(SENTRY_PLATFORM_WINDOWS) +/** + * App-hang capture path (Windows). Suspends the latched target thread just long + * enough to snapshot its CONTEXT, then builds and submits an envelope using the + * same native-stacktrace path as crashes (with an AppHang event kind). + */ +static void +capture_and_send_app_hang(const sentry_options_t *options, + sentry_crash_ipc_t *ipc, uint64_t freeze_ms) +{ + /* NOTE (race, experimental Windows-only first cut): This function reads + * and mutates shmem fields (platform.context, threads[0], crashed_tid, + * num_threads) that are also written by the host's signal handler on a + * real crash. The daemon's main loop is single-threaded and the crash + * event has wait-priority 0, so we will not enter this function with a + * pending crash notification already signalled. 
The remaining narrow + * window is: the host crashes WHILE this function is running, the host's + * signal handler writes to shmem mid-capture, and we then send a + * partially-overwritten event. We accept this risk for the initial + * Windows-only implementation; mitigation (state check at entry / pause + * via an additional shmem flag) is tracked as follow-up work. */ + sentry_crash_context_t *ctx = ipc->shmem; + + /* Populate modules once per session if not already done. */ + if (ctx->module_count == 0) { + capture_modules_from_process(ctx); + } + + DWORD target_tid = (DWORD)ctx->app_hang_target_tid; + + /* Suspend the target thread and capture its CONTEXT. */ + HANDLE hThread = OpenThread(THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME + | THREAD_QUERY_INFORMATION, + FALSE, target_tid); + if (!hThread) { + SENTRY_DEBUGF("app-hang: OpenThread(%lu) failed: %lu", + (unsigned long)target_tid, GetLastError()); + return; + } + + DWORD suspend_count = SuspendThread(hThread); + if (suspend_count == (DWORD)-1) { + SENTRY_DEBUGF("app-hang: SuspendThread(%lu) failed: %lu", + (unsigned long)target_tid, GetLastError()); + CloseHandle(hThread); + return; + } + + CONTEXT thread_ctx; + memset(&thread_ctx, 0, sizeof(thread_ctx)); + thread_ctx.ContextFlags = CONTEXT_FULL; + if (!GetThreadContext(hThread, &thread_ctx)) { + SENTRY_DEBUGF( + "app-hang: GetThreadContext failed: %lu", GetLastError()); + ResumeThread(hThread); + CloseHandle(hThread); + return; + } + + /* Resume immediately; we have the snapshot we need. */ + ResumeThread(hThread); + CloseHandle(hThread); + + /* Place the snapshot in the "crashed thread" slot of the context so the + * existing event builder pulls a stacktrace out for the exception + * payload and the threads block. + * + * IMPORTANT: build_stacktrace_from_ctx() calls build_stacktrace_for_thread + * with thread_idx == SIZE_MAX, which on Windows reads from + * ctx->platform.context (NOT threads[0].context). 
We must populate both + * so the exception stacktrace uses the captured CONTEXT instead of an + * all-zero one (PC=0 -> StackWalk64 produces no frames). */ + ctx->platform.context = thread_ctx; + ctx->crashed_tid = target_tid; + ctx->platform.num_threads = 1; + ctx->platform.threads[0].thread_id = target_tid; + ctx->platform.threads[0].context = thread_ctx; + ctx->platform.threads[0].name[0] = '\0'; + + /* Build the per-event value description with the freeze duration. */ + char value_buf[128]; + snprintf(value_buf, sizeof(value_buf), + "App hang detected. Main thread blocked for %llu ms.", + (unsigned long long)freeze_ms); + sentry_native_event_kind_t kind = s_app_hang_kind; + kind.exception_value = value_buf; + + /* Build an envelope path next to the crash one. */ + char envelope_path[SENTRY_CRASH_MAX_PATH]; + int path_len = snprintf(envelope_path, sizeof(envelope_path), + "%s/sentry-app-hang-%lu-%llu.env", ctx->database_path, + (unsigned long)ctx->crashed_pid, + (unsigned long long)ctx->app_hang_last_heartbeat_ms); + + if (path_len < 0 || path_len >= (int)sizeof(envelope_path)) { + SENTRY_WARN("app-hang: envelope path truncated or invalid"); + return; + } + + bool ok = write_envelope_with_native_stacktrace(options, envelope_path, + ctx, /*event_file_path=*/NULL, /*minidump_path=*/NULL, + /*run_folder=*/NULL, &kind); + if (!ok) { + SENTRY_WARN("app-hang: failed to write envelope"); + return; + } + + /* Read envelope from disk and hand to transport. */ + sentry_path_t *env_path = sentry__path_from_str(envelope_path); + if (env_path) { + sentry_envelope_t *envelope = sentry__envelope_from_path(env_path); + if (envelope && options && options->transport) { + sentry__capture_envelope(options->transport, envelope, options); + } + sentry__path_remove(env_path); + sentry__path_free(env_path); + } +} +#endif /* SENTRY_PLATFORM_WINDOWS */ + /** * Manually write a Sentry envelope with event, minidump, and attachments. * Format matches what Crashpad's Envelope class does. 
@@ -3027,7 +3219,8 @@ sentry__process_crash(const sentry_options_t *options, sentry_crash_ipc_t *ipc) minidump_path[0] ? minidump_path : "NULL"); envelope_written = write_envelope_with_native_stacktrace(options, envelope_path, ctx, event_path, - minidump_path[0] ? minidump_path : NULL, run_folder); + minidump_path[0] ? minidump_path : NULL, run_folder, + &s_crash_kind); } else { // Mode 0 (MINIDUMP only) SENTRY_DEBUG("Writing envelope with minidump"); @@ -3458,8 +3651,109 @@ sentry__crash_daemon_main(pid_t app_pid, uint64_t app_tid, HANDLE event_handle, SENTRY_DEBUG("Entering main loop"); +#if defined(SENTRY_PLATFORM_WINDOWS) + /* Pre-populate crashed_pid so the app-hang path can OpenProcess(host). + * Both capture_modules_from_process and walk_stack_with_dbghelp use + * ctx->crashed_pid, which is otherwise only set by the host's crash + * handler. The crash handler will re-set this from the host context if + * a real crash occurs; that's a no-op (same value). */ + ipc->shmem->crashed_pid = (pid_t)app_pid; +#endif + // Daemon main loop bool crash_processed = false; + +#if defined(SENTRY_PLATFORM_WINDOWS) + /* App-hang detector state. Daemon-local; the daemon caches the timeout + * here so it does not race the host on subsequent shmem mutations. */ + const bool app_hang_enabled = ipc->shmem->app_hang_enabled; + const uint64_t app_hang_timeout_ms = ipc->shmem->app_hang_timeout_ms; + uint64_t last_fired_hb = 0; + int consecutive_stale_ticks = 0; + + HANDLE timer = NULL; + if (app_hang_enabled) { + timer = CreateWaitableTimer(NULL, FALSE, NULL); + if (!timer) { + SENTRY_WARNF("app-hang: CreateWaitableTimer failed: %lu", + GetLastError()); + } else { + /* Negative dueTime: relative; 100ns units; -5_000_000 = 500 ms. + * Period 500 ms. 
*/ + LARGE_INTEGER due_time; + due_time.QuadPart = -5000000LL; + if (!SetWaitableTimer( + timer, &due_time, 500, NULL, NULL, FALSE)) { + SENTRY_WARNF("app-hang: SetWaitableTimer failed: %lu", + GetLastError()); + CloseHandle(timer); + timer = NULL; + } + } + } + + /* Wait set: index 0 = crash event, index 1 = timer (optional). */ + HANDLE wait_handles[2]; + DWORD wait_count = 1; + wait_handles[0] = ipc->event_handle; + if (timer) { + wait_handles[1] = timer; + wait_count = 2; + } + + while (true) { + DWORD result = WaitForMultipleObjects(wait_count, wait_handles, + FALSE, SENTRY_CRASH_DAEMON_WAIT_TIMEOUT_MS); + + if (result == WAIT_OBJECT_0) { + /* Crash notification — identical logic to the cross-platform + * path below. */ + SENTRY_DEBUG("Event signaled, checking crash state"); + long state = sentry__atomic_fetch(&ipc->shmem->state); + if (state == SENTRY_CRASH_STATE_CRASHED && !crash_processed) { + SENTRY_DEBUG("Crash notification received, processing"); + sentry__process_crash(options, ipc); + crash_processed = true; + SENTRY_DEBUG("Crash processed, daemon exiting"); + break; + } + SENTRY_DEBUG("Spurious notification or already processed"); + } else if (timer && result == WAIT_OBJECT_0 + 1) { + /* Timer tick — evaluate app-hang state with strike accumulation. */ + sentry_crash_context_t *shctx = ipc->shmem; + const uint64_t hb = shctx->app_hang_last_heartbeat_ms; + const uint64_t now = sentry__app_hang_now_ms(); + int new_strikes = 0; + sentry_app_hang_decision_t d = sentry__app_hang_decide( + app_hang_enabled, hb, now, app_hang_timeout_ms, + last_fired_hb, consecutive_stale_ticks, &new_strikes); + consecutive_stale_ticks = new_strikes; + if (d == SENTRY_APP_HANG_FIRE) { + capture_and_send_app_hang(options, ipc, now - hb); + /* Always advance last_fired_hb, even if capture failed — + * prevents a retry storm against a wedged thread. The next + * heartbeat advance re-arms detection naturally. 
*/ + last_fired_hb = hb; + } + } else if (result == WAIT_TIMEOUT) { + /* Fall through to parent-liveness check below. */ + } else { + SENTRY_WARNF("daemon wait failed: %lu err=%lu", result, + GetLastError()); + break; + } + + if (!crash_processed && !is_parent_alive(ipc->parent_handle)) { + SENTRY_DEBUG("Parent process exited without crash"); + break; + } + } + + if (timer) { + CancelWaitableTimer(timer); + CloseHandle(timer); + } +#else while (true) { // Wait for crash notification (with timeout to check parent health) bool wait_result @@ -3492,6 +3786,7 @@ sentry__crash_daemon_main(pid_t app_pid, uint64_t app_tid, HANDLE event_handle, break; } } +#endif SENTRY_DEBUG("Daemon exiting"); diff --git a/src/backends/sentry_backend_native.c b/src/backends/sentry_backend_native.c index fe1af9322..cc98cde60 100644 --- a/src/backends/sentry_backend_native.c +++ b/src/backends/sentry_backend_native.c @@ -20,6 +20,7 @@ #include "sentry_alloc.h" #include "sentry_backend.h" #include "sentry_core.h" +#include "sentry_app_hang.h" #include "sentry_crash_context.h" #include "sentry_crash_daemon.h" #include "sentry_crash_handler.h" @@ -307,6 +308,18 @@ native_backend_startup( sentry__atomic_store( &ctx->user_consent, sentry__atomic_fetch(&options->run->user_consent)); + /* App-hang detection configuration. Written before the daemon is + * signalled ready, so the daemon sees consistent values at startup. + * + * NOTE: sentry__app_hang_set_shmem(ctx) is intentionally deferred until + * just before the function's successful `return 0;` below. If a later + * fallible call fails (e.g., daemon spawn) we free the IPC; registering + * the global pointer early would leave it dangling. 
*/ + ctx->app_hang_enabled = options->app_hang_enabled; + ctx->app_hang_timeout_ms = options->app_hang_timeout_ms; + ctx->app_hang_target_tid = 0; + ctx->app_hang_last_heartbeat_ms = 0; + // Set up event and breadcrumb paths sentry_path_t *run_path = options->run->run_path; sentry_path_t *db_path = options->database_path; @@ -547,6 +560,14 @@ native_backend_startup( } #endif +#if defined(SENTRY_PLATFORM_WINDOWS) && !defined(SENTRY_PLATFORM_XBOX) + /* Make this shmem block visible to sentry_app_hang_heartbeat now that + * all fallible startup steps have succeeded. If any earlier step had + * failed we would have freed the IPC and returned without ever + * registering — keeping g_app_hang_shmem == NULL. */ + sentry__app_hang_set_shmem(ctx); +#endif + SENTRY_DEBUG("native backend started successfully"); return 0; } @@ -662,6 +683,11 @@ native_backend_shutdown(sentry_backend_t *backend) // Cleanup IPC if (state->ipc) { +#if defined(SENTRY_PLATFORM_WINDOWS) && !defined(SENTRY_PLATFORM_XBOX) + /* Clear the global heartbeat pointer before the shmem backing it goes + * away, so sentry_app_hang_heartbeat() cannot write to freed memory. */ + sentry__app_hang_set_shmem(NULL); +#endif sentry__crash_ipc_free(state->ipc); state->ipc = NULL; // Prevent use-after-free } diff --git a/src/sentry_app_hang.c b/src/sentry_app_hang.c new file mode 100644 index 000000000..65e7bdb5d --- /dev/null +++ b/src/sentry_app_hang.c @@ -0,0 +1,123 @@ +#include "sentry_app_hang.h" + +#include "sentry_options.h" + +#if defined(SENTRY_PLATFORM_WINDOWS) && !defined(SENTRY_PLATFORM_XBOX) \ + && defined(SENTRY_BACKEND_NATIVE) +# include +#endif + +sentry_app_hang_decision_t +sentry__app_hang_decide(bool enabled, uint64_t hb, uint64_t now, + uint64_t timeout_ms, uint64_t last_fired_hb, + int consecutive_stale_ticks, int *out_consecutive_stale_ticks) +{ + /* Fresh or disabled paths reset the counter. 
*/ + if (!enabled || hb == 0) { + *out_consecutive_stale_ticks = 0; + return SENTRY_APP_HANG_NO_ACTION; + } + if (now < hb) { + /* Torn shmem read (possible on x86 for a non-atomic 64-bit load). + * Treat as fresh — daemon will see the real value on the next tick. */ + *out_consecutive_stale_ticks = 0; + return SENTRY_APP_HANG_NO_ACTION; + } + if ((now - hb) < timeout_ms) { + *out_consecutive_stale_ticks = 0; + return SENTRY_APP_HANG_NO_ACTION; + } + if (hb == last_fired_hb) { + /* Already fired for this freeze. Stay quiet and hold the counter at + * zero so we re-arm cleanly once the host heartbeats again. */ + *out_consecutive_stale_ticks = 0; + return SENTRY_APP_HANG_NO_ACTION; + } + /* Stale and not in cooldown — accumulate a strike. */ + int new_count = consecutive_stale_ticks + 1; + *out_consecutive_stale_ticks = new_count; + if (new_count >= SENTRY_APP_HANG_STRIKES_REQUIRED) { + return SENTRY_APP_HANG_FIRE; + } + return SENTRY_APP_HANG_NO_ACTION; +} + +/* Public setters (always compiled, no platform guard — they only mutate the + * options struct). */ +void +sentry_options_set_app_hang_enabled(sentry_options_t *opts, int enabled) +{ + if (opts) { + opts->app_hang_enabled = !!enabled; + } +} + +void +sentry_options_set_app_hang_timeout_ms( + sentry_options_t *opts, uint64_t timeout_ms) +{ + if (opts) { + opts->app_hang_timeout_ms = timeout_ms; + } +} + +#if defined(SENTRY_PLATFORM_WINDOWS) && !defined(SENTRY_PLATFORM_XBOX) \ + && defined(SENTRY_BACKEND_NATIVE) + +static sentry_crash_context_t *volatile g_app_hang_shmem = NULL; + +uint64_t +sentry__app_hang_now_ms(void) +{ + ULONGLONG ticks_100ns = 0; + /* QueryUnbiasedInterruptTime is documented signal/SEH/wait-free; the + * same source is read on both sides of the IPC. 
*/ + if (!QueryUnbiasedInterruptTime(&ticks_100ns)) { + return 0; + } + return (uint64_t)(ticks_100ns / 10000ULL); +} + +void +sentry__app_hang_set_shmem(sentry_crash_context_t *ctx) +{ + g_app_hang_shmem = ctx; +} + +void +sentry_app_hang_heartbeat(void) +{ + sentry_crash_context_t *ctx = g_app_hang_shmem; + if (!ctx || !ctx->app_hang_enabled) { + return; + } + + DWORD current_tid = GetCurrentThreadId(); + LONG64 latched = (LONG64)ctx->app_hang_target_tid; + if (latched == 0) { + /* Try to latch this thread as the target. If another thread races + * us, the loser is dropped. */ + LONG64 prev = InterlockedCompareExchange64( + (LONG64 volatile *)&ctx->app_hang_target_tid, + (LONG64)(uint64_t)current_tid, 0); + if (prev != 0 && prev != (LONG64)(uint64_t)current_tid) { + return; + } + } else if ((DWORD)latched != current_tid) { + return; + } + + /* Relaxed 64-bit store. On x64 this is a single mov. On x86 the value + * may tear, but that is OK — see the comment in sentry_crash_context.h. */ + ctx->app_hang_last_heartbeat_ms = sentry__app_hang_now_ms(); +} + +#else /* non-Windows, Xbox, or non-native backend */ + +void +sentry_app_hang_heartbeat(void) +{ + /* No-op when app-hang support is not compiled in (see #if above). */ +} + +#endif diff --git a/src/sentry_app_hang.h b/src/sentry_app_hang.h new file mode 100644 index 000000000..f146280ae --- /dev/null +++ b/src/sentry_app_hang.h @@ -0,0 +1,72 @@ +#ifndef SENTRY_APP_HANG_H_INCLUDED +#define SENTRY_APP_HANG_H_INCLUDED + +#include "sentry_boot.h" + +#include <stdbool.h> +#include <stdint.h> + +#if defined(SENTRY_PLATFORM_WINDOWS) && !defined(SENTRY_PLATFORM_XBOX) \ + && defined(SENTRY_BACKEND_NATIVE) +# include "sentry_crash_context.h" +#endif + +/** + * Decision returned by the pure decision function. Kept tiny so it can be + * exercised in unit tests without involving the daemon or shared memory. 
+ */ +typedef enum { + SENTRY_APP_HANG_NO_ACTION = 0, + SENTRY_APP_HANG_FIRE = 1, +} sentry_app_hang_decision_t; + +/* Number of consecutive timer ticks the daemon must observe a stale + * heartbeat before firing. Smooths over brief hiccups (GC pauses, swap, OS + * scheduler quanta) at the cost of ~SENTRY_APP_HANG_STRIKES_REQUIRED-1 + * extra poll periods of detection latency. */ +#define SENTRY_APP_HANG_STRIKES_REQUIRED 3 + +/** + * Pure function: should we fire an app-hang event right now? + * + * - `enabled`: the host has app-hang detection turned on. + * - `hb`: last heartbeat timestamp (host clock; 0 means + * "never heartbeated yet"). + * - `now`: daemon's current observation of the same clock. + * - `timeout_ms`: staleness threshold. + * - `last_fired_hb`: the `hb` value the daemon last fired for; used + * as cooldown so a sustained freeze fires once. + * - `consecutive_stale_ticks`: caller-tracked count of consecutive ticks on + * which the heartbeat was observed stale. + * - `out_consecutive_stale_ticks` (out): updated counter the caller should + * store. 0 if reset, otherwise incremented. + * + * Returns SENTRY_APP_HANG_FIRE iff: enabled, hb != 0, now >= hb, + * (now - hb) >= timeout_ms, hb != last_fired_hb, AND the updated stale-tick + * counter reaches SENTRY_APP_HANG_STRIKES_REQUIRED. + */ +sentry_app_hang_decision_t sentry__app_hang_decide(bool enabled, uint64_t hb, + uint64_t now, uint64_t timeout_ms, uint64_t last_fired_hb, + int consecutive_stale_ticks, int *out_consecutive_stale_ticks); + +#if defined(SENTRY_PLATFORM_WINDOWS) && !defined(SENTRY_PLATFORM_XBOX) \ + && defined(SENTRY_BACKEND_NATIVE) +/** + * Called from the native backend startup path. Stores `ctx` so that + * subsequent `sentry_app_hang_heartbeat()` calls have somewhere to write. + * Passing NULL clears the registration on backend shutdown. 
+ * + * The pointer is stored in a `volatile` global; ordering with shmem field + * initialization is the caller's responsibility (the backend writes options + * into shmem before calling this). + */ +void sentry__app_hang_set_shmem(sentry_crash_context_t *ctx); + +/** + * Return a millisecond-resolution unbiased timestamp shared between host and + * daemon. Exposed for the daemon to call as well. + */ +uint64_t sentry__app_hang_now_ms(void); +#endif + +#endif diff --git a/src/sentry_options.c b/src/sentry_options.c index 38d7beed8..79fa9ed47 100644 --- a/src/sentry_options.c +++ b/src/sentry_options.c @@ -67,6 +67,8 @@ sentry_options_new(void) opts->propagate_traceparent = false; opts->strict_trace_continuation = false; opts->crashpad_limit_stack_capture_to_sp = false; + opts->app_hang_enabled = false; + opts->app_hang_timeout_ms = 5000; opts->enable_metrics = true; opts->enable_logs = true; opts->cache_keep = SENTRY_CACHE_KEEP_NONE; diff --git a/src/sentry_options.h b/src/sentry_options.h index 39e33dc47..1711eda6f 100644 --- a/src/sentry_options.h +++ b/src/sentry_options.h @@ -50,6 +50,8 @@ struct sentry_options_s { bool propagate_traceparent; bool strict_trace_continuation; bool crashpad_limit_stack_capture_to_sp; + bool app_hang_enabled; + uint64_t app_hang_timeout_ms; sentry_cache_keep_t cache_keep; time_t cache_max_age; diff --git a/tests/test_integration_native.py b/tests/test_integration_native.py index 6d0c697b7..6ba519026 100644 --- a/tests/test_integration_native.py +++ b/tests/test_integration_native.py @@ -674,3 +674,46 @@ def test_native_cache_keep(cmake, cache_keep, unreachable_dsn): # of a file. 
time.sleep(2) assert len(list(cache_dir.glob("*.envelope"))) == 0 + + +@pytest.mark.skipif( + sys.platform != "win32", + reason="app-hang detection is Windows-only in this release", +) +def test_native_app_hang(cmake, httpserver): + """App hang detection emits exactly one ApplicationNotResponding event.""" + tmp_path = cmake(["sentry_example"], {"SENTRY_BACKEND": "native"}) + + httpserver.expect_oneshot_request("/api/123456/envelope/").respond_with_data( + "OK" + ) + + with httpserver.wait(timeout=20) as waiting: + # The example's app-hang mode heartbeats for 500 ms, then freezes for + # 3000 ms (3x the 1000 ms timeout). The daemon polls every 500 ms. + # `run` (not `run_crash`) because the example exits cleanly after the + # hang demonstration — `run_crash` expects abnormal exit. + run( + tmp_path, + "sentry_example", + ["log", "app-hang"], + env=dict(os.environ, SENTRY_DSN=make_dsn(httpserver)), + ) + assert waiting.result + + envelope = Envelope.deserialize(httpserver.log[0][0].get_data()) + event = envelope.get_event() + assert event is not None + exc = event["exception"]["values"][0] + assert exc["type"] == "ApplicationNotResponding" + assert exc["mechanism"]["type"] == "AppHang" + assert exc["mechanism"]["handled"] is True + assert exc["mechanism"]["synthetic"] is True + assert "stacktrace" in exc + frames = exc["stacktrace"]["frames"] + assert isinstance(frames, list) + assert len(frames) > 0, "stacktrace is empty — capture path may be broken" + # At least one frame should have a non-zero instruction address. 
+ assert any( + int(f.get("instruction_addr", "0"), 16) > 0 for f in frames + ), "no frame has a non-zero instruction_addr" diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index a143fd540..98bdf747c 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -21,6 +21,7 @@ add_executable(sentry_test_unit ${SENTRY_SOURCES} main.c sentry_testsupport.h + test_app_hang.c test_attachments.c test_basic.c test_cache.c diff --git a/tests/unit/test_app_hang.c b/tests/unit/test_app_hang.c new file mode 100644 index 000000000..2ab96f649 --- /dev/null +++ b/tests/unit/test_app_hang.c @@ -0,0 +1,153 @@ +#include "sentry_app_hang.h" +#include "sentry_testsupport.h" + +#include + +SENTRY_TEST(app_hang_decide_disabled_returns_no_action) +{ + int new_count = 99; + sentry_app_hang_decision_t d = sentry__app_hang_decide( + /*enabled=*/false, /*hb=*/100, /*now=*/10000, + /*timeout_ms=*/1000, /*last_fired_hb=*/0, + /*consecutive_stale_ticks=*/0, &new_count); + TEST_CHECK_INT_EQUAL(d, SENTRY_APP_HANG_NO_ACTION); + /* Disabled path resets the counter. */ + TEST_CHECK_INT_EQUAL(new_count, 0); +} + +SENTRY_TEST(app_hang_decide_no_heartbeat_yet_returns_no_action) +{ + int new_count = 99; + sentry_app_hang_decision_t d = sentry__app_hang_decide( + /*enabled=*/true, /*hb=*/0, /*now=*/10000, + /*timeout_ms=*/1000, /*last_fired_hb=*/0, + /*consecutive_stale_ticks=*/0, &new_count); + TEST_CHECK_INT_EQUAL(d, SENTRY_APP_HANG_NO_ACTION); + TEST_CHECK_INT_EQUAL(new_count, 0); +} + +SENTRY_TEST(app_hang_decide_fresh_heartbeat_returns_no_action_and_resets) +{ + int new_count = 99; + sentry_app_hang_decision_t d = sentry__app_hang_decide( + /*enabled=*/true, /*hb=*/9500, /*now=*/10000, + /*timeout_ms=*/1000, /*last_fired_hb=*/0, + /*consecutive_stale_ticks=*/2, &new_count); + TEST_CHECK_INT_EQUAL(d, SENTRY_APP_HANG_NO_ACTION); + /* Fresh heartbeat resets the strike counter even mid-accumulation. 
*/ + TEST_CHECK_INT_EQUAL(new_count, 0); +} + +SENTRY_TEST(app_hang_decide_first_stale_tick_increments_does_not_fire) +{ + int new_count = -1; + sentry_app_hang_decision_t d = sentry__app_hang_decide( + /*enabled=*/true, /*hb=*/5000, /*now=*/10000, + /*timeout_ms=*/1000, /*last_fired_hb=*/0, + /*consecutive_stale_ticks=*/0, &new_count); + TEST_CHECK_INT_EQUAL(d, SENTRY_APP_HANG_NO_ACTION); + TEST_CHECK_INT_EQUAL(new_count, 1); +} + +SENTRY_TEST(app_hang_decide_second_stale_tick_increments_does_not_fire) +{ + int new_count = -1; + sentry_app_hang_decision_t d = sentry__app_hang_decide( + /*enabled=*/true, /*hb=*/5000, /*now=*/10000, + /*timeout_ms=*/1000, /*last_fired_hb=*/0, + /*consecutive_stale_ticks=*/1, &new_count); + TEST_CHECK_INT_EQUAL(d, SENTRY_APP_HANG_NO_ACTION); + TEST_CHECK_INT_EQUAL(new_count, 2); +} + +SENTRY_TEST(app_hang_decide_third_stale_tick_fires) +{ + int new_count = -1; + sentry_app_hang_decision_t d = sentry__app_hang_decide( + /*enabled=*/true, /*hb=*/5000, /*now=*/10000, + /*timeout_ms=*/1000, /*last_fired_hb=*/0, + /*consecutive_stale_ticks=*/2, &new_count); + TEST_CHECK_INT_EQUAL(d, SENTRY_APP_HANG_FIRE); + TEST_CHECK_INT_EQUAL(new_count, 3); +} + +SENTRY_TEST(app_hang_decide_brief_hiccup_resets_strike_count) +{ + /* Simulate: 2 stale ticks, then a fresh heartbeat (counter resets), + * then 1 stale tick → must NOT fire because we lost our accumulated + * strikes when the heartbeat refreshed. 
*/ + int after_hiccup = -1; + sentry_app_hang_decision_t d = sentry__app_hang_decide( + /*enabled=*/true, /*hb=*/9800, /*now=*/10000, + /*timeout_ms=*/1000, /*last_fired_hb=*/0, + /*consecutive_stale_ticks=*/2, &after_hiccup); + TEST_CHECK_INT_EQUAL(d, SENTRY_APP_HANG_NO_ACTION); + TEST_CHECK_INT_EQUAL(after_hiccup, 0); + + int after_one_stale = -1; + d = sentry__app_hang_decide(/*enabled=*/true, /*hb=*/9800, + /*now=*/11000, /*timeout_ms=*/1000, /*last_fired_hb=*/0, + /*consecutive_stale_ticks=*/after_hiccup, &after_one_stale); + TEST_CHECK_INT_EQUAL(d, SENTRY_APP_HANG_NO_ACTION); + TEST_CHECK_INT_EQUAL(after_one_stale, 1); +} + +SENTRY_TEST(app_hang_decide_cooldown_holds_when_hb_unchanged) +{ + /* Already fired for hb=5000. Subsequent ticks must NOT re-fire even + * if 100 more stale ticks accumulate. Counter held at 0. */ + int new_count = -1; + sentry_app_hang_decision_t d = sentry__app_hang_decide( + /*enabled=*/true, /*hb=*/5000, /*now=*/20000, + /*timeout_ms=*/1000, /*last_fired_hb=*/5000, + /*consecutive_stale_ticks=*/0, &new_count); + TEST_CHECK_INT_EQUAL(d, SENTRY_APP_HANG_NO_ACTION); + TEST_CHECK_INT_EQUAL(new_count, 0); +} + +SENTRY_TEST(app_hang_decide_re_arms_after_advance_then_stall) +{ + /* hb advanced past last_fired_hb → cooldown released; need 3 fresh + * strikes again. 
*/ + int after_strike1 = -1; + sentry_app_hang_decision_t d = sentry__app_hang_decide( + /*enabled=*/true, /*hb=*/7000, /*now=*/12000, + /*timeout_ms=*/1000, /*last_fired_hb=*/5000, + /*consecutive_stale_ticks=*/0, &after_strike1); + TEST_CHECK_INT_EQUAL(d, SENTRY_APP_HANG_NO_ACTION); + TEST_CHECK_INT_EQUAL(after_strike1, 1); + + int after_strike3 = -1; + d = sentry__app_hang_decide(/*enabled=*/true, /*hb=*/7000, + /*now=*/12000, /*timeout_ms=*/1000, /*last_fired_hb=*/5000, + /*consecutive_stale_ticks=*/2, &after_strike3); + TEST_CHECK_INT_EQUAL(d, SENTRY_APP_HANG_FIRE); + TEST_CHECK_INT_EQUAL(after_strike3, 3); +} + +SENTRY_TEST(app_hang_decide_exact_timeout_boundary_with_third_strike_fires) +{ + /* now - hb == timeout_ms is still stale (>= semantics) AND the third + * strike has accumulated — fires. */ + int new_count = -1; + sentry_app_hang_decision_t d = sentry__app_hang_decide( + /*enabled=*/true, /*hb=*/9000, /*now=*/10000, + /*timeout_ms=*/1000, /*last_fired_hb=*/0, + /*consecutive_stale_ticks=*/2, &new_count); + TEST_CHECK_INT_EQUAL(d, SENTRY_APP_HANG_FIRE); + TEST_CHECK_INT_EQUAL(new_count, 3); +} + +SENTRY_TEST(app_hang_decide_torn_read_now_less_than_hb_resets) +{ + /* On x86 a non-atomic 64-bit load can tear, producing now < hb. The + * decision function treats this as fresh (no FIRE) and resets the + * strike counter so the next non-torn observation starts clean. 
*/ + int new_count = 99; + sentry_app_hang_decision_t d = sentry__app_hang_decide( + /*enabled=*/true, /*hb=*/10000, /*now=*/5000, + /*timeout_ms=*/1000, /*last_fired_hb=*/0, + /*consecutive_stale_ticks=*/2, &new_count); + TEST_CHECK_INT_EQUAL(d, SENTRY_APP_HANG_NO_ACTION); + TEST_CHECK_INT_EQUAL(new_count, 0); +} diff --git a/tests/unit/tests.inc b/tests/unit/tests.inc index d673a13e9..2298653ad 100644 --- a/tests/unit/tests.inc +++ b/tests/unit/tests.inc @@ -1,3 +1,14 @@ +XX(app_hang_decide_brief_hiccup_resets_strike_count) +XX(app_hang_decide_cooldown_holds_when_hb_unchanged) +XX(app_hang_decide_disabled_returns_no_action) +XX(app_hang_decide_exact_timeout_boundary_with_third_strike_fires) +XX(app_hang_decide_first_stale_tick_increments_does_not_fire) +XX(app_hang_decide_fresh_heartbeat_returns_no_action_and_resets) +XX(app_hang_decide_no_heartbeat_yet_returns_no_action) +XX(app_hang_decide_re_arms_after_advance_then_stall) +XX(app_hang_decide_second_stale_tick_increments_does_not_fire) +XX(app_hang_decide_third_stale_tick_fires) +XX(app_hang_decide_torn_read_now_less_than_hb_resets) XX(assert_sdk_name) XX(assert_sdk_user_agent) XX(assert_sdk_version) From 1f2c69d9a658b36a7af5ca994bdecd686bc5b983 Mon Sep 17 00:00:00 2001 From: bitsandfoxes Date: Thu, 21 May 2026 17:15:10 +0200 Subject: [PATCH 2/2] more context --- src/backends/native/sentry_crash_daemon.c | 27 ++++++- src/backends/sentry_backend_native.c | 86 ++++++++++------------- 2 files changed, 63 insertions(+), 50 deletions(-) diff --git a/src/backends/native/sentry_crash_daemon.c b/src/backends/native/sentry_crash_daemon.c index cac74f227..04286e468 100644 --- a/src/backends/native/sentry_crash_daemon.c +++ b/src/backends/native/sentry_crash_daemon.c @@ -2712,9 +2712,32 @@ capture_and_send_app_hang(const sentry_options_t *options, return; } + /* Reuse the scope file the host keeps up-to-date via flush_scope so the + * app-hang event carries the same scope context as a crash event: + * full contexts 
(os/device/gpu/app/runtime/unity/...), user, tags, + * extra, fingerprint, release/dist/env, sdk metadata, and breadcrumbs. + * The base event JSON is at ctx->event_path; the sibling run folder + * holds the `__sentry-attachments` manifest, scope attachments, + * screenshot, and session replay — all pulled in by + * write_envelope_with_native_stacktrace when run_folder is non-NULL. */ + const char *event_file_path + = ctx->event_path[0] ? ctx->event_path : NULL; + sentry_path_t *run_folder = NULL; + if (event_file_path) { + sentry_path_t *ev_path = sentry__path_from_str(event_file_path); + if (ev_path) { + run_folder = sentry__path_dir(ev_path); + sentry__path_free(ev_path); + } + } + bool ok = write_envelope_with_native_stacktrace(options, envelope_path, - ctx, /*event_file_path=*/NULL, /*minidump_path=*/NULL, - /*run_folder=*/NULL, &kind); + ctx, event_file_path, /*minidump_path=*/NULL, run_folder, &kind); + + if (run_folder) { + sentry__path_free(run_folder); + } + if (!ok) { SENTRY_WARN("app-hang: failed to write envelope"); return; diff --git a/src/backends/sentry_backend_native.c b/src/backends/sentry_backend_native.c index cc98cde60..fb3efa88b 100644 --- a/src/backends/sentry_backend_native.c +++ b/src/backends/sentry_backend_native.c @@ -788,7 +788,7 @@ native_backend_write_attachments(const sentry_path_t *event_path) static void native_backend_flush_scope( - sentry_backend_t *backend, const sentry_options_t *UNUSED(options)) + sentry_backend_t *backend, const sentry_options_t *options) { native_backend_state_t *state = (native_backend_state_t *)backend->data; if (!state || !state->event_path) { @@ -803,63 +803,53 @@ native_backend_flush_scope( return; } - // Create event with current scope + // Build an event carrying the full scope. 
The daemon reads this file at + // crash time (and at app-hang time, on Windows) to populate the base + // event with everything the user has put on the scope: contexts (os, + // device, gpu, app, runtime, plus any SDK-specific entries such as the + // Unity context), user, tags, extra, fingerprint, release/dist/env, sdk + // metadata, and breadcrumbs. We delegate the actual copy to + // sentry__scope_apply_to_event so we stay in sync with the canonical + // scope-to-event mapping used by the in-process transport path. sentry_value_t event = sentry_value_new_object(); - sentry_value_set_by_key( - event, "level", sentry__value_new_level(SENTRY_LEVEL_FATAL)); - // Apply scope with contexts (includes OS, device info from Sentry) SENTRY_WITH_SCOPE (scope) { - // Get contexts from scope (includes OS info) - sentry_value_t os_context - = sentry_value_get_by_key(scope->contexts, "os"); - if (!sentry_value_is_null(os_context)) { - sentry_value_t event_contexts = sentry_value_new_object(); - sentry_value_set_by_key(event_contexts, "os", os_context); - sentry_value_incref(os_context); + sentry__scope_apply_to_event( + scope, options, event, SENTRY_SCOPE_BREADCRUMBS); + } #if defined(SENTRY_PLATFORM_WINDOWS) - // Add device context with arch for Windows native events - // This is required for Sentry's symbolicator to process PE modules - sentry_value_t device_context = sentry_value_new_object(); - sentry_value_set_by_key( - device_context, "type", sentry_value_new_string("device")); + // Sentry's symbolicator needs `contexts.device.arch` to process PE + // modules. If the host SDK already populated a device context with arch + // (Unity does), leave it alone; otherwise synthesize a minimal device + // context so native-only consumers still work. 
+ sentry_value_t event_contexts = sentry_value_get_by_key(event, "contexts"); + if (sentry_value_is_null(event_contexts)) { + event_contexts = sentry_value_new_object(); + sentry_value_set_by_key(event, "contexts", event_contexts); + } + sentry_value_t device_context + = sentry_value_get_by_key(event_contexts, "device"); + if (sentry_value_is_null(device_context)) { + device_context = sentry_value_new_object(); + sentry_value_set_by_key( + device_context, "type", sentry_value_new_string("device")); + sentry_value_set_by_key(event_contexts, "device", device_context); + } + if (sentry_value_is_null( + sentry_value_get_by_key(device_context, "arch"))) { # if defined(_M_AMD64) - sentry_value_set_by_key( - device_context, "arch", sentry_value_new_string("x86_64")); + sentry_value_set_by_key( + device_context, "arch", sentry_value_new_string("x86_64")); # elif defined(_M_IX86) - sentry_value_set_by_key( - device_context, "arch", sentry_value_new_string("x86")); + sentry_value_set_by_key( + device_context, "arch", sentry_value_new_string("x86")); # elif defined(_M_ARM64) - sentry_value_set_by_key( - device_context, "arch", sentry_value_new_string("arm64")); + sentry_value_set_by_key( + device_context, "arch", sentry_value_new_string("arm64")); # endif - sentry_value_set_by_key(event_contexts, "device", device_context); -#endif - - sentry_value_set_by_key(event, "contexts", event_contexts); - } - - // Also copy other scope data (user, tags, extra, etc.) 
- sentry_value_t user = scope->user; - if (sentry_value_get_type(user) == SENTRY_VALUE_TYPE_OBJECT - && sentry_value_get_length(user) > 0) { - sentry_value_set_by_key(event, "user", user); - sentry_value_incref(user); - } - - sentry_value_t tags = scope->tags; - if (!sentry_value_is_null(tags)) { - sentry_value_set_by_key(event, "tags", tags); - sentry_value_incref(tags); - } - - sentry_value_t extra = scope->extra; - if (!sentry_value_is_null(extra)) { - sentry_value_set_by_key(event, "extra", extra); - sentry_value_incref(extra); - } } +#endif // Serialize to JSON (so it can be deserialized on next start) char *json_str = sentry_value_to_json(event);