From b4f3b7fa6ea9a6b58338f5f02441d80e83a243f7 Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Tue, 6 Jan 2026 21:16:12 -0800 Subject: [PATCH 1/3] Add print_memory Generalize the print_local_memory function to print memory from any given address space. This function will be used later to print the content of the kernarg segment (in global memory). Change-Id: I8de9a8d65fd503d68855f425e33f4ad4c5baca74 --- src/debug_agent.cpp | 102 ++++++++++++++++++++++++++++++-------------- 1 file changed, 70 insertions(+), 32 deletions(-) diff --git a/src/debug_agent.cpp b/src/debug_agent.cpp index b7f014a..2b678ec 100644 --- a/src/debug_agent.cpp +++ b/src/debug_agent.cpp @@ -387,49 +387,48 @@ print_registers (amd_dbgapi_wave_id_t wave_id) } void -print_local_memory (amd_dbgapi_wave_id_t wave_id) +print_memory (amd_dbgapi_process_id_t process_id, amd_dbgapi_wave_id_t wave_id, + amd_dbgapi_lane_id_t lane_id, + amd_dbgapi_address_space_id_t address_space_id, + amd_dbgapi_segment_address_t segment_address, + amd_dbgapi_size_t size, std::string header = {}) { - amd_dbgapi_process_id_t process_id; - DBGAPI_CHECK (amd_dbgapi_wave_get_info (wave_id, - AMD_DBGAPI_WAVE_INFO_PROCESS, - sizeof (process_id), &process_id)); - - amd_dbgapi_architecture_id_t architecture_id; - DBGAPI_CHECK ( - amd_dbgapi_wave_get_info (wave_id, AMD_DBGAPI_WAVE_INFO_ARCHITECTURE, - sizeof (architecture_id), &architecture_id)); + std::vector buffer (1024); - amd_dbgapi_address_space_id_t local_address_space_id; - DBGAPI_CHECK (amd_dbgapi_dwarf_address_space_to_address_space ( - architecture_id, 0x3 /* DW_ASPACE_AMDGPU_local */, - &local_address_space_id)); + if (!header.empty ()) + agent_out << std::endl << header; - std::vector buffer (1024); - amd_dbgapi_segment_address_t base_address{ 0 }; + if (auto pad = segment_address % 0x20; pad != 0) + { + agent_out << std::endl + << " 0x" << std::setfill ('0') << std::setw (4) + << (segment_address - pad) << ":"; + for (size_t i = 0; i < pad; i += 4) + agent_out << " "; + } while (true) { - size_t requested_size = buffer.size () * sizeof (buffer[0]); - size_t size = requested_size; - if (amd_dbgapi_read_memory (process_id, wave_id, 0, - local_address_space_id, base_address, &size, - buffer.data ()) + size_t requested_size + = std::min (buffer.size () * sizeof (buffer[0]), size); + size_t bytes_read = requested_size; + if (amd_dbgapi_read_memory (process_id, wave_id, lane_id, + address_space_id, segment_address, + &bytes_read, buffer.data ()) != AMD_DBGAPI_STATUS_SUCCESS) break; - agent_assert ((size % sizeof (buffer[0])) == 0); - buffer.resize (size / sizeof (buffer[0])); - - if (!base_address) - agent_out << std::endl << "Local memory content:"; + agent_assert ((bytes_read % sizeof (buffer[0])) == 0); + buffer.resize (bytes_read / sizeof (buffer[0])); - for (size_t i = 0, column = 0; i < buffer.size (); ++i) + for (size_t i = 0, column = (segment_address % 0x20) / 4; + i < buffer.size (); ++i) { if ((column++ % 8) == 0) { agent_out << std::endl << " 0x" << std::setfill ('0') << std::setw (4) - << (base_address + i * sizeof (buffer[0])) << ":"; + << (segment_address + i * sizeof (buffer[0])) << ":"; column = 1; } @@ -437,14 +436,53 @@ print_local_memory (amd_dbgapi_wave_id_t wave_id) << buffer[i]; } - base_address += size; + segment_address += bytes_read; + size -= bytes_read; - if (size != requested_size) + if (size == 0 || bytes_read != requested_size) break; } - if (base_address) - agent_out << std::endl; + agent_out << std::endl; +} + +void +print_local_memory (amd_dbgapi_wave_id_t wave_id) +{ + amd_dbgapi_dispatch_id_t dispatch_id; + if (auto status + = amd_dbgapi_wave_get_info (wave_id, AMD_DBGAPI_WAVE_INFO_DISPATCH, + sizeof (dispatch_id), &dispatch_id); + status != AMD_DBGAPI_STATUS_SUCCESS) + return; + + amd_dbgapi_size_t group_segment_size; + DBGAPI_CHECK (amd_dbgapi_dispatch_get_info ( + dispatch_id, AMD_DBGAPI_DISPATCH_INFO_GROUP_SEGMENT_SIZE, + sizeof (group_segment_size), &group_segment_size)); + + if (!group_segment_size) + return; + + amd_dbgapi_process_id_t process_id; + DBGAPI_CHECK (amd_dbgapi_wave_get_info (wave_id, + AMD_DBGAPI_WAVE_INFO_PROCESS, + sizeof (process_id), &process_id)); + + amd_dbgapi_architecture_id_t architecture_id; + DBGAPI_CHECK ( + amd_dbgapi_wave_get_info (wave_id, AMD_DBGAPI_WAVE_INFO_ARCHITECTURE, + sizeof (architecture_id), &architecture_id)); + + amd_dbgapi_address_space_id_t local_address_space_id; + DBGAPI_CHECK (amd_dbgapi_dwarf_address_space_to_address_space ( + architecture_id, 0x3 /* DW_ASPACE_AMDGPU_local */, + &local_address_space_id)); + + std::ostringstream oss; + oss << "Local memory content (" << group_segment_size << " bytes):"; + print_memory (process_id, wave_id, AMD_DBGAPI_LANE_NONE, + local_address_space_id, 0, -1, oss.str ().c_str ()); } void From 968251e26f0e51b7355858255805082d2f7c4cec Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Tue, 6 Jan 2026 21:15:46 -0800 Subject: [PATCH 2/3] Dump the content of the kernarg segment When available, dump the content of the kernarg segment so that kernel arguments can be checked for out of range values. Change-Id: I9d2239437a4bee5da8a1bf35aa4cbd3e8b8e1a99 --- src/debug_agent.cpp | 61 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/src/debug_agent.cpp b/src/debug_agent.cpp index 2b678ec..95c1308 100644 --- a/src/debug_agent.cpp +++ b/src/debug_agent.cpp @@ -681,7 +681,26 @@ print_wavefronts (amd_dbgapi_process_id_t process_id, bool all_wavefronts, agent_out << "wave_" << std::dec << wave_id.handle << ": pc=0x" << std::hex << pc << " (kernel_code_entry="; - if (kernel_entry) + using kernel_descriptor_t = struct + { + uint32_t group_segment_fixed_size; + uint32_t private_segment_fixed_size; + uint32_t kernarg_size; + uint8_t reserved0[4]; + int64_t kernel_code_entry_byte_offset; + uint8_t reserved1[20]; + uint32_t compute_pgm_rsrc3; + uint32_t compute_pgm_rsrc1; + uint32_t compute_pgm_rsrc2; + uint16_t kernel_code_properties; + uint16_t kernarg_preload; + uint8_t reserved2[4]; + }; + + std::optional kernarg_ptr; + decltype (kernel_descriptor_t::kernarg_size) kernarg_size{}; + + if (kernel_entry.has_value ()) { agent_out << "0x" << std::hex << *kernel_entry; @@ -695,6 +714,30 @@ print_wavefronts (amd_dbgapi_process_id_t process_id, bool all_wavefronts, agent_out << " <" << symbol->m_name << ">"; break; } + + DBGAPI_CHECK (amd_dbgapi_dispatch_get_info ( + dispatch_id, + AMD_DBGAPI_DISPATCH_INFO_KERNEL_ARGUMENT_SEGMENT_ADDRESS, + sizeof (decltype (kernarg_ptr)::value_type), + &kernarg_ptr.emplace ())); + + amd_dbgapi_global_address_t kernel_descriptor_addr; + DBGAPI_CHECK (amd_dbgapi_dispatch_get_info ( + dispatch_id, AMD_DBGAPI_DISPATCH_INFO_KERNEL_DESCRIPTOR_ADDRESS, + sizeof (kernel_descriptor_addr), &kernel_descriptor_addr)); + + amd_dbgapi_size_t requested_size = sizeof (kernarg_size); + DBGAPI_CHECK (amd_dbgapi_read_memory ( + process_id, AMD_DBGAPI_WAVE_NONE, AMD_DBGAPI_LANE_NONE, + AMD_DBGAPI_ADDRESS_SPACE_GLOBAL, + kernel_descriptor_addr + + offsetof (kernel_descriptor_t, kernarg_size), + &requested_size, &kernarg_size)); + + if (requested_size != sizeof (kernarg_size)) + kernarg_size = {}; + + agent_out << ", kernargs=0x" << std::hex << *kernarg_ptr; } else agent_out << "not available"; @@ -773,6 +816,17 @@ print_wavefronts (amd_dbgapi_process_id_t process_id, bool all_wavefronts, print_registers (wave_id); print_local_memory (wave_id); + /* If available, print the content of the kernarg segment. */ + if (kernarg_ptr.has_value () && kernarg_size != 0) + { + std::ostringstream oss; + oss << "Global memory (kernarg segment, " << kernarg_size + << " bytes):"; + print_memory (process_id, AMD_DBGAPI_WAVE_NONE, AMD_DBGAPI_LANE_NONE, + AMD_DBGAPI_ADDRESS_SPACE_GLOBAL, *kernarg_ptr, + kernarg_size, oss.str ().c_str ()); + } + /* Find the code object that contains this pc. */ code_object_t *code_object_found{ nullptr }; for ([[maybe_unused]] auto &&[id, code_object] : code_object_map) @@ -1053,8 +1107,7 @@ process_dbgapi_events (amd_dbgapi_process_id_t process_id, bool all_wavefronts, break; case AMD_DBGAPI_WAVE_STOP_REASON_ADDRESS_ERROR: - resume_exceptions - |= AMD_DBGAPI_EXCEPTION_WAVE_ADDRESS_ERROR; + resume_exceptions |= AMD_DBGAPI_EXCEPTION_WAVE_ADDRESS_ERROR; break; case AMD_DBGAPI_WAVE_STOP_REASON_ILLEGAL_INSTRUCTION: @@ -1192,7 +1245,7 @@ dbgapi_worker (int listen_fd, bool all_wavefronts, bool precise_memory, /* The initial setup is finished, notify the main thread it can go on. */ [[maybe_unused]] bool promise_available - = g_rbrk_sync.guard.load (std::memory_order::memory_order_acquire); + = g_rbrk_sync.guard.load (std::memory_order::memory_order_acquire); agent_assert (promise_available); g_rbrk_sync.promise->set_value (); From 72554bb2cf61330b1b4aa0337841631153349fd8 Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Wed, 11 Feb 2026 14:56:27 -0800 Subject: [PATCH 3/3] print_memory: align segment address and size print_memory prints memory in word-sized units. Align the segment address and requested size to the word size so that the rest of the function can rely on alignment. Turn print_memory into a template and use word_size/num_columns instead of hardcoded values. Change-Id: I8a2d0bb9017f32f90cd182dae23e55e976bb8299 --- src/debug_agent.cpp | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/src/debug_agent.cpp b/src/debug_agent.cpp index 95c1308..3e677ee 100644 --- a/src/debug_agent.cpp +++ b/src/debug_agent.cpp @@ -386,6 +386,7 @@ print_registers (amd_dbgapi_wave_id_t wave_id) free (register_class_ids); } +template void print_memory (amd_dbgapi_process_id_t process_id, amd_dbgapi_wave_id_t wave_id, amd_dbgapi_lane_id_t lane_id, @@ -393,24 +394,34 @@ print_memory (amd_dbgapi_process_id_t process_id, amd_dbgapi_wave_id_t wave_id, amd_dbgapi_segment_address_t segment_address, amd_dbgapi_size_t size, std::string header = {}) { - std::vector buffer (1024); + std::vector buffer (1024); + static constexpr amd_dbgapi_size_t word_size = sizeof (buffer[0]); + + /* Make sure the segment address and the size are aligned, the rest of + this function relies on it. */ + auto end_address = (segment_address + size + word_size - 1) & -word_size; + segment_address &= -word_size; + + /* size == -1 means we are reading as much as we can, so do not align. */ + if (size != ~amd_dbgapi_size_t{ 0 }) + size = end_address - segment_address; if (!header.empty ()) agent_out << std::endl << header; - if (auto pad = segment_address % 0x20; pad != 0) + if (auto pad = segment_address % (num_columns * word_size); pad != 0) { agent_out << std::endl << " 0x" << std::setfill ('0') << std::setw (4) << (segment_address - pad) << ":"; - for (size_t i = 0; i < pad; i += 4) - agent_out << " "; + for (size_t i = 0; i < pad; i += word_size) + agent_out << std::setfill (' ') << std::setw (2 * word_size + 1) + << ' '; } while (true) { - size_t requested_size - = std::min (buffer.size () * sizeof (buffer[0]), size); + size_t requested_size = std::min (buffer.size () * word_size, size); size_t bytes_read = requested_size; if (amd_dbgapi_read_memory (process_id, wave_id, lane_id, address_space_id, segment_address, @@ -418,22 +429,22 @@ print_memory (amd_dbgapi_process_id_t process_id, amd_dbgapi_wave_id_t wave_id, != AMD_DBGAPI_STATUS_SUCCESS) break; - agent_assert ((bytes_read % sizeof (buffer[0])) == 0); - buffer.resize (bytes_read / sizeof (buffer[0])); + agent_assert ((bytes_read % word_size) == 0); + buffer.resize (bytes_read / word_size); - for (size_t i = 0, column = (segment_address % 0x20) / 4; + for (size_t i = 0, column = (segment_address / word_size) % num_columns; i < buffer.size (); ++i) { - if ((column++ % 8) == 0) + if ((column++ % num_columns) == 0) { agent_out << std::endl << " 0x" << std::setfill ('0') << std::setw (4) - << (segment_address + i * sizeof (buffer[0])) << ":"; + << (segment_address + i * word_size) << ":"; column = 1; } - agent_out << " " << std::hex << std::setfill ('0') << std::setw (8) - << buffer[i]; + agent_out << " " << std::hex << std::setfill ('0') + << std::setw (2 * word_size) << +buffer[i]; } segment_address += bytes_read;