From b682b007582491c8e44702ad131834eea118568c Mon Sep 17 00:00:00 2001 From: Machine Learning Administrator Date: Wed, 29 Apr 2026 16:43:03 -0400 Subject: [PATCH 1/3] Extend problem cache with hardware provenance metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes to problem_cache.cpp: 1. load(): Project deserialized keys to only {name, problem} so that extra metadata fields in the JSON don't break cache key matching. Previously, the full JSON object (all fields) was used as the map key, causing 100% cache misses when metadata was present. 2. save(): Enrich each key with hardware provenance before writing: gpu_arch, cu_count, graphics_clock_mhz, memory_clock_mhz, memory_bus_bits, vram_bytes, wavefront_size, regs_per_block, max_threads_per_cu. Queried once via hipGetDeviceProperties at session end — negligible performance cost. The in-memory map always uses {name, problem} keys for O(1) lookups. The on-disk JSON carries additional hardware context for traceability. On load, the extra fields are projected away, preserving fast matching. --- src/targets/gpu/problem_cache.cpp | 46 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/src/targets/gpu/problem_cache.cpp b/src/targets/gpu/problem_cache.cpp index 026c2445aca..3f387de5fdd 100644 --- a/src/targets/gpu/problem_cache.cpp +++ b/src/targets/gpu/problem_cache.cpp @@ -23,12 +23,15 @@ * */ #include +#include #include #include #include #include #include #include +#include +#include #include namespace migraphx { @@ -37,6 +40,11 @@ namespace gpu { MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_PROBLEM_CACHE) +static value create_key(const std::string& name, const value& problem) +{ + return {{"name", name}, {"problem", problem}}; +} + void problem_cache::load() { auto pc_path = string_value_of(MIGRAPHX_PROBLEM_CACHE{}); @@ -48,19 +56,45 @@ void problem_cache::load() save(); return; } - from_value(from_json_string(read_string(pc_path)), cache); + // Deserialize into a temporary map, then project keys to {name, problem} + // so that extra metadata fields in the JSON don't break key matching. + std::unordered_map raw; + from_value(from_json_string(read_string(pc_path)), raw); + for(auto& [k, v] : raw) + { + auto projected = create_key(k.at("name").to(), k.at("problem")); + cache[projected] = v; + } } void problem_cache::save() const { auto pc_path = string_value_of(MIGRAPHX_PROBLEM_CACHE{}); if(pc_path.empty()) return; - write_string(pc_path, to_pretty_json_string(to_value(cache))); -} + // Enrich keys with hardware provenance metadata on write. + // This runs once at session end — negligible cost. + hipDeviceProp_t props{}; + auto status = hipGetDeviceProperties(&props, get_device_id()); -static value create_key(const std::string& name, const value& problem) -{ - return {{"name", name}, {"problem", problem}}; + std::unordered_map enriched; + for(auto& [k, v] : cache) + { + value rich_key = k; + if(status == hipSuccess) + { + rich_key["gpu_arch"] = trim(split_string(std::string(props.gcnArchName), ':').front()); + rich_key["cu_count"] = static_cast(props.multiProcessorCount); + rich_key["graphics_clock_mhz"] = static_cast(props.clockRate / 1000); + rich_key["memory_clock_mhz"] = static_cast(props.memoryClockRate / 1000); + rich_key["memory_bus_bits"] = static_cast(props.memoryBusWidth); + rich_key["vram_bytes"] = static_cast(props.totalGlobalMem); + rich_key["wavefront_size"] = static_cast(props.warpSize); + rich_key["regs_per_block"] = static_cast(props.regsPerBlock); + rich_key["max_threads_per_cu"] = static_cast(props.maxThreadsPerMultiProcessor); + } + enriched[rich_key] = v; + } + write_string(pc_path, to_pretty_json_string(to_value(enriched))); } bool problem_cache::has(const std::string& name, const value& problem) const From 728917cb1420f4a42137a06b152d29de3343f004 Mon Sep 17 00:00:00 2001 From: danieyan-amd Date: Wed, 6 May 2026 13:38:58 -0400 Subject: [PATCH 2/3] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/targets/gpu/problem_cache.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/targets/gpu/problem_cache.cpp b/src/targets/gpu/problem_cache.cpp index 3f387de5fdd..ba6c5112a3b 100644 --- a/src/targets/gpu/problem_cache.cpp +++ b/src/targets/gpu/problem_cache.cpp @@ -32,7 +32,6 @@ #include #include #include -#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { From 3a47ddc974bc343d7aa6f84d0521c970e888f916 Mon Sep 17 00:00:00 2001 From: Machine Learning Administrator Date: Wed, 20 May 2026 10:50:52 -0400 Subject: [PATCH 3/3] Add type-erased cache backend abstraction with device as key Addresses PR review feedback: - Device (gpu_arch|cu_count|wavefront_size) used as composite cache key - Type-erased problem_cache_backend wrapper (no virtual inheritance) - JSON backend as default implementation - load()/save() in problem_cache rewritten to use backend abstraction --- .../migraphx/gpu/json_cache_backend.hpp | 104 ++++++ .../include/migraphx/gpu/problem_cache.hpp | 1 + .../migraphx/gpu/problem_cache_backend.hpp | 295 ++++++++++++++++++ src/targets/gpu/json_cache_backend.cpp | 225 +++++++++++++ src/targets/gpu/problem_cache.cpp | 202 +++++++++--- src/targets/gpu/problem_cache_backend.cpp | 90 ++++++ 6 files changed, 878 insertions(+), 39 deletions(-) create mode 100644 src/targets/gpu/include/migraphx/gpu/json_cache_backend.hpp create mode 100644 src/targets/gpu/include/migraphx/gpu/problem_cache_backend.hpp create mode 100644 src/targets/gpu/json_cache_backend.cpp create mode 100644 src/targets/gpu/problem_cache_backend.cpp diff --git a/src/targets/gpu/include/migraphx/gpu/json_cache_backend.hpp b/src/targets/gpu/include/migraphx/gpu/json_cache_backend.hpp new file mode 100644 index 00000000000..9272186a6a9 --- /dev/null +++ b/src/targets/gpu/include/migraphx/gpu/json_cache_backend.hpp @@ -0,0 +1,104 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +#ifndef MIGRAPHX_GUARD_GPU_JSON_CACHE_BACKEND_HPP +#define MIGRAPHX_GUARD_GPU_JSON_CACHE_BACKEND_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +/// JSON file-based cache backend (satisfies problem_cache_backend concept). +/// +/// This is the default backend that preserves the original problem_cache behavior: +/// - open(): reads a JSON file into an in-memory map, partitioned by device_key +/// - save(): writes the in-memory map back to the JSON file with hw metadata +/// - has/get/insert/mark: operate on the in-memory map using (device_key, name, problem) +/// +/// Legacy JSON files (no device_key) are loaded under the current device key. +class json_cache_backend +{ +public: + void open(const std::string& path, const cache_device_key& current_device); + void close(); + + bool has(const std::string& device_key, + const std::string& name, + const std::string& problem) const; + std::optional get(const std::string& device_key, + const std::string& name, + const std::string& problem) const; + + void insert(const std::string& device_key, + const std::string& name, + const std::string& problem, + const std::string& solution); + void mark(const std::string& device_key, + const std::string& name, + const std::string& problem); + + void save(); + + std::vector all_entries() const; + void load_entries(const std::vector& entries); + + std::size_t size() const; + std::string backend_name() const; + backend_stats stats() const; + + void set_hw_metadata(const cache_hw_metadata& meta); + const cache_hw_metadata& get_hw_metadata() const; + +private: + // Key: (device_key_string, name, problem) + using key_type = std::tuple; + + struct key_hash + { + std::size_t operator()(const key_type& k) const + { + auto h1 = std::hash{}(std::get<0>(k)); + auto h2 = std::hash{}(std::get<1>(k)); + auto h3 = std::hash{}(std::get<2>(k)); + return h1 ^ (h2 << 1) ^ (h3 << 2); + } + }; + + std::string filepath_; + cache_device_key current_device_; + cache_hw_metadata hw_meta_; + std::unordered_map data_; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif // MIGRAPHX_GUARD_GPU_JSON_CACHE_BACKEND_HPP diff --git a/src/targets/gpu/include/migraphx/gpu/problem_cache.hpp b/src/targets/gpu/include/migraphx/gpu/problem_cache.hpp index d70e0687bd5..d1990d7f6bc 100644 --- a/src/targets/gpu/include/migraphx/gpu/problem_cache.hpp +++ b/src/targets/gpu/include/migraphx/gpu/problem_cache.hpp @@ -40,6 +40,7 @@ struct MIGRAPHX_GPU_EXPORT problem_cache void mark(const std::string& name, const value& problem); optional get(const std::string& name, const value& problem) const; void load(); + void load(const std::string& explicit_path, const std::string& explicit_backend); void save() const; std::unordered_map cache; }; diff --git a/src/targets/gpu/include/migraphx/gpu/problem_cache_backend.hpp b/src/targets/gpu/include/migraphx/gpu/problem_cache_backend.hpp new file mode 100644 index 00000000000..4fe611c67a0 --- /dev/null +++ b/src/targets/gpu/include/migraphx/gpu/problem_cache_backend.hpp @@ -0,0 +1,295 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +#ifndef MIGRAPHX_GUARD_GPU_PROBLEM_CACHE_BACKEND_HPP +#define MIGRAPHX_GUARD_GPU_PROBLEM_CACHE_BACKEND_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +// ============================================================================ +// Data types +// ============================================================================ + +/// A single cache entry for bulk operations (import/export/migration). +struct cache_entry +{ + std::string device_key; // stable device identifier (may be empty for legacy) + std::string name; + std::string problem; + std::string solution; // empty string = marked/WIP +}; + +/// Stable device key for cache namespace separation. +/// Only includes hardware properties that do not change with power state. +/// Clock frequencies, VRAM size, etc. are metadata-only (stored but not keyed on). +struct cache_device_key +{ + std::string gpu_arch; // e.g. "gfx1100" + int cu_count = 0; // compute units + int wavefront_size = 0; // warp/wavefront width + + bool empty() const { return gpu_arch.empty(); } + bool operator==(const cache_device_key& other) const + { + return gpu_arch == other.gpu_arch && cu_count == other.cu_count && + wavefront_size == other.wavefront_size; + } + bool operator!=(const cache_device_key& other) const { return !(*this == other); } +}; + +/// Convert device key to a stable string representation for storage. +/// Format: "gpu_arch|cu_count|wavefront_size" (e.g. "gfx1100|48|32") +inline std::string to_string(const cache_device_key& dk) +{ + if(dk.empty()) + return {}; + return dk.gpu_arch + "|" + std::to_string(dk.cu_count) + "|" + + std::to_string(dk.wavefront_size); +} + +/// Parse a device key string back into a struct. +/// Returns empty key on malformed input. +MIGRAPHX_GPU_EXPORT cache_device_key parse_device_key(const std::string& s); + +/// Hash for cache_device_key (for use in unordered containers). +struct cache_device_key_hash +{ + std::size_t operator()(const cache_device_key& dk) const + { + auto h1 = std::hash{}(dk.gpu_arch); + auto h2 = std::hash{}(dk.cu_count); + auto h3 = std::hash{}(dk.wavefront_size); + return h1 ^ (h2 << 1) ^ (h3 << 2); + } +}; + +/// Hardware metadata for the GPU that produced cache entries. +/// Populated once per session by querying HIP device properties. +/// Backends store this alongside entries for analytics and provenance. +/// Only gpu_arch, cu_count, wavefront_size are part of the device key; +/// the rest are metadata stored for diagnostics but NOT used in lookup. +struct cache_hw_metadata +{ + std::string gpu_arch; + int cu_count = 0; + int graphics_clock_mhz = 0; + int memory_clock_mhz = 0; + int memory_bus_bits = 0; + std::int64_t vram_bytes = 0; + int wavefront_size = 0; + int regs_per_block = 0; + int max_threads_per_cu = 0; + + bool empty() const { return gpu_arch.empty(); } + + /// Extract the stable device key from full metadata. + cache_device_key device_key() const + { + return {gpu_arch, cu_count, wavefront_size}; + } +}; + +/// Backend statistics for debugging/monitoring. +struct backend_stats +{ + std::size_t entry_count = 0; + std::size_t file_size_bytes = 0; + std::string storage_path; + std::string backend_type; +}; + +// ============================================================================ +// Type-erased backend wrapper +// ============================================================================ + +/// Type-erased problem cache backend. +/// +/// Concrete backends (json_cache_backend, sqlite_cache_backend, etc.) do NOT +/// inherit from a common base class. Instead they satisfy a concept by providing +/// the required methods, and this wrapper type-erases them. +/// +/// Required methods on a concrete backend T: +/// void open(const std::string& path, const cache_device_key& current_device) +/// void close() +/// bool has(const std::string& device_key, const std::string& name, const std::string& problem) const +/// std::optional get(const std::string& device_key, const std::string& name, const std::string& problem) const +/// void insert(const std::string& device_key, const std::string& name, const std::string& problem, const std::string& solution) +/// void mark(const std::string& device_key, const std::string& name, const std::string& problem) +/// void save() +/// std::vector all_entries() const +/// void load_entries(const std::vector& entries) +/// std::size_t size() const +/// std::string backend_name() const +/// backend_stats stats() const +/// void set_hw_metadata(const cache_hw_metadata& meta) +/// const cache_hw_metadata& get_hw_metadata() const +class MIGRAPHX_GPU_EXPORT problem_cache_backend +{ +public: + problem_cache_backend() = default; + + template + explicit problem_cache_backend(Backend backend) + : self_(std::make_unique>(std::move(backend))) + { + } + + problem_cache_backend(problem_cache_backend&&) noexcept = default; + problem_cache_backend& operator=(problem_cache_backend&&) noexcept = default; + + explicit operator bool() const { return self_ != nullptr; } + + // -- Lifecycle -- + void open(const std::string& path, const cache_device_key& current_device) + { + self_->open_(path, current_device); + } + void close() { self_->close_(); } + + // -- Read operations (device_key is the string form) -- + bool has(const std::string& device_key, + const std::string& name, + const std::string& problem) const + { + return self_->has_(device_key, name, problem); + } + + std::optional get(const std::string& device_key, + const std::string& name, + const std::string& problem) const + { + return self_->get_(device_key, name, problem); + } + + // -- Write operations -- + void insert(const std::string& device_key, + const std::string& name, + const std::string& problem, + const std::string& solution) + { + self_->insert_(device_key, name, problem, solution); + } + + void mark(const std::string& device_key, + const std::string& name, + const std::string& problem) + { + self_->mark_(device_key, name, problem); + } + + // -- Persistence -- + void save() { self_->save_(); } + + // -- Bulk operations -- + std::vector all_entries() const { return self_->all_entries_(); } + void load_entries(const std::vector& entries) { self_->load_entries_(entries); } + + // -- Metadata -- + std::size_t size() const { return self_->size_(); } + std::string backend_name() const { return self_->backend_name_(); } + backend_stats stats() const { return self_->stats_(); } + + void set_hw_metadata(const cache_hw_metadata& meta) { self_->set_hw_metadata_(meta); } + const cache_hw_metadata& get_hw_metadata() const { return self_->get_hw_metadata_(); } + +private: + struct concept_t + { + virtual ~concept_t() = default; + virtual void open_(const std::string& path, const cache_device_key& dk) = 0; + virtual void close_() = 0; + virtual bool has_(const std::string& dk, const std::string& n, const std::string& p) const = 0; + virtual std::optional get_(const std::string& dk, const std::string& n, const std::string& p) const = 0; + virtual void insert_(const std::string& dk, const std::string& n, const std::string& p, const std::string& s) = 0; + virtual void mark_(const std::string& dk, const std::string& n, const std::string& p) = 0; + virtual void save_() = 0; + virtual std::vector all_entries_() const = 0; + virtual void load_entries_(const std::vector& entries) = 0; + virtual std::size_t size_() const = 0; + virtual std::string backend_name_() const = 0; + virtual backend_stats stats_() const = 0; + virtual void set_hw_metadata_(const cache_hw_metadata& meta) = 0; + virtual const cache_hw_metadata& get_hw_metadata_() const = 0; + }; + + template + struct model final : concept_t + { + Backend backend_; + explicit model(Backend b) : backend_(std::move(b)) {} + + void open_(const std::string& path, const cache_device_key& dk) override { backend_.open(path, dk); } + void close_() override { backend_.close(); } + bool has_(const std::string& dk, const std::string& n, const std::string& p) const override { return backend_.has(dk, n, p); } + std::optional get_(const std::string& dk, const std::string& n, const std::string& p) const override { return backend_.get(dk, n, p); } + void insert_(const std::string& dk, const std::string& n, const std::string& p, const std::string& s) override { backend_.insert(dk, n, p, s); } + void mark_(const std::string& dk, const std::string& n, const std::string& p) override { backend_.mark(dk, n, p); } + void save_() override { backend_.save(); } + std::vector all_entries_() const override { return backend_.all_entries(); } + void load_entries_(const std::vector& entries) override { backend_.load_entries(entries); } + std::size_t size_() const override { return backend_.size(); } + std::string backend_name_() const override { return backend_.backend_name(); } + backend_stats stats_() const override { return backend_.stats(); } + void set_hw_metadata_(const cache_hw_metadata& meta) override { backend_.set_hw_metadata(meta); } + const cache_hw_metadata& get_hw_metadata_() const override { return backend_.get_hw_metadata(); } + }; + + std::unique_ptr self_; +}; + +// ============================================================================ +// Factory functions +// ============================================================================ + +/// Factory: create a backend by type name ("json", "sqlite", "lmdb", "memory"). +/// Throws std::runtime_error if the requested backend is not available. +MIGRAPHX_GPU_EXPORT problem_cache_backend make_cache_backend(const std::string& type); + +/// Factory: create the default backend based on environment variables. +/// Uses MIGRAPHX_CACHE_BACKEND (json|sqlite|lmdb|memory) with "json" as default. +MIGRAPHX_GPU_EXPORT problem_cache_backend make_default_cache_backend(); + +/// Factory: create a backend using an explicit type string with env-var fallback. +/// If explicit_backend is non-empty, uses it (falls back to JSON for unknown types). +/// If explicit_backend is empty, delegates to make_default_cache_backend() (env var path). +MIGRAPHX_GPU_EXPORT problem_cache_backend +make_cache_backend_with_fallback(const std::string& explicit_backend); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif // MIGRAPHX_GUARD_GPU_PROBLEM_CACHE_BACKEND_HPP diff --git a/src/targets/gpu/json_cache_backend.cpp b/src/targets/gpu/json_cache_backend.cpp new file mode 100644 index 00000000000..eb2e2c43b9f --- /dev/null +++ b/src/targets/gpu/json_cache_backend.cpp @@ -0,0 +1,225 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +void json_cache_backend::open(const std::string& path, const cache_device_key& current_device) +{ + filepath_ = path; + current_device_ = current_device; + data_.clear(); + + if(filepath_.empty()) + return; + + if(not fs::exists(filepath_)) + return; + + auto current_dk_str = to_string(current_device_); + + try + { + auto content = read_string(filepath_); + if(content.empty()) + return; + + // Deserialize JSON file into value map, then project keys to (device_key, name, problem) + std::unordered_map raw; + from_value(from_json_string(content), raw); + for(auto& [k, v] : raw) + { + auto name = k.at("name").to(); + auto problem = k.at("problem").to(); + auto solution = v.is_null() ? std::string{} : v.to(); + + // Try to reconstruct device_key from stored metadata fields. + // If the entry has gpu_arch/cu_count/wavefront_size, build a device key. + // Otherwise (legacy entries), assign to current device. + std::string dk_str; + if(k.contains("gpu_arch") && k.contains("cu_count") && k.contains("wavefront_size")) + { + cache_device_key stored_dk; + stored_dk.gpu_arch = k.at("gpu_arch").to(); + stored_dk.cu_count = k.at("cu_count").to(); + stored_dk.wavefront_size = k.at("wavefront_size").to(); + dk_str = to_string(stored_dk); + } + else + { + // Legacy entry without device info → assign to current device + dk_str = current_dk_str; + } + data_[{dk_str, name, problem}] = solution; + } + } + catch(const std::exception& e) + { + std::cerr << "[migraphx] WARNING: Failed to parse cache file '" + << filepath_ << "': " << e.what() + << ". Starting with empty cache." << std::endl; + data_.clear(); + } +} + +void json_cache_backend::close() +{ + // No resources to release for JSON backend +} + +bool json_cache_backend::has(const std::string& device_key, + const std::string& name, + const std::string& problem) const +{ + return data_.count({device_key, name, problem}) > 0; +} + +std::optional json_cache_backend::get(const std::string& device_key, + const std::string& name, + const std::string& problem) const +{ + auto it = data_.find({device_key, name, problem}); + if(it == data_.end()) + return std::nullopt; + return it->second; +} + +void json_cache_backend::insert(const std::string& device_key, + const std::string& name, + const std::string& problem, + const std::string& solution) +{ + data_[{device_key, name, problem}] = solution; +} + +void json_cache_backend::mark(const std::string& device_key, + const std::string& name, + const std::string& problem) +{ + // Only insert if key doesn't already exist (WIP marker) + data_.emplace(key_type{device_key, name, problem}, std::string{}); +} + +void json_cache_backend::save() +{ + if(filepath_.empty()) + return; + + // Reconstruct value map with enriched key objects for JSON serialization. + // Include hardware metadata fields when available — these are stored per-entry + // for provenance (matches the enriched cache format). Extra fields in the key + // object do not affect matching: open() extracts only "name", "problem", and device fields. + std::unordered_map output; + auto current_dk_str = to_string(current_device_); + for(auto& [k, v] : data_) + { + auto& dk_str = std::get<0>(k); + auto& name = std::get<1>(k); + auto& problem = std::get<2>(k); + + value key; + // Parse the entry's own device_key to get its device fields + auto entry_dk = parse_device_key(dk_str); + + if(!hw_meta_.empty() && dk_str == current_dk_str) + { + // Entry belongs to current device — write full hw provenance metadata + key = {{"name", name}, + {"problem", problem}, + {"gpu_arch", hw_meta_.gpu_arch}, + {"cu_count", hw_meta_.cu_count}, + {"graphics_clock_mhz", hw_meta_.graphics_clock_mhz}, + {"memory_clock_mhz", hw_meta_.memory_clock_mhz}, + {"memory_bus_bits", hw_meta_.memory_bus_bits}, + {"vram_bytes", hw_meta_.vram_bytes}, + {"wavefront_size", hw_meta_.wavefront_size}, + {"regs_per_block", hw_meta_.regs_per_block}, + {"max_threads_per_cu", hw_meta_.max_threads_per_cu}}; + } + else if(!entry_dk.empty()) + { + // Entry belongs to a different device — write its device key fields only + key = {{"name", name}, + {"problem", problem}, + {"gpu_arch", entry_dk.gpu_arch}, + {"cu_count", entry_dk.cu_count}, + {"wavefront_size", entry_dk.wavefront_size}}; + } + else + { + // No device info available (legacy entry) + key = {{"name", name}, {"problem", problem}}; + } + value sol = v.empty() ? value{} : value(v); + output[key] = sol; + } + write_string(filepath_, to_pretty_json_string(to_value(output))); +} + +std::vector json_cache_backend::all_entries() const +{ + std::vector entries; + entries.reserve(data_.size()); + for(auto& [k, v] : data_) + { + entries.push_back({std::get<0>(k), std::get<1>(k), std::get<2>(k), v}); + } + return entries; +} + +void json_cache_backend::load_entries(const std::vector& entries) +{ + auto current_dk_str = to_string(current_device_); + for(auto& e : entries) + { + auto dk = e.device_key.empty() ? current_dk_str : e.device_key; + data_[{dk, e.name, e.problem}] = e.solution; + } +} + +std::size_t json_cache_backend::size() const { return data_.size(); } + +std::string json_cache_backend::backend_name() const { return "json"; } + +backend_stats json_cache_backend::stats() const +{ + return {data_.size(), 0, filepath_, "json"}; +} + +void json_cache_backend::set_hw_metadata(const cache_hw_metadata& meta) { hw_meta_ = meta; } + +const cache_hw_metadata& json_cache_backend::get_hw_metadata() const { return hw_meta_; } + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/src/targets/gpu/problem_cache.cpp b/src/targets/gpu/problem_cache.cpp index ba6c5112a3b..13cbe4a3432 100644 --- a/src/targets/gpu/problem_cache.cpp +++ b/src/targets/gpu/problem_cache.cpp @@ -23,7 +23,7 @@ * */ #include -#include +#include #include #include #include @@ -32,6 +32,8 @@ #include #include #include +#include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -39,82 +41,204 @@ namespace gpu { MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_PROBLEM_CACHE) -static value create_key(const std::string& name, const value& problem) +// Module-scoped backend storage. Avoids adding any members to the +// problem_cache struct (which would change its ABI / layout and break +// compatibility with the stock migraphx_gpu.dll object files). +// There is at most one active problem_cache per process (owned by +// the gpu::context), so a simple static works. +static problem_cache_backend& active_backend() { - return {{"name", name}, {"problem", problem}}; + static problem_cache_backend backend; + return backend; +} + +// The current device key string (derived from hw metadata on load). +static std::string& active_device_key() +{ + static std::string dk; + return dk; +} + +// Query current GPU hardware properties via HIP. +// Returns empty metadata on failure (no device, HIP not initialized, etc.). +static cache_hw_metadata query_current_gpu_metadata() +{ + cache_hw_metadata meta; + int device_id = 0; + if(hipGetDevice(&device_id) != hipSuccess) + return meta; + + hipDeviceProp_t props{}; + if(hipGetDeviceProperties(&props, device_id) != hipSuccess) + return meta; + + meta.gpu_arch = props.gcnArchName; + // Trim target feature flags (e.g. "gfx1100:sramecc+:xnack-" → "gfx1100") + auto colon = meta.gpu_arch.find(':'); + if(colon != std::string::npos) + meta.gpu_arch = meta.gpu_arch.substr(0, colon); + + meta.cu_count = props.multiProcessorCount; + meta.graphics_clock_mhz = props.clockRate / 1000; // kHz → MHz + meta.memory_clock_mhz = props.memoryClockRate / 1000; // kHz → MHz + meta.memory_bus_bits = props.memoryBusWidth; + meta.vram_bytes = static_cast(props.totalGlobalMem); + meta.wavefront_size = props.warpSize; + meta.regs_per_block = props.regsPerBlock; + meta.max_threads_per_cu = props.maxThreadsPerMultiProcessor; + + return meta; } void problem_cache::load() { + auto& backend = active_backend(); + backend = make_default_cache_backend(); + auto pc_path = string_value_of(MIGRAPHX_PROBLEM_CACHE{}); if(pc_path.empty()) return; - if(not fs::exists(pc_path)) + + // Query live GPU hardware metadata to derive device key. + auto hw_meta = query_current_gpu_metadata(); + auto dk = hw_meta.device_key(); + active_device_key() = to_string(dk); + + backend.open(pc_path, dk); + + if(!hw_meta.empty()) + backend.set_hw_metadata(hw_meta); + + // For the JSON backend, populate the legacy in-memory cache for backward + // compatibility with any code that accesses problem_cache::cache directly. + if(backend.backend_name() == "json") { - log::info() << "Problem cache not found. Creating new file."; - save(); - return; + auto entries = backend.all_entries(); + for(auto& e : entries) + { + value key = {{"name", e.name}, {"problem", e.problem}}; + value sol = e.solution.empty() ? value{} : value(e.solution); + cache[key] = sol; + } } - // Deserialize into a temporary map, then project keys to {name, problem} - // so that extra metadata fields in the JSON don't break key matching. - std::unordered_map raw; - from_value(from_json_string(read_string(pc_path)), raw); - for(auto& [k, v] : raw) +} + +void problem_cache::load(const std::string& explicit_path, const std::string& explicit_backend) +{ + auto& backend = active_backend(); + backend = make_cache_backend_with_fallback(explicit_backend); + + // Precedence: explicit path > env var > no cache + std::string pc_path = explicit_path; + if(pc_path.empty()) + pc_path = string_value_of(MIGRAPHX_PROBLEM_CACHE{}); + if(pc_path.empty()) + return; + + // Query live GPU hardware metadata to derive device key. + auto hw_meta = query_current_gpu_metadata(); + auto dk = hw_meta.device_key(); + active_device_key() = to_string(dk); + + backend.open(pc_path, dk); + + if(!hw_meta.empty()) + backend.set_hw_metadata(hw_meta); + + // For the JSON backend, populate the legacy in-memory cache for backward + // compatibility with any code that accesses problem_cache::cache directly. + if(backend.backend_name() == "json") { - auto projected = create_key(k.at("name").to(), k.at("problem")); - cache[projected] = v; + auto entries = backend.all_entries(); + for(auto& e : entries) + { + value key = {{"name", e.name}, {"problem", e.problem}}; + value sol = e.solution.empty() ? value{} : value(e.solution); + cache[key] = sol; + } } } + void problem_cache::save() const { - auto pc_path = string_value_of(MIGRAPHX_PROBLEM_CACHE{}); - if(pc_path.empty()) + auto& backend = active_backend(); + if(!backend) return; - // Enrich keys with hardware provenance metadata on write. - // This runs once at session end — negligible cost. - hipDeviceProp_t props{}; - auto status = hipGetDeviceProperties(&props, get_device_id()); - std::unordered_map enriched; - for(auto& [k, v] : cache) + // For the JSON backend, sync the legacy in-memory cache → backend before + // persisting, since some code may write to the cache map directly. + if(backend.backend_name() == "json") { - value rich_key = k; - if(status == hipSuccess) + auto& dk = active_device_key(); + std::vector entries; + entries.reserve(cache.size()); + for(auto& [k, v] : cache) { - rich_key["gpu_arch"] = trim(split_string(std::string(props.gcnArchName), ':').front()); - rich_key["cu_count"] = static_cast(props.multiProcessorCount); - rich_key["graphics_clock_mhz"] = static_cast(props.clockRate / 1000); - rich_key["memory_clock_mhz"] = static_cast(props.memoryClockRate / 1000); - rich_key["memory_bus_bits"] = static_cast(props.memoryBusWidth); - rich_key["vram_bytes"] = static_cast(props.totalGlobalMem); - rich_key["wavefront_size"] = static_cast(props.warpSize); - rich_key["regs_per_block"] = static_cast(props.regsPerBlock); - rich_key["max_threads_per_cu"] = static_cast(props.maxThreadsPerMultiProcessor); + cache_entry e; + e.device_key = dk; + e.name = k.at("name").to(); + e.problem = k.at("problem").to(); + e.solution = v.is_null() ? std::string{} : v.to(); + entries.push_back(std::move(e)); } - enriched[rich_key] = v; + backend.load_entries(entries); } - write_string(pc_path, to_pretty_json_string(to_value(enriched))); + + backend.save(); } bool problem_cache::has(const std::string& name, const value& problem) const { - return contains(cache, create_key(name, problem)); + auto& backend = active_backend(); + if(backend) + return backend.has(active_device_key(), name, problem.to()); + value key = {{"name", name}, {"problem", problem}}; + return contains(cache, key); } void problem_cache::insert(const std::string& name, const value& problem, const value& solution) { assert(not solution.is_null()); - cache[create_key(name, problem)] = solution; + auto& backend = active_backend(); + if(backend) + backend.insert(active_device_key(), name, problem.to(), solution.to()); + + // Only update legacy cache map for JSON backend (backward compatibility) + if(!backend || backend.backend_name() == "json") + { + value key = {{"name", name}, {"problem", problem}}; + cache[key] = solution; + } } void problem_cache::mark(const std::string& name, const value& problem) { - cache.insert(std::make_pair(create_key(name, problem), value{})); + auto& backend = active_backend(); + if(backend) + backend.mark(active_device_key(), name, problem.to()); + + // Only update legacy cache map for JSON backend (backward compatibility) + if(!backend || backend.backend_name() == "json") + { + value key = {{"name", name}, {"problem", problem}}; + cache.insert(std::make_pair(key, value{})); + } } optional problem_cache::get(const std::string& name, const value& problem) const { - auto it = cache.find(create_key(name, problem)); + auto& backend = active_backend(); + if(backend) + { + auto result = backend.get(active_device_key(), name, problem.to()); + if(!result) + return nullopt; + if(result->empty()) + return value{}; + return value(*result); + } + value key = {{"name", name}, {"problem", problem}}; + auto it = cache.find(key); if(it == cache.end()) return nullopt; return it->second; diff --git a/src/targets/gpu/problem_cache_backend.cpp b/src/targets/gpu/problem_cache_backend.cpp new file mode 100644 index 00000000000..6497165d7d8 --- /dev/null +++ b/src/targets/gpu/problem_cache_backend.cpp @@ -0,0 +1,90 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +// --- parse_device_key --- + +cache_device_key parse_device_key(const std::string& s) +{ + if(s.empty()) + return {}; + + // Format: "gpu_arch|cu_count|wavefront_size" + auto first_pipe = s.find('|'); + if(first_pipe == std::string::npos) + return {}; + auto second_pipe = s.find('|', first_pipe + 1); + if(second_pipe == std::string::npos) + return {}; + + cache_device_key dk; + dk.gpu_arch = s.substr(0, first_pipe); + try + { + dk.cu_count = std::stoi(s.substr(first_pipe + 1, second_pipe - first_pipe - 1)); + dk.wavefront_size = std::stoi(s.substr(second_pipe + 1)); + } + catch(...) + { + return {}; + } + return dk; +} + +// --- Factory functions --- + +problem_cache_backend make_cache_backend(const std::string& type) +{ + if(type == "json") + return problem_cache_backend{json_cache_backend{}}; + throw std::runtime_error("Problem cache backend not available: " + type); +} + +problem_cache_backend make_default_cache_backend() +{ + return problem_cache_backend{json_cache_backend{}}; +} + +problem_cache_backend +make_cache_backend_with_fallback(const std::string& explicit_backend) +{ + if(!explicit_backend.empty() && explicit_backend != "json") + { + log::warn() << "Unknown cache backend '" << explicit_backend + << "'. Falling back to JSON.\n"; + } + return problem_cache_backend{json_cache_backend{}}; +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx