From b682b007582491c8e44702ad131834eea118568c Mon Sep 17 00:00:00 2001
From: Machine Learning Administrator <ML-Admin@SWPAL-ML0.amd.com>
Date: Wed, 29 Apr 2026 16:43:03 -0400
Subject: [PATCH 1/3] Extend problem cache with hardware provenance metadata
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two changes to problem_cache.cpp:

1. load(): Project deserialized keys to only {name, problem} so that
   extra metadata fields in the JSON don't break cache key matching.
   Previously, the full JSON object (all fields) was used as the map
   key, causing 100% cache misses when metadata was present.

2. save(): Enrich each key with hardware provenance before writing:
   gpu_arch, cu_count, graphics_clock_mhz, memory_clock_mhz,
   memory_bus_bits, vram_bytes, wavefront_size, regs_per_block,
   max_threads_per_cu. Queried via hipGetDeviceProperties once per
   save() call (not per entry) — negligible performance cost.

The in-memory map always uses {name, problem} keys for O(1) lookups.
The on-disk JSON carries additional hardware context for traceability.
On load, the extra fields are projected away, preserving fast matching.
---
 src/targets/gpu/problem_cache.cpp | 46 +++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 6 deletions(-)
diff --git a/src/targets/gpu/problem_cache.cpp b/src/targets/gpu/problem_cache.cpp
index 026c2445aca..3f387de5fdd 100644
--- a/src/targets/gpu/problem_cache.cpp
+++ b/src/targets/gpu/problem_cache.cpp
@@ -23,12 +23,15 @@
  *
  */
 #include <migraphx/gpu/problem_cache.hpp>
+#include <migraphx/gpu/device_name.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/json.hpp>
 #include <migraphx/env.hpp>
 #include <migraphx/serialize.hpp>
 #include <migraphx/file_buffer.hpp>
 #include <migraphx/logger.hpp>
+#include <migraphx/stringutils.hpp>
+#include <hip/hip_runtime_api.h>
 #include <iostream>
 
 namespace migraphx {
@@ -37,6 +40,11 @@ namespace gpu {
 
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_PROBLEM_CACHE)
 
+static value create_key(const std::string& name, const value& problem)
+{
+    return {{"name", name}, {"problem", problem}};
+}
+
 void problem_cache::load()
 {
     auto pc_path = string_value_of(MIGRAPHX_PROBLEM_CACHE{});
@@ -48,19 +56,45 @@ void problem_cache::load()
         save();
         return;
     }
-    from_value(from_json_string(read_string(pc_path)), cache);
+    // Deserialize into a temporary map, then project keys to {name, problem}
+    // so that extra metadata fields in the JSON don't break key matching.
+    std::unordered_map<value, value> raw;
+    from_value(from_json_string(read_string(pc_path)), raw);
+    for(auto& [k, v] : raw)
+    {
+        auto projected = create_key(k.at("name").to<std::string>(), k.at("problem"));
+        cache[projected] = v;
+    }
 }
 void problem_cache::save() const
 {
     auto pc_path = string_value_of(MIGRAPHX_PROBLEM_CACHE{});
     if(pc_path.empty())
         return;
-    write_string(pc_path, to_pretty_json_string(to_value(cache)));
-}
+    // Enrich keys with hardware provenance metadata on write.
+    // The device query runs once per save() call, not per entry — negligible cost.
+    hipDeviceProp_t props{};
+    auto status = hipGetDeviceProperties(&props, get_device_id());
 
-static value create_key(const std::string& name, const value& problem)
-{
-    return {{"name", name}, {"problem", problem}};
+    std::unordered_map<value, value> enriched;
+    for(auto& [k, v] : cache)
+    {
+        value rich_key = k;
+        if(status == hipSuccess)
+        {
+            rich_key["gpu_arch"]            = trim(split_string(std::string(props.gcnArchName), ':').front());
+            rich_key["cu_count"]            = static_cast<std::int64_t>(props.multiProcessorCount);
+            rich_key["graphics_clock_mhz"]  = static_cast<std::int64_t>(props.clockRate / 1000);
+            rich_key["memory_clock_mhz"]    = static_cast<std::int64_t>(props.memoryClockRate / 1000);
+            rich_key["memory_bus_bits"]      = static_cast<std::int64_t>(props.memoryBusWidth);
+            rich_key["vram_bytes"]           = static_cast<std::int64_t>(props.totalGlobalMem);
+            rich_key["wavefront_size"]       = static_cast<std::int64_t>(props.warpSize);
+            rich_key["regs_per_block"]       = static_cast<std::int64_t>(props.regsPerBlock);
+            rich_key["max_threads_per_cu"]   = static_cast<std::int64_t>(props.maxThreadsPerMultiProcessor);
+        }
+        enriched[rich_key] = v;
+    }
+    write_string(pc_path, to_pretty_json_string(to_value(enriched)));
 }
 
 bool problem_cache::has(const std::string& name, const value& problem) const

From 728917cb1420f4a42137a06b152d29de3343f004 Mon Sep 17 00:00:00 2001
From: danieyan-amd <daniel.yan@amd.com>
Date: Wed, 6 May 2026 13:38:58 -0400
Subject: [PATCH 2/3] Remove <iostream> include from problem_cache.cpp

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 src/targets/gpu/problem_cache.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/targets/gpu/problem_cache.cpp b/src/targets/gpu/problem_cache.cpp
index 3f387de5fdd..ba6c5112a3b 100644
--- a/src/targets/gpu/problem_cache.cpp
+++ b/src/targets/gpu/problem_cache.cpp
@@ -32,7 +32,6 @@
 #include <migraphx/logger.hpp>
 #include <migraphx/stringutils.hpp>
 #include <hip/hip_runtime_api.h>
-#include <iostream>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

From 3a47ddc974bc343d7aa6f84d0521c970e888f916 Mon Sep 17 00:00:00 2001
From: Machine Learning Administrator <ML-Admin@SWPAL-ML0.amd.com>
Date: Wed, 20 May 2026 10:50:52 -0400
Subject: [PATCH 3/3] Add type-erased cache backend abstraction with device as
 key

Addresses PR review feedback:
- Device (gpu_arch|cu_count|wavefront_size) used as composite cache key
- Type-erased problem_cache_backend wrapper (backends need no common base class)
- JSON backend as default implementation
- load()/save() in problem_cache rewritten to use backend abstraction
---
 .../migraphx/gpu/json_cache_backend.hpp       | 104 ++++++
 .../include/migraphx/gpu/problem_cache.hpp    |   1 +
 .../migraphx/gpu/problem_cache_backend.hpp    | 295 ++++++++++++++++++
 src/targets/gpu/json_cache_backend.cpp        | 225 +++++++++++++
 src/targets/gpu/problem_cache.cpp             | 202 +++++++++---
 src/targets/gpu/problem_cache_backend.cpp     |  90 ++++++
 6 files changed, 878 insertions(+), 39 deletions(-)
 create mode 100644 src/targets/gpu/include/migraphx/gpu/json_cache_backend.hpp
 create mode 100644 src/targets/gpu/include/migraphx/gpu/problem_cache_backend.hpp
 create mode 100644 src/targets/gpu/json_cache_backend.cpp
 create mode 100644 src/targets/gpu/problem_cache_backend.cpp

diff --git a/src/targets/gpu/include/migraphx/gpu/json_cache_backend.hpp b/src/targets/gpu/include/migraphx/gpu/json_cache_backend.hpp
new file mode 100644
index 00000000000..9272186a6a9
--- /dev/null
+++ b/src/targets/gpu/include/migraphx/gpu/json_cache_backend.hpp
@@ -0,0 +1,104 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+#ifndef MIGRAPHX_GUARD_GPU_JSON_CACHE_BACKEND_HPP
+#define MIGRAPHX_GUARD_GPU_JSON_CACHE_BACKEND_HPP
+
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/problem_cache_backend.hpp>
+#include <unordered_map>
+#include <string>
+#include <tuple>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+/// JSON file-based cache backend (satisfies problem_cache_backend concept).
+///
+/// This is the default backend that preserves the original problem_cache behavior:
+///   - open(): reads a JSON file into an in-memory map, partitioned by device_key
+///   - save(): writes the in-memory map back to the JSON file with hw metadata
+///   - has/get/insert/mark: operate on the in-memory map using (device_key, name, problem)
+///
+/// Legacy JSON files (no device_key) are loaded under the current device key.
+class json_cache_backend
+{
+public:
+    void open(const std::string& path, const cache_device_key& current_device);
+    void close();
+
+    bool has(const std::string& device_key,
+             const std::string& name,
+             const std::string& problem) const;
+    std::optional<std::string> get(const std::string& device_key,
+                                   const std::string& name,
+                                   const std::string& problem) const;
+
+    void insert(const std::string& device_key,
+                const std::string& name,
+                const std::string& problem,
+                const std::string& solution);
+    void mark(const std::string& device_key,
+              const std::string& name,
+              const std::string& problem);
+
+    void save();
+
+    std::vector<cache_entry> all_entries() const;
+    void load_entries(const std::vector<cache_entry>& entries);
+
+    std::size_t size() const;
+    std::string backend_name() const;
+    backend_stats stats() const;
+
+    void set_hw_metadata(const cache_hw_metadata& meta);
+    const cache_hw_metadata& get_hw_metadata() const;
+
+private:
+    // Key: (device_key_string, name, problem)
+    using key_type = std::tuple<std::string, std::string, std::string>;
+
+    struct key_hash
+    {
+        std::size_t operator()(const key_type& k) const
+        {
+            const std::hash<std::string> hs{};
+            std::size_t h = hs(std::get<0>(k)); // seed; fields below are mixed hash_combine-style
+            h ^= hs(std::get<1>(k)) + 0x9e3779b9 + (h << 6) + (h >> 2);
+            return h ^ (hs(std::get<2>(k)) + 0x9e3779b9 + (h << 6) + (h >> 2));
+        }
+    };
+
+    std::string filepath_;
+    cache_device_key current_device_;
+    cache_hw_metadata hw_meta_;
+    std::unordered_map<key_type, std::string, key_hash> data_;
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif // MIGRAPHX_GUARD_GPU_JSON_CACHE_BACKEND_HPP
diff --git a/src/targets/gpu/include/migraphx/gpu/problem_cache.hpp b/src/targets/gpu/include/migraphx/gpu/problem_cache.hpp
index d70e0687bd5..d1990d7f6bc 100644
--- a/src/targets/gpu/include/migraphx/gpu/problem_cache.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/problem_cache.hpp
@@ -40,6 +40,7 @@ struct MIGRAPHX_GPU_EXPORT problem_cache
     void mark(const std::string& name, const value& problem);
     optional<value> get(const std::string& name, const value& problem) const;
     void load();
+    void load(const std::string& explicit_path, const std::string& explicit_backend);
     void save() const;
     std::unordered_map<value, value> cache;
 };
diff --git a/src/targets/gpu/include/migraphx/gpu/problem_cache_backend.hpp b/src/targets/gpu/include/migraphx/gpu/problem_cache_backend.hpp
new file mode 100644
index 00000000000..4fe611c67a0
--- /dev/null
+++ b/src/targets/gpu/include/migraphx/gpu/problem_cache_backend.hpp
@@ -0,0 +1,295 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+#ifndef MIGRAPHX_GUARD_GPU_PROBLEM_CACHE_BACKEND_HPP
+#define MIGRAPHX_GUARD_GPU_PROBLEM_CACHE_BACKEND_HPP
+
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/export.h>
+#include <string>
+#include <vector>
+#include <memory>
+#include <optional>
+#include <utility>
+#include <cstdint>
+#include <functional>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+// ============================================================================
+// Data types
+// ============================================================================
+
+/// A single cache entry for bulk operations (import/export/migration).
+struct cache_entry
+{
+    std::string device_key; // stable device identifier (may be empty for legacy)
+    std::string name;
+    std::string problem;
+    std::string solution; // empty string = marked/WIP
+};
+
+/// Stable device key for cache namespace separation.
+/// Only includes hardware properties that do not change with power state.
+/// Clock frequencies, VRAM size, etc. are metadata-only (stored but not keyed on).
+struct cache_device_key
+{
+    std::string gpu_arch;    // e.g. "gfx1100"
+    int cu_count = 0;        // compute units
+    int wavefront_size = 0;  // warp/wavefront width
+
+    bool empty() const { return gpu_arch.empty(); }
+    bool operator==(const cache_device_key& other) const
+    {
+        return gpu_arch == other.gpu_arch && cu_count == other.cu_count &&
+               wavefront_size == other.wavefront_size;
+    }
+    bool operator!=(const cache_device_key& other) const { return !(*this == other); }
+};
+
+/// Convert device key to a stable string representation for storage.
+/// Format: "gpu_arch|cu_count|wavefront_size" (e.g. "gfx1100|48|32")
+inline std::string to_string(const cache_device_key& dk)
+{
+    if(dk.empty())
+        return {};
+    return dk.gpu_arch + "|" + std::to_string(dk.cu_count) + "|" +
+           std::to_string(dk.wavefront_size);
+}
+
+/// Parse a device key string back into a struct.
+/// Returns empty key on malformed input.
+MIGRAPHX_GPU_EXPORT cache_device_key parse_device_key(const std::string& s);
+
+/// Hash for cache_device_key (for use in unordered containers). Fields are mixed, not XOR-ed.
+struct cache_device_key_hash
+{
+    std::size_t operator()(const cache_device_key& dk) const
+    {
+        const std::hash<int> hi{};
+        std::size_t h = std::hash<std::string>{}(dk.gpu_arch);
+        h ^= hi(dk.cu_count) + 0x9e3779b9 + (h << 6) + (h >> 2);
+        return h ^ (hi(dk.wavefront_size) + 0x9e3779b9 + (h << 6) + (h >> 2));
+    }
+};
+
+/// Hardware metadata for the GPU that produced cache entries.
+/// Populated once per session by querying HIP device properties.
+/// Backends store this alongside entries for analytics and provenance.
+/// Only gpu_arch, cu_count, wavefront_size are part of the device key;
+/// the rest are metadata stored for diagnostics but NOT used in lookup.
+struct cache_hw_metadata
+{
+    std::string gpu_arch;
+    int cu_count = 0;
+    int graphics_clock_mhz = 0;
+    int memory_clock_mhz = 0;
+    int memory_bus_bits = 0;
+    std::int64_t vram_bytes = 0;
+    int wavefront_size = 0;
+    int regs_per_block = 0;
+    int max_threads_per_cu = 0;
+
+    bool empty() const { return gpu_arch.empty(); }
+
+    /// Extract the stable device key from full metadata.
+    cache_device_key device_key() const
+    {
+        return {gpu_arch, cu_count, wavefront_size};
+    }
+};
+
+/// Backend statistics for debugging/monitoring.
+struct backend_stats
+{
+    std::size_t entry_count = 0;
+    std::size_t file_size_bytes = 0;
+    std::string storage_path;
+    std::string backend_type;
+};
+
+// ============================================================================
+// Type-erased backend wrapper
+// ============================================================================
+
+/// Type-erased problem cache backend.
+///
+/// Concrete backends (json_cache_backend, sqlite_cache_backend, etc.) do NOT
+/// inherit from a common base class. Instead they satisfy a concept by providing
+/// the required methods, and this wrapper type-erases them.
+///
+/// Required methods on a concrete backend T:
+///   void open(const std::string& path, const cache_device_key& current_device)
+///   void close()
+///   bool has(const std::string& device_key, const std::string& name, const std::string& problem) const
+///   std::optional<std::string> get(const std::string& device_key, const std::string& name, const std::string& problem) const
+///   void insert(const std::string& device_key, const std::string& name, const std::string& problem, const std::string& solution)
+///   void mark(const std::string& device_key, const std::string& name, const std::string& problem)
+///   void save()
+///   std::vector<cache_entry> all_entries() const
+///   void load_entries(const std::vector<cache_entry>& entries)
+///   std::size_t size() const
+///   std::string backend_name() const
+///   backend_stats stats() const
+///   void set_hw_metadata(const cache_hw_metadata& meta)
+///   const cache_hw_metadata& get_hw_metadata() const
+class MIGRAPHX_GPU_EXPORT problem_cache_backend
+{
+public:
+    problem_cache_backend() = default;
+
+    template <class Backend>
+    explicit problem_cache_backend(Backend backend)
+        : self_(std::make_unique<model<Backend>>(std::move(backend)))
+    {
+    }
+
+    problem_cache_backend(problem_cache_backend&&) noexcept = default;
+    problem_cache_backend& operator=(problem_cache_backend&&) noexcept = default;
+
+    explicit operator bool() const { return self_ != nullptr; }
+
+    // -- Lifecycle --
+    void open(const std::string& path, const cache_device_key& current_device)
+    {
+        self_->open_(path, current_device);
+    }
+    void close() { self_->close_(); }
+
+    // -- Read operations (device_key is the string form) --
+    bool has(const std::string& device_key,
+             const std::string& name,
+             const std::string& problem) const
+    {
+        return self_->has_(device_key, name, problem);
+    }
+
+    std::optional<std::string> get(const std::string& device_key,
+                                   const std::string& name,
+                                   const std::string& problem) const
+    {
+        return self_->get_(device_key, name, problem);
+    }
+
+    // -- Write operations --
+    void insert(const std::string& device_key,
+                const std::string& name,
+                const std::string& problem,
+                const std::string& solution)
+    {
+        self_->insert_(device_key, name, problem, solution);
+    }
+
+    void mark(const std::string& device_key,
+              const std::string& name,
+              const std::string& problem)
+    {
+        self_->mark_(device_key, name, problem);
+    }
+
+    // -- Persistence --
+    void save() { self_->save_(); }
+
+    // -- Bulk operations --
+    std::vector<cache_entry> all_entries() const { return self_->all_entries_(); }
+    void load_entries(const std::vector<cache_entry>& entries) { self_->load_entries_(entries); }
+
+    // -- Metadata --
+    std::size_t size() const { return self_->size_(); }
+    std::string backend_name() const { return self_->backend_name_(); }
+    backend_stats stats() const { return self_->stats_(); }
+
+    void set_hw_metadata(const cache_hw_metadata& meta) { self_->set_hw_metadata_(meta); }
+    const cache_hw_metadata& get_hw_metadata() const { return self_->get_hw_metadata_(); }
+
+private:
+    struct concept_t
+    {
+        virtual ~concept_t() = default;
+        virtual void open_(const std::string& path, const cache_device_key& dk) = 0;
+        virtual void close_() = 0;
+        virtual bool has_(const std::string& dk, const std::string& n, const std::string& p) const = 0;
+        virtual std::optional<std::string> get_(const std::string& dk, const std::string& n, const std::string& p) const = 0;
+        virtual void insert_(const std::string& dk, const std::string& n, const std::string& p, const std::string& s) = 0;
+        virtual void mark_(const std::string& dk, const std::string& n, const std::string& p) = 0;
+        virtual void save_() = 0;
+        virtual std::vector<cache_entry> all_entries_() const = 0;
+        virtual void load_entries_(const std::vector<cache_entry>& entries) = 0;
+        virtual std::size_t size_() const = 0;
+        virtual std::string backend_name_() const = 0;
+        virtual backend_stats stats_() const = 0;
+        virtual void set_hw_metadata_(const cache_hw_metadata& meta) = 0;
+        virtual const cache_hw_metadata& get_hw_metadata_() const = 0;
+    };
+
+    template <class Backend>
+    struct model final : concept_t
+    {
+        Backend backend_;
+        explicit model(Backend b) : backend_(std::move(b)) {}
+
+        void open_(const std::string& path, const cache_device_key& dk) override { backend_.open(path, dk); }
+        void close_() override { backend_.close(); }
+        bool has_(const std::string& dk, const std::string& n, const std::string& p) const override { return backend_.has(dk, n, p); }
+        std::optional<std::string> get_(const std::string& dk, const std::string& n, const std::string& p) const override { return backend_.get(dk, n, p); }
+        void insert_(const std::string& dk, const std::string& n, const std::string& p, const std::string& s) override { backend_.insert(dk, n, p, s); }
+        void mark_(const std::string& dk, const std::string& n, const std::string& p) override { backend_.mark(dk, n, p); }
+        void save_() override { backend_.save(); }
+        std::vector<cache_entry> all_entries_() const override { return backend_.all_entries(); }
+        void load_entries_(const std::vector<cache_entry>& entries) override { backend_.load_entries(entries); }
+        std::size_t size_() const override { return backend_.size(); }
+        std::string backend_name_() const override { return backend_.backend_name(); }
+        backend_stats stats_() const override { return backend_.stats(); }
+        void set_hw_metadata_(const cache_hw_metadata& meta) override { backend_.set_hw_metadata(meta); }
+        const cache_hw_metadata& get_hw_metadata_() const override { return backend_.get_hw_metadata(); }
+    };
+
+    std::unique_ptr<concept_t> self_;
+};
+
+// ============================================================================
+// Factory functions
+// ============================================================================
+
+/// Factory: create a backend by type name ("json", "sqlite", "lmdb", "memory").
+/// Throws std::runtime_error if the requested backend is not available.
+MIGRAPHX_GPU_EXPORT problem_cache_backend make_cache_backend(const std::string& type);
+
+/// Factory: create the default backend based on environment variables.
+/// Uses MIGRAPHX_CACHE_BACKEND (json|sqlite|lmdb|memory) with "json" as default.
+MIGRAPHX_GPU_EXPORT problem_cache_backend make_default_cache_backend();
+
+/// Factory: create a backend using an explicit type string with env-var fallback.
+/// If explicit_backend is non-empty, uses it (falls back to JSON for unknown types).
+/// If explicit_backend is empty, delegates to make_default_cache_backend() (env var path).
+MIGRAPHX_GPU_EXPORT problem_cache_backend
+make_cache_backend_with_fallback(const std::string& explicit_backend);
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif // MIGRAPHX_GUARD_GPU_PROBLEM_CACHE_BACKEND_HPP
diff --git a/src/targets/gpu/json_cache_backend.cpp b/src/targets/gpu/json_cache_backend.cpp
new file mode 100644
index 00000000000..eb2e2c43b9f
--- /dev/null
+++ b/src/targets/gpu/json_cache_backend.cpp
@@ -0,0 +1,225 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+#include <migraphx/gpu/json_cache_backend.hpp>
+#include <migraphx/json.hpp>
+#include <migraphx/serialize.hpp>
+#include <migraphx/file_buffer.hpp>
+#include <migraphx/value.hpp>
+#include <migraphx/filesystem.hpp>
+#include <iostream>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+void json_cache_backend::open(const std::string& path, const cache_device_key& current_device)
+{
+    filepath_ = path;
+    current_device_ = current_device;
+    data_.clear();
+    // Nothing to read for an empty path or a missing file: start with an empty cache.
+    if(filepath_.empty())
+        return;
+
+    if(not fs::exists(filepath_))
+        return;
+
+    const auto current_dk_str = to_string(current_device_);
+
+    try
+    {
+        auto content = read_string(filepath_);
+        if(content.empty())
+            return;
+
+        // Deserialize JSON file into value map, then project keys to (device_key, name, problem)
+        std::unordered_map<value, value> raw;
+        from_value(from_json_string(content), raw);
+        for(const auto& [k, v] : raw)
+        {
+            auto name    = k.at("name").to<std::string>();
+            auto problem = k.at("problem").to<std::string>();
+            auto solution = v.is_null() ? std::string{} : v.to<std::string>();
+
+            // Try to reconstruct device_key from stored metadata fields.
+            // If the entry has gpu_arch/cu_count/wavefront_size, build a device key.
+            // Otherwise (legacy entries), assign to current device.
+            std::string dk_str;
+            if(k.contains("gpu_arch") and k.contains("cu_count") and k.contains("wavefront_size"))
+            {
+                cache_device_key stored_dk;
+                stored_dk.gpu_arch = k.at("gpu_arch").to<std::string>();
+                stored_dk.cu_count = k.at("cu_count").to<int>();
+                stored_dk.wavefront_size = k.at("wavefront_size").to<int>();
+                dk_str = to_string(stored_dk);
+            }
+            else
+            {
+                // Legacy entry without device info → assign to current device
+                dk_str = current_dk_str;
+            }
+            data_[{dk_str, name, problem}] = solution;
+        }
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "[migraphx] WARNING: Failed to parse cache file '"
+                  << filepath_ << "': " << e.what()
+                  << ". Starting with empty cache." << std::endl;
+        data_.clear();
+    }
+}
+
+void json_cache_backend::close()
+{
+    // No resources to release for JSON backend
+}
+
+bool json_cache_backend::has(const std::string& device_key,
+                             const std::string& name,
+                             const std::string& problem) const
+{
+    return data_.count({device_key, name, problem}) > 0;
+}
+
+std::optional<std::string> json_cache_backend::get(const std::string& device_key,
+                                                   const std::string& name,
+                                                   const std::string& problem) const
+{
+    auto it = data_.find({device_key, name, problem});
+    if(it == data_.end())
+        return std::nullopt;
+    return it->second;
+}
+
+void json_cache_backend::insert(const std::string& device_key,
+                                const std::string& name,
+                                const std::string& problem,
+                                const std::string& solution)
+{
+    data_[{device_key, name, problem}] = solution;
+}
+
+void json_cache_backend::mark(const std::string& device_key,
+                              const std::string& name,
+                              const std::string& problem)
+{
+    // Only insert if key doesn't already exist (WIP marker)
+    data_.emplace(key_type{device_key, name, problem}, std::string{});
+}
+
+void json_cache_backend::save()
+{
+    if(filepath_.empty())
+        return;
+
+    // Reconstruct value map with enriched key objects for JSON serialization.
+    // Include hardware metadata fields when available — these are stored per-entry
+    // for provenance (matches the enriched cache format). Extra fields in the key
+    // object do not affect matching: open() extracts only "name", "problem", and device fields.
+    std::unordered_map<value, value> output;
+    const auto current_dk_str = to_string(current_device_);
+    for(const auto& [k, v] : data_)
+    {
+        const auto& dk_str = std::get<0>(k);
+        const auto& name = std::get<1>(k);
+        const auto& problem = std::get<2>(k);
+
+        value key;
+        // Parse the entry's own device_key to get its device fields
+        const auto entry_dk = parse_device_key(dk_str);
+
+        if(not hw_meta_.empty() and dk_str == current_dk_str)
+        {
+            // Entry belongs to current device — write full hw provenance metadata
+            key = {{"name", name},
+                   {"problem", problem},
+                   {"gpu_arch", hw_meta_.gpu_arch},
+                   {"cu_count", hw_meta_.cu_count},
+                   {"graphics_clock_mhz", hw_meta_.graphics_clock_mhz},
+                   {"memory_clock_mhz", hw_meta_.memory_clock_mhz},
+                   {"memory_bus_bits", hw_meta_.memory_bus_bits},
+                   {"vram_bytes", hw_meta_.vram_bytes},
+                   {"wavefront_size", hw_meta_.wavefront_size},
+                   {"regs_per_block", hw_meta_.regs_per_block},
+                   {"max_threads_per_cu", hw_meta_.max_threads_per_cu}};
+        }
+        else if(not entry_dk.empty())
+        {
+            // Entry belongs to a different device — write its device key fields only
+            key = {{"name", name},
+                   {"problem", problem},
+                   {"gpu_arch", entry_dk.gpu_arch},
+                   {"cu_count", entry_dk.cu_count},
+                   {"wavefront_size", entry_dk.wavefront_size}};
+        }
+        else
+        {
+            // No device info available (legacy entry)
+            key = {{"name", name}, {"problem", problem}};
+        }
+        value sol = v.empty() ? value{} : value(v);
+        output[key] = sol;
+    }
+    write_string(filepath_, to_pretty_json_string(to_value(output)));
+}
+
+std::vector<cache_entry> json_cache_backend::all_entries() const
+{
+    std::vector<cache_entry> entries;
+    entries.reserve(data_.size());
+    for(auto& [k, v] : data_)
+    {
+        entries.push_back({std::get<0>(k), std::get<1>(k), std::get<2>(k), v});
+    }
+    return entries;
+}
+
+void json_cache_backend::load_entries(const std::vector<cache_entry>& entries)
+{
+    const auto current_dk_str = to_string(current_device_); // fallback for key-less entries
+    for(const auto& e : entries)
+    {
+        const auto& dk = e.device_key.empty() ? current_dk_str : e.device_key;
+        data_[{dk, e.name, e.problem}] = e.solution;
+    }
+}
+
+std::size_t json_cache_backend::size() const { return data_.size(); }
+
+std::string json_cache_backend::backend_name() const { return "json"; }
+
+backend_stats json_cache_backend::stats() const
+{
+    return {data_.size(), 0, filepath_, "json"};
+}
+
+void json_cache_backend::set_hw_metadata(const cache_hw_metadata& meta) { hw_meta_ = meta; }
+
+const cache_hw_metadata& json_cache_backend::get_hw_metadata() const { return hw_meta_; }
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
diff --git a/src/targets/gpu/problem_cache.cpp b/src/targets/gpu/problem_cache.cpp
index ba6c5112a3b..13cbe4a3432 100644
--- a/src/targets/gpu/problem_cache.cpp
+++ b/src/targets/gpu/problem_cache.cpp
@@ -23,7 +23,7 @@
  *
  */
 #include <migraphx/gpu/problem_cache.hpp>
-#include <migraphx/gpu/device_name.hpp>
+#include <migraphx/gpu/problem_cache_backend.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/json.hpp>
 #include <migraphx/env.hpp>
@@ -32,6 +32,8 @@
 #include <migraphx/logger.hpp>
 #include <migraphx/stringutils.hpp>
 #include <hip/hip_runtime_api.h>
+#include <iostream>
+#include <mutex>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -39,82 +41,204 @@ namespace gpu {
 
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_PROBLEM_CACHE)
 
-static value create_key(const std::string& name, const value& problem)
+// Process-wide backend instance, kept at file scope instead of as a member of
+// problem_cache so that the struct's ABI / layout stays untouched and remains
+// compatible with the stock migraphx_gpu.dll object files.
+// Relies on there being at most one active problem_cache per process (owned
+// by the gpu::context).
+static problem_cache_backend& active_backend()
 {
-    return {{"name", name}, {"problem", problem}};
+    static problem_cache_backend instance;
+    return instance;
+}
+
+// String form of the current device key; load() assigns it from the live hardware metadata.
+static std::string& active_device_key()
+{
+    static std::string current_key;
+    return current_key;
+}
+
+// Read the active HIP device's properties into a cache_hw_metadata record.
+// Any HIP failure yields the default-constructed (empty) record instead.
+static cache_hw_metadata query_current_gpu_metadata()
+{
+    cache_hw_metadata result;
+
+    int dev = 0;
+    if(hipGetDevice(&dev) != hipSuccess)
+        return result;
+    hipDeviceProp_t dev_props{};
+    if(hipGetDeviceProperties(&dev_props, dev) != hipSuccess)
+        return result;
+
+    // Keep only the base architecture name: everything from the first ':' on is
+    // target feature flags (e.g. "gfx1100:sramecc+:xnack-" -> "gfx1100").
+    const std::string full_arch = dev_props.gcnArchName;
+    result.gpu_arch             = full_arch.substr(0, full_arch.find(':'));
+
+    // The two clock rates are converted from kHz to MHz.
+    result.graphics_clock_mhz = dev_props.clockRate / 1000;
+    result.memory_clock_mhz   = dev_props.memoryClockRate / 1000;
+
+    result.cu_count           = dev_props.multiProcessorCount;
+    result.memory_bus_bits    = dev_props.memoryBusWidth;
+    result.vram_bytes         = static_cast<std::int64_t>(dev_props.totalGlobalMem);
+    result.wavefront_size     = dev_props.warpSize;
+    result.regs_per_block     = dev_props.regsPerBlock;
+    result.max_threads_per_cu = dev_props.maxThreadsPerMultiProcessor;
+    return result;
 }
 
 void problem_cache::load()
 {
+    auto& backend = active_backend();
+    backend = make_default_cache_backend();
+
     auto pc_path = string_value_of(MIGRAPHX_PROBLEM_CACHE{});
     if(pc_path.empty())
         return;
-    if(not fs::exists(pc_path))
+
+    // Query live GPU hardware metadata to derive device key.
+    auto hw_meta = query_current_gpu_metadata();
+    auto dk = hw_meta.device_key();
+    active_device_key() = to_string(dk);
+
+    backend.open(pc_path, dk);
+
+    if(!hw_meta.empty())
+        backend.set_hw_metadata(hw_meta);
+
+    // For the JSON backend, populate the legacy in-memory cache for backward
+    // compatibility with any code that accesses problem_cache::cache directly.
+    if(backend.backend_name() == "json")
     {
-        log::info() << "Problem cache not found. Creating new file.";
-        save();
-        return;
+        auto entries = backend.all_entries();
+        for(auto& e : entries)
+        {
+            value key = {{"name", e.name}, {"problem", e.problem}};
+            value sol = e.solution.empty() ? value{} : value(e.solution);
+            cache[key] = sol;
+        }
     }
-    // Deserialize into a temporary map, then project keys to {name, problem}
-    // so that extra metadata fields in the JSON don't break key matching.
-    std::unordered_map<value, value> raw;
-    from_value(from_json_string(read_string(pc_path)), raw);
-    for(auto& [k, v] : raw)
+}
+
+void problem_cache::load(const std::string& explicit_path, const std::string& explicit_backend)
+{
+    // Install the requested backend (JSON when the name is unknown) before anything else.
+    auto& store = active_backend();
+    store       = make_cache_backend_with_fallback(explicit_backend);
+
+    // Path precedence: the explicit argument, then MIGRAPHX_PROBLEM_CACHE. With neither set,
+    // the backend is left unopened.
+    std::string pc_path =
+        explicit_path.empty() ? string_value_of(MIGRAPHX_PROBLEM_CACHE{}) : explicit_path;
+    if(pc_path.empty())
+        return;
+
+    // The device key is derived from the live GPU; its string form is kept for later lookups.
+    auto gpu_info       = query_current_gpu_metadata();
+    auto dev_key        = gpu_info.device_key();
+    active_device_key() = to_string(dev_key);
+
+    store.open(pc_path, dev_key);
+    if(not gpu_info.empty())
+        store.set_hw_metadata(gpu_info);
+
+    // Only the JSON backend is mirrored into problem_cache::cache, so that code reading that
+    // map directly keeps working.
+    if(store.backend_name() != "json")
+        return;
+
+    for(const auto& entry : store.all_entries())
     {
-        auto projected = create_key(k.at("name").to<std::string>(), k.at("problem"));
-        cache[projected] = v;
+        // An empty solution string becomes a null value.
+        value key    = {{"name", entry.name}, {"problem", entry.problem}};
+        value stored = entry.solution.empty() ? value{} : value(entry.solution);
+        cache[key]   = stored;
     }
 }
+
 void problem_cache::save() const
 {
-    auto pc_path = string_value_of(MIGRAPHX_PROBLEM_CACHE{});
-    if(pc_path.empty())
+    auto& backend = active_backend();
+    if(!backend)
         return;
-    // Enrich keys with hardware provenance metadata on write.
-    // This runs once at session end — negligible cost.
-    hipDeviceProp_t props{};
-    auto status = hipGetDeviceProperties(&props, get_device_id());
 
-    std::unordered_map<value, value> enriched;
-    for(auto& [k, v] : cache)
+    // For the JSON backend, sync the legacy in-memory cache → backend before
+    // persisting, since some code may write to the cache map directly.
+    if(backend.backend_name() == "json")
     {
-        value rich_key = k;
-        if(status == hipSuccess)
+        auto& dk = active_device_key();
+        std::vector<cache_entry> entries;
+        entries.reserve(cache.size());
+        for(auto& [k, v] : cache)
         {
-            rich_key["gpu_arch"]            = trim(split_string(std::string(props.gcnArchName), ':').front());
-            rich_key["cu_count"]            = static_cast<std::int64_t>(props.multiProcessorCount);
-            rich_key["graphics_clock_mhz"]  = static_cast<std::int64_t>(props.clockRate / 1000);
-            rich_key["memory_clock_mhz"]    = static_cast<std::int64_t>(props.memoryClockRate / 1000);
-            rich_key["memory_bus_bits"]      = static_cast<std::int64_t>(props.memoryBusWidth);
-            rich_key["vram_bytes"]           = static_cast<std::int64_t>(props.totalGlobalMem);
-            rich_key["wavefront_size"]       = static_cast<std::int64_t>(props.warpSize);
-            rich_key["regs_per_block"]       = static_cast<std::int64_t>(props.regsPerBlock);
-            rich_key["max_threads_per_cu"]   = static_cast<std::int64_t>(props.maxThreadsPerMultiProcessor);
+            cache_entry e;
+            e.device_key = dk;
+            e.name     = k.at("name").to<std::string>();
+            e.problem  = k.at("problem").to<std::string>();
+            e.solution = v.is_null() ? std::string{} : v.to<std::string>();
+            entries.push_back(std::move(e));
         }
-        enriched[rich_key] = v;
+        backend.load_entries(entries);
     }
-    write_string(pc_path, to_pretty_json_string(to_value(enriched)));
+
+    backend.save();
 }
 
 bool problem_cache::has(const std::string& name, const value& problem) const
 {
-    return contains(cache, create_key(name, problem));
+    // An active backend answers by itself; the legacy map is consulted only without one.
+    if(auto& store = active_backend())
+        return store.has(active_device_key(), name, problem.to<std::string>());
+    value legacy_key = {{"name", name}, {"problem", problem}};
+    return contains(cache, legacy_key);
 }
 
 void problem_cache::insert(const std::string& name, const value& problem, const value& solution)
 {
     assert(not solution.is_null());
-    cache[create_key(name, problem)] = solution;
+    auto& backend = active_backend();
+    if(backend)
+        backend.insert(active_device_key(), name, problem.to<std::string>(), solution.to<std::string>());
+
+    // Only update legacy cache map for JSON backend (backward compatibility)
+    if(!backend || backend.backend_name() == "json")
+    {
+        value key = {{"name", name}, {"problem", problem}};
+        cache[key] = solution;
+    }
 }
 
 void problem_cache::mark(const std::string& name, const value& problem)
 {
-    cache.insert(std::make_pair(create_key(name, problem), value{}));
+    auto& backend = active_backend();
+    if(backend)
+        backend.mark(active_device_key(), name, problem.to<std::string>());
+
+    // Only update legacy cache map for JSON backend (backward compatibility)
+    if(!backend || backend.backend_name() == "json")
+    {
+        value key = {{"name", name}, {"problem", problem}};
+        cache.insert(std::make_pair(key, value{}));
+    }
 }
 
 optional<value> problem_cache::get(const std::string& name, const value& problem) const
 {
-    auto it = cache.find(create_key(name, problem));
+    // An active backend answers the lookup by itself (an empty stored string comes back as
+    // a null value); the legacy map below is consulted only when there is no backend.
+    if(auto& store = active_backend())
+    {
+        auto found = store.get(active_device_key(), name, problem.to<std::string>());
+        if(not found)
+            return nullopt;
+        return found->empty() ? value{} : value(*found);
+    }
+
+    value legacy_key = {{"name", name}, {"problem", problem}};
+    auto it          = cache.find(legacy_key);
     if(it == cache.end())
         return nullopt;
     return it->second;
diff --git a/src/targets/gpu/problem_cache_backend.cpp b/src/targets/gpu/problem_cache_backend.cpp
new file mode 100644
index 00000000000..6497165d7d8
--- /dev/null
+++ b/src/targets/gpu/problem_cache_backend.cpp
@@ -0,0 +1,90 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+#include <migraphx/gpu/problem_cache_backend.hpp>
+#include <migraphx/gpu/json_cache_backend.hpp>
+#include <migraphx/logger.hpp>
+#include <stdexcept>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+// --- parse_device_key ---
+// Parse "gpu_arch|cu_count|wavefront_size"; empty or malformed text gives a default key.
+cache_device_key parse_device_key(const std::string& s)
+{
+    const auto sep1 = s.find('|');
+    const auto sep2 = sep1 == std::string::npos ? sep1 : s.find('|', sep1 + 1);
+    if(sep2 == std::string::npos)
+        return {};
+    // std::stoi stops at the first non-digit, so "64xyz" or an extra "|field" would be
+    // accepted silently; require that the whole field was consumed.
+    const auto whole_int = [](const std::string& field) {
+        std::size_t used = 0;
+        const int parsed = std::stoi(field, &used);
+        return used == field.size() ? parsed : throw std::invalid_argument(field);
+    };
+    cache_device_key dk;
+    dk.gpu_arch = s.substr(0, sep1);
+    try
+    {
+        dk.cu_count       = whole_int(s.substr(sep1 + 1, sep2 - sep1 - 1));
+        dk.wavefront_size = whole_int(s.substr(sep2 + 1));
+    }
+    catch(const std::exception&)
+    {
+        return {};
+    }
+    return dk;
+}
+
+// --- Factory functions ---
+// Build the backend named by `type`; only "json" exists, any other name throws.
+problem_cache_backend make_cache_backend(const std::string& type)
+{
+    if(type != "json")
+        throw std::runtime_error("Problem cache backend not available: " + type);
+    return problem_cache_backend{json_cache_backend{}};
+}
+
+// Backend used when the caller expresses no preference: the JSON backend.
+// Delegates to make_cache_backend so that the construction lives in one place;
+// "json" is always available there, so this never takes the throwing path.
+problem_cache_backend make_default_cache_backend() { return make_cache_backend("json"); }
+
+// Never fails: an empty or "json" request is honoured silently, any other name is
+// reported with a warning and replaced by the JSON backend.
+problem_cache_backend make_cache_backend_with_fallback(const std::string& explicit_backend)
+{
+    const bool recognised = explicit_backend.empty() or explicit_backend == "json";
+    if(not recognised)
+        log::warn() << "Unknown cache backend '" << explicit_backend
+                    << "'. Falling back to JSON.\n";
+    return problem_cache_backend{json_cache_backend{}};
+}
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx