diff --git a/runtime/core/test/targets.bzl b/runtime/core/test/targets.bzl index 0436d3e10dd..c4da8cc37de 100644 --- a/runtime/core/test/targets.bzl +++ b/runtime/core/test/targets.bzl @@ -50,7 +50,7 @@ def define_common_targets(): "//executorch/runtime/core:core", ], ) - + runtime.cxx_test( name = "event_tracer_test", srcs = [ diff --git a/runtime/executor/method_meta.cpp b/runtime/executor/method_meta.cpp index 75dadfd893a..ca1b3254338 100644 --- a/runtime/executor/method_meta.cpp +++ b/runtime/executor/method_meta.cpp @@ -325,6 +325,42 @@ Result<int64_t> MethodMeta::memory_planned_buffer_size(size_t index) const { return s_plan_->non_const_buffer_sizes()->Get(index + 1); } +Result<etensor::Device> MethodMeta::memory_planned_buffer_device( + size_t index) const { + auto num_buffers = this->num_memory_planned_buffers(); + ET_CHECK_OR_RETURN_ERROR( + index < num_buffers, + InvalidArgument, + "index %zu out of range. num_buffers: %zu", + index, + num_buffers); + + // The non_const_buffer_device field is optional and only present when the + // program contains non-CPU buffers. For CPU-only programs (or legacy PTE + // files), this field is null and all buffers default to CPU. + auto* buffer_devices = s_plan_->non_const_buffer_device(); + if (buffer_devices == nullptr) { + return etensor::Device{etensor::DeviceType::CPU, 0}; + } + + // The sparse list only contains entries for non-CPU buffers. + // buffer_idx uses the same indexing as non_const_buffer_sizes (1-based, + // with index 0 reserved). The user-facing index is 0-based, so we + // compare against index + 1. + const auto internal_idx = static_cast<uint32_t>(index + 1); + for (size_t i = 0; i < buffer_devices->size(); ++i) { + auto entry = buffer_devices->Get(i); + if (entry->buffer_idx() == internal_idx) { + return etensor::Device{ + static_cast<etensor::DeviceType>(entry->device_type()), + static_cast<etensor::DeviceIndex>(entry->device_index())}; + } + } + + // Not found in the sparse list — this buffer is on CPU.
+ return etensor::Device{etensor::DeviceType::CPU, 0}; +} + bool MethodMeta::uses_backend(const char* backend_name) const { ET_CHECK_MSG(backend_name, "backend name is null"); const auto delegates = s_plan_->delegates(); diff --git a/runtime/executor/method_meta.h b/runtime/executor/method_meta.h index 79fd05c28ee..e0fa16cda22 100644 --- a/runtime/executor/method_meta.h +++ b/runtime/executor/method_meta.h @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include #include @@ -234,6 +235,19 @@ class MethodMeta final { */ Result<int64_t> memory_planned_buffer_size(size_t index) const; + /** + * Get the device placement for the specified memory-planned buffer. + * + * For CPU-only programs (no non_const_buffer_device in the PTE), all buffers + * default to Device{CPU, 0}. For programs with device annotations, returns + * the device type and index that the buffer should be allocated on. + * + * @param[in] index The index of the buffer to look up (0-based, same + * indexing as memory_planned_buffer_size()). + * @returns The Device on success, or an error on failure. + */ + Result<etensor::Device> memory_planned_buffer_device(size_t index) const; + /** * Check to see if a backend is used in this method.
* diff --git a/runtime/executor/test/method_meta_test.cpp b/runtime/executor/test/method_meta_test.cpp index e4ef2e72a85..4b2fdb26da2 100644 --- a/runtime/executor/test/method_meta_test.cpp +++ b/runtime/executor/test/method_meta_test.cpp @@ -74,6 +74,10 @@ class MethodMetaTest : public ::testing::Test { void SetUp() override { load_program(std::getenv("ET_MODULE_ADD_PATH"), "add"); load_program(std::getenv("ET_MODULE_STATEFUL_PATH"), "stateful"); + const char* device_path = std::getenv("ET_MODULE_ADD_WITH_DEVICE_PATH"); + if (device_path != nullptr) { + load_program(device_path, "add_with_device"); + } } private: @@ -192,6 +196,27 @@ TEST_F(MethodMetaTest, MethodMetaAttribute) { ASSERT_EQ(bad_access.error(), Error::InvalidArgument); } +TEST_F(MethodMetaTest, MemoryPlannedBufferDeviceDefaultsCpu) { + Result<MethodMeta> method_meta = programs_["add"]->method_meta("forward"); + ASSERT_EQ(method_meta.error(), Error::Ok); + + // CPU-only model: all buffers should default to CPU device. + size_t num_buffers = method_meta->num_memory_planned_buffers(); + ASSERT_GT(num_buffers, 0); + + for (size_t i = 0; i < num_buffers; ++i) { + auto device = method_meta->memory_planned_buffer_device(i); + ASSERT_TRUE(device.ok()); + EXPECT_EQ(device->type(), executorch::runtime::etensor::DeviceType::CPU); + EXPECT_EQ(device->index(), 0); + } + + // Out of range returns error.
+ EXPECT_EQ( + method_meta->memory_planned_buffer_device(num_buffers).error(), + Error::InvalidArgument); +} + TEST_F(MethodMetaTest, TensorInfoSizeOverflow) { // Create sizes that will cause overflow when multiplied std::vector<int32_t> overflow_sizes = { @@ -214,3 +239,29 @@ TEST_F(MethodMetaTest, TensorInfoSizeOverflow) { executorch::aten::string_view{nullptr, 0}), ""); } + +TEST_F(MethodMetaTest, MethodMetaBufferDeviceReturnsCudaForDeviceBuffer) { + ASSERT_NE(programs_.find("add_with_device"), programs_.end()) + << "ET_MODULE_ADD_WITH_DEVICE_PATH env var not set"; + Result<MethodMeta> method_meta = + programs_["add_with_device"]->method_meta("forward"); + ASSERT_EQ(method_meta.error(), Error::Ok); + + // ModuleAddWithDevice exports with enable_non_cpu_memory_planning=True. + // The model delegates add(a,b) to CUDA, producing: + // non_const_buffer_sizes: [0, 48] (index 0 reserved) + // non_const_buffer_device: [{buffer_idx=1, device_type=CUDA, device_index=0}] + // So there is exactly 1 planned buffer (user-facing index 0), on CUDA. + ASSERT_EQ(method_meta->num_memory_planned_buffers(), 1); + + // Buffer 0 should be CUDA device. + auto device = method_meta->memory_planned_buffer_device(0); + ASSERT_TRUE(device.ok()); + EXPECT_EQ(device->type(), executorch::runtime::etensor::DeviceType::CUDA); + EXPECT_EQ(device->index(), 0); + + // Out of range should return error.
+ EXPECT_EQ( + method_meta->memory_planned_buffer_device(1).error(), + Error::InvalidArgument); +} diff --git a/runtime/executor/test/targets.bzl b/runtime/executor/test/targets.bzl index f4534aefdea..74ea9a8262d 100644 --- a/runtime/executor/test/targets.bzl +++ b/runtime/executor/test/targets.bzl @@ -178,7 +178,12 @@ def define_common_targets(is_fbcode = False): "//executorch/runtime/executor:program", "//executorch/extension/data_loader:file_data_loader", ], - env = modules_env, + env = dict( + modules_env, + **{ + "ET_MODULE_ADD_WITH_DEVICE_PATH": "$(location fbcode//executorch/test/models:exported_program_with_device_info[ModuleAddWithDevice.pte])", + } + ), ) runtime.cxx_test( diff --git a/test/models/export_program_with_device_info.py b/test/models/export_program_with_device_info.py index 1abf73bfb73..246c41bb9f3 100644 --- a/test/models/export_program_with_device_info.py +++ b/test/models/export_program_with_device_info.py @@ -99,7 +99,12 @@ def main() -> None: compile_config=EdgeCompileConfig(_check_ir_validity=False), ) lowered = edge.to_backend(_DeviceAwarePartitioner()) - et_prog = lowered.to_executorch(ExecutorchBackendConfig(emit_stacktrace=False)) + et_prog = lowered.to_executorch( + ExecutorchBackendConfig( + emit_stacktrace=False, + enable_non_cpu_memory_planning=True, + ) + ) os.makedirs(args.outdir, exist_ok=True) outfile = os.path.join(args.outdir, "ModuleAddWithDevice.pte")