diff --git a/runtime/core/test/targets.bzl b/runtime/core/test/targets.bzl index 0436d3e10dd..c4da8cc37de 100644 --- a/runtime/core/test/targets.bzl +++ b/runtime/core/test/targets.bzl @@ -50,7 +50,7 @@ def define_common_targets(): "//executorch/runtime/core:core", ], ) - + runtime.cxx_test( name = "event_tracer_test", srcs = [ diff --git a/runtime/executor/method_meta.cpp b/runtime/executor/method_meta.cpp index 75dadfd893a..ca1b3254338 100644 --- a/runtime/executor/method_meta.cpp +++ b/runtime/executor/method_meta.cpp @@ -325,6 +325,42 @@ Result<int64_t> MethodMeta::memory_planned_buffer_size(size_t index) const { return s_plan_->non_const_buffer_sizes()->Get(index + 1); } +Result<etensor::Device> MethodMeta::memory_planned_buffer_device( + size_t index) const { + auto num_buffers = this->num_memory_planned_buffers(); + ET_CHECK_OR_RETURN_ERROR( + index < num_buffers, + InvalidArgument, + "index %zu out of range. num_buffers: %zu", + index, + num_buffers); + + // The non_const_buffer_device field is optional and only present when the + // program contains non-CPU buffers. For CPU-only programs (or legacy PTE + // files), this field is null and all buffers default to CPU. + auto* buffer_devices = s_plan_->non_const_buffer_device(); + if (buffer_devices == nullptr) { + return etensor::Device{etensor::DeviceType::CPU, 0}; + } + + // The sparse list only contains entries for non-CPU buffers. + // buffer_idx uses the same indexing as non_const_buffer_sizes (1-based, + // with index 0 reserved). The user-facing index is 0-based, so we + // compare against index + 1. + const auto internal_idx = static_cast<uint32_t>(index + 1); + for (size_t i = 0; i < buffer_devices->size(); ++i) { + auto entry = buffer_devices->Get(i); + if (entry->buffer_idx() == internal_idx) { + return etensor::Device{ + static_cast<etensor::DeviceType>(entry->device_type()), + static_cast<etensor::DeviceIndex>(entry->device_index())}; + } + } + + // Not found in the sparse list — this buffer is on CPU.
+ return etensor::Device{etensor::DeviceType::CPU, 0}; +} + bool MethodMeta::uses_backend(const char* backend_name) const { ET_CHECK_MSG(backend_name, "backend name is null"); const auto delegates = s_plan_->delegates(); diff --git a/runtime/executor/method_meta.h b/runtime/executor/method_meta.h index 79fd05c28ee..e0fa16cda22 100644 --- a/runtime/executor/method_meta.h +++ b/runtime/executor/method_meta.h @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include #include @@ -234,6 +235,19 @@ class MethodMeta final { */ Result<int64_t> memory_planned_buffer_size(size_t index) const; + /** + * Get the device placement for the specified memory-planned buffer. + * + * For CPU-only programs (no non_const_buffer_device in the PTE), all buffers + * default to Device{CPU, 0}. For programs with device annotations, returns + * the device type and index that the buffer should be allocated on. + * + * @param[in] index The index of the buffer to look up (0-based, same + * indexing as memory_planned_buffer_size()). + * @returns The Device on success, or an error on failure. + */ + Result<etensor::Device> memory_planned_buffer_device(size_t index) const; + /** * Check to see if a backend is used in this method.
* diff --git a/runtime/executor/test/method_meta_test.cpp b/runtime/executor/test/method_meta_test.cpp index e4ef2e72a85..4b2fdb26da2 100644 --- a/runtime/executor/test/method_meta_test.cpp +++ b/runtime/executor/test/method_meta_test.cpp @@ -74,6 +74,10 @@ class MethodMetaTest : public ::testing::Test { void SetUp() override { load_program(std::getenv("ET_MODULE_ADD_PATH"), "add"); load_program(std::getenv("ET_MODULE_STATEFUL_PATH"), "stateful"); + const char* device_path = std::getenv("ET_MODULE_ADD_WITH_DEVICE_PATH"); + if (device_path != nullptr) { + load_program(device_path, "add_with_device"); + } } private: @@ -192,6 +196,27 @@ TEST_F(MethodMetaTest, MethodMetaAttribute) { ASSERT_EQ(bad_access.error(), Error::InvalidArgument); } +TEST_F(MethodMetaTest, MemoryPlannedBufferDeviceDefaultsCpu) { + Result<MethodMeta> method_meta = programs_["add"]->method_meta("forward"); + ASSERT_EQ(method_meta.error(), Error::Ok); + + // CPU-only model: all buffers should default to CPU device. + size_t num_buffers = method_meta->num_memory_planned_buffers(); + ASSERT_GT(num_buffers, 0); + + for (size_t i = 0; i < num_buffers; ++i) { + auto device = method_meta->memory_planned_buffer_device(i); + ASSERT_TRUE(device.ok()); + EXPECT_EQ(device->type(), executorch::runtime::etensor::DeviceType::CPU); + EXPECT_EQ(device->index(), 0); + } + + // Out of range returns error.
+ EXPECT_EQ( + method_meta->memory_planned_buffer_device(num_buffers).error(), + Error::InvalidArgument); +} + TEST_F(MethodMetaTest, TensorInfoSizeOverflow) { // Create sizes that will cause overflow when multiplied std::vector<int32_t> overflow_sizes = { @@ -214,3 +239,29 @@ TEST_F(MethodMetaTest, TensorInfoSizeOverflow) { executorch::aten::string_view{nullptr, 0}), ""); } + +TEST_F(MethodMetaTest, MethodMetaBufferDeviceReturnsCudaForDeviceBuffer) { + ASSERT_NE(programs_.find("add_with_device"), programs_.end()) + << "ET_MODULE_ADD_WITH_DEVICE_PATH env var not set"; + Result<MethodMeta> method_meta = + programs_["add_with_device"]->method_meta("forward"); + ASSERT_EQ(method_meta.error(), Error::Ok); + + // ModuleAddWithDevice exports with enable_non_cpu_memory_planning=True. + // The model delegates add(a,b) to CUDA, producing: + // non_const_buffer_sizes: [0, 48] (index 0 reserved) + // non_const_buffer_device: [{buffer_idx=1, device_type=CUDA, device_index=0}] + // So there is exactly 1 planned buffer (user-facing index 0), on CUDA. + ASSERT_EQ(method_meta->num_memory_planned_buffers(), 1); + + // Buffer 0 should be CUDA device. + auto device = method_meta->memory_planned_buffer_device(0); + ASSERT_TRUE(device.ok()); + EXPECT_EQ(device->type(), executorch::runtime::etensor::DeviceType::CUDA); + EXPECT_EQ(device->index(), 0); + + // Out of range should return error.
+ EXPECT_EQ( + method_meta->memory_planned_buffer_device(1).error(), + Error::InvalidArgument); +} diff --git a/runtime/executor/test/targets.bzl b/runtime/executor/test/targets.bzl index f4534aefdea..74ea9a8262d 100644 --- a/runtime/executor/test/targets.bzl +++ b/runtime/executor/test/targets.bzl @@ -178,7 +178,12 @@ def define_common_targets(is_fbcode = False): "//executorch/runtime/executor:program", "//executorch/extension/data_loader:file_data_loader", ], - env = modules_env, + env = dict( + modules_env, + **{ + "ET_MODULE_ADD_WITH_DEVICE_PATH": "$(location fbcode//executorch/test/models:exported_program_with_device_info[ModuleAddWithDevice.pte])", + } + ), ) runtime.cxx_test( diff --git a/test/models/export_program_with_device_info.py b/test/models/export_program_with_device_info.py index 1abf73bfb73..246c41bb9f3 100644 --- a/test/models/export_program_with_device_info.py +++ b/test/models/export_program_with_device_info.py @@ -99,7 +99,12 @@ def main() -> None: compile_config=EdgeCompileConfig(_check_ir_validity=False), ) lowered = edge.to_backend(_DeviceAwarePartitioner()) - et_prog = lowered.to_executorch(ExecutorchBackendConfig(emit_stacktrace=False)) + et_prog = lowered.to_executorch( + ExecutorchBackendConfig( + emit_stacktrace=False, + enable_non_cpu_memory_planning=True, + ) + ) os.makedirs(args.outdir, exist_ok=True) outfile = os.path.join(args.outdir, "ModuleAddWithDevice.pte")