From 98966bfe5d095986432571b3dbb5690edf37ed16 Mon Sep 17 00:00:00 2001 From: Alexander Droste Date: Mon, 29 Jun 2026 10:40:42 +0000 Subject: [PATCH] chore: Python CUDA bridge CI and buffer handoff ABI Add explicit GPU-runner CI coverage for the Python CUDA bridge through the vortex-data[cuda] optional-extra path. Extend the private metadata bridge to carry host buffer-export capsules instead of only a buffer count. The base Python package exports repr(C) VortexBufferExport descriptors, and vortex-python-cuda imports them into local BufferHandles before deserializing arrays through its own VortexSession. Tests now cover primitive, nullable, bool, and struct arrays across the bridge, plus the existing CUDA Arrow Device smoke path. Signed-off-by: Alexander Droste --- .github/workflows/ci.yml | 33 +++++ Cargo.lock | 7 + Cargo.toml | 1 + vortex-python-abi/Cargo.toml | 18 +++ vortex-python-abi/src/lib.rs | 46 ++++++ vortex-python-cuda/Cargo.toml | 2 + .../python/vortex_cuda/__init__.py | 14 +- .../python/vortex_cuda/_lib.pyi | 1 + vortex-python-cuda/src/lib.rs | 138 ++++++++++++++++-- vortex-python-cuda/test/test_native_bridge.py | 41 +++++- vortex-python/Cargo.toml | 1 + vortex-python/python/vortex/_lib/arrays.pyi | 2 +- vortex-python/src/arrays/mod.rs | 94 +++++++++++- 13 files changed, 370 insertions(+), 28 deletions(-) create mode 100644 vortex-python-abi/Cargo.toml create mode 100644 vortex-python-abi/src/lib.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a6ee49768c6..54a3960ae28 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -132,6 +132,39 @@ jobs: uv run --all-packages make html working-directory: docs/ + python-cuda-test: + name: "Python CUDA (test)" + if: github.repository == 'vortex-data/vortex' + runs-on: >- + ${{ format('runs-on={0}/runner=gpu/tag=python-cuda-test', github.run_id) }} + timeout-minutes: 30 + env: + RUST_LOG: "info,maturin=off,uv=debug" + MATURIN_PEP517_ARGS: "--profile ci" + steps: + - uses: 
runs-on/action@v2 + with: + sccache: s3 + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 + - uses: ./.github/actions/setup-rust + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + components: cargo + - name: Pin rustup proxy to repository toolchain + run: | + TOOLCHAIN="$(grep '^channel' rust-toolchain.toml | cut -d '"' -f 2)" + echo "RUSTUP_TOOLCHAIN=$TOOLCHAIN" >> "$GITHUB_ENV" + - name: Install uv + uses: spiraldb/actions/.github/actions/setup-uv@a746510eafaa926484c354541cfc49b2ec06cc63 # 0.18.6 + with: + sync: false + + - name: Pytest - PyVortex CUDA bridge + run: | + uv run --extra cuda \ + pytest --benchmark-disable ../vortex-python-cuda/test/test_native_bridge.py + working-directory: vortex-python/ + rust-docs: name: "Rust (docs)" needs: duckdb-ready diff --git a/Cargo.lock b/Cargo.lock index 209b8a5b6be..09ba6496805 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10335,17 +10335,24 @@ dependencies = [ "url", "vortex", "vortex-array", + "vortex-python-abi", "vortex-tui", ] +[[package]] +name = "vortex-python-abi" +version = "0.1.0" + [[package]] name = "vortex-python-cuda" version = "0.1.0" dependencies = [ "arrow-schema", + "bytes", "pyo3", "vortex", "vortex-cuda", + "vortex-python-abi", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 876a0906e17..acab68656aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ members = [ "vortex-ffi", "fuzz", "vortex-jni", + "vortex-python-abi", "vortex-python", "vortex-python-cuda", "vortex-tui", diff --git a/vortex-python-abi/Cargo.toml b/vortex-python-abi/Cargo.toml new file mode 100644 index 00000000000..b56111d0b8c --- /dev/null +++ b/vortex-python-abi/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "vortex-python-abi" +authors = { workspace = true } +categories = { workspace = true } +description = "Shared internal ABI types for Vortex Python extension modules." 
+edition = { workspace = true } +homepage = { workspace = true } +include = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +publish = false +readme = { workspace = true } +repository = { workspace = true } +rust-version = { workspace = true } +version = { workspace = true } + +[lints] +workspace = true diff --git a/vortex-python-abi/src/lib.rs b/vortex-python-abi/src/lib.rs new file mode 100644 index 00000000000..27dc918e56d --- /dev/null +++ b/vortex-python-abi/src/lib.rs @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Shared private ABI for Python buffer handoff between `vortex-data` extension modules. + +use std::ffi::CStr; +use std::ffi::c_void; + +/// Name used for PyCapsules carrying [`VortexBufferExport`] pointers. +pub const BUFFER_EXPORT_CAPSULE_NAME: &CStr = c"vortex_buffer_export"; + +/// Current version of the [`VortexBufferExport`] ABI. +pub const VORTEX_BUFFER_EXPORT_VERSION: u32 = 1; + +/// Buffer kind for host-accessible buffers. +pub const VORTEX_BUFFER_HOST: u32 = 0; + +/// Buffer kind for device-accessible buffers. +pub const VORTEX_BUFFER_DEVICE: u32 = 1; + +/// C-ABI descriptor for passing buffers between `vortex-data` and optional extension modules. +/// +/// This type is shared by Rust crates, but the values are exchanged through Python capsules between +/// independently compiled extension modules. The producer owns allocation details and must provide a +/// `release` callback that releases both `private_data` and the descriptor itself. +#[repr(C)] +pub struct VortexBufferExport { + /// ABI version. Consumers must reject unsupported versions. + pub version: u32, + /// Buffer kind. Consumers may support [`VORTEX_BUFFER_HOST`] or [`VORTEX_BUFFER_DEVICE`]. + pub kind: u32, + /// Pointer to the first byte of the exported buffer, or null for empty buffers. + pub ptr: *const u8, + /// Length of the buffer in bytes. 
+ pub len: usize, + /// Required byte alignment of `ptr`. + pub alignment: usize, + /// Device identifier for device buffers, or -1 for host buffers. + pub device_id: i32, + /// Optional synchronization event for device buffers. + pub sync_event: *mut c_void, + /// Producer-owned private data used by `release`. + pub private_data: *mut c_void, + /// Producer-owned release callback. It must release `private_data` and this descriptor. + pub release: Option<unsafe extern "C" fn(*mut VortexBufferExport)>, +} diff --git a/vortex-python-cuda/Cargo.toml b/vortex-python-cuda/Cargo.toml index d4e0b7a4b6e..b866d1a0944 100644 --- a/vortex-python-cuda/Cargo.toml +++ b/vortex-python-cuda/Cargo.toml @@ -28,6 +28,8 @@ extension-module = [] [dependencies] arrow-schema = { workspace = true } +bytes = { workspace = true } pyo3 = { workspace = true, features = ["abi3", "abi3-py311"] } vortex = { workspace = true } vortex-cuda = { workspace = true } +vortex-python-abi = { path = "../vortex-python-abi" } diff --git a/vortex-python-cuda/python/vortex_cuda/__init__.py b/vortex-python-cuda/python/vortex_cuda/__init__.py index 8f066107a7c..c5d1610724e 100644 --- a/vortex-python-cuda/python/vortex_cuda/__init__.py +++ b/vortex-python-cuda/python/vortex_cuda/__init__.py @@ -1,12 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright the Vortex contributors +# pyright: reportMissingModuleSource=false, reportPrivateUsage=false -from ._lib import ( # pyright: ignore[reportMissingModuleSource] - _debug_array_metadata_dtype as _debug_array_metadata_dtype, # pyright: ignore[reportPrivateUsage] -) -from ._lib import ( # pyright: ignore[reportMissingModuleSource] - cuda_available, - export_device_array, -) +from . 
import _lib + +_debug_array_metadata_dtype = _lib._debug_array_metadata_dtype +_debug_array_metadata_display_values = _lib._debug_array_metadata_display_values +cuda_available = _lib.cuda_available +export_device_array = _lib.export_device_array __all__ = ["cuda_available", "export_device_array"] diff --git a/vortex-python-cuda/python/vortex_cuda/_lib.pyi b/vortex-python-cuda/python/vortex_cuda/_lib.pyi index 6b93fec0986..dad7628363d 100644 --- a/vortex-python-cuda/python/vortex_cuda/_lib.pyi +++ b/vortex-python-cuda/python/vortex_cuda/_lib.pyi @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright the Vortex contributors def _debug_array_metadata_dtype(array: object) -> str: ... +def _debug_array_metadata_display_values(array: object) -> str: ... def cuda_available() -> bool: ... def export_device_array( array: object, requested_schema: object | None = None, **kwargs: object diff --git a/vortex-python-cuda/src/lib.rs b/vortex-python-cuda/src/lib.rs index c4a8e585003..f93198cceb4 100644 --- a/vortex-python-cuda/src/lib.rs +++ b/vortex-python-cuda/src/lib.rs @@ -35,7 +35,6 @@ use vortex::buffer::ByteBuffer; use vortex::dtype::DType; use vortex::error::VortexError; use vortex::error::VortexResult; -use vortex::error::vortex_bail; use vortex::error::vortex_ensure; use vortex::error::vortex_err; use vortex::flatbuffers::FlatBuffer; @@ -49,12 +48,115 @@ use vortex_cuda::arrow::ArrowDeviceArrayWithSchema; use vortex_cuda::arrow::DeviceArrayExt; use vortex_cuda::arrow::release_device_array; use vortex_cuda::arrow::release_schema; +use vortex_python_abi::BUFFER_EXPORT_CAPSULE_NAME; +use vortex_python_abi::VORTEX_BUFFER_EXPORT_VERSION; +use vortex_python_abi::VORTEX_BUFFER_HOST; +use vortex_python_abi::VortexBufferExport; const ARROW_SCHEMA_CAPSULE_NAME: &CStr = c_str!("arrow_schema"); const USED_ARROW_SCHEMA_CAPSULE_NAME: &CStr = c_str!("used_arrow_schema"); const ARROW_DEVICE_ARRAY_CAPSULE_NAME: &CStr = c_str!("arrow_device_array"); const 
USED_ARROW_DEVICE_ARRAY_CAPSULE_NAME: &CStr = c_str!("used_arrow_device_array"); +struct BufferExportGuard { + export: NonNull, +} + +impl BufferExportGuard { + fn export(&self) -> &VortexBufferExport { + unsafe { self.export.as_ref() } + } +} + +impl AsRef<[u8]> for BufferExportGuard { + fn as_ref(&self) -> &[u8] { + let export = self.export(); + if export.len == 0 { + &[] + } else { + unsafe { std::slice::from_raw_parts(export.ptr, export.len) } + } + } +} + +impl Drop for BufferExportGuard { + fn drop(&mut self) { + // The producer's release callback owns cleanup of both private data and the descriptor. + let export = unsafe { self.export.as_ref() }; + if let Some(release) = export.release { + unsafe { release(self.export.as_ptr()) }; + } + } +} + +// The guard is moved into `Bytes::from_owner`, which requires `Send + Sync`. After import we disable +// the source capsule destructor and own the C export until this guard is dropped. +unsafe impl Send for BufferExportGuard {} +unsafe impl Sync for BufferExportGuard {} + +fn import_buffer_from_capsule(capsule: &Bound<'_, PyCapsule>) -> PyResult { + let export_ptr = capsule + .pointer_checked(Some(BUFFER_EXPORT_CAPSULE_NAME))? 
+ .cast::<VortexBufferExport>(); + let export = unsafe { export_ptr.as_ref() }; + + if export.version != VORTEX_BUFFER_EXPORT_VERSION { + return Err(PyValueError::new_err(format!( + "unsupported VortexBufferExport version {}", + export.version + ))); + } + if export.kind != VORTEX_BUFFER_HOST { + return Err(PyValueError::new_err(format!( + "unsupported buffer kind {} (only host buffers are supported in metadata bridge)", + export.kind + ))); + } + + if export.len != 0 && export.ptr.is_null() { + return Err(PyValueError::new_err( + "non-empty VortexBufferExport has null data pointer", + )); + } + if export.release.is_none() { + return Err(PyValueError::new_err( + "VortexBufferExport is missing a release callback", + )); + } + + let len = export.len; + let alignment = vortex::buffer::Alignment::try_from( + u32::try_from(export.alignment) + .map_err(|_| PyValueError::new_err("buffer alignment exceeds u32"))?, + ) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + + if len != 0 && !alignment.is_ptr_aligned(export.ptr) { + return Err(PyValueError::new_err(format!( + "buffer pointer is not aligned to requested alignment {alignment}" + ))); + } + + // Transfer ownership of the boxed VortexBufferExport from the producer capsule into the Bytes + // owner below. Otherwise the producer capsule could be dropped before the reconstructed + // BufferHandle, leaving the Bytes owner with a dangling export pointer. 
+ unsafe { ffi::PyCapsule_SetDestructor(capsule.as_ptr(), None) }; + if PyErr::occurred(capsule.py()) { + return Err(PyErr::fetch(capsule.py())); + } + + let guard = BufferExportGuard { export: export_ptr }; + + let byte_buffer = if len == 0 { + drop(guard); + ByteBuffer::empty_aligned(alignment) + } else { + ByteBuffer::from(bytes::Bytes::from_owner(guard)).aligned(alignment) + }; + + Ok(BufferHandle::new_host(byte_buffer)) +} + struct ExportedDeviceArray(ArrowDeviceArrayWithSchema); // The exported Arrow C Device structs own CPU-side metadata plus CUDA device pointers through their @@ -101,7 +203,7 @@ struct ArrayMetadata { dtype: Vec, len: usize, metadata: Vec, - buffer_count: usize, + buffers: Vec, children: Vec, } @@ -147,6 +249,16 @@ fn parse_array_metadata(value: &Bound<'_, PyAny>) -> PyResult { ))); } + let buffers = tuple + .get_item(4)? + .cast::()? + .iter() + .map(|item| { + let capsule: Bound<'_, PyCapsule> = item.extract()?; + import_buffer_from_capsule(&capsule) + }) + .collect::>>()?; + let children = tuple .get_item(5)? .cast::()? 
@@ -159,7 +271,7 @@ fn parse_array_metadata(value: &Bound<'_, PyAny>) -> PyResult { dtype: tuple.get_item(1)?.extract()?, len: tuple.get_item(2)?.extract()?, metadata: tuple.get_item(3)?.extract()?, - buffer_count: tuple.get_item(4)?.extract()?, + buffers, children, }) } @@ -173,14 +285,6 @@ fn deserialize_metadata_tree( metadata: &ArrayMetadata, session: &VortexSession, ) -> VortexResult { - if metadata.buffer_count != 0 { - vortex_bail!( - "metadata-only bridge cannot deserialize array {} with {} buffers yet", - metadata.encoding_id, - metadata.buffer_count - ); - } - let dtype = dtype_from_metadata(metadata, session)?; let children = metadata .children @@ -194,12 +298,11 @@ fn deserialize_metadata_tree( .registry() .find(&encoding_id) .ok_or_else(|| vortex_err!("Unknown array encoding: {}", metadata.encoding_id))?; - let buffers: &[BufferHandle] = &[]; let decoded = plugin.deserialize( &dtype, metadata.len, &metadata.metadata, - buffers, + &metadata.buffers, &children, session, )?; @@ -246,6 +349,14 @@ fn _debug_array_metadata_dtype(array: Bound<'_, PyAny>) -> PyResult { Ok(array.dtype().to_string()) } +/// Return array values after crossing the private vtable-metadata bridge. +#[pyfunction] +fn _debug_array_metadata_display_values(array: Bound<'_, PyAny>) -> PyResult { + let metadata = extract_array_metadata(&array)?; + let array = deserialize_metadata_tree(&metadata, &METADATA_SESSION).map_err(to_py_err)?; + Ok(array.display_values().to_string()) +} + /// Export a PyVortex array as Arrow C Device schema and array PyCapsules. 
#[pyfunction] #[pyo3(signature = (array, requested_schema = None, **kwargs))] @@ -461,6 +572,7 @@ unsafe extern "C" fn release_device_array_capsule(capsule: *mut ffi::PyObject) { fn _lib(m: &Bound) -> PyResult<()> { m.add_function(wrap_pyfunction!(cuda_available, m)?)?; m.add_function(wrap_pyfunction!(_debug_array_metadata_dtype, m)?)?; + m.add_function(wrap_pyfunction!(_debug_array_metadata_display_values, m)?)?; m.add_function(wrap_pyfunction!(export_device_array, m)?)?; Ok(()) } diff --git a/vortex-python-cuda/test/test_native_bridge.py b/vortex-python-cuda/test/test_native_bridge.py index 5c7b9bf5c76..c98e70a11d5 100644 --- a/vortex-python-cuda/test/test_native_bridge.py +++ b/vortex-python-cuda/test/test_native_bridge.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright the Vortex contributors +# pyright: reportPrivateUsage=false import pytest import vortex_cuda @@ -10,14 +11,46 @@ def test_debug_array_metadata_dtype_reads_base_vortex_array(): array = vortex.Array.from_range(range(0, 3)) - assert vortex_cuda._debug_array_metadata_dtype(array) == str(array.dtype) # pyright: ignore[reportPrivateUsage] + assert vortex_cuda._debug_array_metadata_dtype(array) == str(array.dtype) -def test_metadata_bridge_reports_arrays_that_need_buffer_handoff(): +def test_metadata_bridge_primitive_array(): array = vortex.array([1, 2, 3]) - with pytest.raises(RuntimeError, match="metadata-only bridge.*buffers"): - _ = vortex_cuda._debug_array_metadata_dtype(array) # pyright: ignore[reportPrivateUsage] + assert vortex_cuda._debug_array_metadata_dtype(array) == str(array.dtype) + assert vortex_cuda._debug_array_metadata_display_values(array) == "[1i64, 2i64, 3i64]" + + +def test_metadata_bridge_nullable_array(): + array = vortex.array([1, None, 3]) + + assert vortex_cuda._debug_array_metadata_dtype(array) == str(array.dtype) + assert vortex_cuda._debug_array_metadata_display_values(array) == "[1i64, null, 3i64]" + + +def 
test_metadata_bridge_bool_array(): + array = vortex.array([True, False, True]) + + assert vortex_cuda._debug_array_metadata_dtype(array) == str(array.dtype) + assert vortex_cuda._debug_array_metadata_display_values(array) == "[true, false, true]" + + +def test_metadata_bridge_struct_with_children(): + import pyarrow as pa + + arrow_table = pa.table({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]}) + struct_array = vortex.Array.from_arrow( + pa.StructArray.from_arrays( # pyright: ignore[reportUnknownMemberType] + [arrow_table.column("a").combine_chunks(), arrow_table.column("b").combine_chunks()], + names=["a", "b"], + ) + ) + + assert vortex_cuda._debug_array_metadata_dtype(struct_array) == str(struct_array.dtype) + assert ( + vortex_cuda._debug_array_metadata_display_values(struct_array) + == "[{a: 1i64, b: 4f64}, {a: 2i64, b: 5f64}, {a: 3i64, b: 6f64}]" + ) def test_export_device_array_returns_capsules_or_clean_cuda_error(): diff --git a/vortex-python/Cargo.toml b/vortex-python/Cargo.toml index 0b648959821..491bff86e6a 100644 --- a/vortex-python/Cargo.toml +++ b/vortex-python/Cargo.toml @@ -52,6 +52,7 @@ pyo3-object_store = { workspace = true } tokio = { workspace = true, features = ["rt-multi-thread"], optional = true } url = { workspace = true } vortex = { workspace = true, features = ["object_store"] } +vortex-python-abi = { path = "../vortex-python-abi" } vortex-tui = { workspace = true, optional = true } [dev-dependencies] diff --git a/vortex-python/python/vortex/_lib/arrays.pyi b/vortex-python/python/vortex/_lib/arrays.pyi index 3e4cd0931a4..8da62190291 100644 --- a/vortex-python/python/vortex/_lib/arrays.pyi +++ b/vortex-python/python/vortex/_lib/arrays.pyi @@ -26,7 +26,7 @@ class Array: @staticmethod def from_range(obj: range, *, dtype: DType | None = None) -> Array: ... def to_arrow_array(self) -> pa.Array[pa.Scalar[pa.DataType]]: ... - def __vortex_array_metadata__(self) -> tuple[str, bytes, int, bytes, int, list[object]]: ... 
+ def __vortex_array_metadata__(self) -> tuple[str, bytes, int, bytes, list[object], list[object]]: ... @property def id(self) -> str: ... @property diff --git a/vortex-python/src/arrays/mod.rs b/vortex-python/src/arrays/mod.rs index e79a0c0a620..e22e8d95955 100644 --- a/vortex-python/src/arrays/mod.rs +++ b/vortex-python/src/arrays/mod.rs @@ -9,15 +9,22 @@ mod native; pub(crate) mod py; mod range_to_sequence; +use std::ffi::c_void; +use std::ptr; +use std::ptr::NonNull; + use arrow_array::Array as ArrowArray; use arrow_array::ArrayRef as ArrowArrayRef; use pyo3::IntoPyObjectExt; use pyo3::exceptions::PyIndexError; +use pyo3::exceptions::PyNotImplementedError; +use pyo3::exceptions::PyRuntimeError; use pyo3::exceptions::PyTypeError; use pyo3::exceptions::PyValueError; use pyo3::intern; use pyo3::prelude::*; use pyo3::types::PyBytes; +use pyo3::types::PyCapsule; use pyo3::types::PyDict; use pyo3::types::PyList; use pyo3::types::PyRange; @@ -33,9 +40,11 @@ use vortex::array::arrays::Chunked; use vortex::array::arrays::bool::BoolArrayExt; use vortex::array::arrays::chunked::ChunkedArrayExt; use vortex::array::arrow::ArrowSessionExt; +use vortex::array::buffer::BufferHandle; use vortex::array::builtins::ArrayBuiltins; use vortex::array::match_each_integer_ptype; use vortex::array::session::ArraySessionExt; +use vortex::buffer::ByteBuffer; use vortex::dtype::DType; use vortex::dtype::Nullability; use vortex::dtype::PType; @@ -43,6 +52,10 @@ use vortex::flatbuffers::WriteFlatBufferExt; use vortex::ipc::messages::EncoderMessage; use vortex::ipc::messages::MessageEncoder; use vortex::scalar_fn::fns::operators::Operator; +use vortex_python_abi::BUFFER_EXPORT_CAPSULE_NAME; +use vortex_python_abi::VORTEX_BUFFER_EXPORT_VERSION; +use vortex_python_abi::VORTEX_BUFFER_HOST; +use vortex_python_abi::VortexBufferExport; use crate::PyVortex; use crate::arrays::native::PyNativeArray; @@ -60,6 +73,73 @@ use crate::scalar::PyScalar; use crate::serde::context::PyArrayContext; use 
crate::session::session; +fn export_buffer<'py>(py: Python<'py>, handle: &BufferHandle) -> PyResult> { + let Some(byte_buffer) = handle.as_host_opt() else { + return Err(PyNotImplementedError::new_err( + "Vortex Python CUDA buffer handoff only supports host buffers for now", + )); + }; + let byte_buffer = byte_buffer.clone(); + let ptr = byte_buffer.as_slice().as_ptr(); + let len = byte_buffer.as_slice().len(); + let alignment = usize::from(byte_buffer.alignment()); + let private_data = Box::into_raw(Box::new(byte_buffer)).cast::(); + + let export = VortexBufferExport { + version: VORTEX_BUFFER_EXPORT_VERSION, + kind: VORTEX_BUFFER_HOST, + ptr, + len, + alignment, + device_id: -1, + sync_event: ptr::null_mut(), + private_data, + release: Some(release_buffer_export), + }; + + let capsule_ptr = Box::into_raw(Box::new(export)).cast::(); + let capsule_ptr = NonNull::new(capsule_ptr) + .ok_or_else(|| PyRuntimeError::new_err("failed to allocate buffer export capsule"))?; + let capsule = unsafe { + PyCapsule::new_with_pointer_and_destructor( + py, + capsule_ptr, + BUFFER_EXPORT_CAPSULE_NAME, + Some(release_buffer_export_capsule), + ) + }; + match capsule { + Ok(capsule) => Ok(capsule), + Err(err) => { + let export = capsule_ptr.as_ptr().cast::(); + unsafe { release_buffer_export(export) }; + Err(err) + } + } +} + +unsafe extern "C" fn release_buffer_export(export: *mut VortexBufferExport) { + if export.is_null() { + return; + } + let mut export = unsafe { Box::from_raw(export) }; + let private = export.private_data; + if !private.is_null() { + drop(unsafe { Box::from_raw(private.cast::()) }); + export.private_data = ptr::null_mut(); + } +} + +unsafe extern "C" fn release_buffer_export_capsule(capsule: *mut pyo3::ffi::PyObject) { + let ptr = + unsafe { pyo3::ffi::PyCapsule_GetPointer(capsule, BUFFER_EXPORT_CAPSULE_NAME.as_ptr()) }; + if ptr.is_null() { + unsafe { pyo3::ffi::PyErr_Clear() }; + return; + } + unsafe { release_buffer_export(ptr.cast::()) }; +} + fn 
array_metadata_tuple<'py>( py: Python<'py>, array: &ArrayRef, @@ -71,6 +151,14 @@ fn array_metadata_tuple<'py>( )) })?; let dtype = array.dtype().write_flatbuffer_bytes()?; + + let buffers = array + .buffer_handles() + .iter() + .map(|handle| export_buffer(py, handle).map(|cap| cap.into_any())) + .collect::>>()?; + let buffers = PyList::new(py, buffers)?; + let children = array .children() .iter() @@ -85,7 +173,7 @@ fn array_metadata_tuple<'py>( PyBytes::new(py, dtype.as_slice()).into_any().into(), array.len().into_py_any(py)?, PyBytes::new(py, metadata.as_slice()).into_any().into(), - array.nbuffers().into_py_any(py)?, + buffers.into_any().into(), children.into_any().into(), ], ) @@ -412,8 +500,8 @@ impl PyArray { /// Export this array's vtable metadata tree for optional native extensions. /// - /// The returned private tuple intentionally excludes buffers; consumers must provide buffer - /// handles through a separate bridge before deserializing arrays that own physical buffers. + /// The returned private tuple includes vtable metadata plus private buffer-export capsules. + /// The capsule ABI is version-pinned between the `vortex-data` and `vortex-data-cuda` wheels. fn __vortex_array_metadata__<'py>( self_: &'py Bound<'py, Self>, ) -> PyVortexResult> {