From 8ebaae521887b872b1d5788b2354305d92be4421 Mon Sep 17 00:00:00 2001
From: Alexander Droste
Date: Mon, 29 Jun 2026 16:26:50 +0000
Subject: [PATCH] chore: test CUDA Arrow Device capsule exports

Signed-off-by: Alexander Droste
---
  Review notes (free text after the "---" separator; not part of the commit):

  What the patch does
  * src/lib.rs gains two debug-only #[pyfunction]s, registered in `_lib`,
    re-exported from vortex_cuda/__init__.py and declared in both stubs.
    * _debug_arrow_device_array_capsule_summary: resolves the pointers behind
      the schema and device-array capsules (checked against
      ARROW_SCHEMA_CAPSULE_NAME / ARROW_DEVICE_ARRAY_CAPSULE_NAME) and returns
      a dict with the keys schema_live, array_live, is_cuda, device_type,
      device_id, length, null_count, n_buffers, n_children. It only reads
      fields; it never calls a release callback.
    * _debug_consume_arrow_device_array_capsules: records whether both release
      callbacks are set, calls release_schema / release_device_array, records
      whether both callbacks are unset afterwards, renames both capsules to
      the USED_* names and returns six bools in this order:
      (schema_had_release, array_had_release, schema_release_cleared,
       array_release_cleared, schema capsule valid under the used name,
       device-array capsule valid under the used name).
    * set_capsule_name wraps ffi::PyCapsule_SetName and converts a non-zero
      return into the fetched Python error; capsule_is_valid wraps
      ffi::PyCapsule_IsValid and compares the result with 1.
  * test/test_native_bridge.py adds _require_cuda (pytest.skip when
    cuda_available() is false) and _assert_exported_device_array, plus tests
    for primitive, nullable primitive, nullable bool, string and struct
    (n_children=2) exports, for dropping unconsumed capsules, and for the
    consume path, which expects all six flags to be True.

  Things to confirm before applying
  * NOTE(review): this copy appears to have lost generic arguments. The four
    `.cast::()` calls, the return type `PyResult> {` and the hunk heading
    `fn _lib(m: &Bound)` are not valid Rust as written. Restore them from
    commit 8ebaae521887b872b1d5788b2354305d92be4421; they are left untouched
    below rather than guessed.
  * NOTE(review): the From / Signed-off-by lines carry no e-mail address,
    presumably stripped by the same extraction. Verify against upstream.
  * NOTE(review): line breaks and blank context lines were reconstructed from
    the hunk line counts, which they reproduce exactly. Indentation widths
    are assumed (4 spaces, two spaces before inline `#` comments), including
    the depth of the three context lines in the last hunk.
  * NOTE(review): the new `unsafe` blocks have no SAFETY comments. They
    presumably rely on pointer_checked returning a valid, correctly typed
    pointer for a capsule with the expected name. TODO confirm and document.
  * NOTE(review): set_capsule_name accepts `name: &CStr`, but CPython keeps
    the pointer passed to PyCapsule_SetName instead of copying it, so the
    name has to outlive the capsule. Both call sites pass USED_* constants;
    consider tightening the parameter to `&'static CStr`.
  * NOTE(review): test_arrow_device_export_struct_array imports pyarrow
    before calling _require_cuda(), so a missing pyarrow raises ImportError
    instead of skipping. Fine if pyarrow is a hard test dependency. Confirm.

 .../python/vortex_cuda/__init__.py            |   2 +
 .../python/vortex_cuda/__init__.pyi           |  15 +++
 .../python/vortex_cuda/_lib.pyi               |   4 +
 vortex-python-cuda/src/lib.rs                 |  90 ++++++++++++++++
 vortex-python-cuda/test/test_native_bridge.py | 100 ++++++++++++++++++
 5 files changed, 211 insertions(+)
 create mode 100644 vortex-python-cuda/python/vortex_cuda/__init__.pyi

diff --git a/vortex-python-cuda/python/vortex_cuda/__init__.py b/vortex-python-cuda/python/vortex_cuda/__init__.py
index c5d1610724e..2c07ad3c3c6 100644
--- a/vortex-python-cuda/python/vortex_cuda/__init__.py
+++ b/vortex-python-cuda/python/vortex_cuda/__init__.py
@@ -6,6 +6,8 @@
 
 _debug_array_metadata_dtype = _lib._debug_array_metadata_dtype
 _debug_array_metadata_display_values = _lib._debug_array_metadata_display_values
+_debug_arrow_device_array_capsule_summary = _lib._debug_arrow_device_array_capsule_summary
+_debug_consume_arrow_device_array_capsules = _lib._debug_consume_arrow_device_array_capsules
 cuda_available = _lib.cuda_available
 export_device_array = _lib.export_device_array
 
diff --git a/vortex-python-cuda/python/vortex_cuda/__init__.pyi b/vortex-python-cuda/python/vortex_cuda/__init__.pyi
new file mode 100644
index 00000000000..170ca21cfb8
--- /dev/null
+++ b/vortex-python-cuda/python/vortex_cuda/__init__.pyi
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+def _debug_array_metadata_dtype(array: object) -> str: ...
+def _debug_array_metadata_display_values(array: object) -> str: ...
+def _debug_arrow_device_array_capsule_summary(schema: object, device_array: object) -> dict[str, object]: ...
+def _debug_consume_arrow_device_array_capsules(
+    schema: object, device_array: object
+) -> tuple[bool, bool, bool, bool, bool, bool]: ...
+def cuda_available() -> bool: ...
+def export_device_array(
+    array: object, requested_schema: object | None = None, **kwargs: object
+) -> tuple[object, object]: ...
+
+__all__: list[str]
diff --git a/vortex-python-cuda/python/vortex_cuda/_lib.pyi b/vortex-python-cuda/python/vortex_cuda/_lib.pyi
index dad7628363d..51b4e100fe7 100644
--- a/vortex-python-cuda/python/vortex_cuda/_lib.pyi
+++ b/vortex-python-cuda/python/vortex_cuda/_lib.pyi
@@ -3,6 +3,10 @@
 
 def _debug_array_metadata_dtype(array: object) -> str: ...
 def _debug_array_metadata_display_values(array: object) -> str: ...
+def _debug_arrow_device_array_capsule_summary(schema: object, device_array: object) -> dict[str, object]: ...
+def _debug_consume_arrow_device_array_capsules(
+    schema: object, device_array: object
+) -> tuple[bool, bool, bool, bool, bool, bool]: ...
 def cuda_available() -> bool: ...
 def export_device_array(
     array: object, requested_schema: object | None = None, **kwargs: object
diff --git a/vortex-python-cuda/src/lib.rs b/vortex-python-cuda/src/lib.rs
index f93198cceb4..aed211eaf3b 100644
--- a/vortex-python-cuda/src/lib.rs
+++ b/vortex-python-cuda/src/lib.rs
@@ -461,6 +461,88 @@ fn release_exported(exported: &mut ArrowDeviceArrayWithSchema) {
     release_device_array(&mut exported.array);
 }
 
+/// Return non-owning details from Arrow Device capsules for Python-side smoke consumers.
+#[pyfunction]
+fn _debug_arrow_device_array_capsule_summary<'py>(
+    py: Python<'py>,
+    schema: Bound<'py, PyCapsule>,
+    device_array: Bound<'py, PyCapsule>,
+) -> PyResult> {
+    let schema = unsafe {
+        schema
+            .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+            .cast::()
+            .as_ref()
+    };
+    let device_array = unsafe {
+        device_array
+            .pointer_checked(Some(ARROW_DEVICE_ARRAY_CAPSULE_NAME))?
+            .cast::()
+            .as_ref()
+    };
+
+    let summary = PyDict::new(py);
+    summary.set_item("schema_live", schema.release.is_some())?;
+    summary.set_item("array_live", device_array.array.release.is_some())?;
+    summary.set_item("is_cuda", device_array.device_type == ARROW_DEVICE_CUDA)?;
+    summary.set_item("device_type", device_array.device_type)?;
+    summary.set_item("device_id", device_array.device_id)?;
+    summary.set_item("length", device_array.array.length)?;
+    summary.set_item("null_count", device_array.array.null_count)?;
+    summary.set_item("n_buffers", device_array.array.n_buffers)?;
+    summary.set_item("n_children", device_array.array.n_children)?;
+    Ok(summary)
+}
+
+/// Simulate a Python Arrow Device consumer taking ownership from the returned capsules.
+#[pyfunction]
+fn _debug_consume_arrow_device_array_capsules(
+    schema: Bound<'_, PyCapsule>,
+    device_array: Bound<'_, PyCapsule>,
+) -> PyResult<(bool, bool, bool, bool, bool, bool)> {
+    let mut schema_ptr = schema
+        .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+        .cast::();
+    let mut device_array_ptr = device_array
+        .pointer_checked(Some(ARROW_DEVICE_ARRAY_CAPSULE_NAME))?
+        .cast::();
+
+    let schema_ref = unsafe { schema_ptr.as_mut() };
+    let device_array_ref = unsafe { device_array_ptr.as_mut() };
+    let schema_had_release = schema_ref.release.is_some();
+    let array_had_release = device_array_ref.array.release.is_some();
+
+    release_schema(schema_ref);
+    release_device_array(device_array_ref);
+
+    let schema_release_cleared = schema_ref.release.is_none();
+    let array_release_cleared = device_array_ref.array.release.is_none();
+
+    set_capsule_name(&schema, USED_ARROW_SCHEMA_CAPSULE_NAME)?;
+    set_capsule_name(&device_array, USED_ARROW_DEVICE_ARRAY_CAPSULE_NAME)?;
+
+    Ok((
+        schema_had_release,
+        array_had_release,
+        schema_release_cleared,
+        array_release_cleared,
+        capsule_is_valid(&schema, USED_ARROW_SCHEMA_CAPSULE_NAME),
+        capsule_is_valid(&device_array, USED_ARROW_DEVICE_ARRAY_CAPSULE_NAME),
+    ))
+}
+
+fn set_capsule_name(capsule: &Bound<'_, PyCapsule>, name: &CStr) -> PyResult<()> {
+    let result = unsafe { ffi::PyCapsule_SetName(capsule.as_ptr(), name.as_ptr()) };
+    if result != 0 {
+        return Err(PyErr::fetch(capsule.py()));
+    }
+    Ok(())
+}
+
+fn capsule_is_valid(capsule: &Bound<'_, PyCapsule>, name: &CStr) -> bool {
+    unsafe { ffi::PyCapsule_IsValid(capsule.as_ptr(), name.as_ptr()) == 1 }
+}
+
 fn schema_capsule<'py>(
     py: Python<'py>,
     schema: FFI_ArrowSchema,
@@ -573,6 +655,14 @@ fn _lib(m: &Bound) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(cuda_available, m)?)?;
     m.add_function(wrap_pyfunction!(_debug_array_metadata_dtype, m)?)?;
     m.add_function(wrap_pyfunction!(_debug_array_metadata_display_values, m)?)?;
+    m.add_function(wrap_pyfunction!(
+        _debug_arrow_device_array_capsule_summary,
+        m
+    )?)?;
+    m.add_function(wrap_pyfunction!(
+        _debug_consume_arrow_device_array_capsules,
+        m
+    )?)?;
     m.add_function(wrap_pyfunction!(export_device_array, m)?)?;
     Ok(())
 }
diff --git a/vortex-python-cuda/test/test_native_bridge.py b/vortex-python-cuda/test/test_native_bridge.py
index c98e70a11d5..4b09cf8d1b8 100644
--- a/vortex-python-cuda/test/test_native_bridge.py
+++ b/vortex-python-cuda/test/test_native_bridge.py
@@ -2,12 +2,44 @@
 # SPDX-FileCopyrightText: Copyright the Vortex contributors
 # pyright: reportPrivateUsage=false
 
+import gc
+from typing import cast
+
 import pytest
 import vortex_cuda
 
 import vortex
 
 
+def _require_cuda() -> None:
+    if not vortex_cuda.cuda_available():
+        pytest.skip("CUDA device is not available")
+
+
+def _assert_exported_device_array(
+    array: object, *, length: int, null_count: int, n_children: int
+) -> tuple[object, object]:
+    schema, device_array = vortex_cuda.export_device_array(array)
+    summary = cast(
+        dict[str, object],
+        vortex_cuda._debug_arrow_device_array_capsule_summary(  # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType]
+            schema, device_array
+        ),
+    )
+
+    assert summary["schema_live"] is True
+    assert summary["array_live"] is True
+    assert summary["is_cuda"] is True
+    assert summary["length"] == length
+    assert summary["null_count"] == null_count
+    assert summary["n_children"] == n_children
+    n_buffers = summary["n_buffers"]
+    assert isinstance(n_buffers, int)
+    assert n_buffers >= 0
+
+    return schema, device_array
+
+
 def test_debug_array_metadata_dtype_reads_base_vortex_array():
     array = vortex.Array.from_range(range(0, 3))
 
@@ -64,3 +96,71 @@ def test_export_device_array_returns_capsules_or_clean_cuda_error():
     schema, device_array = vortex_cuda.export_device_array(array)
     assert type(schema).__name__ == "PyCapsule"
     assert type(device_array).__name__ == "PyCapsule"
+
+
+def test_arrow_device_export_primitive_array():
+    _require_cuda()
+
+    _ = _assert_exported_device_array(vortex.array([1, 2, 3]), length=3, null_count=0, n_children=0)
+
+
+def test_arrow_device_export_nullable_primitive_array():
+    _require_cuda()
+
+    _ = _assert_exported_device_array(vortex.array([1, None, 3]), length=3, null_count=1, n_children=0)
+
+
+def test_arrow_device_export_nullable_bool_array():
+    _require_cuda()
+
+    _ = _assert_exported_device_array(vortex.array([True, None, False]), length=3, null_count=1, n_children=0)
+
+
+def test_arrow_device_export_string_array():
+    _require_cuda()
+
+    _ = _assert_exported_device_array(
+        vortex.array(["alpha", "beta", "a longer string that should use the varbin data buffer"]),
+        length=3,
+        null_count=0,
+        n_children=0,
+    )
+
+
+def test_arrow_device_export_struct_array():
+    import pyarrow as pa
+
+    _require_cuda()
+
+    arrow_table = pa.table({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
+    struct_array = vortex.Array.from_arrow(
+        pa.StructArray.from_arrays(  # pyright: ignore[reportUnknownMemberType]
+            [arrow_table.column("a").combine_chunks(), arrow_table.column("b").combine_chunks()],
+            names=["a", "b"],
+        )
+    )
+
+    _ = _assert_exported_device_array(struct_array, length=3, null_count=0, n_children=2)
+
+
+def test_arrow_device_capsules_drop_unconsumed():
+    _require_cuda()
+
+    schema, device_array = _assert_exported_device_array(vortex.array([1, 2, 3]), length=3, null_count=0, n_children=0)
+    del schema, device_array
+    _ = gc.collect()
+
+
+def test_arrow_device_capsules_consumer_release_and_used_names():
+    _require_cuda()
+
+    schema, device_array = _assert_exported_device_array(vortex.array([1, 2, 3]), length=3, null_count=0, n_children=0)
+    consume_result = cast(
+        tuple[bool, bool, bool, bool, bool, bool],
+        vortex_cuda._debug_consume_arrow_device_array_capsules(  # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType]
+            schema, device_array
+        ),
+    )
+    assert consume_result == (True, True, True, True, True, True)
+    del schema, device_array
+    _ = gc.collect()