Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,39 @@ jobs:
uv run --all-packages make html
working-directory: docs/

python-cuda-test:
name: "Python CUDA (test)"
if: github.repository == 'vortex-data/vortex'
runs-on: >-
${{ format('runs-on={0}/runner=gpu/tag=python-cuda-test', github.run_id) }}
timeout-minutes: 30
env:
RUST_LOG: "info,maturin=off,uv=debug"
MATURIN_PEP517_ARGS: "--profile ci"
steps:
- uses: runs-on/action@v2
with:
sccache: s3
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
- uses: ./.github/actions/setup-rust
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
components: cargo
- name: Pin rustup proxy to repository toolchain
run: |
TOOLCHAIN="$(grep '^channel' rust-toolchain.toml | cut -d '"' -f 2)"
echo "RUSTUP_TOOLCHAIN=$TOOLCHAIN" >> "$GITHUB_ENV"
- name: Install uv
uses: spiraldb/actions/.github/actions/setup-uv@a746510eafaa926484c354541cfc49b2ec06cc63 # 0.18.6
with:
sync: false

- name: Pytest - PyVortex CUDA bridge
run: |
uv run --extra cuda \
Comment thread
0ax1 marked this conversation as resolved.
pytest --benchmark-disable ../vortex-python-cuda/test/test_native_bridge.py
working-directory: vortex-python/

rust-docs:
name: "Rust (docs)"
needs: duckdb-ready
Expand Down
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ members = [
"vortex-ffi",
"fuzz",
"vortex-jni",
"vortex-python-abi",
"vortex-python",
"vortex-python-cuda",
"vortex-tui",
Expand Down
18 changes: 18 additions & 0 deletions vortex-python-abi/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[package]
name = "vortex-python-abi"
authors = { workspace = true }
categories = { workspace = true }
description = "Shared internal ABI types for Vortex Python extension modules."
edition = { workspace = true }
homepage = { workspace = true }
include = { workspace = true }
keywords = { workspace = true }
license = { workspace = true }
publish = false
readme = { workspace = true }
repository = { workspace = true }
rust-version = { workspace = true }
version = { workspace = true }

[lints]
workspace = true
46 changes: 46 additions & 0 deletions vortex-python-abi/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Shared private ABI for Python buffer handoff between `vortex-data` extension modules.

use std::ffi::CStr;
use std::ffi::c_void;

/// Name used for PyCapsules carrying [`VortexBufferExport`] pointers.
pub const BUFFER_EXPORT_CAPSULE_NAME: &CStr = c"vortex_buffer_export";

/// Current version of the [`VortexBufferExport`] ABI.
pub const VORTEX_BUFFER_EXPORT_VERSION: u32 = 1;

/// Buffer kind for host-accessible buffers.
pub const VORTEX_BUFFER_HOST: u32 = 0;

/// Buffer kind for device-accessible buffers.
pub const VORTEX_BUFFER_DEVICE: u32 = 1;

/// C-ABI descriptor for passing buffers between `vortex-data` and optional extension modules.
///
/// This type is shared by Rust crates, but the values are exchanged through Python capsules between
/// independently compiled extension modules. The producer owns allocation details and must provide a
/// `release` callback that releases both `private_data` and the descriptor itself.
#[repr(C)]
pub struct VortexBufferExport {
/// ABI version. Consumers must reject unsupported versions.
pub version: u32,
/// Buffer kind. Consumers may support [`VORTEX_BUFFER_HOST`] or [`VORTEX_BUFFER_DEVICE`].
pub kind: u32,
/// Pointer to the first byte of the exported buffer, or null for empty buffers.
pub ptr: *const u8,
/// Length of the buffer in bytes.
pub len: usize,
/// Required byte alignment of `ptr`.
pub alignment: usize,
/// Device identifier for device buffers, or -1 for host buffers.
pub device_id: i32,
/// Optional synchronization event for device buffers.
pub sync_event: *mut c_void,
/// Producer-owned private data used by `release`.
pub private_data: *mut c_void,
/// Producer-owned release callback. It must release `private_data` and this descriptor.
pub release: Option<unsafe extern "C" fn(*mut VortexBufferExport)>,
}
2 changes: 2 additions & 0 deletions vortex-python-cuda/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ extension-module = []

[dependencies]
arrow-schema = { workspace = true }
bytes = { workspace = true }
pyo3 = { workspace = true, features = ["abi3", "abi3-py311"] }
vortex = { workspace = true }
vortex-cuda = { workspace = true }
vortex-python-abi = { path = "../vortex-python-abi" }
14 changes: 7 additions & 7 deletions vortex-python-cuda/python/vortex_cuda/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright the Vortex contributors
# pyright: reportMissingModuleSource=false, reportPrivateUsage=false

from ._lib import ( # pyright: ignore[reportMissingModuleSource]
_debug_array_metadata_dtype as _debug_array_metadata_dtype, # pyright: ignore[reportPrivateUsage]
)
from ._lib import ( # pyright: ignore[reportMissingModuleSource]
cuda_available,
export_device_array,
)
from . import _lib

_debug_array_metadata_dtype = _lib._debug_array_metadata_dtype
_debug_array_metadata_display_values = _lib._debug_array_metadata_display_values
cuda_available = _lib.cuda_available
export_device_array = _lib.export_device_array

__all__ = ["cuda_available", "export_device_array"]
1 change: 1 addition & 0 deletions vortex-python-cuda/python/vortex_cuda/_lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright the Vortex contributors

def _debug_array_metadata_dtype(array: object) -> str: ...
def _debug_array_metadata_display_values(array: object) -> str: ...
def cuda_available() -> bool: ...
def export_device_array(
array: object, requested_schema: object | None = None, **kwargs: object
Expand Down
138 changes: 125 additions & 13 deletions vortex-python-cuda/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ use vortex::buffer::ByteBuffer;
use vortex::dtype::DType;
use vortex::error::VortexError;
use vortex::error::VortexResult;
use vortex::error::vortex_bail;
use vortex::error::vortex_ensure;
use vortex::error::vortex_err;
use vortex::flatbuffers::FlatBuffer;
Expand All @@ -49,12 +48,115 @@ use vortex_cuda::arrow::ArrowDeviceArrayWithSchema;
use vortex_cuda::arrow::DeviceArrayExt;
use vortex_cuda::arrow::release_device_array;
use vortex_cuda::arrow::release_schema;
use vortex_python_abi::BUFFER_EXPORT_CAPSULE_NAME;
use vortex_python_abi::VORTEX_BUFFER_EXPORT_VERSION;
use vortex_python_abi::VORTEX_BUFFER_HOST;
use vortex_python_abi::VortexBufferExport;

const ARROW_SCHEMA_CAPSULE_NAME: &CStr = c_str!("arrow_schema");
const USED_ARROW_SCHEMA_CAPSULE_NAME: &CStr = c_str!("used_arrow_schema");
const ARROW_DEVICE_ARRAY_CAPSULE_NAME: &CStr = c_str!("arrow_device_array");
const USED_ARROW_DEVICE_ARRAY_CAPSULE_NAME: &CStr = c_str!("used_arrow_device_array");

struct BufferExportGuard {
export: NonNull<VortexBufferExport>,
}

impl BufferExportGuard {
fn export(&self) -> &VortexBufferExport {
unsafe { self.export.as_ref() }
}
}

impl AsRef<[u8]> for BufferExportGuard {
fn as_ref(&self) -> &[u8] {
let export = self.export();
if export.len == 0 {
&[]
} else {
unsafe { std::slice::from_raw_parts(export.ptr, export.len) }
}
}
}

impl Drop for BufferExportGuard {
fn drop(&mut self) {
// The producer's release callback owns cleanup of both private data and the descriptor.
let export = unsafe { self.export.as_ref() };
if let Some(release) = export.release {
unsafe { release(self.export.as_ptr()) };
}
}
}

// The guard is moved into `Bytes::from_owner`, which requires `Send + Sync`. After import we disable
// the source capsule destructor and own the C export until this guard is dropped.
unsafe impl Send for BufferExportGuard {}
unsafe impl Sync for BufferExportGuard {}

fn import_buffer_from_capsule(capsule: &Bound<'_, PyCapsule>) -> PyResult<BufferHandle> {
let export_ptr = capsule
.pointer_checked(Some(BUFFER_EXPORT_CAPSULE_NAME))?
.cast::<VortexBufferExport>();
let export = unsafe { export_ptr.as_ref() };

if export.version != VORTEX_BUFFER_EXPORT_VERSION {
return Err(PyValueError::new_err(format!(
"unsupported VortexBufferExport version {}",
export.version
)));
}
if export.kind != VORTEX_BUFFER_HOST {
return Err(PyValueError::new_err(format!(
"unsupported buffer kind {} (only host buffers are supported in metadata bridge)",
export.kind
)));
}

if export.len != 0 && export.ptr.is_null() {
return Err(PyValueError::new_err(
"non-empty VortexBufferExport has null data pointer",
));
}
if export.release.is_none() {
return Err(PyValueError::new_err(
"VortexBufferExport is missing a release callback",
));
}

let len = export.len;
let alignment = vortex::buffer::Alignment::try_from(
u32::try_from(export.alignment)
.map_err(|_| PyValueError::new_err("buffer alignment exceeds u32"))?,
)
.map_err(|e| PyValueError::new_err(e.to_string()))?;

if len != 0 && !alignment.is_ptr_aligned(export.ptr) {
return Err(PyValueError::new_err(format!(
"buffer pointer is not aligned to requested alignment {alignment}"
)));
}

// Transfer ownership of the boxed VortexBufferExport from the producer capsule into the Bytes
// owner below. Otherwise the producer capsule could be dropped before the reconstructed
// BufferHandle, leaving the Bytes owner with a dangling export pointer.
unsafe { ffi::PyCapsule_SetDestructor(capsule.as_ptr(), None) };
if PyErr::occurred(capsule.py()) {
return Err(PyErr::fetch(capsule.py()));
}

let guard = BufferExportGuard { export: export_ptr };

let byte_buffer = if len == 0 {
drop(guard);
ByteBuffer::empty_aligned(alignment)
} else {
ByteBuffer::from(bytes::Bytes::from_owner(guard)).aligned(alignment)
};

Ok(BufferHandle::new_host(byte_buffer))
}

struct ExportedDeviceArray(ArrowDeviceArrayWithSchema);

// The exported Arrow C Device structs own CPU-side metadata plus CUDA device pointers through their
Expand Down Expand Up @@ -101,7 +203,7 @@ struct ArrayMetadata {
dtype: Vec<u8>,
len: usize,
metadata: Vec<u8>,
buffer_count: usize,
buffers: Vec<BufferHandle>,
children: Vec<ArrayMetadata>,
}

Expand Down Expand Up @@ -147,6 +249,16 @@ fn parse_array_metadata(value: &Bound<'_, PyAny>) -> PyResult<ArrayMetadata> {
)));
}

let buffers = tuple
.get_item(4)?
.cast::<PyList>()?
.iter()
.map(|item| {
let capsule: Bound<'_, PyCapsule> = item.extract()?;
import_buffer_from_capsule(&capsule)
})
.collect::<PyResult<Vec<_>>>()?;

let children = tuple
.get_item(5)?
.cast::<PyList>()?
Expand All @@ -159,7 +271,7 @@ fn parse_array_metadata(value: &Bound<'_, PyAny>) -> PyResult<ArrayMetadata> {
dtype: tuple.get_item(1)?.extract()?,
len: tuple.get_item(2)?.extract()?,
metadata: tuple.get_item(3)?.extract()?,
buffer_count: tuple.get_item(4)?.extract()?,
buffers,
children,
})
}
Expand All @@ -173,14 +285,6 @@ fn deserialize_metadata_tree(
metadata: &ArrayMetadata,
session: &VortexSession,
) -> VortexResult<ArrayRef> {
if metadata.buffer_count != 0 {
vortex_bail!(
"metadata-only bridge cannot deserialize array {} with {} buffers yet",
metadata.encoding_id,
metadata.buffer_count
);
}

let dtype = dtype_from_metadata(metadata, session)?;
let children = metadata
.children
Expand All @@ -194,12 +298,11 @@ fn deserialize_metadata_tree(
.registry()
.find(&encoding_id)
.ok_or_else(|| vortex_err!("Unknown array encoding: {}", metadata.encoding_id))?;
let buffers: &[BufferHandle] = &[];
let decoded = plugin.deserialize(
&dtype,
metadata.len,
&metadata.metadata,
buffers,
&metadata.buffers,
&children,
session,
)?;
Expand Down Expand Up @@ -246,6 +349,14 @@ fn _debug_array_metadata_dtype(array: Bound<'_, PyAny>) -> PyResult<String> {
Ok(array.dtype().to_string())
}

/// Return array values after crossing the private vtable-metadata bridge.
#[pyfunction]
fn _debug_array_metadata_display_values(array: Bound<'_, PyAny>) -> PyResult<String> {
let metadata = extract_array_metadata(&array)?;
let array = deserialize_metadata_tree(&metadata, &METADATA_SESSION).map_err(to_py_err)?;
Ok(array.display_values().to_string())
}

/// Export a PyVortex array as Arrow C Device schema and array PyCapsules.
#[pyfunction]
#[pyo3(signature = (array, requested_schema = None, **kwargs))]
Expand Down Expand Up @@ -461,6 +572,7 @@ unsafe extern "C" fn release_device_array_capsule(capsule: *mut ffi::PyObject) {
fn _lib(m: &Bound<PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(cuda_available, m)?)?;
m.add_function(wrap_pyfunction!(_debug_array_metadata_dtype, m)?)?;
m.add_function(wrap_pyfunction!(_debug_array_metadata_display_values, m)?)?;
m.add_function(wrap_pyfunction!(export_device_array, m)?)?;
Ok(())
}
Loading
Loading