diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f28973fbf97..90ca194b812 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -386,10 +386,10 @@ jobs: fail-fast: false matrix: include: + # We don't run memory sanitizer as it provides many false positives + # for std - sanitizer: asan - sanitizer_flags: "-Zsanitizer=address -Zsanitize=leak" - - sanitizer: msan - sanitizer_flags: "-Zsanitizer=memory" + sanitizer_flags: "-Zsanitizer=address,leak" - sanitizer: tsan sanitizer_flags: "-Zsanitizer=thread" name: "Rust tests (${{ matrix.sanitizer }})" @@ -421,15 +421,16 @@ jobs: run: | rustup toolchain install $NIGHTLY_TOOLCHAIN rustup component add --toolchain $NIGHTLY_TOOLCHAIN rust-src rustfmt clippy llvm-tools-preview - export RUSTFLAGS="${RUSTFLAGS} ${{ matrix.sanitizer_flags }}" - name: Build tests with sanitizer run: | + RUSTFLAGS="${RUSTFLAGS} ${{ matrix.sanitizer_flags }}" \ cargo +$NIGHTLY_TOOLCHAIN build --locked --all-features \ --target x86_64-unknown-linux-gnu -Zbuild-std \ -p vortex-buffer -p vortex-fastlanes -p vortex-fsst -p vortex-alp -p vortex-array - name: Run tests with sanitizer run: | + RUSTFLAGS="${RUSTFLAGS} ${{ matrix.sanitizer_flags }}" \ cargo +$NIGHTLY_TOOLCHAIN nextest run --locked --all-features \ --target x86_64-unknown-linux-gnu --no-fail-fast -Zbuild-std \ -p vortex-buffer -p vortex-fastlanes -p vortex-fsst -p vortex-alp -p vortex-array @@ -440,6 +441,7 @@ jobs: # TODO(myrrc): remove --no-default-features once we make Mimalloc opt-in - name: Run vortex-ffi tests with sanitizer run: | + RUSTFLAGS="${RUSTFLAGS} ${{ matrix.sanitizer_flags }}" \ cargo +$NIGHTLY_TOOLCHAIN test --locked --no-default-features \ --target x86_64-unknown-linux-gnu --no-fail-fast -Zbuild-std \ -p vortex-ffi -- --no-capture @@ -452,7 +454,7 @@ jobs: # We don't run memory sanitizer as it's clang-only and provides many # false positives for Catch2 - sanitizer: asan - sanitizer_flags: "-Zsanitizer=address -Zsanitize=leak" + 
sanitizer_flags: "-Zsanitizer=address,leak" - sanitizer: tsan sanitizer_flags: "-Zsanitizer=thread" name: "Rust/C++ FFI tests (${{ matrix.sanitizer }})" @@ -486,15 +488,13 @@ jobs: run: | rustup toolchain install $NIGHTLY_TOOLCHAIN rustup component add --toolchain $NIGHTLY_TOOLCHAIN rust-src rustfmt clippy llvm-tools-preview - - # Export flags here so that rustfilt won't be built with sanitizers - export RUSTFLAGS="-A warnings -Cunsafe-allow-abi-mismatch=sanitizer \ - --cfg disable_loom --cfg vortex_nightly -C debuginfo=2 \ - -C opt-level=0 -C strip=none -Zexternal-clangrt \ - ${{ matrix.sanitizer_flags }}" - name: Build FFI library run: | # TODO(myrrc): remove --no-default-features + RUSTFLAGS="-A warnings -Cunsafe-allow-abi-mismatch=sanitizer \ + --cfg disable_loom --cfg vortex_nightly -C debuginfo=2 \ + -C opt-level=0 -C strip=none -Zexternal-clangrt \ + ${{ matrix.sanitizer_flags }}" \ cargo +$NIGHTLY_TOOLCHAIN build --locked --no-default-features \ --target x86_64-unknown-linux-gnu -Zbuild-std \ -p vortex-ffi diff --git a/vortex-ffi/cinclude/vortex.h b/vortex-ffi/cinclude/vortex.h index 7b6bf9e9a17..489f6af255c 100644 --- a/vortex-ffi/cinclude/vortex.h +++ b/vortex-ffi/cinclude/vortex.h @@ -114,6 +114,66 @@ typedef enum { DTYPE_FIXED_SIZE_LIST = 9, } vx_dtype_variant; +/** + * Equalities, inequalities, and boolean operations over possibly null values. + * For most operations, if either side is null, the result is null. + * VX_OPERATOR_KLEENE_AND, VX_OPERATOR_KLEENE_OR obey Kleene (three-valued) + * logic + */ +typedef enum { + /** + * Expressions are equal. + */ + VX_OPERATOR_EQ = 0, + /** + * Expressions are not equal. 
+ */ + VX_OPERATOR_NOT_EQ = 1, + /** + * Expression is greater than another + */ + VX_OPERATOR_GT = 2, + /** + * Expression is greater or equal to another + */ + VX_OPERATOR_GTE = 3, + /** + * Expression is less than another + */ + VX_OPERATOR_LT = 4, + /** + * Expression is less or equal to another + */ + VX_OPERATOR_LTE = 5, + /** + * Boolean AND /\. + */ + VX_OPERATOR_KLEENE_AND = 6, + /** + * Boolean OR \/. + */ + VX_OPERATOR_KLEENE_OR = 7, + /** + * The sum of the arguments. + * Errors at runtime if the sum would overflow or underflow. + */ + VX_OPERATOR_ADD = 8, + /** + * The difference between the arguments. + * Errors at runtime if the sum would overflow or underflow. + * The result is null at any index where either input is null. + */ + VX_OPERATOR_SUB = 9, + /** + * Multiply two numbers + */ + VX_OPERATOR_MUL = 10, + /** + * Divide the left side by the right side + */ + VX_OPERATOR_DIV = 11, +} vx_binary_operator; + /** * Log levels for the Vortex library. */ @@ -297,6 +357,22 @@ typedef struct vx_dtype vx_dtype; */ typedef struct vx_error vx_error; +/** + * A node in a Vortex expression tree. + * + * Expressions represent scalar computations that can be performed on + * data. Each expression consists of an encoding (vtable), heap-allocated + * metadata, and child expressions. + * + * Unless stated explicitly, all expressions returned are owned and must + * be freed by the caller. + * Unless stated explicitly, if an operation on const vx_expression* is + * passed NULL, NULL is returned. + * Operations on expressions don't take ownership of input values, and so + * input values must be freed by the caller. + */ +typedef struct vx_expression vx_expression; + /** * A handle to a Vortex file encapsulating the footer and logic for instantiating a reader. 
*/ @@ -478,6 +554,13 @@ const vx_string *vx_array_get_utf8(const vx_array *array, uint32_t index); */ const vx_binary *vx_array_get_binary(const vx_array *array, uint32_t index); +/** + * Apply the expression to the array, wrapping it with a ScalarFnArray. + * This operation takes constant time as it doesn't execute the underlying + * array. Executing the underlying array still takes O(n) time. + */ +const vx_array *vx_array_apply(const vx_array *array, const vx_expression *expression, vx_error **error); + /** * Free an owned [`vx_array_iterator`] object. */ @@ -677,6 +760,115 @@ void vx_error_free(vx_error *ptr); */ const vx_string *vx_error_get_message(const vx_error *error); +/** + * Free an owned [`vx_expression`] object. + */ +void vx_expression_free(vx_expression *ptr); + +/** + * Create a root expression. A root expression, applied to an array in + * vx_array_apply, takes the array itself as opposed to functions like + * vx_expression_column or vx_expression_select which take the array's parts. + * + * Example: + * + * const vx_array* array = ...; + * vx_expression* root = vx_expression_root(); + * const vx_error* error = NULL; + * vx_array* applied_array = vx_array_apply(array, root, &error); + * // array and applied_array are identical + * vx_array_free(applied_array); + * vx_expression_free(root); + * vx_array_free(array); + * + */ +vx_expression *vx_expression_root(void); + +/** + * Create an expression that selects (includes) specific fields from a child + * expression. Child expression must have a DTYPE_STRUCT dtype. Errors in + * vx_array_apply if the child expression doesn't have a specified field. 
+ *
+ * Example:
+ *
+ *     vx_expression* root = vx_expression_root();
+ *     const char* names[] = {"name", "age"};
+ *     vx_expression* select = vx_expression_select(names, 2, root);
+ *     vx_expression_free(select);
+ *     vx_expression_free(root);
+ *
+ */
+vx_expression *vx_expression_select(const char *const *names, size_t len, const vx_expression *child);
+
+/**
+ * Create an AND expression for multiple child expressions.
+ * If there are no input expressions, returns NULL.
+ */
+vx_expression *vx_expression_and(const vx_expression *const *expressions, size_t len);
+
+/**
+ * Create an OR disjunction expression for multiple child expressions.
+ * If there are no input expressions, returns NULL.
+ */
+vx_expression *vx_expression_or(const vx_expression *const *expressions, size_t len);
+
+/**
+ * Create a binary expression for two expressions of form lhs OP rhs.
+ * If either input is NULL, returns NULL.
+ *
+ * Example for a binary sum (root is the result of vx_expression_root()):
+ *
+ *     vx_expression* age = vx_expression_get_item("age", root);
+ *     vx_expression* height = vx_expression_get_item("height", root);
+ *     vx_expression* sum = vx_expression_binary(VX_OPERATOR_ADD, age, height);
+ *     vx_expression_free(sum);
+ *     vx_expression_free(height);
+ *     vx_expression_free(age);
+ *
+ * Example for a binary equality function:
+ *
+ *     vx_expression* vx_expression_eq(
+ *         const vx_expression* lhs,
+ *         const vx_expression* rhs
+ *     ) {
+ *         return vx_expression_binary(VX_OPERATOR_EQ, lhs, rhs);
+ *     }
+ *
+ */
+vx_expression *
+vx_expression_binary(vx_binary_operator operator_, const vx_expression *lhs, const vx_expression *rhs);
+
+/**
+ * Create a logical NOT of the child expression.
+ *
+ * Returns the logical negation of the input boolean expression.
+ */
+vx_expression *vx_expression_not(const vx_expression *child);
+
+/**
+ * Create an expression that checks for null values.
+ *
+ * Returns a boolean array indicating which positions contain null values.
+ */ +vx_expression *vx_expression_is_null(const vx_expression *child); + +/** + * Create an expression that extracts a named field from a struct expression. + * Child expression must have a DTYPE_STRUCT dtype. + * Errors in vx_array_apply if the root array doesn't have a specified field. + * + * Accesses the specified field from the result of the child expression. + * Equivalent to select(&item, 1, child). + */ +vx_expression *vx_expression_get_item(const char *item, const vx_expression *child); + +/** + * Create an expression that checks if a value is contained in a list. + * + * Returns a boolean array indicating whether the value appears in each list. + */ +vx_expression *vx_expression_list_contains(const vx_expression *list, const vx_expression *value); + /** * Clone a borrowed [`vx_file`], returning an owned [`vx_file`]. * diff --git a/vortex-ffi/src/array.rs b/vortex-ffi/src/array.rs index 92a5bcba100..2516b4980cc 100644 --- a/vortex-ffi/src/array.rs +++ b/vortex-ffi/src/array.rs @@ -9,6 +9,7 @@ use vortex::array::DynArray; use vortex::array::ToCanonical; use vortex::dtype::half::f16; use vortex::error::VortexExpect; +use vortex::error::vortex_ensure; use vortex::error::vortex_err; use crate::arc_dyn_wrapper; @@ -16,6 +17,7 @@ use crate::binary::vx_binary; use crate::dtype::vx_dtype; use crate::error::try_or_default; use crate::error::vx_error; +use crate::expression::vx_expression; use crate::string::vx_string; arc_dyn_wrapper!( @@ -186,11 +188,30 @@ pub unsafe extern "C-unwind" fn vx_array_get_binary( } } +/// Apply the expression to the array, wrapping it with a ScalarFnArray. +/// This operation takes constant time as it doesn't execute the underlying +/// array. Executing the underlying array still takes O(n) time. 
+#[unsafe(no_mangle)] +pub unsafe extern "C" fn vx_array_apply( + array: *const vx_array, + expression: *const vx_expression, + error: *mut *mut vx_error, +) -> *const vx_array { + try_or_default(error, || { + vortex_ensure!(!array.is_null()); + vortex_ensure!(!expression.is_null()); + let array = vx_array::as_ref(array); + let expression = vx_expression::as_ref(expression); + Ok(vx_array::new(Arc::new(array.apply(expression)?))) + }) +} + #[cfg(test)] mod tests { use std::ptr; use vortex::array::IntoArray; + use vortex::array::arrays::BoolArray; use vortex::array::arrays::PrimitiveArray; use vortex::array::arrays::StructArray; use vortex::array::arrays::VarBinViewArray; @@ -199,12 +220,16 @@ mod tests { use vortex::buffer::buffer; #[cfg(not(miri))] use vortex::dtype::half::f16; + use vortex::expr::eq; + use vortex::expr::lit; + use vortex::expr::root; use crate::array::*; use crate::binary::vx_binary_free; use crate::dtype::vx_dtype_get_variant; use crate::dtype::vx_dtype_variant; use crate::error::vx_error_free; + use crate::expression::vx_expression_free; use crate::string::vx_string_free; #[test] @@ -424,6 +449,55 @@ mod tests { } } + #[test] + #[cfg_attr(miri, ignore)] + fn test_apply() { + let primitive = PrimitiveArray::new( + buffer![1i32, 2i32, 3i32, 3i32], + Validity::from_iter([true, false, true, true]), + ); + + unsafe { + let mut error = ptr::null_mut(); + + let res = vx_array_apply(ptr::null(), ptr::null(), &raw mut error); + assert!(res.is_null()); + assert!(!error.is_null()); + vx_error_free(error); + + let array = vx_array::new(primitive.into_array()); + + let res = vx_array_apply(array, ptr::null(), &raw mut error); + assert!(res.is_null()); + assert!(!error.is_null()); + vx_error_free(error); + + // Test with Vortex Rust-side expressions here, test C API for + // expressions in src/expressions.rs + let expression = eq(root(), lit(3i32)); + let expression = vx_expression::new(Box::new(expression)); + + let res = vx_array_apply(ptr::null(), 
expression, &raw mut error); + assert!(res.is_null()); + assert!(!error.is_null()); + vx_error_free(error); + + let res = vx_array_apply(array, expression, &raw mut error); + assert!(!res.is_null()); + assert!(error.is_null()); + { + let res = vx_array::as_ref(res); + let buffer = res.to_bool().to_bit_buffer(); + let expected = BoolArray::from_iter(vec![false, false, true, true]); + assert_eq!(buffer, expected.to_bit_buffer()); + } + vx_array_free(res); + + vx_expression_free(expression); + vx_array_free(array); + } + } + #[test] fn test_array_dtype_lifetime_pattern() { let array = { diff --git a/vortex-ffi/src/expression.rs b/vortex-ffi/src/expression.rs new file mode 100644 index 00000000000..ed2ae0ad999 --- /dev/null +++ b/vortex-ffi/src/expression.rs @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors +#![allow(non_camel_case_types)] + +use std::ffi::CStr; +use std::ffi::c_char; +use std::ptr; +use std::slice; +use std::sync::Arc; + +use vortex::dtype::FieldName; +use vortex::expr::Expression; +use vortex::expr::and_collect; +use vortex::expr::get_item; +use vortex::expr::is_null; +use vortex::expr::list_contains; +use vortex::expr::not; +use vortex::expr::or_collect; +use vortex::expr::root; +use vortex::expr::select; +use vortex::scalar_fn::ScalarFnVTableExt; +use vortex::scalar_fn::fns::binary::Binary; +use vortex::scalar_fn::fns::operators::Operator; + +// Expressions are Arc'ed inside +crate::box_wrapper!( + /// A node in a Vortex expression tree. + /// + /// Expressions represent scalar computations that can be performed on + /// data. Each expression consists of an encoding (vtable), heap-allocated + /// metadata, and child expressions. + /// + /// Unless stated explicitly, all expressions returned are owned and must + /// be freed by the caller. + /// Unless stated explicitly, if an operation on const vx_expression* is + /// passed NULL, NULL is returned. 
+    /// Operations on expressions don't take ownership of input values, and so
+    /// input values must be freed by the caller.
+    Expression,
+    vx_expression);
+
+/// Create a root expression. A root expression, applied to an array in
+/// vx_array_apply, takes the array itself as opposed to functions like
+/// vx_expression_get_item or vx_expression_select which take the array's parts.
+///
+/// Example:
+///
+///     const vx_array* array = ...;
+///     vx_expression* root = vx_expression_root();
+///     vx_error* error = NULL;
+///     const vx_array* applied_array = vx_array_apply(array, root, &error);
+///     // array and applied_array are identical
+///     vx_array_free(applied_array);
+///     vx_expression_free(root);
+///     vx_array_free(array);
+///
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn vx_expression_root() -> *mut vx_expression {
+    vx_expression::new(Box::new(root()))
+}
+
+/// Create an expression that selects (includes) specific fields from a child
+/// expression. Child expression must have a DTYPE_STRUCT dtype. Errors in
+/// vx_array_apply if the child expression doesn't have a specified field.
+///
+/// Example:
+///
+///     vx_expression* root = vx_expression_root();
+///     const char* names[] = {"name", "age"};
+///     vx_expression* select = vx_expression_select(names, 2, root);
+///     vx_expression_free(select);
+///     vx_expression_free(root);
+///
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn vx_expression_select(
+    names: *const *const c_char,
+    len: usize,
+    child: *const vx_expression,
+) -> *mut vx_expression {
+    if child.is_null() || (names.is_null() && len > 0) {
+        return ptr::null_mut(); // NULL child, or a NULL names array that would be read
+    }
+
+    #[expect(clippy::expect_used)]
+    let names: Vec<FieldName> = (0..len)
+        .map(|i| unsafe {
+            let name = *names.add(i); // caller guarantees `len` readable entries
+            let name = CStr::from_ptr(name)
+                .to_str()
+                .expect("converting pointer to str");
+            let name: Arc<str> = Arc::from(name);
+            name.into()
+        })
+        .collect();
+
+    let expr = select(names, vx_expression::as_ref(child).clone());
+    vx_expression::new(Box::new(expr))
+}
+
+/// Create an AND expression for multiple child expressions.
+/// If there are no input expressions, returns NULL.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn vx_expression_and(
+    expressions: *const *const vx_expression,
+    len: usize,
+) -> *mut vx_expression {
+    if expressions.is_null() {
+        return ptr::null_mut();
+    }
+    let slice = unsafe { slice::from_raw_parts(expressions, len) };
+    match and_collect(slice.iter().map(|x| vx_expression::as_ref(*x).clone())) {
+        Some(expr) => vx_expression::new(expr.into()),
+        None => ptr::null_mut(), // and_collect produced nothing to combine
+    }
+}
+
+/// Create an OR disjunction expression for multiple child expressions.
+/// If there are no input expressions, returns NULL.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn vx_expression_or(
+    expressions: *const *const vx_expression,
+    len: usize,
+) -> *mut vx_expression {
+    if expressions.is_null() {
+        return ptr::null_mut();
+    }
+    let slice = unsafe { slice::from_raw_parts(expressions, len) };
+    match or_collect(slice.iter().map(|x| vx_expression::as_ref(*x).clone())) {
+        Some(expr) => vx_expression::new(expr.into()),
+        None => ptr::null_mut(),
+    }
+}
+
+/// Equalities, inequalities, and boolean operations over possibly null values.
+/// For most operations, if either side is null, the result is null.
+/// VX_OPERATOR_KLEENE_AND, VX_OPERATOR_KLEENE_OR obey Kleene (three-valued)
+/// logic.
+#[repr(C)]
+pub enum vx_binary_operator {
+    /// Expressions are equal.
+    VX_OPERATOR_EQ = 0,
+    /// Expressions are not equal.
+    VX_OPERATOR_NOT_EQ = 1,
+    /// Expression is greater than another
+    VX_OPERATOR_GT = 2,
+    /// Expression is greater or equal to another
+    VX_OPERATOR_GTE = 3,
+    /// Expression is less than another
+    VX_OPERATOR_LT = 4,
+    /// Expression is less or equal to another
+    VX_OPERATOR_LTE = 5,
+    /// Boolean AND /\.
+    VX_OPERATOR_KLEENE_AND = 6,
+    /// Boolean OR \/.
+    VX_OPERATOR_KLEENE_OR = 7,
+    /// The sum of the arguments.
+    /// Errors at runtime if the sum would overflow or underflow.
+    VX_OPERATOR_ADD = 8,
+    /// The difference between the arguments.
+    /// Errors at runtime if the difference would overflow or underflow.
+    /// The result is null at any index where either input is null.
+    VX_OPERATOR_SUB = 9,
+    /// Multiply two numbers
+    VX_OPERATOR_MUL = 10,
+    /// Divide the left side by the right side
+    VX_OPERATOR_DIV = 11,
+}
+
+impl From<vx_binary_operator> for Operator {
+    fn from(operator: vx_binary_operator) -> Self {
+        match operator { // exhaustive on purpose: a new C variant must be mapped here
+            vx_binary_operator::VX_OPERATOR_EQ => Operator::Eq,
+            vx_binary_operator::VX_OPERATOR_NOT_EQ => Operator::NotEq,
+            vx_binary_operator::VX_OPERATOR_GT => Operator::Gt,
+            vx_binary_operator::VX_OPERATOR_GTE => Operator::Gte,
+            vx_binary_operator::VX_OPERATOR_LT => Operator::Lt,
+            vx_binary_operator::VX_OPERATOR_LTE => Operator::Lte,
+            vx_binary_operator::VX_OPERATOR_KLEENE_AND => Operator::And,
+            vx_binary_operator::VX_OPERATOR_KLEENE_OR => Operator::Or,
+            vx_binary_operator::VX_OPERATOR_ADD => Operator::Add,
+            vx_binary_operator::VX_OPERATOR_SUB => Operator::Sub,
+            vx_binary_operator::VX_OPERATOR_MUL => Operator::Mul,
+            vx_binary_operator::VX_OPERATOR_DIV => Operator::Div,
+        }
+    }
+}
+
+/// Create a binary expression for two expressions of form lhs OP rhs.
+/// If either input is NULL, returns NULL.
+///
+/// Example for a binary sum (root is the result of vx_expression_root()):
+///
+///     vx_expression* age = vx_expression_get_item("age", root);
+///     vx_expression* height = vx_expression_get_item("height", root);
+///     vx_expression* sum = vx_expression_binary(VX_OPERATOR_ADD, age, height);
+///     vx_expression_free(sum);
+///     vx_expression_free(height);
+///     vx_expression_free(age);
+///
+/// Example for a binary equality function:
+///
+///     vx_expression* vx_expression_eq(
+///         const vx_expression* lhs,
+///         const vx_expression* rhs
+///     ) {
+///         return vx_expression_binary(VX_OPERATOR_EQ, lhs, rhs);
+///     }
+///
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn vx_expression_binary(
+    operator: vx_binary_operator,
+    lhs: *const vx_expression,
+    rhs: *const vx_expression,
+) -> *mut vx_expression {
+    if lhs.is_null() {
+        return ptr::null_mut();
+    }
+    if rhs.is_null() {
+        return ptr::null_mut();
+    }
+    let lhs = vx_expression::as_ref(lhs).clone();
+    let rhs = vx_expression::as_ref(rhs).clone();
+    vx_expression::new(Box::new(Binary.new_expr(operator.into(), [lhs, rhs])))
+}
+
+/// Create a logical NOT of the child expression.
+///
+/// Returns the logical negation of the input boolean expression.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn vx_expression_not(child: *const vx_expression) -> *mut vx_expression {
+    if child.is_null() {
+        return ptr::null_mut(); // same NULL-in, NULL-out contract as the sibling constructors
+    }
+    vx_expression::new(not(vx_expression::as_ref(child).clone()).into())
+}
+
+/// Create an expression that checks for null values.
+///
+/// Returns a boolean array indicating which positions contain null values.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn vx_expression_is_null(child: *const vx_expression) -> *mut vx_expression {
+    if child.is_null() {
+        return ptr::null_mut();
+    }
+    vx_expression::new(is_null(vx_expression::as_ref(child).clone()).into())
+}
+
+/// Create an expression that extracts a named field from a struct expression.
+/// Child expression must have a DTYPE_STRUCT dtype.
+/// Errors in vx_array_apply if the root array doesn't have a specified field.
+///
+/// Accesses the specified field from the result of the child expression.
+/// Unlike vx_expression_select, the result is the field itself, not a struct.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn vx_expression_get_item(
+    item: *const c_char,
+    child: *const vx_expression,
+) -> *mut vx_expression {
+    if child.is_null() {
+        return ptr::null_mut();
+    }
+    if item.is_null() {
+        return ptr::null_mut();
+    }
+    #[expect(clippy::expect_used)]
+    let item = unsafe {
+        CStr::from_ptr(item)
+            .to_str()
+            .expect("converting pointer to str")
+    };
+    let item: Arc<str> = Arc::from(item);
+    let item: FieldName = item.into();
+    vx_expression::new(get_item(item, vx_expression::as_ref(child).clone()).into())
+}
+
+/// Create an expression that checks if a value is contained in a list.
+///
+/// Returns a boolean array indicating whether the value appears in each list.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn vx_expression_list_contains(
+    list: *const vx_expression,
+    value: *const vx_expression,
+) -> *mut vx_expression {
+    if list.is_null() {
+        return ptr::null_mut();
+    }
+    if value.is_null() {
+        return ptr::null_mut();
+    }
+    let list = vx_expression::as_ref(list).clone();
+    let value = vx_expression::as_ref(value).clone();
+    vx_expression::new(Box::new(list_contains(list, value)))
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ptr;
+    use std::sync::Arc;
+
+    use vortex::array::IntoArray;
+    use vortex::array::ToCanonical;
+    use vortex::array::arrays::BoolArray;
+    use vortex::array::arrays::ListArray;
+    use vortex::array::arrays::PrimitiveArray;
+    use vortex::array::arrays::StructArray;
+    use vortex::array::arrays::VarBinViewArray;
+    use vortex::array::validity::Validity;
+    use vortex::buffer::Buffer;
+    use vortex::buffer::buffer;
+    use vortex::expr::lit;
+
+    use crate::array::vx_array;
+    use crate::array::vx_array_apply;
+    use crate::array::vx_array_free;
+    use crate::error::vx_error_free;
+    use crate::expression::vx_binary_operator;
+    use crate::expression::vx_expression;
+    use
crate::expression::vx_expression_and; + use crate::expression::vx_expression_binary; + use crate::expression::vx_expression_free; + use crate::expression::vx_expression_get_item; + use crate::expression::vx_expression_list_contains; + use crate::expression::vx_expression_or; + use crate::expression::vx_expression_root; + use crate::expression::vx_expression_select; + + #[test] + #[cfg_attr(miri, ignore)] + fn test_root() { + unsafe { + let root = vx_expression_root(); + vx_expression_free(root); + } + } + + fn struct_array() -> (StructArray, VarBinViewArray, PrimitiveArray) { + let names_array = VarBinViewArray::from_iter_str(["Alice", "Bob", "Charlie"]); + let ages_buffer = buffer![30u8, 25u8, 35u8]; + let ages_array = PrimitiveArray::new(ages_buffer, Validity::NonNullable); + let fields = vec![ + names_array.clone().into_array(), + ages_array.clone().into_array(), + ]; + let names = ["name", "age"].into(); + let struct_array = StructArray::try_new(names, fields, 3, Validity::NonNullable); + (struct_array.unwrap(), names_array, ages_array) + } + + #[test] + #[cfg_attr(miri, ignore)] + fn test_get_item() { + let (array, names_array, ages_array) = struct_array(); + unsafe { + let root = vx_expression_root(); + let column = vx_expression_get_item(c"age".as_ptr(), root); + assert_ne!(column, ptr::null_mut()); + + let array = vx_array::new(Arc::new(array.into_array())); + let mut error = ptr::null_mut(); + + let applied_array = vx_array_apply(array, column, &raw mut error); + assert!(!applied_array.is_null()); + assert!(error.is_null()); + { + let applied_array = vx_array::as_ref(applied_array); + let expected: Buffer = ages_array.to_buffer(); + assert_eq!(applied_array.to_primitive().to_buffer(), expected); + } + vx_array_free(applied_array); + + vx_expression_free(column); + + let column = vx_expression_get_item(c"ololo".as_ptr(), root); + assert_ne!(column, ptr::null_mut()); + + let applied_array = vx_array_apply(array, column, &raw mut error); + 
assert!(applied_array.is_null()); + assert!(!error.is_null()); + vx_error_free(error); + + let names_array_vx = vx_array::new(Arc::new(names_array.into_array())); + let applied_array = vx_array_apply(names_array_vx, column, &raw mut error); + assert!(applied_array.is_null()); + assert!(!error.is_null()); + vx_error_free(error); + vx_array_free(names_array_vx); + + vx_expression_free(column); + + vx_array_free(array); + vx_expression_free(root); + } + } + + #[test] + #[cfg_attr(miri, ignore)] + fn test_select() { + let (array, ..) = struct_array(); + unsafe { + let root = vx_expression_root(); + + let array = vx_array::new(Arc::new(array.into_array())); + + let columns = [c"name".as_ptr(), c"age".as_ptr()]; + let column = vx_expression_select(columns.as_ptr(), 2, root); + assert_ne!(column, ptr::null_mut()); + + let mut error = ptr::null_mut(); + let applied_array = vx_array_apply(array, column, &raw mut error); + assert!(!applied_array.is_null()); + assert!(error.is_null()); + { + let array = vx_array::as_ref(array); + let applied_array = vx_array::as_ref(applied_array); + assert_eq!(applied_array.dtype(), array.dtype()); + } + vx_array_free(applied_array); + vx_expression_free(column); + + let columns = [c"age".as_ptr(), c"ololo".as_ptr()]; + let column = vx_expression_select(columns.as_ptr(), 2, root); + let applied_array = vx_array_apply(array, column, &raw mut error); + assert!(applied_array.is_null()); + assert!(!error.is_null()); + vx_error_free(error); + vx_expression_free(column); + + vx_array_free(array); + vx_expression_free(root); + } + } + + #[test] + #[cfg_attr(miri, ignore)] + fn test_and_or() { + let col1 = BoolArray::from_iter([true, false, true, true]); + let col2 = BoolArray::from_iter([false, true, true, false]); + let col3 = BoolArray::from_iter([false, true, true, true]); + let fields = vec![col1.into_array(), col2.into_array(), col3.into_array()]; + let names = ["col1", "col2", "col3"].into(); + let array = StructArray::try_new(names, fields, 
4, Validity::NonNullable); + + unsafe { + let array = vx_array::new(Arc::new(array.unwrap().into_array())); + + let root = vx_expression_root(); + let expression_col1 = vx_expression_get_item(c"col1".as_ptr(), root); + let expression_col2 = vx_expression_get_item(c"col2".as_ptr(), root); + let expression_col3 = vx_expression_get_item(c"col3".as_ptr(), root); + let expression_12 = vx_expression_binary( + vx_binary_operator::VX_OPERATOR_EQ, + expression_col1, + expression_col2, + ); + let expression_23 = vx_expression_binary( + vx_binary_operator::VX_OPERATOR_EQ, + expression_col2, + expression_col3, + ); + + let expressions = [expression_12, expression_23]; + + let mut error = ptr::null_mut(); + let expressions_ptr = expressions.as_ptr() as *const *const vx_expression; + let expression_and123 = vx_expression_and(expressions_ptr, 2); + assert!(!expression_and123.is_null()); + let applied_array = vx_array_apply(array, expression_and123, &raw mut error); + assert!(error.is_null()); + assert!(!applied_array.is_null()); + { + let array = vx_array::as_ref(applied_array).to_bool(); + let expected = BoolArray::from_iter([false, false, true, false]); + assert_eq!(array.to_bit_buffer(), expected.to_bit_buffer()); + } + vx_expression_free(expression_and123); + vx_array_free(applied_array); + + let expression_or123 = vx_expression_or(expressions_ptr, 2); + assert!(!expression_or123.is_null()); + let applied_array = vx_array_apply(array, expression_or123, &raw mut error); + assert!(error.is_null()); + assert!(!applied_array.is_null()); + { + let array = vx_array::as_ref(applied_array).to_bool(); + let expected = BoolArray::from_iter([true, true, true, false]); + assert_eq!(array.to_bit_buffer(), expected.to_bit_buffer()); + } + vx_array_free(applied_array); + + vx_expression_free(expression_or123); + + vx_expression_free(expression_23); + vx_expression_free(expression_12); + vx_expression_free(expression_col3); + vx_expression_free(expression_col2); + 
vx_expression_free(expression_col1); + vx_expression_free(root); + + vx_array_free(array); + } + } + + #[test] + #[cfg_attr(miri, ignore)] + fn test_empty_and_or() { + unsafe { + let root = vx_expression_root(); + + let and_empty = vx_expression_and(ptr::null_mut(), 9); + assert!(and_empty.is_null()); + let or_empty = vx_expression_or(ptr::null_mut(), 9); + assert!(or_empty.is_null()); + + vx_expression_free(root); + } + } + + #[test] + #[cfg_attr(miri, ignore)] + fn test_list_contains() { + let elements = buffer![1i32, 2, 3, 4, 5].into_array(); + let offsets = buffer![0u32, 2, 5, 5].into_array(); + let array = ListArray::try_new(elements, offsets, Validity::NonNullable).unwrap(); + + unsafe { + let root = vx_expression_root(); + let array = vx_array::new(Arc::new(array.into_array())); + let expression_value = vx_expression::new(Box::new(lit(1))); + + let expression = vx_expression_list_contains(root, expression_value); + assert!(!expression.is_null()); + + let mut error = ptr::null_mut(); + let applied = vx_array_apply(array, expression, &raw mut error); + assert!(error.is_null()); + assert!(!applied.is_null()); + { + let applied = vx_array::as_ref(applied).to_bool(); + let expected = BoolArray::from_iter([true, false, false]); + assert_eq!(applied.to_bit_buffer(), expected.to_bit_buffer()); + } + vx_array_free(applied); + + vx_expression_free(expression_value); + vx_expression_free(expression); + vx_array_free(array); + + vx_expression_free(root); + } + } +} diff --git a/vortex-ffi/src/file.rs b/vortex-ffi/src/file.rs index ad21cf3ad3b..61e49b1d624 100644 --- a/vortex-ffi/src/file.rs +++ b/vortex-ffi/src/file.rs @@ -185,8 +185,10 @@ pub unsafe extern "C-unwind" fn vx_file_open_reader( .parse() .map_err(|e| vortex_err!("Failed to parse URI '{}': {}", uri_str, e))?; - let prop_keys = unsafe { to_string_vec(options.property_keys, options.property_len) }; - let prop_vals = unsafe { to_string_vec(options.property_vals, options.property_len) }; + let prop_keys = + 
unsafe { to_string_vec(options.property_keys, options.property_len.max(0) as usize) };
+        let prop_vals =
+            unsafe { to_string_vec(options.property_vals, options.property_len.max(0) as usize) };
 
         let object_store = make_object_store(&uri, &prop_keys, &prop_vals)?;
 
diff --git a/vortex-ffi/src/lib.rs b/vortex-ffi/src/lib.rs
index 1d97f29a01b..cad1d15c644 100644
--- a/vortex-ffi/src/lib.rs
+++ b/vortex-ffi/src/lib.rs
@@ -11,6 +11,7 @@ mod array_iterator;
 mod binary;
 mod dtype;
 mod error;
+mod expression;
 mod file;
 mod log;
 mod macros;
@@ -22,7 +23,6 @@ mod struct_fields;
 
 use std::ffi::CStr;
 use std::ffi::c_char;
-use std::ffi::c_int;
 use std::sync::LazyLock;
 
 pub use log::vx_log_level;
@@ -41,8 +41,11 @@ pub(crate) unsafe fn to_string(ptr: *const c_char) -> String {
     c_str.to_string_lossy().into_owned()
 }
 
-pub(crate) unsafe fn to_string_vec(ptr: *const *const c_char, len: c_int) -> Vec<String> {
+/// Copy `len` C strings starting at `ptr` into owned `String`s.
+///
+/// The caller must ensure `ptr` points to at least `len` valid C string pointers.
+pub(crate) unsafe fn to_string_vec(ptr: *const *const c_char, len: usize) -> Vec<String> {
     (0..len)
-        .map(|i| unsafe { to_string(*ptr.offset(i as isize)) })
+        .map(|i| unsafe { to_string(*ptr.add(i)) })
         .collect()
 }