From 0f0f11aec7e81b6fef51389fefbaf5b4849d823d Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Tue, 17 Mar 2026 16:29:17 -0400 Subject: [PATCH 01/23] PatchedArray: basics and wiring Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/mod.rs | 4 + vortex-array/src/arrays/patched/array.rs | 256 ++++++++++++ .../src/arrays/patched/compute/compare.rs | 273 ++++++++++++ .../src/arrays/patched/compute/filter.rs | 145 +++++++ .../src/arrays/patched/compute/mod.rs | 6 + .../src/arrays/patched/compute/rules.rs | 12 + vortex-array/src/arrays/patched/mod.rs | 75 ++++ .../src/arrays/patched/vtable/kernels.rs | 9 + vortex-array/src/arrays/patched/vtable/mod.rs | 389 ++++++++++++++++++ .../src/arrays/patched/vtable/operations.rs | 39 ++ .../src/arrays/patched/vtable/slice.rs | 183 ++++++++ vortex-buffer/src/buffer.rs | 28 ++ 12 files changed, 1419 insertions(+) create mode 100644 vortex-array/src/arrays/patched/array.rs create mode 100644 vortex-array/src/arrays/patched/compute/compare.rs create mode 100644 vortex-array/src/arrays/patched/compute/filter.rs create mode 100644 vortex-array/src/arrays/patched/compute/mod.rs create mode 100644 vortex-array/src/arrays/patched/compute/rules.rs create mode 100644 vortex-array/src/arrays/patched/mod.rs create mode 100644 vortex-array/src/arrays/patched/vtable/kernels.rs create mode 100644 vortex-array/src/arrays/patched/vtable/mod.rs create mode 100644 vortex-array/src/arrays/patched/vtable/operations.rs create mode 100644 vortex-array/src/arrays/patched/vtable/slice.rs diff --git a/vortex-array/src/arrays/mod.rs b/vortex-array/src/arrays/mod.rs index 43f8a84d49e..5abbcb84b85 100644 --- a/vortex-array/src/arrays/mod.rs +++ b/vortex-array/src/arrays/mod.rs @@ -66,6 +66,10 @@ pub mod null; pub use null::Null; pub use null::NullArray; +pub mod patched; +pub use patched::Patched; +pub use patched::PatchedArray; + pub mod primitive; pub use primitive::Primitive; pub use primitive::PrimitiveArray; diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs new file mode 100644 index 00000000000..9f96f3c5c9f --- /dev/null +++ b/vortex-array/src/arrays/patched/array.rs @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Range; + +use vortex_buffer::Buffer; +use vortex_buffer::BufferMut; +use vortex_error::VortexResult; +use vortex_error::vortex_ensure; + +use crate::ArrayRef; +use crate::Canonical; +use crate::DynArray; +use crate::ExecutionCtx; +use crate::arrays::patched::PatchAccessor; +use crate::arrays::patched::TransposedPatches; +use crate::arrays::patched::patch_lanes; +use crate::buffer::BufferHandle; +use crate::dtype::IntegerPType; +use crate::dtype::NativePType; +use crate::dtype::PType; +use crate::match_each_native_ptype; +use crate::match_each_unsigned_integer_ptype; +use crate::patches::Patches; +use crate::stats::ArrayStats; + +/// An array that partially "patches" another array with new values. +/// +/// Patched arrays implement the set of nodes that do this instead here...I think? +#[derive(Debug, Clone)] +pub struct PatchedArray { + /// The inner array that is being patched. This is the zeroth child. + pub(super) inner: ArrayRef, + + /// Number of 1024-element chunks. Pre-computed for convenience. + pub(super) n_chunks: usize, + + /// Number of lanes the patch indices and values have been split into. Each of the `n_chunks` + /// of 1024 values is split into `n_lanes` lanes horizontally, each lane having 1024 / n_lanes + /// values that might be patched. + pub(super) n_lanes: usize, + + /// Offset into the first chunk + pub(super) offset: usize, + /// Total length. + pub(super) len: usize, + + /// lane offsets. The PType of these MUST be u32 + pub(super) lane_offsets: BufferHandle, + /// indices within a 1024-element chunk. The PType of these MUST be u16 + pub(super) indices: BufferHandle, + /// patch values corresponding to the indices. The ptype is specified by `values_ptype`. + pub(super) values: BufferHandle, + /// PType of the scalars in `values`. Can be any native type. + pub(super) values_ptype: PType, + + pub(super) stats_set: ArrayStats, +} + +impl PatchedArray { + pub fn from_array_and_patches( + inner: ArrayRef, + patches: &Patches, + ctx: &mut ExecutionCtx, + ) -> VortexResult { + vortex_ensure!( + inner.dtype().eq_with_nullability_superset(patches.dtype()), + "array DType must match patches DType" + ); + + let values_ptype = patches.dtype().as_ptype(); + + let TransposedPatches { + n_chunks, + n_lanes, + lane_offsets, + indices, + values, + } = transpose_patches(patches, ctx)?; + + let len = inner.len(); + + Ok(Self { + inner, + n_chunks, + n_lanes, + values_ptype, + offset: 0, + len, + lane_offsets: BufferHandle::new_host(lane_offsets), + indices: BufferHandle::new_host(indices), + values: BufferHandle::new_host(values), + stats_set: ArrayStats::default(), + }) + } + + /// Get an accessor, which allows ranged access to patches by chunk/lane. + pub fn accessor(&self) -> PatchAccessor<'_, V> { + PatchAccessor { + n_lanes: self.n_lanes, + lane_offsets: self.lane_offsets.as_host().reinterpret::(), + indices: self.indices.as_host().reinterpret::(), + values: self.values.as_host().reinterpret::(), + } + } + + /// Slice the array to just the patches and inner values that are within the chunk range. + pub(crate) fn slice_chunks(&self, chunks: Range) -> VortexResult { + let lane_offsets_start = chunks.start * self.n_lanes; + let lane_offsets_stop = chunks.end * self.n_lanes + 1; + + let sliced_lane_offsets = self + .lane_offsets + .slice_typed::(lane_offsets_start..lane_offsets_stop); + let indices = self.indices.clone(); + let values = self.values.clone(); + + let begin = (chunks.start * 1024).max(self.offset); + let end = (chunks.end * 1024).min(self.len); + + let offset = begin % 1024; + + let inner = self.inner.slice(begin..end)?; + + let len = end - begin; + let n_chunks = (end - begin).div_ceil(1024); + + Ok(PatchedArray { + inner, + n_chunks, + n_lanes: self.n_lanes, + offset, + len, + indices, + values, + values_ptype: self.values_ptype, + lane_offsets: sliced_lane_offsets, + stats_set: ArrayStats::default(), + }) + } +} + +/// Transpose a set of patches from the default sorted layout into the data parallel layout. +#[allow(clippy::cognitive_complexity)] +fn transpose_patches(patches: &Patches, ctx: &mut ExecutionCtx) -> VortexResult { + let array_len = patches.array_len(); + let offset = patches.offset(); + + let indices = patches + .indices() + .clone() + .execute::(ctx)? + .into_primitive(); + + let values = patches + .values() + .clone() + .execute::(ctx)? + .into_primitive(); + + let indices_ptype = indices.ptype(); + let values_ptype = values.ptype(); + + let indices = indices.buffer_handle().clone().unwrap_host(); + let values = values.buffer_handle().clone().unwrap_host(); + + match_each_unsigned_integer_ptype!(indices_ptype, |I| { + match_each_native_ptype!(values_ptype, |V| { + let indices: Buffer = Buffer::from_byte_buffer(indices); + let values: Buffer = Buffer::from_byte_buffer(values); + + Ok(transpose( + indices.as_slice(), + values.as_slice(), + offset, + array_len, + )) + }) + }) +} + +#[allow(clippy::cast_possible_truncation)] +fn transpose( + indices_in: &[I], + values_in: &[V], + offset: usize, + array_len: usize, +) -> TransposedPatches { + // Total number of slots is number of chunks times number of lanes. + let n_chunks = array_len.div_ceil(1024); + assert!( + n_chunks <= u32::MAX as usize, + "Cannot transpose patches for array with >= 4 trillion elements" + ); + + let n_lanes = patch_lanes::(); + + // We know upfront how many indices and values we'll have. + let mut indices_buffer = BufferMut::with_capacity(indices_in.len()); + let mut values_buffer = BufferMut::with_capacity(values_in.len()); + + // number of patches in each chunk. + let mut lane_offsets: BufferMut = BufferMut::zeroed(n_chunks * n_lanes + 1); + + // Scan the index/values once to get chunk/lane counts + for index in indices_in { + let index = index.as_() - offset; + let chunk = index / 1024; + let lane = index % n_lanes; + + lane_offsets[chunk * n_lanes + lane + 1] += 1; + } + + // Prefix-sum sizes -> offsets + for index in 1..lane_offsets.len() { + lane_offsets[index] += lane_offsets[index - 1]; + } + + // Loop over patches, writing thme to final positions + let indices_out = indices_buffer.spare_capacity_mut(); + let values_out = values_buffer.spare_capacity_mut(); + for (index, &value) in std::iter::zip(indices_in, values_in) { + let index = index.as_() - offset; + let chunk = index / 1024; + let lane = index % n_lanes; + + let position = &mut lane_offsets[chunk * n_lanes + lane]; + indices_out[*position as usize].write((index % 1024) as u16); + values_out[*position as usize].write(value); + *position += 1; + } + + // SAFETY: we know there are exactly indices_in.len() indices/values, and we just + // set them to the appropriate values in the loop above. + unsafe { + indices_buffer.set_len(indices_in.len()); + values_buffer.set_len(values_in.len()); + } + + // Now, pass over all the indices and values again and subtract out the position increments. + for index in indices_in { + let index = index.as_() - offset; + let chunk = index / 1024; + let lane = index % n_lanes; + + lane_offsets[chunk * n_lanes + lane] -= 1; + } + + TransposedPatches { + n_chunks, + n_lanes, + lane_offsets: lane_offsets.freeze().into_byte_buffer(), + indices: indices_buffer.freeze().into_byte_buffer(), + values: values_buffer.freeze().into_byte_buffer(), + } +} diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs new file mode 100644 index 00000000000..d1932ed1e44 --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/compare.rs @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BitBufferMut; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::Canonical; +use crate::ExecutionCtx; +use crate::IntoArray; +use crate::arrays::BoolArray; +use crate::arrays::ConstantArray; +use crate::arrays::Patched; +use crate::arrays::bool::BoolArrayParts; +use crate::arrays::patched::patch_lanes; +use crate::arrays::primitive::NativeValue; +use crate::builtins::ArrayBuiltins; +use crate::dtype::NativePType; +use crate::match_each_native_ptype; +use crate::scalar_fn::fns::binary::CompareKernel; +use crate::scalar_fn::fns::operators::CompareOperator; + +impl CompareKernel for Patched { + fn compare( + lhs: &Self::Array, + rhs: &ArrayRef, + operator: CompareOperator, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + let Some(constant) = rhs.as_constant() else { + return Ok(None); + }; + + let result = lhs + .inner + .binary( + ConstantArray::new(constant.clone(), lhs.len()).into_array(), + operator.into(), + )? + .execute::(ctx)? + .into_bool(); + + let BoolArrayParts { + bits, + offset, + len, + validity, + } = result.into_parts(); + + let mut bits = BitBufferMut::from_buffer(bits.unwrap_host().into_mut(), offset, len); + + fn apply( + bits: &mut BitBufferMut, + lane_offsets: &[u32], + indices: &[u16], + values: &[V], + constant: V, + cmp: F, + ) -> VortexResult<()> + where + F: Fn(V, V) -> bool, + { + let n_lanes = patch_lanes::(); + + for index in 0..(lane_offsets.len() - 1) { + let chunk = index / n_lanes; + + let lane_start = lane_offsets[index] as usize; + let lane_end = lane_offsets[index + 1] as usize; + + for (&patch_index, &patch_value) in std::iter::zip( + &indices[lane_start..lane_end], + &values[lane_start..lane_end], + ) { + let bit_index = chunk * 1024 + patch_index as usize; + if cmp(patch_value, constant) { + bits.set(bit_index) + } else { + bits.unset(bit_index) + } + } + } + + Ok(()) + } + + let lane_offsets = lhs.lane_offsets.as_host().reinterpret::(); + let indices = lhs.indices.as_host().reinterpret::(); + + match_each_native_ptype!(lhs.values_ptype, |V| { + let values = lhs.values.as_host().reinterpret::(); + let constant = constant + .as_primitive() + .as_::() + .vortex_expect("compare constant not null"); + + match operator { + CompareOperator::Eq => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) == NativeValue(r), + )?; + } + CompareOperator::NotEq => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) != NativeValue(r), + )?; + } + CompareOperator::Gt => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) > NativeValue(r), + )?; + } + CompareOperator::Gte => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) >= NativeValue(r), + )?; + } + CompareOperator::Lt => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) < NativeValue(r), + )?; + } + CompareOperator::Lte => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) <= NativeValue(r), + )?; + } + } + }); + + // SAFETY: thing + let result = unsafe { BoolArray::new_unchecked(bits.freeze(), validity) }; + Ok(Some(result.into_array())) + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + use vortex_error::VortexResult; + + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::arrays::BoolArray; + use crate::arrays::ConstantArray; + use crate::arrays::Patched; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::patches::Patches; + use crate::scalar_fn::fns::binary::CompareKernel; + use crate::scalar_fn::fns::operators::CompareOperator; + use crate::validity::Validity; + + #[test] + fn test_basic() { + let lhs = PrimitiveArray::from_iter(0u32..512).into_array(); + let patches = Patches::new( + 512, + 0, + buffer![509u16, 510, 511].into_array(), + buffer![u32::MAX; 3].into_array(), + None, + ) + .unwrap(); + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx).unwrap(); + + let rhs = ConstantArray::new(u32::MAX, 512).into_array(); + + let result = ::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx) + .unwrap() + .unwrap(); + + let expected = + BoolArray::from_indices(512, [509, 510, 511], Validity::NonNullable).into_array(); + + assert_arrays_eq!(expected, result); + } + + #[test] + fn test_subnormal_f32() -> VortexResult<()> { + // Subnormal f32 values are smaller than f32::MIN_POSITIVE but greater than 0 + let subnormal: f32 = f32::MIN_POSITIVE / 2.0; + assert!(subnormal > 0.0 && subnormal < f32::MIN_POSITIVE); + + let lhs = PrimitiveArray::from_iter((0..512).map(|i| i as f32)).into_array(); + + let patches = Patches::new( + 512, + 0, + buffer![509u16, 510, 511].into_array(), + buffer![f32::NAN, subnormal, f32::NEG_INFINITY].into_array(), + None, + )?; + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx)?; + + let rhs = ConstantArray::new(subnormal, 512).into_array(); + + let result = + ::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx)? + .unwrap(); + + let expected = BoolArray::from_indices(512, [510], Validity::NonNullable).into_array(); + + assert_arrays_eq!(expected, result); + Ok(()) + } + + #[test] + fn test_pos_neg_zero() -> VortexResult<()> { + let lhs = PrimitiveArray::from_iter([-0.0f32; 10]).into_array(); + + let patches = Patches::new( + 10, + 0, + buffer![5u16, 6, 7, 8, 9].into_array(), + buffer![f32::NAN, f32::NEG_INFINITY, 0f32, -0.0f32, f32::INFINITY].into_array(), + None, + )?; + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx)?; + + let rhs = ConstantArray::new(0.0f32, 10).into_array(); + + let result = + ::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx)? + .unwrap(); + + let expected = BoolArray::from_indices(10, [7], Validity::NonNullable).into_array(); + + assert_arrays_eq!(expected, result); + + Ok(()) + } +} diff --git a/vortex-array/src/arrays/patched/compute/filter.rs b/vortex-array/src/arrays/patched/compute/filter.rs new file mode 100644 index 00000000000..d045a79c38f --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/filter.rs @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; +use vortex_mask::AllOr; +use vortex_mask::Mask; + +use crate::ArrayRef; +use crate::IntoArray; +use crate::arrays::FilterArray; +use crate::arrays::Patched; +use crate::arrays::filter::FilterReduce; + +impl FilterReduce for Patched { + fn filter(array: &Self::Array, mask: &Mask) -> VortexResult> { + // Find the contiguous chunk range that the mask covers. We use this to slice the inner + // components, then wrap the rest up with another FilterArray. + // + // This is helpful when we have a very selective filter that is clustered to a small + // range. + let (chunk_start, chunk_stop) = match mask.indices() { + AllOr::All | AllOr::None => { + // This is handled as the precondition to this method, see the FilterReduce + // documentation. + unreachable!("mask must be a MaskValues here") + } + AllOr::Some(indices) => { + let first = indices[0]; + let last = indices[indices.len() - 1]; + + (first / 1024, last.div_ceil(1024)) + } + }; + + // If all chunks already covered, there is nothing to do. + if chunk_start == 0 && chunk_stop == array.n_chunks { + return Ok(None); + } + + let sliced = array.slice_chunks(chunk_start..chunk_stop)?; + + let slice_start = chunk_start * 1024; + let slice_end = (chunk_stop * 1024).min(array.len()); + let remainder = mask.slice(slice_start..slice_end); + + Ok(Some( + FilterArray::new(sliced.into_array(), remainder).into_array(), + )) + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + use vortex_error::VortexResult; + use vortex_mask::Mask; + + use crate::DynArray; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::arrays::FilterArray; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::patches::Patches; + + #[test] + fn test_filter_noop() -> VortexResult<()> { + let array = buffer![u16::MIN; 5].into_array(); + let patched_indices = buffer![3u8, 4].into_array(); + let patched_values = buffer![u16::MAX; 2].into_array(); + + let patches = Patches::new(5, 0, patched_indices, patched_values, None)?; + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let array = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array(); + + let filtered = FilterArray::new( + array.clone(), + Mask::from_iter([true, false, false, false, true]), + ) + .into_array(); + + let reduced = array.vtable().reduce_parent(&array, &filtered, 0)?; + + // Filter does not get pushed through to child because it does not prune any chunks. + assert!(reduced.is_none()); + + Ok(()) + } + + #[test] + fn test_filter_basic() -> VortexResult<()> { + // Basic test: filter with mask that crosses boundaries. + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let array = buffer![u16::MIN; 4096].into_array(); + let patched_indices = buffer![1024u16, 1025].into_array(); + let patched_values = buffer![u16::MAX, u16::MAX].into_array(); + + let patches = Patches::new(4096, 0, patched_indices, patched_values, None)?; + + let array = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array(); + + // Filter that only touches the middle 2 chunks + let mask = Mask::from_indices(4096, vec![1024, 1025, 3000]); + + let filtered = FilterArray::new(array.clone(), mask).into_array(); + let reduced = array.vtable().reduce_parent(&array, &filtered, 0)?; + + let expected = PrimitiveArray::from_iter([u16::MAX, u16::MAX, u16::MIN]).into_array(); + + assert_arrays_eq!(expected, reduced.unwrap()); + + Ok(()) + } + + #[test] + fn test_filter_complex() -> VortexResult<()> { + // Basic test: filter with mask that crosses boundaries. + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let array = buffer![u16::MIN; 4096].into_array(); + let patched_indices = buffer![1024u16, 1025].into_array(); + let patched_values = buffer![u16::MAX, u16::MAX].into_array(); + + let patches = Patches::new(4096, 1, patched_indices, patched_values, None)?; + + let array = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array(); + + // Filter that only touches the middle 2 chunks + let mask = Mask::from_indices(4096, vec![1024, 1025, 3000]); + + let filtered = FilterArray::new(array.clone(), mask).into_array(); + let reduced = array.vtable().reduce_parent(&array, &filtered, 0)?; + + let expected = PrimitiveArray::from_iter([u16::MAX, u16::MIN, u16::MIN]).into_array(); + + assert_arrays_eq!(expected, reduced.unwrap()); + + Ok(()) + } +} diff --git a/vortex-array/src/arrays/patched/compute/mod.rs b/vortex-array/src/arrays/patched/compute/mod.rs new file mode 100644 index 00000000000..aa8b18199b2 --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/mod.rs @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod compare; +mod filter; +pub(crate) mod rules; diff --git a/vortex-array/src/arrays/patched/compute/rules.rs b/vortex-array/src/arrays/patched/compute/rules.rs new file mode 100644 index 00000000000..3ecb25c1efa --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/rules.rs @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use crate::arrays::Patched; +use crate::arrays::filter::FilterReduceAdaptor; +use crate::arrays::slice::SliceReduceAdaptor; +use crate::optimizer::rules::ParentRuleSet; + +pub(crate) const PARENT_RULES: ParentRuleSet = ParentRuleSet::new(&[ + ParentRuleSet::lift(&FilterReduceAdaptor(Patched)), + ParentRuleSet::lift(&SliceReduceAdaptor(Patched)), +]); diff --git a/vortex-array/src/arrays/patched/mod.rs b/vortex-array/src/arrays/patched/mod.rs new file mode 100644 index 00000000000..f035204c188 --- /dev/null +++ b/vortex-array/src/arrays/patched/mod.rs @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod array; +mod compute; +mod vtable; + +pub use array::*; +use vortex_buffer::ByteBuffer; +pub use vtable::*; + +/// Patches that have been transposed into GPU format. +struct TransposedPatches { + n_chunks: usize, + n_lanes: usize, + lane_offsets: ByteBuffer, + indices: ByteBuffer, + values: ByteBuffer, +} + +/// Number of lanes used at patch time for a value of type `V`. +/// +/// This is *NOT* equal to the number of FastLanes lanes for the type `V`, rather this is going to +/// correspond to how many "lanes" we will end up copying data on. +/// +/// When applied on the CPU, this configuration doesn't really matter. On the GPU, it is based +/// on the number of patches involved here. +const fn patch_lanes() -> usize { + // For types 32-bits or smaller, we use a 32 lane configuration, and for 64-bit we use 16 lanes. + // This matches up with the number of lanes we use to execute copying results from bit-unpacking + // from shared to global memory. + if size_of::() < 8 { 32 } else { 16 } +} + +pub struct PatchAccessor<'a, V> { + n_lanes: usize, + lane_offsets: &'a [u32], + indices: &'a [u16], + values: &'a [V], +} + +impl<'a, V: Sized> PatchAccessor<'a, V> { + /// Access the patches for a particular lane + pub fn access(&'a self, chunk: usize, lane: usize) -> LanePatches<'a, V> { + let start = self.lane_offsets[chunk * self.n_lanes + lane] as usize; + let stop = self.lane_offsets[chunk * self.n_lanes + lane + 1] as usize; + + LanePatches { + indices: &self.indices[start..stop], + values: &self.values[start..stop], + } + } +} + +pub struct LanePatches<'a, V> { + pub indices: &'a [u16], + pub values: &'a [V], +} + +impl<'a, V: Copy> LanePatches<'a, V> { + pub fn len(&self) -> usize { + self.indices.len() + } + + pub fn is_empty(&self) -> bool { + self.indices.is_empty() + } + + pub fn iter(&self) -> impl Iterator { + self.indices + .iter() + .copied() + .zip(self.values.iter().copied()) + } +} diff --git a/vortex-array/src/arrays/patched/vtable/kernels.rs b/vortex-array/src/arrays/patched/vtable/kernels.rs new file mode 100644 index 00000000000..2c60ec4a20f --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/kernels.rs @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use crate::arrays::Patched; +use crate::kernel::ParentKernelSet; +use crate::scalar_fn::fns::binary::CompareExecuteAdaptor; + +pub(super) const PARENT_KERNELS: ParentKernelSet = + ParentKernelSet::new(&[ParentKernelSet::lift(&CompareExecuteAdaptor(Patched))]); diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs new file mode 100644 index 00000000000..bcc87d869ce --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod kernels; +mod operations; +mod slice; + +use std::hash::Hash; +use std::hash::Hasher; + +use vortex_buffer::Buffer; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; +use vortex_error::vortex_panic; +use vortex_session::VortexSession; + +use crate::ArrayEq; +use crate::ArrayHash; +use crate::ArrayRef; +use crate::Canonical; +use crate::DeserializeMetadata; +use crate::DynArray; +use crate::ExecutionCtx; +use crate::ExecutionStep; +use crate::IntoArray; +use crate::Precision; +use crate::ProstMetadata; +use crate::arrays::PrimitiveArray; +use crate::arrays::patched::PatchedArray; +use crate::arrays::patched::compute::rules::PARENT_RULES; +use crate::arrays::patched::patch_lanes; +use crate::arrays::patched::vtable::kernels::PARENT_KERNELS; +use crate::arrays::primitive::PrimitiveArrayParts; +use crate::buffer::BufferHandle; +use crate::dtype::DType; +use crate::dtype::NativePType; +use crate::match_each_native_ptype; +use crate::serde::ArrayChildren; +use crate::stats::ArrayStats; +use crate::stats::StatsSetRef; +use crate::vtable; +use crate::vtable::ArrayId; +use crate::vtable::VTable; +use crate::vtable::ValidityChild; +use crate::vtable::ValidityVTableFromChild; + +vtable!(Patched); + +#[derive(Debug)] +pub struct Patched; + +impl ValidityChild for Patched { + fn validity_child(array: &PatchedArray) -> &ArrayRef { + &array.inner + } +} + +#[derive(Clone, prost::Message)] +pub struct PatchedMetadata { + #[prost(uint32, tag = "1")] + pub(crate) offset: u32, +} + +impl VTable for Patched { + type Array = PatchedArray; + type Metadata = ProstMetadata; + type OperationsVTable = Self; + type ValidityVTable = ValidityVTableFromChild; + + fn id(_array: &Self::Array) -> ArrayId { + ArrayId::new_ref("vortex.patched") + } + + fn len(array: &Self::Array) -> usize { + array.len + } + + fn dtype(array: &Self::Array) -> &DType { + array.inner.dtype() + } + + fn stats(array: &Self::Array) -> StatsSetRef<'_> { + array.stats_set.to_ref(array.as_ref()) + } + + fn array_hash(array: &Self::Array, state: &mut H, precision: Precision) { + array.inner.array_hash(state, precision); + array.values_ptype.hash(state); + array.n_chunks.hash(state); + array.n_lanes.hash(state); + array.lane_offsets.array_hash(state, precision); + array.indices.array_hash(state, precision); + array.values.array_hash(state, precision); + } + + fn array_eq(array: &Self::Array, other: &Self::Array, precision: Precision) -> bool { + array.n_chunks == other.n_chunks + && array.n_lanes == other.n_lanes + && array.values_ptype == other.values_ptype + && array.inner.array_eq(&other.inner, precision) + && array.lane_offsets.array_eq(&other.lane_offsets, precision) + && array.indices.array_eq(&other.indices, precision) + && array.values.array_eq(&other.values, precision) + } + + fn nbuffers(_array: &Self::Array) -> usize { + 3 + } + + fn buffer(array: &Self::Array, idx: usize) -> BufferHandle { + match idx { + 0 => array.lane_offsets.clone(), + 1 => array.indices.clone(), + 2 => array.values.clone(), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), + } + } + + fn buffer_name(_array: &Self::Array, idx: usize) -> Option { + match idx { + 0 => Some("lane_offsets".to_string()), + 1 => Some("patch_indices".to_string()), + 2 => Some("patch_values".to_string()), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), + } + } + + fn nchildren(_array: &Self::Array) -> usize { + 1 + } + + fn child(array: &Self::Array, idx: usize) -> ArrayRef { + if idx == 0 { + array.inner.clone() + } else { + vortex_panic!("invalid child index for PatchedArray: {idx}"); + } + } + + fn child_name(_array: &Self::Array, idx: usize) -> String { + if idx == 0 { + "inner".to_string() + } else { + vortex_panic!("invalid child index for PatchedArray: {idx}"); + } + } + + #[allow(clippy::cast_possible_truncation)] + fn metadata(array: &Self::Array) -> VortexResult { + Ok(ProstMetadata(PatchedMetadata { + offset: array.offset as u32, + })) + } + + fn serialize(_metadata: Self::Metadata) -> VortexResult>> { + Ok(Some(vec![])) + } + + fn deserialize( + bytes: &[u8], + _dtype: &DType, + _len: usize, + _buffers: &[BufferHandle], + _session: &VortexSession, + ) -> VortexResult { + let inner = as DeserializeMetadata>::deserialize(bytes)?; + Ok(ProstMetadata(inner)) + } + + fn build( + dtype: &DType, + len: usize, + metadata: &Self::Metadata, + buffers: &[BufferHandle], + children: &dyn ArrayChildren, + ) -> VortexResult { + let inner = children.get(0, dtype, len)?; + + let n_chunks = len.div_ceil(1024); + + let n_lanes = match_each_native_ptype!(dtype.as_ptype(), |P| { patch_lanes::

() }); + + let &[lane_offsets, indices, values] = &buffers else { + vortex_bail!("invalid buffer count for PatchedArray"); + }; + + Ok(PatchedArray { + inner, + n_chunks, + n_lanes, + offset: metadata.offset as usize, + len, + lane_offsets: lane_offsets.clone(), + indices: indices.clone(), + values: values.clone(), + values_ptype: dtype.as_ptype(), + stats_set: ArrayStats::default(), + }) + } + + fn with_children(array: &mut Self::Array, mut children: Vec) -> VortexResult<()> { + vortex_ensure!( + children.len() == 1, + "PatchedArray must have exactly 1 child" + ); + + array.inner = children.remove(0); + + Ok(()) + } + + fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult { + let inner = array + .inner + .clone() + .execute::(ctx)? + .into_primitive(); + + let PrimitiveArrayParts { + buffer, + ptype, + validity, + } = inner.into_parts(); + + let lane_offsets: Buffer = + Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); + let indices: Buffer = Buffer::from_byte_buffer(array.indices.clone().unwrap_host()); + + let patched_values = match_each_native_ptype!(array.values_ptype, |V| { + let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); + let values: Buffer = Buffer::from_byte_buffer(array.values.clone().unwrap_host()); + + let offset = array.offset; + let len = array.len; + + apply::( + &mut output, + offset, + len, + array.n_chunks, + array.n_lanes, + &lane_offsets, + &indices, + &values, + ); + + // The output will always be aligned to a chunk boundary, we apply the offset/len + // at the end to slice to only the in-bounds values. + let _output = output.as_slice(); + let output = output.freeze().slice(offset..offset + len); + + PrimitiveArray::from_byte_buffer(output.into_byte_buffer(), ptype, validity) + }); + + Ok(ExecutionStep::done(patched_values.into_array())) + } + + fn execute_parent( + array: &Self::Array, + parent: &ArrayRef, + child_idx: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + PARENT_KERNELS.execute(array, parent, child_idx, ctx) + } + + fn reduce_parent( + array: &Self::Array, + parent: &ArrayRef, + child_idx: usize, + ) -> VortexResult> { + PARENT_RULES.evaluate(array, parent, child_idx) + } +} + +/// Apply patches on top of the existing value types. +#[allow(clippy::too_many_arguments)] +fn apply( + output: &mut [V], + offset: usize, + len: usize, + n_chunks: usize, + n_lanes: usize, + lane_offsets: &[u32], + indices: &[u16], + values: &[V], +) { + for chunk in 0..n_chunks { + let start = lane_offsets[chunk * n_lanes] as usize; + let stop = lane_offsets[chunk * n_lanes + n_lanes] as usize; + + for idx in start..stop { + // the indices slice is measured as an offset into the 1024-value chunk. + let index = chunk * 1024 + indices[idx] as usize; + if index < offset || index >= offset + len { + continue; + } + + let value = values[idx]; + output[index] = value; + } + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + use vortex_buffer::buffer_mut; + use vortex_session::VortexSession; + + use crate::Canonical; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::arrays::PatchedArray; + use crate::dtype::Nullability; + use crate::patches::Patches; + use crate::scalar::Scalar; + + #[test] + fn test_execute() { + let values = buffer![0u16; 1024].into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + let executed = array + .execute::(&mut ctx) + .unwrap() + .into_primitive() + .into_buffer::(); + + let mut expected = buffer_mut![0u16; 1024]; + expected[1] = 1; + expected[2] = 1; + expected[3] = 1; + + assert_eq!(executed, expected.freeze()); + } + + #[test] + fn test_scalar_at() { + let values = buffer![0u16; 1024].into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + assert_eq!( + array.scalar_at(0).unwrap(), + Scalar::primitive(0u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(1).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(2).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(3).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + } +} diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs new file mode 100644 index 00000000000..ddf5dcec590 --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::DynArray; +use crate::arrays::patched::Patched; +use crate::arrays::patched::PatchedArray; +use crate::arrays::patched::patch_lanes; +use crate::match_each_native_ptype; +use crate::scalar::Scalar; +use crate::vtable::OperationsVTable; + +impl OperationsVTable for Patched { + fn scalar_at(array: &PatchedArray, index: usize) -> VortexResult { + // First check the patches + let chunk = index / 1024; + #[allow(clippy::cast_possible_truncation)] + let chunk_index = (index % 1024) as u16; + match_each_native_ptype!(array.values_ptype, |V| { + let lane = index % patch_lanes::(); + let accessor = array.accessor::(); + let patches = accessor.access(chunk, lane); + // NOTE: we do linear scan as lane has <= 32 patches, binary search would likely + // be slower. + for (patch_index, patch_value) in patches.iter() { + if patch_index == chunk_index { + return Ok(Scalar::primitive( + patch_value, + array.inner.dtype().nullability(), + )); + } + } + }); + + // Otherwise, access the underlying value. + array.inner.scalar_at(index) + } +} diff --git a/vortex-array/src/arrays/patched/vtable/slice.rs b/vortex-array/src/arrays/patched/vtable/slice.rs new file mode 100644 index 00000000000..99d04666d5f --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/slice.rs @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Range; + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::DynArray; +use crate::IntoArray; +use crate::arrays::Patched; +use crate::arrays::PatchedArray; +use crate::arrays::slice::SliceReduce; +use crate::stats::ArrayStats; + +/// Is this something that uses a SliceKernel or a SliceReduce +impl SliceReduce for Patched { + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + // We **always** slice at 1024-element chunk boundaries. We keep the offset + len + // around so that when we execute we know how much to chop off. + let new_offset = (range.start + array.offset) % 1024; + let new_len = range.end - range.start; + + let chunk_start = (range.start + array.offset) / 1024; + let chunk_stop = (range.end + array.offset).div_ceil(1024); + + // Slice the inner to chunk boundaries + let inner_start = chunk_start * 1024; + let inner_stop = (chunk_stop * 1024).min(array.inner.len()); + let inner = array.inner.slice(inner_start..inner_stop)?; + + // Slice to only maintain offsets to the sliced chunks + let sliced_lane_offsets = array + .lane_offsets + .slice_typed::((chunk_start * array.n_lanes)..(chunk_stop * array.n_lanes) + 1); + + Ok(Some( + PatchedArray { + inner, + n_chunks: chunk_stop - chunk_start, + n_lanes: array.n_lanes, + + offset: new_offset, + len: new_len, + lane_offsets: sliced_lane_offsets, + indices: array.indices.clone(), + values: array.values.clone(), + values_ptype: array.values_ptype, + stats_set: ArrayStats::default(), + } + .into_array(), + )) + } +} + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use rstest::rstest; + use vortex_buffer::Buffer; + use vortex_buffer::BufferMut; + use vortex_buffer::buffer; + use vortex_error::VortexResult; + + use crate::Canonical; + use crate::DynArray; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::dtype::NativePType; + use crate::patches::Patches; + + #[test] + fn test_reduce() -> VortexResult<()> { + let values = buffer![0u16; 512].into_array(); + let patch_indices = buffer![1u32, 8, 30].into_array(); + let patch_values = buffer![u16::MAX; 3].into_array(); + let patches = Patches::new(512, 0, patch_indices, patch_values, None).unwrap(); + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let patched_array = + PatchedArray::from_array_and_patches(values, &patches, &mut ctx).unwrap(); + + let sliced = patched_array.slice(1..10)?; + + insta::assert_snapshot!( + sliced.display_tree_encodings_only(), + @r#" + root: vortex.patched(u16, len=9) + inner: vortex.primitive(u16, len=512) + "#); + + let executed = sliced.execute::(&mut ctx)?.into_primitive(); + + assert_eq!( + &[u16::MAX, 0, 0, 0, 0, 0, 0, u16::MAX, 0], + executed.as_slice::() + ); + + Ok(()) + } + + #[rstest] + #[case::trivial(buffer![1u64; 2], buffer![1u32], buffer![u64::MAX], 1..2)] + #[case::one_chunk(buffer![0u64; 1024], buffer![1u32, 8, 30], buffer![u64::MAX; 3], 1..10)] + #[case::multichunk(buffer![1u64; 10_000], buffer![0u32, 1, 2, 3, 4, 16, 17, 18, 19, 1024, 2048, 2049], buffer![u64::MAX; 12], 1024..5000)] + fn test_cases( + #[case] inner: Buffer, + #[case] patch_indices: Buffer, + #[case] patch_values: Buffer, + #[case] range: Range, + ) { + // Create patched array. + let patches = Patches::new( + inner.len(), + 0, + patch_indices.into_array(), + patch_values.into_array(), + None, + ) + .unwrap(); + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let patched_array = + PatchedArray::from_array_and_patches(inner.into_array(), &patches, &mut ctx).unwrap(); + + // Verify that applying slice first yields same result as applying slice at end. + let slice_first = patched_array + .slice(range.clone()) + .unwrap() + .execute::(&mut ctx) + .unwrap() + .into_array(); + + let slice_last = patched_array + .into_array() + .execute::(&mut ctx) + .unwrap() + .into_primitive() + .slice(range) + .unwrap(); + + assert_arrays_eq!(slice_first, slice_last); + } + + #[test] + fn test_stacked_slices() { + let values = PrimitiveArray::from_iter(0u64..10_000).into_array(); + + let patched_indices = buffer![1u32, 2, 1024, 2048, 3072, 3088].into_array(); + let patched_values = buffer![0u64, 1, 2, 3, 4, 5].into_array(); + + let patches = Patches::new(10_000, 0, patched_indices, patched_values, None).unwrap(); + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let patched_array = + PatchedArray::from_array_and_patches(values, &patches, &mut ctx).unwrap(); + + let sliced = patched_array + .slice(1024..5000) + .unwrap() + .slice(1..2065) + .unwrap() + .execute::(&mut ctx) + .unwrap() + .into_array(); + + let mut expected = BufferMut::from_iter(1025u64..=3088); + expected[1023] = 3; + expected[2047] = 4; + expected[2063] = 5; + + let expected = expected.into_array(); + + assert_arrays_eq!(expected, sliced); + } +} diff --git a/vortex-buffer/src/buffer.rs b/vortex-buffer/src/buffer.rs index 053cb5baee5..11c360f21e8 100644 --- a/vortex-buffer/src/buffer.rs +++ b/vortex-buffer/src/buffer.rs @@ -523,6 +523,34 @@ impl Buffer { } } +impl ByteBuffer { + /// Reinterpret the byte buffer as a slice of values of type `V`. + /// + /// # Panics + /// + /// This method will only work if the buffer has the proper size and alignment to be viewed + /// as a buffer of `V` values. + pub fn reinterpret(&self) -> &[V] { + assert!( + self.is_aligned(Alignment::of::()), + "ByteBuffer not properly aligned to {}", + type_name::() + ); + + assert_eq!( + self.length % size_of::(), + 0, + "ByteBuffer length not a multiple of the value length" + ); + + let v_len = self.length / size_of::(); + let v_ptr = self.bytes.as_ptr().cast::(); + + // SAFETY: we checked that alignment and length are suitable to treat this as a &[V]. + unsafe { std::slice::from_raw_parts(v_ptr, v_len) } + } +} + /// An iterator over Buffer elements. /// /// This is an analog to the `std::slice::Iter` type. From 3b4b7ec2ba111cd5607b1c525014a7529ae2e442 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 16:18:19 -0400 Subject: [PATCH 02/23] take Signed-off-by: Andrew Duffy --- .../src/arrays/patched/compute/mod.rs | 1 + .../src/arrays/patched/compute/take.rs | 110 ++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 vortex-array/src/arrays/patched/compute/take.rs diff --git a/vortex-array/src/arrays/patched/compute/mod.rs b/vortex-array/src/arrays/patched/compute/mod.rs index aa8b18199b2..8634a22f90b 100644 --- a/vortex-array/src/arrays/patched/compute/mod.rs +++ b/vortex-array/src/arrays/patched/compute/mod.rs @@ -4,3 +4,4 @@ mod compare; mod filter; pub(crate) mod rules; +mod take; diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs new file mode 100644 index 00000000000..59a483c83f0 --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use rustc_hash::FxHashMap; +use vortex_buffer::Buffer; +use vortex_error::VortexResult; + +use crate::arrays::dict::TakeExecute; +use crate::arrays::primitive::PrimitiveArrayParts; +use crate::arrays::{Patched, PrimitiveArray}; +use crate::dtype::{IntegerPType, NativePType}; +use crate::{ArrayRef, DynArray, IntoArray, match_each_native_ptype}; +use crate::{ExecutionCtx, match_each_unsigned_integer_ptype}; + +impl TakeExecute for Patched { + fn take( + array: &Self::Array, + indices: &ArrayRef, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + // Perform take on the inner array, including the placeholders. + let inner = array + .inner + .take(indices.clone())? + .execute::(ctx)?; + + let PrimitiveArrayParts { + buffer, + validity, + ptype, + } = inner.into_parts(); + + let indices_ptype = indices.dtype().as_ptype(); + + match_each_unsigned_integer_ptype!(indices_ptype, |I| { + match_each_native_ptype!(ptype, |V| { + let indices = indices.clone().execute::(ctx)?; + let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); + take_map( + output.as_mut(), + indices.as_slice::(), + array.offset, + array.len, + array.n_chunks, + array.n_lanes, + array.lane_offsets.as_host().reinterpret::(), + array.indices.as_host().reinterpret::(), + array.values.as_host().reinterpret::(), + ); + + // SAFETY: output and validity still have same length after take_map returns. + unsafe { + return Ok(Some( + PrimitiveArray::new_unchecked(output.freeze(), validity).into_array(), + )); + } + }) + }); + } +} + +/// Take patches for the given `indices` and apply them onto an `output` using a hash map. +/// +/// First, builds a hashmap from index to patch value, then uses the hashmap in a loop to collect +/// the values. +fn take_map( + output: &mut [V], + indices: &[I], + offset: usize, + len: usize, + n_chunks: usize, + n_lanes: usize, + lane_offsets: &[u32], + patch_index: &[u16], + patch_value: &[V], +) { + // Build a hashmap of patch_index -> values. + let mut index_map = FxHashMap::with_capacity(indices.len()); + for chunk in 0..n_chunks { + for lane in 0..n_lanes { + let [lane_start, lane_end] = lane_offsets[chunk * n_lanes + lane..][..2]; + for i in lane_start..lane_end { + let patch_idx = patch_index[i as usize]; + let patch_value = patch_value[i as usize]; + + let index = chunk * 1024 + patch_idx as usize; + if index >= offset && index < offset + len { + index_map.insert(index, patch_value); + } + } + } + } + + // Now, iterate the take indices using the prebuilt hashmap. + // Undefined/null indices will miss the hash map, which we can ignore. + for index in indices { + let index = index.as_(); + if let Some(&patch_value) = index_map.get(&index) { + output[index] = patch_value; + } + } +} + +#[cfg(test)] +mod tests { + #[test] + fn test_take() { + // Patch some values here instead. + } +} From 7e589797fa3ef9c7d891052d382db584d621decf Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 16:53:11 -0400 Subject: [PATCH 03/23] add unit tests Signed-off-by: Andrew Duffy --- .../src/arrays/patched/compute/take.rs | 142 ++++++++++++++++-- 1 file changed, 131 insertions(+), 11 deletions(-) diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs index 59a483c83f0..3db64313be4 100644 --- a/vortex-array/src/arrays/patched/compute/take.rs +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -5,12 +5,18 @@ use rustc_hash::FxHashMap; use vortex_buffer::Buffer; use vortex_error::VortexResult; +use crate::ArrayRef; +use crate::DynArray; +use crate::ExecutionCtx; +use crate::IntoArray; +use crate::arrays::Patched; +use crate::arrays::PrimitiveArray; use crate::arrays::dict::TakeExecute; use crate::arrays::primitive::PrimitiveArrayParts; -use crate::arrays::{Patched, PrimitiveArray}; -use crate::dtype::{IntegerPType, NativePType}; -use crate::{ArrayRef, DynArray, IntoArray, match_each_native_ptype}; -use crate::{ExecutionCtx, match_each_unsigned_integer_ptype}; +use crate::dtype::IntegerPType; +use crate::dtype::NativePType; +use crate::match_each_native_ptype; +use crate::match_each_unsigned_integer_ptype; impl TakeExecute for Patched { fn take( @@ -50,12 +56,12 @@ impl TakeExecute for Patched { // SAFETY: output and validity still have same length after take_map returns. unsafe { - return Ok(Some( + Ok(Some( PrimitiveArray::new_unchecked(output.freeze(), validity).into_array(), - )); + )) } }) - }); + }) } } @@ -63,6 +69,7 @@ impl TakeExecute for Patched { /// /// First, builds a hashmap from index to patch value, then uses the hashmap in a loop to collect /// the values. +#[allow(clippy::too_many_arguments)] fn take_map( output: &mut [V], indices: &[I], @@ -75,10 +82,11 @@ fn take_map( patch_value: &[V], ) { // Build a hashmap of patch_index -> values. - let mut index_map = FxHashMap::with_capacity(indices.len()); + let mut index_map = FxHashMap::with_capacity_and_hasher(indices.len(), Default::default()); for chunk in 0..n_chunks { for lane in 0..n_lanes { - let [lane_start, lane_end] = lane_offsets[chunk * n_lanes + lane..][..2]; + let lane_start = lane_offsets[chunk * n_lanes + lane]; + let lane_end = lane_offsets[chunk * n_lanes + lane + 1]; for i in lane_start..lane_end { let patch_idx = patch_index[i as usize]; let patch_value = patch_value[i as usize]; @@ -103,8 +111,120 @@ fn take_map( #[cfg(test)] mod tests { + use vortex_buffer::buffer; + use vortex_error::VortexResult; + use vortex_session::VortexSession; + + use crate::DynArray; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::patches::Patches; + + fn make_patched_array( + base: &[u16], + patch_indices: &[u32], + patch_values: &[u16], + ) -> VortexResult { + let values = PrimitiveArray::from_iter(base.iter().copied()).into_array(); + let patches = Patches::new( + base.len(), + 0, + PrimitiveArray::from_iter(patch_indices.iter().copied()).into_array(), + PrimitiveArray::from_iter(patch_values.iter().copied()).into_array(), + None, + )?; + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + } + + #[test] + fn test_take_basic() -> VortexResult<()> { + // Array with base values [0, 0, 0, 0, 0] patched at indices [1, 3] with values [10, 30] + let array = make_patched_array(&[0; 5], &[1, 3], &[10, 30])?.into_array(); + + // Take indices [0, 1, 2, 3, 4] - should get [0, 10, 0, 30, 0] + let indices = buffer![0u32, 1, 2, 3, 4].into_array(); + let result = array.take(indices)?; + + let expected = PrimitiveArray::from_iter([0u16, 10, 0, 30, 0]).into_array(); + assert_arrays_eq!(expected, result); + + Ok(()) + } + + #[test] + fn test_take_out_of_order() -> VortexResult<()> { + // Array with base values [0, 0, 0, 0, 0] patched at indices [1, 3] with values [10, 30] + let array = make_patched_array(&[0; 5], &[1, 3], &[10, 30])?.into_array(); + + // Take indices in reverse order + let indices = buffer![4u32, 3, 2, 1, 0].into_array(); + let result = array.take(indices)?; + + let expected = PrimitiveArray::from_iter([0u16, 30, 0, 10, 0]).into_array(); + assert_arrays_eq!(expected, result); + + Ok(()) + } + + #[test] + fn test_take_duplicates() -> VortexResult<()> { + // Array with base values [0, 0, 0, 0, 0] patched at index [2] with value [99] + let array = make_patched_array(&[0; 5], &[2], &[99])?.into_array(); + + // Take the same patched index multiple times + let indices = buffer![2u32, 2, 0, 2].into_array(); + let result = array.take(indices)?; + + let expected = PrimitiveArray::from_iter([99u16, 99, 0, 99]).into_array(); + assert_arrays_eq!(expected, result); + + Ok(()) + } + #[test] - fn test_take() { - // Patch some values here instead. + fn test_take_with_null_indices() -> VortexResult<()> { + use crate::arrays::BoolArray; + use crate::validity::Validity; + + // Array: 10 elements, base value 0, patches at indices 2, 5, 8 with values 20, 50, 80 + let array = make_patched_array(&[0; 10], &[2, 5, 8], &[20, 50, 80])?.into_array(); + + // Take 10 indices, with nulls at positions 1, 4, 7 + // Indices: [0, 2, 2, 5, 8, 0, 5, 8, 3, 1] + // Nulls: [ , , N, , , N, , , N, ] + // Position 2 (index=2, patched) is null + // Position 5 (index=0, unpatched) is null + // Position 8 (index=3, unpatched) is null + let indices = PrimitiveArray::new( + buffer![0u32, 2, 2, 5, 8, 0, 5, 8, 3, 1], + Validity::Array( + BoolArray::from_iter([ + true, true, false, true, true, false, true, true, false, true, + ]) + .into_array(), + ), + ); + let result = array.take(indices.into_array())?; + + // Expected: [0, 20, null, 50, 80, null, 50, 80, null, 0] + let expected = PrimitiveArray::new( + buffer![0u16, 20, 0, 50, 80, 0, 50, 80, 0, 0], + Validity::Array( + BoolArray::from_iter([ + true, true, false, true, true, false, true, true, false, true, + ]) + .into_array(), + ), + ); + assert_arrays_eq!(expected.into_array(), result); + + Ok(()) } } From c676d711089a116708f5594ffabed17aa4322818 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Tue, 24 Mar 2026 15:17:07 -0400 Subject: [PATCH 04/23] final Signed-off-by: Andrew Duffy --- vortex-array/public-api.lock | 382 ++++++++++++++++++ vortex-array/src/arrays/patched/array.rs | 2 +- vortex-array/src/arrays/patched/vtable/mod.rs | 15 +- vortex-buffer/public-api.lock | 4 + 4 files changed, 397 insertions(+), 6 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 821ef35e111..fcdabbabb75 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -2044,6 +2044,10 @@ impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::dict::Dic pub fn vortex_array::arrays::dict::Dict::take(array: &vortex_array::arrays::dict::DictArray, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::take(array: &Self::Array, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + pub trait vortex_array::arrays::dict::TakeReduce: vortex_array::vtable::VTable pub fn vortex_array::arrays::dict::TakeReduce::take(array: &Self::Array, indices: &vortex_array::ArrayRef) -> vortex_error::VortexResult> @@ -2442,6 +2446,10 @@ impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::null:: pub fn vortex_array::arrays::null::Null::filter(_array: &vortex_array::arrays::null::NullArray, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> +impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::filter(array: &Self::Array, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> + pub mod vortex_array::arrays::fixed_size_list pub struct vortex_array::arrays::fixed_size_list::FixedSizeList @@ -3234,6 +3242,176 @@ impl vortex_array::IntoArray for vortex_array::arrays::null::NullArray pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array::ArrayRef +pub mod vortex_array::arrays::patched + +pub struct vortex_array::arrays::patched::LanePatches<'a, V> + +pub vortex_array::arrays::patched::LanePatches::indices: &'a [u16] + +pub vortex_array::arrays::patched::LanePatches::values: &'a [V] + +impl<'a, V: core::marker::Copy> vortex_array::arrays::patched::LanePatches<'a, V> + +pub fn vortex_array::arrays::patched::LanePatches<'a, V>::is_empty(&self) -> bool + +pub fn vortex_array::arrays::patched::LanePatches<'a, V>::iter(&self) -> impl core::iter::traits::iterator::Iterator + +pub fn vortex_array::arrays::patched::LanePatches<'a, V>::len(&self) -> usize + +pub struct vortex_array::arrays::patched::PatchAccessor<'a, V> + +impl<'a, V: core::marker::Sized> vortex_array::arrays::patched::PatchAccessor<'a, V> + +pub fn vortex_array::arrays::patched::PatchAccessor<'a, V>::access(&'a self, chunk: usize, lane: usize) -> vortex_array::arrays::patched::LanePatches<'a, V> + +pub struct vortex_array::arrays::patched::Patched + +impl core::clone::Clone for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::clone(&self) -> vortex_array::arrays::patched::Patched + +impl core::fmt::Debug for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::take(array: &Self::Array, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::filter(array: &Self::Array, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> + +impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> + +impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult + +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::vtable(_array: &Self::Array) -> &Self + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + +impl vortex_array::vtable::ValidityChild for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::validity_child(array: &vortex_array::arrays::patched::PatchedArray) -> &vortex_array::ArrayRef + +pub struct vortex_array::arrays::patched::PatchedArray + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::accessor(&self) -> vortex_array::arrays::patched::PatchAccessor<'_, V> + +pub fn vortex_array::arrays::patched::PatchedArray::from_array_and_patches(inner: vortex_array::ArrayRef, patches: &vortex_array::patches::Patches, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::to_array(&self) -> vortex_array::ArrayRef + +impl core::clone::Clone for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::clone(&self) -> vortex_array::arrays::patched::PatchedArray + +impl core::convert::AsRef for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::as_ref(&self) -> &dyn vortex_array::DynArray + +impl core::convert::From for vortex_array::ArrayRef + +pub fn vortex_array::ArrayRef::from(value: vortex_array::arrays::patched::PatchedArray) -> vortex_array::ArrayRef + +impl core::fmt::Debug for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::PatchedArray::Target = dyn vortex_array::DynArray + +pub fn vortex_array::arrays::patched::PatchedArray::deref(&self) -> &Self::Target + +impl vortex_array::IntoArray for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::into_array(self) -> vortex_array::ArrayRef + +pub struct vortex_array::arrays::patched::PatchedMetadata + +impl core::clone::Clone for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::clone(&self) -> vortex_array::arrays::patched::PatchedMetadata + +impl core::default::Default for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::default() -> Self + +impl core::fmt::Debug for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl prost::message::Message for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::clear(&mut self) + +pub fn vortex_array::arrays::patched::PatchedMetadata::encoded_len(&self) -> usize + pub mod vortex_array::arrays::primitive #[repr(transparent)] pub struct vortex_array::arrays::primitive::NativeValue(pub T) @@ -4084,6 +4262,10 @@ impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::null::Nu pub fn vortex_array::arrays::null::Null::slice(_array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> +impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> + impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::slice::Slice pub fn vortex_array::arrays::slice::Slice::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> @@ -6932,6 +7114,134 @@ impl vortex_array::IntoArray for vortex_array::arrays::null::NullArray pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array::ArrayRef +pub struct vortex_array::arrays::Patched + +impl core::clone::Clone for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::clone(&self) -> vortex_array::arrays::patched::Patched + +impl core::fmt::Debug for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::take(array: &Self::Array, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::filter(array: &Self::Array, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> + +impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> + +impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult + +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::vtable(_array: &Self::Array) -> &Self + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + +impl vortex_array::vtable::ValidityChild for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::validity_child(array: &vortex_array::arrays::patched::PatchedArray) -> &vortex_array::ArrayRef + +pub struct vortex_array::arrays::PatchedArray + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::accessor(&self) -> vortex_array::arrays::patched::PatchAccessor<'_, V> + +pub fn vortex_array::arrays::patched::PatchedArray::from_array_and_patches(inner: vortex_array::ArrayRef, patches: &vortex_array::patches::Patches, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::to_array(&self) -> vortex_array::ArrayRef + +impl core::clone::Clone for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::clone(&self) -> vortex_array::arrays::patched::PatchedArray + +impl core::convert::AsRef for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::as_ref(&self) -> &dyn vortex_array::DynArray + +impl core::convert::From for vortex_array::ArrayRef + +pub fn vortex_array::ArrayRef::from(value: vortex_array::arrays::patched::PatchedArray) -> vortex_array::ArrayRef + +impl core::fmt::Debug for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::PatchedArray::Target = dyn vortex_array::DynArray + +pub fn vortex_array::arrays::patched::PatchedArray::deref(&self) -> &Self::Target + +impl vortex_array::IntoArray for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::into_array(self) -> vortex_array::ArrayRef + pub struct vortex_array::arrays::Primitive impl vortex_array::arrays::Primitive @@ -17262,6 +17572,10 @@ impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::array pub fn vortex_array::arrays::dict::Dict::compare(lhs: &vortex_array::arrays::dict::DictArray, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + pub fn vortex_array::scalar_fn::fns::binary::and_kleene(lhs: &vortex_array::ArrayRef, rhs: &vortex_array::ArrayRef) -> vortex_error::VortexResult pub fn vortex_array::scalar_fn::fns::binary::compare_nested_arrow_arrays(lhs: &dyn arrow_array::array::Array, rhs: &dyn arrow_array::array::Array, operator: vortex_array::scalar_fn::fns::operators::CompareOperator) -> vortex_error::VortexResult @@ -22064,6 +22378,10 @@ impl vortex_array::vtable::OperationsVTable fo pub fn vortex_array::arrays::null::Null::scalar_at(_array: &vortex_array::arrays::null::NullArray, _index: usize, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult + impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::scalar_fn::ScalarFnVTable pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::scalar_at(array: &vortex_array::arrays::scalar_fn::ScalarFnArray, index: usize, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult @@ -23140,6 +23458,62 @@ pub fn vortex_array::arrays::null::Null::vtable(_array: &Self::Array) -> &Self pub fn vortex_array::arrays::null::Null::with_children(_array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::vtable(_array: &Self::Array) -> &Self + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + impl vortex_array::vtable::VTable for vortex_array::arrays::scalar_fn::ScalarFnVTable pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::Array = vortex_array::arrays::scalar_fn::ScalarFnArray @@ -23260,6 +23634,10 @@ impl vortex_array::vtable::ValidityChild for vo pub fn vortex_array::arrays::Extension::validity_child(array: &vortex_array::arrays::ExtensionArray) -> &vortex_array::ArrayRef +impl vortex_array::vtable::ValidityChild for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::validity_child(array: &vortex_array::arrays::patched::PatchedArray) -> &vortex_array::ArrayRef + pub trait vortex_array::vtable::ValidityChildSliceHelper pub fn vortex_array::vtable::ValidityChildSliceHelper::sliced_child_array(&self) -> vortex_error::VortexResult @@ -24508,6 +24886,10 @@ impl vortex_array::IntoArray for vortex_array::arrays::null::NullArray pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array::ArrayRef +impl vortex_array::IntoArray for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::into_array(self) -> vortex_array::ArrayRef + impl vortex_array::IntoArray for vortex_array::arrays::scalar_fn::ScalarFnArray pub fn vortex_array::arrays::scalar_fn::ScalarFnArray::into_array(self) -> vortex_array::ArrayRef diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 9f96f3c5c9f..8a6c3e9de17 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -216,7 +216,7 @@ fn transpose( lane_offsets[index] += lane_offsets[index - 1]; } - // Loop over patches, writing thme to final positions + // Loop over patches, writing them to final positions let indices_out = indices_buffer.spare_capacity_mut(); let values_out = values_buffer.spare_capacity_mut(); for (index, &value) in std::iter::zip(indices_in, values_in) { diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index bcc87d869ce..edfb01fc101 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -7,6 +7,7 @@ mod slice; use std::hash::Hash; use std::hash::Hasher; +use std::sync::Arc; use vortex_buffer::Buffer; use vortex_error::VortexResult; @@ -22,7 +23,7 @@ use crate::Canonical; use crate::DeserializeMetadata; use crate::DynArray; use crate::ExecutionCtx; -use crate::ExecutionStep; +use crate::ExecutionResult; use crate::IntoArray; use crate::Precision; use crate::ProstMetadata; @@ -47,7 +48,7 @@ use crate::vtable::ValidityVTableFromChild; vtable!(Patched); -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct Patched; impl ValidityChild for Patched { @@ -68,7 +69,11 @@ impl VTable for Patched { type OperationsVTable = Self; type ValidityVTable = ValidityVTableFromChild; - fn id(_array: &Self::Array) -> ArrayId { + fn vtable(_array: &Self::Array) -> &Self { + &Patched + } + + fn id(&self) -> ArrayId { ArrayId::new_ref("vortex.patched") } @@ -210,7 +215,7 @@ impl VTable for Patched { Ok(()) } - fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult { + fn execute(array: Arc, ctx: &mut ExecutionCtx) -> VortexResult { let inner = array .inner .clone() @@ -253,7 +258,7 @@ impl VTable for Patched { PrimitiveArray::from_byte_buffer(output.into_byte_buffer(), ptype, validity) }); - Ok(ExecutionStep::done(patched_values.into_array())) + Ok(ExecutionResult::done(patched_values.into_array())) } fn execute_parent( diff --git a/vortex-buffer/public-api.lock b/vortex-buffer/public-api.lock index 29b941c77d4..f3b52183e39 100644 --- a/vortex-buffer/public-api.lock +++ b/vortex-buffer/public-api.lock @@ -550,6 +550,10 @@ pub fn vortex_buffer::Buffer::from_arrow_buffer(arrow: arrow_buffer::buffer: pub fn vortex_buffer::Buffer::into_arrow_buffer(self) -> arrow_buffer::buffer::immutable::Buffer +impl vortex_buffer::Buffer + +pub fn vortex_buffer::Buffer::reinterpret(&self) -> &[V] + impl vortex_buffer::Buffer pub fn vortex_buffer::Buffer::from_arrow_scalar_buffer(arrow: arrow_buffer::buffer::scalar::ScalarBuffer) -> Self From 5a0a4791f91ccb426a6d9118565b824d8332114d Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 17:36:19 -0400 Subject: [PATCH 05/23] actually make the kernel get used Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/vtable/kernels.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vortex-array/src/arrays/patched/vtable/kernels.rs b/vortex-array/src/arrays/patched/vtable/kernels.rs index 2c60ec4a20f..7994b19e02e 100644 --- a/vortex-array/src/arrays/patched/vtable/kernels.rs +++ b/vortex-array/src/arrays/patched/vtable/kernels.rs @@ -2,8 +2,11 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use crate::arrays::Patched; +use crate::arrays::dict::TakeExecuteAdaptor; use crate::kernel::ParentKernelSet; use crate::scalar_fn::fns::binary::CompareExecuteAdaptor; -pub(super) const PARENT_KERNELS: ParentKernelSet = - ParentKernelSet::new(&[ParentKernelSet::lift(&CompareExecuteAdaptor(Patched))]); +pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ + ParentKernelSet::lift(&CompareExecuteAdaptor(Patched)), + ParentKernelSet::lift(&TakeExecuteAdaptor(Patched)), +]); From 44eb41d112f92b9f81be0723520805f875c5fff3 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 21:14:21 -0400 Subject: [PATCH 06/23] fix tests Signed-off-by: Andrew Duffy --- .../src/arrays/patched/compute/take.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs index 3db64313be4..92b81f81c02 100644 --- a/vortex-array/src/arrays/patched/compute/take.rs +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -101,10 +101,10 @@ fn take_map( // Now, iterate the take indices using the prebuilt hashmap. // Undefined/null indices will miss the hash map, which we can ignore. - for index in indices { + for (output_index, index) in indices.iter().enumerate() { let index = index.as_(); if let Some(&patch_value) = index_map.get(&index) { - output[index] = patch_value; + output[output_index] = patch_value; } } } @@ -150,7 +150,7 @@ mod tests { // Take indices [0, 1, 2, 3, 4] - should get [0, 10, 0, 30, 0] let indices = buffer![0u32, 1, 2, 3, 4].into_array(); - let result = array.take(indices)?; + let result = array.take(indices)?.to_canonical()?.into_array(); let expected = PrimitiveArray::from_iter([0u16, 10, 0, 30, 0]).into_array(); assert_arrays_eq!(expected, result); @@ -165,7 +165,7 @@ mod tests { // Take indices in reverse order let indices = buffer![4u32, 3, 2, 1, 0].into_array(); - let result = array.take(indices)?; + let result = array.take(indices)?.to_canonical()?.into_array(); let expected = PrimitiveArray::from_iter([0u16, 30, 0, 10, 0]).into_array(); assert_arrays_eq!(expected, result); @@ -180,7 +180,10 @@ mod tests { // Take the same patched index multiple times let indices = buffer![2u32, 2, 0, 2].into_array(); - let result = array.take(indices)?; + let result = array.take(indices)?.to_canonical()?.into_array(); + + // execute the array. + let _canonical = result.to_canonical()?.into_primitive(); let expected = PrimitiveArray::from_iter([99u16, 99, 0, 99]).into_array(); assert_arrays_eq!(expected, result); @@ -211,7 +214,10 @@ mod tests { .into_array(), ), ); - let result = array.take(indices.into_array())?; + let result = array + .take(indices.into_array())? + .to_canonical()? + .into_array(); // Expected: [0, 20, null, 50, 80, null, 50, 80, null, 0] let expected = PrimitiveArray::new( From e26e0654d26c3e6a4f29bf644a22cd2bb90c0039 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Thu, 19 Mar 2026 11:28:29 -0400 Subject: [PATCH 07/23] use child for values instead of buffer Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/array.rs | 32 +++++++++---- .../src/arrays/patched/compute/compare.rs | 12 ++++- .../src/arrays/patched/compute/take.rs | 8 +++- vortex-array/src/arrays/patched/mod.rs | 29 +++++++---- vortex-array/src/arrays/patched/vtable/mod.rs | 48 ++++++++++--------- .../src/arrays/patched/vtable/operations.rs | 27 +++++------ .../src/arrays/patched/vtable/slice.rs | 2 +- 7 files changed, 99 insertions(+), 59 deletions(-) diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 8a6c3e9de17..4bf26dafc17 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -12,17 +12,19 @@ use crate::ArrayRef; use crate::Canonical; use crate::DynArray; use crate::ExecutionCtx; +use crate::IntoArray; +use crate::arrays::PrimitiveArray; use crate::arrays::patched::PatchAccessor; use crate::arrays::patched::TransposedPatches; use crate::arrays::patched::patch_lanes; use crate::buffer::BufferHandle; use crate::dtype::IntegerPType; use crate::dtype::NativePType; -use crate::dtype::PType; use crate::match_each_native_ptype; use crate::match_each_unsigned_integer_ptype; use crate::patches::Patches; use crate::stats::ArrayStats; +use crate::validity::Validity; /// An array that partially "patches" another array with new values. /// @@ -50,14 +52,17 @@ pub struct PatchedArray { /// indices within a 1024-element chunk. The PType of these MUST be u16 pub(super) indices: BufferHandle, /// patch values corresponding to the indices. The ptype is specified by `values_ptype`. - pub(super) values: BufferHandle, - /// PType of the scalars in `values`. Can be any native type. - pub(super) values_ptype: PType, + pub(super) values: ArrayRef, pub(super) stats_set: ArrayStats, } impl PatchedArray { + /// Create a new `PatchedArray` from a child array and a set of [`Patches`]. + /// + /// # Errors + /// + /// The `inner` array must be primitive type, and it must have the same DType as the patches. pub fn from_array_and_patches( inner: ArrayRef, patches: &Patches, @@ -68,6 +73,11 @@ impl PatchedArray { "array DType must match patches DType" ); + vortex_ensure!( + inner.dtype().is_primitive(), + "Creating PatchedArray from Patches only supported for primitive arrays" + ); + let values_ptype = patches.dtype().as_ptype(); let TransposedPatches { @@ -80,27 +90,32 @@ impl PatchedArray { let len = inner.len(); + let values = PrimitiveArray::from_buffer_handle( + BufferHandle::new_host(values), + values_ptype, + Validity::NonNullable, + ) + .into_array(); + Ok(Self { inner, n_chunks, n_lanes, - values_ptype, offset: 0, len, lane_offsets: BufferHandle::new_host(lane_offsets), indices: BufferHandle::new_host(indices), - values: BufferHandle::new_host(values), + values, stats_set: ArrayStats::default(), }) } /// Get an accessor, which allows ranged access to patches by chunk/lane. - pub fn accessor(&self) -> PatchAccessor<'_, V> { + pub fn accessor(&self) -> PatchAccessor<'_> { PatchAccessor { n_lanes: self.n_lanes, lane_offsets: self.lane_offsets.as_host().reinterpret::(), indices: self.indices.as_host().reinterpret::(), - values: self.values.as_host().reinterpret::(), } } @@ -133,7 +148,6 @@ impl PatchedArray { len, indices, values, - values_ptype: self.values_ptype, lane_offsets: sliced_lane_offsets, stats_set: ArrayStats::default(), }) diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs index d1932ed1e44..51eeb63a432 100644 --- a/vortex-array/src/arrays/patched/compute/compare.rs +++ b/vortex-array/src/arrays/patched/compute/compare.rs @@ -12,6 +12,7 @@ use crate::IntoArray; use crate::arrays::BoolArray; use crate::arrays::ConstantArray; use crate::arrays::Patched; +use crate::arrays::PrimitiveArray; use crate::arrays::bool::BoolArrayParts; use crate::arrays::patched::patch_lanes; use crate::arrays::primitive::NativeValue; @@ -28,6 +29,12 @@ impl CompareKernel for Patched { operator: CompareOperator, ctx: &mut ExecutionCtx, ) -> VortexResult> { + // We only accelerate comparisons for primitives + if !lhs.dtype().is_primitive() { + return Ok(None); + } + + // We only accelerate comparisons against constants let Some(constant) = rhs.as_constant() else { return Ok(None); }; @@ -87,9 +94,10 @@ impl CompareKernel for Patched { let lane_offsets = lhs.lane_offsets.as_host().reinterpret::(); let indices = lhs.indices.as_host().reinterpret::(); + let values = lhs.values.clone().execute::(ctx)?; - match_each_native_ptype!(lhs.values_ptype, |V| { - let values = lhs.values.as_host().reinterpret::(); + match_each_native_ptype!(values.ptype(), |V| { + let values = values.as_slice::(); let constant = constant .as_primitive() .as_::() diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs index 92b81f81c02..eee1a4c898a 100644 --- a/vortex-array/src/arrays/patched/compute/take.rs +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -24,6 +24,11 @@ impl TakeExecute for Patched { indices: &ArrayRef, ctx: &mut ExecutionCtx, ) -> VortexResult> { + // Only pushdown take when we have primitive types. + if !array.dtype().is_primitive() { + return Ok(None); + } + // Perform take on the inner array, including the placeholders. let inner = array .inner @@ -41,6 +46,7 @@ impl TakeExecute for Patched { match_each_unsigned_integer_ptype!(indices_ptype, |I| { match_each_native_ptype!(ptype, |V| { let indices = indices.clone().execute::(ctx)?; + let values = array.values.clone().execute::(ctx)?; let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); take_map( output.as_mut(), @@ -51,7 +57,7 @@ impl TakeExecute for Patched { array.n_lanes, array.lane_offsets.as_host().reinterpret::(), array.indices.as_host().reinterpret::(), - array.values.as_host().reinterpret::(), + values.as_slice::(), ); // SAFETY: output and validity still have same length after take_map returns. diff --git a/vortex-array/src/arrays/patched/mod.rs b/vortex-array/src/arrays/patched/mod.rs index f035204c188..d37f15419b9 100644 --- a/vortex-array/src/arrays/patched/mod.rs +++ b/vortex-array/src/arrays/patched/mod.rs @@ -32,23 +32,34 @@ const fn patch_lanes() -> usize { if size_of::() < 8 { 32 } else { 16 } } -pub struct PatchAccessor<'a, V> { +pub struct PatchAccessor<'a> { n_lanes: usize, lane_offsets: &'a [u32], indices: &'a [u16], - values: &'a [V], } -impl<'a, V: Sized> PatchAccessor<'a, V> { - /// Access the patches for a particular lane - pub fn access(&'a self, chunk: usize, lane: usize) -> LanePatches<'a, V> { +pub struct PatchOffset { + /// Global offset into the list of patches. These are some of the + pub index: usize, + /// This is the value stored in the `indices` buffer, which encodes the offset of the `index`-th + /// patch + pub chunk_offset: u16, +} + +impl<'a> PatchAccessor<'a> { + /// Get an iterator over indices and values offsets. + /// + /// The first component is the index into the `indices` and `values`, and the second component + /// is the set of values instead here...I think? + pub fn offsets_iter( + &self, + chunk: usize, + lane: usize, + ) -> impl Iterator + '_ { let start = self.lane_offsets[chunk * self.n_lanes + lane] as usize; let stop = self.lane_offsets[chunk * self.n_lanes + lane + 1] as usize; - LanePatches { - indices: &self.indices[start..stop], - values: &self.values[start..stop], - } + std::iter::zip(start..stop, self.indices[start..stop].iter().copied()) } } diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index edfb01fc101..3219e3733d1 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -91,7 +91,6 @@ impl VTable for Patched { fn array_hash(array: &Self::Array, state: &mut H, precision: Precision) { array.inner.array_hash(state, precision); - array.values_ptype.hash(state); array.n_chunks.hash(state); array.n_lanes.hash(state); array.lane_offsets.array_hash(state, precision); @@ -102,7 +101,6 @@ impl VTable for Patched { fn array_eq(array: &Self::Array, other: &Self::Array, precision: Precision) -> bool { array.n_chunks == other.n_chunks && array.n_lanes == other.n_lanes - && array.values_ptype == other.values_ptype && array.inner.array_eq(&other.inner, precision) && array.lane_offsets.array_eq(&other.lane_offsets, precision) && array.indices.array_eq(&other.indices, precision) @@ -117,7 +115,6 @@ impl VTable for Patched { match idx { 0 => array.lane_offsets.clone(), 1 => array.indices.clone(), - 2 => array.values.clone(), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } @@ -126,28 +123,27 @@ impl VTable for Patched { match idx { 0 => Some("lane_offsets".to_string()), 1 => Some("patch_indices".to_string()), - 2 => Some("patch_values".to_string()), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } fn nchildren(_array: &Self::Array) -> usize { - 1 + 2 } fn child(array: &Self::Array, idx: usize) -> ArrayRef { - if idx == 0 { - array.inner.clone() - } else { - vortex_panic!("invalid child index for PatchedArray: {idx}"); + match idx { + 0 => array.inner.clone(), + 1 => array.values.clone(), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } fn child_name(_array: &Self::Array, idx: usize) -> String { - if idx == 0 { - "inner".to_string() - } else { - vortex_panic!("invalid child index for PatchedArray: {idx}"); + match idx { + 0 => "inner".to_string(), + 1 => "patch_values".to_string(), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } @@ -186,10 +182,14 @@ impl VTable for Patched { let n_lanes = match_each_native_ptype!(dtype.as_ptype(), |P| { patch_lanes::

() }); - let &[lane_offsets, indices, values] = &buffers else { + let &[lane_offsets, indices] = &buffers else { vortex_bail!("invalid buffer count for PatchedArray"); }; + // values and indices should have same len. + let expected_len = indices.as_host().reinterpret::().len(); + let values = children.get(1, dtype, expected_len)?; + Ok(PatchedArray { inner, n_chunks, @@ -198,19 +198,19 @@ impl VTable for Patched { len, lane_offsets: lane_offsets.clone(), indices: indices.clone(), - values: values.clone(), - values_ptype: dtype.as_ptype(), + values, stats_set: ArrayStats::default(), }) } fn with_children(array: &mut Self::Array, mut children: Vec) -> VortexResult<()> { vortex_ensure!( - children.len() == 1, - "PatchedArray must have exactly 1 child" + children.len() == 2, + "PatchedArray must have exactly 2 children" ); array.inner = children.remove(0); + array.values = children.remove(0); Ok(()) } @@ -231,15 +231,17 @@ impl VTable for Patched { let lane_offsets: Buffer = Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); let indices: Buffer = Buffer::from_byte_buffer(array.indices.clone().unwrap_host()); + let values = array.values.clone().execute::(ctx)?; + + // TODO(aduffy): add support for non-primitive PatchedArray patches application. - let patched_values = match_each_native_ptype!(array.values_ptype, |V| { + let patched_values = match_each_native_ptype!(values.ptype(), |V| { let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); - let values: Buffer = Buffer::from_byte_buffer(array.values.clone().unwrap_host()); let offset = array.offset; let len = array.len; - apply::( + apply_patches_primitive::( &mut output, offset, len, @@ -247,7 +249,7 @@ impl VTable for Patched { array.n_lanes, &lane_offsets, &indices, - &values, + values.as_slice::(), ); // The output will always be aligned to a chunk boundary, we apply the offset/len @@ -281,7 +283,7 @@ impl VTable for Patched { /// Apply patches on top of the existing value types. #[allow(clippy::too_many_arguments)] -fn apply( +fn apply_patches_primitive( output: &mut [V], offset: usize, len: usize, diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs index ddf5dcec590..7f24e5fed9f 100644 --- a/vortex-array/src/arrays/patched/vtable/operations.rs +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -7,6 +7,7 @@ use crate::DynArray; use crate::arrays::patched::Patched; use crate::arrays::patched::PatchedArray; use crate::arrays::patched::patch_lanes; +use crate::dtype::PType; use crate::match_each_native_ptype; use crate::scalar::Scalar; use crate::vtable::OperationsVTable; @@ -17,21 +18,19 @@ impl OperationsVTable for Patched { let chunk = index / 1024; #[allow(clippy::cast_possible_truncation)] let chunk_index = (index % 1024) as u16; - match_each_native_ptype!(array.values_ptype, |V| { - let lane = index % patch_lanes::(); - let accessor = array.accessor::(); - let patches = accessor.access(chunk, lane); - // NOTE: we do linear scan as lane has <= 32 patches, binary search would likely - // be slower. - for (patch_index, patch_value) in patches.iter() { - if patch_index == chunk_index { - return Ok(Scalar::primitive( - patch_value, - array.inner.dtype().nullability(), - )); - } + + let values_ptype = PType::try_from(array.dtype())?; + + let lane = match_each_native_ptype!(values_ptype, |V| { index % patch_lanes::() }); + let accessor = array.accessor(); + + // NOTE: we do linear scan as lane has <= 32 patches, binary search would likely + // be slower. + for (index, patch_index) in accessor.offsets_iter(chunk, lane) { + if patch_index == chunk_index { + return array.values.scalar_at(index); } - }); + } // Otherwise, access the underlying value. array.inner.scalar_at(index) diff --git a/vortex-array/src/arrays/patched/vtable/slice.rs b/vortex-array/src/arrays/patched/vtable/slice.rs index 99d04666d5f..b67b68ffe0f 100644 --- a/vortex-array/src/arrays/patched/vtable/slice.rs +++ b/vortex-array/src/arrays/patched/vtable/slice.rs @@ -45,7 +45,6 @@ impl SliceReduce for Patched { lane_offsets: sliced_lane_offsets, indices: array.indices.clone(), values: array.values.clone(), - values_ptype: array.values_ptype, stats_set: ArrayStats::default(), } .into_array(), @@ -93,6 +92,7 @@ mod tests { @r#" root: vortex.patched(u16, len=9) inner: vortex.primitive(u16, len=512) + patch_values: vortex.primitive(u16, len=3) "#); let executed = sliced.execute::(&mut ctx)?.into_primitive(); From 3d8335a0861246a877b7ca93b40946ba0ddd97cb Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Mon, 23 Mar 2026 16:22:27 -0400 Subject: [PATCH 08/23] cast in scalar_at Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/array.rs | 2 -- vortex-array/src/arrays/patched/vtable/operations.rs | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 4bf26dafc17..ca6d7205515 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -27,8 +27,6 @@ use crate::stats::ArrayStats; use crate::validity::Validity; /// An array that partially "patches" another array with new values. -/// -/// Patched arrays implement the set of nodes that do this instead here...I think? #[derive(Debug, Clone)] pub struct PatchedArray { /// The inner array that is being patched. This is the zeroth child. diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs index 7f24e5fed9f..72e4186f111 100644 --- a/vortex-array/src/arrays/patched/vtable/operations.rs +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -28,7 +28,7 @@ impl OperationsVTable for Patched { // be slower. for (index, patch_index) in accessor.offsets_iter(chunk, lane) { if patch_index == chunk_index { - return array.values.scalar_at(index); + return array.values.scalar_at(index)?.cast(array.dtype()); } } From 24f5207e624c9699924edb59a87d22c2aee685db Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Tue, 24 Mar 2026 14:57:51 -0400 Subject: [PATCH 09/23] implement append_to_builder for Patched We call append_to_builder on the inner first, then just do a single pass and overwrite what it just wrote. Signed-off-by: Andrew Duffy --- .../src/arrays/patched/compute/compare.rs | 3 +- vortex-array/src/arrays/patched/vtable/mod.rs | 141 ++++++++++++++++++ vortex-array/src/builders/primitive.rs | 5 + 3 files changed, 147 insertions(+), 2 deletions(-) diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs index 51eeb63a432..7f7aa8541ff 100644 --- a/vortex-array/src/arrays/patched/compute/compare.rs +++ b/vortex-array/src/arrays/patched/compute/compare.rs @@ -167,8 +167,7 @@ impl CompareKernel for Patched { } }); - // SAFETY: thing - let result = unsafe { BoolArray::new_unchecked(bits.freeze(), validity) }; + let result = BoolArray::new(bits.freeze(), validity); Ok(Some(result.into_array())) } } diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index 3219e3733d1..a609c4cb8bc 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -10,6 +10,7 @@ use std::hash::Hasher; use std::sync::Arc; use vortex_buffer::Buffer; +use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_ensure; @@ -34,6 +35,8 @@ use crate::arrays::patched::patch_lanes; use crate::arrays::patched::vtable::kernels::PARENT_KERNELS; use crate::arrays::primitive::PrimitiveArrayParts; use crate::buffer::BufferHandle; +use crate::builders::ArrayBuilder; +use crate::builders::PrimitiveBuilder; use crate::dtype::DType; use crate::dtype::NativePType; use crate::match_each_native_ptype; @@ -169,6 +172,61 @@ impl VTable for Patched { Ok(ProstMetadata(inner)) } + fn append_to_builder( + array: &Self::Array, + builder: &mut dyn ArrayBuilder, + ctx: &mut ExecutionCtx, + ) -> VortexResult<()> { + let dtype = array.dtype(); + + if !dtype.is_primitive() { + // Default pathway: canonicalize and propagate. + let canonical = array + .clone() + .into_array() + .execute::(ctx)? + .into_array(); + builder.extend_from_array(&canonical); + return Ok(()); + } + + let ptype = dtype.as_ptype(); + + let len = array.len(); + array.inner.append_to_builder(builder, ctx)?; + + let offset = array.offset; + let lane_offsets: Buffer = + Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); + let indices: Buffer = Buffer::from_byte_buffer(array.indices.clone().unwrap_host()); + let values = array.values.clone().execute::(ctx)?; + + match_each_native_ptype!(ptype, |V| { + let typed_builder = builder + .as_any_mut() + .downcast_mut::>() + .vortex_expect("correctly typed builder"); + + // Overwrite the last `len` elements of the builder. These would have been + // populated by the inner.append_to_builder() call above. + let output = typed_builder.values_mut(); + let trailer = output.len() - len; + + apply_patches_primitive::( + &mut output[trailer..], + offset, + len, + array.n_chunks, + array.n_lanes, + &lane_offsets, + &indices, + values.as_slice::(), + ); + }); + + Ok(()) + } + fn build( dtype: &DType, len: usize, @@ -320,9 +378,13 @@ mod tests { use crate::ExecutionCtx; use crate::IntoArray; use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::builders::builder_with_capacity; use crate::dtype::Nullability; use crate::patches::Patches; use crate::scalar::Scalar; + use crate::validity::Validity; #[test] fn test_execute() { @@ -393,4 +455,83 @@ mod tests { Scalar::primitive(1u16, Nullability::NonNullable) ); } + + #[test] + fn test_append_to_builder_non_nullable() { + let values = PrimitiveArray::new(buffer![0u16; 1024], Validity::NonNullable).into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![10u16, 20, 30].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + let mut builder = builder_with_capacity(array.dtype(), array.len()); + array.append_to_builder(builder.as_mut(), &mut ctx).unwrap(); + + let result = builder.finish(); + + let mut expected = buffer_mut![0u16; 1024]; + expected[1] = 10; + expected[2] = 20; + expected[3] = 30; + let expected = expected.into_array(); + + assert_arrays_eq!(expected, result); + } + + #[test] + fn test_append_to_builder_with_validity() { + // Create inner array with nulls at indices 0 and 5. + let validity = Validity::from_iter((0..10).map(|i| i != 0 && i != 5)); + let values = PrimitiveArray::new(buffer![0u16; 10], validity).into_array(); + + // Apply patches at indices 1, 2, 3. + let patches = Patches::new( + 10, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![10u16, 20, 30].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + let mut builder = builder_with_capacity(array.dtype(), array.len()); + array.append_to_builder(builder.as_mut(), &mut ctx).unwrap(); + + let result = builder.finish(); + + // Expected: null at 0, patched 10/20/30 at 1/2/3, zero at 4, null at 5, zeros at 6-9. + let expected = PrimitiveArray::from_option_iter([ + None, + Some(10u16), + Some(20), + Some(30), + Some(0), + None, + Some(0), + Some(0), + Some(0), + Some(0), + ]) + .into_array(); + + assert_arrays_eq!(expected, result); + } } diff --git a/vortex-array/src/builders/primitive.rs b/vortex-array/src/builders/primitive.rs index 4070a22e70d..f486cadc83f 100644 --- a/vortex-array/src/builders/primitive.rs +++ b/vortex-array/src/builders/primitive.rs @@ -62,6 +62,11 @@ impl PrimitiveBuilder { self.values.as_ref() } + /// Returns the raw primitive values in this builder as a mutable slice. + pub fn values_mut(&mut self) -> &mut [T] { + self.values.as_mut() + } + /// Create a new handle to the next `len` uninitialized values in the builder. /// /// All reads/writes through the handle to the values buffer or the validity buffer will operate From 362b2e918ba40e7c7f938dfd7a2fb69e746c3ff1 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 25 Mar 2026 16:06:38 -0400 Subject: [PATCH 10/23] cleanup Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/array.rs | 4 +-- vortex-array/src/arrays/patched/mod.rs | 35 ++---------------------- 2 files changed, 5 insertions(+), 34 deletions(-) diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index ca6d7205515..6384d10e13c 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -86,8 +86,6 @@ impl PatchedArray { values, } = transpose_patches(patches, ctx)?; - let len = inner.len(); - let values = PrimitiveArray::from_buffer_handle( BufferHandle::new_host(values), values_ptype, @@ -95,6 +93,8 @@ impl PatchedArray { ) .into_array(); + let len = inner.len(); + Ok(Self { inner, n_chunks, diff --git a/vortex-array/src/arrays/patched/mod.rs b/vortex-array/src/arrays/patched/mod.rs index d37f15419b9..654254d9c93 100644 --- a/vortex-array/src/arrays/patched/mod.rs +++ b/vortex-array/src/arrays/patched/mod.rs @@ -32,25 +32,18 @@ const fn patch_lanes() -> usize { if size_of::() < 8 { 32 } else { 16 } } +/// A cached accessor to the patch values. pub struct PatchAccessor<'a> { n_lanes: usize, lane_offsets: &'a [u32], indices: &'a [u16], } -pub struct PatchOffset { - /// Global offset into the list of patches. These are some of the - pub index: usize, - /// This is the value stored in the `indices` buffer, which encodes the offset of the `index`-th - /// patch - pub chunk_offset: u16, -} - impl<'a> PatchAccessor<'a> { /// Get an iterator over indices and values offsets. /// - /// The first component is the index into the `indices` and `values`, and the second component - /// is the set of values instead here...I think? + /// The first component is the index into the `indices` and `values`, and the second is + /// the datum from `indices[index]` already prefetched. pub fn offsets_iter( &self, chunk: usize, @@ -62,25 +55,3 @@ impl<'a> PatchAccessor<'a> { std::iter::zip(start..stop, self.indices[start..stop].iter().copied()) } } - -pub struct LanePatches<'a, V> { - pub indices: &'a [u16], - pub values: &'a [V], -} - -impl<'a, V: Copy> LanePatches<'a, V> { - pub fn len(&self) -> usize { - self.indices.len() - } - - pub fn is_empty(&self) -> bool { - self.indices.is_empty() - } - - pub fn iter(&self) -> impl Iterator { - self.indices - .iter() - .copied() - .zip(self.values.iter().copied()) - } -} From 2431e17c6c19d16663256a9cbc00d5a13e6e13ac Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Thu, 26 Mar 2026 15:49:17 -0400 Subject: [PATCH 11/23] indices as child instead of buffer Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/array.rs | 31 +++++++++----- .../src/arrays/patched/compute/compare.rs | 3 +- .../src/arrays/patched/compute/take.rs | 7 ++-- vortex-array/src/arrays/patched/mod.rs | 24 ----------- vortex-array/src/arrays/patched/vtable/mod.rs | 41 ++++++++++--------- .../src/arrays/patched/vtable/operations.rs | 22 ++++++++-- .../src/arrays/patched/vtable/slice.rs | 1 + 7 files changed, 68 insertions(+), 61 deletions(-) diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 6384d10e13c..5a2aa9fa03e 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -14,12 +14,12 @@ use crate::DynArray; use crate::ExecutionCtx; use crate::IntoArray; use crate::arrays::PrimitiveArray; -use crate::arrays::patched::PatchAccessor; use crate::arrays::patched::TransposedPatches; use crate::arrays::patched::patch_lanes; use crate::buffer::BufferHandle; use crate::dtype::IntegerPType; use crate::dtype::NativePType; +use crate::dtype::PType; use crate::match_each_native_ptype; use crate::match_each_unsigned_integer_ptype; use crate::patches::Patches; @@ -48,7 +48,7 @@ pub struct PatchedArray { /// lane offsets. The PType of these MUST be u32 pub(super) lane_offsets: BufferHandle, /// indices within a 1024-element chunk. The PType of these MUST be u16 - pub(super) indices: BufferHandle, + pub(super) indices: ArrayRef, /// patch values corresponding to the indices. The ptype is specified by `values_ptype`. pub(super) values: ArrayRef, @@ -86,6 +86,12 @@ impl PatchedArray { values, } = transpose_patches(patches, ctx)?; + let indices = PrimitiveArray::from_buffer_handle( + BufferHandle::new_host(indices), + PType::U16, + Validity::NonNullable, + ) + .into_array(); let values = PrimitiveArray::from_buffer_handle( BufferHandle::new_host(values), values_ptype, @@ -102,19 +108,24 @@ impl PatchedArray { offset: 0, len, lane_offsets: BufferHandle::new_host(lane_offsets), - indices: BufferHandle::new_host(indices), + indices, values, stats_set: ArrayStats::default(), }) } +} - /// Get an accessor, which allows ranged access to patches by chunk/lane. - pub fn accessor(&self) -> PatchAccessor<'_> { - PatchAccessor { - n_lanes: self.n_lanes, - lane_offsets: self.lane_offsets.as_host().reinterpret::(), - indices: self.indices.as_host().reinterpret::(), - } +impl PatchedArray { + pub(crate) fn seek_to_lane(&self, chunk: usize, lane: usize) -> Range { + assert!(chunk < self.n_chunks); + assert!(lane < self.n_lanes); + + let lane_offsets = self.lane_offsets.as_host().reinterpret::(); + + let start = lane_offsets[chunk * self.n_lanes + lane] as usize; + let stop = lane_offsets[chunk * self.n_lanes + lane + 1] as usize; + + start..stop } /// Slice the array to just the patches and inner values that are within the chunk range. diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs index 7f7aa8541ff..9b9f8f938f6 100644 --- a/vortex-array/src/arrays/patched/compute/compare.rs +++ b/vortex-array/src/arrays/patched/compute/compare.rs @@ -93,10 +93,11 @@ impl CompareKernel for Patched { } let lane_offsets = lhs.lane_offsets.as_host().reinterpret::(); - let indices = lhs.indices.as_host().reinterpret::(); + let indices = lhs.indices.clone().execute::(ctx)?; let values = lhs.values.clone().execute::(ctx)?; match_each_native_ptype!(values.ptype(), |V| { + let indices = indices.as_slice::(); let values = values.as_slice::(); let constant = constant .as_primitive() diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs index eee1a4c898a..a40881ab843 100644 --- a/vortex-array/src/arrays/patched/compute/take.rs +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -46,7 +46,8 @@ impl TakeExecute for Patched { match_each_unsigned_integer_ptype!(indices_ptype, |I| { match_each_native_ptype!(ptype, |V| { let indices = indices.clone().execute::(ctx)?; - let values = array.values.clone().execute::(ctx)?; + let patch_indices = array.indices.clone().execute::(ctx)?; + let patch_values = array.values.clone().execute::(ctx)?; let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); take_map( output.as_mut(), @@ -56,8 +57,8 @@ impl TakeExecute for Patched { array.n_chunks, array.n_lanes, array.lane_offsets.as_host().reinterpret::(), - array.indices.as_host().reinterpret::(), - values.as_slice::(), + patch_indices.as_slice::(), + patch_values.as_slice::(), ); // SAFETY: output and validity still have same length after take_map returns. diff --git a/vortex-array/src/arrays/patched/mod.rs b/vortex-array/src/arrays/patched/mod.rs index 654254d9c93..32edda880c8 100644 --- a/vortex-array/src/arrays/patched/mod.rs +++ b/vortex-array/src/arrays/patched/mod.rs @@ -31,27 +31,3 @@ const fn patch_lanes() -> usize { // from shared to global memory. if size_of::() < 8 { 32 } else { 16 } } - -/// A cached accessor to the patch values. -pub struct PatchAccessor<'a> { - n_lanes: usize, - lane_offsets: &'a [u32], - indices: &'a [u16], -} - -impl<'a> PatchAccessor<'a> { - /// Get an iterator over indices and values offsets. - /// - /// The first component is the index into the `indices` and `values`, and the second is - /// the datum from `indices[index]` already prefetched. - pub fn offsets_iter( - &self, - chunk: usize, - lane: usize, - ) -> impl Iterator + '_ { - let start = self.lane_offsets[chunk * self.n_lanes + lane] as usize; - let stop = self.lane_offsets[chunk * self.n_lanes + lane + 1] as usize; - - std::iter::zip(start..stop, self.indices[start..stop].iter().copied()) - } -} diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index a609c4cb8bc..d21e0ed889b 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -39,6 +39,7 @@ use crate::builders::ArrayBuilder; use crate::builders::PrimitiveBuilder; use crate::dtype::DType; use crate::dtype::NativePType; +use crate::dtype::PType; use crate::match_each_native_ptype; use crate::serde::ArrayChildren; use crate::stats::ArrayStats; @@ -64,6 +65,8 @@ impl ValidityChild for Patched { pub struct PatchedMetadata { #[prost(uint32, tag = "1")] pub(crate) offset: u32, + #[prost(uint32, tag = "2")] + pub(crate) n_patches: u32, } impl VTable for Patched { @@ -111,13 +114,12 @@ impl VTable for Patched { } fn nbuffers(_array: &Self::Array) -> usize { - 3 + 1 } fn buffer(array: &Self::Array, idx: usize) -> BufferHandle { match idx { 0 => array.lane_offsets.clone(), - 1 => array.indices.clone(), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } @@ -125,19 +127,19 @@ impl VTable for Patched { fn buffer_name(_array: &Self::Array, idx: usize) -> Option { match idx { 0 => Some("lane_offsets".to_string()), - 1 => Some("patch_indices".to_string()), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } fn nchildren(_array: &Self::Array) -> usize { - 2 + 3 } fn child(array: &Self::Array, idx: usize) -> ArrayRef { match idx { 0 => array.inner.clone(), - 1 => array.values.clone(), + 1 => array.indices.clone(), + 2 => array.values.clone(), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } @@ -145,7 +147,8 @@ impl VTable for Patched { fn child_name(_array: &Self::Array, idx: usize) -> String { match idx { 0 => "inner".to_string(), - 1 => "patch_values".to_string(), + 1 => "patch_indices".to_string(), + 2 => "patch_values".to_string(), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } @@ -154,6 +157,7 @@ impl VTable for Patched { fn metadata(array: &Self::Array) -> VortexResult { Ok(ProstMetadata(PatchedMetadata { offset: array.offset as u32, + n_patches: array.indices.len() as u32, })) } @@ -198,7 +202,7 @@ impl VTable for Patched { let offset = array.offset; let lane_offsets: Buffer = Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); - let indices: Buffer = Buffer::from_byte_buffer(array.indices.clone().unwrap_host()); + let indices = array.indices.clone().execute::(ctx)?; let values = array.values.clone().execute::(ctx)?; match_each_native_ptype!(ptype, |V| { @@ -219,7 +223,7 @@ impl VTable for Patched { array.n_chunks, array.n_lanes, &lane_offsets, - &indices, + indices.as_slice::(), values.as_slice::(), ); }); @@ -234,19 +238,16 @@ impl VTable for Patched { buffers: &[BufferHandle], children: &dyn ArrayChildren, ) -> VortexResult { - let inner = children.get(0, dtype, len)?; - let n_chunks = len.div_ceil(1024); - let n_lanes = match_each_native_ptype!(dtype.as_ptype(), |P| { patch_lanes::

() }); - let &[lane_offsets, indices] = &buffers else { + let &[lane_offsets] = &buffers else { vortex_bail!("invalid buffer count for PatchedArray"); }; - // values and indices should have same len. - let expected_len = indices.as_host().reinterpret::().len(); - let values = children.get(1, dtype, expected_len)?; + let inner = children.get(0, dtype, len)?; + let indices = children.get(1, PType::U16.into(), metadata.n_patches as usize)?; + let values = children.get(1, dtype, metadata.n_patches as usize)?; Ok(PatchedArray { inner, @@ -255,7 +256,7 @@ impl VTable for Patched { offset: metadata.offset as usize, len, lane_offsets: lane_offsets.clone(), - indices: indices.clone(), + indices, values, stats_set: ArrayStats::default(), }) @@ -288,10 +289,10 @@ impl VTable for Patched { let lane_offsets: Buffer = Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); - let indices: Buffer = Buffer::from_byte_buffer(array.indices.clone().unwrap_host()); - let values = array.values.clone().execute::(ctx)?; + let indices = array.indices.clone().execute::(ctx)?; - // TODO(aduffy): add support for non-primitive PatchedArray patches application. + // TODO(aduffy): add support for non-primitive PatchedArray patches application (?) + let values = array.values.clone().execute::(ctx)?; let patched_values = match_each_native_ptype!(values.ptype(), |V| { let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); @@ -306,7 +307,7 @@ impl VTable for Patched { array.n_chunks, array.n_lanes, &lane_offsets, - &indices, + indices.as_slice::(), values.as_slice::(), ); diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs index 72e4186f111..23b46867fe5 100644 --- a/vortex-array/src/arrays/patched/vtable/operations.rs +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -4,16 +4,23 @@ use vortex_error::VortexResult; use crate::DynArray; +use crate::ExecutionCtx; +use crate::arrays::PrimitiveArray; use crate::arrays::patched::Patched; use crate::arrays::patched::PatchedArray; use crate::arrays::patched::patch_lanes; use crate::dtype::PType; use crate::match_each_native_ptype; +use crate::optimizer::ArrayOptimizer; use crate::scalar::Scalar; use crate::vtable::OperationsVTable; impl OperationsVTable for Patched { - fn scalar_at(array: &PatchedArray, index: usize) -> VortexResult { + fn scalar_at( + array: &PatchedArray, + index: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult { // First check the patches let chunk = index / 1024; #[allow(clippy::cast_possible_truncation)] @@ -22,11 +29,20 @@ impl OperationsVTable for Patched { let values_ptype = PType::try_from(array.dtype())?; let lane = match_each_native_ptype!(values_ptype, |V| { index % patch_lanes::() }); - let accessor = array.accessor(); + + let range = array.seek_to_lane(chunk, lane); + + // Get the range of indices corresponding to the lane, potentially decoding them to avoid + // the overhead of repeated scalar_at calls. + let patch_indices = array + .indices + .slice(range.clone())? + .optimize()? + .execute::(ctx)?; // NOTE: we do linear scan as lane has <= 32 patches, binary search would likely // be slower. - for (index, patch_index) in accessor.offsets_iter(chunk, lane) { + for (&patch_index, index) in std::iter::zip(patch_indices.as_slice::(), range) { if patch_index == chunk_index { return array.values.scalar_at(index)?.cast(array.dtype()); } diff --git a/vortex-array/src/arrays/patched/vtable/slice.rs b/vortex-array/src/arrays/patched/vtable/slice.rs index b67b68ffe0f..98be8f0264e 100644 --- a/vortex-array/src/arrays/patched/vtable/slice.rs +++ b/vortex-array/src/arrays/patched/vtable/slice.rs @@ -92,6 +92,7 @@ mod tests { @r#" root: vortex.patched(u16, len=9) inner: vortex.primitive(u16, len=512) + patch_indices: vortex.primitive(u16, len=3) patch_values: vortex.primitive(u16, len=3) "#); From 281d229b30dc71b83cee42d7d2d478493ee0322c Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Thu, 26 Mar 2026 15:58:39 -0400 Subject: [PATCH 12/23] lockfiles Signed-off-by: Andrew Duffy --- vortex-array/public-api.lock | 88 +++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 27 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index fcdabbabb75..6547e574b45 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -3244,26 +3244,6 @@ pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array:: pub mod vortex_array::arrays::patched -pub struct vortex_array::arrays::patched::LanePatches<'a, V> - -pub vortex_array::arrays::patched::LanePatches::indices: &'a [u16] - -pub vortex_array::arrays::patched::LanePatches::values: &'a [V] - -impl<'a, V: core::marker::Copy> vortex_array::arrays::patched::LanePatches<'a, V> - -pub fn vortex_array::arrays::patched::LanePatches<'a, V>::is_empty(&self) -> bool - -pub fn vortex_array::arrays::patched::LanePatches<'a, V>::iter(&self) -> impl core::iter::traits::iterator::Iterator - -pub fn vortex_array::arrays::patched::LanePatches<'a, V>::len(&self) -> usize - -pub struct vortex_array::arrays::patched::PatchAccessor<'a, V> - -impl<'a, V: core::marker::Sized> vortex_array::arrays::patched::PatchAccessor<'a, V> - -pub fn vortex_array::arrays::patched::PatchAccessor<'a, V>::access(&'a self, chunk: usize, lane: usize) -> vortex_array::arrays::patched::LanePatches<'a, V> - pub struct vortex_array::arrays::patched::Patched impl core::clone::Clone for vortex_array::arrays::patched::Patched @@ -3292,7 +3272,7 @@ pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: & impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched -pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched @@ -3358,8 +3338,6 @@ pub struct vortex_array::arrays::patched::PatchedArray impl vortex_array::arrays::patched::PatchedArray -pub fn vortex_array::arrays::patched::PatchedArray::accessor(&self) -> vortex_array::arrays::patched::PatchAccessor<'_, V> - pub fn vortex_array::arrays::patched::PatchedArray::from_array_and_patches(inner: vortex_array::ArrayRef, patches: &vortex_array::patches::Patches, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult impl vortex_array::arrays::patched::PatchedArray @@ -7142,7 +7120,7 @@ pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: & impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched -pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched @@ -7208,8 +7186,6 @@ pub struct vortex_array::arrays::PatchedArray impl vortex_array::arrays::patched::PatchedArray -pub fn vortex_array::arrays::patched::PatchedArray::accessor(&self) -> vortex_array::arrays::patched::PatchAccessor<'_, V> - pub fn vortex_array::arrays::patched::PatchedArray::from_array_and_patches(inner: vortex_array::ArrayRef, patches: &vortex_array::patches::Patches, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult impl vortex_array::arrays::patched::PatchedArray @@ -9450,6 +9426,8 @@ pub fn vortex_array::builders::PrimitiveBuilder::uninit_range(&mut self, len: pub fn vortex_array::builders::PrimitiveBuilder::values(&self) -> &[T] +pub fn vortex_array::builders::PrimitiveBuilder::values_mut(&mut self) -> &mut [T] + pub fn vortex_array::builders::PrimitiveBuilder::with_capacity(nullability: vortex_array::dtype::Nullability, capacity: usize) -> Self impl vortex_array::builders::ArrayBuilder for vortex_array::builders::PrimitiveBuilder @@ -22158,6 +22136,62 @@ pub fn vortex_array::arrays::null::Null::vtable(_array: &Self::Array) -> &Self pub fn vortex_array::arrays::null::Null::with_children(_array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::vtable(_array: &Self::Array) -> &Self + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + impl vortex_array::vtable::VTable for vortex_array::arrays::scalar_fn::ScalarFnVTable pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::Array = vortex_array::arrays::scalar_fn::ScalarFnArray @@ -22380,7 +22414,7 @@ pub fn vortex_array::arrays::null::Null::scalar_at(_array: &vortex_array::arrays impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched -pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::scalar_fn::ScalarFnVTable From 66b0afe25e70b48cd792f57724073d7a78c513cf Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Fri, 27 Mar 2026 10:28:27 -0400 Subject: [PATCH 13/23] address comments Signed-off-by: Andrew Duffy --- .../src/arrays/patched/compute/compare.rs | 134 +++++++++++++----- .../src/arrays/patched/compute/filter.rs | 38 +++++ vortex-array/src/arrays/patched/vtable/mod.rs | 23 ++- 3 files changed, 147 insertions(+), 48 deletions(-) diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs index 9b9f8f938f6..05621ba339d 100644 --- a/vortex-array/src/arrays/patched/compute/compare.rs +++ b/vortex-array/src/arrays/patched/compute/compare.rs @@ -7,6 +7,7 @@ use vortex_error::VortexResult; use crate::ArrayRef; use crate::Canonical; +use crate::DynArray; use crate::ExecutionCtx; use crate::IntoArray; use crate::arrays::BoolArray; @@ -14,7 +15,6 @@ use crate::arrays::ConstantArray; use crate::arrays::Patched; use crate::arrays::PrimitiveArray; use crate::arrays::bool::BoolArrayParts; -use crate::arrays::patched::patch_lanes; use crate::arrays::primitive::NativeValue; use crate::builtins::ArrayBuiltins; use crate::dtype::NativePType; @@ -39,8 +39,11 @@ impl CompareKernel for Patched { return Ok(None); }; + // NOTE: due to offset, it's possible that the inner.len != array.len. + // We slice the inner before performing the comparison. let result = lhs .inner + .slice(lhs.offset..lhs.offset + lhs.len)? .binary( ConstantArray::new(constant.clone(), lhs.len()).into_array(), operator.into(), @@ -57,46 +60,13 @@ impl CompareKernel for Patched { let mut bits = BitBufferMut::from_buffer(bits.unwrap_host().into_mut(), offset, len); - fn apply( - bits: &mut BitBufferMut, - lane_offsets: &[u32], - indices: &[u16], - values: &[V], - constant: V, - cmp: F, - ) -> VortexResult<()> - where - F: Fn(V, V) -> bool, - { - let n_lanes = patch_lanes::(); - - for index in 0..(lane_offsets.len() - 1) { - let chunk = index / n_lanes; - - let lane_start = lane_offsets[index] as usize; - let lane_end = lane_offsets[index + 1] as usize; - - for (&patch_index, &patch_value) in std::iter::zip( - &indices[lane_start..lane_end], - &values[lane_start..lane_end], - ) { - let bit_index = chunk * 1024 + patch_index as usize; - if cmp(patch_value, constant) { - bits.set(bit_index) - } else { - bits.unset(bit_index) - } - } - } - - Ok(()) - } - let lane_offsets = lhs.lane_offsets.as_host().reinterpret::(); let indices = lhs.indices.clone().execute::(ctx)?; let values = lhs.values.clone().execute::(ctx)?; + let n_lanes = lhs.n_lanes; match_each_native_ptype!(values.ptype(), |V| { + let offset = lhs.offset; let indices = indices.as_slice::(); let values = values.as_slice::(); let constant = constant @@ -108,6 +78,8 @@ impl CompareKernel for Patched { CompareOperator::Eq => { apply::( &mut bits, + offset, + n_lanes, lane_offsets, indices, values, @@ -118,6 +90,8 @@ impl CompareKernel for Patched { CompareOperator::NotEq => { apply::( &mut bits, + offset, + n_lanes, lane_offsets, indices, values, @@ -128,6 +102,8 @@ impl CompareKernel for Patched { CompareOperator::Gt => { apply::( &mut bits, + n_lanes, + offset, lane_offsets, indices, values, @@ -138,6 +114,8 @@ impl CompareKernel for Patched { CompareOperator::Gte => { apply::( &mut bits, + n_lanes, + offset, lane_offsets, indices, values, @@ -148,6 +126,8 @@ impl CompareKernel for Patched { CompareOperator::Lt => { apply::( &mut bits, + n_lanes, + offset, lane_offsets, indices, values, @@ -158,6 +138,8 @@ impl CompareKernel for Patched { CompareOperator::Lte => { apply::( &mut bits, + n_lanes, + offset, lane_offsets, indices, values, @@ -173,11 +155,53 @@ impl CompareKernel for Patched { } } +#[allow(clippy::too_many_arguments)] +fn apply( + bits: &mut BitBufferMut, + offset: usize, + n_lanes: usize, + lane_offsets: &[u32], + indices: &[u16], + values: &[V], + constant: V, + cmp: F, +) -> VortexResult<()> +where + F: Fn(V, V) -> bool, +{ + for index in 0..(lane_offsets.len() - 1) { + let chunk = index / n_lanes; + + let lane_start = lane_offsets[index] as usize; + let lane_end = lane_offsets[index + 1] as usize; + + for (&patch_index, &patch_value) in std::iter::zip( + &indices[lane_start..lane_end], + &values[lane_start..lane_end], + ) { + let bit_index = chunk * 1024 + patch_index as usize; + // Skip any indices < the offset. + if bit_index < offset { + continue; + } + let bit_index = bit_index - offset; + if cmp(patch_value, constant) { + bits.set(bit_index) + } else { + bits.unset(bit_index) + } + } + } + + Ok(()) +} + #[cfg(test)] mod tests { use vortex_buffer::buffer; use vortex_error::VortexResult; + use crate::DynArray; use crate::ExecutionCtx; use crate::IntoArray; use crate::LEGACY_SESSION; @@ -187,6 +211,7 @@ mod tests { use crate::arrays::PatchedArray; use crate::arrays::PrimitiveArray; use crate::assert_arrays_eq; + use crate::optimizer::ArrayOptimizer; use crate::patches::Patches; use crate::scalar_fn::fns::binary::CompareKernel; use crate::scalar_fn::fns::operators::CompareOperator; @@ -220,6 +245,43 @@ mod tests { assert_arrays_eq!(expected, result); } + #[test] + fn test_with_offset() { + let lhs = PrimitiveArray::from_iter(0u32..512).into_array(); + let patches = Patches::new( + 512, + 0, + buffer![5u16, 510, 511].into_array(), + buffer![u32::MAX; 3].into_array(), + None, + ) + .unwrap(); + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx).unwrap(); + // Slice the array so that the first patch should be skipped. + let lhs = lhs + .slice(10..lhs.len()) + .unwrap() + .optimize() + .unwrap() + .try_into::() + .unwrap(); + + assert_eq!(lhs.len(), 502); + + let rhs = ConstantArray::new(u32::MAX, lhs.len()).into_array(); + + let result = ::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx) + .unwrap() + .unwrap(); + + let expected = BoolArray::from_indices(502, [500, 501], Validity::NonNullable).into_array(); + + assert_arrays_eq!(expected, result); + } + #[test] fn test_subnormal_f32() -> VortexResult<()> { // Subnormal f32 values are smaller than f32::MIN_POSITIVE but greater than 0 diff --git a/vortex-array/src/arrays/patched/compute/filter.rs b/vortex-array/src/arrays/patched/compute/filter.rs index d045a79c38f..fb927f53999 100644 --- a/vortex-array/src/arrays/patched/compute/filter.rs +++ b/vortex-array/src/arrays/patched/compute/filter.rs @@ -63,6 +63,7 @@ mod tests { use crate::arrays::PatchedArray; use crate::arrays::PrimitiveArray; use crate::assert_arrays_eq; + use crate::optimizer::ArrayOptimizer; use crate::patches::Patches; #[test] @@ -142,4 +143,41 @@ mod tests { Ok(()) } + + #[test] + fn test_filter_sliced() -> VortexResult<()> { + // Test filter on a sliced PatchedArray to exercise codepath where offset > 0. + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + // Create a larger array (6 chunks) so we can slice and still have room + // for the filter to prune chunks. + let array = buffer![u16::MIN; 6144].into_array(); + // Patches at indices 2048 and 2049 (start of chunk 2). + let patched_indices = buffer![2048u16, 2049].into_array(); + let patched_values = buffer![u16::MAX, u16::MAX].into_array(); + + let patches = Patches::new(6144, 0, patched_indices, patched_values, None)?; + + let patched = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?; + + // Slice at chunk boundary to create offset > 0. After slicing [1024..5120], + // we have 4096 elements and patches are at relative indices 1024 and 1025. + let sliced = patched.slice(1024..5120)?.into_array(); + assert_eq!(sliced.len(), 4096); + + // Filter that only touches the middle 2 chunks (chunks 1 and 2). + // Indices 1024 and 1025 fall in chunk 1, and 3000 falls in chunk 2. + let mask = Mask::from_indices(4096, vec![1024, 1025, 3000]); + + let filtered = sliced + .filter(mask)? + .optimize()? + .execute::(&mut ctx)?; + + let expected = PrimitiveArray::from_iter([u16::MAX, u16::MAX, u16::MIN]); + + assert_arrays_eq!(expected, filtered); + + Ok(()) + } } diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index d21e0ed889b..82726f2d205 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -28,6 +28,7 @@ use crate::ExecutionResult; use crate::IntoArray; use crate::Precision; use crate::ProstMetadata; +use crate::SerializeMetadata; use crate::arrays::PrimitiveArray; use crate::arrays::patched::PatchedArray; use crate::arrays::patched::compute::rules::PARENT_RULES; @@ -45,10 +46,10 @@ use crate::serde::ArrayChildren; use crate::stats::ArrayStats; use crate::stats::StatsSetRef; use crate::vtable; -use crate::vtable::ArrayId; use crate::vtable::VTable; use crate::vtable::ValidityChild; use crate::vtable::ValidityVTableFromChild; +use crate::vtable::{Array, ArrayId}; vtable!(Patched); @@ -140,7 +141,7 @@ impl VTable for Patched { 0 => array.inner.clone(), 1 => array.indices.clone(), 2 => array.values.clone(), - _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), + _ => vortex_panic!("invalid child index for PatchedArray: {idx}"), } } @@ -149,7 +150,7 @@ impl VTable for Patched { 0 => "inner".to_string(), 1 => "patch_indices".to_string(), 2 => "patch_values".to_string(), - _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), + _ => vortex_panic!("invalid child index for PatchedArray: {idx}"), } } @@ -161,8 +162,8 @@ impl VTable for Patched { })) } - fn serialize(_metadata: Self::Metadata) -> VortexResult>> { - Ok(Some(vec![])) + fn serialize(metadata: Self::Metadata) -> VortexResult>> { + Ok(Some(metadata.serialize())) } fn deserialize( @@ -247,7 +248,7 @@ impl VTable for Patched { let inner = children.get(0, dtype, len)?; let indices = children.get(1, PType::U16.into(), metadata.n_patches as usize)?; - let values = children.get(1, dtype, metadata.n_patches as usize)?; + let values = children.get(2, dtype, metadata.n_patches as usize)?; Ok(PatchedArray { inner, @@ -269,12 +270,13 @@ impl VTable for Patched { ); array.inner = children.remove(0); + array.indices = children.remove(0); array.values = children.remove(0); Ok(()) } - fn execute(array: Arc, ctx: &mut ExecutionCtx) -> VortexResult { + fn execute(array: Arc>, ctx: &mut ExecutionCtx) -> VortexResult { let inner = array .inner .clone() @@ -311,9 +313,6 @@ impl VTable for Patched { values.as_slice::(), ); - // The output will always be aligned to a chunk boundary, we apply the offset/len - // at the end to slice to only the in-bounds values. - let _output = output.as_slice(); let output = output.freeze().slice(offset..offset + len); PrimitiveArray::from_byte_buffer(output.into_byte_buffer(), ptype, validity) @@ -323,7 +322,7 @@ impl VTable for Patched { } fn execute_parent( - array: &Self::Array, + array: &Array, parent: &ArrayRef, child_idx: usize, ctx: &mut ExecutionCtx, @@ -332,7 +331,7 @@ impl VTable for Patched { } fn reduce_parent( - array: &Self::Array, + array: &Array, parent: &ArrayRef, child_idx: usize, ) -> VortexResult> { From b73eef24f372095e420061573c7b89dc44e81738 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Fri, 27 Mar 2026 11:12:04 -0400 Subject: [PATCH 14/23] fixes Signed-off-by: Andrew Duffy --- vortex-array/public-api.lock | 40 ++--- .../src/arrays/patched/compute/compare.rs | 144 +++++++----------- vortex-array/src/arrays/patched/vtable/mod.rs | 46 +----- .../src/arrays/patched/vtable/operations.rs | 136 +++++++++++++++-- .../arrays/synthetic/encodings/mod.rs | 1 + .../arrays/synthetic/encodings/patched.rs | 47 ++++++ 6 files changed, 247 insertions(+), 167 deletions(-) create mode 100644 vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/patched.rs diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 6547e574b45..7eb9d049776 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -3304,9 +3304,9 @@ pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType -pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc>, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult -pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId @@ -3318,11 +3318,11 @@ pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize -pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> +pub fn vortex_array::arrays::patched::Patched::reduce(array: &vortex_array::vtable::Array) -> vortex_error::VortexResult> -pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> -pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> +pub fn vortex_array::arrays::patched::Patched::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> @@ -7152,9 +7152,9 @@ pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType -pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc>, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult -pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId @@ -7166,11 +7166,11 @@ pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize -pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> +pub fn vortex_array::arrays::patched::Patched::reduce(array: &vortex_array::vtable::Array) -> vortex_error::VortexResult> -pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> -pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> +pub fn vortex_array::arrays::patched::Patched::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> @@ -22166,9 +22166,9 @@ pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType -pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc>, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult -pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId @@ -22180,11 +22180,11 @@ pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize -pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> +pub fn vortex_array::arrays::patched::Patched::reduce(array: &vortex_array::vtable::Array) -> vortex_error::VortexResult> -pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> -pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> +pub fn vortex_array::arrays::patched::Patched::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> @@ -23522,9 +23522,9 @@ pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType -pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc>, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult -pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId @@ -23536,11 +23536,11 @@ pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize -pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> +pub fn vortex_array::arrays::patched::Patched::reduce(array: &vortex_array::vtable::Array) -> vortex_error::VortexResult> -pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> -pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> +pub fn vortex_array::arrays::patched::Patched::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs index 05621ba339d..1fe0f4a90cb 100644 --- a/vortex-array/src/arrays/patched/compute/compare.rs +++ b/vortex-array/src/arrays/patched/compute/compare.rs @@ -74,78 +74,34 @@ impl CompareKernel for Patched { .as_::() .vortex_expect("compare constant not null"); + let apply_patches = ApplyPatches { + bits: &mut bits, + offset, + n_lanes, + lane_offsets, + indices, + values, + constant, + }; + match operator { CompareOperator::Eq => { - apply::( - &mut bits, - offset, - n_lanes, - lane_offsets, - indices, - values, - constant, - |l, r| NativeValue(l) == NativeValue(r), - )?; + apply_patches.apply(|l, r| NativeValue(l) == NativeValue(r))?; } CompareOperator::NotEq => { - apply::( - &mut bits, - offset, - n_lanes, - lane_offsets, - indices, - values, - constant, - |l, r| NativeValue(l) != NativeValue(r), - )?; + apply_patches.apply(|l, r| NativeValue(l) != NativeValue(r))?; } CompareOperator::Gt => { - apply::( - &mut bits, - n_lanes, - offset, - lane_offsets, - indices, - values, - constant, - |l, r| NativeValue(l) > NativeValue(r), - )?; + apply_patches.apply(|l, r| NativeValue(l) > NativeValue(r))?; } CompareOperator::Gte => { - apply::( - &mut bits, - n_lanes, - offset, - lane_offsets, - indices, - values, - constant, - |l, r| NativeValue(l) >= NativeValue(r), - )?; + apply_patches.apply(|l, r| NativeValue(l) >= NativeValue(r))?; } CompareOperator::Lt => { - apply::( - &mut bits, - n_lanes, - offset, - lane_offsets, - indices, - values, - constant, - |l, r| NativeValue(l) < NativeValue(r), - )?; + apply_patches.apply(|l, r| NativeValue(l) < NativeValue(r))?; } CompareOperator::Lte => { - apply::( - &mut bits, - n_lanes, - offset, - lane_offsets, - indices, - values, - constant, - |l, r| NativeValue(l) <= NativeValue(r), - )?; + apply_patches.apply(|l, r| NativeValue(l) <= NativeValue(r))?; } } }); @@ -155,45 +111,47 @@ impl CompareKernel for Patched { } } -#[allow(clippy::too_many_arguments)] -fn apply( - bits: &mut BitBufferMut, +struct ApplyPatches<'a, V: NativePType> { + bits: &'a mut BitBufferMut, offset: usize, n_lanes: usize, - lane_offsets: &[u32], - indices: &[u16], - values: &[V], + lane_offsets: &'a [u32], + indices: &'a [u16], + values: &'a [V], constant: V, - cmp: F, -) -> VortexResult<()> -where - F: Fn(V, V) -> bool, -{ - for index in 0..(lane_offsets.len() - 1) { - let chunk = index / n_lanes; - - let lane_start = lane_offsets[index] as usize; - let lane_end = lane_offsets[index + 1] as usize; - - for (&patch_index, &patch_value) in std::iter::zip( - &indices[lane_start..lane_end], - &values[lane_start..lane_end], - ) { - let bit_index = chunk * 1024 + patch_index as usize; - // Skip any indices < the offset. - if bit_index < offset { - continue; - } - let bit_index = bit_index - offset; - if cmp(patch_value, constant) { - bits.set(bit_index) - } else { - bits.unset(bit_index) +} + +impl ApplyPatches<'_, V> { + fn apply(self, cmp: F) -> VortexResult<()> + where + F: Fn(V, V) -> bool, + { + for index in 0..(self.lane_offsets.len() - 1) { + let chunk = index / self.n_lanes; + + let lane_start = self.lane_offsets[index] as usize; + let lane_end = self.lane_offsets[index + 1] as usize; + + for (&patch_index, &patch_value) in std::iter::zip( + &self.indices[lane_start..lane_end], + &self.values[lane_start..lane_end], + ) { + let bit_index = chunk * 1024 + patch_index as usize; + // Skip any indices < the offset. + if bit_index < self.offset { + continue; + } + let bit_index = bit_index - self.offset; + if cmp(patch_value, self.constant) { + self.bits.set(bit_index) + } else { + self.bits.unset(bit_index) + } } } - } - Ok(()) + Ok(()) + } } #[cfg(test)] diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index 82726f2d205..ea3a4529c4b 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -46,10 +46,11 @@ use crate::serde::ArrayChildren; use crate::stats::ArrayStats; use crate::stats::StatsSetRef; use crate::vtable; +use crate::vtable::Array; +use crate::vtable::ArrayId; use crate::vtable::VTable; use crate::vtable::ValidityChild; use crate::vtable::ValidityVTableFromChild; -use crate::vtable::{Array, ArrayId}; vtable!(Patched); @@ -265,8 +266,8 @@ impl VTable for Patched { fn with_children(array: &mut Self::Array, mut children: Vec) -> VortexResult<()> { vortex_ensure!( - children.len() == 2, - "PatchedArray must have exactly 2 children" + children.len() == 3, + "PatchedArray must have exactly 3 children" ); array.inner = children.remove(0); @@ -381,9 +382,7 @@ mod tests { use crate::arrays::PrimitiveArray; use crate::assert_arrays_eq; use crate::builders::builder_with_capacity; - use crate::dtype::Nullability; use crate::patches::Patches; - use crate::scalar::Scalar; use crate::validity::Validity; #[test] @@ -419,43 +418,6 @@ mod tests { assert_eq!(executed, expected.freeze()); } - #[test] - fn test_scalar_at() { - let values = buffer![0u16; 1024].into_array(); - let patches = Patches::new( - 1024, - 0, - buffer![1u32, 2, 3].into_array(), - buffer![1u16; 3].into_array(), - None, - ) - .unwrap(); - - let session = VortexSession::empty(); - let mut ctx = ExecutionCtx::new(session); - - let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) - .unwrap() - .into_array(); - - assert_eq!( - array.scalar_at(0).unwrap(), - Scalar::primitive(0u16, Nullability::NonNullable) - ); - assert_eq!( - array.scalar_at(1).unwrap(), - Scalar::primitive(1u16, Nullability::NonNullable) - ); - assert_eq!( - array.scalar_at(2).unwrap(), - Scalar::primitive(1u16, Nullability::NonNullable) - ); - assert_eq!( - array.scalar_at(3).unwrap(), - Scalar::primitive(1u16, Nullability::NonNullable) - ); - } - #[test] fn test_append_to_builder_non_nullable() { let values = PrimitiveArray::new(buffer![0u16; 1024], Validity::NonNullable).into_array(); diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs index 23b46867fe5..314c311ba14 100644 --- a/vortex-array/src/arrays/patched/vtable/operations.rs +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -8,9 +8,6 @@ use crate::ExecutionCtx; use crate::arrays::PrimitiveArray; use crate::arrays::patched::Patched; use crate::arrays::patched::PatchedArray; -use crate::arrays::patched::patch_lanes; -use crate::dtype::PType; -use crate::match_each_native_ptype; use crate::optimizer::ArrayOptimizer; use crate::scalar::Scalar; use crate::vtable::OperationsVTable; @@ -21,14 +18,15 @@ impl OperationsVTable for Patched { index: usize, ctx: &mut ExecutionCtx, ) -> VortexResult { - // First check the patches - let chunk = index / 1024; - #[allow(clippy::cast_possible_truncation)] - let chunk_index = (index % 1024) as u16; + let chunk = (index + array.offset) / 1024; - let values_ptype = PType::try_from(array.dtype())?; + #[expect( + clippy::cast_possible_truncation, + reason = "N % 1024 always fits in u16" + )] + let chunk_index = ((index + array.offset) % 1024) as u16; - let lane = match_each_native_ptype!(values_ptype, |V| { index % patch_lanes::() }); + let lane = (index + array.offset) % array.n_lanes; let range = array.seek_to_lane(chunk, lane); @@ -42,13 +40,127 @@ impl OperationsVTable for Patched { // NOTE: we do linear scan as lane has <= 32 patches, binary search would likely // be slower. - for (&patch_index, index) in std::iter::zip(patch_indices.as_slice::(), range) { + for (&patch_index, idx) in std::iter::zip(patch_indices.as_slice::(), range) { if patch_index == chunk_index { - return array.values.scalar_at(index)?.cast(array.dtype()); + return array.values.scalar_at(idx)?.cast(array.dtype()); } } // Otherwise, access the underlying value. - array.inner.scalar_at(index) + array.inner.scalar_at(index + array.offset) + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + use vortex_session::VortexSession; + + use crate::DynArray; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::arrays::Patched; + use crate::arrays::PatchedArray; + use crate::dtype::Nullability; + use crate::optimizer::ArrayOptimizer; + use crate::patches::Patches; + use crate::scalar::Scalar; + + #[test] + fn test_simple() { + let values = buffer![0u16; 1024].into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + assert_eq!( + array.scalar_at(0).unwrap(), + Scalar::primitive(0u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(1).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(2).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(3).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + } + + #[test] + fn test_multi_chunk() { + let values = buffer![0u16; 4096].into_array(); + let patches = Patches::new( + 4096, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + for index in 0..array.len() { + let value = array.scalar_at(index).unwrap(); + + if [1, 2, 3].contains(&index) { + assert_eq!(value, 1u16.into()); + } else { + assert_eq!(value, 0u16.into()); + } + } + } + + #[test] + fn test_multi_chunk_sliced() { + let values = buffer![0u16; 4096].into_array(); + let patches = Patches::new( + 4096, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array() + .slice(3..4096) + .unwrap() + .optimize() + .unwrap(); + + assert!(array.is::()); + + assert_eq!(array.scalar_at(0).unwrap(), 1u16.into()); + for index in 1..array.len() { + assert_eq!(array.scalar_at(index).unwrap(), 0u16.into()); + } } } diff --git a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs index 0c68d5e9abd..4dfc3a9ebf8 100644 --- a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs +++ b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs @@ -16,6 +16,7 @@ mod delta; mod dict; mod for_; mod fsst; +mod patched; mod pco; mod rle; mod runend; diff --git a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/patched.rs b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/patched.rs new file mode 100644 index 00000000000..b1363ad7e02 --- /dev/null +++ b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/patched.rs @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::Patched; +use vortex_array::arrays::PatchedArray; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::patches::Patches; +use vortex_array::vtable::ArrayId; +use vortex_array::vtable::ArrayVTable; +use vortex_error::VortexResult; +use vortex_session::VortexSession; + +use crate::fixtures::FlatLayoutFixture; + +pub struct PatchedFixture; + +impl FlatLayoutFixture for PatchedFixture { + fn name(&self) -> &str { + "patched.vortex" + } + + fn description(&self) -> &str { + "A set of patches to apply on top of an inner array" + } + + fn build(&self) -> VortexResult { + let mut ctx = ExecutionCtx::new(VortexSession::empty()); + + let array = PrimitiveArray::from_option_iter((0u64..2048).map(Some)).into_array(); + let patches = Patches::new( + 2048, + 0, + PrimitiveArray::from_iter([0u32, 1024, 1025, 1026]).into_array(), + PrimitiveArray::from_option_iter([Some(1u64), Some(2), Some(3), Some(4)]).into_array(), + None, + )?; + + Ok(PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array()) + } + + fn expected_encodings(&self) -> Vec { + vec![Patched.id()] + } +} From 047fca1c0c5ca83fcb99b965a55276bb99b3d690 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Fri, 27 Mar 2026 11:13:47 -0400 Subject: [PATCH 15/23] address Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/compute/filter.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vortex-array/src/arrays/patched/compute/filter.rs b/vortex-array/src/arrays/patched/compute/filter.rs index fb927f53999..258ffc4f0c0 100644 --- a/vortex-array/src/arrays/patched/compute/filter.rs +++ b/vortex-array/src/arrays/patched/compute/filter.rs @@ -18,15 +18,15 @@ impl FilterReduce for Patched { // // This is helpful when we have a very selective filter that is clustered to a small // range. - let (chunk_start, chunk_stop) = match mask.indices() { + let (chunk_start, chunk_stop) = match mask.slices() { AllOr::All | AllOr::None => { // This is handled as the precondition to this method, see the FilterReduce // documentation. unreachable!("mask must be a MaskValues here") } - AllOr::Some(indices) => { - let first = indices[0]; - let last = indices[indices.len() - 1]; + AllOr::Some(slices) => { + let (first, _) = slices[0]; + let (_, last) = slices[slices.len() - 1]; (first / 1024, last.div_ceil(1024)) } From 795a0a9f2f0467c532f3ca391ee7cd59e1ebc405 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Fri, 27 Mar 2026 11:29:58 -0400 Subject: [PATCH 16/23] add test for sliced take Signed-off-by: Andrew Duffy --- .../src/arrays/patched/compute/take.rs | 31 ++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs index a40881ab843..cdc4184792b 100644 --- a/vortex-array/src/arrays/patched/compute/take.rs +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -100,7 +100,7 @@ fn take_map( let index = chunk * 1024 + patch_idx as usize; if index >= offset && index < offset + len { - index_map.insert(index, patch_value); + index_map.insert(index - offset, patch_value); } } } @@ -118,10 +118,13 @@ fn take_map( #[cfg(test)] mod tests { + use std::ops::Range; + use vortex_buffer::buffer; use vortex_error::VortexResult; use vortex_session::VortexSession; + use crate::ArrayRef; use crate::DynArray; use crate::ExecutionCtx; use crate::IntoArray; @@ -134,7 +137,8 @@ mod tests { base: &[u16], patch_indices: &[u32], patch_values: &[u16], - ) -> VortexResult { + slice: Range, + ) -> VortexResult { let values = PrimitiveArray::from_iter(base.iter().copied()).into_array(); let patches = Patches::new( base.len(), @@ -147,13 +151,13 @@ mod tests { let session = VortexSession::empty(); let mut ctx = ExecutionCtx::new(session); - PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + PatchedArray::from_array_and_patches(values, &patches, &mut ctx)?.slice(slice) } #[test] fn test_take_basic() -> VortexResult<()> { // Array with base values [0, 0, 0, 0, 0] patched at indices [1, 3] with values [10, 30] - let array = make_patched_array(&[0; 5], &[1, 3], &[10, 30])?.into_array(); + let array = make_patched_array(&[0; 5], &[1, 3], &[10, 30], 0..5)?; // Take indices [0, 1, 2, 3, 4] - should get [0, 10, 0, 30, 0] let indices = buffer![0u32, 1, 2, 3, 4].into_array(); @@ -165,10 +169,23 @@ mod tests { Ok(()) } + #[test] + fn test_take_sliced() -> VortexResult<()> { + let array = make_patched_array(&[0; 10], &[1, 3], &[100, 200], 2..10)?; + + let indices = buffer![0u32, 1, 2, 3, 7].into_array(); + let result = array.take(indices)?.to_canonical()?.into_array(); + + let expected = PrimitiveArray::from_iter([0u16, 200, 0, 0, 0]).into_array(); + assert_arrays_eq!(expected, result); + + Ok(()) + } + #[test] fn test_take_out_of_order() -> VortexResult<()> { // Array with base values [0, 0, 0, 0, 0] patched at indices [1, 3] with values [10, 30] - let array = make_patched_array(&[0; 5], &[1, 3], &[10, 30])?.into_array(); + let array = make_patched_array(&[0; 5], &[1, 3], &[10, 30], 0..5)?; // Take indices in reverse order let indices = buffer![4u32, 3, 2, 1, 0].into_array(); @@ -183,7 +200,7 @@ mod tests { #[test] fn test_take_duplicates() -> VortexResult<()> { // Array with base values [0, 0, 0, 0, 0] patched at index [2] with value [99] - let array = make_patched_array(&[0; 5], &[2], &[99])?.into_array(); + let array = make_patched_array(&[0; 5], &[2], &[99], 0..5)?; // Take the same patched index multiple times let indices = buffer![2u32, 2, 0, 2].into_array(); @@ -204,7 +221,7 @@ mod tests { use crate::validity::Validity; // Array: 10 elements, base value 0, patches at indices 2, 5, 8 with values 20, 50, 80 - let array = make_patched_array(&[0; 10], &[2, 5, 8], &[20, 50, 80])?.into_array(); + let array = make_patched_array(&[0; 10], &[2, 5, 8], &[20, 50, 80], 0..10)?; // Take 10 indices, with nulls at positions 1, 4, 7 // Indices: [0, 2, 2, 5, 8, 0, 5, 8, 3, 1] From 3d173134aceab72099400cad4e097676dc986ca3 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Fri, 27 Mar 2026 11:35:40 -0400 Subject: [PATCH 17/23] add test for sliced append_to_builder Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/vtable/mod.rs | 39 ++++++++++++++++++- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index ea3a4529c4b..0105cffe236 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -199,7 +199,10 @@ impl VTable for Patched { let ptype = dtype.as_ptype(); let len = array.len(); - array.inner.append_to_builder(builder, ctx)?; + + // Slice the inner by its offset before appending it. + let sliced_inner = array.inner.slice(array.offset..array.offset + array.len)?; + sliced_inner.append_to_builder(builder, ctx)?; let offset = array.offset; let lane_offsets: Buffer = @@ -364,7 +367,7 @@ fn apply_patches_primitive( } let value = values[idx]; - output[index] = value; + output[index - offset] = value; } } } @@ -451,6 +454,38 @@ mod tests { assert_arrays_eq!(expected, result); } + #[test] + fn test_append_to_builder_sliced() { + let values = PrimitiveArray::new(buffer![0u16; 1024], Validity::NonNullable).into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![10u16, 20, 30].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .slice(3..1024) + .unwrap(); + + let mut builder = builder_with_capacity(array.dtype(), array.len()); + array.append_to_builder(builder.as_mut(), &mut ctx).unwrap(); + + let result = builder.finish(); + + let mut expected = buffer_mut![0u16; 1021]; + expected[0] = 30; + let expected = expected.into_array(); + + assert_arrays_eq!(expected, result); + } + #[test] fn test_append_to_builder_with_validity() { // Create inner array with nulls at indices 0 and 5. From 8c624c5f7d2dfa74364b712872efa876a98c3174 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Fri, 27 Mar 2026 11:43:34 -0400 Subject: [PATCH 18/23] add test Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/vtable/mod.rs | 42 +++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index 0105cffe236..ea543c5495d 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -301,11 +301,15 @@ impl VTable for Patched { let values = array.values.clone().execute::(ctx)?; let patched_values = match_each_native_ptype!(values.ptype(), |V| { - let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); - let offset = array.offset; let len = array.len; + // Slice the buffer and validity from the offset. + let buffer = buffer.slice_typed::(offset..offset + len); + let validity = validity.slice(offset..offset + len)?; + + let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); + apply_patches_primitive::( &mut output, offset, @@ -317,7 +321,7 @@ impl VTable for Patched { values.as_slice::(), ); - let output = output.freeze().slice(offset..offset + len); + let output = output.freeze(); PrimitiveArray::from_byte_buffer(output.into_byte_buffer(), ptype, validity) }); @@ -421,6 +425,38 @@ mod tests { assert_eq!(executed, expected.freeze()); } + #[test] + fn test_execute_sliced() { + let values = buffer![0u16; 1024].into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .slice(3..1024) + .unwrap(); + + let executed = array + .execute::(&mut ctx) + .unwrap() + .into_primitive() + .into_buffer::(); + + let mut expected = buffer_mut![0u16; 1021]; + expected[0] = 1; + + assert_eq!(executed, expected.freeze()); + } + #[test] fn test_append_to_builder_non_nullable() { let values = PrimitiveArray::new(buffer![0u16; 1024], Validity::NonNullable).into_array(); From 385f1ab1385411dc11d1c4c14ee85c3e0a565b45 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Fri, 27 Mar 2026 11:47:35 -0400 Subject: [PATCH 19/23] use fixture Signed-off-by: Andrew Duffy --- .../compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs index 4dfc3a9ebf8..b6c18064e0f 100644 --- a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs +++ b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs @@ -38,11 +38,12 @@ pub fn fixtures() -> Vec> { Box::new(bytebool::ByteBoolFixture), Box::new(datetimeparts::DateTimePartsFixture), Box::new(decimal_byte_parts::DecimalBytePartsFixture), - // Reenable this once delta is stable + // Re-enable this once delta is stable // Box::new(delta::DeltaFixture), Box::new(dict::DictFixture), Box::new(fsst::FsstFixture), Box::new(for_::FoRFixture), + Box::new(patched::PatchedFixture), Box::new(pco::PcoFixture), Box::new(rle::RleFixture), Box::new(runend::RunEndFixture), From 7fd3372e5e9407eb214947544d309c647b62cd5a Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Fri, 27 Mar 2026 11:49:26 -0400 Subject: [PATCH 20/23] rename + doc comment Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/array.rs | 8 +++++++- vortex-array/src/arrays/patched/vtable/operations.rs | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 5a2aa9fa03e..6fec8383a55 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -116,7 +116,13 @@ impl PatchedArray { } impl PatchedArray { - pub(crate) fn seek_to_lane(&self, chunk: usize, lane: usize) -> Range { + /// Get a range of indices that can be used to access the `indices` and `values` children + /// to retrieve all patches for a specified lane. + /// + /// # Panics + /// + /// Note that this function will panic if the caller requests out of bounds chunk/lane ordinals. + pub(crate) fn lane_range(&self, chunk: usize, lane: usize) -> Range { assert!(chunk < self.n_chunks); assert!(lane < self.n_lanes); diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs index 314c311ba14..92ebbb54cac 100644 --- a/vortex-array/src/arrays/patched/vtable/operations.rs +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -28,7 +28,7 @@ impl OperationsVTable for Patched { let lane = (index + array.offset) % array.n_lanes; - let range = array.seek_to_lane(chunk, lane); + let range = array.lane_range(chunk, lane); // Get the range of indices corresponding to the lane, potentially decoding them to avoid // the overhead of repeated scalar_at calls. From 79705984afbaa8b5240db57ec6a1ec6384088164 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Fri, 27 Mar 2026 12:53:09 -0400 Subject: [PATCH 21/23] add Patched as default codec Signed-off-by: Andrew Duffy --- vortex-array/src/session/mod.rs | 4 +++- vortex-file/src/strategy.rs | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/vortex-array/src/session/mod.rs b/vortex-array/src/session/mod.rs index 3c7c920e692..6fc5a9ea2e0 100644 --- a/vortex-array/src/session/mod.rs +++ b/vortex-array/src/session/mod.rs @@ -17,6 +17,7 @@ use crate::arrays::List; use crate::arrays::ListView; use crate::arrays::Masked; use crate::arrays::Null; +use crate::arrays::Patched; use crate::arrays::Primitive; use crate::arrays::Struct; use crate::arrays::VarBin; @@ -70,8 +71,9 @@ impl Default for ArraySession { // Register the utility encodings. this.register(Chunked); this.register(Constant); - this.register(Masked); this.register(List); + this.register(Masked); + this.register(Patched); this.register(VarBin); this diff --git a/vortex-file/src/strategy.rs b/vortex-file/src/strategy.rs index 4d6031a220c..dc29e7cb4f8 100644 --- a/vortex-file/src/strategy.rs +++ b/vortex-file/src/strategy.rs @@ -21,6 +21,7 @@ use vortex_array::arrays::List; use vortex_array::arrays::ListView; use vortex_array::arrays::Masked; use vortex_array::arrays::Null; +use vortex_array::arrays::Patched; use vortex_array::arrays::Primitive; use vortex_array::arrays::Struct; use vortex_array::arrays::VarBin; @@ -104,6 +105,7 @@ pub static ALLOWED_ENCODINGS: LazyLock = LazyLock::new(|| { session.register(Delta); session.register(FoR); session.register(FSST); + session.register(Patched); session.register(Pco); session.register(RLE); session.register(RunEnd); From 9f5ca5d54f959bb68ee46eb4f6e0881e8797e4a3 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Fri, 27 Mar 2026 13:25:38 -0400 Subject: [PATCH 22/23] save Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/array.rs | 5 +++ vortex-array/src/arrays/patched/vtable/mod.rs | 44 ++++++++++++++++--- 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 6fec8383a55..d7342d38a37 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -76,6 +76,11 @@ impl PatchedArray { "Creating PatchedArray from Patches only supported for primitive arrays" ); + vortex_ensure!( + patches.num_patches() <= u32::MAX as usize, + "PatchedArray does not support > u32::MAX patch values" + ); + let values_ptype = patches.dtype().as_ptype(); let TransposedPatches { diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index ea543c5495d..cf0545cea9e 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -14,6 +14,7 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_ensure; +use vortex_error::vortex_err; use vortex_error::vortex_panic; use vortex_session::VortexSession; @@ -65,10 +66,29 @@ impl ValidityChild for Patched { #[derive(Clone, prost::Message)] pub struct PatchedMetadata { - #[prost(uint32, tag = "1")] - pub(crate) offset: u32, + /// Length of the `inner` child. + /// + /// This may not match the length of the wrapping PatchedArray, if for example + /// in a filter or slice it may be sliced to the nearest chunk boundary. + #[prost(uint64, tag = "1")] + pub(crate) inner_len: u64, + + /// Offset within the first chunk of `inner` where data begins. + /// + /// This may become nonzero after slicing. #[prost(uint32, tag = "2")] + pub(crate) offset: u32, + + /// Number of patches. This is the length of the `indices` and `values` children. + #[prost(uint32, tag = "3")] pub(crate) n_patches: u32, + + /// Number of lanes the patches get spread over. + /// + /// By default, this is either 16 or 32 depending on the width of the type, but may change + /// in the future, so we save it on write. + #[prost(uint32, tag = "4")] + pub(crate) n_lanes: u32, } impl VTable for Patched { @@ -158,8 +178,10 @@ impl VTable for Patched { #[allow(clippy::cast_possible_truncation)] fn metadata(array: &Self::Array) -> VortexResult { Ok(ProstMetadata(PatchedMetadata { + inner_len: array.inner.len() as u64, offset: array.offset as u32, n_patches: array.indices.len() as u32, + n_lanes: array.n_lanes as u32, })) } @@ -243,14 +265,24 @@ impl VTable for Patched { buffers: &[BufferHandle], children: &dyn ArrayChildren, ) -> VortexResult { - let n_chunks = len.div_ceil(1024); - let n_lanes = match_each_native_ptype!(dtype.as_ptype(), |P| { patch_lanes::

() }); + let inner_len = usize::try_from(metadata.inner_len).map_err(|_| { + vortex_err!( + "PatchedMetadata inner_len overflows usize: {}", + metadata.inner_len + ) + })?; + let offset = metadata.offset as usize; + + // n_chunks should correspond to the chunk in the `inner`. + // After slicing when offset > 0, there may be additional chunks. + let n_chunks = (len + offset).div_ceil(1024); + let n_lanes = metadata.n_lanes as usize; let &[lane_offsets] = &buffers else { vortex_bail!("invalid buffer count for PatchedArray"); }; - let inner = children.get(0, dtype, len)?; + let inner = children.get(0, dtype, inner_len)?; let indices = children.get(1, PType::U16.into(), metadata.n_patches as usize)?; let values = children.get(2, dtype, metadata.n_patches as usize)?; @@ -258,7 +290,7 @@ impl VTable for Patched { inner, n_chunks, n_lanes, - offset: metadata.offset as usize, + offset, len, lane_offsets: lane_offsets.clone(), indices, From a9ce27f505ad2b64b7d6f1665716f02c42e3b111 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Fri, 27 Mar 2026 15:33:05 -0400 Subject: [PATCH 23/23] fixup Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/compute/compare.rs | 5 +++-- vortex-array/src/arrays/patched/vtable/mod.rs | 9 +-------- vortex-array/src/arrays/patched/vtable/operations.rs | 2 +- vortex-array/src/arrays/patched/vtable/slice.rs | 12 ++++-------- 4 files changed, 9 insertions(+), 19 deletions(-) diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs index 1fe0f4a90cb..be1fd1ef7f0 100644 --- a/vortex-array/src/arrays/patched/compute/compare.rs +++ b/vortex-array/src/arrays/patched/compute/compare.rs @@ -7,7 +7,6 @@ use vortex_error::VortexResult; use crate::ArrayRef; use crate::Canonical; -use crate::DynArray; use crate::ExecutionCtx; use crate::IntoArray; use crate::arrays::BoolArray; @@ -43,7 +42,6 @@ impl CompareKernel for Patched { // We slice the inner before performing the comparison. let result = lhs .inner - .slice(lhs.offset..lhs.offset + lhs.len)? .binary( ConstantArray::new(constant.clone(), lhs.len()).into_array(), operator.into(), @@ -142,6 +140,9 @@ impl ApplyPatches<'_, V> { continue; } let bit_index = bit_index - self.offset; + if bit_index >= self.bits.len() { + break; + } if cmp(patch_value, self.constant) { self.bits.set(bit_index) } else { diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index cf0545cea9e..62bc183e0d6 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -33,7 +33,6 @@ use crate::SerializeMetadata; use crate::arrays::PrimitiveArray; use crate::arrays::patched::PatchedArray; use crate::arrays::patched::compute::rules::PARENT_RULES; -use crate::arrays::patched::patch_lanes; use crate::arrays::patched::vtable::kernels::PARENT_KERNELS; use crate::arrays::primitive::PrimitiveArrayParts; use crate::buffer::BufferHandle; @@ -222,9 +221,7 @@ impl VTable for Patched { let len = array.len(); - // Slice the inner by its offset before appending it. - let sliced_inner = array.inner.slice(array.offset..array.offset + array.len)?; - sliced_inner.append_to_builder(builder, ctx)?; + array.inner.append_to_builder(builder, ctx)?; let offset = array.offset; let lane_offsets: Buffer = @@ -336,10 +333,6 @@ impl VTable for Patched { let offset = array.offset; let len = array.len; - // Slice the buffer and validity from the offset. - let buffer = buffer.slice_typed::(offset..offset + len); - let validity = validity.slice(offset..offset + len)?; - let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); apply_patches_primitive::( diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs index 92ebbb54cac..5fe3ccecb5e 100644 --- a/vortex-array/src/arrays/patched/vtable/operations.rs +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -47,7 +47,7 @@ impl OperationsVTable for Patched { } // Otherwise, access the underlying value. - array.inner.scalar_at(index + array.offset) + array.inner.scalar_at(index) } } diff --git a/vortex-array/src/arrays/patched/vtable/slice.rs b/vortex-array/src/arrays/patched/vtable/slice.rs index 98be8f0264e..c469bf9ece0 100644 --- a/vortex-array/src/arrays/patched/vtable/slice.rs +++ b/vortex-array/src/arrays/patched/vtable/slice.rs @@ -24,10 +24,7 @@ impl SliceReduce for Patched { let chunk_start = (range.start + array.offset) / 1024; let chunk_stop = (range.end + array.offset).div_ceil(1024); - // Slice the inner to chunk boundaries - let inner_start = chunk_start * 1024; - let inner_stop = (chunk_stop * 1024).min(array.inner.len()); - let inner = array.inner.slice(inner_start..inner_stop)?; + let inner = array.inner.slice(range.start..range.end)?; // Slice to only maintain offsets to the sliced chunks let sliced_lane_offsets = array @@ -78,12 +75,11 @@ mod tests { let values = buffer![0u16; 512].into_array(); let patch_indices = buffer![1u32, 8, 30].into_array(); let patch_values = buffer![u16::MAX; 3].into_array(); - let patches = Patches::new(512, 0, patch_indices, patch_values, None).unwrap(); + let patches = Patches::new(512, 0, patch_indices, patch_values, None)?; let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); - let patched_array = - PatchedArray::from_array_and_patches(values, &patches, &mut ctx).unwrap(); + let patched_array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx)?; let sliced = patched_array.slice(1..10)?; @@ -91,7 +87,7 @@ mod tests { sliced.display_tree_encodings_only(), @r#" root: vortex.patched(u16, len=9) - inner: vortex.primitive(u16, len=512) + inner: vortex.primitive(u16, len=9) patch_indices: vortex.primitive(u16, len=3) patch_values: vortex.primitive(u16, len=3) "#);