diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 821ef35e111..7eb9d049776 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -2044,6 +2044,10 @@ impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::dict::Dic pub fn vortex_array::arrays::dict::Dict::take(array: &vortex_array::arrays::dict::DictArray, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::take(array: &Self::Array, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + pub trait vortex_array::arrays::dict::TakeReduce: vortex_array::vtable::VTable pub fn vortex_array::arrays::dict::TakeReduce::take(array: &Self::Array, indices: &vortex_array::ArrayRef) -> vortex_error::VortexResult> @@ -2442,6 +2446,10 @@ impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::null:: pub fn vortex_array::arrays::null::Null::filter(_array: &vortex_array::arrays::null::NullArray, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> +impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::filter(array: &Self::Array, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> + pub mod vortex_array::arrays::fixed_size_list pub struct vortex_array::arrays::fixed_size_list::FixedSizeList @@ -3234,6 +3242,154 @@ impl vortex_array::IntoArray for vortex_array::arrays::null::NullArray pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array::ArrayRef +pub mod vortex_array::arrays::patched + +pub struct vortex_array::arrays::patched::Patched + +impl core::clone::Clone for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::clone(&self) -> 
vortex_array::arrays::patched::Patched + +impl core::fmt::Debug for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::take(array: &Self::Array, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::filter(array: &Self::Array, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> + +impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> + +impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type 
vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc>, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: 
usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &vortex_array::vtable::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::vtable(_array: &Self::Array) -> &Self + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + +impl vortex_array::vtable::ValidityChild for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::validity_child(array: &vortex_array::arrays::patched::PatchedArray) -> &vortex_array::ArrayRef + +pub struct vortex_array::arrays::patched::PatchedArray + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::from_array_and_patches(inner: vortex_array::ArrayRef, patches: &vortex_array::patches::Patches, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +impl vortex_array::arrays::patched::PatchedArray + +pub fn 
vortex_array::arrays::patched::PatchedArray::to_array(&self) -> vortex_array::ArrayRef + +impl core::clone::Clone for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::clone(&self) -> vortex_array::arrays::patched::PatchedArray + +impl core::convert::AsRef for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::as_ref(&self) -> &dyn vortex_array::DynArray + +impl core::convert::From for vortex_array::ArrayRef + +pub fn vortex_array::ArrayRef::from(value: vortex_array::arrays::patched::PatchedArray) -> vortex_array::ArrayRef + +impl core::fmt::Debug for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::PatchedArray::Target = dyn vortex_array::DynArray + +pub fn vortex_array::arrays::patched::PatchedArray::deref(&self) -> &Self::Target + +impl vortex_array::IntoArray for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::into_array(self) -> vortex_array::ArrayRef + +pub struct vortex_array::arrays::patched::PatchedMetadata + +impl core::clone::Clone for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::clone(&self) -> vortex_array::arrays::patched::PatchedMetadata + +impl core::default::Default for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::default() -> Self + +impl core::fmt::Debug for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl prost::message::Message for vortex_array::arrays::patched::PatchedMetadata + +pub fn 
vortex_array::arrays::patched::PatchedMetadata::clear(&mut self) + +pub fn vortex_array::arrays::patched::PatchedMetadata::encoded_len(&self) -> usize + pub mod vortex_array::arrays::primitive #[repr(transparent)] pub struct vortex_array::arrays::primitive::NativeValue(pub T) @@ -4084,6 +4240,10 @@ impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::null::Nu pub fn vortex_array::arrays::null::Null::slice(_array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> +impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> + impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::slice::Slice pub fn vortex_array::arrays::slice::Slice::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> @@ -6932,6 +7092,132 @@ impl vortex_array::IntoArray for vortex_array::arrays::null::NullArray pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array::ArrayRef +pub struct vortex_array::arrays::Patched + +impl core::clone::Clone for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::clone(&self) -> vortex_array::arrays::patched::Patched + +impl core::fmt::Debug for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::take(array: &Self::Array, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::filter(array: &Self::Array, mask: 
&vortex_mask::Mask) -> vortex_error::VortexResult> + +impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> + +impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) 
-> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc>, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &vortex_array::vtable::Array) -> vortex_error::VortexResult> + +pub fn 
vortex_array::arrays::patched::Patched::reduce_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::vtable(_array: &Self::Array) -> &Self + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + +impl vortex_array::vtable::ValidityChild for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::validity_child(array: &vortex_array::arrays::patched::PatchedArray) -> &vortex_array::ArrayRef + +pub struct vortex_array::arrays::PatchedArray + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::from_array_and_patches(inner: vortex_array::ArrayRef, patches: &vortex_array::patches::Patches, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::to_array(&self) -> vortex_array::ArrayRef + +impl core::clone::Clone for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::clone(&self) -> vortex_array::arrays::patched::PatchedArray + +impl core::convert::AsRef for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::as_ref(&self) -> &dyn vortex_array::DynArray + +impl core::convert::From for vortex_array::ArrayRef + +pub fn vortex_array::ArrayRef::from(value: vortex_array::arrays::patched::PatchedArray) -> vortex_array::ArrayRef + +impl core::fmt::Debug for vortex_array::arrays::patched::PatchedArray + +pub fn 
vortex_array::arrays::patched::PatchedArray::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::PatchedArray::Target = dyn vortex_array::DynArray + +pub fn vortex_array::arrays::patched::PatchedArray::deref(&self) -> &Self::Target + +impl vortex_array::IntoArray for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::into_array(self) -> vortex_array::ArrayRef + pub struct vortex_array::arrays::Primitive impl vortex_array::arrays::Primitive @@ -9140,6 +9426,8 @@ pub fn vortex_array::builders::PrimitiveBuilder::uninit_range(&mut self, len: pub fn vortex_array::builders::PrimitiveBuilder::values(&self) -> &[T] +pub fn vortex_array::builders::PrimitiveBuilder::values_mut(&mut self) -> &mut [T] + pub fn vortex_array::builders::PrimitiveBuilder::with_capacity(nullability: vortex_array::dtype::Nullability, capacity: usize) -> Self impl vortex_array::builders::ArrayBuilder for vortex_array::builders::PrimitiveBuilder @@ -17262,6 +17550,10 @@ impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::array pub fn vortex_array::arrays::dict::Dict::compare(lhs: &vortex_array::arrays::dict::DictArray, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + pub fn vortex_array::scalar_fn::fns::binary::and_kleene(lhs: &vortex_array::ArrayRef, rhs: &vortex_array::ArrayRef) -> vortex_error::VortexResult pub fn 
vortex_array::scalar_fn::fns::binary::compare_nested_arrow_arrays(lhs: &dyn arrow_array::array::Array, rhs: &dyn arrow_array::array::Array, operator: vortex_array::scalar_fn::fns::operators::CompareOperator) -> vortex_error::VortexResult @@ -21844,6 +22136,62 @@ pub fn vortex_array::arrays::null::Null::vtable(_array: &Self::Array) -> &Self pub fn vortex_array::arrays::null::Null::with_children(_array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn 
vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc>, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &vortex_array::vtable::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn 
vortex_array::arrays::patched::Patched::vtable(_array: &Self::Array) -> &Self + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + impl vortex_array::vtable::VTable for vortex_array::arrays::scalar_fn::ScalarFnVTable pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::Array = vortex_array::arrays::scalar_fn::ScalarFnArray @@ -22064,6 +22412,10 @@ impl vortex_array::vtable::OperationsVTable fo pub fn vortex_array::arrays::null::Null::scalar_at(_array: &vortex_array::arrays::null::NullArray, _index: usize, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::scalar_fn::ScalarFnVTable pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::scalar_at(array: &vortex_array::arrays::scalar_fn::ScalarFnArray, index: usize, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult @@ -23140,6 +23492,62 @@ pub fn vortex_array::arrays::null::Null::vtable(_array: &Self::Array) -> &Self pub fn vortex_array::arrays::null::Null::with_children(_array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = 
vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc>, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> 
vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &vortex_array::vtable::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &vortex_array::vtable::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::vtable(_array: &Self::Array) -> &Self + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + impl vortex_array::vtable::VTable for vortex_array::arrays::scalar_fn::ScalarFnVTable pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::Array = vortex_array::arrays::scalar_fn::ScalarFnArray @@ -23260,6 +23668,10 @@ impl vortex_array::vtable::ValidityChild for vo pub fn vortex_array::arrays::Extension::validity_child(array: &vortex_array::arrays::ExtensionArray) -> &vortex_array::ArrayRef +impl vortex_array::vtable::ValidityChild for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::validity_child(array: &vortex_array::arrays::patched::PatchedArray) -> &vortex_array::ArrayRef + pub trait vortex_array::vtable::ValidityChildSliceHelper pub fn 
vortex_array::vtable::ValidityChildSliceHelper::sliced_child_array(&self) -> vortex_error::VortexResult @@ -24508,6 +24920,10 @@ impl vortex_array::IntoArray for vortex_array::arrays::null::NullArray pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array::ArrayRef +impl vortex_array::IntoArray for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::into_array(self) -> vortex_array::ArrayRef + impl vortex_array::IntoArray for vortex_array::arrays::scalar_fn::ScalarFnArray pub fn vortex_array::arrays::scalar_fn::ScalarFnArray::into_array(self) -> vortex_array::ArrayRef diff --git a/vortex-array/src/arrays/mod.rs b/vortex-array/src/arrays/mod.rs index 43f8a84d49e..5abbcb84b85 100644 --- a/vortex-array/src/arrays/mod.rs +++ b/vortex-array/src/arrays/mod.rs @@ -66,6 +66,10 @@ pub mod null; pub use null::Null; pub use null::NullArray; +pub mod patched; +pub use patched::Patched; +pub use patched::PatchedArray; + pub mod primitive; pub use primitive::Primitive; pub use primitive::PrimitiveArray; diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs new file mode 100644 index 00000000000..d7342d38a37 --- /dev/null +++ b/vortex-array/src/arrays/patched/array.rs @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Range; + +use vortex_buffer::Buffer; +use vortex_buffer::BufferMut; +use vortex_error::VortexResult; +use vortex_error::vortex_ensure; + +use crate::ArrayRef; +use crate::Canonical; +use crate::DynArray; +use crate::ExecutionCtx; +use crate::IntoArray; +use crate::arrays::PrimitiveArray; +use crate::arrays::patched::TransposedPatches; +use crate::arrays::patched::patch_lanes; +use crate::buffer::BufferHandle; +use crate::dtype::IntegerPType; +use crate::dtype::NativePType; +use crate::dtype::PType; +use crate::match_each_native_ptype; +use 
crate::match_each_unsigned_integer_ptype; +use crate::patches::Patches; +use crate::stats::ArrayStats; +use crate::validity::Validity; + +/// An array that wraps an `inner` array and "patches" some of its positions with new values. +#[derive(Debug, Clone)] +pub struct PatchedArray { + /// The inner array that is being patched. This is the zeroth child. + pub(super) inner: ArrayRef, + + /// Number of 1024-element chunks. Pre-computed for convenience. + pub(super) n_chunks: usize, + + /// Number of lanes the patch indices and values have been split into. Each of the `n_chunks` + /// of 1024 values is split into `n_lanes` lanes horizontally, each lane having 1024 / n_lanes + /// values that might be patched. + pub(super) n_lanes: usize, + + /// Offset of the first element within the first 1024-element chunk. + pub(super) offset: usize, + /// Total length of the array. + pub(super) len: usize, + + /// Offsets into `indices`/`values` per (chunk, lane). The PType of these MUST be u32 + pub(super) lane_offsets: BufferHandle, + /// Patch indices within a 1024-element chunk. The PType of these MUST be u16 + pub(super) indices: ArrayRef, + /// Patch values corresponding to `indices`. Their ptype is that of the patches' DType. + pub(super) values: ArrayRef, + + pub(super) stats_set: ArrayStats, +} + +impl PatchedArray { + /// Create a new `PatchedArray` from a child array and a set of [`Patches`]. + /// + /// # Errors + /// + /// Returns an error if `inner` is not primitive, if its DType does not match the patches' + /// DType (per `eq_with_nullability_superset`), or if there are more than `u32::MAX` patches. 
+    pub fn from_array_and_patches(
+        inner: ArrayRef,
+        patches: &Patches,
+        ctx: &mut ExecutionCtx,
+    ) -> VortexResult<Self> {
+        vortex_ensure!(
+            inner.dtype().eq_with_nullability_superset(patches.dtype()),
+            "array DType must match patches DType"
+        );
+
+        vortex_ensure!(
+            inner.dtype().is_primitive(),
+            "Creating PatchedArray from Patches only supported for primitive arrays"
+        );
+
+        vortex_ensure!(
+            patches.num_patches() <= u32::MAX as usize,
+            "PatchedArray does not support > u32::MAX patch values"
+        );
+
+        let values_ptype = patches.dtype().as_ptype();
+
+        let TransposedPatches {
+            n_chunks,
+            n_lanes,
+            lane_offsets,
+            indices,
+            values,
+        } = transpose_patches(patches, ctx)?;
+
+        let indices = PrimitiveArray::from_buffer_handle(
+            BufferHandle::new_host(indices),
+            PType::U16,
+            Validity::NonNullable,
+        )
+        .into_array();
+        let values = PrimitiveArray::from_buffer_handle(
+            BufferHandle::new_host(values),
+            values_ptype,
+            Validity::NonNullable,
+        )
+        .into_array();
+
+        let len = inner.len();
+
+        Ok(Self {
+            inner,
+            n_chunks,
+            n_lanes,
+            offset: 0,
+            len,
+            lane_offsets: BufferHandle::new_host(lane_offsets),
+            indices,
+            values,
+            stats_set: ArrayStats::default(),
+        })
+    }
+}
+
+impl PatchedArray {
+    /// Get the range of positions in the `indices` and `values` children that holds all
+    /// patches for the given lane of the given chunk.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `chunk >= self.n_chunks` or `lane >= self.n_lanes`.
+    pub(crate) fn lane_range(&self, chunk: usize, lane: usize) -> Range<usize> {
+        assert!(chunk < self.n_chunks);
+        assert!(lane < self.n_lanes);
+
+        let lane_offsets = self.lane_offsets.as_host().reinterpret::<u32>();
+
+        let start = lane_offsets[chunk * self.n_lanes + lane] as usize;
+        let stop = lane_offsets[chunk * self.n_lanes + lane + 1] as usize;
+
+        start..stop
+    }
+
+    /// Slice the array to just the patches and inner values that are within the chunk range.
+    pub(crate) fn slice_chunks(&self, chunks: Range<usize>) -> VortexResult<Self> {
+        let lane_offsets_start = chunks.start * self.n_lanes;
+        let lane_offsets_stop = chunks.end * self.n_lanes + 1;
+
+        let sliced_lane_offsets = self
+            .lane_offsets
+            .slice_typed::<u32>(lane_offsets_start..lane_offsets_stop);
+        let indices = self.indices.clone();
+        let values = self.values.clone();
+
+        let begin = (chunks.start * 1024).max(self.offset);
+        let end = (chunks.end * 1024).min(self.len);
+
+        let offset = begin % 1024;
+
+        let inner = self.inner.slice(begin..end)?;
+
+        let len = end - begin;
+        let n_chunks = (end - begin).div_ceil(1024);
+
+        Ok(PatchedArray {
+            inner,
+            n_chunks,
+            n_lanes: self.n_lanes,
+            offset,
+            len,
+            indices,
+            values,
+            lane_offsets: sliced_lane_offsets,
+            stats_set: ArrayStats::default(),
+        })
+    }
+}
+
+/// Transpose a set of patches from the default sorted layout into the data parallel layout.
+#[allow(clippy::cognitive_complexity)]
+fn transpose_patches(patches: &Patches, ctx: &mut ExecutionCtx) -> VortexResult<TransposedPatches> {
+    let array_len = patches.array_len();
+    let offset = patches.offset();
+
+    let indices = patches
+        .indices()
+        .clone()
+        .execute::<Canonical>(ctx)?
+        .into_primitive();
+
+    let values = patches
+        .values()
+        .clone()
+        .execute::<Canonical>(ctx)?
+        .into_primitive();
+
+    let indices_ptype = indices.ptype();
+    let values_ptype = values.ptype();
+
+    let indices = indices.buffer_handle().clone().unwrap_host();
+    let values = values.buffer_handle().clone().unwrap_host();
+
+    match_each_unsigned_integer_ptype!(indices_ptype, |I| {
+        match_each_native_ptype!(values_ptype, |V| {
+            let indices: Buffer<I> = Buffer::from_byte_buffer(indices);
+            let values: Buffer<V> = Buffer::from_byte_buffer(values);
+
+            Ok(transpose(
+                indices.as_slice(),
+                values.as_slice(),
+                offset,
+                array_len,
+            ))
+        })
+    })
+}
+
+#[allow(clippy::cast_possible_truncation)]
+fn transpose<I: IntegerPType, V: NativePType>(
+    indices_in: &[I],
+    values_in: &[V],
+    offset: usize,
+    array_len: usize,
+) -> TransposedPatches {
+    // Total number of slots is number of chunks times number of lanes.
+    let n_chunks = array_len.div_ceil(1024);
+    assert!(
+        n_chunks <= u32::MAX as usize,
+        "Cannot transpose patches for array with >= 4 trillion elements"
+    );
+
+    let n_lanes = patch_lanes::<V>();
+
+    // We know upfront how many indices and values we'll have.
+    let mut indices_buffer = BufferMut::with_capacity(indices_in.len());
+    let mut values_buffer = BufferMut::with_capacity(values_in.len());
+
+    // Per-(chunk, lane) patch counts, turned into offsets by the prefix sum below.
+    let mut lane_offsets: BufferMut<u32> = BufferMut::zeroed(n_chunks * n_lanes + 1);
+
+    // Scan the index/values once to get chunk/lane counts
+    for index in indices_in {
+        let index = index.as_() - offset;
+        let chunk = index / 1024;
+        let lane = index % n_lanes;
+
+        lane_offsets[chunk * n_lanes + lane + 1] += 1;
+    }
+
+    // Prefix-sum sizes -> offsets
+    for index in 1..lane_offsets.len() {
+        lane_offsets[index] += lane_offsets[index - 1];
+    }
+
+    // Loop over patches, writing them to final positions
+    let indices_out = indices_buffer.spare_capacity_mut();
+    let values_out = values_buffer.spare_capacity_mut();
+    for (index, &value) in std::iter::zip(indices_in, values_in) {
+        let index = index.as_() - offset;
+        let chunk = index / 1024;
+        let lane = index % n_lanes;
+
+        let position = &mut lane_offsets[chunk * n_lanes + lane];
+        indices_out[*position as usize].write((index % 1024) as u16);
+        values_out[*position as usize].write(value);
+        *position += 1;
+    }
+
+    // SAFETY: we know there are exactly indices_in.len() indices/values, and we just
+    // set them to the appropriate values in the loop above.
+    unsafe {
+        indices_buffer.set_len(indices_in.len());
+        values_buffer.set_len(values_in.len());
+    }
+
+    // Now, pass over all the indices and values again and subtract out the position increments.
+    for index in indices_in {
+        let index = index.as_() - offset;
+        let chunk = index / 1024;
+        let lane = index % n_lanes;
+
+        lane_offsets[chunk * n_lanes + lane] -= 1;
+    }
+
+    TransposedPatches {
+        n_chunks,
+        n_lanes,
+        lane_offsets: lane_offsets.freeze().into_byte_buffer(),
+        indices: indices_buffer.freeze().into_byte_buffer(),
+        values: values_buffer.freeze().into_byte_buffer(),
+    }
+}
diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs
new file mode 100644
index 00000000000..be1fd1ef7f0
--- /dev/null
+++ b/vortex-array/src/arrays/patched/compute/compare.rs
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use vortex_buffer::BitBufferMut;
+use vortex_error::VortexExpect;
+use vortex_error::VortexResult;
+
+use crate::ArrayRef;
+use crate::Canonical;
+use crate::ExecutionCtx;
+use crate::IntoArray;
+use crate::arrays::BoolArray;
+use crate::arrays::ConstantArray;
+use crate::arrays::Patched;
+use crate::arrays::PrimitiveArray;
+use crate::arrays::bool::BoolArrayParts;
+use crate::arrays::primitive::NativeValue;
+use crate::builtins::ArrayBuiltins;
+use crate::dtype::NativePType;
+use crate::match_each_native_ptype;
+use crate::scalar_fn::fns::binary::CompareKernel;
+use crate::scalar_fn::fns::operators::CompareOperator;
+
+impl CompareKernel for Patched {
+    fn compare(
+        lhs: &Self::Array,
+        rhs: &ArrayRef,
+        operator: CompareOperator,
+        ctx: &mut ExecutionCtx,
+    ) -> VortexResult<Option<ArrayRef>> {
+        // We only accelerate comparisons for primitives
+        if !lhs.dtype().is_primitive() {
+            return Ok(None);
+        }
+
+        // We only accelerate comparisons against constants
+        let Some(constant) = rhs.as_constant() else {
+            return Ok(None);
+        };
+
+        // NOTE(review): `inner` is compared as-is (it is not sliced here) against a constant of
+        // `lhs.len()` elements, so this assumes `inner.len() == lhs.len()` - confirm for offsets.
+        let result = lhs
+            .inner
+            .binary(
+                ConstantArray::new(constant.clone(), lhs.len()).into_array(),
+                operator.into(),
+            )?
+            .execute::<Canonical>(ctx)?
+            .into_bool();
+
+        let BoolArrayParts {
+            bits,
+            offset,
+            len,
+            validity,
+        } = result.into_parts();
+
+        let mut bits = BitBufferMut::from_buffer(bits.unwrap_host().into_mut(), offset, len);
+
+        let lane_offsets = lhs.lane_offsets.as_host().reinterpret::<u32>();
+        let indices = lhs.indices.clone().execute::<PrimitiveArray>(ctx)?;
+        let values = lhs.values.clone().execute::<PrimitiveArray>(ctx)?;
+        let n_lanes = lhs.n_lanes;
+
+        match_each_native_ptype!(values.ptype(), |V| {
+            let offset = lhs.offset;
+            let indices = indices.as_slice::<u16>();
+            let values = values.as_slice::<V>();
+            let constant = constant
+                .as_primitive()
+                .as_::<V>()
+                .vortex_expect("compare constant not null");
+
+            let apply_patches = ApplyPatches {
+                bits: &mut bits,
+                offset,
+                n_lanes,
+                lane_offsets,
+                indices,
+                values,
+                constant,
+            };
+
+            match operator {
+                CompareOperator::Eq => {
+                    apply_patches.apply(|l, r| NativeValue(l) == NativeValue(r))?;
+                }
+                CompareOperator::NotEq => {
+                    apply_patches.apply(|l, r| NativeValue(l) != NativeValue(r))?;
+                }
+                CompareOperator::Gt => {
+                    apply_patches.apply(|l, r| NativeValue(l) > NativeValue(r))?;
+                }
+                CompareOperator::Gte => {
+                    apply_patches.apply(|l, r| NativeValue(l) >= NativeValue(r))?;
+                }
+                CompareOperator::Lt => {
+                    apply_patches.apply(|l, r| NativeValue(l) < NativeValue(r))?;
+                }
+                CompareOperator::Lte => {
+                    apply_patches.apply(|l, r| NativeValue(l) <= NativeValue(r))?;
+                }
+            }
+        });
+
+        let result = BoolArray::new(bits.freeze(), validity);
+        Ok(Some(result.into_array()))
+    }
+}
+
+struct ApplyPatches<'a, V: NativePType> {
+    bits: &'a mut BitBufferMut,
+    offset: usize,
+    n_lanes: usize,
+    lane_offsets: &'a [u32],
+    indices: &'a [u16],
+    values: &'a [V],
+    constant: V,
+}
+
+impl<V: NativePType> ApplyPatches<'_, V> {
+    fn apply<F>(self, cmp: F) -> VortexResult<()>
+    where
+        F: Fn(V, V) -> bool,
+    {
+        for index in 0..(self.lane_offsets.len() - 1) {
+            let chunk = index / self.n_lanes;
+
+            let lane_start = self.lane_offsets[index] as usize;
+            let lane_end = self.lane_offsets[index + 1] as usize;
+
+            for (&patch_index, &patch_value) in std::iter::zip(
+                &self.indices[lane_start..lane_end],
+                &self.values[lane_start..lane_end],
+            ) {
+                let bit_index = chunk * 1024 + patch_index as usize;
+                // Skip any indices < the offset.
+                if bit_index < self.offset {
+                    continue;
+                }
+                let bit_index = bit_index - self.offset;
+                if bit_index >= self.bits.len() {
+                    break;
+                }
+                if cmp(patch_value, self.constant) {
+                    self.bits.set(bit_index)
+                } else {
+                    self.bits.unset(bit_index)
+                }
+            }
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use vortex_buffer::buffer;
+    use vortex_error::VortexResult;
+
+    use crate::DynArray;
+    use crate::ExecutionCtx;
+    use crate::IntoArray;
+    use crate::LEGACY_SESSION;
+    use crate::arrays::BoolArray;
+    use crate::arrays::ConstantArray;
+    use crate::arrays::Patched;
+    use crate::arrays::PatchedArray;
+    use crate::arrays::PrimitiveArray;
+    use crate::assert_arrays_eq;
+    use crate::optimizer::ArrayOptimizer;
+    use crate::patches::Patches;
+    use crate::scalar_fn::fns::binary::CompareKernel;
+    use crate::scalar_fn::fns::operators::CompareOperator;
+    use crate::validity::Validity;
+
+    #[test]
+    fn test_basic() {
+        let lhs = PrimitiveArray::from_iter(0u32..512).into_array();
+        let patches = Patches::new(
+            512,
+            0,
+            buffer![509u16, 510, 511].into_array(),
+            buffer![u32::MAX; 3].into_array(),
+            None,
+        )
+        .unwrap();
+
+        let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone());
+
+        let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx).unwrap();
+
+        let rhs = ConstantArray::new(u32::MAX, 512).into_array();
+
+        let result = <Patched as CompareKernel>::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx)
+            .unwrap()
+            .unwrap();
+
+        let expected =
+            BoolArray::from_indices(512, [509, 510, 511], Validity::NonNullable).into_array();
+
+        assert_arrays_eq!(expected, result);
+    }
+
+    #[test]
+    fn test_with_offset() {
+        let lhs = PrimitiveArray::from_iter(0u32..512).into_array();
+        let patches = Patches::new(
+            512,
+            0,
+            buffer![5u16, 510, 511].into_array(),
+            buffer![u32::MAX; 3].into_array(),
+            None,
+        )
+        .unwrap();
+
+        let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone());
+
+        let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx).unwrap();
+        // Slice the array so that the first patch should be skipped.
+        let lhs = lhs
+            .slice(10..lhs.len())
+            .unwrap()
+            .optimize()
+            .unwrap()
+            .try_into::<Patched>()
+            .unwrap();
+
+        assert_eq!(lhs.len(), 502);
+
+        let rhs = ConstantArray::new(u32::MAX, lhs.len()).into_array();
+
+        let result = <Patched as CompareKernel>::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx)
+            .unwrap()
+            .unwrap();
+
+        let expected = BoolArray::from_indices(502, [500, 501], Validity::NonNullable).into_array();
+
+        assert_arrays_eq!(expected, result);
+    }
+
+    #[test]
+    fn test_subnormal_f32() -> VortexResult<()> {
+        // Subnormal f32 values are smaller than f32::MIN_POSITIVE but greater than 0
+        let subnormal: f32 = f32::MIN_POSITIVE / 2.0;
+        assert!(subnormal > 0.0 && subnormal < f32::MIN_POSITIVE);
+
+        let lhs = PrimitiveArray::from_iter((0..512).map(|i| i as f32)).into_array();
+
+        let patches = Patches::new(
+            512,
+            0,
+            buffer![509u16, 510, 511].into_array(),
+            buffer![f32::NAN, subnormal, f32::NEG_INFINITY].into_array(),
+            None,
+        )?;
+
+        let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone());
+        let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx)?;
+
+        let rhs = ConstantArray::new(subnormal, 512).into_array();
+
+        let result =
+            <Patched as CompareKernel>::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx)?
+                .unwrap();
+
+        let expected = BoolArray::from_indices(512, [510], Validity::NonNullable).into_array();
+
+        assert_arrays_eq!(expected, result);
+        Ok(())
+    }
+
+    #[test]
+    fn test_pos_neg_zero() -> VortexResult<()> {
+        let lhs = PrimitiveArray::from_iter([-0.0f32; 10]).into_array();
+
+        let patches = Patches::new(
+            10,
+            0,
+            buffer![5u16, 6, 7, 8, 9].into_array(),
+            buffer![f32::NAN, f32::NEG_INFINITY, 0f32, -0.0f32, f32::INFINITY].into_array(),
+            None,
+        )?;
+
+        let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone());
+        let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx)?;
+
+        let rhs = ConstantArray::new(0.0f32, 10).into_array();
+
+        let result =
+            <Patched as CompareKernel>::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx)?
+                .unwrap();
+
+        let expected = BoolArray::from_indices(10, [7], Validity::NonNullable).into_array();
+
+        assert_arrays_eq!(expected, result);
+
+        Ok(())
+    }
+}
diff --git a/vortex-array/src/arrays/patched/compute/filter.rs b/vortex-array/src/arrays/patched/compute/filter.rs
new file mode 100644
index 00000000000..258ffc4f0c0
--- /dev/null
+++ b/vortex-array/src/arrays/patched/compute/filter.rs
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use vortex_error::VortexResult;
+use vortex_mask::AllOr;
+use vortex_mask::Mask;
+
+use crate::ArrayRef;
+use crate::IntoArray;
+use crate::arrays::FilterArray;
+use crate::arrays::Patched;
+use crate::arrays::filter::FilterReduce;
+
+impl FilterReduce for Patched {
+    fn filter(array: &Self::Array, mask: &Mask) -> VortexResult<Option<ArrayRef>> {
+        // Find the contiguous chunk range that the mask covers. We use this to slice the inner
+        // components, then wrap the rest up with another FilterArray.
+        //
+        // This is helpful when we have a very selective filter that is clustered to a small
+        // range.
+        let (chunk_start, chunk_stop) = match mask.slices() {
+            AllOr::All | AllOr::None => {
+                // This is handled as the precondition to this method, see the FilterReduce
+                // documentation.
+                unreachable!("mask must be a MaskValues here")
+            }
+            AllOr::Some(slices) => {
+                let (first, _) = slices[0];
+                let (_, last) = slices[slices.len() - 1];
+
+                (first / 1024, last.div_ceil(1024))
+            }
+        };
+
+        // If all chunks already covered, there is nothing to do.
+        if chunk_start == 0 && chunk_stop == array.n_chunks {
+            return Ok(None);
+        }
+
+        let sliced = array.slice_chunks(chunk_start..chunk_stop)?;
+
+        let slice_start = chunk_start * 1024;
+        let slice_end = (chunk_stop * 1024).min(array.len());
+        let remainder = mask.slice(slice_start..slice_end);
+
+        Ok(Some(
+            FilterArray::new(sliced.into_array(), remainder).into_array(),
+        ))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use vortex_buffer::buffer;
+    use vortex_error::VortexResult;
+    use vortex_mask::Mask;
+
+    use crate::DynArray;
+    use crate::ExecutionCtx;
+    use crate::IntoArray;
+    use crate::LEGACY_SESSION;
+    use crate::arrays::FilterArray;
+    use crate::arrays::PatchedArray;
+    use crate::arrays::PrimitiveArray;
+    use crate::assert_arrays_eq;
+    use crate::optimizer::ArrayOptimizer;
+    use crate::patches::Patches;
+
+    #[test]
+    fn test_filter_noop() -> VortexResult<()> {
+        let array = buffer![u16::MIN; 5].into_array();
+        let patched_indices = buffer![3u8, 4].into_array();
+        let patched_values = buffer![u16::MAX; 2].into_array();
+
+        let patches = Patches::new(5, 0, patched_indices, patched_values, None)?;
+
+        let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone());
+
+        let array = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array();
+
+        let filtered = FilterArray::new(
+            array.clone(),
+            Mask::from_iter([true, false, false, false, true]),
+        )
+        .into_array();
+
+        let reduced = array.vtable().reduce_parent(&array, &filtered, 0)?;
+
+        // Filter does not get pushed through to child because it does not prune any chunks.
+        assert!(reduced.is_none());
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_filter_basic() -> VortexResult<()> {
+        // Basic test: filter with mask that crosses boundaries.
+        let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone());
+
+        let array = buffer![u16::MIN; 4096].into_array();
+        let patched_indices = buffer![1024u16, 1025].into_array();
+        let patched_values = buffer![u16::MAX, u16::MAX].into_array();
+
+        let patches = Patches::new(4096, 0, patched_indices, patched_values, None)?;
+
+        let array = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array();
+
+        // Filter that only touches the middle 2 chunks
+        let mask = Mask::from_indices(4096, vec![1024, 1025, 3000]);
+
+        let filtered = FilterArray::new(array.clone(), mask).into_array();
+        let reduced = array.vtable().reduce_parent(&array, &filtered, 0)?;
+
+        let expected = PrimitiveArray::from_iter([u16::MAX, u16::MAX, u16::MIN]).into_array();
+
+        assert_arrays_eq!(expected, reduced.unwrap());
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_filter_complex() -> VortexResult<()> {
+        // Same setup as the basic test, but the patches carry an offset of 1.
+        let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone());
+
+        let array = buffer![u16::MIN; 4096].into_array();
+        let patched_indices = buffer![1024u16, 1025].into_array();
+        let patched_values = buffer![u16::MAX, u16::MAX].into_array();
+
+        let patches = Patches::new(4096, 1, patched_indices, patched_values, None)?;
+
+        let array = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array();
+
+        // Filter that only touches the middle 2 chunks
+        let mask = Mask::from_indices(4096, vec![1024, 1025, 3000]);
+
+        let filtered = FilterArray::new(array.clone(), mask).into_array();
+        let reduced = array.vtable().reduce_parent(&array, &filtered, 0)?;
+
+        let expected = PrimitiveArray::from_iter([u16::MAX, u16::MIN, u16::MIN]).into_array();
+
+        assert_arrays_eq!(expected, reduced.unwrap());
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_filter_sliced() -> VortexResult<()> {
+        // Test filter on a sliced PatchedArray to exercise codepath where offset > 0.
+        let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone());
+
+        // Create a larger array (6 chunks) so we can slice and still have room
+        // for the filter to prune chunks.
+        let array = buffer![u16::MIN; 6144].into_array();
+        // Patches at indices 2048 and 2049 (start of chunk 2).
+        let patched_indices = buffer![2048u16, 2049].into_array();
+        let patched_values = buffer![u16::MAX, u16::MAX].into_array();
+
+        let patches = Patches::new(6144, 0, patched_indices, patched_values, None)?;
+
+        let patched = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?;
+
+        // Slice at chunk boundary to create offset > 0. After slicing [1024..5120],
+        // we have 4096 elements and patches are at relative indices 1024 and 1025.
+        let sliced = patched.slice(1024..5120)?.into_array();
+        assert_eq!(sliced.len(), 4096);
+
+        // Filter that only touches the middle 2 chunks (chunks 1 and 2).
+        // Indices 1024 and 1025 fall in chunk 1, and 3000 falls in chunk 2.
+        let mask = Mask::from_indices(4096, vec![1024, 1025, 3000]);
+
+        let filtered = sliced
+            .filter(mask)?
+            .optimize()?
+            .execute::<PrimitiveArray>(&mut ctx)?;
+
+        let expected = PrimitiveArray::from_iter([u16::MAX, u16::MAX, u16::MIN]);
+
+        assert_arrays_eq!(expected, filtered);
+
+        Ok(())
+    }
+}
diff --git a/vortex-array/src/arrays/patched/compute/mod.rs b/vortex-array/src/arrays/patched/compute/mod.rs
new file mode 100644
index 00000000000..8634a22f90b
--- /dev/null
+++ b/vortex-array/src/arrays/patched/compute/mod.rs
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+mod compare;
+mod filter;
+pub(crate) mod rules;
+mod take;
diff --git a/vortex-array/src/arrays/patched/compute/rules.rs b/vortex-array/src/arrays/patched/compute/rules.rs
new file mode 100644
index 00000000000..3ecb25c1efa
--- /dev/null
+++ b/vortex-array/src/arrays/patched/compute/rules.rs
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use crate::arrays::Patched;
+use crate::arrays::filter::FilterReduceAdaptor;
+use crate::arrays::slice::SliceReduceAdaptor;
+use crate::optimizer::rules::ParentRuleSet;
+
+pub(crate) const PARENT_RULES: ParentRuleSet<Patched> = ParentRuleSet::new(&[
+    ParentRuleSet::lift(&FilterReduceAdaptor(Patched)),
+    ParentRuleSet::lift(&SliceReduceAdaptor(Patched)),
+]);
diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs
new file mode 100644
index 00000000000..cdc4184792b
--- /dev/null
+++ b/vortex-array/src/arrays/patched/compute/take.rs
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use rustc_hash::FxHashMap;
+use vortex_buffer::Buffer;
+use vortex_error::VortexResult;
+
+use crate::ArrayRef;
+use crate::DynArray;
+use crate::ExecutionCtx;
+use crate::IntoArray;
+use crate::arrays::Patched;
+use 
crate::arrays::PrimitiveArray;
+use crate::arrays::dict::TakeExecute;
+use crate::arrays::primitive::PrimitiveArrayParts;
+use crate::dtype::IntegerPType;
+use crate::dtype::NativePType;
+use crate::match_each_native_ptype;
+use crate::match_each_unsigned_integer_ptype;
+
+impl TakeExecute for Patched {
+    fn take(
+        array: &Self::Array,
+        indices: &ArrayRef,
+        ctx: &mut ExecutionCtx,
+    ) -> VortexResult<Option<ArrayRef>> {
+        // Only pushdown take when we have primitive types.
+        if !array.dtype().is_primitive() {
+            return Ok(None);
+        }
+
+        // Perform take on the inner array, including the placeholders.
+        let inner = array
+            .inner
+            .take(indices.clone())?
+            .execute::<PrimitiveArray>(ctx)?;
+
+        let PrimitiveArrayParts {
+            buffer,
+            validity,
+            ptype,
+        } = inner.into_parts();
+
+        let indices_ptype = indices.dtype().as_ptype();
+
+        match_each_unsigned_integer_ptype!(indices_ptype, |I| {
+            match_each_native_ptype!(ptype, |V| {
+                let indices = indices.clone().execute::<PrimitiveArray>(ctx)?;
+                let patch_indices = array.indices.clone().execute::<PrimitiveArray>(ctx)?;
+                let patch_values = array.values.clone().execute::<PrimitiveArray>(ctx)?;
+                let mut output = Buffer::<V>::from_byte_buffer(buffer.unwrap_host()).into_mut();
+                take_map(
+                    output.as_mut(),
+                    indices.as_slice::<I>(),
+                    array.offset,
+                    array.len,
+                    array.n_chunks,
+                    array.n_lanes,
+                    array.lane_offsets.as_host().reinterpret::<u32>(),
+                    patch_indices.as_slice::<u16>(),
+                    patch_values.as_slice::<V>(),
+                );
+
+                // SAFETY: output and validity still have same length after take_map returns.
+                unsafe {
+                    Ok(Some(
+                        PrimitiveArray::new_unchecked(output.freeze(), validity).into_array(),
+                    ))
+                }
+            })
+        })
+    }
+}
+
+/// Take patches for the given `indices` and apply them onto an `output` using a hash map.
+///
+/// First, builds a hashmap from index to patch value, then uses the hashmap in a loop to collect
+/// the values.
+#[allow(clippy::too_many_arguments)]
+fn take_map<I: IntegerPType, V: NativePType>(
+    output: &mut [V],
+    indices: &[I],
+    offset: usize,
+    len: usize,
+    n_chunks: usize,
+    n_lanes: usize,
+    lane_offsets: &[u32],
+    patch_index: &[u16],
+    patch_value: &[V],
+) {
+    // Build a hashmap of logical index -> patch value; it holds at most one entry per patch.
+    let mut index_map = FxHashMap::with_capacity_and_hasher(patch_index.len(), Default::default());
+    for chunk in 0..n_chunks {
+        for lane in 0..n_lanes {
+            let lane_start = lane_offsets[chunk * n_lanes + lane];
+            let lane_end = lane_offsets[chunk * n_lanes + lane + 1];
+            for i in lane_start..lane_end {
+                let patch_idx = patch_index[i as usize];
+                let patch_value = patch_value[i as usize];
+
+                let index = chunk * 1024 + patch_idx as usize;
+                if index >= offset && index < offset + len {
+                    index_map.insert(index - offset, patch_value);
+                }
+            }
+        }
+    }
+
+    // Now, iterate the take indices using the prebuilt hashmap.
+    // Undefined/null indices will miss the hash map, which we can ignore.
+    for (output_index, index) in indices.iter().enumerate() {
+        let index = index.as_();
+        if let Some(&patch_value) = index_map.get(&index) {
+            output[output_index] = patch_value;
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ops::Range;
+
+    use vortex_buffer::buffer;
+    use vortex_error::VortexResult;
+    use vortex_session::VortexSession;
+
+    use crate::ArrayRef;
+    use crate::DynArray;
+    use crate::ExecutionCtx;
+    use crate::IntoArray;
+    use crate::arrays::PatchedArray;
+    use crate::arrays::PrimitiveArray;
+    use crate::assert_arrays_eq;
+    use crate::patches::Patches;
+
+    fn make_patched_array(
+        base: &[u16],
+        patch_indices: &[u32],
+        patch_values: &[u16],
+        slice: Range<usize>,
+    ) -> VortexResult<ArrayRef> {
+        let values = PrimitiveArray::from_iter(base.iter().copied()).into_array();
+        let patches = Patches::new(
+            base.len(),
+            0,
+            PrimitiveArray::from_iter(patch_indices.iter().copied()).into_array(),
+            PrimitiveArray::from_iter(patch_values.iter().copied()).into_array(),
+            None,
+        )?;
+
+        let session = VortexSession::empty();
+        let mut ctx = ExecutionCtx::new(session);
+
+        PatchedArray::from_array_and_patches(values, &patches, &mut ctx)?.slice(slice)
+    }
+
+    #[test]
+    fn test_take_basic() -> VortexResult<()> {
+        // Array with base values [0, 0, 0, 0, 0] patched at indices [1, 3] with values [10, 30]
+        let array = make_patched_array(&[0; 5], &[1, 3], &[10, 30], 0..5)?;
+
+        // Take indices [0, 1, 2, 3, 4] - should get [0, 10, 0, 30, 0]
+        let indices = buffer![0u32, 1, 2, 3, 4].into_array();
+        let result = array.take(indices)?.to_canonical()?.into_array();
+
+        let expected = PrimitiveArray::from_iter([0u16, 10, 0, 30, 0]).into_array();
+        assert_arrays_eq!(expected, result);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_take_sliced() -> VortexResult<()> {
+        let array = make_patched_array(&[0; 10], &[1, 3], &[100, 200], 2..10)?;
+
+        let indices = buffer![0u32, 1, 2, 3, 7].into_array();
+        let result = array.take(indices)?.to_canonical()?.into_array();
+
+        let expected = PrimitiveArray::from_iter([0u16, 200, 0, 0, 0]).into_array();
+        assert_arrays_eq!(expected, result);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_take_out_of_order() -> VortexResult<()> {
+        // Array with base values [0, 0, 0, 0, 0] patched at indices [1, 3] with values [10, 30]
+        let array = make_patched_array(&[0; 5], &[1, 3], &[10, 30], 0..5)?;
+
+        // Take indices in reverse order
+        let indices = buffer![4u32, 3, 2, 1, 0].into_array();
+        let result = array.take(indices)?.to_canonical()?.into_array();
+
+        let expected = PrimitiveArray::from_iter([0u16, 30, 0, 10, 0]).into_array();
+        assert_arrays_eq!(expected, result);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_take_duplicates() -> VortexResult<()> {
+        // Array with base values [0, 0, 0, 0, 0] patched at index [2] with value [99]
+        let array = make_patched_array(&[0; 5], &[2], &[99], 0..5)?;
+
+        // Take the same patched index multiple times
+        let indices = buffer![2u32, 2, 0, 2].into_array();
+        let result = array.take(indices)?.to_canonical()?.into_array();
+
+        // execute the array.
+        let _canonical = result.to_canonical()?.into_primitive();
+
+        let expected = PrimitiveArray::from_iter([99u16, 99, 0, 99]).into_array();
+        assert_arrays_eq!(expected, result);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_take_with_null_indices() -> VortexResult<()> {
+        use crate::arrays::BoolArray;
+        use crate::validity::Validity;
+
+        // Array: 10 elements, base value 0, patches at indices 2, 5, 8 with values 20, 50, 80
+        let array = make_patched_array(&[0; 10], &[2, 5, 8], &[20, 50, 80], 0..10)?;
+
+        // Take 10 indices, with nulls at positions 2, 5, 8
+        // Indices: [0, 2, 2, 5, 8, 0, 5, 8, 3, 1]
+        // Nulls:   [ ,  , N,  ,  , N,  ,  , N,  ]
+        // Position 2 (index=2, patched) is null
+        // Position 5 (index=0, unpatched) is null
+        // Position 8 (index=3, unpatched) is null
+        let indices = PrimitiveArray::new(
+            buffer![0u32, 2, 2, 5, 8, 0, 5, 8, 3, 1],
+            Validity::Array(
+                BoolArray::from_iter([
+                    true, true, false, true, true, false, true, true, false, true,
+                ])
+                .into_array(),
+            ),
+        );
+        let result = array
+            .take(indices.into_array())?
+            .to_canonical()?
+            .into_array();
+
+        // Expected: [0, 20, null, 50, 80, null, 50, 80, null, 0]
+        let expected = PrimitiveArray::new(
+            buffer![0u16, 20, 0, 50, 80, 0, 50, 80, 0, 0],
+            Validity::Array(
+                BoolArray::from_iter([
+                    true, true, false, true, true, false, true, true, false, true,
+                ])
+                .into_array(),
+            ),
+        );
+        assert_arrays_eq!(expected.into_array(), result);
+
+        Ok(())
+    }
+}
diff --git a/vortex-array/src/arrays/patched/mod.rs b/vortex-array/src/arrays/patched/mod.rs
new file mode 100644
index 00000000000..32edda880c8
--- /dev/null
+++ b/vortex-array/src/arrays/patched/mod.rs
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+mod array;
+mod compute;
+mod vtable;
+
+pub use array::*;
+use vortex_buffer::ByteBuffer;
+pub use vtable::*;
+
+/// Patches that have been transposed into GPU format.
+struct TransposedPatches {
+    n_chunks: usize,
+    n_lanes: usize,
+    lane_offsets: ByteBuffer,
+    indices: ByteBuffer,
+    values: ByteBuffer,
+}
+
+/// Number of lanes used at patch time for a value of type `V`.
+///
+/// This is *NOT* equal to the number of FastLanes lanes for the type `V`, rather this is going to
+/// correspond to how many "lanes" we will end up copying data on.
+///
+/// When applied on the CPU, this configuration doesn't really matter. On the GPU, it is based
+/// on the number of patches involved here.
+const fn patch_lanes<V>() -> usize {
+    // For types 32-bits or smaller, we use a 32 lane configuration, and for 64-bit we use 16 lanes.
+    // This matches up with the number of lanes we use to execute copying results from bit-unpacking
+    // from shared to global memory.
+    if size_of::<V>() < 8 { 32 } else { 16 }
+}
diff --git a/vortex-array/src/arrays/patched/vtable/kernels.rs b/vortex-array/src/arrays/patched/vtable/kernels.rs
new file mode 100644
index 00000000000..7994b19e02e
--- /dev/null
+++ b/vortex-array/src/arrays/patched/vtable/kernels.rs
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use crate::arrays::Patched;
+use crate::arrays::dict::TakeExecuteAdaptor;
+use crate::kernel::ParentKernelSet;
+use crate::scalar_fn::fns::binary::CompareExecuteAdaptor;
+
+pub(super) const PARENT_KERNELS: ParentKernelSet<Patched> = ParentKernelSet::new(&[
+    ParentKernelSet::lift(&CompareExecuteAdaptor(Patched)),
+    ParentKernelSet::lift(&TakeExecuteAdaptor(Patched)),
+]);
diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs
new file mode 100644
index 00000000000..62bc183e0d6
--- /dev/null
+++ b/vortex-array/src/arrays/patched/vtable/mod.rs
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+mod kernels;
+mod operations;
+mod slice;
+
+use std::hash::Hash;
+use 
std::hash::Hasher; +use std::sync::Arc; + +use vortex_buffer::Buffer; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; +use vortex_error::vortex_err; +use vortex_error::vortex_panic; +use vortex_session::VortexSession; + +use crate::ArrayEq; +use crate::ArrayHash; +use crate::ArrayRef; +use crate::Canonical; +use crate::DeserializeMetadata; +use crate::DynArray; +use crate::ExecutionCtx; +use crate::ExecutionResult; +use crate::IntoArray; +use crate::Precision; +use crate::ProstMetadata; +use crate::SerializeMetadata; +use crate::arrays::PrimitiveArray; +use crate::arrays::patched::PatchedArray; +use crate::arrays::patched::compute::rules::PARENT_RULES; +use crate::arrays::patched::vtable::kernels::PARENT_KERNELS; +use crate::arrays::primitive::PrimitiveArrayParts; +use crate::buffer::BufferHandle; +use crate::builders::ArrayBuilder; +use crate::builders::PrimitiveBuilder; +use crate::dtype::DType; +use crate::dtype::NativePType; +use crate::dtype::PType; +use crate::match_each_native_ptype; +use crate::serde::ArrayChildren; +use crate::stats::ArrayStats; +use crate::stats::StatsSetRef; +use crate::vtable; +use crate::vtable::Array; +use crate::vtable::ArrayId; +use crate::vtable::VTable; +use crate::vtable::ValidityChild; +use crate::vtable::ValidityVTableFromChild; + +vtable!(Patched); + +#[derive(Clone, Debug)] +pub struct Patched; + +impl ValidityChild for Patched { + fn validity_child(array: &PatchedArray) -> &ArrayRef { + &array.inner + } +} + +#[derive(Clone, prost::Message)] +pub struct PatchedMetadata { + /// Length of the `inner` child. + /// + /// This may not match the length of the wrapping PatchedArray, if for example + /// in a filter or slice it may be sliced to the nearest chunk boundary. + #[prost(uint64, tag = "1")] + pub(crate) inner_len: u64, + + /// Offset within the first chunk of `inner` where data begins. + /// + /// This may become nonzero after slicing. 
+ #[prost(uint32, tag = "2")] + pub(crate) offset: u32, + + /// Number of patches. This is the length of the `indices` and `values` children. + #[prost(uint32, tag = "3")] + pub(crate) n_patches: u32, + + /// Number of lanes the patches get spread over. + /// + /// By default, this is either 16 or 32 depending on the width of the type, but may change + /// in the future, so we save it on write. + #[prost(uint32, tag = "4")] + pub(crate) n_lanes: u32, +} + +impl VTable for Patched { + type Array = PatchedArray; + type Metadata = ProstMetadata; + type OperationsVTable = Self; + type ValidityVTable = ValidityVTableFromChild; + + fn vtable(_array: &Self::Array) -> &Self { + &Patched + } + + fn id(&self) -> ArrayId { + ArrayId::new_ref("vortex.patched") + } + + fn len(array: &Self::Array) -> usize { + array.len + } + + fn dtype(array: &Self::Array) -> &DType { + array.inner.dtype() + } + + fn stats(array: &Self::Array) -> StatsSetRef<'_> { + array.stats_set.to_ref(array.as_ref()) + } + + fn array_hash(array: &Self::Array, state: &mut H, precision: Precision) { + array.inner.array_hash(state, precision); + array.n_chunks.hash(state); + array.n_lanes.hash(state); + array.lane_offsets.array_hash(state, precision); + array.indices.array_hash(state, precision); + array.values.array_hash(state, precision); + } + + fn array_eq(array: &Self::Array, other: &Self::Array, precision: Precision) -> bool { + array.n_chunks == other.n_chunks + && array.n_lanes == other.n_lanes + && array.inner.array_eq(&other.inner, precision) + && array.lane_offsets.array_eq(&other.lane_offsets, precision) + && array.indices.array_eq(&other.indices, precision) + && array.values.array_eq(&other.values, precision) + } + + fn nbuffers(_array: &Self::Array) -> usize { + 1 + } + + fn buffer(array: &Self::Array, idx: usize) -> BufferHandle { + match idx { + 0 => array.lane_offsets.clone(), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), + } + } + + fn buffer_name(_array: &Self::Array, 
idx: usize) -> Option { + match idx { + 0 => Some("lane_offsets".to_string()), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), + } + } + + fn nchildren(_array: &Self::Array) -> usize { + 3 + } + + fn child(array: &Self::Array, idx: usize) -> ArrayRef { + match idx { + 0 => array.inner.clone(), + 1 => array.indices.clone(), + 2 => array.values.clone(), + _ => vortex_panic!("invalid child index for PatchedArray: {idx}"), + } + } + + fn child_name(_array: &Self::Array, idx: usize) -> String { + match idx { + 0 => "inner".to_string(), + 1 => "patch_indices".to_string(), + 2 => "patch_values".to_string(), + _ => vortex_panic!("invalid child index for PatchedArray: {idx}"), + } + } + + #[allow(clippy::cast_possible_truncation)] + fn metadata(array: &Self::Array) -> VortexResult { + Ok(ProstMetadata(PatchedMetadata { + inner_len: array.inner.len() as u64, + offset: array.offset as u32, + n_patches: array.indices.len() as u32, + n_lanes: array.n_lanes as u32, + })) + } + + fn serialize(metadata: Self::Metadata) -> VortexResult>> { + Ok(Some(metadata.serialize())) + } + + fn deserialize( + bytes: &[u8], + _dtype: &DType, + _len: usize, + _buffers: &[BufferHandle], + _session: &VortexSession, + ) -> VortexResult { + let inner = as DeserializeMetadata>::deserialize(bytes)?; + Ok(ProstMetadata(inner)) + } + + fn append_to_builder( + array: &Self::Array, + builder: &mut dyn ArrayBuilder, + ctx: &mut ExecutionCtx, + ) -> VortexResult<()> { + let dtype = array.dtype(); + + if !dtype.is_primitive() { + // Default pathway: canonicalize and propagate. + let canonical = array + .clone() + .into_array() + .execute::(ctx)? 
+ .into_array(); + builder.extend_from_array(&canonical); + return Ok(()); + } + + let ptype = dtype.as_ptype(); + + let len = array.len(); + + array.inner.append_to_builder(builder, ctx)?; + + let offset = array.offset; + let lane_offsets: Buffer = + Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); + let indices = array.indices.clone().execute::(ctx)?; + let values = array.values.clone().execute::(ctx)?; + + match_each_native_ptype!(ptype, |V| { + let typed_builder = builder + .as_any_mut() + .downcast_mut::>() + .vortex_expect("correctly typed builder"); + + // Overwrite the last `len` elements of the builder. These would have been + // populated by the inner.append_to_builder() call above. + let output = typed_builder.values_mut(); + let trailer = output.len() - len; + + apply_patches_primitive::( + &mut output[trailer..], + offset, + len, + array.n_chunks, + array.n_lanes, + &lane_offsets, + indices.as_slice::(), + values.as_slice::(), + ); + }); + + Ok(()) + } + + fn build( + dtype: &DType, + len: usize, + metadata: &Self::Metadata, + buffers: &[BufferHandle], + children: &dyn ArrayChildren, + ) -> VortexResult { + let inner_len = usize::try_from(metadata.inner_len).map_err(|_| { + vortex_err!( + "PatchedMetadata inner_len overflows usize: {}", + metadata.inner_len + ) + })?; + let offset = metadata.offset as usize; + + // n_chunks should correspond to the chunk in the `inner`. + // After slicing when offset > 0, there may be additional chunks. 
+ let n_chunks = (len + offset).div_ceil(1024); + let n_lanes = metadata.n_lanes as usize; + + let &[lane_offsets] = &buffers else { + vortex_bail!("invalid buffer count for PatchedArray"); + }; + + let inner = children.get(0, dtype, inner_len)?; + let indices = children.get(1, PType::U16.into(), metadata.n_patches as usize)?; + let values = children.get(2, dtype, metadata.n_patches as usize)?; + + Ok(PatchedArray { + inner, + n_chunks, + n_lanes, + offset, + len, + lane_offsets: lane_offsets.clone(), + indices, + values, + stats_set: ArrayStats::default(), + }) + } + + fn with_children(array: &mut Self::Array, mut children: Vec) -> VortexResult<()> { + vortex_ensure!( + children.len() == 3, + "PatchedArray must have exactly 3 children" + ); + + array.inner = children.remove(0); + array.indices = children.remove(0); + array.values = children.remove(0); + + Ok(()) + } + + fn execute(array: Arc>, ctx: &mut ExecutionCtx) -> VortexResult { + let inner = array + .inner + .clone() + .execute::(ctx)? + .into_primitive(); + + let PrimitiveArrayParts { + buffer, + ptype, + validity, + } = inner.into_parts(); + + let lane_offsets: Buffer = + Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); + let indices = array.indices.clone().execute::(ctx)?; + + // TODO(aduffy): add support for non-primitive PatchedArray patches application (?) 
+ let values = array.values.clone().execute::(ctx)?; + + let patched_values = match_each_native_ptype!(values.ptype(), |V| { + let offset = array.offset; + let len = array.len; + + let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); + + apply_patches_primitive::( + &mut output, + offset, + len, + array.n_chunks, + array.n_lanes, + &lane_offsets, + indices.as_slice::(), + values.as_slice::(), + ); + + let output = output.freeze(); + + PrimitiveArray::from_byte_buffer(output.into_byte_buffer(), ptype, validity) + }); + + Ok(ExecutionResult::done(patched_values.into_array())) + } + + fn execute_parent( + array: &Array, + parent: &ArrayRef, + child_idx: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + PARENT_KERNELS.execute(array, parent, child_idx, ctx) + } + + fn reduce_parent( + array: &Array, + parent: &ArrayRef, + child_idx: usize, + ) -> VortexResult> { + PARENT_RULES.evaluate(array, parent, child_idx) + } +} + +/// Apply patches on top of the existing value types. +#[allow(clippy::too_many_arguments)] +fn apply_patches_primitive( + output: &mut [V], + offset: usize, + len: usize, + n_chunks: usize, + n_lanes: usize, + lane_offsets: &[u32], + indices: &[u16], + values: &[V], +) { + for chunk in 0..n_chunks { + let start = lane_offsets[chunk * n_lanes] as usize; + let stop = lane_offsets[chunk * n_lanes + n_lanes] as usize; + + for idx in start..stop { + // the indices slice is measured as an offset into the 1024-value chunk. 
+ let index = chunk * 1024 + indices[idx] as usize; + if index < offset || index >= offset + len { + continue; + } + + let value = values[idx]; + output[index - offset] = value; + } + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + use vortex_buffer::buffer_mut; + use vortex_session::VortexSession; + + use crate::Canonical; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::builders::builder_with_capacity; + use crate::patches::Patches; + use crate::validity::Validity; + + #[test] + fn test_execute() { + let values = buffer![0u16; 1024].into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + let executed = array + .execute::(&mut ctx) + .unwrap() + .into_primitive() + .into_buffer::(); + + let mut expected = buffer_mut![0u16; 1024]; + expected[1] = 1; + expected[2] = 1; + expected[3] = 1; + + assert_eq!(executed, expected.freeze()); + } + + #[test] + fn test_execute_sliced() { + let values = buffer![0u16; 1024].into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .slice(3..1024) + .unwrap(); + + let executed = array + .execute::(&mut ctx) + .unwrap() + .into_primitive() + .into_buffer::(); + + let mut expected = buffer_mut![0u16; 1021]; + expected[0] = 1; + + assert_eq!(executed, expected.freeze()); + } + + #[test] + fn test_append_to_builder_non_nullable() { + let values 
= PrimitiveArray::new(buffer![0u16; 1024], Validity::NonNullable).into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![10u16, 20, 30].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + let mut builder = builder_with_capacity(array.dtype(), array.len()); + array.append_to_builder(builder.as_mut(), &mut ctx).unwrap(); + + let result = builder.finish(); + + let mut expected = buffer_mut![0u16; 1024]; + expected[1] = 10; + expected[2] = 20; + expected[3] = 30; + let expected = expected.into_array(); + + assert_arrays_eq!(expected, result); + } + + #[test] + fn test_append_to_builder_sliced() { + let values = PrimitiveArray::new(buffer![0u16; 1024], Validity::NonNullable).into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![10u16, 20, 30].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .slice(3..1024) + .unwrap(); + + let mut builder = builder_with_capacity(array.dtype(), array.len()); + array.append_to_builder(builder.as_mut(), &mut ctx).unwrap(); + + let result = builder.finish(); + + let mut expected = buffer_mut![0u16; 1021]; + expected[0] = 30; + let expected = expected.into_array(); + + assert_arrays_eq!(expected, result); + } + + #[test] + fn test_append_to_builder_with_validity() { + // Create inner array with nulls at indices 0 and 5. + let validity = Validity::from_iter((0..10).map(|i| i != 0 && i != 5)); + let values = PrimitiveArray::new(buffer![0u16; 10], validity).into_array(); + + // Apply patches at indices 1, 2, 3. 
+ let patches = Patches::new( + 10, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![10u16, 20, 30].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + let mut builder = builder_with_capacity(array.dtype(), array.len()); + array.append_to_builder(builder.as_mut(), &mut ctx).unwrap(); + + let result = builder.finish(); + + // Expected: null at 0, patched 10/20/30 at 1/2/3, zero at 4, null at 5, zeros at 6-9. + let expected = PrimitiveArray::from_option_iter([ + None, + Some(10u16), + Some(20), + Some(30), + Some(0), + None, + Some(0), + Some(0), + Some(0), + Some(0), + ]) + .into_array(); + + assert_arrays_eq!(expected, result); + } +} diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs new file mode 100644 index 00000000000..5fe3ccecb5e --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::DynArray; +use crate::ExecutionCtx; +use crate::arrays::PrimitiveArray; +use crate::arrays::patched::Patched; +use crate::arrays::patched::PatchedArray; +use crate::optimizer::ArrayOptimizer; +use crate::scalar::Scalar; +use crate::vtable::OperationsVTable; + +impl OperationsVTable for Patched { + fn scalar_at( + array: &PatchedArray, + index: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult { + let chunk = (index + array.offset) / 1024; + + #[expect( + clippy::cast_possible_truncation, + reason = "N % 1024 always fits in u16" + )] + let chunk_index = ((index + array.offset) % 1024) as u16; + + let lane = (index + array.offset) % array.n_lanes; + + let range = array.lane_range(chunk, lane); + + // Get the range of indices corresponding to 
the lane, potentially decoding them to avoid + // the overhead of repeated scalar_at calls. + let patch_indices = array + .indices + .slice(range.clone())? + .optimize()? + .execute::(ctx)?; + + // NOTE: we do linear scan as lane has <= 32 patches, binary search would likely + // be slower. + for (&patch_index, idx) in std::iter::zip(patch_indices.as_slice::(), range) { + if patch_index == chunk_index { + return array.values.scalar_at(idx)?.cast(array.dtype()); + } + } + + // Otherwise, access the underlying value. + array.inner.scalar_at(index) + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + use vortex_session::VortexSession; + + use crate::DynArray; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::arrays::Patched; + use crate::arrays::PatchedArray; + use crate::dtype::Nullability; + use crate::optimizer::ArrayOptimizer; + use crate::patches::Patches; + use crate::scalar::Scalar; + + #[test] + fn test_simple() { + let values = buffer![0u16; 1024].into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + assert_eq!( + array.scalar_at(0).unwrap(), + Scalar::primitive(0u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(1).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(2).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(3).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + } + + #[test] + fn test_multi_chunk() { + let values = buffer![0u16; 4096].into_array(); + let patches = Patches::new( + 4096, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = 
VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + for index in 0..array.len() { + let value = array.scalar_at(index).unwrap(); + + if [1, 2, 3].contains(&index) { + assert_eq!(value, 1u16.into()); + } else { + assert_eq!(value, 0u16.into()); + } + } + } + + #[test] + fn test_multi_chunk_sliced() { + let values = buffer![0u16; 4096].into_array(); + let patches = Patches::new( + 4096, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array() + .slice(3..4096) + .unwrap() + .optimize() + .unwrap(); + + assert!(array.is::()); + + assert_eq!(array.scalar_at(0).unwrap(), 1u16.into()); + for index in 1..array.len() { + assert_eq!(array.scalar_at(index).unwrap(), 0u16.into()); + } + } +} diff --git a/vortex-array/src/arrays/patched/vtable/slice.rs b/vortex-array/src/arrays/patched/vtable/slice.rs new file mode 100644 index 00000000000..c469bf9ece0 --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/slice.rs @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Range; + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::DynArray; +use crate::IntoArray; +use crate::arrays::Patched; +use crate::arrays::PatchedArray; +use crate::arrays::slice::SliceReduce; +use crate::stats::ArrayStats; + +/// Is this something that uses a SliceKernel or a SliceReduce +impl SliceReduce for Patched { + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + // We **always** slice at 1024-element chunk boundaries. We keep the offset + len + // around so that when we execute we know how much to chop off. 
+ let new_offset = (range.start + array.offset) % 1024; + let new_len = range.end - range.start; + + let chunk_start = (range.start + array.offset) / 1024; + let chunk_stop = (range.end + array.offset).div_ceil(1024); + + let inner = array.inner.slice(range.start..range.end)?; + + // Slice to only maintain offsets to the sliced chunks + let sliced_lane_offsets = array + .lane_offsets + .slice_typed::((chunk_start * array.n_lanes)..(chunk_stop * array.n_lanes) + 1); + + Ok(Some( + PatchedArray { + inner, + n_chunks: chunk_stop - chunk_start, + n_lanes: array.n_lanes, + + offset: new_offset, + len: new_len, + lane_offsets: sliced_lane_offsets, + indices: array.indices.clone(), + values: array.values.clone(), + stats_set: ArrayStats::default(), + } + .into_array(), + )) + } +} + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use rstest::rstest; + use vortex_buffer::Buffer; + use vortex_buffer::BufferMut; + use vortex_buffer::buffer; + use vortex_error::VortexResult; + + use crate::Canonical; + use crate::DynArray; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::dtype::NativePType; + use crate::patches::Patches; + + #[test] + fn test_reduce() -> VortexResult<()> { + let values = buffer![0u16; 512].into_array(); + let patch_indices = buffer![1u32, 8, 30].into_array(); + let patch_values = buffer![u16::MAX; 3].into_array(); + let patches = Patches::new(512, 0, patch_indices, patch_values, None)?; + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let patched_array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx)?; + + let sliced = patched_array.slice(1..10)?; + + insta::assert_snapshot!( + sliced.display_tree_encodings_only(), + @r#" + root: vortex.patched(u16, len=9) + inner: vortex.primitive(u16, len=9) + patch_indices: vortex.primitive(u16, len=3) + patch_values: vortex.primitive(u16, 
len=3) + "#); + + let executed = sliced.execute::(&mut ctx)?.into_primitive(); + + assert_eq!( + &[u16::MAX, 0, 0, 0, 0, 0, 0, u16::MAX, 0], + executed.as_slice::() + ); + + Ok(()) + } + + #[rstest] + #[case::trivial(buffer![1u64; 2], buffer![1u32], buffer![u64::MAX], 1..2)] + #[case::one_chunk(buffer![0u64; 1024], buffer![1u32, 8, 30], buffer![u64::MAX; 3], 1..10)] + #[case::multichunk(buffer![1u64; 10_000], buffer![0u32, 1, 2, 3, 4, 16, 17, 18, 19, 1024, 2048, 2049], buffer![u64::MAX; 12], 1024..5000)] + fn test_cases( + #[case] inner: Buffer, + #[case] patch_indices: Buffer, + #[case] patch_values: Buffer, + #[case] range: Range, + ) { + // Create patched array. + let patches = Patches::new( + inner.len(), + 0, + patch_indices.into_array(), + patch_values.into_array(), + None, + ) + .unwrap(); + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let patched_array = + PatchedArray::from_array_and_patches(inner.into_array(), &patches, &mut ctx).unwrap(); + + // Verify that applying slice first yields same result as applying slice at end. 
+ let slice_first = patched_array + .slice(range.clone()) + .unwrap() + .execute::(&mut ctx) + .unwrap() + .into_array(); + + let slice_last = patched_array + .into_array() + .execute::(&mut ctx) + .unwrap() + .into_primitive() + .slice(range) + .unwrap(); + + assert_arrays_eq!(slice_first, slice_last); + } + + #[test] + fn test_stacked_slices() { + let values = PrimitiveArray::from_iter(0u64..10_000).into_array(); + + let patched_indices = buffer![1u32, 2, 1024, 2048, 3072, 3088].into_array(); + let patched_values = buffer![0u64, 1, 2, 3, 4, 5].into_array(); + + let patches = Patches::new(10_000, 0, patched_indices, patched_values, None).unwrap(); + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let patched_array = + PatchedArray::from_array_and_patches(values, &patches, &mut ctx).unwrap(); + + let sliced = patched_array + .slice(1024..5000) + .unwrap() + .slice(1..2065) + .unwrap() + .execute::(&mut ctx) + .unwrap() + .into_array(); + + let mut expected = BufferMut::from_iter(1025u64..=3088); + expected[1023] = 3; + expected[2047] = 4; + expected[2063] = 5; + + let expected = expected.into_array(); + + assert_arrays_eq!(expected, sliced); + } +} diff --git a/vortex-array/src/builders/primitive.rs b/vortex-array/src/builders/primitive.rs index 4070a22e70d..f486cadc83f 100644 --- a/vortex-array/src/builders/primitive.rs +++ b/vortex-array/src/builders/primitive.rs @@ -62,6 +62,11 @@ impl PrimitiveBuilder { self.values.as_ref() } + /// Returns the raw primitive values in this builder as a mutable slice. + pub fn values_mut(&mut self) -> &mut [T] { + self.values.as_mut() + } + /// Create a new handle to the next `len` uninitialized values in the builder. 
/// /// All reads/writes through the handle to the values buffer or the validity buffer will operate diff --git a/vortex-array/src/session/mod.rs b/vortex-array/src/session/mod.rs index 3c7c920e692..6fc5a9ea2e0 100644 --- a/vortex-array/src/session/mod.rs +++ b/vortex-array/src/session/mod.rs @@ -17,6 +17,7 @@ use crate::arrays::List; use crate::arrays::ListView; use crate::arrays::Masked; use crate::arrays::Null; +use crate::arrays::Patched; use crate::arrays::Primitive; use crate::arrays::Struct; use crate::arrays::VarBin; @@ -70,8 +71,9 @@ impl Default for ArraySession { // Register the utility encodings. this.register(Chunked); this.register(Constant); - this.register(Masked); this.register(List); + this.register(Masked); + this.register(Patched); this.register(VarBin); this diff --git a/vortex-buffer/public-api.lock b/vortex-buffer/public-api.lock index 29b941c77d4..f3b52183e39 100644 --- a/vortex-buffer/public-api.lock +++ b/vortex-buffer/public-api.lock @@ -550,6 +550,10 @@ pub fn vortex_buffer::Buffer::from_arrow_buffer(arrow: arrow_buffer::buffer: pub fn vortex_buffer::Buffer::into_arrow_buffer(self) -> arrow_buffer::buffer::immutable::Buffer +impl vortex_buffer::Buffer + +pub fn vortex_buffer::Buffer::reinterpret(&self) -> &[V] + impl vortex_buffer::Buffer pub fn vortex_buffer::Buffer::from_arrow_scalar_buffer(arrow: arrow_buffer::buffer::scalar::ScalarBuffer) -> Self diff --git a/vortex-buffer/src/buffer.rs b/vortex-buffer/src/buffer.rs index 053cb5baee5..11c360f21e8 100644 --- a/vortex-buffer/src/buffer.rs +++ b/vortex-buffer/src/buffer.rs @@ -523,6 +523,34 @@ impl Buffer { } } +impl ByteBuffer { + /// Reinterpret the byte buffer as a slice of values of type `V`. + /// + /// # Panics + /// + /// This method will only work if the buffer has the proper size and alignment to be viewed + /// as a buffer of `V` values. 
+ pub fn reinterpret(&self) -> &[V] { + assert!( + self.is_aligned(Alignment::of::()), + "ByteBuffer not properly aligned to {}", + type_name::() + ); + + assert_eq!( + self.length % size_of::(), + 0, + "ByteBuffer length not a multiple of the value length" + ); + + let v_len = self.length / size_of::(); + let v_ptr = self.bytes.as_ptr().cast::(); + + // SAFETY: we checked that alignment and length are suitable to treat this as a &[V]. + unsafe { std::slice::from_raw_parts(v_ptr, v_len) } + } +} + /// An iterator over Buffer elements. /// /// This is an analog to the `std::slice::Iter` type. diff --git a/vortex-file/src/strategy.rs b/vortex-file/src/strategy.rs index 4d6031a220c..dc29e7cb4f8 100644 --- a/vortex-file/src/strategy.rs +++ b/vortex-file/src/strategy.rs @@ -21,6 +21,7 @@ use vortex_array::arrays::List; use vortex_array::arrays::ListView; use vortex_array::arrays::Masked; use vortex_array::arrays::Null; +use vortex_array::arrays::Patched; use vortex_array::arrays::Primitive; use vortex_array::arrays::Struct; use vortex_array::arrays::VarBin; @@ -104,6 +105,7 @@ pub static ALLOWED_ENCODINGS: LazyLock = LazyLock::new(|| { session.register(Delta); session.register(FoR); session.register(FSST); + session.register(Patched); session.register(Pco); session.register(RLE); session.register(RunEnd); diff --git a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs index 0c68d5e9abd..b6c18064e0f 100644 --- a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs +++ b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs @@ -16,6 +16,7 @@ mod delta; mod dict; mod for_; mod fsst; +mod patched; mod pco; mod rle; mod runend; @@ -37,11 +38,12 @@ pub fn fixtures() -> Vec> { Box::new(bytebool::ByteBoolFixture), Box::new(datetimeparts::DateTimePartsFixture), Box::new(decimal_byte_parts::DecimalBytePartsFixture), - // Reenable this once delta is stable + 
// Re-enable this once delta is stable // Box::new(delta::DeltaFixture), Box::new(dict::DictFixture), Box::new(fsst::FsstFixture), Box::new(for_::FoRFixture), + Box::new(patched::PatchedFixture), Box::new(pco::PcoFixture), Box::new(rle::RleFixture), Box::new(runend::RunEndFixture), diff --git a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/patched.rs b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/patched.rs new file mode 100644 index 00000000000..b1363ad7e02 --- /dev/null +++ b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/patched.rs @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::Patched; +use vortex_array::arrays::PatchedArray; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::patches::Patches; +use vortex_array::vtable::ArrayId; +use vortex_array::vtable::ArrayVTable; +use vortex_error::VortexResult; +use vortex_session::VortexSession; + +use crate::fixtures::FlatLayoutFixture; + +pub struct PatchedFixture; + +impl FlatLayoutFixture for PatchedFixture { + fn name(&self) -> &str { + "patched.vortex" + } + + fn description(&self) -> &str { + "A set of patches to apply on top of an inner array" + } + + fn build(&self) -> VortexResult { + let mut ctx = ExecutionCtx::new(VortexSession::empty()); + + let array = PrimitiveArray::from_option_iter((0u64..2048).map(Some)).into_array(); + let patches = Patches::new( + 2048, + 0, + PrimitiveArray::from_iter([0u32, 1024, 1025, 1026]).into_array(), + PrimitiveArray::from_option_iter([Some(1u64), Some(2), Some(3), Some(4)]).into_array(), + None, + )?; + + Ok(PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array()) + } + + fn expected_encodings(&self) -> Vec { + vec![Patched.id()] + } +}