Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
251 changes: 251 additions & 0 deletions vortex-buffer/benches/vortex_bitbuffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ use arrow_buffer::BooleanBufferBuilder;
use divan::Bencher;
use vortex_buffer::BitBuffer;
use vortex_buffer::BitBufferMut;
use vortex_buffer::ScalarBitIndexIterator;
use vortex_buffer::collect_set_indices;
use vortex_buffer::collect_set_indices_scalar;

fn main() {
// Pre-warm CPUID feature detection so the one-time probe cost is never
Expand All @@ -19,6 +22,7 @@ fn main() {
let _ = is_x86_feature_detected!("avx2");
let _ = is_x86_feature_detected!("avx512f");
let _ = is_x86_feature_detected!("avx512vpopcntdq");
let _ = is_x86_feature_detected!("bmi2");
}

divan::main();
Expand Down Expand Up @@ -290,3 +294,250 @@ fn set_indices_arrow_buffer(bencher: Bencher, length: usize) {
}
});
}

/// Benchmarks iterating every set-bit index with `ScalarBitIndexIterator`.
///
/// The input is a `length`-bit buffer whose even positions are set. It is built once,
/// outside the closure handed to `bench_refs`; each yielded index is passed through
/// `divan::black_box`.
#[divan::bench(args = INPUT_SIZE)]
fn set_indices_scalar_optimized(bencher: Bencher, length: usize) {
    let alternating = (0..length).map(|i| i % 2 == 0);
    let bits = BitBuffer::from_iter(alternating);
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let set_positions =
            ScalarBitIndexIterator::new(bits.inner().as_slice(), bits.offset(), bits.len());
        for position in set_positions {
            divan::black_box(position);
        }
    });
}

/// Benchmarks `collect_set_indices_scalar` on a `length`-bit buffer whose even
/// positions are set.
///
/// The function is called with the buffer's raw byte slice, bit offset and bit length;
/// the returned vector is passed through `divan::black_box`.
#[divan::bench(args = INPUT_SIZE)]
fn collect_set_indices_scalar_bench(bencher: Bencher, length: usize) {
    let alternating = (0..length).map(|i| i % 2 == 0);
    let bits = BitBuffer::from_iter(alternating);
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let indices =
            collect_set_indices_scalar(bits.inner().as_slice(), bits.offset(), bits.len());
        divan::black_box(indices);
    });
}

/// Benchmarks `collect_set_indices` (the entry point this file labels as the SIMD
/// variant) on a `length`-bit buffer whose even positions are set.
///
/// The function is called with the buffer's raw byte slice, bit offset and bit length;
/// the returned vector is passed through `divan::black_box`.
#[divan::bench(args = INPUT_SIZE)]
fn collect_set_indices_simd_bench(bencher: Bencher, length: usize) {
    let alternating = (0..length).map(|i| i % 2 == 0);
    let bits = BitBuffer::from_iter(alternating);
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let indices = collect_set_indices(bits.inner().as_slice(), bits.offset(), bits.len());
        divan::black_box(indices);
    });
}

// ---------------------------------------------------------------------------
// Density-varied benchmarks: 100k bits at different set-bit densities
// ---------------------------------------------------------------------------

/// Number of bits in every buffer built by the `make_*` helpers for the density benchmarks.
const LARGE_N: usize = 100_000;

/// Sparse input: a `LARGE_N`-bit buffer with every 100th bit set (1% density).
fn make_sparse() -> BitBuffer {
    let every_hundredth = |i: usize| i % 100 == 0;
    BitBuffer::from_iter((0..LARGE_N).map(every_hundredth))
}

/// Dense input: a `LARGE_N`-bit buffer with every even-indexed bit set (50% density).
fn make_dense() -> BitBuffer {
    let is_even = |i: usize| i % 2 == 0;
    BitBuffer::from_iter((0..LARGE_N).map(is_even))
}

/// Nearly-full input: a `LARGE_N`-bit buffer with every bit set except each 100th one
/// (99% density).
fn make_nearly_full() -> BitBuffer {
    let not_hundredth = |i: usize| i % 100 != 0;
    BitBuffer::from_iter((0..LARGE_N).map(not_hundredth))
}

/// Arrow counterpart of the sparse input: a `LARGE_N`-bit `BooleanBuffer` with every
/// 100th bit set (1% density), wrapped in `Arrow`.
fn make_sparse_arrow() -> Arrow<BooleanBuffer> {
    let bits = (0..LARGE_N).map(|i| i % 100 == 0);
    Arrow(BooleanBuffer::from_iter(bits))
}

/// Arrow counterpart of the dense input: a `LARGE_N`-bit `BooleanBuffer` with every
/// even-indexed bit set (50% density), wrapped in `Arrow`.
fn make_dense_arrow() -> Arrow<BooleanBuffer> {
    let bits = (0..LARGE_N).map(|i| i % 2 == 0);
    Arrow(BooleanBuffer::from_iter(bits))
}

/// Arrow counterpart of the nearly-full input: a `LARGE_N`-bit `BooleanBuffer` with
/// every bit set except each 100th one (99% density), wrapped in `Arrow`.
fn make_nearly_full_arrow() -> Arrow<BooleanBuffer> {
    let bits = (0..LARGE_N).map(|i| i % 100 != 0);
    Arrow(BooleanBuffer::from_iter(bits))
}

// --- Arrow baseline at different densities ---

/// Arrow baseline at 1% density: iterates `BooleanBuffer::set_indices()` over the
/// buffer from `make_sparse_arrow`, passing each index through `divan::black_box`.
#[divan::bench]
fn density_1pct_arrow(bencher: Bencher) {
    let arrow_bits = make_sparse_arrow();
    bencher.with_inputs(|| &arrow_bits).bench_refs(|arrow_bits| {
        let set_positions = arrow_bits.0.set_indices();
        for position in set_positions {
            divan::black_box(position);
        }
    });
}

/// Arrow baseline at 50% density: iterates `BooleanBuffer::set_indices()` over the
/// buffer from `make_dense_arrow`, passing each index through `divan::black_box`.
#[divan::bench]
fn density_50pct_arrow(bencher: Bencher) {
    let arrow_bits = make_dense_arrow();
    bencher.with_inputs(|| &arrow_bits).bench_refs(|arrow_bits| {
        let set_positions = arrow_bits.0.set_indices();
        for position in set_positions {
            divan::black_box(position);
        }
    });
}

/// Arrow baseline at 99% density: iterates `BooleanBuffer::set_indices()` over the
/// buffer from `make_nearly_full_arrow`, passing each index through `divan::black_box`.
#[divan::bench]
fn density_99pct_arrow(bencher: Bencher) {
    let arrow_bits = make_nearly_full_arrow();
    bencher.with_inputs(|| &arrow_bits).bench_refs(|arrow_bits| {
        let set_positions = arrow_bits.0.set_indices();
        for position in set_positions {
            divan::black_box(position);
        }
    });
}

// --- Current vortex `BitBuffer::set_indices` (now returns `ScalarBitIndexIterator` instead of delegating to Arrow) ---

/// Benchmarks the public `BitBuffer::set_indices()` iterator at 1% density, using the
/// buffer from `make_sparse`. Each yielded index goes through `divan::black_box`.
#[divan::bench]
fn density_1pct_vortex_current(bencher: Bencher) {
    let bits = make_sparse();
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let set_positions = bits.set_indices();
        for position in set_positions {
            divan::black_box(position);
        }
    });
}

/// Benchmarks the public `BitBuffer::set_indices()` iterator at 50% density, using the
/// buffer from `make_dense`. Each yielded index goes through `divan::black_box`.
#[divan::bench]
fn density_50pct_vortex_current(bencher: Bencher) {
    let bits = make_dense();
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let set_positions = bits.set_indices();
        for position in set_positions {
            divan::black_box(position);
        }
    });
}

/// Benchmarks the public `BitBuffer::set_indices()` iterator at 99% density, using the
/// buffer from `make_nearly_full`. Each yielded index goes through `divan::black_box`.
#[divan::bench]
fn density_99pct_vortex_current(bencher: Bencher) {
    let bits = make_nearly_full();
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let set_positions = bits.set_indices();
        for position in set_positions {
            divan::black_box(position);
        }
    });
}

// --- New scalar iterator ---

/// Benchmarks a directly constructed `ScalarBitIndexIterator` at 1% density.
///
/// The iterator is built from the raw byte slice, bit offset and bit length of the
/// buffer from `make_sparse`; each yielded index goes through `divan::black_box`.
#[divan::bench]
fn density_1pct_scalar_iter(bencher: Bencher) {
    let bits = make_sparse();
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let set_positions =
            ScalarBitIndexIterator::new(bits.inner().as_slice(), bits.offset(), bits.len());
        for position in set_positions {
            divan::black_box(position);
        }
    });
}

/// Benchmarks a directly constructed `ScalarBitIndexIterator` at 50% density.
///
/// The iterator is built from the raw byte slice, bit offset and bit length of the
/// buffer from `make_dense`; each yielded index goes through `divan::black_box`.
#[divan::bench]
fn density_50pct_scalar_iter(bencher: Bencher) {
    let bits = make_dense();
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let set_positions =
            ScalarBitIndexIterator::new(bits.inner().as_slice(), bits.offset(), bits.len());
        for position in set_positions {
            divan::black_box(position);
        }
    });
}

/// Benchmarks a directly constructed `ScalarBitIndexIterator` at 99% density.
///
/// The iterator is built from the raw byte slice, bit offset and bit length of the
/// buffer from `make_nearly_full`; each yielded index goes through `divan::black_box`.
#[divan::bench]
fn density_99pct_scalar_iter(bencher: Bencher) {
    let bits = make_nearly_full();
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let set_positions =
            ScalarBitIndexIterator::new(bits.inner().as_slice(), bits.offset(), bits.len());
        for position in set_positions {
            divan::black_box(position);
        }
    });
}

// --- Bulk scalar collect ---

/// Benchmarks `collect_set_indices_scalar` at 1% density on the buffer from
/// `make_sparse`; the returned vector goes through `divan::black_box`.
#[divan::bench]
fn density_1pct_collect_scalar(bencher: Bencher) {
    let bits = make_sparse();
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let indices =
            collect_set_indices_scalar(bits.inner().as_slice(), bits.offset(), bits.len());
        divan::black_box(indices);
    });
}

/// Benchmarks `collect_set_indices_scalar` at 50% density on the buffer from
/// `make_dense`; the returned vector goes through `divan::black_box`.
#[divan::bench]
fn density_50pct_collect_scalar(bencher: Bencher) {
    let bits = make_dense();
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let indices =
            collect_set_indices_scalar(bits.inner().as_slice(), bits.offset(), bits.len());
        divan::black_box(indices);
    });
}

/// Benchmarks `collect_set_indices_scalar` at 99% density on the buffer from
/// `make_nearly_full`; the returned vector goes through `divan::black_box`.
#[divan::bench]
fn density_99pct_collect_scalar(bencher: Bencher) {
    let bits = make_nearly_full();
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let indices =
            collect_set_indices_scalar(bits.inner().as_slice(), bits.offset(), bits.len());
        divan::black_box(indices);
    });
}

// --- Bulk SIMD/BMI2 collect ---

/// Benchmarks `collect_set_indices` (labelled the SIMD/BMI2 variant in this file) at
/// 1% density on the buffer from `make_sparse`; the returned vector goes through
/// `divan::black_box`.
#[divan::bench]
fn density_1pct_collect_simd(bencher: Bencher) {
    let bits = make_sparse();
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let indices = collect_set_indices(bits.inner().as_slice(), bits.offset(), bits.len());
        divan::black_box(indices);
    });
}

/// Benchmarks `collect_set_indices` (labelled the SIMD/BMI2 variant in this file) at
/// 50% density on the buffer from `make_dense`; the returned vector goes through
/// `divan::black_box`.
#[divan::bench]
fn density_50pct_collect_simd(bencher: Bencher) {
    let bits = make_dense();
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let indices = collect_set_indices(bits.inner().as_slice(), bits.offset(), bits.len());
        divan::black_box(indices);
    });
}

/// Benchmarks `collect_set_indices` (labelled the SIMD/BMI2 variant in this file) at
/// 99% density on the buffer from `make_nearly_full`; the returned vector goes through
/// `divan::black_box`.
#[divan::bench]
fn density_99pct_collect_simd(bencher: Bencher) {
    let bits = make_nearly_full();
    bencher.with_inputs(|| &bits).bench_refs(|bits| {
        let indices = collect_set_indices(bits.inner().as_slice(), bits.offset(), bits.len());
        divan::black_box(indices);
    });
}
22 changes: 21 additions & 1 deletion vortex-buffer/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ pub fn vortex_buffer::BitBuffer::clear(&mut self)

pub fn vortex_buffer::BitBuffer::collect_bool<F: core::ops::function::FnMut(usize) -> bool>(len: usize, f: F) -> Self

pub fn vortex_buffer::BitBuffer::collect_set_indices(&self) -> alloc::vec::Vec<u32>

pub fn vortex_buffer::BitBuffer::empty() -> Self

pub fn vortex_buffer::BitBuffer::false_count(&self) -> usize
Expand Down Expand Up @@ -280,7 +282,7 @@ pub fn vortex_buffer::BitBuffer::new_with_offset(buffer: vortex_buffer::ByteBuff

pub fn vortex_buffer::BitBuffer::offset(&self) -> usize

pub fn vortex_buffer::BitBuffer::set_indices(&self) -> arrow_buffer::util::bit_iterator::BitIndexIterator<'_>
pub fn vortex_buffer::BitBuffer::set_indices(&self) -> vortex_buffer::ScalarBitIndexIterator<'_>

pub fn vortex_buffer::BitBuffer::set_slices(&self) -> arrow_buffer::util::bit_iterator::BitSliceIterator<'_>

Expand Down Expand Up @@ -1012,6 +1014,20 @@ pub fn vortex_buffer::Iter<'_, T>::len(&self) -> usize

impl<T> vortex_buffer::trusted_len::TrustedLen for vortex_buffer::Iter<'_, T>

pub struct vortex_buffer::ScalarBitIndexIterator<'a>

impl<'a> vortex_buffer::ScalarBitIndexIterator<'a>

pub fn vortex_buffer::ScalarBitIndexIterator<'a>::new(buffer: &'a [u8], offset: usize, len: usize) -> Self

impl core::iter::traits::iterator::Iterator for vortex_buffer::ScalarBitIndexIterator<'_>

pub type vortex_buffer::ScalarBitIndexIterator<'_>::Item = usize

pub fn vortex_buffer::ScalarBitIndexIterator<'_>::next(&mut self) -> core::option::Option<usize>

pub fn vortex_buffer::ScalarBitIndexIterator<'_>::size_hint(&self) -> (usize, core::option::Option<usize>)

pub const vortex_buffer::ALIGNMENT_TO_HOST_COPY: vortex_buffer::Alignment

pub trait vortex_buffer::AlignedBuf: bytes::buf::buf_impl::Buf
Expand All @@ -1026,6 +1042,10 @@ pub fn B::copy_to_aligned(&mut self, len: usize, alignment: vortex_buffer::Align

pub fn B::copy_to_const_aligned<const A: usize>(&mut self, len: usize) -> vortex_buffer::ConstByteBuffer<A>

pub fn vortex_buffer::collect_set_indices(buffer: &[u8], offset: usize, len: usize) -> alloc::vec::Vec<u32>

pub fn vortex_buffer::collect_set_indices_scalar(buffer: &[u8], offset: usize, len: usize) -> alloc::vec::Vec<u32>

pub fn vortex_buffer::get_bit(buf: &[u8], index: usize) -> bool

pub unsafe fn vortex_buffer::get_bit_unchecked(buf: *const u8, index: usize) -> bool
Expand Down
23 changes: 19 additions & 4 deletions vortex-buffer/src/bit/buf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@ use crate::Buffer;
use crate::BufferMut;
use crate::ByteBuffer;
use crate::bit::BitChunks;
use crate::bit::BitIndexIterator;
use crate::bit::BitIterator;
use crate::bit::BitSliceIterator;
use crate::bit::ScalarBitIndexIterator;
use crate::bit::UnalignedBitChunk;
use crate::bit::count_ones::count_ones;
use crate::bit::get_bit_unchecked;
use crate::bit::ops::bitwise_binary_op;
use crate::bit::ops::bitwise_unary_op;
use crate::bit::set_indices::collect_set_indices as collect_set_indices_fn;
use crate::buffer;

/// An immutable bitset stored as a packed byte buffer.
Expand Down Expand Up @@ -329,9 +330,23 @@ impl BitBuffer {
BitIterator::new(self.buffer.as_slice(), self.offset, self.len)
}

/// Iterator over set indices of the underlying buffer
pub fn set_indices(&self) -> BitIndexIterator<'_> {
BitIndexIterator::new(self.buffer.as_slice(), self.offset, self.len)
/// Returns an iterator over the indices of the set bits in this buffer.
///
/// The iterator is a [`ScalarBitIndexIterator`] built from the backing byte slice
/// together with this buffer's bit offset and bit length.
///
/// It replaces Arrow's `BitIndexIterator` for speed. Figures reported with the change
/// that introduced it: up to 3.5x faster at 50% density and 2.3x at 99% density for
/// 100K-bit buffers.
pub fn set_indices(&self) -> ScalarBitIndexIterator<'_> {
    let bytes = self.buffer.as_slice();
    ScalarBitIndexIterator::new(bytes, self.offset, self.len)
}

/// Gathers the indices of all set bits into a `Vec<u32>`.
///
/// Delegates to the bulk routine in `bit::set_indices`, passing the backing byte slice
/// plus this buffer's bit offset and bit length.
///
/// Intended as a faster alternative to `.set_indices().collect()`: the underlying
/// routine is described as pre-allocating its output, writing through raw pointers to
/// avoid bounds checks, and using BMI2 instructions on x86-64. It is reported to be
/// particularly effective at high density (3.1x faster than Arrow at 99% density).
///
/// NOTE(review): indices are returned as `u32`; confirm how the underlying routine
/// treats buffers longer than `u32::MAX` bits.
pub fn collect_set_indices(&self) -> Vec<u32> {
    let bytes = self.buffer.as_slice();
    collect_set_indices_fn(bytes, self.offset, self.len)
}

/// Iterator over set slices of the underlying buffer
Expand Down
4 changes: 4 additions & 0 deletions vortex-buffer/src/bit/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mod buf_mut;
mod count_ones;
mod macros;
mod ops;
mod set_indices;

pub use arrow_buffer::bit_chunk_iterator::BitChunkIterator;
pub use arrow_buffer::bit_chunk_iterator::BitChunks;
Expand All @@ -23,6 +24,9 @@ pub use arrow_buffer::bit_iterator::BitIterator;
pub use arrow_buffer::bit_iterator::BitSliceIterator;
pub use buf::*;
pub use buf_mut::*;
pub use set_indices::ScalarBitIndexIterator;
pub use set_indices::collect_set_indices;
pub use set_indices::collect_set_indices_scalar;

/// Get the bit value at `index` out of `buf`.
///
Expand Down
Loading
Loading