diff --git a/crates/paimon/src/arrow/format/mosaic.rs b/crates/paimon/src/arrow/format/mosaic.rs
index 963226b8..23f97803 100644
--- a/crates/paimon/src/arrow/format/mosaic.rs
+++ b/crates/paimon/src/arrow/format/mosaic.rs
@@ -22,7 +22,8 @@ use crate::io::FileRead;
use crate::spec::{DataField, DataType as PaimonDataType, Datum, Predicate};
use crate::table::{ArrowRecordBatchStream, RowRange};
use crate::Error;
-use arrow_array::{ArrayRef, RecordBatch, RecordBatchOptions, UInt64Array};
+use arrow_array::RecordBatch;
+use arrow_array::RecordBatchOptions;
use arrow_schema::{DataType as ArrowDataType, SchemaRef, TimeUnit};
use async_stream::try_stream;
use async_trait::async_trait;
@@ -90,7 +91,7 @@ impl FormatFileReader for MosaicFormatReader {
let row_group_rows = mosaic_reader
.row_group_num_rows(row_group_index)
.map_err(mosaic_read_error)?;
- let selected_indices = selected_indices_for_row_group(
+ let selected_slices = selected_slices_for_row_group(
row_group_rows,
row_group_start,
row_selection.as_deref(),
@@ -102,8 +103,8 @@ impl FormatFileReader for MosaicFormatReader {
source: None,
})?;
- if let Some(indices) = selected_indices.as_ref() {
- if indices.is_empty() {
+ if let Some(slices) = selected_slices.as_ref() {
+ if slices.is_empty() {
continue;
}
}
@@ -124,9 +125,9 @@ impl FormatFileReader for MosaicFormatReader {
}
let batch = if all_projected_columns_missing {
- let row_count = selected_indices
+ let row_count = selected_slices
.as_ref()
- .map_or(row_group_rows, UInt64Array::len);
+ .map_or(row_group_rows, |slices| selected_row_count(slices));
empty_batch(read_schema.clone(), row_count)?
} else {
let names = projected_names
@@ -140,7 +141,7 @@ impl FormatFileReader for MosaicFormatReader {
let batch = row_group_reader
.read_columns()
.map_err(mosaic_read_error)?;
- take_rows(batch, selected_indices.as_ref(), &read_schema)?
+ take_row_slices(batch, selected_slices.as_deref(), &read_schema)?
};
for chunk in split_batch(batch, batch_size) {
yield chunk;
@@ -397,11 +398,11 @@ fn is_timestamp_nanos_struct(fields: &arrow_schema::Fields) -> bool {
&& *fields[1].data_type() == ArrowDataType::Int32
}
-fn selected_indices_for_row_group(
+fn selected_slices_for_row_group(
row_group_rows: usize,
row_group_start: usize,
row_selection: Option<&[RowRange]>,
-) -> crate::Result