-
Notifications
You must be signed in to change notification settings - Fork 52
fix: handle non-contiguous RowRanges when resolving global row IDs #383
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
5d534db
ad03498
cd7bd44
f9721a8
41f932d
6bd98d8
a694567
eb48e42
7b00794
5b28d56
0d9a174
1336922
8f82f44
d3b73e1
fcc1ac8
0fb2dec
a3e37bd
8820e7c
376a312
f1c02db
ad66b22
25ef3d0
8482e90
fa1b068
8dc8576
db4b42e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,6 +40,19 @@ class Schema; | |
|
|
||
| namespace paimon { | ||
|
|
||
| namespace { | ||
|
|
||
| std::pair<int64_t, int64_t> ComputeBatchSliceByReadRange( | ||
| const std::vector<uint64_t>& global_row_ids, const std::pair<uint64_t, uint64_t>& read_range) { | ||
| auto begin_it = | ||
| std::lower_bound(global_row_ids.begin(), global_row_ids.end(), read_range.first); | ||
| auto end_it = std::lower_bound(global_row_ids.begin(), global_row_ids.end(), read_range.second); | ||
| return {static_cast<int64_t>(std::distance(global_row_ids.begin(), begin_it)), | ||
| static_cast<int64_t>(std::distance(global_row_ids.begin(), end_it))}; | ||
| } | ||
|
|
||
| } // namespace | ||
|
|
||
| Result<std::unique_ptr<PrefetchFileBatchReaderImpl>> PrefetchFileBatchReaderImpl::Create( | ||
| const std::string& data_file_path, const ReaderBuilder* reader_builder, | ||
| const std::shared_ptr<FileSystem>& fs, uint32_t prefetch_max_parallel_num, int32_t batch_size, | ||
|
|
@@ -265,6 +278,7 @@ Status PrefetchFileBatchReaderImpl::CleanUp() { | |
|
|
||
| read_ranges_.clear(); | ||
| read_ranges_in_group_.clear(); | ||
| current_batch_global_row_ids_.clear(); | ||
| clean_prefetch_queue(); | ||
| for (size_t i = 0; i < readers_pos_.size(); i++) { | ||
| readers_pos_[i]->store(0); | ||
|
|
@@ -409,42 +423,54 @@ Status PrefetchFileBatchReaderImpl::EnsureReaderPosition( | |
| Status PrefetchFileBatchReaderImpl::HandleReadResult( | ||
| size_t reader_idx, const std::pair<uint64_t, uint64_t>& read_range, | ||
| ReadBatchWithBitmap&& read_batch_with_bitmap) { | ||
| PAIMON_ASSIGN_OR_RAISE(uint64_t first_row_number, | ||
| readers_[reader_idx]->GetPreviousBatchFirstRowNumber()); | ||
| auto& prefetch_queue = prefetch_queues_[reader_idx]; | ||
| if (!BatchReader::IsEofBatch(read_batch_with_bitmap)) { | ||
| auto& [read_batch, bitmap] = read_batch_with_bitmap; | ||
| auto& [c_array, c_schema] = read_batch; | ||
|
|
||
| if (first_row_number >= read_range.second) { | ||
| // fully out of range, data before first_row_number has been filtered out | ||
| readers_pos_[reader_idx]->store(first_row_number); | ||
| std::vector<uint64_t> global_row_ids; | ||
| global_row_ids.reserve(c_array->length); | ||
| for (int64_t i = 0; i < c_array->length; ++i) { | ||
| PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, | ||
| readers_[reader_idx]->GetPreviousBatchFileRowId(i)); | ||
| global_row_ids.push_back(global_row_id); | ||
| } | ||
| if (global_row_ids.empty()) { | ||
| ReaderUtils::ReleaseReadBatch(std::move(read_batch)); | ||
| return Status::OK(); | ||
| } | ||
| auto [slice_begin, slice_end] = ComputeBatchSliceByReadRange(global_row_ids, read_range); | ||
| if (slice_begin >= slice_end) { | ||
| readers_pos_[reader_idx]->store(read_range.second); | ||
| ReaderUtils::ReleaseReadBatch(std::move(read_batch)); | ||
| return Status::OK(); | ||
| } else if (first_row_number + c_array->length > read_range.second) { | ||
| // partially out of range, data before read_range.second has been effectively consumed | ||
| } else if (slice_begin > 0 || slice_end < c_array->length) { | ||
| readers_pos_[reader_idx]->store(read_range.second); | ||
| PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr<arrow::Array> src_array, | ||
| arrow::ImportArray(c_array.get(), c_schema.get())); | ||
| int32_t target_length = read_range.second - first_row_number; | ||
| auto array = src_array->Slice(/*offset=*/0, target_length); | ||
| auto array = src_array->Slice(slice_begin, slice_end - slice_begin); | ||
| PAIMON_RETURN_NOT_OK_FROM_ARROW( | ||
| arrow::ExportArray(*array, c_array.get(), c_schema.get())); | ||
| bitmap.RemoveRange(target_length, src_array->length()); | ||
| RoaringBitmap32 sliced_bitmap; | ||
| for (auto iter = bitmap.EqualOrLarger(slice_begin); | ||
| iter != bitmap.End() && *iter < slice_end; ++iter) { | ||
| sliced_bitmap.Add(*iter - slice_begin); | ||
| } | ||
| bitmap = std::move(sliced_bitmap); | ||
| global_row_ids = std::vector<uint64_t>(global_row_ids.begin() + slice_begin, | ||
| global_row_ids.begin() + slice_end); | ||
| } else { | ||
| // all within the range, data before readers_[reader_idx]->GetNextRowToRead() has been | ||
| // effectively consumed | ||
| readers_pos_[reader_idx]->store(readers_[reader_idx]->GetNextRowToRead()); | ||
| } | ||
| if (bitmap.IsEmpty()) { | ||
| ReaderUtils::ReleaseReadBatch(std::move(read_batch)); | ||
| return Status::OK(); | ||
| } | ||
| prefetch_queue->push({read_range, std::move(read_batch_with_bitmap), first_row_number}); | ||
| prefetch_queue->push( | ||
| {read_range, std::move(read_batch_with_bitmap), std::move(global_row_ids)}); | ||
| } else { | ||
| std::pair<uint64_t, uint64_t> eof_range; | ||
| PAIMON_ASSIGN_OR_RAISE(eof_range, EofRange()); | ||
| prefetch_queue->push({eof_range, std::move(read_batch_with_bitmap), first_row_number}); | ||
| prefetch_queue->push({eof_range, std::move(read_batch_with_bitmap), {}}); | ||
| readers_pos_[reader_idx]->store(std::numeric_limits<uint64_t>::max()); | ||
| } | ||
| return Status::OK(); | ||
|
|
@@ -527,7 +553,7 @@ Result<BatchReader::ReadBatchWithBitmap> PrefetchFileBatchReaderImpl::NextBatchW | |
| std::unique_lock<std::mutex> lock(working_mutex_); | ||
| cv_.notify_one(); | ||
| } | ||
| previous_batch_first_row_num_ = prefetch_batch.value().previous_batch_first_row_num; | ||
| current_batch_global_row_ids_ = std::move(prefetch_batch.value().global_row_ids); | ||
| return std::move(prefetch_batch).value().batch; | ||
| } | ||
| } | ||
|
|
@@ -537,7 +563,7 @@ Result<BatchReader::ReadBatchWithBitmap> PrefetchFileBatchReaderImpl::NextBatchW | |
| assert(false); | ||
| return Status::Invalid("peek batch not suppose to be nullptr"); | ||
| } | ||
| previous_batch_first_row_num_ = peek_batch->previous_batch_first_row_num; | ||
| current_batch_global_row_ids_.clear(); | ||
| return BatchReader::MakeEofBatchWithBitmap(); | ||
| } | ||
| if (value_count == prefetch_queues_.size()) { | ||
|
|
@@ -571,8 +597,19 @@ Result<std::unique_ptr<::ArrowSchema>> PrefetchFileBatchReaderImpl::GetFileSchem | |
| return readers_[0]->GetFileSchema(); | ||
| } | ||
|
|
||
| Result<uint64_t> PrefetchFileBatchReaderImpl::GetPreviousBatchFirstRowNumber() const { | ||
| return previous_batch_first_row_num_; | ||
| Result<uint64_t> PrefetchFileBatchReaderImpl::GetPreviousBatchFileRowId( | ||
| uint64_t batch_row_id) const { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why can’t we just return
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
| if (current_batch_global_row_ids_.size() == 0) { | ||
| return Status::Invalid( | ||
| "Last batch is not read or last batch is empty, cannot get previous batch global row " | ||
| "id"); | ||
| } | ||
| if (batch_row_id >= current_batch_global_row_ids_.size()) { | ||
| return Status::Invalid( | ||
| fmt::format("batch_row_id {} is out of range, last batch row count is {}", batch_row_id, | ||
| current_batch_global_row_ids_.size())); | ||
| } | ||
| return current_batch_global_row_ids_[batch_row_id]; | ||
| } | ||
|
|
||
| Result<uint64_t> PrefetchFileBatchReaderImpl::GetNumberOfRows() const { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are there any explicit semantic constraints on this interface before reading begins and after EOF is reached? As per previous discussions, it returns Status::Invalid before reading starts. However, after reaching EOF, the behavior currently varies wildly—some continue to accumulate, while others return errors. Should we impose some constraints on this? cc @lxy-9602